efl - eo - massively improve eo call resolve and data scope get with cache

BEWARE! this breaks eo ABI. _eo_call_resolve and _eo_data_scope_get
are 2 of the biggest cpu users in eo. they easily consume something like
10-15% of cpu between them on tests that drive a lot of api - like simply
scrolling a genlist around. this is a lot of overhead for efl. this
commit fixes that by making them far leaner. in fact this got an overall
10% cpu usage drop, and that figure includes all of the actual rendering
and other code work, so the eo overhead of these functions themselves
would drop incredibly low. using this much cpu just on doing call
marshalling is a bug and thus - this is a fix, but ... with an abi break
to boot. more abi breaks may happen before release, to try and get them
all into this release so we don't have to do them again later.

note i actually tested 4, 3, 2, and 1 cache slots, and 1 was the
fastest. 2 was very close behind and then it got worse. all were
better than with no cache though.

benchmark test method:

export ELM_ENGINE=gl
export ELM_TEST_AUTOBOUNCE=1

while [ 1 ]; do sync; sync; sync; time elementary_test -to genlist;
sleep 1; done

take the 2nd to the 8th results (7 runs) and total up system and user
time. compare this to the same without the cache. with the cache, the cpu
time used is 90.3% of the cpu time used without it - thus a win. at least
in my tests.

@fix
This commit is contained in:
Carsten Haitzler 2015-10-21 20:06:23 +09:00
parent b7d2f4f814
commit 7cc41473a3
2 changed files with 90 additions and 33 deletions

View File

@ -437,27 +437,40 @@ EAPI Eina_Bool eo_shutdown(void);
typedef struct _Eo_Op_Call_Data
{
Eo *obj;
Eo_Class *klass; // remove this not necessary in Eo_Hook_Call
void *func;
void *data;
} Eo_Op_Call_Data;
typedef void (*Eo_Hook_Call)(const Eo_Class *klass_id, const Eo *obj, const char *eo_func_name, void *func, ...);
#define EO_CALL_CACHE_SIZE 1
EAPI extern Eo_Hook_Call eo_hook_call_pre;
EAPI extern Eo_Hook_Call eo_hook_call_post;
// lookup key of one call-cache slot. _eo_call_resolve() stores the class
// it resolved against here and, on later calls, compares the current class
// pointer with klass to decide whether the slot is a hit.
typedef struct _Eo_Call_Cache_Index
{
const void *klass;
} Eo_Call_Cache_Index;
// cached result of one call-cache slot. func is written by
// _eo_call_resolve() as the resolved op_type_funcs pointer (kept as an
// opaque const void * here) and cast back to that type on a cache hit.
typedef struct _Eo_Call_Cache_Entry
{
const void *func;
} Eo_Call_Cache_Entry;
// cached data offset of one call-cache slot. off is the byte distance from
// the object pointer to the data pointer that was handed to the call when
// the slot was filled; on a hit it is added back to the object pointer.
typedef struct _Eo_Call_Cache_Off
{
int off;
} Eo_Call_Cache_Off;
// per-call-site resolve cache passed to _eo_call_resolve().
// EO_FUNC_COMMON_OP declares one static, zero-initialized instance of this.
// the three arrays are parallel: slot i is described by index[i] (class
// key), entry[i] (resolved function info) and off[i] (data offset).
typedef struct _Eo_Call_Cache
{
Eo_Call_Cache_Index index[EO_CALL_CACHE_SIZE];
Eo_Call_Cache_Entry entry[EO_CALL_CACHE_SIZE];
Eo_Call_Cache_Off off [EO_CALL_CACHE_SIZE];
#if EO_CALL_CACHE_SIZE > 1
// slot to overwrite on the next cache fill; advanced modulo
// EO_CALL_CACHE_SIZE after each fill. only present with more than one slot.
int next_slot;
#endif
} Eo_Call_Cache;
// to pass the internal function call to EO_FUNC_BODY (as Func parameter)
#define EO_FUNC_CALL(...) __VA_ARGS__
#define EO_HOOK_CALL_PREPARE(Hook, FuncName) \
if (Hook) \
Hook(___call.klass, ___call.obj, FuncName, ___call.func);
#define EO_HOOK_CALL_PREPAREV(Hook, FuncName, ...) \
if (Hook) \
Hook(___call.klass, ___call.obj, FuncName, ___call.func, __VA_ARGS__);
#ifndef _WIN32
# define EO_FUNC_COMMON_OP_FUNC(Name) ((const void *) Name)
#else
@ -466,11 +479,13 @@ EAPI extern Eo_Hook_Call eo_hook_call_post;
// cache OP id, get real fct and object data then do the call
#define EO_FUNC_COMMON_OP(Name, DefRet) \
Eo_Op_Call_Data ___call; \
static Eo_Call_Cache ___callcache = { 0 }; \
static Eo_Op ___op = EO_NOOP; \
Eo_Op_Call_Data ___call; \
if (___op == EO_NOOP) \
___op = _eo_api_op_id_get(EO_FUNC_COMMON_OP_FUNC(Name)); \
if (!_eo_call_resolve(#Name, ___op, &___call, __FILE__, __LINE__)) return DefRet; \
___op = _eo_api_op_id_get(EO_FUNC_COMMON_OP_FUNC(Name)); \
if (!_eo_call_resolve(#Name, ___op, &___call, &___callcache, \
__FILE__, __LINE__)) return DefRet; \
_Eo_##Name##_func _func_ = (_Eo_##Name##_func) ___call.func; \
// to define an EAPI function
@ -481,9 +496,7 @@ EAPI extern Eo_Hook_Call eo_hook_call_post;
typedef Ret (*_Eo_##Name##_func)(Eo *, void *obj_data); \
Ret _r; \
EO_FUNC_COMMON_OP(Name, DefRet); \
EO_HOOK_CALL_PREPARE(eo_hook_call_pre, #Name); \
_r = _func_(___call.obj, ___call.data); \
EO_HOOK_CALL_PREPARE(eo_hook_call_post, #Name); \
return _r; \
}
@ -493,9 +506,7 @@ EAPI extern Eo_Hook_Call eo_hook_call_post;
{ \
typedef void (*_Eo_##Name##_func)(Eo *, void *obj_data); \
EO_FUNC_COMMON_OP(Name, ); \
EO_HOOK_CALL_PREPARE(eo_hook_call_pre, #Name); \
_func_(___call.obj, ___call.data); \
EO_HOOK_CALL_PREPARE(eo_hook_call_post, #Name); \
}
#define EO_FUNC_BODYV(Name, Ret, DefRet, Arguments, ...) \
@ -505,9 +516,7 @@ EAPI extern Eo_Hook_Call eo_hook_call_post;
typedef Ret (*_Eo_##Name##_func)(Eo *, void *obj_data, __VA_ARGS__); \
Ret _r; \
EO_FUNC_COMMON_OP(Name, DefRet); \
EO_HOOK_CALL_PREPAREV(eo_hook_call_pre, #Name, Arguments); \
_r = _func_(___call.obj, ___call.data, Arguments); \
EO_HOOK_CALL_PREPAREV(eo_hook_call_post, #Name, Arguments); \
return _r; \
}
@ -517,9 +526,7 @@ EAPI extern Eo_Hook_Call eo_hook_call_post;
{ \
typedef void (*_Eo_##Name##_func)(Eo *, void *obj_data, __VA_ARGS__); \
EO_FUNC_COMMON_OP(Name, ); \
EO_HOOK_CALL_PREPAREV(eo_hook_call_pre, #Name, Arguments); \
_func_(___call.obj, ___call.data, Arguments); \
EO_HOOK_CALL_PREPAREV(eo_hook_call_post, #Name, Arguments); \
}
#ifndef _WIN32
@ -537,7 +544,7 @@ EAPI extern Eo_Hook_Call eo_hook_call_post;
EAPI Eo_Op _eo_api_op_id_get(const void *api_func);
// gets the real function pointer and the object data
EAPI Eina_Bool _eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, const char *file, int line);
EAPI Eina_Bool _eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, Eo_Call_Cache *callcache, const char *file, int line);
// start of eo_do barrier, gets the object pointer and ref it, put it on the stack
EAPI Eina_Bool _eo_do_start(const Eo *obj, const Eo_Class *cur_klass, Eina_Bool is_super, void *eo_stack);

View File

@ -243,9 +243,6 @@ _eo_kls_itr_next(const _Eo_Class *orig_kls, const _Eo_Class *cur_klass, Eo_Op op
/************************************ EO ************************************/
EAPI Eo_Hook_Call eo_hook_call_pre = NULL;
EAPI Eo_Hook_Call eo_hook_call_post = NULL;
#define EO_INVALID_DATA (void *) -1
// 1024 entries == 16k or 32k (32 or 64bit) for eo call stack. that's 1023
// imbricated/recursive calls it can handle before barfing. i'd say that's ok
@ -524,11 +521,13 @@ _eo_do_end(void *eo_stack)
_eo_call_stack_resize(stack, EINA_FALSE);
}
#define EO_CALL_RESOLVE_CACHE 1
EAPI Eina_Bool
_eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, const char *file, int line)
_eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, Eo_Call_Cache *callcache, const char *file, int line)
{
Eo_Stack_Frame *fptr;
const _Eo_Class *klass;
const _Eo_Class *klass, *inputklass;
const op_type_funcs *func;
Eina_Bool is_obj;
@ -539,7 +538,7 @@ _eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, c
is_obj = !_eo_is_a_class(fptr->eo_id);
klass = (is_obj) ? fptr->o.obj->klass : fptr->o.kls;
inputklass = klass = (is_obj) ? fptr->o.obj->klass : fptr->o.kls;
if (op == EO_NOOP)
{
@ -550,6 +549,43 @@ _eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, c
return EINA_FALSE;
}
#ifdef EO_CALL_RESOLVE_CACHE
if (!fptr->cur_klass)
{
# if EO_CALL_CACHE_SIZE > 1
int i;
for (i = 0; i < EO_CALL_CACHE_SIZE; i++)
# else
const int i = 0;
# endif
{
if ((const void *)inputklass == callcache->index[i].klass)
{
func = (const op_type_funcs *)callcache->entry[i].func;
call->func = func->func;
if (is_obj)
{
call->obj = (Eo *)fptr->eo_id;
if (func->src == fptr->o.obj->klass)
{
if (fptr->obj_data == EO_INVALID_DATA)
fptr->obj_data = (char *)fptr->o.obj + callcache->off[i].off;
call->data = fptr->obj_data;
}
else
call->data = (char *)fptr->o.obj + callcache->off[i].off;
}
else
{
call->data = NULL;
}
return EINA_TRUE;
}
}
}
#endif
/* If we have a current class, we need to itr to the next. */
if (fptr->cur_klass)
{
@ -571,7 +607,6 @@ _eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, c
if (EINA_LIKELY(func->func && func->src))
{
call->func = func->func;
call->klass = _eo_class_id_get(klass);
if (is_obj)
{
@ -588,10 +623,26 @@ _eo_call_resolve(const char *func_name, const Eo_Op op, Eo_Op_Call_Data *call, c
}
else
{
call->obj = call->klass;
call->data = NULL;
}
#ifdef EO_CALL_RESOLVE_CACHE
if (!fptr->cur_klass)
{
# if EO_CALL_CACHE_SIZE > 1
const int slot = callcache->next_slot;
# else
const int slot = 0;
# endif
callcache->index[slot].klass = (const void *)inputklass;
callcache->entry[slot].func = (const void *)func;
callcache->off[slot].off = (int)((long)((char *)call->data - (char *)fptr->o.obj));
# if EO_CALL_CACHE_SIZE > 1
callcache->next_slot = (slot + 1) % EO_CALL_CACHE_SIZE;
# endif
}
#endif
return EINA_TRUE;
}
@ -623,7 +674,6 @@ end:
if (EINA_LIKELY(func->func && func->src))
{
call->obj = _eo_id_get(emb_obj);
call->klass = _eo_class_id_get(emb_obj->klass);
call->func = func->func;
call->data = _eo_data_scope_get(emb_obj, func->src);