aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jean Guyomarc'h <jean@guyomarch.bzh> 2018-01-13 22:23:46 +0100
committer Jean Guyomarc'h <jean@guyomarch.bzh> 2018-01-13 22:23:46 +0100
commit 5bf56ee88a959d50166f5cd6ebc9bd95a1c712d2 (patch)
tree e4a301a9279896e10e76a8177d533a9fe0530c71
parent wip/eo: rewrite access tables (diff)
download efl-devs/jayji/perfs.tar.gz
eo: use AVX2 to resolve all the bitwise operations in one go (branch: devs/jayji/perfs)
FIXME: the AVX2 detection is unreliable, and the implementation is not portable at all.
-rw-r--r-- configure.ac 18
-rw-r--r-- src/Makefile_Eo.am 1
-rw-r--r-- src/lib/eo/eo_ptr_indirection.c 44
3 files changed, 49 insertions, 14 deletions
diff --git a/configure.ac b/configure.ac
index c399877c59..c295879338 100644
--- a/configure.ac
+++ b/configure.ac
@@ -550,6 +550,7 @@ EFLALL_CFLAGS="${EFLALL_CFLAGS}"
## CPU architecture specific assembly
build_cpu_mmx="no"
+build_cpu_avx2="no"
build_cpu_sse3="no"
build_cpu_altivec="no"
build_cpu_neon="no"
@@ -566,6 +567,7 @@ AC_ARG_ENABLE([neon],
])
SSE3_CFLAGS=""
+AVX2_CFLAGS=""
ALTIVEC_CFLAGS=""
NEON_CFLAGS=""
@@ -581,10 +583,23 @@ case $host_cpu in
[build_cpu_sse3="no"])
AC_MSG_CHECKING([whether to build SSE3 code])
AC_MSG_RESULT([${build_cpu_sse3}])
-
if test "x$build_cpu_sse3" = "xyes" ; then
SSE3_CFLAGS="-msse3"
fi
+
+
+ AC_CHECK_HEADER([avxintrin.h],
+ [
+ AC_DEFINE(BUILD_AVX2, 1, [Build AVX2 Code])
+ build_cpu_avx2="yes"
+ ],
+ [build_cpu_avx2="no"])
+ AC_MSG_CHECKING([whether to build AVX2 code])
+ AC_MSG_RESULT([${build_cpu_avx2}])
+
+# if test "x$build_cpu_avx2" = "xyes" ; then
+ AVX2_CFLAGS="-mavx2"
+ # fi
;;
*power* | *ppc*)
build_cpu_altivec="yes"
@@ -665,6 +680,7 @@ esac
AC_SUBST([ALTIVEC_CFLAGS])
AC_SUBST([SSE3_CFLAGS])
+AC_SUBST([AVX2_CFLAGS])
AC_SUBST([NEON_CFLAGS])
#### Checks for linker characteristics
diff --git a/src/Makefile_Eo.am b/src/Makefile_Eo.am
index 438fbf222a..db55420cd7 100644
--- a/src/Makefile_Eo.am
+++ b/src/Makefile_Eo.am
@@ -37,6 +37,7 @@ lib/eo/efl_future.c
lib_eo_libeo_la_CPPFLAGS = \
-I$(top_builddir)/src/lib/efl \
@EO_CFLAGS@ \
+@AVX2_CFLAGS@ \
@VALGRIND_CFLAGS@
lib_eo_libeo_la_LIBADD = @EO_LIBS@
lib_eo_libeo_la_DEPENDENCIES = @EO_INTERNAL_LIBS@
diff --git a/src/lib/eo/eo_ptr_indirection.c b/src/lib/eo/eo_ptr_indirection.c
index 4b3adbf1a5..1d4766ca66 100644
--- a/src/lib/eo/eo_ptr_indirection.c
+++ b/src/lib/eo/eo_ptr_indirection.c
@@ -2,6 +2,7 @@
# include <config.h>
#endif
+#include <immintrin.h>
#include "eo_ptr_indirection.h"
extern Eina_Thread _efl_object_main_thread;
@@ -95,7 +96,24 @@ _eo_obj_pointer_get(const Eo_Id obj_id, const char *restrict func_name, const ch
&&do_domain_thread,
&&do_domain_other,
};
- const unsigned int domain = (obj_id >> SHIFT_DOMAIN) & MASK_DOMAIN;
+
+ /* 64-bit lanes, from lane 3 down to lane 0: DOMAIN, GENERATION, OBJ_TAG, ENTRY */
+ const __m256i src = _mm256_set_epi64x(obj_id, obj_id, obj_id, obj_id);
+ const __m256i shift = _mm256_set_epi64x(SHIFT_DOMAIN, 0, 0, SHIFT_ENTRY_ID);
+ const __m256i masks = _mm256_set_epi64x(MASK_DOMAIN, MASK_GENERATIONS, MASK_OBJ_TAG, MASK_ENTRY_ID);
+
+ const __m256i shifted = _mm256_srav_epi32(src, shift);
+ const __m256i result = _mm256_and_si256(shifted, masks);
+
+ const int64_t domain = _mm256_extract_epi64(result, 3);
+ const int64_t generation = _mm256_extract_epi64(result, 2);
+ const int64_t tag_bit = _mm256_extract_epi64(result, 1);
+ const int64_t entry_id = _mm256_extract_epi64(result, 0);
+
+// printf("===> dom=%lli gen=0x%llx tag=0x%llx entry=%lli\n", domain, generation, tag_bit, entry_id);
+
+
+// const unsigned int domain = (obj_id >> SHIFT_DOMAIN) & MASK_DOMAIN;
goto *jump[domain];
do_domain_main: EINA_HOT
@@ -105,11 +123,11 @@ do_domain_main: EINA_HOT
return _eo_main_id_table.cache.object;
/* XXX This could definitely be done in one go with vectorization */
- const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
- const unsigned int generation = obj_id & MASK_GENERATIONS;
+ // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
+ // const unsigned int generation = obj_id & MASK_GENERATIONS;
- // get tag bit to check later down below - pipelining
- const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
+ // // get tag bit to check later down below - pipelining
+ // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
if (EINA_UNLIKELY(!tag_bit ||
(entry_id >= _eo_main_id_table.count)))
goto main_err;
@@ -146,11 +164,11 @@ do_domain_other: EINA_COLD
if (obj_id == table->cache.id)
return table->cache.object;
- const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
- const unsigned int generation = obj_id & MASK_GENERATIONS;
+ // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
+ // const unsigned int generation = obj_id & MASK_GENERATIONS;
- // get tag bit to check later down below - pipelining
- const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
+ // // get tag bit to check later down below - pipelining
+ // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
if (EINA_UNLIKELY(!tag_bit ||
(entry_id >= table->count))) goto err;
@@ -183,11 +201,11 @@ do_domain_shared: EINA_COLD
// by EO_OBJ_DONE() to release
return table->cache.object;
- const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
- const unsigned int generation = obj_id & MASK_GENERATIONS;
+ // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
+ // const unsigned int generation = obj_id & MASK_GENERATIONS;
- // get tag bit to check later down below - pipelining
- const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
+ // // get tag bit to check later down below - pipelining
+ // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
if (EINA_UNLIKELY((!tag_bit ||
entry_id >= table->count))) goto err_shared;