summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Guyomarc'h <jean@guyomarch.bzh>2018-01-13 22:23:46 +0100
committerJean Guyomarc'h <jean@guyomarch.bzh>2018-01-13 22:23:46 +0100
commit5bf56ee88a959d50166f5cd6ebc9bd95a1c712d2 (patch)
treee4a301a9279896e10e76a8177d533a9fe0530c71
parent96dc5e1ef34869e792b5f83cd38bfb92332e5a07 (diff)
eo: use AVX2 to resolve all the bitwise in one godevs/jayji/perfs
FIXME: detection of avx2 is crap. Implem is not portable at all.
-rw-r--r--configure.ac18
-rw-r--r--src/Makefile_Eo.am1
-rw-r--r--src/lib/eo/eo_ptr_indirection.c44
3 files changed, 49 insertions, 14 deletions
diff --git a/configure.ac b/configure.ac
index c399877c59..c295879338 100644
--- a/configure.ac
+++ b/configure.ac
@@ -550,6 +550,7 @@ EFLALL_CFLAGS="${EFLALL_CFLAGS}"
550## CPU architecture specific assembly 550## CPU architecture specific assembly
551 551
552build_cpu_mmx="no" 552build_cpu_mmx="no"
553build_cpu_avx2="no"
553build_cpu_sse3="no" 554build_cpu_sse3="no"
554build_cpu_altivec="no" 555build_cpu_altivec="no"
555build_cpu_neon="no" 556build_cpu_neon="no"
@@ -566,6 +567,7 @@ AC_ARG_ENABLE([neon],
566 ]) 567 ])
567 568
568SSE3_CFLAGS="" 569SSE3_CFLAGS=""
570AVX2_CFLAGS=""
569ALTIVEC_CFLAGS="" 571ALTIVEC_CFLAGS=""
570NEON_CFLAGS="" 572NEON_CFLAGS=""
571 573
@@ -581,10 +583,23 @@ case $host_cpu in
581 [build_cpu_sse3="no"]) 583 [build_cpu_sse3="no"])
582 AC_MSG_CHECKING([whether to build SSE3 code]) 584 AC_MSG_CHECKING([whether to build SSE3 code])
583 AC_MSG_RESULT([${build_cpu_sse3}]) 585 AC_MSG_RESULT([${build_cpu_sse3}])
584
585 if test "x$build_cpu_sse3" = "xyes" ; then 586 if test "x$build_cpu_sse3" = "xyes" ; then
586 SSE3_CFLAGS="-msse3" 587 SSE3_CFLAGS="-msse3"
587 fi 588 fi
589
590
591 AC_CHECK_HEADER([avxintrin.h],
592 [
593 AC_DEFINE(BUILD_AVX2, 1, [Build AVX2 Code])
594 build_cpu_avx2="yes"
595 ],
596 [build_cpu_avx2="no"])
597 AC_MSG_CHECKING([whether to build AVX2 code])
598 AC_MSG_RESULT([${build_cpu_avx2}])
599
600# if test "x$build_cpu_avx2" = "xyes" ; then
601 AVX2_CFLAGS="-mavx2"
602 # fi
588 ;; 603 ;;
589 *power* | *ppc*) 604 *power* | *ppc*)
590 build_cpu_altivec="yes" 605 build_cpu_altivec="yes"
@@ -665,6 +680,7 @@ esac
665 680
666AC_SUBST([ALTIVEC_CFLAGS]) 681AC_SUBST([ALTIVEC_CFLAGS])
667AC_SUBST([SSE3_CFLAGS]) 682AC_SUBST([SSE3_CFLAGS])
683AC_SUBST([AVX2_CFLAGS])
668AC_SUBST([NEON_CFLAGS]) 684AC_SUBST([NEON_CFLAGS])
669 685
670#### Checks for linker characteristics 686#### Checks for linker characteristics
diff --git a/src/Makefile_Eo.am b/src/Makefile_Eo.am
index 438fbf222a..db55420cd7 100644
--- a/src/Makefile_Eo.am
+++ b/src/Makefile_Eo.am
@@ -37,6 +37,7 @@ lib/eo/efl_future.c
37lib_eo_libeo_la_CPPFLAGS = \ 37lib_eo_libeo_la_CPPFLAGS = \
38-I$(top_builddir)/src/lib/efl \ 38-I$(top_builddir)/src/lib/efl \
39@EO_CFLAGS@ \ 39@EO_CFLAGS@ \
40@AVX2_CFLAGS@ \
40@VALGRIND_CFLAGS@ 41@VALGRIND_CFLAGS@
41lib_eo_libeo_la_LIBADD = @EO_LIBS@ 42lib_eo_libeo_la_LIBADD = @EO_LIBS@
42lib_eo_libeo_la_DEPENDENCIES = @EO_INTERNAL_LIBS@ 43lib_eo_libeo_la_DEPENDENCIES = @EO_INTERNAL_LIBS@
diff --git a/src/lib/eo/eo_ptr_indirection.c b/src/lib/eo/eo_ptr_indirection.c
index 4b3adbf1a5..1d4766ca66 100644
--- a/src/lib/eo/eo_ptr_indirection.c
+++ b/src/lib/eo/eo_ptr_indirection.c
@@ -2,6 +2,7 @@
2# include <config.h> 2# include <config.h>
3#endif 3#endif
4 4
5#include <immintrin.h>
5#include "eo_ptr_indirection.h" 6#include "eo_ptr_indirection.h"
6 7
7extern Eina_Thread _efl_object_main_thread; 8extern Eina_Thread _efl_object_main_thread;
@@ -95,7 +96,24 @@ _eo_obj_pointer_get(const Eo_Id obj_id, const char *restrict func_name, const ch
95 &&do_domain_thread, 96 &&do_domain_thread,
96 &&do_domain_other, 97 &&do_domain_other,
97 }; 98 };
98 const unsigned int domain = (obj_id >> SHIFT_DOMAIN) & MASK_DOMAIN; 99
100 /* DOMAIN, GENERATION, OBJ_TAG, ENTRY */
101 const __m256i src = _mm256_set_epi64x(obj_id, obj_id, obj_id, obj_id);
102 const __m256i shift = _mm256_set_epi64x(SHIFT_DOMAIN, 0, 0, SHIFT_ENTRY_ID);
103 const __m256i masks = _mm256_set_epi64x(MASK_DOMAIN, MASK_GENERATIONS, MASK_OBJ_TAG, MASK_ENTRY_ID);
104
105 const __m256i shifted = _mm256_srav_epi32(src, shift);
106 const __m256i result = _mm256_and_si256(shifted, masks);
107
108 const int64_t domain = _mm256_extract_epi64(result, 3);
109 const int64_t generation = _mm256_extract_epi64(result, 2);
110 const int64_t tag_bit = _mm256_extract_epi64(result, 1);
111 const int64_t entry_id = _mm256_extract_epi64(result, 0);
112
113// printf("===> dom=%lli gen=0x%llx tag=0x%llx entry=%lli\n", domain, generation, tag_bit, entry_id);
114
115
116// const unsigned int domain = (obj_id >> SHIFT_DOMAIN) & MASK_DOMAIN;
99 goto *jump[domain]; 117 goto *jump[domain];
100 118
101do_domain_main: EINA_HOT 119do_domain_main: EINA_HOT
@@ -105,11 +123,11 @@ do_domain_main: EINA_HOT
105 return _eo_main_id_table.cache.object; 123 return _eo_main_id_table.cache.object;
106 124
107 /* XXX This could definitely be done in one go with vectorization */ 125 /* XXX This could definitely be done in one go with vectorization */
108 const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; 126 // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
109 const unsigned int generation = obj_id & MASK_GENERATIONS; 127 // const unsigned int generation = obj_id & MASK_GENERATIONS;
110 128
111 // get tag bit to check later down below - pipelining 129 // // get tag bit to check later down below - pipelining
112 const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; 130 // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
113 if (EINA_UNLIKELY(!tag_bit || 131 if (EINA_UNLIKELY(!tag_bit ||
114 (entry_id >= _eo_main_id_table.count))) 132 (entry_id >= _eo_main_id_table.count)))
115 goto main_err; 133 goto main_err;
@@ -146,11 +164,11 @@ do_domain_other: EINA_COLD
146 if (obj_id == table->cache.id) 164 if (obj_id == table->cache.id)
147 return table->cache.object; 165 return table->cache.object;
148 166
149 const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; 167 // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
150 const unsigned int generation = obj_id & MASK_GENERATIONS; 168 // const unsigned int generation = obj_id & MASK_GENERATIONS;
151 169
152 // get tag bit to check later down below - pipelining 170 // // get tag bit to check later down below - pipelining
153 const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; 171 // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
154 if (EINA_UNLIKELY(!tag_bit || 172 if (EINA_UNLIKELY(!tag_bit ||
155 (entry_id >= table->count))) goto err; 173 (entry_id >= table->count))) goto err;
156 174
@@ -183,11 +201,11 @@ do_domain_shared: EINA_COLD
183 // by EO_OBJ_DONE() to release 201 // by EO_OBJ_DONE() to release
184 return table->cache.object; 202 return table->cache.object;
185 203
186 const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID; 204 // const size_t entry_id = (obj_id >> SHIFT_ENTRY_ID) & MASK_ENTRY_ID;
187 const unsigned int generation = obj_id & MASK_GENERATIONS; 205 // const unsigned int generation = obj_id & MASK_GENERATIONS;
188 206
189 // get tag bit to check later down below - pipelining 207 // // get tag bit to check later down below - pipelining
190 const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG; 208 // const Eo_Id tag_bit = (obj_id) & MASK_OBJ_TAG;
191 if (EINA_UNLIKELY((!tag_bit || 209 if (EINA_UNLIKELY((!tag_bit ||
192 entry_id >= table->count))) goto err_shared; 210 entry_id >= table->count))) goto err_shared;
193 211