summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--configure.ac18
-rw-r--r--src/lib/evas/common/evas_blit_main.c8
-rw-r--r--src/lib/evas/common/evas_cpu.c9
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_color_neon.c10
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c47
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c14
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c33
-rw-r--r--src/lib/evas/common/evas_op_copy/op_copy_color_neon.c9
8 files changed, 145 insertions, 3 deletions
diff --git a/configure.ac b/configure.ac
index 9eed98ce9b..63cc54ddce 100644
--- a/configure.ac
+++ b/configure.ac
@@ -576,6 +576,21 @@ case $host_cpu in
576 CFLAGS="${CFLAGS_save}" 576 CFLAGS="${CFLAGS_save}"
577 fi 577 fi
578 ;; 578 ;;
579 aarch64*)
580 if test "x${want_neon}" = "xyes"; then
581 build_cpu_neon="yes"
582 AC_MSG_CHECKING([whether to use NEON instructions])
583 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <arm_neon.h>]], [[volatile uint32x4_t test = vdupq_n_u32(0x1);]])],[
584 AC_MSG_RESULT([yes])
585 AC_DEFINE([BUILD_NEON], [1], [Build NEON Code])
586 AC_DEFINE([BUILD_NEON_INTRINSICS], [1], [Build NEON Intrinsics])
587 build_cpu_neon="yes"
588 ],[
589 AC_MSG_RESULT([no])
590 build_cpu_neon="no"
591 ])
592 fi
593 ;;
579esac 594esac
580 595
581AC_SUBST([ALTIVEC_CFLAGS]) 596AC_SUBST([ALTIVEC_CFLAGS])
@@ -4741,6 +4756,9 @@ case $host_cpu in
4741 arm*) 4756 arm*)
4742 EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}]) 4757 EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}])
4743 ;; 4758 ;;
4759 aarch64*)
4760 EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}])
4761 ;;
4744esac 4762esac
4745 4763
4746if test "${have_linux}" = "yes"; then 4764if test "${have_linux}" = "yes"; then
diff --git a/src/lib/evas/common/evas_blit_main.c b/src/lib/evas/common/evas_blit_main.c
index 7f8faa18a4..4da4034742 100644
--- a/src/lib/evas/common/evas_blit_main.c
+++ b/src/lib/evas/common/evas_blit_main.c
@@ -132,6 +132,9 @@ evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int len)
132static void 132static void
133evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) 133evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
134{ 134{
135#ifdef BUILD_NEON_INTRINSICS
136evas_common_copy_pixels_rev_c(src, dst, len);
137#else
135 uint32_t *tmp = (void *)37; 138 uint32_t *tmp = (void *)37;
136#define AP "evas_common_copy_rev_pixels_neon_" 139#define AP "evas_common_copy_rev_pixels_neon_"
137 asm volatile ( 140 asm volatile (
@@ -228,6 +231,7 @@ evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
228 ); 231 );
229#undef AP 232#undef AP
230 233
234#endif
231} 235}
232#endif 236#endif
233 237
@@ -324,6 +328,9 @@ evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int len)
324#ifdef BUILD_NEON 328#ifdef BUILD_NEON
325static void 329static void
326evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ 330evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){
331#ifdef BUILD_NEON_INTRINSICS
332evas_common_copy_pixels_c(src, dst, len);
333#else
327 uint32_t *e,*tmp = (void *)37; 334 uint32_t *e,*tmp = (void *)37;
328 e = dst + len; 335 e = dst + len;
329#define AP "evas_common_copy_pixels_neon_" 336#define AP "evas_common_copy_pixels_neon_"
@@ -410,6 +417,7 @@ evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){
410 ); 417 );
411#undef AP 418#undef AP
412 419
420#endif
413} 421}
414#endif /* BUILD_NEON */ 422#endif /* BUILD_NEON */
415 423
diff --git a/src/lib/evas/common/evas_cpu.c b/src/lib/evas/common/evas_cpu.c
index 41390989d8..0f83258806 100644
--- a/src/lib/evas/common/evas_cpu.c
+++ b/src/lib/evas/common/evas_cpu.c
@@ -2,6 +2,11 @@
2#ifdef BUILD_MMX 2#ifdef BUILD_MMX
3#include "evas_mmx.h" 3#include "evas_mmx.h"
4#endif 4#endif
5#ifdef BUILD_NEON
6#ifdef BUILD_NEON_INTRINSICS
7#include <arm_neon.h>
8#endif
9#endif
5#if defined BUILD_SSE3 10#if defined BUILD_SSE3
6#include <immintrin.h> 11#include <immintrin.h>
7#endif 12#endif
@@ -92,6 +97,9 @@ evas_common_cpu_neon_test(void)
92{ 97{
93//#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70) 98//#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70)
94#ifdef BUILD_NEON 99#ifdef BUILD_NEON
100#ifdef BUILD_NEON_INTRINSICS
101 volatile uint32x4_t temp = vdupq_n_u32(0x1);
102#else
95 asm volatile ( 103 asm volatile (
96 ".fpu neon \n\t" 104 ".fpu neon \n\t"
97 "vqadd.u8 d0, d1, d0\n" 105 "vqadd.u8 d0, d1, d0\n"
@@ -101,6 +109,7 @@ evas_common_cpu_neon_test(void)
101 "d0", "d1" 109 "d0", "d1"
102 ); 110 );
103#endif 111#endif
112#endif
104//#endif 113//#endif
105} 114}
106 115
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
index 9e94298cc6..2bf14c1f7c 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
@@ -3,6 +3,14 @@
3#ifdef BUILD_NEON 3#ifdef BUILD_NEON
4static void 4static void
5_op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) { 5_op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) {
6#ifdef BUILD_NEON_INTRINSICS
7 DATA32 *e, a = 256 - (c >> 24);
8 UNROLL8_PLD_WHILE(d, l, e,
9 {
10 *d = c + MUL_256(a, *d);
11 d++;
12 });
13#else
6 DATA32 *e, *tmp = 0; 14 DATA32 *e, *tmp = 0;
7#define AP "B_C_DP" 15#define AP "B_C_DP"
8 asm volatile ( 16 asm volatile (
@@ -142,7 +150,7 @@ _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA3
142 150
143 ); 151 );
144#undef AP 152#undef AP
145 153#endif
146} 154}
147 155
148#define _op_blend_caa_dp_neon _op_blend_c_dp_neon 156#define _op_blend_caa_dp_neon _op_blend_c_dp_neon
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
index 99f4b38625..dbeb0638b3 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
@@ -19,6 +19,30 @@
19#ifdef BUILD_NEON 19#ifdef BUILD_NEON
20static void 20static void
21_op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { 21_op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) {
22#ifdef BUILD_NEON_INTRINSICS
23 DATA32 *e;
24 int alpha = 256 - (c >> 24);
25 UNROLL8_PLD_WHILE(d, l, e,
26 {
27 DATA32 a = *m;
28 switch(a)
29 {
30 case 0:
31 break;
32 case 255:
33 *d = c + MUL_256(alpha, *d);
34 break;
35 default:
36 {
37 DATA32 mc = MUL_SYM(a, c);
38 a = 256 - (mc >> 24);
39 *d = mc + MUL_256(a, *d);
40 }
41 break;
42 }
43 m++; d++;
44 });
45#else
22 DATA32 *e = d + l; 46 DATA32 *e = d + l;
23 47
24 // everything we can do only once per cycle 48 // everything we can do only once per cycle
@@ -142,12 +166,34 @@ _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, in
142 "q10", "q15", "q14", "memory" 166 "q10", "q15", "q14", "memory"
143 ); 167 );
144 } 168 }
169#endif
145} 170}
146#endif 171#endif
147 172
148#ifdef BUILD_NEON 173#ifdef BUILD_NEON
149static void 174static void
150_op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { 175_op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) {
176#ifdef BUILD_NEON_INTRINSICS
177 DATA32 *e;
178 int alpha;
179 UNROLL8_PLD_WHILE(d, l, e,
180 {
181 alpha = *m;
182 switch(alpha)
183 {
184 case 0:
185 break;
186 case 255:
187 *d = c;
188 break;
189 default:
190 alpha++;
191 *d = INTERP_256(alpha, c, *d);
192 break;
193 }
194 m++; d++;
195 });
196#else
151 DATA32 *e,*tmp; 197 DATA32 *e,*tmp;
152 int alpha; 198 int alpha;
153 199
@@ -372,6 +418,7 @@ _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d,
372 418
373 ); 419 );
374#undef AP 420#undef AP
421#endif
375} 422}
376#endif 423#endif
377 424
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
index d6b3a733dd..c47ec7c47f 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
@@ -7,7 +7,18 @@
7 * reads, then two writes, a miss on read is 'just' two reads */ 7 * reads, then two writes, a miss on read is 'just' two reads */
8static void 8static void
9_op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) { 9_op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) {
10 10#ifdef BUILD_NEON_INTRINSICS
11 DATA32 *e;
12 int alpha;
13 UNROLL8_PLD_WHILE(d, l, e,
14 {
15 DATA32 sc = MUL4_SYM(c, *s);
16 alpha = 256 - (sc >> 24);
17 *d = sc + MUL_256(alpha, *d);
18 d++;
19 s++;
20 });
21#else
11#define AP "blend_p_c_dp_" 22#define AP "blend_p_c_dp_"
12 asm volatile ( 23 asm volatile (
13 ".fpu neon\n\t" 24 ".fpu neon\n\t"
@@ -92,6 +103,7 @@ _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DAT
92 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory" 103 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory"
93 ); 104 );
94#undef AP 105#undef AP
106#endif
95} 107}
96 108
97static void 109static void
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
index 4b9993b42b..3c32790c81 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
@@ -3,6 +3,16 @@
3#ifdef BUILD_NEON 3#ifdef BUILD_NEON
4static void 4static void
5_op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { 5_op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
6#ifdef BUILD_NEON_INTRINSICS
7 DATA32 *e;
8 int alpha;
9 UNROLL8_PLD_WHILE(d, l, e,
10 {
11 alpha = 256 - (*s >> 24);
12 *d = *s++ + MUL_256(alpha, *d);
13 d++;
14 });
15#else
6#define AP "blend_p_dp_" 16#define AP "blend_p_dp_"
7 asm volatile ( 17 asm volatile (
8 ".fpu neon \n\t" 18 ".fpu neon \n\t"
@@ -238,11 +248,31 @@ _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
238 : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered 248 : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered
239 ); 249 );
240#undef AP 250#undef AP
241 251#endif
242} 252}
243 253
244static void 254static void
245_op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { 255_op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
256#ifdef BUILD_NEON_INTRINSICS
257 DATA32 *e;
258 int alpha;
259 UNROLL8_PLD_WHILE(d, l, e,
260 {
261 switch (*s & 0xff000000)
262 {
263 case 0:
264 break;
265 case 0xff000000:
266 *d = *s;
267 break;
268 default:
269 alpha = 256 - (*s >> 24);
270 *d = *s + MUL_256(alpha, *d);
271 break;
272 }
273 s++; d++;
274 });
275#else
246#define AP "blend_pas_dp_" 276#define AP "blend_pas_dp_"
247 DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912; 277 DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912;
248 asm volatile ( 278 asm volatile (
@@ -447,6 +477,7 @@ _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
447 "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory" 477 "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory"
448 ); 478 );
449#undef AP 479#undef AP
480#endif
450} 481}
451 482
452#define _op_blend_pan_dp_neon NULL 483#define _op_blend_pan_dp_neon NULL
diff --git a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
index 96310cdf3a..009bd750ea 100644
--- a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
+++ b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
@@ -3,6 +3,14 @@
3#ifdef BUILD_NEON 3#ifdef BUILD_NEON
4static void 4static void
5_op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { 5_op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
6#ifdef BUILD_NEON_INTRINSICS
7 DATA32 *e;
8 UNROLL8_PLD_WHILE(d, l, e,
9 {
10 *d = c;
11 d++;
12 });
13#else
6#define AP "COPY_C_DP_" 14#define AP "COPY_C_DP_"
7 uint32_t *e = d + l,*tmp; 15 uint32_t *e = d + l,*tmp;
8 asm volatile ( 16 asm volatile (
@@ -85,6 +93,7 @@ _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
85 93
86 94
87 ); 95 );
96#endif
88} 97}
89 98
90#define _op_copy_cn_dp_neon _op_copy_c_dp_neon 99#define _op_copy_cn_dp_neon _op_copy_c_dp_neon