diff options
Diffstat (limited to '')
-rw-r--r-- | configure.ac | 18 | ||||
-rw-r--r-- | src/lib/evas/common/evas_blit_main.c | 8 | ||||
-rw-r--r-- | src/lib/evas/common/evas_cpu.c | 9 | ||||
-rw-r--r-- | src/lib/evas/common/evas_op_blend/op_blend_color_neon.c | 10 | ||||
-rw-r--r-- | src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c | 47 | ||||
-rw-r--r-- | src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c | 14 | ||||
-rw-r--r-- | src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c | 33 | ||||
-rw-r--r-- | src/lib/evas/common/evas_op_copy/op_copy_color_neon.c | 9 |
8 files changed, 145 insertions, 3 deletions
diff --git a/configure.ac b/configure.ac index 9eed98ce9b..63cc54ddce 100644 --- a/configure.ac +++ b/configure.ac | |||
@@ -576,6 +576,21 @@ case $host_cpu in | |||
576 | CFLAGS="${CFLAGS_save}" | 576 | CFLAGS="${CFLAGS_save}" |
577 | fi | 577 | fi |
578 | ;; | 578 | ;; |
579 | aarch64*) | ||
580 | if test "x${want_neon}" = "xyes"; then | ||
581 | build_cpu_neon="yes" | ||
582 | AC_MSG_CHECKING([whether to use NEON instructions]) | ||
583 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <arm_neon.h>]], [[volatile uint32x4_t test = vdupq_n_u32(0x1);]])],[ | ||
584 | AC_MSG_RESULT([yes]) | ||
585 | AC_DEFINE([BUILD_NEON], [1], [Build NEON Code]) | ||
586 | AC_DEFINE([BUILD_NEON_INTRINSICS], [1], [Build NEON Intrinsics]) | ||
587 | build_cpu_neon="yes" | ||
588 | ],[ | ||
589 | AC_MSG_RESULT([no]) | ||
590 | build_cpu_neon="no" | ||
591 | ]) | ||
592 | fi | ||
593 | ;; | ||
579 | esac | 594 | esac |
580 | 595 | ||
581 | AC_SUBST([ALTIVEC_CFLAGS]) | 596 | AC_SUBST([ALTIVEC_CFLAGS]) |
@@ -4741,6 +4756,9 @@ case $host_cpu in | |||
4741 | arm*) | 4756 | arm*) |
4742 | EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}]) | 4757 | EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}]) |
4743 | ;; | 4758 | ;; |
4759 | aarch64*) | ||
4760 | EFL_ADD_FEATURE([cpu], [neon], [${build_cpu_neon}]) | ||
4761 | ;; | ||
4744 | esac | 4762 | esac |
4745 | 4763 | ||
4746 | if test "${have_linux}" = "yes"; then | 4764 | if test "${have_linux}" = "yes"; then |
diff --git a/src/lib/evas/common/evas_blit_main.c b/src/lib/evas/common/evas_blit_main.c index 7f8faa18a4..4da4034742 100644 --- a/src/lib/evas/common/evas_blit_main.c +++ b/src/lib/evas/common/evas_blit_main.c | |||
@@ -132,6 +132,9 @@ evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int len) | |||
132 | static void | 132 | static void |
133 | evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) | 133 | evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) |
134 | { | 134 | { |
135 | #ifdef BUILD_NEON_INTRINSICS | ||
136 | evas_common_copy_pixels_rev_c(src, dst, len); | ||
137 | #else | ||
135 | uint32_t *tmp = (void *)37; | 138 | uint32_t *tmp = (void *)37; |
136 | #define AP "evas_common_copy_rev_pixels_neon_" | 139 | #define AP "evas_common_copy_rev_pixels_neon_" |
137 | asm volatile ( | 140 | asm volatile ( |
@@ -228,6 +231,7 @@ evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) | |||
228 | ); | 231 | ); |
229 | #undef AP | 232 | #undef AP |
230 | 233 | ||
234 | #endif | ||
231 | } | 235 | } |
232 | #endif | 236 | #endif |
233 | 237 | ||
@@ -324,6 +328,9 @@ evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int len) | |||
324 | #ifdef BUILD_NEON | 328 | #ifdef BUILD_NEON |
325 | static void | 329 | static void |
326 | evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ | 330 | evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ |
331 | #ifdef BUILD_NEON_INTRINSICS | ||
332 | evas_common_copy_pixels_c(src, dst, len); | ||
333 | #else | ||
327 | uint32_t *e,*tmp = (void *)37; | 334 | uint32_t *e,*tmp = (void *)37; |
328 | e = dst + len; | 335 | e = dst + len; |
329 | #define AP "evas_common_copy_pixels_neon_" | 336 | #define AP "evas_common_copy_pixels_neon_" |
@@ -410,6 +417,7 @@ evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ | |||
410 | ); | 417 | ); |
411 | #undef AP | 418 | #undef AP |
412 | 419 | ||
420 | #endif | ||
413 | } | 421 | } |
414 | #endif /* BUILD_NEON */ | 422 | #endif /* BUILD_NEON */ |
415 | 423 | ||
diff --git a/src/lib/evas/common/evas_cpu.c b/src/lib/evas/common/evas_cpu.c index 41390989d8..0f83258806 100644 --- a/src/lib/evas/common/evas_cpu.c +++ b/src/lib/evas/common/evas_cpu.c | |||
@@ -2,6 +2,11 @@ | |||
2 | #ifdef BUILD_MMX | 2 | #ifdef BUILD_MMX |
3 | #include "evas_mmx.h" | 3 | #include "evas_mmx.h" |
4 | #endif | 4 | #endif |
5 | #ifdef BUILD_NEON | ||
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | #include <arm_neon.h> | ||
8 | #endif | ||
9 | #endif | ||
5 | #if defined BUILD_SSE3 | 10 | #if defined BUILD_SSE3 |
6 | #include <immintrin.h> | 11 | #include <immintrin.h> |
7 | #endif | 12 | #endif |
@@ -92,6 +97,9 @@ evas_common_cpu_neon_test(void) | |||
92 | { | 97 | { |
93 | //#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70) | 98 | //#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70) |
94 | #ifdef BUILD_NEON | 99 | #ifdef BUILD_NEON |
100 | #ifdef BUILD_NEON_INTRINSICS | ||
101 | volatile uint32x4_t temp = vdupq_n_u32(0x1); | ||
102 | #else | ||
95 | asm volatile ( | 103 | asm volatile ( |
96 | ".fpu neon \n\t" | 104 | ".fpu neon \n\t" |
97 | "vqadd.u8 d0, d1, d0\n" | 105 | "vqadd.u8 d0, d1, d0\n" |
@@ -101,6 +109,7 @@ evas_common_cpu_neon_test(void) | |||
101 | "d0", "d1" | 109 | "d0", "d1" |
102 | ); | 110 | ); |
103 | #endif | 111 | #endif |
112 | #endif | ||
104 | //#endif | 113 | //#endif |
105 | } | 114 | } |
106 | 115 | ||
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c index 9e94298cc6..2bf14c1f7c 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c | |||
@@ -3,6 +3,14 @@ | |||
3 | #ifdef BUILD_NEON | 3 | #ifdef BUILD_NEON |
4 | static void | 4 | static void |
5 | _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) { | 5 | _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) { |
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | DATA32 *e, a = 256 - (c >> 24); | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | *d = c + MUL_256(a, *d); | ||
11 | d++; | ||
12 | }); | ||
13 | #else | ||
6 | DATA32 *e, *tmp = 0; | 14 | DATA32 *e, *tmp = 0; |
7 | #define AP "B_C_DP" | 15 | #define AP "B_C_DP" |
8 | asm volatile ( | 16 | asm volatile ( |
@@ -142,7 +150,7 @@ _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA3 | |||
142 | 150 | ||
143 | ); | 151 | ); |
144 | #undef AP | 152 | #undef AP |
145 | 153 | #endif | |
146 | } | 154 | } |
147 | 155 | ||
148 | #define _op_blend_caa_dp_neon _op_blend_c_dp_neon | 156 | #define _op_blend_caa_dp_neon _op_blend_c_dp_neon |
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c index 99f4b38625..dbeb0638b3 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c | |||
@@ -19,6 +19,30 @@ | |||
19 | #ifdef BUILD_NEON | 19 | #ifdef BUILD_NEON |
20 | static void | 20 | static void |
21 | _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 21 | _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
22 | #ifdef BUILD_NEON_INTRINSICS | ||
23 | DATA32 *e; | ||
24 | int alpha = 256 - (c >> 24); | ||
25 | UNROLL8_PLD_WHILE(d, l, e, | ||
26 | { | ||
27 | DATA32 a = *m; | ||
28 | switch(a) | ||
29 | { | ||
30 | case 0: | ||
31 | break; | ||
32 | case 255: | ||
33 | *d = c + MUL_256(alpha, *d); | ||
34 | break; | ||
35 | default: | ||
36 | { | ||
37 | DATA32 mc = MUL_SYM(a, c); | ||
38 | a = 256 - (mc >> 24); | ||
39 | *d = mc + MUL_256(a, *d); | ||
40 | } | ||
41 | break; | ||
42 | } | ||
43 | m++; d++; | ||
44 | }); | ||
45 | #else | ||
22 | DATA32 *e = d + l; | 46 | DATA32 *e = d + l; |
23 | 47 | ||
24 | // everything we can do only once per cycle | 48 | // everything we can do only once per cycle |
@@ -142,12 +166,34 @@ _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, in | |||
142 | "q10", "q15", "q14", "memory" | 166 | "q10", "q15", "q14", "memory" |
143 | ); | 167 | ); |
144 | } | 168 | } |
169 | #endif | ||
145 | } | 170 | } |
146 | #endif | 171 | #endif |
147 | 172 | ||
148 | #ifdef BUILD_NEON | 173 | #ifdef BUILD_NEON |
149 | static void | 174 | static void |
150 | _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 175 | _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
176 | #ifdef BUILD_NEON_INTRINSICS | ||
177 | DATA32 *e; | ||
178 | int alpha; | ||
179 | UNROLL8_PLD_WHILE(d, l, e, | ||
180 | { | ||
181 | alpha = *m; | ||
182 | switch(alpha) | ||
183 | { | ||
184 | case 0: | ||
185 | break; | ||
186 | case 255: | ||
187 | *d = c; | ||
188 | break; | ||
189 | default: | ||
190 | alpha++; | ||
191 | *d = INTERP_256(alpha, c, *d); | ||
192 | break; | ||
193 | } | ||
194 | m++; d++; | ||
195 | }); | ||
196 | #else | ||
151 | DATA32 *e,*tmp; | 197 | DATA32 *e,*tmp; |
152 | int alpha; | 198 | int alpha; |
153 | 199 | ||
@@ -372,6 +418,7 @@ _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, | |||
372 | 418 | ||
373 | ); | 419 | ); |
374 | #undef AP | 420 | #undef AP |
421 | #endif | ||
375 | } | 422 | } |
376 | #endif | 423 | #endif |
377 | 424 | ||
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c index d6b3a733dd..c47ec7c47f 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c | |||
@@ -7,7 +7,18 @@ | |||
7 | * reads, then two writes, a miss on read is 'just' two reads */ | 7 | * reads, then two writes, a miss on read is 'just' two reads */ |
8 | static void | 8 | static void |
9 | _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) { | 9 | _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) { |
10 | 10 | #ifdef BUILD_NEON_INTRINSICS | |
11 | DATA32 *e; | ||
12 | int alpha; | ||
13 | UNROLL8_PLD_WHILE(d, l, e, | ||
14 | { | ||
15 | DATA32 sc = MUL4_SYM(c, *s); | ||
16 | alpha = 256 - (sc >> 24); | ||
17 | *d = sc + MUL_256(alpha, *d); | ||
18 | d++; | ||
19 | s++; | ||
20 | }); | ||
21 | #else | ||
11 | #define AP "blend_p_c_dp_" | 22 | #define AP "blend_p_c_dp_" |
12 | asm volatile ( | 23 | asm volatile ( |
13 | ".fpu neon\n\t" | 24 | ".fpu neon\n\t" |
@@ -92,6 +103,7 @@ _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DAT | |||
92 | : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory" | 103 | : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory" |
93 | ); | 104 | ); |
94 | #undef AP | 105 | #undef AP |
106 | #endif | ||
95 | } | 107 | } |
96 | 108 | ||
97 | static void | 109 | static void |
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c index 4b9993b42b..3c32790c81 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c | |||
@@ -3,6 +3,16 @@ | |||
3 | #ifdef BUILD_NEON | 3 | #ifdef BUILD_NEON |
4 | static void | 4 | static void |
5 | _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 5 | _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | DATA32 *e; | ||
8 | int alpha; | ||
9 | UNROLL8_PLD_WHILE(d, l, e, | ||
10 | { | ||
11 | alpha = 256 - (*s >> 24); | ||
12 | *d = *s++ + MUL_256(alpha, *d); | ||
13 | d++; | ||
14 | }); | ||
15 | #else | ||
6 | #define AP "blend_p_dp_" | 16 | #define AP "blend_p_dp_" |
7 | asm volatile ( | 17 | asm volatile ( |
8 | ".fpu neon \n\t" | 18 | ".fpu neon \n\t" |
@@ -238,11 +248,31 @@ _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | |||
238 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered | 248 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered |
239 | ); | 249 | ); |
240 | #undef AP | 250 | #undef AP |
241 | 251 | #endif | |
242 | } | 252 | } |
243 | 253 | ||
244 | static void | 254 | static void |
245 | _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 255 | _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
256 | #ifdef BUILD_NEON_INTRINSICS | ||
257 | DATA32 *e; | ||
258 | int alpha; | ||
259 | UNROLL8_PLD_WHILE(d, l, e, | ||
260 | { | ||
261 | switch (*s & 0xff000000) | ||
262 | { | ||
263 | case 0: | ||
264 | break; | ||
265 | case 0xff000000: | ||
266 | *d = *s; | ||
267 | break; | ||
268 | default: | ||
269 | alpha = 256 - (*s >> 24); | ||
270 | *d = *s + MUL_256(alpha, *d); | ||
271 | break; | ||
272 | } | ||
273 | s++; d++; | ||
274 | }); | ||
275 | #else | ||
246 | #define AP "blend_pas_dp_" | 276 | #define AP "blend_pas_dp_" |
247 | DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912; | 277 | DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912; |
248 | asm volatile ( | 278 | asm volatile ( |
@@ -447,6 +477,7 @@ _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | |||
447 | "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory" | 477 | "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory" |
448 | ); | 478 | ); |
449 | #undef AP | 479 | #undef AP |
480 | #endif | ||
450 | } | 481 | } |
451 | 482 | ||
452 | #define _op_blend_pan_dp_neon NULL | 483 | #define _op_blend_pan_dp_neon NULL |
diff --git a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c index 96310cdf3a..009bd750ea 100644 --- a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c +++ b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c | |||
@@ -3,6 +3,14 @@ | |||
3 | #ifdef BUILD_NEON | 3 | #ifdef BUILD_NEON |
4 | static void | 4 | static void |
5 | _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 5 | _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | DATA32 *e; | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | *d = c; | ||
11 | d++; | ||
12 | }); | ||
13 | #else | ||
6 | #define AP "COPY_C_DP_" | 14 | #define AP "COPY_C_DP_" |
7 | uint32_t *e = d + l,*tmp; | 15 | uint32_t *e = d + l,*tmp; |
8 | asm volatile ( | 16 | asm volatile ( |
@@ -85,6 +93,7 @@ _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | |||
85 | 93 | ||
86 | 94 | ||
87 | ); | 95 | ); |
96 | #endif | ||
88 | } | 97 | } |
89 | 98 | ||
90 | #define _op_copy_cn_dp_neon _op_copy_c_dp_neon | 99 | #define _op_copy_cn_dp_neon _op_copy_c_dp_neon |