diff options
author | Yury Usishchev <y.usishchev@samsung.com> | 2015-04-15 17:21:33 +0200 |
---|---|---|
committer | Cedric BAIL <cedric@osg.samsung.com> | 2015-05-07 09:53:08 +0200 |
commit | 71eec44ccc9ab43e728ba986fadce6c6cfd2ff7c (patch) | |
tree | 9ed9801c7a1534f69cff124c2071e53d8845d35a /src | |
parent | 88b30ef28c47106891d44f62798424c745ec1b8c (diff) |
evas: enable NEON-optimized code for aarch64.
Summary:
Add new define, BUILD_NEON_INTRINSICS to control whether NEON inline code or
NEON intrinsics should be built.
GCC NEON intrinsics can be built both for armv7 and armv8. However NEON inline
code can be built only for armv7.
@feature
Reviewers: raster, stefan_schmidt, cedric
Subscribers: cedric, stefan_schmidt
Projects: #efl
Differential Revision: https://phab.enlightenment.org/D2309
Signed-off-by: Cedric BAIL <cedric@osg.samsung.com>
Diffstat (limited to '')
7 files changed, 127 insertions, 3 deletions
diff --git a/src/lib/evas/common/evas_blit_main.c b/src/lib/evas/common/evas_blit_main.c index 7f8faa18a4..4da4034742 100644 --- a/src/lib/evas/common/evas_blit_main.c +++ b/src/lib/evas/common/evas_blit_main.c | |||
@@ -132,6 +132,9 @@ evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int len) | |||
132 | static void | 132 | static void |
133 | evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) | 133 | evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) |
134 | { | 134 | { |
135 | #ifdef BUILD_NEON_INTRINSICS | ||
136 | evas_common_copy_pixels_rev_c(src, dst, len); | ||
137 | #else | ||
135 | uint32_t *tmp = (void *)37; | 138 | uint32_t *tmp = (void *)37; |
136 | #define AP "evas_common_copy_rev_pixels_neon_" | 139 | #define AP "evas_common_copy_rev_pixels_neon_" |
137 | asm volatile ( | 140 | asm volatile ( |
@@ -228,6 +231,7 @@ evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) | |||
228 | ); | 231 | ); |
229 | #undef AP | 232 | #undef AP |
230 | 233 | ||
234 | #endif | ||
231 | } | 235 | } |
232 | #endif | 236 | #endif |
233 | 237 | ||
@@ -324,6 +328,9 @@ evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int len) | |||
324 | #ifdef BUILD_NEON | 328 | #ifdef BUILD_NEON |
325 | static void | 329 | static void |
326 | evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ | 330 | evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ |
331 | #ifdef BUILD_NEON_INTRINSICS | ||
332 | evas_common_copy_pixels_c(src, dst, len); | ||
333 | #else | ||
327 | uint32_t *e,*tmp = (void *)37; | 334 | uint32_t *e,*tmp = (void *)37; |
328 | e = dst + len; | 335 | e = dst + len; |
329 | #define AP "evas_common_copy_pixels_neon_" | 336 | #define AP "evas_common_copy_pixels_neon_" |
@@ -410,6 +417,7 @@ evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ | |||
410 | ); | 417 | ); |
411 | #undef AP | 418 | #undef AP |
412 | 419 | ||
420 | #endif | ||
413 | } | 421 | } |
414 | #endif /* BUILD_NEON */ | 422 | #endif /* BUILD_NEON */ |
415 | 423 | ||
diff --git a/src/lib/evas/common/evas_cpu.c b/src/lib/evas/common/evas_cpu.c index 41390989d8..0f83258806 100644 --- a/src/lib/evas/common/evas_cpu.c +++ b/src/lib/evas/common/evas_cpu.c | |||
@@ -2,6 +2,11 @@ | |||
2 | #ifdef BUILD_MMX | 2 | #ifdef BUILD_MMX |
3 | #include "evas_mmx.h" | 3 | #include "evas_mmx.h" |
4 | #endif | 4 | #endif |
5 | #ifdef BUILD_NEON | ||
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | #include <arm_neon.h> | ||
8 | #endif | ||
9 | #endif | ||
5 | #if defined BUILD_SSE3 | 10 | #if defined BUILD_SSE3 |
6 | #include <immintrin.h> | 11 | #include <immintrin.h> |
7 | #endif | 12 | #endif |
@@ -92,6 +97,9 @@ evas_common_cpu_neon_test(void) | |||
92 | { | 97 | { |
93 | //#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70) | 98 | //#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70) |
94 | #ifdef BUILD_NEON | 99 | #ifdef BUILD_NEON |
100 | #ifdef BUILD_NEON_INTRINSICS | ||
101 | volatile uint32x4_t temp = vdupq_n_u32(0x1); | ||
102 | #else | ||
95 | asm volatile ( | 103 | asm volatile ( |
96 | ".fpu neon \n\t" | 104 | ".fpu neon \n\t" |
97 | "vqadd.u8 d0, d1, d0\n" | 105 | "vqadd.u8 d0, d1, d0\n" |
@@ -101,6 +109,7 @@ evas_common_cpu_neon_test(void) | |||
101 | "d0", "d1" | 109 | "d0", "d1" |
102 | ); | 110 | ); |
103 | #endif | 111 | #endif |
112 | #endif | ||
104 | //#endif | 113 | //#endif |
105 | } | 114 | } |
106 | 115 | ||
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c index 9e94298cc6..2bf14c1f7c 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c | |||
@@ -3,6 +3,14 @@ | |||
3 | #ifdef BUILD_NEON | 3 | #ifdef BUILD_NEON |
4 | static void | 4 | static void |
5 | _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) { | 5 | _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) { |
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | DATA32 *e, a = 256 - (c >> 24); | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | *d = c + MUL_256(a, *d); | ||
11 | d++; | ||
12 | }); | ||
13 | #else | ||
6 | DATA32 *e, *tmp = 0; | 14 | DATA32 *e, *tmp = 0; |
7 | #define AP "B_C_DP" | 15 | #define AP "B_C_DP" |
8 | asm volatile ( | 16 | asm volatile ( |
@@ -142,7 +150,7 @@ _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA3 | |||
142 | 150 | ||
143 | ); | 151 | ); |
144 | #undef AP | 152 | #undef AP |
145 | 153 | #endif | |
146 | } | 154 | } |
147 | 155 | ||
148 | #define _op_blend_caa_dp_neon _op_blend_c_dp_neon | 156 | #define _op_blend_caa_dp_neon _op_blend_c_dp_neon |
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c index 99f4b38625..dbeb0638b3 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c | |||
@@ -19,6 +19,30 @@ | |||
19 | #ifdef BUILD_NEON | 19 | #ifdef BUILD_NEON |
20 | static void | 20 | static void |
21 | _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 21 | _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
22 | #ifdef BUILD_NEON_INTRINSICS | ||
23 | DATA32 *e; | ||
24 | int alpha = 256 - (c >> 24); | ||
25 | UNROLL8_PLD_WHILE(d, l, e, | ||
26 | { | ||
27 | DATA32 a = *m; | ||
28 | switch(a) | ||
29 | { | ||
30 | case 0: | ||
31 | break; | ||
32 | case 255: | ||
33 | *d = c + MUL_256(alpha, *d); | ||
34 | break; | ||
35 | default: | ||
36 | { | ||
37 | DATA32 mc = MUL_SYM(a, c); | ||
38 | a = 256 - (mc >> 24); | ||
39 | *d = mc + MUL_256(a, *d); | ||
40 | } | ||
41 | break; | ||
42 | } | ||
43 | m++; d++; | ||
44 | }); | ||
45 | #else | ||
22 | DATA32 *e = d + l; | 46 | DATA32 *e = d + l; |
23 | 47 | ||
24 | // everything we can do only once per cycle | 48 | // everything we can do only once per cycle |
@@ -142,12 +166,34 @@ _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, in | |||
142 | "q10", "q15", "q14", "memory" | 166 | "q10", "q15", "q14", "memory" |
143 | ); | 167 | ); |
144 | } | 168 | } |
169 | #endif | ||
145 | } | 170 | } |
146 | #endif | 171 | #endif |
147 | 172 | ||
148 | #ifdef BUILD_NEON | 173 | #ifdef BUILD_NEON |
149 | static void | 174 | static void |
150 | _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 175 | _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
176 | #ifdef BUILD_NEON_INTRINSICS | ||
177 | DATA32 *e; | ||
178 | int alpha; | ||
179 | UNROLL8_PLD_WHILE(d, l, e, | ||
180 | { | ||
181 | alpha = *m; | ||
182 | switch(alpha) | ||
183 | { | ||
184 | case 0: | ||
185 | break; | ||
186 | case 255: | ||
187 | *d = c; | ||
188 | break; | ||
189 | default: | ||
190 | alpha++; | ||
191 | *d = INTERP_256(alpha, c, *d); | ||
192 | break; | ||
193 | } | ||
194 | m++; d++; | ||
195 | }); | ||
196 | #else | ||
151 | DATA32 *e,*tmp; | 197 | DATA32 *e,*tmp; |
152 | int alpha; | 198 | int alpha; |
153 | 199 | ||
@@ -372,6 +418,7 @@ _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, | |||
372 | 418 | ||
373 | ); | 419 | ); |
374 | #undef AP | 420 | #undef AP |
421 | #endif | ||
375 | } | 422 | } |
376 | #endif | 423 | #endif |
377 | 424 | ||
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c index d6b3a733dd..c47ec7c47f 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c | |||
@@ -7,7 +7,18 @@ | |||
7 | * reads, then two writes, a miss on read is 'just' two reads */ | 7 | * reads, then two writes, a miss on read is 'just' two reads */ |
8 | static void | 8 | static void |
9 | _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) { | 9 | _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) { |
10 | 10 | #ifdef BUILD_NEON_INTRINSICS | |
11 | DATA32 *e; | ||
12 | int alpha; | ||
13 | UNROLL8_PLD_WHILE(d, l, e, | ||
14 | { | ||
15 | DATA32 sc = MUL4_SYM(c, *s); | ||
16 | alpha = 256 - (sc >> 24); | ||
17 | *d = sc + MUL_256(alpha, *d); | ||
18 | d++; | ||
19 | s++; | ||
20 | }); | ||
21 | #else | ||
11 | #define AP "blend_p_c_dp_" | 22 | #define AP "blend_p_c_dp_" |
12 | asm volatile ( | 23 | asm volatile ( |
13 | ".fpu neon\n\t" | 24 | ".fpu neon\n\t" |
@@ -92,6 +103,7 @@ _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DAT | |||
92 | : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory" | 103 | : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory" |
93 | ); | 104 | ); |
94 | #undef AP | 105 | #undef AP |
106 | #endif | ||
95 | } | 107 | } |
96 | 108 | ||
97 | static void | 109 | static void |
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c index 4b9993b42b..3c32790c81 100644 --- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c +++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c | |||
@@ -3,6 +3,16 @@ | |||
3 | #ifdef BUILD_NEON | 3 | #ifdef BUILD_NEON |
4 | static void | 4 | static void |
5 | _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 5 | _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | DATA32 *e; | ||
8 | int alpha; | ||
9 | UNROLL8_PLD_WHILE(d, l, e, | ||
10 | { | ||
11 | alpha = 256 - (*s >> 24); | ||
12 | *d = *s++ + MUL_256(alpha, *d); | ||
13 | d++; | ||
14 | }); | ||
15 | #else | ||
6 | #define AP "blend_p_dp_" | 16 | #define AP "blend_p_dp_" |
7 | asm volatile ( | 17 | asm volatile ( |
8 | ".fpu neon \n\t" | 18 | ".fpu neon \n\t" |
@@ -238,11 +248,31 @@ _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | |||
238 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered | 248 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered |
239 | ); | 249 | ); |
240 | #undef AP | 250 | #undef AP |
241 | 251 | #endif | |
242 | } | 252 | } |
243 | 253 | ||
244 | static void | 254 | static void |
245 | _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 255 | _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
256 | #ifdef BUILD_NEON_INTRINSICS | ||
257 | DATA32 *e; | ||
258 | int alpha; | ||
259 | UNROLL8_PLD_WHILE(d, l, e, | ||
260 | { | ||
261 | switch (*s & 0xff000000) | ||
262 | { | ||
263 | case 0: | ||
264 | break; | ||
265 | case 0xff000000: | ||
266 | *d = *s; | ||
267 | break; | ||
268 | default: | ||
269 | alpha = 256 - (*s >> 24); | ||
270 | *d = *s + MUL_256(alpha, *d); | ||
271 | break; | ||
272 | } | ||
273 | s++; d++; | ||
274 | }); | ||
275 | #else | ||
246 | #define AP "blend_pas_dp_" | 276 | #define AP "blend_pas_dp_" |
247 | DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912; | 277 | DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912; |
248 | asm volatile ( | 278 | asm volatile ( |
@@ -447,6 +477,7 @@ _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | |||
447 | "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory" | 477 | "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory" |
448 | ); | 478 | ); |
449 | #undef AP | 479 | #undef AP |
480 | #endif | ||
450 | } | 481 | } |
451 | 482 | ||
452 | #define _op_blend_pan_dp_neon NULL | 483 | #define _op_blend_pan_dp_neon NULL |
diff --git a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c index 96310cdf3a..009bd750ea 100644 --- a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c +++ b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c | |||
@@ -3,6 +3,14 @@ | |||
3 | #ifdef BUILD_NEON | 3 | #ifdef BUILD_NEON |
4 | static void | 4 | static void |
5 | _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | 5 | _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { |
6 | #ifdef BUILD_NEON_INTRINSICS | ||
7 | DATA32 *e; | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | *d = c; | ||
11 | d++; | ||
12 | }); | ||
13 | #else | ||
6 | #define AP "COPY_C_DP_" | 14 | #define AP "COPY_C_DP_" |
7 | uint32_t *e = d + l,*tmp; | 15 | uint32_t *e = d + l,*tmp; |
8 | asm volatile ( | 16 | asm volatile ( |
@@ -85,6 +93,7 @@ _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | |||
85 | 93 | ||
86 | 94 | ||
87 | ); | 95 | ); |
96 | #endif | ||
88 | } | 97 | } |
89 | 98 | ||
90 | #define _op_copy_cn_dp_neon _op_copy_c_dp_neon | 99 | #define _op_copy_cn_dp_neon _op_copy_c_dp_neon |