summaryrefslogtreecommitdiff
path: root/src/lib/evas/common
diff options
context:
space:
mode:
authorYury Usishchev <y.usishchev@samsung.com>2015-04-15 17:21:33 +0200
committerCedric BAIL <cedric@osg.samsung.com>2015-05-07 09:53:08 +0200
commit71eec44ccc9ab43e728ba986fadce6c6cfd2ff7c (patch)
tree9ed9801c7a1534f69cff124c2071e53d8845d35a /src/lib/evas/common
parent88b30ef28c47106891d44f62798424c745ec1b8c (diff)
evas: enable NEON-optimized code for aarch64.
Summary: Add new define, BUILD_NEON_INTRINSICS to control whether NEON inline code or NEON intrinsics should be built. GCC NEON intrinsics can be built both for armv7 and armv8. However NEON inline code can be built only for armv7. @feature Reviewers: raster, stefan_schmidt, cedric Subscribers: cedric, stefan_schmidt Projects: #efl Differential Revision: https://phab.enlightenment.org/D2309 Signed-off-by: Cedric BAIL <cedric@osg.samsung.com>
Diffstat (limited to 'src/lib/evas/common')
-rw-r--r--src/lib/evas/common/evas_blit_main.c8
-rw-r--r--src/lib/evas/common/evas_cpu.c9
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_color_neon.c10
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c47
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c14
-rw-r--r--src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c33
-rw-r--r--src/lib/evas/common/evas_op_copy/op_copy_color_neon.c9
7 files changed, 127 insertions, 3 deletions
diff --git a/src/lib/evas/common/evas_blit_main.c b/src/lib/evas/common/evas_blit_main.c
index 7f8faa1..4da4034 100644
--- a/src/lib/evas/common/evas_blit_main.c
+++ b/src/lib/evas/common/evas_blit_main.c
@@ -132,6 +132,9 @@ evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int len)
132static void 132static void
133evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len) 133evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
134{ 134{
135#ifdef BUILD_NEON_INTRINSICS
136evas_common_copy_pixels_rev_c(src, dst, len);
137#else
135 uint32_t *tmp = (void *)37; 138 uint32_t *tmp = (void *)37;
136#define AP "evas_common_copy_rev_pixels_neon_" 139#define AP "evas_common_copy_rev_pixels_neon_"
137 asm volatile ( 140 asm volatile (
@@ -228,6 +231,7 @@ evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
228 ); 231 );
229#undef AP 232#undef AP
230 233
234#endif
231} 235}
232#endif 236#endif
233 237
@@ -324,6 +328,9 @@ evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int len)
324#ifdef BUILD_NEON 328#ifdef BUILD_NEON
325static void 329static void
326evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){ 330evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){
331#ifdef BUILD_NEON_INTRINSICS
332evas_common_copy_pixels_c(src, dst, len);
333#else
327 uint32_t *e,*tmp = (void *)37; 334 uint32_t *e,*tmp = (void *)37;
328 e = dst + len; 335 e = dst + len;
329#define AP "evas_common_copy_pixels_neon_" 336#define AP "evas_common_copy_pixels_neon_"
@@ -410,6 +417,7 @@ evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len){
410 ); 417 );
411#undef AP 418#undef AP
412 419
420#endif
413} 421}
414#endif /* BUILD_NEON */ 422#endif /* BUILD_NEON */
415 423
diff --git a/src/lib/evas/common/evas_cpu.c b/src/lib/evas/common/evas_cpu.c
index 4139098..0f83258 100644
--- a/src/lib/evas/common/evas_cpu.c
+++ b/src/lib/evas/common/evas_cpu.c
@@ -2,6 +2,11 @@
2#ifdef BUILD_MMX 2#ifdef BUILD_MMX
3#include "evas_mmx.h" 3#include "evas_mmx.h"
4#endif 4#endif
5#ifdef BUILD_NEON
6#ifdef BUILD_NEON_INTRINSICS
7#include <arm_neon.h>
8#endif
9#endif
5#if defined BUILD_SSE3 10#if defined BUILD_SSE3
6#include <immintrin.h> 11#include <immintrin.h>
7#endif 12#endif
@@ -92,6 +97,9 @@ evas_common_cpu_neon_test(void)
92{ 97{
93//#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70) 98//#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 70)
94#ifdef BUILD_NEON 99#ifdef BUILD_NEON
100#ifdef BUILD_NEON_INTRINSICS
101 volatile uint32x4_t temp = vdupq_n_u32(0x1);
102#else
95 asm volatile ( 103 asm volatile (
96 ".fpu neon \n\t" 104 ".fpu neon \n\t"
97 "vqadd.u8 d0, d1, d0\n" 105 "vqadd.u8 d0, d1, d0\n"
@@ -101,6 +109,7 @@ evas_common_cpu_neon_test(void)
101 "d0", "d1" 109 "d0", "d1"
102 ); 110 );
103#endif 111#endif
112#endif
104//#endif 113//#endif
105} 114}
106 115
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
index 9e94298..2bf14c1 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_color_neon.c
@@ -3,6 +3,14 @@
3#ifdef BUILD_NEON 3#ifdef BUILD_NEON
4static void 4static void
5_op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) { 5_op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int l) {
6#ifdef BUILD_NEON_INTRINSICS
7 DATA32 *e, a = 256 - (c >> 24);
8 UNROLL8_PLD_WHILE(d, l, e,
9 {
10 *d = c + MUL_256(a, *d);
11 d++;
12 });
13#else
6 DATA32 *e, *tmp = 0; 14 DATA32 *e, *tmp = 0;
7#define AP "B_C_DP" 15#define AP "B_C_DP"
8 asm volatile ( 16 asm volatile (
@@ -142,7 +150,7 @@ _op_blend_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA3
142 150
143 ); 151 );
144#undef AP 152#undef AP
145 153#endif
146} 154}
147 155
148#define _op_blend_caa_dp_neon _op_blend_c_dp_neon 156#define _op_blend_caa_dp_neon _op_blend_c_dp_neon
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
index 99f4b38..dbeb063 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_mask_color_neon.c
@@ -19,6 +19,30 @@
19#ifdef BUILD_NEON 19#ifdef BUILD_NEON
20static void 20static void
21_op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { 21_op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) {
22#ifdef BUILD_NEON_INTRINSICS
23 DATA32 *e;
24 int alpha = 256 - (c >> 24);
25 UNROLL8_PLD_WHILE(d, l, e,
26 {
27 DATA32 a = *m;
28 switch(a)
29 {
30 case 0:
31 break;
32 case 255:
33 *d = c + MUL_256(alpha, *d);
34 break;
35 default:
36 {
37 DATA32 mc = MUL_SYM(a, c);
38 a = 256 - (mc >> 24);
39 *d = mc + MUL_256(a, *d);
40 }
41 break;
42 }
43 m++; d++;
44 });
45#else
22 DATA32 *e = d + l; 46 DATA32 *e = d + l;
23 47
24 // everything we can do only once per cycle 48 // everything we can do only once per cycle
@@ -142,12 +166,34 @@ _op_blend_mas_c_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, in
142 "q10", "q15", "q14", "memory" 166 "q10", "q15", "q14", "memory"
143 ); 167 );
144 } 168 }
169#endif
145} 170}
146#endif 171#endif
147 172
148#ifdef BUILD_NEON 173#ifdef BUILD_NEON
149static void 174static void
150_op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) { 175_op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, int l) {
176#ifdef BUILD_NEON_INTRINSICS
177 DATA32 *e;
178 int alpha;
179 UNROLL8_PLD_WHILE(d, l, e,
180 {
181 alpha = *m;
182 switch(alpha)
183 {
184 case 0:
185 break;
186 case 255:
187 *d = c;
188 break;
189 default:
190 alpha++;
191 *d = INTERP_256(alpha, c, *d);
192 break;
193 }
194 m++; d++;
195 });
196#else
151 DATA32 *e,*tmp; 197 DATA32 *e,*tmp;
152 int alpha; 198 int alpha;
153 199
@@ -372,6 +418,7 @@ _op_blend_mas_can_dp_neon(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d,
372 418
373 ); 419 );
374#undef AP 420#undef AP
421#endif
375} 422}
376#endif 423#endif
377 424
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
index d6b3a73..c47ec7c 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_color_neon.c
@@ -7,7 +7,18 @@
7 * reads, then two writes, a miss on read is 'just' two reads */ 7 * reads, then two writes, a miss on read is 'just' two reads */
8static void 8static void
9_op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) { 9_op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 * __restrict d, int l) {
10 10#ifdef BUILD_NEON_INTRINSICS
11 DATA32 *e;
12 int alpha;
13 UNROLL8_PLD_WHILE(d, l, e,
14 {
15 DATA32 sc = MUL4_SYM(c, *s);
16 alpha = 256 - (sc >> 24);
17 *d = sc + MUL_256(alpha, *d);
18 d++;
19 s++;
20 });
21#else
11#define AP "blend_p_c_dp_" 22#define AP "blend_p_c_dp_"
12 asm volatile ( 23 asm volatile (
13 ".fpu neon\n\t" 24 ".fpu neon\n\t"
@@ -92,6 +103,7 @@ _op_blend_p_c_dp_neon(DATA32 * __restrict s, DATA8 *m EINA_UNUSED, DATA32 c, DAT
92 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory" 103 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "memory"
93 ); 104 );
94#undef AP 105#undef AP
106#endif
95} 107}
96 108
97static void 109static void
diff --git a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
index 4b9993b..3c32790 100644
--- a/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
+++ b/src/lib/evas/common/evas_op_blend/op_blend_pixel_neon.c
@@ -3,6 +3,16 @@
3#ifdef BUILD_NEON 3#ifdef BUILD_NEON
4static void 4static void
5_op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { 5_op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
6#ifdef BUILD_NEON_INTRINSICS
7 DATA32 *e;
8 int alpha;
9 UNROLL8_PLD_WHILE(d, l, e,
10 {
11 alpha = 256 - (*s >> 24);
12 *d = *s++ + MUL_256(alpha, *d);
13 d++;
14 });
15#else
6#define AP "blend_p_dp_" 16#define AP "blend_p_dp_"
7 asm volatile ( 17 asm volatile (
8 ".fpu neon \n\t" 18 ".fpu neon \n\t"
@@ -238,11 +248,31 @@ _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
238 : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered 248 : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","memory" // clobbered
239 ); 249 );
240#undef AP 250#undef AP
241 251#endif
242} 252}
243 253
244static void 254static void
245_op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { 255_op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
256#ifdef BUILD_NEON_INTRINSICS
257 DATA32 *e;
258 int alpha;
259 UNROLL8_PLD_WHILE(d, l, e,
260 {
261 switch (*s & 0xff000000)
262 {
263 case 0:
264 break;
265 case 0xff000000:
266 *d = *s;
267 break;
268 default:
269 alpha = 256 - (*s >> 24);
270 *d = *s + MUL_256(alpha, *d);
271 break;
272 }
273 s++; d++;
274 });
275#else
246#define AP "blend_pas_dp_" 276#define AP "blend_pas_dp_"
247 DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912; 277 DATA32 *e = d + l,*tmp = e + 32,*pl=(void*)912;
248 asm volatile ( 278 asm volatile (
@@ -447,6 +477,7 @@ _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
447 "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory" 477 "q0","q1","q2","q3","q4","q5","q6","q7","q8","memory"
448 ); 478 );
449#undef AP 479#undef AP
480#endif
450} 481}
451 482
452#define _op_blend_pan_dp_neon NULL 483#define _op_blend_pan_dp_neon NULL
diff --git a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
index 96310cd..009bd75 100644
--- a/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
+++ b/src/lib/evas/common/evas_op_copy/op_copy_color_neon.c
@@ -3,6 +3,14 @@
3#ifdef BUILD_NEON 3#ifdef BUILD_NEON
4static void 4static void
5_op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { 5_op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
6#ifdef BUILD_NEON_INTRINSICS
7 DATA32 *e;
8 UNROLL8_PLD_WHILE(d, l, e,
9 {
10 *d = c;
11 d++;
12 });
13#else
6#define AP "COPY_C_DP_" 14#define AP "COPY_C_DP_"
7 uint32_t *e = d + l,*tmp; 15 uint32_t *e = d + l,*tmp;
8 asm volatile ( 16 asm volatile (
@@ -85,6 +93,7 @@ _op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
85 93
86 94
87 ); 95 );
96#endif
88} 97}
89 98
90#define _op_copy_cn_dp_neon _op_copy_c_dp_neon 99#define _op_copy_cn_dp_neon _op_copy_c_dp_neon