summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYury Usischev <y.usishchev@samsung.com>2013-08-02 18:06:55 +0900
committerCarsten Haitzler (Rasterman) <raster@rasterman.com>2013-08-02 18:06:55 +0900
commitbd6de4ba8c9711c1c010a3b1b311738d248c26ce (patch)
tree3c79b3b4e8708b3a9154022acda726ced1f52fb3
parenta3165bff15303e2e1ab0c969bf30577e2a0d031c (diff)
Add neon for upscaling and map routines in evas.
-rw-r--r--AUTHORS1
-rw-r--r--ChangeLog4
-rw-r--r--NEWS1
-rw-r--r--src/lib/evas/common/evas_map_image_core.c3
-rw-r--r--src/lib/evas/common/evas_map_image_loop.c90
-rw-r--r--src/lib/evas/common/evas_scale_smooth.c44
-rw-r--r--src/lib/evas/common/evas_scale_smooth_scaler_up.c26
-rw-r--r--src/lib/evas/include/evas_blend_ops.h58
8 files changed, 220 insertions, 7 deletions
diff --git a/AUTHORS b/AUTHORS
index a42f1e411e..e0e0cecbe4 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -125,6 +125,7 @@ Patryk Kaczmarek <patryk.k@samsung.com>
125Zbigniew Kosinski <z.kosinski@samsung.com> 125Zbigniew Kosinski <z.kosinski@samsung.com>
126Paulo Cavalcanti <paulo.cavalcanti@linux.intel.com> 126Paulo Cavalcanti <paulo.cavalcanti@linux.intel.com>
127Jean-Philippe Andre <jp.andre@samsung.com> 127Jean-Philippe Andre <jp.andre@samsung.com>
128Yury Usischev <y.usishchev@samsung.com>
128 129
129 130
130Ecore 131Ecore
diff --git a/ChangeLog b/ChangeLog
index 4cd2a4f364..d45dab1af3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
12013-08-02 Yury Usischev
2
3 * Add neon optimizations for several scaling/map routines in evas
4
12013-08-02 Cedric Bail 52013-08-02 Cedric Bail
2 6
3 * Evas: change mapping policy for image loader (RANDOM during header, 7 * Evas: change mapping policy for image loader (RANDOM during header,
diff --git a/NEWS b/NEWS
index bbbdc06763..243bf6d12a 100644
--- a/NEWS
+++ b/NEWS
@@ -201,6 +201,7 @@ Improvements:
201 - Use eo array of callbacks to reduce callbacks memory footprint of Evas_Object_Box and Evas_Object_Table. 201 - Use eo array of callbacks to reduce callbacks memory footprint of Evas_Object_Box and Evas_Object_Table.
202 - Optimized path for when map use the same color for all corner. 202 - Optimized path for when map use the same color for all corner.
203 - Asynchronous preload of GL texture. 203 - Asynchronous preload of GL texture.
204 - Add neon assembly for upscaling and map routines
204 * Ecore_Con: 205 * Ecore_Con:
205 - Rebase dns.c against upstream 206 - Rebase dns.c against upstream
206 * Edje: 207 * Edje:
diff --git a/src/lib/evas/common/evas_map_image_core.c b/src/lib/evas/common/evas_map_image_core.c
index 7e44c4b161..6e2be0e30a 100644
--- a/src/lib/evas/common/evas_map_image_core.c
+++ b/src/lib/evas/common/evas_map_image_core.c
@@ -19,6 +19,9 @@
19#ifdef SCALE_USING_MMX 19#ifdef SCALE_USING_MMX
20 pxor_r2r(mm0, mm0); 20 pxor_r2r(mm0, mm0);
21 MOV_A2R(ALPHA_255, mm5) 21 MOV_A2R(ALPHA_255, mm5)
22#elif defined SCALE_USING_NEON
23 FPU_NEON;
24 VMOV_I2R_NEON(q2, #255);
22#endif 25#endif
23 26
24 line = &(spans[y - ystart]); 27 line = &(spans[y - ystart]);
diff --git a/src/lib/evas/common/evas_map_image_loop.c b/src/lib/evas/common/evas_map_image_loop.c
index fc322860aa..a8a49eb7f4 100644
--- a/src/lib/evas/common/evas_map_image_loop.c
+++ b/src/lib/evas/common/evas_map_image_loop.c
@@ -1,13 +1,27 @@
1#ifdef SMOOTH 1#ifdef SMOOTH
2{ 2{
3# ifdef SCALE_USING_MMX 3# ifdef SCALE_USING_MMX
4# ifdef COLMUL 4# ifdef COLMUL
5# ifdef COLSAME 5# ifdef COLSAME
6 MOV_P2R(c1, mm7, mm0); // col 6 MOV_P2R(c1, mm7, mm0); // col
7# endif
8# endif 7# endif
9# endif 8# endif
10 while (ww > 0) 9# endif
10# ifdef SCALE_USING_NEON
11# ifdef COLMUL
12# ifndef COLBLACK
13 // this part can be done here as c1 and c2 are constant for the whole loop
14 FPU_NEON;
15 VMOV_M2R_NEON(d18, c1);
16 VEOR_NEON(q8);
17 VMOV_M2R_NEON(d19, c2);
18 VZIP_NEON(q9, q8);
19 VMOV_R2R_NEON(d19, d16);
20 // here we have c1 and c2 spread through q9 register
21# endif
22# endif
23# endif
24 while (ww > 0)
11 { 25 {
12# ifdef COLBLACK 26# ifdef COLBLACK
13 *d = 0xff000000; // col 27 *d = 0xff000000; // col
@@ -77,6 +91,41 @@
77# endif 91# endif
78# endif 92# endif
79 MOV_R2P(mm1, *d, mm0); 93 MOV_R2P(mm1, *d, mm0);
94# elif defined SCALE_USING_NEON
95 // not sure if we need this condition, but it doesn't affect the result
96 if (val1 | val2 | val3 | val4)
97 {
98 FPU_NEON;
99# ifdef COLMUL
100 // initialize alpha for interpolation of c1 and c2
101 VDUP_NEON(d15, cv >> 16);
102 // copy c1 and c2 as algorithm will overwrite it
103 VMOV_R2R_NEON(q6, q9);
104 cv += cd; // col
105# endif
106 VMOV_M2R_NEON(d8, val1);
107 VEOR_NEON(q0);
108 VMOV_M2R_NEON(d9, val3);
109 VMOV_M2R_NEON(d10, val2);
110 VEOR_NEON(q1);
111 VMOV_M2R_NEON(d11, val4);
112 VDUP_NEON(q3, ru);
113 VDUP_NEON(d14, rv);
114 VZIP_NEON(q4, q0);
115 VZIP_NEON(q5, q1);
116 VMOV_R2R_NEON(d9, d0);
117 VMOV_R2R_NEON(d11, d2);
118 // by this point we have all required data in right registers
119 INTERP_256_NEON(q3, q5, q4, q2); // interpolate val1,val2 and val3,val4
120 VSWP_NEON(d9, d12); // move result of val3,val4 interpolation (and c1 if COLMUL is defined) for next step
121 INTERP_256_NEON(q7, q6, q4, q2); // second stage of interpolation, also here c1 and c2 are interpolated
122# ifdef COLMUL
123 MUL4_SYM_NEON(d8, d9, d4); // do required multiplication
124# endif
125 VMOV_R2M_NEON(q4, d8, d); // save result to d
126 }
127 else
128 *d = val1;
80# else 129# else
81 val1 = INTERP_256(ru, val2, val1); 130 val1 = INTERP_256(ru, val2, val1);
82 val3 = INTERP_256(ru, val4, val3); 131 val3 = INTERP_256(ru, val4, val3);
@@ -102,10 +151,23 @@
102} 151}
103#else 152#else
104{ 153{
154# ifdef SCALE_USING_NEON
155# ifdef COLMUL
156# ifndef COLBLACK
157 // c1 and c2 are constant inside the loop
158 FPU_NEON;
159 VMOV_M2R_NEON(d10, c1);
160 VEOR_NEON(q0);
161 VMOV_M2R_NEON(d11, c2);
162 VZIP_NEON(q5, q0);
163 VMOV_R2R_NEON(d11, d0);
164# endif
165# endif
166# endif
105 while (ww > 0) 167 while (ww > 0)
106 { 168 {
107# ifdef COLMUL 169# ifdef COLMUL
108# ifndef COLBLACK 170# ifndef COLBLACK
109 DATA32 val1; 171 DATA32 val1;
110# ifdef COLSAME 172# ifdef COLSAME
111# else 173# else
@@ -121,11 +183,27 @@
121# ifdef COLMUL 183# ifdef COLMUL
122 val1 = *s; // col 184 val1 = *s; // col
123# ifdef COLSAME 185# ifdef COLSAME
186# ifdef SCALE_USING_NEON
124 *d = MUL4_SYM(c1, val1); 187 *d = MUL4_SYM(c1, val1);
125# else 188# else
189 *d = MUL4_SYM(c1, val1); // XXX: do this in neon
190# endif
191# else
192# ifdef SCALE_USING_NEON
193 FPU_NEON;
194 VMOV_M2R_NEON(d12, val1);
195 VMOV_R2R_NEON(q4, q5);
196 VEOR_NEON(q1);
197 VDUP_NEON(d15, cv >> 16);
198 VZIP_NEON(q6, q1);
199 INTERP_256_NEON(d15, d9, d8, d4); // interpolate c1 and c2
200 MUL4_SYM_NEON(d8, d12, d4); // multiply
201 VMOV_R2M_NEON(q4, d8, d); // save result
202# else
126 cval = INTERP_256((cv >> 16), c2, c1); // col 203 cval = INTERP_256((cv >> 16), c2, c1); // col
127 *d = MUL4_SYM(cval, val1); 204 *d = MUL4_SYM(cval, val1);
128 cv += cd; // col 205 cv += cd; // col
206# endif
129# endif 207# endif
130# else 208# else
131 *d = *s; 209 *d = *s;
diff --git a/src/lib/evas/common/evas_scale_smooth.c b/src/lib/evas/common/evas_scale_smooth.c
index 02dbe7d44d..61bda22b0a 100644
--- a/src/lib/evas/common/evas_scale_smooth.c
+++ b/src/lib/evas/common/evas_scale_smooth.c
@@ -97,6 +97,15 @@ scale_calc_a_points(int *p, int s, int d, int c, int cc)
97# include "evas_scale_smooth_scaler.c" 97# include "evas_scale_smooth_scaler.c"
98#endif 98#endif
99 99
100#ifdef BUILD_NEON
101# undef SCALE_FUNC
102# undef SCALE_USING_NEON
103# define SCALE_USING_NEON
104# define SCALE_FUNC evas_common_scale_rgba_in_to_out_clip_smooth_neon
105# include "evas_scale_smooth_scaler.c"
106# undef SCALE_USING_NEON
107#endif
108
100#undef SCALE_FUNC 109#undef SCALE_FUNC
101#define SCALE_FUNC _evas_common_scale_rgba_in_to_out_clip_smooth_c 110#define SCALE_FUNC _evas_common_scale_rgba_in_to_out_clip_smooth_c
102#undef SCALE_USING_MMX 111#undef SCALE_USING_MMX
@@ -197,6 +206,11 @@ evas_common_scale_rgba_in_to_out_clip_smooth(RGBA_Image *src, RGBA_Image *dst,
197 cb = evas_common_scale_rgba_in_to_out_clip_smooth_mmx; 206 cb = evas_common_scale_rgba_in_to_out_clip_smooth_mmx;
198 else 207 else
199#endif 208#endif
209#ifdef BUILD_NEON
210 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
211 cb = evas_common_scale_rgba_in_to_out_clip_smooth_neon;
212 else
213#endif
200 cb = evas_common_scale_rgba_in_to_out_clip_smooth_c; 214 cb = evas_common_scale_rgba_in_to_out_clip_smooth_c;
201 215
202 return evas_common_scale_rgba_in_to_out_clip_cb(src, dst, dc, 216 return evas_common_scale_rgba_in_to_out_clip_cb(src, dst, dc,
@@ -223,6 +237,16 @@ evas_common_scale_rgba_smooth_draw(RGBA_Image *src, RGBA_Image *dst, int dst_cli
223 dst_region_x, dst_region_y, dst_region_w, dst_region_h); 237 dst_region_x, dst_region_y, dst_region_w, dst_region_h);
224 else 238 else
225#endif 239#endif
240#ifdef BUILD_NEON
241 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
242 _evas_common_scale_rgba_in_to_out_clip_smooth_neon
243 (src, dst,
244 dst_clip_x, dst_clip_y, dst_clip_w, dst_clip_h,
245 mul_col, render_op,
246 src_region_x, src_region_y, src_region_w, src_region_h,
247 dst_region_x, dst_region_y, dst_region_w, dst_region_h);
248 else
249#endif
226 _evas_common_scale_rgba_in_to_out_clip_smooth_c 250 _evas_common_scale_rgba_in_to_out_clip_smooth_c
227 (src, dst, 251 (src, dst,
228 dst_clip_x, dst_clip_y, dst_clip_w, dst_clip_h, 252 dst_clip_x, dst_clip_y, dst_clip_w, dst_clip_h,
@@ -263,6 +287,15 @@ evas_common_scale_rgba_in_to_out_clip_smooth_do(const Cutout_Rects *reuse,
263 dst_region_w, dst_region_h); 287 dst_region_w, dst_region_h);
264 else 288 else
265# endif 289# endif
290#ifdef BUILD_NEON
291 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
292 evas_common_scale_rgba_in_to_out_clip_smooth_neon(src, dst, dc,
293 src_region_x, src_region_y,
294 src_region_w, src_region_h,
295 dst_region_x, dst_region_y,
296 dst_region_w, dst_region_h);
297 else
298#endif
266 evas_common_scale_rgba_in_to_out_clip_smooth_c(src, dst, dc, 299 evas_common_scale_rgba_in_to_out_clip_smooth_c(src, dst, dc,
267 src_region_x, src_region_y, 300 src_region_x, src_region_y,
268 src_region_w, src_region_h, 301 src_region_w, src_region_h,
@@ -287,7 +320,16 @@ evas_common_scale_rgba_in_to_out_clip_smooth_do(const Cutout_Rects *reuse,
287 dst_region_w, dst_region_h); 320 dst_region_w, dst_region_h);
288 else 321 else
289# endif 322# endif
290 evas_common_scale_rgba_in_to_out_clip_smooth_c(src, dst, dc, 323#ifdef BUILD_NEON
324 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
325 evas_common_scale_rgba_in_to_out_clip_smooth_neon(src, dst, dc,
326 src_region_x, src_region_y,
327 src_region_w, src_region_h,
328 dst_region_x, dst_region_y,
329 dst_region_w, dst_region_h);
330 else
331#endif
332 evas_common_scale_rgba_in_to_out_clip_smooth_c(src, dst, dc,
291 src_region_x, src_region_y, 333 src_region_x, src_region_y,
292 src_region_w, src_region_h, 334 src_region_w, src_region_h,
293 dst_region_x, dst_region_y, 335 dst_region_x, dst_region_y,
diff --git a/src/lib/evas/common/evas_scale_smooth_scaler_up.c b/src/lib/evas/common/evas_scale_smooth_scaler_up.c
index e43e0c7a6c..4b21d598dd 100644
--- a/src/lib/evas/common/evas_scale_smooth_scaler_up.c
+++ b/src/lib/evas/common/evas_scale_smooth_scaler_up.c
@@ -172,6 +172,10 @@
172 MOV_A2R(ay, mm4) 172 MOV_A2R(ay, mm4)
173 pxor_r2r(mm0, mm0); 173 pxor_r2r(mm0, mm0);
174 MOV_A2R(ALPHA_255, mm5) 174 MOV_A2R(ALPHA_255, mm5)
175#elif defined SCALE_USING_NEON
176 FPU_NEON;
177 VDUP_NEON(d12, ay);
178 VMOV_I2R_NEON(q2, #255);
175#endif 179#endif
176 pbuf = buf; pbuf_end = buf + dst_clip_w; 180 pbuf = buf; pbuf_end = buf + dst_clip_w;
177 sxx = sxx0; 181 sxx = sxx0;
@@ -210,6 +214,28 @@
210 INTERP_256_R2R(mm4, mm2, mm1, mm5) 214 INTERP_256_R2R(mm4, mm2, mm1, mm5)
211 MOV_R2P(mm1, *pbuf, mm0) 215 MOV_R2P(mm1, *pbuf, mm0)
212 pbuf++; 216 pbuf++;
217#elif defined SCALE_USING_NEON
218 if (p0 | p1 | p2 | p3)
219 {
220 FPU_NEON;
221 VMOV_M2R_NEON(d8, p0);
222 VEOR_NEON(q0);
223 VMOV_M2R_NEON(d9, p2);
224 VMOV_M2R_NEON(d10, p1);
225 VEOR_NEON(q1);
226 VMOV_M2R_NEON(d11, p3);
227 VDUP_NEON(q3, ax);
228 VZIP_NEON(q4, q0);
229 VZIP_NEON(q5, q1);
230 VMOV_R2R_NEON(d9, d0);
231 VMOV_R2R_NEON(d11, d2);
232 INTERP_256_NEON(q3, q5, q4, q2);
233 INTERP_256_NEON(d12, d9, d8, d5);
234 VMOV_R2M_NEON(q4, d8, pbuf);
235 pbuf++;
236 }
237 else
238 *pbuf++ = p0;
213#else 239#else
214 if (p0 | p1) 240 if (p0 | p1)
215 p0 = INTERP_256(ax, p1, p0); 241 p0 = INTERP_256(ax, p1, p0);
diff --git a/src/lib/evas/include/evas_blend_ops.h b/src/lib/evas/include/evas_blend_ops.h
index 0a78843579..3ae94379ec 100644
--- a/src/lib/evas/include/evas_blend_ops.h
+++ b/src/lib/evas/include/evas_blend_ops.h
@@ -186,6 +186,64 @@ extern const DATA32 ALPHA_256;
186 186
187#endif 187#endif
188 188
189/* some useful NEON macros */
190
191#ifdef BUILD_NEON
192#define FPU_NEON \
193 __asm__ __volatile__(".fpu neon \n\t");
194
195/* copy reg2 to reg1 (reg1 is the destination) */
196#define VMOV_R2R_NEON(reg1, reg2) \
197 __asm__ __volatile__("vmov " #reg1 ", " #reg2 " \n\t" ::: #reg1);
198
199/* copy 32bit value to lower bits of register reg */
200#define VMOV_M2R_NEON(reg, value) \
201 __asm__ __volatile__("vmov.32 " #reg "[0], %[val] \n\t" :: [val] "r" (value) : #reg);
202
203/* narrow the 16bit lanes of regq to 8 bits (unsigned saturating) into the 64bit */
204/* temporary register regd, then store the low 32 bits of regd at pointer */
205#define VMOV_R2M_NEON(regq, regd, pointer) \
206 __asm__ __volatile__("vqmovn.u16 " #regd ", " #regq " \n\t" \
207 "vst1.32 {" #regd "[0]}, [%[p]] \n\t" :: [p] "r" (pointer) : #regd, "memory");
208
209/* set every 16bit lane of register reg to constant imm */
210#define VMOV_I2R_NEON(reg, imm) \
211 __asm__ __volatile__("vmov.i16 " #reg ", " #imm " \n\t" ::: #reg);
212
213/* set every 16bit lane of register reg to the low 16 bits of value */
214#define VDUP_NEON(reg, value) \
215 __asm__ __volatile__("vdup.16 " #reg ", %[val] \n\t" :: [val] "r" (value) : #reg);
216
217/* interleave the bytes of reg1 and reg2 */
218#define VZIP_NEON(reg1, reg2) \
219 __asm__ __volatile__("vzip.8 " #reg1 ", " #reg2 " \n\t" ::: #reg1 , #reg2);
220
221/* swap contents of two registers */
222#define VSWP_NEON(reg1, reg2) \
223 __asm__ __volatile__("vswp " #reg1 ", " #reg2 " \n\t" ::: #reg1 , #reg2);
224
225/* set register to zero */
226#define VEOR_NEON(reg) \
227 __asm__ __volatile__("veor " #reg ", " #reg ", " #reg " \n\t" ::: #reg);
228
229/* interpolate every RGBA channel; result is left in regy, regx is overwritten */
230#define INTERP_256_NEON(rega, regx, regy, reg255) \
231 __asm__ __volatile__("vsub.i16 " #regx ", " #regx ", " #regy " \n\t" \
232 "vmul.u16 " #regx ", " #regx ", " #rega " \n\t" \
233 "vsri.16 " #regx ", " #regx ", #8 \n\t" \
234 "vadd.i16 " #regx ", " #regx ", " #regy " \n\t" \
235 "vand " #regy ", " #regx ", " #reg255 " \n\t" \
236 ::: #regx, #regy );
237
238/* multiply every channel of regx and regy; result is left in regx */
239#define MUL4_SYM_NEON(regx, regy, reg255) \
240 __asm__ __volatile__("vmul.u16 " #regx ", " #regx ", " #regy " \n\t" \
241 "vadd.i16 " #regx ", " #regx ", " #reg255 " \n\t" \
242 "vsri.16 " #regx ", " #regx ", #8 \n\t" \
243 "vand " #regx ", " #regx ", " #reg255 " \n\t" \
244 ::: #regx );
245
246#endif
189 247
190/* some useful SSE3 inline functions */ 248/* some useful SSE3 inline functions */
191 249