summary | refs | log | tree | commit | diff
path: root/src/lib/evas/common/evas_map_image_loop.c
diff options
context:
space:
mode:
author: Carsten Haitzler (Rasterman) <raster@rasterman.com> 2013-12-02 16:33:34 +0900
committer: Carsten Haitzler (Rasterman) <raster@rasterman.com> 2013-12-02 16:33:34 +0900
commit: 48d3253b45b0e155b70c22ac6ea0b769aaecbd3a (patch)
tree: dba121085078323b45aef90c0c0fb5f5dffa7694 /src/lib/evas/common/evas_map_image_loop.c
parent: 5b21fa71694fb2e09094e332710ddcafc216ae07 (diff)
NEON vectorization: added use of COLSAME define in map routine
Reviewers: raster
Reviewed By: raster
CC: cedric
Differential Revision: https://phab.enlightenment.org/D341
Diffstat (limited to '')
-rw-r--r-- src/lib/evas/common/evas_map_image_loop.c 36
1 files changed, 34 insertions, 2 deletions
diff --git a/src/lib/evas/common/evas_map_image_loop.c b/src/lib/evas/common/evas_map_image_loop.c
index a8a49eb7f4..9da2ebfda0 100644
--- a/src/lib/evas/common/evas_map_image_loop.c
+++ b/src/lib/evas/common/evas_map_image_loop.c
@@ -8,15 +8,21 @@
8# endif 8# endif
9# endif 9# endif
10# ifdef SCALE_USING_NEON 10# ifdef SCALE_USING_NEON
11 FPU_NEON;
12 VMOV_I2R_NEON(q2, #255);
11# ifdef COLMUL 13# ifdef COLMUL
12# ifndef COLBLACK 14# ifndef COLBLACK
13 // this part can be done here as c1 and c2 are constants in the cycle 15 // this part can be done here as c1 and c2 are constants in the cycle
14 FPU_NEON; 16 FPU_NEON;
15 VMOV_M2R_NEON(d18, c1); 17 VMOV_M2R_NEON(d18, c1);
16 VEOR_NEON(q8); 18 VEOR_NEON(q8);
19# ifndef COLSAME
17 VMOV_M2R_NEON(d19, c2); 20 VMOV_M2R_NEON(d19, c2);
21# endif
18 VZIP_NEON(q9, q8); 22 VZIP_NEON(q9, q8);
23# ifndef COLSAME
19 VMOV_R2R_NEON(d19, d16); 24 VMOV_R2R_NEON(d19, d16);
25# endif
20 // here we have c1 and c2 spread through q9 register 26 // here we have c1 and c2 spread through q9 register
21# endif 27# endif
22# endif 28# endif
@@ -117,10 +123,22 @@
117 VMOV_R2R_NEON(d11, d2); 123 VMOV_R2R_NEON(d11, d2);
118 // by this point we have all required data in right registers 124 // by this point we have all required data in right registers
119 INTERP_256_NEON(q3, q5, q4, q2); // interpolate val1,val2 and val3,val4 125 INTERP_256_NEON(q3, q5, q4, q2); // interpolate val1,val2 and val3,val4
126# ifdef COLMUL
127# ifdef COLSAME
128 INTERP_256_NEON(d14, d9, d8, d4);
129# else
120 VSWP_NEON(d9, d12); // move result of val3,val4 interpolation (and c1 if COLMUL is defined) for next step 130 VSWP_NEON(d9, d12); // move result of val3,val4 interpolation (and c1 if COLMUL is defined) for next step
121 INTERP_256_NEON(q7, q6, q4, q2); // second stage of interpolation, also here c1 and c2 are interpolated 131 INTERP_256_NEON(q7, q6, q4, q2); // second stage of interpolation, also here c1 and c2 are interpolated
132# endif
133# else
134 INTERP_256_NEON(d14, d9, d8, d4);
135# endif
122# ifdef COLMUL 136# ifdef COLMUL
137# ifdef COLSAME
138 MUL4_SYM_NEON(d8, d12, d4);
139# else
123 MUL4_SYM_NEON(d8, d9, d4); // do required multiplication 140 MUL4_SYM_NEON(d8, d9, d4); // do required multiplication
141# endif
124# endif 142# endif
125 VMOV_R2M_NEON(q4, d8, d); // save result to d 143 VMOV_R2M_NEON(q4, d8, d); // save result to d
126 } 144 }
@@ -154,13 +172,22 @@
154# ifdef SCALE_USING_NEON 172# ifdef SCALE_USING_NEON
155# ifdef COLMUL 173# ifdef COLMUL
156# ifndef COLBLACK 174# ifndef COLBLACK
175# ifdef COLSAME
176 FPU_NEON;
177 VMOV_I2R_NEON(q2, #255);
178 VMOV_M2R_NEON(d10, c1);
179 VEOR_NEON(d0);
180 VZIP_NEON(d10, d0);
181# else
157 // c1 and c2 are constants inside the cycle 182 // c1 and c2 are constants inside the cycle
158 FPU_NEON; 183 FPU_NEON;
184 VMOV_I2R_NEON(q2, #255);
159 VMOV_M2R_NEON(d10, c1); 185 VMOV_M2R_NEON(d10, c1);
160 VEOR_NEON(q0); 186 VEOR_NEON(q0);
161 VMOV_M2R_NEON(d11, c2); 187 VMOV_M2R_NEON(d11, c2);
162 VZIP_NEON(q5, q0); 188 VZIP_NEON(q5, q0);
163 VMOV_R2R_NEON(d11, d0); 189 VMOV_R2R_NEON(d11, d0);
190# endif
164# endif 191# endif
165# endif 192# endif
166# endif 193# endif
@@ -184,9 +211,14 @@
184 val1 = *s; // col 211 val1 = *s; // col
185# ifdef COLSAME 212# ifdef COLSAME
186# ifdef SCALE_USING_NEON 213# ifdef SCALE_USING_NEON
187 *d = MUL4_SYM(c1, val1); 214 VMOV_M2R_NEON(d1, val1);
215 VEOR_NEON(d0);
216 VZIP_NEON(d1, d0);
217 VMOV_R2R_NEON(d0, d10);
218 MUL4_SYM_NEON(d0, d1, d4)
219 VMOV_R2M_NEON(q0, d0, d);
188# else 220# else
189 *d = MUL4_SYM(c1, val1); // XXX: do this in neon 221 *d = MUL4_SYM(c1, val1);
190# endif 222# endif
191# else 223# else
192# ifdef SCALE_USING_NEON 224# ifdef SCALE_USING_NEON