diff options
author | Yury Usischev <y.usishchev@samsung.com> | 2013-08-02 18:06:55 +0900 |
---|---|---|
committer | Carsten Haitzler (Rasterman) <raster@rasterman.com> | 2013-08-02 18:06:55 +0900 |
commit | bd6de4ba8c9711c1c010a3b1b311738d248c26ce (patch) | |
tree | 3c79b3b4e8708b3a9154022acda726ced1f52fb3 /src/lib/evas/include | |
parent | a3165bff15303e2e1ab0c969bf30577e2a0d031c (diff) |
Add neon for upscaling and map routines in evas.
Diffstat (limited to 'src/lib/evas/include')
-rw-r--r-- | src/lib/evas/include/evas_blend_ops.h | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/src/lib/evas/include/evas_blend_ops.h b/src/lib/evas/include/evas_blend_ops.h index 0a78843579..3ae94379ec 100644 --- a/src/lib/evas/include/evas_blend_ops.h +++ b/src/lib/evas/include/evas_blend_ops.h | |||
@@ -186,6 +186,64 @@ extern const DATA32 ALPHA_256; | |||
186 | 186 | ||
187 | #endif | 187 | #endif |
188 | 188 | ||
189 | /* some useful NEON macros */ | ||
190 | |||
191 | #ifdef BUILD_NEON | ||
192 | #define FPU_NEON /* emit the ".fpu neon" assembler directive; the expansion already ends in ';' */ \ | ||
193 | __asm__ __volatile__(".fpu neon \n\t"); | ||
194 | |||
195 | /* copy reg2 into reg1 (vmov takes the destination first); reg1 is declared clobbered */ | ||
196 | #define VMOV_R2R_NEON(reg1, reg2) \ | ||
197 | __asm__ __volatile__("vmov " #reg1 ", " #reg2 " \n\t" ::: #reg1); | ||
198 | |||
199 | /* copy 32bit value into lane 0 (the lowest 32 bits) of register reg; reg is declared clobbered */ | ||
200 | #define VMOV_M2R_NEON(reg, value) \ | ||
201 | __asm__ __volatile__("vmov.32 " #reg "[0], %[val] \n\t" :: [val] "r" (value) : #reg); | ||
202 | |||
203 | /* narrow the unsigned 16bit lanes of regq to 8bit with saturation into the 64bit register regd, then store */ | ||
204 | /* the low 32 bits of regd (lane 0) to the memory pointed to by pointer; regd is a temporary and is clobbered */ | ||
205 | #define VMOV_R2M_NEON(regq, regd, pointer) \ | ||
206 | __asm__ __volatile__("vqmovn.u16 " #regd ", " #regq " \n\t" \ | ||
207 | "vst1.32 {" #regd "[0]}, [%[p]] \n\t" :: [p] "r" (pointer) : #regd, "memory"); | ||
208 | |||
209 | /* set every 16bit lane of reg to the constant imm; imm is stringized into the instruction text, so it must be spelled as the assembler expects an immediate operand */ | ||
210 | #define VMOV_I2R_NEON(reg, imm) \ | ||
211 | __asm__ __volatile__("vmov.i16 " #reg ", " #imm " \n\t" ::: #reg); | ||
212 | |||
213 | /* replicate value (taken from a general-purpose register) into every 16bit lane of reg */ | ||
214 | #define VDUP_NEON(reg, value) \ | ||
215 | __asm__ __volatile__("vdup.16 " #reg ", %[val] \n\t" :: [val] "r" (value) : #reg); | ||
216 | |||
217 | /* interleave the 8bit elements of reg1 and reg2 in place; both registers are rewritten */ | ||
218 | #define VZIP_NEON(reg1, reg2) \ | ||
219 | __asm__ __volatile__("vzip.8 " #reg1 ", " #reg2 " \n\t" ::: #reg1 , #reg2); | ||
220 | |||
221 | /* exchange the contents of reg1 and reg2; both registers are declared clobbered */ | ||
222 | #define VSWP_NEON(reg1, reg2) \ | ||
223 | __asm__ __volatile__("vswp " #reg1 ", " #reg2 " \n\t" ::: #reg1 , #reg2); | ||
224 | |||
225 | /* clear reg to zero by xor-ing it with itself */ | ||
226 | #define VEOR_NEON(reg) \ | ||
227 | __asm__ __volatile__("veor " #reg ", " #reg ", " #reg " \n\t" ::: #reg); | ||
228 | |||
229 | /* interpolate every channel RGBA, per 16bit lane: regy = ((((regx - regy) * rega) >> 8) + regy) & reg255; regx is used as scratch and clobbered. vsri keeps the top byte of each lane, so this holds only if reg255 masks it off (presumably 0x00ff per lane - confirm at call sites) */ | ||
230 | #define INTERP_256_NEON(rega, regx, regy, reg255) \ | ||
231 | __asm__ __volatile__("vsub.i16 " #regx ", " #regx ", " #regy " \n\t" \ | ||
232 | "vmul.u16 " #regx ", " #regx ", " #rega " \n\t" \ | ||
233 | "vsri.16 " #regx ", " #regx ", #8 \n\t" \ | ||
234 | "vadd.i16 " #regx ", " #regx ", " #regy " \n\t" \ | ||
235 | "vand " #regy ", " #regx ", " #reg255 " \n\t" \ | ||
236 | ::: #regx, #regy ); | ||
237 | |||
238 | /* multiply every channel of regx and regy, per 16bit lane: regx = (((regx * regy) + reg255) >> 8) & reg255; the result is left in regx, regy and reg255 are only read. vsri keeps the top byte of each lane, so this holds only if reg255 masks it off (presumably 0x00ff per lane - confirm at call sites) */ | ||
239 | #define MUL4_SYM_NEON(regx, regy, reg255) \ | ||
240 | __asm__ __volatile__("vmul.u16 " #regx ", " #regx ", " #regy " \n\t" \ | ||
241 | "vadd.i16 " #regx ", " #regx ", " #reg255 " \n\t" \ | ||
242 | "vsri.16 " #regx ", " #regx ", #8 \n\t" \ | ||
243 | "vand " #regx ", " #regx ", " #reg255 " \n\t" \ | ||
244 | ::: #regx ); | ||
245 | |||
246 | #endif | ||
189 | 247 | ||
190 | /* some useful SSE3 inline functions */ | 248 | /* some useful SSE3 inline functions */ |
191 | 249 | ||