author    Snacker (Vladimir) <wsnacker@mail.ru>  2014-02-28 07:04:52 +0900
committer Carsten Haitzler (Rasterman) <raster@rasterman.com>  2014-02-28 07:04:52 +0900
commit    8e17290f1903d6ec51af517e4ab67d6e959a1843 (patch)
tree      9e296678fb85202ed9dd17199d8ab5b842b778fa /src/lib/evas/common/evas_convert_rgb_32.c
parent    30d7779178d8b961004dabae6b0ed682f6889e97 (diff)
@feature - Apply NEON intrinsics to improve rotation
Diffstat (limited to 'src/lib/evas/common/evas_convert_rgb_32.c')
-rw-r--r-- src/lib/evas/common/evas_convert_rgb_32.c | 153
1 file changed, 124 insertions(+), 29 deletions(-)
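
The core of the change is a gather-then-store pattern: a 90/270-degree rotation writes the destination row by row but reads the source down a column, so four strided source pixels are collected into a small array and written out with one 128-bit NEON store. Below is a standalone sketch of that idea for the 90-degree case; the function and variable names are illustrative, not the patch's, and only the vld1q_s32/vst1q_s32 intrinsics and the access pattern come from the diff that follows. It assumes a NEON-capable toolchain and w divisible by 4, as the patch's fast path does.

#include <stdint.h>
#include <arm_neon.h>

/* Hypothetical sketch: rotate a w x h block by 90 degrees, writing the
 * destination linearly and gathering four strided source pixels per
 * vector store. Assumes (w % 4) == 0. */
void
rotate90_gather_neon(int32_t *dst, int dst_stride,
                     const int32_t *src, int src_stride,
                     int w, int h)
{
   int x, y;
   for (y = 0; y < h; y++)
     {
        const int32_t *s = src + (h - y - 1); /* bottom of column y */
        int32_t *d = dst + (dst_stride * y);
        for (x = 0; x < w; x += 4)
          {
             /* gather four pixels, one from each of four source rows */
             int32_t quad[4] = { s[0], s[src_stride],
                                 s[2 * src_stride], s[3 * src_stride] };
             vst1q_s32(d, vld1q_s32(quad)); /* one 128-bit store */
             d += 4;
             s += 4 * src_stride;
          }
     }
}

The scalar fallback in the diff performs the same walk one pixel at a time; the gain comes from replacing four narrow stores with a single vector store per quad.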
diff --git a/src/lib/evas/common/evas_convert_rgb_32.c b/src/lib/evas/common/evas_convert_rgb_32.c
index 11c47e26b0..aae9d37e12 100644
--- a/src/lib/evas/common/evas_convert_rgb_32.c
+++ b/src/lib/evas/common/evas_convert_rgb_32.c
@@ -1,5 +1,8 @@
 #include "evas_common_private.h"
 #include "evas_convert_rgb_32.h"
+#ifdef BUILD_NEON
+#include <arm_neon.h>
+#endif
 
 void
 evas_common_convert_rgba_to_32bpp_rgb_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
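
The header is included only under BUILD_NEON because this file must still compile for non-ARM targets; at run time the code additionally checks the CPU before taking the vector path. A minimal sketch of that two-level gate follows; the wrapper function is hypothetical, while evas_common_cpu_has_feature() and CPU_FEATURE_NEON are the real Evas names used in the hunk below.

#include "evas_common_private.h" /* evas_common_cpu_has_feature() */

static void
rotate_dispatch_example(void) /* hypothetical wrapper, illustration only */
{
#ifdef BUILD_NEON
   /* run-time check: the binary may run on ARM cores without NEON */
   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
     {
        /* NEON fast path, compiled only when BUILD_NEON is defined */
        return;
     }
#endif
   /* portable scalar fallback, always compiled */
}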
@@ -41,51 +44,143 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_180 (DATA32 *src, DATA8 *dst, int
 }
 
 #ifdef TILE_ROTATE
+#ifdef BUILD_NEON
+#define ROT90_QUAD_COPY_LOOP \
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) \
+     { \
+        if((w%4) == 0) \
+          { \
+             int klght = 4 * src_stride; \
+             for(y = 0; y < h; y++) \
+               { \
+                  const pix_type *s = &(src[(h - y - 1)]); \
+                  pix_type *d = &(dst[(dst_stride * y)]); \
+                  pix_type *ptr1 = s; \
+                  pix_type *ptr2 = ptr1 + src_stride; \
+                  pix_type *ptr3 = ptr2 + src_stride; \
+                  pix_type *ptr4 = ptr3 + src_stride; \
+                  for(x = 0; x < w; x+=4) \
+                    { \
+                       pix_type s_array[4] = {*ptr1, *ptr2, *ptr3, *ptr4}; \
+                       vst1q_s32(d, vld1q_s32(s_array)); \
+                       d += 4; \
+                       ptr1 += klght; \
+                       ptr2 += klght; \
+                       ptr3 += klght; \
+                       ptr4 += klght; \
+                    } \
+               } \
+          } \
+        else \
+          { \
+             for (y = 0; y < h; y++) \
+               { \
+                  const pix_type *s = &(src[(h - y - 1)]); \
+                  pix_type *d = &(dst[(dst_stride * y)]); \
+                  for (x = 0; x < w; x++) \
+                    { \
+                       *d++ = *s; \
+                       s += src_stride; \
+                    } \
+               } \
+          } \
+     } \
+   else
+#define ROT270_QUAD_COPY_LOOP \
+   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) \
+     if((w%4) == 0) \
+       { \
+          int klght = 4 * src_stride; \
+          for(y = 0; y < h; y++) \
+            { \
+               const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
+               pix_type *d = &(dst[(dst_stride * y)]); \
+               pix_type *ptr1 = s; \
+               pix_type *ptr2 = ptr1 + src_stride; \
+               pix_type *ptr3 = ptr2 + src_stride; \
+               pix_type *ptr4 = ptr3 + src_stride; \
+               for(x = 0; x < w; x+=4) \
+                 { \
+                    pix_type s_array[4] = {*ptr1, *ptr2, *ptr3, *ptr4}; \
+                    vst1q_s32(d, vld1q_s32(s_array)); \
+                    d += 4; \
+                    ptr1 += klght; \
+                    ptr2 += klght; \
+                    ptr3 += klght; \
+                    ptr4 += klght; \
+                 } \
+            } \
+       } \
+     else \
+       { \
+          for (y = 0; y < h; y++) \
+            { \
+               const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
+               pix_type *d = &(dst[(dst_stride * y)]); \
+               for (x = 0; x < w; x++) \
+                 { \
+                    *d++ = *s; \
+                    s += src_stride; \
+                 } \
+            } \
+       } \
+     } \
+   else
+#else
+#define ROT90_QUAD_COPY_LOOP
+#define ROT270_QUAD_COPY_LOOP
+#endif
 #define FAST_SIMPLE_ROTATE(suffix, pix_type) \
    static void \
-   blt_rotated_90_trivial_##suffix(pix_type *dst, \
+   blt_rotated_90_trivial_##suffix(pix_type * restrict dst, \
                                    int dst_stride, \
-                                   const pix_type *src, \
+                                   const pix_type * restrict src, \
                                    int src_stride, \
                                    int w, \
                                    int h) \
    { \
       int x, y; \
-      for (y = 0; y < h; y++) \
-        { \
-           const pix_type *s = src + (h - y - 1); \
-           pix_type *d = dst + (dst_stride * y); \
-           for (x = 0; x < w; x++) \
-             { \
-                *d++ = *s; \
-                s += src_stride; \
-             } \
-        } \
+      ROT90_QUAD_COPY_LOOP \
+        { \
+           for (y = 0; y < h; y++) \
+             { \
+                const pix_type *s = &(src[(h - y - 1)]); \
+                pix_type *d = &(dst[(dst_stride * y)]); \
+                for (x = 0; x < w; x++) \
+                  { \
+                     *d++ = *s; \
+                     s += src_stride; \
+                  } \
+             } \
+        } \
    } \
    static void \
-   blt_rotated_270_trivial_##suffix(pix_type *dst, \
+   blt_rotated_270_trivial_##suffix(pix_type * restrict dst, \
                                     int dst_stride, \
-                                    const pix_type *src, \
+                                    const pix_type * restrict src, \
                                     int src_stride, \
                                     int w, \
                                     int h) \
    { \
       int x, y; \
-      for (y = 0; y < h; y++) \
-        { \
-           const pix_type *s = src + (src_stride * (w - 1)) + y; \
-           pix_type *d = dst + (dst_stride * y); \
-           for (x = 0; x < w; x++) \
-             { \
-                *d++ = *s; \
-                s -= src_stride; \
-             } \
-        } \
+      ROT270_QUAD_COPY_LOOP \
+        { \
+           for(y = 0; y < h; y++) \
+             { \
+                const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
+                pix_type *d = &(dst[(dst_stride * y)]); \
+                for (x = 0; x < w; x++) \
+                  { \
+                     *d++ = *s; \
+                     s -= src_stride; \
+                  } \
+             } \
+        } \
    } \
    static void \
-   blt_rotated_90_##suffix(pix_type *dst, \
+   blt_rotated_90_##suffix(pix_type * restrict dst, \
                            int dst_stride, \
-                           const pix_type *src, \
+                           const pix_type * restrict src, \
                            int src_stride, \
                            int w, \
                            int h) \
@@ -120,7 +215,7 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_180 (DATA32 *src, DATA8 *dst, int
       { \
          blt_rotated_90_trivial_##suffix(dst + x, \
                                          dst_stride, \
-                                         src + (src_stride * x), \
+                                         &(src[(src_stride * x)]), \
                                          src_stride, \
                                          TILE_SIZE, \
                                          h); \
@@ -128,15 +223,15 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_180 (DATA32 *src, DATA8 *dst, int
       if (trailing_pixels) \
         blt_rotated_90_trivial_##suffix(dst + w, \
                                         dst_stride, \
-                                        src + (w * src_stride), \
+                                        &(src[(w * src_stride)]), \
                                         src_stride, \
                                         trailing_pixels, \
                                         h); \
    } \
    static void \
-   blt_rotated_270_##suffix(pix_type *dst, \
+   blt_rotated_270_##suffix(pix_type * restrict dst, \
                             int dst_stride, \
-                            const pix_type *src, \
+                            const pix_type * restrict src, \
                             int src_stride, \
                             int w, \
                             int h) \
@@ -151,7 +246,7 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_180 (DATA32 *src, DATA8 *dst, int
         leading_pixels = w; \
       blt_rotated_270_trivial_##suffix(dst, \
                                        dst_stride, \
-                                       src + (src_stride * (w - leading_pixels)), \
+                                       &(src[(src_stride * (w - leading_pixels))]), \
                                        src_stride, \
                                        leading_pixels, \
                                        h); \
@@ -171,7 +266,7 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_180 (DATA32 *src, DATA8 *dst, int
       { \
          blt_rotated_270_trivial_##suffix(dst + x, \
                                           dst_stride, \
-                                          src + (src_stride * (w - x - TILE_SIZE)), \
+                                          &(src[(src_stride * (w - x - TILE_SIZE))]), \
                                           src_stride, \
                                           TILE_SIZE, \
                                           h); \
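
Note the bare else that each QUAD_COPY_LOOP macro ends with: inside FAST_SIMPLE_ROTATE, the brace-enclosed scalar loop that follows the macro becomes the else branch of the runtime NEON check, and when BUILD_NEON is undefined the macros expand to nothing, so the same scalar loop stands alone. Hand-expanding the 90-degree trivial rotate for the 32-bit case (which this file presumably instantiates via something like FAST_SIMPLE_ROTATE(8888, DATA32)) gives roughly the shape below; this is an illustrative reconstruction, not literal preprocessor output.

/* illustrative hand-expansion with TILE_ROTATE and BUILD_NEON defined */
static void
blt_rotated_90_trivial_8888_expanded(DATA32 * restrict dst, int dst_stride,
                                     const DATA32 * restrict src, int src_stride,
                                     int w, int h)
{
   int x, y;
   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
     {
        /* ROT90_QUAD_COPY_LOOP body: NEON quad copy when (w % 4) == 0,
           plain per-pixel loop otherwise */
     }
   else
     { /* the block following the macro becomes the fallback branch */
        for (y = 0; y < h; y++)
          {
             const DATA32 *s = &(src[(h - y - 1)]);
             DATA32 *d = &(dst[(dst_stride * y)]);
             for (x = 0; x < w; x++)
               {
                  *d++ = *s;
                  s += src_stride;
               }
          }
     }
}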