summaryrefslogtreecommitdiff
path: root/src/lib/ector
diff options
context:
space:
mode:
author: Carsten Haitzler (Rasterman) <raster@rasterman.com>, 2018-11-15 12:25:15 +0000
committer: Carsten Haitzler (Rasterman) <raster@rasterman.com>, 2018-11-16 00:08:47 +0000
commit: b9225fd710baeddd2ab6256e3b7b20e2e47e3633 (patch)
tree: 22d3a36c950cca4e50064b46c4d9b788fff4a2f2 /src/lib/ector
parent: 788507961a55670c959ed5aa57e84c818bc0f4a1 (diff)
ector - fix meson build with sse3 on ix86 (32bit)
Diffstat (limited to 'src/lib/ector')
-rw-r--r-- src/lib/ector/meson.build | 5
-rw-r--r-- src/lib/ector/software/ector_software_gradient.c | 272
-rw-r--r-- src/lib/ector/software/ector_software_gradient.h | 63
-rw-r--r-- src/lib/ector/software/ector_software_gradient_sse3.c | 214
-rw-r--r-- src/lib/ector/software/meson.build | 10
5 files changed, 296 insertions, 268 deletions
diff --git a/src/lib/ector/meson.build b/src/lib/ector/meson.build
index 58e4f96d53..33be8d2b1b 100644
--- a/src/lib/ector/meson.build
+++ b/src/lib/ector/meson.build
@@ -2,6 +2,8 @@ ector_deps = [eina, emile, eet, eo, efl]
2ector_pub_deps = [eina, efl] 2ector_pub_deps = [eina, efl]
3 3
4pub_eo_file_target = [] 4pub_eo_file_target = []
5ector_opt_lib = [ ]
6
5 7
6ector_header_src = [ 8ector_header_src = [
7 # nothing for now ector stays only intree 9 # nothing for now ector stays only intree
@@ -76,7 +78,8 @@ ector_lib = library('ector',
76 dependencies: ector_pub_deps + [triangulator, freetype, draw, m] + ector_deps, 78 dependencies: ector_pub_deps + [triangulator, freetype, draw, m] + ector_deps,
77 include_directories : config_dir, 79 include_directories : config_dir,
78 install: true, 80 install: true,
79 version : meson.project_version() 81 version : meson.project_version(),
82 link_with: ector_opt_lib
80) 83)
81 84
82ector = declare_dependency( 85ector = declare_dependency(
diff --git a/src/lib/ector/software/ector_software_gradient.c b/src/lib/ector/software/ector_software_gradient.c
index 7f4e916511..1681d80724 100644
--- a/src/lib/ector/software/ector_software_gradient.c
+++ b/src/lib/ector/software/ector_software_gradient.c
@@ -1,15 +1,9 @@
1#ifdef HAVE_CONFIG_H 1#include "ector_software_gradient.h"
2#include "config.h"
3#endif
4
5#include <assert.h>
6#include <math.h>
7
8#include <software/Ector_Software.h>
9 2
10#include "ector_private.h" 3#ifdef BUILD_SSE3
11#include "ector_software_private.h" 4void _radial_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, float det, float delta_det, float delta_delta_det, float b, float delta_b);
12#include "draw.h" 5void _linear_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, int t, int inc);
6#endif
13 7
14#define GRADIENT_STOPTABLE_SIZE 1024 8#define GRADIENT_STOPTABLE_SIZE 1024
15#define FIXPT_BITS 8 9#define FIXPT_BITS 8
@@ -24,262 +18,6 @@ typedef void (*Ector_Linear_Helper_Func)(uint32_t *buffer, int length, Ector_Ren
24static Ector_Radial_Helper_Func _ector_radial_helper; 18static Ector_Radial_Helper_Func _ector_radial_helper;
25static Ector_Linear_Helper_Func _ector_linear_helper; 19static Ector_Linear_Helper_Func _ector_linear_helper;
26 20
27static inline int
28_gradient_clamp(const Ector_Renderer_Software_Gradient_Data *data, int ipos)
29{
30 int limit;
31
32 if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REPEAT)
33 {
34 ipos = ipos % GRADIENT_STOPTABLE_SIZE;
35 ipos = ipos < 0 ? GRADIENT_STOPTABLE_SIZE + ipos : ipos;
36 }
37 else if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REFLECT)
38 {
39 limit = GRADIENT_STOPTABLE_SIZE * 2;
40 ipos = ipos % limit;
41 ipos = ipos < 0 ? limit + ipos : ipos;
42 ipos = ipos >= GRADIENT_STOPTABLE_SIZE ? limit - 1 - ipos : ipos;
43 }
44 else
45 {
46 if (ipos < 0) ipos = 0;
47 else if (ipos >= GRADIENT_STOPTABLE_SIZE)
48 ipos = GRADIENT_STOPTABLE_SIZE-1;
49 }
50 return ipos;
51}
52
53static uint32_t
54_gradient_pixel_fixed(const Ector_Renderer_Software_Gradient_Data *data, int fixed_pos)
55{
56 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
57
58 return data->color_table[_gradient_clamp(data, ipos)];
59}
60
61static inline uint32_t
62_gradient_pixel(const Ector_Renderer_Software_Gradient_Data *data, float pos)
63{
64 int ipos = (int)(pos * (GRADIENT_STOPTABLE_SIZE - 1) + (float)(0.5));
65
66 return data->color_table[_gradient_clamp(data, ipos)];
67}
68
69
70#ifdef BUILD_SSE3
71#include <immintrin.h>
72
// log2 of the color table size (GRADIENT_STOPTABLE_SIZE is 1024 == 1 << 10)
#define GRADIENT_STOPTABLE_SIZE_SHIFT 10
// Unions giving per-lane scalar access to the four 32-bit lanes of a vector
typedef union { __m128i v; int i[4];} vec4_i;
typedef union { __m128 v; float f[4];} vec4_f;

// Declares, in the enclosing scope, the constants used by the float clamp
// macros below: the pad limits, the 0.5 rounding bias, the bit masks that
// keep the low 10 (repeat) or 11 (reflect) index bits, and the reflect limit.
#define FETCH_CLAMP_INIT_F \
   __m128 v_min = _mm_set1_ps(0.0f); \
   __m128 v_max = _mm_set1_ps((float)(GRADIENT_STOPTABLE_SIZE-1)); \
   __m128 v_halff = _mm_set1_ps(0.5f); \
   __m128i v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT)); \
   __m128i v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1))); \
   __m128i v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);

// Repeat spread: truncate v_index to integers and keep the low index bits.
#define FETCH_CLAMP_REPEAT_F \
   vec4_i index_vec; \
   index_vec.v = _mm_and_si128(v_repeat_mask, _mm_cvttps_epi32(v_index));

// Reflect spread: wrap into [0, 2 * size - 1], then take min(x, limit - x).
// Both operands fit in the low 16 bits, so the 16-bit min is sufficient.
#define FETCH_CLAMP_REFLECT_F \
   vec4_i index_vec; \
   __m128i v_index_i = _mm_and_si128(v_reflect_mask, _mm_cvttps_epi32(v_index)); \
   __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \
   index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);

// Pad spread: clamp v_index to [v_min, v_max] before truncating.
// Operand order matters: the SSE min/max instructions return their SECOND
// operand when either input is NaN, so v_index must be the first operand of
// the max. That way a NaN lane (e.g. sqrt of a negative value) becomes
// v_min instead of surviving the clamp and converting to INT_MIN, which
// would index far outside color_table.
#define FETCH_CLAMP_PAD_F \
   vec4_i index_vec; \
   index_vec.v = _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(v_index, v_min), v_max));

// Stores the four looked-up colors and closes the loop body opened by the
// matching *_PROLOGUE macro (the trailing '}' is part of this macro).
#define FETCH_EPILOGUE_CPY \
   *buffer++ = g_data->color_table[index_vec.i[0]]; \
   *buffer++ = g_data->color_table[index_vec.i[1]]; \
   *buffer++ = g_data->color_table[index_vec.i[2]]; \
   *buffer++ = g_data->color_table[index_vec.i[3]]; \
}
105
/*
 * Split a run of `length` pixels starting at `buffer` into three parts:
 *   *lprealign  - leading pixels before the first 16-byte boundary
 *                 (or all `length` pixels if the run ends before it)
 *   *lby4       - pixels to process four at a time (a multiple of 4)
 *   *lremaining - trailing pixels left over (0..3)
 *
 * The lead-in count is derived arithmetically from the address rather than
 * by stepping the pointer: stepping never terminates when `buffer` is not
 * aligned to sizeof(*buffer), and it can move the pointer past the end of
 * the underlying array. For such a misaligned buffer the byte distance to
 * the boundary is simply rounded down to whole pixels.
 */
static void
loop_break(unsigned int *buffer, int length, int *lprealign, int *lby4 , int *lremaining)
{
   const uintptr_t misalign = (uintptr_t)buffer & 0xF;
   int l1 = (int)(((16 - misalign) & 0xF) / sizeof(*buffer));
   int l2 = 0, l3 = 0;

   if (length <= l1)
     {
        // the whole run fits in the scalar lead-in
        l1 = length;
     }
   else
     {
        l3 = (length - l1) % 4;
        l2 = length - l1 - l3;
     }

   *lprealign = l1;
   *lby4 = l2;
   *lremaining = l3;
}
128
129static void
130_radial_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data,
131 float det, float delta_det, float delta_delta_det, float b, float delta_b)
132{
133 int lprealign, lby4, lremaining, i;
134 vec4_f det_vec;
135 vec4_f delta_det4_vec;
136 vec4_f b_vec;
137 __m128 v_delta_delta_det16;
138 __m128 v_delta_delta_det6;
139 __m128 v_delta_b4;
140
141 loop_break(buffer, length, &lprealign, &lby4, &lremaining);
142
143 // prealign loop
144 for (i = 0 ; i < lprealign ; i++)
145 {
146 *buffer++ = _gradient_pixel(g_data, sqrt(det) - b);
147 det += delta_det;
148 delta_det += delta_delta_det;
149 b += delta_b;
150 }
151
152 // lby4 16byte align loop
153 for (i = 0; i < 4; ++i)
154 {
155 det_vec.f[i] = det;
156 delta_det4_vec.f[i] = 4 * delta_det;
157 b_vec.f[i] = b;
158
159 det += delta_det;
160 delta_det += delta_delta_det;
161 b += delta_b;
162 }
163
164 v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det);
165 v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det);
166 v_delta_b4 = _mm_set1_ps(4 * delta_b);
167
168#define FETCH_RADIAL_PROLOGUE \
169 for (i = 0 ; i < lby4 ; i+=4) { \
170 __m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(det_vec.v), b_vec.v); \
171 __m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_max), v_halff); \
172 det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \
173 delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \
174 b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4);
175
176#define FETCH_RADIAL_LOOP(FETCH_CLAMP) \
177 FETCH_RADIAL_PROLOGUE; \
178 FETCH_CLAMP; \
179 FETCH_EPILOGUE_CPY;
180
181 FETCH_CLAMP_INIT_F;
182 switch (g_data->gd->s)
183 {
184 case EFL_GFX_GRADIENT_SPREAD_REPEAT:
185 FETCH_RADIAL_LOOP(FETCH_CLAMP_REPEAT_F);
186 break;
187 case EFL_GFX_GRADIENT_SPREAD_REFLECT:
188 FETCH_RADIAL_LOOP( FETCH_CLAMP_REFLECT_F);
189 break;
190 default:
191 FETCH_RADIAL_LOOP(FETCH_CLAMP_PAD_F);
192 break;
193 }
194
195 // remaining loop
196 for (i = 0 ; i < lremaining ; i++)
197 *buffer++ = _gradient_pixel(g_data, sqrt(det_vec.f[i]) - b_vec.f[i]);
198}
199
200static void
201_linear_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, int t, int inc)
202{
203 int lprealign, lby4, lremaining, i;
204 vec4_i t_vec;
205 __m128i v_inc;
206 __m128i v_fxtpt_size;
207 __m128i v_min;
208 __m128i v_max;
209 __m128i v_repeat_mask;
210 __m128i v_reflect_mask;
211 __m128i v_reflect_limit;
212
213 loop_break(buffer, length, &lprealign, &lby4, &lremaining);
214
215 // prealign loop
216 for (i = 0 ; i < lprealign ; i++)
217 {
218 *buffer++ = _gradient_pixel_fixed(g_data, t);
219 t += inc;
220 }
221
222 // lby4 16byte align loop
223 for (i = 0; i < 4; ++i)
224 {
225 t_vec.i[i] = t;
226 t += inc;
227 }
228
229 v_inc = _mm_set1_epi32(4 * inc);
230 v_fxtpt_size = _mm_set1_epi32(FIXPT_SIZE * 0.5);
231
232 v_min = _mm_set1_epi32(0);
233 v_max = _mm_set1_epi32((GRADIENT_STOPTABLE_SIZE - 1));
234
235 v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT));
236 v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT + 1)));
237
238 v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);
239
240#define FETCH_LINEAR_LOOP_PROLOGUE \
241 for (i = 0 ; i < lby4 ; i+=4) { \
242 vec4_i index_vec; \
243 __m128i v_index; \
244 v_index = _mm_srai_epi32(_mm_add_epi32(t_vec.v, v_fxtpt_size), FIXPT_BITS); \
245 t_vec.v = _mm_add_epi32(t_vec.v, v_inc);
246
247#define FETCH_LINEAR_LOOP_CLAMP_REPEAT \
248 index_vec.v = _mm_and_si128(v_repeat_mask, v_index);
249
250#define FETCH_LINEAR_LOOP_CLAMP_REFLECT \
251 __m128i v_index_i = _mm_and_si128(v_reflect_mask, v_index); \
252 __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \
253 index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);
254
255#define FETCH_LINEAR_LOOP_CLAMP_PAD \
256 index_vec.v = _mm_min_epi16(v_max, _mm_max_epi16(v_min, v_index));
257
258#define FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP) \
259 FETCH_LINEAR_LOOP_PROLOGUE; \
260 FETCH_LINEAR_LOOP_CLAMP; \
261 FETCH_EPILOGUE_CPY;
262
263 switch (g_data->gd->s)
264 {
265 case EFL_GFX_GRADIENT_SPREAD_REPEAT:
266 FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REPEAT);
267 break;
268 case EFL_GFX_GRADIENT_SPREAD_REFLECT:
269 FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REFLECT);
270 break;
271 default:
272 FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_PAD);
273 break;
274 }
275
276 // remaining loop
277 for (i = 0 ; i < lremaining ; i++)
278 *buffer++ = _gradient_pixel_fixed(g_data, t_vec.i[i]);
279}
280
281#endif
282
283static void 21static void
284_update_color_table(void *data, Ector_Software_Thread *t EINA_UNUSED) 22_update_color_table(void *data, Ector_Software_Thread *t EINA_UNUSED)
285{ 23{
diff --git a/src/lib/ector/software/ector_software_gradient.h b/src/lib/ector/software/ector_software_gradient.h
new file mode 100644
index 0000000000..2cbf2f99d7
--- /dev/null
+++ b/src/lib/ector/software/ector_software_gradient.h
@@ -0,0 +1,63 @@
1#ifndef ECTOR_SOFTWARE_GRADIENT_H
2# define ECTOR_SOFTWARE_GRADIENT_H
3
4#ifdef HAVE_CONFIG_H
5#include "config.h"
6#endif
7
8#include <assert.h>
9#include <math.h>
10
11#include <software/Ector_Software.h>
12
13#include "ector_private.h"
14#include "ector_software_private.h"
15#include "draw.h"
16
17#define GRADIENT_STOPTABLE_SIZE 1024
18#define FIXPT_BITS 8
19#define FIXPT_SIZE (1<<FIXPT_BITS)
20
21static inline int
22_gradient_clamp(const Ector_Renderer_Software_Gradient_Data *data, int ipos)
23{
24 int limit;
25
26 if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REPEAT)
27 {
28 ipos = ipos % GRADIENT_STOPTABLE_SIZE;
29 ipos = ipos < 0 ? GRADIENT_STOPTABLE_SIZE + ipos : ipos;
30 }
31 else if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REFLECT)
32 {
33 limit = GRADIENT_STOPTABLE_SIZE * 2;
34 ipos = ipos % limit;
35 ipos = ipos < 0 ? limit + ipos : ipos;
36 ipos = ipos >= GRADIENT_STOPTABLE_SIZE ? limit - 1 - ipos : ipos;
37 }
38 else
39 {
40 if (ipos < 0) ipos = 0;
41 else if (ipos >= GRADIENT_STOPTABLE_SIZE)
42 ipos = GRADIENT_STOPTABLE_SIZE-1;
43 }
44 return ipos;
45}
46
47static inline uint32_t
48_gradient_pixel_fixed(const Ector_Renderer_Software_Gradient_Data *data, int fixed_pos)
49{
50 int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
51
52 return data->color_table[_gradient_clamp(data, ipos)];
53}
54
55static inline uint32_t
56_gradient_pixel(const Ector_Renderer_Software_Gradient_Data *data, float pos)
57{
58 int ipos = (int)(pos * (GRADIENT_STOPTABLE_SIZE - 1) + (float)(0.5));
59
60 return data->color_table[_gradient_clamp(data, ipos)];
61}
62
63#endif
diff --git a/src/lib/ector/software/ector_software_gradient_sse3.c b/src/lib/ector/software/ector_software_gradient_sse3.c
new file mode 100644
index 0000000000..883bdbf37e
--- /dev/null
+++ b/src/lib/ector/software/ector_software_gradient_sse3.c
@@ -0,0 +1,214 @@
1#include "ector_software_gradient.h"
2
3#ifdef BUILD_SSE3
4#include <immintrin.h>
5
// log2 of the color table size (GRADIENT_STOPTABLE_SIZE is 1024 == 1 << 10)
#define GRADIENT_STOPTABLE_SIZE_SHIFT 10
// Unions giving per-lane scalar access to the four 32-bit lanes of a vector
typedef union { __m128i v; int i[4];} vec4_i;
typedef union { __m128 v; float f[4];} vec4_f;

// Declares, in the enclosing scope, the constants used by the float clamp
// macros below: the pad limits, the 0.5 rounding bias, the bit masks that
// keep the low 10 (repeat) or 11 (reflect) index bits, and the reflect limit.
#define FETCH_CLAMP_INIT_F \
   __m128 v_min = _mm_set1_ps(0.0f); \
   __m128 v_max = _mm_set1_ps((float)(GRADIENT_STOPTABLE_SIZE-1)); \
   __m128 v_halff = _mm_set1_ps(0.5f); \
   __m128i v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT)); \
   __m128i v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1))); \
   __m128i v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);

// Repeat spread: truncate v_index to integers and keep the low index bits.
#define FETCH_CLAMP_REPEAT_F \
   vec4_i index_vec; \
   index_vec.v = _mm_and_si128(v_repeat_mask, _mm_cvttps_epi32(v_index));

// Reflect spread: wrap into [0, 2 * size - 1], then take min(x, limit - x).
// Both operands fit in the low 16 bits, so the 16-bit min is sufficient.
#define FETCH_CLAMP_REFLECT_F \
   vec4_i index_vec; \
   __m128i v_index_i = _mm_and_si128(v_reflect_mask, _mm_cvttps_epi32(v_index)); \
   __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \
   index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);

// Pad spread: clamp v_index to [v_min, v_max] before truncating.
// Operand order matters: the SSE min/max instructions return their SECOND
// operand when either input is NaN, so v_index must be the first operand of
// the max. That way a NaN lane (e.g. sqrt of a negative value) becomes
// v_min instead of surviving the clamp and converting to INT_MIN, which
// would index far outside color_table.
#define FETCH_CLAMP_PAD_F \
   vec4_i index_vec; \
   index_vec.v = _mm_cvttps_epi32(_mm_min_ps(_mm_max_ps(v_index, v_min), v_max));

// Stores the four looked-up colors and closes the loop body opened by the
// matching *_PROLOGUE macro (the trailing '}' is part of this macro).
#define FETCH_EPILOGUE_CPY \
   *buffer++ = g_data->color_table[index_vec.i[0]]; \
   *buffer++ = g_data->color_table[index_vec.i[1]]; \
   *buffer++ = g_data->color_table[index_vec.i[2]]; \
   *buffer++ = g_data->color_table[index_vec.i[3]]; \
}
38
/*
 * Split a run of `length` pixels starting at `buffer` into three parts:
 *   *lprealign  - leading pixels before the first 16-byte boundary
 *                 (or all `length` pixels if the run ends before it)
 *   *lby4       - pixels to process four at a time (a multiple of 4)
 *   *lremaining - trailing pixels left over (0..3)
 *
 * The lead-in count is derived arithmetically from the address rather than
 * by stepping the pointer: stepping never terminates when `buffer` is not
 * aligned to sizeof(*buffer), and it can move the pointer past the end of
 * the underlying array. For such a misaligned buffer the byte distance to
 * the boundary is simply rounded down to whole pixels.
 */
static void
loop_break(unsigned int *buffer, int length, int *lprealign, int *lby4 , int *lremaining)
{
   const uintptr_t misalign = (uintptr_t)buffer & 0xF;
   int l1 = (int)(((16 - misalign) & 0xF) / sizeof(*buffer));
   int l2 = 0, l3 = 0;

   if (length <= l1)
     {
        // the whole run fits in the scalar lead-in
        l1 = length;
     }
   else
     {
        l3 = (length - l1) % 4;
        l2 = length - l1 - l3;
     }

   *lprealign = l1;
   *lby4 = l2;
   *lremaining = l3;
}
61
62void
63_radial_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data,
64 float det, float delta_det, float delta_delta_det, float b, float delta_b)
65{
66 int lprealign, lby4, lremaining, i;
67 vec4_f det_vec;
68 vec4_f delta_det4_vec;
69 vec4_f b_vec;
70 __m128 v_delta_delta_det16;
71 __m128 v_delta_delta_det6;
72 __m128 v_delta_b4;
73
74 loop_break(buffer, length, &lprealign, &lby4, &lremaining);
75
76 // prealign loop
77 for (i = 0 ; i < lprealign ; i++)
78 {
79 *buffer++ = _gradient_pixel(g_data, sqrt(det) - b);
80 det += delta_det;
81 delta_det += delta_delta_det;
82 b += delta_b;
83 }
84
85 // lby4 16byte align loop
86 for (i = 0; i < 4; ++i)
87 {
88 det_vec.f[i] = det;
89 delta_det4_vec.f[i] = 4 * delta_det;
90 b_vec.f[i] = b;
91
92 det += delta_det;
93 delta_det += delta_delta_det;
94 b += delta_b;
95 }
96
97 v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det);
98 v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det);
99 v_delta_b4 = _mm_set1_ps(4 * delta_b);
100
101#define FETCH_RADIAL_PROLOGUE \
102 for (i = 0 ; i < lby4 ; i+=4) { \
103 __m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(det_vec.v), b_vec.v); \
104 __m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_max), v_halff); \
105 det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \
106 delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \
107 b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4);
108
109#define FETCH_RADIAL_LOOP(FETCH_CLAMP) \
110 FETCH_RADIAL_PROLOGUE; \
111 FETCH_CLAMP; \
112 FETCH_EPILOGUE_CPY;
113
114 FETCH_CLAMP_INIT_F;
115 switch (g_data->gd->s)
116 {
117 case EFL_GFX_GRADIENT_SPREAD_REPEAT:
118 FETCH_RADIAL_LOOP(FETCH_CLAMP_REPEAT_F);
119 break;
120 case EFL_GFX_GRADIENT_SPREAD_REFLECT:
121 FETCH_RADIAL_LOOP( FETCH_CLAMP_REFLECT_F);
122 break;
123 default:
124 FETCH_RADIAL_LOOP(FETCH_CLAMP_PAD_F);
125 break;
126 }
127
128 // remaining loop
129 for (i = 0 ; i < lremaining ; i++)
130 *buffer++ = _gradient_pixel(g_data, sqrt(det_vec.f[i]) - b_vec.f[i]);
131}
132
133void
134_linear_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, int t, int inc)
135{
136 int lprealign, lby4, lremaining, i;
137 vec4_i t_vec;
138 __m128i v_inc;
139 __m128i v_fxtpt_size;
140 __m128i v_min;
141 __m128i v_max;
142 __m128i v_repeat_mask;
143 __m128i v_reflect_mask;
144 __m128i v_reflect_limit;
145
146 loop_break(buffer, length, &lprealign, &lby4, &lremaining);
147
148 // prealign loop
149 for (i = 0 ; i < lprealign ; i++)
150 {
151 *buffer++ = _gradient_pixel_fixed(g_data, t);
152 t += inc;
153 }
154
155 // lby4 16byte align loop
156 for (i = 0; i < 4; ++i)
157 {
158 t_vec.i[i] = t;
159 t += inc;
160 }
161
162 v_inc = _mm_set1_epi32(4 * inc);
163 v_fxtpt_size = _mm_set1_epi32(FIXPT_SIZE * 0.5);
164
165 v_min = _mm_set1_epi32(0);
166 v_max = _mm_set1_epi32((GRADIENT_STOPTABLE_SIZE - 1));
167
168 v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT));
169 v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT + 1)));
170
171 v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);
172
173#define FETCH_LINEAR_LOOP_PROLOGUE \
174 for (i = 0 ; i < lby4 ; i+=4) { \
175 vec4_i index_vec; \
176 __m128i v_index; \
177 v_index = _mm_srai_epi32(_mm_add_epi32(t_vec.v, v_fxtpt_size), FIXPT_BITS); \
178 t_vec.v = _mm_add_epi32(t_vec.v, v_inc);
179
180#define FETCH_LINEAR_LOOP_CLAMP_REPEAT \
181 index_vec.v = _mm_and_si128(v_repeat_mask, v_index);
182
183#define FETCH_LINEAR_LOOP_CLAMP_REFLECT \
184 __m128i v_index_i = _mm_and_si128(v_reflect_mask, v_index); \
185 __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \
186 index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);
187
188#define FETCH_LINEAR_LOOP_CLAMP_PAD \
189 index_vec.v = _mm_min_epi16(v_max, _mm_max_epi16(v_min, v_index));
190
191#define FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP) \
192 FETCH_LINEAR_LOOP_PROLOGUE; \
193 FETCH_LINEAR_LOOP_CLAMP; \
194 FETCH_EPILOGUE_CPY;
195
196 switch (g_data->gd->s)
197 {
198 case EFL_GFX_GRADIENT_SPREAD_REPEAT:
199 FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REPEAT);
200 break;
201 case EFL_GFX_GRADIENT_SPREAD_REFLECT:
202 FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REFLECT);
203 break;
204 default:
205 FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_PAD);
206 break;
207 }
208
209 // remaining loop
210 for (i = 0 ; i < lremaining ; i++)
211 *buffer++ = _gradient_pixel_fixed(g_data, t_vec.i[i]);
212}
213
214#endif
diff --git a/src/lib/ector/software/meson.build b/src/lib/ector/software/meson.build
index 5e5dc599d7..b0e710c416 100644
--- a/src/lib/ector/software/meson.build
+++ b/src/lib/ector/software/meson.build
@@ -9,6 +9,16 @@ ector_src += files([
9 'ector_software_buffer.c', 9 'ector_software_buffer.c',
10]) 10])
11 11
# The SSE3 gradient helpers are built as their own static library so that
# ector_opt_c_args is applied to this single source file; the library is
# then appended to ector_opt_lib.
if cpu_sse3 == true
  ector_opt = static_library('ector_opt',
    sources: [ 'ector_software_gradient_sse3.c' ],
    c_args: ector_opt_c_args,
    include_directories: config_dir + [ include_directories('..') ],
    dependencies: ector_pub_deps + [triangulator, freetype, draw, m] + ector_deps,
  )
  ector_opt_lib += [ ector_opt ]
endif
21
12pub_eo_files = [ 22pub_eo_files = [
13 'ector_software_surface.eo', 23 'ector_software_surface.eo',
14 'ector_software_buffer.eo', 24 'ector_software_buffer.eo',