diff --git a/src/lib/ector/software/ector_software_gradient.c b/src/lib/ector/software/ector_software_gradient.c index 4a978e02cb..a68f77767e 100644 --- a/src/lib/ector/software/ector_software_gradient.c +++ b/src/lib/ector/software/ector_software_gradient.c @@ -16,19 +16,20 @@ #define FIXPT_BITS 8 #define FIXPT_SIZE (1<gd->s == EFL_GFX_GRADIENT_SPREAD_REPEAT) { ipos = ipos % GRADIENT_STOPTABLE_SIZE; @@ -50,11 +51,11 @@ _gradient_clamp(const Ector_Renderer_Software_Gradient_Data *data, int ipos) return ipos; } - static uint _gradient_pixel_fixed(const Ector_Renderer_Software_Gradient_Data *data, int fixed_pos) { int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS; + return data->color_table[_gradient_clamp(data, ipos)]; } @@ -62,6 +63,7 @@ static inline uint _gradient_pixel(const Ector_Renderer_Software_Gradient_Data *data, float pos) { int ipos = (int)(pos * (GRADIENT_STOPTABLE_SIZE - 1) + (float)(0.5)); + return data->color_table[_gradient_clamp(data, ipos)]; } @@ -70,8 +72,8 @@ _gradient_pixel(const Ector_Renderer_Software_Gradient_Data *data, float pos) #include #define GRADIENT_STOPTABLE_SIZE_SHIFT 10 -typedef union{ __m128i v; int i[4];}vec4_i; -typedef union{ __m128 v; float f[4];}vec4_f; +typedef union { __m128i v; int i[4];} vec4_i; +typedef union { __m128 v; float f[4];} vec4_f; #define FETCH_CLAMP_INIT_F \ __m128 v_min = _mm_set1_ps(0.0f); \ @@ -95,7 +97,6 @@ typedef union{ __m128 v; float f[4];}vec4_f; vec4_i index_vec; \ index_vec.v = _mm_cvttps_epi32(_mm_min_ps(v_max, _mm_max_ps(v_min, v_index))); - #define FETCH_EPILOGUE_CPY \ *buffer++ = g_data->color_table[index_vec.i[0]]; \ *buffer++ = g_data->color_table[index_vec.i[1]]; \ @@ -103,31 +104,43 @@ typedef union{ __m128 v; float f[4];}vec4_f; *buffer++ = g_data->color_table[index_vec.i[3]]; \ } -static void +static void loop_break(unsigned int *buffer, int length, int *lprealign, int *lby4 , int *lremaining) { - int l1=0,l2=0,l3=0; + int l1=0, l2=0, l3=0; + while ((uintptr_t)buffer & 0xF) buffer++ , l1++; if(length <= l1) - l1 = length; + { + l1 = length; + } else { - l3 = (length - l1)%4; + l3 = (length - l1) % 4; l2 = length - l1 - l3 ; } + *lprealign = l1; *lby4 = l2; *lremaining = l3; } -static void +static void _radial_helper_sse3(uint *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, float det, float delta_det, float delta_delta_det, float b, float delta_b) { int lprealign, lby4, lremaining, i; + vec4_f det_vec; + vec4_f delta_det4_vec; + vec4_f b_vec; + __m128 v_delta_delta_det16; + __m128 v_delta_delta_det6; + __m128 v_delta_b4; + loop_break(buffer, length, &lprealign, &lby4, &lremaining); + // prealign loop for (i = 0 ; i < lprealign ; i++) { @@ -138,10 +151,6 @@ _radial_helper_sse3(uint *buffer, int length, Ector_Renderer_Software_Gradient_D } // lby4 16byte align loop - vec4_f det_vec; - vec4_f delta_det4_vec; - vec4_f b_vec; - for (i = 0; i < 4; ++i) { det_vec.f[i] = det; @@ -153,37 +162,36 @@ _radial_helper_sse3(uint *buffer, int length, Ector_Renderer_Software_Gradient_D b += delta_b; } - __m128 v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det); - __m128 v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det); - __m128 v_delta_b4 = _mm_set1_ps(4 * delta_b); - -#define FETCH_RADIAL_PROLOGUE \ - for (i = 0 ; i < lby4 ; i+=4) { \ - __m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(det_vec.v), b_vec.v); \ - __m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_max), v_halff); \ - det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \ - delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \ - b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4); + v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det); + v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det); + v_delta_b4 = _mm_set1_ps(4 * delta_b); +#define FETCH_RADIAL_PROLOGUE \ + for (i = 0 ; i < lby4 ; i+=4) { \ + __m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(det_vec.v), b_vec.v); \ + __m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_max), v_halff); \ + det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \ + delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \ + b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4); #define FETCH_RADIAL_LOOP(FETCH_CLAMP) \ - FETCH_RADIAL_PROLOGUE \ - FETCH_CLAMP \ - FETCH_EPILOGUE_CPY + FETCH_RADIAL_PROLOGUE; \ + FETCH_CLAMP; \ + FETCH_EPILOGUE_CPY; - FETCH_CLAMP_INIT_F - switch (g_data->gd->s) - { - case EFL_GFX_GRADIENT_SPREAD_REPEAT: - FETCH_RADIAL_LOOP(FETCH_CLAMP_REPEAT_F) - break; - case EFL_GFX_GRADIENT_SPREAD_REFLECT: - FETCH_RADIAL_LOOP( FETCH_CLAMP_REFLECT_F) - break; - default: - FETCH_RADIAL_LOOP(FETCH_CLAMP_PAD_F) - break; - } + FETCH_CLAMP_INIT_F; + switch (g_data->gd->s) + { + case EFL_GFX_GRADIENT_SPREAD_REPEAT: + FETCH_RADIAL_LOOP(FETCH_CLAMP_REPEAT_F); + break; + case EFL_GFX_GRADIENT_SPREAD_REFLECT: + FETCH_RADIAL_LOOP( FETCH_CLAMP_REFLECT_F); + break; + default: + FETCH_RADIAL_LOOP(FETCH_CLAMP_PAD_F); + break; + } // remaining loop for (i = 0 ; i < lremaining ; i++) @@ -194,7 +202,17 @@ static void _linear_helper_sse3(uint *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, int t, int inc) { int lprealign, lby4, lremaining, i; - loop_break(buffer, length, &lprealign, &lby4, &lremaining); + vec4_i t_vec; + __m128i v_inc; + __m128i v_fxtpt_size; + __m128i v_min; + __m128i v_max; + __m128i v_repeat_mask; + __m128i v_reflect_mask; + __m128i v_reflect_limit; + + loop_break(buffer, length, &lprealign, &lby4, &lremaining); + // prealign loop for (i = 0 ; i < lprealign ; i++) { @@ -203,61 +221,58 @@ _linear_helper_sse3(uint *buffer, int length, Ector_Renderer_Software_Gradient_D } // lby4 16byte align loop - vec4_i t_vec; for (i = 0; i < 4; ++i) { t_vec.i[i] = t; t += inc; } - __m128i v_inc = _mm_set1_epi32(4 * inc); - __m128i v_fxtpt_size = _mm_set1_epi32(FIXPT_SIZE * 0.5); + v_inc = _mm_set1_epi32(4 * inc); + v_fxtpt_size = _mm_set1_epi32(FIXPT_SIZE * 0.5); - __m128i v_min = _mm_set1_epi32(0); - __m128i v_max = _mm_set1_epi32((GRADIENT_STOPTABLE_SIZE-1)); + v_min = _mm_set1_epi32(0); + v_max = _mm_set1_epi32((GRADIENT_STOPTABLE_SIZE - 1)); - __m128i v_repeat_mask = _mm_set1_epi32(~((uint)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT)); - __m128i v_reflect_mask = _mm_set1_epi32(~((uint)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1))); + v_repeat_mask = _mm_set1_epi32(~((uint)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT)); + v_reflect_mask = _mm_set1_epi32(~((uint)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT + 1))); - __m128i v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1); + v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1); -#define FETCH_LINEAR_LOOP_PROLOGUE \ - for (i = 0 ; i < lby4 ; i+=4) { \ - vec4_i index_vec;\ - __m128i v_index;\ - v_index = _mm_srai_epi32(_mm_add_epi32(t_vec.v, v_fxtpt_size), FIXPT_BITS); \ - t_vec.v = _mm_add_epi32(t_vec.v, v_inc); +#define FETCH_LINEAR_LOOP_PROLOGUE \ + for (i = 0 ; i < lby4 ; i+=4) { \ + vec4_i index_vec; \ + __m128i v_index; \ + v_index = _mm_srai_epi32(_mm_add_epi32(t_vec.v, v_fxtpt_size), FIXPT_BITS); \ + t_vec.v = _mm_add_epi32(t_vec.v, v_inc); -#define FETCH_LINEAR_LOOP_CLAMP_REPEAT \ - index_vec.v = _mm_and_si128(v_repeat_mask, v_index); +#define FETCH_LINEAR_LOOP_CLAMP_REPEAT \ + index_vec.v = _mm_and_si128(v_repeat_mask, v_index); -#define FETCH_LINEAR_LOOP_CLAMP_REFLECT \ - __m128i v_index_i = _mm_and_si128(v_reflect_mask, v_index); \ - __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \ - index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv); +#define FETCH_LINEAR_LOOP_CLAMP_REFLECT \ + __m128i v_index_i = _mm_and_si128(v_reflect_mask, v_index); \ + __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \ + index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv); -#define FETCH_LINEAR_LOOP_CLAMP_PAD \ - index_vec.v = _mm_min_epi16(v_max, _mm_max_epi16(v_min, v_index)); +#define FETCH_LINEAR_LOOP_CLAMP_PAD \ + index_vec.v = _mm_min_epi16(v_max, _mm_max_epi16(v_min, v_index)); +#define FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP) \ + FETCH_LINEAR_LOOP_PROLOGUE; \ + FETCH_LINEAR_LOOP_CLAMP; \ + FETCH_EPILOGUE_CPY; - -#define FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP) \ - FETCH_LINEAR_LOOP_PROLOGUE \ - FETCH_LINEAR_LOOP_CLAMP \ - FETCH_EPILOGUE_CPY - - switch (g_data->gd->s) - { + switch (g_data->gd->s) + { case EFL_GFX_GRADIENT_SPREAD_REPEAT: - FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REPEAT) - break; + FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REPEAT); + break; case EFL_GFX_GRADIENT_SPREAD_REFLECT: - FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REFLECT) - break; + FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REFLECT); + break; default: - FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_PAD) - break; - } + FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_PAD); + break; + } // remaining loop for (i = 0 ; i < lremaining ; i++) @@ -282,6 +297,7 @@ _generate_gradient_color_table(Efl_Gfx_Gradient_Stop *gradient_stops, int stop_c Efl_Gfx_Gradient_Stop *curr, *next; uint current_color, next_color; double delta, t, incr, fpos; + assert(stop_count > 0); curr = gradient_stops; @@ -301,12 +317,14 @@ _generate_gradient_color_table(Efl_Gfx_Gradient_Stop *gradient_stops, int stop_c for (i = 0; i < stop_count - 1; ++i) { + BLEND_FUNC func; + curr = (gradient_stops + i); next = (gradient_stops + i + 1); delta = 1/(next->offset - curr->offset); if (next->a != 255) alpha = EINA_TRUE; next_color = ECTOR_ARGB_JOIN(next->a, next->r, next->g, next->b); - BLEND_FUNC func = &_ease_linear; + func = &_ease_linear; while (fpos < next->offset && pos < size) { t = func((fpos - curr->offset) * delta); @@ -334,7 +352,8 @@ update_color_table(Ector_Renderer_Software_Gradient_Data *gdata) if (gdata->color_table) return; gdata->color_table = malloc(GRADIENT_STOPTABLE_SIZE * 4); - gdata->alpha = _generate_gradient_color_table(gdata->gd->colors, gdata->gd->colors_count, gdata->color_table, GRADIENT_STOPTABLE_SIZE); + gdata->alpha = _generate_gradient_color_table(gdata->gd->colors, gdata->gd->colors_count, + gdata->color_table, GRADIENT_STOPTABLE_SIZE); } void @@ -352,6 +371,7 @@ _linear_helper_generic(uint *buffer, int length, Ector_Renderer_Software_Gradien int t_fixed, int inc_fixed) { int i; + for (i = 0 ; i < length ; i++) { *buffer++ = _gradient_pixel_fixed(g_data, t_fixed); @@ -366,6 +386,7 @@ fetch_linear_gradient(uint *buffer, Span_Data *data, int y, int x, int length) float t, inc, rx=0, ry=0; uint *end; int t_fixed, inc_fixed; + if (g_data->linear.l == 0) { t = inc = 0; @@ -394,7 +415,7 @@ fetch_linear_gradient(uint *buffer, Span_Data *data, int y, int x, int length) // we can use fixed point math t_fixed = (int)(t * FIXPT_SIZE); inc_fixed = (int)(inc * FIXPT_SIZE); - linear_helper(buffer, length, g_data, t_fixed, inc_fixed); + _ector_linear_helper(buffer, length, g_data, t_fixed, inc_fixed); } else { @@ -415,6 +436,7 @@ _radial_helper_generic(uint *buffer, int length, Ector_Renderer_Software_Gradien float delta_det, float delta_delta_det, float b, float delta_b) { int i; + for (i = 0 ; i < length ; i++) { *buffer++ = _gradient_pixel(g_data, sqrt(det) - b); @@ -431,6 +453,7 @@ fetch_radial_gradient(uint *buffer, Span_Data *data, int y, int x, int length) float rx, ry, inv_a, delta_rx, delta_ry, b, delta_b, b_delta_b, delta_b_delta_b, bb, delta_bb, rxrxryry, delta_rxrxryry, rx_plus_ry, delta_rx_plus_ry, det, delta_det, delta_delta_det; + // avoid division by zero if (fabsf(g_data->radial.a) <= 0.00001f) { @@ -470,20 +493,20 @@ fetch_radial_gradient(uint *buffer, Span_Data *data, int y, int x, int length) delta_det = (b_delta_b + delta_bb + 4 * g_data->radial.a * (rx_plus_ry + delta_rxrxryry)) * inv_a; delta_delta_det = (delta_b_delta_b + 4 * g_data->radial.a * delta_rx_plus_ry) * inv_a; - radial_helper(buffer, length, g_data, det, delta_det, delta_delta_det, b, delta_b); + _ector_radial_helper(buffer, length, g_data, det, delta_det, delta_delta_det, b, delta_b); } void drawhelper_gradient_init() { - radial_helper = _radial_helper_generic; - linear_helper = _linear_helper_generic; - #ifdef BUILD_SSE3 + _ector_radial_helper = _radial_helper_generic; + _ector_linear_helper = _linear_helper_generic; +#ifdef BUILD_SSE3 if (eina_cpu_features_get() & EINA_CPU_SSE3) { - radial_helper = _radial_helper_sse3; - linear_helper = _linear_helper_sse3; + _ector_radial_helper = _radial_helper_sse3; + _ector_linear_helper = _linear_helper_sse3; } - #endif +#endif }