ector - fix meson build with sse3 on ix86 (32bit)
This commit is contained in:
parent
788507961a
commit
b9225fd710
|
@ -119,6 +119,7 @@ cpu_neon_intrinsics = false
|
|||
cpu_altivec = false
|
||||
evas_opt_c_args = [ ]
|
||||
draw_opt_c_args = [ ]
|
||||
ector_opt_c_args = [ ]
|
||||
machine_c_args = [ ]
|
||||
compiler = meson.get_compiler('c')
|
||||
|
||||
|
@ -136,6 +137,7 @@ if host_machine.cpu_family() == 'x86' or host_machine.cpu_family() == 'x86_64'
|
|||
config_h.set10('BUILD_SSE3', true)
|
||||
evas_opt_c_args += [ '-msse3' ]
|
||||
draw_opt_c_args += [ '-msse3' ]
|
||||
ector_opt_c_args += [ '-msse3' ]
|
||||
cpu_sse3 = true
|
||||
message('x86 build - SSE3 enabled')
|
||||
endif
|
||||
|
|
|
@ -118,6 +118,8 @@ lib/ector/software/ector_renderer_software_gradient_linear.c \
|
|||
lib/ector/software/ector_renderer_software_gradient_radial.c \
|
||||
lib/ector/software/ector_renderer_software_shape.c \
|
||||
lib/ector/software/ector_software_gradient.c \
|
||||
lib/ector/software/ector_software_gradient_sse3.c \
|
||||
lib/ector/software/ector_software_gradient.h \
|
||||
lib/ector/software/ector_software_rasterizer.c \
|
||||
lib/ector/software/ector_software_surface.c \
|
||||
lib/ector/software/ector_software_buffer.c \
|
||||
|
|
|
@ -2,6 +2,8 @@ ector_deps = [eina, emile, eet, eo, efl]
|
|||
ector_pub_deps = [eina, efl]
|
||||
|
||||
pub_eo_file_target = []
|
||||
ector_opt_lib = [ ]
|
||||
|
||||
|
||||
ector_header_src = [
|
||||
# nothing for now ector stays only intree
|
||||
|
@ -76,7 +78,8 @@ ector_lib = library('ector',
|
|||
dependencies: ector_pub_deps + [triangulator, freetype, draw, m] + ector_deps,
|
||||
include_directories : config_dir,
|
||||
install: true,
|
||||
version : meson.project_version()
|
||||
version : meson.project_version(),
|
||||
link_with: ector_opt_lib
|
||||
)
|
||||
|
||||
ector = declare_dependency(
|
||||
|
|
|
@ -1,16 +1,10 @@
|
|||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#include "ector_software_gradient.h"
|
||||
|
||||
#ifdef BUILD_SSE3
|
||||
void _radial_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, float det, float delta_det, float delta_delta_det, float b, float delta_b);
|
||||
void _linear_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, int t, int inc);
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <software/Ector_Software.h>
|
||||
|
||||
#include "ector_private.h"
|
||||
#include "ector_software_private.h"
|
||||
#include "draw.h"
|
||||
|
||||
#define GRADIENT_STOPTABLE_SIZE 1024
|
||||
#define FIXPT_BITS 8
|
||||
#define FIXPT_SIZE (1<<FIXPT_BITS)
|
||||
|
@ -24,262 +18,6 @@ typedef void (*Ector_Linear_Helper_Func)(uint32_t *buffer, int length, Ector_Ren
|
|||
static Ector_Radial_Helper_Func _ector_radial_helper;
|
||||
static Ector_Linear_Helper_Func _ector_linear_helper;
|
||||
|
||||
static inline int
|
||||
_gradient_clamp(const Ector_Renderer_Software_Gradient_Data *data, int ipos)
|
||||
{
|
||||
int limit;
|
||||
|
||||
if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REPEAT)
|
||||
{
|
||||
ipos = ipos % GRADIENT_STOPTABLE_SIZE;
|
||||
ipos = ipos < 0 ? GRADIENT_STOPTABLE_SIZE + ipos : ipos;
|
||||
}
|
||||
else if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REFLECT)
|
||||
{
|
||||
limit = GRADIENT_STOPTABLE_SIZE * 2;
|
||||
ipos = ipos % limit;
|
||||
ipos = ipos < 0 ? limit + ipos : ipos;
|
||||
ipos = ipos >= GRADIENT_STOPTABLE_SIZE ? limit - 1 - ipos : ipos;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ipos < 0) ipos = 0;
|
||||
else if (ipos >= GRADIENT_STOPTABLE_SIZE)
|
||||
ipos = GRADIENT_STOPTABLE_SIZE-1;
|
||||
}
|
||||
return ipos;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
_gradient_pixel_fixed(const Ector_Renderer_Software_Gradient_Data *data, int fixed_pos)
|
||||
{
|
||||
int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
|
||||
|
||||
return data->color_table[_gradient_clamp(data, ipos)];
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
_gradient_pixel(const Ector_Renderer_Software_Gradient_Data *data, float pos)
|
||||
{
|
||||
int ipos = (int)(pos * (GRADIENT_STOPTABLE_SIZE - 1) + (float)(0.5));
|
||||
|
||||
return data->color_table[_gradient_clamp(data, ipos)];
|
||||
}
|
||||
|
||||
|
||||
#ifdef BUILD_SSE3
|
||||
#include <immintrin.h>
|
||||
|
||||
/* log2 of GRADIENT_STOPTABLE_SIZE (1024); used to build the wrap masks. */
#define GRADIENT_STOPTABLE_SIZE_SHIFT 10

/* Unions giving per-lane scalar access to a 128-bit SSE register. */
typedef union
{
   __m128i v;
   int     i[4];
} vec4_i;

typedef union
{
   __m128 v;
   float  f[4];
} vec4_f;

/*
 * Declares the vector constants shared by the float clamp macros below:
 * table bounds, the 0.5 rounding bias, and the bit masks / limit used for
 * the REPEAT and REFLECT spread modes.
 */
#define FETCH_CLAMP_INIT_F                                                                                  \
   __m128 v_min = _mm_set1_ps(0.0f);                                                                        \
   __m128 v_max = _mm_set1_ps((float)(GRADIENT_STOPTABLE_SIZE-1));                                          \
   __m128 v_halff = _mm_set1_ps(0.5f);                                                                      \
   __m128i v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT));        \
   __m128i v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1)));   \
   __m128i v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);

/* REPEAT: truncate v_index to int and keep only the low table-index bits. */
#define FETCH_CLAMP_REPEAT_F                                                   \
   vec4_i index_vec;                                                           \
   index_vec.v = _mm_and_si128(v_repeat_mask, _mm_cvttps_epi32(v_index));

/*
 * REFLECT: wrap v_index over twice the table size, then take the smaller of
 * the wrapped index and its mirror image (limit - index).
 */
#define FETCH_CLAMP_REFLECT_F                                                        \
   vec4_i index_vec;                                                                 \
   __m128i v_index_i = _mm_and_si128(v_reflect_mask, _mm_cvttps_epi32(v_index));     \
   __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i);                \
   index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);

/* PAD: saturate v_index to [v_min, v_max] in float, then truncate to int. */
#define FETCH_CLAMP_PAD_F                                                              \
   vec4_i index_vec;                                                                   \
   index_vec.v = _mm_cvttps_epi32(_mm_min_ps(v_max, _mm_max_ps(v_min, v_index)));

/*
 * Writes the four colours selected by index_vec to buffer and closes the
 * loop body opened by the matching *_PROLOGUE macro (note the lone '}').
 */
#define FETCH_EPILOGUE_CPY                                \
   *buffer++ = g_data->color_table[index_vec.i[0]];       \
   *buffer++ = g_data->color_table[index_vec.i[1]];       \
   *buffer++ = g_data->color_table[index_vec.i[2]];       \
   *buffer++ = g_data->color_table[index_vec.i[3]];       \
}
|
||||
|
||||
/*
 * Split a run of `length` pixels starting at `buffer` into three parts:
 *
 *   *lprealign  - leading pixels before the next 16-byte boundary
 *                 (capped at `length`),
 *   *lby4       - following pixels, always a multiple of four,
 *   *lremaining - trailing pixels (0..3) left after the groups of four.
 *
 * The distance to the boundary is computed from the address bits instead of
 * stepping the pointer, so the pointer is never moved outside the caller's
 * buffer and the function terminates for any address.
 */
static void
loop_break(unsigned int *buffer, int length, int *lprealign, int *lby4 , int *lremaining)
{
   const uintptr_t misalign = (uintptr_t)buffer & 0xF;
   /* bytes up to the next 16-byte boundary, expressed in whole pixels */
   int head = (int)(((0x10 - misalign) & 0xF) / sizeof(*buffer));
   int body = 0;
   int tail = 0;

   if (length <= head)
     {
        /* the whole run ends before the boundary is reached */
        head = length;
     }
   else
     {
        tail = (length - head) % 4;
        body = length - head - tail;
     }

   *lprealign = head;
   *lby4 = body;
   *lremaining = tail;
}
|
||||
|
||||
static void
|
||||
_radial_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data,
|
||||
float det, float delta_det, float delta_delta_det, float b, float delta_b)
|
||||
{
|
||||
int lprealign, lby4, lremaining, i;
|
||||
vec4_f det_vec;
|
||||
vec4_f delta_det4_vec;
|
||||
vec4_f b_vec;
|
||||
__m128 v_delta_delta_det16;
|
||||
__m128 v_delta_delta_det6;
|
||||
__m128 v_delta_b4;
|
||||
|
||||
loop_break(buffer, length, &lprealign, &lby4, &lremaining);
|
||||
|
||||
// prealign loop
|
||||
for (i = 0 ; i < lprealign ; i++)
|
||||
{
|
||||
*buffer++ = _gradient_pixel(g_data, sqrt(det) - b);
|
||||
det += delta_det;
|
||||
delta_det += delta_delta_det;
|
||||
b += delta_b;
|
||||
}
|
||||
|
||||
// lby4 16byte align loop
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
det_vec.f[i] = det;
|
||||
delta_det4_vec.f[i] = 4 * delta_det;
|
||||
b_vec.f[i] = b;
|
||||
|
||||
det += delta_det;
|
||||
delta_det += delta_delta_det;
|
||||
b += delta_b;
|
||||
}
|
||||
|
||||
v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det);
|
||||
v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det);
|
||||
v_delta_b4 = _mm_set1_ps(4 * delta_b);
|
||||
|
||||
#define FETCH_RADIAL_PROLOGUE \
|
||||
for (i = 0 ; i < lby4 ; i+=4) { \
|
||||
__m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(det_vec.v), b_vec.v); \
|
||||
__m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_max), v_halff); \
|
||||
det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \
|
||||
delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \
|
||||
b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4);
|
||||
|
||||
#define FETCH_RADIAL_LOOP(FETCH_CLAMP) \
|
||||
FETCH_RADIAL_PROLOGUE; \
|
||||
FETCH_CLAMP; \
|
||||
FETCH_EPILOGUE_CPY;
|
||||
|
||||
FETCH_CLAMP_INIT_F;
|
||||
switch (g_data->gd->s)
|
||||
{
|
||||
case EFL_GFX_GRADIENT_SPREAD_REPEAT:
|
||||
FETCH_RADIAL_LOOP(FETCH_CLAMP_REPEAT_F);
|
||||
break;
|
||||
case EFL_GFX_GRADIENT_SPREAD_REFLECT:
|
||||
FETCH_RADIAL_LOOP( FETCH_CLAMP_REFLECT_F);
|
||||
break;
|
||||
default:
|
||||
FETCH_RADIAL_LOOP(FETCH_CLAMP_PAD_F);
|
||||
break;
|
||||
}
|
||||
|
||||
// remaining loop
|
||||
for (i = 0 ; i < lremaining ; i++)
|
||||
*buffer++ = _gradient_pixel(g_data, sqrt(det_vec.f[i]) - b_vec.f[i]);
|
||||
}
|
||||
|
||||
static void
|
||||
_linear_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, int t, int inc)
|
||||
{
|
||||
int lprealign, lby4, lremaining, i;
|
||||
vec4_i t_vec;
|
||||
__m128i v_inc;
|
||||
__m128i v_fxtpt_size;
|
||||
__m128i v_min;
|
||||
__m128i v_max;
|
||||
__m128i v_repeat_mask;
|
||||
__m128i v_reflect_mask;
|
||||
__m128i v_reflect_limit;
|
||||
|
||||
loop_break(buffer, length, &lprealign, &lby4, &lremaining);
|
||||
|
||||
// prealign loop
|
||||
for (i = 0 ; i < lprealign ; i++)
|
||||
{
|
||||
*buffer++ = _gradient_pixel_fixed(g_data, t);
|
||||
t += inc;
|
||||
}
|
||||
|
||||
// lby4 16byte align loop
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
t_vec.i[i] = t;
|
||||
t += inc;
|
||||
}
|
||||
|
||||
v_inc = _mm_set1_epi32(4 * inc);
|
||||
v_fxtpt_size = _mm_set1_epi32(FIXPT_SIZE * 0.5);
|
||||
|
||||
v_min = _mm_set1_epi32(0);
|
||||
v_max = _mm_set1_epi32((GRADIENT_STOPTABLE_SIZE - 1));
|
||||
|
||||
v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT));
|
||||
v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT + 1)));
|
||||
|
||||
v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_PROLOGUE \
|
||||
for (i = 0 ; i < lby4 ; i+=4) { \
|
||||
vec4_i index_vec; \
|
||||
__m128i v_index; \
|
||||
v_index = _mm_srai_epi32(_mm_add_epi32(t_vec.v, v_fxtpt_size), FIXPT_BITS); \
|
||||
t_vec.v = _mm_add_epi32(t_vec.v, v_inc);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_CLAMP_REPEAT \
|
||||
index_vec.v = _mm_and_si128(v_repeat_mask, v_index);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_CLAMP_REFLECT \
|
||||
__m128i v_index_i = _mm_and_si128(v_reflect_mask, v_index); \
|
||||
__m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \
|
||||
index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_CLAMP_PAD \
|
||||
index_vec.v = _mm_min_epi16(v_max, _mm_max_epi16(v_min, v_index));
|
||||
|
||||
#define FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP) \
|
||||
FETCH_LINEAR_LOOP_PROLOGUE; \
|
||||
FETCH_LINEAR_LOOP_CLAMP; \
|
||||
FETCH_EPILOGUE_CPY;
|
||||
|
||||
switch (g_data->gd->s)
|
||||
{
|
||||
case EFL_GFX_GRADIENT_SPREAD_REPEAT:
|
||||
FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REPEAT);
|
||||
break;
|
||||
case EFL_GFX_GRADIENT_SPREAD_REFLECT:
|
||||
FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REFLECT);
|
||||
break;
|
||||
default:
|
||||
FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_PAD);
|
||||
break;
|
||||
}
|
||||
|
||||
// remaining loop
|
||||
for (i = 0 ; i < lremaining ; i++)
|
||||
*buffer++ = _gradient_pixel_fixed(g_data, t_vec.i[i]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
_update_color_table(void *data, Ector_Software_Thread *t EINA_UNUSED)
|
||||
{
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
#ifndef ECTOR_SOFTWARE_GRADIENT_H
|
||||
# define ECTOR_SOFTWARE_GRADIENT_H
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <software/Ector_Software.h>
|
||||
|
||||
#include "ector_private.h"
|
||||
#include "ector_software_private.h"
|
||||
#include "draw.h"
|
||||
|
||||
#define GRADIENT_STOPTABLE_SIZE 1024
|
||||
#define FIXPT_BITS 8
|
||||
#define FIXPT_SIZE (1<<FIXPT_BITS)
|
||||
|
||||
static inline int
|
||||
_gradient_clamp(const Ector_Renderer_Software_Gradient_Data *data, int ipos)
|
||||
{
|
||||
int limit;
|
||||
|
||||
if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REPEAT)
|
||||
{
|
||||
ipos = ipos % GRADIENT_STOPTABLE_SIZE;
|
||||
ipos = ipos < 0 ? GRADIENT_STOPTABLE_SIZE + ipos : ipos;
|
||||
}
|
||||
else if (data->gd->s == EFL_GFX_GRADIENT_SPREAD_REFLECT)
|
||||
{
|
||||
limit = GRADIENT_STOPTABLE_SIZE * 2;
|
||||
ipos = ipos % limit;
|
||||
ipos = ipos < 0 ? limit + ipos : ipos;
|
||||
ipos = ipos >= GRADIENT_STOPTABLE_SIZE ? limit - 1 - ipos : ipos;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ipos < 0) ipos = 0;
|
||||
else if (ipos >= GRADIENT_STOPTABLE_SIZE)
|
||||
ipos = GRADIENT_STOPTABLE_SIZE-1;
|
||||
}
|
||||
return ipos;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
_gradient_pixel_fixed(const Ector_Renderer_Software_Gradient_Data *data, int fixed_pos)
|
||||
{
|
||||
int ipos = (fixed_pos + (FIXPT_SIZE / 2)) >> FIXPT_BITS;
|
||||
|
||||
return data->color_table[_gradient_clamp(data, ipos)];
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
_gradient_pixel(const Ector_Renderer_Software_Gradient_Data *data, float pos)
|
||||
{
|
||||
int ipos = (int)(pos * (GRADIENT_STOPTABLE_SIZE - 1) + (float)(0.5));
|
||||
|
||||
return data->color_table[_gradient_clamp(data, ipos)];
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,214 @@
|
|||
#include "ector_software_gradient.h"
|
||||
|
||||
#ifdef BUILD_SSE3
|
||||
#include <immintrin.h>
|
||||
|
||||
/* log2 of GRADIENT_STOPTABLE_SIZE (1024); used to build the wrap masks. */
#define GRADIENT_STOPTABLE_SIZE_SHIFT 10

/* Unions giving per-lane scalar access to a 128-bit SSE register. */
typedef union
{
   __m128i v;
   int     i[4];
} vec4_i;

typedef union
{
   __m128 v;
   float  f[4];
} vec4_f;

/*
 * Declares the vector constants shared by the float clamp macros below:
 * table bounds, the 0.5 rounding bias, and the bit masks / limit used for
 * the REPEAT and REFLECT spread modes.
 */
#define FETCH_CLAMP_INIT_F                                                                                  \
   __m128 v_min = _mm_set1_ps(0.0f);                                                                        \
   __m128 v_max = _mm_set1_ps((float)(GRADIENT_STOPTABLE_SIZE-1));                                          \
   __m128 v_halff = _mm_set1_ps(0.5f);                                                                      \
   __m128i v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT));        \
   __m128i v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1)));   \
   __m128i v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);

/* REPEAT: truncate v_index to int and keep only the low table-index bits. */
#define FETCH_CLAMP_REPEAT_F                                                   \
   vec4_i index_vec;                                                           \
   index_vec.v = _mm_and_si128(v_repeat_mask, _mm_cvttps_epi32(v_index));

/*
 * REFLECT: wrap v_index over twice the table size, then take the smaller of
 * the wrapped index and its mirror image (limit - index).
 */
#define FETCH_CLAMP_REFLECT_F                                                        \
   vec4_i index_vec;                                                                 \
   __m128i v_index_i = _mm_and_si128(v_reflect_mask, _mm_cvttps_epi32(v_index));     \
   __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i);                \
   index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);

/* PAD: saturate v_index to [v_min, v_max] in float, then truncate to int. */
#define FETCH_CLAMP_PAD_F                                                              \
   vec4_i index_vec;                                                                   \
   index_vec.v = _mm_cvttps_epi32(_mm_min_ps(v_max, _mm_max_ps(v_min, v_index)));

/*
 * Writes the four colours selected by index_vec to buffer and closes the
 * loop body opened by the matching *_PROLOGUE macro (note the lone '}').
 */
#define FETCH_EPILOGUE_CPY                                \
   *buffer++ = g_data->color_table[index_vec.i[0]];       \
   *buffer++ = g_data->color_table[index_vec.i[1]];       \
   *buffer++ = g_data->color_table[index_vec.i[2]];       \
   *buffer++ = g_data->color_table[index_vec.i[3]];       \
}
|
||||
|
||||
/*
 * Split a run of `length` pixels starting at `buffer` into three parts:
 *
 *   *lprealign  - leading pixels before the next 16-byte boundary
 *                 (capped at `length`),
 *   *lby4       - following pixels, always a multiple of four,
 *   *lremaining - trailing pixels (0..3) left after the groups of four.
 *
 * The distance to the boundary is computed from the address bits instead of
 * stepping the pointer, so the pointer is never moved outside the caller's
 * buffer and the function terminates for any address.
 */
static void
loop_break(unsigned int *buffer, int length, int *lprealign, int *lby4 , int *lremaining)
{
   const uintptr_t misalign = (uintptr_t)buffer & 0xF;
   /* bytes up to the next 16-byte boundary, expressed in whole pixels */
   int head = (int)(((0x10 - misalign) & 0xF) / sizeof(*buffer));
   int body = 0;
   int tail = 0;

   if (length <= head)
     {
        /* the whole run ends before the boundary is reached */
        head = length;
     }
   else
     {
        tail = (length - head) % 4;
        body = length - head - tail;
     }

   *lprealign = head;
   *lby4 = body;
   *lremaining = tail;
}
|
||||
|
||||
void
|
||||
_radial_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data,
|
||||
float det, float delta_det, float delta_delta_det, float b, float delta_b)
|
||||
{
|
||||
int lprealign, lby4, lremaining, i;
|
||||
vec4_f det_vec;
|
||||
vec4_f delta_det4_vec;
|
||||
vec4_f b_vec;
|
||||
__m128 v_delta_delta_det16;
|
||||
__m128 v_delta_delta_det6;
|
||||
__m128 v_delta_b4;
|
||||
|
||||
loop_break(buffer, length, &lprealign, &lby4, &lremaining);
|
||||
|
||||
// prealign loop
|
||||
for (i = 0 ; i < lprealign ; i++)
|
||||
{
|
||||
*buffer++ = _gradient_pixel(g_data, sqrt(det) - b);
|
||||
det += delta_det;
|
||||
delta_det += delta_delta_det;
|
||||
b += delta_b;
|
||||
}
|
||||
|
||||
// lby4 16byte align loop
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
det_vec.f[i] = det;
|
||||
delta_det4_vec.f[i] = 4 * delta_det;
|
||||
b_vec.f[i] = b;
|
||||
|
||||
det += delta_det;
|
||||
delta_det += delta_delta_det;
|
||||
b += delta_b;
|
||||
}
|
||||
|
||||
v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det);
|
||||
v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det);
|
||||
v_delta_b4 = _mm_set1_ps(4 * delta_b);
|
||||
|
||||
#define FETCH_RADIAL_PROLOGUE \
|
||||
for (i = 0 ; i < lby4 ; i+=4) { \
|
||||
__m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(det_vec.v), b_vec.v); \
|
||||
__m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_max), v_halff); \
|
||||
det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \
|
||||
delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \
|
||||
b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4);
|
||||
|
||||
#define FETCH_RADIAL_LOOP(FETCH_CLAMP) \
|
||||
FETCH_RADIAL_PROLOGUE; \
|
||||
FETCH_CLAMP; \
|
||||
FETCH_EPILOGUE_CPY;
|
||||
|
||||
FETCH_CLAMP_INIT_F;
|
||||
switch (g_data->gd->s)
|
||||
{
|
||||
case EFL_GFX_GRADIENT_SPREAD_REPEAT:
|
||||
FETCH_RADIAL_LOOP(FETCH_CLAMP_REPEAT_F);
|
||||
break;
|
||||
case EFL_GFX_GRADIENT_SPREAD_REFLECT:
|
||||
FETCH_RADIAL_LOOP( FETCH_CLAMP_REFLECT_F);
|
||||
break;
|
||||
default:
|
||||
FETCH_RADIAL_LOOP(FETCH_CLAMP_PAD_F);
|
||||
break;
|
||||
}
|
||||
|
||||
// remaining loop
|
||||
for (i = 0 ; i < lremaining ; i++)
|
||||
*buffer++ = _gradient_pixel(g_data, sqrt(det_vec.f[i]) - b_vec.f[i]);
|
||||
}
|
||||
|
||||
void
|
||||
_linear_helper_sse3(uint32_t *buffer, int length, Ector_Renderer_Software_Gradient_Data *g_data, int t, int inc)
|
||||
{
|
||||
int lprealign, lby4, lremaining, i;
|
||||
vec4_i t_vec;
|
||||
__m128i v_inc;
|
||||
__m128i v_fxtpt_size;
|
||||
__m128i v_min;
|
||||
__m128i v_max;
|
||||
__m128i v_repeat_mask;
|
||||
__m128i v_reflect_mask;
|
||||
__m128i v_reflect_limit;
|
||||
|
||||
loop_break(buffer, length, &lprealign, &lby4, &lremaining);
|
||||
|
||||
// prealign loop
|
||||
for (i = 0 ; i < lprealign ; i++)
|
||||
{
|
||||
*buffer++ = _gradient_pixel_fixed(g_data, t);
|
||||
t += inc;
|
||||
}
|
||||
|
||||
// lby4 16byte align loop
|
||||
for (i = 0; i < 4; ++i)
|
||||
{
|
||||
t_vec.i[i] = t;
|
||||
t += inc;
|
||||
}
|
||||
|
||||
v_inc = _mm_set1_epi32(4 * inc);
|
||||
v_fxtpt_size = _mm_set1_epi32(FIXPT_SIZE * 0.5);
|
||||
|
||||
v_min = _mm_set1_epi32(0);
|
||||
v_max = _mm_set1_epi32((GRADIENT_STOPTABLE_SIZE - 1));
|
||||
|
||||
v_repeat_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT));
|
||||
v_reflect_mask = _mm_set1_epi32(~((uint32_t)(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT + 1)));
|
||||
|
||||
v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_PROLOGUE \
|
||||
for (i = 0 ; i < lby4 ; i+=4) { \
|
||||
vec4_i index_vec; \
|
||||
__m128i v_index; \
|
||||
v_index = _mm_srai_epi32(_mm_add_epi32(t_vec.v, v_fxtpt_size), FIXPT_BITS); \
|
||||
t_vec.v = _mm_add_epi32(t_vec.v, v_inc);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_CLAMP_REPEAT \
|
||||
index_vec.v = _mm_and_si128(v_repeat_mask, v_index);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_CLAMP_REFLECT \
|
||||
__m128i v_index_i = _mm_and_si128(v_reflect_mask, v_index); \
|
||||
__m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \
|
||||
index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);
|
||||
|
||||
#define FETCH_LINEAR_LOOP_CLAMP_PAD \
|
||||
index_vec.v = _mm_min_epi16(v_max, _mm_max_epi16(v_min, v_index));
|
||||
|
||||
#define FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP) \
|
||||
FETCH_LINEAR_LOOP_PROLOGUE; \
|
||||
FETCH_LINEAR_LOOP_CLAMP; \
|
||||
FETCH_EPILOGUE_CPY;
|
||||
|
||||
switch (g_data->gd->s)
|
||||
{
|
||||
case EFL_GFX_GRADIENT_SPREAD_REPEAT:
|
||||
FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REPEAT);
|
||||
break;
|
||||
case EFL_GFX_GRADIENT_SPREAD_REFLECT:
|
||||
FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_REFLECT);
|
||||
break;
|
||||
default:
|
||||
FETCH_LINEAR_LOOP(FETCH_LINEAR_LOOP_CLAMP_PAD);
|
||||
break;
|
||||
}
|
||||
|
||||
// remaining loop
|
||||
for (i = 0 ; i < lremaining ; i++)
|
||||
*buffer++ = _gradient_pixel_fixed(g_data, t_vec.i[i]);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -9,6 +9,16 @@ ector_src += files([
|
|||
'ector_software_buffer.c',
|
||||
])
|
||||
|
||||
# Build the SSE3 gradient helpers as their own static library so that only
# this translation unit is compiled with ector_opt_c_args; the result is
# appended to ector_opt_lib.
if cpu_sse3
  ector_opt = static_library('ector_opt',
    sources: [ 'ector_software_gradient_sse3.c' ],
    dependencies: ector_pub_deps + [triangulator, freetype, draw, m] + ector_deps,
    include_directories: config_dir + [ include_directories('..') ],
    c_args: ector_opt_c_args,
  )
  ector_opt_lib += [ ector_opt ]
endif
|
||||
|
||||
pub_eo_files = [
|
||||
'ector_software_surface.eo',
|
||||
'ector_software_buffer.eo',
|
||||
|
|
Loading…
Reference in New Issue