ector: clean up default backend drawer.

This commit is contained in:
Cedric BAIL 2015-09-12 07:31:49 +02:00
parent 576331955e
commit b3dc08bf8b
4 changed files with 158 additions and 130 deletions

View File

@ -15,28 +15,30 @@
*/
/*
result = s + d * sia
result = s + d * sia
dest = (s + d * sia) * ca + d * cia
= s * ca + d * (sia * ca + cia)
= s * ca + d * (1 - sa*ca)
*/
void
comp_func_solid_source_over(uint *dest, int length, uint color, uint const_alpha)
static void
_comp_func_solid_source_over(uint *dest, int length, uint color, uint const_alpha)
{
int ialpha, i;
if (const_alpha != 255)
color = BYTE_MUL(color, const_alpha);
ialpha = _alpha(~color);
ialpha = alpha_inverse(color);
for (i = 0; i < length; ++i)
dest[i] = color + BYTE_MUL(dest[i], ialpha);
}
static void
comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint const_alpha)
_comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint const_alpha)
{
int i;
uint s, sc, sia;
if (const_alpha != 255)
color = BYTE_MUL(color, const_alpha);
@ -49,7 +51,7 @@ comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint
dest[i] = s;
else if (s != 0)
{
sia = _alpha(~s);
sia = alpha_inverse(s);
dest[i] = s + BYTE_MUL(dest[i], sia);
}
}
@ -60,7 +62,7 @@ comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint
{
s = src[i];
sc = ECTOR_MUL4_SYM(color, s);
sia = _alpha(~sc);
sia = alpha_inverse(sc);
dest[i] = sc + BYTE_MUL(dest[i], sia);
}
}
@ -71,10 +73,14 @@ comp_func_source_over(uint *dest, const uint *src, int length, uint color, uint
dest = s * ca + d * cia
*/
static void
comp_func_solid_source(uint *dest, int length, uint color, uint const_alpha)
_comp_func_solid_source(uint *dest, int length, uint color, uint const_alpha)
{
int ialpha, i;
if (const_alpha == 255) _ector_memfill(dest, length, color);
if (const_alpha == 255)
{
_ector_memfill(dest, length, color);
}
else
{
ialpha = 255 - const_alpha;
@ -85,20 +91,23 @@ comp_func_solid_source(uint *dest, int length, uint color, uint const_alpha)
}
static void
comp_func_source(uint *dest, const uint *src, int length, uint color, uint const_alpha)
_comp_func_source(uint *dest, const uint *src, int length, uint color, uint const_alpha)
{
int i, ialpha;
uint src_color;
if (color == 0xffffffff) // No color multiplier
{
if (const_alpha == 255)
memcpy(dest, src, length * sizeof(uint));
{
memcpy(dest, src, length * sizeof(uint));
}
else
{
ialpha = 255 - const_alpha;
for (i = 0; i < length; ++i)
dest[i] = INTERPOLATE_PIXEL_256(src[i], const_alpha, dest[i], ialpha);
}
{
ialpha = 255 - const_alpha;
for (i = 0; i < length; ++i)
dest[i] = INTERPOLATE_PIXEL_256(src[i], const_alpha, dest[i], ialpha);
}
}
else
{
@ -109,24 +118,24 @@ comp_func_source(uint *dest, const uint *src, int length, uint color, uint const
}
else
{
ialpha = 255 - const_alpha;
for (i = 0; i < length; ++i)
{
src_color = ECTOR_MUL4_SYM(src[i], color);
dest[i] = INTERPOLATE_PIXEL_256(src_color, const_alpha, dest[i], ialpha);
}
ialpha = 255 - const_alpha;
for (i = 0; i < length; ++i)
{
src_color = ECTOR_MUL4_SYM(src[i], color);
dest[i] = INTERPOLATE_PIXEL_256(src_color, const_alpha, dest[i], ialpha);
}
}
}
}
RGBA_Comp_Func_Solid func_for_mode_solid[ECTOR_ROP_LAST] = {
comp_func_solid_source_over,
comp_func_solid_source
_comp_func_solid_source_over,
_comp_func_solid_source
};
RGBA_Comp_Func func_for_mode[ECTOR_ROP_LAST] = {
comp_func_source_over,
comp_func_source
_comp_func_source_over,
_comp_func_source
};
RGBA_Comp_Func_Solid
@ -146,13 +155,10 @@ RGBA_Comp_Func ector_comp_func_span_get(Ector_Rop op, uint color, Eina_Bool src_
{
if (op == ECTOR_ROP_BLEND) op = ECTOR_ROP_COPY;
}
return func_for_mode[op];
}
extern void init_drawhelper_gradient();
extern void init_draw_helper_sse2();
extern void init_draw_helper_neon();
void init_draw_helper()
{
init_drawhelper_gradient();

View File

@ -7,6 +7,7 @@
#ifdef BUILD_NEON
#include <arm_neon.h>
static void
comp_func_solid_source_over_neon(uint * __restrict dest, int length, uint color, uint const_alpha)
{
@ -39,8 +40,8 @@ comp_func_solid_source_over_neon(uint * __restrict dest, int length, uint color,
// alpha can only be 0 if color is 0x0. In that case we can just return.
// Otherwise we can assume alpha != 0. This allows more optimization in
// NEON code.
if(!color)
return;
if (!color)
return;
DATA32 *start = dest;
int size = length;
@ -53,46 +54,47 @@ comp_func_solid_source_over_neon(uint * __restrict dest, int length, uint color,
c_32x4 = vdupq_n_u32(color);
while (start < end)
{
d0_32x4 = vld1q_u32(start);
d1_32x4 = vld1q_u32(start+4);
d0_8x16 = vreinterpretq_u8_u32(d0_32x4);
d1_8x16 = vreinterpretq_u8_u32(d1_32x4);
{
d0_32x4 = vld1q_u32(start);
d1_32x4 = vld1q_u32(start+4);
d0_8x16 = vreinterpretq_u8_u32(d0_32x4);
d1_8x16 = vreinterpretq_u8_u32(d1_32x4);
d00_8x8 = vget_low_u8(d0_8x16);
d01_8x8 = vget_high_u8(d0_8x16);
d10_8x8 = vget_low_u8(d1_8x16);
d11_8x8 = vget_high_u8(d1_8x16);
d00_8x8 = vget_low_u8(d0_8x16);
d01_8x8 = vget_high_u8(d0_8x16);
d10_8x8 = vget_low_u8(d1_8x16);
d11_8x8 = vget_high_u8(d1_8x16);
temp00_16x8 = vmull_u8(alpha_8x8, d00_8x8);
temp01_16x8 = vmull_u8(alpha_8x8, d01_8x8);
temp10_16x8 = vmull_u8(alpha_8x8, d10_8x8);
temp11_16x8 = vmull_u8(alpha_8x8, d11_8x8);
temp00_16x8 = vmull_u8(alpha_8x8, d00_8x8);
temp01_16x8 = vmull_u8(alpha_8x8, d01_8x8);
temp10_16x8 = vmull_u8(alpha_8x8, d10_8x8);
temp11_16x8 = vmull_u8(alpha_8x8, d11_8x8);
temp00_8x8 = vshrn_n_u16(temp00_16x8,8);
temp01_8x8 = vshrn_n_u16(temp01_16x8,8);
temp10_8x8 = vshrn_n_u16(temp10_16x8,8);
temp11_8x8 = vshrn_n_u16(temp11_16x8,8);
temp00_8x8 = vshrn_n_u16(temp00_16x8,8);
temp01_8x8 = vshrn_n_u16(temp01_16x8,8);
temp10_8x8 = vshrn_n_u16(temp10_16x8,8);
temp11_8x8 = vshrn_n_u16(temp11_16x8,8);
temp0_8x16 = vcombine_u8(temp00_8x8, temp01_8x8);
temp1_8x16 = vcombine_u8(temp10_8x8, temp11_8x8);
temp0_8x16 = vcombine_u8(temp00_8x8, temp01_8x8);
temp1_8x16 = vcombine_u8(temp10_8x8, temp11_8x8);
temp0_32x4 = vreinterpretq_u32_u8(temp0_8x16);
temp1_32x4 = vreinterpretq_u32_u8(temp1_8x16);
temp0_32x4 = vreinterpretq_u32_u8(temp0_8x16);
temp1_32x4 = vreinterpretq_u32_u8(temp1_8x16);
d0_32x4 = vaddq_u32(c_32x4, temp0_32x4);
d1_32x4 = vaddq_u32(c_32x4, temp1_32x4);
d0_32x4 = vaddq_u32(c_32x4, temp0_32x4);
d1_32x4 = vaddq_u32(c_32x4, temp1_32x4);
vst1q_u32(start, d0_32x4);
vst1q_u32(start+4, d1_32x4);
start+=8;
}
vst1q_u32(start, d0_32x4);
vst1q_u32(start+4, d1_32x4);
start+=8;
}
end += (size & 7);
while (start < end)
{
*start = color + MUL_256(alpha, *start);
start++;
}
{
*start = color + MUL_256(alpha, *start);
start++;
}
}
/* Note: Optimisation is based on keeping _dest_ aligned: else it's a pair of
@ -132,6 +134,9 @@ comp_func_source_over_sse2(uint * __restrict dest, const uint * __restrict src,
uint8x8_t s1_8x8;
uint8x8_t sc0_8x8;
uint8x8_t sc1_8x8;
int size;
DATA32 *start;
DATA32 *end;
if (const_alpha != 255)
color = BYTE_MUL(color, const_alpha);
@ -143,69 +148,69 @@ comp_func_source_over_sse2(uint * __restrict dest, const uint * __restrict src,
x0_32x4 = vreinterpretq_u32_u8(x0_8x16);
x1_8x16 = vdupq_n_u8(0x1);
x1_32x4 = vreinterpretq_u32_u8(x1_8x16);
DATA32 *start = dest;
int size = l;
DATA32 *end = start + (size & ~3);
start = dest;
size = l;
end = start + (size & ~3);
while (start < end)
{
{
s_32x4 = vld1q_u32(src);
s_8x16 = vreinterpretq_u8_u32(s_32x4);
s_32x4 = vld1q_u32(src);
s_8x16 = vreinterpretq_u8_u32(s_32x4);
d_32x4 = vld1q_u32(start);
d_8x16 = vreinterpretq_u8_u32(d_32x4);
d0_8x8 = vget_low_u8(d_8x16);
d1_8x8 = vget_high_u8(d_8x16);
d_32x4 = vld1q_u32(start);
d_8x16 = vreinterpretq_u8_u32(d_32x4);
d0_8x8 = vget_low_u8(d_8x16);
d1_8x8 = vget_high_u8(d_8x16);
s0_8x8 = vget_low_u8(s_8x16);
s1_8x8 = vget_high_u8(s_8x16);
s0_8x8 = vget_low_u8(s_8x16);
s1_8x8 = vget_high_u8(s_8x16);
sc0_16x8 = vmull_u8(s0_8x8, c_8x8);
sc1_16x8 = vmull_u8(s1_8x8, c_8x8);
sc0_16x8 = vaddq_u16(sc0_16x8, x255_16x8);
sc1_16x8 = vaddq_u16(sc1_16x8, x255_16x8);
sc0_8x8 = vshrn_n_u16(sc0_16x8, 8);
sc1_8x8 = vshrn_n_u16(sc1_16x8, 8);
sc_8x16 = vcombine_u8(sc0_8x8, sc1_8x8);
sc0_16x8 = vmull_u8(s0_8x8, c_8x8);
sc1_16x8 = vmull_u8(s1_8x8, c_8x8);
sc0_16x8 = vaddq_u16(sc0_16x8, x255_16x8);
sc1_16x8 = vaddq_u16(sc1_16x8, x255_16x8);
sc0_8x8 = vshrn_n_u16(sc0_16x8, 8);
sc1_8x8 = vshrn_n_u16(sc1_16x8, 8);
sc_8x16 = vcombine_u8(sc0_8x8, sc1_8x8);
alpha_32x4 = vreinterpretq_u32_u8(sc_8x16);
alpha_32x4 = vshrq_n_u32(alpha_32x4, 24);
alpha_32x4 = vmulq_u32(x1_32x4, alpha_32x4);
alpha_8x16 = vreinterpretq_u8_u32(alpha_32x4);
alpha_8x16 = vsubq_u8(x0_8x16, alpha_8x16);
alpha0_8x8 = vget_low_u8(alpha_8x16);
alpha1_8x8 = vget_high_u8(alpha_8x16);
alpha_32x4 = vreinterpretq_u32_u8(sc_8x16);
alpha_32x4 = vshrq_n_u32(alpha_32x4, 24);
alpha_32x4 = vmulq_u32(x1_32x4, alpha_32x4);
alpha_8x16 = vreinterpretq_u8_u32(alpha_32x4);
alpha_8x16 = vsubq_u8(x0_8x16, alpha_8x16);
alpha0_8x8 = vget_low_u8(alpha_8x16);
alpha1_8x8 = vget_high_u8(alpha_8x16);
ad0_16x8 = vmull_u8(alpha0_8x8, d0_8x8);
ad1_16x8 = vmull_u8(alpha1_8x8, d1_8x8);
ad0_8x8 = vshrn_n_u16(ad0_16x8,8);
ad1_8x8 = vshrn_n_u16(ad1_16x8,8);
ad_8x16 = vcombine_u8(ad0_8x8, ad1_8x8);
ad_32x4 = vreinterpretq_u32_u8(ad_8x16);
ad0_16x8 = vmull_u8(alpha0_8x8, d0_8x8);
ad1_16x8 = vmull_u8(alpha1_8x8, d1_8x8);
ad0_8x8 = vshrn_n_u16(ad0_16x8,8);
ad1_8x8 = vshrn_n_u16(ad1_16x8,8);
ad_8x16 = vcombine_u8(ad0_8x8, ad1_8x8);
ad_32x4 = vreinterpretq_u32_u8(ad_8x16);
alpha_32x4 = vreinterpretq_u32_u8(alpha_8x16);
cond_32x4 = vceqq_u32(alpha_32x4, x0_32x4);
ad_32x4 = vbslq_u32(cond_32x4, d_32x4 , ad_32x4);
alpha_32x4 = vreinterpretq_u32_u8(alpha_8x16);
cond_32x4 = vceqq_u32(alpha_32x4, x0_32x4);
ad_32x4 = vbslq_u32(cond_32x4, d_32x4 , ad_32x4);
sc_32x4 = vreinterpretq_u32_u8(sc_8x16);
d_32x4 = vaddq_u32(sc_32x4, ad_32x4);
sc_32x4 = vreinterpretq_u32_u8(sc_8x16);
d_32x4 = vaddq_u32(sc_32x4, ad_32x4);
vst1q_u32(start, d_32x4);
vst1q_u32(start, d_32x4);
src+=4;
start+=4;
}
src+=4;
start+=4;
}
end += (size & 3);
while (start < end)
{
DATA32 sc = MUL4_SYM(color, *s);
DATA32 alpha = 256 - (sc >> 24);
*start = sc + MUL_256(alpha, *start);
start++;
src++;
}
{
DATA32 sc = MUL4_SYM(color, *s);
DATA32 alpha = 256 - (sc >> 24);
*start = sc + MUL_256(alpha, *start);
start++;
src++;
}
}
#endif
void

View File

@ -1,9 +1,7 @@
#ifndef ECTOR_DRAWHELPER_PRIVATE_H
#define ECTOR_DRAWHELPER_PRIVATE_H
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include "ector_private.h"
#ifndef MIN
#define MIN( a, b ) ( (a) < (b) ? (a) : (b) )
@ -17,11 +15,6 @@
typedef unsigned int uint;
#endif
/* Shifts c right by 24 bits, i.e. yields the most significant byte when uint
 * is 32 bits wide. Call sites in this change pass ~color, which gives the
 * complement of that byte. */
static inline int _alpha(uint c)
{
return c>>24;
}
#define ECTOR_ARGB_JOIN(a,r,g,b) \
(((a) << 24) + ((r) << 16) + ((g) << 8) + (b))
@ -53,6 +46,13 @@ static inline int _alpha(uint c)
} \
}
/* Complements every bit of color and returns A_VAL of the result.
 * Call sites use the return value as the factor applied to the destination
 * pixel: dest = s + BYTE_MUL(dest, alpha_inverse(s)).
 * NOTE(review): A_VAL is defined outside this view; presumably it reads the
 * alpha byte of the pixel pointed to, making this 255 - alpha -- confirm.
 * NOTE(review): the parameter is a signed int while call sites pass uint
 * pixels -- confirm the implicit conversion is intended. */
static inline int
alpha_inverse(int color)
{
color = ~color; /* operates on the by-value copy; the caller's pixel is untouched */
return A_VAL(&color);
}
static inline void
_ector_memfill(uint *dest, int length, uint value)
{
@ -89,9 +89,14 @@ INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b)
typedef void (*RGBA_Comp_Func)(uint *dest, const uint *src, int length, uint mul_col, uint const_alpha);
typedef void (*RGBA_Comp_Func_Solid)(uint *dest, int length, uint color, uint const_alpha);
extern RGBA_Comp_Func_Solid func_for_mode_solid[ECTOR_ROP_LAST];
extern RGBA_Comp_Func func_for_mode[ECTOR_ROP_LAST];
void init_drawhelper_gradient();
void init_draw_helper_sse2();
void init_draw_helper_neon();
void init_draw_helper();
RGBA_Comp_Func_Solid ector_comp_func_solid_span_get(Ector_Rop op, uint color);

View File

@ -89,6 +89,7 @@ v4_mul_color_sse2(__m128i x, __m128i y)
{
const __m128i zero = _mm_setzero_si128();
const __m128i sym4_mask = _mm_set_epi32(0x00FF00FF, 0x000000FF, 0x00FF00FF, 0x000000FF);
__m128i x_l = _mm_unpacklo_epi8(x, zero);
__m128i x_h = _mm_unpackhi_epi8(x, zero);
@ -111,6 +112,7 @@ static inline __m128i
v4_ialpha_sse2(__m128i c)
{
/* Logical right shift of each 32-bit lane by 24 leaves that lane's top byte in the low bits. */
__m128i a = _mm_srli_epi32(c, 24);
/* Per 32-bit lane: 0xff - a. */
return _mm_sub_epi32(_mm_set1_epi32(0xff), a);
}
@ -141,10 +143,14 @@ comp_func_helper_sse2 (uint *dest, int length, uint color, uint alpha)
void
comp_func_solid_source_sse2(uint *dest, int length, uint color, uint const_alpha)
{
int ialpha;
if (const_alpha == 255) _ector_memfill(dest, length, color);
if (const_alpha == 255)
{
_ector_memfill(dest, length, color);
}
else
{
int ialpha;
ialpha = 255 - const_alpha;
color = BYTE_MUL(color, const_alpha);
comp_func_helper_sse2(dest, length, color, ialpha);
@ -155,9 +161,10 @@ void
comp_func_solid_source_over_sse2(uint *dest, int length, uint color, uint const_alpha)
{
int ialpha;
if (const_alpha != 255)
color = BYTE_MUL(color, const_alpha);
ialpha = _alpha(~color);
ialpha = alpha_inverse(color);
comp_func_helper_sse2(dest, length, color, ialpha);
}
@ -194,21 +201,23 @@ comp_func_solid_source_over_sse2(uint *dest, int length, uint color, uint const_
#define V4_COMP_OP_SRC \
v_src = v4_interpolate_color_sse2(v_alpha, v_src, v_dest);
static void
comp_func_source_sse2(uint *dest, const uint *src, int length, uint color, uint const_alpha)
{
int ialpha;
uint src_color;
if (color == 0xffffffff) // No color multiplier
{
if (const_alpha == 255)
memcpy(dest, src, length * sizeof(uint));
{
memcpy(dest, src, length * sizeof(uint));
}
else
{
ialpha = 255 - const_alpha;
__m128i v_alpha = _mm_set1_epi32(const_alpha);
LOOP_ALIGNED_U1_A4(dest, length,
{ /* UOP */
*dest = INTERPOLATE_PIXEL_256(*src, const_alpha, *dest, ialpha);
@ -225,6 +234,7 @@ comp_func_source_sse2(uint *dest, const uint *src, int length, uint color, uint
else
{
__m128i v_color = _mm_set1_epi32(color);
if (const_alpha == 255)
{
LOOP_ALIGNED_U1_A4(dest, length,
@ -243,6 +253,7 @@ comp_func_source_sse2(uint *dest, const uint *src, int length, uint color, uint
{
ialpha = 255 - const_alpha;
__m128i v_alpha = _mm_set1_epi32(const_alpha);
LOOP_ALIGNED_U1_A4(dest, length,
{ /* UOP */
src_color = ECTOR_MUL4_SYM(*src, color);
@ -264,6 +275,7 @@ static void
comp_func_source_over_sse2(uint *dest, const uint *src, int length, uint color, uint const_alpha)
{
uint s, sia;
if (const_alpha != 255)
color = BYTE_MUL(color, const_alpha);
@ -272,7 +284,7 @@ comp_func_source_over_sse2(uint *dest, const uint *src, int length, uint color,
LOOP_ALIGNED_U1_A4(dest, length,
{ /* UOP */
s = *src;
sia = _alpha(~s);
sia = alpha_inverse(s);
*dest = s + BYTE_MUL(*dest, sia);
dest++; src++; length--;
},
@ -286,10 +298,11 @@ comp_func_source_over_sse2(uint *dest, const uint *src, int length, uint color,
else
{
__m128i v_color = _mm_set1_epi32(color);
LOOP_ALIGNED_U1_A4(dest, length,
{ /* UOP */
s = ECTOR_MUL4_SYM(*src, color);
sia = _alpha(~s);
sia = alpha_inverse(s);
*dest = s + BYTE_MUL(*dest, sia);
dest++; src++; length--;
},
@ -321,4 +334,3 @@ init_draw_helper_sse2()
}
#endif
}