forked from enlightenment/efl
evas/op_blend: rename LOOP_ALIGNED_U1_A48_SSE3 to LOOP_ALIGNED_U1_A48
Summary: There's nothing SSE3-specific about that macro, so let's use a more generic name for it. Since it's just a generic macro, we can also allow non-SSE (e.g. NEON) code to use it if they want to. Reviewers: cedric. CC: cedric. Differential Revision: https://phab.enlightenment.org/D528
This commit is contained in:
parent
6e1b9de112
commit
259f33679c
|
@ -10,7 +10,7 @@ _op_blend_c_dp_sse3(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, DATA3
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
const __m128i a_packed = _mm_set_epi32(a, a, a, a);
|
const __m128i a_packed = _mm_set_epi32(a, a, a, a);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
*d = c + MUL_256(a, *d);
|
*d = c + MUL_256(a, *d);
|
||||||
|
@ -94,7 +94,7 @@ _op_blend_rel_c_dp_sse3(DATA32 *s EINA_UNUSED, DATA8 *m EINA_UNUSED, DATA32 c, D
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
const __m128i alpha_packed = _mm_set_epi32(alpha, alpha, alpha, alpha);
|
const __m128i alpha_packed = _mm_set_epi32(alpha, alpha, alpha, alpha);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
*d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d);
|
*d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d);
|
||||||
|
|
|
@ -7,7 +7,7 @@ _op_blend_mas_c_dp_sse3(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d, in
|
||||||
|
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
DATA32 a = *m;
|
DATA32 a = *m;
|
||||||
|
@ -76,7 +76,7 @@ _op_blend_mas_can_dp_sse3(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d,
|
||||||
const __m128i one = _mm_set_epi32(1, 1, 1, 1);
|
const __m128i one = _mm_set_epi32(1, 1, 1, 1);
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
alpha = *m;
|
alpha = *m;
|
||||||
|
@ -215,7 +215,7 @@ _op_blend_rel_mas_c_dp_sse3(DATA32 *s EINA_UNUSED, DATA8 *m, DATA32 c, DATA32 *d
|
||||||
|
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
DATA32 mc = MUL_SYM(*m, c);
|
DATA32 mc = MUL_SYM(*m, c);
|
||||||
|
|
|
@ -9,7 +9,7 @@ _op_blend_p_c_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, int
|
||||||
|
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
DATA32 sc = MUL4_SYM(c, *s);
|
DATA32 sc = MUL4_SYM(c, *s);
|
||||||
|
@ -69,7 +69,7 @@ _op_blend_pan_c_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, in
|
||||||
const __m128i c_alpha = _mm_set_epi32(c_a, c_a, c_a, c_a);
|
const __m128i c_alpha = _mm_set_epi32(c_a, c_a, c_a, c_a);
|
||||||
const __m128i a0 = _mm_set_epi32(alpha, alpha, alpha, alpha);
|
const __m128i a0 = _mm_set_epi32(alpha, alpha, alpha, alpha);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
*d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d);
|
*d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d);
|
||||||
|
@ -119,7 +119,7 @@ _op_blend_p_can_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, in
|
||||||
int alpha;
|
int alpha;
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
alpha = 256 - (*s >> 24);
|
alpha = 256 - (*s >> 24);
|
||||||
|
@ -173,7 +173,7 @@ _op_blend_pan_can_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d,
|
||||||
|
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
*d++ = 0xff000000 + MUL3_SYM(c, *s);
|
*d++ = 0xff000000 + MUL3_SYM(c, *s);
|
||||||
|
@ -215,7 +215,7 @@ _op_blend_p_caa_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, in
|
||||||
c = 1 + (c & 0xff);
|
c = 1 + (c & 0xff);
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
DATA32 sc = MUL_256(c, *s);
|
DATA32 sc = MUL_256(c, *s);
|
||||||
|
@ -268,7 +268,7 @@ _op_blend_pan_caa_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d,
|
||||||
c = 1 + (c & 0xff);
|
c = 1 + (c & 0xff);
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c,c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c,c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
*d = INTERP_256(c, *s, *d);
|
*d = INTERP_256(c, *s, *d);
|
||||||
|
@ -397,7 +397,7 @@ _op_blend_rel_p_c_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d,
|
||||||
|
|
||||||
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
const __m128i c_packed = _mm_set_epi32(c, c, c, c);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
DATA32 sc = MUL4_SYM(c, *s);
|
DATA32 sc = MUL4_SYM(c, *s);
|
||||||
|
|
|
@ -7,7 +7,7 @@ _op_blend_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
|
||||||
|
|
||||||
int alpha;
|
int alpha;
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
alpha = *m;
|
alpha = *m;
|
||||||
|
@ -67,7 +67,7 @@ _op_blend_pas_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c EINA_UNUSED, DATA32 *d,
|
||||||
const __m128i ones = _mm_set_epi32(1, 1, 1, 1);
|
const __m128i ones = _mm_set_epi32(1, 1, 1, 1);
|
||||||
int alpha;
|
int alpha;
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
alpha = *m;
|
alpha = *m;
|
||||||
|
@ -199,7 +199,7 @@ _op_blend_rel_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
|
||||||
|
|
||||||
int alpha;
|
int alpha;
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
c = MUL_SYM(*m, *s);
|
c = MUL_SYM(*m, *s);
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
static void
|
static void
|
||||||
_op_blend_p_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c EINA_UNUSED, DATA32 *d, int l) {
|
_op_blend_p_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c EINA_UNUSED, DATA32 *d, int l) {
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
int alpha = 256 - (*s >> 24);
|
int alpha = 256 - (*s >> 24);
|
||||||
|
@ -56,7 +56,7 @@ _op_blend_pas_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c EINA_UNUSED, DAT
|
||||||
|
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
switch (*s & 0xff000000)
|
switch (*s & 0xff000000)
|
||||||
{
|
{
|
||||||
|
@ -182,7 +182,7 @@ _op_blend_rel_p_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d, in
|
||||||
|
|
||||||
const __m128i ones = _mm_set_epi32(1, 1, 1, 1);
|
const __m128i ones = _mm_set_epi32(1, 1, 1, 1);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
int alpha = 256 - (*s >> 24);
|
int alpha = 256 - (*s >> 24);
|
||||||
|
@ -233,7 +233,7 @@ _op_blend_rel_pan_dp_sse3(DATA32 *s, DATA8 *m EINA_UNUSED, DATA32 c, DATA32 *d,
|
||||||
|
|
||||||
const __m128i ones = _mm_set_epi32(1, 1, 1, 1);
|
const __m128i ones = _mm_set_epi32(1, 1, 1, 1);
|
||||||
|
|
||||||
LOOP_ALIGNED_U1_A48_SSE3(d, l,
|
LOOP_ALIGNED_U1_A48(d, l,
|
||||||
{ /* UOP */
|
{ /* UOP */
|
||||||
|
|
||||||
c = 1 + (*d >> 24);
|
c = 1 + (*d >> 24);
|
||||||
|
|
|
@ -406,7 +406,10 @@ mul3_sym_sse3(__m128i x, __m128i y) {
|
||||||
return _mm_and_si128(res, RGB_MASK_SSE3);
|
return _mm_and_si128(res, RGB_MASK_SSE3);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define LOOP_ALIGNED_U1_A48_SSE3(DEST, LENGTH, UOP, A4OP, A8OP) \
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define LOOP_ALIGNED_U1_A48(DEST, LENGTH, UOP, A4OP, A8OP) \
|
||||||
{ \
|
{ \
|
||||||
while((uintptr_t)DEST & 0xF && LENGTH) UOP \
|
while((uintptr_t)DEST & 0xF && LENGTH) UOP \
|
||||||
\
|
\
|
||||||
|
@ -429,8 +432,4 @@ mul3_sym_sse3(__m128i x, __m128i y) {
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue