forked from enlightenment/efl
solve neon rotation issue by moving to the tiled rotator
the tiled rotator is faster no matter what. this will fix D8099 by moving to tiled rotation and nuking the neon code and we end up being faster anyway in all cases. @fix
This commit is contained in:
parent
f234a2b6c7
commit
4758f06e63
20
configure.ac
20
configure.ac
|
@ -2538,19 +2538,6 @@ AC_ARG_ENABLE([pixman-image-scale-sample],
|
|||
],
|
||||
[have_pixman_image_scale_sample="no"])
|
||||
|
||||
# Tile rotate
|
||||
AC_ARG_ENABLE([tile-rotate],
|
||||
[AS_HELP_STRING([--enable-tile-rotate],[Enable tiled rotate algorithm. @<:@default=disabled@:>@])],
|
||||
[
|
||||
if test "x${enableval}" = "xyes" ; then
|
||||
have_tile_rotate="yes"
|
||||
CFOPT_WARNING="xyes"
|
||||
else
|
||||
have_tile_rotate="no"
|
||||
fi
|
||||
],
|
||||
[have_tile_rotate="no"])
|
||||
|
||||
# Ecore Buffer
|
||||
AC_ARG_ENABLE([ecore-buffer],
|
||||
[AS_HELP_STRING([--enable-ecore-buffer],[enable ecore-buffer. @<:@default=disabled@:>@])],
|
||||
|
@ -2984,13 +2971,6 @@ AC_CHECK_LIB([m], [lround],
|
|||
|
||||
### Configuration
|
||||
|
||||
## Tile rotation
|
||||
|
||||
if test "x${have_tile_rotate}" = "xyes" ; then
|
||||
AC_DEFINE(TILE_ROTATE, 1, [Enable tiled rotate algorithm])
|
||||
fi
|
||||
|
||||
|
||||
## dither options
|
||||
|
||||
AC_ARG_WITH([evas-dither-mask],
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
#include "evas_common_private.h"
|
||||
#include "evas_convert_rgb_32.h"
|
||||
#ifdef BUILD_NEON
|
||||
#include <arm_neon.h>
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// tiled rotate is faster in every case i've tested, so just use this
|
||||
// by default.
|
||||
#define TILE_ROTATE 1
|
||||
|
||||
void
|
||||
evas_common_convert_rgba_to_32bpp_rgb_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
|
@ -19,9 +23,9 @@ evas_common_convert_rgba_to_32bpp_rgb_8888 (DATA32 *src, DATA8 *dst, int src_jum
|
|||
|
||||
for (y = 0; y < h; y++)
|
||||
{
|
||||
func(src_ptr, dst_ptr, w);
|
||||
src_ptr += w + src_jump;
|
||||
dst_ptr += w + dst_jump;
|
||||
func(src_ptr, dst_ptr, w);
|
||||
src_ptr += w + src_jump;
|
||||
dst_ptr += w + dst_jump;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -44,234 +48,205 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_180 (DATA32 *src, DATA8 *dst, int
|
|||
}
|
||||
|
||||
#ifdef TILE_ROTATE
|
||||
#ifdef BUILD_NEON
|
||||
#define ROT90_QUAD_COPY_LOOP(pix_type) \
|
||||
if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) \
|
||||
{ \
|
||||
if((w%4) == 0) \
|
||||
{ \
|
||||
int klght = 4 * src_stride; \
|
||||
for(y = 0; y < h; y++) \
|
||||
{ \
|
||||
const pix_type *s = &(src[(h - y - 1)]); \
|
||||
pix_type *d = &(dst[(dst_stride * y)]); \
|
||||
pix_type *ptr1 = s; \
|
||||
pix_type *ptr2 = ptr1 + src_stride; \
|
||||
pix_type *ptr3 = ptr2 + src_stride; \
|
||||
pix_type *ptr4 = ptr3 + src_stride; \
|
||||
for(x = 0; x < w; x+=4) \
|
||||
{ \
|
||||
pix_type s_array[4] = {*ptr1, *ptr2, *ptr3, *ptr4}; \
|
||||
vst1q_s32(d, vld1q_s32(s_array)); \
|
||||
d += 4; \
|
||||
ptr1 += klght; \
|
||||
ptr2 += klght; \
|
||||
ptr3 += klght; \
|
||||
ptr4 += klght; \
|
||||
} \
|
||||
} \
|
||||
# ifdef BUILD_NEON
|
||||
# define ROT90_QUAD_COPY_LOOP(pix_type) \
|
||||
if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) { \
|
||||
if ((w % 4) == 0) { \
|
||||
int klght = 4 * src_stride; \
|
||||
for (y = 0; y < h; y++) { \
|
||||
const pix_type *s = &(src[h - y - 1]); \
|
||||
pix_type *d = &(dst[dst_stride * y]); \
|
||||
pix_type *ptr1 = s; \
|
||||
pix_type *ptr2 = ptr1 + src_stride; \
|
||||
pix_type *ptr3 = ptr2 + src_stride; \
|
||||
pix_type *ptr4 = ptr3 + src_stride; \
|
||||
for(x = 0; x < w; x += 4) { \
|
||||
pix_type s_array[4] = { *ptr1, *ptr2, *ptr3, *ptr4 }; \
|
||||
vst1q_s32(d, vld1q_s32(s_array)); \
|
||||
d += 4; \
|
||||
ptr1 += klght; \
|
||||
ptr2 += klght; \
|
||||
ptr3 += klght; \
|
||||
ptr4 += klght; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for (y = 0; y < h; y++) \
|
||||
{ \
|
||||
const pix_type *s = &(src[(h - y - 1)]); \
|
||||
pix_type *d = &(dst[(dst_stride * y)]); \
|
||||
for (x = 0; x < w; x++) \
|
||||
{ \
|
||||
*d++ = *s; \
|
||||
s += src_stride; \
|
||||
} \
|
||||
} \
|
||||
else { \
|
||||
for (y = 0; y < h; y++) { \
|
||||
const pix_type *s = &(src[h - y - 1]); \
|
||||
pix_type *d = &(dst[dst_stride * y]); \
|
||||
for (x = 0; x < w; x++) { \
|
||||
*d++ = *s; \
|
||||
s += src_stride; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else
|
||||
#define ROT270_QUAD_COPY_LOOP(pix_type) \
|
||||
if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) \
|
||||
{ \
|
||||
if((w%4) == 0) \
|
||||
{ \
|
||||
int klght = 4 * src_stride; \
|
||||
for(y = 0; y < h; y++) \
|
||||
{ \
|
||||
const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
|
||||
pix_type *d = &(dst[(dst_stride * y)]); \
|
||||
pix_type *ptr1 = s; \
|
||||
pix_type *ptr2 = ptr1 + src_stride; \
|
||||
pix_type *ptr3 = ptr2 + src_stride; \
|
||||
pix_type *ptr4 = ptr3 + src_stride; \
|
||||
for(x = 0; x < w; x+=4) \
|
||||
{ \
|
||||
pix_type s_array[4] = {*ptr1, *ptr2, *ptr3, *ptr4}; \
|
||||
vst1q_s32(d, vld1q_s32(s_array)); \
|
||||
d += 4; \
|
||||
ptr1 += klght; \
|
||||
ptr2 += klght; \
|
||||
ptr3 += klght; \
|
||||
ptr4 += klght; \
|
||||
} \
|
||||
} \
|
||||
# define ROT270_QUAD_COPY_LOOP(pix_type) \
|
||||
if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) { \
|
||||
if ((w % 4) == 0) { \
|
||||
int klght = 4 * src_stride; \
|
||||
for (y = 0; y < h; y++) { \
|
||||
const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
|
||||
pix_type *d = &(dst[dst_stride * y]); \
|
||||
pix_type *ptr1 = s; \
|
||||
pix_type *ptr2 = ptr1 + src_stride; \
|
||||
pix_type *ptr3 = ptr2 + src_stride; \
|
||||
pix_type *ptr4 = ptr3 + src_stride; \
|
||||
for(x = 0; x < w; x+=4) { \
|
||||
pix_type s_array[4] = { *ptr1, *ptr2, *ptr3, *ptr4 }; \
|
||||
vst1q_s32(d, vld1q_s32(s_array)); \
|
||||
d += 4; \
|
||||
ptr1 += klght; \
|
||||
ptr2 += klght; \
|
||||
ptr3 += klght; \
|
||||
ptr4 += klght; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for (y = 0; y < h; y++) \
|
||||
{ \
|
||||
const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
|
||||
pix_type *d = &(dst[(dst_stride * y)]); \
|
||||
for (x = 0; x < w; x++) \
|
||||
{ \
|
||||
*d++ = *s; \
|
||||
s += src_stride; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else
|
||||
#else
|
||||
#define ROT90_QUAD_COPY_LOOP(pix_type)
|
||||
#define ROT270_QUAD_COPY_LOOP(pix_type)
|
||||
#endif
|
||||
#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
|
||||
static void \
|
||||
blt_rotated_90_trivial_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
const pix_type * restrict src, \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
{ \
|
||||
int x, y; \
|
||||
ROT90_QUAD_COPY_LOOP(pix_type) \
|
||||
{ \
|
||||
for (y = 0; y < h; y++) \
|
||||
{ \
|
||||
const pix_type *s = &(src[(h - y - 1)]); \
|
||||
pix_type *d = &(dst[(dst_stride * y)]); \
|
||||
for (x = 0; x < w; x++) \
|
||||
{ \
|
||||
*d++ = *s; \
|
||||
s += src_stride; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
static void \
|
||||
blt_rotated_270_trivial_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
const pix_type * restrict src, \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
{ \
|
||||
int x, y; \
|
||||
ROT270_QUAD_COPY_LOOP(pix_type) \
|
||||
{ \
|
||||
for(y = 0; y < h; y++) \
|
||||
{ \
|
||||
else { \
|
||||
for (y = 0; y < h; y++) { \
|
||||
const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
|
||||
pix_type *d = &(dst[(dst_stride * y)]); \
|
||||
for (x = 0; x < w; x++) \
|
||||
{ \
|
||||
pix_type *d = &(dst[dst_stride * y]); \
|
||||
for (x = 0; x < w; x++) { \
|
||||
*d++ = *s; \
|
||||
s -= src_stride; \
|
||||
s += src_stride; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else
|
||||
# else
|
||||
# define ROT90_QUAD_COPY_LOOP(pix_type)
|
||||
# define ROT270_QUAD_COPY_LOOP(pix_type)
|
||||
# endif
|
||||
|
||||
# define FAST_SIMPLE_ROTATE(suffix, pix_type) \
|
||||
static void \
|
||||
blt_rotated_90_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
blt_rotated_90_trivial_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
const pix_type * restrict src, \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
{ \
|
||||
int x, y; \
|
||||
ROT90_QUAD_COPY_LOOP(pix_type) { \
|
||||
for (y = 0; y < h; y++) { \
|
||||
const pix_type *s = &(src[h - y - 1]); \
|
||||
pix_type *d = &(dst[dst_stride * y]); \
|
||||
for (x = 0; x < w; x++) { \
|
||||
*d++ = *s; \
|
||||
s += src_stride; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
static void \
|
||||
blt_rotated_270_trivial_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
const pix_type * restrict src, \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
{ \
|
||||
int x, y; \
|
||||
ROT270_QUAD_COPY_LOOP(pix_type) { \
|
||||
for (y = 0; y < h; y++) { \
|
||||
const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
|
||||
pix_type *d = &(dst[dst_stride * y]); \
|
||||
for (x = 0; x < w; x++) { \
|
||||
*d++ = *s; \
|
||||
s -= src_stride; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
static void \
|
||||
blt_rotated_90_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
const pix_type * restrict src, \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
{ \
|
||||
int x, leading_pixels = 0, trailing_pixels = 0; \
|
||||
const int TILE_SIZE = TILE_CACHE_LINE_SIZE / sizeof(pix_type); \
|
||||
if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) \
|
||||
{ \
|
||||
leading_pixels = TILE_SIZE - \
|
||||
(((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (leading_pixels > w) \
|
||||
leading_pixels = w; \
|
||||
blt_rotated_90_trivial_##suffix(dst, \
|
||||
dst_stride, \
|
||||
src, \
|
||||
src_stride, \
|
||||
leading_pixels, \
|
||||
h); \
|
||||
dst += leading_pixels; \
|
||||
src += leading_pixels * src_stride; \
|
||||
w -= leading_pixels; \
|
||||
} \
|
||||
if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) \
|
||||
{ \
|
||||
trailing_pixels = (((uintptr_t)(dst + w) & \
|
||||
(TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (trailing_pixels > w) \
|
||||
trailing_pixels = w; \
|
||||
w -= trailing_pixels; \
|
||||
} \
|
||||
for (x = 0; x < w; x += TILE_SIZE) \
|
||||
{ \
|
||||
blt_rotated_90_trivial_##suffix(dst + x, \
|
||||
dst_stride, \
|
||||
&(src[(src_stride * x)]), \
|
||||
src_stride, \
|
||||
TILE_SIZE, \
|
||||
h); \
|
||||
} \
|
||||
if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) { \
|
||||
leading_pixels = TILE_SIZE - \
|
||||
(((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (leading_pixels > w) leading_pixels = w; \
|
||||
blt_rotated_90_trivial_##suffix(dst, \
|
||||
dst_stride, \
|
||||
src, \
|
||||
src_stride, \
|
||||
leading_pixels, \
|
||||
h); \
|
||||
dst += leading_pixels; \
|
||||
src += leading_pixels * src_stride; \
|
||||
w -= leading_pixels; \
|
||||
} \
|
||||
if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) { \
|
||||
trailing_pixels = (((uintptr_t)(dst + w) & \
|
||||
(TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (trailing_pixels > w) trailing_pixels = w; \
|
||||
w -= trailing_pixels; \
|
||||
} \
|
||||
for (x = 0; x < w; x += TILE_SIZE) { \
|
||||
blt_rotated_90_trivial_##suffix(dst + x, \
|
||||
dst_stride, \
|
||||
&(src[src_stride * x]), \
|
||||
src_stride, \
|
||||
TILE_SIZE, \
|
||||
h); \
|
||||
} \
|
||||
if (trailing_pixels) \
|
||||
blt_rotated_90_trivial_##suffix(dst + w, \
|
||||
dst_stride, \
|
||||
&(src[(w * src_stride)]), \
|
||||
&(src[src_stride * w]), \
|
||||
src_stride, \
|
||||
trailing_pixels, \
|
||||
h); \
|
||||
} \
|
||||
static void \
|
||||
blt_rotated_270_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
blt_rotated_270_##suffix(pix_type * restrict dst, \
|
||||
int dst_stride, \
|
||||
const pix_type * restrict src, \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
int src_stride, \
|
||||
int w, \
|
||||
int h) \
|
||||
{ \
|
||||
int x, leading_pixels = 0, trailing_pixels = 0; \
|
||||
const int TILE_SIZE = TILE_CACHE_LINE_SIZE / sizeof(pix_type); \
|
||||
if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) \
|
||||
{ \
|
||||
leading_pixels = TILE_SIZE - \
|
||||
(((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (leading_pixels > w) \
|
||||
leading_pixels = w; \
|
||||
blt_rotated_270_trivial_##suffix(dst, \
|
||||
dst_stride, \
|
||||
&(src[(src_stride * (w - leading_pixels))]), \
|
||||
src_stride, \
|
||||
leading_pixels, \
|
||||
h); \
|
||||
dst += leading_pixels; \
|
||||
w -= leading_pixels; \
|
||||
} \
|
||||
if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) \
|
||||
{ \
|
||||
trailing_pixels = (((uintptr_t)(dst + w) & \
|
||||
(TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (trailing_pixels > w) \
|
||||
trailing_pixels = w; \
|
||||
w -= trailing_pixels; \
|
||||
src += trailing_pixels * src_stride; \
|
||||
} \
|
||||
for (x = 0; x < w; x += TILE_SIZE) \
|
||||
{ \
|
||||
blt_rotated_270_trivial_##suffix(dst + x, \
|
||||
dst_stride, \
|
||||
&(src[(src_stride * (w - x - TILE_SIZE))]), \
|
||||
src_stride, \
|
||||
TILE_SIZE, \
|
||||
h); \
|
||||
} \
|
||||
if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) { \
|
||||
leading_pixels = TILE_SIZE - \
|
||||
(((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (leading_pixels > w) leading_pixels = w; \
|
||||
blt_rotated_270_trivial_##suffix(dst, \
|
||||
dst_stride, \
|
||||
&(src[src_stride * (w - leading_pixels)]), \
|
||||
src_stride, \
|
||||
leading_pixels, \
|
||||
h); \
|
||||
dst += leading_pixels; \
|
||||
w -= leading_pixels; \
|
||||
} \
|
||||
if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) { \
|
||||
trailing_pixels = (((uintptr_t)(dst + w) & \
|
||||
(TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
|
||||
if (trailing_pixels > w) trailing_pixels = w; \
|
||||
w -= trailing_pixels; \
|
||||
src += trailing_pixels * src_stride; \
|
||||
} \
|
||||
for (x = 0; x < w; x += TILE_SIZE) { \
|
||||
blt_rotated_270_trivial_##suffix(dst + x, \
|
||||
dst_stride, \
|
||||
&(src[src_stride * (w - x - TILE_SIZE)]), \
|
||||
src_stride, \
|
||||
TILE_SIZE, \
|
||||
h); \
|
||||
} \
|
||||
if (trailing_pixels) \
|
||||
blt_rotated_270_trivial_##suffix(dst + w, \
|
||||
dst_stride, \
|
||||
|
@ -288,12 +263,13 @@ void
|
|||
evas_common_convert_rgba_to_32bpp_rgb_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
#ifdef TILE_ROTATE
|
||||
blt_rotated_270_8888((DATA8 *)dst, dst_jump+w, (const DATA8 *)src, src_jump+h, w, h) ;
|
||||
blt_rotated_270_8888((DATA32 *)dst, dst_jump + w,
|
||||
src, src_jump + h,
|
||||
w, h);
|
||||
#else
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
||||
CONVERT_LOOP_START_ROT_270();
|
||||
|
@ -305,15 +281,32 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_270 (DATA32 *src, DATA8 *dst, int
|
|||
return;
|
||||
}
|
||||
|
||||
/* speed measuring code - enable when optimizing to compare
|
||||
#include <time.h>
|
||||
static double
|
||||
get_time(void)
|
||||
{
|
||||
struct timespec t;
|
||||
|
||||
clock_gettime(CLOCK_MONOTONIC, &t);
|
||||
return (double)t.tv_sec + (((double)t.tv_nsec) / 1000000000.0);
|
||||
}
|
||||
*/
|
||||
|
||||
void
|
||||
evas_common_convert_rgba_to_32bpp_rgb_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
/*
|
||||
static double tt = 0.0;
|
||||
static unsigned long long pt = 0;
|
||||
double t0 = get_time();
|
||||
*/
|
||||
#ifdef TILE_ROTATE
|
||||
blt_rotated_90_8888((DATA8 *)dst, dst_jump+w, (const DATA8 *)src, src_jump+h, w, h) ;
|
||||
blt_rotated_90_8888((DATA32 *)dst, dst_jump + w,
|
||||
src, src_jump + h,
|
||||
w, h);
|
||||
#else
|
||||
# ifndef BUILD_NEON
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -322,117 +315,19 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_90 (DATA32 *src, DATA8 *dst, int
|
|||
*dst_ptr = *src_ptr;
|
||||
|
||||
CONVERT_LOOP_END_ROT_90();
|
||||
# elif defined BUILD_NEON_INTRINSICS
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
CONVERT_LOOP_START_ROT_90();
|
||||
|
||||
*dst_ptr = *src_ptr;
|
||||
|
||||
CONVERT_LOOP_END_ROT_90();
|
||||
# else
|
||||
if ((w & 1) || (h & 1))
|
||||
{
|
||||
/* Rarely (if ever) taken: so slow path is fine */
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
CONVERT_LOOP_START_ROT_90();
|
||||
|
||||
*dst_ptr = *src_ptr;
|
||||
|
||||
CONVERT_LOOP_END_ROT_90();
|
||||
}
|
||||
else
|
||||
{
|
||||
# define AP "convert_rgba32_rot_90_"
|
||||
asm volatile (
|
||||
".fpu neon \n\t"
|
||||
" mov %[s1], %[src] \n\t"
|
||||
" add %[s1], %[s1], %[h],lsl #2 \n\t"
|
||||
" sub %[s1], #8 \n\t"
|
||||
|
||||
" mov %[s2], %[src] \n\t"
|
||||
" add %[s2], %[s2], %[h], lsl #3 \n\t"
|
||||
" add %[s2], %[s2], %[sjmp], lsr #1 \n\t"
|
||||
" sub %[s2], #8 \n\t"
|
||||
|
||||
" mov %[d1], %[dst] \n\t"
|
||||
|
||||
" add %[d2], %[d1], %[djmp] \n\t"
|
||||
" add %[d2], %[d2], %[w], lsl #2 \n\t"
|
||||
|
||||
" mov %[sadv], %[h], lsl #3 \n\t"
|
||||
" add %[sadv], %[sadv], %[sjmp], lsl #1\n\t"
|
||||
|
||||
" mov %[y], #0 \n\t"
|
||||
" mov %[x], #0 \n\t"
|
||||
AP"loop: \n\t"
|
||||
" vld1.u32 d0, [%[s1]] \n\t"
|
||||
" vld1.u32 d1, [%[s2]] \n\t"
|
||||
" add %[x], #2 \n\t"
|
||||
" add %[s1], %[sadv] \n\t"
|
||||
" add %[s2], %[sadv] \n\t"
|
||||
" vtrn.u32 d0, d1 \n\t"
|
||||
" cmp %[x], %[w] \n\t"
|
||||
" vst1.u32 d1, [%[d1]]! \n\t"
|
||||
" vst1.u32 d0, [%[d2]]! \n\t"
|
||||
" blt "AP"loop \n\t"
|
||||
|
||||
" mov %[x], #0 \n\t"
|
||||
" add %[d1], %[djmp] \n\t"
|
||||
" add %[d1], %[d1], %[w], lsl #2 \n\t"
|
||||
" add %[d2], %[djmp] \n\t"
|
||||
" add %[d2], %[d2], %[w], lsl #2 \n\t"
|
||||
|
||||
" mov %[s1], %[src] \n\t"
|
||||
" add %[s1], %[s1], %[h], lsl #2 \n\t"
|
||||
" sub %[s1], %[s1], %[y], lsl #2 \n\t"
|
||||
" sub %[s1], #16 \n\t"
|
||||
|
||||
" add %[s2], %[s1], %[h], lsl #2 \n\t"
|
||||
" add %[s2], %[s2], %[sjmp], lsl #2 \n\t"
|
||||
|
||||
" add %[y], #2 \n\t"
|
||||
|
||||
" cmp %[y], %[h] \n\t"
|
||||
" blt "AP"loop \n\t"
|
||||
|
||||
: // Out
|
||||
: [s1] "r" (1),
|
||||
[s2] "r" (11),
|
||||
[d1] "r" (2),
|
||||
[d2] "r" (12),
|
||||
[src] "r" (src),
|
||||
[dst] "r" (dst),
|
||||
[x] "r" (3),
|
||||
[y] "r" (4),
|
||||
[w] "r" (w),
|
||||
[h] "r" (h),
|
||||
[sadv] "r" (5),
|
||||
[sjmp] "r" (src_jump * 4),
|
||||
[djmp] "r" (dst_jump * 4 * 2)
|
||||
: "d0", "d1", "memory", "cc"// Clobber
|
||||
|
||||
|
||||
);
|
||||
}
|
||||
# undef AP
|
||||
# endif
|
||||
#endif
|
||||
return;
|
||||
/*
|
||||
double t1 = get_time();
|
||||
tt += t1 - t0;
|
||||
pt += (w * h);
|
||||
printf("%1.2f mpix/sec (%1.9f @ %1.9f)\n", (double)pt / (tt * 1000000), tt, t1);
|
||||
*/
|
||||
}
|
||||
|
||||
void
|
||||
evas_common_convert_rgba_to_32bpp_rgbx_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -449,8 +344,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888 (DATA32 *src, DATA8 *dst, int src_ju
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_180 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -467,8 +361,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_180 (DATA32 *src, DATA8 *dst, in
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -485,8 +378,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_270 (DATA32 *src, DATA8 *dst, in
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -503,8 +395,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_90 (DATA32 *src, DATA8 *dst, int
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgr_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -520,8 +411,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888 (DATA32 *src, DATA8 *dst, int src_jum
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgr_8888_rot_180 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -537,8 +427,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888_rot_180 (DATA32 *src, DATA8 *dst, int
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgr_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -554,8 +443,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888_rot_270 (DATA32 *src, DATA8 *dst, int
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgr_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -571,8 +459,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888_rot_90 (DATA32 *src, DATA8 *dst, int
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgrx_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -588,8 +475,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888 (DATA32 *src, DATA8 *dst, int src_ju
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_180 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -605,8 +491,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_180 (DATA32 *src, DATA8 *dst, in
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -622,8 +507,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_270 (DATA32 *src, DATA8 *dst, in
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
@ -639,8 +523,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_90 (DATA32 *src, DATA8 *dst, int
|
|||
void
|
||||
evas_common_convert_rgba_to_32bpp_rgb_666(DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
|
||||
{
|
||||
DATA32 *src_ptr;
|
||||
DATA32 *dst_ptr;
|
||||
DATA32 *src_ptr, *dst_ptr;
|
||||
int x, y;
|
||||
|
||||
dst_ptr = (DATA32 *)dst;
|
||||
|
|
Loading…
Reference in New Issue