Evas filters: Add template optimizable files for box blur

The new files (i386, sse3 and neon) are basically empty and fall back
to the C version. This is just to pave the way for full low-level
optimization... if someone has the time and skills to do it :)

Add both Alpha and RGBA template files.
This commit is contained in:
Jean-Philippe Andre 2014-03-25 16:54:57 +09:00
parent 4c390fb984
commit 634034af46
9 changed files with 276 additions and 5 deletions

View File

@ -496,7 +496,14 @@ EXTRA_DIST += \
lib/evas/filters/blur/blur_gaussian_alpha_.c \
lib/evas/filters/blur/blur_gaussian_rgba_.c \
lib/evas/filters/blur/blur_box_alpha_.c \
lib/evas/filters/blur/blur_box_rgba_.c
lib/evas/filters/blur/blur_box_alpha_i386.c \
lib/evas/filters/blur/blur_box_alpha_sse3.c \
lib/evas/filters/blur/blur_box_alpha_neon.c \
lib/evas/filters/blur/blur_box_rgba_.c \
lib/evas/filters/blur/blur_box_rgba_i386.c \
lib/evas/filters/blur/blur_box_rgba_sse3.c \
lib/evas/filters/blur/blur_box_rgba_neon.c
### Engines

View File

@ -0,0 +1,25 @@
#ifdef BUILD_MMX
/*
 * MMX entry point for the horizontal alpha box-blur step.
 *
 * Placeholder only: there is no MMX code yet, so all five arguments are
 * forwarded unchanged to _box_blur_alpha_horiz_step().
 */
static inline void
_box_blur_alpha_horiz_step_mmx(const DATA8 *restrict const srcdata,
                               DATA8 *restrict const dstdata,
                               const int *restrict const radii,
                               const int len, const int loops)
{
   /* TODO: write the MMX version here and drop this fallback call. */
   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
}
/*
 * MMX entry point for the vertical alpha box-blur step.
 *
 * Placeholder only: there is no MMX code yet, so all five arguments are
 * forwarded unchanged to _box_blur_alpha_vert_step().
 */
static inline void
_box_blur_alpha_vert_step_mmx(const DATA8 *restrict const srcdata,
                              DATA8 *restrict const dstdata,
                              const int *restrict const radii,
                              const int len, const int loops)
{
   /* TODO: write the MMX version here and drop this fallback call. */
   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
}
#endif

View File

@ -0,0 +1,25 @@
#ifdef BUILD_NEON
/*
 * NEON entry point for the horizontal alpha box-blur step.
 *
 * Placeholder only: there is no NEON code yet, so all five arguments are
 * forwarded unchanged to _box_blur_alpha_horiz_step().
 */
static inline void
_box_blur_alpha_horiz_step_neon(const DATA8 *restrict const srcdata,
                                DATA8 *restrict const dstdata,
                                const int *restrict const radii,
                                const int len, const int loops)
{
   /* TODO: write the NEON version here and drop this fallback call. */
   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
}
/*
 * NEON entry point for the vertical alpha box-blur step.
 *
 * Placeholder only: there is no NEON code yet, so all five arguments are
 * forwarded unchanged to _box_blur_alpha_vert_step().
 */
static inline void
_box_blur_alpha_vert_step_neon(const DATA8 *restrict const srcdata,
                               DATA8 *restrict const dstdata,
                               const int *restrict const radii,
                               const int len, const int loops)
{
   /* TODO: write the NEON version here and drop this fallback call. */
   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
}
#endif

View File

@ -0,0 +1,25 @@
#ifdef BUILD_SSE3
/*
 * SSE3 entry point for the horizontal alpha box-blur step.
 *
 * Placeholder only: there is no SSE3 code yet, so all five arguments are
 * forwarded unchanged to _box_blur_alpha_horiz_step().
 */
static inline void
_box_blur_alpha_horiz_step_sse3(const DATA8 *restrict const srcdata,
                                DATA8 *restrict const dstdata,
                                const int *restrict const radii,
                                const int len, const int loops)
{
   /* TODO: write the SSE3 version here and drop this fallback call. */
   _box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
}
/*
 * SSE3 entry point for the vertical alpha box-blur step.
 *
 * Placeholder only: there is no SSE3 code yet, so all five arguments are
 * forwarded unchanged to _box_blur_alpha_vert_step().
 */
static inline void
_box_blur_alpha_vert_step_sse3(const DATA8 *restrict const srcdata,
                               DATA8 *restrict const dstdata,
                               const int *restrict const radii,
                               const int len, const int loops)
{
   /* TODO: write the SSE3 version here and drop this fallback call. */
   _box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
}
#endif

View File

@ -8,7 +8,7 @@
#include "../evas_filter_private.h"
static inline void
_box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
_box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
const int* restrict const radii,
const int len,
@ -149,7 +149,7 @@ _box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
}
static inline void
_box_blur_vert_rgba_step(const DATA32* restrict const srcdata,
_box_blur_rgba_vert_step(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
const int* restrict const radii,
const int len,

View File

@ -0,0 +1,25 @@
#ifdef BUILD_MMX
/*
 * MMX entry point for the horizontal RGBA box-blur step.
 *
 * Placeholder only: there is no MMX code yet, so all five arguments are
 * forwarded unchanged to _box_blur_rgba_horiz_step().
 */
static inline void
_box_blur_rgba_horiz_step_mmx(const DATA32 *restrict const srcdata,
                              DATA32 *restrict const dstdata,
                              const int *restrict const radii,
                              const int len, const int loops)
{
   /* TODO: write the MMX version here and drop this fallback call. */
   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
}
/*
 * MMX entry point for the vertical RGBA box-blur step.
 *
 * Placeholder only: there is no MMX code yet, so all five arguments are
 * forwarded unchanged to _box_blur_rgba_vert_step().
 */
static inline void
_box_blur_rgba_vert_step_mmx(const DATA32 *restrict const srcdata,
                             DATA32 *restrict const dstdata,
                             const int *restrict const radii,
                             const int len, const int loops)
{
   /* TODO: write the MMX version here and drop this fallback call. */
   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
}
#endif

View File

@ -0,0 +1,25 @@
#ifdef BUILD_NEON
/*
 * NEON entry point for the horizontal RGBA box-blur step.
 *
 * Placeholder only: there is no NEON code yet, so all five arguments are
 * forwarded unchanged to _box_blur_rgba_horiz_step().
 */
static inline void
_box_blur_rgba_horiz_step_neon(const DATA32 *restrict const srcdata,
                               DATA32 *restrict const dstdata,
                               const int *restrict const radii,
                               const int len, const int loops)
{
   /* TODO: write the NEON version here and drop this fallback call. */
   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
}
/*
 * NEON entry point for the vertical RGBA box-blur step.
 *
 * Placeholder only: there is no NEON code yet, so all five arguments are
 * forwarded unchanged to _box_blur_rgba_vert_step().
 */
static inline void
_box_blur_rgba_vert_step_neon(const DATA32 *restrict const srcdata,
                              DATA32 *restrict const dstdata,
                              const int *restrict const radii,
                              const int len, const int loops)
{
   /* TODO: write the NEON version here and drop this fallback call. */
   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
}
#endif

View File

@ -0,0 +1,25 @@
#ifdef BUILD_SSE3
/*
 * SSE3 entry point for the horizontal RGBA box-blur step.
 *
 * Placeholder only: there is no SSE3 code yet, so all five arguments are
 * forwarded unchanged to _box_blur_rgba_horiz_step().
 */
static inline void
_box_blur_rgba_horiz_step_sse3(const DATA32 *restrict const srcdata,
                               DATA32 *restrict const dstdata,
                               const int *restrict const radii,
                               const int len, const int loops)
{
   /* TODO: write the SSE3 version here and drop this fallback call. */
   _box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
}
/*
 * SSE3 entry point for the vertical RGBA box-blur step.
 *
 * Placeholder only: there is no SSE3 code yet, so all five arguments are
 * forwarded unchanged to _box_blur_rgba_vert_step().
 */
static inline void
_box_blur_rgba_vert_step_sse3(const DATA32 *restrict const srcdata,
                              DATA32 *restrict const dstdata,
                              const int *restrict const radii,
                              const int len, const int loops)
{
   /* TODO: write the SSE3 version here and drop this fallback call. */
   _box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
}
#endif

View File

@ -34,12 +34,45 @@ _box_blur_auto_radius(int *radii, int r)
}
#include "./blur/blur_box_rgba_.c"
#ifdef BUILD_MMX
#include "./blur/blur_box_rgba_i386.c"
#endif
#ifdef BUILD_SSE3
#include "./blur/blur_box_rgba_sse3.c"
#endif
#ifdef BUILD_NEON
#include "./blur/blur_box_rgba_neon.c"
#endif
/*
 * Run one horizontal box-blur step over RGBA data.
 *
 * Exactly one step routine is called. The candidates are tried in the order
 * SSE3, MMX, NEON; a candidate is used when it was compiled in (BUILD_*) and
 * evas_common_cpu_has_feature() reports the matching CPU feature. When none
 * applies, _box_blur_rgba_horiz_step() is used.
 *
 * The step routine receives w as its "len" argument and h as its "loops"
 * argument. The call is bracketed by DEBUG_TIME_BEGIN()/DEBUG_TIME_END().
 */
static void
_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
{
   DEBUG_TIME_BEGIN();

   /* Every optional branch ends with a dangling "else", so the statement
    * that follows it (next CPU test or the final fallback call) only runs
    * when the preceding test failed or was compiled out. */
#ifdef BUILD_SSE3
   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
     {
        _box_blur_rgba_horiz_step_sse3(src, dst, radii, w, h);
     }
   else
#endif
#ifdef BUILD_MMX
   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
     {
        _box_blur_rgba_horiz_step_mmx(src, dst, radii, w, h);
     }
   else
#endif
#ifdef BUILD_NEON
   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
     {
        _box_blur_rgba_horiz_step_neon(src, dst, radii, w, h);
     }
   else
#endif
     {
        _box_blur_rgba_horiz_step(src, dst, radii, w, h);
     }

   DEBUG_TIME_END();
}
@ -47,7 +80,31 @@ static void
_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
{
   /*
    * Run one vertical box-blur step over RGBA data.
    *
    * Exactly one step routine is called. The candidates are tried in the
    * order SSE3, MMX, NEON; a candidate is used when it was compiled in
    * (BUILD_*) and evas_common_cpu_has_feature() reports the matching CPU
    * feature. When none applies, _box_blur_rgba_vert_step() is used.
    *
    * Note the swapped dimensions: the step routine receives h as its "len"
    * argument and w as its "loops" argument.
    */
   DEBUG_TIME_BEGIN();

   /* Every optional branch ends with a dangling "else", so the statement
    * that follows it (next CPU test or the final fallback call) only runs
    * when the preceding test failed or was compiled out. */
#ifdef BUILD_SSE3
   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
     {
        _box_blur_rgba_vert_step_sse3(src, dst, radii, h, w);
     }
   else
#endif
#ifdef BUILD_MMX
   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
     {
        _box_blur_rgba_vert_step_mmx(src, dst, radii, h, w);
     }
   else
#endif
#ifdef BUILD_NEON
   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
     {
        _box_blur_rgba_vert_step_neon(src, dst, radii, h, w);
     }
   else
#endif
     {
        _box_blur_rgba_vert_step(src, dst, radii, h, w);
     }

   DEBUG_TIME_END();
}
@ -110,12 +167,45 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
}
#include "./blur/blur_box_alpha_.c"
#ifdef BUILD_MMX
#include "./blur/blur_box_alpha_i386.c"
#endif
#ifdef BUILD_SSE3
#include "./blur/blur_box_alpha_sse3.c"
#endif
#ifdef BUILD_NEON
#include "./blur/blur_box_alpha_neon.c"
#endif
/*
 * Run one horizontal box-blur step over alpha (DATA8) data.
 *
 * Exactly one step routine is called. The candidates are tried in the order
 * SSE3, MMX, NEON; a candidate is used when it was compiled in (BUILD_*) and
 * evas_common_cpu_has_feature() reports the matching CPU feature. When none
 * applies, _box_blur_alpha_horiz_step() is used.
 *
 * The step routine receives w as its "len" argument and h as its "loops"
 * argument. The call is bracketed by DEBUG_TIME_BEGIN()/DEBUG_TIME_END().
 */
static void
_box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
{
   DEBUG_TIME_BEGIN();

   /* Every optional branch ends with a dangling "else", so the statement
    * that follows it (next CPU test or the final fallback call) only runs
    * when the preceding test failed or was compiled out. */
#ifdef BUILD_SSE3
   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
     {
        _box_blur_alpha_horiz_step_sse3(src, dst, radii, w, h);
     }
   else
#endif
#ifdef BUILD_MMX
   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
     {
        _box_blur_alpha_horiz_step_mmx(src, dst, radii, w, h);
     }
   else
#endif
#ifdef BUILD_NEON
   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
     {
        _box_blur_alpha_horiz_step_neon(src, dst, radii, w, h);
     }
   else
#endif
     {
        _box_blur_alpha_horiz_step(src, dst, radii, w, h);
     }

   DEBUG_TIME_END();
}
@ -123,7 +213,31 @@ static void
_box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
{
   /*
    * Run one vertical box-blur step over alpha (DATA8) data.
    *
    * Exactly one step routine is called. The candidates are tried in the
    * order SSE3, MMX, NEON; a candidate is used when it was compiled in
    * (BUILD_*) and evas_common_cpu_has_feature() reports the matching CPU
    * feature. When none applies, _box_blur_alpha_vert_step() is used.
    *
    * Note the swapped dimensions: the step routine receives h as its "len"
    * argument and w as its "loops" argument.
    */
   DEBUG_TIME_BEGIN();

   /* Every optional branch ends with a dangling "else", so the statement
    * that follows it (next CPU test or the final fallback call) only runs
    * when the preceding test failed or was compiled out. */
#ifdef BUILD_SSE3
   if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3))
     {
        _box_blur_alpha_vert_step_sse3(src, dst, radii, h, w);
     }
   else
#endif
#ifdef BUILD_MMX
   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
     {
        _box_blur_alpha_vert_step_mmx(src, dst, radii, h, w);
     }
   else
#endif
#ifdef BUILD_NEON
   if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
     {
        _box_blur_alpha_vert_step_neon(src, dst, radii, h, w);
     }
   else
#endif
     {
        _box_blur_alpha_vert_step(src, dst, radii, h, w);
     }

   DEBUG_TIME_END();
}