evas filters: Use obscured region in box blur (SW)

Box blur is used by the default blur (in 2 or 3 passes), so it
is one of the most important and CPU-intensive filters in the
software engine.
This commit is contained in:
Jean-Philippe Andre 2017-03-15 14:51:51 +09:00
parent 1639c06118
commit e52a04ef8f
10 changed files with 186 additions and 115 deletions

View File

@ -7,12 +7,14 @@
#include "evas_filter_private.h"
static inline void
_box_blur_alpha_horiz_step(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_horiz_step(const uint8_t* restrict srcdata, int src_stride EINA_UNUSED,
uint8_t* restrict dstdata, int dst_stride EINA_UNUSED,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
const int len = region.w;
const int loops = region.h;
const DATA8* restrict src;
DATA8* restrict dst;
DATA8* restrict span1;
@ -126,16 +128,18 @@ _box_blur_alpha_horiz_step(const DATA8* restrict const srcdata,
// ATTENTION: Make sure the below code's inner loop is the SAME as above.
static inline void
_box_blur_alpha_vert_step(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_vert_step(const uint8_t* restrict srcdata, int src_stride EINA_UNUSED,
uint8_t* restrict dstdata, int dst_stride EINA_UNUSED,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
/* Note: This function tries to optimize cache hits by working on
* contiguous horizontal spans.
*/
const int len = region.h;
const int loops = region.w;
const int step = loops;
DATA8* restrict src;
DATA8* restrict dst;

View File

@ -1,25 +1,23 @@
#ifdef BUILD_MMX
static inline void
_box_blur_alpha_horiz_step_mmx(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_horiz_step_mmx(const uint8_t* restrict src, int src_stride,
uint8_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
_box_blur_alpha_horiz_step(src, src_stride, dst, dst_stride, radii, region);
}
static inline void
_box_blur_alpha_vert_step_mmx(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_vert_step_mmx(const uint8_t* restrict src, int src_stride,
uint8_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
_box_blur_alpha_vert_step(src, src_stride, dst, dst_stride, radii, region);
}
#endif

View File

@ -1,25 +1,23 @@
#ifdef BUILD_NEON
static inline void
_box_blur_alpha_horiz_step_neon(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_horiz_step_neon(const uint8_t* restrict src, int src_stride,
uint8_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
_box_blur_alpha_horiz_step(src, src_stride, dst, dst_stride, radii, region);
}
static inline void
_box_blur_alpha_vert_step_neon(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_vert_step_neon(const uint8_t* restrict src, int src_stride,
uint8_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
_box_blur_alpha_vert_step(src, src_stride, dst, dst_stride, radii, region);
}
#endif

View File

@ -1,25 +1,23 @@
#ifdef BUILD_SSE3
static inline void
_box_blur_alpha_horiz_step_sse3(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_horiz_step_sse3(const uint8_t* restrict src, int src_stride,
uint8_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_alpha_horiz_step(srcdata, dstdata, radii, len, loops);
_box_blur_alpha_horiz_step(src, src_stride, dst, dst_stride, radii, region);
}
static inline void
_box_blur_alpha_vert_step_sse3(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
_box_blur_alpha_vert_step_sse3(const uint8_t* restrict src, int src_stride,
uint8_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_alpha_vert_step(srcdata, dstdata, radii, len, loops);
_box_blur_alpha_vert_step(src, src_stride, dst, dst_stride, radii, region);
}
#endif

View File

@ -7,12 +7,14 @@
#include "evas_filter_private.h"
static inline void
_box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_horiz_step(const uint32_t* restrict srcdata, int src_stride,
uint32_t* restrict dstdata, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
const int len = region.w;
const int loops = region.h;
const DATA32* restrict src;
DATA32* restrict dst;
DATA32* restrict span1;
@ -29,6 +31,9 @@ _box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
}
#endif
srcdata += region.x + src_stride * region.y;
dstdata += region.x + dst_stride * region.y;
span1 = alloca(len * sizeof(DATA32));
span2 = alloca(len * sizeof(DATA32));
memset(span1, 0, len * sizeof(DATA32));
@ -40,9 +45,9 @@ _box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
int run;
// New line: reset source & destination pointers
src = srcdata + len * l;
src = srcdata + src_stride * l;
if (!radii[1]) // Only one run
dst = dstdata + len * l;
dst = dstdata + dst_stride * l;
else
dst = span1;
@ -140,7 +145,7 @@ _box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
else
{
// Last run: write directly to dstdata
dst = dstdata + len * l;
dst = dstdata + dst_stride * l;
}
}
}
@ -148,17 +153,18 @@ _box_blur_rgba_horiz_step(const DATA32* restrict const srcdata,
}
static inline void
_box_blur_rgba_vert_step(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_vert_step(const uint32_t* restrict srcdata, int src_stride,
uint32_t* restrict dstdata, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
/* Note: This function tries to optimize cache hits by working on
* contiguous horizontal spans.
*/
const int step = loops;
const int len = region.h;
const int loops = region.w;
DATA32* restrict src;
DATA32* restrict dst;
DATA32* restrict span1;
@ -175,6 +181,9 @@ _box_blur_rgba_vert_step(const DATA32* restrict const srcdata,
}
#endif
srcdata += region.x + src_stride * region.y;
dstdata += region.x + dst_stride * region.y;
span1 = alloca(len * sizeof(DATA32));
span2 = alloca(len * sizeof(DATA32));
memset(span1, 0, len * sizeof(DATA32));
@ -191,7 +200,7 @@ _box_blur_rgba_vert_step(const DATA32* restrict const srcdata,
for (int k = len; k; --k)
{
*s++ = *srcptr;
srcptr += step;
srcptr += src_stride;
}
src = span1;
@ -290,7 +299,7 @@ _box_blur_rgba_vert_step(const DATA32* restrict const srcdata,
for (int k = len; k; --k)
{
*dstptr = *dst++;
dstptr += step;
dstptr += dst_stride;
}
}
}

View File

@ -1,25 +1,23 @@
#ifdef BUILD_MMX
static inline void
_box_blur_rgba_horiz_step_mmx(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_horiz_step_mmx(const uint32_t* restrict src, int src_stride,
uint32_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
_box_blur_rgba_horiz_step(src, src_stride, dst, dst_stride, radii, region);
}
static inline void
_box_blur_rgba_vert_step_mmx(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_vert_step_mmx(const uint32_t* restrict src, int src_stride,
uint32_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
_box_blur_rgba_vert_step(src, src_stride, dst, dst_stride, radii, region);
}
#endif

View File

@ -1,25 +1,23 @@
#ifdef BUILD_NEON
static inline void
_box_blur_rgba_horiz_step_neon(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_horiz_step_neon(const uint32_t* restrict src, int src_stride,
uint32_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
_box_blur_rgba_horiz_step(src, src_stride, dst, dst_stride, radii, region);
}
static inline void
_box_blur_rgba_vert_step_neon(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_vert_step_neon(const uint32_t* restrict src, int src_stride,
uint32_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
_box_blur_rgba_vert_step(src, src_stride, dst, dst_stride, radii, region);
}
#endif

View File

@ -1,25 +1,23 @@
#ifdef BUILD_SSE3
static inline void
_box_blur_rgba_horiz_step_sse3(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_horiz_step_sse3(const uint32_t* restrict src, int src_stride,
uint32_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_rgba_horiz_step(srcdata, dstdata, radii, len, loops);
_box_blur_rgba_horiz_step(src, src_stride, dst, dst_stride, radii, region);
}
static inline void
_box_blur_rgba_vert_step_sse3(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
_box_blur_rgba_vert_step_sse3(const uint32_t* restrict src, int src_stride,
uint32_t* restrict dst, int dst_stride,
const int* restrict const radii,
const int len,
const int loops)
Eina_Rectangle region)
{
// TODO: implement optimized code here and remove the following line:
_box_blur_rgba_vert_step(srcdata, dstdata, radii, len, loops);
_box_blur_rgba_vert_step(src, src_stride, dst, dst_stride, radii, region);
}
#endif

View File

@ -46,64 +46,68 @@ _box_blur_auto_radius(int *radii, int r)
#endif
static void
_box_blur_horiz_rgba(uint32_t *src, uint32_t *dst, int* radii, int w, int h)
_box_blur_horiz_rgba(const uint32_t *src, int src_stride,
uint32_t *dst, int dst_stride,
int* radii, Eina_Rectangle region)
{
DEBUG_TIME_BEGIN();
#ifdef BUILD_SSE3
if (eina_cpu_features_get() & EINA_CPU_SSE3)
{
_box_blur_rgba_horiz_step_sse3(src, dst, radii, w, h);
_box_blur_rgba_horiz_step_sse3(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_MMX
if (eina_cpu_features_get() & EINA_CPU_MMX)
{
_box_blur_rgba_horiz_step_mmx(src, dst, radii, w, h);
_box_blur_rgba_horiz_step_mmx(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_NEON
if (eina_cpu_features_get() & EINA_CPU_NEON)
{
_box_blur_rgba_horiz_step_neon(src, dst, radii, w, h);
_box_blur_rgba_horiz_step_neon(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
_box_blur_rgba_horiz_step(src, dst, radii, w, h);
_box_blur_rgba_horiz_step(src, src_stride, dst, dst_stride, radii, region);
end:
DEBUG_TIME_END();
}
static void
_box_blur_vert_rgba(uint32_t *src, uint32_t *dst, int* radii, int w, int h)
_box_blur_vert_rgba(const uint32_t *src, int src_stride,
uint32_t *dst, int dst_stride,
int* radii, Eina_Rectangle region)
{
DEBUG_TIME_BEGIN();
#ifdef BUILD_SSE3
if (eina_cpu_features_get() & EINA_CPU_SSE3)
{
_box_blur_rgba_vert_step_sse3(src, dst, radii, h, w);
_box_blur_rgba_vert_step_sse3(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_MMX
if (eina_cpu_features_get() & EINA_CPU_MMX)
{
_box_blur_rgba_vert_step_mmx(src, dst, radii, h, w);
_box_blur_rgba_vert_step_mmx(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_NEON
if (eina_cpu_features_get() & EINA_CPU_NEON)
{
_box_blur_rgba_vert_step_neon(src, dst, radii, h, w);
_box_blur_rgba_vert_step_neon(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
_box_blur_rgba_vert_step(src, dst, radii, h, w);
_box_blur_rgba_vert_step(src, src_stride, dst, dst_stride, radii, region);
end:
DEBUG_TIME_END();
@ -121,106 +125,172 @@ end:
#endif
static void
_box_blur_horiz_alpha(const DATA8 *src, DATA8 *dst, int* radii, int w, int h)
_box_blur_horiz_alpha(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
int* radii, Eina_Rectangle region)
{
DEBUG_TIME_BEGIN();
#ifdef BUILD_SSE3
if (eina_cpu_features_get() & EINA_CPU_SSE3)
{
_box_blur_alpha_horiz_step_sse3(src, dst, radii, w, h);
_box_blur_alpha_horiz_step_sse3(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_MMX
if (eina_cpu_features_get() & EINA_CPU_MMX)
{
_box_blur_alpha_horiz_step_mmx(src, dst, radii, w, h);
_box_blur_alpha_horiz_step_mmx(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_NEON
if (eina_cpu_features_get() & EINA_CPU_NEON)
{
_box_blur_alpha_horiz_step_neon(src, dst, radii, w, h);
_box_blur_alpha_horiz_step_neon(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
_box_blur_alpha_horiz_step(src, dst, radii, w, h);
_box_blur_alpha_horiz_step(src, src_stride, dst, dst_stride, radii, region);
end:
DEBUG_TIME_END();
}
static void
_box_blur_vert_alpha(const DATA8 *src, DATA8 *dst, int* radii, int w, int h)
_box_blur_vert_alpha(const uint8_t *src, int src_stride,
uint8_t *dst, int dst_stride,
int* radii, Eina_Rectangle region)
{
DEBUG_TIME_BEGIN();
#ifdef BUILD_SSE3
if (eina_cpu_features_get() & EINA_CPU_SSE3)
{
_box_blur_alpha_vert_step_sse3(src, dst, radii, h, w);
_box_blur_alpha_vert_step_sse3(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_MMX
if (eina_cpu_features_get() & EINA_CPU_MMX)
{
_box_blur_alpha_vert_step_mmx(src, dst, radii, h, w);
_box_blur_alpha_vert_step_mmx(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
#ifdef BUILD_NEON
if (eina_cpu_features_get() & EINA_CPU_NEON)
{
_box_blur_alpha_vert_step_neon(src, dst, radii, h, w);
_box_blur_alpha_vert_step_neon(src, src_stride, dst, dst_stride, radii, region);
goto end;
}
#endif
_box_blur_alpha_vert_step(src, dst, radii, h, w);
_box_blur_alpha_vert_step(src, src_stride, dst, dst_stride, radii, region);
end:
DEBUG_TIME_END();
}
static inline Eina_Rectangle
_rect(int x, int y, int w, int h, int maxw, int maxh)
{
Eina_Rectangle rect;
if (x < 0)
{
w -= (-x);
x = 0;
}
if (y < 0)
{
h -= (-y);
y = 0;
}
if ((x + w) > maxw) w = maxw - x;
if ((y + h) > maxh) h = maxh - y;
if (w < 0) w = 0;
if (h < 0) h = 0;
rect.x = x;
rect.y = y;
rect.w = w;
rect.h = h;
return rect;
}
// Shorthand for _rect(): clips against the variables named `w` and `h` that
// must be in scope wherever this macro is expanded.
#define RECT(_x, _y, _w, _h) _rect(_x, _y, _w, _h, w, h)
static Eina_Bool
_box_blur_apply(Evas_Filter_Command *cmd, Eina_Bool vert, Eina_Bool rgba)
{
unsigned int src_len, src_stride, dst_len, dst_stride;
Eina_Bool ret = EINA_TRUE;
Eina_Bool ret = EINA_FALSE;
Eina_Rectangle o, region[4];
int radii[7] = {0};
unsigned int r;
int radius, regions, w, h;
void *src, *dst;
r = abs(vert ? cmd->blur.dy : cmd->blur.dx);
radius = abs(vert ? cmd->blur.dy : cmd->blur.dx);
src = _buffer_map_all(cmd->input->buffer, &src_len, E_READ, rgba ? E_ARGB : E_ALPHA, &src_stride);
dst = _buffer_map_all(cmd->output->buffer, &dst_len, E_WRITE, rgba ? E_ARGB : E_ALPHA, &dst_stride);
if (!src || !dst) goto unmap;
if (cmd->blur.auto_count)
_box_blur_auto_radius(radii, r);
_box_blur_auto_radius(radii, radius);
else for (int k = 0; k < cmd->blur.count; k++)
radii[k] = r;
radii[k] = radius;
if (src && dst)
w = cmd->input->w;
h = cmd->input->h;
o = cmd->ctx->obscured.effective;
if (!o.w || !o.h)
{
region[0] = RECT(0, 0, w, h);
regions = 1;
}
else if (!vert)
{
// top (full), left, right, bottom (full)
region[0] = RECT(0, 0, w, o.y);
region[1] = RECT(0, o.y, o.x, o.h);
region[2] = RECT(o.x + o.w, o.y, w - o.x - o.w, o.h);
region[3] = RECT(0, o.y + o.h, w, h - o.y - o.h);
regions = 4;
}
else
{
// left (full), top, bottom, right (full)
region[0] = RECT(0, 0, o.x, h);
region[1] = RECT(o.x, 0, o.w, o.y);
region[2] = RECT(o.x, o.y + o.h, o.w, h - o.y - o.h);
region[3] = RECT(o.x + o.w, 0, w - o.x - o.w, h);
regions = 4;
}
XDBG("Box blur on image %dx%d obscured by %d,%d %dx%d", w, h, o.x, o.y, o.w, o.h);
for (int k = 0; k < regions; k++)
{
XDBG("Box blur in region %d,%d %dx%d", region[k].x, region[k].y, region[k].w, region[k].h);
if (rgba)
{
if (!vert)
_box_blur_horiz_rgba(src, dst, radii, cmd->input->w, cmd->input->h);
_box_blur_horiz_rgba(src, src_stride / 4, dst, dst_stride / 4, radii, region[k]);
else
_box_blur_vert_rgba(src, dst, radii, cmd->input->w, cmd->input->h);
_box_blur_vert_rgba(src, src_stride / 4, dst, dst_stride / 4, radii, region[k]);
}
else
{
if (!vert)
_box_blur_horiz_alpha(src, dst, radii, cmd->input->w, cmd->input->h);
_box_blur_horiz_alpha(src, src_stride, dst, dst_stride, radii, region[k]);
else
_box_blur_vert_alpha(src, dst, radii, cmd->input->w, cmd->input->h);
_box_blur_vert_alpha(src, src_stride, dst, dst_stride, radii, region[k]);
}
}
else ret = EINA_FALSE;
ret = EINA_TRUE;
unmap:
ector_buffer_unmap(cmd->input->buffer, src, src_len);
ector_buffer_unmap(cmd->output->buffer, dst, dst_len);

View File

@ -34,8 +34,8 @@ _vflip_cpu(Evas_Filter_Command *cmd)
EINA_SAFETY_ON_FALSE_GOTO(in != out, end);
oy = cmd->draw.oy;
t = cmd->ctx->padt;
b = cmd->ctx->padb;
t = cmd->ctx->pad.final.t;
b = cmd->ctx->pad.final.b;
objh = h - t - b;
center = t + objh / 2 + oy;