Evas filters: Optimize alpha box blur

Use two optimizable functions for BOX blur: vertical and horizontal.
These functions will run as many times as requested (from 1 to 6 max).

The horizontal case is pretty straightforward as the source is already
contiguous (nice in terms of cache hits). The only catch is to swap
src and dst without ever writing to the input buffer.

In case of vertical blur, we apply the same method as above, after
rotating the column into a horizontal (contiguous) span, and rotating
it back afterwards.

Now, the same needs to be done for RGBA :)
This commit is contained in:
Jean-Philippe Andre 2014-03-12 10:20:27 +09:00
parent 2a1ba1b908
commit 4443ecfa8b
3 changed files with 275 additions and 62 deletions

View File

@ -1,68 +1,246 @@
/* @file blur_box_alpha_.c
* Should define the functions:
* - _box_blur_horiz_alpha_step
* - _box_blur_vert_alpha_step
* Defines the following function:
* _box_blur_alpha_step
*/
#include "evas_common_private.h"
#include "../evas_filter_private.h"
#if !defined (FUNCTION_NAME) || !defined (STEP)
# error Must define FUNCTION_NAME and STEP
static inline void
_box_blur_alpha_horiz_step(const DATA8* restrict const srcdata,
DATA8* restrict const dstdata,
const int* restrict const radii,
const int len,
const int loops)
{
const DATA8* restrict src;
DATA8* restrict dst;
DATA8* restrict span1;
DATA8* restrict span2;
#if DIV_USING_BITSHIFT
int pow2_shifts[6] = {0};
int numerators[6] = {0};
for (int run = 0; radii[run]; run++)
{
const int div = radii[run] * 2 + 1;
pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
numerators[run] = (1 << pow2_shifts[run]) / (div);
}
#endif
static inline void
FUNCTION_NAME(const DATA8* restrict src, DATA8* restrict dst,
const int radius, const int len,
const int loops, const int loopstep)
{
DEFINE_DIVIDER(2 * radius + 1);
const int left = MIN(radius, len);
const int right = MIN(radius, (len - radius));
int acc = 0, k;
span1 = alloca(len);
span2 = alloca(len);
for (int l = loops; l; --l)
// For each line, apply as many blurs as requested
for (int l = 0; l < loops; l++)
{
const DATA8* restrict sr = src;
const DATA8* restrict sl = src;
DATA8* restrict d = dst;
int run;
for (k = left; k; k--)
// New line: reset source & destination pointers
src = srcdata + len * l;
if (!radii[1]) // Only one run
dst = dstdata + len * l;
else
dst = span1;
// Apply blur with current radius
for (run = 0; radii[run]; run++)
{
acc += *sr;
sr += STEP;
}
const int radius = radii[run];
const int left = MIN(radius, len);
const int right = MIN(radius, (len - radius));
int acc = 0;
for (k = 0; k < left; k++)
{
acc += *sr;
*d = acc / (k + left + 1);
sr += STEP;
d += STEP;
}
#if DIV_USING_BITSHIFT
const int pow2 = pow2_shifts[run];
const int numerator = numerators[run];
#else
const int divider = 2 * radius + 1;
#endif
for (k = len - (2 * radius); k; k--)
{
acc += *sr;
*d = DIVIDE(acc);
acc -= *sl;
sl += STEP;
sr += STEP;
d += STEP;
}
const DATA8* restrict sr = src;
const DATA8* restrict sl = src;
DATA8* restrict d = dst;
for (k = right; k; k--)
{
*d = acc / (k + right);
acc -= *sl;
d += STEP;
sl += STEP;
}
// Read-ahead & accumulate
for (int k = left; k; k--)
{
acc += *sr;
sr += 1;
}
src += loopstep;
dst += loopstep;
// Left edge
for (int k = 0; k < left; k++)
{
acc += *sr;
*d = acc / (k + left + 1);
sr += 1;
d += 1;
}
// Middle part, normal blur
for (int k = len - (2 * radius); k; k--)
{
acc += *sr;
*d = DIVIDE(acc);
acc -= *sl;
sl += 1;
sr += 1;
d += 1;
}
// Right edge
for (int k = right; k; k--)
{
*d = acc / (k + right);
acc -= *sl;
d += 1;
sl += 1;
}
// More runs to go: swap spans
if (radii[run + 1])
{
src = dst;
if (radii[run + 2])
{
// Two more runs: swap
DATA8* swap = span1;
span1 = span2;
span2 = swap;
dst = span1;
}
else
{
// Last run: write directly to dstdata
dst = dstdata + len * l;
}
}
}
}
}
#undef FUNCTION_NAME
#undef STEP
// ATTENTION: Make sure the below code's inner loop is the SAME as above.
/* Multi-pass box blur of an 8-bit buffer along its strided direction.
 *
 * The buffer is treated as `loops` interleaved lines of `len` samples each:
 * sample k of line l lives at index (l + k * loops). The visible caller
 * passes (len = h, loops = w), i.e. one line per image column.
 *
 * For every line the samples are first gathered into a contiguous work span,
 * blurred once per entry of `radii`, then scattered back into `dstdata` with
 * the same stride. `srcdata` is only ever read.
 *
 * @param srcdata  Input samples (read-only).
 * @param dstdata  Output samples, same layout as srcdata.
 * @param radii    Zero-terminated list of box radii, one blur pass per entry.
 *                 At most 6 entries are supported (size of the per-run
 *                 divider tables below).
 * @param len      Number of samples per line. Every radius must satisfy
 *                 2 * radius <= len, otherwise the middle/right-edge loop
 *                 counters below become negative and run out of bounds.
 * @param loops    Number of lines, which is also the stride between two
 *                 consecutive samples of one line.
 *
 * NOTE(review): DIVIDE() is defined outside this block; it presumably
 * consumes the locals `pow2`/`numerator` or `divider` declared in the run
 * loop -- keep those names unchanged.
 */
static inline void
_box_blur_alpha_vert_step(const DATA8* restrict const srcdata,
                          DATA8* restrict const dstdata,
                          const int* restrict const radii,
                          const int len,
                          const int loops)
{
   /* Note: This function tries to optimize cache hits by working on
    * contiguous horizontal spans.
    */
   const int step = loops;

   /* These four pointers are assigned to one another (initial setup and the
    * ping-pong swap between runs). Assigning between restrict-qualified
    * pointers declared in the same block is undefined behaviour
    * (C11 6.7.3.1), so they are intentionally NOT restrict. The inner
    * sr/sl/d pointers are declared in a nested block and stay restrict.
    */
   DATA8* src;
   DATA8* dst;
   DATA8* span1;
   DATA8* span2;

   // Nothing to blur; also keeps alloca() and the edge loops away from
   // zero or negative sizes.
   if (len <= 0) return;

#if DIV_USING_BITSHIFT
   // Precompute, per run, the fixed-point reciprocal of (2 * radius + 1)
   int pow2_shifts[6] = {0};
   int numerators[6] = {0};
   for (int run = 0; radii[run]; run++)
     {
        const int div = radii[run] * 2 + 1;
        pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
        numerators[run] = (1 << pow2_shifts[run]) / (div);
     }
#endif

   // Two work spans of one line each, used alternately as input and output
   span1 = alloca(len);
   span2 = alloca(len);

   // For each line, apply as many blurs as requested
   for (int l = 0; l < loops; l++)
     {
        int run;

        // Rotate input into work span
        const DATA8* srcptr = srcdata + l;
        DATA8* s = span1;
        for (int k = len; k; --k)
          {
             *s++ = *srcptr;
             srcptr += step;
          }

        src = span1;
        dst = span2;

        // Apply blur with current radius
        for (run = 0; radii[run]; run++)
          {
             const int radius = radii[run];
             const int left = MIN(radius, len);
             const int right = MIN(radius, (len - radius));
             int acc = 0;

#if DIV_USING_BITSHIFT
             const int pow2 = pow2_shifts[run];
             const int numerator = numerators[run];
#else
             const int divider = 2 * radius + 1;
#endif

             const DATA8* restrict sr = src; // leading (right) edge of window
             const DATA8* restrict sl = src; // trailing (left) edge of window
             DATA8* restrict d = dst;

             // Read-ahead & accumulate
             for (int k = left; k; k--)
               {
                  acc += *sr;
                  sr += 1;
               }

             // Left edge
             for (int k = 0; k < left; k++)
               {
                  acc += *sr;
                  *d = acc / (k + left + 1);
                  sr += 1;
                  d += 1;
               }

             // Middle part, normal blur
             for (int k = len - (2 * radius); k; k--)
               {
                  acc += *sr;
                  *d = DIVIDE(acc);
                  acc -= *sl;
                  sl += 1;
                  sr += 1;
                  d += 1;
               }

             // Right edge
             for (int k = right; k; k--)
               {
                  *d = acc / (k + right);
                  acc -= *sl;
                  d += 1;
                  sl += 1;
               }

             // More runs to go: swap spans
             if (radii[run + 1])
               {
                  DATA8* swap = src;
                  src = dst;
                  dst = swap;
               }
          }

        // Last run: rotate & copy back to destination.
        // After at least one run, dst holds the final result. With an empty
        // radii list dst was never written, so pass the input line through
        // instead of copying uninitialised stack memory.
        const DATA8* restrict result = radii[0] ? dst : src;
        DATA8* restrict dstptr = dstdata + l;
        for (int k = len; k; --k)
          {
             *dstptr = *result++;
             dstptr += step;
          }
     }
}

View File

@ -973,7 +973,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
else
type = EVAS_FILTER_BLUR_BOX;
id = evas_filter_command_blur_add(ctx, drawctx, inbuf, tmp_in,
id = evas_filter_command_blur_add(ctx, drawctx, tmp_in, outbuf,
type, 0, dy, ox, oy, 0);
if (id < 0) goto fail;
cmd = _evas_filter_command_get(ctx, id);

View File

@ -4,6 +4,35 @@
#include <math.h>
#include <time.h>
/* Split a requested blur radius into a short list of box blur radii.
 *
 * Writes a zero-terminated list into `radii` and returns how many non-zero
 * passes were stored before the terminator:
 *  - r <= 2      : one pass using r itself (a warning is logged);
 *  - 3 <= r <= 6 : two passes whose radii add up to r - 1;
 *  - r > 6       : three passes whose radii add up to r.
 *
 * @param radii  Output array; must have room for at least 4 ints.
 * @param r      Requested overall radius.
 * @return Number of passes written (1, 2 or 3).
 */
static int
_box_blur_auto_radius(int *radii, int r)
{
   int first, second, third;

   // Too small to be worth splitting: single pass
   if (r <= 2)
     {
        radii[0] = r;
        radii[1] = 0;
        WRN("Radius is too small for auto box blur: %d", r);
        return 1;
     }

   // Small radius: two passes
   if (r <= 6)
     {
        first = r / 2;
        second = r - first - 1;

        radii[0] = first;
        radii[1] = second;
        radii[2] = 0;
        DBG("Using auto radius for %d: %d %d", r, first, second);
        return 2;
     }

   // Anything larger: three passes, roughly a third of r each
   first = (r + 3) / 3;
   second = (r + 2) / 3;
   third = r - first - second;

   radii[0] = first;
   radii[1] = second;
   radii[2] = third;
   radii[3] = 0;
   DBG("Using auto radius for %d: %d %d %d", r, first, second, third);
   return 3;
}
#define FUNCTION_NAME _box_blur_horiz_rgba_step
#define STEP (sizeof(DATA32))
#include "./blur/blur_box_rgba_.c"
@ -76,27 +105,21 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
return EINA_TRUE;
}
#define FUNCTION_NAME _box_blur_horiz_alpha_step
#define STEP 1
#include "./blur/blur_box_alpha_.c"
static void
_box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
_box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
{
DEBUG_TIME_BEGIN();
_box_blur_horiz_alpha_step(src, dst, radius, w, h, w);
_box_blur_alpha_horiz_step(src, dst, radii, w, h);
DEBUG_TIME_END();
}
#define FUNCTION_NAME _box_blur_vert_alpha_step
#define STEP loops
#include "./blur/blur_box_alpha_.c"
static void
_box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
_box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h)
{
DEBUG_TIME_BEGIN();
_box_blur_vert_alpha_step(src, dst, radius, h, w, 1);
_box_blur_alpha_vert_step(src, dst, radii, h, w);
DEBUG_TIME_END();
}
@ -104,6 +127,7 @@ static Eina_Bool
_box_blur_horiz_apply_alpha(Evas_Filter_Command *cmd)
{
RGBA_Image *in, *out;
int radii[7] = {0};
unsigned int r;
EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@ -114,11 +138,16 @@ _box_blur_horiz_apply_alpha(Evas_Filter_Command *cmd)
in = cmd->input->backing;
out = cmd->output->backing;
if (cmd->blur.auto_count)
_box_blur_auto_radius(radii, r);
else for (int k = 0; k < cmd->blur.count; k++)
radii[k] = r;
EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data8, EINA_FALSE);
EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data8, EINA_FALSE);
EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.w >= (2*r + 1), EINA_FALSE);
_box_blur_horiz_alpha(in->image.data8, out->image.data8, r,
_box_blur_horiz_alpha(in->image.data8, out->image.data8, radii,
in->cache_entry.w, in->cache_entry.h);
return EINA_TRUE;
@ -128,6 +157,7 @@ static Eina_Bool
_box_blur_vert_apply_alpha(Evas_Filter_Command *cmd)
{
RGBA_Image *in, *out;
int radii[7] = {0};
unsigned int r;
EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@ -138,11 +168,16 @@ _box_blur_vert_apply_alpha(Evas_Filter_Command *cmd)
in = cmd->input->backing;
out = cmd->output->backing;
if (cmd->blur.auto_count)
_box_blur_auto_radius(radii, r);
else for (int k = 0; k < cmd->blur.count; k++)
radii[k] = r;
EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data8, EINA_FALSE);
EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data8, EINA_FALSE);
EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.h >= (2*r + 1), EINA_FALSE);
_box_blur_vert_alpha(in->image.data8, out->image.data8, r,
_box_blur_vert_alpha(in->image.data8, out->image.data8, radii,
in->cache_entry.w, in->cache_entry.h);
return EINA_TRUE;