Evas filters: Optimize RGBA blur as well

Same as Alpha blur, use combination of box blurs,
and put all that code into optimizable functions.
This commit is contained in:
Jean-Philippe Andre 2014-03-12 13:55:44 +09:00
parent 4443ecfa8b
commit 592204fe73
4 changed files with 304 additions and 105 deletions

View File

@ -1,6 +1,7 @@
/* @file blur_box_alpha_.c
* Defines the following function:
* _box_blur_alpha_step
* Defines the following functions:
* _box_blur_alpha_horiz_step
* _box_blur_alpha_vert_step
*/
#include "evas_common_private.h"

View File

@ -7,89 +7,281 @@
#include "evas_common_private.h"
#include "../evas_filter_private.h"
#if !defined (FUNCTION_NAME) || !defined (STEP)
# error Must define FUNCTION_NAME and STEP
static inline void
_box_blur_horiz_rgba_step(const DATA32* restrict const srcdata,
DATA32* restrict const dstdata,
const int* restrict const radii,
const int len,
const int loops)
{
const DATA32* restrict src;
DATA32* restrict dst;
DATA32* restrict span1;
DATA32* restrict span2;
#if DIV_USING_BITSHIFT
int pow2_shifts[6] = {0};
int numerators[6] = {0};
for (int run = 0; radii[run]; run++)
{
const int div = radii[run] * 2 + 1;
pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
numerators[run] = (1 << pow2_shifts[run]) / (div);
}
#endif
static inline void
FUNCTION_NAME(const DATA32* restrict src, DATA32* restrict dst,
const int radius, const int len,
const int loops, const int loopstep)
{
DEFINE_DIVIDER(2 * radius + 1);
const int left = MIN(radius, len);
const int right = MIN(radius, (len - radius));
span1 = alloca(len * sizeof(DATA32));
span2 = alloca(len * sizeof(DATA32));
for (int l = loops; l; --l)
// For each line, apply as many blurs as requested
for (int l = 0; l < loops; l++)
{
int acc[4] = {0};
int x, k;
int divider;
int run;
const DATA8* restrict sl = (DATA8 *) src;
const DATA8* restrict sr = (DATA8 *) src;
DATA8* restrict d = (DATA8 *) dst;
// New line: reset source & destination pointers
src = srcdata + len * l;
if (!radii[1]) // Only one run
dst = dstdata + len * l;
else
dst = span1;
// Read-ahead
for (x = left; x; x--)
// Apply blur with current radius
for (run = 0; radii[run]; run++)
{
for (k = 0; k < 4; k++)
acc[k] += sr[k];
sr += STEP;
const int radius = radii[run];
const int left = MIN(radius, len);
const int right = MIN(radius, (len - radius));
#if DIV_USING_BITSHIFT
const int pow2 = pow2_shifts[run];
const int numerator = numerators[run];
#else
const int divider = 2 * radius + 1;
#endif
const DATA8* restrict sl = (DATA8 *) src;
const DATA8* restrict sr = (DATA8 *) src;
DATA8* restrict d = (DATA8 *) dst;
int acc[4] = {0};
int x, k;
// Read-ahead
for (x = left; x; x--)
{
for (k = 0; k < 4; k++)
acc[k] += sr[k];
sr += sizeof(DATA32);
}
// Left
for (x = 0; x < left; x++)
{
for (k = 0; k < 4; k++)
acc[k] += sr[k];
sr += sizeof(DATA32);
const int divider = x + left + 1;
d[ALPHA] = acc[ALPHA] / divider;
d[RED] = acc[RED] / divider;
d[GREEN] = acc[GREEN] / divider;
d[BLUE] = acc[BLUE] / divider;
d += sizeof(DATA32);
}
// Main part
for (x = len - (2 * radius); x > 0; x--)
{
for (k = 0; k < 4; k++)
acc[k] += sr[k];
sr += sizeof(DATA32);
d[ALPHA] = DIVIDE(acc[ALPHA]);
d[RED] = DIVIDE(acc[RED]);
d[GREEN] = DIVIDE(acc[GREEN]);
d[BLUE] = DIVIDE(acc[BLUE]);
d += sizeof(DATA32);
for (k = 0; k < 4; k++)
acc[k] -= sl[k];
sl += sizeof(DATA32);
}
// Right part
for (x = right; x; x--)
{
const int divider = x + right;
d[ALPHA] = acc[ALPHA] / divider;
d[RED] = acc[RED] / divider;
d[GREEN] = acc[GREEN] / divider;
d[BLUE] = acc[BLUE] / divider;
d += sizeof(DATA32);
for (k = 0; k < 4; k++)
acc[k] -= sl[k];
sl += sizeof(DATA32);
}
// More runs to go: swap spans
if (radii[run + 1])
{
src = dst;
if (radii[run + 2])
{
// Two more runs: swap
DATA32* swap = span1;
span1 = span2;
span2 = swap;
dst = span1;
}
else
{
// Last run: write directly to dstdata
dst = dstdata + len * l;
}
}
}
// Left
for (x = 0; x < left; x++)
{
for (k = 0; k < 4; k++)
acc[k] += sr[k];
sr += STEP;
divider = x + left + 1;
d[ALPHA] = acc[ALPHA] / divider;
d[RED] = acc[RED] / divider;
d[GREEN] = acc[GREEN] / divider;
d[BLUE] = acc[BLUE] / divider;
d += STEP;
}
// Main part
for (x = len - (2 * radius); x > 0; x--)
{
for (k = 0; k < 4; k++)
acc[k] += sr[k];
sr += STEP;
d[ALPHA] = DIVIDE(acc[ALPHA]);
d[RED] = DIVIDE(acc[RED]);
d[GREEN] = DIVIDE(acc[GREEN]);
d[BLUE] = DIVIDE(acc[BLUE]);
d += STEP;
for (k = 0; k < 4; k++)
acc[k] -= sl[k];
sl += STEP;
}
// Right part
for (x = right; x; x--)
{
divider = x + right;
d[ALPHA] = acc[ALPHA] / divider;
d[RED] = acc[RED] / divider;
d[GREEN] = acc[GREEN] / divider;
d[BLUE] = acc[BLUE] / divider;
d += STEP;
for (k = 0; k < 4; k++)
acc[k] -= sl[k];
sl += STEP;
}
src += loopstep;
dst += loopstep;
}
}
#undef FUNCTION_NAME
#undef STEP
/**
 * Vertical box blur for RGBA buffers (four 8-bit channels per DATA32 pixel).
 *
 * For every column, applies each box blur listed in @p radii in sequence.
 * The column is first gathered into a contiguous work span so the running-sum
 * blur works on contiguous memory; intermediate results ping-pong between two
 * work spans and the final result is scattered back into the destination.
 *
 * @param srcdata Source pixels. Column l starts at srcdata + l, and successive
 *                pixels of one column are @p loops pixels apart.
 * @param dstdata Destination pixels, addressed the same way as @p srcdata.
 * @param radii   Zero-terminated list of box radii, applied in order. An
 *                empty list (radii[0] == 0) copies the source unchanged.
 * @param len     Number of pixels in one column.
 * @param loops   Number of columns to process; also used as the row stride.
 *
 * Preconditions (not checked here):
 *  - every radius satisfies 2 * radius <= len; otherwise the read-ahead and
 *    left-edge loops read past the end of the work span;
 *  - when DIV_USING_BITSHIFT is set, at most 6 radii are supported (size of
 *    the precomputed tables below).
 *
 * Both work spans are allocated with alloca(), so stack usage is
 * 2 * len * sizeof(DATA32) bytes.
 */
static inline void
_box_blur_vert_rgba_step(const DATA32* restrict const srcdata,
                         DATA32* restrict const dstdata,
                         const int* restrict const radii,
                         const int len,
                         const int loops)
{
   /* Note: This function tries to optimize cache hits by working on
    * contiguous horizontal spans.
    */
   const int step = loops;
   DATA32 *span1;
   DATA32 *span2;

#if DIV_USING_BITSHIFT
   // Per-run fixed-point reciprocal of the full kernel size (2 * radius + 1)
   int pow2_shifts[6] = {0};
   int numerators[6] = {0};

   for (int run = 0; radii[run]; run++)
     {
        const int div = radii[run] * 2 + 1;
        pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10);
        numerators[run] = (1 << pow2_shifts[run]) / (div);
     }
#endif

   // Nothing to process; also keeps alloca() away from zero/negative sizes
   if ((len <= 0) || (loops <= 0))
     return;

   span1 = alloca(len * sizeof(DATA32));
   span2 = alloca(len * sizeof(DATA32));

   // For each column, apply as many blurs as requested
   for (int l = 0; l < loops; l++)
     {
        /* Working pointers are deliberately NOT restrict-qualified: they are
         * exchanged after every run, and assigning one restrict pointer to
         * another declared in the same block is undefined behaviour. */
        DATA32 *src = span1;
        DATA32 *dst = span2;
        const DATA32 *srcptr = srcdata + l;
        DATA32 *dstptr = dstdata + l;

        // Rotate input column into the first work span
        for (int k = 0; k < len; k++)
          {
             span1[k] = *srcptr;
             srcptr += step;
          }

        // Apply one box blur per entry of radii
        for (int run = 0; radii[run]; run++)
          {
             const int radius = radii[run];
             const int left = MIN(radius, len);
             const int right = MIN(radius, (len - radius));
             /* NOTE(review): DIVIDE() is defined outside this block and
              * presumably expands to an expression using pow2/numerator or
              * divider -- keep these names as they are. */
#if DIV_USING_BITSHIFT
             const int pow2 = pow2_shifts[run];
             const int numerator = numerators[run];
#else
             const int divider = 2 * radius + 1;
#endif
             const DATA8* restrict sl = (DATA8 *) src; // trailing edge of window
             const DATA8* restrict sr = (DATA8 *) src; // leading edge of window
             DATA8* restrict d = (DATA8 *) dst;
             int acc[4] = {0};
             int x, k;

             // Read-ahead: preload the first 'left' pixels into the window
             for (x = left; x; x--)
               {
                  for (k = 0; k < 4; k++)
                    acc[k] += sr[k];
                  sr += sizeof(DATA32);
               }

             // Left edge: the window is still growing, divide by its real size
             for (x = 0; x < left; x++)
               {
                  const int edge_div = x + left + 1;

                  for (k = 0; k < 4; k++)
                    acc[k] += sr[k];
                  sr += sizeof(DATA32);

                  d[ALPHA] = acc[ALPHA] / edge_div;
                  d[RED] = acc[RED] / edge_div;
                  d[GREEN] = acc[GREEN] / edge_div;
                  d[BLUE] = acc[BLUE] / edge_div;
                  d += sizeof(DATA32);
               }

             // Main part: full-size window slides by one pixel per iteration
             for (x = len - (2 * radius); x > 0; x--)
               {
                  for (k = 0; k < 4; k++)
                    acc[k] += sr[k];
                  sr += sizeof(DATA32);

                  d[ALPHA] = DIVIDE(acc[ALPHA]);
                  d[RED] = DIVIDE(acc[RED]);
                  d[GREEN] = DIVIDE(acc[GREEN]);
                  d[BLUE] = DIVIDE(acc[BLUE]);
                  d += sizeof(DATA32);

                  for (k = 0; k < 4; k++)
                    acc[k] -= sl[k];
                  sl += sizeof(DATA32);
               }

             // Right edge: the window shrinks, divide by its real size
             for (x = right; x > 0; x--)
               {
                  const int edge_div = x + right;

                  d[ALPHA] = acc[ALPHA] / edge_div;
                  d[RED] = acc[RED] / edge_div;
                  d[GREEN] = acc[GREEN] / edge_div;
                  d[BLUE] = acc[BLUE] / edge_div;
                  d += sizeof(DATA32);

                  for (k = 0; k < 4; k++)
                    acc[k] -= sl[k];
                  sl += sizeof(DATA32);
               }

             /* The span just written becomes the input of the next run.
              * Swapping unconditionally means 'src' always designates the
              * most recent result, including when no run was executed. */
             DATA32 *swap = src;
             src = dst;
             dst = swap;
          }

        // Rotate the latest result back into the destination column
        for (int k = 0; k < len; k++)
          {
             *dstptr = src[k];
             dstptr += step;
          }
     }
}

View File

@ -915,6 +915,20 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
if (dy < 0) dy = 0;
if (!dx && !dy) goto fail;
in = _filter_buffer_get(ctx, inbuf);
if (!in)
{
ERR("Buffer %d does not exist [input].", inbuf);
goto fail;
}
out = _filter_buffer_get(ctx, outbuf);
if (!out)
{
ERR("Buffer %d does not exist [output].", outbuf);
goto fail;
}
switch (type)
{
case EVAS_FILTER_BLUR_GAUSSIAN:
@ -926,7 +940,6 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
break;
case EVAS_FILTER_BLUR_DEFAULT:
count = 1;
/* In DEFAULT mode we cheat, depending on the size of the kernel:
* For 1px to 2px, use true Gaussian blur.
@ -938,6 +951,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
* needed, of course!
*/
{
const Eina_Bool alpha = in->alpha_only;
int tmp_out = outbuf;
int tmp_in = inbuf;
int tmp_ox = ox;
@ -946,7 +960,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
id = -1;
if (dx && dy)
{
tmp = evas_filter_temporary_buffer_get(ctx, 0, 0, EINA_TRUE);
tmp = evas_filter_temporary_buffer_get(ctx, 0, 0, alpha);
if (!tmp) goto fail;
tmp_in = tmp_out = tmp->id;
tmp_ox = tmp_oy = 0;
@ -989,20 +1003,6 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx,
goto fail;
}
in = _filter_buffer_get(ctx, inbuf);
if (!in)
{
ERR("Buffer %d does not exist [input].", inbuf);
goto fail;
}
out = _filter_buffer_get(ctx, outbuf);
if (!out)
{
ERR("Buffer %d does not exist [output].", outbuf);
goto fail;
}
if (!in->alpha_only && out->alpha_only)
{
ERR("Output and input don't have the same format");

View File

@ -33,27 +33,21 @@ _box_blur_auto_radius(int *radii, int r)
}
}
#define FUNCTION_NAME _box_blur_horiz_rgba_step
#define STEP (sizeof(DATA32))
#include "./blur/blur_box_rgba_.c"
static void
_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
{
DEBUG_TIME_BEGIN();
_box_blur_horiz_rgba_step(src, dst, radius, w, h, w);
_box_blur_horiz_rgba_step(src, dst, radii, w, h);
DEBUG_TIME_END();
}
#define FUNCTION_NAME _box_blur_vert_rgba_step
#define STEP (loops * sizeof(DATA32))
#include "./blur/blur_box_rgba_.c"
static void
_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h)
{
DEBUG_TIME_BEGIN();
_box_blur_vert_rgba_step(src, dst, radius, h, w, 1);
_box_blur_vert_rgba_step(src, dst, radii, h, w);
DEBUG_TIME_END();
}
@ -61,6 +55,7 @@ static Eina_Bool
_box_blur_horiz_apply_rgba(Evas_Filter_Command *cmd)
{
RGBA_Image *in, *out;
int radii[7] = {0};
unsigned int r;
EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@ -71,11 +66,16 @@ _box_blur_horiz_apply_rgba(Evas_Filter_Command *cmd)
in = cmd->input->backing;
out = cmd->output->backing;
if (cmd->blur.auto_count)
_box_blur_auto_radius(radii, r);
else for (int k = 0; k < cmd->blur.count; k++)
radii[k] = r;
EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE);
EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE);
EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.w >= (2*r + 1), EINA_FALSE);
_box_blur_horiz_rgba(in->image.data, out->image.data, r,
_box_blur_horiz_rgba(in->image.data, out->image.data, radii,
in->cache_entry.w, in->cache_entry.h);
return EINA_TRUE;
@ -85,6 +85,7 @@ static Eina_Bool
_box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
{
RGBA_Image *in, *out;
int radii[7] = {0};
unsigned int r;
EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE);
@ -95,11 +96,16 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
in = cmd->input->backing;
out = cmd->output->backing;
if (cmd->blur.auto_count)
_box_blur_auto_radius(radii, r);
else for (int k = 0; k < cmd->blur.count; k++)
radii[k] = r;
EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE);
EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE);
EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.h >= (2*r + 1), EINA_FALSE);
_box_blur_vert_rgba(in->image.data, out->image.data, r,
_box_blur_vert_rgba(in->image.data, out->image.data, radii,
in->cache_entry.w, in->cache_entry.h);
return EINA_TRUE;