Evas filters: Add optimizable blur function

Prepare optimization paths for blur operations, as they are VERY
costly. This simple change, when compiled with gcc's -O3 flag, boosts
horizontal blur performance by more than 50%, because STEP is the
compile-time constant 1 (so memory accesses, increments, etc. are
all very simple).

The objective is to eventually support NEON, MMX and SSE code paths
as well, selected by runtime detection.
This commit is contained in:
Jean-Philippe Andre 2014-03-10 18:36:28 +09:00
parent 1960c97eb4
commit cb69700389
4 changed files with 96 additions and 76 deletions

View File

@ -441,6 +441,8 @@ lib/evas/filters/evas_filter_transform.c \
lib/evas/filters/evas_filter_utils.c \
lib/evas/filters/evas_filter_private.h
EXTRA_DIST += \
lib/evas/filters/blur/blur_gaussian_alpha_.c
### Engines

View File

@ -0,0 +1,74 @@
/* Datatypes and MIN macro */
#include "evas_common_private.h"
#include "../evas_filter_private.h"
#if !defined (FUNCTION_NAME) || !defined (STEP)
# error Must define FUNCTION_NAME and STEP
#endif
/*
 * Template for a 1-D weighted blur over 8-bit (DATA8) samples.
 *
 * The including file defines FUNCTION_NAME (name of the generated function)
 * and STEP (distance, in elements, between two consecutive samples of one
 * line). STEP may refer to the parameters below, so their names must not be
 * changed.
 *
 *  srcdata, dstdata  first sample of the first line to read / to write
 *  radius            kernel half-width; weights[] is indexed 0 .. 2 * radius,
 *                    with weights[radius] applied to the sample being written
 *  len               number of samples in one line
 *  loops             number of lines to process
 *  loopstep          offset, in elements, from the start of one line to the
 *                    start of the next
 *  weights           kernel coefficients
 *  pow2_divider      right shift applied to the full-window sum instead of a
 *                    division (presumably log2 of the sum of all weights --
 *                    TODO confirm against the weight generator)
 *
 * Samples whose window is clipped by a line boundary are normalised by the
 * sum of the weights that were actually used. The clipped windows are clamped
 * to the line, so lines shorter than 2 * radius no longer read past their end.
 */
static inline void
FUNCTION_NAME(const DATA8* restrict srcdata, DATA8* restrict dstdata,
              const int radius, const int len,
              const int loops, const int loopstep,
              const int* restrict weights, const int pow2_divider)
{
   int i, j, k, acc, divider;
   const int diameter = 2 * radius + 1;
   const int left = MIN(radius, len);
   const int right = MIN(radius, (len - radius));
   // Widest window available on the right edge; this is 2 * radius whenever
   // len >= 2 * radius, i.e. whenever the line is long enough.
   const int right_span = MIN(2 * radius, len);
   const DATA8* restrict s;
   const DATA8* restrict src;
   DATA8* restrict dst;

   for (i = loops; i; --i)
     {
        src = srcdata;
        dst = dstdata;

        // left: the window starts at the first sample of the line
        for (k = 0; k < left; k++, dst += STEP)
          {
             // Last readable sample for this window, clamped to the line.
             const int last = MIN(k + radius, len - 1);

             acc = 0;
             divider = 0;
             s = src;
             for (j = 0; j <= last; j++, s += STEP)
               {
                  acc += (*s) * weights[j + radius - k];
                  divider += weights[j + radius - k];
               }
             // Assumes the weights are positive, so divider is never zero.
             *dst = acc / divider;
          }

        // middle: full window, normalised with a shift
        for (k = radius; k < (len - radius); k++, src += STEP, dst += STEP)
          {
             acc = 0;
             s = src;
             for (j = 0; j < diameter; j++, s += STEP)
               acc += (*s) * weights[j];
             *dst = acc >> pow2_divider;
          }

        // right: the window ends at the last sample of the line
        for (k = 0; k < right; k++, dst += STEP, src += STEP)
          {
             acc = 0;
             divider = 0;
             s = src;
             for (j = 0; j < right_span - k; j++, s += STEP)
               {
                  acc += (*s) * weights[j];
                  divider += weights[j];
               }
             // Assumes the weights are positive, so divider is never zero.
             *dst = acc / divider;
          }

        dstdata += loopstep;
        srcdata += loopstep;
     }
}
#undef FUNCTION_NAME
#undef STEP

View File

@ -379,62 +379,6 @@ _sin_blur_weights_get(int *weights, int *pow2_divider, int radius)
*pow2_divider = nextpow2;
}
/*
 * Blur a single line of 8-bit (DATA8) samples.
 *
 *  src, dst      first sample to read / to write
 *  radius        kernel half-width; weights[] is indexed 0 .. 2 * radius,
 *                with weights[radius] applied to the sample being written
 *  len           number of samples in the line
 *  step          distance, in elements, between two consecutive samples
 *  weights       kernel coefficients
 *  pow2_divider  right shift applied to the full-window sum instead of a
 *                division
 *
 * Samples whose window is clipped by a line boundary are normalised by the
 * sum of the weights actually used. If that sum is zero a critical message
 * is logged and the function returns without processing the rest of the line.
 * The clipped windows are clamped to the line, so lines shorter than
 * 2 * radius no longer read past their end.
 */
static void
_gaussian_blur_step_alpha(DATA8 *src, DATA8 *dst, int radius, int len, int step,
                          int *weights, int pow2_divider)
{
   int j, k, acc, divider, last;
   DATA8 *s = src;
   const int diameter = 2 * radius + 1;
   int left = MIN(radius, len);
   int right = MIN(radius, (len - radius));
   // Widest window available on the right edge; this is 2 * radius whenever
   // len >= 2 * radius.
   const int right_span = MIN(2 * radius, len);

   // left: the window starts at the first sample of the line
   for (k = 0; k < left; k++, dst += step)
     {
        // Last readable sample for this window, clamped to the line.
        last = MIN(k + radius, len - 1);
        acc = 0;
        divider = 0;
        s = src;
        for (j = 0; j <= last; j++, s += step)
          {
             acc += (*s) * weights[j + radius - k];
             divider += weights[j + radius - k];
          }
        if (!divider) goto div_zero;
        *dst = acc / divider;
     }

   // middle: full window, normalised with a shift
   for (k = radius; k < (len - radius); k++, src += step, dst += step)
     {
        acc = 0;
        s = src;
        for (j = 0; j < diameter; j++, s += step)
          acc += (*s) * weights[j];
        *dst = acc >> pow2_divider;
     }

   // right: the window ends at the last sample of the line
   for (k = 0; k < right; k++, dst += step, src += step)
     {
        acc = 0;
        divider = 0;
        s = src;
        for (j = 0; j < right_span - k; j++, s += step)
          {
             acc += (*s) * weights[j];
             divider += weights[j];
          }
        if (!divider) goto div_zero;
        *dst = acc / divider;
     }

   return;

div_zero:
   CRI("Division by zero avoided! Something is very wrong here!");
}
static void
_gaussian_blur_step_rgba(DATA32 *src, DATA32 *dst, int radius, int len, int step,
int *weights, int pow2_divider)
@ -511,45 +455,40 @@ div_zero:
CRI("Division by zero avoided! Something is very wrong here!");
}
// Instantiate the blur template for rows: consecutive samples of a row are
// adjacent in memory, so STEP is the constant 1.
#define FUNCTION_NAME _gaussian_blur_horiz_alpha_step
#define STEP 1
#include "./blur/blur_gaussian_alpha_.c"
// Horizontal blur of an 8-bit (DATA8) buffer made of h rows of w samples.
// NOTE(review): this hunk is rendered without +/- markers. The non-const
// signature, the `k` declaration and the per-row loop calling
// _gaussian_blur_step_alpha look like the removed lines; the const-qualified
// signature and the single template call look like their replacement.
static void
_gaussian_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
_gaussian_blur_horiz_alpha(const DATA8 *src, DATA8 *dst, int radius, int w, int h)
{
int *weights;
int k, pow2_div = 0;
int pow2_div = 0;
// The kernel has 2 * radius + 1 taps; alloca() takes the buffer from the
// stack, so it is released automatically when this function returns.
weights = alloca((2 * radius + 1) * sizeof(int));
_sin_blur_weights_get(weights, &pow2_div, radius);
DEBUG_TIME_BEGIN();
// One call per row, advancing both buffers by one row (w samples).
for (k = h; k; k--)
{
_gaussian_blur_step_alpha(src, dst, radius, w, 1, weights, pow2_div);
dst += w;
src += w;
}
// Template arguments: len = w, loops = h, loopstep = w, i.e. h rows of
// w samples, each row starting w elements after the previous one.
_gaussian_blur_horiz_alpha_step(src, dst, radius, w, h, w, weights, pow2_div);
DEBUG_TIME_END();
}
// Instantiate the blur template for columns: consecutive samples of a column
// are w elements apart and there are w columns to process, so STEP can simply
// expand to the template's `loops` parameter (which receives w below).
#define FUNCTION_NAME _gaussian_blur_vert_alpha_step
#define STEP loops
#include "./blur/blur_gaussian_alpha_.c"
// Vertical blur of an 8-bit (DATA8) buffer made of h rows of w samples.
// NOTE(review): this hunk is rendered without +/- markers. The non-const
// signature, the `k` declaration and the per-column loop calling
// _gaussian_blur_step_alpha look like the removed lines; the const-qualified
// signature and the single template call look like their replacement.
static void
_gaussian_blur_vert_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
_gaussian_blur_vert_alpha(const DATA8 *src, DATA8 *dst, int radius, int w, int h)
{
int *weights;
int k, pow2_div = 0;
int pow2_div = 0;
// The kernel has 2 * radius + 1 taps; alloca() takes the buffer from the
// stack, so it is released automatically when this function returns.
weights = alloca((2 * radius + 1) * sizeof(int));
_sin_blur_weights_get(weights, &pow2_div, radius);
DEBUG_TIME_BEGIN();
// One call per column: h samples spaced w apart, then move one element right.
for (k = w; k; k--)
{
_gaussian_blur_step_alpha(src, dst, radius, h, w, weights, pow2_div);
dst += 1;
src += 1;
}
// Template arguments: len = h, loops = w, loopstep = 1, i.e. w columns of
// h samples, each column starting one element after the previous one.
_gaussian_blur_vert_alpha_step(src, dst, radius, h, w, 1, weights, pow2_div);
DEBUG_TIME_END();
}

View File

@ -29,6 +29,11 @@
#define GREEN_OF(a) (((a) >> 8) & 0xff)
#define BLUE_OF(a) ((a) & 0xff)
// The 'restrict' keyword is part of C99. When compiling as an older dialect
// (or when __STDC_VERSION__ is not defined at all, which is tested explicitly
// to stay quiet under -Wundef), fall back to the GCC-compatible spelling so
// the aliasing hint is kept, and drop the qualifier on any other compiler.
#if !defined(__STDC_VERSION__) || (__STDC_VERSION__ < 199901L)
# if defined(__GNUC__)
#  define restrict __restrict__
# else
#  define restrict
# endif
#endif
// Helpers
#define ENFN ctx->evas->engine.func
#define ENDT ctx->evas->engine.data.output