forked from enlightenment/efl
Evas filters: Add optimizable blur function
Prepare optimization paths for blur operations, as they are VERY costly. This simple change, when compiled with gcc's -O3 flag, boosts horizontal blur performance by more than 50%, because STEP is then the compile-time constant 1 (so memory accesses, increments, etc. all become very simple). The objective is to also support NEON, MMX and SSE code paths, selected by runtime detection.
This commit is contained in:
parent
1960c97eb4
commit
cb69700389
|
@ -441,6 +441,8 @@ lib/evas/filters/evas_filter_transform.c \
|
|||
lib/evas/filters/evas_filter_utils.c \
|
||||
lib/evas/filters/evas_filter_private.h
|
||||
|
||||
EXTRA_DIST += \
|
||||
lib/evas/filters/blur/blur_gaussian_alpha_.c
|
||||
|
||||
### Engines
|
||||
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
/* Datatypes and MIN macro */
|
||||
#include "evas_common_private.h"
|
||||
#include "../evas_filter_private.h"
|
||||
|
||||
#if !defined (FUNCTION_NAME) || !defined (STEP)
|
||||
# error Must define FUNCTION_NAME and STEP
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
FUNCTION_NAME(const DATA8* restrict srcdata, DATA8* restrict dstdata,
|
||||
const int radius, const int len,
|
||||
const int loops, const int loopstep,
|
||||
const int* restrict weights, const int pow2_divider)
|
||||
{
|
||||
int i, j, k, acc, divider;
|
||||
const int diameter = 2 * radius + 1;
|
||||
const int left = MIN(radius, len);
|
||||
const int right = MIN(radius, (len - radius));
|
||||
const DATA8* restrict s;
|
||||
const DATA8* restrict src;
|
||||
DATA8* restrict dst;
|
||||
|
||||
for (i = loops; i; --i)
|
||||
{
|
||||
src = srcdata;
|
||||
dst = dstdata;
|
||||
|
||||
// left
|
||||
for (k = 0; k < left; k++, dst += STEP)
|
||||
{
|
||||
acc = 0;
|
||||
divider = 0;
|
||||
s = src;
|
||||
for (j = 0; j <= k + radius; j++, s += STEP)
|
||||
{
|
||||
acc += (*s) * weights[j + radius - k];
|
||||
divider += weights[j + radius - k];
|
||||
}
|
||||
//if (!divider) abort();
|
||||
*dst = acc / divider;
|
||||
}
|
||||
|
||||
// middle
|
||||
for (k = radius; k < (len - radius); k++, src += STEP, dst += STEP)
|
||||
{
|
||||
acc = 0;
|
||||
s = src;
|
||||
for (j = 0; j < diameter; j++, s += STEP)
|
||||
acc += (*s) * weights[j];
|
||||
*dst = acc >> pow2_divider;
|
||||
}
|
||||
|
||||
// right
|
||||
for (k = 0; k < right; k++, dst += STEP, src += STEP)
|
||||
{
|
||||
acc = 0;
|
||||
divider = 0;
|
||||
s = src;
|
||||
for (j = 0; j < 2 * radius - k; j++, s += STEP)
|
||||
{
|
||||
acc += (*s) * weights[j];
|
||||
divider += weights[j];
|
||||
}
|
||||
//if (!divider) abort();
|
||||
*dst = acc / divider;
|
||||
}
|
||||
|
||||
dstdata += loopstep;
|
||||
srcdata += loopstep;
|
||||
}
|
||||
}
|
||||
|
||||
#undef FUNCTION_NAME
|
||||
#undef STEP
|
|
@ -379,62 +379,6 @@ _sin_blur_weights_get(int *weights, int *pow2_divider, int radius)
|
|||
*pow2_divider = nextpow2;
|
||||
}
|
||||
|
||||
static void
|
||||
_gaussian_blur_step_alpha(DATA8 *src, DATA8 *dst, int radius, int len, int step,
|
||||
int *weights, int pow2_divider)
|
||||
{
|
||||
int j, k, acc, divider;
|
||||
DATA8 *s = src;
|
||||
const int diameter = 2 * radius + 1;
|
||||
int left = MIN(radius, len);
|
||||
int right = MIN(radius, (len - radius));
|
||||
|
||||
// left
|
||||
for (k = 0; k < left; k++, dst += step)
|
||||
{
|
||||
acc = 0;
|
||||
divider = 0;
|
||||
s = src;
|
||||
for (j = 0; j <= k + radius; j++, s += step)
|
||||
{
|
||||
acc += (*s) * weights[j + radius - k];
|
||||
divider += weights[j + radius - k];
|
||||
}
|
||||
if (!divider) goto div_zero;
|
||||
*dst = acc / divider;
|
||||
}
|
||||
|
||||
// middle
|
||||
for (k = radius; k < (len - radius); k++, src += step, dst += step)
|
||||
{
|
||||
acc = 0;
|
||||
s = src;
|
||||
for (j = 0; j < diameter; j++, s += step)
|
||||
acc += (*s) * weights[j];
|
||||
*dst = acc >> pow2_divider;
|
||||
}
|
||||
|
||||
// right
|
||||
for (k = 0; k < right; k++, dst += step, src += step)
|
||||
{
|
||||
acc = 0;
|
||||
divider = 0;
|
||||
s = src;
|
||||
for (j = 0; j < 2 * radius - k; j++, s += step)
|
||||
{
|
||||
acc += (*s) * weights[j];
|
||||
divider += weights[j];
|
||||
}
|
||||
if (!divider) goto div_zero;
|
||||
*dst = acc / divider;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
div_zero:
|
||||
CRI("Division by zero avoided! Something is very wrong here!");
|
||||
}
|
||||
|
||||
static void
|
||||
_gaussian_blur_step_rgba(DATA32 *src, DATA32 *dst, int radius, int len, int step,
|
||||
int *weights, int pow2_divider)
|
||||
|
@ -511,45 +455,40 @@ div_zero:
|
|||
CRI("Division by zero avoided! Something is very wrong here!");
|
||||
}
|
||||
|
||||
#define FUNCTION_NAME _gaussian_blur_horiz_alpha_step
|
||||
#define STEP 1
|
||||
#include "./blur/blur_gaussian_alpha_.c"
|
||||
|
||||
static void
|
||||
_gaussian_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
|
||||
_gaussian_blur_horiz_alpha(const DATA8 *src, DATA8 *dst, int radius, int w, int h)
|
||||
{
|
||||
int *weights;
|
||||
int k, pow2_div = 0;
|
||||
int pow2_div = 0;
|
||||
|
||||
weights = alloca((2 * radius + 1) * sizeof(int));
|
||||
_sin_blur_weights_get(weights, &pow2_div, radius);
|
||||
|
||||
DEBUG_TIME_BEGIN();
|
||||
|
||||
for (k = h; k; k--)
|
||||
{
|
||||
_gaussian_blur_step_alpha(src, dst, radius, w, 1, weights, pow2_div);
|
||||
dst += w;
|
||||
src += w;
|
||||
}
|
||||
|
||||
_gaussian_blur_horiz_alpha_step(src, dst, radius, w, h, w, weights, pow2_div);
|
||||
DEBUG_TIME_END();
|
||||
}
|
||||
|
||||
// w steps, loops = w --> STEP = loops
|
||||
#define FUNCTION_NAME _gaussian_blur_vert_alpha_step
|
||||
#define STEP loops
|
||||
#include "./blur/blur_gaussian_alpha_.c"
|
||||
|
||||
static void
|
||||
_gaussian_blur_vert_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
|
||||
_gaussian_blur_vert_alpha(const DATA8 *src, DATA8 *dst, int radius, int w, int h)
|
||||
{
|
||||
int *weights;
|
||||
int k, pow2_div = 0;
|
||||
int pow2_div = 0;
|
||||
|
||||
weights = alloca((2 * radius + 1) * sizeof(int));
|
||||
_sin_blur_weights_get(weights, &pow2_div, radius);
|
||||
|
||||
DEBUG_TIME_BEGIN();
|
||||
|
||||
for (k = w; k; k--)
|
||||
{
|
||||
_gaussian_blur_step_alpha(src, dst, radius, h, w, weights, pow2_div);
|
||||
dst += 1;
|
||||
src += 1;
|
||||
}
|
||||
|
||||
_gaussian_blur_vert_alpha_step(src, dst, radius, h, w, 1, weights, pow2_div);
|
||||
DEBUG_TIME_END();
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,11 @@
|
|||
#define GREEN_OF(a) (((a) >> 8) & 0xff)
|
||||
#define BLUE_OF(a) ((a) & 0xff)
|
||||
|
||||
// The 'restrict' keyword is part of C99
|
||||
#if __STDC_VERSION__ < 199901L
|
||||
# define restrict
|
||||
#endif
|
||||
|
||||
// Helpers
|
||||
#define ENFN ctx->evas->engine.func
|
||||
#define ENDT ctx->evas->engine.data.output
|
||||
|
|
Loading…
Reference in New Issue