From 592204fe73cb7198aea2a3be3dbe1efdad6999b1 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Andre Date: Wed, 12 Mar 2014 13:55:44 +0900 Subject: [PATCH] Evas filters: Optimize RGBA blur as well Same as Alpha blur, use combination of box blurs, and put all that code into optimizable functions. --- src/lib/evas/filters/blur/blur_box_alpha_.c | 5 +- src/lib/evas/filters/blur/blur_box_rgba_.c | 342 +++++++++++++++----- src/lib/evas/filters/evas_filter.c | 32 +- src/lib/evas/filters/evas_filter_blur.c | 30 +- 4 files changed, 304 insertions(+), 105 deletions(-) diff --git a/src/lib/evas/filters/blur/blur_box_alpha_.c b/src/lib/evas/filters/blur/blur_box_alpha_.c index 4a9facdee0..71ac943886 100644 --- a/src/lib/evas/filters/blur/blur_box_alpha_.c +++ b/src/lib/evas/filters/blur/blur_box_alpha_.c @@ -1,6 +1,7 @@ /* @file blur_box_alpha_.c - * Defines the following function: - * _box_blur_alpha_step + * Defines the following functions: + * _box_blur_alpha_horiz_step + * _box_blur_alpha_vert_step */ #include "evas_common_private.h" diff --git a/src/lib/evas/filters/blur/blur_box_rgba_.c b/src/lib/evas/filters/blur/blur_box_rgba_.c index b930d6e554..95d381a774 100644 --- a/src/lib/evas/filters/blur/blur_box_rgba_.c +++ b/src/lib/evas/filters/blur/blur_box_rgba_.c @@ -7,89 +7,281 @@ #include "evas_common_private.h" #include "../evas_filter_private.h" -#if !defined (FUNCTION_NAME) || !defined (STEP) -# error Must define FUNCTION_NAME and STEP +static inline void +_box_blur_horiz_rgba_step(const DATA32* restrict const srcdata, + DATA32* restrict const dstdata, + const int* restrict const radii, + const int len, + const int loops) +{ + const DATA32* restrict src; + DATA32* restrict dst; + DATA32* restrict span1; + DATA32* restrict span2; + +#if DIV_USING_BITSHIFT + int pow2_shifts[6] = {0}; + int numerators[6] = {0}; + for (int run = 0; radii[run]; run++) + { + const int div = radii[run] * 2 + 1; + pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10); + numerators[run] = (1 << pow2_shifts[run]) / (div); + } #endif -static inline void -FUNCTION_NAME(const DATA32* restrict src, DATA32* restrict dst, - const int radius, const int len, - const int loops, const int loopstep) -{ - DEFINE_DIVIDER(2 * radius + 1); - const int left = MIN(radius, len); - const int right = MIN(radius, (len - radius)); + span1 = alloca(len * sizeof(DATA32)); + span2 = alloca(len * sizeof(DATA32)); - for (int l = loops; l; --l) + // For each line, apply as many blurs as requested + for (int l = 0; l < loops; l++) { - int acc[4] = {0}; - int x, k; - int divider; + int run; - const DATA8* restrict sl = (DATA8 *) src; - const DATA8* restrict sr = (DATA8 *) src; - DATA8* restrict d = (DATA8 *) dst; + // New line: reset source & destination pointers + src = srcdata + len * l; + if (!radii[1]) // Only one run + dst = dstdata + len * l; + else + dst = span1; - // Read-ahead - for (x = left; x; x--) + // Apply blur with current radius + for (run = 0; radii[run]; run++) { - for (k = 0; k < 4; k++) - acc[k] += sr[k]; - sr += STEP; + const int radius = radii[run]; + const int left = MIN(radius, len); + const int right = MIN(radius, (len - radius)); + +#if DIV_USING_BITSHIFT + const int pow2 = pow2_shifts[run]; + const int numerator = numerators[run]; +#else + const int divider = 2 * radius + 1; +#endif + + const DATA8* restrict sl = (DATA8 *) src; + const DATA8* restrict sr = (DATA8 *) src; + DATA8* restrict d = (DATA8 *) dst; + int acc[4] = {0}; + int x, k; + + // Read-ahead + for (x = left; x; x--) + { + for (k = 0; k < 4; k++) + acc[k] += sr[k]; + sr += sizeof(DATA32); + } + + // Left + for (x = 0; x < left; x++) + { + for (k = 0; k < 4; k++) + acc[k] += sr[k]; + sr += sizeof(DATA32); + + const int divider = x + left + 1; + d[ALPHA] = acc[ALPHA] / divider; + d[RED] = acc[RED] / divider; + d[GREEN] = acc[GREEN] / divider; + d[BLUE] = acc[BLUE] / divider; + d += sizeof(DATA32); + } + + // Main part + for (x = len - (2 * radius); x > 0; x--) + { + for (k = 0; k < 4; k++) + acc[k] += sr[k]; + sr += sizeof(DATA32); + + d[ALPHA] = DIVIDE(acc[ALPHA]); + d[RED] = DIVIDE(acc[RED]); + d[GREEN] = DIVIDE(acc[GREEN]); + d[BLUE] = DIVIDE(acc[BLUE]); + d += sizeof(DATA32); + + for (k = 0; k < 4; k++) + acc[k] -= sl[k]; + sl += sizeof(DATA32); + } + + // Right part + for (x = right; x; x--) + { + const int divider = x + right; + d[ALPHA] = acc[ALPHA] / divider; + d[RED] = acc[RED] / divider; + d[GREEN] = acc[GREEN] / divider; + d[BLUE] = acc[BLUE] / divider; + d += sizeof(DATA32); + + for (k = 0; k < 4; k++) + acc[k] -= sl[k]; + sl += sizeof(DATA32); + } + + // More runs to go: swap spans + if (radii[run + 1]) + { + src = dst; + if (radii[run + 2]) + { + // Two more runs: swap + DATA32* swap = span1; + span1 = span2; + span2 = swap; + dst = span1; + } + else + { + // Last run: write directly to dstdata + dst = dstdata + len * l; + } + } } - - // Left - for (x = 0; x < left; x++) - { - for (k = 0; k < 4; k++) - acc[k] += sr[k]; - sr += STEP; - - divider = x + left + 1; - d[ALPHA] = acc[ALPHA] / divider; - d[RED] = acc[RED] / divider; - d[GREEN] = acc[GREEN] / divider; - d[BLUE] = acc[BLUE] / divider; - d += STEP; - } - - // Main part - for (x = len - (2 * radius); x > 0; x--) - { - for (k = 0; k < 4; k++) - acc[k] += sr[k]; - sr += STEP; - - d[ALPHA] = DIVIDE(acc[ALPHA]); - d[RED] = DIVIDE(acc[RED]); - d[GREEN] = DIVIDE(acc[GREEN]); - d[BLUE] = DIVIDE(acc[BLUE]); - d += STEP; - - for (k = 0; k < 4; k++) - acc[k] -= sl[k]; - sl += STEP; - } - - // Right part - for (x = right; x; x--) - { - divider = x + right; - d[ALPHA] = acc[ALPHA] / divider; - d[RED] = acc[RED] / divider; - d[GREEN] = acc[GREEN] / divider; - d[BLUE] = acc[BLUE] / divider; - d += STEP; - - for (k = 0; k < 4; k++) - acc[k] -= sl[k]; - sl += STEP; - } - - src += loopstep; - dst += loopstep; } } -#undef FUNCTION_NAME -#undef STEP +static inline void +_box_blur_vert_rgba_step(const DATA32* restrict const srcdata, + DATA32* restrict const dstdata, + const int* restrict const radii, + const int len, + const int loops) +{ + /* Note: This function tries to optimize cache hits by working on + * contiguous horizontal spans. + */ + + const int step = loops; + DATA32* restrict src; + DATA32* restrict dst; + DATA32* restrict span1; + DATA32* restrict span2; + +#if DIV_USING_BITSHIFT + int pow2_shifts[6] = {0}; + int numerators[6] = {0}; + for (int run = 0; radii[run]; run++) + { + const int div = radii[run] * 2 + 1; + pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10); + numerators[run] = (1 << pow2_shifts[run]) / (div); + } +#endif + + span1 = alloca(len * sizeof(DATA32)); + span2 = alloca(len * sizeof(DATA32)); + + // For each line, apply as many blurs as requested + for (int l = 0; l < loops; l++) + { + int run; + + // Rotate input into work span + const DATA32* srcptr = srcdata + l; + DATA32* s = span1; + for (int k = len; k; --k) + { + *s++ = *srcptr; + srcptr += step; + } + + src = span1; + dst = span2; + + // Apply blur with current radius + for (run = 0; radii[run]; run++) + { + const int radius = radii[run]; + const int left = MIN(radius, len); + const int right = MIN(radius, (len - radius)); + +#if DIV_USING_BITSHIFT + const int pow2 = pow2_shifts[run]; + const int numerator = numerators[run]; +#else + const int divider = 2 * radius + 1; +#endif + + const DATA8* restrict sl = (DATA8 *) src; + const DATA8* restrict sr = (DATA8 *) src; + DATA8* restrict d = (DATA8 *) dst; + int acc[4] = {0}; + int x, k; + + // Read-ahead + for (x = left; x; x--) + { + for (k = 0; k < 4; k++) + acc[k] += sr[k]; + sr += sizeof(DATA32); + } + + // Left + for (x = 0; x < left; x++) + { + for (k = 0; k < 4; k++) + acc[k] += sr[k]; + sr += sizeof(DATA32); + + const int divider = x + left + 1; + d[ALPHA] = acc[ALPHA] / divider; + d[RED] = acc[RED] / divider; + d[GREEN] = acc[GREEN] / divider; + d[BLUE] = acc[BLUE] / divider; + d += sizeof(DATA32); + } + + // Main part + for (x = len - (2 * radius); x > 0; x--) + { + for (k = 0; k < 4; k++) + acc[k] += sr[k]; + sr += sizeof(DATA32); + + d[ALPHA] = DIVIDE(acc[ALPHA]); + d[RED] = DIVIDE(acc[RED]); + d[GREEN] = DIVIDE(acc[GREEN]); + d[BLUE] = DIVIDE(acc[BLUE]); + d += sizeof(DATA32); + + for (k = 0; k < 4; k++) + acc[k] -= sl[k]; + sl += sizeof(DATA32); + } + + // Right part + for (x = right; x; x--) + { + const int divider = x + right; + d[ALPHA] = acc[ALPHA] / divider; + d[RED] = acc[RED] / divider; + d[GREEN] = acc[GREEN] / divider; + d[BLUE] = acc[BLUE] / divider; + d += sizeof(DATA32); + + for (k = 0; k < 4; k++) + acc[k] -= sl[k]; + sl += sizeof(DATA32); + } + + // More runs to go: swap spans + if (radii[run + 1]) + { + DATA32* swap = src; + src = dst; + dst = swap; + } + } + + // Last run: rotate & copy back to destination + DATA32* restrict dstptr = dstdata + l; + for (int k = len; k; --k) + { + *dstptr = *dst++; + dstptr += step; + } + } +} diff --git a/src/lib/evas/filters/evas_filter.c b/src/lib/evas/filters/evas_filter.c index 67006a2ae1..5b59fcccb8 100644 --- a/src/lib/evas/filters/evas_filter.c +++ b/src/lib/evas/filters/evas_filter.c @@ -915,6 +915,20 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx, if (dy < 0) dy = 0; if (!dx && !dy) goto fail; + in = _filter_buffer_get(ctx, inbuf); + if (!in) + { + ERR("Buffer %d does not exist [input].", inbuf); + goto fail; + } + + out = _filter_buffer_get(ctx, outbuf); + if (!out) + { + ERR("Buffer %d does not exist [output].", outbuf); + goto fail; + } + switch (type) { case EVAS_FILTER_BLUR_GAUSSIAN: @@ -926,7 +940,6 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx, break; case EVAS_FILTER_BLUR_DEFAULT: - count = 1; /* In DEFAULT mode we cheat, depending on the size of the kernel: * For 1px to 2px, use true Gaussian blur. @@ -938,6 +951,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx, * needed, of course! */ { + const Eina_Bool alpha = in->alpha_only; int tmp_out = outbuf; int tmp_in = inbuf; int tmp_ox = ox; @@ -946,7 +960,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx, id = -1; if (dx && dy) { - tmp = evas_filter_temporary_buffer_get(ctx, 0, 0, EINA_TRUE); + tmp = evas_filter_temporary_buffer_get(ctx, 0, 0, alpha); if (!tmp) goto fail; tmp_in = tmp_out = tmp->id; tmp_ox = tmp_oy = 0; @@ -989,20 +1003,6 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx, goto fail; } - in = _filter_buffer_get(ctx, inbuf); - if (!in) - { - ERR("Buffer %d does not exist [input].", inbuf); - goto fail; - } - - out = _filter_buffer_get(ctx, outbuf); - if (!out) - { - ERR("Buffer %d does not exist [output].", outbuf); - goto fail; - } - if (!in->alpha_only && out->alpha_only) { ERR("Output and input don't have the same format"); diff --git a/src/lib/evas/filters/evas_filter_blur.c b/src/lib/evas/filters/evas_filter_blur.c index 2220c9d8bb..36a06e153a 100644 --- a/src/lib/evas/filters/evas_filter_blur.c +++ b/src/lib/evas/filters/evas_filter_blur.c @@ -33,27 +33,21 @@ _box_blur_auto_radius(int *radii, int r) } } -#define FUNCTION_NAME _box_blur_horiz_rgba_step -#define STEP (sizeof(DATA32)) #include "./blur/blur_box_rgba_.c" static void -_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h) +_box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h) { DEBUG_TIME_BEGIN(); - _box_blur_horiz_rgba_step(src, dst, radius, w, h, w); + _box_blur_horiz_rgba_step(src, dst, radii, w, h); DEBUG_TIME_END(); } -#define FUNCTION_NAME _box_blur_vert_rgba_step -#define STEP (loops * sizeof(DATA32)) -#include "./blur/blur_box_rgba_.c" - static void -_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h) +_box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int* radii, int w, int h) { DEBUG_TIME_BEGIN(); - _box_blur_vert_rgba_step(src, dst, radius, h, w, 1); + _box_blur_vert_rgba_step(src, dst, radii, h, w); DEBUG_TIME_END(); } @@ -61,6 +55,7 @@ static Eina_Bool _box_blur_horiz_apply_rgba(Evas_Filter_Command *cmd) { RGBA_Image *in, *out; + int radii[7] = {0}; unsigned int r; EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE); @@ -71,11 +66,16 @@ _box_blur_horiz_apply_rgba(Evas_Filter_Command *cmd) in = cmd->input->backing; out = cmd->output->backing; + if (cmd->blur.auto_count) + _box_blur_auto_radius(radii, r); + else for (int k = 0; k < cmd->blur.count; k++) + radii[k] = r; + EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE); EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE); EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.w >= (2*r + 1), EINA_FALSE); - _box_blur_horiz_rgba(in->image.data, out->image.data, r, + _box_blur_horiz_rgba(in->image.data, out->image.data, radii, in->cache_entry.w, in->cache_entry.h); return EINA_TRUE; @@ -85,6 +85,7 @@ static Eina_Bool _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd) { RGBA_Image *in, *out; + int radii[7] = {0}; unsigned int r; EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE); @@ -95,11 +96,16 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd) in = cmd->input->backing; out = cmd->output->backing; + if (cmd->blur.auto_count) + _box_blur_auto_radius(radii, r); + else for (int k = 0; k < cmd->blur.count; k++) + radii[k] = r; + EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data, EINA_FALSE); EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data, EINA_FALSE); EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.h >= (2*r + 1), EINA_FALSE); - _box_blur_vert_rgba(in->image.data, out->image.data, r, + _box_blur_vert_rgba(in->image.data, out->image.data, radii, in->cache_entry.w, in->cache_entry.h); return EINA_TRUE;