From 4443ecfa8be65aef0dedcb0d749c9081769cc140 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Andre Date: Wed, 12 Mar 2014 10:20:27 +0900 Subject: [PATCH] Evas filters: Optimize alpha box blur Use two optimizable functions for BOX blur: vertical and horizontal. These functions will run as many times as requested (from 1 to 6 max). The horizontal case is pretty straightforward as the source is already contiguous (nice in terms of cache hits). The only catch is to swap src and dst without ever writing to the input buffer. In case of vertical blur, we apply the same method as above, after rotating the column into a horizontal (contiguous) span, and rotating it back afterwards. Now, the same needs to be done for RGBA :) --- src/lib/evas/filters/blur/blur_box_alpha_.c | 276 ++++++++++++++++---- src/lib/evas/filters/evas_filter.c | 2 +- src/lib/evas/filters/evas_filter_blur.c | 59 ++++- 3 files changed, 275 insertions(+), 62 deletions(-) diff --git a/src/lib/evas/filters/blur/blur_box_alpha_.c b/src/lib/evas/filters/blur/blur_box_alpha_.c index 220215238a..4a9facdee0 100644 --- a/src/lib/evas/filters/blur/blur_box_alpha_.c +++ b/src/lib/evas/filters/blur/blur_box_alpha_.c @@ -1,68 +1,246 @@ /* @file blur_box_alpha_.c - * Should define the functions: - * - _box_blur_horiz_alpha_step - * - _box_blur_vert_alpha_step + * Defines the following functions: + * _box_blur_alpha_horiz_step, _box_blur_alpha_vert_step */ #include "evas_common_private.h" #include "../evas_filter_private.h" -#if !defined (FUNCTION_NAME) || !defined (STEP) -# error Must define FUNCTION_NAME and STEP +static inline void +_box_blur_alpha_horiz_step(const DATA8* restrict const srcdata, + DATA8* restrict const dstdata, + const int* restrict const radii, + const int len, + const int loops) +{ + const DATA8* restrict src; + DATA8* restrict dst; + DATA8* restrict span1; + DATA8* restrict span2; + +#if DIV_USING_BITSHIFT + int pow2_shifts[6] = {0}; + int numerators[6] = {0}; + for (int run = 0; radii[run]; run++) + { + const int div = radii[run] 
* 2 + 1; + pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10); + numerators[run] = (1 << pow2_shifts[run]) / (div); + } #endif -static inline void -FUNCTION_NAME(const DATA8* restrict src, DATA8* restrict dst, - const int radius, const int len, - const int loops, const int loopstep) -{ - DEFINE_DIVIDER(2 * radius + 1); - const int left = MIN(radius, len); - const int right = MIN(radius, (len - radius)); - int acc = 0, k; + span1 = alloca(len); + span2 = alloca(len); - for (int l = loops; l; --l) + // For each line, apply as many blurs as requested + for (int l = 0; l < loops; l++) { - const DATA8* restrict sr = src; - const DATA8* restrict sl = src; - DATA8* restrict d = dst; + int run; - for (k = left; k; k--) + // New line: reset source & destination pointers + src = srcdata + len * l; + if (!radii[1]) // Only one run + dst = dstdata + len * l; + else + dst = span1; + + // Apply blur with current radius + for (run = 0; radii[run]; run++) { - acc += *sr; - sr += STEP; - } + const int radius = radii[run]; + const int left = MIN(radius, len); + const int right = MIN(radius, (len - radius)); + int acc = 0; - for (k = 0; k < left; k++) - { - acc += *sr; - *d = acc / (k + left + 1); - sr += STEP; - d += STEP; - } +#if DIV_USING_BITSHIFT + const int pow2 = pow2_shifts[run]; + const int numerator = numerators[run]; +#else + const int divider = 2 * radius + 1; +#endif - for (k = len - (2 * radius); k; k--) - { - acc += *sr; - *d = DIVIDE(acc); - acc -= *sl; - sl += STEP; - sr += STEP; - d += STEP; - } + const DATA8* restrict sr = src; + const DATA8* restrict sl = src; + DATA8* restrict d = dst; - for (k = right; k; k--) - { - *d = acc / (k + right); - acc -= *sl; - d += STEP; - sl += STEP; - } + // Read-ahead & accumulate + for (int k = left; k; k--) + { + acc += *sr; + sr += 1; + } - src += loopstep; - dst += loopstep; + // Left edge + for (int k = 0; k < left; k++) + { + acc += *sr; + *d = acc / (k + left + 1); + sr += 1; + d += 1; + } + + // Middle 
part, normal blur + for (int k = len - (2 * radius); k; k--) + { + acc += *sr; + *d = DIVIDE(acc); + acc -= *sl; + sl += 1; + sr += 1; + d += 1; + } + + // Right edge + for (int k = right; k; k--) + { + *d = acc / (k + right); + acc -= *sl; + d += 1; + sl += 1; + } + + // More runs to go: swap spans + if (radii[run + 1]) + { + src = dst; + if (radii[run + 2]) + { + // Two more runs: swap + DATA8* swap = span1; + span1 = span2; + span2 = swap; + dst = span1; + } + else + { + // Last run: write directly to dstdata + dst = dstdata + len * l; + } + } + } } } -#undef FUNCTION_NAME -#undef STEP +// ATTENTION: Make sure the below code's inner loop is the SAME as above. + +static inline void +_box_blur_alpha_vert_step(const DATA8* restrict const srcdata, + DATA8* restrict const dstdata, + const int* restrict const radii, + const int len, + const int loops) +{ + /* Note: This function tries to optimize cache hits by working on + * contiguous horizontal spans. + */ + + const int step = loops; + DATA8* restrict src; + DATA8* restrict dst; + DATA8* restrict span1; + DATA8* restrict span2; + +#if DIV_USING_BITSHIFT + int pow2_shifts[6] = {0}; + int numerators[6] = {0}; + for (int run = 0; radii[run]; run++) + { + const int div = radii[run] * 2 + 1; + pow2_shifts[run] = evas_filter_smallest_pow2_larger_than(div << 10); + numerators[run] = (1 << pow2_shifts[run]) / (div); + } +#endif + + span1 = alloca(len); + span2 = alloca(len); + + // For each line, apply as many blurs as requested + for (int l = 0; l < loops; l++) + { + int run; + + // Rotate input into work span + const DATA8* srcptr = srcdata + l; + DATA8* s = span1; + for (int k = len; k; --k) + { + *s++ = *srcptr; + srcptr += step; + } + + src = span1; + dst = span2; + + // Apply blur with current radius + for (run = 0; radii[run]; run++) + { + const int radius = radii[run]; + const int left = MIN(radius, len); + const int right = MIN(radius, (len - radius)); + int acc = 0; + +#if DIV_USING_BITSHIFT + const int pow2 = 
pow2_shifts[run]; + const int numerator = numerators[run]; +#else + const int divider = 2 * radius + 1; +#endif + + const DATA8* restrict sr = src; + const DATA8* restrict sl = src; + DATA8* restrict d = dst; + + // Read-ahead & accumulate + for (int k = left; k; k--) + { + acc += *sr; + sr += 1; + } + + // Left edge + for (int k = 0; k < left; k++) + { + acc += *sr; + *d = acc / (k + left + 1); + sr += 1; + d += 1; + } + + // Middle part, normal blur + for (int k = len - (2 * radius); k; k--) + { + acc += *sr; + *d = DIVIDE(acc); + acc -= *sl; + sl += 1; + sr += 1; + d += 1; + } + + // Right edge + for (int k = right; k; k--) + { + *d = acc / (k + right); + acc -= *sl; + d += 1; + sl += 1; + } + + // More runs to go: swap spans + if (radii[run + 1]) + { + DATA8* swap = src; + src = dst; + dst = swap; + } + } + + // Last run: rotate & copy back to destination + DATA8* restrict dstptr = dstdata + l; + for (int k = len; k; --k) + { + *dstptr = *dst++; + dstptr += step; + } + } +} diff --git a/src/lib/evas/filters/evas_filter.c b/src/lib/evas/filters/evas_filter.c index 6e899a901b..67006a2ae1 100644 --- a/src/lib/evas/filters/evas_filter.c +++ b/src/lib/evas/filters/evas_filter.c @@ -973,7 +973,7 @@ evas_filter_command_blur_add(Evas_Filter_Context *ctx, void *drawctx, else type = EVAS_FILTER_BLUR_BOX; - id = evas_filter_command_blur_add(ctx, drawctx, inbuf, tmp_in, + id = evas_filter_command_blur_add(ctx, drawctx, tmp_in, outbuf, type, 0, dy, ox, oy, 0); if (id < 0) goto fail; cmd = _evas_filter_command_get(ctx, id); diff --git a/src/lib/evas/filters/evas_filter_blur.c b/src/lib/evas/filters/evas_filter_blur.c index b2d9733a1e..2220c9d8bb 100644 --- a/src/lib/evas/filters/evas_filter_blur.c +++ b/src/lib/evas/filters/evas_filter_blur.c @@ -4,6 +4,35 @@ #include #include +static int +_box_blur_auto_radius(int *radii, int r) +{ + if (r <= 2) + { + radii[0] = r; + radii[1] = 0; + WRN("Radius is too small for auto box blur: %d", r); + return 1; + } + else if (r <= 6) + { 
+ radii[0] = r / 2; + radii[1] = r - radii[0] - 1; + radii[2] = 0; + DBG("Using auto radius for %d: %d %d", r, radii[0], radii[1]); + return 2; + } + else + { + radii[0] = (r + 3) / 3; + radii[1] = (r + 2) / 3; + radii[2] = r - radii[0] - radii[1]; + radii[3] = 0; + DBG("Using auto radius for %d: %d %d %d", r, radii[0], radii[1], radii[2]); + return 3; + } +} + #define FUNCTION_NAME _box_blur_horiz_rgba_step #define STEP (sizeof(DATA32)) #include "./blur/blur_box_rgba_.c" @@ -76,27 +105,21 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd) return EINA_TRUE; } -#define FUNCTION_NAME _box_blur_horiz_alpha_step -#define STEP 1 #include "./blur/blur_box_alpha_.c" static void -_box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h) +_box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h) { DEBUG_TIME_BEGIN(); - _box_blur_horiz_alpha_step(src, dst, radius, w, h, w); + _box_blur_alpha_horiz_step(src, dst, radii, w, h); DEBUG_TIME_END(); } -#define FUNCTION_NAME _box_blur_vert_alpha_step -#define STEP loops -#include "./blur/blur_box_alpha_.c" - static void -_box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h) +_box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int* radii, int w, int h) { DEBUG_TIME_BEGIN(); - _box_blur_vert_alpha_step(src, dst, radius, h, w, 1); + _box_blur_alpha_vert_step(src, dst, radii, h, w); DEBUG_TIME_END(); } @@ -104,6 +127,7 @@ static Eina_Bool _box_blur_horiz_apply_alpha(Evas_Filter_Command *cmd) { RGBA_Image *in, *out; + int radii[7] = {0}; unsigned int r; EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE); @@ -114,11 +138,16 @@ _box_blur_horiz_apply_alpha(Evas_Filter_Command *cmd) in = cmd->input->backing; out = cmd->output->backing; + if (cmd->blur.auto_count) + _box_blur_auto_radius(radii, r); + else for (int k = 0; k < cmd->blur.count; k++) + radii[k] = r; + EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data8, EINA_FALSE); EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data8, EINA_FALSE); 
EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.w >= (2*r + 1), EINA_FALSE); - _box_blur_horiz_alpha(in->image.data8, out->image.data8, r, + _box_blur_horiz_alpha(in->image.data8, out->image.data8, radii, in->cache_entry.w, in->cache_entry.h); return EINA_TRUE; @@ -128,6 +157,7 @@ static Eina_Bool _box_blur_vert_apply_alpha(Evas_Filter_Command *cmd) { RGBA_Image *in, *out; + int radii[7] = {0}; unsigned int r; EINA_SAFETY_ON_NULL_RETURN_VAL(cmd, EINA_FALSE); @@ -138,11 +168,16 @@ _box_blur_vert_apply_alpha(Evas_Filter_Command *cmd) in = cmd->input->backing; out = cmd->output->backing; + if (cmd->blur.auto_count) + _box_blur_auto_radius(radii, r); + else for (int k = 0; k < cmd->blur.count; k++) + radii[k] = r; + EINA_SAFETY_ON_NULL_RETURN_VAL(in->image.data8, EINA_FALSE); EINA_SAFETY_ON_NULL_RETURN_VAL(out->image.data8, EINA_FALSE); EINA_SAFETY_ON_FALSE_RETURN_VAL(out->cache_entry.h >= (2*r + 1), EINA_FALSE); - _box_blur_vert_alpha(in->image.data8, out->image.data8, r, + _box_blur_vert_alpha(in->image.data8, out->image.data8, radii, in->cache_entry.w, in->cache_entry.h); return EINA_TRUE;