Evas filters: Prepare optimization paths for BOX blur

Actually, there is a very nice trick with BOX blur: apply a BOX blur 3 times and the result approximates a GAUSSIAN blur to within about 3%. This is more than accurate enough for a simple graphical effect. So, despite the crappy quality of a single BOX blur, we should optimize it heavily so that large GAUSSIAN blurs can be replaced with a series of BOX blurs instead. Source: Wikipedia's page on box blur :) This commit also moves some duplicated definitions into shared files.
2014-03-11 16:18:41 +09:00 · 2014-03-11 16:18:41 +09:00 · 4e249143a5
parent 0dace7721e
commit 4e249143a5
8 changed files with 217 additions and 216 deletions
--- a/src/Makefile_Evas.am
+++ b/src/Makefile_Evas.am
@ -442,7 +442,10 @@ lib/evas/filters/evas_filter_utils.c \
 lib/evas/filters/evas_filter_private.h

 EXTRA_DIST += \
-lib/evas/filters/blur/blur_gaussian_alpha_.c
+lib/evas/filters/blur/blur_gaussian_alpha_.c \
+lib/evas/filters/blur/blur_gaussian_rgba_.c \
+lib/evas/filters/blur/blur_box_alpha_.c \
+lib/evas/filters/blur/blur_box_rgba_.c

 ### Engines

--- a/src/lib/evas/filters/blur/blur_box_alpha_.c
+++ b/src/lib/evas/filters/blur/blur_box_alpha_.c
@ -0,0 +1,68 @@
+/* @file blur_box_alpha_.c
+ * Should define the functions:
+ * - _box_blur_horiz_alpha_step
+ * - _box_blur_vert_alpha_step
+ */
+
+#include "evas_common_private.h"
+#include "../evas_filter_private.h"
+
+#if !defined (FUNCTION_NAME) || !defined (STEP)
+# error Must define FUNCTION_NAME and STEP
+#endif
+
+/* Box blur over `loops` lines of an 8-bit (alpha) buffer, using a sliding
+ * window sum so that each output pixel costs one add and one subtract.
+ *
+ * src, dst  first pixel of the first line to read / to write
+ * radius    blur radius; the full window is 2 * radius + 1 pixels wide
+ * len       number of pixels in one line
+ * loops     number of lines to process
+ * loopstep  offset from the start of one line to the start of the next
+ *
+ * Consecutive pixels of a line are STEP elements apart. Near both ends of
+ * a line the window is truncated and the sum is divided by the number of
+ * pixels that are actually inside it.
+ *
+ * NOTE(review): the read-ahead and left-edge loops together read
+ * 2 * MIN(radius, len) pixels, so a line shorter than 2 * radius looks like
+ * it is read past its end -- confirm that callers never pass such a length.
+ */
+static inline void
+FUNCTION_NAME(const DATA8* restrict src, DATA8* restrict dst,
+              const int radius, const int len,
+              const int loops, const int loopstep)
+{
+   DEFINE_DIVIDER(2 * radius + 1);
+   const int left = MIN(radius, len);
+   const int right = MIN(radius, (len - radius));
+
+   for (int l = loops; l > 0; --l)
+     {
+        const DATA8* restrict sr = src; // leading edge of the window
+        const DATA8* restrict sl = src; // trailing edge of the window
+        DATA8* restrict d = dst;
+        /* The sum has to restart for every line: when the right-edge loop
+         * finishes it still holds the trailing pixels of the previous line. */
+        int acc = 0;
+        int k;
+
+        // Read-ahead: preload the pixels to the right of the first output
+        for (k = left; k > 0; k--)
+          {
+             acc += *sr;
+             sr += STEP;
+          }
+
+        // Left edge: the window grows by one pixel per output
+        for (k = 0; k < left; k++)
+          {
+             acc += *sr;
+             *d = acc / (k + left + 1);
+             sr += STEP;
+             d += STEP;
+          }
+
+        // Main part: full window, one pixel enters and one leaves per output.
+        // '> 0' keeps the loop from running away when len < 2 * radius.
+        for (k = len - (2 * radius); k > 0; k--)
+          {
+             acc += *sr;
+             *d = DIVIDE(acc);
+             acc -= *sl;
+             sl += STEP;
+             sr += STEP;
+             d += STEP;
+          }
+
+        // Right edge: the window shrinks by one pixel per output
+        for (k = right; k > 0; k--)
+          {
+             *d = acc / (k + right);
+             acc -= *sl;
+             d += STEP;
+             sl += STEP;
+          }
+
+        src += loopstep;
+        dst += loopstep;
+     }
+}
+
+#undef FUNCTION_NAME
+#undef STEP
--- a/src/lib/evas/filters/blur/blur_box_rgba_.c
+++ b/src/lib/evas/filters/blur/blur_box_rgba_.c
@ -0,0 +1,95 @@
+/* @file blur_box_rgba_.c
+ * Should define the functions:
+ * - _box_blur_horiz_rgba_step
+ * - _box_blur_vert_rgba_step
+ */
+
+#include "evas_common_private.h"
+#include "../evas_filter_private.h"
+
+#if !defined (FUNCTION_NAME) || !defined (STEP)
+# error Must define FUNCTION_NAME and STEP
+#endif
+
+/* Box blur over `loops` lines of a 32-bit buffer, using one sliding window
+ * sum per channel. Each pixel is handled as four byte-sized channels.
+ *
+ * src, dst  first pixel of the first line to read / to write
+ * radius    blur radius; the full window is 2 * radius + 1 pixels wide
+ * len       number of pixels in one line
+ * loops     number of lines to process
+ * loopstep  offset, in pixels, from the start of one line to the next
+ *
+ * STEP is the distance between consecutive pixels of a line, in bytes
+ * (the pixel data is walked through byte pointers). Near both ends of a
+ * line the window is truncated and the sums are divided by the number of
+ * pixels that are actually inside it.
+ *
+ * NOTE(review): the read-ahead and left-edge loops together read
+ * 2 * MIN(radius, len) pixels, so a line shorter than 2 * radius looks like
+ * it is read past its end -- confirm that callers never pass such a length.
+ */
+static inline void
+FUNCTION_NAME(const DATA32* restrict src, DATA32* restrict dst,
+              const int radius, const int len,
+              const int loops, const int loopstep)
+{
+   DEFINE_DIVIDER(2 * radius + 1);
+   const int left = MIN(radius, len);
+   const int right = MIN(radius, (len - radius));
+
+   for (int l = loops; l > 0; --l)
+     {
+        int acc[4] = {0};
+        int x, k;
+        /* Divisor for the truncated windows at the edges. It must not be
+         * called `divider`: the non-bitshift DEFINE_DIVIDER declares a
+         * constant of that name, and DIVIDE() would pick up the local. */
+        int edge_div;
+
+        const DATA8* restrict sl = (const DATA8 *) src; // trailing edge
+        const DATA8* restrict sr = (const DATA8 *) src; // leading edge
+        DATA8* restrict d = (DATA8 *) dst;
+
+        // Read-ahead: preload the pixels to the right of the first output
+        for (x = left; x > 0; x--)
+          {
+             for (k = 0; k < 4; k++)
+               acc[k] += sr[k];
+             sr += STEP;
+          }
+
+        // Left edge: the window grows by one pixel per output
+        for (x = 0; x < left; x++)
+          {
+             for (k = 0; k < 4; k++)
+               acc[k] += sr[k];
+             sr += STEP;
+
+             edge_div = x + left + 1;
+             d[ALPHA] = acc[ALPHA] / edge_div;
+             d[RED]   = acc[RED]   / edge_div;
+             d[GREEN] = acc[GREEN] / edge_div;
+             d[BLUE]  = acc[BLUE]  / edge_div;
+             d += STEP;
+          }
+
+        // Main part: full window, one pixel enters and one leaves per output
+        for (x = len - (2 * radius); x > 0; x--)
+          {
+             for (k = 0; k < 4; k++)
+               acc[k] += sr[k];
+             sr += STEP;
+
+             d[ALPHA] = DIVIDE(acc[ALPHA]);
+             d[RED]   = DIVIDE(acc[RED]);
+             d[GREEN] = DIVIDE(acc[GREEN]);
+             d[BLUE]  = DIVIDE(acc[BLUE]);
+             d += STEP;
+
+             for (k = 0; k < 4; k++)
+               acc[k] -= sl[k];
+             sl += STEP;
+          }
+
+        // Right edge: the window shrinks by one pixel per output
+        for (x = right; x > 0; x--)
+          {
+             edge_div = x + right;
+             d[ALPHA] = acc[ALPHA] / edge_div;
+             d[RED]   = acc[RED]   / edge_div;
+             d[GREEN] = acc[GREEN] / edge_div;
+             d[BLUE]  = acc[BLUE]  / edge_div;
+             d += STEP;
+
+             for (k = 0; k < 4; k++)
+               acc[k] -= sl[k];
+             sl += STEP;
+          }
+
+        src += loopstep;
+        dst += loopstep;
+     }
+}
+
+#undef FUNCTION_NAME
+#undef STEP
--- a/src/lib/evas/filters/evas_filter.c
+++ b/src/lib/evas/filters/evas_filter.c
@ -500,6 +500,7 @@ evas_filter_context_buffers_allocate_all(Evas_Filter_Context *ctx,

                  //DBG("Allocating temporary buffer of size %ux%u", sw, sh);
                  fb = evas_filter_buffer_alloc_new(ctx, sw, sh, in->alpha_only);
+                  if (!fb) goto alloc_fail;
                  fb->transient = EINA_TRUE;
               }
          }
@ -514,6 +515,7 @@ evas_filter_context_buffers_allocate_all(Evas_Filter_Context *ctx,

             //DBG("Allocating temporary buffer of size %ux%u", sw, sh);
             fb = evas_filter_buffer_alloc_new(ctx, sw, sh, in->alpha_only);
+             if (!fb) goto alloc_fail;
             fb->transient = EINA_TRUE;
          }

@ -553,11 +555,7 @@ evas_filter_context_buffers_allocate_all(Evas_Filter_Context *ctx,

        //DBG("Allocating buffer of size %ux%u alpha %d", fb->w, fb->h, fb->alpha_only);
        im = _rgba_image_alloc(fb, NULL);
-        if (!im)
-          {
-             ERR("Buffer %d allocation failed!", fb->id);
-             return EINA_FALSE;
-          }
+        if (!im) goto alloc_fail;

        fb->backing = im;
        fb->allocated = (im != NULL);
@ -566,6 +564,10 @@ evas_filter_context_buffers_allocate_all(Evas_Filter_Context *ctx,
     }

   return EINA_TRUE;
+
+alloc_fail:
+   ERR("Buffer allocation failed! Context size: %dx%d", w, h);
+   return EINA_FALSE;
 }

 int
@ -841,7 +843,7 @@ evas_filter_temporary_buffer_get(Evas_Filter_Context *ctx, int w, int h,
          }
     }

-   if (ctx->running && ctx->async)
+   if (ctx->running) // && ctx->async)
     {
        ERR("Can not create a new buffer from this thread!");
        return NULL;
--- a/src/lib/evas/filters/evas_filter_blend.c
+++ b/src/lib/evas/filters/evas_filter_blend.c
@ -5,25 +5,6 @@
 // Use a better formula than R+G+B for rgba to alpha conversion (RGB to YCbCr)
 #define RGBA2ALPHA_WEIGHTED 1

-#if DIV_USING_BITSHIFT
-static int
-_smallest_pow2_larger_than(int val)
-{
-   int n;
-
-   for (n = 0; n < 32; n++)
-     if (val <= (1 << n)) return n;
-
-   ERR("Value %d is too damn high!", val);
-   return 32;
-}
-# define DEFINE_DIVIDER(div) const int pow2 = _smallest_pow2_larger_than((div) << 10); const int numerator = (1 << pow2) / (div);
-# define DIVIDE(val) (((val) * numerator) >> pow2)
-#else
-# define DEFINE_DIAMETER(div) const int divider = (div);
-# define DIVIDE(val) ((val) / divider)
-#endif
-
 typedef Eina_Bool (*image_draw_func) (void *data, void *context, void *surface, void *image, int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y, int dst_w, int dst_h, int smooth, Eina_Bool do_async);
 static Eina_Bool _mapped_blend(void *data, void *drawctx, void *in, void *out, Evas_Filter_Fill_Mode fillmode, int sx, int sy, int sw, int sh, int dx, int dy, int dw, int dh, image_draw_func image_draw);

@ -136,9 +117,9 @@ _filter_blend_cpu_generic_do(Evas_Filter_Command *cmd,

   // Stretch if necessary.

-   /* NOTE: As of 2014/02/21, this case is impossible. An alpha buffer will
-    * always be of the context buffer size, since only proxy buffers have
-    * different sizes... and proxies are all RGBA (never alpha only).
+   /* NOTE: As of 2014/03/11, this will happen only with RGBA buffers, since
+    * only proxy sources may be scaled. So, we don't need an alpha scaling
+    * algorithm just now.
    */

   if ((sw != dw || sh != dh) && (cmd->draw.fillmode & EVAS_FILTER_FILL_MODE_STRETCH_XY))
--- a/src/lib/evas/filters/evas_filter_blur.c
+++ b/src/lib/evas/filters/evas_filter_blur.c
@ -4,139 +4,27 @@
 #include <math.h>
 #include <time.h>

-#if DIV_USING_BITSHIFT
-static int
-_smallest_pow2_larger_than(int val)
-{
-   int n;
-
-   for (n = 0; n < 32; n++)
-     if (val <= (1 << n)) return n;
-
-   ERR("Value %d is too damn high!", val);
-   return 32;
-}
-
-/* Input:
- *  const int pow2 = _smallest_pow2_larger_than(divider * 1024);
- *  const int numerator = (1 << pow2) / divider;
- * Result:
- *  r = ((val * numerator) >> pow2);
- */
-# define DEFINE_DIAMETER(rad) const int pow2 = _smallest_pow2_larger_than((radius * 2 + 1) << 10); const int numerator = (1 << pow2) / (radius * 2 + 1);
-# define DIVIDE_BY_DIAMETER(val) (((val) * numerator) >> pow2)
-#else
-# define DEFINE_DIAMETER(rad) const int diameter = radius * 2 + 1;
-# define DIVIDE_BY_DIAMETER(val) ((val) / diameter)
-#endif
-
-/* RGBA functions */
-
-static void
-_box_blur_step_rgba(DATA32 *src, DATA32 *dst, int radius, int len, int step)
-{
-   DEFINE_DIAMETER(radius);
-   int acc[4] = {0};
-   DATA8 *d, *sr, *sl;
-   int x, k;
-   int divider;
-   int left = MIN(radius, len);
-   int right = MIN(radius, (len - radius));
-
-   d = (DATA8 *) dst;
-   sl = (DATA8 *) src;
-   sr = (DATA8 *) src;
-
-   // Read-ahead
-   for (x = left; x; x--)
-     {
-        for (k = 0; k < 4; k++)
-          acc[k] += sr[k];
-        sr += step;
-     }
-
-   // Left
-   for (x = 0; x < left; x++)
-     {
-        for (k = 0; k < 4; k++)
-          acc[k] += sr[k];
-        sr += step;
-
-        divider = x + left + 1;
-        d[ALPHA] = acc[ALPHA] / divider;
-        d[RED]   = acc[RED]   / divider;
-        d[GREEN] = acc[GREEN] / divider;
-        d[BLUE]  = acc[BLUE]  / divider;
-        d += step;
-     }
-
-   // Main part
-   for (x = len - (2 * radius); x > 0; x--)
-     {
-        for (k = 0; k < 4; k++)
-          acc[k] += sr[k];
-        sr += step;
-
-        d[ALPHA] = DIVIDE_BY_DIAMETER(acc[ALPHA]);
-        d[RED]   = DIVIDE_BY_DIAMETER(acc[RED]);
-        d[GREEN] = DIVIDE_BY_DIAMETER(acc[GREEN]);
-        d[BLUE]  = DIVIDE_BY_DIAMETER(acc[BLUE]);
-        d += step;
-
-        for (k = 0; k < 4; k++)
-          acc[k] -= sl[k];
-        sl += step;
-     }
-
-   // Right part
-   for (x = right; x; x--)
-     {
-        divider = x + right;
-        d[ALPHA] = acc[ALPHA] / divider;
-        d[RED]   = acc[RED]   / divider;
-        d[GREEN] = acc[GREEN] / divider;
-        d[BLUE]  = acc[BLUE]  / divider;
-        d += step;
-
-        for (k = 0; k < 4; k++)
-          acc[k] -= sl[k];
-        sl += step;
-     }
-}
+#define FUNCTION_NAME _box_blur_horiz_rgba_step
+#define STEP (sizeof(DATA32))
+#include "./blur/blur_box_rgba_.c"

 static void
 _box_blur_horiz_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
 {
-   int y;
-   int step = sizeof(DATA32);
-
   DEBUG_TIME_BEGIN();
-
-   for (y = 0; y < h; y++)
-     {
-        _box_blur_step_rgba(src, dst, radius, w, step);
-        src += w;
-        dst += w;
-     }
-
+   _box_blur_horiz_rgba_step(src, dst, radius, w, h, w);
   DEBUG_TIME_END();
 }

+#define FUNCTION_NAME _box_blur_vert_rgba_step
+#define STEP (loops * sizeof(DATA32))
+#include "./blur/blur_box_rgba_.c"
+
 static void
 _box_blur_vert_rgba(DATA32 *src, DATA32 *dst, int radius, int w, int h)
 {
-   int x;
-   int step = w * sizeof(DATA32);
-
   DEBUG_TIME_BEGIN();
-
-   for (x = 0; x < w; x++)
-     {
-        _box_blur_step_rgba(src, dst, radius, h, step);
-        src += 1;
-        dst += 1;
-     }
-
+   _box_blur_vert_rgba_step(src, dst, radius, h, w, 1);
   DEBUG_TIME_END();
 }

@ -188,84 +76,27 @@ _box_blur_vert_apply_rgba(Evas_Filter_Command *cmd)
   return EINA_TRUE;
 }

-/* Alpha only functions */
-
-/* Box blur */
-
-static void
-_box_blur_step_alpha(DATA8 *src, DATA8 *dst, int radius, int len, int step)
-{
-   int k;
-   int acc = 0;
-   DATA8 *sr = src, *sl = src, *d = dst;
-   DEFINE_DIAMETER(radius);
-   int left = MIN(radius, len);
-   int right = MIN(radius, (len - radius));
-
-   for (k = left; k; k--)
-     {
-        acc += *sr;
-        sr += step;
-     }
-
-   for (k = 0; k < left; k++)
-     {
-        acc += *sr;
-        *d = acc / (k + left + 1);
-        sr += step;
-        d += step;
-     }
-
-   for (k = len - (2 * radius); k; k--)
-     {
-        acc += *sr;
-        *d = DIVIDE_BY_DIAMETER(acc);
-        acc -= *sl;
-        sl += step;
-        sr += step;
-        d += step;
-     }
-
-   for (k = right; k; k--)
-     {
-        *d = acc / (k + right);
-        acc -= *sl;
-        d += step;
-        sl += step;
-     }
-}
+#define FUNCTION_NAME _box_blur_horiz_alpha_step
+#define STEP 1
+#include "./blur/blur_box_alpha_.c"

 static void
 _box_blur_horiz_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
 {
-   int k;
-
   DEBUG_TIME_BEGIN();
-
-   for (k = h; k; k--)
-     {
-        _box_blur_step_alpha(src, dst, radius, w, 1);
-        dst += w;
-        src += w;
-     }
-
+   _box_blur_horiz_alpha_step(src, dst, radius, w, h, w);
   DEBUG_TIME_END();
 }

+#define FUNCTION_NAME _box_blur_vert_alpha_step
+#define STEP loops
+#include "./blur/blur_box_alpha_.c"
+
 static void
 _box_blur_vert_alpha(DATA8 *src, DATA8 *dst, int radius, int w, int h)
 {
-   int k;
-
   DEBUG_TIME_BEGIN();
-
-   for (k = w; k; k--)
-     {
-        _box_blur_step_alpha(src, dst, radius, h, w);
-        dst += 1;
-        src += 1;
-     }
-
+   _box_blur_vert_alpha_step(src, dst, radius, h, w, 1);
   DEBUG_TIME_END();
 }

--- a/src/lib/evas/filters/evas_filter_private.h
+++ b/src/lib/evas/filters/evas_filter_private.h
@ -64,6 +64,14 @@
 # define DEBUG_TIME_END() do {} while(0)
 #endif

+/* Integer division by a loop-invariant divisor.
+ *
+ * DEFINE_DIVIDER(div) expands to declarations, so it has to appear once in
+ * the scope that later uses DIVIDE(val). It introduces the identifiers
+ * `pow2` and `numerator` (bitshift variant) or `divider` (plain variant);
+ * the surrounding code must not declare locals with those names.
+ *
+ * With DIV_USING_BITSHIFT the quotient is computed as a multiplication by a
+ * precomputed, truncated reciprocal followed by a right shift, so it can
+ * differ slightly from exact integer division.
+ */
+#if DIV_USING_BITSHIFT
+# define DEFINE_DIVIDER(div) const int pow2 = evas_filter_smallest_pow2_larger_than((div) << 10); const int numerator = (1 << pow2) / (div);
+# define DIVIDE(val) (((val) * numerator) >> pow2)
+#else
+# define DEFINE_DIVIDER(div) const int divider = (div);
+# define DIVIDE(val) ((val) / divider)
+#endif
+
 typedef enum _Evas_Filter_Interpolation_Mode Evas_Filter_Interpolation_Mode;

 struct _Evas_Filter_Context
@ -226,5 +234,6 @@ Evas_Filter_Buffer *evas_filter_temporary_buffer_get(Evas_Filter_Context *ctx, i
 Evas_Filter_Buffer *evas_filter_buffer_scaled_get(Evas_Filter_Context *ctx, Evas_Filter_Buffer *src, unsigned w, unsigned h);
 Eina_Bool evas_filter_interpolate(DATA8* output /* 256 values */, DATA8* points /* pairs x + y */, int point_count, Evas_Filter_Interpolation_Mode mode);
 Evas_Filter_Command *_evas_filter_command_get(Evas_Filter_Context *ctx, int cmdid);
+int evas_filter_smallest_pow2_larger_than(int val);

 #endif // EVAS_FILTER_PRIVATE_H
--- a/src/lib/evas/filters/evas_filter_utils.c
+++ b/src/lib/evas/filters/evas_filter_utils.c
@ -140,3 +140,15 @@ evas_filter_interpolate(DATA8 *output, DATA8 *points, int point_count,
        return _interpolate_linear(output, points, point_count);
     }
 }
+
+/* Return the smallest exponent n such that val <= (1 << n).
+ *
+ * Despite the name, a value that is exactly a power of two maps to its own
+ * exponent, and any val <= 1 (including negative values) yields 0.
+ *
+ * If no exponent up to 30 is large enough, an error is logged and 32 is
+ * returned.
+ * NOTE(review): DEFINE_DIVIDER shifts by the returned value, so callers
+ * cannot safely use that 32 -- confirm whether it needs explicit handling.
+ */
+int
+evas_filter_smallest_pow2_larger_than(int val)
+{
+   int n;
+
+   /* Stop at 30: with a 32-bit int, 1 << 31 overflows, which is undefined
+    * behaviour for a signed type. Values above 1 << 30 fall through to the
+    * error path below, exactly as before. */
+   for (n = 0; n < 31; n++)
+     if (val <= (1 << n)) return n;
+
+   ERR("Value %d is too damn high!", val);
+   return 32;
+}