evas filters: Optimize GL blur with interpolation

This optimizes the GL blur algorithm by reducing the number of texel fetches to roughly half of what it was before this patch. It works by exploiting GL's texture interpolation: a single fetch placed between two adjacent texels returns a weighted combination of both.
2017-03-21 19:06:05 +09:00 · 2017-03-21 19:06:05 +09:00 · ebeead4681
parent 6af3c20aeb
commit ebeead4681
5 changed files with 120 additions and 81 deletions
--- a/src/modules/evas/engines/gl_common/evas_gl_common.h
+++ b/src/modules/evas/engines/gl_common/evas_gl_common.h
@ -652,7 +652,7 @@ void              evas_gl_common_filter_displace_push(Evas_Engine_GL_Context *gc
 void              evas_gl_common_filter_curve_push(Evas_Engine_GL_Context *gc, Evas_GL_Texture *tex,
                                                   int x, int y, int w, int h, const uint8_t *points, int channel);
 void              evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc, Evas_GL_Texture *tex, double sx, double sy, double sw, double sh,
-                                                  double dx, double dy, double dw, double dh, GLfloat *values, GLfloat *offsets, int count,
+                                                  double dx, double dy, double dw, double dh, const double * const values, const double * const offsets, int count,
                                                  Eina_Bool horiz);

 int               evas_gl_common_shader_program_init(Evas_GL_Shared *shared);
--- a/src/modules/evas/engines/gl_common/evas_gl_context.c
+++ b/src/modules/evas/engines/gl_common/evas_gl_context.c
@ -3432,7 +3432,8 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
                                Evas_GL_Texture *tex,
                                double sx, double sy, double sw, double sh,
                                double dx, double dy, double dw, double dh,
-                                GLfloat *values, GLfloat *offsets, int count,
+                                const double * const weights,
+                                const double * const offsets, int count,
                                Eina_Bool horiz)
 {
   double ox1, oy1, ox2, oy2, ox3, oy3, ox4, oy4, pw, ph;
@ -3444,6 +3445,7 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
   Eina_Bool blend = EINA_TRUE;
   Eina_Bool smooth = EINA_TRUE;
   Shader_Type type = horiz ? SHD_FILTER_BLUR_X : SHD_FILTER_BLUR_Y;
+   GLuint *map_tex_data;
   GLuint map_tex;
   double sum;

@ -3492,21 +3494,44 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
   pipe_region_expand(gc, pn, dx, dy, dw, dh);
   PIPE_GROW(gc, pn, 6);

-   sum = values[0];
-   for (int k = 1; k < count; k++)
-     sum += 2.0 * values[k];
+   /* Convert double data to RGBA pixel data.
+    *
+    * We are not using GL_FLOAT or GL_DOUBLE because:
+    * - It's not as portable (needs extensions),
+    * - GL_DOUBLE didn't work during my tests (dunno why),
+    * - GL_FLOAT didn't seem to carry the proper precision all the way to
+    *   the fragment shader,
+    * - Real data buffers are not available in GLES 2.0,
+    * - GL_RGBA is 100% portable.
+    */
+   map_tex_data = alloca(2 * count * sizeof(*map_tex_data));
+   for (int k = 0; k < count; k++)
+     {
+        GLuint val;

-   // Synchronous upload of Nx1 RGBA texture (FIXME: no reuse)
+        if (k == 0) sum = weights[k];
+        else sum += 2.0 * weights[k];
+
+        // Weight is always > 0.0 and < 255.0 by maths
+        val = (GLuint) (weights[k] * 256.0 * 256.0 * 256.0);
+        map_tex_data[k] = val;
+
+        // Offset is always in [0.0 , 1.0] by definition
+        val = (GLuint) (offsets[k] * 256.0 * 256.0 * 256.0);
+        map_tex_data[k + count] = val;
+     }
+
+   // Synchronous upload of Nx2 RGBA texture (FIXME: no reuse)
   glGenTextures(1, &map_tex);
   glBindTexture(GL_TEXTURE_2D, map_tex);
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-   // FIXME: GLES2 requires extensions here!!!
-   glTexImage2D(GL_TEXTURE_2D, 0, GL_R16F, count, 1, 0, GL_RED, GL_FLOAT, values);
-   // FIXME: double values don't work??
-   //glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, count, 1, 0, GL_RED, GL_DOUBLE, values);
+   if (tex->gc->shared->info.unpack_row_length)
+     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+   glPixelStorei(GL_UNPACK_ALIGNMENT, sizeof(*map_tex_data));
+   glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, count, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, map_tex_data);

   // Set curve properties (no need for filter_data)
   gc->pipe[pn].shader.filter.map_tex = map_tex;
@ -3516,7 +3541,7 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
   // Set blur properties... WIP
   _filter_data_prepare(gc, pn, prog, 2);
   filter_data = gc->pipe[pn].array.filter_data;
-   filter_data[0] = count;
+   filter_data[0] = count - 1.0;
   filter_data[1] = horiz ? sw : sh;
   filter_data[2] = sum;
   filter_data[3] = 0.0; // unused
--- a/src/modules/evas/engines/gl_common/shader/evas_gl_shaders.x
+++ b/src/modules/evas/engines/gl_common/shader/evas_gl_shaders.x
@ -231,34 +231,39 @@ static const char fragment_glsl[] =
   "#else\n"
   "# define FETCH_PIXEL(x) fetch_pixel(0.0, (x))\n"
   "#endif\n"
+   "float weight_get(float u, float count, float index)\n"
+   "{\n"
+   "   vec4 val = texture2D(tex_filter, vec2(u / count, index)).bgra;\n"
+   "   return val.a*255.0 + (val.r*255.0/256.0) + (val.g*255.0/256.0/256.0) + (val.b*255.0/256.0/256.0/256.0);\n"
+   "}\n"
+   "float offset_get(float u, float count, float index)\n"
+   "{\n"
+   "   // val.a is always 0 here ~ discard\n"
+   "   vec4 val = texture2D(tex_filter, vec2(u / count, index)).bgra;\n"
+   "   return (val.r*255.0/256.0) + (val.g*255.0/256.0/256.0) + (val.b*255.0/256.0/256.0/256.0);\n"
+   "}\n"
   "void main()\n"
   "{\n"
-   "   float u, u_div, count, div, w;\n"
+   "   float u, texlen, count, div;\n"
+   "   float weight, offset;\n"
   "   vec4 acc, px;\n"
   "   count = blur_data.x;\n"
-   "   u_div = blur_data.y;\n"
-   "   //div = blur_data.z;\n"
-   "   // Center pixel\n"
-   "   w = texture2D(tex_filter, vec2(0.0, 0.0)).r;\n"
-   "   px = FETCH_PIXEL(u / u_div);\n"
-   "   acc = px * w;\n"
-   "   div = w;\n"
-   "   // Left & right\n"
-   "   for (u = 1; u <= count; u += 1.0)\n"
-   "#if 0\n"
-   "   div = 0.0;\n"
-   "   for (u = -count; u <= count; u += 1.0)\n"
+   "   texlen = blur_data.y;\n"
+   "   div = blur_data.z;\n"
+   "   // Center pixel, offset is 0.0\n"
+   "   weight = weight_get(0.0, count, 0.0);\n"
+   "   px = FETCH_PIXEL(0.0);\n"
+   "   acc = px * weight;\n"
+   "   for (u = 1.0; u <= count; u += 1.0)\n"
   "   {\n"
-   "      w = texture2D(tex_filter, vec2(abs(u) / count, 0.0)).r;\n"
-   "#ifndef SHD_FILTER_DIR_Y\n"
-   "      px = fetch_pixel(u / u_div, 0.0);\n"
-   "#else\n"
-   "      px = fetch_pixel(0.0, u / u_div);\n"
-   "#endif\n"
-   "      acc += px * w;\n"
-   "      div += w;\n"
+   "      weight = weight_get(u, count, 0.0);\n"
+   "      offset = offset_get(u, count, 1.0);\n"
+   "      // Left\n"
+   "      vec4 px1 = FETCH_PIXEL(-((offset + (2.0 * u) - 1.0)) / texlen);\n"
+   "      // Right\n"
+   "      vec4 px2 = FETCH_PIXEL((offset + (2.0 * u) - 1.0) / texlen);\n"
+   "      acc += (px1 + px2) * weight;\n"
   "   }\n"
-   "#endif\n"
   "#ifndef SHD_NOMUL\n"
   "   gl_FragColor = (acc / div) * col;\n"
   "#else\n"
--- a/src/modules/evas/engines/gl_common/shader/fragment.glsl
+++ b/src/modules/evas/engines/gl_common/shader/fragment.glsl
@ -251,40 +251,47 @@ vec4 fetch_pixel(float ox, float oy)
 # define FETCH_PIXEL(x) fetch_pixel(0.0, (x))
 #endif

+float weight_get(float u, float count, float index)
+{
+   vec4 val = texture2D(tex_filter, vec2(u / count, index)).bgra;
+   return val.a*255.0 + (val.r*255.0/256.0) + (val.g*255.0/256.0/256.0) + (val.b*255.0/256.0/256.0/256.0);
+}
+
+float offset_get(float u, float count, float index)
+{
+   // val.a is always 0 here ~ discard
+   vec4 val = texture2D(tex_filter, vec2(u / count, index)).bgra;
+   return (val.r*255.0/256.0) + (val.g*255.0/256.0/256.0) + (val.b*255.0/256.0/256.0/256.0);
+}
+
 void main()
 {
-   float u, u_div, count, div, w;
+   float u, texlen, count, div;
+   float weight, offset;
   vec4 acc, px;

   count = blur_data.x;
-   u_div = blur_data.y;
-   //div = blur_data.z;
+   texlen = blur_data.y;
+   div = blur_data.z;

-   // Center pixel
-   w = texture2D(tex_filter, vec2(0.0, 0.0)).r;
-   px = FETCH_PIXEL(u / u_div);
-   acc = px * w;
-   div = w;
+   // Center pixel, offset is 0.0
+   weight = weight_get(0.0, count, 0.0);
+   px = FETCH_PIXEL(0.0);
+   acc = px * weight;

-   // Left & right
-   for (u = 1; u <= count; u += 1.0)
-
-#if 0
-   div = 0.0;
-   for (u = -count; u <= count; u += 1.0)
+   for (u = 1.0; u <= count; u += 1.0)
   {
-      w = texture2D(tex_filter, vec2(abs(u) / count, 0.0)).r;
+      weight = weight_get(u, count, 0.0);
+      offset = offset_get(u, count, 1.0);

-#ifndef SHD_FILTER_DIR_Y
-      px = fetch_pixel(u / u_div, 0.0);
-#else
-      px = fetch_pixel(0.0, u / u_div);
-#endif
+      // Left
+      vec4 px1 = FETCH_PIXEL(-((offset + (2.0 * u) - 1.0)) / texlen);

-      acc += px * w;
-      div += w;
+      // Right
+      vec4 px2 = FETCH_PIXEL((offset + (2.0 * u) - 1.0) / texlen);
+
+      acc += (px1 + px2) * weight;
   }
-#endif

 #ifndef SHD_NOMUL
   gl_FragColor = (acc / div) * col;
--- a/src/modules/evas/engines/gl_generic/filters/gl_filter_blur.c
+++ b/src/modules/evas/engines/gl_generic/filters/gl_filter_blur.c
@ -6,26 +6,28 @@
 static inline double
 _radius_to_sigma(double radius)
 {
-   // FIXME: This was supposed to be sqrt(r/3) ~ or something close
+   // In theory, sqrt(radius / 3.0), but that means the outer pixels, radius
+   // pixels away from the center, have ~0.001 weight.
+
+   // This is an experimental value - to be adjusted!
   return /*sqrt*/ (radius / 3.0);
 }

 static inline double
-_gaussian_val(double a, double b, double x)
+_gaussian_val(double a EINA_UNUSED, double b, double x)
 {
-   return a * exp(-(x*x/b));
+   return /*a * */ exp(-(x*x/b));
 }

 static void
-_gaussian_calc(double *values, int count, double radius)
+_gaussian_calc(double *values, int max_index, double radius)
 {
-   // f(x) = a * exp(-(x^2 / b))
+   // Gaussian: f(x) = a * exp(-(x^2 / b))
   // sigma is such that variance v = sigma^2
   // v is such that after 3 v the value is almost 0 (ressembles a radius)
   // a = 1 / (sigma * sqrt (2 * pi))
   // b = 2 * sigma^2
-
-   // FIXME: Some of this math doesn't fit right (values too small too fast)
+   // The constant a is not required since we always calculate the divisor

   double a, b, sigma;
   int k;
@ -34,37 +36,37 @@ _gaussian_calc(double *values, int count, double radius)
   a = 1.0 / (sigma * SQRT_2_PI);
   b = 2.0 * sigma * sigma;

-   for (k = 0; k <= count; k++)
+   for (k = 0; k <= max_index; k++)
     {
        values[k] = _gaussian_val(a, b, k);
-        ERR("Gauss %d: %f", k, values[k]);
+        XDBG("Gauss %d: %f", k, values[k]);
     }
 }

 static int
-_gaussian_interpolate(GLfloat **weights, GLfloat **offsets, double radius)
+_gaussian_interpolate(double **weights, double **offsets, double radius)
 {
-   int k, num, count;
-   GLfloat *w, *o;
+   int k, count, max_index;
+   double *w, *o;
   double *values;

-   count = (int) ceil(radius);
-   if (count & 0x1) count++;
-   values = alloca((count + 1) * sizeof(*values));
-    _gaussian_calc(values, count, radius);
+   max_index = (int) ceil(radius);
+   if (max_index & 0x1) max_index++;
+   values = alloca((max_index + 1) * sizeof(*values));
+    _gaussian_calc(values, max_index, radius);

-   num = (count / 2) + 1;
-   *offsets = o = calloc(1, num * sizeof(*o));
-   *weights = w = calloc(1, num * sizeof(*w));
+   count = (max_index / 2) + 1;
+   *offsets = o = calloc(1, count * sizeof(*o));
+   *weights = w = calloc(1, count * sizeof(*w));

   // Center pixel's weight
   k = 0;
   o[k] = 0.0;
   w[k] = values[0];
-   ERR("Interpolating weights %d: w %f o %f", k, w[k], o[k]);
+   XDBG("Interpolating weights %d: w %f o %f", k, w[k], o[k]);

   // Left & right pixels' interpolated weights
-   for (k = 1; k < num; k++)
+   for (k = 1; k < count; k++)
     {
        double w1, w2;

@ -72,11 +74,11 @@ _gaussian_interpolate(GLfloat **weights, GLfloat **offsets, double radius)
        w2 = values[(k - 1) * 2 + 2];
        w[k] = w1 + w2;
        if (EINA_DBL_EQ(w[k], 0.0)) continue;
-        o[k] = (w2 / w[k]) + (k - 1.0) * 2.0;
-        ERR("Interpolating weights %d: %f %f -> w %f o %f", k, w1, w2, w[k], o[k]);
+        o[k] = w2 / w[k];
+        XDBG("Interpolating weights %d: %f %f -> w %f o %f", k, w1, w2, w[k], o[k]);
     }

-   return num;
+   return count;
 }

 static Eina_Bool
@ -88,7 +90,7 @@ _gl_filter_blur(Render_Engine_GL_Generic *re, Evas_Filter_Command *cmd)
   Eina_Bool horiz;
   double sx, sy, sw, sh, ssx, ssy, ssw, ssh, dx, dy, dw, dh, radius;
   int nx, ny, nw, nh, count = 0;
-   GLfloat *weights, *offsets;
+   double *weights, *offsets;

   DEBUG_TIME_BEGIN();