evas filters: Optimize GL blur with interpolation

This optimizes the GL blur algorithm by reducing the number of
texel fetches to roughly half of what was needed before this patch.
It works by exploiting GL's interpolation capabilities.
This commit is contained in:
Jean-Philippe Andre 2017-03-21 19:06:05 +09:00
parent 6af3c20aeb
commit ebeead4681
5 changed files with 120 additions and 81 deletions

View File

@ -652,7 +652,7 @@ void evas_gl_common_filter_displace_push(Evas_Engine_GL_Context *gc
void evas_gl_common_filter_curve_push(Evas_Engine_GL_Context *gc, Evas_GL_Texture *tex,
int x, int y, int w, int h, const uint8_t *points, int channel);
void evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc, Evas_GL_Texture *tex, double sx, double sy, double sw, double sh,
double dx, double dy, double dw, double dh, GLfloat *values, GLfloat *offsets, int count,
double dx, double dy, double dw, double dh, const double * const values, const double * const offsets, int count,
Eina_Bool horiz);
int evas_gl_common_shader_program_init(Evas_GL_Shared *shared);

View File

@ -3432,7 +3432,8 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
Evas_GL_Texture *tex,
double sx, double sy, double sw, double sh,
double dx, double dy, double dw, double dh,
GLfloat *values, GLfloat *offsets, int count,
const double * const weights,
const double * const offsets, int count,
Eina_Bool horiz)
{
double ox1, oy1, ox2, oy2, ox3, oy3, ox4, oy4, pw, ph;
@ -3444,6 +3445,7 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
Eina_Bool blend = EINA_TRUE;
Eina_Bool smooth = EINA_TRUE;
Shader_Type type = horiz ? SHD_FILTER_BLUR_X : SHD_FILTER_BLUR_Y;
GLuint *map_tex_data;
GLuint map_tex;
double sum;
@ -3492,21 +3494,44 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
pipe_region_expand(gc, pn, dx, dy, dw, dh);
PIPE_GROW(gc, pn, 6);
sum = values[0];
for (int k = 1; k < count; k++)
sum += 2.0 * values[k];
/* Convert double data to RGBA pixel data.
*
* We are not using GL_FLOAT or GL_DOUBLE because:
* - It's not as portable (needs extensions),
* - GL_DOUBLE didn't work during my tests (dunno why),
* - GL_FLOAT didn't seem to carry the proper precision all the way to
* the fragment shader,
* - Real data buffers are not available in GLES 2.0,
* - GL_RGBA is 100% portable.
*/
map_tex_data = alloca(2 * count * sizeof(*map_tex_data));
for (int k = 0; k < count; k++)
{
GLuint val;
// Synchronous upload of Nx1 RGBA texture (FIXME: no reuse)
if (k == 0) sum = weights[k];
else sum += 2.0 * weights[k];
// Weight is always > 0.0 and < 255.0 by maths
val = (GLuint) (weights[k] * 256.0 * 256.0 * 256.0);
map_tex_data[k] = val;
// Offset is always in [0.0 , 1.0] by definition
val = (GLuint) (offsets[k] * 256.0 * 256.0 * 256.0);
map_tex_data[k + count] = val;
}
// Synchronous upload of Nx2 RGBA texture (FIXME: no reuse)
glGenTextures(1, &map_tex);
glBindTexture(GL_TEXTURE_2D, map_tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// FIXME: GLES2 requires extensions here!!!
glTexImage2D(GL_TEXTURE_2D, 0, GL_R16F, count, 1, 0, GL_RED, GL_FLOAT, values);
// FIXME: double values don't work??
//glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, count, 1, 0, GL_RED, GL_DOUBLE, values);
if (tex->gc->shared->info.unpack_row_length)
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
glPixelStorei(GL_UNPACK_ALIGNMENT, sizeof(*map_tex_data));
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, count, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, map_tex_data);
// Set curve properties (no need for filter_data)
gc->pipe[pn].shader.filter.map_tex = map_tex;
@ -3516,7 +3541,7 @@ evas_gl_common_filter_blur_push(Evas_Engine_GL_Context *gc,
// Set blur properties... WIP
_filter_data_prepare(gc, pn, prog, 2);
filter_data = gc->pipe[pn].array.filter_data;
filter_data[0] = count;
filter_data[0] = count - 1.0;
filter_data[1] = horiz ? sw : sh;
filter_data[2] = sum;
filter_data[3] = 0.0; // unused

View File

@ -231,34 +231,39 @@ static const char fragment_glsl[] =
"#else\n"
"# define FETCH_PIXEL(x) fetch_pixel(0.0, (x))\n"
"#endif\n"
"float weight_get(float u, float count, float index)\n"
"{\n"
" vec4 val = texture2D(tex_filter, vec2(u / count, index)).bgra;\n"
" return val.a*255.0 + (val.r*255.0/256.0) + (val.g*255.0/256.0/256.0) + (val.b*255.0/256.0/256.0/256.0);\n"
"}\n"
"float offset_get(float u, float count, float index)\n"
"{\n"
" // val.a is always 0 here ~ discard\n"
" vec4 val = texture2D(tex_filter, vec2(u / count, index)).bgra;\n"
" return (val.r*255.0/256.0) + (val.g*255.0/256.0/256.0) + (val.b*255.0/256.0/256.0/256.0);\n"
"}\n"
"void main()\n"
"{\n"
" float u, u_div, count, div, w;\n"
" float u, texlen, count, div;\n"
" float weight, offset;\n"
" vec4 acc, px;\n"
" count = blur_data.x;\n"
" u_div = blur_data.y;\n"
" //div = blur_data.z;\n"
" // Center pixel\n"
" w = texture2D(tex_filter, vec2(0.0, 0.0)).r;\n"
" px = FETCH_PIXEL(u / u_div);\n"
" acc = px * w;\n"
" div = w;\n"
" // Left & right\n"
" for (u = 1; u <= count; u += 1.0)\n"
"#if 0\n"
" div = 0.0;\n"
" for (u = -count; u <= count; u += 1.0)\n"
" texlen = blur_data.y;\n"
" div = blur_data.z;\n"
" // Center pixel, offset is 0.0\n"
" weight = weight_get(0.0, count, 0.0);\n"
" px = FETCH_PIXEL(0.0);\n"
" acc = px * weight;\n"
" for (u = 1.0; u <= count; u += 1.0)\n"
" {\n"
" w = texture2D(tex_filter, vec2(abs(u) / count, 0.0)).r;\n"
"#ifndef SHD_FILTER_DIR_Y\n"
" px = fetch_pixel(u / u_div, 0.0);\n"
"#else\n"
" px = fetch_pixel(0.0, u / u_div);\n"
"#endif\n"
" acc += px * w;\n"
" div += w;\n"
" weight = weight_get(u, count, 0.0);\n"
" offset = offset_get(u, count, 1.0);\n"
" // Left\n"
" vec4 px1 = FETCH_PIXEL(-((offset + (2.0 * u) - 1.0)) / texlen);\n"
" // Right\n"
" vec4 px2 = FETCH_PIXEL((offset + (2.0 * u) - 1.0) / texlen);\n"
" acc += (px1 + px2) * weight;\n"
" }\n"
"#endif\n"
"#ifndef SHD_NOMUL\n"
" gl_FragColor = (acc / div) * col;\n"
"#else\n"

View File

@ -251,40 +251,47 @@ vec4 fetch_pixel(float ox, float oy)
# define FETCH_PIXEL(x) fetch_pixel(0.0, (x))
#endif
// Decode one blur weight from the RGBA lookup texture tex_filter.
// The texel at (u / count, index) is read with its red and blue channels
// swapped (.bgra). Alpha supplies the integer part and the three color
// channels supply successive base-256 fractional digits.
float weight_get(float u, float count, float index)
{
   vec2 coord = vec2(u / count, index);
   vec4 texel = texture2D(tex_filter, coord).bgra;
   float whole = texel.a*255.0;
   float frac1 = texel.r*255.0/256.0;
   float frac2 = texel.g*255.0/256.0/256.0;
   float frac3 = texel.b*255.0/256.0/256.0/256.0;
   return whole + frac1 + frac2 + frac3;
}
// Decode one sampling offset from the RGBA lookup texture tex_filter.
// Same fixed-point layout as the weights, except that the alpha channel
// is expected to always be 0 for offsets, so it is left out of the sum.
float offset_get(float u, float count, float index)
{
   vec2 coord = vec2(u / count, index);
   vec4 texel = texture2D(tex_filter, coord).bgra;
   float frac1 = texel.r*255.0/256.0;
   float frac2 = texel.g*255.0/256.0/256.0;
   float frac3 = texel.b*255.0/256.0/256.0/256.0;
   return frac1 + frac2 + frac3;
}
void main()
{
float u, u_div, count, div, w;
float u, texlen, count, div;
float weight, offset;
vec4 acc, px;
count = blur_data.x;
u_div = blur_data.y;
//div = blur_data.z;
texlen = blur_data.y;
div = blur_data.z;
// Center pixel
w = texture2D(tex_filter, vec2(0.0, 0.0)).r;
px = FETCH_PIXEL(u / u_div);
acc = px * w;
div = w;
// Center pixel, offset is 0.0
weight = weight_get(0.0, count, 0.0);
px = FETCH_PIXEL(0.0);
acc = px * weight;
// Left & right
for (u = 1; u <= count; u += 1.0)
#if 0
div = 0.0;
for (u = -count; u <= count; u += 1.0)
for (u = 1.0; u <= count; u += 1.0)
{
w = texture2D(tex_filter, vec2(abs(u) / count, 0.0)).r;
weight = weight_get(u, count, 0.0);
offset = offset_get(u, count, 1.0);
#ifndef SHD_FILTER_DIR_Y
px = fetch_pixel(u / u_div, 0.0);
#else
px = fetch_pixel(0.0, u / u_div);
#endif
// Left
vec4 px1 = FETCH_PIXEL(-((offset + (2.0 * u) - 1.0)) / texlen);
acc += px * w;
div += w;
// Right
vec4 px2 = FETCH_PIXEL((offset + (2.0 * u) - 1.0) / texlen);
acc += (px1 + px2) * weight;
}
#endif
#ifndef SHD_NOMUL
gl_FragColor = (acc / div) * col;

View File

@ -6,26 +6,28 @@
static inline double
_radius_to_sigma(double radius)
{
   /* Map a blur radius to the sigma used for the gaussian weights.
    *
    * The theoretical mapping noted here is sqrt(radius / 3.0), but with it
    * the outermost pixel (radius pixels away from the center) only gets a
    * weight of ~0.001. The linear radius / 3.0 used instead is an
    * experimental value that may still be adjusted.
    */
   const double sigma = radius / 3.0;

   return sigma;
}
static inline double
_gaussian_val(double a, double b, double x)
_gaussian_val(double a EINA_UNUSED, double b, double x)
{
return a * exp(-(x*x/b));
return /*a * */ exp(-(x*x/b));
}
static void
_gaussian_calc(double *values, int count, double radius)
_gaussian_calc(double *values, int max_index, double radius)
{
// f(x) = a * exp(-(x^2 / b))
// Gaussian: f(x) = a * exp(-(x^2 / b))
// sigma is such that variance v = sigma^2
// v is such that after 3 v the value is almost 0 (resembles a radius)
// a = 1 / (sigma * sqrt (2 * pi))
// b = 2 * sigma^2
// FIXME: Some of this math doesn't fit right (values too small too fast)
// The constant a is not required since we always calculate the divisor
double a, b, sigma;
int k;
@ -34,37 +36,37 @@ _gaussian_calc(double *values, int count, double radius)
a = 1.0 / (sigma * SQRT_2_PI);
b = 2.0 * sigma * sigma;
for (k = 0; k <= count; k++)
for (k = 0; k <= max_index; k++)
{
values[k] = _gaussian_val(a, b, k);
ERR("Gauss %d: %f", k, values[k]);
XDBG("Gauss %d: %f", k, values[k]);
}
}
static int
_gaussian_interpolate(GLfloat **weights, GLfloat **offsets, double radius)
_gaussian_interpolate(double **weights, double **offsets, double radius)
{
int k, num, count;
GLfloat *w, *o;
int k, count, max_index;
double *w, *o;
double *values;
count = (int) ceil(radius);
if (count & 0x1) count++;
values = alloca((count + 1) * sizeof(*values));
_gaussian_calc(values, count, radius);
max_index = (int) ceil(radius);
if (max_index & 0x1) max_index++;
values = alloca((max_index + 1) * sizeof(*values));
_gaussian_calc(values, max_index, radius);
num = (count / 2) + 1;
*offsets = o = calloc(1, num * sizeof(*o));
*weights = w = calloc(1, num * sizeof(*w));
count = (max_index / 2) + 1;
*offsets = o = calloc(1, count * sizeof(*o));
*weights = w = calloc(1, count * sizeof(*w));
// Center pixel's weight
k = 0;
o[k] = 0.0;
w[k] = values[0];
ERR("Interpolating weights %d: w %f o %f", k, w[k], o[k]);
XDBG("Interpolating weights %d: w %f o %f", k, w[k], o[k]);
// Left & right pixels' interpolated weights
for (k = 1; k < num; k++)
for (k = 1; k < count; k++)
{
double w1, w2;
@ -72,11 +74,11 @@ _gaussian_interpolate(GLfloat **weights, GLfloat **offsets, double radius)
w2 = values[(k - 1) * 2 + 2];
w[k] = w1 + w2;
if (EINA_DBL_EQ(w[k], 0.0)) continue;
o[k] = (w2 / w[k]) + (k - 1.0) * 2.0;
ERR("Interpolating weights %d: %f %f -> w %f o %f", k, w1, w2, w[k], o[k]);
o[k] = w2 / w[k];
XDBG("Interpolating weights %d: %f %f -> w %f o %f", k, w1, w2, w[k], o[k]);
}
return num;
return count;
}
static Eina_Bool
@ -88,7 +90,7 @@ _gl_filter_blur(Render_Engine_GL_Generic *re, Evas_Filter_Command *cmd)
Eina_Bool horiz;
double sx, sy, sw, sh, ssx, ssy, ssw, ssh, dx, dy, dw, dh, radius;
int nx, ny, nw, nh, count = 0;
GLfloat *weights, *offsets;
double *weights, *offsets;
DEBUG_TIME_BEGIN();