smooth map works (down to 1/2 size its good. its pretty much gl_linear, but

in software. but it looks gorgeous. rotations are groovey.



SVN revision: 43226
This commit is contained in:
Carsten Haitzler 2009-10-23 13:17:22 +00:00
parent 4a6609c55f
commit b36917b180
2 changed files with 209 additions and 104 deletions

View File

@ -10,7 +10,6 @@
#include "evas_convert_grypal_6.h"
#include "evas_convert_yuv.h"
#ifdef USE_DITHER_44
const DATA8 _evas_dither_44[4][4] =
{
{ 0, 8, 2, 10},
@ -18,9 +17,7 @@ const DATA8 _evas_dither_44[4][4] =
{ 3, 11, 1, 9},
{15, 7, 13, 5}
};
#endif
#ifdef USE_DITHER_128128
const DATA8 _evas_dither_128128[128][128] =
{
{ 0, 41, 23, 5, 17, 39, 7, 15, 62, 23, 40, 51, 31, 47, 9, 32, 52, 27, 57, 25, 6, 61, 27, 52, 37, 7, 40, 63, 18, 36, 10, 42, 25, 62, 45, 34, 20, 42, 37, 14, 35, 29, 50, 10, 61, 2, 40, 8, 37, 12, 58, 22, 5, 41, 10, 39, 0, 60, 11, 46, 2, 55, 38, 17, 36, 59, 13, 54, 37, 56, 8, 29, 16, 13, 63, 22, 41, 55, 7, 20, 49, 14, 23, 55, 37, 23, 19, 36, 15, 49, 23, 63, 30, 14, 38, 27, 53, 13, 22, 41, 19, 31, 7, 19, 50, 30, 49, 16, 3, 32, 56, 40, 29, 34, 8, 48, 19, 45, 4, 51, 12, 46, 35, 49, 16, 42, 12, 62 },
@ -152,7 +149,6 @@ const DATA8 _evas_dither_128128[128][128] =
{ 19, 42, 9, 48, 2, 44, 11, 37, 48, 20, 33, 16, 55, 35, 49, 15, 37, 20, 59, 16, 53, 22, 56, 31, 50, 11, 34, 54, 16, 51, 4, 49, 33, 53, 21, 28, 56, 24, 31, 9, 52, 16, 48, 24, 44, 13, 51, 20, 31, 49, 18, 6, 34, 2, 44, 14, 47, 8, 15, 43, 13, 41, 33, 52, 20, 61, 7, 51, 34, 62, 4, 20, 36, 33, 43, 8, 46, 13, 53, 17, 45, 42, 9, 31, 52, 11, 30, 56, 13, 59, 17, 44, 27, 6, 62, 11, 43, 17, 49, 38, 26, 2, 16, 27, 58, 21, 54, 18, 26, 5, 35, 61, 43, 27, 7, 39, 14, 58, 37, 55, 20, 33, 13, 40, 62, 10, 55, 5 },
{ 51, 14, 61, 29, 59, 20, 55, 31, 0, 49, 11, 60, 3, 26, 22, 56, 0, 40, 12, 43, 41, 8, 36, 0, 17, 57, 24, 2, 46, 26, 61, 18, 0, 38, 12, 59, 6, 49, 3, 57, 19, 63, 5, 33, 18, 54, 28, 56, 0, 43, 26, 46, 63, 27, 56, 22, 27, 54, 38, 28, 63, 24, 10, 45, 0, 31, 42, 21, 12, 25, 44, 49, 59, 6, 26, 50, 3, 34, 27, 59, 0, 35, 62, 16, 4, 58, 47, 0, 43, 24, 37, 2, 54, 20, 46, 31, 0, 56, 34, 5, 55, 45, 60, 37, 0, 40, 10, 38, 63, 46, 15, 20, 0, 53, 21, 62, 30, 11, 24, 27, 40, 0, 57, 26, 3, 45, 27, 35 }
};
#endif
EAPI void
evas_common_convert_init(void)

View File

@ -17,6 +17,7 @@ typedef struct _Span Span;
struct _Span
{
int x1, x2;
FPc o1, o2;
FPc u[2], v[2];
};
@ -31,6 +32,9 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
RGBA_Map_Point *p,
int smooth, int level);
//extern const DATA8 _evas_dither_44[4][4];
//extern const DATA8 _evas_dither_128128[128][128];
static FPc
_interp(int x1, int x2, int p, FPc u1, FPc u2)
{
@ -84,75 +88,22 @@ evas_common_map4_rgba(RGBA_Image *src, RGBA_Image *dst,
dc->clip.use = c; dc->clip.x = cx; dc->clip.y = cy; dc->clip.w = cw; dc->clip.h = ch;
}
// 12.63 % of time - this can improve
static void
evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
RGBA_Draw_Context *dc,
RGBA_Map_Point *p,
int smooth, int level)
_calc_spans(RGBA_Map_Point *p, Line *spans, int ystart, int yend, int cx, int cy, int cw, int ch)
{
int i;
int c, cx, cy, cw, ch;
int ytop, ybottom, ystart, yend, y, yp, sw, shp, swp;
int i, y, yp;
int py[4];
Line *spans = NULL;
DATA32 *buf, *sp;
RGBA_Gfx_Func func;
int edge[4][4], edge_num, swapped, order[4];
FPc uv[4][2], u, v, x, h, t;
c = dc->clip.use; cx = dc->clip.x; cy = dc->clip.y; cw = dc->clip.w; ch = dc->clip.h;
if (!c)
{
cx = 0;
cy = 0;
cw = dst->cache_entry.w;
ch = dst->cache_entry.h;
}
ytop = p[0].y;
for (i = 1; i < 4; i++)
{
if (p[i].y < ytop) ytop = p[i].y;
}
ybottom = p[0].y;
for (i = 1; i < 4; i++)
{
if (p[i].y > ybottom) ybottom = p[i].y;
}
ytop = ytop >> FP;
ybottom = ybottom >> FP;
if ((ytop >= (cy + ch)) || (ybottom < cy)) return;
if (ytop < cy) ystart = cy;
else ystart = ytop;
if (ybottom >= (cy + ch)) yend = (cy + ch) - 1;
else yend = ybottom;
sp = src->image.data;
sw = src->cache_entry.w;
swp = sw << FP;
shp = src->cache_entry.h << FP;
#if 1
// maybe faster on x86?
#if 1 // maybe faster on x86?
for (i = 0; i < 4; i++) py[i] = p[i].y >> FP;
# define PY(x) (py[x])
#else
# define PY(x) (p[x].y >> FP)
#endif
spans = alloca((yend - ystart + 1) * sizeof(Line));
memset(spans, 0, (yend - ystart + 1) * sizeof(Line));
for (i = 0; i < 4; i++)
{
if (p[i].u < 0) p[i].u = 0;
else if (p[i].u > (src->cache_entry.w << FP))
p[i].u = src->cache_entry.w << FP;
if (p[i].v < 0) p[i].v = 0;
else if (p[i].v > (src->cache_entry.h << FP))
p[i].v = src->cache_entry.h << FP;
}
if ((PY(0) == PY(1)) && (PY(0) == PY(2)) && (PY(0) == PY(3)))
{
// all on one line. eg:
@ -165,8 +116,6 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
{
for (y = ystart; y <= yend; y++)
{
int edge[4][3], edge_num, swapped, order[4];
FPc uv[4][2], u, v, x, h, t;
yp = y - ystart;
edge_num = 0;
@ -225,13 +174,12 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
// calculate line x points for each edge
for (i = 0; i < edge_num; i++)
{
int e1 = edge[i][0], e2 = edge[i][1];
int e1 = edge[i][0];
int e2 = edge[i][1];
h = (p[e2].y - p[e1].y) >> FP; // height of edge
t = (((y << FP) + (FP1 - 1)) - p[e1].y) >> FP;
x = p[e2].x - p[e1].x;
// printf(" x = %i, p1 = %i, p2 = %i\n", x, p[e1].x, p[e2].x);
// printf(" + %i\n", ((x * t) / h));
x = p[e1].x + ((x * t) / h);
u = p[e2].u - p[e1].u;
@ -243,10 +191,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
uv[i][1] = v;
uv[i][0] = u;
edge[i][2] = x >> FP;
// printf("%i | %3i: %3i - %i / %i [%i->%i]\n",
// y, i, x >> FP,
// t, h >> FP,
// p[e1].x >> FP, p[e2].x >> FP);
edge[i][3] = x;
// also fill in order
order[i] = i;
}
@ -270,19 +215,21 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
{
i = 0;
spans[yp].span[i].x1 = edge[order[0]][2];
spans[yp].span[i].o1 = edge[order[0]][3];
spans[yp].span[i].u[0] = uv[order[0]][0];
spans[yp].span[i].v[0] = uv[order[0]][1];
spans[yp].span[i].x2 = edge[order[1]][2];
spans[yp].span[i].o2 = edge[order[1]][3];
spans[yp].span[i].u[1] = uv[order[1]][0];
spans[yp].span[i].v[1] = uv[order[1]][1];
if ((spans[yp].span[i].x1 >= (cx + cw)) ||
(spans[yp].span[i].x2 < 0))
(spans[yp].span[i].x2 < cx))
{
spans[yp].span[i].x1 = -1;
}
else
{
if (spans[yp].span[i].x1 < 0)
if (spans[yp].span[i].x1 < cx)
{
spans[yp].span[i].u[0] =
_interp(spans[yp].span[i].x1,
@ -294,7 +241,8 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].x2, 0,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x1 = 0;
spans[yp].span[i].x1 = cx;
spans[yp].span[i].o1 = cx << FP;
}
if (spans[yp].span[i].x2 >= (cx + cw))
{
@ -309,6 +257,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x2 = (cx + cw);
spans[yp].span[i].o2 = (cx + cw) << FP;
}
i++;
spans[yp].span[i].x1 = -1;
@ -324,13 +273,13 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].u[1] = uv[order[1]][0];
spans[yp].span[i].v[1] = uv[order[1]][1];
if ((spans[yp].span[i].x1 >= (cx + cw)) ||
(spans[yp].span[i].x2 < 0))
(spans[yp].span[i].x2 < cx))
{
spans[yp].span[i].x1 = -1;
}
else
{
if (spans[yp].span[i].x1 < 0)
if (spans[yp].span[i].x1 < cx)
{
spans[yp].span[i].u[0] =
_interp(spans[yp].span[i].x1,
@ -342,7 +291,8 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].x2, 0,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x1 = 0;
spans[yp].span[i].x1 = cx;
spans[yp].span[i].o1 = cx << FP;
}
if (spans[yp].span[i].x2 >= (cx + cw))
{
@ -357,6 +307,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x2 = (cx + cw);
spans[yp].span[i].o2 = (cx + cw) << FP;
}
i++;
}
@ -367,13 +318,13 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].u[1] = uv[order[3]][0];
spans[yp].span[i].v[1] = uv[order[3]][1];
if ((spans[yp].span[i].x1 >= (cx + cw)) ||
(spans[yp].span[i].x2 < 0))
(spans[yp].span[i].x2 < cx))
{
spans[yp].span[i].x1 = -1;
}
else
{
int l = 0;
int l = cx;
if (i > 0) l = spans[yp].span[i - 1].x2;
if (spans[yp].span[i].x1 < l)
@ -388,7 +339,8 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].x2, l,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x1 = 0;
spans[yp].span[i].x1 = l;
spans[yp].span[i].o1 = l << FP;
}
if (spans[yp].span[i].x2 >= (cx + cw))
{
@ -403,6 +355,7 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
spans[yp].span[i].v[0],
spans[yp].span[i].v[1]);
spans[yp].span[i].x2 = (cx + cw);
spans[yp].span[i].o2 = (cx + cw) << FP;
}
}
}
@ -412,43 +365,197 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
}
}
}
}
if ((!src->cache_entry.flags.alpha) &&
(!dst->cache_entry.flags.alpha) &&
// 66.74 % of time
static void
evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
RGBA_Draw_Context *dc,
RGBA_Map_Point *p,
int smooth, int level)
{
int i;
int c, cx, cy, cw, ch;
int ytop, ybottom, ystart, yend, y, yp, sw, sh, shp, swp, direct;
Line *spans;
DATA32 *buf, *sp;
RGBA_Gfx_Func func;
// get the clip
c = dc->clip.use; cx = dc->clip.x; cy = dc->clip.y; cw = dc->clip.w; ch = dc->clip.h;
if (!c)
{
cx = 0;
cy = 0;
cw = dst->cache_entry.w;
ch = dst->cache_entry.h;
}
// find y yop line and y bottom line
ytop = p[0].y;
for (i = 1; i < 4; i++)
{
if (p[i].y < ytop) ytop = p[i].y;
}
ybottom = p[0].y;
for (i = 1; i < 4; i++)
{
if (p[i].y > ybottom) ybottom = p[i].y;
}
// convert to screen space from fixed point
ytop = ytop >> FP;
ybottom = ybottom >> FP;
// if its outside the clip vertical bounds - don't bother
if ((ytop >= (cy + ch)) || (ybottom < cy)) return;
// limit to the clip vertical bounds
if (ytop < cy) ystart = cy;
else ystart = ytop;
if (ybottom >= (cy + ch)) yend = (cy + ch) - 1;
else yend = ybottom;
// get some source image information
sp = src->image.data;
sw = src->cache_entry.w;
swp = sw << (FP + FPI);
shp = src->cache_entry.h << (FP + FPI);
// limit u,v coords of points to be within the source image
for (i = 0; i < 4; i++)
{
if (p[i].u < 0) p[i].u = 0;
else if (p[i].u > (src->cache_entry.w << FP))
p[i].u = src->cache_entry.w << FP;
if (p[i].v < 0) p[i].v = 0;
else if (p[i].v > (src->cache_entry.h << FP))
p[i].v = src->cache_entry.h << FP;
}
// allocate some spans to hold out span list
spans = alloca((yend - ystart + 1) * sizeof(Line));
if (!spans) return;
memset(spans, 0, (yend - ystart + 1) * sizeof(Line));
// calculate the spans list
_calc_spans(p, spans, ystart, yend, cx, cy, cw, ch);
// walk through spans and render
// if operation is solid, bypass buf and draw func and draw direct to dst
direct = 0;
if ((!src->cache_entry.flags.alpha) && (!dst->cache_entry.flags.alpha) &&
(!dc->mul.use))
direct = 1;
else
{
buf = alloca(cw * sizeof(DATA32));
if (!buf) return;
if (dc->mul.use)
func = evas_common_gfx_func_composite_pixel_color_span_get(src, dc->mul.col, dst, cw, dc->render_op);
else
func = evas_common_gfx_func_composite_pixel_span_get(src, dst, cw, dc->render_op);
}
if (smooth)
{
for (y = ystart; y <= yend; y++)
{
int x, w, ww, dx, dy, sx, sy;
FPc u, v, ud, vd, up, vp;
DATA32 *d, *dptr, *s, *so[4], val1, val2;
int x, w, ww;
FPc u, v, ud, vd, dv;
DATA32 *d, *s, *so[4], val1, val2;
yp = y - ystart;
for (i = 0; i < 2; i++)
{
if (spans[yp].span[i].x1 >= 0)
{
long long tl;
x = spans[yp].span[i].x1;
w = (spans[yp].span[i].x2 - x);
if (w <= 0) continue;
dv = (spans[yp].span[i].o2 - spans[yp].span[i].o1);
ww = w;
d = dst->image.data + (y * dst->cache_entry.w) + x;
u = spans[yp].span[i].u[0] << FPI;
v = spans[yp].span[i].v[0] << FPI;
ud = ((spans[yp].span[i].u[1] << FPI) - u) / w;
vd = ((spans[yp].span[i].v[1] << FPI) - v) / w;
tl = (long long)ud * (w << FP);
tl = tl / dv;
ud = tl;
u -= (ud * (spans[yp].span[i].o1 - (spans[yp].span[i].x1 << FP))) / FP1;
tl = (long long)vd * (w << FP);
tl = tl / dv;
vd = tl;
v -= (vd * (spans[yp].span[i].o1 - (spans[yp].span[i].x1 << FP))) / FP1;
if (ud < 0) u -= 1;
if (vd < 0) v -= 1;
if (direct)
d = dst->image.data + (y * dst->cache_entry.w) + x;
else
d = buf;
while (ww > 0)
{
s = sp + ((v >> (FP + FPI)) * sw) +
(u >> (FP + FPI));
*d++ = *s;
FPc u1, v1, u2, v2;
FPc rv, ru;
DATA32 vala, valb, val1, val2;
u1 = u;
if (u1 < 0) u1 = 0;
else if (u1 >= swp) u1 = swp - 1;
v1 = v;
if (v1 < 0) v1 = 0;
else if (v1 >= shp) v1 = shp - 1;
u2 = u1 + FPFPI1;
if (u2 >= swp) u2 = swp - 1;
v2 = v1 + FPFPI1;
if (v2 >= shp) v2 = shp - 1;
ru = (u >> (FP + FPI - 8)) & 0xff;
rv = (v >> (FP + FPI - 8)) & 0xff;
s = sp + ((v1 >> (FP + FPI)) * sw) +
(u1 >> (FP + FPI));
val1 = *s;
s = sp + ((v1 >> (FP + FPI)) * sw) +
(u2 >> (FP + FPI));
val2 = *s;
vala = INTERP_256(ru, val2, val1);
s = sp + ((v2 >> (FP + FPI)) * sw) +
(u1 >> (FP + FPI));
val1 = *s;
s = sp + ((v2 >> (FP + FPI)) * sw) +
(u2 >> (FP + FPI));
val2 = *s;
valb = INTERP_256(ru, val2, val1);
*d++ = INTERP_256(rv, valb, vala);
u += ud;
v += vd;
ww--;
}
if (!direct)
{
d = dst->image.data;
d += (y * dst->cache_entry.w) + x;
func(buf, NULL, dc->mul.col, d, w);
}
}
else break;
}
@ -456,18 +563,11 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
}
else
{
buf = alloca(cw * sizeof(DATA32));
if (dc->mul.use)
func = evas_common_gfx_func_composite_pixel_color_span_get(src, dc->mul.col, dst, cw, dc->render_op);
else
func = evas_common_gfx_func_composite_pixel_span_get(src, dst, cw, dc->render_op);
for (y = ystart; y <= yend; y++)
{
int x, w, ww, dx, dy, sx, sy;
FPc u, v, ud, vd, up, vp;
DATA32 *d, *dptr, *s, *so[4], val1, val2;
int x, w, ww;
FPc u, v, ud, vd;
DATA32 *d, *s;
yp = y - ystart;
for (i = 0; i < 2; i++)
@ -479,13 +579,18 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
if (w <= 0) continue;
ww = w;
d = buf;
u = spans[yp].span[i].u[0] << FPI;
v = spans[yp].span[i].v[0] << FPI;
ud = ((spans[yp].span[i].u[1] << FPI) - u) / w;
vd = ((spans[yp].span[i].v[1] << FPI) - v) / w;
if (ud < 0) u -= 1;
if (vd < 0) v -= 1;
if (direct)
d = dst->image.data + (y * dst->cache_entry.w) + x;
else
d = buf;
while (ww > 0)
{
s = sp + ((v >> (FP + FPI)) * sw) +
@ -495,9 +600,13 @@ evas_common_map4_rgba_internal(RGBA_Image *src, RGBA_Image *dst,
v += vd;
ww--;
}
dptr = dst->image.data;
dptr += (y * dst->cache_entry.w) + x;
func(buf, NULL, dc->mul.col, dptr, w);
if (!direct)
{
d = dst->image.data;
d += (y * dst->cache_entry.w) + x;
func(buf, NULL, dc->mul.col, d, w);
}
}
else break;
}