From 1f4e1b0a9ea07d181dc6584a97d26f7c51b008db Mon Sep 17 00:00:00 2001 From: Carsten Haitzler Date: Thu, 20 Apr 2000 02:37:29 +0000 Subject: [PATCH] mmx scaling back in - but forcibly disabled. new C scaling for scaling down.. works now.. :) SVN revision: 2509 --- src/blend.c | 10 +- src/rend.c | 8 + src/scale.c | 1549 +++++++++++++++++++++++++++++++++++++++++---------- src/scale.h | 4 + 4 files changed, 1263 insertions(+), 308 deletions(-) diff --git a/src/blend.c b/src/blend.c index e944b66..044e6de 100644 --- a/src/blend.c +++ b/src/blend.c @@ -1109,9 +1109,9 @@ __imlib_BlendImageToImage(ImlibImage *im_src, ImlibImage *im_dst, /* setup h */ h = dh; /* set our scaling up in x / y dir flags */ - if (dw >= sw) + if (dw > sw) xup = 1; - if (dh >= sh) + if (dh > sh) yup = 1; if (!IMAGE_HAS_ALPHA(im_dst)) merge_alpha = 0; @@ -1132,6 +1132,11 @@ __imlib_BlendImageToImage(ImlibImage *im_src, ImlibImage *im_dst, /* scale the imagedata for this LINESIZE lines chunk of image */ if (aa) { +#ifdef DO_MMX_ASM /*\ TODO: runtime mmx check \*/ + __imlib_Scale_mmx_AARGBA(ypoints, xpoints, buf, xapoints, + yapoints, xup, yup, dxx, dyy + y, + 0, 0, dw, hh, dw, im_src->w); +#else if (IMAGE_HAS_ALPHA(im_src)) __imlib_ScaleAARGBA(ypoints, xpoints, buf, xapoints, yapoints, xup, yup, dxx, dyy + y, @@ -1140,6 +1145,7 @@ __imlib_BlendImageToImage(ImlibImage *im_src, ImlibImage *im_dst, __imlib_ScaleAARGB(ypoints, xpoints, buf, xapoints, yapoints, xup, yup, dxx, dyy + y, 0, 0, dw, hh, dw, im_src->w); +#endif } else __imlib_ScaleSampleRGBA(ypoints, xpoints, buf, dxx, dyy + y, diff --git a/src/rend.c b/src/rend.c index 1589a80..e217e4e 100644 --- a/src/rend.c +++ b/src/rend.c @@ -216,6 +216,13 @@ __imlib_RenderImage(Display *d, ImlibImage *im, /* scale the imagedata for this LINESIZE lines chunk of image data */ if (antialias) { +#ifdef DO_MMX_ASM /*\ TODO: runtime mmx check \*/ + __imlib_Scale_mmx_AARGBA(ypoints, xpoints, buf, xapoints, + yapoints, xup, yup, + ((sx * dw) / sw), + ((sy * dh) 
/ sh) + y, + 0, 0, dw, hh, dw, im->w); +#else if (IMAGE_HAS_ALPHA(im)) __imlib_ScaleAARGBA(ypoints, xpoints, buf, xapoints, yapoints, xup, yup, @@ -226,6 +233,7 @@ __imlib_RenderImage(Display *d, ImlibImage *im, yapoints, xup, yup, ((sx * dw) / sw), ((sy * dh) / sh) + y, 0, 0, dw, hh, dw, im->w); +#endif } else __imlib_ScaleSampleRGBA(ypoints, xpoints, buf, diff --git a/src/scale.c b/src/scale.c index 88831ce..e9a2bca 100644 --- a/src/scale.c +++ b/src/scale.c @@ -11,63 +11,12 @@ #define INV_YAP (255 - yapoints[dyy + y]) #define YAP (yapoints[dyy + y]) -#if defined(DO_MMX_ASM) && defined(__GNUC__) -/*\ MMX asm versions. TODO: insn order for pairing on PMMX \*/ -#define INTERP_ARGB_XY(dest, src, sow, x, y) __asm__ (\ - "pxor %%mm6, %%mm6\n\t" \ - "movd %3, %%mm0\n\t" \ - "movd %4, %%mm1\n\t" \ - "punpcklwd %%mm0, %%mm0\n\t" \ - "punpcklwd %%mm1, %%mm1\n\t" \ - "punpckldq %%mm0, %%mm0\n\t" \ - "punpckldq %%mm1, %%mm1\n\t" \ - "movq (%1), %%mm2\n\t" \ - "movq (%1, %2, 4), %%mm4\n\t" \ - "movq %%mm2, %%mm3\n\t" \ - "movq %%mm4, %%mm5\n\t" \ - "punpcklbw %%mm6, %%mm2\n\t" \ - "punpcklbw %%mm6, %%mm4\n\t" \ - "punpckhbw %%mm6, %%mm3\n\t" \ - "punpckhbw %%mm6, %%mm5\n\t" \ - "psubw %%mm2, %%mm3\n\t" \ - "psubw %%mm4, %%mm5\n\t" \ - "psllw %5, %%mm3\n\t" \ - "psllw %5, %%mm5\n\t" \ - "pmulhw %%mm0, %%mm3\n\t" \ - "pmulhw %%mm0, %%mm5\n\t" \ - "paddw %%mm2, %%mm3\n\t" \ - "paddw %%mm4, %%mm5\n\t" \ - "psubw %%mm3, %%mm5\n\t" \ - "psllw %5, %%mm5\n\t" \ - "pmulhw %%mm1, %%mm5\n\t" \ - "paddw %%mm3, %%mm5\n\t" \ - "packuswb %%mm5, %%mm5\n\t" \ - "movd %%mm5, (%0)" \ - : /*\ No outputs \*/ \ - : "r" ((dest)), "r" ((src)), "r" ((sow)), \ - "g" ((x) << 4), "g" ((y) << 4), "I" (16 - 12)) - -#define INTERP_ARGB_Y(dest, src, sow, y) __asm__ (\ - "pxor %%mm6, %%mm6\n\t" \ - "movd %3, %%mm0\n\t" \ - "punpcklwd %%mm0, %%mm0\n\t" \ - "punpckldq %%mm0, %%mm0\n\t" \ - "movd (%1), %%mm2\n\t" \ - "movd (%1, %2, 4), %%mm4\n\t" \ - "punpcklbw %%mm6, %%mm2\n\t" \ - "punpcklbw %%mm6, 
%%mm4\n\t" \ - "psubw %%mm2, %%mm4\n\t" \ - "psllw %4, %%mm4\n\t" \ - "pmulhw %%mm0, %%mm4\n\t" \ - "paddw %%mm2, %%mm4\n\t" \ - "packuswb %%mm4, %%mm4\n\t" \ - "movd %%mm4, (%0)" \ - : /*\ No outputs \*/ \ - : "r" ((dest)), "r" ((src)), "r" ((sow)), \ - "g" ((y) << 4), "I" (16 - 12)) - -#define EMMS() __asm__ ("emms" : : ) +/* forcibly disable asm for scaling - comment out to enable */ +/* +#ifdef DO_MMX_ASM +#undef DO_MMX_ASM #endif +*/ DATA32 ** __imlib_CalcYPoints(DATA32 *src, int sw, int sh, int dh, int b1, int b2) @@ -75,8 +24,8 @@ __imlib_CalcYPoints(DATA32 *src, int sw, int sh, int dh, int b1, int b2) DATA32 **p; int i, j = 0; int val, inc; - - p = malloc(dh * sizeof(DATA32 *)); + + p = malloc((dh + 1) * sizeof(DATA32 *)); if (dh < (b1 + b2)) { if (dh < b1) @@ -106,7 +55,7 @@ __imlib_CalcYPoints(DATA32 *src, int sw, int sh, int dh, int b1, int b2) } val = (sh - b2) << 16; inc = 1 << 16; - for (i = 0; i < b2; i++) + for (i = 0; i <= b2; i++) { p[j++] = src + ((val >> 16) * sw); val += inc; @@ -119,8 +68,8 @@ __imlib_CalcXPoints(int sw, int dw, int b1, int b2) { int *p, i, j = 0; int val, inc; - - p = malloc(dw * sizeof(int)); + + p = malloc((dw + 1) * sizeof(int)); if (dw < (b1 + b2)) { if (dw < b1) @@ -150,7 +99,7 @@ __imlib_CalcXPoints(int sw, int dw, int b1, int b2) } val = (sw - b2) << 16; inc = 1 << 16; - for (i = 0; i < b2; i++) + for (i = 0; i <= b2; i++) { p[j++] = (val >> 16); val += inc; @@ -203,32 +152,30 @@ __imlib_CalcApoints(int s, int d, int b1, int b2) /* scaling down */ else { + int val, inc; + for (i = 0; i < b1; i++) - p[j++] = 1; + p[j++] = (1 << (16 + 14)) + (1 << 14); if (d > (b1 + b2)) { - int ss, dd; + int ss, dd, ap, Cp; ss = s - b1 - b2; dd = d - b1 - b2; + val = 0; + inc = (ss << 16) / dd; + Cp = ((dd << 14) / ss) + 1; for (i = 0; i < dd; i++) { - v = (((i + 1) * ss) / dd) - ((i * ss) / dd); - if (v != 1) - { - if (((((i + 1) * ss) / dd) + b1) >= s) - v = s - (((i * ss) / dd) + b1) - 1; - p[j++] = v; - } - else - p[j++] = v; - if 
(p[j - 1] < 1) - p[j - 1] = 1; + ap = ((0x100 - ((val >> 8) & 0xff)) * Cp) >> 8; + p[j] = ap | (Cp << 16); + j++; + val += inc; } } for (i = 0; i < b2; i++) - p[j++] = 1; - } + p[j++] = (1 << (16 + 14)) + (1 << 14); + } return p; } @@ -263,7 +210,7 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, int *xapoints, int *yapoints, char xup, char yup, int dxx, int dyy, int dx, int dy, int dw, int dh, int dow, int sow) { - DATA32 *sptr, *ssptr, *dptr; + DATA32 *sptr, *dptr; int x, y, i, j, end; end = dxx + dw; @@ -286,13 +233,7 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, if (XAP > 0) { -#ifdef INTERP_ARGB_XY - INTERP_ARGB_XY(dptr, ypoints[dyy + y] + xpoints[x], - sow, XAP, YAP); - dptr++; -#else - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_XAP; g = G_VAL(pix) * INV_XAP; b = B_VAL(pix) * INV_XAP; @@ -317,17 +258,10 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, b = ((bb * YAP) + (b * INV_YAP)) >> 16; a = ((aa * YAP) + (a * INV_YAP)) >> 16; *dptr++ = RGBA_COMPOSE(r, g, b, a); -#endif } else { -#ifdef INTERP_ARGB_Y - INTERP_ARGB_Y(dptr, ypoints[dyy + y] + xpoints[x], - sow, YAP); - dptr++; -#else - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_YAP; g = G_VAL(pix) * INV_YAP; b = B_VAL(pix) * INV_YAP; @@ -342,7 +276,6 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, b >>= 8; a >>= 8; *dptr++ = RGBA_COMPOSE(r, g, b, a); -#endif } } } @@ -356,13 +289,7 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, if (XAP > 0) { -#ifdef INTERP_ARGB_Y - INTERP_ARGB_Y(dptr, ypoints[dyy + y] + xpoints[x], - 1, XAP); - dptr++; -#else - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_XAP; g = G_VAL(pix) * INV_XAP; b = B_VAL(pix) * INV_XAP; @@ -377,7 +304,6 @@ __imlib_ScaleAARGBA(DATA32 
**ypoints, int *xpoints, DATA32 *dest, b >>= 8; a >>= 8; *dptr++ = RGBA_COMPOSE(r, g, b, a); -#endif } else *dptr++ = sptr[xpoints[x] ]; @@ -387,70 +313,147 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, } /* if we're scaling down vertically */ else if ((xup) && (!yup)) +#ifndef OLD_SCALE_DOWN + { + /*\ 'Correct' version, with math units prepared for MMXification \*/ + int Cy, j; + DATA32 *pix; + int r, g, b, a, rr, gg, bb, aa; + int yap; + + /* go through every scanline in the output buffer */ + for (y = 0; y < dh; y++) + { + Cy = YAP >> 16; + yap = YAP & 0xffff; + + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + pix = ypoints[dyy + y] + xpoints[x]; + r = (R_VAL(pix) * yap) >> 10; + g = (G_VAL(pix) * yap) >> 10; + b = (B_VAL(pix) * yap) >> 10; + a = (A_VAL(pix) * yap) >> 10; + pix += sow; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) + { + r += (R_VAL(pix) * Cy) >> 10; + g += (G_VAL(pix) * Cy) >> 10; + b += (B_VAL(pix) * Cy) >> 10; + a += (A_VAL(pix) * Cy) >> 10; + pix += sow; + } + if (j > 0) + { + r += (R_VAL(pix) * j) >> 10; + g += (G_VAL(pix) * j) >> 10; + b += (B_VAL(pix) * j) >> 10; + a += (A_VAL(pix) * j) >> 10; + } + if (XAP > 0) + { + pix = ypoints[dyy + y] + xpoints[x] + 1; + rr = (R_VAL(pix) * yap) >> 10; + gg = (G_VAL(pix) * yap) >> 10; + bb = (B_VAL(pix) * yap) >> 10; + aa = (A_VAL(pix) * yap) >> 10; + pix += sow; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) + { + rr += (R_VAL(pix) * Cy) >> 10; + gg += (G_VAL(pix) * Cy) >> 10; + bb += (B_VAL(pix) * Cy) >> 10; + aa += (A_VAL(pix) * Cy) >> 10; + pix += sow; + } + if (j > 0) + { + rr += (R_VAL(pix) * j) >> 10; + gg += (G_VAL(pix) * j) >> 10; + bb += (B_VAL(pix) * j) >> 10; + aa += (A_VAL(pix) * j) >> 10; + } + r = r * INV_XAP; + g = g * INV_XAP; + b = b * INV_XAP; + a = a * INV_XAP; + r = (r + ((rr * XAP))) >> 12; + g = (g + ((gg * XAP))) >> 12; + b = (b + ((bb * XAP))) >> 12; + a = (a + ((aa * XAP))) >> 12; + } + else + { + r >>= 4; + g >>= 4; + b >>= 4; + 
a >>= 4; + } + *dptr = RGBA_COMPOSE(r, g, b, a); + dptr++; + } + } + } +#else { /* go through every scanline in the output buffer */ for (y = 0; y < dh; y++) { + int yap; /* calculate the source line we'll scan from */ dptr = dest + dx + ((y + dy) * dow); sptr = ypoints[dyy + y]; - if (YAP > 1) + + yap = (ypoints[dyy + y + 1] - ypoints[dyy + y]) / sow; + if (yap > 1) { for (x = dxx; x < end; x++) { int r = 0, g = 0, b = 0, a = 0; int rr = 0, gg = 0, bb = 0, aa = 0; - int count; DATA32 *pix; if (XAP > 0) { - for (j = 0; j < YAP; j++) + pix = sptr + xpoints[x]; + for (j = 0; j < yap; j++) { - ssptr = ypoints[dyy + y] + (j * sow); - pix = &ssptr[xpoints[x]]; r += R_VAL(pix); g += G_VAL(pix); b += B_VAL(pix); a += A_VAL(pix); + rr += R_VAL(pix + 1); + gg += G_VAL(pix + 1); + bb += B_VAL(pix + 1); + aa += A_VAL(pix + 1); + pix += sow; } - count = j; - r = r * INV_XAP / count; - g = g * INV_XAP / count; - b = b * INV_XAP / count; - a = a * INV_XAP / count; - for (j = 0; j < YAP; j++) - { - ssptr = ypoints[dyy + y] + (j * sow); - pix = &ssptr[xpoints[x] + 1]; - rr += R_VAL(pix); - gg += G_VAL(pix); - bb += B_VAL(pix); - aa += A_VAL(pix); - } - count = j; - r = (r + ((rr * XAP) / count)) >> 8; - g = (g + ((gg * XAP) / count)) >> 8; - b = (b + ((bb * XAP) / count)) >> 8; - a = (a + ((aa * XAP) / count)) >> 8; + r = r * INV_XAP / yap; + g = g * INV_XAP / yap; + b = b * INV_XAP / yap; + a = a * INV_XAP / yap; + r = (r + ((rr * XAP) / yap)) >> 8; + g = (g + ((gg * XAP) / yap)) >> 8; + b = (b + ((bb * XAP) / yap)) >> 8; + a = (a + ((aa * XAP) / yap)) >> 8; *dptr++ = RGBA_COMPOSE(r, g, b, a); } else { - for (j = 0; j < YAP; j++) + pix = sptr + xpoints[x]; + for (j = 0; j < yap; j++) { - ssptr = ypoints[dyy + y] + (j *sow); - pix = &ssptr[xpoints[x]]; r += R_VAL(pix); g += G_VAL(pix); b += B_VAL(pix); a += A_VAL(pix); + pix += sow; } - count = j; - r /= count; - g /= count; - b /= count; - a /= count; + r /= yap; + g /= yap; + b /= yap; + a /= yap; *dptr++ = RGBA_COMPOSE(r, g, b, 
a); } } @@ -460,14 +463,12 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, for (x = dxx; x < end; x++) { int r = 0, g = 0, b = 0, a = 0; - int rr = 0, gg = 0, bb = 0, aa = 0; int count; DATA32 *pix; if (XAP > 0) { - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_XAP; g = G_VAL(pix) * INV_XAP; b = B_VAL(pix) * INV_XAP; @@ -489,8 +490,92 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, } } } +#endif /* if we're scaling down horizontally */ else if ((!xup) && (yup)) +#ifndef OLD_SCALE_DOWN + { + /*\ 'Correct' version, with math units prepared for MMXification \*/ + int Cx, j; + DATA32 *pix; + int r, g, b, a, rr, gg, bb, aa; + int xap; + + /* go through every scanline in the output buffer */ + for (y = 0; y < dh; y++) + { + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + Cx = XAP >> 16; + xap = XAP & 0xffff; + + pix = ypoints[dyy + y] + xpoints[x]; + r = (R_VAL(pix) * xap) >> 10; + g = (G_VAL(pix) * xap) >> 10; + b = (B_VAL(pix) * xap) >> 10; + a = (A_VAL(pix) * xap) >> 10; + pix++; + for (j = (1 << 14) - xap; j > Cx; j -= Cx) + { + r += (R_VAL(pix) * Cx) >> 10; + g += (G_VAL(pix) * Cx) >> 10; + b += (B_VAL(pix) * Cx) >> 10; + a += (A_VAL(pix) * Cx) >> 10; + pix++; + } + if (j > 0) + { + r += (R_VAL(pix) * j) >> 10; + g += (G_VAL(pix) * j) >> 10; + b += (B_VAL(pix) * j) >> 10; + a += (A_VAL(pix) * j) >> 10; + } + if (YAP > 0) + { + pix = ypoints[dyy + y] + xpoints[x] + sow; + rr = (R_VAL(pix) * xap) >> 10; + gg = (G_VAL(pix) * xap) >> 10; + bb = (B_VAL(pix) * xap) >> 10; + aa = (A_VAL(pix) * xap) >> 10; + pix++; + for (j = (1 << 14) - xap; j > Cx; j -= Cx) + { + rr += (R_VAL(pix) * Cx) >> 10; + gg += (G_VAL(pix) * Cx) >> 10; + bb += (B_VAL(pix) * Cx) >> 10; + aa += (A_VAL(pix) * Cx) >> 10; + pix++; + } + if (j > 0) + { + rr += (R_VAL(pix) * j) >> 10; + gg += (G_VAL(pix) * j) >> 10; + bb += (B_VAL(pix) * j) >> 10; + aa += (A_VAL(pix) * j) 
>> 10; + } + r = r * INV_YAP; + g = g * INV_YAP; + b = b * INV_YAP; + a = a * INV_YAP; + r = (r + ((rr * YAP))) >> 12; + g = (g + ((gg * YAP))) >> 12; + b = (b + ((bb * YAP))) >> 12; + a = (a + ((aa * YAP))) >> 12; + } + else + { + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + } + *dptr = RGBA_COMPOSE(r, g, b, a); + dptr++; + } + } + } +#else { /* go through every scanline in the output buffer */ for (y = 0; y < dh; y++) @@ -504,45 +589,41 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, { int r = 0, g = 0, b = 0, a = 0; int rr = 0, gg = 0, bb = 0, aa = 0; - int count; + int xap; DATA32 *pix; - if (XAP > 1) + xap = xpoints[x + 1] - xpoints[x]; + if (xap > 1) { - ssptr = ypoints[dyy + y]; - for (i = 0; i < XAP; i++) + pix = ypoints[dyy + y] + xpoints[x]; + for (i = 0; i < xap; i++) { - pix = &ssptr[xpoints[x] + i]; - r += R_VAL(pix); - g += G_VAL(pix); - b += B_VAL(pix); - a += A_VAL(pix); + r += R_VAL(pix + i); + g += G_VAL(pix + i); + b += B_VAL(pix + i); + a += A_VAL(pix + i); } - count = i; - r = r * INV_YAP / count; - g = g * INV_YAP / count; - b = b * INV_YAP / count; - a = a * INV_YAP / count; - ssptr = ypoints[dyy + y] + sow; - for (i = 0; i < XAP; i++) + r = r * INV_YAP / xap; + g = g * INV_YAP / xap; + b = b * INV_YAP / xap; + a = a * INV_YAP / xap; + pix = ypoints[dyy + y] + xpoints[x] + sow; + for (i = 0; i < xap; i++) { - pix = &ssptr[xpoints[x] + i]; - rr += R_VAL(pix); - gg += G_VAL(pix); - bb += B_VAL(pix); - aa += A_VAL(pix); + rr += R_VAL(pix + i); + gg += G_VAL(pix + i); + bb += B_VAL(pix + i); + aa += A_VAL(pix + i); } - count = i; - r = (r + ((rr * YAP) / count)) >> 8; - g = (g + ((gg * YAP) / count)) >> 8; - b = (b + ((bb * YAP) / count)) >> 8; - a = (a + ((aa * YAP) / count)) >> 8; + r = (r + ((rr * YAP) / xap)) >> 8; + g = (g + ((gg * YAP) / xap)) >> 8; + b = (b + ((bb * YAP) / xap)) >> 8; + a = (a + ((aa * YAP) / xap)) >> 8; *dptr++ = RGBA_COMPOSE(r, g, b, a); } else { - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; 
+ pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_YAP; g = G_VAL(pix) * INV_YAP; b = B_VAL(pix) * INV_YAP; @@ -565,26 +646,24 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, for (x = dxx; x < end; x++) { int r = 0, g = 0, b = 0, a = 0; - int rr = 0, gg = 0, bb = 0, aa = 0; - int count; + int xap; DATA32 *pix; - if (XAP > 1) + xap = xpoints[x + 1] - xpoints[x]; + if (xap > 1) { - ssptr = ypoints[dyy + y]; - for (i = 0; i < XAP; i++) + pix = ypoints[dyy + y] + xpoints[x]; + for (i = 0; i < xap; i++) { - pix = &ssptr[xpoints[x] + i]; - r += R_VAL(pix); - g += G_VAL(pix); - b += B_VAL(pix); - a += A_VAL(pix); + r += R_VAL(pix + i); + g += G_VAL(pix + i); + b += B_VAL(pix + i); + a += A_VAL(pix + i); } - count = i; - r /= count; - g /= count; - b /= count; - a /= count; + r /= xap; + g /= xap; + b /= xap; + a /= xap; *dptr++ = RGBA_COMPOSE(r, g, b, a); } else @@ -593,40 +672,162 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, } } } +#endif /* if we're scaling down horizontally & vertically */ else +#ifndef OLD_SCALE_DOWN + { + /*\ 'Correct' version, with math units prepared for MMXification: + |*| The operation 'b = (b * c) >> 16' translates to pmulhw, + |*| so the operation 'b = (b * c) >> d' would translate to + |*| psllw (16 - d), %mmb; pmulh %mmc, %mmb + \*/ + int Cx, Cy, i, j; + DATA32 *pix; + int a, r, g, b, ax, rx, gx, bx; + int xap, yap; + + for (y = 0; y < dh; y++) + { + Cy = YAP >> 16; + yap = YAP & 0xffff; + + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + Cx = XAP >> 16; + xap = XAP & 0xffff; + + sptr = ypoints[dyy + y] + xpoints[x]; + pix = sptr; + sptr += sow; + rx = (R_VAL(pix) * xap) >> 9; + gx = (G_VAL(pix) * xap) >> 9; + bx = (B_VAL(pix) * xap) >> 9; + ax = (A_VAL(pix) * xap) >> 9; + pix++; + for (i = (1 << 14) - xap; i > Cx; i -= Cx) + { + rx += (R_VAL(pix) * Cx) >> 9; + gx += (G_VAL(pix) * Cx) >> 9; + bx += (B_VAL(pix) * Cx) >> 9; + ax += (A_VAL(pix) * Cx) >> 9; + pix++; + 
} + if (i > 0) + { + rx += (R_VAL(pix) * i) >> 9; + gx += (G_VAL(pix) * i) >> 9; + bx += (B_VAL(pix) * i) >> 9; + ax += (A_VAL(pix) * i) >> 9; + } + + r = (rx * yap) >> 14; + g = (gx * yap) >> 14; + b = (bx * yap) >> 14; + a = (ax * yap) >> 14; + + for (j = (1 << 14) - yap; j > Cy; j -= Cy) + { + pix = sptr; + sptr += sow; + rx = (R_VAL(pix) * xap) >> 9; + gx = (G_VAL(pix) * xap) >> 9; + bx = (B_VAL(pix) * xap) >> 9; + ax = (A_VAL(pix) * xap) >> 9; + pix++; + for (i = (1 << 14) - xap; i > Cx; i -= Cx) + { + rx += (R_VAL(pix) * Cx) >> 9; + gx += (G_VAL(pix) * Cx) >> 9; + bx += (B_VAL(pix) * Cx) >> 9; + ax += (A_VAL(pix) * Cx) >> 9; + pix++; + } + if (i > 0) + { + rx += (R_VAL(pix) * i) >> 9; + gx += (G_VAL(pix) * i) >> 9; + bx += (B_VAL(pix) * i) >> 9; + ax += (A_VAL(pix) * i) >> 9; + } + + r += (rx * Cy) >> 14; + g += (gx * Cy) >> 14; + b += (bx * Cy) >> 14; + a += (ax * Cy) >> 14; + } + if (j > 0) + { + pix = sptr; + sptr += sow; + rx = (R_VAL(pix) * xap) >> 9; + gx = (G_VAL(pix) * xap) >> 9; + bx = (B_VAL(pix) * xap) >> 9; + ax = (A_VAL(pix) * xap) >> 9; + pix++; + for (i = (1 << 14) - xap; i > Cx; i -= Cx) + { + rx += (R_VAL(pix) * Cx) >> 9; + gx += (G_VAL(pix) * Cx) >> 9; + bx += (B_VAL(pix) * Cx) >> 9; + ax += (A_VAL(pix) * Cx) >> 9; + pix++; + } + if (i > 0) + { + rx += (R_VAL(pix) * i) >> 9; + gx += (G_VAL(pix) * i) >> 9; + bx += (B_VAL(pix) * i) >> 9; + ax += (A_VAL(pix) * i) >> 9; + } + + r += (rx * j) >> 14; + g += (gx * j) >> 14; + b += (bx * j) >> 14; + a += (ax * j) >> 14; + } + + R_VAL(dptr) = r >> 5; + G_VAL(dptr) = g >> 5; + B_VAL(dptr) = b >> 5; + A_VAL(dptr) = a >> 5; + dptr++; + } + } + } +#else { int count; DATA32 *pix; - int r, g, b, a; - int xp, xap, yap; + int a, r, g, b; /* go through every scanline in the output buffer */ for (y = 0; y < dh; y++) { + int yap = (ypoints[dyy + y + 1] - ypoints[dyy + y]) / sow; /* calculate the source line we'll scan from */ dptr = dest + dx + ((y + dy) * dow); sptr = ypoints[dyy + y]; for (x = dxx; x < end; 
x++) { - if ((XAP > 1) || (YAP > 1)) + int xap = xpoints[x + 1] - xpoints[x]; + if ((xap > 1) || (yap > 1)) { - r = 0; g = 0; b = 0; a = 0; count = 0; - xp = xpoints[x]; - ssptr = ypoints[dyy + y]; - for (j = 0; j < YAP; j++) + r = 0; g = 0; b = 0; + pix = ypoints[dyy + y] + xpoints[x]; + for (j = yap; --j >= 0; ) { - for (i = 0; i < XAP; i++) + for (i = xap; --i >= 0; ) { - pix = &ssptr[xp + i]; - r += R_VAL(pix); - g += G_VAL(pix); - b += B_VAL(pix); - a += A_VAL(pix); + r += R_VAL(pix + i); + g += G_VAL(pix + i); + b += B_VAL(pix + i); + a += A_VAL(pix + i); } - count += i; - ssptr += sow; + pix += sow; } + count = xap * yap; R_VAL(dptr) = r / count; G_VAL(dptr) = g / count; B_VAL(dptr) = b / count; @@ -638,8 +839,6 @@ __imlib_ScaleAARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, } } } -#ifdef EMMS - EMMS(); #endif } @@ -649,7 +848,7 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, int *xapoints, int *yapoints, char xup, char yup, int dxx, int dyy, int dx, int dy, int dw, int dh, int dow, int sow) { - DATA32 *sptr, *ssptr, *dptr; + DATA32 *sptr, *dptr; int x, y, i, j, end; end = dxx + dw; @@ -672,13 +871,7 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, if (XAP > 0) { -#ifdef INTERP_ARGB_XY - INTERP_ARGB_XY(dptr, ypoints[dyy + y] + xpoints[x], - sow, XAP, YAP); - dptr++; -#else - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_XAP; g = G_VAL(pix) * INV_XAP; b = B_VAL(pix) * INV_XAP; @@ -698,17 +891,10 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, g = ((gg * YAP) + (g * INV_YAP)) >> 16; b = ((bb * YAP) + (b * INV_YAP)) >> 16; *dptr++ = RGBA_COMPOSE(r, g, b, 0xff); -#endif } else { -#ifdef INTERP_ARGB_Y - INTERP_ARGB_Y(dptr, ypoints[dyy + y] + xpoints[x], - sow, YAP); - dptr++; -#else - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_YAP; g = G_VAL(pix) * INV_YAP; b = 
B_VAL(pix) * INV_YAP; @@ -720,7 +906,6 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, g >>= 8; b >>= 8; *dptr++ = RGBA_COMPOSE(r, g, b, 0xff); -#endif } } } @@ -734,13 +919,7 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, if (XAP > 0) { -#ifdef INTERP_ARGB_Y - INTERP_ARGB_Y(dptr, ypoints[dyy + y] + xpoints[x], - 1, XAP); - dptr++; -#else - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_XAP; g = G_VAL(pix) * INV_XAP; b = B_VAL(pix) * INV_XAP; @@ -752,7 +931,6 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, g >>= 8; b >>= 8; *dptr++ = RGBA_COMPOSE(r, g, b, 0xff); -#endif } else *dptr++ = sptr[xpoints[x] ]; @@ -762,64 +940,132 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, } /* if we're scaling down vertically */ else if ((xup) && (!yup)) +#ifndef OLD_SCALE_DOWN + { + /*\ 'Correct' version, with math units prepared for MMXification \*/ + int Cy, j; + DATA32 *pix; + int r, g, b, rr, gg, bb; + int yap; + + /* go through every scanline in the output buffer */ + for (y = 0; y < dh; y++) + { + Cy = YAP >> 16; + yap = YAP & 0xffff; + + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + pix = ypoints[dyy + y] + xpoints[x]; + r = (R_VAL(pix) * yap) >> 10; + g = (G_VAL(pix) * yap) >> 10; + b = (B_VAL(pix) * yap) >> 10; + pix += sow; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) + { + r += (R_VAL(pix) * Cy) >> 10; + g += (G_VAL(pix) * Cy) >> 10; + b += (B_VAL(pix) * Cy) >> 10; + pix += sow; + } + if (j > 0) + { + r += (R_VAL(pix) * j) >> 10; + g += (G_VAL(pix) * j) >> 10; + b += (B_VAL(pix) * j) >> 10; + } + if (XAP > 0) + { + pix = ypoints[dyy + y] + xpoints[x] + 1; + rr = (R_VAL(pix) * yap) >> 10; + gg = (G_VAL(pix) * yap) >> 10; + bb = (B_VAL(pix) * yap) >> 10; + pix += sow; + for (j = (1 << 14) - yap; j > Cy; j -= Cy) + { + rr += (R_VAL(pix) * Cy) >> 10; + gg += (G_VAL(pix) * Cy) >> 10; + bb += (B_VAL(pix) 
* Cy) >> 10; + pix += sow; + } + if (j > 0) + { + rr += (R_VAL(pix) * j) >> 10; + gg += (G_VAL(pix) * j) >> 10; + bb += (B_VAL(pix) * j) >> 10; + } + r = r * INV_XAP; + g = g * INV_XAP; + b = b * INV_XAP; + r = (r + ((rr * XAP))) >> 12; + g = (g + ((gg * XAP))) >> 12; + b = (b + ((bb * XAP))) >> 12; + } + else + { + r >>= 4; + g >>= 4; + b >>= 4; + } + *dptr = RGBA_COMPOSE(r, g, b, 0xff); + dptr++; + } + } + } +#else { /* go through every scanline in the output buffer */ for (y = 0; y < dh; y++) { + int yap; /* calculate the source line we'll scan from */ dptr = dest + dx + ((y + dy) * dow); sptr = ypoints[dyy + y]; - if (YAP > 1) + + yap = (ypoints[dyy + y + 1] - ypoints[dyy + y]) / sow; + if (yap > 1) { for (x = dxx; x < end; x++) { int r = 0, g = 0, b = 0; int rr = 0, gg = 0, bb = 0; - int count; DATA32 *pix; if (XAP > 0) { - for (j = 0; j < YAP; j++) + pix = sptr + xpoints[x]; + for (j = 0; j < yap; j++) { - ssptr = ypoints[dyy + y] + (j * sow); - pix = &ssptr[xpoints[x]]; r += R_VAL(pix); g += G_VAL(pix); b += B_VAL(pix); + rr += R_VAL(pix + 1); + gg += G_VAL(pix + 1); + bb += B_VAL(pix + 1); + pix += sow; } - count = j; - r = r * INV_XAP / count; - g = g * INV_XAP / count; - b = b * INV_XAP / count; - for (j = 0; j < YAP; j++) - { - ssptr = ypoints[dyy + y] + (j * sow); - pix = &ssptr[xpoints[x] + 1]; - rr += R_VAL(pix); - gg += G_VAL(pix); - bb += B_VAL(pix); - } - count = j; - r = (r + ((rr * XAP) / count)) >> 8; - g = (g + ((gg * XAP) / count)) >> 8; - b = (b + ((bb * XAP) / count)) >> 8; + r = r * INV_XAP / yap; + g = g * INV_XAP / yap; + b = b * INV_XAP / yap; + r = (r + ((rr * XAP) / yap)) >> 8; + g = (g + ((gg * XAP) / yap)) >> 8; + b = (b + ((bb * XAP) / yap)) >> 8; *dptr++ = RGBA_COMPOSE(r, g, b, 0xff); } else { - for (j = 0; j < YAP; j++) + pix = sptr + xpoints[x]; + for (j = 0; j < yap; j++) { - ssptr = ypoints[dyy + y] + (j *sow); - pix = &ssptr[xpoints[x]]; r += R_VAL(pix); g += G_VAL(pix); b += B_VAL(pix); + pix += sow; } - count = j; - r /= 
count; - g /= count; - b /= count; + r /= yap; + g /= yap; + b /= yap; *dptr++ = RGBA_COMPOSE(r, g, b, 0xff); } } @@ -829,14 +1075,11 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, for (x = dxx; x < end; x++) { int r = 0, g = 0, b = 0; - int rr = 0, gg = 0, bb = 0; - int count; DATA32 *pix; if (XAP > 0) { - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_XAP; g = G_VAL(pix) * INV_XAP; b = B_VAL(pix) * INV_XAP; @@ -855,8 +1098,83 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, } } } +#endif /* if we're scaling down horizontally */ else if ((!xup) && (yup)) +#ifndef OLD_SCALE_DOWN + { + /*\ 'Correct' version, with math units prepared for MMXification \*/ + int Cx, j; + DATA32 *pix; + int r, g, b, rr, gg, bb; + int xap; + + /* go through every scanline in the output buffer */ + for (y = 0; y < dh; y++) + { + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + Cx = XAP >> 16; + xap = XAP & 0xffff; + + pix = ypoints[dyy + y] + xpoints[x]; + r = (R_VAL(pix) * xap) >> 10; + g = (G_VAL(pix) * xap) >> 10; + b = (B_VAL(pix) * xap) >> 10; + pix++; + for (j = (1 << 14) - xap; j > Cx; j -= Cx) + { + r += (R_VAL(pix) * Cx) >> 10; + g += (G_VAL(pix) * Cx) >> 10; + b += (B_VAL(pix) * Cx) >> 10; + pix++; + } + if (j > 0) + { + r += (R_VAL(pix) * j) >> 10; + g += (G_VAL(pix) * j) >> 10; + b += (B_VAL(pix) * j) >> 10; + } + if (YAP > 0) + { + pix = ypoints[dyy + y] + xpoints[x] + sow; + rr = (R_VAL(pix) * xap) >> 10; + gg = (G_VAL(pix) * xap) >> 10; + bb = (B_VAL(pix) * xap) >> 10; + pix++; + for (j = (1 << 14) - xap; j > Cx; j -= Cx) + { + rr += (R_VAL(pix) * Cx) >> 10; + gg += (G_VAL(pix) * Cx) >> 10; + bb += (B_VAL(pix) * Cx) >> 10; + pix++; + } + if (j > 0) + { + rr += (R_VAL(pix) * j) >> 10; + gg += (G_VAL(pix) * j) >> 10; + bb += (B_VAL(pix) * j) >> 10; + } + r = r * INV_YAP; + g = g * INV_YAP; + b = b * INV_YAP; + r = (r + ((rr * YAP))) >> 12; + g = 
(g + ((gg * YAP))) >> 12; + b = (b + ((bb * YAP))) >> 12; + } + else + { + r >>= 4; + g >>= 4; + b >>= 4; + } + *dptr = RGBA_COMPOSE(r, g, b, 0xff); + dptr++; + } + } + } +#else { /* go through every scanline in the output buffer */ for (y = 0; y < dh; y++) @@ -870,41 +1188,37 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, { int r = 0, g = 0, b = 0; int rr = 0, gg = 0, bb = 0; - int count; + int xap; DATA32 *pix; - if (XAP > 1) + xap = xpoints[x + 1] - xpoints[x]; + if (xap > 1) { - ssptr = ypoints[dyy + y]; - for (i = 0; i < XAP; i++) + pix = ypoints[dyy + y] + xpoints[x]; + for (i = 0; i < xap; i++) { - pix = &ssptr[xpoints[x] + i]; - r += R_VAL(pix); - g += G_VAL(pix); - b += B_VAL(pix); + r += R_VAL(pix + i); + g += G_VAL(pix + i); + b += B_VAL(pix + i); } - count = i; - r = r * INV_YAP / count; - g = g * INV_YAP / count; - b = b * INV_YAP / count; - ssptr = ypoints[dyy + y] + sow; - for (i = 0; i < XAP; i++) + r = r * INV_YAP / xap; + g = g * INV_YAP / xap; + b = b * INV_YAP / xap; + pix = ypoints[dyy + y] + xpoints[x] + sow; + for (i = 0; i < xap; i++) { - pix = &ssptr[xpoints[x] + i]; - rr += R_VAL(pix); - gg += G_VAL(pix); - bb += B_VAL(pix); + rr += R_VAL(pix + i); + gg += G_VAL(pix + i); + bb += B_VAL(pix + i); } - count = i; - r = (r + ((rr * YAP) / count)) >> 8; - g = (g + ((gg * YAP) / count)) >> 8; - b = (b + ((bb * YAP) / count)) >> 8; + r = (r + ((rr * YAP) / xap)) >> 8; + g = (g + ((gg * YAP) / xap)) >> 8; + b = (b + ((bb * YAP) / xap)) >> 8; *dptr++ = RGBA_COMPOSE(r, g, b, 0xff); } else { - ssptr = ypoints[dyy + y]; - pix = &ssptr[xpoints[x]]; + pix = ypoints[dyy + y] + xpoints[x]; r = R_VAL(pix) * INV_YAP; g = G_VAL(pix) * INV_YAP; b = B_VAL(pix) * INV_YAP; @@ -924,24 +1238,22 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, for (x = dxx; x < end; x++) { int r = 0, g = 0, b = 0; - int rr = 0, gg = 0, bb = 0; - int count; + int xap; DATA32 *pix; - if (XAP > 1) + xap = xpoints[x + 1] - xpoints[x]; + if (xap > 
1) { - ssptr = ypoints[dyy + y]; - for (i = 0; i < XAP; i++) + pix = ypoints[dyy + y] + xpoints[x]; + for (i = 0; i < xap; i++) { - pix = &ssptr[xpoints[x] + i]; - r += R_VAL(pix); - g += G_VAL(pix); - b += B_VAL(pix); + r += R_VAL(pix + i); + g += G_VAL(pix + i); + b += B_VAL(pix + i); } - count = i; - r /= count; - g /= count; - b /= count; + r /= xap; + g /= xap; + b /= xap; *dptr++ = RGBA_COMPOSE(r, g, b, 0xff); } else @@ -950,40 +1262,145 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, } } } +#endif /* fully optimized (i think) - onyl change of algorithm can help */ /* if we're scaling down horizontally & vertically */ else +#ifndef OLD_SCALE_DOWN + { + /*\ 'Correct' version, with math units prepared for MMXification \*/ + int Cx, Cy, i, j; + DATA32 *pix; + int r, g, b, rx, gx, bx; + int xap, yap; + + for (y = 0; y < dh; y++) + { + Cy = YAP >> 16; + yap = YAP & 0xffff; + + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + Cx = XAP >> 16; + xap = XAP & 0xffff; + + sptr = ypoints[dyy + y] + xpoints[x]; + pix = sptr; + sptr += sow; + rx = (R_VAL(pix) * xap) >> 9; + gx = (G_VAL(pix) * xap) >> 9; + bx = (B_VAL(pix) * xap) >> 9; + pix++; + for (i = (1 << 14) - xap; i > Cx; i -= Cx) + { + rx += (R_VAL(pix) * Cx) >> 9; + gx += (G_VAL(pix) * Cx) >> 9; + bx += (B_VAL(pix) * Cx) >> 9; + pix++; + } + if (i > 0) + { + rx += (R_VAL(pix) * i) >> 9; + gx += (G_VAL(pix) * i) >> 9; + bx += (B_VAL(pix) * i) >> 9; + } + + r = (rx * yap) >> 14; + g = (gx * yap) >> 14; + b = (bx * yap) >> 14; + + for (j = (1 << 14) - yap; j > Cy; j -= Cy) + { + pix = sptr; + sptr += sow; + rx = (R_VAL(pix) * xap) >> 9; + gx = (G_VAL(pix) * xap) >> 9; + bx = (B_VAL(pix) * xap) >> 9; + pix++; + for (i = (1 << 14) - xap; i > Cx; i -= Cx) + { + rx += (R_VAL(pix) * Cx) >> 9; + gx += (G_VAL(pix) * Cx) >> 9; + bx += (B_VAL(pix) * Cx) >> 9; + pix++; + } + if (i > 0) + { + rx += (R_VAL(pix) * i) >> 9; + gx += (G_VAL(pix) * i) >> 9; + bx += (B_VAL(pix) * i) >> 9; + 
} + + r += (rx * Cy) >> 14; + g += (gx * Cy) >> 14; + b += (bx * Cy) >> 14; + } + if (j > 0) + { + pix = sptr; + sptr += sow; + rx = (R_VAL(pix) * xap) >> 9; + gx = (G_VAL(pix) * xap) >> 9; + bx = (B_VAL(pix) * xap) >> 9; + pix++; + for (i = (1 << 14) - xap; i > Cx; i -= Cx) + { + rx += (R_VAL(pix) * Cx) >> 9; + gx += (G_VAL(pix) * Cx) >> 9; + bx += (B_VAL(pix) * Cx) >> 9; + pix++; + } + if (i > 0) + { + rx += (R_VAL(pix) * i) >> 9; + gx += (G_VAL(pix) * i) >> 9; + bx += (B_VAL(pix) * i) >> 9; + } + + r += (rx * j) >> 14; + g += (gx * j) >> 14; + b += (bx * j) >> 14; + } + + R_VAL(dptr) = r >> 5; + G_VAL(dptr) = g >> 5; + B_VAL(dptr) = b >> 5; + dptr++; + } + } + } +#else { int count; DATA32 *pix; int r, g, b; - int xp, xap, yap; /* go through every scanline in the output buffer */ for (y = 0; y < dh; y++) { + int yap = (ypoints[dyy + y + 1] - ypoints[dyy + y]) / sow; /* calculate the source line we'll scan from */ dptr = dest + dx + ((y + dy) * dow); sptr = ypoints[dyy + y]; for (x = dxx; x < end; x++) { - if ((XAP > 1) || (YAP > 1)) + int xap = xpoints[x + 1] - xpoints[x]; + if ((xap > 1) || (yap > 1)) { - r = 0; g = 0; b = 0; count = 0; - xp = xpoints[x]; - ssptr = ypoints[dyy + y]; - for (j = 0; j < YAP; j++) + r = 0; g = 0; b = 0; + pix = sptr + xpoints[x]; + for (j = yap; --j >= 0; ) { - for (i = 0; i < XAP; i++) + for (i = xap; --i >= 0; ) { - pix = &ssptr[xp + i]; - r += R_VAL(pix); - g += G_VAL(pix); - b += B_VAL(pix); + r += R_VAL(pix + i); + g += G_VAL(pix + i); + b += B_VAL(pix + i); } - count += i; - ssptr += sow; + pix += sow; } + count = xap * yap; R_VAL(dptr) = r / count; G_VAL(dptr) = g / count; B_VAL(dptr) = b / count; @@ -994,8 +1411,528 @@ __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, } } } -#ifdef EMMS - EMMS(); #endif } +#if defined(DO_MMX_ASM) && defined(__GNUC__) /* MMX inline-asm variant of the anti-aliased scaler, built only when DO_MMX_ASM is defined under GCC */ +void +__imlib_Scale_mmx_AARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, + int *xapoints, int *yapoints, char xup, char yup, + int dxx, int dyy, int dx, int 
dy, int dw, int dh, int dow, int sow) +{ + DATA32 *dptr; + int x, y, end; + + end = dxx + dw; + /* scaling up both ways */ + if ((xup) && (yup)) + { + /* go through every scanline in the output buffer */ + for (y = 0; y < dh; y++) + { + /* calculate where this output scanline starts in dest */ + dptr = dest + dx + ((y + dy) * dow); + if (YAP > 0) /* the asm in this arm also reads at byte offset sow * 4 from the source pointer, via (%1, %2, 4) */ + { + for (x = dxx; x < end; x++) + { + if (XAP > 0) /* movq fetches two adjacent source pixels from each row */ + { + __asm__ ( + "pxor %%mm6, %%mm6\n\t" + "movd %3, %%mm0\n\t" + "movd %4, %%mm1\n\t" + "punpcklwd %%mm0, %%mm0\n\t" + "punpcklwd %%mm1, %%mm1\n\t" + "punpckldq %%mm0, %%mm0\n\t" + "punpckldq %%mm1, %%mm1\n\t" + "movq (%1), %%mm2\n\t" + "movq (%1, %2, 4), %%mm4\n\t" + "movq %%mm2, %%mm3\n\t" + "movq %%mm4, %%mm5\n\t" + "punpcklbw %%mm6, %%mm2\n\t" + "punpcklbw %%mm6, %%mm4\n\t" + "punpckhbw %%mm6, %%mm3\n\t" + "punpckhbw %%mm6, %%mm5\n\t" + "psubw %%mm2, %%mm3\n\t" + "psubw %%mm4, %%mm5\n\t" + "psllw $4, %%mm3\n\t" + "psllw $4, %%mm5\n\t" + "pmulhw %%mm0, %%mm3\n\t" + "pmulhw %%mm0, %%mm5\n\t" + "paddw %%mm2, %%mm3\n\t" + "paddw %%mm4, %%mm5\n\t" + "psubw %%mm3, %%mm5\n\t" + "psllw $4, %%mm5\n\t" + "pmulhw %%mm1, %%mm5\n\t" + "paddw %%mm3, %%mm5\n\t" + "packuswb %%mm5, %%mm5\n\t" + "movd %%mm5, (%0)" + : /*\ No outputs: the blended pixel is stored through %0 (dptr); NOTE(review): no memory clobber is listed \*/ + : "r" (dptr), "r" (ypoints[dyy + y] + xpoints[x]), + "r" (sow), "g" ((XAP) << 4), + "g" ((YAP) << 4)); + } + else + { + __asm__ ( + "pxor %%mm6, %%mm6\n\t" + "movd %3, %%mm0\n\t" + "punpcklwd %%mm0, %%mm0\n\t" + "punpckldq %%mm0, %%mm0\n\t" + "movd (%1), %%mm2\n\t" + "movd (%1, %2, 4), %%mm4\n\t" + "punpcklbw %%mm6, %%mm2\n\t" + "punpcklbw %%mm6, %%mm4\n\t" + "psubw %%mm2, %%mm4\n\t" + "psllw $4, %%mm4\n\t" + "pmulhw %%mm0, %%mm4\n\t" + "paddw %%mm2, %%mm4\n\t" + "packuswb %%mm4, %%mm4\n\t" + "movd %%mm4, (%0)" + : /*\ No outputs \*/ + : "r" (dptr), "r" (ypoints[dyy + y] + xpoints[x]), + "r" (sow), "g" ((YAP) << 4)); + } + dptr++; + } + } + else + { + for (x = dxx; x < end; x++) + { + if (XAP > 0) + { + __asm__ ( + "pxor %%mm6, %%mm6\n\t" + "movd %2, 
%%mm0\n\t" + "punpcklwd %%mm0, %%mm0\n\t" + "punpckldq %%mm0, %%mm0\n\t" + "movq (%1), %%mm2\n\t" + "movq %%mm2, %%mm4\n\t" + "punpcklbw %%mm6, %%mm2\n\t" + "punpckhbw %%mm6, %%mm4\n\t" + "psubw %%mm2, %%mm4\n\t" + "psllw $4, %%mm4\n\t" + "pmulhw %%mm0, %%mm4\n\t" + "paddw %%mm2, %%mm4\n\t" + "packuswb %%mm4, %%mm4\n\t" + "movd %%mm4, (%0)" + : /*\ No outputs \*/ + : "r" (dptr), "r" (ypoints[dyy + y] + xpoints[x]), + "g" ((XAP) << 4)); + } + else + *dptr = *(ypoints[dyy + y] + xpoints[x]); + dptr++; + } + } + } + } + /* if we're scaling down vertically */ + else if ((xup) && (!yup)) + { + int Cy, j; + DATA32 *pix; + int r, g, b, a, rr, gg, bb, aa; /* NOTE(review): j, pix and these colour locals are never referenced in this branch */ + int yap; + + /* go through every scanline in the output buffer */ + for (y = 0; y < dh; y++) + { + Cy = YAP >> 16; /* bits 16 and up of YAP: weight applied to every further source row */ + yap = YAP & 0xffff; /* low 16 bits of YAP: weight applied to the first source row */ + + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + __asm__ ( + "movd %4, %%mm4\n\t" /*\ Cy \*/ + "punpcklwd %%mm4, %%mm4\n\t" + "punpckldq %%mm4, %%mm4\n\t" + "movd %5, %%mm5\n\t" /*\ yap \*/ + "punpcklwd %%mm5, %%mm5\n\t" + "punpckldq %%mm5, %%mm5\n\t" + "pxor %%mm7, %%mm7\n\t" + "\n\t" + "movl %0, %%eax\n\t" /*\ p \*/ + "movd (%%eax), %%mm0\n\t" /*\ v = (*p * yap) >> 10 \*/ + "addl %1, %%eax\n\t" /*\ p += sow \*/ + "punpcklbw %%mm7, %%mm0\n\t" + "psllw $6, %%mm0\n\t" + "pmulhw %%mm5, %%mm0\n\t" + "\n\t" + "movl $0x4000, %%ecx\n\t" /*\ i = 0x4000 - yap \*/ + "subl %5, %%ecx\n\t" + "jbe 5f\n\t" /*\ i <= 0: Skip it \*/ + "jmp 2f\n" + "1:\n\t" + "movd (%%eax), %%mm1\n\t" /*\ v += (*p * Cy) >> 10 \*/ + "addl %1, %%eax\n\t" /*\ p += sow \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $6, %%mm1\n\t" + "pmulhw %%mm4, %%mm1\n\t" + "paddw %%mm1, %%mm0\n\t" + "\n\t" + "subl %4, %%ecx\n" /*\ i -= Cy; while i > Cy \*/ + "2:\n\t" + "cmpl %4, %%ecx\n\t" + "jg 1b\n\t" + "\n\t" + "movd %%ecx, %%mm6\n\t" /*\ i \*/ + "punpcklwd %%mm6, %%mm6\n\t" + "punpckldq %%mm6, %%mm6\n\t" + "\n\t" + "movd (%%eax), %%mm1\n\t" /*\ v += (*p * i) >> 10 \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw 
$6, %%mm1\n\t" + "pmulhw %%mm6, %%mm1\n\t" + "paddw %%mm1, %%mm0\n" + "5:\n\t" + "movl %3, %%eax\n\t" + "sall $5, %%eax\n\t" + "jz 6f\n\t" /*\ XAP << 5 == 0: keep v as is \*/ + "movd %%eax, %%mm3\n\t" /*\ XAP << 5 \*/ + "punpcklwd %%mm3, %%mm3\n\t" + "punpckldq %%mm3, %%mm3\n\t" + "\n\t" + "movl %0, %%eax\n\t" /*\ p + 1 \*/ + "addl $4, %%eax\n\t" + "movd (%%eax), %%mm2\n\t" /*\ vv = (*p * yap) >> 10 \*/ + "addl %1, %%eax\n\t" /*\ p += sow \*/ + "punpcklbw %%mm7, %%mm2\n\t" + "psllw $6, %%mm2\n\t" + "pmulhw %%mm5, %%mm2\n\t" + "\n\t" + "movl $0x4000, %%ecx\n\t" /*\ i = 0x4000 - yap \*/ + "subl %5, %%ecx\n\t" + "jbe 5f\n\t" /*\ i <= 0: Skip it \*/ + "jmp 2f\n" + "1:\n\t" + "movd (%%eax), %%mm1\n\t" /*\ vv += (*p * Cy) >> 10 \*/ + "addl %1, %%eax\n\t" /*\ p += sow \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $6, %%mm1\n\t" + "pmulhw %%mm4, %%mm1\n\t" + "paddw %%mm1, %%mm2\n\t" + "\n\t" + "subl %4, %%ecx\n" /*\ i -= Cy; while i > Cy \*/ + "2:\n\t" + "cmpl %4, %%ecx\n\t" + "jg 1b\n\t" + "\n\t" + "movd (%%eax), %%mm1\n\t" /*\ vv += (*p * i) >> 10 \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $6, %%mm1\n\t" + "pmulhw %%mm6, %%mm1\n\t" + "paddw %%mm1, %%mm2\n" + "5:\n\t" + "psubw %%mm0, %%mm2\n\t" /*\ v += (vv - v) * XAP \*/ + "psllw $3, %%mm2\n\t" + "pmulhw %%mm3, %%mm2\n\t" + "paddw %%mm2, %%mm0\n" + "6:\n\t" + "psrlw $4, %%mm0\n\t" + "packuswb %%mm0, %%mm0\n\t" + "movl %2, %%eax\n\t" + "movd %%mm0, (%%eax)" + : /*\ No outputs \*/ + : "r" (ypoints[dyy + y] + xpoints[x]), "g" (sow * 4), + "g" (dptr), "g" (XAP), "g" (Cy), "g" (yap) + : "ax", "cx"); + dptr++; + } + } + } + /* if we're scaling down horizontally */ + else if ((!xup) && (yup)) + { + int Cx, j; + DATA32 *pix; + int r, g, b, a, rr, gg, bb, aa; /* NOTE(review): j, pix and these colour locals are never referenced in this branch */ + int xap; + + /* go through every scanline in the output buffer */ + for (y = 0; y < dh; y++) + { + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + Cx = XAP >> 16; /* bits 16 and up of XAP: weight applied to every further source pixel */ + xap = XAP & 0xffff; /* low 16 bits of XAP: weight applied to the first source pixel */ + + __asm__ ( + "movd %4, %%mm4\n\t" /*\ Cx \*/ + "punpcklwd %%mm4, %%mm4\n\t" + "punpckldq %%mm4, 
%%mm4\n\t" + "movd %5, %%mm5\n\t" /*\ xap \*/ + "punpcklwd %%mm5, %%mm5\n\t" + "punpckldq %%mm5, %%mm5\n\t" + "pxor %%mm7, %%mm7\n\t" + "\n\t" + "movl %0, %%eax\n\t" /*\ p \*/ + "movd (%%eax), %%mm0\n\t" /*\ v = (*p * xap) >> 10 \*/ + "addl $4, %%eax\n\t" /*\ p++ (next pixel in this row) \*/ + "punpcklbw %%mm7, %%mm0\n\t" + "psllw $6, %%mm0\n\t" + "pmulhw %%mm5, %%mm0\n\t" + "\n\t" + "movl $0x4000, %%ecx\n\t" /*\ i = 0x4000 - xap \*/ + "subl %5, %%ecx\n\t" + "jbe 5f\n\t" /*\ i <= 0: Skip it \*/ + "jmp 2f\n" + "1:\n\t" + "movd (%%eax), %%mm1\n\t" /*\ v += (*p * Cx) >> 10 \*/ + "addl $4, %%eax\n\t" /*\ p++ (next pixel in this row) \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $6, %%mm1\n\t" + "pmulhw %%mm4, %%mm1\n\t" + "paddw %%mm1, %%mm0\n\t" + "\n\t" + "subl %4, %%ecx\n" /*\ i -= Cx; while i > Cx \*/ + "2:\n\t" + "cmpl %4, %%ecx\n\t" + "jg 1b\n\t" + "\n\t" + "movd %%ecx, %%mm6\n\t" /*\ i \*/ + "punpcklwd %%mm6, %%mm6\n\t" + "punpckldq %%mm6, %%mm6\n\t" + "\n\t" + "movd (%%eax), %%mm1\n\t" /*\ v += (*p * i) >> 10 \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $6, %%mm1\n\t" + "pmulhw %%mm6, %%mm1\n\t" + "paddw %%mm1, %%mm0\n" + "5:\n\t" + "movl %3, %%eax\n\t" + "sall $5, %%eax\n\t" + "jz 6f\n\t" /*\ YAP << 5 == 0: keep v as is \*/ + "movd %%eax, %%mm3\n\t" /*\ YAP << 5 \*/ + "punpcklwd %%mm3, %%mm3\n\t" + "punpckldq %%mm3, %%mm3\n\t" + "\n\t" + "movl %0, %%eax\n\t" /*\ p + sow (same column, next source row) \*/ + "addl %1, %%eax\n\t" + "movd (%%eax), %%mm2\n\t" /*\ vv = (*p * xap) >> 10 \*/ + "addl $4, %%eax\n\t" /*\ p++ (next pixel in this row) \*/ + "punpcklbw %%mm7, %%mm2\n\t" + "psllw $6, %%mm2\n\t" + "pmulhw %%mm5, %%mm2\n\t" + "\n\t" + "movl $0x4000, %%ecx\n\t" /*\ i = 0x4000 - xap \*/ + "subl %5, %%ecx\n\t" + "jbe 5f\n\t" /*\ i <= 0: Skip it \*/ + "jmp 2f\n" + "1:\n\t" + "movd (%%eax), %%mm1\n\t" /*\ vv += (*p * Cx) >> 10 \*/ + "addl $4, %%eax\n\t" /*\ p++ (next pixel in this row) \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $6, %%mm1\n\t" + "pmulhw %%mm4, %%mm1\n\t" + "paddw %%mm1, %%mm2\n\t" + "\n\t" + "subl %4, %%ecx\n" /*\ i -= Cx; while i > Cx \*/ + "2:\n\t" + "cmpl %4, %%ecx\n\t" + "jg 1b\n\t" + "\n\t" + 
"movd (%%eax), %%mm1\n\t" /*\ vv += (*p * i) >> 10 \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $6, %%mm1\n\t" + "pmulhw %%mm6, %%mm1\n\t" + "paddw %%mm1, %%mm2\n" + "5:\n\t" + "psubw %%mm0, %%mm2\n\t" /*\ v += (vv - v) * YAP \*/ + "psllw $3, %%mm2\n\t" + "pmulhw %%mm3, %%mm2\n\t" + "paddw %%mm2, %%mm0\n" + "6:\n\t" + "psrlw $4, %%mm0\n\t" + "packuswb %%mm0, %%mm0\n\t" + "movl %2, %%eax\n\t" + "movd %%mm0, (%%eax)" + : /*\ No outputs \*/ + : "r" (ypoints[dyy + y] + xpoints[x]), "g" (sow * 4), + "g" (dptr), "g" (YAP), "g" (Cx), "g" (xap) + : "ax", "cx"); + dptr++; + } + } + } + /* if we're scaling down horizontally & vertically */ + else + { + int Cx, Cy, xap, yap; + + for (y = 0; y < dh; y++) + { + Cy = YAP >> 16; + yap = YAP & 0xffff; + + dptr = dest + dx + ((y + dy) * dow); + for (x = dxx; x < end; x++) + { + Cx = XAP >> 16; + xap = XAP & 0xffff; + + __asm__ ( + "movd %3, %%mm3\n\t" /*\ Cx \*/ + "punpcklwd %%mm3, %%mm3\n\t" + "punpckldq %%mm3, %%mm3\n\t" + "movd %4, %%mm4\n\t" /*\ Cy \*/ + "punpcklwd %%mm4, %%mm4\n\t" + "punpckldq %%mm4, %%mm4\n\t" + "movd %5, %%mm5\n\t" /*\ xap \*/ + "punpcklwd %%mm5, %%mm5\n\t" + "punpckldq %%mm5, %%mm5\n\t" + "pxor %%mm7, %%mm7\n\t" + "\n\t" + "movl %0, %%esi\n\t" /*\ sptr \*/ + "movl %%esi, %%eax\n\t" /*\ p = sptr \*/ + "addl %1, %%esi\n\t" /*\ sptr += sow \*/ + "movd (%%eax), %%mm0\n\t" /*\ vx = (*p++ * xap) >> 9 \*/ + "addl $4, %%eax\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "psllw $7, %%mm0\n\t" + "pmulhw %%mm5, %%mm0\n\t" + "\n\t" + "movl $0x4000, %%ecx\n\t" /*\ i = 0x4000 - xap \*/ + "subl %5, %%ecx\n\t" + "jbe 5f\n\t" /*\ i <= 0: Skip it \*/ + "jmp 2f\n" + "1:\n\t" + "movd (%%eax), %%mm1\n\t" /*\ vx += (*p++ * Cx) >> 9 \*/ + "addl $4, %%eax\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $7, %%mm1\n\t" + "pmulhw %%mm3, %%mm1\n\t" + "paddw %%mm1, %%mm0\n\t" + "\n\t" + "subl %3, %%ecx\n" /*\ i -= Cx; while i > Cx \*/ + "2:\n\t" + "cmpl %3, %%ecx\n\t" + "jg 1b\n\t" + "\n\t" + "movd %%ecx, %%mm6\n\t" /*\ i \*/ + "punpcklwd %%mm6, 
%%mm6\n\t" + "punpckldq %%mm6, %%mm6\n\t" + "\n\t" + "movd (%%eax), %%mm1\n\t" /*\ vx += (*p * i) >> 9 \*/ + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $7, %%mm1\n\t" + "pmulhw %%mm6, %%mm1\n\t" + "paddw %%mm1, %%mm0\n" + "5:\n\t" + "movd %6, %%mm2\n\t" /*\ yap \*/ + "punpcklwd %%mm2, %%mm2\n\t" + "punpckldq %%mm2, %%mm2\n\t" + "psllw $2, %%mm0\n\t" /*\ v = (vx * yap) >> 14 \*/ + "pmulhw %%mm2, %%mm0\n\t" + "\n\t" + "movl $0x4000, %%edx\n\t" /*\ j = 0x4000 - yap \*/ + "subl %6, %%edx\n\t" + "jbe 6f\n\t" /*\ j <= 0: Skip it \*/ + "jmp 4f\n" + "3:\n\t" + "movl %%esi, %%eax\n\t" /*\ p = sptr \*/ + "addl %1, %%esi\n\t" /*\ sptr += sow \*/ + "movd (%%eax), %%mm1\n\t" /*\ vx = (*p++ * xap) >> 9 \*/ + "addl $4, %%eax\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $7, %%mm1\n\t" + "pmulhw %%mm5, %%mm1\n\t" + "\n\t" + "movl $0x4000, %%ecx\n\t" /*\ i = 0x4000 - xap \*/ + "subl %5, %%ecx\n\t" + "jbe 5f\n\t" /*\ i <= 0: Skip it \*/ + "jmp 2f\n" + "1:\n\t" + "movd (%%eax), %%mm2\n\t" /*\ vx += (*p++ * Cx) >> 9 \*/ + "addl $4, %%eax\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "psllw $7, %%mm2\n\t" + "pmulhw %%mm3, %%mm2\n\t" + "paddw %%mm2, %%mm1\n\t" + "\n\t" + "subl %3, %%ecx\n" /*\ i -= Cx; while i > Cx \*/ + "2:\n\t" + "cmpl %3, %%ecx\n\t" + "jg 1b\n\t" + "\n\t" + "movd (%%eax), %%mm2\n\t" /*\ vx += (*p * i) >> 9 \*/ + "punpcklbw %%mm7, %%mm2\n\t" + "psllw $7, %%mm2\n\t" + "pmulhw %%mm6, %%mm2\n\t" + "paddw %%mm2, %%mm1\n" + "5:\n\t" + "psllw $2, %%mm1\n\t" /*\ v += (vx * Cy) >> 14 \*/ + "pmulhw %%mm4, %%mm1\n\t" + "paddw %%mm1, %%mm0\n\t" + "\n\t" + "subl %4, %%edx\n" /*\ j -= Cy; while j > Cy \*/ + "4:\n\t" + "cmpl %4, %%edx\n\t" + "jg 3b\n\t" + "\n\t" + "movl %%esi, %%eax\n\t" /*\ p = sptr \*/ + "movd (%%eax), %%mm1\n\t" /*\ vx = (*p++ * xap) >> 9 \*/ + "addl $4, %%eax\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "psllw $7, %%mm1\n\t" + "pmulhw %%mm5, %%mm1\n\t" + "\n\t" + "movl $0x4000, %%ecx\n\t" /*\ i = 0x4000 - xap \*/ + "subl %5, %%ecx\n\t" + "jbe 5f\n\t" /*\ i <= 0: Skip it \*/ + "jmp 
2f\n" + "1:\n\t" + "movd (%%eax), %%mm2\n\t" /*\ vx += (*p++ * Cx) >> 9 \*/ + "addl $4, %%eax\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "psllw $7, %%mm2\n\t" + "pmulhw %%mm3, %%mm2\n\t" + "paddw %%mm2, %%mm1\n\t" + "\n\t" + "subl %3, %%ecx\n" /*\ i -= Cx; while i > Cx \*/ + "2:\n\t" + "cmpl %3, %%ecx\n\t" + "jg 1b\n\t" + "\n\t" + "movd (%%eax), %%mm2\n\t" /*\ vx += (*p * i) >> 9 \*/ + "punpcklbw %%mm7, %%mm2\n\t" + "psllw $7, %%mm2\n\t" + "pmulhw %%mm6, %%mm2\n\t" + "paddw %%mm2, %%mm1\n" + "5:\n\t" + "movd %%edx, %%mm6\n\t" /*\ j \*/ + "punpcklwd %%mm6, %%mm6\n\t" + "punpckldq %%mm6, %%mm6\n\t" + "\n\t" + "psllw $2, %%mm1\n\t" /*\ v += (vx * j) >> 14 \*/ + "pmulhw %%mm6, %%mm1\n\t" + "paddw %%mm1, %%mm0\n" + "6:\n\t" + "psrlw $5, %%mm0\n\t" /*\ *dest = v >> 5 \*/ + "packuswb %%mm0, %%mm0\n\t" + "movl %2, %%eax\n\t" + "movd %%mm0, (%%eax)\n\t" + : /*\ No outputs \*/ + : "g" (ypoints[dyy + y] + xpoints[x]), "g" (sow * 4), + "g" (dptr), "g" (Cx), "g" (Cy), "g" (xap), "g" (yap) + : "si", "ax", "cx", "dx"); + dptr++; + } + } + } + __asm__ ("emms" : : ); /* EMMS: empty the MMX state once every branch above has finished */ +} +#else /* MMX asm not compiled in: forward to __imlib_ScaleAARGBA with the same arguments */ +void +__imlib_Scale_mmx_AARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, + int *xapoints, int *yapoints, char xup, char yup, + int dxx, int dyy, int dx, int dy, int dw, int dh, int dow, int sow) +{ + __imlib_ScaleAARGBA(ypoints, xpoints, dest, xapoints, yapoints, xup, yup, + dxx, dyy, dx, dy, dw, dh, dow, sow); +} +#endif diff --git a/src/scale.h b/src/scale.h index 5d52d5b..d6fd1e5 100644 --- a/src/scale.h +++ b/src/scale.h @@ -18,4 +18,8 @@ void __imlib_ScaleAARGB(DATA32 **ypoints, int *xpoints, DATA32 *dest, int *xapoints, int *yapoints, char xup, char yup, int dxx, int dyy, int dx, int dy, int dw, int dh, int dow, int sow); +void +__imlib_Scale_mmx_AARGBA(DATA32 **ypoints, int *xpoints, DATA32 *dest, + int *xapoints, int *yapoints, char xup, char yup, + int dxx, int dyy, int dx, int dy, int dw, int dh, int dow, int sow); #endif