summaryrefslogtreecommitdiff
path: root/src/lib/evas/common
diff options
context:
space:
mode:
authorCarsten Haitzler (Rasterman) <raster@rasterman.com>2016-08-19 11:04:50 +0900
committerCarsten Haitzler (Rasterman) <raster@rasterman.com>2016-08-19 11:08:55 +0900
commita739d4d7da5ff7053ab63c72eed632c246fe17b0 (patch)
treeb7ed99f89fce1f5b7ea3087571b282c827038f7d /src/lib/evas/common
parente56811ed4db61ac2ac14d28a7a8fac83f41c43b8 (diff)
evas software downscaler - get about 1.8x to 3x speedups for cases
this speeds up downscaling of images by somewhere between 1.8x and 3x depending on case and cpu etc. - this is ONLY for downscaling of an image buffer between 50% width and/or height up to 100% of width and height. it's a special case optimization that cuts down the complexity of the full super sampling filter to just do a bilinear interpolation which is actually strictly correct for this size range and shouldn't drop quality. it uses fixed point (16.16) to do the sub pixel sampling. no mmx/sse or neon, but we could actually easily use it as we do use mmx/sse and neon in the bilinear upscaler to do interpolation so this would work here too. it just requires time and effort to make yet 2x more special cases and use the ASM to do the hard slog here. @optimize
Diffstat (limited to 'src/lib/evas/common')
-rw-r--r--src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c1036
1 files changed, 686 insertions, 350 deletions
diff --git a/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c b/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c
index 84fed3faf3..dfe0e5dc31 100644
--- a/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c
+++ b/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c
@@ -1,16 +1,34 @@
1#define BILINEAR_HALF_TO_FULL_SCALE 1
2
1{ 3{
2 int Cx, Cy, i, j; 4 int Cx, Cy, i, j;
3 DATA32 *dptr, *sptr, *pix, *pbuf; 5 DATA32 *dptr, *sptr, *pix, *pbuf;
4 DATA8 *mask; 6 DATA8 *mask;
5 int a, r, g, b, rx, gx, bx, ax; 7 int a, r, g, b, rx, gx, bx, ax;
6 int xap, yap, pos; 8 int xap, yap, pos;
7 int y; 9 int y = 0;
8 //int dyy, dxx; 10#ifdef BILINEAR_HALF_TO_FULL_SCALE
11 Eina_Bool bilinear_downscale = EINA_FALSE;
12#endif
9 13
10 DATA32 **yp; 14 DATA32 **yp;
11 int *xp; 15 int *xp;
12 int w = dst_clip_w; 16 int w = dst_clip_w;
13 17
18#ifdef BILINEAR_HALF_TO_FULL_SCALE
19 if (// image is not too big so that cumulative error on steps might be
20 // noticeable
21 (dst_region_w <= 4096) &&
22 (dst_region_h <= 4096) &&
23 (src_region_w <= 4096) &&
24 (src_region_h <= 4096) &&
25 // if image scale is between 50% size and up to 100% of size of
26 // original, then do a special case bilinear interpolation scale
27 (dst_region_w >= (src_region_w / 2)) &&
28 (dst_region_h >= (src_region_h / 2)))
29 bilinear_downscale = EINA_TRUE;
30#endif
31
14 dptr = dst_ptr; 32 dptr = dst_ptr;
15 pos = (src_region_y * src_w) + src_region_x; 33 pos = (src_region_y * src_w) + src_region_x;
16 //dyy = dst_clip_y - dst_region_y; 34 //dyy = dst_clip_y - dst_region_y;
@@ -26,133 +44,247 @@
26#if 1 44#if 1
27 if (src->cache_entry.flags.alpha) 45 if (src->cache_entry.flags.alpha)
28 { 46 {
29 y = 0; 47#ifdef BILINEAR_HALF_TO_FULL_SCALE
30 while (dst_clip_h--) 48 if (bilinear_downscale)
31 { 49 {
32 Cy = *yapp >> 16; 50 DATA32 *lptr, *p1, *p2, *p3, *p4;
33 yap = *yapp & 0xffff; 51 unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac;
34 52 unsigned int xstep, ystep;
35 while (dst_clip_w--) 53 unsigned int a1, a2, r1, g1, b1, r2, g2, b2;
36 { 54
37 Cx = *xapp >> 16; 55 pix = src_data + (src_region_y * src_w) + src_region_x;
38 xap = *xapp & 0xffff; 56 xstep = (src_region_w << 16) / dst_region_w;
39 57 ystep = (src_region_h << 16) / dst_region_h;
40 sptr = *yp + *xp + pos; 58 ypos = (dst_clip_y - dst_region_y) * ystep;
41 pix = sptr; 59
42 sptr += src_w; 60 while (dst_clip_h--)
43 61 {
44 ax = (A_VAL(pix) * xap) >> 9; 62 xpos = (dst_clip_x - dst_region_x) * xstep;
45 rx = (R_VAL(pix) * xap) >> 9; 63 lptr = pix + ((ypos >> 16) * src_w);
46 gx = (G_VAL(pix) * xap) >> 9; 64
47 bx = (B_VAL(pix) * xap) >> 9; 65 if ((ypos >> 16) < ((unsigned int)src_h - 1))
48 pix++; 66 {
49 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 67 yfrac = ypos & 0xffff;
50 { 68 invyfrac = 0x10000 - yfrac;
51 ax += (A_VAL(pix) * Cx) >> 9; 69 while (dst_clip_w--)
52 rx += (R_VAL(pix) * Cx) >> 9; 70 {
53 gx += (G_VAL(pix) * Cx) >> 9; 71 p1 = lptr + (xpos >> 16);
54 bx += (B_VAL(pix) * Cx) >> 9; 72 p2 = p1 + 1;
55 pix++; 73 p3 = p1 + src_w;
56 } 74 p4 = p3 + 1;
57 if (i > 0) 75 xfrac = xpos & 0xffff;
58 { 76 invxfrac = 0x10000 - xfrac;
59 ax += (A_VAL(pix) * i) >> 9; 77
60 rx += (R_VAL(pix) * i) >> 9; 78 if (xfrac > 0)
61 gx += (G_VAL(pix) * i) >> 9; 79 {
62 bx += (B_VAL(pix) * i) >> 9; 80 a1 = ((invxfrac * A_VAL(p1)) + (xfrac * A_VAL(p2))) >> 16;
63 } 81 a2 = ((invxfrac * A_VAL(p3)) + (xfrac * A_VAL(p4))) >> 16;
64 82 r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
65 a = (ax * yap) >> 14; 83 r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16;
66 r = (rx * yap) >> 14; 84 g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
67 g = (gx * yap) >> 14; 85 g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16;
68 b = (bx * yap) >> 14; 86 b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
69 87 b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16;
70 for (j = (1 << 14) - yap; j > Cy; j -= Cy) 88 }
71 { 89 else
72 pix = sptr; 90 {
73 sptr += src_w; 91 a1 = A_VAL(p1);
74 ax = (A_VAL(pix) * xap) >> 9; 92 a2 = A_VAL(p3);
75 rx = (R_VAL(pix) * xap) >> 9; 93 r1 = R_VAL(p1);
76 gx = (G_VAL(pix) * xap) >> 9; 94 r2 = R_VAL(p3);
77 bx = (B_VAL(pix) * xap) >> 9; 95 g1 = G_VAL(p1);
78 pix++; 96 g2 = G_VAL(p3);
79 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 97 b1 = B_VAL(p1);
80 { 98 b2 = B_VAL(p3);
81 ax += (A_VAL(pix) * Cx) >> 9; 99 }
82 rx += (R_VAL(pix) * Cx) >> 9; 100
83 gx += (G_VAL(pix) * Cx) >> 9; 101 a = ((invyfrac * a1) + (yfrac * a2)) >> 16;
84 bx += (B_VAL(pix) * Cx) >> 9; 102 r = ((invyfrac * r1) + (yfrac * r2)) >> 16;
85 pix++; 103 g = ((invyfrac * g1) + (yfrac * g2)) >> 16;
86 } 104 b = ((invyfrac * b1) + (yfrac * b2)) >> 16;
87 if (i > 0) 105
88 { 106 *pbuf++ = ARGB_JOIN(a, r, g, b);
89 ax += (A_VAL(pix) * i) >> 9; 107 xpos += xstep;
90 rx += (R_VAL(pix) * i) >> 9; 108 }
91 gx += (G_VAL(pix) * i) >> 9; 109 }
92 bx += (B_VAL(pix) * i) >> 9; 110 else
93 } 111 {
94 112 while (dst_clip_w--)
95 a += (ax * Cy) >> 14; 113 {
96 r += (rx * Cy) >> 14; 114 p1 = lptr + (xpos >> 16);
97 g += (gx * Cy) >> 14; 115 p2 = p1 + 1;
98 b += (bx * Cy) >> 14; 116 xfrac = xpos & 0xffff;
99 } 117 invxfrac = 0x10000 - xfrac;
100 if (j > 0) 118
101 { 119 if (xfrac > 0)
102 pix = sptr; 120 {
103 sptr += src_w; 121 a = ((invxfrac * A_VAL(p1)) + (xfrac * A_VAL(p2))) >> 16;
104 ax = (A_VAL(pix) * xap) >> 9; 122 r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
105 rx = (R_VAL(pix) * xap) >> 9; 123 g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
106 gx = (G_VAL(pix) * xap) >> 9; 124 b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
107 bx = (B_VAL(pix) * xap) >> 9; 125 }
108 pix++; 126 else
109 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 127 {
110 { 128 a = A_VAL(p1);
111 ax += (A_VAL(pix) * Cx) >> 9; 129 r = R_VAL(p1);
112 rx += (R_VAL(pix) * Cx) >> 9; 130 g = G_VAL(p1);
113 gx += (G_VAL(pix) * Cx) >> 9; 131 b = B_VAL(p1);
114 bx += (B_VAL(pix) * Cx) >> 9; 132 }
115 pix++; 133
116 } 134 *pbuf++ = ARGB_JOIN(a, r, g, b);
117 if (i > 0) 135 xpos += xstep;
118 { 136 }
119 ax += (A_VAL(pix) * i) >> 9; 137 }
120 rx += (R_VAL(pix) * i) >> 9; 138 if (!mask_ie)
121 gx += (G_VAL(pix) * i) >> 9; 139 func(buf, NULL, mul_col, dptr, w);
122 bx += (B_VAL(pix) * i) >> 9; 140 else
123 } 141 {
124 142 mask = mask_ie->image.data8
125 a += (ax * j) >> 14; 143 + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
126 r += (rx * j) >> 14; 144 + (dst_clip_x - mask_x);
127 g += (gx * j) >> 14; 145
128 b += (bx * j) >> 14; 146 if (mul_col != 0xffffffff)
129 } 147 func2(buf, NULL, mul_col, buf, w);
130 *pbuf++ = ARGB_JOIN(((a + (1 << 4)) >> 5), 148 func(buf, mask, 0, dptr, w);
131 ((r + (1 << 4)) >> 5), 149 }
132 ((g + (1 << 4)) >> 5), 150 y++;
133 ((b + (1 << 4)) >> 5)); 151
134 xp++; xapp++; 152 pbuf = buf;
135 } 153 ypos += ystep;
136 154 dptr += dst_w; dst_clip_w = w;
137 if (!mask_ie) 155 }
138 func(buf, NULL, mul_col, dptr, w); 156 }
139 else 157 else
140 { 158#endif
141 mask = mask_ie->image.data8 159 {
142 + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) 160 while (dst_clip_h--)
143 + (dst_clip_x - mask_x); 161 {
144 162 Cy = *yapp >> 16;
145 if (mul_col != 0xffffffff) func2(buf, NULL, mul_col, buf, w); 163 yap = *yapp & 0xffff;
146 func(buf, mask, 0, dptr, w); 164
147 } 165 while (dst_clip_w--)
148 y++; 166 {
149 167 Cx = *xapp >> 16;
150 pbuf = buf; 168 xap = *xapp & 0xffff;
151 dptr += dst_w; dst_clip_w = w; 169
152 xp = xpoints;// + dxx; 170 sptr = *yp + *xp + pos;
153 xapp = xapoints;// + dxx; 171 pix = sptr;
154 yp++; yapp++; 172 sptr += src_w;
155 } 173
174 ax = (A_VAL(pix) * xap) >> 9;
175 rx = (R_VAL(pix) * xap) >> 9;
176 gx = (G_VAL(pix) * xap) >> 9;
177 bx = (B_VAL(pix) * xap) >> 9;
178 pix++;
179 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
180 {
181 ax += (A_VAL(pix) * Cx) >> 9;
182 rx += (R_VAL(pix) * Cx) >> 9;
183 gx += (G_VAL(pix) * Cx) >> 9;
184 bx += (B_VAL(pix) * Cx) >> 9;
185 pix++;
186 }
187 if (i > 0)
188 {
189 ax += (A_VAL(pix) * i) >> 9;
190 rx += (R_VAL(pix) * i) >> 9;
191 gx += (G_VAL(pix) * i) >> 9;
192 bx += (B_VAL(pix) * i) >> 9;
193 }
194
195 a = (ax * yap) >> 14;
196 r = (rx * yap) >> 14;
197 g = (gx * yap) >> 14;
198 b = (bx * yap) >> 14;
199
200 for (j = (1 << 14) - yap; j > Cy; j -= Cy)
201 {
202 pix = sptr;
203 sptr += src_w;
204 ax = (A_VAL(pix) * xap) >> 9;
205 rx = (R_VAL(pix) * xap) >> 9;
206 gx = (G_VAL(pix) * xap) >> 9;
207 bx = (B_VAL(pix) * xap) >> 9;
208 pix++;
209 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
210 {
211 ax += (A_VAL(pix) * Cx) >> 9;
212 rx += (R_VAL(pix) * Cx) >> 9;
213 gx += (G_VAL(pix) * Cx) >> 9;
214 bx += (B_VAL(pix) * Cx) >> 9;
215 pix++;
216 }
217 if (i > 0)
218 {
219 ax += (A_VAL(pix) * i) >> 9;
220 rx += (R_VAL(pix) * i) >> 9;
221 gx += (G_VAL(pix) * i) >> 9;
222 bx += (B_VAL(pix) * i) >> 9;
223 }
224
225 a += (ax * Cy) >> 14;
226 r += (rx * Cy) >> 14;
227 g += (gx * Cy) >> 14;
228 b += (bx * Cy) >> 14;
229 }
230 if (j > 0)
231 {
232 pix = sptr;
233 sptr += src_w;
234 ax = (A_VAL(pix) * xap) >> 9;
235 rx = (R_VAL(pix) * xap) >> 9;
236 gx = (G_VAL(pix) * xap) >> 9;
237 bx = (B_VAL(pix) * xap) >> 9;
238 pix++;
239 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
240 {
241 ax += (A_VAL(pix) * Cx) >> 9;
242 rx += (R_VAL(pix) * Cx) >> 9;
243 gx += (G_VAL(pix) * Cx) >> 9;
244 bx += (B_VAL(pix) * Cx) >> 9;
245 pix++;
246 }
247 if (i > 0)
248 {
249 ax += (A_VAL(pix) * i) >> 9;
250 rx += (R_VAL(pix) * i) >> 9;
251 gx += (G_VAL(pix) * i) >> 9;
252 bx += (B_VAL(pix) * i) >> 9;
253 }
254
255 a += (ax * j) >> 14;
256 r += (rx * j) >> 14;
257 g += (gx * j) >> 14;
258 b += (bx * j) >> 14;
259 }
260 *pbuf++ = ARGB_JOIN(((a + (1 << 4)) >> 5),
261 ((r + (1 << 4)) >> 5),
262 ((g + (1 << 4)) >> 5),
263 ((b + (1 << 4)) >> 5));
264 xp++; xapp++;
265 }
266
267 if (!mask_ie)
268 func(buf, NULL, mul_col, dptr, w);
269 else
270 {
271 mask = mask_ie->image.data8
272 + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
273 + (dst_clip_x - mask_x);
274
275 if (mul_col != 0xffffffff)
276 func2(buf, NULL, mul_col, buf, w);
277 func(buf, mask, 0, dptr, w);
278 }
279 y++;
280
281 pbuf = buf;
282 dptr += dst_w; dst_clip_w = w;
283 xp = xpoints;// + dxx;
284 xapp = xapoints;// + dxx;
285 yp++; yapp++;
286 }
287 }
156 } 288 }
157 else 289 else
158 { 290 {
@@ -161,228 +293,432 @@
161 (!dst->cache_entry.flags.alpha) && 293 (!dst->cache_entry.flags.alpha) &&
162 (mul_col == 0xffffffff) && 294 (mul_col == 0xffffffff) &&
163 (!mask_ie)) 295 (!mask_ie))
164 { 296 {
165 while (dst_clip_h--) 297 // RGBA_Image *src, RGBA_Image *dst
166 { 298 // dst_clip_x, dst_clip_x\y, dst_clip_w, dst_clip_h
167 Cy = *yapp >> 16; 299 // src_region_x, src_region_y, src_region_w, src_region_h
168 yap = *yapp & 0xffff; 300 // dst_region_x, dst_region_y, dst_region_w, dst_region_h
169 301#ifdef BILINEAR_HALF_TO_FULL_SCALE
170 pbuf = dptr; 302 if (bilinear_downscale)
171 while (dst_clip_w--) 303 {
172 { 304 DATA32 *lptr, *p1, *p2, *p3, *p4;
173 Cx = *xapp >> 16; 305 unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac;
174 xap = *xapp & 0xffff; 306 unsigned int xstep, ystep;
175 307 unsigned int r1, g1, b1, r2, g2, b2;
176 sptr = *yp + *xp + pos; 308
177 pix = sptr; 309 pix = src_data + (src_region_y * src_w) + src_region_x;
178 sptr += src_w; 310 xstep = (src_region_w << 16) / dst_region_w;
179 311 ystep = (src_region_h << 16) / dst_region_h;
180 rx = (R_VAL(pix) * xap) >> 9; 312 ypos = (dst_clip_y - dst_region_y) * ystep;
181 gx = (G_VAL(pix) * xap) >> 9; 313
182 bx = (B_VAL(pix) * xap) >> 9; 314 while (dst_clip_h--)
183 pix++; 315 {
184 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 316 pbuf = dptr;
185 { 317 xpos = (dst_clip_x - dst_region_x) * xstep;
186 rx += (R_VAL(pix) * Cx) >> 9; 318 lptr = pix + ((ypos >> 16) * src_w);
187 gx += (G_VAL(pix) * Cx) >> 9; 319
188 bx += (B_VAL(pix) * Cx) >> 9; 320 if ((ypos >> 16) < ((unsigned int)src_h - 1))
189 pix++; 321 {
190 } 322 yfrac = ypos & 0xffff;
191 if (i > 0) 323 invyfrac = 0x10000 - yfrac;
192 { 324 while (dst_clip_w--)
193 rx += (R_VAL(pix) * i) >> 9; 325 {
194 gx += (G_VAL(pix) * i) >> 9; 326 p1 = lptr + (xpos >> 16);
195 bx += (B_VAL(pix) * i) >> 9; 327 p2 = p1 + 1;
196 } 328 p3 = p1 + src_w;
197 329 p4 = p3 + 1;
198 r = (rx * yap) >> 14; 330 xfrac = xpos & 0xffff;
199 g = (gx * yap) >> 14; 331 invxfrac = 0x10000 - xfrac;
200 b = (bx * yap) >> 14; 332
201 333 if (xfrac > 0)
202 for (j = (1 << 14) - yap; j > Cy; j -= Cy) 334 {
203 { 335 r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
204 pix = sptr; 336 r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16;
205 sptr += src_w; 337 g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
206 rx = (R_VAL(pix) * xap) >> 9; 338 g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16;
207 gx = (G_VAL(pix) * xap) >> 9; 339 b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
208 bx = (B_VAL(pix) * xap) >> 9; 340 b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16;
209 pix++; 341 }
210 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 342 else
211 { 343 {
212 rx += (R_VAL(pix) * Cx) >> 9; 344 r1 = R_VAL(p1);
213 gx += (G_VAL(pix) * Cx) >> 9; 345 r2 = R_VAL(p3);
214 bx += (B_VAL(pix) * Cx) >> 9; 346 g1 = G_VAL(p1);
215 pix++; 347 g2 = G_VAL(p3);
216 } 348 b1 = B_VAL(p1);
217 if (i > 0) 349 b2 = B_VAL(p3);
218 { 350 }
219 rx += (R_VAL(pix) * i) >> 9; 351
220 gx += (G_VAL(pix) * i) >> 9; 352 r = ((invyfrac * r1) + (yfrac * r2)) >> 16;
221 bx += (B_VAL(pix) * i) >> 9; 353 g = ((invyfrac * g1) + (yfrac * g2)) >> 16;
222 } 354 b = ((invyfrac * b1) + (yfrac * b2)) >> 16;
223 355
224 r += (rx * Cy) >> 14; 356 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
225 g += (gx * Cy) >> 14; 357 xpos += xstep;
226 b += (bx * Cy) >> 14; 358 }
227 } 359 }
228 if (j > 0) 360 else
229 { 361 {
230 pix = sptr; 362 while (dst_clip_w--)
231 sptr += src_w; 363 {
232 rx = (R_VAL(pix) * xap) >> 9; 364 p1 = lptr + (xpos >> 16);
233 gx = (G_VAL(pix) * xap) >> 9; 365 p2 = p1 + 1;
234 bx = (B_VAL(pix) * xap) >> 9; 366 xfrac = xpos & 0xffff;
235 pix++; 367 invxfrac = 0x10000 - xfrac;
236 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 368
237 { 369 if (xfrac > 0)
238 rx += (R_VAL(pix) * Cx) >> 9; 370 {
239 gx += (G_VAL(pix) * Cx) >> 9; 371 r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
240 bx += (B_VAL(pix) * Cx) >> 9; 372 g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
241 pix++; 373 b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
242 } 374 }
243 if (i > 0) 375 else
244 { 376 {
245 rx += (R_VAL(pix) * i) >> 9; 377 r = R_VAL(p1);
246 gx += (G_VAL(pix) * i) >> 9; 378 g = G_VAL(p1);
247 bx += (B_VAL(pix) * i) >> 9; 379 b = B_VAL(p1);
248 } 380 }
249 381
250 r += (rx * j) >> 14; 382 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
251 g += (gx * j) >> 14; 383 xpos += xstep;
252 b += (bx * j) >> 14; 384 }
253 } 385 }
254 *pbuf++ = ARGB_JOIN(0xff, 386 ypos += ystep;
255 ((r + (1 << 4)) >> 5), 387 dptr += dst_w; dst_clip_w = w;
256 ((g + (1 << 4)) >> 5), 388 }
257 ((b + (1 << 4)) >> 5)); 389 }
258 xp++; xapp++; 390 else
259 } 391#endif
260 392 {
261 dptr += dst_w; dst_clip_w = w; 393 while (dst_clip_h--)
262 xp = xpoints;// + dxx; 394 {
263 xapp = xapoints;// + dxx; 395 Cy = *yapp >> 16;
264 yp++; yapp++; 396 yap = *yapp & 0xffff;
265 } 397
266 } 398 pbuf = dptr;
267 else 399 while (dst_clip_w--)
400 {
401 Cx = *xapp >> 16;
402 xap = *xapp & 0xffff;
403
404 sptr = *yp + *xp + pos;
405 pix = sptr;
406 sptr += src_w;
407
408 rx = (R_VAL(pix) * xap) >> 9;
409 gx = (G_VAL(pix) * xap) >> 9;
410 bx = (B_VAL(pix) * xap) >> 9;
411 pix++;
412 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
413 {
414 rx += (R_VAL(pix) * Cx) >> 9;
415 gx += (G_VAL(pix) * Cx) >> 9;
416 bx += (B_VAL(pix) * Cx) >> 9;
417 pix++;
418 }
419 if (i > 0)
420 {
421 rx += (R_VAL(pix) * i) >> 9;
422 gx += (G_VAL(pix) * i) >> 9;
423 bx += (B_VAL(pix) * i) >> 9;
424 }
425
426 r = (rx * yap) >> 14;
427 g = (gx * yap) >> 14;
428 b = (bx * yap) >> 14;
429
430 for (j = (1 << 14) - yap; j > Cy; j -= Cy)
431 {
432 pix = sptr;
433 sptr += src_w;
434 rx = (R_VAL(pix) * xap) >> 9;
435 gx = (G_VAL(pix) * xap) >> 9;
436 bx = (B_VAL(pix) * xap) >> 9;
437 pix++;
438 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
439 {
440 rx += (R_VAL(pix) * Cx) >> 9;
441 gx += (G_VAL(pix) * Cx) >> 9;
442 bx += (B_VAL(pix) * Cx) >> 9;
443 pix++;
444 }
445 if (i > 0)
446 {
447 rx += (R_VAL(pix) * i) >> 9;
448 gx += (G_VAL(pix) * i) >> 9;
449 bx += (B_VAL(pix) * i) >> 9;
450 }
451
452 r += (rx * Cy) >> 14;
453 g += (gx * Cy) >> 14;
454 b += (bx * Cy) >> 14;
455 }
456 if (j > 0)
457 {
458 pix = sptr;
459 sptr += src_w;
460 rx = (R_VAL(pix) * xap) >> 9;
461 gx = (G_VAL(pix) * xap) >> 9;
462 bx = (B_VAL(pix) * xap) >> 9;
463 pix++;
464 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
465 {
466 rx += (R_VAL(pix) * Cx) >> 9;
467 gx += (G_VAL(pix) * Cx) >> 9;
468 bx += (B_VAL(pix) * Cx) >> 9;
469 pix++;
470 }
471 if (i > 0)
472 {
473 rx += (R_VAL(pix) * i) >> 9;
474 gx += (G_VAL(pix) * i) >> 9;
475 bx += (B_VAL(pix) * i) >> 9;
476 }
477
478 r += (rx * j) >> 14;
479 g += (gx * j) >> 14;
480 b += (bx * j) >> 14;
481 }
482 *pbuf++ = ARGB_JOIN(0xff,
483 ((r + (1 << 4)) >> 5),
484 ((g + (1 << 4)) >> 5),
485 ((b + (1 << 4)) >> 5));
486 xp++; xapp++;
487 }
488
489 dptr += dst_w; dst_clip_w = w;
490 xp = xpoints;// + dxx;
491 xapp = xapoints;// + dxx;
492 yp++; yapp++;
493 }
494 }
495 }
496 else
268#endif 497#endif
269 { 498 {
270 y = 0; 499#ifdef BILINEAR_HALF_TO_FULL_SCALE
271 while (dst_clip_h--) 500 if (bilinear_downscale)
272 { 501 {
273 Cy = *yapp >> 16; 502 DATA32 *lptr, *p1, *p2, *p3, *p4;
274 yap = *yapp & 0xffff; 503 unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac;
275 504 unsigned int xstep, ystep;
276 while (dst_clip_w--) 505 unsigned int r1, g1, b1, r2, g2, b2;
277 { 506
278 Cx = *xapp >> 16; 507 pix = src_data + (src_region_y * src_w) + src_region_x;
279 xap = *xapp & 0xffff; 508 xstep = (src_region_w << 16) / dst_region_w;
280 509 ystep = (src_region_h << 16) / dst_region_h;
281 sptr = *yp + *xp + pos; 510 ypos = (dst_clip_y - dst_region_y) * ystep;
282 pix = sptr; 511
283 sptr += src_w; 512 while (dst_clip_h--)
284 513 {
285 rx = (R_VAL(pix) * xap) >> 9; 514 xpos = (dst_clip_x - dst_region_x) * xstep;
286 gx = (G_VAL(pix) * xap) >> 9; 515 lptr = pix + ((ypos >> 16) * src_w);
287 bx = (B_VAL(pix) * xap) >> 9; 516
288 pix++; 517 if ((ypos >> 16) < ((unsigned int)src_h - 1))
289 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 518 {
290 { 519 yfrac = ypos & 0xffff;
291 rx += (R_VAL(pix) * Cx) >> 9; 520 invyfrac = 0x10000 - yfrac;
292 gx += (G_VAL(pix) * Cx) >> 9; 521 while (dst_clip_w--)
293 bx += (B_VAL(pix) * Cx) >> 9; 522 {
294 pix++; 523 p1 = lptr + (xpos >> 16);
295 } 524 p2 = p1 + 1;
296 if (i > 0) 525 p3 = p1 + src_w;
297 { 526 p4 = p3 + 1;
298 rx += (R_VAL(pix) * i) >> 9; 527 xfrac = xpos & 0xffff;
299 gx += (G_VAL(pix) * i) >> 9; 528 invxfrac = 0x10000 - xfrac;
300 bx += (B_VAL(pix) * i) >> 9; 529
301 } 530 if (xfrac > 0)
302 531 {
303 r = (rx * yap) >> 14; 532 r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
304 g = (gx * yap) >> 14; 533 r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16;
305 b = (bx * yap) >> 14; 534 g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
306 535 g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16;
307 for (j = (1 << 14) - yap; j > Cy; j -= Cy) 536 b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
308 { 537 b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16;
309 pix = sptr; 538 }
310 sptr += src_w; 539 else
311 rx = (R_VAL(pix) * xap) >> 9; 540 {
312 gx = (G_VAL(pix) * xap) >> 9; 541 r1 = R_VAL(p1);
313 bx = (B_VAL(pix) * xap) >> 9; 542 r2 = R_VAL(p3);
314 pix++; 543 g1 = G_VAL(p1);
315 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 544 g2 = G_VAL(p3);
316 { 545 b1 = B_VAL(p1);
317 rx += (R_VAL(pix) * Cx) >> 9; 546 b2 = B_VAL(p3);
318 gx += (G_VAL(pix) * Cx) >> 9; 547 }
319 bx += (B_VAL(pix) * Cx) >> 9; 548
320 pix++; 549 r = ((invyfrac * r1) + (yfrac * r2)) >> 16;
321 } 550 g = ((invyfrac * g1) + (yfrac * g2)) >> 16;
322 if (i > 0) 551 b = ((invyfrac * b1) + (yfrac * b2)) >> 16;
323 { 552
324 rx += (R_VAL(pix) * i) >> 9; 553 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
325 gx += (G_VAL(pix) * i) >> 9; 554 xpos += xstep;
326 bx += (B_VAL(pix) * i) >> 9; 555 }
327 } 556 }
328 557 else
329 r += (rx * Cy) >> 14; 558 {
330 g += (gx * Cy) >> 14; 559 while (dst_clip_w--)
331 b += (bx * Cy) >> 14; 560 {
332 } 561 p1 = lptr + (xpos >> 16);
333 if (j > 0) 562 p2 = p1 + 1;
334 { 563 xfrac = xpos & 0xffff;
335 pix = sptr; 564 invxfrac = 0x10000 - xfrac;
336 sptr += src_w; 565
337 rx = (R_VAL(pix) * xap) >> 9; 566 if (xfrac > 0)
338 gx = (G_VAL(pix) * xap) >> 9; 567 {
339 bx = (B_VAL(pix) * xap) >> 9; 568 r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
340 pix++; 569 g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
341 for (i = (1 << 14) - xap; i > Cx; i -= Cx) 570 b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
342 { 571 }
343 rx += (R_VAL(pix) * Cx) >> 9; 572 else
344 gx += (G_VAL(pix) * Cx) >> 9; 573 {
345 bx += (B_VAL(pix) * Cx) >> 9; 574 r = R_VAL(p1);
346 pix++; 575 g = G_VAL(p1);
347 } 576 b = B_VAL(p1);
348 if (i > 0) 577 }
349 { 578
350 rx += (R_VAL(pix) * i) >> 9; 579 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
351 gx += (G_VAL(pix) * i) >> 9; 580 xpos += xstep;
352 bx += (B_VAL(pix) * i) >> 9; 581 }
353 } 582 }
354 583 if (!mask_ie)
355 r += (rx * j) >> 14; 584 func(buf, NULL, mul_col, dptr, w);
356 g += (gx * j) >> 14; 585 else
357 b += (bx * j) >> 14; 586 {
358 } 587 mask = mask_ie->image.data8
359 *pbuf++ = ARGB_JOIN(0xff, 588 + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
360 ((r + (1 << 4)) >> 5), 589 + (dst_clip_x - mask_x);
361 ((g + (1 << 4)) >> 5), 590
362 ((b + (1 << 4)) >> 5)); 591 if (mul_col != 0xffffffff)
363 xp++; xapp++; 592 func2(buf, NULL, mul_col, buf, w);
364 } 593 func(buf, mask, 0, dptr, w);
365 594 }
366 if (!mask_ie) 595 y++;
367 func(buf, NULL, mul_col, dptr, w); 596
368 else 597 pbuf = buf;
369 { 598 ypos += ystep;
370 mask = mask_ie->image.data8 599 dptr += dst_w; dst_clip_w = w;
371 + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) 600 }
372 + (dst_clip_x - mask_x); 601 }
373 602 else
374 if (mul_col != 0xffffffff) func2(buf, NULL, mul_col, buf, w); 603#endif
375 func(buf, mask, 0, dptr, w); 604 {
376 } 605 while (dst_clip_h--)
377 y++; 606 {
378 607 Cy = *yapp >> 16;
379 pbuf = buf; 608 yap = *yapp & 0xffff;
380 dptr += dst_w; dst_clip_w = w; 609
381 xp = xpoints;// + dxx; 610 while (dst_clip_w--)
382 xapp = xapoints;// + dxx; 611 {
383 yp++; yapp++; 612 Cx = *xapp >> 16;
384 } 613 xap = *xapp & 0xffff;
385 } 614
615 sptr = *yp + *xp + pos;
616 pix = sptr;
617 sptr += src_w;
618
619 rx = (R_VAL(pix) * xap) >> 9;
620 gx = (G_VAL(pix) * xap) >> 9;
621 bx = (B_VAL(pix) * xap) >> 9;
622 pix++;
623 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
624 {
625 rx += (R_VAL(pix) * Cx) >> 9;
626 gx += (G_VAL(pix) * Cx) >> 9;
627 bx += (B_VAL(pix) * Cx) >> 9;
628 pix++;
629 }
630 if (i > 0)
631 {
632 rx += (R_VAL(pix) * i) >> 9;
633 gx += (G_VAL(pix) * i) >> 9;
634 bx += (B_VAL(pix) * i) >> 9;
635 }
636
637 r = (rx * yap) >> 14;
638 g = (gx * yap) >> 14;
639 b = (bx * yap) >> 14;
640
641 for (j = (1 << 14) - yap; j > Cy; j -= Cy)
642 {
643 pix = sptr;
644 sptr += src_w;
645 rx = (R_VAL(pix) * xap) >> 9;
646 gx = (G_VAL(pix) * xap) >> 9;
647 bx = (B_VAL(pix) * xap) >> 9;
648 pix++;
649 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
650 {
651 rx += (R_VAL(pix) * Cx) >> 9;
652 gx += (G_VAL(pix) * Cx) >> 9;
653 bx += (B_VAL(pix) * Cx) >> 9;
654 pix++;
655 }
656 if (i > 0)
657 {
658 rx += (R_VAL(pix) * i) >> 9;
659 gx += (G_VAL(pix) * i) >> 9;
660 bx += (B_VAL(pix) * i) >> 9;
661 }
662
663 r += (rx * Cy) >> 14;
664 g += (gx * Cy) >> 14;
665 b += (bx * Cy) >> 14;
666 }
667 if (j > 0)
668 {
669 pix = sptr;
670 sptr += src_w;
671 rx = (R_VAL(pix) * xap) >> 9;
672 gx = (G_VAL(pix) * xap) >> 9;
673 bx = (B_VAL(pix) * xap) >> 9;
674 pix++;
675 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
676 {
677 rx += (R_VAL(pix) * Cx) >> 9;
678 gx += (G_VAL(pix) * Cx) >> 9;
679 bx += (B_VAL(pix) * Cx) >> 9;
680 pix++;
681 }
682 if (i > 0)
683 {
684 rx += (R_VAL(pix) * i) >> 9;
685 gx += (G_VAL(pix) * i) >> 9;
686 bx += (B_VAL(pix) * i) >> 9;
687 }
688
689 r += (rx * j) >> 14;
690 g += (gx * j) >> 14;
691 b += (bx * j) >> 14;
692 }
693 *pbuf++ = ARGB_JOIN(0xff,
694 ((r + (1 << 4)) >> 5),
695 ((g + (1 << 4)) >> 5),
696 ((b + (1 << 4)) >> 5));
697 xp++; xapp++;
698 }
699
700 if (!mask_ie)
701 func(buf, NULL, mul_col, dptr, w);
702 else
703 {
704 mask = mask_ie->image.data8
705 + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
706 + (dst_clip_x - mask_x);
707
708 if (mul_col != 0xffffffff)
709 func2(buf, NULL, mul_col, buf, w);
710 func(buf, mask, 0, dptr, w);
711 }
712 y++;
713
714 pbuf = buf;
715 dptr += dst_w; dst_clip_w = w;
716 xp = xpoints;// + dxx;
717 xapp = xapoints;// + dxx;
718 yp++; yapp++;
719 }
720 }
721 }
386 } 722 }
387#else 723#else
388 /* MMX scaling down would go here */ 724 /* MMX scaling down would go here */