diff options
author | Carsten Haitzler (Rasterman) <raster@rasterman.com> | 2016-08-19 11:04:50 +0900 |
---|---|---|
committer | Carsten Haitzler (Rasterman) <raster@rasterman.com> | 2016-08-19 11:08:55 +0900 |
commit | a739d4d7da5ff7053ab63c72eed632c246fe17b0 (patch) | |
tree | b7ed99f89fce1f5b7ea3087571b282c827038f7d /src/lib/evas/common | |
parent | e56811ed4db61ac2ac14d28a7a8fac83f41c43b8 (diff) |
evas software downscaler - get about 1.8x to 3x speedups for cases
this speeds up downscaling of images by somewhere between 1.8x and 3x
depending on case and cpu etc. - this is ONLY for downscaling of an
image buffer between 50% width and/or height up to 100% of width and
height. it's a special case optimization that cuts down the complexity
of the full super sampling filter to just do a bilinear interpolation
which is actually strictly correct for this size range and shouldn't
drop quality. it uses fixed point (16.16) to do the sub-pixel sampling.
no mmx/sse or neon, but we could actually easily use it as we do use
mmx/sse and neon in the bilinear upscaler to do interpolation so this
would work here too. it just requires time and effort to make yet 2x
more special cases and use the ASM to do the hard slog here.
@optimize
Diffstat (limited to 'src/lib/evas/common')
-rw-r--r-- | src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c | 1036 |
1 files changed, 686 insertions, 350 deletions
diff --git a/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c b/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c index 84fed3faf3..dfe0e5dc31 100644 --- a/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c +++ b/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c | |||
@@ -1,16 +1,34 @@ | |||
1 | #define BILINEAR_HALF_TO_FULL_SCALE 1 | ||
2 | |||
1 | { | 3 | { |
2 | int Cx, Cy, i, j; | 4 | int Cx, Cy, i, j; |
3 | DATA32 *dptr, *sptr, *pix, *pbuf; | 5 | DATA32 *dptr, *sptr, *pix, *pbuf; |
4 | DATA8 *mask; | 6 | DATA8 *mask; |
5 | int a, r, g, b, rx, gx, bx, ax; | 7 | int a, r, g, b, rx, gx, bx, ax; |
6 | int xap, yap, pos; | 8 | int xap, yap, pos; |
7 | int y; | 9 | int y = 0; |
8 | //int dyy, dxx; | 10 | #ifdef BILINEAR_HALF_TO_FULL_SCALE |
11 | Eina_Bool bilinear_downscale = EINA_FALSE; | ||
12 | #endif | ||
9 | 13 | ||
10 | DATA32 **yp; | 14 | DATA32 **yp; |
11 | int *xp; | 15 | int *xp; |
12 | int w = dst_clip_w; | 16 | int w = dst_clip_w; |
13 | 17 | ||
18 | #ifdef BILINEAR_HALF_TO_FULL_SCALE | ||
19 | if (// image is not too big so that cumulative error on steps might be | ||
20 | // noticeable | ||
21 | (dst_region_w <= 4096) && | ||
22 | (dst_region_h <= 4096) && | ||
23 | (src_region_w <= 4096) && | ||
24 | (src_region_h <= 4096) && | ||
25 | // if image scale is between 50% size and up to 100% of size of | ||
26 | // original, then do a special case bilinear interpolation scale | ||
27 | (dst_region_w >= (src_region_w / 2)) && | ||
28 | (dst_region_h >= (src_region_h / 2))) | ||
29 | bilinear_downscale = EINA_TRUE; | ||
30 | #endif | ||
31 | |||
14 | dptr = dst_ptr; | 32 | dptr = dst_ptr; |
15 | pos = (src_region_y * src_w) + src_region_x; | 33 | pos = (src_region_y * src_w) + src_region_x; |
16 | //dyy = dst_clip_y - dst_region_y; | 34 | //dyy = dst_clip_y - dst_region_y; |
@@ -26,133 +44,247 @@ | |||
26 | #if 1 | 44 | #if 1 |
27 | if (src->cache_entry.flags.alpha) | 45 | if (src->cache_entry.flags.alpha) |
28 | { | 46 | { |
29 | y = 0; | 47 | #ifdef BILINEAR_HALF_TO_FULL_SCALE |
30 | while (dst_clip_h--) | 48 | if (bilinear_downscale) |
31 | { | 49 | { |
32 | Cy = *yapp >> 16; | 50 | DATA32 *lptr, *p1, *p2, *p3, *p4; |
33 | yap = *yapp & 0xffff; | 51 | unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac; |
34 | 52 | unsigned int xstep, ystep; | |
35 | while (dst_clip_w--) | 53 | unsigned int a1, a2, r1, g1, b1, r2, g2, b2; |
36 | { | 54 | |
37 | Cx = *xapp >> 16; | 55 | pix = src_data + (src_region_y * src_w) + src_region_x; |
38 | xap = *xapp & 0xffff; | 56 | xstep = (src_region_w << 16) / dst_region_w; |
39 | 57 | ystep = (src_region_h << 16) / dst_region_h; | |
40 | sptr = *yp + *xp + pos; | 58 | ypos = (dst_clip_y - dst_region_y) * ystep; |
41 | pix = sptr; | 59 | |
42 | sptr += src_w; | 60 | while (dst_clip_h--) |
43 | 61 | { | |
44 | ax = (A_VAL(pix) * xap) >> 9; | 62 | xpos = (dst_clip_x - dst_region_x) * xstep; |
45 | rx = (R_VAL(pix) * xap) >> 9; | 63 | lptr = pix + ((ypos >> 16) * src_w); |
46 | gx = (G_VAL(pix) * xap) >> 9; | 64 | |
47 | bx = (B_VAL(pix) * xap) >> 9; | 65 | if ((ypos >> 16) < ((unsigned int)src_h - 1)) |
48 | pix++; | 66 | { |
49 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 67 | yfrac = ypos & 0xffff; |
50 | { | 68 | invyfrac = 0x10000 - yfrac; |
51 | ax += (A_VAL(pix) * Cx) >> 9; | 69 | while (dst_clip_w--) |
52 | rx += (R_VAL(pix) * Cx) >> 9; | 70 | { |
53 | gx += (G_VAL(pix) * Cx) >> 9; | 71 | p1 = lptr + (xpos >> 16); |
54 | bx += (B_VAL(pix) * Cx) >> 9; | 72 | p2 = p1 + 1; |
55 | pix++; | 73 | p3 = p1 + src_w; |
56 | } | 74 | p4 = p3 + 1; |
57 | if (i > 0) | 75 | xfrac = xpos & 0xffff; |
58 | { | 76 | invxfrac = 0x10000 - xfrac; |
59 | ax += (A_VAL(pix) * i) >> 9; | 77 | |
60 | rx += (R_VAL(pix) * i) >> 9; | 78 | if (xfrac > 0) |
61 | gx += (G_VAL(pix) * i) >> 9; | 79 | { |
62 | bx += (B_VAL(pix) * i) >> 9; | 80 | a1 = ((invxfrac * A_VAL(p1)) + (xfrac * A_VAL(p2))) >> 16; |
63 | } | 81 | a2 = ((invxfrac * A_VAL(p3)) + (xfrac * A_VAL(p4))) >> 16; |
64 | 82 | r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16; | |
65 | a = (ax * yap) >> 14; | 83 | r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16; |
66 | r = (rx * yap) >> 14; | 84 | g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16; |
67 | g = (gx * yap) >> 14; | 85 | g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16; |
68 | b = (bx * yap) >> 14; | 86 | b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16; |
69 | 87 | b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16; | |
70 | for (j = (1 << 14) - yap; j > Cy; j -= Cy) | 88 | } |
71 | { | 89 | else |
72 | pix = sptr; | 90 | { |
73 | sptr += src_w; | 91 | a1 = A_VAL(p1); |
74 | ax = (A_VAL(pix) * xap) >> 9; | 92 | a2 = A_VAL(p3); |
75 | rx = (R_VAL(pix) * xap) >> 9; | 93 | r1 = R_VAL(p1); |
76 | gx = (G_VAL(pix) * xap) >> 9; | 94 | r2 = R_VAL(p3); |
77 | bx = (B_VAL(pix) * xap) >> 9; | 95 | g1 = G_VAL(p1); |
78 | pix++; | 96 | g2 = G_VAL(p3); |
79 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 97 | b1 = B_VAL(p1); |
80 | { | 98 | b2 = B_VAL(p3); |
81 | ax += (A_VAL(pix) * Cx) >> 9; | 99 | } |
82 | rx += (R_VAL(pix) * Cx) >> 9; | 100 | |
83 | gx += (G_VAL(pix) * Cx) >> 9; | 101 | a = ((invyfrac * a1) + (yfrac * a2)) >> 16; |
84 | bx += (B_VAL(pix) * Cx) >> 9; | 102 | r = ((invyfrac * r1) + (yfrac * r2)) >> 16; |
85 | pix++; | 103 | g = ((invyfrac * g1) + (yfrac * g2)) >> 16; |
86 | } | 104 | b = ((invyfrac * b1) + (yfrac * b2)) >> 16; |
87 | if (i > 0) | 105 | |
88 | { | 106 | *pbuf++ = ARGB_JOIN(a, r, g, b); |
89 | ax += (A_VAL(pix) * i) >> 9; | 107 | xpos += xstep; |
90 | rx += (R_VAL(pix) * i) >> 9; | 108 | } |
91 | gx += (G_VAL(pix) * i) >> 9; | 109 | } |
92 | bx += (B_VAL(pix) * i) >> 9; | 110 | else |
93 | } | 111 | { |
94 | 112 | while (dst_clip_w--) | |
95 | a += (ax * Cy) >> 14; | 113 | { |
96 | r += (rx * Cy) >> 14; | 114 | p1 = lptr + (xpos >> 16); |
97 | g += (gx * Cy) >> 14; | 115 | p2 = p1 + 1; |
98 | b += (bx * Cy) >> 14; | 116 | xfrac = xpos & 0xffff; |
99 | } | 117 | invxfrac = 0x10000 - xfrac; |
100 | if (j > 0) | 118 | |
101 | { | 119 | if (xfrac > 0) |
102 | pix = sptr; | 120 | { |
103 | sptr += src_w; | 121 | a = ((invxfrac * A_VAL(p1)) + (xfrac * A_VAL(p2))) >> 16; |
104 | ax = (A_VAL(pix) * xap) >> 9; | 122 | r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16; |
105 | rx = (R_VAL(pix) * xap) >> 9; | 123 | g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16; |
106 | gx = (G_VAL(pix) * xap) >> 9; | 124 | b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16; |
107 | bx = (B_VAL(pix) * xap) >> 9; | 125 | } |
108 | pix++; | 126 | else |
109 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 127 | { |
110 | { | 128 | a = A_VAL(p1); |
111 | ax += (A_VAL(pix) * Cx) >> 9; | 129 | r = R_VAL(p1); |
112 | rx += (R_VAL(pix) * Cx) >> 9; | 130 | g = G_VAL(p1); |
113 | gx += (G_VAL(pix) * Cx) >> 9; | 131 | b = B_VAL(p1); |
114 | bx += (B_VAL(pix) * Cx) >> 9; | 132 | } |
115 | pix++; | 133 | |
116 | } | 134 | *pbuf++ = ARGB_JOIN(a, r, g, b); |
117 | if (i > 0) | 135 | xpos += xstep; |
118 | { | 136 | } |
119 | ax += (A_VAL(pix) * i) >> 9; | 137 | } |
120 | rx += (R_VAL(pix) * i) >> 9; | 138 | if (!mask_ie) |
121 | gx += (G_VAL(pix) * i) >> 9; | 139 | func(buf, NULL, mul_col, dptr, w); |
122 | bx += (B_VAL(pix) * i) >> 9; | 140 | else |
123 | } | 141 | { |
124 | 142 | mask = mask_ie->image.data8 | |
125 | a += (ax * j) >> 14; | 143 | + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) |
126 | r += (rx * j) >> 14; | 144 | + (dst_clip_x - mask_x); |
127 | g += (gx * j) >> 14; | 145 | |
128 | b += (bx * j) >> 14; | 146 | if (mul_col != 0xffffffff) |
129 | } | 147 | func2(buf, NULL, mul_col, buf, w); |
130 | *pbuf++ = ARGB_JOIN(((a + (1 << 4)) >> 5), | 148 | func(buf, mask, 0, dptr, w); |
131 | ((r + (1 << 4)) >> 5), | 149 | } |
132 | ((g + (1 << 4)) >> 5), | 150 | y++; |
133 | ((b + (1 << 4)) >> 5)); | 151 | |
134 | xp++; xapp++; | 152 | pbuf = buf; |
135 | } | 153 | ypos += ystep; |
136 | 154 | dptr += dst_w; dst_clip_w = w; | |
137 | if (!mask_ie) | 155 | } |
138 | func(buf, NULL, mul_col, dptr, w); | 156 | } |
139 | else | 157 | else |
140 | { | 158 | #endif |
141 | mask = mask_ie->image.data8 | 159 | { |
142 | + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) | 160 | while (dst_clip_h--) |
143 | + (dst_clip_x - mask_x); | 161 | { |
144 | 162 | Cy = *yapp >> 16; | |
145 | if (mul_col != 0xffffffff) func2(buf, NULL, mul_col, buf, w); | 163 | yap = *yapp & 0xffff; |
146 | func(buf, mask, 0, dptr, w); | 164 | |
147 | } | 165 | while (dst_clip_w--) |
148 | y++; | 166 | { |
149 | 167 | Cx = *xapp >> 16; | |
150 | pbuf = buf; | 168 | xap = *xapp & 0xffff; |
151 | dptr += dst_w; dst_clip_w = w; | 169 | |
152 | xp = xpoints;// + dxx; | 170 | sptr = *yp + *xp + pos; |
153 | xapp = xapoints;// + dxx; | 171 | pix = sptr; |
154 | yp++; yapp++; | 172 | sptr += src_w; |
155 | } | 173 | |
174 | ax = (A_VAL(pix) * xap) >> 9; | ||
175 | rx = (R_VAL(pix) * xap) >> 9; | ||
176 | gx = (G_VAL(pix) * xap) >> 9; | ||
177 | bx = (B_VAL(pix) * xap) >> 9; | ||
178 | pix++; | ||
179 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
180 | { | ||
181 | ax += (A_VAL(pix) * Cx) >> 9; | ||
182 | rx += (R_VAL(pix) * Cx) >> 9; | ||
183 | gx += (G_VAL(pix) * Cx) >> 9; | ||
184 | bx += (B_VAL(pix) * Cx) >> 9; | ||
185 | pix++; | ||
186 | } | ||
187 | if (i > 0) | ||
188 | { | ||
189 | ax += (A_VAL(pix) * i) >> 9; | ||
190 | rx += (R_VAL(pix) * i) >> 9; | ||
191 | gx += (G_VAL(pix) * i) >> 9; | ||
192 | bx += (B_VAL(pix) * i) >> 9; | ||
193 | } | ||
194 | |||
195 | a = (ax * yap) >> 14; | ||
196 | r = (rx * yap) >> 14; | ||
197 | g = (gx * yap) >> 14; | ||
198 | b = (bx * yap) >> 14; | ||
199 | |||
200 | for (j = (1 << 14) - yap; j > Cy; j -= Cy) | ||
201 | { | ||
202 | pix = sptr; | ||
203 | sptr += src_w; | ||
204 | ax = (A_VAL(pix) * xap) >> 9; | ||
205 | rx = (R_VAL(pix) * xap) >> 9; | ||
206 | gx = (G_VAL(pix) * xap) >> 9; | ||
207 | bx = (B_VAL(pix) * xap) >> 9; | ||
208 | pix++; | ||
209 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
210 | { | ||
211 | ax += (A_VAL(pix) * Cx) >> 9; | ||
212 | rx += (R_VAL(pix) * Cx) >> 9; | ||
213 | gx += (G_VAL(pix) * Cx) >> 9; | ||
214 | bx += (B_VAL(pix) * Cx) >> 9; | ||
215 | pix++; | ||
216 | } | ||
217 | if (i > 0) | ||
218 | { | ||
219 | ax += (A_VAL(pix) * i) >> 9; | ||
220 | rx += (R_VAL(pix) * i) >> 9; | ||
221 | gx += (G_VAL(pix) * i) >> 9; | ||
222 | bx += (B_VAL(pix) * i) >> 9; | ||
223 | } | ||
224 | |||
225 | a += (ax * Cy) >> 14; | ||
226 | r += (rx * Cy) >> 14; | ||
227 | g += (gx * Cy) >> 14; | ||
228 | b += (bx * Cy) >> 14; | ||
229 | } | ||
230 | if (j > 0) | ||
231 | { | ||
232 | pix = sptr; | ||
233 | sptr += src_w; | ||
234 | ax = (A_VAL(pix) * xap) >> 9; | ||
235 | rx = (R_VAL(pix) * xap) >> 9; | ||
236 | gx = (G_VAL(pix) * xap) >> 9; | ||
237 | bx = (B_VAL(pix) * xap) >> 9; | ||
238 | pix++; | ||
239 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
240 | { | ||
241 | ax += (A_VAL(pix) * Cx) >> 9; | ||
242 | rx += (R_VAL(pix) * Cx) >> 9; | ||
243 | gx += (G_VAL(pix) * Cx) >> 9; | ||
244 | bx += (B_VAL(pix) * Cx) >> 9; | ||
245 | pix++; | ||
246 | } | ||
247 | if (i > 0) | ||
248 | { | ||
249 | ax += (A_VAL(pix) * i) >> 9; | ||
250 | rx += (R_VAL(pix) * i) >> 9; | ||
251 | gx += (G_VAL(pix) * i) >> 9; | ||
252 | bx += (B_VAL(pix) * i) >> 9; | ||
253 | } | ||
254 | |||
255 | a += (ax * j) >> 14; | ||
256 | r += (rx * j) >> 14; | ||
257 | g += (gx * j) >> 14; | ||
258 | b += (bx * j) >> 14; | ||
259 | } | ||
260 | *pbuf++ = ARGB_JOIN(((a + (1 << 4)) >> 5), | ||
261 | ((r + (1 << 4)) >> 5), | ||
262 | ((g + (1 << 4)) >> 5), | ||
263 | ((b + (1 << 4)) >> 5)); | ||
264 | xp++; xapp++; | ||
265 | } | ||
266 | |||
267 | if (!mask_ie) | ||
268 | func(buf, NULL, mul_col, dptr, w); | ||
269 | else | ||
270 | { | ||
271 | mask = mask_ie->image.data8 | ||
272 | + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) | ||
273 | + (dst_clip_x - mask_x); | ||
274 | |||
275 | if (mul_col != 0xffffffff) | ||
276 | func2(buf, NULL, mul_col, buf, w); | ||
277 | func(buf, mask, 0, dptr, w); | ||
278 | } | ||
279 | y++; | ||
280 | |||
281 | pbuf = buf; | ||
282 | dptr += dst_w; dst_clip_w = w; | ||
283 | xp = xpoints;// + dxx; | ||
284 | xapp = xapoints;// + dxx; | ||
285 | yp++; yapp++; | ||
286 | } | ||
287 | } | ||
156 | } | 288 | } |
157 | else | 289 | else |
158 | { | 290 | { |
@@ -161,228 +293,432 @@ | |||
161 | (!dst->cache_entry.flags.alpha) && | 293 | (!dst->cache_entry.flags.alpha) && |
162 | (mul_col == 0xffffffff) && | 294 | (mul_col == 0xffffffff) && |
163 | (!mask_ie)) | 295 | (!mask_ie)) |
164 | { | 296 | { |
165 | while (dst_clip_h--) | 297 | // RGBA_Image *src, RGBA_Image *dst |
166 | { | 298 | // dst_clip_x, dst_clip_x\y, dst_clip_w, dst_clip_h |
167 | Cy = *yapp >> 16; | 299 | // src_region_x, src_region_y, src_region_w, src_region_h |
168 | yap = *yapp & 0xffff; | 300 | // dst_region_x, dst_region_y, dst_region_w, dst_region_h |
169 | 301 | #ifdef BILINEAR_HALF_TO_FULL_SCALE | |
170 | pbuf = dptr; | 302 | if (bilinear_downscale) |
171 | while (dst_clip_w--) | 303 | { |
172 | { | 304 | DATA32 *lptr, *p1, *p2, *p3, *p4; |
173 | Cx = *xapp >> 16; | 305 | unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac; |
174 | xap = *xapp & 0xffff; | 306 | unsigned int xstep, ystep; |
175 | 307 | unsigned int r1, g1, b1, r2, g2, b2; | |
176 | sptr = *yp + *xp + pos; | 308 | |
177 | pix = sptr; | 309 | pix = src_data + (src_region_y * src_w) + src_region_x; |
178 | sptr += src_w; | 310 | xstep = (src_region_w << 16) / dst_region_w; |
179 | 311 | ystep = (src_region_h << 16) / dst_region_h; | |
180 | rx = (R_VAL(pix) * xap) >> 9; | 312 | ypos = (dst_clip_y - dst_region_y) * ystep; |
181 | gx = (G_VAL(pix) * xap) >> 9; | 313 | |
182 | bx = (B_VAL(pix) * xap) >> 9; | 314 | while (dst_clip_h--) |
183 | pix++; | 315 | { |
184 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 316 | pbuf = dptr; |
185 | { | 317 | xpos = (dst_clip_x - dst_region_x) * xstep; |
186 | rx += (R_VAL(pix) * Cx) >> 9; | 318 | lptr = pix + ((ypos >> 16) * src_w); |
187 | gx += (G_VAL(pix) * Cx) >> 9; | 319 | |
188 | bx += (B_VAL(pix) * Cx) >> 9; | 320 | if ((ypos >> 16) < ((unsigned int)src_h - 1)) |
189 | pix++; | 321 | { |
190 | } | 322 | yfrac = ypos & 0xffff; |
191 | if (i > 0) | 323 | invyfrac = 0x10000 - yfrac; |
192 | { | 324 | while (dst_clip_w--) |
193 | rx += (R_VAL(pix) * i) >> 9; | 325 | { |
194 | gx += (G_VAL(pix) * i) >> 9; | 326 | p1 = lptr + (xpos >> 16); |
195 | bx += (B_VAL(pix) * i) >> 9; | 327 | p2 = p1 + 1; |
196 | } | 328 | p3 = p1 + src_w; |
197 | 329 | p4 = p3 + 1; | |
198 | r = (rx * yap) >> 14; | 330 | xfrac = xpos & 0xffff; |
199 | g = (gx * yap) >> 14; | 331 | invxfrac = 0x10000 - xfrac; |
200 | b = (bx * yap) >> 14; | 332 | |
201 | 333 | if (xfrac > 0) | |
202 | for (j = (1 << 14) - yap; j > Cy; j -= Cy) | 334 | { |
203 | { | 335 | r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16; |
204 | pix = sptr; | 336 | r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16; |
205 | sptr += src_w; | 337 | g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16; |
206 | rx = (R_VAL(pix) * xap) >> 9; | 338 | g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16; |
207 | gx = (G_VAL(pix) * xap) >> 9; | 339 | b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16; |
208 | bx = (B_VAL(pix) * xap) >> 9; | 340 | b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16; |
209 | pix++; | 341 | } |
210 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 342 | else |
211 | { | 343 | { |
212 | rx += (R_VAL(pix) * Cx) >> 9; | 344 | r1 = R_VAL(p1); |
213 | gx += (G_VAL(pix) * Cx) >> 9; | 345 | r2 = R_VAL(p3); |
214 | bx += (B_VAL(pix) * Cx) >> 9; | 346 | g1 = G_VAL(p1); |
215 | pix++; | 347 | g2 = G_VAL(p3); |
216 | } | 348 | b1 = B_VAL(p1); |
217 | if (i > 0) | 349 | b2 = B_VAL(p3); |
218 | { | 350 | } |
219 | rx += (R_VAL(pix) * i) >> 9; | 351 | |
220 | gx += (G_VAL(pix) * i) >> 9; | 352 | r = ((invyfrac * r1) + (yfrac * r2)) >> 16; |
221 | bx += (B_VAL(pix) * i) >> 9; | 353 | g = ((invyfrac * g1) + (yfrac * g2)) >> 16; |
222 | } | 354 | b = ((invyfrac * b1) + (yfrac * b2)) >> 16; |
223 | 355 | ||
224 | r += (rx * Cy) >> 14; | 356 | *pbuf++ = ARGB_JOIN(0xff, r, g, b); |
225 | g += (gx * Cy) >> 14; | 357 | xpos += xstep; |
226 | b += (bx * Cy) >> 14; | 358 | } |
227 | } | 359 | } |
228 | if (j > 0) | 360 | else |
229 | { | 361 | { |
230 | pix = sptr; | 362 | while (dst_clip_w--) |
231 | sptr += src_w; | 363 | { |
232 | rx = (R_VAL(pix) * xap) >> 9; | 364 | p1 = lptr + (xpos >> 16); |
233 | gx = (G_VAL(pix) * xap) >> 9; | 365 | p2 = p1 + 1; |
234 | bx = (B_VAL(pix) * xap) >> 9; | 366 | xfrac = xpos & 0xffff; |
235 | pix++; | 367 | invxfrac = 0x10000 - xfrac; |
236 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 368 | |
237 | { | 369 | if (xfrac > 0) |
238 | rx += (R_VAL(pix) * Cx) >> 9; | 370 | { |
239 | gx += (G_VAL(pix) * Cx) >> 9; | 371 | r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16; |
240 | bx += (B_VAL(pix) * Cx) >> 9; | 372 | g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16; |
241 | pix++; | 373 | b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16; |
242 | } | 374 | } |
243 | if (i > 0) | 375 | else |
244 | { | 376 | { |
245 | rx += (R_VAL(pix) * i) >> 9; | 377 | r = R_VAL(p1); |
246 | gx += (G_VAL(pix) * i) >> 9; | 378 | g = G_VAL(p1); |
247 | bx += (B_VAL(pix) * i) >> 9; | 379 | b = B_VAL(p1); |
248 | } | 380 | } |
249 | 381 | ||
250 | r += (rx * j) >> 14; | 382 | *pbuf++ = ARGB_JOIN(0xff, r, g, b); |
251 | g += (gx * j) >> 14; | 383 | xpos += xstep; |
252 | b += (bx * j) >> 14; | 384 | } |
253 | } | 385 | } |
254 | *pbuf++ = ARGB_JOIN(0xff, | 386 | ypos += ystep; |
255 | ((r + (1 << 4)) >> 5), | 387 | dptr += dst_w; dst_clip_w = w; |
256 | ((g + (1 << 4)) >> 5), | 388 | } |
257 | ((b + (1 << 4)) >> 5)); | 389 | } |
258 | xp++; xapp++; | 390 | else |
259 | } | 391 | #endif |
260 | 392 | { | |
261 | dptr += dst_w; dst_clip_w = w; | 393 | while (dst_clip_h--) |
262 | xp = xpoints;// + dxx; | 394 | { |
263 | xapp = xapoints;// + dxx; | 395 | Cy = *yapp >> 16; |
264 | yp++; yapp++; | 396 | yap = *yapp & 0xffff; |
265 | } | 397 | |
266 | } | 398 | pbuf = dptr; |
267 | else | 399 | while (dst_clip_w--) |
400 | { | ||
401 | Cx = *xapp >> 16; | ||
402 | xap = *xapp & 0xffff; | ||
403 | |||
404 | sptr = *yp + *xp + pos; | ||
405 | pix = sptr; | ||
406 | sptr += src_w; | ||
407 | |||
408 | rx = (R_VAL(pix) * xap) >> 9; | ||
409 | gx = (G_VAL(pix) * xap) >> 9; | ||
410 | bx = (B_VAL(pix) * xap) >> 9; | ||
411 | pix++; | ||
412 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
413 | { | ||
414 | rx += (R_VAL(pix) * Cx) >> 9; | ||
415 | gx += (G_VAL(pix) * Cx) >> 9; | ||
416 | bx += (B_VAL(pix) * Cx) >> 9; | ||
417 | pix++; | ||
418 | } | ||
419 | if (i > 0) | ||
420 | { | ||
421 | rx += (R_VAL(pix) * i) >> 9; | ||
422 | gx += (G_VAL(pix) * i) >> 9; | ||
423 | bx += (B_VAL(pix) * i) >> 9; | ||
424 | } | ||
425 | |||
426 | r = (rx * yap) >> 14; | ||
427 | g = (gx * yap) >> 14; | ||
428 | b = (bx * yap) >> 14; | ||
429 | |||
430 | for (j = (1 << 14) - yap; j > Cy; j -= Cy) | ||
431 | { | ||
432 | pix = sptr; | ||
433 | sptr += src_w; | ||
434 | rx = (R_VAL(pix) * xap) >> 9; | ||
435 | gx = (G_VAL(pix) * xap) >> 9; | ||
436 | bx = (B_VAL(pix) * xap) >> 9; | ||
437 | pix++; | ||
438 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
439 | { | ||
440 | rx += (R_VAL(pix) * Cx) >> 9; | ||
441 | gx += (G_VAL(pix) * Cx) >> 9; | ||
442 | bx += (B_VAL(pix) * Cx) >> 9; | ||
443 | pix++; | ||
444 | } | ||
445 | if (i > 0) | ||
446 | { | ||
447 | rx += (R_VAL(pix) * i) >> 9; | ||
448 | gx += (G_VAL(pix) * i) >> 9; | ||
449 | bx += (B_VAL(pix) * i) >> 9; | ||
450 | } | ||
451 | |||
452 | r += (rx * Cy) >> 14; | ||
453 | g += (gx * Cy) >> 14; | ||
454 | b += (bx * Cy) >> 14; | ||
455 | } | ||
456 | if (j > 0) | ||
457 | { | ||
458 | pix = sptr; | ||
459 | sptr += src_w; | ||
460 | rx = (R_VAL(pix) * xap) >> 9; | ||
461 | gx = (G_VAL(pix) * xap) >> 9; | ||
462 | bx = (B_VAL(pix) * xap) >> 9; | ||
463 | pix++; | ||
464 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
465 | { | ||
466 | rx += (R_VAL(pix) * Cx) >> 9; | ||
467 | gx += (G_VAL(pix) * Cx) >> 9; | ||
468 | bx += (B_VAL(pix) * Cx) >> 9; | ||
469 | pix++; | ||
470 | } | ||
471 | if (i > 0) | ||
472 | { | ||
473 | rx += (R_VAL(pix) * i) >> 9; | ||
474 | gx += (G_VAL(pix) * i) >> 9; | ||
475 | bx += (B_VAL(pix) * i) >> 9; | ||
476 | } | ||
477 | |||
478 | r += (rx * j) >> 14; | ||
479 | g += (gx * j) >> 14; | ||
480 | b += (bx * j) >> 14; | ||
481 | } | ||
482 | *pbuf++ = ARGB_JOIN(0xff, | ||
483 | ((r + (1 << 4)) >> 5), | ||
484 | ((g + (1 << 4)) >> 5), | ||
485 | ((b + (1 << 4)) >> 5)); | ||
486 | xp++; xapp++; | ||
487 | } | ||
488 | |||
489 | dptr += dst_w; dst_clip_w = w; | ||
490 | xp = xpoints;// + dxx; | ||
491 | xapp = xapoints;// + dxx; | ||
492 | yp++; yapp++; | ||
493 | } | ||
494 | } | ||
495 | } | ||
496 | else | ||
268 | #endif | 497 | #endif |
269 | { | 498 | { |
270 | y = 0; | 499 | #ifdef BILINEAR_HALF_TO_FULL_SCALE |
271 | while (dst_clip_h--) | 500 | if (bilinear_downscale) |
272 | { | 501 | { |
273 | Cy = *yapp >> 16; | 502 | DATA32 *lptr, *p1, *p2, *p3, *p4; |
274 | yap = *yapp & 0xffff; | 503 | unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac; |
275 | 504 | unsigned int xstep, ystep; | |
276 | while (dst_clip_w--) | 505 | unsigned int r1, g1, b1, r2, g2, b2; |
277 | { | 506 | |
278 | Cx = *xapp >> 16; | 507 | pix = src_data + (src_region_y * src_w) + src_region_x; |
279 | xap = *xapp & 0xffff; | 508 | xstep = (src_region_w << 16) / dst_region_w; |
280 | 509 | ystep = (src_region_h << 16) / dst_region_h; | |
281 | sptr = *yp + *xp + pos; | 510 | ypos = (dst_clip_y - dst_region_y) * ystep; |
282 | pix = sptr; | 511 | |
283 | sptr += src_w; | 512 | while (dst_clip_h--) |
284 | 513 | { | |
285 | rx = (R_VAL(pix) * xap) >> 9; | 514 | xpos = (dst_clip_x - dst_region_x) * xstep; |
286 | gx = (G_VAL(pix) * xap) >> 9; | 515 | lptr = pix + ((ypos >> 16) * src_w); |
287 | bx = (B_VAL(pix) * xap) >> 9; | 516 | |
288 | pix++; | 517 | if ((ypos >> 16) < ((unsigned int)src_h - 1)) |
289 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 518 | { |
290 | { | 519 | yfrac = ypos & 0xffff; |
291 | rx += (R_VAL(pix) * Cx) >> 9; | 520 | invyfrac = 0x10000 - yfrac; |
292 | gx += (G_VAL(pix) * Cx) >> 9; | 521 | while (dst_clip_w--) |
293 | bx += (B_VAL(pix) * Cx) >> 9; | 522 | { |
294 | pix++; | 523 | p1 = lptr + (xpos >> 16); |
295 | } | 524 | p2 = p1 + 1; |
296 | if (i > 0) | 525 | p3 = p1 + src_w; |
297 | { | 526 | p4 = p3 + 1; |
298 | rx += (R_VAL(pix) * i) >> 9; | 527 | xfrac = xpos & 0xffff; |
299 | gx += (G_VAL(pix) * i) >> 9; | 528 | invxfrac = 0x10000 - xfrac; |
300 | bx += (B_VAL(pix) * i) >> 9; | 529 | |
301 | } | 530 | if (xfrac > 0) |
302 | 531 | { | |
303 | r = (rx * yap) >> 14; | 532 | r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16; |
304 | g = (gx * yap) >> 14; | 533 | r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16; |
305 | b = (bx * yap) >> 14; | 534 | g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16; |
306 | 535 | g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16; | |
307 | for (j = (1 << 14) - yap; j > Cy; j -= Cy) | 536 | b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16; |
308 | { | 537 | b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16; |
309 | pix = sptr; | 538 | } |
310 | sptr += src_w; | 539 | else |
311 | rx = (R_VAL(pix) * xap) >> 9; | 540 | { |
312 | gx = (G_VAL(pix) * xap) >> 9; | 541 | r1 = R_VAL(p1); |
313 | bx = (B_VAL(pix) * xap) >> 9; | 542 | r2 = R_VAL(p3); |
314 | pix++; | 543 | g1 = G_VAL(p1); |
315 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 544 | g2 = G_VAL(p3); |
316 | { | 545 | b1 = B_VAL(p1); |
317 | rx += (R_VAL(pix) * Cx) >> 9; | 546 | b2 = B_VAL(p3); |
318 | gx += (G_VAL(pix) * Cx) >> 9; | 547 | } |
319 | bx += (B_VAL(pix) * Cx) >> 9; | 548 | |
320 | pix++; | 549 | r = ((invyfrac * r1) + (yfrac * r2)) >> 16; |
321 | } | 550 | g = ((invyfrac * g1) + (yfrac * g2)) >> 16; |
322 | if (i > 0) | 551 | b = ((invyfrac * b1) + (yfrac * b2)) >> 16; |
323 | { | 552 | |
324 | rx += (R_VAL(pix) * i) >> 9; | 553 | *pbuf++ = ARGB_JOIN(0xff, r, g, b); |
325 | gx += (G_VAL(pix) * i) >> 9; | 554 | xpos += xstep; |
326 | bx += (B_VAL(pix) * i) >> 9; | 555 | } |
327 | } | 556 | } |
328 | 557 | else | |
329 | r += (rx * Cy) >> 14; | 558 | { |
330 | g += (gx * Cy) >> 14; | 559 | while (dst_clip_w--) |
331 | b += (bx * Cy) >> 14; | 560 | { |
332 | } | 561 | p1 = lptr + (xpos >> 16); |
333 | if (j > 0) | 562 | p2 = p1 + 1; |
334 | { | 563 | xfrac = xpos & 0xffff; |
335 | pix = sptr; | 564 | invxfrac = 0x10000 - xfrac; |
336 | sptr += src_w; | 565 | |
337 | rx = (R_VAL(pix) * xap) >> 9; | 566 | if (xfrac > 0) |
338 | gx = (G_VAL(pix) * xap) >> 9; | 567 | { |
339 | bx = (B_VAL(pix) * xap) >> 9; | 568 | r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16; |
340 | pix++; | 569 | g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16; |
341 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | 570 | b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16; |
342 | { | 571 | } |
343 | rx += (R_VAL(pix) * Cx) >> 9; | 572 | else |
344 | gx += (G_VAL(pix) * Cx) >> 9; | 573 | { |
345 | bx += (B_VAL(pix) * Cx) >> 9; | 574 | r = R_VAL(p1); |
346 | pix++; | 575 | g = G_VAL(p1); |
347 | } | 576 | b = B_VAL(p1); |
348 | if (i > 0) | 577 | } |
349 | { | 578 | |
350 | rx += (R_VAL(pix) * i) >> 9; | 579 | *pbuf++ = ARGB_JOIN(0xff, r, g, b); |
351 | gx += (G_VAL(pix) * i) >> 9; | 580 | xpos += xstep; |
352 | bx += (B_VAL(pix) * i) >> 9; | 581 | } |
353 | } | 582 | } |
354 | 583 | if (!mask_ie) | |
355 | r += (rx * j) >> 14; | 584 | func(buf, NULL, mul_col, dptr, w); |
356 | g += (gx * j) >> 14; | 585 | else |
357 | b += (bx * j) >> 14; | 586 | { |
358 | } | 587 | mask = mask_ie->image.data8 |
359 | *pbuf++ = ARGB_JOIN(0xff, | 588 | + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) |
360 | ((r + (1 << 4)) >> 5), | 589 | + (dst_clip_x - mask_x); |
361 | ((g + (1 << 4)) >> 5), | 590 | |
362 | ((b + (1 << 4)) >> 5)); | 591 | if (mul_col != 0xffffffff) |
363 | xp++; xapp++; | 592 | func2(buf, NULL, mul_col, buf, w); |
364 | } | 593 | func(buf, mask, 0, dptr, w); |
365 | 594 | } | |
366 | if (!mask_ie) | 595 | y++; |
367 | func(buf, NULL, mul_col, dptr, w); | 596 | |
368 | else | 597 | pbuf = buf; |
369 | { | 598 | ypos += ystep; |
370 | mask = mask_ie->image.data8 | 599 | dptr += dst_w; dst_clip_w = w; |
371 | + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) | 600 | } |
372 | + (dst_clip_x - mask_x); | 601 | } |
373 | 602 | else | |
374 | if (mul_col != 0xffffffff) func2(buf, NULL, mul_col, buf, w); | 603 | #endif |
375 | func(buf, mask, 0, dptr, w); | 604 | { |
376 | } | 605 | while (dst_clip_h--) |
377 | y++; | 606 | { |
378 | 607 | Cy = *yapp >> 16; | |
379 | pbuf = buf; | 608 | yap = *yapp & 0xffff; |
380 | dptr += dst_w; dst_clip_w = w; | 609 | |
381 | xp = xpoints;// + dxx; | 610 | while (dst_clip_w--) |
382 | xapp = xapoints;// + dxx; | 611 | { |
383 | yp++; yapp++; | 612 | Cx = *xapp >> 16; |
384 | } | 613 | xap = *xapp & 0xffff; |
385 | } | 614 | |
615 | sptr = *yp + *xp + pos; | ||
616 | pix = sptr; | ||
617 | sptr += src_w; | ||
618 | |||
619 | rx = (R_VAL(pix) * xap) >> 9; | ||
620 | gx = (G_VAL(pix) * xap) >> 9; | ||
621 | bx = (B_VAL(pix) * xap) >> 9; | ||
622 | pix++; | ||
623 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
624 | { | ||
625 | rx += (R_VAL(pix) * Cx) >> 9; | ||
626 | gx += (G_VAL(pix) * Cx) >> 9; | ||
627 | bx += (B_VAL(pix) * Cx) >> 9; | ||
628 | pix++; | ||
629 | } | ||
630 | if (i > 0) | ||
631 | { | ||
632 | rx += (R_VAL(pix) * i) >> 9; | ||
633 | gx += (G_VAL(pix) * i) >> 9; | ||
634 | bx += (B_VAL(pix) * i) >> 9; | ||
635 | } | ||
636 | |||
637 | r = (rx * yap) >> 14; | ||
638 | g = (gx * yap) >> 14; | ||
639 | b = (bx * yap) >> 14; | ||
640 | |||
641 | for (j = (1 << 14) - yap; j > Cy; j -= Cy) | ||
642 | { | ||
643 | pix = sptr; | ||
644 | sptr += src_w; | ||
645 | rx = (R_VAL(pix) * xap) >> 9; | ||
646 | gx = (G_VAL(pix) * xap) >> 9; | ||
647 | bx = (B_VAL(pix) * xap) >> 9; | ||
648 | pix++; | ||
649 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
650 | { | ||
651 | rx += (R_VAL(pix) * Cx) >> 9; | ||
652 | gx += (G_VAL(pix) * Cx) >> 9; | ||
653 | bx += (B_VAL(pix) * Cx) >> 9; | ||
654 | pix++; | ||
655 | } | ||
656 | if (i > 0) | ||
657 | { | ||
658 | rx += (R_VAL(pix) * i) >> 9; | ||
659 | gx += (G_VAL(pix) * i) >> 9; | ||
660 | bx += (B_VAL(pix) * i) >> 9; | ||
661 | } | ||
662 | |||
663 | r += (rx * Cy) >> 14; | ||
664 | g += (gx * Cy) >> 14; | ||
665 | b += (bx * Cy) >> 14; | ||
666 | } | ||
667 | if (j > 0) | ||
668 | { | ||
669 | pix = sptr; | ||
670 | sptr += src_w; | ||
671 | rx = (R_VAL(pix) * xap) >> 9; | ||
672 | gx = (G_VAL(pix) * xap) >> 9; | ||
673 | bx = (B_VAL(pix) * xap) >> 9; | ||
674 | pix++; | ||
675 | for (i = (1 << 14) - xap; i > Cx; i -= Cx) | ||
676 | { | ||
677 | rx += (R_VAL(pix) * Cx) >> 9; | ||
678 | gx += (G_VAL(pix) * Cx) >> 9; | ||
679 | bx += (B_VAL(pix) * Cx) >> 9; | ||
680 | pix++; | ||
681 | } | ||
682 | if (i > 0) | ||
683 | { | ||
684 | rx += (R_VAL(pix) * i) >> 9; | ||
685 | gx += (G_VAL(pix) * i) >> 9; | ||
686 | bx += (B_VAL(pix) * i) >> 9; | ||
687 | } | ||
688 | |||
689 | r += (rx * j) >> 14; | ||
690 | g += (gx * j) >> 14; | ||
691 | b += (bx * j) >> 14; | ||
692 | } | ||
693 | *pbuf++ = ARGB_JOIN(0xff, | ||
694 | ((r + (1 << 4)) >> 5), | ||
695 | ((g + (1 << 4)) >> 5), | ||
696 | ((b + (1 << 4)) >> 5)); | ||
697 | xp++; xapp++; | ||
698 | } | ||
699 | |||
700 | if (!mask_ie) | ||
701 | func(buf, NULL, mul_col, dptr, w); | ||
702 | else | ||
703 | { | ||
704 | mask = mask_ie->image.data8 | ||
705 | + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w) | ||
706 | + (dst_clip_x - mask_x); | ||
707 | |||
708 | if (mul_col != 0xffffffff) | ||
709 | func2(buf, NULL, mul_col, buf, w); | ||
710 | func(buf, mask, 0, dptr, w); | ||
711 | } | ||
712 | y++; | ||
713 | |||
714 | pbuf = buf; | ||
715 | dptr += dst_w; dst_clip_w = w; | ||
716 | xp = xpoints;// + dxx; | ||
717 | xapp = xapoints;// + dxx; | ||
718 | yp++; yapp++; | ||
719 | } | ||
720 | } | ||
721 | } | ||
386 | } | 722 | } |
387 | #else | 723 | #else |
388 | /* MMX scaling down would go here */ | 724 | /* MMX scaling down would go here */ |