summaryrefslogtreecommitdiff
path: root/src/lib/evas/common/evas_scale_smooth_scaler_downx_downy.c
blob: dfe0e5dc312b1c4063967057a68c2e095d50ec4b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
/*
 * Smooth down-scaler body (judging by the file name, the variant used when
 * both x and y are scaled down).
 *
 * This is a bare brace block with no function header of its own. It uses
 * many names it does not declare: src, dst, src_data, dst_ptr, buf, func,
 * func2, mask_ie, mask_x, mask_y, mul_col, xpoints, ypoints, xapoints,
 * yapoints, xapp, yapp, src_w, src_h, dst_w and the *_region_* / *_clip_*
 * values. Presumably it is #included into the body of a scaler function
 * that supplies them -- TODO confirm against the includer.
 *
 * Defining BILINEAR_HALF_TO_FULL_SCALE enables a bilinear special case for
 * scale factors from 50% up to 100% (see the test near the top of the
 * block); all other cases use the weighted-sum code driven by the
 * xpoints/ypoints/xapoints/yapoints tables.
 */
#define BILINEAR_HALF_TO_FULL_SCALE 1

{
   int Cx, Cy, i, j;
   DATA32 *dptr, *sptr, *pix, *pbuf;
   DATA8 *mask;
   int a, r, g, b, rx, gx, bx, ax;
   int xap, yap, pos;
   // number of destination rows finished so far; only used to pick the
   // matching mask row
   int y = 0;
#ifdef BILINEAR_HALF_TO_FULL_SCALE
   Eina_Bool bilinear_downscale = EINA_FALSE;
#endif

   DATA32  **yp;
   int *xp;
   // dst_clip_w is counted down to zero for every row below, so keep the
   // original span width here to restore it and to pass to func/func2
   int w = dst_clip_w;

#ifdef BILINEAR_HALF_TO_FULL_SCALE
   if (// image is not too big so that cumulative error on steps might be
       // noticeable
       (dst_region_w <= 4096) &&
       (dst_region_h <= 4096) &&
       (src_region_w <= 4096) &&
       (src_region_h <= 4096) &&
       // if image scale is between 50% size and up to 100% of size of
       // original, then do a special case bilinear interpolation scale
       (dst_region_w >= (src_region_w / 2)) &&
       (dst_region_h >= (src_region_h / 2)))
     bilinear_downscale = EINA_TRUE;
#endif

   dptr = dst_ptr;
   // pixel offset of the source region origin inside the source image
   pos = (src_region_y * src_w) + src_region_x;
   //dyy = dst_clip_y - dst_region_y;
   //dxx = dst_clip_x - dst_region_x;

   // per-column / per-row lookup tables; xp/xapp are rewound to the table
   // start after every row, yp/yapp advance by one per row
   xp = xpoints;// + dxx;
   yp = ypoints;// + dyy;
   xapp = xapoints;// + dxx;
   yapp = yapoints;// + dyy;
   pbuf = buf;
/*#ifndef SCALE_USING_MMX */
/* for now there's no mmx down scaling - so C only */
#if 1
   // source has an alpha channel: interpolate A as well as R, G, B
   if (src->cache_entry.flags.alpha)
     {
#ifdef BILINEAR_HALF_TO_FULL_SCALE
        if (bilinear_downscale)
          {
             DATA32 *lptr, *p1, *p2, *p3, *p4;
             unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac;
             unsigned int xstep, ystep;
             unsigned int a1, a2, r1, g1, b1, r2, g2, b2;

             // top-left pixel of the source region
             pix = src_data + (src_region_y * src_w) + src_region_x;
             // 16.16 fixed point: source pixels advanced per destination
             // pixel (upper 16 bits = whole pixels, lower 16 = fraction)
             xstep = (src_region_w << 16) / dst_region_w;
             ystep = (src_region_h << 16) / dst_region_h;
             // start at the source row matching the first clipped
             // destination row
             ypos = (dst_clip_y - dst_region_y) * ystep;

             while (dst_clip_h--)
               {
                  xpos = (dst_clip_x - dst_region_x) * xstep;
                  lptr = pix + ((ypos >> 16) * src_w);

                  // Blend with the row below only while the row index is
                  // below src_h - 1; otherwise sample the current row only.
                  // NOTE(review): (ypos >> 16) is relative to src_region_y
                  // but is compared against the full image height -- confirm
                  // the includer guarantees the row below is always valid.
                  if ((ypos >> 16) < ((unsigned int)src_h - 1))
                    {
                       yfrac = ypos & 0xffff;
                       invyfrac = 0x10000 - yfrac;
                       while (dst_clip_w--)
                         {
                            // p1 p2   2x2 neighbourhood: p1 is the sample
                            // p3 p4   at the integer position
                            p1 = lptr + (xpos >> 16);
                            p2 = p1 + 1;
                            p3 = p1 + src_w;
                            p4 = p3 + 1;
                            xfrac = xpos & 0xffff;
                            invxfrac = 0x10000 - xfrac;

                            // horizontal pass: a1/r1/g1/b1 for the upper
                            // row, a2/r2/g2/b2 for the lower row
                            if (xfrac > 0)
                              {
                                 a1 = ((invxfrac * A_VAL(p1)) + (xfrac * A_VAL(p2))) >> 16;
                                 a2 = ((invxfrac * A_VAL(p3)) + (xfrac * A_VAL(p4))) >> 16;
                                 r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
                                 r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16;
                                 g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
                                 g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16;
                                 b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
                                 b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16;
                              }
                            else
                              {
                                 // exactly on a source column: p2/p4 are
                                 // not read at all
                                 a1 = A_VAL(p1);
                                 a2 = A_VAL(p3);
                                 r1 = R_VAL(p1);
                                 r2 = R_VAL(p3);
                                 g1 = G_VAL(p1);
                                 g2 = G_VAL(p3);
                                 b1 = B_VAL(p1);
                                 b2 = B_VAL(p3);
                              }

                            // vertical pass between the two row results
                            a = ((invyfrac * a1) + (yfrac * a2)) >> 16;
                            r = ((invyfrac * r1) + (yfrac * r2)) >> 16;
                            g = ((invyfrac * g1) + (yfrac * g2)) >> 16;
                            b = ((invyfrac * b1) + (yfrac * b2)) >> 16;

                            *pbuf++ = ARGB_JOIN(a, r, g, b);
                            xpos += xstep;
                         }
                    }
                  else
                    {
                       // horizontal-only interpolation (no row below used)
                       while (dst_clip_w--)
                         {
                            p1 = lptr + (xpos >> 16);
                            p2 = p1 + 1;
                            xfrac = xpos & 0xffff;
                            invxfrac = 0x10000 - xfrac;

                            if (xfrac > 0)
                              {
                                 a = ((invxfrac * A_VAL(p1)) + (xfrac * A_VAL(p2))) >> 16;
                                 r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
                                 g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
                                 b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
                              }
                            else
                              {
                                 a = A_VAL(p1);
                                 r = R_VAL(p1);
                                 g = G_VAL(p1);
                                 b = B_VAL(p1);
                              }

                            *pbuf++ = ARGB_JOIN(a, r, g, b);
                            xpos += xstep;
                         }
                    }
                  // Hand the finished row in buf to func, which is given
                  // dptr as its destination (func/func2 are defined outside
                  // this block; presumably span compositing functions).
                  if (!mask_ie)
                    func(buf, NULL, mul_col, dptr, w);
                  else
                    {
                       // first mask byte for this destination row
                       mask = mask_ie->image.data8
                       + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
                       + (dst_clip_x - mask_x);

                       // with a mask, mul_col is applied to buf in place
                       // via func2 first (skipped for 0xffffffff), then
                       // func is called with the mask and colour 0
                       if (mul_col != 0xffffffff)
                         func2(buf, NULL, mul_col, buf, w);
                       func(buf, mask, 0, dptr, w);
                    }
                  y++;

                  // rewind the row buffer, step to the next source and
                  // destination rows, restore the column countdown
                  pbuf = buf;
                  ypos += ystep;
                  dptr += dst_w;   dst_clip_w = w;
               }
          }
        else
#endif
          {
             // General path: weighted sum over the block of source pixels
             // starting at *yp + *xp + pos. The weights come from the
             // xapoints/yapoints tables and total 1 << 14 along each axis.
             while (dst_clip_h--)
               {
                  // upper 16 bits: weight of each following source row;
                  // lower 16 bits: weight of the first source row
                  Cy = *yapp >> 16;
                  yap = *yapp & 0xffff;

                  while (dst_clip_w--)
                    {
                       // same packing for columns
                       Cx = *xapp >> 16;
                       xap = *xapp & 0xffff;

                       sptr = *yp + *xp + pos;
                       pix = sptr;
                       sptr += src_w;

                       // First source row. Each product is shifted by 9,
                       // so a full-weight sum is the channel value << 5.
                       ax = (A_VAL(pix) * xap) >> 9;
                       rx = (R_VAL(pix) * xap) >> 9;
                       gx = (G_VAL(pix) * xap) >> 9;
                       bx = (B_VAL(pix) * xap) >> 9;
                       pix++;
                       // following pixels get weight Cx each while more
                       // than Cx of the 1 << 14 budget is left
                       for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                         {
                            ax += (A_VAL(pix) * Cx) >> 9;
                            rx += (R_VAL(pix) * Cx) >> 9;
                            gx += (G_VAL(pix) * Cx) >> 9;
                            bx += (B_VAL(pix) * Cx) >> 9;
                            pix++;
                         }
                       // last pixel receives whatever weight remains
                       if (i > 0)
                         {
                            ax += (A_VAL(pix) * i) >> 9;
                            rx += (R_VAL(pix) * i) >> 9;
                            gx += (G_VAL(pix) * i) >> 9;
                            bx += (B_VAL(pix) * i) >> 9;
                         }

                       // weight the first row's sums by yap
                       a = (ax * yap) >> 14;
                       r = (rx * yap) >> 14;
                       g = (gx * yap) >> 14;
                       b = (bx * yap) >> 14;

                       // following rows: same horizontal sum, weight Cy
                       for (j = (1 << 14) - yap; j > Cy; j -= Cy)
                         {
                            pix = sptr;
                            sptr += src_w;
                            ax = (A_VAL(pix) * xap) >> 9;
                            rx = (R_VAL(pix) * xap) >> 9;
                            gx = (G_VAL(pix) * xap) >> 9;
                            bx = (B_VAL(pix) * xap) >> 9;
                            pix++;
                            for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                              {
                                 ax += (A_VAL(pix) * Cx) >> 9;
                                 rx += (R_VAL(pix) * Cx) >> 9;
                                 gx += (G_VAL(pix) * Cx) >> 9;
                                 bx += (B_VAL(pix) * Cx) >> 9;
                                 pix++;
                              }
                            if (i > 0)
                              {
                                 ax += (A_VAL(pix) * i) >> 9;
                                 rx += (R_VAL(pix) * i) >> 9;
                                 gx += (G_VAL(pix) * i) >> 9;
                                 bx += (B_VAL(pix) * i) >> 9;
                              }

                            a += (ax * Cy) >> 14;
                            r += (rx * Cy) >> 14;
                            g += (gx * Cy) >> 14;
                            b += (bx * Cy) >> 14;
                         }
                       // last row with the remaining vertical weight j
                       if (j > 0)
                         {
                            pix = sptr;
                            sptr += src_w;
                            ax = (A_VAL(pix) * xap) >> 9;
                            rx = (R_VAL(pix) * xap) >> 9;
                            gx = (G_VAL(pix) * xap) >> 9;
                            bx = (B_VAL(pix) * xap) >> 9;
                            pix++;
                            for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                              {
                                 ax += (A_VAL(pix) * Cx) >> 9;
                                 rx += (R_VAL(pix) * Cx) >> 9;
                                 gx += (G_VAL(pix) * Cx) >> 9;
                                 bx += (B_VAL(pix) * Cx) >> 9;
                                 pix++;
                              }
                            if (i > 0)
                              {
                                 ax += (A_VAL(pix) * i) >> 9;
                                 rx += (R_VAL(pix) * i) >> 9;
                                 gx += (G_VAL(pix) * i) >> 9;
                                 bx += (B_VAL(pix) * i) >> 9;
                              }

                            a += (ax * j) >> 14;
                            r += (rx * j) >> 14;
                            g += (gx * j) >> 14;
                            b += (bx * j) >> 14;
                         }
                       // drop the 5 extra fractional bits, rounding to
                       // nearest by adding half (1 << 4) first
                       *pbuf++ = ARGB_JOIN(((a + (1 << 4)) >> 5),
                                           ((r + (1 << 4)) >> 5),
                                           ((g + (1 << 4)) >> 5),
                                           ((b + (1 << 4)) >> 5));
                       xp++;  xapp++;
                    }

                  // same row hand-off as in the bilinear branch
                  if (!mask_ie)
                    func(buf, NULL, mul_col, dptr, w);
                  else
                    {
                       mask = mask_ie->image.data8
                       + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
                       + (dst_clip_x - mask_x);

                       if (mul_col != 0xffffffff)
                         func2(buf, NULL, mul_col, buf, w);
                       func(buf, mask, 0, dptr, w);
                    }
                  y++;

                  // next destination row: rewind the column tables,
                  // advance the row tables
                  pbuf = buf;
                  dptr += dst_w;   dst_clip_w = w;
                  xp = xpoints;// + dxx;
                  xapp = xapoints;// + dxx;
                  yp++;  yapp++;
               }
          }
     }
   else
     {
        // source has no alpha channel: only R, G, B are interpolated and
        // every output pixel gets alpha 0xff
#ifdef DIRECT_SCALE
        // Direct path: no source alpha, no destination alpha, no colour
        // multiplier and no mask, so pixels are written straight into the
        // destination row (pbuf = dptr) and func/func2 are not called.
        // The first test repeats what the enclosing else already implies.
        if ((!src->cache_entry.flags.alpha) &&
            (!dst->cache_entry.flags.alpha) &&
            (mul_col == 0xffffffff) &&
            (!mask_ie))
          {
             // RGBA_Image *src, RGBA_Image *dst
             // dst_clip_x, dst_clip_y, dst_clip_w, dst_clip_h
             // src_region_x, src_region_y, src_region_w, src_region_h
             // dst_region_x, dst_region_y, dst_region_w, dst_region_h
#ifdef BILINEAR_HALF_TO_FULL_SCALE
             if (bilinear_downscale)
               {
                  DATA32 *lptr, *p1, *p2, *p3, *p4;
                  unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac;
                  unsigned int xstep, ystep;
                  unsigned int r1, g1, b1, r2, g2, b2;

                  // same 16.16 fixed-point stepping as the alpha branch
                  pix = src_data + (src_region_y * src_w) + src_region_x;
                  xstep = (src_region_w << 16) / dst_region_w;
                  ystep = (src_region_h << 16) / dst_region_h;
                  ypos = (dst_clip_y - dst_region_y) * ystep;

                  while (dst_clip_h--)
                    {
                       // write directly into the destination row
                       pbuf = dptr;
                       xpos = (dst_clip_x - dst_region_x) * xstep;
                       lptr = pix + ((ypos >> 16) * src_w);

                       // two-row blend unless the row index reaches
                       // src_h - 1 (see NOTE(review) in the alpha branch)
                       if ((ypos >> 16) < ((unsigned int)src_h - 1))
                         {
                            yfrac = ypos & 0xffff;
                            invyfrac = 0x10000 - yfrac;
                            while (dst_clip_w--)
                              {
                                 p1 = lptr + (xpos >> 16);
                                 p2 = p1 + 1;
                                 p3 = p1 + src_w;
                                 p4 = p3 + 1;
                                 xfrac = xpos & 0xffff;
                                 invxfrac = 0x10000 - xfrac;

                                 if (xfrac > 0)
                                   {
                                      r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
                                      r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16;
                                      g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
                                      g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16;
                                      b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
                                      b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16;
                                   }
                                 else
                                   {
                                      r1 = R_VAL(p1);
                                      r2 = R_VAL(p3);
                                      g1 = G_VAL(p1);
                                      g2 = G_VAL(p3);
                                      b1 = B_VAL(p1);
                                      b2 = B_VAL(p3);
                                   }

                                 r = ((invyfrac * r1) + (yfrac * r2)) >> 16;
                                 g = ((invyfrac * g1) + (yfrac * g2)) >> 16;
                                 b = ((invyfrac * b1) + (yfrac * b2)) >> 16;

                                 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
                                 xpos += xstep;
                              }
                         }
                       else
                         {
                            // horizontal-only interpolation
                            while (dst_clip_w--)
                              {
                                 p1 = lptr + (xpos >> 16);
                                 p2 = p1 + 1;
                                 xfrac = xpos & 0xffff;
                                 invxfrac = 0x10000 - xfrac;

                                 if (xfrac > 0)
                                   {
                                      r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
                                      g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
                                      b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
                                   }
                                 else
                                   {
                                      r = R_VAL(p1);
                                      g = G_VAL(p1);
                                      b = B_VAL(p1);
                                   }

                                 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
                                 xpos += xstep;
                              }
                         }
                       ypos += ystep;
                       dptr += dst_w;   dst_clip_w = w;
                    }
               }
             else
#endif
               {
                  // direct weighted-sum path: same weighting scheme as the
                  // alpha branch, without the A channel and without buf
                  while (dst_clip_h--)
                    {
                       Cy = *yapp >> 16;
                       yap = *yapp & 0xffff;

                       pbuf = dptr;
                       while (dst_clip_w--)
                         {
                            Cx = *xapp >> 16;
                            xap = *xapp & 0xffff;

                            sptr = *yp + *xp + pos;
                            pix = sptr;
                            sptr += src_w;

                            // first source row
                            rx = (R_VAL(pix) * xap) >> 9;
                            gx = (G_VAL(pix) * xap) >> 9;
                            bx = (B_VAL(pix) * xap) >> 9;
                            pix++;
                            for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                              {
                                 rx += (R_VAL(pix) * Cx) >> 9;
                                 gx += (G_VAL(pix) * Cx) >> 9;
                                 bx += (B_VAL(pix) * Cx) >> 9;
                                 pix++;
                              }
                            if (i > 0)
                              {
                                 rx += (R_VAL(pix) * i) >> 9;
                                 gx += (G_VAL(pix) * i) >> 9;
                                 bx += (B_VAL(pix) * i) >> 9;
                              }

                            r = (rx * yap) >> 14;
                            g = (gx * yap) >> 14;
                            b = (bx * yap) >> 14;

                            // following rows, weight Cy each
                            for (j = (1 << 14) - yap; j > Cy; j -= Cy)
                              {
                                 pix = sptr;
                                 sptr += src_w;
                                 rx = (R_VAL(pix) * xap) >> 9;
                                 gx = (G_VAL(pix) * xap) >> 9;
                                 bx = (B_VAL(pix) * xap) >> 9;
                                 pix++;
                                 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                                   {
                                      rx += (R_VAL(pix) * Cx) >> 9;
                                      gx += (G_VAL(pix) * Cx) >> 9;
                                      bx += (B_VAL(pix) * Cx) >> 9;
                                      pix++;
                                   }
                                 if (i > 0)
                                   {
                                      rx += (R_VAL(pix) * i) >> 9;
                                      gx += (G_VAL(pix) * i) >> 9;
                                      bx += (B_VAL(pix) * i) >> 9;
                                   }

                                 r += (rx * Cy) >> 14;
                                 g += (gx * Cy) >> 14;
                                 b += (bx * Cy) >> 14;
                              }
                            // last row, remaining weight j
                            if (j > 0)
                              {
                                 pix = sptr;
                                 sptr += src_w;
                                 rx = (R_VAL(pix) * xap) >> 9;
                                 gx = (G_VAL(pix) * xap) >> 9;
                                 bx = (B_VAL(pix) * xap) >> 9;
                                 pix++;
                                 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                                   {
                                      rx += (R_VAL(pix) * Cx) >> 9;
                                      gx += (G_VAL(pix) * Cx) >> 9;
                                      bx += (B_VAL(pix) * Cx) >> 9;
                                      pix++;
                                   }
                                 if (i > 0)
                                   {
                                      rx += (R_VAL(pix) * i) >> 9;
                                      gx += (G_VAL(pix) * i) >> 9;
                                      bx += (B_VAL(pix) * i) >> 9;
                                   }

                                 r += (rx * j) >> 14;
                                 g += (gx * j) >> 14;
                                 b += (bx * j) >> 14;
                              }
                            // round away the 5 extra fractional bits
                            *pbuf++ = ARGB_JOIN(0xff,
                                                ((r + (1 << 4)) >> 5),
                                                ((g + (1 << 4)) >> 5),
                                                ((b + (1 << 4)) >> 5));
                            xp++;  xapp++;
                         }

                       dptr += dst_w;   dst_clip_w = w;
                       xp = xpoints;// + dxx;
                       xapp = xapoints;// + dxx;
                       yp++;  yapp++;
                    }
               }
          }
        else
#endif
          {
             // Buffered no-alpha path: build each row in buf (pbuf still
             // points at buf here) and hand it to func, as the alpha
             // branch does.
#ifdef BILINEAR_HALF_TO_FULL_SCALE
             if (bilinear_downscale)
               {
                  DATA32 *lptr, *p1, *p2, *p3, *p4;
                  unsigned int xpos, ypos, xfrac, yfrac, invxfrac, invyfrac;
                  unsigned int xstep, ystep;
                  unsigned int r1, g1, b1, r2, g2, b2;

                  // same 16.16 fixed-point stepping as the alpha branch
                  pix = src_data + (src_region_y * src_w) + src_region_x;
                  xstep = (src_region_w << 16) / dst_region_w;
                  ystep = (src_region_h << 16) / dst_region_h;
                  ypos = (dst_clip_y - dst_region_y) * ystep;

                  while (dst_clip_h--)
                    {
                       xpos = (dst_clip_x - dst_region_x) * xstep;
                       lptr = pix + ((ypos >> 16) * src_w);

                       // two-row blend unless the row index reaches
                       // src_h - 1 (see NOTE(review) in the alpha branch)
                       if ((ypos >> 16) < ((unsigned int)src_h - 1))
                         {
                            yfrac = ypos & 0xffff;
                            invyfrac = 0x10000 - yfrac;
                            while (dst_clip_w--)
                              {
                                 p1 = lptr + (xpos >> 16);
                                 p2 = p1 + 1;
                                 p3 = p1 + src_w;
                                 p4 = p3 + 1;
                                 xfrac = xpos & 0xffff;
                                 invxfrac = 0x10000 - xfrac;

                                 if (xfrac > 0)
                                   {
                                      r1 = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
                                      r2 = ((invxfrac * R_VAL(p3)) + (xfrac * R_VAL(p4))) >> 16;
                                      g1 = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
                                      g2 = ((invxfrac * G_VAL(p3)) + (xfrac * G_VAL(p4))) >> 16;
                                      b1 = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
                                      b2 = ((invxfrac * B_VAL(p3)) + (xfrac * B_VAL(p4))) >> 16;
                                   }
                                 else
                                   {
                                      r1 = R_VAL(p1);
                                      r2 = R_VAL(p3);
                                      g1 = G_VAL(p1);
                                      g2 = G_VAL(p3);
                                      b1 = B_VAL(p1);
                                      b2 = B_VAL(p3);
                                   }

                                 r = ((invyfrac * r1) + (yfrac * r2)) >> 16;
                                 g = ((invyfrac * g1) + (yfrac * g2)) >> 16;
                                 b = ((invyfrac * b1) + (yfrac * b2)) >> 16;

                                 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
                                 xpos += xstep;
                              }
                         }
                       else
                         {
                            // horizontal-only interpolation
                            while (dst_clip_w--)
                              {
                                 p1 = lptr + (xpos >> 16);
                                 p2 = p1 + 1;
                                 xfrac = xpos & 0xffff;
                                 invxfrac = 0x10000 - xfrac;

                                 if (xfrac > 0)
                                   {
                                      r = ((invxfrac * R_VAL(p1)) + (xfrac * R_VAL(p2))) >> 16;
                                      g = ((invxfrac * G_VAL(p1)) + (xfrac * G_VAL(p2))) >> 16;
                                      b = ((invxfrac * B_VAL(p1)) + (xfrac * B_VAL(p2))) >> 16;
                                   }
                                 else
                                   {
                                      r = R_VAL(p1);
                                      g = G_VAL(p1);
                                      b = B_VAL(p1);
                                   }

                                 *pbuf++ = ARGB_JOIN(0xff, r, g, b);
                                 xpos += xstep;
                              }
                         }
                       // row hand-off, identical to the alpha branch
                       if (!mask_ie)
                         func(buf, NULL, mul_col, dptr, w);
                       else
                         {
                            mask = mask_ie->image.data8
                            + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
                            + (dst_clip_x - mask_x);

                            if (mul_col != 0xffffffff)
                              func2(buf, NULL, mul_col, buf, w);
                            func(buf, mask, 0, dptr, w);
                         }
                       y++;

                       pbuf = buf;
                       ypos += ystep;
                       dptr += dst_w;   dst_clip_w = w;
                    }
               }
             else
#endif
               {
                  // buffered weighted-sum path: same weighting scheme as
                  // the alpha branch, without the A channel
                  while (dst_clip_h--)
                    {
                       Cy = *yapp >> 16;
                       yap = *yapp & 0xffff;

                       while (dst_clip_w--)
                         {
                            Cx = *xapp >> 16;
                            xap = *xapp & 0xffff;

                            sptr = *yp + *xp + pos;
                            pix = sptr;
                            sptr += src_w;

                            // first source row
                            rx = (R_VAL(pix) * xap) >> 9;
                            gx = (G_VAL(pix) * xap) >> 9;
                            bx = (B_VAL(pix) * xap) >> 9;
                            pix++;
                            for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                              {
                                 rx += (R_VAL(pix) * Cx) >> 9;
                                 gx += (G_VAL(pix) * Cx) >> 9;
                                 bx += (B_VAL(pix) * Cx) >> 9;
                                 pix++;
                              }
                            if (i > 0)
                              {
                                 rx += (R_VAL(pix) * i) >> 9;
                                 gx += (G_VAL(pix) * i) >> 9;
                                 bx += (B_VAL(pix) * i) >> 9;
                              }

                            r = (rx * yap) >> 14;
                            g = (gx * yap) >> 14;
                            b = (bx * yap) >> 14;

                            // following rows, weight Cy each
                            for (j = (1 << 14) - yap; j > Cy; j -= Cy)
                              {
                                 pix = sptr;
                                 sptr += src_w;
                                 rx = (R_VAL(pix) * xap) >> 9;
                                 gx = (G_VAL(pix) * xap) >> 9;
                                 bx = (B_VAL(pix) * xap) >> 9;
                                 pix++;
                                 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                                   {
                                      rx += (R_VAL(pix) * Cx) >> 9;
                                      gx += (G_VAL(pix) * Cx) >> 9;
                                      bx += (B_VAL(pix) * Cx) >> 9;
                                      pix++;
                                   }
                                 if (i > 0)
                                   {
                                      rx += (R_VAL(pix) * i) >> 9;
                                      gx += (G_VAL(pix) * i) >> 9;
                                      bx += (B_VAL(pix) * i) >> 9;
                                   }

                                 r += (rx * Cy) >> 14;
                                 g += (gx * Cy) >> 14;
                                 b += (bx * Cy) >> 14;
                              }
                            // last row, remaining weight j
                            if (j > 0)
                              {
                                 pix = sptr;
                                 sptr += src_w;
                                 rx = (R_VAL(pix) * xap) >> 9;
                                 gx = (G_VAL(pix) * xap) >> 9;
                                 bx = (B_VAL(pix) * xap) >> 9;
                                 pix++;
                                 for (i = (1 << 14) - xap; i > Cx; i -= Cx)
                                   {
                                      rx += (R_VAL(pix) * Cx) >> 9;
                                      gx += (G_VAL(pix) * Cx) >> 9;
                                      bx += (B_VAL(pix) * Cx) >> 9;
                                      pix++;
                                   }
                                 if (i > 0)
                                   {
                                      rx += (R_VAL(pix) * i) >> 9;
                                      gx += (G_VAL(pix) * i) >> 9;
                                      bx += (B_VAL(pix) * i) >> 9;
                                   }

                                 r += (rx * j) >> 14;
                                 g += (gx * j) >> 14;
                                 b += (bx * j) >> 14;
                              }
                            // round away the 5 extra fractional bits
                            *pbuf++ = ARGB_JOIN(0xff,
                                                ((r + (1 << 4)) >> 5),
                                                ((g + (1 << 4)) >> 5),
                                                ((b + (1 << 4)) >> 5));
                            xp++;  xapp++;
                         }

                       // row hand-off, identical to the alpha branch
                       if (!mask_ie)
                         func(buf, NULL, mul_col, dptr, w);
                       else
                         {
                            mask = mask_ie->image.data8
                            + ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
                            + (dst_clip_x - mask_x);

                            if (mul_col != 0xffffffff)
                              func2(buf, NULL, mul_col, buf, w);
                            func(buf, mask, 0, dptr, w);
                         }
                       y++;

                       pbuf = buf;
                       dptr += dst_w;   dst_clip_w = w;
                       xp = xpoints;// + dxx;
                       xapp = xapoints;// + dxx;
                       yp++;  yapp++;
                    }
               }
          }
     }
#else
   /* MMX scaling down would go here */
#endif
}