385 lines
12 KiB
C
385 lines
12 KiB
C
{
|
|
int srx = src_region_x, sry = src_region_y;
|
|
int srw = src_region_w, srh = src_region_h;
|
|
int drx = dst_region_x, dry = dst_region_y;
|
|
int drw = dst_region_w, drh = dst_region_h;
|
|
|
|
int dsxx, dsyy, sxx, syy, sx, sy;
|
|
int cx, cy;
|
|
int direct_scale = 0, buf_step = 0;
|
|
|
|
DATA32 *psrc, *pdst, *pdst_end;
|
|
DATA32 *buf, *pbuf, *pbuf_end;
|
|
DATA8 *mask;
|
|
RGBA_Gfx_Func func = NULL, func2 = NULL;
|
|
|
|
/* Bail out early on values that could cause overflow (only the overflow-related values are checked here) */
|
|
if ((src_region_w > SCALE_SIZE_MAX) ||
|
|
(src_region_h > SCALE_SIZE_MAX)) return;
|
|
|
|
/* a scanline buffer */
|
|
pdst = dst_ptr; // it's been set at (dst_clip_x, dst_clip_y)
|
|
pdst_end = pdst + (dst_clip_h * dst_w);
|
|
if (mul_col == 0xffffffff && !mask_ie)
|
|
{
|
|
if ((render_op == _EVAS_RENDER_BLEND) && !src->cache_entry.flags.alpha)
|
|
{ direct_scale = 1; buf_step = dst->cache_entry.w; }
|
|
else if (render_op == _EVAS_RENDER_COPY)
|
|
{
|
|
direct_scale = 1; buf_step = dst->cache_entry.w;
|
|
if (src->cache_entry.flags.alpha)
|
|
dst->cache_entry.flags.alpha = 1;
|
|
}
|
|
}
|
|
if (!direct_scale)
|
|
{
|
|
buf = alloca(dst_clip_w * sizeof(DATA32));
|
|
if (!mask_ie)
|
|
{
|
|
if (mul_col != 0xffffffff)
|
|
func = evas_common_gfx_func_composite_pixel_color_span_get(src->cache_entry.flags.alpha, src->cache_entry.flags.alpha_sparse, mul_col, dst->cache_entry.flags.alpha, dst_clip_w, render_op);
|
|
else
|
|
func = evas_common_gfx_func_composite_pixel_span_get(src->cache_entry.flags.alpha, src->cache_entry.flags.alpha_sparse, dst->cache_entry.flags.alpha, dst_clip_w, render_op);
|
|
}
|
|
else
|
|
{
|
|
if (mul_col != 0xffffffff)
|
|
{
|
|
func = evas_common_gfx_func_composite_pixel_mask_span_get(src->cache_entry.flags.alpha, src->cache_entry.flags.alpha_sparse, dst->cache_entry.flags.alpha, dst_clip_w, render_op);
|
|
func2 = evas_common_gfx_func_composite_pixel_color_span_get(src->cache_entry.flags.alpha, src->cache_entry.flags.alpha_sparse, mul_col, dst->cache_entry.flags.alpha, dst_clip_w, EVAS_RENDER_COPY);
|
|
}
|
|
else
|
|
func = evas_common_gfx_func_composite_pixel_mask_span_get(src->cache_entry.flags.alpha, src->cache_entry.flags.alpha_sparse, dst->cache_entry.flags.alpha, dst_clip_w, render_op);
|
|
}
|
|
}
|
|
else
|
|
buf = pdst;
|
|
|
|
if ((srw > 1) && (drw > 1))
|
|
dsxx = ((srw - 1) << 16) / (drw - 1);
|
|
else
|
|
dsxx = (srw << 16) / drw;
|
|
if ((srh > 1) && (drh > 1))
|
|
dsyy = ((srh - 1) << 16) / (drh - 1);
|
|
else
|
|
dsyy = (srh << 16) / drh;
|
|
|
|
cx = dst_clip_x - drx;
|
|
cy = dst_clip_y - dry;
|
|
|
|
sxx = (dsxx * cx);
|
|
syy = (dsyy * cy);
|
|
|
|
sy = syy >> 16;
|
|
|
|
if (drh == srh)
|
|
{
|
|
int sxx0 = sxx;
|
|
int y = 0;
|
|
psrc = src->image.data + (src_w * (sry + cy)) + srx;
|
|
while (pdst < pdst_end)
|
|
{
|
|
pbuf = buf; pbuf_end = buf + dst_clip_w;
|
|
sxx = sxx0;
|
|
#ifdef SCALE_USING_MMX
|
|
pxor_r2r(mm0, mm0);
|
|
MOV_A2R(ALPHA_255, mm5)
|
|
#endif
|
|
while (pbuf < pbuf_end)
|
|
{
|
|
DATA32 p0, p1;
|
|
int ax;
|
|
|
|
sx = (sxx >> 16);
|
|
ax = 1 + ((sxx - (sx << 16)) >> 8);
|
|
p0 = p1 = *(psrc + sx);
|
|
if ((sx + 1) < srw)
|
|
p1 = *(psrc + sx + 1);
|
|
#ifdef SCALE_USING_MMX
|
|
MOV_P2R(p0, mm1, mm0)
|
|
if (p0 | p1)
|
|
{
|
|
MOV_A2R(ax, mm3)
|
|
MOV_P2R(p1, mm2, mm0)
|
|
INTERP_256_R2R(mm3, mm2, mm1, mm5)
|
|
}
|
|
MOV_R2P(mm1, *pbuf, mm0)
|
|
pbuf++;
|
|
#else
|
|
if (p0 | p1)
|
|
p0 = INTERP_256(ax, p1, p0);
|
|
*pbuf++ = p0;
|
|
#endif
|
|
sxx += dsxx;
|
|
}
|
|
/* * blend here [clip_w *] buf -> dptr * */
|
|
if (!direct_scale)
|
|
{
|
|
if (!mask_ie)
|
|
func(buf, NULL, mul_col, pdst, dst_clip_w);
|
|
else
|
|
{
|
|
mask = mask_ie->image.data8
|
|
+ ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
|
|
+ (dst_clip_x - mask_x);
|
|
|
|
if (mul_col != 0xffffffff) func2(buf, NULL, mul_col, buf, dst_clip_w);
|
|
func(buf, mask, 0, pdst, dst_clip_w);
|
|
}
|
|
y++;
|
|
}
|
|
|
|
pdst += dst_w;
|
|
psrc += src_w;
|
|
buf += buf_step;
|
|
}
|
|
|
|
goto done_scale_up;
|
|
}
|
|
else if (drw == srw)
|
|
{
|
|
DATA32 *ps = src->image.data + (src_w * sry) + srx + cx;
|
|
int y = 0;
|
|
|
|
while (pdst < pdst_end)
|
|
{
|
|
int ay;
|
|
|
|
sy = syy >> 16;
|
|
psrc = ps + (sy * src_w);
|
|
ay = 1 + ((syy - (sy << 16)) >> 8);
|
|
#ifdef SCALE_USING_MMX
|
|
pxor_r2r(mm0, mm0);
|
|
MOV_A2R(ALPHA_255, mm5)
|
|
MOV_A2R(ay, mm4)
|
|
#endif
|
|
pbuf = buf; pbuf_end = buf + dst_clip_w;
|
|
while (pbuf < pbuf_end)
|
|
{
|
|
DATA32 p0 = *psrc, p2 = p0;
|
|
|
|
if ((sy + 1) < srh)
|
|
p2 = *(psrc + src_w);
|
|
#ifdef SCALE_USING_MMX
|
|
MOV_P2R(p0, mm1, mm0)
|
|
if (p0 | p2)
|
|
{
|
|
MOV_P2R(p2, mm2, mm0)
|
|
INTERP_256_R2R(mm4, mm2, mm1, mm5)
|
|
}
|
|
MOV_R2P(mm1, *pbuf, mm0)
|
|
pbuf++;
|
|
#else
|
|
if (p0 | p2)
|
|
p0 = INTERP_256(ay, p2, p0);
|
|
*pbuf++ = p0;
|
|
#endif
|
|
psrc++;
|
|
}
|
|
/* * blend here [clip_w *] buf -> dptr * */
|
|
if (!direct_scale)
|
|
{
|
|
if (!mask_ie)
|
|
func(buf, NULL, mul_col, pdst, dst_clip_w);
|
|
else
|
|
{
|
|
mask = mask_ie->image.data8
|
|
+ ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
|
|
+ (dst_clip_x - mask_x);
|
|
|
|
if (mul_col != 0xffffffff) func2(buf, NULL, mul_col, buf, dst_clip_w);
|
|
func(buf, mask, 0, pdst, dst_clip_w);
|
|
}
|
|
y++;
|
|
}
|
|
pdst += dst_w;
|
|
syy += dsyy;
|
|
buf += buf_step;
|
|
}
|
|
goto done_scale_up;
|
|
}
|
|
|
|
{
|
|
DATA32 *ps = src->image.data + (src_w * sry) + srx;
|
|
int sxx0 = sxx;
|
|
int y = 0;
|
|
|
|
while (pdst < pdst_end)
|
|
{
|
|
int ay;
|
|
|
|
sy = syy >> 16;
|
|
psrc = ps + (sy * src_w);
|
|
ay = 1 + ((syy - (sy << 16)) >> 8);
|
|
#ifdef SCALE_USING_MMX
|
|
MOV_A2R(ay, mm4)
|
|
pxor_r2r(mm0, mm0);
|
|
MOV_A2R(ALPHA_255, mm5)
|
|
#elif defined SCALE_USING_NEON
|
|
uint16x8_t vay = vdupq_n_u16(ay);
|
|
#endif
|
|
pbuf = buf; pbuf_end = buf + dst_clip_w;
|
|
sxx = sxx0;
|
|
#ifdef SCALE_USING_NEON
|
|
while (pbuf+1 < pbuf_end) // 2 iterations only for NEON
|
|
#else
|
|
while (pbuf < pbuf_end)
|
|
#endif
|
|
{
|
|
int ax;
|
|
DATA32 *p, *q;
|
|
#ifdef SCALE_USING_NEON
|
|
int ax1;
|
|
DATA32 *p1, *q1;
|
|
uint32x2x2_t vp0, vp1;
|
|
uint16x8_t vax;
|
|
uint16x8_t vax1;
|
|
DATA32 pa[2][4];
|
|
#else
|
|
DATA32 p0, p1, p2, p3;
|
|
#endif
|
|
|
|
sx = sxx >> 16;
|
|
ax = 1 + ((sxx - (sx << 16)) >> 8);
|
|
p = psrc + sx; q = p + src_w;
|
|
#ifdef SCALE_USING_NEON
|
|
pa[0][0] = pa[0][1] = pa[0][2] = pa[0][3] = *p;
|
|
if ((sx + 1) < srw)
|
|
pa[0][1] = *(p + 1);
|
|
if ((sy + 1) < srh)
|
|
{
|
|
pa[0][2] = *q; pa[0][3] = pa[0][2];
|
|
if ((sx + 1) < srw)
|
|
pa[0][3] = *(q + 1);
|
|
}
|
|
vax = vdupq_n_u16(ax);
|
|
vp0.val[0] = vld1_u32(&pa[0][0]);
|
|
vp0.val[1] = vld1_u32(&pa[0][2]);
|
|
sxx += dsxx;
|
|
sx = sxx >> 16;
|
|
ax1 = 1 + ((sxx - (sx << 16)) >> 8);
|
|
p1 = psrc + sx; q1 = p1 + src_w;
|
|
pa[1][0] = pa[1][1] = pa[1][2] = pa[1][3] = *p1;
|
|
if ((sx + 1) < srw)
|
|
pa[1][1] = *(p1 + 1);
|
|
if ((sy + 1) < srh)
|
|
{
|
|
pa[1][2] = *q1; pa[1][3] = pa[1][2];
|
|
if ((sx + 1) < srw)
|
|
pa[1][3] = *(q1 + 1);
|
|
}
|
|
vax1 = vdupq_n_u16(ax1);
|
|
vp1.val[0] = vld1_u32(&pa[1][0]);
|
|
vp1.val[1] = vld1_u32(&pa[1][2]);
|
|
#else
|
|
p0 = p1 = p2 = p3 = *p;
|
|
if ((sx + 1) < srw)
|
|
p1 = *(p + 1);
|
|
if ((sy + 1) < srh)
|
|
{
|
|
p2 = *q; p3 = p2;
|
|
if ((sx + 1) < srw)
|
|
p3 = *(q + 1);
|
|
}
|
|
#endif
|
|
#ifdef SCALE_USING_MMX
|
|
MOV_A2R(ax, mm6)
|
|
MOV_P2R(p0, mm1, mm0)
|
|
if (p0 | p1)
|
|
{
|
|
MOV_P2R(p1, mm2, mm0)
|
|
INTERP_256_R2R(mm6, mm2, mm1, mm5)
|
|
}
|
|
MOV_P2R(p2, mm2, mm0)
|
|
if (p2 | p3)
|
|
{
|
|
MOV_P2R(p3, mm3, mm0)
|
|
INTERP_256_R2R(mm6, mm3, mm2, mm5)
|
|
}
|
|
INTERP_256_R2R(mm4, mm2, mm1, mm5)
|
|
MOV_R2P(mm1, *pbuf, mm0)
|
|
pbuf++;
|
|
#elif defined SCALE_USING_NEON
|
|
// (p0, p1), (p2, p3) ==> (p0, p2), (p1, p3)
|
|
vp0 = vzip_u32(vp0.val[0], vp0.val[1]);
|
|
// (p1 - p0, p3 - p2)
|
|
uint16x8_t vtmpq = vsubl_u8(vreinterpret_u8_u32(vp0.val[1]), vreinterpret_u8_u32(vp0.val[0]));
|
|
// p0 + (p1 - p0)*ax, p2 + (p3 - p2)*ax
|
|
vp0.val[0] = vreinterpret_u32_u8(vadd_u8(vreinterpret_u8_u32(vp0.val[0]), vshrn_n_u16(vmulq_u16(vtmpq, vax), 8)));
|
|
vp1 = vzip_u32(vp1.val[0], vp1.val[1]);
|
|
vtmpq = vsubl_u8(vreinterpret_u8_u32(vp1.val[1]), vreinterpret_u8_u32(vp1.val[0]));
|
|
vp1.val[0] = vreinterpret_u32_u8(vadd_u8(vreinterpret_u8_u32(vp1.val[0]), vshrn_n_u16(vmulq_u16(vtmpq, vax1), 8)));
|
|
// (p0, p2), (p4, p6) ==> (p0, p4), (p2, p6)
|
|
vp0 = vzip_u32(vp0.val[0], vp1.val[0]);
|
|
// (p2 - p0), (p6 - p4)
|
|
vtmpq = vsubl_u8(vreinterpret_u8_u32(vp0.val[1]), vreinterpret_u8_u32(vp0.val[0]));
|
|
// p0 + (p2 - p0)*ay, p4 + (p6 - p4)*ay
|
|
vp0.val[0] = vreinterpret_u32_u8(vadd_u8(vreinterpret_u8_u32(vp0.val[0]), vshrn_n_u16(vmulq_u16(vtmpq, vay), 8)));
|
|
vst1_u32(pbuf, vp0.val[0]);
|
|
pbuf += 2;
|
|
#else
|
|
if (p0 | p1)
|
|
p0 = INTERP_256(ax, p1, p0);
|
|
if (p2 | p3)
|
|
p2 = INTERP_256(ax, p3, p2);
|
|
if (p0 | p2)
|
|
p0 = INTERP_256(ay, p2, p0);
|
|
*pbuf++ = p0;
|
|
#endif
|
|
sxx += dsxx;
|
|
}
|
|
#if defined SCALE_USING_NEON
|
|
if (pbuf < pbuf_end) // For non-even length case
|
|
{
|
|
int ax;
|
|
DATA32 *p, *q;
|
|
DATA32 p0, p1, p2, p3;
|
|
|
|
sx = sxx >> 16;
|
|
ax = 1 + ((sxx - (sx << 16)) >> 8);
|
|
p = psrc + sx; q = p + src_w;
|
|
p0 = p1 = p2 = p3 = *p;
|
|
if ((sx + 1) < srw)
|
|
p1 = *(p + 1);
|
|
if ((sy + 1) < srh)
|
|
{
|
|
p2 = *q; p3 = p2;
|
|
if ((sx + 1) < srw)
|
|
p3 = *(q + 1);
|
|
}
|
|
if (p0 | p1)
|
|
p0 = INTERP_256(ax, p1, p0);
|
|
if (p2 | p3)
|
|
p2 = INTERP_256(ax, p3, p2);
|
|
if (p0 | p2)
|
|
p0 = INTERP_256(ay, p2, p0);
|
|
*pbuf++ = p0;
|
|
sxx += dsxx;
|
|
}
|
|
#endif
|
|
/* * blend here [clip_w *] buf -> dptr * */
|
|
if (!direct_scale)
|
|
{
|
|
if (!mask_ie)
|
|
func(buf, NULL, mul_col, pdst, dst_clip_w);
|
|
else
|
|
{
|
|
mask = mask_ie->image.data8
|
|
+ ((dst_clip_y - mask_y + y) * mask_ie->cache_entry.w)
|
|
+ (dst_clip_x - mask_x);
|
|
|
|
if (mul_col != 0xffffffff) func2(buf, NULL, mul_col, buf, dst_clip_w);
|
|
func(buf, mask, 0, pdst, dst_clip_w);
|
|
}
|
|
y++;
|
|
}
|
|
|
|
pdst += dst_w;
|
|
syy += dsyy;
|
|
buf += buf_step;
|
|
}
|
|
}
|
|
done_scale_up:
|
|
return;
|
|
}
|