diff options
author | Jean-Philippe Andre <jp.andre@samsung.com> | 2016-03-22 13:49:00 +0900 |
---|---|---|
committer | Jean-Philippe Andre <jp.andre@samsung.com> | 2016-03-22 13:49:05 +0900 |
commit | b82382e958dcab816e52c95b7696cf21583713d5 (patch) | |
tree | 9dfc53f4860fddd1f91090a420e7ee5454226b87 | |
parent | f9e655046868cb83cbc9ac2dcd139e3540e89285 (diff) |
evas: NEON scaling up fixed
Summary:
Previous implementation loaded data from memory first and then checked the borders.
Here I check the borders first, as is done in the C implementation.
This prevents reads of non-accessible memory.
Reviewers: cedric, jypark, Hermet, jiin.moon, jpeg
Reviewed By: jpeg
Projects: #efl
Differential Revision: https://phab.enlightenment.org/D3809
-rw-r--r-- | src/lib/evas/common/evas_scale_smooth_scaler_up.c | 37 |
1 files changed, 21 insertions, 16 deletions
diff --git a/src/lib/evas/common/evas_scale_smooth_scaler_up.c b/src/lib/evas/common/evas_scale_smooth_scaler_up.c index 5ba7805..f059e8f 100644 --- a/src/lib/evas/common/evas_scale_smooth_scaler_up.c +++ b/src/lib/evas/common/evas_scale_smooth_scaler_up.c | |||
@@ -234,6 +234,7 @@ | |||
234 | uint32x2x2_t vp0, vp1; | 234 | uint32x2x2_t vp0, vp1; |
235 | uint16x8_t vax; | 235 | uint16x8_t vax; |
236 | uint16x8_t vax1; | 236 | uint16x8_t vax1; |
237 | DATA32 pa[2][4]; | ||
237 | #else | 238 | #else |
238 | DATA32 p0, p1, p2, p3; | 239 | DATA32 p0, p1, p2, p3; |
239 | #endif | 240 | #endif |
@@ -242,30 +243,34 @@ | |||
242 | ax = 1 + ((sxx - (sx << 16)) >> 8); | 243 | ax = 1 + ((sxx - (sx << 16)) >> 8); |
243 | p = psrc + sx; q = p + src_w; | 244 | p = psrc + sx; q = p + src_w; |
244 | #ifdef SCALE_USING_NEON | 245 | #ifdef SCALE_USING_NEON |
245 | vax = vdupq_n_u16(ax); | 246 | pa[0][0] = pa[0][1] = pa[0][2] = pa[0][3] = *p; |
246 | vp0.val[0] = vld1_u32(p); | 247 | if ((sx + 1) < srw) |
247 | vp0.val[1] = vld1_u32(q); | 248 | pa[0][1] = *(p + 1); |
248 | if ((sx + 1) >= srw) | 249 | if ((sy + 1) < srh) |
249 | { | 250 | { |
250 | vp0.val[0] = vdup_lane_u32(vp0.val[0], 0); // p0, p1 | 251 | pa[0][2] = *q; pa[0][3] = pa[0][2]; |
251 | vp0.val[1] = vdup_lane_u32(vp0.val[1], 0); // p2, p3 | 252 | if ((sx + 1) < srw) |
253 | pa[0][3] = *(q + 1); | ||
252 | } | 254 | } |
253 | if ((sy + 1) >= srh) | 255 | vax = vdupq_n_u16(ax); |
254 | vp0.val[1] = vdup_lane_u32(vp0.val[0], 0); | 256 | vp0.val[0] = vld1_u32(&pa[0][0]); |
257 | vp0.val[1] = vld1_u32(&pa[0][2]); | ||
255 | sxx += dsxx; | 258 | sxx += dsxx; |
256 | sx = sxx >> 16; | 259 | sx = sxx >> 16; |
257 | ax1 = 1 + ((sxx - (sx << 16)) >> 8); | 260 | ax1 = 1 + ((sxx - (sx << 16)) >> 8); |
258 | vax1 = vdupq_n_u16(ax1); | ||
259 | p1 = psrc + sx; q1 = p1 + src_w; | 261 | p1 = psrc + sx; q1 = p1 + src_w; |
260 | vp1.val[0] = vld1_u32(p1); | 262 | pa[1][0] = pa[1][1] = pa[1][2] = pa[1][3] = *p1; |
261 | vp1.val[1] = vld1_u32(q1); | 263 | if ((sx + 1) < srw) |
262 | if ((sx + 1) >= srw) | 264 | pa[1][1] = *(p1 + 1); |
265 | if ((sy + 1) < srh) | ||
263 | { | 266 | { |
264 | vp1.val[0] = vdup_lane_u32(vp1.val[0], 0); // p4, p5 | 267 | pa[1][2] = *q1; pa[1][3] = pa[1][2]; |
265 | vp1.val[1] = vdup_lane_u32(vp1.val[1], 0); // p6, p7 | 268 | if ((sx + 1) < srw) |
269 | pa[1][3] = *(q1 + 1); | ||
266 | } | 270 | } |
267 | if ((sy + 1) >= srh) | 271 | vax1 = vdupq_n_u16(ax1); |
268 | vp1.val[1] = vdup_lane_u32(vp1.val[0], 0); | 272 | vp1.val[0] = vld1_u32(&pa[1][0]); |
273 | vp1.val[1] = vld1_u32(&pa[1][2]); | ||
269 | #else | 274 | #else |
270 | p0 = p1 = p2 = p3 = *p; | 275 | p0 = p1 = p2 = p3 = *p; |
271 | if ((sx + 1) < srw) | 276 | if ((sx + 1) < srw) |