diff --git a/legacy/evas/src/lib/engines/common/evas_blend_pixel_pixel.c b/legacy/evas/src/lib/engines/common/evas_blend_pixel_pixel.c index c2fb89a2b3..c298760384 100644 --- a/legacy/evas/src/lib/engines/common/evas_blend_pixel_pixel.c +++ b/legacy/evas/src/lib/engines/common/evas_blend_pixel_pixel.c @@ -204,13 +204,13 @@ evas_common_copy_pixels_rgba_to_rgba_mmx(DATA32 *src, DATA32 *dst, int len) src_ptr = src; dst_ptr = dst; dst_end_ptr = dst + len; - dst_end_ptr_pre = dst + ((len / 10) * 10); + dst_end_ptr_pre = dst + ((len / 16) * 16); while (dst_ptr < dst_end_ptr_pre) { - MOVE_10DWORDS_MMX(src_ptr, dst_ptr); - src_ptr+=10; - dst_ptr+=10; + MOVE_16DWORDS_MMX(src_ptr, dst_ptr); + src_ptr+=16; + dst_ptr+=16; } while (dst_ptr < dst_end_ptr) { @@ -310,15 +310,14 @@ evas_common_copy_pixels_rgba_to_rgba_sse(DATA32 *src, DATA32 *dst, int len) src_ptr = src; dst_ptr = dst; dst_end_ptr = dst + len; - dst_end_ptr_pre = dst + ((len / 10) * 10); + dst_end_ptr_pre = dst + ((len / 16) * 16); while (dst_ptr < dst_end_ptr_pre) { - prefetch(&src_ptr[128]); - prefetch(&dst_ptr[128]); - MOVE_10DWORDS_MMX(src_ptr, dst_ptr); - src_ptr+=10; - dst_ptr+=10; + prefetch(&src_ptr[16]); + MOVE_16DWORDS_MMX(src_ptr, dst_ptr); + src_ptr+=16; + dst_ptr+=16; } while (dst_ptr < dst_end_ptr) { @@ -373,21 +372,21 @@ evas_common_copy_pixels_rev_rgba_to_rgba_mmx(DATA32 *src, DATA32 *dst, int len) { DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre; - src_ptr = src + len - 10; - dst_ptr = dst + len - 10; + src_ptr = src + len - 16; + dst_ptr = dst + len - 16; dst_end_ptr = dst; - dst_end_ptr_pre = dst + len - ((len / 10) * 10); + dst_end_ptr_pre = dst + len - ((len / 16) * 16); - if (len >= 10) + if (len >= 16) { while (dst_ptr >= dst_end_ptr_pre) { - MOVE_10DWORDS_MMX(src_ptr, dst_ptr); - src_ptr-=10; - dst_ptr-=10; + MOVE_16DWORDS_MMX(src_ptr, dst_ptr); + src_ptr-=16; + dst_ptr-=16; } - src_ptr+=9; - dst_ptr+=9; + src_ptr+=15; + dst_ptr+=15; while (dst_ptr >= dst_end_ptr) { *dst_ptr = 
*src_ptr; @@ -415,23 +414,22 @@ evas_common_copy_pixels_rev_rgba_to_rgba_sse(DATA32 *src, DATA32 *dst, int len) { DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre; - src_ptr = src + len - 10; - dst_ptr = dst + len - 10; + src_ptr = src + len - 16; + dst_ptr = dst + len - 16; dst_end_ptr = dst; - dst_end_ptr_pre = dst + len - ((len / 10) * 10); + dst_end_ptr_pre = dst + len - ((len / 16) * 16); - if (len >= 10) + if (len >= 16) { while (dst_ptr >= dst_end_ptr_pre) { - prefetch(&src_ptr[-128]); - prefetch(&dst_ptr[-128]); + prefetch(&src_ptr[-16]); - MOVE_10DWORDS_MMX(src_ptr, dst_ptr); + MOVE_16DWORDS_MMX(src_ptr, dst_ptr); - src_ptr-=10; - dst_ptr-=10; + src_ptr-=16; + dst_ptr-=16; } - src_ptr+=9; - dst_ptr+=9; + src_ptr+=15; + dst_ptr+=15; while (dst_ptr >= dst_end_ptr) { *dst_ptr = *src_ptr; diff --git a/legacy/evas/src/lib/engines/common/evas_image_main.c b/legacy/evas/src/lib/engines/common/evas_image_main.c index 41947a05af..60ad03c792 100644 --- a/legacy/evas/src/lib/engines/common/evas_image_main.c +++ b/legacy/evas/src/lib/engines/common/evas_image_main.c @@ -76,22 +76,13 @@ evas_common_image_shutdown(void) #endif } -/* alpha tiles! - asctually span lists - need to do it as span lists */ +#if 0 void evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize) { int x, y; DATA32 *ptr; -#if 1 - return; -#endif - /* hmm i only get about a 15% speedup on my "best cases". 
the complexity - * imho isn't worth the small gain, so i have disabled it here :( (this - * is best case scenario - average case will be much less gain) - * - * thought for now the only case is - */ if (is->spans) return; if (!(is->im->flags & RGBA_IMAGE_HAS_ALPHA)) return; /* FIXME: dont handle alpha only images yet */ @@ -149,26 +140,7 @@ evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize) } } } - -void -evas_common_image_surface_alpha_tiles_free(RGBA_Surface *is) -{ - int i; - - if (!is->spans) return; - for (i = 0; i < is->h; i++) - { - while (is->spans[i]) - { - RGBA_Image_Span *sp; - - sp = is->spans[i]; - is->spans[i] = evas_object_list_remove(sp, sp); - free(sp); - } - } - free(is->spans); -} +#endif RGBA_Surface * evas_common_image_surface_new(RGBA_Image *im) @@ -219,7 +191,6 @@ evas_common_image_surface_dealloc(RGBA_Surface *is) free(is->data); is->data = NULL; } - evas_common_image_surface_alpha_tiles_free(is); } RGBA_Image * @@ -524,7 +495,6 @@ evas_common_image_dirty(RGBA_Image *im) { int i; - if (im->image) evas_common_image_surface_alpha_tiles_free(im->image); evas_common_image_unstore(im); im->flags |= RGBA_IMAGE_IS_DIRTY; } diff --git a/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler.c b/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler.c index 3df506613d..557c875d49 100644 --- a/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler.c +++ b/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler.c @@ -209,8 +209,6 @@ SCALE_FUNC(RGBA_Image *src, RGBA_Image *dst, * -:- * */ - /* 8x8 tiles - this will incurr about a < 2% memory overhead */ - evas_common_image_surface_alpha_tiles_calc(src->image, 8); /* if 1:1 scale */ if ((dst_region_w == src_region_w) && diff --git a/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler_noscale.c b/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler_noscale.c index 08b489488e..3b45aebfa0 100644 --- 
a/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler_noscale.c +++ b/legacy/evas/src/lib/engines/common/evas_scale_smooth_scaler_noscale.c @@ -32,63 +32,11 @@ Gfx_Func_Blend_Src_Dst func; func = evas_common_draw_func_blend_get(src, dst, dst_clip_w); -#if 0 - /* part of the spans experiemnt. doesnt seem to help much on top of - * what we already have - */ - if (src->image->spans) + for (y = 0; y < dst_clip_h; y++) { - int x2, y2; - int xoff, woff; - RGBA_Image_Flags pflags; - Gfx_Func_Blend_Src_Dst func_solid; - - pflags = src->flags; - src->flags &= ~RGBA_IMAGE_HAS_ALPHA; - func_solid = evas_common_draw_func_blend_get(src, dst, dst_clip_w); - src->flags = pflags; - - x2 = (dst_clip_x - dst_region_x) + src_region_x; - y2 = (dst_clip_y - dst_region_y) + src_region_y; - for (y = 0; y < dst_clip_h; y++, y2++) - { - Evas_Object_List *l; - - for (l = src->image->spans[y2]; l; l = l->next) - { - RGBA_Image_Span *sp; - - sp = l; - if ((sp->x + sp->w) > x2) - { - xoff = sp->x - x2; - woff = sp->w; - if (xoff < 0) - { - woff += xoff; - xoff = 0; - } - if ((xoff + woff) > (dst_clip_w)) - woff += (dst_clip_w) - (xoff + woff); - if (sp->v == 2) - func_solid(ptr + xoff, dst_ptr + xoff, woff); - else - func(ptr + xoff, dst_ptr + xoff, woff); - } - } - ptr += src_w; - dst_ptr += dst_w; - } - } - else -#endif - { - for (y = 0; y < dst_clip_h; y++) - { - func(ptr, dst_ptr, dst_clip_w); - ptr += src_w; - dst_ptr += dst_w; - } + func(ptr, dst_ptr, dst_clip_w); + ptr += src_w; + dst_ptr += dst_w; } } } diff --git a/legacy/evas/src/lib/include/evas_common.h b/legacy/evas/src/lib/include/evas_common.h index 14e4108021..699f6d47bc 100644 --- a/legacy/evas/src/lib/include/evas_common.h +++ b/legacy/evas/src/lib/include/evas_common.h @@ -291,14 +291,6 @@ struct _RGBA_Surface DATA32 *data; char no_free : 1; RGBA_Image *im; - RGBA_Image_Span **spans; -}; - -struct _RGBA_Image_Span -{ - Evas_Object_List _list_data; - int x, w; - int v; }; struct _RGBA_Image @@ -769,9 +761,6 @@ void 
evas_common_scale_rgba_in_to_out_clip_sample (RGBA_Image *src, RGBA_Im /****/ void evas_common_image_init (void); void evas_common_image_shutdown (void); - -void evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize); -void evas_common_image_surface_alpha_tiles_free(RGBA_Surface *is); RGBA_Surface *evas_common_image_surface_new (RGBA_Image *im); void evas_common_image_surface_free (RGBA_Surface *is); diff --git a/legacy/evas/src/lib/include/evas_mmx.h b/legacy/evas/src/lib/include/evas_mmx.h index a0a8652db8..5bb56e766a 100644 --- a/legacy/evas/src/lib/include/evas_mmx.h +++ b/legacy/evas/src/lib/include/evas_mmx.h @@ -573,6 +573,24 @@ typedef union { : \ : "r" (var) \ ); +#define prefetch0(var) \ + __asm__ __volatile__ ( \ + "prefetcht0 (%0) \n" \ + : \ + : "r" (var) \ + ); +#define prefetch1(var) \ + __asm__ __volatile__ ( \ + "prefetcht1 (%0) \n" \ + : \ + : "r" (var) \ + ); +#define prefetch2(var) \ + __asm__ __volatile__ ( \ + "prefetcht2 (%0) \n" \ + : \ + : "r" (var) \ + ); #define pshufw(r1, r2, imm) \ __asm__ __volatile__ ( \ "pshufw $" #imm ", %" #r1 ", %" #r2 " \n" \