forked from enlightenment/efl
span lists were a failure. slower than the current brute force method - and
slower to boot. ooh.. found i was fucking up the memcpy's. fixed :) and much faster too :) SVN revision: 13103
This commit is contained in:
parent
2b34d43044
commit
44f0d70286
|
@ -204,13 +204,13 @@ evas_common_copy_pixels_rgba_to_rgba_mmx(DATA32 *src, DATA32 *dst, int len)
|
|||
src_ptr = src;
|
||||
dst_ptr = dst;
|
||||
dst_end_ptr = dst + len;
|
||||
dst_end_ptr_pre = dst + ((len / 10) * 10);
|
||||
dst_end_ptr_pre = dst + ((len / 16) * 16);
|
||||
|
||||
while (dst_ptr < dst_end_ptr_pre)
|
||||
{
|
||||
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
|
||||
src_ptr+=10;
|
||||
dst_ptr+=10;
|
||||
MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
|
||||
src_ptr+=16;
|
||||
dst_ptr+=16;
|
||||
}
|
||||
while (dst_ptr < dst_end_ptr)
|
||||
{
|
||||
|
@ -310,15 +310,14 @@ evas_common_copy_pixels_rgba_to_rgba_sse(DATA32 *src, DATA32 *dst, int len)
|
|||
src_ptr = src;
|
||||
dst_ptr = dst;
|
||||
dst_end_ptr = dst + len;
|
||||
dst_end_ptr_pre = dst + ((len / 10) * 10);
|
||||
dst_end_ptr_pre = dst + ((len / 16) * 16);
|
||||
|
||||
while (dst_ptr < dst_end_ptr_pre)
|
||||
{
|
||||
prefetch(&src_ptr[128]);
|
||||
prefetch(&dst_ptr[128]);
|
||||
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
|
||||
src_ptr+=10;
|
||||
dst_ptr+=10;
|
||||
prefetch(&src_ptr[16]);
|
||||
MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
|
||||
src_ptr+=16;
|
||||
dst_ptr+=16;
|
||||
}
|
||||
while (dst_ptr < dst_end_ptr)
|
||||
{
|
||||
|
@ -373,21 +372,21 @@ evas_common_copy_pixels_rev_rgba_to_rgba_mmx(DATA32 *src, DATA32 *dst, int len)
|
|||
{
|
||||
DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre;
|
||||
|
||||
src_ptr = src + len - 10;
|
||||
dst_ptr = dst + len - 10;
|
||||
src_ptr = src + len - 16;
|
||||
dst_ptr = dst + len - 16;
|
||||
dst_end_ptr = dst;
|
||||
dst_end_ptr_pre = dst + len - ((len / 10) * 10);
|
||||
dst_end_ptr_pre = dst + len - ((len / 16) * 16);
|
||||
|
||||
if (len >= 10)
|
||||
if (len >= 16)
|
||||
{
|
||||
while (dst_ptr >= dst_end_ptr_pre)
|
||||
{
|
||||
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
|
||||
src_ptr-=10;
|
||||
dst_ptr-=10;
|
||||
MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
|
||||
src_ptr-=16;
|
||||
dst_ptr-=16;
|
||||
}
|
||||
src_ptr+=9;
|
||||
dst_ptr+=9;
|
||||
src_ptr+=15;
|
||||
dst_ptr+=15;
|
||||
while (dst_ptr >= dst_end_ptr)
|
||||
{
|
||||
*dst_ptr = *src_ptr;
|
||||
|
@ -415,23 +414,22 @@ evas_common_copy_pixels_rev_rgba_to_rgba_sse(DATA32 *src, DATA32 *dst, int len)
|
|||
{
|
||||
DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre;
|
||||
|
||||
src_ptr = src + len - 10;
|
||||
dst_ptr = dst + len - 10;
|
||||
src_ptr = src + len - 16;
|
||||
dst_ptr = dst + len - 16;
|
||||
dst_end_ptr = dst;
|
||||
dst_end_ptr_pre = dst + len - ((len / 10) * 10);
|
||||
dst_end_ptr_pre = dst + len - ((len / 16) * 16);
|
||||
|
||||
if (len >= 10)
|
||||
if (len >= 16)
|
||||
{
|
||||
while (dst_ptr >= dst_end_ptr_pre)
|
||||
{
|
||||
prefetch(&src_ptr[-128]);
|
||||
prefetch(&dst_ptr[-128]);
|
||||
prefetch(&src_ptr[-16]);
|
||||
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
|
||||
src_ptr-=10;
|
||||
dst_ptr-=10;
|
||||
src_ptr-=16;
|
||||
dst_ptr-=16;
|
||||
}
|
||||
src_ptr+=9;
|
||||
dst_ptr+=9;
|
||||
src_ptr+=15;
|
||||
dst_ptr+=15;
|
||||
while (dst_ptr >= dst_end_ptr)
|
||||
{
|
||||
*dst_ptr = *src_ptr;
|
||||
|
|
|
@ -76,22 +76,13 @@ evas_common_image_shutdown(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* alpha tiles! - actually span lists - need to do it as span lists */
|
||||
#if 0
|
||||
void
|
||||
evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize)
|
||||
{
|
||||
int x, y;
|
||||
DATA32 *ptr;
|
||||
|
||||
#if 1
|
||||
return;
|
||||
#endif
|
||||
/* hmm i only get about a 15% speedup on my "best cases". the complexity
|
||||
* imho isn't worth the small gain, so i have disabled it here :( (this
|
||||
* is best case scenario - average case will be much less gain)
|
||||
*
|
||||
* thought for now the only case is
|
||||
*/
|
||||
if (is->spans) return;
|
||||
if (!(is->im->flags & RGBA_IMAGE_HAS_ALPHA)) return;
|
||||
/* FIXME: dont handle alpha only images yet */
|
||||
|
@ -149,26 +140,7 @@ evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
evas_common_image_surface_alpha_tiles_free(RGBA_Surface *is)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!is->spans) return;
|
||||
for (i = 0; i < is->h; i++)
|
||||
{
|
||||
while (is->spans[i])
|
||||
{
|
||||
RGBA_Image_Span *sp;
|
||||
|
||||
sp = is->spans[i];
|
||||
is->spans[i] = evas_object_list_remove(sp, sp);
|
||||
free(sp);
|
||||
}
|
||||
}
|
||||
free(is->spans);
|
||||
}
|
||||
#endif
|
||||
|
||||
RGBA_Surface *
|
||||
evas_common_image_surface_new(RGBA_Image *im)
|
||||
|
@ -219,7 +191,6 @@ evas_common_image_surface_dealloc(RGBA_Surface *is)
|
|||
free(is->data);
|
||||
is->data = NULL;
|
||||
}
|
||||
evas_common_image_surface_alpha_tiles_free(is);
|
||||
}
|
||||
|
||||
RGBA_Image *
|
||||
|
@ -524,7 +495,6 @@ evas_common_image_dirty(RGBA_Image *im)
|
|||
{
|
||||
int i;
|
||||
|
||||
if (im->image) evas_common_image_surface_alpha_tiles_free(im->image);
|
||||
evas_common_image_unstore(im);
|
||||
im->flags |= RGBA_IMAGE_IS_DIRTY;
|
||||
}
|
||||
|
|
|
@ -209,8 +209,6 @@ SCALE_FUNC(RGBA_Image *src, RGBA_Image *dst,
|
|||
* -:-
|
||||
*
|
||||
*/
|
||||
/* 8x8 tiles - this will incur about a < 2% memory overhead */
|
||||
evas_common_image_surface_alpha_tiles_calc(src->image, 8);
|
||||
|
||||
/* if 1:1 scale */
|
||||
if ((dst_region_w == src_region_w) &&
|
||||
|
|
|
@ -32,63 +32,11 @@
|
|||
Gfx_Func_Blend_Src_Dst func;
|
||||
|
||||
func = evas_common_draw_func_blend_get(src, dst, dst_clip_w);
|
||||
#if 0
|
||||
/* part of the spans experiment. doesn't seem to help much on top of
|
||||
* what we already have
|
||||
*/
|
||||
if (src->image->spans)
|
||||
for (y = 0; y < dst_clip_h; y++)
|
||||
{
|
||||
int x2, y2;
|
||||
int xoff, woff;
|
||||
RGBA_Image_Flags pflags;
|
||||
Gfx_Func_Blend_Src_Dst func_solid;
|
||||
|
||||
pflags = src->flags;
|
||||
src->flags &= ~RGBA_IMAGE_HAS_ALPHA;
|
||||
func_solid = evas_common_draw_func_blend_get(src, dst, dst_clip_w);
|
||||
src->flags = pflags;
|
||||
|
||||
x2 = (dst_clip_x - dst_region_x) + src_region_x;
|
||||
y2 = (dst_clip_y - dst_region_y) + src_region_y;
|
||||
for (y = 0; y < dst_clip_h; y++, y2++)
|
||||
{
|
||||
Evas_Object_List *l;
|
||||
|
||||
for (l = src->image->spans[y2]; l; l = l->next)
|
||||
{
|
||||
RGBA_Image_Span *sp;
|
||||
|
||||
sp = l;
|
||||
if ((sp->x + sp->w) > x2)
|
||||
{
|
||||
xoff = sp->x - x2;
|
||||
woff = sp->w;
|
||||
if (xoff < 0)
|
||||
{
|
||||
woff += xoff;
|
||||
xoff = 0;
|
||||
}
|
||||
if ((xoff + woff) > (dst_clip_w))
|
||||
woff += (dst_clip_w) - (xoff + woff);
|
||||
if (sp->v == 2)
|
||||
func_solid(ptr + xoff, dst_ptr + xoff, woff);
|
||||
else
|
||||
func(ptr + xoff, dst_ptr + xoff, woff);
|
||||
}
|
||||
}
|
||||
ptr += src_w;
|
||||
dst_ptr += dst_w;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
for (y = 0; y < dst_clip_h; y++)
|
||||
{
|
||||
func(ptr, dst_ptr, dst_clip_w);
|
||||
ptr += src_w;
|
||||
dst_ptr += dst_w;
|
||||
}
|
||||
func(ptr, dst_ptr, dst_clip_w);
|
||||
ptr += src_w;
|
||||
dst_ptr += dst_w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -291,14 +291,6 @@ struct _RGBA_Surface
|
|||
DATA32 *data;
|
||||
char no_free : 1;
|
||||
RGBA_Image *im;
|
||||
RGBA_Image_Span **spans;
|
||||
};
|
||||
|
||||
struct _RGBA_Image_Span
|
||||
{
|
||||
Evas_Object_List _list_data;
|
||||
int x, w;
|
||||
int v;
|
||||
};
|
||||
|
||||
struct _RGBA_Image
|
||||
|
@ -770,9 +762,6 @@ void evas_common_scale_rgba_in_to_out_clip_sample (RGBA_Image *src, RGBA_Im
|
|||
void evas_common_image_init (void);
|
||||
void evas_common_image_shutdown (void);
|
||||
|
||||
void evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize);
|
||||
void evas_common_image_surface_alpha_tiles_free(RGBA_Surface *is);
|
||||
|
||||
RGBA_Surface *evas_common_image_surface_new (RGBA_Image *im);
|
||||
void evas_common_image_surface_free (RGBA_Surface *is);
|
||||
void evas_common_image_surface_alloc (RGBA_Surface *is);
|
||||
|
|
|
@ -573,6 +573,24 @@ typedef union {
|
|||
: \
|
||||
: "r" (var) \
|
||||
);
|
||||
#define prefetch0(var) \
|
||||
__asm__ __volatile__ ( \
|
||||
"prefetcht0 (%0) \n" \
|
||||
: \
|
||||
: "r" (var) \
|
||||
);
|
||||
#define prefetch1(var) \
|
||||
__asm__ __volatile__ ( \
|
||||
"prefetcht1 (%0) \n" \
|
||||
: \
|
||||
: "r" (var) \
|
||||
);
|
||||
#define prefetch2(var) \
|
||||
__asm__ __volatile__ ( \
|
||||
"prefetcht2 (%0) \n" \
|
||||
: \
|
||||
: "r" (var) \
|
||||
);
|
||||
#define pshufw(r1, r2, imm) \
|
||||
__asm__ __volatile__ ( \
|
||||
"pshufw $" #imm ", %" #r1 ", %" #r2 " \n" \
|
||||
|
|
Loading…
Reference in New Issue