span lists were a failure. slower than the current brute force method - and

slower to boot.

ooh.. found i was messing up the memcpy's. fixed :) and much faster too :)


SVN revision: 13103
This commit is contained in:
Carsten Haitzler 2005-01-27 10:05:41 +00:00
parent 2b34d43044
commit 44f0d70286
6 changed files with 51 additions and 130 deletions

View File

@ -204,13 +204,13 @@ evas_common_copy_pixels_rgba_to_rgba_mmx(DATA32 *src, DATA32 *dst, int len)
src_ptr = src;
dst_ptr = dst;
dst_end_ptr = dst + len;
dst_end_ptr_pre = dst + ((len / 10) * 10);
dst_end_ptr_pre = dst + ((len / 16) * 16);
while (dst_ptr < dst_end_ptr_pre)
{
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
src_ptr+=10;
dst_ptr+=10;
MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
src_ptr+=16;
dst_ptr+=16;
}
while (dst_ptr < dst_end_ptr)
{
@ -310,15 +310,14 @@ evas_common_copy_pixels_rgba_to_rgba_sse(DATA32 *src, DATA32 *dst, int len)
src_ptr = src;
dst_ptr = dst;
dst_end_ptr = dst + len;
dst_end_ptr_pre = dst + ((len / 10) * 10);
dst_end_ptr_pre = dst + ((len / 16) * 16);
while (dst_ptr < dst_end_ptr_pre)
{
prefetch(&src_ptr[128]);
prefetch(&dst_ptr[128]);
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
src_ptr+=10;
dst_ptr+=10;
prefetch(&src_ptr[16]);
MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
src_ptr+=16;
dst_ptr+=16;
}
while (dst_ptr < dst_end_ptr)
{
@ -373,21 +372,21 @@ evas_common_copy_pixels_rev_rgba_to_rgba_mmx(DATA32 *src, DATA32 *dst, int len)
{
DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre;
src_ptr = src + len - 10;
dst_ptr = dst + len - 10;
src_ptr = src + len - 16;
dst_ptr = dst + len - 16;
dst_end_ptr = dst;
dst_end_ptr_pre = dst + len - ((len / 10) * 10);
dst_end_ptr_pre = dst + len - ((len / 16) * 16);
if (len >= 10)
if (len >= 16)
{
while (dst_ptr >= dst_end_ptr_pre)
{
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
src_ptr-=10;
dst_ptr-=10;
MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
src_ptr-=16;
dst_ptr-=16;
}
src_ptr+=9;
dst_ptr+=9;
src_ptr+=15;
dst_ptr+=15;
while (dst_ptr >= dst_end_ptr)
{
*dst_ptr = *src_ptr;
@ -415,23 +414,22 @@ evas_common_copy_pixels_rev_rgba_to_rgba_sse(DATA32 *src, DATA32 *dst, int len)
{
DATA32 *src_ptr, *dst_ptr, *dst_end_ptr, *dst_end_ptr_pre;
src_ptr = src + len - 10;
dst_ptr = dst + len - 10;
src_ptr = src + len - 16;
dst_ptr = dst + len - 16;
dst_end_ptr = dst;
dst_end_ptr_pre = dst + len - ((len / 10) * 10);
dst_end_ptr_pre = dst + len - ((len / 16) * 16);
if (len >= 10)
if (len >= 16)
{
while (dst_ptr >= dst_end_ptr_pre)
{
prefetch(&src_ptr[-128]);
prefetch(&dst_ptr[-128]);
prefetch(&src_ptr[-16]);
MOVE_10DWORDS_MMX(src_ptr, dst_ptr);
src_ptr-=10;
dst_ptr-=10;
src_ptr-=16;
dst_ptr-=16;
}
src_ptr+=9;
dst_ptr+=9;
src_ptr+=15;
dst_ptr+=15;
while (dst_ptr >= dst_end_ptr)
{
*dst_ptr = *src_ptr;

View File

@ -76,22 +76,13 @@ evas_common_image_shutdown(void)
#endif
}
/* alpha tiles! - actually span lists - need to do it as span lists */
#if 0
void
evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize)
{
int x, y;
DATA32 *ptr;
#if 1
return;
#endif
/* hmm i only get about a 15% speedup on my "best cases". the complexity
* imho isn't worth the small gain, so i have disabled it here :( (this
* is best case scenario - average case will be much less gain)
*
* thought for now the only case is
*/
if (is->spans) return;
if (!(is->im->flags & RGBA_IMAGE_HAS_ALPHA)) return;
/* FIXME: dont handle alpha only images yet */
@ -149,26 +140,7 @@ evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize)
}
}
}
/*
 * Release every span attached to a surface, then the per-row table itself.
 *
 * is->spans is walked as an array of is->h list heads; each head is unlinked
 * and freed until the row is empty.  A surface with no span table is left
 * untouched.
 *
 * The table pointer is reset to NULL once freed.  This function is reached
 * from both evas_common_image_dirty() and
 * evas_common_image_surface_dealloc(), and
 * evas_common_image_surface_alpha_tiles_calc() treats a non-NULL is->spans as
 * "already computed"; leaving the stale pointer behind would cause a second
 * free of the same memory and would stop the spans from ever being rebuilt.
 */
void
evas_common_image_surface_alpha_tiles_free(RGBA_Surface *is)
{
   int i;

   if (!is->spans) return;
   for (i = 0; i < is->h; i++)
     {
        while (is->spans[i])
          {
             RGBA_Image_Span *sp;

             sp = is->spans[i];
             /* unlink the head; the return value is stored back as the
              * row's list (presumably the new head - confirm against
              * evas_object_list_remove) */
             is->spans[i] = evas_object_list_remove(sp, sp);
             free(sp);
          }
     }
   free(is->spans);
   is->spans = NULL;
}
#endif
RGBA_Surface *
evas_common_image_surface_new(RGBA_Image *im)
@ -219,7 +191,6 @@ evas_common_image_surface_dealloc(RGBA_Surface *is)
free(is->data);
is->data = NULL;
}
evas_common_image_surface_alpha_tiles_free(is);
}
RGBA_Image *
@ -524,7 +495,6 @@ evas_common_image_dirty(RGBA_Image *im)
{
int i;
if (im->image) evas_common_image_surface_alpha_tiles_free(im->image);
evas_common_image_unstore(im);
im->flags |= RGBA_IMAGE_IS_DIRTY;
}

View File

@ -209,8 +209,6 @@ SCALE_FUNC(RGBA_Image *src, RGBA_Image *dst,
* -:-
*
*/
/* 8x8 tiles - this will incur about a < 2% memory overhead */
evas_common_image_surface_alpha_tiles_calc(src->image, 8);
/* if 1:1 scale */
if ((dst_region_w == src_region_w) &&

View File

@ -32,63 +32,11 @@
Gfx_Func_Blend_Src_Dst func;
func = evas_common_draw_func_blend_get(src, dst, dst_clip_w);
#if 0
/* part of the spans experiment. doesn't seem to help much on top of
* what we already have
*/
if (src->image->spans)
for (y = 0; y < dst_clip_h; y++)
{
int x2, y2;
int xoff, woff;
RGBA_Image_Flags pflags;
Gfx_Func_Blend_Src_Dst func_solid;
pflags = src->flags;
src->flags &= ~RGBA_IMAGE_HAS_ALPHA;
func_solid = evas_common_draw_func_blend_get(src, dst, dst_clip_w);
src->flags = pflags;
x2 = (dst_clip_x - dst_region_x) + src_region_x;
y2 = (dst_clip_y - dst_region_y) + src_region_y;
for (y = 0; y < dst_clip_h; y++, y2++)
{
Evas_Object_List *l;
for (l = src->image->spans[y2]; l; l = l->next)
{
RGBA_Image_Span *sp;
sp = l;
if ((sp->x + sp->w) > x2)
{
xoff = sp->x - x2;
woff = sp->w;
if (xoff < 0)
{
woff += xoff;
xoff = 0;
}
if ((xoff + woff) > (dst_clip_w))
woff += (dst_clip_w) - (xoff + woff);
if (sp->v == 2)
func_solid(ptr + xoff, dst_ptr + xoff, woff);
else
func(ptr + xoff, dst_ptr + xoff, woff);
}
}
ptr += src_w;
dst_ptr += dst_w;
}
}
else
#endif
{
for (y = 0; y < dst_clip_h; y++)
{
func(ptr, dst_ptr, dst_clip_w);
ptr += src_w;
dst_ptr += dst_w;
}
func(ptr, dst_ptr, dst_clip_w);
ptr += src_w;
dst_ptr += dst_w;
}
}
}

View File

@ -291,14 +291,6 @@ struct _RGBA_Surface
DATA32 *data;
char no_free : 1;
RGBA_Image *im;
RGBA_Image_Span **spans;
};
/*
 * One horizontal run of pixels within a single image row.  Spans are chained
 * into per-row lists hanging off RGBA_Surface.spans.
 */
struct _RGBA_Image_Span
{
   /* List linkage.  Must stay the first member: the span code assigns
    * Evas_Object_List pointers directly to RGBA_Image_Span pointers. */
   Evas_Object_List  _list_data;
   /* Column at which the run starts. */
   int               x;
   /* Length of the run in pixels. */
   int               w;
   /* Run classification; the blend loop draws a span with v == 2 using the
    * function selected with the alpha flag cleared, anything else with the
    * regular blend function. */
   int               v;
};
struct _RGBA_Image
@ -769,9 +761,6 @@ void evas_common_scale_rgba_in_to_out_clip_sample (RGBA_Image *src, RGBA_Im
/****/
void evas_common_image_init (void);
void evas_common_image_shutdown (void);
void evas_common_image_surface_alpha_tiles_calc(RGBA_Surface *is, int tsize);
void evas_common_image_surface_alpha_tiles_free(RGBA_Surface *is);
RGBA_Surface *evas_common_image_surface_new (RGBA_Image *im);
void evas_common_image_surface_free (RGBA_Surface *is);

View File

@ -573,6 +573,24 @@ typedef union {
: \
: "r" (var) \
);
/*
 * prefetch0(var): emit a single `prefetcht0` instruction whose memory operand
 * is the address held in var (var is loaded into a general register and
 * dereferenced as `(%0)`).  No outputs or clobbers are declared.  See the
 * Intel PREFETCHh reference for what the T0 hint means on a given CPU.
 *
 * NOTE: the expansion already ends in `;`, so `prefetch0(p);` yields an extra
 * empty statement - keep that in mind inside an unbraced if/else.
 */
#define prefetch0(var) \
__asm__ __volatile__ ( \
"prefetcht0 (%0) \n" \
: \
: "r" (var) \
);
/*
 * prefetch1(var): emit a single `prefetcht1` instruction whose memory operand
 * is the address held in var (var is loaded into a general register and
 * dereferenced as `(%0)`).  No outputs or clobbers are declared.  See the
 * Intel PREFETCHh reference for what the T1 hint means on a given CPU.
 *
 * NOTE: the expansion already ends in `;`, so `prefetch1(p);` yields an extra
 * empty statement - keep that in mind inside an unbraced if/else.
 */
#define prefetch1(var) \
__asm__ __volatile__ ( \
"prefetcht1 (%0) \n" \
: \
: "r" (var) \
);
/*
 * prefetch2(var): emit a single `prefetcht2` instruction whose memory operand
 * is the address held in var (var is loaded into a general register and
 * dereferenced as `(%0)`).  No outputs or clobbers are declared.  See the
 * Intel PREFETCHh reference for what the T2 hint means on a given CPU.
 *
 * NOTE: the expansion already ends in `;`, so `prefetch2(p);` yields an extra
 * empty statement - keep that in mind inside an unbraced if/else.
 */
#define prefetch2(var) \
__asm__ __volatile__ ( \
"prefetcht2 (%0) \n" \
: \
: "r" (var) \
);
#define pshufw(r1, r2, imm) \
__asm__ __volatile__ ( \
"pshufw $" #imm ", %" #r1 ", %" #r2 " \n" \