Added new shade/tint routines, including some done in MMX assembly, by Willem Monsuwe <willem@stack.nl>. Thanks Willem! These should prove to be faster than the old stuff, probably by quite a bit. SVN revision: 2595eterm-0.10
parent
e6facd5833
commit
dda8ccf31f
7 changed files with 766 additions and 41 deletions
@ -0,0 +1,482 @@ |
||||
/* |
||||
* Copyright (C) 1997-2000, Michael Jennings |
||||
* |
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy |
||||
* of this software and associated documentation files (the "Software"), to |
||||
* deal in the Software without restriction, including without limitation the |
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
||||
* sell copies of the Software, and to permit persons to whom the Software is |
||||
* furnished to do so, subject to the following conditions: |
||||
* |
||||
* The above copyright notice and this permission notice shall be included in |
||||
* all copies of the Software, its documentation and marketing & publicity |
||||
* materials, and acknowledgment shall be given in the documentation, materials |
||||
* and software packages that this Software was used. |
||||
* |
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||||
* THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
||||
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||||
*/ |
||||
|
||||
#include "config.h" |
||||
|
||||
/* MMX routines for tinting XImages written by Willem Monsuwe <willem@stack.nl> */ |
||||
|
||||
/* Function calling conventions: |
||||
* shade_ximage_xx(void *data, int bpl, int w, int h, int rm, int gm, int bm);
|
||||
*/ |
||||
|
||||
#ifdef HAVE_MMX |
||||
|
||||
/*
 * Stack operands for the seven C arguments, addressed relative to the frame
 * pointer that ENTER establishes (return address at 4(%ebp), first argument
 * at 8(%ebp)).  These are ordinary preprocessor names: every later token in
 * this file spelled data, bpl, w, h, rm, gm or bm is replaced by the operand.
 */
#define data     8(%ebp)        /* void *data : first pixel of the image   */
#define bpl     12(%ebp)        /* int bpl    : byte distance between rows */
#define w       16(%ebp)        /* int w      : pixels per row             */
#define h       20(%ebp)        /* int h      : number of rows             */
#define rm      24(%ebp)        /* int rm     : red multiplier             */
#define gm      28(%ebp)        /* int gm     : green multiplier           */
#define bm      32(%ebp)        /* int bm     : blue multiplier            */
||||
|
||||
/* Entry points exported to the linker, all typed as functions. */
        .globl  shade_ximage_15_mmx
        .globl  shade_ximage_16_mmx
        .globl  shade_ximage_32_mmx

        .type   shade_ximage_15_mmx, @function
        .type   shade_ximage_16_mmx, @function
        .type   shade_ximage_32_mmx, @function

/* Nothing is placed in .bss; all code below goes into .text. */
        .bss
        .text
        .align  8
||||
|
||||
/*
 * ENTER -- prologue shared by every routine in this file.
 *
 * Builds an %ebp frame, saves %ebx, %ecx, %edx, %edi and %esi (LEAVE pops
 * them in the reverse order), then loads the arguments the loops keep in
 * registers:
 *      %esi = data     %ebx = w     %edx = h
 * The statements are joined with ';' because the preprocessor collapses the
 * macro into a single source line.
 */
#define ENTER \
        pushl   %ebp            ;\
        movl    %esp, %ebp      ;\
        pushl   %ebx            ;\
        pushl   %ecx            ;\
        pushl   %edx            ;\
        pushl   %edi            ;\
        pushl   %esi            ;\
        movl    h, %edx         ;\
        movl    w, %ebx         ;\
        movl    data, %esi
||||
|
||||
/*
 * LEAVE -- epilogue shared by every routine in this file.
 *
 * Executes emms to release the MMX state, restores the registers saved by
 * ENTER in reverse push order, tears down the %ebp frame and returns.
 * The macro also defines a numeric label 4 at its first instruction; a
 * "4f" reference placed after the last explicit "4:" of a routine would
 * bind to it.
 */
#define LEAVE \
4:                              ;\
        emms                    ;\
        popl    %esi            ;\
        popl    %edi            ;\
        popl    %edx            ;\
        popl    %ecx            ;\
        popl    %ebx            ;\
        leave                   ;\
        ret
||||
|
||||
|
||||
/*
 * shade_ximage_15_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm)
 *
 * Scales the three 5-bit colour fields of every 16-bit pixel (red in bits
 * 10-14, green in bits 5-9, blue in bits 0-4) of a w x h image in place.
 * Each field c becomes (c * m) >> 8, so a multiplier of 256 leaves it
 * unchanged.  Consecutive rows are bpl bytes apart.  If any multiplier is
 * greater than 256 control transfers to shade_ximage_15_mmx_saturate, which
 * clamps the results.  Returns immediately when w <= 0 or h <= 0.
 *
 * Register use inside the loops:
 *      %esi  row base biased by 2*w - 6, so index 3 - w addresses pixel 0
 *      %ebx  -w
 *      %ecx  pixel index; a row is finished once it exceeds 2
 *      %edx  rows still to process
 *      %mm5, %mm6, %mm7  low word of rm, gm, bm in all four word lanes
 *      %eax  scratch for the single-pixel tail
 */
shade_ximage_15_mmx:
        ENTER

        testl   %ebx, %ebx
        jle     5f                      /* w <= 0: nothing to shade */
        testl   %edx, %edx
        jle     5f                      /* h <= 0: the row counter would wrap */

        leal    -6(%esi, %ebx, 2), %esi
        negl    %ebx

        /* Broadcast the low 16 bits of each multiplier to four lanes */
        movd    rm, %mm5
        punpcklwd %mm5, %mm5
        punpckldq %mm5, %mm5
        movd    gm, %mm6
        punpcklwd %mm6, %mm6
        punpckldq %mm6, %mm6
        movd    bm, %mm7
        punpcklwd %mm7, %mm7
        punpckldq %mm7, %mm7

        /* Any multiplier above 256 can overflow a field: use the clamping loop */
        cmpl    $256, rm
        jg      shade_ximage_15_mmx_saturate
        cmpl    $256, gm
        jg      shade_ximage_15_mmx_saturate
        cmpl    $256, bm
        jg      shade_ximage_15_mmx_saturate

1:      /* ---- start of a row ---- */
        movl    %ebx, %ecx
        addl    $3, %ecx
        jns     3f                      /* fewer than 4 pixels: tail loop only */

2:      /* ---- four pixels per iteration ---- */
        movq    (%esi, %ecx, 2), %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $10, %mm0               /* red   -> bits 0-4              */
        psllw   $8, %mm0                /* red   -> bits 8-12             */
        psrlw   $5, %mm1                /* drop blue                      */
        psllw   $11, %mm1               /* drop red; green -> bits 11-15  */
        psrlw   $3, %mm1                /* green -> bits 8-12             */
        psllw   $11, %mm2               /* drop red+green; blue -> 11-15  */
        psrlw   $3, %mm2                /* blue  -> bits 8-12             */

        pmulhw  %mm5, %mm0              /* high word of (c << 8) * m = (c * m) >> 8 */
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        psllw   $10, %mm0               /* red back to bits 10-14   */
        psllw   $5, %mm1                /* green back to bits 5-9   */
        por     %mm2, %mm0
        por     %mm1, %mm0

        movq    %mm0, (%esi, %ecx, 2)

        addl    $4, %ecx
        js      2b
        jmp     4f                      /* 0 to 3 pixels remain: let the tail test decide */

3:      /* ---- one pixel per iteration ---- */
        movzwl  (%esi, %ecx, 2), %eax
        movd    %eax, %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $10, %mm0
        psllw   $8, %mm0
        psrlw   $5, %mm1
        psllw   $11, %mm1
        psrlw   $3, %mm1
        psllw   $11, %mm2
        psrlw   $3, %mm2

        pmulhw  %mm5, %mm0
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        psllw   $10, %mm0
        psllw   $5, %mm1
        por     %mm2, %mm0
        por     %mm1, %mm0

        movd    %mm0, %eax
        movw    %ax, (%esi, %ecx, 2)

        incl    %ecx
4:
        cmpl    $2, %ecx                /* index 2 is the last pixel of the row */
        jng     3b

        addl    bpl, %esi               /* next row */
        decl    %edx
        jnz     1b
5:
        LEAVE
||||
|
||||
|
||||
/*
 * shade_ximage_15_mmx_saturate -- clamping variant of the 15-bit loop.
 *
 * Not a call target: it is reached by a jump from shade_ximage_15_mmx with
 * the ENTER frame still in place and with
 *      %esi  row base biased by 2*w - 6
 *      %ebx  -w
 *      %edx  rows still to process
 *      %mm5, %mm6, %mm7  rm, gm, bm in all four word lanes
 * Each 5-bit field is scaled to (c * m) >> 8 and then limited to 31 by an
 * unsigned saturating add of 0xffe0: anything above 31 sticks at 0xffff,
 * whose low five bits are 31.  Green and blue have the 0xffe0 bias
 * subtracted again.  Red does not: its shift by 10 pushes the bias out of
 * the word except for one bit that lands in bit 15, which is outside all
 * three 5-bit fields.
 */
shade_ximage_15_mmx_saturate:

        pcmpeqw %mm3, %mm3
        psllw   $5, %mm3                /* 0xffe0 in every word */

1:      /* ---- start of a row ---- */
        movl    %ebx, %ecx
        addl    $3, %ecx
        jns     3f                      /* fewer than 4 pixels: tail loop only */

2:      /* ---- four pixels per iteration ---- */
        movq    (%esi, %ecx, 2), %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $10, %mm0               /* red   -> bits 0-4              */
        psllw   $8, %mm0                /* red   -> bits 8-12             */
        psrlw   $5, %mm1                /* drop blue                      */
        psllw   $11, %mm1               /* drop red; green -> bits 11-15  */
        psrlw   $3, %mm1                /* green -> bits 8-12             */
        psllw   $11, %mm2               /* drop red+green; blue -> 11-15  */
        psrlw   $3, %mm2                /* blue  -> bits 8-12             */

        pmulhw  %mm5, %mm0              /* (c * m) >> 8, may exceed 31 */
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        paddusw %mm3, %mm0              /* clamp: low 5 bits = min(c, 31) */
        paddusw %mm3, %mm1
        paddusw %mm3, %mm2
        psubw   %mm3, %mm1              /* remove the bias from green */
        psubw   %mm3, %mm2              /* remove the bias from blue  */

        psllw   $10, %mm0               /* red back to bits 10-14   */
        psllw   $5, %mm1                /* green back to bits 5-9   */
        por     %mm2, %mm0
        por     %mm1, %mm0

        movq    %mm0, (%esi, %ecx, 2)

        addl    $4, %ecx
        js      2b
        jmp     4f                      /* 0 to 3 pixels remain: let the tail test decide */

3:      /* ---- one pixel per iteration ---- */
        movzwl  (%esi, %ecx, 2), %eax
        movd    %eax, %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $10, %mm0
        psllw   $8, %mm0
        psrlw   $5, %mm1
        psllw   $11, %mm1
        psrlw   $3, %mm1
        psllw   $11, %mm2
        psrlw   $3, %mm2

        pmulhw  %mm5, %mm0
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        paddusw %mm3, %mm0
        paddusw %mm3, %mm1
        paddusw %mm3, %mm2
        psubw   %mm3, %mm1
        psubw   %mm3, %mm2

        psllw   $10, %mm0
        psllw   $5, %mm1
        por     %mm2, %mm0
        por     %mm1, %mm0

        movd    %mm0, %eax
        movw    %ax, (%esi, %ecx, 2)

        incl    %ecx
4:
        cmpl    $2, %ecx                /* index 2 is the last pixel of the row */
        jng     3b

        addl    bpl, %esi               /* next row */
        decl    %edx
        jnz     1b
5:
        LEAVE
||||
|
||||
|
||||
/*
 * shade_ximage_16_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm)
 *
 * Scales the colour fields of every 16-bit pixel (red in bits 11-15, 6-bit
 * green in bits 5-10, blue in bits 0-4) of a w x h image in place.  Each
 * field c becomes (c * m) >> 8, so a multiplier of 256 leaves it unchanged.
 * Consecutive rows are bpl bytes apart.  If any multiplier is greater than
 * 256 control transfers to shade_ximage_16_mmx_saturate, which clamps the
 * results.  Returns immediately when w <= 0 or h <= 0.
 *
 * Register use inside the loops:
 *      %esi  row base biased by 2*w - 6, so index 3 - w addresses pixel 0
 *      %ebx  -w
 *      %ecx  pixel index; a row is finished once it exceeds 2
 *      %edx  rows still to process
 *      %mm5, %mm6, %mm7  low word of rm, gm, bm in all four word lanes
 *      %eax  scratch for the single-pixel tail
 */
shade_ximage_16_mmx:
        ENTER

        testl   %ebx, %ebx
        jle     5f                      /* w <= 0: nothing to shade */
        testl   %edx, %edx
        jle     5f                      /* h <= 0: the row counter would wrap */

        leal    -6(%esi, %ebx, 2), %esi
        negl    %ebx

        /* Broadcast the low 16 bits of each multiplier to four lanes */
        movd    rm, %mm5
        punpcklwd %mm5, %mm5
        punpckldq %mm5, %mm5
        movd    gm, %mm6
        punpcklwd %mm6, %mm6
        punpckldq %mm6, %mm6
        movd    bm, %mm7
        punpcklwd %mm7, %mm7
        punpckldq %mm7, %mm7

        /* Any multiplier above 256 can overflow a field: use the clamping loop */
        cmpl    $256, rm
        jg      shade_ximage_16_mmx_saturate
        cmpl    $256, gm
        jg      shade_ximage_16_mmx_saturate
        cmpl    $256, bm
        jg      shade_ximage_16_mmx_saturate

1:      /* ---- start of a row ---- */
        movl    %ebx, %ecx
        addl    $3, %ecx
        jns     3f                      /* fewer than 4 pixels: tail loop only */

2:      /* ---- four pixels per iteration ---- */
        movq    (%esi, %ecx, 2), %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $11, %mm0               /* red   -> bits 0-4              */
        psllw   $8, %mm0                /* red   -> bits 8-12             */
        psrlw   $5, %mm1                /* drop blue                      */
        psllw   $10, %mm1               /* drop red; green -> bits 10-15  */
        psrlw   $2, %mm1                /* green -> bits 8-13             */
        psllw   $11, %mm2               /* drop red+green; blue -> 11-15  */
        psrlw   $3, %mm2                /* blue  -> bits 8-12             */

        pmulhw  %mm5, %mm0              /* high word of (c << 8) * m = (c * m) >> 8 */
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        psllw   $11, %mm0               /* red back to bits 11-15   */
        psllw   $5, %mm1                /* green back to bits 5-10  */
        por     %mm2, %mm0
        por     %mm1, %mm0

        movq    %mm0, (%esi, %ecx, 2)

        addl    $4, %ecx
        js      2b
        jmp     4f                      /* 0 to 3 pixels remain: let the tail test decide */

3:      /* ---- one pixel per iteration ---- */
        movzwl  (%esi, %ecx, 2), %eax
        movd    %eax, %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $11, %mm0
        psllw   $8, %mm0
        psrlw   $5, %mm1
        psllw   $10, %mm1
        psrlw   $2, %mm1
        psllw   $11, %mm2
        psrlw   $3, %mm2

        pmulhw  %mm5, %mm0
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        psllw   $11, %mm0
        psllw   $5, %mm1
        por     %mm2, %mm0
        por     %mm1, %mm0

        movd    %mm0, %eax
        movw    %ax, (%esi, %ecx, 2)

        incl    %ecx
4:
        cmpl    $2, %ecx                /* index 2 is the last pixel of the row */
        jng     3b

        addl    bpl, %esi               /* next row */
        decl    %edx
        jnz     1b
5:
        LEAVE
||||
|
||||
|
||||
/*
 * shade_ximage_16_mmx_saturate -- clamping variant of the 16-bit loop.
 *
 * Not a call target: it is reached by a jump from shade_ximage_16_mmx with
 * the ENTER frame still in place and with
 *      %esi  row base biased by 2*w - 6
 *      %ebx  -w
 *      %edx  rows still to process
 *      %mm5, %mm6, %mm7  rm, gm, bm in all four word lanes
 * Each field is scaled to (c * m) >> 8 and then limited by an unsigned
 * saturating add: 0xffe0 caps the 5-bit red and blue fields at 31, 0xffc0
 * caps the 6-bit green field at 63.  Green and blue have their bias
 * subtracted again; red is shifted left by 11, which discards its bias.
 */
shade_ximage_16_mmx_saturate:

        pcmpeqw %mm3, %mm3
        movq    %mm3, %mm4
        psllw   $5, %mm3                /* 0xffe0 in every word (5-bit fields) */
        psllw   $6, %mm4                /* 0xffc0 in every word (6-bit green)  */

1:      /* ---- start of a row ---- */
        movl    %ebx, %ecx
        addl    $3, %ecx
        jns     3f                      /* fewer than 4 pixels: tail loop only */

2:      /* ---- four pixels per iteration ---- */
        movq    (%esi, %ecx, 2), %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $11, %mm0               /* red   -> bits 0-4              */
        psllw   $8, %mm0                /* red   -> bits 8-12             */
        psrlw   $5, %mm1                /* drop blue                      */
        psllw   $10, %mm1               /* drop red; green -> bits 10-15  */
        psrlw   $2, %mm1                /* green -> bits 8-13             */
        psllw   $11, %mm2               /* drop red+green; blue -> 11-15  */
        psrlw   $3, %mm2                /* blue  -> bits 8-12             */

        pmulhw  %mm5, %mm0              /* (c * m) >> 8, may exceed the field */
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        paddusw %mm3, %mm0              /* clamp red to 31   */
        paddusw %mm4, %mm1              /* clamp green to 63 */
        paddusw %mm3, %mm2              /* clamp blue to 31  */
        psubw   %mm4, %mm1              /* remove the bias from green */
        psubw   %mm3, %mm2              /* remove the bias from blue  */

        psllw   $11, %mm0               /* red back to bits 11-15   */
        psllw   $5, %mm1                /* green back to bits 5-10  */
        por     %mm2, %mm0
        por     %mm1, %mm0

        movq    %mm0, (%esi, %ecx, 2)

        addl    $4, %ecx
        js      2b
        jmp     4f                      /* 0 to 3 pixels remain: let the tail test decide */

3:      /* ---- one pixel per iteration ---- */
        movzwl  (%esi, %ecx, 2), %eax
        movd    %eax, %mm0
        movq    %mm0, %mm1
        movq    %mm0, %mm2

        psrlw   $11, %mm0
        psllw   $8, %mm0
        psrlw   $5, %mm1
        psllw   $10, %mm1
        psrlw   $2, %mm1
        psllw   $11, %mm2
        psrlw   $3, %mm2

        pmulhw  %mm5, %mm0
        pmulhw  %mm6, %mm1
        pmulhw  %mm7, %mm2

        paddusw %mm3, %mm0
        paddusw %mm4, %mm1
        paddusw %mm3, %mm2
        psubw   %mm4, %mm1
        psubw   %mm3, %mm2

        psllw   $11, %mm0
        psllw   $5, %mm1
        por     %mm2, %mm0
        por     %mm1, %mm0

        movd    %mm0, %eax
        movw    %ax, (%esi, %ecx, 2)

        incl    %ecx
4:
        cmpl    $2, %ecx                /* index 2 is the last pixel of the row */
        jng     3b

        addl    bpl, %esi               /* next row */
        decl    %edx
        jnz     1b
5:
        LEAVE
||||
|
||||
|
||||
/*
 * shade_ximage_32_mmx(void *data, int bpl, int w, int h, int rm, int gm, int bm)
 *
 * Scales the three low bytes of every 32-bit pixel of a w x h image in
 * place: byte 2 by rm, byte 1 by gm, byte 0 by bm.  Each byte c becomes
 * approximately (c * m) >> 8, clamped to 0..255 by the final unsigned
 * saturating pack.  The multiplier lane for the top byte is zero when rm
 * fits in 16 bits, so that byte is then written back as zero.  Consecutive
 * rows are bpl bytes apart.  Returns immediately when w <= 0 or h <= 0.
 *
 * Register use inside the loops:
 *      %esi  one past the last pixel of the current row
 *      %ebx  -w
 *      %ecx  pixel index, counting from -w up to 0
 *      %edx  rows still to process
 *      %mm4  multipliers packed as words, bm lowest, then gm, then rm
 *      %mm5  per-lane correction term (see below)
 *      %mm6  0x8000 in every word
 */
shade_ximage_32_mmx:
        ENTER

        testl   %ebx, %ebx
        jle     3f                      /* w <= 0: the column counter would never hit 0 */
        testl   %edx, %edx
        jle     3f                      /* h <= 0: the row counter would wrap */

        leal    (%esi, %ebx, 4), %esi
        negl    %ebx

        /* Pack the multipliers into one register, one 16-bit lane each */
        movd    bm, %mm6
        movd    gm, %mm5
        movd    rm, %mm4
        psllq   $16, %mm5
        psllq   $32, %mm4
        por     %mm6, %mm4
        por     %mm5, %mm4

        /*
         * pmulhw multiplies signed words, but a channel byte placed in the
         * high half of a word can have the sign bit set.  Each word is
         * therefore XORed with 0x8000 before the multiply, and the product
         * of that same offset with the multiplier (kept in %mm5) is
         * subtracted afterwards to undo the shift.
         */
        pcmpeqw %mm6, %mm6
        psllw   $15, %mm6               /* 0x8000 in every word */
        movq    %mm6, %mm5
        pmulhw  %mm4, %mm5              /* correction term per lane */

1:      /* ---- start of a row ---- */
        movl    %ebx, %ecx

2:      /* ---- one pixel per iteration ---- */
        movd    (%esi, %ecx, 4), %mm1
        pxor    %mm0, %mm0
        punpcklbw %mm1, %mm0            /* each byte becomes the high byte of a word */
        pxor    %mm6, %mm0              /* move into signed range */

        pmulhw  %mm4, %mm0
        psubw   %mm5, %mm0              /* undo the 0x8000 offset */
        packuswb %mm0, %mm0             /* clamp every word to 0..255 */

        movd    %mm0, (%esi, %ecx, 4)

        incl    %ecx
        jnz     2b

        addl    bpl, %esi               /* next row */
        decl    %edx
        jnz     1b
3:
        LEAVE
||||
|
||||
#endif /* HAVE_MMX */ |
Loading…
Reference in new issue