Some cleanups in asm code

Drop DO_MMX_ASM and DO_AMD64_ASM compile guards, handled by make.
Whitespace cleanups.
This commit is contained in:
Kim Woelders 2024-04-13 09:52:00 +02:00
parent af97c7c08e
commit e9a89df496
7 changed files with 2184 additions and 2225 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly blending routines for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
@ -51,7 +49,7 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba)
#include "asm_loadimmq.S"
/*\ MMX register use:
|*| %mm1 = Source value
|*| %mm2 = Destination value
@ -95,7 +93,6 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba)
decl %edx ;\
jns 8b
/*\ Unset MMX mode, reset registers, return \*/
#define LEAVE \
9: ;\
@ -281,7 +278,7 @@ PR_(imlib_mmx_copy_rgba_to_rgba):
/*\ Load source, save destination \*/
movq (%esi, %ecx, 4), %mm1
movq %mm1, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -358,7 +355,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgb):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -408,7 +405,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -440,7 +437,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgb):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -473,7 +470,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgba):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -511,7 +508,7 @@ PR_(imlib_mmx_add_copy_rgb_to_rgba):
/*\ Make result alpha 0xff \*/
por %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -559,7 +556,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -609,7 +606,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgba):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -641,7 +638,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgb):
/*\ d = d - s, unsigned saturation, and save \*/
psubusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -683,7 +680,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgba):
/*\ Negate result alphas \*/
pxor %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -723,7 +720,7 @@ PR_(imlib_mmx_subtract_copy_rgb_to_rgba):
/*\ Make result alpha 0xff \*/
por %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -774,7 +771,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -827,7 +824,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -877,7 +874,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -942,7 +939,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1001,7 +998,7 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba):
/*\ Make result alpha 0xff \*/
por %mm7, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1024,8 +1021,6 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba):
SIZE(imlib_mmx_reshade_copy_rgb_to_rgba)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly blending routines, with colour modding, for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
@ -69,7 +67,7 @@ FN_(imlib_mmx_reshade_copy_rgba_to_rgba_cmod)
FN_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
#include "asm_loadimmq.S"
/*\ MMX register use:
|*| %mm1 = Source value
|*| %mm2 = Destination value
@ -113,7 +111,6 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
decl %edx ;\
jns 8b
/*\ Unset MMX mode, reset registers, return \*/
#define LEAVE \
9: ;\
@ -622,7 +619,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -672,7 +669,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -713,7 +710,7 @@ PR_(imlib_mmx_add_blend_rgb_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -763,7 +760,7 @@ PR_(imlib_mmx_add_blend_rgb_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -795,7 +792,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgb_cmod):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -828,7 +825,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgba_cmod):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -860,7 +857,7 @@ PR_(imlib_mmx_add_copy_rgb_to_rgba_cmod):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -907,7 +904,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -957,7 +954,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -998,7 +995,7 @@ PR_(imlib_mmx_subtract_blend_rgb_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1048,7 +1045,7 @@ PR_(imlib_mmx_subtract_blend_rgb_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1080,7 +1077,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgb_cmod):
/*\ d = d - s, unsigned saturation, and save \*/
psubusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1122,7 +1119,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgba_cmod):
/*\ Negate result alphas \*/
pxor %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1161,7 +1158,7 @@ PR_(imlib_mmx_subtract_copy_rgb_to_rgba_cmod):
psubusb %mm1, %mm2
pxor %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1213,7 +1210,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1266,7 +1263,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1310,7 +1307,7 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1363,7 +1360,7 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1413,7 +1410,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb_cmod):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1478,7 +1475,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba_cmod):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1538,7 +1535,7 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1563,8 +1560,6 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod):
SIZE(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly rgba rendering routines for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
@ -81,7 +79,6 @@ FN_(imlib_get_cpuid)
ret
PR_(imlib_mmx_bgr565_fast):
LOAD_IMMQ(mul_bgr565, %mm7) /*\ This constant is the only difference \*/
CLEANUP_IMMQ_LOADS(1)
@ -273,8 +270,6 @@ PR_(imlib_get_cpuid):
SIZE(imlib_get_cpuid)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly rotation routine for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
\*/
@ -197,7 +195,7 @@ PR_(imlib_mmx_RotateAA):
paddw %mm3, %mm5
packuswb %mm5, %mm5
movd %mm5, (%edi, %ecx, 4)
paddd dxh, %mm6
incl %ecx
@ -220,7 +218,7 @@ PR_(imlib_mmx_RotateAA):
decl %eax
sall $12, %eax
movl %eax, sht
movl sow, %ebx
movl src, %edx
.outside_loop_y:
@ -421,7 +419,7 @@ PR_(imlib_mmx_RotateAA):
.outside_il_0:
movl $0, %eax
movl %eax, (%edi, %ecx, 4)
.outside_il_end:
paddd dxh, %mm6
@ -447,8 +445,6 @@ PR_(imlib_mmx_RotateAA):
SIZE(imlib_mmx_RotateAA)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly scaling routine for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
\*/
@ -293,7 +291,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm0
psllw $6, %mm0
pmulhw %mm5, %mm0
/*\ i = 0x4000 - My \*/
movl $0x4000, %ebx
subl My, %ebx
@ -307,18 +305,18 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm0
/*\ i -= Cy; while (i > Cy) \*/
subl Cy, %ebx
2:
cmpl Cy, %ebx
jg 1b
/*\ mm6 = i \*/
movd %ebx, %mm6
punpcklwd %mm6, %mm6
punpckldq %mm6, %mm6
/*\ p += sow; v += (*p * i) >> 10 \*/
addl sow_4, %eax
movd (%eax), %mm1
@ -336,7 +334,7 @@ PR_(imlib_Scale_mmx_AARGBA):
movd %eax, %mm3
punpcklwd %mm3, %mm3
punpckldq %mm3, %mm3
/*\ p + 1 \*/
movl %esi, %eax
addl $4, %eax
@ -345,7 +343,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm2
psllw $6, %mm2
pmulhw %mm5, %mm2
/*\ i = 0x4000 - My \*/
movl $0x4000, %ebx
subl My, %ebx
@ -359,13 +357,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm2
/*\ i -= Cy; while (i > Cy) \*/
subl Cy, %ebx
2:
cmpl Cy, %ebx
jg 1b
/*\ p += sow; v += (*p * i) >> 10 \*/
addl sow_4, %eax
movd (%eax), %mm1
@ -425,7 +423,7 @@ PR_(imlib_Scale_mmx_AARGBA):
movd %eax, %mm3
punpcklwd %mm3, %mm3
punpckldq %mm3, %mm3
/*\ x = -dw \*/
movl dw, %ecx
negl %ecx
@ -459,7 +457,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm0
psllw $6, %mm0
pmulhw %mm5, %mm0
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -473,18 +471,18 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm0
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ mm6 = i \*/
movd %ebx, %mm6
punpcklwd %mm6, %mm6
punpckldq %mm6, %mm6
/*\ v += (*++p * i) >> 10 \*/
addl $4, %eax
movd (%eax), %mm1
@ -504,7 +502,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm2
psllw $6, %mm2
pmulhw %mm5, %mm2
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -518,13 +516,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm2
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ v += (*++p * i) >> 10 \*/
addl $4, %eax
movd (%eax), %mm1
@ -604,14 +602,14 @@ PR_(imlib_Scale_mmx_AARGBA):
movd %ebx, %mm5
punpcklwd %mm5, %mm5
punpckldq %mm5, %mm5
/*\ p = sptr; v = (*p * Mx) >> 9 \*/
movl %esi, %eax
movd (%eax), %mm0
punpcklbw %mm7, %mm0
psllw $7, %mm0
pmulhw %mm5, %mm0
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -625,18 +623,18 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $7, %mm1
pmulhw %mm3, %mm1
paddw %mm1, %mm0
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ mm6 = i \*/
movd %ebx, %mm6
punpcklwd %mm6, %mm6
punpckldq %mm6, %mm6
/*\ v += (*++p * i) >> 9 \*/
addl $4, %eax
movd (%eax), %mm1
@ -651,7 +649,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpckldq %mm4, %mm4
psllw $2, %mm0
pmulhw %mm4, %mm0
/*\ j = 0x4000 - My \*/
movl $0x4000, %edx
subl My, %edx
@ -666,7 +664,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm1
psllw $7, %mm1
pmulhw %mm5, %mm1
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -680,13 +678,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $7, %mm2
pmulhw %mm3, %mm2
paddw %mm2, %mm1
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ vx += (*++p * i) >> 9 \*/
addl $4, %eax
movd (%eax), %mm2
@ -702,13 +700,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $2, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm0
/*\ j -= Cy; while (j > Cy) \*/
subl Cy, %edx
4:
cmpl Cy, %edx
jg 3b
/*\ sptr += sow; p = sptr \*/
addl sow_4, %esi
movl %esi, %eax
@ -717,7 +715,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm1
psllw $7, %mm1
pmulhw %mm5, %mm1
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -731,13 +729,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $7, %mm2
pmulhw %mm3, %mm2
paddw %mm2, %mm1
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ vx += (*++p * i) >> 9 \*/
addl $4, %eax
movd (%eax), %mm2
@ -788,8 +786,6 @@ PR_(imlib_Scale_mmx_AARGBA):
SIZE(imlib_Scale_mmx_AARGBA)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif