Compare commits

...

2 Commits

Author SHA1 Message Date
Kim Woelders 0f378ef87f Add endbr32/64 instruction at the start of asm functions
Only do so when compiling with -fcf-protection (=> __CET__ is defined).

Inspired by:
004ff08738

Comment:
  Fixes SIGILL in tests on 11th-gen Intel CPUs (IBT enabled)

#23
2024-04-13 15:36:12 +02:00
Kim Woelders e9a89df496 Some cleanups in asm code
Drop DO_MMX_ASM and DO_AMD64_ASM compile guards, handled by make.
Whitespace cleanups.
2024-04-13 09:59:07 +02:00
8 changed files with 2199 additions and 2225 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -17,4 +17,14 @@
.size PR_(sym),.-PR_(sym); \
.align 8;
/*
 * ENDBR_ - landing-pad instruction placed at the entry of the asm functions
 * (the ENTER macros start with it).
 *
 * It expands to endbr32/endbr64 only when __CET__ is defined, i.e. when
 * compiling with -fcf-protection; otherwise it expands to nothing.
 *
 * The variant is taken from DO_MMX_ASM (32-bit) / DO_AMD64_ASM (64-bit) when
 * one of them is defined.  If neither is visible here, fall back on the
 * compiler's own target macro so that ENDBR_ is always defined and never
 * reaches the assembler as a bare, unknown token.
 */
#ifdef __CET__
#if defined(DO_MMX_ASM)
#define ENDBR_ endbr32
#elif defined(DO_AMD64_ASM)
#define ENDBR_ endbr64
#elif defined(__x86_64__)
#define ENDBR_ endbr64
#else
#define ENDBR_ endbr32
#endif
#else
#define ENDBR_
#endif
#endif /* __ASM_H */

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly blending routines for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
@ -51,7 +49,7 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba)
#include "asm_loadimmq.S"
/*\ MMX register use:
|*| %mm1 = Source value
|*| %mm2 = Destination value
@ -63,6 +61,7 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba)
/*\ Common code \*/
/*\ Set MMX mode, save registers, load common parameters \*/
#define ENTER \
ENDBR_ ;\
pushl %ebp ;\
movl %esp, %ebp ;\
pushl %ebx ;\
@ -95,7 +94,6 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba)
decl %edx ;\
jns 8b
/*\ Unset MMX mode, reset registers, return \*/
#define LEAVE \
9: ;\
@ -281,7 +279,7 @@ PR_(imlib_mmx_copy_rgba_to_rgba):
/*\ Load source, save destination \*/
movq (%esi, %ecx, 4), %mm1
movq %mm1, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -358,7 +356,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgb):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -408,7 +406,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -440,7 +438,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgb):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -473,7 +471,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgba):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -511,7 +509,7 @@ PR_(imlib_mmx_add_copy_rgb_to_rgba):
/*\ Make result alpha 0xff \*/
por %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -559,7 +557,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -609,7 +607,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgba):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -641,7 +639,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgb):
/*\ d = d - s, unsigned saturation, and save \*/
psubusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -683,7 +681,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgba):
/*\ Negate result alphas \*/
pxor %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -723,7 +721,7 @@ PR_(imlib_mmx_subtract_copy_rgb_to_rgba):
/*\ Make result alpha 0xff \*/
por %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -774,7 +772,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -827,7 +825,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -877,7 +875,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -942,7 +940,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1001,7 +999,7 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba):
/*\ Make result alpha 0xff \*/
por %mm7, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1024,8 +1022,6 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba):
SIZE(imlib_mmx_reshade_copy_rgb_to_rgba)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly blending routines, with colour modding, for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
@ -69,7 +67,7 @@ FN_(imlib_mmx_reshade_copy_rgba_to_rgba_cmod)
FN_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
#include "asm_loadimmq.S"
/*\ MMX register use:
|*| %mm1 = Source value
|*| %mm2 = Destination value
@ -81,6 +79,7 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
/*\ Common code \*/
/*\ Set MMX mode, save registers, load common parameters \*/
#define ENTER \
ENDBR_ ;\
pushl %ebp ;\
movl %esp, %ebp ;\
pushl %ebx ;\
@ -113,7 +112,6 @@ FN_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
decl %edx ;\
jns 8b
/*\ Unset MMX mode, reset registers, return \*/
#define LEAVE \
9: ;\
@ -622,7 +620,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -672,7 +670,7 @@ PR_(imlib_mmx_add_blend_rgba_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -713,7 +711,7 @@ PR_(imlib_mmx_add_blend_rgb_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -763,7 +761,7 @@ PR_(imlib_mmx_add_blend_rgb_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -795,7 +793,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgb_cmod):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -828,7 +826,7 @@ PR_(imlib_mmx_add_copy_rgba_to_rgba_cmod):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -860,7 +858,7 @@ PR_(imlib_mmx_add_copy_rgb_to_rgba_cmod):
/*\ d = d + s, unsigned saturation, and save \*/
paddusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -907,7 +905,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -957,7 +955,7 @@ PR_(imlib_mmx_subtract_blend_rgba_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -998,7 +996,7 @@ PR_(imlib_mmx_subtract_blend_rgb_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1048,7 +1046,7 @@ PR_(imlib_mmx_subtract_blend_rgb_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1080,7 +1078,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgb_cmod):
/*\ d = d - s, unsigned saturation, and save \*/
psubusb %mm1, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1122,7 +1120,7 @@ PR_(imlib_mmx_subtract_copy_rgba_to_rgba_cmod):
/*\ Negate result alphas \*/
pxor %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1161,7 +1159,7 @@ PR_(imlib_mmx_subtract_copy_rgb_to_rgba_cmod):
psubusb %mm1, %mm2
pxor %mm5, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1213,7 +1211,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1266,7 +1264,7 @@ PR_(imlib_mmx_reshade_blend_rgba_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1310,7 +1308,7 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgb_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1363,7 +1361,7 @@ PR_(imlib_mmx_reshade_blend_rgb_to_rgba_cmod):
/*\ Pack into lower 4 bytes and save \*/
packuswb %mm4, %mm2
movd %mm2, (%edi, %ecx, 4)
incl %ecx
js 1b
@ -1413,7 +1411,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgb_cmod):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1478,7 +1476,7 @@ PR_(imlib_mmx_reshade_copy_rgba_to_rgba_cmod):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1538,7 +1536,7 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod):
paddusb %mm1, %mm2
psubusb %mm3, %mm2
movq %mm2, (%edi, %ecx, 4)
addl $2, %ecx
js 1b
jnz 3f
@ -1563,8 +1561,6 @@ PR_(imlib_mmx_reshade_copy_rgb_to_rgba_cmod):
SIZE(imlib_mmx_reshade_copy_rgb_to_rgba_cmod)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly rgba rendering routines for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
@ -38,6 +36,7 @@ FN_(imlib_get_cpuid)
/*\ Common code \*/
/*\ Save registers, load common parameters \*/
#define ENTER \
ENDBR_; \
pushl %ebp; \
movl %esp, %ebp; \
pushl %ebx; \
@ -81,7 +80,6 @@ FN_(imlib_get_cpuid)
ret
PR_(imlib_mmx_bgr565_fast):
LOAD_IMMQ(mul_bgr565, %mm7) /*\ This constant is the only difference \*/
CLEANUP_IMMQ_LOADS(1)
@ -273,8 +271,6 @@ PR_(imlib_get_cpuid):
SIZE(imlib_get_cpuid)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly rotation routine for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
\*/
@ -197,7 +195,7 @@ PR_(imlib_mmx_RotateAA):
paddw %mm3, %mm5
packuswb %mm5, %mm5
movd %mm5, (%edi, %ecx, 4)
paddd dxh, %mm6
incl %ecx
@ -220,7 +218,7 @@ PR_(imlib_mmx_RotateAA):
decl %eax
sall $12, %eax
movl %eax, sht
movl sow, %ebx
movl src, %edx
.outside_loop_y:
@ -421,7 +419,7 @@ PR_(imlib_mmx_RotateAA):
.outside_il_0:
movl $0, %eax
movl %eax, (%edi, %ecx, 4)
.outside_il_end:
paddd dxh, %mm6
@ -447,8 +445,6 @@ PR_(imlib_mmx_RotateAA):
SIZE(imlib_mmx_RotateAA)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

View File

@ -1,9 +1,7 @@
#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
/*\
/*\
|*| MMX assembly scaling routine for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
\*/
@ -293,7 +291,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm0
psllw $6, %mm0
pmulhw %mm5, %mm0
/*\ i = 0x4000 - My \*/
movl $0x4000, %ebx
subl My, %ebx
@ -307,18 +305,18 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm0
/*\ i -= Cy; while (i > Cy) \*/
subl Cy, %ebx
2:
cmpl Cy, %ebx
jg 1b
/*\ mm6 = i \*/
movd %ebx, %mm6
punpcklwd %mm6, %mm6
punpckldq %mm6, %mm6
/*\ p += sow; v += (*p * i) >> 10 \*/
addl sow_4, %eax
movd (%eax), %mm1
@ -336,7 +334,7 @@ PR_(imlib_Scale_mmx_AARGBA):
movd %eax, %mm3
punpcklwd %mm3, %mm3
punpckldq %mm3, %mm3
/*\ p + 1 \*/
movl %esi, %eax
addl $4, %eax
@ -345,7 +343,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm2
psllw $6, %mm2
pmulhw %mm5, %mm2
/*\ i = 0x4000 - My \*/
movl $0x4000, %ebx
subl My, %ebx
@ -359,13 +357,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm2
/*\ i -= Cy; while (i > Cy) \*/
subl Cy, %ebx
2:
cmpl Cy, %ebx
jg 1b
/*\ p += sow; v += (*p * i) >> 10 \*/
addl sow_4, %eax
movd (%eax), %mm1
@ -425,7 +423,7 @@ PR_(imlib_Scale_mmx_AARGBA):
movd %eax, %mm3
punpcklwd %mm3, %mm3
punpckldq %mm3, %mm3
/*\ x = -dw \*/
movl dw, %ecx
negl %ecx
@ -459,7 +457,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm0
psllw $6, %mm0
pmulhw %mm5, %mm0
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -473,18 +471,18 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm0
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ mm6 = i \*/
movd %ebx, %mm6
punpcklwd %mm6, %mm6
punpckldq %mm6, %mm6
/*\ p += sow; v += (*p * i) >> 10 \*/
addl $4, %eax
movd (%eax), %mm1
@ -504,7 +502,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm2
psllw $6, %mm2
pmulhw %mm5, %mm2
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -518,13 +516,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $6, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm2
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ p += sow; v += (*p * i) >> 10 \*/
addl $4, %eax
movd (%eax), %mm1
@ -604,14 +602,14 @@ PR_(imlib_Scale_mmx_AARGBA):
movd %ebx, %mm5
punpcklwd %mm5, %mm5
punpckldq %mm5, %mm5
/*\ p = sptr; v = (*p * Mx) >> 9 \*/
movl %esi, %eax
movd (%eax), %mm0
punpcklbw %mm7, %mm0
psllw $7, %mm0
pmulhw %mm5, %mm0
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -625,18 +623,18 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $7, %mm1
pmulhw %mm3, %mm1
paddw %mm1, %mm0
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ mm6 = i \*/
movd %ebx, %mm6
punpcklwd %mm6, %mm6
punpckldq %mm6, %mm6
/*\ v += (*++p * i) >> 9 \*/
addl $4, %eax
movd (%eax), %mm1
@ -651,7 +649,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpckldq %mm4, %mm4
psllw $2, %mm0
pmulhw %mm4, %mm0
/*\ j = 0x4000 - My \*/
movl $0x4000, %edx
subl My, %edx
@ -666,7 +664,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm1
psllw $7, %mm1
pmulhw %mm5, %mm1
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -680,13 +678,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $7, %mm2
pmulhw %mm3, %mm2
paddw %mm2, %mm1
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ vx += (*++p * i) >> 9 \*/
addl $4, %eax
movd (%eax), %mm2
@ -702,13 +700,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $2, %mm1
pmulhw %mm4, %mm1
paddw %mm1, %mm0
/*\ j -= Cy; while (j > Cy) \*/
subl Cy, %edx
4:
cmpl Cy, %edx
jg 3b
/*\ sptr += sow; p = sptr \*/
addl sow_4, %esi
movl %esi, %eax
@ -717,7 +715,7 @@ PR_(imlib_Scale_mmx_AARGBA):
punpcklbw %mm7, %mm1
psllw $7, %mm1
pmulhw %mm5, %mm1
/*\ i = 0x4000 - Mx \*/
movl $0x4000, %ebx
subl Mx, %ebx
@ -731,13 +729,13 @@ PR_(imlib_Scale_mmx_AARGBA):
psllw $7, %mm2
pmulhw %mm3, %mm2
paddw %mm2, %mm1
/*\ i -= Cx; while (i > Cx) \*/
subl Cx, %ebx
2:
cmpl Cx, %ebx
jg 1b
/*\ vx += (*++p * i) >> 9 \*/
addl $4, %eax
movd (%eax), %mm2
@ -788,8 +786,6 @@ PR_(imlib_Scale_mmx_AARGBA):
SIZE(imlib_Scale_mmx_AARGBA)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif