Wed Jan 4 04:14:14 2006 Michael Jennings (mej)

Applied patches from Tres Melton <tres@mindspring.com> for SSE2
alignment and other shading-related issues.
----------------------------------------------------------------------


SVN revision: 19521
This commit is contained in:
Michael Jennings 2006-01-04 09:15:17 +00:00
parent e39e3f1ce9
commit 9c737f4aa1
3 changed files with 35 additions and 42 deletions

View File

@ -5449,3 +5449,8 @@ Wed Jan 4 03:44:04 2006 Michael Jennings (mej)
X resource tracking/debugging support.
----------------------------------------------------------------------
Wed Jan 4 04:14:14 2006 Michael Jennings (mej)
Applied patches from Tres Melton <tres@mindspring.com> for SSE2
alignment and other shading-related issues.
----------------------------------------------------------------------

View File

@ -1748,14 +1748,7 @@ shade_ximage_24(void *data, int bpl, int w, int h, int rm, int gm, int bm)
void
colormod_trans(Pixmap p, imlib_t *iml, GC gc, unsigned short w, unsigned short h)
{
#ifdef HAVE_SSE2
XImage * __attribute__ ((aligned (16))) ximg;
#elif defined HAVE_MMX
XImage * __attribute__ ((aligned (8))) ximg;
#else
XImage *ximg;
#endif
register unsigned long i;
#if 0
@ -1887,6 +1880,7 @@ colormod_trans(Pixmap p, imlib_t *iml, GC gc, unsigned short w, unsigned short h
if (ximg->bits_per_pixel != 32) {
D_PIXMAP(("Rendering 24 bit\n"));
shade_ximage_24(ximg->data, ximg->bytes_per_line, w, h, rm, gm, bm);
break;
}
/* drop */
case 32:

View File

@ -1,12 +1,17 @@
/* File: sse2_cmod.c
* Written and Copyright (C) 2005 by Tres Melton
/*
* Copyright (C) 1997-2006, Michael Jennings
*
* Permission is hereby granted to Michael Jennings to license this code as
* he sees fit. I'd prefer the GPL but he will choose the BSD. The debate
* is moot as this is to become a part of the Eterm project, for which he is
* the primary author. For users of this code I ask that any modifications
* be released back into the community but if Michael Jennings chooses the
* BSD license then that request has no backing in law.
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies of the Software, its documentation and marketing & publicity
* materials, and acknowledgment shall be given in the documentation, materials
* and software packages that this Software was used.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
@ -15,8 +20,9 @@
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Ported from <willem@stack.nl> Willem Monsuwe's original x86/MMX assembly
* code by Tres Melton in 2005 and 2006. Anything copyrightable by me is
* assigned to the Eterm project and its founder/maintainer: Michael Jennings.
*
* Much inspiration was drawn from the original x86 MMX port written by
* Willem Monsuwe <willem@stack.nl> in pure x86/MMX Assembly. The MMX
@ -25,6 +31,8 @@
* ensure they worked with various gcc options. Further the code was
* extended to take advantage of the 128 bit xmm registers in SSE2.
*
* The imlib2 code in Enlightenment also has a lot to teach on the subject.
*
* Manuals used in this port:
* The Gnu Assembler
* http://www.gnu.org/software/binutils/manual/gas-2.9.1/html_mono/as.html
@ -65,21 +73,7 @@
* assembly code in C functions is the only way to ensure that this code will continue to
* function through a (however unlikely) change. If pure assembly were to be used as the
* original MMX author, Willem Monsuwe, did and the ABI changed then this code would cease
* to function properly. After examination of Willem's code I'm wondering if he
* wrote it for GNU/Linux originally. The ENTER and LEAVE macros put all of the
* parameters on the stack so that they can be accessed by references to the Base_Pointer
* the way that ANSI C is defined. If he originally wrote this for GNU/Linux then he most
* likely would have just used the registers instead of unwinding those optimizations
* manually by pushing them to the stack. And if he explicitly wanted to use the stack
* then there are parameters to gcc that would have performed those operations for him:
* -mregparm/-mmemparm. Other gcc options that can tweak with the stack and the number
* of registers available for function parameters are: -fcall-used/-fcall-saved,
* -fcaller-saves, -fstack-protector, -fPIC/-fpic, -mno-push-args, etc. It might be
* advisable to check for these switches when using the original MMX code and emit a
* warning if any are enabled. I know that the PIC option trashes the BX register and
* that both Willem and I use that register. In other words, if you do manage to get it
* to compile & run w/ -fpic it WILL break. On the plus side, you can keep the pieces! :-)
* On the other hand I could be wrong about everything
* to function properly.
*
* In Conclusion:
* Using C functions and inline assembly code should alleviate all of the concerns as the
@ -126,8 +120,8 @@ void shade_ximage_15_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 3f \n\t"
"2: \n\t" /* Start of the inner loop (pixels 8 at a time --> 8 * 16 = 128bits/xmm register ) */
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" /* Load the 16 bits of the pixel (5 bits for red, 5 bits for green, 5 bits for blue) */
"movdqu %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
"movdqu %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
"movdqa %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
"movdqa %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
"psrlw $5, %%xmm1 \n\t" /* Packed Shift Right Logical Words */
/* From A64_128bit_Media_Programming (p. 347) */
/* Shifts the blue off of the green color */
@ -191,8 +185,8 @@ void shade_ximage_15_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 8f \n\t"
"7: \n\t"
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t"
"movdqu %%xmm0, %%xmm1 \n\t"
"movdqu %%xmm0, %%xmm2 \n\t"
"movdqa %%xmm0, %%xmm1 \n\t"
"movdqa %%xmm0, %%xmm2 \n\t"
"psrlw $5, %%xmm1 \n\t"
"psrlw $10, %%xmm0 \n\t"
"psllw $11, %%xmm2 \n\t"
@ -301,8 +295,8 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 3f \n\t"
"2: \n\t" /* Start of the inner loop (pixels 8 at a time --> 8 * 16 = 128bits/xmm register ) */
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t" /* Load the 16 bits of the pixel (5 bits for red, 6 bits for green, 5 bits for blue) */
"movdqu %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
"movdqu %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
"movdqa %%xmm0, %%xmm1 \n\t" /* Create a copy of the pixel for the green color */
"movdqa %%xmm0, %%xmm2 \n\t" /* Create a copy of the pixel for the blue color */
"psrlw $5, %%xmm1 \n\t" /* Packed Shift Right Logical Words */
/* From A64_128bit_Media_Programming (p. 347) */
/* Shifts the blue off of the green color */
@ -359,7 +353,7 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w
"pcmpeqw %%xmm3, %%xmm3 \n\t" /* Packed Compare Equal Words */
/* From A64_128bit_Media_Programming (p. 276) */
/* This sets xmm3 to 128 1's (since xmm3 = xmm3) */
"movdqu %%xmm3, %%xmm4 \n\t" /* Make copy of 128 ones */
"movdqa %%xmm3, %%xmm4 \n\t" /* Make copy of 128 ones */
"psllw $5, %%xmm3 \n\t" /* xmm3 = 8 copies of 1111 1111 1110 0000 */
"psllw $6, %%xmm4 \n\t" /* xmm4 = 8 copies of 1111 1111 1100 0000 */
"6: \n\t"
@ -368,8 +362,8 @@ void shade_ximage_16_sse2( volatile void *data, volatile int bpl, volatile int w
"jns 8f \n\t"
"7: \n\t"
"movdqu (%%rsi, %%rcx, 2), %%xmm0\n\t"
"movdqu %%xmm0, %%xmm1 \n\t"
"movdqu %%xmm0, %%xmm2 \n\t"
"movdqa %%xmm0, %%xmm1 \n\t"
"movdqa %%xmm0, %%xmm2 \n\t"
"psrlw $5, %%xmm1 \n\t"
"psrlw $11, %%xmm0 \n\t"
"psllw $11, %%xmm2 \n\t"
@ -480,7 +474,7 @@ void shade_ximage_32_sse2( volatile void *data, volatile int bpl, volatile int w
"psllw $15, %%xmm6 \n\t" /* Packed Shift Left Logical Words */
/* From A64_128bit_Media_Programming (p. 330) */
/* This sets 8 16 bit values of 1000 0000 0000 0000 in the 128 bit word */
"movdqu %%xmm6, %%xmm5 \n\t" /* Copy xmm6 to xmm5 (we need xmm6 later) */
"movdqa %%xmm6, %%xmm5 \n\t" /* Copy xmm6 to xmm5 (we need xmm6 later) */
"pmulhw %%xmm4, %%xmm5 \n\t" /* Packed Multiply High Signed Word */
/* xmm5 = ( xmm4 * xmm5 ) >> 16 (8 times, once for each 16bit value) */
/* For each color_ modifier (cm) */