patch from Tiago Falcão - unroll loops in evas a bit.

SVN revision: 39467
This commit is contained in:
Carsten Haitzler 2009-03-13 15:18:39 +00:00
parent 750bdfce3d
commit 6bc4266805
12 changed files with 534 additions and 476 deletions

View File

@ -4,12 +4,12 @@
#ifdef BUILD_C
/* Colour-only op over l destination pixels: every *d is replaced by
 * c + MUL_256(256 - (c >> 24), *d).  s and m are not used. */
static void
_op_blend_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   const DATA32 inv_alpha = 256 - (c >> 24); /* same for every pixel */
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst = *d;
                        *d++ = c + MUL_256(inv_alpha, dst);
                     });
}
#define _op_blend_caa_dp _op_blend_c_dp
@ -61,13 +61,13 @@ init_blend_color_pt_funcs_c(void)
#ifdef BUILD_C
/* Colour-only "rel" op over l destination pixels: every *d is replaced by
 * MUL_SYM(*d >> 24, c) + MUL_256(256 - (c >> 24), *d).  s and m are not used.
 * l is only the pixel count; the per-call factor lives in its own local. */
static void
_op_blend_rel_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   const int inv_alpha = 256 - (c >> 24);
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst = *d;
                        *d++ = MUL_SYM(dst >> 24, c) + MUL_256(inv_alpha, dst);
                     });
}
#define _op_blend_rel_caa_dp _op_blend_rel_c_dp

View File

@ -4,16 +4,17 @@
#ifdef BUILD_C
static void
_op_blend_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
DATA32 *e = d + l;
l = 256 - (c >> 24);
while (d < e) {
DATA32 *e;
int alpha = 256 - (c >> 24);
UNROLL8_PLD_WHILE(d, l, e,
{
DATA32 a = *m;
switch(a)
{
case 0:
break;
case 255:
*d = c + MUL_256(l, *d);
*d = c + MUL_256(alpha, *d);
break;
default:
{
@ -24,15 +25,17 @@ _op_blend_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
break;
}
m++; d++;
}
});
}
static void
_op_blend_mas_can_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
DATA32 *e = d + l;
while (d < e) {
l = *m;
switch(l)
DATA32 *e;
int alpha;
UNROLL8_PLD_WHILE(d, l, e,
{
alpha = *m;
switch(alpha)
{
case 0:
break;
@ -40,12 +43,12 @@ _op_blend_mas_can_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l)
*d = c;
break;
default:
l++;
*d = INTERP_256(l, c, *d);
alpha++;
*d = INTERP_256(alpha, c, *d);
break;
}
m++; d++;
}
});
}
#define _op_blend_mas_cn_dp _op_blend_mas_can_dp
@ -114,14 +117,16 @@ init_blend_mask_color_pt_funcs_c(void)
#ifdef BUILD_C
/* Masked colour "rel" op over l pixels.  Per pixel: mc = MUL_SYM(*m, c),
 * then *d = MUL_SYM(*d >> 24, mc) + MUL_256(256 - (mc >> 24), *d).
 * m and d advance together; s is not used. */
static void
_op_blend_rel_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst = *d;
                        const DATA32 mc = MUL_SYM(*m, c);
                        const int inv_alpha = 256 - (mc >> 24);
                        *d = MUL_SYM(dst >> 24, mc) + MUL_256(inv_alpha, dst);
                        m++;
                        d++;
                     });
}
#define _op_blend_rel_mas_cn_dp _op_blend_rel_mas_c_dp

View File

@ -4,18 +4,21 @@
#ifdef BUILD_C
/* Pixel op over l pixels: *d = *s + MUL_256(256 - (*s >> 24), *d).
 * s and d advance together; m and c are not used. */
static void
_op_blend_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 src = *s++;
                        const int inv_alpha = 256 - (src >> 24);
                        *d = src + MUL_256(inv_alpha, *d);
                        d++;
                     });
}
static void
_op_blend_pas_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
DATA32 *e = d + l;
while (d < e)
DATA32 *e;
int alpha;
UNROLL8_PLD_WHILE(d, l, e,
{
switch (*s & 0xff000000)
{
@ -25,12 +28,12 @@ _op_blend_pas_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d,
*d = *s;
break;
default:
l = 256 - (*s >> 24);
*d = *s + MUL_256(l, *d);
alpha = 256 - (*s >> 24);
*d = *s + MUL_256(alpha, *d);
break;
}
s++; d++;
}
});
}
#define _op_blend_pan_dp NULL
@ -86,24 +89,28 @@ init_blend_pixel_pt_funcs_c(void)
#ifdef BUILD_C
/* Pixel "rel" op over l pixels.  Per pixel:
 *   *d = MUL_256(1 + (*d >> 24), *s) + MUL_256(256 - (*s >> 24), *d).
 * The incoming value of c is ignored; c is reused as per-pixel scratch. */
static void
_op_blend_rel_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 src = *s;
                        const DATA32 dst = *d;
                        const int inv_alpha = 256 - (src >> 24);
                        c = 1 + (dst >> 24);
                        *d = MUL_256(c, src) + MUL_256(inv_alpha, dst);
                        s++;
                        d++;
                     });
}
/* Pixel "rel" op over l pixels where only the destination's top byte is
 * consulted: *d = MUL_256(1 + (*d >> 24), *s).
 * The incoming value of c is ignored; c is reused as per-pixel scratch.
 * m is not used.  (The unused local `alpha` has been dropped.) */
static void
_op_blend_rel_pan_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *e;

   UNROLL8_PLD_WHILE(d, l, e,
                     {
                        c = 1 + (*d >> 24);
                        *d = MUL_256(c, *s);
                        d++;
                        s++;
                     });
}
#define _op_blend_rel_pas_dp _op_blend_rel_p_dp

View File

@ -4,69 +4,78 @@
#ifdef BUILD_C
/* Pixel * colour op over l pixels.  Per pixel: sc = MUL4_SYM(c, *s),
 * then *d = sc + MUL_256(256 - (sc >> 24), *d).  m is not used. */
static void
_op_blend_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 sc = MUL4_SYM(c, *s);
                        const int inv_alpha = 256 - (sc >> 24);
                        *d = sc + MUL_256(inv_alpha, *d);
                        s++;
                        d++;
                     });
}
/* Pixel * colour op over l pixels, taking the top byte from c only:
 *   *d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(256 - (c >> 24), *d).
 * Both c-derived terms are constant for the whole span.  m is not used. */
static void
_op_blend_pan_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   const int inv_alpha = 256 - (c >> 24);
   const DATA32 c_top = c & 0xff000000;
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        *d = (c_top + MUL3_SYM(c, *s)) + MUL_256(inv_alpha, *d);
                        s++;
                        d++;
                     });
}
/* Pixel * colour op over l pixels, taking the top byte from the source:
 *   *d = ((*s & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(256 - (*s >> 24), *d).
 * m is not used. */
static void
_op_blend_p_can_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 src = *s;
                        const int inv_alpha = 256 - (src >> 24);
                        *d = ((src & 0xff000000) + MUL3_SYM(c, src)) + MUL_256(inv_alpha, *d);
                        s++;
                        d++;
                     });
}
/* Pixel * colour op over l pixels with the top byte forced to 0xff:
 *   *d = 0xff000000 + MUL3_SYM(c, *s).
 * The previous destination value is not read.  m is not used. */
static void
_op_blend_pan_can_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        *d = 0xff000000 + MUL3_SYM(c, *s);
                        d++;
                        s++;
                     });
}
/* Pixel op over l pixels scaled by the low byte of c.  With
 * k = 1 + (c & 0xff), per pixel: sc = MUL_256(k, *s), then
 * *d = sc + MUL_256(256 - (sc >> 24), *d).  m is not used. */
static void
_op_blend_p_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   const DATA32 scale = 1 + (c & 0xff);
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 sc = MUL_256(scale, *s);
                        const int inv_alpha = 256 - (sc >> 24);
                        *d = sc + MUL_256(inv_alpha, *d);
                        s++;
                        d++;
                     });
}
/* Interpolating op over l pixels: with k = 1 + (c & 0xff),
 * *d = INTERP_256(k, *s, *d).  m is not used. */
static void
_op_blend_pan_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   const DATA32 weight = 1 + (c & 0xff);
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        *d = INTERP_256(weight, *s, *d);
                        s++;
                        d++;
                     });
}
#define _op_blend_pas_c_dp _op_blend_p_c_dp
@ -167,14 +176,16 @@ init_blend_pixel_color_pt_funcs_c(void)
#ifdef BUILD_C
/* Pixel * colour "rel" op over l pixels.  Per pixel: sc = MUL4_SYM(c, *s),
 * then *d = MUL_SYM(*d >> 24, sc) + MUL_256(256 - (sc >> 24), *d).
 * m is not used. */
static void
_op_blend_rel_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst = *d;
                        const DATA32 sc = MUL4_SYM(c, *s);
                        const int inv_alpha = 256 - (sc >> 24);
                        *d = MUL_SYM(dst >> 24, sc) + MUL_256(inv_alpha, dst);
                        s++;
                        d++;
                     });
}
#define _op_blend_rel_pas_c_dp _op_blend_rel_p_c_dp

View File

@ -4,33 +4,37 @@
#ifdef BUILD_C
/* Masked pixel op over l pixels, dispatching on the mask byte *m:
 *   0     -> destination left untouched
 *   255   -> *d = *s + MUL_256(256 - (*s >> 24), *d)
 *   other -> c = MUL_SYM(*m, *s); *d = c + MUL_256(256 - (c >> 24), *d)
 * The incoming value of c is ignored; c is reused as per-pixel scratch. */
static void
_op_blend_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const int mask = *m;
                        if (mask == 255)
                          {
                             const int inv_alpha = 256 - (*s >> 24);
                             *d = *s + MUL_256(inv_alpha, *d);
                          }
                        else if (mask != 0)
                          {
                             int inv_alpha;
                             c = MUL_SYM(mask, *s);
                             inv_alpha = 256 - (c >> 24);
                             *d = c + MUL_256(inv_alpha, *d);
                          }
                        m++;
                        s++;
                        d++;
                     });
}
static void
_op_blend_pas_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
DATA32 *e = d + l;
while (d < e) {
l = (*s >> 24);
switch(*m & l)
DATA32 *e;
int alpha;
UNROLL8_PLD_WHILE(d, l, e,
{
alpha = (*s >> 24);
switch(alpha)
{
case 0:
break;
@ -38,21 +42,22 @@ _op_blend_pas_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
*d = *s;
break;
default:
c = MUL_SYM(l, *s);
l = 256 - (c >> 24);
*d = c + MUL_256(l, *d);
c = MUL_SYM(alpha, *s);
alpha = 256 - (c >> 24);
*d = c + MUL_256(alpha, *d);
break;
}
m++; s++; d++;
}
});
}
static void
_op_blend_pan_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) {
DATA32 *e = d + l;
while (d < e) {
l = *m;
switch(l)
DATA32 *e;
int alpha;
UNROLL8_PLD_WHILE(d, l, e,
{
alpha = *m;
switch(alpha)
{
case 0:
break;
@ -60,12 +65,12 @@ _op_blend_pan_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l)
*d = *s;
break;
default:
l++;
*d = INTERP_256(l, *s, *d);
alpha++;
*d = INTERP_256(alpha, *s, *d);
break;
}
m++; s++; d++;
}
});
}
@ -125,15 +130,15 @@ init_blend_pixel_mask_pt_funcs_c(void)
#ifdef BUILD_C
/* Masked pixel "rel" op over l pixels.  Per pixel: c = MUL_SYM(*m, *s),
 * then *d = MUL_SYM(*d >> 24, c) + MUL_256(256 - (c >> 24), *d).
 * The incoming value of c is ignored; c is reused as per-pixel scratch. */
static void
_op_blend_rel_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst = *d;
                        int inv_alpha;
                        c = MUL_SYM(*m, *s);
                        inv_alpha = 256 - (c >> 24);
                        *d = MUL_SYM(dst >> 24, c) + MUL_256(inv_alpha, dst);
                        s++;
                        m++;
                        d++;
                     });
}
#define _op_blend_rel_pas_mas_dp _op_blend_rel_p_mas_dp

View File

@ -4,10 +4,12 @@
#ifdef BUILD_C
/* Fill: store c into each of the l destination pixels.
 * s and m are not used. */
static void
_op_copy_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        *d++ = c;
                     });
}
#define _op_copy_cn_dp _op_copy_c_dp
@ -71,10 +73,12 @@ init_copy_color_pt_funcs_c(void)
#ifdef BUILD_C
/* "Rel" fill over l pixels: *d = MUL_SYM(*d >> 24, c).
 * s and m are not used. */
static void
_op_copy_rel_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst_top = *d >> 24;
                        *d++ = MUL_SYM(dst_top, c);
                     });
}

View File

@ -4,10 +4,12 @@
#ifdef BUILD_C
static void
_op_copy_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
DATA32 *e = d + l;
while (d < e) {
l = *m;
switch(l)
DATA32 *e;
int color;
UNROLL8_PLD_WHILE(d, l, e,
{
color = *m;
switch(color)
{
case 0:
break;
@ -15,12 +17,12 @@ _op_copy_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
*d = c;
break;
default:
l++;
*d = INTERP_256(l, c, *d);
color++;
*d = INTERP_256(color, c, *d);
break;
}
m++; d++;
}
});
}
#define _op_copy_mas_cn_dp _op_copy_mas_c_dp
@ -85,28 +87,30 @@ init_copy_mask_color_pt_funcs_c(void)
#ifdef BUILD_C
/* Masked "rel" colour copy over l pixels, dispatching on the mask byte *m:
 *   0     -> destination left untouched
 *   255   -> *d = MUL_256(1 + (*d >> 24), c)
 *   other -> da = MUL_256(1 + (*d >> 24), c);
 *            *d = INTERP_256(*m + 1, da, *d)
 * s is not used. */
static void
_op_copy_rel_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const int mask = *m;
                        if (mask == 255)
                          {
                             const int dst_scale = 1 + (*d >> 24);
                             *d = MUL_256(dst_scale, c);
                          }
                        else if (mask != 0)
                          {
                             const int weight = mask + 1;
                             DATA32 da = 1 + (*d >> 24);
                             da = MUL_256(da, c);
                             *d = INTERP_256(weight, da, *d);
                          }
                        m++;
                        d++;
                     });
}

View File

@ -4,10 +4,7 @@
#ifdef BUILD_C
/* Straight copy of l pixels from s to d.  m and c are not used.
 *
 * A non-positive l copies nothing, as the element-wise loop this replaces
 * did; without the guard a negative l would be converted to a huge size_t
 * byte count.  memmove is used rather than memcpy because nothing in this
 * function guarantees the two spans do not overlap, and memcpy on
 * overlapping memory is undefined. */
static void
_op_copy_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
   if (l <= 0) return;
   memmove(d, s, (size_t)l * sizeof(DATA32));
}
#define _op_copy_pan_dp _op_copy_p_dp
@ -63,10 +60,12 @@ init_copy_pixel_pt_funcs_c(void)
#ifdef BUILD_C
/* "Rel" pixel copy over l pixels: *d = MUL_SYM(*d >> 24, *s).
 * m and c are not used. */
static void
_op_copy_rel_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst_top = *d >> 24;
                        *d = MUL_SYM(dst_top, *s);
                        s++;
                        d++;
                     });
}

View File

@ -4,23 +4,25 @@
#ifdef BUILD_C
/* Pixel * colour copy over l pixels: *d = MUL4_SYM(c, *s).
 * The previous destination value is not read.  m is not used. */
static void
_op_copy_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        *d = MUL4_SYM(c, *s);
                        s++;
                        d++;
                     });
}
/* Pixel copy over l pixels scaled by the top byte of c:
 * with k = 1 + (c >> 24), *d = MUL_256(k, *s).  m is not used. */
static void
_op_copy_p_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   const DATA32 scale = 1 + (c >> 24);
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        *d = MUL_256(scale, *s);
                        s++;
                        d++;
                     });
}
@ -129,13 +131,14 @@ init_copy_pixel_color_pt_funcs_c(void)
#ifdef BUILD_C
/* "Rel" pixel * colour copy over l pixels.  Per pixel:
 * cs = MUL4_SYM(c, *s), then *d = MUL_SYM(*d >> 24, cs).  m is not used. */
static void
_op_copy_rel_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
   DATA32 *end;

   UNROLL8_PLD_WHILE(d, l, end,
                     {
                        const DATA32 dst_top = *d >> 24;
                        const DATA32 cs = MUL4_SYM(c, *s);
                        *d = MUL_SYM(dst_top, cs);
                        s++;
                        d++;
                     });
}
#define _op_copy_rel_pas_c_dp _op_copy_rel_p_c_dp

View File

@ -4,10 +4,12 @@
#ifdef BUILD_C
static void
_op_copy_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) {
DATA32 *e = d + l;
while (d < e) {
l = *m;
switch(l)
DATA32 *e;
int color;
UNROLL8_PLD_WHILE(d, l, e,
{
color = *m;
switch(color)
{
case 0:
break;
@ -15,12 +17,12 @@ _op_copy_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) {
*d = *s;
break;
default:
l++;
*d = INTERP_256(l, *s, *d);
color++;
*d = INTERP_256(color, *s, *d);
break;
}
m++; s++; d++;
}
});
}
@ -77,10 +79,12 @@ init_copy_pixel_mask_pt_funcs_c(void)
#ifdef BUILD_C
static void
_op_copy_rel_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
DATA32 *e = d + l;
while (d < e) {
l = *m;
switch(l)
DATA32 *e;
int color;
UNROLL8_PLD_WHILE(d, l, e,
{
color = *m;
switch(color)
{
case 0:
break;
@ -94,7 +98,7 @@ _op_copy_rel_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
break;
}
m++; s++; d++;
}
});
}

View File

@ -127,6 +127,95 @@ void *alloca (size_t);
/*****************************************************************************/
/* Fold the per-variant __ARM_ARCH_<x>__ macros into one numeric
 * __ARM_ARCH__ so later code can compare with a single `>=`.
 * Variants are tested from the highest value down: if several variant
 * macros happen to be defined, the one with the highest value is chosen,
 * which is the value the earlier sequential #if list ended up with. */
#if defined(__ARM_ARCH_7M__)
# define __ARM_ARCH__ 73
#elif defined(__ARM_ARCH_7R__)
# define __ARM_ARCH__ 72
#elif defined(__ARM_ARCH_7A__)
# define __ARM_ARCH__ 71
#elif defined(__ARM_ARCH_7__)
# define __ARM_ARCH__ 70
#elif defined(__ARM_ARCH_6T2__)
# define __ARM_ARCH__ 65
#elif defined(__ARM_ARCH_6ZK__)
# define __ARM_ARCH__ 64
#elif defined(__ARM_ARCH_6Z__)
# define __ARM_ARCH__ 63
#elif defined(__ARM_ARCH_6K__)
# define __ARM_ARCH__ 62
#elif defined(__ARM_ARCH_6J__)
# define __ARM_ARCH__ 61
#elif defined(__ARM_ARCH_6__)
# define __ARM_ARCH__ 60
#elif defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 54
#elif defined(__ARM_ARCH_5TE__)
# define __ARM_ARCH__ 53
#elif defined(__ARM_ARCH_5E__)
# define __ARM_ARCH__ 52
#elif defined(__ARM_ARCH_5T__)
# define __ARM_ARCH__ 51
#elif defined(__ARM_ARCH_5__)
# define __ARM_ARCH__ 50
#elif defined(__ARM_ARCH_4T__)
# define __ARM_ARCH__ 41
#elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_3M__)
# define __ARM_ARCH__ 40
#endif
#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 52)
/* tested on ARMv6 (arm1136j-s), Nokia N800 CPU */
/* pld(addr, off): emit an ARM `pld` instruction for [addr, #off].
 * off must be a compile-time constant (it is bound with an "i" constraint). */
#define pld(addr, off) \
   __asm__("pld [%[address], %[offset]]":: \
           [address] "r" (addr), [offset] "i" (off))
#else
/* Targets below the threshold above: pld() expands to nothing. */
#define pld(addr, off)
#endif /* __ARM_ARCH__ >= 52 */
/*****************************************************************************/
/* UNROLLn(op): paste `op` n times in a row. */
#define UNROLL2(op...) op op
#define UNROLL4(op...) UNROLL2(op) UNROLL2(op)
#define UNROLL8(op...) UNROLL4(op) UNROLL4(op)
#define UNROLL16(op...) UNROLL8(op) UNROLL8(op)

/* UNROLL8_PLD_WHILE(start, size, end, op)
 *
 * Run `op` once per element for `size` elements beginning at `start`.
 * The first (size & ~7) elements are handled eight per loop iteration,
 * the remaining (size & 7) one at a time.
 *
 *   start - pointer lvalue; `op` must advance it by exactly one element
 *           per execution, otherwise the loops do not terminate correctly.
 *   size  - element count; evaluated twice, so it must have no side
 *           effects and `op` must not modify it.
 *   end   - pointer lvalue used as scratch; holds start0 + size afterwards.
 *   op    - statement or braced block to execute per element.
 *
 * The expansion is a single statement (do { } while (0)), so the macro is
 * safe as the body of an unbraced if/else, and every argument is
 * parenthesized so that expressions such as `a + b` may be passed as size. */
#define UNROLL8_PLD_WHILE(start, size, end, op...) \
   do \
     { \
        pld((start), 0); \
        (end) = (start) + ((size) & ~7); \
        while ((start) < (end)) \
          { \
             pld((start), 32); \
             UNROLL8(op); \
          } \
        (end) += ((size) & 7); \
        pld((start), 32); \
        while ((start) < (end)) \
          { \
             op; \
          } \
     } \
   while (0)
/*****************************************************************************/
typedef unsigned long long DATA64;
@ -313,7 +402,7 @@ struct _Image_Entry
} info;
#ifdef BUILD_ASYNC_PRELOAD
pthread_mutex_t lock;
LK(lock);
#endif
Image_Entry_Flags flags;
@ -642,9 +731,8 @@ struct _RGBA_Font_Int
Eina_Hash *glyphs;
#ifdef HAVE_PTHREAD
pthread_mutex_t ft_mutex;
#endif
LK(ft_mutex);
Eina_Hash *kerning;
Eina_Hash *indexes;

View File

@ -32,78 +32,6 @@ extern "C" {
((((g) >> 2) & 0x3f) << 5) | \
(((b) >> 3) & 0x1f))
/* UNROLLn(op): paste `op` n times in a row. */
#define UNROLL2(op...) op op
#define UNROLL4(op...) op op op op
#define UNROLL8(op...) UNROLL4(op) UNROLL4(op)
#define UNROLL16(op...) UNROLL4(op) UNROLL4(op) UNROLL4(op) UNROLL4(op)

/* Fold the per-variant __ARM_ARCH_<x>__ macros into one numeric
 * __ARM_ARCH__.  Tested from the highest value down, so when several
 * variant macros are defined the highest-valued one is chosen. */
#if defined(__ARM_ARCH_7M__)
# define __ARM_ARCH__ 73
#elif defined(__ARM_ARCH_7R__)
# define __ARM_ARCH__ 72
#elif defined(__ARM_ARCH_7A__)
# define __ARM_ARCH__ 71
#elif defined(__ARM_ARCH_7__)
# define __ARM_ARCH__ 70
#elif defined(__ARM_ARCH_6T2__)
# define __ARM_ARCH__ 65
#elif defined(__ARM_ARCH_6ZK__)
# define __ARM_ARCH__ 64
#elif defined(__ARM_ARCH_6Z__)
# define __ARM_ARCH__ 63
#elif defined(__ARM_ARCH_6K__)
# define __ARM_ARCH__ 62
#elif defined(__ARM_ARCH_6J__)
# define __ARM_ARCH__ 61
#elif defined(__ARM_ARCH_6__)
# define __ARM_ARCH__ 60
#elif defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 54
#elif defined(__ARM_ARCH_5TE__)
# define __ARM_ARCH__ 53
#elif defined(__ARM_ARCH_5E__)
# define __ARM_ARCH__ 52
#elif defined(__ARM_ARCH_5T__)
# define __ARM_ARCH__ 51
#elif defined(__ARM_ARCH_5__)
# define __ARM_ARCH__ 50
#elif defined(__ARM_ARCH_4T__)
# define __ARM_ARCH__ 41
#elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_3M__)
# define __ARM_ARCH__ 40
#endif

#if defined(__ARM_ARCH__) && (__ARM_ARCH__ >= 52)
/* tested on ARMv6 (arm1136j-s), Nokia N800 CPU */
/* pld(addr, off): emit an ARM `pld` instruction for [addr, #off];
 * off must be a compile-time constant ("i" constraint). */
#define pld(addr, off) \
   __asm__("pld [%[address], %[offset]]":: \
           [address] "r" (addr), [offset] "i" (off))
#else
/* Targets below the threshold above: pld() expands to nothing. */
#define pld(addr, off)
#endif /* __ARM_ARCH__ >= 52 */
static inline int
_calc_stride(int w)
{