forked from enlightenment/efl
Evas TGV: Optimize loader using NEON intrinsics
This can speed up the load a little (but I forgot the numbers). Patch by Vladimir Kuramshin.
This commit is contained in:
parent
29770922cf
commit
ef80047ac9
|
@ -15,6 +15,9 @@
|
|||
#include "rg_etc1.h"
|
||||
#include "Evas_Loader.h"
|
||||
|
||||
#ifdef BUILD_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
/**************************************************************
|
||||
* The TGV file format is oriented around compression mechanisms
|
||||
that hardware is good at decompressing. We do still provide
|
||||
|
@ -381,7 +384,7 @@ evas_image_load_file_data_tgv(void *loader_data,
|
|||
for (j = 0; j < loader->block.width; j += 4, it += etc_block_size)
|
||||
{
|
||||
Eina_Rectangle current_etc;
|
||||
unsigned int temporary[4 * 4] = { 0 };
|
||||
unsigned int temporary[4 * 4];
|
||||
unsigned int offset_x, offset_y;
|
||||
int k;
|
||||
|
||||
|
@ -414,6 +417,30 @@ evas_image_load_file_data_tgv(void *loader_data,
|
|||
|
||||
offset_x = current_etc.x - x - j;
|
||||
offset_y = current_etc.y - y - i;
|
||||
#ifdef BUILD_NEON
|
||||
if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
|
||||
{
|
||||
uint32_t *dst = &p[current_etc.x - 1 + (current_etc.y - 1) * master.w];
|
||||
uint32_t *src = &temporary[offset_x + offset_y * 4];
|
||||
for (k = 0; k < current_etc.h; k++)
|
||||
{
|
||||
if (current_etc.w == 4)
|
||||
vst1q_u32(dst, vld1q_u32(src));
|
||||
else if (current_etc.w == 3)
|
||||
{
|
||||
vst1_u32(dst, vld1_u32(src));
|
||||
*(dst + 2) = *(src + 2);
|
||||
}
|
||||
else if (current_etc.w == 2)
|
||||
vst1_u32(dst, vld1_u32(src));
|
||||
else
|
||||
*dst = *src;
|
||||
dst += master.w;
|
||||
src += 4;
|
||||
}
|
||||
}
|
||||
else
|
||||
#endif
|
||||
for (k = 0; k < current_etc.h; k++)
|
||||
{
|
||||
memcpy(&p[current_etc.x - 1 +
|
||||
|
|
Loading…
Reference in New Issue