forked from enlightenment/efl
Evas TGV: Optimize loader using NEON intrinsics
This can speed up the load a little (but I forgot the numbers). Patch by Vladimir Kuramshin.
This commit is contained in:
parent
29770922cf
commit
ef80047ac9
|
@ -15,6 +15,9 @@
|
||||||
#include "rg_etc1.h"
|
#include "rg_etc1.h"
|
||||||
#include "Evas_Loader.h"
|
#include "Evas_Loader.h"
|
||||||
|
|
||||||
|
#ifdef BUILD_NEON
|
||||||
|
#include <arm_neon.h>
|
||||||
|
#endif
|
||||||
/**************************************************************
|
/**************************************************************
|
||||||
* The TGV file format is oriented around compression mechanisms
|
* The TGV file format is oriented around compression mechanisms
|
||||||
* that hardware is good at decompressing. We do still provide
|
* that hardware is good at decompressing. We do still provide
|
||||||
|
@ -381,7 +384,7 @@ evas_image_load_file_data_tgv(void *loader_data,
|
||||||
for (j = 0; j < loader->block.width; j += 4, it += etc_block_size)
|
for (j = 0; j < loader->block.width; j += 4, it += etc_block_size)
|
||||||
{
|
{
|
||||||
Eina_Rectangle current_etc;
|
Eina_Rectangle current_etc;
|
||||||
unsigned int temporary[4 * 4] = { 0 };
|
unsigned int temporary[4 * 4];
|
||||||
unsigned int offset_x, offset_y;
|
unsigned int offset_x, offset_y;
|
||||||
int k;
|
int k;
|
||||||
|
|
||||||
|
@ -414,6 +417,30 @@ evas_image_load_file_data_tgv(void *loader_data,
|
||||||
|
|
||||||
offset_x = current_etc.x - x - j;
|
offset_x = current_etc.x - x - j;
|
||||||
offset_y = current_etc.y - y - i;
|
offset_y = current_etc.y - y - i;
|
||||||
|
#ifdef BUILD_NEON
|
||||||
|
if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
|
||||||
|
{
|
||||||
|
uint32_t *dst = &p[current_etc.x - 1 + (current_etc.y - 1) * master.w];
|
||||||
|
uint32_t *src = &temporary[offset_x + offset_y * 4];
|
||||||
|
for (k = 0; k < current_etc.h; k++)
|
||||||
|
{
|
||||||
|
if (current_etc.w == 4)
|
||||||
|
vst1q_u32(dst, vld1q_u32(src));
|
||||||
|
else if (current_etc.w == 3)
|
||||||
|
{
|
||||||
|
vst1_u32(dst, vld1_u32(src));
|
||||||
|
*(dst + 2) = *(src + 2);
|
||||||
|
}
|
||||||
|
else if (current_etc.w == 2)
|
||||||
|
vst1_u32(dst, vld1_u32(src));
|
||||||
|
else
|
||||||
|
*dst = *src;
|
||||||
|
dst += master.w;
|
||||||
|
src += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
for (k = 0; k < current_etc.h; k++)
|
for (k = 0; k < current_etc.h; k++)
|
||||||
{
|
{
|
||||||
memcpy(&p[current_etc.x - 1 +
|
memcpy(&p[current_etc.x - 1 +
|
||||||
|
|
Loading…
Reference in New Issue