From ef80047ac96cd2da6a6d67719cfe10b94d400509 Mon Sep 17 00:00:00 2001 From: Jean-Philippe Andre Date: Thu, 12 Jun 2014 17:47:56 +0900 Subject: [PATCH] Evas TGV: Optimize loader using NEON intrinsics This can speed up the load a little (but I forgot the numbers). Patch by Vladimir Kuramshin. --- .../evas/loaders/tgv/evas_image_load_tgv.c | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/modules/evas/loaders/tgv/evas_image_load_tgv.c b/src/modules/evas/loaders/tgv/evas_image_load_tgv.c index 61d48b3808..5d84ccb05d 100644 --- a/src/modules/evas/loaders/tgv/evas_image_load_tgv.c +++ b/src/modules/evas/loaders/tgv/evas_image_load_tgv.c @@ -15,6 +15,9 @@ #include "rg_etc1.h" #include "Evas_Loader.h" +#ifdef BUILD_NEON +#include <arm_neon.h> +#endif /************************************************************** * The TGV file format is oriented around compression mecanism * that hardware are good at decompressing. We do still provide @@ -381,7 +384,7 @@ evas_image_load_file_data_tgv(void *loader_data, for (j = 0; j < loader->block.width; j += 4, it += etc_block_size) { Eina_Rectangle current_etc; - unsigned int temporary[4 * 4] = { 0 }; + unsigned int temporary[4 * 4]; unsigned int offset_x, offset_y; int k; @@ -414,6 +417,30 @@ evas_image_load_file_data_tgv(void *loader_data, offset_x = current_etc.x - x - j; offset_y = current_etc.y - y - i; +#ifdef BUILD_NEON + if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) + { + uint32_t *dst = &p[current_etc.x - 1 + (current_etc.y - 1) * master.w]; + uint32_t *src = &temporary[offset_x + offset_y * 4]; + for (k = 0; k < current_etc.h; k++) + { + if (current_etc.w == 4) + vst1q_u32(dst, vld1q_u32(src)); + else if (current_etc.w == 3) + { + vst1_u32(dst, vld1_u32(src)); + *(dst + 2) = *(src + 2); + } + else if (current_etc.w == 2) + vst1_u32(dst, vld1_u32(src)); + else + *dst = *src; + dst += master.w; + src += 4; + } + } + else +#endif for (k = 0; k < current_etc.h; k++) { memcpy(&p[current_etc.x - 1 +