summary | refs | log | tree | commit | diff
path: root/src/static_libs/rg_etc
diff options
context:
space:
mode:
author: Matvey Konovalov <m.konovalov@samsung.com>, 2014-04-30 14:08:55 +0900
committer: Jean-Philippe Andre <jp.andre@samsung.com>, 2014-05-27 12:21:26 +0900
commit: 9550b653480f7a7a3af02c7e7712dc9e081605f4 (patch)
tree: 4a883c5a88691426db80ac3593bcd8c1ee3cff4f /src/static_libs/rg_etc
parent: 6754e03ee870cc80f075828e727bd33bfa183063 (diff)
Evas rg_etc1: Unroll the decoding loop for performance
Measurements have shown a 40% perf increase with these changes. Patch by Matvey Konovalov. Signed-off-by: Jean-Philippe Andre <jp.andre@samsung.com>
Diffstat (limited to 'src/static_libs/rg_etc')
-rw-r--r-- src/static_libs/rg_etc/rg_etc1.c 220
1 files changed, 152 insertions, 68 deletions
diff --git a/src/static_libs/rg_etc/rg_etc1.c b/src/static_libs/rg_etc/rg_etc1.c
index 2df3202f1d..f768eaab83 100644
--- a/src/static_libs/rg_etc/rg_etc1.c
+++ b/src/static_libs/rg_etc/rg_etc1.c
@@ -151,6 +151,8 @@ enum RG_Etc_Constants
151#define B_VAL_GET(p) (((DATA8 *)(p))[1]) 151#define B_VAL_GET(p) (((DATA8 *)(p))[1])
152#endif 152#endif
153 153
154#define A_MASK (0xFFul << 24)
155
154// For unpacking and writing BGRA output data 156// For unpacking and writing BGRA output data
155#define ARGB_JOIN(a,r,g,b) \ 157#define ARGB_JOIN(a,r,g,b) \
156 (((a) << 24) + ((r) << 16) + ((g) << 8) + (b)) 158 (((a) << 24) + ((r) << 16) + ((g) << 8) + (b))
@@ -529,21 +531,9 @@ rg_etc1_color_quad_init(unsigned char r, unsigned char g, unsigned char b, unsig
529} 531}
530 532
531static inline unsigned int 533static inline unsigned int
532rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color, unsigned char preserve_alpha) 534rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color)
533{ 535{
534 if (preserve_alpha) 536 return (new_color & ~A_MASK) | (old_color & A_MASK);
535 {
536 unsigned char r, g, b, a;
537
538 // Used for UNPACKING
539 a = A_VAL_SET(&old_color);
540 r = R_VAL_SET(&new_color);
541 g = G_VAL_SET(&new_color);
542 b = B_VAL_SET(&new_color);
543
544 return ARGB_JOIN(a, r, g, b);
545 }
546 return new_color;
547} 537}
548 538
549static inline void 539static inline void
@@ -876,15 +866,15 @@ rg_etc_block_base4_color_get(const unsigned char bytes[8], unsigned char idx)
876 866
877 if (idx) 867 if (idx)
878 { 868 {
879 r = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4R2BitOffset, 4); 869 r = (bytes[0]) & ((1 << 4) - 1);
880 g = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4G2BitOffset, 4); 870 g = (bytes[1]) & ((1 << 4) - 1);
881 b = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4B2BitOffset, 4); 871 b = (bytes[2]) & ((1 << 4) - 1);
882 } 872 }
883 else 873 else
884 { 874 {
885 r = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4R1BitOffset, 4); 875 r = (bytes[0] >> 4) & ((1 << 4) - 1);
886 g = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4G1BitOffset, 4); 876 g = (bytes[1] >> 4) & ((1 << 4) - 1);
887 b = rg_etc1_block_byte_bits_get(bytes, cETC1AbsColor4B1BitOffset, 4); 877 b = (bytes[2] >> 4) & ((1 << 4) - 1);
888 } 878 }
889 879
890 return b | (g << 4) | (r << 8); 880 return b | (g << 4) | (r << 8);
@@ -912,9 +902,9 @@ rg_etc1_block_base5_color_get(const unsigned char bytes[8])
912{ 902{
913 unsigned short r, g, b; 903 unsigned short r, g, b;
914 904
915 r = rg_etc1_block_byte_bits_get(bytes, cETC1BaseColor5RBitOffset, 5); 905 r = (bytes[0] >> 3) & ((1 << 5) - 1);
916 g = rg_etc1_block_byte_bits_get(bytes, cETC1BaseColor5GBitOffset, 5); 906 g = (bytes[1] >> 3) & ((1 << 5) - 1);
917 b = rg_etc1_block_byte_bits_get(bytes, cETC1BaseColor5BBitOffset, 5); 907 b = (bytes[2] >> 3) & ((1 << 5) - 1);
918 908
919 return b | (g << 5) | (r << 10); 909 return b | (g << 5) | (r << 10);
920} 910}
@@ -932,9 +922,9 @@ rg_etc1_block_delta3_color_get(const unsigned char bytes[8])
932{ 922{
933 unsigned short r, g, b; 923 unsigned short r, g, b;
934 924
935 r = rg_etc1_block_byte_bits_get(bytes, cETC1DeltaColor3RBitOffset, 3); 925 r = (bytes[0]) & ((1 << 3) - 1);
936 g = rg_etc1_block_byte_bits_get(bytes, cETC1DeltaColor3GBitOffset, 3); 926 g = (bytes[1]) & ((1 << 3) - 1);
937 b = rg_etc1_block_byte_bits_get(bytes, cETC1DeltaColor3BBitOffset, 3); 927 b = (bytes[2]) & ((1 << 3) - 1);
938 928
939 return b | (g << 3) | (r << 6); 929 return b | (g << 3) | (r << 6);
940} 930}
@@ -1207,13 +1197,14 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pDst_pixels_BGRA, boo
1207 unsigned char diff_flag, flip_flag, table_index0, table_index1; 1197 unsigned char diff_flag, flip_flag, table_index0, table_index1;
1208 unsigned int subblock_colors0[4] = { 0 }; 1198 unsigned int subblock_colors0[4] = { 0 };
1209 unsigned int subblock_colors1[4] = { 0 }; 1199 unsigned int subblock_colors1[4] = { 0 };
1210 unsigned char x, y;
1211 unsigned char success = 1; 1200 unsigned char success = 1;
1201 const unsigned char *bytes;
1202 bytes = (unsigned char *)ETC1_block;
1212 1203
1213 diff_flag = rg_etc1_block_diff_bit_get(ETC1_block); 1204 diff_flag = rg_etc1_block_diff_bit_get(ETC1_block);
1214 flip_flag = rg_etc1_block_flip_bit_get(ETC1_block); 1205 flip_flag = rg_etc1_block_flip_bit_get(ETC1_block);
1215 table_index0 = rg_etc1_block_inten_table_get(ETC1_block, 0); 1206 table_index0 = (bytes[3] >> 5) & 7;
1216 table_index1 = rg_etc1_block_inten_table_get(ETC1_block, 1); 1207 table_index1 = (bytes[3] >> 2) & 7;
1217 1208
1218 if (diff_flag) 1209 if (diff_flag)
1219 { 1210 {
@@ -1249,44 +1240,137 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pDst_pixels_BGRA, boo
1249 // 0011 1240 // 0011
1250 // 0011 1241 // 0011
1251 // 0011 1242 // 0011
1252 // Depending on flip_flag. 1243 unsigned char val0 = (bytes[7] & 1) | ((bytes[5] & 1) << 1);
1253 1244 unsigned char val1 = ((bytes[7] >> 4) & 1) | (((bytes[5] >> 4) & 1) << 1);
1254 if (flip_flag) 1245 unsigned char val2 = (bytes[6] & 1) | ((bytes[4] & 1) << 1);
1255 { 1246 unsigned char val3 = ((bytes[6] >> 4) & 1) | (((bytes[4] >> 4) & 1) << 1);
1256 for (y = 0; y < 2; y++) 1247 unsigned char val4 = ((bytes[7] >> 1) & 1) | (((bytes[5] >> 1) & 1) << 1);
1257 { 1248 unsigned char val5 = ((bytes[7] >> 5) & 1) | (((bytes[5] >> 5) & 1) << 1);
1258 for (x = 0; x < 4; x++) 1249 unsigned char val6 = ((bytes[6] >> 1) & 1) | (((bytes[4] >> 1) & 1) << 1);
1259 pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x], 1250 unsigned char val7 = ((bytes[6] >> 5) & 1) | (((bytes[4] >> 5) & 1) << 1);
1260 subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)], 1251 unsigned char val8 = ((bytes[7] >> 2) & 1) | (((bytes[5] >> 2) & 1) << 1);
1261 preserve_alpha); 1252 unsigned char val9 = ((bytes[7] >> 6) & 1) | (((bytes[5] >> 6) & 1) << 1);
1262 pDst_pixels_BGRA += 4; 1253 unsigned char val10 = ((bytes[6] >> 2) & 1) | (((bytes[4] >> 2) & 1) << 1);
1263 } 1254 unsigned char val11 = ((bytes[6] >> 6) & 1) | (((bytes[4] >> 6) & 1) << 1);
1264 1255 unsigned char val12 = ((bytes[7] >> 3) & 1) | (((bytes[5] >> 3) & 1) << 1);
1265 for (y = 2; y < 4; y++) 1256 unsigned char val13 = ((bytes[7] >> 7) & 1) | (((bytes[5] >> 7) & 1) << 1);
1266 { 1257 unsigned char val14 = ((bytes[6] >> 3) & 1) | (((bytes[4] >> 3) & 1) << 1);
1267 for (x = 0; x < 4; x++) 1258 unsigned char val15 = ((bytes[6] >> 7) & 1) | (((bytes[4] >> 7) & 1) << 1);
1268 pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x], 1259
1269 subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)], 1260 if (preserve_alpha) // Depending on flip_flag.
1270 preserve_alpha); 1261 {
1271 pDst_pixels_BGRA += 4; 1262 if (flip_flag)
1272 } 1263 {
1273 } 1264 pDst_pixels_BGRA[0] = rg_etc1_color_quad_set(pDst_pixels_BGRA[0],
1274 else 1265 subblock_colors0[rg_etc1_to_selector_index[val0]]);
1275 { 1266 pDst_pixels_BGRA[1] = rg_etc1_color_quad_set(pDst_pixels_BGRA[1],
1276 for (y = 0; y < 4; y++) 1267 subblock_colors0[rg_etc1_to_selector_index[val1]]);
1277 { 1268 pDst_pixels_BGRA[2] = rg_etc1_color_quad_set(pDst_pixels_BGRA[2],
1278 for (x = 0; x < 2; x++) 1269 subblock_colors0[rg_etc1_to_selector_index[val2]]);
1279 pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x], 1270 pDst_pixels_BGRA[3] = rg_etc1_color_quad_set(pDst_pixels_BGRA[3],
1280 subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)], 1271 subblock_colors0[rg_etc1_to_selector_index[val3]]);
1281 preserve_alpha); 1272 pDst_pixels_BGRA[4] = rg_etc1_color_quad_set(pDst_pixels_BGRA[4],
1282 for (; x < 4; x++) 1273 subblock_colors0[rg_etc1_to_selector_index[val4]]);
1283 pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x], 1274 pDst_pixels_BGRA[5] = rg_etc1_color_quad_set(pDst_pixels_BGRA[5],
1284 subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)], 1275 subblock_colors0[rg_etc1_to_selector_index[val5]]);
1285 preserve_alpha); 1276 pDst_pixels_BGRA[6] = rg_etc1_color_quad_set(pDst_pixels_BGRA[6],
1286 1277 subblock_colors0[rg_etc1_to_selector_index[val6]]);
1287 pDst_pixels_BGRA += 4; 1278 pDst_pixels_BGRA[7] = rg_etc1_color_quad_set(pDst_pixels_BGRA[7],
1288 } 1279 subblock_colors0[rg_etc1_to_selector_index[val7]]);
1289 } 1280 pDst_pixels_BGRA[8] = rg_etc1_color_quad_set(pDst_pixels_BGRA[8],
1281 subblock_colors1[rg_etc1_to_selector_index[val8]]);
1282 pDst_pixels_BGRA[9] = rg_etc1_color_quad_set(pDst_pixels_BGRA[9],
1283 subblock_colors1[rg_etc1_to_selector_index[val9]]);
1284 pDst_pixels_BGRA[10] = rg_etc1_color_quad_set(pDst_pixels_BGRA[10],
1285 subblock_colors1[rg_etc1_to_selector_index[val10]]);
1286 pDst_pixels_BGRA[11] = rg_etc1_color_quad_set(pDst_pixels_BGRA[11],
1287 subblock_colors1[rg_etc1_to_selector_index[val11]]);
1288 pDst_pixels_BGRA[12] = rg_etc1_color_quad_set(pDst_pixels_BGRA[12],
1289 subblock_colors1[rg_etc1_to_selector_index[val12]]);
1290 pDst_pixels_BGRA[13] = rg_etc1_color_quad_set(pDst_pixels_BGRA[13],
1291 subblock_colors1[rg_etc1_to_selector_index[val13]]);
1292 pDst_pixels_BGRA[14] = rg_etc1_color_quad_set(pDst_pixels_BGRA[14],
1293 subblock_colors1[rg_etc1_to_selector_index[val14]]);
1294 pDst_pixels_BGRA[15] = rg_etc1_color_quad_set(pDst_pixels_BGRA[15],
1295 subblock_colors1[rg_etc1_to_selector_index[val15]]);
1296 }
1297 else
1298 {
1299 pDst_pixels_BGRA[0] = rg_etc1_color_quad_set(pDst_pixels_BGRA[0],
1300 subblock_colors0[rg_etc1_to_selector_index[val0]]);
1301 pDst_pixels_BGRA[1] = rg_etc1_color_quad_set(pDst_pixels_BGRA[1],
1302 subblock_colors0[rg_etc1_to_selector_index[val1]]);
1303 pDst_pixels_BGRA[2] = rg_etc1_color_quad_set(pDst_pixels_BGRA[2],
1304 subblock_colors1[rg_etc1_to_selector_index[val2]]);
1305 pDst_pixels_BGRA[3] = rg_etc1_color_quad_set(pDst_pixels_BGRA[3],
1306 subblock_colors1[rg_etc1_to_selector_index[val3]]);
1307 pDst_pixels_BGRA[4] = rg_etc1_color_quad_set(pDst_pixels_BGRA[4],
1308 subblock_colors0[rg_etc1_to_selector_index[val4]]);
1309 pDst_pixels_BGRA[5] = rg_etc1_color_quad_set(pDst_pixels_BGRA[5],
1310 subblock_colors0[rg_etc1_to_selector_index[val5]]);
1311 pDst_pixels_BGRA[6] = rg_etc1_color_quad_set(pDst_pixels_BGRA[6],
1312 subblock_colors1[rg_etc1_to_selector_index[val6]]);
1313 pDst_pixels_BGRA[7] = rg_etc1_color_quad_set(pDst_pixels_BGRA[7],
1314 subblock_colors1[rg_etc1_to_selector_index[val7]]);
1315 pDst_pixels_BGRA[8] = rg_etc1_color_quad_set(pDst_pixels_BGRA[8],
1316 subblock_colors0[rg_etc1_to_selector_index[val8]]);
1317 pDst_pixels_BGRA[9] = rg_etc1_color_quad_set(pDst_pixels_BGRA[9],
1318 subblock_colors0[rg_etc1_to_selector_index[val9]]);
1319 pDst_pixels_BGRA[10] = rg_etc1_color_quad_set(pDst_pixels_BGRA[10],
1320 subblock_colors1[rg_etc1_to_selector_index[val10]]);
1321 pDst_pixels_BGRA[11] = rg_etc1_color_quad_set(pDst_pixels_BGRA[11],
1322 subblock_colors1[rg_etc1_to_selector_index[val11]]);
1323 pDst_pixels_BGRA[12] = rg_etc1_color_quad_set(pDst_pixels_BGRA[12],
1324 subblock_colors0[rg_etc1_to_selector_index[val12]]);
1325 pDst_pixels_BGRA[13] = rg_etc1_color_quad_set(pDst_pixels_BGRA[13],
1326 subblock_colors0[rg_etc1_to_selector_index[val13]]);
1327 pDst_pixels_BGRA[14] = rg_etc1_color_quad_set(pDst_pixels_BGRA[14],
1328 subblock_colors1[rg_etc1_to_selector_index[val14]]);
1329 pDst_pixels_BGRA[15] = rg_etc1_color_quad_set(pDst_pixels_BGRA[15],
1330 subblock_colors1[rg_etc1_to_selector_index[val15]]);
1331 }
1332 }
1333 else
1334 {
1335 if (flip_flag)
1336 {
1337 pDst_pixels_BGRA[0] = subblock_colors0[rg_etc1_to_selector_index[val0]];
1338 pDst_pixels_BGRA[1] = subblock_colors0[rg_etc1_to_selector_index[val1]];
1339 pDst_pixels_BGRA[2] = subblock_colors0[rg_etc1_to_selector_index[val2]];
1340 pDst_pixels_BGRA[3] = subblock_colors0[rg_etc1_to_selector_index[val3]];
1341 pDst_pixels_BGRA[4] = subblock_colors0[rg_etc1_to_selector_index[val4]];
1342 pDst_pixels_BGRA[5] = subblock_colors0[rg_etc1_to_selector_index[val5]];
1343 pDst_pixels_BGRA[6] = subblock_colors0[rg_etc1_to_selector_index[val6]];
1344 pDst_pixels_BGRA[7] = subblock_colors0[rg_etc1_to_selector_index[val7]];
1345 pDst_pixels_BGRA[8] = subblock_colors1[rg_etc1_to_selector_index[val8]];
1346 pDst_pixels_BGRA[9] = subblock_colors1[rg_etc1_to_selector_index[val9]];
1347 pDst_pixels_BGRA[10] = subblock_colors1[rg_etc1_to_selector_index[val10]];
1348 pDst_pixels_BGRA[11] = subblock_colors1[rg_etc1_to_selector_index[val11]];
1349 pDst_pixels_BGRA[12] = subblock_colors1[rg_etc1_to_selector_index[val12]];
1350 pDst_pixels_BGRA[13] = subblock_colors1[rg_etc1_to_selector_index[val13]];
1351 pDst_pixels_BGRA[14] = subblock_colors1[rg_etc1_to_selector_index[val14]];
1352 pDst_pixels_BGRA[15] = subblock_colors1[rg_etc1_to_selector_index[val15]];
1353 }
1354 else
1355 {
1356 pDst_pixels_BGRA[0] = subblock_colors0[rg_etc1_to_selector_index[val0]];
1357 pDst_pixels_BGRA[1] = subblock_colors0[rg_etc1_to_selector_index[val1]];
1358 pDst_pixels_BGRA[2] = subblock_colors1[rg_etc1_to_selector_index[val2]];
1359 pDst_pixels_BGRA[3] = subblock_colors1[rg_etc1_to_selector_index[val3]];
1360 pDst_pixels_BGRA[4] = subblock_colors0[rg_etc1_to_selector_index[val4]];
1361 pDst_pixels_BGRA[5] = subblock_colors0[rg_etc1_to_selector_index[val5]];
1362 pDst_pixels_BGRA[6] = subblock_colors1[rg_etc1_to_selector_index[val6]];
1363 pDst_pixels_BGRA[7] = subblock_colors1[rg_etc1_to_selector_index[val7]];
1364 pDst_pixels_BGRA[8] = subblock_colors0[rg_etc1_to_selector_index[val8]];
1365 pDst_pixels_BGRA[9] = subblock_colors0[rg_etc1_to_selector_index[val9]];
1366 pDst_pixels_BGRA[10] = subblock_colors1[rg_etc1_to_selector_index[val10]];
1367 pDst_pixels_BGRA[11] = subblock_colors1[rg_etc1_to_selector_index[val11]];
1368 pDst_pixels_BGRA[12] = subblock_colors0[rg_etc1_to_selector_index[val12]];
1369 pDst_pixels_BGRA[13] = subblock_colors0[rg_etc1_to_selector_index[val13]];
1370 pDst_pixels_BGRA[14] = subblock_colors1[rg_etc1_to_selector_index[val14]];
1371 pDst_pixels_BGRA[15] = subblock_colors1[rg_etc1_to_selector_index[val15]];
1372 }
1373 }
1290 1374
1291 return success; 1375 return success;
1292} 1376}
@@ -1763,7 +1847,7 @@ rg_etc1_optimizer_compute(rg_etc1_optimizer *optimizer)
1763 uint i; 1847 uint i;
1764 const uint8* pSelectors = optimizer->m_best_solution.m_selectors; 1848 const uint8* pSelectors = optimizer->m_best_solution.m_selectors;
1765 1849
1766 rg_etc1_solution_coordinates_block_colors_get(&optimizer->m_best_solution.m_coords, block_colors); 1850 rg_etc1_solution_coordinates_block_colors_get(optimizer->m_best_solution.m_coords, block_colors);
1767 pSrc_pixels = optimizer->m_pParams->m_pSrc_pixels; 1851 pSrc_pixels = optimizer->m_pParams->m_pSrc_pixels;
1768 for (i = 0; i < n; i++) 1852 for (i = 0; i < n; i++)
1769 actual_error += rg_etc1_color_quad_u8_rgb_squared_distance(pSrc_pixels[i], block_colors[pSelectors[i]]); 1853 actual_error += rg_etc1_color_quad_u8_rgb_squared_distance(pSrc_pixels[i], block_colors[pSelectors[i]]);