rg_etc1: Fix RGBA vs BGRA mishandling of the ETC1 codec

Evas uses BGRA data while rg_etc1 uses RGBA data, so the two were incompatible. rg_etc1 now takes BGRA data as input when packing and produces BGRA data as output when unpacking.
2014-04-22 11:52:53 +09:00 · 2014-04-22 11:52:53 +09:00 · 59b660aae9
parent 2ff9b054f9
commit 59b660aae9
1 changed files with 128 additions and 63 deletions
--- a/src/static_libs/rg_etc/rg_etc1.c
+++ b/src/static_libs/rg_etc/rg_etc1.c
@ -46,6 +46,14 @@ typedef unsigned char DATA8;
 #define cUINT64_MAX ULLONG_MAX
 #define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))

+// Some configuration defines
+
+// Disable the constrained solid-color subblock packer: it produces artifacts (mostly in black areas)
+#define RG_ETC1_CONSTRAINED_SUBBLOCK 0
+// Disable dithering. It uses invalid RGBA order and isn't great visually
+// Dithering should happen AFTER the color selection, not before
+#define RG_ETC1_DITHERING 0
+
 enum RG_Etc_Constants
  {
    cETC1BytesPerBlock = 8U,
@ -105,26 +113,48 @@ enum RG_Etc_Constants
    // 0   1   2   3   -4  -3  -2  -1
  };

+/*
+ * IMPORTANT NOTE:
+ *
+ * rg_etc1 originally works only on R,G,B,A data
+ * evas works on B,G,R,A data
+ *
+ * ARGB_JOIN() is used for unpacking, so it will directly produce BGRA.
+ *
+ * When packing, the BGRA input is first converted to RGBA so that the
+ * precomputed tables can be used; the X_VAL_GET() macros read components in that RGBA order.
+ * When unpacking, BGRA data is written directly using ARGB_JOIN() and the X_VAL_SET() macros.
+ *
+ * Yes, this is a mess. Maybe a clear BGRA API is needed
+ */
+
 #ifndef WORDS_BIGENDIAN
-/* x86 */
-#define R_VAL(p) (((DATA8 *)(p))[0])
-#define G_VAL(p) (((DATA8 *)(p))[1])
-#define B_VAL(p) (((DATA8 *)(p))[2])
-#define A_VAL(p) (((DATA8 *)(p))[3])
-#define BA_VAL(p) ((DATA16 *)(p)[1])
-#define RG_VAL(p) ((DATA16 *)(p)[0])
+// BGRA
+#define A_VAL_SET(p) (((DATA8 *)(p))[3])
+#define R_VAL_SET(p) (((DATA8 *)(p))[2])
+#define G_VAL_SET(p) (((DATA8 *)(p))[1])
+#define B_VAL_SET(p) (((DATA8 *)(p))[0])
+// RGBA
+#define A_VAL_GET(p) (((DATA8 *)(p))[3])
+#define R_VAL_GET(p) (((DATA8 *)(p))[0])
+#define G_VAL_GET(p) (((DATA8 *)(p))[1])
+#define B_VAL_GET(p) (((DATA8 *)(p))[2])
 #else
-/* ppc */
-#define R_VAL(p) (((DATA8 *)(p))[3])
-#define G_VAL(p) (((DATA8 *)(p))[2])
-#define B_VAL(p) (((DATA8 *)(p))[1])
-#define A_VAL(p) (((DATA8 *)(p))[0])
-#define BA_VAL(p) ((DATA16 *)(p)[0])
-#define RG_VAL(p) ((DATA16 *)(p)[1])
+// BIGENDIAN is untested
+#define A_VAL_SET(p) (((DATA8 *)(p))[0])
+#define R_VAL_SET(p) (((DATA8 *)(p))[1])
+#define G_VAL_SET(p) (((DATA8 *)(p))[2])
+#define B_VAL_SET(p) (((DATA8 *)(p))[3])
+#define A_VAL_GET(p) (((DATA8 *)(p))[0])
+#define R_VAL_GET(p) (((DATA8 *)(p))[3])
+#define G_VAL_GET(p) (((DATA8 *)(p))[2])
+#define B_VAL_GET(p) (((DATA8 *)(p))[1])
 #endif

-#define ARGB_JOIN(a,r,g,b)                              \
-  (((a) << 24) + ((b) << 16) + ((g) << 8) + (r))
+// For unpacking and writing BGRA output data
+#define ARGB_JOIN(a,r,g,b) \
+        (((a) << 24) + ((r) << 16) + ((g) << 8) + (b))
+
 static unsigned char rg_etc_quant5_tab[256 + 16];

 static const int rg_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = {
@ -428,14 +458,16 @@ typedef union
      unsigned char a;
   } comp;

-   unsigned char c[4];
-
   unsigned int m_u32;
 } color_quad_u8;

 static inline int
 rg_etc1_color_quad_u8_clamp(int v)
 {
+   /* FIXME: (From Wikipedia)
+    * "In C, the result of right-shifting a negative value is implementation-defined"
+    * The following code assumes right-shift will duplicate the sign bit.
+    */
   if (v & 0xFFFFFF00U)
     v = ((~v) >> 31) & 0xFF;
   return v;
@ -464,14 +496,13 @@ rg_etc1_color_quad_u8_clear(color_quad_u8 *color)
 static inline unsigned int
 rg_etc1_color_quad_u8_rgb_squared_distance(color_quad_u8 color1, color_quad_u8 color2)
 {
-
   return SQUARE((color1.comp.r - color2.comp.r)) + SQUARE((color1.comp.g - color2.comp.g)) + SQUARE((color1.comp.b - color2.comp.b));
 }

+#if RG_ETC1_CONSTRAINED_SUBBLOCK
 static inline void
 rg_etc1_color_quad_u8_component_set(color_quad_u8 *color, unsigned char idx, unsigned char value)
 {
-
   switch (idx)
     {
      case 0: color->comp.r = value; break;
@ -480,14 +511,16 @@ rg_etc1_color_quad_u8_component_set(color_quad_u8 *color, unsigned char idx, uns
      case 3: color->comp.a = value; break;
      default: abort();
     }
-
 }
+#endif

+#if 0
 static inline unsigned int
 rg_etc1_color_quad_duplicate_init(unsigned char y, unsigned char alpha)
 {
   return ARGB_JOIN(alpha, y, y, y);
 }
+#endif

 static inline unsigned int
 rg_etc1_color_quad_init(unsigned char r, unsigned char g, unsigned char b, unsigned char alpha)
@ -502,10 +535,11 @@ rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color, unsigned
     {
        unsigned char r, g, b, a;

-        a = A_VAL(&old_color);
-        r = R_VAL(&new_color);
-        g = G_VAL(&new_color);
-        b = B_VAL(&new_color);
+        // Used for UNPACKING
+        a = A_VAL_SET(&old_color);
+        r = R_VAL_SET(&new_color);
+        g = G_VAL_SET(&new_color);
+        b = B_VAL_SET(&new_color);

        return ARGB_JOIN(a, r, g, b);
     }
@ -515,26 +549,31 @@ rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color, unsigned
 static inline void
 rg_etc1_color_quad_get(unsigned int color, unsigned char *r, unsigned char *g, unsigned char *b, unsigned char *alpha)
 {
-   if (r) *r = R_VAL(&color);
-   if (g) *g = G_VAL(&color);
-   if (b) *b = B_VAL(&color);
-   if (alpha) *alpha = A_VAL(&color);
+   // Used for PACKING
+   if (r) *r = R_VAL_GET(&color);
+   if (g) *g = G_VAL_GET(&color);
+   if (b) *b = B_VAL_GET(&color);
+   if (alpha) *alpha = A_VAL_GET(&color);
 }

+#if RG_ETC1_CONSTRAINED_SUBBLOCK
 static inline unsigned char
 rg_etc1_color_quad_component_get(unsigned int color, unsigned char idx)
 {
   switch (idx)
     {
-      case 0: return R_VAL(&color);
-      case 1: return G_VAL(&color);
-      case 2: return B_VAL(&color);
-      case 3: return A_VAL(&color);
+      // FIXME: Untested code (RGBA vs BGRA)
+      case 0: return R_VAL_GET(&color);
+      case 1: return G_VAL_GET(&color);
+      case 2: return B_VAL_GET(&color);
+      case 3: return A_VAL_GET(&color);
      default: abort();
     }
   return 0;
 }
+#endif

+#if 0
 static inline unsigned int
 rg_etc1_color_quad_component_set(unsigned int color, unsigned char idx, unsigned char value)
 {
@ -559,7 +598,7 @@ rg_etc1_color_quad_grayscale_set(unsigned int color, unsigned char l)
 {
   unsigned char a;

-   a = A_VAL(&color);
+   a = A_VAL_SET(&color);

   return rg_etc1_color_quad_init(l, l, l, a);
 }
@ -635,8 +674,8 @@ rg_etc1_color_quad_argb_squared_distance(unsigned int color1, unsigned int color
 static inline unsigned char
 rg_etc1_color_quad_rgb_equals(unsigned int color1, unsigned int color2)
 {
-   A_VAL(&color1) = 0;
-   A_VAL(&color2) = 0;
+   A_VAL_SET(&color1) = 0;
+   A_VAL_SET(&color2) = 0;

   return color1 == color2;
 }
@ -676,6 +715,7 @@ rg_etc1_color_quad_del(unsigned int color1, unsigned int color2)

   return color1;
 }
+#endif

 static inline void
 rg_etc1_vec_init(float v[3], float s)
@ -1162,7 +1202,7 @@ rg_etc1_block_subblock_color4_abs_get(unsigned int dst[4], unsigned short packed

 // This is the exported function to unpack a block
 bool
-rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pixels, bool preserve_alpha)
+rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pDst_pixels_BGRA, bool preserve_alpha)
 {
   unsigned char diff_flag, flip_flag, table_index0, table_index1;
   unsigned int subblock_colors0[4] = { 0 };
@ -1216,19 +1256,19 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pixels, bool preserve
        for (y = 0; y < 2; y++)
          {
             for (x = 0; x < 4; x++)
-               pixels[x] = rg_etc1_color_quad_set(pixels[x],
+               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
                                                  subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)],
                                                  preserve_alpha);
-             pixels += 4;
+             pDst_pixels_BGRA += 4;
          }

        for (y = 2; y < 4; y++)
          {
             for (x = 0; x < 4; x++)
-               pixels[x] = rg_etc1_color_quad_set(pixels[x],
+               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
                                                  subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)],
                                                  preserve_alpha);
-             pixels += 4;
+             pDst_pixels_BGRA += 4;
          }
     }
   else
@ -1236,15 +1276,15 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pixels, bool preserve
        for (y = 0; y < 4; y++)
          {
             for (x = 0; x < 2; x++)
-               pixels[x] = rg_etc1_color_quad_set(pixels[x],
+               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
                                                  subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)],
                                                  preserve_alpha);
             for (; x < 4; x++)
-               pixels[x] = rg_etc1_color_quad_set(pixels[x],
+               pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
                                                  subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)],
                                                  preserve_alpha);

-             pixels += 4;
+             pDst_pixels_BGRA += 4;
          }
     }

@ -1438,9 +1478,7 @@ rg_etc1_solution_coordinates_get_scaled_color(color_quad_u8 *color, const Etc1_S
   unsigned char br, bg, bb;

   rg_etc1_solution_coordinates_component_get(coords, &br, &bg, &bb);
-
-   rg_etc1_color_quad_u8_init(color,br, bg, bb, 255);
-
+   rg_etc1_color_quad_u8_init(color, br, bg, bb, 255);
 }

 static inline void
@ -2112,8 +2150,10 @@ void rg_etc1_pack_block_init()
 // Packs solid color blocks efficiently using a set of small precomputed tables.
 // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
 static uint64
-rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc1_pack_params *pack_params EINA_UNUSED)
+rg_etc1_pack_block_solid_color(unsigned char *block, const color_quad_u8 *color, rg_etc1_pack_params *pack_params EINA_UNUSED)
 {
+   const uint8 *pColor = (uint8 *) &color->m_u32;
+
   if (!rg_etc1_inverse_lookup[0][255])
     rg_etc1_pack_block_init();

@ -2123,8 +2163,7 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
        return 0;
     }

-   static uint s_next_comp[4] = { 1, 2, 0, 1 };
-
+   const uint s_next_comp[4] = { 1, 2, 0, 1 };
   uint best_error = cUINT32_MAX, best_i = 0;
   int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
   uint i;
@ -2133,13 +2172,15 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
   // that allow that 8-bit value to be encoded with no error.
   for (i = 0; i < 3; i++)
     {
-        const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
+        const int c0 = pColor[i];
+        const int c1 = pColor[s_next_comp[i]];
+        const int c2 = pColor[s_next_comp[i + 1]];

        const int delta_range = 1;
        int delta;
        for (delta = -delta_range; delta <= delta_range; delta++)
          {
-             const int c_plus_delta = CLAMP(pColor[i] + delta, 0, 255);
+             const int c_plus_delta = CLAMP(c0 + delta, 0, 255);

             uint16* pTable;
             if (!c_plus_delta)
@ -2168,7 +2209,7 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
                  pInverse_table = rg_etc1_inverse_lookup[x & 0xFF];
                  p1 = pInverse_table[c1];
                  p2 = pInverse_table[c2];
-                  trial_error = SQUARE((c_plus_delta - pColor[i])) + SQUARE((p1 >> 8)) + SQUARE((p2 >> 8));
+                  trial_error = SQUARE((c_plus_delta - c0)) + SQUARE((p1 >> 8)) + SQUARE((p2 >> 8));
                  if (trial_error < best_error)
                    {
                       best_error = trial_error;
@ -2216,7 +2257,7 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
   return best_error;
 }

-#if 0
+#if RG_ETC1_CONSTRAINED_SUBBLOCK
 static uint
 rg_etc1_pack_block_solid_color_constrained(rg_etc1_optimizer_results *results,uint num_colors,
                                           const uint8* pColor, rg_etc1_pack_params *pack_params EINA_UNUSED,
@ -2347,6 +2388,7 @@ rg_etc1_pack_block_solid_color_constrained(rg_etc1_optimizer_results *results,ui
 }
 #endif

+#if RG_ETC1_DITHERING
 // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555.
 static void
 rg_etc1_dither_block_555(color_quad_u8* dest, color_quad_u8* block)
@ -2393,15 +2435,33 @@ rg_etc1_dither_block_555(color_quad_u8* dest, color_quad_u8* block)
          }
     }
 }
+#endif
+
+static inline unsigned int
+_bgra_to_rgba(unsigned int val)
+{
+   // Read the components with the RGBA-order getters and repack them with ARGB_JOIN(a,r,g,b) = (a << 24) + (r << 16) + (g << 8) + b: this exchanges the R and B bytes and leaves A and G in place.
+   return ARGB_JOIN(A_VAL_GET(&val), R_VAL_GET(&val), G_VAL_GET(&val), B_VAL_GET(&val)); // NOTE(review): the DATA8 alpha is promoted to int before << 24, so values >= 128 shift into the sign bit - confirm this is acceptable on the target compilers
+}
+
+static void
+_bgra_to_rgba_block(color_quad_u8 *output, const unsigned int *input, int len) // Converts len pixels read from input into output, one _bgra_to_rgba() call per pixel
+{
+   for (int k = len; k; --k) // k only counts iterations; a len of 0 skips the loop
+     {
+        output->m_u32 = _bgra_to_rgba(*input++); // store the swapped 32-bit word, then advance the source pointer
+        output++;
+     }
+}

 unsigned int
-rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_etc1_pack_params *pack_params)
+rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_BGRA, rg_etc1_pack_params *pack_params)
 {
-   color_quad_u8* pSrc_pixels = (color_quad_u8 *)pSrc_pixels_rgba;
+   color_quad_u8 pSrc_pixels[16];
   unsigned char *dst_block = (unsigned char *)pETC1_block;
   unsigned int first_pixel_u32;
   int r;
-   color_quad_u8 dithered_pixels[16], subblock_pixels[8];
+   color_quad_u8 subblock_pixels[8];
   uint64 best_error = cUINT64_MAX;
   uint best_use_color4=EINA_FALSE;
   uint best_flip=EINA_FALSE;
@ -2417,7 +2477,6 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
   static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
   static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
   static const int s_scan_delta_0[] = { 0 };
-   first_pixel_u32 = *pSrc_pixels_rgba;

 #ifdef RG_ETC1_BUILD_DEBUG
   // Ensure all alpha values are 0xFF.
@ -2429,19 +2488,26 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
 #endif
   rg_etc1_optimizer_clear(&optimizer);

+   // Convert evas BGRA to rg_etc1 RGBA
+   _bgra_to_rgba_block(pSrc_pixels, pSrc_pixels_BGRA, 16);
+   first_pixel_u32 = pSrc_pixels[0].m_u32;
+
   // Check for solid block.
   for (r = 15; r >= 1; --r)
     if (pSrc_pixels[r].m_u32 != first_pixel_u32)
       break;
   if (!r)
-     return (unsigned int)(16 * rg_etc1_pack_block_solid_color(dst_block, &pSrc_pixels[0].comp.r, pack_params));
+     return (unsigned int)(16 * rg_etc1_pack_block_solid_color(dst_block, &pSrc_pixels[0], pack_params));

+#if RG_ETC1_DITHERING
   // Dithering gives mitigated results... It would be nice to know when to use it.
+   color_quad_u8 dithered_pixels[16];
   if (pack_params->m_dithering)
     {
        rg_etc1_dither_block_555(dithered_pixels, pSrc_pixels);
        pSrc_pixels = dithered_pixels;
     }
+#endif

   for (i = 0; i < 2; i++)
     {
@ -2473,6 +2539,8 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
             uint subblock;
             for (subblock = 0; subblock < 2; subblock++)
               {
+                  results[2].m_error = cUINT64_MAX;
+
                  if (flip)
                    // subblock is top or bottom, copy source
                    memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8);
@ -2490,10 +2558,7 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
                       rg_etc1_color_quad_u8_copy(&subblock_pixels[7], &pSrc_col[13]);
                    }

-                  results[2].m_error = cUINT64_MAX;
-
-#if 0
-                  // This feature is disabled because it will produce some visual artifacts
+#if RG_ETC1_CONSTRAINED_SUBBLOCK
                  if ((params.base_params->m_quality >= rg_etc1_medium_quality) && ((subblock) || (use_color4)))
                    {
                       const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;