rg_etc1: Fix RGBA vs BGRA mishandling of the ETC1 codec

Evas uses BGRA data while rg_etc1 uses RGBA data, so there
were incompatibilities between the two.

Now, rg_etc1 takes BGRA data as input when packing and produces BGRA data as output when unpacking.
This commit is contained in:
Jean-Philippe Andre 2014-04-22 11:52:53 +09:00
parent 2ff9b054f9
commit 59b660aae9
1 changed files with 128 additions and 63 deletions

View File

@ -46,6 +46,14 @@ typedef unsigned char DATA8;
#define cUINT64_MAX ULLONG_MAX
#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
// Some configuration defines
// Disable this constrained function, it produces artifacts (in black areas mostly)
#define RG_ETC1_CONSTRAINED_SUBBLOCK 0
// Disable dithering. It uses invalid RGBA order and isn't great visually
// Dithering should happen AFTER the color selection, not before
#define RG_ETC1_DITHERING 0
enum RG_Etc_Constants
{
cETC1BytesPerBlock = 8U,
@ -105,26 +113,48 @@ enum RG_Etc_Constants
// 0 1 2 3 -4 -3 -2 -1
};
/*
* IMPORTANT NOTE:
*
* rg_etc1 originally works only on R,G,B,A data
* evas works on B,G,R,A data
*
* ARGB_JOIN() is used for unpacking, so it will directly produce BGRA.
*
* When packing, the BGRA input is first converted to RGBA so that the
* precomputed tables can be used; this is why the X_VAL_GET() macros must be
* used on that path.
* When unpacking, BGRA data is written out directly using ARGB_JOIN() and the
* X_VAL_SET() macros.
*
* Yes, this is a mess. Maybe a clear BGRA API is needed
*/
#ifndef WORDS_BIGENDIAN
/* x86 */
#define R_VAL(p) (((DATA8 *)(p))[0])
#define G_VAL(p) (((DATA8 *)(p))[1])
#define B_VAL(p) (((DATA8 *)(p))[2])
#define A_VAL(p) (((DATA8 *)(p))[3])
#define BA_VAL(p) ((DATA16 *)(p)[1])
#define RG_VAL(p) ((DATA16 *)(p)[0])
// BGRA
#define A_VAL_SET(p) (((DATA8 *)(p))[3])
#define R_VAL_SET(p) (((DATA8 *)(p))[2])
#define G_VAL_SET(p) (((DATA8 *)(p))[1])
#define B_VAL_SET(p) (((DATA8 *)(p))[0])
// RGBA
#define A_VAL_GET(p) (((DATA8 *)(p))[3])
#define R_VAL_GET(p) (((DATA8 *)(p))[0])
#define G_VAL_GET(p) (((DATA8 *)(p))[1])
#define B_VAL_GET(p) (((DATA8 *)(p))[2])
#else
/* ppc */
#define R_VAL(p) (((DATA8 *)(p))[3])
#define G_VAL(p) (((DATA8 *)(p))[2])
#define B_VAL(p) (((DATA8 *)(p))[1])
#define A_VAL(p) (((DATA8 *)(p))[0])
#define BA_VAL(p) ((DATA16 *)(p)[0])
#define RG_VAL(p) ((DATA16 *)(p)[1])
// BIGENDIAN is untested
#define A_VAL_SET(p) (((DATA8 *)(p))[0])
#define R_VAL_SET(p) (((DATA8 *)(p))[1])
#define G_VAL_SET(p) (((DATA8 *)(p))[2])
#define B_VAL_SET(p) (((DATA8 *)(p))[3])
#define A_VAL_GET(p) (((DATA8 *)(p))[0])
#define R_VAL_GET(p) (((DATA8 *)(p))[3])
#define G_VAL_GET(p) (((DATA8 *)(p))[2])
#define B_VAL_GET(p) (((DATA8 *)(p))[1])
#endif
#define ARGB_JOIN(a,r,g,b) \
(((a) << 24) + ((b) << 16) + ((g) << 8) + (r))
// For unpacking and writing BGRA output data
#define ARGB_JOIN(a,r,g,b) \
(((a) << 24) + ((r) << 16) + ((g) << 8) + (b))
static unsigned char rg_etc_quant5_tab[256 + 16];
static const int rg_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = {
@ -428,14 +458,16 @@ typedef union
unsigned char a;
} comp;
unsigned char c[4];
unsigned int m_u32;
} color_quad_u8;
static inline int
rg_etc1_color_quad_u8_clamp(int v)
{
/* FIXME: (From Wikipedia)
* "In C, the result of right-shifting a negative value is implementation-defined"
* The following code assumes right-shift will duplicate the sign bit.
*/
if (v & 0xFFFFFF00U)
v = ((~v) >> 31) & 0xFF;
return v;
@ -464,14 +496,13 @@ rg_etc1_color_quad_u8_clear(color_quad_u8 *color)
/*
 * Sum of the squared per-channel differences between two colors.
 * Only the r, g and b components take part; alpha is not read.
 */
static inline unsigned int
rg_etc1_color_quad_u8_rgb_squared_distance(color_quad_u8 color1, color_quad_u8 color2)
{
   /* Differences are taken in int so that they can be negative. */
   const int delta_r = color1.comp.r - color2.comp.r;
   const int delta_g = color1.comp.g - color2.comp.g;
   const int delta_b = color1.comp.b - color2.comp.b;

   return SQUARE(delta_r) + SQUARE(delta_g) + SQUARE(delta_b);
}
#if RG_ETC1_CONSTRAINED_SUBBLOCK
static inline void
rg_etc1_color_quad_u8_component_set(color_quad_u8 *color, unsigned char idx, unsigned char value)
{
switch (idx)
{
case 0: color->comp.r = value; break;
@ -480,14 +511,16 @@ rg_etc1_color_quad_u8_component_set(color_quad_u8 *color, unsigned char idx, uns
case 3: color->comp.a = value; break;
default: abort();
}
}
#endif
#if 0
/*
 * Builds a packed pixel whose three color channels all carry the value y,
 * combined with the given alpha through ARGB_JOIN().
 */
static inline unsigned int
rg_etc1_color_quad_duplicate_init(unsigned char y, unsigned char alpha)
{
   const unsigned int gray_pixel = ARGB_JOIN(alpha, y, y, y);

   return gray_pixel;
}
#endif
static inline unsigned int
rg_etc1_color_quad_init(unsigned char r, unsigned char g, unsigned char b, unsigned char alpha)
@ -502,10 +535,11 @@ rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color, unsigned
{
unsigned char r, g, b, a;
a = A_VAL(&old_color);
r = R_VAL(&new_color);
g = G_VAL(&new_color);
b = B_VAL(&new_color);
// Used for UNPACKING
a = A_VAL_SET(&old_color);
r = R_VAL_SET(&new_color);
g = G_VAL_SET(&new_color);
b = B_VAL_SET(&new_color);
return ARGB_JOIN(a, r, g, b);
}
@ -515,26 +549,31 @@ rg_etc1_color_quad_set(unsigned int old_color, unsigned int new_color, unsigned
static inline void
rg_etc1_color_quad_get(unsigned int color, unsigned char *r, unsigned char *g, unsigned char *b, unsigned char *alpha)
{
if (r) *r = R_VAL(&color);
if (g) *g = G_VAL(&color);
if (b) *b = B_VAL(&color);
if (alpha) *alpha = A_VAL(&color);
// Used for PACKING
if (r) *r = R_VAL_GET(&color);
if (g) *g = G_VAL_GET(&color);
if (b) *b = B_VAL_GET(&color);
if (alpha) *alpha = A_VAL_GET(&color);
}
#if RG_ETC1_CONSTRAINED_SUBBLOCK
static inline unsigned char
rg_etc1_color_quad_component_get(unsigned int color, unsigned char idx)
{
switch (idx)
{
case 0: return R_VAL(&color);
case 1: return G_VAL(&color);
case 2: return B_VAL(&color);
case 3: return A_VAL(&color);
// FIXME: Untested code (RGBA vs BGRA)
case 0: return R_VAL_GET(&color);
case 1: return G_VAL_GET(&color);
case 2: return B_VAL_GET(&color);
case 3: return A_VAL_GET(&color);
default: abort();
}
return 0;
}
#endif
#if 0
static inline unsigned int
rg_etc1_color_quad_component_set(unsigned int color, unsigned char idx, unsigned char value)
{
@ -559,7 +598,7 @@ rg_etc1_color_quad_grayscale_set(unsigned int color, unsigned char l)
{
unsigned char a;
a = A_VAL(&color);
a = A_VAL_SET(&color);
return rg_etc1_color_quad_init(l, l, l, a);
}
@ -635,8 +674,8 @@ rg_etc1_color_quad_argb_squared_distance(unsigned int color1, unsigned int color
static inline unsigned char
rg_etc1_color_quad_rgb_equals(unsigned int color1, unsigned int color2)
{
A_VAL(&color1) = 0;
A_VAL(&color2) = 0;
A_VAL_SET(&color1) = 0;
A_VAL_SET(&color2) = 0;
return color1 == color2;
}
@ -676,6 +715,7 @@ rg_etc1_color_quad_del(unsigned int color1, unsigned int color2)
return color1;
}
#endif
static inline void
rg_etc1_vec_init(float v[3], float s)
@ -1162,7 +1202,7 @@ rg_etc1_block_subblock_color4_abs_get(unsigned int dst[4], unsigned short packed
// This is the exported function to unpack a block
bool
rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pixels, bool preserve_alpha)
rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pDst_pixels_BGRA, bool preserve_alpha)
{
unsigned char diff_flag, flip_flag, table_index0, table_index1;
unsigned int subblock_colors0[4] = { 0 };
@ -1216,19 +1256,19 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pixels, bool preserve
for (y = 0; y < 2; y++)
{
for (x = 0; x < 4; x++)
pixels[x] = rg_etc1_color_quad_set(pixels[x],
pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)],
preserve_alpha);
pixels += 4;
pDst_pixels_BGRA += 4;
}
for (y = 2; y < 4; y++)
{
for (x = 0; x < 4; x++)
pixels[x] = rg_etc1_color_quad_set(pixels[x],
pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)],
preserve_alpha);
pixels += 4;
pDst_pixels_BGRA += 4;
}
}
else
@ -1236,15 +1276,15 @@ rg_etc1_unpack_block(const void *ETC1_block, unsigned int *pixels, bool preserve
for (y = 0; y < 4; y++)
{
for (x = 0; x < 2; x++)
pixels[x] = rg_etc1_color_quad_set(pixels[x],
pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
subblock_colors0[rg_etc1_block_selector_get(ETC1_block, x, y)],
preserve_alpha);
for (; x < 4; x++)
pixels[x] = rg_etc1_color_quad_set(pixels[x],
pDst_pixels_BGRA[x] = rg_etc1_color_quad_set(pDst_pixels_BGRA[x],
subblock_colors1[rg_etc1_block_selector_get(ETC1_block, x, y)],
preserve_alpha);
pixels += 4;
pDst_pixels_BGRA += 4;
}
}
@ -1438,9 +1478,7 @@ rg_etc1_solution_coordinates_get_scaled_color(color_quad_u8 *color, const Etc1_S
unsigned char br, bg, bb;
rg_etc1_solution_coordinates_component_get(coords, &br, &bg, &bb);
rg_etc1_color_quad_u8_init(color,br, bg, bb, 255);
rg_etc1_color_quad_u8_init(color, br, bg, bb, 255);
}
static inline void
@ -2112,8 +2150,10 @@ void rg_etc1_pack_block_init()
// Packs solid color blocks efficiently using a set of small precomputed tables.
// For random 888 inputs, MSE results are better than Ericsson's ETC1 packer in "slow" mode ~9.5% of the time, slightly worse only ~.01% of the time, and equal the rest of the time.
static uint64
rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc1_pack_params *pack_params EINA_UNUSED)
rg_etc1_pack_block_solid_color(unsigned char *block, const color_quad_u8 *color, rg_etc1_pack_params *pack_params EINA_UNUSED)
{
const uint8 *pColor = (uint8 *) &color->m_u32;
if (!rg_etc1_inverse_lookup[0][255])
rg_etc1_pack_block_init();
@ -2123,8 +2163,7 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
return 0;
}
static uint s_next_comp[4] = { 1, 2, 0, 1 };
const uint s_next_comp[4] = { 1, 2, 0, 1 };
uint best_error = cUINT32_MAX, best_i = 0;
int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;
uint i;
@ -2133,13 +2172,15 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
// that allow that 8-bit value to be encoded with no error.
for (i = 0; i < 3; i++)
{
const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];
const int c0 = pColor[i];
const int c1 = pColor[s_next_comp[i]];
const int c2 = pColor[s_next_comp[i + 1]];
const int delta_range = 1;
int delta;
for (delta = -delta_range; delta <= delta_range; delta++)
{
const int c_plus_delta = CLAMP(pColor[i] + delta, 0, 255);
const int c_plus_delta = CLAMP(c0 + delta, 0, 255);
uint16* pTable;
if (!c_plus_delta)
@ -2168,7 +2209,7 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
pInverse_table = rg_etc1_inverse_lookup[x & 0xFF];
p1 = pInverse_table[c1];
p2 = pInverse_table[c2];
trial_error = SQUARE((c_plus_delta - pColor[i])) + SQUARE((p1 >> 8)) + SQUARE((p2 >> 8));
trial_error = SQUARE((c_plus_delta - c0)) + SQUARE((p1 >> 8)) + SQUARE((p2 >> 8));
if (trial_error < best_error)
{
best_error = trial_error;
@ -2216,7 +2257,7 @@ rg_etc1_pack_block_solid_color(unsigned char *block, const uint8* pColor, rg_etc
return best_error;
}
#if 0
#if RG_ETC1_CONSTRAINED_SUBBLOCK
static uint
rg_etc1_pack_block_solid_color_constrained(rg_etc1_optimizer_results *results,uint num_colors,
const uint8* pColor, rg_etc1_pack_params *pack_params EINA_UNUSED,
@ -2347,6 +2388,7 @@ rg_etc1_pack_block_solid_color_constrained(rg_etc1_optimizer_results *results,ui
}
#endif
#if RG_ETC1_DITHERING
// Function originally from RYG's public domain real-time DXT1 compressor, modified for 555.
static void
rg_etc1_dither_block_555(color_quad_u8* dest, color_quad_u8* block)
@ -2393,15 +2435,33 @@ rg_etc1_dither_block_555(color_quad_u8* dest, color_quad_u8* block)
}
}
}
#endif
/*
 * Swaps the red and blue channels of one packed 32-bit pixel, turning BGRA
 * into RGBA (the operation is its own inverse).
 *
 * Bits 0-7 and bits 16-23 trade places; bits 8-15 (green) and bits 24-31
 * (alpha) are left where they are.
 *
 * The work is done purely with unsigned masks and shifts: this avoids
 * left-shifting a promoted (signed) int by 24 when alpha >= 0x80, which is
 * undefined behaviour, and makes the result independent of host byte order.
 * Assumes unsigned int is 32 bits wide.
 */
static inline unsigned int
_bgra_to_rgba(unsigned int val)
{
   const unsigned int alpha_green = val & 0xFF00FF00u;
   const unsigned int high_to_low = (val & 0x00FF0000u) >> 16;
   const unsigned int low_to_high = (val & 0x000000FFu) << 16;

   return alpha_green | high_to_low | low_to_high;
}
/*
 * Converts `len` packed pixels from `input` into `output`, passing each one
 * through _bgra_to_rgba() and storing the result in the m_u32 member.
 *
 * output: destination array, must hold at least `len` elements.
 * input:  source pixels, must hold at least `len` elements.
 * len:    number of pixels to convert; zero or a negative value converts
 *         nothing (a negative count previously made the countdown loop run
 *         far past the end of both arrays).
 */
static void
_bgra_to_rgba_block(color_quad_u8 *output, const unsigned int *input, int len)
{
   for (int i = 0; i < len; i++)
     output[i].m_u32 = _bgra_to_rgba(input[i]);
}
unsigned int
rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_etc1_pack_params *pack_params)
rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_BGRA, rg_etc1_pack_params *pack_params)
{
color_quad_u8* pSrc_pixels = (color_quad_u8 *)pSrc_pixels_rgba;
color_quad_u8 pSrc_pixels[16];
unsigned char *dst_block = (unsigned char *)pETC1_block;
unsigned int first_pixel_u32;
int r;
color_quad_u8 dithered_pixels[16], subblock_pixels[8];
color_quad_u8 subblock_pixels[8];
uint64 best_error = cUINT64_MAX;
uint best_use_color4=EINA_FALSE;
uint best_flip=EINA_FALSE;
@ -2417,7 +2477,6 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
static const int s_scan_delta_0_to_4[] = { -4, -3, -2, -1, 0, 1, 2, 3, 4 };
static const int s_scan_delta_0_to_1[] = { -1, 0, 1 };
static const int s_scan_delta_0[] = { 0 };
first_pixel_u32 = *pSrc_pixels_rgba;
#ifdef RG_ETC1_BUILD_DEBUG
// Ensure all alpha values are 0xFF.
@ -2429,19 +2488,26 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
#endif
rg_etc1_optimizer_clear(&optimizer);
// Convert evas BGRA to rg_etc1 RGBA
_bgra_to_rgba_block(pSrc_pixels, pSrc_pixels_BGRA, 16);
first_pixel_u32 = pSrc_pixels[0].m_u32;
// Check for solid block.
for (r = 15; r >= 1; --r)
if (pSrc_pixels[r].m_u32 != first_pixel_u32)
break;
if (!r)
return (unsigned int)(16 * rg_etc1_pack_block_solid_color(dst_block, &pSrc_pixels[0].comp.r, pack_params));
return (unsigned int)(16 * rg_etc1_pack_block_solid_color(dst_block, &pSrc_pixels[0], pack_params));
#if RG_ETC1_DITHERING
// Dithering gives mitigated results... It would be nice to know when to use it.
color_quad_u8 dithered_pixels[16];
if (pack_params->m_dithering)
{
rg_etc1_dither_block_555(dithered_pixels, pSrc_pixels);
pSrc_pixels = dithered_pixels;
}
#endif
for (i = 0; i < 2; i++)
{
@ -2473,6 +2539,8 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
uint subblock;
for (subblock = 0; subblock < 2; subblock++)
{
results[2].m_error = cUINT64_MAX;
if (flip)
// subblock is top or bottom, copy source
memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8);
@ -2490,10 +2558,7 @@ rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, rg_e
rg_etc1_color_quad_u8_copy(&subblock_pixels[7], &pSrc_col[13]);
}
results[2].m_error = cUINT64_MAX;
#if 0
// This feature is disabled because it will produce some visual artifacts
#if RG_ETC1_CONSTRAINED_SUBBLOCK
if ((params.base_params->m_quality >= rg_etc1_medium_quality) && ((subblock) || (use_color4)))
{
const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32;