diff --git a/src/Makefile_Evas.am b/src/Makefile_Evas.am index 39a00aec37..c98f80c1db 100644 --- a/src/Makefile_Evas.am +++ b/src/Makefile_Evas.am @@ -1816,6 +1816,7 @@ modules/evas/savers/tgv/evas_image_save_tgv.c \ static_libs/rg_etc/rg_etc1.c \ static_libs/rg_etc/rg_etc2.c \ static_libs/rg_etc/rg_etc1.h \ +static_libs/rg_etc/etc2_encoder.c \ static_libs/lz4/lz4.c \ static_libs/lz4/lz4.h \ static_libs/lz4/lz4hc.c \ @@ -1871,6 +1872,7 @@ modules_evas_savers_tgv_module_la_SOURCES = \ modules/evas/savers/tgv/evas_image_save_tgv.c \ static_libs/rg_etc/rg_etc1.c \ static_libs/rg_etc/rg_etc1.h \ +static_libs/rg_etc/etc2_encoder.c \ static_libs/lz4/lz4.c \ static_libs/lz4/lz4.h \ static_libs/lz4/lz4hc.c \ diff --git a/src/modules/evas/savers/tgv/evas_image_save_tgv.c b/src/modules/evas/savers/tgv/evas_image_save_tgv.c index 8b2b50d063..f85b32f7fe 100644 --- a/src/modules/evas/savers/tgv/evas_image_save_tgv.c +++ b/src/modules/evas/savers/tgv/evas_image_save_tgv.c @@ -13,6 +13,13 @@ #include "lz4hc.h" #include "rg_etc1.h" +// FIXME: Remove DEBUG +#define DEBUG +#if defined(DEBUG) && defined(HAVE_CLOCK_GETTIME) && defined(_POSIX_MONOTONIC_CLOCK) +# include +# define DEBUG_STATS +#endif + static int _block_size_get(int size) { @@ -147,13 +154,26 @@ evas_image_save_file_tgv(RGBA_Image *im, { rg_etc1_pack_params param; FILE *f; - char *comp; - char *buffer; + uint8_t *comp = NULL; + uint8_t *buffer; uint32_t *data; uint32_t width, height; uint8_t header[8] = "TGV1"; int block_width, block_height, macro_block_width, macro_block_height; - int block_count, image_stride, image_height; + int block_count, image_stride, image_height, etc_block_size; + Evas_Colorspace cspace; + Eina_Bool alpha; + +#ifdef DEBUG_STATS + struct timespec ts1, ts2; + long long tsdiff, mse = 0, mse_div = 0, mse_alpha = 0; + clock_gettime(CLOCK_MONOTONIC, &ts1); +#endif + + /* FIXME: How to tell the encoder to encode as ETC1 or ETC2? + * The save API is weak. For now, assume ETC2 iif there's alpha. + * As long as we don't have a full ETC2 encoder, this is fine. + */ if (!im || !im->image.data || !file) return 0; @@ -161,8 +181,7 @@ evas_image_save_file_tgv(RGBA_Image *im, switch (im->cache_entry.space) { case EVAS_COLORSPACE_ARGB8888: - if (im->cache_entry.flags.alpha) - return 0; + alpha = im->cache_entry.flags.alpha; break; case EVAS_COLORSPACE_ETC1: case EVAS_COLORSPACE_RGB8_ETC2: @@ -177,6 +196,7 @@ evas_image_save_file_tgv(RGBA_Image *im, data = im->image.data; width = htonl(image_stride); height = htonl(image_height); + compress = !!compress; // Disable dithering, as it will deteriorate the quality of flat surfaces param.m_dithering = 0; @@ -193,11 +213,22 @@ evas_image_save_file_tgv(RGBA_Image *im, block_height = _block_size_get(image_height + 2); header[4] = (block_height << 4) | block_width; - // header[5]: 0 for ETC1 - header[5] = 0; + // header[5]: 0 for ETC1, 1 for RGB8_ETC2, 2 for RGBA8_ETC2_EAC + if (alpha) + { + cspace = EVAS_COLORSPACE_RGBA8_ETC2_EAC; + etc_block_size = 16; + header[5] = 2; + } + else + { + cspace = EVAS_COLORSPACE_ETC1; + etc_block_size = 8; + header[5] = 0; + } // header[6]: 0 for raw, 1, for LZ4 compressed - header[6] = (!!compress & 0x1); + header[6] = compress; // header[7]: options (unused) header[7] = 0; @@ -216,16 +247,10 @@ evas_image_save_file_tgv(RGBA_Image *im, // Number of ETC1 blocks in a compressed block block_count = (macro_block_width * macro_block_height) / (4 * 4); - buffer = alloca(block_count * 8); + buffer = alloca(block_count * etc_block_size); if (compress) - { - comp = alloca(LZ4_compressBound(block_count * 8)); - } - else - { - comp = NULL; - } + comp = alloca(LZ4_compressBound(block_count * etc_block_size)); // Write macro block for (int y = 0; y < image_height + 2; y += macro_block_height) @@ -240,7 +265,7 @@ evas_image_save_file_tgv(RGBA_Image *im, for (int x = 0; x < image_stride + 2; x += macro_block_width) { - char *offset = buffer; + uint8_t *offset = buffer; int real_x = x; if (x == 0) real_x = 0; @@ -315,19 +340,56 @@ evas_image_save_file_tgv(RGBA_Image *im, } } - rg_etc1_pack_block(offset, (unsigned int*) todo, ¶m); - offset += 8; + switch (cspace) + { + case EVAS_COLORSPACE_ETC1: + rg_etc1_pack_block(offset, (uint32_t *) todo, ¶m); + break; + case EVAS_COLORSPACE_RGB8_ETC2: + etc2_rgb8_block_pack(offset, (uint32_t *) todo, ¶m); + break; + case EVAS_COLORSPACE_RGBA8_ETC2_EAC: + etc2_rgba8_block_pack(offset, (uint32_t *) todo, ¶m); + break; + default: return 0; + } + +#ifdef DEBUG_STATS + { + // Decode to compute PSNR, this is slow. + uint32_t done[16]; + + if (alpha) + rg_etc2_rgba8_decode_block(offset, done); + else + rg_etc2_rgb8_decode_block(offset, done); + + for (int k = 0; k < 16; k++) + { + const int r = (R_VAL(&(todo[k])) - R_VAL(&(done[k]))); + const int g = (G_VAL(&(todo[k])) - G_VAL(&(done[k]))); + const int b = (B_VAL(&(todo[k])) - B_VAL(&(done[k]))); + const int a = (A_VAL(&(todo[k])) - A_VAL(&(done[k]))); + mse += r*r + g*g + b*b; + if (alpha) mse_alpha += a*a; + mse_div++; + } + } +#endif + + offset += etc_block_size; } } if (compress) { - wlen = LZ4_compressHC(buffer, comp, block_count * 8); + wlen = LZ4_compressHC((char *) buffer, (char *) comp, + block_count * etc_block_size); } else { comp = buffer; - wlen = block_count * 8; + wlen = block_count * etc_block_size; } if (wlen > 0) @@ -350,6 +412,29 @@ evas_image_save_file_tgv(RGBA_Image *im, } fclose(f); +#ifdef DEBUG_STATS + if (mse_div && mse) + { + // TODO: Add DSSIM too + double dmse = (double) mse / (double) (mse_div * 3.0); + double psnr = 20 * log10(255.0) - 10 * log10(dmse); + double dmse_alpha = (double) mse_alpha / (double) mse_div; + double psnr_alpha = (dmse_alpha > 0.0) ? (20 * log10(255.0) - 10 * log10(dmse_alpha)) : 0; + clock_gettime(CLOCK_MONOTONIC, &ts2); + tsdiff = ((ts2.tv_sec - ts1.tv_sec) * 1000LL) + ((ts2.tv_nsec - ts1.tv_nsec) / 1000000LL); + if (!alpha) + { + INF("ETC%d encoding stats: %dx%d, Time: %lldms, RGB PSNR: %.02fdB", + alpha ? 2 : 1, image_stride, image_height, tsdiff, psnr); + } + else + { + INF("ETC%d encoding stats: %dx%d, Time: %lldms, RGB PSNR: %.02fdB, Alpha PSNR: %.02fdB", + alpha ? 2 : 1, image_stride, image_height, tsdiff, psnr, psnr_alpha); + } + } +#endif + return 1; on_error: diff --git a/src/static_libs/rg_etc/etc2_encoder.c b/src/static_libs/rg_etc/etc2_encoder.c new file mode 100644 index 0000000000..b4db45062f --- /dev/null +++ b/src/static_libs/rg_etc/etc2_encoder.c @@ -0,0 +1,1036 @@ +/* +Copyright (C) 2014 Jean-Philippe ANDRE + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include "rg_etc1.h" + +// Enable this flag when working on (quality) optimizations +//#define DEBUG + +// Weights for the distance (perceptual mode) - sum is ~1024 +static const int R_WEIGHT = 299 * 1024 / 1000; +static const int G_WEIGHT = 587 * 1024 / 1000; +static const int B_WEIGHT = 114 * 1024 / 1000; + +static const int kTargetError[3] = { + 5*5*16, // 34 dB + 2*2*16, // 42 dB + 0 // infinite dB +}; + +// For T and H modes +static const int kDistances[8] = { + 3, 6, 11, 16, 23, 32, 41, 64 +}; + +// For differential mode +static const int kSigned3bit[8] = { + 0, 1, 2, 3, -4, -3, -2, -1 +}; + +// For alpha support +static const int kAlphaModifiers[16][8] = { + { -3, -6, -9, -15, 2, 5, 8, 14}, + { -3, -7, -10, -13, 2, 6, 9, 12}, + { -2, -5, -8, -13, 1, 4, 7, 12}, + { -2, -4, -6, -13, 1, 3, 5, 12}, + { -3, -6, -8, -12, 2, 5, 7, 11}, + { -3, -7, -9, -11, 2, 6, 8, 10}, + { -4, -7, -8, -11, 3, 6, 7, 10}, + { -3, -5, -8, -11, 2, 4, 7, 10}, + { -2, -6, -8, -10, 1, 5, 7, 9}, + { -2, -5, -8, -10, 1, 4, 7, 9}, + { -2, -4, -8, -10, 1, 3, 7, 9}, + { -2, -5, -7, -10, 1, 4, 6, 9}, + { -3, -4, -7, -10, 2, 3, 6, 9}, + { -1, -2, -3, -10, 0, 1, 2, 9}, + { -4, -6, -8, -9, 3, 5, 7, 8}, + { -3, -5, -7, -9, 2, 4, 6, 8} +}; + +// Damn OpenGL people, why don't you just pack data as on a CPU??? +static const int kBlockWalk[16] = { + 0, 4, 8, 12, + 1, 5, 9, 13, + 2, 6, 10, 14, + 3, 7, 11, 15 +}; + +// Use with static constants so the compiler can optimize everything +#define BITS(byteval, lowbit, highbit) \ + (((byteval) >> (lowbit)) & ((1 << ((highbit) - (lowbit) + 1)) - 1)) + +#define BIT(byteval, bit) \ + (((byteval) >> (bit)) & 0x1) + +// Real clamp +#define CLAMP(a) ({ int _b = (a); (((_b) >= 0) ? (((_b) < 256) ? (_b) : 255) : 0); }) + +// Simple min +#define MIN(a,b) ({ int _z = (a), _y = (b); ((_z <= _y) ? _z : _y); }) + +// Simple max +#define MAX(a,b) ({ int __z = (a), __y = (b); ((__z > __y) ? __z : __y); }) + +// Simple clamp between two values +#define MINMAX(a,b,c) (MIN(c,MAX(a,b))) + +// Simple abs +#define ABS(a) ({ int _a = (a); ((_a >= 0) ? _a : (-_a)); }) + +// Write a BGRA value for output to Evas +#define BGRA(r,g,b,a) ((a << 24) | (r << 16) | (g << 8) | b) + +#ifndef WORDS_BIGENDIAN +/* x86 */ +#define A_VAL(p) (((uint8_t *)(p))[3]) +#define R_VAL(p) (((uint8_t *)(p))[2]) +#define G_VAL(p) (((uint8_t *)(p))[1]) +#define B_VAL(p) (((uint8_t *)(p))[0]) +#define R_IDX 2 +#define G_IDX 1 +#define B_IDX 0 +#else +/* ppc */ +#define A_VAL(p) (((uint8_t *)(p))[0]) +#define R_VAL(p) (((uint8_t *)(p))[1]) +#define G_VAL(p) (((uint8_t *)(p))[2]) +#define B_VAL(p) (((uint8_t *)(p))[3]) +#define R_IDX 1 +#define G_IDX 2 +#define B_IDX 3 +#endif + +#ifndef DBG +# ifdef DEBUG +# define DBG(fmt, ...) fprintf(stderr, "%s:%d: " fmt "\n", __FUNCTION__, __LINE__, ## __VA_ARGS__) +# else +# define DBG(...) +# endif +#endif + +/** Pack alpha block given a modifier table and a multiplier + * @returns Squared error + */ +static int +_etc2_alpha_block_pack(uint8_t *etc2_alpha, + const int base_codeword, + const int multiplier, + const int modifierIdx, + const uint32_t *bgra, + const Eina_Bool write) +{ + const int *alphaModifiers = kAlphaModifiers[modifierIdx]; + uint8_t alphaIndexes[16]; + int errAcc2 = 0; + + // Header + if (write) + { + etc2_alpha[0] = base_codeword & 0xFF; + etc2_alpha[1] = ((multiplier << 4) & 0xF0) | (modifierIdx & 0x0F); + } + + // Compute alphas now + for (int i = 0; i < 16; i++) + { + const int realA = A_VAL(bgra + kBlockWalk[i]); + int minErr = INT_MAX, idx = 0; + + // Brute force -- find modifier index + for (int k = 0; (k < 8) && minErr; k++) + { + int tryA = CLAMP(base_codeword + alphaModifiers[k] * multiplier); + int err = ABS(realA - tryA); + if (err < minErr) + { + minErr = err; + idx = k; + if (!minErr) break; + } + } + + alphaIndexes[i] = idx; + + // Keep some stats + errAcc2 += minErr * minErr; + } + + if (write) + for (int k = 0; k < 2; k++) + { + etc2_alpha[2 + 3 * k] = alphaIndexes[0 + 8 * k] << 5; // A + etc2_alpha[2 + 3 * k] |= alphaIndexes[1 + 8 * k] << 2; // B + etc2_alpha[2 + 3 * k] |= (alphaIndexes[2 + 8 * k] >> 1) & 0x3; // C01 + etc2_alpha[3 + 3 * k] = (alphaIndexes[2 + 8 * k] & 0x1) << 7; // C2 + etc2_alpha[3 + 3 * k] |= alphaIndexes[3 + 8 * k] << 4; // D + etc2_alpha[3 + 3 * k] |= alphaIndexes[4 + 8 * k] << 1; // E + etc2_alpha[3 + 3 * k] |= (alphaIndexes[5 + 8 * k] >> 2) & 0x1; // F0 + etc2_alpha[4 + 3 * k] = (alphaIndexes[5 + 8 * k] & 0x3) << 6; // F12 + etc2_alpha[4 + 3 * k] |= alphaIndexes[6 + 8 * k] << 3; // G + etc2_alpha[4 + 3 * k] |= alphaIndexes[7 + 8 * k]; // H + } + + return errAcc2; +} + +static int +_etc2_alpha_encode(uint8_t *etc2_alpha, const uint32_t *bgra, + const rg_etc1_pack_params *params) +{ + int alphas[16], avg = 0, diff = 0, maxDiff = 0, minErr = INT_MAX; + int base_codeword; + int multiplier, bestMult = 0; + int modifierIdx, bestIdx = 0, bestBase = 128; + int err, base_range, base_step = 1, max_error = 0; + + // Try to select the best alpha value (avg) + for (int i = 0; i < 16; i++) + { + alphas[i] = A_VAL(bgra + kBlockWalk[i]); + avg += alphas[i]; + } + avg /= 16; + + for (int i = 0; i < 16; i++) + { + int thisDiff = ABS(alphas[i] - avg); + maxDiff = MAX(thisDiff, maxDiff); + diff += thisDiff; + } + + base_codeword = alphas[0]; + if (!diff) + { + // All same alphas + etc2_alpha[0] = base_codeword; + memset(etc2_alpha + 1, 0, 7); + return 0; + } + + // Bruteforce -- try all tables and all multipliers, oh my god this will be slow. + max_error = kTargetError[params->m_quality]; + switch (params->m_quality) + { + // The follow parameters are completely arbitrary. + // Need some real testing. + case rg_etc1_high_quality: // exhaustive search + base_range = 255; + base_step = 1; + break; + case rg_etc1_medium_quality: // tweaked for "decent" results + base_range = 40; + base_step = 4; + break; + case rg_etc1_low_quality: // fast (not even fastest) + base_range = 8; + base_step = 4; + break; + } + + // for loop avg, avg-1, avg+1, avg-2, avg+2, ... + for (int step = 0; step < base_range; step += base_step) + for (base_codeword = CLAMP(avg - step); + base_codeword <= CLAMP(avg + step);) + { + for (modifierIdx = 0; modifierIdx < 16; modifierIdx++) + for (multiplier = 0; multiplier < 16; multiplier++) + { + if ((ABS(multiplier * kAlphaModifiers[modifierIdx][3]) + ABS(base_codeword - avg)) < maxDiff) + continue; + + err = _etc2_alpha_block_pack(etc2_alpha, base_codeword, + multiplier, modifierIdx, bgra, EINA_FALSE); + if (err < minErr) + { + minErr = err; + bestMult = multiplier; + bestIdx = modifierIdx; + bestBase = base_codeword; + if (err <= max_error) + goto pack_now; + + } + } + if (step <= 0) break; + if (base_codeword < 255) + base_codeword = CLAMP(base_codeword + 2 * step); + else + break; + } + +pack_now: + err = _etc2_alpha_block_pack(etc2_alpha, bestBase, + bestMult, bestIdx, bgra, EINA_TRUE); + return err; +} + +static Eina_Bool +_etc2_t_mode_header_pack(uint8_t *etc2, + uint32_t color1, uint32_t color2, int distance) +{ + // 4 bit colors + int r1_4 = R_VAL(&color1) >> 4; + int g1_4 = G_VAL(&color1) >> 4; + int b1_4 = B_VAL(&color1) >> 4; + int r2_4 = R_VAL(&color2) >> 4; + int g2_4 = G_VAL(&color2) >> 4; + int b2_4 = B_VAL(&color2) >> 4; + int distanceIdx, R, dR; + + for (distanceIdx = 0; distanceIdx < 8; distanceIdx++) + if (kDistances[distanceIdx] == distance) break; + + if (distanceIdx >= 8) + return EINA_FALSE; + + // R1. R + [dR] must be outside [0..31]. Scanning all values. Not smart. + R = r1_4 >> 2; + dR = r1_4 & 0x3; + for (int Rx = 0; Rx < 8; Rx++) + for (int dRx = 0; dRx < 2; dRx++) + { + int Rtry = R | (Rx << 2); + int dRtry = dR | (dRx << 2); + if ((Rtry + kSigned3bit[dRtry]) < 0 || (Rtry + kSigned3bit[dRtry] > 31)) + { + R = Rtry; + dR = dRtry; + break; + } + } + if ((R + kSigned3bit[dR]) >= 0 && (R + kSigned3bit[dR] <= 31)) + // this can't happen, should be an assert + return EINA_FALSE; + + etc2[0] = ((R & 0x1F) << 3) | (dR & 0x7); + + // G1, B1 + etc2[1] = (g1_4 << 4) | b1_4; + + // R2, G2 + etc2[2] = (r2_4 << 4) | g2_4; + + // B2, distance + etc2[3] = (b2_4 << 4) | ((distanceIdx >> 1) << 2) | (1 << 1) | (distanceIdx & 0x1); + + return EINA_TRUE; +} + +static Eina_Bool +_etc2_h_mode_header_pack(uint8_t *etc2, Eina_Bool *swap_colors, + uint32_t color1, uint32_t color2, int distance) +{ + int distanceIdx, R, dR, G, dG, distanceSpecialBit, da, db; + int r1_4, g1_4, b1_4, r2_4, g2_4, b2_4; + uint32_t c1, c2; + + for (distanceIdx = 0; distanceIdx < 8; distanceIdx++) + if (kDistances[distanceIdx] == distance) break; + + if (distanceIdx >= 8) + return EINA_FALSE; + + // The distance is coded in 3 bits. But in H mode, one bit is not coded + // in the header, as we use the comparison result between the two colors + // to select it. + distanceSpecialBit = BIT(distanceIdx, 0); + db = BIT(distanceIdx, 1); + da = BIT(distanceIdx, 2); + + // Note: if c1 == c2, no big deal because H is not the best choice of mode + if (distanceSpecialBit) + { + c1 = MAX(color1, color2); + c2 = MIN(color1, color2); + } + else + { + c1 = MIN(color1, color2); + c2 = MAX(color1, color2); + } + + // Return flag so we use the proper colors when packing the block + *swap_colors = (c1 != color1); + + // 4 bit colors + r1_4 = R_VAL(&c1) >> 4; + g1_4 = G_VAL(&c1) >> 4; + b1_4 = B_VAL(&c1) >> 4; + r2_4 = R_VAL(&c2) >> 4; + g2_4 = G_VAL(&c2) >> 4; + b2_4 = B_VAL(&c2) >> 4; + + // R1 + G1a. R + [dR] must be inside [0..31]. Scanning all values. Not smart. + R = r1_4; + dR = g1_4 >> 1; + if ((R + kSigned3bit[dR]) < 0 || (R + kSigned3bit[dR] > 31)) + R |= (1 << 4); + + if ((R + kSigned3bit[dR]) < 0 || (R + kSigned3bit[dR] > 31)) + return EINA_FALSE; // wtf? + + etc2[0] = ((R & 0x1F) << 3) | (dR & 0x7); + + // G1b + B1a + B1b[2 msb]. G + dG must be outside the range. + G = (g1_4 & 0x1) << 1; + G |= BIT(b1_4, 3); + dG = BITS(b1_4, 1, 2); + for (int Gx = 0; Gx < 8; Gx++) + for (int dGx = 0; dGx < 2; dGx++) + { + int Gtry = G | (Gx << 2); + int dGtry = dG | (dGx << 2); + if ((Gtry + kSigned3bit[dGtry]) < 0 || (Gtry + kSigned3bit[dGtry] > 31)) + { + G = Gtry; + dG = dGtry; + break; + } + } + + if ((G + kSigned3bit[dG]) >= 0 && (G + kSigned3bit[dG] <= 31)) + return EINA_FALSE; // wtf? + + etc2[1] = ((G & 0x1F) << 3) | (dG & 0x7); + + // B1[lsb] + R2 + G2 [3 msb] + etc2[2] = ((b1_4 & 0x1) << 7) | (r2_4 << 3) | (g2_4 >> 1); + + // G2[lsb] + B2 + distance + etc2[3] = ((g2_4 & 0x1) << 7) | (b2_4 << 3) + | (da << 2) | 0x2 | db; + + return EINA_TRUE; +} + +static inline int +_rgb_distance_percept(uint32_t color1, uint32_t color2) +{ + int R = R_VAL(&color1) - R_VAL(&color2); + int G = G_VAL(&color1) - G_VAL(&color2); + int B = B_VAL(&color1) - B_VAL(&color2); + return (R * R * R_WEIGHT) + (G * G * G_WEIGHT) + (B * B * B_WEIGHT); +} + +static inline int +_rgb_distance_euclid(uint32_t color1, uint32_t color2) +{ + int R = R_VAL(&color1) - R_VAL(&color2); + int G = G_VAL(&color1) - G_VAL(&color2); + int B = B_VAL(&color1) - B_VAL(&color2); + return (R * R) + (G * G) + (B * B); +} + +static unsigned int +_etc2_th_mode_block_pack(uint8_t *etc2, Eina_Bool h_mode, + uint32_t color1, uint32_t color2, int distance, + const uint32_t *bgra, Eina_Bool write, + Eina_Bool *swap_colors) +{ + uint8_t paint_colors[4][4]; + int errAcc = 0; + + if (write) + { + memset(etc2 + 4, 0, 4); + if (!h_mode) + { + if (!_etc2_t_mode_header_pack(etc2, color1, color2, distance)) + return INT_MAX; // assert + } + else + { + if (!_etc2_h_mode_header_pack(etc2, swap_colors, + color1, color2, distance)) + return INT_MAX; // assert + if (*swap_colors) + { + uint32_t tmp = color1; + color1 = color2; + color2 = tmp; + } + } + } + + for (int k = 0; k < 4; k++) + { + if (!h_mode) + { + paint_colors[0][k] = ((uint8_t *) &color1)[k]; + paint_colors[1][k] = CLAMP(((uint8_t *) &color2)[k] + distance); + paint_colors[2][k] = ((uint8_t *) &color2)[k]; + paint_colors[3][k] = CLAMP(((uint8_t *) &color2)[k] - distance); + } + else + { + paint_colors[0][k] = CLAMP(((uint8_t *) &color1)[k] + distance); + paint_colors[1][k] = CLAMP(((uint8_t *) &color1)[k] - distance); + paint_colors[2][k] = CLAMP(((uint8_t *) &color2)[k] + distance); + paint_colors[3][k] = CLAMP(((uint8_t *) &color2)[k] - distance); + } + } + + for (int k = 0; k < 16; k++) + { + uint32_t pixel = bgra[kBlockWalk[k]]; + int bestDist = INT_MAX; + int bestIdx = 0; + + for (int idx = 0; idx < 4; idx++) + { + int dist = _rgb_distance_euclid(pixel, *((uint32_t *) paint_colors[idx])); + if (dist < bestDist) + { + bestDist = dist; + bestIdx = idx; + if (!dist) break; + } + } + + errAcc += bestDist; + + if (write) + { + etc2[5 - (k >> 3)] |= ((bestIdx & 0x2) ? 1 : 0) << (k & 0x7); + etc2[7 - (k >> 3)] |= (bestIdx & 0x1) << (k & 0x7); + } + } + + return errAcc; +} + +static uint32_t +_color_reduce_444(uint32_t color) +{ + int R = R_VAL(&color); + int G = G_VAL(&color); + int B = B_VAL(&color); + int R1, R2, G1, G2, B1, B2; + + R1 = (R & 0xF0) | (R >> 4); + R2 = ((R & 0xF0) + 0x10) | ((R >> 4) + 1); + G1 = (G & 0xF0) | (G >> 4); + G2 = ((G & 0xF0) + 0x10) | ((G >> 4) + 1); + B1 = (B & 0xF0) | (B >> 4); + B2 = ((B & 0xF0) + 0x10) | ((B >> 4) + 1); + + R = (ABS(R - R1) <= ABS(R - R2)) ? R1 : R2; + G = (ABS(G - G1) <= ABS(G - G2)) ? G1 : G2; + B = (ABS(B - B1) <= ABS(B - B2)) ? B1 : B2; + + return BGRA(R, G, B, 255); +} + +static uint32_t +_color_reduce_676(uint32_t color) +{ + int R = R_VAL(&color); + int G = G_VAL(&color); + int B = B_VAL(&color); + int R1, G1, B1; + + // FIXME: Do we have better candidates to try? + R1 = (R & 0xFC) | (R >> 6); + G1 = (G & 0xFE) | (G >> 7); + B1 = (B & 0xFC) | (B >> 6); + + return BGRA(R1, G1, B1, 255); +} + +static int +_block_main_colors_find(uint32_t *color1_out, uint32_t *color2_out, + uint32_t color1, uint32_t color2, const uint32_t *bgra, + const rg_etc1_pack_params *params EINA_UNUSED) +{ + static const int kMaxIterations = 20; + + int errAcc; + + /* k-means complexity is O(n^(d.k+1) log n) + * In this case, n = 16, k = 2, d = 3 so 20 loops + */ + + if (color1 == color2) + { + // We should select another mode (planar) to encode flat colors + // We could also dither with two approximated colors + *color1_out = *color2_out = color1; + goto found; + } + + if (color1 == color2) + { + // We should dither... + *color1_out = *color2_out = color1; + goto found; + } + + for (int iter = 0; iter < kMaxIterations; iter++) + { + int r1 = 0, r2 = 0, g1 = 0, g2 = 0, b1 = 0, b2 = 0; + int cluster1_cnt = 0, cluster2_cnt = 0; + int cluster1[16], cluster2[16]; + int maxDist1 = 0, maxDist2 = 0; + uint32_t c1, c2; + + errAcc = 0; + memset(cluster1, 0, sizeof(cluster1)); + memset(cluster2, 0, sizeof(cluster2)); + + // k-means assignment step + for (int k = 0; k < 16; k++) + { + int dist1 = _rgb_distance_euclid(color1, bgra[k]); + int dist2 = _rgb_distance_euclid(color2, bgra[k]); + if (dist1 <= dist2) + { + cluster1[cluster1_cnt++] = k; + if (dist1 > maxDist1) + maxDist1 = dist1; + } + else + { + cluster2[cluster2_cnt++] = k; + if (dist2 > maxDist2) + maxDist2 = dist2; + } + } + + // k-means failed + if (!cluster1_cnt || !cluster2_cnt) + return -1; + + // k-means update step + for (int k = 0; k < cluster1_cnt; k++) + { + r1 += R_VAL(bgra + cluster1[k]); + g1 += G_VAL(bgra + cluster1[k]); + b1 += B_VAL(bgra + cluster1[k]); + } + + for (int k = 0; k < cluster2_cnt; k++) + { + r2 += R_VAL(bgra + cluster2[k]); + g2 += G_VAL(bgra + cluster2[k]); + b2 += B_VAL(bgra + cluster2[k]); + } + + r1 /= cluster1_cnt; + g1 /= cluster1_cnt; + b1 /= cluster1_cnt; + r2 /= cluster2_cnt; + g2 /= cluster2_cnt; + b2 /= cluster2_cnt; + + c1 = _color_reduce_444(BGRA(r1, g1, b1, 255)); + c2 = _color_reduce_444(BGRA(r2, g2, b2, 255)); + if (c1 == color1 && c2 == color2) + break; + + if (c1 != c2) + { + color1 = c1; + color2 = c2; + } + else if (_rgb_distance_euclid(c1, color1) > _rgb_distance_euclid(c2, color2)) + { + color1 = c1; + } + else + { + color2 = c2; + } + } + + *color1_out = color1; + *color2_out = color2; + +found: + errAcc = 0; + for (int k = 0; k < 16; k++) + errAcc += _rgb_distance_euclid(bgra[k], color2); + return errAcc; +} + +static unsigned int +_etc2_th_mode_block_encode(uint8_t *etc2, const uint32_t *bgra, + const rg_etc1_pack_params *params) +{ + int err, bestDist = kDistances[0]; + int minErr = INT_MAX, bestMode = 0; + uint32_t c1, c2, bestC1 = bgra[0], bestC2 = bgra[1]; + Eina_Bool swap_colors = EINA_FALSE; + + Eina_Inlist *tried_pairs = NULL; + struct ColorPair { + EINA_INLIST; + uint32_t low; + uint32_t high; + }; + struct ColorPair *pair; + + /* Bruteforce algo: + * Bootstrap k-means clustering with all possible pairs of colors + * from the 4x4 block. + * TODO: Don't retry the same rgb444 pairs again + */ + + for (int pix1 = 0; pix1 < 15; pix1++) + for (int pix2 = pix1 + 1; pix2 < 16; pix2++) + { + Eina_Bool already_tried = EINA_FALSE; + + // Bootstrap k-means. Find new pair of colors. + c1 = _color_reduce_444(bgra[pix1]); + c2 = _color_reduce_444(bgra[pix2]); + + if (c2 > c1) + { + uint32_t tmp = c2; + c2 = c1; + c1 = tmp; + } + + EINA_INLIST_FOREACH(tried_pairs, pair) + if (c1 == pair->high && c2 == pair->low) + { + already_tried = EINA_TRUE; + break; + } + + if (already_tried) + continue; + + pair = calloc(1, sizeof(*pair)); + if (pair) + { + pair->high = c1; + pair->low = c2; + tried_pairs = eina_inlist_append(tried_pairs, EINA_INLIST_GET(pair)); + } + + // Run k-means + err = _block_main_colors_find(&c1, &c2, c1, c2, bgra, params); + if (err < 0) + continue; + + for (int distIdx = 0; distIdx < 8; distIdx++) + { + for (int mode = 0; mode < 2; mode++) + { + + for (int swap = 0; swap < 2; swap++) + { + if (mode == 0 && swap) + { + uint32_t tmp = c2; + c2 = c1; + c1 = tmp; + } + err = _etc2_th_mode_block_pack(etc2, mode, c1, c2, + kDistances[distIdx], + bgra, EINA_FALSE, + &swap_colors); + if (err < minErr) + { + bestDist = kDistances[distIdx]; + minErr = err; + bestC1 = (!swap_colors) ? c1 : c2; + bestC2 = (!swap_colors) ? c2 : c1; + bestMode = mode; + if (err <= kTargetError[params->m_quality]) + goto found; + } + } + } + } + } + +found: + EINA_INLIST_FREE(tried_pairs, pair) + { + tried_pairs = eina_inlist_remove(tried_pairs, EINA_INLIST_GET(pair)); + free(pair); + } + + err = _etc2_th_mode_block_pack(etc2, bestMode, bestC1, bestC2, bestDist, + bgra, EINA_TRUE, &swap_colors); + + return err; +} + +static inline Eina_Bool +_etc2_planar_mode_header_pack(uint8_t *etc2, + uint32_t RO, uint32_t RH, uint32_t RV, + uint32_t GO, uint32_t GH, uint32_t GV, + uint32_t BO, uint32_t BH, uint32_t BV) +{ + int R, dR; + int G, dG; + int B, dB; + + // RO_6 [2..5] + R = BITS(RO >> 2, 2, 5); + // RO_6 [0..1] + GO_7[6] + dR = (BITS(RO >> 2, 0, 1) << 1) | BIT(GO >> 1, 6); + + if (!((R + kSigned3bit[dR] >= 0) && (R + kSigned3bit[dR] <= 31))) + R |= 1 << 4; + + // GO_7[2..5] + G = BITS(GO >> 1, 2, 5); + // GO_7[0..1] + BO_6[5] + dG = (BITS(GO >> 1, 0, 1) << 1) | BIT(BO >> 2, 5); + + if (!((G + kSigned3bit[dG] >= 0) && (G + kSigned3bit[dG] <= 31))) + G |= 1 << 4; + + // BO_6[3..4] + B = BITS(BO >> 2, 3, 4); + // BO_6[1..2] + dB = BITS(BO >> 2, 1, 2); + + // B + dB must be outside the range. + for (int Bx = 0; Bx < 8; Bx++) + for (int dBx = 0; dBx < 2; dBx++) + { + int Btry = B | (Bx << 2); + int dBtry = dB | (dBx << 2); + if ((Btry + kSigned3bit[dBtry]) < 0 || (Btry + kSigned3bit[dBtry] > 31)) + { + B = Btry; + dB = dBtry; + break; + } + } + + if (!((R + kSigned3bit[dR] >= 0) && (R + kSigned3bit[dR] <= 31))) + return EINA_FALSE; + + if (!((G + kSigned3bit[dG] >= 0) && (G + kSigned3bit[dG] <= 31))) + return EINA_FALSE; + + if ((B + kSigned3bit[dB] >= 0) && (B + kSigned3bit[dB] <= 31)) + return EINA_FALSE; + + // Write everything + etc2[0] = (R << 3) | dR; + etc2[1] = (G << 3) | dG; + etc2[2] = (B << 3) | dB; + etc2[3] = (BIT(BO >> 2, 0) << 7) | (BITS(RH >> 2, 1, 5) << 2) | 0x2 | BIT(RH >> 2, 0); + etc2[4] = ((GH >> 1) << 1) | BIT(BH >> 2, 5); + etc2[5] = (BITS(BH >> 2, 0, 4) << 3) | BITS(RV >> 2, 3, 5); + etc2[6] = (BITS(RV >> 2, 0, 2) << 5) | BITS(GV >> 1, 2, 6); + etc2[7] = (BITS(GV >> 1, 0, 1) << 6) | (BV >> 2); + + return EINA_TRUE; +} + +static unsigned int +_etc2_planar_mode_block_pack(uint8_t *etc2, + uint32_t Ocol, uint32_t Hcol, uint32_t Vcol, + const uint32_t *bgra, Eina_Bool write) +{ + unsigned int err = 0; + uint32_t RO, RH, RV, GO, GH, GV, BO, BH, BV; + + RO = R_VAL(&Ocol); + RH = R_VAL(&Hcol); + RV = R_VAL(&Vcol); + GO = G_VAL(&Ocol); + GH = G_VAL(&Hcol); + GV = G_VAL(&Vcol); + BO = B_VAL(&Ocol); + BH = B_VAL(&Hcol); + BV = B_VAL(&Vcol); + + if (write) + { + if (!_etc2_planar_mode_header_pack(etc2, RO, RH, RV, GO, GH, GV, BO, BH, BV)) + return INT_MAX; + } + + // Compute MSE, that's all we need to do + + for (int y = 0; y < 4; y++) + for (int x = 0; x < 4; x++) + { + const int R = CLAMP(((x * (RH - RO)) + y * (RV - RO) + 4 * RO + 2) >> 2); + const int G = CLAMP(((x * (GH - GO)) + y * (GV - GO) + 4 * GO + 2) >> 2); + const int B = CLAMP(((x * (BH - BO)) + y * (BV - BO) + 4 * BO + 2) >> 2); + uint32_t color = BGRA(R, G, B, 255); + + err += _rgb_distance_euclid(color, bgra[x + y * 4]); + } + + return err; +} + +static unsigned int +_etc2_planar_mode_block_encode(uint8_t *etc2, const uint32_t *bgra, + const rg_etc1_pack_params *params EINA_UNUSED) +{ + unsigned int err; + unsigned int Ocol, Hcol, Vcol, RO, GO, BO, Rx, Gx, Bx; + + // TODO: Scan a broader range to avoid artifacts when the + // points O, H or V are exceptions + + /* O is at (0,0) + * H is at (4,0) + * V is at (0,4) + * So, H and V are outside the block. + * We extrapolate the values from (0,3) and (3,0). + */ + + RO = R_VAL(&(bgra[0])); + GO = G_VAL(&(bgra[0])); + BO = B_VAL(&(bgra[0])); + Ocol = _color_reduce_676(bgra[0]); + + Rx = CLAMP(RO + (4 * (R_VAL(&(bgra[3])) - RO)) / 3); + Gx = CLAMP(GO + (4 * (G_VAL(&(bgra[3])) - GO)) / 3); + Bx = CLAMP(BO + (4 * (B_VAL(&(bgra[3])) - BO)) / 3); + Hcol = _color_reduce_676(BGRA(Rx, Gx, Bx, 0xFF)); + + Rx = CLAMP(RO + (4 * (R_VAL(&(bgra[12])) - RO)) / 3); + Gx = CLAMP(GO + (4 * (G_VAL(&(bgra[12])) - GO)) / 3); + Bx = CLAMP(BO + (4 * (B_VAL(&(bgra[12])) - BO)) / 3); + Vcol = _color_reduce_676(BGRA(Rx, Gx, Bx, 0xFF)); + + err = _etc2_planar_mode_block_pack(etc2, Ocol, Hcol, Vcol, bgra, EINA_TRUE); + + return err; +} + +#ifdef DEBUG +static unsigned int +_block_error_calc(const uint32_t *enc, const uint32_t *orig, Eina_Bool perceptual) +{ + unsigned int errAcc = 0; + + for (int k = 0; k < 16; k++) + { + if (perceptual) + errAcc += _rgb_distance_percept(enc[k], orig[k]); + else + errAcc += _rgb_distance_euclid(enc[k], orig[k]); + } + + return errAcc; +} +#endif + +unsigned int +etc2_rgba8_block_pack(unsigned char *etc2, const unsigned int *bgra, + rg_etc1_pack_params *params) +{ + rg_etc1_pack_params safe_params; + unsigned int errors[3] = { INT_MAX, INT_MAX, INT_MAX }; + unsigned int minErr = INT_MAX; + uint8_t etc2_try[3][8]; + int bestSolution = 0; + +#ifdef DEBUG + static int cnt [3] = {0}; +#endif + + safe_params.m_dithering = !!params->m_dithering; + safe_params.m_quality = MINMAX(params->m_quality, 0, 2); + + errors[0] = rg_etc1_pack_block(etc2_try[0], bgra, &safe_params); + errors[1] = _etc2_th_mode_block_encode(etc2_try[1], bgra, &safe_params); + errors[2] = _etc2_planar_mode_block_encode(etc2_try[2], bgra, &safe_params); + +#ifdef DEBUG + for (int i = 1; i < 3; i++) + { + const char *mode = (i == 1) ? "T or H" : "Planar"; + if (errors[i] < INT_MAX) + for (unsigned k = 0; k < sizeof(errors) / sizeof(*errors); k++) + { + uint32_t decoded[16]; + unsigned int real_errors[2]; + rg_etc2_rgb8_decode_block(etc2_try[i], decoded); + real_errors[0] = _block_error_calc(decoded, bgra, EINA_FALSE); + real_errors[1] = _block_error_calc(decoded, bgra, EINA_TRUE); + + if (real_errors[0] != errors[i]) + DBG("Invalid error calc in %s mode", mode); + } + } +#endif + + for (unsigned k = 0; k < sizeof(errors) / sizeof(*errors); k++) + if (errors[k] < minErr) + { + minErr = errors[k]; + bestSolution = k; + } + + memcpy(etc2 + 8, etc2_try[bestSolution], 8); + + minErr += _etc2_alpha_encode(etc2, bgra, &safe_params); + +#ifdef DEBUG + cnt[bestSolution]++; + DBG("Block count by mode: ETC1: %d T/H: %d Planar: %d. Err %d", + cnt[0], cnt[1], cnt[2], minErr); +#endif + + return minErr; +} + +unsigned int +etc2_rgb8_block_pack(unsigned char *etc2, const unsigned int *bgra, + rg_etc1_pack_params *params) +{ + rg_etc1_pack_params safe_params; + unsigned int errors[3] = { INT_MAX, INT_MAX, INT_MAX }; + unsigned int minErr = INT_MAX; + uint8_t etc2_try[3][8]; + int bestSolution = 0; + + safe_params.m_dithering = !!params->m_dithering; + safe_params.m_quality = MINMAX(params->m_quality, 0, 2); + + errors[0] = rg_etc1_pack_block(etc2_try[0], bgra, &safe_params); + errors[1] = _etc2_th_mode_block_encode(etc2_try[1], bgra, &safe_params); + errors[2] = _etc2_planar_mode_block_encode(etc2_try[2], bgra, &safe_params); + + for (unsigned k = 0; k < sizeof(errors) / sizeof(*errors); k++) + if (errors[k] < minErr) + { + minErr = errors[k]; + bestSolution = k; + } + + memcpy(etc2 + 8, etc2_try[bestSolution], 8); + + return minErr; +} diff --git a/src/static_libs/rg_etc/rg_etc1.h b/src/static_libs/rg_etc/rg_etc1.h index 275b0dbb3d..6f4b94449b 100644 --- a/src/static_libs/rg_etc/rg_etc1.h +++ b/src/static_libs/rg_etc/rg_etc1.h @@ -37,6 +37,12 @@ void rg_etc1_pack_block_init(); // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE. unsigned int rg_etc1_pack_block(void* pETC1_block, const unsigned int* pSrc_pixels_BGRA, rg_etc1_pack_params *pack_params); +// Pack a 4x4 block of 32bpp BGRA pixels to a 16-byte RGBA8_ETC2_EAC block (supports alpha). +unsigned int etc2_rgba8_block_pack(unsigned char *etc2, const unsigned int *bgra, rg_etc1_pack_params *params); + +// Pack a 4x4 block of 32bpp BGRA pixels to a 8-byte RGB8_ETC2 block (opaque). +unsigned int etc2_rgb8_block_pack(unsigned char *etc2, const unsigned int *bgra, rg_etc1_pack_params *params); + // ETC2 support: RGB8_ETC2 void rg_etc2_rgb8_decode_block(const unsigned char *etc_block, unsigned int *bgra);