efl/src/lib/evas/common/evas_font_compress.c

460 lines
17 KiB
C

#include "evas_font_private.h"
// XXX:
// XXX: adapt cserve2 to this!
// XXX:
//--------------------------------------------------------------------------
//- UTILS ------------------------------------------------------------------
//--------------------------------------------------------------------------
// expand a 1bit-per-pixel bitmap (bits stored MSB first, "pitch" bytes per
// source row) into an 8bit-per-pixel buffer with exactly "w" bytes per
// destination row. a set bit becomes 0xff and a clear bit becomes 0x00, so
// the compressors only ever have to deal with one universal 8bit format
static void
expand_bitmap(DATA8 *src, int pitch, int w, int h, DATA8 *dst)
{
   // lookup table: bit value -> 8bit alpha
   static const DATA8 bitrepl[2] = { 0x00, 0xff };
   int row, col;

   for (row = 0; row < h; row++)
     {
        const DATA8 *in = src + (row * pitch);
        DATA8 *out = dst + (row * w);

        for (col = 0; col < w; col++)
          {
             // source byte holding this pixel is col / 8, and the pixel's
             // bit inside that byte counts down from bit 7 (leftmost)
             DATA8 bits = in[col >> 3];

             out[col] = bitrepl[(bits >> (7 - (col & 7))) & 0x1];
          }
     }
}
// quantize an 8bit alpha value to the nearest 4bit level. the 16 levels
// expand back to 8bit as 0x00, 0x11, 0x22, ... 0xee, 0xff (steps of 17), so
// we start from the upper nibble and nudge it up or down by one if the
// input is more than half a step (8) away from that level's 8bit value
static inline DATA8
alpha8to4(int a8)
{
   int level = (a8 >> 4) & 0x0f;
   // signed distance from the 8bit expansion of "level" (level * 0x11)
   int delta = a8 - ((level << 4) | level);

   if (delta > 8) level++;
   else if (delta < -8) level--;
   return level;
}
//--------------------------------------------------------------------------
//- RLE 4BIT ---------------------------------------------------------------
//--------------------------------------------------------------------------
// what is 4bit rle? it's 4 bit per pixel run-length encoding. this means
// that every row of pixels is compressed into a separately defined list
// of "runs" where every run is N pixels at value V. RLE works well for
// things like fonts which have vast regions that are either empty or solid
// with some transition (anti-alias) pixels in between. it could be that for
// a black and white alternating pattern it will come out the worst possible
// case, but this basically "never happens".
//
// data is encoded so it's faster to access and decompress at runtime. we have
// both a blob of data that is the RLE encoded data for all rows which consist
// of 1 byte per run, and also a jump table - per row telling us the byte
// offset inside the RLE data blob where the row data begins. since we know
// the offset of the next run, we know how many bytes each row is based on
// this.
//
// since rle data may be small (less than 256 bytes) and in almost all cases
// less than 64k, a jump table of 8 bits per entry is good for many uses, and
// otherwise 16bits is used. it also supports 32bit jumptables but these are
// there just in case the data goes beyond 64k - but is unlikely to ever
// happen in real life. this means jumptables come in 3 formats thus have to
// have 3 different handling paths. RLE data is the same so it's common code.
//
// each byte in the RLE section encodes a run of between 1 and 16 pixels in
// length. there is no such thing as a run of 0 pixels. the upper 4 bits of
// the byte encode the length, with 0 being 1 pixel, 1 being 2 pixels,
// 2 being 3 pixels and so on up to 16 pixels (thus run length is actually
// (byte >> 4) + 1). the lower 4 bits encode the 4 bit pixel value of the
// whole run, from 0 to 15. it is accessed via masking (byte & 0xf). thus
// every run in RLE consumes exactly 1 byte of memory nice and neatly.
//
// at the start before the jumptable is a 32bit (int) header. it just has a
// value at the moment that indicates 0 for it not being RLE data (used by
// the 4bit packed bitmap), 1 for 8bit jumptable RLE, 2 for 16bit jumptable
// and 3 for 32bit jumptable. all other values are reserved
//
// so data looks like this when packed into a single blob in memory (where
// xx is the data size of the jump table - 8, 16 or 32bit). there are n
// lines of data in the jumptable matching to the height of the glyph where
// n is the height in rows
//
// each jumptable row ACTUALLY indicates the byte offset of the NEXT line.
// the FIRST row of RLE data is assumed to be at offset 0 in the RLE data
// section, so a special case is used for this. note that jumptable values
// are OFFSETS starting at 0 which is the first byte in the RLE data section
//
// [int] header (0, 1, 2 or 3)
// [xx] jump table for line 0
// [xx] jump table for line 1
// [xx] jump table for line 2
// ...
// [xx] jump table for line n - 1
// [char] first byte of RLE data (beginning of rle data)
// [char] second byte of RLE data
// ...
// [char] last byte of RLE data
//
// compress an 8bit-per-pixel bitmap ("pitch" bytes per source row, "w"
// pixels wide, "h" rows) into a freshly malloc()ed 4bit RLE blob laid out
// as [int header][jump table][RLE bytes]. on success the blob is returned
// (the caller owns it and must free() it) and its byte size is stored in
// *size_ret. on allocation failure NULL is returned and *size_ret is left
// untouched
static DATA8 *
compress_rle4(DATA8 *src, int pitch, int w, int h, int *size_ret)
{
   unsigned char *scratch, *p, *pix, spanval;
   int *jumptab, x, y, spanlen, spannum, total, size, *iptr, *pos;
   unsigned short *sptr;
   DATA8 *dst, *buf, *dptr;
   size_t rows, cols;
   // these macros make the code more readable and easier to follow, and
   // avoid replication of dumb blobs of logic
#define SPAN_ADD(_len, _val) do { (*pos) += 1; *p = ((_len) << 4) | (_val); p++; } while (0)
#define LAST_SPAN_VAL() (p[-1] & 0x0f)
#define LAST_SPAN_LEN() (p[-1] >> 4)
#define LAST_SPAN_DEL() do { (*pos) -= 1; p -= 1; } while (0)
   // working memory lives on the heap, not the stack: its size scales with
   // the glyph dimensions, so a big glyph could otherwise blow the stack.
   // one allocation holds the int jumptable (h entries) followed by the
   // RLE scratch area. a row can never produce more than one run per
   // pixel, so w * h bytes is the worst case. the + 1 keeps the request
   // non-zero so a NULL return always means a real allocation failure
   rows = (h > 0) ? (size_t)h : 0;
   cols = (w > 0) ? (size_t)w : 0;
   jumptab = malloc((rows * sizeof(int)) + (rows * cols) + 1);
   if (!jumptab) return NULL;
   scratch = p = (unsigned char *)(jumptab + rows);
   for (y = 0; y < h; y++)
     {
        pix = src + (y * pitch);
        // pos is the position offset from RLE data start that we have to
        // track to find out where this row's RLE run *ENDS* so keep a
        // pointer to it and we will keep ++ing it with each RLE entry we add
        pos = &(jumptab[y]);
        *pos = (int)(p - scratch);
        // no spans now so init all span things to 0
        spanval = spanlen = spannum = 0;
        for (x = 0; x < w; x++)
          {
             // round value from a8 to a4
             DATA8 v = alpha8to4(pix[x]);
             // if the current pixel value (in 4bit) is not the same as the
             // span value (in 4 bit) OR... if the span has hit 16 pixels
             // then add/write out the span to our RLE span blob
             if ((v != spanval) || (spanlen >= 16))
               {
                  if (spanlen > 0)
                    {
                       SPAN_ADD(spanlen - 1, spanval);
                       spannum++;
                    }
                  spanval = v;
                  spanlen = 1;
               }
             // otherwise make span longer if values are the same
             else spanlen++;
          }
        // do we have a span still being built that we haven't added and
        // that is NOT transparent (0 value)? there is no point storing
        // spans at the end of a row that have 0 value
        if ((spanlen > 0) && (spanval > 0))
          {
             SPAN_ADD(spanlen - 1, spanval);
             spannum++;
          }
        // clean up any dangling 0 value spans at the end of a row (left
        // behind when a long transparent tail was split into 16 pixel
        // runs) as they just waste space and processing time
        while ((spannum > 0) && (LAST_SPAN_VAL() == 0))
          {
             LAST_SPAN_DEL();
             spannum--;
          }
     }
   // get the size of RLE data we have plus int header
   total = (int)(p - scratch);
   size = sizeof(int) + total;
   // jumptable entries are byte offsets into the RLE data, so the largest
   // one is "total" - pick the smallest entry width that can hold it
   if (total > 65535) size += h * 4; // 32bit
   else if (total > 255) size += h * 2; // 16bit
   else size += h; // 8bit
   // allocate a fresh buffer where we will merge header, jumptable and RLE
   // spans into a single block
   buf = dst = malloc(size);
   if (!buf)
     {
        free(jumptab);
        return NULL;
     }
   *size_ret = size;
   // 32bit int header to indicate encoding type (3, 2 or 1)
   iptr = (int *)dst;
   if (total > 65535) *iptr = 3; // 32bit jump table
   else if (total > 255) *iptr = 2; // 16 bit jump table
   else *iptr = 1; // 8 bit jump table
   // skip header and write jump table, narrowing each int entry as needed
   dst += sizeof(int);
   if (total > 65535) // 32bit jump table
     {
        iptr = (int *)dst;
        for (y = 0; y < h; y++) iptr[y] = jumptab[y];
        dst += (h * sizeof(int));
     }
   else if (total > 255) // 16bit jump table
     {
        sptr = (unsigned short *)dst;
        for (y = 0; y < h; y++) sptr[y] = jumptab[y];
        dst += (h * sizeof(unsigned short));
     }
   else // 8bit jump table
     {
        dptr = dst;
        for (y = 0; y < h; y++) dptr[y] = jumptab[y];
        dst += (h * sizeof(DATA8));
     }
   // copy the RLE data after the jumptable, drop the working memory
   // (jumptab and scratch share one allocation) and return the blob
   memcpy(dst, scratch, total);
   free(jumptab);
   return buf;
}
// expand the RLE bytes src[start] .. src[end - 1] (one row of runs) into
// 8bit pixels at dst. runs of value 0 are only skipped over, never
// written, so the destination row MUST already be zeroed by the caller. no
// bounds checking is done on dst - the run lengths in the data are trusted
static void
decompress_full_row(DATA8 *src, int start, int end, DATA8 *dst)
{
   const DATA8 *run = src + start;
   const DATA8 *stop = src + end;
   DATA8 *out = dst;

   for (; run < stop; run++)
     {
        // upper 4 bits hold (length - 1), lower 4 bits hold the 4bit value
        DATA8 nibble = *run & 0xf;
        DATA8 *run_end = out + ((*run >> 4) + 1);

        if (nibble != 0)
          {
             // replicate the nibble into both halves of the byte so that
             // 0xf -> 0xff and 0x0 -> 0x00, i.e. B4B3B2B1 -> B4B3B2B1B4B3B2B1
             DATA8 fill = (nibble << 4) | nibble;

             while (out < run_end) *out++ = fill;
          }
        // for a transparent run this simply hops over the zeroed pixels
        out = run_end;
     }
}
// code generator macro: stamps out one whole-glyph decompress function per
// jumptable entry type (8, 16 or 32bit) so the row loop is written once.
// jumptab[y] holds the offset in the RLE data where row y ENDS, which is
// also where row y + 1 begins. row 0 always begins at offset 0, so we just
// carry the previous row's end forward as the next row's start
#define DECOMPRESS_ROW_FUNC(_name, _type) \
static void \
_name(_type *jumptab, DATA8 *src, DATA8 *dst, int pitch, int h) \
{ \
   int row, from = 0, to; \
   for (row = 0; row < h; row++) \
     { \
        to = jumptab[row]; \
        decompress_full_row(src, from, to, dst + (row * pitch)); \
        from = to; \
     } \
}
// the 3 concrete decompressors, one per jumptable entry size
DECOMPRESS_ROW_FUNC(decompress_jumptab8_rle4, DATA8)
DECOMPRESS_ROW_FUNC(decompress_jumptab16_rle4, unsigned short)
DECOMPRESS_ROW_FUNC(decompress_jumptab32_rle4, int)
// decompress a complete RLE blob (int header + jumptable + RLE bytes) into
// dst, where pitch is the number of bytes between destination rows and h
// is the number of rows. the header selects the jumptable entry width:
// 1 = 8bit, 2 = 16bit, 3 = 32bit. any other header value is ignored and
// dst is left untouched. w is not needed as each row's extent comes from
// the jumptable
static void
decompress_rle4(DATA8 *src, DATA8 *dst, int pitch, int w EINA_UNUSED, int h)
{
   // the jumptable starts right after the int header, and the RLE data
   // starts right after the h jumptable entries
   DATA8 *jumptab = src + sizeof(int);
   int header = *((int *)src);

#define DECOMPRESS_FUNC(_name, _type) _name((_type *)jumptab, jumptab + (h * sizeof(_type)), dst, pitch, h)
   switch (header)
     {
      case 1:
        DECOMPRESS_FUNC(decompress_jumptab8_rle4, DATA8);
        break;
      case 2:
        DECOMPRESS_FUNC(decompress_jumptab16_rle4, unsigned short);
        break;
      case 3:
        DECOMPRESS_FUNC(decompress_jumptab32_rle4, int);
        break;
      default:
        // not an RLE header we know about - nothing to decode
        break;
     }
}
//--------------------------------------------------------------------------
//- RAW 4BIT ---------------------------------------------------------------
//--------------------------------------------------------------------------
// this compresses 8bit per pixel font data to 4bit per pixel, raw packed:
// the 4 MSB bits of each byte hold the left pixel and the 4 LSB bits hold
// the right pixel. each row is rounded up to a whole number of bytes, so on
// odd-length rows the last byte of the row carries only one pixel (in its
// upper 4 bits) and its lower 4 bits are zero. at the top of the packed
// data is an int header with value 0 meaning "4bit packed", so the same
// generic "rle" pointer can hold either 4bit rle or 4bit packed data.
//
// src has "pitch" bytes per row, and is "w" pixels wide and "h" rows high.
// returns a malloc()ed blob the caller must free() and stores its size in
// bytes in *size_ret, or returns NULL (leaving *size_ret untouched) if the
// allocation fails
static DATA8 *
compress_bpp4(DATA8 *src, int pitch, int w, int h, int *size_ret)
{
   int pitch2, x, y, *iptr;
   DATA8 *buf, *p, *d, *s;
   // our horizontal pitch in bytes ... rounding up to account for odd lengths
   pitch2 = (w + 1) / 2;
   // allocate the buffer size for header plus data
   buf = malloc(sizeof(int) + (pitch2 * h));
   if (!buf) return NULL;
   // write the header value of 0
   iptr = (int *)buf;
   *iptr = 0;
   // start with the 4 bit packed data body
   p = buf + sizeof(int);
   // return size
   *size_ret = (pitch2 * h) + sizeof(int);
   for (y = 0; y < h; y++)
     {
        s = src + (y * pitch);
        d = p + (y * pitch2);
        // walk source row 2 pixels at a time, round each to 4 bit and pack
        for (x = 0; x < (w - 1); x += 2)
          {
             DATA8 v1 = alpha8to4(s[0]);
             DATA8 v2 = alpha8to4(s[1]);
             *d = (v1 << 4) | v2;
             s += 2;
             d++;
          }
        // handle dangling "last" pixel if odd row length. it has to be
        // rounded with alpha8to4() exactly like every other pixel - just
        // masking off the upper 4 bits would truncate instead and so the
        // last column would be quantized differently to the rest
        if (x < w) *d = alpha8to4(s[0]) << 4;
     }
   return buf;
}
// unpack raw 4bit-per-pixel data (after its int header) into an 8bit
// destination buffer that the caller has already allocated. "pitch" is the
// destination bytes per row, "w" the pixels per row and "h" the row count.
// source rows are (w + 1) / 2 bytes each, left pixel in the upper 4 bits
static void
decompress_bpp4(DATA8 *src, DATA8 *dst, int pitch, int w, int h)
{
   // source bytes per row, rounded up for odd widths
   int stride = (w + 1) / 2;
   // packed pixel data begins right after the int header
   const DATA8 *body = src + sizeof(int);
   int row, col;

   for (row = 0; row < h; row++)
     {
        const DATA8 *in = body + (row * stride);
        DATA8 *out = dst + (row * pitch);

        for (col = 0; col < w; col++)
          {
             DATA8 packed = in[col >> 1];
             // even columns sit in the upper 4 bits, odd in the lower 4
             DATA8 nibble = (col & 1) ? (packed & 0xf) : (packed >> 4);

             // replicate the 4 bits into both halves so 0xf expands to 0xff
             out[col] = (nibble << 4) | nibble;
          }
     }
}
//--------------------------------------------------------------------------
//- GENERAL ----------------------------------------------------------------
//--------------------------------------------------------------------------
// compress a glyph bitmap to one of the two 4bit encodings in this file.
//
// data       - source glyph bitmap
// num_grays  - number of grey levels in the source bitmap
// pixel_mode - source pixel mode (compared against FT_PIXEL_MODE_GRAY)
// pitch_data - bytes per row in the source bitmap
// w, h       - glyph size in pixels
// size_ret   - receives the byte size of the returned blob
//
// a 256 level FT_PIXEL_MODE_GRAY bitmap is compressed directly. anything
// else is treated as a 1bit bitmap and expanded to 8bit first. returns a
// malloc()ed blob the caller must free(), or NULL. if the glyph is rejected
// up front (h < 1, pitch_data < 1 or w < 0) *size_ret is not written. if an
// allocation fails *size_ret is set to 0
EAPI void *
evas_common_font_glyph_compress(void *data, int num_grays, int pixel_mode,
                                int pitch_data, int w, int h, int *size_ret)
{
   DATA8 *inbuf, *buf, *expanded = NULL;
   int size = 0, pitch = 0;

   // avoid compressing 0 sized glyph, and never size a buffer from a
   // negative width
   if ((h < 1) || (pitch_data < 1) || (w < 0)) return NULL;
   // if glyph buffer is 8bit grey - then compress straight from the source
   if ((num_grays == 256) && (pixel_mode == FT_PIXEL_MODE_GRAY))
     {
        inbuf = data;
        pitch = pitch_data;
     }
   // if glyph is 1bit bitmap - expand it to 8bit grey first
   else
     {
        // the temporary 8bit copy is w * h bytes, which is unbounded, so
        // it goes on the heap rather than the stack, and only on this
        // path where it is needed. a zero width still asks for 1 byte so
        // that a NULL return always means a real allocation failure
        size_t npix = (size_t)w * (size_t)h;

        expanded = malloc(npix ? npix : 1);
        if (!expanded)
          {
             *size_ret = 0;
             return NULL;
          }
        pitch = w;
        expand_bitmap(data, pitch_data, w, h, expanded);
        inbuf = expanded;
     }
   // in testing for small glyphs - eg 16x16 or smaller it seems raw 4bit
   // encoding is faster (and smaller) than 4bit RLE.
   if ((w * h) < (16 * 16))
     // compress to 4bit per pixel, raw
     buf = compress_bpp4(inbuf, pitch, w, h, &size);
   else
     // compress to 4bit per pixel, run length encoded per row
     buf = compress_rle4(inbuf, pitch, w, h, &size);
   // the expanded copy (if any) has been consumed by the compressor
   free(expanded);
   // never report a non-zero size together with a NULL blob
   if (!buf) size = 0;
   *size_ret = size;
   return buf;
}
// this decompresses a whole block of compressed font data back to 8bit
// per pixel and deals with both 4bit RLE and 4bit packed encoding modes.
//
// the glyph's compressed data (glyph_out->rle) and its size
// (glyph_out->bitmap.width / .rows) are read from fg. returns a newly
// allocated buffer of width * rows bytes with "width" bytes per row (the
// caller must free() it), and stores width and height in *wret / *hret when
// those pointers are not NULL. returns NULL - leaving *wret / *hret
// untouched - if the glyph has no compressed data or the allocation fails
EAPI DATA8 *
evas_common_font_glyph_uncompress(RGBA_Font_Glyph *fg, int *wret, int *hret)
{
   RGBA_Font_Glyph_Out *fgo = fg->glyph_out;
   DATA8 *buf;
   int *iptr;

   // the compressor hands back NULL for empty glyphs and on allocation
   // failure, so there may be no header to read at all
   if (!fgo->rle) return NULL;
   // zeroed buffer is required: the RLE decoder skips transparent runs
   // without writing them. letting calloc do the rows * width multiply
   // means it is checked for overflow instead of being computed here
   buf = calloc(fgo->bitmap.rows, fgo->bitmap.width);
   if (!buf) return NULL;
   if (wret) *wret = fgo->bitmap.width;
   if (hret) *hret = fgo->bitmap.rows;
   // int header at the start of the blob: > 0 means RLE (the value picks
   // the jumptable width), otherwise it is raw 4bit packed data
   iptr = (int *)fgo->rle;
   if (*iptr > 0) // rle4
     decompress_rle4(fgo->rle, buf, fgo->bitmap.width,
                     fgo->bitmap.width, fgo->bitmap.rows);
   else // bpp4
     decompress_bpp4(fgo->rle, buf, fgo->bitmap.width,
                     fgo->bitmap.width, fgo->bitmap.rows);
   return buf;
}