evas - fonts - move to using 4bit and rle 4 bit compressed font glyphs

this changes the internal encoding of font glyphs in evas to use 4bit
uncompressed if small, or 4bit rle (run length encoded) if larger.
this saves at least 50% of memory on fonts - and more if bigger. with
large fonts (40-80pixel size) we can save in the region of 80% of
memory used for glyphs. this also happens to allow speedups in
rendering too.
This commit is contained in:
Carsten Haitzler 2014-01-13 05:13:00 +09:00
parent f21b0ee6c3
commit 86a97efeea
12 changed files with 972 additions and 276 deletions

View File

@ -146,6 +146,7 @@ lib/evas/common/evas_font_draw.c \
lib/evas/common/evas_font_load.c \
lib/evas/common/evas_font_main.c \
lib/evas/common/evas_font_query.c \
lib/evas/common/evas_font_compress.c \
lib/evas/common/evas_image_load.c \
lib/evas/common/evas_image_save.c \
lib/evas/common/evas_image_main.c \
@ -250,6 +251,7 @@ static_libs/libunibreak/ChangeLog
# Engines
EXTRA_DIST += \
lib/evas/common/evas_font_compress_draw.c \
lib/evas/common/evas_map_image_internal.c \
lib/evas/common/evas_map_image_core.c \
lib/evas/common/evas_map_image_loop.c \

View File

@ -196,8 +196,6 @@ struct _Slave_Msg_Glyph {
unsigned int rows;
unsigned int width;
unsigned int pitch;
unsigned int num_grays;
unsigned int pixel_mode;
};
typedef struct _Slave_Msg_Glyph Slave_Msg_Glyph;

View File

@ -1933,10 +1933,6 @@ _glyphs_loaded_msg_create(Glyphs_Request *req, int *resp_size)
buf += sizeof(int);
memcpy(buf, &gldata->pitch, sizeof(int));
buf += sizeof(int);
memcpy(buf, &gldata->num_grays, sizeof(int));
buf += sizeof(int);
memcpy(buf, &gldata->pixel_mode, sizeof(int));
buf += sizeof(int);
memcpy(buf, &gldata->hint, sizeof(int));
buf += sizeof(int);
}
@ -2177,8 +2173,6 @@ _glyphs_load_request_response(Glyphs_Request *req,
gldata->rows = msg->glyphs[j].rows;
gldata->width = msg->glyphs[j].width;
gldata->pitch = msg->glyphs[j].pitch;
gldata->num_grays = msg->glyphs[j].num_grays;
gldata->pixel_mode = msg->glyphs[j].pixel_mode;
gldata->hint = hint;
fe->nglyphs++;

View File

@ -313,6 +313,9 @@ _font_slave_glyph_load(Font_Info *fi, unsigned int idx, unsigned int hint)
return EINA_TRUE;
}
// import the 1 func we need
EAPI void *evas_common_font_glyph_compress(void *data, int num_grays, int pixel_mode, int pitch_data, int w, int h, int *size_ret);
/* This function will render the glyph currently in the glyph slot into the
* given Font Cache.
*/
@ -321,18 +324,32 @@ _font_slave_glyph_render(Font_Info *fi, Slave_Msg_Font_Glyphs_Loaded *response,
unsigned int idx)
{
Font_Source_Info *fsi = fi->fsi;
unsigned int glyphsize;
int glyphsize = 0;
FT_Glyph glyph;
FT_BitmapGlyph bglyph;
char *data;
int buffer_id = 0;
void *buf;
FT_Get_Glyph(fsi->face->glyph, &glyph);
FT_Glyph_To_Bitmap(&glyph, FT_RENDER_MODE_NORMAL, 0, 1);
bglyph = (FT_BitmapGlyph)glyph;
if ((bglyph->bitmap.pitch < 1) || (bglyph->bitmap.rows < 1))
{
FT_Done_Glyph(glyph);
goto on_error;
}
glyphsize = bglyph->bitmap.pitch * bglyph->bitmap.rows;
if (!glyphsize)
buf = evas_common_font_glyph_compress(bglyph->bitmap.buffer,
bglyph->bitmap.num_grays,
bglyph->bitmap.pixel_mode,
bglyph->bitmap.pitch,
bglyph->bitmap.width,
bglyph->bitmap.rows,
&glyphsize);
if (!buf)
{
FT_Done_Glyph(glyph);
goto on_error;
@ -342,10 +359,12 @@ _font_slave_glyph_render(Font_Info *fi, Slave_Msg_Font_Glyphs_Loaded *response,
data = cserve2_shared_mempool_buffer_get(response->mempool, buffer_id);
if (!data)
{
free(buf);
FT_Done_Glyph(glyph);
goto on_error;
}
memcpy(data, bglyph->bitmap.buffer, glyphsize);
memcpy(data, buf, glyphsize);
free(buf);
// TODO: Check if we have problems with alignment
response->glyphs[response->nglyphs].index = idx;
@ -356,8 +375,6 @@ _font_slave_glyph_render(Font_Info *fi, Slave_Msg_Font_Glyphs_Loaded *response,
response->glyphs[response->nglyphs].rows = bglyph->bitmap.rows;
response->glyphs[response->nglyphs].width = bglyph->bitmap.width;
response->glyphs[response->nglyphs].pitch = bglyph->bitmap.pitch;
response->glyphs[response->nglyphs].num_grays = bglyph->bitmap.num_grays;
response->glyphs[response->nglyphs].pixel_mode = bglyph->bitmap.pixel_mode;
response->nglyphs++;
FT_Done_Glyph(glyph);

View File

@ -82,6 +82,10 @@ EAPI int evas_common_font_query_last_up_to_pos (RGBA_Font *fn, con
EAPI int evas_common_font_query_run_font_end_get(RGBA_Font *fn, RGBA_Font_Int **script_fi, RGBA_Font_Int **cur_fi, Evas_Script_Type script, const Eina_Unicode *text, int run_len);
EAPI void evas_common_font_ascent_descent_get(RGBA_Font *fn, const Evas_Text_Props *text_props, int *ascent, int *descent);
EAPI void *evas_common_font_glyph_compress(void *data, int num_grays, int pixel_mode, int pitch_data, int w, int h, int *size_ret);
EAPI void evas_common_font_glyph_draw(RGBA_Font_Glyph *fg, RGBA_Draw_Context *dc, DATA32 *dst, int dst_pitch, int x, int y, int cx, int cy, int cw, int ch);
EAPI DATA8 *evas_common_font_glyph_uncompress(RGBA_Font_Glyph *fg, int *wret, int *hret);
void evas_common_font_load_init(void);
void evas_common_font_load_shutdown(void);

View File

@ -0,0 +1,523 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <assert.h>
#include "evas_common_private.h"
#include "evas_private.h"
#include "evas_font_private.h"
#ifdef EVAS_CSERVE2
# include "../cserve2/evas_cs2_private.h"
#endif
#include FT_OUTLINE_H
#include FT_SYNTHESIS_H
// XXX:
// XXX: adapt cserve2 to this!
// XXX:
//--------------------------------------------------------------------------
//- UTILS ------------------------------------------------------------------
//--------------------------------------------------------------------------
static void
expand_bitmap(DATA8 *src, int pitch, int w, int h, DATA8 *dst)
{
   // 1bit glyph rows are unpacked MSB-first: a set bit becomes 0xff and a
   // clear bit becomes 0x00, so the compressors only ever have to deal with
   // 8bit-per-pixel input. src rows are "pitch" bytes apart, dst rows are
   // exactly "w" bytes apart
   int row, col, bit, nbits;

   for (row = 0; row < h; row++)
     {
        const DATA8 *in = src + (row * pitch);
        DATA8 *out = dst + (row * w);

        // one packed source byte feeds up to 8 output pixels
        for (col = 0; col < w; col += 8, in++)
          {
             DATA8 packed = *in;

             // the final byte of a row may carry fewer than 8 valid bits
             nbits = w - col;
             if (nbits > 8) nbits = 8;
             for (bit = 0; bit < nbits; bit++)
               *out++ = ((packed >> (7 - bit)) & 0x1) ? 0xff : 0x00;
          }
     }
}
//--------------------------------------------------------------------------
//- RLE 4BIT ---------------------------------------------------------------
//--------------------------------------------------------------------------
// what is 4bit rle? it's 4 bit per pixel run-length encoding. this means
// that every row of pixels is compressed into a separately defined list
// of "runs" where every run is N pixels at value V. RLE works well for
// things like fonts which have vast regions that are either empty or solid
// with some transition (anti-alias) pixels in between. it could be that for
// a black and white alternating pattern it will come out the worst possible
// case, but this basically "never happens".
//
// data is encoded so it's faster to access and decompress at runtime. we have
// both a blob of data that is the RLE encoded data for all rows which consist
// of 1 byte per run, and also a jump table - per row telling us the byte
// offset inside the RLE data blob where the row data begins. since we know
// the offset of the next row, we know how many bytes each row is based on
// this.
//
// since rle data may be small (less than 256 bytes) and in almost all cases
// less than 64k, a jump table of 8 bits per entry is good for many uses, and
// otherwise 16bits is used. it also supports 32bit jumptables but these are
// there just in case the data goes beyond 64k - but is unlikely to ever
// happen in real life. this means jumptables come in 3 formats thus have to
// have 3 different handling paths. RLE data is the same so it's common code.
//
// each byte in the RLE section encodes a run of between 1 and 16 pixels in
// length. there is no such thing as a run of 0 pixels. the upper 4 bits of
// the byte encode the length, with 0 being 1 pixel, 1 being 2 pixels,
// 2 being 3 pixels and so on up to 16 pixels (thus run length is actually
// (byte >> 4) + 1). the lower 4 bits encode the 4 bit pixel value of the
// whole run, from 0 to 15. it is accessed via masking (byte & 0xf). thus
// every run in RLE consumes exactly 1 byte of memory nice and neatly.
//
// at the start before the jumptable is a 32bit (int) header. it just has a
// value at the moment that indicates 0 for it not being RLE data (used by
// the 4bit packed bitmap), 1 for 8bit jumptable RLE, 2 for 16bit jumptable
// and 3 for 32bit jumptable. all other values are reserved
//
// so data looks like this when packed into a single blob in memory (where
// xx is the data size of the jump table - 8, 16 or 32bit). there are n
// lines of data in the jumptable matching to the height of the glyph where
// n is the height in rows
//
// each jumptable row ACTUALLY indicates the byte offset of the NEXT line.
// the FIRST row of RLE data is assumed to be at offset 0 in the RLE data
// section, so a special case is used for this. note that jumptable values
// are OFFSETS starting at 0 which is the first byte in the RLE data section
//
// [int] header (0, 1, 2 or 3)
// [xx] jump table for line 0
// [xx] jump table for line 1
// [xx] jump table for line 2
// ...
// [xx] jump table for line n - 1
// [char] first byte of RLE data (beginning of rle data)
// [char] second byte of RLE data
// ...
// [char] last byte of RLE data
//
// compress an 8bit-per-pixel glyph into a single malloc()ed blob made of an
// int header (1, 2 or 3 = 8, 16 or 32bit jump table), a jump table with one
// entry per row holding the offset at which that row's runs END, and then
// the run bytes themselves ((length - 1) << 4 | 4bit value).
//
// src      - 8bit source pixels (only the upper 4 bits of each are kept)
// pitch    - bytes between the start of consecutive source rows
// w, h     - glyph size in pixels / rows
// size_ret - receives the total size of the returned blob in bytes
//
// returns the blob (caller frees it) or NULL if w/h are negative or memory
// could not be allocated.
//
// the working buffers live on the heap, not the stack (alloca): a large
// glyph needs a scratch area proportional to w * h which can be far more
// than it is safe to take from the stack.
static DATA8 *
compress_rle4(DATA8 *src, int pitch, int w, int h, int *size_ret)
{
   unsigned char *scratch = NULL, *p, *row_start, *pix, spanval;
   int *jumptab = NULL, *iptr;
   int x, y, spanlen, total, size, header, entry;
   unsigned short *sptr;
   DATA8 *dst, *buf = NULL, *dptr;
   size_t max_runs;

   if ((w < 0) || (h < 0)) return NULL;
   // every run covers at least 1 pixel, so a row can never produce more
   // than w run bytes and the whole glyph never more than w * h
   max_runs = (size_t)w * (size_t)h;
   scratch = malloc(max_runs ? max_runs : 1);
   jumptab = malloc((h ? (size_t)h : 1) * sizeof(int));
   if ((!scratch) || (!jumptab)) goto done;

   p = scratch;
   for (y = 0; y < h; y++)
     {
        pix = src + (y * pitch);
        // remember where this row's runs begin so trailing-run cleanup
        // below never eats into the previous row
        row_start = p;
        // no run is being built yet
        spanval = 0;
        spanlen = 0;
        for (x = 0; x < w; x++)
          {
             // we only need upper 4 bits of value for run creation
             DATA8 v = pix[x] >> 4;

             // flush the run being built when the value changes or when
             // the run has reached the 16 pixel maximum a byte can encode
             if ((v != spanval) || (spanlen >= 16))
               {
                  if (spanlen > 0)
                    *p++ = (unsigned char)(((spanlen - 1) << 4) | spanval);
                  spanval = v;
                  spanlen = 1;
               }
             // otherwise make the run longer as values are the same
             else spanlen++;
          }
        // flush the run still being built unless it is transparent (0) as
        // there is no point storing empty space at the end of a row
        if ((spanlen > 0) && (spanval > 0))
          *p++ = (unsigned char)(((spanlen - 1) << 4) | spanval);
        // drop any already-written 0 value runs dangling at the end of
        // the row - they only waste space and processing time
        while ((p > row_start) && ((p[-1] & 0x0f) == 0)) p--;
        // jump table entry is the offset where this row's runs END
        jumptab[y] = (int)(p - scratch);
     }

   // pick the smallest jump table entry size able to hold every offset
   total = (int)(p - scratch);
   if (total > 65535)
     {
        header = 3; // 32bit jump table
        entry = sizeof(int);
     }
   else if (total > 255)
     {
        header = 2; // 16bit jump table
        entry = sizeof(unsigned short);
     }
   else
     {
        header = 1; // 8bit jump table
        entry = sizeof(DATA8);
     }
   size = (int)sizeof(int) + (h * entry) + total;
   *size_ret = size;

   // merge header, jump table and runs into one freshly allocated block
   buf = malloc(size);
   if (!buf) goto done;
   dst = buf;
   iptr = (int *)dst;
   *iptr = header;
   dst += sizeof(int);
   switch (header)
     {
      case 3:
        iptr = (int *)dst;
        for (y = 0; y < h; y++) iptr[y] = jumptab[y];
        break;
      case 2:
        sptr = (unsigned short *)dst;
        for (y = 0; y < h; y++) sptr[y] = jumptab[y];
        break;
      default:
        dptr = dst;
        for (y = 0; y < h; y++) dptr[y] = jumptab[y];
        break;
     }
   dst += (h * entry);
   // run data follows directly after the jump table
   memcpy(dst, scratch, total);
done:
   free(jumptab);
   free(scratch);
   return buf;
}
// expand the RLE bytes src[start] .. src[end - 1] into 8bit pixels at dst.
// runs whose value is 0 are only skipped over, never written, so the
// destination is ASSUMED to have been zeroed by the caller beforehand
static void
decompress_full_row(DATA8 *src, int start, int end, DATA8 *dst)
{
   const DATA8 *run = src + start;
   const DATA8 *stop = src + end;
   DATA8 *out = dst;

   for (; run < stop; run++)
     {
        // upper 4 bits hold (run length - 1)
        int i, count = (*run >> 4) + 1;
        // lower 4 bits hold the value; repeat them in both nibbles of the
        // 8bit result (B4B3B2B1 -> B4B3B2B1B4B3B2B1) so it rounds properly
        DATA8 nib = *run & 0xf;
        DATA8 grey = (DATA8)((nib << 4) | nib);

        if (grey != 0)
          {
             for (i = 0; i < count; i++) out[i] = grey;
          }
        out += count;
     }
}
// code generator: emits one whole-glyph decompress function per jump table
// entry type (8, 16 or 32bit). each jump table entry is the END offset of
// its row inside the run data, so a row starts where the previous one ended
// and the very first row starts at offset 0
#define DECOMPRESS_ROW_FUNC(_name, _type) \
   static void \
   _name(_type *jumptab, DATA8 *src, DATA8 *dst, int pitch, int h) \
   { \
      int row, from = 0; \
      DATA8 *out = dst; \
      for (row = 0; row < h; row++) \
        { \
           int to = jumptab[row]; \
           decompress_full_row(src, from, to, out); \
           from = to; \
           out += pitch; \
        } \
   }
// the 3 instantiations, one per jump table type/size
DECOMPRESS_ROW_FUNC(decompress_jumptab8_rle4, DATA8)
DECOMPRESS_ROW_FUNC(decompress_jumptab16_rle4, unsigned short)
DECOMPRESS_ROW_FUNC(decompress_jumptab32_rle4, int)
// decompress a complete RLE blob (int header + jump table + run data) into
// dst, where pitch is the number of bytes between destination rows. the
// header selects the jump table entry size; the run data begins right after
// the h jump table entries. any header other than 1, 2 or 3 is ignored
static void
decompress_rle4(DATA8 *src, DATA8 *dst, int pitch, int w EINA_UNUSED, int h)
{
   // jump table sits directly behind the int header
   DATA8 *table = src + sizeof(int);

   switch (*((int *)src))
     {
      case 1: // 8bit jump table
        decompress_jumptab8_rle4((DATA8 *)table,
                                 table + (h * sizeof(DATA8)),
                                 dst, pitch, h);
        break;
      case 2: // 16bit jump table
        decompress_jumptab16_rle4((unsigned short *)table,
                                  table + (h * sizeof(unsigned short)),
                                  dst, pitch, h);
        break;
      case 3: // 32bit jump table
        decompress_jumptab32_rle4((int *)table,
                                  table + (h * sizeof(int)),
                                  dst, pitch, h);
        break;
      default:
        break;
     }
}
//--------------------------------------------------------------------------
//- RAW 4BIT ---------------------------------------------------------------
//--------------------------------------------------------------------------
// pack 8bit-per-pixel glyph data down to 4 bits per pixel, 2 pixels per
// byte: the left pixel goes in the upper 4 bits and the right pixel in the
// lower 4 bits. every row is rounded up to a whole number of bytes, so on
// odd-width rows the last byte carries only 1 pixel and its lower 4 bits
// are left 0. the packed body is preceded by an int header of value 0 which
// marks the blob as "4bit packed" - the same blob pointer can then hold
// either this or 4bit RLE data and be told apart by the header alone.
// the returned blob is malloc()ed (NULL on failure) and its total size in
// bytes is stored in size_ret
static DATA8 *
compress_bpp4(DATA8 *src, int pitch, int w, int h, int *size_ret)
{
   // bytes per packed row ... rounded up to account for odd widths
   const int row_bytes = (w + 1) / 2;
   size_t bytes = sizeof(int) + (row_bytes * h);
   DATA8 *blob = malloc(bytes);
   DATA8 *body;
   int row, col;

   if (!blob) return NULL;
   // header value 0 == raw 4bit packed
   *((int *)blob) = 0;
   *size_ret = bytes;
   body = blob + sizeof(int);
   for (row = 0; row < h; row++)
     {
        const DATA8 *in = src + (row * pitch);
        DATA8 *out = body + (row * row_bytes);

        // 2 source pixels become 1 output byte (upper 4 bits of each)
        col = 0;
        while ((col + 1) < w)
          {
             *out++ = (in[col] & 0xf0) | (in[col + 1] >> 4);
             col += 2;
          }
        // lone last pixel of an odd-width row
        if (col < w) *out = in[col] & 0xf0;
     }
   return blob;
}
// unpack a 4bit packed blob (int header followed by packed rows) into an
// already allocated 8bit destination of w pixels by h rows, whose rows are
// "pitch" bytes apart. each 4bit value is repeated in both nibbles of the
// resulting byte so that 0xf expands to 0xff
static void
decompress_bpp4(DATA8 *src, DATA8 *dst, int pitch, int w, int h)
{
   // packed rows are rounded up to whole bytes for odd widths
   const int row_bytes = (w + 1) / 2;
   // packed data starts after the int header
   const DATA8 *body = src + sizeof(int);
   int row, col;

   for (row = 0; row < h; row++)
     {
        const DATA8 *in = body + (row * row_bytes);
        DATA8 *out = dst + (row * pitch);

        // each source byte yields 2 pixels: upper nibble, then lower
        for (col = 0; (col + 1) < w; col += 2, in++)
          {
             DATA8 hi = *in >> 4, lo = *in & 0xf;

             *out++ = (DATA8)((hi << 4) | hi);
             *out++ = (DATA8)((lo << 4) | lo);
          }
        // odd-width rows end with a byte that only carries an upper nibble
        if (col < w)
          {
             DATA8 hi = *in >> 4;

             *out = (DATA8)((hi << 4) | hi);
          }
     }
}
//--------------------------------------------------------------------------
//- GENERAL ----------------------------------------------------------------
//--------------------------------------------------------------------------
// compress a rendered glyph bitmap into the 4bit blob format used for glyph
// storage (int header followed by either raw 4bit packed rows or 4bit RLE).
//
// data       - glyph bitmap as produced by the rasterizer
// num_grays  - grey level count of the bitmap
// pixel_mode - pixel format of the bitmap
// pitch_data - bytes between consecutive rows of data
// w, h       - glyph width / height in pixels
// size_ret   - receives the size in bytes of the returned blob
//
// a bitmap with num_grays == 256 and pixel_mode == FT_PIXEL_MODE_GRAY is
// compressed directly; anything else is treated as a 1bit bitmap and is
// first expanded to 8bit. returns a malloc()ed blob the caller must free,
// or NULL for an empty/invalid glyph or on allocation failure.
//
// the temporary 8bit expansion buffer is only allocated when it is needed
// and comes from the heap: an alloca() of w * h bytes can overflow the
// stack for large glyphs and is undefined for a negative width.
EAPI void *
evas_common_font_glyph_compress(void *data, int num_grays, int pixel_mode,
                                int pitch_data, int w, int h, int *size_ret)
{
   DATA8 *inbuf, *expanded = NULL, *buf;
   int size = 0, pitch = 0;

   // avoid compressing 0 sized (or nonsensical) glyphs
   if ((!data) || (w < 1) || (h < 1) || (pitch_data < 1)) return NULL;
   // if glyph buffer is 8bit grey - then compress straight from it
   if ((num_grays == 256) && (pixel_mode == FT_PIXEL_MODE_GRAY))
     {
        inbuf = data;
        pitch = pitch_data;
     }
   // if glyph is 1bit bitmap - expand it to 8bit grey first
   else
     {
        expanded = malloc((size_t)w * (size_t)h);
        if (!expanded) return NULL;
        pitch = w;
        expand_bitmap(data, pitch_data, w, h, expanded);
        inbuf = expanded;
     }
   // in testing for small glyphs - eg 16x16 or smaller it seems raw 4bit
   // encoding is faster (and smaller) than 4bit RLE.
   if ((w * h) < (16 * 16))
     // compress to 4bit per pixel, raw
     buf = compress_bpp4(inbuf, pitch, w, h, &size);
   else
     // compress to 4bit per pixel, run length encoded per row
     buf = compress_rle4(inbuf, pitch, w, h, &size);
   // the compressors return their own allocation, so the expansion buffer
   // (if any) is no longer needed
   free(expanded);
   if (size_ret) *size_ret = size;
   return buf;
}
// decompress a glyph's stored 4bit data (RLE or packed, selected by the int
// header at the start of the blob) back to 8 bits per pixel.
//
// fg   - glyph whose glyph_out->rle blob is to be expanded
// wret - if not NULL, receives the glyph width in pixels
// hret - if not NULL, receives the glyph height in rows
//
// returns a calloc()ed buffer of width * rows bytes (row pitch == width)
// that the caller must free, or NULL if the glyph has no compressed data or
// memory could not be allocated. wret/hret are only written on success.
EAPI DATA8 *
evas_common_font_glyph_uncompress(RGBA_Font_Glyph *fg, int *wret, int *hret)
{
   RGBA_Font_Glyph_Out *fgo;
   DATA8 *buf;
   int *iptr;

   // a glyph may exist without compressed data - reading the header of a
   // NULL blob would crash, so bail out before allocating anything
   if ((!fg) || (!fg->glyph_out) || (!fg->glyph_out->rle)) return NULL;
   fgo = fg->glyph_out;
   // zeroed buffer: the RLE decoder skips over 0 value runs without writing
   buf = calloc(1, fgo->bitmap.width * fgo->bitmap.rows);
   if (!buf) return NULL;
   if (wret) *wret = fgo->bitmap.width;
   if (hret) *hret = fgo->bitmap.rows;
   iptr = (int *)fgo->rle;
   if (*iptr > 0) // rle4
     decompress_rle4(fgo->rle, buf, fgo->bitmap.width,
                     fgo->bitmap.width, fgo->bitmap.rows);
   else // bpp4
     decompress_bpp4(fgo->rle, buf, fgo->bitmap.width,
                     fgo->bitmap.width, fgo->bitmap.rows);
   return buf;
}
// this draws a compressed font glyph and decompresses on the fly as it
// draws, saving memory bandwidth and providing speedups
//
// fg        - glyph to draw (its glyph_out->rle blob and bitmap size are read)
// dc        - draw context; only dc->col.col (the draw color) is read here
// dst       - destination pixels, dst_pitch is in pixels (DATA32) per row
// x, y      - position of the glyph's top-left pixel within dst
// cx, cy,
// cw, ch    - clip rectangle; nothing is touched outside of it
//
// NOTE(review): fgo->rle is dereferenced by the included drawing code
// without a NULL check - callers are expected to check it first
EAPI void
evas_common_font_glyph_draw(RGBA_Font_Glyph *fg,
RGBA_Draw_Context *dc,
DATA32 *dst, int dst_pitch,
int x, int y, int cx, int cy, int cw, int ch)
{
RGBA_Font_Glyph_Out *fgo = fg->glyph_out;
int w, h, x1, x2, y1, y2, i, *iptr;
DATA32 coltab[16], col;
DATA16 mtab[16], v;
DATA8 tmp;
w = fgo->bitmap.width; h = fgo->bitmap.rows;
// skip if totally clipped out
if ((y >= (cy + ch)) || ((y + h) <= cy) ||
(x >= (cx + cw)) || ((x + w) <= cx)) return;
// figure y1/y2 limit range (glyph-relative rows, y2 is exclusive)
y1 = 0; y2 = h;
if ((y + y1) < cy) y1 = cy - y;
if ((y + y2) > (cy + ch)) y2 = cy + ch - y;
// figure x1/x2 limit range (glyph-relative columns, x2 is exclusive)
x1 = 0; x2 = w;
if ((x + x1) < cx) x1 = cx - x;
if ((x + x2) > (cx + cw)) x2 = cx + cw - x;
// build fast multiply + mask color tables to avoid compute. this works
// because of our very limited 4bit range of alpha values
col = dc->col.col;
for (i = 0; i <= 0xf; i++)
{
// expand the 4bit value to 8bit by repeating it in both nibbles
v = (i << 4) | i;
// draw color pre-multiplied by this coverage value
coltab[i] = MUL_SYM(v, col);
// top byte of the multiplied color gives the value (0 to 256) that the
// destination is scaled by when blending; 0 means the destination is
// fully replaced
tmp = (coltab[i] >> 24);
mtab[i] = 256 - (tmp + (tmp >> 7));
}
// the drawing code itself lives in evas_font_compress_draw.c and is
// textually included here, once per enabled optimization with MMX / NEON
// defined (or neither for plain C) to select the matching code paths. it
// uses the local variables declared above directly
#ifdef BUILD_MMX
if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
{
#define MMX 1
#include "evas_font_compress_draw.c"
#undef MMX
}
else
#endif
#ifdef BUILD_NEON
if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
{
#define NEON 1
#include "evas_font_compress_draw.c"
#undef NEON
}
else
#endif
{
#include "evas_font_compress_draw.c"
}
}

View File

@ -0,0 +1,342 @@
// inherited from parent func
// RGBA_Font_Glyph_Out *fgo;
// int w, h, x1, x2, y1, y2, i, *iptr;
// DATA32 coltab[16], col;
// DATA16 mtab[16], v;
// DATA8 tmp;
// blend a pixel using pre-computed multiplied col and inverse mul value.
// MMX version: uses registers mm1, mm2 and mm3 as scratch and expects mm0
// to be passed in as the all-zero register for unpacking
#define MMX_BLEND(_dst, _col, _mul) \
MOV_P2R(_dst, mm1, mm0) \
MOV_A2R(_mul, mm3) \
MOV_P2R(_col, mm2, mm0) \
MUL4_256_R2R(mm3, mm1) \
paddw_r2r(mm2, mm1); \
MOV_R2P(mm1, _dst, mm0)
// plain C version of the same blend: dst = col + (dst scaled by mul)
#define C_BLEND(_dst, _col, _mul) \
_dst = _col + MUL_256(_mul, _dst)
// copy 64bits in 1 go (special mmx - no such thing in C here)
#define MMX_COPY64(_dst, _src) \
movq_r2m(_src, _dst)
// a loop of 64bit copies: stores register mm7 to _dst 2 pixels at a time
// while at least 2 pixels remain. both _dst and _len are MODIFIED by this
// macro - afterwards _len is 0 or 1 and _dst points past the pixels written
#define MMX_COPY64LOOP(_dst, _len) \
if (_len >= 2) \
{ \
while (_len > 1) \
{ \
MMX_COPY64(_dst[0], mm7); \
_dst += 2; _len -= 2; \
} \
}
// if we build for mmx optimizations, we need to set up a few things in advance
// like the mm0 register is always all 0'd to fill in 0 padding when
// unpacking values to registers. also mm7 is reserved to hold an unpacked
// and duplicated coltab entry for the final entry (max color). so it's
// [col][col] in the 64bit register with both 32bit colors duplicated
#ifdef MMX
pxor_r2r(mm0, mm0);
movd_m2r(coltab[0xf], mm7);
punpckldq_r2r(mm7, mm7);
#endif
// check header for type (rle4 or bpp4)
iptr = (int *)fgo->rle;
if (*iptr > 0) // rle4
{
// p walks from the blob start past the header and (further below) past the
// jump table so that it ends up pointing at the first byte of run data
DATA8 *p = fgo->rle, *e, *s;
DATA32 *d0, *d, t;
DATA16 len;
int xx, yy, dif;
iptr = (int *)p;
p += sizeof(int);
// destination pixel matching the glyph's top-left corner
d0 = dst + x + (y * dst_pitch);
// this may seem horrible to put a massive blob of logic into a macro like
// this, but this is for speed reasons, so we can generate slightly different
// versions of the same blob of code logic that hold different optimizations
// inside (eg mmx/sse/neon asm etc.)
//
// macro parameters:
//   _donelabel, _extn - pasted together to form the goto label used to leave
//                       a clipped row early (must be unique per expansion)
//   _2copy            - optional fast 2-pixels-at-a-time copy statement for
//                       solid runs (may be empty)
//   _blend            - statement blending 1 pixel at d[0] using value v
// the macro relies on "jumptab" being declared by the code that expands it
#define EXPAND_RLE(_donelabel, _extn, _2copy, _blend) \
if ((x1 == 0) && (x2 == w)) /* unclipped horizontally */ \
{ \
d0 += x1; \
for (yy = y1; yy < y2; yy++) \
{ \
/* figure out source ptr and end ptr based on jumptable */ \
if (yy > 0) s = p + jumptab[yy - 1]; \
else s = p; \
e = p + jumptab[yy]; \
d = d0 + (yy * dst_pitch); \
/* walk until we hit the end of the src data */ \
while (s < e) \
{ \
/* read the run length from RLE data and value */ \
len = (*s >> 4) + 1; \
v = *s & 0xf; \
/* if value is 0 we can just skip ahead entire run and do */ \
/* nothing as empty space doesn't need any work */ \
if (v == 0) d += len; \
/* if the value ends up being solid (inverse alpha is 0) */ \
else if (mtab[v] == 0) \
{ \
/* just COPY the color data direct to destination */ \
t = coltab[0xf]; \
/* this is a special 2 pixel (64bit dest) copy for */ \
/* speed - eg mmx etc. */ \
_2copy; \
/* do cleanup of left-over pixels after the 2 pixel */ \
/* copy above (if there is any such code) */ \
while (len > 0) \
{ \
/* just a plain copy of looked up value */ \
*d = t; \
d++; len--; \
} \
} \
/* our font mask value is between 0 and 15 (0xf) so we */ \
/* have to actually blend it to each dest pixel */ \
else \
{ \
while (len > 0) \
{ \
/* do blend using op provided by params */ \
_blend; \
d++; len--; \
} \
} \
s++; \
} \
} \
} \
else /* clipped horizontally (needs extra skip/cut logic) */ \
{ \
/* init our pos to 0 here (we reset AFTER each horiz loop later) */ \
xx = 0; \
for (yy = y1; yy < y2; yy++) \
{ \
/* figure out source ptr and end ptr based on jumptable */ \
if (yy > 0) s = p + jumptab[yy - 1]; \
else s = p; \
e = p + jumptab[yy]; \
d = d0 + (yy * dst_pitch); \
/* walk until we hit the end of the src data and SKIP runs */ \
/* that are entirely before the start (x1) point and any */ \
/* run that spans over the start point is truncated at the */ \
/* start of the run */ \
while (s < e) \
{ \
len = (*s >> 4) + 1; \
/* if current pos plus run length go over the start (x1) */ \
/* point of our clip area, then adjust run length and dest */ \
/* pointer and position and break out of our RLE skip loop */ \
if ((xx + (int)len) > x1) \
{ \
dif = x1 - xx; \
len -= dif; d += dif; xx += dif; \
break; \
} \
d += len; xx += len; s++; \
} \
/* walk until we hit the end of the RLE run.. OR the end of */ \
/* our clip region - the x2 checks are done inside */ \
while (s < e) \
{ \
v = *s & 0xf; \
/* if value is 0 we can just skip ahead entire run and do */ \
/* nothing as empty space doesn't need any work */ \
if (v == 0) \
{ \
d += len; xx += len; \
/* clip check to stop run */ \
if (xx >= x2) goto _donelabel##_extn; \
} \
/* if the value ends up being solid (inverse alpha is 0) */ \
else if (mtab[v] == 0) \
{ \
/* just COPY the color data direct to destination */ \
t = coltab[0xf]; \
while (len > 0) \
{ \
/* clip check to stop run */ \
if (xx >= x2) goto _donelabel##_extn; \
/* just a plain copy of looked up value */ \
*d = t; \
d++; xx++; len--; \
} \
} \
/* our font mask value is between 0 and 15 (0xf) so we */ \
/* have to actually blend it to each dest pixel */ \
else \
{ \
while (len > 0) \
{ \
/* clip check to stop run */ \
if (xx >= x2) goto _donelabel##_extn; \
/* do blend using op provided by params */ \
_blend; \
d++; xx++; len--; \
} \
} \
s++; \
/* extra check here so length fetch after doesn't break */ \
if (s >= e) break; \
/* get length of NEXT RLE run at the end here */ \
len = (*s >> 4) + 1; \
} \
_donelabel##_extn: \
/* reset horiz pos to 0 ready for next line */ \
xx = 0; \
} \
}
// and here actually run the appropriate code in the macro/func defined
// above, based on the jumptable type (saves passing params on the stack
// to a sub function and we'd have to generate the subfunction by macros
// anyway, so just cut down code to assume context vars as opposed to
// passing them)
if (*iptr == 1) // 8 bit jump table
{
DATA8 *jumptab = p;
// skip the jump table so p points at the run data
p += (h * sizeof(DATA8));
#ifdef MMX
EXPAND_RLE(done_8_clipped, _mmx, MMX_COPY64LOOP(d, len),
MMX_BLEND(d[0], coltab[v], mtab[v]))
#elif defined(NEON)
EXPAND_RLE(done_8_clipped, _neon, ,
C_BLEND(d[0], coltab[v], mtab[v]))
#else
EXPAND_RLE(done_8_clipped, _c, ,
C_BLEND(d[0], coltab[v], mtab[v]))
#endif
}
else if (*iptr == 2) // 16 bit jump table
{
unsigned short *jumptab = (unsigned short *)p;
// skip the jump table so p points at the run data
p += (h * sizeof(unsigned short));
#ifdef MMX
EXPAND_RLE(done_16_clipped, _mmx, MMX_COPY64LOOP(d, len),
MMX_BLEND(d[0], coltab[v], mtab[v]))
#elif defined(NEON)
EXPAND_RLE(done_16_clipped, _neon, ,
C_BLEND(d[0], coltab[v], mtab[v]))
#else
EXPAND_RLE(done_16_clipped, _c, ,
C_BLEND(d[0], coltab[v], mtab[v]))
#endif
}
else if (*iptr == 3) // 32 bit jump table
{
int *jumptab = (int *)p;
// skip the jump table so p points at the run data
p += (h * sizeof(int));
#ifdef MMX
EXPAND_RLE(done_32_clipped, _mmx, MMX_COPY64LOOP(d, len),
MMX_BLEND(d[0], coltab[v], mtab[v]))
#elif defined(NEON)
EXPAND_RLE(done_32_clipped, _neon, ,
C_BLEND(d[0], coltab[v], mtab[v]))
#else
EXPAND_RLE(done_32_clipped, _c, ,
C_BLEND(d[0], coltab[v], mtab[v]))
#endif
}
#undef EXPAND_RLE
}
else // bpp4
{
int xx, yy, djump;
int pitch2;
DATA8 *s, *s0, v0;
DATA32 *d;
// first destination pixel inside the clipped area
d = dst + x + x1 + ((y + y1) * dst_pitch);
// pixels to skip to get from the end of one clipped row to the start of
// the next one
djump = dst_pitch - (x2 - x1);
// source bytes per packed row (2 pixels per byte, rounded up)
pitch2 = (w + 1) / 2;
// first visible source row, after the int header
s0 = fgo->rle + sizeof(int) + (y1 * pitch2);
for (yy = y1; yy < y2; yy++)
{
s = s0 + (x1 / 2);
xx = x1;
// do odd pixel at start if there is any (it lives in the lower 4 bits
// of its source byte)
if (xx & 0x1)
{
v = (*s) & 0xf;
// fast path - totally solid color can just be written
// with no blending done
if (mtab[v] == 0) d[0] = coltab[0xf];
// blend our color from lookup table
else if (v)
{
// blend it
#ifdef MMX
MMX_BLEND(d[0], coltab[v], mtab[v]);
#else
C_BLEND(d[0], coltab[v], mtab[v]);
#endif
}
s++; d++; xx++;
}
// walk along 2 pixels at a time (1 src byte holds 2 packed 4bit pixels)
for (; xx < (x2 - 1); xx += 2)
{
v0 = *s;
// fast path - totally solid color can just be written
// with no blending done - write 2 at once
if ((v0 == 0xff) && (mtab[v0 & 0xf] == 0))
{
// copy the solid color to both pixels
#ifdef MMX
MMX_COPY64(d[0], mm7);
#else
d[0] = d[1] = coltab[0xf];
#endif
}
// if our 2 values are not 0 (as 0's we can skip entirely)
else if (v0)
{
// get first pixel in MSB and blend it
v = (v0) >> 4;
#ifdef MMX
MMX_BLEND(d[0], coltab[v], mtab[v]);
#else
C_BLEND(d[0], coltab[v], mtab[v]);
#endif
// get next pixel in LSB and blend it
v = (v0) & 0xf;
#ifdef MMX
MMX_BLEND(d[1], coltab[v], mtab[v]);
#else
C_BLEND(d[1], coltab[v], mtab[v]);
#endif
}
s++; d += 2;
}
// clean up any leftover pixel at the end (it lives in the upper 4 bits
// of its source byte)
if (xx < x2)
{
v = (*s) >> 4;
// fast path - totally solid color can just be written
// with no blending done
if (mtab[v] == 0) d[0] = coltab[0xf];
// blend our color from lookup table
else if (v)
{
// blend it
#ifdef MMX
MMX_BLEND(d[0], coltab[v], mtab[v]);
#else
C_BLEND(d[0], coltab[v], mtab[v]);
#endif
}
d++;
}
// move on to the next destination and source rows
d += djump;
s0 += pitch2;
}
}
// with mmx (sse etc.) we need to say we are done with the mmx registers so
// any fpu usage is restored (early pentiums need this, later x86 do not)
#ifdef MMX
evas_common_cpu_end_opt();
#endif

View File

@ -14,10 +14,8 @@
struct _Evas_Glyph
{
RGBA_Font_Glyph *fg;
void *data;
Eina_Rectangle coord;
int x, y;
FT_UInt idx;
int j;
};
EAPI void
@ -33,7 +31,7 @@ evas_common_font_draw_init(void)
*/
EAPI Eina_Bool
evas_common_font_rgba_draw(RGBA_Image *dst, RGBA_Draw_Context *dc, int x, int y,
Evas_Glyph_Array *glyphs, RGBA_Gfx_Func func, int ext_x, int ext_y, int ext_w,
Evas_Glyph_Array *glyphs, RGBA_Gfx_Func func EINA_UNUSED, int ext_x, int ext_y, int ext_w,
int ext_h, int im_w, int im_h EINA_UNUSED)
{
DATA32 *im;
@ -43,183 +41,33 @@ evas_common_font_rgba_draw(RGBA_Image *dst, RGBA_Draw_Context *dc, int x, int y,
if (!glyphs->array) return EINA_FALSE;
im = dst->image.data;
EINA_INARRAY_FOREACH(glyphs->array, glyph)
{
RGBA_Font_Glyph *fg;
int chr_x, chr_y;
int chr_x, chr_y, w;
fg = glyph->fg;
/* FIXME: Why was that moved out of prepare ? This increase cache miss. */
glyph->coord.w = fg->glyph_out->bitmap.width;
glyph->coord.h = fg->glyph_out->bitmap.rows;
glyph->j = fg->glyph_out->bitmap.pitch;
glyph->data = fg->glyph_out->bitmap.buffer;
if (dc->font_ext.func.gl_new)
if ((!fg->ext_dat) && (dc->font_ext.func.gl_new))
{
/* extension calls */
fg->ext_dat = dc->font_ext.func.gl_new(dc->font_ext.data, fg);
fg->ext_dat_free = dc->font_ext.func.gl_free;
}
chr_x = x + glyph->coord.x;
chr_y = y + glyph->coord.y;
w = fg->glyph_out->bitmap.width;
chr_x = x + glyph->x;
chr_y = y + glyph->y;
if (chr_x < (ext_x + ext_w))
{
DATA8 *data;
int i, j, w, h;
data = glyph->data;
j = glyph->j;
w = glyph->coord.w;
if (j < w) j = w;
h = glyph->coord.h;
#ifdef HAVE_PIXMAN
# ifdef PIXMAN_FONT
int index;
DATA32 *font_alpha_buffer;
pixman_image_t *font_mask_image;
font_alpha_buffer = alloca(w * h * sizeof(DATA32));
for (index = 0; index < (w * h); index++)
font_alpha_buffer[index] = data[index] << 24;
font_mask_image = pixman_image_create_bits(PIXMAN_a8r8g8b8, w, h,
font_alpha_buffer,
w * sizeof(DATA32));
if (!font_mask_image) return EINA_FALSE;
# endif
#endif
if ((w > 0) && ((chr_x + w) > ext_x))
{
if ((j > 0) && (chr_x + w > ext_x))
{
if ((fg->ext_dat) && (dc->font_ext.func.gl_draw))
{
/* ext glyph draw */
dc->font_ext.func.gl_draw(dc->font_ext.data,
(void *)dst,
dc, fg, chr_x,
y - (chr_y - y));
}
else
{
if ((fg->glyph_out->bitmap.num_grays == 256) &&
(fg->glyph_out->bitmap.pixel_mode == FT_PIXEL_MODE_GRAY))
{
#ifdef HAVE_PIXMAN
# ifdef PIXMAN_FONT
if ((dst->pixman.im) &&
(dc->col.pixman_color_image))
pixman_image_composite(PIXMAN_OP_OVER,
dc->col.pixman_color_image,
font_mask_image,
dst->pixman.im,
chr_x,
y - (chr_y - y),
0, 0,
chr_x,
y - (chr_y - y),
w, h);
else
# endif
#endif
{
for (i = 0; i < h; i++)
{
int dx, dy;
int in_x, in_w;
in_x = 0;
in_w = 0;
dx = chr_x;
dy = y - (chr_y - i - y);
if ((dx < (ext_x + ext_w)) &&
(dy >= (ext_y)) &&
(dy < (ext_y + ext_h)))
{
if (dx + w > (ext_x + ext_w))
in_w += (dx + w) - (ext_x + ext_w);
if (dx < ext_x)
{
in_w += ext_x - dx;
in_x = ext_x - dx;
dx = ext_x;
}
if (in_w < w)
{
func(NULL, data + (i * j) + in_x, dc->col.col,
im + (dy * im_w) + dx, w - in_w);
}
}
}
}
}
else
{
DATA8 *tmpbuf = NULL, *dp, *tp, bits;
int bi, bj;
const DATA8 bitrepl[2] = {0x0, 0xff};
tmpbuf = alloca(w);
for (i = 0; i < h; i++)
{
int dx, dy;
int in_x, in_w, end;
in_x = 0;
in_w = 0;
dx = chr_x;
dy = y - (chr_y - i - y);
tp = tmpbuf;
dp = data + (i * fg->glyph_out->bitmap.pitch);
for (bi = 0; bi < w; bi += 8)
{
bits = *dp;
if ((w - bi) < 8) end = w - bi;
else end = 8;
for (bj = 0; bj < end; bj++)
{
*tp = bitrepl[(bits >> (7 - bj)) & 0x1];
tp++;
}
dp++;
}
if ((dx < (ext_x + ext_w)) &&
(dy >= (ext_y)) &&
(dy < (ext_y + ext_h)))
{
if (dx + w > (ext_x + ext_w))
in_w += (dx + w) - (ext_x + ext_w);
if (dx < ext_x)
{
in_w += ext_x - dx;
in_x = ext_x - dx;
dx = ext_x;
}
if (in_w < w)
{
func(NULL, tmpbuf + in_x, dc->col.col,
im + (dy * im_w) + dx, w - in_w);
}
}
}
}
}
}
if ((fg->ext_dat) && (dc->font_ext.func.gl_draw))
dc->font_ext.func.gl_draw(dc->font_ext.data, (void *)dst,
dc, fg, chr_x, y - (chr_y - y));
else if (fg->glyph_out->rle)
evas_common_font_glyph_draw(fg, dc, im, im_w,
chr_x, y - (chr_y - y),
ext_x, ext_y, ext_w, ext_h);
}
#ifdef HAVE_PIXMAN
# ifdef PIXMAN_FONT
pixman_image_unref(font_mask_image);
# endif
#endif
}
else
break;
@ -362,8 +210,8 @@ evas_common_font_draw_prepare(Evas_Text_Props *text_props)
glyph->fg = fg;
glyph->idx = idx;
glyph->coord.x = EVAS_FONT_WALK_PEN_X + EVAS_FONT_WALK_X_OFF + EVAS_FONT_WALK_X_BEAR;
glyph->coord.y = EVAS_FONT_WALK_PEN_Y + EVAS_FONT_WALK_Y_OFF + EVAS_FONT_WALK_Y_BEAR;
glyph->x = EVAS_FONT_WALK_PEN_X + EVAS_FONT_WALK_X_OFF + EVAS_FONT_WALK_X_BEAR;
glyph->y = EVAS_FONT_WALK_PEN_Y + EVAS_FONT_WALK_Y_OFF + EVAS_FONT_WALK_Y_BEAR;
}
EVAS_FONT_WALK_TEXT_END();
@ -389,11 +237,6 @@ evas_common_font_draw_prepare(Evas_Text_Props *text_props)
return;
error:
if (fg)
{
if (fg->glyph_out) free(fg->glyph_out);
free(fg);
}
eina_inarray_free(glyphs);
}

View File

@ -15,6 +15,7 @@
#include FT_OUTLINE_H
#include FT_SYNTHESIS_H
#include FT_BITMAP_H
FT_Library evas_ft_lib = 0;
static int initialised = 0;
@ -352,10 +353,17 @@ _glyph_free(RGBA_Font_Glyph *fg)
{
/* Releases everything owned by a cached glyph: its output data, the
 * FreeType glyph, any engine extension data, and the glyph record itself. */
/* nothing to do for NULL or for the (void *)(-1) marker value
 * (presumably a "no glyph" placeholder - TODO confirm with the cache code) */
if ((!fg) || (fg == (void *)(-1))) return;
if (fg->glyph_out)
{
/* the rle buffer is freed only when bitmap.rle_alloc is set */
if ((fg->glyph_out->rle) && (fg->glyph_out->bitmap.rle_alloc))
free(fg->glyph_out->rle);
fg->glyph_out->rle = NULL;
/* bitmap.no_free_glout set means the glyph_out struct itself must not be
 * freed here */
if (!fg->glyph_out->bitmap.no_free_glout) free(fg->glyph_out);
fg->glyph_out = NULL;
}
FT_Done_Glyph(fg->glyph);
/* extension calls */
if (fg->ext_dat_free) fg->ext_dat_free(fg->ext_dat);
/* NOTE(review): this looks like the pre-change line of the diff; as listed,
 * fg->glyph_out has already been set to NULL above whenever it was
 * non-NULL - confirm it is gone from the final code. */
if (fg->glyph_out_free) fg->glyph_out_free(fg->glyph_out);
free(fg);
}
@ -578,23 +586,32 @@ evas_common_font_int_cache_glyph_render(RGBA_Font_Glyph *fg)
fbg = (FT_BitmapGlyph)fg->glyph;
fg->glyph_out = malloc(sizeof(RGBA_Font_Glyph_Out));
fg->glyph_out = calloc(1, sizeof(RGBA_Font_Glyph_Out));
fg->glyph_out->bitmap.rows = fbg->bitmap.rows;
fg->glyph_out->bitmap.width = fbg->bitmap.width;
fg->glyph_out->bitmap.pitch = fbg->bitmap.pitch;
fg->glyph_out->bitmap.buffer = fbg->bitmap.buffer;
fg->glyph_out->bitmap.num_grays = fbg->bitmap.num_grays;
fg->glyph_out->bitmap.pixel_mode = fbg->bitmap.pixel_mode;
fg->glyph_out_free = free;
/* This '+ 200' is just an estimation of how much memory freetype will use
fg->glyph_out->bitmap.rle_alloc = EINA_TRUE;
/* This '+ 100' is just an estimation of how much memory freetype will use
* on it's size. This value is not really used anywhere in code - it's
* only for statistics. */
size = sizeof(RGBA_Font_Glyph) + sizeof(Eina_List) +
(fg->glyph_out->bitmap.width * fg->glyph_out->bitmap.rows) + 200;
(fg->glyph_out->bitmap.width * fg->glyph_out->bitmap.rows / 2) + 100;
fi->usage += size;
if (fi->inuse) evas_common_font_int_use_increase(size);
fg->glyph_out->rle = evas_common_font_glyph_compress
(fbg->bitmap.buffer, fbg->bitmap.num_grays, fbg->bitmap.pixel_mode,
fbg->bitmap.pitch, fbg->bitmap.width, fbg->bitmap.rows,
&(fg->glyph_out->rle_size));
fg->glyph_out->bitmap.buffer = NULL;
// this may be technically incorrect as we go and free a bitmap buffer
// behind the ftglyph's back...
FT_Bitmap_Done(evas_ft_lib, &(fbg->bitmap));
return EINA_TRUE;
}

View File

@ -1952,10 +1952,11 @@ _font_entry_glyph_map_rebuild_check(Font_Entry *fe, Font_Hint_Flags hints)
gl->base.bitmap.rows = gd->rows;
gl->base.bitmap.width = gd->width;
gl->base.bitmap.pitch = gd->pitch;
gl->base.bitmap.buffer = (unsigned char *)
fe->map->mempool.data + gl->offset;
gl->base.bitmap.num_grays = gd->num_grays;
gl->base.bitmap.pixel_mode = gd->pixel_mode;
gl->base.bitmap.buffer = NULL;
gl->base.rle = (unsigned char *)
fe->map->mempool.data + gl->offset;
gl->base.rle_size = gl->size;
gl->base.bitmap.rle_alloc = EINA_FALSE;
gl->idx = gd->index;
gl->rid = 0;
@ -2062,7 +2063,7 @@ _glyph_request_cb(void *data, const void *msg, int size)
{
string_t shm_id;
unsigned int idx, offset, glsize, hints;
int rows, width, pitch, num_grays, pixel_mode;
int rows, width, pitch;
CS_Glyph_Out *gl;
pos = buf - (const char*) resp;
@ -2083,10 +2084,6 @@ _glyph_request_cb(void *data, const void *msg, int size)
buf += sizeof(int);
memcpy(&pitch, buf, sizeof(int));
buf += sizeof(int);
memcpy(&num_grays, buf, sizeof(int));
buf += sizeof(int);
memcpy(&pixel_mode, buf, sizeof(int));
buf += sizeof(int);
memcpy(&hints, buf, sizeof(int));
buf += sizeof(int);
if (hints != grd->hints)
@ -2112,10 +2109,12 @@ _glyph_request_cb(void *data, const void *msg, int size)
gl->base.bitmap.rows = rows;
gl->base.bitmap.width = width;
gl->base.bitmap.pitch = pitch;
gl->base.bitmap.buffer =
gl->base.bitmap.buffer = NULL;
gl->base.bitmap.rle_alloc = 0;
gl->base.bitmap.no_free_glout = 1;
gl->base.rle =
(unsigned char *) gl->map->mempool.data + gl->offset;
gl->base.bitmap.num_grays = num_grays;
gl->base.bitmap.pixel_mode = pixel_mode;
gl->base.rle_size = gl->size;
gl->rid = 0;
if (!eina_clist_element_is_linked(&gl->map_entry))

View File

@ -968,14 +968,16 @@ struct _RGBA_Font_Source
*/
struct _RGBA_Font_Glyph_Out
{
/* Rendered output of one glyph.
 * NOTE(review): this listing appears to contain both the removed members
 * (int rows/width/pitch, buffer, num_grays, pixel_mode) and the added ones
 * (rle, unsigned short rows/width/pitch, the two flags, rle_size) - confirm
 * against the actual tree. */
/* compressed glyph data: either the result of
 * evas_common_font_glyph_compress() or a pointer into a mempool */
unsigned char *rle;
struct {
/* rows / width / pitch are copied from the source bitmap */
int rows;
int width;
int pitch;
/* raw bitmap pixels; set to NULL where rle data is used instead */
unsigned char *buffer;
/* copied from the source bitmap's num_grays / pixel_mode */
short num_grays;
char pixel_mode;
unsigned short rows;
unsigned short width;
unsigned short pitch;
/* when set, _glyph_free() frees the rle buffer */
unsigned short rle_alloc : 1;
/* when set, _glyph_free() does not free this struct */
unsigned short no_free_glout : 1;
} bitmap;
/* size reported by evas_common_font_glyph_compress() or taken from the
 * glyph's mempool entry size - presumably in bytes; TODO confirm */
int rle_size;
};
struct _RGBA_Font_Glyph
@ -986,7 +988,6 @@ struct _RGBA_Font_Glyph
Evas_Coord y_bear;
FT_Glyph glyph;
RGBA_Font_Glyph_Out *glyph_out;
void (*glyph_out_free)(void *);
/* this is a problem - only 1 engine at a time can extend such a font... grrr */
void *ext_dat;
void (*ext_dat_free) (void *ext_dat);

View File

@ -5,10 +5,8 @@ evas_gl_font_texture_new(void *context, RGBA_Font_Glyph *fg)
{
// Builds an alpha texture for glyph fg and appends it to
// gc->font_glyph_textures. Returns fg->ext_dat when it is already set, NULL
// when the glyph is empty, has no data, or texture creation fails, and the
// new texture otherwise.
// NOTE(review): this listing appears to interleave the lines removed and
// added by the diff (duplicate declarations, two copy loops, two `if (!tex)`
// checks) - confirm against the actual tree.
Evas_Engine_GL_Context *gc = context;
Evas_GL_Texture *tex;
DATA8 *data;
int w, h, j, nw;
DATA8 *ndata;
int fh;
int w, h, j, nw, fh, x, y;
DATA8 *ndata, *data, *p1, *p2;
if (fg->ext_dat) return fg->ext_dat; // FIXME: one engine at a time can do this :(
@ -16,80 +14,38 @@ evas_gl_font_texture_new(void *context, RGBA_Font_Glyph *fg)
h = fg->glyph_out->bitmap.rows;
// zero-sized glyphs get no texture
if ((w == 0) || (h == 0)) return NULL;
data = fg->glyph_out->bitmap.buffer;
j = fg->glyph_out->bitmap.pitch;
if (!fg->glyph_out->rle) return NULL;
// data is a temporary buffer from the uncompress call; w and h are passed by
// address so the call can set them. It is released by free(data) at done:
data = evas_common_font_glyph_uncompress(fg, &w, &h);
if (!data) return NULL;
// source row stride
j = w;
// NOTE(review): directly after `j = w` this check cannot change j
if (j < w) j = w;
// expand to 32bit (4 byte) aligned rows for texture upload
nw = ((w + 3) / 4) * 4;
ndata = alloca(nw *h);
// NOTE(review): returning here bypasses the free(data) at done:, so a buffer
// obtained from evas_common_font_glyph_uncompress() would not be released
if (!ndata) return NULL;
if ((fg->glyph_out->bitmap.num_grays == 256) &&
(fg->glyph_out->bitmap.pixel_mode == FT_PIXEL_MODE_GRAY))
// copy w bytes of each row from data (stride j) into ndata (stride nw)
for (y = 0; y < h; y++)
{
int x, y;
DATA8 *p1, *p2;
for (y = 0; y < h; y++)
{
p1 = data + (j * y);
p2 = ndata + (nw * y);
for (x = 0; x < w; x++)
{
*p2 = *p1;
p1++;
p2++;
}
}
p1 = data + (j * y);
p2 = ndata + (nw * y);
for (x = 0; x < w; x++)
{
*p2 = *p1;
p1++;
p2++;
}
}
else
{
// 1-bit-per-pixel path: each source byte is expanded, most significant bit
// first, into up to 8 bytes of 0x00 / 0xff in tmpbuf, then the row is copied
// into ndata
DATA8 *tmpbuf = NULL, *dp, *tp, bits;
int bi, bj, end;
const DATA8 bitrepl[2] = {0x0, 0xff};
tmpbuf = alloca(w);
if (tmpbuf)
{
int x, y;
DATA8 *p1, *p2;
for (y = 0; y < h; y++)
{
p1 = tmpbuf;
p2 = ndata + (nw * y);
tp = tmpbuf;
dp = data + (y * fg->glyph_out->bitmap.pitch);
for (bi = 0; bi < w; bi += 8)
{
bits = *dp;
// the last byte of a row may carry fewer than 8 valid pixels
if ((w - bi) < 8) end = w - bi;
else end = 8;
for (bj = 0; bj < end; bj++)
{
*tp = bitrepl[(bits >> (7 - bj)) & 0x1];
tp++;
}
dp++;
}
for (x = 0; x < w; x++)
{
*p2 = *p1;
p1++;
p2++;
}
}
}
}
// fh = h;
// the font's max_h is passed as the last argument of the texture call below
fh = fg->fi->max_h;
tex = evas_gl_common_texture_alpha_new(gc, ndata, w, h, fh);
if (!tex) return NULL;
// on failure tex is NULL; done: still frees data and returns it
if (!tex) goto done;
// texture extents divided by the dimensions of tex->pt (presumably the
// backing pool texture - TODO confirm)
tex->sx1 = ((double)(tex->x)) / (double)tex->pt->w;
tex->sy1 = ((double)(tex->y)) / (double)tex->pt->h;
tex->sx2 = ((double)(tex->x + tex->w)) / (double)tex->pt->w;
tex->sy2 = ((double)(tex->y + tex->h)) / (double)tex->pt->h;
tex->fglyph = fg;
gc->font_glyph_textures = eina_list_append(gc->font_glyph_textures, tex);
done:
free(data);
return tex;
}