try to optimize cell copy

On my stupid benchmark (having tyfuzz parse a 628MB text file, i.e. War and
Peace concatenated a few times, over 10 sessions), throughput went from
15.2MB/s to 16.8MB/s \o/ That's a 10.5% improvement!
This commit is contained in:
Boris Faure 2017-09-09 00:18:02 +02:00
parent 7c6d581465
commit 0991479fcb
4 changed files with 35 additions and 34 deletions

View File

@ -983,7 +983,7 @@ add_new_ts:
ts = termpty_save_new(ts, w);
if (!ts)
return;
termpty_cell_copy(ty, cells, ts->cells, w);
TERMPTY_CELL_COPY(ty, cells, ts->cells, w);
ty->backpos++;
if (ty->backpos >= ty->backsize)
ty->backpos = 0;
@ -1261,7 +1261,7 @@ _termpty_line_rewrap(Termpty *ty, Termcell *src_cells, int len,
int copy_width = MIN(len, si->w - si->x);
Termcell *dst_cells = &SCREEN_INFO_GET_CELLS(si, si->x, si->y);
termpty_cell_copy(ty,
TERMPTY_CELL_COPY(ty,
/*src*/ src_cells,
/*dst*/ dst_cells,
copy_width);
@ -1560,8 +1560,8 @@ termpty_block_chid_get(const Termpty *ty, const char *chid)
return tb;
}
static void
_handle_block_codepoint_overwrite_heavy(Termpty *ty, int oldc, int newc)
void
termpty_handle_block_codepoint_overwrite_heavy(Termpty *ty, int oldc, int newc)
{
Termblock *tb;
int ido = 0, idn = 0;
@ -1591,26 +1591,6 @@ _handle_block_codepoint_overwrite_heavy(Termpty *ty, int oldc, int newc)
}
}
/* Cheap inline filter in front of the out-of-line handler: the heavy routine
 * is only entered when bit 31 (0x80000000) is set in the old or in the new
 * codepoint; otherwise this is a no-op. */
static inline void
_handle_block_codepoint_overwrite(Termpty *ty, Eina_Unicode oldc, Eina_Unicode newc)
{
   if ((oldc | newc) & 0x80000000)
     {
        _handle_block_codepoint_overwrite_heavy(ty, oldc, newc);
     }
}
/* Copy n cells from src to dst, one cell at a time and in increasing index
 * order.  Before a destination cell is replaced,
 * _handle_block_codepoint_overwrite() is given the codepoint it currently
 * holds and the codepoint that is about to be stored.
 * Nothing happens when n <= 0. */
void
termpty_cell_copy(Termpty *ty, Termcell *src, Termcell *dst, int n)
{
   Termcell *from = src;
   Termcell *to = dst;
   int remaining;

   for (remaining = n; remaining > 0; remaining--, from++, to++)
     {
        _handle_block_codepoint_overwrite(ty, to->codepoint, from->codepoint);
        *to = *from;
     }
}
void
termpty_screen_swap(Termpty *ty)
{
@ -1640,7 +1620,7 @@ termpty_cell_fill(Termpty *ty, Termcell *src, Termcell *dst, int n)
{
for (i = 0; i < n; i++)
{
_handle_block_codepoint_overwrite(ty, dst[i].codepoint, src[0].codepoint);
HANDLE_BLOCK_CODEPOINT_OVERWRITE(ty, dst[i].codepoint, src[0].codepoint);
dst[i] = src[0];
}
}
@ -1648,7 +1628,7 @@ termpty_cell_fill(Termpty *ty, Termcell *src, Termcell *dst, int n)
{
for (i = 0; i < n; i++)
{
_handle_block_codepoint_overwrite(ty, dst[i].codepoint, 0);
HANDLE_BLOCK_CODEPOINT_OVERWRITE(ty, dst[i].codepoint, 0);
memset(&(dst[i]), 0, sizeof(*dst));
}
}
@ -1661,7 +1641,7 @@ termpty_cells_set_content(Termpty *ty, Termcell *cells,
int i;
for (i = 0; i < count; i++)
{
_handle_block_codepoint_overwrite(ty, cells[i].codepoint, codepoint);
HANDLE_BLOCK_CODEPOINT_OVERWRITE(ty, cells[i].codepoint, codepoint);
cells[i].codepoint = codepoint;
}
}
@ -1676,7 +1656,7 @@ termpty_cells_att_fill_preserve_colors(Termpty *ty, Termcell *cells,
for (i = 0; i < count; i++)
{
Termatt att = cells[i].att;
_handle_block_codepoint_overwrite(ty, cells[i].codepoint, codepoint);
HANDLE_BLOCK_CODEPOINT_OVERWRITE(ty, cells[i].codepoint, codepoint);
cells[i] = local;
if (ty->termstate.att.fg == 0 && ty->termstate.att.bg == 0)
{
@ -1701,7 +1681,7 @@ termpty_cell_codepoint_att_fill(Termpty *ty, Eina_Unicode codepoint,
for (i = 0; i < n; i++)
{
_handle_block_codepoint_overwrite(ty, dst[i].codepoint, codepoint);
HANDLE_BLOCK_CODEPOINT_OVERWRITE(ty, dst[i].codepoint, codepoint);
dst[i] = local;
}
}

View File

@ -263,7 +263,6 @@ Termblock *termpty_block_get(const Termpty *ty, int id);
void termpty_block_chid_update(Termpty *ty, Termblock *blk);
Termblock *termpty_block_chid_get(const Termpty *ty, const char *chid);
void termpty_cell_copy(Termpty *ty, Termcell *src, Termcell *dst, int n);
void termpty_cell_fill(Termpty *ty, Termcell *src, Termcell *dst, int n);
void termpty_cell_codepoint_att_fill(Termpty *ty, Eina_Unicode codepoint, Termatt att, Termcell *dst, int n);
void termpty_cells_set_content(Termpty *ty, Termcell *cells,
@ -274,6 +273,7 @@ ssize_t termpty_line_length(const Termcell *cells, ssize_t nb_cells);
Config *termpty_config_get(const Termpty *ty);
void termpty_handle_buf(Termpty *ty, const Eina_Unicode *codepoints, int len);
void termpty_handle_block_codepoint_overwrite_heavy(Termpty *ty, int oldc, int newc);
extern int _termpty_log_dom;
@ -288,4 +288,25 @@ extern int _termpty_log_dom;
Field = Min; \
} while (0)
/*
 * Try to trick the compiler into inlining the first test.
 *
 * Calls termpty_handle_block_codepoint_overwrite_heavy() only when bit 31
 * (0x80000000) is set in the old or in the new codepoint; in every other
 * case the expansion is just the inline test.
 *
 * Tpty: passed through as first argument of the handler
 * OLDC: codepoint currently stored in the cell
 * NEWC: codepoint about to be stored in the cell
 *
 * Every argument is parenthesized in the expansion, so expressions built
 * from low-precedence operators (?:, &&, ...) can be passed safely.
 * OLDC and NEWC appear twice in the expansion (test + call) and may thus be
 * evaluated twice: do not pass expressions with side effects.
 */
#define HANDLE_BLOCK_CODEPOINT_OVERWRITE(Tpty, OLDC, NEWC)                  \
   do {                                                                     \
      if (EINA_UNLIKELY(((OLDC) | (NEWC)) & 0x80000000))                    \
        termpty_handle_block_codepoint_overwrite_heavy((Tpty),              \
                                                       (OLDC),              \
                                                       (NEWC));             \
   } while (0)
/*
 * Copy N cells from Tsrc to Tdst.
 *
 * HANDLE_BLOCK_CODEPOINT_OVERWRITE() is first run for every destination cell
 * about to be replaced (old codepoint from Tdst, new codepoint from Tsrc),
 * then all the cells are copied with a single memcpy().
 *
 * Tpty: forwarded to HANDLE_BLOCK_CODEPOINT_OVERWRITE()
 * Tsrc: first cell to read
 * Tdst: first cell to write; because memcpy() is used, the source and
 *       destination ranges must not overlap
 * N:    number of cells, as an int; nothing is done when it is <= 0
 *
 * Each argument is evaluated exactly once and captured in a local, so
 * arbitrary expressions (e.g. "w - x") are safe to pass.  The locals carry a
 * "tcc_" prefix and "_" suffix to keep them out of the way of caller names.
 */
#define TERMPTY_CELL_COPY(Tpty, Tsrc, Tdst, N)                              \
   do {                                                                     \
      Termpty *tcc_ty_ = (Tpty);                                            \
      const Termcell *tcc_src_ = (Tsrc);                                    \
      Termcell *tcc_dst_ = (Tdst);                                          \
      const int tcc_n_ = (N);                                               \
                                                                            \
      if (tcc_n_ > 0)                                                       \
        {                                                                   \
           int tcc_i_;                                                      \
                                                                            \
           for (tcc_i_ = 0; tcc_i_ < tcc_n_; tcc_i_++)                      \
             {                                                              \
                HANDLE_BLOCK_CODEPOINT_OVERWRITE(tcc_ty_,                   \
                                      tcc_dst_[tcc_i_].codepoint,           \
                                      tcc_src_[tcc_i_].codepoint);          \
             }                                                              \
           memcpy(tcc_dst_, tcc_src_,                                       \
                  (size_t)tcc_n_ * sizeof(*tcc_dst_));                      \
        }                                                                   \
   } while (0)
#endif

View File

@ -1464,7 +1464,7 @@ CUF:
for (x = ty->cursor_state.cx; x < (ty->w); x++)
{
if (x < lim)
termpty_cell_copy(ty, &(cells[x + arg]), &(cells[x]), 1);
TERMPTY_CELL_COPY(ty, &(cells[x + arg]), &(cells[x]), 1);
else
{
cells[x].codepoint = ' ';

View File

@ -83,7 +83,7 @@ termpty_text_scroll(Termpty *ty, Eina_Bool clear)
{
cells = &(TERMPTY_SCREEN(ty, x, (y + 1)));
cells2 = &(TERMPTY_SCREEN(ty, x, y));
termpty_cell_copy(ty, cells, cells2, w);
TERMPTY_CELL_COPY(ty, cells, cells2, w);
}
if (clear)
termpty_cells_clear(ty, cells, w);
@ -122,7 +122,7 @@ termpty_text_scroll_rev(Termpty *ty, Eina_Bool clear)
{
cells = &(TERMPTY_SCREEN(ty, 0, (y - 1)));
cells2 = &(TERMPTY_SCREEN(ty, 0, y));
termpty_cell_copy(ty, cells, cells2, ty->w);
TERMPTY_CELL_COPY(ty, cells, cells2, ty->w);
}
if (clear)
termpty_cells_clear(ty, cells, ty->w);
@ -198,7 +198,7 @@ termpty_text_append(Termpty *ty, const Eina_Unicode *codepoints, int len)
if (ty->termstate.insert)
{
for (j = max_right-1; j > ty->cursor_state.cx; j--)
termpty_cell_copy(ty, &(cells[j - 1]), &(cells[j]), 1);
TERMPTY_CELL_COPY(ty, &(cells[j - 1]), &(cells[j]), 1);
}
g = _termpty_charset_trans(ty, codepoints[i]);