forked from enlightenment/terminology
tools: allow user to decide whether emoji are double width
This commit is contained in:
parent
287bf5ab6e
commit
f3066b1935
|
@ -12,14 +12,16 @@ import xml.etree.ElementTree as ET
|
|||
|
||||
URange = namedtuple('unicode_range', ['width', 'start', 'end'])
|
||||
|
||||
def get_ranges(xmlfile, emoji_as_wide):
|
||||
def get_ranges(xmlfile):
|
||||
tree = ET.parse(xmlfile)
|
||||
root = tree.getroot()
|
||||
repertoire = root.find("{http://www.unicode.org/ns/2003/ucd/1.0}repertoire")
|
||||
chars = repertoire.findall("{http://www.unicode.org/ns/2003/ucd/1.0}char")
|
||||
|
||||
ranges = []
|
||||
r = URange('N', 0, 0)
|
||||
ranges_basic = []
|
||||
ranges_emoji_double = []
|
||||
r_basic = URange('N', 0, 0)
|
||||
r_emoji_dbl = URange('N', 0, 0)
|
||||
for c in chars:
|
||||
ea = c.get('ea')
|
||||
if ea in ('Na', 'H'):
|
||||
|
@ -29,26 +31,35 @@ def get_ranges(xmlfile, emoji_as_wide):
|
|||
cp = c.get('cp')
|
||||
if not cp:
|
||||
continue
|
||||
if emoji_as_wide:
|
||||
ext_pic = c.get('ExtPict')
|
||||
emoji = c.get('Emoji')
|
||||
if emoji == 'Y' and ext_pic == 'Y' and ea != 'A':
|
||||
ea = 'W'
|
||||
else:
|
||||
blk = c.get('blk')
|
||||
if blk == 'Misc_Pictographs':
|
||||
ea = 'W'
|
||||
|
||||
cp = int(cp, 16)
|
||||
if ea != r[0]:
|
||||
ranges.append(r)
|
||||
r = URange(ea, cp, cp)
|
||||
|
||||
# basic
|
||||
if ea != r_basic[0]:
|
||||
ranges_basic.append(r_basic)
|
||||
r_basic = URange(ea, cp, cp)
|
||||
else:
|
||||
r = r._replace(end=cp)
|
||||
r_basic = r_basic._replace(end=cp)
|
||||
|
||||
ranges.append(r)
|
||||
# emoji as wide
|
||||
ext_pic = c.get('ExtPict')
|
||||
emoji = c.get('Emoji')
|
||||
if emoji == 'Y' and ext_pic == 'Y' and ea != 'A':
|
||||
ea = 'W'
|
||||
else:
|
||||
blk = c.get('blk')
|
||||
if blk == 'Misc_Pictographs':
|
||||
ea = 'W'
|
||||
if ea != r_emoji_dbl[0]:
|
||||
ranges_emoji_double.append(r_emoji_dbl)
|
||||
r_emoji_dbl = URange(ea, cp, cp)
|
||||
else:
|
||||
r_emoji_dbl = r_emoji_dbl._replace(end=cp)
|
||||
|
||||
ranges_basic.append(r_basic)
|
||||
ranges_emoji_double.append(r_emoji_dbl)
|
||||
|
||||
return (ranges_basic, ranges_emoji_double)
|
||||
|
||||
return ranges
|
||||
|
||||
def merge_ranges(ranges, is_same_width):
|
||||
res = []
|
||||
|
@ -69,109 +80,138 @@ def skip_ranges(ranges, width_skipped):
|
|||
res.append(r)
|
||||
return res
|
||||
|
||||
def gen_header(cur_range, file_header):
|
||||
def gen_header(mininum_codepoint, file_header):
|
||||
file_header.write(
|
||||
"""/* XXX: Code generated by tool unicode_dbl_width.py */
|
||||
#ifndef TERMINOLOGY_TERMPTY_DBL_H_
|
||||
#define TERMINOLOGY_TERMPTY_DBL_H_ 1
|
||||
|
||||
Eina_Bool _termpty_is_wide(const Eina_Unicode g);
|
||||
Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);
|
||||
Eina_Bool _termpty_is_wide(const Eina_Unicode g, Eina_Bool emoji_dbl_width);
|
||||
Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g, Eina_Bool emoji_dbl_width);
|
||||
|
||||
static inline Eina_Bool
|
||||
_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
|
||||
{
|
||||
/* optimize for latin1 non-ambiguous */
|
||||
""")
|
||||
file_header.write(f" if (g <= 0x{cur_range.end:X})")
|
||||
file_header.write(f" if (g <= 0x{mininum_codepoint:X})")
|
||||
file_header.write(
|
||||
"""
|
||||
return EINA_FALSE;
|
||||
if (!ty->termstate.cjk_ambiguous_wide)
|
||||
return _termpty_is_wide(g);
|
||||
return _termpty_is_wide(g, ty->config->emoji_dbl_width);
|
||||
else
|
||||
return _termpty_is_ambigous_wide(g);
|
||||
return _termpty_is_ambigous_wide(g, ty->config->emoji_dbl_width);
|
||||
}
|
||||
|
||||
#endif
|
||||
""")
|
||||
|
||||
def gen_ambigous(ranges, file_source):
|
||||
def gen_ambigous(ranges_basic, ranges_emoji_double, file_source):
|
||||
def handle_ranges(ranges):
|
||||
def is_same_width(r1, r2):
|
||||
if r1.width == 'N':
|
||||
return r2.width == 'N'
|
||||
else:
|
||||
return r2.width in ('A', 'W')
|
||||
ranges = merge_ranges(ranges[1:], is_same_width)
|
||||
ranges = skip_ranges(ranges, ('N',))
|
||||
fallthrough = " EINA_FALLTHROUGH;"
|
||||
for idx, r in enumerate(ranges):
|
||||
if r.width == 'N':
|
||||
continue;
|
||||
if idx == len(ranges) -1:
|
||||
fallthrough = ""
|
||||
if r.start == r.end:
|
||||
file_source.write(f" case 0x{r.start:X}:{fallthrough}\n")
|
||||
else:
|
||||
file_source.write(f" case 0x{r.start:X} ... 0x{r.end:X}:{fallthrough}\n")
|
||||
|
||||
file_source.write(
|
||||
"""
|
||||
__attribute__((const))
|
||||
Eina_Bool
|
||||
_termpty_is_ambigous_wide(Eina_Unicode g)
|
||||
_termpty_is_ambigous_wide(Eina_Unicode g, Eina_Bool emoji_dbl_width)
|
||||
{
|
||||
switch (g)
|
||||
{
|
||||
if (emoji_dbl_width)
|
||||
{
|
||||
switch (g)
|
||||
{
|
||||
""")
|
||||
def is_same_width(r1, r2):
|
||||
if r1.width == 'N':
|
||||
return r2.width == 'N'
|
||||
else:
|
||||
return r2.width in ('A', 'W')
|
||||
ranges = merge_ranges(ranges[1:], is_same_width)
|
||||
ranges = skip_ranges(ranges, ('N',))
|
||||
|
||||
fallthrough = " EINA_FALLTHROUGH;"
|
||||
for idx, r in enumerate(ranges):
|
||||
if r.width == 'N':
|
||||
continue;
|
||||
if idx == len(ranges) -1:
|
||||
fallthrough = ""
|
||||
if r.start == r.end:
|
||||
file_source.write(f" case 0x{r.start:X}:{fallthrough}\n")
|
||||
else:
|
||||
file_source.write(f" case 0x{r.start:X} ... 0x{r.end:X}:{fallthrough}\n")
|
||||
|
||||
handle_ranges(ranges_emoji_double)
|
||||
file_source.write(
|
||||
"""
|
||||
return EINA_TRUE;
|
||||
}
|
||||
return EINA_FALSE;
|
||||
}
|
||||
""" return EINA_TRUE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (g)
|
||||
{
|
||||
""")
|
||||
|
||||
def gen_wide(ranges, file_source):
|
||||
handle_ranges(ranges_basic)
|
||||
file_source.write(
|
||||
"""
|
||||
__attribute__((const))
|
||||
Eina_Bool
|
||||
_termpty_is_wide(Eina_Unicode g)
|
||||
{
|
||||
switch (g)
|
||||
{
|
||||
""")
|
||||
def is_same_width(r1, r2):
|
||||
if r1.width in ('N', 'A'):
|
||||
return r2.width in ('N', 'A')
|
||||
else:
|
||||
return r2.width == 'W'
|
||||
ranges = merge_ranges(ranges[1:], is_same_width)
|
||||
ranges = skip_ranges(ranges, ('N', 'A'))
|
||||
fallthrough = " EINA_FALLTHROUGH;"
|
||||
for idx, r in enumerate(ranges):
|
||||
if r.width in ('N', 'A'):
|
||||
continue;
|
||||
if idx == len(ranges) -1:
|
||||
fallthrough = ""
|
||||
if r.start == r.end:
|
||||
file_source.write(f" case 0x{r.start:X}:{fallthrough}\n")
|
||||
else:
|
||||
file_source.write(f" case 0x{r.start:X} ... 0x{r.end:X}:{fallthrough}\n")
|
||||
|
||||
file_source.write(
|
||||
"""
|
||||
return EINA_TRUE;
|
||||
}
|
||||
""" return EINA_TRUE;
|
||||
}
|
||||
}
|
||||
return EINA_FALSE;
|
||||
}
|
||||
""")
|
||||
|
||||
|
||||
def gen_c(ranges, file_header, file_source):
|
||||
gen_header(ranges[0], file_header)
|
||||
def gen_wide(ranges_basic, ranges_emoji_double, file_source):
    """Write the C definition of ``_termpty_is_wide`` to *file_source*.

    The generated function takes an ``emoji_dbl_width`` flag and holds two
    ``switch (g)`` statements: the first gets its case labels from
    *ranges_emoji_double* (flag set), the second from *ranges_basic*
    (flag clear).  Only ranges whose width is ``'W'`` become case labels;
    ``'N'`` and ``'A'`` ranges are left out, so the generated function
    falls through to ``return EINA_FALSE`` for them.

    Args:
        ranges_basic: sequence of records exposing ``width``, ``start`` and
            ``end``, used for the ``emoji_dbl_width == false`` branch.
        ranges_emoji_double: same kind of sequence, used for the
            ``emoji_dbl_width == true`` branch.
        file_source: object with a ``write(str)`` method receiving the C code.

    Returns:
        None.  The only effect is the text written to *file_source*.
    """
    # NOTE(review): the whitespace inside the string literals below is
    # reproduced exactly as it appears in the reviewed view of this file,
    # which may have collapsed leading spaces -- confirm against the
    # repository before regenerating the C sources.
    not_wide = ('N', 'A')

    def is_same_width(r1, r2):
        # Two ranges may be merged when both are wide or both are not wide.
        if r1.width in not_wide:
            return r2.width in not_wide
        return r2.width == 'W'

    def handle_ranges(ranges):
        # The leading range is dropped here; presumably it is the narrow
        # prefix already covered by the fast path emitted in the header
        # (gen_c derives that threshold from ranges[0].end) -- verify.
        merged = merge_ranges(ranges[1:], is_same_width)
        kept = skip_ranges(merged, not_wide)

        # Filter before deciding which label is the last one, so that the
        # final emitted `case` never carries EINA_FALLTHROUGH right before
        # the `return EINA_TRUE;` that follows the labels.
        wide = [r for r in kept if r.width not in not_wide]
        last = len(wide) - 1
        for idx, r in enumerate(wide):
            suffix = "" if idx == last else " EINA_FALLTHROUGH;"
            if r.start == r.end:
                file_source.write(f" case 0x{r.start:X}:{suffix}\n")
            else:
                file_source.write(f" case 0x{r.start:X} ... 0x{r.end:X}:{suffix}\n")

    file_source.write(
"""
__attribute__((const))
Eina_Bool
_termpty_is_wide(Eina_Unicode g, Eina_Bool emoji_dbl_width)
{
if (emoji_dbl_width)
{
switch (g)
{
""")
    handle_ranges(ranges_emoji_double)
    file_source.write(
""" return EINA_TRUE;
}
}
else
{
switch (g)
{
""")
    handle_ranges(ranges_basic)
    file_source.write(
""" return EINA_TRUE;
}
}
return EINA_FALSE;
}
""")
|
||||
|
||||
|
||||
def gen_c(ranges_basic, ranges_emoji_double, file_header, file_source):
|
||||
mininum_codepoint = min(ranges_basic[0].end, ranges_emoji_double[0].end)
|
||||
gen_header(mininum_codepoint, file_header)
|
||||
file_source.write(
|
||||
"""/* XXX: Code generated by tool unicode_dbl_width.py */
|
||||
#include "private.h"
|
||||
|
@ -180,8 +220,8 @@ def gen_c(ranges, file_header, file_source):
|
|||
#include "termpty.h"
|
||||
#include "termptydbl.h"
|
||||
""")
|
||||
gen_ambigous(ranges, file_source)
|
||||
gen_wide(ranges, file_source)
|
||||
gen_ambigous(ranges_basic, ranges_emoji_double, file_source)
|
||||
gen_wide(ranges_basic, ranges_emoji_double, file_source)
|
||||
|
||||
parser = argparse.ArgumentParser(description='Generate code handling different widths of unicode codepoints.')
|
||||
parser.add_argument('xml', type=argparse.FileType('r'))
|
||||
|
@ -190,5 +230,5 @@ parser.add_argument('source', type=argparse.FileType('w'))
|
|||
|
||||
args = parser.parse_args()
|
||||
|
||||
ranges = get_ranges(args.xml, False)
|
||||
gen_c(ranges, args.header, args.source)
|
||||
(ranges_basic, ranges_emoji_double) = get_ranges(args.xml)
|
||||
gen_c(ranges_basic, ranges_emoji_double, args.header, args.source)
|
||||
|
|
Loading…
Reference in New Issue