termptydbl: generate the exact double width test

This is based on Unicode 13.0:
https://www.unicode.org/reports/tr44/

Code is generated by tools/unicode_dbl_width.py

I'm using switch-case with fall-through, as I've found it to be the best approach:
f46d550a8b
This commit is contained in:
Boris Faure 2020-07-05 21:40:42 +02:00
parent 9f2f8464f3
commit 8c970b8804
Signed by: borisfaure
GPG Key ID: 35C0410516166BE8
3 changed files with 496 additions and 399 deletions

View File

@ -1,396 +1,308 @@
/* XXX: Code generated by tool unicode_dbl_width.py */
#include "private.h"
#include <Elementary.h>
#include "termpty.h"
#include "termptydbl.h"
__attribute__((const))
Eina_Bool
_termpty_is_dblwidth_slow_get(const Termpty *ty, int g)
_termpty_is_ambigous_wide(Eina_Unicode g)
{
// check for east asian full-width (F), half-width (H), wide (W),
// narrow (Na) or ambiguous (A) codepoints
// ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
// emoji should be double since unicode 9 (was single before):
// http://www.unicode.org/emoji/charts/full-emoji-list.html
//
// [ 0x0080 -> 0x02AF] !!! handle carefully **
// [ 0x1DC0 -> 0x1DFF]
// [ 0x1E00 -> 0x1EFF]
// [ 0x2000 -> 0x209F] !!! handle carefully **
// [ 0x20D0 -> 0x214F]
// [ 0x2190 -> 0x23FF]
// [ 0x2460 -> 0x24FF]
// [ 0x2600 -> 0x262F]
// [ 0x2638 -> 0x27EF]
// [ 0x2900 -> 0x29FF]
// [ 0x2B00 -> 0x2BFF] !!! unicode only 2B55 2B50
// [ 0x2C60 -> 0x2C7F]
// [ 0x2E00 -> 0x2E7F]
// [ 0x3000 -> 0x303F] !! not 33D1
// [ 0xA490 -> 0xA4CF]
// [0x1F000 -> 0x1F02F]
// [0x1F0A0 -> 0x1F0FF]
// [0x1F100 -> 0x1F64F]
// [0x1F680 -> 0x1F6FF]
// [0x1F910 -> 0x1F96B]
// [0x1F980 -> 0x1F9E0]
//
// ** these range include these odities:
// © (copyright) 00A9
// ® (registered) 00AE
// ‼ (double exclamation) 203C
// ⁉ (exclamation questionmark) 2049
// which should be single width, so ignore them
// (W)
// optimization: only look into more detailed ranges if within larger block
if ((g >= 0x1100) && (g <= 0x3FFFD))
switch (g)
{
if (
// 1XXX
((g >= 0x1100) && (g <= 0x115f)) || // Hangul Jamo
// 2XXX
((g == 0x2329) || (g == 0x232a)) || // <>
((g >= 0x2e80) && (g <= 0x2ffb)) || // Radical supplements
// 3XXX -> A4C6
((g >= 0x3001) && (g <= 0x303f)) || // CJK Symbols and Punctuation
((g >= 0x3041) && (g <= 0x3247)) || // Hiragana, Katakana,
// Bopomoto, Hangul
// Compatibility Jamo, Kanbun,
// Bopomofo Extended, CJK
// Strokes, Katana Phonetic
// Extensions, Enclosed CJK
// Letters and Months
((g >= 0x3250) && (g <= 0x33D0)) || // Enclosed CJK Letters and
// Months, CJK Compatibility
// [ symbols used by "powerline" ]
((g >= 0x33D2) && (g <= 0x4dbf)) || // CJK Compatibility, CJK
// Unified Ideographs
// Extension A, Yijing
// Hexagram Symbols
((g >= 0x4e00) && (g <= 0xa4c6)) || // CJK Unified Ideographs,
// Yi Syllables, Yi Radicals
// aXXX
((g >= 0xa960) && (g <= 0xa97c)) || // Hangul Jamo Extended A
((g >= 0xac00) && (g <= 0xd7a3)) || // Hangul Syllables
// fXXX
((g >= 0xf900) && (g <= 0xfaff)) || // CJK Compatibility Ideographs
((g >= 0xfe10) && (g <= 0xfe19)) || // Vertical Forms
((g >= 0xfe30) && (g <= 0xfe6b)) || // CJK Compatibility Forms,
// Small Forms Variant
// 1XXXX
((g >= 0x1b000) && (g <= 0x1b11e)) || // Kana Supplement, Kana
// Extended A
((g >= 0x1b170) && (g <= 0x1b2fb)) || // Nushu
((g >= 0x1f200) && (g <= 0x1f202)) || // Enclosed Ideographic
// Supplement
((g >= 0x1f210) && (g <= 0x1f265)) || // Enclosed Ideographic
// Supplement
// 2XXXX
((g >= 0x20000) && (g <= 0x2fffd)) || // CJK
// 3XXXX
((g >= 0x30000) && (g <= 0x3fffd)))
case 0xA1: EINA_FALLTHROUGH;
case 0xA4: EINA_FALLTHROUGH;
case 0xA7 ... 0xAA: EINA_FALLTHROUGH;
case 0xAD ... 0xAE: EINA_FALLTHROUGH;
case 0xB0 ... 0xB4: EINA_FALLTHROUGH;
case 0xB6 ... 0xBA: EINA_FALLTHROUGH;
case 0xBC ... 0xBF: EINA_FALLTHROUGH;
case 0xC6: EINA_FALLTHROUGH;
case 0xD0: EINA_FALLTHROUGH;
case 0xD7 ... 0xD8: EINA_FALLTHROUGH;
case 0xDE ... 0xE1: EINA_FALLTHROUGH;
case 0xE6: EINA_FALLTHROUGH;
case 0xE8 ... 0xEA: EINA_FALLTHROUGH;
case 0xEC ... 0xED: EINA_FALLTHROUGH;
case 0xF0: EINA_FALLTHROUGH;
case 0xF2 ... 0xF3: EINA_FALLTHROUGH;
case 0xF7 ... 0xFA: EINA_FALLTHROUGH;
case 0xFC: EINA_FALLTHROUGH;
case 0xFE: EINA_FALLTHROUGH;
case 0x101: EINA_FALLTHROUGH;
case 0x111: EINA_FALLTHROUGH;
case 0x113: EINA_FALLTHROUGH;
case 0x11B: EINA_FALLTHROUGH;
case 0x126 ... 0x127: EINA_FALLTHROUGH;
case 0x12B: EINA_FALLTHROUGH;
case 0x131 ... 0x133: EINA_FALLTHROUGH;
case 0x138: EINA_FALLTHROUGH;
case 0x13F ... 0x142: EINA_FALLTHROUGH;
case 0x144: EINA_FALLTHROUGH;
case 0x148 ... 0x14B: EINA_FALLTHROUGH;
case 0x14D: EINA_FALLTHROUGH;
case 0x152 ... 0x153: EINA_FALLTHROUGH;
case 0x166 ... 0x167: EINA_FALLTHROUGH;
case 0x16B: EINA_FALLTHROUGH;
case 0x1CE: EINA_FALLTHROUGH;
case 0x1D0: EINA_FALLTHROUGH;
case 0x1D2: EINA_FALLTHROUGH;
case 0x1D4: EINA_FALLTHROUGH;
case 0x1D6: EINA_FALLTHROUGH;
case 0x1D8: EINA_FALLTHROUGH;
case 0x1DA: EINA_FALLTHROUGH;
case 0x1DC: EINA_FALLTHROUGH;
case 0x251: EINA_FALLTHROUGH;
case 0x261: EINA_FALLTHROUGH;
case 0x2C4: EINA_FALLTHROUGH;
case 0x2C7: EINA_FALLTHROUGH;
case 0x2C9 ... 0x2CB: EINA_FALLTHROUGH;
case 0x2CD: EINA_FALLTHROUGH;
case 0x2D0: EINA_FALLTHROUGH;
case 0x2D8 ... 0x2DB: EINA_FALLTHROUGH;
case 0x2DD: EINA_FALLTHROUGH;
case 0x2DF: EINA_FALLTHROUGH;
case 0x300 ... 0x36F: EINA_FALLTHROUGH;
case 0x391 ... 0x3A9: EINA_FALLTHROUGH;
case 0x3B1 ... 0x3C1: EINA_FALLTHROUGH;
case 0x3C3 ... 0x3C9: EINA_FALLTHROUGH;
case 0x401: EINA_FALLTHROUGH;
case 0x410 ... 0x44F: EINA_FALLTHROUGH;
case 0x451: EINA_FALLTHROUGH;
case 0x1100 ... 0x115F: EINA_FALLTHROUGH;
case 0x2010: EINA_FALLTHROUGH;
case 0x2013 ... 0x2016: EINA_FALLTHROUGH;
case 0x2018 ... 0x2019: EINA_FALLTHROUGH;
case 0x201C ... 0x201D: EINA_FALLTHROUGH;
case 0x2020 ... 0x2022: EINA_FALLTHROUGH;
case 0x2024 ... 0x2027: EINA_FALLTHROUGH;
case 0x2030: EINA_FALLTHROUGH;
case 0x2032 ... 0x2033: EINA_FALLTHROUGH;
case 0x2035: EINA_FALLTHROUGH;
case 0x203B ... 0x203C: EINA_FALLTHROUGH;
case 0x203E: EINA_FALLTHROUGH;
case 0x2049: EINA_FALLTHROUGH;
case 0x2074: EINA_FALLTHROUGH;
case 0x207F: EINA_FALLTHROUGH;
case 0x2081 ... 0x2084: EINA_FALLTHROUGH;
case 0x20AC: EINA_FALLTHROUGH;
case 0x2103: EINA_FALLTHROUGH;
case 0x2105: EINA_FALLTHROUGH;
case 0x2109: EINA_FALLTHROUGH;
case 0x2113: EINA_FALLTHROUGH;
case 0x2116: EINA_FALLTHROUGH;
case 0x2121 ... 0x2122: EINA_FALLTHROUGH;
case 0x2126: EINA_FALLTHROUGH;
case 0x212B: EINA_FALLTHROUGH;
case 0x2139: EINA_FALLTHROUGH;
case 0x2153 ... 0x2154: EINA_FALLTHROUGH;
case 0x215B ... 0x215E: EINA_FALLTHROUGH;
case 0x2160 ... 0x216B: EINA_FALLTHROUGH;
case 0x2170 ... 0x2179: EINA_FALLTHROUGH;
case 0x2189: EINA_FALLTHROUGH;
case 0x2190 ... 0x2199: EINA_FALLTHROUGH;
case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH;
case 0x21B8 ... 0x21B9: EINA_FALLTHROUGH;
case 0x21D2: EINA_FALLTHROUGH;
case 0x21D4: EINA_FALLTHROUGH;
case 0x21E7: EINA_FALLTHROUGH;
case 0x2200: EINA_FALLTHROUGH;
case 0x2202 ... 0x2203: EINA_FALLTHROUGH;
case 0x2207 ... 0x2208: EINA_FALLTHROUGH;
case 0x220B: EINA_FALLTHROUGH;
case 0x220F: EINA_FALLTHROUGH;
case 0x2211: EINA_FALLTHROUGH;
case 0x2215: EINA_FALLTHROUGH;
case 0x221A: EINA_FALLTHROUGH;
case 0x221D ... 0x2220: EINA_FALLTHROUGH;
case 0x2223: EINA_FALLTHROUGH;
case 0x2225: EINA_FALLTHROUGH;
case 0x2227 ... 0x222C: EINA_FALLTHROUGH;
case 0x222E: EINA_FALLTHROUGH;
case 0x2234 ... 0x2237: EINA_FALLTHROUGH;
case 0x223C ... 0x223D: EINA_FALLTHROUGH;
case 0x2248: EINA_FALLTHROUGH;
case 0x224C: EINA_FALLTHROUGH;
case 0x2252: EINA_FALLTHROUGH;
case 0x2260 ... 0x2261: EINA_FALLTHROUGH;
case 0x2264 ... 0x2267: EINA_FALLTHROUGH;
case 0x226A ... 0x226B: EINA_FALLTHROUGH;
case 0x226E ... 0x226F: EINA_FALLTHROUGH;
case 0x2282 ... 0x2283: EINA_FALLTHROUGH;
case 0x2286 ... 0x2287: EINA_FALLTHROUGH;
case 0x2295: EINA_FALLTHROUGH;
case 0x2299: EINA_FALLTHROUGH;
case 0x22A5: EINA_FALLTHROUGH;
case 0x22BF: EINA_FALLTHROUGH;
case 0x2312: EINA_FALLTHROUGH;
case 0x231A ... 0x231B: EINA_FALLTHROUGH;
case 0x2328 ... 0x232A: EINA_FALLTHROUGH;
case 0x2388: EINA_FALLTHROUGH;
case 0x23CF: EINA_FALLTHROUGH;
case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH;
case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH;
case 0x2460 ... 0x24E9: EINA_FALLTHROUGH;
case 0x24EB ... 0x254B: EINA_FALLTHROUGH;
case 0x2550 ... 0x2573: EINA_FALLTHROUGH;
case 0x2580 ... 0x258F: EINA_FALLTHROUGH;
case 0x2592 ... 0x2595: EINA_FALLTHROUGH;
case 0x25A0 ... 0x25A1: EINA_FALLTHROUGH;
case 0x25A3 ... 0x25AB: EINA_FALLTHROUGH;
case 0x25B2 ... 0x25B3: EINA_FALLTHROUGH;
case 0x25B6 ... 0x25B7: EINA_FALLTHROUGH;
case 0x25BC ... 0x25BD: EINA_FALLTHROUGH;
case 0x25C0 ... 0x25C1: EINA_FALLTHROUGH;
case 0x25C6 ... 0x25C8: EINA_FALLTHROUGH;
case 0x25CB: EINA_FALLTHROUGH;
case 0x25CE ... 0x25D1: EINA_FALLTHROUGH;
case 0x25E2 ... 0x25E5: EINA_FALLTHROUGH;
case 0x25EF: EINA_FALLTHROUGH;
case 0x25FB ... 0x25FE: EINA_FALLTHROUGH;
case 0x2600 ... 0x2612: EINA_FALLTHROUGH;
case 0x2614 ... 0x2685: EINA_FALLTHROUGH;
case 0x2690 ... 0x2705: EINA_FALLTHROUGH;
case 0x2708 ... 0x2712: EINA_FALLTHROUGH;
case 0x2714: EINA_FALLTHROUGH;
case 0x2716: EINA_FALLTHROUGH;
case 0x271D: EINA_FALLTHROUGH;
case 0x2721: EINA_FALLTHROUGH;
case 0x2728: EINA_FALLTHROUGH;
case 0x2733 ... 0x2734: EINA_FALLTHROUGH;
case 0x273D: EINA_FALLTHROUGH;
case 0x2744: EINA_FALLTHROUGH;
case 0x2747: EINA_FALLTHROUGH;
case 0x274C: EINA_FALLTHROUGH;
case 0x274E: EINA_FALLTHROUGH;
case 0x2753 ... 0x2755: EINA_FALLTHROUGH;
case 0x2757: EINA_FALLTHROUGH;
case 0x2763 ... 0x2767: EINA_FALLTHROUGH;
case 0x2776 ... 0x277F: EINA_FALLTHROUGH;
case 0x2795 ... 0x2797: EINA_FALLTHROUGH;
case 0x27A1: EINA_FALLTHROUGH;
case 0x27B0: EINA_FALLTHROUGH;
case 0x27BF: EINA_FALLTHROUGH;
case 0x2934 ... 0x2935: EINA_FALLTHROUGH;
case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH;
case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH;
case 0x2B50: EINA_FALLTHROUGH;
case 0x2B55 ... 0x2B59: EINA_FALLTHROUGH;
case 0x2E80 ... 0x303E: EINA_FALLTHROUGH;
case 0x3041 ... 0x4DBF: EINA_FALLTHROUGH;
case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH;
case 0xA960 ... 0xA97C: EINA_FALLTHROUGH;
case 0xAC00 ... 0xD7A3: EINA_FALLTHROUGH;
case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH;
case 0xFE00 ... 0xFE19: EINA_FALLTHROUGH;
case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH;
case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH;
case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH;
case 0xFFFD: EINA_FALLTHROUGH;
case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH;
case 0x1F000 ... 0x1F10A: EINA_FALLTHROUGH;
case 0x1F10D ... 0x1F12D: EINA_FALLTHROUGH;
case 0x1F12F ... 0x1F169: EINA_FALLTHROUGH;
case 0x1F16C ... 0x1F1AD: EINA_FALLTHROUGH;
case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH;
case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH;
case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH;
case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH;
case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH;
case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH;
case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH;
case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH;
case 0x20000 ... 0x3134A: EINA_FALLTHROUGH;
case 0xE0100 ... 0xE01EF:
return EINA_TRUE;
}
return EINA_FALSE;
}
__attribute__((const))
Eina_Bool
_termpty_is_wide(Eina_Unicode g)
{
switch (g)
{
case 0xA9: EINA_FALLTHROUGH;
case 0xAE: EINA_FALLTHROUGH;
case 0x1100 ... 0x115F: EINA_FALLTHROUGH;
case 0x203C: EINA_FALLTHROUGH;
case 0x2049: EINA_FALLTHROUGH;
case 0x2122: EINA_FALLTHROUGH;
case 0x2139: EINA_FALLTHROUGH;
case 0x2194 ... 0x2199: EINA_FALLTHROUGH;
case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH;
case 0x231A ... 0x231B: EINA_FALLTHROUGH;
case 0x2328 ... 0x232A: EINA_FALLTHROUGH;
case 0x2388: EINA_FALLTHROUGH;
case 0x23CF: EINA_FALLTHROUGH;
case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH;
case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH;
case 0x24C2: EINA_FALLTHROUGH;
case 0x25AA ... 0x25AB: EINA_FALLTHROUGH;
case 0x25B6: EINA_FALLTHROUGH;
case 0x25C0: EINA_FALLTHROUGH;
case 0x25FB ... 0x25FE: EINA_FALLTHROUGH;
case 0x2600 ... 0x2605: EINA_FALLTHROUGH;
case 0x2607 ... 0x2612: EINA_FALLTHROUGH;
case 0x2614 ... 0x2685: EINA_FALLTHROUGH;
case 0x2690 ... 0x2705: EINA_FALLTHROUGH;
case 0x2708 ... 0x2712: EINA_FALLTHROUGH;
case 0x2714: EINA_FALLTHROUGH;
case 0x2716: EINA_FALLTHROUGH;
case 0x271D: EINA_FALLTHROUGH;
case 0x2721: EINA_FALLTHROUGH;
case 0x2728: EINA_FALLTHROUGH;
case 0x2733 ... 0x2734: EINA_FALLTHROUGH;
case 0x2744: EINA_FALLTHROUGH;
case 0x2747: EINA_FALLTHROUGH;
case 0x274C: EINA_FALLTHROUGH;
case 0x274E: EINA_FALLTHROUGH;
case 0x2753 ... 0x2755: EINA_FALLTHROUGH;
case 0x2757: EINA_FALLTHROUGH;
case 0x2763 ... 0x2767: EINA_FALLTHROUGH;
case 0x2795 ... 0x2797: EINA_FALLTHROUGH;
case 0x27A1: EINA_FALLTHROUGH;
case 0x27B0: EINA_FALLTHROUGH;
case 0x27BF: EINA_FALLTHROUGH;
case 0x2934 ... 0x2935: EINA_FALLTHROUGH;
case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH;
case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH;
case 0x2B50: EINA_FALLTHROUGH;
case 0x2B55: EINA_FALLTHROUGH;
case 0x2E80 ... 0x303E: EINA_FALLTHROUGH;
case 0x3041 ... 0x3247: EINA_FALLTHROUGH;
case 0x3250 ... 0x4DBF: EINA_FALLTHROUGH;
case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH;
case 0xA960 ... 0xA97C: EINA_FALLTHROUGH;
case 0xAC00 ... 0xD7A3: EINA_FALLTHROUGH;
case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH;
case 0xFE10 ... 0xFE19: EINA_FALLTHROUGH;
case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH;
case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH;
case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH;
case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH;
case 0x1F000 ... 0x1F0F5: EINA_FALLTHROUGH;
case 0x1F10D ... 0x1F10F: EINA_FALLTHROUGH;
case 0x1F12F: EINA_FALLTHROUGH;
case 0x1F16C ... 0x1F171: EINA_FALLTHROUGH;
case 0x1F17E ... 0x1F17F: EINA_FALLTHROUGH;
case 0x1F18E: EINA_FALLTHROUGH;
case 0x1F191 ... 0x1F19A: EINA_FALLTHROUGH;
case 0x1F1AD: EINA_FALLTHROUGH;
case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH;
case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH;
case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH;
case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH;
case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH;
case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH;
case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH;
case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH;
case 0x20000 ... 0x3134A:
return EINA_TRUE;
}
if (ty->config->emoji_dbl_width && ((g >= 0x1f004) && (g <= 0x1f9c0)))
{
/* Taken from
* https://github.com/ridiculousfish/widecharwidth/blob/master/widechar_width.h
*/
const uint16_t u = (g & 0xfff);
if ( (u == 0x004) ||
(u == 0x0cf) ||
((u >= 0x170) && (u <= 0x171)) ||
((u >= 0x17e) && (u <= 0x17f)) ||
(u == 0x18e) ||
((u >= 0x191) && (u <= 0x19a)) ||
((u >= 0x1e6) && (u <= 0x1ff)) ||
((u >= 0x201) && (u < 0x202)) ||
(u == 0x21a) ||
(u == 0x22f) ||
((u >= 0x232) && (u <= 0x23a)) ||
((u >= 0x250) && (u <= 0x251)) ||
((u >= 0x300) && (u <= 0x321)) ||
((u >= 0x324) && (u <= 0x393)) ||
((u >= 0x396) && (u <= 0x397)) ||
((u >= 0x399) && (u <= 0x39B)) ||
((u >= 0x39E) && (u <= 0x3F0)) ||
((u >= 0x3F3) && (u <= 0x3F5)) ||
((u >= 0x3F7) && (u <= 0x4FD)) ||
((u >= 0x4FF) && (u <= 0x53D)) ||
((u >= 0x549) && (u <= 0x54E)) ||
((u >= 0x550) && (u <= 0x567)) ||
((u >= 0x56F) && (u <= 0x570)) ||
((u >= 0x573) && (u <= 0x579)) ||
(u == 0x587) ||
((u >= 0x58A) && (u <= 0x58D)) ||
(u == 0x590) ||
((u >= 0x595) && (u <= 0x596)) ||
(u == 0x5A5) ||
(u == 0x5A8) ||
((u >= 0x5B1) && (u <= 0x5B2)) ||
(u == 0x5BC) ||
((u >= 0x5C2) && (u <= 0x5C4)) ||
((u >= 0x5D1) && (u <= 0x5D3)) ||
((u >= 0x5DC) && (u <= 0x5DE)) ||
(u == 0x5E1) ||
(u == 0x5E3) ||
(u == 0x5E8) ||
(u == 0x5EF) ||
(u == 0x5F3) ||
((u >= 0x5FA) && (u <= 0x64F)) ||
((u >= 0x680) && (u <= 0x6C5)) ||
((u >= 0x6CB) && (u <= 0x6D0)) ||
((u >= 0x6E0) && (u <= 0x6E5)) ||
(u == 0x6E9) ||
((u >= 0x6EB) && (u <= 0x6EC)) ||
(u == 0x6F0) ||
(u == 0x6F3) ||
((u >= 0x910) && (u <= 0x918)) ||
((u >= 0x980) && (u <= 0x984)) ||
(u == 0x9C0)
)
return EINA_TRUE;
}
// FIXME: can optimize by breaking into tree and ranges
// (A)
if (ty->termstate.cjk_ambiguous_wide)
{
// grep ';A #' EastAsianWidth.txt | wc -l
// :(
if (
// aX
(((g >> 4) == 0xa) &&
(
(g == 0x00a1) ||
(g == 0x00a4) ||
((g >= 0x00a7) && (g <= 0x00a8)) ||
(g == 0x00aa) ||
((g >= 0x00ad) && (g <= 0x00ae)))) ||
// bX
(((g >> 4) == 0xb) &&
(((g >= 0x00b0) && (g <= 0x00bf)))) ||
// cX
(((g >> 4) == 0xc) &&
((g == 0x00c6))) ||
// dX
(((g >> 4) == 0xd) &&
(
(g == 0x00d0) ||
((g >= 0x00d7) && (g <= 0x00d8)) ||
((g >= 0x00de) && (g <= 0x00df)))) ||
// eX
(((g >> 4) == 0xe) &&
(
(g == 0x00e0) ||
(g == 0x00e1) ||
(g == 0x00e6) ||
((g >= 0x00e8) && (g <= 0x00e9)) ||
(g == 0x00ea) ||
((g >= 0x00ec) && (g <= 0x00ed)))) ||
// fX
(((g >> 4) == 0xf) &&
(
(g == 0x00f0) ||
((g >= 0x00f2) && (g <= 0x00f3)) ||
((g >= 0x00f7) && (g <= 0x00f9)) ||
(g == 0x00fa) ||
(g == 0x00fc) ||
(g == 0x00fe))) ||
// 1XX
(((g >> 8) == 0x1) &&
(
(g == 0x0101) ||
(g == 0x0111) ||
(g == 0x0113) ||
(g == 0x011b) ||
((g >= 0x0126) && (g <= 0x0127)) ||
(g == 0x012b) ||
((g >= 0x0131) && (g <= 0x0133)) ||
(g == 0x0138) ||
((g >= 0x013f) && (g <= 0x0142)) ||
(g == 0x0144) ||
((g >= 0x0148) && (g <= 0x014b)) ||
(g == 0x014d) ||
((g >= 0x0152) && (g <= 0x0153)) ||
((g >= 0x0166) && (g <= 0x0167)) ||
(g == 0x016b) ||
(g == 0x01ce) ||
(g == 0x01d0) ||
(g == 0x01d2) ||
(g == 0x01d4) ||
(g == 0x01d6) ||
(g == 0x01d8) ||
(g == 0x01da) ||
(g == 0x01dc))) ||
// 2XX
(((g >> 8) == 0x2) &&
(
(g == 0x0251) ||
(g == 0x0261) ||
(g == 0x02c4) ||
(g == 0x02c7) ||
(g == 0x02c9) ||
((g >= 0x02ca) && (g <= 0x02cb)) ||
(g == 0x02cd) ||
(g == 0x02d0) ||
((g >= 0x02d8) && (g <= 0x02d9)) ||
((g >= 0x02da) && (g <= 0x02db)) ||
(g == 0x02dd) ||
(g == 0x02df))) ||
// 3XX
(((g >> 8) == 0x3) &&
(
((g >= 0x0300) && (g <= 0x036f)) ||
((g >= 0x0391) && (g <= 0x03c9)))) ||
// 4XX
(((g >> 8) == 0x4) &&
(
(g == 0x0401) ||
((g >= 0x0410) && (g <= 0x044f)) ||
(g == 0x0451))) ||
// 2XXX
(((g >> 12) == 0x2) &&
((((g >> 8) == 0x20) &&
(
(g == 0x2010) ||
((g >= 0x2013) && (g <= 0x2016)) ||
((g >= 0x2018) && (g <= 0x2019)) ||
(g == 0x201c) ||
(g == 0x201d) ||
((g >= 0x2020) && (g <= 0x2022)) ||
((g >= 0x2024) && (g <= 0x2027)) ||
(g == 0x2030) ||
((g >= 0x2032) && (g <= 0x2033)) ||
(g == 0x2035) ||
(g == 0x203b) ||
(g == 0x203e) ||
(g == 0x2074) ||
(g == 0x207f) ||
((g >= 0x2081) && (g <= 0x2084)) ||
(g == 0x20ac))) ||
(((g >> 8) == 0x21) &&
(
(g == 0x2103) ||
(g == 0x2105) ||
(g == 0x2109) ||
(g == 0x2113) ||
(g == 0x2116) ||
((g >= 0x2121) && (g <= 0x2122)) ||
(g == 0x2126) ||
(g == 0x212b) ||
((g >= 0x2153) && (g <= 0x2154)) ||
((g >= 0x215b) && (g <= 0x215e)) ||
((g >= 0x2160) && (g <= 0x216b)) ||
((g >= 0x2170) && (g <= 0x2179)) ||
((g >= 0x2189) && (g <= 0x2199)) ||
((g >= 0x21b8) && (g <= 0x21b9)) ||
(g == 0x21d2) ||
(g == 0x21d4) ||
(g == 0x21e7))) ||
(((g >> 8) == 0x22) &&
(
(g == 0x2200) ||
((g >= 0x2202) && (g <= 0x2203)) ||
((g >= 0x2207) && (g <= 0x2208)) ||
(g == 0x220b) ||
(g == 0x220f) ||
(g == 0x2211) ||
(g == 0x2215) ||
(g == 0x221a) ||
((g >= 0x221d) && (g <= 0x221f)) ||
(g == 0x2220) ||
(g == 0x2223) ||
(g == 0x2225) ||
((g >= 0x2227) && (g <= 0x222e)) ||
((g >= 0x2234) && (g <= 0x2237)) ||
((g >= 0x223c) && (g <= 0x223d)) ||
(g == 0x2248) ||
(g == 0x224c) ||
(g == 0x2252) ||
((g >= 0x2260) && (g <= 0x2261)) ||
((g >= 0x2264) && (g <= 0x2267)) ||
((g >= 0x226a) && (g <= 0x226b)) ||
((g >= 0x226e) && (g <= 0x226f)) ||
((g >= 0x2282) && (g <= 0x2283)) ||
((g >= 0x2286) && (g <= 0x2287)) ||
(g == 0x2295) ||
(g == 0x2299) ||
(g == 0x22a5) ||
(g == 0x22bf))) ||
(((g >> 8) == 0x23) &&
((g == 0x2312))) ||
((((g >> 8) == 0x24) || ((g >> 8) == 0x25)) &&
(((g >= 0x2460) && (g <= 0x2595)))) ||
(((g >> 8) == 0x25) &&
(
((g >= 0x25a0) && (g <= 0x25bd)) ||
((g >= 0x25c0) && (g <= 0x25c1)) ||
((g >= 0x25c6) && (g <= 0x25c7)) ||
(g == 0x25c8) ||
(g == 0x25cb) ||
((g >= 0x25ce) && (g <= 0x25cf)) ||
((g >= 0x25d0) && (g <= 0x25d1)) ||
((g >= 0x25e2) && (g <= 0x25e3)) ||
((g >= 0x25e4) && (g <= 0x25e5)) ||
(g == 0x25ef))) ||
(((g >> 8) == 0x26) &&
(
((g >= 0x2605) && (g <= 0x2606)) ||
(g == 0x2609) ||
((g >= 0x260e) && (g <= 0x260f)) ||
((g >= 0x2614) && (g <= 0x2615)) ||
(g == 0x261c) ||
(g == 0x261e) ||
(g == 0x2640) ||
(g == 0x2642) ||
((g >= 0x2660) && (g <= 0x2661)) ||
((g >= 0x2663) && (g <= 0x2665)) ||
((g >= 0x2667) && (g <= 0x266a)) ||
((g >= 0x266c) && (g <= 0x266d)) ||
(g == 0x266f) ||
((g >= 0x269e) && (g <= 0x269f)) ||
((g >= 0x26be) && (g <= 0x26bf)) ||
((g >= 0x26c4) && (g <= 0x26cd)) ||
(g == 0x26cf) ||
((g >= 0x26d0) && (g <= 0x26e1)) ||
(g == 0x26e3) ||
((g >= 0x26e8) && (g <= 0x26ff)))) ||
(((g >> 8) == 0x27) &&
(
(g == 0x273d) ||
(g == 0x2757) ||
((g >= 0x2776) && (g <= 0x277f)))) ||
(((g >> 8) == 0x2b) &&
(((g >= 0x2b55) && (g <= 0x2b59)))))) ||
// 3XXX
(((g >> 12) == 0x3) &&
(((g >= 0x3248) && (g <= 0x324f)))) ||
// fXXX
(((g >> 12) == 0xf) &&
(
((g >= 0xfe00) && (g <= 0xfe0f)) ||
(g == 0xfffd))) ||
// 1XXXX
(((g >> 16) == 0x1) &&
(
((g >= 0x1f100) && (g <= 0x1f12d)) ||
((g >= 0x1f130) && (g <= 0x1f169)) ||
((g >= 0x1f170) && (g <= 0x1f19a)))) ||
// eXXXX
(((g >> 16) == 0xe) &&
(((g >= 0xe0100) && (g <= 0xe01ef)))) ||
// fXXXX
(((g >> 16) == 0xf) &&
(((g >= 0xf0000) && (g <= 0xffffd)))) ||
// 1XXXXX
(((g >> 24) == 0x1) &&
(((g >= 0x100000) && (g <= 0x10fffd)))))
return EINA_TRUE;
}
// Na, H -> not checked
return EINA_FALSE;
}

View File

@ -1,25 +1,20 @@
/* XXX: Code generated by tool unicode_dbl_width.py */
#ifndef _TERMPTY_DBL_H__
#define _TERMPTY_DBL_H__ 1
Eina_Bool _termpty_is_dblwidth_slow_get(const Termpty *ty, int g);
Eina_Bool _termpty_is_wide(const Eina_Unicode g);
Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);
static inline Eina_Bool
_termpty_is_dblwidth_get(const Termpty *ty, int g)
_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
{
// check for east asian full-width (F), half-width (H), wide (W),
// narrow (Na) or ambiguous (A) codepoints
// ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
// optimize for latin1 non-ambiguous
if (g <= 0xa0)
/* optimize for latin1 non-ambiguous */
if (g <= 0xA0)
return EINA_FALSE;
// (F)
if ((g == 0x3000) ||
((g >= 0xff01) && (g <= 0xff60)) ||
((g >= 0xffe0) && (g <= 0xffe6)))
return EINA_TRUE;
return _termpty_is_dblwidth_slow_get(ty, g);
if (!ty->termstate.cjk_ambiguous_wide)
return _termpty_is_wide(g);
else
return _termpty_is_ambigous_wide(g);
}
#endif

190
tools/unicode_dbl_width.py Executable file
View File

@ -0,0 +1,190 @@
#!/usr/bin/env python3
"""
Generate src/bin/termptydbl.{c,h} from unicode files
used with ucd.all.flat.xml from
https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.all.flat.zip
"""
import argparse
from collections import namedtuple
import xml.etree.ElementTree as ET
# A run of code points sharing one normalised East Asian Width class:
# 'N' (narrow), 'A' (ambiguous) or 'W' (wide).  `start` and `end` are
# inclusive integer code points.
Range = namedtuple('range', ['width', 'start', 'end'])


def get_ranges(xmlfile, emoji_as_wide):
    """Split the code points of a UCD XML file into runs of equal width class.

    The `ea` attribute of every `<char>` element is normalised to one of
    three classes: 'Na' and 'H' become 'N', 'F' becomes 'W', and 'N', 'A'
    and 'W' are kept as they are.

    Args:
        xmlfile: file name or file object accepted by `ET.parse`.
        emoji_as_wide: when true, every character whose `ExtPict`
            attribute is 'Y' is classified as 'W' regardless of `ea`.

    Returns:
        A list of `Range` tuples in document order.  The first element is
        always a narrow run starting at code point 0.  A run is extended up
        to each new code point of the same class, so code points that are
        absent from the file between two entries of the same class end up
        inside that run.

    Raises:
        ValueError: if the file has no `<repertoire>` element or a
            character carries an unknown `ea` value.
    """
    namespace = "{http://www.unicode.org/ns/2003/ucd/1.0}"
    root = ET.parse(xmlfile).getroot()
    repertoire = root.find(namespace + "repertoire")
    if repertoire is None:
        raise ValueError("no <repertoire> element found in the UCD XML file")

    ranges = []
    current = Range('N', 0, 0)
    for char in repertoire.findall(namespace + "char"):
        ea = char.get('ea')
        if ea in ('Na', 'H'):
            ea = 'N'
        elif ea == 'F':
            # Compare for equality: `ea in ('F')` would be a substring test
            # against the string 'F', not a tuple membership test.
            ea = 'W'
        if ea not in ('N', 'A', 'W'):
            raise ValueError(f"unexpected East Asian Width value: {ea!r}")

        cp = char.get('cp')
        if not cp:
            # Entries without a single `cp` attribute are ignored
            # (presumably the first-cp/last-cp range entries -- TODO confirm).
            continue

        if emoji_as_wide and char.get('ExtPict') == 'Y':
            ea = 'W'

        cp = int(cp, 16)
        if ea != current.width:
            ranges.append(current)
            current = Range(ea, cp, cp)
        else:
            current = current._replace(end=cp)

    ranges.append(current)
    return ranges
def merge_ranges(ranges, is_same_width):
    """Coalesce consecutive ranges that `is_same_width` considers equivalent.

    Args:
        ranges: sequence of range tuples exposing `end` and `_replace`.
        is_same_width: predicate called as `is_same_width(candidate, current)`;
            a true result extends `current` up to `candidate.end`.

    Returns:
        A new list of merged ranges.  Each merged range keeps the `width` and
        `start` of the first range of its group.  An empty input yields an
        empty list.
    """
    if not ranges:
        # Nothing to merge; avoids an IndexError on ranges[0] below.
        return []

    merged = []
    current = ranges[0]
    # The first element is compared with itself, which changes nothing for
    # a reflexive predicate.
    for candidate in ranges:
        if is_same_width(candidate, current):
            current = current._replace(end=candidate.end)
        else:
            merged.append(current)
            current = candidate
    merged.append(current)
    return merged
def skip_ranges(ranges, width_skipped):
    """Return, in order, the ranges whose `width` is not in `width_skipped`."""
    return [entry for entry in ranges if entry.width not in width_skipped]
def gen_header(range, file_header):
    """Write the generated C header to `file_header`.

    The header declares `_termpty_is_wide` and `_termpty_is_ambigous_wide`
    and defines the inline `_termpty_is_dblwidth_get`, which returns
    EINA_FALSE straight away for every code point up to `range.end` and
    otherwise dispatches on `ty->termstate.cjk_ambiguous_wide`.

    Args:
        range: range tuple whose `end` is the upper bound of the fast path.
        file_header: writable text file object.
    """
    prologue = "\n".join((
        "/* XXX: Code generated by tool unicode_dbl_width.py */",
        "#ifndef _TERMPTY_DBL_H__",
        "#define _TERMPTY_DBL_H__ 1",
        "Eina_Bool _termpty_is_wide(const Eina_Unicode g);",
        "Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);",
        "static inline Eina_Bool",
        "_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)",
        "{",
        "/* optimize for latin1 non-ambiguous */",
        "",
    ))
    # Upper bound of the fast path, printed as upper-case hexadecimal.
    guard = f" if (g <= 0x{range.end:X})"
    epilogue = "\n".join((
        "",
        "return EINA_FALSE;",
        "if (!ty->termstate.cjk_ambiguous_wide)",
        "return _termpty_is_wide(g);",
        "else",
        "return _termpty_is_ambigous_wide(g);",
        "}",
        "#endif",
        "",
    ))
    for piece in (prologue, guard, epilogue):
        file_header.write(piece)
def gen_ambigous(ranges, file_source):
    """Write the C function `_termpty_is_ambigous_wide` to `file_source`.

    The generated function is one `switch` whose case labels fall through
    to `return EINA_TRUE;`.  Both ambiguous ('A') and wide ('W') ranges get
    a label, and adjacent 'A'/'W' ranges are merged into one label.

    Args:
        ranges: list of range tuples (`width`, `start`, `end`).  The first
            element is left out here; `gen_c` passes it to `gen_header`.
        file_source: writable text file object.
    """
    file_source.write(
        """
__attribute__((const))
Eina_Bool
_termpty_is_ambigous_wide(Eina_Unicode g)
{
switch (g)
{
""")

    def is_same_width(r1, r2):
        # Only 'N' counts as narrow; 'A' and 'W' are interchangeable here.
        if r1.width == 'N':
            return r2.width == 'N'
        return r2.width in ('A', 'W')

    # Once the narrow ranges are filtered out, every remaining range gets a
    # case label, so the loop needs no further width check.
    wide = skip_ranges(merge_ranges(ranges[1:], is_same_width), ('N',))
    last = len(wide) - 1
    for idx, r in enumerate(wide):
        # The last label drops the marker: it is directly followed by
        # the return statement.
        fallthrough = "" if idx == last else " EINA_FALLTHROUGH;"
        if r.start == r.end:
            file_source.write(f" case 0x{r.start:X}:{fallthrough}\n")
        else:
            file_source.write(f" case 0x{r.start:X} ... 0x{r.end:X}:{fallthrough}\n")

    file_source.write(
        """
return EINA_TRUE;
}
return EINA_FALSE;
}
""")
def gen_wide(ranges, file_source):
    """Write the C function `_termpty_is_wide` to `file_source`.

    The generated function is one `switch` whose case labels fall through
    to `return EINA_TRUE;`.  Only wide ('W') ranges get a label; narrow
    ('N') and ambiguous ('A') ranges are both treated as not wide.

    Args:
        ranges: list of range tuples (`width`, `start`, `end`).  The first
            element is left out here; `gen_c` passes it to `gen_header`.
        file_source: writable text file object.
    """
    file_source.write(
        """
__attribute__((const))
Eina_Bool
_termpty_is_wide(Eina_Unicode g)
{
switch (g)
{
""")

    def is_same_width(r1, r2):
        # 'N' and 'A' are interchangeable here; only 'W' counts as wide.
        if r1.width in ('N', 'A'):
            return r2.width in ('N', 'A')
        return r2.width == 'W'

    # Once the 'N'/'A' ranges are filtered out, every remaining range gets a
    # case label, so the loop needs no further width check.
    wide = skip_ranges(merge_ranges(ranges[1:], is_same_width), ('N', 'A'))
    last = len(wide) - 1
    for idx, r in enumerate(wide):
        # The last label drops the marker: it is directly followed by
        # the return statement.
        fallthrough = "" if idx == last else " EINA_FALLTHROUGH;"
        if r.start == r.end:
            file_source.write(f" case 0x{r.start:X}:{fallthrough}\n")
        else:
            file_source.write(f" case 0x{r.start:X} ... 0x{r.end:X}:{fallthrough}\n")

    file_source.write(
        """
return EINA_TRUE;
}
return EINA_FALSE;
}
""")
def gen_c(ranges, file_header, file_source):
    """Emit the generated header and C source.

    The first range goes to `gen_header`.  The C source receives its
    banner and include lines, followed by the output of `gen_ambigous` and
    then `gen_wide`.

    Args:
        ranges: list of range tuples as returned by `get_ranges`.
        file_header: writable text file object for the header.
        file_source: writable text file object for the C source.
    """
    gen_header(ranges[0], file_header)

    preamble = "\n".join((
        "/* XXX: Code generated by tool unicode_dbl_width.py */",
        '#include "private.h"',
        "#include <Elementary.h>",
        '#include "termpty.h"',
        '#include "termptydbl.h"',
        "",
    ))
    file_source.write(preamble)

    for emit in (gen_ambigous, gen_wide):
        emit(ranges, file_source)
def main():
    """Parse the command line and generate the header and C source.

    Positional arguments: the UCD XML input, the header output and the C
    source output.
    """
    parser = argparse.ArgumentParser(description='Generate code handling different widths of unicode codepoints.')
    # Binary mode lets the XML parser honour the document's own encoding
    # declaration instead of the locale's default text encoding.
    parser.add_argument('xml', type=argparse.FileType('rb'))
    parser.add_argument('header', type=argparse.FileType('w'))
    parser.add_argument('source', type=argparse.FileType('w'))
    args = parser.parse_args()

    # Close (and so flush) all three files deterministically.
    with args.xml, args.header, args.source:
        ranges = get_ranges(args.xml, True)
        gen_c(ranges, args.header, args.source)


# Only run when executed as a script, so importing the module has no side
# effects.
if __name__ == "__main__":
    main()