summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Faure <billiob@gmail.com>2020-07-05 21:40:42 +0200
committerBoris Faure <billiob@gmail.com>2020-07-05 23:34:13 +0200
commit8c970b88047882fe95907c06b08ff31eb2a6cad3 (patch)
tree184c4a18d62a5cb24736792a7c36729baa0c4dec
parent9f2f8464f3b40e14ad5ab657bd2799a6b2291d43 (diff)
termptydbl: generate the exact double width test
This is based on Unicode 13.0: https://www.unicode.org/reports/tr44/ The code is generated by tools/unicode_dbl_width.py. I'm using switch-case with fall-through, as I found it to be the best approach: https://github.com/billiob/terminology/commit/f46d550a8b5a8fba1522796c5c7f6a0672070e73
-rw-r--r--src/bin/termptydbl.c676
-rw-r--r--src/bin/termptydbl.h25
-rwxr-xr-xtools/unicode_dbl_width.py190
3 files changed, 494 insertions, 397 deletions
diff --git a/src/bin/termptydbl.c b/src/bin/termptydbl.c
index a4362d7..691c062 100644
--- a/src/bin/termptydbl.c
+++ b/src/bin/termptydbl.c
@@ -1,396 +1,308 @@
1/* XXX: Code generated by tool unicode_dbl_width.py */
1#include "private.h" 2#include "private.h"
2 3
3#include <Elementary.h> 4#include <Elementary.h>
4#include "termpty.h" 5#include "termpty.h"
5#include "termptydbl.h" 6#include "termptydbl.h"
6 7
8__attribute__((const))
7Eina_Bool 9Eina_Bool
8_termpty_is_dblwidth_slow_get(const Termpty *ty, int g) 10_termpty_is_ambigous_wide(Eina_Unicode g)
9{ 11{
10 // check for east asian full-width (F), half-width (H), wide (W), 12 switch (g)
11 // narrow (Na) or ambiguous (A) codepoints 13 {
12 // ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt 14 case 0xA1: EINA_FALLTHROUGH;
15 case 0xA4: EINA_FALLTHROUGH;
16 case 0xA7 ... 0xAA: EINA_FALLTHROUGH;
17 case 0xAD ... 0xAE: EINA_FALLTHROUGH;
18 case 0xB0 ... 0xB4: EINA_FALLTHROUGH;
19 case 0xB6 ... 0xBA: EINA_FALLTHROUGH;
20 case 0xBC ... 0xBF: EINA_FALLTHROUGH;
21 case 0xC6: EINA_FALLTHROUGH;
22 case 0xD0: EINA_FALLTHROUGH;
23 case 0xD7 ... 0xD8: EINA_FALLTHROUGH;
24 case 0xDE ... 0xE1: EINA_FALLTHROUGH;
25 case 0xE6: EINA_FALLTHROUGH;
26 case 0xE8 ... 0xEA: EINA_FALLTHROUGH;
27 case 0xEC ... 0xED: EINA_FALLTHROUGH;
28 case 0xF0: EINA_FALLTHROUGH;
29 case 0xF2 ... 0xF3: EINA_FALLTHROUGH;
30 case 0xF7 ... 0xFA: EINA_FALLTHROUGH;
31 case 0xFC: EINA_FALLTHROUGH;
32 case 0xFE: EINA_FALLTHROUGH;
33 case 0x101: EINA_FALLTHROUGH;
34 case 0x111: EINA_FALLTHROUGH;
35 case 0x113: EINA_FALLTHROUGH;
36 case 0x11B: EINA_FALLTHROUGH;
37 case 0x126 ... 0x127: EINA_FALLTHROUGH;
38 case 0x12B: EINA_FALLTHROUGH;
39 case 0x131 ... 0x133: EINA_FALLTHROUGH;
40 case 0x138: EINA_FALLTHROUGH;
41 case 0x13F ... 0x142: EINA_FALLTHROUGH;
42 case 0x144: EINA_FALLTHROUGH;
43 case 0x148 ... 0x14B: EINA_FALLTHROUGH;
44 case 0x14D: EINA_FALLTHROUGH;
45 case 0x152 ... 0x153: EINA_FALLTHROUGH;
46 case 0x166 ... 0x167: EINA_FALLTHROUGH;
47 case 0x16B: EINA_FALLTHROUGH;
48 case 0x1CE: EINA_FALLTHROUGH;
49 case 0x1D0: EINA_FALLTHROUGH;
50 case 0x1D2: EINA_FALLTHROUGH;
51 case 0x1D4: EINA_FALLTHROUGH;
52 case 0x1D6: EINA_FALLTHROUGH;
53 case 0x1D8: EINA_FALLTHROUGH;
54 case 0x1DA: EINA_FALLTHROUGH;
55 case 0x1DC: EINA_FALLTHROUGH;
56 case 0x251: EINA_FALLTHROUGH;
57 case 0x261: EINA_FALLTHROUGH;
58 case 0x2C4: EINA_FALLTHROUGH;
59 case 0x2C7: EINA_FALLTHROUGH;
60 case 0x2C9 ... 0x2CB: EINA_FALLTHROUGH;
61 case 0x2CD: EINA_FALLTHROUGH;
62 case 0x2D0: EINA_FALLTHROUGH;
63 case 0x2D8 ... 0x2DB: EINA_FALLTHROUGH;
64 case 0x2DD: EINA_FALLTHROUGH;
65 case 0x2DF: EINA_FALLTHROUGH;
66 case 0x300 ... 0x36F: EINA_FALLTHROUGH;
67 case 0x391 ... 0x3A9: EINA_FALLTHROUGH;
68 case 0x3B1 ... 0x3C1: EINA_FALLTHROUGH;
69 case 0x3C3 ... 0x3C9: EINA_FALLTHROUGH;
70 case 0x401: EINA_FALLTHROUGH;
71 case 0x410 ... 0x44F: EINA_FALLTHROUGH;
72 case 0x451: EINA_FALLTHROUGH;
73 case 0x1100 ... 0x115F: EINA_FALLTHROUGH;
74 case 0x2010: EINA_FALLTHROUGH;
75 case 0x2013 ... 0x2016: EINA_FALLTHROUGH;
76 case 0x2018 ... 0x2019: EINA_FALLTHROUGH;
77 case 0x201C ... 0x201D: EINA_FALLTHROUGH;
78 case 0x2020 ... 0x2022: EINA_FALLTHROUGH;
79 case 0x2024 ... 0x2027: EINA_FALLTHROUGH;
80 case 0x2030: EINA_FALLTHROUGH;
81 case 0x2032 ... 0x2033: EINA_FALLTHROUGH;
82 case 0x2035: EINA_FALLTHROUGH;
83 case 0x203B ... 0x203C: EINA_FALLTHROUGH;
84 case 0x203E: EINA_FALLTHROUGH;
85 case 0x2049: EINA_FALLTHROUGH;
86 case 0x2074: EINA_FALLTHROUGH;
87 case 0x207F: EINA_FALLTHROUGH;
88 case 0x2081 ... 0x2084: EINA_FALLTHROUGH;
89 case 0x20AC: EINA_FALLTHROUGH;
90 case 0x2103: EINA_FALLTHROUGH;
91 case 0x2105: EINA_FALLTHROUGH;
92 case 0x2109: EINA_FALLTHROUGH;
93 case 0x2113: EINA_FALLTHROUGH;
94 case 0x2116: EINA_FALLTHROUGH;
95 case 0x2121 ... 0x2122: EINA_FALLTHROUGH;
96 case 0x2126: EINA_FALLTHROUGH;
97 case 0x212B: EINA_FALLTHROUGH;
98 case 0x2139: EINA_FALLTHROUGH;
99 case 0x2153 ... 0x2154: EINA_FALLTHROUGH;
100 case 0x215B ... 0x215E: EINA_FALLTHROUGH;
101 case 0x2160 ... 0x216B: EINA_FALLTHROUGH;
102 case 0x2170 ... 0x2179: EINA_FALLTHROUGH;
103 case 0x2189: EINA_FALLTHROUGH;
104 case 0x2190 ... 0x2199: EINA_FALLTHROUGH;
105 case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH;
106 case 0x21B8 ... 0x21B9: EINA_FALLTHROUGH;
107 case 0x21D2: EINA_FALLTHROUGH;
108 case 0x21D4: EINA_FALLTHROUGH;
109 case 0x21E7: EINA_FALLTHROUGH;
110 case 0x2200: EINA_FALLTHROUGH;
111 case 0x2202 ... 0x2203: EINA_FALLTHROUGH;
112 case 0x2207 ... 0x2208: EINA_FALLTHROUGH;
113 case 0x220B: EINA_FALLTHROUGH;
114 case 0x220F: EINA_FALLTHROUGH;
115 case 0x2211: EINA_FALLTHROUGH;
116 case 0x2215: EINA_FALLTHROUGH;
117 case 0x221A: EINA_FALLTHROUGH;
118 case 0x221D ... 0x2220: EINA_FALLTHROUGH;
119 case 0x2223: EINA_FALLTHROUGH;
120 case 0x2225: EINA_FALLTHROUGH;
121 case 0x2227 ... 0x222C: EINA_FALLTHROUGH;
122 case 0x222E: EINA_FALLTHROUGH;
123 case 0x2234 ... 0x2237: EINA_FALLTHROUGH;
124 case 0x223C ... 0x223D: EINA_FALLTHROUGH;
125 case 0x2248: EINA_FALLTHROUGH;
126 case 0x224C: EINA_FALLTHROUGH;
127 case 0x2252: EINA_FALLTHROUGH;
128 case 0x2260 ... 0x2261: EINA_FALLTHROUGH;
129 case 0x2264 ... 0x2267: EINA_FALLTHROUGH;
130 case 0x226A ... 0x226B: EINA_FALLTHROUGH;
131 case 0x226E ... 0x226F: EINA_FALLTHROUGH;
132 case 0x2282 ... 0x2283: EINA_FALLTHROUGH;
133 case 0x2286 ... 0x2287: EINA_FALLTHROUGH;
134 case 0x2295: EINA_FALLTHROUGH;
135 case 0x2299: EINA_FALLTHROUGH;
136 case 0x22A5: EINA_FALLTHROUGH;
137 case 0x22BF: EINA_FALLTHROUGH;
138 case 0x2312: EINA_FALLTHROUGH;
139 case 0x231A ... 0x231B: EINA_FALLTHROUGH;
140 case 0x2328 ... 0x232A: EINA_FALLTHROUGH;
141 case 0x2388: EINA_FALLTHROUGH;
142 case 0x23CF: EINA_FALLTHROUGH;
143 case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH;
144 case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH;
145 case 0x2460 ... 0x24E9: EINA_FALLTHROUGH;
146 case 0x24EB ... 0x254B: EINA_FALLTHROUGH;
147 case 0x2550 ... 0x2573: EINA_FALLTHROUGH;
148 case 0x2580 ... 0x258F: EINA_FALLTHROUGH;
149 case 0x2592 ... 0x2595: EINA_FALLTHROUGH;
150 case 0x25A0 ... 0x25A1: EINA_FALLTHROUGH;
151 case 0x25A3 ... 0x25AB: EINA_FALLTHROUGH;
152 case 0x25B2 ... 0x25B3: EINA_FALLTHROUGH;
153 case 0x25B6 ... 0x25B7: EINA_FALLTHROUGH;
154 case 0x25BC ... 0x25BD: EINA_FALLTHROUGH;
155 case 0x25C0 ... 0x25C1: EINA_FALLTHROUGH;
156 case 0x25C6 ... 0x25C8: EINA_FALLTHROUGH;
157 case 0x25CB: EINA_FALLTHROUGH;
158 case 0x25CE ... 0x25D1: EINA_FALLTHROUGH;
159 case 0x25E2 ... 0x25E5: EINA_FALLTHROUGH;
160 case 0x25EF: EINA_FALLTHROUGH;
161 case 0x25FB ... 0x25FE: EINA_FALLTHROUGH;
162 case 0x2600 ... 0x2612: EINA_FALLTHROUGH;
163 case 0x2614 ... 0x2685: EINA_FALLTHROUGH;
164 case 0x2690 ... 0x2705: EINA_FALLTHROUGH;
165 case 0x2708 ... 0x2712: EINA_FALLTHROUGH;
166 case 0x2714: EINA_FALLTHROUGH;
167 case 0x2716: EINA_FALLTHROUGH;
168 case 0x271D: EINA_FALLTHROUGH;
169 case 0x2721: EINA_FALLTHROUGH;
170 case 0x2728: EINA_FALLTHROUGH;
171 case 0x2733 ... 0x2734: EINA_FALLTHROUGH;
172 case 0x273D: EINA_FALLTHROUGH;
173 case 0x2744: EINA_FALLTHROUGH;
174 case 0x2747: EINA_FALLTHROUGH;
175 case 0x274C: EINA_FALLTHROUGH;
176 case 0x274E: EINA_FALLTHROUGH;
177 case 0x2753 ... 0x2755: EINA_FALLTHROUGH;
178 case 0x2757: EINA_FALLTHROUGH;
179 case 0x2763 ... 0x2767: EINA_FALLTHROUGH;
180 case 0x2776 ... 0x277F: EINA_FALLTHROUGH;
181 case 0x2795 ... 0x2797: EINA_FALLTHROUGH;
182 case 0x27A1: EINA_FALLTHROUGH;
183 case 0x27B0: EINA_FALLTHROUGH;
184 case 0x27BF: EINA_FALLTHROUGH;
185 case 0x2934 ... 0x2935: EINA_FALLTHROUGH;
186 case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH;
187 case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH;
188 case 0x2B50: EINA_FALLTHROUGH;
189 case 0x2B55 ... 0x2B59: EINA_FALLTHROUGH;
190 case 0x2E80 ... 0x303E: EINA_FALLTHROUGH;
191 case 0x3041 ... 0x4DBF: EINA_FALLTHROUGH;
192 case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH;
193 case 0xA960 ... 0xA97C: EINA_FALLTHROUGH;
194 case 0xAC00 ... 0xD7A3: EINA_FALLTHROUGH;
195 case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH;
196 case 0xFE00 ... 0xFE19: EINA_FALLTHROUGH;
197 case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH;
198 case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH;
199 case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH;
200 case 0xFFFD: EINA_FALLTHROUGH;
201 case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH;
202 case 0x1F000 ... 0x1F10A: EINA_FALLTHROUGH;
203 case 0x1F10D ... 0x1F12D: EINA_FALLTHROUGH;
204 case 0x1F12F ... 0x1F169: EINA_FALLTHROUGH;
205 case 0x1F16C ... 0x1F1AD: EINA_FALLTHROUGH;
206 case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH;
207 case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH;
208 case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH;
209 case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH;
210 case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH;
211 case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH;
212 case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH;
213 case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH;
214 case 0x20000 ... 0x3134A: EINA_FALLTHROUGH;
215 case 0xE0100 ... 0xE01EF:
13 216
14 // emoji should be double since unicode 9 (was single before):
15 // http://www.unicode.org/emoji/charts/full-emoji-list.html
16 //
17 // [ 0x0080 -> 0x02AF] !!! handle carefully **
18 // [ 0x1DC0 -> 0x1DFF]
19 // [ 0x1E00 -> 0x1EFF]
20 // [ 0x2000 -> 0x209F] !!! handle carefully **
21 // [ 0x20D0 -> 0x214F]
22 // [ 0x2190 -> 0x23FF]
23 // [ 0x2460 -> 0x24FF]
24 // [ 0x2600 -> 0x262F]
25 // [ 0x2638 -> 0x27EF]
26 // [ 0x2900 -> 0x29FF]
27 // [ 0x2B00 -> 0x2BFF] !!! unicode only 2B55 2B50
28 // [ 0x2C60 -> 0x2C7F]
29 // [ 0x2E00 -> 0x2E7F]
30 // [ 0x3000 -> 0x303F] !! not 33D1
31 // [ 0xA490 -> 0xA4CF]
32 // [0x1F000 -> 0x1F02F]
33 // [0x1F0A0 -> 0x1F0FF]
34 // [0x1F100 -> 0x1F64F]
35 // [0x1F680 -> 0x1F6FF]
36 // [0x1F910 -> 0x1F96B]
37 // [0x1F980 -> 0x1F9E0]
38 //
39 // ** these range include these odities:
40 // © (copyright) 00A9
41 // ® (registered) 00AE
42 // ‼ (double exclamation) 203C
43 // ⁉ (exclamation questionmark) 2049
44 // which should be single width, so ignore them
45
46 // (W)
47 // optimization: only look into more detailed ranges if within larger block
48 if ((g >= 0x1100) && (g <= 0x3FFFD))
49 {
50 if (
51 // 1XXX
52 ((g >= 0x1100) && (g <= 0x115f)) || // Hangul Jamo
53 // 2XXX
54 ((g == 0x2329) || (g == 0x232a)) || // <>
55 ((g >= 0x2e80) && (g <= 0x2ffb)) || // Radical supplements
56 // 3XXX -> A4C6
57 ((g >= 0x3001) && (g <= 0x303f)) || // CJK Symbols and Punctuation
58 ((g >= 0x3041) && (g <= 0x3247)) || // Hiragana, Katakana,
59 // Bopomoto, Hangul
60 // Compatibility Jamo, Kanbun,
61 // Bopomofo Extended, CJK
62 // Strokes, Katana Phonetic
63 // Extensions, Enclosed CJK
64 // Letters and Months
65 ((g >= 0x3250) && (g <= 0x33D0)) || // Enclosed CJK Letters and
66 // Months, CJK Compatibility
67 // [ symbols used by "powerline" ]
68 ((g >= 0x33D2) && (g <= 0x4dbf)) || // CJK Compatibility, CJK
69 // Unified Ideographs
70 // Extension A, Yijing
71 // Hexagram Symbols
72 ((g >= 0x4e00) && (g <= 0xa4c6)) || // CJK Unified Ideographs,
73 // Yi Syllables, Yi Radicals
74 // aXXX
75 ((g >= 0xa960) && (g <= 0xa97c)) || // Hangul Jamo Extended A
76 ((g >= 0xac00) && (g <= 0xd7a3)) || // Hangul Syllables
77 // fXXX
78 ((g >= 0xf900) && (g <= 0xfaff)) || // CJK Compatibility Ideographs
79 ((g >= 0xfe10) && (g <= 0xfe19)) || // Vertical Forms
80 ((g >= 0xfe30) && (g <= 0xfe6b)) || // CJK Compatibility Forms,
81 // Small Forms Variant
82 // 1XXXX
83 ((g >= 0x1b000) && (g <= 0x1b11e)) || // Kana Supplement, Kana
84 // Extended A
85 ((g >= 0x1b170) && (g <= 0x1b2fb)) || // Nushu
86 ((g >= 0x1f200) && (g <= 0x1f202)) || // Enclosed Ideographic
87 // Supplement
88 ((g >= 0x1f210) && (g <= 0x1f265)) || // Enclosed Ideographic
89 // Supplement
90 // 2XXXX
91 ((g >= 0x20000) && (g <= 0x2fffd)) || // CJK
92 // 3XXXX
93 ((g >= 0x30000) && (g <= 0x3fffd)))
94 return EINA_TRUE; 217 return EINA_TRUE;
95 } 218 }
96 if (ty->config->emoji_dbl_width && ((g >= 0x1f004) && (g <= 0x1f9c0))) 219 return EINA_FALSE;
97 { 220}
98 /* Taken from
99 * https://github.com/ridiculousfish/widecharwidth/blob/master/widechar_width.h
100 */
101 const uint16_t u = (g & 0xfff);
102 if ( (u == 0x004) ||
103 (u == 0x0cf) ||
104 ((u >= 0x170) && (u <= 0x171)) ||
105 ((u >= 0x17e) && (u <= 0x17f)) ||
106 (u == 0x18e) ||
107 ((u >= 0x191) && (u <= 0x19a)) ||
108 ((u >= 0x1e6) && (u <= 0x1ff)) ||
109 ((u >= 0x201) && (u < 0x202)) ||
110 (u == 0x21a) ||
111 (u == 0x22f) ||
112 ((u >= 0x232) && (u <= 0x23a)) ||
113 ((u >= 0x250) && (u <= 0x251)) ||
114 ((u >= 0x300) && (u <= 0x321)) ||
115 ((u >= 0x324) && (u <= 0x393)) ||
116 ((u >= 0x396) && (u <= 0x397)) ||
117 ((u >= 0x399) && (u <= 0x39B)) ||
118 ((u >= 0x39E) && (u <= 0x3F0)) ||
119 ((u >= 0x3F3) && (u <= 0x3F5)) ||
120 ((u >= 0x3F7) && (u <= 0x4FD)) ||
121 ((u >= 0x4FF) && (u <= 0x53D)) ||
122 ((u >= 0x549) && (u <= 0x54E)) ||
123 ((u >= 0x550) && (u <= 0x567)) ||
124 ((u >= 0x56F) && (u <= 0x570)) ||
125 ((u >= 0x573) && (u <= 0x579)) ||
126 (u == 0x587) ||
127 ((u >= 0x58A) && (u <= 0x58D)) ||
128 (u == 0x590) ||
129 ((u >= 0x595) && (u <= 0x596)) ||
130 (u == 0x5A5) ||
131 (u == 0x5A8) ||
132 ((u >= 0x5B1) && (u <= 0x5B2)) ||
133 (u == 0x5BC) ||
134 ((u >= 0x5C2) && (u <= 0x5C4)) ||
135 ((u >= 0x5D1) && (u <= 0x5D3)) ||
136 ((u >= 0x5DC) && (u <= 0x5DE)) ||
137 (u == 0x5E1) ||
138 (u == 0x5E3) ||
139 (u == 0x5E8) ||
140 (u == 0x5EF) ||
141 (u == 0x5F3) ||
142 ((u >= 0x5FA) && (u <= 0x64F)) ||
143 ((u >= 0x680) && (u <= 0x6C5)) ||
144 ((u >= 0x6CB) && (u <= 0x6D0)) ||
145 ((u >= 0x6E0) && (u <= 0x6E5)) ||
146 (u == 0x6E9) ||
147 ((u >= 0x6EB) && (u <= 0x6EC)) ||
148 (u == 0x6F0) ||
149 (u == 0x6F3) ||
150 ((u >= 0x910) && (u <= 0x918)) ||
151 ((u >= 0x980) && (u <= 0x984)) ||
152 (u == 0x9C0)
153 )
154 return EINA_TRUE;
155 }
156 221
157 // FIXME: can optimize by breaking into tree and ranges 222__attribute__((const))
158 // (A) 223Eina_Bool
159 if (ty->termstate.cjk_ambiguous_wide) 224_termpty_is_wide(Eina_Unicode g)
160 { 225{
161 // grep ';A #' EastAsianWidth.txt | wc -l 226 switch (g)
162 // :( 227 {
163 if ( 228 case 0xA9: EINA_FALLTHROUGH;
164 // aX 229 case 0xAE: EINA_FALLTHROUGH;
165 (((g >> 4) == 0xa) && 230 case 0x1100 ... 0x115F: EINA_FALLTHROUGH;
166 ( 231 case 0x203C: EINA_FALLTHROUGH;
167 (g == 0x00a1) || 232 case 0x2049: EINA_FALLTHROUGH;
168 (g == 0x00a4) || 233 case 0x2122: EINA_FALLTHROUGH;
169 ((g >= 0x00a7) && (g <= 0x00a8)) || 234 case 0x2139: EINA_FALLTHROUGH;
170 (g == 0x00aa) || 235 case 0x2194 ... 0x2199: EINA_FALLTHROUGH;
171 ((g >= 0x00ad) && (g <= 0x00ae)))) || 236 case 0x21A9 ... 0x21AA: EINA_FALLTHROUGH;
172 // bX 237 case 0x231A ... 0x231B: EINA_FALLTHROUGH;
173 (((g >> 4) == 0xb) && 238 case 0x2328 ... 0x232A: EINA_FALLTHROUGH;
174 (((g >= 0x00b0) && (g <= 0x00bf)))) || 239 case 0x2388: EINA_FALLTHROUGH;
175 // cX 240 case 0x23CF: EINA_FALLTHROUGH;
176 (((g >> 4) == 0xc) && 241 case 0x23E9 ... 0x23F3: EINA_FALLTHROUGH;
177 ((g == 0x00c6))) || 242 case 0x23F8 ... 0x23FA: EINA_FALLTHROUGH;
178 // dX 243 case 0x24C2: EINA_FALLTHROUGH;
179 (((g >> 4) == 0xd) && 244 case 0x25AA ... 0x25AB: EINA_FALLTHROUGH;
180 ( 245 case 0x25B6: EINA_FALLTHROUGH;
181 (g == 0x00d0) || 246 case 0x25C0: EINA_FALLTHROUGH;
182 ((g >= 0x00d7) && (g <= 0x00d8)) || 247 case 0x25FB ... 0x25FE: EINA_FALLTHROUGH;
183 ((g >= 0x00de) && (g <= 0x00df)))) || 248 case 0x2600 ... 0x2605: EINA_FALLTHROUGH;
184 // eX 249 case 0x2607 ... 0x2612: EINA_FALLTHROUGH;
185 (((g >> 4) == 0xe) && 250 case 0x2614 ... 0x2685: EINA_FALLTHROUGH;
186 ( 251 case 0x2690 ... 0x2705: EINA_FALLTHROUGH;
187 (g == 0x00e0) || 252 case 0x2708 ... 0x2712: EINA_FALLTHROUGH;
188 (g == 0x00e1) || 253 case 0x2714: EINA_FALLTHROUGH;
189 (g == 0x00e6) || 254 case 0x2716: EINA_FALLTHROUGH;
190 ((g >= 0x00e8) && (g <= 0x00e9)) || 255 case 0x271D: EINA_FALLTHROUGH;
191 (g == 0x00ea) || 256 case 0x2721: EINA_FALLTHROUGH;
192 ((g >= 0x00ec) && (g <= 0x00ed)))) || 257 case 0x2728: EINA_FALLTHROUGH;
193 // fX 258 case 0x2733 ... 0x2734: EINA_FALLTHROUGH;
194 (((g >> 4) == 0xf) && 259 case 0x2744: EINA_FALLTHROUGH;
195 ( 260 case 0x2747: EINA_FALLTHROUGH;
196 (g == 0x00f0) || 261 case 0x274C: EINA_FALLTHROUGH;
197 ((g >= 0x00f2) && (g <= 0x00f3)) || 262 case 0x274E: EINA_FALLTHROUGH;
198 ((g >= 0x00f7) && (g <= 0x00f9)) || 263 case 0x2753 ... 0x2755: EINA_FALLTHROUGH;
199 (g == 0x00fa) || 264 case 0x2757: EINA_FALLTHROUGH;
200 (g == 0x00fc) || 265 case 0x2763 ... 0x2767: EINA_FALLTHROUGH;
201 (g == 0x00fe))) || 266 case 0x2795 ... 0x2797: EINA_FALLTHROUGH;
202 // 1XX 267 case 0x27A1: EINA_FALLTHROUGH;
203 (((g >> 8) == 0x1) && 268 case 0x27B0: EINA_FALLTHROUGH;
204 ( 269 case 0x27BF: EINA_FALLTHROUGH;
205 (g == 0x0101) || 270 case 0x2934 ... 0x2935: EINA_FALLTHROUGH;
206 (g == 0x0111) || 271 case 0x2B05 ... 0x2B07: EINA_FALLTHROUGH;
207 (g == 0x0113) || 272 case 0x2B1B ... 0x2B1C: EINA_FALLTHROUGH;
208 (g == 0x011b) || 273 case 0x2B50: EINA_FALLTHROUGH;
209 ((g >= 0x0126) && (g <= 0x0127)) || 274 case 0x2B55: EINA_FALLTHROUGH;
210 (g == 0x012b) || 275 case 0x2E80 ... 0x303E: EINA_FALLTHROUGH;
211 ((g >= 0x0131) && (g <= 0x0133)) || 276 case 0x3041 ... 0x3247: EINA_FALLTHROUGH;
212 (g == 0x0138) || 277 case 0x3250 ... 0x4DBF: EINA_FALLTHROUGH;
213 ((g >= 0x013f) && (g <= 0x0142)) || 278 case 0x4E00 ... 0xA4C6: EINA_FALLTHROUGH;
214 (g == 0x0144) || 279 case 0xA960 ... 0xA97C: EINA_FALLTHROUGH;
215 ((g >= 0x0148) && (g <= 0x014b)) || 280 case 0xAC00 ... 0xD7A3: EINA_FALLTHROUGH;
216 (g == 0x014d) || 281 case 0xF900 ... 0xFAD9: EINA_FALLTHROUGH;
217 ((g >= 0x0152) && (g <= 0x0153)) || 282 case 0xFE10 ... 0xFE19: EINA_FALLTHROUGH;
218 ((g >= 0x0166) && (g <= 0x0167)) || 283 case 0xFE30 ... 0xFE6B: EINA_FALLTHROUGH;
219 (g == 0x016b) || 284 case 0xFF01 ... 0xFF60: EINA_FALLTHROUGH;
220 (g == 0x01ce) || 285 case 0xFFE0 ... 0xFFE6: EINA_FALLTHROUGH;
221 (g == 0x01d0) || 286 case 0x16FE0 ... 0x1B2FB: EINA_FALLTHROUGH;
222 (g == 0x01d2) || 287 case 0x1F000 ... 0x1F0F5: EINA_FALLTHROUGH;
223 (g == 0x01d4) || 288 case 0x1F10D ... 0x1F10F: EINA_FALLTHROUGH;
224 (g == 0x01d6) || 289 case 0x1F12F: EINA_FALLTHROUGH;
225 (g == 0x01d8) || 290 case 0x1F16C ... 0x1F171: EINA_FALLTHROUGH;
226 (g == 0x01da) || 291 case 0x1F17E ... 0x1F17F: EINA_FALLTHROUGH;
227 (g == 0x01dc))) || 292 case 0x1F18E: EINA_FALLTHROUGH;
228 // 2XX 293 case 0x1F191 ... 0x1F19A: EINA_FALLTHROUGH;
229 (((g >> 8) == 0x2) && 294 case 0x1F1AD: EINA_FALLTHROUGH;
230 ( 295 case 0x1F200 ... 0x1F53D: EINA_FALLTHROUGH;
231 (g == 0x0251) || 296 case 0x1F546 ... 0x1F64F: EINA_FALLTHROUGH;
232 (g == 0x0261) || 297 case 0x1F680 ... 0x1F6FC: EINA_FALLTHROUGH;
233 (g == 0x02c4) || 298 case 0x1F7D5 ... 0x1F7EB: EINA_FALLTHROUGH;
234 (g == 0x02c7) || 299 case 0x1F8B0 ... 0x1F8B1: EINA_FALLTHROUGH;
235 (g == 0x02c9) || 300 case 0x1F90C ... 0x1F93A: EINA_FALLTHROUGH;
236 ((g >= 0x02ca) && (g <= 0x02cb)) || 301 case 0x1F93C ... 0x1F945: EINA_FALLTHROUGH;
237 (g == 0x02cd) || 302 case 0x1F947 ... 0x1FAD6: EINA_FALLTHROUGH;
238 (g == 0x02d0) || 303 case 0x20000 ... 0x3134A:
239 ((g >= 0x02d8) && (g <= 0x02d9)) ||
240 ((g >= 0x02da) && (g <= 0x02db)) ||
241 (g == 0x02dd) ||
242 (g == 0x02df))) ||
243 // 3XX
244 (((g >> 8) == 0x3) &&
245 (
246 ((g >= 0x0300) && (g <= 0x036f)) ||
247 ((g >= 0x0391) && (g <= 0x03c9)))) ||
248 // 4XX
249 (((g >> 8) == 0x4) &&
250 (
251 (g == 0x0401) ||
252 ((g >= 0x0410) && (g <= 0x044f)) ||
253 (g == 0x0451))) ||
254 // 2XXX
255 (((g >> 12) == 0x2) &&
256 ((((g >> 8) == 0x20) &&
257 (
258 (g == 0x2010) ||
259 ((g >= 0x2013) && (g <= 0x2016)) ||
260 ((g >= 0x2018) && (g <= 0x2019)) ||
261 (g == 0x201c) ||
262 (g == 0x201d) ||
263 ((g >= 0x2020) && (g <= 0x2022)) ||
264 ((g >= 0x2024) && (g <= 0x2027)) ||
265 (g == 0x2030) ||
266 ((g >= 0x2032) && (g <= 0x2033)) ||
267 (g == 0x2035) ||
268 (g == 0x203b) ||
269 (g == 0x203e) ||
270 (g == 0x2074) ||
271 (g == 0x207f) ||
272 ((g >= 0x2081) && (g <= 0x2084)) ||
273 (g == 0x20ac))) ||
274 (((g >> 8) == 0x21) &&
275 (
276 (g == 0x2103) ||
277 (g == 0x2105) ||
278 (g == 0x2109) ||
279 (g == 0x2113) ||
280 (g == 0x2116) ||
281 ((g >= 0x2121) && (g <= 0x2122)) ||
282 (g == 0x2126) ||
283 (g == 0x212b) ||
284 ((g >= 0x2153) && (g <= 0x2154)) ||
285 ((g >= 0x215b) && (g <= 0x215e)) ||
286 ((g >= 0x2160) && (g <= 0x216b)) ||
287 ((g >= 0x2170) && (g <= 0x2179)) ||
288 ((g >= 0x2189) && (g <= 0x2199)) ||
289 ((g >= 0x21b8) && (g <= 0x21b9)) ||
290 (g == 0x21d2) ||
291 (g == 0x21d4) ||
292 (g == 0x21e7))) ||
293 (((g >> 8) == 0x22) &&
294 (
295 (g == 0x2200) ||
296 ((g >= 0x2202) && (g <= 0x2203)) ||
297 ((g >= 0x2207) && (g <= 0x2208)) ||
298 (g == 0x220b) ||
299 (g == 0x220f) ||
300 (g == 0x2211) ||
301 (g == 0x2215) ||
302 (g == 0x221a) ||
303 ((g >= 0x221d) && (g <= 0x221f)) ||
304 (g == 0x2220) ||
305 (g == 0x2223) ||
306 (g == 0x2225) ||
307 ((g >= 0x2227) && (g <= 0x222e)) ||
308 ((g >= 0x2234) && (g <= 0x2237)) ||
309 ((g >= 0x223c) && (g <= 0x223d)) ||
310 (g == 0x2248) ||
311 (g == 0x224c) ||
312 (g == 0x2252) ||
313 ((g >= 0x2260) && (g <= 0x2261)) ||
314 ((g >= 0x2264) && (g <= 0x2267)) ||
315 ((g >= 0x226a) && (g <= 0x226b)) ||
316 ((g >= 0x226e) && (g <= 0x226f)) ||
317 ((g >= 0x2282) && (g <= 0x2283)) ||
318 ((g >= 0x2286) && (g <= 0x2287)) ||
319 (g == 0x2295) ||
320 (g == 0x2299) ||
321 (g == 0x22a5) ||
322 (g == 0x22bf))) ||
323 (((g >> 8) == 0x23) &&
324 ((g == 0x2312))) ||
325 ((((g >> 8) == 0x24) || ((g >> 8) == 0x25)) &&
326 (((g >= 0x2460) && (g <= 0x2595)))) ||
327 (((g >> 8) == 0x25) &&
328 (
329 ((g >= 0x25a0) && (g <= 0x25bd)) ||
330 ((g >= 0x25c0) && (g <= 0x25c1)) ||
331 ((g >= 0x25c6) && (g <= 0x25c7)) ||
332 (g == 0x25c8) ||
333 (g == 0x25cb) ||
334 ((g >= 0x25ce) && (g <= 0x25cf)) ||
335 ((g >= 0x25d0) && (g <= 0x25d1)) ||
336 ((g >= 0x25e2) && (g <= 0x25e3)) ||
337 ((g >= 0x25e4) && (g <= 0x25e5)) ||
338 (g == 0x25ef))) ||
339 (((g >> 8) == 0x26) &&
340 (
341 ((g >= 0x2605) && (g <= 0x2606)) ||
342 (g == 0x2609) ||
343 ((g >= 0x260e) && (g <= 0x260f)) ||
344 ((g >= 0x2614) && (g <= 0x2615)) ||
345 (g == 0x261c) ||
346 (g == 0x261e) ||
347 (g == 0x2640) ||
348 (g == 0x2642) ||
349 ((g >= 0x2660) && (g <= 0x2661)) ||
350 ((g >= 0x2663) && (g <= 0x2665)) ||
351 ((g >= 0x2667) && (g <= 0x266a)) ||
352 ((g >= 0x266c) && (g <= 0x266d)) ||
353 (g == 0x266f) ||
354 ((g >= 0x269e) && (g <= 0x269f)) ||
355 ((g >= 0x26be) && (g <= 0x26bf)) ||
356 ((g >= 0x26c4) && (g <= 0x26cd)) ||
357 (g == 0x26cf) ||
358 ((g >= 0x26d0) && (g <= 0x26e1)) ||
359 (g == 0x26e3) ||
360 ((g >= 0x26e8) && (g <= 0x26ff)))) ||
361 (((g >> 8) == 0x27) &&
362 (
363 (g == 0x273d) ||
364 (g == 0x2757) ||
365 ((g >= 0x2776) && (g <= 0x277f)))) ||
366 (((g >> 8) == 0x2b) &&
367 (((g >= 0x2b55) && (g <= 0x2b59)))))) ||
368 // 3XXX
369 (((g >> 12) == 0x3) &&
370 (((g >= 0x3248) && (g <= 0x324f)))) ||
371 // fXXX
372 (((g >> 12) == 0xf) &&
373 (
374 ((g >= 0xfe00) && (g <= 0xfe0f)) ||
375 (g == 0xfffd))) ||
376 // 1XXXX
377 (((g >> 16) == 0x1) &&
378 (
379 ((g >= 0x1f100) && (g <= 0x1f12d)) ||
380 ((g >= 0x1f130) && (g <= 0x1f169)) ||
381 ((g >= 0x1f170) && (g <= 0x1f19a)))) ||
382 // eXXXX
383 (((g >> 16) == 0xe) &&
384 (((g >= 0xe0100) && (g <= 0xe01ef)))) ||
385 // fXXXX
386 (((g >> 16) == 0xf) &&
387 (((g >= 0xf0000) && (g <= 0xffffd)))) ||
388 // 1XXXXX
389 (((g >> 24) == 0x1) &&
390 (((g >= 0x100000) && (g <= 0x10fffd)))))
391 return EINA_TRUE;
392 }
393 304
394 // Na, H -> not checked 305 return EINA_TRUE;
306 }
395 return EINA_FALSE; 307 return EINA_FALSE;
396} 308}
diff --git a/src/bin/termptydbl.h b/src/bin/termptydbl.h
index 142e16c..ca365b2 100644
--- a/src/bin/termptydbl.h
+++ b/src/bin/termptydbl.h
@@ -1,25 +1,20 @@
1/* XXX: Code generated by tool unicode_dbl_width.py */
1#ifndef _TERMPTY_DBL_H__ 2#ifndef _TERMPTY_DBL_H__
2#define _TERMPTY_DBL_H__ 1 3#define _TERMPTY_DBL_H__ 1
3 4
4Eina_Bool _termpty_is_dblwidth_slow_get(const Termpty *ty, int g); 5Eina_Bool _termpty_is_wide(const Eina_Unicode g);
6Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);
5 7
6static inline Eina_Bool 8static inline Eina_Bool
7_termpty_is_dblwidth_get(const Termpty *ty, int g) 9_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
8{ 10{
9 // check for east asian full-width (F), half-width (H), wide (W), 11 /* optimize for latin1 non-ambiguous */
10 // narrow (Na) or ambiguous (A) codepoints 12 if (g <= 0xA0)
11 // ftp://ftp.unicode.org/Public/UNIDATA/EastAsianWidth.txt
12
13 // optimize for latin1 non-ambiguous
14 if (g <= 0xa0)
15 return EINA_FALSE; 13 return EINA_FALSE;
16 // (F) 14 if (!ty->termstate.cjk_ambiguous_wide)
17 if ((g == 0x3000) || 15 return _termpty_is_wide(g);
18 ((g >= 0xff01) && (g <= 0xff60)) || 16 else
19 ((g >= 0xffe0) && (g <= 0xffe6))) 17 return _termpty_is_ambigous_wide(g);
20 return EINA_TRUE;
21
22 return _termpty_is_dblwidth_slow_get(ty, g);
23} 18}
24 19
25#endif 20#endif
diff --git a/tools/unicode_dbl_width.py b/tools/unicode_dbl_width.py
new file mode 100755
index 0000000..2457834
--- /dev/null
+++ b/tools/unicode_dbl_width.py
@@ -0,0 +1,190 @@
1#!/usr/bin/env python3
2
3"""
4Generate src/bin/termptydbl.{c,h} from unicode files
5used with ucd.all.flat.xml from
6https://www.unicode.org/Public/UCD/latest/ucdxml/ucd.all.flat.zip
7"""
8
9import argparse
10from collections import namedtuple
11import xml.etree.ElementTree as ET
12
Range = namedtuple('range', ['width', 'start', 'end'])


def get_ranges(xmlfile, emoji_as_wide):
    """Collapse the UCD XML repertoire into runs of equal width class.

    Every ``char`` element of the repertoire is visited in document order.
    Its ``ea`` attribute is reduced to one of three classes:

    * ``'N'`` -- narrow (``N``, ``Na`` and ``H`` in the input),
    * ``'A'`` -- ambiguous,
    * ``'W'`` -- wide (``W`` and ``F`` in the input).

    Consecutive elements of the same class are folded into a single
    ``Range(width, start, end)``.  Code points that are not listed with a
    ``cp`` attribute do not interrupt a run, so a range may span code points
    the input never mentions.

    Args:
        xmlfile: File name or file object accepted by ``ET.parse``.
        emoji_as_wide: When true, elements whose ``ExtPict`` attribute is
            ``'Y'`` are forced to class ``'W'``.

    Returns:
        A list of ``Range`` tuples in document order.  The first one always
        starts at code point 0 with class ``'N'``.

    Raises:
        ValueError: If the repertoire element is missing or an element
            carries an ``ea`` value outside the known set.
    """
    ns = "{http://www.unicode.org/ns/2003/ucd/1.0}"
    repertoire = ET.parse(xmlfile).getroot().find(f"{ns}repertoire")
    if repertoire is None:
        raise ValueError("no <repertoire> element found in UCD XML input")

    ranges = []
    current = Range('N', 0, 0)
    for char in repertoire.findall(f"{ns}char"):
        ea = char.get('ea')
        if ea in ('Na', 'H'):
            ea = 'N'
        elif ea == 'F':
            # An equality test on purpose: "ea in ('F')" is a substring test
            # on the string 'F' and would also accept the empty string.
            ea = 'W'
        if ea not in ('N', 'A', 'W'):
            raise ValueError(f"unexpected East Asian Width value: {ea!r}")

        cp = char.get('cp')
        if not cp:
            # No single code point on this element (presumably a
            # first-cp/last-cp range entry -- TODO confirm); ignored.
            continue
        if emoji_as_wide and char.get('ExtPict') == 'Y':
            ea = 'W'

        cp = int(cp, 16)
        if ea == current.width:
            current = current._replace(end=cp)
        else:
            ranges.append(current)
            current = Range(ea, cp, cp)

    ranges.append(current)
    return ranges
48
def merge_ranges(ranges, is_same_width):
    """Fold neighbouring ranges that the predicate considers equivalent.

    Args:
        ranges: Iterable of range tuples exposing ``end`` and ``_replace``.
        is_same_width: Callable ``(candidate, current) -> bool`` deciding
            whether ``candidate`` may be absorbed into the run being built.

    Returns:
        A new list.  Each merged entry keeps the ``width`` and ``start`` of
        the first range of its run and the ``end`` of the last one.  An empty
        input yields an empty list.
    """
    pending = iter(ranges)
    current = next(pending, None)
    if current is None:
        return []

    merged = []
    for candidate in pending:
        if is_same_width(candidate, current):
            current = current._replace(end=candidate.end)
        else:
            merged.append(current)
            current = candidate
    merged.append(current)
    return merged
60
def skip_ranges(ranges, width_skipped):
    """Return the ranges whose ``width`` is not listed in ``width_skipped``.

    Args:
        ranges: Iterable of range tuples exposing a ``width`` attribute.
        width_skipped: Container of width classes to drop.

    Returns:
        A new list with the remaining ranges in their original order.
    """
    return [r for r in ranges if r.width not in width_skipped]
67
def gen_header(range, file_header):
    """Write the generated C header to ``file_header``.

    The header declares the two lookup functions and defines the inline
    ``_termpty_is_dblwidth_get`` wrapper.  The wrapper returns
    ``EINA_FALSE`` straight away for every code point up to ``range.end``
    and otherwise dispatches on ``ty->termstate.cjk_ambiguous_wide``.

    Args:
        range: Range tuple whose ``end`` becomes the upper bound of the
            fast path.  (The parameter name shadows the builtin; it is kept
            so that existing callers are unaffected.)
        file_header: Writable text file object.
    """
    prologue = """/* XXX: Code generated by tool unicode_dbl_width.py */
#ifndef _TERMPTY_DBL_H__
#define _TERMPTY_DBL_H__ 1

Eina_Bool _termpty_is_wide(const Eina_Unicode g);
Eina_Bool _termpty_is_ambigous_wide(const Eina_Unicode g);

static inline Eina_Bool
_termpty_is_dblwidth_get(const Termpty *ty, const Eina_Unicode g)
{
   /* optimize for latin1 non-ambiguous */
"""
    epilogue = """
     return EINA_FALSE;
   if (!ty->termstate.cjk_ambiguous_wide)
     return _termpty_is_wide(g);
   else
     return _termpty_is_ambigous_wide(g);
}

#endif
"""
    file_header.write(f"{prologue}   if (g <= 0x{range.end:X}){epilogue}")
94
def _gen_width_switch(ranges, file_source, func_name, narrow_widths):
    """Write one C predicate implemented as a ``switch`` over code points.

    The first entry of ``ranges`` is left out here; ``gen_c`` passes it to
    ``gen_header`` instead.  The remaining entries are merged into
    alternating narrow / non-narrow runs, the narrow runs are dropped, and
    each surviving run becomes a ``case`` label.  Every label but the last
    is followed by ``EINA_FALLTHROUGH;`` so that all of them reach the one
    ``return EINA_TRUE;``.

    Args:
        ranges: List of range tuples as produced by ``get_ranges``.
        file_source: Writable text file object.
        func_name: Name of the emitted C function.
        narrow_widths: Width classes for which the C function must return
            ``EINA_FALSE``.
    """
    def is_same_width(r1, r2):
        # Two ranges belong to the same run when both are narrow or both
        # are not.
        return (r1.width in narrow_widths) == (r2.width in narrow_widths)

    tail = ranges[1:]
    # Guarded so that an input with at most one range does not reach
    # merge_ranges with an empty list.
    merged = merge_ranges(tail, is_same_width) if tail else []
    wide_runs = skip_ranges(merged, narrow_widths)

    file_source.write(
        "\n"
        "__attribute__((const))\n"
        "Eina_Bool\n"
        f"{func_name}(Eina_Unicode g)\n"
        "{\n"
        "   switch (g)\n"
        "     {\n")

    last = len(wide_runs) - 1
    for idx, run in enumerate(wide_runs):
        if run.start == run.end:
            label = f"0x{run.start:X}"
        else:
            label = f"0x{run.start:X} ... 0x{run.end:X}"
        suffix = "" if idx == last else " EINA_FALLTHROUGH;"
        file_source.write(f"      case {label}:{suffix}\n")

    file_source.write(
        "\n"
        "        return EINA_TRUE;\n"
        "     }\n"
        "   return EINA_FALSE;\n"
        "}\n")


def gen_ambigous(ranges, file_source):
    """Write ``_termpty_is_ambigous_wide`` to ``file_source``.

    In this variant both the ambiguous (``'A'``) and the wide (``'W'``)
    classes make the C function return ``EINA_TRUE``.

    Args:
        ranges: List of range tuples as produced by ``get_ranges``.
        file_source: Writable text file object.
    """
    _gen_width_switch(ranges, file_source,
                      "_termpty_is_ambigous_wide", ('N',))


def gen_wide(ranges, file_source):
    """Write ``_termpty_is_wide`` to ``file_source``.

    In this variant only the wide (``'W'``) class makes the C function
    return ``EINA_TRUE``; ambiguous code points count as narrow.

    Args:
        ranges: List of range tuples as produced by ``get_ranges``.
        file_source: Writable text file object.
    """
    _gen_width_switch(ranges, file_source,
                      "_termpty_is_wide", ('N', 'A'))
167
168
def gen_c(ranges, file_header, file_source):
    """Write the generated header and C source.

    The first range goes to ``gen_header`` as the bound of the inline fast
    path.  The C source receives the include preamble followed by the two
    lookup functions, ambiguous variant first.

    Args:
        ranges: Non-empty list of range tuples as produced by
            ``get_ranges``.
        file_header: Writable text file object for the header.
        file_source: Writable text file object for the C source.
    """
    gen_header(ranges[0], file_header)
    file_source.write(
        '/* XXX: Code generated by tool unicode_dbl_width.py */\n'
        '#include "private.h"\n'
        '\n'
        '#include <Elementary.h>\n'
        '#include "termpty.h"\n'
        '#include "termptydbl.h"\n')
    gen_ambigous(ranges, file_source)
    gen_wide(ranges, file_source)
181
def main():
    """Parse the command line and generate the header and C source.

    Positional arguments: the UCD XML input, the header to write and the C
    source to write.  Emoji (``ExtPict``) code points are always treated as
    wide.
    """
    parser = argparse.ArgumentParser(description='Generate code handling different widths of unicode codepoints.')
    parser.add_argument('xml', type=argparse.FileType('r'))
    parser.add_argument('header', type=argparse.FileType('w'))
    parser.add_argument('source', type=argparse.FileType('w'))

    args = parser.parse_args()

    # Close (and therefore flush) the files argparse opened once generation
    # is done instead of relying on interpreter shutdown.
    with args.xml, args.header, args.source:
        ranges = get_ranges(args.xml, True)
        gen_c(ranges, args.header, args.source)


# Guarded so that importing this module does not parse sys.argv.
if __name__ == "__main__":
    main()