summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAbdullehGhujeh <abdullahhasan10@gmail.com>2020-09-01 19:33:51 +0900
committerWooHyun Jung <wh0705.jung@samsung.com>2020-09-01 19:33:52 +0900
commit1ab71284db89dcc37da12dd56796e892bc50ae0a (patch)
tree98a2b1dec47fb513166d27a01ec987ee441d3dd3
parentba3b082d06b4ddea834e08d1c79d291ac6c3df0e (diff)
evas_textblock: rainbow flag emoji treated as two clusters(update unibreak to version 4.2)
Summary: If the text contains the rainbow flag emoji (U+1F3F3 U+FE0F U+200D U+1F308), the cursor can be moved inside it with the mouse or keyboard, because we break the sequence into two clusters, with the break on U+1F308. This is wrong: an emoji should be treated as a single cluster, based on the rule in the Unicode segmentation standard, “Do not break within emoji modifier sequences or emoji ZWJ sequences” (https://unicode.org/reports/tr29/#GB11 ). The issue happens because we do not give U+1F308 its correct grapheme break property value. I think this is a bug in the unibreak library, as U+1F308 should have a word break class value equal to Glue_After_ZWJ (based on https://www.unicode.org/reports/tr29/tr29-31.html#Glue_After_Zwj_WB and http://unicode.org/Public/emoji/5.0/emoji-zwj-sequences.txt), which would prevent the break and give us a single cluster. I noticed that the unibreak lib currently used in EFL seems to implement Unicode 9 (the latest is Unicode 13), which uses grapheme break properties that are now obsolete and unused, such as E_Modifier and Glue_After_ZWJ. So if a new emoji is introduced (the rainbow flag was introduced after Unicode 9) that, under Unicode 9, should use the E_Modifier or Glue_After_ZWJ property, we will have an issue with it. I have therefore updated the unibreak lib to the latest released version of unibreak (4.2), which implements Unicode 12. I needed to remove **BREAK_AFTER(i)** to pass the tests in D1140, as spaces do not break with the latest update (also related to T995). {F3868712} This should fix T8665 & T8688. Reviewers: ali.alzyod, woohyun, bowonryu, zmike, segfaultxavi, bu5hm4n Reviewed By: ali.alzyod Subscribers: segfaultxavi, cedric, #reviewers, #committers Tags: #efl Maniphest Tasks: T8665 Differential Revision: https://phab.enlightenment.org/D11743
-rw-r--r--src/lib/evas/canvas/evas_object_textblock.c4
-rw-r--r--src/static_libs/libunibreak/LICENCE8
-rw-r--r--src/static_libs/libunibreak/NEWS14
-rw-r--r--src/static_libs/libunibreak/README.md16
-rw-r--r--src/static_libs/libunibreak/emojidata.c264
-rw-r--r--src/static_libs/libunibreak/emojidef.c61
-rw-r--r--src/static_libs/libunibreak/emojidef.h46
-rw-r--r--src/static_libs/libunibreak/graphemebreak.c88
-rw-r--r--src/static_libs/libunibreak/graphemebreak.h8
-rw-r--r--src/static_libs/libunibreak/graphemebreakdata.c114
-rw-r--r--src/static_libs/libunibreak/graphemebreakdef.h18
-rw-r--r--src/static_libs/libunibreak/linebreak.c66
-rw-r--r--src/static_libs/libunibreak/linebreak.h8
-rw-r--r--src/static_libs/libunibreak/linebreakdata.c225
-rw-r--r--src/static_libs/libunibreak/linebreakdef.c17
-rw-r--r--src/static_libs/libunibreak/linebreakdef.h25
-rw-r--r--src/static_libs/libunibreak/meson.build2
-rw-r--r--src/static_libs/libunibreak/unibreakbase.c2
-rw-r--r--src/static_libs/libunibreak/unibreakbase.h4
-rw-r--r--src/static_libs/libunibreak/unibreakdef.h12
-rw-r--r--src/static_libs/libunibreak/wordbreak.c92
-rw-r--r--src/static_libs/libunibreak/wordbreak.h11
-rw-r--r--src/static_libs/libunibreak/wordbreakdata.c201
-rw-r--r--src/static_libs/libunibreak/wordbreakdef.h20
-rw-r--r--src/tests/evas/evas_test_textblock.c4
25 files changed, 986 insertions, 344 deletions
diff --git a/src/lib/evas/canvas/evas_object_textblock.c b/src/lib/evas/canvas/evas_object_textblock.c
index d50592cc5d..1a46ea614c 100644
--- a/src/lib/evas/canvas/evas_object_textblock.c
+++ b/src/lib/evas/canvas/evas_object_textblock.c
@@ -10297,7 +10297,7 @@ evas_textblock_cursor_word_start(Efl_Text_Cursor_Handle *cur)
10297 if ((cur->pos > 0) && (cur->pos == len)) 10297 if ((cur->pos > 0) && (cur->pos == len))
10298 cur->pos--; 10298 cur->pos--;
10299 10299
10300 for (i = cur->pos ; _is_white(text[i]) && BREAK_AFTER(i) ; i--) 10300 for (i = cur->pos ; _is_white(text[i]) ; i--)
10301 { 10301 {
10302 if (i == 0) 10302 if (i == 0)
10303 { 10303 {
@@ -10364,7 +10364,7 @@ evas_textblock_cursor_word_end(Efl_Text_Cursor_Handle *cur)
10364 set_wordbreaks_utf32((const utf32_t *) text, len, lang, breaks); 10364 set_wordbreaks_utf32((const utf32_t *) text, len, lang, breaks);
10365 } 10365 }
10366 10366
10367 for (i = cur->pos; text[i] && _is_white(text[i]) && (BREAK_AFTER(i)) ; i++); 10367 for (i = cur->pos; text[i] && _is_white(text[i]) ; i++);
10368 if (i == len) 10368 if (i == len)
10369 { 10369 {
10370 Evas_Object_Textblock_Node_Text *nnode; 10370 Evas_Object_Textblock_Node_Text *nnode;
diff --git a/src/static_libs/libunibreak/LICENCE b/src/static_libs/libunibreak/LICENCE
index 3fba16ad53..6b4137ca21 100644
--- a/src/static_libs/libunibreak/LICENCE
+++ b/src/static_libs/libunibreak/LICENCE
@@ -1,7 +1,7 @@
1Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> 1Copyright (C) Wu Yongwei <wuyongwei at gmail dot com>
2Copyright (C) 2012-2016 Tom Hacohen <tom at stosb dot com> 2Copyright (C) Tom Hacohen <tom at stosb dot com>
3Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> 3Copyright (C) Petr Filipsky <philodej at gmail dot com>
4Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> 4Copyright (C) Andreas Röver <roever at users dot sf dot net>
5 5
6This software is provided 'as-is', without any express or implied 6This software is provided 'as-is', without any express or implied
7warranty. In no event will the author be held liable for any damages 7warranty. In no event will the author be held liable for any damages
diff --git a/src/static_libs/libunibreak/NEWS b/src/static_libs/libunibreak/NEWS
index d217628da8..a2b9e0302e 100644
--- a/src/static_libs/libunibreak/NEWS
+++ b/src/static_libs/libunibreak/NEWS
@@ -1,3 +1,14 @@
1New in libunibreak 4.2
2
3- Update the data to conform to Unicode 12
4
5New in libunibreak 4.1
6
7- Update the code and data to conform to Unicode 11.0.0, especially
8 adding support for extended pictographs in word and grapheme breaking
9- ZWJ support has been much improved (it was broken)
10- Make minor tweaks to the project files
11
1New in libunibreak 4.0 12New in libunibreak 4.0
2 13
3- Update the code and data to conform to Unicode 9.0.0 14- Update the code and data to conform to Unicode 9.0.0
@@ -22,7 +33,8 @@ New in libunibreak 1.1
22New in libunibreak 1.0 33New in libunibreak 1.0
23 34
24- Add word breaking support 35- Add word breaking support
25- Change the library name to "libunibreak", while keeping maximum compatibility 36- Change the library name to "libunibreak", while keeping maximum
37 compatibility
26- Add pkg-config support 38- Add pkg-config support
27 39
28New in liblinebreak 2.1 40New in liblinebreak 2.1
diff --git a/src/static_libs/libunibreak/README.md b/src/static_libs/libunibreak/README.md
index f37fd902aa..4e65059586 100644
--- a/src/static_libs/libunibreak/README.md
+++ b/src/static_libs/libunibreak/README.md
@@ -6,11 +6,11 @@ Overview
6 6
7This is the README file for libunibreak, an implementation of the line 7This is the README file for libunibreak, an implementation of the line
8breaking and word breaking algorithms as described in [Unicode Standard 8breaking and word breaking algorithms as described in [Unicode Standard
9Annex 14] [1] and [Unicode Standard Annex 29] [2]. Check the project's 9Annex 14][1] and [Unicode Standard Annex 29][2]. Check the project's
10[home page] [3] for up-to-date information. 10[home page][3] for up-to-date information.
11 11
12 [1]: http://www.unicode.org/reports/tr14/tr14-37.html 12 [1]: http://www.unicode.org/reports/tr14/
13 [2]: http://www.unicode.org/reports/tr29/tr29-29.html 13 [2]: http://www.unicode.org/reports/tr29/
14 [3]: https://github.com/adah1972/libunibreak 14 [3]: https://github.com/adah1972/libunibreak
15 15
16 16
@@ -21,7 +21,7 @@ This library is released under an open-source licence, the zlib/libpng
21licence. Please check the file *LICENCE* for details. 21licence. Please check the file *LICENCE* for details.
22 22
23Apart from using the algorithm, part of the code is derived from the 23Apart from using the algorithm, part of the code is derived from the
24[Unicode Public Data] [4], and the [Unicode Terms of Use] [5] may apply. 24[Unicode Public Data][4], and the [Unicode Terms of Use][5] may apply.
25 25
26 [4]: http://www.unicode.org/Public/ 26 [4]: http://www.unicode.org/Public/
27 [5]: http://www.unicode.org/copyright.html 27 [5]: http://www.unicode.org/copyright.html
@@ -48,6 +48,8 @@ There are three ways to build the library:
48 *WordBreakProperty.txt*. 48 *WordBreakProperty.txt*.
49 - type `make graphemebreakdata` to regenerate *graphemebreakdata.c* 49 - type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
50 from *GraphemeBreakProperty.txt*. 50 from *GraphemeBreakProperty.txt*.
51 - type `make emojidata` to regenerate *emojidata.c* from
52 *emoji-data.txt*.
51 53
522. On systems where GCC and Binutils are supported, one can type 542. On systems where GCC and Binutils are supported, one can type
53 55
@@ -65,6 +67,8 @@ There are three ways to build the library:
65 *WordBreakProperty.txt*. 67 *WordBreakProperty.txt*.
66 - type `make graphemebreakdata` to regenerate *graphemebreakdata.c* 68 - type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
67 from *GraphemeBreakProperty.txt*. 69 from *GraphemeBreakProperty.txt*.
70 - type `make emojidata` to regenerate *emojidata.c* from
71 *emoji-data.txt*.
68 72
693. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2 733. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2
70 (MinGW), MSVC can also be used. Type 74 (MinGW), MSVC can also be used. Type
@@ -72,7 +76,7 @@ There are three ways to build the library:
72 cd src 76 cd src
73 nmake -f Makefile.msvc 77 nmake -f Makefile.msvc
74 78
75 to build the static library. By default the debug release is built. 79 to build the static library. By default the debug version is built.
76 To build the release version 80 To build the release version
77 81
78 nmake -f Makefile.msvc CFG="libunibreak - Win32 Release" 82 nmake -f Makefile.msvc CFG="libunibreak - Win32 Release"
diff --git a/src/static_libs/libunibreak/emojidata.c b/src/static_libs/libunibreak/emojidata.c
new file mode 100644
index 0000000000..a78f2678e7
--- /dev/null
+++ b/src/static_libs/libunibreak/emojidata.c
@@ -0,0 +1,264 @@
1/* The content of this file is generated from:
2# emoji-data.txt
3# Date: 2019-01-15, 12:10:05 GMT
4*/
5
6static const struct ExtendedPictograpic ep_prop[] = { /* inclusive {start, end} codepoint ranges in ascending order, as the binary search in ub_is_extended_pictographic() requires */
7 {0x00A9, 0x00A9},
8 {0x00AE, 0x00AE},
9 {0x203C, 0x203C},
10 {0x2049, 0x2049},
11 {0x2122, 0x2122},
12 {0x2139, 0x2139},
13 {0x2194, 0x2199},
14 {0x21A9, 0x21AA},
15 {0x231A, 0x231B},
16 {0x2328, 0x2328},
17 {0x2388, 0x2388},
18 {0x23CF, 0x23CF},
19 {0x23E9, 0x23F3},
20 {0x23F8, 0x23FA},
21 {0x24C2, 0x24C2},
22 {0x25AA, 0x25AB},
23 {0x25B6, 0x25B6},
24 {0x25C0, 0x25C0},
25 {0x25FB, 0x25FE},
26 {0x2600, 0x2605},
27 {0x2607, 0x2612},
28 {0x2614, 0x2615},
29 {0x2616, 0x2617},
30 {0x2618, 0x2618},
31 {0x2619, 0x2619},
32 {0x261A, 0x266F},
33 {0x2670, 0x2671},
34 {0x2672, 0x267D},
35 {0x267E, 0x267F},
36 {0x2680, 0x2685},
37 {0x2690, 0x2691},
38 {0x2692, 0x269C},
39 {0x269D, 0x269D},
40 {0x269E, 0x269F},
41 {0x26A0, 0x26A1},
42 {0x26A2, 0x26B1},
43 {0x26B2, 0x26B2},
44 {0x26B3, 0x26BC},
45 {0x26BD, 0x26BF},
46 {0x26C0, 0x26C3},
47 {0x26C4, 0x26CD},
48 {0x26CE, 0x26CE},
49 {0x26CF, 0x26E1},
50 {0x26E2, 0x26E2},
51 {0x26E3, 0x26E3},
52 {0x26E4, 0x26E7},
53 {0x26E8, 0x26FF},
54 {0x2700, 0x2700},
55 {0x2701, 0x2704},
56 {0x2705, 0x2705},
57 {0x2708, 0x2709},
58 {0x270A, 0x270B},
59 {0x270C, 0x2712},
60 {0x2714, 0x2714},
61 {0x2716, 0x2716},
62 {0x271D, 0x271D},
63 {0x2721, 0x2721},
64 {0x2728, 0x2728},
65 {0x2733, 0x2734},
66 {0x2744, 0x2744},
67 {0x2747, 0x2747},
68 {0x274C, 0x274C},
69 {0x274E, 0x274E},
70 {0x2753, 0x2755},
71 {0x2757, 0x2757},
72 {0x2763, 0x2767},
73 {0x2795, 0x2797},
74 {0x27A1, 0x27A1},
75 {0x27B0, 0x27B0},
76 {0x27BF, 0x27BF},
77 {0x2934, 0x2935},
78 {0x2B05, 0x2B07},
79 {0x2B1B, 0x2B1C},
80 {0x2B50, 0x2B50},
81 {0x2B55, 0x2B55},
82 {0x3030, 0x3030},
83 {0x303D, 0x303D},
84 {0x3297, 0x3297},
85 {0x3299, 0x3299},
86 {0x1F000, 0x1F02B},
87 {0x1F02C, 0x1F02F},
88 {0x1F030, 0x1F093},
89 {0x1F094, 0x1F09F},
90 {0x1F0A0, 0x1F0AE},
91 {0x1F0AF, 0x1F0B0},
92 {0x1F0B1, 0x1F0BE},
93 {0x1F0BF, 0x1F0BF},
94 {0x1F0C0, 0x1F0C0},
95 {0x1F0C1, 0x1F0CF},
96 {0x1F0D0, 0x1F0D0},
97 {0x1F0D1, 0x1F0DF},
98 {0x1F0E0, 0x1F0F5},
99 {0x1F0F6, 0x1F0FF},
100 {0x1F10D, 0x1F10F},
101 {0x1F12F, 0x1F12F},
102 {0x1F16C, 0x1F16C},
103 {0x1F16D, 0x1F16F},
104 {0x1F170, 0x1F171},
105 {0x1F17E, 0x1F17E},
106 {0x1F17F, 0x1F17F},
107 {0x1F18E, 0x1F18E},
108 {0x1F191, 0x1F19A},
109 {0x1F1AD, 0x1F1E5},
110 {0x1F201, 0x1F202},
111 {0x1F203, 0x1F20F},
112 {0x1F21A, 0x1F21A},
113 {0x1F22F, 0x1F22F},
114 {0x1F232, 0x1F23A},
115 {0x1F23C, 0x1F23F},
116 {0x1F249, 0x1F24F},
117 {0x1F250, 0x1F251},
118 {0x1F252, 0x1F25F},
119 {0x1F260, 0x1F265},
120 {0x1F266, 0x1F2FF},
121 {0x1F300, 0x1F320},
122 {0x1F321, 0x1F32C},
123 {0x1F32D, 0x1F32F},
124 {0x1F330, 0x1F335},
125 {0x1F336, 0x1F336},
126 {0x1F337, 0x1F37C},
127 {0x1F37D, 0x1F37D},
128 {0x1F37E, 0x1F37F},
129 {0x1F380, 0x1F393},
130 {0x1F394, 0x1F39F},
131 {0x1F3A0, 0x1F3C4},
132 {0x1F3C5, 0x1F3C5},
133 {0x1F3C6, 0x1F3CA},
134 {0x1F3CB, 0x1F3CE},
135 {0x1F3CF, 0x1F3D3},
136 {0x1F3D4, 0x1F3DF},
137 {0x1F3E0, 0x1F3F0},
138 {0x1F3F1, 0x1F3F7},
139 {0x1F3F8, 0x1F3FA},
140 {0x1F400, 0x1F43E},
141 {0x1F43F, 0x1F43F},
142 {0x1F440, 0x1F440},
143 {0x1F441, 0x1F441},
144 {0x1F442, 0x1F4F7},
145 {0x1F4F8, 0x1F4F8},
146 {0x1F4F9, 0x1F4FC},
147 {0x1F4FD, 0x1F4FE},
148 {0x1F4FF, 0x1F4FF},
149 {0x1F500, 0x1F53D},
150 {0x1F546, 0x1F54A},
151 {0x1F54B, 0x1F54F},
152 {0x1F550, 0x1F567},
153 {0x1F568, 0x1F579},
154 {0x1F57A, 0x1F57A},
155 {0x1F57B, 0x1F5A3},
156 {0x1F5A4, 0x1F5A4},
157 {0x1F5A5, 0x1F5FA},
158 {0x1F5FB, 0x1F5FF},
159 {0x1F600, 0x1F600},
160 {0x1F601, 0x1F610},
161 {0x1F611, 0x1F611},
162 {0x1F612, 0x1F614},
163 {0x1F615, 0x1F615},
164 {0x1F616, 0x1F616},
165 {0x1F617, 0x1F617},
166 {0x1F618, 0x1F618},
167 {0x1F619, 0x1F619},
168 {0x1F61A, 0x1F61A},
169 {0x1F61B, 0x1F61B},
170 {0x1F61C, 0x1F61E},
171 {0x1F61F, 0x1F61F},
172 {0x1F620, 0x1F625},
173 {0x1F626, 0x1F627},
174 {0x1F628, 0x1F62B},
175 {0x1F62C, 0x1F62C},
176 {0x1F62D, 0x1F62D},
177 {0x1F62E, 0x1F62F},
178 {0x1F630, 0x1F633},
179 {0x1F634, 0x1F634},
180 {0x1F635, 0x1F640},
181 {0x1F641, 0x1F642},
182 {0x1F643, 0x1F644},
183 {0x1F645, 0x1F64F},
184 {0x1F680, 0x1F6C5},
185 {0x1F6C6, 0x1F6CF},
186 {0x1F6D0, 0x1F6D0},
187 {0x1F6D1, 0x1F6D2},
188 {0x1F6D3, 0x1F6D4},
189 {0x1F6D5, 0x1F6D5},
190 {0x1F6D6, 0x1F6DF},
191 {0x1F6E0, 0x1F6EC},
192 {0x1F6ED, 0x1F6EF},
193 {0x1F6F0, 0x1F6F3},
194 {0x1F6F4, 0x1F6F6},
195 {0x1F6F7, 0x1F6F8},
196 {0x1F6F9, 0x1F6F9},
197 {0x1F6FA, 0x1F6FA},
198 {0x1F6FB, 0x1F6FF},
199 {0x1F774, 0x1F77F},
200 {0x1F7D5, 0x1F7D8},
201 {0x1F7D9, 0x1F7DF},
202 {0x1F7E0, 0x1F7EB},
203 {0x1F7EC, 0x1F7FF},
204 {0x1F80C, 0x1F80F},
205 {0x1F848, 0x1F84F},
206 {0x1F85A, 0x1F85F},
207 {0x1F888, 0x1F88F},
208 {0x1F8AE, 0x1F8FF},
209 {0x1F90C, 0x1F90C},
210 {0x1F90D, 0x1F90F},
211 {0x1F910, 0x1F918},
212 {0x1F919, 0x1F91E},
213 {0x1F91F, 0x1F91F},
214 {0x1F920, 0x1F927},
215 {0x1F928, 0x1F92F},
216 {0x1F930, 0x1F930},
217 {0x1F931, 0x1F932},
218 {0x1F933, 0x1F93A},
219 {0x1F93C, 0x1F93E},
220 {0x1F93F, 0x1F93F},
221 {0x1F940, 0x1F945},
222 {0x1F947, 0x1F94B},
223 {0x1F94C, 0x1F94C},
224 {0x1F94D, 0x1F94F},
225 {0x1F950, 0x1F95E},
226 {0x1F95F, 0x1F96B},
227 {0x1F96C, 0x1F970},
228 {0x1F971, 0x1F971},
229 {0x1F972, 0x1F972},
230 {0x1F973, 0x1F976},
231 {0x1F977, 0x1F979},
232 {0x1F97A, 0x1F97A},
233 {0x1F97B, 0x1F97B},
234 {0x1F97C, 0x1F97F},
235 {0x1F980, 0x1F984},
236 {0x1F985, 0x1F991},
237 {0x1F992, 0x1F997},
238 {0x1F998, 0x1F9A2},
239 {0x1F9A3, 0x1F9A4},
240 {0x1F9A5, 0x1F9AA},
241 {0x1F9AB, 0x1F9AD},
242 {0x1F9AE, 0x1F9AF},
243 {0x1F9B0, 0x1F9B9},
244 {0x1F9BA, 0x1F9BF},
245 {0x1F9C0, 0x1F9C0},
246 {0x1F9C1, 0x1F9C2},
247 {0x1F9C3, 0x1F9CA},
248 {0x1F9CB, 0x1F9CC},
249 {0x1F9CD, 0x1F9CF},
250 {0x1F9D0, 0x1F9E6},
251 {0x1F9E7, 0x1F9FF},
252 {0x1FA00, 0x1FA53},
253 {0x1FA54, 0x1FA5F},
254 {0x1FA60, 0x1FA6D},
255 {0x1FA6E, 0x1FA6F},
256 {0x1FA70, 0x1FA73},
257 {0x1FA74, 0x1FA77},
258 {0x1FA78, 0x1FA7A},
259 {0x1FA7B, 0x1FA7F},
260 {0x1FA80, 0x1FA82},
261 {0x1FA83, 0x1FA8F},
262 {0x1FA90, 0x1FA95},
263 {0x1FA96, 0x1FFFD},
264};
diff --git a/src/static_libs/libunibreak/emojidef.c b/src/static_libs/libunibreak/emojidef.c
new file mode 100644
index 0000000000..43a2ed3db0
--- /dev/null
+++ b/src/static_libs/libunibreak/emojidef.c
@@ -0,0 +1,61 @@
1/*
2 * Emoji-related routine and data.
3 *
4 * Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net>
5 *
6 * This software is provided 'as-is', without any express or implied
7 * warranty. In no event will the author be held liable for any damages
8 * arising from the use of this software.
9 *
10 * Permission is granted to anyone to use this software for any purpose,
11 * including commercial applications, and to alter it and redistribute
12 * it freely, subject to the following restrictions:
13 *
14 * 1. The origin of this software must not be misrepresented; you must
15 * not claim that you wrote the original software. If you use this
16 * software in a product, an acknowledgement in the product
17 * documentation would be appreciated but is not required.
18 * 2. Altered source versions must be plainly marked as such, and must
19 * not be misrepresented as being the original software.
20 * 3. This notice may not be removed or altered from any source
21 * distribution.
22 */
23
24/**
25 * @file emojidef.c
26 *
27 * Emoji-related routine and data that are used internally.
28 *
29 * @author Andreas Röver
30 */
31
32#include "emojidef.h"
33#include "emojidata.c"
34
35/**
36 * Finds out if a codepoint is extended pictographic.
37 *
38 * @param[in] ch character to check
39 * @return \c true if the codepoint is extended pictographic;
40 * \c false otherwise
41 */
42bool ub_is_extended_pictographic(utf32_t ch)
43{ /* binary search for ch over the {start, end} ranges stored in ep_prop */
44 int min = 0; /* lowest index still inside the search window */
45 int max = ARRAY_LEN(ep_prop) - 1; /* highest index still inside the search window */
46 int mid;
47
48 do /* the body runs before the first min <= max test, so ep_prop[mid] is read at least once */
49 {
50 mid = (min + max) / 2; /* probe the middle entry of the window */
51
52 if (ch < ep_prop[mid].start) /* ch lies below this range: keep the lower half */
53 max = mid - 1;
54 else if (ch > ep_prop[mid].end) /* ch lies above this range: keep the upper half */
55 min = mid + 1;
56 else
57 return true; /* start <= ch <= end: ch falls inside this range */
58 } while (min <= max); /* stop once the window is empty */
59
60 return false; /* no range in ep_prop contains ch */
61}
diff --git a/src/static_libs/libunibreak/emojidef.h b/src/static_libs/libunibreak/emojidef.h
new file mode 100644
index 0000000000..b9055fd261
--- /dev/null
+++ b/src/static_libs/libunibreak/emojidef.h
@@ -0,0 +1,46 @@
1/*
2 * Emoji-related routine and data.
3 *
4 * Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net>
5 *
6 * This software is provided 'as-is', without any express or implied
7 * warranty. In no event will the author be held liable for any damages
8 * arising from the use of this software.
9 *
10 * Permission is granted to anyone to use this software for any purpose,
11 * including commercial applications, and to alter it and redistribute
12 * it freely, subject to the following restrictions:
13 *
14 * 1. The origin of this software must not be misrepresented; you must
15 * not claim that you wrote the original software. If you use this
16 * software in a product, an acknowledgement in the product
17 * documentation would be appreciated but is not required.
18 * 2. Altered source versions must be plainly marked as such, and must
19 * not be misrepresented as being the original software.
20 * 3. This notice may not be removed or altered from any source
21 * distribution.
22 */
23
24/**
25 * @file emojidef.h
26 *
27 * Definitions of internal data structure and function for extended
28 * pictographs.
29 *
30 * @author Andreas Röver
31 */
32
33#include "unibreakdef.h"
34
35/**
36 * Struct for entries of extended pictographic properties. The array of
37 * the entries \e must be sorted. All codepoints within this list have
38 * the property of being extended pictographic.
39 */
40struct ExtendedPictograpic /* one closed range [start, end] of codepoints */
41{
42 utf32_t start; /**< First codepoint of the range, inclusive */
43 utf32_t end; /**< Last codepoint of the range, inclusive */
44};
45
46bool ub_is_extended_pictographic(utf32_t ch);
diff --git a/src/static_libs/libunibreak/graphemebreak.c b/src/static_libs/libunibreak/graphemebreak.c
index 77c3d5f55c..401522f12d 100644
--- a/src/static_libs/libunibreak/graphemebreak.c
+++ b/src/static_libs/libunibreak/graphemebreak.c
@@ -2,7 +2,7 @@
2 * Grapheme breaking in a Unicode sequence. Designed to be used in a 2 * Grapheme breaking in a Unicode sequence. Designed to be used in a
3 * generic text renderer. 3 * generic text renderer.
4 * 4 *
5 * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> 5 * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
6 * 6 *
7 * This software is provided 'as-is', without any express or implied 7 * This software is provided 'as-is', without any express or implied
8 * warranty. In no event will the author be held liable for any damages 8 * warranty. In no event will the author be held liable for any damages
@@ -28,6 +28,10 @@
28 * Unicode 9.0.0: 28 * Unicode 9.0.0:
29 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> 29 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
30 * 30 *
31 * This library has been updated according to Revision 35, for
32 * Unicode 12.0.0:
33 * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
34 *
31 * The Unicode Terms of Use are available at 35 * The Unicode Terms of Use are available at
32 * <URL:http://www.unicode.org/copyright.html> 36 * <URL:http://www.unicode.org/copyright.html>
33 */ 37 */
@@ -38,23 +42,14 @@
38 * Implementation of the grapheme breaking algorithm as described in Unicode 42 * Implementation of the grapheme breaking algorithm as described in Unicode
39 * Standard Annex 29. 43 * Standard Annex 29.
40 * 44 *
41 * @author Andreas Roever 45 * @author Andreas Röver
42 */ 46 */
43 47
44#if defined(_MSC_VER) && _MSC_VER < 1800
45typedef int bool;
46#define false 0
47#define true 1
48#else
49#include <stdbool.h>
50#endif
51
52#include <string.h> 48#include <string.h>
53#include "graphemebreak.h" 49#include "graphemebreak.h"
54#include "graphemebreakdata.c" 50#include "graphemebreakdata.c"
55#include "unibreakdef.h" 51#include "unibreakdef.h"
56 52#include "emojidef.h"
57#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
58 53
59/** 54/**
60 * Initializes the wordbreak internals. It currently does nothing, but 55 * Initializes the wordbreak internals. It currently does nothing, but
@@ -67,8 +62,8 @@ void init_graphemebreak(void)
67/** 62/**
68 * Gets the grapheme breaking class of a character. 63 * Gets the grapheme breaking class of a character.
69 * 64 *
70 * @param ch character to check 65 * @param[in] ch character to check
71 * @return the grapheme breaking class if found; \c GBP_Other otherwise 66 * @return the grapheme breaking class if found; \c GBP_Other otherwise
72 */ 67 */
73static enum GraphemeBreakClass get_char_gb_class(utf32_t ch) 68static enum GraphemeBreakClass get_char_gb_class(utf32_t ch)
74{ 69{
@@ -93,6 +88,7 @@ static enum GraphemeBreakClass get_char_gb_class(utf32_t ch)
93 88
94/** 89/**
95 * Sets the grapheme breaking information for a generic input string. 90 * Sets the grapheme breaking information for a generic input string.
91 * It uses the extended grapheme cluster ruleset.
96 * 92 *
97 * @param[in] s input string 93 * @param[in] s input string
98 * @param[in] len length of the input 94 * @param[in] len length of the input
@@ -104,7 +100,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
104 get_next_char_t get_next_char) 100 get_next_char_t get_next_char)
105{ 101{
106 size_t posNext = 0; 102 size_t posNext = 0;
107 bool rule10Left = false; // is the left side of rule 10 fulfilled? 103 int rule11Detector = 0;
108 bool evenRegionalIndicators = true; // is the number of preceeding 104 bool evenRegionalIndicators = true; // is the number of preceeding
109 // GBP_RegionalIndicator characters 105 // GBP_RegionalIndicator characters
110 // even 106 // even
@@ -117,6 +113,47 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
117 113
118 while (true) 114 while (true)
119 { 115 {
116
117 // this state-machine recognizes the following pattern:
118 // extended_pictograph Extended* ZWJ
119 // when that pattern has been detected rule11Detector will be
120 // 3 and rule 11 can be applied below
121 switch (current_class)
122 {
123 case GBP_ZWJ:
124 if (rule11Detector == 1 || rule11Detector == 2)
125 {
126 rule11Detector = 3;
127 }
128 else
129 {
130 rule11Detector = 0;
131 }
132 break;
133
134 case GBP_Extend:
135 if (rule11Detector == 1 || rule11Detector == 2)
136 {
137 rule11Detector = 2;
138 }
139 else
140 {
141 rule11Detector = 0;
142 }
143 break;
144
145 default:
146 if (ub_is_extended_pictographic(ch))
147 {
148 rule11Detector = 1;
149 }
150 else
151 {
152 rule11Detector = 0;
153 }
154 break;
155 }
156
120 enum GraphemeBreakClass prev_class = current_class; 157 enum GraphemeBreakClass prev_class = current_class;
121 158
122 // safe position if current character so that we can store the 159 // safe position if current character so that we can store the
@@ -137,16 +174,6 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
137 // get class of current character 174 // get class of current character
138 current_class = get_char_gb_class(ch); 175 current_class = get_char_gb_class(ch);
139 176
140 // update some helper variables
141 if ((prev_class == GBP_E_Base) || (prev_class == GBP_E_Base_GAZ))
142 {
143 rule10Left = true;
144 }
145 else if (prev_class != GBP_Extend)
146 {
147 rule10Left = false;
148 }
149
150 if (prev_class == GBP_Regional_Indicator) 177 if (prev_class == GBP_Regional_Indicator)
151 { 178 {
152 evenRegionalIndicators = !evenRegionalIndicators; 179 evenRegionalIndicators = !evenRegionalIndicators;
@@ -185,7 +212,8 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
185 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB8 212 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB8
186 } 213 }
187 else if ((current_class == GBP_Extend) || 214 else if ((current_class == GBP_Extend) ||
188 (current_class == GBP_ZWJ)) 215 (current_class == GBP_ZWJ) ||
216 (current_class == GBP_Virama))
189 { 217 {
190 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9 218 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9
191 } 219 }
@@ -197,13 +225,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
197 { 225 {
198 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9b 226 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9b
199 } 227 }
200 else if (rule10Left && (current_class == GBP_E_Modifier)) 228 else if ((rule11Detector == 3) && ub_is_extended_pictographic(ch))
201 {
202 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB10
203 }
204 else if ((prev_class == GBP_ZWJ) &&
205 ((current_class == GBP_Glue_After_Zwj) ||
206 (current_class == GBP_E_Base_GAZ)))
207 { 229 {
208 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB11 230 brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB11
209 } 231 }
diff --git a/src/static_libs/libunibreak/graphemebreak.h b/src/static_libs/libunibreak/graphemebreak.h
index c01768233a..e5259b5ccd 100644
--- a/src/static_libs/libunibreak/graphemebreak.h
+++ b/src/static_libs/libunibreak/graphemebreak.h
@@ -2,7 +2,7 @@
2 * Grapheme breaking in a Unicode sequence. Designed to be used in a 2 * Grapheme breaking in a Unicode sequence. Designed to be used in a
3 * generic text renderer. 3 * generic text renderer.
4 * 4 *
5 * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> 5 * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
6 * 6 *
7 * This software is provided 'as-is', without any express or implied 7 * This software is provided 'as-is', without any express or implied
8 * warranty. In no event will the author be held liable for any damages 8 * warranty. In no event will the author be held liable for any damages
@@ -28,6 +28,10 @@
28 * Unicode 9.0.0: 28 * Unicode 9.0.0:
29 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> 29 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
30 * 30 *
31 * This library has been updated according to Revision 35, for
32 * Unicode 12.0.0:
33 * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
34 *
31 * The Unicode Terms of Use are available at 35 * The Unicode Terms of Use are available at
32 * <URL:http://www.unicode.org/copyright.html> 36 * <URL:http://www.unicode.org/copyright.html>
33 */ 37 */
@@ -66,4 +70,4 @@ void set_graphemebreaks_utf32(const utf32_t *s, size_t len,
66} 70}
67#endif 71#endif
68 72
69#endif 73#endif /* GRAPHEMEBREAK_H */
diff --git a/src/static_libs/libunibreak/graphemebreakdata.c b/src/static_libs/libunibreak/graphemebreakdata.c
index cab9bebd80..bc1af932cf 100644
--- a/src/static_libs/libunibreak/graphemebreakdata.c
+++ b/src/static_libs/libunibreak/graphemebreakdata.c
@@ -1,6 +1,6 @@
1/* The content of this file is generated from: 1/* The content of this file is generated from:
2# GraphemeBreakProperty-9.0.0.txt 2# GraphemeBreakProperty-12.1.0.txt
3# Date: 2016-06-03, 22:23:55 GMT 3# Date: 2019-03-10, 10:53:12 GMT
4*/ 4*/
5 5
6#include "graphemebreakdef.h" 6#include "graphemebreakdef.h"
@@ -36,12 +36,13 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
36 {0x0730, 0x074A, GBP_Extend}, 36 {0x0730, 0x074A, GBP_Extend},
37 {0x07A6, 0x07B0, GBP_Extend}, 37 {0x07A6, 0x07B0, GBP_Extend},
38 {0x07EB, 0x07F3, GBP_Extend}, 38 {0x07EB, 0x07F3, GBP_Extend},
39 {0x07FD, 0x07FD, GBP_Extend},
39 {0x0816, 0x0819, GBP_Extend}, 40 {0x0816, 0x0819, GBP_Extend},
40 {0x081B, 0x0823, GBP_Extend}, 41 {0x081B, 0x0823, GBP_Extend},
41 {0x0825, 0x0827, GBP_Extend}, 42 {0x0825, 0x0827, GBP_Extend},
42 {0x0829, 0x082D, GBP_Extend}, 43 {0x0829, 0x082D, GBP_Extend},
43 {0x0859, 0x085B, GBP_Extend}, 44 {0x0859, 0x085B, GBP_Extend},
44 {0x08D4, 0x08E1, GBP_Extend}, 45 {0x08D3, 0x08E1, GBP_Extend},
45 {0x08E2, 0x08E2, GBP_Prepend}, 46 {0x08E2, 0x08E2, GBP_Prepend},
46 {0x08E3, 0x0902, GBP_Extend}, 47 {0x08E3, 0x0902, GBP_Extend},
47 {0x0903, 0x0903, GBP_SpacingMark}, 48 {0x0903, 0x0903, GBP_SpacingMark},
@@ -66,6 +67,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
66 {0x09CD, 0x09CD, GBP_Extend}, 67 {0x09CD, 0x09CD, GBP_Extend},
67 {0x09D7, 0x09D7, GBP_Extend}, 68 {0x09D7, 0x09D7, GBP_Extend},
68 {0x09E2, 0x09E3, GBP_Extend}, 69 {0x09E2, 0x09E3, GBP_Extend},
70 {0x09FE, 0x09FE, GBP_Extend},
69 {0x0A01, 0x0A02, GBP_Extend}, 71 {0x0A01, 0x0A02, GBP_Extend},
70 {0x0A03, 0x0A03, GBP_SpacingMark}, 72 {0x0A03, 0x0A03, GBP_SpacingMark},
71 {0x0A3C, 0x0A3C, GBP_Extend}, 73 {0x0A3C, 0x0A3C, GBP_Extend},
@@ -86,6 +88,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
86 {0x0ACB, 0x0ACC, GBP_SpacingMark}, 88 {0x0ACB, 0x0ACC, GBP_SpacingMark},
87 {0x0ACD, 0x0ACD, GBP_Extend}, 89 {0x0ACD, 0x0ACD, GBP_Extend},
88 {0x0AE2, 0x0AE3, GBP_Extend}, 90 {0x0AE2, 0x0AE3, GBP_Extend},
91 {0x0AFA, 0x0AFF, GBP_Extend},
89 {0x0B01, 0x0B01, GBP_Extend}, 92 {0x0B01, 0x0B01, GBP_Extend},
90 {0x0B02, 0x0B03, GBP_SpacingMark}, 93 {0x0B02, 0x0B03, GBP_SpacingMark},
91 {0x0B3C, 0x0B3C, GBP_Extend}, 94 {0x0B3C, 0x0B3C, GBP_Extend},
@@ -110,6 +113,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
110 {0x0BD7, 0x0BD7, GBP_Extend}, 113 {0x0BD7, 0x0BD7, GBP_Extend},
111 {0x0C00, 0x0C00, GBP_Extend}, 114 {0x0C00, 0x0C00, GBP_Extend},
112 {0x0C01, 0x0C03, GBP_SpacingMark}, 115 {0x0C01, 0x0C03, GBP_SpacingMark},
116 {0x0C04, 0x0C04, GBP_Extend},
113 {0x0C3E, 0x0C40, GBP_Extend}, 117 {0x0C3E, 0x0C40, GBP_Extend},
114 {0x0C41, 0x0C44, GBP_SpacingMark}, 118 {0x0C41, 0x0C44, GBP_SpacingMark},
115 {0x0C46, 0x0C48, GBP_Extend}, 119 {0x0C46, 0x0C48, GBP_Extend},
@@ -130,8 +134,9 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
130 {0x0CCC, 0x0CCD, GBP_Extend}, 134 {0x0CCC, 0x0CCD, GBP_Extend},
131 {0x0CD5, 0x0CD6, GBP_Extend}, 135 {0x0CD5, 0x0CD6, GBP_Extend},
132 {0x0CE2, 0x0CE3, GBP_Extend}, 136 {0x0CE2, 0x0CE3, GBP_Extend},
133 {0x0D01, 0x0D01, GBP_Extend}, 137 {0x0D00, 0x0D01, GBP_Extend},
134 {0x0D02, 0x0D03, GBP_SpacingMark}, 138 {0x0D02, 0x0D03, GBP_SpacingMark},
139 {0x0D3B, 0x0D3C, GBP_Extend},
135 {0x0D3E, 0x0D3E, GBP_Extend}, 140 {0x0D3E, 0x0D3E, GBP_Extend},
136 {0x0D3F, 0x0D40, GBP_SpacingMark}, 141 {0x0D3F, 0x0D40, GBP_SpacingMark},
137 {0x0D41, 0x0D44, GBP_Extend}, 142 {0x0D41, 0x0D44, GBP_Extend},
@@ -156,8 +161,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
156 {0x0E47, 0x0E4E, GBP_Extend}, 161 {0x0E47, 0x0E4E, GBP_Extend},
157 {0x0EB1, 0x0EB1, GBP_Extend}, 162 {0x0EB1, 0x0EB1, GBP_Extend},
158 {0x0EB3, 0x0EB3, GBP_SpacingMark}, 163 {0x0EB3, 0x0EB3, GBP_SpacingMark},
159 {0x0EB4, 0x0EB9, GBP_Extend}, 164 {0x0EB4, 0x0EBC, GBP_Extend},
160 {0x0EBB, 0x0EBC, GBP_Extend},
161 {0x0EC8, 0x0ECD, GBP_Extend}, 165 {0x0EC8, 0x0ECD, GBP_Extend},
162 {0x0F18, 0x0F19, GBP_Extend}, 166 {0x0F18, 0x0F19, GBP_Extend},
163 {0x0F35, 0x0F35, GBP_Extend}, 167 {0x0F35, 0x0F35, GBP_Extend},
@@ -232,7 +236,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
232 {0x1B00, 0x1B03, GBP_Extend}, 236 {0x1B00, 0x1B03, GBP_Extend},
233 {0x1B04, 0x1B04, GBP_SpacingMark}, 237 {0x1B04, 0x1B04, GBP_SpacingMark},
234 {0x1B34, 0x1B34, GBP_Extend}, 238 {0x1B34, 0x1B34, GBP_Extend},
235 {0x1B35, 0x1B35, GBP_SpacingMark}, 239 {0x1B35, 0x1B35, GBP_Extend},
236 {0x1B36, 0x1B3A, GBP_Extend}, 240 {0x1B36, 0x1B3A, GBP_Extend},
237 {0x1B3B, 0x1B3B, GBP_SpacingMark}, 241 {0x1B3B, 0x1B3B, GBP_SpacingMark},
238 {0x1B3C, 0x1B3C, GBP_Extend}, 242 {0x1B3C, 0x1B3C, GBP_Extend},
@@ -265,10 +269,10 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
265 {0x1CE1, 0x1CE1, GBP_SpacingMark}, 269 {0x1CE1, 0x1CE1, GBP_SpacingMark},
266 {0x1CE2, 0x1CE8, GBP_Extend}, 270 {0x1CE2, 0x1CE8, GBP_Extend},
267 {0x1CED, 0x1CED, GBP_Extend}, 271 {0x1CED, 0x1CED, GBP_Extend},
268 {0x1CF2, 0x1CF3, GBP_SpacingMark},
269 {0x1CF4, 0x1CF4, GBP_Extend}, 272 {0x1CF4, 0x1CF4, GBP_Extend},
273 {0x1CF7, 0x1CF7, GBP_SpacingMark},
270 {0x1CF8, 0x1CF9, GBP_Extend}, 274 {0x1CF8, 0x1CF9, GBP_Extend},
271 {0x1DC0, 0x1DF5, GBP_Extend}, 275 {0x1DC0, 0x1DF9, GBP_Extend},
272 {0x1DFB, 0x1DFF, GBP_Extend}, 276 {0x1DFB, 0x1DFF, GBP_Extend},
273 {0x200B, 0x200B, GBP_Control}, 277 {0x200B, 0x200B, GBP_Control},
274 {0x200C, 0x200C, GBP_Extend}, 278 {0x200C, 0x200C, GBP_Extend},
@@ -285,10 +289,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
285 {0x20E1, 0x20E1, GBP_Extend}, 289 {0x20E1, 0x20E1, GBP_Extend},
286 {0x20E2, 0x20E4, GBP_Extend}, 290 {0x20E2, 0x20E4, GBP_Extend},
287 {0x20E5, 0x20F0, GBP_Extend}, 291 {0x20E5, 0x20F0, GBP_Extend},
288 {0x261D, 0x261D, GBP_E_Base},
289 {0x26F9, 0x26F9, GBP_E_Base},
290 {0x270A, 0x270D, GBP_E_Base},
291 {0x2764, 0x2764, GBP_Glue_After_Zwj},
292 {0x2CEF, 0x2CF1, GBP_Extend}, 292 {0x2CEF, 0x2CF1, GBP_Extend},
293 {0x2D7F, 0x2D7F, GBP_Extend}, 293 {0x2D7F, 0x2D7F, GBP_Extend},
294 {0x2DE0, 0x2DFF, GBP_Extend}, 294 {0x2DE0, 0x2DFF, GBP_Extend},
@@ -310,6 +310,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
310 {0xA8B4, 0xA8C3, GBP_SpacingMark}, 310 {0xA8B4, 0xA8C3, GBP_SpacingMark},
311 {0xA8C4, 0xA8C5, GBP_Extend}, 311 {0xA8C4, 0xA8C5, GBP_Extend},
312 {0xA8E0, 0xA8F1, GBP_Extend}, 312 {0xA8E0, 0xA8F1, GBP_Extend},
313 {0xA8FF, 0xA8FF, GBP_Extend},
313 {0xA926, 0xA92D, GBP_Extend}, 314 {0xA926, 0xA92D, GBP_Extend},
314 {0xA947, 0xA951, GBP_Extend}, 315 {0xA947, 0xA951, GBP_Extend},
315 {0xA952, 0xA953, GBP_SpacingMark}, 316 {0xA952, 0xA953, GBP_SpacingMark},
@@ -320,8 +321,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
320 {0xA9B4, 0xA9B5, GBP_SpacingMark}, 321 {0xA9B4, 0xA9B5, GBP_SpacingMark},
321 {0xA9B6, 0xA9B9, GBP_Extend}, 322 {0xA9B6, 0xA9B9, GBP_Extend},
322 {0xA9BA, 0xA9BB, GBP_SpacingMark}, 323 {0xA9BA, 0xA9BB, GBP_SpacingMark},
323 {0xA9BC, 0xA9BC, GBP_Extend}, 324 {0xA9BC, 0xA9BD, GBP_Extend},
324 {0xA9BD, 0xA9C0, GBP_SpacingMark}, 325 {0xA9BE, 0xA9C0, GBP_SpacingMark},
325 {0xA9E5, 0xA9E5, GBP_Extend}, 326 {0xA9E5, 0xA9E5, GBP_Extend},
326 {0xAA29, 0xAA2E, GBP_Extend}, 327 {0xAA29, 0xAA2E, GBP_Extend},
327 {0xAA2F, 0xAA30, GBP_SpacingMark}, 328 {0xAA2F, 0xAA30, GBP_SpacingMark},
@@ -1149,7 +1150,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1149 {0xD789, 0xD7A3, GBP_LVT}, 1150 {0xD789, 0xD7A3, GBP_LVT},
1150 {0xD7B0, 0xD7C6, GBP_V}, 1151 {0xD7B0, 0xD7C6, GBP_V},
1151 {0xD7CB, 0xD7FB, GBP_T}, 1152 {0xD7CB, 0xD7FB, GBP_T},
1152 {0xD800, 0xDFFF, GBP_Control},
1153 {0xFB1E, 0xFB1E, GBP_Extend}, 1153 {0xFB1E, 0xFB1E, GBP_Extend},
1154 {0xFE00, 0xFE0F, GBP_Extend}, 1154 {0xFE00, 0xFE0F, GBP_Extend},
1155 {0xFE20, 0xFE2F, GBP_Extend}, 1155 {0xFE20, 0xFE2F, GBP_Extend},
@@ -1166,6 +1166,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1166 {0x10A38, 0x10A3A, GBP_Extend}, 1166 {0x10A38, 0x10A3A, GBP_Extend},
1167 {0x10A3F, 0x10A3F, GBP_Extend}, 1167 {0x10A3F, 0x10A3F, GBP_Extend},
1168 {0x10AE5, 0x10AE6, GBP_Extend}, 1168 {0x10AE5, 0x10AE6, GBP_Extend},
1169 {0x10D24, 0x10D27, GBP_Extend},
1170 {0x10F46, 0x10F50, GBP_Extend},
1169 {0x11000, 0x11000, GBP_SpacingMark}, 1171 {0x11000, 0x11000, GBP_SpacingMark},
1170 {0x11001, 0x11001, GBP_Extend}, 1172 {0x11001, 0x11001, GBP_Extend},
1171 {0x11002, 0x11002, GBP_SpacingMark}, 1173 {0x11002, 0x11002, GBP_SpacingMark},
@@ -1177,10 +1179,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1177 {0x110B7, 0x110B8, GBP_SpacingMark}, 1179 {0x110B7, 0x110B8, GBP_SpacingMark},
1178 {0x110B9, 0x110BA, GBP_Extend}, 1180 {0x110B9, 0x110BA, GBP_Extend},
1179 {0x110BD, 0x110BD, GBP_Prepend}, 1181 {0x110BD, 0x110BD, GBP_Prepend},
1182 {0x110CD, 0x110CD, GBP_Prepend},
1180 {0x11100, 0x11102, GBP_Extend}, 1183 {0x11100, 0x11102, GBP_Extend},
1181 {0x11127, 0x1112B, GBP_Extend}, 1184 {0x11127, 0x1112B, GBP_Extend},
1182 {0x1112C, 0x1112C, GBP_SpacingMark}, 1185 {0x1112C, 0x1112C, GBP_SpacingMark},
1183 {0x1112D, 0x11134, GBP_Extend}, 1186 {0x1112D, 0x11134, GBP_Extend},
1187 {0x11145, 0x11146, GBP_SpacingMark},
1184 {0x11173, 0x11173, GBP_Extend}, 1188 {0x11173, 0x11173, GBP_Extend},
1185 {0x11180, 0x11181, GBP_Extend}, 1189 {0x11180, 0x11181, GBP_Extend},
1186 {0x11182, 0x11182, GBP_SpacingMark}, 1190 {0x11182, 0x11182, GBP_SpacingMark},
@@ -1188,7 +1192,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1188 {0x111B6, 0x111BE, GBP_Extend}, 1192 {0x111B6, 0x111BE, GBP_Extend},
1189 {0x111BF, 0x111C0, GBP_SpacingMark}, 1193 {0x111BF, 0x111C0, GBP_SpacingMark},
1190 {0x111C2, 0x111C3, GBP_Prepend}, 1194 {0x111C2, 0x111C3, GBP_Prepend},
1191 {0x111CA, 0x111CC, GBP_Extend}, 1195 {0x111C9, 0x111CC, GBP_Extend},
1192 {0x1122C, 0x1122E, GBP_SpacingMark}, 1196 {0x1122C, 0x1122E, GBP_SpacingMark},
1193 {0x1122F, 0x11231, GBP_Extend}, 1197 {0x1122F, 0x11231, GBP_Extend},
1194 {0x11232, 0x11233, GBP_SpacingMark}, 1198 {0x11232, 0x11233, GBP_SpacingMark},
@@ -1201,7 +1205,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1201 {0x112E3, 0x112EA, GBP_Extend}, 1205 {0x112E3, 0x112EA, GBP_Extend},
1202 {0x11300, 0x11301, GBP_Extend}, 1206 {0x11300, 0x11301, GBP_Extend},
1203 {0x11302, 0x11303, GBP_SpacingMark}, 1207 {0x11302, 0x11303, GBP_SpacingMark},
1204 {0x1133C, 0x1133C, GBP_Extend}, 1208 {0x1133B, 0x1133C, GBP_Extend},
1205 {0x1133E, 0x1133E, GBP_Extend}, 1209 {0x1133E, 0x1133E, GBP_Extend},
1206 {0x1133F, 0x1133F, GBP_SpacingMark}, 1210 {0x1133F, 0x1133F, GBP_SpacingMark},
1207 {0x11340, 0x11340, GBP_Extend}, 1211 {0x11340, 0x11340, GBP_Extend},
@@ -1218,6 +1222,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1218 {0x11442, 0x11444, GBP_Extend}, 1222 {0x11442, 0x11444, GBP_Extend},
1219 {0x11445, 0x11445, GBP_SpacingMark}, 1223 {0x11445, 0x11445, GBP_SpacingMark},
1220 {0x11446, 0x11446, GBP_Extend}, 1224 {0x11446, 0x11446, GBP_Extend},
1225 {0x1145E, 0x1145E, GBP_Extend},
1221 {0x114B0, 0x114B0, GBP_Extend}, 1226 {0x114B0, 0x114B0, GBP_Extend},
1222 {0x114B1, 0x114B2, GBP_SpacingMark}, 1227 {0x114B1, 0x114B2, GBP_SpacingMark},
1223 {0x114B3, 0x114B8, GBP_Extend}, 1228 {0x114B3, 0x114B8, GBP_Extend},
@@ -1255,6 +1260,29 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1255 {0x11722, 0x11725, GBP_Extend}, 1260 {0x11722, 0x11725, GBP_Extend},
1256 {0x11726, 0x11726, GBP_SpacingMark}, 1261 {0x11726, 0x11726, GBP_SpacingMark},
1257 {0x11727, 0x1172B, GBP_Extend}, 1262 {0x11727, 0x1172B, GBP_Extend},
1263 {0x1182C, 0x1182E, GBP_SpacingMark},
1264 {0x1182F, 0x11837, GBP_Extend},
1265 {0x11838, 0x11838, GBP_SpacingMark},
1266 {0x11839, 0x1183A, GBP_Extend},
1267 {0x119D1, 0x119D3, GBP_SpacingMark},
1268 {0x119D4, 0x119D7, GBP_Extend},
1269 {0x119DA, 0x119DB, GBP_Extend},
1270 {0x119DC, 0x119DF, GBP_SpacingMark},
1271 {0x119E0, 0x119E0, GBP_Extend},
1272 {0x119E4, 0x119E4, GBP_SpacingMark},
1273 {0x11A01, 0x11A0A, GBP_Extend},
1274 {0x11A33, 0x11A38, GBP_Extend},
1275 {0x11A39, 0x11A39, GBP_SpacingMark},
1276 {0x11A3A, 0x11A3A, GBP_Prepend},
1277 {0x11A3B, 0x11A3E, GBP_Extend},
1278 {0x11A47, 0x11A47, GBP_Extend},
1279 {0x11A51, 0x11A56, GBP_Extend},
1280 {0x11A57, 0x11A58, GBP_SpacingMark},
1281 {0x11A59, 0x11A5B, GBP_Extend},
1282 {0x11A84, 0x11A89, GBP_Prepend},
1283 {0x11A8A, 0x11A96, GBP_Extend},
1284 {0x11A97, 0x11A97, GBP_SpacingMark},
1285 {0x11A98, 0x11A99, GBP_Extend},
1258 {0x11C2F, 0x11C2F, GBP_SpacingMark}, 1286 {0x11C2F, 0x11C2F, GBP_SpacingMark},
1259 {0x11C30, 0x11C36, GBP_Extend}, 1287 {0x11C30, 0x11C36, GBP_Extend},
1260 {0x11C38, 0x11C3D, GBP_Extend}, 1288 {0x11C38, 0x11C3D, GBP_Extend},
@@ -1267,9 +1295,25 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1267 {0x11CB2, 0x11CB3, GBP_Extend}, 1295 {0x11CB2, 0x11CB3, GBP_Extend},
1268 {0x11CB4, 0x11CB4, GBP_SpacingMark}, 1296 {0x11CB4, 0x11CB4, GBP_SpacingMark},
1269 {0x11CB5, 0x11CB6, GBP_Extend}, 1297 {0x11CB5, 0x11CB6, GBP_Extend},
1298 {0x11D31, 0x11D36, GBP_Extend},
1299 {0x11D3A, 0x11D3A, GBP_Extend},
1300 {0x11D3C, 0x11D3D, GBP_Extend},
1301 {0x11D3F, 0x11D45, GBP_Extend},
1302 {0x11D46, 0x11D46, GBP_Prepend},
1303 {0x11D47, 0x11D47, GBP_Extend},
1304 {0x11D8A, 0x11D8E, GBP_SpacingMark},
1305 {0x11D90, 0x11D91, GBP_Extend},
1306 {0x11D93, 0x11D94, GBP_SpacingMark},
1307 {0x11D95, 0x11D95, GBP_Extend},
1308 {0x11D96, 0x11D96, GBP_SpacingMark},
1309 {0x11D97, 0x11D97, GBP_Extend},
1310 {0x11EF3, 0x11EF4, GBP_Extend},
1311 {0x11EF5, 0x11EF6, GBP_SpacingMark},
1312 {0x13430, 0x13438, GBP_Control},
1270 {0x16AF0, 0x16AF4, GBP_Extend}, 1313 {0x16AF0, 0x16AF4, GBP_Extend},
1271 {0x16B30, 0x16B36, GBP_Extend}, 1314 {0x16B30, 0x16B36, GBP_Extend},
1272 {0x16F51, 0x16F7E, GBP_SpacingMark}, 1315 {0x16F4F, 0x16F4F, GBP_Extend},
1316 {0x16F51, 0x16F87, GBP_SpacingMark},
1273 {0x16F8F, 0x16F92, GBP_Extend}, 1317 {0x16F8F, 0x16F92, GBP_Extend},
1274 {0x1BC9D, 0x1BC9E, GBP_Extend}, 1318 {0x1BC9D, 0x1BC9E, GBP_Extend},
1275 {0x1BCA0, 0x1BCA3, GBP_Control}, 1319 {0x1BCA0, 0x1BCA3, GBP_Control},
@@ -1294,38 +1338,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
1294 {0x1E01B, 0x1E021, GBP_Extend}, 1338 {0x1E01B, 0x1E021, GBP_Extend},
1295 {0x1E023, 0x1E024, GBP_Extend}, 1339 {0x1E023, 0x1E024, GBP_Extend},
1296 {0x1E026, 0x1E02A, GBP_Extend}, 1340 {0x1E026, 0x1E02A, GBP_Extend},
1341 {0x1E130, 0x1E136, GBP_Extend},
1342 {0x1E2EC, 0x1E2EF, GBP_Extend},
1297 {0x1E8D0, 0x1E8D6, GBP_Extend}, 1343 {0x1E8D0, 0x1E8D6, GBP_Extend},
1298 {0x1E944, 0x1E94A, GBP_Extend}, 1344 {0x1E944, 0x1E94A, GBP_Extend},
1299 {0x1F1E6, 0x1F1FF, GBP_Regional_Indicator}, 1345 {0x1F1E6, 0x1F1FF, GBP_Regional_Indicator},
1300 {0x1F385, 0x1F385, GBP_E_Base}, 1346 {0x1F3FB, 0x1F3FF, GBP_Extend},
1301 {0x1F3C3, 0x1F3C4, GBP_E_Base},
1302 {0x1F3CA, 0x1F3CB, GBP_E_Base},
1303 {0x1F3FB, 0x1F3FF, GBP_E_Modifier},
1304 {0x1F442, 0x1F443, GBP_E_Base},
1305 {0x1F446, 0x1F450, GBP_E_Base},
1306 {0x1F466, 0x1F469, GBP_E_Base_GAZ},
1307 {0x1F46E, 0x1F46E, GBP_E_Base},
1308 {0x1F470, 0x1F478, GBP_E_Base},
1309 {0x1F47C, 0x1F47C, GBP_E_Base},
1310 {0x1F481, 0x1F483, GBP_E_Base},
1311 {0x1F485, 0x1F487, GBP_E_Base},
1312 {0x1F48B, 0x1F48B, GBP_Glue_After_Zwj},
1313 {0x1F4AA, 0x1F4AA, GBP_E_Base},
1314 {0x1F575, 0x1F575, GBP_E_Base},
1315 {0x1F57A, 0x1F57A, GBP_E_Base},
1316 {0x1F590, 0x1F590, GBP_E_Base},
1317 {0x1F595, 0x1F596, GBP_E_Base},
1318 {0x1F5E8, 0x1F5E8, GBP_Glue_After_Zwj},
1319 {0x1F645, 0x1F647, GBP_E_Base},
1320 {0x1F64B, 0x1F64F, GBP_E_Base},
1321 {0x1F6A3, 0x1F6A3, GBP_E_Base},
1322 {0x1F6B4, 0x1F6B6, GBP_E_Base},
1323 {0x1F6C0, 0x1F6C0, GBP_E_Base},
1324 {0x1F918, 0x1F91E, GBP_E_Base},
1325 {0x1F926, 0x1F926, GBP_E_Base},
1326 {0x1F930, 0x1F930, GBP_E_Base},
1327 {0x1F933, 0x1F939, GBP_E_Base},
1328 {0x1F93C, 0x1F93E, GBP_E_Base},
1329 {0xE0000, 0xE0000, GBP_Control}, 1347 {0xE0000, 0xE0000, GBP_Control},
1330 {0xE0001, 0xE0001, GBP_Control}, 1348 {0xE0001, 0xE0001, GBP_Control},
1331 {0xE0002, 0xE001F, GBP_Control}, 1349 {0xE0002, 0xE001F, GBP_Control},
diff --git a/src/static_libs/libunibreak/graphemebreakdef.h b/src/static_libs/libunibreak/graphemebreakdef.h
index 0de1f3d623..90ccfbd5f1 100644
--- a/src/static_libs/libunibreak/graphemebreakdef.h
+++ b/src/static_libs/libunibreak/graphemebreakdef.h
@@ -2,7 +2,7 @@
2 * Grapheme breaking in a Unicode sequence. Designed to be used in a 2 * Grapheme breaking in a Unicode sequence. Designed to be used in a
3 * generic text renderer. 3 * generic text renderer.
4 * 4 *
5 * Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net> 5 * Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
6 * 6 *
7 * This software is provided 'as-is', without any express or implied 7 * This software is provided 'as-is', without any express or implied
8 * warranty. In no event will the author be held liable for any damages 8 * warranty. In no event will the author be held liable for any damages
@@ -28,6 +28,10 @@
28 * Unicode 9.0.0: 28 * Unicode 9.0.0:
29 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> 29 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
30 * 30 *
31 * This library has been updated according to Revision 35, for
32 * Unicode 12.0.0:
33 * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
34 *
31 * The Unicode Terms of Use are available at 35 * The Unicode Terms of Use are available at
32 * <URL:http://www.unicode.org/copyright.html> 36 * <URL:http://www.unicode.org/copyright.html>
33 */ 37 */
@@ -45,13 +49,15 @@
45 49
46/** 50/**
47 * Word break classes. This is a direct mapping of Table 2 of Unicode 51 * Word break classes. This is a direct mapping of Table 2 of Unicode
48 * Standard Annex 29 52 * Standard Annex 29.
49 */ 53 */
50enum GraphemeBreakClass 54enum GraphemeBreakClass
51{ 55{
52 GBP_CR, 56 GBP_CR,
53 GBP_LF, 57 GBP_LF,
54 GBP_Control, 58 GBP_Control,
59 GBP_Virama,
60 GBP_LinkingConsonant,
55 GBP_Extend, 61 GBP_Extend,
56 GBP_ZWJ, 62 GBP_ZWJ,
57 GBP_Regional_Indicator, 63 GBP_Regional_Indicator,
@@ -62,10 +68,6 @@ enum GraphemeBreakClass
62 GBP_T, 68 GBP_T,
63 GBP_LV, 69 GBP_LV,
64 GBP_LVT, 70 GBP_LVT,
65 GBP_E_Base,
66 GBP_E_Modifier,
67 GBP_Glue_After_Zwj,
68 GBP_E_Base_GAZ,
69 GBP_Other, 71 GBP_Other,
70 GBP_Undefined 72 GBP_Undefined
71}; 73};
@@ -76,7 +78,7 @@ enum GraphemeBreakClass
76 */ 78 */
77struct GraphemeBreakProperties 79struct GraphemeBreakProperties
78{ 80{
79 utf32_t start; /**< Starting coding point */ 81 utf32_t start; /**< Start codepoint */
80 utf32_t end; /**< End coding point, including */ 82 utf32_t end; /**< End codepoint, inclusive */
81 enum GraphemeBreakClass prop; /**< The grapheme breaking property */ 83 enum GraphemeBreakClass prop; /**< The grapheme breaking property */
82}; 84};
diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c
index 41f23c1c08..98e2730314 100644
--- a/src/static_libs/libunibreak/linebreak.c
+++ b/src/static_libs/libunibreak/linebreak.c
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> 8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9 * 9 *
10 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
@@ -31,9 +31,9 @@
31 * Unicode 5.0.0: 31 * Unicode 5.0.0:
32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33 * 33 *
34 * This library has been updated according to Revision 37, for 34 * This library has been updated according to Revision 43, for
35 * Unicode 9.0.0: 35 * Unicode 12.0.0:
36 * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> 36 * <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
37 * 37 *
38 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
@@ -80,7 +80,9 @@ enum BreakAction
80 80
81/** 81/**
82 * Break action pair table. This is a direct mapping of Table 2 of 82 * Break action pair table. This is a direct mapping of Table 2 of
83 * Unicode Standard Annex 14, Revision 37, except the "CB" part. 83 * Unicode Standard Annex 14, Revision 37, except for ZWJ (manually
84 * adjusted after special processing as per LB8a of Revision 41) and CB
85 * (manually added as per LB20).
84 */ 86 */
85static enum BreakAction baTable[LBP_CB][LBP_CB] = { 87static enum BreakAction baTable[LBP_CB][LBP_CB] = {
86 { /* OP */ 88 { /* OP */
@@ -270,17 +272,17 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = {
270 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 272 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
271 DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, 273 DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
272 { /* ZWJ */ 274 { /* ZWJ */
273 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 275 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
274 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 276 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
275 IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 277 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
276 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 278 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
277 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK }, 279 DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
278 { /* CB */ 280 { /* CB */
279 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, 281 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK,
280 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 282 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
281 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 283 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
282 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 284 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
283 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, 285 DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
284}; 286};
285 287
286/** 288/**
@@ -288,8 +290,9 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = {
288 */ 290 */
289struct LineBreakPropertiesIndex 291struct LineBreakPropertiesIndex
290{ 292{
291 utf32_t end; /**< End coding point */ 293 utf32_t end; /**< End codepoint */
292 const struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */ 294 const struct LineBreakProperties *lbp; /**< Pointer to line breaking
295 properties */
293}; 296};
294 297
295/** 298/**
@@ -335,7 +338,7 @@ static __inline int ends_with(const char *str, const char *suffix,
335 * Initializes the second-level index to the line breaking properties. 338 * Initializes the second-level index to the line breaking properties.
336 * If it is not called, the performance of #get_char_lb_class_lang (and 339 * If it is not called, the performance of #get_char_lb_class_lang (and
337 * thus the main functionality) can be pretty bad, especially for big 340 * thus the main functionality) can be pretty bad, especially for big
338 * code points like those of Chinese. 341 * codepoints like those of Chinese.
339 */ 342 */
340void init_linebreak(void) 343void init_linebreak(void)
341{ 344{
@@ -612,12 +615,18 @@ static int get_lb_result_lookup(
612 break; 615 break;
613 } 616 }
614 617
618 /* Special processing due to rule LB8a */
619 if (lbpCtx->fLb8aZwj)
620 {
621 brk = LINEBREAK_NOBREAK;
622 }
623
615 /* Special processing due to rule LB21a */ 624 /* Special processing due to rule LB21a */
616 if (lbpCtx->fLb21aHebrew && 625 if (lbpCtx->fLb21aHebrew &&
617 (lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA)) 626 (lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA))
618 { 627 {
619 brk = LINEBREAK_NOBREAK; 628 brk = LINEBREAK_NOBREAK;
620 lbpCtx->fLb21aHebrew = 0; 629 lbpCtx->fLb21aHebrew = false;
621 } 630 }
622 else 631 else
623 { 632 {
@@ -663,17 +672,21 @@ void lb_init_break_context(
663 lbpCtx->lbcCur = resolve_lb_class( 672 lbpCtx->lbcCur = resolve_lb_class(
664 get_char_lb_class_lang(ch, lbpCtx->lbpLang), 673 get_char_lb_class_lang(ch, lbpCtx->lbpLang),
665 lbpCtx->lang); 674 lbpCtx->lang);
666 lbpCtx->fLb21aHebrew = 0; 675 lbpCtx->fLb8aZwj =
676 (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_ZWJ);
677 lbpCtx->fLb10LeadSpace =
678 (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_SP);
679 lbpCtx->fLb21aHebrew = false;
667 lbpCtx->cLb30aRI = 0; 680 lbpCtx->cLb30aRI = 0;
668 treat_first_char(lbpCtx); 681 treat_first_char(lbpCtx);
669} 682}
670 683
671/** 684/**
672 * Updates LineBreakingContext for the next code point and returns 685 * Updates LineBreakingContext for the next codepoint and returns
673 * the detected break. 686 * the detected break.
674 * 687 *
675 * @param[in,out] lbpCtx pointer to the line breaking context 688 * @param[in,out] lbpCtx pointer to the line breaking context
676 * @param[in] ch Unicode code point 689 * @param[in] ch Unicode codepoint
677 * @return break result, one of #LINEBREAK_MUSTBREAK, 690 * @return break result, one of #LINEBREAK_MUSTBREAK,
678 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK 691 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
679 * @post the line breaking context is updated 692 * @post the line breaking context is updated
@@ -700,6 +713,25 @@ int lb_process_next_char(
700 default: 713 default:
701 break; 714 break;
702 } 715 }
716
717 /* Special processing due to rule LB8a */
718 if (lbpCtx->lbcNew == LBP_ZWJ)
719 {
720 lbpCtx->fLb8aZwj = true;
721 }
722 else
723 {
724 lbpCtx->fLb8aZwj = false;
725 }
726
727 /* Special processing due to rule LB10 */
728 if (lbpCtx->fLb10LeadSpace)
729 {
730 if (lbpCtx->lbcNew == LBP_CM || lbpCtx->lbcNew == LBP_ZWJ)
731 brk = LINEBREAK_ALLOWBREAK;
732 lbpCtx->fLb10LeadSpace = false;
733 }
734
703 return brk; 735 return brk;
704} 736}
705 737
diff --git a/src/static_libs/libunibreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h
index fd7351191b..fa88094b4b 100644
--- a/src/static_libs/libunibreak/linebreak.h
+++ b/src/static_libs/libunibreak/linebreak.h
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +30,9 @@
30 * Unicode 5.0.0: 30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 32 *
33 * This library has been updated according to Revision 37, for 33 * This library has been updated according to Revision 43, for
34 * Unicode 9.0.0: 34 * Unicode 12.0.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> 35 * <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
36 * 36 *
37 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
diff --git a/src/static_libs/libunibreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c
index c571f2da00..23d9072baa 100644
--- a/src/static_libs/libunibreak/linebreakdata.c
+++ b/src/static_libs/libunibreak/linebreakdata.c
@@ -1,6 +1,6 @@
1/* The content of this file is generated from: 1/* The content of this file is generated from:
2# LineBreak-9.0.0.txt 2# LineBreak-12.1.0.txt
3# Date: 2016-05-26, 01:00:00 GMT [KW, LI] 3# Date: 2019-03-31, 22:04:15 GMT [KW, LI]
4*/ 4*/
5 5
6#include "linebreakdef.h" 6#include "linebreakdef.h"
@@ -94,7 +94,7 @@ const struct LineBreakProperties lb_prop_default[] = {
94 { 0x037E, 0x037E, LBP_IS }, 94 { 0x037E, 0x037E, LBP_IS },
95 { 0x037F, 0x0482, LBP_AL }, 95 { 0x037F, 0x0482, LBP_AL },
96 { 0x0483, 0x0489, LBP_CM }, 96 { 0x0483, 0x0489, LBP_CM },
97 { 0x048A, 0x0587, LBP_AL }, 97 { 0x048A, 0x0588, LBP_AL },
98 { 0x0589, 0x0589, LBP_IS }, 98 { 0x0589, 0x0589, LBP_IS },
99 { 0x058A, 0x058A, LBP_BA }, 99 { 0x058A, 0x058A, LBP_BA },
100 { 0x058D, 0x058E, LBP_AL }, 100 { 0x058D, 0x058E, LBP_AL },
@@ -149,7 +149,10 @@ const struct LineBreakProperties lb_prop_default[] = {
149 { 0x07F4, 0x07F7, LBP_AL }, 149 { 0x07F4, 0x07F7, LBP_AL },
150 { 0x07F8, 0x07F8, LBP_IS }, 150 { 0x07F8, 0x07F8, LBP_IS },
151 { 0x07F9, 0x07F9, LBP_EX }, 151 { 0x07F9, 0x07F9, LBP_EX },
152 { 0x07FA, 0x0815, LBP_AL }, 152 { 0x07FA, 0x07FA, LBP_AL },
153 { 0x07FD, 0x07FD, LBP_CM },
154 { 0x07FE, 0x07FF, LBP_PR },
155 { 0x0800, 0x0815, LBP_AL },
153 { 0x0816, 0x0819, LBP_CM }, 156 { 0x0816, 0x0819, LBP_CM },
154 { 0x081A, 0x081A, LBP_AL }, 157 { 0x081A, 0x081A, LBP_AL },
155 { 0x081B, 0x0823, LBP_CM }, 158 { 0x081B, 0x0823, LBP_CM },
@@ -160,7 +163,7 @@ const struct LineBreakProperties lb_prop_default[] = {
160 { 0x0830, 0x0858, LBP_AL }, 163 { 0x0830, 0x0858, LBP_AL },
161 { 0x0859, 0x085B, LBP_CM }, 164 { 0x0859, 0x085B, LBP_CM },
162 { 0x085E, 0x08BD, LBP_AL }, 165 { 0x085E, 0x08BD, LBP_AL },
163 { 0x08D4, 0x08E1, LBP_CM }, 166 { 0x08D3, 0x08E1, LBP_CM },
164 { 0x08E2, 0x08E2, LBP_AL }, 167 { 0x08E2, 0x08E2, LBP_AL },
165 { 0x08E3, 0x0903, LBP_CM }, 168 { 0x08E3, 0x0903, LBP_CM },
166 { 0x0904, 0x0939, LBP_AL }, 169 { 0x0904, 0x0939, LBP_AL },
@@ -190,14 +193,17 @@ const struct LineBreakProperties lb_prop_default[] = {
190 { 0x09F9, 0x09F9, LBP_PO }, 193 { 0x09F9, 0x09F9, LBP_PO },
191 { 0x09FA, 0x09FA, LBP_AL }, 194 { 0x09FA, 0x09FA, LBP_AL },
192 { 0x09FB, 0x09FB, LBP_PR }, 195 { 0x09FB, 0x09FB, LBP_PR },
193 { 0x0A01, 0x0A03, LBP_CM }, 196 { 0x09FC, 0x09FD, LBP_AL },
197 { 0x09FE, 0x0A03, LBP_CM },
194 { 0x0A05, 0x0A39, LBP_AL }, 198 { 0x0A05, 0x0A39, LBP_AL },
195 { 0x0A3C, 0x0A51, LBP_CM }, 199 { 0x0A3C, 0x0A51, LBP_CM },
196 { 0x0A59, 0x0A5E, LBP_AL }, 200 { 0x0A59, 0x0A5E, LBP_AL },
197 { 0x0A66, 0x0A6F, LBP_NU }, 201 { 0x0A66, 0x0A6F, LBP_NU },
198 { 0x0A70, 0x0A71, LBP_CM }, 202 { 0x0A70, 0x0A71, LBP_CM },
199 { 0x0A72, 0x0A74, LBP_AL }, 203 { 0x0A72, 0x0A74, LBP_AL },
200 { 0x0A75, 0x0A83, LBP_CM }, 204 { 0x0A75, 0x0A75, LBP_CM },
205 { 0x0A76, 0x0A76, LBP_AL },
206 { 0x0A81, 0x0A83, LBP_CM },
201 { 0x0A85, 0x0AB9, LBP_AL }, 207 { 0x0A85, 0x0AB9, LBP_AL },
202 { 0x0ABC, 0x0ABC, LBP_CM }, 208 { 0x0ABC, 0x0ABC, LBP_CM },
203 { 0x0ABD, 0x0ABD, LBP_AL }, 209 { 0x0ABD, 0x0ABD, LBP_AL },
@@ -208,7 +214,7 @@ const struct LineBreakProperties lb_prop_default[] = {
208 { 0x0AF0, 0x0AF0, LBP_AL }, 214 { 0x0AF0, 0x0AF0, LBP_AL },
209 { 0x0AF1, 0x0AF1, LBP_PR }, 215 { 0x0AF1, 0x0AF1, LBP_PR },
210 { 0x0AF9, 0x0AF9, LBP_AL }, 216 { 0x0AF9, 0x0AF9, LBP_AL },
211 { 0x0B01, 0x0B03, LBP_CM }, 217 { 0x0AFA, 0x0B03, LBP_CM },
212 { 0x0B05, 0x0B39, LBP_AL }, 218 { 0x0B05, 0x0B39, LBP_AL },
213 { 0x0B3C, 0x0B3C, LBP_CM }, 219 { 0x0B3C, 0x0B3C, LBP_CM },
214 { 0x0B3D, 0x0B3D, LBP_AL }, 220 { 0x0B3D, 0x0B3D, LBP_AL },
@@ -226,14 +232,16 @@ const struct LineBreakProperties lb_prop_default[] = {
226 { 0x0BF0, 0x0BF8, LBP_AL }, 232 { 0x0BF0, 0x0BF8, LBP_AL },
227 { 0x0BF9, 0x0BF9, LBP_PR }, 233 { 0x0BF9, 0x0BF9, LBP_PR },
228 { 0x0BFA, 0x0BFA, LBP_AL }, 234 { 0x0BFA, 0x0BFA, LBP_AL },
229 { 0x0C00, 0x0C03, LBP_CM }, 235 { 0x0C00, 0x0C04, LBP_CM },
230 { 0x0C05, 0x0C3D, LBP_AL }, 236 { 0x0C05, 0x0C3D, LBP_AL },
231 { 0x0C3E, 0x0C56, LBP_CM }, 237 { 0x0C3E, 0x0C56, LBP_CM },
232 { 0x0C58, 0x0C61, LBP_AL }, 238 { 0x0C58, 0x0C61, LBP_AL },
233 { 0x0C62, 0x0C63, LBP_CM }, 239 { 0x0C62, 0x0C63, LBP_CM },
234 { 0x0C66, 0x0C6F, LBP_NU }, 240 { 0x0C66, 0x0C6F, LBP_NU },
241 { 0x0C77, 0x0C77, LBP_BB },
235 { 0x0C78, 0x0C80, LBP_AL }, 242 { 0x0C78, 0x0C80, LBP_AL },
236 { 0x0C81, 0x0C83, LBP_CM }, 243 { 0x0C81, 0x0C83, LBP_CM },
244 { 0x0C84, 0x0C84, LBP_BB },
237 { 0x0C85, 0x0CB9, LBP_AL }, 245 { 0x0C85, 0x0CB9, LBP_AL },
238 { 0x0CBC, 0x0CBC, LBP_CM }, 246 { 0x0CBC, 0x0CBC, LBP_CM },
239 { 0x0CBD, 0x0CBD, LBP_AL }, 247 { 0x0CBD, 0x0CBD, LBP_AL },
@@ -242,8 +250,10 @@ const struct LineBreakProperties lb_prop_default[] = {
242 { 0x0CE2, 0x0CE3, LBP_CM }, 250 { 0x0CE2, 0x0CE3, LBP_CM },
243 { 0x0CE6, 0x0CEF, LBP_NU }, 251 { 0x0CE6, 0x0CEF, LBP_NU },
244 { 0x0CF1, 0x0CF2, LBP_AL }, 252 { 0x0CF1, 0x0CF2, LBP_AL },
245 { 0x0D01, 0x0D03, LBP_CM }, 253 { 0x0D00, 0x0D03, LBP_CM },
246 { 0x0D05, 0x0D3D, LBP_AL }, 254 { 0x0D05, 0x0D3A, LBP_AL },
255 { 0x0D3B, 0x0D3C, LBP_CM },
256 { 0x0D3D, 0x0D3D, LBP_AL },
247 { 0x0D3E, 0x0D4D, LBP_CM }, 257 { 0x0D3E, 0x0D4D, LBP_CM },
248 { 0x0D4E, 0x0D56, LBP_AL }, 258 { 0x0D4E, 0x0D56, LBP_AL },
249 { 0x0D57, 0x0D57, LBP_CM }, 259 { 0x0D57, 0x0D57, LBP_CM },
@@ -417,11 +427,11 @@ const struct LineBreakProperties lb_prop_default[] = {
417 { 0x1CD4, 0x1CE8, LBP_CM }, 427 { 0x1CD4, 0x1CE8, LBP_CM },
418 { 0x1CE9, 0x1CEC, LBP_AL }, 428 { 0x1CE9, 0x1CEC, LBP_AL },
419 { 0x1CED, 0x1CED, LBP_CM }, 429 { 0x1CED, 0x1CED, LBP_CM },
420 { 0x1CEE, 0x1CF1, LBP_AL }, 430 { 0x1CEE, 0x1CF3, LBP_AL },
421 { 0x1CF2, 0x1CF4, LBP_CM }, 431 { 0x1CF4, 0x1CF4, LBP_CM },
422 { 0x1CF5, 0x1CF6, LBP_AL }, 432 { 0x1CF5, 0x1CF6, LBP_AL },
423 { 0x1CF8, 0x1CF9, LBP_CM }, 433 { 0x1CF7, 0x1CF9, LBP_CM },
424 { 0x1D00, 0x1DBF, LBP_AL }, 434 { 0x1CFA, 0x1DBF, LBP_AL },
425 { 0x1DC0, 0x1DFF, LBP_CM }, 435 { 0x1DC0, 0x1DFF, LBP_CM },
426 { 0x1E00, 0x1FFC, LBP_AL }, 436 { 0x1E00, 0x1FFC, LBP_AL },
427 { 0x1FFD, 0x1FFD, LBP_BB }, 437 { 0x1FFD, 0x1FFD, LBP_BB },
@@ -430,7 +440,9 @@ const struct LineBreakProperties lb_prop_default[] = {
430 { 0x2007, 0x2007, LBP_GL }, 440 { 0x2007, 0x2007, LBP_GL },
431 { 0x2008, 0x200A, LBP_BA }, 441 { 0x2008, 0x200A, LBP_BA },
432 { 0x200B, 0x200B, LBP_ZW }, 442 { 0x200B, 0x200B, LBP_ZW },
433 { 0x200C, 0x200F, LBP_CM }, 443 { 0x200C, 0x200C, LBP_CM },
444 { 0x200D, 0x200D, LBP_ZWJ },
445 { 0x200E, 0x200F, LBP_CM },
434 { 0x2010, 0x2010, LBP_BA }, 446 { 0x2010, 0x2010, LBP_BA },
435 { 0x2011, 0x2011, LBP_GL }, 447 { 0x2011, 0x2011, LBP_GL },
436 { 0x2012, 0x2013, LBP_BA }, 448 { 0x2012, 0x2013, LBP_BA },
@@ -808,7 +820,11 @@ const struct LineBreakProperties lb_prop_default[] = {
808 { 0x2E3F, 0x2E3F, LBP_AL }, 820 { 0x2E3F, 0x2E3F, LBP_AL },
809 { 0x2E40, 0x2E41, LBP_BA }, 821 { 0x2E40, 0x2E41, LBP_BA },
810 { 0x2E42, 0x2E42, LBP_OP }, 822 { 0x2E42, 0x2E42, LBP_OP },
811 { 0x2E43, 0x2E44, LBP_BA }, 823 { 0x2E43, 0x2E4A, LBP_BA },
824 { 0x2E4B, 0x2E4B, LBP_AL },
825 { 0x2E4C, 0x2E4C, LBP_BA },
826 { 0x2E4D, 0x2E4D, LBP_AL },
827 { 0x2E4E, 0x2E4F, LBP_BA },
812 { 0x2E80, 0x2FFB, LBP_ID }, 828 { 0x2E80, 0x2FFB, LBP_ID },
813 { 0x3000, 0x3000, LBP_BA }, 829 { 0x3000, 0x3000, LBP_BA },
814 { 0x3001, 0x3002, LBP_CL }, 830 { 0x3001, 0x3002, LBP_CL },
@@ -942,7 +958,8 @@ const struct LineBreakProperties lb_prop_default[] = {
942 { 0xA8E0, 0xA8F1, LBP_CM }, 958 { 0xA8E0, 0xA8F1, LBP_CM },
943 { 0xA8F2, 0xA8FB, LBP_AL }, 959 { 0xA8F2, 0xA8FB, LBP_AL },
944 { 0xA8FC, 0xA8FC, LBP_BB }, 960 { 0xA8FC, 0xA8FC, LBP_BB },
945 { 0xA8FD, 0xA8FD, LBP_AL }, 961 { 0xA8FD, 0xA8FE, LBP_AL },
962 { 0xA8FF, 0xA8FF, LBP_CM },
946 { 0xA900, 0xA909, LBP_NU }, 963 { 0xA900, 0xA909, LBP_NU },
947 { 0xA90A, 0xA925, LBP_AL }, 964 { 0xA90A, 0xA925, LBP_AL },
948 { 0xA926, 0xA92D, LBP_CM }, 965 { 0xA926, 0xA92D, LBP_CM },
@@ -1907,9 +1924,9 @@ const struct LineBreakProperties lb_prop_default[] = {
1907 { 0x1091F, 0x1091F, LBP_BA }, 1924 { 0x1091F, 0x1091F, LBP_BA },
1908 { 0x10920, 0x10A00, LBP_AL }, 1925 { 0x10920, 0x10A00, LBP_AL },
1909 { 0x10A01, 0x10A0F, LBP_CM }, 1926 { 0x10A01, 0x10A0F, LBP_CM },
1910 { 0x10A10, 0x10A33, LBP_AL }, 1927 { 0x10A10, 0x10A35, LBP_AL },
1911 { 0x10A38, 0x10A3F, LBP_CM }, 1928 { 0x10A38, 0x10A3F, LBP_CM },
1912 { 0x10A40, 0x10A47, LBP_AL }, 1929 { 0x10A40, 0x10A48, LBP_AL },
1913 { 0x10A50, 0x10A57, LBP_BA }, 1930 { 0x10A50, 0x10A57, LBP_BA },
1914 { 0x10A58, 0x10AE4, LBP_AL }, 1931 { 0x10A58, 0x10AE4, LBP_AL },
1915 { 0x10AE5, 0x10AE6, LBP_CM }, 1932 { 0x10AE5, 0x10AE6, LBP_CM },
@@ -1918,7 +1935,12 @@ const struct LineBreakProperties lb_prop_default[] = {
1918 { 0x10AF6, 0x10AF6, LBP_IN }, 1935 { 0x10AF6, 0x10AF6, LBP_IN },
1919 { 0x10B00, 0x10B35, LBP_AL }, 1936 { 0x10B00, 0x10B35, LBP_AL },
1920 { 0x10B39, 0x10B3F, LBP_BA }, 1937 { 0x10B39, 0x10B3F, LBP_BA },
1921 { 0x10B40, 0x10E7E, LBP_AL }, 1938 { 0x10B40, 0x10D23, LBP_AL },
1939 { 0x10D24, 0x10D27, LBP_CM },
1940 { 0x10D30, 0x10D39, LBP_NU },
1941 { 0x10E60, 0x10F45, LBP_AL },
1942 { 0x10F46, 0x10F50, LBP_CM },
1943 { 0x10F51, 0x10FF6, LBP_AL },
1922 { 0x11000, 0x11002, LBP_CM }, 1944 { 0x11000, 0x11002, LBP_CM },
1923 { 0x11003, 0x11037, LBP_AL }, 1945 { 0x11003, 0x11037, LBP_AL },
1924 { 0x11038, 0x11046, LBP_CM }, 1946 { 0x11038, 0x11046, LBP_CM },
@@ -1930,13 +1952,15 @@ const struct LineBreakProperties lb_prop_default[] = {
1930 { 0x110B0, 0x110BA, LBP_CM }, 1952 { 0x110B0, 0x110BA, LBP_CM },
1931 { 0x110BB, 0x110BD, LBP_AL }, 1953 { 0x110BB, 0x110BD, LBP_AL },
1932 { 0x110BE, 0x110C1, LBP_BA }, 1954 { 0x110BE, 0x110C1, LBP_BA },
1933 { 0x110D0, 0x110E8, LBP_AL }, 1955 { 0x110CD, 0x110E8, LBP_AL },
1934 { 0x110F0, 0x110F9, LBP_NU }, 1956 { 0x110F0, 0x110F9, LBP_NU },
1935 { 0x11100, 0x11102, LBP_CM }, 1957 { 0x11100, 0x11102, LBP_CM },
1936 { 0x11103, 0x11126, LBP_AL }, 1958 { 0x11103, 0x11126, LBP_AL },
1937 { 0x11127, 0x11134, LBP_CM }, 1959 { 0x11127, 0x11134, LBP_CM },
1938 { 0x11136, 0x1113F, LBP_NU }, 1960 { 0x11136, 0x1113F, LBP_NU },
1939 { 0x11140, 0x11143, LBP_BA }, 1961 { 0x11140, 0x11143, LBP_BA },
1962 { 0x11144, 0x11144, LBP_AL },
1963 { 0x11145, 0x11146, LBP_CM },
1940 { 0x11150, 0x11172, LBP_AL }, 1964 { 0x11150, 0x11172, LBP_AL },
1941 { 0x11173, 0x11173, LBP_CM }, 1965 { 0x11173, 0x11173, LBP_CM },
1942 { 0x11174, 0x11174, LBP_AL }, 1966 { 0x11174, 0x11174, LBP_AL },
@@ -1949,8 +1973,7 @@ const struct LineBreakProperties lb_prop_default[] = {
1949 { 0x111C5, 0x111C6, LBP_BA }, 1973 { 0x111C5, 0x111C6, LBP_BA },
1950 { 0x111C7, 0x111C7, LBP_AL }, 1974 { 0x111C7, 0x111C7, LBP_AL },
1951 { 0x111C8, 0x111C8, LBP_BA }, 1975 { 0x111C8, 0x111C8, LBP_BA },
1952 { 0x111C9, 0x111C9, LBP_AL }, 1976 { 0x111C9, 0x111CC, LBP_CM },
1953 { 0x111CA, 0x111CC, LBP_CM },
1954 { 0x111CD, 0x111CD, LBP_AL }, 1977 { 0x111CD, 0x111CD, LBP_AL },
1955 { 0x111D0, 0x111D9, LBP_NU }, 1978 { 0x111D0, 0x111D9, LBP_NU },
1956 { 0x111DA, 0x111DA, LBP_AL }, 1979 { 0x111DA, 0x111DA, LBP_AL },
@@ -1971,7 +1994,7 @@ const struct LineBreakProperties lb_prop_default[] = {
1971 { 0x112F0, 0x112F9, LBP_NU }, 1994 { 0x112F0, 0x112F9, LBP_NU },
1972 { 0x11300, 0x11303, LBP_CM }, 1995 { 0x11300, 0x11303, LBP_CM },
1973 { 0x11305, 0x11339, LBP_AL }, 1996 { 0x11305, 0x11339, LBP_AL },
1974 { 0x1133C, 0x1133C, LBP_CM }, 1997 { 0x1133B, 0x1133C, LBP_CM },
1975 { 0x1133D, 0x1133D, LBP_AL }, 1998 { 0x1133D, 0x1133D, LBP_AL },
1976 { 0x1133E, 0x1134D, LBP_CM }, 1999 { 0x1133E, 0x1134D, LBP_CM },
1977 { 0x11350, 0x11350, LBP_AL }, 2000 { 0x11350, 0x11350, LBP_AL },
@@ -1985,7 +2008,9 @@ const struct LineBreakProperties lb_prop_default[] = {
1985 { 0x1144F, 0x1144F, LBP_AL }, 2008 { 0x1144F, 0x1144F, LBP_AL },
1986 { 0x11450, 0x11459, LBP_NU }, 2009 { 0x11450, 0x11459, LBP_NU },
1987 { 0x1145B, 0x1145B, LBP_BA }, 2010 { 0x1145B, 0x1145B, LBP_BA },
1988 { 0x1145D, 0x114AF, LBP_AL }, 2011 { 0x1145D, 0x1145D, LBP_AL },
2012 { 0x1145E, 0x1145E, LBP_CM },
2013 { 0x1145F, 0x114AF, LBP_AL },
1989 { 0x114B0, 0x114C3, LBP_CM }, 2014 { 0x114B0, 0x114C3, LBP_CM },
1990 { 0x114C4, 0x114C7, LBP_AL }, 2015 { 0x114C4, 0x114C7, LBP_AL },
1991 { 0x114D0, 0x114D9, LBP_NU }, 2016 { 0x114D0, 0x114D9, LBP_NU },
@@ -2006,15 +2031,44 @@ const struct LineBreakProperties lb_prop_default[] = {
2006 { 0x11660, 0x1166C, LBP_BB }, 2031 { 0x11660, 0x1166C, LBP_BB },
2007 { 0x11680, 0x116AA, LBP_AL }, 2032 { 0x11680, 0x116AA, LBP_AL },
2008 { 0x116AB, 0x116B7, LBP_CM }, 2033 { 0x116AB, 0x116B7, LBP_CM },
2034 { 0x116B8, 0x116B8, LBP_AL },
2009 { 0x116C0, 0x116C9, LBP_NU }, 2035 { 0x116C0, 0x116C9, LBP_NU },
2010 { 0x11700, 0x1172B, LBP_SA }, 2036 { 0x11700, 0x1172B, LBP_SA },
2011 { 0x11730, 0x11739, LBP_NU }, 2037 { 0x11730, 0x11739, LBP_NU },
2012 { 0x1173A, 0x1173B, LBP_SA }, 2038 { 0x1173A, 0x1173B, LBP_SA },
2013 { 0x1173C, 0x1173E, LBP_BA }, 2039 { 0x1173C, 0x1173E, LBP_BA },
2014 { 0x1173F, 0x1173F, LBP_SA }, 2040 { 0x1173F, 0x1173F, LBP_SA },
2015 { 0x118A0, 0x118DF, LBP_AL }, 2041 { 0x11800, 0x1182B, LBP_AL },
2042 { 0x1182C, 0x1183A, LBP_CM },
2043 { 0x1183B, 0x118DF, LBP_AL },
2016 { 0x118E0, 0x118E9, LBP_NU }, 2044 { 0x118E0, 0x118E9, LBP_NU },
2017 { 0x118EA, 0x11C2E, LBP_AL }, 2045 { 0x118EA, 0x119D0, LBP_AL },
2046 { 0x119D1, 0x119E0, LBP_CM },
2047 { 0x119E1, 0x119E1, LBP_AL },
2048 { 0x119E2, 0x119E2, LBP_BB },
2049 { 0x119E3, 0x119E3, LBP_AL },
2050 { 0x119E4, 0x119E4, LBP_CM },
2051 { 0x11A00, 0x11A00, LBP_AL },
2052 { 0x11A01, 0x11A0A, LBP_CM },
2053 { 0x11A0B, 0x11A32, LBP_AL },
2054 { 0x11A33, 0x11A39, LBP_CM },
2055 { 0x11A3A, 0x11A3A, LBP_AL },
2056 { 0x11A3B, 0x11A3E, LBP_CM },
2057 { 0x11A3F, 0x11A3F, LBP_BB },
2058 { 0x11A40, 0x11A40, LBP_AL },
2059 { 0x11A41, 0x11A44, LBP_BA },
2060 { 0x11A45, 0x11A45, LBP_BB },
2061 { 0x11A46, 0x11A46, LBP_AL },
2062 { 0x11A47, 0x11A47, LBP_CM },
2063 { 0x11A50, 0x11A50, LBP_AL },
2064 { 0x11A51, 0x11A5B, LBP_CM },
2065 { 0x11A5C, 0x11A89, LBP_AL },
2066 { 0x11A8A, 0x11A99, LBP_CM },
2067 { 0x11A9A, 0x11A9C, LBP_BA },
2068 { 0x11A9D, 0x11A9D, LBP_AL },
2069 { 0x11A9E, 0x11AA0, LBP_BB },
2070 { 0x11AA1, 0x11AA2, LBP_BA },
2071 { 0x11AC0, 0x11C2E, LBP_AL },
2018 { 0x11C2F, 0x11C3F, LBP_CM }, 2072 { 0x11C2F, 0x11C3F, LBP_CM },
2019 { 0x11C40, 0x11C40, LBP_AL }, 2073 { 0x11C40, 0x11C40, LBP_AL },
2020 { 0x11C41, 0x11C45, LBP_BA }, 2074 { 0x11C41, 0x11C45, LBP_BA },
@@ -2024,6 +2078,21 @@ const struct LineBreakProperties lb_prop_default[] = {
2024 { 0x11C71, 0x11C71, LBP_EX }, 2078 { 0x11C71, 0x11C71, LBP_EX },
2025 { 0x11C72, 0x11C8F, LBP_AL }, 2079 { 0x11C72, 0x11C8F, LBP_AL },
2026 { 0x11C92, 0x11CB6, LBP_CM }, 2080 { 0x11C92, 0x11CB6, LBP_CM },
2081 { 0x11D00, 0x11D30, LBP_AL },
2082 { 0x11D31, 0x11D45, LBP_CM },
2083 { 0x11D46, 0x11D46, LBP_AL },
2084 { 0x11D47, 0x11D47, LBP_CM },
2085 { 0x11D50, 0x11D59, LBP_NU },
2086 { 0x11D60, 0x11D89, LBP_AL },
2087 { 0x11D8A, 0x11D97, LBP_CM },
2088 { 0x11D98, 0x11D98, LBP_AL },
2089 { 0x11DA0, 0x11DA9, LBP_NU },
2090 { 0x11EE0, 0x11EF2, LBP_AL },
2091 { 0x11EF3, 0x11EF6, LBP_CM },
2092 { 0x11EF7, 0x11FDC, LBP_AL },
2093 { 0x11FDD, 0x11FE0, LBP_PO },
2094 { 0x11FE1, 0x11FF1, LBP_AL },
2095 { 0x11FFF, 0x11FFF, LBP_BA },
2027 { 0x12000, 0x1246E, LBP_AL }, 2096 { 0x12000, 0x1246E, LBP_AL },
2028 { 0x12470, 0x12474, LBP_BA }, 2097 { 0x12470, 0x12474, LBP_BA },
2029 { 0x12480, 0x13257, LBP_AL }, 2098 { 0x12480, 0x13257, LBP_AL },
@@ -2039,7 +2108,11 @@ const struct LineBreakProperties lb_prop_default[] = {
2039 { 0x1328A, 0x13378, LBP_AL }, 2108 { 0x1328A, 0x13378, LBP_AL },
2040 { 0x13379, 0x13379, LBP_OP }, 2109 { 0x13379, 0x13379, LBP_OP },
2041 { 0x1337A, 0x1337B, LBP_CL }, 2110 { 0x1337A, 0x1337B, LBP_CL },
2042 { 0x1337C, 0x145CD, LBP_AL }, 2111 { 0x1337C, 0x1342E, LBP_AL },
2112 { 0x13430, 0x13436, LBP_GL },
2113 { 0x13437, 0x13437, LBP_OP },
2114 { 0x13438, 0x13438, LBP_CL },
2115 { 0x14400, 0x145CD, LBP_AL },
2043 { 0x145CE, 0x145CE, LBP_OP }, 2116 { 0x145CE, 0x145CE, LBP_OP },
2044 { 0x145CF, 0x145CF, LBP_CL }, 2117 { 0x145CF, 0x145CF, LBP_CL },
2045 { 0x145D0, 0x16A5E, LBP_AL }, 2118 { 0x145D0, 0x16A5E, LBP_AL },
@@ -2055,11 +2128,17 @@ const struct LineBreakProperties lb_prop_default[] = {
2055 { 0x16B44, 0x16B44, LBP_BA }, 2128 { 0x16B44, 0x16B44, LBP_BA },
2056 { 0x16B45, 0x16B45, LBP_AL }, 2129 { 0x16B45, 0x16B45, LBP_AL },
2057 { 0x16B50, 0x16B59, LBP_NU }, 2130 { 0x16B50, 0x16B59, LBP_NU },
2058 { 0x16B5B, 0x16F50, LBP_AL }, 2131 { 0x16B5B, 0x16E96, LBP_AL },
2132 { 0x16E97, 0x16E98, LBP_BA },
2133 { 0x16E99, 0x16F4A, LBP_AL },
2134 { 0x16F4F, 0x16F4F, LBP_CM },
2135 { 0x16F50, 0x16F50, LBP_AL },
2059 { 0x16F51, 0x16F92, LBP_CM }, 2136 { 0x16F51, 0x16F92, LBP_CM },
2060 { 0x16F93, 0x16F9F, LBP_AL }, 2137 { 0x16F93, 0x16F9F, LBP_AL },
2061 { 0x16FE0, 0x16FE0, LBP_NS }, 2138 { 0x16FE0, 0x16FE3, LBP_NS },
2062 { 0x17000, 0x1B001, LBP_ID }, 2139 { 0x17000, 0x1B11E, LBP_ID },
2140 { 0x1B150, 0x1B167, LBP_CJ },
2141 { 0x1B170, 0x1B2FB, LBP_ID },
2063 { 0x1BC00, 0x1BC9C, LBP_AL }, 2142 { 0x1BC00, 0x1BC9C, LBP_AL },
2064 { 0x1BC9D, 0x1BC9E, LBP_CM }, 2143 { 0x1BC9D, 0x1BC9E, LBP_CM },
2065 { 0x1BC9F, 0x1BC9F, LBP_BA }, 2144 { 0x1BC9F, 0x1BC9F, LBP_BA },
@@ -2088,22 +2167,34 @@ const struct LineBreakProperties lb_prop_default[] = {
2088 { 0x1DA87, 0x1DA8A, LBP_BA }, 2167 { 0x1DA87, 0x1DA8A, LBP_BA },
2089 { 0x1DA8B, 0x1DA8B, LBP_AL }, 2168 { 0x1DA8B, 0x1DA8B, LBP_AL },
2090 { 0x1DA9B, 0x1E02A, LBP_CM }, 2169 { 0x1DA9B, 0x1E02A, LBP_CM },
2170 { 0x1E100, 0x1E12C, LBP_AL },
2171 { 0x1E130, 0x1E136, LBP_CM },
2172 { 0x1E137, 0x1E13D, LBP_AL },
2173 { 0x1E140, 0x1E149, LBP_NU },
2174 { 0x1E14E, 0x1E2EB, LBP_AL },
2175 { 0x1E2EC, 0x1E2EF, LBP_CM },
2176 { 0x1E2F0, 0x1E2F9, LBP_NU },
2177 { 0x1E2FF, 0x1E2FF, LBP_PR },
2091 { 0x1E800, 0x1E8CF, LBP_AL }, 2178 { 0x1E800, 0x1E8CF, LBP_AL },
2092 { 0x1E8D0, 0x1E8D6, LBP_CM }, 2179 { 0x1E8D0, 0x1E8D6, LBP_CM },
2093 { 0x1E900, 0x1E943, LBP_AL }, 2180 { 0x1E900, 0x1E943, LBP_AL },
2094 { 0x1E944, 0x1E94A, LBP_CM }, 2181 { 0x1E944, 0x1E94A, LBP_CM },
2182 { 0x1E94B, 0x1E94B, LBP_AL },
2095 { 0x1E950, 0x1E959, LBP_NU }, 2183 { 0x1E950, 0x1E959, LBP_NU },
2096 { 0x1E95E, 0x1E95F, LBP_OP }, 2184 { 0x1E95E, 0x1E95F, LBP_OP },
2097 { 0x1EE00, 0x1EEF1, LBP_AL }, 2185 { 0x1EC71, 0x1ECAB, LBP_AL },
2186 { 0x1ECAC, 0x1ECAC, LBP_PO },
2187 { 0x1ECAD, 0x1ECAF, LBP_AL },
2188 { 0x1ECB0, 0x1ECB0, LBP_PO },
2189 { 0x1ECB1, 0x1EEF1, LBP_AL },
2098 { 0x1F000, 0x1F0FF, LBP_ID }, 2190 { 0x1F000, 0x1F0FF, LBP_ID },
2099 { 0x1F100, 0x1F10C, LBP_AI }, 2191 { 0x1F100, 0x1F10C, LBP_AI },
2100 { 0x1F10D, 0x1F10F, LBP_ID }, 2192 { 0x1F10D, 0x1F10F, LBP_ID },
2101 { 0x1F110, 0x1F12D, LBP_AI }, 2193 { 0x1F110, 0x1F12D, LBP_AI },
2102 { 0x1F12E, 0x1F12E, LBP_AL }, 2194 { 0x1F12E, 0x1F12F, LBP_AL },
2103 { 0x1F12F, 0x1F12F, LBP_ID },
2104 { 0x1F130, 0x1F169, LBP_AI }, 2195 { 0x1F130, 0x1F169, LBP_AI },
2105 { 0x1F16A, 0x1F16B, LBP_AL }, 2196 { 0x1F16A, 0x1F16C, LBP_AL },
2106 { 0x1F16C, 0x1F16F, LBP_ID }, 2197 { 0x1F16D, 0x1F16F, LBP_ID },
2107 { 0x1F170, 0x1F1AC, LBP_AI }, 2198 { 0x1F170, 0x1F1AC, LBP_AI },
2108 { 0x1F1AD, 0x1F1E5, LBP_ID }, 2199 { 0x1F1AD, 0x1F1E5, LBP_ID },
2109 { 0x1F1E6, 0x1F1FF, LBP_RI }, 2200 { 0x1F1E6, 0x1F1FF, LBP_RI },
@@ -2115,29 +2206,31 @@ const struct LineBreakProperties lb_prop_default[] = {
2115 { 0x1F3B5, 0x1F3B6, LBP_AL }, 2206 { 0x1F3B5, 0x1F3B6, LBP_AL },
2116 { 0x1F3B7, 0x1F3BB, LBP_ID }, 2207 { 0x1F3B7, 0x1F3BB, LBP_ID },
2117 { 0x1F3BC, 0x1F3BC, LBP_AL }, 2208 { 0x1F3BC, 0x1F3BC, LBP_AL },
2118 { 0x1F3BD, 0x1F3C2, LBP_ID }, 2209 { 0x1F3BD, 0x1F3C1, LBP_ID },
2119 { 0x1F3C3, 0x1F3C4, LBP_EB }, 2210 { 0x1F3C2, 0x1F3C4, LBP_EB },
2120 { 0x1F3C5, 0x1F3C9, LBP_ID }, 2211 { 0x1F3C5, 0x1F3C6, LBP_ID },
2121 { 0x1F3CA, 0x1F3CB, LBP_EB }, 2212 { 0x1F3C7, 0x1F3C7, LBP_EB },
2122 { 0x1F3CC, 0x1F3FA, LBP_ID }, 2213 { 0x1F3C8, 0x1F3C9, LBP_ID },
2214 { 0x1F3CA, 0x1F3CC, LBP_EB },
2215 { 0x1F3CD, 0x1F3FA, LBP_ID },
2123 { 0x1F3FB, 0x1F3FF, LBP_EM }, 2216 { 0x1F3FB, 0x1F3FF, LBP_EM },
2124 { 0x1F400, 0x1F441, LBP_ID }, 2217 { 0x1F400, 0x1F441, LBP_ID },
2125 { 0x1F442, 0x1F443, LBP_EB }, 2218 { 0x1F442, 0x1F443, LBP_EB },
2126 { 0x1F444, 0x1F445, LBP_ID }, 2219 { 0x1F444, 0x1F445, LBP_ID },
2127 { 0x1F446, 0x1F450, LBP_EB }, 2220 { 0x1F446, 0x1F450, LBP_EB },
2128 { 0x1F451, 0x1F465, LBP_ID }, 2221 { 0x1F451, 0x1F465, LBP_ID },
2129 { 0x1F466, 0x1F469, LBP_EB }, 2222 { 0x1F466, 0x1F478, LBP_EB },
2130 { 0x1F46A, 0x1F46D, LBP_ID },
2131 { 0x1F46E, 0x1F46E, LBP_EB },
2132 { 0x1F46F, 0x1F46F, LBP_ID },
2133 { 0x1F470, 0x1F478, LBP_EB },
2134 { 0x1F479, 0x1F47B, LBP_ID }, 2223 { 0x1F479, 0x1F47B, LBP_ID },
2135 { 0x1F47C, 0x1F47C, LBP_EB }, 2224 { 0x1F47C, 0x1F47C, LBP_EB },
2136 { 0x1F47D, 0x1F480, LBP_ID }, 2225 { 0x1F47D, 0x1F480, LBP_ID },
2137 { 0x1F481, 0x1F483, LBP_EB }, 2226 { 0x1F481, 0x1F483, LBP_EB },
2138 { 0x1F484, 0x1F484, LBP_ID }, 2227 { 0x1F484, 0x1F484, LBP_ID },
2139 { 0x1F485, 0x1F487, LBP_EB }, 2228 { 0x1F485, 0x1F487, LBP_EB },
2140 { 0x1F488, 0x1F49F, LBP_ID }, 2229 { 0x1F488, 0x1F48E, LBP_ID },
2230 { 0x1F48F, 0x1F48F, LBP_EB },
2231 { 0x1F490, 0x1F490, LBP_ID },
2232 { 0x1F491, 0x1F491, LBP_EB },
2233 { 0x1F492, 0x1F49F, LBP_ID },
2141 { 0x1F4A0, 0x1F4A0, LBP_AL }, 2234 { 0x1F4A0, 0x1F4A0, LBP_AL },
2142 { 0x1F4A1, 0x1F4A1, LBP_ID }, 2235 { 0x1F4A1, 0x1F4A1, LBP_ID },
2143 { 0x1F4A2, 0x1F4A2, LBP_AL }, 2236 { 0x1F4A2, 0x1F4A2, LBP_AL },
@@ -2155,8 +2248,8 @@ const struct LineBreakProperties lb_prop_default[] = {
2155 { 0x1F517, 0x1F524, LBP_AL }, 2248 { 0x1F517, 0x1F524, LBP_AL },
2156 { 0x1F525, 0x1F531, LBP_ID }, 2249 { 0x1F525, 0x1F531, LBP_ID },
2157 { 0x1F532, 0x1F549, LBP_AL }, 2250 { 0x1F532, 0x1F549, LBP_AL },
2158 { 0x1F54A, 0x1F574, LBP_ID }, 2251 { 0x1F54A, 0x1F573, LBP_ID },
2159 { 0x1F575, 0x1F575, LBP_EB }, 2252 { 0x1F574, 0x1F575, LBP_EB },
2160 { 0x1F576, 0x1F579, LBP_ID }, 2253 { 0x1F576, 0x1F579, LBP_ID },
2161 { 0x1F57A, 0x1F57A, LBP_EB }, 2254 { 0x1F57A, 0x1F57A, LBP_EB },
2162 { 0x1F57B, 0x1F58F, LBP_ID }, 2255 { 0x1F57B, 0x1F58F, LBP_ID },
@@ -2181,7 +2274,9 @@ const struct LineBreakProperties lb_prop_default[] = {
2181 { 0x1F6B4, 0x1F6B6, LBP_EB }, 2274 { 0x1F6B4, 0x1F6B6, LBP_EB },
2182 { 0x1F6B7, 0x1F6BF, LBP_ID }, 2275 { 0x1F6B7, 0x1F6BF, LBP_ID },
2183 { 0x1F6C0, 0x1F6C0, LBP_EB }, 2276 { 0x1F6C0, 0x1F6C0, LBP_EB },
2184 { 0x1F6C1, 0x1F6FF, LBP_ID }, 2277 { 0x1F6C1, 0x1F6CB, LBP_ID },
2278 { 0x1F6CC, 0x1F6CC, LBP_EB },
2279 { 0x1F6CD, 0x1F6FF, LBP_ID },
2185 { 0x1F700, 0x1F773, LBP_AL }, 2280 { 0x1F700, 0x1F773, LBP_AL },
2186 { 0x1F774, 0x1F77F, LBP_ID }, 2281 { 0x1F774, 0x1F77F, LBP_ID },
2187 { 0x1F780, 0x1F7D4, LBP_AL }, 2282 { 0x1F780, 0x1F7D4, LBP_AL },
@@ -2195,17 +2290,31 @@ const struct LineBreakProperties lb_prop_default[] = {
2195 { 0x1F860, 0x1F887, LBP_AL }, 2290 { 0x1F860, 0x1F887, LBP_AL },
2196 { 0x1F888, 0x1F88F, LBP_ID }, 2291 { 0x1F888, 0x1F88F, LBP_ID },
2197 { 0x1F890, 0x1F8AD, LBP_AL }, 2292 { 0x1F890, 0x1F8AD, LBP_AL },
2198 { 0x1F8AE, 0x1F917, LBP_ID }, 2293 { 0x1F8AE, 0x1F8FF, LBP_ID },
2199 { 0x1F918, 0x1F91E, LBP_EB }, 2294 { 0x1F900, 0x1F90B, LBP_AL },
2200 { 0x1F91F, 0x1F925, LBP_ID }, 2295 { 0x1F90C, 0x1F90E, LBP_ID },
2296 { 0x1F90F, 0x1F90F, LBP_EB },
2297 { 0x1F910, 0x1F917, LBP_ID },
2298 { 0x1F918, 0x1F91F, LBP_EB },
2299 { 0x1F920, 0x1F925, LBP_ID },
2201 { 0x1F926, 0x1F926, LBP_EB }, 2300 { 0x1F926, 0x1F926, LBP_EB },
2202 { 0x1F927, 0x1F92F, LBP_ID }, 2301 { 0x1F927, 0x1F92F, LBP_ID },
2203 { 0x1F930, 0x1F930, LBP_EB }, 2302 { 0x1F930, 0x1F939, LBP_EB },
2204 { 0x1F931, 0x1F932, LBP_ID },
2205 { 0x1F933, 0x1F939, LBP_EB },
2206 { 0x1F93A, 0x1F93B, LBP_ID }, 2303 { 0x1F93A, 0x1F93B, LBP_ID },
2207 { 0x1F93C, 0x1F93E, LBP_EB }, 2304 { 0x1F93C, 0x1F93E, LBP_EB },
2208 { 0x1F93F, 0x3FFFD, LBP_ID }, 2305 { 0x1F93F, 0x1F9B4, LBP_ID },
2306 { 0x1F9B5, 0x1F9B6, LBP_EB },
2307 { 0x1F9B7, 0x1F9B7, LBP_ID },
2308 { 0x1F9B8, 0x1F9B9, LBP_EB },
2309 { 0x1F9BA, 0x1F9BA, LBP_ID },
2310 { 0x1F9BB, 0x1F9BB, LBP_EB },
2311 { 0x1F9BC, 0x1F9CC, LBP_ID },
2312 { 0x1F9CD, 0x1F9CF, LBP_EB },
2313 { 0x1F9D0, 0x1F9D0, LBP_ID },
2314 { 0x1F9D1, 0x1F9DD, LBP_EB },
2315 { 0x1F9DE, 0x1F9FF, LBP_ID },
2316 { 0x1FA00, 0x1FA53, LBP_AL },
2317 { 0x1FA54, 0x3FFFD, LBP_ID },
2209 { 0xE0001, 0xE01EF, LBP_CM }, 2318 { 0xE0001, 0xE01EF, LBP_CM },
2210 { 0xF0000, 0x10FFFD, LBP_XX }, 2319 { 0xF0000, 0x10FFFD, LBP_XX },
2211 { 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined } 2320 { 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined }
diff --git a/src/static_libs/libunibreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c
index 6b485cecbd..847621ed95 100644
--- a/src/static_libs/libunibreak/linebreakdef.c
+++ b/src/static_libs/libunibreak/linebreakdef.c
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -22,20 +22,6 @@
22 * not be misrepresented as being the original software. 22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source 23 * 3. This notice may not be removed or altered from any source
24 * distribution. 24 * distribution.
25 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/>
28 *
29 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 *
33 * This library has been updated according to Revision 37, for
34 * Unicode 9.0.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
36 *
37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html>
39 */ 25 */
40 26
41/** 27/**
@@ -66,6 +52,7 @@ static const struct LineBreakProperties lb_prop_German[] = {
66 { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */ 52 { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
67 { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */ 53 { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
68 { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */ 54 { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
55 { 0x2019, 0x2019, LBP_GL }, /* Right single quotation mark: glue */
69 { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */ 56 { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
70 { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */ 57 { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
71 { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */ 58 { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */
diff --git a/src/static_libs/libunibreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h
index 37ec7b546e..48d714ef29 100644
--- a/src/static_libs/libunibreak/linebreakdef.h
+++ b/src/static_libs/libunibreak/linebreakdef.h
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> 8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9 * 9 *
10 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
@@ -31,9 +31,9 @@
31 * Unicode 5.0.0: 31 * Unicode 5.0.0:
32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33 * 33 *
34 * This library has been updated according to Revision 37, for 34 * This library has been updated according to Revision 43, for
35 * Unicode 9.0.0: 35 * Unicode 12.0.0:
36 * <URL:http://www.unicode.org/reports/tr14/tr14-37.html> 36 * <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
37 * 37 *
38 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
@@ -52,8 +52,8 @@
52#include "unibreakdef.h" 52#include "unibreakdef.h"
53 53
54/** 54/**
55 * Line break classes. This is a direct mapping of Table 1 of Unicode 55 * Line break classes. This is a mapping of Table 1 of Unicode
56 * Standard Annex 14, Revision 26. 56 * Standard Annex 14.
57 */ 57 */
58enum LineBreakClass 58enum LineBreakClass
59{ 59{
@@ -95,7 +95,7 @@ enum LineBreakClass
95 LBP_ZWJ, /**< Zero width joiner */ 95 LBP_ZWJ, /**< Zero width joiner */
96 96
97 /* The following break class is treated in the pair table, but it is 97 /* The following break class is treated in the pair table, but it is
98 * not part of Table 2 of UAX #14. */ 98 * not part of Table 2 of UAX #14-37. */
99 LBP_CB, /**< Contingent break */ 99 LBP_CB, /**< Contingent break */
100 100
101 /* The following break classes are not treated in the pair table */ 101 /* The following break classes are not treated in the pair table */
@@ -117,8 +117,8 @@ enum LineBreakClass
117 */ 117 */
118struct LineBreakProperties 118struct LineBreakProperties
119{ 119{
120 utf32_t start; /**< Starting coding point */ 120 utf32_t start; /**< Start codepoint */
121 utf32_t end; /**< End coding point */ 121 utf32_t end; /**< End codepoint, inclusive */
122 enum LineBreakClass prop; /**< The line breaking property */ 122 enum LineBreakClass prop; /**< The line breaking property */
123}; 123};
124 124
@@ -140,11 +140,14 @@ struct LineBreakPropertiesLang
140struct LineBreakContext 140struct LineBreakContext
141{ 141{
142 const char *lang; /**< Language name */ 142 const char *lang; /**< Language name */
143 const struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */ 143 const struct LineBreakProperties *lbpLang; /**< Pointer to
144 LineBreakProperties */
144 enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */ 145 enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */
145 enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */ 146 enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */
146 enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */ 147 enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */
147 int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */ 148 bool fLb8aZwj; /**< Flag for ZWJ (LB8a) */
149 bool fLb10LeadSpace; /**< Flag for leading space (LB10) */
150 bool fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */
148 int cLb30aRI; /**< Count of RI characters (LB30a) */ 151 int cLb30aRI; /**< Count of RI characters (LB30a) */
149}; 152};
150 153
diff --git a/src/static_libs/libunibreak/meson.build b/src/static_libs/libunibreak/meson.build
index a2d5c3e3f9..cca9c1f6a0 100644
--- a/src/static_libs/libunibreak/meson.build
+++ b/src/static_libs/libunibreak/meson.build
@@ -15,6 +15,8 @@ libunibreak_src = [
15 'graphemebreak.c', 15 'graphemebreak.c',
16 'graphemebreak.h', 16 'graphemebreak.h',
17 'graphemebreakdef.h', 17 'graphemebreakdef.h',
18 'emojidef.h',
19 'emojidef.c',
18] 20]
19 21
20libunibreak_lib = static_library('libunibreak', 22libunibreak_lib = static_library('libunibreak',
diff --git a/src/static_libs/libunibreak/unibreakbase.c b/src/static_libs/libunibreak/unibreakbase.c
index 686852a990..ef24c90047 100644
--- a/src/static_libs/libunibreak/unibreakbase.c
+++ b/src/static_libs/libunibreak/unibreakbase.c
@@ -4,7 +4,7 @@
4 * Break processing in a Unicode sequence. Designed to be used in a 4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
diff --git a/src/static_libs/libunibreak/unibreakbase.h b/src/static_libs/libunibreak/unibreakbase.h
index ff9a6ce8a9..a00a5bdb6b 100644
--- a/src/static_libs/libunibreak/unibreakbase.h
+++ b/src/static_libs/libunibreak/unibreakbase.h
@@ -4,7 +4,7 @@
4 * Break processing in a Unicode sequence. Designed to be used in a 4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2015-2019 Wu Yongwei <wuyongwei at gmail dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -39,7 +39,7 @@
39extern "C" { 39extern "C" {
40#endif 40#endif
41 41
42#define UNIBREAK_VERSION 0x0400 /**< Version of the library linebreak */ 42#define UNIBREAK_VERSION 0x0402 /**< Version of the libunibreak */
43extern const int unibreak_version; 43extern const int unibreak_version;
44 44
45#ifndef UNIBREAK_UTF_TYPES_DEFINED 45#ifndef UNIBREAK_UTF_TYPES_DEFINED
diff --git a/src/static_libs/libunibreak/unibreakdef.h b/src/static_libs/libunibreak/unibreakdef.h
index e13016d8cd..5f3533e5dd 100644
--- a/src/static_libs/libunibreak/unibreakdef.h
+++ b/src/static_libs/libunibreak/unibreakdef.h
@@ -4,7 +4,7 @@
4 * Break processing in a Unicode sequence. Designed to be used in a 4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -35,9 +35,19 @@
35#ifndef UNIBREAKDEF_H 35#ifndef UNIBREAKDEF_H
36#define UNIBREAKDEF_H 36#define UNIBREAKDEF_H
37 37
38#if defined(_MSC_VER) && _MSC_VER < 1800
39typedef int bool;
40#define false 0
41#define true 1
42#else
43#include <stdbool.h>
44#endif
45
38#include <stddef.h> 46#include <stddef.h>
39#include "unibreakbase.h" 47#include "unibreakbase.h"
40 48
49#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
50
41#ifdef __cplusplus 51#ifdef __cplusplus
42extern "C" { 52extern "C" {
43#endif 53#endif
diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c
index 50c830c7cc..d4e22495c6 100644
--- a/src/static_libs/libunibreak/wordbreak.c
+++ b/src/static_libs/libunibreak/wordbreak.c
@@ -4,7 +4,8 @@
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com> 7 * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
8 * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
8 * 9 *
9 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 11 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +31,9 @@
30 * Unicode 6.0.0: 31 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 32 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32 * 33 *
33 * This library has been updated according to Revision 29, for 34 * This library has been updated according to Revision 35, for
34 * Unicode 9.0.0: 35 * Unicode 12.0.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> 36 * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
36 * 37 *
37 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
@@ -53,8 +54,7 @@
53#include "unibreakdef.h" 54#include "unibreakdef.h"
54#include "wordbreak.h" 55#include "wordbreak.h"
55#include "wordbreakdata.c" 56#include "wordbreakdata.c"
56 57#include "emojidef.h"
57#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
58 58
59/** 59/**
60 * Initializes the wordbreak internals. It currently does nothing, but 60 * Initializes the wordbreak internals. It currently does nothing, but
@@ -215,7 +215,7 @@ static void set_wordbreaks(
215#if __has_attribute(fallthrough) 215#if __has_attribute(fallthrough)
216 __attribute__((fallthrough)); 216 __attribute__((fallthrough));
217#endif 217#endif
218 /* Fall off */ 218 /* Fall through */
219 219
220 case WBP_Newline: 220 case WBP_Newline:
221 /* WB3a,3b */ 221 /* WB3a,3b */
@@ -225,24 +225,6 @@ static void set_wordbreaks(
225 posLast = posCur; 225 posLast = posCur;
226 break; 226 break;
227 227
228 case WBP_E_Base_GAZ:
229 case WBP_Glue_After_Zwj:
230 /* WB3c */
231 if (wbcLast == WBP_ZWJ)
232 {
233 set_brks_to(s, brks, posLast, posCur, len,
234 WORDBREAK_NOBREAK, get_next_char);
235 }
236 /* No rule found, reset */
237 else
238 {
239 set_brks_to(s, brks, posLast, posCur, len,
240 WORDBREAK_BREAK, get_next_char);
241 }
242 wbcSeqStart = wbcCur;
243 posLast = posCur;
244 break;
245
246 case WBP_ZWJ: 228 case WBP_ZWJ:
247 case WBP_Extend: 229 case WBP_Extend:
248 case WBP_Format: 230 case WBP_Format:
@@ -260,8 +242,10 @@ static void set_wordbreaks(
260 { 242 {
261 /* It's surely not the first */ 243 /* It's surely not the first */
262 brks[posCur - 1] = WORDBREAK_NOBREAK; 244 brks[posCur - 1] = WORDBREAK_NOBREAK;
263 /* WB3c precedes 4, so no intervening Extend chars allowed. */ 245 /* WB3c and WB3d precede 4, so no intervening Extend
264 if (wbcSeqStart != WBP_ZWJ) 246 * chars allowed. */
247 if (wbcCur != WBP_ZWJ && wbcSeqStart != WBP_ZWJ &&
248 wbcSeqStart != WBP_WSegSpace)
265 { 249 {
266 /* "inherit" the previous class. */ 250 /* "inherit" the previous class. */
267 wbcCur = wbcLast; 251 wbcCur = wbcLast;
@@ -334,7 +318,8 @@ static void set_wordbreaks(
334#if __has_attribute(fallthrough) 318#if __has_attribute(fallthrough)
335 __attribute__((fallthrough)); 319 __attribute__((fallthrough));
336#endif 320#endif
337 /* No break on purpose */ 321 /* Fall through */
322
338 case WBP_MidNumLet: 323 case WBP_MidNumLet:
339 if (((wbcLast == WBP_ALetter) || 324 if (((wbcLast == WBP_ALetter) ||
340 (wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */ 325 (wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */
@@ -421,32 +406,6 @@ static void set_wordbreaks(
421 posLast = posCur; 406 posLast = posCur;
422 break; 407 break;
423 408
424 case WBP_E_Base:
425 /* No rule found, reset */
426 set_brks_to(s, brks, posLast, posCur, len,
427 WORDBREAK_BREAK, get_next_char);
428 wbcSeqStart = wbcCur;
429 posLast = posCur;
430 break;
431
432 case WBP_E_Modifier:
433 /* WB14 */
434 if ((wbcLast == WBP_E_Base) ||
435 (wbcLast == WBP_E_Base_GAZ))
436 {
437 set_brks_to(s, brks, posLast, posCur, len,
438 WORDBREAK_NOBREAK, get_next_char);
439 }
440 /* No rule found, reset */
441 else
442 {
443 set_brks_to(s, brks, posLast, posCur, len,
444 WORDBREAK_BREAK, get_next_char);
445 }
446 wbcSeqStart = wbcCur;
447 posLast = posCur;
448 break;
449
450 case WBP_Regional_Indicator: 409 case WBP_Regional_Indicator:
451 /* WB15,16 */ 410 /* WB15,16 */
452 if ((wbcSeqStart == WBP_Regional_Indicator) && 411 if ((wbcSeqStart == WBP_Regional_Indicator) &&
@@ -481,7 +440,32 @@ static void set_wordbreaks(
481 } 440 }
482 break; 441 break;
483 442
443 case WBP_WSegSpace:
444 if (wbcLast == WBP_WSegSpace) /* WB3d */
445 {
446 set_brks_to(s, brks, posLast, posCur, len,
447 WORDBREAK_NOBREAK, get_next_char);
448 posLast = posCur;
449 break;
450 }
451#ifndef __has_attribute
452# define __has_attribute(x) 0
453#endif
454#if __has_attribute(fallthrough)
455 __attribute__((fallthrough));
456#endif
457 /* Fall through */
458
484 case WBP_Any: 459 case WBP_Any:
460 /* Check for rule WB3c */
461 if (wbcLast == WBP_ZWJ && ub_is_extended_pictographic(ch))
462 {
463 set_brks_to(s, brks, posLast, posCur, len,
464 WORDBREAK_NOBREAK, get_next_char);
465 posLast = posCur;
466 break;
467 }
468
485 /* Allow breaks and reset */ 469 /* Allow breaks and reset */
486 set_brks_to(s, brks, posLast, posCur, len, 470 set_brks_to(s, brks, posLast, posCur, len,
487 WORDBREAK_BREAK, get_next_char); 471 WORDBREAK_BREAK, get_next_char);
diff --git a/src/static_libs/libunibreak/wordbreak.h b/src/static_libs/libunibreak/wordbreak.h
index 1040c13280..021de4d751 100644
--- a/src/static_libs/libunibreak/wordbreak.h
+++ b/src/static_libs/libunibreak/wordbreak.h
@@ -4,7 +4,8 @@
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com> 7 * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
8 * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
8 * 9 *
9 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 11 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +31,9 @@
30 * Unicode 6.0.0: 31 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 32 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32 * 33 *
33 * This library has been updated according to Revision 29, for 34 * This library has been updated according to Revision 35, for
34 * Unicode 9.0.0: 35 * Unicode 12.0.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> 36 * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
36 * 37 *
37 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
@@ -72,4 +73,4 @@ void set_wordbreaks_utf32(
72} 73}
73#endif 74#endif
74 75
75#endif 76#endif /* WORDBREAK_H */
diff --git a/src/static_libs/libunibreak/wordbreakdata.c b/src/static_libs/libunibreak/wordbreakdata.c
index 99fcff5bad..f5ee889589 100644
--- a/src/static_libs/libunibreak/wordbreakdata.c
+++ b/src/static_libs/libunibreak/wordbreakdata.c
@@ -1,6 +1,6 @@
1/* The content of this file is generated from: 1/* The content of this file is generated from:
2# WordBreakProperty-9.0.0.txt 2# WordBreakProperty-12.1.0.txt
3# Date: 2016-06-01, 10:34:38 GMT 3# Date: 2019-03-10, 10:53:28 GMT
4*/ 4*/
5 5
6#include "wordbreakdef.h" 6#include "wordbreakdef.h"
@@ -9,6 +9,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
9 {0x000A, 0x000A, WBP_LF}, 9 {0x000A, 0x000A, WBP_LF},
10 {0x000B, 0x000C, WBP_Newline}, 10 {0x000B, 0x000C, WBP_Newline},
11 {0x000D, 0x000D, WBP_CR}, 11 {0x000D, 0x000D, WBP_CR},
12 {0x0020, 0x0020, WBP_WSegSpace},
12 {0x0022, 0x0022, WBP_Double_Quote}, 13 {0x0022, 0x0022, WBP_Double_Quote},
13 {0x0027, 0x0027, WBP_Single_Quote}, 14 {0x0027, 0x0027, WBP_Single_Quote},
14 {0x002C, 0x002C, WBP_MidNum}, 15 {0x002C, 0x002C, WBP_MidNum},
@@ -35,11 +36,15 @@ static const struct WordBreakProperties wb_prop_default[] = {
35 {0x0294, 0x0294, WBP_ALetter}, 36 {0x0294, 0x0294, WBP_ALetter},
36 {0x0295, 0x02AF, WBP_ALetter}, 37 {0x0295, 0x02AF, WBP_ALetter},
37 {0x02B0, 0x02C1, WBP_ALetter}, 38 {0x02B0, 0x02C1, WBP_ALetter},
39 {0x02C2, 0x02C5, WBP_ALetter},
38 {0x02C6, 0x02D1, WBP_ALetter}, 40 {0x02C6, 0x02D1, WBP_ALetter},
39 {0x02D7, 0x02D7, WBP_MidLetter}, 41 {0x02D2, 0x02D7, WBP_ALetter},
42 {0x02DE, 0x02DF, WBP_ALetter},
40 {0x02E0, 0x02E4, WBP_ALetter}, 43 {0x02E0, 0x02E4, WBP_ALetter},
41 {0x02EC, 0x02EC, WBP_ALetter}, 44 {0x02EC, 0x02EC, WBP_ALetter},
45 {0x02ED, 0x02ED, WBP_ALetter},
42 {0x02EE, 0x02EE, WBP_ALetter}, 46 {0x02EE, 0x02EE, WBP_ALetter},
47 {0x02EF, 0x02FF, WBP_ALetter},
43 {0x0300, 0x036F, WBP_Extend}, 48 {0x0300, 0x036F, WBP_Extend},
44 {0x0370, 0x0373, WBP_ALetter}, 49 {0x0370, 0x0373, WBP_ALetter},
45 {0x0374, 0x0374, WBP_ALetter}, 50 {0x0374, 0x0374, WBP_ALetter},
@@ -60,7 +65,9 @@ static const struct WordBreakProperties wb_prop_default[] = {
60 {0x048A, 0x052F, WBP_ALetter}, 65 {0x048A, 0x052F, WBP_ALetter},
61 {0x0531, 0x0556, WBP_ALetter}, 66 {0x0531, 0x0556, WBP_ALetter},
62 {0x0559, 0x0559, WBP_ALetter}, 67 {0x0559, 0x0559, WBP_ALetter},
63 {0x0561, 0x0587, WBP_ALetter}, 68 {0x055B, 0x055C, WBP_ALetter},
69 {0x055E, 0x055E, WBP_ALetter},
70 {0x0560, 0x0588, WBP_ALetter},
64 {0x0589, 0x0589, WBP_MidNum}, 71 {0x0589, 0x0589, WBP_MidNum},
65 {0x0591, 0x05BD, WBP_Extend}, 72 {0x0591, 0x05BD, WBP_Extend},
66 {0x05BF, 0x05BF, WBP_Extend}, 73 {0x05BF, 0x05BF, WBP_Extend},
@@ -68,7 +75,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
68 {0x05C4, 0x05C5, WBP_Extend}, 75 {0x05C4, 0x05C5, WBP_Extend},
69 {0x05C7, 0x05C7, WBP_Extend}, 76 {0x05C7, 0x05C7, WBP_Extend},
70 {0x05D0, 0x05EA, WBP_Hebrew_Letter}, 77 {0x05D0, 0x05EA, WBP_Hebrew_Letter},
71 {0x05F0, 0x05F2, WBP_Hebrew_Letter}, 78 {0x05EF, 0x05F2, WBP_Hebrew_Letter},
72 {0x05F3, 0x05F3, WBP_ALetter}, 79 {0x05F3, 0x05F3, WBP_ALetter},
73 {0x05F4, 0x05F4, WBP_MidLetter}, 80 {0x05F4, 0x05F4, WBP_MidLetter},
74 {0x0600, 0x0605, WBP_Format}, 81 {0x0600, 0x0605, WBP_Format},
@@ -110,6 +117,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
110 {0x07F4, 0x07F5, WBP_ALetter}, 117 {0x07F4, 0x07F5, WBP_ALetter},
111 {0x07F8, 0x07F8, WBP_MidNum}, 118 {0x07F8, 0x07F8, WBP_MidNum},
112 {0x07FA, 0x07FA, WBP_ALetter}, 119 {0x07FA, 0x07FA, WBP_ALetter},
120 {0x07FD, 0x07FD, WBP_Extend},
113 {0x0800, 0x0815, WBP_ALetter}, 121 {0x0800, 0x0815, WBP_ALetter},
114 {0x0816, 0x0819, WBP_Extend}, 122 {0x0816, 0x0819, WBP_Extend},
115 {0x081A, 0x081A, WBP_ALetter}, 123 {0x081A, 0x081A, WBP_ALetter},
@@ -120,9 +128,10 @@ static const struct WordBreakProperties wb_prop_default[] = {
120 {0x0829, 0x082D, WBP_Extend}, 128 {0x0829, 0x082D, WBP_Extend},
121 {0x0840, 0x0858, WBP_ALetter}, 129 {0x0840, 0x0858, WBP_ALetter},
122 {0x0859, 0x085B, WBP_Extend}, 130 {0x0859, 0x085B, WBP_Extend},
131 {0x0860, 0x086A, WBP_ALetter},
123 {0x08A0, 0x08B4, WBP_ALetter}, 132 {0x08A0, 0x08B4, WBP_ALetter},
124 {0x08B6, 0x08BD, WBP_ALetter}, 133 {0x08B6, 0x08BD, WBP_ALetter},
125 {0x08D4, 0x08E1, WBP_Extend}, 134 {0x08D3, 0x08E1, WBP_Extend},
126 {0x08E2, 0x08E2, WBP_Format}, 135 {0x08E2, 0x08E2, WBP_Format},
127 {0x08E3, 0x0902, WBP_Extend}, 136 {0x08E3, 0x0902, WBP_Extend},
128 {0x0903, 0x0903, WBP_Extend}, 137 {0x0903, 0x0903, WBP_Extend},
@@ -165,6 +174,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
165 {0x09E2, 0x09E3, WBP_Extend}, 174 {0x09E2, 0x09E3, WBP_Extend},
166 {0x09E6, 0x09EF, WBP_Numeric}, 175 {0x09E6, 0x09EF, WBP_Numeric},
167 {0x09F0, 0x09F1, WBP_ALetter}, 176 {0x09F0, 0x09F1, WBP_ALetter},
177 {0x09FC, 0x09FC, WBP_ALetter},
178 {0x09FE, 0x09FE, WBP_Extend},
168 {0x0A01, 0x0A02, WBP_Extend}, 179 {0x0A01, 0x0A02, WBP_Extend},
169 {0x0A03, 0x0A03, WBP_Extend}, 180 {0x0A03, 0x0A03, WBP_Extend},
170 {0x0A05, 0x0A0A, WBP_ALetter}, 181 {0x0A05, 0x0A0A, WBP_ALetter},
@@ -207,6 +218,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
207 {0x0AE2, 0x0AE3, WBP_Extend}, 218 {0x0AE2, 0x0AE3, WBP_Extend},
208 {0x0AE6, 0x0AEF, WBP_Numeric}, 219 {0x0AE6, 0x0AEF, WBP_Numeric},
209 {0x0AF9, 0x0AF9, WBP_ALetter}, 220 {0x0AF9, 0x0AF9, WBP_ALetter},
221 {0x0AFA, 0x0AFF, WBP_Extend},
210 {0x0B01, 0x0B01, WBP_Extend}, 222 {0x0B01, 0x0B01, WBP_Extend},
211 {0x0B02, 0x0B03, WBP_Extend}, 223 {0x0B02, 0x0B03, WBP_Extend},
212 {0x0B05, 0x0B0C, WBP_ALetter}, 224 {0x0B05, 0x0B0C, WBP_ALetter},
@@ -253,6 +265,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
253 {0x0BE6, 0x0BEF, WBP_Numeric}, 265 {0x0BE6, 0x0BEF, WBP_Numeric},
254 {0x0C00, 0x0C00, WBP_Extend}, 266 {0x0C00, 0x0C00, WBP_Extend},
255 {0x0C01, 0x0C03, WBP_Extend}, 267 {0x0C01, 0x0C03, WBP_Extend},
268 {0x0C04, 0x0C04, WBP_Extend},
256 {0x0C05, 0x0C0C, WBP_ALetter}, 269 {0x0C05, 0x0C0C, WBP_ALetter},
257 {0x0C0E, 0x0C10, WBP_ALetter}, 270 {0x0C0E, 0x0C10, WBP_ALetter},
258 {0x0C12, 0x0C28, WBP_ALetter}, 271 {0x0C12, 0x0C28, WBP_ALetter},
@@ -290,11 +303,12 @@ static const struct WordBreakProperties wb_prop_default[] = {
290 {0x0CE2, 0x0CE3, WBP_Extend}, 303 {0x0CE2, 0x0CE3, WBP_Extend},
291 {0x0CE6, 0x0CEF, WBP_Numeric}, 304 {0x0CE6, 0x0CEF, WBP_Numeric},
292 {0x0CF1, 0x0CF2, WBP_ALetter}, 305 {0x0CF1, 0x0CF2, WBP_ALetter},
293 {0x0D01, 0x0D01, WBP_Extend}, 306 {0x0D00, 0x0D01, WBP_Extend},
294 {0x0D02, 0x0D03, WBP_Extend}, 307 {0x0D02, 0x0D03, WBP_Extend},
295 {0x0D05, 0x0D0C, WBP_ALetter}, 308 {0x0D05, 0x0D0C, WBP_ALetter},
296 {0x0D0E, 0x0D10, WBP_ALetter}, 309 {0x0D0E, 0x0D10, WBP_ALetter},
297 {0x0D12, 0x0D3A, WBP_ALetter}, 310 {0x0D12, 0x0D3A, WBP_ALetter},
311 {0x0D3B, 0x0D3C, WBP_Extend},
298 {0x0D3D, 0x0D3D, WBP_ALetter}, 312 {0x0D3D, 0x0D3D, WBP_ALetter},
299 {0x0D3E, 0x0D40, WBP_Extend}, 313 {0x0D3E, 0x0D40, WBP_Extend},
300 {0x0D41, 0x0D44, WBP_Extend}, 314 {0x0D41, 0x0D44, WBP_Extend},
@@ -326,8 +340,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
326 {0x0E47, 0x0E4E, WBP_Extend}, 340 {0x0E47, 0x0E4E, WBP_Extend},
327 {0x0E50, 0x0E59, WBP_Numeric}, 341 {0x0E50, 0x0E59, WBP_Numeric},
328 {0x0EB1, 0x0EB1, WBP_Extend}, 342 {0x0EB1, 0x0EB1, WBP_Extend},
329 {0x0EB4, 0x0EB9, WBP_Extend}, 343 {0x0EB4, 0x0EBC, WBP_Extend},
330 {0x0EBB, 0x0EBC, WBP_Extend},
331 {0x0EC8, 0x0ECD, WBP_Extend}, 344 {0x0EC8, 0x0ECD, WBP_Extend},
332 {0x0ED0, 0x0ED9, WBP_Numeric}, 345 {0x0ED0, 0x0ED9, WBP_Numeric},
333 {0x0F00, 0x0F00, WBP_ALetter}, 346 {0x0F00, 0x0F00, WBP_ALetter},
@@ -376,7 +389,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
376 {0x10CD, 0x10CD, WBP_ALetter}, 389 {0x10CD, 0x10CD, WBP_ALetter},
377 {0x10D0, 0x10FA, WBP_ALetter}, 390 {0x10D0, 0x10FA, WBP_ALetter},
378 {0x10FC, 0x10FC, WBP_ALetter}, 391 {0x10FC, 0x10FC, WBP_ALetter},
379 {0x10FD, 0x1248, WBP_ALetter}, 392 {0x10FD, 0x10FF, WBP_ALetter},
393 {0x1100, 0x1248, WBP_ALetter},
380 {0x124A, 0x124D, WBP_ALetter}, 394 {0x124A, 0x124D, WBP_ALetter},
381 {0x1250, 0x1256, WBP_ALetter}, 395 {0x1250, 0x1256, WBP_ALetter},
382 {0x1258, 0x1258, WBP_ALetter}, 396 {0x1258, 0x1258, WBP_ALetter},
@@ -398,6 +412,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
398 {0x13F8, 0x13FD, WBP_ALetter}, 412 {0x13F8, 0x13FD, WBP_ALetter},
399 {0x1401, 0x166C, WBP_ALetter}, 413 {0x1401, 0x166C, WBP_ALetter},
400 {0x166F, 0x167F, WBP_ALetter}, 414 {0x166F, 0x167F, WBP_ALetter},
415 {0x1680, 0x1680, WBP_WSegSpace},
401 {0x1681, 0x169A, WBP_ALetter}, 416 {0x1681, 0x169A, WBP_ALetter},
402 {0x16A0, 0x16EA, WBP_ALetter}, 417 {0x16A0, 0x16EA, WBP_ALetter},
403 {0x16EE, 0x16F0, WBP_ALetter}, 418 {0x16EE, 0x16F0, WBP_ALetter},
@@ -426,7 +441,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
426 {0x1810, 0x1819, WBP_Numeric}, 441 {0x1810, 0x1819, WBP_Numeric},
427 {0x1820, 0x1842, WBP_ALetter}, 442 {0x1820, 0x1842, WBP_ALetter},
428 {0x1843, 0x1843, WBP_ALetter}, 443 {0x1843, 0x1843, WBP_ALetter},
429 {0x1844, 0x1877, WBP_ALetter}, 444 {0x1844, 0x1878, WBP_ALetter},
430 {0x1880, 0x1884, WBP_ALetter}, 445 {0x1880, 0x1884, WBP_ALetter},
431 {0x1885, 0x1886, WBP_Extend}, 446 {0x1885, 0x1886, WBP_Extend},
432 {0x1887, 0x18A8, WBP_ALetter}, 447 {0x1887, 0x18A8, WBP_ALetter},
@@ -509,24 +524,27 @@ static const struct WordBreakProperties wb_prop_default[] = {
509 {0x1C5A, 0x1C77, WBP_ALetter}, 524 {0x1C5A, 0x1C77, WBP_ALetter},
510 {0x1C78, 0x1C7D, WBP_ALetter}, 525 {0x1C78, 0x1C7D, WBP_ALetter},
511 {0x1C80, 0x1C88, WBP_ALetter}, 526 {0x1C80, 0x1C88, WBP_ALetter},
527 {0x1C90, 0x1CBA, WBP_ALetter},
528 {0x1CBD, 0x1CBF, WBP_ALetter},
512 {0x1CD0, 0x1CD2, WBP_Extend}, 529 {0x1CD0, 0x1CD2, WBP_Extend},
513 {0x1CD4, 0x1CE0, WBP_Extend}, 530 {0x1CD4, 0x1CE0, WBP_Extend},
514 {0x1CE1, 0x1CE1, WBP_Extend}, 531 {0x1CE1, 0x1CE1, WBP_Extend},
515 {0x1CE2, 0x1CE8, WBP_Extend}, 532 {0x1CE2, 0x1CE8, WBP_Extend},
516 {0x1CE9, 0x1CEC, WBP_ALetter}, 533 {0x1CE9, 0x1CEC, WBP_ALetter},
517 {0x1CED, 0x1CED, WBP_Extend}, 534 {0x1CED, 0x1CED, WBP_Extend},
518 {0x1CEE, 0x1CF1, WBP_ALetter}, 535 {0x1CEE, 0x1CF3, WBP_ALetter},
519 {0x1CF2, 0x1CF3, WBP_Extend},
520 {0x1CF4, 0x1CF4, WBP_Extend}, 536 {0x1CF4, 0x1CF4, WBP_Extend},
521 {0x1CF5, 0x1CF6, WBP_ALetter}, 537 {0x1CF5, 0x1CF6, WBP_ALetter},
538 {0x1CF7, 0x1CF7, WBP_Extend},
522 {0x1CF8, 0x1CF9, WBP_Extend}, 539 {0x1CF8, 0x1CF9, WBP_Extend},
540 {0x1CFA, 0x1CFA, WBP_ALetter},
523 {0x1D00, 0x1D2B, WBP_ALetter}, 541 {0x1D00, 0x1D2B, WBP_ALetter},
524 {0x1D2C, 0x1D6A, WBP_ALetter}, 542 {0x1D2C, 0x1D6A, WBP_ALetter},
525 {0x1D6B, 0x1D77, WBP_ALetter}, 543 {0x1D6B, 0x1D77, WBP_ALetter},
526 {0x1D78, 0x1D78, WBP_ALetter}, 544 {0x1D78, 0x1D78, WBP_ALetter},
527 {0x1D79, 0x1D9A, WBP_ALetter}, 545 {0x1D79, 0x1D9A, WBP_ALetter},
528 {0x1D9B, 0x1DBF, WBP_ALetter}, 546 {0x1D9B, 0x1DBF, WBP_ALetter},
529 {0x1DC0, 0x1DF5, WBP_Extend}, 547 {0x1DC0, 0x1DF9, WBP_Extend},
530 {0x1DFB, 0x1DFF, WBP_Extend}, 548 {0x1DFB, 0x1DFF, WBP_Extend},
531 {0x1E00, 0x1F15, WBP_ALetter}, 549 {0x1E00, 0x1F15, WBP_ALetter},
532 {0x1F18, 0x1F1D, WBP_ALetter}, 550 {0x1F18, 0x1F1D, WBP_ALetter},
@@ -547,6 +565,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
547 {0x1FE0, 0x1FEC, WBP_ALetter}, 565 {0x1FE0, 0x1FEC, WBP_ALetter},
548 {0x1FF2, 0x1FF4, WBP_ALetter}, 566 {0x1FF2, 0x1FF4, WBP_ALetter},
549 {0x1FF6, 0x1FFC, WBP_ALetter}, 567 {0x1FF6, 0x1FFC, WBP_ALetter},
568 {0x2000, 0x2006, WBP_WSegSpace},
569 {0x2008, 0x200A, WBP_WSegSpace},
550 {0x200C, 0x200C, WBP_Extend}, 570 {0x200C, 0x200C, WBP_Extend},
551 {0x200D, 0x200D, WBP_ZWJ}, 571 {0x200D, 0x200D, WBP_ZWJ},
552 {0x200E, 0x200F, WBP_Format}, 572 {0x200E, 0x200F, WBP_Format},
@@ -561,6 +581,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
561 {0x203F, 0x2040, WBP_ExtendNumLet}, 581 {0x203F, 0x2040, WBP_ExtendNumLet},
562 {0x2044, 0x2044, WBP_MidNum}, 582 {0x2044, 0x2044, WBP_MidNum},
563 {0x2054, 0x2054, WBP_ExtendNumLet}, 583 {0x2054, 0x2054, WBP_ExtendNumLet},
584 {0x205F, 0x205F, WBP_WSegSpace},
564 {0x2060, 0x2064, WBP_Format}, 585 {0x2060, 0x2064, WBP_Format},
565 {0x2066, 0x206F, WBP_Format}, 586 {0x2066, 0x206F, WBP_Format},
566 {0x2071, 0x2071, WBP_ALetter}, 587 {0x2071, 0x2071, WBP_ALetter},
@@ -590,10 +611,6 @@ static const struct WordBreakProperties wb_prop_default[] = {
590 {0x2183, 0x2184, WBP_ALetter}, 611 {0x2183, 0x2184, WBP_ALetter},
591 {0x2185, 0x2188, WBP_ALetter}, 612 {0x2185, 0x2188, WBP_ALetter},
592 {0x24B6, 0x24E9, WBP_ALetter}, 613 {0x24B6, 0x24E9, WBP_ALetter},
593 {0x261D, 0x261D, WBP_E_Base},
594 {0x26F9, 0x26F9, WBP_E_Base},
595 {0x270A, 0x270D, WBP_E_Base},
596 {0x2764, 0x2764, WBP_Glue_After_Zwj},
597 {0x2C00, 0x2C2E, WBP_ALetter}, 614 {0x2C00, 0x2C2E, WBP_ALetter},
598 {0x2C30, 0x2C5E, WBP_ALetter}, 615 {0x2C30, 0x2C5E, WBP_ALetter},
599 {0x2C60, 0x2C7B, WBP_ALetter}, 616 {0x2C60, 0x2C7B, WBP_ALetter},
@@ -619,6 +636,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
619 {0x2DD8, 0x2DDE, WBP_ALetter}, 636 {0x2DD8, 0x2DDE, WBP_ALetter},
620 {0x2DE0, 0x2DFF, WBP_Extend}, 637 {0x2DE0, 0x2DFF, WBP_Extend},
621 {0x2E2F, 0x2E2F, WBP_ALetter}, 638 {0x2E2F, 0x2E2F, WBP_ALetter},
639 {0x3000, 0x3000, WBP_WSegSpace},
622 {0x3005, 0x3005, WBP_ALetter}, 640 {0x3005, 0x3005, WBP_ALetter},
623 {0x302A, 0x302D, WBP_Extend}, 641 {0x302A, 0x302D, WBP_Extend},
624 {0x302E, 0x302F, WBP_Extend}, 642 {0x302E, 0x302F, WBP_Extend},
@@ -631,7 +649,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
631 {0x30A1, 0x30FA, WBP_Katakana}, 649 {0x30A1, 0x30FA, WBP_Katakana},
632 {0x30FC, 0x30FE, WBP_Katakana}, 650 {0x30FC, 0x30FE, WBP_Katakana},
633 {0x30FF, 0x30FF, WBP_Katakana}, 651 {0x30FF, 0x30FF, WBP_Katakana},
634 {0x3105, 0x312D, WBP_ALetter}, 652 {0x3105, 0x312F, WBP_ALetter},
635 {0x3131, 0x318E, WBP_ALetter}, 653 {0x3131, 0x318E, WBP_ALetter},
636 {0x31A0, 0x31BA, WBP_ALetter}, 654 {0x31A0, 0x31BA, WBP_ALetter},
637 {0x31F0, 0x31FF, WBP_Katakana}, 655 {0x31F0, 0x31FF, WBP_Katakana},
@@ -660,14 +678,16 @@ static const struct WordBreakProperties wb_prop_default[] = {
660 {0xA6E6, 0xA6EF, WBP_ALetter}, 678 {0xA6E6, 0xA6EF, WBP_ALetter},
661 {0xA6F0, 0xA6F1, WBP_Extend}, 679 {0xA6F0, 0xA6F1, WBP_Extend},
662 {0xA717, 0xA71F, WBP_ALetter}, 680 {0xA717, 0xA71F, WBP_ALetter},
681 {0xA720, 0xA721, WBP_ALetter},
663 {0xA722, 0xA76F, WBP_ALetter}, 682 {0xA722, 0xA76F, WBP_ALetter},
664 {0xA770, 0xA770, WBP_ALetter}, 683 {0xA770, 0xA770, WBP_ALetter},
665 {0xA771, 0xA787, WBP_ALetter}, 684 {0xA771, 0xA787, WBP_ALetter},
666 {0xA788, 0xA788, WBP_ALetter}, 685 {0xA788, 0xA788, WBP_ALetter},
686 {0xA789, 0xA78A, WBP_ALetter},
667 {0xA78B, 0xA78E, WBP_ALetter}, 687 {0xA78B, 0xA78E, WBP_ALetter},
668 {0xA78F, 0xA78F, WBP_ALetter}, 688 {0xA78F, 0xA78F, WBP_ALetter},
669 {0xA790, 0xA7AE, WBP_ALetter}, 689 {0xA790, 0xA7BF, WBP_ALetter},
670 {0xA7B0, 0xA7B7, WBP_ALetter}, 690 {0xA7C2, 0xA7C6, WBP_ALetter},
671 {0xA7F7, 0xA7F7, WBP_ALetter}, 691 {0xA7F7, 0xA7F7, WBP_ALetter},
672 {0xA7F8, 0xA7F9, WBP_ALetter}, 692 {0xA7F8, 0xA7F9, WBP_ALetter},
673 {0xA7FA, 0xA7FA, WBP_ALetter}, 693 {0xA7FA, 0xA7FA, WBP_ALetter},
@@ -690,7 +710,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
690 {0xA8E0, 0xA8F1, WBP_Extend}, 710 {0xA8E0, 0xA8F1, WBP_Extend},
691 {0xA8F2, 0xA8F7, WBP_ALetter}, 711 {0xA8F2, 0xA8F7, WBP_ALetter},
692 {0xA8FB, 0xA8FB, WBP_ALetter}, 712 {0xA8FB, 0xA8FB, WBP_ALetter},
693 {0xA8FD, 0xA8FD, WBP_ALetter}, 713 {0xA8FD, 0xA8FE, WBP_ALetter},
714 {0xA8FF, 0xA8FF, WBP_Extend},
694 {0xA900, 0xA909, WBP_Numeric}, 715 {0xA900, 0xA909, WBP_Numeric},
695 {0xA90A, 0xA925, WBP_ALetter}, 716 {0xA90A, 0xA925, WBP_ALetter},
696 {0xA926, 0xA92D, WBP_Extend}, 717 {0xA926, 0xA92D, WBP_Extend},
@@ -705,8 +726,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
705 {0xA9B4, 0xA9B5, WBP_Extend}, 726 {0xA9B4, 0xA9B5, WBP_Extend},
706 {0xA9B6, 0xA9B9, WBP_Extend}, 727 {0xA9B6, 0xA9B9, WBP_Extend},
707 {0xA9BA, 0xA9BB, WBP_Extend}, 728 {0xA9BA, 0xA9BB, WBP_Extend},
708 {0xA9BC, 0xA9BC, WBP_Extend}, 729 {0xA9BC, 0xA9BD, WBP_Extend},
709 {0xA9BD, 0xA9C0, WBP_Extend}, 730 {0xA9BE, 0xA9C0, WBP_Extend},
710 {0xA9CF, 0xA9CF, WBP_ALetter}, 731 {0xA9CF, 0xA9CF, WBP_ALetter},
711 {0xA9D0, 0xA9D9, WBP_Numeric}, 732 {0xA9D0, 0xA9D9, WBP_Numeric},
712 {0xA9E5, 0xA9E5, WBP_Extend}, 733 {0xA9E5, 0xA9E5, WBP_Extend},
@@ -745,8 +766,9 @@ static const struct WordBreakProperties wb_prop_default[] = {
745 {0xAB20, 0xAB26, WBP_ALetter}, 766 {0xAB20, 0xAB26, WBP_ALetter},
746 {0xAB28, 0xAB2E, WBP_ALetter}, 767 {0xAB28, 0xAB2E, WBP_ALetter},
747 {0xAB30, 0xAB5A, WBP_ALetter}, 768 {0xAB30, 0xAB5A, WBP_ALetter},
769 {0xAB5B, 0xAB5B, WBP_ALetter},
748 {0xAB5C, 0xAB5F, WBP_ALetter}, 770 {0xAB5C, 0xAB5F, WBP_ALetter},
749 {0xAB60, 0xAB65, WBP_ALetter}, 771 {0xAB60, 0xAB67, WBP_ALetter},
750 {0xAB70, 0xABBF, WBP_ALetter}, 772 {0xAB70, 0xABBF, WBP_ALetter},
751 {0xABC0, 0xABE2, WBP_ALetter}, 773 {0xABC0, 0xABE2, WBP_ALetter},
752 {0xABE3, 0xABE4, WBP_Extend}, 774 {0xABE3, 0xABE4, WBP_Extend},
@@ -793,6 +815,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
793 {0xFF07, 0xFF07, WBP_MidNumLet}, 815 {0xFF07, 0xFF07, WBP_MidNumLet},
794 {0xFF0C, 0xFF0C, WBP_MidNum}, 816 {0xFF0C, 0xFF0C, WBP_MidNum},
795 {0xFF0E, 0xFF0E, WBP_MidNumLet}, 817 {0xFF0E, 0xFF0E, WBP_MidNumLet},
818 {0xFF10, 0xFF19, WBP_Numeric},
796 {0xFF1A, 0xFF1A, WBP_MidLetter}, 819 {0xFF1A, 0xFF1A, WBP_MidLetter},
797 {0xFF1B, 0xFF1B, WBP_MidNum}, 820 {0xFF1B, 0xFF1B, WBP_MidNum},
798 {0xFF21, 0xFF3A, WBP_ALetter}, 821 {0xFF21, 0xFF3A, WBP_ALetter},
@@ -821,7 +844,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
821 {0x102A0, 0x102D0, WBP_ALetter}, 844 {0x102A0, 0x102D0, WBP_ALetter},
822 {0x102E0, 0x102E0, WBP_Extend}, 845 {0x102E0, 0x102E0, WBP_Extend},
823 {0x10300, 0x1031F, WBP_ALetter}, 846 {0x10300, 0x1031F, WBP_ALetter},
824 {0x10330, 0x10340, WBP_ALetter}, 847 {0x1032D, 0x10340, WBP_ALetter},
825 {0x10341, 0x10341, WBP_ALetter}, 848 {0x10341, 0x10341, WBP_ALetter},
826 {0x10342, 0x10349, WBP_ALetter}, 849 {0x10342, 0x10349, WBP_ALetter},
827 {0x1034A, 0x1034A, WBP_ALetter}, 850 {0x1034A, 0x1034A, WBP_ALetter},
@@ -861,7 +884,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
861 {0x10A0C, 0x10A0F, WBP_Extend}, 884 {0x10A0C, 0x10A0F, WBP_Extend},
862 {0x10A10, 0x10A13, WBP_ALetter}, 885 {0x10A10, 0x10A13, WBP_ALetter},
863 {0x10A15, 0x10A17, WBP_ALetter}, 886 {0x10A15, 0x10A17, WBP_ALetter},
864 {0x10A19, 0x10A33, WBP_ALetter}, 887 {0x10A19, 0x10A35, WBP_ALetter},
865 {0x10A38, 0x10A3A, WBP_Extend}, 888 {0x10A38, 0x10A3A, WBP_Extend},
866 {0x10A3F, 0x10A3F, WBP_Extend}, 889 {0x10A3F, 0x10A3F, WBP_Extend},
867 {0x10A60, 0x10A7C, WBP_ALetter}, 890 {0x10A60, 0x10A7C, WBP_ALetter},
@@ -876,6 +899,14 @@ static const struct WordBreakProperties wb_prop_default[] = {
876 {0x10C00, 0x10C48, WBP_ALetter}, 899 {0x10C00, 0x10C48, WBP_ALetter},
877 {0x10C80, 0x10CB2, WBP_ALetter}, 900 {0x10C80, 0x10CB2, WBP_ALetter},
878 {0x10CC0, 0x10CF2, WBP_ALetter}, 901 {0x10CC0, 0x10CF2, WBP_ALetter},
902 {0x10D00, 0x10D23, WBP_ALetter},
903 {0x10D24, 0x10D27, WBP_Extend},
904 {0x10D30, 0x10D39, WBP_Numeric},
905 {0x10F00, 0x10F1C, WBP_ALetter},
906 {0x10F27, 0x10F27, WBP_ALetter},
907 {0x10F30, 0x10F45, WBP_ALetter},
908 {0x10F46, 0x10F50, WBP_Extend},
909 {0x10FE0, 0x10FF6, WBP_ALetter},
879 {0x11000, 0x11000, WBP_Extend}, 910 {0x11000, 0x11000, WBP_Extend},
880 {0x11001, 0x11001, WBP_Extend}, 911 {0x11001, 0x11001, WBP_Extend},
881 {0x11002, 0x11002, WBP_Extend}, 912 {0x11002, 0x11002, WBP_Extend},
@@ -890,6 +921,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
890 {0x110B7, 0x110B8, WBP_Extend}, 921 {0x110B7, 0x110B8, WBP_Extend},
891 {0x110B9, 0x110BA, WBP_Extend}, 922 {0x110B9, 0x110BA, WBP_Extend},
892 {0x110BD, 0x110BD, WBP_Format}, 923 {0x110BD, 0x110BD, WBP_Format},
924 {0x110CD, 0x110CD, WBP_Format},
893 {0x110D0, 0x110E8, WBP_ALetter}, 925 {0x110D0, 0x110E8, WBP_ALetter},
894 {0x110F0, 0x110F9, WBP_Numeric}, 926 {0x110F0, 0x110F9, WBP_Numeric},
895 {0x11100, 0x11102, WBP_Extend}, 927 {0x11100, 0x11102, WBP_Extend},
@@ -898,6 +930,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
898 {0x1112C, 0x1112C, WBP_Extend}, 930 {0x1112C, 0x1112C, WBP_Extend},
899 {0x1112D, 0x11134, WBP_Extend}, 931 {0x1112D, 0x11134, WBP_Extend},
900 {0x11136, 0x1113F, WBP_Numeric}, 932 {0x11136, 0x1113F, WBP_Numeric},
933 {0x11144, 0x11144, WBP_ALetter},
934 {0x11145, 0x11146, WBP_Extend},
901 {0x11150, 0x11172, WBP_ALetter}, 935 {0x11150, 0x11172, WBP_ALetter},
902 {0x11173, 0x11173, WBP_Extend}, 936 {0x11173, 0x11173, WBP_Extend},
903 {0x11176, 0x11176, WBP_ALetter}, 937 {0x11176, 0x11176, WBP_ALetter},
@@ -908,7 +942,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
908 {0x111B6, 0x111BE, WBP_Extend}, 942 {0x111B6, 0x111BE, WBP_Extend},
909 {0x111BF, 0x111C0, WBP_Extend}, 943 {0x111BF, 0x111C0, WBP_Extend},
910 {0x111C1, 0x111C4, WBP_ALetter}, 944 {0x111C1, 0x111C4, WBP_ALetter},
911 {0x111CA, 0x111CC, WBP_Extend}, 945 {0x111C9, 0x111CC, WBP_Extend},
912 {0x111D0, 0x111D9, WBP_Numeric}, 946 {0x111D0, 0x111D9, WBP_Numeric},
913 {0x111DA, 0x111DA, WBP_ALetter}, 947 {0x111DA, 0x111DA, WBP_ALetter},
914 {0x111DC, 0x111DC, WBP_ALetter}, 948 {0x111DC, 0x111DC, WBP_ALetter},
@@ -939,7 +973,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
939 {0x1132A, 0x11330, WBP_ALetter}, 973 {0x1132A, 0x11330, WBP_ALetter},
940 {0x11332, 0x11333, WBP_ALetter}, 974 {0x11332, 0x11333, WBP_ALetter},
941 {0x11335, 0x11339, WBP_ALetter}, 975 {0x11335, 0x11339, WBP_ALetter},
942 {0x1133C, 0x1133C, WBP_Extend}, 976 {0x1133B, 0x1133C, WBP_Extend},
943 {0x1133D, 0x1133D, WBP_ALetter}, 977 {0x1133D, 0x1133D, WBP_ALetter},
944 {0x1133E, 0x1133F, WBP_Extend}, 978 {0x1133E, 0x1133F, WBP_Extend},
945 {0x11340, 0x11340, WBP_Extend}, 979 {0x11340, 0x11340, WBP_Extend},
@@ -961,6 +995,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
961 {0x11446, 0x11446, WBP_Extend}, 995 {0x11446, 0x11446, WBP_Extend},
962 {0x11447, 0x1144A, WBP_ALetter}, 996 {0x11447, 0x1144A, WBP_ALetter},
963 {0x11450, 0x11459, WBP_Numeric}, 997 {0x11450, 0x11459, WBP_Numeric},
998 {0x1145E, 0x1145E, WBP_Extend},
999 {0x1145F, 0x1145F, WBP_ALetter},
964 {0x11480, 0x114AF, WBP_ALetter}, 1000 {0x11480, 0x114AF, WBP_ALetter},
965 {0x114B0, 0x114B2, WBP_Extend}, 1001 {0x114B0, 0x114B2, WBP_Extend},
966 {0x114B3, 0x114B8, WBP_Extend}, 1002 {0x114B3, 0x114B8, WBP_Extend},
@@ -999,6 +1035,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
999 {0x116B0, 0x116B5, WBP_Extend}, 1035 {0x116B0, 0x116B5, WBP_Extend},
1000 {0x116B6, 0x116B6, WBP_Extend}, 1036 {0x116B6, 0x116B6, WBP_Extend},
1001 {0x116B7, 0x116B7, WBP_Extend}, 1037 {0x116B7, 0x116B7, WBP_Extend},
1038 {0x116B8, 0x116B8, WBP_ALetter},
1002 {0x116C0, 0x116C9, WBP_Numeric}, 1039 {0x116C0, 0x116C9, WBP_Numeric},
1003 {0x1171D, 0x1171F, WBP_Extend}, 1040 {0x1171D, 0x1171F, WBP_Extend},
1004 {0x11720, 0x11721, WBP_Extend}, 1041 {0x11720, 0x11721, WBP_Extend},
@@ -1006,9 +1043,41 @@ static const struct WordBreakProperties wb_prop_default[] = {
1006 {0x11726, 0x11726, WBP_Extend}, 1043 {0x11726, 0x11726, WBP_Extend},
1007 {0x11727, 0x1172B, WBP_Extend}, 1044 {0x11727, 0x1172B, WBP_Extend},
1008 {0x11730, 0x11739, WBP_Numeric}, 1045 {0x11730, 0x11739, WBP_Numeric},
1046 {0x11800, 0x1182B, WBP_ALetter},
1047 {0x1182C, 0x1182E, WBP_Extend},
1048 {0x1182F, 0x11837, WBP_Extend},
1049 {0x11838, 0x11838, WBP_Extend},
1050 {0x11839, 0x1183A, WBP_Extend},
1009 {0x118A0, 0x118DF, WBP_ALetter}, 1051 {0x118A0, 0x118DF, WBP_ALetter},
1010 {0x118E0, 0x118E9, WBP_Numeric}, 1052 {0x118E0, 0x118E9, WBP_Numeric},
1011 {0x118FF, 0x118FF, WBP_ALetter}, 1053 {0x118FF, 0x118FF, WBP_ALetter},
1054 {0x119A0, 0x119A7, WBP_ALetter},
1055 {0x119AA, 0x119D0, WBP_ALetter},
1056 {0x119D1, 0x119D3, WBP_Extend},
1057 {0x119D4, 0x119D7, WBP_Extend},
1058 {0x119DA, 0x119DB, WBP_Extend},
1059 {0x119DC, 0x119DF, WBP_Extend},
1060 {0x119E0, 0x119E0, WBP_Extend},
1061 {0x119E1, 0x119E1, WBP_ALetter},
1062 {0x119E3, 0x119E3, WBP_ALetter},
1063 {0x119E4, 0x119E4, WBP_Extend},
1064 {0x11A00, 0x11A00, WBP_ALetter},
1065 {0x11A01, 0x11A0A, WBP_Extend},
1066 {0x11A0B, 0x11A32, WBP_ALetter},
1067 {0x11A33, 0x11A38, WBP_Extend},
1068 {0x11A39, 0x11A39, WBP_Extend},
1069 {0x11A3A, 0x11A3A, WBP_ALetter},
1070 {0x11A3B, 0x11A3E, WBP_Extend},
1071 {0x11A47, 0x11A47, WBP_Extend},
1072 {0x11A50, 0x11A50, WBP_ALetter},
1073 {0x11A51, 0x11A56, WBP_Extend},
1074 {0x11A57, 0x11A58, WBP_Extend},
1075 {0x11A59, 0x11A5B, WBP_Extend},
1076 {0x11A5C, 0x11A89, WBP_ALetter},
1077 {0x11A8A, 0x11A96, WBP_Extend},
1078 {0x11A97, 0x11A97, WBP_Extend},
1079 {0x11A98, 0x11A99, WBP_Extend},
1080 {0x11A9D, 0x11A9D, WBP_ALetter},
1012 {0x11AC0, 0x11AF8, WBP_ALetter}, 1081 {0x11AC0, 0x11AF8, WBP_ALetter},
1013 {0x11C00, 0x11C08, WBP_ALetter}, 1082 {0x11C00, 0x11C08, WBP_ALetter},
1014 {0x11C0A, 0x11C2E, WBP_ALetter}, 1083 {0x11C0A, 0x11C2E, WBP_ALetter},
@@ -1027,10 +1096,35 @@ static const struct WordBreakProperties wb_prop_default[] = {
1027 {0x11CB2, 0x11CB3, WBP_Extend}, 1096 {0x11CB2, 0x11CB3, WBP_Extend},
1028 {0x11CB4, 0x11CB4, WBP_Extend}, 1097 {0x11CB4, 0x11CB4, WBP_Extend},
1029 {0x11CB5, 0x11CB6, WBP_Extend}, 1098 {0x11CB5, 0x11CB6, WBP_Extend},
1099 {0x11D00, 0x11D06, WBP_ALetter},
1100 {0x11D08, 0x11D09, WBP_ALetter},
1101 {0x11D0B, 0x11D30, WBP_ALetter},
1102 {0x11D31, 0x11D36, WBP_Extend},
1103 {0x11D3A, 0x11D3A, WBP_Extend},
1104 {0x11D3C, 0x11D3D, WBP_Extend},
1105 {0x11D3F, 0x11D45, WBP_Extend},
1106 {0x11D46, 0x11D46, WBP_ALetter},
1107 {0x11D47, 0x11D47, WBP_Extend},
1108 {0x11D50, 0x11D59, WBP_Numeric},
1109 {0x11D60, 0x11D65, WBP_ALetter},
1110 {0x11D67, 0x11D68, WBP_ALetter},
1111 {0x11D6A, 0x11D89, WBP_ALetter},
1112 {0x11D8A, 0x11D8E, WBP_Extend},
1113 {0x11D90, 0x11D91, WBP_Extend},
1114 {0x11D93, 0x11D94, WBP_Extend},
1115 {0x11D95, 0x11D95, WBP_Extend},
1116 {0x11D96, 0x11D96, WBP_Extend},
1117 {0x11D97, 0x11D97, WBP_Extend},
1118 {0x11D98, 0x11D98, WBP_ALetter},
1119 {0x11DA0, 0x11DA9, WBP_Numeric},
1120 {0x11EE0, 0x11EF2, WBP_ALetter},
1121 {0x11EF3, 0x11EF4, WBP_Extend},
1122 {0x11EF5, 0x11EF6, WBP_Extend},
1030 {0x12000, 0x12399, WBP_ALetter}, 1123 {0x12000, 0x12399, WBP_ALetter},
1031 {0x12400, 0x1246E, WBP_ALetter}, 1124 {0x12400, 0x1246E, WBP_ALetter},
1032 {0x12480, 0x12543, WBP_ALetter}, 1125 {0x12480, 0x12543, WBP_ALetter},
1033 {0x13000, 0x1342E, WBP_ALetter}, 1126 {0x13000, 0x1342E, WBP_ALetter},
1127 {0x13430, 0x13438, WBP_Format},
1034 {0x14400, 0x14646, WBP_ALetter}, 1128 {0x14400, 0x14646, WBP_ALetter},
1035 {0x16800, 0x16A38, WBP_ALetter}, 1129 {0x16800, 0x16A38, WBP_ALetter},
1036 {0x16A40, 0x16A5E, WBP_ALetter}, 1130 {0x16A40, 0x16A5E, WBP_ALetter},
@@ -1043,13 +1137,17 @@ static const struct WordBreakProperties wb_prop_default[] = {
1043 {0x16B50, 0x16B59, WBP_Numeric}, 1137 {0x16B50, 0x16B59, WBP_Numeric},
1044 {0x16B63, 0x16B77, WBP_ALetter}, 1138 {0x16B63, 0x16B77, WBP_ALetter},
1045 {0x16B7D, 0x16B8F, WBP_ALetter}, 1139 {0x16B7D, 0x16B8F, WBP_ALetter},
1046 {0x16F00, 0x16F44, WBP_ALetter}, 1140 {0x16E40, 0x16E7F, WBP_ALetter},
1141 {0x16F00, 0x16F4A, WBP_ALetter},
1142 {0x16F4F, 0x16F4F, WBP_Extend},
1047 {0x16F50, 0x16F50, WBP_ALetter}, 1143 {0x16F50, 0x16F50, WBP_ALetter},
1048 {0x16F51, 0x16F7E, WBP_Extend}, 1144 {0x16F51, 0x16F87, WBP_Extend},
1049 {0x16F8F, 0x16F92, WBP_Extend}, 1145 {0x16F8F, 0x16F92, WBP_Extend},
1050 {0x16F93, 0x16F9F, WBP_ALetter}, 1146 {0x16F93, 0x16F9F, WBP_ALetter},
1051 {0x16FE0, 0x16FE0, WBP_ALetter}, 1147 {0x16FE0, 0x16FE1, WBP_ALetter},
1148 {0x16FE3, 0x16FE3, WBP_ALetter},
1052 {0x1B000, 0x1B000, WBP_Katakana}, 1149 {0x1B000, 0x1B000, WBP_Katakana},
1150 {0x1B164, 0x1B167, WBP_Katakana},
1053 {0x1BC00, 0x1BC6A, WBP_ALetter}, 1151 {0x1BC00, 0x1BC6A, WBP_ALetter},
1054 {0x1BC70, 0x1BC7C, WBP_ALetter}, 1152 {0x1BC70, 0x1BC7C, WBP_ALetter},
1055 {0x1BC80, 0x1BC88, WBP_ALetter}, 1153 {0x1BC80, 0x1BC88, WBP_ALetter},
@@ -1106,10 +1204,19 @@ static const struct WordBreakProperties wb_prop_default[] = {
1106 {0x1E01B, 0x1E021, WBP_Extend}, 1204 {0x1E01B, 0x1E021, WBP_Extend},
1107 {0x1E023, 0x1E024, WBP_Extend}, 1205 {0x1E023, 0x1E024, WBP_Extend},
1108 {0x1E026, 0x1E02A, WBP_Extend}, 1206 {0x1E026, 0x1E02A, WBP_Extend},
1207 {0x1E100, 0x1E12C, WBP_ALetter},
1208 {0x1E130, 0x1E136, WBP_Extend},
1209 {0x1E137, 0x1E13D, WBP_ALetter},
1210 {0x1E140, 0x1E149, WBP_Numeric},
1211 {0x1E14E, 0x1E14E, WBP_ALetter},
1212 {0x1E2C0, 0x1E2EB, WBP_ALetter},
1213 {0x1E2EC, 0x1E2EF, WBP_Extend},
1214 {0x1E2F0, 0x1E2F9, WBP_Numeric},
1109 {0x1E800, 0x1E8C4, WBP_ALetter}, 1215 {0x1E800, 0x1E8C4, WBP_ALetter},
1110 {0x1E8D0, 0x1E8D6, WBP_Extend}, 1216 {0x1E8D0, 0x1E8D6, WBP_Extend},
1111 {0x1E900, 0x1E943, WBP_ALetter}, 1217 {0x1E900, 0x1E943, WBP_ALetter},
1112 {0x1E944, 0x1E94A, WBP_Extend}, 1218 {0x1E944, 0x1E94A, WBP_Extend},
1219 {0x1E94B, 0x1E94B, WBP_ALetter},
1113 {0x1E950, 0x1E959, WBP_Numeric}, 1220 {0x1E950, 0x1E959, WBP_Numeric},
1114 {0x1EE00, 0x1EE03, WBP_ALetter}, 1221 {0x1EE00, 0x1EE03, WBP_ALetter},
1115 {0x1EE05, 0x1EE1F, WBP_ALetter}, 1222 {0x1EE05, 0x1EE1F, WBP_ALetter},
@@ -1148,35 +1255,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
1148 {0x1F150, 0x1F169, WBP_ALetter}, 1255 {0x1F150, 0x1F169, WBP_ALetter},
1149 {0x1F170, 0x1F189, WBP_ALetter}, 1256 {0x1F170, 0x1F189, WBP_ALetter},
1150 {0x1F1E6, 0x1F1FF, WBP_Regional_Indicator}, 1257 {0x1F1E6, 0x1F1FF, WBP_Regional_Indicator},
1151 {0x1F385, 0x1F385, WBP_E_Base}, 1258 {0x1F3FB, 0x1F3FF, WBP_Extend},
1152 {0x1F3C3, 0x1F3C4, WBP_E_Base},
1153 {0x1F3CA, 0x1F3CB, WBP_E_Base},
1154 {0x1F3FB, 0x1F3FF, WBP_E_Modifier},
1155 {0x1F442, 0x1F443, WBP_E_Base},
1156 {0x1F446, 0x1F450, WBP_E_Base},
1157 {0x1F466, 0x1F469, WBP_E_Base_GAZ},
1158 {0x1F46E, 0x1F46E, WBP_E_Base},
1159 {0x1F470, 0x1F478, WBP_E_Base},
1160 {0x1F47C, 0x1F47C, WBP_E_Base},
1161 {0x1F481, 0x1F483, WBP_E_Base},
1162 {0x1F485, 0x1F487, WBP_E_Base},
1163 {0x1F48B, 0x1F48B, WBP_Glue_After_Zwj},
1164 {0x1F4AA, 0x1F4AA, WBP_E_Base},
1165 {0x1F575, 0x1F575, WBP_E_Base},
1166 {0x1F57A, 0x1F57A, WBP_E_Base},
1167 {0x1F590, 0x1F590, WBP_E_Base},
1168 {0x1F595, 0x1F596, WBP_E_Base},
1169 {0x1F5E8, 0x1F5E8, WBP_Glue_After_Zwj},
1170 {0x1F645, 0x1F647, WBP_E_Base},
1171 {0x1F64B, 0x1F64F, WBP_E_Base},
1172 {0x1F6A3, 0x1F6A3, WBP_E_Base},
1173 {0x1F6B4, 0x1F6B6, WBP_E_Base},
1174 {0x1F6C0, 0x1F6C0, WBP_E_Base},
1175 {0x1F918, 0x1F91E, WBP_E_Base},
1176 {0x1F926, 0x1F926, WBP_E_Base},
1177 {0x1F930, 0x1F930, WBP_E_Base},
1178 {0x1F933, 0x1F939, WBP_E_Base},
1179 {0x1F93C, 0x1F93E, WBP_E_Base},
1180 {0xE0001, 0xE0001, WBP_Format}, 1259 {0xE0001, 0xE0001, WBP_Format},
1181 {0xE0020, 0xE007F, WBP_Extend}, 1260 {0xE0020, 0xE007F, WBP_Extend},
1182 {0xE0100, 0xE01EF, WBP_Extend}, 1261 {0xE0100, 0xE01EF, WBP_Extend},
diff --git a/src/static_libs/libunibreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h
index 82cd98e7c3..03feb3cdac 100644
--- a/src/static_libs/libunibreak/wordbreakdef.h
+++ b/src/static_libs/libunibreak/wordbreakdef.h
@@ -4,7 +4,8 @@
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2013-16 Tom Hacohen <tom at stosb dot com> 7 * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
8 * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
8 * 9 *
9 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 11 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +31,9 @@
30 * Unicode 6.0.0: 31 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 32 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32 * 33 *
33 * This library has been updated according to Revision 29, for 34 * This library has been updated according to Revision 35, for
34 * Unicode 9.0.0: 35 * Unicode 12.0.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-29.html> 36 * <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
36 * 37 *
37 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
@@ -51,7 +52,7 @@
51 52
52/** 53/**
53 * Word break classes. This is a direct mapping of Table 3 of Unicode 54 * Word break classes. This is a direct mapping of Table 3 of Unicode
54 * Standard Annex 29, Revision 23. 55 * Standard Annex 29, Revision 35.
55 */ 56 */
56enum WordBreakClass 57enum WordBreakClass
57{ 58{
@@ -73,10 +74,7 @@ enum WordBreakClass
73 WBP_MidNum, 74 WBP_MidNum,
74 WBP_Numeric, 75 WBP_Numeric,
75 WBP_ExtendNumLet, 76 WBP_ExtendNumLet,
76 WBP_E_Base, 77 WBP_WSegSpace,
77 WBP_E_Modifier,
78 WBP_Glue_After_Zwj,
79 WBP_E_Base_GAZ,
80 WBP_Any 78 WBP_Any
81}; 79};
82 80
@@ -86,7 +84,7 @@ enum WordBreakClass
86 */ 84 */
87struct WordBreakProperties 85struct WordBreakProperties
88{ 86{
89 utf32_t start; /**< Starting coding point */ 87 utf32_t start; /**< Start codepoint */
90 utf32_t end; /**< End coding point */ 88 utf32_t end; /**< End codepoint, inclusive */
91 enum WordBreakClass prop; /**< The word breaking property */ 89 enum WordBreakClass prop; /**< The word breaking property */
92}; 90};
diff --git a/src/tests/evas/evas_test_textblock.c b/src/tests/evas/evas_test_textblock.c
index a05f30af37..b3177ff599 100644
--- a/src/tests/evas/evas_test_textblock.c
+++ b/src/tests/evas/evas_test_textblock.c
@@ -1054,6 +1054,10 @@ EFL_START_TEST(evas_textblock_cursor)
1054 pos = evas_textblock_cursor_pos_get(cur); 1054 pos = evas_textblock_cursor_pos_get(cur);
1055 1055
1056 ck_assert_int_eq(pos, 0); 1056 ck_assert_int_eq(pos, 0);
1057 evas_object_textblock_text_markup_set(tb, "&#x1f3f3;&#xfe0f;&#x200d;&#x1f308;");
1058 evas_textblock_cursor_pos_set(cur, 0);
1059 evas_textblock_cursor_cluster_next(cur);
1060 ck_assert_int_eq(4, evas_textblock_cursor_pos_get(cur));
1057 1061
1058 END_TB_TEST(); 1062 END_TB_TEST();
1059} 1063}