summary | refs | log | tree | commit | diff
path: root/src/static_libs/libunibreak/wordbreak.c
diff options
context:
space:
mode:
author: Tom Hacohen <tom@stosb.com> 2013-08-07 11:56:46 +0100
committer: Tom Hacohen <tom@stosb.com> 2013-08-07 11:57:09 +0100
commit: 4185694ecbe5293fad72f7eb9dd971900decb778 (patch)
tree: 919dca7f7dbe42d349e25b1f0017b591c7cd2b66 /src/static_libs/libunibreak/wordbreak.c
parent: 3cc3d0878a6dc29d0dcac9c2f5495bc8bdd84b7f (diff)
Synced libunibreak local copy with upstream.
Diffstat (limited to 'src/static_libs/libunibreak/wordbreak.c')
-rw-r--r-- src/static_libs/libunibreak/wordbreak.c 39
1 files changed, 25 insertions, 14 deletions
diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c
index 60db99e426..f2996c0e81 100644
--- a/src/static_libs/libunibreak/wordbreak.c
+++ b/src/static_libs/libunibreak/wordbreak.c
@@ -40,7 +40,7 @@
40 * Implementation of the word breaking algorithm as described in Unicode 40 * Implementation of the word breaking algorithm as described in Unicode
41 * Standard Annex 29. 41 * Standard Annex 29.
42 * 42 *
43 * @version 2.2, 2012/02/04 43 * @version 2.3, 2013/05/14
44 * @author Tom Hacohen 44 * @author Tom Hacohen
45 */ 45 */
46 46
@@ -188,7 +188,7 @@ static void set_wordbreaks(
188 188
189 switch (wbcCur) 189 switch (wbcCur)
190 { 190 {
191 case WBP_CR: 191 case WBP_CR:
192 /* WB3b */ 192 /* WB3b */
193 set_brks_to(s, brks, posLast, posCur, len, 193 set_brks_to(s, brks, posLast, posCur, len,
194 WORDBREAK_BREAK, get_next_char); 194 WORDBREAK_BREAK, get_next_char);
@@ -196,7 +196,7 @@ static void set_wordbreaks(
196 posLast = posCur; 196 posLast = posCur;
197 break; 197 break;
198 198
199 case WBP_LF: 199 case WBP_LF:
200 if (wbcSeqStart == WBP_CR) /* WB3 */ 200 if (wbcSeqStart == WBP_CR) /* WB3 */
201 { 201 {
202 set_brks_to(s, brks, posLast, posCur, len, 202 set_brks_to(s, brks, posLast, posCur, len,
@@ -207,7 +207,7 @@ static void set_wordbreaks(
207 } 207 }
208 /* Fall off */ 208 /* Fall off */
209 209
210 case WBP_Newline: 210 case WBP_Newline:
211 /* WB3a,3b */ 211 /* WB3a,3b */
212 set_brks_to(s, brks, posLast, posCur, len, 212 set_brks_to(s, brks, posLast, posCur, len,
213 WORDBREAK_BREAK, get_next_char); 213 WORDBREAK_BREAK, get_next_char);
@@ -215,8 +215,8 @@ static void set_wordbreaks(
215 posLast = posCur; 215 posLast = posCur;
216 break; 216 break;
217 217
218 case WBP_Extend: 218 case WBP_Extend:
219 case WBP_Format: 219 case WBP_Format:
220 /* WB4 - If not the first char/after a newline (WB3a,3b), skip 220 /* WB4 - If not the first char/after a newline (WB3a,3b), skip
221 * this class, set it to be the same as the prev, and mark 221 * this class, set it to be the same as the prev, and mark
222 * brks not to break before them. */ 222 * brks not to break before them. */
@@ -235,7 +235,7 @@ static void set_wordbreaks(
235 } 235 }
236 break; 236 break;
237 237
238 case WBP_Katakana: 238 case WBP_Katakana:
239 if ((wbcSeqStart == WBP_Katakana) || /* WB13 */ 239 if ((wbcSeqStart == WBP_Katakana) || /* WB13 */
240 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 240 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
241 { 241 {
@@ -252,7 +252,7 @@ static void set_wordbreaks(
252 posLast = posCur; 252 posLast = posCur;
253 break; 253 break;
254 254
255 case WBP_ALetter: 255 case WBP_ALetter:
256 if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */ 256 if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */
257 (wbcLast == WBP_Numeric) || /* WB10 */ 257 (wbcLast == WBP_Numeric) || /* WB10 */
258 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 258 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
@@ -270,7 +270,7 @@ static void set_wordbreaks(
270 posLast = posCur; 270 posLast = posCur;
271 break; 271 break;
272 272
273 case WBP_MidNumLet: 273 case WBP_MidNumLet:
274 if ((wbcLast == WBP_ALetter) || /* WB6,7 */ 274 if ((wbcLast == WBP_ALetter) || /* WB6,7 */
275 (wbcLast == WBP_Numeric)) /* WB11,12 */ 275 (wbcLast == WBP_Numeric)) /* WB11,12 */
276 { 276 {
@@ -285,7 +285,7 @@ static void set_wordbreaks(
285 } 285 }
286 break; 286 break;
287 287
288 case WBP_MidLetter: 288 case WBP_MidLetter:
289 if (wbcLast == WBP_ALetter) /* WB6,7 */ 289 if (wbcLast == WBP_ALetter) /* WB6,7 */
290 { 290 {
291 /* Go on */ 291 /* Go on */
@@ -299,7 +299,7 @@ static void set_wordbreaks(
299 } 299 }
300 break; 300 break;
301 301
302 case WBP_MidNum: 302 case WBP_MidNum:
303 if (wbcLast == WBP_Numeric) /* WB11,12 */ 303 if (wbcLast == WBP_Numeric) /* WB11,12 */
304 { 304 {
305 /* Go on */ 305 /* Go on */
@@ -313,7 +313,7 @@ static void set_wordbreaks(
313 } 313 }
314 break; 314 break;
315 315
316 case WBP_Numeric: 316 case WBP_Numeric:
317 if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */ 317 if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */
318 (wbcLast == WBP_ALetter) || /* WB9 */ 318 (wbcLast == WBP_ALetter) || /* WB9 */
319 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 319 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
@@ -331,7 +331,7 @@ static void set_wordbreaks(
331 posLast = posCur; 331 posLast = posCur;
332 break; 332 break;
333 333
334 case WBP_ExtendNumLet: 334 case WBP_ExtendNumLet:
335 /* WB13a,13b */ 335 /* WB13a,13b */
336 if ((wbcSeqStart == wbcLast) && 336 if ((wbcSeqStart == wbcLast) &&
337 ((wbcLast == WBP_ALetter) || 337 ((wbcLast == WBP_ALetter) ||
@@ -352,7 +352,18 @@ static void set_wordbreaks(
352 posLast = posCur; 352 posLast = posCur;
353 break; 353 break;
354 354
355 case WBP_Any: 355 case WBP_Regional:
356 /* WB13c */
357 if (wbcSeqStart == WBP_Regional)
358 {
359 set_brks_to(s, brks, posLast, posCur, len,
360 WORDBREAK_NOBREAK, get_next_char);
361 }
362 wbcSeqStart = wbcCur;
363 posLast = posCur;
364 break;
365
366 case WBP_Any:
356 /* Allow breaks and reset */ 367 /* Allow breaks and reset */
357 set_brks_to(s, brks, posLast, posCur, len, 368 set_brks_to(s, brks, posLast, posCur, len,
358 WORDBREAK_BREAK, get_next_char); 369 WORDBREAK_BREAK, get_next_char);