summaryrefslogtreecommitdiff
path: root/src/static_libs
diff options
context:
space:
mode:
author Tom Hacohen <tom@stosb.com> 2015-05-07 10:53:11 +0100
committer Tom Hacohen <tom@stosb.com> 2015-05-07 10:54:26 +0100
commit 7a49d23f90f41c478db9b7beb9763aa0cd74ae46 (patch)
tree 8ff009d70d91523d2c5f718ed395c8e23df43c4f /src/static_libs
parent ba77a837a37af0d154d7ceafbb5ab7d4f75090f6 (diff)
Static deps unibreak: update to what will soon be version 3.
Version 3 is not yet released, but this is on track to become it. This is based on commit: a815e11f7ebf35b59278f783227a829ee4692760. @feature.
Diffstat (limited to 'src/static_libs')
-rw-r--r--src/static_libs/libunibreak/ChangeLog164
-rw-r--r--src/static_libs/libunibreak/LICENCE5
-rw-r--r--src/static_libs/libunibreak/README.md (renamed from src/static_libs/libunibreak/README)0
-rw-r--r--src/static_libs/libunibreak/linebreak.c179
-rw-r--r--src/static_libs/libunibreak/linebreak.h29
-rw-r--r--src/static_libs/libunibreak/linebreakdata.c171
-rw-r--r--src/static_libs/libunibreak/linebreakdef.c8
-rw-r--r--src/static_libs/libunibreak/linebreakdef.h32
-rw-r--r--src/static_libs/libunibreak/unibreakbase.c41
-rw-r--r--src/static_libs/libunibreak/unibreakbase.h73
-rw-r--r--src/static_libs/libunibreak/unibreakdef.c159
-rw-r--r--src/static_libs/libunibreak/unibreakdef.h80
-rw-r--r--src/static_libs/libunibreak/wordbreak.c76
-rw-r--r--src/static_libs/libunibreak/wordbreak.h12
-rw-r--r--src/static_libs/libunibreak/wordbreakdata.c201
-rw-r--r--src/static_libs/libunibreak/wordbreakdef.h21
16 files changed, 948 insertions, 303 deletions
diff --git a/src/static_libs/libunibreak/ChangeLog b/src/static_libs/libunibreak/ChangeLog
index feb830b..44b56ea 100644
--- a/src/static_libs/libunibreak/ChangeLog
+++ b/src/static_libs/libunibreak/ChangeLog
@@ -1,3 +1,167 @@
12015-04-19 Wu Yongwei <wuyongwei@gmail.com>
2
3 * LICENCE: Update copyright information.
4
52015-04-19 Wu Yongwei <wuyongwei@gmail.com>
6
7 * src/linebreakdata2.tmp: Remove the unnecessary inclusion of
8 "linebreak.h".
9 * src/linebreakdata.c: Ditto.
10
112015-04-19 Wu Yongwei <wuyongwei@gmail.com>
12
13 Use extended regexp to simplify expressions.
14 * src/LineBreak1.sed: Simplify with extended regexp.
15 * src/LineBreak2.sed: Ditto.
16 * src/Makefile.am: Add `-E' to the command line of sed.
17
182015-04-19 Wu Yongwei <wuyongwei@gmail.com>
19
20 Make further clean-up for the 3.0 release.
21 * configure.ac (AC_INIT): Change the library version to `3.0'.
22 * Doxyfile (PROJECT_NUMBER): Change to `3.0'.
23 (EXCLUDE): Add the missing `src/' before `filter_dup.c'.
24 * src/wordbreakdata1.tmpl: Remove the inclusion of "linebreak.h".
25 * src/wordbreakdata.c: Ditto.
26
272015-04-19 Wu Yongwei <wuyongwei@gmail.com>
28
29 * src/wordbreakdef.h: Include "unibreakdef.h".
30
312015-04-19 Wu Yongwei <wuyongwei@gmail.com>
32
33 * purge: Make it remove `compile'.
34
352015-04-18 Wu Yongwei <wuyongwei@gmail.com>
36
37 * src/unibreakdef.c: New file.
38 * src/unibreakdef.h: New file.
39 * src/wordbreak.c: Rename reference to `lb_get_next_char...' to
40 `ub_get_next_char...'.
41 * src/linebreak.c: Ditto.
42 (lb_get_next_char_utf8): Remove definition.
43 (lb_get_next_char_utf16): Ditto.
44 (lb_get_next_char_utf32): Ditto.
45 * src/linebreakdef.h: Include "unibreakdef.h".
46 (EOS): Remove definition.
47 (get_next_char_t): Remove typedef.
48 (lb_get_next_char_utf8): Remove declaration.
49 (lb_get_next_char_utf16): Ditto.
50 (lb_get_next_char_utf32): Ditto.
51 * src/Makefile.am (include_HEADERS): Add `unibreakdef.h'.
52 (libunibreak_la_SOURCES): Add `unibreakdef.c'.
53 (libunibreak_la_CFLAGS): Define to `-W -Wall'.
54
552015-04-18 Wu Yongwei <wuyongwei@gmail.com>
56
57 * src/unibreakbase.c: New file.
58 * src/unibreakbase.h: New file.
59 * src/linebreak.c (linebreak_version): Remove definition.
60 * src/linebreak.h: Include "unibreakbase.h".
61 (linebreak_version): Remove declaration.
62 (LINEBREAK_VERSION): Remove definition.
63 (utf8_t): Remove typedef.
64 (utf16_t): Remove typedef.
65 (utf32_t): Remove typedef.
66 * src/wordbreak.h: Include "unibreakbase.h" instead of
67 "linebreak.h".
68 * src/Makefile.am (include_HEADERS): Add `unibreakbase.h'.
69 (libunibreak_la_SOURCES): Add `unibreakbase.c'.
70 (libunibreak_la_LDFLAGS): Set the version-info to `3:0:0'.
71
722015-04-13 Wu Yongwei <wuyongwei@gmail.com>
73
74 * src/wordbreak.c: Update copyright and version information.
75 * src/wordbreak.h: Ditto.
76 * src/wordbreakdef.h: Ditto.
77
782015-04-13 Tom Hacohen <tom@stosb.com>
79
80 * src/wordbreakdef.h (enum WordBreakClass): Clean up and reorder.
81
822015-04-10 Tom Hacohen <tom@stosb.com>
83
84 Don't ship internal header.
85 * src/Makefile.am (include_HEADERS): Remove `wordbreakdef.h'.
86 (EXTRA_DIST): Add `wordbreakdef.h'.
87
882015-04-10 Tom Hacohen <tom@stosb.com>
89
90 Update files according to UAX #29-29, for Unicode 7.0.0.
91 * src/wordbreak.c (set_wordbreaks): Take care of Hebrew letters.
92 * src/wordbreakdef.h (enum WordBreakClass): Add WBP_Hebrew_Letter,
93 WBP_Single_Quote, and WBP_Double_Quote.
94 * src/wordbreakdata.c: Regenerate from WordBreakProperty-7.0.0.txt.
95
962015-04-10 Tom Hacohen <tom@stosb.com>
97
98 * src/sort_numeric_hex.py: Fix compatibility issue with new Python.
99 * src/Makefile.am (wordbreakdata): Fix word break data enum for
100 names with underscores.
101 * src/wordbreakdef.h (enum WordBreakClass): Correct WBP_Regional to
102 WBP_Regional_Indicator.
103 * src/wordbreak.c: Ditto.
104 * src/wordbreakdata.c: Ditto.
105
1062015-04-05 Wu Yongwei <wuyongwei@gmail.com>
107
108 * src/linebreak.c: Make pointer alignment consistent.
109 * src/linebreak.h: Ditto.
110 * src/linebreakdef.h: Ditto.
111
1122015-04-05 Wu Yongwei <wuyongwei@gmail.com>
113
114 * src/linebreak.h: Update copyright year and UAX information.
115 * src/linebreakdef.c: Ditto.
116
1172015-04-05 Wu Yongwei <wuyongwei@gmail.com>
118
119 Implement rule LB21a, as introduced by Revision 28 of UAX #14.
120 * src/linebreakdef.h (struct LineBreakContext): Add new field
121 fLb21aHebrew.
122 * src/linebreak.c (treat_first_char): Initialize fLb21aHebrew
123 properly.
124 (lb_init_break_context): Clear fLb21aHebrew.
125 (get_lb_result_lookup): Apply rule LB21a and update fLb21aHebrew.
126
1272014-12-06 Mikhail Polubisok <mpolubisok@gmail.com>
128
129 * src/linebreak.c (get_lb_result_lookup): Extend assertion condition
130 that has been wrong since Unicode 6.2.
131
1322014-09-19 Petr Filipsky <philodej@gmail.com>
133
134 * src/LineBreak1.sed: Fix sed expression due to changed
135 LineBreak.txt file format.
136
1372014-05-24 Wu Yongwei <wuyongwei@gmail.com>
138
139 * src/Makefile.gcc (TARGET): Change from `liblinebreak.a' to
140 `libunibreak.a'.
141
1422014-05-23 Christoph Junghans <junghans@votca.org>
143
144 Fix `make install DESTDIR=...'.
145 * Makefile.am (install-exec-hook): Prefix `$(DESTDIR)/' before
146 `${libdir}'.
147
1482014-02-16 Wu Yongwei <wuyongwei@gmail.com>
149
150 Following https://people.gnome.org/~walters/docs/build-api.txt, add
151 a quasi-standard autogen.sh, which generates `configure' and runs it
152 optionally.
153 * autogen.sh: New file.
154
1552014-02-12 Wu Yongwei <wuyongwei@gmail.com>
156
157 * bootstrap: Remove the overkill bits and add back autoreconf.
158 * purge: Ensure config.cache is removed.
159
1602014-02-10 Tom Hacohen <tom@stosb.com>
161
162 * bootstrap: Solve bootstrap problems found on Linux and Mac (thanks
163 to Nick Shvelidze and Christopher Baker).
164
12013-11-14 Wu Yongwei <wuyongwei@gmail.com> 1652013-11-14 Wu Yongwei <wuyongwei@gmail.com>
2 166
3 * src/linebreak.c: Add/update comments and doc comments. 167 * src/linebreak.c: Add/update comments and doc comments.
diff --git a/src/static_libs/libunibreak/LICENCE b/src/static_libs/libunibreak/LICENCE
index ceec155..3eda8d5 100644
--- a/src/static_libs/libunibreak/LICENCE
+++ b/src/static_libs/libunibreak/LICENCE
@@ -1,5 +1,6 @@
1Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com> 1Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com>
2Copyright (C) 2012 Tom Hacohen <tom dot hacohen at samsung dot com> 2Copyright (C) 2012-2015 Tom Hacohen <tom at stosb dot com>
3Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
3 4
4This software is provided 'as-is', without any express or implied 5This software is provided 'as-is', without any express or implied
5warranty. In no event will the author be held liable for any damages 6warranty. In no event will the author be held liable for any damages
diff --git a/src/static_libs/libunibreak/README b/src/static_libs/libunibreak/README.md
index 52cd738..52cd738 100644
--- a/src/static_libs/libunibreak/README
+++ b/src/static_libs/libunibreak/README.md
diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c
index 9716df4..7c8ff9e 100644
--- a/src/static_libs/libunibreak/linebreak.c
+++ b/src/static_libs/libunibreak/linebreak.c
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> 8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9 * 9 *
10 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
@@ -31,9 +31,9 @@
31 * Unicode 5.0.0: 31 * Unicode 5.0.0:
32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33 * 33 *
34 * This library has been updated according to Revision 30, for 34 * This library has been updated according to Revision 33, for
35 * Unicode 6.2.0: 35 * Unicode 7.0.0:
36 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 36 * <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
37 * 37 *
38 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
@@ -45,7 +45,7 @@
45 * Implementation of the line breaking algorithm as described in Unicode 45 * Implementation of the line breaking algorithm as described in Unicode
46 * Standard Annex 14. 46 * Standard Annex 14.
47 * 47 *
48 * @version 2.5, 2013/11/14 48 * @version 2.7, 2015/04/18
49 * @author Wu Yongwei 49 * @author Wu Yongwei
50 * @author Petr Filipsky 50 * @author Petr Filipsky
51 */ 51 */
@@ -67,11 +67,6 @@
67#define LINEBREAK_INDEX_SIZE 40 67#define LINEBREAK_INDEX_SIZE 40
68 68
69/** 69/**
70 * Version number of the library.
71 */
72const int linebreak_version = LINEBREAK_VERSION;
73
74/**
75 * Enumeration of break actions. They are used in the break action 70 * Enumeration of break actions. They are used in the break action
76 * pair table below. 71 * pair table below.
77 */ 72 */
@@ -451,7 +446,7 @@ static enum LineBreakClass resolve_lb_class(
451 * @post \a lbpCtx->lbcCur has the updated line break class 446 * @post \a lbpCtx->lbcCur has the updated line break class
452 */ 447 */
453static void treat_first_char( 448static void treat_first_char(
454 struct LineBreakContext* lbpCtx) 449 struct LineBreakContext *lbpCtx)
455{ 450{
456 switch (lbpCtx->lbcCur) 451 switch (lbpCtx->lbcCur)
457 { 452 {
@@ -465,6 +460,8 @@ static void treat_first_char(
465 case LBP_SP: 460 case LBP_SP:
466 lbpCtx->lbcCur = LBP_WJ; /* Leading space treated as WJ */ 461 lbpCtx->lbcCur = LBP_WJ; /* Leading space treated as WJ */
467 break; 462 break;
463 case LBP_HL:
464 lbpCtx->fLb21aHebrew = 1; /* Rule LB21a */
468 default: 465 default:
469 break; 466 break;
470 } 467 }
@@ -485,7 +482,7 @@ static void treat_first_char(
485 * table lookup is needed 482 * table lookup is needed
486 */ 483 */
487static int get_lb_result_simple( 484static int get_lb_result_simple(
488 struct LineBreakContext* lbpCtx) 485 struct LineBreakContext *lbpCtx)
489{ 486{
490 if (lbpCtx->lbcCur == LBP_BK 487 if (lbpCtx->lbcCur == LBP_BK
491 || (lbpCtx->lbcCur == LBP_CR && lbpCtx->lbcNew != LBP_LF)) 488 || (lbpCtx->lbcCur == LBP_CR && lbpCtx->lbcNew != LBP_LF))
@@ -528,13 +525,12 @@ static int get_lb_result_simple(
528 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK 525 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
529 */ 526 */
530static int get_lb_result_lookup( 527static int get_lb_result_lookup(
531 struct LineBreakContext* lbpCtx) 528 struct LineBreakContext *lbpCtx)
532{ 529{
533 /* TODO: Rule LB21a, as introduced by Revision 28 of UAX#14, is not
534 * yet implemented below. */
535 int brk = LINEBREAK_UNDEFINED; 530 int brk = LINEBREAK_UNDEFINED;
536 assert(lbpCtx->lbcCur <= LBP_JT); 531
537 assert(lbpCtx->lbcNew <= LBP_JT); 532 assert(lbpCtx->lbcCur <= LBP_RI);
533 assert(lbpCtx->lbcNew <= LBP_RI);
538 switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1]) 534 switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1])
539 { 535 {
540 case DIR_BRK: 536 case DIR_BRK:
@@ -555,6 +551,19 @@ static int get_lb_result_lookup(
555 brk = LINEBREAK_NOBREAK; 551 brk = LINEBREAK_NOBREAK;
556 break; 552 break;
557 } 553 }
554
555 /* Special processing due to rule LB21a */
556 if (lbpCtx->fLb21aHebrew &&
557 (lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA))
558 {
559 brk = LINEBREAK_NOBREAK;
560 lbpCtx->fLb21aHebrew = 0;
561 }
562 else if (!(lbpCtx->lbcNew == LBP_HY || lbpCtx->lbcNew == LBP_BA))
563 {
564 lbpCtx->fLb21aHebrew = (lbpCtx->lbcNew == LBP_HL);
565 }
566
558 lbpCtx->lbcCur = lbpCtx->lbcNew; 567 lbpCtx->lbcCur = lbpCtx->lbcNew;
559 return brk; 568 return brk;
560} 569}
@@ -568,9 +577,9 @@ static int get_lb_result_lookup(
568 * @post the line breaking context is initialized 577 * @post the line breaking context is initialized
569 */ 578 */
570void lb_init_break_context( 579void lb_init_break_context(
571 struct LineBreakContext* lbpCtx, 580 struct LineBreakContext *lbpCtx,
572 utf32_t ch, 581 utf32_t ch,
573 const char* lang) 582 const char *lang)
574{ 583{
575 lbpCtx->lang = lang; 584 lbpCtx->lang = lang;
576 lbpCtx->lbpLang = get_lb_prop_lang(lang); 585 lbpCtx->lbpLang = get_lb_prop_lang(lang);
@@ -579,6 +588,7 @@ void lb_init_break_context(
579 lbpCtx->lbcCur = resolve_lb_class( 588 lbpCtx->lbcCur = resolve_lb_class(
580 get_char_lb_class_lang(ch, lbpCtx->lbpLang), 589 get_char_lb_class_lang(ch, lbpCtx->lbpLang),
581 lbpCtx->lang); 590 lbpCtx->lang);
591 lbpCtx->fLb21aHebrew = 0;
582 treat_first_char(lbpCtx); 592 treat_first_char(lbpCtx);
583} 593}
584 594
@@ -593,7 +603,7 @@ void lb_init_break_context(
593 * @post the line breaking context is updated 603 * @post the line breaking context is updated
594 */ 604 */
595int lb_process_next_char( 605int lb_process_next_char(
596 struct LineBreakContext* lbpCtx, 606 struct LineBreakContext *lbpCtx,
597 utf32_t ch ) 607 utf32_t ch )
598{ 608{
599 int brk; 609 int brk;
@@ -618,127 +628,6 @@ int lb_process_next_char(
618} 628}
619 629
620/** 630/**
621 * Gets the next Unicode character in a UTF-8 sequence. The index will
622 * be advanced to the next complete character, unless the end of string
623 * is reached in the middle of a UTF-8 sequence.
624 *
625 * @param[in] s input UTF-8 string
626 * @param[in] len length of the string in bytes
627 * @param[in,out] ip pointer to the index
628 * @return the Unicode character beginning at the index; or
629 * #EOS if end of input is encountered
630 */
631utf32_t lb_get_next_char_utf8(
632 const utf8_t *s,
633 size_t len,
634 size_t *ip)
635{
636 utf8_t ch;
637 utf32_t res;
638
639 assert(*ip <= len);
640 if (*ip == len)
641 return EOS;
642 ch = s[*ip];
643
644 if (ch < 0xC2 || ch > 0xF4)
645 { /* One-byte sequence, tail (should not occur), or invalid */
646 *ip += 1;
647 return ch;
648 }
649 else if (ch < 0xE0)
650 { /* Two-byte sequence */
651 if (*ip + 2 > len)
652 return EOS;
653 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
654 *ip += 2;
655 return res;
656 }
657 else if (ch < 0xF0)
658 { /* Three-byte sequence */
659 if (*ip + 3 > len)
660 return EOS;
661 res = ((ch & 0x0F) << 12) +
662 ((s[*ip + 1] & 0x3F) << 6) +
663 ((s[*ip + 2] & 0x3F));
664 *ip += 3;
665 return res;
666 }
667 else
668 { /* Four-byte sequence */
669 if (*ip + 4 > len)
670 return EOS;
671 res = ((ch & 0x07) << 18) +
672 ((s[*ip + 1] & 0x3F) << 12) +
673 ((s[*ip + 2] & 0x3F) << 6) +
674 ((s[*ip + 3] & 0x3F));
675 *ip += 4;
676 return res;
677 }
678}
679
680/**
681 * Gets the next Unicode character in a UTF-16 sequence. The index will
682 * be advanced to the next complete character, unless the end of string
683 * is reached in the middle of a UTF-16 surrogate pair.
684 *
685 * @param[in] s input UTF-16 string
686 * @param[in] len length of the string in words
687 * @param[in,out] ip pointer to the index
688 * @return the Unicode character beginning at the index; or
689 * #EOS if end of input is encountered
690 */
691utf32_t lb_get_next_char_utf16(
692 const utf16_t *s,
693 size_t len,
694 size_t *ip)
695{
696 utf16_t ch;
697
698 assert(*ip <= len);
699 if (*ip == len)
700 return EOS;
701 ch = s[(*ip)++];
702
703 if (ch < 0xD800 || ch > 0xDBFF)
704 { /* If the character is not a high surrogate */
705 return ch;
706 }
707 if (*ip == len)
708 { /* If the input ends here (an error) */
709 --(*ip);
710 return EOS;
711 }
712 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
713 { /* If the next character is not the low surrogate (an error) */
714 return ch;
715 }
716 /* Return the constructed character and advance the index again */
717 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
718}
719
720/**
721 * Gets the next Unicode character in a UTF-32 sequence. The index will
722 * be advanced to the next character.
723 *
724 * @param[in] s input UTF-32 string
725 * @param[in] len length of the string in dwords
726 * @param[in,out] ip pointer to the index
727 * @return the Unicode character beginning at the index; or
728 * #EOS if end of input is encountered
729 */
730utf32_t lb_get_next_char_utf32(
731 const utf32_t *s,
732 size_t len,
733 size_t *ip)
734{
735 assert(*ip <= len);
736 if (*ip == len)
737 return EOS;
738 return s[(*ip)++];
739}
740
741/**
742 * Sets the line breaking information for a generic input string. 631 * Sets the line breaking information for a generic input string.
743 * 632 *
744 * @param[in] s input string 633 * @param[in] s input string
@@ -809,7 +698,7 @@ void set_linebreaks_utf8(
809 char *brks) 698 char *brks)
810{ 699{
811 set_linebreaks(s, len, lang, brks, 700 set_linebreaks(s, len, lang, brks,
812 (get_next_char_t)lb_get_next_char_utf8); 701 (get_next_char_t)ub_get_next_char_utf8);
813} 702}
814 703
815/** 704/**
@@ -829,7 +718,7 @@ void set_linebreaks_utf16(
829 char *brks) 718 char *brks)
830{ 719{
831 set_linebreaks(s, len, lang, brks, 720 set_linebreaks(s, len, lang, brks,
832 (get_next_char_t)lb_get_next_char_utf16); 721 (get_next_char_t)ub_get_next_char_utf16);
833} 722}
834 723
835/** 724/**
@@ -849,7 +738,7 @@ void set_linebreaks_utf32(
849 char *brks) 738 char *brks)
850{ 739{
851 set_linebreaks(s, len, lang, brks, 740 set_linebreaks(s, len, lang, brks,
852 (get_next_char_t)lb_get_next_char_utf32); 741 (get_next_char_t)ub_get_next_char_utf32);
853} 742}
854 743
855/** 744/**
@@ -868,7 +757,7 @@ void set_linebreaks_utf32(
868int is_line_breakable( 757int is_line_breakable(
869 utf32_t char1, 758 utf32_t char1,
870 utf32_t char2, 759 utf32_t char2,
871 const char* lang) 760 const char *lang)
872{ 761{
873 utf32_t s[2]; 762 utf32_t s[2];
874 char brks[2]; 763 char brks[2];
diff --git a/src/static_libs/libunibreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h
index 94fbca0..68c8e41 100644
--- a/src/static_libs/libunibreak/linebreak.h
+++ b/src/static_libs/libunibreak/linebreak.h
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +30,9 @@
30 * Unicode 5.0.0: 30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 32 *
33 * This library has been updated according to Revision 30, for 33 * This library has been updated according to Revision 33, for
34 * Unicode 6.2.0: 34 * Unicode 7.0.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 35 * <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
36 * 36 *
37 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
@@ -43,7 +43,7 @@
43 * 43 *
44 * Header file for the line breaking algorithm. 44 * Header file for the line breaking algorithm.
45 * 45 *
46 * @version 2.2, 2012/10/06 46 * @version 2.4, 2015/04/18
47 * @author Wu Yongwei 47 * @author Wu Yongwei
48 */ 48 */
49 49
@@ -51,21 +51,12 @@
51#define LINEBREAK_H 51#define LINEBREAK_H
52 52
53#include <stddef.h> 53#include <stddef.h>
54#include "unibreakbase.h"
54 55
55#ifdef __cplusplus 56#ifdef __cplusplus
56extern "C" { 57extern "C" {
57#endif 58#endif
58 59
59#define LINEBREAK_VERSION 0x0202 /**< Version of the library linebreak */
60extern const int linebreak_version;
61
62#ifndef LINEBREAK_UTF_TYPES_DEFINED
63#define LINEBREAK_UTF_TYPES_DEFINED
64typedef unsigned char utf8_t; /**< Type for UTF-8 data points */
65typedef unsigned short utf16_t; /**< Type for UTF-16 data points */
66typedef unsigned int utf32_t; /**< Type for UTF-32 data points */
67#endif
68
69#define LINEBREAK_MUSTBREAK 0 /**< Break is mandatory */ 60#define LINEBREAK_MUSTBREAK 0 /**< Break is mandatory */
70#define LINEBREAK_ALLOWBREAK 1 /**< Break is allowed */ 61#define LINEBREAK_ALLOWBREAK 1 /**< Break is allowed */
71#define LINEBREAK_NOBREAK 2 /**< No break is possible */ 62#define LINEBREAK_NOBREAK 2 /**< No break is possible */
@@ -73,12 +64,12 @@ typedef unsigned int utf32_t; /**< Type for UTF-32 data points */
73 64
74void init_linebreak(void); 65void init_linebreak(void);
75void set_linebreaks_utf8( 66void set_linebreaks_utf8(
76 const utf8_t *s, size_t len, const char* lang, char *brks); 67 const utf8_t *s, size_t len, const char *lang, char *brks);
77void set_linebreaks_utf16( 68void set_linebreaks_utf16(
78 const utf16_t *s, size_t len, const char* lang, char *brks); 69 const utf16_t *s, size_t len, const char *lang, char *brks);
79void set_linebreaks_utf32( 70void set_linebreaks_utf32(
80 const utf32_t *s, size_t len, const char* lang, char *brks); 71 const utf32_t *s, size_t len, const char *lang, char *brks);
81int is_line_breakable(utf32_t char1, utf32_t char2, const char* lang); 72int is_line_breakable(utf32_t char1, utf32_t char2, const char *lang);
82 73
83#ifdef __cplusplus 74#ifdef __cplusplus
84} 75}
diff --git a/src/static_libs/libunibreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c
index 3843e3b..d4bc8d8 100644
--- a/src/static_libs/libunibreak/linebreakdata.c
+++ b/src/static_libs/libunibreak/linebreakdata.c
@@ -1,9 +1,8 @@
1/* The content of this file is generated from: 1/* The content of this file is generated from:
2# LineBreak-6.3.0.txt 2# LineBreak-7.0.0.txt
3# Date: 2013-02-06, 19:45:00 GMT [KW, LI] 3# Date: 2014-02-28, 23:15:00 GMT [KW, LI]
4*/ 4*/
5 5
6#include "linebreak.h"
7#include "linebreakdef.h" 6#include "linebreakdef.h"
8 7
9/** Default line breaking properties as from the Unicode Web site. */ 8/** Default line breaking properties as from the Unicode Web site. */
@@ -93,11 +92,12 @@ struct LineBreakProperties lb_prop_default[] = {
93 { 0x0363, 0x036F, LBP_CM }, 92 { 0x0363, 0x036F, LBP_CM },
94 { 0x0370, 0x037D, LBP_AL }, 93 { 0x0370, 0x037D, LBP_AL },
95 { 0x037E, 0x037E, LBP_IS }, 94 { 0x037E, 0x037E, LBP_IS },
96 { 0x0384, 0x0482, LBP_AL }, 95 { 0x037F, 0x0482, LBP_AL },
97 { 0x0483, 0x0489, LBP_CM }, 96 { 0x0483, 0x0489, LBP_CM },
98 { 0x048A, 0x0587, LBP_AL }, 97 { 0x048A, 0x0587, LBP_AL },
99 { 0x0589, 0x0589, LBP_IS }, 98 { 0x0589, 0x0589, LBP_IS },
100 { 0x058A, 0x058A, LBP_BA }, 99 { 0x058A, 0x058A, LBP_BA },
100 { 0x058D, 0x058E, LBP_AL },
101 { 0x058F, 0x058F, LBP_PR }, 101 { 0x058F, 0x058F, LBP_PR },
102 { 0x0591, 0x05BD, LBP_CM }, 102 { 0x0591, 0x05BD, LBP_CM },
103 { 0x05BE, 0x05BE, LBP_BA }, 103 { 0x05BE, 0x05BE, LBP_BA },
@@ -159,7 +159,7 @@ struct LineBreakProperties lb_prop_default[] = {
159 { 0x0829, 0x082D, LBP_CM }, 159 { 0x0829, 0x082D, LBP_CM },
160 { 0x0830, 0x0858, LBP_AL }, 160 { 0x0830, 0x0858, LBP_AL },
161 { 0x0859, 0x085B, LBP_CM }, 161 { 0x0859, 0x085B, LBP_CM },
162 { 0x085E, 0x08AC, LBP_AL }, 162 { 0x085E, 0x08B2, LBP_AL },
163 { 0x08E4, 0x0903, LBP_CM }, 163 { 0x08E4, 0x0903, LBP_CM },
164 { 0x0904, 0x0939, LBP_AL }, 164 { 0x0904, 0x0939, LBP_AL },
165 { 0x093A, 0x093C, LBP_CM }, 165 { 0x093A, 0x093C, LBP_CM },
@@ -171,7 +171,7 @@ struct LineBreakProperties lb_prop_default[] = {
171 { 0x0962, 0x0963, LBP_CM }, 171 { 0x0962, 0x0963, LBP_CM },
172 { 0x0964, 0x0965, LBP_BA }, 172 { 0x0964, 0x0965, LBP_BA },
173 { 0x0966, 0x096F, LBP_NU }, 173 { 0x0966, 0x096F, LBP_NU },
174 { 0x0970, 0x097F, LBP_AL }, 174 { 0x0970, 0x0980, LBP_AL },
175 { 0x0981, 0x0983, LBP_CM }, 175 { 0x0981, 0x0983, LBP_CM },
176 { 0x0985, 0x09B9, LBP_AL }, 176 { 0x0985, 0x09B9, LBP_AL },
177 { 0x09BC, 0x09BC, LBP_CM }, 177 { 0x09BC, 0x09BC, LBP_CM },
@@ -223,14 +223,14 @@ struct LineBreakProperties lb_prop_default[] = {
223 { 0x0BF0, 0x0BF8, LBP_AL }, 223 { 0x0BF0, 0x0BF8, LBP_AL },
224 { 0x0BF9, 0x0BF9, LBP_PR }, 224 { 0x0BF9, 0x0BF9, LBP_PR },
225 { 0x0BFA, 0x0BFA, LBP_AL }, 225 { 0x0BFA, 0x0BFA, LBP_AL },
226 { 0x0C01, 0x0C03, LBP_CM }, 226 { 0x0C00, 0x0C03, LBP_CM },
227 { 0x0C05, 0x0C3D, LBP_AL }, 227 { 0x0C05, 0x0C3D, LBP_AL },
228 { 0x0C3E, 0x0C56, LBP_CM }, 228 { 0x0C3E, 0x0C56, LBP_CM },
229 { 0x0C58, 0x0C61, LBP_AL }, 229 { 0x0C58, 0x0C61, LBP_AL },
230 { 0x0C62, 0x0C63, LBP_CM }, 230 { 0x0C62, 0x0C63, LBP_CM },
231 { 0x0C66, 0x0C6F, LBP_NU }, 231 { 0x0C66, 0x0C6F, LBP_NU },
232 { 0x0C78, 0x0C7F, LBP_AL }, 232 { 0x0C78, 0x0C7F, LBP_AL },
233 { 0x0C82, 0x0C83, LBP_CM }, 233 { 0x0C81, 0x0C83, LBP_CM },
234 { 0x0C85, 0x0CB9, LBP_AL }, 234 { 0x0C85, 0x0CB9, LBP_AL },
235 { 0x0CBC, 0x0CBC, LBP_CM }, 235 { 0x0CBC, 0x0CBC, LBP_CM },
236 { 0x0CBD, 0x0CBD, LBP_AL }, 236 { 0x0CBD, 0x0CBD, LBP_AL },
@@ -239,7 +239,7 @@ struct LineBreakProperties lb_prop_default[] = {
239 { 0x0CE2, 0x0CE3, LBP_CM }, 239 { 0x0CE2, 0x0CE3, LBP_CM },
240 { 0x0CE6, 0x0CEF, LBP_NU }, 240 { 0x0CE6, 0x0CEF, LBP_NU },
241 { 0x0CF1, 0x0CF2, LBP_AL }, 241 { 0x0CF1, 0x0CF2, LBP_AL },
242 { 0x0D02, 0x0D03, LBP_CM }, 242 { 0x0D01, 0x0D03, LBP_CM },
243 { 0x0D05, 0x0D3D, LBP_AL }, 243 { 0x0D05, 0x0D3D, LBP_AL },
244 { 0x0D3E, 0x0D4D, LBP_CM }, 244 { 0x0D3E, 0x0D4D, LBP_CM },
245 { 0x0D4E, 0x0D4E, LBP_AL }, 245 { 0x0D4E, 0x0D4E, LBP_AL },
@@ -252,7 +252,9 @@ struct LineBreakProperties lb_prop_default[] = {
252 { 0x0D7A, 0x0D7F, LBP_AL }, 252 { 0x0D7A, 0x0D7F, LBP_AL },
253 { 0x0D82, 0x0D83, LBP_CM }, 253 { 0x0D82, 0x0D83, LBP_CM },
254 { 0x0D85, 0x0DC6, LBP_AL }, 254 { 0x0D85, 0x0DC6, LBP_AL },
255 { 0x0DCA, 0x0DF3, LBP_CM }, 255 { 0x0DCA, 0x0DDF, LBP_CM },
256 { 0x0DE6, 0x0DEF, LBP_NU },
257 { 0x0DF2, 0x0DF3, LBP_CM },
256 { 0x0DF4, 0x0DF4, LBP_AL }, 258 { 0x0DF4, 0x0DF4, LBP_AL },
257 { 0x0E01, 0x0E3A, LBP_SA }, 259 { 0x0E01, 0x0E3A, LBP_SA },
258 { 0x0E3F, 0x0E3F, LBP_PR }, 260 { 0x0E3F, 0x0E3F, LBP_PR },
@@ -363,7 +365,7 @@ struct LineBreakProperties lb_prop_default[] = {
363 { 0x1810, 0x1819, LBP_NU }, 365 { 0x1810, 0x1819, LBP_NU },
364 { 0x1820, 0x18A8, LBP_AL }, 366 { 0x1820, 0x18A8, LBP_AL },
365 { 0x18A9, 0x18A9, LBP_CM }, 367 { 0x18A9, 0x18A9, LBP_CM },
366 { 0x18AA, 0x191C, LBP_AL }, 368 { 0x18AA, 0x191E, LBP_AL },
367 { 0x1920, 0x193B, LBP_CM }, 369 { 0x1920, 0x193B, LBP_CM },
368 { 0x1940, 0x1940, LBP_AL }, 370 { 0x1940, 0x1940, LBP_AL },
369 { 0x1944, 0x1945, LBP_EX }, 371 { 0x1944, 0x1945, LBP_EX },
@@ -378,7 +380,7 @@ struct LineBreakProperties lb_prop_default[] = {
378 { 0x1A7F, 0x1A7F, LBP_CM }, 380 { 0x1A7F, 0x1A7F, LBP_CM },
379 { 0x1A80, 0x1A99, LBP_NU }, 381 { 0x1A80, 0x1A99, LBP_NU },
380 { 0x1AA0, 0x1AAD, LBP_SA }, 382 { 0x1AA0, 0x1AAD, LBP_SA },
381 { 0x1B00, 0x1B04, LBP_CM }, 383 { 0x1AB0, 0x1B04, LBP_CM },
382 { 0x1B05, 0x1B33, LBP_AL }, 384 { 0x1B05, 0x1B33, LBP_AL },
383 { 0x1B34, 0x1B44, LBP_CM }, 385 { 0x1B34, 0x1B44, LBP_CM },
384 { 0x1B45, 0x1B4B, LBP_AL }, 386 { 0x1B45, 0x1B4B, LBP_AL },
@@ -412,7 +414,9 @@ struct LineBreakProperties lb_prop_default[] = {
412 { 0x1CED, 0x1CED, LBP_CM }, 414 { 0x1CED, 0x1CED, LBP_CM },
413 { 0x1CEE, 0x1CF1, LBP_AL }, 415 { 0x1CEE, 0x1CF1, LBP_AL },
414 { 0x1CF2, 0x1CF4, LBP_CM }, 416 { 0x1CF2, 0x1CF4, LBP_CM },
415 { 0x1CF5, 0x1DBF, LBP_AL }, 417 { 0x1CF5, 0x1CF6, LBP_AL },
418 { 0x1CF8, 0x1CF9, LBP_CM },
419 { 0x1D00, 0x1DBF, LBP_AL },
416 { 0x1DC0, 0x1DFF, LBP_CM }, 420 { 0x1DC0, 0x1DFF, LBP_CM },
417 { 0x1E00, 0x1FFC, LBP_AL }, 421 { 0x1E00, 0x1FFC, LBP_AL },
418 { 0x1FFD, 0x1FFD, LBP_BB }, 422 { 0x1FFD, 0x1FFD, LBP_BB },
@@ -475,7 +479,9 @@ struct LineBreakProperties lb_prop_default[] = {
475 { 0x20A7, 0x20A7, LBP_PO }, 479 { 0x20A7, 0x20A7, LBP_PO },
476 { 0x20A8, 0x20B5, LBP_PR }, 480 { 0x20A8, 0x20B5, LBP_PR },
477 { 0x20B6, 0x20B6, LBP_PO }, 481 { 0x20B6, 0x20B6, LBP_PO },
478 { 0x20B7, 0x20CF, LBP_PR }, 482 { 0x20B7, 0x20BA, LBP_PR },
483 { 0x20BB, 0x20BB, LBP_PO },
484 { 0x20BC, 0x20CF, LBP_PR },
479 { 0x20D0, 0x20F0, LBP_CM }, 485 { 0x20D0, 0x20F0, LBP_CM },
480 { 0x2100, 0x2102, LBP_AL }, 486 { 0x2100, 0x2102, LBP_AL },
481 { 0x2103, 0x2103, LBP_PO }, 487 { 0x2103, 0x2103, LBP_PO },
@@ -564,7 +570,12 @@ struct LineBreakProperties lb_prop_default[] = {
564 { 0x22A5, 0x22A5, LBP_AI }, 570 { 0x22A5, 0x22A5, LBP_AI },
565 { 0x22A6, 0x22BE, LBP_AL }, 571 { 0x22A6, 0x22BE, LBP_AL },
566 { 0x22BF, 0x22BF, LBP_AI }, 572 { 0x22BF, 0x22BF, LBP_AI },
567 { 0x22C0, 0x2311, LBP_AL }, 573 { 0x22C0, 0x2307, LBP_AL },
574 { 0x2308, 0x2308, LBP_OP },
575 { 0x2309, 0x2309, LBP_CL },
576 { 0x230A, 0x230A, LBP_OP },
577 { 0x230B, 0x230B, LBP_CL },
578 { 0x230C, 0x2311, LBP_AL },
568 { 0x2312, 0x2312, LBP_AI }, 579 { 0x2312, 0x2312, LBP_AI },
569 { 0x2313, 0x2319, LBP_AL }, 580 { 0x2313, 0x2319, LBP_AL },
570 { 0x231A, 0x231B, LBP_ID }, 581 { 0x231A, 0x231B, LBP_ID },
@@ -573,7 +584,7 @@ struct LineBreakProperties lb_prop_default[] = {
573 { 0x232A, 0x232A, LBP_CL }, 584 { 0x232A, 0x232A, LBP_CL },
574 { 0x232B, 0x23EF, LBP_AL }, 585 { 0x232B, 0x23EF, LBP_AL },
575 { 0x23F0, 0x23F3, LBP_ID }, 586 { 0x23F0, 0x23F3, LBP_ID },
576 { 0x2400, 0x244A, LBP_AL }, 587 { 0x23F4, 0x244A, LBP_AL },
577 { 0x2460, 0x24FE, LBP_AI }, 588 { 0x2460, 0x24FE, LBP_AI },
578 { 0x24FF, 0x24FF, LBP_AL }, 589 { 0x24FF, 0x24FF, LBP_AL },
579 { 0x2500, 0x254B, LBP_AI }, 590 { 0x2500, 0x254B, LBP_AI },
@@ -671,8 +682,8 @@ struct LineBreakProperties lb_prop_default[] = {
671 { 0x270E, 0x2756, LBP_AL }, 682 { 0x270E, 0x2756, LBP_AL },
672 { 0x2757, 0x2757, LBP_AI }, 683 { 0x2757, 0x2757, LBP_AI },
673 { 0x2758, 0x275A, LBP_AL }, 684 { 0x2758, 0x275A, LBP_AL },
674 { 0x275B, 0x275E, LBP_QU }, 685 { 0x275B, 0x2760, LBP_QU },
675 { 0x275F, 0x2761, LBP_AL }, 686 { 0x2761, 0x2761, LBP_AL },
676 { 0x2762, 0x2763, LBP_EX }, 687 { 0x2762, 0x2763, LBP_EX },
677 { 0x2764, 0x2767, LBP_AL }, 688 { 0x2764, 0x2767, LBP_AL },
678 { 0x2768, 0x2768, LBP_OP }, 689 { 0x2768, 0x2768, LBP_OP },
@@ -737,7 +748,7 @@ struct LineBreakProperties lb_prop_default[] = {
737 { 0x29FD, 0x29FD, LBP_CL }, 748 { 0x29FD, 0x29FD, LBP_CL },
738 { 0x29FE, 0x2B54, LBP_AL }, 749 { 0x29FE, 0x2B54, LBP_AL },
739 { 0x2B55, 0x2B59, LBP_AI }, 750 { 0x2B55, 0x2B59, LBP_AI },
740 { 0x2C00, 0x2CEE, LBP_AL }, 751 { 0x2B5A, 0x2CEE, LBP_AL },
741 { 0x2CEF, 0x2CF1, LBP_CM }, 752 { 0x2CEF, 0x2CF1, LBP_CM },
742 { 0x2CF2, 0x2CF3, LBP_AL }, 753 { 0x2CF2, 0x2CF3, LBP_AL },
743 { 0x2CF9, 0x2CF9, LBP_EX }, 754 { 0x2CF9, 0x2CF9, LBP_EX },
@@ -776,6 +787,10 @@ struct LineBreakProperties lb_prop_default[] = {
776 { 0x2E33, 0x2E34, LBP_BA }, 787 { 0x2E33, 0x2E34, LBP_BA },
777 { 0x2E35, 0x2E39, LBP_AL }, 788 { 0x2E35, 0x2E39, LBP_AL },
778 { 0x2E3A, 0x2E3B, LBP_B2 }, 789 { 0x2E3A, 0x2E3B, LBP_B2 },
790 { 0x2E3C, 0x2E3E, LBP_BA },
791 { 0x2E3F, 0x2E3F, LBP_AL },
792 { 0x2E40, 0x2E41, LBP_BA },
793 { 0x2E42, 0x2E42, LBP_OP },
779 { 0x2E80, 0x2FFB, LBP_ID }, 794 { 0x2E80, 0x2FFB, LBP_ID },
780 { 0x3000, 0x3000, LBP_BA }, 795 { 0x3000, 0x3000, LBP_BA },
781 { 0x3001, 0x3002, LBP_CL }, 796 { 0x3001, 0x3002, LBP_CL },
@@ -882,7 +897,7 @@ struct LineBreakProperties lb_prop_default[] = {
882 { 0xA66F, 0xA672, LBP_CM }, 897 { 0xA66F, 0xA672, LBP_CM },
883 { 0xA673, 0xA673, LBP_AL }, 898 { 0xA673, 0xA673, LBP_AL },
884 { 0xA674, 0xA67D, LBP_CM }, 899 { 0xA674, 0xA67D, LBP_CM },
885 { 0xA67E, 0xA697, LBP_AL }, 900 { 0xA67E, 0xA69D, LBP_AL },
886 { 0xA69F, 0xA69F, LBP_CM }, 901 { 0xA69F, 0xA69F, LBP_CM },
887 { 0xA6A0, 0xA6EF, LBP_AL }, 902 { 0xA6A0, 0xA6EF, LBP_AL },
888 { 0xA6F0, 0xA6F1, LBP_CM }, 903 { 0xA6F0, 0xA6F1, LBP_CM },
@@ -923,7 +938,11 @@ struct LineBreakProperties lb_prop_default[] = {
923 { 0xA9C7, 0xA9C9, LBP_BA }, 938 { 0xA9C7, 0xA9C9, LBP_BA },
924 { 0xA9CA, 0xA9CF, LBP_AL }, 939 { 0xA9CA, 0xA9CF, LBP_AL },
925 { 0xA9D0, 0xA9D9, LBP_NU }, 940 { 0xA9D0, 0xA9D9, LBP_NU },
926 { 0xA9DE, 0xAA28, LBP_AL }, 941 { 0xA9DE, 0xA9DF, LBP_AL },
942 { 0xA9E0, 0xA9EF, LBP_SA },
943 { 0xA9F0, 0xA9F9, LBP_NU },
944 { 0xA9FA, 0xA9FE, LBP_SA },
945 { 0xAA00, 0xAA28, LBP_AL },
927 { 0xAA29, 0xAA36, LBP_CM }, 946 { 0xAA29, 0xAA36, LBP_CM },
928 { 0xAA40, 0xAA42, LBP_AL }, 947 { 0xAA40, 0xAA42, LBP_AL },
929 { 0xAA43, 0xAA43, LBP_CM }, 948 { 0xAA43, 0xAA43, LBP_CM },
@@ -1753,8 +1772,8 @@ struct LineBreakProperties lb_prop_default[] = {
1753 { 0xFB29, 0xFB29, LBP_AL }, 1772 { 0xFB29, 0xFB29, LBP_AL },
1754 { 0xFB2A, 0xFB4F, LBP_HL }, 1773 { 0xFB2A, 0xFB4F, LBP_HL },
1755 { 0xFB50, 0xFD3D, LBP_AL }, 1774 { 0xFB50, 0xFD3D, LBP_AL },
1756 { 0xFD3E, 0xFD3E, LBP_OP }, 1775 { 0xFD3E, 0xFD3E, LBP_CL },
1757 { 0xFD3F, 0xFD3F, LBP_CL }, 1776 { 0xFD3F, 0xFD3F, LBP_OP },
1758 { 0xFD50, 0xFDFB, LBP_AL }, 1777 { 0xFD50, 0xFDFB, LBP_AL },
1759 { 0xFDFC, 0xFDFC, LBP_PO }, 1778 { 0xFDFC, 0xFDFC, LBP_PO },
1760 { 0xFDFD, 0xFDFD, LBP_AL }, 1779 { 0xFDFD, 0xFDFD, LBP_AL },
@@ -1766,7 +1785,7 @@ struct LineBreakProperties lb_prop_default[] = {
1766 { 0xFE17, 0xFE17, LBP_OP }, 1785 { 0xFE17, 0xFE17, LBP_OP },
1767 { 0xFE18, 0xFE18, LBP_CL }, 1786 { 0xFE18, 0xFE18, LBP_CL },
1768 { 0xFE19, 0xFE19, LBP_IN }, 1787 { 0xFE19, 0xFE19, LBP_IN },
1769 { 0xFE20, 0xFE26, LBP_CM }, 1788 { 0xFE20, 0xFE2D, LBP_CM },
1770 { 0xFE30, 0xFE34, LBP_ID }, 1789 { 0xFE30, 0xFE34, LBP_ID },
1771 { 0xFE35, 0xFE35, LBP_OP }, 1790 { 0xFE35, 0xFE35, LBP_OP },
1772 { 0xFE36, 0xFE36, LBP_CL }, 1791 { 0xFE36, 0xFE36, LBP_CL },
@@ -1852,13 +1871,17 @@ struct LineBreakProperties lb_prop_default[] = {
1852 { 0x10100, 0x10102, LBP_BA }, 1871 { 0x10100, 0x10102, LBP_BA },
1853 { 0x10107, 0x101FC, LBP_AL }, 1872 { 0x10107, 0x101FC, LBP_AL },
1854 { 0x101FD, 0x101FD, LBP_CM }, 1873 { 0x101FD, 0x101FD, LBP_CM },
1855 { 0x10280, 0x1039D, LBP_AL }, 1874 { 0x10280, 0x102D0, LBP_AL },
1875 { 0x102E0, 0x102E0, LBP_CM },
1876 { 0x102E1, 0x10375, LBP_AL },
1877 { 0x10376, 0x1037A, LBP_CM },
1878 { 0x10380, 0x1039D, LBP_AL },
1856 { 0x1039F, 0x1039F, LBP_BA }, 1879 { 0x1039F, 0x1039F, LBP_BA },
1857 { 0x103A0, 0x103CF, LBP_AL }, 1880 { 0x103A0, 0x103CF, LBP_AL },
1858 { 0x103D0, 0x103D0, LBP_BA }, 1881 { 0x103D0, 0x103D0, LBP_BA },
1859 { 0x103D1, 0x1049D, LBP_AL }, 1882 { 0x103D1, 0x1049D, LBP_AL },
1860 { 0x104A0, 0x104A9, LBP_NU }, 1883 { 0x104A0, 0x104A9, LBP_NU },
1861 { 0x10800, 0x10855, LBP_AL }, 1884 { 0x10500, 0x10855, LBP_AL },
1862 { 0x10857, 0x10857, LBP_BA }, 1885 { 0x10857, 0x10857, LBP_BA },
1863 { 0x10858, 0x1091B, LBP_AL }, 1886 { 0x10858, 0x1091B, LBP_AL },
1864 { 0x1091F, 0x1091F, LBP_BA }, 1887 { 0x1091F, 0x1091F, LBP_BA },
@@ -1868,7 +1891,12 @@ struct LineBreakProperties lb_prop_default[] = {
1868 { 0x10A38, 0x10A3F, LBP_CM }, 1891 { 0x10A38, 0x10A3F, LBP_CM },
1869 { 0x10A40, 0x10A47, LBP_AL }, 1892 { 0x10A40, 0x10A47, LBP_AL },
1870 { 0x10A50, 0x10A57, LBP_BA }, 1893 { 0x10A50, 0x10A57, LBP_BA },
1871 { 0x10A58, 0x10B35, LBP_AL }, 1894 { 0x10A58, 0x10AE4, LBP_AL },
1895 { 0x10AE5, 0x10AE6, LBP_CM },
1896 { 0x10AEB, 0x10AEF, LBP_AL },
1897 { 0x10AF0, 0x10AF5, LBP_BA },
1898 { 0x10AF6, 0x10AF6, LBP_IN },
1899 { 0x10B00, 0x10B35, LBP_AL },
1872 { 0x10B39, 0x10B3F, LBP_BA }, 1900 { 0x10B39, 0x10B3F, LBP_BA },
1873 { 0x10B40, 0x10E7E, LBP_AL }, 1901 { 0x10B40, 0x10E7E, LBP_AL },
1874 { 0x11000, 0x11002, LBP_CM }, 1902 { 0x11000, 0x11002, LBP_CM },
@@ -1877,7 +1905,7 @@ struct LineBreakProperties lb_prop_default[] = {
1877 { 0x11047, 0x11048, LBP_BA }, 1905 { 0x11047, 0x11048, LBP_BA },
1878 { 0x11049, 0x11065, LBP_AL }, 1906 { 0x11049, 0x11065, LBP_AL },
1879 { 0x11066, 0x1106F, LBP_NU }, 1907 { 0x11066, 0x1106F, LBP_NU },
1880 { 0x11080, 0x11082, LBP_CM }, 1908 { 0x1107F, 0x11082, LBP_CM },
1881 { 0x11083, 0x110AF, LBP_AL }, 1909 { 0x11083, 0x110AF, LBP_AL },
1882 { 0x110B0, 0x110BA, LBP_CM }, 1910 { 0x110B0, 0x110BA, LBP_CM },
1883 { 0x110BB, 0x110BD, LBP_AL }, 1911 { 0x110BB, 0x110BD, LBP_AL },
@@ -1889,6 +1917,11 @@ struct LineBreakProperties lb_prop_default[] = {
1889 { 0x11127, 0x11134, LBP_CM }, 1917 { 0x11127, 0x11134, LBP_CM },
1890 { 0x11136, 0x1113F, LBP_NU }, 1918 { 0x11136, 0x1113F, LBP_NU },
1891 { 0x11140, 0x11143, LBP_BA }, 1919 { 0x11140, 0x11143, LBP_BA },
1920 { 0x11150, 0x11172, LBP_AL },
1921 { 0x11173, 0x11173, LBP_CM },
1922 { 0x11174, 0x11174, LBP_AL },
1923 { 0x11175, 0x11175, LBP_BB },
1924 { 0x11176, 0x11176, LBP_AL },
1892 { 0x11180, 0x11182, LBP_CM }, 1925 { 0x11180, 0x11182, LBP_CM },
1893 { 0x11183, 0x111B2, LBP_AL }, 1926 { 0x11183, 0x111B2, LBP_AL },
1894 { 0x111B3, 0x111C0, LBP_CM }, 1927 { 0x111B3, 0x111C0, LBP_CM },
@@ -1896,12 +1929,46 @@ struct LineBreakProperties lb_prop_default[] = {
1896 { 0x111C5, 0x111C6, LBP_BA }, 1929 { 0x111C5, 0x111C6, LBP_BA },
1897 { 0x111C7, 0x111C7, LBP_AL }, 1930 { 0x111C7, 0x111C7, LBP_AL },
1898 { 0x111C8, 0x111C8, LBP_BA }, 1931 { 0x111C8, 0x111C8, LBP_BA },
1932 { 0x111CD, 0x111CD, LBP_AL },
1899 { 0x111D0, 0x111D9, LBP_NU }, 1933 { 0x111D0, 0x111D9, LBP_NU },
1934 { 0x111DA, 0x1122B, LBP_AL },
1935 { 0x1122C, 0x11237, LBP_CM },
1936 { 0x11238, 0x11239, LBP_BA },
1937 { 0x1123A, 0x1123A, LBP_AL },
1938 { 0x1123B, 0x1123C, LBP_BA },
1939 { 0x1123D, 0x112DE, LBP_AL },
1940 { 0x112DF, 0x112EA, LBP_CM },
1941 { 0x112F0, 0x112F9, LBP_NU },
1942 { 0x11301, 0x11303, LBP_CM },
1943 { 0x11305, 0x11339, LBP_AL },
1944 { 0x1133C, 0x1133C, LBP_CM },
1945 { 0x1133D, 0x1133D, LBP_AL },
1946 { 0x1133E, 0x11357, LBP_CM },
1947 { 0x1135D, 0x11361, LBP_AL },
1948 { 0x11362, 0x11374, LBP_CM },
1949 { 0x11480, 0x114AF, LBP_AL },
1950 { 0x114B0, 0x114C3, LBP_CM },
1951 { 0x114C4, 0x114C7, LBP_AL },
1952 { 0x114D0, 0x114D9, LBP_NU },
1953 { 0x11580, 0x115AE, LBP_AL },
1954 { 0x115AF, 0x115C0, LBP_CM },
1955 { 0x115C1, 0x115C1, LBP_BB },
1956 { 0x115C2, 0x115C3, LBP_BA },
1957 { 0x115C4, 0x115C5, LBP_EX },
1958 { 0x115C6, 0x115C8, LBP_AL },
1959 { 0x115C9, 0x115C9, LBP_BA },
1960 { 0x11600, 0x1162F, LBP_AL },
1961 { 0x11630, 0x11640, LBP_CM },
1962 { 0x11641, 0x11642, LBP_BA },
1963 { 0x11643, 0x11644, LBP_AL },
1964 { 0x11650, 0x11659, LBP_NU },
1900 { 0x11680, 0x116AA, LBP_AL }, 1965 { 0x11680, 0x116AA, LBP_AL },
1901 { 0x116AB, 0x116B7, LBP_CM }, 1966 { 0x116AB, 0x116B7, LBP_CM },
1902 { 0x116C0, 0x116C9, LBP_NU }, 1967 { 0x116C0, 0x116C9, LBP_NU },
1903 { 0x12000, 0x12462, LBP_AL }, 1968 { 0x118A0, 0x118DF, LBP_AL },
1904 { 0x12470, 0x12473, LBP_BA }, 1969 { 0x118E0, 0x118E9, LBP_NU },
1970 { 0x118EA, 0x1246E, LBP_AL },
1971 { 0x12470, 0x12474, LBP_BA },
1905 { 0x13000, 0x13257, LBP_AL }, 1972 { 0x13000, 0x13257, LBP_AL },
1906 { 0x13258, 0x1325A, LBP_OP }, 1973 { 0x13258, 0x1325A, LBP_OP },
1907 { 0x1325B, 0x1325D, LBP_CL }, 1974 { 0x1325B, 0x1325D, LBP_CL },
@@ -1915,10 +1982,27 @@ struct LineBreakProperties lb_prop_default[] = {
1915 { 0x1328A, 0x13378, LBP_AL }, 1982 { 0x1328A, 0x13378, LBP_AL },
1916 { 0x13379, 0x13379, LBP_OP }, 1983 { 0x13379, 0x13379, LBP_OP },
1917 { 0x1337A, 0x1337B, LBP_CL }, 1984 { 0x1337A, 0x1337B, LBP_CL },
1918 { 0x1337C, 0x16F50, LBP_AL }, 1985 { 0x1337C, 0x16A5E, LBP_AL },
1986 { 0x16A60, 0x16A69, LBP_NU },
1987 { 0x16A6E, 0x16A6F, LBP_BA },
1988 { 0x16AD0, 0x16AED, LBP_AL },
1989 { 0x16AF0, 0x16AF4, LBP_CM },
1990 { 0x16AF5, 0x16AF5, LBP_BA },
1991 { 0x16B00, 0x16B2F, LBP_AL },
1992 { 0x16B30, 0x16B36, LBP_CM },
1993 { 0x16B37, 0x16B39, LBP_BA },
1994 { 0x16B3A, 0x16B43, LBP_AL },
1995 { 0x16B44, 0x16B44, LBP_BA },
1996 { 0x16B45, 0x16B45, LBP_AL },
1997 { 0x16B50, 0x16B59, LBP_NU },
1998 { 0x16B5B, 0x16F50, LBP_AL },
1919 { 0x16F51, 0x16F92, LBP_CM }, 1999 { 0x16F51, 0x16F92, LBP_CM },
1920 { 0x16F93, 0x16F9F, LBP_AL }, 2000 { 0x16F93, 0x16F9F, LBP_AL },
1921 { 0x1B000, 0x1B001, LBP_ID }, 2001 { 0x1B000, 0x1B001, LBP_ID },
2002 { 0x1BC00, 0x1BC9C, LBP_AL },
2003 { 0x1BC9D, 0x1BC9E, LBP_CM },
2004 { 0x1BC9F, 0x1BC9F, LBP_BA },
2005 { 0x1BCA0, 0x1BCA3, LBP_CM },
1922 { 0x1D000, 0x1D164, LBP_AL }, 2006 { 0x1D000, 0x1D164, LBP_AL },
1923 { 0x1D165, 0x1D169, LBP_CM }, 2007 { 0x1D165, 0x1D169, LBP_CM },
1924 { 0x1D16A, 0x1D16C, LBP_AL }, 2008 { 0x1D16A, 0x1D16C, LBP_AL },
@@ -1931,15 +2015,19 @@ struct LineBreakProperties lb_prop_default[] = {
1931 { 0x1D242, 0x1D244, LBP_CM }, 2015 { 0x1D242, 0x1D244, LBP_CM },
1932 { 0x1D245, 0x1D7CB, LBP_AL }, 2016 { 0x1D245, 0x1D7CB, LBP_AL },
1933 { 0x1D7CE, 0x1D7FF, LBP_NU }, 2017 { 0x1D7CE, 0x1D7FF, LBP_NU },
2018 { 0x1E800, 0x1E8CF, LBP_AL },
2019 { 0x1E8D0, 0x1E8D6, LBP_CM },
1934 { 0x1EE00, 0x1EEF1, LBP_AL }, 2020 { 0x1EE00, 0x1EEF1, LBP_AL },
1935 { 0x1F000, 0x1F0DF, LBP_ID }, 2021 { 0x1F000, 0x1F0F5, LBP_ID },
1936 { 0x1F100, 0x1F12D, LBP_AI }, 2022 { 0x1F100, 0x1F12D, LBP_AI },
1937 { 0x1F12E, 0x1F12E, LBP_AL }, 2023 { 0x1F12E, 0x1F12E, LBP_AL },
1938 { 0x1F130, 0x1F169, LBP_AI }, 2024 { 0x1F130, 0x1F169, LBP_AI },
1939 { 0x1F16A, 0x1F16B, LBP_AL }, 2025 { 0x1F16A, 0x1F16B, LBP_AL },
1940 { 0x1F170, 0x1F19A, LBP_AI }, 2026 { 0x1F170, 0x1F19A, LBP_AI },
1941 { 0x1F1E6, 0x1F1FF, LBP_RI }, 2027 { 0x1F1E6, 0x1F1FF, LBP_RI },
1942 { 0x1F200, 0x1F3B4, LBP_ID }, 2028 { 0x1F200, 0x1F39B, LBP_ID },
2029 { 0x1F39C, 0x1F39D, LBP_AL },
2030 { 0x1F39E, 0x1F3B4, LBP_ID },
1943 { 0x1F3B5, 0x1F3B6, LBP_AL }, 2031 { 0x1F3B5, 0x1F3B6, LBP_AL },
1944 { 0x1F3B7, 0x1F3BB, LBP_ID }, 2032 { 0x1F3B7, 0x1F3BB, LBP_ID },
1945 { 0x1F3BC, 0x1F3BC, LBP_AL }, 2033 { 0x1F3BC, 0x1F3BC, LBP_AL },
@@ -1953,14 +2041,23 @@ struct LineBreakProperties lb_prop_default[] = {
1953 { 0x1F4AF, 0x1F4AF, LBP_AL }, 2041 { 0x1F4AF, 0x1F4AF, LBP_AL },
1954 { 0x1F4B0, 0x1F4B0, LBP_ID }, 2042 { 0x1F4B0, 0x1F4B0, LBP_ID },
1955 { 0x1F4B1, 0x1F4B2, LBP_AL }, 2043 { 0x1F4B1, 0x1F4B2, LBP_AL },
1956 { 0x1F4B3, 0x1F4FC, LBP_ID }, 2044 { 0x1F4B3, 0x1F4FE, LBP_ID },
1957 { 0x1F500, 0x1F506, LBP_AL }, 2045 { 0x1F500, 0x1F506, LBP_AL },
1958 { 0x1F507, 0x1F516, LBP_ID }, 2046 { 0x1F507, 0x1F516, LBP_ID },
1959 { 0x1F517, 0x1F524, LBP_AL }, 2047 { 0x1F517, 0x1F524, LBP_AL },
1960 { 0x1F525, 0x1F531, LBP_ID }, 2048 { 0x1F525, 0x1F531, LBP_ID },
1961 { 0x1F532, 0x1F543, LBP_AL }, 2049 { 0x1F532, 0x1F549, LBP_AL },
1962 { 0x1F550, 0x1F6C5, LBP_ID }, 2050 { 0x1F54A, 0x1F5D3, LBP_ID },
1963 { 0x1F700, 0x1F773, LBP_AL }, 2051 { 0x1F5D4, 0x1F5DB, LBP_AL },
2052 { 0x1F5DC, 0x1F5F3, LBP_ID },
2053 { 0x1F5F4, 0x1F5F9, LBP_AL },
2054 { 0x1F5FA, 0x1F64F, LBP_ID },
2055 { 0x1F650, 0x1F675, LBP_AL },
2056 { 0x1F676, 0x1F678, LBP_QU },
2057 { 0x1F679, 0x1F67B, LBP_NS },
2058 { 0x1F67C, 0x1F67F, LBP_AL },
2059 { 0x1F680, 0x1F6F3, LBP_ID },
2060 { 0x1F700, 0x1F8AD, LBP_AL },
1964 { 0x20000, 0x3FFFD, LBP_ID }, 2061 { 0x20000, 0x3FFFD, LBP_ID },
1965 { 0xE0001, 0xE01EF, LBP_CM }, 2062 { 0xE0001, 0xE01EF, LBP_CM },
1966 { 0xF0000, 0x10FFFD, LBP_XX }, 2063 { 0xF0000, 0x10FFFD, LBP_XX },
diff --git a/src/static_libs/libunibreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c
index 3455afd..41a7296 100644
--- a/src/static_libs/libunibreak/linebreakdef.c
+++ b/src/static_libs/libunibreak/linebreakdef.c
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +30,9 @@
30 * Unicode 5.0.0: 30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 32 *
33 * This library has been updated according to Revision 30, for 33 * This library has been updated according to Revision 33, for
34 * Unicode 6.2.0: 34 * Unicode 7.0.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 35 * <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
36 * 36 *
37 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
diff --git a/src/static_libs/libunibreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h
index d557aba..7600d0a 100644
--- a/src/static_libs/libunibreak/linebreakdef.h
+++ b/src/static_libs/libunibreak/linebreakdef.h
@@ -4,7 +4,7 @@
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2015 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com> 8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9 * 9 *
10 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
@@ -31,9 +31,9 @@
31 * Unicode 5.0.0: 31 * Unicode 5.0.0:
32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
33 * 33 *
34 * This library has been updated according to Revision 30, for 34 * This library has been updated according to Revision 33, for
35 * Unicode 6.2.0: 35 * Unicode 7.0.0:
36 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 36 * <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
37 * 37 *
38 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
@@ -45,16 +45,12 @@
45 * Definitions of internal data structures, declarations of global 45 * Definitions of internal data structures, declarations of global
46 * variables, and function prototypes for the line breaking algorithm. 46 * variables, and function prototypes for the line breaking algorithm.
47 * 47 *
48 * @version 2.4, 2013/11/10 48 * @version 2.6, 2015/04/18
49 * @author Wu Yongwei 49 * @author Wu Yongwei
50 * @author Petr Filipsky 50 * @author Petr Filipsky
51 */ 51 */
52 52
53/** 53#include "unibreakdef.h"
54 * Constant value to mark the end of string. It is not a valid Unicode
55 * character.
56 */
57#define EOS 0xFFFFFFFF
58 54
59/** 55/**
60 * Line break classes. This is a direct mapping of Table 1 of Unicode 56 * Line break classes. This is a direct mapping of Table 1 of Unicode
@@ -143,28 +139,20 @@ struct LineBreakContext
143 enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */ 139 enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */
144 enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */ 140 enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */
145 enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */ 141 enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */
142 int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */
146}; 143};
147 144
148/**
149 * Abstract function interface for #lb_get_next_char_utf8,
150 * #lb_get_next_char_utf16, and #lb_get_next_char_utf32.
151 */
152typedef utf32_t (*get_next_char_t)(const void *, size_t, size_t *);
153
154/* Declarations */ 145/* Declarations */
155extern struct LineBreakProperties lb_prop_default[]; 146extern struct LineBreakProperties lb_prop_default[];
156extern struct LineBreakPropertiesLang lb_prop_lang_map[]; 147extern struct LineBreakPropertiesLang lb_prop_lang_map[];
157 148
158/* Function Prototype */ 149/* Function Prototype */
159utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
160utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
161utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
162void lb_init_break_context( 150void lb_init_break_context(
163 struct LineBreakContext* lbpCtx, 151 struct LineBreakContext *lbpCtx,
164 utf32_t ch, 152 utf32_t ch,
165 const char* lang); 153 const char *lang);
166int lb_process_next_char( 154int lb_process_next_char(
167 struct LineBreakContext* lbpCtx, 155 struct LineBreakContext *lbpCtx,
168 utf32_t ch); 156 utf32_t ch);
169void set_linebreaks( 157void set_linebreaks(
170 const void *s, 158 const void *s,
diff --git a/src/static_libs/libunibreak/unibreakbase.c b/src/static_libs/libunibreak/unibreakbase.c
new file mode 100644
index 0000000..dbe3a38
--- /dev/null
+++ b/src/static_libs/libunibreak/unibreakbase.c
@@ -0,0 +1,41 @@
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source
24 * distribution.
25 */
26
27/**
28 * @file unibreakbase.c
29 *
30 * Definition of basic libunibreak information.
31 *
32 * @version 1.0, 2015/04/18
33 * @author Wu Yongwei
34 */
35
36#include "unibreakbase.h"
37
38/**
39 * Version number of the library.
 *
 * Initialised from #UNIBREAK_VERSION, so it holds whatever value that
 * macro had when this file was compiled.
40 */
41const int unibreak_version = UNIBREAK_VERSION;
diff --git a/src/static_libs/libunibreak/unibreakbase.h b/src/static_libs/libunibreak/unibreakbase.h
new file mode 100644
index 0000000..76b35e6
--- /dev/null
+++ b/src/static_libs/libunibreak/unibreakbase.h
@@ -0,0 +1,73 @@
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source
24 * distribution.
25 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/>
28 *
29 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 *
33 * This library has been updated according to Revision 33, for
34 * Unicode 7.0.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
36 *
37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html>
39 */
40
41/**
42 * @file unibreakbase.h
43 *
44 * Header file for common definitions in the libunibreak library.
45 *
46 * @version 1.0, 2015/04/18
47 * @author Wu Yongwei
48 */
49
50#ifndef UNIBREAKBASE_H
51#define UNIBREAKBASE_H
52
53#include <stddef.h>
54
55#ifdef __cplusplus
56extern "C" {
57#endif
58
59#define UNIBREAK_VERSION 0x0300 /**< Version of the libunibreak library (presumably major in the high byte and minor in the low byte, i.e. 3.0 -- confirm) */
60extern const int unibreak_version; /**< Constant initialised from #UNIBREAK_VERSION in unibreakbase.c */
61
/*
 * The UTF typedefs are skipped when UNIBREAK_UTF_TYPES_DEFINED is already
 * defined (presumably so that a client can supply its own -- confirm).
 * NOTE(review): the C standard does not fix the widths of short and int;
 * utf32_t must be able to hold 0xFFFFFFFF, the EOS marker used by
 * unibreakdef.h.
 */
62#ifndef UNIBREAK_UTF_TYPES_DEFINED
63#define UNIBREAK_UTF_TYPES_DEFINED
64typedef unsigned char utf8_t; /**< Type for UTF-8 data points */
65typedef unsigned short utf16_t; /**< Type for UTF-16 data points */
66typedef unsigned int utf32_t; /**< Type for UTF-32 data points */
67#endif
68
69#ifdef __cplusplus
70}
71#endif
72
73#endif /* UNIBREAKBASE_H */
diff --git a/src/static_libs/libunibreak/unibreakdef.c b/src/static_libs/libunibreak/unibreakdef.c
new file mode 100644
index 0000000..2647b61
--- /dev/null
+++ b/src/static_libs/libunibreak/unibreakdef.c
@@ -0,0 +1,159 @@
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source
24 * distribution.
25 */
26
27/**
28 * @file unibreakdef.c
29 *
30 * Definition of utility functions used by the libunibreak library.
31 *
32 * @version 1.0, 2015/04/18
33 * @author Wu Yongwei
34 */
35
36#include <assert.h>
37#include <stddef.h>
38#include "unibreakdef.h"
39
40/**
41 * Gets the next Unicode character in a UTF-8 sequence. The index will
42 * be advanced to the next complete character, unless the end of string
43 * is reached in the middle of a UTF-8 sequence.
44 *
 * A byte below 0xC2 or above 0xF4 is returned as-is and consumes exactly
 * one byte; this covers ASCII as well as stray trail bytes and invalid
 * lead bytes. For multi-byte sequences the trail bytes are only masked
 * with 0x3F; they are not checked for the 10xxxxxx form. When fewer
 * bytes remain than the lead byte calls for, #EOS is returned and the
 * index is left unchanged.
 *
45 * @param[in] s input UTF-8 string
46 * @param[in] len length of the string in bytes
47 * @param[in,out] ip pointer to the index
48 * @return the Unicode character beginning at the index; or
49 * #EOS if end of input is encountered
50 */
51utf32_t ub_get_next_char_utf8(
52 const utf8_t *s,
53 size_t len,
54 size_t *ip)
55{
56 utf8_t ch;
57 utf32_t res;
58
59 assert(*ip <= len); /* the index may equal len but must never exceed it */
60 if (*ip == len)
61 return EOS; /* nothing left to read */
62 ch = s[*ip]; /* peek at the lead byte; *ip is advanced only once the sequence length is known */
63
64 if (ch < 0xC2 || ch > 0xF4)
65 { /* One-byte sequence, tail (should not occur), or invalid */
66 *ip += 1;
67 return ch; /* the raw byte value is handed back unchanged */
68 }
69 else if (ch < 0xE0)
70 { /* Two-byte sequence */
71 if (*ip + 2 > len)
72 return EOS; /* sequence cut short: *ip stays on the lead byte */
73 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F); /* 5 payload bits from the lead byte, 6 from the trail byte */
74 *ip += 2;
75 return res;
76 }
77 else if (ch < 0xF0)
78 { /* Three-byte sequence */
79 if (*ip + 3 > len)
80 return EOS; /* sequence cut short: *ip stays on the lead byte */
81 res = ((ch & 0x0F) << 12) + /* 4 payload bits from the lead byte */
82 ((s[*ip + 1] & 0x3F) << 6) +
83 ((s[*ip + 2] & 0x3F));
84 *ip += 3;
85 return res;
86 }
87 else
88 { /* Four-byte sequence */
89 if (*ip + 4 > len)
90 return EOS; /* sequence cut short: *ip stays on the lead byte */
91 res = ((ch & 0x07) << 18) + /* 3 payload bits from the lead byte */
92 ((s[*ip + 1] & 0x3F) << 12) +
93 ((s[*ip + 2] & 0x3F) << 6) +
94 ((s[*ip + 3] & 0x3F));
95 *ip += 4;
96 return res;
97 }
98}
99
100/**
101 * Gets the next Unicode character in a UTF-16 sequence. The index will
102 * be advanced to the next complete character, unless the end of string
103 * is reached in the middle of a UTF-16 surrogate pair.
104 *
 * A unit outside 0xD800-0xDBFF is returned unchanged; this includes a
 * low surrogate that has no high surrogate in front of it. A high
 * surrogate that is not followed by a low surrogate is also returned
 * unchanged and consumes a single unit. A high surrogate in the very
 * last position yields #EOS and the index is put back onto it.
 *
105 * @param[in] s input UTF-16 string
106 * @param[in] len length of the string in words
107 * @param[in,out] ip pointer to the index
108 * @return the Unicode character beginning at the index; or
109 * #EOS if end of input is encountered
110 */
111utf32_t ub_get_next_char_utf16(
112 const utf16_t *s,
113 size_t len,
114 size_t *ip)
115{
116 utf16_t ch;
117
118 assert(*ip <= len); /* the index may equal len but must never exceed it */
119 if (*ip == len)
120 return EOS; /* nothing left to read */
121 ch = s[(*ip)++]; /* consume the first unit straight away */
122
123 if (ch < 0xD800 || ch > 0xDBFF)
124 { /* If the character is not a high surrogate */
125 return ch;
126 }
127 if (*ip == len)
128 { /* If the input ends here (an error) */
129 --(*ip); /* undo the increment so the index is unchanged on return */
130 return EOS;
131 }
132 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
133 { /* If the next character is not the low surrogate (an error) */
134 return ch; /* the following unit is left for the next call */
135 }
136 /* Return the constructed character and advance the index again */
137 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000; /* 10 bits from each surrogate, offset into the supplementary planes */
138}
139
140/**
141 * Gets the next Unicode character in a UTF-32 sequence. The index will
142 * be advanced to the next character.
143 *
 * The element is returned exactly as stored; no range check is applied.
 * An element whose value equals #EOS therefore cannot be told apart from
 * end of input by the caller.
 *
144 * @param[in] s input UTF-32 string
145 * @param[in] len length of the string in dwords
146 * @param[in,out] ip pointer to the index
147 * @return the Unicode character beginning at the index; or
148 * #EOS if end of input is encountered
149 */
150utf32_t ub_get_next_char_utf32(
151 const utf32_t *s,
152 size_t len,
153 size_t *ip)
154{
155 assert(*ip <= len); /* the index may equal len but must never exceed it */
156 if (*ip == len)
157 return EOS; /* nothing left to read; the index is not advanced */
158 return s[(*ip)++];
159}
diff --git a/src/static_libs/libunibreak/unibreakdef.h b/src/static_libs/libunibreak/unibreakdef.h
new file mode 100644
index 0000000..b823e50
--- /dev/null
+++ b/src/static_libs/libunibreak/unibreakdef.h
@@ -0,0 +1,80 @@
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source
24 * distribution.
25 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/>
28 *
29 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 *
33 * This library has been updated according to Revision 33, for
34 * Unicode 7.0.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
36 *
37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html>
39 */
40
41/**
42 * @file unibreakdef.h
43 *
44 * Header file for private definitions in the libunibreak library.
45 *
46 * @version 1.1, 2015/04/19
47 * @author Wu Yongwei
48 */
49
50#ifndef UNIBREAKDEF_H
51#define UNIBREAKDEF_H
52
53#include "unibreakbase.h"
54
55#ifdef __cplusplus
56extern "C" {
57#endif
58
59/**
60 * Constant value to mark the end of string. It is not a valid Unicode
61 * character.
62 */
63#define EOS 0xFFFFFFFF
64
65/**
66 * Abstract function interface for #ub_get_next_char_utf8,
67 * #ub_get_next_char_utf16, and #ub_get_next_char_utf32.
 *
 * The arguments are, in order: the input string, its length in code
 * units, and a pointer to the current index, which the callee advances.
 * The return value is the decoded character or #EOS.
 *
 * NOTE(review): the three functions declare typed string pointers
 * (utf8_t, utf16_t, utf32_t) rather than const void *, so none of them
 * has exactly this type -- check how callers convert them.
68 */
69typedef utf32_t (*get_next_char_t)(const void *, size_t, size_t *);
70
71/* Function prototypes (the definitions live in unibreakdef.c) */
72utf32_t ub_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
73utf32_t ub_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
74utf32_t ub_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
75
76#ifdef __cplusplus
77}
78#endif
79
80#endif /* UNIBREAKDEF_H */
diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c
index 5c1e3d0..d7d5a42 100644
--- a/src/static_libs/libunibreak/wordbreak.c
+++ b/src/static_libs/libunibreak/wordbreak.c
@@ -4,7 +4,7 @@
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2013 Tom Hacohen <tom at stosb dot com> 7 * Copyright (C) 2013-2015 Tom Hacohen <tom at stosb dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +30,9 @@
30 * Unicode 6.0.0: 30 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32 * 32 *
33 * This library has been updated according to Revision 21, for 33 * This library has been updated according to Revision 25, for
34 * Unicode 6.2.0: 34 * Unicode 7.0.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-21.html> 35 * <URL:http://www.unicode.org/reports/tr29/tr29-25.html>
36 * 36 *
37 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
@@ -44,16 +44,14 @@
44 * Implementation of the word breaking algorithm as described in Unicode 44 * Implementation of the word breaking algorithm as described in Unicode
45 * Standard Annex 29. 45 * Standard Annex 29.
46 * 46 *
47 * @version 2.4, 2013/09/28 47 * @version 2.6, 2015/04/18
48 * @author Tom Hacohen 48 * @author Tom Hacohen
49 */ 49 */
50 50
51#include <assert.h> 51#include <assert.h>
52#include <stddef.h> 52#include <stddef.h>
53#include <string.h> 53#include <string.h>
54#include "linebreak.h" 54#include "unibreakdef.h"
55#include "linebreakdef.h"
56
57#include "wordbreak.h" 55#include "wordbreak.h"
58#include "wordbreakdata.c" 56#include "wordbreakdata.c"
59 57
@@ -128,7 +126,6 @@ static void set_brks_to(
128 while (posNext < posEnd) 126 while (posNext < posEnd)
129 { 127 {
130 utf32_t ch; 128 utf32_t ch;
131 (void)ch;
132 ch = get_next_char(s, len, &posNext); 129 ch = get_next_char(s, len, &posNext);
133 assert(ch != EOS); 130 assert(ch != EOS);
134 for (; posStart < posNext - 1; ++posStart) 131 for (; posStart < posNext - 1; ++posStart)
@@ -257,8 +254,24 @@ static void set_wordbreaks(
257 posLast = posCur; 254 posLast = posCur;
258 break; 255 break;
259 256
257 case WBP_Hebrew_Letter:
260 case WBP_ALetter: 258 case WBP_ALetter:
261 if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */ 259 if ((wbcSeqStart == WBP_Hebrew_Letter) &&
260 (wbcLast == WBP_Double_Quote)) /* WB7b,c */
261 {
262 if (wbcCur == WBP_Hebrew_Letter)
263 {
264 set_brks_to(s, brks, posLast, posCur, len,
265 WORDBREAK_NOBREAK, get_next_char);
266 }
267 else
268 {
269 set_brks_to(s, brks, posLast, posCur, len,
270 WORDBREAK_BREAK, get_next_char);
271 }
272 }
273 else if (((wbcSeqStart == WBP_ALetter) ||
274 (wbcSeqStart == WBP_Hebrew_Letter)) || /* WB5,6,7 */
262 (wbcLast == WBP_Numeric) || /* WB10 */ 275 (wbcLast == WBP_Numeric) || /* WB10 */
263 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 276 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
264 { 277 {
@@ -275,8 +288,18 @@ static void set_wordbreaks(
275 posLast = posCur; 288 posLast = posCur;
276 break; 289 break;
277 290
291 case WBP_Single_Quote:
292 if (wbcLast == WBP_Hebrew_Letter) /* WB7a */
293 {
294 set_brks_to(s, brks, posLast, posCur, len,
295 WORDBREAK_NOBREAK, get_next_char);
296 wbcSeqStart = wbcCur;
297 posLast = posCur;
298 }
299 /* No break on purpose */
278 case WBP_MidNumLet: 300 case WBP_MidNumLet:
279 if ((wbcLast == WBP_ALetter) || /* WB6,7 */ 301 if (((wbcLast == WBP_ALetter) ||
302 (wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */
280 (wbcLast == WBP_Numeric)) /* WB11,12 */ 303 (wbcLast == WBP_Numeric)) /* WB11,12 */
281 { 304 {
282 /* Go on */ 305 /* Go on */
@@ -291,7 +314,8 @@ static void set_wordbreaks(
291 break; 314 break;
292 315
293 case WBP_MidLetter: 316 case WBP_MidLetter:
294 if (wbcLast == WBP_ALetter) /* WB6,7 */ 317 if ((wbcLast == WBP_ALetter) ||
318 (wbcLast == WBP_Hebrew_Letter)) /* WB6,7 */
295 { 319 {
296 /* Go on */ 320 /* Go on */
297 } 321 }
@@ -320,7 +344,8 @@ static void set_wordbreaks(
320 344
321 case WBP_Numeric: 345 case WBP_Numeric:
322 if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */ 346 if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */
323 (wbcLast == WBP_ALetter) || /* WB9 */ 347 ((wbcLast == WBP_ALetter) ||
348 (wbcLast == WBP_Hebrew_Letter)) || /* WB9 */
324 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 349 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
325 { 350 {
326 set_brks_to(s, brks, posLast, posCur, len, 351 set_brks_to(s, brks, posLast, posCur, len,
@@ -340,6 +365,7 @@ static void set_wordbreaks(
340 /* WB13a,13b */ 365 /* WB13a,13b */
341 if ((wbcSeqStart == wbcLast) && 366 if ((wbcSeqStart == wbcLast) &&
342 ((wbcLast == WBP_ALetter) || 367 ((wbcLast == WBP_ALetter) ||
368 (wbcLast == WBP_Hebrew_Letter) ||
343 (wbcLast == WBP_Numeric) || 369 (wbcLast == WBP_Numeric) ||
344 (wbcLast == WBP_Katakana) || 370 (wbcLast == WBP_Katakana) ||
345 (wbcLast == WBP_ExtendNumLet))) 371 (wbcLast == WBP_ExtendNumLet)))
@@ -357,9 +383,9 @@ static void set_wordbreaks(
357 posLast = posCur; 383 posLast = posCur;
358 break; 384 break;
359 385
360 case WBP_Regional: 386 case WBP_Regional_Indicator:
361 /* WB13c */ 387 /* WB13c */
362 if (wbcSeqStart == WBP_Regional) 388 if (wbcSeqStart == WBP_Regional_Indicator)
363 { 389 {
364 set_brks_to(s, brks, posLast, posCur, len, 390 set_brks_to(s, brks, posLast, posCur, len,
365 WORDBREAK_NOBREAK, get_next_char); 391 WORDBREAK_NOBREAK, get_next_char);
@@ -368,6 +394,20 @@ static void set_wordbreaks(
368 posLast = posCur; 394 posLast = posCur;
369 break; 395 break;
370 396
397 case WBP_Double_Quote:
398 if (wbcLast == WBP_Hebrew_Letter) /* WB7b,c */
399 {
400 /* Go on */
401 }
402 else
403 {
404 set_brks_to(s, brks, posLast, posCur, len,
405 WORDBREAK_BREAK, get_next_char);
406 wbcSeqStart = wbcCur;
407 posLast = posCur;
408 }
409 break;
410
371 case WBP_Any: 411 case WBP_Any:
372 /* Allow breaks and reset */ 412 /* Allow breaks and reset */
373 set_brks_to(s, brks, posLast, posCur, len, 413 set_brks_to(s, brks, posLast, posCur, len,
@@ -409,7 +449,7 @@ void set_wordbreaks_utf8(
409 char *brks) 449 char *brks)
410{ 450{
411 set_wordbreaks(s, len, lang, brks, 451 set_wordbreaks(s, len, lang, brks,
412 (get_next_char_t)lb_get_next_char_utf8); 452 (get_next_char_t)ub_get_next_char_utf8);
413} 453}
414 454
415/** 455/**
@@ -429,7 +469,7 @@ void set_wordbreaks_utf16(
429 char *brks) 469 char *brks)
430{ 470{
431 set_wordbreaks(s, len, lang, brks, 471 set_wordbreaks(s, len, lang, brks,
432 (get_next_char_t)lb_get_next_char_utf16); 472 (get_next_char_t)ub_get_next_char_utf16);
433} 473}
434 474
435/** 475/**
@@ -449,5 +489,5 @@ void set_wordbreaks_utf32(
449 char *brks) 489 char *brks)
450{ 490{
451 set_wordbreaks(s, len, lang, brks, 491 set_wordbreaks(s, len, lang, brks,
452 (get_next_char_t)lb_get_next_char_utf32); 492 (get_next_char_t)ub_get_next_char_utf32);
453} 493}
diff --git a/src/static_libs/libunibreak/wordbreak.h b/src/static_libs/libunibreak/wordbreak.h
index cd2bf2c..360953f 100644
--- a/src/static_libs/libunibreak/wordbreak.h
+++ b/src/static_libs/libunibreak/wordbreak.h
@@ -4,7 +4,7 @@
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2013 Tom Hacohen <tom at stosb dot com> 7 * Copyright (C) 2013-2015 Tom Hacohen <tom at stosb dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -30,9 +30,9 @@
30 * Unicode 6.0.0: 30 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32 * 32 *
33 * This library has been updated according to Revision 21, for 33 * This library has been updated according to Revision 25, for
34 * Unicode 6.2.0: 34 * Unicode 7.0.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-21.html> 35 * <URL:http://www.unicode.org/reports/tr29/tr29-25.html>
36 * 36 *
37 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
@@ -43,7 +43,7 @@
43 * 43 *
44 * Header file for the word breaking (segmentation) algorithm. 44 * Header file for the word breaking (segmentation) algorithm.
45 * 45 *
46 * @version 2.3, 2013/09/28 46 * @version 2.5, 2015/04/18
47 * @author Tom Hacohen 47 * @author Tom Hacohen
48 */ 48 */
49 49
@@ -51,7 +51,7 @@
51#define WORDBREAK_H 51#define WORDBREAK_H
52 52
53#include <stddef.h> 53#include <stddef.h>
54#include "linebreak.h" 54#include "unibreakbase.h"
55 55
56#ifdef __cplusplus 56#ifdef __cplusplus
57extern "C" { 57extern "C" {
diff --git a/src/static_libs/libunibreak/wordbreakdata.c b/src/static_libs/libunibreak/wordbreakdata.c
index fe5afe3..c6d5694 100644
--- a/src/static_libs/libunibreak/wordbreakdata.c
+++ b/src/static_libs/libunibreak/wordbreakdata.c
@@ -1,16 +1,16 @@
1/* The content of this file is generated from: 1/* The content of this file is generated from:
2# WordBreakProperty-6.2.0.txt 2# WordBreakProperty-7.0.0.txt
3# Date: 2012-08-13, 19:12:09 GMT [MD] 3# Date: 2014-02-19, 15:51:39 GMT [MD]
4*/ 4*/
5 5
6#include "linebreak.h"
7#include "wordbreakdef.h" 6#include "wordbreakdef.h"
8 7
9static struct WordBreakProperties wb_prop_default[] = { 8static struct WordBreakProperties wb_prop_default[] = {
10 {0x000A, 0x000A, WBP_LF}, 9 {0x000A, 0x000A, WBP_LF},
11 {0x000B, 0x000C, WBP_Newline}, 10 {0x000B, 0x000C, WBP_Newline},
12 {0x000D, 0x000D, WBP_CR}, 11 {0x000D, 0x000D, WBP_CR},
13 {0x0027, 0x0027, WBP_MidNumLet}, 12 {0x0022, 0x0022, WBP_Double_Quote},
13 {0x0027, 0x0027, WBP_Single_Quote},
14 {0x002C, 0x002C, WBP_MidNum}, 14 {0x002C, 0x002C, WBP_MidNum},
15 {0x002E, 0x002E, WBP_MidNumLet}, 15 {0x002E, 0x002E, WBP_MidNumLet},
16 {0x0030, 0x0039, WBP_Numeric}, 16 {0x0030, 0x0039, WBP_Numeric},
@@ -36,6 +36,7 @@ static struct WordBreakProperties wb_prop_default[] = {
36 {0x0295, 0x02AF, WBP_ALetter}, 36 {0x0295, 0x02AF, WBP_ALetter},
37 {0x02B0, 0x02C1, WBP_ALetter}, 37 {0x02B0, 0x02C1, WBP_ALetter},
38 {0x02C6, 0x02D1, WBP_ALetter}, 38 {0x02C6, 0x02D1, WBP_ALetter},
39 {0x02D7, 0x02D7, WBP_MidLetter},
39 {0x02E0, 0x02E4, WBP_ALetter}, 40 {0x02E0, 0x02E4, WBP_ALetter},
40 {0x02EC, 0x02EC, WBP_ALetter}, 41 {0x02EC, 0x02EC, WBP_ALetter},
41 {0x02EE, 0x02EE, WBP_ALetter}, 42 {0x02EE, 0x02EE, WBP_ALetter},
@@ -46,6 +47,7 @@ static struct WordBreakProperties wb_prop_default[] = {
46 {0x037A, 0x037A, WBP_ALetter}, 47 {0x037A, 0x037A, WBP_ALetter},
47 {0x037B, 0x037D, WBP_ALetter}, 48 {0x037B, 0x037D, WBP_ALetter},
48 {0x037E, 0x037E, WBP_MidNum}, 49 {0x037E, 0x037E, WBP_MidNum},
50 {0x037F, 0x037F, WBP_ALetter},
49 {0x0386, 0x0386, WBP_ALetter}, 51 {0x0386, 0x0386, WBP_ALetter},
50 {0x0387, 0x0387, WBP_MidLetter}, 52 {0x0387, 0x0387, WBP_MidLetter},
51 {0x0388, 0x038A, WBP_ALetter}, 53 {0x0388, 0x038A, WBP_ALetter},
@@ -55,7 +57,7 @@ static struct WordBreakProperties wb_prop_default[] = {
55 {0x03F7, 0x0481, WBP_ALetter}, 57 {0x03F7, 0x0481, WBP_ALetter},
56 {0x0483, 0x0487, WBP_Extend}, 58 {0x0483, 0x0487, WBP_Extend},
57 {0x0488, 0x0489, WBP_Extend}, 59 {0x0488, 0x0489, WBP_Extend},
58 {0x048A, 0x0527, WBP_ALetter}, 60 {0x048A, 0x052F, WBP_ALetter},
59 {0x0531, 0x0556, WBP_ALetter}, 61 {0x0531, 0x0556, WBP_ALetter},
60 {0x0559, 0x0559, WBP_ALetter}, 62 {0x0559, 0x0559, WBP_ALetter},
61 {0x0561, 0x0587, WBP_ALetter}, 63 {0x0561, 0x0587, WBP_ALetter},
@@ -65,13 +67,14 @@ static struct WordBreakProperties wb_prop_default[] = {
65 {0x05C1, 0x05C2, WBP_Extend}, 67 {0x05C1, 0x05C2, WBP_Extend},
66 {0x05C4, 0x05C5, WBP_Extend}, 68 {0x05C4, 0x05C5, WBP_Extend},
67 {0x05C7, 0x05C7, WBP_Extend}, 69 {0x05C7, 0x05C7, WBP_Extend},
68 {0x05D0, 0x05EA, WBP_ALetter}, 70 {0x05D0, 0x05EA, WBP_Hebrew_Letter},
69 {0x05F0, 0x05F2, WBP_ALetter}, 71 {0x05F0, 0x05F2, WBP_Hebrew_Letter},
70 {0x05F3, 0x05F3, WBP_ALetter}, 72 {0x05F3, 0x05F3, WBP_ALetter},
71 {0x05F4, 0x05F4, WBP_MidLetter}, 73 {0x05F4, 0x05F4, WBP_MidLetter},
72 {0x0600, 0x0604, WBP_Format}, 74 {0x0600, 0x0605, WBP_Format},
73 {0x060C, 0x060D, WBP_MidNum}, 75 {0x060C, 0x060D, WBP_MidNum},
74 {0x0610, 0x061A, WBP_Extend}, 76 {0x0610, 0x061A, WBP_Extend},
77 {0x061C, 0x061C, WBP_Format},
75 {0x0620, 0x063F, WBP_ALetter}, 78 {0x0620, 0x063F, WBP_ALetter},
76 {0x0640, 0x0640, WBP_ALetter}, 79 {0x0640, 0x0640, WBP_ALetter},
77 {0x0641, 0x064A, WBP_ALetter}, 80 {0x0641, 0x064A, WBP_ALetter},
@@ -117,10 +120,8 @@ static struct WordBreakProperties wb_prop_default[] = {
117 {0x0829, 0x082D, WBP_Extend}, 120 {0x0829, 0x082D, WBP_Extend},
118 {0x0840, 0x0858, WBP_ALetter}, 121 {0x0840, 0x0858, WBP_ALetter},
119 {0x0859, 0x085B, WBP_Extend}, 122 {0x0859, 0x085B, WBP_Extend},
120 {0x08A0, 0x08A0, WBP_ALetter}, 123 {0x08A0, 0x08B2, WBP_ALetter},
121 {0x08A2, 0x08AC, WBP_ALetter}, 124 {0x08E4, 0x0902, WBP_Extend},
122 {0x08E4, 0x08FE, WBP_Extend},
123 {0x0900, 0x0902, WBP_Extend},
124 {0x0903, 0x0903, WBP_Extend}, 125 {0x0903, 0x0903, WBP_Extend},
125 {0x0904, 0x0939, WBP_ALetter}, 126 {0x0904, 0x0939, WBP_ALetter},
126 {0x093A, 0x093A, WBP_Extend}, 127 {0x093A, 0x093A, WBP_Extend},
@@ -138,8 +139,7 @@ static struct WordBreakProperties wb_prop_default[] = {
138 {0x0962, 0x0963, WBP_Extend}, 139 {0x0962, 0x0963, WBP_Extend},
139 {0x0966, 0x096F, WBP_Numeric}, 140 {0x0966, 0x096F, WBP_Numeric},
140 {0x0971, 0x0971, WBP_ALetter}, 141 {0x0971, 0x0971, WBP_ALetter},
141 {0x0972, 0x0977, WBP_ALetter}, 142 {0x0972, 0x0980, WBP_ALetter},
142 {0x0979, 0x097F, WBP_ALetter},
143 {0x0981, 0x0981, WBP_Extend}, 143 {0x0981, 0x0981, WBP_Extend},
144 {0x0982, 0x0983, WBP_Extend}, 144 {0x0982, 0x0983, WBP_Extend},
145 {0x0985, 0x098C, WBP_ALetter}, 145 {0x0985, 0x098C, WBP_ALetter},
@@ -247,12 +247,12 @@ static struct WordBreakProperties wb_prop_default[] = {
247 {0x0BD0, 0x0BD0, WBP_ALetter}, 247 {0x0BD0, 0x0BD0, WBP_ALetter},
248 {0x0BD7, 0x0BD7, WBP_Extend}, 248 {0x0BD7, 0x0BD7, WBP_Extend},
249 {0x0BE6, 0x0BEF, WBP_Numeric}, 249 {0x0BE6, 0x0BEF, WBP_Numeric},
250 {0x0C00, 0x0C00, WBP_Extend},
250 {0x0C01, 0x0C03, WBP_Extend}, 251 {0x0C01, 0x0C03, WBP_Extend},
251 {0x0C05, 0x0C0C, WBP_ALetter}, 252 {0x0C05, 0x0C0C, WBP_ALetter},
252 {0x0C0E, 0x0C10, WBP_ALetter}, 253 {0x0C0E, 0x0C10, WBP_ALetter},
253 {0x0C12, 0x0C28, WBP_ALetter}, 254 {0x0C12, 0x0C28, WBP_ALetter},
254 {0x0C2A, 0x0C33, WBP_ALetter}, 255 {0x0C2A, 0x0C39, WBP_ALetter},
255 {0x0C35, 0x0C39, WBP_ALetter},
256 {0x0C3D, 0x0C3D, WBP_ALetter}, 256 {0x0C3D, 0x0C3D, WBP_ALetter},
257 {0x0C3E, 0x0C40, WBP_Extend}, 257 {0x0C3E, 0x0C40, WBP_Extend},
258 {0x0C41, 0x0C44, WBP_Extend}, 258 {0x0C41, 0x0C44, WBP_Extend},
@@ -263,6 +263,7 @@ static struct WordBreakProperties wb_prop_default[] = {
263 {0x0C60, 0x0C61, WBP_ALetter}, 263 {0x0C60, 0x0C61, WBP_ALetter},
264 {0x0C62, 0x0C63, WBP_Extend}, 264 {0x0C62, 0x0C63, WBP_Extend},
265 {0x0C66, 0x0C6F, WBP_Numeric}, 265 {0x0C66, 0x0C6F, WBP_Numeric},
266 {0x0C81, 0x0C81, WBP_Extend},
266 {0x0C82, 0x0C83, WBP_Extend}, 267 {0x0C82, 0x0C83, WBP_Extend},
267 {0x0C85, 0x0C8C, WBP_ALetter}, 268 {0x0C85, 0x0C8C, WBP_ALetter},
268 {0x0C8E, 0x0C90, WBP_ALetter}, 269 {0x0C8E, 0x0C90, WBP_ALetter},
@@ -284,6 +285,7 @@ static struct WordBreakProperties wb_prop_default[] = {
284 {0x0CE2, 0x0CE3, WBP_Extend}, 285 {0x0CE2, 0x0CE3, WBP_Extend},
285 {0x0CE6, 0x0CEF, WBP_Numeric}, 286 {0x0CE6, 0x0CEF, WBP_Numeric},
286 {0x0CF1, 0x0CF2, WBP_ALetter}, 287 {0x0CF1, 0x0CF2, WBP_ALetter},
288 {0x0D01, 0x0D01, WBP_Extend},
287 {0x0D02, 0x0D03, WBP_Extend}, 289 {0x0D02, 0x0D03, WBP_Extend},
288 {0x0D05, 0x0D0C, WBP_ALetter}, 290 {0x0D05, 0x0D0C, WBP_ALetter},
289 {0x0D0E, 0x0D10, WBP_ALetter}, 291 {0x0D0E, 0x0D10, WBP_ALetter},
@@ -311,6 +313,7 @@ static struct WordBreakProperties wb_prop_default[] = {
311 {0x0DD2, 0x0DD4, WBP_Extend}, 313 {0x0DD2, 0x0DD4, WBP_Extend},
312 {0x0DD6, 0x0DD6, WBP_Extend}, 314 {0x0DD6, 0x0DD6, WBP_Extend},
313 {0x0DD8, 0x0DDF, WBP_Extend}, 315 {0x0DD8, 0x0DDF, WBP_Extend},
316 {0x0DE6, 0x0DEF, WBP_Numeric},
314 {0x0DF2, 0x0DF3, WBP_Extend}, 317 {0x0DF2, 0x0DF3, WBP_Extend},
315 {0x0E31, 0x0E31, WBP_Extend}, 318 {0x0E31, 0x0E31, WBP_Extend},
316 {0x0E34, 0x0E3A, WBP_Extend}, 319 {0x0E34, 0x0E3A, WBP_Extend},
@@ -391,6 +394,7 @@ static struct WordBreakProperties wb_prop_default[] = {
391 {0x1681, 0x169A, WBP_ALetter}, 394 {0x1681, 0x169A, WBP_ALetter},
392 {0x16A0, 0x16EA, WBP_ALetter}, 395 {0x16A0, 0x16EA, WBP_ALetter},
393 {0x16EE, 0x16F0, WBP_ALetter}, 396 {0x16EE, 0x16F0, WBP_ALetter},
397 {0x16F1, 0x16F8, WBP_ALetter},
394 {0x1700, 0x170C, WBP_ALetter}, 398 {0x1700, 0x170C, WBP_ALetter},
395 {0x170E, 0x1711, WBP_ALetter}, 399 {0x170E, 0x1711, WBP_ALetter},
396 {0x1712, 0x1714, WBP_Extend}, 400 {0x1712, 0x1714, WBP_Extend},
@@ -411,6 +415,7 @@ static struct WordBreakProperties wb_prop_default[] = {
411 {0x17DD, 0x17DD, WBP_Extend}, 415 {0x17DD, 0x17DD, WBP_Extend},
412 {0x17E0, 0x17E9, WBP_Numeric}, 416 {0x17E0, 0x17E9, WBP_Numeric},
413 {0x180B, 0x180D, WBP_Extend}, 417 {0x180B, 0x180D, WBP_Extend},
418 {0x180E, 0x180E, WBP_Format},
414 {0x1810, 0x1819, WBP_Numeric}, 419 {0x1810, 0x1819, WBP_Numeric},
415 {0x1820, 0x1842, WBP_ALetter}, 420 {0x1820, 0x1842, WBP_ALetter},
416 {0x1843, 0x1843, WBP_ALetter}, 421 {0x1843, 0x1843, WBP_ALetter},
@@ -419,7 +424,7 @@ static struct WordBreakProperties wb_prop_default[] = {
419 {0x18A9, 0x18A9, WBP_Extend}, 424 {0x18A9, 0x18A9, WBP_Extend},
420 {0x18AA, 0x18AA, WBP_ALetter}, 425 {0x18AA, 0x18AA, WBP_ALetter},
421 {0x18B0, 0x18F5, WBP_ALetter}, 426 {0x18B0, 0x18F5, WBP_ALetter},
422 {0x1900, 0x191C, WBP_ALetter}, 427 {0x1900, 0x191E, WBP_ALetter},
423 {0x1920, 0x1922, WBP_Extend}, 428 {0x1920, 0x1922, WBP_Extend},
424 {0x1923, 0x1926, WBP_Extend}, 429 {0x1923, 0x1926, WBP_Extend},
425 {0x1927, 0x1928, WBP_Extend}, 430 {0x1927, 0x1928, WBP_Extend},
@@ -434,7 +439,8 @@ static struct WordBreakProperties wb_prop_default[] = {
434 {0x19D0, 0x19D9, WBP_Numeric}, 439 {0x19D0, 0x19D9, WBP_Numeric},
435 {0x1A00, 0x1A16, WBP_ALetter}, 440 {0x1A00, 0x1A16, WBP_ALetter},
436 {0x1A17, 0x1A18, WBP_Extend}, 441 {0x1A17, 0x1A18, WBP_Extend},
437 {0x1A19, 0x1A1B, WBP_Extend}, 442 {0x1A19, 0x1A1A, WBP_Extend},
443 {0x1A1B, 0x1A1B, WBP_Extend},
438 {0x1A55, 0x1A55, WBP_Extend}, 444 {0x1A55, 0x1A55, WBP_Extend},
439 {0x1A56, 0x1A56, WBP_Extend}, 445 {0x1A56, 0x1A56, WBP_Extend},
440 {0x1A57, 0x1A57, WBP_Extend}, 446 {0x1A57, 0x1A57, WBP_Extend},
@@ -449,6 +455,8 @@ static struct WordBreakProperties wb_prop_default[] = {
449 {0x1A7F, 0x1A7F, WBP_Extend}, 455 {0x1A7F, 0x1A7F, WBP_Extend},
450 {0x1A80, 0x1A89, WBP_Numeric}, 456 {0x1A80, 0x1A89, WBP_Numeric},
451 {0x1A90, 0x1A99, WBP_Numeric}, 457 {0x1A90, 0x1A99, WBP_Numeric},
458 {0x1AB0, 0x1ABD, WBP_Extend},
459 {0x1ABE, 0x1ABE, WBP_Extend},
452 {0x1B00, 0x1B03, WBP_Extend}, 460 {0x1B00, 0x1B03, WBP_Extend},
453 {0x1B04, 0x1B04, WBP_Extend}, 461 {0x1B04, 0x1B04, WBP_Extend},
454 {0x1B05, 0x1B33, WBP_ALetter}, 462 {0x1B05, 0x1B33, WBP_ALetter},
@@ -471,8 +479,7 @@ static struct WordBreakProperties wb_prop_default[] = {
471 {0x1BA6, 0x1BA7, WBP_Extend}, 479 {0x1BA6, 0x1BA7, WBP_Extend},
472 {0x1BA8, 0x1BA9, WBP_Extend}, 480 {0x1BA8, 0x1BA9, WBP_Extend},
473 {0x1BAA, 0x1BAA, WBP_Extend}, 481 {0x1BAA, 0x1BAA, WBP_Extend},
474 {0x1BAB, 0x1BAB, WBP_Extend}, 482 {0x1BAB, 0x1BAD, WBP_Extend},
475 {0x1BAC, 0x1BAD, WBP_Extend},
476 {0x1BAE, 0x1BAF, WBP_ALetter}, 483 {0x1BAE, 0x1BAF, WBP_ALetter},
477 {0x1BB0, 0x1BB9, WBP_Numeric}, 484 {0x1BB0, 0x1BB9, WBP_Numeric},
478 {0x1BBA, 0x1BE5, WBP_ALetter}, 485 {0x1BBA, 0x1BE5, WBP_ALetter},
@@ -504,13 +511,14 @@ static struct WordBreakProperties wb_prop_default[] = {
504 {0x1CF2, 0x1CF3, WBP_Extend}, 511 {0x1CF2, 0x1CF3, WBP_Extend},
505 {0x1CF4, 0x1CF4, WBP_Extend}, 512 {0x1CF4, 0x1CF4, WBP_Extend},
506 {0x1CF5, 0x1CF6, WBP_ALetter}, 513 {0x1CF5, 0x1CF6, WBP_ALetter},
514 {0x1CF8, 0x1CF9, WBP_Extend},
507 {0x1D00, 0x1D2B, WBP_ALetter}, 515 {0x1D00, 0x1D2B, WBP_ALetter},
508 {0x1D2C, 0x1D6A, WBP_ALetter}, 516 {0x1D2C, 0x1D6A, WBP_ALetter},
509 {0x1D6B, 0x1D77, WBP_ALetter}, 517 {0x1D6B, 0x1D77, WBP_ALetter},
510 {0x1D78, 0x1D78, WBP_ALetter}, 518 {0x1D78, 0x1D78, WBP_ALetter},
511 {0x1D79, 0x1D9A, WBP_ALetter}, 519 {0x1D79, 0x1D9A, WBP_ALetter},
512 {0x1D9B, 0x1DBF, WBP_ALetter}, 520 {0x1D9B, 0x1DBF, WBP_ALetter},
513 {0x1DC0, 0x1DE6, WBP_Extend}, 521 {0x1DC0, 0x1DF5, WBP_Extend},
514 {0x1DFC, 0x1DFF, WBP_Extend}, 522 {0x1DFC, 0x1DFF, WBP_Extend},
515 {0x1E00, 0x1F15, WBP_ALetter}, 523 {0x1E00, 0x1F15, WBP_ALetter},
516 {0x1F18, 0x1F1D, WBP_ALetter}, 524 {0x1F18, 0x1F1D, WBP_ALetter},
@@ -544,7 +552,7 @@ static struct WordBreakProperties wb_prop_default[] = {
544 {0x2044, 0x2044, WBP_MidNum}, 552 {0x2044, 0x2044, WBP_MidNum},
545 {0x2054, 0x2054, WBP_ExtendNumLet}, 553 {0x2054, 0x2054, WBP_ExtendNumLet},
546 {0x2060, 0x2064, WBP_Format}, 554 {0x2060, 0x2064, WBP_Format},
547 {0x206A, 0x206F, WBP_Format}, 555 {0x2066, 0x206F, WBP_Format},
548 {0x2071, 0x2071, WBP_ALetter}, 556 {0x2071, 0x2071, WBP_ALetter},
549 {0x207F, 0x207F, WBP_ALetter}, 557 {0x207F, 0x207F, WBP_ALetter},
550 {0x2090, 0x209C, WBP_ALetter}, 558 {0x2090, 0x209C, WBP_ALetter},
@@ -631,7 +639,8 @@ static struct WordBreakProperties wb_prop_default[] = {
631 {0xA670, 0xA672, WBP_Extend}, 639 {0xA670, 0xA672, WBP_Extend},
632 {0xA674, 0xA67D, WBP_Extend}, 640 {0xA674, 0xA67D, WBP_Extend},
633 {0xA67F, 0xA67F, WBP_ALetter}, 641 {0xA67F, 0xA67F, WBP_ALetter},
634 {0xA680, 0xA697, WBP_ALetter}, 642 {0xA680, 0xA69B, WBP_ALetter},
643 {0xA69C, 0xA69D, WBP_ALetter},
635 {0xA69F, 0xA69F, WBP_Extend}, 644 {0xA69F, 0xA69F, WBP_Extend},
636 {0xA6A0, 0xA6E5, WBP_ALetter}, 645 {0xA6A0, 0xA6E5, WBP_ALetter},
637 {0xA6E6, 0xA6EF, WBP_ALetter}, 646 {0xA6E6, 0xA6EF, WBP_ALetter},
@@ -642,8 +651,9 @@ static struct WordBreakProperties wb_prop_default[] = {
642 {0xA771, 0xA787, WBP_ALetter}, 651 {0xA771, 0xA787, WBP_ALetter},
643 {0xA788, 0xA788, WBP_ALetter}, 652 {0xA788, 0xA788, WBP_ALetter},
644 {0xA78B, 0xA78E, WBP_ALetter}, 653 {0xA78B, 0xA78E, WBP_ALetter},
645 {0xA790, 0xA793, WBP_ALetter}, 654 {0xA790, 0xA7AD, WBP_ALetter},
646 {0xA7A0, 0xA7AA, WBP_ALetter}, 655 {0xA7B0, 0xA7B1, WBP_ALetter},
656 {0xA7F7, 0xA7F7, WBP_ALetter},
647 {0xA7F8, 0xA7F9, WBP_ALetter}, 657 {0xA7F8, 0xA7F9, WBP_ALetter},
648 {0xA7FA, 0xA7FA, WBP_ALetter}, 658 {0xA7FA, 0xA7FA, WBP_ALetter},
649 {0xA7FB, 0xA801, WBP_ALetter}, 659 {0xA7FB, 0xA801, WBP_ALetter},
@@ -683,6 +693,8 @@ static struct WordBreakProperties wb_prop_default[] = {
683 {0xA9BD, 0xA9C0, WBP_Extend}, 693 {0xA9BD, 0xA9C0, WBP_Extend},
684 {0xA9CF, 0xA9CF, WBP_ALetter}, 694 {0xA9CF, 0xA9CF, WBP_ALetter},
685 {0xA9D0, 0xA9D9, WBP_Numeric}, 695 {0xA9D0, 0xA9D9, WBP_Numeric},
696 {0xA9E5, 0xA9E5, WBP_Extend},
697 {0xA9F0, 0xA9F9, WBP_Numeric},
686 {0xAA00, 0xAA28, WBP_ALetter}, 698 {0xAA00, 0xAA28, WBP_ALetter},
687 {0xAA29, 0xAA2E, WBP_Extend}, 699 {0xAA29, 0xAA2E, WBP_Extend},
688 {0xAA2F, 0xAA30, WBP_Extend}, 700 {0xAA2F, 0xAA30, WBP_Extend},
@@ -696,6 +708,8 @@ static struct WordBreakProperties wb_prop_default[] = {
696 {0xAA4D, 0xAA4D, WBP_Extend}, 708 {0xAA4D, 0xAA4D, WBP_Extend},
697 {0xAA50, 0xAA59, WBP_Numeric}, 709 {0xAA50, 0xAA59, WBP_Numeric},
698 {0xAA7B, 0xAA7B, WBP_Extend}, 710 {0xAA7B, 0xAA7B, WBP_Extend},
711 {0xAA7C, 0xAA7C, WBP_Extend},
712 {0xAA7D, 0xAA7D, WBP_Extend},
699 {0xAAB0, 0xAAB0, WBP_Extend}, 713 {0xAAB0, 0xAAB0, WBP_Extend},
700 {0xAAB2, 0xAAB4, WBP_Extend}, 714 {0xAAB2, 0xAAB4, WBP_Extend},
701 {0xAAB7, 0xAAB8, WBP_Extend}, 715 {0xAAB7, 0xAAB8, WBP_Extend},
@@ -714,6 +728,9 @@ static struct WordBreakProperties wb_prop_default[] = {
714 {0xAB11, 0xAB16, WBP_ALetter}, 728 {0xAB11, 0xAB16, WBP_ALetter},
715 {0xAB20, 0xAB26, WBP_ALetter}, 729 {0xAB20, 0xAB26, WBP_ALetter},
716 {0xAB28, 0xAB2E, WBP_ALetter}, 730 {0xAB28, 0xAB2E, WBP_ALetter},
731 {0xAB30, 0xAB5A, WBP_ALetter},
732 {0xAB5C, 0xAB5F, WBP_ALetter},
733 {0xAB64, 0xAB65, WBP_ALetter},
717 {0xABC0, 0xABE2, WBP_ALetter}, 734 {0xABC0, 0xABE2, WBP_ALetter},
718 {0xABE3, 0xABE4, WBP_Extend}, 735 {0xABE3, 0xABE4, WBP_Extend},
719 {0xABE5, 0xABE5, WBP_Extend}, 736 {0xABE5, 0xABE5, WBP_Extend},
@@ -728,15 +745,16 @@ static struct WordBreakProperties wb_prop_default[] = {
728 {0xD7CB, 0xD7FB, WBP_ALetter}, 745 {0xD7CB, 0xD7FB, WBP_ALetter},
729 {0xFB00, 0xFB06, WBP_ALetter}, 746 {0xFB00, 0xFB06, WBP_ALetter},
730 {0xFB13, 0xFB17, WBP_ALetter}, 747 {0xFB13, 0xFB17, WBP_ALetter},
731 {0xFB1D, 0xFB1D, WBP_ALetter}, 748 {0xFB1D, 0xFB1D, WBP_Hebrew_Letter},
732 {0xFB1E, 0xFB1E, WBP_Extend}, 749 {0xFB1E, 0xFB1E, WBP_Extend},
733 {0xFB1F, 0xFB28, WBP_ALetter}, 750 {0xFB1F, 0xFB28, WBP_Hebrew_Letter},
734 {0xFB2A, 0xFB36, WBP_ALetter}, 751 {0xFB2A, 0xFB36, WBP_Hebrew_Letter},
735 {0xFB38, 0xFB3C, WBP_ALetter}, 752 {0xFB38, 0xFB3C, WBP_Hebrew_Letter},
736 {0xFB3E, 0xFB3E, WBP_ALetter}, 753 {0xFB3E, 0xFB3E, WBP_Hebrew_Letter},
737 {0xFB40, 0xFB41, WBP_ALetter}, 754 {0xFB40, 0xFB41, WBP_Hebrew_Letter},
738 {0xFB43, 0xFB44, WBP_ALetter}, 755 {0xFB43, 0xFB44, WBP_Hebrew_Letter},
739 {0xFB46, 0xFBB1, WBP_ALetter}, 756 {0xFB46, 0xFB4F, WBP_Hebrew_Letter},
757 {0xFB50, 0xFBB1, WBP_ALetter},
740 {0xFBD3, 0xFD3D, WBP_ALetter}, 758 {0xFBD3, 0xFD3D, WBP_ALetter},
741 {0xFD50, 0xFD8F, WBP_ALetter}, 759 {0xFD50, 0xFD8F, WBP_ALetter},
742 {0xFD92, 0xFDC7, WBP_ALetter}, 760 {0xFD92, 0xFDC7, WBP_ALetter},
@@ -745,7 +763,7 @@ static struct WordBreakProperties wb_prop_default[] = {
745 {0xFE10, 0xFE10, WBP_MidNum}, 763 {0xFE10, 0xFE10, WBP_MidNum},
746 {0xFE13, 0xFE13, WBP_MidLetter}, 764 {0xFE13, 0xFE13, WBP_MidLetter},
747 {0xFE14, 0xFE14, WBP_MidNum}, 765 {0xFE14, 0xFE14, WBP_MidNum},
748 {0xFE20, 0xFE26, WBP_Extend}, 766 {0xFE20, 0xFE2D, WBP_Extend},
749 {0xFE33, 0xFE34, WBP_ExtendNumLet}, 767 {0xFE33, 0xFE34, WBP_ExtendNumLet},
750 {0xFE4D, 0xFE4F, WBP_ExtendNumLet}, 768 {0xFE4D, 0xFE4F, WBP_ExtendNumLet},
751 {0xFE50, 0xFE50, WBP_MidNum}, 769 {0xFE50, 0xFE50, WBP_MidNum},
@@ -784,11 +802,14 @@ static struct WordBreakProperties wb_prop_default[] = {
784 {0x101FD, 0x101FD, WBP_Extend}, 802 {0x101FD, 0x101FD, WBP_Extend},
785 {0x10280, 0x1029C, WBP_ALetter}, 803 {0x10280, 0x1029C, WBP_ALetter},
786 {0x102A0, 0x102D0, WBP_ALetter}, 804 {0x102A0, 0x102D0, WBP_ALetter},
787 {0x10300, 0x1031E, WBP_ALetter}, 805 {0x102E0, 0x102E0, WBP_Extend},
806 {0x10300, 0x1031F, WBP_ALetter},
788 {0x10330, 0x10340, WBP_ALetter}, 807 {0x10330, 0x10340, WBP_ALetter},
789 {0x10341, 0x10341, WBP_ALetter}, 808 {0x10341, 0x10341, WBP_ALetter},
790 {0x10342, 0x10349, WBP_ALetter}, 809 {0x10342, 0x10349, WBP_ALetter},
791 {0x1034A, 0x1034A, WBP_ALetter}, 810 {0x1034A, 0x1034A, WBP_ALetter},
811 {0x10350, 0x10375, WBP_ALetter},
812 {0x10376, 0x1037A, WBP_Extend},
792 {0x10380, 0x1039D, WBP_ALetter}, 813 {0x10380, 0x1039D, WBP_ALetter},
793 {0x103A0, 0x103C3, WBP_ALetter}, 814 {0x103A0, 0x103C3, WBP_ALetter},
794 {0x103C8, 0x103CF, WBP_ALetter}, 815 {0x103C8, 0x103CF, WBP_ALetter},
@@ -796,12 +817,19 @@ static struct WordBreakProperties wb_prop_default[] = {
796 {0x10400, 0x1044F, WBP_ALetter}, 817 {0x10400, 0x1044F, WBP_ALetter},
797 {0x10450, 0x1049D, WBP_ALetter}, 818 {0x10450, 0x1049D, WBP_ALetter},
798 {0x104A0, 0x104A9, WBP_Numeric}, 819 {0x104A0, 0x104A9, WBP_Numeric},
820 {0x10500, 0x10527, WBP_ALetter},
821 {0x10530, 0x10563, WBP_ALetter},
822 {0x10600, 0x10736, WBP_ALetter},
823 {0x10740, 0x10755, WBP_ALetter},
824 {0x10760, 0x10767, WBP_ALetter},
799 {0x10800, 0x10805, WBP_ALetter}, 825 {0x10800, 0x10805, WBP_ALetter},
800 {0x10808, 0x10808, WBP_ALetter}, 826 {0x10808, 0x10808, WBP_ALetter},
801 {0x1080A, 0x10835, WBP_ALetter}, 827 {0x1080A, 0x10835, WBP_ALetter},
802 {0x10837, 0x10838, WBP_ALetter}, 828 {0x10837, 0x10838, WBP_ALetter},
803 {0x1083C, 0x1083C, WBP_ALetter}, 829 {0x1083C, 0x1083C, WBP_ALetter},
804 {0x1083F, 0x10855, WBP_ALetter}, 830 {0x1083F, 0x10855, WBP_ALetter},
831 {0x10860, 0x10876, WBP_ALetter},
832 {0x10880, 0x1089E, WBP_ALetter},
805 {0x10900, 0x10915, WBP_ALetter}, 833 {0x10900, 0x10915, WBP_ALetter},
806 {0x10920, 0x10939, WBP_ALetter}, 834 {0x10920, 0x10939, WBP_ALetter},
807 {0x10980, 0x109B7, WBP_ALetter}, 835 {0x10980, 0x109B7, WBP_ALetter},
@@ -816,9 +844,14 @@ static struct WordBreakProperties wb_prop_default[] = {
816 {0x10A38, 0x10A3A, WBP_Extend}, 844 {0x10A38, 0x10A3A, WBP_Extend},
817 {0x10A3F, 0x10A3F, WBP_Extend}, 845 {0x10A3F, 0x10A3F, WBP_Extend},
818 {0x10A60, 0x10A7C, WBP_ALetter}, 846 {0x10A60, 0x10A7C, WBP_ALetter},
847 {0x10A80, 0x10A9C, WBP_ALetter},
848 {0x10AC0, 0x10AC7, WBP_ALetter},
849 {0x10AC9, 0x10AE4, WBP_ALetter},
850 {0x10AE5, 0x10AE6, WBP_Extend},
819 {0x10B00, 0x10B35, WBP_ALetter}, 851 {0x10B00, 0x10B35, WBP_ALetter},
820 {0x10B40, 0x10B55, WBP_ALetter}, 852 {0x10B40, 0x10B55, WBP_ALetter},
821 {0x10B60, 0x10B72, WBP_ALetter}, 853 {0x10B60, 0x10B72, WBP_ALetter},
854 {0x10B80, 0x10B91, WBP_ALetter},
822 {0x10C00, 0x10C48, WBP_ALetter}, 855 {0x10C00, 0x10C48, WBP_ALetter},
823 {0x11000, 0x11000, WBP_Extend}, 856 {0x11000, 0x11000, WBP_Extend},
824 {0x11001, 0x11001, WBP_Extend}, 857 {0x11001, 0x11001, WBP_Extend},
@@ -826,7 +859,7 @@ static struct WordBreakProperties wb_prop_default[] = {
826 {0x11003, 0x11037, WBP_ALetter}, 859 {0x11003, 0x11037, WBP_ALetter},
827 {0x11038, 0x11046, WBP_Extend}, 860 {0x11038, 0x11046, WBP_Extend},
828 {0x11066, 0x1106F, WBP_Numeric}, 861 {0x11066, 0x1106F, WBP_Numeric},
829 {0x11080, 0x11081, WBP_Extend}, 862 {0x1107F, 0x11081, WBP_Extend},
830 {0x11082, 0x11082, WBP_Extend}, 863 {0x11082, 0x11082, WBP_Extend},
831 {0x11083, 0x110AF, WBP_ALetter}, 864 {0x11083, 0x110AF, WBP_ALetter},
832 {0x110B0, 0x110B2, WBP_Extend}, 865 {0x110B0, 0x110B2, WBP_Extend},
@@ -842,6 +875,9 @@ static struct WordBreakProperties wb_prop_default[] = {
842 {0x1112C, 0x1112C, WBP_Extend}, 875 {0x1112C, 0x1112C, WBP_Extend},
843 {0x1112D, 0x11134, WBP_Extend}, 876 {0x1112D, 0x11134, WBP_Extend},
844 {0x11136, 0x1113F, WBP_Numeric}, 877 {0x11136, 0x1113F, WBP_Numeric},
878 {0x11150, 0x11172, WBP_ALetter},
879 {0x11173, 0x11173, WBP_Extend},
880 {0x11176, 0x11176, WBP_ALetter},
845 {0x11180, 0x11181, WBP_Extend}, 881 {0x11180, 0x11181, WBP_Extend},
846 {0x11182, 0x11182, WBP_Extend}, 882 {0x11182, 0x11182, WBP_Extend},
847 {0x11183, 0x111B2, WBP_ALetter}, 883 {0x11183, 0x111B2, WBP_ALetter},
@@ -850,6 +886,68 @@ static struct WordBreakProperties wb_prop_default[] = {
850 {0x111BF, 0x111C0, WBP_Extend}, 886 {0x111BF, 0x111C0, WBP_Extend},
851 {0x111C1, 0x111C4, WBP_ALetter}, 887 {0x111C1, 0x111C4, WBP_ALetter},
852 {0x111D0, 0x111D9, WBP_Numeric}, 888 {0x111D0, 0x111D9, WBP_Numeric},
889 {0x111DA, 0x111DA, WBP_ALetter},
890 {0x11200, 0x11211, WBP_ALetter},
891 {0x11213, 0x1122B, WBP_ALetter},
892 {0x1122C, 0x1122E, WBP_Extend},
893 {0x1122F, 0x11231, WBP_Extend},
894 {0x11232, 0x11233, WBP_Extend},
895 {0x11234, 0x11234, WBP_Extend},
896 {0x11235, 0x11235, WBP_Extend},
897 {0x11236, 0x11237, WBP_Extend},
898 {0x112B0, 0x112DE, WBP_ALetter},
899 {0x112DF, 0x112DF, WBP_Extend},
900 {0x112E0, 0x112E2, WBP_Extend},
901 {0x112E3, 0x112EA, WBP_Extend},
902 {0x112F0, 0x112F9, WBP_Numeric},
903 {0x11301, 0x11301, WBP_Extend},
904 {0x11302, 0x11303, WBP_Extend},
905 {0x11305, 0x1130C, WBP_ALetter},
906 {0x1130F, 0x11310, WBP_ALetter},
907 {0x11313, 0x11328, WBP_ALetter},
908 {0x1132A, 0x11330, WBP_ALetter},
909 {0x11332, 0x11333, WBP_ALetter},
910 {0x11335, 0x11339, WBP_ALetter},
911 {0x1133C, 0x1133C, WBP_Extend},
912 {0x1133D, 0x1133D, WBP_ALetter},
913 {0x1133E, 0x1133F, WBP_Extend},
914 {0x11340, 0x11340, WBP_Extend},
915 {0x11341, 0x11344, WBP_Extend},
916 {0x11347, 0x11348, WBP_Extend},
917 {0x1134B, 0x1134D, WBP_Extend},
918 {0x11357, 0x11357, WBP_Extend},
919 {0x1135D, 0x11361, WBP_ALetter},
920 {0x11362, 0x11363, WBP_Extend},
921 {0x11366, 0x1136C, WBP_Extend},
922 {0x11370, 0x11374, WBP_Extend},
923 {0x11480, 0x114AF, WBP_ALetter},
924 {0x114B0, 0x114B2, WBP_Extend},
925 {0x114B3, 0x114B8, WBP_Extend},
926 {0x114B9, 0x114B9, WBP_Extend},
927 {0x114BA, 0x114BA, WBP_Extend},
928 {0x114BB, 0x114BE, WBP_Extend},
929 {0x114BF, 0x114C0, WBP_Extend},
930 {0x114C1, 0x114C1, WBP_Extend},
931 {0x114C2, 0x114C3, WBP_Extend},
932 {0x114C4, 0x114C5, WBP_ALetter},
933 {0x114C7, 0x114C7, WBP_ALetter},
934 {0x114D0, 0x114D9, WBP_Numeric},
935 {0x11580, 0x115AE, WBP_ALetter},
936 {0x115AF, 0x115B1, WBP_Extend},
937 {0x115B2, 0x115B5, WBP_Extend},
938 {0x115B8, 0x115BB, WBP_Extend},
939 {0x115BC, 0x115BD, WBP_Extend},
940 {0x115BE, 0x115BE, WBP_Extend},
941 {0x115BF, 0x115C0, WBP_Extend},
942 {0x11600, 0x1162F, WBP_ALetter},
943 {0x11630, 0x11632, WBP_Extend},
944 {0x11633, 0x1163A, WBP_Extend},
945 {0x1163B, 0x1163C, WBP_Extend},
946 {0x1163D, 0x1163D, WBP_Extend},
947 {0x1163E, 0x1163E, WBP_Extend},
948 {0x1163F, 0x11640, WBP_Extend},
949 {0x11644, 0x11644, WBP_ALetter},
950 {0x11650, 0x11659, WBP_Numeric},
853 {0x11680, 0x116AA, WBP_ALetter}, 951 {0x11680, 0x116AA, WBP_ALetter},
854 {0x116AB, 0x116AB, WBP_Extend}, 952 {0x116AB, 0x116AB, WBP_Extend},
855 {0x116AC, 0x116AC, WBP_Extend}, 953 {0x116AC, 0x116AC, WBP_Extend},
@@ -859,16 +957,36 @@ static struct WordBreakProperties wb_prop_default[] = {
859 {0x116B6, 0x116B6, WBP_Extend}, 957 {0x116B6, 0x116B6, WBP_Extend},
860 {0x116B7, 0x116B7, WBP_Extend}, 958 {0x116B7, 0x116B7, WBP_Extend},
861 {0x116C0, 0x116C9, WBP_Numeric}, 959 {0x116C0, 0x116C9, WBP_Numeric},
862 {0x12000, 0x1236E, WBP_ALetter}, 960 {0x118A0, 0x118DF, WBP_ALetter},
863 {0x12400, 0x12462, WBP_ALetter}, 961 {0x118E0, 0x118E9, WBP_Numeric},
962 {0x118FF, 0x118FF, WBP_ALetter},
963 {0x11AC0, 0x11AF8, WBP_ALetter},
964 {0x12000, 0x12398, WBP_ALetter},
965 {0x12400, 0x1246E, WBP_ALetter},
864 {0x13000, 0x1342E, WBP_ALetter}, 966 {0x13000, 0x1342E, WBP_ALetter},
865 {0x16800, 0x16A38, WBP_ALetter}, 967 {0x16800, 0x16A38, WBP_ALetter},
968 {0x16A40, 0x16A5E, WBP_ALetter},
969 {0x16A60, 0x16A69, WBP_Numeric},
970 {0x16AD0, 0x16AED, WBP_ALetter},
971 {0x16AF0, 0x16AF4, WBP_Extend},
972 {0x16B00, 0x16B2F, WBP_ALetter},
973 {0x16B30, 0x16B36, WBP_Extend},
974 {0x16B40, 0x16B43, WBP_ALetter},
975 {0x16B50, 0x16B59, WBP_Numeric},
976 {0x16B63, 0x16B77, WBP_ALetter},
977 {0x16B7D, 0x16B8F, WBP_ALetter},
866 {0x16F00, 0x16F44, WBP_ALetter}, 978 {0x16F00, 0x16F44, WBP_ALetter},
867 {0x16F50, 0x16F50, WBP_ALetter}, 979 {0x16F50, 0x16F50, WBP_ALetter},
868 {0x16F51, 0x16F7E, WBP_Extend}, 980 {0x16F51, 0x16F7E, WBP_Extend},
869 {0x16F8F, 0x16F92, WBP_Extend}, 981 {0x16F8F, 0x16F92, WBP_Extend},
870 {0x16F93, 0x16F9F, WBP_ALetter}, 982 {0x16F93, 0x16F9F, WBP_ALetter},
871 {0x1B000, 0x1B000, WBP_Katakana}, 983 {0x1B000, 0x1B000, WBP_Katakana},
984 {0x1BC00, 0x1BC6A, WBP_ALetter},
985 {0x1BC70, 0x1BC7C, WBP_ALetter},
986 {0x1BC80, 0x1BC88, WBP_ALetter},
987 {0x1BC90, 0x1BC99, WBP_ALetter},
988 {0x1BC9D, 0x1BC9E, WBP_Extend},
989 {0x1BCA0, 0x1BCA3, WBP_Format},
872 {0x1D165, 0x1D166, WBP_Extend}, 990 {0x1D165, 0x1D166, WBP_Extend},
873 {0x1D167, 0x1D169, WBP_Extend}, 991 {0x1D167, 0x1D169, WBP_Extend},
874 {0x1D16D, 0x1D172, WBP_Extend}, 992 {0x1D16D, 0x1D172, WBP_Extend},
@@ -908,6 +1026,8 @@ static struct WordBreakProperties wb_prop_default[] = {
908 {0x1D7AA, 0x1D7C2, WBP_ALetter}, 1026 {0x1D7AA, 0x1D7C2, WBP_ALetter},
909 {0x1D7C4, 0x1D7CB, WBP_ALetter}, 1027 {0x1D7C4, 0x1D7CB, WBP_ALetter},
910 {0x1D7CE, 0x1D7FF, WBP_Numeric}, 1028 {0x1D7CE, 0x1D7FF, WBP_Numeric},
1029 {0x1E800, 0x1E8C4, WBP_ALetter},
1030 {0x1E8D0, 0x1E8D6, WBP_Extend},
911 {0x1EE00, 0x1EE03, WBP_ALetter}, 1031 {0x1EE00, 0x1EE03, WBP_ALetter},
912 {0x1EE05, 0x1EE1F, WBP_ALetter}, 1032 {0x1EE05, 0x1EE1F, WBP_ALetter},
913 {0x1EE21, 0x1EE22, WBP_ALetter}, 1033 {0x1EE21, 0x1EE22, WBP_ALetter},
@@ -941,7 +1061,10 @@ static struct WordBreakProperties wb_prop_default[] = {
941 {0x1EEA1, 0x1EEA3, WBP_ALetter}, 1061 {0x1EEA1, 0x1EEA3, WBP_ALetter},
942 {0x1EEA5, 0x1EEA9, WBP_ALetter}, 1062 {0x1EEA5, 0x1EEA9, WBP_ALetter},
943 {0x1EEAB, 0x1EEBB, WBP_ALetter}, 1063 {0x1EEAB, 0x1EEBB, WBP_ALetter},
944 {0x1F1E6, 0x1F1FF, WBP_Regional}, 1064 {0x1F130, 0x1F149, WBP_ALetter},
1065 {0x1F150, 0x1F169, WBP_ALetter},
1066 {0x1F170, 0x1F189, WBP_ALetter},
1067 {0x1F1E6, 0x1F1FF, WBP_Regional_Indicator},
945 {0xE0001, 0xE0001, WBP_Format}, 1068 {0xE0001, 0xE0001, WBP_Format},
946 {0xE0020, 0xE007F, WBP_Format}, 1069 {0xE0020, 0xE007F, WBP_Format},
947 {0xE0100, 0xE01EF, WBP_Extend}, 1070 {0xE0100, 0xE01EF, WBP_Extend},
diff --git a/src/static_libs/libunibreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h
index 72816f9..7130a13 100644
--- a/src/static_libs/libunibreak/wordbreakdef.h
+++ b/src/static_libs/libunibreak/wordbreakdef.h
@@ -4,8 +4,7 @@
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2013 Tom Hacohen <tom at stosb dot com> 7 * Copyright (C) 2013-15 Tom Hacohen <tom at stosb dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
9 * 8 *
10 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
11 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -31,9 +30,8 @@
31 * Unicode 6.0.0: 30 * Unicode 6.0.0:
32 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
33 * 32 *
34 * This library has been updated according to Revision 21, for 33 * This library has been updated according to Revision 25, for
35 * Unicode 6.2.0: 34 * Unicode 7.0.0:
36 * <URL:http://www.unicode.org/reports/tr29/tr29-21.html>
37 * 35 *
38 * The Unicode Terms of Use are available at 36 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html> 37 * <URL:http://www.unicode.org/copyright.html>
@@ -45,11 +43,12 @@
45 * Definitions of internal data structures, declarations of global 43 * Definitions of internal data structures, declarations of global
46 * variables, and function prototypes for the word breaking algorithm. 44 * variables, and function prototypes for the word breaking algorithm.
47 * 45 *
48 * @version 2.4, 2013/11/10 46 * @version 2.6, 2015/04/19
49 * @author Tom Hacohen 47 * @author Tom Hacohen
50 * @author Petr Filipsky
51 */ 48 */
52 49
50#include "unibreakdef.h"
51
53/** 52/**
54 * Word break classes. This is a direct mapping of Table 3 of Unicode 53 * Word break classes. This is a direct mapping of Table 3 of Unicode
55 * Standard Annex 29, Revision 23. 54 * Standard Annex 29, Revision 23.
@@ -61,18 +60,18 @@ enum WordBreakClass
61 WBP_LF, 60 WBP_LF,
62 WBP_Newline, 61 WBP_Newline,
63 WBP_Extend, 62 WBP_Extend,
63 WBP_Regional_Indicator,
64 WBP_Format, 64 WBP_Format,
65 WBP_Katakana, 65 WBP_Katakana,
66 WBP_Hebrew_Letter,
66 WBP_ALetter, 67 WBP_ALetter,
68 WBP_Single_Quote,
69 WBP_Double_Quote,
67 WBP_MidNumLet, 70 WBP_MidNumLet,
68 WBP_MidLetter, 71 WBP_MidLetter,
69 WBP_MidNum, 72 WBP_MidNum,
70 WBP_Numeric, 73 WBP_Numeric,
71 WBP_ExtendNumLet, 74 WBP_ExtendNumLet,
72 WBP_Regional,
73 WBP_Hebrew,
74 WBP_Single,
75 WBP_Double,
76 WBP_Any 75 WBP_Any
77}; 76};
78 77