summary | refs | log | tree | commit | diff
path: root/src/static_libs/libunibreak
diff options
context:
space:
mode:
author Tom Hacohen <tom@stosb.com> 2014-01-21 16:41:06 +0000
committer Tom Hacohen <tom@stosb.com> 2014-01-21 16:41:06 +0000
commit cff1a9a59f40b1e83ed1db8145108cae53504d4f (patch)
tree 524c58e21db031ab6acd6382dfdacb9c66d91a65 /src/static_libs/libunibreak
parent cc8fa1da451d588e4218a2b8f8d3eebb9b38890f (diff)
Synced libunibreak local copy with upstream.
This fixes T805.
Diffstat (limited to 'src/static_libs/libunibreak')
-rw-r--r-- src/static_libs/libunibreak/AUTHORS 5
-rw-r--r-- src/static_libs/libunibreak/ChangeLog 118
-rw-r--r-- src/static_libs/libunibreak/NEWS 7
-rw-r--r-- src/static_libs/libunibreak/README 79
-rw-r--r-- src/static_libs/libunibreak/linebreak.c 1236
-rw-r--r-- src/static_libs/libunibreak/linebreak.h 38
-rw-r--r-- src/static_libs/libunibreak/linebreakdata.c 19
-rw-r--r-- src/static_libs/libunibreak/linebreakdef.c 102
-rw-r--r-- src/static_libs/libunibreak/linebreakdef.h 152
-rw-r--r-- src/static_libs/libunibreak/wordbreak.c 674
-rw-r--r-- src/static_libs/libunibreak/wordbreak.h 32
-rw-r--r-- src/static_libs/libunibreak/wordbreakdef.h 63
12 files changed, 1392 insertions, 1133 deletions
diff --git a/src/static_libs/libunibreak/AUTHORS b/src/static_libs/libunibreak/AUTHORS
index 22786d4201..1b4f4b41d8 100644
--- a/src/static_libs/libunibreak/AUTHORS
+++ b/src/static_libs/libunibreak/AUTHORS
@@ -1,4 +1,5 @@
1Wu Yongwei. Designed and implemented liblinebreak. 1Wu Yongwei. Designed and implemented the original liblinebreak.
2Current maintainer of libunibreak.
2 3
3Nikolay Pultsin. Put forward the original requirements on liblinebreak, 4Nikolay Pultsin. Put forward the original requirements on liblinebreak,
4performed tests, and made a lot of suggestions on the initial versions. 5performed tests, and made a lot of suggestions on the initial versions.
@@ -6,3 +7,5 @@ performed tests, and made a lot of suggestions on the initial versions.
6Thomas Klausner. Autoconfiscated and libtoolized liblinebreak. 7Thomas Klausner. Autoconfiscated and libtoolized liblinebreak.
7 8
8Tom Hacohen. Added word boundaries support. 9Tom Hacohen. Added word boundaries support.
10
11Petr Filipsky. Added incremental processing for line-breaking.
diff --git a/src/static_libs/libunibreak/ChangeLog b/src/static_libs/libunibreak/ChangeLog
index 7d5e3b6391..feb830bd37 100644
--- a/src/static_libs/libunibreak/ChangeLog
+++ b/src/static_libs/libunibreak/ChangeLog
@@ -1,3 +1,116 @@
12013-11-14 Wu Yongwei <wuyongwei@gmail.com>
2
3 * src/linebreak.c: Add/update comments and doc comments.
4 (lb_init_breaking_class): Rename to treat_first_char.
5 (lb_classify_break_simple): Rename to get_lb_result_simple.
6 (lb_classify_break_lookup): Rename to get_lb_result_lookup.
7 (set_linebreaks): Remove an unused local variable.
8
92013-11-14 Wu Yongwei <wuyongwei@gmail.com>
10
11 * src/linebreakdata.c: Regenerate from LineBreak-6.3.0.txt.
12
132013-11-13 Wu Yongwei <wuyongwei@gmail.com>
14
15 Fix compilation problems under MSVC.
16 * src/linebreak.c (lb_init_breaking_class): Remove `inline'.
17 (lb_classify_break_simple): Ditto.
18 (lb_classify_break_lookup): Ditto.
19 (lb_classify_break_lookup): Move local variable declaration before
20 assertions.
21
222013-11-10 Wu Yongwei <wuyongwei@gmail.com>
23
24 * src/Makefile.am (libunibreak_la_LDFLAGS): Set the version-info to
25 `2:0:1'.
26
272013-11-10 Wu Yongwei <wuyongwei@gmail.com>
28
29 * src/linebreakdef.c: Adjust the order of code.
30 (lb_process_next_char): Make its return type int.
31 * src/linebreak.c (lb_process_next_char): Ditto.
32
332013-11-10 Wu Yongwei <wuyongwei@gmail.com>
34
35 * src/linebreak.c: Make minor changes in doc comments, formatting,
36 and names.
37 * src/linebreakdef.c: Ditto.
38
392013-11-10 Wu Yongwei <wuyongwei@gmail.com>
40
41 * AUTHORS: Add `Petr Filipsky'.
42
432013-11-10 Petr Filipsky <philodej@gmail.com>
44
45 Expose low level line-breaking API for incremental processing.
46 * src/linebreak.h: Add prototype declarations for
47 lb_init_break_context and lb_process_next_char.
48 (struct LineBreakContext): New struct.
49 * src/linebreak.h (LINEBREAK_UNDEFINED): New macro constant.
50 (lb_init_breaking_class): New static function.
51 (lb_classify_break_simple): New static function.
52 (lb_classify_break_lookup): New static function.
53 (lb_init_break_context): New function.
54 (lb_process_next_char): New function.
55 (set_linebreaks): Implement with lb_init_break_context and
56 lb_process_next_char.
57
582013-11-05 Petr Filipsky <philodej@gmail.com>
59
60 * src/wordbreakdef.h (enum WordBreakClass): Update according to
61 Table 3 of Unicode Standard Annex 29, Revision 23.
62
632013-09-30 Wu Yongwei <wuyongwei@gmail.com>
64
65 Update for the libunibreak 1.1 release.
66 * configure.ac (AC_INIT): Change the library version to `1.1'.
67 * Doxyfile (PROJECT_NUMBER): Change to `1.1'.
68 * Makefile.am (EXTRA_DIST): Add the `tools' directory.
69 * NEWS: Add information about libunibreak 1.1.
70 * src/Makefile.am (libunibreak_la_LDFLAGS): Set the version to `1:1'.
71
722013-09-29 Wu Yongwei <wuyongwei@gmail.com>
73
74 * src/Makefile.msvc: Modernize obsolete/deprecated MSVC options.
75
762013-09-28 Wu Yongwei <wuyongwei@gmail.com>
77
78 * src/wordbreak.c: Update copyright year and UAX information.
79 * src/wordbreak.h: Ditto.
80 * src/wordbreakdef.h: Ditto.
81
822013-09-28 Wu Yongwei <wuyongwei@gmail.com>
83
84 Fix the errors caused by libtool 2.4 (really annoying to the level
85 of WTF for making me add the foolish dependency on m4).
86 * Makefile.am (ACLOCAL_AMFLAGS): Add `-I m4'.
87 * bootstrap: Add a line to execute autoreconf.
88 * configure.ac (AC_CONFIG_MACRO_DIR): Set to `[m4]'.
89 * purge: Make it remove also the m4 directory.
90
912013-09-28 Wu Yongwei <wuyongwei@gmail.com>
92
93 * Makefile.am (EXTRA_DIST): Add `README.md'.
94
952013-09-28 Wu Yongwei <wuyongwei@gmail.com>
96
97 * README.md: New Markdown version of README.
98 * README: Remove.
99
1002013-05-13 Tom Hacohen <tom@stosb.com>
101
102 Update files according to UAX #29-21, for Unicode 6.2.0.
103 * README: Update the reference to UAX #29-21.
104 * src/wordbreak.c (set_wordbreaks): Update for WBP_Regional.
105 * src/wordbreakdef.h (WBP_Regional): New enumerator for the new
106 property `RI' as defined in UAX #29-21.
107 * src/wordbreakdata.c: Regenerate from WordBreakProperty-6.2.0.txt.
108
1092013-05-06 Wu Yongwei <wuyongwei@gmail.com>
110
111 * src/Makefile.am (install-exec-hook): Make sure `--disable-static'
112 can work (thanks to Eugene V. Lyubimkin).
113
12012-10-06 Wu Yongwei <wuyongwei@gmail.com> 1142012-10-06 Wu Yongwei <wuyongwei@gmail.com>
2 115
3 Update files according to UAX #14-30, for Unicode 6.2.0. 116 Update files according to UAX #14-30, for Unicode 6.2.0.
@@ -82,11 +195,12 @@
82 195
832012-08-11 Wu Yongwei <wuyongwei@gmail.com> 1962012-08-11 Wu Yongwei <wuyongwei@gmail.com>
84 197
198 Update for the libunibreak 1.0 release.
85 * configure.ac (AC_INIT): Change the library name and version to 199 * configure.ac (AC_INIT): Change the library name and version to
86 `libunibreak' and `1.0'. 200 `libunibreak' and `1.0'.
87 (AC_PROG_LN_S): New macro. 201 (AC_PROG_LN_S): New macro.
88 (AC_OUTPUT): Change to `libunibreak.pc'. 202 (AC_OUTPUT): Change to `libunibreak.pc'.
89 * Doxyfile: (PROJECT_NAME): Change to `libunibreak'. 203 * Doxyfile (PROJECT_NAME): Change to `libunibreak'.
90 (PROJECT_NUMBER): Change to `1.0'. 204 (PROJECT_NUMBER): Change to `1.0'.
91 * LICENCE: Add copyright information about Tom Hacohen. 205 * LICENCE: Add copyright information about Tom Hacohen.
92 * Makefile.am (lib_LTLIBRARIES): Change to `libunibreak.la'. 206 * Makefile.am (lib_LTLIBRARIES): Change to `libunibreak.la'.
@@ -96,7 +210,7 @@
96 a symlink to libunibreak.a. 210 a symlink to libunibreak.a.
97 * Makefile.msvc: Change the library name to `libunibreak', and the 211 * Makefile.msvc: Change the library name to `libunibreak', and the
98 output library to `unibreak.lib'. 212 output library to `unibreak.lib'.
99 * NEW: Add information about libunibreak 1.0. 213 * NEWS: Add information about libunibreak 1.0.
100 * README: Change the library name, and add information about word 214 * README: Change the library name, and add information about word
101 break. 215 break.
102 216
diff --git a/src/static_libs/libunibreak/NEWS b/src/static_libs/libunibreak/NEWS
index 581cab7cb8..3d3fcb809f 100644
--- a/src/static_libs/libunibreak/NEWS
+++ b/src/static_libs/libunibreak/NEWS
@@ -1,3 +1,10 @@
1New in libunibreak 1.1
2
3- Update the code and data to conform to Unicode 6.2.0
4- Update build files to support libtool 2.4
5- Adjust code structure
6- Make a few bug fixes
7
1New in libunibreak 1.0 8New in libunibreak 1.0
2 9
3- Add word breaking support 10- Add word breaking support
diff --git a/src/static_libs/libunibreak/README b/src/static_libs/libunibreak/README
index 39b41570f3..52cd7388b5 100644
--- a/src/static_libs/libunibreak/README
+++ b/src/static_libs/libunibreak/README
@@ -1,31 +1,30 @@
1 L I B U N I B R E A K 1LIBUNIBREAK
2 ===================== 2===========
3 3
4Overview 4Overview
5-------- 5--------
6 6
7This is the README file for libunibreak, an implementation of the line 7This is the README file for libunibreak, an implementation of the line
8breaking and word breaking algorithms as described in Unicode 8breaking and word breaking algorithms as described in [Unicode Standard
9Standard Annex 14 and Unicode Standard Annex 30, available at 9Annex 14] [1] and [Unicode Standard Annex 29] [2]. Check the project's
10 <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 10[home page] [3] for up-to-date information.
11 <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
12 11
13Check this URL for up-to-date information: 12 [1]: http://www.unicode.org/reports/tr14/tr14-30.html
14 <URL:https://github.com/adah1972/libunibreak> 13 [2]: http://www.unicode.org/reports/tr29/tr29-21.html
14 [3]: https://github.com/adah1972/libunibreak
15 15
16 16
17Licence 17Licence
18------- 18-------
19 19
20This library is released under an open-source licence, the zlib/libpng 20This library is released under an open-source licence, the zlib/libpng
21licence. Please check the file LICENCE for details. 21licence. Please check the file *LICENCE* for details.
22 22
23Apart from using the algorithm, part of the code is derived from the 23Apart from using the algorithm, part of the code is derived from the
24data provided under 24[Unicode Public Data] [4], and the [Unicode Terms of Use] [5] may apply.
25 <URL:http://www.unicode.org/Public/>
26 25
27And the Unicode Terms of Use may apply: 26 [4]: http://www.unicode.org/Public/
28 <URL:http://www.unicode.org/copyright.html> 27 [5]: http://www.unicode.org/copyright.html
29 28
30 29
31Installation 30Installation
@@ -33,56 +32,56 @@ Installation
33 32
34There are three ways to build the library: 33There are three ways to build the library:
35 34
361) On *NIX systems supported by the autoconfiscation tools, do the 351. On \*NIX systems supported by the autoconfiscation tools, do the
37 normal 36 normal
38 37
39 ./configure 38 ./configure
40 make 39 make
41 sudo make install 40 sudo make install
42 41
43 to build and install both the dynamic and static libraries. In 42 to build and install both the dynamic and static libraries. In
44 addition, one may 43 addition, one may
44 - type `make doc` to generate the doxygen documentation; or
45 - type `make linebreakdata` to regenerate *linebreakdata.c* from
46 *LineBreak.txt*.
47 - type `make wordbreakdata` to regenerate *wordbreakdata.c* from
48 *WordBreakProperty.txt*.
45 49
46 - type `make doc' to generate the doxygen documentation; or 502. On systems where GCC and Binutils are supported, one can type
47 - type `make linebreakdata' to regenerate linebreakdata.c from
48 LineBreak.txt.
49 - type `make wordbreakdata' to regenerate wordbreakdata.c from
50 WordBreakProperty.txt.
51 51
522) On systems where GCC and Binutils are supported, one can type 52 cd src
53 53 cp -p Makefile.gcc Makefile
54 cd src 54 make
55 cp -p Makefile.gcc Makefile
56 make
57 55
58 to build the static library. In addition, one may 56 to build the static library. In addition, one may
59 57 - type `make debug` or `make release` to explicitly generate the
60 - type `make debug' or `make release' to explicitly generate the
61 debug or release build; 58 debug or release build;
62 - type `make doc' to generate the doxygen documentation; or 59 - type `make doc` to generate the doxygen documentation; or
63 - type `make linebreakdata' to regenerate linebreakdata.c from 60 - type `make linebreakdata` to regenerate *linebreakdata.c* from
64 LineBreak.txt. 61 *LineBreak.txt*.
65 - type `make wordbreakdata' to regenerate wordbreakdata.c from 62 - type `make wordbreakdata` to regenerate *wordbreakdata.c* from
66 WordBreakProperty.txt. 63 *WordBreakProperty.txt*.
67 64
683) On Windows, apart from using method 1 (Cygwin/MSYS) and method 2 653. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2
69 (MinGW), MSVC can also be used. Type 66 (MinGW), MSVC can also be used. Type
70 67
71 cd src 68 cd src
72 nmake -f Makefile.msvc 69 nmake -f Makefile.msvc
73 70
74 to build the static library. By default the debug release is built. 71 to build the static library. By default the debug release is built.
75 To build the release version 72 To build the release version
76 73
77 nmake -f Makefile.msvc CFG="libunibreak - Win32 Release" 74 nmake -f Makefile.msvc CFG="libunibreak - Win32 Release"
78 75
79 76
80Documentation 77Documentation
81------------- 78-------------
82 79
83Check the generated document doc/html/linebreak_8h.html and 80Check the generated document *doc/html/linebreak\_8h.html* and
84doc/html/wordbreak_8h.html in the downloaded file for the public 81*doc/html/wordbreak\_8h.html* in the downloaded file for the public
85interfaces exposed to applications. 82interfaces exposed to applications.
86 83
87 84
85<!--
88vim:autoindent:expandtab:formatoptions=tcqlmn:textwidth=72: 86vim:autoindent:expandtab:formatoptions=tcqlmn:textwidth=72:
87-->
diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c
index c1ea405883..9716df4860 100644
--- a/src/static_libs/libunibreak/linebreak.c
+++ b/src/static_libs/libunibreak/linebreak.c
@@ -1,10 +1,11 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
8 * 9 *
9 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 11 * warranty. In no event will the author be held liable for any damages
@@ -24,28 +25,29 @@
24 * distribution. 25 * distribution.
25 * 26 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14): 27 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/> 28 * <URL:http://www.unicode.org/reports/tr14/>
28 * 29 *
29 * When this library was designed, this annex was at Revision 19, for 30 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0: 31 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 33 *
33 * This library has been updated according to Revision 30, for 34 * This library has been updated according to Revision 30, for
34 * Unicode 6.2.0: 35 * Unicode 6.2.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 36 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
36 * 37 *
37 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
39 */ 40 */
40 41
41/** 42/**
42 * @file linebreak.c 43 * @file linebreak.c
43 * 44 *
44 * Implementation of the line breaking algorithm as described in Unicode 45 * Implementation of the line breaking algorithm as described in Unicode
45 * Standard Annex 14. 46 * Standard Annex 14.
46 * 47 *
47 * @version 2.3, 2012/10/06 48 * @version 2.5, 2013/11/14
48 * @author Wu Yongwei 49 * @author Wu Yongwei
50 * @author Petr Filipsky
49 */ 51 */
50 52
51#include <assert.h> 53#include <assert.h>
@@ -55,6 +57,11 @@
55#include "linebreakdef.h" 57#include "linebreakdef.h"
56 58
57/** 59/**
60 * Special value used internally to indicate an undefined break result.
61 */
62#define LINEBREAK_UNDEFINED -1
63
64/**
58 * Size of the second-level index to the line breaking properties. 65 * Size of the second-level index to the line breaking properties.
59 */ 66 */
60#define LINEBREAK_INDEX_SIZE 40 67#define LINEBREAK_INDEX_SIZE 40
@@ -70,11 +77,11 @@ const int linebreak_version = LINEBREAK_VERSION;
70 */ 77 */
71enum BreakAction 78enum BreakAction
72{ 79{
73 DIR_BRK, /**< Direct break opportunity */ 80 DIR_BRK, /**< Direct break opportunity */
74 IND_BRK, /**< Indirect break opportunity */ 81 IND_BRK, /**< Indirect break opportunity */
75 CMI_BRK, /**< Indirect break opportunity for combining marks */ 82 CMI_BRK, /**< Indirect break opportunity for combining marks */
76 CMP_BRK, /**< Prohibited break for combining marks */ 83 CMP_BRK, /**< Prohibited break for combining marks */
77 PRH_BRK /**< Prohibited break */ 84 PRH_BRK /**< Prohibited break */
78}; 85};
79 86
80/** 87/**
@@ -82,180 +89,180 @@ enum BreakAction
82 * Unicode Standard Annex 14, Revision 30. 89 * Unicode Standard Annex 14, Revision 30.
83 */ 90 */
84static enum BreakAction baTable[LBP_RI][LBP_RI] = { 91static enum BreakAction baTable[LBP_RI][LBP_RI] = {
85 { /* OP */ 92 { /* OP */
86 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 93 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
87 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 94 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
88 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 95 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
89 CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 96 CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
90 PRH_BRK }, 97 PRH_BRK },
91 { /* CL */ 98 { /* CL */
92 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, 99 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
93 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 100 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
94 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 101 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
95 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 102 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
96 DIR_BRK }, 103 DIR_BRK },
97 { /* CP */ 104 { /* CP */
98 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, 105 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
99 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 106 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
100 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 107 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
101 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 108 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
102 DIR_BRK }, 109 DIR_BRK },
103 { /* QU */ 110 { /* QU */
104 PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 111 PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
105 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 112 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
106 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 113 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
107 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 114 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
108 IND_BRK }, 115 IND_BRK },
109 { /* GL */ 116 { /* GL */
110 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 117 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
111 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 118 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
112 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 119 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
113 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 120 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
114 IND_BRK }, 121 IND_BRK },
115 { /* NS */ 122 { /* NS */
116 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 123 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
117 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 124 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
118 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 125 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
119 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 126 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
120 DIR_BRK }, 127 DIR_BRK },
121 { /* EX */ 128 { /* EX */
122 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 129 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
123 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 130 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
124 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 131 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
125 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 132 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
126 DIR_BRK }, 133 DIR_BRK },
127 { /* SY */ 134 { /* SY */
128 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 135 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
129 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, 136 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
130 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 137 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
131 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 138 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
132 DIR_BRK }, 139 DIR_BRK },
133 { /* IS */ 140 { /* IS */
134 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 141 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
135 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 142 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
136 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 143 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
137 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 144 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
138 DIR_BRK }, 145 DIR_BRK },
139 { /* PR */ 146 { /* PR */
140 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 147 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
141 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 148 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
142 IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 149 IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
143 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 150 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
144 DIR_BRK }, 151 DIR_BRK },
145 { /* PO */ 152 { /* PO */
146 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 153 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
147 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 154 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
148 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 155 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
149 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 156 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
150 DIR_BRK }, 157 DIR_BRK },
151 { /* NU */ 158 { /* NU */
152 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 159 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
153 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 160 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
154 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 161 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
155 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 162 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
156 DIR_BRK }, 163 DIR_BRK },
157 { /* AL */ 164 { /* AL */
158 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 165 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
159 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 166 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
160 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 167 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
161 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 168 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
162 DIR_BRK }, 169 DIR_BRK },
163 { /* HL */ 170 { /* HL */
164 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 171 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
165 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 172 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
166 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 173 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
167 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 174 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
168 DIR_BRK }, 175 DIR_BRK },
169 { /* ID */ 176 { /* ID */
170 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 177 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
171 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 178 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
172 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 179 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
173 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 180 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
174 DIR_BRK }, 181 DIR_BRK },
175 { /* IN */ 182 { /* IN */
176 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 183 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
177 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 184 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
178 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 185 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
179 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 186 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
180 DIR_BRK }, 187 DIR_BRK },
181 { /* HY */ 188 { /* HY */
182 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, 189 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
183 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, 190 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
184 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 191 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
185 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 192 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
186 DIR_BRK }, 193 DIR_BRK },
187 { /* BA */ 194 { /* BA */
188 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, 195 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
189 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 196 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
190 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 197 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
191 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 198 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
192 DIR_BRK }, 199 DIR_BRK },
193 { /* BB */ 200 { /* BB */
194 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 201 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
195 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 202 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
196 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 203 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
197 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 204 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
198 IND_BRK }, 205 IND_BRK },
199 { /* B2 */ 206 { /* B2 */
200 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 207 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
201 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 208 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
202 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, 209 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK,
203 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 210 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
204 DIR_BRK }, 211 DIR_BRK },
205 { /* ZW */ 212 { /* ZW */
206 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 213 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
207 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 214 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
208 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 215 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
209 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 216 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
210 DIR_BRK }, 217 DIR_BRK },
211 { /* CM */ 218 { /* CM */
212 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 219 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
213 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 220 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
214 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 221 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
215 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 222 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
216 DIR_BRK }, 223 DIR_BRK },
217 { /* WJ */ 224 { /* WJ */
218 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 225 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
219 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 226 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
220 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 227 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
221 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 228 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
222 IND_BRK }, 229 IND_BRK },
223 { /* H2 */ 230 { /* H2 */
224 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 231 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
225 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 232 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
226 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 233 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
227 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, 234 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
228 DIR_BRK }, 235 DIR_BRK },
229 { /* H3 */ 236 { /* H3 */
230 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 237 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
231 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 238 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
232 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 239 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
233 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, 240 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
234 DIR_BRK }, 241 DIR_BRK },
235 { /* JL */ 242 { /* JL */
236 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 243 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
237 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 244 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
238 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 245 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
239 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, 246 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
240 DIR_BRK }, 247 DIR_BRK },
241 { /* JV */ 248 { /* JV */
242 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 249 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
243 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 250 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
244 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 251 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
245 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, 252 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
246 DIR_BRK }, 253 DIR_BRK },
247 { /* JT */ 254 { /* JT */
248 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 255 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
249 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 256 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
250 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 257 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
251 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, 258 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
252 DIR_BRK }, 259 DIR_BRK },
253 { /* RI */ 260 { /* RI */
254 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 261 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
255 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 262 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
256 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 263 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
257 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 264 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
258 IND_BRK }, 265 IND_BRK },
259}; 266};
260 267
261/** 268/**
@@ -263,8 +270,8 @@ static enum BreakAction baTable[LBP_RI][LBP_RI] = {
263 */ 270 */
264struct LineBreakPropertiesIndex 271struct LineBreakPropertiesIndex
265{ 272{
266 utf32_t end; /**< End coding point */ 273 utf32_t end; /**< End coding point */
267 struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */ 274 struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
268}; 275};
269 276
270/** 277/**
@@ -272,7 +279,7 @@ struct LineBreakPropertiesIndex
272 */ 279 */
273static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] = 280static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
274{ 281{
275 { 0xFFFFFFFF, lb_prop_default } 282 { 0xFFFFFFFF, lb_prop_default }
276}; 283};
277 284
278/** 285/**
@@ -283,84 +290,84 @@ static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
283 */ 290 */
284void init_linebreak(void) 291void init_linebreak(void)
285{ 292{
286 size_t i; 293 size_t i;
287 size_t iPropDefault; 294 size_t iPropDefault;
288 size_t len; 295 size_t len;
289 size_t step; 296 size_t step;
290 297
291 len = 0; 298 len = 0;
292 while (lb_prop_default[len].prop != LBP_Undefined) 299 while (lb_prop_default[len].prop != LBP_Undefined)
293 ++len; 300 ++len;
294 step = len / LINEBREAK_INDEX_SIZE; 301 step = len / LINEBREAK_INDEX_SIZE;
295 iPropDefault = 0; 302 iPropDefault = 0;
296 for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i) 303 for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
297 { 304 {
298 lb_prop_index[i].lbp = lb_prop_default + iPropDefault; 305 lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
299 iPropDefault += step; 306 iPropDefault += step;
300 lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1; 307 lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
301 } 308 }
302 lb_prop_index[--i].end = 0xFFFFFFFF; 309 lb_prop_index[--i].end = 0xFFFFFFFF;
303} 310}
304 311
305/** 312/**
306 * Gets the language-specific line breaking properties. 313 * Gets the language-specific line breaking properties.
307 * 314 *
308 * @param lang language of the text 315 * @param lang language of the text
309 * @return pointer to the language-specific line breaking 316 * @return pointer to the language-specific line breaking
310 * properties array if found; \c NULL otherwise 317 * properties array if found; \c NULL otherwise
311 */ 318 */
312static struct LineBreakProperties *get_lb_prop_lang(const char *lang) 319static struct LineBreakProperties *get_lb_prop_lang(const char *lang)
313{ 320{
314 struct LineBreakPropertiesLang *lbplIter; 321 struct LineBreakPropertiesLang *lbplIter;
315 if (lang != NULL) 322 if (lang != NULL)
316 { 323 {
317 for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter) 324 for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
318 { 325 {
319 if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0) 326 if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
320 { 327 {
321 return lbplIter->lbp; 328 return lbplIter->lbp;
322 } 329 }
323 } 330 }
324 } 331 }
325 return NULL; 332 return NULL;
326} 333}
327 334
328/** 335/**
329 * Gets the line breaking class of a character from a line breaking 336 * Gets the line breaking class of a character from a line breaking
330 * properties array. 337 * properties array.
331 * 338 *
332 * @param ch character to check 339 * @param ch character to check
333 * @param lbp pointer to the line breaking properties array 340 * @param lbp pointer to the line breaking properties array
334 * @return the line breaking class if found; \c LBP_XX otherwise 341 * @return the line breaking class if found; \c LBP_XX otherwise
335 */ 342 */
336static enum LineBreakClass get_char_lb_class( 343static enum LineBreakClass get_char_lb_class(
337 utf32_t ch, 344 utf32_t ch,
338 struct LineBreakProperties *lbp) 345 struct LineBreakProperties *lbp)
339{ 346{
340 while (lbp->prop != LBP_Undefined && ch >= lbp->start) 347 while (lbp->prop != LBP_Undefined && ch >= lbp->start)
341 { 348 {
342 if (ch <= lbp->end) 349 if (ch <= lbp->end)
343 return lbp->prop; 350 return lbp->prop;
344 ++lbp; 351 ++lbp;
345 } 352 }
346 return LBP_XX; 353 return LBP_XX;
347} 354}
348 355
349/** 356/**
350 * Gets the line breaking class of a character from the default line 357 * Gets the line breaking class of a character from the default line
351 * breaking properties array. 358 * breaking properties array.
352 * 359 *
353 * @param ch character to check 360 * @param ch character to check
354 * @return the line breaking class if found; \c LBP_XX otherwise 361 * @return the line breaking class if found; \c LBP_XX otherwise
355 */ 362 */
356static enum LineBreakClass get_char_lb_class_default( 363static enum LineBreakClass get_char_lb_class_default(
357 utf32_t ch) 364 utf32_t ch)
358{ 365{
359 size_t i = 0; 366 size_t i = 0;
360 while (ch > lb_prop_index[i].end) 367 while (ch > lb_prop_index[i].end)
361 ++i; 368 ++i;
362 assert(i < LINEBREAK_INDEX_SIZE); 369 assert(i < LINEBREAK_INDEX_SIZE);
363 return get_char_lb_class(ch, lb_prop_index[i].lbp); 370 return get_char_lb_class(ch, lb_prop_index[i].lbp);
364} 371}
365 372
366/** 373/**
@@ -369,30 +376,30 @@ static enum LineBreakClass get_char_lb_class_default(
369 * and then the default data if there is no language-specific property 376 * and then the default data if there is no language-specific property
370 * available for the character. 377 * available for the character.
371 * 378 *
372 * @param ch character to check 379 * @param ch character to check
373 * @param lbpLang pointer to the language-specific line breaking 380 * @param lbpLang pointer to the language-specific line breaking
374 * properties array 381 * properties array
375 * @return the line breaking class if found; \c LBP_XX 382 * @return the line breaking class if found; \c LBP_XX
376 * otherwise 383 * otherwise
377 */ 384 */
378static enum LineBreakClass get_char_lb_class_lang( 385static enum LineBreakClass get_char_lb_class_lang(
379 utf32_t ch, 386 utf32_t ch,
380 struct LineBreakProperties *lbpLang) 387 struct LineBreakProperties *lbpLang)
381{ 388{
382 enum LineBreakClass lbcResult; 389 enum LineBreakClass lbcResult;
383 390
384 /* Find the language-specific line breaking class for a character */ 391 /* Find the language-specific line breaking class for a character */
385 if (lbpLang) 392 if (lbpLang)
386 { 393 {
387 lbcResult = get_char_lb_class(ch, lbpLang); 394 lbcResult = get_char_lb_class(ch, lbpLang);
388 if (lbcResult != LBP_XX) 395 if (lbcResult != LBP_XX)
389 return lbcResult; 396 return lbcResult;
390 } 397 }
391 398
392 /* Find the generic language-specific line breaking class, if no 399 /* Find the generic language-specific line breaking class, if no
393 * language context is provided, or language-specific data are not 400 * language context is provided, or language-specific data are not
394 * available for the specific character in the specified language */ 401 * available for the specific character in the specified language */
395 return get_char_lb_class_default(ch); 402 return get_char_lb_class_default(ch);
396} 403}
397 404
398/** 405/**
@@ -400,40 +407,214 @@ static enum LineBreakClass get_char_lb_class_lang(
400 * characters. They are treated in a simplistic way in this 407 * characters. They are treated in a simplistic way in this
401 * implementation. 408 * implementation.
402 * 409 *
403 * @param lbc line breaking class to resolve 410 * @param lbc line breaking class to resolve
404 * @param lang language of the text 411 * @param lang language of the text
405 * @return the resolved line breaking class 412 * @return the resolved line breaking class
406 */ 413 */
407static enum LineBreakClass resolve_lb_class( 414static enum LineBreakClass resolve_lb_class(
408 enum LineBreakClass lbc, 415 enum LineBreakClass lbc,
409 const char *lang) 416 const char *lang)
417{
418 switch (lbc)
419 {
420 case LBP_AI:
421 if (lang != NULL &&
422 (strncmp(lang, "zh", 2) == 0 || /* Chinese */
423 strncmp(lang, "ja", 2) == 0 || /* Japanese */
424 strncmp(lang, "ko", 2) == 0)) /* Korean */
425 {
426 return LBP_ID;
427 }
428 else
429 {
430 return LBP_AL;
431 }
432 case LBP_CJ:
433 /* Simplified for `normal' line breaking. See
434 * <url:http://www.unicode.org/reports/tr14/tr14-30.html#CJ>
435 * for details. */
436 return LBP_ID;
437 case LBP_SA:
438 case LBP_SG:
439 case LBP_XX:
440 return LBP_AL;
441 default:
442 return lbc;
443 }
444}
445
446/**
447 * Treats specially for the first character in a line.
448 *
449 * @param[in,out] lbpCtx pointer to the line breaking context
450 * @pre \a lbpCtx->lbcCur has a valid line break class
451 * @post \a lbpCtx->lbcCur has the updated line break class
452 */
453static void treat_first_char(
454 struct LineBreakContext* lbpCtx)
455{
456 switch (lbpCtx->lbcCur)
457 {
458 case LBP_LF:
459 case LBP_NL:
460 lbpCtx->lbcCur = LBP_BK; /* Rule LB5 */
461 break;
462 case LBP_CB:
463 lbpCtx->lbcCur = LBP_BA; /* Rule LB20 */
464 break;
465 case LBP_SP:
466 lbpCtx->lbcCur = LBP_WJ; /* Leading space treated as WJ */
467 break;
468 default:
469 break;
470 }
471}
472
473/**
474 * Tries telling the line break opportunity by simple rules.
475 *
476 * @param[in,out] lbpCtx pointer to the line breaking context
477 * @pre \a lbpCtx->lbcCur has the current line break
478 * class; and \a lbpCtx->lbcNew has the line
479 * break class for the next character
480 * @post \a lbpCtx->lbcCur has the updated line break
481 * class
482 * @return break result, one of #LINEBREAK_MUSTBREAK,
483 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
484 * if identified; or #LINEBREAK_UNDEFINED if
485 * table lookup is needed
486 */
487static int get_lb_result_simple(
488 struct LineBreakContext* lbpCtx)
489{
490 if (lbpCtx->lbcCur == LBP_BK
491 || (lbpCtx->lbcCur == LBP_CR && lbpCtx->lbcNew != LBP_LF))
492 {
493 return LINEBREAK_MUSTBREAK; /* Rules LB4 and LB5 */
494 }
495
496 switch (lbpCtx->lbcNew)
497 {
498 case LBP_SP:
499 return LINEBREAK_NOBREAK; /* Rule LB7; no change to lbcCur */
500 case LBP_BK:
501 case LBP_LF:
502 case LBP_NL:
503 lbpCtx->lbcCur = LBP_BK; /* Mandatory break after */
504 return LINEBREAK_NOBREAK; /* Rule LB6 */
505 case LBP_CR:
506 lbpCtx->lbcCur = LBP_CR;
507 return LINEBREAK_NOBREAK; /* Rule LB6 */
508 case LBP_CB:
509 lbpCtx->lbcCur = LBP_BA;
510 return LINEBREAK_ALLOWBREAK; /* Rule LB20 */
511 default:
512 return LINEBREAK_UNDEFINED; /* Table lookup is needed */
513 }
514}
515
516/**
517 * Tells the line break opportunity by table lookup.
518 *
519 * @param[in,out] lbpCtx pointer to the line breaking context
520 * @pre \a lbpCtx->lbcCur has the current line break
521 * class; \a lbpCtx->lbcLast has the line break
522 * class for the last character; and \a
523 * lbcCur->lbcNew has the line break class for
524 * the next character
525 * @post \a lbpCtx->lbcCur has the updated line break
526 * class
527 * @return break result, one of #LINEBREAK_MUSTBREAK,
528 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
529 */
530static int get_lb_result_lookup(
531 struct LineBreakContext* lbpCtx)
532{
533 /* TODO: Rule LB21a, as introduced by Revision 28 of UAX#14, is not
534 * yet implemented below. */
535 int brk = LINEBREAK_UNDEFINED;
536 assert(lbpCtx->lbcCur <= LBP_JT);
537 assert(lbpCtx->lbcNew <= LBP_JT);
538 switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1])
539 {
540 case DIR_BRK:
541 brk = LINEBREAK_ALLOWBREAK;
542 break;
543 case CMI_BRK:
544 case IND_BRK:
545 brk = (lbpCtx->lbcLast == LBP_SP)
546 ? LINEBREAK_ALLOWBREAK
547 : LINEBREAK_NOBREAK;
548 break;
549 case CMP_BRK:
550 brk = LINEBREAK_NOBREAK;
551 if (lbpCtx->lbcLast != LBP_SP)
552 return brk; /* Do not update lbcCur */
553 break;
554 case PRH_BRK:
555 brk = LINEBREAK_NOBREAK;
556 break;
557 }
558 lbpCtx->lbcCur = lbpCtx->lbcNew;
559 return brk;
560}
561
562/**
563 * Initializes line breaking context for a given language.
564 *
565 * @param[in,out] lbpCtx pointer to the line breaking context
566 * @param[in] ch the first character to process
567 * @param[in] lang language of the input
568 * @post the line breaking context is initialized
569 */
570void lb_init_break_context(
571 struct LineBreakContext* lbpCtx,
572 utf32_t ch,
573 const char* lang)
574{
575 lbpCtx->lang = lang;
576 lbpCtx->lbpLang = get_lb_prop_lang(lang);
577 lbpCtx->lbcLast = LBP_Undefined;
578 lbpCtx->lbcNew = LBP_Undefined;
579 lbpCtx->lbcCur = resolve_lb_class(
580 get_char_lb_class_lang(ch, lbpCtx->lbpLang),
581 lbpCtx->lang);
582 treat_first_char(lbpCtx);
583}
584
585/**
586 * Updates LineBreakingContext for the next code point and returns
587 * the detected break.
588 *
589 * @param[in,out] lbpCtx pointer to the line breaking context
590 * @param[in] ch Unicode code point
591 * @return break result, one of #LINEBREAK_MUSTBREAK,
592 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
593 * @post the line breaking context is updated
594 */
595int lb_process_next_char(
596 struct LineBreakContext* lbpCtx,
597 utf32_t ch )
410{ 598{
411 switch (lbc) 599 int brk;
412 { 600
413 case LBP_AI: 601 lbpCtx->lbcLast = lbpCtx->lbcNew;
414 if (lang != NULL && 602 lbpCtx->lbcNew = get_char_lb_class_lang(ch, lbpCtx->lbpLang);
415 (strncmp(lang, "zh", 2) == 0 || /* Chinese */ 603 brk = get_lb_result_simple(lbpCtx);
416 strncmp(lang, "ja", 2) == 0 || /* Japanese */ 604 switch (brk)
417 strncmp(lang, "ko", 2) == 0)) /* Korean */ 605 {
418 { 606 case LINEBREAK_MUSTBREAK:
419 return LBP_ID; 607 lbpCtx->lbcCur = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
420 } 608 treat_first_char(lbpCtx);
421 else 609 break;
422 { 610 case LINEBREAK_UNDEFINED:
423 return LBP_AL; 611 lbpCtx->lbcNew = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
424 } 612 brk = get_lb_result_lookup(lbpCtx);
425 case LBP_CJ: 613 break;
426 /* Simplified for `normal' line breaking. See 614 default:
427 * <url:http://www.unicode.org/reports/tr14/tr14-28.html#CJ> 615 break;
428 * for details. */ 616 }
429 return LBP_ID; 617 return brk;
430 case LBP_SA:
431 case LBP_SG:
432 case LBP_XX:
433 return LBP_AL;
434 default:
435 return lbc;
436 }
437} 618}
438 619
439/** 620/**
@@ -441,59 +622,59 @@ static enum LineBreakClass resolve_lb_class(
441 * be advanced to the next complete character, unless the end of string 622 * be advanced to the next complete character, unless the end of string
442 * is reached in the middle of a UTF-8 sequence. 623 * is reached in the middle of a UTF-8 sequence.
443 * 624 *
444 * @param[in] s input UTF-8 string 625 * @param[in] s input UTF-8 string
445 * @param[in] len length of the string in bytes 626 * @param[in] len length of the string in bytes
446 * @param[in,out] ip pointer to the index 627 * @param[in,out] ip pointer to the index
447 * @return the Unicode character beginning at the index; or 628 * @return the Unicode character beginning at the index; or
448 * #EOS if end of input is encountered 629 * #EOS if end of input is encountered
449 */ 630 */
450utf32_t lb_get_next_char_utf8( 631utf32_t lb_get_next_char_utf8(
451 const utf8_t *s, 632 const utf8_t *s,
452 size_t len, 633 size_t len,
453 size_t *ip) 634 size_t *ip)
454{ 635{
455 utf8_t ch; 636 utf8_t ch;
456 utf32_t res; 637 utf32_t res;
457 638
458 assert(*ip <= len); 639 assert(*ip <= len);
459 if (*ip == len) 640 if (*ip == len)
460 return EOS; 641 return EOS;
461 ch = s[*ip]; 642 ch = s[*ip];
462 643
463 if (ch < 0xC2 || ch > 0xF4) 644 if (ch < 0xC2 || ch > 0xF4)
464 { /* One-byte sequence, tail (should not occur), or invalid */ 645 { /* One-byte sequence, tail (should not occur), or invalid */
465 *ip += 1; 646 *ip += 1;
466 return ch; 647 return ch;
467 } 648 }
468 else if (ch < 0xE0) 649 else if (ch < 0xE0)
469 { /* Two-byte sequence */ 650 { /* Two-byte sequence */
470 if (*ip + 2 > len) 651 if (*ip + 2 > len)
471 return EOS; 652 return EOS;
472 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F); 653 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
473 *ip += 2; 654 *ip += 2;
474 return res; 655 return res;
475 } 656 }
476 else if (ch < 0xF0) 657 else if (ch < 0xF0)
477 { /* Three-byte sequence */ 658 { /* Three-byte sequence */
478 if (*ip + 3 > len) 659 if (*ip + 3 > len)
479 return EOS; 660 return EOS;
480 res = ((ch & 0x0F) << 12) + 661 res = ((ch & 0x0F) << 12) +
481 ((s[*ip + 1] & 0x3F) << 6) + 662 ((s[*ip + 1] & 0x3F) << 6) +
482 ((s[*ip + 2] & 0x3F)); 663 ((s[*ip + 2] & 0x3F));
483 *ip += 3; 664 *ip += 3;
484 return res; 665 return res;
485 } 666 }
486 else 667 else
487 { /* Four-byte sequence */ 668 { /* Four-byte sequence */
488 if (*ip + 4 > len) 669 if (*ip + 4 > len)
489 return EOS; 670 return EOS;
490 res = ((ch & 0x07) << 18) + 671 res = ((ch & 0x07) << 18) +
491 ((s[*ip + 1] & 0x3F) << 12) + 672 ((s[*ip + 1] & 0x3F) << 12) +
492 ((s[*ip + 2] & 0x3F) << 6) + 673 ((s[*ip + 2] & 0x3F) << 6) +
493 ((s[*ip + 3] & 0x3F)); 674 ((s[*ip + 3] & 0x3F));
494 *ip += 4; 675 *ip += 4;
495 return res; 676 return res;
496 } 677 }
497} 678}
498 679
499/** 680/**
@@ -501,263 +682,174 @@ utf32_t lb_get_next_char_utf8(
501 * be advanced to the next complete character, unless the end of string 682 * be advanced to the next complete character, unless the end of string
502 * is reached in the middle of a UTF-16 surrogate pair. 683 * is reached in the middle of a UTF-16 surrogate pair.
503 * 684 *
504 * @param[in] s input UTF-16 string 685 * @param[in] s input UTF-16 string
505 * @param[in] len length of the string in words 686 * @param[in] len length of the string in words
506 * @param[in,out] ip pointer to the index 687 * @param[in,out] ip pointer to the index
507 * @return the Unicode character beginning at the index; or 688 * @return the Unicode character beginning at the index; or
508 * #EOS if end of input is encountered 689 * #EOS if end of input is encountered
509 */ 690 */
510utf32_t lb_get_next_char_utf16( 691utf32_t lb_get_next_char_utf16(
511 const utf16_t *s, 692 const utf16_t *s,
512 size_t len, 693 size_t len,
513 size_t *ip) 694 size_t *ip)
514{ 695{
515 utf16_t ch; 696 utf16_t ch;
516 697
517 assert(*ip <= len); 698 assert(*ip <= len);
518 if (*ip == len) 699 if (*ip == len)
519 return EOS; 700 return EOS;
520 ch = s[(*ip)++]; 701 ch = s[(*ip)++];
521 702
522 if (ch < 0xD800 || ch > 0xDBFF) 703 if (ch < 0xD800 || ch > 0xDBFF)
523 { /* If the character is not a high surrogate */ 704 { /* If the character is not a high surrogate */
524 return ch; 705 return ch;
525 } 706 }
526 if (*ip == len) 707 if (*ip == len)
527 { /* If the input ends here (an error) */ 708 { /* If the input ends here (an error) */
528 --(*ip); 709 --(*ip);
529 return EOS; 710 return EOS;
530 } 711 }
531 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF) 712 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
532 { /* If the next character is not the low surrogate (an error) */ 713 { /* If the next character is not the low surrogate (an error) */
533 return ch; 714 return ch;
534 } 715 }
535 /* Return the constructed character and advance the index again */ 716 /* Return the constructed character and advance the index again */
536 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000; 717 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
537} 718}
538 719
539/** 720/**
540 * Gets the next Unicode character in a UTF-32 sequence. The index will 721 * Gets the next Unicode character in a UTF-32 sequence. The index will
541 * be advanced to the next character. 722 * be advanced to the next character.
542 * 723 *
543 * @param[in] s input UTF-32 string 724 * @param[in] s input UTF-32 string
544 * @param[in] len length of the string in dwords 725 * @param[in] len length of the string in dwords
545 * @param[in,out] ip pointer to the index 726 * @param[in,out] ip pointer to the index
546 * @return the Unicode character beginning at the index; or 727 * @return the Unicode character beginning at the index; or
547 * #EOS if end of input is encountered 728 * #EOS if end of input is encountered
548 */ 729 */
549utf32_t lb_get_next_char_utf32( 730utf32_t lb_get_next_char_utf32(
550 const utf32_t *s, 731 const utf32_t *s,
551 size_t len, 732 size_t len,
552 size_t *ip) 733 size_t *ip)
553{ 734{
554 assert(*ip <= len); 735 assert(*ip <= len);
555 if (*ip == len) 736 if (*ip == len)
556 return EOS; 737 return EOS;
557 return s[(*ip)++]; 738 return s[(*ip)++];
558} 739}
559 740
560/** 741/**
561 * Sets the line breaking information for a generic input string. 742 * Sets the line breaking information for a generic input string.
562 * 743 *
563 * @param[in] s input string 744 * @param[in] s input string
564 * @param[in] len length of the input 745 * @param[in] len length of the input
565 * @param[in] lang language of the input 746 * @param[in] lang language of the input
566 * @param[out] brks pointer to the output breaking data, 747 * @param[out] brks pointer to the output breaking data,
567 * containing #LINEBREAK_MUSTBREAK, 748 * containing #LINEBREAK_MUSTBREAK,
568 * #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK, 749 * #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
569 * or #LINEBREAK_INSIDEACHAR 750 * or #LINEBREAK_INSIDEACHAR
570 * @param[in] get_next_char function to get the next UTF-32 character 751 * @param[in] get_next_char function to get the next UTF-32 character
571 */ 752 */
572void set_linebreaks( 753void set_linebreaks(
573 const void *s, 754 const void *s,
574 size_t len, 755 size_t len,
575 const char *lang, 756 const char *lang,
576 char *brks, 757 char *brks,
577 get_next_char_t get_next_char) 758 get_next_char_t get_next_char)
578{ 759{
579 utf32_t ch; 760 utf32_t ch;
580 enum LineBreakClass lbcCur; 761 struct LineBreakContext lbCtx;
581 enum LineBreakClass lbcNew; 762 size_t posCur = 0;
582 enum LineBreakClass lbcLast; 763 size_t posLast = 0;
583 struct LineBreakProperties *lbpLang; 764
584 size_t posCur = 0; 765 --posLast; /* To be ++'d later */
585 size_t posLast = 0; 766 ch = get_next_char(s, len, &posCur);
586 767 if (ch == EOS)
587 --posLast; /* To be ++'d later */ 768 return;
588 ch = get_next_char(s, len, &posCur); 769 lb_init_break_context(&lbCtx, ch, lang);
589 if (ch == EOS) 770
590 return; 771 /* Process a line till an explicit break or end of string */
591 lbpLang = get_lb_prop_lang(lang); 772 for (;;)
592 lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang); 773 {
593 lbcNew = LBP_Undefined; 774 for (++posLast; posLast < posCur - 1; ++posLast)
594 775 {
595nextline: 776 brks[posLast] = LINEBREAK_INSIDEACHAR;
596 777 }
597 /* Special treatment for the first character */ 778 assert(posLast == posCur - 1);
598 switch (lbcCur) 779 ch = get_next_char(s, len, &posCur);
599 { 780 if (ch == EOS)
600 case LBP_LF: 781 break;
601 case LBP_NL: 782 brks[posLast] = lb_process_next_char(&lbCtx, ch);
602 lbcCur = LBP_BK; 783 }
603 break; 784
604 case LBP_CB: 785 assert(posLast == posCur - 1 && posCur <= len);
605 lbcCur = LBP_BA; 786 /* Break after the last character */
606 break; 787 brks[posLast] = LINEBREAK_MUSTBREAK;
607 case LBP_SP: 788 /* When the input contains incomplete sequences */
608 lbcCur = LBP_WJ; 789 while (posCur < len)
609 break; 790 {
610 default: 791 brks[posCur++] = LINEBREAK_INSIDEACHAR;
611 break; 792 }
612 }
613
614 /* Process a line till an explicit break or end of string */
615 for (;;)
616 {
617 for (++posLast; posLast < posCur - 1; ++posLast)
618 {
619 brks[posLast] = LINEBREAK_INSIDEACHAR;
620 }
621 assert(posLast == posCur - 1);
622 lbcLast = lbcNew;
623 ch = get_next_char(s, len, &posCur);
624 if (ch == EOS)
625 break;
626 lbcNew = get_char_lb_class_lang(ch, lbpLang);
627 if (lbcCur == LBP_BK || (lbcCur == LBP_CR && lbcNew != LBP_LF))
628 {
629 brks[posLast] = LINEBREAK_MUSTBREAK;
630 lbcCur = resolve_lb_class(lbcNew, lang);
631 goto nextline;
632 }
633
634 switch (lbcNew)
635 {
636 case LBP_SP:
637 brks[posLast] = LINEBREAK_NOBREAK;
638 continue;
639 case LBP_BK:
640 case LBP_LF:
641 case LBP_NL:
642 brks[posLast] = LINEBREAK_NOBREAK;
643 lbcCur = LBP_BK;
644 continue;
645 case LBP_CR:
646 brks[posLast] = LINEBREAK_NOBREAK;
647 lbcCur = LBP_CR;
648 continue;
649 case LBP_CB:
650 brks[posLast] = LINEBREAK_ALLOWBREAK;
651 lbcCur = LBP_BA;
652 continue;
653 default:
654 break;
655 }
656
657 lbcNew = resolve_lb_class(lbcNew, lang);
658
659 /* TODO: LB21a, as introduced by Revision 28 of UAX#14, is not
660 * yet implemented below. */
661
662 assert(lbcCur <= LBP_JT);
663 assert(lbcNew <= LBP_JT);
664 switch (baTable[lbcCur - 1][lbcNew - 1])
665 {
666 case DIR_BRK:
667 brks[posLast] = LINEBREAK_ALLOWBREAK;
668 break;
669 case CMI_BRK:
670 case IND_BRK:
671 if (lbcLast == LBP_SP)
672 {
673 brks[posLast] = LINEBREAK_ALLOWBREAK;
674 }
675 else
676 {
677 brks[posLast] = LINEBREAK_NOBREAK;
678 }
679 break;
680 case CMP_BRK:
681 brks[posLast] = LINEBREAK_NOBREAK;
682 if (lbcLast != LBP_SP)
683 continue;
684 break;
685 case PRH_BRK:
686 brks[posLast] = LINEBREAK_NOBREAK;
687 break;
688 }
689
690 lbcCur = lbcNew;
691 }
692
693 assert(posLast == posCur - 1 && posCur <= len);
694 /* Break after the last character */
695 brks[posLast] = LINEBREAK_MUSTBREAK;
696 /* When the input contains incomplete sequences */
697 while (posCur < len)
698 {
699 brks[posCur++] = LINEBREAK_INSIDEACHAR;
700 }
701} 793}
702 794
703/** 795/**
704 * Sets the line breaking information for a UTF-8 input string. 796 * Sets the line breaking information for a UTF-8 input string.
705 * 797 *
706 * @param[in] s input UTF-8 string 798 * @param[in] s input UTF-8 string
707 * @param[in] len length of the input 799 * @param[in] len length of the input
708 * @param[in] lang language of the input 800 * @param[in] lang language of the input
709 * @param[out] brks pointer to the output breaking data, containing 801 * @param[out] brks pointer to the output breaking data, containing
710 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 802 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
711 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 803 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
712 */ 804 */
713void set_linebreaks_utf8( 805void set_linebreaks_utf8(
714 const utf8_t *s, 806 const utf8_t *s,
715 size_t len, 807 size_t len,
716 const char *lang, 808 const char *lang,
717 char *brks) 809 char *brks)
718{ 810{
719 set_linebreaks(s, len, lang, brks, 811 set_linebreaks(s, len, lang, brks,
720 (get_next_char_t)lb_get_next_char_utf8); 812 (get_next_char_t)lb_get_next_char_utf8);
721} 813}
722 814
723/** 815/**
724 * Sets the line breaking information for a UTF-16 input string. 816 * Sets the line breaking information for a UTF-16 input string.
725 * 817 *
726 * @param[in] s input UTF-16 string 818 * @param[in] s input UTF-16 string
727 * @param[in] len length of the input 819 * @param[in] len length of the input
728 * @param[in] lang language of the input 820 * @param[in] lang language of the input
729 * @param[out] brks pointer to the output breaking data, containing 821 * @param[out] brks pointer to the output breaking data, containing
730 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 822 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
731 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 823 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
732 */ 824 */
733void set_linebreaks_utf16( 825void set_linebreaks_utf16(
734 const utf16_t *s, 826 const utf16_t *s,
735 size_t len, 827 size_t len,
736 const char *lang, 828 const char *lang,
737 char *brks) 829 char *brks)
738{ 830{
739 set_linebreaks(s, len, lang, brks, 831 set_linebreaks(s, len, lang, brks,
740 (get_next_char_t)lb_get_next_char_utf16); 832 (get_next_char_t)lb_get_next_char_utf16);
741} 833}
742 834
743/** 835/**
744 * Sets the line breaking information for a UTF-32 input string. 836 * Sets the line breaking information for a UTF-32 input string.
745 * 837 *
746 * @param[in] s input UTF-32 string 838 * @param[in] s input UTF-32 string
747 * @param[in] len length of the input 839 * @param[in] len length of the input
748 * @param[in] lang language of the input 840 * @param[in] lang language of the input
749 * @param[out] brks pointer to the output breaking data, containing 841 * @param[out] brks pointer to the output breaking data, containing
750 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 842 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
751 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 843 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
752 */ 844 */
753void set_linebreaks_utf32( 845void set_linebreaks_utf32(
754 const utf32_t *s, 846 const utf32_t *s,
755 size_t len, 847 size_t len,
756 const char *lang, 848 const char *lang,
757 char *brks) 849 char *brks)
758{ 850{
759 set_linebreaks(s, len, lang, brks, 851 set_linebreaks(s, len, lang, brks,
760 (get_next_char_t)lb_get_next_char_utf32); 852 (get_next_char_t)lb_get_next_char_utf32);
761} 853}
762 854
763/** 855/**
@@ -767,21 +859,21 @@ void set_linebreaks_utf32(
767 * complicated cases involving combining marks, spaces, etc. cannot be 859 * complicated cases involving combining marks, spaces, etc. cannot be
768 * correctly processed. 860 * correctly processed.
769 * 861 *
770 * @param char1 the first Unicode character 862 * @param char1 the first Unicode character
771 * @param char2 the second Unicode character 863 * @param char2 the second Unicode character
772 * @param lang language of the input 864 * @param lang language of the input
773 * @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 865 * @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
774 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 866 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
775 */ 867 */
776int is_line_breakable( 868int is_line_breakable(
777 utf32_t char1, 869 utf32_t char1,
778 utf32_t char2, 870 utf32_t char2,
779 const char* lang) 871 const char* lang)
780{ 872{
781 utf32_t s[2]; 873 utf32_t s[2];
782 char brks[2]; 874 char brks[2];
783 s[0] = char1; 875 s[0] = char1;
784 s[1] = char2; 876 s[1] = char2;
785 set_linebreaks_utf32(s, 2, lang, brks); 877 set_linebreaks_utf32(s, 2, lang, brks);
786 return brks[0]; 878 return brks[0];
787} 879}
diff --git a/src/static_libs/libunibreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h
index 288ef1b352..94fbca0f9f 100644
--- a/src/static_libs/libunibreak/linebreak.h
+++ b/src/static_libs/libunibreak/linebreak.h
@@ -1,4 +1,4 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
@@ -24,27 +24,27 @@
24 * distribution. 24 * distribution.
25 * 25 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14): 26 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/> 27 * <URL:http://www.unicode.org/reports/tr14/>
28 * 28 *
29 * When this library was designed, this annex was at Revision 19, for 29 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0: 30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 32 *
33 * This library has been updated according to Revision 30, for 33 * This library has been updated according to Revision 30, for
34 * Unicode 6.2.0: 34 * Unicode 6.2.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
36 * 36 *
37 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
39 */ 39 */
40 40
41/** 41/**
42 * @file linebreak.h 42 * @file linebreak.h
43 * 43 *
44 * Header file for the line breaking algorithm. 44 * Header file for the line breaking algorithm.
45 * 45 *
46 * @version 2.2, 2012/10/06 46 * @version 2.2, 2012/10/06
47 * @author Wu Yongwei 47 * @author Wu Yongwei
48 */ 48 */
49 49
50#ifndef LINEBREAK_H 50#ifndef LINEBREAK_H
@@ -56,28 +56,28 @@
56extern "C" { 56extern "C" {
57#endif 57#endif
58 58
59#define LINEBREAK_VERSION 0x0202 /**< Version of the library linebreak */ 59#define LINEBREAK_VERSION 0x0202 /**< Version of the library linebreak */
60extern const int linebreak_version; 60extern const int linebreak_version;
61 61
62#ifndef LINEBREAK_UTF_TYPES_DEFINED 62#ifndef LINEBREAK_UTF_TYPES_DEFINED
63#define LINEBREAK_UTF_TYPES_DEFINED 63#define LINEBREAK_UTF_TYPES_DEFINED
64typedef unsigned char utf8_t; /**< Type for UTF-8 data points */ 64typedef unsigned char utf8_t; /**< Type for UTF-8 data points */
65typedef unsigned short utf16_t; /**< Type for UTF-16 data points */ 65typedef unsigned short utf16_t; /**< Type for UTF-16 data points */
66typedef unsigned int utf32_t; /**< Type for UTF-32 data points */ 66typedef unsigned int utf32_t; /**< Type for UTF-32 data points */
67#endif 67#endif
68 68
69#define LINEBREAK_MUSTBREAK 0 /**< Break is mandatory */ 69#define LINEBREAK_MUSTBREAK 0 /**< Break is mandatory */
70#define LINEBREAK_ALLOWBREAK 1 /**< Break is allowed */ 70#define LINEBREAK_ALLOWBREAK 1 /**< Break is allowed */
71#define LINEBREAK_NOBREAK 2 /**< No break is possible */ 71#define LINEBREAK_NOBREAK 2 /**< No break is possible */
72#define LINEBREAK_INSIDEACHAR 3 /**< A UTF-8/16 sequence is unfinished */ 72#define LINEBREAK_INSIDEACHAR 3 /**< A UTF-8/16 sequence is unfinished */
73 73
74void init_linebreak(void); 74void init_linebreak(void);
75void set_linebreaks_utf8( 75void set_linebreaks_utf8(
76 const utf8_t *s, size_t len, const char* lang, char *brks); 76 const utf8_t *s, size_t len, const char* lang, char *brks);
77void set_linebreaks_utf16( 77void set_linebreaks_utf16(
78 const utf16_t *s, size_t len, const char* lang, char *brks); 78 const utf16_t *s, size_t len, const char* lang, char *brks);
79void set_linebreaks_utf32( 79void set_linebreaks_utf32(
80 const utf32_t *s, size_t len, const char* lang, char *brks); 80 const utf32_t *s, size_t len, const char* lang, char *brks);
81int is_line_breakable(utf32_t char1, utf32_t char2, const char* lang); 81int is_line_breakable(utf32_t char1, utf32_t char2, const char* lang);
82 82
83#ifdef __cplusplus 83#ifdef __cplusplus
diff --git a/src/static_libs/libunibreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c
index cced7d40bb..3843e3bfe7 100644
--- a/src/static_libs/libunibreak/linebreakdata.c
+++ b/src/static_libs/libunibreak/linebreakdata.c
@@ -1,6 +1,6 @@
1/* The content of this file is generated from: 1/* The content of this file is generated from:
2# LineBreak-6.2.0.txt 2# LineBreak-6.3.0.txt
3# Date: 2012-08-08, 19:26:00 GMT [KW] 3# Date: 2013-02-06, 19:45:00 GMT [KW, LI]
4*/ 4*/
5 5
6#include "linebreak.h" 6#include "linebreak.h"
@@ -114,7 +114,9 @@ struct LineBreakProperties lb_prop_default[] = {
114 { 0x060C, 0x060D, LBP_IS }, 114 { 0x060C, 0x060D, LBP_IS },
115 { 0x060E, 0x060F, LBP_AL }, 115 { 0x060E, 0x060F, LBP_AL },
116 { 0x0610, 0x061A, LBP_CM }, 116 { 0x0610, 0x061A, LBP_CM },
117 { 0x061B, 0x061F, LBP_EX }, 117 { 0x061B, 0x061B, LBP_EX },
118 { 0x061C, 0x061C, LBP_CM },
119 { 0x061E, 0x061F, LBP_EX },
118 { 0x0620, 0x064A, LBP_AL }, 120 { 0x0620, 0x064A, LBP_AL },
119 { 0x064B, 0x065F, LBP_CM }, 121 { 0x064B, 0x065F, LBP_CM },
120 { 0x0660, 0x0669, LBP_NU }, 122 { 0x0660, 0x0669, LBP_NU },
@@ -456,7 +458,7 @@ struct LineBreakProperties lb_prop_default[] = {
456 { 0x205D, 0x205F, LBP_BA }, 458 { 0x205D, 0x205F, LBP_BA },
457 { 0x2060, 0x2060, LBP_WJ }, 459 { 0x2060, 0x2060, LBP_WJ },
458 { 0x2061, 0x2064, LBP_AL }, 460 { 0x2061, 0x2064, LBP_AL },
459 { 0x206A, 0x206F, LBP_CM }, 461 { 0x2066, 0x206F, LBP_CM },
460 { 0x2070, 0x2071, LBP_AL }, 462 { 0x2070, 0x2071, LBP_AL },
461 { 0x2074, 0x2074, LBP_AI }, 463 { 0x2074, 0x2074, LBP_AI },
462 { 0x2075, 0x207C, LBP_AL }, 464 { 0x2075, 0x207C, LBP_AL },
@@ -473,7 +475,7 @@ struct LineBreakProperties lb_prop_default[] = {
473 { 0x20A7, 0x20A7, LBP_PO }, 475 { 0x20A7, 0x20A7, LBP_PO },
474 { 0x20A8, 0x20B5, LBP_PR }, 476 { 0x20A8, 0x20B5, LBP_PR },
475 { 0x20B6, 0x20B6, LBP_PO }, 477 { 0x20B6, 0x20B6, LBP_PO },
476 { 0x20B7, 0x20BA, LBP_PR }, 478 { 0x20B7, 0x20CF, LBP_PR },
477 { 0x20D0, 0x20F0, LBP_CM }, 479 { 0x20D0, 0x20F0, LBP_CM },
478 { 0x2100, 0x2102, LBP_AL }, 480 { 0x2100, 0x2102, LBP_AL },
479 { 0x2103, 0x2103, LBP_PO }, 481 { 0x2103, 0x2103, LBP_PO },
@@ -774,7 +776,8 @@ struct LineBreakProperties lb_prop_default[] = {
774 { 0x2E33, 0x2E34, LBP_BA }, 776 { 0x2E33, 0x2E34, LBP_BA },
775 { 0x2E35, 0x2E39, LBP_AL }, 777 { 0x2E35, 0x2E39, LBP_AL },
776 { 0x2E3A, 0x2E3B, LBP_B2 }, 778 { 0x2E3A, 0x2E3B, LBP_B2 },
777 { 0x2E80, 0x3000, LBP_ID }, 779 { 0x2E80, 0x2FFB, LBP_ID },
780 { 0x3000, 0x3000, LBP_BA },
778 { 0x3001, 0x3002, LBP_CL }, 781 { 0x3001, 0x3002, LBP_CL },
779 { 0x3003, 0x3004, LBP_ID }, 782 { 0x3003, 0x3004, LBP_ID },
780 { 0x3005, 0x3005, LBP_NS }, 783 { 0x3005, 0x3005, LBP_NS },
@@ -803,7 +806,9 @@ struct LineBreakProperties lb_prop_default[] = {
803 { 0x301E, 0x301F, LBP_CL }, 806 { 0x301E, 0x301F, LBP_CL },
804 { 0x3020, 0x3029, LBP_ID }, 807 { 0x3020, 0x3029, LBP_ID },
805 { 0x302A, 0x302F, LBP_CM }, 808 { 0x302A, 0x302F, LBP_CM },
806 { 0x3030, 0x303A, LBP_ID }, 809 { 0x3030, 0x3034, LBP_ID },
810 { 0x3035, 0x3035, LBP_CM },
811 { 0x3036, 0x303A, LBP_ID },
807 { 0x303B, 0x303C, LBP_NS }, 812 { 0x303B, 0x303C, LBP_NS },
808 { 0x303D, 0x303F, LBP_ID }, 813 { 0x303D, 0x303F, LBP_ID },
809 { 0x3041, 0x3041, LBP_CJ }, 814 { 0x3041, 0x3041, LBP_CJ },
diff --git a/src/static_libs/libunibreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c
index fb6744da33..3455afd687 100644
--- a/src/static_libs/libunibreak/linebreakdef.c
+++ b/src/static_libs/libunibreak/linebreakdef.c
@@ -1,4 +1,4 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
@@ -24,27 +24,27 @@
24 * distribution. 24 * distribution.
25 * 25 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14): 26 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/> 27 * <URL:http://www.unicode.org/reports/tr14/>
28 * 28 *
29 * When this library was designed, this annex was at Revision 19, for 29 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0: 30 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 32 *
33 * This library has been updated according to Revision 30, for 33 * This library has been updated according to Revision 30, for
34 * Unicode 6.2.0: 34 * Unicode 6.2.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
36 * 36 *
37 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
39 */ 39 */
40 40
41/** 41/**
42 * @file linebreakdef.c 42 * @file linebreakdef.c
43 * 43 *
44 * Definition of language-specific data. 44 * Definition of language-specific data.
45 * 45 *
46 * @version 2.2, 2012/10/06 46 * @version 2.2, 2012/10/06
47 * @author Wu Yongwei 47 * @author Wu Yongwei
48 */ 48 */
49 49
50#include "linebreak.h" 50#include "linebreak.h"
@@ -54,72 +54,72 @@
54 * English-specific data over the default Unicode rules. 54 * English-specific data over the default Unicode rules.
55 */ 55 */
56static struct LineBreakProperties lb_prop_English[] = { 56static struct LineBreakProperties lb_prop_English[] = {
57 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */ 57 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
58 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */ 58 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
59 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */ 59 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
60 { 0, 0, LBP_Undefined } 60 { 0, 0, LBP_Undefined }
61}; 61};
62 62
63/** 63/**
64 * German-specific data over the default Unicode rules. 64 * German-specific data over the default Unicode rules.
65 */ 65 */
66static struct LineBreakProperties lb_prop_German[] = { 66static struct LineBreakProperties lb_prop_German[] = {
67 { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */ 67 { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
68 { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */ 68 { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
69 { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */ 69 { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
70 { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */ 70 { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
71 { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */ 71 { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
72 { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */ 72 { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */
73 { 0, 0, LBP_Undefined } 73 { 0, 0, LBP_Undefined }
74}; 74};
75 75
76/** 76/**
77 * Spanish-specific data over the default Unicode rules. 77 * Spanish-specific data over the default Unicode rules.
78 */ 78 */
79static struct LineBreakProperties lb_prop_Spanish[] = { 79static struct LineBreakProperties lb_prop_Spanish[] = {
80 { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */ 80 { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
81 { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */ 81 { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
82 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */ 82 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
83 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */ 83 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
84 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */ 84 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
85 { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */ 85 { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
86 { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */ 86 { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
87 { 0, 0, LBP_Undefined } 87 { 0, 0, LBP_Undefined }
88}; 88};
89 89
90/** 90/**
91 * French-specific data over the default Unicode rules. 91 * French-specific data over the default Unicode rules.
92 */ 92 */
93static struct LineBreakProperties lb_prop_French[] = { 93static struct LineBreakProperties lb_prop_French[] = {
94 { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */ 94 { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
95 { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */ 95 { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
96 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */ 96 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
97 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */ 97 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
98 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */ 98 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
99 { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */ 99 { 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
100 { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */ 100 { 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
101 { 0, 0, LBP_Undefined } 101 { 0, 0, LBP_Undefined }
102}; 102};
103 103
104/** 104/**
105 * Russian-specific data over the default Unicode rules. 105 * Russian-specific data over the default Unicode rules.
106 */ 106 */
107static struct LineBreakProperties lb_prop_Russian[] = { 107static struct LineBreakProperties lb_prop_Russian[] = {
108 { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */ 108 { 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
109 { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */ 109 { 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
110 { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */ 110 { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
111 { 0, 0, LBP_Undefined } 111 { 0, 0, LBP_Undefined }
112}; 112};
113 113
114/** 114/**
115 * Chinese-specific data over the default Unicode rules. 115 * Chinese-specific data over the default Unicode rules.
116 */ 116 */
117static struct LineBreakProperties lb_prop_Chinese[] = { 117static struct LineBreakProperties lb_prop_Chinese[] = {
118 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */ 118 { 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
119 { 0x2019, 0x2019, LBP_CL }, /* Right single quotation mark: closing */ 119 { 0x2019, 0x2019, LBP_CL }, /* Right single quotation mark: closing */
120 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */ 120 { 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
121 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */ 121 { 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
122 { 0, 0, LBP_Undefined } 122 { 0, 0, LBP_Undefined }
123}; 123};
124 124
125/** 125/**
@@ -129,11 +129,11 @@ static struct LineBreakProperties lb_prop_Chinese[] = {
129 * you may want to redefine \e lb_prop_lang_map in your C source file. 129 * you may want to redefine \e lb_prop_lang_map in your C source file.
130 */ 130 */
131struct LineBreakPropertiesLang lb_prop_lang_map[] = { 131struct LineBreakPropertiesLang lb_prop_lang_map[] = {
132 { "en", 2, lb_prop_English }, 132 { "en", 2, lb_prop_English },
133 { "de", 2, lb_prop_German }, 133 { "de", 2, lb_prop_German },
134 { "es", 2, lb_prop_Spanish }, 134 { "es", 2, lb_prop_Spanish },
135 { "fr", 2, lb_prop_French }, 135 { "fr", 2, lb_prop_French },
136 { "ru", 2, lb_prop_Russian }, 136 { "ru", 2, lb_prop_Russian },
137 { "zh", 2, lb_prop_Chinese }, 137 { "zh", 2, lb_prop_Chinese },
138 { NULL, 0, NULL } 138 { NULL, 0, NULL }
139}; 139};
diff --git a/src/static_libs/libunibreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h
index 93fcd6781c..d557aba109 100644
--- a/src/static_libs/libunibreak/linebreakdef.h
+++ b/src/static_libs/libunibreak/linebreakdef.h
@@ -1,10 +1,11 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
8 * 9 *
9 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 11 * warranty. In no event will the author be held liable for any damages
@@ -24,35 +25,36 @@
24 * distribution. 25 * distribution.
25 * 26 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14): 27 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/> 28 * <URL:http://www.unicode.org/reports/tr14/>
28 * 29 *
29 * When this library was designed, this annex was at Revision 19, for 30 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0: 31 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 33 *
33 * This library has been updated according to Revision 30, for 34 * This library has been updated according to Revision 30, for
34 * Unicode 6.2.0: 35 * Unicode 6.2.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 36 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
36 * 37 *
37 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
39 */ 40 */
40 41
41/** 42/**
42 * @file linebreakdef.h 43 * @file linebreakdef.h
43 * 44 *
44 * Definitions of internal data structures, declarations of global 45 * Definitions of internal data structures, declarations of global
45 * variables, and function prototypes for the line breaking algorithm. 46 * variables, and function prototypes for the line breaking algorithm.
46 * 47 *
47 * @version 2.3, 2012/10/06 48 * @version 2.4, 2013/11/10
48 * @author Wu Yongwei 49 * @author Wu Yongwei
50 * @author Petr Filipsky
49 */ 51 */
50 52
51/** 53/**
52 * Constant value to mark the end of string. It is not a valid Unicode 54 * Constant value to mark the end of string. It is not a valid Unicode
53 * character. 55 * character.
54 */ 56 */
55#define EOS 0xFFFF 57#define EOS 0xFFFFFFFF
56 58
57/** 59/**
58 * Line break classes. This is a direct mapping of Table 1 of Unicode 60 * Line break classes. This is a direct mapping of Table 1 of Unicode
@@ -60,52 +62,52 @@
60 */ 62 */
61enum LineBreakClass 63enum LineBreakClass
62{ 64{
63 /* This is used to signal an error condition. */ 65 /* This is used to signal an error condition. */
64 LBP_Undefined, /**< Undefined */ 66 LBP_Undefined, /**< Undefined */
65 67
66 /* The following break classes are treated in the pair table. */ 68 /* The following break classes are treated in the pair table. */
67 LBP_OP, /**< Opening punctuation */ 69 LBP_OP, /**< Opening punctuation */
68 LBP_CL, /**< Closing punctuation */ 70 LBP_CL, /**< Closing punctuation */
69 LBP_CP, /**< Closing parenthesis */ 71 LBP_CP, /**< Closing parenthesis */
70 LBP_QU, /**< Ambiguous quotation */ 72 LBP_QU, /**< Ambiguous quotation */
71 LBP_GL, /**< Glue */ 73 LBP_GL, /**< Glue */
72 LBP_NS, /**< Non-starters */ 74 LBP_NS, /**< Non-starters */
73 LBP_EX, /**< Exclamation/Interrogation */ 75 LBP_EX, /**< Exclamation/Interrogation */
74 LBP_SY, /**< Symbols allowing break after */ 76 LBP_SY, /**< Symbols allowing break after */
75 LBP_IS, /**< Infix separator */ 77 LBP_IS, /**< Infix separator */
76 LBP_PR, /**< Prefix */ 78 LBP_PR, /**< Prefix */
77 LBP_PO, /**< Postfix */ 79 LBP_PO, /**< Postfix */
78 LBP_NU, /**< Numeric */ 80 LBP_NU, /**< Numeric */
79 LBP_AL, /**< Alphabetic */ 81 LBP_AL, /**< Alphabetic */
80 LBP_HL, /**< Hebrew letter */ 82 LBP_HL, /**< Hebrew letter */
81 LBP_ID, /**< Ideographic */ 83 LBP_ID, /**< Ideographic */
82 LBP_IN, /**< Inseparable characters */ 84 LBP_IN, /**< Inseparable characters */
83 LBP_HY, /**< Hyphen */ 85 LBP_HY, /**< Hyphen */
84 LBP_BA, /**< Break after */ 86 LBP_BA, /**< Break after */
85 LBP_BB, /**< Break before */ 87 LBP_BB, /**< Break before */
86 LBP_B2, /**< Break on either side (but not pair) */ 88 LBP_B2, /**< Break on either side (but not pair) */
87 LBP_ZW, /**< Zero-width space */ 89 LBP_ZW, /**< Zero-width space */
88 LBP_CM, /**< Combining marks */ 90 LBP_CM, /**< Combining marks */
89 LBP_WJ, /**< Word joiner */ 91 LBP_WJ, /**< Word joiner */
90 LBP_H2, /**< Hangul LV */ 92 LBP_H2, /**< Hangul LV */
91 LBP_H3, /**< Hangul LVT */ 93 LBP_H3, /**< Hangul LVT */
92 LBP_JL, /**< Hangul L Jamo */ 94 LBP_JL, /**< Hangul L Jamo */
93 LBP_JV, /**< Hangul V Jamo */ 95 LBP_JV, /**< Hangul V Jamo */
94 LBP_JT, /**< Hangul T Jamo */ 96 LBP_JT, /**< Hangul T Jamo */
95 LBP_RI, /**< Regional indicator */ 97 LBP_RI, /**< Regional indicator */
96 98
97 /* The following break classes are not treated in the pair table */ 99 /* The following break classes are not treated in the pair table */
98 LBP_AI, /**< Ambiguous (alphabetic or ideograph) */ 100 LBP_AI, /**< Ambiguous (alphabetic or ideograph) */
99 LBP_BK, /**< Break (mandatory) */ 101 LBP_BK, /**< Break (mandatory) */
100 LBP_CB, /**< Contingent break */ 102 LBP_CB, /**< Contingent break */
101 LBP_CJ, /**< Conditional Japanese starter */ 103 LBP_CJ, /**< Conditional Japanese starter */
102 LBP_CR, /**< Carriage return */ 104 LBP_CR, /**< Carriage return */
103 LBP_LF, /**< Line feed */ 105 LBP_LF, /**< Line feed */
104 LBP_NL, /**< Next line */ 106 LBP_NL, /**< Next line */
105 LBP_SA, /**< South-East Asian */ 107 LBP_SA, /**< South-East Asian */
106 LBP_SG, /**< Surrogates */ 108 LBP_SG, /**< Surrogates */
107 LBP_SP, /**< Space */ 109 LBP_SP, /**< Space */
108 LBP_XX /**< Unknown */ 110 LBP_XX /**< Unknown */
109}; 111};
110 112
111/** 113/**
@@ -114,9 +116,9 @@ enum LineBreakClass
114 */ 116 */
115struct LineBreakProperties 117struct LineBreakProperties
116{ 118{
117 utf32_t start; /**< Starting code point */ 119 utf32_t start; /**< Starting code point */
118 utf32_t end; /**< End code point */ 120 utf32_t end; /**< End code point */
119 enum LineBreakClass prop; /**< The line breaking property */ 121 enum LineBreakClass prop; /**< The line breaking property */
120}; 122};
121 123
122/** 124/**
@@ -125,9 +127,22 @@ struct LineBreakProperties
125 */ 127 */
126struct LineBreakPropertiesLang 128struct LineBreakPropertiesLang
127{ 129{
128 const char *lang; /**< Language name */ 130 const char *lang; /**< Language name */
129 size_t namelen; /**< Length of name to match */ 131 size_t namelen; /**< Length of name to match */
130 struct LineBreakProperties *lbp; /**< Pointer to associated data */ 132 struct LineBreakProperties *lbp; /**< Pointer to associated data */
133};
134
135/**
136 * Context representing internal state of the line breaking algorithm.
137 * This is useful to callers if incremental analysis is wanted.
138 */
139struct LineBreakContext
140{
141 const char *lang; /**< Language name */
142 struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */
143 enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */
144 enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */
145 enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */
131}; 146};
132 147
133/** 148/**
@@ -144,9 +159,16 @@ extern struct LineBreakPropertiesLang lb_prop_lang_map[];
144utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip); 159utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
145utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip); 160utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
146utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip); 161utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
162void lb_init_break_context(
163 struct LineBreakContext* lbpCtx,
164 utf32_t ch,
165 const char* lang);
166int lb_process_next_char(
167 struct LineBreakContext* lbpCtx,
168 utf32_t ch);
147void set_linebreaks( 169void set_linebreaks(
148 const void *s, 170 const void *s,
149 size_t len, 171 size_t len,
150 const char *lang, 172 const char *lang,
151 char *brks, 173 char *brks,
152 get_next_char_t get_next_char); 174 get_next_char_t get_next_char);
diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c
index f2996c0e81..e67a1f8507 100644
--- a/src/static_libs/libunibreak/wordbreak.c
+++ b/src/static_libs/libunibreak/wordbreak.c
@@ -1,10 +1,10 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2012 Tom Hacohen <tom@stosb.com> 7 * Copyright (C) 2013 Tom Hacohen <tom at stosb dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -24,24 +24,28 @@
24 * distribution. 24 * distribution.
25 * 25 *
26 * The main reference is Unicode Standard Annex 29 (UAX #29): 26 * The main reference is Unicode Standard Annex 29 (UAX #29):
27 * <URL:http://unicode.org/reports/tr29> 27 * <URL:http://unicode.org/reports/tr29>
28 * 28 *
29 * When this library was designed, this annex was at Revision 17, for 29 * When this library was designed, this annex was at Revision 17, for
30 * Unicode 6.0.0: 30 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32 *
33 * This library has been updated according to Revision 21, for
34 * Unicode 6.2.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-21.html>
32 * 36 *
33 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
34 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
35 */ 39 */
36 40
37/** 41/**
38 * @file wordbreak.c 42 * @file wordbreak.c
39 * 43 *
40 * Implementation of the word breaking algorithm as described in Unicode 44 * Implementation of the word breaking algorithm as described in Unicode
41 * Standard Annex 29. 45 * Standard Annex 29.
42 * 46 *
43 * @version 2.3, 2013/05/14 47 * @version 2.4, 2013/09/28
44 * @author Tom Hacohen 48 * @author Tom Hacohen
45 */ 49 */
46 50
47#include <assert.h> 51#include <assert.h>
@@ -66,34 +70,34 @@ void init_wordbreak(void)
66/** 70/**
67 * Gets the word breaking class of a character. 71 * Gets the word breaking class of a character.
68 * 72 *
69 * @param ch character to check 73 * @param ch character to check
70 * @param wbp pointer to the wbp breaking properties array 74 * @param wbp pointer to the wbp breaking properties array
71 * @param len size of the wbp array in number of items 75 * @param len size of the wbp array in number of items
72 * @return the word breaking class if found; \c WBP_Any otherwise 76 * @return the word breaking class if found; \c WBP_Any otherwise
73 */ 77 */
74static enum WordBreakClass get_char_wb_class( 78static enum WordBreakClass get_char_wb_class(
75 utf32_t ch, 79 utf32_t ch,
76 struct WordBreakProperties *wbp, 80 struct WordBreakProperties *wbp,
77 size_t len) 81 size_t len)
78{ 82{
79 int min = 0; 83 int min = 0;
80 int max = len - 1; 84 int max = len - 1;
81 int mid; 85 int mid;
82 86
83 do 87 do
84 { 88 {
85 mid = (min + max) / 2; 89 mid = (min + max) / 2;
86 90
87 if (ch < wbp[mid].start) 91 if (ch < wbp[mid].start)
88 max = mid - 1; 92 max = mid - 1;
89 else if (ch > wbp[mid].end) 93 else if (ch > wbp[mid].end)
90 min = mid + 1; 94 min = mid + 1;
91 else 95 else
92 return wbp[mid].prop; 96 return wbp[mid].prop;
93 } 97 }
94 while (min <= max); 98 while (min <= max);
95 99
96 return WBP_Any; 100 return WBP_Any;
97} 101}
98 102
99/** 103/**
@@ -103,346 +107,346 @@ static enum WordBreakClass get_char_wb_class(
103 * Assumes \a brks is initialized - all the cells with #WORDBREAK_NOBREAK are 107 * Assumes \a brks is initialized - all the cells with #WORDBREAK_NOBREAK are
104 * cells that we really don't want to break after. 108 * cells that we really don't want to break after.
105 * 109 *
106 * @param[in] s input string 110 * @param[in] s input string
107 * @param[out] brks breaks array to fill 111 * @param[out] brks breaks array to fill
108 * @param[in] posStart start position 112 * @param[in] posStart start position
109 * @param[in] posEnd end position (exclusive) 113 * @param[in] posEnd end position (exclusive)
110 * @param[in] len length of the string 114 * @param[in] len length of the string
111 * @param[in] brkType breaks type to use 115 * @param[in] brkType breaks type to use
112 * @param[in] get_next_char function to get the next UTF-32 character 116 * @param[in] get_next_char function to get the next UTF-32 character
113 */ 117 */
114static void set_brks_to( 118static void set_brks_to(
115 const void *s, 119 const void *s,
116 char *brks, 120 char *brks,
117 size_t posStart, 121 size_t posStart,
118 size_t posEnd, 122 size_t posEnd,
119 size_t len, 123 size_t len,
120 char brkType, 124 char brkType,
121 get_next_char_t get_next_char) 125 get_next_char_t get_next_char)
122{ 126{
123 size_t posNext = posStart; 127 size_t posNext = posStart;
124 while (posNext < posEnd) 128 while (posNext < posEnd)
125 { 129 {
126 utf32_t ch; 130 utf32_t ch;
127 ch = get_next_char(s, len, &posNext); 131 ch = get_next_char(s, len, &posNext);
128 assert(ch != EOS); 132 assert(ch != EOS);
129 for (; posStart < posNext - 1; ++posStart) 133 for (; posStart < posNext - 1; ++posStart)
130 brks[posStart] = WORDBREAK_INSIDEACHAR; 134 brks[posStart] = WORDBREAK_INSIDEACHAR;
131 assert(posStart == posNext - 1); 135 assert(posStart == posNext - 1);
132 136
133 /* Only set it if we haven't set it not to break before. */ 137 /* Only set it if we haven't set it not to break before. */
134 if (brks[posStart] != WORDBREAK_NOBREAK) 138 if (brks[posStart] != WORDBREAK_NOBREAK)
135 brks[posStart] = brkType; 139 brks[posStart] = brkType;
136 posStart = posNext; 140 posStart = posNext;
137 } 141 }
138} 142}
139 143
140/* Checks to see if the class is newline, CR, or LF (rules WB3a and b). */ 144/* Checks to see if the class is newline, CR, or LF (rules WB3a and b). */
141#define IS_WB3ab(cls) ((cls == WBP_Newline) || (cls == WBP_CR) || \ 145#define IS_WB3ab(cls) ((cls == WBP_Newline) || (cls == WBP_CR) || \
142 (cls == WBP_LF)) 146 (cls == WBP_LF))
143 147
144/** 148/**
145 * Sets the word breaking information for a generic input string. 149 * Sets the word breaking information for a generic input string.
146 * 150 *
147 * @param[in] s input string 151 * @param[in] s input string
148 * @param[in] len length of the input 152 * @param[in] len length of the input
149 * @param[in] lang language of the input 153 * @param[in] lang language of the input
150 * @param[out] brks pointer to the output breaking data, containing 154 * @param[out] brks pointer to the output breaking data, containing
151 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or 155 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
152 * #WORDBREAK_INSIDEACHAR 156 * #WORDBREAK_INSIDEACHAR
153 * @param[in] get_next_char function to get the next UTF-32 character 157 * @param[in] get_next_char function to get the next UTF-32 character
154 */ 158 */
155static void set_wordbreaks( 159static void set_wordbreaks(
156 const void *s, 160 const void *s,
157 size_t len, 161 size_t len,
158 const char *lang, 162 const char *lang,
159 char *brks, 163 char *brks,
160 get_next_char_t get_next_char) 164 get_next_char_t get_next_char)
161{ 165{
162 enum WordBreakClass wbcLast = WBP_Undefined; 166 enum WordBreakClass wbcLast = WBP_Undefined;
163 /* wbcSeqStart is the class that started the current sequence. 167 /* wbcSeqStart is the class that started the current sequence.
164 * WBP_Undefined is a special case that means "sot". 168 * WBP_Undefined is a special case that means "sot".
165 * This value is the class that is at the start of the current rule 169 * This value is the class that is at the start of the current rule
166 * matching sequence. For example, in case of Numeric+MidNum+Numeric 170 * matching sequence. For example, in case of Numeric+MidNum+Numeric
167 * it'll be Numeric all the way. 171 * it'll be Numeric all the way.
168 */ 172 */
169 enum WordBreakClass wbcSeqStart = WBP_Undefined; 173 enum WordBreakClass wbcSeqStart = WBP_Undefined;
170 utf32_t ch; 174 utf32_t ch;
171 size_t posNext = 0; 175 size_t posNext = 0;
172 size_t posCur = 0; 176 size_t posCur = 0;
173 size_t posLast = 0; 177 size_t posLast = 0;
174 178
175 /* TODO: Language-specific specialization. */ 179 /* TODO: Language-specific specialization. */
176 (void) lang; 180 (void) lang;
177 181
178 /* Init brks. */ 182 /* Init brks. */
179 memset(brks, WORDBREAK_BREAK, len); 183 memset(brks, WORDBREAK_BREAK, len);
180 184
181 ch = get_next_char(s, len, &posNext); 185 ch = get_next_char(s, len, &posNext);
182 186
183 while (ch != EOS) 187 while (ch != EOS)
184 { 188 {
185 enum WordBreakClass wbcCur; 189 enum WordBreakClass wbcCur;
186 wbcCur = get_char_wb_class(ch, wb_prop_default, 190 wbcCur = get_char_wb_class(ch, wb_prop_default,
187 ARRAY_LEN(wb_prop_default)); 191 ARRAY_LEN(wb_prop_default));
188 192
189 switch (wbcCur) 193 switch (wbcCur)
190 { 194 {
191 case WBP_CR: 195 case WBP_CR:
192 /* WB3b */ 196 /* WB3b */
193 set_brks_to(s, brks, posLast, posCur, len, 197 set_brks_to(s, brks, posLast, posCur, len,
194 WORDBREAK_BREAK, get_next_char); 198 WORDBREAK_BREAK, get_next_char);
195 wbcSeqStart = wbcCur; 199 wbcSeqStart = wbcCur;
196 posLast = posCur; 200 posLast = posCur;
197 break; 201 break;
198 202
199 case WBP_LF: 203 case WBP_LF:
200 if (wbcSeqStart == WBP_CR) /* WB3 */ 204 if (wbcSeqStart == WBP_CR) /* WB3 */
201 { 205 {
202 set_brks_to(s, brks, posLast, posCur, len, 206 set_brks_to(s, brks, posLast, posCur, len,
203 WORDBREAK_NOBREAK, get_next_char); 207 WORDBREAK_NOBREAK, get_next_char);
204 wbcSeqStart = wbcCur; 208 wbcSeqStart = wbcCur;
205 posLast = posCur; 209 posLast = posCur;
206 break; 210 break;
207 } 211 }
208 /* Fall off */ 212 /* Fall off */
209 213
210 case WBP_Newline: 214 case WBP_Newline:
211 /* WB3a,3b */ 215 /* WB3a,3b */
212 set_brks_to(s, brks, posLast, posCur, len, 216 set_brks_to(s, brks, posLast, posCur, len,
213 WORDBREAK_BREAK, get_next_char); 217 WORDBREAK_BREAK, get_next_char);
214 wbcSeqStart = wbcCur; 218 wbcSeqStart = wbcCur;
215 posLast = posCur; 219 posLast = posCur;
216 break; 220 break;
217 221
218 case WBP_Extend: 222 case WBP_Extend:
219 case WBP_Format: 223 case WBP_Format:
220 /* WB4 - If not the first char/after a newline (WB3a,3b), skip 224 /* WB4 - If not the first char/after a newline (WB3a,3b), skip
221 * this class, set it to be the same as the prev, and mark 225 * this class, set it to be the same as the prev, and mark
222 * brks not to break before them. */ 226 * brks not to break before them. */
223 if ((wbcSeqStart == WBP_Undefined) || IS_WB3ab(wbcSeqStart)) 227 if ((wbcSeqStart == WBP_Undefined) || IS_WB3ab(wbcSeqStart))
224 { 228 {
225 set_brks_to(s, brks, posLast, posCur, len, 229 set_brks_to(s, brks, posLast, posCur, len,
226 WORDBREAK_BREAK, get_next_char); 230 WORDBREAK_BREAK, get_next_char);
227 wbcSeqStart = wbcCur; 231 wbcSeqStart = wbcCur;
228 } 232 }
229 else 233 else
230 { 234 {
231 /* It's surely not the first */ 235 /* It's surely not the first */
232 brks[posCur - 1] = WORDBREAK_NOBREAK; 236 brks[posCur - 1] = WORDBREAK_NOBREAK;
233 /* "inherit" the previous class. */ 237 /* "inherit" the previous class. */
234 wbcCur = wbcLast; 238 wbcCur = wbcLast;
235 } 239 }
236 break; 240 break;
237 241
238 case WBP_Katakana: 242 case WBP_Katakana:
239 if ((wbcSeqStart == WBP_Katakana) || /* WB13 */ 243 if ((wbcSeqStart == WBP_Katakana) || /* WB13 */
240 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 244 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
241 { 245 {
242 set_brks_to(s, brks, posLast, posCur, len, 246 set_brks_to(s, brks, posLast, posCur, len,
243 WORDBREAK_NOBREAK, get_next_char); 247 WORDBREAK_NOBREAK, get_next_char);
244 } 248 }
245 /* No rule found, reset */ 249 /* No rule found, reset */
246 else 250 else
247 { 251 {
248 set_brks_to(s, brks, posLast, posCur, len, 252 set_brks_to(s, brks, posLast, posCur, len,
249 WORDBREAK_BREAK, get_next_char); 253 WORDBREAK_BREAK, get_next_char);
250 } 254 }
251 wbcSeqStart = wbcCur; 255 wbcSeqStart = wbcCur;
252 posLast = posCur; 256 posLast = posCur;
253 break; 257 break;
254 258
255 case WBP_ALetter: 259 case WBP_ALetter:
256 if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */ 260 if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */
257 (wbcLast == WBP_Numeric) || /* WB10 */ 261 (wbcLast == WBP_Numeric) || /* WB10 */
258 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 262 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
259 { 263 {
260 set_brks_to(s, brks, posLast, posCur, len, 264 set_brks_to(s, brks, posLast, posCur, len,
261 WORDBREAK_NOBREAK, get_next_char); 265 WORDBREAK_NOBREAK, get_next_char);
262 } 266 }
263 /* No rule found, reset */ 267 /* No rule found, reset */
264 else 268 else
265 { 269 {
266 set_brks_to(s, brks, posLast, posCur, len, 270 set_brks_to(s, brks, posLast, posCur, len,
267 WORDBREAK_BREAK, get_next_char); 271 WORDBREAK_BREAK, get_next_char);
268 } 272 }
269 wbcSeqStart = wbcCur; 273 wbcSeqStart = wbcCur;
270 posLast = posCur; 274 posLast = posCur;
271 break; 275 break;
272 276
273 case WBP_MidNumLet: 277 case WBP_MidNumLet:
274 if ((wbcLast == WBP_ALetter) || /* WB6,7 */ 278 if ((wbcLast == WBP_ALetter) || /* WB6,7 */
275 (wbcLast == WBP_Numeric)) /* WB11,12 */ 279 (wbcLast == WBP_Numeric)) /* WB11,12 */
276 { 280 {
277 /* Go on */ 281 /* Go on */
278 } 282 }
279 else 283 else
280 { 284 {
281 set_brks_to(s, brks, posLast, posCur, len, 285 set_brks_to(s, brks, posLast, posCur, len,
282 WORDBREAK_BREAK, get_next_char); 286 WORDBREAK_BREAK, get_next_char);
283 wbcSeqStart = wbcCur; 287 wbcSeqStart = wbcCur;
284 posLast = posCur; 288 posLast = posCur;
285 } 289 }
286 break; 290 break;
287 291
288 case WBP_MidLetter: 292 case WBP_MidLetter:
289 if (wbcLast == WBP_ALetter) /* WB6,7 */ 293 if (wbcLast == WBP_ALetter) /* WB6,7 */
290 { 294 {
291 /* Go on */ 295 /* Go on */
292 } 296 }
293 else 297 else
294 { 298 {
295 set_brks_to(s, brks, posLast, posCur, len, 299 set_brks_to(s, brks, posLast, posCur, len,
296 WORDBREAK_BREAK, get_next_char); 300 WORDBREAK_BREAK, get_next_char);
297 wbcSeqStart = wbcCur; 301 wbcSeqStart = wbcCur;
298 posLast = posCur; 302 posLast = posCur;
299 } 303 }
300 break; 304 break;
301 305
302 case WBP_MidNum: 306 case WBP_MidNum:
303 if (wbcLast == WBP_Numeric) /* WB11,12 */ 307 if (wbcLast == WBP_Numeric) /* WB11,12 */
304 { 308 {
305 /* Go on */ 309 /* Go on */
306 } 310 }
307 else 311 else
308 { 312 {
309 set_brks_to(s, brks, posLast, posCur, len, 313 set_brks_to(s, brks, posLast, posCur, len,
310 WORDBREAK_BREAK, get_next_char); 314 WORDBREAK_BREAK, get_next_char);
311 wbcSeqStart = wbcCur; 315 wbcSeqStart = wbcCur;
312 posLast = posCur; 316 posLast = posCur;
313 } 317 }
314 break; 318 break;
315 319
316 case WBP_Numeric: 320 case WBP_Numeric:
317 if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */ 321 if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */
318 (wbcLast == WBP_ALetter) || /* WB9 */ 322 (wbcLast == WBP_ALetter) || /* WB9 */
319 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ 323 (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
320 { 324 {
321 set_brks_to(s, brks, posLast, posCur, len, 325 set_brks_to(s, brks, posLast, posCur, len,
322 WORDBREAK_NOBREAK, get_next_char); 326 WORDBREAK_NOBREAK, get_next_char);
323 } 327 }
324 /* No rule found, reset */ 328 /* No rule found, reset */
325 else 329 else
326 { 330 {
327 set_brks_to(s, brks, posLast, posCur, len, 331 set_brks_to(s, brks, posLast, posCur, len,
328 WORDBREAK_BREAK, get_next_char); 332 WORDBREAK_BREAK, get_next_char);
329 } 333 }
330 wbcSeqStart = wbcCur; 334 wbcSeqStart = wbcCur;
331 posLast = posCur; 335 posLast = posCur;
332 break; 336 break;
333 337
334 case WBP_ExtendNumLet: 338 case WBP_ExtendNumLet:
335 /* WB13a,13b */ 339 /* WB13a,13b */
336 if ((wbcSeqStart == wbcLast) && 340 if ((wbcSeqStart == wbcLast) &&
337 ((wbcLast == WBP_ALetter) || 341 ((wbcLast == WBP_ALetter) ||
338 (wbcLast == WBP_Numeric) || 342 (wbcLast == WBP_Numeric) ||
339 (wbcLast == WBP_Katakana) || 343 (wbcLast == WBP_Katakana) ||
340 (wbcLast == WBP_ExtendNumLet))) 344 (wbcLast == WBP_ExtendNumLet)))
341 { 345 {
342 set_brks_to(s, brks, posLast, posCur, len, 346 set_brks_to(s, brks, posLast, posCur, len,
343 WORDBREAK_NOBREAK, get_next_char); 347 WORDBREAK_NOBREAK, get_next_char);
344 } 348 }
345 /* No rule found, reset */ 349 /* No rule found, reset */
346 else 350 else
347 { 351 {
348 set_brks_to(s, brks, posLast, posCur, len, 352 set_brks_to(s, brks, posLast, posCur, len,
349 WORDBREAK_BREAK, get_next_char); 353 WORDBREAK_BREAK, get_next_char);
350 } 354 }
351 wbcSeqStart = wbcCur; 355 wbcSeqStart = wbcCur;
352 posLast = posCur; 356 posLast = posCur;
353 break; 357 break;
354 358
355 case WBP_Regional: 359 case WBP_Regional:
356 /* WB13c */ 360 /* WB13c */
357 if (wbcSeqStart == WBP_Regional) 361 if (wbcSeqStart == WBP_Regional)
358 { 362 {
359 set_brks_to(s, brks, posLast, posCur, len, 363 set_brks_to(s, brks, posLast, posCur, len,
360 WORDBREAK_NOBREAK, get_next_char); 364 WORDBREAK_NOBREAK, get_next_char);
361 } 365 }
362 wbcSeqStart = wbcCur; 366 wbcSeqStart = wbcCur;
363 posLast = posCur; 367 posLast = posCur;
364 break; 368 break;
365 369
366 case WBP_Any: 370 case WBP_Any:
367 /* Allow breaks and reset */ 371 /* Allow breaks and reset */
368 set_brks_to(s, brks, posLast, posCur, len, 372 set_brks_to(s, brks, posLast, posCur, len,
369 WORDBREAK_BREAK, get_next_char); 373 WORDBREAK_BREAK, get_next_char);
370 wbcSeqStart = wbcCur; 374 wbcSeqStart = wbcCur;
371 posLast = posCur; 375 posLast = posCur;
372 break; 376 break;
373 377
374 default: 378 default:
375 /* Error, should never get here! */ 379 /* Error, should never get here! */
376 assert(0); 380 assert(0);
377 break; 381 break;
378 } 382 }
379 383
380 wbcLast = wbcCur; 384 wbcLast = wbcCur;
381 posCur = posNext; 385 posCur = posNext;
382 ch = get_next_char(s, len, &posNext); 386 ch = get_next_char(s, len, &posNext);
383 } 387 }
384 388
385 /* WB2 */ 389 /* WB2 */
386 set_brks_to(s, brks, posLast, posNext, len, 390 set_brks_to(s, brks, posLast, posNext, len,
387 WORDBREAK_BREAK, get_next_char); 391 WORDBREAK_BREAK, get_next_char);
388} 392}
389 393
390/** 394/**
391 * Sets the word breaking information for a UTF-8 input string. 395 * Sets the word breaking information for a UTF-8 input string.
392 * 396 *
393 * @param[in] s input UTF-8 string 397 * @param[in] s input UTF-8 string
394 * @param[in] len length of the input 398 * @param[in] len length of the input
395 * @param[in] lang language of the input 399 * @param[in] lang language of the input
396 * @param[out] brks pointer to the output breaking data, containing 400 * @param[out] brks pointer to the output breaking data, containing
397 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or 401 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
398 * #WORDBREAK_INSIDEACHAR 402 * #WORDBREAK_INSIDEACHAR
399 */ 403 */
400void set_wordbreaks_utf8( 404void set_wordbreaks_utf8(
401 const utf8_t *s, 405 const utf8_t *s,
402 size_t len, 406 size_t len,
403 const char *lang, 407 const char *lang,
404 char *brks) 408 char *brks)
405{ 409{
406 set_wordbreaks(s, len, lang, brks, 410 set_wordbreaks(s, len, lang, brks,
407 (get_next_char_t)lb_get_next_char_utf8); 411 (get_next_char_t)lb_get_next_char_utf8);
408} 412}
409 413
410/** 414/**
411 * Sets the word breaking information for a UTF-16 input string. 415 * Sets the word breaking information for a UTF-16 input string.
412 * 416 *
413 * @param[in] s input UTF-16 string 417 * @param[in] s input UTF-16 string
414 * @param[in] len length of the input 418 * @param[in] len length of the input
415 * @param[in] lang language of the input 419 * @param[in] lang language of the input
416 * @param[out] brks pointer to the output breaking data, containing 420 * @param[out] brks pointer to the output breaking data, containing
417 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or 421 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
418 * #WORDBREAK_INSIDEACHAR 422 * #WORDBREAK_INSIDEACHAR
419 */ 423 */
420void set_wordbreaks_utf16( 424void set_wordbreaks_utf16(
421 const utf16_t *s, 425 const utf16_t *s,
422 size_t len, 426 size_t len,
423 const char *lang, 427 const char *lang,
424 char *brks) 428 char *brks)
425{ 429{
426 set_wordbreaks(s, len, lang, brks, 430 set_wordbreaks(s, len, lang, brks,
427 (get_next_char_t)lb_get_next_char_utf16); 431 (get_next_char_t)lb_get_next_char_utf16);
428} 432}
429 433
430/** 434/**
431 * Sets the word breaking information for a UTF-32 input string. 435 * Sets the word breaking information for a UTF-32 input string.
432 * 436 *
433 * @param[in] s input UTF-32 string 437 * @param[in] s input UTF-32 string
434 * @param[in] len length of the input 438 * @param[in] len length of the input
435 * @param[in] lang language of the input 439 * @param[in] lang language of the input
436 * @param[out] brks pointer to the output breaking data, containing 440 * @param[out] brks pointer to the output breaking data, containing
437 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or 441 * #WORDBREAK_BREAK, #WORDBREAK_NOBREAK, or
438 * #WORDBREAK_INSIDEACHAR 442 * #WORDBREAK_INSIDEACHAR
439 */ 443 */
440void set_wordbreaks_utf32( 444void set_wordbreaks_utf32(
441 const utf32_t *s, 445 const utf32_t *s,
442 size_t len, 446 size_t len,
443 const char *lang, 447 const char *lang,
444 char *brks) 448 char *brks)
445{ 449{
446 set_wordbreaks(s, len, lang, brks, 450 set_wordbreaks(s, len, lang, brks,
447 (get_next_char_t)lb_get_next_char_utf32); 451 (get_next_char_t)lb_get_next_char_utf32);
448} 452}
diff --git a/src/static_libs/libunibreak/wordbreak.h b/src/static_libs/libunibreak/wordbreak.h
index 47bef274ce..cd2bf2c451 100644
--- a/src/static_libs/libunibreak/wordbreak.h
+++ b/src/static_libs/libunibreak/wordbreak.h
@@ -1,10 +1,10 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2012 Tom Hacohen <tom@stosb.com> 7 * Copyright (C) 2013 Tom Hacohen <tom at stosb dot com>
8 * 8 *
9 * This software is provided 'as-is', without any express or implied 9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 10 * warranty. In no event will the author be held liable for any damages
@@ -24,23 +24,27 @@
24 * distribution. 24 * distribution.
25 * 25 *
26 * The main reference is Unicode Standard Annex 29 (UAX #29): 26 * The main reference is Unicode Standard Annex 29 (UAX #29):
27 * <URL:http://unicode.org/reports/tr29> 27 * <URL:http://unicode.org/reports/tr29>
28 * 28 *
29 * When this library was designed, this annex was at Revision 17, for 29 * When this library was designed, this annex was at Revision 17, for
30 * Unicode 6.0.0: 30 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32 *
33 * This library has been updated according to Revision 21, for
34 * Unicode 6.2.0:
35 * <URL:http://www.unicode.org/reports/tr29/tr29-21.html>
32 * 36 *
33 * The Unicode Terms of Use are available at 37 * The Unicode Terms of Use are available at
34 * <URL:http://www.unicode.org/copyright.html> 38 * <URL:http://www.unicode.org/copyright.html>
35 */ 39 */
36 40
37/** 41/**
38 * @file wordbreak.h 42 * @file wordbreak.h
39 * 43 *
40 * Header file for the word breaking (segmentation) algorithm. 44 * Header file for the word breaking (segmentation) algorithm.
41 * 45 *
42 * @version 2.2, 2012/02/04 46 * @version 2.3, 2013/09/28
43 * @author Tom Hacohen 47 * @author Tom Hacohen
44 */ 48 */
45 49
46#ifndef WORDBREAK_H 50#ifndef WORDBREAK_H
@@ -53,17 +57,17 @@
53extern "C" { 57extern "C" {
54#endif 58#endif
55 59
56#define WORDBREAK_BREAK 0 /**< Break is allowed */ 60#define WORDBREAK_BREAK 0 /**< Break is allowed */
57#define WORDBREAK_NOBREAK 1 /**< No break is allowed */ 61#define WORDBREAK_NOBREAK 1 /**< No break is allowed */
58#define WORDBREAK_INSIDEACHAR 2 /**< A UTF-8/16 sequence is unfinished */ 62#define WORDBREAK_INSIDEACHAR 2 /**< A UTF-8/16 sequence is unfinished */
59 63
60void init_wordbreak(void); 64void init_wordbreak(void);
61void set_wordbreaks_utf8( 65void set_wordbreaks_utf8(
62 const utf8_t *s, size_t len, const char* lang, char *brks); 66 const utf8_t *s, size_t len, const char* lang, char *brks);
63void set_wordbreaks_utf16( 67void set_wordbreaks_utf16(
64 const utf16_t *s, size_t len, const char* lang, char *brks); 68 const utf16_t *s, size_t len, const char* lang, char *brks);
65void set_wordbreaks_utf32( 69void set_wordbreaks_utf32(
66 const utf32_t *s, size_t len, const char* lang, char *brks); 70 const utf32_t *s, size_t len, const char* lang, char *brks);
67 71
68#ifdef __cplusplus 72#ifdef __cplusplus
69} 73}
diff --git a/src/static_libs/libunibreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h
index ca8b1b62c0..72816f9dc4 100644
--- a/src/static_libs/libunibreak/wordbreakdef.h
+++ b/src/static_libs/libunibreak/wordbreakdef.h
@@ -1,10 +1,11 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Word breaking in a Unicode sequence. Designed to be used in a 4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2012 Tom Hacohen <tom@stosb.com> 7 * Copyright (C) 2013 Tom Hacohen <tom at stosb dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
8 * 9 *
9 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 11 * warranty. In no event will the author be held liable for any damages
@@ -24,47 +25,55 @@
24 * distribution. 25 * distribution.
25 * 26 *
26 * The main reference is Unicode Standard Annex 29 (UAX #29): 27 * The main reference is Unicode Standard Annex 29 (UAX #29):
27 * <URL:http://unicode.org/reports/tr29> 28 * <URL:http://unicode.org/reports/tr29>
28 * 29 *
29 * When this library was designed, this annex was at Revision 17, for 30 * When this library was designed, this annex was at Revision 17, for
30 * Unicode 6.0.0: 31 * Unicode 6.0.0:
31 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html> 32 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
33 *
34 * This library has been updated according to Revision 21, for
35 * Unicode 6.2.0:
36 * <URL:http://www.unicode.org/reports/tr29/tr29-21.html>
32 * 37 *
33 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
34 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
35 */ 40 */
36 41
37/** 42/**
38 * @file wordbreakdef.h 43 * @file wordbreakdef.h
39 * 44 *
40 * Definitions of internal data structures, declarations of global 45 * Definitions of internal data structures, declarations of global
41 * variables, and function prototypes for the word breaking algorithm. 46 * variables, and function prototypes for the word breaking algorithm.
42 * 47 *
43 * @version 2.2, 2013/05/14 48 * @version 2.4, 2013/11/10
44 * @author Tom Hacohen 49 * @author Tom Hacohen
50 * @author Petr Filipsky
45 */ 51 */
46 52
47/** 53/**
48 * Word break classes. This is a direct mapping of Table 3 of Unicode 54 * Word break classes. This is a direct mapping of Table 3 of Unicode
49 * Standard Annex 29, Revision 17. 55 * Standard Annex 29, Revision 23.
50 */ 56 */
51enum WordBreakClass 57enum WordBreakClass
52{ 58{
53 WBP_Undefined, 59 WBP_Undefined,
54 WBP_CR, 60 WBP_CR,
55 WBP_LF, 61 WBP_LF,
56 WBP_Newline, 62 WBP_Newline,
57 WBP_Extend, 63 WBP_Extend,
58 WBP_Format, 64 WBP_Format,
59 WBP_Katakana, 65 WBP_Katakana,
60 WBP_ALetter, 66 WBP_ALetter,
61 WBP_MidNumLet, 67 WBP_MidNumLet,
62 WBP_MidLetter, 68 WBP_MidLetter,
63 WBP_MidNum, 69 WBP_MidNum,
64 WBP_Numeric, 70 WBP_Numeric,
65 WBP_ExtendNumLet, 71 WBP_ExtendNumLet,
66 WBP_Regional, 72 WBP_Regional,
67 WBP_Any 73 WBP_Hebrew,
74 WBP_Single,
75 WBP_Double,
76 WBP_Any
68}; 77};
69 78
70/** 79/**
@@ -73,7 +82,7 @@ enum WordBreakClass
73 */ 82 */
74struct WordBreakProperties 83struct WordBreakProperties
75{ 84{
76 utf32_t start; /**< Starting coding point */ 85 utf32_t start; /**< Starting coding point */
77 utf32_t end; /**< End coding point */ 86 utf32_t end; /**< End coding point */
78 enum WordBreakClass prop; /**< The word breaking property */ 87 enum WordBreakClass prop; /**< The word breaking property */
79}; 88};