summaryrefslogtreecommitdiff
path: root/src/static_libs/libunibreak/linebreak.c
diff options
context:
space:
mode:
authorTom Hacohen <tom@stosb.com>2014-01-21 16:41:06 +0000
committerTom Hacohen <tom@stosb.com>2014-01-21 16:41:06 +0000
commitcff1a9a59f40b1e83ed1db8145108cae53504d4f (patch)
tree524c58e21db031ab6acd6382dfdacb9c66d91a65 /src/static_libs/libunibreak/linebreak.c
parentcc8fa1da451d588e4218a2b8f8d3eebb9b38890f (diff)
Synced libunibreak local copy with upstream.
This fixes T805.
Diffstat (limited to 'src/static_libs/libunibreak/linebreak.c')
-rw-r--r--src/static_libs/libunibreak/linebreak.c1236
1 files changed, 664 insertions, 572 deletions
diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c
index c1ea405883..9716df4860 100644
--- a/src/static_libs/libunibreak/linebreak.c
+++ b/src/static_libs/libunibreak/linebreak.c
@@ -1,10 +1,11 @@
1/* vim: set tabstop=4 shiftwidth=4: */ 1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 2
3/* 3/*
4 * Line breaking in a Unicode sequence. Designed to be used in a 4 * Line breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer. 5 * generic text renderer.
6 * 6 *
7 * Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com> 7 * Copyright (C) 2008-2013 Wu Yongwei <wuyongwei at gmail dot com>
8 * Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
8 * 9 *
9 * This software is provided 'as-is', without any express or implied 10 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages 11 * warranty. In no event will the author be held liable for any damages
@@ -24,28 +25,29 @@
24 * distribution. 25 * distribution.
25 * 26 *
26 * The main reference is Unicode Standard Annex 14 (UAX #14): 27 * The main reference is Unicode Standard Annex 14 (UAX #14):
27 * <URL:http://www.unicode.org/reports/tr14/> 28 * <URL:http://www.unicode.org/reports/tr14/>
28 * 29 *
29 * When this library was designed, this annex was at Revision 19, for 30 * When this library was designed, this annex was at Revision 19, for
30 * Unicode 5.0.0: 31 * Unicode 5.0.0:
31 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html> 32 * <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
32 * 33 *
33 * This library has been updated according to Revision 30, for 34 * This library has been updated according to Revision 30, for
34 * Unicode 6.2.0: 35 * Unicode 6.2.0:
35 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html> 36 * <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
36 * 37 *
37 * The Unicode Terms of Use are available at 38 * The Unicode Terms of Use are available at
38 * <URL:http://www.unicode.org/copyright.html> 39 * <URL:http://www.unicode.org/copyright.html>
39 */ 40 */
40 41
41/** 42/**
42 * @file linebreak.c 43 * @file linebreak.c
43 * 44 *
44 * Implementation of the line breaking algorithm as described in Unicode 45 * Implementation of the line breaking algorithm as described in Unicode
45 * Standard Annex 14. 46 * Standard Annex 14.
46 * 47 *
47 * @version 2.3, 2012/10/06 48 * @version 2.5, 2013/11/14
48 * @author Wu Yongwei 49 * @author Wu Yongwei
50 * @author Petr Filipsky
49 */ 51 */
50 52
51#include <assert.h> 53#include <assert.h>
@@ -55,6 +57,11 @@
55#include "linebreakdef.h" 57#include "linebreakdef.h"
56 58
57/** 59/**
60 * Special value used internally to indicate an undefined break result.
61 */
62#define LINEBREAK_UNDEFINED -1
63
64/**
58 * Size of the second-level index to the line breaking properties. 65 * Size of the second-level index to the line breaking properties.
59 */ 66 */
60#define LINEBREAK_INDEX_SIZE 40 67#define LINEBREAK_INDEX_SIZE 40
@@ -70,11 +77,11 @@ const int linebreak_version = LINEBREAK_VERSION;
70 */ 77 */
71enum BreakAction 78enum BreakAction
72{ 79{
73 DIR_BRK, /**< Direct break opportunity */ 80 DIR_BRK, /**< Direct break opportunity */
74 IND_BRK, /**< Indirect break opportunity */ 81 IND_BRK, /**< Indirect break opportunity */
75 CMI_BRK, /**< Indirect break opportunity for combining marks */ 82 CMI_BRK, /**< Indirect break opportunity for combining marks */
76 CMP_BRK, /**< Prohibited break for combining marks */ 83 CMP_BRK, /**< Prohibited break for combining marks */
77 PRH_BRK /**< Prohibited break */ 84 PRH_BRK /**< Prohibited break */
78}; 85};
79 86
80/** 87/**
@@ -82,180 +89,180 @@ enum BreakAction
82 * Unicode Standard Annex 14, Revision 30. 89 * Unicode Standard Annex 14, Revision 30.
83 */ 90 */
84static enum BreakAction baTable[LBP_RI][LBP_RI] = { 91static enum BreakAction baTable[LBP_RI][LBP_RI] = {
85 { /* OP */ 92 { /* OP */
86 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 93 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
87 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 94 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
88 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 95 PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
89 CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, 96 CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
90 PRH_BRK }, 97 PRH_BRK },
91 { /* CL */ 98 { /* CL */
92 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, 99 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
93 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 100 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
94 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 101 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
95 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 102 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
96 DIR_BRK }, 103 DIR_BRK },
97 { /* CP */ 104 { /* CP */
98 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, 105 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
99 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 106 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
100 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 107 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
101 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 108 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
102 DIR_BRK }, 109 DIR_BRK },
103 { /* QU */ 110 { /* QU */
104 PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 111 PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
105 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 112 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
106 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 113 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
107 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 114 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
108 IND_BRK }, 115 IND_BRK },
109 { /* GL */ 116 { /* GL */
110 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 117 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
111 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 118 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
112 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 119 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
113 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 120 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
114 IND_BRK }, 121 IND_BRK },
115 { /* NS */ 122 { /* NS */
116 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 123 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
117 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 124 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
118 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 125 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
119 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 126 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
120 DIR_BRK }, 127 DIR_BRK },
121 { /* EX */ 128 { /* EX */
122 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 129 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
123 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 130 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
124 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 131 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
125 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 132 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
126 DIR_BRK }, 133 DIR_BRK },
127 { /* SY */ 134 { /* SY */
128 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 135 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
129 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, 136 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
130 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 137 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
131 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 138 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
132 DIR_BRK }, 139 DIR_BRK },
133 { /* IS */ 140 { /* IS */
134 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 141 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
135 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 142 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
136 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 143 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
137 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 144 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
138 DIR_BRK }, 145 DIR_BRK },
139 { /* PR */ 146 { /* PR */
140 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 147 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
141 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 148 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
142 IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 149 IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
143 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 150 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
144 DIR_BRK }, 151 DIR_BRK },
145 { /* PO */ 152 { /* PO */
146 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 153 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
147 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 154 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
148 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 155 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
149 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 156 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
150 DIR_BRK }, 157 DIR_BRK },
151 { /* NU */ 158 { /* NU */
152 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 159 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
153 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 160 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
154 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 161 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
155 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 162 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
156 DIR_BRK }, 163 DIR_BRK },
157 { /* AL */ 164 { /* AL */
158 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 165 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
159 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 166 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
160 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 167 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
161 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 168 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
162 DIR_BRK }, 169 DIR_BRK },
163 { /* HL */ 170 { /* HL */
164 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 171 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
165 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 172 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
166 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 173 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
167 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 174 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
168 DIR_BRK }, 175 DIR_BRK },
169 { /* ID */ 176 { /* ID */
170 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 177 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
171 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 178 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
172 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 179 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
173 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 180 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
174 DIR_BRK }, 181 DIR_BRK },
175 { /* IN */ 182 { /* IN */
176 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 183 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
177 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 184 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
178 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 185 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
179 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 186 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
180 DIR_BRK }, 187 DIR_BRK },
181 { /* HY */ 188 { /* HY */
182 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, 189 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
183 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, 190 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
184 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 191 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
185 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 192 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
186 DIR_BRK }, 193 DIR_BRK },
187 { /* BA */ 194 { /* BA */
188 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, 195 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
189 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 196 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
190 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 197 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
191 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 198 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
192 DIR_BRK }, 199 DIR_BRK },
193 { /* BB */ 200 { /* BB */
194 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 201 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
195 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 202 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
196 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 203 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
197 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 204 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
198 IND_BRK }, 205 IND_BRK },
199 { /* B2 */ 206 { /* B2 */
200 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 207 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
201 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 208 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
202 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, 209 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK,
203 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 210 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
204 DIR_BRK }, 211 DIR_BRK },
205 { /* ZW */ 212 { /* ZW */
206 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 213 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
207 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 214 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
208 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 215 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
209 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 216 DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
210 DIR_BRK }, 217 DIR_BRK },
211 { /* CM */ 218 { /* CM */
212 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 219 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
213 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, 220 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
214 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 221 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
215 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 222 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
216 DIR_BRK }, 223 DIR_BRK },
217 { /* WJ */ 224 { /* WJ */
218 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 225 IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
219 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 226 PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
220 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 227 IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
221 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, 228 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
222 IND_BRK }, 229 IND_BRK },
223 { /* H2 */ 230 { /* H2 */
224 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 231 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
225 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 232 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
226 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 233 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
227 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, 234 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
228 DIR_BRK }, 235 DIR_BRK },
229 { /* H3 */ 236 { /* H3 */
230 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 237 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
231 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 238 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
232 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 239 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
233 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, 240 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
234 DIR_BRK }, 241 DIR_BRK },
235 { /* JL */ 242 { /* JL */
236 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 243 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
237 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 244 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
238 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 245 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
239 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, 246 CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
240 DIR_BRK }, 247 DIR_BRK },
241 { /* JV */ 248 { /* JV */
242 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 249 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
243 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 250 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
244 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 251 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
245 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, 252 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
246 DIR_BRK }, 253 DIR_BRK },
247 { /* JT */ 254 { /* JT */
248 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 255 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
249 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 256 PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
250 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 257 DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
251 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, 258 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
252 DIR_BRK }, 259 DIR_BRK },
253 { /* RI */ 260 { /* RI */
254 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, 261 DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
255 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 262 PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
256 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, 263 DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
257 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, 264 CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
258 IND_BRK }, 265 IND_BRK },
259}; 266};
260 267
261/** 268/**
@@ -263,8 +270,8 @@ static enum BreakAction baTable[LBP_RI][LBP_RI] = {
263 */ 270 */
264struct LineBreakPropertiesIndex 271struct LineBreakPropertiesIndex
265{ 272{
266 utf32_t end; /**< End coding point */ 273 utf32_t end; /**< End coding point */
267 struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */ 274 struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
268}; 275};
269 276
270/** 277/**
@@ -272,7 +279,7 @@ struct LineBreakPropertiesIndex
272 */ 279 */
273static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] = 280static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
274{ 281{
275 { 0xFFFFFFFF, lb_prop_default } 282 { 0xFFFFFFFF, lb_prop_default }
276}; 283};
277 284
278/** 285/**
@@ -283,84 +290,84 @@ static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
283 */ 290 */
284void init_linebreak(void) 291void init_linebreak(void)
285{ 292{
286 size_t i; 293 size_t i;
287 size_t iPropDefault; 294 size_t iPropDefault;
288 size_t len; 295 size_t len;
289 size_t step; 296 size_t step;
290 297
291 len = 0; 298 len = 0;
292 while (lb_prop_default[len].prop != LBP_Undefined) 299 while (lb_prop_default[len].prop != LBP_Undefined)
293 ++len; 300 ++len;
294 step = len / LINEBREAK_INDEX_SIZE; 301 step = len / LINEBREAK_INDEX_SIZE;
295 iPropDefault = 0; 302 iPropDefault = 0;
296 for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i) 303 for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
297 { 304 {
298 lb_prop_index[i].lbp = lb_prop_default + iPropDefault; 305 lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
299 iPropDefault += step; 306 iPropDefault += step;
300 lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1; 307 lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
301 } 308 }
302 lb_prop_index[--i].end = 0xFFFFFFFF; 309 lb_prop_index[--i].end = 0xFFFFFFFF;
303} 310}
304 311
305/** 312/**
306 * Gets the language-specific line breaking properties. 313 * Gets the language-specific line breaking properties.
307 * 314 *
308 * @param lang language of the text 315 * @param lang language of the text
309 * @return pointer to the language-specific line breaking 316 * @return pointer to the language-specific line breaking
310 * properties array if found; \c NULL otherwise 317 * properties array if found; \c NULL otherwise
311 */ 318 */
312static struct LineBreakProperties *get_lb_prop_lang(const char *lang) 319static struct LineBreakProperties *get_lb_prop_lang(const char *lang)
313{ 320{
314 struct LineBreakPropertiesLang *lbplIter; 321 struct LineBreakPropertiesLang *lbplIter;
315 if (lang != NULL) 322 if (lang != NULL)
316 { 323 {
317 for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter) 324 for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
318 { 325 {
319 if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0) 326 if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
320 { 327 {
321 return lbplIter->lbp; 328 return lbplIter->lbp;
322 } 329 }
323 } 330 }
324 } 331 }
325 return NULL; 332 return NULL;
326} 333}
327 334
328/** 335/**
329 * Gets the line breaking class of a character from a line breaking 336 * Gets the line breaking class of a character from a line breaking
330 * properties array. 337 * properties array.
331 * 338 *
332 * @param ch character to check 339 * @param ch character to check
333 * @param lbp pointer to the line breaking properties array 340 * @param lbp pointer to the line breaking properties array
334 * @return the line breaking class if found; \c LBP_XX otherwise 341 * @return the line breaking class if found; \c LBP_XX otherwise
335 */ 342 */
336static enum LineBreakClass get_char_lb_class( 343static enum LineBreakClass get_char_lb_class(
337 utf32_t ch, 344 utf32_t ch,
338 struct LineBreakProperties *lbp) 345 struct LineBreakProperties *lbp)
339{ 346{
340 while (lbp->prop != LBP_Undefined && ch >= lbp->start) 347 while (lbp->prop != LBP_Undefined && ch >= lbp->start)
341 { 348 {
342 if (ch <= lbp->end) 349 if (ch <= lbp->end)
343 return lbp->prop; 350 return lbp->prop;
344 ++lbp; 351 ++lbp;
345 } 352 }
346 return LBP_XX; 353 return LBP_XX;
347} 354}
348 355
349/** 356/**
350 * Gets the line breaking class of a character from the default line 357 * Gets the line breaking class of a character from the default line
351 * breaking properties array. 358 * breaking properties array.
352 * 359 *
353 * @param ch character to check 360 * @param ch character to check
354 * @return the line breaking class if found; \c LBP_XX otherwise 361 * @return the line breaking class if found; \c LBP_XX otherwise
355 */ 362 */
356static enum LineBreakClass get_char_lb_class_default( 363static enum LineBreakClass get_char_lb_class_default(
357 utf32_t ch) 364 utf32_t ch)
358{ 365{
359 size_t i = 0; 366 size_t i = 0;
360 while (ch > lb_prop_index[i].end) 367 while (ch > lb_prop_index[i].end)
361 ++i; 368 ++i;
362 assert(i < LINEBREAK_INDEX_SIZE); 369 assert(i < LINEBREAK_INDEX_SIZE);
363 return get_char_lb_class(ch, lb_prop_index[i].lbp); 370 return get_char_lb_class(ch, lb_prop_index[i].lbp);
364} 371}
365 372
366/** 373/**
@@ -369,30 +376,30 @@ static enum LineBreakClass get_char_lb_class_default(
369 * and then the default data if there is no language-specific property 376 * and then the default data if there is no language-specific property
370 * available for the character. 377 * available for the character.
371 * 378 *
372 * @param ch character to check 379 * @param ch character to check
373 * @param lbpLang pointer to the language-specific line breaking 380 * @param lbpLang pointer to the language-specific line breaking
374 * properties array 381 * properties array
375 * @return the line breaking class if found; \c LBP_XX 382 * @return the line breaking class if found; \c LBP_XX
376 * otherwise 383 * otherwise
377 */ 384 */
378static enum LineBreakClass get_char_lb_class_lang( 385static enum LineBreakClass get_char_lb_class_lang(
379 utf32_t ch, 386 utf32_t ch,
380 struct LineBreakProperties *lbpLang) 387 struct LineBreakProperties *lbpLang)
381{ 388{
382 enum LineBreakClass lbcResult; 389 enum LineBreakClass lbcResult;
383 390
384 /* Find the language-specific line breaking class for a character */ 391 /* Find the language-specific line breaking class for a character */
385 if (lbpLang) 392 if (lbpLang)
386 { 393 {
387 lbcResult = get_char_lb_class(ch, lbpLang); 394 lbcResult = get_char_lb_class(ch, lbpLang);
388 if (lbcResult != LBP_XX) 395 if (lbcResult != LBP_XX)
389 return lbcResult; 396 return lbcResult;
390 } 397 }
391 398
392 /* Find the generic language-specific line breaking class, if no 399 /* Find the generic language-specific line breaking class, if no
393 * language context is provided, or language-specific data are not 400 * language context is provided, or language-specific data are not
394 * available for the specific character in the specified language */ 401 * available for the specific character in the specified language */
395 return get_char_lb_class_default(ch); 402 return get_char_lb_class_default(ch);
396} 403}
397 404
398/** 405/**
@@ -400,40 +407,214 @@ static enum LineBreakClass get_char_lb_class_lang(
400 * characters. They are treated in a simplistic way in this 407 * characters. They are treated in a simplistic way in this
401 * implementation. 408 * implementation.
402 * 409 *
403 * @param lbc line breaking class to resolve 410 * @param lbc line breaking class to resolve
404 * @param lang language of the text 411 * @param lang language of the text
405 * @return the resolved line breaking class 412 * @return the resolved line breaking class
406 */ 413 */
407static enum LineBreakClass resolve_lb_class( 414static enum LineBreakClass resolve_lb_class(
408 enum LineBreakClass lbc, 415 enum LineBreakClass lbc,
409 const char *lang) 416 const char *lang)
417{
418 switch (lbc)
419 {
420 case LBP_AI:
421 if (lang != NULL &&
422 (strncmp(lang, "zh", 2) == 0 || /* Chinese */
423 strncmp(lang, "ja", 2) == 0 || /* Japanese */
424 strncmp(lang, "ko", 2) == 0)) /* Korean */
425 {
426 return LBP_ID;
427 }
428 else
429 {
430 return LBP_AL;
431 }
432 case LBP_CJ:
433 /* Simplified for `normal' line breaking. See
434 * <url:http://www.unicode.org/reports/tr14/tr14-30.html#CJ>
435 * for details. */
436 return LBP_ID;
437 case LBP_SA:
438 case LBP_SG:
439 case LBP_XX:
440 return LBP_AL;
441 default:
442 return lbc;
443 }
444}
445
446/**
447 * Treats specially for the first character in a line.
448 *
449 * @param[in,out] lbpCtx pointer to the line breaking context
450 * @pre \a lbpCtx->lbcCur has a valid line break class
451 * @post \a lbpCtx->lbcCur has the updated line break class
452 */
453static void treat_first_char(
454 struct LineBreakContext* lbpCtx)
455{
456 switch (lbpCtx->lbcCur)
457 {
458 case LBP_LF:
459 case LBP_NL:
460 lbpCtx->lbcCur = LBP_BK; /* Rule LB5 */
461 break;
462 case LBP_CB:
463 lbpCtx->lbcCur = LBP_BA; /* Rule LB20 */
464 break;
465 case LBP_SP:
466 lbpCtx->lbcCur = LBP_WJ; /* Leading space treated as WJ */
467 break;
468 default:
469 break;
470 }
471}
472
473/**
474 * Tries telling the line break opportunity by simple rules.
475 *
476 * @param[in,out] lbpCtx pointer to the line breaking context
477 * @pre \a lbpCtx->lbcCur has the current line break
478 * class; and \a lbpCtx->lbcNew has the line
479 * break class for the next character
480 * @post \a lbpCtx->lbcCur has the updated line break
481 * class
482 * @return break result, one of #LINEBREAK_MUSTBREAK,
483 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
484 * if identified; or #LINEBREAK_UNDEFINED if
485 * table lookup is needed
486 */
487static int get_lb_result_simple(
488 struct LineBreakContext* lbpCtx)
489{
490 if (lbpCtx->lbcCur == LBP_BK
491 || (lbpCtx->lbcCur == LBP_CR && lbpCtx->lbcNew != LBP_LF))
492 {
493 return LINEBREAK_MUSTBREAK; /* Rules LB4 and LB5 */
494 }
495
496 switch (lbpCtx->lbcNew)
497 {
498 case LBP_SP:
499 return LINEBREAK_NOBREAK; /* Rule LB7; no change to lbcCur */
500 case LBP_BK:
501 case LBP_LF:
502 case LBP_NL:
503 lbpCtx->lbcCur = LBP_BK; /* Mandatory break after */
504 return LINEBREAK_NOBREAK; /* Rule LB6 */
505 case LBP_CR:
506 lbpCtx->lbcCur = LBP_CR;
507 return LINEBREAK_NOBREAK; /* Rule LB6 */
508 case LBP_CB:
509 lbpCtx->lbcCur = LBP_BA;
510 return LINEBREAK_ALLOWBREAK; /* Rule LB20 */
511 default:
512 return LINEBREAK_UNDEFINED; /* Table lookup is needed */
513 }
514}
515
516/**
517 * Tells the line break opportunity by table lookup.
518 *
519 * @param[in,out] lbpCtx pointer to the line breaking context
520 * @pre \a lbpCtx->lbcCur has the current line break
521 * class; \a lbpCtx->lbcLast has the line break
522 * class for the last character; and \a
523 * lbcCur->lbcNew has the line break class for
524 * the next character
525 * @post \a lbpCtx->lbcCur has the updated line break
526 * class
527 * @return break result, one of #LINEBREAK_MUSTBREAK,
528 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
529 */
530static int get_lb_result_lookup(
531 struct LineBreakContext* lbpCtx)
532{
533 /* TODO: Rule LB21a, as introduced by Revision 28 of UAX#14, is not
534 * yet implemented below. */
535 int brk = LINEBREAK_UNDEFINED;
536 assert(lbpCtx->lbcCur <= LBP_JT);
537 assert(lbpCtx->lbcNew <= LBP_JT);
538 switch (baTable[lbpCtx->lbcCur - 1][lbpCtx->lbcNew - 1])
539 {
540 case DIR_BRK:
541 brk = LINEBREAK_ALLOWBREAK;
542 break;
543 case CMI_BRK:
544 case IND_BRK:
545 brk = (lbpCtx->lbcLast == LBP_SP)
546 ? LINEBREAK_ALLOWBREAK
547 : LINEBREAK_NOBREAK;
548 break;
549 case CMP_BRK:
550 brk = LINEBREAK_NOBREAK;
551 if (lbpCtx->lbcLast != LBP_SP)
552 return brk; /* Do not update lbcCur */
553 break;
554 case PRH_BRK:
555 brk = LINEBREAK_NOBREAK;
556 break;
557 }
558 lbpCtx->lbcCur = lbpCtx->lbcNew;
559 return brk;
560}
561
562/**
563 * Initializes line breaking context for a given language.
564 *
565 * @param[in,out] lbpCtx pointer to the line breaking context
566 * @param[in] ch the first character to process
567 * @param[in] lang language of the input
568 * @post the line breaking context is initialized
569 */
570void lb_init_break_context(
571 struct LineBreakContext* lbpCtx,
572 utf32_t ch,
573 const char* lang)
574{
575 lbpCtx->lang = lang;
576 lbpCtx->lbpLang = get_lb_prop_lang(lang);
577 lbpCtx->lbcLast = LBP_Undefined;
578 lbpCtx->lbcNew = LBP_Undefined;
579 lbpCtx->lbcCur = resolve_lb_class(
580 get_char_lb_class_lang(ch, lbpCtx->lbpLang),
581 lbpCtx->lang);
582 treat_first_char(lbpCtx);
583}
584
585/**
586 * Updates LineBreakingContext for the next code point and returns
587 * the detected break.
588 *
589 * @param[in,out] lbpCtx pointer to the line breaking context
590 * @param[in] ch Unicode code point
591 * @return break result, one of #LINEBREAK_MUSTBREAK,
592 * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
593 * @post the line breaking context is updated
594 */
595int lb_process_next_char(
596 struct LineBreakContext* lbpCtx,
597 utf32_t ch )
410{ 598{
411 switch (lbc) 599 int brk;
412 { 600
413 case LBP_AI: 601 lbpCtx->lbcLast = lbpCtx->lbcNew;
414 if (lang != NULL && 602 lbpCtx->lbcNew = get_char_lb_class_lang(ch, lbpCtx->lbpLang);
415 (strncmp(lang, "zh", 2) == 0 || /* Chinese */ 603 brk = get_lb_result_simple(lbpCtx);
416 strncmp(lang, "ja", 2) == 0 || /* Japanese */ 604 switch (brk)
417 strncmp(lang, "ko", 2) == 0)) /* Korean */ 605 {
418 { 606 case LINEBREAK_MUSTBREAK:
419 return LBP_ID; 607 lbpCtx->lbcCur = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
420 } 608 treat_first_char(lbpCtx);
421 else 609 break;
422 { 610 case LINEBREAK_UNDEFINED:
423 return LBP_AL; 611 lbpCtx->lbcNew = resolve_lb_class(lbpCtx->lbcNew, lbpCtx->lang);
424 } 612 brk = get_lb_result_lookup(lbpCtx);
425 case LBP_CJ: 613 break;
426 /* Simplified for `normal' line breaking. See 614 default:
427 * <url:http://www.unicode.org/reports/tr14/tr14-28.html#CJ> 615 break;
428 * for details. */ 616 }
429 return LBP_ID; 617 return brk;
430 case LBP_SA:
431 case LBP_SG:
432 case LBP_XX:
433 return LBP_AL;
434 default:
435 return lbc;
436 }
437} 618}
438 619
439/** 620/**
@@ -441,59 +622,59 @@ static enum LineBreakClass resolve_lb_class(
441 * be advanced to the next complete character, unless the end of string 622 * be advanced to the next complete character, unless the end of string
442 * is reached in the middle of a UTF-8 sequence. 623 * is reached in the middle of a UTF-8 sequence.
443 * 624 *
444 * @param[in] s input UTF-8 string 625 * @param[in] s input UTF-8 string
445 * @param[in] len length of the string in bytes 626 * @param[in] len length of the string in bytes
446 * @param[in,out] ip pointer to the index 627 * @param[in,out] ip pointer to the index
447 * @return the Unicode character beginning at the index; or 628 * @return the Unicode character beginning at the index; or
448 * #EOS if end of input is encountered 629 * #EOS if end of input is encountered
449 */ 630 */
450utf32_t lb_get_next_char_utf8( 631utf32_t lb_get_next_char_utf8(
451 const utf8_t *s, 632 const utf8_t *s,
452 size_t len, 633 size_t len,
453 size_t *ip) 634 size_t *ip)
454{ 635{
455 utf8_t ch; 636 utf8_t ch;
456 utf32_t res; 637 utf32_t res;
457 638
458 assert(*ip <= len); 639 assert(*ip <= len);
459 if (*ip == len) 640 if (*ip == len)
460 return EOS; 641 return EOS;
461 ch = s[*ip]; 642 ch = s[*ip];
462 643
463 if (ch < 0xC2 || ch > 0xF4) 644 if (ch < 0xC2 || ch > 0xF4)
464 { /* One-byte sequence, tail (should not occur), or invalid */ 645 { /* One-byte sequence, tail (should not occur), or invalid */
465 *ip += 1; 646 *ip += 1;
466 return ch; 647 return ch;
467 } 648 }
468 else if (ch < 0xE0) 649 else if (ch < 0xE0)
469 { /* Two-byte sequence */ 650 { /* Two-byte sequence */
470 if (*ip + 2 > len) 651 if (*ip + 2 > len)
471 return EOS; 652 return EOS;
472 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F); 653 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
473 *ip += 2; 654 *ip += 2;
474 return res; 655 return res;
475 } 656 }
476 else if (ch < 0xF0) 657 else if (ch < 0xF0)
477 { /* Three-byte sequence */ 658 { /* Three-byte sequence */
478 if (*ip + 3 > len) 659 if (*ip + 3 > len)
479 return EOS; 660 return EOS;
480 res = ((ch & 0x0F) << 12) + 661 res = ((ch & 0x0F) << 12) +
481 ((s[*ip + 1] & 0x3F) << 6) + 662 ((s[*ip + 1] & 0x3F) << 6) +
482 ((s[*ip + 2] & 0x3F)); 663 ((s[*ip + 2] & 0x3F));
483 *ip += 3; 664 *ip += 3;
484 return res; 665 return res;
485 } 666 }
486 else 667 else
487 { /* Four-byte sequence */ 668 { /* Four-byte sequence */
488 if (*ip + 4 > len) 669 if (*ip + 4 > len)
489 return EOS; 670 return EOS;
490 res = ((ch & 0x07) << 18) + 671 res = ((ch & 0x07) << 18) +
491 ((s[*ip + 1] & 0x3F) << 12) + 672 ((s[*ip + 1] & 0x3F) << 12) +
492 ((s[*ip + 2] & 0x3F) << 6) + 673 ((s[*ip + 2] & 0x3F) << 6) +
493 ((s[*ip + 3] & 0x3F)); 674 ((s[*ip + 3] & 0x3F));
494 *ip += 4; 675 *ip += 4;
495 return res; 676 return res;
496 } 677 }
497} 678}
498 679
499/** 680/**
@@ -501,263 +682,174 @@ utf32_t lb_get_next_char_utf8(
501 * be advanced to the next complete character, unless the end of string 682 * be advanced to the next complete character, unless the end of string
502 * is reached in the middle of a UTF-16 surrogate pair. 683 * is reached in the middle of a UTF-16 surrogate pair.
503 * 684 *
504 * @param[in] s input UTF-16 string 685 * @param[in] s input UTF-16 string
505 * @param[in] len length of the string in words 686 * @param[in] len length of the string in words
506 * @param[in,out] ip pointer to the index 687 * @param[in,out] ip pointer to the index
507 * @return the Unicode character beginning at the index; or 688 * @return the Unicode character beginning at the index; or
508 * #EOS if end of input is encountered 689 * #EOS if end of input is encountered
509 */ 690 */
510utf32_t lb_get_next_char_utf16( 691utf32_t lb_get_next_char_utf16(
511 const utf16_t *s, 692 const utf16_t *s,
512 size_t len, 693 size_t len,
513 size_t *ip) 694 size_t *ip)
514{ 695{
515 utf16_t ch; 696 utf16_t ch;
516 697
517 assert(*ip <= len); 698 assert(*ip <= len);
518 if (*ip == len) 699 if (*ip == len)
519 return EOS; 700 return EOS;
520 ch = s[(*ip)++]; 701 ch = s[(*ip)++];
521 702
522 if (ch < 0xD800 || ch > 0xDBFF) 703 if (ch < 0xD800 || ch > 0xDBFF)
523 { /* If the character is not a high surrogate */ 704 { /* If the character is not a high surrogate */
524 return ch; 705 return ch;
525 } 706 }
526 if (*ip == len) 707 if (*ip == len)
527 { /* If the input ends here (an error) */ 708 { /* If the input ends here (an error) */
528 --(*ip); 709 --(*ip);
529 return EOS; 710 return EOS;
530 } 711 }
531 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF) 712 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
532 { /* If the next character is not the low surrogate (an error) */ 713 { /* If the next character is not the low surrogate (an error) */
533 return ch; 714 return ch;
534 } 715 }
535 /* Return the constructed character and advance the index again */ 716 /* Return the constructed character and advance the index again */
536 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000; 717 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
537} 718}
538 719
539/** 720/**
540 * Gets the next Unicode character in a UTF-32 sequence. The index will 721 * Gets the next Unicode character in a UTF-32 sequence. The index will
541 * be advanced to the next character. 722 * be advanced to the next character.
542 * 723 *
543 * @param[in] s input UTF-32 string 724 * @param[in] s input UTF-32 string
544 * @param[in] len length of the string in dwords 725 * @param[in] len length of the string in dwords
545 * @param[in,out] ip pointer to the index 726 * @param[in,out] ip pointer to the index
546 * @return the Unicode character beginning at the index; or 727 * @return the Unicode character beginning at the index; or
547 * #EOS if end of input is encountered 728 * #EOS if end of input is encountered
548 */ 729 */
549utf32_t lb_get_next_char_utf32( 730utf32_t lb_get_next_char_utf32(
550 const utf32_t *s, 731 const utf32_t *s,
551 size_t len, 732 size_t len,
552 size_t *ip) 733 size_t *ip)
553{ 734{
554 assert(*ip <= len); 735 assert(*ip <= len);
555 if (*ip == len) 736 if (*ip == len)
556 return EOS; 737 return EOS;
557 return s[(*ip)++]; 738 return s[(*ip)++];
558} 739}
559 740
560/** 741/**
561 * Sets the line breaking information for a generic input string. 742 * Sets the line breaking information for a generic input string.
562 * 743 *
563 * @param[in] s input string 744 * @param[in] s input string
564 * @param[in] len length of the input 745 * @param[in] len length of the input
565 * @param[in] lang language of the input 746 * @param[in] lang language of the input
566 * @param[out] brks pointer to the output breaking data, 747 * @param[out] brks pointer to the output breaking data,
567 * containing #LINEBREAK_MUSTBREAK, 748 * containing #LINEBREAK_MUSTBREAK,
568 * #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK, 749 * #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
569 * or #LINEBREAK_INSIDEACHAR 750 * or #LINEBREAK_INSIDEACHAR
570 * @param[in] get_next_char function to get the next UTF-32 character 751 * @param[in] get_next_char function to get the next UTF-32 character
571 */ 752 */
572void set_linebreaks( 753void set_linebreaks(
573 const void *s, 754 const void *s,
574 size_t len, 755 size_t len,
575 const char *lang, 756 const char *lang,
576 char *brks, 757 char *brks,
577 get_next_char_t get_next_char) 758 get_next_char_t get_next_char)
578{ 759{
579 utf32_t ch; 760 utf32_t ch;
580 enum LineBreakClass lbcCur; 761 struct LineBreakContext lbCtx;
581 enum LineBreakClass lbcNew; 762 size_t posCur = 0;
582 enum LineBreakClass lbcLast; 763 size_t posLast = 0;
583 struct LineBreakProperties *lbpLang; 764
584 size_t posCur = 0; 765 --posLast; /* To be ++'d later */
585 size_t posLast = 0; 766 ch = get_next_char(s, len, &posCur);
586 767 if (ch == EOS)
587 --posLast; /* To be ++'d later */ 768 return;
588 ch = get_next_char(s, len, &posCur); 769 lb_init_break_context(&lbCtx, ch, lang);
589 if (ch == EOS) 770
590 return; 771 /* Process a line till an explicit break or end of string */
591 lbpLang = get_lb_prop_lang(lang); 772 for (;;)
592 lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang); 773 {
593 lbcNew = LBP_Undefined; 774 for (++posLast; posLast < posCur - 1; ++posLast)
594 775 {
595nextline: 776 brks[posLast] = LINEBREAK_INSIDEACHAR;
596 777 }
597 /* Special treatment for the first character */ 778 assert(posLast == posCur - 1);
598 switch (lbcCur) 779 ch = get_next_char(s, len, &posCur);
599 { 780 if (ch == EOS)
600 case LBP_LF: 781 break;
601 case LBP_NL: 782 brks[posLast] = lb_process_next_char(&lbCtx, ch);
602 lbcCur = LBP_BK; 783 }
603 break; 784
604 case LBP_CB: 785 assert(posLast == posCur - 1 && posCur <= len);
605 lbcCur = LBP_BA; 786 /* Break after the last character */
606 break; 787 brks[posLast] = LINEBREAK_MUSTBREAK;
607 case LBP_SP: 788 /* When the input contains incomplete sequences */
608 lbcCur = LBP_WJ; 789 while (posCur < len)
609 break; 790 {
610 default: 791 brks[posCur++] = LINEBREAK_INSIDEACHAR;
611 break; 792 }
612 }
613
614 /* Process a line till an explicit break or end of string */
615 for (;;)
616 {
617 for (++posLast; posLast < posCur - 1; ++posLast)
618 {
619 brks[posLast] = LINEBREAK_INSIDEACHAR;
620 }
621 assert(posLast == posCur - 1);
622 lbcLast = lbcNew;
623 ch = get_next_char(s, len, &posCur);
624 if (ch == EOS)
625 break;
626 lbcNew = get_char_lb_class_lang(ch, lbpLang);
627 if (lbcCur == LBP_BK || (lbcCur == LBP_CR && lbcNew != LBP_LF))
628 {
629 brks[posLast] = LINEBREAK_MUSTBREAK;
630 lbcCur = resolve_lb_class(lbcNew, lang);
631 goto nextline;
632 }
633
634 switch (lbcNew)
635 {
636 case LBP_SP:
637 brks[posLast] = LINEBREAK_NOBREAK;
638 continue;
639 case LBP_BK:
640 case LBP_LF:
641 case LBP_NL:
642 brks[posLast] = LINEBREAK_NOBREAK;
643 lbcCur = LBP_BK;
644 continue;
645 case LBP_CR:
646 brks[posLast] = LINEBREAK_NOBREAK;
647 lbcCur = LBP_CR;
648 continue;
649 case LBP_CB:
650 brks[posLast] = LINEBREAK_ALLOWBREAK;
651 lbcCur = LBP_BA;
652 continue;
653 default:
654 break;
655 }
656
657 lbcNew = resolve_lb_class(lbcNew, lang);
658
659 /* TODO: LB21a, as introduced by Revision 28 of UAX#14, is not
660 * yet implemented below. */
661
662 assert(lbcCur <= LBP_JT);
663 assert(lbcNew <= LBP_JT);
664 switch (baTable[lbcCur - 1][lbcNew - 1])
665 {
666 case DIR_BRK:
667 brks[posLast] = LINEBREAK_ALLOWBREAK;
668 break;
669 case CMI_BRK:
670 case IND_BRK:
671 if (lbcLast == LBP_SP)
672 {
673 brks[posLast] = LINEBREAK_ALLOWBREAK;
674 }
675 else
676 {
677 brks[posLast] = LINEBREAK_NOBREAK;
678 }
679 break;
680 case CMP_BRK:
681 brks[posLast] = LINEBREAK_NOBREAK;
682 if (lbcLast != LBP_SP)
683 continue;
684 break;
685 case PRH_BRK:
686 brks[posLast] = LINEBREAK_NOBREAK;
687 break;
688 }
689
690 lbcCur = lbcNew;
691 }
692
693 assert(posLast == posCur - 1 && posCur <= len);
694 /* Break after the last character */
695 brks[posLast] = LINEBREAK_MUSTBREAK;
696 /* When the input contains incomplete sequences */
697 while (posCur < len)
698 {
699 brks[posCur++] = LINEBREAK_INSIDEACHAR;
700 }
701} 793}
702 794
703/** 795/**
704 * Sets the line breaking information for a UTF-8 input string. 796 * Sets the line breaking information for a UTF-8 input string.
705 * 797 *
706 * @param[in] s input UTF-8 string 798 * @param[in] s input UTF-8 string
707 * @param[in] len length of the input 799 * @param[in] len length of the input
708 * @param[in] lang language of the input 800 * @param[in] lang language of the input
709 * @param[out] brks pointer to the output breaking data, containing 801 * @param[out] brks pointer to the output breaking data, containing
710 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 802 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
711 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 803 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
712 */ 804 */
713void set_linebreaks_utf8( 805void set_linebreaks_utf8(
714 const utf8_t *s, 806 const utf8_t *s,
715 size_t len, 807 size_t len,
716 const char *lang, 808 const char *lang,
717 char *brks) 809 char *brks)
718{ 810{
719 set_linebreaks(s, len, lang, brks, 811 set_linebreaks(s, len, lang, brks,
720 (get_next_char_t)lb_get_next_char_utf8); 812 (get_next_char_t)lb_get_next_char_utf8);
721} 813}
722 814
723/** 815/**
724 * Sets the line breaking information for a UTF-16 input string. 816 * Sets the line breaking information for a UTF-16 input string.
725 * 817 *
726 * @param[in] s input UTF-16 string 818 * @param[in] s input UTF-16 string
727 * @param[in] len length of the input 819 * @param[in] len length of the input
728 * @param[in] lang language of the input 820 * @param[in] lang language of the input
729 * @param[out] brks pointer to the output breaking data, containing 821 * @param[out] brks pointer to the output breaking data, containing
730 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 822 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
731 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 823 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
732 */ 824 */
733void set_linebreaks_utf16( 825void set_linebreaks_utf16(
734 const utf16_t *s, 826 const utf16_t *s,
735 size_t len, 827 size_t len,
736 const char *lang, 828 const char *lang,
737 char *brks) 829 char *brks)
738{ 830{
739 set_linebreaks(s, len, lang, brks, 831 set_linebreaks(s, len, lang, brks,
740 (get_next_char_t)lb_get_next_char_utf16); 832 (get_next_char_t)lb_get_next_char_utf16);
741} 833}
742 834
743/** 835/**
744 * Sets the line breaking information for a UTF-32 input string. 836 * Sets the line breaking information for a UTF-32 input string.
745 * 837 *
746 * @param[in] s input UTF-32 string 838 * @param[in] s input UTF-32 string
747 * @param[in] len length of the input 839 * @param[in] len length of the input
748 * @param[in] lang language of the input 840 * @param[in] lang language of the input
749 * @param[out] brks pointer to the output breaking data, containing 841 * @param[out] brks pointer to the output breaking data, containing
750 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 842 * #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
751 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 843 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
752 */ 844 */
753void set_linebreaks_utf32( 845void set_linebreaks_utf32(
754 const utf32_t *s, 846 const utf32_t *s,
755 size_t len, 847 size_t len,
756 const char *lang, 848 const char *lang,
757 char *brks) 849 char *brks)
758{ 850{
759 set_linebreaks(s, len, lang, brks, 851 set_linebreaks(s, len, lang, brks,
760 (get_next_char_t)lb_get_next_char_utf32); 852 (get_next_char_t)lb_get_next_char_utf32);
761} 853}
762 854
763/** 855/**
@@ -767,21 +859,21 @@ void set_linebreaks_utf32(
767 * complicated cases involving combining marks, spaces, etc. cannot be 859 * complicated cases involving combining marks, spaces, etc. cannot be
768 * correctly processed. 860 * correctly processed.
769 * 861 *
770 * @param char1 the first Unicode character 862 * @param char1 the first Unicode character
771 * @param char2 the second Unicode character 863 * @param char2 the second Unicode character
772 * @param lang language of the input 864 * @param lang language of the input
773 * @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK, 865 * @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
774 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR 866 * #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
775 */ 867 */
776int is_line_breakable( 868int is_line_breakable(
777 utf32_t char1, 869 utf32_t char1,
778 utf32_t char2, 870 utf32_t char2,
779 const char* lang) 871 const char* lang)
780{ 872{
781 utf32_t s[2]; 873 utf32_t s[2];
782 char brks[2]; 874 char brks[2];
783 s[0] = char1; 875 s[0] = char1;
784 s[1] = char2; 876 s[1] = char2;
785 set_linebreaks_utf32(s, 2, lang, brks); 877 set_linebreaks_utf32(s, 2, lang, brks);
786 return brks[0]; 878 return brks[0];
787} 879}