Static deps unibreak: Update to latest version.

This version supports Unicode 8.0 and includes fixes over the previous
version.
Reference version: 03ae8dd7d6ce6d19a38c1e62c70afc6ad82513bc

@feature
This commit is contained in:
Tom Hacohen 2015-12-21 11:10:35 +00:00
parent 9626acdb4d
commit db72b93601
15 changed files with 190 additions and 86 deletions

View File

@ -1,3 +1,49 @@
2015-12-20 Wu Yongwei <wuyongwei@gmail.com>
Fix the issue that U+FFFC (Object Replacement Character) does not
break correctly after Hebrew letters.
* src/linebreak.c (get_lb_result_simple): Resolve `Contingent Break
Opportunity' to `Break Opportunity Before and After'.
2015-11-11 novelplus <novelplus@outlook.com>
Update to Unicode 8.0.0.
* src/linebreak.c (baTable): Update according to Unicode 8.0.0.
* src/linebreakdata.c: Regenerate from LineBreak-8.0.0.txt.
* src/wordbreak.c: Update comments.
* src/wordbreakdata.c: Regenerate from WordBreakProperty-8.0.0.txt.
* tools/test.txt: Add more test text for new line-breaking rules.
2015-05-18 Wu Yongwei <wuyongwei@gmail.com>
* src/wordbreak.c: Eliminate a warning under the release build.
2015-05-18 Wu Yongwei <wuyongwei@gmail.com>
* src/Makefile.gcc: Update for the new files unibreakbase.c and
unibreakdef.c.
2015-05-14 Wu Yongwei <wuyongwei@gmail.com>
* src/Makefile.msvc: Update for the new files unibreakbase.c,
unibreakbase.h, unibreakdef.c, and unibreakdef.h.
2015-05-10 Wu Yongwei <wuyongwei@gmail.com>
Update for the libunibreak 3.0 release.
* NEWS: Add information about libunibreak 3.0.
* src/linebreak.c: Mark file version as 3.0.
* src/linebreak.h: Ditto.
* src/linebreakdef.c: Ditto.
* src/linebreakdef.h: Ditto.
* src/unibreakbase.c: Ditto.
* src/unibreakbase.h: Ditto.
* src/unibreakdef.c: Ditto.
* src/unibreakdef.h: Ditto.
* src/wordbreak.c: Ditto.
* src/wordbreak.h: Ditto.
* src/wordbreakdef.h: Ditto.
2015-04-19 Wu Yongwei <wuyongwei@gmail.com>
* LICENCE: Update copyright information.
@ -124,6 +170,10 @@
(lb_init_break_context): Clear fLb21aHebrew.
(get_lb_result_lookup): Apply rule LB21a and update fLb21aHebrew.
2014-12-30 Wu Yongwei <wuyongwei@gmail.com>
* src/linebreakdata.c: Regenerate from LineBreak-7.0.0.txt.
2014-12-06 Mikhail Polubisok <mpolubisok@gmail.com>
* src/linebreak.c (get_lb_result_lookup): Extend assertion condition

View File

@ -1,3 +1,10 @@
New in libunibreak 3.0
- Update the code and data to conform to Unicode 7.0.0
- Update build scripts to fix compatibility issues
- Improve code structure
- Make a few bug fixes
New in libunibreak 1.1
- Update the code and data to conform to Unicode 6.2.0

View File

@ -31,9 +31,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 33, for
* Unicode 7.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
* This library has been updated according to Revision 35, for
* Unicode 8.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -45,7 +45,7 @@
* Implementation of the line breaking algorithm as described in Unicode
* Standard Annex 14.
*
* @version 2.7, 2015/04/18
* @version 3.0, 2015/05/10
* @author Wu Yongwei
* @author Petr Filipsky
*/
@ -123,12 +123,12 @@ static enum BreakAction baTable[LBP_RI][LBP_RI] = {
{ /* EX */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
DIR_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* SY */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, PRH_BRK,
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
@ -503,7 +503,7 @@ static int get_lb_result_simple(
lbpCtx->lbcCur = LBP_CR;
return LINEBREAK_NOBREAK; /* Rule LB6 */
case LBP_CB:
lbpCtx->lbcCur = LBP_BA;
lbpCtx->lbcCur = LBP_B2;
return LINEBREAK_ALLOWBREAK; /* Rule LB20 */
default:
return LINEBREAK_UNDEFINED; /* Table lookup is needed */

View File

@ -43,7 +43,7 @@
*
* Header file for the line breaking algorithm.
*
* @version 2.4, 2015/04/18
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/

View File

@ -1,6 +1,6 @@
/* The content of this file is generated from:
# LineBreak-7.0.0.txt
# Date: 2014-02-28, 23:15:00 GMT [KW, LI]
# LineBreak-8.0.0.txt
# Date: 2015-02-13, 09:15:00 GMT [KW, LI]
*/
#include "linebreakdef.h"
@ -159,8 +159,8 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x0829, 0x082D, LBP_CM },
{ 0x0830, 0x0858, LBP_AL },
{ 0x0859, 0x085B, LBP_CM },
{ 0x085E, 0x08B2, LBP_AL },
{ 0x08E4, 0x0903, LBP_CM },
{ 0x085E, 0x08B4, LBP_AL },
{ 0x08E3, 0x0903, LBP_CM },
{ 0x0904, 0x0939, LBP_AL },
{ 0x093A, 0x093C, LBP_CM },
{ 0x093D, 0x093D, LBP_AL },
@ -205,6 +205,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x0AE6, 0x0AEF, LBP_NU },
{ 0x0AF0, 0x0AF0, LBP_AL },
{ 0x0AF1, 0x0AF1, LBP_PR },
{ 0x0AF9, 0x0AF9, LBP_AL },
{ 0x0B01, 0x0B03, LBP_CM },
{ 0x0B05, 0x0B39, LBP_AL },
{ 0x0B3C, 0x0B3C, LBP_CM },
@ -244,7 +245,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x0D3E, 0x0D4D, LBP_CM },
{ 0x0D4E, 0x0D4E, LBP_AL },
{ 0x0D57, 0x0D57, LBP_CM },
{ 0x0D60, 0x0D61, LBP_AL },
{ 0x0D5F, 0x0D61, LBP_AL },
{ 0x0D62, 0x0D63, LBP_CM },
{ 0x0D66, 0x0D6F, LBP_NU },
{ 0x0D70, 0x0D75, LBP_AL },
@ -325,7 +326,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x135D, 0x135F, LBP_CM },
{ 0x1360, 0x1360, LBP_AL },
{ 0x1361, 0x1361, LBP_BA },
{ 0x1362, 0x13F4, LBP_AL },
{ 0x1362, 0x13FD, LBP_AL },
{ 0x1400, 0x1400, LBP_BA },
{ 0x1401, 0x167F, LBP_AL },
{ 0x1680, 0x1680, LBP_BA },
@ -481,7 +482,9 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x20B6, 0x20B6, LBP_PO },
{ 0x20B7, 0x20BA, LBP_PR },
{ 0x20BB, 0x20BB, LBP_PO },
{ 0x20BC, 0x20CF, LBP_PR },
{ 0x20BC, 0x20BD, LBP_PR },
{ 0x20BE, 0x20BE, LBP_PO },
{ 0x20BF, 0x20CF, LBP_PR },
{ 0x20D0, 0x20F0, LBP_CM },
{ 0x2100, 0x2102, LBP_AL },
{ 0x2103, 0x2103, LBP_PO },
@ -508,7 +511,9 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x216C, 0x216F, LBP_AL },
{ 0x2170, 0x2179, LBP_AI },
{ 0x217A, 0x2188, LBP_AL },
{ 0x2189, 0x2199, LBP_AI },
{ 0x2189, 0x2189, LBP_AI },
{ 0x218A, 0x218B, LBP_AL },
{ 0x2190, 0x2199, LBP_AI },
{ 0x219A, 0x21D1, LBP_AL },
{ 0x21D2, 0x21D2, LBP_AI },
{ 0x21D3, 0x21D3, LBP_AL },
@ -570,7 +575,9 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x22A5, 0x22A5, LBP_AI },
{ 0x22A6, 0x22BE, LBP_AL },
{ 0x22BF, 0x22BF, LBP_AI },
{ 0x22C0, 0x2307, LBP_AL },
{ 0x22C0, 0x22EE, LBP_AL },
{ 0x22EF, 0x22EF, LBP_IN },
{ 0x22F0, 0x2307, LBP_AL },
{ 0x2308, 0x2308, LBP_OP },
{ 0x2309, 0x2309, LBP_CL },
{ 0x230A, 0x230A, LBP_OP },
@ -898,7 +905,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0xA673, 0xA673, LBP_AL },
{ 0xA674, 0xA67D, LBP_CM },
{ 0xA67E, 0xA69D, LBP_AL },
{ 0xA69F, 0xA69F, LBP_CM },
{ 0xA69E, 0xA69F, LBP_CM },
{ 0xA6A0, 0xA6EF, LBP_AL },
{ 0xA6F0, 0xA6F1, LBP_CM },
{ 0xA6F2, 0xA6F2, LBP_AL },
@ -923,6 +930,8 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0xA8D0, 0xA8D9, LBP_NU },
{ 0xA8E0, 0xA8F1, LBP_CM },
{ 0xA8F2, 0xA8FB, LBP_AL },
{ 0xA8FC, 0xA8FC, LBP_BB },
{ 0xA8FD, 0xA8FD, LBP_AL },
{ 0xA900, 0xA909, LBP_NU },
{ 0xA90A, 0xA925, LBP_AL },
{ 0xA926, 0xA92D, LBP_CM },
@ -1785,7 +1794,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0xFE17, 0xFE17, LBP_OP },
{ 0xFE18, 0xFE18, LBP_CL },
{ 0xFE19, 0xFE19, LBP_IN },
{ 0xFE20, 0xFE2D, LBP_CM },
{ 0xFE20, 0xFE2F, LBP_CM },
{ 0xFE30, 0xFE34, LBP_ID },
{ 0xFE35, 0xFE35, LBP_OP },
{ 0xFE36, 0xFE36, LBP_CL },
@ -1929,21 +1938,31 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x111C5, 0x111C6, LBP_BA },
{ 0x111C7, 0x111C7, LBP_AL },
{ 0x111C8, 0x111C8, LBP_BA },
{ 0x111C9, 0x111C9, LBP_AL },
{ 0x111CA, 0x111CC, LBP_CM },
{ 0x111CD, 0x111CD, LBP_AL },
{ 0x111D0, 0x111D9, LBP_NU },
{ 0x111DA, 0x1122B, LBP_AL },
{ 0x111DA, 0x111DA, LBP_AL },
{ 0x111DB, 0x111DB, LBP_BB },
{ 0x111DC, 0x111DC, LBP_AL },
{ 0x111DD, 0x111DF, LBP_BA },
{ 0x111E1, 0x1122B, LBP_AL },
{ 0x1122C, 0x11237, LBP_CM },
{ 0x11238, 0x11239, LBP_BA },
{ 0x1123A, 0x1123A, LBP_AL },
{ 0x1123B, 0x1123C, LBP_BA },
{ 0x1123D, 0x112DE, LBP_AL },
{ 0x1123D, 0x112A8, LBP_AL },
{ 0x112A9, 0x112A9, LBP_BA },
{ 0x112B0, 0x112DE, LBP_AL },
{ 0x112DF, 0x112EA, LBP_CM },
{ 0x112F0, 0x112F9, LBP_NU },
{ 0x11301, 0x11303, LBP_CM },
{ 0x11300, 0x11303, LBP_CM },
{ 0x11305, 0x11339, LBP_AL },
{ 0x1133C, 0x1133C, LBP_CM },
{ 0x1133D, 0x1133D, LBP_AL },
{ 0x1133E, 0x11357, LBP_CM },
{ 0x1133E, 0x1134D, LBP_CM },
{ 0x11350, 0x11350, LBP_AL },
{ 0x11357, 0x11357, LBP_CM },
{ 0x1135D, 0x11361, LBP_AL },
{ 0x11362, 0x11374, LBP_CM },
{ 0x11480, 0x114AF, LBP_AL },
@ -1956,7 +1975,9 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x115C2, 0x115C3, LBP_BA },
{ 0x115C4, 0x115C5, LBP_EX },
{ 0x115C6, 0x115C8, LBP_AL },
{ 0x115C9, 0x115C9, LBP_BA },
{ 0x115C9, 0x115D7, LBP_BA },
{ 0x115D8, 0x115DB, LBP_AL },
{ 0x115DC, 0x115DD, LBP_CM },
{ 0x11600, 0x1162F, LBP_AL },
{ 0x11630, 0x11640, LBP_CM },
{ 0x11641, 0x11642, LBP_BA },
@ -1965,11 +1986,16 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x11680, 0x116AA, LBP_AL },
{ 0x116AB, 0x116B7, LBP_CM },
{ 0x116C0, 0x116C9, LBP_NU },
{ 0x11700, 0x1172B, LBP_SA },
{ 0x11730, 0x11739, LBP_NU },
{ 0x1173A, 0x1173B, LBP_SA },
{ 0x1173C, 0x1173E, LBP_BA },
{ 0x1173F, 0x1173F, LBP_SA },
{ 0x118A0, 0x118DF, LBP_AL },
{ 0x118E0, 0x118E9, LBP_NU },
{ 0x118EA, 0x1246E, LBP_AL },
{ 0x12470, 0x12474, LBP_BA },
{ 0x13000, 0x13257, LBP_AL },
{ 0x12480, 0x13257, LBP_AL },
{ 0x13258, 0x1325A, LBP_OP },
{ 0x1325B, 0x1325D, LBP_CL },
{ 0x1325E, 0x13281, LBP_AL },
@ -1982,7 +2008,10 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1328A, 0x13378, LBP_AL },
{ 0x13379, 0x13379, LBP_OP },
{ 0x1337A, 0x1337B, LBP_CL },
{ 0x1337C, 0x16A5E, LBP_AL },
{ 0x1337C, 0x145CD, LBP_AL },
{ 0x145CE, 0x145CE, LBP_OP },
{ 0x145CF, 0x145CF, LBP_CL },
{ 0x145D0, 0x16A5E, LBP_AL },
{ 0x16A60, 0x16A69, LBP_NU },
{ 0x16A6E, 0x16A6F, LBP_BA },
{ 0x16AD0, 0x16AED, LBP_AL },
@ -2015,6 +2044,18 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1D242, 0x1D244, LBP_CM },
{ 0x1D245, 0x1D7CB, LBP_AL },
{ 0x1D7CE, 0x1D7FF, LBP_NU },
{ 0x1D800, 0x1D9FF, LBP_AL },
{ 0x1DA00, 0x1DA36, LBP_CM },
{ 0x1DA37, 0x1DA3A, LBP_AL },
{ 0x1DA3B, 0x1DA6C, LBP_CM },
{ 0x1DA6D, 0x1DA74, LBP_AL },
{ 0x1DA75, 0x1DA75, LBP_CM },
{ 0x1DA76, 0x1DA83, LBP_AL },
{ 0x1DA84, 0x1DA84, LBP_CM },
{ 0x1DA85, 0x1DA86, LBP_AL },
{ 0x1DA87, 0x1DA8A, LBP_BA },
{ 0x1DA8B, 0x1DA8B, LBP_AL },
{ 0x1DA9B, 0x1DAAF, LBP_CM },
{ 0x1E800, 0x1E8CF, LBP_AL },
{ 0x1E8D0, 0x1E8D6, LBP_CM },
{ 0x1EE00, 0x1EEF1, LBP_AL },
@ -2031,7 +2072,9 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1F3B5, 0x1F3B6, LBP_AL },
{ 0x1F3B7, 0x1F3BB, LBP_ID },
{ 0x1F3BC, 0x1F3BC, LBP_AL },
{ 0x1F3BD, 0x1F49F, LBP_ID },
{ 0x1F3BD, 0x1F3FA, LBP_ID },
{ 0x1F3FB, 0x1F3FF, LBP_AL },
{ 0x1F400, 0x1F49F, LBP_ID },
{ 0x1F4A0, 0x1F4A0, LBP_AL },
{ 0x1F4A1, 0x1F4A1, LBP_ID },
{ 0x1F4A2, 0x1F4A2, LBP_AL },
@ -2041,7 +2084,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1F4AF, 0x1F4AF, LBP_AL },
{ 0x1F4B0, 0x1F4B0, LBP_ID },
{ 0x1F4B1, 0x1F4B2, LBP_AL },
{ 0x1F4B3, 0x1F4FE, LBP_ID },
{ 0x1F4B3, 0x1F4FF, LBP_ID },
{ 0x1F500, 0x1F506, LBP_AL },
{ 0x1F507, 0x1F516, LBP_ID },
{ 0x1F517, 0x1F524, LBP_AL },
@ -2058,7 +2101,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1F67C, 0x1F67F, LBP_AL },
{ 0x1F680, 0x1F6F3, LBP_ID },
{ 0x1F700, 0x1F8AD, LBP_AL },
{ 0x20000, 0x3FFFD, LBP_ID },
{ 0x1F910, 0x3FFFD, LBP_ID },
{ 0xE0001, 0xE01EF, LBP_CM },
{ 0xF0000, 0x10FFFD, LBP_XX },
{ 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined }

View File

@ -43,7 +43,7 @@
*
* Definition of language-specific data.
*
* @version 2.2, 2012/10/06
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/

View File

@ -45,7 +45,7 @@
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the line breaking algorithm.
*
* @version 2.6, 2015/04/18
* @version 3.0, 2015/05/10
* @author Wu Yongwei
* @author Petr Filipsky
*/

View File

@ -29,7 +29,7 @@
*
* Definition of basic libunibreak information.
*
* @version 1.0, 2015/04/18
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/

View File

@ -22,20 +22,6 @@
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*
* The main reference is Unicode Standard Annex 14 (UAX #14):
* <URL:http://www.unicode.org/reports/tr14/>
*
* When this library was designed, this annex was at Revision 19, for
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 33, for
* Unicode 7.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
/**
@ -43,15 +29,13 @@
*
* Header file for common definitions in the libunibreak library.
*
* @version 1.0, 2015/04/18
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/
#ifndef UNIBREAKBASE_H
#define UNIBREAKBASE_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif

View File

@ -29,7 +29,7 @@
*
* Definition of utility functions used by the libunibreak library.
*
* @version 1.0, 2015/04/18
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/

View File

@ -22,20 +22,6 @@
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*
* The main reference is Unicode Standard Annex 14 (UAX #14):
* <URL:http://www.unicode.org/reports/tr14/>
*
* When this library was designed, this annex was at Revision 19, for
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 33, for
* Unicode 7.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-33.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
/**
@ -43,13 +29,14 @@
*
* Header file for private definitions in the libunibreak library.
*
* @version 1.1, 2015/04/19
* @version 3.0, 2015/05/10
* @author Wu Yongwei
*/
#ifndef UNIBREAKDEF_H
#define UNIBREAKDEF_H
#include <stddef.h>
#include "unibreakbase.h"
#ifdef __cplusplus

View File

@ -30,9 +30,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
* This library has been updated according to Revision 25, for
* Unicode 7.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-25.html>
* This library has been updated according to Revision 27, for
* Unicode 8.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-27.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -44,7 +44,7 @@
* Implementation of the word breaking algorithm as described in Unicode
* Standard Annex 29.
*
* @version 2.6, 2015/04/18
* @version 3.1, 2015/05/18
* @author Tom Hacohen
*/
@ -127,6 +127,7 @@ static void set_brks_to(
{
utf32_t ch;
ch = get_next_char(s, len, &posNext);
(void)ch;
assert(ch != EOS);
for (; posStart < posNext - 1; ++posStart)
brks[posStart] = WORDBREAK_INSIDEACHAR;

View File

@ -43,7 +43,7 @@
*
* Header file for the word breaking (segmentation) algorithm.
*
* @version 2.5, 2015/04/18
* @version 3.0, 2015/05/10
* @author Tom Hacohen
*/

View File

@ -1,6 +1,6 @@
/* The content of this file is generated from:
# WordBreakProperty-7.0.0.txt
# Date: 2014-02-19, 15:51:39 GMT [MD]
# WordBreakProperty-8.0.0.txt
# Date: 2015-02-14, 10:26:15 GMT [MD]
*/
#include "wordbreakdef.h"
@ -120,8 +120,8 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x0829, 0x082D, WBP_Extend},
{0x0840, 0x0858, WBP_ALetter},
{0x0859, 0x085B, WBP_Extend},
{0x08A0, 0x08B2, WBP_ALetter},
{0x08E4, 0x0902, WBP_Extend},
{0x08A0, 0x08B4, WBP_ALetter},
{0x08E3, 0x0902, WBP_Extend},
{0x0903, 0x0903, WBP_Extend},
{0x0904, 0x0939, WBP_ALetter},
{0x093A, 0x093A, WBP_Extend},
@ -203,6 +203,7 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x0AE0, 0x0AE1, WBP_ALetter},
{0x0AE2, 0x0AE3, WBP_Extend},
{0x0AE6, 0x0AEF, WBP_Numeric},
{0x0AF9, 0x0AF9, WBP_ALetter},
{0x0B01, 0x0B01, WBP_Extend},
{0x0B02, 0x0B03, WBP_Extend},
{0x0B05, 0x0B0C, WBP_ALetter},
@ -259,7 +260,7 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x0C46, 0x0C48, WBP_Extend},
{0x0C4A, 0x0C4D, WBP_Extend},
{0x0C55, 0x0C56, WBP_Extend},
{0x0C58, 0x0C59, WBP_ALetter},
{0x0C58, 0x0C5A, WBP_ALetter},
{0x0C60, 0x0C61, WBP_ALetter},
{0x0C62, 0x0C63, WBP_Extend},
{0x0C66, 0x0C6F, WBP_Numeric},
@ -298,7 +299,7 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x0D4D, 0x0D4D, WBP_Extend},
{0x0D4E, 0x0D4E, WBP_ALetter},
{0x0D57, 0x0D57, WBP_Extend},
{0x0D60, 0x0D61, WBP_ALetter},
{0x0D5F, 0x0D61, WBP_ALetter},
{0x0D62, 0x0D63, WBP_Extend},
{0x0D66, 0x0D6F, WBP_Numeric},
{0x0D7A, 0x0D7F, WBP_ALetter},
@ -388,7 +389,8 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x1318, 0x135A, WBP_ALetter},
{0x135D, 0x135F, WBP_Extend},
{0x1380, 0x138F, WBP_ALetter},
{0x13A0, 0x13F4, WBP_ALetter},
{0x13A0, 0x13F5, WBP_ALetter},
{0x13F8, 0x13FD, WBP_ALetter},
{0x1401, 0x166C, WBP_ALetter},
{0x166F, 0x167F, WBP_ALetter},
{0x1681, 0x169A, WBP_ALetter},
@ -434,8 +436,6 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x1933, 0x1938, WBP_Extend},
{0x1939, 0x193B, WBP_Extend},
{0x1946, 0x194F, WBP_Numeric},
{0x19B0, 0x19C0, WBP_Extend},
{0x19C8, 0x19C9, WBP_Extend},
{0x19D0, 0x19D9, WBP_Numeric},
{0x1A00, 0x1A16, WBP_ALetter},
{0x1A17, 0x1A18, WBP_Extend},
@ -641,7 +641,7 @@ static struct WordBreakProperties wb_prop_default[] = {
{0xA67F, 0xA67F, WBP_ALetter},
{0xA680, 0xA69B, WBP_ALetter},
{0xA69C, 0xA69D, WBP_ALetter},
{0xA69F, 0xA69F, WBP_Extend},
{0xA69E, 0xA69F, WBP_Extend},
{0xA6A0, 0xA6E5, WBP_ALetter},
{0xA6E6, 0xA6EF, WBP_ALetter},
{0xA6F0, 0xA6F1, WBP_Extend},
@ -651,8 +651,9 @@ static struct WordBreakProperties wb_prop_default[] = {
{0xA771, 0xA787, WBP_ALetter},
{0xA788, 0xA788, WBP_ALetter},
{0xA78B, 0xA78E, WBP_ALetter},
{0xA78F, 0xA78F, WBP_ALetter},
{0xA790, 0xA7AD, WBP_ALetter},
{0xA7B0, 0xA7B1, WBP_ALetter},
{0xA7B0, 0xA7B7, WBP_ALetter},
{0xA7F7, 0xA7F7, WBP_ALetter},
{0xA7F8, 0xA7F9, WBP_ALetter},
{0xA7FA, 0xA7FA, WBP_ALetter},
@ -675,6 +676,7 @@ static struct WordBreakProperties wb_prop_default[] = {
{0xA8E0, 0xA8F1, WBP_Extend},
{0xA8F2, 0xA8F7, WBP_ALetter},
{0xA8FB, 0xA8FB, WBP_ALetter},
{0xA8FD, 0xA8FD, WBP_ALetter},
{0xA900, 0xA909, WBP_Numeric},
{0xA90A, 0xA925, WBP_ALetter},
{0xA926, 0xA92D, WBP_Extend},
@ -730,7 +732,8 @@ static struct WordBreakProperties wb_prop_default[] = {
{0xAB28, 0xAB2E, WBP_ALetter},
{0xAB30, 0xAB5A, WBP_ALetter},
{0xAB5C, 0xAB5F, WBP_ALetter},
{0xAB64, 0xAB65, WBP_ALetter},
{0xAB60, 0xAB65, WBP_ALetter},
{0xAB70, 0xABBF, WBP_ALetter},
{0xABC0, 0xABE2, WBP_ALetter},
{0xABE3, 0xABE4, WBP_Extend},
{0xABE5, 0xABE5, WBP_Extend},
@ -763,7 +766,7 @@ static struct WordBreakProperties wb_prop_default[] = {
{0xFE10, 0xFE10, WBP_MidNum},
{0xFE13, 0xFE13, WBP_MidLetter},
{0xFE14, 0xFE14, WBP_MidNum},
{0xFE20, 0xFE2D, WBP_Extend},
{0xFE20, 0xFE2F, WBP_Extend},
{0xFE33, 0xFE34, WBP_ExtendNumLet},
{0xFE4D, 0xFE4F, WBP_ExtendNumLet},
{0xFE50, 0xFE50, WBP_MidNum},
@ -830,6 +833,8 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x1083F, 0x10855, WBP_ALetter},
{0x10860, 0x10876, WBP_ALetter},
{0x10880, 0x1089E, WBP_ALetter},
{0x108E0, 0x108F2, WBP_ALetter},
{0x108F4, 0x108F5, WBP_ALetter},
{0x10900, 0x10915, WBP_ALetter},
{0x10920, 0x10939, WBP_ALetter},
{0x10980, 0x109B7, WBP_ALetter},
@ -853,6 +858,8 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x10B60, 0x10B72, WBP_ALetter},
{0x10B80, 0x10B91, WBP_ALetter},
{0x10C00, 0x10C48, WBP_ALetter},
{0x10C80, 0x10CB2, WBP_ALetter},
{0x10CC0, 0x10CF2, WBP_ALetter},
{0x11000, 0x11000, WBP_Extend},
{0x11001, 0x11001, WBP_Extend},
{0x11002, 0x11002, WBP_Extend},
@ -885,8 +892,10 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x111B6, 0x111BE, WBP_Extend},
{0x111BF, 0x111C0, WBP_Extend},
{0x111C1, 0x111C4, WBP_ALetter},
{0x111CA, 0x111CC, WBP_Extend},
{0x111D0, 0x111D9, WBP_Numeric},
{0x111DA, 0x111DA, WBP_ALetter},
{0x111DC, 0x111DC, WBP_ALetter},
{0x11200, 0x11211, WBP_ALetter},
{0x11213, 0x1122B, WBP_ALetter},
{0x1122C, 0x1122E, WBP_Extend},
@ -895,12 +904,17 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x11234, 0x11234, WBP_Extend},
{0x11235, 0x11235, WBP_Extend},
{0x11236, 0x11237, WBP_Extend},
{0x11280, 0x11286, WBP_ALetter},
{0x11288, 0x11288, WBP_ALetter},
{0x1128A, 0x1128D, WBP_ALetter},
{0x1128F, 0x1129D, WBP_ALetter},
{0x1129F, 0x112A8, WBP_ALetter},
{0x112B0, 0x112DE, WBP_ALetter},
{0x112DF, 0x112DF, WBP_Extend},
{0x112E0, 0x112E2, WBP_Extend},
{0x112E3, 0x112EA, WBP_Extend},
{0x112F0, 0x112F9, WBP_Numeric},
{0x11301, 0x11301, WBP_Extend},
{0x11300, 0x11301, WBP_Extend},
{0x11302, 0x11303, WBP_Extend},
{0x11305, 0x1130C, WBP_ALetter},
{0x1130F, 0x11310, WBP_ALetter},
@ -915,6 +929,7 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x11341, 0x11344, WBP_Extend},
{0x11347, 0x11348, WBP_Extend},
{0x1134B, 0x1134D, WBP_Extend},
{0x11350, 0x11350, WBP_ALetter},
{0x11357, 0x11357, WBP_Extend},
{0x1135D, 0x11361, WBP_ALetter},
{0x11362, 0x11363, WBP_Extend},
@ -939,6 +954,8 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x115BC, 0x115BD, WBP_Extend},
{0x115BE, 0x115BE, WBP_Extend},
{0x115BF, 0x115C0, WBP_Extend},
{0x115D8, 0x115DB, WBP_ALetter},
{0x115DC, 0x115DD, WBP_Extend},
{0x11600, 0x1162F, WBP_ALetter},
{0x11630, 0x11632, WBP_Extend},
{0x11633, 0x1163A, WBP_Extend},
@ -957,13 +974,21 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x116B6, 0x116B6, WBP_Extend},
{0x116B7, 0x116B7, WBP_Extend},
{0x116C0, 0x116C9, WBP_Numeric},
{0x1171D, 0x1171F, WBP_Extend},
{0x11720, 0x11721, WBP_Extend},
{0x11722, 0x11725, WBP_Extend},
{0x11726, 0x11726, WBP_Extend},
{0x11727, 0x1172B, WBP_Extend},
{0x11730, 0x11739, WBP_Numeric},
{0x118A0, 0x118DF, WBP_ALetter},
{0x118E0, 0x118E9, WBP_Numeric},
{0x118FF, 0x118FF, WBP_ALetter},
{0x11AC0, 0x11AF8, WBP_ALetter},
{0x12000, 0x12398, WBP_ALetter},
{0x12000, 0x12399, WBP_ALetter},
{0x12400, 0x1246E, WBP_ALetter},
{0x12480, 0x12543, WBP_ALetter},
{0x13000, 0x1342E, WBP_ALetter},
{0x14400, 0x14646, WBP_ALetter},
{0x16800, 0x16A38, WBP_ALetter},
{0x16A40, 0x16A5E, WBP_ALetter},
{0x16A60, 0x16A69, WBP_Numeric},
@ -1026,6 +1051,12 @@ static struct WordBreakProperties wb_prop_default[] = {
{0x1D7AA, 0x1D7C2, WBP_ALetter},
{0x1D7C4, 0x1D7CB, WBP_ALetter},
{0x1D7CE, 0x1D7FF, WBP_Numeric},
{0x1DA00, 0x1DA36, WBP_Extend},
{0x1DA3B, 0x1DA6C, WBP_Extend},
{0x1DA75, 0x1DA75, WBP_Extend},
{0x1DA84, 0x1DA84, WBP_Extend},
{0x1DA9B, 0x1DA9F, WBP_Extend},
{0x1DAA1, 0x1DAAF, WBP_Extend},
{0x1E800, 0x1E8C4, WBP_ALetter},
{0x1E8D0, 0x1E8D6, WBP_Extend},
{0x1EE00, 0x1EE03, WBP_ALetter},

View File

@ -32,6 +32,7 @@
*
* This library has been updated according to Revision 27, for
* Unicode 8.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-27.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -43,7 +44,7 @@
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the word breaking algorithm.
*
* @version 2.6, 2015/04/19
* @version 3.0, 2015/05/10
* @author Tom Hacohen
*/