diff --git a/ChangeLog b/ChangeLog index 74e3bffc13..44cbaaa1c1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2013-01-11 Tom Hacohen (TAsn) + + * Static libs: Updated liblinebreak to libunibreak's latest version. + 2013-01-11 Cedric Bail * Fix not up to date clip cache for Evas_Object_Text. diff --git a/NEWS b/NEWS index 1a5b28706e..2186a38e73 100644 --- a/NEWS +++ b/NEWS @@ -74,6 +74,7 @@ Improvements: * use Eina_File in webp, gif, tiff, png and eet loader * Eina.h includes eina_alloca.h/alloca.h to define alloca() * Improved eina share del speed. + * Upgrade liblinebreak to latest version of libunibreak. Fixes: * Fix PPC (big endian) image codec bug. diff --git a/src/Makefile.am b/src/Makefile.am index c530955d7b..e22f4c004c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -124,5 +124,5 @@ clean-local: rm -rf modules/emotion/xine/*.gcno rm -rf modules/emotion/gstreamer/*.gcno rm -rf modules/emotion/generic/*.gcno - rm -rf static_libs/liblinebreak/*.gcno + rm -rf static_libs/libunibreak/*.gcno rm -rf static_libs/lz4/*.gcno diff --git a/src/Makefile_Evas.am b/src/Makefile_Evas.am index d1d3ba35b8..6ca2703442 100644 --- a/src/Makefile_Evas.am +++ b/src/Makefile_Evas.am @@ -19,18 +19,18 @@ lib/evas/include/evas_blend_ops.h # Linebreak noinst_HEADERS += \ -static_libs/liblinebreak/linebreak.h \ -static_libs/liblinebreak/linebreakdef.h \ -static_libs/liblinebreak/wordbreakdef.h \ -static_libs/liblinebreak/wordbreak.h +static_libs/libunibreak/linebreak.h \ +static_libs/libunibreak/linebreakdef.h \ +static_libs/libunibreak/wordbreakdef.h \ +static_libs/libunibreak/wordbreak.h # Linebreak lib_evas_libevas_la_SOURCES = \ -static_libs/liblinebreak/linebreak.c \ -static_libs/liblinebreak/linebreakdata.c \ -static_libs/liblinebreak/linebreakdef.c \ -static_libs/liblinebreak/wordbreak.c \ -static_libs/liblinebreak/wordbreakdata.x +static_libs/libunibreak/linebreak.c \ +static_libs/libunibreak/linebreakdata.c \ +static_libs/libunibreak/linebreakdef.c \ 
+static_libs/libunibreak/wordbreak.c \ +static_libs/libunibreak/wordbreakdata.c # Main lib_evas_libevas_la_SOURCES += \ @@ -194,7 +194,7 @@ lib_evas_libevas_la_CPPFLAGS = \ -I$(top_srcdir)/src/lib/evas/include \ -I$(top_srcdir)/src/lib/evas/cserve2 \ -I$(top_srcdir)/src/lib/evas/file \ --I$(top_srcdir)/src/static_libs/liblinebreak \ +-I$(top_srcdir)/src/static_libs/libunibreak \ -I$(top_srcdir)/src/lib/evas/common \ -I$(top_srcdir)/src/lib/eina \ -I$(top_builddir)/src/lib/eina \ @@ -239,11 +239,11 @@ lib_evas_libevas_la_LDFLAGS = @EFL_LTLIBRARY_FLAGS@ # Linebreak EXTRA_DIST += \ -static_libs/liblinebreak/LICENCE \ -static_libs/liblinebreak/AUTHORS \ -static_libs/liblinebreak/NEWS \ -static_libs/liblinebreak/README \ -static_libs/liblinebreak/ChangeLog +static_libs/libunibreak/LICENCE \ +static_libs/libunibreak/AUTHORS \ +static_libs/libunibreak/NEWS \ +static_libs/libunibreak/README \ +static_libs/libunibreak/ChangeLog # Engines diff --git a/src/static_libs/liblinebreak/AUTHORS b/src/static_libs/libunibreak/AUTHORS similarity index 100% rename from src/static_libs/liblinebreak/AUTHORS rename to src/static_libs/libunibreak/AUTHORS diff --git a/src/static_libs/liblinebreak/ChangeLog b/src/static_libs/libunibreak/ChangeLog similarity index 65% rename from src/static_libs/liblinebreak/ChangeLog rename to src/static_libs/libunibreak/ChangeLog index 8255c10b14..7d5e3b6391 100644 --- a/src/static_libs/liblinebreak/ChangeLog +++ b/src/static_libs/libunibreak/ChangeLog @@ -1,3 +1,194 @@ +2012-10-06 Wu Yongwei + + Update files according to UAX #14-30, for Unicode 6.2.0. + * README: Update the reference to UAX #14-30. + * src/linebreak.c (baTable): Update for the new class `RI'. + * src/linebreak.h (LINEBREAK_VERSION): Set to 0x0202. + * src/linebreakdef.h (LBP_RI): New enumerator for the new class `RI' + as defined in UAX #14-30. + * src/linebreakdata.c: Regenerate from LineBreak-6.2.0.txt. 
+ +2012-10-06 Wu Yongwei + + * src/linebreak.c (baTable): Correct the issue that one column was + missing in the table. + +2012-10-06 Wu Yongwei + + * README: Update to reflect the recent changes. + +2012-10-06 Wu Yongwei + + Make `make linebreakdata' and `make wordbreakdata' work again. + * src/Makefile.am (EXTRA_DIST): Add missing `filter_dup.c'. + (linebreakdata): New make target. + (wordbreakdata): New make target. + +2012-10-06 Wu Yongwei + + Make `make dist' work again after the directory adjustment. + * Doxyfile (INPUT): Change to `src'. + (FILE_PATTERNS): Set to `*.c *.h'. + * Makefile.am (EXTRA_DIST): Move content from src/Makefile.am. + (doc): Move target from src/Makefile.am. + * src/Makefile.am (EXTRA_DIST): Move partial content to Makefile.am. + (doc): Move target to Makefile.am. + +2012-09-16 Wu Yongwei + + Update files according to UAX #14-28, for Unicode 6.1.0. + * README: Update the reference to UAX #14-28. + * src/linebreak.c (baTable): Update for the new class `HL'. + (resolve_lb_class): Resolve the new class `CJ' to `ID' (simplified). + * src/linebreakdef.h (LBP_HL): New enumerator for the new class `HL' + as defined in UAX #14-28. + (LBP_CJ): New enumerator for the new class `CJ' as defined in + UAX #14-28. + * src/linebreakdata.c: Regenerate from LineBreak-6.1.0.txt. + +2012-08-13 Tom Hacohen + + Move source files to under src. + * Makefile.am: Split from original Makefile.am. + (SUBDIRS): Add `src'. + * configure.ac (AC_CONFIG_SRCDIR): Add `src/' before `linebreak.c'. + (AC_CONFIG_FILES): Add `src/Makefile'. + * src/LineBreak1.sed: Move from LineBreak1.sed. + * src/LineBreak2.sed: Move from LineBreak2.sed. + * src/Makefile.am: Split from Makefile.am + * src/Makefile.gcc: Move from Makefile.gcc. + * src/Makefile.msvc: Move from Makefile.msvc. + * src/filter_dup.c: Move from filter_dup.c. + * src/linebreak.c: Move from linebreak.c. + * src/linebreak.h: Move from linebreak.h. + * src/linebreakdata.c: Move from linebreakdata.c. 
+ * src/linebreakdata1.tmpl: Move from linebreakdata1.tmpl. + * src/linebreakdata2.tmpl: Move from linebreakdata2.tmpl. + * src/linebreakdata3.tmpl: Move from linebreakdata3.tmpl. + * src/linebreakdef.c: Move from linebreakdef.c. + * src/linebreakdef.h: Move from linebreakdef.h. + * src/sort_numeric_hex.py: Move from sort_numeric_hex.py. + * src/wordbreak.c: Move from wordbreak.c. + * src/wordbreak.h: Move from wordbreak.h. + * src/wordbreakdata.c: Move from wordbreakdata.c. + * src/wordbreakdata1.tmpl: Move from wordbreakdata1.tmpl. + * src/wordbreakdata2.tmpl: Move from wordbreakdata2.tmpl. + * src/wordbreakdef.h: Move from wordbreakdef.h. + +2012-08-12 Wu Yongwei + + * README: Change the home URL to github; remove $Id$; eliminate + non-ASCII characters. + +2012-08-11 Wu Yongwei + + * configure.ac (AC_INIT): Change the library name and version to + `libunibreak' and `1.0'. + (AC_PROG_LN_S): New macro. + (AC_OUTPUT): Change to `libunibreak.pc'. + * Doxyfile (PROJECT_NAME): Change to `libunibreak'. + (PROJECT_NUMBER): Change to `1.0'. + * LICENCE: Add copyright information about Tom Hacohen. + * Makefile.am (lib_LTLIBRARIES): Change to `libunibreak.la'. + (pkgconfig_DATA): Change to `libunibreak.pc'. + (libunibreak_la_LDFLAGS): Reset the version to `1:0'. + (install-exec-hook): Replace the static library liblinebreak.a with + a symlink to libunibreak.a. + * Makefile.msvc: Change the library name to `libunibreak', and the + output library to `unibreak.lib'. + * NEWS: Add information about libunibreak 1.0. + * README: Change the library name, and add information about word + break. + +2012-02-04 Wu Yongwei + + * wordbreak.h (WORDBREAK_INSIDEACHAR): Change from + WORDBREAK_INSIDECHAR. + * wordbreak.c (set_brks_to): Change `WORDBREAK_INSIDECHAR' to + `WORDBREAK_INSIDEACHAR'. + +2012-01-19 Wu Yongwei + + * wordbreak.h: Change angle brackets to quotation marks (which + caused build errors). + +2012-01-19 Wu Yongwei + + * Makefile.gcc (CFILES): Add wordbreak.c. 
+ (WordBreakProperty.txt): New target. + (wordbreakdata): New target. + +2012-01-19 Wu Yongwei + + * Makefile.am (liblinebreak_la_SOURCES): Remove wordbreakdata.c. + (EXTRA_DIST): Add wordbreakdata.c, wordbreakdata1.tmpl, and + wordbreakdata2.tmpl. + +2012-01-19 Wu Yongwei + + * Makefile.msvc: Add wordbreak files. + +2012-01-18 Tom Hacohen + + Add word breaking support. + * AUTHORS: Add `Tom Hacohen'. + * Makefile.am (include_HEADERS): Add header files for word breaking. + (liblinebreak_la_SOURCES): Add source files for word breaking. + (sort_numeric_hex.py): Add `sort_numeric_hex.py'. + (distclean-local): Clean also `WordBreakData.txt'. + (WordBreakProperty.txt): New target. + (wordbreakdata): New target. + * sort_numeric_hex.py: New file. + * wordbreak.c: New file. + * wordbreak.h: New file. + * wordbreakdef.h: New file. + * wordbreakdata.c: New file. + * wordbreakdata1.tmpl: New file. + * wordbreakdata2.tmpl: New file. + +2011-05-17 Wu Yongwei + + Add support for pkg-config (thanks to Tom Hacohen). + * liblinebreak.pc.in: New file. + * configure.ac (AC_OUTPUT): Add `liblinebreak.pc'. + * Makefile.am (pkgconfig_DATA): Set to `liblinebreak.pc'. + (pkgconfigdir): Set to `$(libdir)/pkgconfig'. + +2011-05-07 Wu Yongwei + + * README: Update the reference to UAX #14-26, for Unicode 6.0.0. + +2011-05-07 Wu Yongwei + + * configure.ac (AC_INIT): Increase the version to 2.1. + * Makefile.am (liblinebreak_la_LDFLAGS): Set the version-info to + `2:1'. + +2011-05-07 Wu Yongwei + + * LICENCE: Update the copyright year. + +2011-05-07 Wu Yongwei + + Update for the 2.1 release. + * Doxyfile (PROJECT_NUMBER): Set to `2.1'. + * NEWS: Add information about the 2.1 release. + * linebreak.h (LINEBREAK_VERSION): Set to `0x0201'. + * linebreak.h: Update comments. + * linebreak.c: Ditto. + * linebreakdef.h: Ditto. + * linebreakdef.c: Ditto. + +2011-05-07 Wu Yongwei + + * linebreakdata.c: Regenerate from LineBreak-6.0.0.txt. 
+ +2011-05-07 Wu Yongwei + + * linebreak.c (set_linebreaks): Fix the assertion failure when + U+FFFC (OBJECT REPLACEMENT CHARACTER) appears at the beginning of a + line (thanks to Tom Hacohen). + 2010-01-03 Wu Yongwei * LICENCE: Update the copyright year. diff --git a/src/static_libs/liblinebreak/LICENCE b/src/static_libs/libunibreak/LICENCE similarity index 86% rename from src/static_libs/liblinebreak/LICENCE rename to src/static_libs/libunibreak/LICENCE index 9b9984f17f..ceec155511 100644 --- a/src/static_libs/liblinebreak/LICENCE +++ b/src/static_libs/libunibreak/LICENCE @@ -1,4 +1,5 @@ -Copyright (C) 2008-2010 Wu Yongwei +Copyright (C) 2008-2012 Wu Yongwei +Copyright (C) 2012 Tom Hacohen This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages diff --git a/src/static_libs/liblinebreak/NEWS b/src/static_libs/libunibreak/NEWS similarity index 73% rename from src/static_libs/liblinebreak/NEWS rename to src/static_libs/libunibreak/NEWS index 4c555219ac..581cab7cb8 100644 --- a/src/static_libs/liblinebreak/NEWS +++ b/src/static_libs/libunibreak/NEWS @@ -1,11 +1,23 @@ -New in 2.0 +New in libunibreak 1.0 + +- Add word breaking support +- Change the library name to "libunibreak", while keeping maximum compatibility +- Add pkg-config support + +New in liblinebreak 2.1 + +- Update the data according to LineBreak-6.0.0.txt +- Fix the bug that an assertion in code can fail if U+FFFC is + encountered at the beginning of a line + +New in liblinebreak 2.0 - Update the algorithm and data according to UAX #14-24 and LineBreak-5.2.0.txt - Rename some functions to reduce namespace pollution - Make Doxygen documentation better -New in 1.2 +New in liblinebreak 1.2 - Fix the bug that an assertion in code can fail if an invalid UTF-8 or UTF-16 sequence is encountered near the end of input @@ -14,7 +26,7 @@ New in 1.2 used as apostrophe - Make Doxygen documentation better -New in 1.1 +New in liblinebreak 1.1 
- Make get_lb_prop_lang static and not an exported symbol - Define is_line_breakable to alias to is_breakable @@ -23,7 +35,7 @@ New in 1.1 linebreakdef.h - Add the function documentation comments to the header files -New in 1.0 +New in liblinebreak 1.0 - Update the line breaking data according to UAX #14-22 and LineBreak-5.1.0.txt diff --git a/src/static_libs/liblinebreak/README b/src/static_libs/libunibreak/README similarity index 65% rename from src/static_libs/liblinebreak/README rename to src/static_libs/libunibreak/README index 9d236519da..39b41570f3 100644 --- a/src/static_libs/liblinebreak/README +++ b/src/static_libs/libunibreak/README @@ -1,20 +1,17 @@ -NOTICE: This is the original version, that was adapted a bit (mostly - build related) in order to work nicely with Evas. - - - L I B L I N E B R E A K - ======================= + L I B U N I B R E A K + ===================== Overview -------- -This is the README file for liblinebreak, an implementation of the line -breaking algorithm as described in Unicode 5.2.0 Standard Annex 14, -Revision 24, available at - +This is the README file for libunibreak, an implementation of the line +breaking and word breaking algorithms as described in Unicode +Standard Annex 14 and Unicode Standard Annex 29, available at + + Check this URL for up-to-date information: - + Licence @@ -49,9 +46,12 @@ There are three ways to build the library: - type `make doc' to generate the doxygen documentation; or - type `make linebreakdata' to regenerate linebreakdata.c from LineBreak.txt. + - type `make wordbreakdata' to regenerate wordbreakdata.c from + WordBreakProperty.txt. 2) On systems where GCC and Binutils are supported, one can type + cd src cp -p Makefile.gcc Makefile make @@ -62,25 +62,27 @@ There are three ways to build the library: - type `make doc' to generate the doxygen documentation; or - type `make linebreakdata' to regenerate linebreakdata.c from LineBreak.txt. 
+ - type `make wordbreakdata' to regenerate wordbreakdata.c from + WordBreakProperty.txt. 3) On Windows, apart from using method 1 (Cygwin/MSYS) and method 2 (MinGW), MSVC can also be used. Type + cd src nmake -f Makefile.msvc to build the static library. By default the debug release is built. To build the release version - nmake -f Makefile.msvc CFG="linebreak - Win32 Release" + nmake -f Makefile.msvc CFG="libunibreak - Win32 Release" Documentation ------------- -Check the generated document doc/html/linebreak_8h.html for the public +Check the generated document doc/html/linebreak_8h.html and +doc/html/wordbreak_8h.html in the downloaded file for the public interfaces exposed to applications. -$Id: README,v 1.6 2009/11/29 08:09:13 adah Exp $ - vim:autoindent:expandtab:formatoptions=tcqlmn:textwidth=72: diff --git a/src/static_libs/liblinebreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c similarity index 77% rename from src/static_libs/liblinebreak/linebreak.c rename to src/static_libs/libunibreak/linebreak.c index f9ff9a1f14..c1ea405883 100644 --- a/src/static_libs/liblinebreak/linebreak.c +++ b/src/static_libs/libunibreak/linebreak.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2010 Wu Yongwei + * Copyright (C) 2008-2012 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 24, for - * Unicode 5.2.0: - * + * This library has been updated according to Revision 30, for + * Unicode 6.2.0: + * * * The Unicode Terms of Use are available at * @@ -44,7 +44,7 @@ * Implementation of the line breaking algorithm as described in Unicode * Standard Annex 14. 
* - * @version 2.0, 2010/01/03 + * @version 2.3, 2012/10/06 * @author Wu Yongwei */ @@ -79,144 +79,183 @@ enum BreakAction /** * Break action pair table. This is a direct mapping of Table 2 of - * Unicode Standard Annex 14, Revision 24. + * Unicode Standard Annex 14, Revision 30. */ -static enum BreakAction baTable[LBP_JT][LBP_JT] = { +static enum BreakAction baTable[LBP_RI][LBP_RI] = { { /* OP */ PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK, - PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK }, + PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, + CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, + PRH_BRK }, { /* CL */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* CP */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* QU */ PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + 
CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + IND_BRK }, { /* GL */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + IND_BRK }, { /* NS */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* EX */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* SY */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* IS */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, 
IND_BRK, IND_BRK, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* PR */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, + IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK }, { /* PO */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* NU */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* AL */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, + { /* HL */ + IND_BRK, 
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* ID */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* IN */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* HY */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* BA */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* BB */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, 
IND_BRK, IND_BRK, - IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + IND_BRK }, { /* B2 */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* ZW */ DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK, - DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* CM */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK }, { /* WJ */ IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, - IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK }, + IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + IND_BRK }, { /* H2 */ DIR_BRK, PRH_BRK, 
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, + DIR_BRK }, { /* H3 */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, + DIR_BRK }, { /* JL */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, + DIR_BRK }, { /* JV */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK }, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, + DIR_BRK }, { /* JT */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, - PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK } + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, + DIR_BRK }, + { /* RI */ + 
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, + IND_BRK }, }; /** @@ -379,7 +418,15 @@ static enum LineBreakClass resolve_lb_class( { return LBP_ID; } - /* Fall through */ + else + { + return LBP_AL; + } + case LBP_CJ: + /* Simplified for `normal' line breaking. See + * + * for details. */ + return LBP_ID; case LBP_SA: case LBP_SG: case LBP_XX: @@ -609,6 +656,9 @@ nextline: lbcNew = resolve_lb_class(lbcNew, lang); + /* TODO: LB21a, as introduced by Revision 28 of UAX#14, is not + * yet implemented below. */ + assert(lbcCur <= LBP_JT); assert(lbcNew <= LBP_JT); switch (baTable[lbcCur - 1][lbcNew - 1]) diff --git a/src/static_libs/liblinebreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h similarity index 89% rename from src/static_libs/liblinebreak/linebreak.h rename to src/static_libs/libunibreak/linebreak.h index abc1ae9e80..288ef1b352 100644 --- a/src/static_libs/liblinebreak/linebreak.h +++ b/src/static_libs/libunibreak/linebreak.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2010 Wu Yongwei + * Copyright (C) 2008-2012 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 24, for - * Unicode 5.2.0: - * + * This library has been updated according to Revision 30, for + * Unicode 6.2.0: + * * * The Unicode Terms of Use are available at * @@ -43,7 +43,7 @@ * * Header file for the line breaking algorithm. 
* - * @version 2.0, 2010/01/03 + * @version 2.2, 2012/10/06 * @author Wu Yongwei */ @@ -56,7 +56,7 @@ extern "C" { #endif -#define LINEBREAK_VERSION 0x0200 /**< Version of the library linebreak */ +#define LINEBREAK_VERSION 0x0202 /**< Version of the library linebreak */ extern const int linebreak_version; #ifndef LINEBREAK_UTF_TYPES_DEFINED diff --git a/src/static_libs/liblinebreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c similarity index 91% rename from src/static_libs/liblinebreak/linebreakdata.c rename to src/static_libs/libunibreak/linebreakdata.c index 002147998b..cced7d40bb 100644 --- a/src/static_libs/liblinebreak/linebreakdata.c +++ b/src/static_libs/libunibreak/linebreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# LineBreak-6.0.0.txt -# Date: 2010-08-18, 17:25:00 PDT [KW] +# LineBreak-6.2.0.txt +# Date: 2012-08-08, 19:26:00 GMT [KW] */ #include "linebreak.h" @@ -98,6 +98,7 @@ struct LineBreakProperties lb_prop_default[] = { { 0x048A, 0x0587, LBP_AL }, { 0x0589, 0x0589, LBP_IS }, { 0x058A, 0x058A, LBP_BA }, + { 0x058F, 0x058F, LBP_PR }, { 0x0591, 0x05BD, LBP_CM }, { 0x05BE, 0x05BE, LBP_BA }, { 0x05BF, 0x05BF, LBP_CM }, @@ -107,7 +108,8 @@ struct LineBreakProperties lb_prop_default[] = { { 0x05C4, 0x05C5, LBP_CM }, { 0x05C6, 0x05C6, LBP_EX }, { 0x05C7, 0x05C7, LBP_CM }, - { 0x05D0, 0x0608, LBP_AL }, + { 0x05D0, 0x05F2, LBP_HL }, + { 0x05F3, 0x0608, LBP_AL }, { 0x0609, 0x060B, LBP_PO }, { 0x060C, 0x060D, LBP_IS }, { 0x060E, 0x060F, LBP_AL }, @@ -155,8 +157,8 @@ struct LineBreakProperties lb_prop_default[] = { { 0x0829, 0x082D, LBP_CM }, { 0x0830, 0x0858, LBP_AL }, { 0x0859, 0x085B, LBP_CM }, - { 0x085E, 0x085E, LBP_AL }, - { 0x0900, 0x0903, LBP_CM }, + { 0x085E, 0x08AC, LBP_AL }, + { 0x08E4, 0x0903, LBP_CM }, { 0x0904, 0x0939, LBP_AL }, { 0x093A, 0x093C, LBP_CM }, { 0x093D, 0x093D, LBP_AL }, @@ -199,6 +201,7 @@ struct LineBreakProperties lb_prop_default[] = { { 0x0AD0, 0x0AE1, LBP_AL }, { 0x0AE2, 0x0AE3, 
LBP_CM }, { 0x0AE6, 0x0AEF, LBP_NU }, + { 0x0AF0, 0x0AF0, LBP_AL }, { 0x0AF1, 0x0AF1, LBP_PR }, { 0x0B01, 0x0B03, LBP_CM }, { 0x0B05, 0x0B39, LBP_AL }, @@ -257,7 +260,7 @@ struct LineBreakProperties lb_prop_default[] = { { 0x0E5A, 0x0E5B, LBP_BA }, { 0x0E81, 0x0ECD, LBP_SA }, { 0x0ED0, 0x0ED9, LBP_NU }, - { 0x0EDC, 0x0EDD, LBP_SA }, + { 0x0EDC, 0x0EDF, LBP_SA }, { 0x0F00, 0x0F00, LBP_AL }, { 0x0F01, 0x0F04, LBP_BB }, { 0x0F05, 0x0F05, LBP_AL }, @@ -310,7 +313,7 @@ struct LineBreakProperties lb_prop_default[] = { { 0x1050, 0x108F, LBP_SA }, { 0x1090, 0x1099, LBP_NU }, { 0x109A, 0x109F, LBP_SA }, - { 0x10A0, 0x10FC, LBP_AL }, + { 0x10A0, 0x10FF, LBP_AL }, { 0x1100, 0x115F, LBP_JL }, { 0x1160, 0x11A7, LBP_JV }, { 0x11A8, 0x11FF, LBP_JT }, @@ -386,10 +389,10 @@ struct LineBreakProperties lb_prop_default[] = { { 0x1B74, 0x1B7C, LBP_AL }, { 0x1B80, 0x1B82, LBP_CM }, { 0x1B83, 0x1BA0, LBP_AL }, - { 0x1BA1, 0x1BAA, LBP_CM }, + { 0x1BA1, 0x1BAD, LBP_CM }, { 0x1BAE, 0x1BAF, LBP_AL }, { 0x1BB0, 0x1BB9, LBP_NU }, - { 0x1BC0, 0x1BE5, LBP_AL }, + { 0x1BBA, 0x1BE5, LBP_AL }, { 0x1BE6, 0x1BF3, LBP_CM }, { 0x1BFC, 0x1C23, LBP_AL }, { 0x1C24, 0x1C37, LBP_CM }, @@ -399,14 +402,15 @@ struct LineBreakProperties lb_prop_default[] = { { 0x1C50, 0x1C59, LBP_NU }, { 0x1C5A, 0x1C7D, LBP_AL }, { 0x1C7E, 0x1C7F, LBP_BA }, + { 0x1CC0, 0x1CC7, LBP_AL }, { 0x1CD0, 0x1CD2, LBP_CM }, { 0x1CD3, 0x1CD3, LBP_AL }, { 0x1CD4, 0x1CE8, LBP_CM }, { 0x1CE9, 0x1CEC, LBP_AL }, { 0x1CED, 0x1CED, LBP_CM }, { 0x1CEE, 0x1CF1, LBP_AL }, - { 0x1CF2, 0x1CF2, LBP_CM }, - { 0x1D00, 0x1DBF, LBP_AL }, + { 0x1CF2, 0x1CF4, LBP_CM }, + { 0x1CF5, 0x1DBF, LBP_AL }, { 0x1DC0, 0x1DFF, LBP_CM }, { 0x1E00, 0x1FFC, LBP_AL }, { 0x1FFD, 0x1FFD, LBP_BB }, @@ -469,7 +473,7 @@ struct LineBreakProperties lb_prop_default[] = { { 0x20A7, 0x20A7, LBP_PO }, { 0x20A8, 0x20B5, LBP_PR }, { 0x20B6, 0x20B6, LBP_PO }, - { 0x20B7, 0x20B9, LBP_PR }, + { 0x20B7, 0x20BA, LBP_PR }, { 0x20D0, 0x20F0, LBP_CM }, { 0x2100, 0x2102, LBP_AL }, { 0x2103, 
0x2103, LBP_PO }, @@ -560,10 +564,14 @@ struct LineBreakProperties lb_prop_default[] = { { 0x22BF, 0x22BF, LBP_AI }, { 0x22C0, 0x2311, LBP_AL }, { 0x2312, 0x2312, LBP_AI }, - { 0x2313, 0x2328, LBP_AL }, + { 0x2313, 0x2319, LBP_AL }, + { 0x231A, 0x231B, LBP_ID }, + { 0x231C, 0x2328, LBP_AL }, { 0x2329, 0x2329, LBP_OP }, { 0x232A, 0x232A, LBP_CL }, - { 0x232B, 0x244A, LBP_AL }, + { 0x232B, 0x23EF, LBP_AL }, + { 0x23F0, 0x23F3, LBP_ID }, + { 0x2400, 0x244A, LBP_AL }, { 0x2460, 0x24FE, LBP_AI }, { 0x24FF, 0x24FF, LBP_AL }, { 0x2500, 0x254B, LBP_AI }, @@ -595,19 +603,23 @@ struct LineBreakProperties lb_prop_default[] = { { 0x25E2, 0x25E5, LBP_AI }, { 0x25E6, 0x25EE, LBP_AL }, { 0x25EF, 0x25EF, LBP_AI }, - { 0x25F0, 0x2604, LBP_AL }, + { 0x25F0, 0x25FF, LBP_AL }, + { 0x2600, 0x2603, LBP_ID }, + { 0x2604, 0x2604, LBP_AL }, { 0x2605, 0x2606, LBP_AI }, { 0x2607, 0x2608, LBP_AL }, { 0x2609, 0x2609, LBP_AI }, { 0x260A, 0x260D, LBP_AL }, { 0x260E, 0x260F, LBP_AI }, { 0x2610, 0x2613, LBP_AL }, - { 0x2614, 0x2617, LBP_AI }, - { 0x2618, 0x261B, LBP_AL }, - { 0x261C, 0x261C, LBP_AI }, - { 0x261D, 0x261D, LBP_AL }, - { 0x261E, 0x261E, LBP_AI }, - { 0x261F, 0x263F, LBP_AL }, + { 0x2614, 0x2615, LBP_ID }, + { 0x2616, 0x2617, LBP_AI }, + { 0x2618, 0x2618, LBP_ID }, + { 0x2619, 0x2619, LBP_AL }, + { 0x261A, 0x261F, LBP_ID }, + { 0x2620, 0x2638, LBP_AL }, + { 0x2639, 0x263B, LBP_ID }, + { 0x263C, 0x263F, LBP_AL }, { 0x2640, 0x2640, LBP_AI }, { 0x2641, 0x2641, LBP_AL }, { 0x2642, 0x2642, LBP_AI }, @@ -616,24 +628,45 @@ struct LineBreakProperties lb_prop_default[] = { { 0x2662, 0x2662, LBP_AL }, { 0x2663, 0x2665, LBP_AI }, { 0x2666, 0x2666, LBP_AL }, - { 0x2667, 0x266A, LBP_AI }, + { 0x2667, 0x2667, LBP_AI }, + { 0x2668, 0x2668, LBP_ID }, + { 0x2669, 0x266A, LBP_AI }, { 0x266B, 0x266B, LBP_AL }, { 0x266C, 0x266D, LBP_AI }, { 0x266E, 0x266E, LBP_AL }, { 0x266F, 0x266F, LBP_AI }, - { 0x2670, 0x269D, LBP_AL }, + { 0x2670, 0x267E, LBP_AL }, + { 0x267F, 0x267F, LBP_ID }, + { 0x2680, 0x269D, 
LBP_AL }, { 0x269E, 0x269F, LBP_AI }, - { 0x26A0, 0x26BD, LBP_AL }, - { 0x26BE, 0x26BF, LBP_AI }, - { 0x26C0, 0x26C3, LBP_AL }, - { 0x26C4, 0x26CD, LBP_AI }, + { 0x26A0, 0x26BC, LBP_AL }, + { 0x26BD, 0x26C8, LBP_ID }, + { 0x26C9, 0x26CC, LBP_AI }, + { 0x26CD, 0x26CD, LBP_ID }, { 0x26CE, 0x26CE, LBP_AL }, - { 0x26CF, 0x26E1, LBP_AI }, + { 0x26CF, 0x26D1, LBP_ID }, + { 0x26D2, 0x26D2, LBP_AI }, + { 0x26D3, 0x26D4, LBP_ID }, + { 0x26D5, 0x26D7, LBP_AI }, + { 0x26D8, 0x26D9, LBP_ID }, + { 0x26DA, 0x26DB, LBP_AI }, + { 0x26DC, 0x26DC, LBP_ID }, + { 0x26DD, 0x26DE, LBP_AI }, + { 0x26DF, 0x26E1, LBP_ID }, { 0x26E2, 0x26E2, LBP_AL }, { 0x26E3, 0x26E3, LBP_AI }, { 0x26E4, 0x26E7, LBP_AL }, - { 0x26E8, 0x26FF, LBP_AI }, - { 0x2701, 0x2756, LBP_AL }, + { 0x26E8, 0x26E9, LBP_AI }, + { 0x26EA, 0x26EA, LBP_ID }, + { 0x26EB, 0x26F0, LBP_AI }, + { 0x26F1, 0x26F5, LBP_ID }, + { 0x26F6, 0x26F6, LBP_AI }, + { 0x26F7, 0x26FA, LBP_ID }, + { 0x26FB, 0x26FC, LBP_AI }, + { 0x26FD, 0x2704, LBP_ID }, + { 0x2705, 0x2707, LBP_AL }, + { 0x2708, 0x270D, LBP_ID }, + { 0x270E, 0x2756, LBP_AL }, { 0x2757, 0x2757, LBP_AI }, { 0x2758, 0x275A, LBP_AL }, { 0x275B, 0x275E, LBP_QU }, @@ -704,6 +737,7 @@ struct LineBreakProperties lb_prop_default[] = { { 0x2B55, 0x2B59, LBP_AI }, { 0x2C00, 0x2CEE, LBP_AL }, { 0x2CEF, 0x2CF1, LBP_CM }, + { 0x2CF2, 0x2CF3, LBP_AL }, { 0x2CF9, 0x2CF9, LBP_EX }, { 0x2CFA, 0x2CFC, LBP_BA }, { 0x2CFD, 0x2CFD, LBP_AL }, @@ -736,6 +770,10 @@ struct LineBreakProperties lb_prop_default[] = { { 0x2E2E, 0x2E2E, LBP_EX }, { 0x2E2F, 0x2E2F, LBP_AL }, { 0x2E30, 0x2E31, LBP_BA }, + { 0x2E32, 0x2E32, LBP_AL }, + { 0x2E33, 0x2E34, LBP_BA }, + { 0x2E35, 0x2E39, LBP_AL }, + { 0x2E3A, 0x2E3B, LBP_B2 }, { 0x2E80, 0x3000, LBP_ID }, { 0x3001, 0x3002, LBP_CL }, { 0x3003, 0x3004, LBP_ID }, @@ -768,55 +806,58 @@ struct LineBreakProperties lb_prop_default[] = { { 0x3030, 0x303A, LBP_ID }, { 0x303B, 0x303C, LBP_NS }, { 0x303D, 0x303F, LBP_ID }, - { 0x3041, 0x3041, LBP_NS }, + { 0x3041, 0x3041, 
LBP_CJ }, { 0x3042, 0x3042, LBP_ID }, - { 0x3043, 0x3043, LBP_NS }, + { 0x3043, 0x3043, LBP_CJ }, { 0x3044, 0x3044, LBP_ID }, - { 0x3045, 0x3045, LBP_NS }, + { 0x3045, 0x3045, LBP_CJ }, { 0x3046, 0x3046, LBP_ID }, - { 0x3047, 0x3047, LBP_NS }, + { 0x3047, 0x3047, LBP_CJ }, { 0x3048, 0x3048, LBP_ID }, - { 0x3049, 0x3049, LBP_NS }, + { 0x3049, 0x3049, LBP_CJ }, { 0x304A, 0x3062, LBP_ID }, - { 0x3063, 0x3063, LBP_NS }, + { 0x3063, 0x3063, LBP_CJ }, { 0x3064, 0x3082, LBP_ID }, - { 0x3083, 0x3083, LBP_NS }, + { 0x3083, 0x3083, LBP_CJ }, { 0x3084, 0x3084, LBP_ID }, - { 0x3085, 0x3085, LBP_NS }, + { 0x3085, 0x3085, LBP_CJ }, { 0x3086, 0x3086, LBP_ID }, - { 0x3087, 0x3087, LBP_NS }, + { 0x3087, 0x3087, LBP_CJ }, { 0x3088, 0x308D, LBP_ID }, - { 0x308E, 0x308E, LBP_NS }, + { 0x308E, 0x308E, LBP_CJ }, { 0x308F, 0x3094, LBP_ID }, - { 0x3095, 0x3096, LBP_NS }, + { 0x3095, 0x3096, LBP_CJ }, { 0x3099, 0x309A, LBP_CM }, { 0x309B, 0x309E, LBP_NS }, { 0x309F, 0x309F, LBP_ID }, - { 0x30A0, 0x30A1, LBP_NS }, + { 0x30A0, 0x30A0, LBP_NS }, + { 0x30A1, 0x30A1, LBP_CJ }, { 0x30A2, 0x30A2, LBP_ID }, - { 0x30A3, 0x30A3, LBP_NS }, + { 0x30A3, 0x30A3, LBP_CJ }, { 0x30A4, 0x30A4, LBP_ID }, - { 0x30A5, 0x30A5, LBP_NS }, + { 0x30A5, 0x30A5, LBP_CJ }, { 0x30A6, 0x30A6, LBP_ID }, - { 0x30A7, 0x30A7, LBP_NS }, + { 0x30A7, 0x30A7, LBP_CJ }, { 0x30A8, 0x30A8, LBP_ID }, - { 0x30A9, 0x30A9, LBP_NS }, + { 0x30A9, 0x30A9, LBP_CJ }, { 0x30AA, 0x30C2, LBP_ID }, - { 0x30C3, 0x30C3, LBP_NS }, + { 0x30C3, 0x30C3, LBP_CJ }, { 0x30C4, 0x30E2, LBP_ID }, - { 0x30E3, 0x30E3, LBP_NS }, + { 0x30E3, 0x30E3, LBP_CJ }, { 0x30E4, 0x30E4, LBP_ID }, - { 0x30E5, 0x30E5, LBP_NS }, + { 0x30E5, 0x30E5, LBP_CJ }, { 0x30E6, 0x30E6, LBP_ID }, - { 0x30E7, 0x30E7, LBP_NS }, + { 0x30E7, 0x30E7, LBP_CJ }, { 0x30E8, 0x30ED, LBP_ID }, - { 0x30EE, 0x30EE, LBP_NS }, + { 0x30EE, 0x30EE, LBP_CJ }, { 0x30EF, 0x30F4, LBP_ID }, - { 0x30F5, 0x30F6, LBP_NS }, + { 0x30F5, 0x30F6, LBP_CJ }, { 0x30F7, 0x30FA, LBP_ID }, - { 0x30FB, 0x30FE, LBP_NS 
}, + { 0x30FB, 0x30FB, LBP_NS }, + { 0x30FC, 0x30FC, LBP_CJ }, + { 0x30FD, 0x30FE, LBP_NS }, { 0x30FF, 0x31E3, LBP_ID }, - { 0x31F0, 0x31FF, LBP_NS }, + { 0x31F0, 0x31FF, LBP_CJ }, { 0x3200, 0x3247, LBP_ID }, { 0x3248, 0x324F, LBP_AI }, { 0x3250, 0x4DBF, LBP_ID }, @@ -835,8 +876,10 @@ struct LineBreakProperties lb_prop_default[] = { { 0xA62A, 0xA66E, LBP_AL }, { 0xA66F, 0xA672, LBP_CM }, { 0xA673, 0xA673, LBP_AL }, - { 0xA67C, 0xA67D, LBP_CM }, - { 0xA67E, 0xA6EF, LBP_AL }, + { 0xA674, 0xA67D, LBP_CM }, + { 0xA67E, 0xA697, LBP_AL }, + { 0xA69F, 0xA69F, LBP_CM }, + { 0xA6A0, 0xA6EF, LBP_AL }, { 0xA6F0, 0xA6F1, LBP_CM }, { 0xA6F2, 0xA6F2, LBP_AL }, { 0xA6F3, 0xA6F7, LBP_BA }, @@ -885,6 +928,11 @@ struct LineBreakProperties lb_prop_default[] = { { 0xAA5C, 0xAA5C, LBP_AL }, { 0xAA5D, 0xAA5F, LBP_BA }, { 0xAA60, 0xAADF, LBP_SA }, + { 0xAAE0, 0xAAEA, LBP_AL }, + { 0xAAEB, 0xAAEF, LBP_CM }, + { 0xAAF0, 0xAAF1, LBP_BA }, + { 0xAAF2, 0xAAF4, LBP_AL }, + { 0xAAF5, 0xAAF6, LBP_CM }, { 0xAB01, 0xABE2, LBP_AL }, { 0xABE3, 0xABEA, LBP_CM }, { 0xABEB, 0xABEB, LBP_BA }, @@ -1693,9 +1741,13 @@ struct LineBreakProperties lb_prop_default[] = { { 0xD800, 0xDFFF, LBP_SG }, { 0xE000, 0xF8FF, LBP_XX }, { 0xF900, 0xFAFF, LBP_ID }, - { 0xFB00, 0xFB1D, LBP_AL }, + { 0xFB00, 0xFB17, LBP_AL }, + { 0xFB1D, 0xFB1D, LBP_HL }, { 0xFB1E, 0xFB1E, LBP_CM }, - { 0xFB1F, 0xFD3D, LBP_AL }, + { 0xFB1F, 0xFB28, LBP_HL }, + { 0xFB29, 0xFB29, LBP_AL }, + { 0xFB2A, 0xFB4F, LBP_HL }, + { 0xFB50, 0xFD3D, LBP_AL }, { 0xFD3E, 0xFD3E, LBP_OP }, { 0xFD3F, 0xFD3F, LBP_CL }, { 0xFD50, 0xFDFB, LBP_AL }, @@ -1779,7 +1831,7 @@ struct LineBreakProperties lb_prop_default[] = { { 0xFF63, 0xFF64, LBP_CL }, { 0xFF65, 0xFF65, LBP_NS }, { 0xFF66, 0xFF66, LBP_AL }, - { 0xFF67, 0xFF70, LBP_NS }, + { 0xFF67, 0xFF70, LBP_CJ }, { 0xFF71, 0xFF9D, LBP_AL }, { 0xFF9E, 0xFF9F, LBP_NS }, { 0xFFA0, 0xFFDC, LBP_AL }, @@ -1825,6 +1877,24 @@ struct LineBreakProperties lb_prop_default[] = { { 0x110B0, 0x110BA, LBP_CM }, { 0x110BB, 0x110BD, 
LBP_AL }, { 0x110BE, 0x110C1, LBP_BA }, + { 0x110D0, 0x110E8, LBP_AL }, + { 0x110F0, 0x110F9, LBP_NU }, + { 0x11100, 0x11102, LBP_CM }, + { 0x11103, 0x11126, LBP_AL }, + { 0x11127, 0x11134, LBP_CM }, + { 0x11136, 0x1113F, LBP_NU }, + { 0x11140, 0x11143, LBP_BA }, + { 0x11180, 0x11182, LBP_CM }, + { 0x11183, 0x111B2, LBP_AL }, + { 0x111B3, 0x111C0, LBP_CM }, + { 0x111C1, 0x111C4, LBP_AL }, + { 0x111C5, 0x111C6, LBP_BA }, + { 0x111C7, 0x111C7, LBP_AL }, + { 0x111C8, 0x111C8, LBP_BA }, + { 0x111D0, 0x111D9, LBP_NU }, + { 0x11680, 0x116AA, LBP_AL }, + { 0x116AB, 0x116B7, LBP_CM }, + { 0x116C0, 0x116C9, LBP_NU }, { 0x12000, 0x12462, LBP_AL }, { 0x12470, 0x12473, LBP_BA }, { 0x13000, 0x13257, LBP_AL }, @@ -1840,7 +1910,9 @@ struct LineBreakProperties lb_prop_default[] = { { 0x1328A, 0x13378, LBP_AL }, { 0x13379, 0x13379, LBP_OP }, { 0x1337A, 0x1337B, LBP_CL }, - { 0x1337C, 0x16A38, LBP_AL }, + { 0x1337C, 0x16F50, LBP_AL }, + { 0x16F51, 0x16F92, LBP_CM }, + { 0x16F93, 0x16F9F, LBP_AL }, { 0x1B000, 0x1B001, LBP_ID }, { 0x1D000, 0x1D164, LBP_AL }, { 0x1D165, 0x1D169, LBP_CM }, @@ -1854,13 +1926,36 @@ struct LineBreakProperties lb_prop_default[] = { { 0x1D242, 0x1D244, LBP_CM }, { 0x1D245, 0x1D7CB, LBP_AL }, { 0x1D7CE, 0x1D7FF, LBP_NU }, - { 0x1F000, 0x1F0DF, LBP_AL }, + { 0x1EE00, 0x1EEF1, LBP_AL }, + { 0x1F000, 0x1F0DF, LBP_ID }, { 0x1F100, 0x1F12D, LBP_AI }, { 0x1F12E, 0x1F12E, LBP_AL }, - { 0x1F130, 0x1F19A, LBP_AI }, - { 0x1F1E6, 0x1F1FF, LBP_AL }, - { 0x1F200, 0x1F251, LBP_ID }, - { 0x1F300, 0x1F773, LBP_AL }, + { 0x1F130, 0x1F169, LBP_AI }, + { 0x1F16A, 0x1F16B, LBP_AL }, + { 0x1F170, 0x1F19A, LBP_AI }, + { 0x1F1E6, 0x1F1FF, LBP_RI }, + { 0x1F200, 0x1F3B4, LBP_ID }, + { 0x1F3B5, 0x1F3B6, LBP_AL }, + { 0x1F3B7, 0x1F3BB, LBP_ID }, + { 0x1F3BC, 0x1F3BC, LBP_AL }, + { 0x1F3BD, 0x1F49F, LBP_ID }, + { 0x1F4A0, 0x1F4A0, LBP_AL }, + { 0x1F4A1, 0x1F4A1, LBP_ID }, + { 0x1F4A2, 0x1F4A2, LBP_AL }, + { 0x1F4A3, 0x1F4A3, LBP_ID }, + { 0x1F4A4, 0x1F4A4, LBP_AL }, + { 0x1F4A5, 
0x1F4AE, LBP_ID }, + { 0x1F4AF, 0x1F4AF, LBP_AL }, + { 0x1F4B0, 0x1F4B0, LBP_ID }, + { 0x1F4B1, 0x1F4B2, LBP_AL }, + { 0x1F4B3, 0x1F4FC, LBP_ID }, + { 0x1F500, 0x1F506, LBP_AL }, + { 0x1F507, 0x1F516, LBP_ID }, + { 0x1F517, 0x1F524, LBP_AL }, + { 0x1F525, 0x1F531, LBP_ID }, + { 0x1F532, 0x1F543, LBP_AL }, + { 0x1F550, 0x1F6C5, LBP_ID }, + { 0x1F700, 0x1F773, LBP_AL }, { 0x20000, 0x3FFFD, LBP_ID }, { 0xE0001, 0xE01EF, LBP_CM }, { 0xF0000, 0x10FFFD, LBP_XX }, diff --git a/src/static_libs/liblinebreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c similarity index 95% rename from src/static_libs/liblinebreak/linebreakdef.c rename to src/static_libs/libunibreak/linebreakdef.c index 9ddb4d966f..fb6744da33 100644 --- a/src/static_libs/liblinebreak/linebreakdef.c +++ b/src/static_libs/libunibreak/linebreakdef.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2010 Wu Yongwei + * Copyright (C) 2008-2012 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 24, for - * Unicode 5.2.0: - * + * This library has been updated according to Revision 30, for + * Unicode 6.2.0: + * * * The Unicode Terms of Use are available at * @@ -43,7 +43,7 @@ * * Definition of language-specific data. * - * @version 2.0, 2010/01/03 + * @version 2.2, 2012/10/06 * @author Wu Yongwei */ diff --git a/src/static_libs/liblinebreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h similarity index 92% rename from src/static_libs/liblinebreak/linebreakdef.h rename to src/static_libs/libunibreak/linebreakdef.h index bc4eee2e82..93fcd6781c 100644 --- a/src/static_libs/liblinebreak/linebreakdef.h +++ b/src/static_libs/libunibreak/linebreakdef.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. 
Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2010 Wu Yongwei + * Copyright (C) 2008-2012 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 24, for - * Unicode 5.2.0: - * + * This library has been updated according to Revision 30, for + * Unicode 6.2.0: + * * * The Unicode Terms of Use are available at * @@ -44,7 +44,7 @@ * Definitions of internal data structures, declarations of global * variables, and function prototypes for the line breaking algorithm. * - * @version 2.0, 2010/01/03 + * @version 2.3, 2012/10/06 * @author Wu Yongwei */ @@ -56,7 +56,7 @@ /** * Line break classes. This is a direct mapping of Table 1 of Unicode - * Standard Annex 14, Revision 19. + * Standard Annex 14, Revision 26. */ enum LineBreakClass { @@ -77,6 +77,7 @@ enum LineBreakClass LBP_PO, /**< Postfix */ LBP_NU, /**< Numeric */ LBP_AL, /**< Alphabetic */ + LBP_HL, /**< Hebrew letter */ LBP_ID, /**< Ideographic */ LBP_IN, /**< Inseparable characters */ LBP_HY, /**< Hyphen */ @@ -91,11 +92,13 @@ enum LineBreakClass LBP_JL, /**< Hangul L Jamo */ LBP_JV, /**< Hangul V Jamo */ LBP_JT, /**< Hangul T Jamo */ + LBP_RI, /**< Regional indicator */ /* The following break classes are not treated in the pair table */ LBP_AI, /**< Ambiguous (alphabetic or ideograph) */ LBP_BK, /**< Break (mandatory) */ LBP_CB, /**< Contingent break */ + LBP_CJ, /**< Conditional Japanese starter */ LBP_CR, /**< Carriage return */ LBP_LF, /**< Line feed */ LBP_NL, /**< Next line */ diff --git a/src/static_libs/liblinebreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c similarity index 54% rename from src/static_libs/liblinebreak/wordbreak.c rename to src/static_libs/libunibreak/wordbreak.c index bbbb7f46fa..60db99e426 100644 --- a/src/static_libs/liblinebreak/wordbreak.c +++ 
b/src/static_libs/libunibreak/wordbreak.c @@ -4,7 +4,7 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2011-2011 Tom Hacohen + * Copyright (C) 2012 Tom Hacohen * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -40,11 +40,10 @@ * Implementation of the word breaking algorithm as described in Unicode * Standard Annex 29. * - * @version 2.0, 2011/12/12 + * @version 2.2, 2012/02/04 * @author Tom Hacohen */ - #include #include #include @@ -52,15 +51,16 @@ #include "linebreakdef.h" #include "wordbreak.h" -#include "wordbreakdata.x" +#include "wordbreakdata.c" #define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) -/* Init the wordbreak internals. */ +/** + * Initializes the wordbreak internals. It currently does nothing, but + * it may in the future. + */ void init_wordbreak(void) { - /* Currently does nothing, may be needed in the future. */ - return; } /** @@ -68,7 +68,7 @@ void init_wordbreak(void) * * @param ch character to check * @param wbp pointer to the wbp breaking properties array - * @param len the size of the wbp array in number of items. + * @param len size of the wbp array in number of items * @return the word breaking class if found; \c WBP_Any otherwise */ static enum WordBreakClass get_char_wb_class( @@ -97,21 +97,22 @@ static enum WordBreakClass get_char_wb_class( } /** - * Sets the break types in brks starting from posLast up to posStop. + * Sets the word break types to a specific value in a range. * - * It sets the inside chars to #WORDBREAK_INSIDECHAR and the rest to brkType. - * Assumes brks is initialized - all the cells with #WORDBREAK_NOBREAK are + * It sets the inside chars to #WORDBREAK_INSIDEACHAR and the rest to brkType. + * Assumes \a brks is initialized - all the cells with #WORDBREAK_NOBREAK are * cells that we really don't want to break after. 
* - * @param s the string - * @param brks[out] the breaks array to fill. - * @param posStart the start position - * @param posEnd the end position - * @param len the length of the string - * @param brkType the breaks type to use - * @param get_next_char function to get the next UTF-32 character + * @param[in] s input string + * @param[out] brks breaks array to fill + * @param[in] posStart start position + * @param[in] posEnd end position (exclusive) + * @param[in] len length of the string + * @param[in] brkType breaks type to use + * @param[in] get_next_char function to get the next UTF-32 character */ -static void set_brks_to(const void *s, +static void set_brks_to( + const void *s, char *brks, size_t posStart, size_t posEnd, @@ -119,26 +120,26 @@ static void set_brks_to(const void *s, char brkType, get_next_char_t get_next_char) { - size_t posCur = posStart; - while (posCur < posEnd) + size_t posNext = posStart; + while (posNext < posEnd) { - get_next_char(s, len, &posCur); - for ( ; posStart < posCur - 1; ++posStart) - { - brks[posStart] = WORDBREAK_INSIDECHAR; - } - assert(posStart == posCur - 1); + utf32_t ch; + ch = get_next_char(s, len, &posNext); + assert(ch != EOS); + for (; posStart < posNext - 1; ++posStart) + brks[posStart] = WORDBREAK_INSIDEACHAR; + assert(posStart == posNext - 1); /* Only set it if we haven't set it not to break before. */ if (brks[posStart] != WORDBREAK_NOBREAK) brks[posStart] = brkType; - posStart = posCur; + posStart = posNext; } } -/* Checks to see if newline, cr, or lf. for WB3a and b */ +/* Checks to see if the class is newline, CR, or LF (rules WB3a and b). */ #define IS_WB3ab(cls) ((cls == WBP_Newline) || (cls == WBP_CR) || \ - (cls == WBP_LF)) + (cls == WBP_LF)) /** * Sets the word breaking information for a generic input string. @@ -158,204 +159,205 @@ static void set_wordbreaks( char *brks, get_next_char_t get_next_char) { - /* Previous class */ - enum WordBreakClass p_cls = WBP_Undefined; - /* Strong previous class. 
*/ - enum WordBreakClass sp_cls = WBP_Undefined; + enum WordBreakClass wbcLast = WBP_Undefined; + /* wbcSeqStart is the class that started the current sequence. + * WBP_Undefined is a special case that means "sot". + * This value is the class that is at the start of the current rule + * matching sequence. For example, in case of Numeric+MidNum+Numeric + * it'll be Numeric all the way. + */ + enum WordBreakClass wbcSeqStart = WBP_Undefined; utf32_t ch; + size_t posNext = 0; size_t posCur = 0; - size_t posCurSt = 0; size_t posLast = 0; - /* FIXME: unused atm. */ + /* TODO: Language-specific specialization. */ (void) lang; - - /* Init brks */ + /* Init brks. */ memset(brks, WORDBREAK_BREAK, len); - ch = get_next_char(s, len, &posCur); + ch = get_next_char(s, len, &posNext); - /* WB3a, WB3b are implied. */ - for ( ; ch != EOS ; ) + while (ch != EOS) { - /* Current class */ - enum WordBreakClass c_cls; - c_cls = get_char_wb_class(ch, wb_prop_default, - ARRAY_LEN(wb_prop_default)); + enum WordBreakClass wbcCur; + wbcCur = get_char_wb_class(ch, wb_prop_default, + ARRAY_LEN(wb_prop_default)); - switch (c_cls) + switch (wbcCur) { case WBP_CR: - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); - sp_cls = c_cls; - posLast = posCurSt; + /* WB3b */ + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; break; case WBP_LF: - if (sp_cls == WBP_CR) /* WB3 */ + if (wbcSeqStart == WBP_CR) /* WB3 */ { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK, - get_next_char); - sp_cls = c_cls; - posLast = posCurSt; + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; + break; } - sp_cls = c_cls; - posLast = posCurSt; - break; + /* Fall off */ case WBP_Newline: - /* WB3a, WB3b */ - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); - sp_cls = c_cls; - posLast = posCurSt; + /* 
WB3a,3b */ + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; break; case WBP_Extend: case WBP_Format: - /* WB4 - If not the first char/after a newline (W3ab), - * skip this class, set it to be the same as the prev, and mark + /* WB4 - If not the first char/after a newline (WB3a,3b), skip + * this class, set it to be the same as the prev, and mark * brks not to break before them. */ - if ((sp_cls == WBP_Undefined) || IS_WB3ab(sp_cls)) + if ((wbcSeqStart == WBP_Undefined) || IS_WB3ab(wbcSeqStart)) { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); - sp_cls = c_cls; + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; } else { /* It's surely not the first */ - brks[posCurSt - 1] = WORDBREAK_NOBREAK; + brks[posCur - 1] = WORDBREAK_NOBREAK; /* "inherit" the previous class. */ - c_cls = p_cls; + wbcCur = wbcLast; } break; case WBP_Katakana: - if ((sp_cls == WBP_Katakana) || /* WB13 */ - (sp_cls == WBP_ExtendNumLet)) /* WB13b */ + if ((wbcSeqStart == WBP_Katakana) || /* WB13 */ + (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); } /* No rule found, reset */ else { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); } - sp_cls = c_cls; - posLast = posCurSt; + wbcSeqStart = wbcCur; + posLast = posCur; break; case WBP_ALetter: - if ((sp_cls == WBP_ALetter) || /* WB5,6,7 */ - ((sp_cls == WBP_Numeric) && (p_cls == WBP_Numeric)) || /* WB10 */ - (sp_cls == WBP_ExtendNumLet)) /* WB13b */ + if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */ + (wbcLast == WBP_Numeric) || /* WB10 */ + (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ { - set_brks_to(s, brks, 
posLast, posCurSt, len, WORDBREAK_NOBREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); } /* No rule found, reset */ else { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); } - sp_cls = c_cls; - posLast = posCurSt; + wbcSeqStart = wbcCur; + posLast = posCur; break; case WBP_MidNumLet: - if ((p_cls == WBP_ALetter) || /* WBP6,7 */ - (p_cls == WBP_Numeric)) /* WBP11,12 */ + if ((wbcLast == WBP_ALetter) || /* WB6,7 */ + (wbcLast == WBP_Numeric)) /* WB11,12 */ { /* Go on */ } else { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); - sp_cls = c_cls; - posLast = posCurSt; + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; } break; case WBP_MidLetter: - if (p_cls == WBP_ALetter) /* WBP6,7 */ + if (wbcLast == WBP_ALetter) /* WB6,7 */ { /* Go on */ } else { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); - sp_cls = c_cls; - posLast = posCurSt; + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; } break; case WBP_MidNum: - if (p_cls == WBP_Numeric) /* WBP11,12 */ + if (wbcLast == WBP_Numeric) /* WB11,12 */ { /* Go on */ } else { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); - sp_cls = c_cls; - posLast = posCurSt; + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; } break; case WBP_Numeric: - if ((sp_cls == WBP_Numeric) || /* WB8,11,12 */ - ((sp_cls == WBP_ALetter) && (p_cls == WBP_ALetter)) || /* WB9 */ - (sp_cls == WBP_ExtendNumLet)) /* WB13b */ + if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */ + (wbcLast == WBP_ALetter) || /* WB9 */ + (wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */ { - 
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); } /* No rule found, reset */ else { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); } - sp_cls = c_cls; - posLast = posCurSt; + wbcSeqStart = wbcCur; + posLast = posCur; break; case WBP_ExtendNumLet: /* WB13a,13b */ - if ((sp_cls == p_cls) && - ((p_cls == WBP_ALetter) || - (p_cls == WBP_Numeric) || - (p_cls == WBP_Katakana) || - (p_cls == WBP_ExtendNumLet))) + if ((wbcSeqStart == wbcLast) && + ((wbcLast == WBP_ALetter) || + (wbcLast == WBP_Numeric) || + (wbcLast == WBP_Katakana) || + (wbcLast == WBP_ExtendNumLet))) { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); } /* No rule found, reset */ else { - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); } - sp_cls = c_cls; - posLast = posCurSt; + wbcSeqStart = wbcCur; + posLast = posCur; break; case WBP_Any: /* Allow breaks and reset */ - set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK, - get_next_char); - sp_cls = c_cls; - posLast = posCurSt; + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_BREAK, get_next_char); + wbcSeqStart = wbcCur; + posLast = posCur; break; default: @@ -364,14 +366,14 @@ static void set_wordbreaks( break; } - p_cls = c_cls; - posCurSt = posCur; - ch = get_next_char(s, len, &posCur); + wbcLast = wbcCur; + posCur = posNext; + ch = get_next_char(s, len, &posNext); } /* WB2 */ - set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_BREAK, - get_next_char); + set_brks_to(s, brks, posLast, posNext, len, + WORDBREAK_BREAK, get_next_char); } /** diff --git 
a/src/static_libs/liblinebreak/wordbreak.h b/src/static_libs/libunibreak/wordbreak.h similarity index 87% rename from src/static_libs/liblinebreak/wordbreak.h rename to src/static_libs/libunibreak/wordbreak.h index 7b7bea7114..47bef274ce 100644 --- a/src/static_libs/liblinebreak/wordbreak.h +++ b/src/static_libs/libunibreak/wordbreak.h @@ -4,7 +4,7 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2011-2011 Tom Hacohen + * Copyright (C) 2012 Tom Hacohen * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -39,7 +39,7 @@ * * Header file for the word breaking (segmentation) algorithm. * - * @version 2.0, 2011/12/12 + * @version 2.2, 2012/02/04 * @author Tom Hacohen */ @@ -47,15 +47,15 @@ #define WORDBREAK_H #include -#include +#include "linebreak.h" #ifdef __cplusplus extern "C" { #endif -#define WORDBREAK_BREAK 0 /* Break found */ -#define WORDBREAK_NOBREAK 1 /**< Break not found */ -#define WORDBREAK_INSIDECHAR 2 /**< A UTF-8/16 sequence is unfinished */ +#define WORDBREAK_BREAK 0 /**< Break is allowed */ +#define WORDBREAK_NOBREAK 1 /**< No break is allowed */ +#define WORDBREAK_INSIDEACHAR 2 /**< A UTF-8/16 sequence is unfinished */ void init_wordbreak(void); void set_wordbreaks_utf8( diff --git a/src/static_libs/liblinebreak/wordbreakdata.x b/src/static_libs/libunibreak/wordbreakdata.c similarity index 99% rename from src/static_libs/liblinebreak/wordbreakdata.x rename to src/static_libs/libunibreak/wordbreakdata.c index c7278ef7f0..c42c51b278 100644 --- a/src/static_libs/liblinebreak/wordbreakdata.x +++ b/src/static_libs/libunibreak/wordbreakdata.c @@ -2,8 +2,10 @@ # WordBreakProperty-6.0.0.txt # Date: 2010-08-19, 00:48:48 GMT [MD] */ + #include "linebreak.h" #include "wordbreakdef.h" + static struct WordBreakProperties wb_prop_default[] = { {0x000A, 0x000A, WBP_LF}, {0x000B, 0x000C, WBP_Newline}, diff --git 
a/src/static_libs/liblinebreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h similarity index 93% rename from src/static_libs/liblinebreak/wordbreakdef.h rename to src/static_libs/libunibreak/wordbreakdef.h index 331cd01d05..0b4b1653f9 100644 --- a/src/static_libs/liblinebreak/wordbreakdef.h +++ b/src/static_libs/libunibreak/wordbreakdef.h @@ -4,7 +4,7 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2011-2011 Tom Hacohen + * Copyright (C) 2012 Tom Hacohen * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -40,7 +40,7 @@ * Definitions of internal data structures, declarations of global * variables, and function prototypes for the word breaking algorithm. * - * @version 2.0, 2011/12/12 + * @version 2.1, 2012/01/18 * @author Tom Hacohen */ @@ -51,8 +51,6 @@ enum WordBreakClass { WBP_Undefined, - - /* The following break classes are treated in the pair table. */ WBP_CR, WBP_LF, WBP_Newline,