Efl static_libs: Updated liblinebreak -> libunibreak.

SVN revision: 82652
This commit is contained in:
Tom Hacohen 2013-01-11 18:16:09 +00:00
parent d83b83e10b
commit 88ab486e63
18 changed files with 690 additions and 329 deletions

View File

@ -1,3 +1,7 @@
2013-01-11 Tom Hacohen (TAsn)
* Static libs: Updated liblinebreak to libunibreak's latest version.
2013-01-11 Cedric Bail
* Fix not up to date clip cache for Evas_Object_Text.

1
NEWS
View File

@ -74,6 +74,7 @@ Improvements:
* use Eina_File in webp, gif, tiff, png and eet loader
* Eina.h includes eina_alloca.h/alloca.h to define alloca()
* Improved eina share del speed.
* Upgrade liblinebreak to latest version of libunibreak.
Fixes:
* Fix PPC (big endian) image codec bug.

View File

@ -124,5 +124,5 @@ clean-local:
rm -rf modules/emotion/xine/*.gcno
rm -rf modules/emotion/gstreamer/*.gcno
rm -rf modules/emotion/generic/*.gcno
rm -rf static_libs/liblinebreak/*.gcno
rm -rf static_libs/libunibreak/*.gcno
rm -rf static_libs/lz4/*.gcno

View File

@ -19,18 +19,18 @@ lib/evas/include/evas_blend_ops.h
# Linebreak
noinst_HEADERS += \
static_libs/liblinebreak/linebreak.h \
static_libs/liblinebreak/linebreakdef.h \
static_libs/liblinebreak/wordbreakdef.h \
static_libs/liblinebreak/wordbreak.h
static_libs/libunibreak/linebreak.h \
static_libs/libunibreak/linebreakdef.h \
static_libs/libunibreak/wordbreakdef.h \
static_libs/libunibreak/wordbreak.h
# Linebreak
lib_evas_libevas_la_SOURCES = \
static_libs/liblinebreak/linebreak.c \
static_libs/liblinebreak/linebreakdata.c \
static_libs/liblinebreak/linebreakdef.c \
static_libs/liblinebreak/wordbreak.c \
static_libs/liblinebreak/wordbreakdata.x
static_libs/libunibreak/linebreak.c \
static_libs/libunibreak/linebreakdata.c \
static_libs/libunibreak/linebreakdef.c \
static_libs/libunibreak/wordbreak.c \
static_libs/libunibreak/wordbreakdata.c
# Main
lib_evas_libevas_la_SOURCES += \
@ -194,7 +194,7 @@ lib_evas_libevas_la_CPPFLAGS = \
-I$(top_srcdir)/src/lib/evas/include \
-I$(top_srcdir)/src/lib/evas/cserve2 \
-I$(top_srcdir)/src/lib/evas/file \
-I$(top_srcdir)/src/static_libs/liblinebreak \
-I$(top_srcdir)/src/static_libs/libunibreak \
-I$(top_srcdir)/src/lib/evas/common \
-I$(top_srcdir)/src/lib/eina \
-I$(top_builddir)/src/lib/eina \
@ -239,11 +239,11 @@ lib_evas_libevas_la_LDFLAGS = @EFL_LTLIBRARY_FLAGS@
# Linebreak
EXTRA_DIST += \
static_libs/liblinebreak/LICENCE \
static_libs/liblinebreak/AUTHORS \
static_libs/liblinebreak/NEWS \
static_libs/liblinebreak/README \
static_libs/liblinebreak/ChangeLog
static_libs/libunibreak/LICENCE \
static_libs/libunibreak/AUTHORS \
static_libs/libunibreak/NEWS \
static_libs/libunibreak/README \
static_libs/libunibreak/ChangeLog
# Engines

View File

@ -1,3 +1,194 @@
2012-10-06 Wu Yongwei <wuyongwei@gmail.com>
Update files according to UAX #14-30, for Unicode 6.2.0.
* README: Update the reference to UAX #14-30.
* src/linebreak.c (baTable): Update for the new class `RI'.
* src/linebreak.h (LINEBREAK_VERSION): Set to 0x0202.
* src/linebreakdef.h (LBP_RI): New enumerator for the new class `RI'
as defined in UAX #14-30.
* src/linebreakdata.c: Regenerate from LineBreak-6.2.0.txt.
2012-10-06 Wu Yongwei <wuyongwei@gmail.com>
* src/linebreak.c (baTable): Correct the issue that one column was
missing in the table.
2012-10-06 Wu Yongwei <wuyongwei@gmail.com>
* README: Update to reflect the recent changes.
2012-10-06 Wu Yongwei <wuyongwei@gmail.com>
Make `make linebreakdata' and `make wordbreakdata' work again.
* src/Makefile.am (EXTRA_DIST): Add missing `filter_dup.c'.
(linebreakdata): New make target.
(wordbreakdata): New make target.
2012-10-06 Wu Yongwei <wuyongwei@gmail.com>
Make `make dist' work again after the directory adjustment.
* Doxyfile (INPUT): Change to `src'.
(FILE_PATTERNS): Set to `*.c *.h'.
* Makefile.am (EXTRA_DIST): Move content from src/Makefile.am.
(doc): Move target from src/Makefile.am.
* src/Makefile.am (EXTRA_DIST): Move partial content to Makefile.am.
(doc): Move target to Makefile.am.
2012-09-16 Wu Yongwei <wuyongwei@gmail.com>
Update files according to UAX #14-28, for Unicode 6.1.0.
* README: Update the reference to UAX #14-28.
* src/linebreak.c (baTable): Update for the new class `HL'.
(resolve_lb_class): Resolve the new class `CJ' to `ID' (simplified).
* src/linebreakdef.h (LBP_HL): New enumerator for the new class `HL'
as defined in UAX #14-28.
(LBP_CJ): New enumerator for the new class `CJ' as defined in
UAX #14-28.
* src/linebreakdata.c: Regenerate from LineBreak-6.1.0.txt.
2012-08-13 Tom Hacohen <tom@stosb.com>
Move source files to under src.
* Makefile.am: Split from original Makefile.am.
(SUBDIRS): Add `src'.
* configure.ac (AC_CONFIG_SRCDIR): Add `src/' before `linebreak.c'.
(AC_CONFIG_FILES): Add `src/Makefile'.
* src/LineBreak1.sed: Move from LineBreak1.sed.
* src/LineBreak2.sed: Move from LineBreak2.sed.
* src/Makefile.am: Split from Makefile.am.
* src/Makefile.gcc: Move from Makefile.gcc.
* src/Makefile.msvc: Move from Makefile.msvc.
* src/filter_dup.c: Move from filter_dup.c.
* src/linebreak.c: Move from linebreak.c.
* src/linebreak.h: Move from linebreak.h.
* src/linebreakdata.c: Move from linebreakdata.c.
* src/linebreakdata1.tmpl: Move from linebreakdata1.tmpl.
* src/linebreakdata2.tmpl: Move from linebreakdata2.tmpl.
* src/linebreakdata3.tmpl: Move from linebreakdata3.tmpl.
* src/linebreakdef.c: Move from linebreakdef.c.
* src/linebreakdef.h: Move from linebreakdef.h.
* src/sort_numeric_hex.py: Move from sort_numeric_hex.py.
* src/wordbreak.c: Move from wordbreak.c.
* src/wordbreak.h: Move from wordbreak.h.
* src/wordbreakdata.c: Move from wordbreakdata.c.
* src/wordbreakdata1.tmpl: Move from wordbreakdata1.tmpl.
* src/wordbreakdata2.tmpl: Move from wordbreakdata2.tmpl.
* src/wordbreakdef.h: Move from wordbreakdef.h.
2012-08-12 Wu Yongwei <wuyongwei@gmail.com>
* README: Change the home URL to github; remove $Id$; eliminate
non-ASCII characters.
2012-08-11 Wu Yongwei <wuyongwei@gmail.com>
* configure.ac (AC_INIT): Change the library name and version to
`libunibreak' and `1.0'.
(AC_PROG_LN_S): New macro.
(AC_OUTPUT): Change to `libunibreak.pc'.
* Doxyfile: (PROJECT_NAME): Change to `libunibreak'.
(PROJECT_NUMBER): Change to `1.0'.
* LICENCE: Add copyright information about Tom Hacohen.
* Makefile.am (lib_LTLIBRARIES): Change to `libunibreak.la'.
(pkgconfig_DATA): Change to `libunibreak.pc'.
(libunibreak_la_LDFLAGS): Reset the version to `1:0'.
(install-exec-hook): Replace the static library liblinebreak.a with
a symlink to libunibreak.a.
* Makefile.msvc: Change the library name to `libunibreak', and the
output library to `unibreak.lib'.
* NEWS: Add information about libunibreak 1.0.
* README: Change the library name, and add information about word
break.
2012-02-04 Wu Yongwei <wuyongwei@gmail.com>
* wordbreak.h (WORDBREAK_INSIDEACHAR): Change from
WORDBREAK_INSIDECHAR.
* wordbreak.c (set_brks_to): Change `WORDBREAK_INSIDECHAR' to
`WORDBREAK_INSIDEACHAR'.
2012-01-19 Wu Yongwei <wuyongwei@gmail.com>
* wordbreak.h: Change angle brackets to quotation marks (which
caused build errors).
2012-01-19 Wu Yongwei <wuyongwei@gmail.com>
* Makefile.gcc (CFILES): Add wordbreak.c.
(WordBreakProperty.txt): New target.
(wordbreakdata): New target.
2012-01-19 Wu Yongwei <wuyongwei@gmail.com>
* Makefile.am (liblinebreak_la_SOURCES): Remove wordbreakdata.c.
(EXTRA_DIST): Add wordbreakdata.c, wordbreakdata1.tmpl, and
wordbreakdata2.tmpl.
2012-01-19 Wu Yongwei <wuyongwei@gmail.com>
* Makefile.msvc: Add wordbreak files.
2012-01-18 Tom Hacohen <tom@stosb.com>
Add word breaking support.
* AUTHORS: Add `Tom Hacohen'.
* Makefile.am (include_HEADERS): Add header files for word breaking.
(liblinebreak_la_SOURCES): Add source files for word breaking.
(sort_numeric_hex.py): Add `sort_numeric_hex.py'.
(distclean-local): Clean also `WordBreakData.txt'.
(WordBreakProperty.txt): New target.
(wordbreakdata): New target.
* sort_numeric_hex.py: New file.
* wordbreak.c: New file.
* wordbreak.h: New file.
* wordbreakdef.h: New file.
* wordbreakdata.c: New file.
* wordbreakdata1.tmpl: New file.
* wordbreakdata2.tmpl: New file.
2011-05-17 Wu Yongwei <wuyongwei@gmail.com>
Add support for pkg-config (thanks to Tom Hacohen).
* liblinebreak.pc.in: New file.
* configure.ac (AC_OUTPUT): Add `liblinebreak.pc'.
* Makefile.am (pkgconfig_DATA): Set to `liblinebreak.pc'.
(pkgconfigdir): Set to `$(libdir)/pkgconfig'.
2011-05-07 Wu Yongwei <wuyongwei@gmail.com>
* README: Update the reference to UAX #14-26, for Unicode 6.0.0.
2011-05-07 Wu Yongwei <wuyongwei@gmail.com>
* configure.ac (AC_INIT): Increase the version to 2.1.
* Makefile.am (liblinebreak_la_LDFLAGS): Set the version-info to
`2:1'.
2011-05-07 Wu Yongwei <wuyongwei@gmail.com>
* LICENCE: Update the copyright year.
2011-05-07 Wu Yongwei <wuyongwei@gmail.com>
Update for the 2.1 release.
* Doxyfile (PROJECT_NUMBER): Set to `2.1'.
* NEWS: Add information about the 2.1 release.
* linebreak.h (LINEBREAK_VERSION): Set to `0x0201'.
* linebreak.h: Update comments.
* linebreak.c: Ditto.
* linebreakdef.h: Ditto.
* linebreakdef.c: Ditto.
2011-05-07 Wu Yongwei <wuyongwei@gmail.com>
* linebreakdata.c: Regenerate from LineBreak-6.0.0.txt.
2011-05-07 Wu Yongwei <wuyongwei@gmail.com>
* linebreak.c (set_linebreaks): Fix the assertion failure when
U+FFFC (OBJECT REPLACEMENT CHARACTER) appears at the beginning of a
line (thanks to Tom Hacohen).
2010-01-03 Wu Yongwei <wuyongwei@gmail.com>
* LICENCE: Update the copyright year.

View File

@ -1,4 +1,5 @@
Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
Copyright (C) 2012 Tom Hacohen <tom dot hacohen at samsung dot com>
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages

View File

@ -1,11 +1,23 @@
New in 2.0
New in libunibreak 1.0
- Add word breaking support
- Change the library name to "libunibreak", while keeping maximum compatibility
- Add pkg-config support
New in liblinebreak 2.1
- Update the data according to LineBreak-6.0.0.txt
- Fix the bug that an assertion in code can fail if U+FFFC is
encountered at the beginning of a line
New in liblinebreak 2.0
- Update the algorithm and data according to UAX #14-24 and
LineBreak-5.2.0.txt
- Rename some functions to reduce namespace pollution
- Make Doxygen documentation better
New in 1.2
New in liblinebreak 1.2
- Fix the bug that an assertion in code can fail if an invalid UTF-8 or
UTF-16 sequence is encountered near the end of input
@ -14,7 +26,7 @@ New in 1.2
used as apostrophe
- Make Doxygen documentation better
New in 1.1
New in liblinebreak 1.1
- Make get_lb_prop_lang static and not an exported symbol
- Define is_line_breakable to alias to is_breakable
@ -23,7 +35,7 @@ New in 1.1
linebreakdef.h
- Add the function documentation comments to the header files
New in 1.0
New in liblinebreak 1.0
- Update the line breaking data according to UAX #14-22 and
LineBreak-5.1.0.txt

View File

@ -1,20 +1,17 @@
NOTICE: This is the original version, that was adapted a bit (mostly
build related) in order to work nicely with Evas.
L I B L I N E B R E A K
=======================
L I B U N I B R E A K
=====================
Overview
--------
This is the README file for liblinebreak, an implementation of the line
breaking algorithm as described in Unicode 5.2.0 Standard Annex 14,
Revision 24, available at
<URL:http://www.unicode.org/reports/tr14/tr14-24.html>
This is the README file for libunibreak, an implementation of the line
breaking and word breaking algorithms as described in Unicode
Standard Annex 14 and Unicode Standard Annex 29, available at
<URL:http://www.unicode.org/reports/tr14/tr14-30.html>
<URL:http://www.unicode.org/reports/tr29/tr29-17.html>
Check this URL for up-to-date information:
<URL:http://vimgadgets.sourceforge.net/liblinebreak/>
<URL:https://github.com/adah1972/libunibreak>
Licence
@ -49,9 +46,12 @@ There are three ways to build the library:
- type `make doc' to generate the doxygen documentation; or
- type `make linebreakdata' to regenerate linebreakdata.c from
LineBreak.txt.
- type `make wordbreakdata' to regenerate wordbreakdata.c from
WordBreakProperty.txt.
2) On systems where GCC and Binutils are supported, one can type
cd src
cp -p Makefile.gcc Makefile
make
@ -62,25 +62,27 @@ There are three ways to build the library:
- type `make doc' to generate the doxygen documentation; or
- type `make linebreakdata' to regenerate linebreakdata.c from
LineBreak.txt.
- type `make wordbreakdata' to regenerate wordbreakdata.c from
WordBreakProperty.txt.
3) On Windows, apart from using method 1 (Cygwin/MSYS) and method 2
(MinGW), MSVC can also be used. Type
cd src
nmake -f Makefile.msvc
to build the static library. By default the debug release is built.
To build the release version
nmake -f Makefile.msvc CFG="linebreak - Win32 Release"
nmake -f Makefile.msvc CFG="libunibreak - Win32 Release"
Documentation
-------------
Check the generated document doc/html/linebreak_8h.html for the public
Check the generated document doc/html/linebreak_8h.html and
doc/html/wordbreak_8h.html in the downloaded file for the public
interfaces exposed to applications.
$Id: README,v 1.6 2009/11/29 08:09:13 adah Exp $
vim:autoindent:expandtab:formatoptions=tcqlmn:textwidth=72:

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 24, for
* Unicode 5.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
* This library has been updated according to Revision 30, for
* Unicode 6.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -44,7 +44,7 @@
* Implementation of the line breaking algorithm as described in Unicode
* Standard Annex 14.
*
* @version 2.0, 2010/01/03
* @version 2.3, 2012/10/06
* @author Wu Yongwei
*/
@ -79,144 +79,183 @@ enum BreakAction
/**
* Break action pair table. This is a direct mapping of Table 2 of
* Unicode Standard Annex 14, Revision 24.
* Unicode Standard Annex 14, Revision 30.
*/
static enum BreakAction baTable[LBP_JT][LBP_JT] = {
static enum BreakAction baTable[LBP_RI][LBP_RI] = {
{ /* OP */
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK,
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK },
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
CMP_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
PRH_BRK },
{ /* CL */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* CP */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* QU */
PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK },
{ /* GL */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK },
{ /* NS */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* EX */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* SY */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* IS */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* PR */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK },
{ /* PO */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* NU */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* AL */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* HL */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* ID */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* IN */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* HY */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* BA */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* BB */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK },
{ /* B2 */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* ZW */
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* CM */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK },
{ /* WJ */
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
IND_BRK },
{ /* H2 */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
DIR_BRK },
{ /* H3 */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK },
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
DIR_BRK },
{ /* JL */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
DIR_BRK },
{ /* JV */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK,
DIR_BRK },
{ /* JT */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK }
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK,
DIR_BRK },
{ /* RI */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK },
};
/**
@ -379,7 +418,15 @@ static enum LineBreakClass resolve_lb_class(
{
return LBP_ID;
}
/* Fall through */
else
{
return LBP_AL;
}
case LBP_CJ:
/* Simplified for `normal' line breaking. See
* <url:http://www.unicode.org/reports/tr14/tr14-28.html#CJ>
* for details. */
return LBP_ID;
case LBP_SA:
case LBP_SG:
case LBP_XX:
@ -609,6 +656,9 @@ nextline:
lbcNew = resolve_lb_class(lbcNew, lang);
/* TODO: LB21a, as introduced by Revision 28 of UAX#14, is not
* yet implemented below. */
assert(lbcCur <= LBP_JT);
assert(lbcNew <= LBP_JT);
switch (baTable[lbcCur - 1][lbcNew - 1])

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 24, for
* Unicode 5.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
* This library has been updated according to Revision 30, for
* Unicode 6.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -43,7 +43,7 @@
*
* Header file for the line breaking algorithm.
*
* @version 2.0, 2010/01/03
* @version 2.2, 2012/10/06
* @author Wu Yongwei
*/
@ -56,7 +56,7 @@
extern "C" {
#endif
#define LINEBREAK_VERSION 0x0200 /**< Version of the library linebreak */
#define LINEBREAK_VERSION 0x0202 /**< Version of the library linebreak */
extern const int linebreak_version;
#ifndef LINEBREAK_UTF_TYPES_DEFINED

View File

@ -1,6 +1,6 @@
/* The content of this file is generated from:
# LineBreak-6.0.0.txt
# Date: 2010-08-18, 17:25:00 PDT [KW]
# LineBreak-6.2.0.txt
# Date: 2012-08-08, 19:26:00 GMT [KW]
*/
#include "linebreak.h"
@ -98,6 +98,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x048A, 0x0587, LBP_AL },
{ 0x0589, 0x0589, LBP_IS },
{ 0x058A, 0x058A, LBP_BA },
{ 0x058F, 0x058F, LBP_PR },
{ 0x0591, 0x05BD, LBP_CM },
{ 0x05BE, 0x05BE, LBP_BA },
{ 0x05BF, 0x05BF, LBP_CM },
@ -107,7 +108,8 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x05C4, 0x05C5, LBP_CM },
{ 0x05C6, 0x05C6, LBP_EX },
{ 0x05C7, 0x05C7, LBP_CM },
{ 0x05D0, 0x0608, LBP_AL },
{ 0x05D0, 0x05F2, LBP_HL },
{ 0x05F3, 0x0608, LBP_AL },
{ 0x0609, 0x060B, LBP_PO },
{ 0x060C, 0x060D, LBP_IS },
{ 0x060E, 0x060F, LBP_AL },
@ -155,8 +157,8 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x0829, 0x082D, LBP_CM },
{ 0x0830, 0x0858, LBP_AL },
{ 0x0859, 0x085B, LBP_CM },
{ 0x085E, 0x085E, LBP_AL },
{ 0x0900, 0x0903, LBP_CM },
{ 0x085E, 0x08AC, LBP_AL },
{ 0x08E4, 0x0903, LBP_CM },
{ 0x0904, 0x0939, LBP_AL },
{ 0x093A, 0x093C, LBP_CM },
{ 0x093D, 0x093D, LBP_AL },
@ -199,6 +201,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x0AD0, 0x0AE1, LBP_AL },
{ 0x0AE2, 0x0AE3, LBP_CM },
{ 0x0AE6, 0x0AEF, LBP_NU },
{ 0x0AF0, 0x0AF0, LBP_AL },
{ 0x0AF1, 0x0AF1, LBP_PR },
{ 0x0B01, 0x0B03, LBP_CM },
{ 0x0B05, 0x0B39, LBP_AL },
@ -257,7 +260,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x0E5A, 0x0E5B, LBP_BA },
{ 0x0E81, 0x0ECD, LBP_SA },
{ 0x0ED0, 0x0ED9, LBP_NU },
{ 0x0EDC, 0x0EDD, LBP_SA },
{ 0x0EDC, 0x0EDF, LBP_SA },
{ 0x0F00, 0x0F00, LBP_AL },
{ 0x0F01, 0x0F04, LBP_BB },
{ 0x0F05, 0x0F05, LBP_AL },
@ -310,7 +313,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1050, 0x108F, LBP_SA },
{ 0x1090, 0x1099, LBP_NU },
{ 0x109A, 0x109F, LBP_SA },
{ 0x10A0, 0x10FC, LBP_AL },
{ 0x10A0, 0x10FF, LBP_AL },
{ 0x1100, 0x115F, LBP_JL },
{ 0x1160, 0x11A7, LBP_JV },
{ 0x11A8, 0x11FF, LBP_JT },
@ -386,10 +389,10 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1B74, 0x1B7C, LBP_AL },
{ 0x1B80, 0x1B82, LBP_CM },
{ 0x1B83, 0x1BA0, LBP_AL },
{ 0x1BA1, 0x1BAA, LBP_CM },
{ 0x1BA1, 0x1BAD, LBP_CM },
{ 0x1BAE, 0x1BAF, LBP_AL },
{ 0x1BB0, 0x1BB9, LBP_NU },
{ 0x1BC0, 0x1BE5, LBP_AL },
{ 0x1BBA, 0x1BE5, LBP_AL },
{ 0x1BE6, 0x1BF3, LBP_CM },
{ 0x1BFC, 0x1C23, LBP_AL },
{ 0x1C24, 0x1C37, LBP_CM },
@ -399,14 +402,15 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1C50, 0x1C59, LBP_NU },
{ 0x1C5A, 0x1C7D, LBP_AL },
{ 0x1C7E, 0x1C7F, LBP_BA },
{ 0x1CC0, 0x1CC7, LBP_AL },
{ 0x1CD0, 0x1CD2, LBP_CM },
{ 0x1CD3, 0x1CD3, LBP_AL },
{ 0x1CD4, 0x1CE8, LBP_CM },
{ 0x1CE9, 0x1CEC, LBP_AL },
{ 0x1CED, 0x1CED, LBP_CM },
{ 0x1CEE, 0x1CF1, LBP_AL },
{ 0x1CF2, 0x1CF2, LBP_CM },
{ 0x1D00, 0x1DBF, LBP_AL },
{ 0x1CF2, 0x1CF4, LBP_CM },
{ 0x1CF5, 0x1DBF, LBP_AL },
{ 0x1DC0, 0x1DFF, LBP_CM },
{ 0x1E00, 0x1FFC, LBP_AL },
{ 0x1FFD, 0x1FFD, LBP_BB },
@ -469,7 +473,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x20A7, 0x20A7, LBP_PO },
{ 0x20A8, 0x20B5, LBP_PR },
{ 0x20B6, 0x20B6, LBP_PO },
{ 0x20B7, 0x20B9, LBP_PR },
{ 0x20B7, 0x20BA, LBP_PR },
{ 0x20D0, 0x20F0, LBP_CM },
{ 0x2100, 0x2102, LBP_AL },
{ 0x2103, 0x2103, LBP_PO },
@ -560,10 +564,14 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x22BF, 0x22BF, LBP_AI },
{ 0x22C0, 0x2311, LBP_AL },
{ 0x2312, 0x2312, LBP_AI },
{ 0x2313, 0x2328, LBP_AL },
{ 0x2313, 0x2319, LBP_AL },
{ 0x231A, 0x231B, LBP_ID },
{ 0x231C, 0x2328, LBP_AL },
{ 0x2329, 0x2329, LBP_OP },
{ 0x232A, 0x232A, LBP_CL },
{ 0x232B, 0x244A, LBP_AL },
{ 0x232B, 0x23EF, LBP_AL },
{ 0x23F0, 0x23F3, LBP_ID },
{ 0x2400, 0x244A, LBP_AL },
{ 0x2460, 0x24FE, LBP_AI },
{ 0x24FF, 0x24FF, LBP_AL },
{ 0x2500, 0x254B, LBP_AI },
@ -595,19 +603,23 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x25E2, 0x25E5, LBP_AI },
{ 0x25E6, 0x25EE, LBP_AL },
{ 0x25EF, 0x25EF, LBP_AI },
{ 0x25F0, 0x2604, LBP_AL },
{ 0x25F0, 0x25FF, LBP_AL },
{ 0x2600, 0x2603, LBP_ID },
{ 0x2604, 0x2604, LBP_AL },
{ 0x2605, 0x2606, LBP_AI },
{ 0x2607, 0x2608, LBP_AL },
{ 0x2609, 0x2609, LBP_AI },
{ 0x260A, 0x260D, LBP_AL },
{ 0x260E, 0x260F, LBP_AI },
{ 0x2610, 0x2613, LBP_AL },
{ 0x2614, 0x2617, LBP_AI },
{ 0x2618, 0x261B, LBP_AL },
{ 0x261C, 0x261C, LBP_AI },
{ 0x261D, 0x261D, LBP_AL },
{ 0x261E, 0x261E, LBP_AI },
{ 0x261F, 0x263F, LBP_AL },
{ 0x2614, 0x2615, LBP_ID },
{ 0x2616, 0x2617, LBP_AI },
{ 0x2618, 0x2618, LBP_ID },
{ 0x2619, 0x2619, LBP_AL },
{ 0x261A, 0x261F, LBP_ID },
{ 0x2620, 0x2638, LBP_AL },
{ 0x2639, 0x263B, LBP_ID },
{ 0x263C, 0x263F, LBP_AL },
{ 0x2640, 0x2640, LBP_AI },
{ 0x2641, 0x2641, LBP_AL },
{ 0x2642, 0x2642, LBP_AI },
@ -616,24 +628,45 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x2662, 0x2662, LBP_AL },
{ 0x2663, 0x2665, LBP_AI },
{ 0x2666, 0x2666, LBP_AL },
{ 0x2667, 0x266A, LBP_AI },
{ 0x2667, 0x2667, LBP_AI },
{ 0x2668, 0x2668, LBP_ID },
{ 0x2669, 0x266A, LBP_AI },
{ 0x266B, 0x266B, LBP_AL },
{ 0x266C, 0x266D, LBP_AI },
{ 0x266E, 0x266E, LBP_AL },
{ 0x266F, 0x266F, LBP_AI },
{ 0x2670, 0x269D, LBP_AL },
{ 0x2670, 0x267E, LBP_AL },
{ 0x267F, 0x267F, LBP_ID },
{ 0x2680, 0x269D, LBP_AL },
{ 0x269E, 0x269F, LBP_AI },
{ 0x26A0, 0x26BD, LBP_AL },
{ 0x26BE, 0x26BF, LBP_AI },
{ 0x26C0, 0x26C3, LBP_AL },
{ 0x26C4, 0x26CD, LBP_AI },
{ 0x26A0, 0x26BC, LBP_AL },
{ 0x26BD, 0x26C8, LBP_ID },
{ 0x26C9, 0x26CC, LBP_AI },
{ 0x26CD, 0x26CD, LBP_ID },
{ 0x26CE, 0x26CE, LBP_AL },
{ 0x26CF, 0x26E1, LBP_AI },
{ 0x26CF, 0x26D1, LBP_ID },
{ 0x26D2, 0x26D2, LBP_AI },
{ 0x26D3, 0x26D4, LBP_ID },
{ 0x26D5, 0x26D7, LBP_AI },
{ 0x26D8, 0x26D9, LBP_ID },
{ 0x26DA, 0x26DB, LBP_AI },
{ 0x26DC, 0x26DC, LBP_ID },
{ 0x26DD, 0x26DE, LBP_AI },
{ 0x26DF, 0x26E1, LBP_ID },
{ 0x26E2, 0x26E2, LBP_AL },
{ 0x26E3, 0x26E3, LBP_AI },
{ 0x26E4, 0x26E7, LBP_AL },
{ 0x26E8, 0x26FF, LBP_AI },
{ 0x2701, 0x2756, LBP_AL },
{ 0x26E8, 0x26E9, LBP_AI },
{ 0x26EA, 0x26EA, LBP_ID },
{ 0x26EB, 0x26F0, LBP_AI },
{ 0x26F1, 0x26F5, LBP_ID },
{ 0x26F6, 0x26F6, LBP_AI },
{ 0x26F7, 0x26FA, LBP_ID },
{ 0x26FB, 0x26FC, LBP_AI },
{ 0x26FD, 0x2704, LBP_ID },
{ 0x2705, 0x2707, LBP_AL },
{ 0x2708, 0x270D, LBP_ID },
{ 0x270E, 0x2756, LBP_AL },
{ 0x2757, 0x2757, LBP_AI },
{ 0x2758, 0x275A, LBP_AL },
{ 0x275B, 0x275E, LBP_QU },
@ -704,6 +737,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x2B55, 0x2B59, LBP_AI },
{ 0x2C00, 0x2CEE, LBP_AL },
{ 0x2CEF, 0x2CF1, LBP_CM },
{ 0x2CF2, 0x2CF3, LBP_AL },
{ 0x2CF9, 0x2CF9, LBP_EX },
{ 0x2CFA, 0x2CFC, LBP_BA },
{ 0x2CFD, 0x2CFD, LBP_AL },
@ -736,6 +770,10 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x2E2E, 0x2E2E, LBP_EX },
{ 0x2E2F, 0x2E2F, LBP_AL },
{ 0x2E30, 0x2E31, LBP_BA },
{ 0x2E32, 0x2E32, LBP_AL },
{ 0x2E33, 0x2E34, LBP_BA },
{ 0x2E35, 0x2E39, LBP_AL },
{ 0x2E3A, 0x2E3B, LBP_B2 },
{ 0x2E80, 0x3000, LBP_ID },
{ 0x3001, 0x3002, LBP_CL },
{ 0x3003, 0x3004, LBP_ID },
@ -768,55 +806,58 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x3030, 0x303A, LBP_ID },
{ 0x303B, 0x303C, LBP_NS },
{ 0x303D, 0x303F, LBP_ID },
{ 0x3041, 0x3041, LBP_NS },
{ 0x3041, 0x3041, LBP_CJ },
{ 0x3042, 0x3042, LBP_ID },
{ 0x3043, 0x3043, LBP_NS },
{ 0x3043, 0x3043, LBP_CJ },
{ 0x3044, 0x3044, LBP_ID },
{ 0x3045, 0x3045, LBP_NS },
{ 0x3045, 0x3045, LBP_CJ },
{ 0x3046, 0x3046, LBP_ID },
{ 0x3047, 0x3047, LBP_NS },
{ 0x3047, 0x3047, LBP_CJ },
{ 0x3048, 0x3048, LBP_ID },
{ 0x3049, 0x3049, LBP_NS },
{ 0x3049, 0x3049, LBP_CJ },
{ 0x304A, 0x3062, LBP_ID },
{ 0x3063, 0x3063, LBP_NS },
{ 0x3063, 0x3063, LBP_CJ },
{ 0x3064, 0x3082, LBP_ID },
{ 0x3083, 0x3083, LBP_NS },
{ 0x3083, 0x3083, LBP_CJ },
{ 0x3084, 0x3084, LBP_ID },
{ 0x3085, 0x3085, LBP_NS },
{ 0x3085, 0x3085, LBP_CJ },
{ 0x3086, 0x3086, LBP_ID },
{ 0x3087, 0x3087, LBP_NS },
{ 0x3087, 0x3087, LBP_CJ },
{ 0x3088, 0x308D, LBP_ID },
{ 0x308E, 0x308E, LBP_NS },
{ 0x308E, 0x308E, LBP_CJ },
{ 0x308F, 0x3094, LBP_ID },
{ 0x3095, 0x3096, LBP_NS },
{ 0x3095, 0x3096, LBP_CJ },
{ 0x3099, 0x309A, LBP_CM },
{ 0x309B, 0x309E, LBP_NS },
{ 0x309F, 0x309F, LBP_ID },
{ 0x30A0, 0x30A1, LBP_NS },
{ 0x30A0, 0x30A0, LBP_NS },
{ 0x30A1, 0x30A1, LBP_CJ },
{ 0x30A2, 0x30A2, LBP_ID },
{ 0x30A3, 0x30A3, LBP_NS },
{ 0x30A3, 0x30A3, LBP_CJ },
{ 0x30A4, 0x30A4, LBP_ID },
{ 0x30A5, 0x30A5, LBP_NS },
{ 0x30A5, 0x30A5, LBP_CJ },
{ 0x30A6, 0x30A6, LBP_ID },
{ 0x30A7, 0x30A7, LBP_NS },
{ 0x30A7, 0x30A7, LBP_CJ },
{ 0x30A8, 0x30A8, LBP_ID },
{ 0x30A9, 0x30A9, LBP_NS },
{ 0x30A9, 0x30A9, LBP_CJ },
{ 0x30AA, 0x30C2, LBP_ID },
{ 0x30C3, 0x30C3, LBP_NS },
{ 0x30C3, 0x30C3, LBP_CJ },
{ 0x30C4, 0x30E2, LBP_ID },
{ 0x30E3, 0x30E3, LBP_NS },
{ 0x30E3, 0x30E3, LBP_CJ },
{ 0x30E4, 0x30E4, LBP_ID },
{ 0x30E5, 0x30E5, LBP_NS },
{ 0x30E5, 0x30E5, LBP_CJ },
{ 0x30E6, 0x30E6, LBP_ID },
{ 0x30E7, 0x30E7, LBP_NS },
{ 0x30E7, 0x30E7, LBP_CJ },
{ 0x30E8, 0x30ED, LBP_ID },
{ 0x30EE, 0x30EE, LBP_NS },
{ 0x30EE, 0x30EE, LBP_CJ },
{ 0x30EF, 0x30F4, LBP_ID },
{ 0x30F5, 0x30F6, LBP_NS },
{ 0x30F5, 0x30F6, LBP_CJ },
{ 0x30F7, 0x30FA, LBP_ID },
{ 0x30FB, 0x30FE, LBP_NS },
{ 0x30FB, 0x30FB, LBP_NS },
{ 0x30FC, 0x30FC, LBP_CJ },
{ 0x30FD, 0x30FE, LBP_NS },
{ 0x30FF, 0x31E3, LBP_ID },
{ 0x31F0, 0x31FF, LBP_NS },
{ 0x31F0, 0x31FF, LBP_CJ },
{ 0x3200, 0x3247, LBP_ID },
{ 0x3248, 0x324F, LBP_AI },
{ 0x3250, 0x4DBF, LBP_ID },
@ -835,8 +876,10 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0xA62A, 0xA66E, LBP_AL },
{ 0xA66F, 0xA672, LBP_CM },
{ 0xA673, 0xA673, LBP_AL },
{ 0xA67C, 0xA67D, LBP_CM },
{ 0xA67E, 0xA6EF, LBP_AL },
{ 0xA674, 0xA67D, LBP_CM },
{ 0xA67E, 0xA697, LBP_AL },
{ 0xA69F, 0xA69F, LBP_CM },
{ 0xA6A0, 0xA6EF, LBP_AL },
{ 0xA6F0, 0xA6F1, LBP_CM },
{ 0xA6F2, 0xA6F2, LBP_AL },
{ 0xA6F3, 0xA6F7, LBP_BA },
@ -885,6 +928,11 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0xAA5C, 0xAA5C, LBP_AL },
{ 0xAA5D, 0xAA5F, LBP_BA },
{ 0xAA60, 0xAADF, LBP_SA },
{ 0xAAE0, 0xAAEA, LBP_AL },
{ 0xAAEB, 0xAAEF, LBP_CM },
{ 0xAAF0, 0xAAF1, LBP_BA },
{ 0xAAF2, 0xAAF4, LBP_AL },
{ 0xAAF5, 0xAAF6, LBP_CM },
{ 0xAB01, 0xABE2, LBP_AL },
{ 0xABE3, 0xABEA, LBP_CM },
{ 0xABEB, 0xABEB, LBP_BA },
@ -1693,9 +1741,13 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0xD800, 0xDFFF, LBP_SG },
{ 0xE000, 0xF8FF, LBP_XX },
{ 0xF900, 0xFAFF, LBP_ID },
{ 0xFB00, 0xFB1D, LBP_AL },
{ 0xFB00, 0xFB17, LBP_AL },
{ 0xFB1D, 0xFB1D, LBP_HL },
{ 0xFB1E, 0xFB1E, LBP_CM },
{ 0xFB1F, 0xFD3D, LBP_AL },
{ 0xFB1F, 0xFB28, LBP_HL },
{ 0xFB29, 0xFB29, LBP_AL },
{ 0xFB2A, 0xFB4F, LBP_HL },
{ 0xFB50, 0xFD3D, LBP_AL },
{ 0xFD3E, 0xFD3E, LBP_OP },
{ 0xFD3F, 0xFD3F, LBP_CL },
{ 0xFD50, 0xFDFB, LBP_AL },
@ -1779,7 +1831,7 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0xFF63, 0xFF64, LBP_CL },
{ 0xFF65, 0xFF65, LBP_NS },
{ 0xFF66, 0xFF66, LBP_AL },
{ 0xFF67, 0xFF70, LBP_NS },
{ 0xFF67, 0xFF70, LBP_CJ },
{ 0xFF71, 0xFF9D, LBP_AL },
{ 0xFF9E, 0xFF9F, LBP_NS },
{ 0xFFA0, 0xFFDC, LBP_AL },
@ -1825,6 +1877,24 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x110B0, 0x110BA, LBP_CM },
{ 0x110BB, 0x110BD, LBP_AL },
{ 0x110BE, 0x110C1, LBP_BA },
{ 0x110D0, 0x110E8, LBP_AL },
{ 0x110F0, 0x110F9, LBP_NU },
{ 0x11100, 0x11102, LBP_CM },
{ 0x11103, 0x11126, LBP_AL },
{ 0x11127, 0x11134, LBP_CM },
{ 0x11136, 0x1113F, LBP_NU },
{ 0x11140, 0x11143, LBP_BA },
{ 0x11180, 0x11182, LBP_CM },
{ 0x11183, 0x111B2, LBP_AL },
{ 0x111B3, 0x111C0, LBP_CM },
{ 0x111C1, 0x111C4, LBP_AL },
{ 0x111C5, 0x111C6, LBP_BA },
{ 0x111C7, 0x111C7, LBP_AL },
{ 0x111C8, 0x111C8, LBP_BA },
{ 0x111D0, 0x111D9, LBP_NU },
{ 0x11680, 0x116AA, LBP_AL },
{ 0x116AB, 0x116B7, LBP_CM },
{ 0x116C0, 0x116C9, LBP_NU },
{ 0x12000, 0x12462, LBP_AL },
{ 0x12470, 0x12473, LBP_BA },
{ 0x13000, 0x13257, LBP_AL },
@ -1840,7 +1910,9 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1328A, 0x13378, LBP_AL },
{ 0x13379, 0x13379, LBP_OP },
{ 0x1337A, 0x1337B, LBP_CL },
{ 0x1337C, 0x16A38, LBP_AL },
{ 0x1337C, 0x16F50, LBP_AL },
{ 0x16F51, 0x16F92, LBP_CM },
{ 0x16F93, 0x16F9F, LBP_AL },
{ 0x1B000, 0x1B001, LBP_ID },
{ 0x1D000, 0x1D164, LBP_AL },
{ 0x1D165, 0x1D169, LBP_CM },
@ -1854,13 +1926,36 @@ struct LineBreakProperties lb_prop_default[] = {
{ 0x1D242, 0x1D244, LBP_CM },
{ 0x1D245, 0x1D7CB, LBP_AL },
{ 0x1D7CE, 0x1D7FF, LBP_NU },
{ 0x1F000, 0x1F0DF, LBP_AL },
{ 0x1EE00, 0x1EEF1, LBP_AL },
{ 0x1F000, 0x1F0DF, LBP_ID },
{ 0x1F100, 0x1F12D, LBP_AI },
{ 0x1F12E, 0x1F12E, LBP_AL },
{ 0x1F130, 0x1F19A, LBP_AI },
{ 0x1F1E6, 0x1F1FF, LBP_AL },
{ 0x1F200, 0x1F251, LBP_ID },
{ 0x1F300, 0x1F773, LBP_AL },
{ 0x1F130, 0x1F169, LBP_AI },
{ 0x1F16A, 0x1F16B, LBP_AL },
{ 0x1F170, 0x1F19A, LBP_AI },
{ 0x1F1E6, 0x1F1FF, LBP_RI },
{ 0x1F200, 0x1F3B4, LBP_ID },
{ 0x1F3B5, 0x1F3B6, LBP_AL },
{ 0x1F3B7, 0x1F3BB, LBP_ID },
{ 0x1F3BC, 0x1F3BC, LBP_AL },
{ 0x1F3BD, 0x1F49F, LBP_ID },
{ 0x1F4A0, 0x1F4A0, LBP_AL },
{ 0x1F4A1, 0x1F4A1, LBP_ID },
{ 0x1F4A2, 0x1F4A2, LBP_AL },
{ 0x1F4A3, 0x1F4A3, LBP_ID },
{ 0x1F4A4, 0x1F4A4, LBP_AL },
{ 0x1F4A5, 0x1F4AE, LBP_ID },
{ 0x1F4AF, 0x1F4AF, LBP_AL },
{ 0x1F4B0, 0x1F4B0, LBP_ID },
{ 0x1F4B1, 0x1F4B2, LBP_AL },
{ 0x1F4B3, 0x1F4FC, LBP_ID },
{ 0x1F500, 0x1F506, LBP_AL },
{ 0x1F507, 0x1F516, LBP_ID },
{ 0x1F517, 0x1F524, LBP_AL },
{ 0x1F525, 0x1F531, LBP_ID },
{ 0x1F532, 0x1F543, LBP_AL },
{ 0x1F550, 0x1F6C5, LBP_ID },
{ 0x1F700, 0x1F773, LBP_AL },
{ 0x20000, 0x3FFFD, LBP_ID },
{ 0xE0001, 0xE01EF, LBP_CM },
{ 0xF0000, 0x10FFFD, LBP_XX },

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 24, for
* Unicode 5.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
* This library has been updated according to Revision 30, for
* Unicode 6.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -43,7 +43,7 @@
*
* Definition of language-specific data.
*
* @version 2.0, 2010/01/03
* @version 2.2, 2012/10/06
* @author Wu Yongwei
*/

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2012 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 24, for
* Unicode 5.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
* This library has been updated according to Revision 30, for
* Unicode 6.2.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-30.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -44,7 +44,7 @@
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the line breaking algorithm.
*
* @version 2.0, 2010/01/03
* @version 2.3, 2012/10/06
* @author Wu Yongwei
*/
@ -56,7 +56,7 @@
/**
* Line break classes. This is a direct mapping of Table 1 of Unicode
* Standard Annex 14, Revision 19.
* Standard Annex 14, Revision 26.
*/
enum LineBreakClass
{
@ -77,6 +77,7 @@ enum LineBreakClass
LBP_PO, /**< Postfix */
LBP_NU, /**< Numeric */
LBP_AL, /**< Alphabetic */
LBP_HL, /**< Hebrew letter */
LBP_ID, /**< Ideographic */
LBP_IN, /**< Inseparable characters */
LBP_HY, /**< Hyphen */
@ -91,11 +92,13 @@ enum LineBreakClass
LBP_JL, /**< Hangul L Jamo */
LBP_JV, /**< Hangul V Jamo */
LBP_JT, /**< Hangul T Jamo */
LBP_RI, /**< Regional indicator */
/* The following break classes are not treated in the pair table */
LBP_AI, /**< Ambiguous (alphabetic or ideograph) */
LBP_BK, /**< Break (mandatory) */
LBP_CB, /**< Contingent break */
LBP_CJ, /**< Conditional Japanese starter */
LBP_CR, /**< Carriage return */
LBP_LF, /**< Line feed */
LBP_NL, /**< Next line */

View File

@ -4,7 +4,7 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2011-2011 Tom Hacohen <tom@stosb.com>
* Copyright (C) 2012 Tom Hacohen <tom@stosb.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -40,11 +40,10 @@
* Implementation of the word breaking algorithm as described in Unicode
* Standard Annex 29.
*
* @version 2.0, 2011/12/12
* @version 2.2, 2012/02/04
* @author Tom Hacohen
*/
#include <assert.h>
#include <stddef.h>
#include <string.h>
@ -52,15 +51,16 @@
#include "linebreakdef.h"
#include "wordbreak.h"
#include "wordbreakdata.x"
#include "wordbreakdata.c"
#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
/* Init the wordbreak internals. */
/**
* Initializes the wordbreak internals. It currently does nothing, but
* it may in the future.
*/
void init_wordbreak(void)
{
/* Currently does nothing, may be needed in the future. */
return;
}
/**
@ -68,7 +68,7 @@ void init_wordbreak(void)
*
* @param ch character to check
* @param wbp pointer to the wbp breaking properties array
* @param len the size of the wbp array in number of items.
* @param len size of the wbp array in number of items
* @return the word breaking class if found; \c WBP_Any otherwise
*/
static enum WordBreakClass get_char_wb_class(
@ -97,21 +97,22 @@ static enum WordBreakClass get_char_wb_class(
}
/**
* Sets the break types in brks starting from posLast up to posStop.
* Sets the word break types to a specific value in a range.
*
* It sets the inside chars to #WORDBREAK_INSIDECHAR and the rest to brkType.
* Assumes brks is initialized - all the cells with #WORDBREAK_NOBREAK are
* It sets the inside chars to #WORDBREAK_INSIDEACHAR and the rest to brkType.
* Assumes \a brks is initialized - all the cells with #WORDBREAK_NOBREAK are
* cells that we really don't want to break after.
*
* @param s the string
* @param brks[out] the breaks array to fill.
* @param posStart the start position
* @param posEnd the end position
* @param len the length of the string
* @param brkType the breaks type to use
* @param get_next_char function to get the next UTF-32 character
* @param[in] s input string
* @param[out] brks breaks array to fill
* @param[in] posStart start position
* @param[in] posEnd end position (exclusive)
* @param[in] len length of the string
* @param[in] brkType breaks type to use
* @param[in] get_next_char function to get the next UTF-32 character
*/
static void set_brks_to(const void *s,
static void set_brks_to(
const void *s,
char *brks,
size_t posStart,
size_t posEnd,
@ -119,26 +120,26 @@ static void set_brks_to(const void *s,
char brkType,
get_next_char_t get_next_char)
{
size_t posCur = posStart;
while (posCur < posEnd)
size_t posNext = posStart;
while (posNext < posEnd)
{
get_next_char(s, len, &posCur);
for ( ; posStart < posCur - 1; ++posStart)
{
brks[posStart] = WORDBREAK_INSIDECHAR;
}
assert(posStart == posCur - 1);
utf32_t ch;
ch = get_next_char(s, len, &posNext);
assert(ch != EOS);
for (; posStart < posNext - 1; ++posStart)
brks[posStart] = WORDBREAK_INSIDEACHAR;
assert(posStart == posNext - 1);
/* Only set it if it has not already been marked as no-break. */
if (brks[posStart] != WORDBREAK_NOBREAK)
brks[posStart] = brkType;
posStart = posCur;
posStart = posNext;
}
}
/* Checks to see if newline, cr, or lf. for WB3a and b */
/* Checks to see if the class is newline, CR, or LF (rules WB3a and b). */
#define IS_WB3ab(cls) ((cls == WBP_Newline) || (cls == WBP_CR) || \
(cls == WBP_LF))
(cls == WBP_LF))
/**
* Sets the word breaking information for a generic input string.
@ -158,204 +159,205 @@ static void set_wordbreaks(
char *brks,
get_next_char_t get_next_char)
{
/* Previous class */
enum WordBreakClass p_cls = WBP_Undefined;
/* Strong previous class. */
enum WordBreakClass sp_cls = WBP_Undefined;
enum WordBreakClass wbcLast = WBP_Undefined;
/* wbcSeqStart is the class at the start of the current rule-matching
* sequence. WBP_Undefined is a special case that means "sot" (start
* of text). For example, in the case of Numeric+MidNum+Numeric it
* stays Numeric all the way through.
*/
enum WordBreakClass wbcSeqStart = WBP_Undefined;
utf32_t ch;
size_t posNext = 0;
size_t posCur = 0;
size_t posCurSt = 0;
size_t posLast = 0;
/* FIXME: unused atm. */
/* TODO: Language-specific specialization. */
(void) lang;
/* Init brks */
/* Init brks. */
memset(brks, WORDBREAK_BREAK, len);
ch = get_next_char(s, len, &posCur);
ch = get_next_char(s, len, &posNext);
/* WB3a, WB3b are implied. */
for ( ; ch != EOS ; )
while (ch != EOS)
{
/* Current class */
enum WordBreakClass c_cls;
c_cls = get_char_wb_class(ch, wb_prop_default,
ARRAY_LEN(wb_prop_default));
enum WordBreakClass wbcCur;
wbcCur = get_char_wb_class(ch, wb_prop_default,
ARRAY_LEN(wb_prop_default));
switch (c_cls)
switch (wbcCur)
{
case WBP_CR:
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
sp_cls = c_cls;
posLast = posCurSt;
/* WB3b */
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_LF:
if (sp_cls == WBP_CR) /* WB3 */
if (wbcSeqStart == WBP_CR) /* WB3 */
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
get_next_char);
sp_cls = c_cls;
posLast = posCurSt;
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
break;
}
sp_cls = c_cls;
posLast = posCurSt;
break;
/* Fall through */
case WBP_Newline:
/* WB3a, WB3b */
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
sp_cls = c_cls;
posLast = posCurSt;
/* WB3a,3b */
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_Extend:
case WBP_Format:
/* WB4 - If not the first char/after a newline (W3ab),
* skip this class, set it to be the same as the prev, and mark
/* WB4 - If not the first char/after a newline (WB3a,3b), skip
* this class, set it to be the same as the prev, and mark
* brks not to break before them. */
if ((sp_cls == WBP_Undefined) || IS_WB3ab(sp_cls))
if ((wbcSeqStart == WBP_Undefined) || IS_WB3ab(wbcSeqStart))
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
sp_cls = c_cls;
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
}
else
{
/* It's surely not the first */
brks[posCurSt - 1] = WORDBREAK_NOBREAK;
brks[posCur - 1] = WORDBREAK_NOBREAK;
/* "inherit" the previous class. */
c_cls = p_cls;
wbcCur = wbcLast;
}
break;
case WBP_Katakana:
if ((sp_cls == WBP_Katakana) || /* WB13 */
(sp_cls == WBP_ExtendNumLet)) /* WB13b */
if ((wbcSeqStart == WBP_Katakana) || /* WB13 */
(wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
}
/* No rule found, reset */
else
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
}
sp_cls = c_cls;
posLast = posCurSt;
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_ALetter:
if ((sp_cls == WBP_ALetter) || /* WB5,6,7 */
((sp_cls == WBP_Numeric) && (p_cls == WBP_Numeric)) || /* WB10 */
(sp_cls == WBP_ExtendNumLet)) /* WB13b */
if ((wbcSeqStart == WBP_ALetter) || /* WB5,6,7 */
(wbcLast == WBP_Numeric) || /* WB10 */
(wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
}
/* No rule found, reset */
else
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
}
sp_cls = c_cls;
posLast = posCurSt;
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_MidNumLet:
if ((p_cls == WBP_ALetter) || /* WBP6,7 */
(p_cls == WBP_Numeric)) /* WBP11,12 */
if ((wbcLast == WBP_ALetter) || /* WB6,7 */
(wbcLast == WBP_Numeric)) /* WB11,12 */
{
/* Go on */
}
else
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
sp_cls = c_cls;
posLast = posCurSt;
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
}
break;
case WBP_MidLetter:
if (p_cls == WBP_ALetter) /* WBP6,7 */
if (wbcLast == WBP_ALetter) /* WB6,7 */
{
/* Go on */
}
else
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
sp_cls = c_cls;
posLast = posCurSt;
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
}
break;
case WBP_MidNum:
if (p_cls == WBP_Numeric) /* WBP11,12 */
if (wbcLast == WBP_Numeric) /* WB11,12 */
{
/* Go on */
}
else
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
sp_cls = c_cls;
posLast = posCurSt;
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
}
break;
case WBP_Numeric:
if ((sp_cls == WBP_Numeric) || /* WB8,11,12 */
((sp_cls == WBP_ALetter) && (p_cls == WBP_ALetter)) || /* WB9 */
(sp_cls == WBP_ExtendNumLet)) /* WB13b */
if ((wbcSeqStart == WBP_Numeric) || /* WB8,11,12 */
(wbcLast == WBP_ALetter) || /* WB9 */
(wbcSeqStart == WBP_ExtendNumLet)) /* WB13b */
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
}
/* No rule found, reset */
else
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
}
sp_cls = c_cls;
posLast = posCurSt;
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_ExtendNumLet:
/* WB13a,13b */
if ((sp_cls == p_cls) &&
((p_cls == WBP_ALetter) ||
(p_cls == WBP_Numeric) ||
(p_cls == WBP_Katakana) ||
(p_cls == WBP_ExtendNumLet)))
if ((wbcSeqStart == wbcLast) &&
((wbcLast == WBP_ALetter) ||
(wbcLast == WBP_Numeric) ||
(wbcLast == WBP_Katakana) ||
(wbcLast == WBP_ExtendNumLet)))
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_NOBREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
}
/* No rule found, reset */
else
{
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
}
sp_cls = c_cls;
posLast = posCurSt;
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_Any:
/* Allow breaks and reset */
set_brks_to(s, brks, posLast, posCurSt, len, WORDBREAK_BREAK,
get_next_char);
sp_cls = c_cls;
posLast = posCurSt;
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
break;
default:
@ -364,14 +366,14 @@ static void set_wordbreaks(
break;
}
p_cls = c_cls;
posCurSt = posCur;
ch = get_next_char(s, len, &posCur);
wbcLast = wbcCur;
posCur = posNext;
ch = get_next_char(s, len, &posNext);
}
/* WB2 */
set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_BREAK,
get_next_char);
set_brks_to(s, brks, posLast, posNext, len,
WORDBREAK_BREAK, get_next_char);
}
/**

View File

@ -4,7 +4,7 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2011-2011 Tom Hacohen <tom@stosb.com>
* Copyright (C) 2012 Tom Hacohen <tom@stosb.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -39,7 +39,7 @@
*
* Header file for the word breaking (segmentation) algorithm.
*
* @version 2.0, 2011/12/12
* @version 2.2, 2012/02/04
* @author Tom Hacohen
*/
@ -47,15 +47,15 @@
#define WORDBREAK_H
#include <stddef.h>
#include <linebreak.h>
#include "linebreak.h"
#ifdef __cplusplus
extern "C" {
#endif
#define WORDBREAK_BREAK 0 /* Break found */
#define WORDBREAK_NOBREAK 1 /**< Break not found */
#define WORDBREAK_INSIDECHAR 2 /**< A UTF-8/16 sequence is unfinished */
#define WORDBREAK_BREAK 0 /**< Break is allowed */
#define WORDBREAK_NOBREAK 1 /**< No break is allowed */
#define WORDBREAK_INSIDEACHAR 2 /**< A UTF-8/16 sequence is unfinished */
void init_wordbreak(void);
void set_wordbreaks_utf8(

View File

@ -2,8 +2,10 @@
# WordBreakProperty-6.0.0.txt
# Date: 2010-08-19, 00:48:48 GMT [MD]
*/
#include "linebreak.h"
#include "wordbreakdef.h"
static struct WordBreakProperties wb_prop_default[] = {
{0x000A, 0x000A, WBP_LF},
{0x000B, 0x000C, WBP_Newline},

View File

@ -4,7 +4,7 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2011-2011 Tom Hacohen <tom@stosb.com>
* Copyright (C) 2012 Tom Hacohen <tom@stosb.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -40,7 +40,7 @@
* Definitions of internal data structures, declarations of global
* variables, and function prototypes for the word breaking algorithm.
*
* @version 2.0, 2011/12/12
* @version 2.1, 2012/01/18
* @author Tom Hacohen
*/
@ -51,8 +51,6 @@
enum WordBreakClass
{
WBP_Undefined,
/* The following break classes are treated in the pair table. */
WBP_CR,
WBP_LF,
WBP_Newline,