forked from enlightenment/efl
Evas: Added liblinebreak (static dep) to the build process.
SVN revision: 59203
This commit is contained in:
parent
e307051ccb
commit
bf909af0f3
|
@ -272,6 +272,33 @@ if test "x${want_fontconfig}" = "xyes" -o "x${want_fontconfig}" = "xauto" ; then
|
|||
])
|
||||
fi
|
||||
|
||||
# linebreak
# liblinebreak is shipped in src/static_deps and linked in statically.
# It is used unless the user passes --disable-liblinebreak.
have_linebreak="no"
want_linebreak="yes"
AC_ARG_ENABLE([liblinebreak],
   AC_HELP_STRING([--disable-liblinebreak],
      [disable linking against liblinebreak. @<:@default=enabled@:>@]),
   [
    if test "x${enableval}" != "xyes" ; then
       want_linebreak="no"
    else
       want_linebreak="yes"
    fi
   ])

# Automake conditionals have to be defined unconditionally.
AM_CONDITIONAL(EVAS_USE_LINEBREAK, test "x${want_linebreak}" = "xyes")

# have_linebreak keeps its initial "no" unless the dependency is wanted.
if test "x${want_linebreak}" = "xyes" ; then
   LINEBREAK_LIBS='$(top_builddir)/src/static_deps/liblinebreak/liblinebreak.la'
   LINEBREAK_CFLAGS='-I$(top_srcdir)/src/static_deps/liblinebreak'
   AC_DEFINE(HAVE_LINEBREAK, 1, [have liblinebreak support])
   AC_SUBST(LINEBREAK_CFLAGS)
   AC_SUBST(LINEBREAK_LIBS)
   have_linebreak="yes"
fi
|
||||
|
||||
# fribidi support
|
||||
have_fribidi="no"
|
||||
AC_ARG_ENABLE([fribidi],
|
||||
|
@ -1645,6 +1672,8 @@ src/modules/savers/eet/Makefile
|
|||
src/modules/savers/jpeg/Makefile
|
||||
src/modules/savers/png/Makefile
|
||||
src/modules/savers/tiff/Makefile
|
||||
src/static_deps/Makefile
|
||||
src/static_deps/liblinebreak/Makefile
|
||||
src/lib/include/Makefile
|
||||
src/examples/Makefile
|
||||
README
|
||||
|
@ -1742,6 +1771,7 @@ echo
|
|||
echo "Font Rendering Helpers:"
|
||||
echo " Fribidi.................: $have_fribidi"
|
||||
echo " Harfbuzz................: $have_harfbuzz"
|
||||
echo " liblinebreak............: $have_linebreak"
|
||||
# FIXME: add non freetype2 font engine support
|
||||
# FIXME: make freetype2 optional
|
||||
echo
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
MAINTAINERCLEANFILES = Makefile.in
|
||||
|
||||
SUBDIRS = lib bin modules examples
|
||||
SUBDIRS = static_deps lib bin modules examples
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
Wu Yongwei. Designed and implemented liblinebreak.
|
||||
|
||||
Nikolay Pultsin. Put forward the original requirements on liblinebreak,
|
||||
performed tests, and made a lot of suggestions on the initial versions.
|
||||
|
||||
Thomas Klausner. Autoconfiscated and libtoolized liblinebreak.
|
|
@ -0,0 +1,397 @@
|
|||
2010-01-03 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* LICENCE: Update the copyright year.
|
||||
|
||||
2010-01-03 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* NEWS: Add information about the 2.0 release.
|
||||
|
||||
2010-01-03 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Doxyfile (PROJECT_NUMBER): Set to `2.0'.
|
||||
(HAVE_DOT): Set to `YES'.
|
||||
|
||||
2010-01-03 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.c: Update the version number in comment to 2.0.
|
||||
* linebreak.h: Ditto.
|
||||
* linebreakdef.c: Ditto.
|
||||
* linebreakdef.h: Ditto.
|
||||
|
||||
2009-12-17 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Change the values of enum BreakAction to the same length.
|
||||
* linebreak.c (DIRECT_BRK): Rename to DIR_BRK.
|
||||
(INDIRECT_BRK): Rename to IND_BRK.
|
||||
(CM_INDIRECT_BRK): Rename to CMI_BRK.
|
||||
(CM_PROHIBITED_BRK): Rename to CMP_BRK.
|
||||
(PROHIBITED_BRK): Rename to PRH_BRK.
|
||||
|
||||
2009-11-29 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Doxyfile (TAB_SIZE): Set to the correct size `4', as used in the
|
||||
source files.
|
||||
|
||||
2009-11-29 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Update files according to UAX #14-24, for Unicode 5.2.0.
|
||||
* linebreak.c: Update comments about UAX #14.
|
||||
* linebreak.h: Ditto.
|
||||
* linebreakdef.c: Ditto.
|
||||
* linebreakdef.h: Ditto.
|
||||
(LBP_CP): New enumerator for the new `CP' class as defined in
|
||||
UAX #14-24.
|
||||
* linebreak.c (baTable): Update for the new class `CP'.
|
||||
* linebreakdata.c: Regenerate from LineBreak-5.2.0.txt.
|
||||
* README: Update the reference to UAX #14-24, for Unicode 5.2.0.
|
||||
|
||||
2009-05-03 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* NEWS: Add information about the 1.2 release.
|
||||
|
||||
2009-04-30 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Optimize the Doxygen output.
|
||||
* linebreak.c (lb_prop_index): Adjust its definition format
|
||||
slightly.
|
||||
|
||||
2009-04-30 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Doxyfile (USE_WINDOWS_ENCODING): Remove obsolete tag.
|
||||
(DETAILS_AT_TOP): Ditto.
|
||||
(MAX_DOT_GRAPH_WIDTH): Ditto.
|
||||
(MAX_DOT_GRAPH_HEIGHT): Ditto.
|
||||
(REFERENCED_BY_RELATION): Set to `NO'.
|
||||
(REFERENCES_RELATION): Ditto.
|
||||
(EXCLUDE): Add `filter_dup.c'.
|
||||
|
||||
2009-04-28 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.c (lb_get_next_char_utf8): Fix the issue that the index
|
||||
can point to the middle of a UTF-8 sequence if End of String (EOS)
|
||||
is encountered prematurely (thanks to Nikolay Pultsin and Rick Xu).
|
||||
(lb_get_next_char_utf16): Fix the issue that the index can point to
|
||||
the middle of a UTF-16 surrogate pair if EOS is encountered
|
||||
prematurely.
|
||||
|
||||
2009-04-20 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreakdef.c (lb_prop_English): Remove the specialization of
|
||||
right single quotation mark as closing punctuation mark, because it
|
||||
can be used as apostrophe.
|
||||
(lb_prop_Spanish): Ditto.
|
||||
(lb_prop_French): Ditto.
|
||||
|
||||
2009-04-09 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile.msvc: Make the `clean' target work on MSVC versions other
|
||||
than 6.0; do not use precompiled header.
|
||||
|
||||
2009-03-07 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.h: Correct the wrong date in the documentation comment.
|
||||
* linebreakdef.h: Ditto.
|
||||
|
||||
2009-02-10 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* configure.ac (AC_INIT): Increase the version to 2.0.
|
||||
* Makefile.am (liblinebreak_la_LDFLAGS): Set the version-info to
|
||||
`2:0'.
|
||||
|
||||
2009-02-10 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.h (LINEBREAK_VERSION): New macro.
|
||||
(linebreak_version): New global constant declaration.
|
||||
* linebreak.c (linebreak_version): New global constant definition.
|
||||
|
||||
2009-02-10 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Reduce namespace pollution.
|
||||
* linebreak.c (get_lb_prop_lang): Mark as static.
|
||||
(get_next_char_utf8): Rename to lb_get_next_char_utf8.
|
||||
(get_next_char_utf16): Rename to lb_get_next_char_utf16.
|
||||
(get_next_char_utf32): Rename to lb_get_next_char_utf32.
|
||||
(is_breakable): Rename to is_line_breakable.
|
||||
* linebreak.h (get_next_char_utf8): Remove the function prototype
|
||||
declaration.
|
||||
(get_next_char_utf16): Ditto.
|
||||
(get_next_char_utf32): Ditto.
|
||||
(is_breakable): Rename to is_line_breakable.
|
||||
* linebreakdef.h (lb_get_next_char_utf8): Add the function prototype
|
||||
declaration.
|
||||
(lb_get_next_char_utf16): Ditto.
|
||||
(lb_get_next_char_utf32): Ditto.
|
||||
|
||||
2009-02-06 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* NEWS: Add information about the 1.1 release.
|
||||
|
||||
2009-01-02 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile.am (EXTRA_DIST): Add the missing `LICENCE' file.
|
||||
|
||||
2008-12-31 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.c: Update the version number in comment to 1.0.
|
||||
* linebreak.h: Ditto.
|
||||
* linebreakdef.c: Ditto.
|
||||
* linebreakdef.h: Ditto.
|
||||
|
||||
2008-12-31 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* NEWS: Update for the 1.0 release.
|
||||
|
||||
2008-12-31 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* README: Correct two typos.
|
||||
|
||||
2008-12-31 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* README: Add the online URL reference.
|
||||
|
||||
2008-12-30 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* README: Update the reference to UAX #14-22, for Unicode 5.1.0.
|
||||
|
||||
2008-12-13 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Update files according to UAX #14-22, for Unicode 5.1.0.
|
||||
* linebreak.c (baTable): Update according to Table 2 of UAX #14-22.
|
||||
* linebreakdef.c (lb_prop_Spanish): Remove the unnecessary
|
||||
customization for inverted marks in Spanish.
|
||||
* linebreakdata.c: Regenerate from LineBreak-5.1.0.txt.
|
||||
* linebreak.h: Update comment only.
|
||||
* linebreakdef.h: Ditto.
|
||||
|
||||
2008-12-12 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* README: Update for the new build methods and better readability.
|
||||
|
||||
2008-12-12 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile.msvc: Correct the inconsistent naming in the output
|
||||
message.
|
||||
|
||||
2008-12-12 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* configure.ac (AM_INIT_AUTOMAKE): Mark `foreign'.
|
||||
* bootstrap: New file.
|
||||
* purge: New file.
|
||||
* Makefile.gcc (purge): Remove this target.
|
||||
|
||||
2008-12-10 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* NEWS: New file.
|
||||
|
||||
2008-12-10 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* AUTHORS: New file.
|
||||
|
||||
2008-12-10 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile.gcc (purge): New phony target to purge files generated by
|
||||
autoconfiscation.
|
||||
|
||||
2008-12-10 Thomas Klausner <tk@giga.or.at>
|
||||
|
||||
* configure.ac: New file.
|
||||
* Makefile.am: New file.
|
||||
|
||||
2008-12-10 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Doxyfile (OUTPUT_DIRECTORY): Set to `doc'.
|
||||
(ALPHABETICAL_INDEX): Set to `YES'.
|
||||
|
||||
2008-12-09 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile.msvc: New file.
|
||||
|
||||
2008-12-09 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile: Remove (to become Makefile.gcc).
|
||||
* Makefile.gcc: New file (was Makefile).
|
||||
|
||||
2008-12-07 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.c: Adjust the comment that refers to Unicode Annex 14.
|
||||
* linebreak.h: Ditto.
|
||||
* linebreakdef.c: Ditto.
|
||||
* linebreakdef.h: Ditto.
|
||||
|
||||
2008-12-07 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Use only POSIX basic regexp to ensure maximum portability (issues
|
||||
have been found on Mac OS X, where GNU extensions do not work).
|
||||
* LineBreak1.sed: Replace `[:xdigit:]' with `0-9A-F', and `\+' with
|
||||
`\{1,\}'.
|
||||
* LineBreak2.sed: Ditto.
|
||||
|
||||
2008-12-07 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile: Replace `*.exe' with `filter_dup$(EXEEXT)', since the
|
||||
extension `.exe' is specific to Windows.
|
||||
|
||||
2008-04-20 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Add README and LICENCE files, as well as a Doxyfile to generate
|
||||
documents.
|
||||
* README: New file.
|
||||
* LICENCE: New file.
|
||||
* Doxyfile: New file.
|
||||
* Makefile (doc): Add new phony target.
|
||||
|
||||
2008-04-04 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Remove the English override for plus sign: it is better treated in
|
||||
the text breaking program (see ../breaktext/ for an example).
|
||||
* linebreakdef.c (lb_prop_English): Remove the line for plus sign.
|
||||
|
||||
2008-03-29 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile: Correct the dependency-making rules when OLDGCC=Y.
|
||||
|
||||
2008-03-23 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile (clean): Do not remove *.exe and tags here.
|
||||
(distclean): Remove *.exe and tags.
|
||||
|
||||
2008-03-23 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Remove the English override for solidus: it is better treated in the
|
||||
text breaking program (see ../breaktext/ for an example).
|
||||
* linebreakdef.c (lb_prop_English): Remove the line for solidus.
|
||||
|
||||
2008-03-16 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Rename init_linebreak_prop_index to init_linebreak for future
|
||||
safety; make visible certain functions that are potentially useful.
|
||||
* linebreak.c (init_linebreak_prop_index): Rename to init_linebreak.
|
||||
(get_next_char_t): Move to linebreakdef.h.
|
||||
(get_next_char_utf8): Make non-static.
|
||||
(get_next_char_utf16): Ditto.
|
||||
(get_next_char_utf32): Ditto.
|
||||
(set_linebreaks): Ditto.
|
||||
* linebreak.h (init_linebreak_prop_index): Rename to init_linebreak.
|
||||
(get_next_char_utf8): Add the function prototype.
|
||||
(get_next_char_utf16): Ditto.
|
||||
(get_next_char_utf32): Ditto.
|
||||
* linebreakdef.h (get_next_char_t): Add the typedef.
|
||||
(set_linebreaks): Add the function prototype.
|
||||
|
||||
2008-03-16 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile (OLDGCC): Add support for GCC 2.95.3 (when OLDGCC=Y).
|
||||
|
||||
2008-03-15 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.c (set_linebreaks): Fix a bug that `==' was wrongly used
|
||||
for `='.
|
||||
|
||||
2008-03-05 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Improve the performance by reducing the look-ups of the
|
||||
language-specific line breaking properties array from the language
|
||||
name (thanks to Nikolay Pultsin).
|
||||
* linebreak.c (get_lb_prop_lang): New function.
|
||||
(get_char_lb_class_lang): Change the second parameter from the
|
||||
language name to the line breaking properties array.
|
||||
(set_linebreaks): Look up the language-specific line breaking
|
||||
properties array from the language name only once in one function
|
||||
call.
|
||||
|
||||
2008-03-03 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Make minor adjustments in code and comments.
|
||||
* linebreak.c: Adjust the doc comments.
|
||||
(init_linebreak_prop_index): Modify a conditional to make it more
|
||||
robust and consistent.
|
||||
* linebreakdef.c (lb_prop_lang_map): Replace the pointer
|
||||
lb_prop_default with NULL, since the value is never used.
|
||||
|
||||
2008-03-03 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Accelerate get_char_lb_class for invalid Unicode code points.
|
||||
* linebreak.c (get_char_lb_class): Adjust the conditionals so that
|
||||
getting the line breaking class for an invalid code point is much
|
||||
faster, which requires the array of line breaking properties be
|
||||
sorted.
|
||||
* linebreakdef.h: Adjust a comment that the array of line break
|
||||
properties must be sorted.
|
||||
|
||||
2008-03-02 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Change the values of enum BreakAction to more complete forms.
|
||||
* linebreak.c (INDRCT_BRK): Rename to INDIRECT_BRK.
|
||||
(CM_INDRCT_BRK): Rename to CM_INDIRECT_BRK.
|
||||
(CM_PROHIBTD_BRK): Rename to CM_PROHIBITED_BRK.
|
||||
(PROHIBTD_BRK): Rename to PROHIBITED_BRK.
|
||||
|
||||
2008-03-02 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Implement a two-stage search in get_char_lb_class_default to
|
||||
accelerate the overall performance, especially for non-Latin
|
||||
languages.
|
||||
* linebreak.c (LINEBREAK_INDEX_SIZE): New constant macro.
|
||||
(struct LineBreakPropertiesIndex): New struct.
|
||||
(lb_prop_index): New static variable.
|
||||
(init_linebreak_prop_index): New function.
|
||||
(get_char_lb_class_default): New function.
|
||||
(get_char_lb_class_lang): Use get_char_lb_class_default.
|
||||
* linebreak.h: Detect C++ and add extern "C" guard if necessary.
|
||||
(init_linebreak_prop_index): Add the prototype declaration.
|
||||
* linebreakdef.h: Adjust a comment.
|
||||
|
||||
2008-03-02 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Split/refactor the code; add (doc) comments.
|
||||
* Makefile (CFILES): Add linebreakdata.c and linebreakdef.c.
|
||||
* linebreak.c: Add and adjust comments.
|
||||
(linebreakdef.h): Add include file.
|
||||
(linebreakdata.c): Remove include file.
|
||||
(EOS): Remove (now in linebreakdef.h).
|
||||
(enum LineBreakClass): Ditto.
|
||||
(struct LineBreakProperties): Ditto.
|
||||
(lbpEnglish): Remove (now in linebreakdef.c as lb_prop_English).
|
||||
(lbpGerman): Remove (now in linebreakdef.c as lb_prop_German).
|
||||
(lbpSpanish): Remove (now in linebreakdef.c as lb_prop_Spanish).
|
||||
(lbpFrench): Remove (now in linebreakdef.c as lb_prop_French).
|
||||
(lbpRussian): Remove (now in linebreakdef.c as lb_prop_Russian).
|
||||
(lbpChinese): Remove (now in linebreakdef.c as lb_prop_Chinese).
|
||||
(struct LineBreakPropertiesLang): Remove (now in linebreakdef.h).
|
||||
(lbpLangs): Remove (now in linebreakdef.c as lb_prop_lang_map).
|
||||
(get_next_char_utf16): Make sure memory access not go beyond len.
|
||||
* linebreak.h: Add copyright information and adjust comments.
|
||||
(stddef.h): Add include file.
|
||||
* linebreakdata.c (linebreak.h): Add include file.
|
||||
(linebreakdef.h): Add include file.
|
||||
(lbpDefault): Make global and rename to lb_prop_default.
|
||||
* linebreakdata2.tmpl: Add two include files, a comment line, and
|
||||
remove `static'.
|
||||
* linebreakdef.c: New file.
|
||||
* linebreakdef.h: New file.
|
||||
|
||||
2008-02-26 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* linebreak.c (lbpSpanish): New array for Spanish-specific data.
|
||||
(lbpLangs): Update the index array for Spanish.
|
||||
(resolve_lb_class): Resolve AmbIguous class to IDeographic in
|
||||
Chinese, Japanese, and Korean.
|
||||
|
||||
2008-02-26 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
* Makefile (LineBreak.txt): Add new rule to retrieve it from the Web
|
||||
if it is not already there.
|
||||
|
||||
2008-02-23 Wu Yongwei <wuyongwei@gmail.com>
|
||||
|
||||
Add files for linebreak.
|
||||
* LineBreak1.sed: New file.
|
||||
* LineBreak2.sed: New file.
|
||||
* Makefile: New file.
|
||||
* filter_dup.c: New file.
|
||||
* linebreak.c: New file.
|
||||
* linebreak.h: New file.
|
||||
* linebreakdata.c: New file.
|
||||
* linebreakdata1.tmpl: New file.
|
||||
* linebreakdata2.tmpl: New file.
|
||||
* linebreakdata3.tmpl: New file.
|
|
@ -0,0 +1,18 @@
|
|||
Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgement in the product documentation would
|
||||
be appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not
|
||||
be misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source
|
||||
distribution.
|
|
@ -0,0 +1 @@
|
|||
# For lines of the form "<hex code or range>;<two-character class> # comment",
# drop the trailing comment and print the result (the script is run with -n).
s/\(^[0-9A-F.]\{1,\};[A-Z][A-Z0-9]\) #.*/\1/p
|
|
@ -0,0 +1,2 @@
|
|||
# Rewrite a single code point "X;" as the one-element range "X..X;".
s/^\([0-9A-F]\{1,\}\);/\1..\1;/
|
||||
# Turn "BEG..END;CLASS" into the C initializer "{ 0xBEG, 0xEND, LBP_CLASS },".
s/^\([0-9A-F]\{1,\}\)\.\.\([0-9A-F]\{1,\}\);\([A-Z][A-Z0-9]\)/ { 0x\1, 0x\2, LBP_\3 },/
|
|
@ -0,0 +1,16 @@
|
|||
#noinst_PROGRAMS = filter_dup

# liblinebreak is built as a non-installed convenience library, so its
# headers are private to this tree and must not be installed either.
noinst_HEADERS = linebreak.h linebreakdef.h
noinst_LTLIBRARIES = liblinebreak.la

liblinebreak_la_SOURCES = \
	linebreak.c \
	linebreakdata.c \
	linebreakdef.c

# Inputs used to regenerate linebreakdata.c, plus the upstream licence.
EXTRA_DIST = \
	LineBreak1.sed \
	LineBreak2.sed \
	linebreakdata1.tmpl \
	linebreakdata2.tmpl \
	linebreakdata3.tmpl \
	LICENCE
|
|
@ -0,0 +1,37 @@
|
|||
New in 2.0
|
||||
|
||||
- Update the algorithm and data according to UAX #14-24 and
|
||||
LineBreak-5.2.0.txt
|
||||
- Rename some functions to reduce namespace pollution
|
||||
- Make Doxygen documentation better
|
||||
|
||||
New in 1.2
|
||||
|
||||
- Fix the bug that an assertion in code can fail if an invalid UTF-8 or
|
||||
UTF-16 sequence is encountered near the end of input
|
||||
- Remove the specialization of right single quotation mark as closing
|
||||
punctuation mark in English, French, and Spanish, because it can be
|
||||
used as apostrophe
|
||||
- Make Doxygen documentation better
|
||||
|
||||
New in 1.1
|
||||
|
||||
- Make get_lb_prop_lang static and not an exported symbol
|
||||
- Define is_line_breakable to alias to is_breakable
|
||||
- Declare get_next_char_utf* will be changed to lb_get_next_char_utf*
|
||||
- Move the declarations of get_next_char_utf* from linebreak.h to
|
||||
linebreakdef.h
|
||||
- Add the function documentation comments to the header files
|
||||
|
||||
New in 1.0
|
||||
|
||||
- Update the line breaking data according to UAX #14-22 and
|
||||
LineBreak-5.1.0.txt
|
||||
- Add autoconfiscation support (./configure, make, make install)
|
||||
- Add Makefile for MSVC
|
||||
|
||||
First public release (0.9.6, or 20080421)
|
||||
|
||||
- Implement line breaking algorithm according to UAX #14-19
|
||||
- Line breaking data is generated from LineBreak-5.0.0.txt
|
||||
- Makefile only supports GCC
|
|
@ -0,0 +1,86 @@
|
|||
NOTICE: This is the original version, that was adapted a bit (mostly
|
||||
build related) in order to work nicely with Evas.
|
||||
|
||||
|
||||
L I B L I N E B R E A K
|
||||
=======================
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
This is the README file for liblinebreak, an implementation of the line
|
||||
breaking algorithm as described in Unicode 5.2.0 Standard Annex 14,
|
||||
Revision 24, available at
|
||||
<URL:http://www.unicode.org/reports/tr14/tr14-24.html>
|
||||
|
||||
Check this URL for up-to-date information:
|
||||
<URL:http://vimgadgets.sourceforge.net/liblinebreak/>
|
||||
|
||||
|
||||
Licence
|
||||
-------
|
||||
|
||||
This library is released under an open-source licence, the zlib/libpng
|
||||
licence. Please check the file LICENCE for details.
|
||||
|
||||
Apart from using the algorithm, part of the code is derived from the
|
||||
data provided under
|
||||
<URL:http://www.unicode.org/Public/>
|
||||
|
||||
And the Unicode Terms of Use may apply:
|
||||
<URL:http://www.unicode.org/copyright.html>
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
There are three ways to build the library:
|
||||
|
||||
1) On *NIX systems supported by the autoconfiscation tools, do the
|
||||
normal
|
||||
|
||||
./configure
|
||||
make
|
||||
sudo make install
|
||||
|
||||
to build and install both the dynamic and static libraries. In
|
||||
addition, one may
|
||||
|
||||
- type `make doc' to generate the doxygen documentation; or
|
||||
- type `make linebreakdata' to regenerate linebreakdata.c from
|
||||
LineBreak.txt.
|
||||
|
||||
2) On systems where GCC and Binutils are supported, one can type
|
||||
|
||||
cp -p Makefile.gcc Makefile
|
||||
make
|
||||
|
||||
to build the static library. In addition, one may
|
||||
|
||||
- type `make debug' or `make release' to explicitly generate the
|
||||
debug or release build;
|
||||
- type `make doc' to generate the doxygen documentation; or
|
||||
- type `make linebreakdata' to regenerate linebreakdata.c from
|
||||
LineBreak.txt.
|
||||
|
||||
3) On Windows, apart from using method 1 (Cygwin/MSYS) and method 2
|
||||
(MinGW), MSVC can also be used. Type
|
||||
|
||||
nmake -f Makefile.msvc
|
||||
|
||||
to build the static library. By default the debug version is built.
|
||||
To build the release version, type
|
||||
|
||||
nmake -f Makefile.msvc CFG="linebreak - Win32 Release"
|
||||
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
||||
Check the generated document doc/html/linebreak_8h.html for the public
|
||||
interfaces exposed to applications.
|
||||
|
||||
|
||||
$Id: README,v 1.6 2009/11/29 08:09:13 adah Exp $
|
||||
|
||||
vim:autoindent:expandtab:formatoptions=tcqlmn:textwidth=72:
|
|
@ -0,0 +1,48 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/**
 * Writes one table entry in the layout produced by LineBreak2.sed.
 * The begin/end tokens are printed exactly as scanned, i.e. they still
 * carry their own trailing commas.
 */
static void emit_entry(const char *beg, const char *end, const char *prop)
{
    printf("\t{ %s %s %s },\n", beg, end, prop);
}

/**
 * Filter that merges runs of consecutive table entries sharing the same
 * line breaking class into a single entry.
 *
 * Reads lines from stdin.  A line is treated as a table entry when it
 * contains "LBP_" (but not "LBP_Undef") and parses as "{ beg end prop }".
 * Every other line flushes the pending entry and is copied to stdout
 * unchanged.
 *
 * @return  always 0
 */
int main(void)
{
    char s[80];
    char beg[16];
    char end[16];
    char prop[16];
    char lastbeg[16] = "";
    char lastend[16] = "";
    char lastprop[16] = "";     /* empty string: no entry is pending */

    while (fgets(s, sizeof s, stdin) != NULL)
    {
        /* Field widths are bounded so that an over-long token cannot
         * overflow the 16-byte buffers; a line that does not yield all
         * three fields is passed through instead of being merged with
         * stale buffer contents. */
        int is_entry = strstr(s, "LBP_") != NULL
                    && strstr(s, "LBP_Undef") == NULL
                    && sscanf(s, "\t{ %15s %15s %15s }", beg, end, prop) == 3;

        if (!is_entry)
        {
            if (lastprop[0])
            {
                emit_entry(lastbeg, lastend, lastprop);
                lastprop[0] = 0;
            }
            printf("%s", s);
            continue;
        }

        /* A different class ends the current run. */
        if (lastprop[0] && strcmp(lastprop, prop) != 0)
        {
            emit_entry(lastbeg, lastend, lastprop);
            lastprop[0] = 0;
        }

        /* Start a new run if nothing is pending. */
        if (lastprop[0] == 0)
        {
            strcpy(lastbeg, beg);
            strcpy(lastprop, prop);
        }

        /* Either way the run now extends to this entry's end. */
        strcpy(lastend, end);
    }

    /* Flush the run that was still open at end of input. */
    if (lastprop[0])
    {
        emit_entry(lastbeg, lastend, lastprop);
    }
    return 0;
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/sh
# Regenerate linebreakdata.c from the Unicode LineBreak.txt data file.
#
# Needs the filter_dup helper (built from filter_dup.c) in the current
# directory.  LineBreak.txt is downloaded if it is not already present.

# Stop at the first failing step so that a broken run can never replace
# linebreakdata.c with a truncated table.
set -e

if [ ! -x ./filter_dup ]; then
	echo "error: ./filter_dup not found; build it from filter_dup.c first" >&2
	exit 1
fi

if [ ! -f "LineBreak.txt" ]; then
	# Do not leave a partial download behind: it would be picked up as
	# valid input on the next run.
	wget http://unicode.org/Public/UNIDATA/LineBreak.txt || {
		rm -f LineBreak.txt
		exit 1
	}
fi

# Extract the "code;class" data lines, then convert them to C initializers
# and merge adjacent entries of the same class.
sed -n -f LineBreak1.sed LineBreak.txt > tmp.txt
sed -f LineBreak2.sed tmp.txt | ./filter_dup > tmp.c

# The first two lines of LineBreak.txt are embedded between the templates.
head -2 LineBreak.txt > tmp.txt

# Assemble into a scratch file and rename, so the old linebreakdata.c
# survives if the concatenation fails.
cat linebreakdata1.tmpl tmp.txt linebreakdata2.tmpl tmp.c linebreakdata3.tmpl > linebreakdata.c.tmp
mv linebreakdata.c.tmp linebreakdata.c
rm tmp.txt tmp.c
|
||||
|
|
@ -0,0 +1,734 @@
|
|||
/* vim: set tabstop=4 shiftwidth=4: */
|
||||
|
||||
/*
|
||||
* Line breaking in a Unicode sequence. Designed to be used in a
|
||||
* generic text renderer.
|
||||
*
|
||||
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the author be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute
|
||||
* it freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgement in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*
|
||||
* The main reference is Unicode Standard Annex 14 (UAX #14):
|
||||
* <URL:http://www.unicode.org/reports/tr14/>
|
||||
*
|
||||
* When this library was designed, this annex was at Revision 19, for
|
||||
* Unicode 5.0.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
|
||||
*
|
||||
* This library has been updated according to Revision 24, for
|
||||
* Unicode 5.2.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
|
||||
*
|
||||
* The Unicode Terms of Use are available at
|
||||
* <URL:http://www.unicode.org/copyright.html>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file linebreak.c
|
||||
*
|
||||
* Implementation of the line breaking algorithm as described in Unicode
|
||||
* Standard Annex 14.
|
||||
*
|
||||
* @version 2.0, 2010/01/03
|
||||
* @author Wu Yongwei
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include "linebreak.h"
|
||||
#include "linebreakdef.h"
|
||||
|
||||
/**
|
||||
* Size of the second-level index to the line breaking properties.
|
||||
*/
|
||||
#define LINEBREAK_INDEX_SIZE 40
|
||||
|
||||
/**
|
||||
* Version number of the library.
|
||||
*/
|
||||
const int linebreak_version = LINEBREAK_VERSION;
|
||||
|
||||
/**
|
||||
* Enumeration of break actions. They are used in the break action
|
||||
* pair table below.
|
||||
*/
|
||||
enum BreakAction
{
    DIR_BRK = 0,    /**< Break opportunity: direct */
    IND_BRK = 1,    /**< Break opportunity: indirect */
    CMI_BRK = 2,    /**< Break opportunity: indirect, for combining marks */
    CMP_BRK = 3,    /**< Break prohibited, for combining marks */
    PRH_BRK = 4     /**< Break prohibited */
};
|
||||
|
||||
/**
|
||||
* Break action pair table. This is a direct mapping of Table 2 of
|
||||
* Unicode Standard Annex 14, Revision 24.
|
||||
*/
|
||||
static enum BreakAction baTable[LBP_JT][LBP_JT] = {
|
||||
{ /* OP */
|
||||
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, CMP_BRK,
|
||||
PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK, PRH_BRK },
|
||||
{ /* CL */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* CP */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, PRH_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* QU */
|
||||
PRH_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
|
||||
{ /* GL */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
|
||||
{ /* NS */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* EX */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* SY */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* IS */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* PR */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, IND_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
|
||||
{ /* PO */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* NU */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* AL */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* ID */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* IN */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* HY */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* BA */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, DIR_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* BB */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
|
||||
{ /* B2 */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* ZW */
|
||||
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, DIR_BRK,
|
||||
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* CM */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
|
||||
{ /* WJ */
|
||||
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK },
|
||||
{ /* H2 */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
|
||||
{ /* H3 */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK },
|
||||
{ /* JL */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
|
||||
{ /* JV */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, IND_BRK },
|
||||
{ /* JT */
|
||||
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
|
||||
PRH_BRK, PRH_BRK, DIR_BRK, IND_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
|
||||
IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK,
|
||||
PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK }
|
||||
};
|
||||
|
||||
/**
|
||||
* Struct for the second-level index to the line breaking properties.
|
||||
*/
|
||||
struct LineBreakPropertiesIndex
|
||||
{
|
||||
utf32_t end; /**< End coding point */
|
||||
struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
|
||||
};
|
||||
|
||||
/**
|
||||
* Second-level index to the line breaking properties.
|
||||
*/
|
||||
static struct LineBreakPropertiesIndex lb_prop_index[LINEBREAK_INDEX_SIZE] =
|
||||
{
|
||||
{ 0xFFFFFFFF, lb_prop_default }
|
||||
};
|
||||
|
||||
/**
|
||||
* Initializes the second-level index to the line breaking properties.
|
||||
* If it is not called, the performance of #get_char_lb_class_lang (and
|
||||
* thus the main functionality) can be pretty bad, especially for big
|
||||
* code points like those of Chinese.
|
||||
*/
|
||||
void init_linebreak(void)
|
||||
{
|
||||
size_t i;
|
||||
size_t iPropDefault;
|
||||
size_t len;
|
||||
size_t step;
|
||||
|
||||
len = 0;
|
||||
while (lb_prop_default[len].prop != LBP_Undefined)
|
||||
++len;
|
||||
step = len / LINEBREAK_INDEX_SIZE;
|
||||
iPropDefault = 0;
|
||||
for (i = 0; i < LINEBREAK_INDEX_SIZE; ++i)
|
||||
{
|
||||
lb_prop_index[i].lbp = lb_prop_default + iPropDefault;
|
||||
iPropDefault += step;
|
||||
lb_prop_index[i].end = lb_prop_default[iPropDefault].start - 1;
|
||||
}
|
||||
lb_prop_index[--i].end = 0xFFFFFFFF;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the language-specific line breaking properties.
|
||||
*
|
||||
* @param lang language of the text
|
||||
* @return pointer to the language-specific line breaking
|
||||
* properties array if found; \c NULL otherwise
|
||||
*/
|
||||
static struct LineBreakProperties *get_lb_prop_lang(const char *lang)
|
||||
{
|
||||
struct LineBreakPropertiesLang *lbplIter;
|
||||
if (lang != NULL)
|
||||
{
|
||||
for (lbplIter = lb_prop_lang_map; lbplIter->lang != NULL; ++lbplIter)
|
||||
{
|
||||
if (strncmp(lang, lbplIter->lang, lbplIter->namelen) == 0)
|
||||
{
|
||||
return lbplIter->lbp;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the line breaking class of a character from a line breaking
|
||||
* properties array.
|
||||
*
|
||||
* @param ch character to check
|
||||
* @param lbp pointer to the line breaking properties array
|
||||
* @return the line breaking class if found; \c LBP_XX otherwise
|
||||
*/
|
||||
static enum LineBreakClass get_char_lb_class(
|
||||
utf32_t ch,
|
||||
struct LineBreakProperties *lbp)
|
||||
{
|
||||
while (lbp->prop != LBP_Undefined && ch >= lbp->start)
|
||||
{
|
||||
if (ch <= lbp->end)
|
||||
return lbp->prop;
|
||||
++lbp;
|
||||
}
|
||||
return LBP_XX;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the line breaking class of a character from the default line
|
||||
* breaking properties array.
|
||||
*
|
||||
* @param ch character to check
|
||||
* @return the line breaking class if found; \c LBP_XX otherwise
|
||||
*/
|
||||
static enum LineBreakClass get_char_lb_class_default(
|
||||
utf32_t ch)
|
||||
{
|
||||
size_t i = 0;
|
||||
while (ch > lb_prop_index[i].end)
|
||||
++i;
|
||||
assert(i < LINEBREAK_INDEX_SIZE);
|
||||
return get_char_lb_class(ch, lb_prop_index[i].lbp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the line breaking class of a character for a specific
|
||||
* language. This function will check the language-specific data first,
|
||||
* and then the default data if there is no language-specific property
|
||||
* available for the character.
|
||||
*
|
||||
* @param ch character to check
|
||||
* @param lbpLang pointer to the language-specific line breaking
|
||||
* properties array
|
||||
* @return the line breaking class if found; \c LBP_XX
|
||||
* otherwise
|
||||
*/
|
||||
static enum LineBreakClass get_char_lb_class_lang(
|
||||
utf32_t ch,
|
||||
struct LineBreakProperties *lbpLang)
|
||||
{
|
||||
enum LineBreakClass lbcResult;
|
||||
|
||||
/* Find the language-specific line breaking class for a character */
|
||||
if (lbpLang)
|
||||
{
|
||||
lbcResult = get_char_lb_class(ch, lbpLang);
|
||||
if (lbcResult != LBP_XX)
|
||||
return lbcResult;
|
||||
}
|
||||
|
||||
/* Find the generic language-specific line breaking class, if no
|
||||
* language context is provided, or language-specific data are not
|
||||
* available for the specific character in the specified language */
|
||||
return get_char_lb_class_default(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the line breaking class for certain ambiguous or complicated
|
||||
* characters. They are treated in a simplistic way in this
|
||||
* implementation.
|
||||
*
|
||||
* @param lbc line breaking class to resolve
|
||||
* @param lang language of the text
|
||||
* @return the resolved line breaking class
|
||||
*/
|
||||
static enum LineBreakClass resolve_lb_class(
|
||||
enum LineBreakClass lbc,
|
||||
const char *lang)
|
||||
{
|
||||
switch (lbc)
|
||||
{
|
||||
case LBP_AI:
|
||||
if (lang != NULL &&
|
||||
(strncmp(lang, "zh", 2) == 0 || /* Chinese */
|
||||
strncmp(lang, "ja", 2) == 0 || /* Japanese */
|
||||
strncmp(lang, "ko", 2) == 0)) /* Korean */
|
||||
{
|
||||
return LBP_ID;
|
||||
}
|
||||
/* Fall through */
|
||||
case LBP_SA:
|
||||
case LBP_SG:
|
||||
case LBP_XX:
|
||||
return LBP_AL;
|
||||
default:
|
||||
return lbc;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next Unicode character in a UTF-8 sequence. The index will
|
||||
* be advanced to the next complete character, unless the end of string
|
||||
* is reached in the middle of a UTF-8 sequence.
|
||||
*
|
||||
* @param[in] s input UTF-8 string
|
||||
* @param[in] len length of the string in bytes
|
||||
* @param[in,out] ip pointer to the index
|
||||
* @return the Unicode character beginning at the index; or
|
||||
* #EOS if end of input is encountered
|
||||
*/
|
||||
utf32_t lb_get_next_char_utf8(
|
||||
const utf8_t *s,
|
||||
size_t len,
|
||||
size_t *ip)
|
||||
{
|
||||
utf8_t ch;
|
||||
utf32_t res;
|
||||
|
||||
assert(*ip <= len);
|
||||
if (*ip == len)
|
||||
return EOS;
|
||||
ch = s[*ip];
|
||||
|
||||
if (ch < 0xC2 || ch > 0xF4)
|
||||
{ /* One-byte sequence, tail (should not occur), or invalid */
|
||||
*ip += 1;
|
||||
return ch;
|
||||
}
|
||||
else if (ch < 0xE0)
|
||||
{ /* Two-byte sequence */
|
||||
if (*ip + 2 > len)
|
||||
return EOS;
|
||||
res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
|
||||
*ip += 2;
|
||||
return res;
|
||||
}
|
||||
else if (ch < 0xF0)
|
||||
{ /* Three-byte sequence */
|
||||
if (*ip + 3 > len)
|
||||
return EOS;
|
||||
res = ((ch & 0x0F) << 12) +
|
||||
((s[*ip + 1] & 0x3F) << 6) +
|
||||
((s[*ip + 2] & 0x3F));
|
||||
*ip += 3;
|
||||
return res;
|
||||
}
|
||||
else
|
||||
{ /* Four-byte sequence */
|
||||
if (*ip + 4 > len)
|
||||
return EOS;
|
||||
res = ((ch & 0x07) << 18) +
|
||||
((s[*ip + 1] & 0x3F) << 12) +
|
||||
((s[*ip + 2] & 0x3F) << 6) +
|
||||
((s[*ip + 3] & 0x3F));
|
||||
*ip += 4;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next Unicode character in a UTF-16 sequence. The index will
|
||||
* be advanced to the next complete character, unless the end of string
|
||||
* is reached in the middle of a UTF-16 surrogate pair.
|
||||
*
|
||||
* @param[in] s input UTF-16 string
|
||||
* @param[in] len length of the string in words
|
||||
* @param[in,out] ip pointer to the index
|
||||
* @return the Unicode character beginning at the index; or
|
||||
* #EOS if end of input is encountered
|
||||
*/
|
||||
utf32_t lb_get_next_char_utf16(
|
||||
const utf16_t *s,
|
||||
size_t len,
|
||||
size_t *ip)
|
||||
{
|
||||
utf16_t ch;
|
||||
|
||||
assert(*ip <= len);
|
||||
if (*ip == len)
|
||||
return EOS;
|
||||
ch = s[(*ip)++];
|
||||
|
||||
if (ch < 0xD800 || ch > 0xDBFF)
|
||||
{ /* If the character is not a high surrogate */
|
||||
return ch;
|
||||
}
|
||||
if (*ip == len)
|
||||
{ /* If the input ends here (an error) */
|
||||
--(*ip);
|
||||
return EOS;
|
||||
}
|
||||
if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
|
||||
{ /* If the next character is not the low surrogate (an error) */
|
||||
return ch;
|
||||
}
|
||||
/* Return the constructed character and advance the index again */
|
||||
return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the next Unicode character in a UTF-32 sequence. The index will
|
||||
* be advanced to the next character.
|
||||
*
|
||||
* @param[in] s input UTF-32 string
|
||||
* @param[in] len length of the string in dwords
|
||||
* @param[in,out] ip pointer to the index
|
||||
* @return the Unicode character beginning at the index; or
|
||||
* #EOS if end of input is encountered
|
||||
*/
|
||||
utf32_t lb_get_next_char_utf32(
|
||||
const utf32_t *s,
|
||||
size_t len,
|
||||
size_t *ip)
|
||||
{
|
||||
assert(*ip <= len);
|
||||
if (*ip == len)
|
||||
return EOS;
|
||||
return s[(*ip)++];
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the line breaking information for a generic input string.
|
||||
*
|
||||
* @param[in] s input string
|
||||
* @param[in] len length of the input
|
||||
* @param[in] lang language of the input
|
||||
* @param[out] brks pointer to the output breaking data,
|
||||
* containing #LINEBREAK_MUSTBREAK,
|
||||
* #LINEBREAK_ALLOWBREAK, #LINEBREAK_NOBREAK,
|
||||
* or #LINEBREAK_INSIDEACHAR
|
||||
* @param[in] get_next_char function to get the next UTF-32 character
|
||||
*/
|
||||
void set_linebreaks(
|
||||
const void *s,
|
||||
size_t len,
|
||||
const char *lang,
|
||||
char *brks,
|
||||
get_next_char_t get_next_char)
|
||||
{
|
||||
utf32_t ch;
|
||||
enum LineBreakClass lbcCur;
|
||||
enum LineBreakClass lbcNew;
|
||||
enum LineBreakClass lbcLast;
|
||||
struct LineBreakProperties *lbpLang;
|
||||
size_t posCur = 0;
|
||||
size_t posLast = 0;
|
||||
|
||||
--posLast; /* To be ++'d later */
|
||||
ch = get_next_char(s, len, &posCur);
|
||||
if (ch == EOS)
|
||||
return;
|
||||
lbpLang = get_lb_prop_lang(lang);
|
||||
lbcCur = resolve_lb_class(get_char_lb_class_lang(ch, lbpLang), lang);
|
||||
lbcNew = LBP_Undefined;
|
||||
|
||||
nextline:
|
||||
|
||||
/* Special treatment for the first character */
|
||||
switch (lbcCur)
|
||||
{
|
||||
case LBP_LF:
|
||||
case LBP_NL:
|
||||
lbcCur = LBP_BK;
|
||||
break;
|
||||
case LBP_SP:
|
||||
lbcCur = LBP_WJ;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Process a line till an explicit break or end of string */
|
||||
for (;;)
|
||||
{
|
||||
for (++posLast; posLast < posCur - 1; ++posLast)
|
||||
{
|
||||
brks[posLast] = LINEBREAK_INSIDEACHAR;
|
||||
}
|
||||
assert(posLast == posCur - 1);
|
||||
lbcLast = lbcNew;
|
||||
ch = get_next_char(s, len, &posCur);
|
||||
if (ch == EOS)
|
||||
break;
|
||||
lbcNew = get_char_lb_class_lang(ch, lbpLang);
|
||||
if (lbcCur == LBP_BK || (lbcCur == LBP_CR && lbcNew != LBP_LF))
|
||||
{
|
||||
brks[posLast] = LINEBREAK_MUSTBREAK;
|
||||
lbcCur = resolve_lb_class(lbcNew, lang);
|
||||
goto nextline;
|
||||
}
|
||||
|
||||
switch (lbcNew)
|
||||
{
|
||||
case LBP_SP:
|
||||
brks[posLast] = LINEBREAK_NOBREAK;
|
||||
continue;
|
||||
case LBP_BK:
|
||||
case LBP_LF:
|
||||
case LBP_NL:
|
||||
brks[posLast] = LINEBREAK_NOBREAK;
|
||||
lbcCur = LBP_BK;
|
||||
continue;
|
||||
case LBP_CR:
|
||||
brks[posLast] = LINEBREAK_NOBREAK;
|
||||
lbcCur = LBP_CR;
|
||||
continue;
|
||||
case LBP_CB:
|
||||
brks[posLast] = LINEBREAK_ALLOWBREAK;
|
||||
lbcCur = LBP_BA;
|
||||
continue;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
lbcNew = resolve_lb_class(lbcNew, lang);
|
||||
|
||||
assert(lbcCur <= LBP_JT);
|
||||
assert(lbcNew <= LBP_JT);
|
||||
switch (baTable[lbcCur - 1][lbcNew - 1])
|
||||
{
|
||||
case DIR_BRK:
|
||||
brks[posLast] = LINEBREAK_ALLOWBREAK;
|
||||
break;
|
||||
case CMI_BRK:
|
||||
case IND_BRK:
|
||||
if (lbcLast == LBP_SP)
|
||||
{
|
||||
brks[posLast] = LINEBREAK_ALLOWBREAK;
|
||||
}
|
||||
else
|
||||
{
|
||||
brks[posLast] = LINEBREAK_NOBREAK;
|
||||
}
|
||||
break;
|
||||
case CMP_BRK:
|
||||
brks[posLast] = LINEBREAK_NOBREAK;
|
||||
if (lbcLast != LBP_SP)
|
||||
continue;
|
||||
break;
|
||||
case PRH_BRK:
|
||||
brks[posLast] = LINEBREAK_NOBREAK;
|
||||
break;
|
||||
}
|
||||
|
||||
lbcCur = lbcNew;
|
||||
}
|
||||
|
||||
assert(posLast == posCur - 1 && posCur <= len);
|
||||
/* Break after the last character */
|
||||
brks[posLast] = LINEBREAK_MUSTBREAK;
|
||||
/* When the input contains incomplete sequences */
|
||||
while (posCur < len)
|
||||
{
|
||||
brks[posCur++] = LINEBREAK_INSIDEACHAR;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the line breaking information for a UTF-8 input string.
|
||||
*
|
||||
* @param[in] s input UTF-8 string
|
||||
* @param[in] len length of the input
|
||||
* @param[in] lang language of the input
|
||||
* @param[out] brks pointer to the output breaking data, containing
|
||||
* #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
|
||||
* #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
|
||||
*/
|
||||
void set_linebreaks_utf8(
|
||||
const utf8_t *s,
|
||||
size_t len,
|
||||
const char *lang,
|
||||
char *brks)
|
||||
{
|
||||
set_linebreaks(s, len, lang, brks,
|
||||
(get_next_char_t)lb_get_next_char_utf8);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the line breaking information for a UTF-16 input string.
|
||||
*
|
||||
* @param[in] s input UTF-16 string
|
||||
* @param[in] len length of the input
|
||||
* @param[in] lang language of the input
|
||||
* @param[out] brks pointer to the output breaking data, containing
|
||||
* #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
|
||||
* #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
|
||||
*/
|
||||
void set_linebreaks_utf16(
|
||||
const utf16_t *s,
|
||||
size_t len,
|
||||
const char *lang,
|
||||
char *brks)
|
||||
{
|
||||
set_linebreaks(s, len, lang, brks,
|
||||
(get_next_char_t)lb_get_next_char_utf16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the line breaking information for a UTF-32 input string.
|
||||
*
|
||||
* @param[in] s input UTF-32 string
|
||||
* @param[in] len length of the input
|
||||
* @param[in] lang language of the input
|
||||
* @param[out] brks pointer to the output breaking data, containing
|
||||
* #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
|
||||
* #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
|
||||
*/
|
||||
void set_linebreaks_utf32(
|
||||
const utf32_t *s,
|
||||
size_t len,
|
||||
const char *lang,
|
||||
char *brks)
|
||||
{
|
||||
set_linebreaks(s, len, lang, brks,
|
||||
(get_next_char_t)lb_get_next_char_utf32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether a line break can occur between two Unicode characters.
|
||||
* This is a wrapper function to expose a simple interface. Generally
|
||||
* speaking, it is better to use #set_linebreaks_utf32 instead, since
|
||||
* complicated cases involving combining marks, spaces, etc. cannot be
|
||||
* correctly processed.
|
||||
*
|
||||
* @param char1 the first Unicode character
|
||||
* @param char2 the second Unicode character
|
||||
* @param lang language of the input
|
||||
* @return one of #LINEBREAK_MUSTBREAK, #LINEBREAK_ALLOWBREAK,
|
||||
* #LINEBREAK_NOBREAK, or #LINEBREAK_INSIDEACHAR
|
||||
*/
|
||||
int is_line_breakable(
|
||||
utf32_t char1,
|
||||
utf32_t char2,
|
||||
const char* lang)
|
||||
{
|
||||
utf32_t s[2];
|
||||
char brks[2];
|
||||
s[0] = char1;
|
||||
s[1] = char2;
|
||||
set_linebreaks_utf32(s, 2, lang, brks);
|
||||
return brks[0];
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
/* vim: set tabstop=4 shiftwidth=4: */
|
||||
|
||||
/*
|
||||
* Line breaking in a Unicode sequence. Designed to be used in a
|
||||
* generic text renderer.
|
||||
*
|
||||
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the author be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute
|
||||
* it freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgement in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*
|
||||
* The main reference is Unicode Standard Annex 14 (UAX #14):
|
||||
* <URL:http://www.unicode.org/reports/tr14/>
|
||||
*
|
||||
* When this library was designed, this annex was at Revision 19, for
|
||||
* Unicode 5.0.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
|
||||
*
|
||||
* This library has been updated according to Revision 24, for
|
||||
* Unicode 5.2.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
|
||||
*
|
||||
* The Unicode Terms of Use are available at
|
||||
* <URL:http://www.unicode.org/copyright.html>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file linebreak.h
|
||||
*
|
||||
* Header file for the line breaking algorithm.
|
||||
*
|
||||
* @version 2.0, 2010/01/03
|
||||
* @author Wu Yongwei
|
||||
*/
|
||||
|
||||
#ifndef LINEBREAK_H
|
||||
#define LINEBREAK_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define LINEBREAK_VERSION 0x0200 /**< Version of the library linebreak */
|
||||
extern const int linebreak_version;
|
||||
|
||||
#ifndef LINEBREAK_UTF_TYPES_DEFINED
|
||||
#define LINEBREAK_UTF_TYPES_DEFINED
|
||||
typedef unsigned char utf8_t; /**< Type for UTF-8 data points */
|
||||
typedef unsigned short utf16_t; /**< Type for UTF-16 data points */
|
||||
typedef unsigned int utf32_t; /**< Type for UTF-32 data points */
|
||||
#endif
|
||||
|
||||
#define LINEBREAK_MUSTBREAK 0 /**< Break is mandatory */
|
||||
#define LINEBREAK_ALLOWBREAK 1 /**< Break is allowed */
|
||||
#define LINEBREAK_NOBREAK 2 /**< No break is possible */
|
||||
#define LINEBREAK_INSIDEACHAR 3 /**< A UTF-8/16 sequence is unfinished */
|
||||
|
||||
void init_linebreak(void);
|
||||
void set_linebreaks_utf8(
|
||||
const utf8_t *s, size_t len, const char* lang, char *brks);
|
||||
void set_linebreaks_utf16(
|
||||
const utf16_t *s, size_t len, const char* lang, char *brks);
|
||||
void set_linebreaks_utf32(
|
||||
const utf32_t *s, size_t len, const char* lang, char *brks);
|
||||
int is_line_breakable(utf32_t char1, utf32_t char2, const char* lang);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LINEBREAK_H */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1 @@
|
|||
/* The content of this file is generated from:
|
|
@ -0,0 +1,7 @@
|
|||
*/
|
||||
|
||||
#include "linebreak.h"
|
||||
#include "linebreakdef.h"
|
||||
|
||||
/** Default line breaking properties as from the Unicode Web site. */
|
||||
struct LineBreakProperties lb_prop_default[] = {
|
|
@ -0,0 +1,2 @@
|
|||
{ 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined }
|
||||
};
|
|
@ -0,0 +1,139 @@
|
|||
/* vim: set tabstop=4 shiftwidth=4: */
|
||||
|
||||
/*
|
||||
* Line breaking in a Unicode sequence. Designed to be used in a
|
||||
* generic text renderer.
|
||||
*
|
||||
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the author be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute
|
||||
* it freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgement in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*
|
||||
* The main reference is Unicode Standard Annex 14 (UAX #14):
|
||||
* <URL:http://www.unicode.org/reports/tr14/>
|
||||
*
|
||||
* When this library was designed, this annex was at Revision 19, for
|
||||
* Unicode 5.0.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
|
||||
*
|
||||
* This library has been updated according to Revision 24, for
|
||||
* Unicode 5.2.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
|
||||
*
|
||||
* The Unicode Terms of Use are available at
|
||||
* <URL:http://www.unicode.org/copyright.html>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file linebreakdef.c
|
||||
*
|
||||
* Definition of language-specific data.
|
||||
*
|
||||
* @version 2.0, 2010/01/03
|
||||
* @author Wu Yongwei
|
||||
*/
|
||||
|
||||
#include "linebreak.h"
|
||||
#include "linebreakdef.h"
|
||||
|
||||
/**
|
||||
* English-specific data over the default Unicode rules.
|
||||
*/
|
||||
static struct LineBreakProperties lb_prop_English[] = {
|
||||
{ 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
|
||||
{ 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
|
||||
{ 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
|
||||
{ 0, 0, LBP_Undefined }
|
||||
};
|
||||
|
||||
/**
|
||||
* German-specific data over the default Unicode rules.
|
||||
*/
|
||||
static struct LineBreakProperties lb_prop_German[] = {
|
||||
{ 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
|
||||
{ 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
|
||||
{ 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
|
||||
{ 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
|
||||
{ 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
|
||||
{ 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */
|
||||
{ 0, 0, LBP_Undefined }
|
||||
};
|
||||
|
||||
/**
|
||||
* Spanish-specific data over the default Unicode rules.
|
||||
*/
|
||||
static struct LineBreakProperties lb_prop_Spanish[] = {
|
||||
{ 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
|
||||
{ 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
|
||||
{ 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
|
||||
{ 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
|
||||
{ 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
|
||||
{ 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
|
||||
{ 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
|
||||
{ 0, 0, LBP_Undefined }
|
||||
};
|
||||
|
||||
/**
|
||||
* French-specific data over the default Unicode rules.
|
||||
*/
|
||||
static struct LineBreakProperties lb_prop_French[] = {
|
||||
{ 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
|
||||
{ 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
|
||||
{ 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
|
||||
{ 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
|
||||
{ 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
|
||||
{ 0x2039, 0x2039, LBP_OP }, /* Left single angle quotation mark: opening */
|
||||
{ 0x203A, 0x203A, LBP_CL }, /* Right single angle quotation mark: closing */
|
||||
{ 0, 0, LBP_Undefined }
|
||||
};
|
||||
|
||||
/**
|
||||
* Russian-specific data over the default Unicode rules.
|
||||
*/
|
||||
static struct LineBreakProperties lb_prop_Russian[] = {
|
||||
{ 0x00AB, 0x00AB, LBP_OP }, /* Left double angle quotation mark: opening */
|
||||
{ 0x00BB, 0x00BB, LBP_CL }, /* Right double angle quotation mark: closing */
|
||||
{ 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
|
||||
{ 0, 0, LBP_Undefined }
|
||||
};
|
||||
|
||||
/**
|
||||
* Chinese-specifc data over the default Unicode rules.
|
||||
*/
|
||||
static struct LineBreakProperties lb_prop_Chinese[] = {
|
||||
{ 0x2018, 0x2018, LBP_OP }, /* Left single quotation mark: opening */
|
||||
{ 0x2019, 0x2019, LBP_CL }, /* Right single quotation mark: closing */
|
||||
{ 0x201C, 0x201C, LBP_OP }, /* Left double quotation mark: opening */
|
||||
{ 0x201D, 0x201D, LBP_CL }, /* Right double quotation mark: closing */
|
||||
{ 0, 0, LBP_Undefined }
|
||||
};
|
||||
|
||||
/**
|
||||
* Association data of language-specific line breaking properties with
|
||||
* language names. This is the definition for the static data in this
|
||||
* file. If you want more flexibility, or do not need the data here,
|
||||
* you may want to redefine \e lb_prop_lang_map in your C source file.
|
||||
*/
|
||||
struct LineBreakPropertiesLang lb_prop_lang_map[] = {
|
||||
{ "en", 2, lb_prop_English },
|
||||
{ "de", 2, lb_prop_German },
|
||||
{ "es", 2, lb_prop_Spanish },
|
||||
{ "fr", 2, lb_prop_French },
|
||||
{ "ru", 2, lb_prop_Russian },
|
||||
{ "zh", 2, lb_prop_Chinese },
|
||||
{ NULL, 0, NULL }
|
||||
};
|
|
@ -0,0 +1,149 @@
|
|||
/* vim: set tabstop=4 shiftwidth=4: */
|
||||
|
||||
/*
|
||||
* Line breaking in a Unicode sequence. Designed to be used in a
|
||||
* generic text renderer.
|
||||
*
|
||||
* Copyright (C) 2008-2010 Wu Yongwei <wuyongwei at gmail dot com>
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the author be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute
|
||||
* it freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must
|
||||
* not claim that you wrote the original software. If you use this
|
||||
* software in a product, an acknowledgement in the product
|
||||
* documentation would be appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must
|
||||
* not be misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source
|
||||
* distribution.
|
||||
*
|
||||
* The main reference is Unicode Standard Annex 14 (UAX #14):
|
||||
* <URL:http://www.unicode.org/reports/tr14/>
|
||||
*
|
||||
* When this library was designed, this annex was at Revision 19, for
|
||||
* Unicode 5.0.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
|
||||
*
|
||||
* This library has been updated according to Revision 24, for
|
||||
* Unicode 5.2.0:
|
||||
* <URL:http://www.unicode.org/reports/tr14/tr14-24.html>
|
||||
*
|
||||
* The Unicode Terms of Use are available at
|
||||
* <URL:http://www.unicode.org/copyright.html>
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file linebreakdef.h
|
||||
*
|
||||
* Definitions of internal data structures, declarations of global
|
||||
* variables, and function prototypes for the line breaking algorithm.
|
||||
*
|
||||
* @version 2.0, 2010/01/03
|
||||
* @author Wu Yongwei
|
||||
*/
|
||||
|
||||
/**
 * Marker value for "end of string".  The value is chosen because it
 * is not a valid Unicode character, so it cannot be confused with
 * real text.
 */
#define EOS 0xFFFF
|
||||
|
||||
/**
 * Line break classes.  This mirrors Table 1 of Unicode Standard
 * Annex 14.  The list is no longer the plain Revision 19 set: LBP_CP
 * (closing parenthesis) belongs to the Revision 24 (Unicode 5.2.0)
 * update mentioned in the header of this file.
 *
 * The enumerators are implicitly numbered from 0 in declaration order;
 * do not reorder them without checking every user of these values.
 */
enum LineBreakClass
{
    /* This is used to signal an error condition. */
    LBP_Undefined,  /**< Undefined */

    /* The following break classes are treated in the pair table. */
    LBP_OP,         /**< Opening punctuation */
    LBP_CL,         /**< Closing punctuation */
    LBP_CP,         /**< Closing parenthesis */
    LBP_QU,         /**< Ambiguous quotation */
    LBP_GL,         /**< Glue */
    LBP_NS,         /**< Non-starters */
    LBP_EX,         /**< Exclamation/Interrogation */
    LBP_SY,         /**< Symbols allowing break after */
    LBP_IS,         /**< Infix separator */
    LBP_PR,         /**< Prefix */
    LBP_PO,         /**< Postfix */
    LBP_NU,         /**< Numeric */
    LBP_AL,         /**< Alphabetic */
    LBP_ID,         /**< Ideographic */
    LBP_IN,         /**< Inseparable characters */
    LBP_HY,         /**< Hyphen */
    LBP_BA,         /**< Break after */
    LBP_BB,         /**< Break before */
    LBP_B2,         /**< Break on either side (but not pair) */
    LBP_ZW,         /**< Zero-width space */
    LBP_CM,         /**< Combining marks */
    LBP_WJ,         /**< Word joiner */
    LBP_H2,         /**< Hangul LV */
    LBP_H3,         /**< Hangul LVT */
    LBP_JL,         /**< Hangul L Jamo */
    LBP_JV,         /**< Hangul V Jamo */
    LBP_JT,         /**< Hangul T Jamo */

    /* The following break classes are not treated in the pair table */
    LBP_AI,         /**< Ambiguous (alphabetic or ideograph) */
    LBP_BK,         /**< Break (mandatory) */
    LBP_CB,         /**< Contingent break */
    LBP_CR,         /**< Carriage return */
    LBP_LF,         /**< Line feed */
    LBP_NL,         /**< Next line */
    LBP_SA,         /**< South-East Asian */
    LBP_SG,         /**< Surrogates */
    LBP_SP,         /**< Space */
    LBP_XX          /**< Unknown */
};
|
||||
|
||||
/**
|
||||
* Struct for entries of line break properties. The array of the
|
||||
* entries \e must be sorted.
|
||||
*/
|
||||
struct LineBreakProperties
|
||||
{
|
||||
utf32_t start; /**< Starting coding point */
|
||||
utf32_t end; /**< End coding point */
|
||||
enum LineBreakClass prop; /**< The line breaking property */
|
||||
};
|
||||
|
||||
/**
 * Associates a language name with the table of line breaking
 * properties specific to that language.
 */
struct LineBreakPropertiesLang {
    const char *lang;                   /**< Name of the language */
    size_t namelen;                     /**< Length of the name to match */
    struct LineBreakProperties *lbp;    /**< Associated property table */
};
|
||||
|
||||
/**
|
||||
* Abstract function interface for #lb_get_next_char_utf8,
|
||||
* #lb_get_next_char_utf16, and #lb_get_next_char_utf32.
|
||||
*/
|
||||
typedef utf32_t (*get_next_char_t)(const void *, size_t, size_t *);
|
||||
|
||||
/* Declarations */
|
||||
extern struct LineBreakProperties lb_prop_default[];
|
||||
extern struct LineBreakPropertiesLang lb_prop_lang_map[];
|
||||
|
||||
/* Function Prototype */
|
||||
utf32_t lb_get_next_char_utf8(const utf8_t *s, size_t len, size_t *ip);
|
||||
utf32_t lb_get_next_char_utf16(const utf16_t *s, size_t len, size_t *ip);
|
||||
utf32_t lb_get_next_char_utf32(const utf32_t *s, size_t len, size_t *ip);
|
||||
void set_linebreaks(
|
||||
const void *s,
|
||||
size_t len,
|
||||
const char *lang,
|
||||
char *brks,
|
||||
get_next_char_t get_next_char);
|
Loading…
Reference in New Issue