Revert "Revert "evas_textblock: rainbow flag emoji treated as two clusters(update unibreak to version 4.2)""

This reverts commit 173b3a108e.
This was reverted because of the code freeze for the release.
Now that the release work is over, I think it's OK to restore this.
This commit is contained in:
WooHyun Jung 2020-10-08 12:32:53 +09:00
parent 0a08a860a8
commit 509e3fcc7a
25 changed files with 986 additions and 344 deletions

View File

@ -10249,7 +10249,7 @@ evas_textblock_cursor_word_start(Efl_Text_Cursor_Handle *cur)
if ((cur->pos > 0) && (cur->pos == len))
cur->pos--;
for (i = cur->pos ; _is_white(text[i]) && BREAK_AFTER(i) ; i--)
for (i = cur->pos ; _is_white(text[i]) ; i--)
{
if (i == 0)
{
@ -10316,7 +10316,7 @@ evas_textblock_cursor_word_end(Efl_Text_Cursor_Handle *cur)
set_wordbreaks_utf32((const utf32_t *) text, len, lang, breaks);
}
for (i = cur->pos; text[i] && _is_white(text[i]) && (BREAK_AFTER(i)) ; i++);
for (i = cur->pos; text[i] && _is_white(text[i]) ; i++);
if (i == len)
{
Evas_Object_Textblock_Node_Text *nnode;

View File

@ -1,7 +1,7 @@
Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
Copyright (C) 2012-2016 Tom Hacohen <tom at stosb dot com>
Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
Copyright (C) Wu Yongwei <wuyongwei at gmail dot com>
Copyright (C) Tom Hacohen <tom at stosb dot com>
Copyright (C) Petr Filipsky <philodej at gmail dot com>
Copyright (C) Andreas Röver <roever at users dot sf dot net>
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages

View File

@ -1,3 +1,14 @@
New in libunibreak 4.2
- Update the data to conform to Unicode 12
New in libunibreak 4.1
- Update the code and data to conform to Unicode 11.0.0, especially
adding support for extended pictographs in word and grapheme breaking
- ZWJ support has been much improved (it was broken)
- Make minor tweaks to the project files
New in libunibreak 4.0
- Update the code and data to conform to Unicode 9.0.0
@ -22,7 +33,8 @@ New in libunibreak 1.1
New in libunibreak 1.0
- Add word breaking support
- Change the library name to "libunibreak", while keeping maximum compatibility
- Change the library name to "libunibreak", while keeping maximum
compatibility
- Add pkg-config support
New in liblinebreak 2.1

View File

@ -6,11 +6,11 @@ Overview
This is the README file for libunibreak, an implementation of the line
breaking and word breaking algorithms as described in [Unicode Standard
Annex 14] [1] and [Unicode Standard Annex 29] [2]. Check the project's
[home page] [3] for up-to-date information.
Annex 14][1] and [Unicode Standard Annex 29][2]. Check the project's
[home page][3] for up-to-date information.
[1]: http://www.unicode.org/reports/tr14/tr14-37.html
[2]: http://www.unicode.org/reports/tr29/tr29-29.html
[1]: http://www.unicode.org/reports/tr14/
[2]: http://www.unicode.org/reports/tr29/
[3]: https://github.com/adah1972/libunibreak
@ -21,7 +21,7 @@ This library is released under an open-source licence, the zlib/libpng
licence. Please check the file *LICENCE* for details.
Apart from using the algorithm, part of the code is derived from the
[Unicode Public Data] [4], and the [Unicode Terms of Use] [5] may apply.
[Unicode Public Data][4], and the [Unicode Terms of Use][5] may apply.
[4]: http://www.unicode.org/Public/
[5]: http://www.unicode.org/copyright.html
@ -48,6 +48,8 @@ There are three ways to build the library:
*WordBreakProperty.txt*.
- type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
from *GraphemeBreakProperty.txt*.
- type `make emojidata` to regenerate *emojidata.c* from
*emoji-data.txt*.
2. On systems where GCC and Binutils are supported, one can type
@ -65,6 +67,8 @@ There are three ways to build the library:
*WordBreakProperty.txt*.
- type `make graphemebreakdata` to regenerate *graphemebreakdata.c*
from *GraphemeBreakProperty.txt*.
- type `make emojidata` to regenerate *emojidata.c* from
*emoji-data.txt*.
3. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2
(MinGW), MSVC can also be used. Type
@ -72,7 +76,7 @@ There are three ways to build the library:
cd src
nmake -f Makefile.msvc
to build the static library. By default the debug release is built.
to build the static library. By default the debug version is built.
To build the release version
nmake -f Makefile.msvc CFG="libunibreak - Win32 Release"

View File

@ -0,0 +1,264 @@
/* The content of this file is generated from:
# emoji-data.txt
# Date: 2019-01-15, 12:10:05 GMT
*/
static const struct ExtendedPictograpic ep_prop[] = {
{0x00A9, 0x00A9},
{0x00AE, 0x00AE},
{0x203C, 0x203C},
{0x2049, 0x2049},
{0x2122, 0x2122},
{0x2139, 0x2139},
{0x2194, 0x2199},
{0x21A9, 0x21AA},
{0x231A, 0x231B},
{0x2328, 0x2328},
{0x2388, 0x2388},
{0x23CF, 0x23CF},
{0x23E9, 0x23F3},
{0x23F8, 0x23FA},
{0x24C2, 0x24C2},
{0x25AA, 0x25AB},
{0x25B6, 0x25B6},
{0x25C0, 0x25C0},
{0x25FB, 0x25FE},
{0x2600, 0x2605},
{0x2607, 0x2612},
{0x2614, 0x2615},
{0x2616, 0x2617},
{0x2618, 0x2618},
{0x2619, 0x2619},
{0x261A, 0x266F},
{0x2670, 0x2671},
{0x2672, 0x267D},
{0x267E, 0x267F},
{0x2680, 0x2685},
{0x2690, 0x2691},
{0x2692, 0x269C},
{0x269D, 0x269D},
{0x269E, 0x269F},
{0x26A0, 0x26A1},
{0x26A2, 0x26B1},
{0x26B2, 0x26B2},
{0x26B3, 0x26BC},
{0x26BD, 0x26BF},
{0x26C0, 0x26C3},
{0x26C4, 0x26CD},
{0x26CE, 0x26CE},
{0x26CF, 0x26E1},
{0x26E2, 0x26E2},
{0x26E3, 0x26E3},
{0x26E4, 0x26E7},
{0x26E8, 0x26FF},
{0x2700, 0x2700},
{0x2701, 0x2704},
{0x2705, 0x2705},
{0x2708, 0x2709},
{0x270A, 0x270B},
{0x270C, 0x2712},
{0x2714, 0x2714},
{0x2716, 0x2716},
{0x271D, 0x271D},
{0x2721, 0x2721},
{0x2728, 0x2728},
{0x2733, 0x2734},
{0x2744, 0x2744},
{0x2747, 0x2747},
{0x274C, 0x274C},
{0x274E, 0x274E},
{0x2753, 0x2755},
{0x2757, 0x2757},
{0x2763, 0x2767},
{0x2795, 0x2797},
{0x27A1, 0x27A1},
{0x27B0, 0x27B0},
{0x27BF, 0x27BF},
{0x2934, 0x2935},
{0x2B05, 0x2B07},
{0x2B1B, 0x2B1C},
{0x2B50, 0x2B50},
{0x2B55, 0x2B55},
{0x3030, 0x3030},
{0x303D, 0x303D},
{0x3297, 0x3297},
{0x3299, 0x3299},
{0x1F000, 0x1F02B},
{0x1F02C, 0x1F02F},
{0x1F030, 0x1F093},
{0x1F094, 0x1F09F},
{0x1F0A0, 0x1F0AE},
{0x1F0AF, 0x1F0B0},
{0x1F0B1, 0x1F0BE},
{0x1F0BF, 0x1F0BF},
{0x1F0C0, 0x1F0C0},
{0x1F0C1, 0x1F0CF},
{0x1F0D0, 0x1F0D0},
{0x1F0D1, 0x1F0DF},
{0x1F0E0, 0x1F0F5},
{0x1F0F6, 0x1F0FF},
{0x1F10D, 0x1F10F},
{0x1F12F, 0x1F12F},
{0x1F16C, 0x1F16C},
{0x1F16D, 0x1F16F},
{0x1F170, 0x1F171},
{0x1F17E, 0x1F17E},
{0x1F17F, 0x1F17F},
{0x1F18E, 0x1F18E},
{0x1F191, 0x1F19A},
{0x1F1AD, 0x1F1E5},
{0x1F201, 0x1F202},
{0x1F203, 0x1F20F},
{0x1F21A, 0x1F21A},
{0x1F22F, 0x1F22F},
{0x1F232, 0x1F23A},
{0x1F23C, 0x1F23F},
{0x1F249, 0x1F24F},
{0x1F250, 0x1F251},
{0x1F252, 0x1F25F},
{0x1F260, 0x1F265},
{0x1F266, 0x1F2FF},
{0x1F300, 0x1F320},
{0x1F321, 0x1F32C},
{0x1F32D, 0x1F32F},
{0x1F330, 0x1F335},
{0x1F336, 0x1F336},
{0x1F337, 0x1F37C},
{0x1F37D, 0x1F37D},
{0x1F37E, 0x1F37F},
{0x1F380, 0x1F393},
{0x1F394, 0x1F39F},
{0x1F3A0, 0x1F3C4},
{0x1F3C5, 0x1F3C5},
{0x1F3C6, 0x1F3CA},
{0x1F3CB, 0x1F3CE},
{0x1F3CF, 0x1F3D3},
{0x1F3D4, 0x1F3DF},
{0x1F3E0, 0x1F3F0},
{0x1F3F1, 0x1F3F7},
{0x1F3F8, 0x1F3FA},
{0x1F400, 0x1F43E},
{0x1F43F, 0x1F43F},
{0x1F440, 0x1F440},
{0x1F441, 0x1F441},
{0x1F442, 0x1F4F7},
{0x1F4F8, 0x1F4F8},
{0x1F4F9, 0x1F4FC},
{0x1F4FD, 0x1F4FE},
{0x1F4FF, 0x1F4FF},
{0x1F500, 0x1F53D},
{0x1F546, 0x1F54A},
{0x1F54B, 0x1F54F},
{0x1F550, 0x1F567},
{0x1F568, 0x1F579},
{0x1F57A, 0x1F57A},
{0x1F57B, 0x1F5A3},
{0x1F5A4, 0x1F5A4},
{0x1F5A5, 0x1F5FA},
{0x1F5FB, 0x1F5FF},
{0x1F600, 0x1F600},
{0x1F601, 0x1F610},
{0x1F611, 0x1F611},
{0x1F612, 0x1F614},
{0x1F615, 0x1F615},
{0x1F616, 0x1F616},
{0x1F617, 0x1F617},
{0x1F618, 0x1F618},
{0x1F619, 0x1F619},
{0x1F61A, 0x1F61A},
{0x1F61B, 0x1F61B},
{0x1F61C, 0x1F61E},
{0x1F61F, 0x1F61F},
{0x1F620, 0x1F625},
{0x1F626, 0x1F627},
{0x1F628, 0x1F62B},
{0x1F62C, 0x1F62C},
{0x1F62D, 0x1F62D},
{0x1F62E, 0x1F62F},
{0x1F630, 0x1F633},
{0x1F634, 0x1F634},
{0x1F635, 0x1F640},
{0x1F641, 0x1F642},
{0x1F643, 0x1F644},
{0x1F645, 0x1F64F},
{0x1F680, 0x1F6C5},
{0x1F6C6, 0x1F6CF},
{0x1F6D0, 0x1F6D0},
{0x1F6D1, 0x1F6D2},
{0x1F6D3, 0x1F6D4},
{0x1F6D5, 0x1F6D5},
{0x1F6D6, 0x1F6DF},
{0x1F6E0, 0x1F6EC},
{0x1F6ED, 0x1F6EF},
{0x1F6F0, 0x1F6F3},
{0x1F6F4, 0x1F6F6},
{0x1F6F7, 0x1F6F8},
{0x1F6F9, 0x1F6F9},
{0x1F6FA, 0x1F6FA},
{0x1F6FB, 0x1F6FF},
{0x1F774, 0x1F77F},
{0x1F7D5, 0x1F7D8},
{0x1F7D9, 0x1F7DF},
{0x1F7E0, 0x1F7EB},
{0x1F7EC, 0x1F7FF},
{0x1F80C, 0x1F80F},
{0x1F848, 0x1F84F},
{0x1F85A, 0x1F85F},
{0x1F888, 0x1F88F},
{0x1F8AE, 0x1F8FF},
{0x1F90C, 0x1F90C},
{0x1F90D, 0x1F90F},
{0x1F910, 0x1F918},
{0x1F919, 0x1F91E},
{0x1F91F, 0x1F91F},
{0x1F920, 0x1F927},
{0x1F928, 0x1F92F},
{0x1F930, 0x1F930},
{0x1F931, 0x1F932},
{0x1F933, 0x1F93A},
{0x1F93C, 0x1F93E},
{0x1F93F, 0x1F93F},
{0x1F940, 0x1F945},
{0x1F947, 0x1F94B},
{0x1F94C, 0x1F94C},
{0x1F94D, 0x1F94F},
{0x1F950, 0x1F95E},
{0x1F95F, 0x1F96B},
{0x1F96C, 0x1F970},
{0x1F971, 0x1F971},
{0x1F972, 0x1F972},
{0x1F973, 0x1F976},
{0x1F977, 0x1F979},
{0x1F97A, 0x1F97A},
{0x1F97B, 0x1F97B},
{0x1F97C, 0x1F97F},
{0x1F980, 0x1F984},
{0x1F985, 0x1F991},
{0x1F992, 0x1F997},
{0x1F998, 0x1F9A2},
{0x1F9A3, 0x1F9A4},
{0x1F9A5, 0x1F9AA},
{0x1F9AB, 0x1F9AD},
{0x1F9AE, 0x1F9AF},
{0x1F9B0, 0x1F9B9},
{0x1F9BA, 0x1F9BF},
{0x1F9C0, 0x1F9C0},
{0x1F9C1, 0x1F9C2},
{0x1F9C3, 0x1F9CA},
{0x1F9CB, 0x1F9CC},
{0x1F9CD, 0x1F9CF},
{0x1F9D0, 0x1F9E6},
{0x1F9E7, 0x1F9FF},
{0x1FA00, 0x1FA53},
{0x1FA54, 0x1FA5F},
{0x1FA60, 0x1FA6D},
{0x1FA6E, 0x1FA6F},
{0x1FA70, 0x1FA73},
{0x1FA74, 0x1FA77},
{0x1FA78, 0x1FA7A},
{0x1FA7B, 0x1FA7F},
{0x1FA80, 0x1FA82},
{0x1FA83, 0x1FA8F},
{0x1FA90, 0x1FA95},
{0x1FA96, 0x1FFFD},
};

View File

@ -0,0 +1,61 @@
/*
* Emoji-related routine and data.
*
* Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute
* it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must
* not claim that you wrote the original software. If you use this
* software in a product, an acknowledgement in the product
* documentation would be appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*/
/**
* @file emojidef.c
*
* Emoji-related routine and data that are used internally.
*
* @author Andreas Röver
*/
#include "emojidef.h"
#include "emojidata.c"
/**
 * Tests whether a codepoint lies inside one of the inclusive ranges of
 * the \c ep_prop table, i.e. whether it is extended pictographic.
 *
 * The table is bisected, so its entries have to be in ascending
 * codepoint order.
 *
 * @param[in] ch  codepoint to look up
 * @return        \c true if \a ch is covered by an entry of \c ep_prop;
 *                \c false otherwise
 */
bool ub_is_extended_pictographic(utf32_t ch)
{
    int lo = 0;
    int hi = ARRAY_LEN(ep_prop) - 1;

    /* The first probe is unconditional, so ep_prop must hold at least
     * one entry. */
    for (;;)
    {
        const int probe = (lo + hi) / 2;

        if (ch < ep_prop[probe].start)
        {
            /* ch sorts before this range: continue in the lower half */
            hi = probe - 1;
        }
        else if (ch > ep_prop[probe].end)
        {
            /* ch sorts after this range: continue in the upper half */
            lo = probe + 1;
        }
        else
        {
            /* start <= ch <= end */
            return true;
        }

        if (lo > hi)
        {
            /* search interval exhausted without a hit */
            return false;
        }
    }
}

View File

@ -0,0 +1,46 @@
/*
* Emoji-related routine and data.
*
* Copyright (C) 2018 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute
* it freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must
* not claim that you wrote the original software. If you use this
* software in a product, an acknowledgement in the product
* documentation would be appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*/
/**
* @file emojidef.h
*
* Definitions of internal data structure and function for extended
* pictographs.
*
* @author Andreas Röver
*/
#include "unibreakdef.h"
/**
 * One entry of the extended pictographic property table: an inclusive
 * codepoint range [start, end] in which every codepoint is extended
 * pictographic.
 *
 * An array of these entries \e must be sorted in ascending codepoint
 * order, because ub_is_extended_pictographic() locates a codepoint by
 * binary search over the array.
 *
 * Note that the struct tag is spelled "Pictograpic" (without the second
 * "h"); the generated data table is declared with the same spelling, so
 * the two have to be kept in sync.
 */
struct ExtendedPictograpic
{
utf32_t start; /**< First codepoint of the range */
utf32_t end; /**< Last codepoint of the range, inclusive */
};
/**
 * Finds out if a codepoint is extended pictographic.
 *
 * @param[in] ch  character to check
 * @return        \c true if the codepoint is extended pictographic;
 *                \c false otherwise
 */
bool ub_is_extended_pictographic(utf32_t ch);

View File

@ -2,7 +2,7 @@
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
* Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -28,6 +28,10 @@
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* This library has been updated according to Revision 35, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
@ -38,23 +42,14 @@
* Implementation of the grapheme breaking algorithm as described in Unicode
* Standard Annex 29.
*
* @author Andreas Roever
* @author Andreas Röver
*/
#if defined(_MSC_VER) && _MSC_VER < 1800
typedef int bool;
#define false 0
#define true 1
#else
#include <stdbool.h>
#endif
#include <string.h>
#include "graphemebreak.h"
#include "graphemebreakdata.c"
#include "unibreakdef.h"
#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
#include "emojidef.h"
/**
* Initializes the grapheme break internals. It currently does nothing, but
@ -67,8 +62,8 @@ void init_graphemebreak(void)
/**
* Gets the grapheme breaking class of a character.
*
* @param ch character to check
* @return the grapheme breaking class if found; \c GBP_Other otherwise
* @param[in] ch character to check
* @return the grapheme breaking class if found; \c GBP_Other otherwise
*/
static enum GraphemeBreakClass get_char_gb_class(utf32_t ch)
{
@ -93,6 +88,7 @@ static enum GraphemeBreakClass get_char_gb_class(utf32_t ch)
/**
* Sets the grapheme breaking information for a generic input string.
* It uses the extended grapheme cluster ruleset.
*
* @param[in] s input string
* @param[in] len length of the input
@ -104,7 +100,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
get_next_char_t get_next_char)
{
size_t posNext = 0;
bool rule10Left = false; // is the left side of rule 10 fulfilled?
int rule11Detector = 0;
bool evenRegionalIndicators = true; // is the number of preceeding
// GBP_RegionalIndicator characters
// even
@ -117,6 +113,47 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
while (true)
{
// this state-machine recognizes the following pattern:
// extended_pictograph Extended* ZWJ
// when that pattern has been detected rule11Detector will be
// 3 and rule 11 can be applied below
switch (current_class)
{
case GBP_ZWJ:
if (rule11Detector == 1 || rule11Detector == 2)
{
rule11Detector = 3;
}
else
{
rule11Detector = 0;
}
break;
case GBP_Extend:
if (rule11Detector == 1 || rule11Detector == 2)
{
rule11Detector = 2;
}
else
{
rule11Detector = 0;
}
break;
default:
if (ub_is_extended_pictographic(ch))
{
rule11Detector = 1;
}
else
{
rule11Detector = 0;
}
break;
}
enum GraphemeBreakClass prev_class = current_class;
// safe position if current character so that we can store the
@ -137,16 +174,6 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
// get class of current character
current_class = get_char_gb_class(ch);
// update some helper variables
if ((prev_class == GBP_E_Base) || (prev_class == GBP_E_Base_GAZ))
{
rule10Left = true;
}
else if (prev_class != GBP_Extend)
{
rule10Left = false;
}
if (prev_class == GBP_Regional_Indicator)
{
evenRegionalIndicators = !evenRegionalIndicators;
@ -185,7 +212,8 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB8
}
else if ((current_class == GBP_Extend) ||
(current_class == GBP_ZWJ))
(current_class == GBP_ZWJ) ||
(current_class == GBP_Virama))
{
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9
}
@ -197,13 +225,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks,
{
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9b
}
else if (rule10Left && (current_class == GBP_E_Modifier))
{
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB10
}
else if ((prev_class == GBP_ZWJ) &&
((current_class == GBP_Glue_After_Zwj) ||
(current_class == GBP_E_Base_GAZ)))
else if ((rule11Detector == 3) && ub_is_extended_pictographic(ch))
{
brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB11
}

View File

@ -2,7 +2,7 @@
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
* Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -28,6 +28,10 @@
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* This library has been updated according to Revision 35, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
@ -66,4 +70,4 @@ void set_graphemebreaks_utf32(const utf32_t *s, size_t len,
}
#endif
#endif
#endif /* GRAPHEMEBREAK_H */

View File

@ -1,6 +1,6 @@
/* The content of this file is generated from:
# GraphemeBreakProperty-9.0.0.txt
# Date: 2016-06-03, 22:23:55 GMT
# GraphemeBreakProperty-12.1.0.txt
# Date: 2019-03-10, 10:53:12 GMT
*/
#include "graphemebreakdef.h"
@ -36,12 +36,13 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0730, 0x074A, GBP_Extend},
{0x07A6, 0x07B0, GBP_Extend},
{0x07EB, 0x07F3, GBP_Extend},
{0x07FD, 0x07FD, GBP_Extend},
{0x0816, 0x0819, GBP_Extend},
{0x081B, 0x0823, GBP_Extend},
{0x0825, 0x0827, GBP_Extend},
{0x0829, 0x082D, GBP_Extend},
{0x0859, 0x085B, GBP_Extend},
{0x08D4, 0x08E1, GBP_Extend},
{0x08D3, 0x08E1, GBP_Extend},
{0x08E2, 0x08E2, GBP_Prepend},
{0x08E3, 0x0902, GBP_Extend},
{0x0903, 0x0903, GBP_SpacingMark},
@ -66,6 +67,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x09CD, 0x09CD, GBP_Extend},
{0x09D7, 0x09D7, GBP_Extend},
{0x09E2, 0x09E3, GBP_Extend},
{0x09FE, 0x09FE, GBP_Extend},
{0x0A01, 0x0A02, GBP_Extend},
{0x0A03, 0x0A03, GBP_SpacingMark},
{0x0A3C, 0x0A3C, GBP_Extend},
@ -86,6 +88,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0ACB, 0x0ACC, GBP_SpacingMark},
{0x0ACD, 0x0ACD, GBP_Extend},
{0x0AE2, 0x0AE3, GBP_Extend},
{0x0AFA, 0x0AFF, GBP_Extend},
{0x0B01, 0x0B01, GBP_Extend},
{0x0B02, 0x0B03, GBP_SpacingMark},
{0x0B3C, 0x0B3C, GBP_Extend},
@ -110,6 +113,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0BD7, 0x0BD7, GBP_Extend},
{0x0C00, 0x0C00, GBP_Extend},
{0x0C01, 0x0C03, GBP_SpacingMark},
{0x0C04, 0x0C04, GBP_Extend},
{0x0C3E, 0x0C40, GBP_Extend},
{0x0C41, 0x0C44, GBP_SpacingMark},
{0x0C46, 0x0C48, GBP_Extend},
@ -130,8 +134,9 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0CCC, 0x0CCD, GBP_Extend},
{0x0CD5, 0x0CD6, GBP_Extend},
{0x0CE2, 0x0CE3, GBP_Extend},
{0x0D01, 0x0D01, GBP_Extend},
{0x0D00, 0x0D01, GBP_Extend},
{0x0D02, 0x0D03, GBP_SpacingMark},
{0x0D3B, 0x0D3C, GBP_Extend},
{0x0D3E, 0x0D3E, GBP_Extend},
{0x0D3F, 0x0D40, GBP_SpacingMark},
{0x0D41, 0x0D44, GBP_Extend},
@ -156,8 +161,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x0E47, 0x0E4E, GBP_Extend},
{0x0EB1, 0x0EB1, GBP_Extend},
{0x0EB3, 0x0EB3, GBP_SpacingMark},
{0x0EB4, 0x0EB9, GBP_Extend},
{0x0EBB, 0x0EBC, GBP_Extend},
{0x0EB4, 0x0EBC, GBP_Extend},
{0x0EC8, 0x0ECD, GBP_Extend},
{0x0F18, 0x0F19, GBP_Extend},
{0x0F35, 0x0F35, GBP_Extend},
@ -232,7 +236,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x1B00, 0x1B03, GBP_Extend},
{0x1B04, 0x1B04, GBP_SpacingMark},
{0x1B34, 0x1B34, GBP_Extend},
{0x1B35, 0x1B35, GBP_SpacingMark},
{0x1B35, 0x1B35, GBP_Extend},
{0x1B36, 0x1B3A, GBP_Extend},
{0x1B3B, 0x1B3B, GBP_SpacingMark},
{0x1B3C, 0x1B3C, GBP_Extend},
@ -265,10 +269,10 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x1CE1, 0x1CE1, GBP_SpacingMark},
{0x1CE2, 0x1CE8, GBP_Extend},
{0x1CED, 0x1CED, GBP_Extend},
{0x1CF2, 0x1CF3, GBP_SpacingMark},
{0x1CF4, 0x1CF4, GBP_Extend},
{0x1CF7, 0x1CF7, GBP_SpacingMark},
{0x1CF8, 0x1CF9, GBP_Extend},
{0x1DC0, 0x1DF5, GBP_Extend},
{0x1DC0, 0x1DF9, GBP_Extend},
{0x1DFB, 0x1DFF, GBP_Extend},
{0x200B, 0x200B, GBP_Control},
{0x200C, 0x200C, GBP_Extend},
@ -285,10 +289,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x20E1, 0x20E1, GBP_Extend},
{0x20E2, 0x20E4, GBP_Extend},
{0x20E5, 0x20F0, GBP_Extend},
{0x261D, 0x261D, GBP_E_Base},
{0x26F9, 0x26F9, GBP_E_Base},
{0x270A, 0x270D, GBP_E_Base},
{0x2764, 0x2764, GBP_Glue_After_Zwj},
{0x2CEF, 0x2CF1, GBP_Extend},
{0x2D7F, 0x2D7F, GBP_Extend},
{0x2DE0, 0x2DFF, GBP_Extend},
@ -310,6 +310,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0xA8B4, 0xA8C3, GBP_SpacingMark},
{0xA8C4, 0xA8C5, GBP_Extend},
{0xA8E0, 0xA8F1, GBP_Extend},
{0xA8FF, 0xA8FF, GBP_Extend},
{0xA926, 0xA92D, GBP_Extend},
{0xA947, 0xA951, GBP_Extend},
{0xA952, 0xA953, GBP_SpacingMark},
@ -320,8 +321,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0xA9B4, 0xA9B5, GBP_SpacingMark},
{0xA9B6, 0xA9B9, GBP_Extend},
{0xA9BA, 0xA9BB, GBP_SpacingMark},
{0xA9BC, 0xA9BC, GBP_Extend},
{0xA9BD, 0xA9C0, GBP_SpacingMark},
{0xA9BC, 0xA9BD, GBP_Extend},
{0xA9BE, 0xA9C0, GBP_SpacingMark},
{0xA9E5, 0xA9E5, GBP_Extend},
{0xAA29, 0xAA2E, GBP_Extend},
{0xAA2F, 0xAA30, GBP_SpacingMark},
@ -1149,7 +1150,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0xD789, 0xD7A3, GBP_LVT},
{0xD7B0, 0xD7C6, GBP_V},
{0xD7CB, 0xD7FB, GBP_T},
{0xD800, 0xDFFF, GBP_Control},
{0xFB1E, 0xFB1E, GBP_Extend},
{0xFE00, 0xFE0F, GBP_Extend},
{0xFE20, 0xFE2F, GBP_Extend},
@ -1166,6 +1166,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x10A38, 0x10A3A, GBP_Extend},
{0x10A3F, 0x10A3F, GBP_Extend},
{0x10AE5, 0x10AE6, GBP_Extend},
{0x10D24, 0x10D27, GBP_Extend},
{0x10F46, 0x10F50, GBP_Extend},
{0x11000, 0x11000, GBP_SpacingMark},
{0x11001, 0x11001, GBP_Extend},
{0x11002, 0x11002, GBP_SpacingMark},
@ -1177,10 +1179,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x110B7, 0x110B8, GBP_SpacingMark},
{0x110B9, 0x110BA, GBP_Extend},
{0x110BD, 0x110BD, GBP_Prepend},
{0x110CD, 0x110CD, GBP_Prepend},
{0x11100, 0x11102, GBP_Extend},
{0x11127, 0x1112B, GBP_Extend},
{0x1112C, 0x1112C, GBP_SpacingMark},
{0x1112D, 0x11134, GBP_Extend},
{0x11145, 0x11146, GBP_SpacingMark},
{0x11173, 0x11173, GBP_Extend},
{0x11180, 0x11181, GBP_Extend},
{0x11182, 0x11182, GBP_SpacingMark},
@ -1188,7 +1192,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x111B6, 0x111BE, GBP_Extend},
{0x111BF, 0x111C0, GBP_SpacingMark},
{0x111C2, 0x111C3, GBP_Prepend},
{0x111CA, 0x111CC, GBP_Extend},
{0x111C9, 0x111CC, GBP_Extend},
{0x1122C, 0x1122E, GBP_SpacingMark},
{0x1122F, 0x11231, GBP_Extend},
{0x11232, 0x11233, GBP_SpacingMark},
@ -1201,7 +1205,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x112E3, 0x112EA, GBP_Extend},
{0x11300, 0x11301, GBP_Extend},
{0x11302, 0x11303, GBP_SpacingMark},
{0x1133C, 0x1133C, GBP_Extend},
{0x1133B, 0x1133C, GBP_Extend},
{0x1133E, 0x1133E, GBP_Extend},
{0x1133F, 0x1133F, GBP_SpacingMark},
{0x11340, 0x11340, GBP_Extend},
@ -1218,6 +1222,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x11442, 0x11444, GBP_Extend},
{0x11445, 0x11445, GBP_SpacingMark},
{0x11446, 0x11446, GBP_Extend},
{0x1145E, 0x1145E, GBP_Extend},
{0x114B0, 0x114B0, GBP_Extend},
{0x114B1, 0x114B2, GBP_SpacingMark},
{0x114B3, 0x114B8, GBP_Extend},
@ -1255,6 +1260,29 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x11722, 0x11725, GBP_Extend},
{0x11726, 0x11726, GBP_SpacingMark},
{0x11727, 0x1172B, GBP_Extend},
{0x1182C, 0x1182E, GBP_SpacingMark},
{0x1182F, 0x11837, GBP_Extend},
{0x11838, 0x11838, GBP_SpacingMark},
{0x11839, 0x1183A, GBP_Extend},
{0x119D1, 0x119D3, GBP_SpacingMark},
{0x119D4, 0x119D7, GBP_Extend},
{0x119DA, 0x119DB, GBP_Extend},
{0x119DC, 0x119DF, GBP_SpacingMark},
{0x119E0, 0x119E0, GBP_Extend},
{0x119E4, 0x119E4, GBP_SpacingMark},
{0x11A01, 0x11A0A, GBP_Extend},
{0x11A33, 0x11A38, GBP_Extend},
{0x11A39, 0x11A39, GBP_SpacingMark},
{0x11A3A, 0x11A3A, GBP_Prepend},
{0x11A3B, 0x11A3E, GBP_Extend},
{0x11A47, 0x11A47, GBP_Extend},
{0x11A51, 0x11A56, GBP_Extend},
{0x11A57, 0x11A58, GBP_SpacingMark},
{0x11A59, 0x11A5B, GBP_Extend},
{0x11A84, 0x11A89, GBP_Prepend},
{0x11A8A, 0x11A96, GBP_Extend},
{0x11A97, 0x11A97, GBP_SpacingMark},
{0x11A98, 0x11A99, GBP_Extend},
{0x11C2F, 0x11C2F, GBP_SpacingMark},
{0x11C30, 0x11C36, GBP_Extend},
{0x11C38, 0x11C3D, GBP_Extend},
@ -1267,9 +1295,25 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x11CB2, 0x11CB3, GBP_Extend},
{0x11CB4, 0x11CB4, GBP_SpacingMark},
{0x11CB5, 0x11CB6, GBP_Extend},
{0x11D31, 0x11D36, GBP_Extend},
{0x11D3A, 0x11D3A, GBP_Extend},
{0x11D3C, 0x11D3D, GBP_Extend},
{0x11D3F, 0x11D45, GBP_Extend},
{0x11D46, 0x11D46, GBP_Prepend},
{0x11D47, 0x11D47, GBP_Extend},
{0x11D8A, 0x11D8E, GBP_SpacingMark},
{0x11D90, 0x11D91, GBP_Extend},
{0x11D93, 0x11D94, GBP_SpacingMark},
{0x11D95, 0x11D95, GBP_Extend},
{0x11D96, 0x11D96, GBP_SpacingMark},
{0x11D97, 0x11D97, GBP_Extend},
{0x11EF3, 0x11EF4, GBP_Extend},
{0x11EF5, 0x11EF6, GBP_SpacingMark},
{0x13430, 0x13438, GBP_Control},
{0x16AF0, 0x16AF4, GBP_Extend},
{0x16B30, 0x16B36, GBP_Extend},
{0x16F51, 0x16F7E, GBP_SpacingMark},
{0x16F4F, 0x16F4F, GBP_Extend},
{0x16F51, 0x16F87, GBP_SpacingMark},
{0x16F8F, 0x16F92, GBP_Extend},
{0x1BC9D, 0x1BC9E, GBP_Extend},
{0x1BCA0, 0x1BCA3, GBP_Control},
@ -1294,38 +1338,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = {
{0x1E01B, 0x1E021, GBP_Extend},
{0x1E023, 0x1E024, GBP_Extend},
{0x1E026, 0x1E02A, GBP_Extend},
{0x1E130, 0x1E136, GBP_Extend},
{0x1E2EC, 0x1E2EF, GBP_Extend},
{0x1E8D0, 0x1E8D6, GBP_Extend},
{0x1E944, 0x1E94A, GBP_Extend},
{0x1F1E6, 0x1F1FF, GBP_Regional_Indicator},
{0x1F385, 0x1F385, GBP_E_Base},
{0x1F3C3, 0x1F3C4, GBP_E_Base},
{0x1F3CA, 0x1F3CB, GBP_E_Base},
{0x1F3FB, 0x1F3FF, GBP_E_Modifier},
{0x1F442, 0x1F443, GBP_E_Base},
{0x1F446, 0x1F450, GBP_E_Base},
{0x1F466, 0x1F469, GBP_E_Base_GAZ},
{0x1F46E, 0x1F46E, GBP_E_Base},
{0x1F470, 0x1F478, GBP_E_Base},
{0x1F47C, 0x1F47C, GBP_E_Base},
{0x1F481, 0x1F483, GBP_E_Base},
{0x1F485, 0x1F487, GBP_E_Base},
{0x1F48B, 0x1F48B, GBP_Glue_After_Zwj},
{0x1F4AA, 0x1F4AA, GBP_E_Base},
{0x1F575, 0x1F575, GBP_E_Base},
{0x1F57A, 0x1F57A, GBP_E_Base},
{0x1F590, 0x1F590, GBP_E_Base},
{0x1F595, 0x1F596, GBP_E_Base},
{0x1F5E8, 0x1F5E8, GBP_Glue_After_Zwj},
{0x1F645, 0x1F647, GBP_E_Base},
{0x1F64B, 0x1F64F, GBP_E_Base},
{0x1F6A3, 0x1F6A3, GBP_E_Base},
{0x1F6B4, 0x1F6B6, GBP_E_Base},
{0x1F6C0, 0x1F6C0, GBP_E_Base},
{0x1F918, 0x1F91E, GBP_E_Base},
{0x1F926, 0x1F926, GBP_E_Base},
{0x1F930, 0x1F930, GBP_E_Base},
{0x1F933, 0x1F939, GBP_E_Base},
{0x1F93C, 0x1F93E, GBP_E_Base},
{0x1F3FB, 0x1F3FF, GBP_Extend},
{0xE0000, 0xE0000, GBP_Control},
{0xE0001, 0xE0001, GBP_Control},
{0xE0002, 0xE001F, GBP_Control},

View File

@ -2,7 +2,7 @@
* Grapheme breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2016 Andreas Röver <roever at users dot sf dot net>
* Copyright (C) 2016-2019 Andreas Röver <roever at users dot sf dot net>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -28,6 +28,10 @@
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
*
* This library has been updated according to Revision 35, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
@ -45,13 +49,15 @@
/**
* Grapheme break classes. This is a direct mapping of Table 2 of Unicode
* Standard Annex 29
* Standard Annex 29.
*/
enum GraphemeBreakClass
{
GBP_CR,
GBP_LF,
GBP_Control,
GBP_Virama,
GBP_LinkingConsonant,
GBP_Extend,
GBP_ZWJ,
GBP_Regional_Indicator,
@ -62,10 +68,6 @@ enum GraphemeBreakClass
GBP_T,
GBP_LV,
GBP_LVT,
GBP_E_Base,
GBP_E_Modifier,
GBP_Glue_After_Zwj,
GBP_E_Base_GAZ,
GBP_Other,
GBP_Undefined
};
@ -76,7 +78,7 @@ enum GraphemeBreakClass
*/
struct GraphemeBreakProperties
{
utf32_t start; /**< Starting coding point */
utf32_t end; /**< End coding point, including */
utf32_t start; /**< Start codepoint */
utf32_t end; /**< End codepoint, inclusive */
enum GraphemeBreakClass prop; /**< The grapheme breaking property */
};

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
@ -31,9 +31,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 37, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
* This library has been updated according to Revision 43, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -80,7 +80,9 @@ enum BreakAction
/**
* Break action pair table. This is a direct mapping of Table 2 of
* Unicode Standard Annex 14, Revision 37, except the "CB" part.
* Unicode Standard Annex 14, Revision 37, except for ZWJ (manually
* adjusted after special processing as per LB8a of Revision 41) and CB
* (manually added as per LB20).
*/
static enum BreakAction baTable[LBP_CB][LBP_CB] = {
{ /* OP */
@ -270,17 +272,17 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = {
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
{ /* ZWJ */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK,
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
{ /* CB */
DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK,
PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK,
CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK,
DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK },
DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK },
};
/**
@ -288,8 +290,9 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = {
*/
struct LineBreakPropertiesIndex
{
utf32_t end; /**< End coding point */
const struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */
utf32_t end; /**< End codepoint */
const struct LineBreakProperties *lbp; /**< Pointer to line breaking
properties */
};
/**
@ -335,7 +338,7 @@ static __inline int ends_with(const char *str, const char *suffix,
* Initializes the second-level index to the line breaking properties.
* If it is not called, the performance of #get_char_lb_class_lang (and
* thus the main functionality) can be pretty bad, especially for big
* code points like those of Chinese.
* codepoints like those of Chinese.
*/
void init_linebreak(void)
{
@ -612,12 +615,18 @@ static int get_lb_result_lookup(
break;
}
/* Special processing due to rule LB8a */
if (lbpCtx->fLb8aZwj)
{
brk = LINEBREAK_NOBREAK;
}
/* Special processing due to rule LB21a */
if (lbpCtx->fLb21aHebrew &&
(lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA))
{
brk = LINEBREAK_NOBREAK;
lbpCtx->fLb21aHebrew = 0;
lbpCtx->fLb21aHebrew = false;
}
else
{
@ -663,17 +672,21 @@ void lb_init_break_context(
lbpCtx->lbcCur = resolve_lb_class(
get_char_lb_class_lang(ch, lbpCtx->lbpLang),
lbpCtx->lang);
lbpCtx->fLb21aHebrew = 0;
lbpCtx->fLb8aZwj =
(get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_ZWJ);
lbpCtx->fLb10LeadSpace =
(get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_SP);
lbpCtx->fLb21aHebrew = false;
lbpCtx->cLb30aRI = 0;
treat_first_char(lbpCtx);
}
/**
* Updates LineBreakingContext for the next code point and returns
* Updates LineBreakingContext for the next codepoint and returns
* the detected break.
*
* @param[in,out] lbpCtx pointer to the line breaking context
* @param[in] ch Unicode code point
* @param[in] ch Unicode codepoint
* @return break result, one of #LINEBREAK_MUSTBREAK,
* #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK
* @post the line breaking context is updated
@ -700,6 +713,25 @@ int lb_process_next_char(
default:
break;
}
/* Special processing due to rule LB8a */
if (lbpCtx->lbcNew == LBP_ZWJ)
{
lbpCtx->fLb8aZwj = true;
}
else
{
lbpCtx->fLb8aZwj = false;
}
/* Special processing due to rule LB10 */
if (lbpCtx->fLb10LeadSpace)
{
if (lbpCtx->lbcNew == LBP_CM || lbpCtx->lbcNew == LBP_ZWJ)
brk = LINEBREAK_ALLOWBREAK;
lbpCtx->fLb10LeadSpace = false;
}
return brk;
}

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2019 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +30,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 37, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
* This library has been updated according to Revision 43, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>

View File

@ -1,6 +1,6 @@
/* The content of this file is generated from:
# LineBreak-9.0.0.txt
# Date: 2016-05-26, 01:00:00 GMT [KW, LI]
# LineBreak-12.1.0.txt
# Date: 2019-03-31, 22:04:15 GMT [KW, LI]
*/
#include "linebreakdef.h"
@ -94,7 +94,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x037E, 0x037E, LBP_IS },
{ 0x037F, 0x0482, LBP_AL },
{ 0x0483, 0x0489, LBP_CM },
{ 0x048A, 0x0587, LBP_AL },
{ 0x048A, 0x0588, LBP_AL },
{ 0x0589, 0x0589, LBP_IS },
{ 0x058A, 0x058A, LBP_BA },
{ 0x058D, 0x058E, LBP_AL },
@ -149,7 +149,10 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x07F4, 0x07F7, LBP_AL },
{ 0x07F8, 0x07F8, LBP_IS },
{ 0x07F9, 0x07F9, LBP_EX },
{ 0x07FA, 0x0815, LBP_AL },
{ 0x07FA, 0x07FA, LBP_AL },
{ 0x07FD, 0x07FD, LBP_CM },
{ 0x07FE, 0x07FF, LBP_PR },
{ 0x0800, 0x0815, LBP_AL },
{ 0x0816, 0x0819, LBP_CM },
{ 0x081A, 0x081A, LBP_AL },
{ 0x081B, 0x0823, LBP_CM },
@ -160,7 +163,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0830, 0x0858, LBP_AL },
{ 0x0859, 0x085B, LBP_CM },
{ 0x085E, 0x08BD, LBP_AL },
{ 0x08D4, 0x08E1, LBP_CM },
{ 0x08D3, 0x08E1, LBP_CM },
{ 0x08E2, 0x08E2, LBP_AL },
{ 0x08E3, 0x0903, LBP_CM },
{ 0x0904, 0x0939, LBP_AL },
@ -190,14 +193,17 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x09F9, 0x09F9, LBP_PO },
{ 0x09FA, 0x09FA, LBP_AL },
{ 0x09FB, 0x09FB, LBP_PR },
{ 0x0A01, 0x0A03, LBP_CM },
{ 0x09FC, 0x09FD, LBP_AL },
{ 0x09FE, 0x0A03, LBP_CM },
{ 0x0A05, 0x0A39, LBP_AL },
{ 0x0A3C, 0x0A51, LBP_CM },
{ 0x0A59, 0x0A5E, LBP_AL },
{ 0x0A66, 0x0A6F, LBP_NU },
{ 0x0A70, 0x0A71, LBP_CM },
{ 0x0A72, 0x0A74, LBP_AL },
{ 0x0A75, 0x0A83, LBP_CM },
{ 0x0A75, 0x0A75, LBP_CM },
{ 0x0A76, 0x0A76, LBP_AL },
{ 0x0A81, 0x0A83, LBP_CM },
{ 0x0A85, 0x0AB9, LBP_AL },
{ 0x0ABC, 0x0ABC, LBP_CM },
{ 0x0ABD, 0x0ABD, LBP_AL },
@ -208,7 +214,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0AF0, 0x0AF0, LBP_AL },
{ 0x0AF1, 0x0AF1, LBP_PR },
{ 0x0AF9, 0x0AF9, LBP_AL },
{ 0x0B01, 0x0B03, LBP_CM },
{ 0x0AFA, 0x0B03, LBP_CM },
{ 0x0B05, 0x0B39, LBP_AL },
{ 0x0B3C, 0x0B3C, LBP_CM },
{ 0x0B3D, 0x0B3D, LBP_AL },
@ -226,14 +232,16 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0BF0, 0x0BF8, LBP_AL },
{ 0x0BF9, 0x0BF9, LBP_PR },
{ 0x0BFA, 0x0BFA, LBP_AL },
{ 0x0C00, 0x0C03, LBP_CM },
{ 0x0C00, 0x0C04, LBP_CM },
{ 0x0C05, 0x0C3D, LBP_AL },
{ 0x0C3E, 0x0C56, LBP_CM },
{ 0x0C58, 0x0C61, LBP_AL },
{ 0x0C62, 0x0C63, LBP_CM },
{ 0x0C66, 0x0C6F, LBP_NU },
{ 0x0C77, 0x0C77, LBP_BB },
{ 0x0C78, 0x0C80, LBP_AL },
{ 0x0C81, 0x0C83, LBP_CM },
{ 0x0C84, 0x0C84, LBP_BB },
{ 0x0C85, 0x0CB9, LBP_AL },
{ 0x0CBC, 0x0CBC, LBP_CM },
{ 0x0CBD, 0x0CBD, LBP_AL },
@ -242,8 +250,10 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x0CE2, 0x0CE3, LBP_CM },
{ 0x0CE6, 0x0CEF, LBP_NU },
{ 0x0CF1, 0x0CF2, LBP_AL },
{ 0x0D01, 0x0D03, LBP_CM },
{ 0x0D05, 0x0D3D, LBP_AL },
{ 0x0D00, 0x0D03, LBP_CM },
{ 0x0D05, 0x0D3A, LBP_AL },
{ 0x0D3B, 0x0D3C, LBP_CM },
{ 0x0D3D, 0x0D3D, LBP_AL },
{ 0x0D3E, 0x0D4D, LBP_CM },
{ 0x0D4E, 0x0D56, LBP_AL },
{ 0x0D57, 0x0D57, LBP_CM },
@ -417,11 +427,11 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1CD4, 0x1CE8, LBP_CM },
{ 0x1CE9, 0x1CEC, LBP_AL },
{ 0x1CED, 0x1CED, LBP_CM },
{ 0x1CEE, 0x1CF1, LBP_AL },
{ 0x1CF2, 0x1CF4, LBP_CM },
{ 0x1CEE, 0x1CF3, LBP_AL },
{ 0x1CF4, 0x1CF4, LBP_CM },
{ 0x1CF5, 0x1CF6, LBP_AL },
{ 0x1CF8, 0x1CF9, LBP_CM },
{ 0x1D00, 0x1DBF, LBP_AL },
{ 0x1CF7, 0x1CF9, LBP_CM },
{ 0x1CFA, 0x1DBF, LBP_AL },
{ 0x1DC0, 0x1DFF, LBP_CM },
{ 0x1E00, 0x1FFC, LBP_AL },
{ 0x1FFD, 0x1FFD, LBP_BB },
@ -430,7 +440,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x2007, 0x2007, LBP_GL },
{ 0x2008, 0x200A, LBP_BA },
{ 0x200B, 0x200B, LBP_ZW },
{ 0x200C, 0x200F, LBP_CM },
{ 0x200C, 0x200C, LBP_CM },
{ 0x200D, 0x200D, LBP_ZWJ },
{ 0x200E, 0x200F, LBP_CM },
{ 0x2010, 0x2010, LBP_BA },
{ 0x2011, 0x2011, LBP_GL },
{ 0x2012, 0x2013, LBP_BA },
@ -808,7 +820,11 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x2E3F, 0x2E3F, LBP_AL },
{ 0x2E40, 0x2E41, LBP_BA },
{ 0x2E42, 0x2E42, LBP_OP },
{ 0x2E43, 0x2E44, LBP_BA },
{ 0x2E43, 0x2E4A, LBP_BA },
{ 0x2E4B, 0x2E4B, LBP_AL },
{ 0x2E4C, 0x2E4C, LBP_BA },
{ 0x2E4D, 0x2E4D, LBP_AL },
{ 0x2E4E, 0x2E4F, LBP_BA },
{ 0x2E80, 0x2FFB, LBP_ID },
{ 0x3000, 0x3000, LBP_BA },
{ 0x3001, 0x3002, LBP_CL },
@ -942,7 +958,8 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0xA8E0, 0xA8F1, LBP_CM },
{ 0xA8F2, 0xA8FB, LBP_AL },
{ 0xA8FC, 0xA8FC, LBP_BB },
{ 0xA8FD, 0xA8FD, LBP_AL },
{ 0xA8FD, 0xA8FE, LBP_AL },
{ 0xA8FF, 0xA8FF, LBP_CM },
{ 0xA900, 0xA909, LBP_NU },
{ 0xA90A, 0xA925, LBP_AL },
{ 0xA926, 0xA92D, LBP_CM },
@ -1907,9 +1924,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1091F, 0x1091F, LBP_BA },
{ 0x10920, 0x10A00, LBP_AL },
{ 0x10A01, 0x10A0F, LBP_CM },
{ 0x10A10, 0x10A33, LBP_AL },
{ 0x10A10, 0x10A35, LBP_AL },
{ 0x10A38, 0x10A3F, LBP_CM },
{ 0x10A40, 0x10A47, LBP_AL },
{ 0x10A40, 0x10A48, LBP_AL },
{ 0x10A50, 0x10A57, LBP_BA },
{ 0x10A58, 0x10AE4, LBP_AL },
{ 0x10AE5, 0x10AE6, LBP_CM },
@ -1918,7 +1935,12 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x10AF6, 0x10AF6, LBP_IN },
{ 0x10B00, 0x10B35, LBP_AL },
{ 0x10B39, 0x10B3F, LBP_BA },
{ 0x10B40, 0x10E7E, LBP_AL },
{ 0x10B40, 0x10D23, LBP_AL },
{ 0x10D24, 0x10D27, LBP_CM },
{ 0x10D30, 0x10D39, LBP_NU },
{ 0x10E60, 0x10F45, LBP_AL },
{ 0x10F46, 0x10F50, LBP_CM },
{ 0x10F51, 0x10FF6, LBP_AL },
{ 0x11000, 0x11002, LBP_CM },
{ 0x11003, 0x11037, LBP_AL },
{ 0x11038, 0x11046, LBP_CM },
@ -1930,13 +1952,15 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x110B0, 0x110BA, LBP_CM },
{ 0x110BB, 0x110BD, LBP_AL },
{ 0x110BE, 0x110C1, LBP_BA },
{ 0x110D0, 0x110E8, LBP_AL },
{ 0x110CD, 0x110E8, LBP_AL },
{ 0x110F0, 0x110F9, LBP_NU },
{ 0x11100, 0x11102, LBP_CM },
{ 0x11103, 0x11126, LBP_AL },
{ 0x11127, 0x11134, LBP_CM },
{ 0x11136, 0x1113F, LBP_NU },
{ 0x11140, 0x11143, LBP_BA },
{ 0x11144, 0x11144, LBP_AL },
{ 0x11145, 0x11146, LBP_CM },
{ 0x11150, 0x11172, LBP_AL },
{ 0x11173, 0x11173, LBP_CM },
{ 0x11174, 0x11174, LBP_AL },
@ -1949,8 +1973,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x111C5, 0x111C6, LBP_BA },
{ 0x111C7, 0x111C7, LBP_AL },
{ 0x111C8, 0x111C8, LBP_BA },
{ 0x111C9, 0x111C9, LBP_AL },
{ 0x111CA, 0x111CC, LBP_CM },
{ 0x111C9, 0x111CC, LBP_CM },
{ 0x111CD, 0x111CD, LBP_AL },
{ 0x111D0, 0x111D9, LBP_NU },
{ 0x111DA, 0x111DA, LBP_AL },
@ -1971,7 +1994,7 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x112F0, 0x112F9, LBP_NU },
{ 0x11300, 0x11303, LBP_CM },
{ 0x11305, 0x11339, LBP_AL },
{ 0x1133C, 0x1133C, LBP_CM },
{ 0x1133B, 0x1133C, LBP_CM },
{ 0x1133D, 0x1133D, LBP_AL },
{ 0x1133E, 0x1134D, LBP_CM },
{ 0x11350, 0x11350, LBP_AL },
@ -1985,7 +2008,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1144F, 0x1144F, LBP_AL },
{ 0x11450, 0x11459, LBP_NU },
{ 0x1145B, 0x1145B, LBP_BA },
{ 0x1145D, 0x114AF, LBP_AL },
{ 0x1145D, 0x1145D, LBP_AL },
{ 0x1145E, 0x1145E, LBP_CM },
{ 0x1145F, 0x114AF, LBP_AL },
{ 0x114B0, 0x114C3, LBP_CM },
{ 0x114C4, 0x114C7, LBP_AL },
{ 0x114D0, 0x114D9, LBP_NU },
@ -2006,15 +2031,44 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x11660, 0x1166C, LBP_BB },
{ 0x11680, 0x116AA, LBP_AL },
{ 0x116AB, 0x116B7, LBP_CM },
{ 0x116B8, 0x116B8, LBP_AL },
{ 0x116C0, 0x116C9, LBP_NU },
{ 0x11700, 0x1172B, LBP_SA },
{ 0x11730, 0x11739, LBP_NU },
{ 0x1173A, 0x1173B, LBP_SA },
{ 0x1173C, 0x1173E, LBP_BA },
{ 0x1173F, 0x1173F, LBP_SA },
{ 0x118A0, 0x118DF, LBP_AL },
{ 0x11800, 0x1182B, LBP_AL },
{ 0x1182C, 0x1183A, LBP_CM },
{ 0x1183B, 0x118DF, LBP_AL },
{ 0x118E0, 0x118E9, LBP_NU },
{ 0x118EA, 0x11C2E, LBP_AL },
{ 0x118EA, 0x119D0, LBP_AL },
{ 0x119D1, 0x119E0, LBP_CM },
{ 0x119E1, 0x119E1, LBP_AL },
{ 0x119E2, 0x119E2, LBP_BB },
{ 0x119E3, 0x119E3, LBP_AL },
{ 0x119E4, 0x119E4, LBP_CM },
{ 0x11A00, 0x11A00, LBP_AL },
{ 0x11A01, 0x11A0A, LBP_CM },
{ 0x11A0B, 0x11A32, LBP_AL },
{ 0x11A33, 0x11A39, LBP_CM },
{ 0x11A3A, 0x11A3A, LBP_AL },
{ 0x11A3B, 0x11A3E, LBP_CM },
{ 0x11A3F, 0x11A3F, LBP_BB },
{ 0x11A40, 0x11A40, LBP_AL },
{ 0x11A41, 0x11A44, LBP_BA },
{ 0x11A45, 0x11A45, LBP_BB },
{ 0x11A46, 0x11A46, LBP_AL },
{ 0x11A47, 0x11A47, LBP_CM },
{ 0x11A50, 0x11A50, LBP_AL },
{ 0x11A51, 0x11A5B, LBP_CM },
{ 0x11A5C, 0x11A89, LBP_AL },
{ 0x11A8A, 0x11A99, LBP_CM },
{ 0x11A9A, 0x11A9C, LBP_BA },
{ 0x11A9D, 0x11A9D, LBP_AL },
{ 0x11A9E, 0x11AA0, LBP_BB },
{ 0x11AA1, 0x11AA2, LBP_BA },
{ 0x11AC0, 0x11C2E, LBP_AL },
{ 0x11C2F, 0x11C3F, LBP_CM },
{ 0x11C40, 0x11C40, LBP_AL },
{ 0x11C41, 0x11C45, LBP_BA },
@ -2024,6 +2078,21 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x11C71, 0x11C71, LBP_EX },
{ 0x11C72, 0x11C8F, LBP_AL },
{ 0x11C92, 0x11CB6, LBP_CM },
{ 0x11D00, 0x11D30, LBP_AL },
{ 0x11D31, 0x11D45, LBP_CM },
{ 0x11D46, 0x11D46, LBP_AL },
{ 0x11D47, 0x11D47, LBP_CM },
{ 0x11D50, 0x11D59, LBP_NU },
{ 0x11D60, 0x11D89, LBP_AL },
{ 0x11D8A, 0x11D97, LBP_CM },
{ 0x11D98, 0x11D98, LBP_AL },
{ 0x11DA0, 0x11DA9, LBP_NU },
{ 0x11EE0, 0x11EF2, LBP_AL },
{ 0x11EF3, 0x11EF6, LBP_CM },
{ 0x11EF7, 0x11FDC, LBP_AL },
{ 0x11FDD, 0x11FE0, LBP_PO },
{ 0x11FE1, 0x11FF1, LBP_AL },
{ 0x11FFF, 0x11FFF, LBP_BA },
{ 0x12000, 0x1246E, LBP_AL },
{ 0x12470, 0x12474, LBP_BA },
{ 0x12480, 0x13257, LBP_AL },
@ -2039,7 +2108,11 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1328A, 0x13378, LBP_AL },
{ 0x13379, 0x13379, LBP_OP },
{ 0x1337A, 0x1337B, LBP_CL },
{ 0x1337C, 0x145CD, LBP_AL },
{ 0x1337C, 0x1342E, LBP_AL },
{ 0x13430, 0x13436, LBP_GL },
{ 0x13437, 0x13437, LBP_OP },
{ 0x13438, 0x13438, LBP_CL },
{ 0x14400, 0x145CD, LBP_AL },
{ 0x145CE, 0x145CE, LBP_OP },
{ 0x145CF, 0x145CF, LBP_CL },
{ 0x145D0, 0x16A5E, LBP_AL },
@ -2055,11 +2128,17 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x16B44, 0x16B44, LBP_BA },
{ 0x16B45, 0x16B45, LBP_AL },
{ 0x16B50, 0x16B59, LBP_NU },
{ 0x16B5B, 0x16F50, LBP_AL },
{ 0x16B5B, 0x16E96, LBP_AL },
{ 0x16E97, 0x16E98, LBP_BA },
{ 0x16E99, 0x16F4A, LBP_AL },
{ 0x16F4F, 0x16F4F, LBP_CM },
{ 0x16F50, 0x16F50, LBP_AL },
{ 0x16F51, 0x16F92, LBP_CM },
{ 0x16F93, 0x16F9F, LBP_AL },
{ 0x16FE0, 0x16FE0, LBP_NS },
{ 0x17000, 0x1B001, LBP_ID },
{ 0x16FE0, 0x16FE3, LBP_NS },
{ 0x17000, 0x1B11E, LBP_ID },
{ 0x1B150, 0x1B167, LBP_CJ },
{ 0x1B170, 0x1B2FB, LBP_ID },
{ 0x1BC00, 0x1BC9C, LBP_AL },
{ 0x1BC9D, 0x1BC9E, LBP_CM },
{ 0x1BC9F, 0x1BC9F, LBP_BA },
@ -2088,22 +2167,34 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1DA87, 0x1DA8A, LBP_BA },
{ 0x1DA8B, 0x1DA8B, LBP_AL },
{ 0x1DA9B, 0x1E02A, LBP_CM },
{ 0x1E100, 0x1E12C, LBP_AL },
{ 0x1E130, 0x1E136, LBP_CM },
{ 0x1E137, 0x1E13D, LBP_AL },
{ 0x1E140, 0x1E149, LBP_NU },
{ 0x1E14E, 0x1E2EB, LBP_AL },
{ 0x1E2EC, 0x1E2EF, LBP_CM },
{ 0x1E2F0, 0x1E2F9, LBP_NU },
{ 0x1E2FF, 0x1E2FF, LBP_PR },
{ 0x1E800, 0x1E8CF, LBP_AL },
{ 0x1E8D0, 0x1E8D6, LBP_CM },
{ 0x1E900, 0x1E943, LBP_AL },
{ 0x1E944, 0x1E94A, LBP_CM },
{ 0x1E94B, 0x1E94B, LBP_AL },
{ 0x1E950, 0x1E959, LBP_NU },
{ 0x1E95E, 0x1E95F, LBP_OP },
{ 0x1EE00, 0x1EEF1, LBP_AL },
{ 0x1EC71, 0x1ECAB, LBP_AL },
{ 0x1ECAC, 0x1ECAC, LBP_PO },
{ 0x1ECAD, 0x1ECAF, LBP_AL },
{ 0x1ECB0, 0x1ECB0, LBP_PO },
{ 0x1ECB1, 0x1EEF1, LBP_AL },
{ 0x1F000, 0x1F0FF, LBP_ID },
{ 0x1F100, 0x1F10C, LBP_AI },
{ 0x1F10D, 0x1F10F, LBP_ID },
{ 0x1F110, 0x1F12D, LBP_AI },
{ 0x1F12E, 0x1F12E, LBP_AL },
{ 0x1F12F, 0x1F12F, LBP_ID },
{ 0x1F12E, 0x1F12F, LBP_AL },
{ 0x1F130, 0x1F169, LBP_AI },
{ 0x1F16A, 0x1F16B, LBP_AL },
{ 0x1F16C, 0x1F16F, LBP_ID },
{ 0x1F16A, 0x1F16C, LBP_AL },
{ 0x1F16D, 0x1F16F, LBP_ID },
{ 0x1F170, 0x1F1AC, LBP_AI },
{ 0x1F1AD, 0x1F1E5, LBP_ID },
{ 0x1F1E6, 0x1F1FF, LBP_RI },
@ -2115,29 +2206,31 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F3B5, 0x1F3B6, LBP_AL },
{ 0x1F3B7, 0x1F3BB, LBP_ID },
{ 0x1F3BC, 0x1F3BC, LBP_AL },
{ 0x1F3BD, 0x1F3C2, LBP_ID },
{ 0x1F3C3, 0x1F3C4, LBP_EB },
{ 0x1F3C5, 0x1F3C9, LBP_ID },
{ 0x1F3CA, 0x1F3CB, LBP_EB },
{ 0x1F3CC, 0x1F3FA, LBP_ID },
{ 0x1F3BD, 0x1F3C1, LBP_ID },
{ 0x1F3C2, 0x1F3C4, LBP_EB },
{ 0x1F3C5, 0x1F3C6, LBP_ID },
{ 0x1F3C7, 0x1F3C7, LBP_EB },
{ 0x1F3C8, 0x1F3C9, LBP_ID },
{ 0x1F3CA, 0x1F3CC, LBP_EB },
{ 0x1F3CD, 0x1F3FA, LBP_ID },
{ 0x1F3FB, 0x1F3FF, LBP_EM },
{ 0x1F400, 0x1F441, LBP_ID },
{ 0x1F442, 0x1F443, LBP_EB },
{ 0x1F444, 0x1F445, LBP_ID },
{ 0x1F446, 0x1F450, LBP_EB },
{ 0x1F451, 0x1F465, LBP_ID },
{ 0x1F466, 0x1F469, LBP_EB },
{ 0x1F46A, 0x1F46D, LBP_ID },
{ 0x1F46E, 0x1F46E, LBP_EB },
{ 0x1F46F, 0x1F46F, LBP_ID },
{ 0x1F470, 0x1F478, LBP_EB },
{ 0x1F466, 0x1F478, LBP_EB },
{ 0x1F479, 0x1F47B, LBP_ID },
{ 0x1F47C, 0x1F47C, LBP_EB },
{ 0x1F47D, 0x1F480, LBP_ID },
{ 0x1F481, 0x1F483, LBP_EB },
{ 0x1F484, 0x1F484, LBP_ID },
{ 0x1F485, 0x1F487, LBP_EB },
{ 0x1F488, 0x1F49F, LBP_ID },
{ 0x1F488, 0x1F48E, LBP_ID },
{ 0x1F48F, 0x1F48F, LBP_EB },
{ 0x1F490, 0x1F490, LBP_ID },
{ 0x1F491, 0x1F491, LBP_EB },
{ 0x1F492, 0x1F49F, LBP_ID },
{ 0x1F4A0, 0x1F4A0, LBP_AL },
{ 0x1F4A1, 0x1F4A1, LBP_ID },
{ 0x1F4A2, 0x1F4A2, LBP_AL },
@ -2155,8 +2248,8 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F517, 0x1F524, LBP_AL },
{ 0x1F525, 0x1F531, LBP_ID },
{ 0x1F532, 0x1F549, LBP_AL },
{ 0x1F54A, 0x1F574, LBP_ID },
{ 0x1F575, 0x1F575, LBP_EB },
{ 0x1F54A, 0x1F573, LBP_ID },
{ 0x1F574, 0x1F575, LBP_EB },
{ 0x1F576, 0x1F579, LBP_ID },
{ 0x1F57A, 0x1F57A, LBP_EB },
{ 0x1F57B, 0x1F58F, LBP_ID },
@ -2181,7 +2274,9 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F6B4, 0x1F6B6, LBP_EB },
{ 0x1F6B7, 0x1F6BF, LBP_ID },
{ 0x1F6C0, 0x1F6C0, LBP_EB },
{ 0x1F6C1, 0x1F6FF, LBP_ID },
{ 0x1F6C1, 0x1F6CB, LBP_ID },
{ 0x1F6CC, 0x1F6CC, LBP_EB },
{ 0x1F6CD, 0x1F6FF, LBP_ID },
{ 0x1F700, 0x1F773, LBP_AL },
{ 0x1F774, 0x1F77F, LBP_ID },
{ 0x1F780, 0x1F7D4, LBP_AL },
@ -2195,17 +2290,31 @@ const struct LineBreakProperties lb_prop_default[] = {
{ 0x1F860, 0x1F887, LBP_AL },
{ 0x1F888, 0x1F88F, LBP_ID },
{ 0x1F890, 0x1F8AD, LBP_AL },
{ 0x1F8AE, 0x1F917, LBP_ID },
{ 0x1F918, 0x1F91E, LBP_EB },
{ 0x1F91F, 0x1F925, LBP_ID },
{ 0x1F8AE, 0x1F8FF, LBP_ID },
{ 0x1F900, 0x1F90B, LBP_AL },
{ 0x1F90C, 0x1F90E, LBP_ID },
{ 0x1F90F, 0x1F90F, LBP_EB },
{ 0x1F910, 0x1F917, LBP_ID },
{ 0x1F918, 0x1F91F, LBP_EB },
{ 0x1F920, 0x1F925, LBP_ID },
{ 0x1F926, 0x1F926, LBP_EB },
{ 0x1F927, 0x1F92F, LBP_ID },
{ 0x1F930, 0x1F930, LBP_EB },
{ 0x1F931, 0x1F932, LBP_ID },
{ 0x1F933, 0x1F939, LBP_EB },
{ 0x1F930, 0x1F939, LBP_EB },
{ 0x1F93A, 0x1F93B, LBP_ID },
{ 0x1F93C, 0x1F93E, LBP_EB },
{ 0x1F93F, 0x3FFFD, LBP_ID },
{ 0x1F93F, 0x1F9B4, LBP_ID },
{ 0x1F9B5, 0x1F9B6, LBP_EB },
{ 0x1F9B7, 0x1F9B7, LBP_ID },
{ 0x1F9B8, 0x1F9B9, LBP_EB },
{ 0x1F9BA, 0x1F9BA, LBP_ID },
{ 0x1F9BB, 0x1F9BB, LBP_EB },
{ 0x1F9BC, 0x1F9CC, LBP_ID },
{ 0x1F9CD, 0x1F9CF, LBP_EB },
{ 0x1F9D0, 0x1F9D0, LBP_ID },
{ 0x1F9D1, 0x1F9DD, LBP_EB },
{ 0x1F9DE, 0x1F9FF, LBP_ID },
{ 0x1FA00, 0x1FA53, LBP_AL },
{ 0x1FA54, 0x3FFFD, LBP_ID },
{ 0xE0001, 0xE01EF, LBP_CM },
{ 0xF0000, 0x10FFFD, LBP_XX },
{ 0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined }

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -22,20 +22,6 @@
* not be misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source
* distribution.
*
* The main reference is Unicode Standard Annex 14 (UAX #14):
* <URL:http://www.unicode.org/reports/tr14/>
*
* When this library was designed, this annex was at Revision 19, for
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 37, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
*/
/**
@ -66,6 +52,7 @@ static const struct LineBreakProperties lb_prop_German[] = {
{ 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */
{ 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */
{ 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */
{ 0x2019, 0x2019, LBP_GL }, /* Right single quotation mark: glue */
{ 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */
{ 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */
{ 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */

View File

@ -4,7 +4,7 @@
* Line breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2008-2016 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2008-2018 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2013 Petr Filipsky <philodej at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
@ -31,9 +31,9 @@
* Unicode 5.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-19.html>
*
* This library has been updated according to Revision 37, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-37.html>
* This library has been updated according to Revision 43, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr14/tr14-43.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -52,8 +52,8 @@
#include "unibreakdef.h"
/**
* Line break classes. This is a direct mapping of Table 1 of Unicode
* Standard Annex 14, Revision 26.
* Line break classes. This is a mapping of Table 1 of Unicode
* Standard Annex 14.
*/
enum LineBreakClass
{
@ -95,7 +95,7 @@ enum LineBreakClass
LBP_ZWJ, /**< Zero width joiner */
/* The following break class is treated in the pair table, but it is
* not part of Table 2 of UAX #14. */
* not part of Table 2 of UAX #14-37. */
LBP_CB, /**< Contingent break */
/* The following break classes are not treated in the pair table */
@ -117,8 +117,8 @@ enum LineBreakClass
*/
struct LineBreakProperties
{
utf32_t start; /**< Starting coding point */
utf32_t end; /**< End coding point */
utf32_t start; /**< Start codepoint */
utf32_t end; /**< End codepoint, inclusive */
enum LineBreakClass prop; /**< The line breaking property */
};
@ -140,11 +140,14 @@ struct LineBreakPropertiesLang
struct LineBreakContext
{
const char *lang; /**< Language name */
const struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */
const struct LineBreakProperties *lbpLang; /**< Pointer to
LineBreakProperties */
enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */
enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */
enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */
int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */
bool fLb8aZwj; /**< Flag for ZWJ (LB8a) */
bool fLb10LeadSpace; /**< Flag for leading space (LB10) */
bool fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */
int cLb30aRI; /**< Count of RI characters (LB30a) */
};

View File

@ -15,6 +15,8 @@ libunibreak_src = [
'graphemebreak.c',
'graphemebreak.h',
'graphemebreakdef.h',
'emojidef.h',
'emojidef.c',
]
libunibreak_lib = static_library('libunibreak',

View File

@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages

View File

@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2015-2019 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -39,7 +39,7 @@
extern "C" {
#endif
#define UNIBREAK_VERSION 0x0400 /**< Version of the library linebreak */
#define UNIBREAK_VERSION 0x0402 /**< Version of libunibreak */
extern const int unibreak_version;
#ifndef UNIBREAK_UTF_TYPES_DEFINED

View File

@ -4,7 +4,7 @@
* Break processing in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2015-2016 Wu Yongwei <wuyongwei at gmail dot com>
* Copyright (C) 2015-2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -35,9 +35,19 @@
#ifndef UNIBREAKDEF_H
#define UNIBREAKDEF_H
/* Boolean type support: compilers that define _MSC_VER with a value
 * below 1800 get a minimal int-based substitute for bool/true/false;
 * every other compiler uses the standard <stdbool.h> header.
 * NOTE(review): presumably those older MSVC releases do not provide
 * <stdbool.h> -- confirm against the supported toolchains. */
#if defined(_MSC_VER) && _MSC_VER < 1800
typedef int bool;
#define false 0
#define true 1
#else
#include <stdbool.h>
#endif
#include <stddef.h>
#include "unibreakbase.h"
/**
 * Number of elements in an array.
 *
 * Only meaningful when the argument is a true array object; for a
 * pointer (for example an array received as a function parameter) the
 * result is not an element count.  The argument is used solely as a
 * sizeof operand.
 */
#define ARRAY_LEN(x) (sizeof(x) / sizeof((x)[0]))
#ifdef __cplusplus
extern "C" {
#endif

View File

@ -4,7 +4,8 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +31,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
* This library has been updated according to Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
* This library has been updated according to Revision 35, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -53,8 +54,7 @@
#include "unibreakdef.h"
#include "wordbreak.h"
#include "wordbreakdata.c"
#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0]))
#include "emojidef.h"
/**
* Initializes the wordbreak internals. It currently does nothing, but
@ -215,7 +215,7 @@ static void set_wordbreaks(
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
/* Fall off */
/* Fall through */
case WBP_Newline:
/* WB3a,3b */
@ -225,24 +225,6 @@ static void set_wordbreaks(
posLast = posCur;
break;
case WBP_E_Base_GAZ:
case WBP_Glue_After_Zwj:
/* WB3c */
if (wbcLast == WBP_ZWJ)
{
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
}
/* No rule found, reset */
else
{
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
}
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_ZWJ:
case WBP_Extend:
case WBP_Format:
@ -260,8 +242,10 @@ static void set_wordbreaks(
{
/* It's surely not the first */
brks[posCur - 1] = WORDBREAK_NOBREAK;
/* WB3c precedes 4, so no intervening Extend chars allowed. */
if (wbcSeqStart != WBP_ZWJ)
/* WB3c and WB3d precede 4, so no intervening Extend
* chars allowed. */
if (wbcCur != WBP_ZWJ && wbcSeqStart != WBP_ZWJ &&
wbcSeqStart != WBP_WSegSpace)
{
/* "inherit" the previous class. */
wbcCur = wbcLast;
@ -334,7 +318,8 @@ static void set_wordbreaks(
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
/* No break on purpose */
/* Fall through */
case WBP_MidNumLet:
if (((wbcLast == WBP_ALetter) ||
(wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */
@ -421,32 +406,6 @@ static void set_wordbreaks(
posLast = posCur;
break;
case WBP_E_Base:
/* No rule found, reset */
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_E_Modifier:
/* WB14 */
if ((wbcLast == WBP_E_Base) ||
(wbcLast == WBP_E_Base_GAZ))
{
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
}
/* No rule found, reset */
else
{
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);
}
wbcSeqStart = wbcCur;
posLast = posCur;
break;
case WBP_Regional_Indicator:
/* WB15,16 */
if ((wbcSeqStart == WBP_Regional_Indicator) &&
@ -481,7 +440,32 @@ static void set_wordbreaks(
}
break;
case WBP_WSegSpace:
if (wbcLast == WBP_WSegSpace) /* WB3d */
{
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
posLast = posCur;
break;
}
#ifndef __has_attribute
# define __has_attribute(x) 0
#endif
#if __has_attribute(fallthrough)
__attribute__((fallthrough));
#endif
/* Fall through */
case WBP_Any:
/* Check for rule WB3c */
if (wbcLast == WBP_ZWJ && ub_is_extended_pictographic(ch))
{
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_NOBREAK, get_next_char);
posLast = posCur;
break;
}
/* Allow breaks and reset */
set_brks_to(s, brks, posLast, posCur, len,
WORDBREAK_BREAK, get_next_char);

View File

@ -4,7 +4,8 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +31,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
* This library has been updated according to Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
* This library has been updated according to Revision 35, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -72,4 +73,4 @@ void set_wordbreaks_utf32(
}
#endif
#endif
#endif /* WORDBREAK_H */

View File

@ -1,6 +1,6 @@
/* The content of this file is generated from:
# WordBreakProperty-9.0.0.txt
# Date: 2016-06-01, 10:34:38 GMT
# WordBreakProperty-12.1.0.txt
# Date: 2019-03-10, 10:53:28 GMT
*/
#include "wordbreakdef.h"
@ -9,6 +9,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x000A, 0x000A, WBP_LF},
{0x000B, 0x000C, WBP_Newline},
{0x000D, 0x000D, WBP_CR},
{0x0020, 0x0020, WBP_WSegSpace},
{0x0022, 0x0022, WBP_Double_Quote},
{0x0027, 0x0027, WBP_Single_Quote},
{0x002C, 0x002C, WBP_MidNum},
@ -35,11 +36,15 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0294, 0x0294, WBP_ALetter},
{0x0295, 0x02AF, WBP_ALetter},
{0x02B0, 0x02C1, WBP_ALetter},
{0x02C2, 0x02C5, WBP_ALetter},
{0x02C6, 0x02D1, WBP_ALetter},
{0x02D7, 0x02D7, WBP_MidLetter},
{0x02D2, 0x02D7, WBP_ALetter},
{0x02DE, 0x02DF, WBP_ALetter},
{0x02E0, 0x02E4, WBP_ALetter},
{0x02EC, 0x02EC, WBP_ALetter},
{0x02ED, 0x02ED, WBP_ALetter},
{0x02EE, 0x02EE, WBP_ALetter},
{0x02EF, 0x02FF, WBP_ALetter},
{0x0300, 0x036F, WBP_Extend},
{0x0370, 0x0373, WBP_ALetter},
{0x0374, 0x0374, WBP_ALetter},
@ -60,7 +65,9 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x048A, 0x052F, WBP_ALetter},
{0x0531, 0x0556, WBP_ALetter},
{0x0559, 0x0559, WBP_ALetter},
{0x0561, 0x0587, WBP_ALetter},
{0x055B, 0x055C, WBP_ALetter},
{0x055E, 0x055E, WBP_ALetter},
{0x0560, 0x0588, WBP_ALetter},
{0x0589, 0x0589, WBP_MidNum},
{0x0591, 0x05BD, WBP_Extend},
{0x05BF, 0x05BF, WBP_Extend},
@ -68,7 +75,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x05C4, 0x05C5, WBP_Extend},
{0x05C7, 0x05C7, WBP_Extend},
{0x05D0, 0x05EA, WBP_Hebrew_Letter},
{0x05F0, 0x05F2, WBP_Hebrew_Letter},
{0x05EF, 0x05F2, WBP_Hebrew_Letter},
{0x05F3, 0x05F3, WBP_ALetter},
{0x05F4, 0x05F4, WBP_MidLetter},
{0x0600, 0x0605, WBP_Format},
@ -110,6 +117,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x07F4, 0x07F5, WBP_ALetter},
{0x07F8, 0x07F8, WBP_MidNum},
{0x07FA, 0x07FA, WBP_ALetter},
{0x07FD, 0x07FD, WBP_Extend},
{0x0800, 0x0815, WBP_ALetter},
{0x0816, 0x0819, WBP_Extend},
{0x081A, 0x081A, WBP_ALetter},
@ -120,9 +128,10 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0829, 0x082D, WBP_Extend},
{0x0840, 0x0858, WBP_ALetter},
{0x0859, 0x085B, WBP_Extend},
{0x0860, 0x086A, WBP_ALetter},
{0x08A0, 0x08B4, WBP_ALetter},
{0x08B6, 0x08BD, WBP_ALetter},
{0x08D4, 0x08E1, WBP_Extend},
{0x08D3, 0x08E1, WBP_Extend},
{0x08E2, 0x08E2, WBP_Format},
{0x08E3, 0x0902, WBP_Extend},
{0x0903, 0x0903, WBP_Extend},
@ -165,6 +174,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x09E2, 0x09E3, WBP_Extend},
{0x09E6, 0x09EF, WBP_Numeric},
{0x09F0, 0x09F1, WBP_ALetter},
{0x09FC, 0x09FC, WBP_ALetter},
{0x09FE, 0x09FE, WBP_Extend},
{0x0A01, 0x0A02, WBP_Extend},
{0x0A03, 0x0A03, WBP_Extend},
{0x0A05, 0x0A0A, WBP_ALetter},
@ -207,6 +218,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0AE2, 0x0AE3, WBP_Extend},
{0x0AE6, 0x0AEF, WBP_Numeric},
{0x0AF9, 0x0AF9, WBP_ALetter},
{0x0AFA, 0x0AFF, WBP_Extend},
{0x0B01, 0x0B01, WBP_Extend},
{0x0B02, 0x0B03, WBP_Extend},
{0x0B05, 0x0B0C, WBP_ALetter},
@ -253,6 +265,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0BE6, 0x0BEF, WBP_Numeric},
{0x0C00, 0x0C00, WBP_Extend},
{0x0C01, 0x0C03, WBP_Extend},
{0x0C04, 0x0C04, WBP_Extend},
{0x0C05, 0x0C0C, WBP_ALetter},
{0x0C0E, 0x0C10, WBP_ALetter},
{0x0C12, 0x0C28, WBP_ALetter},
@ -290,11 +303,12 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0CE2, 0x0CE3, WBP_Extend},
{0x0CE6, 0x0CEF, WBP_Numeric},
{0x0CF1, 0x0CF2, WBP_ALetter},
{0x0D01, 0x0D01, WBP_Extend},
{0x0D00, 0x0D01, WBP_Extend},
{0x0D02, 0x0D03, WBP_Extend},
{0x0D05, 0x0D0C, WBP_ALetter},
{0x0D0E, 0x0D10, WBP_ALetter},
{0x0D12, 0x0D3A, WBP_ALetter},
{0x0D3B, 0x0D3C, WBP_Extend},
{0x0D3D, 0x0D3D, WBP_ALetter},
{0x0D3E, 0x0D40, WBP_Extend},
{0x0D41, 0x0D44, WBP_Extend},
@ -326,8 +340,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x0E47, 0x0E4E, WBP_Extend},
{0x0E50, 0x0E59, WBP_Numeric},
{0x0EB1, 0x0EB1, WBP_Extend},
{0x0EB4, 0x0EB9, WBP_Extend},
{0x0EBB, 0x0EBC, WBP_Extend},
{0x0EB4, 0x0EBC, WBP_Extend},
{0x0EC8, 0x0ECD, WBP_Extend},
{0x0ED0, 0x0ED9, WBP_Numeric},
{0x0F00, 0x0F00, WBP_ALetter},
@ -376,7 +389,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x10CD, 0x10CD, WBP_ALetter},
{0x10D0, 0x10FA, WBP_ALetter},
{0x10FC, 0x10FC, WBP_ALetter},
{0x10FD, 0x1248, WBP_ALetter},
{0x10FD, 0x10FF, WBP_ALetter},
{0x1100, 0x1248, WBP_ALetter},
{0x124A, 0x124D, WBP_ALetter},
{0x1250, 0x1256, WBP_ALetter},
{0x1258, 0x1258, WBP_ALetter},
@ -398,6 +412,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x13F8, 0x13FD, WBP_ALetter},
{0x1401, 0x166C, WBP_ALetter},
{0x166F, 0x167F, WBP_ALetter},
{0x1680, 0x1680, WBP_WSegSpace},
{0x1681, 0x169A, WBP_ALetter},
{0x16A0, 0x16EA, WBP_ALetter},
{0x16EE, 0x16F0, WBP_ALetter},
@ -426,7 +441,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1810, 0x1819, WBP_Numeric},
{0x1820, 0x1842, WBP_ALetter},
{0x1843, 0x1843, WBP_ALetter},
{0x1844, 0x1877, WBP_ALetter},
{0x1844, 0x1878, WBP_ALetter},
{0x1880, 0x1884, WBP_ALetter},
{0x1885, 0x1886, WBP_Extend},
{0x1887, 0x18A8, WBP_ALetter},
@ -509,24 +524,27 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1C5A, 0x1C77, WBP_ALetter},
{0x1C78, 0x1C7D, WBP_ALetter},
{0x1C80, 0x1C88, WBP_ALetter},
{0x1C90, 0x1CBA, WBP_ALetter},
{0x1CBD, 0x1CBF, WBP_ALetter},
{0x1CD0, 0x1CD2, WBP_Extend},
{0x1CD4, 0x1CE0, WBP_Extend},
{0x1CE1, 0x1CE1, WBP_Extend},
{0x1CE2, 0x1CE8, WBP_Extend},
{0x1CE9, 0x1CEC, WBP_ALetter},
{0x1CED, 0x1CED, WBP_Extend},
{0x1CEE, 0x1CF1, WBP_ALetter},
{0x1CF2, 0x1CF3, WBP_Extend},
{0x1CEE, 0x1CF3, WBP_ALetter},
{0x1CF4, 0x1CF4, WBP_Extend},
{0x1CF5, 0x1CF6, WBP_ALetter},
{0x1CF7, 0x1CF7, WBP_Extend},
{0x1CF8, 0x1CF9, WBP_Extend},
{0x1CFA, 0x1CFA, WBP_ALetter},
{0x1D00, 0x1D2B, WBP_ALetter},
{0x1D2C, 0x1D6A, WBP_ALetter},
{0x1D6B, 0x1D77, WBP_ALetter},
{0x1D78, 0x1D78, WBP_ALetter},
{0x1D79, 0x1D9A, WBP_ALetter},
{0x1D9B, 0x1DBF, WBP_ALetter},
{0x1DC0, 0x1DF5, WBP_Extend},
{0x1DC0, 0x1DF9, WBP_Extend},
{0x1DFB, 0x1DFF, WBP_Extend},
{0x1E00, 0x1F15, WBP_ALetter},
{0x1F18, 0x1F1D, WBP_ALetter},
@ -547,6 +565,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1FE0, 0x1FEC, WBP_ALetter},
{0x1FF2, 0x1FF4, WBP_ALetter},
{0x1FF6, 0x1FFC, WBP_ALetter},
{0x2000, 0x2006, WBP_WSegSpace},
{0x2008, 0x200A, WBP_WSegSpace},
{0x200C, 0x200C, WBP_Extend},
{0x200D, 0x200D, WBP_ZWJ},
{0x200E, 0x200F, WBP_Format},
@ -561,6 +581,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x203F, 0x2040, WBP_ExtendNumLet},
{0x2044, 0x2044, WBP_MidNum},
{0x2054, 0x2054, WBP_ExtendNumLet},
{0x205F, 0x205F, WBP_WSegSpace},
{0x2060, 0x2064, WBP_Format},
{0x2066, 0x206F, WBP_Format},
{0x2071, 0x2071, WBP_ALetter},
@ -590,10 +611,6 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x2183, 0x2184, WBP_ALetter},
{0x2185, 0x2188, WBP_ALetter},
{0x24B6, 0x24E9, WBP_ALetter},
{0x261D, 0x261D, WBP_E_Base},
{0x26F9, 0x26F9, WBP_E_Base},
{0x270A, 0x270D, WBP_E_Base},
{0x2764, 0x2764, WBP_Glue_After_Zwj},
{0x2C00, 0x2C2E, WBP_ALetter},
{0x2C30, 0x2C5E, WBP_ALetter},
{0x2C60, 0x2C7B, WBP_ALetter},
@ -619,6 +636,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x2DD8, 0x2DDE, WBP_ALetter},
{0x2DE0, 0x2DFF, WBP_Extend},
{0x2E2F, 0x2E2F, WBP_ALetter},
{0x3000, 0x3000, WBP_WSegSpace},
{0x3005, 0x3005, WBP_ALetter},
{0x302A, 0x302D, WBP_Extend},
{0x302E, 0x302F, WBP_Extend},
@ -631,7 +649,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x30A1, 0x30FA, WBP_Katakana},
{0x30FC, 0x30FE, WBP_Katakana},
{0x30FF, 0x30FF, WBP_Katakana},
{0x3105, 0x312D, WBP_ALetter},
{0x3105, 0x312F, WBP_ALetter},
{0x3131, 0x318E, WBP_ALetter},
{0x31A0, 0x31BA, WBP_ALetter},
{0x31F0, 0x31FF, WBP_Katakana},
@ -660,14 +678,16 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xA6E6, 0xA6EF, WBP_ALetter},
{0xA6F0, 0xA6F1, WBP_Extend},
{0xA717, 0xA71F, WBP_ALetter},
{0xA720, 0xA721, WBP_ALetter},
{0xA722, 0xA76F, WBP_ALetter},
{0xA770, 0xA770, WBP_ALetter},
{0xA771, 0xA787, WBP_ALetter},
{0xA788, 0xA788, WBP_ALetter},
{0xA789, 0xA78A, WBP_ALetter},
{0xA78B, 0xA78E, WBP_ALetter},
{0xA78F, 0xA78F, WBP_ALetter},
{0xA790, 0xA7AE, WBP_ALetter},
{0xA7B0, 0xA7B7, WBP_ALetter},
{0xA790, 0xA7BF, WBP_ALetter},
{0xA7C2, 0xA7C6, WBP_ALetter},
{0xA7F7, 0xA7F7, WBP_ALetter},
{0xA7F8, 0xA7F9, WBP_ALetter},
{0xA7FA, 0xA7FA, WBP_ALetter},
@ -690,7 +710,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xA8E0, 0xA8F1, WBP_Extend},
{0xA8F2, 0xA8F7, WBP_ALetter},
{0xA8FB, 0xA8FB, WBP_ALetter},
{0xA8FD, 0xA8FD, WBP_ALetter},
{0xA8FD, 0xA8FE, WBP_ALetter},
{0xA8FF, 0xA8FF, WBP_Extend},
{0xA900, 0xA909, WBP_Numeric},
{0xA90A, 0xA925, WBP_ALetter},
{0xA926, 0xA92D, WBP_Extend},
@ -705,8 +726,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xA9B4, 0xA9B5, WBP_Extend},
{0xA9B6, 0xA9B9, WBP_Extend},
{0xA9BA, 0xA9BB, WBP_Extend},
{0xA9BC, 0xA9BC, WBP_Extend},
{0xA9BD, 0xA9C0, WBP_Extend},
{0xA9BC, 0xA9BD, WBP_Extend},
{0xA9BE, 0xA9C0, WBP_Extend},
{0xA9CF, 0xA9CF, WBP_ALetter},
{0xA9D0, 0xA9D9, WBP_Numeric},
{0xA9E5, 0xA9E5, WBP_Extend},
@ -745,8 +766,9 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xAB20, 0xAB26, WBP_ALetter},
{0xAB28, 0xAB2E, WBP_ALetter},
{0xAB30, 0xAB5A, WBP_ALetter},
{0xAB5B, 0xAB5B, WBP_ALetter},
{0xAB5C, 0xAB5F, WBP_ALetter},
{0xAB60, 0xAB65, WBP_ALetter},
{0xAB60, 0xAB67, WBP_ALetter},
{0xAB70, 0xABBF, WBP_ALetter},
{0xABC0, 0xABE2, WBP_ALetter},
{0xABE3, 0xABE4, WBP_Extend},
@ -793,6 +815,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0xFF07, 0xFF07, WBP_MidNumLet},
{0xFF0C, 0xFF0C, WBP_MidNum},
{0xFF0E, 0xFF0E, WBP_MidNumLet},
{0xFF10, 0xFF19, WBP_Numeric},
{0xFF1A, 0xFF1A, WBP_MidLetter},
{0xFF1B, 0xFF1B, WBP_MidNum},
{0xFF21, 0xFF3A, WBP_ALetter},
@ -821,7 +844,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x102A0, 0x102D0, WBP_ALetter},
{0x102E0, 0x102E0, WBP_Extend},
{0x10300, 0x1031F, WBP_ALetter},
{0x10330, 0x10340, WBP_ALetter},
{0x1032D, 0x10340, WBP_ALetter},
{0x10341, 0x10341, WBP_ALetter},
{0x10342, 0x10349, WBP_ALetter},
{0x1034A, 0x1034A, WBP_ALetter},
@ -861,7 +884,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x10A0C, 0x10A0F, WBP_Extend},
{0x10A10, 0x10A13, WBP_ALetter},
{0x10A15, 0x10A17, WBP_ALetter},
{0x10A19, 0x10A33, WBP_ALetter},
{0x10A19, 0x10A35, WBP_ALetter},
{0x10A38, 0x10A3A, WBP_Extend},
{0x10A3F, 0x10A3F, WBP_Extend},
{0x10A60, 0x10A7C, WBP_ALetter},
@ -876,6 +899,14 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x10C00, 0x10C48, WBP_ALetter},
{0x10C80, 0x10CB2, WBP_ALetter},
{0x10CC0, 0x10CF2, WBP_ALetter},
{0x10D00, 0x10D23, WBP_ALetter},
{0x10D24, 0x10D27, WBP_Extend},
{0x10D30, 0x10D39, WBP_Numeric},
{0x10F00, 0x10F1C, WBP_ALetter},
{0x10F27, 0x10F27, WBP_ALetter},
{0x10F30, 0x10F45, WBP_ALetter},
{0x10F46, 0x10F50, WBP_Extend},
{0x10FE0, 0x10FF6, WBP_ALetter},
{0x11000, 0x11000, WBP_Extend},
{0x11001, 0x11001, WBP_Extend},
{0x11002, 0x11002, WBP_Extend},
@ -890,6 +921,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x110B7, 0x110B8, WBP_Extend},
{0x110B9, 0x110BA, WBP_Extend},
{0x110BD, 0x110BD, WBP_Format},
{0x110CD, 0x110CD, WBP_Format},
{0x110D0, 0x110E8, WBP_ALetter},
{0x110F0, 0x110F9, WBP_Numeric},
{0x11100, 0x11102, WBP_Extend},
@ -898,6 +930,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1112C, 0x1112C, WBP_Extend},
{0x1112D, 0x11134, WBP_Extend},
{0x11136, 0x1113F, WBP_Numeric},
{0x11144, 0x11144, WBP_ALetter},
{0x11145, 0x11146, WBP_Extend},
{0x11150, 0x11172, WBP_ALetter},
{0x11173, 0x11173, WBP_Extend},
{0x11176, 0x11176, WBP_ALetter},
@ -908,7 +942,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x111B6, 0x111BE, WBP_Extend},
{0x111BF, 0x111C0, WBP_Extend},
{0x111C1, 0x111C4, WBP_ALetter},
{0x111CA, 0x111CC, WBP_Extend},
{0x111C9, 0x111CC, WBP_Extend},
{0x111D0, 0x111D9, WBP_Numeric},
{0x111DA, 0x111DA, WBP_ALetter},
{0x111DC, 0x111DC, WBP_ALetter},
@ -939,7 +973,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1132A, 0x11330, WBP_ALetter},
{0x11332, 0x11333, WBP_ALetter},
{0x11335, 0x11339, WBP_ALetter},
{0x1133C, 0x1133C, WBP_Extend},
{0x1133B, 0x1133C, WBP_Extend},
{0x1133D, 0x1133D, WBP_ALetter},
{0x1133E, 0x1133F, WBP_Extend},
{0x11340, 0x11340, WBP_Extend},
@ -961,6 +995,8 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x11446, 0x11446, WBP_Extend},
{0x11447, 0x1144A, WBP_ALetter},
{0x11450, 0x11459, WBP_Numeric},
{0x1145E, 0x1145E, WBP_Extend},
{0x1145F, 0x1145F, WBP_ALetter},
{0x11480, 0x114AF, WBP_ALetter},
{0x114B0, 0x114B2, WBP_Extend},
{0x114B3, 0x114B8, WBP_Extend},
@ -999,6 +1035,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x116B0, 0x116B5, WBP_Extend},
{0x116B6, 0x116B6, WBP_Extend},
{0x116B7, 0x116B7, WBP_Extend},
{0x116B8, 0x116B8, WBP_ALetter},
{0x116C0, 0x116C9, WBP_Numeric},
{0x1171D, 0x1171F, WBP_Extend},
{0x11720, 0x11721, WBP_Extend},
@ -1006,9 +1043,41 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x11726, 0x11726, WBP_Extend},
{0x11727, 0x1172B, WBP_Extend},
{0x11730, 0x11739, WBP_Numeric},
{0x11800, 0x1182B, WBP_ALetter},
{0x1182C, 0x1182E, WBP_Extend},
{0x1182F, 0x11837, WBP_Extend},
{0x11838, 0x11838, WBP_Extend},
{0x11839, 0x1183A, WBP_Extend},
{0x118A0, 0x118DF, WBP_ALetter},
{0x118E0, 0x118E9, WBP_Numeric},
{0x118FF, 0x118FF, WBP_ALetter},
{0x119A0, 0x119A7, WBP_ALetter},
{0x119AA, 0x119D0, WBP_ALetter},
{0x119D1, 0x119D3, WBP_Extend},
{0x119D4, 0x119D7, WBP_Extend},
{0x119DA, 0x119DB, WBP_Extend},
{0x119DC, 0x119DF, WBP_Extend},
{0x119E0, 0x119E0, WBP_Extend},
{0x119E1, 0x119E1, WBP_ALetter},
{0x119E3, 0x119E3, WBP_ALetter},
{0x119E4, 0x119E4, WBP_Extend},
{0x11A00, 0x11A00, WBP_ALetter},
{0x11A01, 0x11A0A, WBP_Extend},
{0x11A0B, 0x11A32, WBP_ALetter},
{0x11A33, 0x11A38, WBP_Extend},
{0x11A39, 0x11A39, WBP_Extend},
{0x11A3A, 0x11A3A, WBP_ALetter},
{0x11A3B, 0x11A3E, WBP_Extend},
{0x11A47, 0x11A47, WBP_Extend},
{0x11A50, 0x11A50, WBP_ALetter},
{0x11A51, 0x11A56, WBP_Extend},
{0x11A57, 0x11A58, WBP_Extend},
{0x11A59, 0x11A5B, WBP_Extend},
{0x11A5C, 0x11A89, WBP_ALetter},
{0x11A8A, 0x11A96, WBP_Extend},
{0x11A97, 0x11A97, WBP_Extend},
{0x11A98, 0x11A99, WBP_Extend},
{0x11A9D, 0x11A9D, WBP_ALetter},
{0x11AC0, 0x11AF8, WBP_ALetter},
{0x11C00, 0x11C08, WBP_ALetter},
{0x11C0A, 0x11C2E, WBP_ALetter},
@ -1027,10 +1096,35 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x11CB2, 0x11CB3, WBP_Extend},
{0x11CB4, 0x11CB4, WBP_Extend},
{0x11CB5, 0x11CB6, WBP_Extend},
{0x11D00, 0x11D06, WBP_ALetter},
{0x11D08, 0x11D09, WBP_ALetter},
{0x11D0B, 0x11D30, WBP_ALetter},
{0x11D31, 0x11D36, WBP_Extend},
{0x11D3A, 0x11D3A, WBP_Extend},
{0x11D3C, 0x11D3D, WBP_Extend},
{0x11D3F, 0x11D45, WBP_Extend},
{0x11D46, 0x11D46, WBP_ALetter},
{0x11D47, 0x11D47, WBP_Extend},
{0x11D50, 0x11D59, WBP_Numeric},
{0x11D60, 0x11D65, WBP_ALetter},
{0x11D67, 0x11D68, WBP_ALetter},
{0x11D6A, 0x11D89, WBP_ALetter},
{0x11D8A, 0x11D8E, WBP_Extend},
{0x11D90, 0x11D91, WBP_Extend},
{0x11D93, 0x11D94, WBP_Extend},
{0x11D95, 0x11D95, WBP_Extend},
{0x11D96, 0x11D96, WBP_Extend},
{0x11D97, 0x11D97, WBP_Extend},
{0x11D98, 0x11D98, WBP_ALetter},
{0x11DA0, 0x11DA9, WBP_Numeric},
{0x11EE0, 0x11EF2, WBP_ALetter},
{0x11EF3, 0x11EF4, WBP_Extend},
{0x11EF5, 0x11EF6, WBP_Extend},
{0x12000, 0x12399, WBP_ALetter},
{0x12400, 0x1246E, WBP_ALetter},
{0x12480, 0x12543, WBP_ALetter},
{0x13000, 0x1342E, WBP_ALetter},
{0x13430, 0x13438, WBP_Format},
{0x14400, 0x14646, WBP_ALetter},
{0x16800, 0x16A38, WBP_ALetter},
{0x16A40, 0x16A5E, WBP_ALetter},
@ -1043,13 +1137,17 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x16B50, 0x16B59, WBP_Numeric},
{0x16B63, 0x16B77, WBP_ALetter},
{0x16B7D, 0x16B8F, WBP_ALetter},
{0x16F00, 0x16F44, WBP_ALetter},
{0x16E40, 0x16E7F, WBP_ALetter},
{0x16F00, 0x16F4A, WBP_ALetter},
{0x16F4F, 0x16F4F, WBP_Extend},
{0x16F50, 0x16F50, WBP_ALetter},
{0x16F51, 0x16F7E, WBP_Extend},
{0x16F51, 0x16F87, WBP_Extend},
{0x16F8F, 0x16F92, WBP_Extend},
{0x16F93, 0x16F9F, WBP_ALetter},
{0x16FE0, 0x16FE0, WBP_ALetter},
{0x16FE0, 0x16FE1, WBP_ALetter},
{0x16FE3, 0x16FE3, WBP_ALetter},
{0x1B000, 0x1B000, WBP_Katakana},
{0x1B164, 0x1B167, WBP_Katakana},
{0x1BC00, 0x1BC6A, WBP_ALetter},
{0x1BC70, 0x1BC7C, WBP_ALetter},
{0x1BC80, 0x1BC88, WBP_ALetter},
@ -1106,10 +1204,19 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1E01B, 0x1E021, WBP_Extend},
{0x1E023, 0x1E024, WBP_Extend},
{0x1E026, 0x1E02A, WBP_Extend},
{0x1E100, 0x1E12C, WBP_ALetter},
{0x1E130, 0x1E136, WBP_Extend},
{0x1E137, 0x1E13D, WBP_ALetter},
{0x1E140, 0x1E149, WBP_Numeric},
{0x1E14E, 0x1E14E, WBP_ALetter},
{0x1E2C0, 0x1E2EB, WBP_ALetter},
{0x1E2EC, 0x1E2EF, WBP_Extend},
{0x1E2F0, 0x1E2F9, WBP_Numeric},
{0x1E800, 0x1E8C4, WBP_ALetter},
{0x1E8D0, 0x1E8D6, WBP_Extend},
{0x1E900, 0x1E943, WBP_ALetter},
{0x1E944, 0x1E94A, WBP_Extend},
{0x1E94B, 0x1E94B, WBP_ALetter},
{0x1E950, 0x1E959, WBP_Numeric},
{0x1EE00, 0x1EE03, WBP_ALetter},
{0x1EE05, 0x1EE1F, WBP_ALetter},
@ -1148,35 +1255,7 @@ static const struct WordBreakProperties wb_prop_default[] = {
{0x1F150, 0x1F169, WBP_ALetter},
{0x1F170, 0x1F189, WBP_ALetter},
{0x1F1E6, 0x1F1FF, WBP_Regional_Indicator},
{0x1F385, 0x1F385, WBP_E_Base},
{0x1F3C3, 0x1F3C4, WBP_E_Base},
{0x1F3CA, 0x1F3CB, WBP_E_Base},
{0x1F3FB, 0x1F3FF, WBP_E_Modifier},
{0x1F442, 0x1F443, WBP_E_Base},
{0x1F446, 0x1F450, WBP_E_Base},
{0x1F466, 0x1F469, WBP_E_Base_GAZ},
{0x1F46E, 0x1F46E, WBP_E_Base},
{0x1F470, 0x1F478, WBP_E_Base},
{0x1F47C, 0x1F47C, WBP_E_Base},
{0x1F481, 0x1F483, WBP_E_Base},
{0x1F485, 0x1F487, WBP_E_Base},
{0x1F48B, 0x1F48B, WBP_Glue_After_Zwj},
{0x1F4AA, 0x1F4AA, WBP_E_Base},
{0x1F575, 0x1F575, WBP_E_Base},
{0x1F57A, 0x1F57A, WBP_E_Base},
{0x1F590, 0x1F590, WBP_E_Base},
{0x1F595, 0x1F596, WBP_E_Base},
{0x1F5E8, 0x1F5E8, WBP_Glue_After_Zwj},
{0x1F645, 0x1F647, WBP_E_Base},
{0x1F64B, 0x1F64F, WBP_E_Base},
{0x1F6A3, 0x1F6A3, WBP_E_Base},
{0x1F6B4, 0x1F6B6, WBP_E_Base},
{0x1F6C0, 0x1F6C0, WBP_E_Base},
{0x1F918, 0x1F91E, WBP_E_Base},
{0x1F926, 0x1F926, WBP_E_Base},
{0x1F930, 0x1F930, WBP_E_Base},
{0x1F933, 0x1F939, WBP_E_Base},
{0x1F93C, 0x1F93E, WBP_E_Base},
{0x1F3FB, 0x1F3FF, WBP_Extend},
{0xE0001, 0xE0001, WBP_Format},
{0xE0020, 0xE007F, WBP_Extend},
{0xE0100, 0xE01EF, WBP_Extend},

View File

@ -4,7 +4,8 @@
* Word breaking in a Unicode sequence. Designed to be used in a
* generic text renderer.
*
* Copyright (C) 2013-16 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
* Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the author be held liable for any damages
@ -30,9 +31,9 @@
* Unicode 6.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
*
* This library has been updated according to Revision 29, for
* Unicode 9.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
* This library has been updated according to Revision 35, for
* Unicode 12.0.0:
* <URL:http://www.unicode.org/reports/tr29/tr29-35.html>
*
* The Unicode Terms of Use are available at
* <URL:http://www.unicode.org/copyright.html>
@ -51,7 +52,7 @@
/**
* Word break classes. This is a direct mapping of Table 3 of Unicode
* Standard Annex 29, Revision 23.
* Standard Annex 29, Revision 35.
*/
enum WordBreakClass
{
@ -73,10 +74,7 @@ enum WordBreakClass
WBP_MidNum,
WBP_Numeric,
WBP_ExtendNumLet,
WBP_E_Base,
WBP_E_Modifier,
WBP_Glue_After_Zwj,
WBP_E_Base_GAZ,
WBP_WSegSpace,
WBP_Any
};
@ -86,7 +84,7 @@ enum WordBreakClass
*/
struct WordBreakProperties
{
utf32_t start; /**< Starting coding point */
utf32_t end; /**< End coding point */
utf32_t start; /**< Start codepoint */
utf32_t end; /**< End codepoint, inclusive */
enum WordBreakClass prop; /**< The word breaking property */
};

View File

@ -1054,6 +1054,10 @@ EFL_START_TEST(evas_textblock_cursor)
pos = evas_textblock_cursor_pos_get(cur);
ck_assert_int_eq(pos, 0);
evas_object_textblock_text_markup_set(tb, "&#x1f3f3;&#xfe0f;&#x200d;&#x1f308;");
evas_textblock_cursor_pos_set(cur, 0);
evas_textblock_cursor_cluster_next(cur);
ck_assert_int_eq(4, evas_textblock_cursor_pos_get(cur));
END_TB_TEST();
}