From 509e3fcc7a65918f6bcf8a664353b45af457cfa6 Mon Sep 17 00:00:00 2001 From: WooHyun Jung Date: Thu, 8 Oct 2020 12:32:53 +0900 Subject: [PATCH] Revert "Revert "evas_textblock: rainbow flag emoji treated as two clusters(update unibreak to version 4.2)"" This reverts commit 173b3a108e1b2093ac37650619a61568aaed4e04. The original change was reverted because of the code freeze for the release. Now that the release work is over, it is safe to restore it. --- src/lib/evas/canvas/evas_object_textblock.c | 4 +- src/static_libs/libunibreak/LICENCE | 8 +- src/static_libs/libunibreak/NEWS | 14 +- src/static_libs/libunibreak/README.md | 16 +- src/static_libs/libunibreak/emojidata.c | 264 ++++++++++++++++++ src/static_libs/libunibreak/emojidef.c | 61 ++++ src/static_libs/libunibreak/emojidef.h | 46 +++ src/static_libs/libunibreak/graphemebreak.c | 88 +++--- src/static_libs/libunibreak/graphemebreak.h | 8 +- .../libunibreak/graphemebreakdata.c | 114 ++++---- .../libunibreak/graphemebreakdef.h | 18 +- src/static_libs/libunibreak/linebreak.c | 66 +++-- src/static_libs/libunibreak/linebreak.h | 8 +- src/static_libs/libunibreak/linebreakdata.c | 225 +++++++++++---- src/static_libs/libunibreak/linebreakdef.c | 17 +- src/static_libs/libunibreak/linebreakdef.h | 25 +- src/static_libs/libunibreak/meson.build | 2 + src/static_libs/libunibreak/unibreakbase.c | 2 +- src/static_libs/libunibreak/unibreakbase.h | 4 +- src/static_libs/libunibreak/unibreakdef.h | 12 +- src/static_libs/libunibreak/wordbreak.c | 92 +++--- src/static_libs/libunibreak/wordbreak.h | 11 +- src/static_libs/libunibreak/wordbreakdata.c | 201 +++++++++---- src/static_libs/libunibreak/wordbreakdef.h | 20 +- src/tests/evas/evas_test_textblock.c | 4 + 25 files changed, 986 insertions(+), 344 deletions(-) create mode 100644 src/static_libs/libunibreak/emojidata.c create mode 100644 src/static_libs/libunibreak/emojidef.c create mode 100644 src/static_libs/libunibreak/emojidef.h diff --git a/src/lib/evas/canvas/evas_object_textblock.c 
b/src/lib/evas/canvas/evas_object_textblock.c index 501e12ba78..77f3d3d07a 100644 --- a/src/lib/evas/canvas/evas_object_textblock.c +++ b/src/lib/evas/canvas/evas_object_textblock.c @@ -10249,7 +10249,7 @@ evas_textblock_cursor_word_start(Efl_Text_Cursor_Handle *cur) if ((cur->pos > 0) && (cur->pos == len)) cur->pos--; - for (i = cur->pos ; _is_white(text[i]) && BREAK_AFTER(i) ; i--) + for (i = cur->pos ; _is_white(text[i]) ; i--) { if (i == 0) { @@ -10316,7 +10316,7 @@ evas_textblock_cursor_word_end(Efl_Text_Cursor_Handle *cur) set_wordbreaks_utf32((const utf32_t *) text, len, lang, breaks); } - for (i = cur->pos; text[i] && _is_white(text[i]) && (BREAK_AFTER(i)) ; i++); + for (i = cur->pos; text[i] && _is_white(text[i]) ; i++); if (i == len) { Evas_Object_Textblock_Node_Text *nnode; diff --git a/src/static_libs/libunibreak/LICENCE b/src/static_libs/libunibreak/LICENCE index 3fba16ad53..6b4137ca21 100644 --- a/src/static_libs/libunibreak/LICENCE +++ b/src/static_libs/libunibreak/LICENCE @@ -1,7 +1,7 @@ -Copyright (C) 2008-2016 Wu Yongwei -Copyright (C) 2012-2016 Tom Hacohen -Copyright (C) 2013 Petr Filipsky -Copyright (C) 2016 Andreas Röver +Copyright (C) Wu Yongwei +Copyright (C) Tom Hacohen +Copyright (C) Petr Filipsky +Copyright (C) Andreas Röver This software is provided 'as-is', without any express or implied warranty. 
In no event will the author be held liable for any damages diff --git a/src/static_libs/libunibreak/NEWS b/src/static_libs/libunibreak/NEWS index d217628da8..a2b9e0302e 100644 --- a/src/static_libs/libunibreak/NEWS +++ b/src/static_libs/libunibreak/NEWS @@ -1,3 +1,14 @@ +New in libunibreak 4.2 + +- Update the data to conform to Unicode 12 + +New in libunibreak 4.1 + +- Update the code and data to conform to Unicode 11.0.0, especially + adding support for extended pictographs in word and grapheme breaking +- ZWJ support has been much improved (it was broken) +- Make minor tweaks to the project files + New in libunibreak 4.0 - Update the code and data to conform to Unicode 9.0.0 @@ -22,7 +33,8 @@ New in libunibreak 1.1 New in libunibreak 1.0 - Add word breaking support -- Change the library name to "libunibreak", while keeping maximum compatibility +- Change the library name to "libunibreak", while keeping maximum + compatibility - Add pkg-config support New in liblinebreak 2.1 diff --git a/src/static_libs/libunibreak/README.md b/src/static_libs/libunibreak/README.md index f37fd902aa..4e65059586 100644 --- a/src/static_libs/libunibreak/README.md +++ b/src/static_libs/libunibreak/README.md @@ -6,11 +6,11 @@ Overview This is the README file for libunibreak, an implementation of the line breaking and word breaking algorithms as described in [Unicode Standard -Annex 14] [1] and [Unicode Standard Annex 29] [2]. Check the project's -[home page] [3] for up-to-date information. +Annex 14][1] and [Unicode Standard Annex 29][2]. Check the project's +[home page][3] for up-to-date information. - [1]: http://www.unicode.org/reports/tr14/tr14-37.html - [2]: http://www.unicode.org/reports/tr29/tr29-29.html + [1]: http://www.unicode.org/reports/tr14/ + [2]: http://www.unicode.org/reports/tr29/ [3]: https://github.com/adah1972/libunibreak @@ -21,7 +21,7 @@ This library is released under an open-source licence, the zlib/libpng licence. Please check the file *LICENCE* for details. 
Apart from using the algorithm, part of the code is derived from the -[Unicode Public Data] [4], and the [Unicode Terms of Use] [5] may apply. +[Unicode Public Data][4], and the [Unicode Terms of Use][5] may apply. [4]: http://www.unicode.org/Public/ [5]: http://www.unicode.org/copyright.html @@ -48,6 +48,8 @@ There are three ways to build the library: *WordBreakProperty.txt*. - type `make graphemebreakdata` to regenerate *graphemebreakdata.c* from *GraphemeBreakProperty.txt*. + - type `make emojidata` to regenerate *emojidata.c* from + *emoji-data.txt*. 2. On systems where GCC and Binutils are supported, one can type @@ -65,6 +67,8 @@ There are three ways to build the library: *WordBreakProperty.txt*. - type `make graphemebreakdata` to regenerate *graphemebreakdata.c* from *GraphemeBreakProperty.txt*. + - type `make emojidata` to regenerate *emojidata.c* from + *emoji-data.txt*. 3. On Windows, apart from using method 1 (Cygwin/MSYS) and method 2 (MinGW), MSVC can also be used. Type @@ -72,7 +76,7 @@ There are three ways to build the library: cd src nmake -f Makefile.msvc - to build the static library. By default the debug release is built. + to build the static library. By default the debug version is built. 
To build the release version nmake -f Makefile.msvc CFG="libunibreak - Win32 Release" diff --git a/src/static_libs/libunibreak/emojidata.c b/src/static_libs/libunibreak/emojidata.c new file mode 100644 index 0000000000..a78f2678e7 --- /dev/null +++ b/src/static_libs/libunibreak/emojidata.c @@ -0,0 +1,264 @@ +/* The content of this file is generated from: +# emoji-data.txt +# Date: 2019-01-15, 12:10:05 GMT +*/ + +static const struct ExtendedPictograpic ep_prop[] = { + {0x00A9, 0x00A9}, + {0x00AE, 0x00AE}, + {0x203C, 0x203C}, + {0x2049, 0x2049}, + {0x2122, 0x2122}, + {0x2139, 0x2139}, + {0x2194, 0x2199}, + {0x21A9, 0x21AA}, + {0x231A, 0x231B}, + {0x2328, 0x2328}, + {0x2388, 0x2388}, + {0x23CF, 0x23CF}, + {0x23E9, 0x23F3}, + {0x23F8, 0x23FA}, + {0x24C2, 0x24C2}, + {0x25AA, 0x25AB}, + {0x25B6, 0x25B6}, + {0x25C0, 0x25C0}, + {0x25FB, 0x25FE}, + {0x2600, 0x2605}, + {0x2607, 0x2612}, + {0x2614, 0x2615}, + {0x2616, 0x2617}, + {0x2618, 0x2618}, + {0x2619, 0x2619}, + {0x261A, 0x266F}, + {0x2670, 0x2671}, + {0x2672, 0x267D}, + {0x267E, 0x267F}, + {0x2680, 0x2685}, + {0x2690, 0x2691}, + {0x2692, 0x269C}, + {0x269D, 0x269D}, + {0x269E, 0x269F}, + {0x26A0, 0x26A1}, + {0x26A2, 0x26B1}, + {0x26B2, 0x26B2}, + {0x26B3, 0x26BC}, + {0x26BD, 0x26BF}, + {0x26C0, 0x26C3}, + {0x26C4, 0x26CD}, + {0x26CE, 0x26CE}, + {0x26CF, 0x26E1}, + {0x26E2, 0x26E2}, + {0x26E3, 0x26E3}, + {0x26E4, 0x26E7}, + {0x26E8, 0x26FF}, + {0x2700, 0x2700}, + {0x2701, 0x2704}, + {0x2705, 0x2705}, + {0x2708, 0x2709}, + {0x270A, 0x270B}, + {0x270C, 0x2712}, + {0x2714, 0x2714}, + {0x2716, 0x2716}, + {0x271D, 0x271D}, + {0x2721, 0x2721}, + {0x2728, 0x2728}, + {0x2733, 0x2734}, + {0x2744, 0x2744}, + {0x2747, 0x2747}, + {0x274C, 0x274C}, + {0x274E, 0x274E}, + {0x2753, 0x2755}, + {0x2757, 0x2757}, + {0x2763, 0x2767}, + {0x2795, 0x2797}, + {0x27A1, 0x27A1}, + {0x27B0, 0x27B0}, + {0x27BF, 0x27BF}, + {0x2934, 0x2935}, + {0x2B05, 0x2B07}, + {0x2B1B, 0x2B1C}, + {0x2B50, 0x2B50}, + {0x2B55, 0x2B55}, + {0x3030, 0x3030}, + 
{0x303D, 0x303D}, + {0x3297, 0x3297}, + {0x3299, 0x3299}, + {0x1F000, 0x1F02B}, + {0x1F02C, 0x1F02F}, + {0x1F030, 0x1F093}, + {0x1F094, 0x1F09F}, + {0x1F0A0, 0x1F0AE}, + {0x1F0AF, 0x1F0B0}, + {0x1F0B1, 0x1F0BE}, + {0x1F0BF, 0x1F0BF}, + {0x1F0C0, 0x1F0C0}, + {0x1F0C1, 0x1F0CF}, + {0x1F0D0, 0x1F0D0}, + {0x1F0D1, 0x1F0DF}, + {0x1F0E0, 0x1F0F5}, + {0x1F0F6, 0x1F0FF}, + {0x1F10D, 0x1F10F}, + {0x1F12F, 0x1F12F}, + {0x1F16C, 0x1F16C}, + {0x1F16D, 0x1F16F}, + {0x1F170, 0x1F171}, + {0x1F17E, 0x1F17E}, + {0x1F17F, 0x1F17F}, + {0x1F18E, 0x1F18E}, + {0x1F191, 0x1F19A}, + {0x1F1AD, 0x1F1E5}, + {0x1F201, 0x1F202}, + {0x1F203, 0x1F20F}, + {0x1F21A, 0x1F21A}, + {0x1F22F, 0x1F22F}, + {0x1F232, 0x1F23A}, + {0x1F23C, 0x1F23F}, + {0x1F249, 0x1F24F}, + {0x1F250, 0x1F251}, + {0x1F252, 0x1F25F}, + {0x1F260, 0x1F265}, + {0x1F266, 0x1F2FF}, + {0x1F300, 0x1F320}, + {0x1F321, 0x1F32C}, + {0x1F32D, 0x1F32F}, + {0x1F330, 0x1F335}, + {0x1F336, 0x1F336}, + {0x1F337, 0x1F37C}, + {0x1F37D, 0x1F37D}, + {0x1F37E, 0x1F37F}, + {0x1F380, 0x1F393}, + {0x1F394, 0x1F39F}, + {0x1F3A0, 0x1F3C4}, + {0x1F3C5, 0x1F3C5}, + {0x1F3C6, 0x1F3CA}, + {0x1F3CB, 0x1F3CE}, + {0x1F3CF, 0x1F3D3}, + {0x1F3D4, 0x1F3DF}, + {0x1F3E0, 0x1F3F0}, + {0x1F3F1, 0x1F3F7}, + {0x1F3F8, 0x1F3FA}, + {0x1F400, 0x1F43E}, + {0x1F43F, 0x1F43F}, + {0x1F440, 0x1F440}, + {0x1F441, 0x1F441}, + {0x1F442, 0x1F4F7}, + {0x1F4F8, 0x1F4F8}, + {0x1F4F9, 0x1F4FC}, + {0x1F4FD, 0x1F4FE}, + {0x1F4FF, 0x1F4FF}, + {0x1F500, 0x1F53D}, + {0x1F546, 0x1F54A}, + {0x1F54B, 0x1F54F}, + {0x1F550, 0x1F567}, + {0x1F568, 0x1F579}, + {0x1F57A, 0x1F57A}, + {0x1F57B, 0x1F5A3}, + {0x1F5A4, 0x1F5A4}, + {0x1F5A5, 0x1F5FA}, + {0x1F5FB, 0x1F5FF}, + {0x1F600, 0x1F600}, + {0x1F601, 0x1F610}, + {0x1F611, 0x1F611}, + {0x1F612, 0x1F614}, + {0x1F615, 0x1F615}, + {0x1F616, 0x1F616}, + {0x1F617, 0x1F617}, + {0x1F618, 0x1F618}, + {0x1F619, 0x1F619}, + {0x1F61A, 0x1F61A}, + {0x1F61B, 0x1F61B}, + {0x1F61C, 0x1F61E}, + {0x1F61F, 0x1F61F}, + {0x1F620, 0x1F625}, + {0x1F626, 0x1F627}, + 
{0x1F628, 0x1F62B}, + {0x1F62C, 0x1F62C}, + {0x1F62D, 0x1F62D}, + {0x1F62E, 0x1F62F}, + {0x1F630, 0x1F633}, + {0x1F634, 0x1F634}, + {0x1F635, 0x1F640}, + {0x1F641, 0x1F642}, + {0x1F643, 0x1F644}, + {0x1F645, 0x1F64F}, + {0x1F680, 0x1F6C5}, + {0x1F6C6, 0x1F6CF}, + {0x1F6D0, 0x1F6D0}, + {0x1F6D1, 0x1F6D2}, + {0x1F6D3, 0x1F6D4}, + {0x1F6D5, 0x1F6D5}, + {0x1F6D6, 0x1F6DF}, + {0x1F6E0, 0x1F6EC}, + {0x1F6ED, 0x1F6EF}, + {0x1F6F0, 0x1F6F3}, + {0x1F6F4, 0x1F6F6}, + {0x1F6F7, 0x1F6F8}, + {0x1F6F9, 0x1F6F9}, + {0x1F6FA, 0x1F6FA}, + {0x1F6FB, 0x1F6FF}, + {0x1F774, 0x1F77F}, + {0x1F7D5, 0x1F7D8}, + {0x1F7D9, 0x1F7DF}, + {0x1F7E0, 0x1F7EB}, + {0x1F7EC, 0x1F7FF}, + {0x1F80C, 0x1F80F}, + {0x1F848, 0x1F84F}, + {0x1F85A, 0x1F85F}, + {0x1F888, 0x1F88F}, + {0x1F8AE, 0x1F8FF}, + {0x1F90C, 0x1F90C}, + {0x1F90D, 0x1F90F}, + {0x1F910, 0x1F918}, + {0x1F919, 0x1F91E}, + {0x1F91F, 0x1F91F}, + {0x1F920, 0x1F927}, + {0x1F928, 0x1F92F}, + {0x1F930, 0x1F930}, + {0x1F931, 0x1F932}, + {0x1F933, 0x1F93A}, + {0x1F93C, 0x1F93E}, + {0x1F93F, 0x1F93F}, + {0x1F940, 0x1F945}, + {0x1F947, 0x1F94B}, + {0x1F94C, 0x1F94C}, + {0x1F94D, 0x1F94F}, + {0x1F950, 0x1F95E}, + {0x1F95F, 0x1F96B}, + {0x1F96C, 0x1F970}, + {0x1F971, 0x1F971}, + {0x1F972, 0x1F972}, + {0x1F973, 0x1F976}, + {0x1F977, 0x1F979}, + {0x1F97A, 0x1F97A}, + {0x1F97B, 0x1F97B}, + {0x1F97C, 0x1F97F}, + {0x1F980, 0x1F984}, + {0x1F985, 0x1F991}, + {0x1F992, 0x1F997}, + {0x1F998, 0x1F9A2}, + {0x1F9A3, 0x1F9A4}, + {0x1F9A5, 0x1F9AA}, + {0x1F9AB, 0x1F9AD}, + {0x1F9AE, 0x1F9AF}, + {0x1F9B0, 0x1F9B9}, + {0x1F9BA, 0x1F9BF}, + {0x1F9C0, 0x1F9C0}, + {0x1F9C1, 0x1F9C2}, + {0x1F9C3, 0x1F9CA}, + {0x1F9CB, 0x1F9CC}, + {0x1F9CD, 0x1F9CF}, + {0x1F9D0, 0x1F9E6}, + {0x1F9E7, 0x1F9FF}, + {0x1FA00, 0x1FA53}, + {0x1FA54, 0x1FA5F}, + {0x1FA60, 0x1FA6D}, + {0x1FA6E, 0x1FA6F}, + {0x1FA70, 0x1FA73}, + {0x1FA74, 0x1FA77}, + {0x1FA78, 0x1FA7A}, + {0x1FA7B, 0x1FA7F}, + {0x1FA80, 0x1FA82}, + {0x1FA83, 0x1FA8F}, + {0x1FA90, 0x1FA95}, + {0x1FA96, 0x1FFFD}, +}; diff --git 
a/src/static_libs/libunibreak/emojidef.c b/src/static_libs/libunibreak/emojidef.c new file mode 100644 index 0000000000..43a2ed3db0 --- /dev/null +++ b/src/static_libs/libunibreak/emojidef.c @@ -0,0 +1,61 @@ +/* + * Emoji-related routine and data. + * + * Copyright (C) 2018 Andreas Röver + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the author be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgement in the product + * documentation would be appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source + * distribution. + */ + +/** + * @file emojidef.c + * + * Emoji-related routine and data that are used internally. + * + * @author Andreas Röver + */ + +#include "emojidef.h" +#include "emojidata.c" + +/** + * Finds out if a codepoint is extended pictographic. 
+ * + * @param[in] ch character to check + * @return \c true if the codepoint is extended pictographic; + * \c false otherwise + */ +bool ub_is_extended_pictographic(utf32_t ch) +{ + int min = 0; + int max = ARRAY_LEN(ep_prop) - 1; + int mid; + + do + { + mid = (min + max) / 2; + + if (ch < ep_prop[mid].start) + max = mid - 1; + else if (ch > ep_prop[mid].end) + min = mid + 1; + else + return true; + } while (min <= max); + + return false; +} diff --git a/src/static_libs/libunibreak/emojidef.h b/src/static_libs/libunibreak/emojidef.h new file mode 100644 index 0000000000..b9055fd261 --- /dev/null +++ b/src/static_libs/libunibreak/emojidef.h @@ -0,0 +1,46 @@ +/* + * Emoji-related routine and data. + * + * Copyright (C) 2018 Andreas Röver + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the author be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgement in the product + * documentation would be appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source + * distribution. + */ + +/** + * @file emojidef.h + * + * Definitions of internal data structure and function for extended + * pictographs. + * + * @author Andreas Röver + */ + +#include "unibreakdef.h" + +/** + * Struct for entries of extended pictographic properties. The array of + * the entries \e must be sorted. 
All codepoints within this list have + * the property of being extended pictographic. + */ +struct ExtendedPictograpic +{ + utf32_t start; /**< Start codepoint */ + utf32_t end; /**< End codepoint, inclusive */ +}; + +bool ub_is_extended_pictographic(utf32_t ch); diff --git a/src/static_libs/libunibreak/graphemebreak.c b/src/static_libs/libunibreak/graphemebreak.c index 77c3d5f55c..401522f12d 100644 --- a/src/static_libs/libunibreak/graphemebreak.c +++ b/src/static_libs/libunibreak/graphemebreak.c @@ -2,7 +2,7 @@ * Grapheme breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2016 Andreas Röver + * Copyright (C) 2016-2019 Andreas Röver * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -28,6 +28,10 @@ * Unicode 9.0.0: * * + * This library has been updated according to Revision 35, for + * Unicode 12.0.0: + * + * * The Unicode Terms of Use are available at * */ @@ -38,23 +42,14 @@ * Implementation of the grapheme breaking algorithm as described in Unicode * Standard Annex 29. * - * @author Andreas Roever + * @author Andreas Röver */ -#if defined(_MSC_VER) && _MSC_VER < 1800 -typedef int bool; -#define false 0 -#define true 1 -#else -#include -#endif - #include #include "graphemebreak.h" #include "graphemebreakdata.c" #include "unibreakdef.h" - -#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) +#include "emojidef.h" /** * Initializes the wordbreak internals. It currently does nothing, but @@ -67,8 +62,8 @@ void init_graphemebreak(void) /** * Gets the grapheme breaking class of a character. 
* - * @param ch character to check - * @return the grapheme breaking class if found; \c GBP_Other otherwise + * @param[in] ch character to check + * @return the grapheme breaking class if found; \c GBP_Other otherwise */ static enum GraphemeBreakClass get_char_gb_class(utf32_t ch) { @@ -93,6 +88,7 @@ static enum GraphemeBreakClass get_char_gb_class(utf32_t ch) /** * Sets the grapheme breaking information for a generic input string. + * It uses the extended grapheme cluster ruleset. * * @param[in] s input string * @param[in] len length of the input @@ -104,7 +100,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, get_next_char_t get_next_char) { size_t posNext = 0; - bool rule10Left = false; // is the left side of rule 10 fulfilled? + int rule11Detector = 0; bool evenRegionalIndicators = true; // is the number of preceeding // GBP_RegionalIndicator characters // even @@ -117,6 +113,47 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, while (true) { + + // this state-machine recognizes the following pattern: + // extended_pictograph Extended* ZWJ + // when that pattern has been detected rule11Detector will be + // 3 and rule 11 can be applied below + switch (current_class) + { + case GBP_ZWJ: + if (rule11Detector == 1 || rule11Detector == 2) + { + rule11Detector = 3; + } + else + { + rule11Detector = 0; + } + break; + + case GBP_Extend: + if (rule11Detector == 1 || rule11Detector == 2) + { + rule11Detector = 2; + } + else + { + rule11Detector = 0; + } + break; + + default: + if (ub_is_extended_pictographic(ch)) + { + rule11Detector = 1; + } + else + { + rule11Detector = 0; + } + break; + } + enum GraphemeBreakClass prev_class = current_class; // safe position if current character so that we can store the @@ -137,16 +174,6 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, // get class of current character current_class = get_char_gb_class(ch); - // update some helper variables - if ((prev_class == 
GBP_E_Base) || (prev_class == GBP_E_Base_GAZ)) - { - rule10Left = true; - } - else if (prev_class != GBP_Extend) - { - rule10Left = false; - } - if (prev_class == GBP_Regional_Indicator) { evenRegionalIndicators = !evenRegionalIndicators; @@ -185,7 +212,8 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB8 } else if ((current_class == GBP_Extend) || - (current_class == GBP_ZWJ)) + (current_class == GBP_ZWJ) || + (current_class == GBP_Virama)) { brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9 } @@ -197,13 +225,7 @@ static void set_graphemebreaks(const void *s, size_t len, char *brks, { brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB9b } - else if (rule10Left && (current_class == GBP_E_Modifier)) - { - brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB10 - } - else if ((prev_class == GBP_ZWJ) && - ((current_class == GBP_Glue_After_Zwj) || - (current_class == GBP_E_Base_GAZ))) + else if ((rule11Detector == 3) && ub_is_extended_pictographic(ch)) { brks[brksPos] = GRAPHEMEBREAK_NOBREAK; // Rule: GB11 } diff --git a/src/static_libs/libunibreak/graphemebreak.h b/src/static_libs/libunibreak/graphemebreak.h index c01768233a..e5259b5ccd 100644 --- a/src/static_libs/libunibreak/graphemebreak.h +++ b/src/static_libs/libunibreak/graphemebreak.h @@ -2,7 +2,7 @@ * Grapheme breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2016 Andreas Röver + * Copyright (C) 2016-2019 Andreas Röver * * This software is provided 'as-is', without any express or implied * warranty. 
In no event will the author be held liable for any damages @@ -28,6 +28,10 @@ * Unicode 9.0.0: * * + * This library has been updated according to Revision 35, for + * Unicode 12.0.0: + * + * * The Unicode Terms of Use are available at * */ @@ -66,4 +70,4 @@ void set_graphemebreaks_utf32(const utf32_t *s, size_t len, } #endif -#endif +#endif /* GRAPHEMEBREAK_H */ diff --git a/src/static_libs/libunibreak/graphemebreakdata.c b/src/static_libs/libunibreak/graphemebreakdata.c index cab9bebd80..bc1af932cf 100644 --- a/src/static_libs/libunibreak/graphemebreakdata.c +++ b/src/static_libs/libunibreak/graphemebreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# GraphemeBreakProperty-9.0.0.txt -# Date: 2016-06-03, 22:23:55 GMT +# GraphemeBreakProperty-12.1.0.txt +# Date: 2019-03-10, 10:53:12 GMT */ #include "graphemebreakdef.h" @@ -36,12 +36,13 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0730, 0x074A, GBP_Extend}, {0x07A6, 0x07B0, GBP_Extend}, {0x07EB, 0x07F3, GBP_Extend}, + {0x07FD, 0x07FD, GBP_Extend}, {0x0816, 0x0819, GBP_Extend}, {0x081B, 0x0823, GBP_Extend}, {0x0825, 0x0827, GBP_Extend}, {0x0829, 0x082D, GBP_Extend}, {0x0859, 0x085B, GBP_Extend}, - {0x08D4, 0x08E1, GBP_Extend}, + {0x08D3, 0x08E1, GBP_Extend}, {0x08E2, 0x08E2, GBP_Prepend}, {0x08E3, 0x0902, GBP_Extend}, {0x0903, 0x0903, GBP_SpacingMark}, @@ -66,6 +67,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x09CD, 0x09CD, GBP_Extend}, {0x09D7, 0x09D7, GBP_Extend}, {0x09E2, 0x09E3, GBP_Extend}, + {0x09FE, 0x09FE, GBP_Extend}, {0x0A01, 0x0A02, GBP_Extend}, {0x0A03, 0x0A03, GBP_SpacingMark}, {0x0A3C, 0x0A3C, GBP_Extend}, @@ -86,6 +88,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0ACB, 0x0ACC, GBP_SpacingMark}, {0x0ACD, 0x0ACD, GBP_Extend}, {0x0AE2, 0x0AE3, GBP_Extend}, + {0x0AFA, 0x0AFF, GBP_Extend}, {0x0B01, 0x0B01, GBP_Extend}, {0x0B02, 0x0B03, GBP_SpacingMark}, {0x0B3C, 0x0B3C, GBP_Extend}, @@ -110,6 +113,7 @@ 
static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0BD7, 0x0BD7, GBP_Extend}, {0x0C00, 0x0C00, GBP_Extend}, {0x0C01, 0x0C03, GBP_SpacingMark}, + {0x0C04, 0x0C04, GBP_Extend}, {0x0C3E, 0x0C40, GBP_Extend}, {0x0C41, 0x0C44, GBP_SpacingMark}, {0x0C46, 0x0C48, GBP_Extend}, @@ -130,8 +134,9 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0CCC, 0x0CCD, GBP_Extend}, {0x0CD5, 0x0CD6, GBP_Extend}, {0x0CE2, 0x0CE3, GBP_Extend}, - {0x0D01, 0x0D01, GBP_Extend}, + {0x0D00, 0x0D01, GBP_Extend}, {0x0D02, 0x0D03, GBP_SpacingMark}, + {0x0D3B, 0x0D3C, GBP_Extend}, {0x0D3E, 0x0D3E, GBP_Extend}, {0x0D3F, 0x0D40, GBP_SpacingMark}, {0x0D41, 0x0D44, GBP_Extend}, @@ -156,8 +161,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x0E47, 0x0E4E, GBP_Extend}, {0x0EB1, 0x0EB1, GBP_Extend}, {0x0EB3, 0x0EB3, GBP_SpacingMark}, - {0x0EB4, 0x0EB9, GBP_Extend}, - {0x0EBB, 0x0EBC, GBP_Extend}, + {0x0EB4, 0x0EBC, GBP_Extend}, {0x0EC8, 0x0ECD, GBP_Extend}, {0x0F18, 0x0F19, GBP_Extend}, {0x0F35, 0x0F35, GBP_Extend}, @@ -232,7 +236,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x1B00, 0x1B03, GBP_Extend}, {0x1B04, 0x1B04, GBP_SpacingMark}, {0x1B34, 0x1B34, GBP_Extend}, - {0x1B35, 0x1B35, GBP_SpacingMark}, + {0x1B35, 0x1B35, GBP_Extend}, {0x1B36, 0x1B3A, GBP_Extend}, {0x1B3B, 0x1B3B, GBP_SpacingMark}, {0x1B3C, 0x1B3C, GBP_Extend}, @@ -265,10 +269,10 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x1CE1, 0x1CE1, GBP_SpacingMark}, {0x1CE2, 0x1CE8, GBP_Extend}, {0x1CED, 0x1CED, GBP_Extend}, - {0x1CF2, 0x1CF3, GBP_SpacingMark}, {0x1CF4, 0x1CF4, GBP_Extend}, + {0x1CF7, 0x1CF7, GBP_SpacingMark}, {0x1CF8, 0x1CF9, GBP_Extend}, - {0x1DC0, 0x1DF5, GBP_Extend}, + {0x1DC0, 0x1DF9, GBP_Extend}, {0x1DFB, 0x1DFF, GBP_Extend}, {0x200B, 0x200B, GBP_Control}, {0x200C, 0x200C, GBP_Extend}, @@ -285,10 +289,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x20E1, 0x20E1, GBP_Extend}, {0x20E2, 
0x20E4, GBP_Extend}, {0x20E5, 0x20F0, GBP_Extend}, - {0x261D, 0x261D, GBP_E_Base}, - {0x26F9, 0x26F9, GBP_E_Base}, - {0x270A, 0x270D, GBP_E_Base}, - {0x2764, 0x2764, GBP_Glue_After_Zwj}, {0x2CEF, 0x2CF1, GBP_Extend}, {0x2D7F, 0x2D7F, GBP_Extend}, {0x2DE0, 0x2DFF, GBP_Extend}, @@ -310,6 +310,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0xA8B4, 0xA8C3, GBP_SpacingMark}, {0xA8C4, 0xA8C5, GBP_Extend}, {0xA8E0, 0xA8F1, GBP_Extend}, + {0xA8FF, 0xA8FF, GBP_Extend}, {0xA926, 0xA92D, GBP_Extend}, {0xA947, 0xA951, GBP_Extend}, {0xA952, 0xA953, GBP_SpacingMark}, @@ -320,8 +321,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0xA9B4, 0xA9B5, GBP_SpacingMark}, {0xA9B6, 0xA9B9, GBP_Extend}, {0xA9BA, 0xA9BB, GBP_SpacingMark}, - {0xA9BC, 0xA9BC, GBP_Extend}, - {0xA9BD, 0xA9C0, GBP_SpacingMark}, + {0xA9BC, 0xA9BD, GBP_Extend}, + {0xA9BE, 0xA9C0, GBP_SpacingMark}, {0xA9E5, 0xA9E5, GBP_Extend}, {0xAA29, 0xAA2E, GBP_Extend}, {0xAA2F, 0xAA30, GBP_SpacingMark}, @@ -1149,7 +1150,6 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0xD789, 0xD7A3, GBP_LVT}, {0xD7B0, 0xD7C6, GBP_V}, {0xD7CB, 0xD7FB, GBP_T}, - {0xD800, 0xDFFF, GBP_Control}, {0xFB1E, 0xFB1E, GBP_Extend}, {0xFE00, 0xFE0F, GBP_Extend}, {0xFE20, 0xFE2F, GBP_Extend}, @@ -1166,6 +1166,8 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x10A38, 0x10A3A, GBP_Extend}, {0x10A3F, 0x10A3F, GBP_Extend}, {0x10AE5, 0x10AE6, GBP_Extend}, + {0x10D24, 0x10D27, GBP_Extend}, + {0x10F46, 0x10F50, GBP_Extend}, {0x11000, 0x11000, GBP_SpacingMark}, {0x11001, 0x11001, GBP_Extend}, {0x11002, 0x11002, GBP_SpacingMark}, @@ -1177,10 +1179,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x110B7, 0x110B8, GBP_SpacingMark}, {0x110B9, 0x110BA, GBP_Extend}, {0x110BD, 0x110BD, GBP_Prepend}, + {0x110CD, 0x110CD, GBP_Prepend}, {0x11100, 0x11102, GBP_Extend}, {0x11127, 0x1112B, GBP_Extend}, {0x1112C, 0x1112C, GBP_SpacingMark}, {0x1112D, 0x11134, 
GBP_Extend}, + {0x11145, 0x11146, GBP_SpacingMark}, {0x11173, 0x11173, GBP_Extend}, {0x11180, 0x11181, GBP_Extend}, {0x11182, 0x11182, GBP_SpacingMark}, @@ -1188,7 +1192,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x111B6, 0x111BE, GBP_Extend}, {0x111BF, 0x111C0, GBP_SpacingMark}, {0x111C2, 0x111C3, GBP_Prepend}, - {0x111CA, 0x111CC, GBP_Extend}, + {0x111C9, 0x111CC, GBP_Extend}, {0x1122C, 0x1122E, GBP_SpacingMark}, {0x1122F, 0x11231, GBP_Extend}, {0x11232, 0x11233, GBP_SpacingMark}, @@ -1201,7 +1205,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x112E3, 0x112EA, GBP_Extend}, {0x11300, 0x11301, GBP_Extend}, {0x11302, 0x11303, GBP_SpacingMark}, - {0x1133C, 0x1133C, GBP_Extend}, + {0x1133B, 0x1133C, GBP_Extend}, {0x1133E, 0x1133E, GBP_Extend}, {0x1133F, 0x1133F, GBP_SpacingMark}, {0x11340, 0x11340, GBP_Extend}, @@ -1218,6 +1222,7 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x11442, 0x11444, GBP_Extend}, {0x11445, 0x11445, GBP_SpacingMark}, {0x11446, 0x11446, GBP_Extend}, + {0x1145E, 0x1145E, GBP_Extend}, {0x114B0, 0x114B0, GBP_Extend}, {0x114B1, 0x114B2, GBP_SpacingMark}, {0x114B3, 0x114B8, GBP_Extend}, @@ -1255,6 +1260,29 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x11722, 0x11725, GBP_Extend}, {0x11726, 0x11726, GBP_SpacingMark}, {0x11727, 0x1172B, GBP_Extend}, + {0x1182C, 0x1182E, GBP_SpacingMark}, + {0x1182F, 0x11837, GBP_Extend}, + {0x11838, 0x11838, GBP_SpacingMark}, + {0x11839, 0x1183A, GBP_Extend}, + {0x119D1, 0x119D3, GBP_SpacingMark}, + {0x119D4, 0x119D7, GBP_Extend}, + {0x119DA, 0x119DB, GBP_Extend}, + {0x119DC, 0x119DF, GBP_SpacingMark}, + {0x119E0, 0x119E0, GBP_Extend}, + {0x119E4, 0x119E4, GBP_SpacingMark}, + {0x11A01, 0x11A0A, GBP_Extend}, + {0x11A33, 0x11A38, GBP_Extend}, + {0x11A39, 0x11A39, GBP_SpacingMark}, + {0x11A3A, 0x11A3A, GBP_Prepend}, + {0x11A3B, 0x11A3E, GBP_Extend}, + {0x11A47, 0x11A47, GBP_Extend}, + {0x11A51, 0x11A56, GBP_Extend}, + 
{0x11A57, 0x11A58, GBP_SpacingMark}, + {0x11A59, 0x11A5B, GBP_Extend}, + {0x11A84, 0x11A89, GBP_Prepend}, + {0x11A8A, 0x11A96, GBP_Extend}, + {0x11A97, 0x11A97, GBP_SpacingMark}, + {0x11A98, 0x11A99, GBP_Extend}, {0x11C2F, 0x11C2F, GBP_SpacingMark}, {0x11C30, 0x11C36, GBP_Extend}, {0x11C38, 0x11C3D, GBP_Extend}, @@ -1267,9 +1295,25 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x11CB2, 0x11CB3, GBP_Extend}, {0x11CB4, 0x11CB4, GBP_SpacingMark}, {0x11CB5, 0x11CB6, GBP_Extend}, + {0x11D31, 0x11D36, GBP_Extend}, + {0x11D3A, 0x11D3A, GBP_Extend}, + {0x11D3C, 0x11D3D, GBP_Extend}, + {0x11D3F, 0x11D45, GBP_Extend}, + {0x11D46, 0x11D46, GBP_Prepend}, + {0x11D47, 0x11D47, GBP_Extend}, + {0x11D8A, 0x11D8E, GBP_SpacingMark}, + {0x11D90, 0x11D91, GBP_Extend}, + {0x11D93, 0x11D94, GBP_SpacingMark}, + {0x11D95, 0x11D95, GBP_Extend}, + {0x11D96, 0x11D96, GBP_SpacingMark}, + {0x11D97, 0x11D97, GBP_Extend}, + {0x11EF3, 0x11EF4, GBP_Extend}, + {0x11EF5, 0x11EF6, GBP_SpacingMark}, + {0x13430, 0x13438, GBP_Control}, {0x16AF0, 0x16AF4, GBP_Extend}, {0x16B30, 0x16B36, GBP_Extend}, - {0x16F51, 0x16F7E, GBP_SpacingMark}, + {0x16F4F, 0x16F4F, GBP_Extend}, + {0x16F51, 0x16F87, GBP_SpacingMark}, {0x16F8F, 0x16F92, GBP_Extend}, {0x1BC9D, 0x1BC9E, GBP_Extend}, {0x1BCA0, 0x1BCA3, GBP_Control}, @@ -1294,38 +1338,12 @@ static const struct GraphemeBreakProperties gb_prop_default[] = { {0x1E01B, 0x1E021, GBP_Extend}, {0x1E023, 0x1E024, GBP_Extend}, {0x1E026, 0x1E02A, GBP_Extend}, + {0x1E130, 0x1E136, GBP_Extend}, + {0x1E2EC, 0x1E2EF, GBP_Extend}, {0x1E8D0, 0x1E8D6, GBP_Extend}, {0x1E944, 0x1E94A, GBP_Extend}, {0x1F1E6, 0x1F1FF, GBP_Regional_Indicator}, - {0x1F385, 0x1F385, GBP_E_Base}, - {0x1F3C3, 0x1F3C4, GBP_E_Base}, - {0x1F3CA, 0x1F3CB, GBP_E_Base}, - {0x1F3FB, 0x1F3FF, GBP_E_Modifier}, - {0x1F442, 0x1F443, GBP_E_Base}, - {0x1F446, 0x1F450, GBP_E_Base}, - {0x1F466, 0x1F469, GBP_E_Base_GAZ}, - {0x1F46E, 0x1F46E, GBP_E_Base}, - {0x1F470, 0x1F478, GBP_E_Base}, - {0x1F47C, 
0x1F47C, GBP_E_Base}, - {0x1F481, 0x1F483, GBP_E_Base}, - {0x1F485, 0x1F487, GBP_E_Base}, - {0x1F48B, 0x1F48B, GBP_Glue_After_Zwj}, - {0x1F4AA, 0x1F4AA, GBP_E_Base}, - {0x1F575, 0x1F575, GBP_E_Base}, - {0x1F57A, 0x1F57A, GBP_E_Base}, - {0x1F590, 0x1F590, GBP_E_Base}, - {0x1F595, 0x1F596, GBP_E_Base}, - {0x1F5E8, 0x1F5E8, GBP_Glue_After_Zwj}, - {0x1F645, 0x1F647, GBP_E_Base}, - {0x1F64B, 0x1F64F, GBP_E_Base}, - {0x1F6A3, 0x1F6A3, GBP_E_Base}, - {0x1F6B4, 0x1F6B6, GBP_E_Base}, - {0x1F6C0, 0x1F6C0, GBP_E_Base}, - {0x1F918, 0x1F91E, GBP_E_Base}, - {0x1F926, 0x1F926, GBP_E_Base}, - {0x1F930, 0x1F930, GBP_E_Base}, - {0x1F933, 0x1F939, GBP_E_Base}, - {0x1F93C, 0x1F93E, GBP_E_Base}, + {0x1F3FB, 0x1F3FF, GBP_Extend}, {0xE0000, 0xE0000, GBP_Control}, {0xE0001, 0xE0001, GBP_Control}, {0xE0002, 0xE001F, GBP_Control}, diff --git a/src/static_libs/libunibreak/graphemebreakdef.h b/src/static_libs/libunibreak/graphemebreakdef.h index 0de1f3d623..90ccfbd5f1 100644 --- a/src/static_libs/libunibreak/graphemebreakdef.h +++ b/src/static_libs/libunibreak/graphemebreakdef.h @@ -2,7 +2,7 @@ * Grapheme breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2016 Andreas Röver + * Copyright (C) 2016-2019 Andreas Röver * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -28,6 +28,10 @@ * Unicode 9.0.0: * * + * This library has been updated according to Revision 35, for + * Unicode 12.0.0: + * + * * The Unicode Terms of Use are available at * */ @@ -45,13 +49,15 @@ /** * Word break classes. This is a direct mapping of Table 2 of Unicode - * Standard Annex 29 + * Standard Annex 29. 
*/ enum GraphemeBreakClass { GBP_CR, GBP_LF, GBP_Control, + GBP_Virama, + GBP_LinkingConsonant, GBP_Extend, GBP_ZWJ, GBP_Regional_Indicator, @@ -62,10 +68,6 @@ enum GraphemeBreakClass GBP_T, GBP_LV, GBP_LVT, - GBP_E_Base, - GBP_E_Modifier, - GBP_Glue_After_Zwj, - GBP_E_Base_GAZ, GBP_Other, GBP_Undefined }; @@ -76,7 +78,7 @@ enum GraphemeBreakClass */ struct GraphemeBreakProperties { - utf32_t start; /**< Starting coding point */ - utf32_t end; /**< End coding point, including */ + utf32_t start; /**< Start codepoint */ + utf32_t end; /**< End codepoint, inclusive */ enum GraphemeBreakClass prop; /**< The grapheme breaking property */ }; diff --git a/src/static_libs/libunibreak/linebreak.c b/src/static_libs/libunibreak/linebreak.c index 41f23c1c08..98e2730314 100644 --- a/src/static_libs/libunibreak/linebreak.c +++ b/src/static_libs/libunibreak/linebreak.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2016 Wu Yongwei + * Copyright (C) 2008-2019 Wu Yongwei * Copyright (C) 2013 Petr Filipsky * * This software is provided 'as-is', without any express or implied @@ -31,9 +31,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 37, for - * Unicode 9.0.0: - * + * This library has been updated according to Revision 43, for + * Unicode 12.0.0: + * * * The Unicode Terms of Use are available at * @@ -80,7 +80,9 @@ enum BreakAction /** * Break action pair table. This is a direct mapping of Table 2 of - * Unicode Standard Annex 14, Revision 37, except the "CB" part. + * Unicode Standard Annex 14, Revision 37, except for ZWJ (manually + * adjusted after special processing as per LB8a of Revision 41) and CB + * (manually added as per LB20). 
*/ static enum BreakAction baTable[LBP_CB][LBP_CB] = { { /* OP */ @@ -270,17 +272,17 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = { CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* ZWJ */ - DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, - PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - IND_BRK, DIR_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, + IND_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, PRH_BRK, + PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, IND_BRK, + DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, IND_BRK, IND_BRK, IND_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, { /* CB */ DIR_BRK, PRH_BRK, PRH_BRK, IND_BRK, IND_BRK, DIR_BRK, PRH_BRK, PRH_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, PRH_BRK, CMI_BRK, PRH_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, - DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK, DIR_BRK }, + DIR_BRK, DIR_BRK, DIR_BRK, IND_BRK, DIR_BRK }, }; /** @@ -288,8 +290,9 @@ static enum BreakAction baTable[LBP_CB][LBP_CB] = { */ struct LineBreakPropertiesIndex { - utf32_t end; /**< End coding point */ - const struct LineBreakProperties *lbp;/**< Pointer to line breaking properties */ + utf32_t end; /**< End codepoint */ + const struct LineBreakProperties *lbp; /**< Pointer to line breaking + properties */ }; /** @@ -335,7 +338,7 @@ static __inline int ends_with(const char *str, const char *suffix, * Initializes the second-level index to the line breaking properties. * If it is not called, the performance of #get_char_lb_class_lang (and * thus the main functionality) can be pretty bad, especially for big - * code points like those of Chinese. + * codepoints like those of Chinese. 
*/ void init_linebreak(void) { @@ -612,12 +615,18 @@ static int get_lb_result_lookup( break; } + /* Special processing due to rule LB8a */ + if (lbpCtx->fLb8aZwj) + { + brk = LINEBREAK_NOBREAK; + } + /* Special processing due to rule LB21a */ if (lbpCtx->fLb21aHebrew && (lbpCtx->lbcCur == LBP_HY || lbpCtx->lbcCur == LBP_BA)) { brk = LINEBREAK_NOBREAK; - lbpCtx->fLb21aHebrew = 0; + lbpCtx->fLb21aHebrew = false; } else { @@ -663,17 +672,21 @@ void lb_init_break_context( lbpCtx->lbcCur = resolve_lb_class( get_char_lb_class_lang(ch, lbpCtx->lbpLang), lbpCtx->lang); - lbpCtx->fLb21aHebrew = 0; + lbpCtx->fLb8aZwj = + (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_ZWJ); + lbpCtx->fLb10LeadSpace = + (get_char_lb_class_lang(ch, lbpCtx->lbpLang) == LBP_SP); + lbpCtx->fLb21aHebrew = false; lbpCtx->cLb30aRI = 0; treat_first_char(lbpCtx); } /** - * Updates LineBreakingContext for the next code point and returns + * Updates LineBreakingContext for the next codepoint and returns * the detected break. 
* * @param[in,out] lbpCtx pointer to the line breaking context - * @param[in] ch Unicode code point + * @param[in] ch Unicode codepoint * @return break result, one of #LINEBREAK_MUSTBREAK, * #LINEBREAK_ALLOWBREAK, and #LINEBREAK_NOBREAK * @post the line breaking context is updated @@ -700,6 +713,25 @@ int lb_process_next_char( default: break; } + + /* Special processing due to rule LB8a */ + if (lbpCtx->lbcNew == LBP_ZWJ) + { + lbpCtx->fLb8aZwj = true; + } + else + { + lbpCtx->fLb8aZwj = false; + } + + /* Special processing due to rule LB10 */ + if (lbpCtx->fLb10LeadSpace) + { + if (lbpCtx->lbcNew == LBP_CM || lbpCtx->lbcNew == LBP_ZWJ) + brk = LINEBREAK_ALLOWBREAK; + lbpCtx->fLb10LeadSpace = false; + } + return brk; } diff --git a/src/static_libs/libunibreak/linebreak.h b/src/static_libs/libunibreak/linebreak.h index fd7351191b..fa88094b4b 100644 --- a/src/static_libs/libunibreak/linebreak.h +++ b/src/static_libs/libunibreak/linebreak.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2016 Wu Yongwei + * Copyright (C) 2008-2019 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. 
In no event will the author be held liable for any damages @@ -30,9 +30,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 37, for - * Unicode 9.0.0: - * + * This library has been updated according to Revision 43, for + * Unicode 12.0.0: + * * * The Unicode Terms of Use are available at * diff --git a/src/static_libs/libunibreak/linebreakdata.c b/src/static_libs/libunibreak/linebreakdata.c index c571f2da00..23d9072baa 100644 --- a/src/static_libs/libunibreak/linebreakdata.c +++ b/src/static_libs/libunibreak/linebreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# LineBreak-9.0.0.txt -# Date: 2016-05-26, 01:00:00 GMT [KW, LI] +# LineBreak-12.1.0.txt +# Date: 2019-03-31, 22:04:15 GMT [KW, LI] */ #include "linebreakdef.h" @@ -94,7 +94,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x037E, 0x037E, LBP_IS }, { 0x037F, 0x0482, LBP_AL }, { 0x0483, 0x0489, LBP_CM }, - { 0x048A, 0x0587, LBP_AL }, + { 0x048A, 0x0588, LBP_AL }, { 0x0589, 0x0589, LBP_IS }, { 0x058A, 0x058A, LBP_BA }, { 0x058D, 0x058E, LBP_AL }, @@ -149,7 +149,10 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x07F4, 0x07F7, LBP_AL }, { 0x07F8, 0x07F8, LBP_IS }, { 0x07F9, 0x07F9, LBP_EX }, - { 0x07FA, 0x0815, LBP_AL }, + { 0x07FA, 0x07FA, LBP_AL }, + { 0x07FD, 0x07FD, LBP_CM }, + { 0x07FE, 0x07FF, LBP_PR }, + { 0x0800, 0x0815, LBP_AL }, { 0x0816, 0x0819, LBP_CM }, { 0x081A, 0x081A, LBP_AL }, { 0x081B, 0x0823, LBP_CM }, @@ -160,7 +163,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0830, 0x0858, LBP_AL }, { 0x0859, 0x085B, LBP_CM }, { 0x085E, 0x08BD, LBP_AL }, - { 0x08D4, 0x08E1, LBP_CM }, + { 0x08D3, 0x08E1, LBP_CM }, { 0x08E2, 0x08E2, LBP_AL }, { 0x08E3, 0x0903, LBP_CM }, { 0x0904, 0x0939, LBP_AL }, @@ -190,14 +193,17 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x09F9, 0x09F9, LBP_PO }, { 0x09FA, 0x09FA, LBP_AL }, { 0x09FB, 0x09FB, LBP_PR }, - { 0x0A01, 0x0A03, LBP_CM }, + { 0x09FC, 0x09FD, 
LBP_AL }, + { 0x09FE, 0x0A03, LBP_CM }, { 0x0A05, 0x0A39, LBP_AL }, { 0x0A3C, 0x0A51, LBP_CM }, { 0x0A59, 0x0A5E, LBP_AL }, { 0x0A66, 0x0A6F, LBP_NU }, { 0x0A70, 0x0A71, LBP_CM }, { 0x0A72, 0x0A74, LBP_AL }, - { 0x0A75, 0x0A83, LBP_CM }, + { 0x0A75, 0x0A75, LBP_CM }, + { 0x0A76, 0x0A76, LBP_AL }, + { 0x0A81, 0x0A83, LBP_CM }, { 0x0A85, 0x0AB9, LBP_AL }, { 0x0ABC, 0x0ABC, LBP_CM }, { 0x0ABD, 0x0ABD, LBP_AL }, @@ -208,7 +214,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0AF0, 0x0AF0, LBP_AL }, { 0x0AF1, 0x0AF1, LBP_PR }, { 0x0AF9, 0x0AF9, LBP_AL }, - { 0x0B01, 0x0B03, LBP_CM }, + { 0x0AFA, 0x0B03, LBP_CM }, { 0x0B05, 0x0B39, LBP_AL }, { 0x0B3C, 0x0B3C, LBP_CM }, { 0x0B3D, 0x0B3D, LBP_AL }, @@ -226,14 +232,16 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0BF0, 0x0BF8, LBP_AL }, { 0x0BF9, 0x0BF9, LBP_PR }, { 0x0BFA, 0x0BFA, LBP_AL }, - { 0x0C00, 0x0C03, LBP_CM }, + { 0x0C00, 0x0C04, LBP_CM }, { 0x0C05, 0x0C3D, LBP_AL }, { 0x0C3E, 0x0C56, LBP_CM }, { 0x0C58, 0x0C61, LBP_AL }, { 0x0C62, 0x0C63, LBP_CM }, { 0x0C66, 0x0C6F, LBP_NU }, + { 0x0C77, 0x0C77, LBP_BB }, { 0x0C78, 0x0C80, LBP_AL }, { 0x0C81, 0x0C83, LBP_CM }, + { 0x0C84, 0x0C84, LBP_BB }, { 0x0C85, 0x0CB9, LBP_AL }, { 0x0CBC, 0x0CBC, LBP_CM }, { 0x0CBD, 0x0CBD, LBP_AL }, @@ -242,8 +250,10 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x0CE2, 0x0CE3, LBP_CM }, { 0x0CE6, 0x0CEF, LBP_NU }, { 0x0CF1, 0x0CF2, LBP_AL }, - { 0x0D01, 0x0D03, LBP_CM }, - { 0x0D05, 0x0D3D, LBP_AL }, + { 0x0D00, 0x0D03, LBP_CM }, + { 0x0D05, 0x0D3A, LBP_AL }, + { 0x0D3B, 0x0D3C, LBP_CM }, + { 0x0D3D, 0x0D3D, LBP_AL }, { 0x0D3E, 0x0D4D, LBP_CM }, { 0x0D4E, 0x0D56, LBP_AL }, { 0x0D57, 0x0D57, LBP_CM }, @@ -417,11 +427,11 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1CD4, 0x1CE8, LBP_CM }, { 0x1CE9, 0x1CEC, LBP_AL }, { 0x1CED, 0x1CED, LBP_CM }, - { 0x1CEE, 0x1CF1, LBP_AL }, - { 0x1CF2, 0x1CF4, LBP_CM }, + { 0x1CEE, 0x1CF3, LBP_AL }, + { 0x1CF4, 0x1CF4, LBP_CM }, { 0x1CF5, 0x1CF6, 
LBP_AL }, - { 0x1CF8, 0x1CF9, LBP_CM }, - { 0x1D00, 0x1DBF, LBP_AL }, + { 0x1CF7, 0x1CF9, LBP_CM }, + { 0x1CFA, 0x1DBF, LBP_AL }, { 0x1DC0, 0x1DFF, LBP_CM }, { 0x1E00, 0x1FFC, LBP_AL }, { 0x1FFD, 0x1FFD, LBP_BB }, @@ -430,7 +440,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x2007, 0x2007, LBP_GL }, { 0x2008, 0x200A, LBP_BA }, { 0x200B, 0x200B, LBP_ZW }, - { 0x200C, 0x200F, LBP_CM }, + { 0x200C, 0x200C, LBP_CM }, + { 0x200D, 0x200D, LBP_ZWJ }, + { 0x200E, 0x200F, LBP_CM }, { 0x2010, 0x2010, LBP_BA }, { 0x2011, 0x2011, LBP_GL }, { 0x2012, 0x2013, LBP_BA }, @@ -808,7 +820,11 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x2E3F, 0x2E3F, LBP_AL }, { 0x2E40, 0x2E41, LBP_BA }, { 0x2E42, 0x2E42, LBP_OP }, - { 0x2E43, 0x2E44, LBP_BA }, + { 0x2E43, 0x2E4A, LBP_BA }, + { 0x2E4B, 0x2E4B, LBP_AL }, + { 0x2E4C, 0x2E4C, LBP_BA }, + { 0x2E4D, 0x2E4D, LBP_AL }, + { 0x2E4E, 0x2E4F, LBP_BA }, { 0x2E80, 0x2FFB, LBP_ID }, { 0x3000, 0x3000, LBP_BA }, { 0x3001, 0x3002, LBP_CL }, @@ -942,7 +958,8 @@ const struct LineBreakProperties lb_prop_default[] = { { 0xA8E0, 0xA8F1, LBP_CM }, { 0xA8F2, 0xA8FB, LBP_AL }, { 0xA8FC, 0xA8FC, LBP_BB }, - { 0xA8FD, 0xA8FD, LBP_AL }, + { 0xA8FD, 0xA8FE, LBP_AL }, + { 0xA8FF, 0xA8FF, LBP_CM }, { 0xA900, 0xA909, LBP_NU }, { 0xA90A, 0xA925, LBP_AL }, { 0xA926, 0xA92D, LBP_CM }, @@ -1907,9 +1924,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1091F, 0x1091F, LBP_BA }, { 0x10920, 0x10A00, LBP_AL }, { 0x10A01, 0x10A0F, LBP_CM }, - { 0x10A10, 0x10A33, LBP_AL }, + { 0x10A10, 0x10A35, LBP_AL }, { 0x10A38, 0x10A3F, LBP_CM }, - { 0x10A40, 0x10A47, LBP_AL }, + { 0x10A40, 0x10A48, LBP_AL }, { 0x10A50, 0x10A57, LBP_BA }, { 0x10A58, 0x10AE4, LBP_AL }, { 0x10AE5, 0x10AE6, LBP_CM }, @@ -1918,7 +1935,12 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x10AF6, 0x10AF6, LBP_IN }, { 0x10B00, 0x10B35, LBP_AL }, { 0x10B39, 0x10B3F, LBP_BA }, - { 0x10B40, 0x10E7E, LBP_AL }, + { 0x10B40, 0x10D23, LBP_AL }, + { 0x10D24, 
0x10D27, LBP_CM }, + { 0x10D30, 0x10D39, LBP_NU }, + { 0x10E60, 0x10F45, LBP_AL }, + { 0x10F46, 0x10F50, LBP_CM }, + { 0x10F51, 0x10FF6, LBP_AL }, { 0x11000, 0x11002, LBP_CM }, { 0x11003, 0x11037, LBP_AL }, { 0x11038, 0x11046, LBP_CM }, @@ -1930,13 +1952,15 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x110B0, 0x110BA, LBP_CM }, { 0x110BB, 0x110BD, LBP_AL }, { 0x110BE, 0x110C1, LBP_BA }, - { 0x110D0, 0x110E8, LBP_AL }, + { 0x110CD, 0x110E8, LBP_AL }, { 0x110F0, 0x110F9, LBP_NU }, { 0x11100, 0x11102, LBP_CM }, { 0x11103, 0x11126, LBP_AL }, { 0x11127, 0x11134, LBP_CM }, { 0x11136, 0x1113F, LBP_NU }, { 0x11140, 0x11143, LBP_BA }, + { 0x11144, 0x11144, LBP_AL }, + { 0x11145, 0x11146, LBP_CM }, { 0x11150, 0x11172, LBP_AL }, { 0x11173, 0x11173, LBP_CM }, { 0x11174, 0x11174, LBP_AL }, @@ -1949,8 +1973,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x111C5, 0x111C6, LBP_BA }, { 0x111C7, 0x111C7, LBP_AL }, { 0x111C8, 0x111C8, LBP_BA }, - { 0x111C9, 0x111C9, LBP_AL }, - { 0x111CA, 0x111CC, LBP_CM }, + { 0x111C9, 0x111CC, LBP_CM }, { 0x111CD, 0x111CD, LBP_AL }, { 0x111D0, 0x111D9, LBP_NU }, { 0x111DA, 0x111DA, LBP_AL }, @@ -1971,7 +1994,7 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x112F0, 0x112F9, LBP_NU }, { 0x11300, 0x11303, LBP_CM }, { 0x11305, 0x11339, LBP_AL }, - { 0x1133C, 0x1133C, LBP_CM }, + { 0x1133B, 0x1133C, LBP_CM }, { 0x1133D, 0x1133D, LBP_AL }, { 0x1133E, 0x1134D, LBP_CM }, { 0x11350, 0x11350, LBP_AL }, @@ -1985,7 +2008,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1144F, 0x1144F, LBP_AL }, { 0x11450, 0x11459, LBP_NU }, { 0x1145B, 0x1145B, LBP_BA }, - { 0x1145D, 0x114AF, LBP_AL }, + { 0x1145D, 0x1145D, LBP_AL }, + { 0x1145E, 0x1145E, LBP_CM }, + { 0x1145F, 0x114AF, LBP_AL }, { 0x114B0, 0x114C3, LBP_CM }, { 0x114C4, 0x114C7, LBP_AL }, { 0x114D0, 0x114D9, LBP_NU }, @@ -2006,15 +2031,44 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x11660, 0x1166C, LBP_BB }, { 0x11680, 0x116AA, LBP_AL 
}, { 0x116AB, 0x116B7, LBP_CM }, + { 0x116B8, 0x116B8, LBP_AL }, { 0x116C0, 0x116C9, LBP_NU }, { 0x11700, 0x1172B, LBP_SA }, { 0x11730, 0x11739, LBP_NU }, { 0x1173A, 0x1173B, LBP_SA }, { 0x1173C, 0x1173E, LBP_BA }, { 0x1173F, 0x1173F, LBP_SA }, - { 0x118A0, 0x118DF, LBP_AL }, + { 0x11800, 0x1182B, LBP_AL }, + { 0x1182C, 0x1183A, LBP_CM }, + { 0x1183B, 0x118DF, LBP_AL }, { 0x118E0, 0x118E9, LBP_NU }, - { 0x118EA, 0x11C2E, LBP_AL }, + { 0x118EA, 0x119D0, LBP_AL }, + { 0x119D1, 0x119E0, LBP_CM }, + { 0x119E1, 0x119E1, LBP_AL }, + { 0x119E2, 0x119E2, LBP_BB }, + { 0x119E3, 0x119E3, LBP_AL }, + { 0x119E4, 0x119E4, LBP_CM }, + { 0x11A00, 0x11A00, LBP_AL }, + { 0x11A01, 0x11A0A, LBP_CM }, + { 0x11A0B, 0x11A32, LBP_AL }, + { 0x11A33, 0x11A39, LBP_CM }, + { 0x11A3A, 0x11A3A, LBP_AL }, + { 0x11A3B, 0x11A3E, LBP_CM }, + { 0x11A3F, 0x11A3F, LBP_BB }, + { 0x11A40, 0x11A40, LBP_AL }, + { 0x11A41, 0x11A44, LBP_BA }, + { 0x11A45, 0x11A45, LBP_BB }, + { 0x11A46, 0x11A46, LBP_AL }, + { 0x11A47, 0x11A47, LBP_CM }, + { 0x11A50, 0x11A50, LBP_AL }, + { 0x11A51, 0x11A5B, LBP_CM }, + { 0x11A5C, 0x11A89, LBP_AL }, + { 0x11A8A, 0x11A99, LBP_CM }, + { 0x11A9A, 0x11A9C, LBP_BA }, + { 0x11A9D, 0x11A9D, LBP_AL }, + { 0x11A9E, 0x11AA0, LBP_BB }, + { 0x11AA1, 0x11AA2, LBP_BA }, + { 0x11AC0, 0x11C2E, LBP_AL }, { 0x11C2F, 0x11C3F, LBP_CM }, { 0x11C40, 0x11C40, LBP_AL }, { 0x11C41, 0x11C45, LBP_BA }, @@ -2024,6 +2078,21 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x11C71, 0x11C71, LBP_EX }, { 0x11C72, 0x11C8F, LBP_AL }, { 0x11C92, 0x11CB6, LBP_CM }, + { 0x11D00, 0x11D30, LBP_AL }, + { 0x11D31, 0x11D45, LBP_CM }, + { 0x11D46, 0x11D46, LBP_AL }, + { 0x11D47, 0x11D47, LBP_CM }, + { 0x11D50, 0x11D59, LBP_NU }, + { 0x11D60, 0x11D89, LBP_AL }, + { 0x11D8A, 0x11D97, LBP_CM }, + { 0x11D98, 0x11D98, LBP_AL }, + { 0x11DA0, 0x11DA9, LBP_NU }, + { 0x11EE0, 0x11EF2, LBP_AL }, + { 0x11EF3, 0x11EF6, LBP_CM }, + { 0x11EF7, 0x11FDC, LBP_AL }, + { 0x11FDD, 0x11FE0, LBP_PO }, + { 0x11FE1, 0x11FF1, 
LBP_AL }, + { 0x11FFF, 0x11FFF, LBP_BA }, { 0x12000, 0x1246E, LBP_AL }, { 0x12470, 0x12474, LBP_BA }, { 0x12480, 0x13257, LBP_AL }, @@ -2039,7 +2108,11 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1328A, 0x13378, LBP_AL }, { 0x13379, 0x13379, LBP_OP }, { 0x1337A, 0x1337B, LBP_CL }, - { 0x1337C, 0x145CD, LBP_AL }, + { 0x1337C, 0x1342E, LBP_AL }, + { 0x13430, 0x13436, LBP_GL }, + { 0x13437, 0x13437, LBP_OP }, + { 0x13438, 0x13438, LBP_CL }, + { 0x14400, 0x145CD, LBP_AL }, { 0x145CE, 0x145CE, LBP_OP }, { 0x145CF, 0x145CF, LBP_CL }, { 0x145D0, 0x16A5E, LBP_AL }, @@ -2055,11 +2128,17 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x16B44, 0x16B44, LBP_BA }, { 0x16B45, 0x16B45, LBP_AL }, { 0x16B50, 0x16B59, LBP_NU }, - { 0x16B5B, 0x16F50, LBP_AL }, + { 0x16B5B, 0x16E96, LBP_AL }, + { 0x16E97, 0x16E98, LBP_BA }, + { 0x16E99, 0x16F4A, LBP_AL }, + { 0x16F4F, 0x16F4F, LBP_CM }, + { 0x16F50, 0x16F50, LBP_AL }, { 0x16F51, 0x16F92, LBP_CM }, { 0x16F93, 0x16F9F, LBP_AL }, - { 0x16FE0, 0x16FE0, LBP_NS }, - { 0x17000, 0x1B001, LBP_ID }, + { 0x16FE0, 0x16FE3, LBP_NS }, + { 0x17000, 0x1B11E, LBP_ID }, + { 0x1B150, 0x1B167, LBP_CJ }, + { 0x1B170, 0x1B2FB, LBP_ID }, { 0x1BC00, 0x1BC9C, LBP_AL }, { 0x1BC9D, 0x1BC9E, LBP_CM }, { 0x1BC9F, 0x1BC9F, LBP_BA }, @@ -2088,22 +2167,34 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1DA87, 0x1DA8A, LBP_BA }, { 0x1DA8B, 0x1DA8B, LBP_AL }, { 0x1DA9B, 0x1E02A, LBP_CM }, + { 0x1E100, 0x1E12C, LBP_AL }, + { 0x1E130, 0x1E136, LBP_CM }, + { 0x1E137, 0x1E13D, LBP_AL }, + { 0x1E140, 0x1E149, LBP_NU }, + { 0x1E14E, 0x1E2EB, LBP_AL }, + { 0x1E2EC, 0x1E2EF, LBP_CM }, + { 0x1E2F0, 0x1E2F9, LBP_NU }, + { 0x1E2FF, 0x1E2FF, LBP_PR }, { 0x1E800, 0x1E8CF, LBP_AL }, { 0x1E8D0, 0x1E8D6, LBP_CM }, { 0x1E900, 0x1E943, LBP_AL }, { 0x1E944, 0x1E94A, LBP_CM }, + { 0x1E94B, 0x1E94B, LBP_AL }, { 0x1E950, 0x1E959, LBP_NU }, { 0x1E95E, 0x1E95F, LBP_OP }, - { 0x1EE00, 0x1EEF1, LBP_AL }, + { 0x1EC71, 0x1ECAB, LBP_AL }, + { 0x1ECAC, 
0x1ECAC, LBP_PO }, + { 0x1ECAD, 0x1ECAF, LBP_AL }, + { 0x1ECB0, 0x1ECB0, LBP_PO }, + { 0x1ECB1, 0x1EEF1, LBP_AL }, { 0x1F000, 0x1F0FF, LBP_ID }, { 0x1F100, 0x1F10C, LBP_AI }, { 0x1F10D, 0x1F10F, LBP_ID }, { 0x1F110, 0x1F12D, LBP_AI }, - { 0x1F12E, 0x1F12E, LBP_AL }, - { 0x1F12F, 0x1F12F, LBP_ID }, + { 0x1F12E, 0x1F12F, LBP_AL }, { 0x1F130, 0x1F169, LBP_AI }, - { 0x1F16A, 0x1F16B, LBP_AL }, - { 0x1F16C, 0x1F16F, LBP_ID }, + { 0x1F16A, 0x1F16C, LBP_AL }, + { 0x1F16D, 0x1F16F, LBP_ID }, { 0x1F170, 0x1F1AC, LBP_AI }, { 0x1F1AD, 0x1F1E5, LBP_ID }, { 0x1F1E6, 0x1F1FF, LBP_RI }, @@ -2115,29 +2206,31 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F3B5, 0x1F3B6, LBP_AL }, { 0x1F3B7, 0x1F3BB, LBP_ID }, { 0x1F3BC, 0x1F3BC, LBP_AL }, - { 0x1F3BD, 0x1F3C2, LBP_ID }, - { 0x1F3C3, 0x1F3C4, LBP_EB }, - { 0x1F3C5, 0x1F3C9, LBP_ID }, - { 0x1F3CA, 0x1F3CB, LBP_EB }, - { 0x1F3CC, 0x1F3FA, LBP_ID }, + { 0x1F3BD, 0x1F3C1, LBP_ID }, + { 0x1F3C2, 0x1F3C4, LBP_EB }, + { 0x1F3C5, 0x1F3C6, LBP_ID }, + { 0x1F3C7, 0x1F3C7, LBP_EB }, + { 0x1F3C8, 0x1F3C9, LBP_ID }, + { 0x1F3CA, 0x1F3CC, LBP_EB }, + { 0x1F3CD, 0x1F3FA, LBP_ID }, { 0x1F3FB, 0x1F3FF, LBP_EM }, { 0x1F400, 0x1F441, LBP_ID }, { 0x1F442, 0x1F443, LBP_EB }, { 0x1F444, 0x1F445, LBP_ID }, { 0x1F446, 0x1F450, LBP_EB }, { 0x1F451, 0x1F465, LBP_ID }, - { 0x1F466, 0x1F469, LBP_EB }, - { 0x1F46A, 0x1F46D, LBP_ID }, - { 0x1F46E, 0x1F46E, LBP_EB }, - { 0x1F46F, 0x1F46F, LBP_ID }, - { 0x1F470, 0x1F478, LBP_EB }, + { 0x1F466, 0x1F478, LBP_EB }, { 0x1F479, 0x1F47B, LBP_ID }, { 0x1F47C, 0x1F47C, LBP_EB }, { 0x1F47D, 0x1F480, LBP_ID }, { 0x1F481, 0x1F483, LBP_EB }, { 0x1F484, 0x1F484, LBP_ID }, { 0x1F485, 0x1F487, LBP_EB }, - { 0x1F488, 0x1F49F, LBP_ID }, + { 0x1F488, 0x1F48E, LBP_ID }, + { 0x1F48F, 0x1F48F, LBP_EB }, + { 0x1F490, 0x1F490, LBP_ID }, + { 0x1F491, 0x1F491, LBP_EB }, + { 0x1F492, 0x1F49F, LBP_ID }, { 0x1F4A0, 0x1F4A0, LBP_AL }, { 0x1F4A1, 0x1F4A1, LBP_ID }, { 0x1F4A2, 0x1F4A2, LBP_AL }, @@ -2155,8 +2248,8 @@ const 
struct LineBreakProperties lb_prop_default[] = { { 0x1F517, 0x1F524, LBP_AL }, { 0x1F525, 0x1F531, LBP_ID }, { 0x1F532, 0x1F549, LBP_AL }, - { 0x1F54A, 0x1F574, LBP_ID }, - { 0x1F575, 0x1F575, LBP_EB }, + { 0x1F54A, 0x1F573, LBP_ID }, + { 0x1F574, 0x1F575, LBP_EB }, { 0x1F576, 0x1F579, LBP_ID }, { 0x1F57A, 0x1F57A, LBP_EB }, { 0x1F57B, 0x1F58F, LBP_ID }, @@ -2181,7 +2274,9 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F6B4, 0x1F6B6, LBP_EB }, { 0x1F6B7, 0x1F6BF, LBP_ID }, { 0x1F6C0, 0x1F6C0, LBP_EB }, - { 0x1F6C1, 0x1F6FF, LBP_ID }, + { 0x1F6C1, 0x1F6CB, LBP_ID }, + { 0x1F6CC, 0x1F6CC, LBP_EB }, + { 0x1F6CD, 0x1F6FF, LBP_ID }, { 0x1F700, 0x1F773, LBP_AL }, { 0x1F774, 0x1F77F, LBP_ID }, { 0x1F780, 0x1F7D4, LBP_AL }, @@ -2195,17 +2290,31 @@ const struct LineBreakProperties lb_prop_default[] = { { 0x1F860, 0x1F887, LBP_AL }, { 0x1F888, 0x1F88F, LBP_ID }, { 0x1F890, 0x1F8AD, LBP_AL }, - { 0x1F8AE, 0x1F917, LBP_ID }, - { 0x1F918, 0x1F91E, LBP_EB }, - { 0x1F91F, 0x1F925, LBP_ID }, + { 0x1F8AE, 0x1F8FF, LBP_ID }, + { 0x1F900, 0x1F90B, LBP_AL }, + { 0x1F90C, 0x1F90E, LBP_ID }, + { 0x1F90F, 0x1F90F, LBP_EB }, + { 0x1F910, 0x1F917, LBP_ID }, + { 0x1F918, 0x1F91F, LBP_EB }, + { 0x1F920, 0x1F925, LBP_ID }, { 0x1F926, 0x1F926, LBP_EB }, { 0x1F927, 0x1F92F, LBP_ID }, - { 0x1F930, 0x1F930, LBP_EB }, - { 0x1F931, 0x1F932, LBP_ID }, - { 0x1F933, 0x1F939, LBP_EB }, + { 0x1F930, 0x1F939, LBP_EB }, { 0x1F93A, 0x1F93B, LBP_ID }, { 0x1F93C, 0x1F93E, LBP_EB }, - { 0x1F93F, 0x3FFFD, LBP_ID }, + { 0x1F93F, 0x1F9B4, LBP_ID }, + { 0x1F9B5, 0x1F9B6, LBP_EB }, + { 0x1F9B7, 0x1F9B7, LBP_ID }, + { 0x1F9B8, 0x1F9B9, LBP_EB }, + { 0x1F9BA, 0x1F9BA, LBP_ID }, + { 0x1F9BB, 0x1F9BB, LBP_EB }, + { 0x1F9BC, 0x1F9CC, LBP_ID }, + { 0x1F9CD, 0x1F9CF, LBP_EB }, + { 0x1F9D0, 0x1F9D0, LBP_ID }, + { 0x1F9D1, 0x1F9DD, LBP_EB }, + { 0x1F9DE, 0x1F9FF, LBP_ID }, + { 0x1FA00, 0x1FA53, LBP_AL }, + { 0x1FA54, 0x3FFFD, LBP_ID }, { 0xE0001, 0xE01EF, LBP_CM }, { 0xF0000, 0x10FFFD, LBP_XX }, { 
0xFFFFFFFF, 0xFFFFFFFF, LBP_Undefined } diff --git a/src/static_libs/libunibreak/linebreakdef.c b/src/static_libs/libunibreak/linebreakdef.c index 6b485cecbd..847621ed95 100644 --- a/src/static_libs/libunibreak/linebreakdef.c +++ b/src/static_libs/libunibreak/linebreakdef.c @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2016 Wu Yongwei + * Copyright (C) 2008-2018 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -22,20 +22,6 @@ * not be misrepresented as being the original software. * 3. This notice may not be removed or altered from any source * distribution. - * - * The main reference is Unicode Standard Annex 14 (UAX #14): - * - * - * When this library was designed, this annex was at Revision 19, for - * Unicode 5.0.0: - * - * - * This library has been updated according to Revision 37, for - * Unicode 9.0.0: - * - * - * The Unicode Terms of Use are available at - * */ /** @@ -66,6 +52,7 @@ static const struct LineBreakProperties lb_prop_German[] = { { 0x00AB, 0x00AB, LBP_CL }, /* Left double angle quotation mark: closing */ { 0x00BB, 0x00BB, LBP_OP }, /* Right double angle quotation mark: opening */ { 0x2018, 0x2018, LBP_CL }, /* Left single quotation mark: closing */ + { 0x2019, 0x2019, LBP_GL }, /* Right single quotation mark: glue */ { 0x201C, 0x201C, LBP_CL }, /* Left double quotation mark: closing */ { 0x2039, 0x2039, LBP_CL }, /* Left single angle quotation mark: closing */ { 0x203A, 0x203A, LBP_OP }, /* Right single angle quotation mark: opening */ diff --git a/src/static_libs/libunibreak/linebreakdef.h b/src/static_libs/libunibreak/linebreakdef.h index 37ec7b546e..48d714ef29 100644 --- a/src/static_libs/libunibreak/linebreakdef.h +++ b/src/static_libs/libunibreak/linebreakdef.h @@ -4,7 +4,7 @@ * Line breaking in a Unicode sequence. 
Designed to be used in a * generic text renderer. * - * Copyright (C) 2008-2016 Wu Yongwei + * Copyright (C) 2008-2018 Wu Yongwei * Copyright (C) 2013 Petr Filipsky * * This software is provided 'as-is', without any express or implied @@ -31,9 +31,9 @@ * Unicode 5.0.0: * * - * This library has been updated according to Revision 37, for - * Unicode 9.0.0: - * + * This library has been updated according to Revision 43, for + * Unicode 12.0.0: + * * * The Unicode Terms of Use are available at * @@ -52,8 +52,8 @@ #include "unibreakdef.h" /** - * Line break classes. This is a direct mapping of Table 1 of Unicode - * Standard Annex 14, Revision 26. + * Line break classes. This is a mapping of Table 1 of Unicode + * Standard Annex 14. */ enum LineBreakClass { @@ -95,7 +95,7 @@ enum LineBreakClass LBP_ZWJ, /**< Zero width joiner */ /* The following break class is treated in the pair table, but it is - * not part of Table 2 of UAX #14. */ + * not part of Table 2 of UAX #14-37. */ LBP_CB, /**< Contingent break */ /* The following break classes are not treated in the pair table */ @@ -117,8 +117,8 @@ enum LineBreakClass */ struct LineBreakProperties { - utf32_t start; /**< Starting coding point */ - utf32_t end; /**< End coding point */ + utf32_t start; /**< Start codepoint */ + utf32_t end; /**< End codepoint, inclusive */ enum LineBreakClass prop; /**< The line breaking property */ }; @@ -140,11 +140,14 @@ struct LineBreakPropertiesLang struct LineBreakContext { const char *lang; /**< Language name */ - const struct LineBreakProperties *lbpLang;/**< Pointer to LineBreakProperties */ + const struct LineBreakProperties *lbpLang; /**< Pointer to + LineBreakProperties */ enum LineBreakClass lbcCur; /**< Breaking class of current codepoint */ enum LineBreakClass lbcNew; /**< Breaking class of next codepoint */ enum LineBreakClass lbcLast; /**< Breaking class of last codepoint */ - int fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */ + bool fLb8aZwj; /**< Flag for ZWJ (LB8a) 
*/ + bool fLb10LeadSpace; /**< Flag for leading space (LB10) */ + bool fLb21aHebrew; /**< Flag for Hebrew letters (LB21a) */ int cLb30aRI; /**< Count of RI characters (LB30a) */ }; diff --git a/src/static_libs/libunibreak/meson.build b/src/static_libs/libunibreak/meson.build index a2d5c3e3f9..cca9c1f6a0 100644 --- a/src/static_libs/libunibreak/meson.build +++ b/src/static_libs/libunibreak/meson.build @@ -15,6 +15,8 @@ libunibreak_src = [ 'graphemebreak.c', 'graphemebreak.h', 'graphemebreakdef.h', + 'emojidef.h', + 'emojidef.c', ] libunibreak_lib = static_library('libunibreak', diff --git a/src/static_libs/libunibreak/unibreakbase.c b/src/static_libs/libunibreak/unibreakbase.c index 686852a990..ef24c90047 100644 --- a/src/static_libs/libunibreak/unibreakbase.c +++ b/src/static_libs/libunibreak/unibreakbase.c @@ -4,7 +4,7 @@ * Break processing in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2015-2016 Wu Yongwei + * Copyright (C) 2015-2018 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages diff --git a/src/static_libs/libunibreak/unibreakbase.h b/src/static_libs/libunibreak/unibreakbase.h index ff9a6ce8a9..a00a5bdb6b 100644 --- a/src/static_libs/libunibreak/unibreakbase.h +++ b/src/static_libs/libunibreak/unibreakbase.h @@ -4,7 +4,7 @@ * Break processing in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2015-2016 Wu Yongwei + * Copyright (C) 2015-2019 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. 
In no event will the author be held liable for any damages @@ -39,7 +39,7 @@ extern "C" { #endif -#define UNIBREAK_VERSION 0x0400 /**< Version of the library linebreak */ +#define UNIBREAK_VERSION 0x0402 /**< Version of the libunibreak */ extern const int unibreak_version; #ifndef UNIBREAK_UTF_TYPES_DEFINED diff --git a/src/static_libs/libunibreak/unibreakdef.h b/src/static_libs/libunibreak/unibreakdef.h index e13016d8cd..5f3533e5dd 100644 --- a/src/static_libs/libunibreak/unibreakdef.h +++ b/src/static_libs/libunibreak/unibreakdef.h @@ -4,7 +4,7 @@ * Break processing in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2015-2016 Wu Yongwei + * Copyright (C) 2015-2018 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -35,9 +35,19 @@ #ifndef UNIBREAKDEF_H #define UNIBREAKDEF_H +#if defined(_MSC_VER) && _MSC_VER < 1800 +typedef int bool; +#define false 0 +#define true 1 +#else +#include +#endif + #include #include "unibreakbase.h" +#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) + #ifdef __cplusplus extern "C" { #endif diff --git a/src/static_libs/libunibreak/wordbreak.c b/src/static_libs/libunibreak/wordbreak.c index 50c830c7cc..d4e22495c6 100644 --- a/src/static_libs/libunibreak/wordbreak.c +++ b/src/static_libs/libunibreak/wordbreak.c @@ -4,7 +4,8 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2013-2016 Tom Hacohen + * Copyright (C) 2013-2019 Tom Hacohen + * Copyright (C) 2018 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. 
In no event will the author be held liable for any damages @@ -30,9 +31,9 @@ * Unicode 6.0.0: * * - * This library has been updated according to Revision 29, for - * Unicode 9.0.0: - * + * This library has been updated according to Revision 35, for + * Unicode 12.0.0: + * * * The Unicode Terms of Use are available at * @@ -53,8 +54,7 @@ #include "unibreakdef.h" #include "wordbreak.h" #include "wordbreakdata.c" - -#define ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) +#include "emojidef.h" /** * Initializes the wordbreak internals. It currently does nothing, but @@ -215,7 +215,7 @@ static void set_wordbreaks( #if __has_attribute(fallthrough) __attribute__((fallthrough)); #endif - /* Fall off */ + /* Fall through */ case WBP_Newline: /* WB3a,3b */ @@ -225,24 +225,6 @@ static void set_wordbreaks( posLast = posCur; break; - case WBP_E_Base_GAZ: - case WBP_Glue_After_Zwj: - /* WB3c */ - if (wbcLast == WBP_ZWJ) - { - set_brks_to(s, brks, posLast, posCur, len, - WORDBREAK_NOBREAK, get_next_char); - } - /* No rule found, reset */ - else - { - set_brks_to(s, brks, posLast, posCur, len, - WORDBREAK_BREAK, get_next_char); - } - wbcSeqStart = wbcCur; - posLast = posCur; - break; - case WBP_ZWJ: case WBP_Extend: case WBP_Format: @@ -260,8 +242,10 @@ static void set_wordbreaks( { /* It's surely not the first */ brks[posCur - 1] = WORDBREAK_NOBREAK; - /* WB3c precedes 4, so no intervening Extend chars allowed. */ - if (wbcSeqStart != WBP_ZWJ) + /* WB3c and WB3d precede 4, so no intervening Extend + * chars allowed. */ + if (wbcCur != WBP_ZWJ && wbcSeqStart != WBP_ZWJ && + wbcSeqStart != WBP_WSegSpace) { /* "inherit" the previous class. 
*/ wbcCur = wbcLast; @@ -334,7 +318,8 @@ static void set_wordbreaks( #if __has_attribute(fallthrough) __attribute__((fallthrough)); #endif - /* No break on purpose */ + /* Fall through */ + case WBP_MidNumLet: if (((wbcLast == WBP_ALetter) || (wbcLast == WBP_Hebrew_Letter)) || /* WB6,7 */ @@ -421,32 +406,6 @@ static void set_wordbreaks( posLast = posCur; break; - case WBP_E_Base: - /* No rule found, reset */ - set_brks_to(s, brks, posLast, posCur, len, - WORDBREAK_BREAK, get_next_char); - wbcSeqStart = wbcCur; - posLast = posCur; - break; - - case WBP_E_Modifier: - /* WB14 */ - if ((wbcLast == WBP_E_Base) || - (wbcLast == WBP_E_Base_GAZ)) - { - set_brks_to(s, brks, posLast, posCur, len, - WORDBREAK_NOBREAK, get_next_char); - } - /* No rule found, reset */ - else - { - set_brks_to(s, brks, posLast, posCur, len, - WORDBREAK_BREAK, get_next_char); - } - wbcSeqStart = wbcCur; - posLast = posCur; - break; - case WBP_Regional_Indicator: /* WB15,16 */ if ((wbcSeqStart == WBP_Regional_Indicator) && @@ -481,7 +440,32 @@ static void set_wordbreaks( } break; + case WBP_WSegSpace: + if (wbcLast == WBP_WSegSpace) /* WB3d */ + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); + posLast = posCur; + break; + } +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif +#if __has_attribute(fallthrough) + __attribute__((fallthrough)); +#endif + /* Fall through */ + case WBP_Any: + /* Check for rule WB3c */ + if (wbcLast == WBP_ZWJ && ub_is_extended_pictographic(ch)) + { + set_brks_to(s, brks, posLast, posCur, len, + WORDBREAK_NOBREAK, get_next_char); + posLast = posCur; + break; + } + /* Allow breaks and reset */ set_brks_to(s, brks, posLast, posCur, len, WORDBREAK_BREAK, get_next_char); diff --git a/src/static_libs/libunibreak/wordbreak.h b/src/static_libs/libunibreak/wordbreak.h index 1040c13280..021de4d751 100644 --- a/src/static_libs/libunibreak/wordbreak.h +++ b/src/static_libs/libunibreak/wordbreak.h @@ -4,7 +4,8 @@ * Word breaking in a 
Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2013-2016 Tom Hacohen + * Copyright (C) 2013-2019 Tom Hacohen + * Copyright (C) 2018 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +31,9 @@ * Unicode 6.0.0: * * - * This library has been updated according to Revision 29, for - * Unicode 9.0.0: - * + * This library has been updated according to Revision 35, for + * Unicode 12.0.0: + * * * The Unicode Terms of Use are available at * @@ -72,4 +73,4 @@ void set_wordbreaks_utf32( } #endif -#endif +#endif /* WORDBREAK_H */ diff --git a/src/static_libs/libunibreak/wordbreakdata.c b/src/static_libs/libunibreak/wordbreakdata.c index 99fcff5bad..f5ee889589 100644 --- a/src/static_libs/libunibreak/wordbreakdata.c +++ b/src/static_libs/libunibreak/wordbreakdata.c @@ -1,6 +1,6 @@ /* The content of this file is generated from: -# WordBreakProperty-9.0.0.txt -# Date: 2016-06-01, 10:34:38 GMT +# WordBreakProperty-12.1.0.txt +# Date: 2019-03-10, 10:53:28 GMT */ #include "wordbreakdef.h" @@ -9,6 +9,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x000A, 0x000A, WBP_LF}, {0x000B, 0x000C, WBP_Newline}, {0x000D, 0x000D, WBP_CR}, + {0x0020, 0x0020, WBP_WSegSpace}, {0x0022, 0x0022, WBP_Double_Quote}, {0x0027, 0x0027, WBP_Single_Quote}, {0x002C, 0x002C, WBP_MidNum}, @@ -35,11 +36,15 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0294, 0x0294, WBP_ALetter}, {0x0295, 0x02AF, WBP_ALetter}, {0x02B0, 0x02C1, WBP_ALetter}, + {0x02C2, 0x02C5, WBP_ALetter}, {0x02C6, 0x02D1, WBP_ALetter}, - {0x02D7, 0x02D7, WBP_MidLetter}, + {0x02D2, 0x02D7, WBP_ALetter}, + {0x02DE, 0x02DF, WBP_ALetter}, {0x02E0, 0x02E4, WBP_ALetter}, {0x02EC, 0x02EC, WBP_ALetter}, + {0x02ED, 0x02ED, WBP_ALetter}, {0x02EE, 0x02EE, WBP_ALetter}, + {0x02EF, 0x02FF, WBP_ALetter}, {0x0300, 0x036F, WBP_Extend}, {0x0370, 0x0373, WBP_ALetter}, 
{0x0374, 0x0374, WBP_ALetter}, @@ -60,7 +65,9 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x048A, 0x052F, WBP_ALetter}, {0x0531, 0x0556, WBP_ALetter}, {0x0559, 0x0559, WBP_ALetter}, - {0x0561, 0x0587, WBP_ALetter}, + {0x055B, 0x055C, WBP_ALetter}, + {0x055E, 0x055E, WBP_ALetter}, + {0x0560, 0x0588, WBP_ALetter}, {0x0589, 0x0589, WBP_MidNum}, {0x0591, 0x05BD, WBP_Extend}, {0x05BF, 0x05BF, WBP_Extend}, @@ -68,7 +75,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x05C4, 0x05C5, WBP_Extend}, {0x05C7, 0x05C7, WBP_Extend}, {0x05D0, 0x05EA, WBP_Hebrew_Letter}, - {0x05F0, 0x05F2, WBP_Hebrew_Letter}, + {0x05EF, 0x05F2, WBP_Hebrew_Letter}, {0x05F3, 0x05F3, WBP_ALetter}, {0x05F4, 0x05F4, WBP_MidLetter}, {0x0600, 0x0605, WBP_Format}, @@ -110,6 +117,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x07F4, 0x07F5, WBP_ALetter}, {0x07F8, 0x07F8, WBP_MidNum}, {0x07FA, 0x07FA, WBP_ALetter}, + {0x07FD, 0x07FD, WBP_Extend}, {0x0800, 0x0815, WBP_ALetter}, {0x0816, 0x0819, WBP_Extend}, {0x081A, 0x081A, WBP_ALetter}, @@ -120,9 +128,10 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0829, 0x082D, WBP_Extend}, {0x0840, 0x0858, WBP_ALetter}, {0x0859, 0x085B, WBP_Extend}, + {0x0860, 0x086A, WBP_ALetter}, {0x08A0, 0x08B4, WBP_ALetter}, {0x08B6, 0x08BD, WBP_ALetter}, - {0x08D4, 0x08E1, WBP_Extend}, + {0x08D3, 0x08E1, WBP_Extend}, {0x08E2, 0x08E2, WBP_Format}, {0x08E3, 0x0902, WBP_Extend}, {0x0903, 0x0903, WBP_Extend}, @@ -165,6 +174,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x09E2, 0x09E3, WBP_Extend}, {0x09E6, 0x09EF, WBP_Numeric}, {0x09F0, 0x09F1, WBP_ALetter}, + {0x09FC, 0x09FC, WBP_ALetter}, + {0x09FE, 0x09FE, WBP_Extend}, {0x0A01, 0x0A02, WBP_Extend}, {0x0A03, 0x0A03, WBP_Extend}, {0x0A05, 0x0A0A, WBP_ALetter}, @@ -207,6 +218,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0AE2, 0x0AE3, WBP_Extend}, {0x0AE6, 0x0AEF, WBP_Numeric}, {0x0AF9, 0x0AF9, WBP_ALetter}, 
+ {0x0AFA, 0x0AFF, WBP_Extend}, {0x0B01, 0x0B01, WBP_Extend}, {0x0B02, 0x0B03, WBP_Extend}, {0x0B05, 0x0B0C, WBP_ALetter}, @@ -253,6 +265,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0BE6, 0x0BEF, WBP_Numeric}, {0x0C00, 0x0C00, WBP_Extend}, {0x0C01, 0x0C03, WBP_Extend}, + {0x0C04, 0x0C04, WBP_Extend}, {0x0C05, 0x0C0C, WBP_ALetter}, {0x0C0E, 0x0C10, WBP_ALetter}, {0x0C12, 0x0C28, WBP_ALetter}, @@ -290,11 +303,12 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0CE2, 0x0CE3, WBP_Extend}, {0x0CE6, 0x0CEF, WBP_Numeric}, {0x0CF1, 0x0CF2, WBP_ALetter}, - {0x0D01, 0x0D01, WBP_Extend}, + {0x0D00, 0x0D01, WBP_Extend}, {0x0D02, 0x0D03, WBP_Extend}, {0x0D05, 0x0D0C, WBP_ALetter}, {0x0D0E, 0x0D10, WBP_ALetter}, {0x0D12, 0x0D3A, WBP_ALetter}, + {0x0D3B, 0x0D3C, WBP_Extend}, {0x0D3D, 0x0D3D, WBP_ALetter}, {0x0D3E, 0x0D40, WBP_Extend}, {0x0D41, 0x0D44, WBP_Extend}, @@ -326,8 +340,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x0E47, 0x0E4E, WBP_Extend}, {0x0E50, 0x0E59, WBP_Numeric}, {0x0EB1, 0x0EB1, WBP_Extend}, - {0x0EB4, 0x0EB9, WBP_Extend}, - {0x0EBB, 0x0EBC, WBP_Extend}, + {0x0EB4, 0x0EBC, WBP_Extend}, {0x0EC8, 0x0ECD, WBP_Extend}, {0x0ED0, 0x0ED9, WBP_Numeric}, {0x0F00, 0x0F00, WBP_ALetter}, @@ -376,7 +389,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x10CD, 0x10CD, WBP_ALetter}, {0x10D0, 0x10FA, WBP_ALetter}, {0x10FC, 0x10FC, WBP_ALetter}, - {0x10FD, 0x1248, WBP_ALetter}, + {0x10FD, 0x10FF, WBP_ALetter}, + {0x1100, 0x1248, WBP_ALetter}, {0x124A, 0x124D, WBP_ALetter}, {0x1250, 0x1256, WBP_ALetter}, {0x1258, 0x1258, WBP_ALetter}, @@ -398,6 +412,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x13F8, 0x13FD, WBP_ALetter}, {0x1401, 0x166C, WBP_ALetter}, {0x166F, 0x167F, WBP_ALetter}, + {0x1680, 0x1680, WBP_WSegSpace}, {0x1681, 0x169A, WBP_ALetter}, {0x16A0, 0x16EA, WBP_ALetter}, {0x16EE, 0x16F0, WBP_ALetter}, @@ -426,7 +441,7 @@ static const struct WordBreakProperties 
wb_prop_default[] = { {0x1810, 0x1819, WBP_Numeric}, {0x1820, 0x1842, WBP_ALetter}, {0x1843, 0x1843, WBP_ALetter}, - {0x1844, 0x1877, WBP_ALetter}, + {0x1844, 0x1878, WBP_ALetter}, {0x1880, 0x1884, WBP_ALetter}, {0x1885, 0x1886, WBP_Extend}, {0x1887, 0x18A8, WBP_ALetter}, @@ -509,24 +524,27 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1C5A, 0x1C77, WBP_ALetter}, {0x1C78, 0x1C7D, WBP_ALetter}, {0x1C80, 0x1C88, WBP_ALetter}, + {0x1C90, 0x1CBA, WBP_ALetter}, + {0x1CBD, 0x1CBF, WBP_ALetter}, {0x1CD0, 0x1CD2, WBP_Extend}, {0x1CD4, 0x1CE0, WBP_Extend}, {0x1CE1, 0x1CE1, WBP_Extend}, {0x1CE2, 0x1CE8, WBP_Extend}, {0x1CE9, 0x1CEC, WBP_ALetter}, {0x1CED, 0x1CED, WBP_Extend}, - {0x1CEE, 0x1CF1, WBP_ALetter}, - {0x1CF2, 0x1CF3, WBP_Extend}, + {0x1CEE, 0x1CF3, WBP_ALetter}, {0x1CF4, 0x1CF4, WBP_Extend}, {0x1CF5, 0x1CF6, WBP_ALetter}, + {0x1CF7, 0x1CF7, WBP_Extend}, {0x1CF8, 0x1CF9, WBP_Extend}, + {0x1CFA, 0x1CFA, WBP_ALetter}, {0x1D00, 0x1D2B, WBP_ALetter}, {0x1D2C, 0x1D6A, WBP_ALetter}, {0x1D6B, 0x1D77, WBP_ALetter}, {0x1D78, 0x1D78, WBP_ALetter}, {0x1D79, 0x1D9A, WBP_ALetter}, {0x1D9B, 0x1DBF, WBP_ALetter}, - {0x1DC0, 0x1DF5, WBP_Extend}, + {0x1DC0, 0x1DF9, WBP_Extend}, {0x1DFB, 0x1DFF, WBP_Extend}, {0x1E00, 0x1F15, WBP_ALetter}, {0x1F18, 0x1F1D, WBP_ALetter}, @@ -547,6 +565,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1FE0, 0x1FEC, WBP_ALetter}, {0x1FF2, 0x1FF4, WBP_ALetter}, {0x1FF6, 0x1FFC, WBP_ALetter}, + {0x2000, 0x2006, WBP_WSegSpace}, + {0x2008, 0x200A, WBP_WSegSpace}, {0x200C, 0x200C, WBP_Extend}, {0x200D, 0x200D, WBP_ZWJ}, {0x200E, 0x200F, WBP_Format}, @@ -561,6 +581,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x203F, 0x2040, WBP_ExtendNumLet}, {0x2044, 0x2044, WBP_MidNum}, {0x2054, 0x2054, WBP_ExtendNumLet}, + {0x205F, 0x205F, WBP_WSegSpace}, {0x2060, 0x2064, WBP_Format}, {0x2066, 0x206F, WBP_Format}, {0x2071, 0x2071, WBP_ALetter}, @@ -590,10 +611,6 @@ static const struct WordBreakProperties 
wb_prop_default[] = { {0x2183, 0x2184, WBP_ALetter}, {0x2185, 0x2188, WBP_ALetter}, {0x24B6, 0x24E9, WBP_ALetter}, - {0x261D, 0x261D, WBP_E_Base}, - {0x26F9, 0x26F9, WBP_E_Base}, - {0x270A, 0x270D, WBP_E_Base}, - {0x2764, 0x2764, WBP_Glue_After_Zwj}, {0x2C00, 0x2C2E, WBP_ALetter}, {0x2C30, 0x2C5E, WBP_ALetter}, {0x2C60, 0x2C7B, WBP_ALetter}, @@ -619,6 +636,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x2DD8, 0x2DDE, WBP_ALetter}, {0x2DE0, 0x2DFF, WBP_Extend}, {0x2E2F, 0x2E2F, WBP_ALetter}, + {0x3000, 0x3000, WBP_WSegSpace}, {0x3005, 0x3005, WBP_ALetter}, {0x302A, 0x302D, WBP_Extend}, {0x302E, 0x302F, WBP_Extend}, @@ -631,7 +649,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x30A1, 0x30FA, WBP_Katakana}, {0x30FC, 0x30FE, WBP_Katakana}, {0x30FF, 0x30FF, WBP_Katakana}, - {0x3105, 0x312D, WBP_ALetter}, + {0x3105, 0x312F, WBP_ALetter}, {0x3131, 0x318E, WBP_ALetter}, {0x31A0, 0x31BA, WBP_ALetter}, {0x31F0, 0x31FF, WBP_Katakana}, @@ -660,14 +678,16 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA6E6, 0xA6EF, WBP_ALetter}, {0xA6F0, 0xA6F1, WBP_Extend}, {0xA717, 0xA71F, WBP_ALetter}, + {0xA720, 0xA721, WBP_ALetter}, {0xA722, 0xA76F, WBP_ALetter}, {0xA770, 0xA770, WBP_ALetter}, {0xA771, 0xA787, WBP_ALetter}, {0xA788, 0xA788, WBP_ALetter}, + {0xA789, 0xA78A, WBP_ALetter}, {0xA78B, 0xA78E, WBP_ALetter}, {0xA78F, 0xA78F, WBP_ALetter}, - {0xA790, 0xA7AE, WBP_ALetter}, - {0xA7B0, 0xA7B7, WBP_ALetter}, + {0xA790, 0xA7BF, WBP_ALetter}, + {0xA7C2, 0xA7C6, WBP_ALetter}, {0xA7F7, 0xA7F7, WBP_ALetter}, {0xA7F8, 0xA7F9, WBP_ALetter}, {0xA7FA, 0xA7FA, WBP_ALetter}, @@ -690,7 +710,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA8E0, 0xA8F1, WBP_Extend}, {0xA8F2, 0xA8F7, WBP_ALetter}, {0xA8FB, 0xA8FB, WBP_ALetter}, - {0xA8FD, 0xA8FD, WBP_ALetter}, + {0xA8FD, 0xA8FE, WBP_ALetter}, + {0xA8FF, 0xA8FF, WBP_Extend}, {0xA900, 0xA909, WBP_Numeric}, {0xA90A, 0xA925, WBP_ALetter}, {0xA926, 0xA92D, WBP_Extend}, 
@@ -705,8 +726,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xA9B4, 0xA9B5, WBP_Extend}, {0xA9B6, 0xA9B9, WBP_Extend}, {0xA9BA, 0xA9BB, WBP_Extend}, - {0xA9BC, 0xA9BC, WBP_Extend}, - {0xA9BD, 0xA9C0, WBP_Extend}, + {0xA9BC, 0xA9BD, WBP_Extend}, + {0xA9BE, 0xA9C0, WBP_Extend}, {0xA9CF, 0xA9CF, WBP_ALetter}, {0xA9D0, 0xA9D9, WBP_Numeric}, {0xA9E5, 0xA9E5, WBP_Extend}, @@ -745,8 +766,9 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xAB20, 0xAB26, WBP_ALetter}, {0xAB28, 0xAB2E, WBP_ALetter}, {0xAB30, 0xAB5A, WBP_ALetter}, + {0xAB5B, 0xAB5B, WBP_ALetter}, {0xAB5C, 0xAB5F, WBP_ALetter}, - {0xAB60, 0xAB65, WBP_ALetter}, + {0xAB60, 0xAB67, WBP_ALetter}, {0xAB70, 0xABBF, WBP_ALetter}, {0xABC0, 0xABE2, WBP_ALetter}, {0xABE3, 0xABE4, WBP_Extend}, @@ -793,6 +815,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0xFF07, 0xFF07, WBP_MidNumLet}, {0xFF0C, 0xFF0C, WBP_MidNum}, {0xFF0E, 0xFF0E, WBP_MidNumLet}, + {0xFF10, 0xFF19, WBP_Numeric}, {0xFF1A, 0xFF1A, WBP_MidLetter}, {0xFF1B, 0xFF1B, WBP_MidNum}, {0xFF21, 0xFF3A, WBP_ALetter}, @@ -821,7 +844,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x102A0, 0x102D0, WBP_ALetter}, {0x102E0, 0x102E0, WBP_Extend}, {0x10300, 0x1031F, WBP_ALetter}, - {0x10330, 0x10340, WBP_ALetter}, + {0x1032D, 0x10340, WBP_ALetter}, {0x10341, 0x10341, WBP_ALetter}, {0x10342, 0x10349, WBP_ALetter}, {0x1034A, 0x1034A, WBP_ALetter}, @@ -861,7 +884,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x10A0C, 0x10A0F, WBP_Extend}, {0x10A10, 0x10A13, WBP_ALetter}, {0x10A15, 0x10A17, WBP_ALetter}, - {0x10A19, 0x10A33, WBP_ALetter}, + {0x10A19, 0x10A35, WBP_ALetter}, {0x10A38, 0x10A3A, WBP_Extend}, {0x10A3F, 0x10A3F, WBP_Extend}, {0x10A60, 0x10A7C, WBP_ALetter}, @@ -876,6 +899,14 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x10C00, 0x10C48, WBP_ALetter}, {0x10C80, 0x10CB2, WBP_ALetter}, {0x10CC0, 0x10CF2, WBP_ALetter}, + {0x10D00, 0x10D23, 
WBP_ALetter}, + {0x10D24, 0x10D27, WBP_Extend}, + {0x10D30, 0x10D39, WBP_Numeric}, + {0x10F00, 0x10F1C, WBP_ALetter}, + {0x10F27, 0x10F27, WBP_ALetter}, + {0x10F30, 0x10F45, WBP_ALetter}, + {0x10F46, 0x10F50, WBP_Extend}, + {0x10FE0, 0x10FF6, WBP_ALetter}, {0x11000, 0x11000, WBP_Extend}, {0x11001, 0x11001, WBP_Extend}, {0x11002, 0x11002, WBP_Extend}, @@ -890,6 +921,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x110B7, 0x110B8, WBP_Extend}, {0x110B9, 0x110BA, WBP_Extend}, {0x110BD, 0x110BD, WBP_Format}, + {0x110CD, 0x110CD, WBP_Format}, {0x110D0, 0x110E8, WBP_ALetter}, {0x110F0, 0x110F9, WBP_Numeric}, {0x11100, 0x11102, WBP_Extend}, @@ -898,6 +930,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1112C, 0x1112C, WBP_Extend}, {0x1112D, 0x11134, WBP_Extend}, {0x11136, 0x1113F, WBP_Numeric}, + {0x11144, 0x11144, WBP_ALetter}, + {0x11145, 0x11146, WBP_Extend}, {0x11150, 0x11172, WBP_ALetter}, {0x11173, 0x11173, WBP_Extend}, {0x11176, 0x11176, WBP_ALetter}, @@ -908,7 +942,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x111B6, 0x111BE, WBP_Extend}, {0x111BF, 0x111C0, WBP_Extend}, {0x111C1, 0x111C4, WBP_ALetter}, - {0x111CA, 0x111CC, WBP_Extend}, + {0x111C9, 0x111CC, WBP_Extend}, {0x111D0, 0x111D9, WBP_Numeric}, {0x111DA, 0x111DA, WBP_ALetter}, {0x111DC, 0x111DC, WBP_ALetter}, @@ -939,7 +973,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1132A, 0x11330, WBP_ALetter}, {0x11332, 0x11333, WBP_ALetter}, {0x11335, 0x11339, WBP_ALetter}, - {0x1133C, 0x1133C, WBP_Extend}, + {0x1133B, 0x1133C, WBP_Extend}, {0x1133D, 0x1133D, WBP_ALetter}, {0x1133E, 0x1133F, WBP_Extend}, {0x11340, 0x11340, WBP_Extend}, @@ -961,6 +995,8 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11446, 0x11446, WBP_Extend}, {0x11447, 0x1144A, WBP_ALetter}, {0x11450, 0x11459, WBP_Numeric}, + {0x1145E, 0x1145E, WBP_Extend}, + {0x1145F, 0x1145F, WBP_ALetter}, {0x11480, 0x114AF, WBP_ALetter}, {0x114B0, 
0x114B2, WBP_Extend}, {0x114B3, 0x114B8, WBP_Extend}, @@ -999,6 +1035,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x116B0, 0x116B5, WBP_Extend}, {0x116B6, 0x116B6, WBP_Extend}, {0x116B7, 0x116B7, WBP_Extend}, + {0x116B8, 0x116B8, WBP_ALetter}, {0x116C0, 0x116C9, WBP_Numeric}, {0x1171D, 0x1171F, WBP_Extend}, {0x11720, 0x11721, WBP_Extend}, @@ -1006,9 +1043,41 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11726, 0x11726, WBP_Extend}, {0x11727, 0x1172B, WBP_Extend}, {0x11730, 0x11739, WBP_Numeric}, + {0x11800, 0x1182B, WBP_ALetter}, + {0x1182C, 0x1182E, WBP_Extend}, + {0x1182F, 0x11837, WBP_Extend}, + {0x11838, 0x11838, WBP_Extend}, + {0x11839, 0x1183A, WBP_Extend}, {0x118A0, 0x118DF, WBP_ALetter}, {0x118E0, 0x118E9, WBP_Numeric}, {0x118FF, 0x118FF, WBP_ALetter}, + {0x119A0, 0x119A7, WBP_ALetter}, + {0x119AA, 0x119D0, WBP_ALetter}, + {0x119D1, 0x119D3, WBP_Extend}, + {0x119D4, 0x119D7, WBP_Extend}, + {0x119DA, 0x119DB, WBP_Extend}, + {0x119DC, 0x119DF, WBP_Extend}, + {0x119E0, 0x119E0, WBP_Extend}, + {0x119E1, 0x119E1, WBP_ALetter}, + {0x119E3, 0x119E3, WBP_ALetter}, + {0x119E4, 0x119E4, WBP_Extend}, + {0x11A00, 0x11A00, WBP_ALetter}, + {0x11A01, 0x11A0A, WBP_Extend}, + {0x11A0B, 0x11A32, WBP_ALetter}, + {0x11A33, 0x11A38, WBP_Extend}, + {0x11A39, 0x11A39, WBP_Extend}, + {0x11A3A, 0x11A3A, WBP_ALetter}, + {0x11A3B, 0x11A3E, WBP_Extend}, + {0x11A47, 0x11A47, WBP_Extend}, + {0x11A50, 0x11A50, WBP_ALetter}, + {0x11A51, 0x11A56, WBP_Extend}, + {0x11A57, 0x11A58, WBP_Extend}, + {0x11A59, 0x11A5B, WBP_Extend}, + {0x11A5C, 0x11A89, WBP_ALetter}, + {0x11A8A, 0x11A96, WBP_Extend}, + {0x11A97, 0x11A97, WBP_Extend}, + {0x11A98, 0x11A99, WBP_Extend}, + {0x11A9D, 0x11A9D, WBP_ALetter}, {0x11AC0, 0x11AF8, WBP_ALetter}, {0x11C00, 0x11C08, WBP_ALetter}, {0x11C0A, 0x11C2E, WBP_ALetter}, @@ -1027,10 +1096,35 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x11CB2, 0x11CB3, WBP_Extend}, {0x11CB4, 0x11CB4, WBP_Extend}, 
{0x11CB5, 0x11CB6, WBP_Extend}, + {0x11D00, 0x11D06, WBP_ALetter}, + {0x11D08, 0x11D09, WBP_ALetter}, + {0x11D0B, 0x11D30, WBP_ALetter}, + {0x11D31, 0x11D36, WBP_Extend}, + {0x11D3A, 0x11D3A, WBP_Extend}, + {0x11D3C, 0x11D3D, WBP_Extend}, + {0x11D3F, 0x11D45, WBP_Extend}, + {0x11D46, 0x11D46, WBP_ALetter}, + {0x11D47, 0x11D47, WBP_Extend}, + {0x11D50, 0x11D59, WBP_Numeric}, + {0x11D60, 0x11D65, WBP_ALetter}, + {0x11D67, 0x11D68, WBP_ALetter}, + {0x11D6A, 0x11D89, WBP_ALetter}, + {0x11D8A, 0x11D8E, WBP_Extend}, + {0x11D90, 0x11D91, WBP_Extend}, + {0x11D93, 0x11D94, WBP_Extend}, + {0x11D95, 0x11D95, WBP_Extend}, + {0x11D96, 0x11D96, WBP_Extend}, + {0x11D97, 0x11D97, WBP_Extend}, + {0x11D98, 0x11D98, WBP_ALetter}, + {0x11DA0, 0x11DA9, WBP_Numeric}, + {0x11EE0, 0x11EF2, WBP_ALetter}, + {0x11EF3, 0x11EF4, WBP_Extend}, + {0x11EF5, 0x11EF6, WBP_Extend}, {0x12000, 0x12399, WBP_ALetter}, {0x12400, 0x1246E, WBP_ALetter}, {0x12480, 0x12543, WBP_ALetter}, {0x13000, 0x1342E, WBP_ALetter}, + {0x13430, 0x13438, WBP_Format}, {0x14400, 0x14646, WBP_ALetter}, {0x16800, 0x16A38, WBP_ALetter}, {0x16A40, 0x16A5E, WBP_ALetter}, @@ -1043,13 +1137,17 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x16B50, 0x16B59, WBP_Numeric}, {0x16B63, 0x16B77, WBP_ALetter}, {0x16B7D, 0x16B8F, WBP_ALetter}, - {0x16F00, 0x16F44, WBP_ALetter}, + {0x16E40, 0x16E7F, WBP_ALetter}, + {0x16F00, 0x16F4A, WBP_ALetter}, + {0x16F4F, 0x16F4F, WBP_Extend}, {0x16F50, 0x16F50, WBP_ALetter}, - {0x16F51, 0x16F7E, WBP_Extend}, + {0x16F51, 0x16F87, WBP_Extend}, {0x16F8F, 0x16F92, WBP_Extend}, {0x16F93, 0x16F9F, WBP_ALetter}, - {0x16FE0, 0x16FE0, WBP_ALetter}, + {0x16FE0, 0x16FE1, WBP_ALetter}, + {0x16FE3, 0x16FE3, WBP_ALetter}, {0x1B000, 0x1B000, WBP_Katakana}, + {0x1B164, 0x1B167, WBP_Katakana}, {0x1BC00, 0x1BC6A, WBP_ALetter}, {0x1BC70, 0x1BC7C, WBP_ALetter}, {0x1BC80, 0x1BC88, WBP_ALetter}, @@ -1106,10 +1204,19 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1E01B, 0x1E021, 
WBP_Extend}, {0x1E023, 0x1E024, WBP_Extend}, {0x1E026, 0x1E02A, WBP_Extend}, + {0x1E100, 0x1E12C, WBP_ALetter}, + {0x1E130, 0x1E136, WBP_Extend}, + {0x1E137, 0x1E13D, WBP_ALetter}, + {0x1E140, 0x1E149, WBP_Numeric}, + {0x1E14E, 0x1E14E, WBP_ALetter}, + {0x1E2C0, 0x1E2EB, WBP_ALetter}, + {0x1E2EC, 0x1E2EF, WBP_Extend}, + {0x1E2F0, 0x1E2F9, WBP_Numeric}, {0x1E800, 0x1E8C4, WBP_ALetter}, {0x1E8D0, 0x1E8D6, WBP_Extend}, {0x1E900, 0x1E943, WBP_ALetter}, {0x1E944, 0x1E94A, WBP_Extend}, + {0x1E94B, 0x1E94B, WBP_ALetter}, {0x1E950, 0x1E959, WBP_Numeric}, {0x1EE00, 0x1EE03, WBP_ALetter}, {0x1EE05, 0x1EE1F, WBP_ALetter}, @@ -1148,35 +1255,7 @@ static const struct WordBreakProperties wb_prop_default[] = { {0x1F150, 0x1F169, WBP_ALetter}, {0x1F170, 0x1F189, WBP_ALetter}, {0x1F1E6, 0x1F1FF, WBP_Regional_Indicator}, - {0x1F385, 0x1F385, WBP_E_Base}, - {0x1F3C3, 0x1F3C4, WBP_E_Base}, - {0x1F3CA, 0x1F3CB, WBP_E_Base}, - {0x1F3FB, 0x1F3FF, WBP_E_Modifier}, - {0x1F442, 0x1F443, WBP_E_Base}, - {0x1F446, 0x1F450, WBP_E_Base}, - {0x1F466, 0x1F469, WBP_E_Base_GAZ}, - {0x1F46E, 0x1F46E, WBP_E_Base}, - {0x1F470, 0x1F478, WBP_E_Base}, - {0x1F47C, 0x1F47C, WBP_E_Base}, - {0x1F481, 0x1F483, WBP_E_Base}, - {0x1F485, 0x1F487, WBP_E_Base}, - {0x1F48B, 0x1F48B, WBP_Glue_After_Zwj}, - {0x1F4AA, 0x1F4AA, WBP_E_Base}, - {0x1F575, 0x1F575, WBP_E_Base}, - {0x1F57A, 0x1F57A, WBP_E_Base}, - {0x1F590, 0x1F590, WBP_E_Base}, - {0x1F595, 0x1F596, WBP_E_Base}, - {0x1F5E8, 0x1F5E8, WBP_Glue_After_Zwj}, - {0x1F645, 0x1F647, WBP_E_Base}, - {0x1F64B, 0x1F64F, WBP_E_Base}, - {0x1F6A3, 0x1F6A3, WBP_E_Base}, - {0x1F6B4, 0x1F6B6, WBP_E_Base}, - {0x1F6C0, 0x1F6C0, WBP_E_Base}, - {0x1F918, 0x1F91E, WBP_E_Base}, - {0x1F926, 0x1F926, WBP_E_Base}, - {0x1F930, 0x1F930, WBP_E_Base}, - {0x1F933, 0x1F939, WBP_E_Base}, - {0x1F93C, 0x1F93E, WBP_E_Base}, + {0x1F3FB, 0x1F3FF, WBP_Extend}, {0xE0001, 0xE0001, WBP_Format}, {0xE0020, 0xE007F, WBP_Extend}, {0xE0100, 0xE01EF, WBP_Extend}, diff --git 
a/src/static_libs/libunibreak/wordbreakdef.h b/src/static_libs/libunibreak/wordbreakdef.h index 82cd98e7c3..03feb3cdac 100644 --- a/src/static_libs/libunibreak/wordbreakdef.h +++ b/src/static_libs/libunibreak/wordbreakdef.h @@ -4,7 +4,8 @@ * Word breaking in a Unicode sequence. Designed to be used in a * generic text renderer. * - * Copyright (C) 2013-16 Tom Hacohen + * Copyright (C) 2013-2019 Tom Hacohen + * Copyright (C) 2018 Wu Yongwei * * This software is provided 'as-is', without any express or implied * warranty. In no event will the author be held liable for any damages @@ -30,9 +31,9 @@ * Unicode 6.0.0: * * - * This library has been updated according to Revision 29, for - * Unicode 9.0.0: - * + * This library has been updated according to Revision 35, for + * Unicode 12.0.0: + * * * The Unicode Terms of Use are available at * @@ -51,7 +52,7 @@ /** * Word break classes. This is a direct mapping of Table 3 of Unicode - * Standard Annex 29, Revision 23. + * Standard Annex 29, Revision 35. 
*/ enum WordBreakClass { @@ -73,10 +74,7 @@ enum WordBreakClass WBP_MidNum, WBP_Numeric, WBP_ExtendNumLet, - WBP_E_Base, - WBP_E_Modifier, - WBP_Glue_After_Zwj, - WBP_E_Base_GAZ, + WBP_WSegSpace, WBP_Any }; @@ -86,7 +84,7 @@ enum WordBreakClass */ struct WordBreakProperties { - utf32_t start; /**< Starting coding point */ - utf32_t end; /**< End coding point */ + utf32_t start; /**< Start codepoint */ + utf32_t end; /**< End codepoint, inclusive */ enum WordBreakClass prop; /**< The word breaking property */ }; diff --git a/src/tests/evas/evas_test_textblock.c b/src/tests/evas/evas_test_textblock.c index a24d16d73c..3f1cbec8d8 100644 --- a/src/tests/evas/evas_test_textblock.c +++ b/src/tests/evas/evas_test_textblock.c @@ -1054,6 +1054,10 @@ EFL_START_TEST(evas_textblock_cursor) pos = evas_textblock_cursor_pos_get(cur); ck_assert_int_eq(pos, 0); + evas_object_textblock_text_markup_set(tb, "🏳️‍🌈"); + evas_textblock_cursor_pos_set(cur, 0); + evas_textblock_cursor_cluster_next(cur); + ck_assert_int_eq(4, evas_textblock_cursor_pos_get(cur)); END_TB_TEST(); }