summaryrefslogtreecommitdiff
path: root/src/static_libs/libunibreak/unibreakdef.c
diff options
context:
space:
mode:
authorTom Hacohen <tom@stosb.com>2015-05-07 10:53:11 +0100
committerTom Hacohen <tom@stosb.com>2015-05-07 10:54:26 +0100
commit7a49d23f90f41c478db9b7beb9763aa0cd74ae46 (patch)
tree8ff009d70d91523d2c5f718ed395c8e23df43c4f /src/static_libs/libunibreak/unibreakdef.c
parentba77a837a37af0d154d7ceafbb5ab7d4f75090f6 (diff)
Static deps unibreak: update to what will soon be version 3.
Version 3 is not yet released, but this is on track to become it. This is based on commit: a815e11f7ebf35b59278f783227a829ee4692760. @feature.
Diffstat (limited to 'src/static_libs/libunibreak/unibreakdef.c')
-rw-r--r--src/static_libs/libunibreak/unibreakdef.c159
1 files changed, 159 insertions, 0 deletions
diff --git a/src/static_libs/libunibreak/unibreakdef.c b/src/static_libs/libunibreak/unibreakdef.c
new file mode 100644
index 0000000000..2647b615d5
--- /dev/null
+++ b/src/static_libs/libunibreak/unibreakdef.c
@@ -0,0 +1,159 @@
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Break processing in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2015 Wu Yongwei <wuyongwei at gmail dot com>
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the author be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must
18 * not claim that you wrote the original software. If you use this
19 * software in a product, an acknowledgement in the product
20 * documentation would be appreciated but is not required.
21 * 2. Altered source versions must be plainly marked as such, and must
22 * not be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source
24 * distribution.
25 */
26
27/**
28 * @file unibreakdef.c
29 *
30 * Definition of utility functions used by the libunibreak library.
31 *
32 * @version 1.0, 2015/04/18
33 * @author Wu Yongwei
34 */
35
36#include <assert.h>
37#include <stddef.h>
38#include "unibreakdef.h"
39
40/**
41 * Gets the next Unicode character in a UTF-8 sequence. The index will
42 * be advanced to the next complete character, unless the end of string
43 * is reached in the middle of a UTF-8 sequence.
44 *
45 * @param[in] s input UTF-8 string
46 * @param[in] len length of the string in bytes
47 * @param[in,out] ip pointer to the index
48 * @return the Unicode character beginning at the index; or
49 * #EOS if end of input is encountered
50 */
51utf32_t ub_get_next_char_utf8(
52 const utf8_t *s,
53 size_t len,
54 size_t *ip)
55{
56 utf8_t ch;
57 utf32_t res;
58
59 assert(*ip <= len);
60 if (*ip == len)
61 return EOS;
62 ch = s[*ip];
63
64 if (ch < 0xC2 || ch > 0xF4)
65 { /* One-byte sequence, tail (should not occur), or invalid */
66 *ip += 1;
67 return ch;
68 }
69 else if (ch < 0xE0)
70 { /* Two-byte sequence */
71 if (*ip + 2 > len)
72 return EOS;
73 res = ((ch & 0x1F) << 6) + (s[*ip + 1] & 0x3F);
74 *ip += 2;
75 return res;
76 }
77 else if (ch < 0xF0)
78 { /* Three-byte sequence */
79 if (*ip + 3 > len)
80 return EOS;
81 res = ((ch & 0x0F) << 12) +
82 ((s[*ip + 1] & 0x3F) << 6) +
83 ((s[*ip + 2] & 0x3F));
84 *ip += 3;
85 return res;
86 }
87 else
88 { /* Four-byte sequence */
89 if (*ip + 4 > len)
90 return EOS;
91 res = ((ch & 0x07) << 18) +
92 ((s[*ip + 1] & 0x3F) << 12) +
93 ((s[*ip + 2] & 0x3F) << 6) +
94 ((s[*ip + 3] & 0x3F));
95 *ip += 4;
96 return res;
97 }
98}
99
100/**
101 * Gets the next Unicode character in a UTF-16 sequence. The index will
102 * be advanced to the next complete character, unless the end of string
103 * is reached in the middle of a UTF-16 surrogate pair.
104 *
105 * @param[in] s input UTF-16 string
106 * @param[in] len length of the string in words
107 * @param[in,out] ip pointer to the index
108 * @return the Unicode character beginning at the index; or
109 * #EOS if end of input is encountered
110 */
111utf32_t ub_get_next_char_utf16(
112 const utf16_t *s,
113 size_t len,
114 size_t *ip)
115{
116 utf16_t ch;
117
118 assert(*ip <= len);
119 if (*ip == len)
120 return EOS;
121 ch = s[(*ip)++];
122
123 if (ch < 0xD800 || ch > 0xDBFF)
124 { /* If the character is not a high surrogate */
125 return ch;
126 }
127 if (*ip == len)
128 { /* If the input ends here (an error) */
129 --(*ip);
130 return EOS;
131 }
132 if (s[*ip] < 0xDC00 || s[*ip] > 0xDFFF)
133 { /* If the next character is not the low surrogate (an error) */
134 return ch;
135 }
136 /* Return the constructed character and advance the index again */
137 return (((utf32_t)ch & 0x3FF) << 10) + (s[(*ip)++] & 0x3FF) + 0x10000;
138}
139
140/**
141 * Gets the next Unicode character in a UTF-32 sequence. The index will
142 * be advanced to the next character.
143 *
144 * @param[in] s input UTF-32 string
145 * @param[in] len length of the string in dwords
146 * @param[in,out] ip pointer to the index
147 * @return the Unicode character beginning at the index; or
148 * #EOS if end of input is encountered
149 */
150utf32_t ub_get_next_char_utf32(
151 const utf32_t *s,
152 size_t len,
153 size_t *ip)
154{
155 assert(*ip <= len);
156 if (*ip == len)
157 return EOS;
158 return s[(*ip)++];
159}