summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Boris Faure <billiob@gmail.com> 2020-06-20 11:37:56 +0100
committer: Carsten Haitzler (Rasterman) <raster@rasterman.com> 2020-06-20 11:37:56 +0100
commit: abc146f37f87200142d44d0feab8986dd219e1fd (patch)
tree: ff951cf3b231b1e8b7a70b1cc308c156a824e750
parent: b61f755e88b898e51554934682e7cd2526b868ab (diff)
eina_unicode: have explicit type conversions
Summary:
Found by running terminology's tests with UBSAN:
include/eina-1/eina/eina_inline_unicode.x: runtime error: implicit conversion from type 'char' of value -62 (8-bit, signed) to type 'unsigned char' changed the value to 194 (8-bit, unsigned)

Reviewers: #reviewers, vtorri
Subscribers: cedric, #reviewers, #committers
Tags: #efl
Differential Revision: https://phab.enlightenment.org/D11972
-rw-r--r-- src/lib/eina/eina_inline_unicode.x 7
-rw-r--r-- src/lib/eina/eina_unicode.c 48
2 files changed, 35 insertions, 20 deletions
diff --git a/src/lib/eina/eina_inline_unicode.x b/src/lib/eina/eina_inline_unicode.x
index 7b7f7592b4..08e1e15018 100644
--- a/src/lib/eina/eina_inline_unicode.x
+++ b/src/lib/eina/eina_inline_unicode.x
@@ -40,7 +40,7 @@ eina_unicode_utf8_next_get(const char *buf, int *iindex)
40 ind = *iindex; 40 ind = *iindex;
41 41
42 /* if this char is the null terminator, exit */ 42 /* if this char is the null terminator, exit */
43 if ((d = buf[ind++]) == 0) return 0; 43 if ((d = (unsigned char)buf[ind++]) == 0) return 0;
44 44
45 if ((d & 0x80) == 0) 45 if ((d & 0x80) == 0)
46 { // 1 byte (7bit) - 0xxxxxxx 46 { // 1 byte (7bit) - 0xxxxxxx
@@ -51,7 +51,8 @@ eina_unicode_utf8_next_get(const char *buf, int *iindex)
51 if ((d & 0xe0) == 0xc0) 51 if ((d & 0xe0) == 0xc0)
52 { // 2 byte (11bit) - 110xxxxx 10xxxxxx 52 { // 2 byte (11bit) - 110xxxxx 10xxxxxx
53 r = (d & 0x1f) << 6; 53 r = (d & 0x1f) << 6;
54 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 54 if (((d = (unsigned char)buf[ind++]) == 0) ||
55 EINA_IS_INVALID_BYTE(d) ||
55 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 56 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
56 r |= (d & 0x3f); 57 r |= (d & 0x3f);
57 if (r <= 0x7F) goto error; 58 if (r <= 0x7F) goto error;
@@ -65,7 +66,7 @@ eina_unicode_utf8_next_get(const char *buf, int *iindex)
65 * we just use the invalid unicode codepoints 8 lower bits represent 66 * we just use the invalid unicode codepoints 8 lower bits represent
66 * the original char */ 67 * the original char */
67error: 68error:
68 d = buf[*iindex]; 69 d = (unsigned char)buf[*iindex];
69 (*iindex)++; 70 (*iindex)++;
70 return ERROR_REPLACEMENT_BASE | d; 71 return ERROR_REPLACEMENT_BASE | d;
71} 72}
diff --git a/src/lib/eina/eina_unicode.c b/src/lib/eina/eina_unicode.c
index 0bb70ffae9..6ede02ccec 100644
--- a/src/lib/eina/eina_unicode.c
+++ b/src/lib/eina/eina_unicode.c
@@ -193,7 +193,7 @@ eina_unicode_escape(const Eina_Unicode *str)
193EAPI Eina_Unicode 193EAPI Eina_Unicode
194_eina_unicode_utf8_next_get(int ind, 194_eina_unicode_utf8_next_get(int ind,
195 unsigned char d, 195 unsigned char d,
196 const char *buf, 196 const char *buf,
197 int *iindex) 197 int *iindex)
198{ 198{
199 Eina_Unicode r; 199 Eina_Unicode r;
@@ -201,10 +201,12 @@ _eina_unicode_utf8_next_get(int ind,
201 if ((d & 0xf0) == 0xe0) 201 if ((d & 0xf0) == 0xe0)
202 { // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx 202 { // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx
203 r = (d & 0x0f) << 12; 203 r = (d & 0x0f) << 12;
204 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 204 if (((d = (unsigned char)buf[ind++]) == 0) ||
205 EINA_IS_INVALID_BYTE(d) ||
205 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 206 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
206 r |= (d & 0x3f) << 6; 207 r |= (d & 0x3f) << 6;
207 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 208 if (((d = (unsigned char)buf[ind++]) == 0) ||
209 EINA_IS_INVALID_BYTE(d) ||
208 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 210 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
209 r |= (d & 0x3f); 211 r |= (d & 0x3f);
210 if (r <= 0x7FF) goto error; 212 if (r <= 0x7FF) goto error;
@@ -214,13 +216,16 @@ _eina_unicode_utf8_next_get(int ind,
214 if ((d & 0xf8) == 0xf0) 216 if ((d & 0xf8) == 0xf0)
215 { // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 217 { // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
216 r = (d & 0x07) << 18; 218 r = (d & 0x07) << 18;
217 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 219 if (((d = (unsigned char)buf[ind++]) == 0) ||
220 EINA_IS_INVALID_BYTE(d) ||
218 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 221 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
219 r |= (d & 0x3f) << 12; 222 r |= (d & 0x3f) << 12;
220 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 223 if (((d = (unsigned char)buf[ind++]) == 0) ||
224 EINA_IS_INVALID_BYTE(d) ||
221 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 225 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
222 r |= (d & 0x3f) << 6; 226 r |= (d & 0x3f) << 6;
223 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 227 if (((d = (unsigned char)buf[ind++]) == 0) ||
228 EINA_IS_INVALID_BYTE(d) ||
224 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 229 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
225 r |= (d & 0x3f); 230 r |= (d & 0x3f);
226 if (r <= 0xFFFF) goto error; 231 if (r <= 0xFFFF) goto error;
@@ -230,16 +235,20 @@ _eina_unicode_utf8_next_get(int ind,
230 if ((d & 0xfc) == 0xf8) 235 if ((d & 0xfc) == 0xf8)
231 { // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 236 { // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
232 r = (d & 0x03) << 24; 237 r = (d & 0x03) << 24;
233 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 238 if (((d = (unsigned char)buf[ind++]) == 0) ||
239 EINA_IS_INVALID_BYTE(d) ||
234 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 240 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
235 r |= (d & 0x3f) << 18; 241 r |= (d & 0x3f) << 18;
236 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 242 if (((d = (unsigned char)buf[ind++]) == 0) ||
243 EINA_IS_INVALID_BYTE(d) ||
237 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 244 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
238 r |= (d & 0x3f) << 12; 245 r |= (d & 0x3f) << 12;
239 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 246 if (((d = (unsigned char)buf[ind++]) == 0) ||
247 EINA_IS_INVALID_BYTE(d) ||
240 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 248 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
241 r |= (d & 0x3f) << 6; 249 r |= (d & 0x3f) << 6;
242 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 250 if (((d = (unsigned char)buf[ind++]) == 0) ||
251 EINA_IS_INVALID_BYTE(d) ||
243 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 252 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
244 r |= (d & 0x3f); 253 r |= (d & 0x3f);
245 if (r <= 0x1FFFFF) goto error; 254 if (r <= 0x1FFFFF) goto error;
@@ -249,19 +258,24 @@ _eina_unicode_utf8_next_get(int ind,
249 if ((d & 0xfe) == 0xfc) 258 if ((d & 0xfe) == 0xfc)
250 { // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 259 { // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
251 r = (d & 0x01) << 30; 260 r = (d & 0x01) << 30;
252 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 261 if (((d = (unsigned char)buf[ind++]) == 0)
262 || EINA_IS_INVALID_BYTE(d) ||
253 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 263 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
254 r |= (d & 0x3f) << 24; 264 r |= (d & 0x3f) << 24;
255 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 265 if (((d = (unsigned char) buf[ind++]) == 0) ||
266 EINA_IS_INVALID_BYTE(d) ||
256 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 267 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
257 r |= (d & 0x3f) << 18; 268 r |= (d & 0x3f) << 18;
258 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 269 if (((d = (unsigned char)buf[ind++]) == 0) ||
270 EINA_IS_INVALID_BYTE(d) ||
259 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 271 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
260 r |= (d & 0x3f) << 12; 272 r |= (d & 0x3f) << 12;
261 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 273 if (((d = (unsigned char)buf[ind++]) == 0) ||
274 EINA_IS_INVALID_BYTE(d) ||
262 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 275 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
263 r |= (d & 0x3f) << 6; 276 r |= (d & 0x3f) << 6;
264 if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || 277 if (((d = (unsigned char)buf[ind++]) == 0) ||
278 EINA_IS_INVALID_BYTE(d) ||
265 !EINA_IS_CONTINUATION_BYTE(d)) goto error; 279 !EINA_IS_CONTINUATION_BYTE(d)) goto error;
266 r |= (d & 0x3f); 280 r |= (d & 0x3f);
267 if (r <= 0x3FFFFFF) goto error; 281 if (r <= 0x3FFFFFF) goto error;
@@ -273,7 +287,7 @@ _eina_unicode_utf8_next_get(int ind,
273 * we just use the invalid unicode codepoints 8 lower bits represent 287 * we just use the invalid unicode codepoints 8 lower bits represent
274 * the original char */ 288 * the original char */
275error: 289error:
276 d = buf[*iindex]; 290 d = (unsigned char)buf[*iindex];
277 (*iindex)++; 291 (*iindex)++;
278 return ERROR_REPLACEMENT_BASE | d; 292 return ERROR_REPLACEMENT_BASE | d;
279} 293}
@@ -298,7 +312,7 @@ eina_unicode_utf8_get_prev(const char *buf, int *iindex)
298 /* Next advance iindex to previous codepoint */ 312 /* Next advance iindex to previous codepoint */
299 ind = *iindex; 313 ind = *iindex;
300 ind--; 314 ind--;
301 while ((ind > 0) && ((buf[ind] & 0xc0) == 0x80)) 315 while ((ind > 0) && (((unsigned char)buf[ind] & 0xc0) == 0x80))
302 ind--; 316 ind--;
303 317
304 *iindex = ind; 318 *iindex = ind;