From 066543089d5226575b36d6b4b0213035958b7363 Mon Sep 17 00:00:00 2001 From: Cedric BAIL Date: Mon, 21 Jan 2013 09:37:14 +0000 Subject: [PATCH] efl: inline eina_unicode_utf8_get_next. SVN revision: 83035 --- src/Makefile_Eina.am | 6 +- src/lib/eina/eina_abi.c | 86 +++++++++++++++++++++++++++++ src/lib/eina/eina_inline_unicode.x | 76 +++++++++++++++++++++++++ src/lib/eina/eina_unicode.c | 89 +++++++++++------------------- src/lib/eina/eina_unicode.h | 4 +- 5 files changed, 201 insertions(+), 60 deletions(-) create mode 100644 src/lib/eina/eina_abi.c create mode 100644 src/lib/eina/eina_inline_unicode.x diff --git a/src/Makefile_Eina.am b/src/Makefile_Eina.am index 7eef6dba4c..7604959ceb 100644 --- a/src/Makefile_Eina.am +++ b/src/Makefile_Eina.am @@ -78,7 +78,8 @@ lib/eina/eina_inline_value.x \ lib/eina/eina_inline_lock_barrier.x \ lib/eina/eina_tmpstr.h \ lib/eina/eina_alloca.h \ -lib/eina/eina_cow.h +lib/eina/eina_cow.h \ +lib/eina/eina_inline_unicode.x # Will be back for developper after 1.2. # lib/eina/eina_model.h @@ -142,7 +143,8 @@ lib/eina/eina_share_common.h \ lib/eina/eina_private.h \ lib/eina/eina_strbuf_common.h \ lib/eina/eina_tmpstr.c \ -lib/eina/eina_cow.c +lib/eina/eina_cow.c \ +lib/eina/eina_abi.c # Will be back for developper after 1.2 # lib/eina/eina_model.c \ diff --git a/src/lib/eina/eina_abi.c b/src/lib/eina/eina_abi.c new file mode 100644 index 0000000000..0372f08935 --- /dev/null +++ b/src/lib/eina/eina_abi.c @@ -0,0 +1,86 @@ +/* EINA - EFL data type library + * Copyright (C) 2013 Cedric Bail + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library;
+ * if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "eina_config.h"
+#include "eina_private.h"
+#include "eina_safety_checks.h"
+
+#if EINA_SIZEOF_WCHAR_T >= 4
+# include <wchar.h>
+typedef wchar_t Eina_Unicode;
+#else
+# include <stdint.h>
+typedef uint32_t Eina_Unicode;
+#endif
+
+/* Slow path for lead bytes that are neither ASCII nor a 2 byte start. */
+EAPI Eina_Unicode _eina_unicode_utf8_get_next(int ind, unsigned char d,
+                                              const char *buf, int *iindex);
+
+#define ERROR_REPLACEMENT_BASE 0xDC80
+#define EINA_IS_INVALID_BYTE(x) (((x) == 192) || ((x) == 193) || ((x) >= 245))
+#define EINA_IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)
+
+/* Decode the UTF-8 sequence at buf[*iindex] and move *iindex past it;
+ * returns 0, leaving *iindex alone, at the terminating NUL byte. */
+EAPI Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex)
+{
+   int ind;
+   Eina_Unicode r;
+   unsigned char d;
+
+   EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
+   EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
+
+   ind = *iindex;
+
+   /* if this char is the null terminator, exit */
+   if ((d = buf[ind++]) == 0) return 0;
+
+   if ((d & 0x80) == 0)
+     { // 1 byte (7bit) - 0xxxxxxx
+        *iindex = ind;
+        return d;
+     }
+
+   if ((d & 0xe0) == 0xc0)
+     { // 2 byte (11bit) - 110xxxxx 10xxxxxx
+        r = (d & 0x1f) << 6;
+        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+            !EINA_IS_CONTINUATION_BYTE(d)) goto error;
+        r |= (d & 0x3f);
+        if (r <= 0x7F) goto error; /* value fits in 1 byte: overlong form */
+        *iindex = ind;
+        return r;
+     }
+
+   return _eina_unicode_utf8_get_next(ind, d, buf, iindex);
+
+/* Reached on a malformed sequence: skip exactly one byte and return a code
+ * point from the invalid range starting at ERROR_REPLACEMENT_BASE, OR-ed
+ * with that byte, so the original byte stays recoverable. */
+error:
+   d = buf[*iindex];
+   (*iindex)++;
+   return ERROR_REPLACEMENT_BASE | d;
+}
+
diff --git 
a/src/lib/eina/eina_inline_unicode.x b/src/lib/eina/eina_inline_unicode.x
new file mode 100644
index 0000000000..8fcb4f7e86
--- /dev/null
+++ b/src/lib/eina/eina_inline_unicode.x
@@ -0,0 +1,76 @@
+/* EINA - EFL data type library
+ * Copyright (C) 2013 Cedric Bail
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library;
+ * if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef EINA_INLINE_UNICODE_
+# define EINA_INLINE_UNICODE_
+
+#include "eina_safety_checks.h"
+
+/* Slow path for lead bytes that are neither ASCII nor a 2 byte start. */
+EAPI Eina_Unicode _eina_unicode_utf8_get_next(int ind, unsigned char d,
+                                              const char *buf, int *iindex);
+
+#define ERROR_REPLACEMENT_BASE 0xDC80
+#define EINA_IS_INVALID_BYTE(x) (((x) == 192) || ((x) == 193) || ((x) >= 245))
+#define EINA_IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)
+
+/* Decode the UTF-8 sequence at buf[*iindex] and move *iindex past it;
+ * returns 0, leaving *iindex alone, at the terminating NUL byte. */
+static inline Eina_Unicode
+eina_unicode_utf8_get_next(const char *buf, int *iindex)
+{
+   int ind;
+   Eina_Unicode r;
+   unsigned char d;
+
+   EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
+   EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
+
+   ind = *iindex;
+   /* if this char is the null terminator, exit */
+   if ((d = buf[ind++]) == 0) return 0;
+
+   if ((d & 0x80) == 0)
+     { // 1 byte (7bit) - 0xxxxxxx
+        *iindex = ind;
+        return d;
+     }
+
+   if ((d & 0xe0) == 0xc0)
+     { // 2 byte (11bit) - 110xxxxx 10xxxxxx
+        r = (d & 0x1f) << 6;
+        if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
+            !EINA_IS_CONTINUATION_BYTE(d)) goto error;
+        r |= (d & 0x3f);
+        if (r <= 0x7F) goto error; /* value fits in 1 byte: overlong form */
+        *iindex = ind;
+        return r;
+     }
+
+   return _eina_unicode_utf8_get_next(ind, d, buf, iindex);
+
+/* Reached on a malformed sequence: skip exactly one byte and return a code
+ * point from the invalid range starting at ERROR_REPLACEMENT_BASE, OR-ed
+ * with that byte, so the original byte stays recoverable. */
+error:
+   d = buf[*iindex];
+   (*iindex)++;
+   return ERROR_REPLACEMENT_BASE | d;
+}
+
+#endif
diff --git a/src/lib/eina/eina_unicode.c b/src/lib/eina/eina_unicode.c index a1993de697..6d9a74051d 100644 --- a/src/lib/eina/eina_unicode.c +++ b/src/lib/eina/eina_unicode.c @@ -185,49 +185,24 @@ eina_unicode_escape(const Eina_Unicode *str) #define EINA_UNICODE_UTF8_BYTES_PER_CHAR 6 /* The replacement range that will be used for bad utf8 chars. */ -#define ERROR_REPLACEMENT_BASE 0xDC80 #define ERROR_REPLACEMENT_END 0xDCFF -#define IS_INVALID_BYTE(x) ((x == 192) || (x == 193) || (x >= 245)) -#define IS_CONTINUATION_BYTE(x) ((x & 0xC0) == 0x80) EAPI Eina_Unicode -eina_unicode_utf8_get_next(const char *buf, int *iindex) +_eina_unicode_utf8_get_next(int ind, + unsigned char d, + const char *buf, + int *iindex) { - int ind; Eina_Unicode r; - unsigned char d; - EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0); - EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0); - - ind = *iindex; - - /* if this char is the null terminator, exit */ - if ((d = buf[ind++]) == 0) return 0; - - if ((d & 0x80) == 0) - { // 1 byte (7bit) - 0xxxxxxx - *iindex = ind; - return d; - } - if ((d & 0xe0) == 0xc0) - { // 2 byte (11bit) - 110xxxxx 10xxxxxx - r = (d & 0x1f) << 6; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; - r |= (d & 0x3f); - if (r <= 0x7F) goto error; - *iindex = ind; - return r; - } if ((d & 0xf0) == 0xe0) { // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx r = (d & 0x0f) << 12; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 6; - if (((d = 
buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f); if (r <= 0x7FF) goto error; *iindex = ind; @@ -236,14 +211,14 @@ eina_unicode_utf8_get_next(const char *buf, int *iindex) if ((d & 0xf8) == 0xf0) { // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx r = (d & 0x07) << 18; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 12; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 6; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f); if (r <= 0xFFFF) goto error; *iindex = ind; @@ -252,17 +227,17 @@ eina_unicode_utf8_get_next(const char *buf, int *iindex) if ((d & 0xfc) == 0xf8) { // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx r = (d & 0x03) << 24; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 18; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 12; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 6; - 
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f); if (r <= 0x1FFFFF) goto error; *iindex = ind; @@ -271,20 +246,20 @@ eina_unicode_utf8_get_next(const char *buf, int *iindex) if ((d & 0xfe) == 0xfc) { // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx r = (d & 0x01) << 30; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 24; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 18; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 12; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f) << 6; - if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) || - !IS_CONTINUATION_BYTE(d)) goto error; + if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || + !EINA_IS_CONTINUATION_BYTE(d)) goto error; r |= (d & 0x3f); if (r <= 0x3FFFFFF) goto error; *iindex = ind; diff --git a/src/lib/eina/eina_unicode.h b/src/lib/eina/eina_unicode.h index af5cbca4d8..8b614d9cbf 100644 --- a/src/lib/eina/eina_unicode.h +++ b/src/lib/eina/eina_unicode.h @@ -124,7 +124,7 @@ EAPI Eina_Unicode *eina_unicode_escape(const Eina_Unicode *str) EINA_ARG_NONNULL * @return the codepoint found, 0 if @p buf or @p iindex are NULL * @since 1.1.0 */ -EAPI Eina_Unicode 
eina_unicode_utf8_get_next(const char *buf, int *iindex) EINA_ARG_NONNULL(1, 2); +static inline Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex) EINA_ARG_NONNULL(1, 2); /** * Reads UTF8 bytes from @p buf, starting at @p iindex and returns @@ -172,6 +172,8 @@ EAPI Eina_Unicode *eina_unicode_utf8_to_unicode(const char *utf, int *_len) EINA */ EAPI char * eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len) EINA_WARN_UNUSED_RESULT EINA_ARG_NONNULL(1) EINA_MALLOC; +#include "eina_inline_unicode.x" + /** * @} */