diff --git a/src/lib/eina/eina_abi.c b/src/lib/eina/eina_abi.c index 0372f08935..b6c109e5c3 100644 --- a/src/lib/eina/eina_abi.c +++ b/src/lib/eina/eina_abi.c @@ -22,6 +22,7 @@ #include "eina_config.h" #include "eina_private.h" +#include "eina_unicode.h" #include "eina_safety_checks.h" #if EINA_SIZEOF_WCHAR_T >= 4 @@ -32,55 +33,12 @@ typedef wchar_t Eina_Unicode; typedef uint32_t Eina_Unicode; #endif -EAPI Eina_Unicode -_eina_unicode_utf8_get_next(int ind, - unsigned char d, - const char *buf, - int *iindex); - #define ERROR_REPLACEMENT_BASE 0xDC80 #define EINA_IS_INVALID_BYTE(x) ((x == 192) || (x == 193) || (x >= 245)) #define EINA_IS_CONTINUATION_BYTE(x) ((x & 0xC0) == 0x80) EAPI Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex) { - int ind; - Eina_Unicode r; - unsigned char d; - - EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0); - EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0); - - ind = *iindex; - - /* if this char is the null terminator, exit */ - if ((d = buf[ind++]) == 0) return 0; - - if ((d & 0x80) == 0) - { // 1 byte (7bit) - 0xxxxxxx - *iindex = ind; - return d; - } - - if ((d & 0xe0) == 0xc0) - { // 2 byte (11bit) - 110xxxxx 10xxxxxx - r = (d & 0x1f) << 6; - if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) || - !EINA_IS_CONTINUATION_BYTE(d)) goto error; - r |= (d & 0x3f); - if (r <= 0x7F) goto error; - *iindex = ind; - return r; - } - - return _eina_unicode_utf8_get_next(ind, d, buf, iindex); - -/* Gets here where there was an error and we want to replace the char - * we just use the invalid unicode codepoints 8 lower bits represent - * the original char */ -error: - d = buf[*iindex]; - (*iindex)++; - return ERROR_REPLACEMENT_BASE | d; + return eina_unicode_utf8_next_get(buf, iindex); } diff --git a/src/lib/eina/eina_inline_unicode.x b/src/lib/eina/eina_inline_unicode.x index 8fcb4f7e86..50f70d275c 100644 --- a/src/lib/eina/eina_inline_unicode.x +++ b/src/lib/eina/eina_inline_unicode.x @@ -21,7 +21,7 @@ #include "eina_safety_checks.h" -EAPI 
Eina_Unicode _eina_unicode_utf8_get_next(int ind, +EAPI Eina_Unicode _eina_unicode_utf8_next_get(int ind, unsigned char d, const char *buf, int *iindex); @@ -31,7 +31,7 @@ EAPI Eina_Unicode _eina_unicode_utf8_get_next(int ind, #define EINA_IS_CONTINUATION_BYTE(x) ((x & 0xC0) == 0x80) static inline Eina_Unicode -eina_unicode_utf8_get_next(const char *buf, int *iindex) +eina_unicode_utf8_next_get(const char *buf, int *iindex) { int ind; Eina_Unicode r; @@ -62,7 +62,7 @@ eina_unicode_utf8_get_next(const char *buf, int *iindex) return r; } - return _eina_unicode_utf8_get_next(ind, d, buf, iindex); + return _eina_unicode_utf8_next_get(ind, d, buf, iindex); /* Gets here where there was an error and we want to replace the char * we just use the invalid unicode codepoints 8 lower bits represent diff --git a/src/lib/eina/eina_unicode.c b/src/lib/eina/eina_unicode.c index 6d9a74051d..f59b3ea027 100644 --- a/src/lib/eina/eina_unicode.c +++ b/src/lib/eina/eina_unicode.c @@ -188,7 +188,7 @@ eina_unicode_escape(const Eina_Unicode *str) #define ERROR_REPLACEMENT_END 0xDCFF EAPI Eina_Unicode -_eina_unicode_utf8_get_next(int ind, +_eina_unicode_utf8_next_get(int ind, unsigned char d, const char *buf, int *iindex) diff --git a/src/lib/eina/eina_unicode.h b/src/lib/eina/eina_unicode.h index 8b614d9cbf..898f60e700 100644 --- a/src/lib/eina/eina_unicode.h +++ b/src/lib/eina/eina_unicode.h @@ -111,6 +111,21 @@ EAPI Eina_Unicode *eina_unicode_escape(const Eina_Unicode *str) EINA_ARG_NONNULL /* UTF-8 Handling */ +/** + * Reads UTF8 bytes from @p buf, starting at @p iindex and returns + * the decoded code point at @p iindex offset, and advances @p iindex + * to the next code point after this. @p iindex is always advanced, + * unless the advancement would go past the @c NULL terminator. + * On error: returns a codepoint between DC80 and DCFF where the low 8 bits + * are the byte's value. + * + * @param buf the string + * @param iindex the index to look at and return by. 
+ * @return the codepoint found, 0 if @p buf or @p iindex are NULL + * @since 1.8.0 + */ +static inline Eina_Unicode eina_unicode_utf8_next_get(const char *buf, int *iindex) EINA_ARG_NONNULL(1, 2); + /** * Reads UTF8 bytes from @p buf, starting at @p iindex and returns * the decoded code point at @p iindex offset, and advances @p iindex @@ -124,7 +139,7 @@ EAPI Eina_Unicode *eina_unicode_escape(const Eina_Unicode *str) EINA_ARG_NONNULL * @return the codepoint found, 0 if @p buf or @p iindex are NULL * @since 1.1.0 */ -static inline Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex) EINA_ARG_NONNULL(1, 2); +EAPI Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex) EINA_ARG_NONNULL(1, 2) EINA_DEPRECATED; /** * Reads UTF8 bytes from @p buf, starting at @p iindex and returns