forked from enlightenment/efl
efl: inline eina_unicode_utf8_get_next.
SVN revision: 83035
This commit is contained in:
parent
3e7e37630f
commit
066543089d
|
@ -78,7 +78,8 @@ lib/eina/eina_inline_value.x \
|
|||
lib/eina/eina_inline_lock_barrier.x \
|
||||
lib/eina/eina_tmpstr.h \
|
||||
lib/eina/eina_alloca.h \
|
||||
lib/eina/eina_cow.h
|
||||
lib/eina/eina_cow.h \
|
||||
lib/eina/eina_inline_unicode.x
|
||||
|
||||
# Will be back for developers after 1.2.
|
||||
# lib/eina/eina_model.h
|
||||
|
@ -142,7 +143,8 @@ lib/eina/eina_share_common.h \
|
|||
lib/eina/eina_private.h \
|
||||
lib/eina/eina_strbuf_common.h \
|
||||
lib/eina/eina_tmpstr.c \
|
||||
lib/eina/eina_cow.c
|
||||
lib/eina/eina_cow.c \
|
||||
lib/eina/eina_abi.c
|
||||
|
||||
# Will be back for developers after 1.2
|
||||
# lib/eina/eina_model.c \
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
/* EINA - EFL data type library
|
||||
* Copyright (C) 2013 Cedric Bail
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library;
|
||||
* if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include "eina_config.h"
|
||||
#include "eina_private.h"
|
||||
#include "eina_safety_checks.h"
|
||||
|
||||
#if EINA_SIZEOF_WCHAR_T >= 4
|
||||
# include <wchar.h>
|
||||
typedef wchar_t Eina_Unicode;
|
||||
#else
|
||||
# include <inttypes.h>
|
||||
typedef uint32_t Eina_Unicode;
|
||||
#endif
|
||||
|
||||
EAPI Eina_Unicode
|
||||
_eina_unicode_utf8_get_next(int ind,
|
||||
unsigned char d,
|
||||
const char *buf,
|
||||
int *iindex);
|
||||
|
||||
/* Base of the replacement range used for bad UTF-8 bytes; the offending
 * byte is OR-ed into this value to build the returned codepoint. */
#define ERROR_REPLACEMENT_BASE 0xDC80

/* Non-zero when byte value x is one the decoder always rejects:
 * 192, 193, or anything from 245 upwards.
 * The argument is parenthesized so expression arguments (e.g. `c & 0xFF`)
 * are evaluated as a whole; note that x is still evaluated up to three
 * times, so do not pass expressions with side effects. */
#define EINA_IS_INVALID_BYTE(x) (((x) == 192) || ((x) == 193) || ((x) >= 245))

/* Non-zero when x has the bit pattern 10xxxxxx of a continuation byte.
 * The argument is parenthesized so expression arguments (e.g. `lo | 0x80`)
 * are masked as a whole. */
#define EINA_IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)
|
||||
|
||||
/*
 * Exported (non-inline) decoder for the next UTF-8 sequence in buf.
 * NOTE(review): presumably kept so that binaries linked against the old
 * exported symbol keep working now that the header version is inline --
 * confirm against the build files.
 *
 * buf    - UTF-8 string to read from.
 * iindex - in: byte offset at which to start reading;
 *          out: byte offset just past the bytes that were consumed.
 *
 * Returns the decoded codepoint. Returns 0 when buf or iindex is NULL
 * (via the safety-check macros) or when the byte at *iindex is the NUL
 * terminator; in the NUL case *iindex is left untouched.
 *
 * One- and two-byte sequences are decoded here; anything else is handed
 * to _eina_unicode_utf8_get_next(). A malformed or overlong two-byte
 * sequence consumes exactly one byte and yields
 * ERROR_REPLACEMENT_BASE | <that byte>.
 */
EAPI Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex)
{
   int pos;
   Eina_Unicode code;
   unsigned char byte;

   EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
   EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);

   pos = *iindex;
   byte = buf[pos++];

   /* End of string: report 0 and do not advance the caller's index. */
   if (byte == 0) return 0;

   /* 1 byte (7bit) - 0xxxxxxx */
   if ((byte & 0x80) == 0)
     {
        *iindex = pos;
        return byte;
     }

   /* Not a two-byte lead: the out-of-line helper deals with the rest. */
   if ((byte & 0xe0) != 0xc0)
     return _eina_unicode_utf8_get_next(pos, byte, buf, iindex);

   /* 2 byte (11bit) - 110xxxxx 10xxxxxx */
   code = (byte & 0x1f) << 6;
   byte = buf[pos++];
   if ((byte != 0) && !EINA_IS_INVALID_BYTE(byte) &&
       EINA_IS_CONTINUATION_BYTE(byte))
     {
        code |= (byte & 0x3f);
        /* Values up to 0x7F fit in one byte, so this encoding is overlong. */
        if (code > 0x7F)
          {
             *iindex = pos;
             return code;
          }
     }

   /* Error: consume only the first byte of the bad sequence and map it
    * into the replacement range; the low 8 bits carry the original byte. */
   byte = buf[*iindex];
   (*iindex)++;
   return ERROR_REPLACEMENT_BASE | byte;
}
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/* EINA - EFL data type library
|
||||
* Copyright (C) 2013 Cedric Bail
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library;
|
||||
* if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef EINA_INLINE_UNICODE_
|
||||
# define EINA_INLINE_UNICODE_
|
||||
|
||||
#include "eina_safety_checks.h"
|
||||
|
||||
EAPI Eina_Unicode _eina_unicode_utf8_get_next(int ind,
|
||||
unsigned char d,
|
||||
const char *buf,
|
||||
int *iindex);
|
||||
|
||||
/* Base of the replacement range used for bad UTF-8 bytes; the offending
 * byte is OR-ed into this value to build the returned codepoint. */
#define ERROR_REPLACEMENT_BASE 0xDC80

/* Non-zero when byte value x is one the decoder always rejects:
 * 192, 193, or anything from 245 upwards.
 * The argument is parenthesized so expression arguments (e.g. `c & 0xFF`)
 * are evaluated as a whole; note that x is still evaluated up to three
 * times, so do not pass expressions with side effects. */
#define EINA_IS_INVALID_BYTE(x) (((x) == 192) || ((x) == 193) || ((x) >= 245))

/* Non-zero when x has the bit pattern 10xxxxxx of a continuation byte.
 * The argument is parenthesized so expression arguments (e.g. `lo | 0x80`)
 * are masked as a whole. */
#define EINA_IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)
|
||||
|
||||
static inline Eina_Unicode
|
||||
eina_unicode_utf8_get_next(const char *buf, int *iindex)
|
||||
{
|
||||
int ind;
|
||||
Eina_Unicode r;
|
||||
unsigned char d;
|
||||
|
||||
EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
|
||||
EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
|
||||
|
||||
ind = *iindex;
|
||||
|
||||
/* if this char is the null terminator, exit */
|
||||
if ((d = buf[ind++]) == 0) return 0;
|
||||
|
||||
if ((d & 0x80) == 0)
|
||||
{ // 1 byte (7bit) - 0xxxxxxx
|
||||
*iindex = ind;
|
||||
return d;
|
||||
}
|
||||
|
||||
if ((d & 0xe0) == 0xc0)
|
||||
{ // 2 byte (11bit) - 110xxxxx 10xxxxxx
|
||||
r = (d & 0x1f) << 6;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f);
|
||||
if (r <= 0x7F) goto error;
|
||||
*iindex = ind;
|
||||
return r;
|
||||
}
|
||||
|
||||
return _eina_unicode_utf8_get_next(ind, d, buf, iindex);
|
||||
|
||||
/* Gets here where there was an error and we want to replace the char
|
||||
* we just use the invalid unicode codepoints 8 lower bits represent
|
||||
* the original char */
|
||||
error:
|
||||
d = buf[*iindex];
|
||||
(*iindex)++;
|
||||
return ERROR_REPLACEMENT_BASE | d;
|
||||
}
|
||||
|
||||
#endif
|
|
@ -185,49 +185,24 @@ eina_unicode_escape(const Eina_Unicode *str)
|
|||
|
||||
#define EINA_UNICODE_UTF8_BYTES_PER_CHAR 6
|
||||
/* The replacement range that will be used for bad utf8 chars. */
|
||||
#define ERROR_REPLACEMENT_BASE 0xDC80
|
||||
#define ERROR_REPLACEMENT_END 0xDCFF
|
||||
#define IS_INVALID_BYTE(x) ((x == 192) || (x == 193) || (x >= 245))
|
||||
#define IS_CONTINUATION_BYTE(x) ((x & 0xC0) == 0x80)
|
||||
|
||||
EAPI Eina_Unicode
|
||||
eina_unicode_utf8_get_next(const char *buf, int *iindex)
|
||||
_eina_unicode_utf8_get_next(int ind,
|
||||
unsigned char d,
|
||||
const char *buf,
|
||||
int *iindex)
|
||||
{
|
||||
int ind;
|
||||
Eina_Unicode r;
|
||||
unsigned char d;
|
||||
|
||||
EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
|
||||
EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
|
||||
|
||||
ind = *iindex;
|
||||
|
||||
/* if this char is the null terminator, exit */
|
||||
if ((d = buf[ind++]) == 0) return 0;
|
||||
|
||||
if ((d & 0x80) == 0)
|
||||
{ // 1 byte (7bit) - 0xxxxxxx
|
||||
*iindex = ind;
|
||||
return d;
|
||||
}
|
||||
if ((d & 0xe0) == 0xc0)
|
||||
{ // 2 byte (11bit) - 110xxxxx 10xxxxxx
|
||||
r = (d & 0x1f) << 6;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f);
|
||||
if (r <= 0x7F) goto error;
|
||||
*iindex = ind;
|
||||
return r;
|
||||
}
|
||||
if ((d & 0xf0) == 0xe0)
|
||||
{ // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx
|
||||
r = (d & 0x0f) << 12;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 6;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f);
|
||||
if (r <= 0x7FF) goto error;
|
||||
*iindex = ind;
|
||||
|
@ -236,14 +211,14 @@ eina_unicode_utf8_get_next(const char *buf, int *iindex)
|
|||
if ((d & 0xf8) == 0xf0)
|
||||
{ // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
r = (d & 0x07) << 18;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 12;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 6;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f);
|
||||
if (r <= 0xFFFF) goto error;
|
||||
*iindex = ind;
|
||||
|
@ -252,17 +227,17 @@ eina_unicode_utf8_get_next(const char *buf, int *iindex)
|
|||
if ((d & 0xfc) == 0xf8)
|
||||
{ // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
r = (d & 0x03) << 24;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 18;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 12;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 6;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f);
|
||||
if (r <= 0x1FFFFF) goto error;
|
||||
*iindex = ind;
|
||||
|
@ -271,20 +246,20 @@ eina_unicode_utf8_get_next(const char *buf, int *iindex)
|
|||
if ((d & 0xfe) == 0xfc)
|
||||
{ // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
r = (d & 0x01) << 30;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 24;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 18;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 12;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f) << 6;
|
||||
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
||||
!IS_CONTINUATION_BYTE(d)) goto error;
|
||||
if (((d = buf[ind++]) == 0) || EINA_IS_INVALID_BYTE(d) ||
|
||||
!EINA_IS_CONTINUATION_BYTE(d)) goto error;
|
||||
r |= (d & 0x3f);
|
||||
if (r <= 0x3FFFFFF) goto error;
|
||||
*iindex = ind;
|
||||
|
|
|
@ -124,7 +124,7 @@ EAPI Eina_Unicode *eina_unicode_escape(const Eina_Unicode *str) EINA_ARG_NONNULL
|
|||
* @return the codepoint found, 0 if @p buf or @p iindex are NULL
|
||||
* @since 1.1.0
|
||||
*/
|
||||
EAPI Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex) EINA_ARG_NONNULL(1, 2);
|
||||
static inline Eina_Unicode eina_unicode_utf8_get_next(const char *buf, int *iindex) EINA_ARG_NONNULL(1, 2);
|
||||
|
||||
/**
|
||||
* Reads UTF8 bytes from @p buf, starting at @p iindex and returns
|
||||
|
@ -172,6 +172,8 @@ EAPI Eina_Unicode *eina_unicode_utf8_to_unicode(const char *utf, int *_len) EINA
|
|||
*/
|
||||
EAPI char * eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len) EINA_WARN_UNUSED_RESULT EINA_ARG_NONNULL(1) EINA_MALLOC;
|
||||
|
||||
#include "eina_inline_unicode.x"
|
||||
|
||||
/**
|
||||
* @}
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue