2010-07-27 01:22:20 -07:00
|
|
|
/* EINA - EFL data type library
|
2010-08-02 02:43:57 -07:00
|
|
|
* Copyright (C) 2010 Tom Hacohen,
|
|
|
|
* Brett Nash
|
2010-07-27 01:22:20 -07:00
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library;
|
|
|
|
* if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2012-01-20 04:52:15 -08:00
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
# include "config.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "eina_config.h"
|
|
|
|
#include "eina_private.h"
|
2012-01-20 05:00:01 -08:00
|
|
|
#include <string.h>
|
2012-01-20 04:52:15 -08:00
|
|
|
|
|
|
|
/* undefs EINA_ARG_NONULL() so NULL checks are not compiled out! */
|
|
|
|
#include "eina_safety_checks.h"
|
2010-07-27 01:22:20 -07:00
|
|
|
#include "eina_unicode.h"
|
|
|
|
|
|
|
|
/* FIXME: check if sizeof(wchar_t) == sizeof(Eina_Unicode) if so,
|
|
|
|
* probably better to use the standard functions */
|
|
|
|
|
2010-08-12 07:16:32 -07:00
|
|
|
/* Shared empty string. The one-element array below holds just the zero
 * terminator; the exported EINA_UNICODE_EMPTY_STRING pointer is initialised
 * to point at that array. (Original note: this two-step definition was the
 * only form that actually worked.) */
|
|
|
|
const Eina_Unicode _EINA_UNICODE_EMPTY_STRING[1] = {0};
|
|
|
|
EAPI const Eina_Unicode *EINA_UNICODE_EMPTY_STRING = _EINA_UNICODE_EMPTY_STRING;
|
2010-07-27 01:22:20 -07:00
|
|
|
EAPI int
|
|
|
|
eina_unicode_strcmp(const Eina_Unicode *a, const Eina_Unicode *b)
|
|
|
|
{
|
2012-01-20 05:00:01 -08:00
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(a, -1);
|
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(b, -1);
|
|
|
|
|
2010-07-27 19:37:05 -07:00
|
|
|
for (; *a && *a == *b; a++, b++)
|
2010-07-27 01:22:20 -07:00
|
|
|
;
|
|
|
|
if (*a == *b)
|
|
|
|
return 0;
|
|
|
|
else if (*a < *b)
|
|
|
|
return -1;
|
2010-07-27 19:37:05 -07:00
|
|
|
else
|
2010-07-27 01:22:20 -07:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Copy the zero-terminated string `source`, terminator included, into
 * `dest`.
 *
 * Returns `dest`, or NULL when either argument is NULL. No bound is applied:
 * the loop writes one unit per source unit plus the terminator. */
EAPI Eina_Unicode *
eina_unicode_strcpy(Eina_Unicode *dest, const Eina_Unicode *source)
{
   size_t pos;

   EINA_SAFETY_ON_NULL_RETURN_VAL(dest, NULL);
   EINA_SAFETY_ON_NULL_RETURN_VAL(source, NULL);

   for (pos = 0; source[pos]; pos++)
     dest[pos] = source[pos];

   dest[pos] = 0;
   return dest;
}
|
|
|
|
|
|
|
|
/* Copy at most `n` units from `source` into `dest`.
 *
 * When `source` ends before `n` units were copied, the rest of the `n`
 * units in `dest` is filled with zeros. When `source` has `n` or more
 * units, no terminator is written.
 *
 * Returns `dest`, or NULL when either pointer is NULL. */
EAPI Eina_Unicode *
eina_unicode_strncpy(Eina_Unicode *dest, const Eina_Unicode *source, size_t n)
{
   size_t pos = 0;

   EINA_SAFETY_ON_NULL_RETURN_VAL(dest, NULL);
   EINA_SAFETY_ON_NULL_RETURN_VAL(source, NULL);

   /* Copy phase: stops at the limit or at the source terminator. */
   while ((pos < n) && source[pos])
     {
        dest[pos] = source[pos];
        pos++;
     }

   /* Padding phase: zero whatever is left of the n units. */
   while (pos < n)
     dest[pos++] = 0;

   return dest;
}
|
|
|
|
|
|
|
|
EAPI size_t
|
|
|
|
eina_unicode_strlen(const Eina_Unicode *ustr)
|
|
|
|
{
|
|
|
|
const Eina_Unicode *end;
|
2012-01-20 05:00:01 -08:00
|
|
|
|
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(ustr, 0);
|
|
|
|
|
2010-07-27 19:37:05 -07:00
|
|
|
for (end = ustr; *end; end++)
|
2010-07-27 01:22:20 -07:00
|
|
|
;
|
|
|
|
return end - ustr;
|
|
|
|
}
|
|
|
|
|
2010-07-29 20:39:12 -07:00
|
|
|
EAPI size_t
|
|
|
|
eina_unicode_strnlen(const Eina_Unicode *ustr, int n)
|
|
|
|
{
|
|
|
|
const Eina_Unicode *end;
|
2010-07-30 20:54:09 -07:00
|
|
|
const Eina_Unicode *last = ustr + n; /* technically not portable ;-) */
|
2012-01-20 05:00:01 -08:00
|
|
|
|
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(ustr, 0);
|
|
|
|
|
2010-08-02 02:43:57 -07:00
|
|
|
for (end = ustr; end < last && *end; end++)
|
2010-07-29 20:39:12 -07:00
|
|
|
;
|
|
|
|
return end - ustr;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2011-02-08 05:43:03 -08:00
|
|
|
/* Duplicate the first `n` units of `text` into a newly malloc()ed,
 * zero-terminated buffer.
 *
 * Exactly `n` units are copied with memcpy(); the function does not look for
 * a terminator inside `text`, so the caller must make sure `text` holds at
 * least `n` readable units.
 *
 * Returns the new buffer (to be released with free()), or NULL when `text`
 * is NULL, when the requested size would overflow size_t, or when the
 * allocation fails. */
EAPI Eina_Unicode *
eina_unicode_strndup(const Eina_Unicode *text, size_t n)
{
   Eina_Unicode *ustr;

   EINA_SAFETY_ON_NULL_RETURN_VAL(text, NULL);

   /* Reject lengths for which (n + 1) * sizeof(Eina_Unicode) would wrap. */
   if (n > ((((size_t)-1) / sizeof(Eina_Unicode)) - 1))
     return NULL;

   ustr = malloc((n + 1) * sizeof(*ustr));
   if (!ustr)
     return NULL;

   memcpy(ustr, text, n * sizeof(*ustr));
   ustr[n] = 0;
   return ustr;
}
|
|
|
|
|
2010-07-27 01:22:20 -07:00
|
|
|
/* Duplicate the whole zero-terminated string `text`.
 *
 * Measures the string with eina_unicode_strlen() and hands the copy off to
 * eina_unicode_strndup(), whose result is returned unchanged.
 * Returns NULL when `text` is NULL. */
EAPI Eina_Unicode *
eina_unicode_strdup(const Eina_Unicode *text)
{
   EINA_SAFETY_ON_NULL_RETURN_VAL(text, NULL);

   return eina_unicode_strndup(text, eina_unicode_strlen(text));
}
|
|
|
|
|
|
|
|
/* Find the first occurrence of `needle` inside `haystack`.
 *
 * Returns a pointer to the start of the match within `haystack`, or NULL
 * when there is no match or when either argument is NULL. An empty needle
 * matches at the start of the haystack, including an empty haystack.
 *
 * Every start position is tried with its own scan cursor. The outer cursor
 * is never moved by a partial match, so overlapping candidates are not
 * skipped (e.g. "ab" in "aab") and the scan never steps past the haystack
 * terminator. */
EAPI Eina_Unicode *
eina_unicode_strstr(const Eina_Unicode *haystack, const Eina_Unicode *needle)
{
   const Eina_Unicode *base, *h, *n;

   EINA_SAFETY_ON_NULL_RETURN_VAL(haystack, NULL);
   EINA_SAFETY_ON_NULL_RETURN_VAL(needle, NULL);

   if (!*needle)
     return (Eina_Unicode *)haystack;

   for (base = haystack; *base; base++)
     {
        /* *n is non-zero inside the loop, so a haystack terminator in *h
         * fails the equality test and ends the scan in bounds. */
        for (h = base, n = needle; *n && (*h == *n); h++, n++)
          ;

        if (!*n) /* the whole needle was consumed: full match at base */
          return (Eina_Unicode *)base;
     }

   return NULL;
}
|
|
|
|
|
|
|
|
/* Build an escaped copy of `str` in which every space, backslash and single
 * quote is preceded by a backslash.
 *
 * Returns a newly malloc()ed, zero-terminated string (release it with
 * free()), or NULL when `str` is NULL, when the size computation would
 * overflow, or when the allocation fails.
 *
 * The worst case doubles every unit, so the buffer holds 2 * len + 1 units.
 * The size is expressed in Eina_Unicode units multiplied by the unit size;
 * sizing it in plain bytes would overflow the buffer. */
EAPI Eina_Unicode *
eina_unicode_escape(const Eina_Unicode *str)
{
   Eina_Unicode *s2, *d;
   const Eina_Unicode *s;
   size_t len;

   EINA_SAFETY_ON_NULL_RETURN_VAL(str, NULL);

   len = eina_unicode_strlen(str);

   /* Guard ((len * 2) + 1) * sizeof(Eina_Unicode) against wrap-around. */
   if (len > (((((size_t)-1) / sizeof(Eina_Unicode)) - 1) / 2))
     return NULL;

   s2 = malloc(((len * 2) + 1) * sizeof(Eina_Unicode));
   if (!s2)
     return NULL;

   for (s = str, d = s2; *s != 0; s++, d++)
     {
        if ((*s == ' ') || (*s == '\\') || (*s == '\''))
          *d++ = '\\';

        *d = *s;
     }
   *d = 0;
   return s2;
}
|
|
|
|
|
2011-02-16 07:43:25 -08:00
|
|
|
/* UTF-8 Handling */
|
|
|
|
|
|
|
|
/* Number of output bytes reserved per code point by the encoder below (its
 * longest form writes six bytes). */
#define EINA_UNICODE_UTF8_BYTES_PER_CHAR 6
/* The replacement range that will be used for bad utf8 chars. */
#define ERROR_REPLACEMENT_BASE  0xDC80
#define ERROR_REPLACEMENT_END   0xDCFF
/* Byte values 0xC0, 0xC1 and 0xF5..0xFF; the decoder rejects them when they
 * show up as trailing bytes. Arguments are parenthesised so the macros stay
 * correct when given an expression rather than a plain variable. */
#define IS_INVALID_BYTE(x)      (((x) == 192) || ((x) == 193) || ((x) >= 245))
/* True for bytes of the form 10xxxxxx. */
#define IS_CONTINUATION_BYTE(x) (((x) & 0xC0) == 0x80)
|
|
|
|
|
|
|
|
EAPI Eina_Unicode
|
|
|
|
eina_unicode_utf8_get_next(const char *buf, int *iindex)
|
|
|
|
{
|
2012-10-18 18:51:07 -07:00
|
|
|
int ind;
|
2011-02-16 07:43:25 -08:00
|
|
|
Eina_Unicode r;
|
|
|
|
unsigned char d;
|
|
|
|
|
2012-01-20 05:00:01 -08:00
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
|
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
|
|
|
|
|
2012-10-18 18:51:07 -07:00
|
|
|
ind = *iindex;
|
|
|
|
|
2011-02-16 07:43:25 -08:00
|
|
|
/* if this char is the null terminator, exit */
|
|
|
|
if ((d = buf[ind++]) == 0) return 0;
|
|
|
|
|
|
|
|
if ((d & 0x80) == 0)
|
|
|
|
{ // 1 byte (7bit) - 0xxxxxxx
|
|
|
|
*iindex = ind;
|
|
|
|
return d;
|
|
|
|
}
|
|
|
|
if ((d & 0xe0) == 0xc0)
|
|
|
|
{ // 2 byte (11bit) - 110xxxxx 10xxxxxx
|
|
|
|
r = (d & 0x1f) << 6;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f);
|
2011-02-16 23:53:49 -08:00
|
|
|
if (r <= 0x7F) goto error;
|
2011-02-16 07:43:25 -08:00
|
|
|
*iindex = ind;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
if ((d & 0xf0) == 0xe0)
|
|
|
|
{ // 3 byte (16bit) - 1110xxxx 10xxxxxx 10xxxxxx
|
|
|
|
r = (d & 0x0f) << 12;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 6;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f);
|
2011-02-16 23:53:49 -08:00
|
|
|
if (r <= 0x7FF) goto error;
|
2011-02-16 07:43:25 -08:00
|
|
|
*iindex = ind;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
if ((d & 0xf8) == 0xf0)
|
|
|
|
{ // 4 byte (21bit) - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
r = (d & 0x07) << 18;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 12;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 6;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f);
|
2011-02-16 23:53:49 -08:00
|
|
|
if (r <= 0xFFFF) goto error;
|
2011-02-16 07:43:25 -08:00
|
|
|
*iindex = ind;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
if ((d & 0xfc) == 0xf8)
|
|
|
|
{ // 5 byte (26bit) - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
r = (d & 0x03) << 24;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 18;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 12;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 6;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f);
|
2011-02-16 23:53:49 -08:00
|
|
|
if (r <= 0x1FFFFF) goto error;
|
2011-02-16 07:43:25 -08:00
|
|
|
*iindex = ind;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
if ((d & 0xfe) == 0xfc)
|
|
|
|
{ // 6 byte (31bit) - 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
|
|
|
|
r = (d & 0x01) << 30;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 24;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 18;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 12;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f) << 6;
|
|
|
|
if (((d = buf[ind++]) == 0) || IS_INVALID_BYTE(d) ||
|
|
|
|
!IS_CONTINUATION_BYTE(d)) goto error;
|
|
|
|
r |= (d & 0x3f);
|
2011-02-16 23:53:49 -08:00
|
|
|
if (r <= 0x3FFFFFF) goto error;
|
2011-02-16 07:43:25 -08:00
|
|
|
*iindex = ind;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Gets here where there was an error and we want to replace the char
|
|
|
|
* we just use the invalid unicode codepoints 8 lower bits represent
|
|
|
|
* the original char */
|
|
|
|
error:
|
|
|
|
d = buf[*iindex];
|
|
|
|
(*iindex)++;
|
|
|
|
return ERROR_REPLACEMENT_BASE | d;
|
|
|
|
}
|
|
|
|
|
|
|
|
EAPI Eina_Unicode
|
|
|
|
eina_unicode_utf8_get_prev(const char *buf, int *iindex)
|
|
|
|
{
|
2012-01-20 05:00:01 -08:00
|
|
|
int r, ind;
|
|
|
|
|
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);
|
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(iindex, 0);
|
|
|
|
|
|
|
|
ind = *iindex;
|
2011-02-16 07:43:25 -08:00
|
|
|
/* First obtain the codepoint at iindex */
|
|
|
|
r = eina_unicode_utf8_get_next(buf, &ind);
|
|
|
|
|
|
|
|
/* although when ind == 0 there's no previous char, we still want to get
|
|
|
|
* the current char */
|
|
|
|
if (*iindex <= 0)
|
|
|
|
return r;
|
|
|
|
|
|
|
|
/* Next advance iindex to previous codepoint */
|
|
|
|
ind = *iindex;
|
|
|
|
ind--;
|
|
|
|
while ((ind > 0) && ((buf[ind] & 0xc0) == 0x80))
|
|
|
|
ind--;
|
|
|
|
|
|
|
|
*iindex = ind;
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Count the code points (not bytes) in the zero-terminated UTF-8 string
 * `buf`, by calling eina_unicode_utf8_get_next() until it returns 0.
 *
 * Returns 0 when `buf` is NULL. */
EAPI int
eina_unicode_utf8_get_len(const char *buf)
{
   int pos = 0;
   int count;

   EINA_SAFETY_ON_NULL_RETURN_VAL(buf, 0);

   for (count = 0; eina_unicode_utf8_get_next(buf, &pos) != 0; count++)
     ;

   return count;
}
|
|
|
|
|
|
|
|
/* Convert the zero-terminated UTF-8 string `utf` into a newly allocated,
 * zero-terminated Eina_Unicode string.
 *
 * When `_len` is not NULL it receives the number of code points; it is
 * written before the allocation is attempted. Returns the new buffer
 * (calloc()ed, so release it with free()), or NULL when `utf` is NULL or
 * the allocation fails. */
EAPI Eina_Unicode *
eina_unicode_utf8_to_unicode(const char *utf, int *_len)
{
   /* FIXME: Should optimize! */
   Eina_Unicode *out;
   int count, k;
   int pos = 0;

   EINA_SAFETY_ON_NULL_RETURN_VAL(utf, NULL);

   /* First pass: count code points so the output can be sized exactly. */
   count = eina_unicode_utf8_get_len(utf);
   if (_len)
     *_len = count;

   /* One extra zeroed slot serves as the terminator. */
   out = calloc(sizeof(Eina_Unicode), (count + 1));
   if (!out)
     return NULL;

   /* Second pass: decode each code point into its slot. */
   for (k = 0; k < count; k++)
     out[k] = eina_unicode_utf8_get_next(utf, &pos);

   return out;
}
|
|
|
|
|
|
|
|
EAPI char *
|
|
|
|
eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len)
|
|
|
|
{
|
|
|
|
char *buf;
|
|
|
|
const Eina_Unicode *uind;
|
|
|
|
char *ind;
|
|
|
|
int ulen, len;
|
|
|
|
|
2012-01-20 05:00:01 -08:00
|
|
|
EINA_SAFETY_ON_NULL_RETURN_VAL(uni, NULL);
|
|
|
|
|
2011-02-16 07:43:25 -08:00
|
|
|
ulen = eina_unicode_strlen(uni);
|
|
|
|
buf = (char *) calloc(ulen + 1, EINA_UNICODE_UTF8_BYTES_PER_CHAR);
|
|
|
|
|
|
|
|
len = 0;
|
|
|
|
for (uind = uni, ind = buf ; *uind ; uind++)
|
|
|
|
{
|
|
|
|
if (*uind <= 0x7F) /* 1 byte char */
|
|
|
|
{
|
|
|
|
*ind++ = *uind;
|
|
|
|
len += 1;
|
|
|
|
}
|
|
|
|
else if (*uind <= 0x7FF) /* 2 byte char */
|
|
|
|
{
|
|
|
|
*ind++ = 0xC0 | (unsigned char) (*uind >> 6);
|
|
|
|
*ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
|
|
|
|
len += 2;
|
|
|
|
}
|
|
|
|
else if (*uind <= 0xFFFF) /* 3 byte char */
|
|
|
|
{
|
|
|
|
/* If it's a special replacement codepoint */
|
|
|
|
if (*uind >= ERROR_REPLACEMENT_BASE &&
|
|
|
|
*uind <= ERROR_REPLACEMENT_END)
|
|
|
|
{
|
|
|
|
*ind++ = *uind & 0xFF;
|
|
|
|
len += 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
*ind++ = 0xE0 | (unsigned char) (*uind >> 12);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
|
|
|
|
len += 3;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (*uind <= 0x1FFFFF) /* 4 byte char */
|
|
|
|
{
|
|
|
|
*ind++ = 0xF0 | (unsigned char) ((*uind >> 18) & 0x07);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
|
|
|
|
len += 4;
|
|
|
|
}
|
|
|
|
else if (*uind <= 0x3FFFFFF) /* 5 byte char */
|
|
|
|
{
|
|
|
|
*ind++ = 0xF8 | (unsigned char) ((*uind >> 24) & 0x03);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 18) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
|
|
|
|
len += 5;
|
|
|
|
}
|
|
|
|
else if (*uind <= 0x7FFFFFFF) /* 6 byte char */
|
|
|
|
{
|
|
|
|
*ind++ = 0xFC | (unsigned char) ((*uind >> 30) & 0x01);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 24) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 18) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 12) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) ((*uind >> 6) & 0x3F);
|
|
|
|
*ind++ = 0x80 | (unsigned char) (*uind & 0x3F);
|
|
|
|
len += 6;
|
|
|
|
}
|
|
|
|
else /* error */
|
|
|
|
{
|
|
|
|
/* Do something */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
buf = realloc(buf, len + 1);
|
|
|
|
buf[len] = '\0';
|
|
|
|
if (_len)
|
|
|
|
*_len = len;
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|