forked from enlightenment/efl
Eina unicode: add eina_unicode_unicode_to_utf8_range
Required some special treatment to get words (substrings) out of an input Unicode string and into a utf8 one. This saves the trouble of converting the whole string: you pass a pointer at the desired offset in the Eina_Unicode array and provide the required length. That is, now you can extract words and whatnot in utf8 form. To save code, the original function now wraps the new one, passing the whole string length.
This commit is contained in:
parent
120305b08e
commit
89ef4b70b8
|
@ -341,21 +341,20 @@ eina_unicode_utf8_to_unicode(const char *utf, int *_len)
|
|||
}
|
||||
|
||||
EAPI char *
|
||||
eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len)
|
||||
eina_unicode_unicode_to_utf8_range(const Eina_Unicode *uni, int ulen, int *_len)
|
||||
{
|
||||
char *buf, *buf2;
|
||||
const Eina_Unicode *uind;
|
||||
char *ind;
|
||||
int ulen, len;
|
||||
int i, len;
|
||||
|
||||
EINA_SAFETY_ON_NULL_RETURN_VAL(uni, NULL);
|
||||
|
||||
ulen = eina_unicode_strlen(uni);
|
||||
buf = malloc((ulen + 1) * EINA_UNICODE_UTF8_BYTES_PER_CHAR);
|
||||
if (!buf) return NULL;
|
||||
|
||||
len = 0;
|
||||
for (uind = uni, ind = buf ; *uind ; uind++)
|
||||
for (uind = uni, ind = buf, i = 0 ; *uind && (i < ulen) ; uind++, i++)
|
||||
{
|
||||
if (*uind <= 0x7F) /* 1 byte char */
|
||||
{
|
||||
|
@ -424,5 +423,10 @@ eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len)
|
|||
return buf2;
|
||||
}
|
||||
|
||||
EAPI char *
|
||||
eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len)
|
||||
{
|
||||
int len = eina_unicode_strlen(uni);
|
||||
|
||||
|
||||
return eina_unicode_unicode_to_utf8_range(uni, len, _len);
|
||||
}
|
||||
|
|
|
@ -178,6 +178,17 @@ EAPI int eina_unicode_utf8_get_len(const char *buf) EINA_ARG_NONNULL(1);
|
|||
*/
|
||||
EAPI Eina_Unicode *eina_unicode_utf8_to_unicode(const char *utf, int *_len) EINA_WARN_UNUSED_RESULT EINA_ARG_NONNULL(1) EINA_MALLOC;
|
||||
|
||||
/**
|
||||
* Converts at most @p ulen characters of an Eina_Unicode string to a newly allocated utf-8 substring.
|
||||
*
|
||||
* @param uni the Eina_Unicode string
|
||||
* @param ulen the length in the unicode string to convert.
|
||||
* @param _len the byte length of the returned utf8 substring.
|
||||
* @return the newly allocated utf-8 substring.
|
||||
* @since 1.17
|
||||
*/
|
||||
EAPI char * eina_unicode_unicode_to_utf8_range(const Eina_Unicode *uni, int ulen, int *_len) EINA_WARN_UNUSED_RESULT EINA_ARG_NONNULL(1) EINA_MALLOC;
|
||||
|
||||
/**
|
||||
* Converts an Eina_Unicode string to a newly allocated utf-8 string.
|
||||
*
|
||||
|
|
|
@ -585,6 +585,10 @@ START_TEST(eina_unicode_utf8_conversion)
|
|||
char c_in[] = "\xD7\x90""\xEF\xB7\xB6""\x80""\xF0\x9F\x91\x99"
|
||||
"\xFB\xBF\xBF\xBF\xBF""\xFD\xBF\xBF\xBF\xBF\xBF""abc";
|
||||
char *c_out;
|
||||
|
||||
/* Substring of c_in (offset = 2, length = 3) */
|
||||
char c_sub[] = "\x80""\xF0\x9F\x91\x99""\xFB\xBF\xBF\xBF\xBF";
|
||||
char *c_sub_out;
|
||||
int len;
|
||||
|
||||
eina_init();
|
||||
|
@ -597,6 +601,19 @@ START_TEST(eina_unicode_utf8_conversion)
|
|||
fail_if((len != 24) || strcmp(c_in, c_out));
|
||||
free(c_out);
|
||||
|
||||
/* Range conversion */
|
||||
c_sub_out = eina_unicode_unicode_to_utf8_range(uni_in + 2, 3, &len);
|
||||
ck_assert_int_eq(len, 10);
|
||||
ck_assert_str_eq(c_sub, c_sub_out);
|
||||
|
||||
c_sub_out = eina_unicode_unicode_to_utf8_range(uni_in, 100, &len);
|
||||
ck_assert_int_eq(len, 24);
|
||||
ck_assert_str_eq(c_in, c_sub_out);
|
||||
|
||||
c_sub_out = eina_unicode_unicode_to_utf8_range(uni_in, 0, &len);
|
||||
ck_assert_int_eq(len, 0);
|
||||
ck_assert_str_eq("", c_sub_out);
|
||||
|
||||
eina_shutdown();
|
||||
}
|
||||
END_TEST
|
||||
|
|
Loading…
Reference in New Issue