From 89ef4b70b8b6a04a911da8ef94c2124fab031032 Mon Sep 17 00:00:00 2001 From: Daniel Hirt Date: Sat, 21 Nov 2015 12:39:00 +0200 Subject: [PATCH] Eina unicode: add eina_unicode_unicode_to_utf8_range Required some special treatment to get words (substring) out of an input Unicode string to a utf8 one. This saves the trouble of converting the whole string: you input an offset in the Eina_Unicode array, and provide the required length. That's it; now you can extract words and whatnot in utf8 form. To save code I wrapped the original one to get the whole length. --- src/lib/eina/eina_unicode.c | 14 +++++++++----- src/lib/eina/eina_unicode.h | 11 +++++++++++ src/tests/eina/eina_test_ustr.c | 17 +++++++++++++++++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/lib/eina/eina_unicode.c b/src/lib/eina/eina_unicode.c index c8fddd0576..adec87a477 100644 --- a/src/lib/eina/eina_unicode.c +++ b/src/lib/eina/eina_unicode.c @@ -341,21 +341,20 @@ eina_unicode_utf8_to_unicode(const char *utf, int *_len) } EAPI char * -eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len) +eina_unicode_unicode_to_utf8_range(const Eina_Unicode *uni, int ulen, int *_len) { char *buf, *buf2; const Eina_Unicode *uind; char *ind; - int ulen, len; + int i, len; EINA_SAFETY_ON_NULL_RETURN_VAL(uni, NULL); - ulen = eina_unicode_strlen(uni); buf = malloc((ulen + 1) * EINA_UNICODE_UTF8_BYTES_PER_CHAR); if (!buf) return NULL; len = 0; - for (uind = uni, ind = buf ; *uind ; uind++) + for (uind = uni, ind = buf, i = 0 ; *uind && (i < ulen) ; uind++, i++) { if (*uind <= 0x7F) /* 1 byte char */ { @@ -424,5 +423,10 @@ eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len) return buf2; } +EAPI char * +eina_unicode_unicode_to_utf8(const Eina_Unicode *uni, int *_len) +{ + int len = eina_unicode_strlen(uni); - + return eina_unicode_unicode_to_utf8_range(uni, len, _len); +} diff --git a/src/lib/eina/eina_unicode.h b/src/lib/eina/eina_unicode.h index e58b8532d2..7bf4f83484 100644 --- 
a/src/lib/eina/eina_unicode.h +++ b/src/lib/eina/eina_unicode.h @@ -178,6 +178,17 @@ EAPI int eina_unicode_utf8_get_len(const char *buf) EINA_ARG_NONNULL(1); */ EAPI Eina_Unicode *eina_unicode_utf8_to_unicode(const char *utf, int *_len) EINA_WARN_UNUSED_RESULT EINA_ARG_NONNULL(1) EINA_MALLOC; +/** + * Converts an Eina_Unicode string to a newly allocated utf-8 substring at given length. + * + * @param uni the Eina_Unicode string + * @param ulen the length in the unicode string to convert. + * @param _len the byte length of the returned utf8 substring. + * @return the newly allocated utf-8 substring. + * @since 1.17 + */ +EAPI char * eina_unicode_unicode_to_utf8_range(const Eina_Unicode *uni, int ulen, int *_len) EINA_WARN_UNUSED_RESULT EINA_ARG_NONNULL(1) EINA_MALLOC; + /** * Converts an Eina_Unicode string to a newly allocated utf-8 string. * diff --git a/src/tests/eina/eina_test_ustr.c b/src/tests/eina/eina_test_ustr.c index f9c28d3f80..fb36fd9013 100644 --- a/src/tests/eina/eina_test_ustr.c +++ b/src/tests/eina/eina_test_ustr.c @@ -585,6 +585,10 @@ START_TEST(eina_unicode_utf8_conversion) char c_in[] = "\xD7\x90""\xEF\xB7\xB6""\x80""\xF0\x9F\x91\x99" "\xFB\xBF\xBF\xBF\xBF""\xFD\xBF\xBF\xBF\xBF\xBF""abc"; char *c_out; + + /* Substring of c_in (offset = 2, length = 3) */ + char c_sub[] = "\x80""\xF0\x9F\x91\x99""\xFB\xBF\xBF\xBF\xBF"; + char *c_sub_out; int len; eina_init(); @@ -597,6 +601,19 @@ START_TEST(eina_unicode_utf8_conversion) fail_if((len != 24) || strcmp(c_in, c_out)); free(c_out); + /* Range conversion */ + c_sub_out = eina_unicode_unicode_to_utf8_range(uni_in + 2, 3, &len); + ck_assert_int_eq(len, 10); + ck_assert_str_eq(c_sub, c_sub_out); + + c_sub_out = eina_unicode_unicode_to_utf8_range(uni_in, 100, &len); + ck_assert_int_eq(len, 24); + ck_assert_str_eq(c_in, c_sub_out); + + c_sub_out = eina_unicode_unicode_to_utf8_range(uni_in, 0, &len); + ck_assert_int_eq(len, 0); + ck_assert_str_eq("", c_sub_out); + eina_shutdown(); } END_TEST