utf8: only encode codepoints ≤ U+10FFFF. Closes T8022

This commit is contained in:
Boris Faure 2019-06-23 16:43:18 +02:00
parent 18bff78895
commit cdaae389aa
1 changed file with 0 additions and 21 deletions

View File

@@ -34,27 +34,6 @@ codepoint_to_utf8(Eina_Unicode g, char *txt)
txt[4] = 0;
return 4;
}
else if (g < (1 << (2 + 6 + 6 + 6 + 6)))
{ // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
txt[0] = 0xf8 | ((g >> 24) & 0x03);
txt[1] = 0x80 | ((g >> 18) & 0x3f);
txt[2] = 0x80 | ((g >> 12) & 0x3f);
txt[3] = 0x80 | ((g >> 6 ) & 0x3f);
txt[4] = 0x80 | ((g ) & 0x3f);
txt[5] = 0;
return 5;
}
else if ((unsigned int)g < (unsigned int)(1 << (1 + 6 + 6 + 6 + 6 + 6)))
{ // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
txt[0] = 0xfc | ((g >> 30) & 0x01);
txt[1] = 0x80 | ((g >> 24) & 0x3f);
txt[2] = 0x80 | ((g >> 18) & 0x3f);
txt[3] = 0x80 | ((g >> 12) & 0x3f);
txt[4] = 0x80 | ((g >> 6 ) & 0x3f);
txt[5] = 0x80 | ((g ) & 0x3f);
txt[6] = 0;
return 6;
}
else
{ // error - cant encode this in utf8
txt[0] = 0;