/* Forked from enlightenment/efl (501 lines, 14 KiB, C). */
#include "config.h"
|
|
#include "Efl.h"
|
|
|
|
/* Alias for the Eo class this file implements. */
#define MY_CLASS EFL_TEXT_MARKUP_UTIL_CLASS

/* Log an error message on the default Eina log domain. */
#define ERR(...) EINA_LOG_DOM_ERR(EINA_LOG_DOMAIN_DEFAULT, __VA_ARGS__)

/* Code points that get special treatment when converting to/from markup. */
#define _REPLACEMENT_CHAR 0xFFFC
#define _PARAGRAPH_SEPARATOR 0x2029
#define _NEWLINE '\n'
#define _TAB '\t'

/* UTF-8 encodings of the code points above. */
#define _REPLACEMENT_CHAR_UTF8 "\xEF\xBF\xBC"
#define _PARAGRAPH_SEPARATOR_UTF8 "\xE2\x80\xA9"
#define _NEWLINE_UTF8 "\n"
#define _TAB_UTF8 "\t"

/* True when @p ch is one of the four special code points defined above. */
#define EVAS_TEXTBLOCK_IS_VISIBLE_FORMAT_CHAR(ch) \
   (((ch) == _REPLACEMENT_CHAR) || \
    ((ch) == _NEWLINE) || \
    ((ch) == _TAB) || \
    ((ch) == _PARAGRAPH_SEPARATOR))

/* Tag-name predicates. @p item must be a NUL terminated string holding the
 * tag name without the surrounding '<' and '>'. The arguments are
 * parenthesized so that any pointer expression can be passed safely. */
#define _IS_TAB(item) \
   (!strcmp((item), "tab") || !strcmp((item), "\t") || !strcmp((item), "\\t"))

#define _IS_LINE_SEPARATOR(item) \
   (!strcmp((item), "br") || !strcmp((item), "\n") || !strcmp((item), "\\n"))

#define _IS_PARAGRAPH_SEPARATOR_SIMPLE(item) \
   (!strcmp((item), "ps"))

/* Like _IS_PARAGRAPH_SEPARATOR_SIMPLE(), but when o->legacy_newline is set
 * a line separator tag counts as a paragraph separator too. */
#define _IS_PARAGRAPH_SEPARATOR(o, item) \
   (_IS_PARAGRAPH_SEPARATOR_SIMPLE(item) || \
    ((o)->legacy_newline && _IS_LINE_SEPARATOR(item))) /* Paragraph separator */
|
|
|
|
static void
|
|
_markup_get_text_utf8_append(Eina_Strbuf *sbuf, const char *text)
|
|
{
|
|
int ch, pos = 0, pos2 = 0;
|
|
|
|
for (;;)
|
|
{
|
|
pos = pos2;
|
|
ch = eina_unicode_utf8_next_get(text, &pos2);
|
|
if ((ch <= 0) || (pos2 <= 0)) break;
|
|
|
|
if (ch == _NEWLINE)
|
|
eina_strbuf_append(sbuf, "<br/>");
|
|
else if (ch == _TAB)
|
|
eina_strbuf_append(sbuf, "<tab/>");
|
|
else if (ch == '<')
|
|
eina_strbuf_append(sbuf, "<");
|
|
else if (ch == '>')
|
|
eina_strbuf_append(sbuf, ">");
|
|
else if (ch == '&')
|
|
eina_strbuf_append(sbuf, "&");
|
|
else if (ch == '"')
|
|
eina_strbuf_append(sbuf, """);
|
|
else if (ch == _PARAGRAPH_SEPARATOR)
|
|
eina_strbuf_append(sbuf, "<ps/>");
|
|
else if (ch == _REPLACEMENT_CHAR)
|
|
eina_strbuf_append(sbuf, "");
|
|
else if (ch != '\r')
|
|
{
|
|
eina_strbuf_append_length(sbuf, text + pos, pos2 - pos);
|
|
}
|
|
}
|
|
}
|
|
|
|
EOLIAN static char*
|
|
_efl_text_markup_util_text_to_markup(Eo *class EINA_UNUSED,
|
|
void *_pd EINA_UNUSED, const char *text)
|
|
{
|
|
Eina_Strbuf *sbuf;
|
|
char *str = NULL;
|
|
|
|
if (!text) return NULL;
|
|
|
|
sbuf = eina_strbuf_new();
|
|
|
|
_markup_get_text_utf8_append(sbuf, text);
|
|
|
|
str = eina_strbuf_string_steal(sbuf);
|
|
eina_strbuf_free(sbuf);
|
|
return str;
|
|
}
|
|
|
|
/* Table of HTML escapes (those that could be found). It should be ordered
 * with the most common first, as matching is a linear search - there is no
 * hash for this.
 *
 * The escapes are stored as one large string of NUL separated tokens for
 * space efficiency.
 */
/**
 * @internal
 * @var escape_strings[]
 * This string consists of NUL terminated pairs of strings: the first of
 * every pair is an escape, written in full as "&name;", and the second is
 * the UTF-8 encoded value of that escape. Matching is case sensitive and the
 * first matching name wins.
 *
 * Names and values follow the HTML character entity list. In particular the
 * lowercase Greek names map to the lowercase letters and the capitalized
 * names to the capital letters. The wrongly cased spellings "&Aelig;",
 * "&Eth;" and "&Thorn;" are kept at the end of the table as legacy aliases.
 */
static const char escape_strings[] =
/* most common escaped stuff */
   "&quot;\0"     "\x22\0"
   "&amp;\0"      "\x26\0"
   "&apos;\0"     "\x27\0"
   "&lt;\0"       "\x3c\0"
   "&gt;\0"       "\x3e\0"
/* all the rest */
   "&nbsp;\0"     "\xc2\xa0\0"
   "&iexcl;\0"    "\xc2\xa1\0"
   "&cent;\0"     "\xc2\xa2\0"
   "&pound;\0"    "\xc2\xa3\0"
   "&curren;\0"   "\xc2\xa4\0"
   "&yen;\0"      "\xc2\xa5\0"
   "&brvbar;\0"   "\xc2\xa6\0"
   "&sect;\0"     "\xc2\xa7\0"
   "&uml;\0"      "\xc2\xa8\0"
   "&copy;\0"     "\xc2\xa9\0"
   "&ordf;\0"     "\xc2\xaa\0"
   "&laquo;\0"    "\xc2\xab\0"
   "&not;\0"      "\xc2\xac\0"
   "&shy;\0"      "\xc2\xad\0"
   "&reg;\0"      "\xc2\xae\0"
   "&macr;\0"     "\xc2\xaf\0"
   "&deg;\0"      "\xc2\xb0\0"
   "&plusmn;\0"   "\xc2\xb1\0"
   "&sup2;\0"     "\xc2\xb2\0"
   "&sup3;\0"     "\xc2\xb3\0"
   "&acute;\0"    "\xc2\xb4\0"
   "&micro;\0"    "\xc2\xb5\0"
   "&para;\0"     "\xc2\xb6\0"
   "&middot;\0"   "\xc2\xb7\0"
   "&cedil;\0"    "\xc2\xb8\0"
   "&sup1;\0"     "\xc2\xb9\0"
   "&ordm;\0"     "\xc2\xba\0"
   "&raquo;\0"    "\xc2\xbb\0"
   "&frac14;\0"   "\xc2\xbc\0"
   "&frac12;\0"   "\xc2\xbd\0"
   "&frac34;\0"   "\xc2\xbe\0"
   "&iquest;\0"   "\xc2\xbf\0"
   "&Agrave;\0"   "\xc3\x80\0"
   "&Aacute;\0"   "\xc3\x81\0"
   "&Acirc;\0"    "\xc3\x82\0"
   "&Atilde;\0"   "\xc3\x83\0"
   "&Auml;\0"     "\xc3\x84\0"
   "&Aring;\0"    "\xc3\x85\0"
   "&AElig;\0"    "\xc3\x86\0"
   "&Ccedil;\0"   "\xc3\x87\0"
   "&Egrave;\0"   "\xc3\x88\0"
   "&Eacute;\0"   "\xc3\x89\0"
   "&Ecirc;\0"    "\xc3\x8a\0"
   "&Euml;\0"     "\xc3\x8b\0"
   "&Igrave;\0"   "\xc3\x8c\0"
   "&Iacute;\0"   "\xc3\x8d\0"
   "&Icirc;\0"    "\xc3\x8e\0"
   "&Iuml;\0"     "\xc3\x8f\0"
   "&ETH;\0"      "\xc3\x90\0"
   "&Ntilde;\0"   "\xc3\x91\0"
   "&Ograve;\0"   "\xc3\x92\0"
   "&Oacute;\0"   "\xc3\x93\0"
   "&Ocirc;\0"    "\xc3\x94\0"
   "&Otilde;\0"   "\xc3\x95\0"
   "&Ouml;\0"     "\xc3\x96\0"
   "&times;\0"    "\xc3\x97\0"
   "&Oslash;\0"   "\xc3\x98\0"
   "&Ugrave;\0"   "\xc3\x99\0"
   "&Uacute;\0"   "\xc3\x9a\0"
   "&Ucirc;\0"    "\xc3\x9b\0"
   "&Uuml;\0"     "\xc3\x9c\0"
   "&Yacute;\0"   "\xc3\x9d\0"
   "&THORN;\0"    "\xc3\x9e\0"
   "&szlig;\0"    "\xc3\x9f\0"
   "&agrave;\0"   "\xc3\xa0\0"
   "&aacute;\0"   "\xc3\xa1\0"
   "&acirc;\0"    "\xc3\xa2\0"
   "&atilde;\0"   "\xc3\xa3\0"
   "&auml;\0"     "\xc3\xa4\0"
   "&aring;\0"    "\xc3\xa5\0"
   "&aelig;\0"    "\xc3\xa6\0"
   "&ccedil;\0"   "\xc3\xa7\0"
   "&egrave;\0"   "\xc3\xa8\0"
   "&eacute;\0"   "\xc3\xa9\0"
   "&ecirc;\0"    "\xc3\xaa\0"
   "&euml;\0"     "\xc3\xab\0"
   "&igrave;\0"   "\xc3\xac\0"
   "&iacute;\0"   "\xc3\xad\0"
   "&icirc;\0"    "\xc3\xae\0"
   "&iuml;\0"     "\xc3\xaf\0"
   "&eth;\0"      "\xc3\xb0\0"
   "&ntilde;\0"   "\xc3\xb1\0"
   "&ograve;\0"   "\xc3\xb2\0"
   "&oacute;\0"   "\xc3\xb3\0"
   "&ocirc;\0"    "\xc3\xb4\0"
   "&otilde;\0"   "\xc3\xb5\0"
   "&ouml;\0"     "\xc3\xb6\0"
   "&divide;\0"   "\xc3\xb7\0"
   "&oslash;\0"   "\xc3\xb8\0"
   "&ugrave;\0"   "\xc3\xb9\0"
   "&uacute;\0"   "\xc3\xba\0"
   "&ucirc;\0"    "\xc3\xbb\0"
   "&uuml;\0"     "\xc3\xbc\0"
   "&yacute;\0"   "\xc3\xbd\0"
   "&thorn;\0"    "\xc3\xbe\0"
   "&yuml;\0"     "\xc3\xbf\0"
/* Greek, lowercase */
   "&alpha;\0"    "\xce\xb1\0"
   "&beta;\0"     "\xce\xb2\0"
   "&gamma;\0"    "\xce\xb3\0"
   "&delta;\0"    "\xce\xb4\0"
   "&epsilon;\0"  "\xce\xb5\0"
   "&zeta;\0"     "\xce\xb6\0"
   "&eta;\0"      "\xce\xb7\0"
   "&theta;\0"    "\xce\xb8\0"
   "&iota;\0"     "\xce\xb9\0"
   "&kappa;\0"    "\xce\xba\0"
   "&lambda;\0"   "\xce\xbb\0"
   "&mu;\0"       "\xce\xbc\0"
   "&nu;\0"       "\xce\xbd\0"
   "&xi;\0"       "\xce\xbe\0"
   "&omicron;\0"  "\xce\xbf\0"
   "&pi;\0"       "\xcf\x80\0"
   "&rho;\0"      "\xcf\x81\0"
   "&sigma;\0"    "\xcf\x83\0"
   "&tau;\0"      "\xcf\x84\0"
   "&upsilon;\0"  "\xcf\x85\0"
   "&phi;\0"      "\xcf\x86\0"
   "&chi;\0"      "\xcf\x87\0"
   "&psi;\0"      "\xcf\x88\0"
   "&omega;\0"    "\xcf\x89\0"
/* Greek, capital */
   "&Alpha;\0"    "\xce\x91\0"
   "&Beta;\0"     "\xce\x92\0"
   "&Gamma;\0"    "\xce\x93\0"
   "&Delta;\0"    "\xce\x94\0"
   "&Epsilon;\0"  "\xce\x95\0"
   "&Zeta;\0"     "\xce\x96\0"
   "&Eta;\0"      "\xce\x97\0"
   "&Theta;\0"    "\xce\x98\0"
   "&Iota;\0"     "\xce\x99\0"
   "&Kappa;\0"    "\xce\x9a\0"
   "&Lambda;\0"   "\xce\x9b\0"
   "&Mu;\0"       "\xce\x9c\0"
   "&Nu;\0"       "\xce\x9d\0"
   "&Xi;\0"       "\xce\x9e\0"
   "&Omicron;\0"  "\xce\x9f\0"
   "&Pi;\0"       "\xce\xa0\0"
   "&Rho;\0"      "\xce\xa1\0"
   "&Sigma;\0"    "\xce\xa3\0"
   "&Tau;\0"      "\xce\xa4\0"
   "&Upsilon;\0"  "\xce\xa5\0"
   "&Phi;\0"      "\xce\xa6\0"
   "&Chi;\0"      "\xce\xa7\0"
   "&Psi;\0"      "\xce\xa8\0"
   "&Omega;\0"    "\xce\xa9\0"
/* punctuation, arrows and math */
   "&hellip;\0"   "\xe2\x80\xa6\0"
   "&euro;\0"     "\xe2\x82\xac\0"
   "&larr;\0"     "\xe2\x86\x90\0"
   "&uarr;\0"     "\xe2\x86\x91\0"
   "&rarr;\0"     "\xe2\x86\x92\0"
   "&darr;\0"     "\xe2\x86\x93\0"
   "&harr;\0"     "\xe2\x86\x94\0"
   "&lArr;\0"     "\xe2\x87\x90\0"
   "&rArr;\0"     "\xe2\x87\x92\0"
   "&forall;\0"   "\xe2\x88\x80\0"
   "&exist;\0"    "\xe2\x88\x83\0"
   "&nabla;\0"    "\xe2\x88\x87\0"
   "&prod;\0"     "\xe2\x88\x8f\0"
   "&sum;\0"      "\xe2\x88\x91\0"
   "&and;\0"      "\xe2\x88\xa7\0"
   "&or;\0"       "\xe2\x88\xa8\0"
   "&int;\0"      "\xe2\x88\xab\0"
   "&ne;\0"       "\xe2\x89\xa0\0"
   "&equiv;\0"    "\xe2\x89\xa1\0"
   "&oplus;\0"    "\xe2\x8a\x95\0"
   "&perp;\0"     "\xe2\x8a\xa5\0"
   "&dagger;\0"   "\xe2\x80\xa0\0"
   "&Dagger;\0"   "\xe2\x80\xa1\0"
   "&bull;\0"     "\xe2\x80\xa2\0"
   "&zwnj;\0"     "\xe2\x80\x8c\0"
   "&zwj;\0"      "\xe2\x80\x8d\0"
   "&lrm;\0"      "\xe2\x80\x8e\0"
   "&rlm;\0"      "\xe2\x80\x8f\0"
/* legacy aliases: wrongly cased names kept for markup that relies on them */
   "&Aelig;\0"    "\xc3\x86\0"
   "&Eth;\0"      "\xc3\x90\0"
   "&Thorn;\0"    "\xc3\x9e\0"
;
|
|
|
|
/**
 * @internal
 * Move a cursor to the first byte after the NUL terminating the string it
 * currently points into.
 *
 * @param p_buf in/out cursor; on return it points one past the NUL.
 */
static inline void
_escaped_advance_after_end_of_string(const char **p_buf)
{
   const char *cursor = *p_buf;

   /* Find the terminator of the current string... */
   for (; *cursor != 0; cursor++)
     ;

   /* ...and step over it. */
   *p_buf = cursor + 1;
}
|
|
|
|
static inline int
|
|
_escaped_is_eq_and_advance(const char *s, const char *s_end,
|
|
const char **p_m, const char *m_end)
|
|
{
|
|
Eina_Bool reached_end;
|
|
for (;((s < s_end) && (*p_m < m_end)); s++, (*p_m)++)
|
|
{
|
|
if (*s != **p_m)
|
|
{
|
|
_escaped_advance_after_end_of_string(p_m);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
reached_end = !**p_m;
|
|
if (*p_m < m_end)
|
|
_escaped_advance_after_end_of_string(p_m);
|
|
|
|
return ((s == s_end) && reached_end);
|
|
}
|
|
|
|
static inline const char *
|
|
_escaped_char_get(const char *s, const char *s_end)
|
|
{
|
|
/* Handle numeric escape codes. */
|
|
if (s[1] == '#')
|
|
{
|
|
static char utf8_escape[7]; /* Support up to 6 bytes utf8 */
|
|
char ustr[10];
|
|
Eina_Unicode uchar[2] = { 0, 0 };
|
|
char *utf8_char;
|
|
size_t len = 0;
|
|
int base = 10;
|
|
s += 2; /* Skip "&#" */
|
|
|
|
if ((*s == 'x') && (*s == 'X'))
|
|
{
|
|
s++;
|
|
base = 16;
|
|
}
|
|
|
|
len = s_end - s;
|
|
if (len > sizeof(ustr))
|
|
len = sizeof(ustr);
|
|
|
|
memcpy(ustr, s, len);
|
|
ustr[len - 1] = '\0';
|
|
uchar[0] = strtol(ustr, NULL, base);
|
|
|
|
if (uchar[0] == 0)
|
|
return NULL;
|
|
|
|
utf8_char = eina_unicode_unicode_to_utf8(uchar, NULL);
|
|
// eina_unicode_unicode_to_utf8() always creates a string that
|
|
// is nul terminated - guaranteed
|
|
if (utf8_char)
|
|
{
|
|
strcpy(utf8_escape, utf8_char);
|
|
free(utf8_char);
|
|
}
|
|
|
|
return utf8_escape;
|
|
}
|
|
else
|
|
{
|
|
const char *map_itr, *map_end;
|
|
|
|
map_itr = escape_strings;
|
|
map_end = map_itr + sizeof(escape_strings);
|
|
|
|
while (map_itr < map_end)
|
|
{
|
|
if (_escaped_is_eq_and_advance(s, s_end, &map_itr, map_end))
|
|
return map_itr;
|
|
if (map_itr < map_end)
|
|
_escaped_advance_after_end_of_string(&map_itr);
|
|
}
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static char *
|
|
_text_util_markup_to_text(const char *text)
|
|
{
|
|
Eina_Strbuf *sbuf;
|
|
char *s, *p, *ret;
|
|
char *tag_start, *tag_end, *esc_start, *esc_end;
|
|
|
|
if (!text) return NULL;
|
|
|
|
tag_start = tag_end = esc_start = esc_end = NULL;
|
|
sbuf = eina_strbuf_new();
|
|
p = (char *)text;
|
|
s = p;
|
|
/* This loop goes through all of the mark up text until it finds format
|
|
* tags, escape sequences or the terminating NULL. When it finds either
|
|
* of those, it appends the text found up until that point to the textblock
|
|
* proccesses whatever found. It repeats itself until the termainating
|
|
* NULL is reached. */
|
|
for (;;)
|
|
{
|
|
/* If we got to the end of string or just finished/started tag
|
|
* or escape sequence handling. */
|
|
if ((*p == 0) ||
|
|
(tag_end) || (esc_end) ||
|
|
(tag_start) || (esc_start))
|
|
{
|
|
if (tag_end)
|
|
{
|
|
/* If we reached to a tag ending, analyze the tag */
|
|
char *ttag;
|
|
size_t ttag_len;
|
|
|
|
tag_start++; /* Skip the < */
|
|
tag_end--; /* Skip the > */
|
|
if ((tag_end > tag_start) && (*(tag_end - 1) == '/'))
|
|
{
|
|
tag_end --; /* Skip the terminating '/' */
|
|
while (*(tag_end - 1) == ' ')
|
|
tag_end--; /* skip trailing ' ' */
|
|
}
|
|
|
|
ttag_len = tag_end - tag_start;
|
|
|
|
ttag = malloc(ttag_len + 1);
|
|
if (ttag)
|
|
{
|
|
memcpy(ttag, tag_start, ttag_len);
|
|
ttag[ttag_len] = 0;
|
|
|
|
if (_IS_PARAGRAPH_SEPARATOR_SIMPLE(ttag))
|
|
eina_strbuf_append(sbuf, _PARAGRAPH_SEPARATOR_UTF8);
|
|
else if (_IS_LINE_SEPARATOR(ttag))
|
|
eina_strbuf_append(sbuf, _NEWLINE_UTF8);
|
|
else if (_IS_TAB(ttag))
|
|
eina_strbuf_append(sbuf, _TAB_UTF8);
|
|
else if (!strncmp(ttag, "item", 4))
|
|
eina_strbuf_append(sbuf, _REPLACEMENT_CHAR_UTF8);
|
|
|
|
free(ttag);
|
|
}
|
|
tag_start = tag_end = NULL;
|
|
}
|
|
else if (esc_end)
|
|
{
|
|
const char *escape;
|
|
|
|
escape = _escaped_char_get(esc_start, esc_end + 1);
|
|
if (escape) eina_strbuf_append(sbuf, escape);
|
|
esc_start = esc_end = NULL;
|
|
}
|
|
else if (*p == 0)
|
|
{
|
|
if (s)
|
|
{
|
|
eina_strbuf_append_length(sbuf, s, p - s);
|
|
s = NULL;
|
|
}
|
|
else
|
|
{
|
|
ERR("There is a invalid markup tag at positoin '%u'. Please check the text.", (unsigned int) (p - text));
|
|
}
|
|
}
|
|
if (*p == 0)
|
|
break;
|
|
}
|
|
if (*p == '<')
|
|
{
|
|
if (!esc_start)
|
|
{
|
|
/* Append the text prior to this to the textblock and
|
|
* mark the start of the tag */
|
|
tag_start = p;
|
|
tag_end = NULL;
|
|
if (s)
|
|
{
|
|
eina_strbuf_append_length(sbuf, s, p - s);
|
|
s = NULL;
|
|
}
|
|
else
|
|
{
|
|
ERR("There is a invalid markup tag at positoin '%u'. Please check the text.", (unsigned int) (p - text));
|
|
}
|
|
}
|
|
}
|
|
else if (*p == '>')
|
|
{
|
|
if (tag_start)
|
|
{
|
|
tag_end = p + 1;
|
|
s = p + 1;
|
|
}
|
|
}
|
|
else if (*p == '&')
|
|
{
|
|
if (!tag_start)
|
|
{
|
|
/* Append the text prior to this to the textblock and mark
|
|
* the start of the escape sequence */
|
|
esc_start = p;
|
|
esc_end = NULL;
|
|
if (s)
|
|
{
|
|
eina_strbuf_append_length(sbuf, s, p - s);
|
|
s = NULL;
|
|
}
|
|
else
|
|
{
|
|
ERR("There is a invalid markup tag at positoin '%u'. Please check the text.", (unsigned int) (p - text));
|
|
}
|
|
}
|
|
}
|
|
else if (*p == ';')
|
|
{
|
|
if (esc_start)
|
|
{
|
|
esc_end = p;
|
|
s = p + 1;
|
|
}
|
|
}
|
|
p++;
|
|
}
|
|
|
|
ret = eina_strbuf_string_steal(sbuf);
|
|
eina_strbuf_free(sbuf);
|
|
return ret;
|
|
}
|
|
|
|
static EOLIAN char*
|
|
_efl_text_markup_util_markup_to_text(Eo *class EINA_UNUSED,
|
|
void *_pd EINA_UNUSED, const char *text)
|
|
{
|
|
return _text_util_markup_to_text(text);
|
|
}
|
|
|
|
#include "interfaces/efl_text_markup_util.eo.c"
|