evas_textblock: enhance escape character handling

Summary:
-Lazy initialization for html escapes lists
-Lower memory consumption for escapes lists
-Simplify code maintenance by sorting lists at runtime; new items do not need to respect sort order (the runtime sort will handle it)

Reviewers: woohyun, bowonryu, cedric, tasn

Reviewed By: woohyun

Subscribers: cedric, #reviewers, #committers

Tags: #efl

Differential Revision: https://phab.enlightenment.org/D9370
This commit is contained in:
Ali Alzyod 2020-08-05 11:27:03 +09:00 committed by WooHyun Jung
parent 5d3497e506
commit 86c274ea33
1 changed files with 329 additions and 310 deletions

View File

@ -968,12 +968,6 @@ _line_free(Evas_Object_Textblock_Line *ln)
*/
/**
* @internal
* @var escape_values_e_sorted[]
* This array consists of Escape_Value structure sorted by escape string
* And new added value must be placed sorted position, and reflected on escape_values_v_sorted
*/
typedef struct _Escape_Value Escape_Value;
struct _Escape_Value
@ -986,338 +980,352 @@ struct _Escape_Value
/* Expands to one brace-enclosed initializer: {e, v, strlen(e), strlen(v)}.
 * Each argument is expanded twice, so pass plain string literals only.
 * NOTE(review): presumably fills escape, value, escape_len and value_len of
 * Escape_Value in that order -- confirm against struct _Escape_Value. */
#define ESCAPE_VALUE(e,v) {e,v,strlen(e),strlen(v)}
static const Escape_Value escape_values_e_sorted[] = {
ESCAPE_VALUE("Á", "\xc3\x81"),
ESCAPE_VALUE("Â", "\xc3\x82"),
ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
ESCAPE_VALUE("À", "\xc3\x80"),
ESCAPE_VALUE("Å", "\xc3\x85"),
ESCAPE_VALUE("Ã", "\xc3\x83"),
ESCAPE_VALUE("Ä", "\xc3\x84"),
ESCAPE_VALUE("Ç", "\xc3\x87"),
ESCAPE_VALUE("‡", "\xe2\x80\xa1"),
ESCAPE_VALUE("É", "\xc3\x89"),
ESCAPE_VALUE("Ê", "\xc3\x8a"),
ESCAPE_VALUE("È", "\xc3\x88"),
ESCAPE_VALUE("&Eth;", "\xc3\x90"),
ESCAPE_VALUE("Ë", "\xc3\x8b"),
ESCAPE_VALUE("Í", "\xc3\x8d"),
ESCAPE_VALUE("Î", "\xc3\x8e"),
ESCAPE_VALUE("Ì", "\xc3\x8c"),
ESCAPE_VALUE("Ï", "\xc3\x8f"),
ESCAPE_VALUE("Ñ", "\xc3\x91"),
ESCAPE_VALUE("Ó", "\xc3\x93"),
ESCAPE_VALUE("Ô", "\xc3\x94"),
ESCAPE_VALUE("Ò", "\xc3\x92"),
ESCAPE_VALUE("Ø", "\xc3\x98"),
ESCAPE_VALUE("Õ", "\xc3\x95"),
ESCAPE_VALUE("Ö", "\xc3\x96"),
ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
ESCAPE_VALUE("Ú", "\xc3\x9a"),
ESCAPE_VALUE("Û", "\xc3\x9b"),
ESCAPE_VALUE("Ù", "\xc3\x99"),
ESCAPE_VALUE("Ý", "\xc3\x9d"),
ESCAPE_VALUE("á", "\xc3\xa1"),
ESCAPE_VALUE("â", "\xc3\xa2"),
ESCAPE_VALUE("´", "\xc2\xb4"),
ESCAPE_VALUE("æ", "\xc3\xa6"),
ESCAPE_VALUE("à", "\xc3\xa0"),
ESCAPE_VALUE("α", "\xce\x91"),
ESCAPE_VALUE("∧", "\xe2\x88\xa7"),
ESCAPE_VALUE("å", "\xc3\xa5"),
ESCAPE_VALUE("ã", "\xc3\xa3"),
ESCAPE_VALUE("ä", "\xc3\xa4"),
ESCAPE_VALUE("β", "\xce\x92"),
ESCAPE_VALUE("¦", "\xc2\xa6"),
ESCAPE_VALUE("•", "\xe2\x80\xa2"),
ESCAPE_VALUE("ç", "\xc3\xa7"),
ESCAPE_VALUE("¸", "\xc2\xb8"),
ESCAPE_VALUE("¢", "\xc2\xa2"),
ESCAPE_VALUE("χ", "\xce\xa7"),
ESCAPE_VALUE("©", "\xc2\xa9"),
ESCAPE_VALUE("¤", "\xc2\xa4"),
ESCAPE_VALUE("†", "\xe2\x80\xa0"),
ESCAPE_VALUE("↓", "\xe2\x86\x93"),
ESCAPE_VALUE("°", "\xc2\xb0"),
ESCAPE_VALUE("δ", "\xce\x94"),
ESCAPE_VALUE("÷", "\xc3\xb7"),
ESCAPE_VALUE("é", "\xc3\xa9"),
ESCAPE_VALUE("ê", "\xc3\xaa"),
ESCAPE_VALUE("è", "\xc3\xa8"),
ESCAPE_VALUE("ε", "\xce\x95"),
ESCAPE_VALUE("≡", "\xe2\x89\xa1"),
ESCAPE_VALUE("η", "\xce\x97"),
ESCAPE_VALUE("ð", "\xc3\xb0"),
ESCAPE_VALUE("ë", "\xc3\xab"),
ESCAPE_VALUE("€", "\xe2\x82\xac"),
ESCAPE_VALUE("∃", "\xe2\x88\x83"),
ESCAPE_VALUE("∀", "\xe2\x88\x80"),
ESCAPE_VALUE("½", "\xc2\xbd"),
ESCAPE_VALUE("¼", "\xc2\xbc"),
ESCAPE_VALUE("¾", "\xc2\xbe"),
ESCAPE_VALUE("γ", "\xce\x93"),
ESCAPE_VALUE("↔", "\xe2\x86\x94"),
ESCAPE_VALUE("…", "\xe2\x80\xa6"),
ESCAPE_VALUE("í", "\xc3\xad"),
ESCAPE_VALUE("î", "\xc3\xae"),
ESCAPE_VALUE("¡", "\xc2\xa1"),
ESCAPE_VALUE("ì", "\xc3\xac"),
ESCAPE_VALUE("∫", "\xe2\x88\xab"),
ESCAPE_VALUE("ι", "\xce\x99"),
ESCAPE_VALUE("¿", "\xc2\xbf"),
ESCAPE_VALUE("ï", "\xc3\xaf"),
ESCAPE_VALUE("κ", "\xce\x9a"),
ESCAPE_VALUE("λ", "\xce\x9b"),
ESCAPE_VALUE("«", "\xc2\xab"),
ESCAPE_VALUE("←", "\xe2\x86\x90"),
ESCAPE_VALUE("←", "\xe2\x87\x90"),
ESCAPE_VALUE("‎", "\xe2\x80\x8e"),
ESCAPE_VALUE("¯", "\xc2\xaf"),
ESCAPE_VALUE("µ", "\xc2\xb5"),
ESCAPE_VALUE("·", "\xc2\xb7"),
ESCAPE_VALUE("μ", "\xce\x9c"),
ESCAPE_VALUE("∇", "\xe2\x88\x87"),
ESCAPE_VALUE(" ", "\xc2\xa0"),
ESCAPE_VALUE("≠", "\xe2\x89\xa0"),
ESCAPE_VALUE("¬", "\xc2\xac"),
ESCAPE_VALUE("ñ", "\xc3\xb1"),
ESCAPE_VALUE("ν", "\xce\x9d"),
ESCAPE_VALUE("ó", "\xc3\xb3"),
ESCAPE_VALUE("ô", "\xc3\xb4"),
ESCAPE_VALUE("ò", "\xc3\xb2"),
ESCAPE_VALUE("ω", "\xce\xa9"),
ESCAPE_VALUE("ο", "\xce\x9f"),
ESCAPE_VALUE("⊕", "\xe2\x8a\x95"),
ESCAPE_VALUE("∨", "\xe2\x88\xa8"),
ESCAPE_VALUE("ª", "\xc2\xaa"),
ESCAPE_VALUE("º", "\xc2\xba"),
ESCAPE_VALUE("ø", "\xc3\xb8"),
ESCAPE_VALUE("õ", "\xc3\xb5"),
ESCAPE_VALUE("ö", "\xc3\xb6"),
ESCAPE_VALUE("¶", "\xc2\xb6"),
ESCAPE_VALUE("⊥", "\xe2\x8a\xa5"),
ESCAPE_VALUE("φ", "\xce\xa6"),
ESCAPE_VALUE("π", "\xce\xa0"),
ESCAPE_VALUE("±", "\xc2\xb1"),
ESCAPE_VALUE("£", "\xc2\xa3"),
ESCAPE_VALUE("∏", "\xe2\x88\x8f"),
ESCAPE_VALUE("ψ", "\xce\xa8"),
ESCAPE_VALUE("»", "\xc2\xbb"),
ESCAPE_VALUE("→", "\xe2\x86\x92"),
ESCAPE_VALUE("→", "\xe2\x87\x92"),
ESCAPE_VALUE("®", "\xc2\xae"),
ESCAPE_VALUE("ρ", "\xce\xa1"),
ESCAPE_VALUE("‏", "\xe2\x80\x8f"),
ESCAPE_VALUE("§", "\xc2\xa7"),
ESCAPE_VALUE("­", "\xc2\xad"),
ESCAPE_VALUE("σ", "\xce\xa3"),
ESCAPE_VALUE("∑", "\xe2\x88\x91"),
ESCAPE_VALUE("¹", "\xc2\xb9"),
ESCAPE_VALUE("²", "\xc2\xb2"),
ESCAPE_VALUE("³", "\xc2\xb3"),
ESCAPE_VALUE("ß", "\xc3\x9f"),
ESCAPE_VALUE("τ", "\xce\xa4"),
ESCAPE_VALUE("θ", "\xce\x98"),
ESCAPE_VALUE("þ", "\xc3\xbe"),
ESCAPE_VALUE("×", "\xc3\x97"),
ESCAPE_VALUE("ú", "\xc3\xba"),
ESCAPE_VALUE("↑", "\xe2\x86\x91"),
ESCAPE_VALUE("û", "\xc3\xbb"),
ESCAPE_VALUE("ù", "\xc3\xb9"),
ESCAPE_VALUE("¨", "\xc2\xa8"),
ESCAPE_VALUE("υ", "\xce\xa5"),
ESCAPE_VALUE("ü", "\xc3\xbc"),
ESCAPE_VALUE("ξ", "\xce\x9e"),
ESCAPE_VALUE("ý", "\xc3\xbd"),
ESCAPE_VALUE("¥", "\xc2\xa5"),
ESCAPE_VALUE("ÿ", "\xc3\xbf"),
ESCAPE_VALUE("ζ", "\xce\x96"),
ESCAPE_VALUE("‍", "\xe2\x80\x8d"),
ESCAPE_VALUE("‌", "\xe2\x80\x8c"),
/**
 * @internal
 * @var html_common_escapes[]
 * This array consists of the most common html escapes as _Escape_Value
 * structures: the entity name (escape) paired with the single byte it
 * stands for (value).
 *
 * escape_sorted_common_key_copy() and escape_sorted_common_value_copy()
 * build sorted pointer arrays from this table by calling qsort().
 */
static const Escape_Value html_common_escapes[] = {
   ESCAPE_VALUE("&amp;", "\x26"),
   ESCAPE_VALUE("&apos;", "\x27"),
   ESCAPE_VALUE("&gt;", "\x3e"),
   ESCAPE_VALUE("&lt;", "\x3c"),
   ESCAPE_VALUE("&quot;", "\x22"),
};
/**
* @internal
* @var escape_values_e_common_sorted[]
* same as escape_values_e_sorted with small subset of common escapes
* This array consists of rest html escapes values as _Escape_Value structure
*/
/* Common escapes keyed by entity name. The entries are listed in ascending
 * strcmp() order of the escape string because _escaped_string_search()
 * performs a binary search over this array. */
static const Escape_Value escape_values_e_common_sorted[] = {
   ESCAPE_VALUE("&amp;", "\x26"),
   ESCAPE_VALUE("&apos;", "\x27"),
   ESCAPE_VALUE("&gt;", "\x3e"),
   ESCAPE_VALUE("&lt;", "\x3c"),
   ESCAPE_VALUE("&quot;", "\x22"),
};
/**
* @internal
* @var escape_values_v_sorted[]
* This array consists of Escape_Value structure sorted by escape value
* And new added value must be placed sorted position, and reflected on escape_values_e_sorted
*/
static const Escape_Value escape_values_v_sorted[] = {
ESCAPE_VALUE(" ", "\xc2\xa0"),
ESCAPE_VALUE("¡", "\xc2\xa1"),
ESCAPE_VALUE("¢", "\xc2\xa2"),
ESCAPE_VALUE("£", "\xc2\xa3"),
ESCAPE_VALUE("¤", "\xc2\xa4"),
ESCAPE_VALUE("¥", "\xc2\xa5"),
ESCAPE_VALUE("¦", "\xc2\xa6"),
ESCAPE_VALUE("§", "\xc2\xa7"),
ESCAPE_VALUE("¨", "\xc2\xa8"),
ESCAPE_VALUE("©", "\xc2\xa9"),
ESCAPE_VALUE("ª", "\xc2\xaa"),
ESCAPE_VALUE("«", "\xc2\xab"),
ESCAPE_VALUE("¬", "\xc2\xac"),
ESCAPE_VALUE("­", "\xc2\xad"),
ESCAPE_VALUE("®", "\xc2\xae"),
ESCAPE_VALUE("¯", "\xc2\xaf"),
ESCAPE_VALUE("°", "\xc2\xb0"),
ESCAPE_VALUE("±", "\xc2\xb1"),
ESCAPE_VALUE("²", "\xc2\xb2"),
ESCAPE_VALUE("³", "\xc2\xb3"),
ESCAPE_VALUE("´", "\xc2\xb4"),
ESCAPE_VALUE("µ", "\xc2\xb5"),
ESCAPE_VALUE("¶", "\xc2\xb6"),
ESCAPE_VALUE("·", "\xc2\xb7"),
ESCAPE_VALUE("¸", "\xc2\xb8"),
ESCAPE_VALUE("¹", "\xc2\xb9"),
ESCAPE_VALUE("º", "\xc2\xba"),
ESCAPE_VALUE("»", "\xc2\xbb"),
ESCAPE_VALUE("¼", "\xc2\xbc"),
ESCAPE_VALUE("½", "\xc2\xbd"),
ESCAPE_VALUE("¾", "\xc2\xbe"),
ESCAPE_VALUE("¿", "\xc2\xbf"),
ESCAPE_VALUE("À", "\xc3\x80"),
static const Escape_Value html_escapes[] = {
ESCAPE_VALUE("Á", "\xc3\x81"),
ESCAPE_VALUE("Â", "\xc3\x82"),
ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
ESCAPE_VALUE("À", "\xc3\x80"),
ESCAPE_VALUE("Å", "\xc3\x85"),
ESCAPE_VALUE("Ã", "\xc3\x83"),
ESCAPE_VALUE("Ä", "\xc3\x84"),
ESCAPE_VALUE("Å", "\xc3\x85"),
ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
ESCAPE_VALUE("Ç", "\xc3\x87"),
ESCAPE_VALUE("È", "\xc3\x88"),
ESCAPE_VALUE("‡", "\xe2\x80\xa1"),
ESCAPE_VALUE("É", "\xc3\x89"),
ESCAPE_VALUE("Ê", "\xc3\x8a"),
ESCAPE_VALUE("È", "\xc3\x88"),
ESCAPE_VALUE("&Eth;", "\xc3\x90"),
ESCAPE_VALUE("Ë", "\xc3\x8b"),
ESCAPE_VALUE("Ì", "\xc3\x8c"),
ESCAPE_VALUE("Í", "\xc3\x8d"),
ESCAPE_VALUE("Î", "\xc3\x8e"),
ESCAPE_VALUE("Ì", "\xc3\x8c"),
ESCAPE_VALUE("Ï", "\xc3\x8f"),
ESCAPE_VALUE("&Eth;", "\xc3\x90"),
ESCAPE_VALUE("Ñ", "\xc3\x91"),
ESCAPE_VALUE("Ò", "\xc3\x92"),
ESCAPE_VALUE("Ó", "\xc3\x93"),
ESCAPE_VALUE("Ô", "\xc3\x94"),
ESCAPE_VALUE("Ò", "\xc3\x92"),
ESCAPE_VALUE("Ø", "\xc3\x98"),
ESCAPE_VALUE("Õ", "\xc3\x95"),
ESCAPE_VALUE("Ö", "\xc3\x96"),
ESCAPE_VALUE("×", "\xc3\x97"),
ESCAPE_VALUE("Ø", "\xc3\x98"),
ESCAPE_VALUE("Ù", "\xc3\x99"),
ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
ESCAPE_VALUE("Ú", "\xc3\x9a"),
ESCAPE_VALUE("Û", "\xc3\x9b"),
ESCAPE_VALUE("Ù", "\xc3\x99"),
ESCAPE_VALUE("Ý", "\xc3\x9d"),
ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
ESCAPE_VALUE("ß", "\xc3\x9f"),
ESCAPE_VALUE("à", "\xc3\xa0"),
ESCAPE_VALUE("á", "\xc3\xa1"),
ESCAPE_VALUE("â", "\xc3\xa2"),
ESCAPE_VALUE("´", "\xc2\xb4"),
ESCAPE_VALUE("æ", "\xc3\xa6"),
ESCAPE_VALUE("à", "\xc3\xa0"),
ESCAPE_VALUE("α", "\xce\x91"),
ESCAPE_VALUE("∧", "\xe2\x88\xa7"),
ESCAPE_VALUE("å", "\xc3\xa5"),
ESCAPE_VALUE("ã", "\xc3\xa3"),
ESCAPE_VALUE("ä", "\xc3\xa4"),
ESCAPE_VALUE("å", "\xc3\xa5"),
ESCAPE_VALUE("æ", "\xc3\xa6"),
ESCAPE_VALUE("β", "\xce\x92"),
ESCAPE_VALUE("¦", "\xc2\xa6"),
ESCAPE_VALUE("•", "\xe2\x80\xa2"),
ESCAPE_VALUE("ç", "\xc3\xa7"),
ESCAPE_VALUE("è", "\xc3\xa8"),
ESCAPE_VALUE("¸", "\xc2\xb8"),
ESCAPE_VALUE("¢", "\xc2\xa2"),
ESCAPE_VALUE("χ", "\xce\xa7"),
ESCAPE_VALUE("©", "\xc2\xa9"),
ESCAPE_VALUE("¤", "\xc2\xa4"),
ESCAPE_VALUE("†", "\xe2\x80\xa0"),
ESCAPE_VALUE("↓", "\xe2\x86\x93"),
ESCAPE_VALUE("°", "\xc2\xb0"),
ESCAPE_VALUE("δ", "\xce\x94"),
ESCAPE_VALUE("÷", "\xc3\xb7"),
ESCAPE_VALUE("é", "\xc3\xa9"),
ESCAPE_VALUE("ê", "\xc3\xaa"),
ESCAPE_VALUE("è", "\xc3\xa8"),
ESCAPE_VALUE("ε", "\xce\x95"),
ESCAPE_VALUE("≡", "\xe2\x89\xa1"),
ESCAPE_VALUE("η", "\xce\x97"),
ESCAPE_VALUE("ð", "\xc3\xb0"),
ESCAPE_VALUE("ë", "\xc3\xab"),
ESCAPE_VALUE("ì", "\xc3\xac"),
ESCAPE_VALUE("€", "\xe2\x82\xac"),
ESCAPE_VALUE("∃", "\xe2\x88\x83"),
ESCAPE_VALUE("∀", "\xe2\x88\x80"),
ESCAPE_VALUE("½", "\xc2\xbd"),
ESCAPE_VALUE("¼", "\xc2\xbc"),
ESCAPE_VALUE("¾", "\xc2\xbe"),
ESCAPE_VALUE("γ", "\xce\x93"),
ESCAPE_VALUE("↔", "\xe2\x86\x94"),
ESCAPE_VALUE("…", "\xe2\x80\xa6"),
ESCAPE_VALUE("í", "\xc3\xad"),
ESCAPE_VALUE("î", "\xc3\xae"),
ESCAPE_VALUE("ï", "\xc3\xaf"),
ESCAPE_VALUE("ð", "\xc3\xb0"),
ESCAPE_VALUE("ñ", "\xc3\xb1"),
ESCAPE_VALUE("ò", "\xc3\xb2"),
ESCAPE_VALUE("ó", "\xc3\xb3"),
ESCAPE_VALUE("ô", "\xc3\xb4"),
ESCAPE_VALUE("õ", "\xc3\xb5"),
ESCAPE_VALUE("ö", "\xc3\xb6"),
ESCAPE_VALUE("÷", "\xc3\xb7"),
ESCAPE_VALUE("ø", "\xc3\xb8"),
ESCAPE_VALUE("ù", "\xc3\xb9"),
ESCAPE_VALUE("ú", "\xc3\xba"),
ESCAPE_VALUE("û", "\xc3\xbb"),
ESCAPE_VALUE("ü", "\xc3\xbc"),
ESCAPE_VALUE("ý", "\xc3\xbd"),
ESCAPE_VALUE("þ", "\xc3\xbe"),
ESCAPE_VALUE("ÿ", "\xc3\xbf"),
ESCAPE_VALUE("α", "\xce\x91"),
ESCAPE_VALUE("β", "\xce\x92"),
ESCAPE_VALUE("γ", "\xce\x93"),
ESCAPE_VALUE("δ", "\xce\x94"),
ESCAPE_VALUE("ε", "\xce\x95"),
ESCAPE_VALUE("ζ", "\xce\x96"),
ESCAPE_VALUE("η", "\xce\x97"),
ESCAPE_VALUE("θ", "\xce\x98"),
ESCAPE_VALUE("¡", "\xc2\xa1"),
ESCAPE_VALUE("ì", "\xc3\xac"),
ESCAPE_VALUE("∫", "\xe2\x88\xab"),
ESCAPE_VALUE("ι", "\xce\x99"),
ESCAPE_VALUE("¿", "\xc2\xbf"),
ESCAPE_VALUE("ï", "\xc3\xaf"),
ESCAPE_VALUE("κ", "\xce\x9a"),
ESCAPE_VALUE("λ", "\xce\x9b"),
ESCAPE_VALUE("μ", "\xce\x9c"),
ESCAPE_VALUE("ν", "\xce\x9d"),
ESCAPE_VALUE("ξ", "\xce\x9e"),
ESCAPE_VALUE("ο", "\xce\x9f"),
ESCAPE_VALUE("π", "\xce\xa0"),
ESCAPE_VALUE("ρ", "\xce\xa1"),
ESCAPE_VALUE("σ", "\xce\xa3"),
ESCAPE_VALUE("τ", "\xce\xa4"),
ESCAPE_VALUE("υ", "\xce\xa5"),
ESCAPE_VALUE("φ", "\xce\xa6"),
ESCAPE_VALUE("χ", "\xce\xa7"),
ESCAPE_VALUE("ψ", "\xce\xa8"),
ESCAPE_VALUE("ω", "\xce\xa9"),
ESCAPE_VALUE("‌", "\xe2\x80\x8c"),
ESCAPE_VALUE("‍", "\xe2\x80\x8d"),
ESCAPE_VALUE("‎", "\xe2\x80\x8e"),
ESCAPE_VALUE("‏", "\xe2\x80\x8f"),
ESCAPE_VALUE("†", "\xe2\x80\xa0"),
ESCAPE_VALUE("‡", "\xe2\x80\xa1"),
ESCAPE_VALUE("•", "\xe2\x80\xa2"),
ESCAPE_VALUE("…", "\xe2\x80\xa6"),
ESCAPE_VALUE("€", "\xe2\x82\xac"),
ESCAPE_VALUE("«", "\xc2\xab"),
ESCAPE_VALUE("←", "\xe2\x86\x90"),
ESCAPE_VALUE("↑", "\xe2\x86\x91"),
ESCAPE_VALUE("→", "\xe2\x86\x92"),
ESCAPE_VALUE("↓", "\xe2\x86\x93"),
ESCAPE_VALUE("↔", "\xe2\x86\x94"),
ESCAPE_VALUE("←", "\xe2\x87\x90"),
ESCAPE_VALUE("→", "\xe2\x87\x92"),
ESCAPE_VALUE("∀", "\xe2\x88\x80"),
ESCAPE_VALUE("∃", "\xe2\x88\x83"),
ESCAPE_VALUE("‎", "\xe2\x80\x8e"),
ESCAPE_VALUE("¯", "\xc2\xaf"),
ESCAPE_VALUE("µ", "\xc2\xb5"),
ESCAPE_VALUE("·", "\xc2\xb7"),
ESCAPE_VALUE("μ", "\xce\x9c"),
ESCAPE_VALUE("∇", "\xe2\x88\x87"),
ESCAPE_VALUE("∏", "\xe2\x88\x8f"),
ESCAPE_VALUE("∑", "\xe2\x88\x91"),
ESCAPE_VALUE("∧", "\xe2\x88\xa7"),
ESCAPE_VALUE("∨", "\xe2\x88\xa8"),
ESCAPE_VALUE("∫", "\xe2\x88\xab"),
ESCAPE_VALUE(" ", "\xc2\xa0"),
ESCAPE_VALUE("≠", "\xe2\x89\xa0"),
ESCAPE_VALUE("≡", "\xe2\x89\xa1"),
ESCAPE_VALUE("¬", "\xc2\xac"),
ESCAPE_VALUE("ñ", "\xc3\xb1"),
ESCAPE_VALUE("ν", "\xce\x9d"),
ESCAPE_VALUE("ó", "\xc3\xb3"),
ESCAPE_VALUE("ô", "\xc3\xb4"),
ESCAPE_VALUE("ò", "\xc3\xb2"),
ESCAPE_VALUE("ω", "\xce\xa9"),
ESCAPE_VALUE("ο", "\xce\x9f"),
ESCAPE_VALUE("⊕", "\xe2\x8a\x95"),
ESCAPE_VALUE("∨", "\xe2\x88\xa8"),
ESCAPE_VALUE("ª", "\xc2\xaa"),
ESCAPE_VALUE("º", "\xc2\xba"),
ESCAPE_VALUE("ø", "\xc3\xb8"),
ESCAPE_VALUE("õ", "\xc3\xb5"),
ESCAPE_VALUE("ö", "\xc3\xb6"),
ESCAPE_VALUE("¶", "\xc2\xb6"),
ESCAPE_VALUE("⊥", "\xe2\x8a\xa5"),
ESCAPE_VALUE("φ", "\xce\xa6"),
ESCAPE_VALUE("π", "\xce\xa0"),
ESCAPE_VALUE("±", "\xc2\xb1"),
ESCAPE_VALUE("£", "\xc2\xa3"),
ESCAPE_VALUE("∏", "\xe2\x88\x8f"),
ESCAPE_VALUE("ψ", "\xce\xa8"),
ESCAPE_VALUE("»", "\xc2\xbb"),
ESCAPE_VALUE("→", "\xe2\x86\x92"),
ESCAPE_VALUE("⇒", "\xe2\x87\x92"),
ESCAPE_VALUE("®", "\xc2\xae"),
ESCAPE_VALUE("ρ", "\xce\xa1"),
ESCAPE_VALUE("‏", "\xe2\x80\x8f"),
ESCAPE_VALUE("§", "\xc2\xa7"),
ESCAPE_VALUE("­", "\xc2\xad"),
ESCAPE_VALUE("σ", "\xce\xa3"),
ESCAPE_VALUE("∑", "\xe2\x88\x91"),
ESCAPE_VALUE("¹", "\xc2\xb9"),
ESCAPE_VALUE("²", "\xc2\xb2"),
ESCAPE_VALUE("³", "\xc2\xb3"),
ESCAPE_VALUE("ß", "\xc3\x9f"),
ESCAPE_VALUE("τ", "\xce\xa4"),
ESCAPE_VALUE("θ", "\xce\x98"),
ESCAPE_VALUE("þ", "\xc3\xbe"),
ESCAPE_VALUE("×", "\xc3\x97"),
ESCAPE_VALUE("ú", "\xc3\xba"),
ESCAPE_VALUE("↑", "\xe2\x86\x91"),
ESCAPE_VALUE("û", "\xc3\xbb"),
ESCAPE_VALUE("ù", "\xc3\xb9"),
ESCAPE_VALUE("¨", "\xc2\xa8"),
ESCAPE_VALUE("υ", "\xce\xa5"),
ESCAPE_VALUE("ü", "\xc3\xbc"),
ESCAPE_VALUE("ξ", "\xce\x9e"),
ESCAPE_VALUE("ý", "\xc3\xbd"),
ESCAPE_VALUE("¥", "\xc2\xa5"),
ESCAPE_VALUE("ÿ", "\xc3\xbf"),
ESCAPE_VALUE("ζ", "\xce\x96"),
ESCAPE_VALUE("‍", "\xe2\x80\x8d"),
ESCAPE_VALUE("‌", "\xe2\x80\x8c"),
};
/**
 * @internal
 * @var escape_values_v_common_sorted[]
 * same as escape_values_v_sorted with small subset of common escapes.
 * The entries are listed in ascending order of the value byte because
 * _escaped_value_search() performs a binary search over this array.
 */
static const Escape_Value escape_values_v_common_sorted[] = {
   ESCAPE_VALUE("&quot;", "\x22"),
   ESCAPE_VALUE("&amp;", "\x26"),
   ESCAPE_VALUE("&apos;", "\x27"),
   ESCAPE_VALUE("&lt;", "\x3c"),
   ESCAPE_VALUE("&gt;", "\x3e"),
};
static int
_escape_key_sort(const void *a, const void *b)
{
const char *k_a = (*(const Escape_Value **) a)->escape;
const char *k_b = (*(const Escape_Value **) b)->escape;
return strcmp(k_a, k_b);
}
static int
_escape_value_sort(const void *a, const void *b)
{
const char *v_a = (*(const Escape_Value **) a)->value;
const char *v_b = (*(const Escape_Value **) b)->value;
return strcmp(v_a, v_b);
}
/**
 * @internal
 * Allocate an array holding one pointer per html_common_escapes entry and
 * sort those pointers by escape string (see _escape_key_sort()).
 *
 * Only the pointer array is allocated; the entries themselves stay in the
 * const table and must not be written through the returned pointers.
 *
 * @return the malloc()ed, sorted pointer array, or NULL if allocation fails
 */
static Escape_Value **
escape_sorted_common_key_copy(void)
{
   const size_t len = sizeof(html_common_escapes) / sizeof(html_common_escapes[0]);
   Escape_Value **ret_list = malloc(len * sizeof(*ret_list));
   size_t i;

   /* Without this check the fill loop below would write through NULL.
    * The search helpers already treat a NULL table as "not found". */
   if (!ret_list)
     return NULL;

   for (i = 0; i < len; i++)
     {
        /* The return type is non-const, hence the cast away from const. */
        ret_list[i] = (Escape_Value *)&html_common_escapes[i];
     }

   qsort(ret_list, len, sizeof(*ret_list), _escape_key_sort);
   return ret_list;
}
/**
 * @internal
 * Allocate an array holding one pointer per html_common_escapes entry and
 * sort those pointers by value string (see _escape_value_sort()).
 *
 * Only the pointer array is allocated; the entries themselves stay in the
 * const table and must not be written through the returned pointers.
 *
 * @return the malloc()ed, sorted pointer array, or NULL if allocation fails
 */
static Escape_Value **
escape_sorted_common_value_copy(void)
{
   const size_t len = sizeof(html_common_escapes) / sizeof(html_common_escapes[0]);
   Escape_Value **ret_list = malloc(len * sizeof(*ret_list));
   size_t i;

   /* Without this check the fill loop below would write through NULL.
    * The search helpers already treat a NULL table as "not found". */
   if (!ret_list)
     return NULL;

   for (i = 0; i < len; i++)
     {
        /* The return type is non-const, hence the cast away from const. */
        ret_list[i] = (Escape_Value *)&html_common_escapes[i];
     }

   qsort(ret_list, len, sizeof(*ret_list), _escape_value_sort);
   return ret_list;
}
/**
 * @internal
 * Allocate an array holding one pointer per html_escapes entry and sort
 * those pointers by escape string (see _escape_key_sort()).
 *
 * Only the pointer array is allocated; the entries themselves stay in the
 * const table and must not be written through the returned pointers.
 *
 * @return the malloc()ed, sorted pointer array, or NULL if allocation fails
 */
static Escape_Value **
escape_sorted_rest_key_copy(void)
{
   const size_t len = sizeof(html_escapes) / sizeof(html_escapes[0]);
   Escape_Value **ret_list = malloc(len * sizeof(*ret_list));
   size_t i;

   /* Without this check the fill loop below would write through NULL.
    * The search helpers already treat a NULL table as "not found". */
   if (!ret_list)
     return NULL;

   for (i = 0; i < len; i++)
     {
        /* The return type is non-const, hence the cast away from const. */
        ret_list[i] = (Escape_Value *)&html_escapes[i];
     }

   qsort(ret_list, len, sizeof(*ret_list), _escape_key_sort);
   return ret_list;
}
/**
 * @internal
 * Allocate an array holding one pointer per html_escapes entry and sort
 * those pointers by value string (see _escape_value_sort()).
 *
 * Only the pointer array is allocated; the entries themselves stay in the
 * const table and must not be written through the returned pointers.
 *
 * @return the malloc()ed, sorted pointer array, or NULL if allocation fails
 */
static Escape_Value **
escape_sorted_rest_value_copy(void)
{
   const size_t len = sizeof(html_escapes) / sizeof(html_escapes[0]);
   Escape_Value **ret_list = malloc(len * sizeof(*ret_list));
   size_t i;

   /* Without this check the fill loop below would write through NULL.
    * The search helpers already treat a NULL table as "not found". */
   if (!ret_list)
     return NULL;

   for (i = 0; i < len; i++)
     {
        /* The return type is non-const, hence the cast away from const. */
        ret_list[i] = (Escape_Value *)&html_escapes[i];
     }

   qsort(ret_list, len, sizeof(*ret_list), _escape_value_sort);
   return ret_list;
}
/**
 * @internal
 * Return the key-sorted pointer array over html_common_escapes, building it
 * with escape_sorted_common_key_copy() while the cached pointer is NULL.
 *
 * @param p_len if not NULL, receives the number of html_common_escapes entries
 * @return the cached array, or NULL if it could not be built
 */
static Escape_Value **
get_html_escape_array_common_key_sorted(size_t *p_len)
{
   static Escape_Value **cached = NULL;

   if (cached == NULL)
     cached = escape_sorted_common_key_copy();

   if (p_len != NULL)
     *p_len = sizeof(html_common_escapes) / sizeof(Escape_Value);

   return cached;
}
/**
 * @internal
 * Return the value-sorted pointer array over html_common_escapes, building
 * it with escape_sorted_common_value_copy() while the cached pointer is NULL.
 *
 * @param p_len if not NULL, receives the number of html_common_escapes entries
 * @return the cached array, or NULL if it could not be built
 */
static Escape_Value **
get_html_escape_array_common_value_sorted(size_t *p_len)
{
   static Escape_Value **cached = NULL;

   if (cached == NULL)
     cached = escape_sorted_common_value_copy();

   if (p_len != NULL)
     *p_len = sizeof(html_common_escapes) / sizeof(Escape_Value);

   return cached;
}
/**
 * @internal
 * Return the key-sorted pointer array over html_escapes, building it with
 * escape_sorted_rest_key_copy() while the cached pointer is NULL.
 *
 * @param p_len if not NULL, receives the number of html_escapes entries
 * @return the cached array, or NULL if it could not be built
 */
static Escape_Value **
get_html_escape_array_rest_key_sorted(size_t *p_len)
{
   static Escape_Value **cached = NULL;

   if (cached == NULL)
     cached = escape_sorted_rest_key_copy();

   if (p_len != NULL)
     *p_len = sizeof(html_escapes) / sizeof(Escape_Value);

   return cached;
}
/**
 * @internal
 * Return the value-sorted pointer array over html_escapes, building it with
 * escape_sorted_rest_value_copy() while the cached pointer is NULL.
 *
 * @param p_len if not NULL, receives the number of html_escapes entries
 * @return the cached array, or NULL if it could not be built
 */
static Escape_Value **
get_html_escape_array_rest_value_sorted(size_t *p_len)
{
   static Escape_Value **cached = NULL;

   if (cached == NULL)
     cached = escape_sorted_rest_value_copy();

   if (p_len != NULL)
     *p_len = sizeof(html_escapes) / sizeof(Escape_Value);

   return cached;
}
/**
* @internal
@ -8490,22 +8498,25 @@ _escaped_is_eq_and_advance(const char *s, const char *s_end,
* @param escape_values_len is the len of Escape_Value array
*/
static int
_escaped_string_search(const char *s, size_t s_len, const Escape_Value escape_values[], const size_t escape_values_len)
_escaped_string_search(const char *s, size_t s_len, Escape_Value **escape_values, const size_t escape_values_len)
{
if (!escape_values)
return -1;
int l = 0;
int r = escape_values_len - 1;
while (l <= r)
{
int m = (l + r) / 2;
int res = strncmp(s, escape_values[m].escape, MAX(escape_values[m].escape_len, s_len));
int res = strncmp(s, escape_values[m]->escape, MAX(escape_values[m]->escape_len, s_len));
if (res == 0)
{
//Handle special case when s_len is less than escape_len
//then we will continue searching
//example ("&gt;",1,....)
if (escape_values[m].escape_len > s_len)
if (escape_values[m]->escape_len > s_len)
res = -1;
else if (escape_values[m].escape_len < s_len)
else if (escape_values[m]->escape_len < s_len)
res = 1;
else return m;
}
@ -8525,14 +8536,17 @@ _escaped_string_search(const char *s, size_t s_len, const Escape_Value escape_va
* @param escape_values_len is the len of Escape_Value array
*/
static int
_escaped_value_search(const char *s, const Escape_Value escape_values[], const size_t escape_values_len)
_escaped_value_search(const char *s, Escape_Value **escape_values , const size_t escape_values_len)
{
if (!escape_values)
return -1;
int l = 0;
int r = escape_values_len - 1;
while (l <= r)
{
int m = (l + r) / 2;
int res = strncmp(s, escape_values[m].value, escape_values[m].value_len);
int res = strncmp(s, escape_values[m]->value, escape_values[m]->value_len);
if (res == 0)
return m;
if (res > 0)
@ -8553,21 +8567,22 @@ _escaped_value_search(const char *s, const Escape_Value escape_values[], const s
static inline const char *
_escaped_char_match(const char *s, int *adv)
{
static const size_t escape_common_size = sizeof(escape_values_v_common_sorted) / sizeof(Escape_Value);
int n_ret = _escaped_value_search(s, escape_values_v_common_sorted, escape_common_size);
size_t len = 0;
Escape_Value **list = get_html_escape_array_common_value_sorted(&len);
int n_ret = _escaped_value_search(s, list, len);
if (n_ret != -1)
{
*adv = (int) escape_values_v_common_sorted[n_ret].value_len;
return escape_values_v_common_sorted[n_ret].escape;
*adv = (int) list[n_ret]->value_len;
return list[n_ret]->escape;
}
else
{
static const size_t escape_size = sizeof(escape_values_v_sorted) / sizeof(Escape_Value);
n_ret = _escaped_value_search(s, escape_values_v_sorted, escape_size);
list = get_html_escape_array_rest_value_sorted(&len);
n_ret = _escaped_value_search(s, list, len);
if (n_ret != -1)
{
*adv = (int)escape_values_v_sorted[n_ret].value_len;
return escape_values_v_sorted[n_ret].escape;
*adv = (int)list[n_ret]->value_len;
return list[n_ret]->escape;
}
}
return NULL;
@ -8623,18 +8638,22 @@ _escaped_char_get(const char *s, const char *s_end)
}
else
{
static const size_t escape_common_size = sizeof(escape_values_e_common_sorted) / sizeof(Escape_Value);
int n_ret = _escaped_string_search(s, s_end-s, escape_values_e_common_sorted, escape_common_size);
size_t len = 0;
Escape_Value **list;
list = get_html_escape_array_common_key_sorted(&len);
int n_ret = _escaped_string_search(s, s_end-s, list, len);
if (n_ret != -1)
{
return escape_values_e_common_sorted[n_ret].value;
return list[n_ret]->value;
}
else
{
static const size_t escape_size = sizeof(escape_values_e_sorted) / sizeof(Escape_Value);
n_ret = _escaped_string_search(s, s_end-s, escape_values_e_sorted, escape_size);
list = get_html_escape_array_rest_key_sorted(&len);
n_ret = _escaped_string_search(s, s_end-s, list, len);
if (n_ret != -1)
return escape_values_e_sorted[n_ret].value;
{
return list[n_ret]->value;
}
}
}