From 86c274ea3395a02ee9ee2a252113982e4f60077e Mon Sep 17 00:00:00 2001 From: Ali Alzyod Date: Wed, 5 Aug 2020 11:27:03 +0900 Subject: [PATCH] evas_textblock: enhance escape character handling Summary: -Lazy initialization for html escapes lists -Lower memory consumption for escapes lists -Simplify code maintenance by sorting lists at runtime, new items do not need to respect sort order (the runtime sort will handle it) Reviewers: woohyun, bowonryu, cedric, tasn Reviewed By: woohyun Subscribers: cedric, #reviewers, #committers Tags: #efl Differential Revision: https://phab.enlightenment.org/D9370 --- src/lib/evas/canvas/evas_object_textblock.c | 639 ++++++++++---------- 1 file changed, 329 insertions(+), 310 deletions(-) diff --git a/src/lib/evas/canvas/evas_object_textblock.c b/src/lib/evas/canvas/evas_object_textblock.c index 4c6fc865d8..6ed1593e15 100644 --- a/src/lib/evas/canvas/evas_object_textblock.c +++ b/src/lib/evas/canvas/evas_object_textblock.c @@ -968,12 +968,6 @@ _line_free(Evas_Object_Textblock_Line *ln) */ -/** - * @internal - * @var escape_values_e_sorted[] - * This array consists of Escape_Value structure sorted by escape string - * And new added value must be placed sorted position, and reflected on escape_values_v_sorted - */ typedef struct _Escape_Value Escape_Value; struct _Escape_Value @@ -986,338 +980,352 @@ struct _Escape_Value #define ESCAPE_VALUE(e,v) {e,v,strlen(e),strlen(v)} -static const Escape_Value escape_values_e_sorted[] = { - ESCAPE_VALUE("Á", "\xc3\x81"), - ESCAPE_VALUE("Â", "\xc3\x82"), - ESCAPE_VALUE("&Aelig;", "\xc3\x86"), - ESCAPE_VALUE("À", "\xc3\x80"), - ESCAPE_VALUE("Å", "\xc3\x85"), - ESCAPE_VALUE("Ã", "\xc3\x83"), - ESCAPE_VALUE("Ä", "\xc3\x84"), - ESCAPE_VALUE("Ç", "\xc3\x87"), - ESCAPE_VALUE("‡", "\xe2\x80\xa1"), - ESCAPE_VALUE("É", "\xc3\x89"), - ESCAPE_VALUE("Ê", "\xc3\x8a"), - ESCAPE_VALUE("È", "\xc3\x88"), - ESCAPE_VALUE("&Eth;", "\xc3\x90"), - ESCAPE_VALUE("Ë", "\xc3\x8b"), - ESCAPE_VALUE("Í", "\xc3\x8d"), - ESCAPE_VALUE("Î",
"\xc3\x8e"), - ESCAPE_VALUE("Ì", "\xc3\x8c"), - ESCAPE_VALUE("Ï", "\xc3\x8f"), - ESCAPE_VALUE("Ñ", "\xc3\x91"), - ESCAPE_VALUE("Ó", "\xc3\x93"), - ESCAPE_VALUE("Ô", "\xc3\x94"), - ESCAPE_VALUE("Ò", "\xc3\x92"), - ESCAPE_VALUE("Ø", "\xc3\x98"), - ESCAPE_VALUE("Õ", "\xc3\x95"), - ESCAPE_VALUE("Ö", "\xc3\x96"), - ESCAPE_VALUE("&Thorn;", "\xc3\x9e"), - ESCAPE_VALUE("Ú", "\xc3\x9a"), - ESCAPE_VALUE("Û", "\xc3\x9b"), - ESCAPE_VALUE("Ù", "\xc3\x99"), - ESCAPE_VALUE("Ý", "\xc3\x9d"), - ESCAPE_VALUE("á", "\xc3\xa1"), - ESCAPE_VALUE("â", "\xc3\xa2"), - ESCAPE_VALUE("´", "\xc2\xb4"), - ESCAPE_VALUE("æ", "\xc3\xa6"), - ESCAPE_VALUE("à", "\xc3\xa0"), - ESCAPE_VALUE("α", "\xce\x91"), - ESCAPE_VALUE("∧", "\xe2\x88\xa7"), - ESCAPE_VALUE("å", "\xc3\xa5"), - ESCAPE_VALUE("ã", "\xc3\xa3"), - ESCAPE_VALUE("ä", "\xc3\xa4"), - ESCAPE_VALUE("β", "\xce\x92"), - ESCAPE_VALUE("¦", "\xc2\xa6"), - ESCAPE_VALUE("•", "\xe2\x80\xa2"), - ESCAPE_VALUE("ç", "\xc3\xa7"), - ESCAPE_VALUE("¸", "\xc2\xb8"), - ESCAPE_VALUE("¢", "\xc2\xa2"), - ESCAPE_VALUE("χ", "\xce\xa7"), - ESCAPE_VALUE("©", "\xc2\xa9"), - ESCAPE_VALUE("¤", "\xc2\xa4"), - ESCAPE_VALUE("†", "\xe2\x80\xa0"), - ESCAPE_VALUE("↓", "\xe2\x86\x93"), - ESCAPE_VALUE("°", "\xc2\xb0"), - ESCAPE_VALUE("δ", "\xce\x94"), - ESCAPE_VALUE("÷", "\xc3\xb7"), - ESCAPE_VALUE("é", "\xc3\xa9"), - ESCAPE_VALUE("ê", "\xc3\xaa"), - ESCAPE_VALUE("è", "\xc3\xa8"), - ESCAPE_VALUE("ε", "\xce\x95"), - ESCAPE_VALUE("≡", "\xe2\x89\xa1"), - ESCAPE_VALUE("η", "\xce\x97"), - ESCAPE_VALUE("ð", "\xc3\xb0"), - ESCAPE_VALUE("ë", "\xc3\xab"), - ESCAPE_VALUE("€", "\xe2\x82\xac"), - ESCAPE_VALUE("∃", "\xe2\x88\x83"), - ESCAPE_VALUE("∀", "\xe2\x88\x80"), - ESCAPE_VALUE("½", "\xc2\xbd"), - ESCAPE_VALUE("¼", "\xc2\xbc"), - ESCAPE_VALUE("¾", "\xc2\xbe"), - ESCAPE_VALUE("γ", "\xce\x93"), - ESCAPE_VALUE("↔", "\xe2\x86\x94"), - ESCAPE_VALUE("…", "\xe2\x80\xa6"), - ESCAPE_VALUE("í", "\xc3\xad"), - ESCAPE_VALUE("î", "\xc3\xae"), - ESCAPE_VALUE("¡", "\xc2\xa1"), - ESCAPE_VALUE("ì", 
"\xc3\xac"), - ESCAPE_VALUE("∫", "\xe2\x88\xab"), - ESCAPE_VALUE("ι", "\xce\x99"), - ESCAPE_VALUE("¿", "\xc2\xbf"), - ESCAPE_VALUE("ï", "\xc3\xaf"), - ESCAPE_VALUE("κ", "\xce\x9a"), - ESCAPE_VALUE("λ", "\xce\x9b"), - ESCAPE_VALUE("«", "\xc2\xab"), - ESCAPE_VALUE("←", "\xe2\x86\x90"), - ESCAPE_VALUE("←", "\xe2\x87\x90"), - ESCAPE_VALUE("‎", "\xe2\x80\x8e"), - ESCAPE_VALUE("¯", "\xc2\xaf"), - ESCAPE_VALUE("µ", "\xc2\xb5"), - ESCAPE_VALUE("·", "\xc2\xb7"), - ESCAPE_VALUE("μ", "\xce\x9c"), - ESCAPE_VALUE("∇", "\xe2\x88\x87"), - ESCAPE_VALUE(" ", "\xc2\xa0"), - ESCAPE_VALUE("≠", "\xe2\x89\xa0"), - ESCAPE_VALUE("¬", "\xc2\xac"), - ESCAPE_VALUE("ñ", "\xc3\xb1"), - ESCAPE_VALUE("ν", "\xce\x9d"), - ESCAPE_VALUE("ó", "\xc3\xb3"), - ESCAPE_VALUE("ô", "\xc3\xb4"), - ESCAPE_VALUE("ò", "\xc3\xb2"), - ESCAPE_VALUE("ω", "\xce\xa9"), - ESCAPE_VALUE("ο", "\xce\x9f"), - ESCAPE_VALUE("⊕", "\xe2\x8a\x95"), - ESCAPE_VALUE("∨", "\xe2\x88\xa8"), - ESCAPE_VALUE("ª", "\xc2\xaa"), - ESCAPE_VALUE("º", "\xc2\xba"), - ESCAPE_VALUE("ø", "\xc3\xb8"), - ESCAPE_VALUE("õ", "\xc3\xb5"), - ESCAPE_VALUE("ö", "\xc3\xb6"), - ESCAPE_VALUE("¶", "\xc2\xb6"), - ESCAPE_VALUE("⊥", "\xe2\x8a\xa5"), - ESCAPE_VALUE("φ", "\xce\xa6"), - ESCAPE_VALUE("π", "\xce\xa0"), - ESCAPE_VALUE("±", "\xc2\xb1"), - ESCAPE_VALUE("£", "\xc2\xa3"), - ESCAPE_VALUE("∏", "\xe2\x88\x8f"), - ESCAPE_VALUE("ψ", "\xce\xa8"), - ESCAPE_VALUE("»", "\xc2\xbb"), - ESCAPE_VALUE("→", "\xe2\x86\x92"), - ESCAPE_VALUE("→", "\xe2\x87\x92"), - ESCAPE_VALUE("®", "\xc2\xae"), - ESCAPE_VALUE("ρ", "\xce\xa1"), - ESCAPE_VALUE("‏", "\xe2\x80\x8f"), - ESCAPE_VALUE("§", "\xc2\xa7"), - ESCAPE_VALUE("­", "\xc2\xad"), - ESCAPE_VALUE("σ", "\xce\xa3"), - ESCAPE_VALUE("∑", "\xe2\x88\x91"), - ESCAPE_VALUE("¹", "\xc2\xb9"), - ESCAPE_VALUE("²", "\xc2\xb2"), - ESCAPE_VALUE("³", "\xc2\xb3"), - ESCAPE_VALUE("ß", "\xc3\x9f"), - ESCAPE_VALUE("τ", "\xce\xa4"), - ESCAPE_VALUE("θ", "\xce\x98"), - ESCAPE_VALUE("þ", "\xc3\xbe"), - ESCAPE_VALUE("×", "\xc3\x97"), - 
ESCAPE_VALUE("ú", "\xc3\xba"), - ESCAPE_VALUE("↑", "\xe2\x86\x91"), - ESCAPE_VALUE("û", "\xc3\xbb"), - ESCAPE_VALUE("ù", "\xc3\xb9"), - ESCAPE_VALUE("¨", "\xc2\xa8"), - ESCAPE_VALUE("υ", "\xce\xa5"), - ESCAPE_VALUE("ü", "\xc3\xbc"), - ESCAPE_VALUE("ξ", "\xce\x9e"), - ESCAPE_VALUE("ý", "\xc3\xbd"), - ESCAPE_VALUE("¥", "\xc2\xa5"), - ESCAPE_VALUE("ÿ", "\xc3\xbf"), - ESCAPE_VALUE("ζ", "\xce\x96"), - ESCAPE_VALUE("‍", "\xe2\x80\x8d"), - ESCAPE_VALUE("‌", "\xe2\x80\x8c"), +/** + * @internal + * @var html_common_escapes[] + * This array consists of most common html escapes values as _Escape_Value structure + */ +static const Escape_Value html_common_escapes[] = { + ESCAPE_VALUE("&", "\x26"), + ESCAPE_VALUE("'", "\x27"), + ESCAPE_VALUE(">", "\x3e"), + ESCAPE_VALUE("<", "\x3c"), + ESCAPE_VALUE(""", "\x22"), }; /** * @internal * @var escape_values_e_common_sorted[] - * same as escape_values_e_sorted with small subset of common escapes + * This array consists of rest html escapes values as _Escape_Value structure */ -static const Escape_Value escape_values_e_common_sorted[] = { - ESCAPE_VALUE("&", "\x26"), - ESCAPE_VALUE("'", "\x27"), - ESCAPE_VALUE(">", "\x3e"), - ESCAPE_VALUE("<", "\x3c"), - ESCAPE_VALUE(""", "\x22"), -}; -/** - * @internal - * @var escape_values_v_sorted[] - * This array consists of Escape_Value structure sorted by escape value - * And new added value must be placed sorted position, and reflected on escape_values_e_sorted - */ -static const Escape_Value escape_values_v_sorted[] = { - ESCAPE_VALUE(" ", "\xc2\xa0"), - ESCAPE_VALUE("¡", "\xc2\xa1"), - ESCAPE_VALUE("¢", "\xc2\xa2"), - ESCAPE_VALUE("£", "\xc2\xa3"), - ESCAPE_VALUE("¤", "\xc2\xa4"), - ESCAPE_VALUE("¥", "\xc2\xa5"), - ESCAPE_VALUE("¦", "\xc2\xa6"), - ESCAPE_VALUE("§", "\xc2\xa7"), - ESCAPE_VALUE("¨", "\xc2\xa8"), - ESCAPE_VALUE("©", "\xc2\xa9"), - ESCAPE_VALUE("ª", "\xc2\xaa"), - ESCAPE_VALUE("«", "\xc2\xab"), - ESCAPE_VALUE("¬", "\xc2\xac"), - ESCAPE_VALUE("­", "\xc2\xad"), - ESCAPE_VALUE("®", 
"\xc2\xae"), - ESCAPE_VALUE("¯", "\xc2\xaf"), - ESCAPE_VALUE("°", "\xc2\xb0"), - ESCAPE_VALUE("±", "\xc2\xb1"), - ESCAPE_VALUE("²", "\xc2\xb2"), - ESCAPE_VALUE("³", "\xc2\xb3"), - ESCAPE_VALUE("´", "\xc2\xb4"), - ESCAPE_VALUE("µ", "\xc2\xb5"), - ESCAPE_VALUE("¶", "\xc2\xb6"), - ESCAPE_VALUE("·", "\xc2\xb7"), - ESCAPE_VALUE("¸", "\xc2\xb8"), - ESCAPE_VALUE("¹", "\xc2\xb9"), - ESCAPE_VALUE("º", "\xc2\xba"), - ESCAPE_VALUE("»", "\xc2\xbb"), - ESCAPE_VALUE("¼", "\xc2\xbc"), - ESCAPE_VALUE("½", "\xc2\xbd"), - ESCAPE_VALUE("¾", "\xc2\xbe"), - ESCAPE_VALUE("¿", "\xc2\xbf"), - ESCAPE_VALUE("À", "\xc3\x80"), +static const Escape_Value html_escapes[] = { ESCAPE_VALUE("Á", "\xc3\x81"), ESCAPE_VALUE("Â", "\xc3\x82"), + ESCAPE_VALUE("&Aelig;", "\xc3\x86"), + ESCAPE_VALUE("À", "\xc3\x80"), + ESCAPE_VALUE("Å", "\xc3\x85"), ESCAPE_VALUE("Ã", "\xc3\x83"), ESCAPE_VALUE("Ä", "\xc3\x84"), - ESCAPE_VALUE("Å", "\xc3\x85"), - ESCAPE_VALUE("&Aelig;", "\xc3\x86"), ESCAPE_VALUE("Ç", "\xc3\x87"), - ESCAPE_VALUE("È", "\xc3\x88"), + ESCAPE_VALUE("‡", "\xe2\x80\xa1"), ESCAPE_VALUE("É", "\xc3\x89"), ESCAPE_VALUE("Ê", "\xc3\x8a"), + ESCAPE_VALUE("È", "\xc3\x88"), + ESCAPE_VALUE("&Eth;", "\xc3\x90"), ESCAPE_VALUE("Ë", "\xc3\x8b"), - ESCAPE_VALUE("Ì", "\xc3\x8c"), ESCAPE_VALUE("Í", "\xc3\x8d"), ESCAPE_VALUE("Î", "\xc3\x8e"), + ESCAPE_VALUE("Ì", "\xc3\x8c"), ESCAPE_VALUE("Ï", "\xc3\x8f"), - ESCAPE_VALUE("&Eth;", "\xc3\x90"), ESCAPE_VALUE("Ñ", "\xc3\x91"), - ESCAPE_VALUE("Ò", "\xc3\x92"), ESCAPE_VALUE("Ó", "\xc3\x93"), ESCAPE_VALUE("Ô", "\xc3\x94"), + ESCAPE_VALUE("Ò", "\xc3\x92"), + ESCAPE_VALUE("Ø", "\xc3\x98"), ESCAPE_VALUE("Õ", "\xc3\x95"), ESCAPE_VALUE("Ö", "\xc3\x96"), - ESCAPE_VALUE("×", "\xc3\x97"), - ESCAPE_VALUE("Ø", "\xc3\x98"), - ESCAPE_VALUE("Ù", "\xc3\x99"), + ESCAPE_VALUE("&Thorn;", "\xc3\x9e"), ESCAPE_VALUE("Ú", "\xc3\x9a"), ESCAPE_VALUE("Û", "\xc3\x9b"), + ESCAPE_VALUE("Ù", "\xc3\x99"), ESCAPE_VALUE("Ý", "\xc3\x9d"), - ESCAPE_VALUE("&Thorn;", "\xc3\x9e"), - ESCAPE_VALUE("ß", 
"\xc3\x9f"), - ESCAPE_VALUE("à", "\xc3\xa0"), ESCAPE_VALUE("á", "\xc3\xa1"), ESCAPE_VALUE("â", "\xc3\xa2"), + ESCAPE_VALUE("´", "\xc2\xb4"), + ESCAPE_VALUE("æ", "\xc3\xa6"), + ESCAPE_VALUE("à", "\xc3\xa0"), + ESCAPE_VALUE("α", "\xce\x91"), + ESCAPE_VALUE("∧", "\xe2\x88\xa7"), + ESCAPE_VALUE("å", "\xc3\xa5"), ESCAPE_VALUE("ã", "\xc3\xa3"), ESCAPE_VALUE("ä", "\xc3\xa4"), - ESCAPE_VALUE("å", "\xc3\xa5"), - ESCAPE_VALUE("æ", "\xc3\xa6"), + ESCAPE_VALUE("β", "\xce\x92"), + ESCAPE_VALUE("¦", "\xc2\xa6"), + ESCAPE_VALUE("•", "\xe2\x80\xa2"), ESCAPE_VALUE("ç", "\xc3\xa7"), - ESCAPE_VALUE("è", "\xc3\xa8"), + ESCAPE_VALUE("¸", "\xc2\xb8"), + ESCAPE_VALUE("¢", "\xc2\xa2"), + ESCAPE_VALUE("χ", "\xce\xa7"), + ESCAPE_VALUE("©", "\xc2\xa9"), + ESCAPE_VALUE("¤", "\xc2\xa4"), + ESCAPE_VALUE("†", "\xe2\x80\xa0"), + ESCAPE_VALUE("↓", "\xe2\x86\x93"), + ESCAPE_VALUE("°", "\xc2\xb0"), + ESCAPE_VALUE("δ", "\xce\x94"), + ESCAPE_VALUE("÷", "\xc3\xb7"), ESCAPE_VALUE("é", "\xc3\xa9"), ESCAPE_VALUE("ê", "\xc3\xaa"), + ESCAPE_VALUE("è", "\xc3\xa8"), + ESCAPE_VALUE("ε", "\xce\x95"), + ESCAPE_VALUE("≡", "\xe2\x89\xa1"), + ESCAPE_VALUE("η", "\xce\x97"), + ESCAPE_VALUE("ð", "\xc3\xb0"), ESCAPE_VALUE("ë", "\xc3\xab"), - ESCAPE_VALUE("ì", "\xc3\xac"), + ESCAPE_VALUE("€", "\xe2\x82\xac"), + ESCAPE_VALUE("∃", "\xe2\x88\x83"), + ESCAPE_VALUE("∀", "\xe2\x88\x80"), + ESCAPE_VALUE("½", "\xc2\xbd"), + ESCAPE_VALUE("¼", "\xc2\xbc"), + ESCAPE_VALUE("¾", "\xc2\xbe"), + ESCAPE_VALUE("γ", "\xce\x93"), + ESCAPE_VALUE("↔", "\xe2\x86\x94"), + ESCAPE_VALUE("…", "\xe2\x80\xa6"), ESCAPE_VALUE("í", "\xc3\xad"), ESCAPE_VALUE("î", "\xc3\xae"), - ESCAPE_VALUE("ï", "\xc3\xaf"), - ESCAPE_VALUE("ð", "\xc3\xb0"), - ESCAPE_VALUE("ñ", "\xc3\xb1"), - ESCAPE_VALUE("ò", "\xc3\xb2"), - ESCAPE_VALUE("ó", "\xc3\xb3"), - ESCAPE_VALUE("ô", "\xc3\xb4"), - ESCAPE_VALUE("õ", "\xc3\xb5"), - ESCAPE_VALUE("ö", "\xc3\xb6"), - ESCAPE_VALUE("÷", "\xc3\xb7"), - ESCAPE_VALUE("ø", "\xc3\xb8"), - ESCAPE_VALUE("ù", "\xc3\xb9"), - ESCAPE_VALUE("ú", 
"\xc3\xba"), - ESCAPE_VALUE("û", "\xc3\xbb"), - ESCAPE_VALUE("ü", "\xc3\xbc"), - ESCAPE_VALUE("ý", "\xc3\xbd"), - ESCAPE_VALUE("þ", "\xc3\xbe"), - ESCAPE_VALUE("ÿ", "\xc3\xbf"), - ESCAPE_VALUE("α", "\xce\x91"), - ESCAPE_VALUE("β", "\xce\x92"), - ESCAPE_VALUE("γ", "\xce\x93"), - ESCAPE_VALUE("δ", "\xce\x94"), - ESCAPE_VALUE("ε", "\xce\x95"), - ESCAPE_VALUE("ζ", "\xce\x96"), - ESCAPE_VALUE("η", "\xce\x97"), - ESCAPE_VALUE("θ", "\xce\x98"), + ESCAPE_VALUE("¡", "\xc2\xa1"), + ESCAPE_VALUE("ì", "\xc3\xac"), + ESCAPE_VALUE("∫", "\xe2\x88\xab"), ESCAPE_VALUE("ι", "\xce\x99"), + ESCAPE_VALUE("¿", "\xc2\xbf"), + ESCAPE_VALUE("ï", "\xc3\xaf"), ESCAPE_VALUE("κ", "\xce\x9a"), ESCAPE_VALUE("λ", "\xce\x9b"), - ESCAPE_VALUE("μ", "\xce\x9c"), - ESCAPE_VALUE("ν", "\xce\x9d"), - ESCAPE_VALUE("ξ", "\xce\x9e"), - ESCAPE_VALUE("ο", "\xce\x9f"), - ESCAPE_VALUE("π", "\xce\xa0"), - ESCAPE_VALUE("ρ", "\xce\xa1"), - ESCAPE_VALUE("σ", "\xce\xa3"), - ESCAPE_VALUE("τ", "\xce\xa4"), - ESCAPE_VALUE("υ", "\xce\xa5"), - ESCAPE_VALUE("φ", "\xce\xa6"), - ESCAPE_VALUE("χ", "\xce\xa7"), - ESCAPE_VALUE("ψ", "\xce\xa8"), - ESCAPE_VALUE("ω", "\xce\xa9"), - ESCAPE_VALUE("‌", "\xe2\x80\x8c"), - ESCAPE_VALUE("‍", "\xe2\x80\x8d"), - ESCAPE_VALUE("‎", "\xe2\x80\x8e"), - ESCAPE_VALUE("‏", "\xe2\x80\x8f"), - ESCAPE_VALUE("†", "\xe2\x80\xa0"), - ESCAPE_VALUE("‡", "\xe2\x80\xa1"), - ESCAPE_VALUE("•", "\xe2\x80\xa2"), - ESCAPE_VALUE("…", "\xe2\x80\xa6"), - ESCAPE_VALUE("€", "\xe2\x82\xac"), + ESCAPE_VALUE("«", "\xc2\xab"), ESCAPE_VALUE("←", "\xe2\x86\x90"), - ESCAPE_VALUE("↑", "\xe2\x86\x91"), - ESCAPE_VALUE("→", "\xe2\x86\x92"), - ESCAPE_VALUE("↓", "\xe2\x86\x93"), - ESCAPE_VALUE("↔", "\xe2\x86\x94"), ESCAPE_VALUE("←", "\xe2\x87\x90"), - ESCAPE_VALUE("→", "\xe2\x87\x92"), - ESCAPE_VALUE("∀", "\xe2\x88\x80"), - ESCAPE_VALUE("∃", "\xe2\x88\x83"), + ESCAPE_VALUE("‎", "\xe2\x80\x8e"), + ESCAPE_VALUE("¯", "\xc2\xaf"), + ESCAPE_VALUE("µ", "\xc2\xb5"), + ESCAPE_VALUE("·", "\xc2\xb7"), + ESCAPE_VALUE("μ", "\xce\x9c"), 
ESCAPE_VALUE("∇", "\xe2\x88\x87"), - ESCAPE_VALUE("∏", "\xe2\x88\x8f"), - ESCAPE_VALUE("∑", "\xe2\x88\x91"), - ESCAPE_VALUE("∧", "\xe2\x88\xa7"), - ESCAPE_VALUE("∨", "\xe2\x88\xa8"), - ESCAPE_VALUE("∫", "\xe2\x88\xab"), + ESCAPE_VALUE(" ", "\xc2\xa0"), ESCAPE_VALUE("≠", "\xe2\x89\xa0"), - ESCAPE_VALUE("≡", "\xe2\x89\xa1"), + ESCAPE_VALUE("¬", "\xc2\xac"), + ESCAPE_VALUE("ñ", "\xc3\xb1"), + ESCAPE_VALUE("ν", "\xce\x9d"), + ESCAPE_VALUE("ó", "\xc3\xb3"), + ESCAPE_VALUE("ô", "\xc3\xb4"), + ESCAPE_VALUE("ò", "\xc3\xb2"), + ESCAPE_VALUE("ω", "\xce\xa9"), + ESCAPE_VALUE("ο", "\xce\x9f"), ESCAPE_VALUE("⊕", "\xe2\x8a\x95"), + ESCAPE_VALUE("∨", "\xe2\x88\xa8"), + ESCAPE_VALUE("ª", "\xc2\xaa"), + ESCAPE_VALUE("º", "\xc2\xba"), + ESCAPE_VALUE("ø", "\xc3\xb8"), + ESCAPE_VALUE("õ", "\xc3\xb5"), + ESCAPE_VALUE("ö", "\xc3\xb6"), + ESCAPE_VALUE("¶", "\xc2\xb6"), ESCAPE_VALUE("⊥", "\xe2\x8a\xa5"), + ESCAPE_VALUE("φ", "\xce\xa6"), + ESCAPE_VALUE("π", "\xce\xa0"), + ESCAPE_VALUE("±", "\xc2\xb1"), + ESCAPE_VALUE("£", "\xc2\xa3"), + ESCAPE_VALUE("∏", "\xe2\x88\x8f"), + ESCAPE_VALUE("ψ", "\xce\xa8"), + ESCAPE_VALUE("»", "\xc2\xbb"), + ESCAPE_VALUE("→", "\xe2\x86\x92"), + ESCAPE_VALUE("⇒", "\xe2\x87\x92"), + ESCAPE_VALUE("®", "\xc2\xae"), + ESCAPE_VALUE("ρ", "\xce\xa1"), + ESCAPE_VALUE("‏", "\xe2\x80\x8f"), + ESCAPE_VALUE("§", "\xc2\xa7"), + ESCAPE_VALUE("­", "\xc2\xad"), + ESCAPE_VALUE("σ", "\xce\xa3"), + ESCAPE_VALUE("∑", "\xe2\x88\x91"), + ESCAPE_VALUE("¹", "\xc2\xb9"), + ESCAPE_VALUE("²", "\xc2\xb2"), + ESCAPE_VALUE("³", "\xc2\xb3"), + ESCAPE_VALUE("ß", "\xc3\x9f"), + ESCAPE_VALUE("τ", "\xce\xa4"), + ESCAPE_VALUE("θ", "\xce\x98"), + ESCAPE_VALUE("þ", "\xc3\xbe"), + ESCAPE_VALUE("×", "\xc3\x97"), + ESCAPE_VALUE("ú", "\xc3\xba"), + ESCAPE_VALUE("↑", "\xe2\x86\x91"), + ESCAPE_VALUE("û", "\xc3\xbb"), + ESCAPE_VALUE("ù", "\xc3\xb9"), + ESCAPE_VALUE("¨", "\xc2\xa8"), + ESCAPE_VALUE("υ", "\xce\xa5"), + ESCAPE_VALUE("ü", "\xc3\xbc"), + ESCAPE_VALUE("ξ", "\xce\x9e"), + ESCAPE_VALUE("ý", 
"\xc3\xbd"), + ESCAPE_VALUE("¥", "\xc2\xa5"), + ESCAPE_VALUE("ÿ", "\xc3\xbf"), + ESCAPE_VALUE("ζ", "\xce\x96"), + ESCAPE_VALUE("‍", "\xe2\x80\x8d"), + ESCAPE_VALUE("‌", "\xe2\x80\x8c"), }; -/** - * @internal - * @var escape_values_v_common_sorted[] - * same as escape_values_v_sorted with small subset of common escapes - */ -static const Escape_Value escape_values_v_common_sorted[] = { - ESCAPE_VALUE(""", "\x22"), - ESCAPE_VALUE("&", "\x26"), - ESCAPE_VALUE("'", "\x27"), - ESCAPE_VALUE("<", "\x3c"), - ESCAPE_VALUE(">", "\x3e"), -}; +static int +_escape_key_sort(const void *a, const void *b) +{ + const char *k_a = (*(const Escape_Value **) a)->escape; + const char *k_b = (*(const Escape_Value **) b)->escape; + return strcmp(k_a, k_b); +} + +static int +_escape_value_sort(const void *a, const void *b) +{ + const char *v_a = (*(const Escape_Value **) a)->value; + const char *v_b = (*(const Escape_Value **) b)->value; + return strcmp(v_a, v_b); +} + +static Escape_Value ** +escape_sorted_common_key_copy() +{ + int i; + int len; + const Escape_Value *source; + int (*compare_fun)(const void*,const void*); + + len = sizeof(html_common_escapes) / sizeof(Escape_Value); + source = html_common_escapes; + + compare_fun = _escape_key_sort; + + Escape_Value **ret_list = malloc(len * sizeof(Escape_Value *)); + for (i = 0 ; i < len ; i++) + { + ret_list[i] = (Escape_Value *)(&source[i]); + } + + qsort(&ret_list[0], len, sizeof(Escape_Value *), compare_fun); + return ret_list; +} + +static Escape_Value ** +escape_sorted_common_value_copy() +{ + int i; + int len; + const Escape_Value *source; + int (*compare_fun)(const void*,const void*); + + len = sizeof(html_common_escapes) / sizeof(Escape_Value); + source = html_common_escapes; + + compare_fun = _escape_value_sort; + + Escape_Value **ret_list = malloc(len * sizeof(Escape_Value *)); + for (i = 0 ; i < len ; i++) + { + ret_list[i] = (Escape_Value *)(&source[i]); + } + + qsort(&ret_list[0], len, sizeof(Escape_Value *), compare_fun); 
+ return ret_list; +} + +static Escape_Value ** +escape_sorted_rest_key_copy() +{ + int i; + int len; + const Escape_Value *source; + int (*compare_fun)(const void*,const void*); + + len = sizeof(html_escapes) / sizeof(Escape_Value); + source = html_escapes; + + compare_fun = _escape_key_sort; + + Escape_Value **ret_list = malloc(len * sizeof(Escape_Value *)); + for (i = 0 ; i < len ; i++) + { + ret_list[i] = (Escape_Value *)(&source[i]); + } + + qsort(&ret_list[0], len, sizeof(Escape_Value *), compare_fun); + return ret_list; +} + +static Escape_Value ** +escape_sorted_rest_value_copy() +{ + int i; + int len; + const Escape_Value *source; + int (*compare_fun)(const void*,const void*); + len = sizeof(html_escapes) / sizeof(Escape_Value); + source = html_escapes; + + compare_fun = _escape_value_sort; + + Escape_Value **ret_list = malloc(len * sizeof(Escape_Value *)); + for (i = 0 ; i < len ; i++) + { + ret_list[i] = (Escape_Value *)(&source[i]); + } + + qsort(&ret_list[0], len, sizeof(Escape_Value *), compare_fun); + return ret_list; +} + +static Escape_Value ** +get_html_escape_array_common_key_sorted(size_t *p_len) +{ + static Escape_Value **escape_values_common_k_sorted = NULL; + + static size_t common_len = sizeof(html_common_escapes) / sizeof(Escape_Value); + + Escape_Value **ret_list = NULL; + + if (!escape_values_common_k_sorted) + escape_values_common_k_sorted = escape_sorted_common_key_copy(); + ret_list = escape_values_common_k_sorted; + if(p_len) *p_len = common_len; + + + return ret_list; +} + +static Escape_Value ** +get_html_escape_array_common_value_sorted(size_t *p_len) +{ + static Escape_Value **escape_values_common_v_sorted = NULL; + + static size_t common_len = sizeof(html_common_escapes) / sizeof(Escape_Value); + + Escape_Value **ret_list = NULL; + + if (!escape_values_common_v_sorted) + escape_values_common_v_sorted = escape_sorted_common_value_copy(); + ret_list = escape_values_common_v_sorted; + if(p_len) *p_len = common_len; + + + return 
ret_list; +} + +static Escape_Value ** +get_html_escape_array_rest_key_sorted(size_t *p_len) +{ + static Escape_Value **escape_values_k_sorted = NULL; + + static size_t rest_len = sizeof(html_escapes) / sizeof(Escape_Value); + + Escape_Value **ret_list = NULL; + + if (!escape_values_k_sorted) + escape_values_k_sorted = escape_sorted_rest_key_copy(); + ret_list = escape_values_k_sorted; + if(p_len) *p_len = rest_len; + + return ret_list; +} + +static Escape_Value ** +get_html_escape_array_rest_value_sorted(size_t *p_len) +{ + static Escape_Value **escape_values_v_sorted = NULL; + + static size_t rest_len = sizeof(html_escapes) / sizeof(Escape_Value); + + Escape_Value **ret_list = NULL; + + if (!escape_values_v_sorted) + escape_values_v_sorted = escape_sorted_rest_value_copy(); + ret_list = escape_values_v_sorted; + if(p_len) *p_len = rest_len; + + return ret_list; +} /** * @internal @@ -8490,22 +8498,25 @@ _escaped_is_eq_and_advance(const char *s, const char *s_end, * @param escape_values_len is the len of Escape_Value array */ static int -_escaped_string_search(const char *s, size_t s_len, const Escape_Value escape_values[], const size_t escape_values_len) +_escaped_string_search(const char *s, size_t s_len, Escape_Value **escape_values, const size_t escape_values_len) { + if (!escape_values) + return -1; + int l = 0; int r = escape_values_len - 1; while (l <= r) { int m = (l + r) / 2; - int res = strncmp(s, escape_values[m].escape, MAX(escape_values[m].escape_len, s_len)); + int res = strncmp(s, escape_values[m]->escape, MAX(escape_values[m]->escape_len, s_len)); if (res == 0) { //Handle special case when s_len is less than escape_len //then we will continue searching //example (">",1,....) 
- if (escape_values[m].escape_len > s_len) + if (escape_values[m]->escape_len > s_len) res = -1; - else if (escape_values[m].escape_len < s_len) + else if (escape_values[m]->escape_len < s_len) res = 1; else return m; } @@ -8525,14 +8536,17 @@ _escaped_string_search(const char *s, size_t s_len, const Escape_Value escape_va * @param escape_values_len is the len of Escape_Value array */ static int -_escaped_value_search(const char *s, const Escape_Value escape_values[], const size_t escape_values_len) +_escaped_value_search(const char *s, Escape_Value **escape_values , const size_t escape_values_len) { + if (!escape_values) + return -1; + int l = 0; int r = escape_values_len - 1; while (l <= r) { int m = (l + r) / 2; - int res = strncmp(s, escape_values[m].value, escape_values[m].value_len); + int res = strncmp(s, escape_values[m]->value, escape_values[m]->value_len); if (res == 0) return m; if (res > 0) @@ -8553,21 +8567,22 @@ _escaped_value_search(const char *s, const Escape_Value escape_values[], const s static inline const char * _escaped_char_match(const char *s, int *adv) { - static const size_t escape_common_size = sizeof(escape_values_v_common_sorted) / sizeof(Escape_Value); - int n_ret = _escaped_value_search(s, escape_values_v_common_sorted, escape_common_size); + size_t len = 0; + Escape_Value **list = get_html_escape_array_common_value_sorted(&len); + int n_ret = _escaped_value_search(s, list, len); if (n_ret != -1) { - *adv = (int) escape_values_v_common_sorted[n_ret].value_len; - return escape_values_v_common_sorted[n_ret].escape; + *adv = (int) list[n_ret]->value_len; + return list[n_ret]->escape; } else { - static const size_t escape_size = sizeof(escape_values_v_sorted) / sizeof(Escape_Value); - n_ret = _escaped_value_search(s, escape_values_v_sorted, escape_size); + list = get_html_escape_array_rest_value_sorted(&len); + n_ret = _escaped_value_search(s, list, len); if (n_ret != -1) { - *adv = (int)escape_values_v_sorted[n_ret].value_len; - return 
escape_values_v_sorted[n_ret].escape; + *adv = (int)list[n_ret]->value_len; + return list[n_ret]->escape; } } return NULL; @@ -8623,18 +8638,22 @@ _escaped_char_get(const char *s, const char *s_end) } else { - static const size_t escape_common_size = sizeof(escape_values_e_common_sorted) / sizeof(Escape_Value); - int n_ret = _escaped_string_search(s, s_end-s, escape_values_e_common_sorted, escape_common_size); + size_t len = 0; + Escape_Value **list; + list = get_html_escape_array_common_key_sorted(&len); + int n_ret = _escaped_string_search(s, s_end-s, list, len); if (n_ret != -1) { - return escape_values_e_common_sorted[n_ret].value; + return list[n_ret]->value; } else { - static const size_t escape_size = sizeof(escape_values_e_sorted) / sizeof(Escape_Value); - n_ret = _escaped_string_search(s, s_end-s, escape_values_e_sorted, escape_size); + list = get_html_escape_array_rest_key_sorted(&len); + n_ret = _escaped_string_search(s, s_end-s, list, len); if (n_ret != -1) - return escape_values_e_sorted[n_ret].value; + { + return list[n_ret]->value; + } } }