/*
 * Tables of HTML escapes.
 *
 * Every table is stored already sorted (either by escape name or by value)
 * because lookups use a binary search; nothing is sorted at run time.  An
 * entry that is out of order is silently unreachable, so new entries must be
 * inserted at their sorted position in BOTH the by-name and the by-value
 * table.  The ordering is plain byte order (what strcmp() gives), so upper
 * case names sort before lower case ones.
 */

/**
 * @internal
 * One escape <-> value pair.
 *
 * escape     the markup name, e.g. "&amp;"
 * value      the UTF-8 bytes the escape stands for
 * escape_len length of escape in bytes, without the terminator
 * value_len  length of value in bytes, without the terminator
 */
typedef struct _Escape_Value Escape_Value;

struct _Escape_Value
{
   const char *escape;
   const char *value;
   size_t escape_len;
   size_t value_len;
};

/* Both arguments MUST be string literals: the lengths come from sizeof (which
 * counts the terminating NUL, hence the -1) so that every initializer is a
 * genuine constant expression, which a strlen() call is not. */
#define ESCAPE_VALUE(e,v) { e, v, sizeof(e) - 1, sizeof(v) - 1 }

/**
 * @internal
 * @var escape_values_e_sorted[]
 * All the less common escapes, sorted by escape name.
 * Must hold the same entries as escape_values_v_sorted.
 *
 * NOTE(review): "&larr;" and "&rarr;" each appear twice (the single and the
 * double arrow share one name), so a by-name lookup can only ever return one
 * entry of each pair and the double arrows are written back under the single
 * arrow's name. Presumably the double arrows were meant to be "&lArr;" and
 * "&rArr;" - confirm before changing, as it alters the markup produced.
 */
static const Escape_Value escape_values_e_sorted[] = {
   ESCAPE_VALUE("&Aacute;", "\xc3\x81"),
   ESCAPE_VALUE("&Acirc;", "\xc3\x82"),
   ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
   ESCAPE_VALUE("&Agrave;", "\xc3\x80"),
   ESCAPE_VALUE("&Aring;", "\xc3\x85"),
   ESCAPE_VALUE("&Atilde;", "\xc3\x83"),
   ESCAPE_VALUE("&Auml;", "\xc3\x84"),
   ESCAPE_VALUE("&Ccedil;", "\xc3\x87"),
   ESCAPE_VALUE("&Dagger;", "\xe2\x80\xa1"),
   ESCAPE_VALUE("&Eacute;", "\xc3\x89"),
   ESCAPE_VALUE("&Ecirc;", "\xc3\x8a"),
   ESCAPE_VALUE("&Egrave;", "\xc3\x88"),
   ESCAPE_VALUE("&Eth;", "\xc3\x90"),
   ESCAPE_VALUE("&Euml;", "\xc3\x8b"),
   ESCAPE_VALUE("&Iacute;", "\xc3\x8d"),
   ESCAPE_VALUE("&Icirc;", "\xc3\x8e"),
   ESCAPE_VALUE("&Igrave;", "\xc3\x8c"),
   ESCAPE_VALUE("&Iuml;", "\xc3\x8f"),
   ESCAPE_VALUE("&Ntilde;", "\xc3\x91"),
   ESCAPE_VALUE("&Oacute;", "\xc3\x93"),
   ESCAPE_VALUE("&Ocirc;", "\xc3\x94"),
   ESCAPE_VALUE("&Ograve;", "\xc3\x92"),
   ESCAPE_VALUE("&Oslash;", "\xc3\x98"),
   ESCAPE_VALUE("&Otilde;", "\xc3\x95"),
   ESCAPE_VALUE("&Ouml;", "\xc3\x96"),
   ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
   ESCAPE_VALUE("&Uacute;", "\xc3\x9a"),
   ESCAPE_VALUE("&Ucirc;", "\xc3\x9b"),
   ESCAPE_VALUE("&Ugrave;", "\xc3\x99"),
   ESCAPE_VALUE("&Yacute;", "\xc3\x9d"),
   ESCAPE_VALUE("&aacute;", "\xc3\xa1"),
   ESCAPE_VALUE("&acirc;", "\xc3\xa2"),
   ESCAPE_VALUE("&acute;", "\xc2\xb4"),
   ESCAPE_VALUE("&aelig;", "\xc3\xa6"),
   ESCAPE_VALUE("&agrave;", "\xc3\xa0"),
   ESCAPE_VALUE("&alpha;", "\xce\x91"),
   ESCAPE_VALUE("&and;", "\xe2\x88\xa7"),
   ESCAPE_VALUE("&aring;", "\xc3\xa5"),
   ESCAPE_VALUE("&atilde;", "\xc3\xa3"),
   ESCAPE_VALUE("&auml;", "\xc3\xa4"),
   ESCAPE_VALUE("&beta;", "\xce\x92"),
   ESCAPE_VALUE("&brvbar;", "\xc2\xa6"),
   ESCAPE_VALUE("&bull;", "\xe2\x80\xa2"),
   ESCAPE_VALUE("&ccedil;", "\xc3\xa7"),
   ESCAPE_VALUE("&cedil;", "\xc2\xb8"),
   ESCAPE_VALUE("&cent;", "\xc2\xa2"),
   ESCAPE_VALUE("&chi;", "\xce\xa7"),
   ESCAPE_VALUE("&copy;", "\xc2\xa9"),
   ESCAPE_VALUE("&curren;", "\xc2\xa4"),
   ESCAPE_VALUE("&dagger;", "\xe2\x80\xa0"),
   ESCAPE_VALUE("&darr;", "\xe2\x86\x93"),
   ESCAPE_VALUE("&deg;", "\xc2\xb0"),
   ESCAPE_VALUE("&delta;", "\xce\x94"),
   ESCAPE_VALUE("&divide;", "\xc3\xb7"),
   ESCAPE_VALUE("&eacute;", "\xc3\xa9"),
   ESCAPE_VALUE("&ecirc;", "\xc3\xaa"),
   ESCAPE_VALUE("&egrave;", "\xc3\xa8"),
   ESCAPE_VALUE("&epsilon;", "\xce\x95"),
   ESCAPE_VALUE("&equiv;", "\xe2\x89\xa1"),
   ESCAPE_VALUE("&eta;", "\xce\x97"),
   ESCAPE_VALUE("&eth;", "\xc3\xb0"),
   ESCAPE_VALUE("&euml;", "\xc3\xab"),
   ESCAPE_VALUE("&euro;", "\xe2\x82\xac"),
   ESCAPE_VALUE("&exist;", "\xe2\x88\x83"),
   ESCAPE_VALUE("&forall;", "\xe2\x88\x80"),
   ESCAPE_VALUE("&frac12;", "\xc2\xbd"),
   ESCAPE_VALUE("&frac14;", "\xc2\xbc"),
   ESCAPE_VALUE("&frac34;", "\xc2\xbe"),
   ESCAPE_VALUE("&gamma;", "\xce\x93"),
   ESCAPE_VALUE("&harr;", "\xe2\x86\x94"),
   ESCAPE_VALUE("&hellip;", "\xe2\x80\xa6"),
   ESCAPE_VALUE("&iacute;", "\xc3\xad"),
   ESCAPE_VALUE("&icirc;", "\xc3\xae"),
   ESCAPE_VALUE("&iexcl;", "\xc2\xa1"),
   ESCAPE_VALUE("&igrave;", "\xc3\xac"),
   ESCAPE_VALUE("&int;", "\xe2\x88\xab"),
   ESCAPE_VALUE("&iota;", "\xce\x99"),
   ESCAPE_VALUE("&iquest;", "\xc2\xbf"),
   ESCAPE_VALUE("&iuml;", "\xc3\xaf"),
   ESCAPE_VALUE("&kappa;", "\xce\x9a"),
   ESCAPE_VALUE("&lambda;", "\xce\x9b"),
   ESCAPE_VALUE("&laquo;", "\xc2\xab"),
   ESCAPE_VALUE("&larr;", "\xe2\x86\x90"),
   ESCAPE_VALUE("&larr;", "\xe2\x87\x90"),
   ESCAPE_VALUE("&lrm;", "\xe2\x80\x8e"),
   ESCAPE_VALUE("&macr;", "\xc2\xaf"),
   ESCAPE_VALUE("&micro;", "\xc2\xb5"),
   ESCAPE_VALUE("&middot;", "\xc2\xb7"),
   ESCAPE_VALUE("&mu;", "\xce\x9c"),
   ESCAPE_VALUE("&nabla;", "\xe2\x88\x87"),
   ESCAPE_VALUE("&nbsp;", "\xc2\xa0"),
   ESCAPE_VALUE("&ne;", "\xe2\x89\xa0"),
   ESCAPE_VALUE("&not;", "\xc2\xac"),
   ESCAPE_VALUE("&ntilde;", "\xc3\xb1"),
   ESCAPE_VALUE("&nu;", "\xce\x9d"),
   ESCAPE_VALUE("&oacute;", "\xc3\xb3"),
   ESCAPE_VALUE("&ocirc;", "\xc3\xb4"),
   ESCAPE_VALUE("&ograve;", "\xc3\xb2"),
   ESCAPE_VALUE("&omega;", "\xce\xa9"),
   ESCAPE_VALUE("&omicron;", "\xce\x9f"),
   ESCAPE_VALUE("&oplus;", "\xe2\x8a\x95"),
   ESCAPE_VALUE("&or;", "\xe2\x88\xa8"),
   ESCAPE_VALUE("&ordf;", "\xc2\xaa"),
   ESCAPE_VALUE("&ordm;", "\xc2\xba"),
   ESCAPE_VALUE("&oslash;", "\xc3\xb8"),
   ESCAPE_VALUE("&otilde;", "\xc3\xb5"),
   ESCAPE_VALUE("&ouml;", "\xc3\xb6"),
   ESCAPE_VALUE("&para;", "\xc2\xb6"),
   ESCAPE_VALUE("&perp;", "\xe2\x8a\xa5"),
   ESCAPE_VALUE("&phi;", "\xce\xa6"),
   ESCAPE_VALUE("&pi;", "\xce\xa0"),
   ESCAPE_VALUE("&plusmn;", "\xc2\xb1"),
   ESCAPE_VALUE("&pound;", "\xc2\xa3"),
   ESCAPE_VALUE("&prod;", "\xe2\x88\x8f"),
   ESCAPE_VALUE("&psi;", "\xce\xa8"),
   ESCAPE_VALUE("&raquo;", "\xc2\xbb"),
   ESCAPE_VALUE("&rarr;", "\xe2\x86\x92"),
   ESCAPE_VALUE("&rarr;", "\xe2\x87\x92"),
   ESCAPE_VALUE("&reg;", "\xc2\xae"),
   ESCAPE_VALUE("&rho;", "\xce\xa1"),
   ESCAPE_VALUE("&rlm;", "\xe2\x80\x8f"),
   ESCAPE_VALUE("&sect;", "\xc2\xa7"),
   ESCAPE_VALUE("&shy;", "\xc2\xad"),
   ESCAPE_VALUE("&sigma;", "\xce\xa3"),
   ESCAPE_VALUE("&sum;", "\xe2\x88\x91"),
   ESCAPE_VALUE("&sup1;", "\xc2\xb9"),
   ESCAPE_VALUE("&sup2;", "\xc2\xb2"),
   ESCAPE_VALUE("&sup3;", "\xc2\xb3"),
   ESCAPE_VALUE("&szlig;", "\xc3\x9f"),
   ESCAPE_VALUE("&tau;", "\xce\xa4"),
   ESCAPE_VALUE("&theta;", "\xce\x98"),
   ESCAPE_VALUE("&thorn;", "\xc3\xbe"),
   ESCAPE_VALUE("&times;", "\xc3\x97"),
   ESCAPE_VALUE("&uacute;", "\xc3\xba"),
   ESCAPE_VALUE("&uarr;", "\xe2\x86\x91"),
   ESCAPE_VALUE("&ucirc;", "\xc3\xbb"),
   ESCAPE_VALUE("&ugrave;", "\xc3\xb9"),
   ESCAPE_VALUE("&uml;", "\xc2\xa8"),
   ESCAPE_VALUE("&upsilon;", "\xce\xa5"),
   ESCAPE_VALUE("&uuml;", "\xc3\xbc"),
   ESCAPE_VALUE("&xi;", "\xce\x9e"),
   ESCAPE_VALUE("&yacute;", "\xc3\xbd"),
   ESCAPE_VALUE("&yen;", "\xc2\xa5"),
   ESCAPE_VALUE("&yuml;", "\xc3\xbf"),
   ESCAPE_VALUE("&zeta;", "\xce\x96"),
   ESCAPE_VALUE("&zwj;", "\xe2\x80\x8d"),
   ESCAPE_VALUE("&zwnj;", "\xe2\x80\x8c"),
};

/**
 * @internal
 * @var escape_values_e_common_sorted[]
 * The handful of most common escapes, sorted by escape name. It is searched
 * before escape_values_e_sorted and holds the same entries as
 * escape_values_v_common_sorted.
 */
static const Escape_Value escape_values_e_common_sorted[] = {
   ESCAPE_VALUE("&amp;", "\x26"),
   ESCAPE_VALUE("&apos;", "\x27"),
   ESCAPE_VALUE("&gt;", "\x3e"),
   /* "&lt;" has to come before "&quot;" ('l' < 'q'); in the other order the
    * binary search never reaches "&lt;". */
   ESCAPE_VALUE("&lt;", "\x3c"),
   ESCAPE_VALUE("&quot;", "\x22"),
};

/**
 * @internal
 * @var escape_values_v_sorted[]
 * All the less common escapes, sorted by the bytes of their value.
 * Must hold the same entries as escape_values_e_sorted.
 */
static const Escape_Value escape_values_v_sorted[] = {
   ESCAPE_VALUE("&nbsp;", "\xc2\xa0"),
   ESCAPE_VALUE("&iexcl;", "\xc2\xa1"),
   ESCAPE_VALUE("&cent;", "\xc2\xa2"),
   ESCAPE_VALUE("&pound;", "\xc2\xa3"),
   ESCAPE_VALUE("&curren;", "\xc2\xa4"),
   ESCAPE_VALUE("&yen;", "\xc2\xa5"),
   ESCAPE_VALUE("&brvbar;", "\xc2\xa6"),
   ESCAPE_VALUE("&sect;", "\xc2\xa7"),
   ESCAPE_VALUE("&uml;", "\xc2\xa8"),
   ESCAPE_VALUE("&copy;", "\xc2\xa9"),
   ESCAPE_VALUE("&ordf;", "\xc2\xaa"),
   ESCAPE_VALUE("&laquo;", "\xc2\xab"),
   ESCAPE_VALUE("&not;", "\xc2\xac"),
   ESCAPE_VALUE("&shy;", "\xc2\xad"),
   ESCAPE_VALUE("&reg;", "\xc2\xae"),
   ESCAPE_VALUE("&macr;", "\xc2\xaf"),
   ESCAPE_VALUE("&deg;", "\xc2\xb0"),
   ESCAPE_VALUE("&plusmn;", "\xc2\xb1"),
   ESCAPE_VALUE("&sup2;", "\xc2\xb2"),
   ESCAPE_VALUE("&sup3;", "\xc2\xb3"),
   ESCAPE_VALUE("&acute;", "\xc2\xb4"),
   ESCAPE_VALUE("&micro;", "\xc2\xb5"),
   ESCAPE_VALUE("&para;", "\xc2\xb6"),
   ESCAPE_VALUE("&middot;", "\xc2\xb7"),
   ESCAPE_VALUE("&cedil;", "\xc2\xb8"),
   ESCAPE_VALUE("&sup1;", "\xc2\xb9"),
   ESCAPE_VALUE("&ordm;", "\xc2\xba"),
   ESCAPE_VALUE("&raquo;", "\xc2\xbb"),
   ESCAPE_VALUE("&frac14;", "\xc2\xbc"),
   ESCAPE_VALUE("&frac12;", "\xc2\xbd"),
   ESCAPE_VALUE("&frac34;", "\xc2\xbe"),
   ESCAPE_VALUE("&iquest;", "\xc2\xbf"),
   ESCAPE_VALUE("&Agrave;", "\xc3\x80"),
   ESCAPE_VALUE("&Aacute;", "\xc3\x81"),
   ESCAPE_VALUE("&Acirc;", "\xc3\x82"),
   ESCAPE_VALUE("&Atilde;", "\xc3\x83"),
   ESCAPE_VALUE("&Auml;", "\xc3\x84"),
   ESCAPE_VALUE("&Aring;", "\xc3\x85"),
   ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
   ESCAPE_VALUE("&Ccedil;", "\xc3\x87"),
   ESCAPE_VALUE("&Egrave;", "\xc3\x88"),
   ESCAPE_VALUE("&Eacute;", "\xc3\x89"),
   ESCAPE_VALUE("&Ecirc;", "\xc3\x8a"),
   ESCAPE_VALUE("&Euml;", "\xc3\x8b"),
   ESCAPE_VALUE("&Igrave;", "\xc3\x8c"),
   ESCAPE_VALUE("&Iacute;", "\xc3\x8d"),
   ESCAPE_VALUE("&Icirc;", "\xc3\x8e"),
   ESCAPE_VALUE("&Iuml;", "\xc3\x8f"),
   ESCAPE_VALUE("&Eth;", "\xc3\x90"),
   ESCAPE_VALUE("&Ntilde;", "\xc3\x91"),
   ESCAPE_VALUE("&Ograve;", "\xc3\x92"),
   ESCAPE_VALUE("&Oacute;", "\xc3\x93"),
   ESCAPE_VALUE("&Ocirc;", "\xc3\x94"),
   ESCAPE_VALUE("&Otilde;", "\xc3\x95"),
   ESCAPE_VALUE("&Ouml;", "\xc3\x96"),
   ESCAPE_VALUE("&times;", "\xc3\x97"),
   ESCAPE_VALUE("&Oslash;", "\xc3\x98"),
   ESCAPE_VALUE("&Ugrave;", "\xc3\x99"),
   ESCAPE_VALUE("&Uacute;", "\xc3\x9a"),
   ESCAPE_VALUE("&Ucirc;", "\xc3\x9b"),
   ESCAPE_VALUE("&Yacute;", "\xc3\x9d"),
   ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
   ESCAPE_VALUE("&szlig;", "\xc3\x9f"),
   ESCAPE_VALUE("&agrave;", "\xc3\xa0"),
   ESCAPE_VALUE("&aacute;", "\xc3\xa1"),
   ESCAPE_VALUE("&acirc;", "\xc3\xa2"),
   ESCAPE_VALUE("&atilde;", "\xc3\xa3"),
   ESCAPE_VALUE("&auml;", "\xc3\xa4"),
   ESCAPE_VALUE("&aring;", "\xc3\xa5"),
   ESCAPE_VALUE("&aelig;", "\xc3\xa6"),
   ESCAPE_VALUE("&ccedil;", "\xc3\xa7"),
   ESCAPE_VALUE("&egrave;", "\xc3\xa8"),
   ESCAPE_VALUE("&eacute;", "\xc3\xa9"),
   ESCAPE_VALUE("&ecirc;", "\xc3\xaa"),
   ESCAPE_VALUE("&euml;", "\xc3\xab"),
   ESCAPE_VALUE("&igrave;", "\xc3\xac"),
   ESCAPE_VALUE("&iacute;", "\xc3\xad"),
   ESCAPE_VALUE("&icirc;", "\xc3\xae"),
   ESCAPE_VALUE("&iuml;", "\xc3\xaf"),
   ESCAPE_VALUE("&eth;", "\xc3\xb0"),
   ESCAPE_VALUE("&ntilde;", "\xc3\xb1"),
   ESCAPE_VALUE("&ograve;", "\xc3\xb2"),
   ESCAPE_VALUE("&oacute;", "\xc3\xb3"),
   ESCAPE_VALUE("&ocirc;", "\xc3\xb4"),
   ESCAPE_VALUE("&otilde;", "\xc3\xb5"),
   ESCAPE_VALUE("&ouml;", "\xc3\xb6"),
   ESCAPE_VALUE("&divide;", "\xc3\xb7"),
   ESCAPE_VALUE("&oslash;", "\xc3\xb8"),
   ESCAPE_VALUE("&ugrave;", "\xc3\xb9"),
   ESCAPE_VALUE("&uacute;", "\xc3\xba"),
   ESCAPE_VALUE("&ucirc;", "\xc3\xbb"),
   ESCAPE_VALUE("&uuml;", "\xc3\xbc"),
   ESCAPE_VALUE("&yacute;", "\xc3\xbd"),
   ESCAPE_VALUE("&thorn;", "\xc3\xbe"),
   ESCAPE_VALUE("&yuml;", "\xc3\xbf"),
   ESCAPE_VALUE("&alpha;", "\xce\x91"),
   ESCAPE_VALUE("&beta;", "\xce\x92"),
   ESCAPE_VALUE("&gamma;", "\xce\x93"),
   ESCAPE_VALUE("&delta;", "\xce\x94"),
   ESCAPE_VALUE("&epsilon;", "\xce\x95"),
   ESCAPE_VALUE("&zeta;", "\xce\x96"),
   ESCAPE_VALUE("&eta;", "\xce\x97"),
   ESCAPE_VALUE("&theta;", "\xce\x98"),
   ESCAPE_VALUE("&iota;", "\xce\x99"),
   ESCAPE_VALUE("&kappa;", "\xce\x9a"),
   ESCAPE_VALUE("&lambda;", "\xce\x9b"),
   ESCAPE_VALUE("&mu;", "\xce\x9c"),
   ESCAPE_VALUE("&nu;", "\xce\x9d"),
   ESCAPE_VALUE("&xi;", "\xce\x9e"),
   ESCAPE_VALUE("&omicron;", "\xce\x9f"),
   ESCAPE_VALUE("&pi;", "\xce\xa0"),
   ESCAPE_VALUE("&rho;", "\xce\xa1"),
   ESCAPE_VALUE("&sigma;", "\xce\xa3"),
   ESCAPE_VALUE("&tau;", "\xce\xa4"),
   ESCAPE_VALUE("&upsilon;", "\xce\xa5"),
   ESCAPE_VALUE("&phi;", "\xce\xa6"),
   ESCAPE_VALUE("&chi;", "\xce\xa7"),
   ESCAPE_VALUE("&psi;", "\xce\xa8"),
   ESCAPE_VALUE("&omega;", "\xce\xa9"),
   ESCAPE_VALUE("&zwnj;", "\xe2\x80\x8c"),
   ESCAPE_VALUE("&zwj;", "\xe2\x80\x8d"),
   ESCAPE_VALUE("&lrm;", "\xe2\x80\x8e"),
   ESCAPE_VALUE("&rlm;", "\xe2\x80\x8f"),
   ESCAPE_VALUE("&dagger;", "\xe2\x80\xa0"),
   ESCAPE_VALUE("&Dagger;", "\xe2\x80\xa1"),
   ESCAPE_VALUE("&bull;", "\xe2\x80\xa2"),
   ESCAPE_VALUE("&hellip;", "\xe2\x80\xa6"),
   ESCAPE_VALUE("&euro;", "\xe2\x82\xac"),
   ESCAPE_VALUE("&larr;", "\xe2\x86\x90"),
   ESCAPE_VALUE("&uarr;", "\xe2\x86\x91"),
   ESCAPE_VALUE("&rarr;", "\xe2\x86\x92"),
   ESCAPE_VALUE("&darr;", "\xe2\x86\x93"),
   ESCAPE_VALUE("&harr;", "\xe2\x86\x94"),
   ESCAPE_VALUE("&larr;", "\xe2\x87\x90"),
   ESCAPE_VALUE("&rarr;", "\xe2\x87\x92"),
   ESCAPE_VALUE("&forall;", "\xe2\x88\x80"),
   ESCAPE_VALUE("&exist;", "\xe2\x88\x83"),
   ESCAPE_VALUE("&nabla;", "\xe2\x88\x87"),
   ESCAPE_VALUE("&prod;", "\xe2\x88\x8f"),
   ESCAPE_VALUE("&sum;", "\xe2\x88\x91"),
   ESCAPE_VALUE("&and;", "\xe2\x88\xa7"),
   ESCAPE_VALUE("&or;", "\xe2\x88\xa8"),
   ESCAPE_VALUE("&int;", "\xe2\x88\xab"),
   ESCAPE_VALUE("&ne;", "\xe2\x89\xa0"),
   ESCAPE_VALUE("&equiv;", "\xe2\x89\xa1"),
   ESCAPE_VALUE("&oplus;", "\xe2\x8a\x95"),
   ESCAPE_VALUE("&perp;", "\xe2\x8a\xa5"),
};

/**
 * @internal
 * @var escape_values_v_common_sorted[]
 * The handful of most common escapes, sorted by the bytes of their value. It
 * is searched before escape_values_v_sorted and holds the same entries as
 * escape_values_e_common_sorted.
 */
static const Escape_Value escape_values_v_common_sorted[] = {
   ESCAPE_VALUE("&quot;", "\x22"),
   ESCAPE_VALUE("&amp;", "\x26"),
   ESCAPE_VALUE("&apos;", "\x27"),
   ESCAPE_VALUE("&lt;", "\x3c"),
   ESCAPE_VALUE("&gt;", "\x3e"),
};

/**
 * @internal
 * Binary-search a by-name table for the escape spelled by the s_len bytes
 * starting at s. The match is exact: s_len must equal the entry's length.
 *
 * Only the first s_len bytes of s are ever read, so s does not need to be
 * NUL terminated at that point.
 *
 * @param s the escape string to search for
 * @param s_len length of s in bytes
 * @param escape_values array of Escape_Value to look inside, sorted by escape
 * @param escape_values_len number of entries in escape_values
 * @return index of the matching entry, or -1 when there is none
 */
static int
_escaped_string_search(const char *s, size_t s_len,
                       const Escape_Value escape_values[],
                       const size_t escape_values_len)
{
   /* Candidates live in the half-open range [lo, hi). */
   size_t lo = 0;
   size_t hi = escape_values_len;

   while (lo < hi)
     {
        /* Lower middle of the range, i.e. (first + last) / 2. */
        size_t mid = lo + ((hi - 1 - lo) / 2);
        const Escape_Value *ev = &escape_values[mid];
        size_t common = (s_len < ev->escape_len) ? s_len : ev->escape_len;
        int res = memcmp(s, ev->escape, common);

        if (res == 0)
          {
             if (s_len == ev->escape_len) return (int)mid;
             /* One is a strict prefix of the other; the shorter one sorts
              * first, exactly as strcmp() would order them. */
             res = (s_len < ev->escape_len) ? -1 : 1;
          }
        if (res > 0)
          lo = mid + 1;
        else
          hi = mid;
     }
   return -1;
}

/**
 * @internal
 * Binary-search a by-value table for an entry whose value is a prefix of s.
 *
 * s is compared with strncmp() against at most value_len bytes of each
 * probed entry, so it has to be a NUL terminated string.
 *
 * @param s the text to look at; a match only needs to cover its beginning
 * @param escape_values array of Escape_Value to look inside, sorted by value
 * @param escape_values_len number of entries in escape_values
 * @return index of the matching entry, or -1 when there is none
 */
static int
_escaped_value_search(const char *s, const Escape_Value escape_values[],
                      const size_t escape_values_len)
{
   /* Candidates live in the half-open range [lo, hi). */
   size_t lo = 0;
   size_t hi = escape_values_len;

   while (lo < hi)
     {
        size_t mid = lo + ((hi - 1 - lo) / 2);
        int res = strncmp(s, escape_values[mid].value,
                          escape_values[mid].value_len);

        if (res == 0) return (int)mid;
        if (res > 0)
          lo = mid + 1;
        else
          hi = mid;
     }
   return -1;
}

/**
 * @internal
 * Find the escape for the character sequence s starts with.
 *
 * The common table is tried first, then the full one.
 *
 * @param s the string to match
 * @param adv set to the number of bytes of s the match covers; left
 *            untouched when nothing matches
 * @return the escape string, or NULL when s does not start with a known value
 */
static inline const char *
_escaped_char_match(const char *s, int *adv)
{
   static const size_t common_count =
      sizeof(escape_values_v_common_sorted) /
      sizeof(escape_values_v_common_sorted[0]);
   static const size_t full_count =
      sizeof(escape_values_v_sorted) / sizeof(escape_values_v_sorted[0]);
   int idx;

   idx = _escaped_value_search(s, escape_values_v_common_sorted, common_count);
   if (idx != -1)
     {
        *adv = (int)escape_values_v_common_sorted[idx].value_len;
        return escape_values_v_common_sorted[idx].escape;
     }

   idx = _escaped_value_search(s, escape_values_v_sorted, full_count);
   if (idx != -1)
     {
        *adv = (int)escape_values_v_sorted[idx].value_len;
        return escape_values_v_sorted[idx].escape;
     }

   return NULL;
}

/*
 * Call site in _escaped_char_get() (that function is only partly visible in
 * the patch this code comes from, so it is not reproduced here): its named
 * escape branch calls
 *    _escaped_string_search(s, s_end - s, table, count)
 * first on escape_values_e_common_sorted and then on escape_values_e_sorted,
 * and returns the .value of the entry that was found.
 */