Rework how the HTML entity escape arrays are stored. This saves a few hundred bytes in the data section of the library, eliminates ~300 relocations, and makes the .so 77K smaller.

SVN revision: 30012
2007-05-17 22:48:48 +00:00 · 2007-05-17 22:48:48 +00:00 · 07ff938e34
parent 840acd78a4
commit 07ff938e34
1 changed files with 240 additions and 191 deletions
--- a/legacy/evas/src/lib/canvas/evas_object_textblock.c
+++ b/legacy/evas/src/lib/canvas/evas_object_textblock.c
@ -441,190 +441,235 @@ _lines_clear(Evas_Object *obj, Evas_Object_Textblock_Line *lines)
 }

 /* table of html escapes (that i can find) this should be ordered with the
- * most common first as it's a linear search to match - no hash for this
+ * most common first as it's a linear search to match - no hash for this.
+ *
+ * these are stored as one large string and one additional array that
+ * contains the offsets to the tokens for space efficiency.
 */
-static const char *_escapes[] = 
-{
-/* most common escaped stuff */
-     "&lt;", "<",
-     "&gt;", ">",
-     "&amp;", "&",
-     "&nbsp;", " ", /* NOTE: we will allow nbsp's to break as we map early - maybe map to ascii 0x01 and then make the rendering code think 0x01 -> 0x20 */
-     "&quot;", "\"",
-     /* all the rest */
-     "&copy;", "©",
-     "&reg;", "®",
-     "&hellip;", "…",
-     "&Ntilde;", "Ñ",
-     "&ntilde;", "ñ",
-     "&Ccedil;", "Ç",
-     "&ccedil;", "ç",
-     "&szlig;", "ß",
-     "&THORN;", "Þ",
-     "&thorn;", "þ",
-     "&ETH;", "Ð",
-     "&eth;", "ð",
-     "&acute;", "´",
-     "&cedil;", "¸",
-     "&deg;", "°",
-     "&uml;", "¨",
-     "&cent;", "¢",
-     "&pound;", "£",
-     "&curren;", "¤",
-     "&yen;", "¥",
-     "&euro;", "€",
-     "&sect;", "§",
-     "&para;", "¶",
-     "&laquo;", "«",
-     "&raquo;", "»",
-     "&iexcl;", "¡",
-     "&iquest;", "¿",
-     "&brvbar;", "¦",
-     "&ordf;", "ª",
-     "&ordm;", "º",
-     "&micro;", "µ",
-     "&macr;", "¯",
-     "&oplus;", "⊕",
-     "&int;", "∫",
-     "&sum;", "∑",
-     "&prod;", "∏",
-     "&perp;", "⊥",
-     "&or;", "∨",
-     "&and;", "∧",
-     "&equiv;", "≡",
-     "&ne;", "≠",
-     "&forall;", "∀",
-     "&exist;", "∃",
-     "&nabla;", "∇",
-     "&larr;", "←",
-     "&rarr;", "→",
-     "&uarr;", "↑",
-     "&darr;", "↓",
-     "&harr;", "↔",
-     "&lArr;", "⇐",
-     "&rArr;", "⇒",
-     "&plusmn;", "±",
-     "&middot;", "·",
-     "&times;", "×",
-     "&divide;", "÷",
-     "&sup1;", "¹",
-     "&sup2;", "²",
-     "&sup3;", "³",
-     "&frac14;", "¼",
-     "&frac12;", "½",
-     "&frac34;", "¾",
-     "&not;", "¬",
-     "&Aacute;", "Á",
-     "&Eacute;", "É",
-     "&Iacute;", "Í",
-     "&Oacute;", "Ó",
-     "&Uacute;", "Ú",
-     "&Yacute;", "Ý",
-     "&aacute;", "á",
-     "&eacute;", "é",
-     "&iacute;", "í",
-     "&oacute;", "ó",
-     "&uacute;", "ú",
-     "&yacute;", "ý",
-     "&Acirc;", "Â",
-     "&Ecirc;", "Ê",
-     "&Icirc;", "Î",
-     "&Ocirc;", "Ô",
-     "&Ucirc;", "Û",
-     "&acirc;", "â",
-     "&ecirc;", "ê",
-     "&icirc;", "î",
-     "&ocirc;", "ô",
-     "&ucirc;", "û",
-     "&Agrave;", "À",
-     "&Egrave;", "È",
-     "&Igrave;", "Ì",
-     "&Ograve;", "Ò",
-     "&Ugrave;", "Ù",
-     "&agrave;", "à",
-     "&egrave;", "è",
-     "&igrave;", "ì",
-     "&ograve;", "ò",
-     "&ugrave;", "ù",
-     "&Auml;", "Ä",
-     "&Euml;", "Ë",
-     "&Iuml;", "Ï",
-     "&Ouml;", "Ö",
-     "&auml;", "ä",
-     "&euml;", "ë",
-     "&iuml;", "ï",
-     "&ouml;", "ö",
-     "&uuml;", "ü",
-     "&yuml;", "ÿ",
-     "&Atilde;", "Ã",
-     "&atilde;", "ã",
-     "&Otilde;", "Õ",
-     "&otilde;", "õ",
-     "&aring;", "å",
-     "&Aring;", "Å",
-     "&Oslash;", "Ø",
-     "&oslash;", "ø",
-     "&AElig;", "Æ",
-     "&aelig;", "æ",
-     "&Ntilde;", "Ñ",
-     "&ntilde;", "ñ",
-     "&Ccedil;", "Ç",
-     "&ccedil;", "ç",
-     "&szlig;", "ß",
-     "&THORN;", "Þ",
-     "&thorn;", "þ",
-     "&ETH;", "Ð",
-     "&eth;", "ð",
-     "&alpha;", "α",
-     "&beta;", "β",
-     "&gamma;", "γ",
-     "&delta;", "δ",
-     "&epsilon;", "ε",
-     "&zeta;", "ζ",
-     "&eta;", "η",
-     "&theta;", "θ",
-     "&iota;", "ι",
-     "&kappa;", "κ",
-     "&lambda;", "λ",
-     "&mu;", "μ",
-     "&nu;", "ν",
-     "&omicron;", "ο",
-     "&xi;", "ξ",
-     "&pi;", "π",
-     "&rho;", "ρ",
-     "&sigma;", "σ",
-     "&tau;", "τ",
-     "&upsilon;", "υ",
-     "&phi;", "φ",
-     "&chi;", "χ",
-     "&psi;", "ψ",
-     "&omega;", "ω",
-     "&Alpha;", "Α",
-     "&Beta;", "Β",
-     "&Gamma;", "Γ",
-     "&Delta;", "Δ",
-     "&Epsilon;", "Ε",
-     "&Zeta;", "Ζ",
-     "&Eta;", "Η",
-     "&Theta;", "Θ",
-     "&Iota;", "Ι",
-     "&Kappa;", "Κ",
-     "&Lambda;", "Λ",
-     "&Mu;", "Μ",
-     "&Nu;", "Ν",
-     "&Omicron;", "Ο",
-     "&Xi;", "Ξ",
-     "&Pi;", "Π",
-     "&Rho;", "Ρ",
-     "&Sigma;", "Σ",
-     "&Tau;", "Τ",
-     "&Upsilon;", "Υ",
-     "&Phi;", "Φ",
-     "&Chi;", "Χ",
-     "&Psi;", "Ψ",
-     "&Omega;", "Ω"
+static const char escape_strings[] =
+	/* most common escaped stuff */
+	"&lt;\0\x3c\0"
+	"&gt;\0\x3e\0"
+	"&amp;\0\x26\0"
+	"&nbsp;\0\x20\0" /* NOTE: we allow nbsp's to break as we map early - maybe map to ascii 0x01 and then make the rendering code think 0x01 -> 0x20 */
+	"&quot;\0\x22\0"
+	/* all the rest */
+	"&copy;\0\xc2\xa9\0"
+	"&reg;\0\xc2\xae\0"
+	"&hellip;\0\xe2\x80\xa6\0"
+	"&Ntilde;\0\xc3\x91\0"
+	"&ntilde;\0\xc3\xb1\0"
+	"&Ccedil;\0\xc3\x87\0"
+	"&ccedil;\0\xc3\xa7\0"
+	"&szlig;\0\xc3\x9f\0"
+	"&THORN;\0\xc3\x9e\0"
+	"&thorn;\0\xc3\xbe\0"
+	"&ETH;\0\xc3\x90\0"
+	"&eth;\0\xc3\xb0\0"
+	"&acute;\0\xc2\xb4\0"
+	"&cedil;\0\xc2\xb8\0"
+	"&deg;\0\xc2\xb0\0"
+	"&uml;\0\xc2\xa8\0"
+	"&cent;\0\xc2\xa2\0"
+	"&pound;\0\xc2\xa3\0"
+	"&curren;\0\xc2\xa4\0"
+	"&yen;\0\xc2\xa5\0"
+	"&euro;\0\xe2\x82\xac\0"
+	"&sect;\0\xc2\xa7\0"
+	"&para;\0\xc2\xb6\0"
+	"&laquo;\0\xc2\xab\0"
+	"&raquo;\0\xc2\xbb\0"
+	"&iexcl;\0\xc2\xa1\0"
+	"&iquest;\0\xc2\xbf\0"
+	"&brvbar;\0\xc2\xa6\0"
+	"&ordf;\0\xc2\xaa\0"
+	"&ordm;\0\xc2\xba\0"
+	"&micro;\0\xc2\xb5\0"
+	"&macr;\0\xc2\xaf\0"
+	"&oplus;\0\xe2\x8a\x95\0"
+	"&int;\0\xe2\x88\xab\0"
+	"&sum;\0\xe2\x88\x91\0"
+	"&prod;\0\xe2\x88\x8f\0"
+	"&perp;\0\xe2\x8a\xa5\0"
+	"&or;\0\xe2\x88\xa8\0"
+	"&and;\0\xe2\x88\xa7\0"
+	"&equiv;\0\xe2\x89\xa1\0"
+	"&ne;\0\xe2\x89\xa0\0"
+	"&forall;\0\xe2\x88\x80\0"
+	"&exist;\0\xe2\x88\x83\0"
+	"&nabla;\0\xe2\x88\x87\0"
+	"&larr;\0\xe2\x86\x90\0"
+	"&rarr;\0\xe2\x86\x92\0"
+	"&uarr;\0\xe2\x86\x91\0"
+	"&darr;\0\xe2\x86\x93\0"
+	"&harr;\0\xe2\x86\x94\0"
+	"&lArr;\0\xe2\x87\x90\0"
+	"&rArr;\0\xe2\x87\x92\0"
+	"&plusmn;\0\xc2\xb1\0"
+	"&middot;\0\xc2\xb7\0"
+	"&times;\0\xc3\x97\0"
+	"&divide;\0\xc3\xb7\0"
+	"&sup1;\0\xc2\xb9\0"
+	"&sup2;\0\xc2\xb2\0"
+	"&sup3;\0\xc2\xb3\0"
+	"&frac14;\0\xc2\xbc\0"
+	"&frac12;\0\xc2\xbd\0"
+	"&frac34;\0\xc2\xbe\0"
+	"&not;\0\xc2\xac\0"
+	"&Aacute;\0\xc3\x81\0"
+	"&Eacute;\0\xc3\x89\0"
+	"&Iacute;\0\xc3\x8d\0"
+	"&Oacute;\0\xc3\x93\0"
+	"&Uacute;\0\xc3\x9a\0"
+	"&Yacute;\0\xc3\x9d\0"
+	"&aacute;\0\xc3\xa1\0"
+	"&eacute;\0\xc3\xa9\0"
+	"&iacute;\0\xc3\xad\0"
+	"&oacute;\0\xc3\xb3\0"
+	"&uacute;\0\xc3\xba\0"
+	"&yacute;\0\xc3\xbd\0"
+	"&Acirc;\0\xc3\x82\0"
+	"&Ecirc;\0\xc3\x8a\0"
+	"&Icirc;\0\xc3\x8e\0"
+	"&Ocirc;\0\xc3\x94\0"
+	"&Ucirc;\0\xc3\x9b\0"
+	"&acirc;\0\xc3\xa2\0"
+	"&ecirc;\0\xc3\xaa\0"
+	"&icirc;\0\xc3\xae\0"
+	"&ocirc;\0\xc3\xb4\0"
+	"&ucirc;\0\xc3\xbb\0"
+	"&Agrave;\0\xc3\x80\0"
+	"&Egrave;\0\xc3\x88\0"
+	"&Igrave;\0\xc3\x8c\0"
+	"&Ograve;\0\xc3\x92\0"
+	"&Ugrave;\0\xc3\x99\0"
+	"&agrave;\0\xc3\xa0\0"
+	"&egrave;\0\xc3\xa8\0"
+	"&igrave;\0\xc3\xac\0"
+	"&ograve;\0\xc3\xb2\0"
+	"&ugrave;\0\xc3\xb9\0"
+	"&Auml;\0\xc3\x84\0"
+	"&Euml;\0\xc3\x8b\0"
+	"&Iuml;\0\xc3\x8f\0"
+	"&Ouml;\0\xc3\x96\0"
+	"&auml;\0\xc3\xa4\0"
+	"&euml;\0\xc3\xab\0"
+	"&iuml;\0\xc3\xaf\0"
+	"&ouml;\0\xc3\xb6\0"
+	"&uuml;\0\xc3\xbc\0"
+	"&yuml;\0\xc3\xbf\0"
+	"&Atilde;\0\xc3\x83\0"
+	"&atilde;\0\xc3\xa3\0"
+	"&Otilde;\0\xc3\x95\0"
+	"&otilde;\0\xc3\xb5\0"
+	"&aring;\0\xc3\xa5\0"
+	"&Aring;\0\xc3\x85\0"
+	"&Oslash;\0\xc3\x98\0"
+	"&oslash;\0\xc3\xb8\0"
+	"&AElig;\0\xc3\x86\0"
+	"&aelig;\0\xc3\xa6\0"
+	"&Ntilde;\0\xc3\x91\0"
+	"&ntilde;\0\xc3\xb1\0"
+	"&Ccedil;\0\xc3\x87\0"
+	"&ccedil;\0\xc3\xa7\0"
+	"&szlig;\0\xc3\x9f\0"
+	"&THORN;\0\xc3\x9e\0"
+	"&thorn;\0\xc3\xbe\0"
+	"&ETH;\0\xc3\x90\0"
+	"&eth;\0\xc3\xb0\0"
+	"&alpha;\0\xce\xb1\0"
+	"&beta;\0\xce\xb2\0"
+	"&gamma;\0\xce\xb3\0"
+	"&delta;\0\xce\xb4\0"
+	"&epsilon;\0\xce\xb5\0"
+	"&zeta;\0\xce\xb6\0"
+	"&eta;\0\xce\xb7\0"
+	"&theta;\0\xce\xb8\0"
+	"&iota;\0\xce\xb9\0"
+	"&kappa;\0\xce\xba\0"
+	"&lambda;\0\xce\xbb\0"
+	"&mu;\0\xce\xbc\0"
+	"&nu;\0\xce\xbd\0"
+	"&omicron;\0\xce\xbf\0"
+	"&xi;\0\xce\xbe\0"
+	"&pi;\0\xcf\x80\0"
+	"&rho;\0\xcf\x81\0"
+	"&sigma;\0\xcf\x83\0"
+	"&tau;\0\xcf\x84\0"
+	"&upsilon;\0\xcf\x85\0"
+	"&phi;\0\xcf\x86\0"
+	"&chi;\0\xcf\x87\0"
+	"&psi;\0\xcf\x88\0"
+	"&omega;\0\xcf\x89\0"
+	"&Alpha;\0\xce\x91\0"
+	"&Beta;\0\xce\x92\0"
+	"&Gamma;\0\xce\x93\0"
+	"&Delta;\0\xce\x94\0"
+	"&Epsilon;\0\xce\x95\0"
+	"&Zeta;\0\xce\x96\0"
+	"&Eta;\0\xce\x97\0"
+	"&Theta;\0\xce\x98\0"
+	"&Iota;\0\xce\x99\0"
+	"&Kappa;\0\xce\x9a\0"
+	"&Lambda;\0\xce\x9b\0"
+	"&Mu;\0\xce\x9c\0"
+	"&Nu;\0\xce\x9d\0"
+	"&Omicron;\0\xce\x9f\0"
+	"&Xi;\0\xce\x9e\0"
+	"&Pi;\0\xce\xa0\0"
+	"&Rho;\0\xce\xa1\0"
+	"&Sigma;\0\xce\xa3\0"
+	"&Tau;\0\xce\xa4\0"
+	"&Upsilon;\0\xce\xa5\0"
+	"&Phi;\0\xce\xa6\0"
+	"&Chi;\0\xce\xa7\0"
+	"&Psi;\0\xce\xa8\0"
+	"&Omega;\0\xce\xa9\0"
+;
+
+static const unsigned short escape_offsets[] = {
+	0, 4, 5, 9, 10, 15, 16, 22, 23, 29, 30, 36,
+	38, 43, 45, 53, 56, 64, 66, 74, 76, 84, 86,
+	94, 96, 103, 105, 112, 114, 121, 123, 128, 130,
+	135, 137, 144, 146, 153, 155, 160, 162, 167,
+	169, 175, 177, 184, 186, 194, 196, 201, 203,
+	209, 212, 218, 220, 226, 228, 235, 237, 244,
+	246, 253, 255, 263, 265, 273, 275, 281, 283,
+	289, 291, 298, 300, 306, 308, 315, 318, 323,
+	326, 331, 334, 340, 343, 349, 352, 356, 359,
+	364, 367, 374, 377, 381, 384, 392, 395, 402,
+	405, 412, 415, 421, 424, 430, 433, 439, 442,
+	448, 451, 457, 460, 466, 469, 475, 478, 486,
+	488, 496, 498, 505, 507, 515, 517, 523, 525,
+	531, 533, 539, 541, 549, 551, 559, 561, 569,
+	571, 576, 578, 586, 588, 596, 598, 606, 608,
+	616, 618, 626, 628, 636, 638, 646, 648, 656,
+	658, 666, 668, 676, 678, 686, 688, 696, 698,
+	705, 707, 714, 716, 723, 725, 732, 734, 741,
+	743, 750, 752, 759, 761, 768, 770, 777, 779,
+	786, 788, 796, 798, 806, 808, 816, 818, 826,
+	828, 836, 838, 846, 848, 856, 858, 866, 868,
+	876, 878, 886, 888, 894, 896, 902, 904, 910,
+	912, 918, 920, 926, 928, 934, 936, 942, 944,
+	950, 952, 958, 960, 966, 968, 976, 978, 986,
+	988, 996, 998, 1006, 1008, 1015, 1017, 1024, 1026,
+	1034, 1036, 1044, 1046, 1053, 1055, 1062, 1064, 1072,
+	1074, 1082, 1084, 1092, 1094, 1102, 1104, 1111, 1113,
+	1120, 1122, 1129, 1131, 1136, 1138, 1143, 1145, 1152,
+	1154, 1160, 1162, 1169, 1171, 1178, 1180, 1189, 1191,
+	1197, 1199, 1204, 1206, 1213, 1215, 1221, 1223, 1230,
+	1232, 1240, 1242, 1246, 1248, 1252, 1254, 1263, 1265,
+	1269, 1271, 1275, 1277, 1282, 1284, 1291, 1293, 1298,
+	1300, 1309, 1311, 1316, 1318, 1323, 1325, 1330, 1332,
+	1339, 1341, 1348, 1350, 1356, 1358, 1365, 1367, 1374,
+	1376, 1385, 1387, 1393, 1395, 1400, 1402, 1409, 1411,
+	1417, 1419, 1426, 1428, 1436, 1438, 1442, 1444, 1448,
+	1450, 1459, 1461, 1465, 1467, 1471, 1473, 1478, 1480,
+	1487, 1489, 1494, 1496, 1505, 1507, 1512, 1514, 1519,
+	1521, 1526, 1528, 1535
 };

+
 static int
 _is_white(int c)
 {
@ -2492,12 +2537,14 @@ evas_object_textblock_text_markup_set(Evas_Object *obj, const char *text)
 		    {
 		       int i;
 		       
-		       for (i = 0; i < (int)(sizeof(_escapes) / sizeof(char *)); i += 2)
+		       for (i = 0; i < (int)(sizeof(escape_offsets) / sizeof(escape_offsets[0])); i += 2)
 			 {
-			    if (!strncmp(_escapes[i], esc_start, 
-					 esc_end - esc_start + 1))
+			    const char *in = escape_strings + escape_offsets[i];
+			    const char *out = escape_strings + escape_offsets[i + 1];
+
+			    if (!strncmp(in, esc_start, esc_end - esc_start + 1))
 			      {
-				 evas_textblock_cursor_text_append(o->cursor, _escapes[i + 1]);
+				 evas_textblock_cursor_text_append(o->cursor, out);
 				 break;
 			      }
 			 }
@ -2595,19 +2642,21 @@ evas_object_textblock_text_markup_get(Evas_Object *obj)
 			 {
 			    int i;

-			    for (i = 1; i < (int)(sizeof(_escapes) / sizeof(char *)); i += 2)
+			    for (i = 1; i < (int)(sizeof(escape_offsets) / sizeof(escape_offsets[0])); i += 2)
 			      {
-				 if (!strncmp(_escapes[i], p,
-					      strlen(_escapes[i])))
+			    const char *in = escape_strings + escape_offsets[i];
+			    const char *out = escape_strings + escape_offsets[i - 1];
+
+				 if (!strncmp(in, p, strlen(in)))
 				   {
 				      o->markup_text = _strbuf_append_n(o->markup_text,
 									ps, p - ps,
 									&slen, &salloc);
 				      o->markup_text = _strbuf_append(o->markup_text,
-								      _escapes[i-1],
+								      out,
 								      &slen, &salloc);
-				      ps = p + strlen(_escapes[i]);
-				      p += strlen(_escapes[i]) - 1;
+				      ps = p + strlen(in);
+				      p += strlen(in) - 1;
 				   }
 			      }
 			 }