Rework how the HTML entity escape arrays are stored. This saves a few hundred bytes in the data section of the library, eliminates ~300 relocations, and makes the .so 77K smaller.

SVN revision: 30012
This commit is contained in:
tilman 2007-05-17 22:48:48 +00:00 committed by tilman
parent 840acd78a4
commit 07ff938e34
1 changed files with 240 additions and 191 deletions

View File

@ -441,190 +441,235 @@ _lines_clear(Evas_Object *obj, Evas_Object_Textblock_Line *lines)
}
/* table of html escapes (that i can find) this should be ordered with the
* most common first as it's a linear search to match - no hash for this
* most common first as it's a linear search to match - no hash for this.
*
* these are stored as one large string and one additional array that
* contains the offsets to the tokens for space efficiency.
*/
static const char *_escapes[] =
{
/* most common escaped stuff */
"&lt;", "<",
"&gt;", ">",
"&amp;", "&",
"&nbsp;", " ", /* NOTE: we will allow nbsp's to break as we map early - maybe map to ascii 0x01 and then make the rendering code think 0x01 -> 0x20 */
"&quot;", "\"",
/* all the rest */
"&copy;", "©",
"&reg;", "®",
"&hellip;", "",
"&Ntilde;", "Ñ",
"&ntilde;", "ñ",
"&Ccedil;", "Ç",
"&ccedil;", "ç",
"&szlig;", "ß",
"&THORN;", "Þ",
"&thorn;", "þ",
"&ETH;", "Ð",
"&eth;", "ð",
"&acute;", "´",
"&cedil;", "¸",
"&deg;", "°",
"&uml;", "¨",
"&cent;", "¢",
"&pound;", "£",
"&curren;", "¤",
"&yen;", "¥",
"&euro;", "",
"&sect;", "§",
"&para;", "",
"&laquo;", "«",
"&raquo;", "»",
"&iexcl;", "¡",
"&iquest;", "¿",
"&brvbar;", "¦",
"&ordf;", "ª",
"&ordm;", "º",
"&micro;", "µ",
"&macr;", "¯",
"&oplus;", "",
"&int;", "",
"&sum;", "",
"&prod;", "",
"&perp;", "",
"&or;", "",
"&and;", "",
"&equiv;", "",
"&ne;", "",
"&forall;", "",
"&exist;", "",
"&nabla;", "",
"&larr;", "",
"&rarr;", "",
"&uarr;", "",
"&darr;", "",
"&harr;", "",
"&lArr;", "",
"&rArr;", "",
"&plusmn;", "±",
"&middot;", "·",
"&times;", "×",
"&divide;", "÷",
"&sup1;", "¹",
"&sup2;", "²",
"&sup3;", "³",
"&frac14;", "¼",
"&frac12;", "½",
"&frac34;", "¾",
"&not;", "¬",
"&Aacute;", "Á",
"&Eacute;", "É",
"&Iacute;", "Í",
"&Oacute;", "Ó",
"&Uacute;", "Ú",
"&Yacute;", "Ý",
"&aacute;", "á",
"&eacute;", "é",
"&iacute;", "í",
"&oacute;", "ó",
"&uacute;", "ú",
"&yacute;", "ý",
"&Acirc;", "Â",
"&Ecirc;", "Ê",
"&Icirc;", "Î",
"&Ocirc;", "Ô",
"&Ucirc;", "Û",
"&acirc;", "â",
"&ecirc;", "ê",
"&icirc;", "î",
"&ocirc;", "ô",
"&ucirc;", "û",
"&Agrave;", "À",
"&Egrave;", "È",
"&Igrave;", "Ì",
"&Ograve;", "Ò",
"&Ugrave;", "Ù",
"&agrave;", "à",
"&egrave;", "è",
"&igrave;", "ì",
"&ograve;", "ò",
"&ugrave;", "ù",
"&Auml;", "Ä",
"&Euml;", "Ë",
"&Iuml;", "Ï",
"&Ouml;", "Ö",
"&auml;", "ä",
"&euml;", "ë",
"&iuml;", "ï",
"&ouml;", "ö",
"&uuml;", "ü",
"&yuml;", "ÿ",
"&Atilde;", "Ã",
"&atilde;", "ã",
"&Otilde;", "Õ",
"&otilde;", "õ",
"&aring;", "å",
"&Aring;", "Å",
"&Oslash;", "Ø",
"&oslash;", "ø",
"&AElig;", "Æ",
"&aelig;", "æ",
"&Ntilde;", "Ñ",
"&ntilde;", "ñ",
"&Ccedil;", "Ç",
"&ccedil;", "ç",
"&szlig;", "ß",
"&THORN;", "Þ",
"&thorn;", "þ",
"&ETH;", "Ð",
"&eth;", "ð",
"&alpha;", "α",
"&beta;", "β",
"&gamma;", "γ",
"&delta;", "δ",
"&epsilon;", "ε",
"&zeta;", "ζ",
"&eta;", "η",
"&theta;", "θ",
"&iota;", "ι",
"&kappa;", "κ",
"&lambda;", "λ",
"&mu;", "μ",
"&nu;", "ν",
"&omicron;", "ο",
"&xi;", "ξ",
"&pi;", "π",
"&rho;", "ρ",
"&sigma;", "σ",
"&tau;", "τ",
"&upsilon;", "υ",
"&phi;", "φ",
"&chi;", "χ",
"&psi;", "ψ",
"&omega;", "ω",
"&Alpha;", "Α",
"&Beta;", "Β",
"&Gamma;", "Γ",
"&Delta;", "Δ",
"&Epsilon;", "Ε",
"&Zeta;", "Ζ",
"&Eta;", "Η",
"&Theta;", "Θ",
"&Iota;", "Ι",
"&Kappa;", "Κ",
"&Lambda;", "Λ",
"&Mu;", "Μ",
"&Nu;", "Ν",
"&Omicron;", "Ο",
"&Xi;", "Ξ",
"&Pi;", "Π",
"&Rho;", "Ρ",
"&Sigma;", "Σ",
"&Tau;", "Τ",
"&Upsilon;", "Υ",
"&Phi;", "Φ",
"&Chi;", "Χ",
"&Psi;", "Ψ",
"&Omega;", "Ω"
/* Packed table of HTML entity escapes.
 *
 * Every entry contributes two NUL-terminated strings laid out back to back:
 * the entity name (e.g. "&lt;") followed by the UTF-8 bytes it stands for.
 * The whole table is a single char array, so it needs no per-string
 * pointers; the position of each string is recorded in escape_offsets.
 *
 * The byte layout is load-bearing: adding, removing or reordering an entry
 * requires escape_offsets to be regenerated to match.
 */
#define ESCAPE_PAIR(entity, utf8) entity "\0" utf8 "\0"
static const char escape_strings[] =
   /* most common escaped stuff */
   ESCAPE_PAIR("&lt;", "\x3c")
   ESCAPE_PAIR("&gt;", "\x3e")
   ESCAPE_PAIR("&amp;", "\x26")
   /* NOTE: we allow nbsp's to break as we map early - maybe map to ascii
    * 0x01 and then make the rendering code think 0x01 -> 0x20 */
   ESCAPE_PAIR("&nbsp;", "\x20")
   ESCAPE_PAIR("&quot;", "\x22")
   /* all the rest */
   ESCAPE_PAIR("&copy;", "\xc2\xa9")
   ESCAPE_PAIR("&reg;", "\xc2\xae")
   ESCAPE_PAIR("&hellip;", "\xe2\x80\xa6")
   ESCAPE_PAIR("&Ntilde;", "\xc3\x91")
   ESCAPE_PAIR("&ntilde;", "\xc3\xb1")
   ESCAPE_PAIR("&Ccedil;", "\xc3\x87")
   ESCAPE_PAIR("&ccedil;", "\xc3\xa7")
   ESCAPE_PAIR("&szlig;", "\xc3\x9f")
   ESCAPE_PAIR("&THORN;", "\xc3\x9e")
   ESCAPE_PAIR("&thorn;", "\xc3\xbe")
   ESCAPE_PAIR("&ETH;", "\xc3\x90")
   ESCAPE_PAIR("&eth;", "\xc3\xb0")
   ESCAPE_PAIR("&acute;", "\xc2\xb4")
   ESCAPE_PAIR("&cedil;", "\xc2\xb8")
   ESCAPE_PAIR("&deg;", "\xc2\xb0")
   ESCAPE_PAIR("&uml;", "\xc2\xa8")
   ESCAPE_PAIR("&cent;", "\xc2\xa2")
   ESCAPE_PAIR("&pound;", "\xc2\xa3")
   ESCAPE_PAIR("&curren;", "\xc2\xa4")
   ESCAPE_PAIR("&yen;", "\xc2\xa5")
   ESCAPE_PAIR("&euro;", "\xe2\x82\xac")
   ESCAPE_PAIR("&sect;", "\xc2\xa7")
   ESCAPE_PAIR("&para;", "\xc2\xb6")
   ESCAPE_PAIR("&laquo;", "\xc2\xab")
   ESCAPE_PAIR("&raquo;", "\xc2\xbb")
   ESCAPE_PAIR("&iexcl;", "\xc2\xa1")
   ESCAPE_PAIR("&iquest;", "\xc2\xbf")
   ESCAPE_PAIR("&brvbar;", "\xc2\xa6")
   ESCAPE_PAIR("&ordf;", "\xc2\xaa")
   ESCAPE_PAIR("&ordm;", "\xc2\xba")
   ESCAPE_PAIR("&micro;", "\xc2\xb5")
   ESCAPE_PAIR("&macr;", "\xc2\xaf")
   ESCAPE_PAIR("&oplus;", "\xe2\x8a\x95")
   ESCAPE_PAIR("&int;", "\xe2\x88\xab")
   ESCAPE_PAIR("&sum;", "\xe2\x88\x91")
   ESCAPE_PAIR("&prod;", "\xe2\x88\x8f")
   ESCAPE_PAIR("&perp;", "\xe2\x8a\xa5")
   ESCAPE_PAIR("&or;", "\xe2\x88\xa8")
   ESCAPE_PAIR("&and;", "\xe2\x88\xa7")
   ESCAPE_PAIR("&equiv;", "\xe2\x89\xa1")
   ESCAPE_PAIR("&ne;", "\xe2\x89\xa0")
   ESCAPE_PAIR("&forall;", "\xe2\x88\x80")
   ESCAPE_PAIR("&exist;", "\xe2\x88\x83")
   ESCAPE_PAIR("&nabla;", "\xe2\x88\x87")
   ESCAPE_PAIR("&larr;", "\xe2\x86\x90")
   ESCAPE_PAIR("&rarr;", "\xe2\x86\x92")
   ESCAPE_PAIR("&uarr;", "\xe2\x86\x91")
   ESCAPE_PAIR("&darr;", "\xe2\x86\x93")
   ESCAPE_PAIR("&harr;", "\xe2\x86\x94")
   ESCAPE_PAIR("&lArr;", "\xe2\x87\x90")
   ESCAPE_PAIR("&rArr;", "\xe2\x87\x92")
   ESCAPE_PAIR("&plusmn;", "\xc2\xb1")
   ESCAPE_PAIR("&middot;", "\xc2\xb7")
   ESCAPE_PAIR("&times;", "\xc3\x97")
   ESCAPE_PAIR("&divide;", "\xc3\xb7")
   ESCAPE_PAIR("&sup1;", "\xc2\xb9")
   ESCAPE_PAIR("&sup2;", "\xc2\xb2")
   ESCAPE_PAIR("&sup3;", "\xc2\xb3")
   ESCAPE_PAIR("&frac14;", "\xc2\xbc")
   ESCAPE_PAIR("&frac12;", "\xc2\xbd")
   ESCAPE_PAIR("&frac34;", "\xc2\xbe")
   ESCAPE_PAIR("&not;", "\xc2\xac")
   ESCAPE_PAIR("&Aacute;", "\xc3\x81")
   ESCAPE_PAIR("&Eacute;", "\xc3\x89")
   ESCAPE_PAIR("&Iacute;", "\xc3\x8d")
   ESCAPE_PAIR("&Oacute;", "\xc3\x93")
   ESCAPE_PAIR("&Uacute;", "\xc3\x9a")
   ESCAPE_PAIR("&Yacute;", "\xc3\x9d")
   ESCAPE_PAIR("&aacute;", "\xc3\xa1")
   ESCAPE_PAIR("&eacute;", "\xc3\xa9")
   ESCAPE_PAIR("&iacute;", "\xc3\xad")
   ESCAPE_PAIR("&oacute;", "\xc3\xb3")
   ESCAPE_PAIR("&uacute;", "\xc3\xba")
   ESCAPE_PAIR("&yacute;", "\xc3\xbd")
   ESCAPE_PAIR("&Acirc;", "\xc3\x82")
   ESCAPE_PAIR("&Ecirc;", "\xc3\x8a")
   ESCAPE_PAIR("&Icirc;", "\xc3\x8e")
   ESCAPE_PAIR("&Ocirc;", "\xc3\x94")
   ESCAPE_PAIR("&Ucirc;", "\xc3\x9b")
   ESCAPE_PAIR("&acirc;", "\xc3\xa2")
   ESCAPE_PAIR("&ecirc;", "\xc3\xaa")
   ESCAPE_PAIR("&icirc;", "\xc3\xae")
   ESCAPE_PAIR("&ocirc;", "\xc3\xb4")
   ESCAPE_PAIR("&ucirc;", "\xc3\xbb")
   ESCAPE_PAIR("&Agrave;", "\xc3\x80")
   ESCAPE_PAIR("&Egrave;", "\xc3\x88")
   ESCAPE_PAIR("&Igrave;", "\xc3\x8c")
   ESCAPE_PAIR("&Ograve;", "\xc3\x92")
   ESCAPE_PAIR("&Ugrave;", "\xc3\x99")
   ESCAPE_PAIR("&agrave;", "\xc3\xa0")
   ESCAPE_PAIR("&egrave;", "\xc3\xa8")
   ESCAPE_PAIR("&igrave;", "\xc3\xac")
   ESCAPE_PAIR("&ograve;", "\xc3\xb2")
   ESCAPE_PAIR("&ugrave;", "\xc3\xb9")
   ESCAPE_PAIR("&Auml;", "\xc3\x84")
   ESCAPE_PAIR("&Euml;", "\xc3\x8b")
   ESCAPE_PAIR("&Iuml;", "\xc3\x8f")
   ESCAPE_PAIR("&Ouml;", "\xc3\x96")
   ESCAPE_PAIR("&auml;", "\xc3\xa4")
   ESCAPE_PAIR("&euml;", "\xc3\xab")
   ESCAPE_PAIR("&iuml;", "\xc3\xaf")
   ESCAPE_PAIR("&ouml;", "\xc3\xb6")
   ESCAPE_PAIR("&uuml;", "\xc3\xbc")
   ESCAPE_PAIR("&yuml;", "\xc3\xbf")
   ESCAPE_PAIR("&Atilde;", "\xc3\x83")
   ESCAPE_PAIR("&atilde;", "\xc3\xa3")
   ESCAPE_PAIR("&Otilde;", "\xc3\x95")
   ESCAPE_PAIR("&otilde;", "\xc3\xb5")
   ESCAPE_PAIR("&aring;", "\xc3\xa5")
   ESCAPE_PAIR("&Aring;", "\xc3\x85")
   ESCAPE_PAIR("&Oslash;", "\xc3\x98")
   ESCAPE_PAIR("&oslash;", "\xc3\xb8")
   ESCAPE_PAIR("&AElig;", "\xc3\x86")
   ESCAPE_PAIR("&aelig;", "\xc3\xa6")
   /* the next nine entries repeat earlier ones; they are kept so that the
    * table layout stays exactly as escape_offsets expects */
   ESCAPE_PAIR("&Ntilde;", "\xc3\x91")
   ESCAPE_PAIR("&ntilde;", "\xc3\xb1")
   ESCAPE_PAIR("&Ccedil;", "\xc3\x87")
   ESCAPE_PAIR("&ccedil;", "\xc3\xa7")
   ESCAPE_PAIR("&szlig;", "\xc3\x9f")
   ESCAPE_PAIR("&THORN;", "\xc3\x9e")
   ESCAPE_PAIR("&thorn;", "\xc3\xbe")
   ESCAPE_PAIR("&ETH;", "\xc3\x90")
   ESCAPE_PAIR("&eth;", "\xc3\xb0")
   ESCAPE_PAIR("&alpha;", "\xce\xb1")
   ESCAPE_PAIR("&beta;", "\xce\xb2")
   ESCAPE_PAIR("&gamma;", "\xce\xb3")
   ESCAPE_PAIR("&delta;", "\xce\xb4")
   ESCAPE_PAIR("&epsilon;", "\xce\xb5")
   ESCAPE_PAIR("&zeta;", "\xce\xb6")
   ESCAPE_PAIR("&eta;", "\xce\xb7")
   ESCAPE_PAIR("&theta;", "\xce\xb8")
   ESCAPE_PAIR("&iota;", "\xce\xb9")
   ESCAPE_PAIR("&kappa;", "\xce\xba")
   ESCAPE_PAIR("&lambda;", "\xce\xbb")
   ESCAPE_PAIR("&mu;", "\xce\xbc")
   ESCAPE_PAIR("&nu;", "\xce\xbd")
   ESCAPE_PAIR("&omicron;", "\xce\xbf")
   ESCAPE_PAIR("&xi;", "\xce\xbe")
   ESCAPE_PAIR("&pi;", "\xcf\x80")
   ESCAPE_PAIR("&rho;", "\xcf\x81")
   ESCAPE_PAIR("&sigma;", "\xcf\x83")
   ESCAPE_PAIR("&tau;", "\xcf\x84")
   ESCAPE_PAIR("&upsilon;", "\xcf\x85")
   ESCAPE_PAIR("&phi;", "\xcf\x86")
   ESCAPE_PAIR("&chi;", "\xcf\x87")
   ESCAPE_PAIR("&psi;", "\xcf\x88")
   ESCAPE_PAIR("&omega;", "\xcf\x89")
   ESCAPE_PAIR("&Alpha;", "\xce\x91")
   ESCAPE_PAIR("&Beta;", "\xce\x92")
   ESCAPE_PAIR("&Gamma;", "\xce\x93")
   ESCAPE_PAIR("&Delta;", "\xce\x94")
   ESCAPE_PAIR("&Epsilon;", "\xce\x95")
   ESCAPE_PAIR("&Zeta;", "\xce\x96")
   ESCAPE_PAIR("&Eta;", "\xce\x97")
   ESCAPE_PAIR("&Theta;", "\xce\x98")
   ESCAPE_PAIR("&Iota;", "\xce\x99")
   ESCAPE_PAIR("&Kappa;", "\xce\x9a")
   ESCAPE_PAIR("&Lambda;", "\xce\x9b")
   ESCAPE_PAIR("&Mu;", "\xce\x9c")
   ESCAPE_PAIR("&Nu;", "\xce\x9d")
   ESCAPE_PAIR("&Omicron;", "\xce\x9f")
   ESCAPE_PAIR("&Xi;", "\xce\x9e")
   ESCAPE_PAIR("&Pi;", "\xce\xa0")
   ESCAPE_PAIR("&Rho;", "\xce\xa1")
   ESCAPE_PAIR("&Sigma;", "\xce\xa3")
   ESCAPE_PAIR("&Tau;", "\xce\xa4")
   ESCAPE_PAIR("&Upsilon;", "\xce\xa5")
   ESCAPE_PAIR("&Phi;", "\xce\xa6")
   ESCAPE_PAIR("&Chi;", "\xce\xa7")
   ESCAPE_PAIR("&Psi;", "\xce\xa8")
   ESCAPE_PAIR("&Omega;", "\xce\xa9")
;
#undef ESCAPE_PAIR
/* Byte offsets into escape_strings, two per entity.
 *
 * Even index i     -> start of the entity name (e.g. "&lt;")
 * Odd index  i + 1 -> start of the replacement text for that entity
 *
 * Each offset counts every byte that precedes the string in
 * escape_strings, INCLUDING the NUL terminator of each earlier string.
 * (The previous values left the terminators out, so every entry after the
 * first pointed k bytes too early, where k is the entry's index.)
 *
 * This table must be regenerated whenever escape_strings changes.
 */
static const unsigned short escape_offsets[] = {
   /* most common escaped stuff */
      0,    5, /* &lt; */
      7,   12, /* &gt; */
     14,   20, /* &amp; */
     22,   29, /* &nbsp; */
     31,   38, /* &quot; */
   /* all the rest */
     40,   47, /* &copy; */
     50,   56, /* &reg; */
     59,   68, /* &hellip; */
     72,   81, /* &Ntilde; */
     84,   93, /* &ntilde; */
     96,  105, /* &Ccedil; */
    108,  117, /* &ccedil; */
    120,  128, /* &szlig; */
    131,  139, /* &THORN; */
    142,  150, /* &thorn; */
    153,  159, /* &ETH; */
    162,  168, /* &eth; */
    171,  179, /* &acute; */
    182,  190, /* &cedil; */
    193,  199, /* &deg; */
    202,  208, /* &uml; */
    211,  218, /* &cent; */
    221,  229, /* &pound; */
    232,  241, /* &curren; */
    244,  250, /* &yen; */
    253,  260, /* &euro; */
    264,  271, /* &sect; */
    274,  281, /* &para; */
    284,  292, /* &laquo; */
    295,  303, /* &raquo; */
    306,  314, /* &iexcl; */
    317,  326, /* &iquest; */
    329,  338, /* &brvbar; */
    341,  348, /* &ordf; */
    351,  358, /* &ordm; */
    361,  369, /* &micro; */
    372,  379, /* &macr; */
    382,  390, /* &oplus; */
    394,  400, /* &int; */
    404,  410, /* &sum; */
    414,  421, /* &prod; */
    425,  432, /* &perp; */
    436,  441, /* &or; */
    445,  451, /* &and; */
    455,  463, /* &equiv; */
    467,  472, /* &ne; */
    476,  485, /* &forall; */
    489,  497, /* &exist; */
    501,  509, /* &nabla; */
    513,  520, /* &larr; */
    524,  531, /* &rarr; */
    535,  542, /* &uarr; */
    546,  553, /* &darr; */
    557,  564, /* &harr; */
    568,  575, /* &lArr; */
    579,  586, /* &rArr; */
    590,  599, /* &plusmn; */
    602,  611, /* &middot; */
    614,  622, /* &times; */
    625,  634, /* &divide; */
    637,  644, /* &sup1; */
    647,  654, /* &sup2; */
    657,  664, /* &sup3; */
    667,  676, /* &frac14; */
    679,  688, /* &frac12; */
    691,  700, /* &frac34; */
    703,  709, /* &not; */
    712,  721, /* &Aacute; */
    724,  733, /* &Eacute; */
    736,  745, /* &Iacute; */
    748,  757, /* &Oacute; */
    760,  769, /* &Uacute; */
    772,  781, /* &Yacute; */
    784,  793, /* &aacute; */
    796,  805, /* &eacute; */
    808,  817, /* &iacute; */
    820,  829, /* &oacute; */
    832,  841, /* &uacute; */
    844,  853, /* &yacute; */
    856,  864, /* &Acirc; */
    867,  875, /* &Ecirc; */
    878,  886, /* &Icirc; */
    889,  897, /* &Ocirc; */
    900,  908, /* &Ucirc; */
    911,  919, /* &acirc; */
    922,  930, /* &ecirc; */
    933,  941, /* &icirc; */
    944,  952, /* &ocirc; */
    955,  963, /* &ucirc; */
    966,  975, /* &Agrave; */
    978,  987, /* &Egrave; */
    990,  999, /* &Igrave; */
   1002, 1011, /* &Ograve; */
   1014, 1023, /* &Ugrave; */
   1026, 1035, /* &agrave; */
   1038, 1047, /* &egrave; */
   1050, 1059, /* &igrave; */
   1062, 1071, /* &ograve; */
   1074, 1083, /* &ugrave; */
   1086, 1093, /* &Auml; */
   1096, 1103, /* &Euml; */
   1106, 1113, /* &Iuml; */
   1116, 1123, /* &Ouml; */
   1126, 1133, /* &auml; */
   1136, 1143, /* &euml; */
   1146, 1153, /* &iuml; */
   1156, 1163, /* &ouml; */
   1166, 1173, /* &uuml; */
   1176, 1183, /* &yuml; */
   1186, 1195, /* &Atilde; */
   1198, 1207, /* &atilde; */
   1210, 1219, /* &Otilde; */
   1222, 1231, /* &otilde; */
   1234, 1242, /* &aring; */
   1245, 1253, /* &Aring; */
   1256, 1265, /* &Oslash; */
   1268, 1277, /* &oslash; */
   1280, 1288, /* &AElig; */
   1291, 1299, /* &aelig; */
   1302, 1311, /* &Ntilde; (repeated entry) */
   1314, 1323, /* &ntilde; (repeated entry) */
   1326, 1335, /* &Ccedil; (repeated entry) */
   1338, 1347, /* &ccedil; (repeated entry) */
   1350, 1358, /* &szlig; (repeated entry) */
   1361, 1369, /* &THORN; (repeated entry) */
   1372, 1380, /* &thorn; (repeated entry) */
   1383, 1389, /* &ETH; (repeated entry) */
   1392, 1398, /* &eth; (repeated entry) */
   1401, 1409, /* &alpha; */
   1412, 1419, /* &beta; */
   1422, 1430, /* &gamma; */
   1433, 1441, /* &delta; */
   1444, 1454, /* &epsilon; */
   1457, 1464, /* &zeta; */
   1467, 1473, /* &eta; */
   1476, 1484, /* &theta; */
   1487, 1494, /* &iota; */
   1497, 1505, /* &kappa; */
   1508, 1517, /* &lambda; */
   1520, 1525, /* &mu; */
   1528, 1533, /* &nu; */
   1536, 1546, /* &omicron; */
   1549, 1554, /* &xi; */
   1557, 1562, /* &pi; */
   1565, 1571, /* &rho; */
   1574, 1582, /* &sigma; */
   1585, 1591, /* &tau; */
   1594, 1604, /* &upsilon; */
   1607, 1613, /* &phi; */
   1616, 1622, /* &chi; */
   1625, 1631, /* &psi; */
   1634, 1642, /* &omega; */
   1645, 1653, /* &Alpha; */
   1656, 1663, /* &Beta; */
   1666, 1674, /* &Gamma; */
   1677, 1685, /* &Delta; */
   1688, 1698, /* &Epsilon; */
   1701, 1708, /* &Zeta; */
   1711, 1717, /* &Eta; */
   1720, 1728, /* &Theta; */
   1731, 1738, /* &Iota; */
   1741, 1749, /* &Kappa; */
   1752, 1761, /* &Lambda; */
   1764, 1769, /* &Mu; */
   1772, 1777, /* &Nu; */
   1780, 1790, /* &Omicron; */
   1793, 1798, /* &Xi; */
   1801, 1806, /* &Pi; */
   1809, 1815, /* &Rho; */
   1818, 1826, /* &Sigma; */
   1829, 1835, /* &Tau; */
   1838, 1848, /* &Upsilon; */
   1851, 1857, /* &Phi; */
   1860, 1866, /* &Chi; */
   1869, 1875, /* &Psi; */
   1878, 1886  /* &Omega; */
};
static int
_is_white(int c)
{
@ -2492,12 +2537,14 @@ evas_object_textblock_text_markup_set(Evas_Object *obj, const char *text)
{
int i;
for (i = 0; i < (int)(sizeof(_escapes) / sizeof(char *)); i += 2)
for (i = 0; i < (int)(sizeof(escape_offsets) / sizeof(escape_offsets[0])); i += 2)
{
if (!strncmp(_escapes[i], esc_start,
esc_end - esc_start + 1))
const char *in = escape_strings + escape_offsets[i];
const char *out = escape_strings + escape_offsets[i + 1];
if (!strncmp(in, esc_start, esc_end - esc_start + 1))
{
evas_textblock_cursor_text_append(o->cursor, _escapes[i + 1]);
evas_textblock_cursor_text_append(o->cursor, out);
break;
}
}
@ -2595,19 +2642,21 @@ evas_object_textblock_text_markup_get(Evas_Object *obj)
{
int i;
for (i = 1; i < (int)(sizeof(_escapes) / sizeof(char *)); i += 2)
for (i = 1; i < (int)(sizeof(escape_offsets) / sizeof(escape_offsets[0])); i += 2)
{
if (!strncmp(_escapes[i], p,
strlen(_escapes[i])))
const char *in = escape_strings + escape_offsets[i];
const char *out = escape_strings + escape_offsets[i - 1];
if (!strncmp(in, p, strlen(in)))
{
o->markup_text = _strbuf_append_n(o->markup_text,
ps, p - ps,
&slen, &salloc);
o->markup_text = _strbuf_append(o->markup_text,
_escapes[i-1],
out,
&slen, &salloc);
ps = p + strlen(_escapes[i]);
p += strlen(_escapes[i]) - 1;
ps = p + strlen(in);
p += strlen(in) - 1;
}
}
}