summaryrefslogtreecommitdiff
path: root/src/lib/evas/canvas/evas_object_textblock.c
diff options
context:
space:
mode:
authorAli Alzyod <ali198724@gmail.com>2019-04-23 16:52:55 +0000
committerMarcel Hollerbach <mail@marcel-hollerbach.de>2019-04-25 14:04:05 +0200
commit555ac0a452c3986b77deefcf791e9919e00f5e6b (patch)
tree5403836de6442d537af9d94a0f3f107cdc9dd761 /src/lib/evas/canvas/evas_object_textblock.c
parent1c974289e4506a9eb82a7fdcfc147fa870ff34e3 (diff)
evas: change way of searching for Escape strings/values in textblock
Instead of the old linear search for escape strings and escape values, these strings and values are now sorted at compile time and looked up with a binary search. In simple words: instead of having one array of {escapeChar,escapeValue} pairs that is searched linearly, we now have two arrays of {escapeChar,escapeValue} pairs, one sorted by escapeChar and one sorted by escapeValue. One array is used to binary search escape chars, and the other is used to binary search escape values. 1- This speeds up the search a lot for both escape characters and escape values. 2- It makes the code easier to understand and trace. 3- It also fixes a bug: ``` int len; const char *value = evas_textblock_string_escape_get("", &len); // because of an unhandled case in the previous code, this returned "&quot;", which is the first element of the predefined escape character array ``` Reviewed-by: Xavi Artigas <xavierartigas@yahoo.es> Differential Revision: https://phab.enlightenment.org/D8610
Diffstat (limited to 'src/lib/evas/canvas/evas_object_textblock.c')
-rw-r--r--src/lib/evas/canvas/evas_object_textblock.c643
1 files changed, 440 insertions, 203 deletions
diff --git a/src/lib/evas/canvas/evas_object_textblock.c b/src/lib/evas/canvas/evas_object_textblock.c
index da73c03..0913fd3 100644
--- a/src/lib/evas/canvas/evas_object_textblock.c
+++ b/src/lib/evas/canvas/evas_object_textblock.c
@@ -1087,173 +1087,364 @@ _line_free(Evas_Object_Textblock_Line *ln)
1087} 1087}
1088 1088
1089/* table of html escapes (that i can find) this should be ordered with the 1089/* table of html escapes (that i can find) this should be ordered with the
1090 * most common first as it's a linear search to match - no hash for this. 1090 * sorted by their escape strings and values as it's a binary search to match - no hash for this.
1091 * 1091 *
1092 * these are stored as one large string and one additional array that 1092 * these are stored as array of struct of Escape_Value structure (no Runtime sort will happen)
1093 * contains the offsets to the tokens for space efficiency.
1094 */ 1093 */
1094
1095
/**
 * @internal
 * One escape <-> value pair, e.g. {"&amp;", "&"}.
 * Both string lengths are cached so lookups never have to call strlen().
 */
typedef struct _Escape_Value Escape_Value;

struct _Escape_Value
{
   const char *escape;   /* markup escape sequence, including '&' and ';' */
   const char *value;    /* UTF-8 bytes the escape stands for */
   size_t escape_len;    /* length of escape, without the terminating NUL */
   size_t value_len;     /* length of value, without the terminating NUL */
};

/* e and v must be string literals: sizeof(literal) - 1 is a true constant
 * expression, so the tables below are valid static initializers on every
 * compiler (a strlen() call is not a constant expression in ISO C). */
#define ESCAPE_VALUE(e,v) {(e), (v), sizeof(e) - 1, sizeof(v) - 1}

/**
 * @internal
 * @var escape_values_e_sorted[]
 * This array consists of Escape_Value structure sorted by escape string
 * (byte-wise, strcmp() order: upper case sorts before lower case).
 * It is binary searched, so escape strings must be unique and any newly
 * added value must be placed at its sorted position, and reflected on
 * escape_values_v_sorted.
 */
static const Escape_Value escape_values_e_sorted[] = {
   ESCAPE_VALUE("&Aacute;", "\xc3\x81"),
   ESCAPE_VALUE("&Acirc;", "\xc3\x82"),
   ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
   ESCAPE_VALUE("&Agrave;", "\xc3\x80"),
   ESCAPE_VALUE("&Aring;", "\xc3\x85"),
   ESCAPE_VALUE("&Atilde;", "\xc3\x83"),
   ESCAPE_VALUE("&Auml;", "\xc3\x84"),
   ESCAPE_VALUE("&Ccedil;", "\xc3\x87"),
   ESCAPE_VALUE("&Dagger;", "\xe2\x80\xa1"),
   ESCAPE_VALUE("&Eacute;", "\xc3\x89"),
   ESCAPE_VALUE("&Ecirc;", "\xc3\x8a"),
   ESCAPE_VALUE("&Egrave;", "\xc3\x88"),
   ESCAPE_VALUE("&Eth;", "\xc3\x90"),
   ESCAPE_VALUE("&Euml;", "\xc3\x8b"),
   ESCAPE_VALUE("&Iacute;", "\xc3\x8d"),
   ESCAPE_VALUE("&Icirc;", "\xc3\x8e"),
   ESCAPE_VALUE("&Igrave;", "\xc3\x8c"),
   ESCAPE_VALUE("&Iuml;", "\xc3\x8f"),
   ESCAPE_VALUE("&Ntilde;", "\xc3\x91"),
   ESCAPE_VALUE("&Oacute;", "\xc3\x93"),
   ESCAPE_VALUE("&Ocirc;", "\xc3\x94"),
   ESCAPE_VALUE("&Ograve;", "\xc3\x92"),
   ESCAPE_VALUE("&Oslash;", "\xc3\x98"),
   ESCAPE_VALUE("&Otilde;", "\xc3\x95"),
   ESCAPE_VALUE("&Ouml;", "\xc3\x96"),
   ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
   ESCAPE_VALUE("&Uacute;", "\xc3\x9a"),
   ESCAPE_VALUE("&Ucirc;", "\xc3\x9b"),
   ESCAPE_VALUE("&Ugrave;", "\xc3\x99"),
   ESCAPE_VALUE("&Yacute;", "\xc3\x9d"),
   ESCAPE_VALUE("&aacute;", "\xc3\xa1"),
   ESCAPE_VALUE("&acirc;", "\xc3\xa2"),
   ESCAPE_VALUE("&acute;", "\xc2\xb4"),
   ESCAPE_VALUE("&aelig;", "\xc3\xa6"),
   ESCAPE_VALUE("&agrave;", "\xc3\xa0"),
   ESCAPE_VALUE("&alpha;", "\xce\x91"),
   ESCAPE_VALUE("&and;", "\xe2\x88\xa7"),
   ESCAPE_VALUE("&aring;", "\xc3\xa5"),
   ESCAPE_VALUE("&atilde;", "\xc3\xa3"),
   ESCAPE_VALUE("&auml;", "\xc3\xa4"),
   ESCAPE_VALUE("&beta;", "\xce\x92"),
   ESCAPE_VALUE("&brvbar;", "\xc2\xa6"),
   ESCAPE_VALUE("&bull;", "\xe2\x80\xa2"),
   ESCAPE_VALUE("&ccedil;", "\xc3\xa7"),
   ESCAPE_VALUE("&cedil;", "\xc2\xb8"),
   ESCAPE_VALUE("&cent;", "\xc2\xa2"),
   ESCAPE_VALUE("&chi;", "\xce\xa7"),
   ESCAPE_VALUE("&copy;", "\xc2\xa9"),
   ESCAPE_VALUE("&curren;", "\xc2\xa4"),
   ESCAPE_VALUE("&dagger;", "\xe2\x80\xa0"),
   ESCAPE_VALUE("&darr;", "\xe2\x86\x93"),
   ESCAPE_VALUE("&deg;", "\xc2\xb0"),
   ESCAPE_VALUE("&delta;", "\xce\x94"),
   ESCAPE_VALUE("&divide;", "\xc3\xb7"),
   ESCAPE_VALUE("&eacute;", "\xc3\xa9"),
   ESCAPE_VALUE("&ecirc;", "\xc3\xaa"),
   ESCAPE_VALUE("&egrave;", "\xc3\xa8"),
   ESCAPE_VALUE("&epsilon;", "\xce\x95"),
   ESCAPE_VALUE("&equiv;", "\xe2\x89\xa1"),
   ESCAPE_VALUE("&eta;", "\xce\x97"),
   ESCAPE_VALUE("&eth;", "\xc3\xb0"),
   ESCAPE_VALUE("&euml;", "\xc3\xab"),
   ESCAPE_VALUE("&euro;", "\xe2\x82\xac"),
   ESCAPE_VALUE("&exist;", "\xe2\x88\x83"),
   ESCAPE_VALUE("&forall;", "\xe2\x88\x80"),
   ESCAPE_VALUE("&frac12;", "\xc2\xbd"),
   ESCAPE_VALUE("&frac14;", "\xc2\xbc"),
   ESCAPE_VALUE("&frac34;", "\xc2\xbe"),
   ESCAPE_VALUE("&gamma;", "\xce\x93"),
   ESCAPE_VALUE("&harr;", "\xe2\x86\x94"),
   ESCAPE_VALUE("&hellip;", "\xe2\x80\xa6"),
   ESCAPE_VALUE("&iacute;", "\xc3\xad"),
   ESCAPE_VALUE("&icirc;", "\xc3\xae"),
   ESCAPE_VALUE("&iexcl;", "\xc2\xa1"),
   ESCAPE_VALUE("&igrave;", "\xc3\xac"),
   ESCAPE_VALUE("&int;", "\xe2\x88\xab"),
   ESCAPE_VALUE("&iota;", "\xce\x99"),
   ESCAPE_VALUE("&iquest;", "\xc2\xbf"),
   ESCAPE_VALUE("&iuml;", "\xc3\xaf"),
   ESCAPE_VALUE("&kappa;", "\xce\x9a"),
   /* double arrow: 'A' sorts before 'a', hence ahead of "&lambda;" */
   ESCAPE_VALUE("&lArr;", "\xe2\x87\x90"),
   ESCAPE_VALUE("&lambda;", "\xce\x9b"),
   ESCAPE_VALUE("&laquo;", "\xc2\xab"),
   ESCAPE_VALUE("&larr;", "\xe2\x86\x90"),
   ESCAPE_VALUE("&lrm;", "\xe2\x80\x8e"),
   ESCAPE_VALUE("&macr;", "\xc2\xaf"),
   ESCAPE_VALUE("&micro;", "\xc2\xb5"),
   ESCAPE_VALUE("&middot;", "\xc2\xb7"),
   ESCAPE_VALUE("&mu;", "\xce\x9c"),
   ESCAPE_VALUE("&nabla;", "\xe2\x88\x87"),
   ESCAPE_VALUE("&nbsp;", "\xc2\xa0"),
   ESCAPE_VALUE("&ne;", "\xe2\x89\xa0"),
   ESCAPE_VALUE("&not;", "\xc2\xac"),
   ESCAPE_VALUE("&ntilde;", "\xc3\xb1"),
   ESCAPE_VALUE("&nu;", "\xce\x9d"),
   ESCAPE_VALUE("&oacute;", "\xc3\xb3"),
   ESCAPE_VALUE("&ocirc;", "\xc3\xb4"),
   ESCAPE_VALUE("&ograve;", "\xc3\xb2"),
   ESCAPE_VALUE("&omega;", "\xce\xa9"),
   ESCAPE_VALUE("&omicron;", "\xce\x9f"),
   ESCAPE_VALUE("&oplus;", "\xe2\x8a\x95"),
   ESCAPE_VALUE("&or;", "\xe2\x88\xa8"),
   ESCAPE_VALUE("&ordf;", "\xc2\xaa"),
   ESCAPE_VALUE("&ordm;", "\xc2\xba"),
   ESCAPE_VALUE("&oslash;", "\xc3\xb8"),
   ESCAPE_VALUE("&otilde;", "\xc3\xb5"),
   ESCAPE_VALUE("&ouml;", "\xc3\xb6"),
   ESCAPE_VALUE("&para;", "\xc2\xb6"),
   ESCAPE_VALUE("&perp;", "\xe2\x8a\xa5"),
   ESCAPE_VALUE("&phi;", "\xce\xa6"),
   ESCAPE_VALUE("&pi;", "\xce\xa0"),
   ESCAPE_VALUE("&plusmn;", "\xc2\xb1"),
   ESCAPE_VALUE("&pound;", "\xc2\xa3"),
   ESCAPE_VALUE("&prod;", "\xe2\x88\x8f"),
   ESCAPE_VALUE("&psi;", "\xce\xa8"),
   /* double arrow: 'A' sorts before 'a', hence ahead of "&raquo;" */
   ESCAPE_VALUE("&rArr;", "\xe2\x87\x92"),
   ESCAPE_VALUE("&raquo;", "\xc2\xbb"),
   ESCAPE_VALUE("&rarr;", "\xe2\x86\x92"),
   ESCAPE_VALUE("&reg;", "\xc2\xae"),
   ESCAPE_VALUE("&rho;", "\xce\xa1"),
   ESCAPE_VALUE("&rlm;", "\xe2\x80\x8f"),
   ESCAPE_VALUE("&sect;", "\xc2\xa7"),
   ESCAPE_VALUE("&shy;", "\xc2\xad"),
   ESCAPE_VALUE("&sigma;", "\xce\xa3"),
   ESCAPE_VALUE("&sum;", "\xe2\x88\x91"),
   ESCAPE_VALUE("&sup1;", "\xc2\xb9"),
   ESCAPE_VALUE("&sup2;", "\xc2\xb2"),
   ESCAPE_VALUE("&sup3;", "\xc2\xb3"),
   ESCAPE_VALUE("&szlig;", "\xc3\x9f"),
   ESCAPE_VALUE("&tau;", "\xce\xa4"),
   ESCAPE_VALUE("&theta;", "\xce\x98"),
   ESCAPE_VALUE("&thorn;", "\xc3\xbe"),
   ESCAPE_VALUE("&times;", "\xc3\x97"),
   ESCAPE_VALUE("&uacute;", "\xc3\xba"),
   ESCAPE_VALUE("&uarr;", "\xe2\x86\x91"),
   ESCAPE_VALUE("&ucirc;", "\xc3\xbb"),
   ESCAPE_VALUE("&ugrave;", "\xc3\xb9"),
   ESCAPE_VALUE("&uml;", "\xc2\xa8"),
   ESCAPE_VALUE("&upsilon;", "\xce\xa5"),
   ESCAPE_VALUE("&uuml;", "\xc3\xbc"),
   ESCAPE_VALUE("&xi;", "\xce\x9e"),
   ESCAPE_VALUE("&yacute;", "\xc3\xbd"),
   ESCAPE_VALUE("&yen;", "\xc2\xa5"),
   ESCAPE_VALUE("&yuml;", "\xc3\xbf"),
   ESCAPE_VALUE("&zeta;", "\xce\x96"),
   ESCAPE_VALUE("&zwj;", "\xe2\x80\x8d"),
   ESCAPE_VALUE("&zwnj;", "\xe2\x80\x8c"),
};


/**
 * @internal
 * @var escape_values_e_common_sorted[]
 * same as escape_values_e_sorted with small subset of common escapes
 * (sorted by escape string as well: "&lt;" must come before "&quot;")
 */
static const Escape_Value escape_values_e_common_sorted[] = {
   ESCAPE_VALUE("&amp;", "\x26"),
   ESCAPE_VALUE("&apos;", "\x27"),
   ESCAPE_VALUE("&gt;", "\x3e"),
   ESCAPE_VALUE("&lt;", "\x3c"),
   ESCAPE_VALUE("&quot;", "\x22"),
};

/**
 * @internal
 * @var escape_values_v_sorted[]
 * This array consists of Escape_Value structure sorted by escape value
 * (byte-wise order of the UTF-8 encoding).
 * And new added value must be placed sorted position, and reflected on escape_values_e_sorted
 */
static const Escape_Value escape_values_v_sorted[] = {
   ESCAPE_VALUE("&nbsp;", "\xc2\xa0"),
   ESCAPE_VALUE("&iexcl;", "\xc2\xa1"),
   ESCAPE_VALUE("&cent;", "\xc2\xa2"),
   ESCAPE_VALUE("&pound;", "\xc2\xa3"),
   ESCAPE_VALUE("&curren;", "\xc2\xa4"),
   ESCAPE_VALUE("&yen;", "\xc2\xa5"),
   ESCAPE_VALUE("&brvbar;", "\xc2\xa6"),
   ESCAPE_VALUE("&sect;", "\xc2\xa7"),
   ESCAPE_VALUE("&uml;", "\xc2\xa8"),
   ESCAPE_VALUE("&copy;", "\xc2\xa9"),
   ESCAPE_VALUE("&ordf;", "\xc2\xaa"),
   ESCAPE_VALUE("&laquo;", "\xc2\xab"),
   ESCAPE_VALUE("&not;", "\xc2\xac"),
   ESCAPE_VALUE("&shy;", "\xc2\xad"),
   ESCAPE_VALUE("&reg;", "\xc2\xae"),
   ESCAPE_VALUE("&macr;", "\xc2\xaf"),
   ESCAPE_VALUE("&deg;", "\xc2\xb0"),
   ESCAPE_VALUE("&plusmn;", "\xc2\xb1"),
   ESCAPE_VALUE("&sup2;", "\xc2\xb2"),
   ESCAPE_VALUE("&sup3;", "\xc2\xb3"),
   ESCAPE_VALUE("&acute;", "\xc2\xb4"),
   ESCAPE_VALUE("&micro;", "\xc2\xb5"),
   ESCAPE_VALUE("&para;", "\xc2\xb6"),
   ESCAPE_VALUE("&middot;", "\xc2\xb7"),
   ESCAPE_VALUE("&cedil;", "\xc2\xb8"),
   ESCAPE_VALUE("&sup1;", "\xc2\xb9"),
   ESCAPE_VALUE("&ordm;", "\xc2\xba"),
   ESCAPE_VALUE("&raquo;", "\xc2\xbb"),
   ESCAPE_VALUE("&frac14;", "\xc2\xbc"),
   ESCAPE_VALUE("&frac12;", "\xc2\xbd"),
   ESCAPE_VALUE("&frac34;", "\xc2\xbe"),
   ESCAPE_VALUE("&iquest;", "\xc2\xbf"),
   ESCAPE_VALUE("&Agrave;", "\xc3\x80"),
   ESCAPE_VALUE("&Aacute;", "\xc3\x81"),
   ESCAPE_VALUE("&Acirc;", "\xc3\x82"),
   ESCAPE_VALUE("&Atilde;", "\xc3\x83"),
   ESCAPE_VALUE("&Auml;", "\xc3\x84"),
   ESCAPE_VALUE("&Aring;", "\xc3\x85"),
   ESCAPE_VALUE("&Aelig;", "\xc3\x86"),
   ESCAPE_VALUE("&Ccedil;", "\xc3\x87"),
   ESCAPE_VALUE("&Egrave;", "\xc3\x88"),
   ESCAPE_VALUE("&Eacute;", "\xc3\x89"),
   ESCAPE_VALUE("&Ecirc;", "\xc3\x8a"),
   ESCAPE_VALUE("&Euml;", "\xc3\x8b"),
   ESCAPE_VALUE("&Igrave;", "\xc3\x8c"),
   ESCAPE_VALUE("&Iacute;", "\xc3\x8d"),
   ESCAPE_VALUE("&Icirc;", "\xc3\x8e"),
   ESCAPE_VALUE("&Iuml;", "\xc3\x8f"),
   ESCAPE_VALUE("&Eth;", "\xc3\x90"),
   ESCAPE_VALUE("&Ntilde;", "\xc3\x91"),
   ESCAPE_VALUE("&Ograve;", "\xc3\x92"),
   ESCAPE_VALUE("&Oacute;", "\xc3\x93"),
   ESCAPE_VALUE("&Ocirc;", "\xc3\x94"),
   ESCAPE_VALUE("&Otilde;", "\xc3\x95"),
   ESCAPE_VALUE("&Ouml;", "\xc3\x96"),
   ESCAPE_VALUE("&times;", "\xc3\x97"),
   ESCAPE_VALUE("&Oslash;", "\xc3\x98"),
   ESCAPE_VALUE("&Ugrave;", "\xc3\x99"),
   ESCAPE_VALUE("&Uacute;", "\xc3\x9a"),
   ESCAPE_VALUE("&Ucirc;", "\xc3\x9b"),
   ESCAPE_VALUE("&Yacute;", "\xc3\x9d"),
   ESCAPE_VALUE("&Thorn;", "\xc3\x9e"),
   ESCAPE_VALUE("&szlig;", "\xc3\x9f"),
   ESCAPE_VALUE("&agrave;", "\xc3\xa0"),
   ESCAPE_VALUE("&aacute;", "\xc3\xa1"),
   ESCAPE_VALUE("&acirc;", "\xc3\xa2"),
   ESCAPE_VALUE("&atilde;", "\xc3\xa3"),
   ESCAPE_VALUE("&auml;", "\xc3\xa4"),
   ESCAPE_VALUE("&aring;", "\xc3\xa5"),
   ESCAPE_VALUE("&aelig;", "\xc3\xa6"),
   ESCAPE_VALUE("&ccedil;", "\xc3\xa7"),
   ESCAPE_VALUE("&egrave;", "\xc3\xa8"),
   ESCAPE_VALUE("&eacute;", "\xc3\xa9"),
   ESCAPE_VALUE("&ecirc;", "\xc3\xaa"),
   ESCAPE_VALUE("&euml;", "\xc3\xab"),
   ESCAPE_VALUE("&igrave;", "\xc3\xac"),
   ESCAPE_VALUE("&iacute;", "\xc3\xad"),
   ESCAPE_VALUE("&icirc;", "\xc3\xae"),
   ESCAPE_VALUE("&iuml;", "\xc3\xaf"),
   ESCAPE_VALUE("&eth;", "\xc3\xb0"),
   ESCAPE_VALUE("&ntilde;", "\xc3\xb1"),
   ESCAPE_VALUE("&ograve;", "\xc3\xb2"),
   ESCAPE_VALUE("&oacute;", "\xc3\xb3"),
   ESCAPE_VALUE("&ocirc;", "\xc3\xb4"),
   ESCAPE_VALUE("&otilde;", "\xc3\xb5"),
   ESCAPE_VALUE("&ouml;", "\xc3\xb6"),
   ESCAPE_VALUE("&divide;", "\xc3\xb7"),
   ESCAPE_VALUE("&oslash;", "\xc3\xb8"),
   ESCAPE_VALUE("&ugrave;", "\xc3\xb9"),
   ESCAPE_VALUE("&uacute;", "\xc3\xba"),
   ESCAPE_VALUE("&ucirc;", "\xc3\xbb"),
   ESCAPE_VALUE("&uuml;", "\xc3\xbc"),
   ESCAPE_VALUE("&yacute;", "\xc3\xbd"),
   ESCAPE_VALUE("&thorn;", "\xc3\xbe"),
   ESCAPE_VALUE("&yuml;", "\xc3\xbf"),
   ESCAPE_VALUE("&alpha;", "\xce\x91"),
   ESCAPE_VALUE("&beta;", "\xce\x92"),
   ESCAPE_VALUE("&gamma;", "\xce\x93"),
   ESCAPE_VALUE("&delta;", "\xce\x94"),
   ESCAPE_VALUE("&epsilon;", "\xce\x95"),
   ESCAPE_VALUE("&zeta;", "\xce\x96"),
   ESCAPE_VALUE("&eta;", "\xce\x97"),
   ESCAPE_VALUE("&theta;", "\xce\x98"),
   ESCAPE_VALUE("&iota;", "\xce\x99"),
   ESCAPE_VALUE("&kappa;", "\xce\x9a"),
   ESCAPE_VALUE("&lambda;", "\xce\x9b"),
   ESCAPE_VALUE("&mu;", "\xce\x9c"),
   ESCAPE_VALUE("&nu;", "\xce\x9d"),
   ESCAPE_VALUE("&xi;", "\xce\x9e"),
   ESCAPE_VALUE("&omicron;", "\xce\x9f"),
   ESCAPE_VALUE("&pi;", "\xce\xa0"),
   ESCAPE_VALUE("&rho;", "\xce\xa1"),
   ESCAPE_VALUE("&sigma;", "\xce\xa3"),
   ESCAPE_VALUE("&tau;", "\xce\xa4"),
   ESCAPE_VALUE("&upsilon;", "\xce\xa5"),
   ESCAPE_VALUE("&phi;", "\xce\xa6"),
   ESCAPE_VALUE("&chi;", "\xce\xa7"),
   ESCAPE_VALUE("&psi;", "\xce\xa8"),
   ESCAPE_VALUE("&omega;", "\xce\xa9"),
   ESCAPE_VALUE("&zwnj;", "\xe2\x80\x8c"),
   ESCAPE_VALUE("&zwj;", "\xe2\x80\x8d"),
   ESCAPE_VALUE("&lrm;", "\xe2\x80\x8e"),
   ESCAPE_VALUE("&rlm;", "\xe2\x80\x8f"),
   ESCAPE_VALUE("&dagger;", "\xe2\x80\xa0"),
   ESCAPE_VALUE("&Dagger;", "\xe2\x80\xa1"),
   ESCAPE_VALUE("&bull;", "\xe2\x80\xa2"),
   ESCAPE_VALUE("&hellip;", "\xe2\x80\xa6"),
   ESCAPE_VALUE("&euro;", "\xe2\x82\xac"),
   ESCAPE_VALUE("&larr;", "\xe2\x86\x90"),
   ESCAPE_VALUE("&uarr;", "\xe2\x86\x91"),
   ESCAPE_VALUE("&rarr;", "\xe2\x86\x92"),
   ESCAPE_VALUE("&darr;", "\xe2\x86\x93"),
   ESCAPE_VALUE("&harr;", "\xe2\x86\x94"),
   ESCAPE_VALUE("&lArr;", "\xe2\x87\x90"),
   ESCAPE_VALUE("&rArr;", "\xe2\x87\x92"),
   ESCAPE_VALUE("&forall;", "\xe2\x88\x80"),
   ESCAPE_VALUE("&exist;", "\xe2\x88\x83"),
   ESCAPE_VALUE("&nabla;", "\xe2\x88\x87"),
   ESCAPE_VALUE("&prod;", "\xe2\x88\x8f"),
   ESCAPE_VALUE("&sum;", "\xe2\x88\x91"),
   ESCAPE_VALUE("&and;", "\xe2\x88\xa7"),
   ESCAPE_VALUE("&or;", "\xe2\x88\xa8"),
   ESCAPE_VALUE("&int;", "\xe2\x88\xab"),
   ESCAPE_VALUE("&ne;", "\xe2\x89\xa0"),
   ESCAPE_VALUE("&equiv;", "\xe2\x89\xa1"),
   ESCAPE_VALUE("&oplus;", "\xe2\x8a\x95"),
   ESCAPE_VALUE("&perp;", "\xe2\x8a\xa5"),
};

/**
 * @internal
 * @var escape_values_v_common_sorted[]
 * same as escape_values_v_sorted with small subset of common escapes
 */
static const Escape_Value escape_values_v_common_sorted[] = {
   ESCAPE_VALUE("&quot;", "\x22"),
   ESCAPE_VALUE("&amp;", "\x26"),
   ESCAPE_VALUE("&apos;", "\x27"),
   ESCAPE_VALUE("&lt;", "\x3c"),
   ESCAPE_VALUE("&gt;", "\x3e"),
};
1446
1447
1257 1448
1258/** 1449/**
1259 * @internal 1450 * @internal
@@ -7521,48 +7712,93 @@ _escaped_is_eq_and_advance(const char *s, const char *s_end,
7521 return ((s == s_end) && reached_end); 7712 return ((s == s_end) && reached_end);
7522} 7713}
7523 7714
7715
7716/**
7717 * @internal
7718 *
7719 * @param s the escape string to search for its index
7720 * @param s_len length of s string
7721 * @param escape_values array of Escape_Value to look inside, Sorted by Escape
7722 * @param escape_values_len is the len of Escape_Value array
7723 */
7724int _escaped_string_search(const char * s, size_t s_len, const Escape_Value escape_values[], const size_t escape_values_len)
7725{
7726 int l = 0;
7727 int r = escape_values_len - 1;
7728 while (l <= r)
7729 {
7730 int m = (l + r) / 2;
7731 int res = strncmp(s, escape_values[m].escape, MAX(escape_values[m].escape_len, s_len));
7732 if (res == 0)
7733 {
7734 //Handle special case when s_len is less than escape_len
7735 //then we will continue searching
7736 //example ("&gt;",1,....)
7737 if (escape_values[m].escape_len > s_len)
7738 res = -1;
7739 else if (escape_values[m].escape_len < s_len)
7740 res = 1;
7741 else return m;
7742 }
7743 if (res > 0)
7744 l = m + 1;
7745 else
7746 r = m - 1;
7747 }
7748 return -1;
7749}
7750
7751/**
7752 * @internal
7753 *
7754 * @param s the value string to search for its index
7755 * @param escape_values array of Escape_Value to look inside, Sorted by Value
7756 * @param escape_values_len is the len of Escape_Value array
7757 */
7758int _escaped_value_search(const char * s, const Escape_Value escape_values[], const size_t escape_values_len)
7759{
7760 int l = 0;
7761 int r = escape_values_len - 1;
7762 while (l <= r)
7763 {
7764 int m = (l + r) / 2;
7765 int res = strncmp(s, escape_values[m].value, escape_values[m].value_len);
7766 if (res == 0)
7767 return m;
7768 if (res > 0)
7769 l = m + 1;
7770 else
7771 r = m - 1;
7772 }
7773 return -1;
7774}
7775
7776
7524/** 7777/**
7525 * @internal 7778 * @internal
7526 * 7779 *
7527 * @param s the string to match 7780 * @param s the string to match
7528 */ 7781 */
7782
7529static inline const char * 7783static inline const char *
7530_escaped_char_match(const char *s, int *adv) 7784_escaped_char_match(const char *s, int *adv)
7531{ 7785{
7532 const char *map_itr, *map_end, *mc, *sc; 7786 static const size_t escape_common_size = sizeof(escape_values_v_common_sorted) / sizeof(Escape_Value);
7533 7787 int n_ret = _escaped_value_search(s, escape_values_v_common_sorted, escape_common_size);
7534 map_itr = escape_strings; 7788 if (n_ret != -1)
7535 map_end = map_itr + sizeof(escape_strings);
7536
7537 while (map_itr < map_end)
7538 { 7789 {
7539 const char *escape; 7790 *adv = (int) escape_values_v_common_sorted[n_ret].value_len;
7540 int match; 7791 return escape_values_v_common_sorted[n_ret].escape;
7541 7792 }
7542 escape = map_itr; 7793 else
7543 _escaped_advance_after_end_of_string(&map_itr); 7794 {
7544 if (map_itr >= map_end) break; 7795 static const size_t escape_size = sizeof(escape_values_v_sorted) / sizeof(Escape_Value);
7545 7796 n_ret = _escaped_value_search(s, escape_values_v_sorted, escape_size);
7546 mc = map_itr; 7797 if (n_ret != -1)
7547 sc = s;
7548 match = 1;
7549 while ((*mc) && (*sc))
7550 {
7551 if ((unsigned char)*sc < (unsigned char)*mc) return NULL;
7552 if (*sc != *mc)
7553 {
7554 match = 0;
7555 break;
7556 }
7557 mc++;
7558 sc++;
7559 }
7560 if (match)
7561 { 7798 {
7562 *adv = mc - map_itr; 7799 *adv = (int)escape_values_v_sorted[n_ret].value_len;
7563 return escape; 7800 return escape_values_v_sorted[n_ret].escape;
7564 } 7801 }
7565 _escaped_advance_after_end_of_string(&map_itr);
7566 } 7802 }
7567 return NULL; 7803 return NULL;
7568} 7804}
@@ -7617,17 +7853,18 @@ _escaped_char_get(const char *s, const char *s_end)
7617 } 7853 }
7618 else 7854 else
7619 { 7855 {
7620 const char *map_itr, *map_end; 7856 static const size_t escape_common_size = sizeof(escape_values_e_common_sorted) / sizeof(Escape_Value);
7621 7857 int n_ret = _escaped_string_search(s, s_end-s, escape_values_e_common_sorted, escape_common_size);
7622 map_itr = escape_strings; 7858 if (n_ret != -1)
7623 map_end = map_itr + sizeof(escape_strings); 7859 {
7624 7860 return escape_values_e_common_sorted[n_ret].value;
7625 while (map_itr < map_end) 7861 }
7862 else
7626 { 7863 {
7627 if (_escaped_is_eq_and_advance(s, s_end, &map_itr, map_end)) 7864 static const size_t escape_size = sizeof(escape_values_e_sorted) / sizeof(Escape_Value);
7628 return map_itr; 7865 n_ret = _escaped_string_search(s, s_end-s, escape_values_e_sorted, escape_size);
7629 if (map_itr < map_end) 7866 if (n_ret != -1)
7630 _escaped_advance_after_end_of_string(&map_itr); 7867 return escape_values_e_sorted[n_ret].value;
7631 } 7868 }
7632 } 7869 }
7633 7870