diff options
author | Vincent Torri <vincent.torri@gmail.com> | 2013-11-01 16:00:39 +0100 |
---|---|---|
committer | Cedric Bail <cedric.bail@samsung.com> | 2013-11-04 16:40:29 +0900 |
commit | bc07d80e0cc1d2f7e72db2c83f2c37e91e2aef44 (patch) | |
tree | 6da1c43b55fa739cf051f7b7fe2cd771734e9e55 /src | |
parent | 058e03aa7401a197d400ee8330bae5d54e8c42db (diff) |
evil: add regex code (needed for elm).
Signed-off-by: Cedric Bail <cedric.bail@samsung.com>
Diffstat (limited to '')
-rw-r--r-- | src/Makefile_Evil.am | 31 | ||||
-rw-r--r-- | src/lib/evil/regex/cclass.h | 31 | ||||
-rw-r--r-- | src/lib/evil/regex/cname.h | 102 | ||||
-rw-r--r-- | src/lib/evil/regex/engine.c | 1019 | ||||
-rw-r--r-- | src/lib/evil/regex/engine.ih | 35 | ||||
-rw-r--r-- | src/lib/evil/regex/regcomp.c | 1603 | ||||
-rw-r--r-- | src/lib/evil/regex/regcomp.ih | 51 | ||||
-rw-r--r-- | src/lib/evil/regex/regerror.c | 126 | ||||
-rw-r--r-- | src/lib/evil/regex/regerror.ih | 12 | ||||
-rw-r--r-- | src/lib/evil/regex/regex.h | 77 | ||||
-rw-r--r-- | src/lib/evil/regex/regex2.h | 134 | ||||
-rw-r--r-- | src/lib/evil/regex/regexec.c | 138 | ||||
-rw-r--r-- | src/lib/evil/regex/regfree.c | 37 | ||||
-rw-r--r-- | src/lib/evil/regex/utils.h | 22 |
14 files changed, 3415 insertions, 3 deletions
diff --git a/src/Makefile_Evil.am b/src/Makefile_Evil.am index b6b4e0af95..47e15b4b31 100644 --- a/src/Makefile_Evil.am +++ b/src/Makefile_Evil.am | |||
@@ -69,9 +69,32 @@ else | |||
69 | lib_evil_libevil_la_LINK = $(CXXLINK) $(lib_evil_libevil_la_LDFLAGS) | 69 | lib_evil_libevil_la_LINK = $(CXXLINK) $(lib_evil_libevil_la_LDFLAGS) |
70 | endif | 70 | endif |
71 | 71 | ||
72 | # regex | ||
73 | |||
74 | dist_install_evilheaders_DATA += \ | ||
75 | lib/evil/regex/regex.h | ||
76 | |||
77 | lib_evil_libevil_la_SOURCES += \ | ||
78 | lib/evil/regex/regcomp.c \ | ||
79 | lib/evil/regex/regerror.c \ | ||
80 | lib/evil/regex/regexec.c \ | ||
81 | lib/evil/regex/regfree.c \ | ||
82 | lib/evil/regex/cclass.h \ | ||
83 | lib/evil/regex/cname.h \ | ||
84 | lib/evil/regex/regex2.h \ | ||
85 | lib/evil/regex/utils.h | ||
86 | |||
87 | lib_evil_libevil_la_CPPFLAGS = \ | ||
88 | -I$(top_srcdir)/src/lib/evil \ | ||
89 | -I$(top_srcdir)/src/lib/evil/regex \ | ||
90 | -DPOSIX_MISTAKE | ||
91 | |||
92 | #libdl | ||
93 | |||
72 | lib_evil_libdl_la_SOURCES = lib/evil/dlfcn.c | 94 | lib_evil_libdl_la_SOURCES = lib/evil/dlfcn.c |
73 | 95 | ||
74 | lib_evil_libdl_la_CPPFLAGS = -I$(top_builddir)/src/lib/efl \ | 96 | lib_evil_libdl_la_CPPFLAGS = \ |
97 | -I$(top_builddir)/src/lib/efl \ | ||
75 | @EVIL_CFLAGS@ \ | 98 | @EVIL_CFLAGS@ \ |
76 | @EVIL_DLFCN_CPPFLAGS@ | 99 | @EVIL_DLFCN_CPPFLAGS@ |
77 | lib_evil_libdl_la_LIBADD = \ | 100 | lib_evil_libdl_la_LIBADD = \ |
@@ -121,6 +144,8 @@ bin_evil_test_evil_LDADD = @USE_EVIL_LIBS@ | |||
121 | 144 | ||
122 | endif | 145 | endif |
123 | EXTRA_DIST += \ | 146 | EXTRA_DIST += \ |
124 | lib/evil/gdtoa/README \ | 147 | lib/evil/regex/regerror.ih \ |
125 | lib/evil/gdtoa/README.mingw \ | 148 | lib/evil/regex/engine.ih \ |
149 | lib/evil/regex/regcomp.ih \ | ||
150 | lib/evil/regex/engine.c \ | ||
126 | bin/evil/memcpy_glibc_i686.S | 151 | bin/evil/memcpy_glibc_i686.S |
diff --git a/src/lib/evil/regex/cclass.h b/src/lib/evil/regex/cclass.h new file mode 100644 index 0000000000..0c293028e9 --- /dev/null +++ b/src/lib/evil/regex/cclass.h | |||
@@ -0,0 +1,31 @@ | |||
1 | /* character-class table */ | ||
2 | static struct cclass { | ||
3 | char *name; | ||
4 | char *chars; | ||
5 | char *multis; | ||
6 | } cclasses[] = { | ||
7 | "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ | ||
8 | 0123456789", "", | ||
9 | "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", | ||
10 | "", | ||
11 | "blank", " \t", "", | ||
12 | "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ | ||
13 | \25\26\27\30\31\32\33\34\35\36\37\177", "", | ||
14 | "digit", "0123456789", "", | ||
15 | "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ | ||
16 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", | ||
17 | "", | ||
18 | "lower", "abcdefghijklmnopqrstuvwxyz", | ||
19 | "", | ||
20 | "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ | ||
21 | 0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", | ||
22 | "", | ||
23 | "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", | ||
24 | "", | ||
25 | "space", "\t\n\v\f\r ", "", | ||
26 | "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", | ||
27 | "", | ||
28 | "xdigit", "0123456789ABCDEFabcdef", | ||
29 | "", | ||
30 | NULL, 0, "" | ||
31 | }; | ||
diff --git a/src/lib/evil/regex/cname.h b/src/lib/evil/regex/cname.h new file mode 100644 index 0000000000..02e86e912e --- /dev/null +++ b/src/lib/evil/regex/cname.h | |||
@@ -0,0 +1,102 @@ | |||
1 | /* character-name table */ | ||
2 | static struct cname { | ||
3 | char *name; | ||
4 | char code; | ||
5 | } cnames[] = { | ||
6 | "NUL", '\0', | ||
7 | "SOH", '\001', | ||
8 | "STX", '\002', | ||
9 | "ETX", '\003', | ||
10 | "EOT", '\004', | ||
11 | "ENQ", '\005', | ||
12 | "ACK", '\006', | ||
13 | "BEL", '\007', | ||
14 | "alert", '\007', | ||
15 | "BS", '\010', | ||
16 | "backspace", '\b', | ||
17 | "HT", '\011', | ||
18 | "tab", '\t', | ||
19 | "LF", '\012', | ||
20 | "newline", '\n', | ||
21 | "VT", '\013', | ||
22 | "vertical-tab", '\v', | ||
23 | "FF", '\014', | ||
24 | "form-feed", '\f', | ||
25 | "CR", '\015', | ||
26 | "carriage-return", '\r', | ||
27 | "SO", '\016', | ||
28 | "SI", '\017', | ||
29 | "DLE", '\020', | ||
30 | "DC1", '\021', | ||
31 | "DC2", '\022', | ||
32 | "DC3", '\023', | ||
33 | "DC4", '\024', | ||
34 | "NAK", '\025', | ||
35 | "SYN", '\026', | ||
36 | "ETB", '\027', | ||
37 | "CAN", '\030', | ||
38 | "EM", '\031', | ||
39 | "SUB", '\032', | ||
40 | "ESC", '\033', | ||
41 | "IS4", '\034', | ||
42 | "FS", '\034', | ||
43 | "IS3", '\035', | ||
44 | "GS", '\035', | ||
45 | "IS2", '\036', | ||
46 | "RS", '\036', | ||
47 | "IS1", '\037', | ||
48 | "US", '\037', | ||
49 | "space", ' ', | ||
50 | "exclamation-mark", '!', | ||
51 | "quotation-mark", '"', | ||
52 | "number-sign", '#', | ||
53 | "dollar-sign", '$', | ||
54 | "percent-sign", '%', | ||
55 | "ampersand", '&', | ||
56 | "apostrophe", '\'', | ||
57 | "left-parenthesis", '(', | ||
58 | "right-parenthesis", ')', | ||
59 | "asterisk", '*', | ||
60 | "plus-sign", '+', | ||
61 | "comma", ',', | ||
62 | "hyphen", '-', | ||
63 | "hyphen-minus", '-', | ||
64 | "period", '.', | ||
65 | "full-stop", '.', | ||
66 | "slash", '/', | ||
67 | "solidus", '/', | ||
68 | "zero", '0', | ||
69 | "one", '1', | ||
70 | "two", '2', | ||
71 | "three", '3', | ||
72 | "four", '4', | ||
73 | "five", '5', | ||
74 | "six", '6', | ||
75 | "seven", '7', | ||
76 | "eight", '8', | ||
77 | "nine", '9', | ||
78 | "colon", ':', | ||
79 | "semicolon", ';', | ||
80 | "less-than-sign", '<', | ||
81 | "equals-sign", '=', | ||
82 | "greater-than-sign", '>', | ||
83 | "question-mark", '?', | ||
84 | "commercial-at", '@', | ||
85 | "left-square-bracket", '[', | ||
86 | "backslash", '\\', | ||
87 | "reverse-solidus", '\\', | ||
88 | "right-square-bracket", ']', | ||
89 | "circumflex", '^', | ||
90 | "circumflex-accent", '^', | ||
91 | "underscore", '_', | ||
92 | "low-line", '_', | ||
93 | "grave-accent", '`', | ||
94 | "left-brace", '{', | ||
95 | "left-curly-bracket", '{', | ||
96 | "vertical-line", '|', | ||
97 | "right-brace", '}', | ||
98 | "right-curly-bracket", '}', | ||
99 | "tilde", '~', | ||
100 | "DEL", '\177', | ||
101 | NULL, 0, | ||
102 | }; | ||
diff --git a/src/lib/evil/regex/engine.c b/src/lib/evil/regex/engine.c new file mode 100644 index 0000000000..919fe3f641 --- /dev/null +++ b/src/lib/evil/regex/engine.c | |||
@@ -0,0 +1,1019 @@ | |||
1 | /* | ||
2 | * The matching engine and friends. This file is #included by regexec.c | ||
3 | * after suitable #defines of a variety of macros used herein, so that | ||
4 | * different state representations can be used without duplicating masses | ||
5 | * of code. | ||
6 | */ | ||
7 | |||
8 | #ifdef SNAMES | ||
9 | #define matcher smatcher | ||
10 | #define fast sfast | ||
11 | #define slow sslow | ||
12 | #define dissect sdissect | ||
13 | #define backref sbackref | ||
14 | #define step sstep | ||
15 | #define print sprint | ||
16 | #define at sat | ||
17 | #define match smat | ||
18 | #endif | ||
19 | #ifdef LNAMES | ||
20 | #define matcher lmatcher | ||
21 | #define fast lfast | ||
22 | #define slow lslow | ||
23 | #define dissect ldissect | ||
24 | #define backref lbackref | ||
25 | #define step lstep | ||
26 | #define print lprint | ||
27 | #define at lat | ||
28 | #define match lmat | ||
29 | #endif | ||
30 | |||
31 | /* another structure passed up and down to avoid zillions of parameters */ | ||
32 | struct match { | ||
33 | struct re_guts *g; | ||
34 | int eflags; | ||
35 | regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ | ||
36 | char *offp; /* offsets work from here */ | ||
37 | char *beginp; /* start of string -- virtual NUL precedes */ | ||
38 | char *endp; /* end of string -- virtual NUL here */ | ||
39 | char *coldp; /* can be no match starting before here */ | ||
40 | char **lastpos; /* [nplus+1] */ | ||
41 | STATEVARS; | ||
42 | states st; /* current states */ | ||
43 | states fresh; /* states for a fresh start */ | ||
44 | states tmp; /* temporary */ | ||
45 | states empty; /* empty set of states */ | ||
46 | }; | ||
47 | |||
48 | #include "engine.ih" | ||
49 | |||
50 | #ifdef REDEBUG | ||
51 | #define SP(t, s, c) print(m, t, s, c, stdout) | ||
52 | #define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) | ||
53 | #define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } | ||
54 | #else | ||
55 | #define SP(t, s, c) /* nothing */ | ||
56 | #define AT(t, p1, p2, s1, s2) /* nothing */ | ||
57 | #define NOTE(s) /* nothing */ | ||
58 | #endif | ||
59 | |||
60 | /* | ||
61 | - matcher - the actual matching engine | ||
62 | == static int matcher(register struct re_guts *g, char *string, \ | ||
63 | == size_t nmatch, regmatch_t pmatch[], int eflags); | ||
64 | */ | ||
65 | static int /* 0 success, REG_NOMATCH failure */ | ||
66 | matcher(g, string, nmatch, pmatch, eflags) | ||
67 | register struct re_guts *g; | ||
68 | char *string; | ||
69 | size_t nmatch; | ||
70 | regmatch_t pmatch[]; | ||
71 | int eflags; | ||
72 | { | ||
73 | register char *endp; | ||
74 | register int i; | ||
75 | struct match mv; | ||
76 | register struct match *m = &mv; | ||
77 | register char *dp; | ||
78 | const register sopno gf = g->firststate+1; /* +1 for OEND */ | ||
79 | const register sopno gl = g->laststate; | ||
80 | char *start; | ||
81 | char *stop; | ||
82 | |||
83 | /* simplify the situation where possible */ | ||
84 | if (g->cflags®_NOSUB) | ||
85 | nmatch = 0; | ||
86 | if (eflags®_STARTEND) { | ||
87 | start = string + pmatch[0].rm_so; | ||
88 | stop = string + pmatch[0].rm_eo; | ||
89 | } else { | ||
90 | start = string; | ||
91 | stop = start + strlen(start); | ||
92 | } | ||
93 | if (stop < start) | ||
94 | return(REG_INVARG); | ||
95 | |||
96 | /* prescreening; this does wonders for this rather slow code */ | ||
97 | if (g->must != NULL) { | ||
98 | for (dp = start; dp < stop; dp++) | ||
99 | if (*dp == g->must[0] && stop - dp >= g->mlen && | ||
100 | memcmp(dp, g->must, (size_t)g->mlen) == 0) | ||
101 | break; | ||
102 | if (dp == stop) /* we didn't find g->must */ | ||
103 | return(REG_NOMATCH); | ||
104 | } | ||
105 | |||
106 | /* match struct setup */ | ||
107 | m->g = g; | ||
108 | m->eflags = eflags; | ||
109 | m->pmatch = NULL; | ||
110 | m->lastpos = NULL; | ||
111 | m->offp = string; | ||
112 | m->beginp = start; | ||
113 | m->endp = stop; | ||
114 | STATESETUP(m, 4); | ||
115 | SETUP(m->st); | ||
116 | SETUP(m->fresh); | ||
117 | SETUP(m->tmp); | ||
118 | SETUP(m->empty); | ||
119 | CLEAR(m->empty); | ||
120 | |||
121 | /* this loop does only one repetition except for backrefs */ | ||
122 | for (;;) { | ||
123 | endp = fast(m, start, stop, gf, gl); | ||
124 | if (endp == NULL) { /* a miss */ | ||
125 | STATETEARDOWN(m); | ||
126 | return(REG_NOMATCH); | ||
127 | } | ||
128 | if (nmatch == 0 && !g->backrefs) | ||
129 | break; /* no further info needed */ | ||
130 | |||
131 | /* where? */ | ||
132 | assert(m->coldp != NULL); | ||
133 | for (;;) { | ||
134 | NOTE("finding start"); | ||
135 | endp = slow(m, m->coldp, stop, gf, gl); | ||
136 | if (endp != NULL) | ||
137 | break; | ||
138 | assert(m->coldp < m->endp); | ||
139 | m->coldp++; | ||
140 | } | ||
141 | if (nmatch == 1 && !g->backrefs) | ||
142 | break; /* no further info needed */ | ||
143 | |||
144 | /* oh my, he wants the subexpressions... */ | ||
145 | if (m->pmatch == NULL) | ||
146 | m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * | ||
147 | sizeof(regmatch_t)); | ||
148 | if (m->pmatch == NULL) { | ||
149 | STATETEARDOWN(m); | ||
150 | return(REG_ESPACE); | ||
151 | } | ||
152 | for (i = 1; i <= m->g->nsub; i++) | ||
153 | m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; | ||
154 | if (!g->backrefs && !(m->eflags®_BACKR)) { | ||
155 | NOTE("dissecting"); | ||
156 | dp = dissect(m, m->coldp, endp, gf, gl); | ||
157 | } else { | ||
158 | if (g->nplus > 0 && m->lastpos == NULL) | ||
159 | m->lastpos = (char **)malloc((g->nplus+1) * | ||
160 | sizeof(char *)); | ||
161 | if (g->nplus > 0 && m->lastpos == NULL) { | ||
162 | free(m->pmatch); | ||
163 | STATETEARDOWN(m); | ||
164 | return(REG_ESPACE); | ||
165 | } | ||
166 | NOTE("backref dissect"); | ||
167 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); | ||
168 | } | ||
169 | if (dp != NULL) | ||
170 | break; | ||
171 | |||
172 | /* uh-oh... we couldn't find a subexpression-level match */ | ||
173 | assert(g->backrefs); /* must be back references doing it */ | ||
174 | assert(g->nplus == 0 || m->lastpos != NULL); | ||
175 | for (;;) { | ||
176 | if (dp != NULL || endp <= m->coldp) | ||
177 | break; /* defeat */ | ||
178 | NOTE("backoff"); | ||
179 | endp = slow(m, m->coldp, endp-1, gf, gl); | ||
180 | if (endp == NULL) | ||
181 | break; /* defeat */ | ||
182 | /* try it on a shorter possibility */ | ||
183 | #ifndef NDEBUG | ||
184 | for (i = 1; i <= m->g->nsub; i++) { | ||
185 | assert(m->pmatch[i].rm_so == -1); | ||
186 | assert(m->pmatch[i].rm_eo == -1); | ||
187 | } | ||
188 | #endif | ||
189 | NOTE("backoff dissect"); | ||
190 | dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); | ||
191 | } | ||
192 | assert(dp == NULL || dp == endp); | ||
193 | if (dp != NULL) /* found a shorter one */ | ||
194 | break; | ||
195 | |||
196 | /* despite initial appearances, there is no match here */ | ||
197 | NOTE("false alarm"); | ||
198 | start = m->coldp + 1; /* recycle starting later */ | ||
199 | assert(start <= stop); | ||
200 | } | ||
201 | |||
202 | /* fill in the details if requested */ | ||
203 | if (nmatch > 0) { | ||
204 | pmatch[0].rm_so = m->coldp - m->offp; | ||
205 | pmatch[0].rm_eo = endp - m->offp; | ||
206 | } | ||
207 | if (nmatch > 1) { | ||
208 | assert(m->pmatch != NULL); | ||
209 | for (i = 1; i < nmatch; i++) | ||
210 | if (i <= m->g->nsub) | ||
211 | pmatch[i] = m->pmatch[i]; | ||
212 | else { | ||
213 | pmatch[i].rm_so = -1; | ||
214 | pmatch[i].rm_eo = -1; | ||
215 | } | ||
216 | } | ||
217 | |||
218 | if (m->pmatch != NULL) | ||
219 | free((char *)m->pmatch); | ||
220 | if (m->lastpos != NULL) | ||
221 | free((char *)m->lastpos); | ||
222 | STATETEARDOWN(m); | ||
223 | return(0); | ||
224 | } | ||
225 | |||
226 | /* | ||
227 | - dissect - figure out what matched what, no back references | ||
228 | == static char *dissect(register struct match *m, char *start, \ | ||
229 | == char *stop, sopno startst, sopno stopst); | ||
230 | */ | ||
231 | static char * /* == stop (success) always */ | ||
232 | dissect(m, start, stop, startst, stopst) | ||
233 | register struct match *m; | ||
234 | char *start; | ||
235 | char *stop; | ||
236 | sopno startst; | ||
237 | sopno stopst; | ||
238 | { | ||
239 | register int i; | ||
240 | register sopno ss; /* start sop of current subRE */ | ||
241 | register sopno es; /* end sop of current subRE */ | ||
242 | register char *sp; /* start of string matched by it */ | ||
243 | register char *stp; /* string matched by it cannot pass here */ | ||
244 | register char *rest; /* start of rest of string */ | ||
245 | register char *tail; /* string unmatched by rest of RE */ | ||
246 | register sopno ssub; /* start sop of subsubRE */ | ||
247 | register sopno esub; /* end sop of subsubRE */ | ||
248 | register char *ssp; /* start of string matched by subsubRE */ | ||
249 | register char *sep; /* end of string matched by subsubRE */ | ||
250 | register char *oldssp; /* previous ssp */ | ||
251 | register char *dp; | ||
252 | |||
253 | AT("diss", start, stop, startst, stopst); | ||
254 | sp = start; | ||
255 | for (ss = startst; ss < stopst; ss = es) { | ||
256 | /* identify end of subRE */ | ||
257 | es = ss; | ||
258 | switch (OP(m->g->strip[es])) { | ||
259 | case OPLUS_: | ||
260 | case OQUEST_: | ||
261 | es += OPND(m->g->strip[es]); | ||
262 | break; | ||
263 | case OCH_: | ||
264 | while (OP(m->g->strip[es]) != O_CH) | ||
265 | es += OPND(m->g->strip[es]); | ||
266 | break; | ||
267 | } | ||
268 | es++; | ||
269 | |||
270 | /* figure out what it matched */ | ||
271 | switch (OP(m->g->strip[ss])) { | ||
272 | case OEND: | ||
273 | assert(nope); | ||
274 | break; | ||
275 | case OCHAR: | ||
276 | sp++; | ||
277 | break; | ||
278 | case OBOL: | ||
279 | case OEOL: | ||
280 | case OBOW: | ||
281 | case OEOW: | ||
282 | break; | ||
283 | case OANY: | ||
284 | case OANYOF: | ||
285 | sp++; | ||
286 | break; | ||
287 | case OBACK_: | ||
288 | case O_BACK: | ||
289 | assert(nope); | ||
290 | break; | ||
291 | /* cases where length of match is hard to find */ | ||
292 | case OQUEST_: | ||
293 | stp = stop; | ||
294 | for (;;) { | ||
295 | /* how long could this one be? */ | ||
296 | rest = slow(m, sp, stp, ss, es); | ||
297 | assert(rest != NULL); /* it did match */ | ||
298 | /* could the rest match the rest? */ | ||
299 | tail = slow(m, rest, stop, es, stopst); | ||
300 | if (tail == stop) | ||
301 | break; /* yes! */ | ||
302 | /* no -- try a shorter match for this one */ | ||
303 | stp = rest - 1; | ||
304 | assert(stp >= sp); /* it did work */ | ||
305 | } | ||
306 | ssub = ss + 1; | ||
307 | esub = es - 1; | ||
308 | /* did innards match? */ | ||
309 | if (slow(m, sp, rest, ssub, esub) != NULL) { | ||
310 | dp = dissect(m, sp, rest, ssub, esub); | ||
311 | assert(dp == rest); | ||
312 | } else /* no */ | ||
313 | assert(sp == rest); | ||
314 | sp = rest; | ||
315 | break; | ||
316 | case OPLUS_: | ||
317 | stp = stop; | ||
318 | for (;;) { | ||
319 | /* how long could this one be? */ | ||
320 | rest = slow(m, sp, stp, ss, es); | ||
321 | assert(rest != NULL); /* it did match */ | ||
322 | /* could the rest match the rest? */ | ||
323 | tail = slow(m, rest, stop, es, stopst); | ||
324 | if (tail == stop) | ||
325 | break; /* yes! */ | ||
326 | /* no -- try a shorter match for this one */ | ||
327 | stp = rest - 1; | ||
328 | assert(stp >= sp); /* it did work */ | ||
329 | } | ||
330 | ssub = ss + 1; | ||
331 | esub = es - 1; | ||
332 | ssp = sp; | ||
333 | oldssp = ssp; | ||
334 | for (;;) { /* find last match of innards */ | ||
335 | sep = slow(m, ssp, rest, ssub, esub); | ||
336 | if (sep == NULL || sep == ssp) | ||
337 | break; /* failed or matched null */ | ||
338 | oldssp = ssp; /* on to next try */ | ||
339 | ssp = sep; | ||
340 | } | ||
341 | if (sep == NULL) { | ||
342 | /* last successful match */ | ||
343 | sep = ssp; | ||
344 | ssp = oldssp; | ||
345 | } | ||
346 | assert(sep == rest); /* must exhaust substring */ | ||
347 | assert(slow(m, ssp, sep, ssub, esub) == rest); | ||
348 | dp = dissect(m, ssp, sep, ssub, esub); | ||
349 | assert(dp == sep); | ||
350 | sp = rest; | ||
351 | break; | ||
352 | case OCH_: | ||
353 | stp = stop; | ||
354 | for (;;) { | ||
355 | /* how long could this one be? */ | ||
356 | rest = slow(m, sp, stp, ss, es); | ||
357 | assert(rest != NULL); /* it did match */ | ||
358 | /* could the rest match the rest? */ | ||
359 | tail = slow(m, rest, stop, es, stopst); | ||
360 | if (tail == stop) | ||
361 | break; /* yes! */ | ||
362 | /* no -- try a shorter match for this one */ | ||
363 | stp = rest - 1; | ||
364 | assert(stp >= sp); /* it did work */ | ||
365 | } | ||
366 | ssub = ss + 1; | ||
367 | esub = ss + OPND(m->g->strip[ss]) - 1; | ||
368 | assert(OP(m->g->strip[esub]) == OOR1); | ||
369 | for (;;) { /* find first matching branch */ | ||
370 | if (slow(m, sp, rest, ssub, esub) == rest) | ||
371 | break; /* it matched all of it */ | ||
372 | /* that one missed, try next one */ | ||
373 | assert(OP(m->g->strip[esub]) == OOR1); | ||
374 | esub++; | ||
375 | assert(OP(m->g->strip[esub]) == OOR2); | ||
376 | ssub = esub + 1; | ||
377 | esub += OPND(m->g->strip[esub]); | ||
378 | if (OP(m->g->strip[esub]) == OOR2) | ||
379 | esub--; | ||
380 | else | ||
381 | assert(OP(m->g->strip[esub]) == O_CH); | ||
382 | } | ||
383 | dp = dissect(m, sp, rest, ssub, esub); | ||
384 | assert(dp == rest); | ||
385 | sp = rest; | ||
386 | break; | ||
387 | case O_PLUS: | ||
388 | case O_QUEST: | ||
389 | case OOR1: | ||
390 | case OOR2: | ||
391 | case O_CH: | ||
392 | assert(nope); | ||
393 | break; | ||
394 | case OLPAREN: | ||
395 | i = OPND(m->g->strip[ss]); | ||
396 | assert(0 < i && i <= m->g->nsub); | ||
397 | m->pmatch[i].rm_so = sp - m->offp; | ||
398 | break; | ||
399 | case ORPAREN: | ||
400 | i = OPND(m->g->strip[ss]); | ||
401 | assert(0 < i && i <= m->g->nsub); | ||
402 | m->pmatch[i].rm_eo = sp - m->offp; | ||
403 | break; | ||
404 | default: /* uh oh */ | ||
405 | assert(nope); | ||
406 | break; | ||
407 | } | ||
408 | } | ||
409 | |||
410 | assert(sp == stop); | ||
411 | return(sp); | ||
412 | } | ||
413 | |||
414 | /* | ||
415 | - backref - figure out what matched what, figuring in back references | ||
416 | == static char *backref(register struct match *m, char *start, \ | ||
417 | == char *stop, sopno startst, sopno stopst, sopno lev); | ||
418 | */ | ||
419 | static char * /* == stop (success) or NULL (failure) */ | ||
420 | backref(m, start, stop, startst, stopst, lev) | ||
421 | register struct match *m; | ||
422 | char *start; | ||
423 | char *stop; | ||
424 | sopno startst; | ||
425 | sopno stopst; | ||
426 | sopno lev; /* PLUS nesting level */ | ||
427 | { | ||
428 | register int i; | ||
429 | register sopno ss; /* start sop of current subRE */ | ||
430 | register char *sp; /* start of string matched by it */ | ||
431 | register sopno ssub; /* start sop of subsubRE */ | ||
432 | register sopno esub; /* end sop of subsubRE */ | ||
433 | register char *ssp; /* start of string matched by subsubRE */ | ||
434 | register char *dp; | ||
435 | register size_t len; | ||
436 | register int hard; | ||
437 | register sop s; | ||
438 | register regoff_t offsave; | ||
439 | register cset *cs; | ||
440 | |||
441 | AT("back", start, stop, startst, stopst); | ||
442 | sp = start; | ||
443 | |||
444 | /* get as far as we can with easy stuff */ | ||
445 | hard = 0; | ||
446 | for (ss = startst; !hard && ss < stopst; ss++) | ||
447 | switch (OP(s = m->g->strip[ss])) { | ||
448 | case OCHAR: | ||
449 | if (sp == stop || *sp++ != (char)OPND(s)) | ||
450 | return(NULL); | ||
451 | break; | ||
452 | case OANY: | ||
453 | if (sp == stop) | ||
454 | return(NULL); | ||
455 | sp++; | ||
456 | break; | ||
457 | case OANYOF: | ||
458 | cs = &m->g->sets[OPND(s)]; | ||
459 | if (sp == stop || !CHIN(cs, *sp++)) | ||
460 | return(NULL); | ||
461 | break; | ||
462 | case OBOL: | ||
463 | if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || | ||
464 | (sp < m->endp && *(sp-1) == '\n' && | ||
465 | (m->g->cflags®_NEWLINE)) ) | ||
466 | { /* yes */ } | ||
467 | else | ||
468 | return(NULL); | ||
469 | break; | ||
470 | case OEOL: | ||
471 | if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || | ||
472 | (sp < m->endp && *sp == '\n' && | ||
473 | (m->g->cflags®_NEWLINE)) ) | ||
474 | { /* yes */ } | ||
475 | else | ||
476 | return(NULL); | ||
477 | break; | ||
478 | case OBOW: | ||
479 | if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || | ||
480 | (sp < m->endp && *(sp-1) == '\n' && | ||
481 | (m->g->cflags®_NEWLINE)) || | ||
482 | (sp > m->beginp && | ||
483 | !ISWORD(*(sp-1))) ) && | ||
484 | (sp < m->endp && ISWORD(*sp)) ) | ||
485 | { /* yes */ } | ||
486 | else | ||
487 | return(NULL); | ||
488 | break; | ||
489 | case OEOW: | ||
490 | if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || | ||
491 | (sp < m->endp && *sp == '\n' && | ||
492 | (m->g->cflags®_NEWLINE)) || | ||
493 | (sp < m->endp && !ISWORD(*sp)) ) && | ||
494 | (sp > m->beginp && ISWORD(*(sp-1))) ) | ||
495 | { /* yes */ } | ||
496 | else | ||
497 | return(NULL); | ||
498 | break; | ||
499 | case O_QUEST: | ||
500 | break; | ||
501 | case OOR1: /* matches null but needs to skip */ | ||
502 | ss++; | ||
503 | s = m->g->strip[ss]; | ||
504 | do { | ||
505 | assert(OP(s) == OOR2); | ||
506 | ss += OPND(s); | ||
507 | } while (OP(s = m->g->strip[ss]) != O_CH); | ||
508 | /* note that the ss++ gets us past the O_CH */ | ||
509 | break; | ||
510 | default: /* have to make a choice */ | ||
511 | hard = 1; | ||
512 | break; | ||
513 | } | ||
514 | if (!hard) { /* that was it! */ | ||
515 | if (sp != stop) | ||
516 | return(NULL); | ||
517 | return(sp); | ||
518 | } | ||
519 | ss--; /* adjust for the for's final increment */ | ||
520 | |||
521 | /* the hard stuff */ | ||
522 | AT("hard", sp, stop, ss, stopst); | ||
523 | s = m->g->strip[ss]; | ||
524 | switch (OP(s)) { | ||
525 | case OBACK_: /* the vilest depths */ | ||
526 | i = OPND(s); | ||
527 | assert(0 < i && i <= m->g->nsub); | ||
528 | if (m->pmatch[i].rm_eo == -1) | ||
529 | return(NULL); | ||
530 | assert(m->pmatch[i].rm_so != -1); | ||
531 | len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; | ||
532 | assert(stop - m->beginp >= len); | ||
533 | if (sp > stop - len) | ||
534 | return(NULL); /* not enough left to match */ | ||
535 | ssp = m->offp + m->pmatch[i].rm_so; | ||
536 | if (memcmp(sp, ssp, len) != 0) | ||
537 | return(NULL); | ||
538 | while (m->g->strip[ss] != SOP(O_BACK, i)) | ||
539 | ss++; | ||
540 | return(backref(m, sp+len, stop, ss+1, stopst, lev)); | ||
541 | break; | ||
542 | case OQUEST_: /* to null or not */ | ||
543 | dp = backref(m, sp, stop, ss+1, stopst, lev); | ||
544 | if (dp != NULL) | ||
545 | return(dp); /* not */ | ||
546 | return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); | ||
547 | break; | ||
548 | case OPLUS_: | ||
549 | assert(m->lastpos != NULL); | ||
550 | assert(lev+1 <= m->g->nplus); | ||
551 | m->lastpos[lev+1] = sp; | ||
552 | return(backref(m, sp, stop, ss+1, stopst, lev+1)); | ||
553 | break; | ||
554 | case O_PLUS: | ||
555 | if (sp == m->lastpos[lev]) /* last pass matched null */ | ||
556 | return(backref(m, sp, stop, ss+1, stopst, lev-1)); | ||
557 | /* try another pass */ | ||
558 | m->lastpos[lev] = sp; | ||
559 | dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); | ||
560 | if (dp == NULL) | ||
561 | return(backref(m, sp, stop, ss+1, stopst, lev-1)); | ||
562 | else | ||
563 | return(dp); | ||
564 | break; | ||
565 | case OCH_: /* find the right one, if any */ | ||
566 | ssub = ss + 1; | ||
567 | esub = ss + OPND(s) - 1; | ||
568 | assert(OP(m->g->strip[esub]) == OOR1); | ||
569 | for (;;) { /* find first matching branch */ | ||
570 | dp = backref(m, sp, stop, ssub, esub, lev); | ||
571 | if (dp != NULL) | ||
572 | return(dp); | ||
573 | /* that one missed, try next one */ | ||
574 | if (OP(m->g->strip[esub]) == O_CH) | ||
575 | return(NULL); /* there is none */ | ||
576 | esub++; | ||
577 | assert(OP(m->g->strip[esub]) == OOR2); | ||
578 | ssub = esub + 1; | ||
579 | esub += OPND(m->g->strip[esub]); | ||
580 | if (OP(m->g->strip[esub]) == OOR2) | ||
581 | esub--; | ||
582 | else | ||
583 | assert(OP(m->g->strip[esub]) == O_CH); | ||
584 | } | ||
585 | break; | ||
586 | case OLPAREN: /* must undo assignment if rest fails */ | ||
587 | i = OPND(s); | ||
588 | assert(0 < i && i <= m->g->nsub); | ||
589 | offsave = m->pmatch[i].rm_so; | ||
590 | m->pmatch[i].rm_so = sp - m->offp; | ||
591 | dp = backref(m, sp, stop, ss+1, stopst, lev); | ||
592 | if (dp != NULL) | ||
593 | return(dp); | ||
594 | m->pmatch[i].rm_so = offsave; | ||
595 | return(NULL); | ||
596 | break; | ||
597 | case ORPAREN: /* must undo assignment if rest fails */ | ||
598 | i = OPND(s); | ||
599 | assert(0 < i && i <= m->g->nsub); | ||
600 | offsave = m->pmatch[i].rm_eo; | ||
601 | m->pmatch[i].rm_eo = sp - m->offp; | ||
602 | dp = backref(m, sp, stop, ss+1, stopst, lev); | ||
603 | if (dp != NULL) | ||
604 | return(dp); | ||
605 | m->pmatch[i].rm_eo = offsave; | ||
606 | return(NULL); | ||
607 | break; | ||
608 | default: /* uh oh */ | ||
609 | assert(nope); | ||
610 | break; | ||
611 | } | ||
612 | |||
613 | /* "can't happen" */ | ||
614 | assert(nope); | ||
615 | /* NOTREACHED */ | ||
616 | return((char *)NULL); /* dummy */ | ||
617 | } | ||
618 | |||
619 | /* | ||
620 | - fast - step through the string at top speed | ||
621 | == static char *fast(register struct match *m, char *start, \ | ||
622 | == char *stop, sopno startst, sopno stopst); | ||
623 | */ | ||
624 | static char * /* where tentative match ended, or NULL */ | ||
625 | fast(m, start, stop, startst, stopst) | ||
626 | register struct match *m; | ||
627 | char *start; | ||
628 | char *stop; | ||
629 | sopno startst; | ||
630 | sopno stopst; | ||
631 | { | ||
632 | register states st = m->st; | ||
633 | register states fresh = m->fresh; | ||
634 | register states tmp = m->tmp; | ||
635 | register char *p = start; | ||
636 | register int c = (start == m->beginp) ? OUT : *(start-1); | ||
637 | register int lastc; /* previous c */ | ||
638 | register int flagch; | ||
639 | register int i; | ||
640 | register char *coldp; /* last p after which no match was underway */ | ||
641 | |||
642 | CLEAR(st); | ||
643 | SET1(st, startst); | ||
644 | st = step(m->g, startst, stopst, st, NOTHING, st); | ||
645 | ASSIGN(fresh, st); | ||
646 | SP("start", st, *p); | ||
647 | coldp = NULL; | ||
648 | for (;;) { | ||
649 | /* next character */ | ||
650 | lastc = c; | ||
651 | c = (p == m->endp) ? OUT : *p; | ||
652 | if (EQ(st, fresh)) | ||
653 | coldp = p; | ||
654 | |||
655 | /* is there an EOL and/or BOL between lastc and c? */ | ||
656 | flagch = '\0'; | ||
657 | i = 0; | ||
658 | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || | ||
659 | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { | ||
660 | flagch = BOL; | ||
661 | i = m->g->nbol; | ||
662 | } | ||
663 | if ( (c == '\n' && m->g->cflags®_NEWLINE) || | ||
664 | (c == OUT && !(m->eflags®_NOTEOL)) ) { | ||
665 | flagch = (flagch == BOL) ? BOLEOL : EOL; | ||
666 | i += m->g->neol; | ||
667 | } | ||
668 | if (i != 0) { | ||
669 | for (; i > 0; i--) | ||
670 | st = step(m->g, startst, stopst, st, flagch, st); | ||
671 | SP("boleol", st, c); | ||
672 | } | ||
673 | |||
674 | /* how about a word boundary? */ | ||
675 | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | ||
676 | (c != OUT && ISWORD(c)) ) { | ||
677 | flagch = BOW; | ||
678 | } | ||
679 | if ( (lastc != OUT && ISWORD(lastc)) && | ||
680 | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { | ||
681 | flagch = EOW; | ||
682 | } | ||
683 | if (flagch == BOW || flagch == EOW) { | ||
684 | st = step(m->g, startst, stopst, st, flagch, st); | ||
685 | SP("boweow", st, c); | ||
686 | } | ||
687 | |||
688 | /* are we done? */ | ||
689 | if (ISSET(st, stopst) || p == stop) | ||
690 | break; /* NOTE BREAK OUT */ | ||
691 | |||
692 | /* no, we must deal with this character */ | ||
693 | ASSIGN(tmp, st); | ||
694 | ASSIGN(st, fresh); | ||
695 | assert(c != OUT); | ||
696 | st = step(m->g, startst, stopst, tmp, c, st); | ||
697 | SP("aft", st, c); | ||
698 | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | ||
699 | p++; | ||
700 | } | ||
701 | |||
702 | assert(coldp != NULL); | ||
703 | m->coldp = coldp; | ||
704 | if (ISSET(st, stopst)) | ||
705 | return(p+1); | ||
706 | else | ||
707 | return(NULL); | ||
708 | } | ||
709 | |||
710 | /* | ||
711 | - slow - step through the string more deliberately | ||
712 | == static char *slow(register struct match *m, char *start, \ | ||
713 | == char *stop, sopno startst, sopno stopst); | ||
714 | */ | ||
715 | static char * /* where it ended */ | ||
716 | slow(m, start, stop, startst, stopst) | ||
717 | register struct match *m; | ||
718 | char *start; | ||
719 | char *stop; | ||
720 | sopno startst; | ||
721 | sopno stopst; | ||
722 | { | ||
723 | register states st = m->st; | ||
724 | register states empty = m->empty; | ||
725 | register states tmp = m->tmp; | ||
726 | register char *p = start; | ||
727 | register int c = (start == m->beginp) ? OUT : *(start-1); | ||
728 | register int lastc; /* previous c */ | ||
729 | register int flagch; | ||
730 | register int i; | ||
731 | register char *matchp; /* last p at which a match ended */ | ||
732 | |||
733 | AT("slow", start, stop, startst, stopst); | ||
734 | CLEAR(st); | ||
735 | SET1(st, startst); | ||
736 | SP("sstart", st, *p); | ||
737 | st = step(m->g, startst, stopst, st, NOTHING, st); | ||
738 | matchp = NULL; | ||
739 | for (;;) { | ||
740 | /* next character */ | ||
741 | lastc = c; | ||
742 | c = (p == m->endp) ? OUT : *p; | ||
743 | |||
744 | /* is there an EOL and/or BOL between lastc and c? */ | ||
745 | flagch = '\0'; | ||
746 | i = 0; | ||
747 | if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || | ||
748 | (lastc == OUT && !(m->eflags®_NOTBOL)) ) { | ||
749 | flagch = BOL; | ||
750 | i = m->g->nbol; | ||
751 | } | ||
752 | if ( (c == '\n' && m->g->cflags®_NEWLINE) || | ||
753 | (c == OUT && !(m->eflags®_NOTEOL)) ) { | ||
754 | flagch = (flagch == BOL) ? BOLEOL : EOL; | ||
755 | i += m->g->neol; | ||
756 | } | ||
757 | if (i != 0) { | ||
758 | for (; i > 0; i--) | ||
759 | st = step(m->g, startst, stopst, st, flagch, st); | ||
760 | SP("sboleol", st, c); | ||
761 | } | ||
762 | |||
763 | /* how about a word boundary? */ | ||
764 | if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && | ||
765 | (c != OUT && ISWORD(c)) ) { | ||
766 | flagch = BOW; | ||
767 | } | ||
768 | if ( (lastc != OUT && ISWORD(lastc)) && | ||
769 | (flagch == EOL || (c != OUT && !ISWORD(c))) ) { | ||
770 | flagch = EOW; | ||
771 | } | ||
772 | if (flagch == BOW || flagch == EOW) { | ||
773 | st = step(m->g, startst, stopst, st, flagch, st); | ||
774 | SP("sboweow", st, c); | ||
775 | } | ||
776 | |||
777 | /* are we done? */ | ||
778 | if (ISSET(st, stopst)) | ||
779 | matchp = p; | ||
780 | if (EQ(st, empty) || p == stop) | ||
781 | break; /* NOTE BREAK OUT */ | ||
782 | |||
783 | /* no, we must deal with this character */ | ||
784 | ASSIGN(tmp, st); | ||
785 | ASSIGN(st, empty); | ||
786 | assert(c != OUT); | ||
787 | st = step(m->g, startst, stopst, tmp, c, st); | ||
788 | SP("saft", st, c); | ||
789 | assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); | ||
790 | p++; | ||
791 | } | ||
792 | |||
793 | return(matchp); | ||
794 | } | ||
795 | |||
796 | |||
797 | /* | ||
798 | - step - map set of states reachable before char to set reachable after | ||
799 | == static states step(register struct re_guts *g, sopno start, sopno stop, \ | ||
800 | == register states bef, int ch, register states aft); | ||
801 | == #define BOL (OUT+1) | ||
802 | == #define EOL (BOL+1) | ||
803 | == #define BOLEOL (BOL+2) | ||
804 | == #define NOTHING (BOL+3) | ||
805 | == #define BOW (BOL+4) | ||
806 | == #define EOW (BOL+5) | ||
807 | == #define CODEMAX (BOL+5) // highest code used | ||
808 | == #define NONCHAR(c) ((c) > CHAR_MAX) | ||
809 | == #define NNONCHAR (CODEMAX-CHAR_MAX) | ||
810 | */ | ||
811 | static states | ||
812 | step(g, start, stop, bef, ch, aft) | ||
813 | register struct re_guts *g; | ||
814 | sopno start; /* start state within strip */ | ||
815 | sopno stop; /* state after stop state within strip */ | ||
816 | register states bef; /* states reachable before */ | ||
817 | int ch; /* character or NONCHAR code */ | ||
818 | register states aft; /* states already known reachable after */ | ||
819 | { | ||
820 | register cset *cs; | ||
821 | register sop s; | ||
822 | register sopno pc; | ||
823 | register onestate here; /* note, macros know this name */ | ||
824 | register sopno look; | ||
825 | register long i; | ||
826 | |||
827 | for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { | ||
828 | s = g->strip[pc]; | ||
829 | switch (OP(s)) { | ||
830 | case OEND: | ||
831 | assert(pc == stop-1); | ||
832 | break; | ||
833 | case OCHAR: | ||
834 | /* only characters can match */ | ||
835 | assert(!NONCHAR(ch) || ch != (char)OPND(s)); | ||
836 | if (ch == (char)OPND(s)) | ||
837 | FWD(aft, bef, 1); | ||
838 | break; | ||
839 | case OBOL: | ||
840 | if (ch == BOL || ch == BOLEOL) | ||
841 | FWD(aft, bef, 1); | ||
842 | break; | ||
843 | case OEOL: | ||
844 | if (ch == EOL || ch == BOLEOL) | ||
845 | FWD(aft, bef, 1); | ||
846 | break; | ||
847 | case OBOW: | ||
848 | if (ch == BOW) | ||
849 | FWD(aft, bef, 1); | ||
850 | break; | ||
851 | case OEOW: | ||
852 | if (ch == EOW) | ||
853 | FWD(aft, bef, 1); | ||
854 | break; | ||
855 | case OANY: | ||
856 | if (!NONCHAR(ch)) | ||
857 | FWD(aft, bef, 1); | ||
858 | break; | ||
859 | case OANYOF: | ||
860 | cs = &g->sets[OPND(s)]; | ||
861 | if (!NONCHAR(ch) && CHIN(cs, ch)) | ||
862 | FWD(aft, bef, 1); | ||
863 | break; | ||
864 | case OBACK_: /* ignored here */ | ||
865 | case O_BACK: | ||
866 | FWD(aft, aft, 1); | ||
867 | break; | ||
868 | case OPLUS_: /* forward, this is just an empty */ | ||
869 | FWD(aft, aft, 1); | ||
870 | break; | ||
871 | case O_PLUS: /* both forward and back */ | ||
872 | FWD(aft, aft, 1); | ||
873 | i = ISSETBACK(aft, OPND(s)); | ||
874 | BACK(aft, aft, OPND(s)); | ||
875 | if (!i && ISSETBACK(aft, OPND(s))) { | ||
876 | /* oho, must reconsider loop body */ | ||
877 | pc -= OPND(s) + 1; | ||
878 | INIT(here, pc); | ||
879 | } | ||
880 | break; | ||
881 | case OQUEST_: /* two branches, both forward */ | ||
882 | FWD(aft, aft, 1); | ||
883 | FWD(aft, aft, OPND(s)); | ||
884 | break; | ||
885 | case O_QUEST: /* just an empty */ | ||
886 | FWD(aft, aft, 1); | ||
887 | break; | ||
888 | case OLPAREN: /* not significant here */ | ||
889 | case ORPAREN: | ||
890 | FWD(aft, aft, 1); | ||
891 | break; | ||
892 | case OCH_: /* mark the first two branches */ | ||
893 | FWD(aft, aft, 1); | ||
894 | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | ||
895 | FWD(aft, aft, OPND(s)); | ||
896 | break; | ||
897 | case OOR1: /* done a branch, find the O_CH */ | ||
898 | if (ISSTATEIN(aft, here)) { | ||
899 | for (look = 1; | ||
900 | OP(s = g->strip[pc+look]) != O_CH; | ||
901 | look += OPND(s)) | ||
902 | assert(OP(s) == OOR2); | ||
903 | FWD(aft, aft, look); | ||
904 | } | ||
905 | break; | ||
906 | case OOR2: /* propagate OCH_'s marking */ | ||
907 | FWD(aft, aft, 1); | ||
908 | if (OP(g->strip[pc+OPND(s)]) != O_CH) { | ||
909 | assert(OP(g->strip[pc+OPND(s)]) == OOR2); | ||
910 | FWD(aft, aft, OPND(s)); | ||
911 | } | ||
912 | break; | ||
913 | case O_CH: /* just empty */ | ||
914 | FWD(aft, aft, 1); | ||
915 | break; | ||
916 | default: /* ooooops... */ | ||
917 | assert(nope); | ||
918 | break; | ||
919 | } | ||
920 | } | ||
921 | |||
922 | return(aft); | ||
923 | } | ||
924 | |||
925 | #ifdef REDEBUG | ||
926 | /* | ||
927 | - print - print a set of states | ||
928 | == #ifdef REDEBUG | ||
929 | == static void print(struct match *m, char *caption, states st, \ | ||
930 | == int ch, FILE *d); | ||
931 | == #endif | ||
932 | */ | ||
933 | static void | ||
934 | print(m, caption, st, ch, d) | ||
935 | struct match *m; | ||
936 | char *caption; | ||
937 | states st; | ||
938 | int ch; | ||
939 | FILE *d; | ||
940 | { | ||
941 | register struct re_guts *g = m->g; | ||
942 | register int i; | ||
943 | register int first = 1; | ||
944 | |||
945 | if (!(m->eflags®_TRACE)) | ||
946 | return; | ||
947 | |||
948 | fprintf(d, "%s", caption); | ||
949 | if (ch != '\0') | ||
950 | fprintf(d, " %s", pchar(ch)); | ||
951 | for (i = 0; i < g->nstates; i++) | ||
952 | if (ISSET(st, i)) { | ||
953 | fprintf(d, "%s%d", (first) ? "\t" : ", ", i); | ||
954 | first = 0; | ||
955 | } | ||
956 | fprintf(d, "\n"); | ||
957 | } | ||
958 | |||
959 | /* | ||
960 | - at - print current situation | ||
961 | == #ifdef REDEBUG | ||
962 | == static void at(struct match *m, char *title, char *start, char *stop, \ | ||
963 | == sopno startst, sopno stopst); | ||
964 | == #endif | ||
965 | */ | ||
966 | static void | ||
967 | at(m, title, start, stop, startst, stopst) | ||
968 | struct match *m; | ||
969 | char *title; | ||
970 | char *start; | ||
971 | char *stop; | ||
972 | sopno startst; | ||
973 | sopno stopst; | ||
974 | { | ||
975 | if (!(m->eflags®_TRACE)) | ||
976 | return; | ||
977 | |||
978 | printf("%s %s-", title, pchar(*start)); | ||
979 | printf("%s ", pchar(*stop)); | ||
980 | printf("%ld-%ld\n", (long)startst, (long)stopst); | ||
981 | } | ||
982 | |||
983 | #ifndef PCHARDONE | ||
984 | #define PCHARDONE /* never again */ | ||
985 | /* | ||
986 | - pchar - make a character printable | ||
987 | == #ifdef REDEBUG | ||
988 | == static char *pchar(int ch); | ||
989 | == #endif | ||
990 | * | ||
991 | * Is this identical to regchar() over in debug.c? Well, yes. But a | ||
992 | * duplicate here avoids having a debugging-capable regexec.o tied to | ||
993 | * a matching debug.o, and this is convenient. It all disappears in | ||
994 | * the non-debug compilation anyway, so it doesn't matter much. | ||
995 | */ | ||
996 | static char * /* -> representation */ | ||
997 | pchar(ch) | ||
998 | int ch; | ||
999 | { | ||
1000 | static char pbuf[10]; | ||
1001 | |||
1002 | if (isprint(ch) || ch == ' ') | ||
1003 | sprintf(pbuf, "%c", ch); | ||
1004 | else | ||
1005 | sprintf(pbuf, "\\%o", ch); | ||
1006 | return(pbuf); | ||
1007 | } | ||
1008 | #endif | ||
1009 | #endif | ||
1010 | |||
1011 | #undef matcher | ||
1012 | #undef fast | ||
1013 | #undef slow | ||
1014 | #undef dissect | ||
1015 | #undef backref | ||
1016 | #undef step | ||
1017 | #undef print | ||
1018 | #undef at | ||
1019 | #undef match | ||
diff --git a/src/lib/evil/regex/engine.ih b/src/lib/evil/regex/engine.ih new file mode 100644 index 0000000000..6b744d431c --- /dev/null +++ b/src/lib/evil/regex/engine.ih | |||
@@ -0,0 +1,35 @@ | |||
1 | /* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
2 | #ifdef __cplusplus | ||
3 | extern "C" { | ||
4 | #endif | ||
5 | |||
6 | /* === lib/evil/regex/engine.c === */ | ||
7 | static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); | ||
8 | static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); | ||
9 | static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); | ||
10 | static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); | ||
11 | static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); | ||
12 | static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); | ||
13 | #define BOL (OUT+1) | ||
14 | #define EOL (BOL+1) | ||
15 | #define BOLEOL (BOL+2) | ||
16 | #define NOTHING (BOL+3) | ||
17 | #define BOW (BOL+4) | ||
18 | #define EOW (BOL+5) | ||
19 | #define CODEMAX (BOL+5) /* highest code used */ | ||
20 | #define NONCHAR(c) ((c) > CHAR_MAX) | ||
21 | #define NNONCHAR (CODEMAX-CHAR_MAX) | ||
22 | #ifdef REDEBUG | ||
23 | static void print(struct match *m, char *caption, states st, int ch, FILE *d); | ||
24 | #endif | ||
25 | #ifdef REDEBUG | ||
26 | static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst); | ||
27 | #endif | ||
28 | #ifdef REDEBUG | ||
29 | static char *pchar(int ch); | ||
30 | #endif | ||
31 | |||
32 | #ifdef __cplusplus | ||
33 | } | ||
34 | #endif | ||
35 | /* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
diff --git a/src/lib/evil/regex/regcomp.c b/src/lib/evil/regex/regcomp.c new file mode 100644 index 0000000000..080d292700 --- /dev/null +++ b/src/lib/evil/regex/regcomp.c | |||
@@ -0,0 +1,1603 @@ | |||
1 | #include <sys/types.h> | ||
2 | #include <stdio.h> | ||
3 | #include <string.h> | ||
4 | #include <ctype.h> | ||
5 | #include <limits.h> | ||
6 | #include <stdlib.h> | ||
7 | #include <regex.h> | ||
8 | |||
9 | #include "utils.h" | ||
10 | #include "regex2.h" | ||
11 | |||
12 | #include "cclass.h" | ||
13 | #include "cname.h" | ||
14 | |||
15 | /* | ||
16 | * parse structure, passed up and down to avoid global variables and | ||
17 | * other clumsinesses | ||
18 | */ | ||
19 | struct parse { | ||
20 | char *next; /* next character in RE */ | ||
21 | char *end; /* end of string (-> NUL normally) */ | ||
22 | int error; /* has an error been seen? */ | ||
23 | sop *strip; /* malloced strip */ | ||
24 | sopno ssize; /* malloced strip size (allocated) */ | ||
25 | sopno slen; /* malloced strip length (used) */ | ||
26 | int ncsalloc; /* number of csets allocated */ | ||
27 | struct re_guts *g; | ||
28 | # define NPAREN 10 /* we need to remember () 1-9 for back refs */ | ||
29 | sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ | ||
30 | sopno pend[NPAREN]; /* -> ) ([0] unused) */ | ||
31 | }; | ||
32 | |||
33 | #include "regcomp.ih" | ||
34 | |||
35 | static char nuls[10]; /* place to point scanner in event of error */ | ||
36 | |||
37 | /* | ||
38 | * macros for use with parse structure | ||
39 | * BEWARE: these know that the parse structure is named `p' !!! | ||
40 | */ | ||
41 | #define PEEK() (*p->next) | ||
42 | #define PEEK2() (*(p->next+1)) | ||
43 | #define MORE() (p->next < p->end) | ||
44 | #define MORE2() (p->next+1 < p->end) | ||
45 | #define SEE(c) (MORE() && PEEK() == (c)) | ||
46 | #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) | ||
47 | #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) | ||
48 | #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0) | ||
49 | #define NEXT() (p->next++) | ||
50 | #define NEXT2() (p->next += 2) | ||
51 | #define NEXTn(n) (p->next += (n)) | ||
52 | #define GETNEXT() (*p->next++) | ||
53 | #define SETERROR(e) seterr(p, (e)) | ||
54 | #define REQUIRE(co, e) ((co) || SETERROR(e)) | ||
55 | #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) | ||
56 | #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) | ||
57 | #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) | ||
58 | #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) | ||
59 | #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) | ||
60 | #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) | ||
61 | #define ASTERN(sop, pos) EMIT(sop, HERE()-pos) | ||
62 | #define HERE() (p->slen) | ||
63 | #define THERE() (p->slen - 1) | ||
64 | #define THERETHERE() (p->slen - 2) | ||
65 | #define DROP(n) (p->slen -= (n)) | ||
66 | |||
67 | #ifndef NDEBUG | ||
68 | static int never = 0; /* for use in asserts; shuts lint up */ | ||
69 | #else | ||
70 | #define never 0 /* some <assert.h>s have bugs too */ | ||
71 | #endif | ||
72 | |||
73 | /* | ||
74 | - regcomp - interface for parser and compilation | ||
75 | = extern int regcomp(regex_t *, const char *, int); | ||
76 | = #define REG_BASIC 0000 | ||
77 | = #define REG_EXTENDED 0001 | ||
78 | = #define REG_ICASE 0002 | ||
79 | = #define REG_NOSUB 0004 | ||
80 | = #define REG_NEWLINE 0010 | ||
81 | = #define REG_NOSPEC 0020 | ||
82 | = #define REG_PEND 0040 | ||
83 | = #define REG_DUMP 0200 | ||
84 | */ | ||
85 | EAPI int /* 0 success, otherwise REG_something */ | ||
86 | regcomp(preg, pattern, cflags) | ||
87 | regex_t *preg; | ||
88 | const char *pattern; | ||
89 | int cflags; | ||
90 | { | ||
91 | struct parse pa; | ||
92 | register struct re_guts *g; | ||
93 | register struct parse *p = &pa; | ||
94 | register int i; | ||
95 | register size_t len; | ||
96 | #ifdef REDEBUG | ||
97 | # define GOODFLAGS(f) (f) | ||
98 | #else | ||
99 | # define GOODFLAGS(f) ((f)&~REG_DUMP) | ||
100 | #endif | ||
101 | |||
102 | cflags = GOODFLAGS(cflags); | ||
103 | if ((cflags®_EXTENDED) && (cflags®_NOSPEC)) | ||
104 | return(REG_INVARG); | ||
105 | |||
106 | if (cflags®_PEND) { | ||
107 | if (preg->re_endp < pattern) | ||
108 | return(REG_INVARG); | ||
109 | len = preg->re_endp - pattern; | ||
110 | } else | ||
111 | len = strlen((char *)pattern); | ||
112 | |||
113 | /* do the mallocs early so failure handling is easy */ | ||
114 | g = (struct re_guts *)malloc(sizeof(struct re_guts) + | ||
115 | (NC-1)*sizeof(cat_t)); | ||
116 | if (g == NULL) | ||
117 | return(REG_ESPACE); | ||
118 | p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ | ||
119 | p->strip = (sop *)malloc(p->ssize * sizeof(sop)); | ||
120 | p->slen = 0; | ||
121 | if (p->strip == NULL) { | ||
122 | free((char *)g); | ||
123 | return(REG_ESPACE); | ||
124 | } | ||
125 | |||
126 | /* set things up */ | ||
127 | p->g = g; | ||
128 | p->next = (char *)pattern; /* convenience; we do not modify it */ | ||
129 | p->end = p->next + len; | ||
130 | p->error = 0; | ||
131 | p->ncsalloc = 0; | ||
132 | for (i = 0; i < NPAREN; i++) { | ||
133 | p->pbegin[i] = 0; | ||
134 | p->pend[i] = 0; | ||
135 | } | ||
136 | g->csetsize = NC; | ||
137 | g->sets = NULL; | ||
138 | g->setbits = NULL; | ||
139 | g->ncsets = 0; | ||
140 | g->cflags = cflags; | ||
141 | g->iflags = 0; | ||
142 | g->nbol = 0; | ||
143 | g->neol = 0; | ||
144 | g->must = NULL; | ||
145 | g->mlen = 0; | ||
146 | g->nsub = 0; | ||
147 | g->ncategories = 1; /* category 0 is "everything else" */ | ||
148 | g->categories = &g->catspace[-(CHAR_MIN)]; | ||
149 | (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); | ||
150 | g->backrefs = 0; | ||
151 | |||
152 | /* do it */ | ||
153 | EMIT(OEND, 0); | ||
154 | g->firststate = THERE(); | ||
155 | if (cflags®_EXTENDED) | ||
156 | p_ere(p, OUT); | ||
157 | else if (cflags®_NOSPEC) | ||
158 | p_str(p); | ||
159 | else | ||
160 | p_bre(p, OUT, OUT); | ||
161 | EMIT(OEND, 0); | ||
162 | g->laststate = THERE(); | ||
163 | |||
164 | /* tidy up loose ends and fill things in */ | ||
165 | categorize(p, g); | ||
166 | stripsnug(p, g); | ||
167 | findmust(p, g); | ||
168 | g->nplus = pluscount(p, g); | ||
169 | g->magic = MAGIC2; | ||
170 | preg->re_nsub = g->nsub; | ||
171 | preg->re_g = g; | ||
172 | preg->re_magic = MAGIC1; | ||
173 | #ifndef REDEBUG | ||
174 | /* not debugging, so can't rely on the assert() in regexec() */ | ||
175 | if (g->iflags&BAD) | ||
176 | SETERROR(REG_ASSERT); | ||
177 | #endif | ||
178 | |||
179 | /* win or lose, we're done */ | ||
180 | if (p->error != 0) /* lose */ | ||
181 | regfree(preg); | ||
182 | return(p->error); | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | - p_ere - ERE parser top level, concatenation and alternation | ||
187 | == static void p_ere(register struct parse *p, int stop); | ||
188 | */ | ||
189 | static void | ||
190 | p_ere(p, stop) | ||
191 | register struct parse *p; | ||
192 | int stop; /* character this ERE should end at */ | ||
193 | { | ||
194 | register char c; | ||
195 | register sopno prevback; | ||
196 | register sopno prevfwd; | ||
197 | register sopno conc; | ||
198 | register int first = 1; /* is this the first alternative? */ | ||
199 | |||
200 | for (;;) { | ||
201 | /* do a bunch of concatenated expressions */ | ||
202 | conc = HERE(); | ||
203 | while (MORE() && (c = PEEK()) != '|' && c != stop) | ||
204 | p_ere_exp(p); | ||
205 | REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ | ||
206 | |||
207 | if (!EAT('|')) | ||
208 | break; /* NOTE BREAK OUT */ | ||
209 | |||
210 | if (first) { | ||
211 | INSERT(OCH_, conc); /* offset is wrong */ | ||
212 | prevfwd = conc; | ||
213 | prevback = conc; | ||
214 | first = 0; | ||
215 | } | ||
216 | ASTERN(OOR1, prevback); | ||
217 | prevback = THERE(); | ||
218 | AHEAD(prevfwd); /* fix previous offset */ | ||
219 | prevfwd = HERE(); | ||
220 | EMIT(OOR2, 0); /* offset is very wrong */ | ||
221 | } | ||
222 | |||
223 | if (!first) { /* tail-end fixups */ | ||
224 | AHEAD(prevfwd); | ||
225 | ASTERN(O_CH, prevback); | ||
226 | } | ||
227 | |||
228 | assert(!MORE() || SEE(stop)); | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op | ||
233 | == static void p_ere_exp(register struct parse *p); | ||
234 | */ | ||
235 | static void | ||
236 | p_ere_exp(p) | ||
237 | register struct parse *p; | ||
238 | { | ||
239 | register char c; | ||
240 | register sopno pos; | ||
241 | register int count; | ||
242 | register int count2; | ||
243 | register sopno subno; | ||
244 | int wascaret = 0; | ||
245 | |||
246 | assert(MORE()); /* caller should have ensured this */ | ||
247 | c = GETNEXT(); | ||
248 | |||
249 | pos = HERE(); | ||
250 | switch (c) { | ||
251 | case '(': | ||
252 | REQUIRE(MORE(), REG_EPAREN); | ||
253 | p->g->nsub++; | ||
254 | subno = p->g->nsub; | ||
255 | if (subno < NPAREN) | ||
256 | p->pbegin[subno] = HERE(); | ||
257 | EMIT(OLPAREN, subno); | ||
258 | if (!SEE(')')) | ||
259 | p_ere(p, ')'); | ||
260 | if (subno < NPAREN) { | ||
261 | p->pend[subno] = HERE(); | ||
262 | assert(p->pend[subno] != 0); | ||
263 | } | ||
264 | EMIT(ORPAREN, subno); | ||
265 | MUSTEAT(')', REG_EPAREN); | ||
266 | break; | ||
267 | #ifndef POSIX_MISTAKE | ||
268 | case ')': /* happens only if no current unmatched ( */ | ||
269 | /* | ||
270 | * You may ask, why the ifndef? Because I didn't notice | ||
271 | * this until slightly too late for 1003.2, and none of the | ||
272 | * other 1003.2 regular-expression reviewers noticed it at | ||
273 | * all. So an unmatched ) is legal POSIX, at least until | ||
274 | * we can get it fixed. | ||
275 | */ | ||
276 | SETERROR(REG_EPAREN); | ||
277 | break; | ||
278 | #endif | ||
279 | case '^': | ||
280 | EMIT(OBOL, 0); | ||
281 | p->g->iflags |= USEBOL; | ||
282 | p->g->nbol++; | ||
283 | wascaret = 1; | ||
284 | break; | ||
285 | case '$': | ||
286 | EMIT(OEOL, 0); | ||
287 | p->g->iflags |= USEEOL; | ||
288 | p->g->neol++; | ||
289 | break; | ||
290 | case '|': | ||
291 | SETERROR(REG_EMPTY); | ||
292 | break; | ||
293 | case '*': | ||
294 | case '+': | ||
295 | case '?': | ||
296 | SETERROR(REG_BADRPT); | ||
297 | break; | ||
298 | case '.': | ||
299 | if (p->g->cflags®_NEWLINE) | ||
300 | nonnewline(p); | ||
301 | else | ||
302 | EMIT(OANY, 0); | ||
303 | break; | ||
304 | case '[': | ||
305 | p_bracket(p); | ||
306 | break; | ||
307 | case '\\': | ||
308 | REQUIRE(MORE(), REG_EESCAPE); | ||
309 | c = GETNEXT(); | ||
310 | ordinary(p, c); | ||
311 | break; | ||
312 | case '{': /* okay as ordinary except if digit follows */ | ||
313 | REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); | ||
314 | /* FALLTHROUGH */ | ||
315 | default: | ||
316 | ordinary(p, c); | ||
317 | break; | ||
318 | } | ||
319 | |||
320 | if (!MORE()) | ||
321 | return; | ||
322 | c = PEEK(); | ||
323 | /* we call { a repetition if followed by a digit */ | ||
324 | if (!( c == '*' || c == '+' || c == '?' || | ||
325 | (c == '{' && MORE2() && isdigit(PEEK2())) )) | ||
326 | return; /* no repetition, we're done */ | ||
327 | NEXT(); | ||
328 | |||
329 | REQUIRE(!wascaret, REG_BADRPT); | ||
330 | switch (c) { | ||
331 | case '*': /* implemented as +? */ | ||
332 | /* this case does not require the (y|) trick, noKLUDGE */ | ||
333 | INSERT(OPLUS_, pos); | ||
334 | ASTERN(O_PLUS, pos); | ||
335 | INSERT(OQUEST_, pos); | ||
336 | ASTERN(O_QUEST, pos); | ||
337 | break; | ||
338 | case '+': | ||
339 | INSERT(OPLUS_, pos); | ||
340 | ASTERN(O_PLUS, pos); | ||
341 | break; | ||
342 | case '?': | ||
343 | /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ | ||
344 | INSERT(OCH_, pos); /* offset slightly wrong */ | ||
345 | ASTERN(OOR1, pos); /* this one's right */ | ||
346 | AHEAD(pos); /* fix the OCH_ */ | ||
347 | EMIT(OOR2, 0); /* offset very wrong... */ | ||
348 | AHEAD(THERE()); /* ...so fix it */ | ||
349 | ASTERN(O_CH, THERETHERE()); | ||
350 | break; | ||
351 | case '{': | ||
352 | count = p_count(p); | ||
353 | if (EAT(',')) { | ||
354 | if (isdigit(PEEK())) { | ||
355 | count2 = p_count(p); | ||
356 | REQUIRE(count <= count2, REG_BADBR); | ||
357 | } else /* single number with comma */ | ||
358 | count2 = INFINITY; | ||
359 | } else /* just a single number */ | ||
360 | count2 = count; | ||
361 | repeat(p, pos, count, count2); | ||
362 | if (!EAT('}')) { /* error heuristics */ | ||
363 | while (MORE() && PEEK() != '}') | ||
364 | NEXT(); | ||
365 | REQUIRE(MORE(), REG_EBRACE); | ||
366 | SETERROR(REG_BADBR); | ||
367 | } | ||
368 | break; | ||
369 | } | ||
370 | |||
371 | if (!MORE()) | ||
372 | return; | ||
373 | c = PEEK(); | ||
374 | if (!( c == '*' || c == '+' || c == '?' || | ||
375 | (c == '{' && MORE2() && isdigit(PEEK2())) ) ) | ||
376 | return; | ||
377 | SETERROR(REG_BADRPT); | ||
378 | } | ||
379 | |||
380 | /* | ||
381 | - p_str - string (no metacharacters) "parser" | ||
382 | == static void p_str(register struct parse *p); | ||
383 | */ | ||
384 | static void | ||
385 | p_str(p) | ||
386 | register struct parse *p; | ||
387 | { | ||
388 | REQUIRE(MORE(), REG_EMPTY); | ||
389 | while (MORE()) | ||
390 | ordinary(p, GETNEXT()); | ||
391 | } | ||
392 | |||
393 | /* | ||
394 | - p_bre - BRE parser top level, anchoring and concatenation | ||
395 | == static void p_bre(register struct parse *p, register int end1, \ | ||
396 | == register int end2); | ||
397 | * Giving end1 as OUT essentially eliminates the end1/end2 check. | ||
398 | * | ||
399 | * This implementation is a bit of a kludge, in that a trailing $ is first | ||
400 | * taken as an ordinary character and then revised to be an anchor. The | ||
401 | * only undesirable side effect is that '$' gets included as a character | ||
402 | * category in such cases. This is fairly harmless; not worth fixing. | ||
403 | * The amount of lookahead needed to avoid this kludge is excessive. | ||
404 | */ | ||
405 | static void | ||
406 | p_bre(p, end1, end2) | ||
407 | register struct parse *p; | ||
408 | register int end1; /* first terminating character */ | ||
409 | register int end2; /* second terminating character */ | ||
410 | { | ||
411 | register sopno start = HERE(); | ||
412 | register int first = 1; /* first subexpression? */ | ||
413 | register int wasdollar = 0; | ||
414 | |||
415 | if (EAT('^')) { | ||
416 | EMIT(OBOL, 0); | ||
417 | p->g->iflags |= USEBOL; | ||
418 | p->g->nbol++; | ||
419 | } | ||
420 | while (MORE() && !SEETWO(end1, end2)) { | ||
421 | wasdollar = p_simp_re(p, first); | ||
422 | first = 0; | ||
423 | } | ||
424 | if (wasdollar) { /* oops, that was a trailing anchor */ | ||
425 | DROP(1); | ||
426 | EMIT(OEOL, 0); | ||
427 | p->g->iflags |= USEEOL; | ||
428 | p->g->neol++; | ||
429 | } | ||
430 | |||
431 | REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ | ||
432 | } | ||
433 | |||
434 | /* | ||
435 | - p_simp_re - parse a simple RE, an atom possibly followed by a repetition | ||
436 | == static int p_simp_re(register struct parse *p, int starordinary); | ||
437 | */ | ||
438 | static int /* was the simple RE an unbackslashed $? */ | ||
439 | p_simp_re(p, starordinary) | ||
440 | register struct parse *p; | ||
441 | int starordinary; /* is a leading * an ordinary character? */ | ||
442 | { | ||
443 | register int c; | ||
444 | register int count; | ||
445 | register int count2; | ||
446 | register sopno pos; | ||
447 | register int i; | ||
448 | register sopno subno; | ||
449 | # define BACKSL (1<<CHAR_BIT) | ||
450 | |||
451 | pos = HERE(); /* repetion op, if any, covers from here */ | ||
452 | |||
453 | assert(MORE()); /* caller should have ensured this */ | ||
454 | c = GETNEXT(); | ||
455 | if (c == '\\') { | ||
456 | REQUIRE(MORE(), REG_EESCAPE); | ||
457 | c = BACKSL | (unsigned char)GETNEXT(); | ||
458 | } | ||
459 | switch (c) { | ||
460 | case '.': | ||
461 | if (p->g->cflags®_NEWLINE) | ||
462 | nonnewline(p); | ||
463 | else | ||
464 | EMIT(OANY, 0); | ||
465 | break; | ||
466 | case '[': | ||
467 | p_bracket(p); | ||
468 | break; | ||
469 | case BACKSL|'{': | ||
470 | SETERROR(REG_BADRPT); | ||
471 | break; | ||
472 | case BACKSL|'(': | ||
473 | p->g->nsub++; | ||
474 | subno = p->g->nsub; | ||
475 | if (subno < NPAREN) | ||
476 | p->pbegin[subno] = HERE(); | ||
477 | EMIT(OLPAREN, subno); | ||
478 | /* the MORE here is an error heuristic */ | ||
479 | if (MORE() && !SEETWO('\\', ')')) | ||
480 | p_bre(p, '\\', ')'); | ||
481 | if (subno < NPAREN) { | ||
482 | p->pend[subno] = HERE(); | ||
483 | assert(p->pend[subno] != 0); | ||
484 | } | ||
485 | EMIT(ORPAREN, subno); | ||
486 | REQUIRE(EATTWO('\\', ')'), REG_EPAREN); | ||
487 | break; | ||
488 | case BACKSL|')': /* should not get here -- must be user */ | ||
489 | case BACKSL|'}': | ||
490 | SETERROR(REG_EPAREN); | ||
491 | break; | ||
492 | case BACKSL|'1': | ||
493 | case BACKSL|'2': | ||
494 | case BACKSL|'3': | ||
495 | case BACKSL|'4': | ||
496 | case BACKSL|'5': | ||
497 | case BACKSL|'6': | ||
498 | case BACKSL|'7': | ||
499 | case BACKSL|'8': | ||
500 | case BACKSL|'9': | ||
501 | i = (c&~BACKSL) - '0'; | ||
502 | assert(i < NPAREN); | ||
503 | if (p->pend[i] != 0) { | ||
504 | assert(i <= p->g->nsub); | ||
505 | EMIT(OBACK_, i); | ||
506 | assert(p->pbegin[i] != 0); | ||
507 | assert(OP(p->strip[p->pbegin[i]]) == OLPAREN); | ||
508 | assert(OP(p->strip[p->pend[i]]) == ORPAREN); | ||
509 | (void) dupl(p, p->pbegin[i]+1, p->pend[i]); | ||
510 | EMIT(O_BACK, i); | ||
511 | } else | ||
512 | SETERROR(REG_ESUBREG); | ||
513 | p->g->backrefs = 1; | ||
514 | break; | ||
515 | case '*': | ||
516 | REQUIRE(starordinary, REG_BADRPT); | ||
517 | /* FALLTHROUGH */ | ||
518 | default: | ||
519 | ordinary(p, (char)c); /* takes off BACKSL, if any */ | ||
520 | break; | ||
521 | } | ||
522 | |||
523 | if (EAT('*')) { /* implemented as +? */ | ||
524 | /* this case does not require the (y|) trick, noKLUDGE */ | ||
525 | INSERT(OPLUS_, pos); | ||
526 | ASTERN(O_PLUS, pos); | ||
527 | INSERT(OQUEST_, pos); | ||
528 | ASTERN(O_QUEST, pos); | ||
529 | } else if (EATTWO('\\', '{')) { | ||
530 | count = p_count(p); | ||
531 | if (EAT(',')) { | ||
532 | if (MORE() && isdigit(PEEK())) { | ||
533 | count2 = p_count(p); | ||
534 | REQUIRE(count <= count2, REG_BADBR); | ||
535 | } else /* single number with comma */ | ||
536 | count2 = INFINITY; | ||
537 | } else /* just a single number */ | ||
538 | count2 = count; | ||
539 | repeat(p, pos, count, count2); | ||
540 | if (!EATTWO('\\', '}')) { /* error heuristics */ | ||
541 | while (MORE() && !SEETWO('\\', '}')) | ||
542 | NEXT(); | ||
543 | REQUIRE(MORE(), REG_EBRACE); | ||
544 | SETERROR(REG_BADBR); | ||
545 | } | ||
546 | } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ | ||
547 | return(1); | ||
548 | |||
549 | return(0); | ||
550 | } | ||
551 | |||
552 | /* | ||
553 | - p_count - parse a repetition count | ||
554 | == static int p_count(register struct parse *p); | ||
555 | */ | ||
556 | static int /* the value */ | ||
557 | p_count(p) | ||
558 | register struct parse *p; | ||
559 | { | ||
560 | register int count = 0; | ||
561 | register int ndigits = 0; | ||
562 | |||
563 | while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { | ||
564 | count = count*10 + (GETNEXT() - '0'); | ||
565 | ndigits++; | ||
566 | } | ||
567 | |||
568 | REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); | ||
569 | return(count); | ||
570 | } | ||
571 | |||
572 | /* | ||
573 | - p_bracket - parse a bracketed character list | ||
574 | == static void p_bracket(register struct parse *p); | ||
575 | * | ||
576 | * Note a significant property of this code: if the allocset() did SETERROR, | ||
577 | * no set operations are done. | ||
578 | */ | ||
579 | static void | ||
580 | p_bracket(p) | ||
581 | register struct parse *p; | ||
582 | { | ||
583 | register cset *cs = allocset(p); | ||
584 | register int invert = 0; | ||
585 | |||
586 | /* Dept of Truly Sickening Special-Case Kludges */ | ||
587 | if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { | ||
588 | EMIT(OBOW, 0); | ||
589 | NEXTn(6); | ||
590 | return; | ||
591 | } | ||
592 | if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { | ||
593 | EMIT(OEOW, 0); | ||
594 | NEXTn(6); | ||
595 | return; | ||
596 | } | ||
597 | |||
598 | if (EAT('^')) | ||
599 | invert++; /* make note to invert set at end */ | ||
600 | if (EAT(']')) | ||
601 | CHadd(cs, ']'); | ||
602 | else if (EAT('-')) | ||
603 | CHadd(cs, '-'); | ||
604 | while (MORE() && PEEK() != ']' && !SEETWO('-', ']')) | ||
605 | p_b_term(p, cs); | ||
606 | if (EAT('-')) | ||
607 | CHadd(cs, '-'); | ||
608 | MUSTEAT(']', REG_EBRACK); | ||
609 | |||
610 | if (p->error != 0) /* don't mess things up further */ | ||
611 | return; | ||
612 | |||
613 | if (p->g->cflags®_ICASE) { | ||
614 | register int i; | ||
615 | register int ci; | ||
616 | |||
617 | for (i = p->g->csetsize - 1; i >= 0; i--) | ||
618 | if (CHIN(cs, i) && isalpha(i)) { | ||
619 | ci = othercase(i); | ||
620 | if (ci != i) | ||
621 | CHadd(cs, ci); | ||
622 | } | ||
623 | if (cs->multis != NULL) | ||
624 | mccase(p, cs); | ||
625 | } | ||
626 | if (invert) { | ||
627 | register int i; | ||
628 | |||
629 | for (i = p->g->csetsize - 1; i >= 0; i--) | ||
630 | if (CHIN(cs, i)) | ||
631 | CHsub(cs, i); | ||
632 | else | ||
633 | CHadd(cs, i); | ||
634 | if (p->g->cflags®_NEWLINE) | ||
635 | CHsub(cs, '\n'); | ||
636 | if (cs->multis != NULL) | ||
637 | mcinvert(p, cs); | ||
638 | } | ||
639 | |||
640 | assert(cs->multis == NULL); /* xxx */ | ||
641 | |||
642 | if (nch(p, cs) == 1) { /* optimize singleton sets */ | ||
643 | ordinary(p, firstch(p, cs)); | ||
644 | freeset(p, cs); | ||
645 | } else | ||
646 | EMIT(OANYOF, freezeset(p, cs)); | ||
647 | } | ||
648 | |||
649 | /* | ||
650 | - p_b_term - parse one term of a bracketed character list | ||
651 | == static void p_b_term(register struct parse *p, register cset *cs); | ||
652 | */ | ||
653 | static void | ||
654 | p_b_term(p, cs) | ||
655 | register struct parse *p; | ||
656 | register cset *cs; | ||
657 | { | ||
658 | register char c; | ||
659 | register char start, finish; | ||
660 | register int i; | ||
661 | |||
662 | /* classify what we've got */ | ||
663 | switch ((MORE()) ? PEEK() : '\0') { | ||
664 | case '[': | ||
665 | c = (MORE2()) ? PEEK2() : '\0'; | ||
666 | break; | ||
667 | case '-': | ||
668 | SETERROR(REG_ERANGE); | ||
669 | return; /* NOTE RETURN */ | ||
670 | break; | ||
671 | default: | ||
672 | c = '\0'; | ||
673 | break; | ||
674 | } | ||
675 | |||
676 | switch (c) { | ||
677 | case ':': /* character class */ | ||
678 | NEXT2(); | ||
679 | REQUIRE(MORE(), REG_EBRACK); | ||
680 | c = PEEK(); | ||
681 | REQUIRE(c != '-' && c != ']', REG_ECTYPE); | ||
682 | p_b_cclass(p, cs); | ||
683 | REQUIRE(MORE(), REG_EBRACK); | ||
684 | REQUIRE(EATTWO(':', ']'), REG_ECTYPE); | ||
685 | break; | ||
686 | case '=': /* equivalence class */ | ||
687 | NEXT2(); | ||
688 | REQUIRE(MORE(), REG_EBRACK); | ||
689 | c = PEEK(); | ||
690 | REQUIRE(c != '-' && c != ']', REG_ECOLLATE); | ||
691 | p_b_eclass(p, cs); | ||
692 | REQUIRE(MORE(), REG_EBRACK); | ||
693 | REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); | ||
694 | break; | ||
695 | default: /* symbol, ordinary character, or range */ | ||
696 | /* xxx revision needed for multichar stuff */ | ||
697 | start = p_b_symbol(p); | ||
698 | if (SEE('-') && MORE2() && PEEK2() != ']') { | ||
699 | /* range */ | ||
700 | NEXT(); | ||
701 | if (EAT('-')) | ||
702 | finish = '-'; | ||
703 | else | ||
704 | finish = p_b_symbol(p); | ||
705 | } else | ||
706 | finish = start; | ||
707 | /* xxx what about signed chars here... */ | ||
708 | REQUIRE(start <= finish, REG_ERANGE); | ||
709 | for (i = start; i <= finish; i++) | ||
710 | CHadd(cs, i); | ||
711 | break; | ||
712 | } | ||
713 | } | ||
714 | |||
715 | /* | ||
716 | - p_b_cclass - parse a character-class name and deal with it | ||
717 | == static void p_b_cclass(register struct parse *p, register cset *cs); | ||
718 | */ | ||
719 | static void | ||
720 | p_b_cclass(p, cs) | ||
721 | register struct parse *p; | ||
722 | register cset *cs; | ||
723 | { | ||
724 | register char *sp = p->next; | ||
725 | register struct cclass *cp; | ||
726 | register size_t len; | ||
727 | register char *u; | ||
728 | register char c; | ||
729 | |||
730 | while (MORE() && isalpha(PEEK())) | ||
731 | NEXT(); | ||
732 | len = p->next - sp; | ||
733 | for (cp = cclasses; cp->name != NULL; cp++) | ||
734 | if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') | ||
735 | break; | ||
736 | if (cp->name == NULL) { | ||
737 | /* oops, didn't find it */ | ||
738 | SETERROR(REG_ECTYPE); | ||
739 | return; | ||
740 | } | ||
741 | |||
742 | u = cp->chars; | ||
743 | while ((c = *u++) != '\0') | ||
744 | CHadd(cs, c); | ||
745 | for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) | ||
746 | MCadd(p, cs, u); | ||
747 | } | ||
748 | |||
749 | /* | ||
750 | - p_b_eclass - parse an equivalence-class name and deal with it | ||
751 | == static void p_b_eclass(register struct parse *p, register cset *cs); | ||
752 | * | ||
753 | * This implementation is incomplete. xxx | ||
754 | */ | ||
755 | static void | ||
756 | p_b_eclass(p, cs) | ||
757 | register struct parse *p; | ||
758 | register cset *cs; | ||
759 | { | ||
760 | register char c; | ||
761 | |||
762 | c = p_b_coll_elem(p, '='); | ||
763 | CHadd(cs, c); | ||
764 | } | ||
765 | |||
766 | /* | ||
767 | - p_b_symbol - parse a character or [..]ed multicharacter collating symbol | ||
768 | == static char p_b_symbol(register struct parse *p); | ||
769 | */ | ||
770 | static char /* value of symbol */ | ||
771 | p_b_symbol(p) | ||
772 | register struct parse *p; | ||
773 | { | ||
774 | register char value; | ||
775 | |||
776 | REQUIRE(MORE(), REG_EBRACK); | ||
777 | if (!EATTWO('[', '.')) | ||
778 | return(GETNEXT()); | ||
779 | |||
780 | /* collating symbol */ | ||
781 | value = p_b_coll_elem(p, '.'); | ||
782 | REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); | ||
783 | return(value); | ||
784 | } | ||
785 | |||
786 | /* | ||
787 | - p_b_coll_elem - parse a collating-element name and look it up | ||
788 | == static char p_b_coll_elem(register struct parse *p, int endc); | ||
789 | */ | ||
790 | static char /* value of collating element */ | ||
791 | p_b_coll_elem(p, endc) | ||
792 | register struct parse *p; | ||
793 | int endc; /* name ended by endc,']' */ | ||
794 | { | ||
795 | register char *sp = p->next; | ||
796 | register struct cname *cp; | ||
797 | register int len; | ||
798 | |||
799 | while (MORE() && !SEETWO(endc, ']')) | ||
800 | NEXT(); | ||
801 | if (!MORE()) { | ||
802 | SETERROR(REG_EBRACK); | ||
803 | return(0); | ||
804 | } | ||
805 | len = p->next - sp; | ||
806 | for (cp = cnames; cp->name != NULL; cp++) | ||
807 | if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0') | ||
808 | return(cp->code); /* known name */ | ||
809 | if (len == 1) | ||
810 | return(*sp); /* single character */ | ||
811 | SETERROR(REG_ECOLLATE); /* neither */ | ||
812 | return(0); | ||
813 | } | ||
814 | |||
815 | /* | ||
816 | - othercase - return the case counterpart of an alphabetic | ||
817 | == static char othercase(int ch); | ||
818 | */ | ||
819 | static char /* if no counterpart, return ch */ | ||
820 | othercase(ch) | ||
821 | int ch; | ||
822 | { | ||
823 | assert(isalpha(ch)); | ||
824 | if (isupper(ch)) | ||
825 | return(tolower(ch)); | ||
826 | else if (islower(ch)) | ||
827 | return(toupper(ch)); | ||
828 | else /* peculiar, but could happen */ | ||
829 | return(ch); | ||
830 | } | ||
831 | |||
832 | /* | ||
833 | - bothcases - emit a dualcase version of a two-case character | ||
834 | == static void bothcases(register struct parse *p, int ch); | ||
835 | * | ||
836 | * Boy, is this implementation ever a kludge... | ||
837 | */ | ||
838 | static void | ||
839 | bothcases(p, ch) | ||
840 | register struct parse *p; | ||
841 | int ch; | ||
842 | { | ||
843 | register char *oldnext = p->next; | ||
844 | register char *oldend = p->end; | ||
845 | char bracket[3]; | ||
846 | |||
847 | assert(othercase(ch) != ch); /* p_bracket() would recurse */ | ||
848 | p->next = bracket; | ||
849 | p->end = bracket+2; | ||
850 | bracket[0] = ch; | ||
851 | bracket[1] = ']'; | ||
852 | bracket[2] = '\0'; | ||
853 | p_bracket(p); | ||
854 | assert(p->next == bracket+2); | ||
855 | p->next = oldnext; | ||
856 | p->end = oldend; | ||
857 | } | ||
858 | |||
859 | /* | ||
860 | - ordinary - emit an ordinary character | ||
861 | == static void ordinary(register struct parse *p, register int ch); | ||
862 | */ | ||
863 | static void | ||
864 | ordinary(p, ch) | ||
865 | register struct parse *p; | ||
866 | register int ch; | ||
867 | { | ||
868 | register cat_t *cap = p->g->categories; | ||
869 | |||
870 | if ((p->g->cflags®_ICASE) && isalpha(ch) && othercase(ch) != ch) | ||
871 | bothcases(p, ch); | ||
872 | else { | ||
873 | EMIT(OCHAR, (unsigned char)ch); | ||
874 | if (cap[ch] == 0) | ||
875 | cap[ch] = p->g->ncategories++; | ||
876 | } | ||
877 | } | ||
878 | |||
879 | /* | ||
880 | - nonnewline - emit REG_NEWLINE version of OANY | ||
881 | == static void nonnewline(register struct parse *p); | ||
882 | * | ||
883 | * Boy, is this implementation ever a kludge... | ||
884 | */ | ||
885 | static void | ||
886 | nonnewline(p) | ||
887 | register struct parse *p; | ||
888 | { | ||
889 | register char *oldnext = p->next; | ||
890 | register char *oldend = p->end; | ||
891 | char bracket[4]; | ||
892 | |||
893 | p->next = bracket; | ||
894 | p->end = bracket+3; | ||
895 | bracket[0] = '^'; | ||
896 | bracket[1] = '\n'; | ||
897 | bracket[2] = ']'; | ||
898 | bracket[3] = '\0'; | ||
899 | p_bracket(p); | ||
900 | assert(p->next == bracket+3); | ||
901 | p->next = oldnext; | ||
902 | p->end = oldend; | ||
903 | } | ||
904 | |||
905 | /* | ||
906 | - repeat - generate code for a bounded repetition, recursively if needed | ||
907 | == static void repeat(register struct parse *p, sopno start, int from, int to); | ||
908 | */ | ||
909 | static void | ||
910 | repeat(p, start, from, to) | ||
911 | register struct parse *p; | ||
912 | sopno start; /* operand from here to end of strip */ | ||
913 | int from; /* repeated from this number */ | ||
914 | int to; /* to this number of times (maybe INFINITY) */ | ||
915 | { | ||
916 | register sopno finish = HERE(); | ||
917 | # define N 2 | ||
918 | # define INF 3 | ||
919 | # define REP(f, t) ((f)*8 + (t)) | ||
920 | # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N) | ||
921 | register sopno copy; | ||
922 | |||
923 | if (p->error != 0) /* head off possible runaway recursion */ | ||
924 | return; | ||
925 | |||
926 | assert(from <= to); | ||
927 | |||
928 | switch (REP(MAP(from), MAP(to))) { | ||
929 | case REP(0, 0): /* must be user doing this */ | ||
930 | DROP(finish-start); /* drop the operand */ | ||
931 | break; | ||
932 | case REP(0, 1): /* as x{1,1}? */ | ||
933 | case REP(0, N): /* as x{1,n}? */ | ||
934 | case REP(0, INF): /* as x{1,}? */ | ||
935 | /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ | ||
936 | INSERT(OCH_, start); /* offset is wrong... */ | ||
937 | repeat(p, start+1, 1, to); | ||
938 | ASTERN(OOR1, start); | ||
939 | AHEAD(start); /* ... fix it */ | ||
940 | EMIT(OOR2, 0); | ||
941 | AHEAD(THERE()); | ||
942 | ASTERN(O_CH, THERETHERE()); | ||
943 | break; | ||
944 | case REP(1, 1): /* trivial case */ | ||
945 | /* done */ | ||
946 | break; | ||
947 | case REP(1, N): /* as x?x{1,n-1} */ | ||
948 | /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ | ||
949 | INSERT(OCH_, start); | ||
950 | ASTERN(OOR1, start); | ||
951 | AHEAD(start); | ||
952 | EMIT(OOR2, 0); /* offset very wrong... */ | ||
953 | AHEAD(THERE()); /* ...so fix it */ | ||
954 | ASTERN(O_CH, THERETHERE()); | ||
955 | copy = dupl(p, start+1, finish+1); | ||
956 | assert(copy == finish+4); | ||
957 | repeat(p, copy, 1, to-1); | ||
958 | break; | ||
959 | case REP(1, INF): /* as x+ */ | ||
960 | INSERT(OPLUS_, start); | ||
961 | ASTERN(O_PLUS, start); | ||
962 | break; | ||
963 | case REP(N, N): /* as xx{m-1,n-1} */ | ||
964 | copy = dupl(p, start, finish); | ||
965 | repeat(p, copy, from-1, to-1); | ||
966 | break; | ||
967 | case REP(N, INF): /* as xx{n-1,INF} */ | ||
968 | copy = dupl(p, start, finish); | ||
969 | repeat(p, copy, from-1, to); | ||
970 | break; | ||
971 | default: /* "can't happen" */ | ||
972 | SETERROR(REG_ASSERT); /* just in case */ | ||
973 | break; | ||
974 | } | ||
975 | } | ||
976 | |||
977 | /* | ||
978 | - seterr - set an error condition | ||
979 | == static int seterr(register struct parse *p, int e); | ||
980 | */ | ||
981 | static int /* useless but makes type checking happy */ | ||
982 | seterr(p, e) | ||
983 | register struct parse *p; | ||
984 | int e; | ||
985 | { | ||
986 | if (p->error == 0) /* keep earliest error condition */ | ||
987 | p->error = e; | ||
988 | p->next = nuls; /* try to bring things to a halt */ | ||
989 | p->end = nuls; | ||
990 | return(0); /* make the return value well-defined */ | ||
991 | } | ||
992 | |||
993 | /* | ||
994 | - allocset - allocate a set of characters for [] | ||
995 | == static cset *allocset(register struct parse *p); | ||
996 | */ | ||
997 | static cset * | ||
998 | allocset(p) | ||
999 | register struct parse *p; | ||
1000 | { | ||
1001 | register int no = p->g->ncsets++; | ||
1002 | register size_t nc; | ||
1003 | register size_t nbytes; | ||
1004 | register cset *cs; | ||
1005 | register size_t css = (size_t)p->g->csetsize; | ||
1006 | register int i; | ||
1007 | |||
1008 | if (no >= p->ncsalloc) { /* need another column of space */ | ||
1009 | p->ncsalloc += CHAR_BIT; | ||
1010 | nc = p->ncsalloc; | ||
1011 | assert(nc % CHAR_BIT == 0); | ||
1012 | nbytes = nc / CHAR_BIT * css; | ||
1013 | if (p->g->sets == NULL) | ||
1014 | p->g->sets = (cset *)malloc(nc * sizeof(cset)); | ||
1015 | else | ||
1016 | p->g->sets = (cset *)realloc((char *)p->g->sets, | ||
1017 | nc * sizeof(cset)); | ||
1018 | if (p->g->setbits == NULL) | ||
1019 | p->g->setbits = (uch *)malloc(nbytes); | ||
1020 | else { | ||
1021 | p->g->setbits = (uch *)realloc((char *)p->g->setbits, | ||
1022 | nbytes); | ||
1023 | /* xxx this isn't right if setbits is now NULL */ | ||
1024 | for (i = 0; i < no; i++) | ||
1025 | p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT); | ||
1026 | } | ||
1027 | if (p->g->sets != NULL && p->g->setbits != NULL) | ||
1028 | (void) memset((char *)p->g->setbits + (nbytes - css), | ||
1029 | 0, css); | ||
1030 | else { | ||
1031 | no = 0; | ||
1032 | SETERROR(REG_ESPACE); | ||
1033 | /* caller's responsibility not to do set ops */ | ||
1034 | } | ||
1035 | } | ||
1036 | |||
1037 | assert(p->g->sets != NULL); /* xxx */ | ||
1038 | cs = &p->g->sets[no]; | ||
1039 | cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); | ||
1040 | cs->mask = 1 << ((no) % CHAR_BIT); | ||
1041 | cs->hash = 0; | ||
1042 | cs->smultis = 0; | ||
1043 | cs->multis = NULL; | ||
1044 | |||
1045 | return(cs); | ||
1046 | } | ||
1047 | |||
1048 | /* | ||
1049 | - freeset - free a now-unused set | ||
1050 | == static void freeset(register struct parse *p, register cset *cs); | ||
1051 | */ | ||
1052 | static void | ||
1053 | freeset(p, cs) | ||
1054 | register struct parse *p; | ||
1055 | register cset *cs; | ||
1056 | { | ||
1057 | register int i; | ||
1058 | register cset *top = &p->g->sets[p->g->ncsets]; | ||
1059 | register size_t css = (size_t)p->g->csetsize; | ||
1060 | |||
1061 | for (i = 0; i < css; i++) | ||
1062 | CHsub(cs, i); | ||
1063 | if (cs == top-1) /* recover only the easy case */ | ||
1064 | p->g->ncsets--; | ||
1065 | } | ||
1066 | |||
1067 | /* | ||
1068 | - freezeset - final processing on a set of characters | ||
1069 | == static int freezeset(register struct parse *p, register cset *cs); | ||
1070 | * | ||
1071 | * The main task here is merging identical sets. This is usually a waste | ||
1072 | * of time (although the hash code minimizes the overhead), but can win | ||
1073 | * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash | ||
1074 | * is done using addition rather than xor -- all ASCII [aA] sets xor to | ||
1075 | * the same value! | ||
1076 | */ | ||
1077 | static int /* set number */ | ||
1078 | freezeset(p, cs) | ||
1079 | register struct parse *p; | ||
1080 | register cset *cs; | ||
1081 | { | ||
1082 | register uch h = cs->hash; | ||
1083 | register int i; | ||
1084 | register cset *top = &p->g->sets[p->g->ncsets]; | ||
1085 | register cset *cs2; | ||
1086 | register size_t css = (size_t)p->g->csetsize; | ||
1087 | |||
1088 | /* look for an earlier one which is the same */ | ||
1089 | for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) | ||
1090 | if (cs2->hash == h && cs2 != cs) { | ||
1091 | /* maybe */ | ||
1092 | for (i = 0; i < css; i++) | ||
1093 | if (!!CHIN(cs2, i) != !!CHIN(cs, i)) | ||
1094 | break; /* no */ | ||
1095 | if (i == css) | ||
1096 | break; /* yes */ | ||
1097 | } | ||
1098 | |||
1099 | if (cs2 < top) { /* found one */ | ||
1100 | freeset(p, cs); | ||
1101 | cs = cs2; | ||
1102 | } | ||
1103 | |||
1104 | return((int)(cs - p->g->sets)); | ||
1105 | } | ||
1106 | |||
1107 | /* | ||
1108 | - firstch - return first character in a set (which must have at least one) | ||
1109 | == static int firstch(register struct parse *p, register cset *cs); | ||
1110 | */ | ||
1111 | static int /* character; there is no "none" value */ | ||
1112 | firstch(p, cs) | ||
1113 | register struct parse *p; | ||
1114 | register cset *cs; | ||
1115 | { | ||
1116 | register int i; | ||
1117 | register size_t css = (size_t)p->g->csetsize; | ||
1118 | |||
1119 | for (i = 0; i < css; i++) | ||
1120 | if (CHIN(cs, i)) | ||
1121 | return((char)i); | ||
1122 | assert(never); | ||
1123 | return(0); /* arbitrary */ | ||
1124 | } | ||
1125 | |||
1126 | /* | ||
1127 | - nch - number of characters in a set | ||
1128 | == static int nch(register struct parse *p, register cset *cs); | ||
1129 | */ | ||
1130 | static int | ||
1131 | nch(p, cs) | ||
1132 | register struct parse *p; | ||
1133 | register cset *cs; | ||
1134 | { | ||
1135 | register int i; | ||
1136 | register size_t css = (size_t)p->g->csetsize; | ||
1137 | register int n = 0; | ||
1138 | |||
1139 | for (i = 0; i < css; i++) | ||
1140 | if (CHIN(cs, i)) | ||
1141 | n++; | ||
1142 | return(n); | ||
1143 | } | ||
1144 | |||
1145 | /* | ||
1146 | - mcadd - add a collating element to a cset | ||
1147 | == static void mcadd(register struct parse *p, register cset *cs, \ | ||
1148 | == register char *cp); | ||
1149 | */ | ||
1150 | static void | ||
1151 | mcadd(p, cs, cp) | ||
1152 | register struct parse *p; | ||
1153 | register cset *cs; | ||
1154 | register char *cp; | ||
1155 | { | ||
1156 | register size_t oldend = cs->smultis; | ||
1157 | |||
1158 | cs->smultis += strlen(cp) + 1; | ||
1159 | if (cs->multis == NULL) | ||
1160 | cs->multis = malloc(cs->smultis); | ||
1161 | else | ||
1162 | cs->multis = realloc(cs->multis, cs->smultis); | ||
1163 | if (cs->multis == NULL) { | ||
1164 | SETERROR(REG_ESPACE); | ||
1165 | return; | ||
1166 | } | ||
1167 | |||
1168 | (void) strcpy(cs->multis + oldend - 1, cp); | ||
1169 | cs->multis[cs->smultis - 1] = '\0'; | ||
1170 | } | ||
1171 | |||
1172 | /* | ||
1173 | - mcsub - subtract a collating element from a cset | ||
1174 | == static void mcsub(register cset *cs, register char *cp); | ||
1175 | */ | ||
1176 | static void | ||
1177 | mcsub(cs, cp) | ||
1178 | register cset *cs; | ||
1179 | register char *cp; | ||
1180 | { | ||
1181 | register char *fp = mcfind(cs, cp); | ||
1182 | register size_t len = strlen(fp); | ||
1183 | |||
1184 | assert(fp != NULL); | ||
1185 | (void) memmove(fp, fp + len + 1, | ||
1186 | cs->smultis - (fp + len + 1 - cs->multis)); | ||
1187 | cs->smultis -= len; | ||
1188 | |||
1189 | if (cs->smultis == 0) { | ||
1190 | free(cs->multis); | ||
1191 | cs->multis = NULL; | ||
1192 | return; | ||
1193 | } | ||
1194 | |||
1195 | cs->multis = realloc(cs->multis, cs->smultis); | ||
1196 | assert(cs->multis != NULL); | ||
1197 | } | ||
1198 | |||
1199 | /* | ||
1200 | - mcin - is a collating element in a cset? | ||
1201 | == static int mcin(register cset *cs, register char *cp); | ||
1202 | */ | ||
1203 | static int | ||
1204 | mcin(cs, cp) | ||
1205 | register cset *cs; | ||
1206 | register char *cp; | ||
1207 | { | ||
1208 | return(mcfind(cs, cp) != NULL); | ||
1209 | } | ||
1210 | |||
1211 | /* | ||
1212 | - mcfind - find a collating element in a cset | ||
1213 | == static char *mcfind(register cset *cs, register char *cp); | ||
1214 | */ | ||
1215 | static char * | ||
1216 | mcfind(cs, cp) | ||
1217 | register cset *cs; | ||
1218 | register char *cp; | ||
1219 | { | ||
1220 | register char *p; | ||
1221 | |||
1222 | if (cs->multis == NULL) | ||
1223 | return(NULL); | ||
1224 | for (p = cs->multis; *p != '\0'; p += strlen(p) + 1) | ||
1225 | if (strcmp(cp, p) == 0) | ||
1226 | return(p); | ||
1227 | return(NULL); | ||
1228 | } | ||
1229 | |||
1230 | /* | ||
1231 | - mcinvert - invert the list of collating elements in a cset | ||
1232 | == static void mcinvert(register struct parse *p, register cset *cs); | ||
1233 | * | ||
1234 | * This would have to know the set of possibilities. Implementation | ||
1235 | * is deferred. | ||
1236 | */ | ||
1237 | static void | ||
1238 | mcinvert(p, cs) | ||
1239 | register struct parse *p; | ||
1240 | register cset *cs; | ||
1241 | { | ||
1242 | assert(cs->multis == NULL); /* xxx */ | ||
1243 | } | ||
1244 | |||
1245 | /* | ||
1246 | - mccase - add case counterparts of the list of collating elements in a cset | ||
1247 | == static void mccase(register struct parse *p, register cset *cs); | ||
1248 | * | ||
1249 | * This would have to know the set of possibilities. Implementation | ||
1250 | * is deferred. | ||
1251 | */ | ||
1252 | static void | ||
1253 | mccase(p, cs) | ||
1254 | register struct parse *p; | ||
1255 | register cset *cs; | ||
1256 | { | ||
1257 | assert(cs->multis == NULL); /* xxx */ | ||
1258 | } | ||
1259 | |||
1260 | /* | ||
1261 | - isinsets - is this character in any sets? | ||
1262 | == static int isinsets(register struct re_guts *g, int c); | ||
1263 | */ | ||
1264 | static int /* predicate */ | ||
1265 | isinsets(g, c) | ||
1266 | register struct re_guts *g; | ||
1267 | int c; | ||
1268 | { | ||
1269 | register uch *col; | ||
1270 | register int i; | ||
1271 | register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; | ||
1272 | register unsigned uc = (unsigned char)c; | ||
1273 | |||
1274 | for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) | ||
1275 | if (col[uc] != 0) | ||
1276 | return(1); | ||
1277 | return(0); | ||
1278 | } | ||
1279 | |||
1280 | /* | ||
1281 | - samesets - are these two characters in exactly the same sets? | ||
1282 | == static int samesets(register struct re_guts *g, int c1, int c2); | ||
1283 | */ | ||
1284 | static int /* predicate */ | ||
1285 | samesets(g, c1, c2) | ||
1286 | register struct re_guts *g; | ||
1287 | int c1; | ||
1288 | int c2; | ||
1289 | { | ||
1290 | register uch *col; | ||
1291 | register int i; | ||
1292 | register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; | ||
1293 | register unsigned uc1 = (unsigned char)c1; | ||
1294 | register unsigned uc2 = (unsigned char)c2; | ||
1295 | |||
1296 | for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) | ||
1297 | if (col[uc1] != col[uc2]) | ||
1298 | return(0); | ||
1299 | return(1); | ||
1300 | } | ||
1301 | |||
1302 | /* | ||
1303 | - categorize - sort out character categories | ||
1304 | == static void categorize(struct parse *p, register struct re_guts *g); | ||
1305 | */ | ||
1306 | static void | ||
1307 | categorize(p, g) | ||
1308 | struct parse *p; | ||
1309 | register struct re_guts *g; | ||
1310 | { | ||
1311 | register cat_t *cats = g->categories; | ||
1312 | register int c; | ||
1313 | register int c2; | ||
1314 | register cat_t cat; | ||
1315 | |||
1316 | /* avoid making error situations worse */ | ||
1317 | if (p->error != 0) | ||
1318 | return; | ||
1319 | |||
1320 | for (c = CHAR_MIN; c <= CHAR_MAX; c++) | ||
1321 | if (cats[c] == 0 && isinsets(g, c)) { | ||
1322 | cat = g->ncategories++; | ||
1323 | cats[c] = cat; | ||
1324 | for (c2 = c+1; c2 <= CHAR_MAX; c2++) | ||
1325 | if (cats[c2] == 0 && samesets(g, c, c2)) | ||
1326 | cats[c2] = cat; | ||
1327 | } | ||
1328 | } | ||
1329 | |||
1330 | /* | ||
1331 | - dupl - emit a duplicate of a bunch of sops | ||
1332 | == static sopno dupl(register struct parse *p, sopno start, sopno finish); | ||
1333 | */ | ||
1334 | static sopno /* start of duplicate */ | ||
1335 | dupl(p, start, finish) | ||
1336 | register struct parse *p; | ||
1337 | sopno start; /* from here */ | ||
1338 | sopno finish; /* to this less one */ | ||
1339 | { | ||
1340 | register sopno ret = HERE(); | ||
1341 | register sopno len = finish - start; | ||
1342 | |||
1343 | assert(finish >= start); | ||
1344 | if (len == 0) | ||
1345 | return(ret); | ||
1346 | enlarge(p, p->ssize + len); /* this many unexpected additions */ | ||
1347 | assert(p->ssize >= p->slen + len); | ||
1348 | (void) memcpy((char *)(p->strip + p->slen), | ||
1349 | (char *)(p->strip + start), (size_t)len*sizeof(sop)); | ||
1350 | p->slen += len; | ||
1351 | return(ret); | ||
1352 | } | ||
1353 | |||
1354 | /* | ||
1355 | - doemit - emit a strip operator | ||
1356 | == static void doemit(register struct parse *p, sop op, size_t opnd); | ||
1357 | * | ||
1358 | * It might seem better to implement this as a macro with a function as | ||
1359 | * hard-case backup, but it's just too big and messy unless there are | ||
1360 | * some changes to the data structures. Maybe later. | ||
1361 | */ | ||
1362 | static void | ||
1363 | doemit(p, op, opnd) | ||
1364 | register struct parse *p; | ||
1365 | sop op; | ||
1366 | size_t opnd; | ||
1367 | { | ||
1368 | /* avoid making error situations worse */ | ||
1369 | if (p->error != 0) | ||
1370 | return; | ||
1371 | |||
1372 | /* deal with oversize operands ("can't happen", more or less) */ | ||
1373 | assert(opnd < 1<<OPSHIFT); | ||
1374 | |||
1375 | /* deal with undersized strip */ | ||
1376 | if (p->slen >= p->ssize) | ||
1377 | enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */ | ||
1378 | assert(p->slen < p->ssize); | ||
1379 | |||
1380 | /* finally, it's all reduced to the easy case */ | ||
1381 | p->strip[p->slen++] = SOP(op, opnd); | ||
1382 | } | ||
1383 | |||
1384 | /* | ||
1385 | - doinsert - insert a sop into the strip | ||
1386 | == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); | ||
1387 | */ | ||
1388 | static void | ||
1389 | doinsert(p, op, opnd, pos) | ||
1390 | register struct parse *p; | ||
1391 | sop op; | ||
1392 | size_t opnd; | ||
1393 | sopno pos; | ||
1394 | { | ||
1395 | register sopno sn; | ||
1396 | register sop s; | ||
1397 | register int i; | ||
1398 | |||
1399 | /* avoid making error situations worse */ | ||
1400 | if (p->error != 0) | ||
1401 | return; | ||
1402 | |||
1403 | sn = HERE(); | ||
1404 | EMIT(op, opnd); /* do checks, ensure space */ | ||
1405 | assert(HERE() == sn+1); | ||
1406 | s = p->strip[sn]; | ||
1407 | |||
1408 | /* adjust paren pointers */ | ||
1409 | assert(pos > 0); | ||
1410 | for (i = 1; i < NPAREN; i++) { | ||
1411 | if (p->pbegin[i] >= pos) { | ||
1412 | p->pbegin[i]++; | ||
1413 | } | ||
1414 | if (p->pend[i] >= pos) { | ||
1415 | p->pend[i]++; | ||
1416 | } | ||
1417 | } | ||
1418 | |||
1419 | memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos], | ||
1420 | (HERE()-pos-1)*sizeof(sop)); | ||
1421 | p->strip[pos] = s; | ||
1422 | } | ||
1423 | |||
1424 | /* | ||
1425 | - dofwd - complete a forward reference | ||
1426 | == static void dofwd(register struct parse *p, sopno pos, sop value); | ||
1427 | */ | ||
1428 | static void | ||
1429 | dofwd(p, pos, value) | ||
1430 | register struct parse *p; | ||
1431 | register sopno pos; | ||
1432 | sop value; | ||
1433 | { | ||
1434 | /* avoid making error situations worse */ | ||
1435 | if (p->error != 0) | ||
1436 | return; | ||
1437 | |||
1438 | assert(value < 1<<OPSHIFT); | ||
1439 | p->strip[pos] = OP(p->strip[pos]) | value; | ||
1440 | } | ||
1441 | |||
1442 | /* | ||
1443 | - enlarge - enlarge the strip | ||
1444 | == static void enlarge(register struct parse *p, sopno size); | ||
1445 | */ | ||
1446 | static void | ||
1447 | enlarge(p, size) | ||
1448 | register struct parse *p; | ||
1449 | register sopno size; | ||
1450 | { | ||
1451 | register sop *sp; | ||
1452 | |||
1453 | if (p->ssize >= size) | ||
1454 | return; | ||
1455 | |||
1456 | sp = (sop *)realloc(p->strip, size*sizeof(sop)); | ||
1457 | if (sp == NULL) { | ||
1458 | SETERROR(REG_ESPACE); | ||
1459 | return; | ||
1460 | } | ||
1461 | p->strip = sp; | ||
1462 | p->ssize = size; | ||
1463 | } | ||
1464 | |||
1465 | /* | ||
1466 | - stripsnug - compact the strip | ||
1467 | == static void stripsnug(register struct parse *p, register struct re_guts *g); | ||
1468 | */ | ||
1469 | static void | ||
1470 | stripsnug(p, g) | ||
1471 | register struct parse *p; | ||
1472 | register struct re_guts *g; | ||
1473 | { | ||
1474 | g->nstates = p->slen; | ||
1475 | g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop)); | ||
1476 | if (g->strip == NULL) { | ||
1477 | SETERROR(REG_ESPACE); | ||
1478 | g->strip = p->strip; | ||
1479 | } | ||
1480 | } | ||
1481 | |||
1482 | /* | ||
1483 | - findmust - fill in must and mlen with longest mandatory literal string | ||
1484 | == static void findmust(register struct parse *p, register struct re_guts *g); | ||
1485 | * | ||
1486 | * This algorithm could do fancy things like analyzing the operands of | | ||
1487 | * for common subsequences. Someday. This code is simple and finds most | ||
1488 | * of the interesting cases. | ||
1489 | * | ||
1490 | * Note that must and mlen got initialized during setup. | ||
1491 | */ | ||
1492 | static void | ||
1493 | findmust(p, g) | ||
1494 | struct parse *p; | ||
1495 | register struct re_guts *g; | ||
1496 | { | ||
1497 | register sop *scan; | ||
1498 | sop *start; | ||
1499 | register sop *newstart; | ||
1500 | register sopno newlen; | ||
1501 | register sop s; | ||
1502 | register char *cp; | ||
1503 | register sopno i; | ||
1504 | |||
1505 | /* avoid making error situations worse */ | ||
1506 | if (p->error != 0) | ||
1507 | return; | ||
1508 | |||
1509 | /* find the longest OCHAR sequence in strip */ | ||
1510 | newlen = 0; | ||
1511 | scan = g->strip + 1; | ||
1512 | do { | ||
1513 | s = *scan++; | ||
1514 | switch (OP(s)) { | ||
1515 | case OCHAR: /* sequence member */ | ||
1516 | if (newlen == 0) /* new sequence */ | ||
1517 | newstart = scan - 1; | ||
1518 | newlen++; | ||
1519 | break; | ||
1520 | case OPLUS_: /* things that don't break one */ | ||
1521 | case OLPAREN: | ||
1522 | case ORPAREN: | ||
1523 | break; | ||
1524 | case OQUEST_: /* things that must be skipped */ | ||
1525 | case OCH_: | ||
1526 | scan--; | ||
1527 | do { | ||
1528 | scan += OPND(s); | ||
1529 | s = *scan; | ||
1530 | /* assert() interferes w debug printouts */ | ||
1531 | if (OP(s) != O_QUEST && OP(s) != O_CH && | ||
1532 | OP(s) != OOR2) { | ||
1533 | g->iflags |= BAD; | ||
1534 | return; | ||
1535 | } | ||
1536 | } while (OP(s) != O_QUEST && OP(s) != O_CH); | ||
1537 | /* fallthrough */ | ||
1538 | default: /* things that break a sequence */ | ||
1539 | if (newlen > g->mlen) { /* ends one */ | ||
1540 | start = newstart; | ||
1541 | g->mlen = newlen; | ||
1542 | } | ||
1543 | newlen = 0; | ||
1544 | break; | ||
1545 | } | ||
1546 | } while (OP(s) != OEND); | ||
1547 | |||
1548 | if (g->mlen == 0) /* there isn't one */ | ||
1549 | return; | ||
1550 | |||
1551 | /* turn it into a character string */ | ||
1552 | g->must = malloc((size_t)g->mlen + 1); | ||
1553 | if (g->must == NULL) { /* argh; just forget it */ | ||
1554 | g->mlen = 0; | ||
1555 | return; | ||
1556 | } | ||
1557 | cp = g->must; | ||
1558 | scan = start; | ||
1559 | for (i = g->mlen; i > 0; i--) { | ||
1560 | while (OP(s = *scan++) != OCHAR) | ||
1561 | continue; | ||
1562 | assert(cp < g->must + g->mlen); | ||
1563 | *cp++ = (char)OPND(s); | ||
1564 | } | ||
1565 | assert(cp == g->must + g->mlen); | ||
1566 | *cp++ = '\0'; /* just on general principles */ | ||
1567 | } | ||
1568 | |||
1569 | /* | ||
1570 | - pluscount - count + nesting | ||
1571 | == static sopno pluscount(register struct parse *p, register struct re_guts *g); | ||
1572 | */ | ||
1573 | static sopno /* nesting depth */ | ||
1574 | pluscount(p, g) | ||
1575 | struct parse *p; | ||
1576 | register struct re_guts *g; | ||
1577 | { | ||
1578 | register sop *scan; | ||
1579 | register sop s; | ||
1580 | register sopno plusnest = 0; | ||
1581 | register sopno maxnest = 0; | ||
1582 | |||
1583 | if (p->error != 0) | ||
1584 | return(0); /* there may not be an OEND */ | ||
1585 | |||
1586 | scan = g->strip + 1; | ||
1587 | do { | ||
1588 | s = *scan++; | ||
1589 | switch (OP(s)) { | ||
1590 | case OPLUS_: | ||
1591 | plusnest++; | ||
1592 | break; | ||
1593 | case O_PLUS: | ||
1594 | if (plusnest > maxnest) | ||
1595 | maxnest = plusnest; | ||
1596 | plusnest--; | ||
1597 | break; | ||
1598 | } | ||
1599 | } while (OP(s) != OEND); | ||
1600 | if (plusnest != 0) | ||
1601 | g->iflags |= BAD; | ||
1602 | return(maxnest); | ||
1603 | } | ||
diff --git a/src/lib/evil/regex/regcomp.ih b/src/lib/evil/regex/regcomp.ih new file mode 100644 index 0000000000..357461f77f --- /dev/null +++ b/src/lib/evil/regex/regcomp.ih | |||
@@ -0,0 +1,51 @@ | |||
1 | /* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
2 | #ifdef __cplusplus | ||
3 | extern "C" { | ||
4 | #endif | ||
5 | |||
6 | /* === lib/evil/regex/regcomp.c === */ | ||
7 | static void p_ere(register struct parse *p, int stop); | ||
8 | static void p_ere_exp(register struct parse *p); | ||
9 | static void p_str(register struct parse *p); | ||
10 | static void p_bre(register struct parse *p, register int end1, register int end2); | ||
11 | static int p_simp_re(register struct parse *p, int starordinary); | ||
12 | static int p_count(register struct parse *p); | ||
13 | static void p_bracket(register struct parse *p); | ||
14 | static void p_b_term(register struct parse *p, register cset *cs); | ||
15 | static void p_b_cclass(register struct parse *p, register cset *cs); | ||
16 | static void p_b_eclass(register struct parse *p, register cset *cs); | ||
17 | static char p_b_symbol(register struct parse *p); | ||
18 | static char p_b_coll_elem(register struct parse *p, int endc); | ||
19 | static char othercase(int ch); | ||
20 | static void bothcases(register struct parse *p, int ch); | ||
21 | static void ordinary(register struct parse *p, register int ch); | ||
22 | static void nonnewline(register struct parse *p); | ||
23 | static void repeat(register struct parse *p, sopno start, int from, int to); | ||
24 | static int seterr(register struct parse *p, int e); | ||
25 | static cset *allocset(register struct parse *p); | ||
26 | static void freeset(register struct parse *p, register cset *cs); | ||
27 | static int freezeset(register struct parse *p, register cset *cs); | ||
28 | static int firstch(register struct parse *p, register cset *cs); | ||
29 | static int nch(register struct parse *p, register cset *cs); | ||
30 | static void mcadd(register struct parse *p, register cset *cs, register char *cp); | ||
31 | static void mcsub(register cset *cs, register char *cp); | ||
32 | static int mcin(register cset *cs, register char *cp); | ||
33 | static char *mcfind(register cset *cs, register char *cp); | ||
34 | static void mcinvert(register struct parse *p, register cset *cs); | ||
35 | static void mccase(register struct parse *p, register cset *cs); | ||
36 | static int isinsets(register struct re_guts *g, int c); | ||
37 | static int samesets(register struct re_guts *g, int c1, int c2); | ||
38 | static void categorize(struct parse *p, register struct re_guts *g); | ||
39 | static sopno dupl(register struct parse *p, sopno start, sopno finish); | ||
40 | static void doemit(register struct parse *p, sop op, size_t opnd); | ||
41 | static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos); | ||
42 | static void dofwd(register struct parse *p, sopno pos, sop value); | ||
43 | static void enlarge(register struct parse *p, sopno size); | ||
44 | static void stripsnug(register struct parse *p, register struct re_guts *g); | ||
45 | static void findmust(register struct parse *p, register struct re_guts *g); | ||
46 | static sopno pluscount(register struct parse *p, register struct re_guts *g); | ||
47 | |||
48 | #ifdef __cplusplus | ||
49 | } | ||
50 | #endif | ||
51 | /* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
diff --git a/src/lib/evil/regex/regerror.c b/src/lib/evil/regex/regerror.c new file mode 100644 index 0000000000..e3cbe4b770 --- /dev/null +++ b/src/lib/evil/regex/regerror.c | |||
@@ -0,0 +1,126 @@ | |||
1 | #include <sys/types.h> | ||
2 | #include <stdio.h> | ||
3 | #include <string.h> | ||
4 | #include <ctype.h> | ||
5 | #include <limits.h> | ||
6 | #include <stdlib.h> | ||
7 | #include <regex.h> | ||
8 | |||
9 | #include "utils.h" | ||
10 | #include "regerror.ih" | ||
11 | |||
12 | /* | ||
13 | = #define REG_OKAY 0 | ||
14 | = #define REG_NOMATCH 1 | ||
15 | = #define REG_BADPAT 2 | ||
16 | = #define REG_ECOLLATE 3 | ||
17 | = #define REG_ECTYPE 4 | ||
18 | = #define REG_EESCAPE 5 | ||
19 | = #define REG_ESUBREG 6 | ||
20 | = #define REG_EBRACK 7 | ||
21 | = #define REG_EPAREN 8 | ||
22 | = #define REG_EBRACE 9 | ||
23 | = #define REG_BADBR 10 | ||
24 | = #define REG_ERANGE 11 | ||
25 | = #define REG_ESPACE 12 | ||
26 | = #define REG_BADRPT 13 | ||
27 | = #define REG_EMPTY 14 | ||
28 | = #define REG_ASSERT 15 | ||
29 | = #define REG_INVARG 16 | ||
30 | = #define REG_ATOI 255 // convert name to number (!) | ||
31 | = #define REG_ITOA 0400 // convert number to name (!) | ||
32 | */ | ||
33 | static struct rerr { | ||
34 | int code; | ||
35 | char *name; | ||
36 | char *explain; | ||
37 | } rerrs[] = { | ||
38 | REG_OKAY, "REG_OKAY", "no errors detected", | ||
39 | REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match", | ||
40 | REG_BADPAT, "REG_BADPAT", "invalid regular expression", | ||
41 | REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element", | ||
42 | REG_ECTYPE, "REG_ECTYPE", "invalid character class", | ||
43 | REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)", | ||
44 | REG_ESUBREG, "REG_ESUBREG", "invalid backreference number", | ||
45 | REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced", | ||
46 | REG_EPAREN, "REG_EPAREN", "parentheses not balanced", | ||
47 | REG_EBRACE, "REG_EBRACE", "braces not balanced", | ||
48 | REG_BADBR, "REG_BADBR", "invalid repetition count(s)", | ||
49 | REG_ERANGE, "REG_ERANGE", "invalid character range", | ||
50 | REG_ESPACE, "REG_ESPACE", "out of memory", | ||
51 | REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid", | ||
52 | REG_EMPTY, "REG_EMPTY", "empty (sub)expression", | ||
53 | REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug", | ||
54 | REG_INVARG, "REG_INVARG", "invalid argument to regex routine", | ||
55 | -1, "", "*** unknown regexp error code ***", | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | - regerror - the interface to error numbers | ||
60 | = extern size_t regerror(int, const regex_t *, char *, size_t); | ||
61 | */ | ||
62 | /* ARGSUSED */ | ||
63 | EAPI size_t | ||
64 | regerror(errcode, preg, errbuf, errbuf_size) | ||
65 | int errcode; | ||
66 | const regex_t *preg; | ||
67 | char *errbuf; | ||
68 | size_t errbuf_size; | ||
69 | { | ||
70 | register struct rerr *r; | ||
71 | register size_t len; | ||
72 | register int target = errcode &~ REG_ITOA; | ||
73 | register char *s; | ||
74 | char convbuf[50]; | ||
75 | |||
76 | if (errcode == REG_ATOI) | ||
77 | s = regatoi(preg, convbuf); | ||
78 | else { | ||
79 | for (r = rerrs; r->code >= 0; r++) | ||
80 | if (r->code == target) | ||
81 | break; | ||
82 | |||
83 | if (errcode®_ITOA) { | ||
84 | if (r->code >= 0) | ||
85 | (void) strcpy(convbuf, r->name); | ||
86 | else | ||
87 | sprintf(convbuf, "REG_0x%x", target); | ||
88 | assert(strlen(convbuf) < sizeof(convbuf)); | ||
89 | s = convbuf; | ||
90 | } else | ||
91 | s = r->explain; | ||
92 | } | ||
93 | |||
94 | len = strlen(s) + 1; | ||
95 | if (errbuf_size > 0) { | ||
96 | if (errbuf_size > len) | ||
97 | (void) strcpy(errbuf, s); | ||
98 | else { | ||
99 | (void) strncpy(errbuf, s, errbuf_size-1); | ||
100 | errbuf[errbuf_size-1] = '\0'; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | return(len); | ||
105 | } | ||
106 | |||
107 | /* | ||
108 | - regatoi - internal routine to implement REG_ATOI | ||
109 | == static char *regatoi(const regex_t *preg, char *localbuf); | ||
110 | */ | ||
111 | static char * | ||
112 | regatoi(preg, localbuf) | ||
113 | const regex_t *preg; | ||
114 | char *localbuf; | ||
115 | { | ||
116 | register struct rerr *r; | ||
117 | |||
118 | for (r = rerrs; r->code >= 0; r++) | ||
119 | if (strcmp(r->name, preg->re_endp) == 0) | ||
120 | break; | ||
121 | if (r->code < 0) | ||
122 | return("0"); | ||
123 | |||
124 | sprintf(localbuf, "%d", r->code); | ||
125 | return(localbuf); | ||
126 | } | ||
diff --git a/src/lib/evil/regex/regerror.ih b/src/lib/evil/regex/regerror.ih new file mode 100644 index 0000000000..e3f391cc90 --- /dev/null +++ b/src/lib/evil/regex/regerror.ih | |||
@@ -0,0 +1,12 @@ | |||
1 | /* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
2 | #ifdef __cplusplus | ||
3 | extern "C" { | ||
4 | #endif | ||
5 | |||
6 | /* === lib/evil/regex/regerror.c === */ | ||
7 | static char *regatoi(const regex_t *preg, char *localbuf); | ||
8 | |||
9 | #ifdef __cplusplus | ||
10 | } | ||
11 | #endif | ||
12 | /* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
diff --git a/src/lib/evil/regex/regex.h b/src/lib/evil/regex/regex.h new file mode 100644 index 0000000000..2288c49ae4 --- /dev/null +++ b/src/lib/evil/regex/regex.h | |||
@@ -0,0 +1,77 @@ | |||
1 | #ifndef _REGEX_H_ | ||
2 | #define _REGEX_H_ /* never again */ | ||
3 | |||
4 | #include <evil_macro.h> | ||
5 | |||
6 | /* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
7 | #ifdef __cplusplus | ||
8 | extern "C" { | ||
9 | #endif | ||
10 | |||
11 | /* === lib/evil/regex/regex2.h === */ | ||
12 | typedef off_t regoff_t; | ||
13 | typedef struct { | ||
14 | int re_magic; | ||
15 | size_t re_nsub; /* number of parenthesized subexpressions */ | ||
16 | const char *re_endp; /* end pointer for REG_PEND */ | ||
17 | struct re_guts *re_g; /* none of your business :-) */ | ||
18 | } regex_t; | ||
19 | typedef struct { | ||
20 | regoff_t rm_so; /* start of match */ | ||
21 | regoff_t rm_eo; /* end of match */ | ||
22 | } regmatch_t; | ||
23 | |||
24 | |||
25 | /* === lib/evil/regex/regcomp.c === */ | ||
26 | EAPI int regcomp(regex_t *, const char *, int); | ||
27 | #define REG_BASIC 0000 | ||
28 | #define REG_EXTENDED 0001 | ||
29 | #define REG_ICASE 0002 | ||
30 | #define REG_NOSUB 0004 | ||
31 | #define REG_NEWLINE 0010 | ||
32 | #define REG_NOSPEC 0020 | ||
33 | #define REG_PEND 0040 | ||
34 | #define REG_DUMP 0200 | ||
35 | |||
36 | |||
37 | /* === lib/evil/regex/regerror.c === */ | ||
38 | #define REG_OKAY 0 | ||
39 | #define REG_NOMATCH 1 | ||
40 | #define REG_BADPAT 2 | ||
41 | #define REG_ECOLLATE 3 | ||
42 | #define REG_ECTYPE 4 | ||
43 | #define REG_EESCAPE 5 | ||
44 | #define REG_ESUBREG 6 | ||
45 | #define REG_EBRACK 7 | ||
46 | #define REG_EPAREN 8 | ||
47 | #define REG_EBRACE 9 | ||
48 | #define REG_BADBR 10 | ||
49 | #define REG_ERANGE 11 | ||
50 | #define REG_ESPACE 12 | ||
51 | #define REG_BADRPT 13 | ||
52 | #define REG_EMPTY 14 | ||
53 | #define REG_ASSERT 15 | ||
54 | #define REG_INVARG 16 | ||
55 | #define REG_ATOI 255 /* convert name to number (!) */ | ||
56 | #define REG_ITOA 0400 /* convert number to name (!) */ | ||
57 | EAPI size_t regerror(int, const regex_t *, char *, size_t); | ||
58 | |||
59 | |||
60 | /* === lib/evil/regex/regexec.c === */ | ||
61 | EAPI int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); | ||
62 | #define REG_NOTBOL 00001 | ||
63 | #define REG_NOTEOL 00002 | ||
64 | #define REG_STARTEND 00004 | ||
65 | #define REG_TRACE 00400 /* tracing of execution */ | ||
66 | #define REG_LARGE 01000 /* force large representation */ | ||
67 | #define REG_BACKR 02000 /* force use of backref code */ | ||
68 | |||
69 | |||
70 | /* === lib/evil/regex/regfree.c === */ | ||
71 | EAPI void regfree(regex_t *); | ||
72 | |||
73 | #ifdef __cplusplus | ||
74 | } | ||
75 | #endif | ||
76 | /* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */ | ||
77 | #endif | ||
diff --git a/src/lib/evil/regex/regex2.h b/src/lib/evil/regex/regex2.h new file mode 100644 index 0000000000..58fd8d8a43 --- /dev/null +++ b/src/lib/evil/regex/regex2.h | |||
@@ -0,0 +1,134 @@ | |||
1 | /* | ||
2 | * First, the stuff that ends up in the outside-world include file | ||
3 | = typedef off_t regoff_t; | ||
4 | = typedef struct { | ||
5 | = int re_magic; | ||
6 | = size_t re_nsub; // number of parenthesized subexpressions | ||
7 | = const char *re_endp; // end pointer for REG_PEND | ||
8 | = struct re_guts *re_g; // none of your business :-) | ||
9 | = } regex_t; | ||
10 | = typedef struct { | ||
11 | = regoff_t rm_so; // start of match | ||
12 | = regoff_t rm_eo; // end of match | ||
13 | = } regmatch_t; | ||
14 | */ | ||
15 | /* | ||
16 | * internals of regex_t | ||
17 | */ | ||
18 | #define MAGIC1 ((('r'^0200)<<8) | 'e') | ||
19 | |||
20 | /* | ||
21 | * The internal representation is a *strip*, a sequence of | ||
22 | * operators ending with an endmarker. (Some terminology etc. is a | ||
23 | * historical relic of earlier versions which used multiple strips.) | ||
24 | * Certain oddities in the representation are there to permit running | ||
25 | * the machinery backwards; in particular, any deviation from sequential | ||
26 | * flow must be marked at both its source and its destination. Some | ||
27 | * fine points: | ||
28 | * | ||
29 | * - OPLUS_ and O_PLUS are *inside* the loop they create. | ||
30 | * - OQUEST_ and O_QUEST are *outside* the bypass they create. | ||
31 | * - OCH_ and O_CH are *outside* the multi-way branch they create, while | ||
32 | * OOR1 and OOR2 are respectively the end and the beginning of one of | ||
33 | * the branches. Note that there is an implicit OOR2 following OCH_ | ||
34 | * and an implicit OOR1 preceding O_CH. | ||
35 | * | ||
36 | * In state representations, an operator's bit is on to signify a state | ||
37 | * immediately *preceding* "execution" of that operator. | ||
38 | */ | ||
39 | typedef long sop; /* strip operator */ | ||
40 | typedef long sopno; | ||
41 | #define OPRMASK 0x7c000000 | ||
42 | #define OPDMASK 0x03ffffff | ||
43 | #define OPSHIFT (26) | ||
44 | #define OP(n) ((n)&OPRMASK) | ||
45 | #define OPND(n) ((n)&OPDMASK) | ||
46 | #define SOP(op, opnd) ((op)|(opnd)) | ||
47 | /* operators meaning operand */ | ||
48 | /* (back, fwd are offsets) */ | ||
49 | #define OEND (1<<OPSHIFT) /* endmarker - */ | ||
50 | #define OCHAR (2<<OPSHIFT) /* character unsigned char */ | ||
51 | #define OBOL (3<<OPSHIFT) /* left anchor - */ | ||
52 | #define OEOL (4<<OPSHIFT) /* right anchor - */ | ||
53 | #define OANY (5<<OPSHIFT) /* . - */ | ||
54 | #define OANYOF (6<<OPSHIFT) /* [...] set number */ | ||
55 | #define OBACK_ (7<<OPSHIFT) /* begin \d paren number */ | ||
56 | #define O_BACK (8<<OPSHIFT) /* end \d paren number */ | ||
57 | #define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */ | ||
58 | #define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */ | ||
59 | #define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */ | ||
60 | #define O_QUEST (12<<OPSHIFT) /* ? suffix back to prefix */ | ||
61 | #define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */ | ||
62 | #define ORPAREN (14<<OPSHIFT) /* ) back to ( */ | ||
63 | #define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */ | ||
64 | #define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */ | ||
65 | #define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */ | ||
66 | #define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */ | ||
67 | #define OBOW (19<<OPSHIFT) /* begin word - */ | ||
68 | #define OEOW (20<<OPSHIFT) /* end word - */ | ||
69 | |||
70 | /* | ||
71 | * Structure for [] character-set representation. Character sets are | ||
72 | * done as bit vectors, grouped 8 to a byte vector for compactness. | ||
73 | * The individual set therefore has both a pointer to the byte vector | ||
74 | * and a mask to pick out the relevant bit of each byte. A hash code | ||
75 | * simplifies testing whether two sets could be identical. | ||
76 | * | ||
77 | * This will get trickier for multicharacter collating elements. As | ||
78 | * preliminary hooks for dealing with such things, we also carry along | ||
79 | * a string of multi-character elements, and decide the size of the | ||
80 | * vectors at run time. | ||
81 | */ | ||
82 | typedef struct { | ||
83 | uch *ptr; /* -> uch [csetsize] */ | ||
84 | uch mask; /* bit within array */ | ||
85 | uch hash; /* hash code */ | ||
86 | size_t smultis; | ||
87 | char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ | ||
88 | } cset; | ||
89 | /* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ | ||
90 | #define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) | ||
91 | #define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) | ||
92 | #define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) | ||
93 | #define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ | ||
94 | #define MCsub(p, cs, cp) mcsub(p, cs, cp) | ||
95 | #define MCin(p, cs, cp) mcin(p, cs, cp) | ||
96 | |||
97 | /* stuff for character categories */ | ||
98 | typedef unsigned char cat_t; | ||
99 | |||
100 | /* | ||
101 | * main compiled-expression structure | ||
102 | */ | ||
103 | struct re_guts { | ||
104 | int magic; | ||
105 | # define MAGIC2 ((('R'^0200)<<8)|'E') | ||
106 | sop *strip; /* malloced area for strip */ | ||
107 | int csetsize; /* number of bits in a cset vector */ | ||
108 | int ncsets; /* number of csets in use */ | ||
109 | cset *sets; /* -> cset [ncsets] */ | ||
110 | uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ | ||
111 | int cflags; /* copy of regcomp() cflags argument */ | ||
112 | sopno nstates; /* = number of sops */ | ||
113 | sopno firststate; /* the initial OEND (normally 0) */ | ||
114 | sopno laststate; /* the final OEND */ | ||
115 | int iflags; /* internal flags */ | ||
116 | # define USEBOL 01 /* used ^ */ | ||
117 | # define USEEOL 02 /* used $ */ | ||
118 | # define BAD 04 /* something wrong */ | ||
119 | int nbol; /* number of ^ used */ | ||
120 | int neol; /* number of $ used */ | ||
121 | int ncategories; /* how many character categories */ | ||
122 | cat_t *categories; /* ->catspace[-CHAR_MIN] */ | ||
123 | char *must; /* match must contain this string */ | ||
124 | int mlen; /* length of must */ | ||
125 | size_t nsub; /* copy of re_nsub */ | ||
126 | int backrefs; /* does it use back references? */ | ||
127 | sopno nplus; /* how deep does it nest +s? */ | ||
128 | /* catspace must be last */ | ||
129 | cat_t catspace[1]; /* actually [NC] */ | ||
130 | }; | ||
131 | |||
132 | /* misc utilities */ | ||
133 | #define OUT (CHAR_MAX+1) /* a non-character value */ | ||
134 | #define ISWORD(c) (isalnum(c) || (c) == '_') | ||
diff --git a/src/lib/evil/regex/regexec.c b/src/lib/evil/regex/regexec.c new file mode 100644 index 0000000000..3a9135d7b8 --- /dev/null +++ b/src/lib/evil/regex/regexec.c | |||
@@ -0,0 +1,138 @@ | |||
1 | /* | ||
2 | * the outer shell of regexec() | ||
3 | * | ||
4 | * This file includes engine.c *twice*, after muchos fiddling with the | ||
5 | * macros that code uses. This lets the same code operate on two different | ||
6 | * representations for state sets. | ||
7 | */ | ||
8 | #include <sys/types.h> | ||
9 | #include <stdio.h> | ||
10 | #include <stdlib.h> | ||
11 | #include <string.h> | ||
12 | #include <limits.h> | ||
13 | #include <ctype.h> | ||
14 | #include <regex.h> | ||
15 | |||
16 | #include "utils.h" | ||
17 | #include "regex2.h" | ||
18 | |||
19 | static int nope = 0; /* for use in asserts; shuts lint up */ | ||
20 | |||
21 | /* macros for manipulating states, small version */ | ||
22 | #define states unsigned | ||
23 | #define states1 unsigned /* for later use in regexec() decision */ | ||
24 | #define CLEAR(v) ((v) = 0) | ||
25 | #define SET0(v, n) ((v) &= ~((unsigned)1 << (n))) | ||
26 | #define SET1(v, n) ((v) |= (unsigned)1 << (n)) | ||
27 | #define ISSET(v, n) ((v) & ((unsigned)1 << (n))) | ||
28 | #define ASSIGN(d, s) ((d) = (s)) | ||
29 | #define EQ(a, b) ((a) == (b)) | ||
30 | #define STATEVARS int dummy /* dummy version */ | ||
31 | #define STATESETUP(m, n) /* nothing */ | ||
32 | #define STATETEARDOWN(m) /* nothing */ | ||
33 | #define SETUP(v) ((v) = 0) | ||
34 | #define onestate unsigned | ||
35 | #define INIT(o, n) ((o) = (unsigned)1 << (n)) | ||
36 | #define INC(o) ((o) <<= 1) | ||
37 | #define ISSTATEIN(v, o) ((v) & (o)) | ||
38 | /* some abbreviations; note that some of these know variable names! */ | ||
39 | /* do "if I'm here, I can also be there" etc without branches */ | ||
40 | #define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n)) | ||
41 | #define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n)) | ||
42 | #define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n))) | ||
43 | /* function names */ | ||
44 | #define SNAMES /* engine.c looks after details */ | ||
45 | |||
46 | #include "engine.c" | ||
47 | |||
48 | /* now undo things */ | ||
49 | #undef states | ||
50 | #undef CLEAR | ||
51 | #undef SET0 | ||
52 | #undef SET1 | ||
53 | #undef ISSET | ||
54 | #undef ASSIGN | ||
55 | #undef EQ | ||
56 | #undef STATEVARS | ||
57 | #undef STATESETUP | ||
58 | #undef STATETEARDOWN | ||
59 | #undef SETUP | ||
60 | #undef onestate | ||
61 | #undef INIT | ||
62 | #undef INC | ||
63 | #undef ISSTATEIN | ||
64 | #undef FWD | ||
65 | #undef BACK | ||
66 | #undef ISSETBACK | ||
67 | #undef SNAMES | ||
68 | |||
69 | /* macros for manipulating states, large version */ | ||
70 | #define states char * | ||
71 | #define CLEAR(v) memset(v, 0, m->g->nstates) | ||
72 | #define SET0(v, n) ((v)[n] = 0) | ||
73 | #define SET1(v, n) ((v)[n] = 1) | ||
74 | #define ISSET(v, n) ((v)[n]) | ||
75 | #define ASSIGN(d, s) memcpy(d, s, m->g->nstates) | ||
76 | #define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) | ||
77 | #define STATEVARS int vn; char *space | ||
78 | #define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ | ||
79 | if ((m)->space == NULL) return(REG_ESPACE); \ | ||
80 | (m)->vn = 0; } | ||
81 | #define STATETEARDOWN(m) { free((m)->space); } | ||
82 | #define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) | ||
83 | #define onestate int | ||
84 | #define INIT(o, n) ((o) = (n)) | ||
85 | #define INC(o) ((o)++) | ||
86 | #define ISSTATEIN(v, o) ((v)[o]) | ||
87 | /* some abbreviations; note that some of these know variable names! */ | ||
88 | /* do "if I'm here, I can also be there" etc without branches */ | ||
89 | #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) | ||
90 | #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) | ||
91 | #define ISSETBACK(v, n) ((v)[here - (n)]) | ||
92 | /* function names */ | ||
93 | #define LNAMES /* flag */ | ||
94 | |||
95 | #include "engine.c" | ||
96 | |||
97 | /* | ||
98 | - regexec - interface for matching | ||
99 | = extern int regexec(const regex_t *, const char *, size_t, \ | ||
100 | = regmatch_t [], int); | ||
101 | = #define REG_NOTBOL 00001 | ||
102 | = #define REG_NOTEOL 00002 | ||
103 | = #define REG_STARTEND 00004 | ||
104 | = #define REG_TRACE 00400 // tracing of execution | ||
105 | = #define REG_LARGE 01000 // force large representation | ||
106 | = #define REG_BACKR 02000 // force use of backref code | ||
107 | * | ||
108 | * We put this here so we can exploit knowledge of the state representation | ||
109 | * when choosing which matcher to call. Also, by this point the matchers | ||
110 | * have been prototyped. | ||
111 | */ | ||
112 | EAPI int /* 0 success, REG_NOMATCH failure */ | ||
113 | regexec(preg, string, nmatch, pmatch, eflags) | ||
114 | const regex_t *preg; | ||
115 | const char *string; | ||
116 | size_t nmatch; | ||
117 | regmatch_t pmatch[]; | ||
118 | int eflags; | ||
119 | { | ||
120 | register struct re_guts *g = preg->re_g; | ||
121 | #ifdef REDEBUG | ||
122 | # define GOODFLAGS(f) (f) | ||
123 | #else | ||
124 | # define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) | ||
125 | #endif | ||
126 | |||
127 | if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) | ||
128 | return(REG_BADPAT); | ||
129 | assert(!(g->iflags&BAD)); | ||
130 | if (g->iflags&BAD) /* backstop for no-debug case */ | ||
131 | return(REG_BADPAT); | ||
132 | eflags = GOODFLAGS(eflags); | ||
133 | |||
134 | if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) | ||
135 | return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); | ||
136 | else | ||
137 | return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); | ||
138 | } | ||
diff --git a/src/lib/evil/regex/regfree.c b/src/lib/evil/regex/regfree.c new file mode 100644 index 0000000000..75885bd076 --- /dev/null +++ b/src/lib/evil/regex/regfree.c | |||
@@ -0,0 +1,37 @@ | |||
1 | #include <sys/types.h> | ||
2 | #include <stdio.h> | ||
3 | #include <stdlib.h> | ||
4 | #include <regex.h> | ||
5 | |||
6 | #include "utils.h" | ||
7 | #include "regex2.h" | ||
8 | |||
9 | /* | ||
10 | - regfree - free everything | ||
11 | = extern void regfree(regex_t *); | ||
12 | */ | ||
13 | EAPI void | ||
14 | regfree(preg) | ||
15 | regex_t *preg; | ||
16 | { | ||
17 | register struct re_guts *g; | ||
18 | |||
19 | if (preg->re_magic != MAGIC1) /* oops */ | ||
20 | return; /* nice to complain, but hard */ | ||
21 | |||
22 | g = preg->re_g; | ||
23 | if (g == NULL || g->magic != MAGIC2) /* oops again */ | ||
24 | return; | ||
25 | preg->re_magic = 0; /* mark it invalid */ | ||
26 | g->magic = 0; /* mark it invalid */ | ||
27 | |||
28 | if (g->strip != NULL) | ||
29 | free((char *)g->strip); | ||
30 | if (g->sets != NULL) | ||
31 | free((char *)g->sets); | ||
32 | if (g->setbits != NULL) | ||
33 | free((char *)g->setbits); | ||
34 | if (g->must != NULL) | ||
35 | free(g->must); | ||
36 | free((char *)g); | ||
37 | } | ||
diff --git a/src/lib/evil/regex/utils.h b/src/lib/evil/regex/utils.h new file mode 100644 index 0000000000..1a997ac8fc --- /dev/null +++ b/src/lib/evil/regex/utils.h | |||
@@ -0,0 +1,22 @@ | |||
1 | /* utility definitions */ | ||
2 | #ifdef _POSIX2_RE_DUP_MAX | ||
3 | #define DUPMAX _POSIX2_RE_DUP_MAX | ||
4 | #else | ||
5 | #define DUPMAX 255 | ||
6 | #endif | ||
7 | #define INFINITY (DUPMAX + 1) | ||
8 | #define NC (CHAR_MAX - CHAR_MIN + 1) | ||
9 | typedef unsigned char uch; | ||
10 | |||
11 | /* switch off assertions (if not already off) if no REDEBUG */ | ||
12 | #ifndef REDEBUG | ||
13 | #ifndef NDEBUG | ||
14 | #define NDEBUG /* no assertions please */ | ||
15 | #endif | ||
16 | #endif | ||
17 | #include <assert.h> | ||
18 | |||
19 | /* for old systems with bcopy() but no memmove() */ | ||
20 | #ifdef USEBCOPY | ||
21 | #define memmove(d, s, c) bcopy(s, d, c) | ||
22 | #endif | ||