summaryrefslogtreecommitdiff
path: root/src/lib/evil
diff options
context:
space:
mode:
authorVincent Torri <vincent.torri@gmail.com>2019-04-18 09:31:51 -0400
committerMike Blumenkrantz <zmike@samsung.com>2019-04-18 12:30:22 -0400
commitecce595b241763dd3029a1bc39f10f4f664b3c9a (patch)
tree6b8cf50e27d676911a1d99900997f4b20431e725 /src/lib/evil
parente65c49b4224d4c4d27f67bbc1c91e8d2bc8450e8 (diff)
Windows: remove fnmatch and regex in Evil and use the ones in regex DLL installed by ewpi
Test Plan: compilation with autotools and meson (at least as far as it can go) Reviewers: zmike, raster Reviewed By: zmike Subscribers: cedric, #reviewers, #committers Tags: #efl Differential Revision: https://phab.enlightenment.org/D8646
Diffstat (limited to 'src/lib/evil')
-rw-r--r--src/lib/evil/evil_fnmatch.c231
-rw-r--r--src/lib/evil/evil_fnmatch_list_of_states.c77
-rw-r--r--src/lib/evil/evil_fnmatch_private.h24
-rw-r--r--src/lib/evil/fnmatch.h57
-rw-r--r--src/lib/evil/meson.build15
-rw-r--r--src/lib/evil/regex/cclass.h31
-rw-r--r--src/lib/evil/regex/cname.h102
-rw-r--r--src/lib/evil/regex/engine.c1019
-rw-r--r--src/lib/evil/regex/engine.ih35
-rw-r--r--src/lib/evil/regex/meson.build10
-rw-r--r--src/lib/evil/regex/regcomp.c1604
-rw-r--r--src/lib/evil/regex/regcomp.ih51
-rw-r--r--src/lib/evil/regex/regerror.c122
-rw-r--r--src/lib/evil/regex/regerror.ih12
-rw-r--r--src/lib/evil/regex/regex.h78
-rw-r--r--src/lib/evil/regex/regex2.h134
-rw-r--r--src/lib/evil/regex/regexec.c138
-rw-r--r--src/lib/evil/regex/regfree.c37
-rw-r--r--src/lib/evil/regex/utils.h22
19 files changed, 4 insertions, 3795 deletions
diff --git a/src/lib/evil/evil_fnmatch.c b/src/lib/evil/evil_fnmatch.c
deleted file mode 100644
index 0b4af7b..0000000
--- a/src/lib/evil/evil_fnmatch.c
+++ /dev/null
@@ -1,231 +0,0 @@
1#ifdef HAVE_CONFIG_H
2# include "config.h"
3#endif /* HAVE_CONFIG_H */
4
5#include <assert.h>
6#include <string.h>
7
8#include "fnmatch.h"
9#include "evil_fnmatch_private.h"
10
11enum fnmatch_status
12{
13 fnmatch_not_found = 0,
14 fnmatch_found = 1,
15 fnmatch_syntax_error = 2
16};
17
18static
19size_t
20fnmatch_match_class_token(enum fnmatch_status *status,
21 const char *class_token,
22 char c)
23{
24 if (! *class_token)
25 {
26 *status = fnmatch_syntax_error;
27 return 0;
28 }
29 else if (class_token[1] == '-' && class_token[2] != ']')
30 {
31 if (class_token[0] <= c && c <= class_token[2])
32 *status = fnmatch_found;
33 return 3;
34 }
35 else
36 {
37 if (c == *class_token)
38 *status = fnmatch_found;
39 return 1;
40 }
41}
42
43static
44size_t
45fnmatch_complement_class(const char *class_token)
46{
47 switch (*class_token)
48 {
49 case 0:
50 return FNM_SYNTAXERR;
51
52 case '!':
53 return 1;
54
55 default:
56 return 0;
57 }
58}
59
60static
61size_t
62fnmatch_match_class(const char *class,
63 char c)
64{
65 const size_t complement = fnmatch_complement_class(class + 1);
66 enum fnmatch_status status;
67 size_t pos;
68
69 if (complement == FNM_SYNTAXERR)
70 return FNM_SYNTAXERR;
71
72 status = fnmatch_not_found;
73 pos = 1 + complement;
74
75 do
76 pos += fnmatch_match_class_token(&status, class + pos, c);
77 while (class[pos] && class[pos] != ']');
78
79 if (status == fnmatch_syntax_error || ! class[pos])
80 return FNM_SYNTAXERR;
81
82 if (status == fnmatch_found)
83 return complement ? 0 : pos + 1;
84 else
85 return complement ? pos + 1 : 0;
86}
87
88static
89size_t
90fnmatch_chrcasecmp(char a,
91 char b)
92{
93 if ('A' <= a && a <= 'Z')
94 a += 'a' - 'A';
95 if ('A' <= b && b <= 'Z')
96 b += 'a' - 'A';
97 return a == b;
98}
99
100static
101size_t
102fnmatch_match_token(const char *token,
103 char c,
104 e_bool leading,
105 int flags)
106{
107 if (*token == '\\' && !(flags & FNM_NOESCAPE))
108 return token[1] ? (token[1] == c ? 2 : 0) : FNM_SYNTAXERR;
109
110 if (c == '/' && (flags & FNM_PATHNAME))
111 return *token == '/';
112
113 if (c == '.' && leading && (flags & FNM_PERIOD))
114 return *token == '.';
115
116 switch (*token)
117 {
118 case '?':
119 return 1;
120
121 case '[':
122 return fnmatch_match_class(token, c);
123
124 default:
125 if (flags & FNM_CASEFOLD)
126 return fnmatch_chrcasecmp(*token, c);
127 return *token == c ? 1 : 0;
128 }
129}
130
131static
132void
133fnmatch_init_states(struct list_of_states *states)
134{
135 states->size = 1;
136 states->states[0] = 0;
137 memset(states->has, 0, states->reserved * sizeof (*states->has));
138 states->has[0] = 1;
139}
140
141static
142size_t
143fnmatch_check_finals(const char *pattern,
144 const struct list_of_states *states)
145{
146 size_t i, j;
147 for (i = 0; i < states->size; ++i)
148 {
149 e_bool match = 1;
150
151 for (j = states->states[i]; pattern[j]; ++j)
152 if (pattern[j] != '*')
153 {
154 match = 0;
155 break;
156 }
157
158 if (match)
159 return 0;
160 }
161 return FNM_NOMATCH;
162}
163
164int
165fnmatch(const char *pattern,
166 const char *string,
167 int flags)
168{
169 struct list_of_states *states;
170 struct list_of_states *new_states;
171 e_bool leading = 1;
172 char *c;
173 size_t r;
174
175 assert(pattern);
176 assert(string);
177
178 states = fnmatch_list_of_states_alloc(2, strlen(pattern));
179 new_states = states + 1;
180
181 if (! states)
182 return FNM_NOMEM;
183 fnmatch_init_states(states);
184
185
186 for (c = (char *)string; *c && states->size; ++c)
187 {
188 size_t i;
189 fnmatch_list_of_states_clear(new_states);
190
191 for (i = 0; i < states->size; ++i)
192 {
193 const size_t pos = states->states[i];
194
195 if (! pattern[pos])
196 {
197 if (*c == '/' && (flags & FNM_LEADING_DIR))
198 return 0;
199 continue;
200 }
201 else if (pattern[pos] == '*')
202 {
203 fnmatch_list_of_states_insert(states, pos + 1);
204 if ((*c != '/' || !(flags & FNM_PATHNAME)) &&
205 (*c != '.' || !leading || !(flags & FNM_PERIOD)))
206 fnmatch_list_of_states_insert(new_states, pos);
207 }
208 else
209 {
210 const size_t m = fnmatch_match_token(pattern + pos, *c,
211 leading, flags);
212
213 if (m == FNM_SYNTAXERR)
214 return FNM_SYNTAXERR;
215 else if (m)
216 fnmatch_list_of_states_insert(new_states, pos + m);
217 }
218 }
219 {
220 struct list_of_states *tmp = states;
221
222 states = new_states;
223 new_states = tmp;
224 }
225 leading = *c == '/' && (flags & FNM_PATHNAME);
226 }
227
228 r = fnmatch_check_finals(pattern, states);
229 fnmatch_list_of_states_free(states < new_states ? states : new_states, 2);
230 return (int)r;
231}
diff --git a/src/lib/evil/evil_fnmatch_list_of_states.c b/src/lib/evil/evil_fnmatch_list_of_states.c
deleted file mode 100644
index c6cfb7f..0000000
--- a/src/lib/evil/evil_fnmatch_list_of_states.c
+++ /dev/null
@@ -1,77 +0,0 @@
1#ifdef HAVE_CONFIG_H
2# include "config.h"
3#endif /* HAVE_CONFIG_H */
4
5#include <assert.h>
6#include <stdlib.h>
7#include <string.h>
8
9#include "evil_fnmatch_private.h"
10
11struct list_of_states*
12fnmatch_list_of_states_alloc(size_t n,
13 size_t pattern_len)
14{
15 struct list_of_states *l;
16
17 const size_t reserved = pattern_len + 1;
18 const size_t states_size = sizeof (*l->states) * reserved;
19 const size_t has_size = sizeof (*l->has) * reserved;
20 const size_t states_has_size = states_size + has_size;
21 const size_t struct_size = sizeof (*l) + states_has_size;
22
23 unsigned char *states;
24 unsigned char *has;
25 size_t i;
26
27 l = malloc(n * struct_size);
28 if (!l)
29 return 0;
30
31 states = (unsigned char *) (l + n);
32 has = states + states_size;
33
34 for (i = 0; i < n; ++i)
35 {
36 l[i].reserved = reserved;
37 l[i].states = (size_t *) states;
38 l[i].has = (e_bool *) has;
39 states += states_has_size;
40 has += states_has_size;
41 }
42
43 return l;
44}
45
46void
47fnmatch_list_of_states_free(struct list_of_states *lists,
48 size_t n)
49{
50 assert(lists);
51
52 (void) n;
53 free(lists);
54}
55
56void
57fnmatch_list_of_states_insert(struct list_of_states *list,
58 size_t state)
59{
60 assert(list);
61 assert(state < list->reserved);
62
63 if (list->has[state])
64 return;
65
66 list->states[list->size++] = state;
67 list->has[state] = 1;
68}
69
70void
71fnmatch_list_of_states_clear(struct list_of_states *list)
72{
73 assert(list);
74
75 list->size = 0;
76 memset(list->has, 0, list->reserved * sizeof (*list->has));
77}
diff --git a/src/lib/evil/evil_fnmatch_private.h b/src/lib/evil/evil_fnmatch_private.h
deleted file mode 100644
index f5ced5d..0000000
--- a/src/lib/evil/evil_fnmatch_private.h
+++ /dev/null
@@ -1,24 +0,0 @@
1#ifndef __EVIL_FNMATCH_PRIVATE_H__
2#define __EVIL_FNMATCH_PRIVATE_H__
3
4
5typedef int e_bool;
6
7struct list_of_states
8{
9 size_t reserved;
10 size_t size;
11 size_t *states;
12 e_bool *has;
13};
14
15struct list_of_states *fnmatch_list_of_states_alloc(size_t n, size_t pattern_len);
16
17void fnmatch_list_of_states_free(struct list_of_states *lists, size_t n);
18
19void fnmatch_list_of_states_insert(struct list_of_states *list, size_t state);
20
21void fnmatch_list_of_states_clear(struct list_of_states *list);
22
23
24#endif /* __EVIL_FNMATCH_PRIVATE_H__ */
diff --git a/src/lib/evil/fnmatch.h b/src/lib/evil/fnmatch.h
deleted file mode 100644
index 0464fa7..0000000
--- a/src/lib/evil/fnmatch.h
+++ /dev/null
@@ -1,57 +0,0 @@
1#ifndef __EVIL_FNMATCH_H__
2#define __EVIL_FNMATCH_H__
3
4#ifdef EAPI
5# undef EAPI
6#endif
7
8#ifdef _WIN32
9# ifdef EFL_BUILD
10# ifdef DLL_EXPORT
11# define EAPI __declspec(dllexport)
12# else
13# define EAPI
14# endif
15# else
16# define EAPI __declspec(dllimport)
17# endif
18#endif
19
20#ifdef __cplusplus
21extern "C" {
22#endif
23
24/* We #undef these before defining them because some losing systems
25 (HP-UX A.08.07 for example) define these in <unistd.h>. */
26#undef FNM_PATHNAME
27#undef FNM_NOESCAPE
28#undef FNM_PERIOD
29
30/* Bits set in the FLAGS argument to `fnmatch'. */
31#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */
32#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */
33#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */
34
35#if !defined (_POSIX_C_SOURCE) || _POSIX_C_SOURCE < 2 || defined (_GNU_SOURCE)
36#define FNM_FILE_NAME FNM_PATHNAME /* Preferred GNU name. */
37#define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */
38#define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */
39#endif
40
41/* Value returned by `fnmatch' if STRING does not match PATTERN. */
42#define FNM_NOMATCH 1
43#define FNM_SYNTAXERR 2
44#define FNM_NOMEM 3
45
46/* Match STRING against the filename pattern PATTERN,
47 returning zero if it matches, FNM_NOMATCH if not. */
48EAPI int fnmatch(const char *__pattern, const char *__string, int __flags);
49
50#ifdef __cplusplus
51}
52#endif
53
54#undef EAPI
55#define EAPI
56
57#endif /* __EVIL_FNMATCH_H__ */
diff --git a/src/lib/evil/meson.build b/src/lib/evil/meson.build
index 482e733..0402b37 100644
--- a/src/lib/evil/meson.build
+++ b/src/lib/evil/meson.build
@@ -18,17 +18,13 @@ if target_machine.system() == 'windows'
18 'evil_unistd.h', 18 'evil_unistd.h',
19 'evil_util.h', 19 'evil_util.h',
20 'dirent.h', 20 'dirent.h',
21 'fnmatch.h',
22 'pwd.h', 21 'pwd.h',
23 'regex/regex.h'
24 ] 22 ]
25 evil_header_sys_src = [join_paths('sys','mman.h')] 23 evil_header_sys_src = [join_paths('sys','mman.h')]
26 24
27 evil_src = [ 25 evil_src = [
28 'evil_dlfcn.c', 26 'evil_dlfcn.c',
29 'evil_fcntl.c', 27 'evil_fcntl.c',
30 'evil_fnmatch.c',
31 'evil_fnmatch_list_of_states.c',
32 'evil_langinfo.c', 28 'evil_langinfo.c',
33 'evil_locale.c', 29 'evil_locale.c',
34 'evil_main.c', 30 'evil_main.c',
@@ -41,11 +37,8 @@ if target_machine.system() == 'windows'
41 'evil_unistd.c', 37 'evil_unistd.c',
42 'evil_util.c', 38 'evil_util.c',
43 'evil_private.h', 39 'evil_private.h',
44 'evil_fnmatch_private.h',
45 ] 40 ]
46 41
47 subdir('regex')
48
49 psapi = cc.find_library('psapi') 42 psapi = cc.find_library('psapi')
50 ole32 = cc.find_library('ole32') 43 ole32 = cc.find_library('ole32')
51 ws2_32 = cc.find_library('ws2_32') 44 ws2_32 = cc.find_library('ws2_32')
@@ -53,13 +46,13 @@ if target_machine.system() == 'windows'
53 uuid = cc.find_library('uuid') 46 uuid = cc.find_library('uuid')
54 47
55 evil_lib = library('evil', evil_src, 48 evil_lib = library('evil', evil_src,
56 dependencies : [psapi, ole32, ws2_32, secur32, uuid], 49 dependencies : [psapi, ole32, ws2_32, secur32, uuid, regexp],
57 include_directories : [config_dir, include_directories('regex')], 50 include_directories : [config_dir],
58 ) 51 )
59 52
60 evil = declare_dependency( 53 evil = declare_dependency(
61 include_directories: [config_dir, include_directories('regex'), include_directories('.')], 54 include_directories: [include_directories('.')],
62 dependencies : [psapi, ole32, ws2_32, secur32, uuid], 55 dependencies : [psapi, ole32, ws2_32, secur32, uuid, regexp],
63 link_with: evil_lib, 56 link_with: evil_lib,
64 ) 57 )
65else 58else
diff --git a/src/lib/evil/regex/cclass.h b/src/lib/evil/regex/cclass.h
deleted file mode 100644
index 0c29302..0000000
--- a/src/lib/evil/regex/cclass.h
+++ /dev/null
@@ -1,31 +0,0 @@
1/* character-class table */
2static struct cclass {
3 char *name;
4 char *chars;
5 char *multis;
6} cclasses[] = {
7 "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
80123456789", "",
9 "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
10 "",
11 "blank", " \t", "",
12 "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
13\25\26\27\30\31\32\33\34\35\36\37\177", "",
14 "digit", "0123456789", "",
15 "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
160123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
17 "",
18 "lower", "abcdefghijklmnopqrstuvwxyz",
19 "",
20 "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
210123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
22 "",
23 "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
24 "",
25 "space", "\t\n\v\f\r ", "",
26 "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
27 "",
28 "xdigit", "0123456789ABCDEFabcdef",
29 "",
30 NULL, 0, ""
31};
diff --git a/src/lib/evil/regex/cname.h b/src/lib/evil/regex/cname.h
deleted file mode 100644
index 02e86e9..0000000
--- a/src/lib/evil/regex/cname.h
+++ /dev/null
@@ -1,102 +0,0 @@
1/* character-name table */
2static struct cname {
3 char *name;
4 char code;
5} cnames[] = {
6 "NUL", '\0',
7 "SOH", '\001',
8 "STX", '\002',
9 "ETX", '\003',
10 "EOT", '\004',
11 "ENQ", '\005',
12 "ACK", '\006',
13 "BEL", '\007',
14 "alert", '\007',
15 "BS", '\010',
16 "backspace", '\b',
17 "HT", '\011',
18 "tab", '\t',
19 "LF", '\012',
20 "newline", '\n',
21 "VT", '\013',
22 "vertical-tab", '\v',
23 "FF", '\014',
24 "form-feed", '\f',
25 "CR", '\015',
26 "carriage-return", '\r',
27 "SO", '\016',
28 "SI", '\017',
29 "DLE", '\020',
30 "DC1", '\021',
31 "DC2", '\022',
32 "DC3", '\023',
33 "DC4", '\024',
34 "NAK", '\025',
35 "SYN", '\026',
36 "ETB", '\027',
37 "CAN", '\030',
38 "EM", '\031',
39 "SUB", '\032',
40 "ESC", '\033',
41 "IS4", '\034',
42 "FS", '\034',
43 "IS3", '\035',
44 "GS", '\035',
45 "IS2", '\036',
46 "RS", '\036',
47 "IS1", '\037',
48 "US", '\037',
49 "space", ' ',
50 "exclamation-mark", '!',
51 "quotation-mark", '"',
52 "number-sign", '#',
53 "dollar-sign", '$',
54 "percent-sign", '%',
55 "ampersand", '&',
56 "apostrophe", '\'',
57 "left-parenthesis", '(',
58 "right-parenthesis", ')',
59 "asterisk", '*',
60 "plus-sign", '+',
61 "comma", ',',
62 "hyphen", '-',
63 "hyphen-minus", '-',
64 "period", '.',
65 "full-stop", '.',
66 "slash", '/',
67 "solidus", '/',
68 "zero", '0',
69 "one", '1',
70 "two", '2',
71 "three", '3',
72 "four", '4',
73 "five", '5',
74 "six", '6',
75 "seven", '7',
76 "eight", '8',
77 "nine", '9',
78 "colon", ':',
79 "semicolon", ';',
80 "less-than-sign", '<',
81 "equals-sign", '=',
82 "greater-than-sign", '>',
83 "question-mark", '?',
84 "commercial-at", '@',
85 "left-square-bracket", '[',
86 "backslash", '\\',
87 "reverse-solidus", '\\',
88 "right-square-bracket", ']',
89 "circumflex", '^',
90 "circumflex-accent", '^',
91 "underscore", '_',
92 "low-line", '_',
93 "grave-accent", '`',
94 "left-brace", '{',
95 "left-curly-bracket", '{',
96 "vertical-line", '|',
97 "right-brace", '}',
98 "right-curly-bracket", '}',
99 "tilde", '~',
100 "DEL", '\177',
101 NULL, 0,
102};
diff --git a/src/lib/evil/regex/engine.c b/src/lib/evil/regex/engine.c
deleted file mode 100644
index 919fe3f..0000000
--- a/src/lib/evil/regex/engine.c
+++ /dev/null
@@ -1,1019 +0,0 @@
1/*
2 * The matching engine and friends. This file is #included by regexec.c
3 * after suitable #defines of a variety of macros used herein, so that
4 * different state representations can be used without duplicating masses
5 * of code.
6 */
7
8#ifdef SNAMES
9#define matcher smatcher
10#define fast sfast
11#define slow sslow
12#define dissect sdissect
13#define backref sbackref
14#define step sstep
15#define print sprint
16#define at sat
17#define match smat
18#endif
19#ifdef LNAMES
20#define matcher lmatcher
21#define fast lfast
22#define slow lslow
23#define dissect ldissect
24#define backref lbackref
25#define step lstep
26#define print lprint
27#define at lat
28#define match lmat
29#endif
30
31/* another structure passed up and down to avoid zillions of parameters */
32struct match {
33 struct re_guts *g;
34 int eflags;
35 regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
36 char *offp; /* offsets work from here */
37 char *beginp; /* start of string -- virtual NUL precedes */
38 char *endp; /* end of string -- virtual NUL here */
39 char *coldp; /* can be no match starting before here */
40 char **lastpos; /* [nplus+1] */
41 STATEVARS;
42 states st; /* current states */
43 states fresh; /* states for a fresh start */
44 states tmp; /* temporary */
45 states empty; /* empty set of states */
46};
47
48#include "engine.ih"
49
50#ifdef REDEBUG
51#define SP(t, s, c) print(m, t, s, c, stdout)
52#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
53#define NOTE(str) { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
54#else
55#define SP(t, s, c) /* nothing */
56#define AT(t, p1, p2, s1, s2) /* nothing */
57#define NOTE(s) /* nothing */
58#endif
59
60/*
61 - matcher - the actual matching engine
62 == static int matcher(register struct re_guts *g, char *string, \
63 == size_t nmatch, regmatch_t pmatch[], int eflags);
64 */
65static int /* 0 success, REG_NOMATCH failure */
66matcher(g, string, nmatch, pmatch, eflags)
67register struct re_guts *g;
68char *string;
69size_t nmatch;
70regmatch_t pmatch[];
71int eflags;
72{
73 register char *endp;
74 register int i;
75 struct match mv;
76 register struct match *m = &mv;
77 register char *dp;
78 const register sopno gf = g->firststate+1; /* +1 for OEND */
79 const register sopno gl = g->laststate;
80 char *start;
81 char *stop;
82
83 /* simplify the situation where possible */
84 if (g->cflags&REG_NOSUB)
85 nmatch = 0;
86 if (eflags&REG_STARTEND) {
87 start = string + pmatch[0].rm_so;
88 stop = string + pmatch[0].rm_eo;
89 } else {
90 start = string;
91 stop = start + strlen(start);
92 }
93 if (stop < start)
94 return(REG_INVARG);
95
96 /* prescreening; this does wonders for this rather slow code */
97 if (g->must != NULL) {
98 for (dp = start; dp < stop; dp++)
99 if (*dp == g->must[0] && stop - dp >= g->mlen &&
100 memcmp(dp, g->must, (size_t)g->mlen) == 0)
101 break;
102 if (dp == stop) /* we didn't find g->must */
103 return(REG_NOMATCH);
104 }
105
106 /* match struct setup */
107 m->g = g;
108 m->eflags = eflags;
109 m->pmatch = NULL;
110 m->lastpos = NULL;
111 m->offp = string;
112 m->beginp = start;
113 m->endp = stop;
114 STATESETUP(m, 4);
115 SETUP(m->st);
116 SETUP(m->fresh);
117 SETUP(m->tmp);
118 SETUP(m->empty);
119 CLEAR(m->empty);
120
121 /* this loop does only one repetition except for backrefs */
122 for (;;) {
123 endp = fast(m, start, stop, gf, gl);
124 if (endp == NULL) { /* a miss */
125 STATETEARDOWN(m);
126 return(REG_NOMATCH);
127 }
128 if (nmatch == 0 && !g->backrefs)
129 break; /* no further info needed */
130
131 /* where? */
132 assert(m->coldp != NULL);
133 for (;;) {
134 NOTE("finding start");
135 endp = slow(m, m->coldp, stop, gf, gl);
136 if (endp != NULL)
137 break;
138 assert(m->coldp < m->endp);
139 m->coldp++;
140 }
141 if (nmatch == 1 && !g->backrefs)
142 break; /* no further info needed */
143
144 /* oh my, he wants the subexpressions... */
145 if (m->pmatch == NULL)
146 m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
147 sizeof(regmatch_t));
148 if (m->pmatch == NULL) {
149 STATETEARDOWN(m);
150 return(REG_ESPACE);
151 }
152 for (i = 1; i <= m->g->nsub; i++)
153 m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
154 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
155 NOTE("dissecting");
156 dp = dissect(m, m->coldp, endp, gf, gl);
157 } else {
158 if (g->nplus > 0 && m->lastpos == NULL)
159 m->lastpos = (char **)malloc((g->nplus+1) *
160 sizeof(char *));
161 if (g->nplus > 0 && m->lastpos == NULL) {
162 free(m->pmatch);
163 STATETEARDOWN(m);
164 return(REG_ESPACE);
165 }
166 NOTE("backref dissect");
167 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
168 }
169 if (dp != NULL)
170 break;
171
172 /* uh-oh... we couldn't find a subexpression-level match */
173 assert(g->backrefs); /* must be back references doing it */
174 assert(g->nplus == 0 || m->lastpos != NULL);
175 for (;;) {
176 if (dp != NULL || endp <= m->coldp)
177 break; /* defeat */
178 NOTE("backoff");
179 endp = slow(m, m->coldp, endp-1, gf, gl);
180 if (endp == NULL)
181 break; /* defeat */
182 /* try it on a shorter possibility */
183#ifndef NDEBUG
184 for (i = 1; i <= m->g->nsub; i++) {
185 assert(m->pmatch[i].rm_so == -1);
186 assert(m->pmatch[i].rm_eo == -1);
187 }
188#endif
189 NOTE("backoff dissect");
190 dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
191 }
192 assert(dp == NULL || dp == endp);
193 if (dp != NULL) /* found a shorter one */
194 break;
195
196 /* despite initial appearances, there is no match here */
197 NOTE("false alarm");
198 start = m->coldp + 1; /* recycle starting later */
199 assert(start <= stop);
200 }
201
202 /* fill in the details if requested */
203 if (nmatch > 0) {
204 pmatch[0].rm_so = m->coldp - m->offp;
205 pmatch[0].rm_eo = endp - m->offp;
206 }
207 if (nmatch > 1) {
208 assert(m->pmatch != NULL);
209 for (i = 1; i < nmatch; i++)
210 if (i <= m->g->nsub)
211 pmatch[i] = m->pmatch[i];
212 else {
213 pmatch[i].rm_so = -1;
214 pmatch[i].rm_eo = -1;
215 }
216 }
217
218 if (m->pmatch != NULL)
219 free((char *)m->pmatch);
220 if (m->lastpos != NULL)
221 free((char *)m->lastpos);
222 STATETEARDOWN(m);
223 return(0);
224}
225
226/*
227 - dissect - figure out what matched what, no back references
228 == static char *dissect(register struct match *m, char *start, \
229 == char *stop, sopno startst, sopno stopst);
230 */
231static char * /* == stop (success) always */
232dissect(m, start, stop, startst, stopst)
233register struct match *m;
234char *start;
235char *stop;
236sopno startst;
237sopno stopst;
238{
239 register int i;
240 register sopno ss; /* start sop of current subRE */
241 register sopno es; /* end sop of current subRE */
242 register char *sp; /* start of string matched by it */
243 register char *stp; /* string matched by it cannot pass here */
244 register char *rest; /* start of rest of string */
245 register char *tail; /* string unmatched by rest of RE */
246 register sopno ssub; /* start sop of subsubRE */
247 register sopno esub; /* end sop of subsubRE */
248 register char *ssp; /* start of string matched by subsubRE */
249 register char *sep; /* end of string matched by subsubRE */
250 register char *oldssp; /* previous ssp */
251 register char *dp;
252
253 AT("diss", start, stop, startst, stopst);
254 sp = start;
255 for (ss = startst; ss < stopst; ss = es) {
256 /* identify end of subRE */
257 es = ss;
258 switch (OP(m->g->strip[es])) {
259 case OPLUS_:
260 case OQUEST_:
261 es += OPND(m->g->strip[es]);
262 break;
263 case OCH_:
264 while (OP(m->g->strip[es]) != O_CH)
265 es += OPND(m->g->strip[es]);
266 break;
267 }
268 es++;
269
270 /* figure out what it matched */
271 switch (OP(m->g->strip[ss])) {
272 case OEND:
273 assert(nope);
274 break;
275 case OCHAR:
276 sp++;
277 break;
278 case OBOL:
279 case OEOL:
280 case OBOW:
281 case OEOW:
282 break;
283 case OANY:
284 case OANYOF:
285 sp++;
286 break;
287 case OBACK_:
288 case O_BACK:
289 assert(nope);
290 break;
291 /* cases where length of match is hard to find */
292 case OQUEST_:
293 stp = stop;
294 for (;;) {
295 /* how long could this one be? */
296 rest = slow(m, sp, stp, ss, es);
297 assert(rest != NULL); /* it did match */
298 /* could the rest match the rest? */
299 tail = slow(m, rest, stop, es, stopst);
300 if (tail == stop)
301 break; /* yes! */
302 /* no -- try a shorter match for this one */
303 stp = rest - 1;
304 assert(stp >= sp); /* it did work */
305 }
306 ssub = ss + 1;
307 esub = es - 1;
308 /* did innards match? */
309 if (slow(m, sp, rest, ssub, esub) != NULL) {
310 dp = dissect(m, sp, rest, ssub, esub);
311 assert(dp == rest);
312 } else /* no */
313 assert(sp == rest);
314 sp = rest;
315 break;
316 case OPLUS_:
317 stp = stop;
318 for (;;) {
319 /* how long could this one be? */
320 rest = slow(m, sp, stp, ss, es);
321 assert(rest != NULL); /* it did match */
322 /* could the rest match the rest? */
323 tail = slow(m, rest, stop, es, stopst);
324 if (tail == stop)
325 break; /* yes! */
326 /* no -- try a shorter match for this one */
327 stp = rest - 1;
328 assert(stp >= sp); /* it did work */
329 }
330 ssub = ss + 1;
331 esub = es - 1;
332 ssp = sp;
333 oldssp = ssp;
334 for (;;) { /* find last match of innards */
335 sep = slow(m, ssp, rest, ssub, esub);
336 if (sep == NULL || sep == ssp)
337 break; /* failed or matched null */
338 oldssp = ssp; /* on to next try */
339 ssp = sep;
340 }
341 if (sep == NULL) {
342 /* last successful match */
343 sep = ssp;
344 ssp = oldssp;
345 }
346 assert(sep == rest); /* must exhaust substring */
347 assert(slow(m, ssp, sep, ssub, esub) == rest);
348 dp = dissect(m, ssp, sep, ssub, esub);
349 assert(dp == sep);
350 sp = rest;
351 break;
352 case OCH_:
353 stp = stop;
354 for (;;) {
355 /* how long could this one be? */
356 rest = slow(m, sp, stp, ss, es);
357 assert(rest != NULL); /* it did match */
358 /* could the rest match the rest? */
359 tail = slow(m, rest, stop, es, stopst);
360 if (tail == stop)
361 break; /* yes! */
362 /* no -- try a shorter match for this one */
363 stp = rest - 1;
364 assert(stp >= sp); /* it did work */
365 }
366 ssub = ss + 1;
367 esub = ss + OPND(m->g->strip[ss]) - 1;
368 assert(OP(m->g->strip[esub]) == OOR1);
369 for (;;) { /* find first matching branch */
370 if (slow(m, sp, rest, ssub, esub) == rest)
371 break; /* it matched all of it */
372 /* that one missed, try next one */
373 assert(OP(m->g->strip[esub]) == OOR1);
374 esub++;
375 assert(OP(m->g->strip[esub]) == OOR2);
376 ssub = esub + 1;
377 esub += OPND(m->g->strip[esub]);
378 if (OP(m->g->strip[esub]) == OOR2)
379 esub--;
380 else
381 assert(OP(m->g->strip[esub]) == O_CH);
382 }
383 dp = dissect(m, sp, rest, ssub, esub);
384 assert(dp == rest);
385 sp = rest;
386 break;
387 case O_PLUS:
388 case O_QUEST:
389 case OOR1:
390 case OOR2:
391 case O_CH:
392 assert(nope);
393 break;
394 case OLPAREN:
395 i = OPND(m->g->strip[ss]);
396 assert(0 < i && i <= m->g->nsub);
397 m->pmatch[i].rm_so = sp - m->offp;
398 break;
399 case ORPAREN:
400 i = OPND(m->g->strip[ss]);
401 assert(0 < i && i <= m->g->nsub);
402 m->pmatch[i].rm_eo = sp - m->offp;
403 break;
404 default: /* uh oh */
405 assert(nope);
406 break;
407 }
408 }
409
410 assert(sp == stop);
411 return(sp);
412}
413
414/*
415 - backref - figure out what matched what, figuring in back references
416 == static char *backref(register struct match *m, char *start, \
417 == char *stop, sopno startst, sopno stopst, sopno lev);
418 */
419static char * /* == stop (success) or NULL (failure) */
420backref(m, start, stop, startst, stopst, lev)
421register struct match *m;
422char *start;
423char *stop;
424sopno startst;
425sopno stopst;
426sopno lev; /* PLUS nesting level */
427{
428 register int i;
429 register sopno ss; /* start sop of current subRE */
430 register char *sp; /* start of string matched by it */
431 register sopno ssub; /* start sop of subsubRE */
432 register sopno esub; /* end sop of subsubRE */
433 register char *ssp; /* start of string matched by subsubRE */
434 register char *dp;
435 register size_t len;
436 register int hard;
437 register sop s;
438 register regoff_t offsave;
439 register cset *cs;
440
441 AT("back", start, stop, startst, stopst);
442 sp = start;
443
444 /* get as far as we can with easy stuff */
445 hard = 0;
446 for (ss = startst; !hard && ss < stopst; ss++)
447 switch (OP(s = m->g->strip[ss])) {
448 case OCHAR:
449 if (sp == stop || *sp++ != (char)OPND(s))
450 return(NULL);
451 break;
452 case OANY:
453 if (sp == stop)
454 return(NULL);
455 sp++;
456 break;
457 case OANYOF:
458 cs = &m->g->sets[OPND(s)];
459 if (sp == stop || !CHIN(cs, *sp++))
460 return(NULL);
461 break;
462 case OBOL:
463 if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
464 (sp < m->endp && *(sp-1) == '\n' &&
465 (m->g->cflags&REG_NEWLINE)) )
466 { /* yes */ }
467 else
468 return(NULL);
469 break;
470 case OEOL:
471 if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
472 (sp < m->endp && *sp == '\n' &&
473 (m->g->cflags&REG_NEWLINE)) )
474 { /* yes */ }
475 else
476 return(NULL);
477 break;
478 case OBOW:
479 if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
480 (sp < m->endp && *(sp-1) == '\n' &&
481 (m->g->cflags&REG_NEWLINE)) ||
482 (sp > m->beginp &&
483 !ISWORD(*(sp-1))) ) &&
484 (sp < m->endp && ISWORD(*sp)) )
485 { /* yes */ }
486 else
487 return(NULL);
488 break;
489 case OEOW:
490 if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
491 (sp < m->endp && *sp == '\n' &&
492 (m->g->cflags&REG_NEWLINE)) ||
493 (sp < m->endp && !ISWORD(*sp)) ) &&
494 (sp > m->beginp && ISWORD(*(sp-1))) )
495 { /* yes */ }
496 else
497 return(NULL);
498 break;
499 case O_QUEST:
500 break;
501 case OOR1: /* matches null but needs to skip */
502 ss++;
503 s = m->g->strip[ss];
504 do {
505 assert(OP(s) == OOR2);
506 ss += OPND(s);
507 } while (OP(s = m->g->strip[ss]) != O_CH);
508 /* note that the ss++ gets us past the O_CH */
509 break;
510 default: /* have to make a choice */
511 hard = 1;
512 break;
513 }
514 if (!hard) { /* that was it! */
515 if (sp != stop)
516 return(NULL);
517 return(sp);
518 }
519 ss--; /* adjust for the for's final increment */
520
521 /* the hard stuff */
522 AT("hard", sp, stop, ss, stopst);
523 s = m->g->strip[ss];
524 switch (OP(s)) {
525 case OBACK_: /* the vilest depths */
526 i = OPND(s);
527 assert(0 < i && i <= m->g->nsub);
528 if (m->pmatch[i].rm_eo == -1)
529 return(NULL);
530 assert(m->pmatch[i].rm_so != -1);
531 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
532 assert(stop - m->beginp >= len);
533 if (sp > stop - len)
534 return(NULL); /* not enough left to match */
535 ssp = m->offp + m->pmatch[i].rm_so;
536 if (memcmp(sp, ssp, len) != 0)
537 return(NULL);
538 while (m->g->strip[ss] != SOP(O_BACK, i))
539 ss++;
540 return(backref(m, sp+len, stop, ss+1, stopst, lev));
541 break;
542 case OQUEST_: /* to null or not */
543 dp = backref(m, sp, stop, ss+1, stopst, lev);
544 if (dp != NULL)
545 return(dp); /* not */
546 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
547 break;
548 case OPLUS_:
549 assert(m->lastpos != NULL);
550 assert(lev+1 <= m->g->nplus);
551 m->lastpos[lev+1] = sp;
552 return(backref(m, sp, stop, ss+1, stopst, lev+1));
553 break;
554 case O_PLUS:
555 if (sp == m->lastpos[lev]) /* last pass matched null */
556 return(backref(m, sp, stop, ss+1, stopst, lev-1));
557 /* try another pass */
558 m->lastpos[lev] = sp;
559 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
560 if (dp == NULL)
561 return(backref(m, sp, stop, ss+1, stopst, lev-1));
562 else
563 return(dp);
564 break;
565 case OCH_: /* find the right one, if any */
566 ssub = ss + 1;
567 esub = ss + OPND(s) - 1;
568 assert(OP(m->g->strip[esub]) == OOR1);
569 for (;;) { /* find first matching branch */
570 dp = backref(m, sp, stop, ssub, esub, lev);
571 if (dp != NULL)
572 return(dp);
573 /* that one missed, try next one */
574 if (OP(m->g->strip[esub]) == O_CH)
575 return(NULL); /* there is none */
576 esub++;
577 assert(OP(m->g->strip[esub]) == OOR2);
578 ssub = esub + 1;
579 esub += OPND(m->g->strip[esub]);
580 if (OP(m->g->strip[esub]) == OOR2)
581 esub--;
582 else
583 assert(OP(m->g->strip[esub]) == O_CH);
584 }
585 break;
586 case OLPAREN: /* must undo assignment if rest fails */
587 i = OPND(s);
588 assert(0 < i && i <= m->g->nsub);
589 offsave = m->pmatch[i].rm_so;
590 m->pmatch[i].rm_so = sp - m->offp;
591 dp = backref(m, sp, stop, ss+1, stopst, lev);
592 if (dp != NULL)
593 return(dp);
594 m->pmatch[i].rm_so = offsave;
595 return(NULL);
596 break;
597 case ORPAREN: /* must undo assignment if rest fails */
598 i = OPND(s);
599 assert(0 < i && i <= m->g->nsub);
600 offsave = m->pmatch[i].rm_eo;
601 m->pmatch[i].rm_eo = sp - m->offp;
602 dp = backref(m, sp, stop, ss+1, stopst, lev);
603 if (dp != NULL)
604 return(dp);
605 m->pmatch[i].rm_eo = offsave;
606 return(NULL);
607 break;
608 default: /* uh oh */
609 assert(nope);
610 break;
611 }
612
613 /* "can't happen" */
614 assert(nope);
615 /* NOTREACHED */
616 return((char *)NULL); /* dummy */
617}
618
619/*
620 - fast - step through the string at top speed
621 == static char *fast(register struct match *m, char *start, \
622 == char *stop, sopno startst, sopno stopst);
623 */
624static char * /* where tentative match ended, or NULL */
625fast(m, start, stop, startst, stopst)
626register struct match *m;
627char *start;
628char *stop;
629sopno startst;
630sopno stopst;
631{
632 register states st = m->st;
633 register states fresh = m->fresh;
634 register states tmp = m->tmp;
635 register char *p = start;
636 register int c = (start == m->beginp) ? OUT : *(start-1);
637 register int lastc; /* previous c */
638 register int flagch;
639 register int i;
640 register char *coldp; /* last p after which no match was underway */
641
642 CLEAR(st);
643 SET1(st, startst);
644 st = step(m->g, startst, stopst, st, NOTHING, st);
645 ASSIGN(fresh, st);
646 SP("start", st, *p);
647 coldp = NULL;
648 for (;;) {
649 /* next character */
650 lastc = c;
651 c = (p == m->endp) ? OUT : *p;
652 if (EQ(st, fresh))
653 coldp = p;
654
655 /* is there an EOL and/or BOL between lastc and c? */
656 flagch = '\0';
657 i = 0;
658 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
659 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
660 flagch = BOL;
661 i = m->g->nbol;
662 }
663 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
664 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
665 flagch = (flagch == BOL) ? BOLEOL : EOL;
666 i += m->g->neol;
667 }
668 if (i != 0) {
669 for (; i > 0; i--)
670 st = step(m->g, startst, stopst, st, flagch, st);
671 SP("boleol", st, c);
672 }
673
674 /* how about a word boundary? */
675 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
676 (c != OUT && ISWORD(c)) ) {
677 flagch = BOW;
678 }
679 if ( (lastc != OUT && ISWORD(lastc)) &&
680 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
681 flagch = EOW;
682 }
683 if (flagch == BOW || flagch == EOW) {
684 st = step(m->g, startst, stopst, st, flagch, st);
685 SP("boweow", st, c);
686 }
687
688 /* are we done? */
689 if (ISSET(st, stopst) || p == stop)
690 break; /* NOTE BREAK OUT */
691
692 /* no, we must deal with this character */
693 ASSIGN(tmp, st);
694 ASSIGN(st, fresh);
695 assert(c != OUT);
696 st = step(m->g, startst, stopst, tmp, c, st);
697 SP("aft", st, c);
698 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
699 p++;
700 }
701
702 assert(coldp != NULL);
703 m->coldp = coldp;
704 if (ISSET(st, stopst))
705 return(p+1);
706 else
707 return(NULL);
708}
709
710/*
711 - slow - step through the string more deliberately
712 == static char *slow(register struct match *m, char *start, \
713 == char *stop, sopno startst, sopno stopst);
714 */
715static char * /* where it ended */
716slow(m, start, stop, startst, stopst)
717register struct match *m;
718char *start;
719char *stop;
720sopno startst;
721sopno stopst;
722{
723 register states st = m->st;
724 register states empty = m->empty;
725 register states tmp = m->tmp;
726 register char *p = start;
727 register int c = (start == m->beginp) ? OUT : *(start-1);
728 register int lastc; /* previous c */
729 register int flagch;
730 register int i;
731 register char *matchp; /* last p at which a match ended */
732
733 AT("slow", start, stop, startst, stopst);
734 CLEAR(st);
735 SET1(st, startst);
736 SP("sstart", st, *p);
737 st = step(m->g, startst, stopst, st, NOTHING, st);
738 matchp = NULL;
739 for (;;) {
740 /* next character */
741 lastc = c;
742 c = (p == m->endp) ? OUT : *p;
743
744 /* is there an EOL and/or BOL between lastc and c? */
745 flagch = '\0';
746 i = 0;
747 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
748 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
749 flagch = BOL;
750 i = m->g->nbol;
751 }
752 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
753 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
754 flagch = (flagch == BOL) ? BOLEOL : EOL;
755 i += m->g->neol;
756 }
757 if (i != 0) {
758 for (; i > 0; i--)
759 st = step(m->g, startst, stopst, st, flagch, st);
760 SP("sboleol", st, c);
761 }
762
763 /* how about a word boundary? */
764 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
765 (c != OUT && ISWORD(c)) ) {
766 flagch = BOW;
767 }
768 if ( (lastc != OUT && ISWORD(lastc)) &&
769 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
770 flagch = EOW;
771 }
772 if (flagch == BOW || flagch == EOW) {
773 st = step(m->g, startst, stopst, st, flagch, st);
774 SP("sboweow", st, c);
775 }
776
777 /* are we done? */
778 if (ISSET(st, stopst))
779 matchp = p;
780 if (EQ(st, empty) || p == stop)
781 break; /* NOTE BREAK OUT */
782
783 /* no, we must deal with this character */
784 ASSIGN(tmp, st);
785 ASSIGN(st, empty);
786 assert(c != OUT);
787 st = step(m->g, startst, stopst, tmp, c, st);
788 SP("saft", st, c);
789 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
790 p++;
791 }
792
793 return(matchp);
794}
795
796
797/*
798 - step - map set of states reachable before char to set reachable after
799 == static states step(register struct re_guts *g, sopno start, sopno stop, \
800 == register states bef, int ch, register states aft);
801 == #define BOL (OUT+1)
802 == #define EOL (BOL+1)
803 == #define BOLEOL (BOL+2)
804 == #define NOTHING (BOL+3)
805 == #define BOW (BOL+4)
806 == #define EOW (BOL+5)
807 == #define CODEMAX (BOL+5) // highest code used
808 == #define NONCHAR(c) ((c) > CHAR_MAX)
809 == #define NNONCHAR (CODEMAX-CHAR_MAX)
810 */
811static states
812step(g, start, stop, bef, ch, aft)
813register struct re_guts *g;
814sopno start; /* start state within strip */
815sopno stop; /* state after stop state within strip */
816register states bef; /* states reachable before */
817int ch; /* character or NONCHAR code */
818register states aft; /* states already known reachable after */
819{
820 register cset *cs;
821 register sop s;
822 register sopno pc;
823 register onestate here; /* note, macros know this name */
824 register sopno look;
825 register long i;
826
827 for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
828 s = g->strip[pc];
829 switch (OP(s)) {
830 case OEND:
831 assert(pc == stop-1);
832 break;
833 case OCHAR:
834 /* only characters can match */
835 assert(!NONCHAR(ch) || ch != (char)OPND(s));
836 if (ch == (char)OPND(s))
837 FWD(aft, bef, 1);
838 break;
839 case OBOL:
840 if (ch == BOL || ch == BOLEOL)
841 FWD(aft, bef, 1);
842 break;
843 case OEOL:
844 if (ch == EOL || ch == BOLEOL)
845 FWD(aft, bef, 1);
846 break;
847 case OBOW:
848 if (ch == BOW)
849 FWD(aft, bef, 1);
850 break;
851 case OEOW:
852 if (ch == EOW)
853 FWD(aft, bef, 1);
854 break;
855 case OANY:
856 if (!NONCHAR(ch))
857 FWD(aft, bef, 1);
858 break;
859 case OANYOF:
860 cs = &g->sets[OPND(s)];
861 if (!NONCHAR(ch) && CHIN(cs, ch))
862 FWD(aft, bef, 1);
863 break;
864 case OBACK_: /* ignored here */
865 case O_BACK:
866 FWD(aft, aft, 1);
867 break;
868 case OPLUS_: /* forward, this is just an empty */
869 FWD(aft, aft, 1);
870 break;
871 case O_PLUS: /* both forward and back */
872 FWD(aft, aft, 1);
873 i = ISSETBACK(aft, OPND(s));
874 BACK(aft, aft, OPND(s));
875 if (!i && ISSETBACK(aft, OPND(s))) {
876 /* oho, must reconsider loop body */
877 pc -= OPND(s) + 1;
878 INIT(here, pc);
879 }
880 break;
881 case OQUEST_: /* two branches, both forward */
882 FWD(aft, aft, 1);
883 FWD(aft, aft, OPND(s));
884 break;
885 case O_QUEST: /* just an empty */
886 FWD(aft, aft, 1);
887 break;
888 case OLPAREN: /* not significant here */
889 case ORPAREN:
890 FWD(aft, aft, 1);
891 break;
892 case OCH_: /* mark the first two branches */
893 FWD(aft, aft, 1);
894 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
895 FWD(aft, aft, OPND(s));
896 break;
897 case OOR1: /* done a branch, find the O_CH */
898 if (ISSTATEIN(aft, here)) {
899 for (look = 1;
900 OP(s = g->strip[pc+look]) != O_CH;
901 look += OPND(s))
902 assert(OP(s) == OOR2);
903 FWD(aft, aft, look);
904 }
905 break;
906 case OOR2: /* propagate OCH_'s marking */
907 FWD(aft, aft, 1);
908 if (OP(g->strip[pc+OPND(s)]) != O_CH) {
909 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
910 FWD(aft, aft, OPND(s));
911 }
912 break;
913 case O_CH: /* just empty */
914 FWD(aft, aft, 1);
915 break;
916 default: /* ooooops... */
917 assert(nope);
918 break;
919 }
920 }
921
922 return(aft);
923}
924
925#ifdef REDEBUG
926/*
927 - print - print a set of states
928 == #ifdef REDEBUG
929 == static void print(struct match *m, char *caption, states st, \
930 == int ch, FILE *d);
931 == #endif
932 */
933static void
934print(m, caption, st, ch, d)
935struct match *m;
936char *caption;
937states st;
938int ch;
939FILE *d;
940{
941 register struct re_guts *g = m->g;
942 register int i;
943 register int first = 1;
944
945 if (!(m->eflags&REG_TRACE))
946 return;
947
948 fprintf(d, "%s", caption);
949 if (ch != '\0')
950 fprintf(d, " %s", pchar(ch));
951 for (i = 0; i < g->nstates; i++)
952 if (ISSET(st, i)) {
953 fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
954 first = 0;
955 }
956 fprintf(d, "\n");
957}
958
959/*
960 - at - print current situation
961 == #ifdef REDEBUG
962 == static void at(struct match *m, char *title, char *start, char *stop, \
963 == sopno startst, sopno stopst);
964 == #endif
965 */
966static void
967at(m, title, start, stop, startst, stopst)
968struct match *m;
969char *title;
970char *start;
971char *stop;
972sopno startst;
973sopno stopst;
974{
975 if (!(m->eflags&REG_TRACE))
976 return;
977
978 printf("%s %s-", title, pchar(*start));
979 printf("%s ", pchar(*stop));
980 printf("%ld-%ld\n", (long)startst, (long)stopst);
981}
982
983#ifndef PCHARDONE
984#define PCHARDONE /* never again */
985/*
986 - pchar - make a character printable
987 == #ifdef REDEBUG
988 == static char *pchar(int ch);
989 == #endif
990 *
991 * Is this identical to regchar() over in debug.c? Well, yes. But a
992 * duplicate here avoids having a debugging-capable regexec.o tied to
993 * a matching debug.o, and this is convenient. It all disappears in
994 * the non-debug compilation anyway, so it doesn't matter much.
995 */
996static char * /* -> representation */
997pchar(ch)
998int ch;
999{
1000 static char pbuf[10];
1001
1002 if (isprint(ch) || ch == ' ')
1003 sprintf(pbuf, "%c", ch);
1004 else
1005 sprintf(pbuf, "\\%o", ch);
1006 return(pbuf);
1007}
1008#endif
1009#endif
1010
1011#undef matcher
1012#undef fast
1013#undef slow
1014#undef dissect
1015#undef backref
1016#undef step
1017#undef print
1018#undef at
1019#undef match
diff --git a/src/lib/evil/regex/engine.ih b/src/lib/evil/regex/engine.ih
deleted file mode 100644
index 6b744d4..0000000
--- a/src/lib/evil/regex/engine.ih
+++ /dev/null
@@ -1,35 +0,0 @@
1/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
2#ifdef __cplusplus
3extern "C" {
4#endif
5
6/* === lib/evil/regex/engine.c === */
7static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
8static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
9static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
10static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
11static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
12static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
13#define BOL (OUT+1)
14#define EOL (BOL+1)
15#define BOLEOL (BOL+2)
16#define NOTHING (BOL+3)
17#define BOW (BOL+4)
18#define EOW (BOL+5)
19#define CODEMAX (BOL+5) /* highest code used */
20#define NONCHAR(c) ((c) > CHAR_MAX)
21#define NNONCHAR (CODEMAX-CHAR_MAX)
22#ifdef REDEBUG
23static void print(struct match *m, char *caption, states st, int ch, FILE *d);
24#endif
25#ifdef REDEBUG
26static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
27#endif
28#ifdef REDEBUG
29static char *pchar(int ch);
30#endif
31
32#ifdef __cplusplus
33}
34#endif
35/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
diff --git a/src/lib/evil/regex/meson.build b/src/lib/evil/regex/meson.build
deleted file mode 100644
index d04769b..0000000
--- a/src/lib/evil/regex/meson.build
+++ /dev/null
@@ -1,10 +0,0 @@
1evil_src += files([
2'regcomp.c',
3'regerror.c',
4'regexec.c',
5'regfree.c',
6'cclass.h',
7'cname.h',
8'regex2.h',
9'utils.h'
10])
diff --git a/src/lib/evil/regex/regcomp.c b/src/lib/evil/regex/regcomp.c
deleted file mode 100644
index c1b7850..0000000
--- a/src/lib/evil/regex/regcomp.c
+++ /dev/null
@@ -1,1604 +0,0 @@
1#include <sys/types.h>
2#include <stdio.h>
3#include <string.h>
4#include <ctype.h>
5#include <limits.h>
6#include <stdlib.h>
7#include <regex.h>
8
9#include "utils.h"
10#include "regex2.h"
11
12#include "cclass.h"
13#include "cname.h"
14
15/*
16 * parse structure, passed up and down to avoid global variables and
17 * other clumsinesses
18 */
19struct parse {
20 char *next; /* next character in RE */
21 char *end; /* end of string (-> NUL normally) */
22 int error; /* has an error been seen? */
23 sop *strip; /* malloced strip */
24 sopno ssize; /* malloced strip size (allocated) */
25 sopno slen; /* malloced strip length (used) */
26 int ncsalloc; /* number of csets allocated */
27 struct re_guts *g;
28# define NPAREN 10 /* we need to remember () 1-9 for back refs */
29 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
30 sopno pend[NPAREN]; /* -> ) ([0] unused) */
31};
32
33#include "regcomp.ih"
34
35static char nuls[10]; /* place to point scanner in event of error */
36
37/*
38 * macros for use with parse structure
39 * BEWARE: these know that the parse structure is named `p' !!!
40 */
41#define PEEK() (*p->next)
42#define PEEK2() (*(p->next+1))
43#define MORE() (p->next < p->end)
44#define MORE2() (p->next+1 < p->end)
45#define SEE(c) (MORE() && PEEK() == (c))
46#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
47#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
48#define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
49#define NEXT() (p->next++)
50#define NEXT2() (p->next += 2)
51#define NEXTn(n) (p->next += (n))
52#define GETNEXT() (*p->next++)
53#define SETERROR(e) seterr(p, (e))
54#define REQUIRE(co, e) ((co) || SETERROR(e))
55#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
56#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
57#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
58#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
59#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
60#define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
61#define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
62#define HERE() (p->slen)
63#define THERE() (p->slen - 1)
64#define THERETHERE() (p->slen - 2)
65#define DROP(n) (p->slen -= (n))
66
67#ifndef NDEBUG
68static int never = 0; /* for use in asserts; shuts lint up */
69#else
70#define never 0 /* some <assert.h>s have bugs too */
71#endif
72
73/*
74 - regcomp - interface for parser and compilation
75 = extern int regcomp(regex_t *, const char *, int);
76 = #define REG_BASIC 0000
77 = #define REG_EXTENDED 0001
78 = #define REG_ICASE 0002
79 = #define REG_NOSUB 0004
80 = #define REG_NEWLINE 0010
81 = #define REG_NOSPEC 0020
82 = #define REG_PEND 0040
83 = #define REG_DUMP 0200
84 */
85EAPI int /* 0 success, otherwise REG_something */
86regcomp(preg, pattern, cflags)
87regex_t *preg;
88const char *pattern;
89int cflags;
90{
91 struct parse pa;
92 register struct re_guts *g;
93 register struct parse *p = &pa;
94 register int i;
95 register size_t len;
96#ifdef REDEBUG
97# define GOODFLAGS(f) (f)
98#else
99# define GOODFLAGS(f) ((f)&~REG_DUMP)
100#endif
101
102 cflags = GOODFLAGS(cflags);
103 if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
104 return(REG_INVARG);
105
106 if (cflags&REG_PEND) {
107 if (preg->re_endp < pattern)
108 return(REG_INVARG);
109 len = preg->re_endp - pattern;
110 } else
111 len = strlen((char *)pattern);
112
113 /* do the mallocs early so failure handling is easy */
114 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
115 (NC-1)*sizeof(cat_t));
116 if (g == NULL)
117 return(REG_ESPACE);
118 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
119 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
120 p->slen = 0;
121 if (p->strip == NULL) {
122 free((char *)g);
123 return(REG_ESPACE);
124 }
125
126 /* set things up */
127 p->g = g;
128 p->next = (char *)pattern; /* convenience; we do not modify it */
129 p->end = p->next + len;
130 p->error = 0;
131 p->ncsalloc = 0;
132 for (i = 0; i < NPAREN; i++) {
133 p->pbegin[i] = 0;
134 p->pend[i] = 0;
135 }
136 g->csetsize = NC;
137 g->sets = NULL;
138 g->setbits = NULL;
139 g->ncsets = 0;
140 g->cflags = cflags;
141 g->iflags = 0;
142 g->nbol = 0;
143 g->neol = 0;
144 g->must = NULL;
145 g->mlen = 0;
146 g->nsub = 0;
147 g->ncategories = 1; /* category 0 is "everything else" */
148 g->categories = &g->catspace[-(CHAR_MIN)];
149 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
150 g->backrefs = 0;
151
152 /* do it */
153 EMIT(OEND, 0);
154 g->firststate = THERE();
155 if (cflags&REG_EXTENDED)
156 p_ere(p, OUT);
157 else if (cflags&REG_NOSPEC)
158 p_str(p);
159 else
160 p_bre(p, OUT, OUT);
161 EMIT(OEND, 0);
162 g->laststate = THERE();
163
164 /* tidy up loose ends and fill things in */
165 categorize(p, g);
166 stripsnug(p, g);
167 findmust(p, g);
168 g->nplus = pluscount(p, g);
169 g->magic = MAGIC2;
170 preg->re_nsub = g->nsub;
171 preg->re_g = g;
172 preg->re_magic = MAGIC1;
173#ifndef REDEBUG
174 /* not debugging, so can't rely on the assert() in regexec() */
175 if (g->iflags&BAD)
176 SETERROR(REG_ASSERT);
177#endif
178
179 /* win or lose, we're done */
180 if (p->error != 0) /* lose */
181 regfree(preg);
182 return(p->error);
183}
184
185/*
186 - p_ere - ERE parser top level, concatenation and alternation
187 == static void p_ere(register struct parse *p, int stop);
188 */
189static void
190p_ere(p, stop)
191register struct parse *p;
192int stop; /* character this ERE should end at */
193{
194 register char c;
195 register sopno prevback;
196 register sopno prevfwd;
197 register sopno conc;
198 register int first = 1; /* is this the first alternative? */
199
200 for (;;) {
201 /* do a bunch of concatenated expressions */
202 conc = HERE();
203 while (MORE() && (c = PEEK()) != '|' && c != stop)
204 p_ere_exp(p);
205 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
206
207 if (!EAT('|'))
208 break; /* NOTE BREAK OUT */
209
210 if (first) {
211 INSERT(OCH_, conc); /* offset is wrong */
212 prevfwd = conc;
213 prevback = conc;
214 first = 0;
215 }
216 ASTERN(OOR1, prevback);
217 prevback = THERE();
218 AHEAD(prevfwd); /* fix previous offset */
219 prevfwd = HERE();
220 EMIT(OOR2, 0); /* offset is very wrong */
221 }
222
223 if (!first) { /* tail-end fixups */
224 AHEAD(prevfwd);
225 ASTERN(O_CH, prevback);
226 }
227
228 assert(!MORE() || SEE(stop));
229}
230
231/*
232 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
233 == static void p_ere_exp(register struct parse *p);
234 */
235static void
236p_ere_exp(p)
237register struct parse *p;
238{
239 register char c;
240 register sopno pos;
241 register int count;
242 register int count2;
243 register sopno subno;
244 int wascaret = 0;
245
246 assert(MORE()); /* caller should have ensured this */
247 c = GETNEXT();
248
249 pos = HERE();
250 switch (c) {
251 case '(':
252 REQUIRE(MORE(), REG_EPAREN);
253 p->g->nsub++;
254 subno = p->g->nsub;
255 if (subno < NPAREN)
256 p->pbegin[subno] = HERE();
257 EMIT(OLPAREN, subno);
258 if (!SEE(')'))
259 p_ere(p, ')');
260 if (subno < NPAREN) {
261 p->pend[subno] = HERE();
262 assert(p->pend[subno] != 0);
263 }
264 EMIT(ORPAREN, subno);
265 MUSTEAT(')', REG_EPAREN);
266 break;
267#ifndef POSIX_MISTAKE
268 case ')': /* happens only if no current unmatched ( */
269 /*
270 * You may ask, why the ifndef? Because I didn't notice
271 * this until slightly too late for 1003.2, and none of the
272 * other 1003.2 regular-expression reviewers noticed it at
273 * all. So an unmatched ) is legal POSIX, at least until
274 * we can get it fixed.
275 */
276 SETERROR(REG_EPAREN);
277 break;
278#endif
279 case '^':
280 EMIT(OBOL, 0);
281 p->g->iflags |= USEBOL;
282 p->g->nbol++;
283 wascaret = 1;
284 break;
285 case '$':
286 EMIT(OEOL, 0);
287 p->g->iflags |= USEEOL;
288 p->g->neol++;
289 break;
290 case '|':
291 SETERROR(REG_EMPTY);
292 break;
293 case '*':
294 case '+':
295 case '?':
296 SETERROR(REG_BADRPT);
297 break;
298 case '.':
299 if (p->g->cflags&REG_NEWLINE)
300 nonnewline(p);
301 else
302 EMIT(OANY, 0);
303 break;
304 case '[':
305 p_bracket(p);
306 break;
307 case '\\':
308 REQUIRE(MORE(), REG_EESCAPE);
309 c = GETNEXT();
310 ordinary(p, c);
311 break;
312 case '{': /* okay as ordinary except if digit follows */
313 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
314 /* FALLTHROUGH */
315 default:
316 ordinary(p, c);
317 break;
318 }
319
320 if (!MORE())
321 return;
322 c = PEEK();
323 /* we call { a repetition if followed by a digit */
324 if (!( c == '*' || c == '+' || c == '?' ||
325 (c == '{' && MORE2() && isdigit(PEEK2())) ))
326 return; /* no repetition, we're done */
327 NEXT();
328
329 REQUIRE(!wascaret, REG_BADRPT);
330 switch (c) {
331 case '*': /* implemented as +? */
332 /* this case does not require the (y|) trick, noKLUDGE */
333 INSERT(OPLUS_, pos);
334 ASTERN(O_PLUS, pos);
335 INSERT(OQUEST_, pos);
336 ASTERN(O_QUEST, pos);
337 break;
338 case '+':
339 INSERT(OPLUS_, pos);
340 ASTERN(O_PLUS, pos);
341 break;
342 case '?':
343 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
344 INSERT(OCH_, pos); /* offset slightly wrong */
345 ASTERN(OOR1, pos); /* this one's right */
346 AHEAD(pos); /* fix the OCH_ */
347 EMIT(OOR2, 0); /* offset very wrong... */
348 AHEAD(THERE()); /* ...so fix it */
349 ASTERN(O_CH, THERETHERE());
350 break;
351 case '{':
352 count = p_count(p);
353 if (EAT(',')) {
354 if (isdigit(PEEK())) {
355 count2 = p_count(p);
356 REQUIRE(count <= count2, REG_BADBR);
357 } else /* single number with comma */
358 count2 = INFINITY;
359 } else /* just a single number */
360 count2 = count;
361 repeat(p, pos, count, count2);
362 if (!EAT('}')) { /* error heuristics */
363 while (MORE() && PEEK() != '}')
364 NEXT();
365 REQUIRE(MORE(), REG_EBRACE);
366 SETERROR(REG_BADBR);
367 }
368 break;
369 }
370
371 if (!MORE())
372 return;
373 c = PEEK();
374 if (!( c == '*' || c == '+' || c == '?' ||
375 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
376 return;
377 SETERROR(REG_BADRPT);
378}
379
380/*
381 - p_str - string (no metacharacters) "parser"
382 == static void p_str(register struct parse *p);
383 */
384static void
385p_str(p)
386register struct parse *p;
387{
388 REQUIRE(MORE(), REG_EMPTY);
389 while (MORE())
390 ordinary(p, GETNEXT());
391}
392
393/*
394 - p_bre - BRE parser top level, anchoring and concatenation
395 == static void p_bre(register struct parse *p, register int end1, \
396 == register int end2);
397 * Giving end1 as OUT essentially eliminates the end1/end2 check.
398 *
399 * This implementation is a bit of a kludge, in that a trailing $ is first
400 * taken as an ordinary character and then revised to be an anchor. The
401 * only undesirable side effect is that '$' gets included as a character
402 * category in such cases. This is fairly harmless; not worth fixing.
403 * The amount of lookahead needed to avoid this kludge is excessive.
404 */
405static void
406p_bre(p, end1, end2)
407register struct parse *p;
408register int end1; /* first terminating character */
409register int end2; /* second terminating character */
410{
411 register sopno start = HERE();
412 register int first = 1; /* first subexpression? */
413 register int wasdollar = 0;
414
415 if (EAT('^')) {
416 EMIT(OBOL, 0);
417 p->g->iflags |= USEBOL;
418 p->g->nbol++;
419 }
420 while (MORE() && !SEETWO(end1, end2)) {
421 wasdollar = p_simp_re(p, first);
422 first = 0;
423 }
424 if (wasdollar) { /* oops, that was a trailing anchor */
425 DROP(1);
426 EMIT(OEOL, 0);
427 p->g->iflags |= USEEOL;
428 p->g->neol++;
429 }
430
431 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
432}
433
434/*
435 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
436 == static int p_simp_re(register struct parse *p, int starordinary);
437 */
438static int /* was the simple RE an unbackslashed $? */
439p_simp_re(p, starordinary)
440register struct parse *p;
441int starordinary; /* is a leading * an ordinary character? */
442{
443 register int c;
444 register int count;
445 register int count2;
446 register sopno pos;
447 register int i;
448 register sopno subno;
449# define BACKSL (1<<CHAR_BIT)
450
451 pos = HERE(); /* repetion op, if any, covers from here */
452
453 assert(MORE()); /* caller should have ensured this */
454 c = GETNEXT();
455 if (c == '\\') {
456 REQUIRE(MORE(), REG_EESCAPE);
457 c = BACKSL | (unsigned char)GETNEXT();
458 }
459 switch (c) {
460 case '.':
461 if (p->g->cflags&REG_NEWLINE)
462 nonnewline(p);
463 else
464 EMIT(OANY, 0);
465 break;
466 case '[':
467 p_bracket(p);
468 break;
469 case BACKSL|'{':
470 SETERROR(REG_BADRPT);
471 break;
472 case BACKSL|'(':
473 p->g->nsub++;
474 subno = p->g->nsub;
475 if (subno < NPAREN)
476 p->pbegin[subno] = HERE();
477 EMIT(OLPAREN, subno);
478 /* the MORE here is an error heuristic */
479 if (MORE() && !SEETWO('\\', ')'))
480 p_bre(p, '\\', ')');
481 if (subno < NPAREN) {
482 p->pend[subno] = HERE();
483 assert(p->pend[subno] != 0);
484 }
485 EMIT(ORPAREN, subno);
486 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
487 break;
488 case BACKSL|')': /* should not get here -- must be user */
489 case BACKSL|'}':
490 SETERROR(REG_EPAREN);
491 break;
492 case BACKSL|'1':
493 case BACKSL|'2':
494 case BACKSL|'3':
495 case BACKSL|'4':
496 case BACKSL|'5':
497 case BACKSL|'6':
498 case BACKSL|'7':
499 case BACKSL|'8':
500 case BACKSL|'9':
501 i = (c&~BACKSL) - '0';
502 assert(i < NPAREN);
503 if (p->pend[i] != 0) {
504 assert(i <= p->g->nsub);
505 EMIT(OBACK_, i);
506 assert(p->pbegin[i] != 0);
507 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
508 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
509 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
510 EMIT(O_BACK, i);
511 } else
512 SETERROR(REG_ESUBREG);
513 p->g->backrefs = 1;
514 break;
515 case '*':
516 REQUIRE(starordinary, REG_BADRPT);
517 /* FALLTHROUGH */
518 default:
519 ordinary(p, (char)c); /* takes off BACKSL, if any */
520 break;
521 }
522
523 if (EAT('*')) { /* implemented as +? */
524 /* this case does not require the (y|) trick, noKLUDGE */
525 INSERT(OPLUS_, pos);
526 ASTERN(O_PLUS, pos);
527 INSERT(OQUEST_, pos);
528 ASTERN(O_QUEST, pos);
529 } else if (EATTWO('\\', '{')) {
530 count = p_count(p);
531 if (EAT(',')) {
532 if (MORE() && isdigit(PEEK())) {
533 count2 = p_count(p);
534 REQUIRE(count <= count2, REG_BADBR);
535 } else /* single number with comma */
536 count2 = INFINITY;
537 } else /* just a single number */
538 count2 = count;
539 repeat(p, pos, count, count2);
540 if (!EATTWO('\\', '}')) { /* error heuristics */
541 while (MORE() && !SEETWO('\\', '}'))
542 NEXT();
543 REQUIRE(MORE(), REG_EBRACE);
544 SETERROR(REG_BADBR);
545 }
546 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
547 return(1);
548
549 return(0);
550}
551
552/*
553 - p_count - parse a repetition count
554 == static int p_count(register struct parse *p);
555 */
556static int /* the value */
557p_count(p)
558register struct parse *p;
559{
560 register int count = 0;
561 register int ndigits = 0;
562
563 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
564 count = count*10 + (GETNEXT() - '0');
565 ndigits++;
566 }
567
568 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
569 return(count);
570}
571
572/*
573 - p_bracket - parse a bracketed character list
574 == static void p_bracket(register struct parse *p);
575 *
576 * Note a significant property of this code: if the allocset() did SETERROR,
577 * no set operations are done.
578 */
579static void
580p_bracket(p)
581register struct parse *p;
582{
583 register cset *cs = allocset(p);
584 register int invert = 0;
585
586 /* Dept of Truly Sickening Special-Case Kludges */
587 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
588 EMIT(OBOW, 0);
589 NEXTn(6);
590 return;
591 }
592 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
593 EMIT(OEOW, 0);
594 NEXTn(6);
595 return;
596 }
597
598 if (EAT('^'))
599 invert++; /* make note to invert set at end */
600 if (EAT(']'))
601 CHadd(cs, ']');
602 else if (EAT('-'))
603 CHadd(cs, '-');
604 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
605 p_b_term(p, cs);
606 if (EAT('-'))
607 CHadd(cs, '-');
608 MUSTEAT(']', REG_EBRACK);
609
610 if (p->error != 0) /* don't mess things up further */
611 return;
612
613 if (p->g->cflags&REG_ICASE) {
614 register int i;
615 register int ci;
616
617 for (i = p->g->csetsize - 1; i >= 0; i--)
618 if (CHIN(cs, i) && isalpha(i)) {
619 ci = othercase(i);
620 if (ci != i)
621 CHadd(cs, ci);
622 }
623 if (cs->multis != NULL)
624 mccase(p, cs);
625 }
626 if (invert) {
627 register int i;
628
629 for (i = p->g->csetsize - 1; i >= 0; i--)
630 if (CHIN(cs, i))
631 CHsub(cs, i);
632 else
633 CHadd(cs, i);
634 if (p->g->cflags&REG_NEWLINE)
635 CHsub(cs, '\n');
636 if (cs->multis != NULL)
637 mcinvert(p, cs);
638 }
639
640 assert(cs->multis == NULL); /* xxx */
641
642 if (nch(p, cs) == 1) { /* optimize singleton sets */
643 ordinary(p, firstch(p, cs));
644 freeset(p, cs);
645 } else
646 EMIT(OANYOF, freezeset(p, cs));
647}
648
649/*
650 - p_b_term - parse one term of a bracketed character list
651 == static void p_b_term(register struct parse *p, register cset *cs);
652 */
653static void
654p_b_term(p, cs)
655register struct parse *p;
656register cset *cs;
657{
658 register char c;
659 register char start, finish;
660 register int i;
661
662 /* classify what we've got */
663 switch ((MORE()) ? PEEK() : '\0') {
664 case '[':
665 c = (MORE2()) ? PEEK2() : '\0';
666 break;
667 case '-':
668 SETERROR(REG_ERANGE);
669 return; /* NOTE RETURN */
670 break;
671 default:
672 c = '\0';
673 break;
674 }
675
676 switch (c) {
677 case ':': /* character class */
678 NEXT2();
679 REQUIRE(MORE(), REG_EBRACK);
680 c = PEEK();
681 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
682 p_b_cclass(p, cs);
683 REQUIRE(MORE(), REG_EBRACK);
684 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
685 break;
686 case '=': /* equivalence class */
687 NEXT2();
688 REQUIRE(MORE(), REG_EBRACK);
689 c = PEEK();
690 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
691 p_b_eclass(p, cs);
692 REQUIRE(MORE(), REG_EBRACK);
693 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
694 break;
695 default: /* symbol, ordinary character, or range */
696/* xxx revision needed for multichar stuff */
697 start = p_b_symbol(p);
698 if (SEE('-') && MORE2() && PEEK2() != ']') {
699 /* range */
700 NEXT();
701 if (EAT('-'))
702 finish = '-';
703 else
704 finish = p_b_symbol(p);
705 } else
706 finish = start;
707/* xxx what about signed chars here... */
708 REQUIRE(start <= finish, REG_ERANGE);
709 for (i = start; i <= finish; i++)
710 CHadd(cs, i);
711 break;
712 }
713}
714
715/*
716 - p_b_cclass - parse a character-class name and deal with it
717 == static void p_b_cclass(register struct parse *p, register cset *cs);
718 */
719static void
720p_b_cclass(p, cs)
721register struct parse *p;
722register cset *cs;
723{
724 register char *sp = p->next;
725 register struct cclass *cp;
726 register size_t len;
727 register char *u;
728 register char c;
729
730 while (MORE() && isalpha(PEEK()))
731 NEXT();
732 len = p->next - sp;
733 for (cp = cclasses; cp->name != NULL; cp++)
734 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
735 break;
736 if (cp->name == NULL) {
737 /* oops, didn't find it */
738 SETERROR(REG_ECTYPE);
739 return;
740 }
741
742 u = cp->chars;
743 while ((c = *u++) != '\0')
744 CHadd(cs, c);
745 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
746 MCadd(p, cs, u);
747}
748
749/*
750 - p_b_eclass - parse an equivalence-class name and deal with it
751 == static void p_b_eclass(register struct parse *p, register cset *cs);
752 *
753 * This implementation is incomplete. xxx
754 */
755static void
756p_b_eclass(p, cs)
757register struct parse *p;
758register cset *cs;
759{
760 register char c;
761
762 c = p_b_coll_elem(p, '=');
763 CHadd(cs, c);
764}
765
766/*
767 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
768 == static char p_b_symbol(register struct parse *p);
769 */
770static char /* value of symbol */
771p_b_symbol(p)
772register struct parse *p;
773{
774 register char value;
775
776 REQUIRE(MORE(), REG_EBRACK);
777 if (!EATTWO('[', '.'))
778 return(GETNEXT());
779
780 /* collating symbol */
781 value = p_b_coll_elem(p, '.');
782 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
783 return(value);
784}
785
786/*
787 - p_b_coll_elem - parse a collating-element name and look it up
788 == static char p_b_coll_elem(register struct parse *p, int endc);
789 */
790static char /* value of collating element */
791p_b_coll_elem(p, endc)
792register struct parse *p;
793int endc; /* name ended by endc,']' */
794{
795 register char *sp = p->next;
796 register struct cname *cp;
797 register int len;
798
799 while (MORE() && !SEETWO(endc, ']'))
800 NEXT();
801 if (!MORE()) {
802 SETERROR(REG_EBRACK);
803 return(0);
804 }
805 len = p->next - sp;
806 for (cp = cnames; cp->name != NULL; cp++)
807 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
808 return(cp->code); /* known name */
809 if (len == 1)
810 return(*sp); /* single character */
811 SETERROR(REG_ECOLLATE); /* neither */
812 return(0);
813}
814
815/*
816 - othercase - return the case counterpart of an alphabetic
817 == static char othercase(int ch);
818 */
819static char /* if no counterpart, return ch */
820othercase(ch)
821int ch;
822{
823 assert(isalpha(ch));
824 if (isupper(ch))
825 return(tolower(ch));
826 else if (islower(ch))
827 return(toupper(ch));
828 else /* peculiar, but could happen */
829 return(ch);
830}
831
832/*
833 - bothcases - emit a dualcase version of a two-case character
834 == static void bothcases(register struct parse *p, int ch);
835 *
836 * Boy, is this implementation ever a kludge...
837 */
838static void
839bothcases(p, ch)
840register struct parse *p;
841int ch;
842{
843 register char *oldnext = p->next;
844 register char *oldend = p->end;
845 char bracket[3];
846
847 assert(othercase(ch) != ch); /* p_bracket() would recurse */
848 p->next = bracket;
849 p->end = bracket+2;
850 bracket[0] = ch;
851 bracket[1] = ']';
852 bracket[2] = '\0';
853 p_bracket(p);
854 assert(p->next == bracket+2);
855 p->next = oldnext;
856 p->end = oldend;
857}
858
859/*
860 - ordinary - emit an ordinary character
861 == static void ordinary(register struct parse *p, register int ch);
862 */
863static void
864ordinary(p, ch)
865register struct parse *p;
866register int ch;
867{
868 register cat_t *cap = p->g->categories;
869
870 if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
871 bothcases(p, ch);
872 else {
873 EMIT(OCHAR, (unsigned char)ch);
874 if (cap[ch] == 0)
875 cap[ch] = p->g->ncategories++;
876 }
877}
878
879/*
880 - nonnewline - emit REG_NEWLINE version of OANY
881 == static void nonnewline(register struct parse *p);
882 *
883 * Boy, is this implementation ever a kludge...
884 */
885static void
886nonnewline(p)
887register struct parse *p;
888{
889 register char *oldnext = p->next;
890 register char *oldend = p->end;
891 char bracket[4];
892
893 p->next = bracket;
894 p->end = bracket+3;
895 bracket[0] = '^';
896 bracket[1] = '\n';
897 bracket[2] = ']';
898 bracket[3] = '\0';
899 p_bracket(p);
900 assert(p->next == bracket+3);
901 p->next = oldnext;
902 p->end = oldend;
903}
904
905/*
906 - repeat - generate code for a bounded repetition, recursively if needed
907 == static void repeat(register struct parse *p, sopno start, int from, int to);
908 */
909static void
910repeat(p, start, from, to)
911register struct parse *p;
912sopno start; /* operand from here to end of strip */
913int from; /* repeated from this number */
914int to; /* to this number of times (maybe INFINITY) */
915{
916 register sopno finish = HERE();
917# define N 2
918# define INF 3
919# define REP(f, t) ((f)*8 + (t))
920# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
921 register sopno copy;
922
923 if (p->error != 0) /* head off possible runaway recursion */
924 return;
925
926 assert(from <= to);
927
928 switch (REP(MAP(from), MAP(to))) {
929 case REP(0, 0): /* must be user doing this */
930 DROP(finish-start); /* drop the operand */
931 break;
932 case REP(0, 1): /* as x{1,1}? */
933 case REP(0, N): /* as x{1,n}? */
934 case REP(0, INF): /* as x{1,}? */
935 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
936 INSERT(OCH_, start); /* offset is wrong... */
937 repeat(p, start+1, 1, to);
938 ASTERN(OOR1, start);
939 AHEAD(start); /* ... fix it */
940 EMIT(OOR2, 0);
941 AHEAD(THERE());
942 ASTERN(O_CH, THERETHERE());
943 break;
944 case REP(1, 1): /* trivial case */
945 /* done */
946 break;
947 case REP(1, N): /* as x?x{1,n-1} */
948 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
949 INSERT(OCH_, start);
950 ASTERN(OOR1, start);
951 AHEAD(start);
952 EMIT(OOR2, 0); /* offset very wrong... */
953 AHEAD(THERE()); /* ...so fix it */
954 ASTERN(O_CH, THERETHERE());
955 copy = dupl(p, start+1, finish+1);
956 assert(copy == finish+4);
957 repeat(p, copy, 1, to-1);
958 break;
959 case REP(1, INF): /* as x+ */
960 INSERT(OPLUS_, start);
961 ASTERN(O_PLUS, start);
962 break;
963 case REP(N, N): /* as xx{m-1,n-1} */
964 copy = dupl(p, start, finish);
965 repeat(p, copy, from-1, to-1);
966 break;
967 case REP(N, INF): /* as xx{n-1,INF} */
968 copy = dupl(p, start, finish);
969 repeat(p, copy, from-1, to);
970 break;
971 default: /* "can't happen" */
972 SETERROR(REG_ASSERT); /* just in case */
973 break;
974 }
975}
976
977/*
978 - seterr - set an error condition
979 == static int seterr(register struct parse *p, int e);
980 */
981static int /* useless but makes type checking happy */
982seterr(p, e)
983register struct parse *p;
984int e;
985{
986 if (p->error == 0) /* keep earliest error condition */
987 p->error = e;
988 p->next = nuls; /* try to bring things to a halt */
989 p->end = nuls;
990 return(0); /* make the return value well-defined */
991}
992
993/*
994 - allocset - allocate a set of characters for []
995 == static cset *allocset(register struct parse *p);
996 */
997static cset *
998allocset(p)
999register struct parse *p;
1000{
1001 register int no = p->g->ncsets++;
1002 register size_t nc;
1003 register size_t nbytes;
1004 register cset *cs;
1005 register size_t css = (size_t)p->g->csetsize;
1006 register int i;
1007
1008 if (no >= p->ncsalloc) { /* need another column of space */
1009 p->ncsalloc += CHAR_BIT;
1010 nc = p->ncsalloc;
1011 assert(nc % CHAR_BIT == 0);
1012 nbytes = nc / CHAR_BIT * css;
1013 if (p->g->sets == NULL)
1014 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1015 else
1016 p->g->sets = (cset *)realloc((char *)p->g->sets,
1017 nc * sizeof(cset));
1018 if (p->g->setbits == NULL)
1019 p->g->setbits = (uch *)malloc(nbytes);
1020 else {
1021 p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1022 nbytes);
1023 /* xxx this isn't right if setbits is now NULL */
1024 for (i = 0; i < no; i++)
1025 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1026 }
1027 if (p->g->sets != NULL && p->g->setbits != NULL)
1028 (void) memset((char *)p->g->setbits + (nbytes - css),
1029 0, css);
1030 else {
1031 no = 0;
1032 SETERROR(REG_ESPACE);
1033 /* caller's responsibility not to do set ops */
1034 }
1035 }
1036
1037 assert(p->g->sets != NULL); /* xxx */
1038 cs = &p->g->sets[no];
1039 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1040 cs->mask = 1 << ((no) % CHAR_BIT);
1041 cs->hash = 0;
1042 cs->smultis = 0;
1043 cs->multis = NULL;
1044
1045 return(cs);
1046}
1047
1048/*
1049 - freeset - free a now-unused set
1050 == static void freeset(register struct parse *p, register cset *cs);
1051 */
1052static void
1053freeset(p, cs)
1054register struct parse *p;
1055register cset *cs;
1056{
1057 register int i;
1058 register cset *top = &p->g->sets[p->g->ncsets];
1059 register size_t css = (size_t)p->g->csetsize;
1060
1061 for (i = 0; i < css; i++)
1062 CHsub(cs, i);
1063 if (cs == top-1) /* recover only the easy case */
1064 p->g->ncsets--;
1065}
1066
1067/*
1068 - freezeset - final processing on a set of characters
1069 == static int freezeset(register struct parse *p, register cset *cs);
1070 *
1071 * The main task here is merging identical sets. This is usually a waste
1072 * of time (although the hash code minimizes the overhead), but can win
1073 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1074 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1075 * the same value!
1076 */
1077static int /* set number */
1078freezeset(p, cs)
1079register struct parse *p;
1080register cset *cs;
1081{
1082 register uch h = cs->hash;
1083 register int i;
1084 register cset *top = &p->g->sets[p->g->ncsets];
1085 register cset *cs2;
1086 register size_t css = (size_t)p->g->csetsize;
1087
1088 /* look for an earlier one which is the same */
1089 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1090 if (cs2->hash == h && cs2 != cs) {
1091 /* maybe */
1092 for (i = 0; i < css; i++)
1093 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1094 break; /* no */
1095 if (i == css)
1096 break; /* yes */
1097 }
1098
1099 if (cs2 < top) { /* found one */
1100 freeset(p, cs);
1101 cs = cs2;
1102 }
1103
1104 return((int)(cs - p->g->sets));
1105}
1106
1107/*
1108 - firstch - return first character in a set (which must have at least one)
1109 == static int firstch(register struct parse *p, register cset *cs);
1110 */
1111static int /* character; there is no "none" value */
1112firstch(p, cs)
1113register struct parse *p;
1114register cset *cs;
1115{
1116 register int i;
1117 register size_t css = (size_t)p->g->csetsize;
1118
1119 for (i = 0; i < css; i++)
1120 if (CHIN(cs, i))
1121 return((char)i);
1122 assert(never);
1123 return(0); /* arbitrary */
1124}
1125
1126/*
1127 - nch - number of characters in a set
1128 == static int nch(register struct parse *p, register cset *cs);
1129 */
1130static int
1131nch(p, cs)
1132register struct parse *p;
1133register cset *cs;
1134{
1135 register int i;
1136 register size_t css = (size_t)p->g->csetsize;
1137 register int n = 0;
1138
1139 for (i = 0; i < css; i++)
1140 if (CHIN(cs, i))
1141 n++;
1142 return(n);
1143}
1144
1145/*
1146 - mcadd - add a collating element to a cset
1147 == static void mcadd(register struct parse *p, register cset *cs, \
1148 == register char *cp);
1149 */
1150static void
1151mcadd(p, cs, cp)
1152register struct parse *p;
1153register cset *cs;
1154register char *cp;
1155{
1156 register size_t oldend = cs->smultis;
1157
1158 cs->smultis += strlen(cp) + 1;
1159 if (cs->multis == NULL)
1160 cs->multis = malloc(cs->smultis);
1161 else
1162 cs->multis = realloc(cs->multis, cs->smultis);
1163 if (cs->multis == NULL) {
1164 SETERROR(REG_ESPACE);
1165 return;
1166 }
1167
1168 (void) strcpy(cs->multis + oldend - 1, cp);
1169 cs->multis[cs->smultis - 1] = '\0';
1170}
1171
1172/*
1173 - mcsub - subtract a collating element from a cset
1174 == static void mcsub(register cset *cs, register char *cp);
1175 */
1176static void
1177mcsub(cs, cp)
1178register cset *cs;
1179register char *cp;
1180{
1181 register char *fp = mcfind(cs, cp);
1182 register size_t len;
1183
1184 assert(fp != NULL);
1185 len = strlen(fp);
1186 (void) memmove(fp, fp + len + 1,
1187 cs->smultis - (fp + len + 1 - cs->multis));
1188 cs->smultis -= len;
1189
1190 if (cs->smultis == 0) {
1191 free(cs->multis);
1192 cs->multis = NULL;
1193 return;
1194 }
1195
1196 cs->multis = realloc(cs->multis, cs->smultis);
1197 assert(cs->multis != NULL);
1198}
1199
1200/*
1201 - mcin - is a collating element in a cset?
1202 == static int mcin(register cset *cs, register char *cp);
1203 */
1204static int
1205mcin(cs, cp)
1206register cset *cs;
1207register char *cp;
1208{
1209 return(mcfind(cs, cp) != NULL);
1210}
1211
1212/*
1213 - mcfind - find a collating element in a cset
1214 == static char *mcfind(register cset *cs, register char *cp);
1215 */
1216static char *
1217mcfind(cs, cp)
1218register cset *cs;
1219register char *cp;
1220{
1221 register char *p;
1222
1223 if (cs->multis == NULL)
1224 return(NULL);
1225 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1226 if (strcmp(cp, p) == 0)
1227 return(p);
1228 return(NULL);
1229}
1230
1231/*
1232 - mcinvert - invert the list of collating elements in a cset
1233 == static void mcinvert(register struct parse *p, register cset *cs);
1234 *
1235 * This would have to know the set of possibilities. Implementation
1236 * is deferred.
1237 */
1238static void
1239mcinvert(p, cs)
1240register struct parse *p;
1241register cset *cs;
1242{
1243 assert(cs->multis == NULL); /* xxx */
1244}
1245
1246/*
1247 - mccase - add case counterparts of the list of collating elements in a cset
1248 == static void mccase(register struct parse *p, register cset *cs);
1249 *
1250 * This would have to know the set of possibilities. Implementation
1251 * is deferred.
1252 */
1253static void
1254mccase(p, cs)
1255register struct parse *p;
1256register cset *cs;
1257{
1258 assert(cs->multis == NULL); /* xxx */
1259}
1260
1261/*
1262 - isinsets - is this character in any sets?
1263 == static int isinsets(register struct re_guts *g, int c);
1264 */
1265static int /* predicate */
1266isinsets(g, c)
1267register struct re_guts *g;
1268int c;
1269{
1270 register uch *col;
1271 register int i;
1272 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1273 register unsigned uc = (unsigned char)c;
1274
1275 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1276 if (col[uc] != 0)
1277 return(1);
1278 return(0);
1279}
1280
1281/*
1282 - samesets - are these two characters in exactly the same sets?
1283 == static int samesets(register struct re_guts *g, int c1, int c2);
1284 */
1285static int /* predicate */
1286samesets(g, c1, c2)
1287register struct re_guts *g;
1288int c1;
1289int c2;
1290{
1291 register uch *col;
1292 register int i;
1293 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1294 register unsigned uc1 = (unsigned char)c1;
1295 register unsigned uc2 = (unsigned char)c2;
1296
1297 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1298 if (col[uc1] != col[uc2])
1299 return(0);
1300 return(1);
1301}
1302
1303/*
1304 - categorize - sort out character categories
1305 == static void categorize(struct parse *p, register struct re_guts *g);
1306 */
1307static void
1308categorize(p, g)
1309struct parse *p;
1310register struct re_guts *g;
1311{
1312 register cat_t *cats = g->categories;
1313 register int c;
1314 register int c2;
1315 register cat_t cat;
1316
1317 /* avoid making error situations worse */
1318 if (p->error != 0)
1319 return;
1320
1321 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1322 if (cats[c] == 0 && isinsets(g, c)) {
1323 cat = g->ncategories++;
1324 cats[c] = cat;
1325 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1326 if (cats[c2] == 0 && samesets(g, c, c2))
1327 cats[c2] = cat;
1328 }
1329}
1330
1331/*
1332 - dupl - emit a duplicate of a bunch of sops
1333 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1334 */
1335static sopno /* start of duplicate */
1336dupl(p, start, finish)
1337register struct parse *p;
1338sopno start; /* from here */
1339sopno finish; /* to this less one */
1340{
1341 register sopno ret = HERE();
1342 register sopno len = finish - start;
1343
1344 assert(finish >= start);
1345 if (len == 0)
1346 return(ret);
1347 enlarge(p, p->ssize + len); /* this many unexpected additions */
1348 assert(p->ssize >= p->slen + len);
1349 (void) memcpy((char *)(p->strip + p->slen),
1350 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1351 p->slen += len;
1352 return(ret);
1353}
1354
1355/*
1356 - doemit - emit a strip operator
1357 == static void doemit(register struct parse *p, sop op, size_t opnd);
1358 *
1359 * It might seem better to implement this as a macro with a function as
1360 * hard-case backup, but it's just too big and messy unless there are
1361 * some changes to the data structures. Maybe later.
1362 */
1363static void
1364doemit(p, op, opnd)
1365register struct parse *p;
1366sop op;
1367size_t opnd;
1368{
1369 /* avoid making error situations worse */
1370 if (p->error != 0)
1371 return;
1372
1373 /* deal with oversize operands ("can't happen", more or less) */
1374 assert(opnd < 1<<OPSHIFT);
1375
1376 /* deal with undersized strip */
1377 if (p->slen >= p->ssize)
1378 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1379 assert(p->slen < p->ssize);
1380
1381 /* finally, it's all reduced to the easy case */
1382 p->strip[p->slen++] = SOP(op, opnd);
1383}
1384
1385/*
1386 - doinsert - insert a sop into the strip
1387 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1388 */
1389static void
1390doinsert(p, op, opnd, pos)
1391register struct parse *p;
1392sop op;
1393size_t opnd;
1394sopno pos;
1395{
1396 register sopno sn;
1397 register sop s;
1398 register int i;
1399
1400 /* avoid making error situations worse */
1401 if (p->error != 0)
1402 return;
1403
1404 sn = HERE();
1405 EMIT(op, opnd); /* do checks, ensure space */
1406 assert(HERE() == sn+1);
1407 s = p->strip[sn];
1408
1409 /* adjust paren pointers */
1410 assert(pos > 0);
1411 for (i = 1; i < NPAREN; i++) {
1412 if (p->pbegin[i] >= pos) {
1413 p->pbegin[i]++;
1414 }
1415 if (p->pend[i] >= pos) {
1416 p->pend[i]++;
1417 }
1418 }
1419
1420 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1421 (HERE()-pos-1)*sizeof(sop));
1422 p->strip[pos] = s;
1423}
1424
1425/*
1426 - dofwd - complete a forward reference
1427 == static void dofwd(register struct parse *p, sopno pos, sop value);
1428 */
1429static void
1430dofwd(p, pos, value)
1431register struct parse *p;
1432register sopno pos;
1433sop value;
1434{
1435 /* avoid making error situations worse */
1436 if (p->error != 0)
1437 return;
1438
1439 assert(value < 1<<OPSHIFT);
1440 p->strip[pos] = OP(p->strip[pos]) | value;
1441}
1442
1443/*
1444 - enlarge - enlarge the strip
1445 == static void enlarge(register struct parse *p, sopno size);
1446 */
1447static void
1448enlarge(p, size)
1449register struct parse *p;
1450register sopno size;
1451{
1452 register sop *sp;
1453
1454 if (p->ssize >= size)
1455 return;
1456
1457 sp = (sop *)realloc(p->strip, size*sizeof(sop));
1458 if (sp == NULL) {
1459 SETERROR(REG_ESPACE);
1460 return;
1461 }
1462 p->strip = sp;
1463 p->ssize = size;
1464}
1465
1466/*
1467 - stripsnug - compact the strip
1468 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1469 */
1470static void
1471stripsnug(p, g)
1472register struct parse *p;
1473register struct re_guts *g;
1474{
1475 g->nstates = p->slen;
1476 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1477 if (g->strip == NULL) {
1478 SETERROR(REG_ESPACE);
1479 g->strip = p->strip;
1480 }
1481}
1482
1483/*
1484 - findmust - fill in must and mlen with longest mandatory literal string
1485 == static void findmust(register struct parse *p, register struct re_guts *g);
1486 *
1487 * This algorithm could do fancy things like analyzing the operands of |
1488 * for common subsequences. Someday. This code is simple and finds most
1489 * of the interesting cases.
1490 *
1491 * Note that must and mlen got initialized during setup.
1492 */
1493static void
1494findmust(p, g)
1495struct parse *p;
1496register struct re_guts *g;
1497{
1498 register sop *scan;
1499 sop *start;
1500 register sop *newstart;
1501 register sopno newlen;
1502 register sop s;
1503 register char *cp;
1504 register sopno i;
1505
1506 /* avoid making error situations worse */
1507 if (p->error != 0)
1508 return;
1509
1510 /* find the longest OCHAR sequence in strip */
1511 newlen = 0;
1512 scan = g->strip + 1;
1513 do {
1514 s = *scan++;
1515 switch (OP(s)) {
1516 case OCHAR: /* sequence member */
1517 if (newlen == 0) /* new sequence */
1518 newstart = scan - 1;
1519 newlen++;
1520 break;
1521 case OPLUS_: /* things that don't break one */
1522 case OLPAREN:
1523 case ORPAREN:
1524 break;
1525 case OQUEST_: /* things that must be skipped */
1526 case OCH_:
1527 scan--;
1528 do {
1529 scan += OPND(s);
1530 s = *scan;
1531 /* assert() interferes w debug printouts */
1532 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1533 OP(s) != OOR2) {
1534 g->iflags |= BAD;
1535 return;
1536 }
1537 } while (OP(s) != O_QUEST && OP(s) != O_CH);
1538 /* fallthrough */
1539 default: /* things that break a sequence */
1540 if (newlen > g->mlen) { /* ends one */
1541 start = newstart;
1542 g->mlen = newlen;
1543 }
1544 newlen = 0;
1545 break;
1546 }
1547 } while (OP(s) != OEND);
1548
1549 if (g->mlen == 0) /* there isn't one */
1550 return;
1551
1552 /* turn it into a character string */
1553 g->must = malloc((size_t)g->mlen + 1);
1554 if (g->must == NULL) { /* argh; just forget it */
1555 g->mlen = 0;
1556 return;
1557 }
1558 cp = g->must;
1559 scan = start;
1560 for (i = g->mlen; i > 0; i--) {
1561 while (OP(s = *scan++) != OCHAR)
1562 continue;
1563 assert(cp < g->must + g->mlen);
1564 *cp++ = (char)OPND(s);
1565 }
1566 assert(cp == g->must + g->mlen);
1567 *cp++ = '\0'; /* just on general principles */
1568}
1569
1570/*
1571 - pluscount - count + nesting
1572 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1573 */
1574static sopno /* nesting depth */
1575pluscount(p, g)
1576struct parse *p;
1577register struct re_guts *g;
1578{
1579 register sop *scan;
1580 register sop s;
1581 register sopno plusnest = 0;
1582 register sopno maxnest = 0;
1583
1584 if (p->error != 0)
1585 return(0); /* there may not be an OEND */
1586
1587 scan = g->strip + 1;
1588 do {
1589 s = *scan++;
1590 switch (OP(s)) {
1591 case OPLUS_:
1592 plusnest++;
1593 break;
1594 case O_PLUS:
1595 if (plusnest > maxnest)
1596 maxnest = plusnest;
1597 plusnest--;
1598 break;
1599 }
1600 } while (OP(s) != OEND);
1601 if (plusnest != 0)
1602 g->iflags |= BAD;
1603 return(maxnest);
1604}
diff --git a/src/lib/evil/regex/regcomp.ih b/src/lib/evil/regex/regcomp.ih
deleted file mode 100644
index 357461f..0000000
--- a/src/lib/evil/regex/regcomp.ih
+++ /dev/null
@@ -1,51 +0,0 @@
1/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
2#ifdef __cplusplus
3extern "C" {
4#endif
5
6/* === lib/evil/regex/regcomp.c === */
7static void p_ere(register struct parse *p, int stop);
8static void p_ere_exp(register struct parse *p);
9static void p_str(register struct parse *p);
10static void p_bre(register struct parse *p, register int end1, register int end2);
11static int p_simp_re(register struct parse *p, int starordinary);
12static int p_count(register struct parse *p);
13static void p_bracket(register struct parse *p);
14static void p_b_term(register struct parse *p, register cset *cs);
15static void p_b_cclass(register struct parse *p, register cset *cs);
16static void p_b_eclass(register struct parse *p, register cset *cs);
17static char p_b_symbol(register struct parse *p);
18static char p_b_coll_elem(register struct parse *p, int endc);
19static char othercase(int ch);
20static void bothcases(register struct parse *p, int ch);
21static void ordinary(register struct parse *p, register int ch);
22static void nonnewline(register struct parse *p);
23static void repeat(register struct parse *p, sopno start, int from, int to);
24static int seterr(register struct parse *p, int e);
25static cset *allocset(register struct parse *p);
26static void freeset(register struct parse *p, register cset *cs);
27static int freezeset(register struct parse *p, register cset *cs);
28static int firstch(register struct parse *p, register cset *cs);
29static int nch(register struct parse *p, register cset *cs);
30static void mcadd(register struct parse *p, register cset *cs, register char *cp);
31static void mcsub(register cset *cs, register char *cp);
32static int mcin(register cset *cs, register char *cp);
33static char *mcfind(register cset *cs, register char *cp);
34static void mcinvert(register struct parse *p, register cset *cs);
35static void mccase(register struct parse *p, register cset *cs);
36static int isinsets(register struct re_guts *g, int c);
37static int samesets(register struct re_guts *g, int c1, int c2);
38static void categorize(struct parse *p, register struct re_guts *g);
39static sopno dupl(register struct parse *p, sopno start, sopno finish);
40static void doemit(register struct parse *p, sop op, size_t opnd);
41static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
42static void dofwd(register struct parse *p, sopno pos, sop value);
43static void enlarge(register struct parse *p, sopno size);
44static void stripsnug(register struct parse *p, register struct re_guts *g);
45static void findmust(register struct parse *p, register struct re_guts *g);
46static sopno pluscount(register struct parse *p, register struct re_guts *g);
47
48#ifdef __cplusplus
49}
50#endif
51/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
diff --git a/src/lib/evil/regex/regerror.c b/src/lib/evil/regex/regerror.c
deleted file mode 100644
index ae46a43..0000000
--- a/src/lib/evil/regex/regerror.c
+++ /dev/null
@@ -1,122 +0,0 @@
1#include <sys/types.h>
2#include <stdio.h>
3#include <string.h>
4#include <ctype.h>
5#include <limits.h>
6#include <stdlib.h>
7
8#include "regex.h"
9#include "utils.h"
10#include "regerror.ih"
11
12/*
13 = #define REG_OKAY 0
14 = #define REG_NOMATCH 1
15 = #define REG_BADPAT 2
16 = #define REG_ECOLLATE 3
17 = #define REG_ECTYPE 4
18 = #define REG_EESCAPE 5
19 = #define REG_ESUBREG 6
20 = #define REG_EBRACK 7
21 = #define REG_EPAREN 8
22 = #define REG_EBRACE 9
23 = #define REG_BADBR 10
24 = #define REG_ERANGE 11
25 = #define REG_ESPACE 12
26 = #define REG_BADRPT 13
27 = #define REG_EMPTY 14
28 = #define REG_ASSERT 15
29 = #define REG_INVARG 16
30 = #define REG_ATOI 255 // convert name to number (!)
31 = #define REG_ITOA 0400 // convert number to name (!)
32 */
33static struct rerr {
34 int code;
35 char *name;
36 char *explain;
37} rerrs[] = {
38 REG_OKAY, "REG_OKAY", "no errors detected",
39 REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match",
40 REG_BADPAT, "REG_BADPAT", "invalid regular expression",
41 REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element",
42 REG_ECTYPE, "REG_ECTYPE", "invalid character class",
43 REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)",
44 REG_ESUBREG, "REG_ESUBREG", "invalid backreference number",
45 REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced",
46 REG_EPAREN, "REG_EPAREN", "parentheses not balanced",
47 REG_EBRACE, "REG_EBRACE", "braces not balanced",
48 REG_BADBR, "REG_BADBR", "invalid repetition count(s)",
49 REG_ERANGE, "REG_ERANGE", "invalid character range",
50 REG_ESPACE, "REG_ESPACE", "out of memory",
51 REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid",
52 REG_EMPTY, "REG_EMPTY", "empty (sub)expression",
53 REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug",
54 REG_INVARG, "REG_INVARG", "invalid argument to regex routine",
55 -1, "", "*** unknown regexp error code ***",
56};
57
58/*
59 - regerror - the interface to error numbers
60 = extern size_t regerror(int, const regex_t *, char *, size_t);
61 */
62/* ARGSUSED */
63EAPI size_t
64regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
65{
66 register struct rerr *r;
67 register size_t len;
68 register int target = errcode &~ REG_ITOA;
69 register char *s;
70 char convbuf[50];
71
72 if (errcode == REG_ATOI)
73 s = regatoi(preg, convbuf);
74 else {
75 for (r = rerrs; r->code >= 0; r++)
76 if (r->code == target)
77 break;
78
79 if (errcode&REG_ITOA) {
80 if (r->code >= 0)
81 (void) strcpy(convbuf, r->name);
82 else
83 sprintf(convbuf, "REG_0x%x", target);
84 assert(strlen(convbuf) < sizeof(convbuf));
85 s = convbuf;
86 } else
87 s = r->explain;
88 }
89
90 len = strlen(s) + 1;
91 if (errbuf_size > 0) {
92 if (errbuf_size > len)
93 (void) strcpy(errbuf, s);
94 else {
95 (void) strncpy(errbuf, s, errbuf_size-1);
96 errbuf[errbuf_size-1] = '\0';
97 }
98 }
99
100 return(len);
101}
102
103/*
104 - regatoi - internal routine to implement REG_ATOI
105 == static char *regatoi(const regex_t *preg, char *localbuf);
106 */
107static char *
108regatoi(preg, localbuf)
109const regex_t *preg;
110char *localbuf;
111{
112 register struct rerr *r;
113
114 for (r = rerrs; r->code >= 0; r++)
115 if (strcmp(r->name, preg->re_endp) == 0)
116 break;
117 if (r->code < 0)
118 return("0");
119
120 sprintf(localbuf, "%d", r->code);
121 return(localbuf);
122}
diff --git a/src/lib/evil/regex/regerror.ih b/src/lib/evil/regex/regerror.ih
deleted file mode 100644
index e3f391c..0000000
--- a/src/lib/evil/regex/regerror.ih
+++ /dev/null
@@ -1,12 +0,0 @@
1/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
2#ifdef __cplusplus
3extern "C" {
4#endif
5
6/* === lib/evil/regex/regerror.c === */
7static char *regatoi(const regex_t *preg, char *localbuf);
8
9#ifdef __cplusplus
10}
11#endif
12/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
diff --git a/src/lib/evil/regex/regex.h b/src/lib/evil/regex/regex.h
deleted file mode 100644
index b521b3a..0000000
--- a/src/lib/evil/regex/regex.h
+++ /dev/null
@@ -1,78 +0,0 @@
1#ifndef _REGEX_H_
2#define _REGEX_H_ /* never again */
3
4#include <evil_macro.h>
5#include <sys/types.h>
6
7/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
8#ifdef __cplusplus
9extern "C" {
10#endif
11
12/* === lib/evil/regex/regex2.h === */
13typedef off_t regoff_t;
14typedef struct {
15 int re_magic;
16 size_t re_nsub; /* number of parenthesized subexpressions */
17 const char *re_endp; /* end pointer for REG_PEND */
18 struct re_guts *re_g; /* none of your business :-) */
19} regex_t;
20typedef struct {
21 regoff_t rm_so; /* start of match */
22 regoff_t rm_eo; /* end of match */
23} regmatch_t;
24
25
26/* === lib/evil/regex/regcomp.c === */
27EAPI int regcomp(regex_t *, const char *, int);
28#define REG_BASIC 0000
29#define REG_EXTENDED 0001
30#define REG_ICASE 0002
31#define REG_NOSUB 0004
32#define REG_NEWLINE 0010
33#define REG_NOSPEC 0020
34#define REG_PEND 0040
35#define REG_DUMP 0200
36
37
38/* === lib/evil/regex/regerror.c === */
39#define REG_OKAY 0
40#define REG_NOMATCH 1
41#define REG_BADPAT 2
42#define REG_ECOLLATE 3
43#define REG_ECTYPE 4
44#define REG_EESCAPE 5
45#define REG_ESUBREG 6
46#define REG_EBRACK 7
47#define REG_EPAREN 8
48#define REG_EBRACE 9
49#define REG_BADBR 10
50#define REG_ERANGE 11
51#define REG_ESPACE 12
52#define REG_BADRPT 13
53#define REG_EMPTY 14
54#define REG_ASSERT 15
55#define REG_INVARG 16
56#define REG_ATOI 255 /* convert name to number (!) */
57#define REG_ITOA 0400 /* convert number to name (!) */
58EAPI size_t regerror(int, const regex_t *, char *, size_t);
59
60
61/* === lib/evil/regex/regexec.c === */
62EAPI int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
63#define REG_NOTBOL 00001
64#define REG_NOTEOL 00002
65#define REG_STARTEND 00004
66#define REG_TRACE 00400 /* tracing of execution */
67#define REG_LARGE 01000 /* force large representation */
68#define REG_BACKR 02000 /* force use of backref code */
69
70
71/* === lib/evil/regex/regfree.c === */
72EAPI void regfree(regex_t *);
73
74#ifdef __cplusplus
75}
76#endif
77/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
78#endif
diff --git a/src/lib/evil/regex/regex2.h b/src/lib/evil/regex/regex2.h
deleted file mode 100644
index 58fd8d8..0000000
--- a/src/lib/evil/regex/regex2.h
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * First, the stuff that ends up in the outside-world include file
3 = typedef off_t regoff_t;
4 = typedef struct {
5 = int re_magic;
6 = size_t re_nsub; // number of parenthesized subexpressions
7 = const char *re_endp; // end pointer for REG_PEND
8 = struct re_guts *re_g; // none of your business :-)
9 = } regex_t;
10 = typedef struct {
11 = regoff_t rm_so; // start of match
12 = regoff_t rm_eo; // end of match
13 = } regmatch_t;
14 */
15/*
16 * internals of regex_t
17 */
18#define MAGIC1 ((('r'^0200)<<8) | 'e')
19
20/*
21 * The internal representation is a *strip*, a sequence of
22 * operators ending with an endmarker. (Some terminology etc. is a
23 * historical relic of earlier versions which used multiple strips.)
24 * Certain oddities in the representation are there to permit running
25 * the machinery backwards; in particular, any deviation from sequential
26 * flow must be marked at both its source and its destination. Some
27 * fine points:
28 *
29 * - OPLUS_ and O_PLUS are *inside* the loop they create.
30 * - OQUEST_ and O_QUEST are *outside* the bypass they create.
31 * - OCH_ and O_CH are *outside* the multi-way branch they create, while
32 * OOR1 and OOR2 are respectively the end and the beginning of one of
33 * the branches. Note that there is an implicit OOR2 following OCH_
34 * and an implicit OOR1 preceding O_CH.
35 *
36 * In state representations, an operator's bit is on to signify a state
37 * immediately *preceding* "execution" of that operator.
38 */
39typedef long sop; /* strip operator */
40typedef long sopno;
41#define OPRMASK 0x7c000000
42#define OPDMASK 0x03ffffff
43#define OPSHIFT (26)
44#define OP(n) ((n)&OPRMASK)
45#define OPND(n) ((n)&OPDMASK)
46#define SOP(op, opnd) ((op)|(opnd))
47/* operators meaning operand */
48/* (back, fwd are offsets) */
49#define OEND (1<<OPSHIFT) /* endmarker - */
50#define OCHAR (2<<OPSHIFT) /* character unsigned char */
51#define OBOL (3<<OPSHIFT) /* left anchor - */
52#define OEOL (4<<OPSHIFT) /* right anchor - */
53#define OANY (5<<OPSHIFT) /* . - */
54#define OANYOF (6<<OPSHIFT) /* [...] set number */
55#define OBACK_ (7<<OPSHIFT) /* begin \d paren number */
56#define O_BACK (8<<OPSHIFT) /* end \d paren number */
57#define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */
58#define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */
59#define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */
60#define O_QUEST (12<<OPSHIFT) /* ? suffix back to prefix */
61#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */
62#define ORPAREN (14<<OPSHIFT) /* ) back to ( */
63#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */
64#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
65#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
66#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */
67#define OBOW (19<<OPSHIFT) /* begin word - */
68#define OEOW (20<<OPSHIFT) /* end word - */
69
70/*
71 * Structure for [] character-set representation. Character sets are
72 * done as bit vectors, grouped 8 to a byte vector for compactness.
73 * The individual set therefore has both a pointer to the byte vector
74 * and a mask to pick out the relevant bit of each byte. A hash code
75 * simplifies testing whether two sets could be identical.
76 *
77 * This will get trickier for multicharacter collating elements. As
78 * preliminary hooks for dealing with such things, we also carry along
79 * a string of multi-character elements, and decide the size of the
80 * vectors at run time.
81 */
82typedef struct {
83 uch *ptr; /* -> uch [csetsize] */
84 uch mask; /* bit within array */
85 uch hash; /* hash code */
86 size_t smultis;
87 char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
88} cset;
89/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
90#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
91#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
92#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
93#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
94#define MCsub(p, cs, cp) mcsub(p, cs, cp)
95#define MCin(p, cs, cp) mcin(p, cs, cp)
96
97/* stuff for character categories */
98typedef unsigned char cat_t;
99
100/*
101 * main compiled-expression structure
102 */
103struct re_guts {
104 int magic;
105# define MAGIC2 ((('R'^0200)<<8)|'E')
106 sop *strip; /* malloced area for strip */
107 int csetsize; /* number of bits in a cset vector */
108 int ncsets; /* number of csets in use */
109 cset *sets; /* -> cset [ncsets] */
110 uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
111 int cflags; /* copy of regcomp() cflags argument */
112 sopno nstates; /* = number of sops */
113 sopno firststate; /* the initial OEND (normally 0) */
114 sopno laststate; /* the final OEND */
115 int iflags; /* internal flags */
116# define USEBOL 01 /* used ^ */
117# define USEEOL 02 /* used $ */
118# define BAD 04 /* something wrong */
119 int nbol; /* number of ^ used */
120 int neol; /* number of $ used */
121 int ncategories; /* how many character categories */
122 cat_t *categories; /* ->catspace[-CHAR_MIN] */
123 char *must; /* match must contain this string */
124 int mlen; /* length of must */
125 size_t nsub; /* copy of re_nsub */
126 int backrefs; /* does it use back references? */
127 sopno nplus; /* how deep does it nest +s? */
128 /* catspace must be last */
129 cat_t catspace[1]; /* actually [NC] */
130};
131
132/* misc utilities */
133#define OUT (CHAR_MAX+1) /* a non-character value */
134#define ISWORD(c) (isalnum(c) || (c) == '_')
diff --git a/src/lib/evil/regex/regexec.c b/src/lib/evil/regex/regexec.c
deleted file mode 100644
index 3a9135d..0000000
--- a/src/lib/evil/regex/regexec.c
+++ /dev/null
@@ -1,138 +0,0 @@
1/*
2 * the outer shell of regexec()
3 *
4 * This file includes engine.c *twice*, after muchos fiddling with the
5 * macros that code uses. This lets the same code operate on two different
6 * representations for state sets.
7 */
8#include <sys/types.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <limits.h>
13#include <ctype.h>
14#include <regex.h>
15
16#include "utils.h"
17#include "regex2.h"
18
19static int nope = 0; /* for use in asserts; shuts lint up */
20
21/* macros for manipulating states, small version */
22#define states unsigned
23#define states1 unsigned /* for later use in regexec() decision */
24#define CLEAR(v) ((v) = 0)
25#define SET0(v, n) ((v) &= ~((unsigned)1 << (n)))
26#define SET1(v, n) ((v) |= (unsigned)1 << (n))
27#define ISSET(v, n) ((v) & ((unsigned)1 << (n)))
28#define ASSIGN(d, s) ((d) = (s))
29#define EQ(a, b) ((a) == (b))
30#define STATEVARS int dummy /* dummy version */
31#define STATESETUP(m, n) /* nothing */
32#define STATETEARDOWN(m) /* nothing */
33#define SETUP(v) ((v) = 0)
34#define onestate unsigned
35#define INIT(o, n) ((o) = (unsigned)1 << (n))
36#define INC(o) ((o) <<= 1)
37#define ISSTATEIN(v, o) ((v) & (o))
38/* some abbreviations; note that some of these know variable names! */
39/* do "if I'm here, I can also be there" etc without branches */
40#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n))
41#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n))
42#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n)))
43/* function names */
44#define SNAMES /* engine.c looks after details */
45
46#include "engine.c"
47
48/* now undo things */
49#undef states
50#undef CLEAR
51#undef SET0
52#undef SET1
53#undef ISSET
54#undef ASSIGN
55#undef EQ
56#undef STATEVARS
57#undef STATESETUP
58#undef STATETEARDOWN
59#undef SETUP
60#undef onestate
61#undef INIT
62#undef INC
63#undef ISSTATEIN
64#undef FWD
65#undef BACK
66#undef ISSETBACK
67#undef SNAMES
68
69/* macros for manipulating states, large version */
70#define states char *
71#define CLEAR(v) memset(v, 0, m->g->nstates)
72#define SET0(v, n) ((v)[n] = 0)
73#define SET1(v, n) ((v)[n] = 1)
74#define ISSET(v, n) ((v)[n])
75#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
76#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
77#define STATEVARS int vn; char *space
78#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
79 if ((m)->space == NULL) return(REG_ESPACE); \
80 (m)->vn = 0; }
81#define STATETEARDOWN(m) { free((m)->space); }
82#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
83#define onestate int
84#define INIT(o, n) ((o) = (n))
85#define INC(o) ((o)++)
86#define ISSTATEIN(v, o) ((v)[o])
87/* some abbreviations; note that some of these know variable names! */
88/* do "if I'm here, I can also be there" etc without branches */
89#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
90#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
91#define ISSETBACK(v, n) ((v)[here - (n)])
92/* function names */
93#define LNAMES /* flag */
94
95#include "engine.c"
96
97/*
98 - regexec - interface for matching
99 = extern int regexec(const regex_t *, const char *, size_t, \
100 = regmatch_t [], int);
101 = #define REG_NOTBOL 00001
102 = #define REG_NOTEOL 00002
103 = #define REG_STARTEND 00004
104 = #define REG_TRACE 00400 // tracing of execution
105 = #define REG_LARGE 01000 // force large representation
106 = #define REG_BACKR 02000 // force use of backref code
107 *
108 * We put this here so we can exploit knowledge of the state representation
109 * when choosing which matcher to call. Also, by this point the matchers
110 * have been prototyped.
111 */
112EAPI int /* 0 success, REG_NOMATCH failure */
113regexec(preg, string, nmatch, pmatch, eflags)
114const regex_t *preg;
115const char *string;
116size_t nmatch;
117regmatch_t pmatch[];
118int eflags;
119{
120 register struct re_guts *g = preg->re_g;
121#ifdef REDEBUG
122# define GOODFLAGS(f) (f)
123#else
124# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
125#endif
126
127 if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
128 return(REG_BADPAT);
129 assert(!(g->iflags&BAD));
130 if (g->iflags&BAD) /* backstop for no-debug case */
131 return(REG_BADPAT);
132 eflags = GOODFLAGS(eflags);
133
134 if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags&REG_LARGE))
135 return(smatcher(g, (char *)string, nmatch, pmatch, eflags));