forked from enlightenment/efl
evil: add regex code (needed for elm).
Signed-off-by: Cedric Bail <cedric.bail@samsung.com>
This commit is contained in:
parent
058e03aa74
commit
bc07d80e0c
|
@ -69,9 +69,32 @@ else
|
|||
lib_evil_libevil_la_LINK = $(CXXLINK) $(lib_evil_libevil_la_LDFLAGS)
|
||||
endif
|
||||
|
||||
# regex
|
||||
|
||||
dist_install_evilheaders_DATA += \
|
||||
lib/evil/regex/regex.h
|
||||
|
||||
lib_evil_libevil_la_SOURCES += \
|
||||
lib/evil/regex/regcomp.c \
|
||||
lib/evil/regex/regerror.c \
|
||||
lib/evil/regex/regexec.c \
|
||||
lib/evil/regex/regfree.c \
|
||||
lib/evil/regex/cclass.h \
|
||||
lib/evil/regex/cname.h \
|
||||
lib/evil/regex/regex2.h \
|
||||
lib/evil/regex/utils.h
|
||||
|
||||
lib_evil_libevil_la_CPPFLAGS = \
|
||||
-I$(top_srcdir)/src/lib/evil \
|
||||
-I$(top_srcdir)/src/lib/evil/regex \
|
||||
-DPOSIX_MISTAKE
|
||||
|
||||
#libdl
|
||||
|
||||
lib_evil_libdl_la_SOURCES = lib/evil/dlfcn.c
|
||||
|
||||
lib_evil_libdl_la_CPPFLAGS = -I$(top_builddir)/src/lib/efl \
|
||||
lib_evil_libdl_la_CPPFLAGS = \
|
||||
-I$(top_builddir)/src/lib/efl \
|
||||
@EVIL_CFLAGS@ \
|
||||
@EVIL_DLFCN_CPPFLAGS@
|
||||
lib_evil_libdl_la_LIBADD = \
|
||||
|
@ -121,6 +144,8 @@ bin_evil_test_evil_LDADD = @USE_EVIL_LIBS@
|
|||
|
||||
endif
|
||||
EXTRA_DIST += \
|
||||
lib/evil/gdtoa/README \
|
||||
lib/evil/gdtoa/README.mingw \
|
||||
lib/evil/regex/regerror.ih \
|
||||
lib/evil/regex/engine.ih \
|
||||
lib/evil/regex/regcomp.ih \
|
||||
lib/evil/regex/engine.c \
|
||||
bin/evil/memcpy_glibc_i686.S
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
/*
 * Character-class table.
 *
 * Each row pairs a class name (e.g. "alnum") with the characters that belong
 * to it, spelled out as an ordinary NUL-terminated string.  Because the
 * member list is a C string, the "cntrl" row cannot (and does not) list the
 * NUL character itself.  The table ends with a row whose name is NULL.
 *
 * The member strings are written with adjacent-literal concatenation rather
 * than backslash-newline splices, so re-indenting this file can never inject
 * whitespace into a class.
 */
static struct cclass {
	char *name;	/* class name; NULL marks the end of the table */
	char *chars;	/* single-character members of the class */
	char *multis;	/* empty in every row here; presumably reserved for
			 * multi-character members -- TODO confirm in regcomp.c */
} cclasses[] = {
	{ "alnum",
	  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	  "0123456789",
	  "" },
	{ "alpha",
	  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
	  "" },
	{ "blank",
	  " \t",
	  "" },
	{ "cntrl",
	  "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24"
	  "\25\26\27\30\31\32\33\34\35\36\37\177",
	  "" },
	{ "digit",
	  "0123456789",
	  "" },
	{ "graph",
	  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	  "0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
	  "" },
	{ "lower",
	  "abcdefghijklmnopqrstuvwxyz",
	  "" },
	{ "print",
	  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	  "0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
	  "" },
	{ "punct",
	  "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
	  "" },
	{ "space",
	  "\t\n\v\f\r ",
	  "" },
	{ "upper",
	  "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
	  "" },
	{ "xdigit",
	  "0123456789ABCDEFabcdef",
	  "" },
	{ NULL, NULL, "" }	/* end-of-table sentinel */
};
|
|
@ -0,0 +1,102 @@
|
|||
/*
 * Character-name table.
 *
 * Maps a symbolic character name (e.g. "tab", "left-brace", "NUL") to the
 * single character it denotes.  Several characters have more than one name
 * ("BEL"/"alert", "hyphen"/"hyphen-minus", ...).  The table ends with a row
 * whose name is NULL.
 */
static struct cname {
	char *name;	/* symbolic name; NULL marks the end of the table */
	char code;	/* the character the name stands for */
} cnames[] = {
	{ "NUL",			'\0' },
	{ "SOH",			'\001' },
	{ "STX",			'\002' },
	{ "ETX",			'\003' },
	{ "EOT",			'\004' },
	{ "ENQ",			'\005' },
	{ "ACK",			'\006' },
	{ "BEL",			'\007' },
	{ "alert",			'\007' },
	{ "BS",				'\010' },
	{ "backspace",			'\b' },
	{ "HT",				'\011' },
	{ "tab",			'\t' },
	{ "LF",				'\012' },
	{ "newline",			'\n' },
	{ "VT",				'\013' },
	{ "vertical-tab",		'\v' },
	{ "FF",				'\014' },
	{ "form-feed",			'\f' },
	{ "CR",				'\015' },
	{ "carriage-return",		'\r' },
	{ "SO",				'\016' },
	{ "SI",				'\017' },
	{ "DLE",			'\020' },
	{ "DC1",			'\021' },
	{ "DC2",			'\022' },
	{ "DC3",			'\023' },
	{ "DC4",			'\024' },
	{ "NAK",			'\025' },
	{ "SYN",			'\026' },
	{ "ETB",			'\027' },
	{ "CAN",			'\030' },
	{ "EM",				'\031' },
	{ "SUB",			'\032' },
	{ "ESC",			'\033' },
	{ "IS4",			'\034' },
	{ "FS",				'\034' },
	{ "IS3",			'\035' },
	{ "GS",				'\035' },
	{ "IS2",			'\036' },
	{ "RS",				'\036' },
	{ "IS1",			'\037' },
	{ "US",				'\037' },
	{ "space",			' ' },
	{ "exclamation-mark",		'!' },
	{ "quotation-mark",		'"' },
	{ "number-sign",		'#' },
	{ "dollar-sign",		'$' },
	{ "percent-sign",		'%' },
	{ "ampersand",			'&' },
	{ "apostrophe",			'\'' },
	{ "left-parenthesis",		'(' },
	{ "right-parenthesis",		')' },
	{ "asterisk",			'*' },
	{ "plus-sign",			'+' },
	{ "comma",			',' },
	{ "hyphen",			'-' },
	{ "hyphen-minus",		'-' },
	{ "period",			'.' },
	{ "full-stop",			'.' },
	{ "slash",			'/' },
	{ "solidus",			'/' },
	{ "zero",			'0' },
	{ "one",			'1' },
	{ "two",			'2' },
	{ "three",			'3' },
	{ "four",			'4' },
	{ "five",			'5' },
	{ "six",			'6' },
	{ "seven",			'7' },
	{ "eight",			'8' },
	{ "nine",			'9' },
	{ "colon",			':' },
	{ "semicolon",			';' },
	{ "less-than-sign",		'<' },
	{ "equals-sign",		'=' },
	{ "greater-than-sign",		'>' },
	{ "question-mark",		'?' },
	{ "commercial-at",		'@' },
	{ "left-square-bracket",	'[' },
	{ "backslash",			'\\' },
	{ "reverse-solidus",		'\\' },
	{ "right-square-bracket",	']' },
	{ "circumflex",			'^' },
	{ "circumflex-accent",		'^' },
	{ "underscore",			'_' },
	{ "low-line",			'_' },
	{ "grave-accent",		'`' },
	{ "left-brace",			'{' },
	{ "left-curly-bracket",		'{' },
	{ "vertical-line",		'|' },
	{ "right-brace",		'}' },
	{ "right-curly-bracket",	'}' },
	{ "tilde",			'~' },
	{ "DEL",			'\177' },
	{ NULL,				0 }	/* end-of-table sentinel */
};
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,35 @@
|
|||
/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === lib/evil/regex/engine.c === */
|
||||
static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
|
||||
static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
|
||||
static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
|
||||
static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
|
||||
#define BOL (OUT+1)
|
||||
#define EOL (BOL+1)
|
||||
#define BOLEOL (BOL+2)
|
||||
#define NOTHING (BOL+3)
|
||||
#define BOW (BOL+4)
|
||||
#define EOW (BOL+5)
|
||||
#define CODEMAX (BOL+5) /* highest code used */
|
||||
#define NONCHAR(c) ((c) > CHAR_MAX)
|
||||
#define NNONCHAR (CODEMAX-CHAR_MAX)
|
||||
#ifdef REDEBUG
|
||||
static void print(struct match *m, char *caption, states st, int ch, FILE *d);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static void at(struct match *m, char *title, char *start, char *stop, sopno startst, sopno stopst);
|
||||
#endif
|
||||
#ifdef REDEBUG
|
||||
static char *pchar(int ch);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,51 @@
|
|||
/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === lib/evil/regex/regcomp.c === */
|
||||
static void p_ere(register struct parse *p, int stop);
|
||||
static void p_ere_exp(register struct parse *p);
|
||||
static void p_str(register struct parse *p);
|
||||
static void p_bre(register struct parse *p, register int end1, register int end2);
|
||||
static int p_simp_re(register struct parse *p, int starordinary);
|
||||
static int p_count(register struct parse *p);
|
||||
static void p_bracket(register struct parse *p);
|
||||
static void p_b_term(register struct parse *p, register cset *cs);
|
||||
static void p_b_cclass(register struct parse *p, register cset *cs);
|
||||
static void p_b_eclass(register struct parse *p, register cset *cs);
|
||||
static char p_b_symbol(register struct parse *p);
|
||||
static char p_b_coll_elem(register struct parse *p, int endc);
|
||||
static char othercase(int ch);
|
||||
static void bothcases(register struct parse *p, int ch);
|
||||
static void ordinary(register struct parse *p, register int ch);
|
||||
static void nonnewline(register struct parse *p);
|
||||
static void repeat(register struct parse *p, sopno start, int from, int to);
|
||||
static int seterr(register struct parse *p, int e);
|
||||
static cset *allocset(register struct parse *p);
|
||||
static void freeset(register struct parse *p, register cset *cs);
|
||||
static int freezeset(register struct parse *p, register cset *cs);
|
||||
static int firstch(register struct parse *p, register cset *cs);
|
||||
static int nch(register struct parse *p, register cset *cs);
|
||||
static void mcadd(register struct parse *p, register cset *cs, register char *cp);
|
||||
static void mcsub(register cset *cs, register char *cp);
|
||||
static int mcin(register cset *cs, register char *cp);
|
||||
static char *mcfind(register cset *cs, register char *cp);
|
||||
static void mcinvert(register struct parse *p, register cset *cs);
|
||||
static void mccase(register struct parse *p, register cset *cs);
|
||||
static int isinsets(register struct re_guts *g, int c);
|
||||
static int samesets(register struct re_guts *g, int c1, int c2);
|
||||
static void categorize(struct parse *p, register struct re_guts *g);
|
||||
static sopno dupl(register struct parse *p, sopno start, sopno finish);
|
||||
static void doemit(register struct parse *p, sop op, size_t opnd);
|
||||
static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
|
||||
static void dofwd(register struct parse *p, sopno pos, sop value);
|
||||
static void enlarge(register struct parse *p, sopno size);
|
||||
static void stripsnug(register struct parse *p, register struct re_guts *g);
|
||||
static void findmust(register struct parse *p, register struct re_guts *g);
|
||||
static sopno pluscount(register struct parse *p, register struct re_guts *g);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
|
@ -0,0 +1,126 @@
|
|||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regerror.ih"
|
||||
|
||||
/*
|
||||
= #define REG_OKAY 0
|
||||
= #define REG_NOMATCH 1
|
||||
= #define REG_BADPAT 2
|
||||
= #define REG_ECOLLATE 3
|
||||
= #define REG_ECTYPE 4
|
||||
= #define REG_EESCAPE 5
|
||||
= #define REG_ESUBREG 6
|
||||
= #define REG_EBRACK 7
|
||||
= #define REG_EPAREN 8
|
||||
= #define REG_EBRACE 9
|
||||
= #define REG_BADBR 10
|
||||
= #define REG_ERANGE 11
|
||||
= #define REG_ESPACE 12
|
||||
= #define REG_BADRPT 13
|
||||
= #define REG_EMPTY 14
|
||||
= #define REG_ASSERT 15
|
||||
= #define REG_INVARG 16
|
||||
= #define REG_ATOI 255 // convert name to number (!)
|
||||
= #define REG_ITOA 0400 // convert number to name (!)
|
||||
*/
|
||||
static struct rerr {
|
||||
int code;
|
||||
char *name;
|
||||
char *explain;
|
||||
} rerrs[] = {
|
||||
REG_OKAY, "REG_OKAY", "no errors detected",
|
||||
REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match",
|
||||
REG_BADPAT, "REG_BADPAT", "invalid regular expression",
|
||||
REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element",
|
||||
REG_ECTYPE, "REG_ECTYPE", "invalid character class",
|
||||
REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)",
|
||||
REG_ESUBREG, "REG_ESUBREG", "invalid backreference number",
|
||||
REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced",
|
||||
REG_EPAREN, "REG_EPAREN", "parentheses not balanced",
|
||||
REG_EBRACE, "REG_EBRACE", "braces not balanced",
|
||||
REG_BADBR, "REG_BADBR", "invalid repetition count(s)",
|
||||
REG_ERANGE, "REG_ERANGE", "invalid character range",
|
||||
REG_ESPACE, "REG_ESPACE", "out of memory",
|
||||
REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid",
|
||||
REG_EMPTY, "REG_EMPTY", "empty (sub)expression",
|
||||
REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug",
|
||||
REG_INVARG, "REG_INVARG", "invalid argument to regex routine",
|
||||
-1, "", "*** unknown regexp error code ***",
|
||||
};
|
||||
|
||||
/*
|
||||
- regerror - the interface to error numbers
|
||||
= extern size_t regerror(int, const regex_t *, char *, size_t);
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
EAPI size_t
|
||||
regerror(errcode, preg, errbuf, errbuf_size)
|
||||
int errcode;
|
||||
const regex_t *preg;
|
||||
char *errbuf;
|
||||
size_t errbuf_size;
|
||||
{
|
||||
register struct rerr *r;
|
||||
register size_t len;
|
||||
register int target = errcode &~ REG_ITOA;
|
||||
register char *s;
|
||||
char convbuf[50];
|
||||
|
||||
if (errcode == REG_ATOI)
|
||||
s = regatoi(preg, convbuf);
|
||||
else {
|
||||
for (r = rerrs; r->code >= 0; r++)
|
||||
if (r->code == target)
|
||||
break;
|
||||
|
||||
if (errcode®_ITOA) {
|
||||
if (r->code >= 0)
|
||||
(void) strcpy(convbuf, r->name);
|
||||
else
|
||||
sprintf(convbuf, "REG_0x%x", target);
|
||||
assert(strlen(convbuf) < sizeof(convbuf));
|
||||
s = convbuf;
|
||||
} else
|
||||
s = r->explain;
|
||||
}
|
||||
|
||||
len = strlen(s) + 1;
|
||||
if (errbuf_size > 0) {
|
||||
if (errbuf_size > len)
|
||||
(void) strcpy(errbuf, s);
|
||||
else {
|
||||
(void) strncpy(errbuf, s, errbuf_size-1);
|
||||
errbuf[errbuf_size-1] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
return(len);
|
||||
}
|
||||
|
||||
/*
|
||||
- regatoi - internal routine to implement REG_ATOI
|
||||
== static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
*/
|
||||
static char *
|
||||
regatoi(preg, localbuf)
|
||||
const regex_t *preg;
|
||||
char *localbuf;
|
||||
{
|
||||
register struct rerr *r;
|
||||
|
||||
for (r = rerrs; r->code >= 0; r++)
|
||||
if (strcmp(r->name, preg->re_endp) == 0)
|
||||
break;
|
||||
if (r->code < 0)
|
||||
return("0");
|
||||
|
||||
sprintf(localbuf, "%d", r->code);
|
||||
return(localbuf);
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === lib/evil/regex/regerror.c === */
|
||||
static char *regatoi(const regex_t *preg, char *localbuf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
|
@ -0,0 +1,77 @@
|
|||
#ifndef _REGEX_H_
|
||||
#define _REGEX_H_ /* never again */
|
||||
|
||||
#include <evil_macro.h>
|
||||
|
||||
/* ========= begin header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* === lib/evil/regex/regex2.h === */
|
||||
typedef off_t regoff_t;
|
||||
typedef struct {
|
||||
int re_magic;
|
||||
size_t re_nsub; /* number of parenthesized subexpressions */
|
||||
const char *re_endp; /* end pointer for REG_PEND */
|
||||
struct re_guts *re_g; /* none of your business :-) */
|
||||
} regex_t;
|
||||
typedef struct {
|
||||
regoff_t rm_so; /* start of match */
|
||||
regoff_t rm_eo; /* end of match */
|
||||
} regmatch_t;
|
||||
|
||||
|
||||
/* === lib/evil/regex/regcomp.c === */
|
||||
EAPI int regcomp(regex_t *, const char *, int);
|
||||
#define REG_BASIC 0000
|
||||
#define REG_EXTENDED 0001
|
||||
#define REG_ICASE 0002
|
||||
#define REG_NOSUB 0004
|
||||
#define REG_NEWLINE 0010
|
||||
#define REG_NOSPEC 0020
|
||||
#define REG_PEND 0040
|
||||
#define REG_DUMP 0200
|
||||
|
||||
|
||||
/* === lib/evil/regex/regerror.c === */
|
||||
#define REG_OKAY 0
|
||||
#define REG_NOMATCH 1
|
||||
#define REG_BADPAT 2
|
||||
#define REG_ECOLLATE 3
|
||||
#define REG_ECTYPE 4
|
||||
#define REG_EESCAPE 5
|
||||
#define REG_ESUBREG 6
|
||||
#define REG_EBRACK 7
|
||||
#define REG_EPAREN 8
|
||||
#define REG_EBRACE 9
|
||||
#define REG_BADBR 10
|
||||
#define REG_ERANGE 11
|
||||
#define REG_ESPACE 12
|
||||
#define REG_BADRPT 13
|
||||
#define REG_EMPTY 14
|
||||
#define REG_ASSERT 15
|
||||
#define REG_INVARG 16
|
||||
#define REG_ATOI 255 /* convert name to number (!) */
|
||||
#define REG_ITOA 0400 /* convert number to name (!) */
|
||||
EAPI size_t regerror(int, const regex_t *, char *, size_t);
|
||||
|
||||
|
||||
/* === lib/evil/regex/regexec.c === */
|
||||
EAPI int regexec(const regex_t *, const char *, size_t, regmatch_t [], int);
|
||||
#define REG_NOTBOL 00001
|
||||
#define REG_NOTEOL 00002
|
||||
#define REG_STARTEND 00004
|
||||
#define REG_TRACE 00400 /* tracing of execution */
|
||||
#define REG_LARGE 01000 /* force large representation */
|
||||
#define REG_BACKR 02000 /* force use of backref code */
|
||||
|
||||
|
||||
/* === lib/evil/regex/regfree.c === */
|
||||
EAPI void regfree(regex_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/* ========= end header generated by ../src/lib/evil/regex/mkh.sh ========= */
|
||||
#endif
|
|
@ -0,0 +1,134 @@
|
|||
/*
|
||||
* First, the stuff that ends up in the outside-world include file
|
||||
= typedef off_t regoff_t;
|
||||
= typedef struct {
|
||||
= int re_magic;
|
||||
= size_t re_nsub; // number of parenthesized subexpressions
|
||||
= const char *re_endp; // end pointer for REG_PEND
|
||||
= struct re_guts *re_g; // none of your business :-)
|
||||
= } regex_t;
|
||||
= typedef struct {
|
||||
= regoff_t rm_so; // start of match
|
||||
= regoff_t rm_eo; // end of match
|
||||
= } regmatch_t;
|
||||
*/
|
||||
/*
|
||||
* internals of regex_t
|
||||
*/
|
||||
#define MAGIC1 ((('r'^0200)<<8) | 'e')
|
||||
|
||||
/*
|
||||
* The internal representation is a *strip*, a sequence of
|
||||
* operators ending with an endmarker. (Some terminology etc. is a
|
||||
* historical relic of earlier versions which used multiple strips.)
|
||||
* Certain oddities in the representation are there to permit running
|
||||
* the machinery backwards; in particular, any deviation from sequential
|
||||
* flow must be marked at both its source and its destination. Some
|
||||
* fine points:
|
||||
*
|
||||
* - OPLUS_ and O_PLUS are *inside* the loop they create.
|
||||
* - OQUEST_ and O_QUEST are *outside* the bypass they create.
|
||||
* - OCH_ and O_CH are *outside* the multi-way branch they create, while
|
||||
* OOR1 and OOR2 are respectively the end and the beginning of one of
|
||||
* the branches. Note that there is an implicit OOR2 following OCH_
|
||||
* and an implicit OOR1 preceding O_CH.
|
||||
*
|
||||
* In state representations, an operator's bit is on to signify a state
|
||||
* immediately *preceding* "execution" of that operator.
|
||||
*/
|
||||
typedef long sop; /* strip operator */
|
||||
typedef long sopno;
|
||||
#define OPRMASK 0x7c000000
|
||||
#define OPDMASK 0x03ffffff
|
||||
#define OPSHIFT (26)
|
||||
#define OP(n) ((n)&OPRMASK)
|
||||
#define OPND(n) ((n)&OPDMASK)
|
||||
#define SOP(op, opnd) ((op)|(opnd))
|
||||
/* operators meaning operand */
|
||||
/* (back, fwd are offsets) */
|
||||
#define OEND (1<<OPSHIFT) /* endmarker - */
|
||||
#define OCHAR (2<<OPSHIFT) /* character unsigned char */
|
||||
#define OBOL (3<<OPSHIFT) /* left anchor - */
|
||||
#define OEOL (4<<OPSHIFT) /* right anchor - */
|
||||
#define OANY (5<<OPSHIFT) /* . - */
|
||||
#define OANYOF (6<<OPSHIFT) /* [...] set number */
|
||||
#define OBACK_ (7<<OPSHIFT) /* begin \d paren number */
|
||||
#define O_BACK (8<<OPSHIFT) /* end \d paren number */
|
||||
#define OPLUS_ (9<<OPSHIFT) /* + prefix fwd to suffix */
|
||||
#define O_PLUS (10<<OPSHIFT) /* + suffix back to prefix */
|
||||
#define OQUEST_ (11<<OPSHIFT) /* ? prefix fwd to suffix */
|
||||
#define O_QUEST (12<<OPSHIFT) /* ? suffix back to prefix */
|
||||
#define OLPAREN (13<<OPSHIFT) /* ( fwd to ) */
|
||||
#define ORPAREN (14<<OPSHIFT) /* ) back to ( */
|
||||
#define OCH_ (15<<OPSHIFT) /* begin choice fwd to OOR2 */
|
||||
#define OOR1 (16<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
|
||||
#define OOR2 (17<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
|
||||
#define O_CH (18<<OPSHIFT) /* end choice back to OOR1 */
|
||||
#define OBOW (19<<OPSHIFT) /* begin word - */
|
||||
#define OEOW (20<<OPSHIFT) /* end word - */
|
||||
|
||||
/*
|
||||
* Structure for [] character-set representation. Character sets are
|
||||
* done as bit vectors, grouped 8 to a byte vector for compactness.
|
||||
* The individual set therefore has both a pointer to the byte vector
|
||||
* and a mask to pick out the relevant bit of each byte. A hash code
|
||||
* simplifies testing whether two sets could be identical.
|
||||
*
|
||||
* This will get trickier for multicharacter collating elements. As
|
||||
* preliminary hooks for dealing with such things, we also carry along
|
||||
* a string of multi-character elements, and decide the size of the
|
||||
* vectors at run time.
|
||||
*/
|
||||
typedef struct {
|
||||
uch *ptr; /* -> uch [csetsize] */
|
||||
uch mask; /* bit within array */
|
||||
uch hash; /* hash code */
|
||||
size_t smultis;
|
||||
char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
|
||||
} cset;
|
||||
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
|
||||
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
|
||||
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
|
||||
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
|
||||
#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
|
||||
#define MCsub(p, cs, cp) mcsub(p, cs, cp)
|
||||
#define MCin(p, cs, cp) mcin(p, cs, cp)
|
||||
|
||||
/* stuff for character categories */
|
||||
typedef unsigned char cat_t;
|
||||
|
||||
/*
|
||||
* main compiled-expression structure
|
||||
*/
|
||||
struct re_guts {
|
||||
int magic;
|
||||
# define MAGIC2 ((('R'^0200)<<8)|'E')
|
||||
sop *strip; /* malloced area for strip */
|
||||
int csetsize; /* number of bits in a cset vector */
|
||||
int ncsets; /* number of csets in use */
|
||||
cset *sets; /* -> cset [ncsets] */
|
||||
uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
|
||||
int cflags; /* copy of regcomp() cflags argument */
|
||||
sopno nstates; /* = number of sops */
|
||||
sopno firststate; /* the initial OEND (normally 0) */
|
||||
sopno laststate; /* the final OEND */
|
||||
int iflags; /* internal flags */
|
||||
# define USEBOL 01 /* used ^ */
|
||||
# define USEEOL 02 /* used $ */
|
||||
# define BAD 04 /* something wrong */
|
||||
int nbol; /* number of ^ used */
|
||||
int neol; /* number of $ used */
|
||||
int ncategories; /* how many character categories */
|
||||
cat_t *categories; /* ->catspace[-CHAR_MIN] */
|
||||
char *must; /* match must contain this string */
|
||||
int mlen; /* length of must */
|
||||
size_t nsub; /* copy of re_nsub */
|
||||
int backrefs; /* does it use back references? */
|
||||
sopno nplus; /* how deep does it nest +s? */
|
||||
/* catspace must be last */
|
||||
cat_t catspace[1]; /* actually [NC] */
|
||||
};
|
||||
|
||||
/* misc utilities */
|
||||
#define OUT (CHAR_MAX+1) /* a non-character value */
|
||||
#define ISWORD(c) (isalnum(c) || (c) == '_')
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* the outer shell of regexec()
|
||||
*
|
||||
* This file includes engine.c *twice*, after muchos fiddling with the
|
||||
* macros that code uses. This lets the same code operate on two different
|
||||
* representations for state sets.
|
||||
*/
|
||||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <ctype.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
|
||||
static int nope = 0; /* for use in asserts; shuts lint up */
|
||||
|
||||
/* macros for manipulating states, small version */
|
||||
#define states unsigned
|
||||
#define states1 unsigned /* for later use in regexec() decision */
|
||||
#define CLEAR(v) ((v) = 0)
|
||||
#define SET0(v, n) ((v) &= ~((unsigned)1 << (n)))
|
||||
#define SET1(v, n) ((v) |= (unsigned)1 << (n))
|
||||
#define ISSET(v, n) ((v) & ((unsigned)1 << (n)))
|
||||
#define ASSIGN(d, s) ((d) = (s))
|
||||
#define EQ(a, b) ((a) == (b))
|
||||
#define STATEVARS int dummy /* dummy version */
|
||||
#define STATESETUP(m, n) /* nothing */
|
||||
#define STATETEARDOWN(m) /* nothing */
|
||||
#define SETUP(v) ((v) = 0)
|
||||
#define onestate unsigned
|
||||
#define INIT(o, n) ((o) = (unsigned)1 << (n))
|
||||
#define INC(o) ((o) <<= 1)
|
||||
#define ISSTATEIN(v, o) ((v) & (o))
|
||||
/* some abbreviations; note that some of these know variable names! */
|
||||
/* do "if I'm here, I can also be there" etc without branches */
|
||||
#define FWD(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) << (n))
|
||||
#define BACK(dst, src, n) ((dst) |= ((unsigned)(src)&(here)) >> (n))
|
||||
#define ISSETBACK(v, n) ((v) & ((unsigned)here >> (n)))
|
||||
/* function names */
|
||||
#define SNAMES /* engine.c looks after details */
|
||||
|
||||
#include "engine.c"
|
||||
|
||||
/* now undo things */
|
||||
#undef states
|
||||
#undef CLEAR
|
||||
#undef SET0
|
||||
#undef SET1
|
||||
#undef ISSET
|
||||
#undef ASSIGN
|
||||
#undef EQ
|
||||
#undef STATEVARS
|
||||
#undef STATESETUP
|
||||
#undef STATETEARDOWN
|
||||
#undef SETUP
|
||||
#undef onestate
|
||||
#undef INIT
|
||||
#undef INC
|
||||
#undef ISSTATEIN
|
||||
#undef FWD
|
||||
#undef BACK
|
||||
#undef ISSETBACK
|
||||
#undef SNAMES
|
||||
|
||||
/* macros for manipulating states, large version */
|
||||
#define states char *
|
||||
#define CLEAR(v) memset(v, 0, m->g->nstates)
|
||||
#define SET0(v, n) ((v)[n] = 0)
|
||||
#define SET1(v, n) ((v)[n] = 1)
|
||||
#define ISSET(v, n) ((v)[n])
|
||||
#define ASSIGN(d, s) memcpy(d, s, m->g->nstates)
|
||||
#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0)
|
||||
#define STATEVARS int vn; char *space
|
||||
#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \
|
||||
if ((m)->space == NULL) return(REG_ESPACE); \
|
||||
(m)->vn = 0; }
|
||||
#define STATETEARDOWN(m) { free((m)->space); }
|
||||
#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates])
|
||||
#define onestate int
|
||||
#define INIT(o, n) ((o) = (n))
|
||||
#define INC(o) ((o)++)
|
||||
#define ISSTATEIN(v, o) ((v)[o])
|
||||
/* some abbreviations; note that some of these know variable names! */
|
||||
/* do "if I'm here, I can also be there" etc without branches */
|
||||
#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here])
|
||||
#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here])
|
||||
#define ISSETBACK(v, n) ((v)[here - (n)])
|
||||
/* function names */
|
||||
#define LNAMES /* flag */
|
||||
|
||||
#include "engine.c"
|
||||
|
||||
/*
|
||||
- regexec - interface for matching
|
||||
= extern int regexec(const regex_t *, const char *, size_t, \
|
||||
= regmatch_t [], int);
|
||||
= #define REG_NOTBOL 00001
|
||||
= #define REG_NOTEOL 00002
|
||||
= #define REG_STARTEND 00004
|
||||
= #define REG_TRACE 00400 // tracing of execution
|
||||
= #define REG_LARGE 01000 // force large representation
|
||||
= #define REG_BACKR 02000 // force use of backref code
|
||||
*
|
||||
* We put this here so we can exploit knowledge of the state representation
|
||||
* when choosing which matcher to call. Also, by this point the matchers
|
||||
* have been prototyped.
|
||||
*/
|
||||
EAPI int /* 0 success, REG_NOMATCH failure */
|
||||
regexec(preg, string, nmatch, pmatch, eflags)
|
||||
const regex_t *preg;
|
||||
const char *string;
|
||||
size_t nmatch;
|
||||
regmatch_t pmatch[];
|
||||
int eflags;
|
||||
{
|
||||
register struct re_guts *g = preg->re_g;
|
||||
#ifdef REDEBUG
|
||||
# define GOODFLAGS(f) (f)
|
||||
#else
|
||||
# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND))
|
||||
#endif
|
||||
|
||||
if (preg->re_magic != MAGIC1 || g->magic != MAGIC2)
|
||||
return(REG_BADPAT);
|
||||
assert(!(g->iflags&BAD));
|
||||
if (g->iflags&BAD) /* backstop for no-debug case */
|
||||
return(REG_BADPAT);
|
||||
eflags = GOODFLAGS(eflags);
|
||||
|
||||
if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE))
|
||||
return(smatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
else
|
||||
return(lmatcher(g, (char *)string, nmatch, pmatch, eflags));
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
#include <sys/types.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <regex.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "regex2.h"
|
||||
|
||||
/*
|
||||
- regfree - free everything
|
||||
= extern void regfree(regex_t *);
|
||||
*/
|
||||
EAPI void
|
||||
regfree(preg)
|
||||
regex_t *preg;
|
||||
{
|
||||
register struct re_guts *g;
|
||||
|
||||
if (preg->re_magic != MAGIC1) /* oops */
|
||||
return; /* nice to complain, but hard */
|
||||
|
||||
g = preg->re_g;
|
||||
if (g == NULL || g->magic != MAGIC2) /* oops again */
|
||||
return;
|
||||
preg->re_magic = 0; /* mark it invalid */
|
||||
g->magic = 0; /* mark it invalid */
|
||||
|
||||
if (g->strip != NULL)
|
||||
free((char *)g->strip);
|
||||
if (g->sets != NULL)
|
||||
free((char *)g->sets);
|
||||
if (g->setbits != NULL)
|
||||
free((char *)g->setbits);
|
||||
if (g->must != NULL)
|
||||
free(g->must);
|
||||
free((char *)g);
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
/* utility definitions */
|
||||
#ifdef _POSIX2_RE_DUP_MAX
|
||||
#define DUPMAX _POSIX2_RE_DUP_MAX
|
||||
#else
|
||||
#define DUPMAX 255
|
||||
#endif
|
||||
#define INFINITY (DUPMAX + 1)
|
||||
#define NC (CHAR_MAX - CHAR_MIN + 1)
|
||||
typedef unsigned char uch;
|
||||
|
||||
/* switch off assertions (if not already off) if no REDEBUG */
|
||||
#ifndef REDEBUG
|
||||
#ifndef NDEBUG
|
||||
#define NDEBUG /* no assertions please */
|
||||
#endif
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
/* for old systems with bcopy() but no memmove() */
|
||||
#ifdef USEBCOPY
|
||||
#define memmove(d, s, c) bcopy(s, d, c)
|
||||
#endif
|
Loading…
Reference in New Issue