/* efl/src/lib/eolian/eo_lexer.c */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <stdio.h>
#include <ctype.h>
#include <locale.h>
#include <setjmp.h>
#include <assert.h>
#include <libgen.h>
#include "eo_lexer.h"
static int lastbytes = 0;
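/* advance to the next input byte; columns are counted per UTF-8 character
* rather than per byte, and current becomes '\0' at end of stream */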
static void
next_char(Eo_Lexer *ls)
{
int nb;
Eina_Bool end = EINA_FALSE;
if (ls->stream == ls->stream_end)
{
end = EINA_TRUE;
ls->current = '\0';
}
else
ls->current = *(ls->stream++);
nb = lastbytes;
if (!nb && end) nb = 1;
if (!nb) eina_unicode_utf8_next_get(ls->stream - 1, &nb);
if (nb == 1)
{
nb = 0;
++ls->icolumn;
ls->column = ls->icolumn;
}
else --nb;
lastbytes = nb;
}
#define KW(x) #x
#define KWAT(x) "@" #x
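/* printable names for tokens >= START_CUSTOM; keywords is expanded from the
* KEYWORDS macro and ctypes maps type keywords (starting at KW_byte) to
* their C representation in the same order */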
static const char * const tokens[] =
{
"==", "!=", ">=", "<=", "&&", "||", "<<", ">>",
"<doc>", "<string>", "<char>", "<number>", "<value>"
};
static const char * const keywords[] = { KEYWORDS };
static const char * const ctypes[] =
{
"signed char", "unsigned char", "char", "short", "unsigned short", "int",
"unsigned int", "long", "unsigned long", "long long", "unsigned long long",
"int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t",
"int64_t", "uint64_t", "int128_t", "uint128_t",
"size_t", "ssize_t", "intptr_t", "uintptr_t", "ptrdiff_t",
"time_t",
"float", "double",
"Eina_Bool",
"void",
NULL, NULL, /* array types */
"Eina_Accessor", "Eina_Array", "Eina_Iterator", "Eina_Hash", "Eina_List",
"Eina_Promise", "Efl_Future",
"Eina_Value", "const char *", "Eina_Stringshare *",
"void *",
"Efl_Event_Cb",
};
#undef KW
#undef KWAT
#define is_newline(c) ((c) == '\n' || (c) == '\r')
static Eina_Hash *keyword_map = NULL;
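/* print a diagnostic along with the offending source line and a caret under
* the error column, then longjmp back to the handler set up in err_jmp */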
static void
throw(Eo_Lexer *ls, const char *fmt, ...)
{
const char *ln = ls->stream_line, *end = ls->stream_end;
Eina_Strbuf *buf = eina_strbuf_new();
int i;
va_list ap;
va_start(ap, fmt);
eina_strbuf_append_vprintf(buf, fmt, ap);
va_end(ap);
eina_strbuf_append(buf, "\n ");
while (ln != end && !is_newline(*ln))
eina_strbuf_append_char(buf, *(ln++));
eina_strbuf_append_char(buf, '\n');
for (i = 0; i < ls->column; ++i)
eina_strbuf_append_char(buf, ' ');
eina_strbuf_append(buf, "^\n");
fprintf(stderr, "eolian:%s:%d:%d: %s\n", ls->source, ls->line_number,
ls->column, eina_strbuf_string_get(buf));
eina_strbuf_free(buf);
longjmp(ls->err_jmp, EINA_TRUE);
}
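/* build the keyword -> id hash; stored ids are 1-based so that 0 can mean
* "not a keyword" */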
static void
init_hash(void)
{
unsigned int i;
if (keyword_map) return;
keyword_map = eina_hash_string_superfast_new(NULL);
for (i = 0; i < (sizeof(keywords) / sizeof(keywords[0])); ++i)
eina_hash_add(keyword_map, keywords[i], (void *)(size_t)(i + 1));
}
static void
destroy_hash(void)
{
if (keyword_map)
{
eina_hash_free(keyword_map);
keyword_map = NULL;
}
}
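/* write a printable form of token into buf: the current token's string value
* for TOK_VALUE, its symbolic name otherwise */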
static void
txt_token(Eo_Lexer *ls, int token, char *buf)
{
if (token == TOK_VALUE)
memcpy(buf, ls->t.value.s, strlen(ls->t.value.s) + 1);
else
return eo_lexer_token_to_str(token, buf);
}
void eo_lexer_lex_error (Eo_Lexer *ls, const char *msg, int token);
void eo_lexer_syntax_error(Eo_Lexer *ls, const char *msg);
static void next_line(Eo_Lexer *ls)
{
int old = ls->current;
assert(is_newline(ls->current));
ls->stream_line = ls->stream;
next_char(ls);
if (is_newline(ls->current) && ls->current != old)
{
next_char(ls);
ls->stream_line = ls->stream;
}
if (++ls->iline_number >= INT_MAX)
eo_lexer_syntax_error(ls, "chunk has too many lines");
ls->line_number = ls->iline_number;
ls->icolumn = ls->column = 0;
}
static void skip_ws(Eo_Lexer *ls)
{
while (isspace(ls->current) && !is_newline(ls->current))
next_char(ls);
}
/* go to next line and strip leading whitespace */
static void next_line_ws(Eo_Lexer *ls)
{
next_line(ls);
skip_ws(ls);
}
static Eina_Bool
should_skip_star(Eo_Lexer *ls, int ccol, Eina_Bool *term)
{
Eina_Bool had_star = EINA_FALSE;
if (ls->column == ccol && ls->current == '*')
{
had_star = EINA_TRUE;
next_char(ls);
if (ls->current == '/')
{
next_char(ls);
*term = EINA_TRUE;
return EINA_FALSE;
}
skip_ws(ls);
}
return had_star;
}
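/* read the body of a regular block comment into ls->buff, dropping the
* leading '*' of continuation lines aligned with the opening column (ccol)
* and trimming surrounding whitespace */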
static void
read_long_comment(Eo_Lexer *ls, int ccol)
{
Eina_Bool had_star = EINA_FALSE, had_nl = EINA_FALSE;
eina_strbuf_reset(ls->buff);
if (is_newline(ls->current))
{
Eina_Bool term = EINA_FALSE;
had_nl = EINA_TRUE;
next_line_ws(ls);
had_star = should_skip_star(ls, ccol, &term);
if (term) goto cend;
}
for (;;)
{
if (!ls->current)
eo_lexer_lex_error(ls, "unfinished long comment", -1);
if (ls->current == '*')
{
next_char(ls);
if (ls->current == '/')
{
next_char(ls);
break;
}
eina_strbuf_append_char(ls->buff, '*');
}
else if (is_newline(ls->current))
{
eina_strbuf_append_char(ls->buff, '\n');
next_line_ws(ls);
if (!had_nl)
{
Eina_Bool term = EINA_FALSE;
had_nl = EINA_TRUE;
had_star = should_skip_star(ls, ccol, &term);
if (term) break;
}
else if (had_star && ls->column == ccol && ls->current == '*')
{
next_char(ls);
if (ls->current == '/')
{
next_char(ls);
break;
}
skip_ws(ls);
}
}
else
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
}
}
cend:
eina_strbuf_trim(ls->buff);
}
enum Doc_Tokens {
DOC_MANGLED = -2, DOC_UNFINISHED = -1, DOC_TEXT = 0, DOC_SINCE = 1
};
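/* turn a class reference such as "Foo.Bar" into its lowercase file name
* ("foo_bar.eo") and, if that file is known, register it in _defereos for
* later processing */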
static void
doc_ref_class(const char *cname)
{
size_t clen = strlen(cname);
char *buf = alloca(clen + 4);
memcpy(buf, cname, clen);
buf[clen] = '\0';
for (char *p = buf; *p; ++p)
{
if (*p == '.')
*p = '_';
else
*p = tolower(*p);
}
memcpy(buf + clen, ".eo", sizeof(".eo"));
const char *eop = eina_hash_find(_filenames, buf);
if (!eop)
return;
eina_hash_set(_defereos, buf, eop);
}
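/* handle a documentation @reference: since the lexer cannot know which part
* of the name is the class, register every candidate (the full name, the
* name minus a trailing method, and minus a trailing .get/.set) */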
static void
doc_ref(Eo_Lexer *ls)
{
const char *st = ls->stream, *ste = ls->stream_end;
size_t rlen = 0;
while ((st != ste) && ((*st == '.') || isalnum(*st)))
{
++st;
++rlen;
}
if ((rlen > 1) && (*(st - 1) == '.'))
--rlen;
if (!rlen)
return;
if (*ls->stream == '.')
return;
char *buf = alloca(rlen + 1);
memcpy(buf, ls->stream, rlen);
buf[rlen] = '\0';
/* actual full class name */
doc_ref_class(buf);
/* method name at the end */
char *end = strrchr(buf, '.');
if (!end)
return;
*end = '\0';
doc_ref_class(buf);
/* .get or .set at the end, handle possible property */
if (strcmp(end + 1, "get") && strcmp(end + 1, "set"))
return;
end = strrchr(buf, '.');
if (!end)
return;
*end = '\0';
doc_ref_class(buf);
}
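/* lex one documentation paragraph into ls->buff; returns a Doc_Tokens value,
* sets *term once the closing ]] has been consumed and *since when an
* "@since " marker is found */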
static int
doc_lex(Eo_Lexer *ls, Eina_Bool *term, Eina_Bool *since)
{
int tokret = -1;
eina_strbuf_reset(ls->buff);
*since = EINA_FALSE;
for (;;) switch (ls->current)
{
/* error case */
case '\0':
return DOC_UNFINISHED;
/* newline case: if two or more newlines are present, new paragraph
* if only one newline is present, append space to the text buffer
* when starting new paragraph, reset doc continuation
*/
case '\n':
case '\r':
next_line(ls);
skip_ws(ls);
if (!is_newline(ls->current))
{
eina_strbuf_append_char(ls->buff, ' ');
continue;
}
while (is_newline(ls->current))
next_line_ws(ls);
tokret = DOC_TEXT;
goto exit_with_token;
/* escape case: for any \X, output \X
* except for \\]], then output just ]]
*/
case '\\':
next_char(ls);
if (ls->current == ']')
{
next_char(ls);
if (ls->current == ']')
{
next_char(ls);
eina_strbuf_append(ls->buff, "]]");
}
else
eina_strbuf_append(ls->buff, "\\]");
}
else
eina_strbuf_append_char(ls->buff, '\\');
continue;
/* terminating case */
case ']':
next_char(ls);
if (ls->current == ']')
{
/* terminate doc */
tokret = DOC_TEXT;
goto terminated;
}
eina_strbuf_append_char(ls->buff, ']');
continue;
/* references and @since */
case '@':
if ((size_t)(ls->stream_end - ls->stream) >= (sizeof("since")) &&
!memcmp(ls->stream, "since ", sizeof("since")))
{
next_char(ls);
*since = EINA_TRUE;
for (size_t i = 0; i < sizeof("since"); ++i)
next_char(ls);
skip_ws(ls);
tokret = DOC_TEXT;
goto exit_with_token;
}
doc_ref(ls);
eina_strbuf_append_char(ls->buff, '@');
next_char(ls);
/* in-class references */
if (ls->tmp.kls && ls->current == '.')
{
next_char(ls);
if (isalpha(ls->current) || ls->current == '_')
eina_strbuf_append(ls->buff, ls->tmp.kls->full_name);
eina_strbuf_append_char(ls->buff, '.');
}
continue;
/* default case - append character */
default:
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
continue;
}
terminated:
next_char(ls);
*term = EINA_TRUE;
exit_with_token:
eina_strbuf_trim(ls->buff);
return tokret;
}
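/* read the version string following @since and require the ]] terminator
* right after it */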
static int
read_since(Eo_Lexer *ls)
{
eina_strbuf_reset(ls->buff);
while (ls->current && (ls->current == '.' ||
ls->current == '_' ||
isalnum(ls->current)))
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
}
if (!eina_strbuf_length_get(ls->buff))
return DOC_UNFINISHED;
skip_ws(ls);
while (is_newline(ls->current))
next_line_ws(ls);
if (ls->current != ']')
return DOC_MANGLED;
next_char(ls);
if (ls->current != ']')
return DOC_MANGLED;
next_char(ls);
return DOC_SINCE;
}
static void
doc_error(Eo_Lexer *ls, const char *msg, Eolian_Documentation *doc, Eina_Strbuf *buf)
{
eina_stringshare_del(doc->summary);
eina_stringshare_del(doc->description);
free(doc);
eina_strbuf_free(buf);
eo_lexer_lex_error(ls, msg, -1);
}
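/* read a whole [[ ... ]] documentation block: the first paragraph becomes
* the summary, any following ones the description; @since is stored
* separately and falls back to the class documentation when the block has
* none of its own */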
static void
read_doc(Eo_Lexer *ls, Eo_Token *tok, int line, int column)
{
Eolian_Documentation *doc = calloc(1, sizeof(Eolian_Documentation));
doc->base.file = ls->filename;
doc->base.line = line;
doc->base.column = column;
Eina_Strbuf *rbuf = eina_strbuf_new();
Eina_Bool term = EINA_FALSE, since = EINA_FALSE;
while (!term)
{
int read;
if (since)
{
read = read_since(ls);
term = EINA_TRUE;
}
else
read = doc_lex(ls, &term, &since);
switch (read)
{
case DOC_MANGLED:
doc_error(ls, "mangled documentation", doc, rbuf);
return;
case DOC_UNFINISHED:
doc_error(ls, "unfinished documentation", doc, rbuf);
return;
case DOC_TEXT:
if (!eina_strbuf_length_get(ls->buff))
continue;
if (!doc->summary)
doc->summary = eina_stringshare_add(eina_strbuf_string_get(ls->buff));
else
{
if (eina_strbuf_length_get(rbuf))
eina_strbuf_append(rbuf, "\n\n");
eina_strbuf_append(rbuf, eina_strbuf_string_get(ls->buff));
}
break;
case DOC_SINCE:
doc->since = eina_stringshare_add(eina_strbuf_string_get(ls->buff));
break;
}
}
if (eina_strbuf_length_get(rbuf))
doc->description = eina_stringshare_add(eina_strbuf_string_get(rbuf));
if (!doc->summary)
doc->summary = eina_stringshare_add("No description supplied.");
if (!doc->since && ls->tmp.kls && ls->tmp.kls->doc)
doc->since = eina_stringshare_ref(ls->tmp.kls->doc->since);
eina_strbuf_free(rbuf);
tok->value.doc = doc;
}
static void
esc_error(Eo_Lexer *ls, int *c, int n, const char *msg)
{
int i;
eina_strbuf_reset(ls->buff);
eina_strbuf_append_char(ls->buff, '\\');
for (i = 0; i < n && c[i]; ++i)
eina_strbuf_append_char(ls->buff, c[i]);
eo_lexer_lex_error(ls, msg, TOK_STRING);
}
static int
hex_val(int c)
{
if (c >= 'a') return c - 'a' + 10;
if (c >= 'A') return c - 'A' + 10;
return c - '0';
}
static int
read_hex_esc(Eo_Lexer *ls)
{
int c[3] = { 'x' };
int i, r = 0;
for (i = 1; i < 3; ++i)
{
next_char(ls);
c[i] = ls->current;
if (!isxdigit(c[i]))
esc_error(ls, c, i + 1, "hexadecimal digit expected");
r = (r << 4) + hex_val(c[i]);
}
return r;
}
static int
read_dec_esc(Eo_Lexer *ls)
{
int c[3];
int i, r = 0;
for (i = 0; i < 3 && isdigit(ls->current); ++i)
{
c[i] = ls->current;
r = r * 10 + (c[i] - '0');
next_char(ls);
}
if (r > UCHAR_MAX)
esc_error(ls, c, i, "decimal escape too large");
return r;
}
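/* translate one backslash escape (named, hex or decimal) and append the
* resulting character to ls->buff */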
static void
read_escape(Eo_Lexer *ls)
{
switch (ls->current)
{
case 'a': eina_strbuf_append_char(ls->buff, '\a'); next_char(ls); break;
case 'b': eina_strbuf_append_char(ls->buff, '\b'); next_char(ls); break;
case 'f': eina_strbuf_append_char(ls->buff, '\f'); next_char(ls); break;
case 'n': eina_strbuf_append_char(ls->buff, '\n'); next_char(ls); break;
case 'r': eina_strbuf_append_char(ls->buff, '\r'); next_char(ls); break;
case 't': eina_strbuf_append_char(ls->buff, '\t'); next_char(ls); break;
case 'v': eina_strbuf_append_char(ls->buff, '\v'); next_char(ls); break;
case 'x':
eina_strbuf_append_char(ls->buff, read_hex_esc(ls));
next_char(ls);
break;
case '\n': case '\r':
next_line(ls);
eina_strbuf_append_char(ls->buff, '\n');
break;
case '\\': case '"': case '\'':
eina_strbuf_append_char(ls->buff, ls->current);
break;
case '\0':
break;
default:
if (!isdigit(ls->current))
esc_error(ls, &ls->current, 1, "invalid escape sequence");
eina_strbuf_append_char(ls->buff, read_dec_esc(ls));
break;
}
}
static void
read_string(Eo_Lexer *ls, Eo_Token *tok)
{
eina_strbuf_reset(ls->buff);
eina_strbuf_append_char(ls->buff, '"');
next_char(ls);
while (ls->current != '"') switch (ls->current)
{
case '\0':
eo_lexer_lex_error(ls, "unfinished string", -1);
break;
case '\n': case '\r':
eo_lexer_lex_error(ls, "unfinished string", TOK_STRING);
break;
case '\\':
{
next_char(ls);
read_escape(ls);
break;
}
default:
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
}
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
tok->value.s = eina_stringshare_add_length(eina_strbuf_string_get(ls->buff) + 1,
(unsigned int)eina_strbuf_length_get(ls->buff) - 2);
}
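/* consume a C-style numeric suffix (f/F, u/U, l/L, ll/LL and combinations)
* and return the matching NUM_* kind */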
static int
get_type(Eo_Lexer *ls, Eina_Bool is_float)
{
if (is_float)
{
if (ls->current == 'f' || ls->current == 'F')
{
next_char(ls);
return NUM_FLOAT;
}
return NUM_DOUBLE;
}
if (ls->current == 'u' || ls->current == 'U')
{
next_char(ls);
if (ls->current == 'l' || ls->current == 'L')
{
next_char(ls);
if (ls->current == 'l' || ls->current == 'L')
{
next_char(ls);
return NUM_ULLONG;
}
return NUM_ULONG;
}
return NUM_UINT;
}
if (ls->current == 'l' || ls->current == 'L')
{
next_char(ls);
if (ls->current == 'l' || ls->current == 'L')
{
next_char(ls);
return NUM_LLONG;
}
return NUM_LONG;
}
return NUM_INT;
}
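/* strtof/strtod honor the current locale, so the '.' used in eo sources may
* need to be rewritten to the locale's decimal separator before conversion;
* the two helpers below take care of that */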
static void
replace_decpoint(Eo_Lexer *ls, char prevdecp)
{
if (ls->decpoint == prevdecp) return;
char *bufs = eina_strbuf_string_steal(ls->buff);
char *p = bufs;
while ((p = strchr(p, prevdecp))) *p = ls->decpoint;
eina_strbuf_append(ls->buff, bufs);
free(bufs);
}
static void
write_val_with_decpoint(Eo_Lexer *ls, Eo_Token *tok, int type)
{
struct lconv *lc = localeconv();
char prev = ls->decpoint;
ls->decpoint = lc ? lc->decimal_point[0] : '.';
if (ls->decpoint == prev)
{
eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
return;
}
replace_decpoint(ls, prev);
char *end = NULL;
if (type == NUM_FLOAT)
tok->value.f = strtof(eina_strbuf_string_get(ls->buff), &end);
else if (type == NUM_DOUBLE)
tok->value.d = strtod(eina_strbuf_string_get(ls->buff), &end);
if (end && end[0])
eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
}
static void
write_val(Eo_Lexer *ls, Eo_Token *tok, Eina_Bool is_float)
{
int type = get_type(ls, is_float);
char *end = NULL;
if (is_float)
{
replace_decpoint(ls, '.');
if (type == NUM_FLOAT)
tok->value.f = strtof(eina_strbuf_string_get(ls->buff), &end);
else if (type == NUM_DOUBLE)
tok->value.d = strtod(eina_strbuf_string_get(ls->buff), &end);
}
else
{
const char *str = eina_strbuf_string_get(ls->buff);
/* signed and unsigned variants share the same union storage, so parsing as unsigned is enough */
if (type == NUM_INT || type == NUM_UINT)
tok->value.u = strtoul(str, &end, 0);
else if (type == NUM_LONG || type == NUM_ULONG)
tok->value.ul = strtoul(str, &end, 0);
else if (type == NUM_LLONG || type == NUM_ULLONG)
tok->value.ull = strtoull(str, &end, 0);
}
if (end && end[0])
{
if (is_float)
{
write_val_with_decpoint(ls, tok, type);
return;
}
eo_lexer_lex_error(ls, "malformed number", TOK_NUMBER);
}
tok->kw = type;
}
static void
write_exp(Eo_Lexer *ls)
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
if (ls->current == '+' || ls->current == '-')
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
while (isdigit(ls->current))
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
}
}
}
static void
read_hex_number(Eo_Lexer *ls, Eo_Token *tok)
{
Eina_Bool is_float = EINA_FALSE;
while (isxdigit(ls->current) || ls->current == '.')
{
eina_strbuf_append_char(ls->buff, ls->current);
if (ls->current == '.') is_float = EINA_TRUE;
next_char(ls);
}
if (is_float && (ls->current != 'p' && ls->current != 'P'))
{
eo_lexer_lex_error(ls, "hex float literals require an exponent",
TOK_NUMBER);
}
if (ls->current == 'p' || ls->current == 'P')
{
is_float = EINA_TRUE;
write_exp(ls);
}
write_val(ls, tok, is_float);
}
static void
read_number(Eo_Lexer *ls, Eo_Token *tok)
{
Eina_Bool is_float = eina_strbuf_string_get(ls->buff)[0] == '.';
if (ls->current == '0' && !is_float)
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
if (ls->current == 'x' || ls->current == 'X')
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
read_hex_number(ls, tok);
return;
}
}
while (isdigit(ls->current) || ls->current == '.')
{
eina_strbuf_append_char(ls->buff, ls->current);
if (ls->current == '.') is_float = EINA_TRUE;
next_char(ls);
}
if (ls->current == 'e' || ls->current == 'E')
{
is_float = EINA_TRUE;
write_exp(ls);
}
write_val(ls, tok, is_float);
}
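/* the main tokenizer: dispatches on the current character; single characters
* are returned as-is, multi-character operators are only recognized in
* expression mode, and -1 means end of input */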
static int
lex(Eo_Lexer *ls, Eo_Token *tok)
{
eina_strbuf_reset(ls->buff);
tok->value.s = NULL;
for (;;) switch (ls->current)
{
case '\n':
case '\r':
next_line(ls);
continue;
case '/':
{
next_char(ls);
if (ls->current == '*')
{
int ccol = ls->column;
next_char(ls);
if (ls->current == '@')
{
eo_lexer_lex_error(ls, "old style documentation comment", -1);
return -1; /* unreachable */
}
read_long_comment(ls, ccol);
continue;
}
else if (ls->current != '/') return '/';
next_char(ls);
while (ls->current && !is_newline(ls->current))
next_char(ls);
continue;
}
case '[':
{
int dline = ls->line_number, dcol = ls->column;
const char *sline = ls->stream_line;
next_char(ls);
if (ls->current != '[') return '[';
next_char(ls);
read_doc(ls, tok, dline, dcol);
ls->column = dcol + 1;
/* doc is the only potentially multiline token */
ls->line_number = dline;
ls->stream_line = sline;
return TOK_DOC;
}
case '\0':
return -1;
case '=':
next_char(ls);
if (!ls->expr_mode || (ls->current != '=')) return '=';
next_char(ls);
--ls->column;
return TOK_EQ;
case '!':
next_char(ls);
if (!ls->expr_mode || (ls->current != '=')) return '!';
next_char(ls);
--ls->column;
return TOK_NQ;
case '>':
next_char(ls);
if (!ls->expr_mode) return '>';
if (ls->current == '=')
{
next_char(ls);
--ls->column;
return TOK_GE;
}
else if (ls->current == '>')
{
next_char(ls);
--ls->column;
return TOK_RSH;
}
return '>';
case '<':
next_char(ls);
if (!ls->expr_mode) return '<';
if (ls->current == '=')
{
next_char(ls);
--ls->column;
return TOK_LE;
}
else if (ls->current == '<')
{
next_char(ls);
--ls->column;
return TOK_LSH;
}
return '<';
case '&':
next_char(ls);
if (!ls->expr_mode || (ls->current != '&')) return '&';
next_char(ls);
--ls->column;
return TOK_AND;
case '|':
next_char(ls);
if (!ls->expr_mode || (ls->current != '|')) return '|';
next_char(ls);
--ls->column;
return TOK_OR;
case '"':
{
int dcol = ls->column;
if (!ls->expr_mode)
{
next_char(ls);
return '"';
}
/* strings are not multiline for now at least */
read_string(ls, tok);
ls->column = dcol + 1;
return TOK_STRING;
}
case '\'':
{
int dcol = ls->column;
next_char(ls);
if (!ls->expr_mode) return '\'';
if (ls->current == '\\')
{
next_char(ls);
eina_strbuf_reset(ls->buff);
read_escape(ls);
tok->value.c = (char)*eina_strbuf_string_get(ls->buff);
}
else
{
tok->value.c = ls->current;
next_char(ls);
}
if (ls->current != '\'')
eo_lexer_lex_error(ls, "unfinished character", TOK_CHAR);
next_char(ls);
ls->column = dcol + 1;
return TOK_CHAR;
}
case '.':
{
int dcol = ls->column;
next_char(ls);
if (!isdigit(ls->current)) return '.';
eina_strbuf_reset(ls->buff);
eina_strbuf_append_char(ls->buff, '.');
read_number(ls, tok);
ls->column = dcol + 1;
return TOK_NUMBER;
}
default:
{
if (isspace(ls->current))
{
assert(!is_newline(ls->current));
next_char(ls);
continue;
}
else if (isdigit(ls->current))
{
int col = ls->column;
eina_strbuf_reset(ls->buff);
read_number(ls, tok);
ls->column = col + 1;
return TOK_NUMBER;
}
if (ls->current && (isalnum(ls->current)
|| ls->current == '@' || ls->current == '_'))
{
int col = ls->column;
Eina_Bool at_kw = (ls->current == '@');
const char *str;
eina_strbuf_reset(ls->buff);
do
{
eina_strbuf_append_char(ls->buff, ls->current);
next_char(ls);
}
while (ls->current && (isalnum(ls->current)
|| ls->current == '_'));
str = eina_strbuf_string_get(ls->buff);
tok->kw = (int)(uintptr_t)eina_hash_find(keyword_map, str);
ls->column = col + 1;
tok->value.s = eina_stringshare_add(str);
if (at_kw && tok->kw == 0)
eo_lexer_syntax_error(ls, "invalid keyword");
return TOK_VALUE;
}
else
{
int c = ls->current;
next_char(ls);
return c;
}
}
}
}
static const char *
get_filename(Eo_Lexer *ls)
{
char *dup = strdup(ls->source);
char *s = basename(dup);
const char *file = eina_stringshare_add(s);
free(dup);
return file;
}
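/* map the source file, initialize the lexer state and skip a UTF-8 byte
* order mark if one is present */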
static void
eo_lexer_set_input(Eo_Lexer *ls, const char *source)
{
Eina_File *f = eina_file_open(source, EINA_FALSE);
if (!f)
{
fprintf(stderr, "eolian: %s\n", strerror(errno));
longjmp(ls->err_jmp, EINA_TRUE);
}
ls->lookahead.token = -1;
ls->buff = eina_strbuf_new();
ls->handle = f;
ls->stream = eina_file_map_all(f, EINA_FILE_RANDOM);
ls->stream_end = ls->stream + eina_file_size_get(f);
ls->stream_line = ls->stream;
ls->source = eina_stringshare_add(source);
ls->filename = get_filename(ls);
ls->iline_number = ls->line_number = 1;
ls->icolumn = ls->column = -1;
ls->decpoint = '.';
next_char(ls);
if (ls->current != 0xEF)
return;
next_char(ls);
if (ls->current != 0xBB)
return;
next_char(ls);
if (ls->current != 0xBF)
return;
next_char(ls);
}
static void
_temps_free(Eo_Lexer_Temps *tmp)
{
Eina_Strbuf *buf;
Eolian_Type *tp;
Eolian_Typedecl *tpd;
const char *s;
if (tmp->kls)
database_class_del(tmp->kls);
if (tmp->var)
database_var_del(tmp->var);
EINA_LIST_FREE(tmp->str_bufs, buf)
eina_strbuf_free(buf);
EINA_LIST_FREE(tmp->type_defs, tp)
database_type_del(tp);
EINA_LIST_FREE(tmp->type_decls, tpd)
database_typedecl_del(tpd);
EINA_LIST_FREE(tmp->strs, s)
if (s) eina_stringshare_del(s);
}
static void
_free_tok(Eo_Token *tok)
{
if (tok->token < START_CUSTOM || tok->token == TOK_NUMBER ||
tok->token == TOK_CHAR)
return;
if (tok->token == TOK_DOC)
{
/* free doc */
if (!tok->value.doc) return;
eina_stringshare_del(tok->value.doc->summary);
eina_stringshare_del(tok->value.doc->description);
free(tok->value.doc);
tok->value.doc = NULL;
return;
}
eina_stringshare_del(tok->value.s);
tok->value.s = NULL;
}
void
eo_lexer_free(Eo_Lexer *ls)
{
if (!ls) return;
if (ls->source ) eina_stringshare_del(ls->source);
if (ls->filename) eina_stringshare_del(ls->filename);
if (ls->buff ) eina_strbuf_free (ls->buff);
if (ls->handle ) eina_file_close (ls->handle);
_free_tok(&ls->t);
eo_lexer_context_clear(ls);
_temps_free(&ls->tmp);
free(ls);
}
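/* A minimal usage sketch of the entry points below; "example.eo" is just a
* placeholder path and error handling is simplified (a real caller installs
* ls->err_jmp with setjmp() around lexing, the same way eo_lexer_new() wraps
* eo_lexer_set_input() below):
*
*   eo_lexer_init();
*   Eo_Lexer *ls = eo_lexer_new("example.eo");
*   if (ls)
*     {
*        int t;
*        char buf[256];
*        while ((t = eo_lexer_get(ls)) >= 0)
*          {
*             eo_lexer_token_to_str(t, buf);
*             printf("token: %s\n", buf);
*          }
*        eo_lexer_free(ls);
*     }
*   eo_lexer_shutdown();
*/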
Eo_Lexer *
eo_lexer_new(const char *source)
{
Eo_Lexer *ls = calloc(1, sizeof(Eo_Lexer));
if (!setjmp(ls->err_jmp))
{
eo_lexer_set_input(ls, source);
return ls;
}
eo_lexer_free(ls);
return NULL;
}
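/* fetch the next token into ls->t, reusing a buffered lookahead token if one
* exists; returns the token id, or -1 at end of input */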
int
eo_lexer_get(Eo_Lexer *ls)
{
_free_tok(&ls->t);
if (ls->lookahead.token >= 0)
{
ls->t = ls->lookahead;
ls->lookahead.token = -1;
return ls->t.token;
}
ls->t.kw = 0;
return (ls->t.token = lex(ls, &ls->t));
}
int
eo_lexer_lookahead(Eo_Lexer *ls)
{
assert (ls->lookahead.token < 0);
ls->lookahead.kw = 0;
eo_lexer_context_push(ls);
ls->lookahead.token = lex(ls, &ls->lookahead);
eo_lexer_context_restore(ls);
eo_lexer_context_pop(ls);
return ls->lookahead.token;
}
void
eo_lexer_lex_error(Eo_Lexer *ls, const char *msg, int token)
{
if (token)
{
char buf[256];
txt_token(ls, token, buf);
throw(ls, "%s near '%s'", msg, buf);
}
else
throw(ls, "%s", msg);
}
void
eo_lexer_syntax_error(Eo_Lexer *ls, const char *msg)
{
eo_lexer_lex_error(ls, msg, ls->t.token);
}
void
eo_lexer_token_to_str(int token, char *buf)
{
if (token < 0)
{
memcpy(buf, "<eof>", 6);
}
else if (token < START_CUSTOM)
{
assert((unsigned char)token == token);
if (iscntrl(token))
sprintf(buf, "char(%d)", token);
else
sprintf(buf, "%c", token);
}
else
{
const char *v;
size_t idx = token - START_CUSTOM;
size_t tsz = sizeof(tokens) / sizeof(tokens[0]);
if (idx >= tsz)
v = keywords[idx - tsz];
else
v = tokens[idx];
memcpy(buf, v, strlen(v) + 1);
}
}
const char *
eo_lexer_keyword_str_get(int kw)
{
return keywords[kw - 1];
}
Eina_Bool
eo_lexer_is_type_keyword(int kw)
{
return (kw >= KW_byte && kw < KW_true);
}
int
eo_lexer_keyword_str_to_id(const char *kw)
{
return (int)(uintptr_t)eina_hash_find(keyword_map, kw);
}
const char *
eo_lexer_get_c_type(int kw)
{
if (!eo_lexer_is_type_keyword(kw)) return NULL;
return ctypes[kw - KW_byte];
}
static int _init_counter = 0;
int
eo_lexer_init()
{
if (!_init_counter)
{
eina_init();
init_hash();
}
return _init_counter++;
}
int
eo_lexer_shutdown()
{
if (_init_counter <= 0) return 0;
_init_counter--;
if (!_init_counter)
{
destroy_hash();
eina_shutdown();
}
return _init_counter;
}
static Eina_Bool
_eo_is_tokstr(int t)
{
return (t == TOK_STRING) || (t == TOK_VALUE);
}
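/* saved contexts let the parser record position and token state before
* peeking ahead and restore or drop it afterwards; string-valued tokens are
* refcounted across the save */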
void
eo_lexer_context_push(Eo_Lexer *ls)
{
Lexer_Ctx *ctx = malloc(sizeof(Lexer_Ctx));
ctx->line = ls->line_number;
ctx->column = ls->column;
ctx->linestr = ls->stream_line;
ctx->token = ls->t;
if (_eo_is_tokstr(ctx->token.token))
eina_stringshare_ref(ctx->token.value.s);
ls->saved_ctxs = eina_list_prepend(ls->saved_ctxs, ctx);
}
void
eo_lexer_context_pop(Eo_Lexer *ls)
{
Lexer_Ctx *ctx = (Lexer_Ctx*)eina_list_data_get(ls->saved_ctxs);
if (_eo_is_tokstr(ctx->token.token))
eina_stringshare_del(ctx->token.value.s);
free(ctx);
ls->saved_ctxs = eina_list_remove_list(ls->saved_ctxs, ls->saved_ctxs);
}
void
eo_lexer_context_restore(Eo_Lexer *ls)
{
if (!eina_list_count(ls->saved_ctxs)) return;
Lexer_Ctx *ctx = (Lexer_Ctx*)eina_list_data_get(ls->saved_ctxs);
ls->line_number = ctx->line;
ls->column = ctx->column;
ls->stream_line = ctx->linestr;
if (_eo_is_tokstr(ls->t.token))
eina_stringshare_del(ls->t.value.s);
ls->t = ctx->token;
if (_eo_is_tokstr(ls->t.token))
eina_stringshare_ref(ls->t.value.s);
}
void
eo_lexer_context_clear(Eo_Lexer *ls)
{
Lexer_Ctx *ctx;
EINA_LIST_FREE(ls->saved_ctxs, ctx) free(ctx);
}