2014-06-18 03:25:07 -07:00
|
|
|
#ifndef __EO_LEXER_H__
|
|
|
|
#define __EO_LEXER_H__
|
|
|
|
|
|
|
|
#include <setjmp.h>
|
2014-02-11 05:42:59 -08:00
|
|
|
|
|
|
|
#include <Eina.h>
|
2014-02-17 04:05:52 -08:00
|
|
|
#include <Eolian.h>
|
2014-09-12 05:40:46 -07:00
|
|
|
|
|
|
|
#include "eolian_database.h"
|
2014-02-11 05:42:59 -08:00
|
|
|
|
2014-07-15 02:09:50 -07:00
|
|
|
/* A token is an int. Single-character tokens are simply represented by
 * their ASCII value; custom (multi-character) token ids start at
 * START_CUSTOM and are assigned sequentially from there. */
#define START_CUSTOM 257

enum Tokens
{
   TOK_EQ = START_CUSTOM, TOK_NQ, TOK_GE, TOK_LE,
   TOK_AND, TOK_OR, TOK_LSH, TOK_RSH,

   TOK_DOC, TOK_STRING, TOK_CHAR, TOK_NUMBER, TOK_VALUE
};
|
|
|
|
|
2014-07-15 02:09:50 -07:00
|
|
|
/* All keywords in eolian. They can still be used as names (they are lexed
 * as TOK_VALUE); being a keyword just fills in the "kw" field of the token.
 *
 * reserved for the future: @nullable
 *
 * KEYWORDS is an X-macro list: every user defines KW(), KWAT() and KWH()
 * to say what each entry expands to, expands KEYWORDS, and then undefines
 * the three helpers again. The order of the entries determines the numeric
 * ids in enum Keywords below, so new entries change the ids of everything
 * that follows them.
 */
#define KEYWORDS KW(class), KW(const), KW(enum), KW(return), KW(struct), \
    \
    KW(abstract), KW(c_prefix), KW(composites), KW(constructor), KW(constructors), \
    KW(data), KW(destructor), KW(error), KW(event_c_prefix), KW(events), KW(extends), \
    KW(free), KW(get), KW(implements), KW(import), KW(interface), \
    KW(keys), KW(legacy), KW(methods), KW(mixin), KW(params), \
    KW(parse), KW(parts), KW(ptr), KW(set), KW(type), KW(values), KW(requires), \
    \
    KWAT(auto), KWAT(beta), KWAT(by_ref), KWAT(c_name), KWAT(const), \
    KWAT(empty), KWAT(extern), KWAT(free), KWAT(hot), KWAT(in), KWAT(inout), \
    KWAT(move), KWAT(no_unused), KWAT(nullable), KWAT(optional), KWAT(out), \
    KWAT(private), KWAT(property), KWAT(protected), KWAT(restart), \
    KWAT(pure_virtual), KWAT(static), \
    \
    KWH(version), \
    \
    KW(byte), KW(ubyte), KW(char), KW(short), KW(ushort), KW(int), KW(uint), \
    KW(long), KW(ulong), KW(llong), KW(ullong), \
    \
    KW(int8), KW(uint8), KW(int16), KW(uint16), KW(int32), KW(uint32), \
    KW(int64), KW(uint64), KW(int128), KW(uint128), \
    \
    KW(size), KW(ssize), KW(intptr), KW(uintptr), KW(ptrdiff), \
    \
    KW(time), \
    \
    KW(float), KW(double), \
    \
    KW(bool), \
    \
    KW(slice), KW(rw_slice), \
    \
    KW(void), \
    \
    KW(accessor), KW(array), KW(future), KW(iterator), KW(list), \
    KW(any_value), KW(any_value_ref), KW(binbuf), KW(event), \
    KW(mstring), KW(string), KW(stringshare), KW(strbuf), \
    \
    KW(hash), \
    KW(void_ptr), \
    KW(function), \
    KW(__undefined_type), \
    \
    KW(true), KW(false), KW(null)

/* "regular" keyword, @ prefixed keyword and # prefixed keyword; here each
 * list entry becomes an enumerator name (KW_x, KW_at_x, KW_hash_x) */
#define KW(x) KW_##x
#define KWAT(x) KW_at_##x
#define KWH(x) KW_hash_##x

/* keyword ids; KW_UNKNOWN (0) is the "not a keyword" value, the rest are
 * numbered from 1 in KEYWORDS order */
enum Keywords
{
   KW_UNKNOWN = 0,
   KEYWORDS
};

/* the helpers are only meaningful for the expansion above; KEYWORDS itself
 * stays defined so it can be expanded again with other helper definitions */
#undef KW
#undef KWAT
#undef KWH
|
2014-06-18 03:25:07 -07:00
|
|
|
|
2014-07-16 08:18:13 -07:00
|
|
|
/* Numeric kinds, one per numeric member of Eo_Token_Union.
 * NOTE(review): presumably stored in a token's "kw" field for TOK_NUMBER
 * tokens to say which union member is valid - confirm against the lexer. */
enum Numbers
{
   NUM_INT,
   NUM_UINT,
   NUM_LONG,
   NUM_ULONG,
   NUM_LLONG,
   NUM_ULLONG,
   NUM_FLOAT,
   NUM_DOUBLE
};
|
|
|
|
|
2015-06-02 10:12:09 -07:00
|
|
|
typedef union
|
|
|
|
{
|
|
|
|
char c;
|
|
|
|
const char *s;
|
|
|
|
signed int i;
|
|
|
|
unsigned int u;
|
|
|
|
signed long l;
|
|
|
|
unsigned long ul;
|
|
|
|
signed long long ll;
|
|
|
|
unsigned long long ull;
|
|
|
|
float f;
|
|
|
|
double d;
|
2015-06-02 10:27:46 -07:00
|
|
|
Eolian_Documentation *doc;
|
2015-06-02 10:12:09 -07:00
|
|
|
} Eo_Token_Union;
|
|
|
|
|
2014-07-15 02:09:50 -07:00
|
|
|
/* a token - "token" is the actual token id, "value" is the value of a token
|
|
|
|
* if needed - NULL otherwise - for example the value of a TOK_VALUE, "kw"
|
|
|
|
* is the keyword id if this is a keyword, it's 0 when not a keyword */
|
2014-06-18 03:25:07 -07:00
|
|
|
typedef struct _Eo_Token
|
|
|
|
{
|
2014-07-16 08:18:13 -07:00
|
|
|
int token, kw;
|
2015-06-02 10:12:09 -07:00
|
|
|
Eo_Token_Union value;
|
2014-06-18 03:25:07 -07:00
|
|
|
} Eo_Token;
|
2014-02-11 05:42:59 -08:00
|
|
|
|
2014-07-22 07:27:11 -07:00
|
|
|
typedef struct _Lexer_Ctx
|
|
|
|
{
|
|
|
|
int line, column;
|
|
|
|
const char *linestr;
|
2015-05-27 03:25:04 -07:00
|
|
|
Eo_Token token;
|
2014-07-22 07:27:11 -07:00
|
|
|
} Lexer_Ctx;
|
|
|
|
|
2018-03-15 06:12:18 -07:00
|
|
|
typedef struct _Eo_Lexer_Dtor
|
|
|
|
{
|
|
|
|
Eina_Free_Cb free_cb;
|
|
|
|
void *data;
|
|
|
|
} Eo_Lexer_Dtor;
|
|
|
|
|
2014-07-15 02:09:50 -07:00
|
|
|
/* keeps all lexer state */
|
2014-06-18 03:25:07 -07:00
|
|
|
typedef struct _Eo_Lexer
|
2014-02-11 05:42:59 -08:00
|
|
|
{
|
2014-07-15 02:09:50 -07:00
|
|
|
/* current character being tested */
|
2014-06-18 03:25:07 -07:00
|
|
|
int current;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* column is token aware column number, for example when lexing a keyword
|
|
|
|
* it points to the beginning of it after the lexing is done, icolumn is
|
|
|
|
* token unaware, always pointing to current column */
|
2014-07-02 16:14:28 -07:00
|
|
|
int column, icolumn;
|
2016-06-13 06:53:35 -07:00
|
|
|
/* the current line number, token aware and unaware */
|
|
|
|
int line_number, iline_number;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* t: "normal" - token to lex into, "lookahead" - a lookahead token, used
|
|
|
|
* to look one token past "t", when we need to check for a token after the
|
|
|
|
* current one and use it in a conditional without consuming the current
|
|
|
|
* token - used in pretty few cases - because we have one extra lookahead
|
|
|
|
* token, that makes our grammar LL(2) - two tokens in total */
|
2014-06-18 03:25:07 -07:00
|
|
|
Eo_Token t, lookahead;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* a string buffer used to keep contents of token currently being read,
|
|
|
|
* if needed at all */
|
2014-06-18 03:25:07 -07:00
|
|
|
Eina_Strbuf *buff;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* a handle pointing to a memory mapped file representing the file we're
|
|
|
|
* currently lexing */
|
2014-06-18 03:25:07 -07:00
|
|
|
Eina_File *handle;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* the source file name */
|
2014-06-18 03:25:07 -07:00
|
|
|
const char *source;
|
2014-08-08 07:19:52 -07:00
|
|
|
/* only basename */
|
|
|
|
const char *filename;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* points to the current character in our mmapped file being lexed, just
|
|
|
|
* incremented until the end */
|
2014-06-18 03:25:07 -07:00
|
|
|
const char *stream;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* end pointer - required to check if we've reached past the file, as
|
|
|
|
* mmapped data will give us no EOF */
|
2014-06-20 02:45:55 -07:00
|
|
|
const char *stream_end;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* points to the current line being lexed, used by error messages to
|
|
|
|
* display the current line with a caret at the respective column */
|
2014-07-02 16:30:39 -07:00
|
|
|
const char *stream_line;
|
2017-12-06 06:06:54 -08:00
|
|
|
/* a pointer to the state this lexer belongs to */
|
2018-02-27 04:00:36 -08:00
|
|
|
Eolian_State *state;
|
2017-12-14 07:38:35 -08:00
|
|
|
/* the unit being filled during current parsing */
|
|
|
|
Eolian_Unit *unit;
|
2014-07-15 02:09:50 -07:00
|
|
|
/* this is jumped to when an error happens */
|
2014-06-18 03:25:07 -07:00
|
|
|
jmp_buf err_jmp;
|
2014-02-11 05:42:59 -08:00
|
|
|
|
2014-07-22 07:27:11 -07:00
|
|
|
/* saved context info */
|
|
|
|
Eina_List *saved_ctxs;
|
|
|
|
|
2018-03-15 04:30:06 -07:00
|
|
|
Eolian_Class *klass;
|
2018-03-15 08:14:02 -07:00
|
|
|
/* a dtor list; dtors can be pushed and popped during
|
|
|
|
* parser execution to simulate scoped resource management
|
|
|
|
*
|
|
|
|
* unpopped dtors (e.g. on error) are run when the state is freed
|
|
|
|
*/
|
2018-03-15 06:12:18 -07:00
|
|
|
Eina_List *dtors;
|
2018-03-15 08:14:02 -07:00
|
|
|
/* a node hash; eolian objects can be allocated through this and
|
|
|
|
* they are stored here (with 1 reference) until they're released
|
|
|
|
* into the environment (they also get deref'd)
|
|
|
|
*
|
|
|
|
* if the release never happens, everything is just freed when the state is
|
|
|
|
*/
|
2018-03-15 07:31:08 -07:00
|
|
|
Eina_Hash *nodes;
|
2018-03-15 04:30:06 -07:00
|
|
|
|
2015-06-25 02:43:54 -07:00
|
|
|
/* whether we allow lexing expression related tokens */
|
|
|
|
Eina_Bool expr_mode;
|
|
|
|
|
|
|
|
/* decimal point, by default '.' */
|
|
|
|
char decpoint;
|
2014-06-18 02:08:43 -07:00
|
|
|
} Eo_Lexer;
|
2014-02-11 05:42:59 -08:00
|
|
|
|
2018-03-20 09:34:38 -07:00
|
|
|
/* Lexer error kinds; EO_LEXER_ERROR_UNKNOWN is explicitly 0.
 * NOTE(review): presumably this is the value delivered through
 * Eo_Lexer.err_jmp when an error is "thrown" - confirm in the lexer. */
typedef enum _Eo_Lexer_Error
{
   EO_LEXER_ERROR_UNKNOWN = 0,
   EO_LEXER_ERROR_NORMAL,
   EO_LEXER_ERROR_OOM
} Eo_Lexer_Error;
|
|
|
|
|
2018-03-16 06:49:35 -07:00
|
|
|
void eo_lexer_init (void);
|
|
|
|
void eo_lexer_shutdown (void);
|
2018-02-27 04:00:36 -08:00
|
|
|
Eo_Lexer *eo_lexer_new (Eolian_State *state, const char *source);
|
2014-06-20 07:31:45 -07:00
|
|
|
void eo_lexer_free (Eo_Lexer *ls);
|
2014-07-15 02:09:50 -07:00
|
|
|
/* gets a regular token, singlechar or one of TOK_something */
|
2014-06-20 07:31:45 -07:00
|
|
|
int eo_lexer_get (Eo_Lexer *ls);
|
2014-07-15 02:09:50 -07:00
|
|
|
/* lookahead token - see Eo_Lexer */
|
2014-06-20 07:31:45 -07:00
|
|
|
int eo_lexer_lookahead (Eo_Lexer *ls);
|
2014-07-15 02:09:50 -07:00
|
|
|
/* "throws" an error, with a custom message and custom token */
|
2014-06-20 07:31:45 -07:00
|
|
|
void eo_lexer_lex_error (Eo_Lexer *ls, const char *msg, int token);
|
2019-04-02 04:28:48 -07:00
|
|
|
/* like above, but uses the lexstate->t.token, a.k.a. current token */
|
2014-06-20 07:31:45 -07:00
|
|
|
void eo_lexer_syntax_error (Eo_Lexer *ls, const char *msg);
|
2014-07-15 02:09:50 -07:00
|
|
|
/* turns the token into a string, writes into the given buffer */
|
2014-06-20 07:31:45 -07:00
|
|
|
void eo_lexer_token_to_str (int token, char *buf);
|
2014-07-15 02:09:50 -07:00
|
|
|
/* returns the string representation of a keyword */
|
2014-06-20 07:31:45 -07:00
|
|
|
const char *eo_lexer_keyword_str_get(int kw);
|
2014-07-15 02:09:50 -07:00
|
|
|
/* checks if the given keyword is a builtin type */
|
2014-06-26 04:02:19 -07:00
|
|
|
Eina_Bool eo_lexer_is_type_keyword(int kw);
|
2014-08-07 07:15:07 -07:00
|
|
|
/* gets a keyword id from the keyword string */
|
|
|
|
int eo_lexer_keyword_str_to_id(const char *kw);
|
2014-07-15 02:09:50 -07:00
|
|
|
/* gets the C type name for a builtin type name - e.g. uchar -> unsigned char */
|
2014-06-26 04:02:19 -07:00
|
|
|
const char *eo_lexer_get_c_type (int kw);
|
2014-07-22 07:27:11 -07:00
|
|
|
/* save, restore and clear context (line, column, line string) */
|
|
|
|
void eo_lexer_context_push (Eo_Lexer *ls);
|
|
|
|
void eo_lexer_context_pop (Eo_Lexer *ls);
|
|
|
|
void eo_lexer_context_restore(Eo_Lexer *ls);
|
|
|
|
void eo_lexer_context_clear (Eo_Lexer *ls);
|
2014-02-11 05:42:59 -08:00
|
|
|
|
2018-03-15 06:12:18 -07:00
|
|
|
/* node ("heap") management */
|
2018-03-15 05:53:50 -07:00
|
|
|
Eolian_Object *eo_lexer_node_new(Eo_Lexer *ls, size_t objsize);
|
2018-03-15 07:31:08 -07:00
|
|
|
Eolian_Object *eo_lexer_node_release(Eo_Lexer *ls, Eolian_Object *obj);
|
|
|
|
|
|
|
|
static inline Eolian_Type *
|
|
|
|
eo_lexer_type_new(Eo_Lexer *ls)
|
|
|
|
{
|
|
|
|
return (Eolian_Type *)eo_lexer_node_new(ls, sizeof(Eolian_Type));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline Eolian_Type *
|
|
|
|
eo_lexer_type_release(Eo_Lexer *ls, Eolian_Type *tp)
|
|
|
|
{
|
|
|
|
return (Eolian_Type *)eo_lexer_node_release(ls, (Eolian_Object *)tp);
|
|
|
|
}
|
2018-03-15 05:53:50 -07:00
|
|
|
|
2018-03-15 07:42:40 -07:00
|
|
|
static inline Eolian_Typedecl *
|
|
|
|
eo_lexer_typedecl_new(Eo_Lexer *ls)
|
|
|
|
{
|
|
|
|
return (Eolian_Typedecl *)eo_lexer_node_new(ls, sizeof(Eolian_Typedecl));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline Eolian_Typedecl *
|
|
|
|
eo_lexer_typedecl_release(Eo_Lexer *ls, Eolian_Typedecl *tp)
|
|
|
|
{
|
|
|
|
return (Eolian_Typedecl *)eo_lexer_node_release(ls, (Eolian_Object *)tp);
|
|
|
|
}
|
|
|
|
|
2019-09-24 08:06:24 -07:00
|
|
|
static inline Eolian_Constant *
|
|
|
|
eo_lexer_constant_new(Eo_Lexer *ls)
|
2018-03-15 07:42:40 -07:00
|
|
|
{
|
2019-09-24 08:06:24 -07:00
|
|
|
return (Eolian_Constant *)eo_lexer_node_new(ls, sizeof(Eolian_Constant));
|
2018-03-15 07:42:40 -07:00
|
|
|
}
|
|
|
|
|
2019-09-24 08:06:24 -07:00
|
|
|
static inline Eolian_Constant *
|
|
|
|
eo_lexer_constant_release(Eo_Lexer *ls, Eolian_Constant *var)
|
2018-03-15 07:42:40 -07:00
|
|
|
{
|
2019-09-24 08:06:24 -07:00
|
|
|
return (Eolian_Constant *)eo_lexer_node_release(ls, (Eolian_Object *)var);
|
2018-03-15 07:42:40 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline Eolian_Expression *
|
|
|
|
eo_lexer_expr_new(Eo_Lexer *ls)
|
|
|
|
{
|
|
|
|
return (Eolian_Expression *)eo_lexer_node_new(ls, sizeof(Eolian_Expression));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline Eolian_Expression *
|
|
|
|
eo_lexer_expr_release(Eo_Lexer *ls, Eolian_Expression *expr)
|
|
|
|
{
|
|
|
|
return (Eolian_Expression *)eo_lexer_node_release(ls, (Eolian_Object *)expr);
|
|
|
|
}
|
|
|
|
|
2018-03-15 08:08:59 -07:00
|
|
|
static inline Eolian_Expression *
|
|
|
|
eo_lexer_expr_release_ref(Eo_Lexer *ls, Eolian_Expression *expr)
|
|
|
|
{
|
|
|
|
eolian_object_ref(&expr->base);
|
|
|
|
return eo_lexer_expr_release(ls, expr);
|
|
|
|
}
|
|
|
|
|
2019-06-21 06:05:50 -07:00
|
|
|
static inline Eolian_Error *
|
|
|
|
eo_lexer_error_new(Eo_Lexer *ls)
|
|
|
|
{
|
|
|
|
return (Eolian_Error *)eo_lexer_node_new(ls, sizeof(Eolian_Error));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline Eolian_Error *
|
|
|
|
eo_lexer_error_release(Eo_Lexer *ls, Eolian_Error *err)
|
|
|
|
{
|
|
|
|
return (Eolian_Error *)eo_lexer_node_release(ls, (Eolian_Object *)err);
|
|
|
|
}
|
|
|
|
|
2018-03-15 06:12:18 -07:00
|
|
|
/* "stack" management, only to protect against errors (jumps) in parsing */
|
|
|
|
void eo_lexer_dtor_push(Eo_Lexer *ls, Eina_Free_Cb free_cb, void *data);
|
|
|
|
void eo_lexer_dtor_pop(Eo_Lexer *ls);
|
|
|
|
|
2018-03-15 05:53:50 -07:00
|
|
|
|
2016-04-01 18:50:28 -07:00
|
|
|
#endif /* __EO_LEXER_H__ */
|