#ifndef __EO_LEXER_H__ #define __EO_LEXER_H__ #include #include #include #include "eolian_database.h" /* a token is an int, custom tokens start at this - single-char tokens are * simply represented by their ascii */ #define START_CUSTOM 257 enum Tokens { TOK_EQ = START_CUSTOM, TOK_NQ, TOK_GE, TOK_LE, TOK_AND, TOK_OR, TOK_LSH, TOK_RSH, TOK_DOC, TOK_STRING, TOK_CHAR, TOK_NUMBER, TOK_VALUE }; /* all keywords in eolian, they can still be used as names (they're TOK_VALUE) * they just fill in the "kw" field of the token */ #define KEYWORDS KW(class), KW(const), KW(enum), KW(return), KW(struct), \ \ KW(abstract), KW(constructor), KW(constructors), KW(data), \ KW(destructor), KW(eo), KW(eo_prefix), KW(events), KW(free), \ KW(get), KW(implements), KW(import), KW(interface), KW(keys), KW(legacy), \ KW(legacy_prefix), KW(methods), KW(mixin), KW(own), KW(params), \ KW(set), KW(type), KW(values), KW(var), KWAT(auto), KWAT(beta), \ KWAT(c_only), KWAT(class), KWAT(const), KWAT(empty), KWAT(extern), \ KWAT(free), KWAT(hot), KWAT(restart), KWAT(in), KWAT(inout), KWAT(nonull), KWAT(nullable), \ KWAT(optional), KWAT(out), KWAT(private), KWAT(property), \ KWAT(protected), KWAT(virtual_pure), KWAT(warn_unused), \ \ KW(byte), KW(ubyte), KW(char), KW(short), KW(ushort), KW(int), KW(uint), \ KW(long), KW(ulong), KW(llong), KW(ullong), \ \ KW(int8), KW(uint8), KW(int16), KW(uint16), KW(int32), KW(uint32), \ KW(int64), KW(uint64), KW(int128), KW(uint128), \ \ KW(size), KW(ssize), KW(intptr), KW(uintptr), KW(ptrdiff), \ \ KW(time), \ \ KW(float), KW(double), \ \ KW(bool), \ \ KW(void), \ \ KW(accessor), KW(array), KW(iterator), KW(hash), KW(list), \ KW(promise), \ KW(generic_value), \ \ KW(__builtin_event_cb), \ KW(__undefined_type), \ \ KW(true), KW(false), KW(null) /* "regular" keyword and @ prefixed keyword */ #define KW(x) KW_##x #define KWAT(x) KW_at_##x enum Keywords { KW_UNKNOWN = 0, KEYWORDS }; #undef KW #undef KWAT enum Numbers { NUM_INT, NUM_UINT, NUM_LONG, NUM_ULONG, NUM_LLONG, NUM_ULLONG, NUM_FLOAT, NUM_DOUBLE }; typedef union { char c; const char *s; signed int i; unsigned int u; signed long l; unsigned long ul; signed long long ll; unsigned long long ull; float f; double d; Eolian_Documentation *doc; } Eo_Token_Union; /* a token - "token" is the actual token id, "value" is the value of a token * if needed - NULL otherwise - for example the value of a TOK_VALUE, "kw" * is the keyword id if this is a keyword, it's 0 when not a keyword */ typedef struct _Eo_Token { int token, kw; Eo_Token_Union value; } Eo_Token; typedef struct _Lexer_Ctx { int line, column; const char *linestr; Eo_Token token; } Lexer_Ctx; typedef struct _Eo_Lexer_Temps { Eolian_Class *kls; Eolian_Variable *var; Eina_List *str_bufs; Eina_List *type_defs; Eina_List *type_decls; Eina_List *expr_defs; Eina_List *strs; } Eo_Lexer_Temps; /* keeps all lexer state */ typedef struct _Eo_Lexer { /* current character being tested */ int current; /* column is token aware column number, for example when lexing a keyword * it points to the beginning of it after the lexing is done, icolumn is * token unaware, always pointing to current column */ int column, icolumn; /* the current line number */ int line_number; /* t: "normal" - token to lex into, "lookahead" - a lookahead token, used * to look one token past "t", when we need to check for a token after the * current one and use it in a conditional without consuming the current * token - used in pretty few cases - because we have one extra lookahead * token, that makes our grammar LL(2) - two tokens in total */ Eo_Token t, lookahead; /* a string buffer used to keep contents of token currently being read, * if needed at all */ Eina_Strbuf *buff; /* a handle pointing to a memory mapped file representing the file we're * currently lexing */ Eina_File *handle; /* the source file name */ const char *source; /* only basename */ const char *filename; /* points to the current character in our mmapped file being lexed, just * incremented until the end */ const char *stream; /* end pointer - required to check if we've reached past the file, as * mmapped data will give us no EOF */ const char *stream_end; /* points to the current line being lexed, used by error messages to * display the current line with a caret at the respective column */ const char *stream_line; /* this is jumped to when an error happens */ jmp_buf err_jmp; /* saved context info */ Eina_List *saved_ctxs; /* represents the temporaries, every object that is allocated by the * parser is temporarily put here so the resources can be reclaimed in * case of error - and it's nulled when it's written into a more permanent * position (e.g. as part of another struct, or into nodes */ Eo_Lexer_Temps tmp; /* whether we allow lexing expression related tokens */ Eina_Bool expr_mode; /* decimal point, by default '.' */ char decpoint; } Eo_Lexer; int eo_lexer_init (void); int eo_lexer_shutdown (void); Eo_Lexer *eo_lexer_new (const char *source); void eo_lexer_free (Eo_Lexer *ls); /* gets a regular token, singlechar or one of TOK_something */ int eo_lexer_get (Eo_Lexer *ls); /* lookahead token - see Eo_Lexer */ int eo_lexer_lookahead (Eo_Lexer *ls); /* "throws" an error, with a custom message and custom token */ void eo_lexer_lex_error (Eo_Lexer *ls, const char *msg, int token); /* like above, but uses the lexstate->t.token, aka current token */ void eo_lexer_syntax_error (Eo_Lexer *ls, const char *msg); /* turns the token into a string, writes into the given buffer */ void eo_lexer_token_to_str (int token, char *buf); /* returns the string representation of a keyword */ const char *eo_lexer_keyword_str_get(int kw); /* checks if the given keyword is a builtin type */ Eina_Bool eo_lexer_is_type_keyword(int kw); /* gets a keyword id from the keyword string */ int eo_lexer_keyword_str_to_id(const char *kw); /* gets the C type name for a builtin type name - e.g. uchar -> unsigned char */ const char *eo_lexer_get_c_type (int kw); /* save, restore and clear context (line, column, line string) */ void eo_lexer_context_push (Eo_Lexer *ls); void eo_lexer_context_pop (Eo_Lexer *ls); void eo_lexer_context_restore(Eo_Lexer *ls); void eo_lexer_context_clear (Eo_Lexer *ls); #endif /* __EO_LEXER_H__ */