forked from enlightenment/efl
eolian: comment eo_lexer.h
This commit is contained in:
parent
958e57f8ae
commit
5c3cc82bdd
|
@ -7,6 +7,8 @@
|
|||
#include <Eolian.h>
|
||||
#include "eo_definitions.h"
|
||||
|
||||
/* a token is an int, custom tokens start at this - single-char tokens are
|
||||
* simply represented by their ascii */
|
||||
#define START_CUSTOM 257
|
||||
|
||||
enum Tokens
|
||||
|
@ -14,6 +16,8 @@ enum Tokens
|
|||
TOK_COMMENT = START_CUSTOM, TOK_EOF, TOK_VALUE
|
||||
};
|
||||
|
||||
/* all keywords in eolian, they can still be used as names (they're TOK_VALUE)
|
||||
* they just fill in the "kw" field of the token */
|
||||
#define KEYWORDS KW(class), KW(const), KW(private), KW(protected), \
|
||||
KW(return), KW(signed), KW(struct), KW(unsigned), KW(virtual), \
|
||||
\
|
||||
|
@ -42,6 +46,7 @@ enum Tokens
|
|||
\
|
||||
KW(true), KW(false), KW(null)
|
||||
|
||||
/* "regular" keyword and @ prefixed keyword */
|
||||
#define KW(x) KW_##x
|
||||
#define KWAT(x) KW_at_##x
|
||||
|
||||
|
@ -54,6 +59,9 @@ enum Keywords
|
|||
#undef KW
|
||||
#undef KWAT
|
||||
|
||||
/* a token - "token" is the actual token id, "value" is the value of a token
|
||||
* if needed - NULL otherwise - for example the value of a TOK_VALUE, "kw"
|
||||
* is the keyword id if this is a keyword, it's 0 when not a keyword */
|
||||
typedef struct _Eo_Token
|
||||
{
|
||||
int token;
|
||||
|
@ -68,6 +76,9 @@ enum Nodes
|
|||
NODE_STRUCT
|
||||
};
|
||||
|
||||
/* represents a node, aka a result of parsing - currently class, typedef
|
||||
* or struct, they're all stored in a list in lexer state and their type
|
||||
* is determined by enum Nodes above */
|
||||
typedef struct _Eo_Node
|
||||
{
|
||||
unsigned char type;
|
||||
|
@ -79,21 +90,49 @@ typedef struct _Eo_Node
|
|||
};
|
||||
} Eo_Node;
|
||||
|
||||
/* keeps all lexer state */
|
||||
typedef struct _Eo_Lexer
|
||||
{
|
||||
/* current character being tested */
|
||||
int current;
|
||||
/* column is token aware column number, for example when lexing a keyword
|
||||
* it points to the beginning of it after the lexing is done, icolumn is
|
||||
* token unaware, always pointing to current column */
|
||||
int column, icolumn;
|
||||
/* the current line number */
|
||||
int line_number;
|
||||
/* t: "normal" - token to lex into, "lookahead" - a lookahead token, used
|
||||
* to look one token past "t", when we need to check for a token after the
|
||||
* current one and use it in a conditional without consuming the current
|
||||
* token - used in pretty few cases - because we have one extra lookahead
|
||||
* token, that makes our grammar LL(2) - two tokens in total */
|
||||
Eo_Token t, lookahead;
|
||||
/* a string buffer used to keep contents of token currently being read,
|
||||
* if needed at all */
|
||||
Eina_Strbuf *buff;
|
||||
/* a handle pointing to a memory mapped file representing the file we're
|
||||
* currently lexing */
|
||||
Eina_File *handle;
|
||||
/* the source file name */
|
||||
const char *source;
|
||||
/* points to the current character in our mmapped file being lexed, just
|
||||
* incremented until the end */
|
||||
const char *stream;
|
||||
/* end pointer - required to check if we've reached past the file, as
|
||||
* mmapped data will give us no EOF */
|
||||
const char *stream_end;
|
||||
/* points to the current line being lexed, used by error messages to
|
||||
* display the current line with a caret at the respective column */
|
||||
const char *stream_line;
|
||||
/* this is jumped to when an error happens */
|
||||
jmp_buf err_jmp;
|
||||
|
||||
/* represents the results of parsing */
|
||||
Eina_List *nodes;
|
||||
/* represents the temporaries, every object that is allocated by the
|
||||
* parser is temporarily put here so the resources can be reclaimed in
|
||||
* case of error - and it's nulled when it's written into a more permanent
|
||||
* position (e.g. as part of another struct, or into nodes */
|
||||
Eo_Lexer_Temps tmp;
|
||||
} Eo_Lexer;
|
||||
|
||||
|
@ -101,17 +140,31 @@ int eo_lexer_init (void);
|
|||
int eo_lexer_shutdown (void);
|
||||
Eo_Lexer *eo_lexer_new (const char *source);
|
||||
void eo_lexer_free (Eo_Lexer *ls);
|
||||
/* gets a TOK_VALUE balanced token, aka keeps lexing everything until the
|
||||
* "end" character, but keeps it balanced, so if it hits "beg" during lexing,
|
||||
* it won't end at the next "end", useful for lexing between () or [] */
|
||||
int eo_lexer_get_balanced (Eo_Lexer *ls, char beg, char end);
|
||||
/* gets a TOK_VALUE until the "end" character */
|
||||
int eo_lexer_get_until (Eo_Lexer *ls, char end);
|
||||
/* gets a regular token, singlechar or one of TOK_something */
|
||||
int eo_lexer_get (Eo_Lexer *ls);
|
||||
/* like above, but allows you to specify a list of custom characters that can
|
||||
* be used as part of identifiers */
|
||||
int eo_lexer_get_ident (Eo_Lexer *ls, const char *chars);
|
||||
/* lookahead token - see Eo_Lexer */
|
||||
int eo_lexer_lookahead (Eo_Lexer *ls);
|
||||
int eo_lexer_lookahead_ident(Eo_Lexer *ls, const char *chars);
|
||||
/* "throws" an error, with a custom message and custom token */
|
||||
void eo_lexer_lex_error (Eo_Lexer *ls, const char *msg, int token);
|
||||
/* like above, but uses the lexstate->t.token, aka current token */
|
||||
void eo_lexer_syntax_error (Eo_Lexer *ls, const char *msg);
|
||||
/* turns the token into a string, writes into the given buffer */
|
||||
void eo_lexer_token_to_str (int token, char *buf);
|
||||
/* returns the string representation of a keyword */
|
||||
const char *eo_lexer_keyword_str_get(int kw);
|
||||
/* checks if the given keyword is a builtin type */
|
||||
Eina_Bool eo_lexer_is_type_keyword(int kw);
|
||||
/* gets the C type name for a builtin type name - e.g. uchar -> unsigned char */
|
||||
const char *eo_lexer_get_c_type (int kw);
|
||||
|
||||
extern int _eo_lexer_log_dom;
|
||||
|
|
Loading…
Reference in New Issue