From 6220dd596836374aee6e93e0923d67c19fca5545 Mon Sep 17 00:00:00 2001 From: Daniel Kolesa Date: Tue, 6 May 2014 16:07:10 +0100 Subject: [PATCH] elua: lexer for xgettext --- src/bin/elua/modules/xgettext/lexer.lua | 275 ++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 src/bin/elua/modules/xgettext/lexer.lua diff --git a/src/bin/elua/modules/xgettext/lexer.lua b/src/bin/elua/modules/xgettext/lexer.lua new file mode 100644 index 0000000000..aef624455f --- /dev/null +++ b/src/bin/elua/modules/xgettext/lexer.lua @@ -0,0 +1,275 @@ +-- Elua xgettext: lexer + +local yield = coroutine.yield +local tconc = table.concat + +local source_to_msg = function(source) + local c = source:sub(1, 1) + local srclen = #source + if c == "@" then + if srclen <= (max_fname_len + 1) then + return source:sub(2) + else + return "..." .. source:sub(srclen - max_fname_len + 1) + end + elseif c == "=" then + return source:sub(2, max_custom_len + 1) + else + return '[string "' .. source:sub(1, max_str_len) + .. ((srclen > max_str_len) and '..."]' or '"]') + end +end + +local lex_error = function(ls, msg, tok) + msg = ("%s:%d: %s"):format(source_to_msg(ls.source), ls.line_number, msg) + if tok then + msg = msg .. " near '" .. tok .. "'" + end + error(msg, 0) +end + +local next_char = function(ls) + local c = ls.reader() + ls.current = c + return c +end + +local next_line = function(ls, cs) + local old = ls.current + assert(old == "\n" or old == "\r") + local c = next_char(ls) + if (c == "\n" or c == "\r") and c ~= old then + c = next_char(ls) + end + ls.line_number = ls.line_number + 1 + return c +end + +local read_number = function(ls, beg) + local buf = {} + if beg then buf[1] = beg end + local c = ls.current + while c == "." or c:match("%d") do + buf[#buf + 1] = c + c = next_char(ls) + end + if c == "e" or c == "E" then + buf[#buf + 1] = c + c = next_char(ls) + if c == "+" or c == "-" then + buf[#buf + 1] = c + c = next_char(ls) + end + end + while c:match("%w") do + buf[#buf + 1] = c + c = next_char(ls) + end + local str = tconc(buf) + if not tonumber(str) then + lex_error(ls, "malformed number", str) + end + return str +end + +local skip_sep = function(ls, buf) + local cnt = 0 + local s = ls.current + assert(s == "[" or s == "]") + buf[#buf + 1] = s + local c = next_char(ls) + while c == "=" do + buf[#buf + 1] = c + c = next_char(ls) + cnt = cnt + 1 + end + return c == s and cnt or ((-cnt) - 1) +end + +local read_long_string = function(ls, sep, cmt) + local buf = {} + local c = next_char(ls) + if c == "\n" or c == "\r" then c = next_line(ls) end + while true do + if not c then + lex_error(ls, tok and cmt and "unfinished long comment" + or "unfinished long string", "") + elseif c == "]" then + local tbuf = {} + if skip_sep(ls, tbuf) == sep then + c = next_char(ls) + break + elseif not cmt then + buf[#buf + 1] = tconc(tbuf) + end + c = ls.current + else + if not cmt then buf[#buf + 1] = c end + c = next_char(ls) + end + end + if not cmt then return tconc(buf) end +end + +local read_string = function(ls) + local delim = ls.current + local buf = { delim } + local c = next_char(ls) + while c ~= delim do + if not c then lex_error(ls, "unfinished string", "") + elseif c == "\n" or c == "\r" then + lex_error(ls, "unfinished string", tconc(buf)) + elseif c == "\\" then + buf[#buf + 1] = c + buf[#buf + 1] = next_char(ls) + c = next_char(ls) + else + buf[#buf + 1] = c + c = next_char(ls) + end + end + buf[#buf + 1] = c + next_char(ls) + return tconc(buf) +end + +local lex_tbl = { + ["\n"] = function(ls) next_line(ls) end, + [" " ] = function(ls) next_char(ls) end, + ["-" ] = function(ls) + local c = next_char(ls) + if c ~= "-" then + return "-" + end + c = next_char(ls) + if c == "[" then + local sep = skip_sep(ls, {}) + if sep >= 0 then + read_long_string(ls, sep, true) + return + end + end + while ls.current and ls.current ~= "\n" and ls.current ~= "\r" do + next_char(ls) + end + end, + ["[" ] = function(ls) + local buf = {} + local sep = skip_sep(ls, {}) + if sep >= 0 then + return "", read_long_string(ls, sep) + elseif sep == -1 then + return "[" + else + lex_error(ls, "invalid long string delimiter", tconc(buf)) + end + end, + ['"' ] = function(ls) + return "", read_string(ls) + end, + ["." ] = function(ls) + local c = next_char(ls) + if c:match("%d") then + return "", read_number(ls, ".") + elseif c ~= "." then + return "." + end + next_char(ls) + return ".." + end, + ["0" ] = function(ls) + return "", read_number(ls) + end +} +lex_tbl["\r"] = lex_tbl["\n"] +lex_tbl["\f"] = lex_tbl[" " ] +lex_tbl["\t"] = lex_tbl[" " ] +lex_tbl["\v"] = lex_tbl[" " ] +lex_tbl["'" ] = lex_tbl['"' ] +lex_tbl["1" ] = lex_tbl["0" ] +lex_tbl["2" ] = lex_tbl["0" ] +lex_tbl["3" ] = lex_tbl["0" ] +lex_tbl["4" ] = lex_tbl["0" ] +lex_tbl["5" ] = lex_tbl["0" ] +lex_tbl["6" ] = lex_tbl["0" ] +lex_tbl["7" ] = lex_tbl["0" ] +lex_tbl["8" ] = lex_tbl["0" ] +lex_tbl["9" ] = lex_tbl["0" ] + +local lex_default = function(ls) + local c = ls.current + if c == "_" or c:match("%a") then + local buf = {} + repeat + buf[#buf + 1] = c + c = next_char(ls) + if not c then break end + until not (c == "_" or c:match("%w")) + local str = tconc(buf) + return "", str + else + next_char(ls) + return c + end +end + +local lex_main = function(ls) + yield() + while true do + local c = ls.current + if c == nil then + return "" + end + local opt = lex_tbl[c] + if opt then + local t, v = opt(ls) + if t then yield(t, v) end + else + yield(lex_default(ls)) + end + end +end + +local strstream = function(str) + return str:gmatch(".") +end + +local skip_bom = function(rdr) + local c = rdr() + if c ~= 0xEF then return c end + c = rdr() + if c ~= 0xBB then return c end + c = rdr() + if c ~= 0xBF then return c end + return rdr() +end + +local skip_shebang = function(rdr) + local c = skip_bom(rdr) + if c == 35 then -- # + repeat + c = rdr() + until not c or is_newline(c) + local e = c + c = rdr() + if (e == 10 and c == 13) or (e == 13 and c == 10) then -- LF, CR + c = rdr() + end + end + return c +end + +return { init = function(chunkname, input) + local reader = type(input) == "string" and strstream(input) or input + local current = skip_shebang(reader) + local ls = { + reader = reader, + token = {}, + source = chunkname, + current = current, + line_number = 1 + } + local coro = coroutine.wrap(lex_main, ls) + coro(ls) + return coro +end } \ No newline at end of file