-- serialization functions for use by elua apps/modules
--
-- Source path: src/scripts/elua/core/serializer.lua. The change that adds this
-- module also lists scripts/elua/core/serializer.lua in eluacore_DATA in
-- src/Makefile_Elua.am (between module.lua and util.lua) so that it is
-- installed into $(datadir)/elua/core.

local M = {}

-- Returns true when `t` holds exactly the keys 1..n (an empty table counts),
-- i.e. when it can be written positionally without explicit keys.
local is_array = function(t)
    local n = 0
    -- compare against nil so that a `false` element does not end the scan
    while t[n + 1] ~= nil do n = n + 1 end
    -- every key visited by pairs() must be accounted for by the 1..n prefix
    for _ in pairs(t) do
        n = n - 1
        if n < 0 then return false end
    end
    return n == 0
end

-- Escape lookup used by escape_string. Characters without a symbolic escape
-- fall back to a fixed-width three digit decimal escape, so a following digit
-- can never be mistaken for part of the escape.
local str_escapes = setmetatable({
    ["\n"] = "\\n", ["\r"] = "\\r",
    ["\a"] = "\\a", ["\b"] = "\\b",
    ["\f"] = "\\f", ["\t"] = "\\t",
    ["\v"] = "\\v", ["\\"] = "\\\\",
    ['"' ] = '\\"', ["'" ] = "\\'"
}, {
    __index = function(self, c) return ("\\%03d"):format(c:byte()) end
})

-- Returns `s` as a quoted string literal.
local escape_string = function(s)
    -- a space optimization: decide which string quote to
    -- use as a delimiter (the one that needs less escaping)
    local nsq, ndq = 0, 0
    for _ in s:gmatch("'") do nsq = nsq + 1 end
    for _ in s:gmatch('"') do ndq = ndq + 1 end
    local sd = (ndq > nsq) and "'" or '"'
    -- only the chosen delimiter, backslashes and control bytes need escaping
    local body = s:gsub("[\\" .. sd .. "%z\001-\031]", str_escapes)
    return sd .. body .. sd
end

-- Words that must never be written as bare keys: "true", "false" and "nil"
-- are lexed as literals by the deserializer, and the rest are not valid Lua
-- names either.
local reserved = {
    ["and"] = true, ["break"] = true, ["do"] = true, ["else"] = true,
    ["elseif"] = true, ["end"] = true, ["false"] = true, ["for"] = true,
    ["function"] = true, ["goto"] = true, ["if"] = true, ["in"] = true,
    ["local"] = true, ["nil"] = true, ["not"] = true, ["or"] = true,
    ["repeat"] = true, ["return"] = true, ["then"] = true, ["true"] = true,
    ["until"] = true, ["while"] = true
}

-- Recursive worker behind M.serialize.
--   v       value to write (passed through `simp` first when given)
--   stream  function called with each output piece; called with no argument
--           once to signal that an error occurred
--   kwargs  formatting options (see M.serialize)
--   simp    optional simplifier function
--   tables  set of tables currently being written (cycle detection)
--   indent  current nesting depth, starting at 1
-- Returns true on success, false + error message otherwise.
local function serialize_fn(v, stream, kwargs, simp, tables, indent)
    if simp then
        v = simp(v)
    end
    local tv = type(v)
    if tv == "string" then
        stream(escape_string(v))
        return true
    elseif tv == "number" or tv == "boolean" then
        stream(tostring(v))
        return true
    elseif tv ~= "table" then
        stream() -- let the stream know about an error
        return false, ("invalid value type: " .. tv)
    end

    if tables[v] then
        stream() -- let the stream know about an error
        return false,
            "circular table reference detected during serialization"
    end

    local mline   = kwargs.multiline
    local indstr  = kwargs.indent
    local asstr   = kwargs.assign or "="
    local sepstr  = kwargs.table_sep or ","
    local isepstr = kwargs.item_sep
    local endsep  = kwargs.end_sep
    local optk    = kwargs.optimize_keys
    local arr     = is_array(v)
    -- items per line; pick the option matching the table kind, 0 = no limit
    local nline
    if arr then nline = kwargs.narr_line else nline = kwargs.nrec_line end
    nline = nline or 0

    tables[v] = true
    stream("{")
    if mline then stream("\n") end
    local first = true
    local n = 0 -- position within the current output line
    for key, val in (arr and ipairs or pairs)(v) do
        if first then
            first = false
        else
            stream(sepstr)
            if mline then
                if n == 0 then
                    stream("\n")
                elseif isepstr then
                    stream(isepstr)
                end
            end
        end
        if mline and indstr and n == 0 then
            for _ = 1, indent do stream(indstr) end
        end
        if not arr then
            if optk and type(key) == "string" and not reserved[key]
            and key:match("^[%a_][%w_]*$") then
                stream(key)
            else
                stream("[")
                local ret, err = serialize_fn(key, stream, kwargs, simp,
                    tables, indent + 1)
                if not ret then return ret, err end
                stream("]")
            end
            stream(asstr)
        end
        local ret, err = serialize_fn(val, stream, kwargs, simp, tables,
            indent + 1)
        if not ret then return ret, err end
        if nline > 0 then
            n = (n + 1) % nline
        else
            -- no per-line limit: never wrap again after the first item
            -- (also avoids a modulo by zero)
            n = 1
        end
    end
    if not first then
        if endsep then stream(sepstr) end
        if mline then stream("\n") end
    end
    if mline and indstr then
        for _ = 2, indent do stream(indstr) end
    end
    stream("}")
    -- the table is finished; release it so that a second, non-circular
    -- reference to the same table elsewhere is not reported as a cycle
    tables[v] = nil
    return true
end

-- compact defaults (kwargs nil or false)
local defkw = {
    multiline = false, indent = nil, assign = "=", table_sep = ",",
    end_sep = false, optimize_keys = true
}

-- pretty-printing defaults (kwargs == true)
local defkwp = {
    multiline = true, indent = "    ", assign = " = ", table_sep = ",",
    item_sep = " ", narr_line = 4, nrec_line = 2, end_sep = false,
    optimize_keys = true
}

--[[
    Serializes the given value, returning a string containing its literal
    representation. The output is compact by default (no whitespace, no
    newlines). Arrays and associative tables are written differently so that
    arrays need no explicit keys.

    Strings, numbers, booleans and tables (of those) can be serialized. Any
    other type is an error. Circular tables can't be serialized; a table
    that is merely referenced more than once is written out once per
    reference.

    Without a custom stream the function returns the output string, or
    nil + an error message (circular reference or invalid type).

    The second argument, "kwargs", is a table of options:
        multiline      boolean, false by default; pretty-printing
        indent         string, nil by default; one indent level
        assign         string, "=" by default; goes between key and value
        table_sep      string, "," by default (";" works too); separates
                       items in all cases
        item_sep       string, nil by default; comes after table_sep, but
                       only when no newline follows
        narr_line      number, 0 by default; array items per line
        nrec_line      number, 0 by default; key-value pairs per line
        end_sep        boolean, false by default; also put table_sep after
                       the last item
        optimize_keys  boolean, true by default; string keys that are valid
                       Lua names are written bare instead of as ["key"]

    If kwargs is nil or false, the defaults above are used. If kwargs is
    the boolean true, pretty-printing defaults are used (multiline is true,
    indent is 4 spaces, assign is " = ", table_sep is ",", item_sep is one
    space, narr_line is 4, nrec_line is 2, end_sep is false, optimize_keys
    is true). A kwargs table passed in is never modified.

    The third argument, "stream", is a function that is called with each
    piece of output as it is produced (useful e.g. for file I/O); it is
    called once with no argument when an error occurs. With a custom stream
    the function returns true on success or false + the error message.

    The fourth argument, "simplifier", is a function that takes a value and
    returns the value to serialize in its place. By default nothing is
    simplified.

    This function is externally available as "table_serialize".
]]
M.serialize = function(val, kwargs, stream, simplifier)
    if kwargs == true then
        kwargs = defkwp
    elseif not kwargs then
        kwargs = defkw
    elseif kwargs.optimize_keys == nil then
        -- default the option through a proxy instead of writing into the
        -- caller's table
        kwargs = setmetatable({ optimize_keys = true }, { __index = kwargs })
    end
    if stream then
        return serialize_fn(val, stream, kwargs, simplifier, {}, 1)
    end
    local pieces = {}
    local ret, err = serialize_fn(val, function(out)
        pieces[#pieces + 1] = out
    end, kwargs, simplifier, {}, 1)
    if not ret then
        return nil, err
    end
    return table.concat(pieces)
end

-- single-character escapes understood inside string literals
local simple_escapes = {
    a = "\a", b = "\b", f = "\f", n = "\n", r = "\r", t = "\t", v = "\v",
    ["\\"] = "\\", ['"'] = '"', ["'"] = "'"
}

-- Finishes lexing a number whose leading characters are already in `buf`;
-- ls.curr is the first character not yet consumed.
local read_number = function(ls, buf)
    while ls.curr and ls.curr:match("[epxEPX0-9.+-]") do
        buf[#buf + 1] = ls.curr
        ls.curr = ls.rdr()
    end
    local str = table.concat(buf)
    local num = tonumber(str)
    if not num then
        error(("%d: malformed number near '%s'"):format(ls.linenum, str), 0)
    end
    ls.tname, ls.tval = "<number>", num
    return "<number>"
end

-- Lexes a quoted string; ls.curr is the opening delimiter.
local read_string = function(ls)
    local delim = ls.curr
    ls.curr = ls.rdr()
    local buf = {}
    while ls.curr ~= delim do
        local c = ls.curr
        if c == nil then
            error(("%d: unfinished string near '<eos>'")
                :format(ls.linenum), 0)
        elseif c == "\n" or c == "\r" then
            error(("%d: unfinished string near '<string>'")
                :format(ls.linenum), 0)
        elseif c == "\\" then
            -- not complete escape sequence handling: handles only these
            -- that are or can be in the serialized output
            c = ls.rdr()
            if not c then
                error(("%d: unfinished string near '<eos>'")
                    :format(ls.linenum), 0)
            end
            local plain = simple_escapes[c]
            if plain then
                buf[#buf + 1] = plain
                ls.curr = ls.rdr()
            elseif c:match("%d") then
                -- decimal escape: one to three digits
                local digits = c
                c = ls.rdr()
                for _ = 1, 2 do
                    if not (c and c:match("%d")) then break end
                    digits = digits .. c
                    c = ls.rdr()
                end
                local code = tonumber(digits, 10)
                if code > 255 then
                    error(("%d: invalid escape sequence")
                        :format(ls.linenum), 0)
                end
                buf[#buf + 1] = string.char(code)
                ls.curr = c
            else
                error(("%d: invalid escape sequence")
                    :format(ls.linenum), 0)
            end
        else
            buf[#buf + 1] = c
            ls.curr = ls.rdr()
        end
    end
    ls.curr = ls.rdr() -- skip delim
    ls.tname, ls.tval = "<string>", table.concat(buf)
    return "<string>"
end

-- Reads the next token from the lexer state `ls` (fields: curr = current
-- character, rdr = function returning the next character or nil,
-- linenum = current line). Stores the token kind in ls.tname and its payload
-- in ls.tval, and returns the kind. Kinds are "<number>", "<string>",
-- "<name>", "true", "false", "nil", or the character itself for anything
-- else. At the end of input ls.tname becomes "<eos>" and nil is returned.
-- Raises a string error on malformed input.
local lex_get = function(ls)
    while true do
        local c = ls.curr
        if not c then
            -- mark the end explicitly so the parser never sees a stale token
            ls.tname, ls.tval = "<eos>", nil
            return nil
        end
        ls.tname, ls.tval = nil, nil
        if c == "\n" or c == "\r" then
            -- treat \r\n and \n\r pairs as a single line break
            local nxt = ls.rdr()
            if (nxt == "\n" or nxt == "\r") and nxt ~= c then
                nxt = ls.rdr()
            end
            ls.curr = nxt
            ls.linenum = ls.linenum + 1
        elseif c == " " or c == "\t" or c == "\f" or c == "\v" then
            ls.curr = ls.rdr()
        elseif c == "." or c:match("%d") then
            ls.curr = ls.rdr()
            return read_number(ls, { c })
        elseif c == "-" then
            -- the serializer writes negative numbers with a leading minus
            local nxt = ls.rdr()
            ls.curr = nxt
            if nxt and (nxt == "." or nxt:match("%d")) then
                ls.curr = ls.rdr()
                return read_number(ls, { c, nxt })
            end
            ls.tname, ls.tval = c, nil
            return c
        elseif c == '"' or c == "'" then
            return read_string(ls)
        elseif c:match("[%a_]") then
            local buf = { c }
            ls.curr = ls.rdr()
            while ls.curr and ls.curr:match("[%w_]") do
                buf[#buf + 1] = ls.curr
                ls.curr = ls.rdr()
            end
            local str = table.concat(buf)
            if str == "true" or str == "false" or str == "nil" then
                ls.tname, ls.tval = str, nil
                return str
            end
            ls.tname, ls.tval = "<name>", str
            return "<name>"
        else
            ls.curr = ls.rdr()
            ls.tname, ls.tval = c, nil
            return c
        end
    end
end

-- Checks that the current token, and then each following one, matches the
-- given kinds in order, consuming every matched token. Raises on mismatch.
local function assert_tok(ls, ...)
    for i = 1, select("#", ...) do
        local tok = select(i, ...)
        if not tok then return nil end
        if ls.tname ~= tok then
            error(("%d: unexpected symbol near '%s'"):format(ls.linenum,
                tostring(ls.tname)), 0)
        end
        lex_get(ls)
    end
end

-- Recursive descent parser: consumes one value starting at the current
-- token and returns it. Raises a string error on unexpected tokens.
local function parse(ls)
    local tok = ls.tname
    if tok == "<string>" or tok == "<number>" then
        local v = ls.tval
        lex_get(ls)
        return v
    elseif tok == "true" then
        lex_get(ls)
        return true
    elseif tok == "false" then
        lex_get(ls)
        return false
    elseif tok == "nil" then
        lex_get(ls)
        return nil
    end
    assert_tok(ls, "{")
    local tbl = {}
    -- testing for "}" before every item also accepts empty tables and a
    -- trailing separator (as written with end_sep = true)
    while ls.tname ~= "}" do
        if ls.tname == "<name>" then
            local key = ls.tval
            lex_get(ls)
            assert_tok(ls, "=")
            tbl[key] = parse(ls)
        elseif ls.tname == "[" then
            lex_get(ls)
            local key = parse(ls)
            assert_tok(ls, "]", "=")
            tbl[key] = parse(ls)
        else
            tbl[#tbl + 1] = parse(ls)
        end
        if ls.tname ~= "," and ls.tname ~= ";" then break end
        lex_get(ls)
    end
    assert_tok(ls, "}")
    return tbl
end

--[[
    Takes a previously serialized value and converts it back to the
    original. Uses a simple tokenizer and a recursive descent parser to
    build the result, so it's safe (doesn't evaluate anything). The input
    can be a string or a callable value that returns the next character on
    each call (and nil at the end).

    Returns the deserialized value on success and nil + the error message
    on failure. External as "table_deserialize".
]]
M.deserialize = function(s)
    local stream = (type(s) == "string") and s:gmatch(".") or s
    -- everything, including the first read, runs protected so that a
    -- failing reader is reported the same way as a syntax error
    local ok, res = pcall(function()
        local ls = { curr = stream(), rdr = stream, linenum = 1 }
        lex_get(ls)
        return parse(ls)
    end)
    if not ok then return nil, res end
    return res
end

return M