elua: add table serializer/deserializer

This is important for docgen use (reading theme configs).
This commit is contained in:
Daniel Kolesa 2016-07-29 14:26:55 +01:00
parent 9c8eea6ad8
commit 085623006d
2 changed files with 378 additions and 0 deletions

View File

@ -107,6 +107,7 @@ eluacoredir = $(datadir)/elua/core
eluacore_DATA = \
scripts/elua/core/gettext.lua \
scripts/elua/core/module.lua \
scripts/elua/core/serializer.lua \
scripts/elua/core/util.lua
EXTRA_DIST2 += $(eluacore_DATA)

View File

@ -0,0 +1,377 @@
-- serialization functions for use by elua apps/modules
local M = {}
--[[
    Tells whether a table can be written as a plain array literal, i.e.
    whether its keys are exactly 1..n with nothing else in it.

    The leading run of elements is counted first, then every key visited
    by pairs() is subtracted from that count; any surplus key means the
    table also carries non-sequence entries. An empty table counts as an
    array.

    Elements are tested against nil (not for truthiness) so that a `false`
    stored inside a sequence does not end the run early.
]]
local is_array = function(t)
    local n = 0
    while t[n + 1] ~= nil do n = n + 1 end
    for _ in pairs(t) do
        n = n - 1
        -- more keys than sequence elements: not a pure array
        if n < 0 then return false end
    end
    return n == 0
end
-- Maps a character to the escape sequence that represents it inside a
-- quoted string. Characters without a named escape are produced on demand
-- by __index as a three-digit decimal escape.
local str_escapes = setmetatable({
    ["\a"] = "\\a",
    ["\b"] = "\\b",
    ["\f"] = "\\f",
    ["\n"] = "\\n",
    ["\r"] = "\\r",
    ["\t"] = "\\t",
    ["\v"] = "\\v",
    ["\\"] = "\\\\",
    ["'" ] = "\\'",
    ['"' ] = '\\"'
}, {
    __index = function(_, ch)
        return ("\\%03d"):format(ch:byte())
    end
})

-- Returns `s` as a quoted string literal. The delimiter is whichever quote
-- character occurs less often in `s` (double quote on a tie), so that fewer
-- characters need escaping. Backslashes, the chosen delimiter, NUL and the
-- bytes 1..31 are replaced through str_escapes.
local escape_string = function(s)
    -- gsub's second result is the number of matches
    local _, single_count = s:gsub("'", "")
    local _, double_count = s:gsub('"', "")
    local delim = '"'
    if double_count > single_count then
        delim = "'"
    end
    local body = s:gsub("[\\" .. delim .. "%z\001-\031]", str_escapes)
    return delim .. body .. delim
end
-- Lua reserved words. A string key equal to one of these must not be
-- written as a bare name: the result would not be valid Lua, and
-- true/false/nil would be read back as values instead of keys.
local reserved_words = {
    ["and"] = true, ["break"] = true, ["do"] = true, ["else"] = true,
    ["elseif"] = true, ["end"] = true, ["false"] = true, ["for"] = true,
    ["function"] = true, ["goto"] = true, ["if"] = true, ["in"] = true,
    ["local"] = true, ["nil"] = true, ["not"] = true, ["or"] = true,
    ["repeat"] = true, ["return"] = true, ["then"] = true, ["true"] = true,
    ["until"] = true, ["while"] = true
}

--[[
    Recursive worker that writes the literal representation of `v` piece
    by piece through `stream`.

    v       the value to serialize (string, number, boolean or table)
    stream  function called with every output piece; it is called with no
            argument when an error is detected
    kwargs  options table (multiline, indent, assign, table_sep, item_sep,
            narr_line, nrec_line, end_sep, optimize_keys)
    simp    optional function; when given, every value (keys included) is
            replaced by simp(value) before being serialized
    tables  set of tables currently being serialized, used to detect
            circular references
    indent  current nesting depth, starting at 1

    Returns true on success, or false plus an error message.

    A table that is merely referenced more than once (without forming a
    cycle) is serialized once per reference.
]]
local function serialize_fn(v, stream, kwargs, simp, tables, indent)
    if simp then
        v = simp(v)
    end
    local tv = type(v)
    if tv == "string" then
        stream(escape_string(v))
        return true
    elseif tv == "number" or tv == "boolean" then
        stream(tostring(v))
        return true
    elseif tv ~= "table" then
        stream() -- let the stream know about an error
        return false, ("invalid value type: " .. tv)
    end

    local mline   = kwargs.multiline
    local indstr  = kwargs.indent
    local asstr   = kwargs.assign or "="
    local sepstr  = kwargs.table_sep or ","
    local isepstr = kwargs.item_sep
    local endsep  = kwargs.end_sep
    local optk    = kwargs.optimize_keys
    local arr     = is_array(v)

    -- items per line; arrays and records each have their own setting and
    -- an unset one means "no limit" (0), it does not borrow the other's
    local nline
    if arr then
        nline = kwargs.narr_line
    else
        nline = kwargs.nrec_line
    end
    nline = tonumber(nline) or 0

    if tables[v] then
        stream() -- let the stream know about an error
        return false,
            "circular table reference detected during serialization"
    end
    tables[v] = true

    stream("{")
    if mline then stream("\n") end
    local first = true
    local n = 0 -- position within the current output line
    for k, item in (arr and ipairs or pairs)(v) do
        if first then
            first = false
        else
            stream(sepstr)
            if mline then
                if n == 0 then
                    stream("\n")
                elseif isepstr then
                    stream(isepstr)
                end
            end
        end
        if mline and indstr and n == 0 then
            for _ = 1, indent do stream(indstr) end
        end
        if not arr then
            if optk and type(k) == "string" and not reserved_words[k]
            and k:match("^[%a_][%w_]*$") then
                stream(k)
            else
                stream("[")
                local ret, err = serialize_fn(k, stream, kwargs, simp,
                    tables, indent + 1)
                if not ret then return ret, err end
                stream("]")
            end
            stream(asstr)
        end
        local ret, err = serialize_fn(item, stream, kwargs, simp, tables,
            indent + 1)
        if not ret then return ret, err end
        if nline > 0 then
            n = (n + 1) % nline
        else
            -- no per-line limit: never wrap after the first item
            n = 1
        end
    end
    if not first then
        if endsep then stream(sepstr) end
        if mline then stream("\n") end
    end
    if mline and indstr then
        -- the closing brace sits one level shallower than the items
        for _ = 2, indent do stream(indstr) end
    end
    stream("}")
    -- this table is finished; seeing it again elsewhere is not a cycle
    tables[v] = nil
    return true
end
-- Option set used when the caller passes no kwargs (nil or false):
-- compact, single-line output. indent and item_sep are left unset.
local defkw = {
    multiline     = false,
    assign        = "=",
    table_sep     = ",",
    end_sep       = false,
    optimize_keys = true
}

-- Option set used when the caller passes `true` as kwargs:
-- pretty-printed, multi-line output.
-- NOTE(review): the M.serialize documentation describes this indent as
-- 4 spaces while the value here is a single space - confirm which one
-- is intended.
local defkwp = {
    multiline     = true,
    indent        = " ",
    assign        = " = ",
    table_sep     = ",",
    item_sep      = " ",
    narr_line     = 4,
    nrec_line     = 2,
    end_sep       = false,
    optimize_keys = true
}
--[[
Serializes the given table, returning a string containing a literal
representation of the table. It tries to be compact by default so it
avoids whitespace and newlines. Arrays and associative arrays are
serialized differently (for compact output).
Besides tables this can also serialize other Lua values. It serializes
them in the same way as values inside a table, returning their literal
representation. The serializer allows strings, numbers, booleans and
tables; a value of any other type is reported as an error.
Circular tables can't be serialized. The function normally returns either
the string output or nil + an error message (which can signal either
circular references or invalid types).
The function allows you to pass in a "kwargs" table as the second argument.
It's a table of options. Those can be multiline (boolean, false by default,
pretty much pretty-printing), indent (string, nil by default, specifies
how an indent level looks), assign (string, "=" by default, specifies how
an assignment between a key and a value looks), table_sep (table separator,
by default ",", can also be ";" for tables, separates items in all cases),
item_sep (item separator, string, nil by default, comes after table_sep
but only if it isn't followed by a newline), narr_line (number, 0 by
default, how many array elements to fit on a line), nrec_line (same,
just for key-value pairs), end_sep (boolean, false by default, makes
the serializer put table_sep after every item including the last one),
optimize_keys (boolean, true by default; string keys that can be
expressed as Lua names are written as bare names instead of bracketed
string literals).
If kwargs is nil or false, the values above are used. If kwargs is a
boolean value true, pretty-printing defaults are used (multiline is
true, indent is 4 spaces, assign is " = ", table_sep is ",", item_sep
is one space, narr_line is 4, nrec_line is 2, end_sep is false,
optimize_keys is true).
A third argument, "stream" can be passed. As a table is serialized
by pieces, "stream" is called each time a new piece is saved. It's
useful for example for file I/O. When a custom stream is supplied,
the function doesn't return a string, instead it returns true
or false depending on whether it succeeded and the error message
if any.
And finally there is the fourth argument, "simplifier". It's a
function that takes a value and "simplifies" it (returns another
value it should be replaced by). By default nothing is simplified
of course.
This function is externally available as "table_serialize".
]]
-- Public entry point of the serializer.
--
-- val         the value to serialize
-- kwargs      nil/false for the compact defaults, true for the
--             pretty-printing defaults, or a table of options
-- stream      optional function receiving each output piece; it is called
--             with no argument when an error is detected
-- simplifier  optional function applied to every value before it is
--             serialized
--
-- Without a stream: returns the serialized string, or nil plus an error
-- message. With a stream: returns true, or false plus an error message.
M.serialize = function(val, kwargs, stream, simplifier)
    if kwargs == true then
        kwargs = defkwp
    elseif not kwargs then
        kwargs = defkw
    elseif kwargs.optimize_keys == nil then
        -- apply the default through a proxy so that the options table
        -- owned by the caller is never modified
        kwargs = setmetatable({ optimize_keys = true }, { __index = kwargs })
    end
    if stream then
        return serialize_fn(val, stream, kwargs, simplifier, {}, 1)
    end
    -- no custom stream: collect the pieces and join them at the end
    local pieces = {}
    local ok, err = serialize_fn(val, function(out)
        pieces[#pieces + 1] = out
    end, kwargs, simplifier, {}, 1)
    if not ok then
        return nil, err
    end
    return table.concat(pieces)
end
-- Characters that may follow a backslash inside a string literal, mapped
-- to the character they stand for.
local lex_escapes = {
    a = "\a", b = "\b", f = "\f", n = "\n", r = "\r", t = "\t", v = "\v",
    ["\\"] = "\\", ['"'] = '"', ["'"] = "'"
}

--[[
    Reads the next token from the lexer state `ls`.

    `ls` holds the reader function (rdr, returns the next character or nil
    at the end), the lookahead character (curr) and the current line
    number (linenum). The token name is stored in ls.tname and its value,
    if any, in ls.tval.

    Token names are "<number>", "<string>", "<name>", the keywords "true",
    "false" and "nil", or the character itself for anything else. The token
    name is also returned. At the end of the input ls.tname is set to
    "<eos>" and nil is returned.

    Numbers may start with a minus sign, so that negative numbers written
    by the serializer can be read back. Only the escape sequences that the
    serializer can produce are understood inside strings.

    Malformed input raises an error (a string prefixed with the line
    number).
]]
local lex_get = function(ls)
    while true do
        local c = ls.curr
        if not c then
            -- end of input: never leave the previous token name behind
            ls.tname, ls.tval = "<eos>", nil
            return nil
        end
        ls.tname, ls.tval = nil, nil
        if c == "\n" or c == "\r" then
            -- treat \r\n and \n\r as a single line break
            local prev = c
            c = ls.rdr()
            if (c == "\n" or c == "\r") and c ~= prev then
                c = ls.rdr()
            end
            ls.curr = c
            ls.linenum = ls.linenum + 1
        elseif c == " " or c == "\t" or c == "\f" or c == "\v" then
            ls.curr = ls.rdr()
        elseif c == "." or c == "-" or c:match("[0-9]") then
            -- collect everything that can be part of a number and let
            -- tonumber decide whether it is well formed
            local buf = { c }
            ls.curr = ls.rdr()
            while ls.curr and ls.curr:match("[epxEPX0-9.+-]") do
                buf[#buf + 1] = ls.curr
                ls.curr = ls.rdr()
            end
            local str = table.concat(buf)
            local num = tonumber(str)
            if not num then
                error(("%d: malformed number near '%s'")
                    :format(ls.linenum, str), 0)
            end
            ls.tname, ls.tval = "<number>", num
            return "<number>"
        elseif c == '"' or c == "'" then
            local delim = c
            ls.curr = ls.rdr()
            local buf = {}
            while ls.curr ~= delim do
                local ch = ls.curr
                if ch == nil then
                    error(("%d: unfinished string near '<eos>'")
                        :format(ls.linenum), 0)
                elseif ch == "\n" or ch == "\r" then
                    error(("%d: unfinished string near '<string>'")
                        :format(ls.linenum), 0)
                elseif ch == "\\" then
                    ch = ls.rdr()
                    local plain = ch and lex_escapes[ch]
                    if plain then
                        buf[#buf + 1] = plain
                        ls.curr = ls.rdr()
                    elseif not ch then
                        error(("%d: unfinished string near '<eos>'")
                            :format(ls.linenum), 0)
                    elseif not ch:match("%d") then
                        error(("%d: invalid escape sequence")
                            :format(ls.linenum), 0)
                    else
                        -- decimal escape of up to three digits; the input
                        -- may end at any point while reading it
                        local digits = { ch }
                        ch = ls.rdr()
                        if ch and ch:match("%d") then
                            digits[2] = ch
                            ch = ls.rdr()
                            if ch and ch:match("%d") then
                                digits[3] = ch
                                ch = ls.rdr()
                            end
                        end
                        ls.curr = ch
                        local code = tonumber(table.concat(digits))
                        if code > 255 then
                            error(("%d: escape sequence too large")
                                :format(ls.linenum), 0)
                        end
                        buf[#buf + 1] = string.char(code)
                    end
                else
                    buf[#buf + 1] = ch
                    ls.curr = ls.rdr()
                end
            end
            ls.curr = ls.rdr() -- skip delim
            ls.tname, ls.tval = "<string>", table.concat(buf)
            return "<string>"
        elseif c:match("[%a_]") then
            local buf = { c }
            ls.curr = ls.rdr()
            while ls.curr and ls.curr:match("[%w_]") do
                buf[#buf + 1] = ls.curr
                ls.curr = ls.rdr()
            end
            local str = table.concat(buf)
            if str == "true" or str == "false" or str == "nil" then
                ls.tname, ls.tval = str, nil
                return str
            end
            ls.tname, ls.tval = "<name>", str
            return "<name>"
        else
            -- any other character is a token on its own
            ls.curr = ls.rdr()
            ls.tname, ls.tval = c, nil
            return c
        end
    end
end
-- Checks that the current token of `ls` is `tok` and advances to the next
-- one; repeats for every further token name passed. Raises an "unexpected
-- symbol" error (prefixed with the line number) on the first mismatch.
-- Returns nothing useful.
local function assert_tok(ls, tok, ...)
    if not tok then return nil end
    if ls.tname ~= tok then
        -- no current token means the input has ended; name it instead of
        -- handing nil to the %s format
        error(("%d: unexpected symbol near '%s'"):format(ls.linenum,
            ls.tname or "<eos>"), 0)
    end
    lex_get(ls)
    return assert_tok(ls, ...)
end
--[[
    Recursive descent parser: builds the value that starts at the current
    token of `ls` and leaves the lexer on the token that follows it.

    Strings, numbers, true, false and nil yield themselves. Anything else
    has to be a table constructor. Inside a constructor a field is either
    `name = value`, `[key] = value` or a bare value (appended to the array
    part). Fields are separated by "," or ";" and a separator may also
    follow the last field, which is what the serializer writes when
    end_sep is enabled.

    Raises an error on malformed input.
]]
local function parse(ls)
    local tok = ls.tname
    if tok == "<string>" or tok == "<number>" then
        local v = ls.tval
        lex_get(ls)
        return v
    elseif tok == "true" then
        lex_get(ls)
        return true
    elseif tok == "false" then
        lex_get(ls)
        return false
    elseif tok == "nil" then
        lex_get(ls)
        return nil
    end

    assert_tok(ls, "{")
    local tbl = {}
    while ls.tname ~= "}" do
        if ls.tname == "<name>" then
            local key = ls.tval
            lex_get(ls)
            assert_tok(ls, "=")
            tbl[key] = parse(ls)
        elseif ls.tname == "[" then
            lex_get(ls)
            local key = parse(ls)
            assert_tok(ls, "]", "=")
            tbl[key] = parse(ls)
        else
            tbl[#tbl + 1] = parse(ls)
        end
        -- a field must be followed by a separator or the closing brace
        if ls.tname ~= "," and ls.tname ~= ";" then break end
        -- skip the separator; stop if the input ends right after it
        if not lex_get(ls) then break end
    end
    assert_tok(ls, "}")
    return tbl
end
--[[
Takes a previously serialized table and converts it back to the original.
Uses a simple tokenizer and a recursive descent parser to build the result,
so it's safe (doesn't evaluate anything). The input can also be a callable
value that returns the next character on each call.
External as "table_deserialize". This returns the deserialized value on
success and nil + the error message on failure.
]]
-- Public entry point of the deserializer.
--
-- s  either a string holding the serialized data or a callable that
--    returns the next character on each call
--
-- Returns the deserialized value, or nil plus an error message when
-- lexing or parsing fails.
M.deserialize = function(s)
    local rdr = s
    if type(s) == "string" then
        -- iterate over the string one character at a time
        rdr = s:gmatch(".")
    end
    local ls = { curr = rdr(), rdr = rdr, linenum = 1 }
    -- fetch the first token, then parse; both steps raise on bad input
    local ok, res = pcall(lex_get, ls)
    if ok then
        ok, res = pcall(parse, ls)
    end
    if not ok then
        return nil, res
    end
    return res
end
return M