diff options
author | Daniel Kolesa <d.kolesa@osg.samsung.com> | 2016-07-29 14:26:55 +0100 |
---|---|---|
committer | Daniel Kolesa <d.kolesa@osg.samsung.com> | 2016-07-29 14:27:26 +0100 |
commit | 085623006d172d07c2eec0694845b564d8de3f12 (patch) | |
tree | c9bf93c82f0ac2a4f91b15179d3abc4f74dcc4c8 /src | |
parent | 9c8eea6ad8a5a5b4081c14a9b44b243800e28616 (diff) |
elua: add table serializer/deserializer
This is important for docgen use (reading theme configs).
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile_Elua.am | 1 | ||||
-rw-r--r-- | src/scripts/elua/core/serializer.lua | 377 |
2 files changed, 378 insertions, 0 deletions
diff --git a/src/Makefile_Elua.am b/src/Makefile_Elua.am index 277e556625..96a6bf5e4e 100644 --- a/src/Makefile_Elua.am +++ b/src/Makefile_Elua.am | |||
@@ -107,6 +107,7 @@ eluacoredir = $(datadir)/elua/core | |||
107 | eluacore_DATA = \ | 107 | eluacore_DATA = \ |
108 | scripts/elua/core/gettext.lua \ | 108 | scripts/elua/core/gettext.lua \ |
109 | scripts/elua/core/module.lua \ | 109 | scripts/elua/core/module.lua \ |
110 | scripts/elua/core/serializer.lua \ | ||
110 | scripts/elua/core/util.lua | 111 | scripts/elua/core/util.lua |
111 | 112 | ||
112 | EXTRA_DIST2 += $(eluacore_DATA) | 113 | EXTRA_DIST2 += $(eluacore_DATA) |
diff --git a/src/scripts/elua/core/serializer.lua b/src/scripts/elua/core/serializer.lua new file mode 100644 index 0000000000..4b61e6f766 --- /dev/null +++ b/src/scripts/elua/core/serializer.lua | |||
@@ -0,0 +1,377 @@ | |||
1 | -- serialization functions for use by elua apps/modules | ||
2 | |||
3 | local M = {} | ||
4 | |||
-- Tells whether a table is a pure sequence, i.e. its only keys are the
-- integers 1..n with no holes. The empty table counts as an array.
--
-- The sequence length is probed with an explicit nil comparison so that
-- arrays holding `false` elements are still recognized as arrays.
local is_array = function(t)
    -- length of the leading 1..n run
    local i = 0
    while t[i + 1] ~= nil do i = i + 1 end
    -- every key found by pairs() must be accounted for by that run;
    -- any surplus key means the table also has a hash part
    for _ in pairs(t) do
        i = i - 1
        if i < 0 then return false end
    end
    return i == 0
end
13 | |||
-- Lookup table used when escaping string literals: maps a raw character
-- to the escape sequence written to the output.
local str_escapes = {
    ["\a"] = "\\a",
    ["\b"] = "\\b",
    ["\f"] = "\\f",
    ["\n"] = "\\n",
    ["\r"] = "\\r",
    ["\t"] = "\\t",
    ["\v"] = "\\v",
    ["\\"] = "\\\\",
    ['"' ] = '\\"',
    ["'" ] = "\\'"
}

-- Characters without a named escape fall back to a three-digit decimal
-- escape (\ddd) computed from their byte value.
setmetatable(str_escapes, {
    __index = function(_, ch)
        return string.format("\\%03d", string.byte(ch))
    end
})
23 | |||
-- Turns a string into a quoted literal. The delimiter is whichever quote
-- character occurs less often in the input (double quote on a tie), which
-- keeps the number of escapes low. Backslashes, the chosen delimiter, NUL
-- and the control characters \001-\031 are replaced via str_escapes.
local escape_string = function(s)
    -- gsub's second result is the number of matches
    local _, nsingle = s:gsub("'", "")
    local _, ndouble = s:gsub('"', "")
    local delim = '"'
    if ndouble > nsingle then
        delim = "'"
    end
    local body = s:gsub("[\\" .. delim .. "%z\001-\031]", str_escapes)
    return delim .. body .. delim
end
33 | |||
-- Reserved words must never be written as bare keys: the result would not
-- be a valid Lua table constructor, and "true"/"false"/"nil" would be read
-- back as values rather than as keys.
local lua_keywords = {
    ["and"] = true, ["break"] = true, ["do"] = true, ["else"] = true,
    ["elseif"] = true, ["end"] = true, ["false"] = true, ["for"] = true,
    ["function"] = true, ["goto"] = true, ["if"] = true, ["in"] = true,
    ["local"] = true, ["nil"] = true, ["not"] = true, ["or"] = true,
    ["repeat"] = true, ["return"] = true, ["then"] = true, ["true"] = true,
    ["until"] = true, ["while"] = true
}

--[[
    Recursive worker that writes the literal form of a value piece by piece.

    v       the value to serialize (string, number, boolean or table)
    stream  function called with every output piece; called with no
            argument to signal that an error occurred
    kwargs  formatting options (multiline, indent, assign, table_sep,
            item_sep, end_sep, optimize_keys, narr_line, nrec_line)
    simp    optional function applied to every value (and key) before it
            is serialized; its result replaces the value
    tables  set of the tables currently being serialized, used to detect
            circular references
    indent  current nesting depth (1 for the outermost value)

    Returns true on success, or false plus an error message when a circular
    reference or an unsupported value type is encountered.
]]
local function serialize_fn(v, stream, kwargs, simp, tables, indent)
    if simp then
        v = simp(v)
    end
    local tv = type(v)
    if tv == "string" then
        stream(escape_string(v))
        return true
    end
    if tv == "number" or tv == "boolean" then
        stream(tostring(v))
        return true
    end
    if tv ~= "table" then
        stream() -- let the stream know about an error
        return false, ("invalid value type: " .. tv)
    end

    local mline   = kwargs.multiline
    local indstr  = kwargs.indent
    local asstr   = kwargs.assign or "="
    local sepstr  = kwargs.table_sep or ","
    local isepstr = kwargs.item_sep
    local endsep  = kwargs.end_sep
    local optk    = kwargs.optimize_keys
    local arr     = is_array(v)
    -- items per line; chosen with an explicit branch so that an array
    -- never silently picks up nrec_line when narr_line is unset
    local nline
    if arr then
        nline = kwargs.narr_line
    else
        nline = kwargs.nrec_line
    end
    nline = nline or 0

    if tables[v] then
        stream() -- let the stream know about an error
        return false,
            "circular table reference detected during serialization"
    end
    tables[v] = true

    stream("{")
    if mline then stream("\n") end
    local first = true
    local n = 0 -- position within the current output line
    for k, item in (arr and ipairs or pairs)(v) do
        if first then
            first = false
        else
            stream(sepstr)
            if mline then
                if n == 0 then
                    stream("\n")
                elseif isepstr then
                    stream(isepstr)
                end
            end
        end
        if mline and indstr and n == 0 then
            for _ = 1, indent do stream(indstr) end
        end
        if not arr then
            if optk and type(k) == "string"
            and k:match("^[%a_][%w_]*$") and not lua_keywords[k] then
                stream(k)
            else
                stream("[")
                local ret, err = serialize_fn(k, stream, kwargs, simp,
                    tables, indent + 1)
                if not ret then return ret, err end
                stream("]")
            end
            stream(asstr)
        end
        local ret, err = serialize_fn(item, stream, kwargs, simp, tables,
            indent + 1)
        if not ret then return ret, err end
        if nline > 0 then
            n = (n + 1) % nline
        else
            -- no per-line limit: never wrap again (and never evaluate
            -- "% 0", which raises an error on integer operands)
            n = 1
        end
    end
    if not first then
        if endsep then stream(sepstr) end
        if mline then stream("\n") end
    end
    if mline and indstr then
        for _ = 2, indent do stream(indstr) end
    end
    stream("}")
    -- the table is finished, so seeing it again elsewhere in the value is
    -- a shared reference rather than a cycle
    tables[v] = nil
    return true
end
114 | |||
-- Default (compact) serializer options: no newlines, no indentation.
-- "indent" is deliberately absent, which reads back as nil.
local defkw = {
    multiline = false, assign = "=", table_sep = ",",
    end_sep = false, optimize_keys = true
}

-- Pretty-printing serializer options, selected by passing `true` as kwargs.
-- One indent level is four spaces, as documented for M.serialize.
local defkwp = {
    multiline = true, indent = "    ", assign = " = ", table_sep = ",",
    item_sep = " ", narr_line = 4, nrec_line = 2, end_sep = false,
    optimize_keys = true
}
125 | |||
--[[
    Serializes the given table, returning a string containing a literal
    representation of the table. It tries to be compact by default so it
    avoids whitespace and newlines. Arrays and associative arrays are
    serialized differently (for compact output).

    Besides tables this can also serialize other Lua values. It serializes
    them in the same way as values inside a table, returning their literal
    representation. The serializer allows strings, numbers, booleans and
    tables; any other type is reported as an error.

    Circular tables can't be serialized. The function normally returns either
    the string output or nil + an error message (which can signal either
    circular references or invalid types).

    The function allows you to pass in a "kwargs" table as the second argument.
    It's a table of options. Those can be multiline (boolean, false by default,
    pretty much pretty-printing), indent (string, nil by default, specifies
    how an indent level looks), assign (string, "=" by default, specifies how
    an assignment between a key and a value looks), table_sep (table separator,
    by default ",", can also be ";" for tables, separates items in all cases),
    item_sep (item separator, string, nil by default, comes after table_sep
    but only if it isn't followed by a newline), narr_line (number, 0 by
    default, how many array elements to fit on a line), nrec_line (same,
    just for key-value pairs), end_sep (boolean, false by default, makes
    the serializer put table_sep after every item including the last one),
    optimize_keys (boolean, true by default, optimizes string keys so
    that it doesn't use string literals for keys that can be expressed
    as Lua names).

    If kwargs is nil or false, the values above are used. If kwargs is a
    boolean value true, pretty-printing defaults are used (multiline is
    true, indent is 4 spaces, assign is " = ", table_sep is ",", item_sep
    is one space, narr_line is 4, nrec_line is 2, end_sep is false,
    optimize_keys is true). A kwargs table passed by the caller is never
    modified.

    A third argument, "stream" can be passed. As a table is serialized
    by pieces, "stream" is called each time a new piece is saved. It's
    useful for example for file I/O. When a custom stream is supplied,
    the function doesn't return a string, instead it returns true
    or false depending on whether it succeeded and the error message
    if any. On failure the stream is called once with no argument.

    And finally there is the fourth argument, "simplifier". It's a
    function that takes a value and "simplifies" it (returns another
    value it should be replaced by). By default nothing is simplified
    of course.

    This function is externally available as "table_serialize".
]]
M.serialize = function(val, kwargs, stream, simplifier)
    if kwargs == true then
        kwargs = defkwp
    elseif not kwargs then
        kwargs = defkw
    elseif kwargs.optimize_keys == nil then
        -- apply the default through a proxy instead of writing into the
        -- table owned by the caller
        kwargs = setmetatable({ optimize_keys = true }, { __index = kwargs })
    end
    if stream then
        return serialize_fn(val, stream, kwargs, simplifier, {}, 1)
    end
    -- no custom stream: collect the pieces and join them at the end
    local pieces = {}
    local ok, err = serialize_fn(val, function(out)
        pieces[#pieces + 1] = out
    end, kwargs, simplifier, {}, 1)
    if not ok then
        return nil, err
    end
    return table.concat(pieces)
end
199 | |||
-- Single-character escapes understood inside string literals, keyed by
-- the character that follows the backslash.
local lex_escapes = {
    a = "\a", b = "\b", f = "\f", n = "\n", r = "\r", t = "\t", v = "\v",
    ["\\"] = "\\", ['"'] = '"', ["'"] = "'"
}

-- Raises a lexer error prefixed with the current line number. Level 0
-- keeps error() from adding its own position information.
local lex_error = function(ls, msg)
    error(("%d: %s"):format(ls.linenum, msg), 0)
end

-- Reads the rest of a numeric literal. "buf" holds the characters already
-- consumed (a sign, a digit or a dot); ls.curr is the next unread one.
local lex_number = function(ls, buf)
    while ls.curr and ls.curr:match("[epxEPX0-9.+-]") do
        buf[#buf + 1] = ls.curr
        ls.curr = ls.rdr()
    end
    local str = table.concat(buf)
    local num = tonumber(str)
    if not num then
        lex_error(ls, ("malformed number near '%s'"):format(str))
    end
    ls.tname, ls.tval = "<number>", num
    return "<number>"
end

-- Reads a quoted string; ls.curr is the opening delimiter. Escape handling
-- is not complete: it covers only the sequences that are or can be in the
-- serialized output.
local lex_string = function(ls)
    local delim = ls.curr
    ls.curr = ls.rdr()
    local buf = {}
    while ls.curr ~= delim do
        local c = ls.curr
        if c == nil then
            lex_error(ls, "unfinished string near '<eos>'")
        elseif c == "\n" or c == "\r" then
            lex_error(ls, "unfinished string near '<string>'")
        elseif c == "\\" then
            c = ls.rdr()
            if c == nil then
                lex_error(ls, "unfinished string near '<eos>'")
            end
            local plain = lex_escapes[c]
            if plain then
                buf[#buf + 1] = plain
                ls.curr = ls.rdr()
            elseif c:match("%d") then
                -- decimal escape: one to three digits
                local digits = { c }
                c = ls.rdr()
                while #digits < 3 and c and c:match("%d") do
                    digits[#digits + 1] = c
                    c = ls.rdr()
                end
                ls.curr = c
                local code = tonumber(table.concat(digits), 10)
                if code > 255 then
                    lex_error(ls, "escape sequence too large")
                end
                buf[#buf + 1] = string.char(code)
            else
                lex_error(ls, "invalid escape sequence")
            end
        else
            buf[#buf + 1] = c
            ls.curr = ls.rdr()
        end
    end
    ls.curr = ls.rdr() -- skip delim
    ls.tname, ls.tval = "<string>", table.concat(buf)
    return "<string>"
end

--[[
    Reads the next token from the lexer state "ls", which has the fields
    curr (current character, nil at end of input), rdr (function returning
    the next character) and linenum (current line number).

    The token is stored in ls.tname (and its value in ls.tval for numbers,
    strings and names) and its name is returned. Token names are
    "<number>", "<string>", "<name>", the words "true", "false" and "nil",
    or the character itself for anything else. At end of input ls.tname is
    set to "<eos>" and nil is returned, so that a stale token is never
    mistaken for new input.

    Errors (malformed numbers, unfinished strings, bad escapes) are raised
    with error() as "<line>: <message>".
]]
local lex_get = function(ls)
    while true do
        local c = ls.curr
        if not c then
            ls.tname, ls.tval = "<eos>", nil
            return nil
        end
        ls.tname, ls.tval = nil, nil
        if c == "\n" or c == "\r" then
            -- treat "\r\n" and "\n\r" as a single line break
            local prev = c
            c = ls.rdr()
            if (c == "\n" or c == "\r") and c ~= prev then
                c = ls.rdr()
            end
            ls.curr = c
            ls.linenum = ls.linenum + 1
        elseif c == " " or c == "\t" or c == "\f" or c == "\v" then
            ls.curr = ls.rdr()
        elseif c == "." or c:match("[0-9]") then
            ls.curr = ls.rdr()
            return lex_number(ls, { c })
        elseif c == "-" then
            -- the serializer writes negative numbers with a leading
            -- minus sign; a lone "-" is returned as an ordinary symbol
            ls.curr = ls.rdr()
            if ls.curr and ls.curr:match("[0-9.]") then
                return lex_number(ls, { c })
            end
            ls.tname, ls.tval = c, nil
            return c
        elseif c == '"' or c == "'" then
            return lex_string(ls)
        elseif c:match("[%a_]") then
            local buf = { c }
            ls.curr = ls.rdr()
            while ls.curr and ls.curr:match("[%w_]") do
                buf[#buf + 1] = ls.curr
                ls.curr = ls.rdr()
            end
            local str = table.concat(buf)
            if str == "true" or str == "false" or str == "nil" then
                ls.tname, ls.tval = str, nil
                return str
            end
            ls.tname, ls.tval = "<name>", str
            return "<name>"
        else
            ls.curr = ls.rdr()
            ls.tname, ls.tval = c, nil
            return c
        end
    end
end
312 | |||
-- Checks that the current token and the ones following it match the given
-- token names in order, advancing the lexer past each match. Checking
-- stops at the first nil name. A mismatch raises
-- "<line>: unexpected symbol near '<token>'".
local function assert_tok(ls, tok, ...)
    local expected = { tok, ... }
    local count = 1 + select("#", ...)
    for i = 1, count do
        local want = expected[i]
        if not want then return nil end
        if ls.tname ~= want then
            error(("%d: unexpected symbol near '%s'"):format(ls.linenum,
                ls.tname), 0)
        end
        lex_get(ls)
    end
end
322 | |||
--[[
    Recursive descent parser for one value, starting at the current token
    of "ls" (the lexer must already have produced a token). Scalars are
    returned directly; a "{" starts a table whose items may be

        name = value        string key
        [value] = value     arbitrary key
        value               positional item

    separated by "," or ";". A separator directly before the closing "}" is
    accepted, which is what the serializer produces with end_sep enabled.
    Positional items are numbered consecutively from 1 even when an item is
    nil, like in a Lua table constructor.

    Syntax errors are raised with error().
]]
local function parse(ls)
    local tok = ls.tname
    if tok == "<string>" or tok == "<number>" then
        local v = ls.tval
        lex_get(ls)
        return v
    elseif tok == "true" then
        lex_get(ls)
        return true
    elseif tok == "false" then
        lex_get(ls)
        return false
    elseif tok == "nil" then
        lex_get(ls)
        return nil
    end

    assert_tok(ls, "{")
    local tbl = {}
    local npos = 0 -- number of positional items seen so far
    while ls.tname ~= "}" do
        if ls.tname == "<name>" then
            local key = ls.tval
            lex_get(ls)
            assert_tok(ls, "=")
            tbl[key] = parse(ls)
        elseif ls.tname == "[" then
            lex_get(ls)
            local key = parse(ls)
            assert_tok(ls, "]", "=")
            tbl[key] = parse(ls)
        else
            npos = npos + 1
            tbl[npos] = parse(ls)
        end
        -- no separator means the table has to end here
        if ls.tname ~= "," and ls.tname ~= ";" then break end
        -- skip the separator; input ending here is caught just below
        if not lex_get(ls) then break end
    end
    assert_tok(ls, "}")
    return tbl
end
358 | |||
--[[
    Takes a previously serialized table and converts it back to the original.
    Uses a simple tokenizer and a recursive descent parser to build the result,
    so it's safe (doesn't evaluate anything). The input can also be a callable
    value that returns the next character on each call (and nil at the end).
    External as "table_deserialize". This returns the deserialized value on
    success and nil + the error message on failure; an input that is neither
    a string nor callable is reported the same way.
]]
M.deserialize = function(s)
    local stream = s
    if type(s) == "string" then
        stream = s:gmatch(".")
    end
    -- everything that touches the input runs under pcall, including the
    -- very first read, so a bad reader cannot raise past this function
    local ok, res = pcall(function()
        local ls = { curr = stream(), rdr = stream, linenum = 1 }
        lex_get(ls)
        return parse(ls)
    end)
    if not ok then
        return nil, res
    end
    return res
end
376 | |||
377 | return M | ||