util-jsn.lmt /size: 16 Kb    last modification: 2024-01-16 10:22
-- Register this file in the global module table (created on demand).

if not modules then
    modules = { }
end

modules ['util-jsn'] = {
    version   = 1.001,
    comment   = "companion to m-json.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- Of course we could make a nice complete parser with proper error messages but
10-- as json is generated programmatically errors are systematic and we can assume
11-- a correct stream. If not, we have some fatal error anyway. So, we can just rely
12-- on strings being strings (apart from the unicode escape which is not in 5.1) and
13-- as we first catch known types we just assume that anything else is a number.
14--
15-- Reminder for me: check usage in framework and extend when needed. Also document
16-- it in the cld lib documentation.
17--
18-- Upgraded for handling the somewhat more fax server templates.
19
-- When the module has been loaded before we hand back the table that was
-- registered then. The test is on utilities.json so that is also what gets
-- returned: a global 'json' need not exist or can be some unrelated library.

if utilities and utilities.json then
    return utilities.json
end
23
24local P, V, R, S, C, Cc, Cs, Ct, Cg = lpeg.P, lpeg.V, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cg
25----- Cf = lpeg.Cf
26local lpegmatch = lpeg.match
27local format, gsub = string.format, string.gsub
28local formatters = string.formatters
29local utfchar = utf.char
30local concat, sortedkeys = table.concat, table.sortedkeys
31
32local tonumber, tostring, rawset, type, next = tonumber, tostring, rawset, type, next
33
-- The module table: reuse one that is already registered, else start fresh.

local json = utilities.json
if not json then
    json = { }
end
utilities.json = json
36
37do
38
39    -- \\ \/ \b \f \n \r \t \uHHHH
40
41    local lbrace     = P("{")
42    local rbrace     = P("}")
43    local lparent    = P("[")
44    local rparent    = P("]")
45    local comma      = P(",")
46    local colon      = P(":")
47    local dquote     = P('"')
48
49    local whitespace = lpeg.patterns.whitespace
50    local optionalws = whitespace^0
51
52    local escapes    = {
53        ["b"] = "\010",
54        ["f"] = "\014",
55        ["n"] = "\n",
56        ["r"] = "\r",
57        ["t"] = "\t",
58    }
59
60    -- todo: also handle larger utf16
61
62    local escape_un  = P("\\u")/"" * (C(R("09","AF","af")^-4) / function(s)
63        return utfchar(tonumber(s,16))
64    end)
65
66    local escape_bs  = P([[\]]) / "" * (P(1) / escapes) -- if not found then P(1) is returned i.e. the to be escaped char
67
68    local jstring    = dquote * Cs((escape_un + escape_bs + (1-dquote))^0) * dquote
69    local jtrue      = P("true")  * Cc(true)
70    local jfalse     = P("false") * Cc(false)
71    local jnull      = P("null")  * Cc(nil)
72    local jnumber    = (1-whitespace-rparent-rbrace-comma)^1 / tonumber
73
74    local key        = jstring
75
76--     local jsonconverter = { "value",
77--         hash  = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace,
78--         pair  = Cg(optionalws * key * optionalws * colon * V("value")),
79--         array = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent),
80--     --  value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws,
81--         value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws,
82--     }
83
84    local jsonconverter = { "value",
85        hash  = lbrace * Cg(Ct("") * ((V("pair") * (comma * V("pair"))^0 + optionalws))) * rbrace,
86        pair  = ((optionalws * key * optionalws * colon * V("value")) % rawset),
87        array = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent),
88    --  value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws,
89        value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws,
90    }
91
92    -- local jsonconverter = { "value",
93    --     hash   = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace,
94    --     pair   = Cg(optionalws * V("string") * optionalws * colon * V("value")),
95    --     array  = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent),
96    --     string = jstring,
97    --     value  = optionalws * (V("string") + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws,
98    -- }
99
100    -- lpeg.print(jsonconverter) -- size 181
101
102    function json.tolua(str)
103        return lpegmatch(jsonconverter,str)
104    end
105
106    function json.load(filename)
107        local data = io.loaddata(filename)
108        if data then
109            return lpegmatch(jsonconverter,data)
110        end
111    end
112
113end
114
115do
116
117    -- It's pretty bad that JSON doesn't allow the trailing comma ... it's a
118    -- typical example of a spec that then forces all generators to check for
119    -- this. It's a way to make sure programmers keep jobs.
120
121    local escaper
122
123    local f_start_hash      = formatters[         '%w{' ]
124    local f_start_array     = formatters[         '%w[' ]
125    local f_start_hash_new  = formatters[ "\n" .. '%w{' ]
126    local f_start_array_new = formatters[ "\n" .. '%w[' ]
127    local f_start_hash_key  = formatters[ "\n" .. '%w"%s" : {' ]
128    local f_start_array_key = formatters[ "\n" .. '%w"%s" : [' ]
129
130    local f_stop_hash       = formatters[ "\n" .. '%w}' ]
131    local f_stop_array      = formatters[ "\n" .. '%w]' ]
132
133    local f_key_val_seq     = formatters[ "\n" .. '%w"%s" : %s'    ]
134    local f_key_val_str     = formatters[ "\n" .. '%w"%s" : "%s"'  ]
135    local f_key_val_num     = f_key_val_seq
136    local f_key_val_yes     = formatters[ "\n" .. '%w"%s" : true'  ]
137    local f_key_val_nop     = formatters[ "\n" .. '%w"%s" : false' ]
138    local f_key_val_null    = formatters[ "\n" .. '%w"%s" : null'  ]
139
140    local f_val_num         = formatters[ "\n" .. '%w%s'    ]
141    local f_val_str         = formatters[ "\n" .. '%w"%s"'  ]
142    local f_val_yes         = formatters[ "\n" .. '%wtrue'  ]
143    local f_val_nop         = formatters[ "\n" .. '%wfalse' ]
144    local f_val_null        = formatters[ "\n" .. '%wnull'  ]
145    local f_val_empty       = formatters[ "\n" .. '%w{ }'  ]
146    local f_val_seq         = f_val_num
147
148    -- no empty tables because unknown if table or hash
149
150    local t = { }
151    local n = 0
152
153    local function is_simple_table(tt) -- also used in util-tab so maybe public
154        local l = #tt
155        if l > 0 then
156            for i=1,l do
157                if type(tt[i]) == "table" then
158                    return false
159                end
160            end
161            local nn = n
162            n = n + 1 t[n] = "[ "
163            for i=1,l do
164                if i > 1 then
165                    n = n + 1 t[n] = ", "
166                end
167                local v = tt[i]
168                local tv = type(v)
169                if tv == "number" then
170                    n = n + 1 t[n] = v
171                elseif tv == "string" then
172                    n = n + 1 t[n] = '"'
173                    n = n + 1 t[n] = lpegmatch(escaper,v) or v
174                    n = n + 1 t[n] = '"'
175                elseif tv == "boolean" then
176                    n = n + 1 t[n] = v and "true" or "false"
177                elseif v then
178                    n = n + 1 t[n] = tostring(v)
179                else
180                    n = n + 1 t[n] = "null"
181                end
182            end
183            n = n + 1 t[n] = " ]"
184            local s = concat(t,"",nn+1,n)
185            n = nn
186            return s
187        end
188        return false
189    end
190
191    local function tojsonpp(root,name,depth,level,size)
192        if root then
193            local indexed = size > 0
194            n = n + 1
195            if level == 0 then
196                if indexed then
197                    t[n] = f_start_array(depth)
198                else
199                    t[n] = f_start_hash(depth)
200                end
201            elseif name then
202                if tn == "string" then
203                    name = lpegmatch(escaper,name) or name
204                elseif tn ~= "number" then
205                    name = tostring(name)
206                end
207                if indexed then
208                    t[n] = f_start_array_key(depth,name)
209                else
210                    t[n] = f_start_hash_key(depth,name)
211                end
212            else
213                if indexed then
214                    t[n] = f_start_array_new(depth)
215                else
216                    t[n] = f_start_hash_new(depth)
217                end
218            end
219            depth = depth + 1
220            if indexed then -- indexed
221                for i=1,size do
222                    if i > 1 then
223                        n = n + 1 t[n] = ","
224                    end
225                    local v  = root[i]
226                    local tv = type(v)
227                    if tv == "number" then
228                        n = n + 1 t[n] = f_val_num(depth,v)
229                    elseif tv == "string" then
230                        v = lpegmatch(escaper,v) or v
231                        n = n + 1 t[n] = f_val_str(depth,v)
232                    elseif tv == "table" then
233                        if next(v) then
234                            local st = is_simple_table(v)
235                            if st then
236                                n = n + 1 t[n] = f_val_seq(depth,st)
237                            else
238                                tojsonpp(v,nil,depth,level+1,#v)
239                            end
240                        else
241                            n = n + 1
242                            t[n] = f_val_empty(depth)
243                        end
244                    elseif tv == "boolean" then
245                        n = n + 1
246                        if v then
247                            t[n] = f_val_yes(depth,v)
248                        else
249                            t[n] = f_val_nop(depth,v)
250                        end
251                    else
252                        n = n + 1
253                        t[n] = f_val_null(depth)
254                    end
255                end
256            elseif next(root) then
257                local sk = sortedkeys(root)
258                for i=1,#sk do
259                    if i > 1 then
260                        n = n + 1 t[n] = ","
261                    end
262                    local k  = sk[i]
263                    local v  = root[k]
264                    local tv = type(v)
265                    local tk = type(k)
266                    if tv == "number" then
267                        if tk == "number" then
268                            n = n + 1 t[n] = f_key_val_num(depth,k,v)
269                        elseif tk == "string" then
270                            k = lpegmatch(escaper,k) or k
271                            n = n + 1 t[n] = f_key_val_num(depth,k,v)
272                        end
273                    elseif tv == "string" then
274                        if tk == "number" then
275                            v = lpegmatch(escaper,v) or v
276                            n = n + 1 t[n] = f_key_val_str(depth,k,v)
277                        elseif tk == "string" then
278                            k = lpegmatch(escaper,k) or k
279                            v = lpegmatch(escaper,v) or v
280                            n = n + 1 t[n] = f_key_val_str(depth,k,v)
281                        elseif i > 1 then
282                            n = n - 1
283                        end
284                    elseif tv == "table" then
285                        local l = #v
286                        if l > 0 then
287                            local st = is_simple_table(v)
288                            if not st then
289                                tojsonpp(v,k,depth,level+1,l)
290                            elseif tk == "number" then
291                                n = n + 1 t[n] = f_key_val_seq(depth,k,st)
292                            elseif tk == "string" then
293                                k = lpegmatch(escaper,k) or k
294                                n = n + 1 t[n] = f_key_val_seq(depth,k,st)
295                            end
296                        elseif next(v) then
297                            tojsonpp(v,k,depth,level+1,0)
298                        elseif i > 1 then
299                            n = n - 1
300                            -- we don't know if we have a hash or string
301                        end
302                    elseif tv == "boolean" then
303                        if tk == "number" then
304                            n = n + 1
305                            if v then
306                                t[n] = f_key_val_yes(depth,k)
307                            else
308                                t[n] = f_key_val_nop(depth,k)
309                            end
310                        elseif tk == "string" then
311                            k = lpegmatch(escaper,k) or k
312                            n = n + 1
313                            if v then
314                                t[n] = f_key_val_yes(depth,k)
315                            else
316                                t[n] = f_key_val_nop(depth,k)
317                            end
318                        elseif i > 1 then
319                            n = n - 1
320                        end
321                    else
322                        if tk == "number" then
323                            n = n + 1
324                            t[n] = f_key_val_null(depth,k)
325                        elseif tk == "string" then
326                            k = lpegmatch(escaper,k) or k
327                            n = n + 1
328                            t[n] = f_key_val_null(depth,k)
329                        elseif i > 1 then
330                            n = n - 1
331                        end
332                    end
333                end
334            end
335            n = n + 1
336            if indexed then
337                t[n] = f_stop_array(depth-1)
338            else
339                t[n] = f_stop_hash(depth-1)
340            end
341        end
342    end
343
344    local function tojson(value,n)
345        local kind = type(value)
346        if kind == "table" then
347            local done = false
348            local size = #value
349            if size == 0 then
350                for k, v in next, value do
351                    if done then
352                     -- n = n + 1 ; t[n] = ","
353                        n = n + 1 ; t[n] = ',"'
354                    else
355                     -- n = n + 1 ; t[n] = "{"
356                        n = n + 1 ; t[n] = '{"'
357                        done = true
358                    end
359                    n = n + 1 ; t[n] = lpegmatch(escaper,k) or k
360                    n = n + 1 ; t[n] = '":'
361                    t, n = tojson(v,n)
362                end
363                if done then
364                    n = n + 1 ; t[n] = "}"
365                else
366                    n = n + 1 ; t[n] = "{}"
367                end
368            elseif size == 1 then
369                -- we can optimize for non tables
370                n = n + 1 ; t[n] = "["
371                t, n = tojson(value[1],n)
372                n = n + 1 ; t[n] = "]"
373            else
374                for i=1,size do
375                    if done then
376                        n = n + 1 ; t[n] = ","
377                    else
378                        n = n + 1 ; t[n] = "["
379                        done = true
380                    end
381                    t, n = tojson(value[i],n)
382                end
383                n = n + 1 ; t[n] = "]"
384            end
385        elseif kind == "string"  then
386            n = n + 1 ; t[n] = '"'
387            n = n + 1 ; t[n] = lpegmatch(escaper,value) or value
388            n = n + 1 ; t[n] = '"'
389        elseif kind == "number" then
390            n = n + 1 ; t[n] = value
391        elseif kind == "boolean" then
392            n = n + 1 ; t[n] = tostring(value)
393        else
394            n = n + 1 ; t[n] = "null"
395        end
396        return t, n
397    end
398
399    -- escaping keys can become an option
400
401    local function jsontostring(value,pretty)
402        -- todo optimize for non table
403        local kind = type(value)
404        if kind == "table" then
405            if not escaper then
406                local escapes = {
407                    ["\\"] = "\\u005C",
408                    ["\""] = "\\u0022",
409                }
410                for i=0,0x1F do
411                    escapes[utfchar(i)] = format("\\u%04X",i)
412                end
413                escaper = Cs( (
414                    (R('\0\x20') + S('\"\\')) / escapes
415                  + P(1)
416                )^1 )
417
418            end
419            -- local to the closure (saves wrapping and local functions)
420            t = { }
421            n = 0
422            if pretty then
423                tojsonpp(value,name,0,0,#value)
424                value = concat(t,"",1,n)
425            else
426                t, n = tojson(value,0)
427                value = concat(t,"",1,n)
428            end
429            t = nil
430            n = 0
431            return value
432        elseif kind == "string" or kind == "number" then
433            return lpegmatch(escaper,value) or value
434        else
435            return tostring(value)
436        end
437    end
438
439    json.tostring = jsontostring
440
441    function json.tojson(value)
442        return jsontostring(value,true)
443    end
444
445end
446
-- A round trip test. It is disabled because otherwise it runs (and calls
-- inspect) each time that the module gets loaded; enable it when debugging.

-- local tmp = [[ { "t\nt t" : "foo bar", "a" : true, "b" : [ 123 , 456E-10, { "a" : true, "b" : [ 123 , 456 ] } ] } ]]
-- tmp = json.tolua(tmp)
-- inspect(tmp)
-- tmp = json.tostring(tmp,true)
-- inspect(tmp)
-- tmp = json.tolua(tmp)
-- inspect(tmp)
-- tmp = json.tostring(tmp)
-- inspect(tmp)
-- inspect(json.tostring(true))

-- local s = [[\foo"bar"]]
-- local j = json.tostring { s = s }
-- local l = json.tolua(j)
-- inspect(j)
-- inspect(l)
-- print(s==l.s)

-- if not package.loaded.json then
--     package.loaded.json = json
-- end

return json
470