toks-scn.lua /size: 17 Kb    last modification: 2021-10-28 13:50
-- Register the metadata of this module in the global 'modules' table; that
-- table is created here when it does not exist yet.

if not modules then
    modules = { }
end

modules['toks-scn'] = {
    version   = 1.001,
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
7
8-- Writing this kind of code (and completing the newtoken code base) is fun. I did
9-- so with the brilliant film music from The Girl with the Dragon Tattoo running in a
10-- loop in the background (three cd's by Trent Reznor and Atticus Ross). An alien
11-- feeling helps with alien code.
12
13-- todo: more \let's at the tex end
14
15local type, next, tostring, tonumber = type, next, tostring, tonumber
16
17local formatters     = string.formatters
18local concat         = table.concat
19
20local scanners       = tokens.scanners
21local tokenbits      = tokens.bits
22
23local scanstring     = scanners.string
24local scanargument   = scanners.argument
25local scandelimited  = scanners.delimited  -- lmtx
26local scanverbatim   = scanners.verbatim
27local scantokenlist  = scanners.tokenlist
28local scantoks       = scanners.toks
29local scaninteger    = scanners.integer
30local scancardinal   = scanners.cardinal
31local scannumber     = scanners.number
32local scankeyword    = scanners.keyword
33local scankeywordcs  = scanners.keywordcs
34local scanword       = scanners.word
35local scanletters    = scanners.letters
36local scankey        = scanners.key
37local scancode       = scanners.code
38local scanboolean    = scanners.boolean
39local scandimen      = scanners.dimen
40local scanglue       = scanners.glue
41local scangluevalues = scanners.gluevalues
42local scangluespec   = scanners.gluespec
43local scancsname     = scanners.csname
44
45local todimen        = number.todimen
46local toboolean      = toboolean
47
48local lpegmatch      = lpeg.match
49local p_unquoted     = lpeg.Cs(lpeg.patterns.unquoted)
50
51local trace_compile  = false  trackers.register("tokens.compile", function(v) trace_compile = v end)
52local report_compile = logs.reporter("tokens","compile")
53local report_scan    = logs.reporter("tokens","scan")
54
55local open  = tokenbits.open
56local close = tokenbits.close
57
-- Keeps calling scancode(open) until it delivers character code 123 (a left
-- brace), in which case true is returned. As soon as scancode returns nil or
-- false the function gives up and returns nothing. Any other code is skipped.

local function scanopen()
    local code = scancode(open)
    while code do
        if code == 123 then
            return true
        end
        code = scancode(open)
    end
end
69
-- Counterpart of scanopen: keeps calling scancode(close) until it delivers
-- character code 125 (a right brace) and then returns true. When scancode
-- returns nil or false nothing is returned. Any other code is skipped.

local function scanclose()
    local code = scancode(close)
    while code do
        if code == 125 then
            return true
        end
        code = scancode(close)
    end
end
81
-- make the brace helpers available via the scanners table

scanners.scanopen, scanners.scanclose = scanopen, scanclose
84
-- Collects the results of successive scanstring() calls in an array and stops
-- at the first nil or false result. When scanopen() reported an opening brace
-- up front, scanclose() is called once the strings are collected. The array
-- (possibly empty) is returned.

local function scanlist()
    local braced = scanopen()
    local result = { }
    local count  = 0
    local item   = scanstring()
    while item do
        count = count + 1
        result[count] = item
        item = scanstring()
    end
    if braced then
        scanclose()
    end
    return result
end
103
-- Scans a conditional value. The words "true" and "false" map onto the
-- matching booleans. Otherwise an integer is scanned, where 0 means true
-- (that is how a conditional works) and any other number means false. When
-- no integer is found either, nil is returned.

local function scanconditional()
    local word = scanword()
    if word == "true" then
        return true
    elseif word == "false" then
        return false
    end
    local value = scaninteger()
    if not value then
        return nil
    end
    return value == 0
end
118
-- Fills a table from scanned key/value pairs.
--
--   t    : table that maps a key onto a function that scans (and returns) the
--          value for that key; without it nothing is scanned
--   data : optional table that receives the values, a fresh one is made when
--          it is omitted
--
-- Keys are fetched with scanword(true) until that returns nothing. A key that
-- has no entry in t is silently skipped. An opening brace reported by
-- scanopen() is matched by a scanclose() call at the end. The data table is
-- returned.

local function scantable(t,data)
    data = data or { }
    if not t then
        return data
    end
    local braced = scanopen()
    local key    = scanword(true)
    while key do
        local getter = t[key]
        if getter then
            data[key] = getter()
        end
        key = scanword(true)
    end
    if braced then
        scanclose()
    end
    return data
end
144
-- A string gets wrapped in single quotes, any other value is returned as it
-- is. The content of the string is not escaped.

function tokens.constant(s)
    if type(s) ~= "string" then
        return s
    end
    return "'" .. s .. "'"
end
152
-- make the compound scanners available via the scanners table

scanners.list, scanners.table, scanners.conditional = scanlist, scantable, scanconditional
156
-- Scans any number of "width", "height" and "depth" keywords (in any order),
-- each followed by a dimension. When at least one of them was given, three
-- values are returned: width, height and depth, where a missing one becomes
-- 0. When none was given nothing is returned.

function scanners.whd()
    local wd, ht, dp
    local busy = true
    repeat
        if scankeyword("width") then
            wd = scandimen()
        elseif scankeyword("height") then
            ht = scandimen()
        elseif scankeyword("depth") then
            dp = scandimen()
        else
            busy = false
        end
    until not busy
    if not (wd or ht or dp) then
        return -- we inherit
    end
    return wd or 0, ht or 0, dp or 0
end
176
177-- begin lmtx
178
-- character codes of the square brackets that delimit an optional argument

local l, r = utf.byte("["), utf.byte("]")
181
-- Scans something delimited by [ and ]. When scandelimited delivers nothing a
-- message with the current line number and filename (taken from the read
-- state) is reported and an empty string is returned instead.

local function scanbracketed()
    local content = scandelimited(l, r)
    if not content then
        local state = status.getreadstate()
        report_scan("missing argument in line %i of %a", state.linenumber, state.filename)
        content = ""
    end
    return content
end
192
-- Like scanbracketed, but a missing [ ] argument is not reported: an empty
-- string is silently returned in that case.

local function scanoptional()
    local content = scandelimited(l, r)
    if content then
        return content
    end
    return ""
end
196
-- Scans something delimited by [ and ] and hands back whatever scandelimited
-- returns. The third argument is passed as false; its exact meaning is up to
-- scanners.delimited (presumably "don't expand", given the name - to be
-- confirmed).

local function scanbracketedasis()
    local flag = false
    return scandelimited(l, r, flag)
end
200
-- Scans an argument and hands back whatever scanargument returns. The first
-- argument is passed as false; its exact meaning is up to scanners.argument
-- (presumably "don't expand", given the name - to be confirmed).

local function scanargumentasis()
    local flag = false
    return scanargument(flag)
end
204
-- make the bracket and as-is helpers available via the scanners table

scanners.bracketed,     scanners.optional     = scanbracketed,     scanoptional
scanners.bracketedasis, scanners.argumentasis = scanbracketedasis, scanargumentasis
209
210-- end lmtx
211
-- This table is passed to load as the environment of compiled scanner code
-- (see loadstripped), so the names below are the only globals that such code
-- can see.

local shortcuts = {

    -- namespaces and token bits

    tokens            = tokens,
    scanners          = scanners,
    bits              = tokenbits,
    open              = open,
    close             = close,

    -- scanners that are localized at the top of this file

    scanstring        = scanstring,
    scanargument      = scanargument,
    scanverbatim      = scanverbatim,
    scantokenlist     = scantokenlist,
    scantoks          = scantoks,
    scaninteger       = scaninteger,
    scancardinal      = scancardinal,
    scannumber        = scannumber,
    scankeyword       = scankeyword,
    scankeywordcs     = scankeywordcs,
    scanword          = scanword,
    scanletters       = scanletters,
 -- scankey           = scankey,
    scancode          = scancode,
    scanboolean       = scanboolean,
    scancsname        = scancsname,

    -- dimensions and glue

    scandimen         = scandimen,
    scandimension     = scandimen, -- alias
    scanglue          = scanglue, -- list
    scangluespec      = scangluespec,
    scangluevalues    = scangluevalues,

    -- boxes (taken from the scanners table)

    scanbox           = scanners.box,
    scanhbox          = scanners.hbox,
    scanvbox          = scanners.vbox,
    scanvtop          = scanners.vtop,

    -- scanners defined in this file

    scantable         = scantable, -- not directly usable
    scanconditional   = scanconditional,
    scanopen          = scanopen,
    scanclose         = scanclose,
    scanlist          = scanlist,

    -- lmtx

    scandelimited     = scandelimited, -- not directly usable
    scanbracketed     = scanbracketed,
    scanoptional      = scanoptional,
    scanbracketedasis = scanbracketedasis,
    scanargumentasis  = scanargumentasis,

    -- converters and helpers

    todimen           = todimen,
    tonumber          = tonumber,
    tostring          = tostring,
    toboolean         = toboolean,
    inspect           = inspect,
    report            = report_scan,

}

tokens.shortcuts = shortcuts
263
264local load = load
265local dump = string.dump
266
-- Loads a string with Lua code, using the shortcuts table as environment.
-- The results of load are passed on untouched: the compiled chunk, or nil
-- plus an error message. The chunk name and mode are left at their defaults.
--
-- A variant that strips debug information would be:
--
--   return load(dump(load(code),true),nil,nil,shortcuts)

local function loadstripped(code)
    local chunkname, mode = nil, nil
    return load(code,chunkname,mode,shortcuts)
end
271
-- Maps the name of a converter onto the name of the function that does the
-- job. Watch out: toglue maps onto todimen.

local converters = { }

converters.tonumber  = "tonumber"
converters.tostring  = "tostring"
converters.toboolean = "toboolean"
converters.todimen   = "todimen"
converters.toglue    = "todimen"

tokens.converters = converters
279
280-- We could just pickup a keyword but then we really need to make sure that no number
281-- follows it when that is the assignment and adding an optional = defeats the gain
282-- in speed. Currently we have sources with no spaces (\startcontextdefinitioncode
283-- ...) so it fails there.
284--
285-- Another drawback is that we then need to use { } instead of ending with \relax (as
286-- we can do now) but that is no big deal. It's just that I then need to check the TeX
287-- end. More pain than gain and a bit risky too. Using scanletters works better, but
288-- the gain is only some 10 percent but if we don't have keywords with numbers it might
289-- make sense in the end, some day.
290
-- The templates below are the building blocks of the code that tokens.compile
-- generates. The keyword tests share their tail, so we compose them from a few
-- fragments; the resulting template strings are the same as when written out.

local s_assign   = "scankeywordcs('%s') then data['%s'] = scan%s()"
local s_assign_c = "scankeywordcs('%s') then data['%s'] = %s(scan%s())"
local s_any      = "local key = scanword(true) if key then data[key] = scan%s() else break end"
local s_any_c    = "local key = scanword(true) if key then data[key] = %s(scan%s()) else break end"

-- known key: scan the value (the _c variants pass it through a converter)

local f_if       = formatters["  if "     .. s_assign]
local f_elseif   = formatters["  elseif " .. s_assign]
local f_if_c     = formatters["  if "     .. s_assign_c]
local f_elseif_c = formatters["  elseif " .. s_assign_c]

-- an alternative that skips keys that are already set (not used):
--
-- local f_if_x     = formatters[    "  if not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]
-- local f_elseif_x = formatters["  elseif not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]

-- any key: either as last branch of the keyword chain or as the only test

local f_any      = formatters["  else " .. s_any   .. " end"]
local f_any_c    = formatters["  else " .. s_any_c .. " end"]
local f_any_all  = formatters["  "      .. s_any  ]
local f_any_all_c= formatters["  "      .. s_any_c]
local s_done     = "  else break end"

-- single calls and local shortcuts

local f_local    = formatters["local scan%s = scanners.%s"]
local f_shortcut = formatters["local %s = scanners.converters.%s"]
local f_scan     = formatters["scan%s()"]
local f_scan_c   = formatters["%s(scan%s())"]
local f_string   = formatters["%q"]
local f_action_f = formatters["action%s(%s)"]
local f_action_s = formatters["local action%s = tokens._action[%s]"]

-- complete chunks: each %\nt takes a list of lines that precede the function

local f_table    = formatters["%\nt\nreturn function()\n  local data = { }\n%s\n  return %s\nend\n"]
local f_sequence = formatters["%\nt\n%\nt\n%\nt\nreturn function()\n    return %s\nend\n"]
local f_singular = formatters["%\nt\n%\nt\n\nreturn function(%s)\n    return %s\nend\n"]
local f_simple   = formatters["%\nt\nreturn function()\n    return %s\nend\n"]
local f_nested   = formatters["local function scan%s()\n  local data = { }\n%s\n  return data\nend\n"]

-- the loop that scans a (possibly braced) list of key/value pairs: the first
-- field takes the keyword tests, the second one the branch that ends the chain

local f_check = formatters[
    "  local wrapped = scanopen()\n" ..
    "  while true do\n" ..
    "    %\nt\n" ..
    "    %s\n" ..
    "  end\n" ..
    "  if wrapped then\n" ..
    "    scanclose()\n" ..
    "  end\n"
]
333
334-- using these shortcuts saves temporary small tables (okay, it looks uglier)
335
-- The presets "1 string" .. "8 strings" and "1 argument" .. "4 arguments"
-- each map onto a list with that many scanner names.

local presets = { }

do

    local function register(kind,max)
        for count=1,max do
            local list = { }
            for i=1,count do
                list[i] = kind
            end
            presets[count .. " " .. kind .. (count > 1 and "s" or "")] = list
        end
    end

    register("string",  8)
    register("argument",4)

end

tokens.presets = presets
353
-- Compiles a scanner specification into a function. We generate Lua code,
-- load it with the shortcuts table as environment, and return the function
-- that results from running that chunk. Nothing is returned when the
-- specification can't be handled or when the generated code fails to load.
--
-- The specification is either the description of the arguments itself or a
-- table with these fields:
--
--   arguments : the description of the arguments (see below)
--   actions   : a function, or a list of functions; the scanned values are
--               passed to the first one, its results to the second, etc.
--   usage     : when "value" (and no arguments are given) the compiled
--               function passes on its second argument
--
-- The description of the arguments can be:
--
--   a preset name  : a key in the presets table, replaced by its list
--   a string       : the name of a scanner; without actions that scanner
--                    itself is returned; a quoted string is used as literal
--   a list with one entry, being a table : key/value scanning into a table,
--                    where each entry is { key, scanner, converter }; the
--                    scanner defaults to "string" and can be a nested list
--                    of such entries; the key "*" (or a plain "*" as first
--                    entry) catches any key
--   a list with one entry, not a table : as with a string
--   an empty list  : nothing is scanned
--   a longer list  : each entry is scanned in turn: a scanner name, a quoted
--                    string, a number or boolean (used as literal), a table
--                    { "_constant_", value } or a key/value list as above
--
-- While the generated chunk runs the actions are reachable as tokens._action;
-- that field is reset afterwards.

function tokens.compile(specification)
    local f = { } -- lines that go before the function: nested scanners, actions
    local n = 0   -- number of lines in f
    local c = { } -- converter shortcuts (keyed by name)
    local t = specification.arguments or specification
    local a = specification.actions or nil
    if type(a) == "function" then
        a = { a }
    end
    local code
    local args
    -- Wraps an expression in calls to the actions (the first action is the
    -- innermost call) and adds the lines that fetch the actions to f. Without
    -- actions the expression is returned unchanged.
    local function wrapactions(expression)
        if a then
            tokens._action = a
            for i=1,#a do
                expression = f_action_f(i,expression)
                n    = n + 1
                f[n] = f_action_s(i,i)
            end
        end
        return expression
    end
    -- Generates the key/value scanning loop for a list of entries. With
    -- nested set, the loop is wrapped in a local function scan<nested>.
    local function compile(t,nested)
        local done = s_done
        local r = { }
        local m = 0
        for i=1,#t do
            local ti = t[i]
            if ti == "*" and i == 1 then
                done = f_any_all("string")
                -- a catch-all in first position can't be combined with other
                -- entries (the generated code would not load) so we quit here,
                -- just as the { "*", ... } cases below do
                break
            else
                local t1 = ti[1]
                local t2 = ti[2] or "string"
                if type(t2) == "table" then
                    -- a nested list becomes a numbered local scanner
                    n = n + 1
                    f[n] = compile(t2,n)
                    t2 = n
                end
                local t3 = ti[3]
                if type(t3) == "function" then
                    -- todo: also create shortcut
                elseif t3 then
                    c[t3] = f_shortcut(t3,t3)
                    if t1 == "*" then
                        if i == 1 then
                            done = f_any_all_c(t3,t2)
                            break
                        else
                            done = f_any_c(t3,t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif_c or f_if_c)(t1,t1,t3,t2)
                    end
                else
                    if t1 == "*" then
                        if i == 1 then
                            done = f_any_all(t2)
                            break
                        else
                            done = f_any(t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif   or f_if  )(t1,t1,t2)
                     -- r[m] = (m > 1 and f_elseif_x or f_if_x)(t1,t1,t1,t2)
                    end
                end
            end
        end
        local body = f_check(r,done)
        if nested then
            return f_nested(nested,body)
        else
            return body
        end
    end
    local p = t and presets[t] -- already done in implement
    if p then
        t = p
    end
    local tt = type(t)
    if tt == "string" then
        if a then
            local s = lpegmatch(p_unquoted,t)
            if s and t ~= s then
                code = t -- a quoted string is used as it is
            else
                code = f_scan(t)
            end
            code = wrapactions(code)
            code = f_simple(f,code)
        else
            return scanners[t]
        end
    elseif tt ~= "table" then
        return
    elseif #t == 1 then
        local ti = t[1]
        if type(ti) == "table" then
            ti = compile(ti)
            code = wrapactions("data")
            code = f_table(f,ti,code)
        elseif a then
            code = wrapactions(f_scan(ti))
            code = f_simple(f,code)
        else
            return scanners[ti]
        end
    elseif #t == 0 then
        if specification.usage == "value" then
            code = "b"
            args = "_,b"
        else
            code = ""
            args = ""
        end
        code = wrapactions(code)
        code = f_singular(c,f,args,code)
    else
        local r = { } -- one expression per entry
        local p = { } -- nested key/value scanners
        local m = 0
        for i=1,#t do
            local ti = t[i]
            local tt = type(ti)
            if tt == "table" then
                if ti[1] == "_constant_" then
                    local v = ti[2]
                    if type(v) == "string" then
                        r[i] = f_string(v)
                    else
                        r[i] = tostring(v)
                    end
                else
                    -- numbered from 101 upwards, while the nested ones inside
                    -- use the counter n
                    m = m + 1
                    p[m] = compile(ti,100+m)
                    r[i] = f_scan(100+m)
                end
            elseif tt == "number" then
                r[i] = tostring(ti)
            elseif tt == "boolean" then
                r[i] = tostring(ti)
            else
                local s = lpegmatch(p_unquoted,ti)
                if s and ti ~= s then
                    r[i] = ti -- a string, given as "'foo'" or '"foo"'
                elseif scanners[ti] then
                    r[i] = f_scan(ti)
                else
                    report_compile("unknown scanner %a",ti)
                    r[i] = ti
                end
            end
        end
        code = wrapactions(concat(r,","))
        code = f_sequence(c,f,p,code)
    end
    if not code then
        return
    end
    if trace_compile then
        report_compile("code: %s",code)
    end
    -- we keep the source in 'code' so that it can be reported when loading fails
    local compiled, message = loadstripped(code)
    if compiled then
        compiled = compiled() -- sets action
    else
        report_compile("error in code: %s",code)
        report_compile("error message: %s",message)
    end
    if a then
        tokens._action = nil
    end
    if compiled then
        return compiled
    end
end
555
556-- local fetch = tokens.compile {
557--     "string",
558--     "string",
559--     {
560--         { "data",    "string" },
561--         { "tab",     "string" },
562--         { "method",  "string" },
563--         { "foo", {
564--             { "method", "integer" },
565--             { "compact", "number" },
566--             { "nature" },
567--             { "*" }, -- any key
568--         } },
569--         { "compact", "string", "tonumber" },
570--         { "nature",  "boolean" },
571--         { "escape",  "string" },
572--         { "escape"  },
573--     },
574--     "boolean",
575-- }
576--
577-- os.exit()
578