toks-scn.lmt /size: 18 Kb    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['toks-scn'] = {
2    version   = 1.001,
3    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
4    copyright = "PRAGMA ADE / ConTeXt Development Team",
5    license   = "see context related readme files"
6}
7
8-- Writing this kind of code (and completing the newtoken code base) is fun. I did
9-- so with the brilliant film music from The Girl with the Dragon Tattoo running in a
10-- loop in the background (three cd's by Trent Reznor and Atticus Ross). An alien
11-- feeling helps with alien code.
12
13-- todo: more \let's at the tex end
14
15local type, next, tostring, tonumber = type, next, tostring, tonumber
16
17local formatters          = string.formatters
18local concat              = table.concat
19
20local scanners            = tokens.scanners
21local tokenbits           = tokens.bits
22
23local scanstring          = scanners.string
24local scanargument        = scanners.argument
25local scandelimited       = scanners.delimited
26local scanverbatim        = scanners.verbatim
27local scantokenlist       = scanners.tokenlist
28local scantoks            = scanners.toks
29local scaninteger         = scanners.integer
30local scancardinal        = scanners.cardinal
31local scannumber          = scanners.number
32local scanfloat           = scanners.float
33local scankeyword         = scanners.keyword
34local scankeywordcs       = scanners.keywordcs
35local scanword            = scanners.word
36local scanletters         = scanners.letters
37local scankey             = scanners.key
38local scancode            = scanners.code
39local scanboolean         = scanners.boolean
40local scandimen           = scanners.dimen
41local scanglue            = scanners.glue
42local scangluevalues      = scanners.gluevalues
43local scangluespec        = scanners.gluespec
44local scancsname          = scanners.csname
45local scanintegerargument = scanners.integerargument
46local scandimenargument   = scanners.dimenargument
47local scandetokened       = scanners.detokened
48
49local todimen             = number.todimen
50local toboolean           = toboolean
51
52local lpegmatch           = lpeg.match
53local p_unquoted          = lpeg.Cs(lpeg.patterns.unquoted)
54
55local trace_compile       = false  trackers.register("tokens.compile", function(v) trace_compile = v end)
56local report_compile      = logs.reporter("tokens","compile")
57local report_scan         = logs.reporter("tokens","scan")
58
59local open                = tokenbits.open
60local close               = tokenbits.close
61
-- Looks for an opening brace. Codes are fetched with scancode(open) until one of
-- them is 123 ("{"), in which case true is returned. Nothing is returned when
-- scancode no longer delivers a code.

local function scanopen()
    local code = scancode(open)
    while code do
        if code == 123 then -- {
            return true
        end
     -- other codes are skipped (an older variant stopped on: code ~= 32)
        code = scancode(open)
    end
end
73
-- Looks for a closing brace. Codes are fetched with scancode(close) until one of
-- them is 125 ("}"), in which case true is returned. Nothing is returned when
-- scancode no longer delivers a code.

local function scanclose()
    local code = scancode(close)
    while code do
        if code == 125 then -- }
            return true
        end
     -- other codes are skipped (an older variant stopped on: code ~= 32)
        code = scancode(close)
    end
end

-- both brace helpers are made available in the scanners namespace

scanners.scanopen  = scanopen
scanners.scanclose = scanclose
88
-- Collects strings into a list until scanstring delivers nothing. An opening
-- brace in front is optional; when one was seen the closing one is picked up
-- after the last string. The (possibly empty) list is returned.

local function scanlist()
    local wrapped = scanopen()
    local list    = { }
    local size    = 0
    local entry   = scanstring()
    while entry do
        size       = size + 1
        list[size] = entry
        entry      = scanstring()
    end
    if wrapped then
        scanclose()
    end
    return list
end
107
-- Scans a condition. The words "true" and "false" give the matching boolean.
-- Otherwise an integer is scanned and compared with zero. When no integer is
-- found either, nil is returned.

local function scanconditional()
    local word = scanword()
    if word == "true" then
        return true
    elseif word == "false" then
        return false
    end
    local value = scaninteger()
    if not value then
        return nil
    end
    return value == 0 -- with a conditional 0=true
end
122
-- Scans key/value pairs driven by a table of getters.
--
-- t    : maps a key to a function that scans the value for that key; when t is
--        absent nothing is scanned
-- data : optional table that receives the values; a new one is made when absent
--
-- Keys are fetched with scanword(true) until none is found. Keys without a getter
-- are ignored. Wrapping braces are optional. The data table is returned.

local function scantable(t,data)
    data = data or { }
    if not t then
        return data
    end
    local wrapped = scanopen()
    local key     = scanword(true)
    while key do
        local get = t[key]
        if get then
            data[key] = get()
        end
     -- keys without a getter are dropped: catch all we can get
        key = scanword(true)
    end
    if wrapped then
        scanclose()
    end
    return data
end
148
-- Puts single quotes around a string; any other value is returned untouched. No
-- escaping of quotes inside the string takes place.

function tokens.constant(s)
    if type(s) ~= "string" then
        return s
    end
    return "'" .. s .. "'"
end

-- the compound scanners defined above become part of the scanners namespace

scanners.list        = scanlist
scanners.table       = scantable
scanners.conditional = scanconditional
160
-- Scans any number of "width", "height" and "depth" keywords, each followed by a
-- dimension, in any order. When at least one of them got a value, three values
-- are returned (width, height, depth) with 0 for the missing ones. Otherwise
-- nothing is returned.

function scanners.whd()
    local width, height, depth
    local matched = true
    while matched do
        if scankeyword("width") then
            width = scandimen()
        elseif scankeyword("height") then
            height = scandimen()
        elseif scankeyword("depth") then
            depth = scandimen()
        else
            matched = false
        end
    end
    if not (width or height or depth) then
        return -- we inherit
    end
    return width or 0, height or 0, depth or 0
end
180
-- the delimiters that are passed to scandelimited by the bracket scanners

local l = utf.byte("[")
local r = utf.byte("]")

-- Scans a bracketed argument. When scandelimited finds nothing, a message with
-- the current line number and file name is reported and an empty string is
-- returned instead.

local function scanbracketed()
    local content = scandelimited(l, r)
    if not content then
        local readstate = status.getreadstate()
        report_scan("missing argument in line %i of %a", readstate.linenumber, readstate.filename)
        content = ""
    end
    return content
end
194
-- Scans a bracketed argument that may be absent: an empty string is returned
-- when scandelimited finds nothing, and no message is issued.

local function scanoptional()
    local content = scandelimited(l, r)
    return content or ""
end
198
-- Bracketed variant that passes false as third argument to scandelimited and
-- hands back whatever that call returns (no fallback to an empty string).

local function scanbracketedasis() return scandelimited(l, r, false) end
202
-- Calls scanargument with false as its only argument and hands back the result.

local function scanargumentasis() return scanargument(false) end
206
-- Calls scancsname with true as its only argument and hands back the result.

local function scancsnameunchecked() return scancsname(true) end

-- the bracket, argument and csname variants become part of the scanners namespace

scanners.bracketed       = scanbracketed
scanners.optional        = scanoptional
scanners.bracketedasis   = scanbracketedasis
scanners.argumentasis    = scanargumentasis
scanners.csnameunchecked = scancsnameunchecked

--------.detokenize      = function() return scanners.tokenstring(false) end -- needs testing
218
-- This table is handed to load as the environment of generated code, so these
-- are the only global names that such code can see.

local shortcuts = {

    -- namespaces and token bits

    tokens = tokens,
    bits = tokenbits,
    open = open,
    close = close,
    scanners = scanners,

    -- scanners

    scanstring = scanstring,
    scanargument = scanargument,
    scantokenstring = scanners.tokenstring,
 -- scandetokenize = scanners.detokenize,
    scanverbatim = scanverbatim,
    scantokenlist = scantokenlist,
    scantoks = scantoks,
    scaninteger = scaninteger,
    scancardinal = scancardinal,
    scannumber = scannumber,
    scanfloat = scanfloat,
    scantable = scantable, -- not directly useable
    scankeyword = scankeyword,
    scankeywordcs = scankeywordcs,
    scanword = scanword,
    scanletters = scanletters,
 -- scankey = scankey,
    scancode = scancode,
    scanboolean = scanboolean,
    scanglue = scanglue, -- list
    scangluespec = scangluespec,
    scangluevalues = scangluevalues,
    scandimen = scandimen,
    scandimension = scandimen,
    scanbox = scanners.box,
    scanhbox = scanners.hbox,
    scanvbox = scanners.vbox,
    scanvtop = scanners.vtop,
    scanconditional = scanconditional,
    scanopen = scanopen,
    scanclose = scanclose,
    scanlist = scanlist,
    scancsname = scancsname,
    scancsnameunchecked = scancsnameunchecked,
    scandelimited = scandelimited, -- not directly useable
    scanbracketed = scanbracketed,
    scanoptional = scanoptional,
    scanbracketedasis = scanbracketedasis,
    scanargumentasis = scanargumentasis,
    scanintegerargument = scanintegerargument,
    scandimenargument = scandimenargument,
    scandetokened = scandetokened,

    -- converters and helpers

    todimen = todimen,
    tonumber = tonumber,
    tostring = tostring,
    toboolean = toboolean,
    inspect = inspect,
    report = report_scan,
    posit = posit,

}

tokens.shortcuts = shortcuts
277
local load = load
local dump = string.dump

-- Turns generated source into a chunk. No chunk name and no mode are given; the
-- shortcuts table is passed as the environment of the chunk. The results of load
-- are returned unchanged: the chunk, or nil plus a message.

local function loadstripped(code)
    local chunkname   = nil
    local mode        = nil
    local environment = shortcuts
    return load(code,chunkname,mode,environment)
 -- return load(dump(load(code),true),nil,nil,shortcuts)
end
285
-- Maps a converter name onto the name of the function that implements it. Mind
-- that "toglue" maps onto "todimen".

tokens.converters = {
    ["tonumber"]  = "tonumber",
    ["tostring"]  = "tostring",
    ["toboolean"] = "toboolean",
    ["todimen"]   = "todimen",
    ["toglue"]    = "todimen",
}
293
-- We could just pick up a keyword, but then we really need to make sure that no
-- number follows it when that is the assignment, and adding an optional = defeats
-- the gain in speed. Currently we have sources with no spaces
-- (\startcontextdefinitioncode ...) so it fails there.
--
-- Another drawback is that we then need to use { } instead of ending with \relax
-- (as we can do now), but that is no big deal. It's just that I then need to check
-- the TeX end: more pain than gain and a bit risky too. Using scanletters works
-- better, but the gain is only some 10 percent. If we don't have keywords with
-- numbers it might make sense in the end, some day.
304
-- The templates below produce the source text that tokens.compile assembles and
-- loads. A "%s" slot takes a value; a "%\nt" slot takes a table (presumably
-- concatenated with newlines by string.formatters -- confirm in util-str).

-- key/value branches: arguments are key, key, scanner name

local f_if           = formatters[    "  if scankeywordcs('%s') then data['%s'] = scan%s()"]
local f_elseif       = formatters["  elseif scankeywordcs('%s') then data['%s'] = scan%s()"]

----- f_if_x         = formatters[    "  if not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]
----- f_elseif_x     = formatters["  elseif not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]

----- f_if           = formatters["  local key = scanletters() if key == '' then break elseif key == '%s' then data['%s'] = scan%s()"]
----- f_elseif       = formatters["  elseif key == '%s' then data['%s'] = scan%s()"]

-- NOTE(review): f_shortcut refers to scanners.converters while this file only
-- sets tokens.converters -- confirm that the former exists before relying on it.

local f_local        = formatters["local scan%s = scanners.%s"]
local f_scan         = formatters["scan%s()"]
local f_shortcut     = formatters["local %s = scanners.converters.%s"]

-- key/value branches with a converter: arguments are key, key, converter, scanner name

local f_if_c         = formatters[    "  if scankeywordcs('%s') then data['%s'] = %s(scan%s())"]
local f_elseif_c     = formatters["  elseif scankeywordcs('%s') then data['%s'] = %s(scan%s())"]
local f_scan_c       = formatters["%s(scan%s())"]

-- see above

----- f_if_c         = formatters["  local key = scanletters() if key == '' then break elseif key == '%s' then data['%s'] = %s(scan%s())"]
----- f_elseif_c     = formatters["  elseif k == '%s' then data['%s'] = %s(scan%s())"]

-- closing branch of an if chain: either accept any key or leave the loop

local f_any          = formatters["  else local key = scanword(true) if key then data[key] = scan%s() else break end end"]
local f_any_c        = formatters["  else local key = scanword(true) if key then data[key] = %s(scan%s()) else break end end"]
local s_done <const> = "  else break end"

-- loop body when there is no if chain at all: every key is accepted

local f_any_all      = formatters["  local key = scanword(true) if key then data[key] = scan%s() else break end"]
local f_any_all_c    = formatters["  local key = scanword(true) if key then data[key] = %s(scan%s()) else break end"]

-- wrappers that turn the collected pieces into a chunk that returns a function

local f_table        = formatters["%\nt\nreturn function()\n  local data = { }\n%s\n  return %s\nend\n"]
local f_sequence     = formatters["%\nt\n%\nt\n%\nt\nreturn function()\n    return %s\nend\n"]
local f_singular     = formatters["%\nt\n%\nt\n\nreturn function(%s)\n    return %s\nend\n"]
local f_simple       = formatters["%\nt\nreturn function()\n    return %s\nend\n"]
local f_string       = formatters["%q"]
local f_action_f     = formatters["action%s(%s)"]
local f_action_s     = formatters["local action%s = tokens._action[%s]"]
local f_nested       = formatters["local function scan%s()\n  local data = { }\n%s\n  return data\nend\n"]

-- the key scanning loop: optional braces around a loop that holds the if chain
-- (first slot) and its closing branch (second slot)

local f_check = formatters[ [[
  local wrapped = scanopen()
  while true do
    ]] .. "%\nt\n" .. [[
    %s
  end
  if wrapped then
    scanclose()
  end
]] ]
353
354-- using these shortcuts saves temporary small tables (okay, it looks uglier)
355
-- Named argument lists. The names are "1 string", "2 strings" up to "8 strings"
-- and, up to four, the same for "argument", "integer" and "optional". Each name
-- maps onto a list that repeats the scanner name that many times.

local presets = { }

do
    local limits = {
        { "string",   8 },
        { "argument", 4 },
        { "integer",  4 },
        { "optional", 4 },
    }
    for i=1,#limits do
        local scanner = limits[i][1]
        local maximum = limits[i][2]
        for count=1,maximum do
            local list = { }
            for j=1,count do
                list[j] = scanner
            end
            local name = count .. " " .. scanner
            if count > 1 then
                name = name .. "s" -- plural from two on
            end
            presets[name] = list
        end
    end
end

tokens.presets = presets
383
-- Compiles a scanner specification into a Lua function.
--
-- specification : the argument description itself, or a table with the fields
--                 "arguments" (the description), "actions" (a function or a list
--                 of functions) and "usage"
--
-- The description can be:
--
--   a preset name  : replaced by the matching list in presets
--   a scanner name : scanners[name] is returned as-is unless there are actions
--   { }            : nothing is scanned; with usage "value" the generated function
--                    hands over its second argument
--   { name }       : handled like a scanner name
--   { { ... } }    : a key/value scanner that returns a table (see compile below)
--   { x, y, ... }  : a sequence of scanner names, quoted strings, numbers,
--                    booleans, { "_constant_", value } entries and key/value tables
--
-- Actions are nested inside out: the first action gets the scanned values, the
-- second one gets the result of the first, and so on.
--
-- The compiled function is returned. Nothing is returned when the description
-- has an unsupported type or when the generated code fails to load; the latter
-- is reported together with the offending source.

function tokens.compile(specification)
    local f = { } -- lines that precede the generated function
    local n = 0   -- number of lines in f
    local c = { } -- converter shortcut lines, keyed by converter name
    local t = specification.arguments or specification
    local a = specification.actions or nil
    if type(a) == "function" then
        a = { a }
    end
    local code
    local args
    local nofnested = 0 -- number of nested key/value scanners named so far

    -- Wraps the given call in all actions and registers a local per action. The
    -- generated chunk picks the actions up from tokens._action when it runs.

    local function wrapactions(call)
        if a then
            tokens._action = a
            for i=1,#a do
                call = f_action_f(i,call)
                n    = n + 1
                f[n] = f_action_s(i,i)
            end
        end
        return call
    end

    -- Generates the scanning loop for a list of { key, scanner, converter }
    -- entries. The scanner defaults to "string" and can itself be such a list. A
    -- "*" key accepts any key. With "nested" given, the loop is wrapped into a
    -- local function called scan<nested>.

    local function compile(t,nested)
        local done = s_done
        local r = { }
        local m = 0
        for i=1,#t do
            local ti = t[i]
            if ti == "*" and i == 1 then
                done = f_any_all("string")
            else
                local t1 = ti[1]
                local t2 = ti[2] or "string"
                if type(t2) == "table" then
                    -- The name is fixed before recursing and the code is stored
                    -- afterwards: that way deeper scanners are defined before the
                    -- ones that call them, and the name is not affected by what
                    -- the recursion adds to f.
                    nofnested = nofnested + 1
                    local name = nofnested
                    local body = compile(t2,name)
                    n = n + 1
                    f[n] = body
                    t2 = name
                end
                local t3 = ti[3]
                if type(t3) == "function" then
                    -- todo: also create shortcut (for now nothing is generated)
                elseif t3 then
                    -- NOTE(review): c is keyed by name, so it has no array part
                    -- when it ends up in a template -- confirm what gets emitted
                    c[t3] = f_shortcut(t3,t3)
                    if t1 == "*" then
                        if i == 1 then
                            done = f_any_all_c(t3,t2)
                            break
                        else
                            done = f_any_c(t3,t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif_c or f_if_c)(t1,t1,t3,t2)
                    end
                else
                    if t1 == "*" then
                        if i == 1 then
                            done = f_any_all(t2)
                            break
                        else
                            done = f_any(t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif   or f_if  )(t1,t1,t2)
                     -- r[m] = (m > 1 and f_elseif_x or f_if_x)(t1,t1,t1,t2)
                    end
                end
            end
        end
        local checked = f_check(r,done)
        if nested then
            return f_nested(nested,checked)
        else
            return checked
        end
    end

    local preset = t and presets[t] -- already done in implement
    if preset then
        t = preset
    end
    local tt = type(t)
    if tt == "string" then
        if a then
            -- a quoted string is used as-is, anything else is a scanner name
            local s = lpegmatch(p_unquoted,t)
            if s and t ~= s then
                code = t
            else
                code = f_scan(t)
            end
            code = wrapactions(code)
            code = f_simple(f,code)
        else
            return scanners[t]
        end
    elseif tt ~= "table" then
        return
    elseif #t == 1 then
        local ti = t[1]
        if type(ti) == "table" then
            ti = compile(ti)
            code = wrapactions("data")
            code = f_table(f,ti,code)
        elseif a then
            code = wrapactions(f_scan(ti))
            code = f_simple(f,code)
        else
            return scanners[ti]
        end
    elseif #t == 0 then
        if specification.usage == "value" then
            code = "b"
            args = "_,b"
        else
            code = ""
            args = ""
        end
        code = wrapactions(code)
        code = f_singular(c,f,args,code)
    else
        local results = { } -- one expression per entry
        local scans   = { } -- code of the key/value scanners in this sequence
        local m       = 0
        for i=1,#t do
            local ti   = t[i]
            local kind = type(ti)
            if kind == "table" then
                if ti[1] == "_constant_" then
                    local v = ti[2]
                    if type(v) == "string" then
                        results[i] = f_string(v)
                    else
                        results[i] = tostring(v)
                    end
                else
                    -- these scanners are numbered from 101 on
                    m = m + 1
                    scans[m] = compile(ti,100+m)
                    results[i] = f_scan(100+m)
                end
            elseif kind == "number" then
                results[i] = tostring(ti)
            elseif kind == "boolean" then
                results[i] = tostring(ti)
            else
                local s = lpegmatch(p_unquoted,ti)
                if s and ti ~= s then
                    results[i] = ti -- a string, given as "'foo'" or '"foo"'
                elseif scanners[ti] then
                    results[i] = f_scan(ti)
                else
                    report_compile("unknown scanner %a",ti)
                    results[i] = ti
                end
            end
        end
        code = wrapactions(concat(results,","))
        code = f_sequence(c,f,scans,code)
    end
    if not code then
        return
    end
    if trace_compile then
        report_compile("code: %s",code)
    end
    -- the source stays in its own variable so that it can be shown when loading fails
    local source = code
    local compiled, message = loadstripped(source)
    if compiled then
        compiled = compiled() -- sets action
    else
        report_compile("error in code: %s",source)
        report_compile("error message: %s",message)
    end
    if a then
        tokens._action = nil
    end
    if compiled then
        return compiled
    end
end
585
586-- local fetch = tokens.compile {
587--     "string",
588--     "string",
589--     {
590--         { "data",    "string" },
591--         { "tab",     "string" },
592--         { "method",  "string" },
593--         { "foo", {
594--             { "method", "integer" },
595--             { "compact", "number" },
596--             { "nature" },
597--             { "*" }, -- any key
598--         } },
599--         { "compact", "string", "tonumber" },
600--         { "nature",  "boolean" },
601--         { "escape",  "string" },
602--         { "escape"  },
603--     },
604--     "boolean",
605-- }
606--
607-- os.exit()
608