toks-scn.lmt /size: 19 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['toks-scn'] = {
2    version   = 1.001,
3    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
4    copyright = "PRAGMA ADE / ConTeXt Development Team",
5    license   = "see context related readme files"
6}
7
8-- Writing this kind of code (and completing the newtoken code base) is fun. I did
9-- so with the brilliant film music from The Girl with the Dragon Tattoo running in a
10-- loop in the background (three cd's by Trent Reznor and Atticus Ross). An alien
11-- feeling helps with alien code.
12
13-- todo: more \let's at the tex end
14
15local type, next, tostring, tonumber, load = type, next, tostring, tonumber, load
16local dump = string.dump
17
18local formatters          = string.formatters
19local concat              = table.concat
20
21local scanners            = tokens.scanners
22local tokenbits           = tokens.bits
23
24local scanstring          = scanners.string
25local scanargument        = scanners.argument
26local scandelimited       = scanners.delimited
27local scanverbatim        = scanners.verbatim
28local scantokenlist       = scanners.tokenlist
29local scantoks            = scanners.toks
30local scaninteger         = scanners.integer
31local scancardinal        = scanners.cardinal
32local scannumber          = scanners.number
33local scanfloat           = scanners.float
34local scanposit           = scanners.posit
35local scankeyword         = scanners.keyword
36local scankeywordcs       = scanners.keywordcs
37local scanword            = scanners.word
38local scanletters         = scanners.letters
39local scankey             = scanners.key
40local scancode            = scanners.code
41local scanboolean         = scanners.boolean
42local scandimension       = scanners.dimension
43local scanglue            = scanners.glue
44local scangluevalues      = scanners.gluevalues
45local scangluespec        = scanners.gluespec
46local scancsname          = scanners.csname
47local scandetokened       = scanners.detokened
48
49local todimen             = number.todimen
50local toboolean           = toboolean
51
52local lpegmatch           = lpeg.match
53local p_unquoted          = lpeg.Cs(lpeg.patterns.unquoted)
54
55local trace_compile       = false  trackers.register("tokens.compile", function(v) trace_compile = v end)
56local report_compile      = logs.reporter("tokens","compile")
57local report_scan         = logs.reporter("tokens","scan")
58
59local open                = tokenbits.open
60local close               = tokenbits.close
61
-- Consumes tokens that scancode accepts for the "open" bit set until one with
-- character code 123 ({) shows up, in which case true is returned. When
-- scancode has nothing (more) to offer, nothing is returned. Codes other than
-- 123 are silently skipped.
local function scanopen()
    local code = scancode(open)
    while code do
        if code == 123 then -- {
            return true
        end
        code = scancode(open)
    end
end
73
-- Counterpart of scanopen: consumes tokens that scancode accepts for the
-- "close" bit set until one with character code 125 (}) shows up, in which
-- case true is returned. When scancode has nothing (more) to offer, nothing is
-- returned. Codes other than 125 are silently skipped.
local function scanclose()
    local code = scancode(close)
    while code do
        if code == 125 then -- }
            return true
        end
        code = scancode(close)
    end
end
85
-- register the two brace helpers in the scanners namespace
scanners.scanopen, scanners.scanclose = scanopen, scanclose
88
-- Collects strings, optionally wrapped in braces, into an indexed table.
-- Scanning stops as soon as scanstring has nothing to return; when an opening
-- brace was seen the matching closing one is consumed afterwards.
local function scanlist()
    local braced = scanopen()
    local result = { }
    local count  = 0
    local item   = scanstring()
    while item do
        count = count + 1
        result[count] = item
        item = scanstring()
    end
    if braced then
        scanclose()
    end
    return result
end
107
-- Scans a condition and returns it as a boolean. The words "true" and "false"
-- map onto themselves; otherwise an integer is scanned where 0 counts as true
-- (with a conditional 0=true). When neither is found nil is returned.
local function scanconditional()
    local word = scanword()
    if word == "true" or word == "false" then
        return word == "true"
    end
    local value = scaninteger()
    if value then
        return value == 0
    else
        return nil
    end
end
122
-- Scans "key value" pairs, optionally wrapped in braces.
--
-- t    : table that maps a key onto the function that scans its value; when
--        t is not given nothing is scanned at all
-- data : optional table that receives the values (a new one is made otherwise)
--
-- Keys without an entry in t are picked up but nothing is scanned or stored
-- for them. The (possibly fresh) data table is returned.
local function scantable(t,data)
    data = data or { }
    if not t then
        return data
    end
    local braced = scanopen()
    local key    = scanword(true)
    while key do
        local getter = t[key]
        if getter then
            data[key] = getter()
        end
        key = scanword(true)
    end
    if braced then
        scanclose()
    end
    return data
end
148
-- Wraps a string in single quotes (the content is taken as-is, nothing gets
-- escaped); values of any other type are passed through untouched.
function tokens.constant(s)
    -- the quoted result is never false or nil so the and/or idiom is safe
    return type(s) == "string" and ("'" .. s .. "'") or s
end
156
-- register the compound scanners in the scanners namespace
scanners.list, scanners.table, scanners.conditional = scanlist, scantable, scanconditional
160
-- Scans any number of "width", "height" and "depth" keywords, each followed by
-- a dimension, in any order (a later one overrides an earlier one). When at
-- least one was given three values are returned, with 0 for the missing ones.
-- When none was given nothing is returned (we inherit).
function scanners.whd()
    local width, height, depth
    local found = true
    repeat
        if scankeyword("width") then
            width = scandimension()
        elseif scankeyword("height") then
            height = scandimension()
        elseif scankeyword("depth") then
            depth = scandimension()
        else
            found = false
        end
    until not found
    if not (width or height or depth) then
        return
    end
    return width or 0, height or 0, depth or 0
end
180
-- character codes of the delimiters used by the bracket scanners below
local l, r = utf.byte("["), utf.byte("]")
183
-- Scans a mandatory [...] argument. When there is none a message with the
-- current line number and filename is reported and an empty string is
-- returned instead.
local function scanbracketed()
    local content = scandelimited(l, r)
    if not content then
        local state = status.getreadstate()
        report_scan("missing argument in line %i of %a", state.linenumber, state.filename)
        content = ""
    end
    return content
end
194
-- Scans an optional [...] argument; a missing one silently becomes "".
local function scanoptional()
    local content = scandelimited(l, r)
    if content then
        return content
    end
    return ""
end
198
-- Scans a [...] argument with false passed as third argument to scandelimited
-- (judging by the name that keeps the content as-is; the exact meaning of the
-- flag is defined by scandelimited). There is no fallback for a missing one.
local scanbracketedasis = function()
    return scandelimited(l, r, false)
end
202
-- Scans an argument with false passed to scanargument (judging by the name
-- that keeps the content as-is; the exact meaning of the flag is defined by
-- scanargument).
local scanargumentasis = function()
    return scanargument(false)
end
206
-- Scans a control sequence name with true passed to scancsname (judging by
-- the name that skips a check; the exact meaning of the flag is defined by
-- scancsname).
local scancsnameunchecked = function()
    return scancsname(true)
end
210
-- register the bracket, argument and csname variants in the scanners namespace
scanners.bracketed,
scanners.optional,
scanners.bracketedasis,
scanners.argumentasis,
scanners.csnameunchecked =
    scanbracketed,
    scanoptional,
    scanbracketedasis,
    scanargumentasis,
    scancsnameunchecked
216
-- Scans a string, compiles it as the Lua expression "return <string>" and
-- returns whatever evaluating that expression yields. Nothing is returned when
-- no string could be scanned or when the expression does not compile.
local scanlua = function()
    local str = scanstring()
    if not str then
        -- scanstring can come back empty handed (scanlist depends on that in
        -- order to stop) and concatenating that would raise an error
        return
    end
    local chunk = load("return " .. str)
    if chunk then
        return chunk()
    end
end

scanners.lua = scanlua
225
226--------.detokenize      = function() return scanners.tokenstring(false) end -- needs testing
227
-- This table is the environment in which generated code gets loaded (it is
-- passed as last argument to load in loadstripped), so these are the only
-- names that such code can refer to as if they were globals. It is also made
-- available as tokens.shortcuts. Entries fetched directly from scanners,
-- as well as inspect and posit, are taken as they are at this point; they are
-- not defined in this part of the file.

local shortcuts = {
    tokens              = tokens,
    bits                = tokenbits,
    open                = open,
    close               = close,
    scanners            = scanners,
    scanstring          = scanstring,
    scanargument        = scanargument,
    scantokenstring     = scanners.tokenstring,
 -- scandetokenize      = scanners.detokenize,
    scanverbatim        = scanverbatim,
    scantokenlist       = scantokenlist,
    scantoks            = scantoks,
    scaninteger         = scaninteger,
    scancardinal        = scancardinal,
    scannumber          = scannumber,
    scanfloat           = scanfloat,
    scanposit           = scanposit,
    scantable           = scantable, -- not directly useable
    scankeyword         = scankeyword,
    scankeywordcs       = scankeywordcs,
    scanword            = scanword,
    scanletters         = scanletters,
 -- scankey             = scankey,
    scancode            = scancode,
    scanboolean         = scanboolean,
    scanglue            = scanglue, -- list
    scangluespec        = scangluespec,
    scangluevalues      = scangluevalues,
    scandimension       = scandimension,
    scanbox             = scanners.box,
    scanhbox            = scanners.hbox,
    scanvbox            = scanners.vbox,
    scanvtop            = scanners.vtop,
    scanconditional     = scanconditional,
    scanopen            = scanopen,
    scanclose           = scanclose,
    scanlist            = scanlist,
    scancsname          = scancsname,
    scancsnameunchecked = scancsnameunchecked,
    scandelimited       = scandelimited, -- not directly useable
    scanbracketed       = scanbracketed,
    scanoptional        = scanoptional,
    scanbracketedasis   = scanbracketedasis,
    scanargumentasis    = scanargumentasis,
    scandetokened       = scandetokened,
    scanlua             = scanlua,
    todimen             = todimen,
    tonumber            = tonumber,
    tostring            = tostring,
    toboolean           = toboolean,
    inspect             = inspect,
    report              = report_scan,
    posit               = posit,

    -- obsolete:

    scandimen             = scandimension,
    scanintegerargument   = scanners.integerargument,
    scandimensionargument = scanners.dimensionargument,
}

tokens.shortcuts = shortcuts
291
-- Compiles generated source with the shortcuts table as its environment and
-- hands back whatever load returns (the chunk, or nil plus a message).
local function loadstripped(code)
    -- no explicit chunk name and no mode restriction
    local chunkname, mode = nil, nil
    return load(code,chunkname,mode,shortcuts)
 -- return load(dump(load(code),true),nil,nil,shortcuts)
end
296
-- Converter names, each mapped onto the name of a function that is present in
-- the shortcuts table. This table is not consulted anywhere in this part of
-- the file.
--
-- NOTE(review): toglue maps onto todimen, just like todimen itself; confirm
-- that this is intended.

tokens.converters = {
    tonumber  = "tonumber",
    tostring  = "tostring",
    toboolean = "toboolean",
    todimen   = "todimen",
    toglue    = "todimen",
}
304
305-- We could just pickup a keyword but then we really need to make sure that no number
306-- follows it when that is the assignment and adding an optional = defeats the gain
307-- in speed. Currently we have sources with no spaces (\startcontextdefinitioncode
308-- ...) so it fails there.
309--
310-- Another drawback is that we then need to use { } instead of ending with \relax (as
311-- we can do now) but that is no big deal. It's just that I then need to check the TeX
312-- end. More pain than gain and a bit risky too. Using scanletters works better, but
313-- the gain is only some 10 percent but if we don't have keywords with numbers it might
314-- make sense in the end, some day.
315
-- Templates for the Lua source that tokens.compile puts together.
--
-- The key branches are called with (key, key, scanner): the first branch of a
-- chain uses f_if and the following ones use f_elseif.

local f_if           = formatters[    "  if scankeywordcs('%s') then data['%s'] = scan%s()"]
local f_elseif       = formatters["  elseif scankeywordcs('%s') then data['%s'] = scan%s()"]

----- f_if_x         = formatters[    "  if not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]
----- f_elseif_x     = formatters["  elseif not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]

----- f_if           = formatters["  local key = scanletters() if key == '' then break elseif key == '%s' then data['%s'] = scan%s()"]
----- f_elseif       = formatters["  elseif key == '%s' then data['%s'] = scan%s()"]

-- f_scan turns a scanner name (or number) into a call; f_shortcut makes the
-- line that is stored per converter name; f_local is not used in this part of
-- the file.

local f_local        = formatters["local scan%s = scanners.%s"]
local f_scan         = formatters["scan%s()"]
local f_shortcut     = formatters["local %s = scanners.converters.%s"]

-- The _c variants wrap the scanned value in a converter call and are called
-- with (key, key, converter, scanner).

local f_if_c         = formatters[    "  if scankeywordcs('%s') then data['%s'] = %s(scan%s())"]
local f_elseif_c     = formatters["  elseif scankeywordcs('%s') then data['%s'] = %s(scan%s())"]
local f_scan_c       = formatters["%s(scan%s())"]

-- see above

----- f_if_c         = formatters["  local key = scanletters() if key == '' then break elseif key == '%s' then data['%s'] = %s(scan%s())"]
----- f_elseif_c     = formatters["  elseif k == '%s' then data['%s'] = %s(scan%s())"]

-- These close the chain of key branches: f_any and f_any_c accept any other
-- key (a "*" entry that is not the first one) while s_done just leaves the
-- loop when no known key shows up.

local f_any          = formatters["  else local key = scanword(true) if key then data[key] = scan%s() else break end end"]
local f_any_c        = formatters["  else local key = scanword(true) if key then data[key] = %s(scan%s()) else break end end"]
local s_done         <const> = "  else break end"

-- These are used when "*" is the first entry, so there is no chain to close.

local f_any_all      = formatters["  local key = scanword(true) if key then data[key] = scan%s() else break end"]
local f_any_all_c    = formatters["  local key = scanword(true) if key then data[key] = %s(scan%s()) else break end"]

-- The function wrappers. The leading "%\nt" directives each take a table of
-- source lines (presumably joined with newlines by the formatter; see the
-- formatters implementation) and the remaining arguments fill the function.

local f_table        = formatters["%\nt\nreturn function()\n  local data = { }\n%s\n  return %s\nend\n"]
local f_sequence     = formatters["%\nt\n%\nt\n%\nt\nreturn function()\n    return %s\nend\n"]
local f_singular     = formatters["%\nt\n%\nt\n\nreturn function(%s)\n    return %s\nend\n"]
local f_simple       = formatters["%\nt\nreturn function()\n    return %s\nend\n"]
local f_string       = formatters["%q"]
local f_action_f     = formatters["action%s(%s)"]
local f_action_s     = formatters["local action%s = tokens._action[%s]"]
local f_nested       = formatters["local function scan%s()\n  local data = { }\n%s\n  return data\nend\n"]

-- The scan loop itself: it gets the table with key branches and the line that
-- closes the chain, and wraps them in the optional brace handling.

local f_check = formatters[ [[
  local wrapped = scanopen()
  while true do
    ]] .. "%\nt\n" .. [[
    %s
  end
  if wrapped then
    scanclose()
  end
]] ]
364
365-- using these shortcuts saves temporary small tables (okay, it looks uglier)
366
-- Predefined argument lists, like "3 strings" for { "string", "string", "string" }.
-- There are entries for 1 upto 8 strings and for 1 upto 4 arguments, integers
-- and optionals; the singular form is used for 1 and a plural "s" is added
-- otherwise.

local presets = { }

for kind, max in next, { string = 8, argument = 4, integer = 4, optional = 4 } do
    for count=1,max do
        local list = { }
        for i=1,count do
            list[i] = kind
        end
        presets[count .. " " .. kind .. (count > 1 and "s" or "")] = list
    end
end

tokens.presets = presets
394
-- Turns a scanner specification into a function that does the scanning.
--
-- specification : either the argument list itself or a table with the fields
--                 arguments (the list), actions (a function or a list of
--                 functions) and usage (only "value" is looked at)
--
-- The argument list can be:
--
--   a string        a preset name ("2 strings") or the name of a scanner
--   an empty table  nothing is scanned, only the actions are called
--   { "name" }      a single scanner
--   { { ... } }     a single table with { key, scanner, converter } entries
--   { ..., ... }    a sequence of scanner names, quoted strings, numbers,
--                   booleans, { "_constant_", value } pairs and key tables
--
-- Without actions the single scanner cases simply return scanners[name]. In
-- all other cases Lua source is generated and loaded with the shortcuts table
-- as environment. Actions are applied inside out: the scanned values are fed
-- into the first action, its results into the second one, and so on. While the
-- generated code is loaded the actions are parked in tokens._action.
--
-- Returns the resulting function, or nothing when the specification is not
-- usable or the generated code fails to load (which gets reported).

function tokens.compile(specification)
    local f = { } -- lines that precede the function: nested scanners and action locals
    local n = 0   -- number of lines in f
    local c = { } -- converter shortcut lines, stored by converter name
    local t = specification.arguments or specification
    local a = specification.actions or nil
    if type(a) == "function" then
        a = { a }
    end
    local code
    local args

    -- Wraps the given expression in a call for each action and adds a local
    -- per action to f. This also parks the actions in tokens._action where
    -- the generated code picks them up when it is loaded.
    local function wrapactions(expression)
        tokens._action = a
        for i=1,#a do
            expression = f_action_f(i,expression)
            n    = n + 1
            f[n] = f_action_s(i,i)
        end
        return expression
    end

    -- Generates the scan loop for a table with { key, scanner, converter }
    -- entries, where the scanner defaults to "string" and can itself be such
    -- a table. When nested is given the loop is wrapped in a local function
    -- called scan<nested>, otherwise only the loop is returned.
    local function compile(t,nested)
        local done = s_done
        local r = { }
        local m = 0
        for i=1,#t do
            local ti = t[i]
            if ti == "*" and i == 1 then
                done = f_any_all("string")
            else
                local t1 = ti[1]
                local t2 = ti[2] or "string"
                if type(t2) == "table" then
                    -- a subtable becomes a numbered scanner of its own
                    n = n + 1
                    f[n] = compile(t2,n)
                    t2 = n
                end
                local t3 = ti[3]
                if type(t3) == "function" then
                    -- todo: also create shortcut
                elseif t3 then
                    c[t3] = f_shortcut(t3,t3)
                    if t1 == "*" then
                        if i == 1 then
                            -- any key goes so what follows is irrelevant
                            done = f_any_all_c(t3,t2)
                            break
                        else
                            done = f_any_c(t3,t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif_c or f_if_c)(t1,t1,t3,t2)
                    end
                else
                    if t1 == "*" then
                        if i == 1 then
                            -- any key goes so what follows is irrelevant
                            done = f_any_all(t2)
                            break
                        else
                            done = f_any(t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif   or f_if  )(t1,t1,t2)
                     -- r[m] = (m > 1 and f_elseif_x or f_if_x)(t1,t1,t1,t2)
                    end
                end
            end
        end
        local loop = f_check(r,done)
        if nested then
            return f_nested(nested,loop)
        else
            return loop
        end
    end

    local p = t and presets[t] -- already done in implement
    if p then
        t = p
    end
    local tt = type(t)
    if tt == "string" then
        if a then
            local s = lpegmatch(p_unquoted,t)
            if s and t ~= s then
                code = t -- a quoted string is passed as constant
            else
                code = f_scan(t)
            end
            code = wrapactions(code)
            code = f_simple(f,code)
        else
            return scanners[t]
        end
    elseif tt ~= "table" then
        return
    elseif #t == 1 then
        local ti = t[1]
        if type(ti) == "table" then
            ti = compile(ti)
            code = "data"
            if a then
                code = wrapactions(code)
            end
            code = f_table(f,ti,code)
        elseif a then
            code = wrapactions(f_scan(ti))
            code = f_simple(f,code)
        else
            return scanners[ti]
        end
    elseif #t == 0 then
        if specification.usage == "value" then
            code = "b"
            args = "_,b"
        else
            code = ""
            args = ""
        end
        if a then
            code = wrapactions(code)
        end
        code = f_singular(c,f,args,code)
    else
        local r = { }
        local p = { }
        local m = 0
        for i=1,#t do
            local ti = t[i]
            local tt = type(ti)
            if tt == "table" then
                if ti[1] == "_constant_" then
                    local v = ti[2]
                    if type(v) == "string" then
                        r[i] = f_string(v)
                    else
                        r[i] = tostring(v)
                    end
                else
                    -- key tables at this level are numbered from 101 on
                    m = m + 1
                    p[m] = compile(ti,100+m)
                    r[i] = f_scan(100+m)
                end
            elseif tt == "number" then
                r[i] = tostring(ti)
            elseif tt == "boolean" then
                r[i] = tostring(ti)
            else
                local s = lpegmatch(p_unquoted,ti)
                if s and ti ~= s then
                    r[i] = ti -- a string, given as "'foo'" or '"foo"'
                elseif scanners[ti] then
                    r[i] = f_scan(ti)
                else
                    report_compile("unknown scanner %a",ti)
                    r[i] = ti
                end
            end
        end
        code = concat(r,",")
        if a then
            code = wrapactions(code)
        end
        code = f_sequence(c,f,p,code)
    end
    if not code then
        return
    end
    if trace_compile then
        report_compile("code: %s",code)
    end
    -- the source is kept apart from the chunk so that it can be shown when
    -- loading fails
    local chunk, message = loadstripped(code)
    local result
    if chunk then
        result = chunk() -- sets action
    else
        report_compile("error in code: %s",code)
        report_compile("error message: %s",message)
    end
    if a then
        tokens._action = nil
    end
    if result then
        return result
    end
end
596
597-- local fetch = tokens.compile {
598--     "string",
599--     "string",
600--     {
601--         { "data",    "string" },
602--         { "tab",     "string" },
603--         { "method",  "string" },
604--         { "foo", {
605--             { "method", "integer" },
606--             { "compact", "number" },
607--             { "nature" },
608--             { "*" }, -- any key
609--         } },
610--         { "compact", "string", "tonumber" },
611--         { "nature",  "boolean" },
612--         { "escape",  "string" },
613--         { "escape"  },
614--     },
615--     "boolean",
616-- }
617--
618-- os.exit()
619