toks-scn.lua /size: 17 Kb    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['toks-scn'] = {
2    version   = 1.001,
3    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
4    copyright = "PRAGMA ADE / ConTeXt Development Team",
5    license   = "see context related readme files"
6}
7
8-- Writing this kind of code (and completing the newtoken code base) is fun. I did
9-- so with the brilliant film music from The Girl with the Dragon Tattoo running in a
10-- loop in the background (three cd's by Trent Reznor and Atticus Ross). An alien
11-- feeling helps with alien code.
12
13-- todo: more \let's at the tex end
14
15local type, next, tostring, tonumber = type, next, tostring, tonumber
16
17local formatters     = string.formatters
18local concat         = table.concat
19
20local scanners       = tokens.scanners
21local tokenbits      = tokens.bits
22
23local scanstring     = scanners.string
24local scanargument   = scanners.argument
25local scandelimited  = scanners.delimited  -- lmtx
26local scanverbatim   = scanners.verbatim
27local scantokenlist  = scanners.tokenlist
28local scantoks       = scanners.toks
29local scaninteger    = scanners.integer
30local scancardinal   = scanners.cardinal
31local scannumber     = scanners.number
32local scankeyword    = scanners.keyword
33local scankeywordcs  = scanners.keywordcs
34local scanword       = scanners.word
35local scanletters    = scanners.letters
36local scankey        = scanners.key
37local scancode       = scanners.code
38local scanboolean    = scanners.boolean
39local scandimen      = scanners.dimen
40local scanglue       = scanners.glue
41local scangluevalues = scanners.gluevalues
42local scangluespec   = scanners.gluespec
43local scancsname     = scanners.csname
44
45local todimen        = number.todimen
46local toboolean      = toboolean
47
48local lpegmatch      = lpeg.match
49local p_unquoted     = lpeg.Cs(lpeg.patterns.unquoted)
50
51local trace_compile  = false  trackers.register("tokens.compile", function(v) trace_compile = v end)
52local report_compile = logs.reporter("tokens","compile")
53local report_scan    = logs.reporter("tokens","scan")
54
55local open  = tokenbits.open
56local close = tokenbits.close
57
-- Looks for an opening brace. The scanner is asked for a code restricted to the
-- 'open' bits until it either delivers 123 (the left brace, in which case true
-- is returned) or delivers nothing at all (in which case nothing is returned).
-- Any other code is skipped.

local function scanopen()
    local code = scancode(open)
    while code do
        if code == 123 then
            return true
        end
        code = scancode(open)
    end
end
69
-- Looks for a closing brace. The scanner is asked for a code restricted to the
-- 'close' bits until it either delivers 125 (the right brace, in which case true
-- is returned) or delivers nothing at all (in which case nothing is returned).
-- Any other code is skipped.

local function scanclose()
    local code = scancode(close)
    while code do
        if code == 125 then
            return true
        end
        code = scancode(close)
    end
end

-- Both brace helpers are made available to other modules.

scanners.scanopen  = scanopen
scanners.scanclose = scanclose
84
-- Collects strings into an indexed table. An opening brace is optional; when one
-- was seen, a closing brace is scanned after the last string. Collecting stops as
-- soon as the string scanner delivers nothing.

local function scanlist()
    local braced = scanopen()
    local result = { }
    local count  = 0
    local item   = scanstring()
    while item do
        count = count + 1
        result[count] = item
        item = scanstring()
    end
    if braced then
        scanclose()
    end
    return result
end
103
-- Scans a conditional. The words "true" and "false" map onto the matching
-- boolean. Otherwise an integer is scanned and zero counts as true (with a
-- conditional 0=true). When no integer is found either, nil is returned.

local function scanconditional()
    local word = scanword()
    if word == "true" then
        return true
    elseif word == "false" then
        return false
    end
    local value = scaninteger()
    if not value then
        return nil
    end
    return value == 0
end
118
-- Scans key/value pairs into a table.
--
-- t    : table that maps a key (word) onto a function that scans its value
-- data : optional table that receives the results (a new one is made otherwise)
--
-- Without t nothing is scanned. The pairs can optionally be wrapped in braces.
-- Words for which t has no getter are dropped and scanning stops when no word
-- is found. The (possibly fresh) data table is returned.

local function scantable(t,data)
    data = data or { }
    if not t then
        return data
    end
    local braced = scanopen()
    local key    = scanword(true)
    while key do
        local getter = t[key]
        if getter then
            data[key] = getter()
        end
        -- unknown keys are silently skipped
        key = scanword(true)
    end
    if braced then
        scanclose()
    end
    return data
end
144
-- Wraps a string in single quotes so that it can be told apart from a scanner
-- name; any other value is passed through untouched.

function tokens.constant(s)
    if type(s) ~= "string" then
        return s
    end
    return "'" .. s .. "'"
end

-- The composite scanners defined above are registered next to the built-in ones.

scanners.list        = scanlist
scanners.table       = scantable
scanners.conditional = scanconditional
156
-- Scans any number of "width", "height" and "depth" keywords (in any order, a
-- later one overrides an earlier one), each followed by a dimension. When at
-- least one was given, three values are returned with zero for the missing ones.
-- When none was given nothing is returned (we inherit).

function scanners.whd()
    local wd, ht, dp
    local found = true
    repeat
        if scankeyword("width") then
            wd = scandimen()
        elseif scankeyword("height") then
            ht = scandimen()
        elseif scankeyword("depth") then
            dp = scandimen()
        else
            found = false
        end
    until not found
    if not (wd or ht or dp) then
        return
    end
    return wd or 0, ht or 0, dp or 0
end
176
177-- begin lmtx
178
-- The delimiters used by the bracket scanners below.

local l = utf.byte("[")
local r = utf.byte("]")

-- Scans a mandate [...] argument. When the delimited scanner finds nothing, a
-- message with the current line number and filename is reported and an empty
-- string is returned instead.

local function scanbracketed()
    local content = scandelimited(l, r)
    if not content then
        local readstate = status.getreadstate()
        report_scan("missing argument in line %i of %a", readstate.linenumber, readstate.filename)
        content = ""
    end
    return content
end
192
-- Scans an optional [...] argument; a missing one silently becomes an empty
-- string.

local function scanoptional()
    local content = scandelimited(l, r)
    if content then
        return content
    end
    return ""
end

-- Same delimiters, but with false passed as third argument to the delimited
-- scanner; whatever that scanner returns is passed on as-is.

local function scanbracketedasis()
    return scandelimited(l, r, false)
end

-- Calls the argument scanner with false as its argument; whatever that scanner
-- returns is passed on as-is.

local function scanargumentasis()
    return scanargument(false)
end

scanners.bracketed     = scanbracketed
scanners.optional      = scanoptional
scanners.bracketedasis = scanbracketedasis
scanners.argumentasis  = scanargumentasis
209
210-- end lmtx
211
-- This table is passed as environment when generated scanner code gets loaded, so
-- only the names listed here can be referenced by that code.

local shortcuts = {

    -- namespaces and token bits

    tokens            = tokens,
    scanners          = scanners,
    bits              = tokenbits,
    open              = open,
    close             = close,

    -- built-in scanners

    scanstring        = scanstring,
    scanargument      = scanargument,
    scanverbatim      = scanverbatim,
    scantokenlist     = scantokenlist,
    scantoks          = scantoks,
    scaninteger       = scaninteger,
    scancardinal      = scancardinal,
    scannumber        = scannumber,
    scankeyword       = scankeyword,
    scankeywordcs     = scankeywordcs,
    scanword          = scanword,
    scanletters       = scanletters,
 -- scankey           = scankey,
    scancode          = scancode,
    scanboolean       = scanboolean,
    scancsname        = scancsname,

    -- dimensions and glue

    scandimen         = scandimen,
    scandimension     = scandimen,
    scanglue          = scanglue, -- list
    scangluespec      = scangluespec,
    scangluevalues    = scangluevalues,

    -- boxes

    scanbox           = scanners.box,
    scanhbox          = scanners.hbox,
    scanvbox          = scanners.vbox,
    scanvtop          = scanners.vtop,

    -- composite scanners defined in this file

    scantable         = scantable, -- not directly useable
    scanconditional   = scanconditional,
    scanopen          = scanopen,
    scanclose         = scanclose,
    scanlist          = scanlist,

    -- converters and helpers

    todimen           = todimen,
    tonumber          = tonumber,
    tostring          = tostring,
    toboolean         = toboolean,
    inspect           = inspect,
    report            = report_scan,

    -- lmtx

    scandelimited     = scandelimited, -- not directly useable
    scanbracketed     = scanbracketed,
    scanoptional      = scanoptional,
    scanbracketedasis = scanbracketedasis,
    scanargumentasis  = scanargumentasis,

}

tokens.shortcuts = shortcuts
263
local load = load
local dump = string.dump

-- Loads generated code with the shortcuts table as its environment. The chunk
-- name and mode are left at their defaults. The results of load are passed on
-- unchanged, so callers get the chunk or nil plus a message.

local function loadstripped(code)
    local chunkname, mode = nil, nil
    return load(code,chunkname,mode,shortcuts)
 -- return load(dump(load(code),true),nil,nil,shortcuts)
end

-- Names of the converters that can be given as third entry in a key definition.

tokens.converters = {
    tonumber  = "tonumber",
    tostring  = "tostring",
    toboolean = "toboolean",
    todimen   = "todimen",
    toglue    = "todimen",
}
279
280-- We could just pickup a keyword but then we really need to make sure that no number
281-- follows it when that is the assignment and adding an optional = defeats the gain
282-- in speed. Currently we have sources with no spaces (\startcontextdefinitioncode
283-- ...) so it fails there.
284--
285-- Another drawback is that we then need to use { } instead of ending with \relax (as
286-- we can do now) but that is no big deal. It's just that I then need to check the TeX
287-- end. More pain than gain and a bit risky too. Using scanletters works better, but
288-- the gain is only some 10 percent but if we don't have keywords with numbers it might
289-- make sense in the end, some day.
290
-- Templates for the code that tokens.compile generates.

-- One branch per known key: arguments are key, key, scanner.

local f_if     = formatters["  if scankeywordcs('%s') then data['%s'] = scan%s()"]
local f_elseif = formatters["  elseif scankeywordcs('%s') then data['%s'] = scan%s()"]

----- f_if_x     = formatters[    "  if not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]
----- f_elseif_x = formatters["  elseif not data['%s'] and scankeywordcs('%s') then data['%s'] = scan%s()"]

-- The same with a converter applied: arguments are key, key, converter, scanner.

local f_if_c     = formatters["  if scankeywordcs('%s') then data['%s'] = %s(scan%s())"]
local f_elseif_c = formatters["  elseif scankeywordcs('%s') then data['%s'] = %s(scan%s())"]

-- Single calls and local declarations (f_local and f_scan_c are not referenced in
-- the visible part of this file).

local f_local    = formatters["local scan%s = scanners.%s"]
local f_scan     = formatters["scan%s()"]
local f_scan_c   = formatters["%s(scan%s())"]
local f_shortcut = formatters["local %s = scanners.converters.%s"]

-- How a chain of branches ends: either we quit the loop (s_done) or we accept any
-- key (f_any, f_any_c). The f_any_all variants are used when the catch all comes
-- first, so there is no preceding 'if' to attach to.

local s_done      = "  else break end"
local f_any       = formatters["  else local key = scanword(true) if key then data[key] = scan%s() else break end end"]
local f_any_c     = formatters["  else local key = scanword(true) if key then data[key] = %s(scan%s()) else break end end"]
local f_any_all   = formatters["  local key = scanword(true) if key then data[key] = scan%s() else break end"]
local f_any_all_c = formatters["  local key = scanword(true) if key then data[key] = %s(scan%s()) else break end"]

-- Wrappers that turn the pieces into a chunk that returns a scanner function. The
-- %\nt directive receives a table.

local f_table    = formatters["%\nt\nreturn function()\n  local data = { }\n%s\n  return %s\nend\n"]
local f_sequence = formatters["%\nt\n%\nt\n%\nt\nreturn function()\n    return %s\nend\n"]
local f_singular = formatters["%\nt\n%\nt\n\nreturn function(%s)\n    return %s\nend\n"]
local f_simple   = formatters["%\nt\nreturn function()\n    return %s\nend\n"]
local f_nested   = formatters["local function scan%s()\n  local data = { }\n%s\n  return data\nend\n"]

-- Constants and actions.

local f_string   = formatters["%q"]
local f_action_f = formatters["action%s(%s)"]
local f_action_s = formatters["local action%s = tokens._action[%s]"]

-- The loop that picks up keys: an optional brace, the branches (a table), the way
-- the chain ends, and the matching closing brace.

local f_check = formatters[
    "  local wrapped = scanopen()\n" ..
    "  while true do\n" ..
    "    %\nt\n" ..
    "    %s\n" ..
    "  end\n" ..
    "  if wrapped then\n" ..
    "    scanclose()\n" ..
    "  end\n"
]
333
334-- using these shortcuts saves temporary small tables (okay, it looks uglier)
335
-- Named argument lists: each key maps onto a list in which the same scanner name
-- is repeated the given number of times.

local presets

do

    -- Returns a new list that holds 'kind' exactly 'count' times.

    local function repeated(kind,count)
        local list = { }
        for i=1,count do
            list[i] = kind
        end
        return list
    end

    presets = {
        ["1 string" ] = repeated("string",1),
        ["2 strings"] = repeated("string",2),
        ["3 strings"] = repeated("string",3),
        ["4 strings"] = repeated("string",4),
        ["5 strings"] = repeated("string",5),
        ["6 strings"] = repeated("string",6),
        ["7 strings"] = repeated("string",7),
        ["8 strings"] = repeated("string",8),

        ["1 argument" ] = repeated("argument",1),
        ["2 arguments"] = repeated("argument",2),
        ["3 arguments"] = repeated("argument",3),
        ["4 arguments"] = repeated("argument",4),

        ["1 integer"]  = repeated("integer",1),
        ["2 integers"] = repeated("integer",2),
        ["3 integers"] = repeated("integer",3),
        ["4 integers"] = repeated("integer",4),
    }

end

tokens.presets = presets
358
-- Compiles a scanner specification into a Lua function by generating source code
-- and loading it with the shortcuts table as environment.
--
-- specification : either the argument definition itself or a table with fields
--                 'arguments' (the definition), 'actions' (a function or a list
--                 of functions that get wrapped around the scanned values) and
--                 'usage' (only checked for "value", and only when there are no
--                 arguments)
--
-- The argument definition can be:
--
--   a string            : the name of a preset or of a scanner; without actions
--                         scanners[name] is returned directly
--   a list of one entry : a scanner name, or a table with key definitions
--   an empty list       : only the actions are called
--   a longer list       : scanner names, quoted strings, numbers, booleans,
--                         { "_constant_", value } entries and tables with key
--                         definitions, delivered as multiple return values
--
-- A key definition is { key, scanner, converter } where the scanner defaults to
-- "string" and can itself be a table with key definitions; the key "*" accepts
-- any key.
--
-- Returns the scanner function, or nothing when the definition has an unsupported
-- type or when the generated code fails to load (which gets reported).

function tokens.compile(specification)
    local f = { } -- lines that precede the returned function (nested scanners, action locals)
    local n = 0   -- number of lines in f
    -- NOTE(review): c is filled by converter name (so not as a list) while the
    -- templates consume it with a table-concat directive; presumably nothing ends up
    -- in the generated code and converters resolve via the load environment - confirm.
    local c = { }
    local t = specification.arguments or specification
    local a = specification.actions or nil
    if type(a) == "function" then
        a = { a }
    end
    local code
    local args

    -- Wraps an expression in calls to the actions (the last action becomes the
    -- outermost call) and adds a local for each action to the preamble. While the
    -- code is loaded the actions are found in tokens._action. Without actions the
    -- expression is returned as-is.

    local function wrapactions(expression)
        if a then
            tokens._action = a
            for i=1,#a do
                expression = f_action_f(i,expression)
                n    = n + 1
                f[n] = f_action_s(i,i)
            end
        end
        return expression
    end

    -- Generates the loop that scans the keys defined in t. When 'nested' is given
    -- the loop is wrapped in a local function scan<nested> that returns the data
    -- table, otherwise the bare loop is returned.

    local function compile(t,nested)
        local done = s_done
        local r = { }
        local m = 0
        for i=1,#t do
            local ti = t[i]
            if ti == "*" and i == 1 then
                done = f_any_all("string")
            else
                local t1 = ti[1]
                local t2 = ti[2] or "string"
                if type(t2) == "table" then
                    -- a nested table gets its own numbered scanner in the preamble
                    n = n + 1
                    f[n] = compile(t2,n)
                    t2 = n
                end
                local t3 = ti[3]
                if type(t3) == "function" then
                    -- todo: also create shortcut
                elseif t3 then
                    c[t3] = f_shortcut(t3,t3)
                    if t1 == "*" then
                        if i == 1 then
                            done = f_any_all_c(t3,t2)
                            break
                        else
                            done = f_any_c(t3,t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif_c or f_if_c)(t1,t1,t3,t2)
                    end
                else
                    if t1 == "*" then
                        if i == 1 then
                            done = f_any_all(t2)
                            break
                        else
                            done = f_any(t2)
                        end
                    else
                        m = m + 1
                        r[m] = (m > 1 and f_elseif   or f_if  )(t1,t1,t2)
                     -- r[m] = (m > 1 and f_elseif_x or f_if_x)(t1,t1,t1,t2)
                    end
                end
            end
        end
        local c = f_check(r,done)
        if nested then
            return f_nested(nested,c)
        else
            return c
        end
    end

    local p = t and presets[t] -- already done in implement
    if p then
        t = p
    end
    local tt = type(t)
    if tt == "string" then
        if a then
            local s = lpegmatch(p_unquoted,t)
            if s and t ~= s then
                code = t -- a quoted string is passed as constant
            else
                code = f_scan(t)
            end
            code = wrapactions(code)
            code = f_simple(f,code)
        else
            return scanners[t]
        end
    elseif tt ~= "table" then
        return
    elseif #t == 1 then
        local ti = t[1]
        if type(ti) == "table" then
            ti = compile(ti)
            code = wrapactions("data")
            code = f_table(f,ti,code)
        elseif a then
            code = wrapactions(f_scan(ti))
            code = f_simple(f,code)
        else
            return scanners[ti]
        end
    elseif #t == 0 then
        if specification.usage == "value" then
            -- the generated function passes its second argument to the actions
            code = "b"
            args = "_,b"
        else
            code = ""
            args = ""
        end
        code = wrapactions(code)
        code = f_singular(c,f,args,code)
    else
        local r = { }
        local p = { }
        local m = 0
        for i=1,#t do
            local ti = t[i]
            local tt = type(ti)
            if tt == "table" then
                if ti[1] == "_constant_" then
                    local v = ti[2]
                    if type(v) == "string" then
                        r[i] = f_string(v)
                    else
                        r[i] = tostring(v)
                    end
                else
                    -- key definitions get a scanner numbered from 101 upwards
                    m = m + 1
                    p[m] = compile(ti,100+m)
                    r[i] = f_scan(100+m)
                end
            elseif tt == "number" then
                r[i] = tostring(ti)
            elseif tt == "boolean" then
                r[i] = tostring(ti)
            else
                local s = lpegmatch(p_unquoted,ti)
                if s and ti ~= s then
                    r[i] = ti -- a string, given as "'foo'" or '"foo"'
                elseif scanners[ti] then
                    r[i] = f_scan(ti)
                else
                    report_compile("unknown scanner %a",ti)
                    r[i] = ti
                end
            end
        end
        code = wrapactions(concat(r,","))
        code = f_sequence(c,f,p,code)
    end
    if not code then
        return
    end
    if trace_compile then
        report_compile("code: %s",code)
    end
    -- The loaded chunk gets its own variable so that the source is still around
    -- when we have to report an error.
    local compiled, message = loadstripped(code)
    if compiled then
        compiled = compiled() -- sets action
    else
        report_compile("error in code: %s",code)
        report_compile("error message: %s",message)
    end
    if a then
        tokens._action = nil
    end
    if compiled then
        return compiled
    end
end
560
561-- local fetch = tokens.compile {
562--     "string",
563--     "string",
564--     {
565--         { "data",    "string" },
566--         { "tab",     "string" },
567--         { "method",  "string" },
568--         { "foo", {
569--             { "method", "integer" },
570--             { "compact", "number" },
571--             { "nature" },
572--             { "*" }, -- any key
573--         } },
574--         { "compact", "string", "tonumber" },
575--         { "nature",  "boolean" },
576--         { "escape",  "string" },
577--         { "escape"  },
578--     },
579--     "boolean",
580-- }
581--
582-- os.exit()
583