if not modules then modules = { } end modules ['mtx-wtoc'] = {
    version   = 1.001,
    comment   = "a hack to avoid a dependency on cweb / web2c",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- This is a hack. When I have time and motivation I'll make a better version. Sorry
-- for the mess. It's not an example of proper coding. It's also not that efficient.
-- It is not general purpose either, just a helper for luametatex in order to not be
-- dependent on installing the cweb infrastructure (which normally gets compiled as
-- part of the complex tl build). Okay, we do have a dependency on luametatex as lua
-- runner, although this script can easily be turned into a pure lua variant (not
-- needing mtxrun helpers). If really needed one could build luametatex without
-- mplib and then do the first bootstrap, but there's always a c file to start with
-- anyway; only when mp.w cum suis get updated do we need to convert.
--
-- The w files get converted in .25 seconds, which is not that bad.

-- @, @/ @| @# @+ @; @[ @]
-- @.text @>(monospaced) | @:text @>(macro driven) | @= verbose@> | @! underlined @> | @t text @> (hbox) | @q ignored @>
-- @^index@>
-- @f text renderclass
-- @s idem | @p idem | @& strip (spaces before) | @h
-- @'char' (ascii code)
-- @l nonascii
-- @x @y @z changefile | @i webfile
-- @* title.
-- @  explanation (not ok ... stops at outer @)
--
-- The comment option doesn't really work so one needs to do some manual work
-- afterwards, but I'll only use that when we move away from w files.
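
-- To make the above a bit more concrete, here is a tiny, hypothetical cweb
-- fragment (not taken from mp.w) with the constructs that this script acts on:
-- @d defines, named @<...@>= sections, @c code sections and @<...@> references:
--
--   @ Some explanation that would end up as a comment.
--
--   @d maxbuffer 100
--
--   @<Declare the buffer@>=
--   static char buffer[maxbuffer];
--
--   @c
--   @<Declare the buffer@>;
--   int main (void) { return 0; }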

local next = next
local lower, find, gsub = string.lower, string.find, string.gsub
local topattern = string.topattern
local striplines = utilities.strings.striplines
local concat = table.concat

local P, R, S, C, Cs, Ct, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.Cc
local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns

local newline    = lpegpatterns.newline
local space      = lpegpatterns.space -- S(" \n\r\t\f\v")
local restofline = (1-newline)^0

local cweb = { }

-- We have several ways to look at and filter the data so we have different
-- lpegs. The output looks ugly but I think that is the whole idea of cweb.

local report  = logs.reporter("cweb to normal c")
local verbose = false
-- local verbose = true

-- common

local p_beginofweb = P("@")
local p_endofweb   = P("@>")
local p_noweb      = (1-p_endofweb)^1
local p_squote     = P("'")
local p_period     = P(".")
local p_noperiod   = (1-p_period)^1
local p_spacing    = space^1
local p_nospacing  = (1-space)^1
local p_equal      = P("=")
local p_noequal    = (1-p_equal)^1
local p_rest       = P(1)
local p_escape     = p_beginofweb * p_beginofweb
local c_unescape   = p_escape / "@"
local p_structure  = p_beginofweb * (S("*dc \t\n\r"))
local p_content    = (p_escape + (1 - p_structure))^1
local c_noweb      = C(p_noweb)
local c_content    = C(p_content)
local c_nospacing  = C(p_nospacing)
local c_noperiod   = C(p_noperiod)

local function clean(s)
    s = lower(s)
    s = gsub(s,"%s+"," ")
    s = gsub(s,"%s+$","")
    s = gsub(s,"%s*%.%.%.$","...")
    return s
end
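
-- Tag names are normalized before they are used as keys; a sketch with an
-- assumed (hypothetical) section name:
--
-- print(clean("Scan  A  Token ...")) -- "scan a token..."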

local cleanup do

    local p_ignore_1  = S(",/|#+;[]sp&")
    local p_ignore_2  = S("^.:=!tq") * p_noweb * p_endofweb
    local p_ignore_3  = S("f") * p_spacing * p_nospacing * p_spacing * p_nospacing
    local p_ignore_4  = p_squote * (1-p_squote)^0 * p_squote
    local p_ignore_5  = S("l") * p_spacing * p_nospacing

    local p_replace_1 = P("h") / "\n@<header goes here@>\n"
    local p_replace_2 = (P("#") * space^0) / "\n"

    local p_strip_1   = (newline * space^1) / "\n"

    local p_whatever  = (
        p_beginofweb / ""
      * (
            p_replace_1
          + p_replace_2
          + Cs(
                p_ignore_1
              + p_ignore_2
              + p_ignore_3
              + p_ignore_4
              + p_ignore_5
            ) / ""
        )
    )

    local p_whatever =
        (newline * space^1) / ""
      * p_whatever
      * (space^0 * newline) / "\n"
      + p_whatever

    local pattern = Cs ( (
        p_escape
      + p_whatever
      + p_rest
    )^1 )

    cleanup = function(s)
        return lpegmatch(pattern,s)
    end

end
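
-- A rough sketch of what cleanup does to an assumed fragment: markup that only
-- matters for weaving is dropped, @# becomes a newline and @h becomes a
-- reference to the collected defines:
--
-- print(cleanup("x = 1@; @^an index entry@> @# @h"))
-- -- "x = 1" plus some newlines and "@<header goes here@>"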

local finalize do

    -- The reason why we need to strip leading spaces is that compilers complain about this:
    --
    -- if (what)
    --   this;
    --   that;
    --
    -- with the 'that' being confusingly indented. The fact that it has to be mentioned is of
    -- course a side effect of compact c coding which can introduce 'errors'. Now, this
    -- 'confusing' indentation is a side effect of
    --
    -- if (what)
    --   this;
    --   @<that@>;
    --
    -- or actually:
    --
    --   @<this is what@>;
    --   this;
    --   @<that@>;
    --
    -- which then leads to the conclusion that @<that@> should not be indented! But ... cweb
    -- removes all leading spaces in lines, so that obscured the issue. Bad or not? It is
    -- anyway a very strong argument for careful coding and maybe using some more { } in case
    -- of snippets because web2c obscures some warnings!

    ----- strip_display = (P("/*") * (1 - P("*/"))^1 * P("*/")) / " "
    local strip_inline  = (P("//") * (1 - newline)^0)           / ""
    local keep_inline   = P("//") * space^0 * P("fall through")

    local strip_display = (P("/*") * (1 - P("*/"))^1 * P("*/"))

    strip_display =
        (newline * space^0 * strip_display * newline) / "\n"
        + strip_display / " "

    local strip_spaces  = (space^1 * newline)             / "\n"
    ----- strip_lines   = (space^0 * newline * space^0)^3 / "\n\n"
    local strip_lines   = newline * (space^0 * newline)^3 / "\n\n"

    local strip_empties = newline/"" * newline * space^1 * P("}")
                        + space^2 * P("}")   * (newline * space^0 * newline / "\n")
                        + space^2 * R("AZ") * R("AZ","__","09")^1 * P(":") * (space^0 * newline * space^0 * newline / "\n")

    local finalize_0 = Cs((c_unescape + p_rest)^0)
    local finalize_1 = Cs((strip_display + keep_inline + strip_inline + c_unescape + p_rest)^0)
    local finalize_2 = Cs((strip_lines                  + p_rest)^0)
    local finalize_3 = Cs((c_unescape + strip_spaces    + p_rest)^1)
    local finalize_4 = Cs((c_unescape + strip_empties   + p_rest)^1)

    finalize = function(s,keepcomment)
        s = keepcomment and lpegmatch(finalize_0,s) or lpegmatch(finalize_1,s)
        s = lpegmatch(finalize_2,s)
        s = lpegmatch(finalize_3,s)
        s = lpegmatch(finalize_4,s)
        -- maybe also empty lines after a LABEL:
        return s
    end

end

local function fixdefine(s)
    s = finalize(s)
    s = gsub(s,"[\n\r\t ]+$","")
    s = gsub(s,"[\t ]*[\n\r]+"," \\\n")
    return s
end
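
-- Defines can span multiple lines in the w file, so after finalizing we turn
-- embedded newlines into backslash continuations; a sketch with an assumed
-- body (the "# define " prefix itself is added later by p_define):
--
-- print(fixdefine("halfp(A)\n(A / 2)\n")) -- "halfp(A) \\\n(A / 2)"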

local function addcomment(c,s)
    if c ~= "" then
        c = striplines(c)
        if find(c,"\n") then
            c = "\n\n/*\n" .. c .. "\n*/\n\n"
        else
            c = "\n\n/* " .. c .. " */\n\n"
        end
        return c .. s
    else
        return s
    end
end
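
-- Sketch of the effect, with assumed arguments: a one liner such as
--
--   addcomment("scan the next token","next_token();")
--
-- puts "/* scan the next token */" on a line of its own before the code, while
-- a comment that contains newlines is wrapped in a multi line /* ... */ block.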

do

    local result = { }

    local p_nothing   = Cc("")
    local p_comment   = Cs(((p_beginofweb * (space + newline + P("*")))/"" * c_content)^1)
                      + p_nothing

    local p_title     = c_noperiod * (p_period/"")
    local p_skipspace = newline + space
    local c_skipspace = p_skipspace / ""
    local c_title     = c_skipspace * p_title * c_skipspace * Cc("\n\n")
    local c_obeyspace = p_skipspace / "\n\n"

    local p_comment   = Cs( (
                            ((p_beginofweb * p_skipspace)/""           * c_content)
                          + ((p_beginofweb * P("*")^1   )/"" * c_title * c_content)
                          + c_obeyspace
                        )^1 )
                      + p_nothing

    local p_define    = C(p_beginofweb * P("d")) * Cs(Cc("# define ") * p_content)
    local p_code      = C(p_beginofweb * P("c")) * c_content
    local p_header    = C(p_beginofweb * P("(")) * c_noweb * C(p_endofweb * p_equal) * c_content
    local p_snippet   = C(p_beginofweb * S("<")) * c_noweb * C(p_endofweb * p_equal) * c_content
    local p_reference = C(p_beginofweb * S("<")) * c_noweb * C(p_endofweb          ) * #(1-p_equal)
    local p_preset    =   p_beginofweb * S("<")  * c_noweb *   p_endofweb

    local p_indent    = C(space^0)
    local p_reference = p_indent * p_reference

    local p_c_define  = p_comment * p_define
    local p_c_code    = p_comment * p_code
    local p_c_header  = p_comment * p_header
    local p_c_snippet = p_comment * p_snippet

    local p_n_define  = p_nothing * p_define
    local p_n_code    = p_nothing * p_code
    local p_n_header  = p_nothing * p_header
    local p_n_snippet = p_nothing * p_snippet

    local function preset(tag)
        tag = clean(tag)
        if find(tag,"%.%.%.$") then
            result.dottags[tag] = false
        end
        result.alltags[tag] = tag
    end

    local p_preset = (p_preset / preset + p_rest)^1

    -- We can have both definitions and references with trailing ... and this is imo
    -- a rather error prone feature: I'd expect the definitions to be the expanded ones
    -- so that references can be shorter ... anyway, we're stuck with this (also with
    -- inconsistent usage of "...", " ...", "... " and such).

    local function getpresets(data)

        local alltags = result.alltags
        local dottags = result.dottags

        lpegmatch(p_preset,data)

        local list = table.keys(alltags)

        table.sort(list,function(a,b)
            a = gsub(a,"%.+$"," ") -- slow
            b = gsub(b,"%.+$"," ") -- slow
            return a < b
        end)

        for k, v in next, dottags do
            local s = gsub(k,"%.%.%.$","")
            local p = "^" .. topattern(s,false,"all")
            for i=1,#list do
                local a = list[i]
                if a ~= k and find(a,p) then
                    dottags[k] = true
                    alltags[k] = a
                end
            end
        end

        for k, v in next, alltags do
            local t = alltags[v]
            if t then
                alltags[k] = t
            end
        end

    end
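
    -- So, assuming (hypothetical names) a full definition @<declare the scanner
    -- locals@>= and a shorthand reference @<declare the scanner ...@> elsewhere,
    -- the dotted tag is mapped onto the full one here, and both keys end up
    -- pointing to the same snippet.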

    local function addsnippet(c,b,tag,e,s)
        if c ~= "" then
            s = addcomment(c,s)
        end
        local alltags  = result.alltags
        local snippets = result.snippets
        local tag  = clean(tag)
        local name = alltags[tag]
        if snippets[name] then
            if verbose then
                report("add snippet  : %s",name)
            end
            s = snippets[name] .. "\n" .. s
        else
            if verbose then
                report("new snippet  : %s",name)
            end
            s = "/* snippet: " .. name .. " */\n" .. s
        end
        snippets[name] = s
        result.nofsnippets = result.nofsnippets + 1
        return ""
    end

    local function addheader(c,b,tag,e,s)
        if c ~= "" then
            s = addcomment(c,s)
        end
        local headers     = result.headers
        local headerorder = result.headerorder
        if headers[tag] then
            if verbose then
                report("add header   : %s",tag)
            end
            s = headers[tag] .. "\n" .. s
        else
            if verbose then
                report("new header   : %s",tag)
            end
            headerorder[#headerorder+1] = tag
        end
        headers[tag] = s
        result.nofheaders = result.nofheaders + 1
        return ""
    end

    local function addcode(c,b,s)
        if c ~= "" then
            s = addcomment(c,s)
        end
        local nofcode = result.nofcode + 1
        result.codes[nofcode] = s
        result.nofcode = nofcode
        return ""
    end

    local function adddefine(c,b,s)
        s = fixdefine(s)
        if c ~= "" then
            s = addcomment(c,s)
        end
        local nofdefines = result.nofdefines + 1
        result.defines[nofdefines] = s
        result.nofdefines = nofdefines
        return ""
    end

    local p_n_collect_1 = Cs ( (
        p_n_snippet / addsnippet
      + p_n_header  / addheader
      + p_rest
    )^1 )

    local p_n_collect_2 = Cs ( (
        p_n_code   / addcode
      + p_n_define / adddefine
      + p_rest
    )^1 )

    local p_c_collect_1 = Cs ( (
        p_c_snippet / addsnippet
      + p_c_header  / addheader
      + p_rest
    )^1 )

    local p_c_collect_2 = Cs ( (
        p_c_code   / addcode
      + p_c_define / adddefine
      + p_rest
    )^1 )

    local function getcontent_1(data)
        return lpegmatch(result.keepcomment and p_c_collect_1 or p_n_collect_1,data)
    end

    local function getcontent_2(data)
        return lpegmatch(result.keepcomment and p_c_collect_2 or p_n_collect_2,data)
    end

 -- local function dereference(b,tag,e)
    local function dereference(indent,b,tag,e)
        local tag  = clean(tag)
        local name = result.alltags[tag]
        if name then
            local data = result.snippets[name]
            if data then
                result.usedsnippets[name] = true
                result.unresolved[name] = nil
                result.nofresolved = result.nofresolved + 1
                if verbose then
                    report("resolved     : %s",tag)
                end
            --  return data
                return indent .. string.gsub(data,"[\n\r]+","\n" .. indent)
            elseif tag == "header goes here" then
                return "@<header goes here@>"
            else
                result.nofunresolved = result.nofunresolved + 1
                result.unresolved[name] = name
                report("unresolved   : %s",tag)
                return "\n/* unresolved: " .. tag .. " */\n"
            end
        else
            report("fatal error  : invalid tag")
            os.exit()
        end
    end

    local p_resolve = Cs((p_reference / dereference + p_rest)^1)

    local function resolve(data)
        local iteration = 0
        while true do
            iteration = iteration + 1
            if data == "" then
                if verbose then
                    report("warning      : empty code at iteration %i",iteration)
                end
                return data
            else
                local done = lpegmatch(p_resolve,data)
                if not done then
                    report("fatal error  : invalid code at iteration %i",iteration)
                    os.exit()
                elseif done == data then
                    return done
                else
                    data = done
                end
            end
        end
        return data
    end
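
    -- The fixed point loop matters because snippets can refer to other snippets:
    -- a (hypothetical) @<initialize the tables@> can expand to code that itself
    -- contains @<allocate the memory@>, which only gets replaced in the next
    -- iteration; we stop as soon as a pass changes nothing.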

    local function patch(filename,data)
        local patchfile = file.replacesuffix(filename,"patch.lua")
        local patches   = table.load(patchfile)
        if not patches then
            patchfile = file.basename(patchfile)
            patches   = table.load(patchfile)
        end
        if patches then
            local action = patches.action
            if type(action) == "function" then
                if verbose then
                    report("patching     : %s", filename)
                end
                data = action(data,report)
            end
        end
        return data
    end
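
    -- A minimal sketch of what such an optional companion file (for instance a
    -- hypothetical "mp.patch.lua" next to "mp.w") could return; the action gets
    -- the raw w data plus the reporter and has to return the patched data:
    --
    -- return {
    --     action = function(data,report)
    --         report("patching     : applying local tweaks")
    --         return (string.gsub(data,"@d some_old_name","@d some_new_name"))
    --     end,
    -- }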

    function cweb.convert(filename,target)

        statistics.starttiming(filename)

        result = {
            snippets      = { },
            usedsnippets  = { },
            alltags       = { },
            dottags       = { },
            headers       = { },
            headerorder   = { },
            defines       = { },
            codes         = { },
            unresolved    = { },
            nofsnippets   = 0,
            nofheaders    = 0,
            nofdefines    = 0,
            nofcode       = 0,
            nofresolved   = 0,
            nofunresolved = 0,

         -- keepcomment   = true, -- not okay but good enough for a rough initial pass

        }

        local data   = io.loaddata(filename)
        local banner = '/* This file is generated by "mtxrun --script mtx-wtoc.lua" from the metapost cweb files. */\n\n'

        report("main file    : %s", filename)
        report("main size    : %i bytes", #data)

        data = patch(filename,data)
        data = cleanup(data)

        result.alltags["header goes here"] = clean("header goes here")

        getpresets(data) -- into result

        data = getcontent_1(data) -- into result
        data = getcontent_2(data) -- into result

        result.defines = concat(result.defines,"\n\n")
        result.codes   = concat(result.codes,"\n\n")

        result.snippets["header goes here"] = result.defines

        result.codes = resolve(result.codes)
        result.codes = finalize(result.codes,result.keepcomment)

        for i=1,#result.headerorder do
            local name = result.headerorder[i]
            local code = result.headers[name]
            report("found header : %s", name)
            code = resolve(code)
            code = finalize(code,result.keepcomment)
            result.headers[name] = code
        end

        local fullname = file.join(target,file.addsuffix(file.nameonly(filename),"c"))

        report("result file  : %s", fullname)
        report("result size  : %i bytes", result.codes and #result.codes or 0)

        if result.keepcomment then
            report("unprocessed  : %i bytes", #data)
            print(data)
        end

        io.savedata(fullname,banner .. result.codes)

        -- save header files

        for i=1,#result.headerorder do
            local name = result.headerorder[i]
            local code = result.headers[name]
            local fullname = file.join(target,name)
            report("extra file %i : %s", i, fullname)
            report("extra size %i : %i bytes", i, #code)
            io.savedata(fullname,banner .. code)
        end

        -- some statistics

        report("nofsnippets  : %i", result.nofsnippets)
        report("nofheaders   : %i", result.nofheaders)
        report("nofdefines   : %i", result.nofdefines)
        report("nofcode      : %i", result.nofcode)
        report("nofresolved  : %i", result.nofresolved)
        report("nofunresolved: %i", result.nofunresolved)

        for tag in table.sortedhash(result.unresolved) do
            report("fuzzy tag    : %s",tag)
        end

        for tag in table.sortedhash(result.snippets) do
            if not result.usedsnippets[tag] then
                report("unused tag   : %s",tag)
            end
        end

        statistics.stoptiming(filename)

        report("run time     : %s", statistics.elapsedtime(filename))

    end

end

function cweb.convertfiles(source,target)

    report("source path  : %s", source)
    report("target path  : %s", target)

    report()

    local files = dir.glob(file.join(source,"*.w"))

    statistics.starttiming(files)
    for i=1,#files do
        cweb.convert(files[i],target)
        report()
    end
    statistics.stoptiming(files)

    report("total time   : %s", statistics.elapsedtime(files))

end

-- We sort of hard code the files that we convert. In principle we can make a more
-- general converter but I don't need to convert cweb files other than these. The
-- converter tries to make the H/C files look kind of good so that I can inspect
-- them in (for instance) Visual Studio.

local source = file.join(dir.current(),"../source/mp/mpw")
local target = file.join(dir.current(),"../source/mp/mpc")

-- local source = file.join("e:/luatex/luatex-experimental-export/source/texk/web2c/mplibdir/")
-- local target = file.join("e:/luatex/luatex-experimental-export/source/texk/web2c")

cweb.convertfiles(source,target)

-- -- inefficient but good enough
--
-- local function strip(s)
--
--     local newline = lpeg.patterns.newline
--     local spaces  = S(" \t")
--
--     local strip_comment  = (P("/*") * (1-P("*/"))^1 * P("*/")) / ""
--     local strip_line     = (P("#line") * (1 - newline)^1 * newline * spaces^0) / ""
--     local strip_spaces   = spaces^1 / " "
--     local strip_trailing = (P("//") * (1 - newline)^0) / ""
--     local strip_final    = (spaces^0 * P("\\") * spaces^0) / "" * newline
--     local strip_lines    = (spaces^0 / "") * newline^1 * (spaces^0 / "") / "\n"
--     local strip_weird    = (spaces + newline)^0 * (P("{") * (spaces + newline)^0 * P("}")) * (spaces + newline)^0 / "{}\n"
--     local strip_singles  = (spaces^0 / "") * S("^`'\"&%|()[]#?!<>\\/{}=,.*+-;:") * (spaces^0 / "")
--
--     local pattern_1 = Cs ( (
--         strip_singles +
--         P(1)
--     )^1 )
--
--     local pattern_2 = Cs ( (
--         strip_weird +
--         strip_comment +
--         strip_line +
--         strip_trailing +
--         strip_lines +
--         strip_final +
--         strip_spaces +
--         P(1)
--     )^1 )
--
--     while true do
--         local r = lpegmatch(pattern_1,s)
--         local r = lpegmatch(pattern_2,r)
--         if s == r then
--             break
--         else
--             s = r
--         end
--     end
--
--     return s
--
-- end