back-exp.lmt /size: 102 Kb    last modification: 2024-01-16 09:02
1if not modules then modules = { } end modules ['back-exp'] = {
2    version   = 1.001,
3    comment   = "companion to back-exp.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
-- Todo: share properties more with tagged pdf (or the reverse)
10
-- Because we run into the 200 local limit we use quite some do .. end wrappers .. not always
-- that nice but it has to be.
13
-- Experiments demonstrated that mapping to <div> and classes is messy because we have to
-- package attributes (some 30) into one set of (space separated but prefixed classes)
-- which only makes things worse .. so if you want something else, use xslt to get there.
17
18-- language       -> only mainlanguage, local languages should happen through start/stoplanguage
19-- tocs/registers -> maybe add a stripper (i.e. just don't flush entries in final tree)
20-- footnotes      -> css 3
21-- bodyfont       -> in styles.css
22
-- Because we need to look ahead we now always build a tree (this was optional in
-- the beginning). The extra overhead in the frontend is negligible.
25--
26-- We can optimize the code ... currently the overhead is some 10% for xml + html so
27-- there is no hurry.
28
-- todo: move critical formatters out of functions
30-- todo: delay loading (apart from basic tag stuff)
31
32-- problem : too many local variables
33
34-- check setting __i__
35
36local next, type, tonumber = next, type, tonumber
37local sub, gsub, match = string.sub, string.gsub, string.match
38local validstring = string.valid
39local lpegmatch = lpeg.match
40local utfchar, utfvalues, utflen = utf.char, utf.values, utf.len
41local concat, merge, sort, setmetatableindex = table.concat, table.merge, table.sort, table.setmetatableindex
42local sortedhash, sortedkeys = table.sortedhash, table.sortedkeys
43local formatters = string.formatters
44local todimen = number.todimen
45local replacetemplate = utilities.templates.replace
46local settings_to_array = utilities.parsers.settings_to_array
47local settings_to_hash = utilities.parsers.settings_to_hash
48
49local addsuffix, joinfile, nameonly, basename, filesuffix = file.addsuffix, file.join, file.nameonly, file.basename, file.suffix
50
-- Tracing switches: each tracker assigns the value that it gets passed to the
-- matching local.

local trace_export  = false
local trace_spacing = false
local trace_details = false

trackers.register("export.trace",         function(v) trace_export  = v end)
trackers.register("export.trace.spacing", function(v) trace_spacing = v end)
trackers.register("export.trace.details", function(v) trace_details = v end)

-- Directives: less_state suppresses the job related attributes on the document
-- element, show_comment controls whether comments end up in the result.

local less_state   = false
local show_comment = true

directives.register("export.lessstate", function(v) less_state   = v end)
directives.register("export.comment",   function(v) show_comment = v end)
57
58-- maybe we will also support these:
59--
60-- local css_hyphens       = false  directives.register("export.css.hyphens",      function(v) css_hyphens      = v end)
61-- local css_textalign     = false  directives.register("export.css.textalign",    function(v) css_textalign    = v end)
62-- local css_bodyfontsize  = false  directives.register("export.css.bodyfontsize", function(v) css_bodyfontsize = v end)
63-- local css_textwidth     = false  directives.register("export.css.textwidth",    function(v) css_textwidth    = v end)
64
65local report_export     = logs.reporter("backend","export")
66
67local nodes             = nodes
68local attributes        = attributes
69
70local variables         = interfaces.variables
71local v_yes             = variables.yes
72local v_no              = variables.no
73local v_xml             = variables.xml
74local v_hidden          = variables.hidden
75
76local implement         = interfaces.implement
77
78local tasks             = nodes.tasks
79local fontchar          = fonts.hashes.characters
80local fontquads         = fonts.hashes.quads
81local languagenames     = languages.numbers
82
83local texgetcount       = tex.getcount
84
85local references        = structures.references
86local structurestags    = structures.tags
87local taglist           = structurestags.taglist
88local specifications    = structurestags.specifications
89local properties        = structurestags.properties
90local locatedtag        = structurestags.locatedtag
91
92structurestags.usewithcare = { }
93
94local starttiming       = statistics.starttiming
95local stoptiming        = statistics.stoptiming
96
97local characterdata     = characters.data
98local overloads         = fonts.mappings.overloads
99
100-- todo: more locals (and optimize)
101
102local exportversion     <const> = "0.35"
103local mathmlns          <const> = "http://www.w3.org/1998/Math/MathML"
104local contextns         <const> = "http://www.contextgarden.net/context/export" -- whatever suits
105local cssnamespaceurl   <const> = "@namespace context url('%namespace%') ;"
106local cssnamespace      <const> = "context|"
107----- cssnamespacenop   <const> = "/* no namespace */"
108
109local usecssnamespace   = false
110
111local nofcurrentcontent = 0 -- so we don't free (less garbage collection)
112local currentcontent    = { }
113local currentnesting    = nil
114local currentattribute  = nil
115local last              = nil
116local currentparagraph  = nil
117
118local noftextblocks     = 0
119
120----- hyphencode        = 0xAD
121local hyphen            = utfchar(0xAD) -- todo: also emdash etc
122local tagsplitter       = structurestags.patterns.splitter
123----- colonsplitter     = lpeg.splitat(":")
124----- dashsplitter      = lpeg.splitat("-")
125local threshold         = 65536
126local indexing          = false
127local keephyphens       = false
128local exportproperties  = false
129
130local finetuning        = { }
131
132local treestack         = { }
133local nesting           = { }
134local currentdepth      = 0
135
136local wrapups           = { }
137
-- The root of the export tree. The fulltag field has to be assigned here: a
-- comparison (==) would store 'false' at index 1 and leave the field unset.
local tree              = { data = { }, fulltag = "root" } -- root
139local treehash          = { }
140local extras            = { }
141local checks            = { }
142local fixes             = { }
143local finalizers        = { }
144local nofbreaks         = 0
145local used              = { }
146local exporting         = false
147local restart           = false
148local specialspaces     = { [0x20] = " "  }               -- for conversion
149local somespace         = { [0x20] = true, [" "] = true } -- for testing
150local entities          = { ["&"] = "&amp;", [">"] = "&gt;", ["<"] = "&lt;" }
-- Replacements used for attribute values: on top of the three content entities
-- the double quote is escaped too (as a complete entity, so with the ampersand).
local attribentities    = { ["&"] = "&amp;", [">"] = "&gt;", ["<"] = "&lt;", ['"'] = "&quot;" }
152
153local p_entity          = lpeg.replacer(entities) -- was: entityremapper = utf.remapper(entities)
154local p_attribute       = lpeg.replacer(attribentities)
155local p_escaped         = lpeg.patterns.xml.escaped
156
157local f_tagid           = formatters["%s-%04i"]
158
159-- local alignmapping = {
160--     flushright = "right",
161--     middle     = "center",
162--     flushleft  = "left",
163-- }
164
165local defaultnature = "mixed" -- "inline"
166
-- An element that has not been seen before automatically gets its own (empty)
-- subtable; a nil or false key yields nothing.
setmetatableindex(used, function(hash,element)
    if not element then
        return
    end
    local details = { }
    hash[element] = details
    return details
end)
174
175local f_entity    = formatters["&#x%X;"]
176local f_attribute = formatters[" %s=%q"]
177local f_property  = formatters[" %s%s=%q"]
178
-- Space-like code points are registered on first access: the character is cached
-- here, gets a hexadecimal entity in the entities table and is flagged as space
-- (by code point as well as by string) in somespace.
setmetatableindex(specialspaces, function(cache,code)
    local character = utfchar(code)
    cache[code]          = character
    entities[character]  = f_entity(code)
    somespace[code]      = true
    somespace[character] = true
    return character
end)
187
188
-- Cache of element names with their namespace prefix applied.
local namespaced = {
    -- filled on demand
}

-- Elements that live in a namespace, mapped onto the prefix of that namespace
-- (each key is listed once).
local namespaces = {
    msubsup     = "m",
    msub        = "m",
    msup        = "m",
    mn          = "m",
    mi          = "m",
    ms          = "m",
    mo          = "m",
    mtext       = "m",
    mrow        = "m",
    mfrac       = "m",
    mroot       = "m",
    msqrt       = "m",
    munderover  = "m",
    munder      = "m",
    mover       = "m",
    merror      = "m",
    math        = "m",
    mtable      = "m",
    mtr         = "m",
    mtd         = "m",
    mfenced     = "m",
    maction     = "m",
    mspace      = "m",
    -- only when testing
    mstacker    = "m",
    mstackertop = "m",
    mstackermid = "m",
    mstackerbot = "m",
    mextensible = "m",
}
225
-- Resolves (and remembers) the name under which an element is written: elements
-- with an entry in namespaces get "prefix:name", all others keep their name.
setmetatableindex(namespaced, function(cache,tag)
    if not tag then
        return
    end
    local prefix = namespaces[tag]
    local name   = tag
    if prefix then
        name = prefix .. ":" .. tag
    end
    cache[tag] = name
    return name
end)
234
-- Returns key="value" (with a leading space and the value run through the
-- attribute replacer) or an empty string when there is no value.
local function attribute(key,value)
    if not value or value == "" then
        return ""
    end
    return f_attribute(key,lpegmatch(p_attribute,value))
end
242
-- Stores an attribute on a tree entry. Nothing happens when the value is nil,
-- false or empty; the attributes table is created when the entry has none yet.
--
-- di      : the tree entry
-- key     : attribute name
-- value   : attribute value
-- escaped : when true the value first goes through the p_escaped pattern
local function setattribute(di,key,value,escaped)
    if not value or value == "" then
        return
    end
    local current = di.attributes
    if escaped then
        value = lpegmatch(p_escaped,value)
    end
    if current then
        current[key] = value
    else
        di.attributes = { [key] = value }
    end
end
256
257local listdata = { } -- this has to be done otherwise: each element can just point back to ...
258
-- Registers every collected list entry that has a reference tag in listdata,
-- using "<kind>><tag>" as key.
function wrapups.hashlistdata()
    local collected = structures.lists.collected
    for index=1,#collected do
        local entry = collected[index]
        local tag   = entry.references.tag
        if tag then
            -- todo: use internal (see strc-lst.lua where it's set)
            listdata[entry.metadata.kind .. ">" .. tag] = entry
        end
    end
end
271
-- Sets a field in the specification that belongs to the given tag attribute;
-- an unknown attribute is silently ignored.
function structurestags.setattributehash(attr,key,value) -- public hash
    local specification = taglist[attr]
    if not specification then
        -- some kind of error
        return
    end
    specification[key] = value
end
280
281local usedstyles      = { }
282local usedimages      = { }
283local referencehash   = { } -- move ?
284local destinationhash = { } -- move ?
285
286structurestags.backend = {
287    setattribute    = setattribute,
288    extras          = extras,
289    checks          = checks,
290    fixes           = fixes,
291    listdata        = listdata,
292    finalizers      = finalizers,
293    usedstyles      = usedstyles,
294    usedimages      = usedimages,
295    referencehash   = referencehash,
296    destinationhash = destinationhash,
297}
298
299local namespacetemplate <const> = [[
300/* %what% for file %filename% */
301
302%cssnamespaceurl%
303]]
304
305do
306
307    -- experiment: styles and images
308    --
309    -- officially we should convert to bp but we round anyway
310
311    -- /* padding      : ; */
312    -- /* text-justify : inter-word ; */
313    -- /* text-align : justify ; */
314
315local documenttemplate <const> = [[
316document,
317%namespace%div.document {
318    font-size  : %size% !important ;
319    max-width  : %width% !important ;
320    text-align : %align% !important ;
321    hyphens    : %hyphens% !important ;
322}]]
323
324local styletemplate <const> = [[
325%element%[detail="%detail%"],
326%namespace%div.%element%.%detail% {
327    display      : inline ;
328    font-style   : %style% ;
329    font-variant : %variant% ;
330    font-weight  : %weight% ;
331    font-family  : %family% ;
332    color        : %color% ;
333}]]
334
335    local numbertoallign = {
336        [0] = "justify", ["0"] = "justify", [variables.normal    ] = "justify",
337              "right",   ["1"] = "right",   [variables.flushright] = "right",
338              "center",  ["2"] = "center",  [variables.middle    ] = "center",
339              "left",    ["3"] = "left",    [variables.flushleft ] = "left",
340    }
341
342    function wrapups.allusedstyles(filename)
343        local result = { replacetemplate(namespacetemplate, {
344            what            = "styles",
345            filename        = filename,
346            namespace       = contextns,
347         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or cssnamespacenop,
348            cssnamespaceurl = cssnamespaceurl,
349        },false,true) }
350        --
351        local bodyfont = finetuning.bodyfont
352        local width    = finetuning.width
353        local hyphen   = finetuning.hyphen
354        local align    = finetuning.align
355        --
356        if type(bodyfont) == "number" then
357            bodyfont = todimen(bodyfont)
358        else
359            bodyfont = "12pt"
360        end
361        if type(width) == "number" then
362            width = todimen(width) or "50em"
363        else
364            width = "50em"
365        end
366        if hyphen == v_yes then
367            hyphen = "manual"
368        else
369            hyphen = "inherited"
370        end
371        if align then
372            align = numbertoallign[align]
373        end
374        if not align then
375            align = hyphen and "justify" or "inherited"
376        end
377        --
378        result[#result+1] = replacetemplate(documenttemplate,{
379            size    = bodyfont,
380            width   = width,
381            align   = align,
382            hyphens = hyphen
383        })
384        --
385        local colorspecification = xml.css.colorspecification
386        local fontspecification  = xml.css.fontspecification
387        for element, details in sortedhash(usedstyles) do
388            for detail, data in sortedhash(details) do
389                local s = fontspecification(data.style)
390                local c = colorspecification(data.color)
391                detail = gsub(detail,"[^A-Za-z0-9]+","-")
392                result[#result+1] = replacetemplate(styletemplate,{
393                    namespace = usecssnamespace and cssnamespace or "",
394                    element   = element,
395                    detail    = detail,
396                    style     = s.style   or "inherit",
397                    variant   = s.variant or "inherit",
398                    weight    = s.weight  or "inherit",
399                    family    = s.family  or "inherit",
400                    color     = c         or "inherit",
401                    display   = s.display and "block" or nil,
402                })
403            end
404        end
405        return concat(result,"\n\n")
406    end
407
408end
409
410do
411
412local imagetemplate <const> = [[
413%element%[id="%id%"], %namespace%div.%element%[id="%id%"] {
414    display           : block ;
415    background-image  : url('%url%') ;
416    background-size   : 100%% auto ;
417    background-repeat : no-repeat ;
418    width             : %width% ;
419    height            : %height% ;
420}]]
421
422    local f_svgname = formatters["%s.svg"]
423    local f_svgpage = formatters["%s-page-%s.svg"]
424    local collected = { }
425
426    local function usedname(name,page)
427        if filesuffix(name) == "pdf" then
428            -- temp hack .. we will have a remapper
429            if page and page > 1 then
430                name = f_svgpage(nameonly(name),page)
431            else
432                name = f_svgname(nameonly(name))
433            end
434        end
435        local scheme = url.hasscheme(name)
436        if not scheme or scheme == "file" then
437            -- or can we just use the name ?
438            return joinfile("../images",basename(url.filename(name)))
439        else
440            return name
441        end
442    end
443
444    function wrapups.allusedimages(filename)
445        local result = { replacetemplate(namespacetemplate, {
446            what            = "images",
447            filename        = filename,
448            namespace       = contextns,
449         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or "",
450            cssnamespaceurl = cssnamespaceurl,
451        },false,true) }
452        for element, details in sortedhash(usedimages) do
453            for detail, data in sortedhash(details) do
454                local name = data.name
455                local page = tonumber(data.page) or 1
456                local spec = {
457                    element   = element,
458                    id        = data.id,
459                    name      = name,
460                    page      = page,
461                    url       = usedname(name,page),
462                    width     = data.width,
463                    height    = data.height,
464                    used      = data.used,
465                    namespace = usecssnamespace and cssnamespace or "",
466                }
467                result[#result+1] = replacetemplate(imagetemplate,spec)
468                collected[detail] = spec
469            end
470        end
471        return concat(result,"\n\n")
472    end
473
474    function wrapups.uniqueusedimages() -- todo: combine these two
475        return collected
476    end
477
478end
479
480--
481
482properties.vspace = { export = "break",     nature = "display" }
483----------------- = { export = "pagebreak", nature = "display" }
484
-- Creates the tag list for a break: a copy of the tags found in list.taglist
-- (when given) with a new, uniquely numbered "break>n" entry appended.
--
-- list    : optional table with a taglist array
-- returns : a table with the new array in its taglist field
local function makebreaklist(list)
    nofbreaks = nofbreaks + 1
    local t = { }
    local l = list and list.taglist
    if l then
        for i=1,#l do -- the size of the taglist, not of the table that wraps it
            t[i] = l[i]
        end
    end
    t[#t+1] = "break>" .. nofbreaks -- maybe no number or 0
    return { taglist = t }
end
497
498local breakattributes = {
499    type = "collapse"
500}
501
-- Creates a tree entry for a break element with display nature; each call
-- bumps the break counter that provides the number of the entry.
--
-- attributes : optional attribute table, stored as is
local function makebreaknode(attributes) -- maybe no fulltag
    local number = nofbreaks + 1
    nofbreaks = number
    local node = {
        tg      = "break",
        fulltag = "break>" .. number,
        n       = number,
        element = "break",
        nature  = "display",
     -- data       = { }, -- not needed
     -- attribute  = 0, -- not needed
     -- parnumber  = 0,
    }
    if attributes then
        node.attributes = attributes
    end
    return node
end
516
517do
518
519    local fields = { "title", "subtitle", "author", "keywords", "url", "version" }
520
521    local ignoredelements = false
522
523    local function checkdocument(root)
524        local data = root.data
525        if data then
526            for i=1,#data do
527                local di = data[i]
528                local tg = di.tg
529                if tg == "noexport" then
530                    local s = specifications[di.fulltag]
531                    local u = s and s.userdata
532                    if u then
533                        local comment = u.comment
534                        if comment then
535                            di.element = "comment"
536                            di.data = { { content = comment } }
537                            u.comment = nil
538                        else
539                            data[i] = false
540                        end
541                    else
542                        data[i] = false
543                    end
544                elseif di.content then
545                    -- okay
546                elseif tg == "ignore" then
547                    di.element = ""
548                    checkdocument(di)
549                elseif ignoredelements and ignoredelements[tg] then
550                    di.element = ""
551                    checkdocument(di)
552                else
553                    checkdocument(di) -- new, else no noexport handling
554                end
555            end
556        end
557    end
558
559    function extras.document(di,element,n,fulltag)
560        setattribute(di,"language",languagenames[texgetcount("mainlanguagenumber")])
561        if not less_state then
562            setattribute(di,"file",tex.jobname)
563            setattribute(di,"date",os.fulltime())
564            setattribute(di,"context",environment.version)
565            setattribute(di,"version",exportversion)
566            setattribute(di,"xmlns:m",mathmlns)
567            local identity = interactions.general.getidentity()
568            for i=1,#fields do
569                local key   = fields[i]
570                local value = identity[key]
571                if value and value ~= "" then
572                    setattribute(di,key,value)
573                end
574            end
575        end
576        checkdocument(di)
577    end
578
579    implement {
580        name      = "ignoretagsinexport",
581        arguments = "string",
582        actions   = function(list)
583            for tag in string.gmatch(list,"[a-z]+") do
584                if ignoredelements then
585                    ignoredelements[tag] = true
586                else
587                    ignoredelements = { [tag] = true }
588                end
589            end
590        end,
591    }
592
593end
594
595-- flusher
596
597do
598
599    local f_detail                     = formatters[' detail="%s"']
600    local f_chain                      = formatters[' chain="%s"']
601    local f_index                      = formatters[' n="%s"']
602    local f_spacing                    = formatters['<c p="%s">%s</c>']
603
604    local f_empty_inline               = formatters["<%s/>"]
605    local f_empty_mixed                = formatters["%w<%s/>\n"]
606    local f_empty_display              = formatters["\n%w<%s/>\n"]
607    local f_empty_inline_attr          = formatters["<%s%s/>"]
608    local f_empty_mixed_attr           = formatters["%w<%s%s/>"]
609    local f_empty_display_attr         = formatters["\n%w<%s%s/>\n"]
610
611    local f_begin_inline               = formatters["<%s>"]
612    local f_begin_mixed                = formatters["%w<%s>"]
613    local f_begin_display              = formatters["\n%w<%s>\n"]
614    local f_begin_inline_attr          = formatters["<%s%s>"]
615    local f_begin_mixed_attr           = formatters["%w<%s%s>"]
616    local f_begin_display_attr         = formatters["\n%w<%s%s>\n"]
617
618    local f_end_inline                 = formatters["</%s>"]
619    local f_end_mixed                  = formatters["</%s>\n"]
620    local f_end_display                = formatters["%w</%s>\n"]
621
622    local f_begin_inline_comment       = formatters["<!-- %s --><%s>"]
623    local f_begin_mixed_comment        = formatters["%w<!-- %s --><%s>"]
624    local f_begin_display_comment      = formatters["\n%w<!-- %s -->\n%w<%s>\n"]
625    local f_begin_inline_attr_comment  = formatters["<!-- %s --><%s%s>"]
626    local f_begin_mixed_attr_comment   = formatters["%w<!-- %s --><%s%s>"]
627    local f_begin_display_attr_comment = formatters["\n%w<!-- %s -->\n%w<%s%s>\n"]
628
629    local f_comment_begin_inline       = formatters["<!-- begin %s -->"]
630    local f_comment_begin_mixed        = formatters["%w<!-- begin %s -->"]
631    local f_comment_begin_display      = formatters["\n%w<!-- begin %s -->\n"]
632
633    local f_comment_end_inline         = formatters["<!-- end %s -->"]
634    local f_comment_end_mixed          = formatters["<!-- end %s -->\n"]
635    local f_comment_end_display        = formatters["%w<!-- end %s -->\n"]
636
637    local f_metadata_begin             = formatters["\n%w<metadata>\n"]
638    local f_metadata                   = formatters["%w<metavariable name=%q>%s</metavariable>\n"]
639    local f_metadata_end               = formatters["%w</metadata>\n"]
640
641    local function attributes(a)
642        local r = { }
643        local n = 0
644        for k, v in next, a do
645            n = n + 1
646            r[n] = f_attribute(k,tostring(v)) -- tostring because of %q
647        end
648        sort(r)
649        return concat(r,"")
650    end
651
652    local function properties(a)
653        local r = { }
654        local n = 0
655        for k, v in next, a do
656            n = n + 1
657            r[n] = f_property(exportproperties,k,tostring(v)) -- tostring because of %q
658        end
659        sort(r)
660        return concat(r,"")
661    end
662
663    local depth  = 0
664    local inline = 0
665
666    local function emptytag(result,element,nature,di) -- currently only break but at some point
667        local a = di.attributes                       -- we might add detail etc
668        if a then -- happens seldom
669            if nature == "display" then
670                result[#result+1] = f_empty_display_attr(depth,namespaced[element],attributes(a))
671            elseif nature == "mixed" then
672                result[#result+1] = f_empty_mixed_attr(depth,namespaced[element],attributes(a))
673            else
674                result[#result+1] = f_empty_inline_attr(namespaced[element],attributes(a))
675            end
676        else
677            if nature == "display" then
678                result[#result+1] = f_empty_display(depth,namespaced[element])
679            elseif nature == "mixed" then
680                result[#result+1] = f_empty_mixed(depth,namespaced[element])
681            else
682                result[#result+1] = f_empty_inline(namespaced[element])
683            end
684        end
685    end
686
687 -- local function stripspaces(di)
688 --     local d = di.data
689 --     local n = #d
690 --     local m = 0
691 --     for i=1,n do
692 --         local di = d[i]
693 --         if di.tg then
694 --             m = m + 1
695 --             d[m] = di
696 --         end
697 --     end
698 --     for i=n,m+1,-1 do
699 --         d[i] = nil
700 --     end
701 -- end
702 --
703 -- -- simpler:
704
705    local function stripspaces(di)
706        local d = di.data
707        for i=1,#d do
708            local di = d[i]
709            if not di.tg then
710                di.content = ""
711            end
712        end
713    end
714
715    local function begintag(result,element,nature,di,skip)
716        local index         = di.n
717        local fulltag       = di.fulltag
718        local specification = specifications[fulltag] or { } -- we can have a dummy
719        local comment       = di.comment
720        local detail        = specification.detail
721        if skip == "comment" then
722            if show_comment then
723                if nature == "inline" or inline > 0 then
724                    result[#result+1] = f_comment_begin_inline(namespaced[element])
725                    inline = inline + 1
726                elseif nature == "mixed" then
727                    result[#result+1] = f_comment_begin_mixed(depth,namespaced[element])
728                    depth = depth + 1
729                    inline = 1
730                else
731                    result[#result+1] = f_comment_begin_display(depth,namespaced[element])
732                    depth = depth + 1
733                end
734            end
735        elseif skip then
736            -- ignore
737        else
738
739            local n = 0
740            local r = { } -- delay this
741            if detail then
742                detail = gsub(detail,"[^A-Za-z0-9]+","-")
743                specification.detail = detail -- we use it later in for the div
744                n = n + 1
745                r[n] = f_detail(detail)
746            end
747            local parents = specification.parents
748            if parents then
749                parents = gsub(parents,"[^A-Za-z0-9 ]+","-")
750                specification.parents = parents -- we use it later in for the div
751                n = n + 1
752                r[n] = f_chain(parents)
753            end
754            if indexing and index then
755                n = n + 1
756                r[n] = f_index(index)
757            end
758            --
759            local extra = extras[element]
760            if extra then
761                extra(di,element,index,fulltag)
762            end
763            --
764            if di.record then
765                stripspaces(di)
766            end
767            --
768            if exportproperties then
769                local p = specification.userdata
770                if not p then
771                    -- skip
772                elseif exportproperties == v_yes then
773                    n = n + 1
774                    r[n] = attributes(p)
775                else
776                    n = n + 1
777                    r[n] = properties(p)
778                end
779            end
780            local a = di.attributes
781            if a then
782                if trace_spacing then
783                    a.p = di.parnumber or 0
784                end
785                n = n + 1
786                r[n] = attributes(a)
787            elseif trace_spacing then
788                n = n + 1
789                r[n] = attributes { p = di.parnumber or 0 }
790            end
791            if n == 0 then
792                if nature == "inline" or inline > 0 then
793                    if show_comment and comment then
794                        result[#result+1] = f_begin_inline_comment(comment,namespaced[element])
795                    else
796                        result[#result+1] = f_begin_inline(namespaced[element])
797                    end
798                    inline = inline + 1
799                elseif nature == "mixed" then
800                    if show_comment and comment then
801                        result[#result+1] = f_begin_mixed_comment(depth,comment,namespaced[element])
802                    else
803                        result[#result+1] = f_begin_mixed(depth,namespaced[element])
804                    end
805                    depth = depth + 1
806                    inline = 1
807                else
808                    if show_comment and comment then
809                        result[#result+1] = f_begin_display_comment(depth,comment,depth,namespaced[element])
810                    else
811                        result[#result+1] = f_begin_display(depth,namespaced[element])
812                    end
813                    depth = depth + 1
814                end
815            else
816                r = concat(r,"",1,n)
817                if nature == "inline" or inline > 0 then
818                    if show_comment and comment then
819                        result[#result+1] = f_begin_inline_attr_comment(comment,namespaced[element],r)
820                    else
821                        result[#result+1] = f_begin_inline_attr(namespaced[element],r)
822                    end
823                    inline = inline + 1
824                elseif nature == "mixed" then
825                    if show_comment and comment then
826                        result[#result+1] = f_begin_mixed_attr_comment(depth,comment,namespaced[element],r)
827                    else
828                        result[#result+1] = f_begin_mixed_attr(depth,namespaced[element],r)
829                    end
830                    depth = depth + 1
831                    inline = 1
832                else
833                    if show_comment and comment then
834                        result[#result+1] = f_begin_display_attr_comment(depth,comment,depth,namespaced[element],r)
835                    else
836                        result[#result+1] = f_begin_display_attr(depth,namespaced[element],r)
837                    end
838                    depth = depth + 1
839                end
840            end
841        end
842        used[element][detail or ""] = { nature, specification.parents }  -- for template css
843        -- also in last else ?
844        local metadata = specification.metadata
845        if metadata and next(metadata) then
846            result[#result+1] = f_metadata_begin(depth)
847            for k, v in sortedhash(metadata) do
848                if v ~= "" then
849                    result[#result+1] = f_metadata(depth+1,k,lpegmatch(p_entity,v))
850                end
851            end
852            result[#result+1] = f_metadata_end(depth)
853        end
854    end
855
856    local function endtag(result,element,nature,di,skip)
857        if skip == "comment" then
858            if show_comment then
859                if nature == "display" and (inline == 0 or inline == 1) then
860                    depth = depth - 1
861                    result[#result+1] = f_comment_end_display(depth,namespaced[element])
862                    inline = 0
863                elseif nature == "mixed" and (inline == 0 or inline == 1) then
864                    depth = depth - 1
865                    result[#result+1] = f_comment_end_mixed(namespaced[element])
866                    inline = 0
867                else
868                    inline = inline - 1
869                    result[#result+1] = f_comment_end_inline(namespaced[element])
870                end
871            end
872        elseif skip then
873            -- ignore
874        else
875            if nature == "display" and (inline == 0 or inline == 1) then
876                depth = depth - 1
877                result[#result+1] = f_end_display(depth,namespaced[element])
878                inline = 0
879            elseif nature == "mixed" and (inline == 0 or inline == 1) then
880                depth = depth - 1
881                result[#result+1] = f_end_mixed(namespaced[element])
882                inline = 0
883            else
884                inline = inline - 1
885                result[#result+1] = f_end_inline(namespaced[element])
886            end
887        end
888    end
889
890    local function flushtree(result,data,nature)
891        local nofdata = #data
892        for i=1,nofdata do
893            local di = data[i]
894            if not di then -- hm, di can be string
895                -- whatever
896            else
897                local content = di.content
898             -- also optimize for content == "" : trace that first
899                if content then
900                    -- already has breaks
901                    local content = lpegmatch(p_entity,content)
902                    if i == nofdata and sub(content,-1) == "\n" then -- move check
903                        -- can be an end of line in par but can also be the last line
904                        if trace_spacing then
905                            result[#result+1] = f_spacing(di.parnumber or 0,sub(content,1,-2))
906                        else
907                            result[#result+1] = sub(content,1,-2)
908                        end
909                        result[#result+1] = " "
910                    else
911                        if trace_spacing then
912                            result[#result+1] = f_spacing(di.parnumber or 0,content)
913                        else
914                            result[#result+1] = content
915                        end
916                    end
917                elseif not di.collapsed then -- ignore collapsed data (is appended, reconstructed par)
918                    local element = di.element
919                    if not element then
920                        -- skip
921                    elseif element == "break" then -- or element == "pagebreak" -- todo: use empty flag
922                        emptytag(result,element,nature,di)
923                    elseif element == "mspace" then -- todo: use empty flag
924                        emptytag(result,element,nature,di)
925                    elseif element == "" or di.skip == "ignore" then
926                        -- skip
927                    else
928                        if di.before then
929                            flushtree(result,di.before,nature)
930                        end
931                        local natu = di.nature
932                        local skip = di.skip
933                        if di.breaknode then
934                            emptytag(result,"break","display",di)
935                        end
936                        begintag(result,element,natu,di,skip)
937                        flushtree(result,di.data,natu)
938                        endtag(result,element,natu,di,skip)
939                        if di.after then
940                            flushtree(result,di.after,nature)
941                        end
942                    end
943                else
944--                     local element = di.element
945--                     if element == "mspace" then -- todo: use empty flag
946--                         emptytag(result,element,nature,di)
947--                     end
948                end
949            end
950        end
951    end
952
953    local function breaktree(tree,parent,parentelement) -- also removes double breaks
954        local data = tree.data
955        if data then
956            local nofdata = #data
957            local prevelement
958            local prevnature
959            local prevparnumber
960            local newdata = { }
961            local nofnewdata = 0
962            for i=1,nofdata do
963                local di = data[i]
964                if not di then
965                    -- skip
966                elseif di.skip == "ignore" then
967                    -- skip (new)
968                elseif di.tg == "ignore" then
969                    -- skip (new)
970                elseif di.content then
971                    if di.samepar then
972                        prevparnumber = false
973                    else
974                        local parnumber = di.parnumber
975                        if prevnature == "inline" and prevparnumber and prevparnumber ~= parnumber then
976                            nofnewdata = nofnewdata + 1
977                            if trace_spacing then
978                                newdata[nofnewdata] = makebreaknode { type = "a", p = prevparnumber, n = parnumber }
979                            else
980                                newdata[nofnewdata] = makebreaknode()
981                            end
982                        end
983                        prevelement = nil
984                        prevparnumber = parnumber
985                    end
986                    prevnature = "inline"
987                    nofnewdata = nofnewdata + 1
988                    newdata[nofnewdata] = di
989                elseif not di.collapsed then
990                    local element = di.element
991                    if element == "break" then -- or element == "pagebreak"
992                        if prevelement == "break" then
993                            di.element = ""
994                        end
995                        prevelement = element
996                        prevnature = "display"
997                        nofnewdata = nofnewdata + 1
998                        newdata[nofnewdata] = di
999                    elseif element == "" or di.skip == "ignore" then
1000                        -- skip
1001                    else
1002                        if di.samepar then
1003                            prevnature    = "inline"
1004                            prevparnumber = false
1005                        else
1006                            local nature = di.nature
1007                            local parnumber = di.parnumber
1008                            if prevnature == "inline" and nature == "inline" and prevparnumber and prevparnumber ~= parnumber then
1009                                nofnewdata = nofnewdata + 1
1010                                if trace_spacing then
1011                                    newdata[nofnewdata] = makebreaknode { type = "b", p = prevparnumber, n = parnumber }
1012                                else
1013                                    newdata[nofnewdata] = makebreaknode()
1014                                end
1015                            end
1016                            prevnature = nature
1017                            prevparnumber = parnumber
1018                        end
1019                        prevelement = element
1020                        breaktree(di,tree,element)
1021                        nofnewdata = nofnewdata + 1
1022                        newdata[nofnewdata] = di
1023                    end
1024                else
1025                    if di.samepar then
1026                        prevnature    = "inline"
1027                        prevparnumber = false
1028                    else
1029                        local nature = di.nature
1030                        local parnumber = di.parnumber
1031                        if prevnature == "inline" and nature == "inline" and prevparnumber and prevparnumber ~= parnumber then
1032                            nofnewdata = nofnewdata + 1
1033                            if trace_spacing then
1034                                newdata[nofnewdata] = makebreaknode { type = "c", p = prevparnumber, n = parnumber }
1035                            else
1036                                newdata[nofnewdata] = makebreaknode()
1037                            end
1038                        end
1039                        prevnature = nature
1040                        prevparnumber = parnumber
1041                    end
1042                    nofnewdata = nofnewdata + 1
1043                    newdata[nofnewdata] = di
1044                end
1045            end
1046            tree.data = newdata
1047        end
1048    end
1049
1050    -- also tabulaterow reconstruction .. maybe better as a checker
1051    -- i.e cell attribute
1052
1053    local function showtree(data,when,where)
1054        if data then
1055            for i=1,#data do
1056                local d = data[i]
1057                if type(d) == "table" and d.element then
1058                    print(when,where,i,d.element,d.parnumber or 0)
1059                end
1060            end
1061        end
1062    end
1063
1064    local function collapsetree(tree)
1065     -- showtree(data,"before","collapse")
1066     -- for tag, trees in sortedhash(treehash) do
1067        for tag, trees in next, treehash do
1068            local d = trees[1].data
1069            if d then
1070                local nd = #d
1071                if nd > 0 then
1072                    for i=2,#trees do
1073                        local currenttree = trees[i]
1074                        local currentdata = currenttree.data
1075                        local currentpar  = currenttree.parnumber
1076                        local previouspar = trees[i-1].parnumber
1077                        currenttree.collapsed = true
1078                        -- is the next ok?
1079                        if previouspar == 0 or not (di and di.content) then
1080                            previouspar = nil -- no need anyway so no further testing needed
1081                        end
1082                        for j=1,#currentdata do
1083                            local cd = currentdata[j]
1084                            if not cd or cd == "" then
1085                                -- skip
1086                            elseif cd.skip == "ignore" then
1087                                -- skip
1088                            elseif cd.content then
1089                                if not currentpar then
1090                                    -- add space ?
1091                                elseif not previouspar then
1092                                    -- add space ?
1093                                elseif currentpar ~= previouspar then
1094                                    nd = nd + 1
1095                                    if trace_spacing then
1096                                        d[nd] = makebreaknode { type = "d", p = previouspar, n = currentpar }
1097                                    else
1098                                        d[nd] = makebreaknode()
1099                                    end
1100                                end
1101                                previouspar = currentpar
1102                                nd = nd + 1
1103                                d[nd] = cd
1104                            else
1105                                nd = nd + 1
1106                                d[nd] = cd
1107                            end
1108                            currentdata[j] = false
1109                        end
1110                    end
1111                end
1112            end
1113        end
1114     -- showtree(data,"after","collapse")
1115    end
1116
1117    local function finalizetree(tree)
1118     -- showtree(data,"before","finalize")
1119        for _, finalizer in next, finalizers do
1120            finalizer(tree)
1121        end
1122     -- showtree(data,"after","finalize")
1123    end
1124
1125    local function indextree(tree)
1126        local data = tree.data
1127        if data then
1128         -- showtree(data,"before","index")
1129            local n, new = 0, { }
1130            for i=1,#data do
1131                local d = data[i]
1132                if not d then
1133                    -- skip
1134                elseif d.content then
1135                    n = n + 1
1136                    new[n] = d
1137                elseif not d.collapsed then
1138                    n = n + 1
1139                    d.__i__ = n
1140                    d.__p__ = tree
1141                    indextree(d)
1142                    new[n] = d
1143                end
1144            end
1145            tree.data = new
1146         -- showtree(new,"after","index")
1147        end
1148    end
1149
1150    local function checktree(tree)
1151        local data = tree.data
1152        if data then
1153         -- showtree(data,"before","check")
1154            for i=1,#data do
1155                local d = data[i]
1156                if type(d) == "table" then
1157                    local tg = d.tg
1158                    if tg then
1159                        local check = checks[tg]
1160                        if check then
1161                            check(d,data,i)
1162                        end
1163                    end
1164                    checktree(d) -- so parts can pass twice
1165                end
1166            end
1167         -- showtree(data,"after","check")
1168        end
1169    end
1170
1171    local function fixtree(tree)
1172        local data = tree.data
1173        if data then
1174         -- showtree(data,"before","fix")
1175            for i=1,#data do
1176                local d = data[i]
1177                if type(d) == "table" then
1178                    local tg = d.tg
1179                    if tg then
1180                        local fix = fixes[tg]
1181                        if fix then
1182                            fix(d,data,i)
1183                        end
1184                    end
1185                    fixtree(d) -- so parts can pass twice
1186                end
1187            end
1188         -- showtree(data,"after","fix")
1189        end
1190    end
1191
1192    wrapups.flushtree    = flushtree
1193    wrapups.breaktree    = breaktree
1194    wrapups.collapsetree = collapsetree
1195    wrapups.finalizetree = finalizetree
1196    wrapups.indextree    = indextree
1197    wrapups.checktree    = checktree
1198    wrapups.fixtree      = fixtree
1199
1200end
1201
1202-- collector code
1203
-- Opens a new node for 'fulltag' below the current tree node and makes it the
-- current one.
--
--   fulltag : tag as stored in the tag lists; when there is no specification for
--             it (a break) name and index are obtained with 'tagsplitter'
--   depth   : accepted but not used here
--
-- The node is appended to the data of the current tree, the previous tree is
-- saved on 'treestack', 'nesting' remembers the full tag for the new depth, and
-- (except for breaks) the node is also registered in 'treehash' under its full
-- tag so that repeated instances can be found later.
local function push(fulltag,depth)
    local tg, n, detail, element, nature, record
    local specification = specifications[fulltag]
    if specification then
        tg     = specification.tagname
        n      = specification.tagindex
        detail = specification.detail
    else
        -- a break (more efficient if we don't store those in specifications)
        tg, n = lpegmatch(tagsplitter,fulltag)
        n = tonumber(n) -- to tonumber in tagsplitter
    end
    local p = properties[tg]
    if p then
        element = p.export or tg
        nature  = p.nature or "inline" -- defaultnature
        record  = p.record
    end
    local treedata = tree.data
    local t = { -- maybe we can use the tag table
        tg        = tg,
        fulltag   = fulltag,
        detail    = detail,
        n         = n, -- already a number
        element   = element,
        nature    = nature,
        data      = { },
        attribute = currentattribute,
        parnumber = currentparagraph,
        record    = record, -- we can consider storing properties
    }
    treedata[#treedata+1] = t
    currentdepth = currentdepth + 1
    nesting[currentdepth] = fulltag
    treestack[currentdepth] = tree
    if trace_export then
        -- the values follow the order of the labels: trigger (the current attribute)
        -- comes before n (the tag index); they used to be passed the other way around
        local trigger   = currentattribute or 0
        local paragraph = currentparagraph or 0
        if detail and detail ~= "" then
            report_export("%w<%s trigger=%q n=%q paragraph=%q index=%q detail=%q>",currentdepth-1,tg,trigger,n,paragraph,#treedata,detail)
        else
            report_export("%w<%s trigger=%q n=%q paragraph=%q index=%q>",currentdepth-1,tg,trigger,n,paragraph,#treedata)
        end
    end
    tree = t
    if tg == "break" then
        -- no need for this
    else
        local h = treehash[fulltag]
        if h then
            h[#h+1] = t
        else
            treehash[fulltag] = { t }
        end
    end
end
1258
-- Leaves the current node: the tree saved for the current depth becomes the
-- current tree again and the depth goes down by one. At depth zero (or below)
-- nothing changes and a "too many pops" message is reported, also when tracing
-- is off.
local function pop()
    if currentdepth <= 0 then
        report_export("%w<!-- too many pops -->",currentdepth)
        return
    end
    local top = nesting[currentdepth]
    tree = treestack[currentdepth]
    currentdepth = currentdepth - 1
    if not trace_export then
        return
    end
    if top then
        -- only the part of the full tag before the first ">" is shown
        report_export("%w</%s>",currentdepth,match(top,"[^>]+"))
    else
        report_export("</BAD>")
    end
end
1275
-- Appends a single space to the pending content, but only when there is pending
-- content already (used when the export continues after a restart).
local function continueexport()
    if nofcurrentcontent <= 0 then
        return
    end
    if trace_export then
        report_export("%w<!-- injecting pagebreak space -->",currentdepth)
    end
    local slot = nofcurrentcontent + 1
    currentcontent[slot] = " " -- pagebreak
    nofcurrentcontent = slot
end
1285
-- Synchronizes the open nodes with the tag list of 'current'.
--
--   current : table with a 'taglist' field (array of full tags, outermost first)
--
-- Returns the depth before and the length of the tag list, or nothing at all
-- when 'current' or its tag list is missing.
--
-- The tag list is compared with 'nesting' over the shared prefix. At the first
-- mismatch everything from that level on is popped and the rest of the list is
-- pushed. Without a mismatch a longer list pushes the extra levels, and a
-- shorter list pops olddepth - newdepth + 1 levels, so it ends up one level
-- below the length of the list.
local function pushentry(current)
    local list = current and current.taglist
    if not list then
        -- bad news (no entry) or even worse news (no taglist)
        return
    end
    if restart then
        continueexport()
        restart = false
    end
    local newdepth = #list
    local olddepth = currentdepth
    if trace_export then
        report_export("%w<!-- moving from depth %s to %s (%s) -->",currentdepth,olddepth,newdepth,list[newdepth])
    end
    if olddepth <= 0 then
        -- nothing open yet: push the whole list
        for i=1,newdepth do
            push(list[i],i)
        end
        return olddepth, newdepth
    end
    -- find the first level (within the shared prefix) where the tags differ
    local shared = olddepth < newdepth and olddepth or newdepth
    local difference
    for i=1,shared do
        if list[i] ~= nesting[i] then
            difference = i
            break
        end
    end
    if difference then
        for i=olddepth,difference,-1 do
            pop()
        end
        for i=difference,newdepth do
            push(list[i],i)
        end
    elseif newdepth > olddepth then
        for i=olddepth+1,newdepth do
            push(list[i],i)
        end
    elseif newdepth < olddepth then
        for i=olddepth,newdepth,-1 do
            pop()
        end
    elseif trace_export then
        report_export("%w<!-- staying at depth %s (%s) -->",currentdepth,newdepth,nesting[newdepth] or "?")
    end
    return olddepth, newdepth
end
1347
-- Flushes the pending content into the tree and optionally marks a paragraph
-- transition.
--
--   oldparagraph : when set, a trailing "\n" fragment is dropped, content that
--                  is only some kind of space is omitted, the stored content
--                  gets this paragraph number, and afterwards a break entry is
--                  pushed
--   newparagraph : only used in the trace message
--
-- Empty content is never stored. The pending content counter is reset to zero
-- whenever there was pending content, stored or not.
local function pushcontent(oldparagraph,newparagraph)
    if nofcurrentcontent > 0 then
        if oldparagraph and currentcontent[nofcurrentcontent] == "\n" then
            if trace_export then
                report_export("%w<!-- removing newline -->",currentdepth)
            end
            nofcurrentcontent = nofcurrentcontent - 1
        end
        local content = concat(currentcontent,"",1,nofcurrentcontent)
        -- omit; when oldparagraph we could push, remove spaces, pop
        local omit = content == "" or (somespace[content] and oldparagraph)
        if not omit then
            local list = taglist[currentattribute]
            local olddepth, newdepth
            if list then
                olddepth, newdepth = pushentry(list)
            end
            if tree then
                local target = tree.data
                target[#target+1] = { parnumber = oldparagraph or currentparagraph, content = content }
                if trace_export then
                    report_export("%w<!-- start content with length %s -->",currentdepth,utflen(content))
                    report_export("%w%s",currentdepth,(gsub(content,"\n","\\n")))
                    report_export("%w<!-- stop content -->",currentdepth)
                end
                if olddepth then
                    -- leave the levels that were entered for this content
                    for i=newdepth-1,olddepth,-1 do
                        pop()
                    end
                end
            end
        end
        nofcurrentcontent = 0
    end
    if oldparagraph then
        pushentry(makebreaklist(currentnesting))
        if trace_export then
            report_export("%w<!-- break added between paragraph %a and %a -->",currentdepth,oldparagraph,newparagraph)
        end
    end
end
1394
-- Wraps up the collecting stage: a trailing space fragment is dropped, pending
-- content is flushed, every open level is popped and the content buffer is
-- replaced by a fresh table.
local function finishexport()
    if trace_export then
        report_export("%w<!-- start finalizing -->",currentdepth)
    end
    if nofcurrentcontent > 0 then
        local last = currentcontent[nofcurrentcontent]
        if somespace[last] then
            if trace_export then
                report_export("%w<!-- removing space -->",currentdepth)
            end
            nofcurrentcontent = nofcurrentcontent - 1
        end
        pushcontent()
    end
    local levels = currentdepth -- fixed before we start popping
    for _=1,levels do
        pop()
    end
    currentcontent = { } -- we're nice and do a cleanup
    if trace_export then
        report_export("%w<!-- stop finalizing -->",currentdepth)
    end
end
1416
1417-- inserts ?
1418
1419local collectresults  do -- too many locals otherwise
1420
1421    local nodecodes          = nodes.nodecodes
1422    local gluecodes          = nodes.gluecodes
1423    local listcodes          = nodes.listcodes
1424    local whatsitcodes       = nodes.whatsitcodes
1425
1426    local subtypes           = nodes.subtypes
1427
1428    local hlist_code         = nodecodes.hlist
1429    local vlist_code         = nodecodes.vlist
1430    local glyph_code         = nodecodes.glyph
1431    local glue_code          = nodecodes.glue
1432    local kern_code          = nodecodes.kern
1433    local disc_code          = nodecodes.disc
1434    local whatsit_code       = nodecodes.whatsit
1435    local par_code           = nodecodes.par
1436
1437    local userskip_code      = gluecodes.userskip
1438    local rightskip_code     = gluecodes.rightskip
1439    local parfillskip_code   = gluecodes.parfillskip
1440    local spaceskip_code     = gluecodes.spaceskip
1441    local xspaceskip_code    = gluecodes.xspaceskip
1442    local intermathskip_code = gluecodes.intermathskip
1443
1444    local linelist_code      = listcodes.line
1445
1446    local userdefinedwhatsit_code  = whatsitcodes.userdefined
1447
1448    local privateattribute = attributes.private
1449    local a_image          = privateattribute('image')
1450    local a_reference      = privateattribute('reference')
1451    local a_destination    = privateattribute('destination')
1452    local a_characters     = privateattribute('characters')
1453    local a_exportstatus   = privateattribute('exportstatus')
1454    local a_tagged         = privateattribute('tagged')
1455    local a_taggedpar      = privateattribute("taggedpar")
1456    local a_textblock      = privateattribute("textblock")
1457
1458    local inline_mark      = nodes.pool.userids["margins.inline"]
1459
1460    local nuts             = nodes.nuts
1461
1462    local getnext          = nuts.getnext
1463    local getdisc          = nuts.getdisc
1464    local getlist          = nuts.getlist
1465    local getid            = nuts.getid
1466    local getattr          = nuts.getattr
1467    local setattr          = nuts.setattr -- maybe use properties
1468    local isglyph          = nuts.isglyph
1469    local getkern          = nuts.getkern
1470    local getwidth         = nuts.getwidth
1471
1472    local startofpar       = nuts.startofpar
1473
1474    local nexthlist        = nuts.traversers.hlist
1475    local nextnode         = nuts.traversers.node
1476
1477    local function addtomaybe(maybewrong,c,case)
1478        if trace_export then
1479            report_export("%w<!-- possible paragraph mixup at %C case %i -->",currentdepth,c,case)
1480        else
1481            local s = formatters["%C"](c)
1482            if maybewrong then
1483                maybewrong[#maybewrong+1] = s
1484            else
1485                maybewrong = { s }
1486            end
1487            return maybewrong
1488        end
1489    end
1490
1491    local function showmaybe(maybewrong)
1492        if not trace_export then
1493            report_export("fuzzy paragraph: % t",maybewrong)
1494        end
1495    end
1496
1497    local function showdetail(n,id,subtype)
1498        local a = getattr(n,a_tagged)
1499        local t = taglist[a]
1500        local c = nodecodes[id]
1501        local s = subtypes[id][subtype]
1502        if a and t then
1503            report_export("node %a, subtype %a, tag %a, element %a, tree '% t'",c,s,a,t.tagname,t.taglist)
1504        else
1505            report_export("node %a, subtype %a, untagged",c,s)
1506        end
1507    end
1508
1509    local function collectresults(head,list,pat,pap) -- is last used (we also have currentattribute)
1510        local p
1511        local paragraph
1512        local maybewrong
1513        local pid
1514        for n, id, subtype in nextnode, head do
1515            if trace_details then
1516                showdetail(n,id,subtype)
1517            end
1518            if id == glyph_code then
1519                local c, f = isglyph(n)
1520                local at   = getattr(n,a_tagged) or pat
1521                if not at then
1522                 -- we need to tag the pagebody stuff as being valid skippable
1523                 --
1524                 -- report_export("skipping character: %C (no attribute)",n.char)
1525                else
1526                    if last ~= at then
1527                        local tl = taglist[at]
1528                        local ap = getattr(n,a_taggedpar) or pap
1529                        if paragraph and (not ap or ap < paragraph) then
1530                            maybewrong = addtomaybe(maybewrong,c,1)
1531                        end
1532                        pushcontent()
1533                        currentnesting   = tl
1534                        currentparagraph = ap
1535                        currentattribute = at
1536                        last = at
1537                        pushentry(currentnesting)
1538                        if trace_export then
1539                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,at)
1540                        end
1541                        -- We need to intercept this here; maybe I will also move this
1542                        -- to a regular setter at the tex end.
1543                        local r = getattr(n,a_reference)
1544                        if r then
1545                            local t = tl.taglist
1546                            referencehash[t[#t]] = r -- fulltag
1547                        end
1548                        local d = getattr(n,a_destination)
1549                        if d then
1550                            local t = tl.taglist
1551                            destinationhash[t[#t]] = d -- fulltag
1552                        end
1553                        --
1554                    elseif last then
1555                        -- we can consider tagging the pars (lines) in the parbuilder but then we loose some
1556                        -- information unless we inject a special node (but even then we can run into nesting
1557                        -- issues)
1558                        local ap = getattr(n,a_taggedpar) or pap
1559                        if ap ~= currentparagraph then
1560                            pushcontent(currentparagraph,ap)
1561                            pushentry(currentnesting)
1562                            currentattribute = last
1563                            currentparagraph = ap
1564                        end
1565                        if paragraph and (not ap or ap < paragraph) then
1566                            maybewrong = addtomaybe(maybewrong,c,2)
1567                        end
1568                        if trace_export then
1569                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,last)
1570                        end
1571                    else
1572                        if trace_export then
1573                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,at)
1574                        end
1575                    end
1576                    local s = getattr(n,a_exportstatus)
1577                    if s then
1578                        c = s
1579                    end
1580                    if c == 0 or c == 0xFFFD then
1581                        if trace_export then
1582                            report_export("%w<!-- skipping glyph %U -->",currentdepth,c)
1583                        end
1584                    elseif c == 0x20 then
1585                        local a = getattr(n,a_characters)
1586                        nofcurrentcontent = nofcurrentcontent + 1
1587                        if a then
1588                            if trace_export then
1589                                report_export("%w<!-- turning last space into special space %U -->",currentdepth,a)
1590                            end
1591                            currentcontent[nofcurrentcontent] = specialspaces[a] -- special space
1592                        else
1593                            currentcontent[nofcurrentcontent] = " "
1594                        end
1595                    else
1596                        local fc = fontchar[f]
1597                        if fc then
1598                            fc = fc and fc[c]
1599                            if fc then
1600                                local u = fc.unicode
1601                                if u == 0 or u == 0xFFFD then
1602                                    -- ignore (can make disappear)
1603                                elseif not u then
1604                                    nofcurrentcontent = nofcurrentcontent + 1
1605                                    currentcontent[nofcurrentcontent] = utfchar(c)
1606                                elseif type(u) == "table" then
1607                                    for i=1,#u do
1608                                        nofcurrentcontent = nofcurrentcontent + 1
1609                                        currentcontent[nofcurrentcontent] = utfchar(u[i])
1610                                    end
1611                                else
1612                                    nofcurrentcontent = nofcurrentcontent + 1
1613                                    currentcontent[nofcurrentcontent] = utfchar(u)
1614                                end
1615                            elseif c > 0 then
1616                                nofcurrentcontent = nofcurrentcontent + 1
1617                                currentcontent[nofcurrentcontent] = utfchar(c)
1618                            else
1619                                -- we can have -1 as side effect of an explicit hyphen (unless we expand)
1620                            end
1621                        elseif c > 0 then
1622                            nofcurrentcontent = nofcurrentcontent + 1
1623                            currentcontent[nofcurrentcontent] = utfchar(c)
1624                        else
1625                            -- we can have -1 as side effect of an explicit hyphen (unless we expand)
1626                        end
1627                    end
1628                end
1629            elseif id == glue_code then
1630                -- we need to distinguish between hskips and vskips
1631                local ca = getattr(n,a_characters)
1632                if ca == 0 then
1633                    -- skip this one ... already converted special character (node-acc)
1634                elseif ca then
1635                    local a = getattr(n,a_tagged) or pat
1636                    if a then
1637                        local c = specialspaces[ca]
1638                        if last ~= a then
1639                            local tl = taglist[a]
1640                            if trace_export then
1641                                report_export("%w<!-- processing space glyph %U tagged %a case 1 -->",currentdepth,ca,a)
1642                            end
1643                            pushcontent()
1644                            currentnesting = tl
1645                            currentparagraph = getattr(n,a_taggedpar) or pap
1646                            currentattribute = a
1647                            last = a
1648                            pushentry(currentnesting)
1649                            -- no reference check (see above)
1650                        elseif last then
1651                            local ap = getattr(n,a_taggedpar) or pap
1652                            if ap ~= currentparagraph then
1653                                pushcontent(currentparagraph,ap)
1654                                pushentry(currentnesting)
1655                                currentattribute = last
1656                                currentparagraph = ap
1657                            end
1658                            if trace_export then
1659                                report_export("%w<!-- processing space glyph %U tagged %a case 2 -->",currentdepth,ca,last)
1660                            end
1661                        end
1662                        -- if somespace[currentcontent[nofcurrentcontent]] then
1663                        --     if trace_export then
1664                        --         report_export("%w<!-- removing space -->",currentdepth)
1665                        --     end
1666                        --     nofcurrentcontent = nofcurrentcontent - 1
1667                        -- end
1668                        nofcurrentcontent = nofcurrentcontent + 1
1669                        currentcontent[nofcurrentcontent] = c
1670                    end
1671                elseif subtype == userskip_code then
1672-- local at = getattr(n,a_tagged)
1673-- local tl = taglist[at]
1674-- if tl and structurestags.strip(tl.taglist[#tl.taglist]) == "mspace" then
1675--     if nofcurrentcontent > 0 then
1676--         pushcontent()
1677--         pushentry(currentnesting) -- ??
1678--     end
1679--     -- in the past we'd push a space here ... check mkiv : otherwise no threshold with mspace
1680--     pushentry(tl)
1681--     if trace_export then
1682--         report_export("%w<!-- processing mspace tagged %a",currentdepth,at)
1683--     end
1684--     last = nil
1685--     currentparagraph = nil
1686-- else
1687                    if getwidth(n) > threshold then
1688                        if last and not somespace[currentcontent[nofcurrentcontent]] then
1689                            local a = getattr(n,a_tagged) or pat
1690                            if a == last then
1691                                if trace_export then
1692                                    report_export("%w<!-- injecting spacing 5a -->",currentdepth)
1693                                end
1694                                nofcurrentcontent = nofcurrentcontent + 1
1695                                currentcontent[nofcurrentcontent] = " "
1696                            elseif a then
1697                                -- e.g LOGO<space>LOGO
1698                                if trace_export then
1699                                    report_export("%w<!-- processing glue > threshold tagged %s becomes %s -->",currentdepth,last,a)
1700                                end
1701                                pushcontent()
1702                                if trace_export then
1703                                    report_export("%w<!-- injecting spacing 5b -->",currentdepth)
1704                                end
1705                                last = a
1706                                nofcurrentcontent = nofcurrentcontent + 1
1707                                currentcontent[nofcurrentcontent] = " "
1708                                currentnesting = taglist[last]
1709                                pushentry(currentnesting)
1710                                currentattribute = last
1711                            end
1712                        end
1713                    end
1714-- end
1715                elseif subtype == spaceskip_code or subtype == xspaceskip_code then
1716                    if not somespace[currentcontent[nofcurrentcontent]] then
1717                        local a = getattr(n,a_tagged) or pat
1718                        if a == last then
1719                            if trace_export then
1720                                report_export("%w<!-- injecting spacing 7 (stay in element) -->",currentdepth)
1721                            end
1722                            nofcurrentcontent = nofcurrentcontent + 1
1723                            currentcontent[nofcurrentcontent] = " "
1724                        else
1725                            if trace_export then
1726                                report_export("%w<!-- injecting spacing 7 (end of element) -->",currentdepth)
1727                            end
1728                            last = a
1729                            pushcontent()
1730                            nofcurrentcontent = nofcurrentcontent + 1
1731                            currentcontent[nofcurrentcontent] = " "
1732                            currentnesting = taglist[last]
1733                            pushentry(currentnesting)
1734                            currentattribute = last
1735                        end
1736                    end
1737                elseif subtype == intermathskip_code then
1738                    -- put this as attribute when it differs, maybe more ... check mathml
1739                elseif subtype == rightskip_code then
1740                    -- a line
1741                    if nofcurrentcontent > 0 then
1742                        local r = currentcontent[nofcurrentcontent]
1743                        if r == hyphen then
1744                            if not keephyphens then
1745                                nofcurrentcontent = nofcurrentcontent - 1
1746                            end
1747                        elseif pid == disc_code then
1748                            -- go on .. tricky: we should mark the glyhs as coming from a disc
1749                        elseif not somespace[r] then
1750                            local a = getattr(n,a_tagged) or pat
1751                            if a == last then
1752                                if trace_export then
1753                                    report_export("%w<!-- injecting spacing 1 (end of line, stay in element) -->",currentdepth)
1754                                end
1755                                nofcurrentcontent = nofcurrentcontent + 1
1756                                currentcontent[nofcurrentcontent] = " "
1757                            else
1758                                if trace_export then
1759                                    report_export("%w<!-- injecting spacing 1 (end of line, end of element) -->",currentdepth)
1760                                end
1761                                last = a
1762                                pushcontent()
1763                                nofcurrentcontent = nofcurrentcontent + 1
1764                                currentcontent[nofcurrentcontent] = " "
1765                                currentnesting = taglist[last]
1766                                pushentry(currentnesting)
1767                                currentattribute = last
1768                            end
1769                        end
1770                    end
1771                elseif subtype == parfillskip_code then
1772                    -- deal with paragraph endings (crossings) elsewhere and we quit here
1773                    -- as we don't want the rightskip space addition
1774                    if maybewrong then
1775                        showmaybe(maybewrong)
1776                    end
1777                    return
1778                end
1779            elseif id == hlist_code or id == vlist_code then
1780                local ai = getattr(n,a_image)
1781                if ai then
1782                    local at = getattr(n,a_tagged) or pat
1783                    if nofcurrentcontent > 0 then
1784                        pushcontent()
1785                        pushentry(currentnesting) -- ??
1786                    end
1787                    pushentry(taglist[at]) -- has an index, todo: flag empty element
1788                    if trace_export then
1789                        report_export("%w<!-- processing image tagged %a",currentdepth,last)
1790                    end
1791                    last = nil
1792                    currentparagraph = nil
1793                else
1794                    -- we need to determine an end-of-line
1795                    local list = getlist(n)
1796                    if list then
1797                        -- todo: no par checking needed in math
1798                        local at = getattr(n,a_tagged) or pat
1799                        collectresults(list,n,at)
1800                    end
1801                end
1802            elseif id == kern_code then
1803                local kern = getkern(n)
1804                if kern > 0 then
1805                    local a = getattr(n,a_tagged) or pat
1806                    local t = taglist[a]
1807                    if not t or t.tagname ~= "ignore" then -- maybe earlier on top)
1808                        local limit = threshold
1809                        if p then
1810                            local c, f = isglyph(p)
1811                            if c then
1812                                limit = fontquads[f] / 4
1813                            end
1814                        end
1815                        if kern > limit then
1816                            if last and not somespace[currentcontent[nofcurrentcontent]] then
1817                             -- local a = getattr(n,a_tagged) or pat
1818                                if a == last then
1819                                    if not somespace[currentcontent[nofcurrentcontent]] then
1820                                        if trace_export then
1821                                            report_export("%w<!-- injecting spacing 8 (kern %p) -->",currentdepth,kern)
1822                                        end
1823                                        nofcurrentcontent = nofcurrentcontent + 1
1824                                        currentcontent[nofcurrentcontent] = " "
1825                                    end
1826                                elseif a then
1827                                    -- e.g LOGO<space>LOGO
1828                                    if trace_export then
1829                                        report_export("%w<!-- processing kern, threshold %p, tag %s => %s -->",currentdepth,limit,last,a)
1830                                    end
1831                                    last = a
1832                                    pushcontent()
1833                                    if trace_export then
1834                                        report_export("%w<!-- injecting spacing 9 (kern %p) -->",currentdepth,kern)
1835                                    end
1836                                    nofcurrentcontent = nofcurrentcontent + 1
1837                                    currentcontent[nofcurrentcontent] = " "
1838                                 -- currentnesting = taglist[last]
1839                                    currentnesting = t
1840                                    pushentry(currentnesting)
1841                                    currentattribute = last
1842                                end
1843                            end
1844                        end
1845                    end
1846                end
1847            elseif id == whatsit_code then
1848                -- todo (lmtx)
1849                if subtype == userdefinedwhatsit_code then
1850                    -- similar to images, see above
1851                    local at = getattr(n,a_tagged)
1852                    if nofcurrentcontent > 0 then
1853                        pushcontent()
1854                        pushentry(currentnesting) -- ??
1855                    end
1856                    pushentry(taglist[at])
1857                    if trace_export then
1858                        report_export("%w<!-- processing anchor tagged %a",currentdepth,last)
1859                    end
1860                    last = nil
1861                    currentparagraph = nil
1862                end
1863            elseif not paragraph and id == par_code and startofpar(n) then
1864                paragraph = getattr(n,a_taggedpar)
1865            elseif id == disc_code then
1866                -- very unlikely because we stripped them
1867                local pre, post, replace = getdisc(n)
1868                if keephyphens then
1869                    if pre and not getnext(pre) and isglyph(pre) == 0xAD then -- hyphencode then
1870                        nofcurrentcontent = nofcurrentcontent + 1
1871                        currentcontent[nofcurrentcontent] = hyphen
1872                    end
1873                end
1874                if replace then
1875                    collectresults(replace,nil)
1876                end
1877            end
1878            p   = n
1879            pid = id
1880        end
1881        if maybewrong then
1882            showmaybe(maybewrong)
1883        end
1884    end
1885
    -- Switch for the page level export: the two updaters registered below flip
    -- it and nodes.handlers.export checks it before it collects anything.

    local enabled = true

    updaters.register("tagging.state.disable",function() enabled = false end)
    updaters.register("tagging.state.enable", function() enabled = true  end)
1890
1891    function nodes.handlers.export(head) -- hooks into the page builder
1892        if enabled then
1893            starttiming(treehash)
1894            if trace_export then
1895                report_export("%w<!-- start flushing page -->",currentdepth)
1896            end
1897         -- continueexport()
1898            restart = true
1899            collectresults(head)
1900            if trace_export then
1901                report_export("%w<!-- stop flushing page -->",currentdepth)
1902            end
1903            stoptiming(treehash)
1904        end
1905        return head
1906    end
1907
1908    function nodes.handlers.checkparcounter(p)
1909        setattr(p,a_taggedpar,texgetcount("tagparcounter") + 1)
1910        return p
1911    end
1912
1913    -- needs checking!
1914
1915    function builders.paragraphs.tag(head)
1916        noftextblocks = noftextblocks + 1
1917        for n, subtype in nexthlist, head do
1918            if subtype == linelist_code then
1919                setattr(n,a_textblock,noftextblocks)
1920         -- elseif subtype == glue_code or subtype == kern_code then -- weird, no list
1921         --     setattr(n,a_textblock,0)
1922            end
1923        end
1924        return false
1925    end
1926
1927end
1928
1929do
1930
    -- Shortcuts to the xml helpers that are used further down in this block.

    local xmlcollected  = xml.collected
    local xmlsetcomment = xml.setcomment

-- The xml declaration without anything else; wholepreamble picks this one when
-- no comment is wanted. Only the %standalone% slot is present.

local xmlpreamble_nop = [[
<?xml version="1.0" encoding="UTF-8" standalone="%standalone%" ?>
]]

-- The xml declaration followed by a comment block that records the filename,
-- the processing date and the context and exporter versions (all filled in by
-- wholepreamble).

local xmlpreamble_yes = [[
<?xml version="1.0" encoding="UTF-8" standalone="%standalone%" ?>

<!--

    input filename   : %filename%
    processing date  : %date%
    context version  : %contextversion%
    exporter version : %exportversion%

-->

]]

    -- The tree serializer, used by allcontent below.

    local flushtree = wrapups.flushtree
1953
1954    local function wholepreamble(standalone,nocomment)
1955        return replacetemplate(nocomment and xmlpreamble_nop or xmlpreamble_yes, {
1956            standalone     = standalone and "yes" or "no",
1957            filename       = tex.jobname,
1958            date           = os.fulltime(),
1959            contextversion = environment.version,
1960            exportversion  = exportversion,
1961        })
1962    end
1963
1964
-- Processing instruction that refers to a stylesheet; allusedstylesheets fills
-- in %filename% once per css file.

local csspreamble = [[
<?xml-stylesheet type="text/css" href="%filename%" ?>
]]

-- The html <link> counterpart of the above, also filled in per css file by
-- allusedstylesheets.

local cssheadlink = [[
<link type="text/css" rel="stylesheet" href="%filename%" />
]]

-- great, these suggested attribute values

-- Script element that loads MathJax (mml-chtml) from the jsdelivr cdn.

local mathmlheadscript = [[
<script
    type="text/javascript"
    id="MathJax-script"
    async="async"
    src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/mml-chtml.js">
</script>
]]
1983
1984    local function allusedstylesheets(cssfiles,files,path,extra)
1985        local done   = { }
1986        local result = { }
1987        local extras = { }
1988        for i=1,#cssfiles do
1989            local cssfile = cssfiles[i]
1990            if type(cssfile) ~= "string" then
1991                -- error
1992            elseif cssfile == "export-example.css" then
1993                -- ignore
1994            elseif not done[cssfile] then
1995                cssfile = joinfile(path,basename(cssfile))
1996                report_export("adding css reference '%s'",cssfile)
1997                files[#files+1]   = cssfile
1998                result[#result+1] = replacetemplate(csspreamble, { filename = cssfile })
1999                extras[#extras+1] = replacetemplate(cssheadlink, { filename = cssfile })
2000                done[cssfile]     = true
2001            end
2002        end
2003        if extra then
2004            extras[#extras+1] = extra
2005        end
2006        return concat(result), concat(extras)
2007    end
2008
-- Css rule for an element without detail; allusedelements fills in the
-- element, namespace, display and chain slots.

local elementtemplate <const> = [[
/* element="%element%" detail="%detail%" chain="%chain%" */

%element%,
%namespace%div.%element% {
    display: %display% ;
}]]

-- Css rule for an element with a detail: the rule matches on the detail
-- attribute as well as on the detail used as an extra class on a div.

local detailtemplate <const> = [[
/* element="%element%" detail="%detail%" chain="%chain%" */

%element%[detail=%detail%],
%namespace%div.%element%.%detail% {
    display: %display% ;
}]]
2024
2025-- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd" >
2026
-- Skeleton of the html wrapper with %preamble%, %title%, %style% and %body%
-- slots (presumably filled in with replacetemplate like the other templates
-- here; the call is not in this part of the file). The body is wrapped in a
-- div of class "document" and is preceded by a div of class "warning" that
-- carries a message about the css not being loaded.

local htmltemplate <const> = [[
%preamble%

<html xmlns="http://www.w3.org/1999/xhtml" xmlns:math="http://www.w3.org/1998/Math/MathML">

    <head>

        <meta charset="utf-8"/>

        <title>%title%</title>

%style%

    </head>
    <body>
        <div class="document" xmlns="http://www.pragma-ade.com/context/export">

<div class="warning">Rendering can be suboptimal because there is no default/fallback css loaded.</div>

%body%

        </div>
    </body>
</html>
]]
2052
    -- Maps the nature of an element onto the value of the css display
    -- property; allusedelements uses "block" for a nature that is not in here.

    local displaymapping = {
        inline  = "inline",
        display = "block",
        mixed   = "inline",
    }
2058
2059    local function allusedelements(filename)
2060        local result = { replacetemplate(namespacetemplate, {
2061            what            = "template",
2062            filename        = filename,
2063            namespace       = contextns,
2064         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or "",
2065            cssnamespaceurl = cssnamespaceurl,
2066        },false,true) }
2067        for element, details in sortedhash(used) do
2068            if namespaces[element] then
2069                -- skip math
2070            else
2071                for detail, what in sortedhash(details) do
2072                    local nature  = what[1] or "display"
2073                    local chain   = what[2]
2074                    local display = displaymapping[nature] or "block"
2075                    if detail == "" then
2076                        result[#result+1] = replacetemplate(elementtemplate, {
2077                            element   = element,
2078                            display   = display,
2079                            chain     = chain,
2080                            namespace = usecssnamespace and namespace or "",
2081                        })
2082                    else
2083                        result[#result+1] = replacetemplate(detailtemplate, {
2084                            element   = element,
2085                            display   = display,
2086                            detail    = detail,
2087                            chain     = chain,
2088                            namespace = usecssnamespace and cssnamespace or "",
2089                        })
2090                    end
2091                end
2092            end
2093        end
2094        return concat(result,"\n\n")
2095    end
2096
2097    local function allcontent(tree)
2098        local result = { }
2099        local data   = tree.data
2100        for i=1,#data do
2101            if data[i].tg ~= "document" then
2102                data[i] = { }
2103            end
2104        end
2105        flushtree(result,tree.data,"display") -- we need to collect images
2106        result = concat(result)
2107        -- no need to lpeg .. fast enough
2108        result = gsub(result,"\n *\n","\n")
2109        result = gsub(result,"\n +([^< ])","\n%1")
2110        --
2111        return result
2112    end
2113
2114    -- local xhtmlpreamble = [[
2115    --     <!DOCTYPE html PUBLIC
2116    --         "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
2117    --         "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"
2118    --     >
2119    -- ]]
2120
2121    local function cleanxhtmltree(xmltree)
2122        if xmltree then
2123            local implicits = { }
2124            local explicits = { }
2125            local overloads = { }
2126            for e in xmlcollected(xmltree,"*") do
2127                local at = e.at
2128                if at then
2129                    local explicit = at.explicit
2130                    local implicit = at.implicit
2131                    if explicit then
2132                        if not explicits[explicit] then
2133                            explicits[explicit] = true
2134                            at.id = explicit
2135                            if implicit then
2136                                overloads[implicit] = explicit
2137                            end
2138                        end
2139                    else
2140                        if implicit and not implicits[implicit] then
2141                            implicits[implicit] = true
2142                            at.id = "aut:" .. implicit
2143                        end
2144                    end
2145                end
2146            end
2147            for e in xmlcollected(xmltree,"*") do
2148                local at = e.at
2149                if at then
2150                    local internal = at.internal
2151                    local location = at.location
2152                    if internal then
2153                        if location then
2154                            local explicit = overloads[location]
2155                            if explicit then
2156                                at.href = "#" .. explicit
2157                            else
2158                                at.href = "#aut:" .. internal
2159                            end
2160                        else
2161                            at.href = "#aut:" .. internal
2162                        end
2163                    else
2164                        if location then
2165                            at.href = "#" .. location
2166                        else
2167                            local url = at.url
2168                            if url then
2169                                at.href = url
2170                            else
2171                                local file = at.file
2172                                if file then
2173                                    at.href = file
2174                                end
2175                            end
2176                        end
2177                    end
2178                end
2179            end
2180            return xmltree
2181        else
2182            return xml.convert('<?xml version="1.0"?>\n<error>invalid xhtml tree</error>')
2183        end
2184    end
2185
2186    -- maybe the reverse: be explicit about what is permitted
2187
    -- Attribute names that makeclass leaves alone: every other attribute of an
    -- element ends up as a "<name>-<value>" entry in its class.

    local private = {
        destination = true,
        prefix      = true,
        reference   = true,
        --
        id          = true,
        href        = true,
        --
        implicit    = true,
        explicit    = true,
        --
        url         = true,
        file        = true,
        internal    = true,
        location    = true,
        --
        name        = true, -- image name
        used        = true, -- image name
        page        = true, -- image name
        width       = true,
        height      = true,
        --
    }
2211
2212    local addclicks   = true
2213    local f_onclick   = formatters[ [[location.href='%s']] ]
2214    local f_onclick   = formatters[ [[location.href='%s']] ]
2215
    -- p_cleanid is made by lpeg.replacer from a mapping of ":" onto "-";
    -- p_cleanhref is the same pattern behind a leading "#", wrapped in a
    -- substitution capture.

    local p_cleanid   = lpeg.replacer { [":"] = "-" }
    local p_cleanhref = lpeg.Cs(lpeg.P("#") * p_cleanid)

    -- p_splitter collects the space separated words of a string in a table.
    -- The extra argument given to the match serves as a 'seen' hash: the
    -- function capture only returns a word the first time it shows up, so
    -- duplicates are dropped.

    local p_splitter  = lpeg.Ct ( (
        lpeg.Carg(1) * lpeg.C((1-lpeg.P(" "))^1) / function(d,s) if not d[s] then d[s] = true return s end end
      * lpeg.P(" ")^0 )^1 )


    -- Memoizing table: indexing it with a space separated list of words gives
    -- that list with the duplicates removed (words separated by one space).
    -- Each result is stored, so a given key is split only once.

    local classes = table.setmetatableindex(function(t,k)
        local v = concat(lpegmatch(p_splitter,k,1,{})," ")
        t[k] = v
        return v
    end)
2229
2230    local function makeclass(tg,at)
2231        local detail     = at.detail
2232        local chain      = at.chain
2233        local extra      = nil
2234        local classes    = { }
2235        local nofclasses = 0
2236        at.detail        = nil
2237        at.chain         = nil
2238        for k, v in next, at do
2239            if not private[k] then
2240                nofclasses = nofclasses + 1
2241                classes[nofclasses] = k .. "-" .. v
2242            end
2243        end
2244        if detail and detail ~= "" then
2245            if chain and chain ~= "" then
2246                if chain ~= detail then
2247                    extra = classes[tg .. " " .. chain .. " " .. detail]
2248                elseif tg ~= detail then
2249                    extra = detail
2250                end
2251            elseif tg ~= detail then
2252                extra = detail
2253            end
2254        elseif chain and chain ~= "" then
2255            if tg ~= chain then
2256                extra = chain
2257            end
2258        end
2259        -- in this order
2260        if nofclasses > 0 then
2261            sort(classes)
2262            classes = concat(classes," ")
2263            if extra then
2264                return tg .. " " .. extra .. " " .. classes
2265            else
2266                return tg .. " " .. classes
2267            end
2268        else
2269            if extra then
2270                return tg .. " " .. extra
2271            else
2272                return tg
2273            end
2274        end
2275    end
2276
    -- Some elements are not supported (well) in css so we need to retain them. For
    -- instance, tablecells have no colspan so basically that renders css table div
    -- elements quite useless. A side effect is that we now can have conflicts when
    -- we mix in with other html (as there is no reset). Of course, when it eventually
    -- gets added, there is a chance then that those not using the div abstraction
    -- will be ridiculed.
    --
    -- a table tr td th thead tbody tfoot
    --

    -- Maps the table and tabulate related export tags onto native html table
    -- elements.

    local crappycss = {
        table     = "table", tabulate      = "table",
        tablehead = "thead", tabulatehead  = "thead",
        tablebody = "tbody", tabulatebody  = "tbody",
        tablefoot = "tfoot", tabulatefoot  = "tfoot",
        tablerow  = "tr",    tabulaterow   = "tr",
        tablecell = "td",    tabulatecell  = "td",
    }

    -- Off by default, in which case remap turns every element into a div. The
    -- "export.nativetags" directive makes this the mapping above, so that
    -- the tags listed there get their html counterpart instead.

    local cssmapping = false

    directives.register("export.nativetags", function(v)
        cssmapping = v and crappycss or false
    end)
2301
2302    local function remap(specification,source,target)
2303        local comment = nil -- share comments
2304        for c in xmlcollected(source,"*") do
2305            if not c.special then
2306                local tg = c.tg
2307                local ns = c.ns
2308                local at = c.at
2309                if ns == "m" then
2310                    -- should happen elsewhere
2311                 -- if false then -- yes or no
2312                        c.ns = ""
2313                        at["xmlns:m"] = nil
2314                        if tg == "math" then
2315                            at["xmlns"]   = mathmlns
2316                        end
2317                 -- end
2318             -- elseif tg == "a" then
2319             --     c.ns = ""
2320                else
2321                    local dt = c.dt
2322                    local nt = #dt
2323                    if nt == 0 or (nt == 1 and dt[1] == "") then
2324                        if comment then
2325                            c.dt = comment
2326                        else
2327                            xmlsetcomment(c,"empty")
2328                            comment = c.dt
2329                        end
2330                    end
2331                    local class = nil
2332                    local label = nil
2333                    if tg == "document" then
2334                        at.href   = nil
2335                        at.detail = nil
2336                        at.chain  = nil
2337                    elseif tg == "metavariable" then
2338                        label = at.name
2339                        at.detail = "metaname-" .. label
2340                        class = makeclass(tg,at)
2341                    else
2342                        class = makeclass(tg,at)
2343                    end
2344                    local id   = at.id
2345                    local href = at.href
2346                    local attr = nil
2347                    if id then
2348                        id = lpegmatch(p_cleanid, id) or id
2349                        if href then
2350                            href = lpegmatch(p_cleanhref,href) or href
2351                            attr = {
2352                                class   = class,
2353                                id      = id,
2354                                href    = href,
2355                                onclick = addclicks and f_onclick(href) or nil,
2356                            }
2357                        else
2358                            attr = {
2359                                class = class,
2360                                id    = id,
2361                            }
2362                        end
2363                    else
2364                        if href then
2365                            href = lpegmatch(p_cleanhref,href) or href
2366                            attr = {
2367                                class   = class,
2368                                href    = href,
2369                                onclick = addclicks and f_onclick(href) or nil,
2370                            }
2371                        else
2372                            attr = {
2373                                class = class,
2374                            }
2375                        end
2376                    end
2377                    c.at = attr
2378                    if label then
2379                        attr.label = label
2380                    end
2381                    c.tg = cssmapping and cssmapping[tg] or "div"
2382                end
2383            end
2384        end
2385    end
2386
2387 -- local cssfile = nil  directives.register("backend.export.css", function(v) cssfile = v end)
2388
2389    local embedfile = false  directives.register("export.embed",function(v) embedfile = v end)
2390
2391    local justexport = nodes.handlers.export
2392
2393    local function wrapuptree(tree)
2394        wrapups.fixtree(tree)
2395        wrapups.collapsetree(tree)
2396        wrapups.indextree(tree)
2397        wrapups.checktree(tree)
2398        wrapups.breaktree(tree)
2399        wrapups.finalizetree(tree)
2400    end
2401
2402    local function localexport(head)
2403        starttiming(treehash)
2404
2405        local saved_treestack    = treestack
2406        local saved_nesting      = nesting
2407        local saved_currentdepth = currentdepth
2408        local saved_tree         = tree
2409        local saved_treehash     = treehash
2410        local saved_nofbreaks    = nofbreaks
2411        local saved_show_comment = show_comment
2412
2413        treestack         = { }
2414        nesting           = { }
2415        currentdepth      = 0
2416        tree              = { data = { }, fulltag == "root" } -- root
2417        treehash          = { }
2418        nofbreaks         = 0
2419        show_comment      = false
2420
2421        justexport(head)
2422        finishexport()
2423        wrapuptree(tree)
2424
2425     -- tree.data = tree.data[1].data
2426
2427        local result = concat {
2428            wholepreamble(true,true),
2429            allcontent(tree),
2430        }
2431
2432        treestack    = saved_treestack
2433        nesting      = saved_nesting
2434        currentdepth = saved_currentdepth
2435        tree         = saved_tree
2436        treehash     = saved_treehash
2437        nofbreaks    = saved_nofbreaks
2438        show_comment = saved_show_comment
2439
2440        stoptiming(treehash)
2441
2442        return result
2443
2444    end
2445
2446    structurestags.localexport = localexport
2447
2448    function structures.tags.exportbox(n,filename,buffername)
2449        local list = nodes.nuts.getbox(n)
2450        if n then
2451            local e = localexport(list)
2452            if filename and filename ~= "" then
2453                io.savedata(filename,e)
2454            elseif buffername then
2455                buffers.assign(buffername == interfaces.variables.yes and "" or buffername,e)
2456            else
2457                return e
2458            end
2459        end
2460    end
2461
2462    interfaces.implement {
2463        name      = "exportbox",
2464        arguments = { "integer", "string", "string" },
2465        actions   = structures.tags.exportbox
2466    }
2467
2468    function structurestags.finishexport()
2469
2470        if exporting then
2471            exporting = false
2472        else
2473            return
2474        end
2475
2476        local onlyxml = finetuning.export == v_xml
2477
2478        starttiming(treehash)
2479        --
2480        finishexport()
2481        --
2482        report_export("")
2483        if onlyxml then
2484            report_export("exporting xml, no other files")
2485        else
2486            report_export("exporting xml, xhtml, html and css files")
2487        end
2488        report_export("")
2489        --
2490        wrapuptree(tree)
2491        --
2492        local askedname = finetuning.file
2493        --
2494        -- we use a dedicated subpath:
2495        --
2496        -- ./jobname-export
2497        -- ./jobname-export/images
2498        -- ./jobname-export/styles
2499        -- ./jobname-export/styles
2500        -- ./jobname-export/jobname-export.xml
2501        -- ./jobname-export/jobname-export.xhtml
2502        -- ./jobname-export/jobname-export.html
2503        -- ./jobname-export/jobname-specification.lua
2504        -- ./jobname-export/styles/jobname-defaults.css
2505        -- ./jobname-export/styles/jobname-styles.css
2506        -- ./jobname-export/styles/jobname-images.css
2507        -- ./jobname-export/styles/jobname-templates.css
2508
2509        if type(askedname) ~= "string" or askedname == "" then
2510            askedname = tex.jobname
2511        end
2512
2513        local usedname  = nameonly(askedname)
2514        local basepath  = usedname .. "-export"
2515        local imagepath = joinfile(basepath,"images")
2516        local stylepath = joinfile(basepath,"styles")
2517
2518        local function validpath(what,pathname)
2519            if lfs.isdir(pathname) then
2520                report_export("using existing %s path %a",what,pathname)
2521                return pathname
2522            end
2523            lfs.mkdir(pathname)
2524            if lfs.isdir(pathname) then
2525                report_export("using cretated %s path %a",what,basepath)
2526                return pathname
2527            else
2528                report_export("unable to create %s path %a",what,basepath)
2529                return false
2530            end
2531        end
2532
2533        if not (validpath("export",basepath) and validpath("images",imagepath) and validpath("styles",stylepath)) then
2534            return
2535        end
2536
2537        -- we're now on the dedicated export subpath so we can't clash names
2538        --
2539        -- a xhtml suffix no longer seems to be work well with browsers
2540
2541        local xmlfilebase           = addsuffix(usedname .. "-raw","xml"  )
2542        local xhtmlfilebase         = addsuffix(usedname .. "-tag","xhtml")
2543        local htmlfilebase          = addsuffix(usedname .. "-div","html")
2544        local specificationfilebase = addsuffix(usedname .. "-pub","lua"  )
2545
2546        local xmlfilename           = joinfile(basepath, xmlfilebase          )
2547        local xhtmlfilename         = joinfile(basepath, xhtmlfilebase        )
2548        local htmlfilename          = joinfile(basepath, htmlfilebase         )
2549        local specificationfilename = joinfile(basepath, specificationfilebase)
2550        --
2551        local defaultfilebase       = addsuffix(usedname .. "-defaults", "css")
2552        local imagefilebase         = addsuffix(usedname .. "-images",   "css")
2553        local stylefilebase         = addsuffix(usedname .. "-styles",   "css")
2554        local templatefilebase      = addsuffix(usedname .. "-templates","css")
2555        --
2556        local defaultfilename       = joinfile(stylepath,defaultfilebase )
2557        local imagefilename         = joinfile(stylepath,imagefilebase   )
2558        local stylefilename         = joinfile(stylepath,stylefilebase   )
2559        local templatefilename      = joinfile(stylepath,templatefilebase)
2560
2561        local cssfile               = finetuning.cssfile
2562
2563        -- we keep track of all used files
2564
2565        local files = {
2566        }
2567
2568        -- we always load the defaults and optionally extra css files; we also copy the example
2569        -- css file so that we always have the latest version
2570
2571        local cssfiles = {
2572            defaultfilebase,
2573            imagefilebase,
2574            stylefilebase,
2575        }
2576
2577        local cssextra = cssfile and table.unique(settings_to_array(cssfile)) or { }
2578
2579        -- at this point we're ready for the content; the collector also does some
2580        -- housekeeping and data collecting; at this point we still have an xml
2581        -- representation that uses verbose element names and carries information in
2582        -- attributes
2583
2584        local result = allcontent(tree)
2585
2586        -- ugly but so be it:
2587
2588        local extradata = structures.tags.getextradata()
2589        if extradata then
2590            local t = { "" }
2591            t[#t+1] = "<extradata>"
2592            for name, action in sortedhash(extradata) do
2593                t[#t+1] = action()
2594            end
2595            t[#t+1] = "</extradata>"
2596            t[#t+1] = "</document>"
2597            -- we use a function because otherwise we can have a bad capture index
2598            result = gsub(result,"</document>",function()
2599                return concat(t,"\n")
2600            end)
2601        end
2602
2603        -- done with ugly
2604
2605        if onlyxml then
2606
2607            os.remove(defaultfilename)
2608            os.remove(imagefilename)
2609            os.remove(stylefilename)
2610            os.remove(templatefilename)
2611
2612            for i=1,#cssextra do
2613                os.remove(joinfile(stylepath,basename(source)))
2614            end
2615
2616         -- os.remove(xmlfilename)
2617
2618            os.remove(imagefilename)
2619            os.remove(stylefilename)
2620            os.remove(templatefilename)
2621            os.remove(xhtmlfilename)
2622            os.remove(specificationfilename)
2623            os.remove(htmlfilename)
2624
2625            result = concat {
2626                wholepreamble(true,true),
2627                "<!-- This export file is used for filtering runtime only! -->\n",
2628                result,
2629            }
2630
2631            report_export("saving xml data in %a",xmlfilename)
2632            io.savedata(xmlfilename,result)
2633
2634            return
2635
2636        end
2637
2638        local examplefilename = resolvers.findfile("export-example.css")
2639        if examplefilename then
2640            local data = io.loaddata(examplefilename)
2641            if not data or data == "" then
2642                data = "/* missing css file */"
2643            elseif not usecssnamespace then
2644                data = gsub(data,cssnamespace,"")
2645            end
2646            io.savedata(defaultfilename,data)
2647        end
2648
2649        if cssfile then
2650            for i=1,#cssextra do
2651                local source = addsuffix(cssextra[i],"css")
2652                local target = joinfile(stylepath,basename(source))
2653                cssfiles[#cssfiles+1] = source
2654                if not lfs.isfile(source) then
2655                    source = joinfile("../",source)
2656                end
2657                if lfs.isfile(source) then
2658                    report_export("copying %s",source)
2659                    file.copy(source,target)
2660                end
2661            end
2662        end
2663
2664
2665        local script = settings_to_hash(finetuning.option or "").mathjax and mathmlheadscript or nil
2666
2667        local x_styles, h_styles = allusedstylesheets(cssfiles,files,"styles",script)
2668
2669        local attach = backends.nodeinjections.attachfile
2670
2671        if embedfile and attach then
2672            -- only for testing
2673            attach {
2674                data       = concat{ wholepreamble(true), result },
2675                name       = basename(xmlfilename),
2676                registered = "export",
2677                title      = "raw xml export",
2678                method     = v_hidden,
2679                mimetype   = "application/mathml+xml",
2680            }
2681        end
2682
2683        result = concat {
2684            wholepreamble(true),
2685            x_styles, -- adds to files
2686            result,
2687        }
2688
2689        cssfiles = table.unique(cssfiles)
2690
2691        -- we're now ready for saving the result in the xml file
2692
2693        report_export("saving xml data in %a",xmlfilename)
2694        io.savedata(xmlfilename,result)
2695
2696        report_export("saving css image definitions in %a",imagefilename)
2697        io.savedata(imagefilename,wrapups.allusedimages(usedname))
2698
2699        report_export("saving css style definitions in %a",stylefilename)
2700        io.savedata(stylefilename,wrapups.allusedstyles(usedname))
2701
2702        report_export("saving css template in %a",templatefilename)
2703        io.savedata(templatefilename,allusedelements(usedname))
2704
2705        -- additionally we save an xhtml file; for that we load the file as xml tree
2706
2707        report_export("saving xhtml variant in %a",xhtmlfilename)
2708
2709        local xmltree = cleanxhtmltree(xml.convert(result))
2710
2711     -- local xmltree = xml.convert(result)
2712     -- for c in xml.collected(xmltree,"m:mtext[lastindex()=1]/m:mrow") do
2713     --     print(c)
2714     -- end
2715     -- for c in xml.collected(xmltree,"mtext/mrow") do
2716     --     print(c)
2717     -- end
2718     -- local xmltree = cleanxhtmltree(xmltree)
2719
2720        xml.save(xmltree,xhtmlfilename)
2721
2722        -- now we save a specification file that can b eused for generating an epub file
2723
2724        -- looking at identity is somewhat redundant as we also inherit from interaction
2725        -- at the tex end
2726
2727        local identity  = interactions.general.getidentity()
2728        local metadata  = structures.tags.getmetadata()
2729
2730        local specification = {
2731            name       = usedname,
2732            identifier = os.uuid(),
2733            images     = wrapups.uniqueusedimages(),
2734            imagefile  = joinfile("styles",imagefilebase),
2735            imagepath  = "images",
2736            stylepath  = "styles",
2737            xmlfiles   = { xmlfilebase },
2738            xhtmlfiles = { xhtmlfilebase },
2739            htmlfiles  = { htmlfilebase },
2740            styles     = cssfiles,
2741            htmlroot   = htmlfilebase,
2742            language   = languagenames[texgetcount("mainlanguagenumber")],
2743            title      = validstring(finetuning.title) or validstring(identity.title),
2744            subtitle   = validstring(finetuning.subtitle) or validstring(identity.subtitle),
2745            author     = validstring(finetuning.author) or validstring(identity.author),
2746            firstpage  = validstring(finetuning.firstpage),
2747            lastpage   = validstring(finetuning.lastpage),
2748            metadata   = metadata,
2749        }
2750
2751        report_export("saving specification in %a",specificationfilename,specificationfilename)
2752
2753        xml.wipe(xmltree,"metadata") -- maybe optional
2754
2755        io.savedata(specificationfilename,table.serialize(specification,true))
2756
2757        -- the html export for epub is different in the sense that it uses div's instead of
2758        -- specific tags
2759
2760        report_export("saving div based alternative in %a",htmlfilename)
2761
2762        remap(specification,xmltree)
2763
2764        -- believe it or not, but a <title/> can prevent viewing in browsers
2765
2766        local title = specification.title
2767
2768        if not title or title == "" then
2769            title = metadata.title
2770            if not title or title == "" then
2771                title = usedname -- was: "no title"
2772            end
2773        end
2774
2775        local variables = {
2776            style    = h_styles,
2777            body     = xml.tostring(xml.first(xmltree,"/div")),
2778            preamble = wholepreamble(false),
2779            title    = title,
2780        }
2781
2782        io.savedata(htmlfilename,replacetemplate(htmltemplate,variables,"xml"))
2783
2784        -- finally we report how an epub file can be made (using the specification)
2785
2786        report_export("")
2787        report_export('create epub with: mtxrun --script epub --make "%s" [--purge --rename --svgmath]',usedname)
2788        report_export("")
2789
2790        stoptiming(treehash)
2791    end
2792
2793    local enableaction = nodes.tasks.enableaction
2794
2795    function structurestags.initializeexport()
2796        if not exporting then
2797            report_export("enabling export to xml")
2798            enableaction("shipouts","nodes.handlers.export")
2799         -- enableaction("shipouts","nodes.handlers.accessibility")
2800            enableaction("math",    "noads.handlers.tags")
2801            enableaction("everypar","nodes.handlers.checkparcounter")
2802            luatex.registerstopactions(structurestags.finishexport)
2803            exporting = true
2804        end
2805    end
2806
2807    function structurestags.setupexport(t)
2808        merge(finetuning,t)
2809        keephyphens      = finetuning.hyphen == v_yes
2810        exportproperties = finetuning.properties
2811        if exportproperties == v_no then
2812            exportproperties = false
2813        end
2814    end
2815
2816    statistics.register("xml exporting time", function()
2817        if exporting then
2818            return string.format("%s seconds, version %s", statistics.elapsedtime(treehash),exportversion)
2819        end
2820    end)
2821
2822end
2823
2824-- These are called at the tex end:
2825
-- The setup gets one table; the keys bodyfont and width are declared as dimen,
-- the other keys have no explicit type.

implement {
    name      = "setupexport",
    arguments = {
        {
            { "align" },
            { "bodyfont", "dimen" },
            { "width", "dimen" },
            { "properties" },
            { "hyphen" },
            { "title" }, { "subtitle" }, { "author" },
            { "firstpage" }, { "lastpage" },
            { "svgstyle" },
            { "cssfile" },
            { "file" },
            { "option" },
            { "export" },
        }
    },
    actions   = structurestags.setupexport,
}

-- These two take no arguments.

implement {
    name    = "finishexport",
    actions = structurestags.finishexport,
}

implement {
    name    = "initializeexport",
    actions = structurestags.initializeexport,
}
2859