back-exp.lmt /size: 101 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['back-exp'] = {
2    version   = 1.001,
3    comment   = "companion to back-exp.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
-- Todo: share properties more with tagged pdf (or the reverse)
10
-- Because we run into the 200 local limit we use quite some do .. end wrappers .. not always
-- that nice but it has to be.
13
-- Experiments demonstrated that mapping to <div> and classes is messy because we have to
-- package attributes (some 30) into one set of (space separated but prefixed) classes
-- which only makes things worse .. so if you want something else, use xslt to get there.
17
18-- language       -> only mainlanguage, local languages should happen through start/stoplanguage
19-- tocs/registers -> maybe add a stripper (i.e. just don't flush entries in final tree)
20-- footnotes      -> css 3
21-- bodyfont       -> in styles.css
22
-- Because we need to look ahead we now always build a tree (this was optional in
-- the beginning). The extra overhead in the frontend is negligible.
25--
26-- We can optimize the code ... currently the overhead is some 10% for xml + html so
27-- there is no hurry.
28
-- todo: move critical formatters out of functions
30-- todo: delay loading (apart from basic tag stuff)
31
32-- problem : too many local variables
33
34-- check setting __i__
35
36local next, type, tonumber = next, type, tonumber
37local sub, gsub, match = string.sub, string.gsub, string.match
38local validstring = string.valid
39local lpegmatch = lpeg.match
40local utfchar, utfvalues, utflen = utf.char, utf.values, utf.len
41local concat, merge, sort, setmetatableindex = table.concat, table.merge, table.sort, table.setmetatableindex
42local sortedhash, sortedkeys = table.sortedhash, table.sortedkeys
43local formatters = string.formatters
44local todimen = number.todimen
45local replacetemplate = utilities.templates.replace
46local settings_to_array = utilities.parsers.settings_to_array
47
48local addsuffix, joinfile, nameonly, basename, filesuffix = file.addsuffix, file.join, file.nameonly, file.basename, file.suffix
49
50local trace_export  = false  trackers.register  ("export.trace",         function(v) trace_export  = v end)
51local trace_spacing = false  trackers.register  ("export.trace.spacing", function(v) trace_spacing = v end)
52local trace_details = false  trackers.register  ("export.trace.details", function(v) trace_details = v end)
53
54local less_state    = false  directives.register("export.lessstate",     function(v) less_state    = v end)
55local show_comment  = true   directives.register("export.comment",       function(v) show_comment  = v end)
56
57-- maybe we will also support these:
58--
59-- local css_hyphens       = false  directives.register("export.css.hyphens",      function(v) css_hyphens      = v end)
60-- local css_textalign     = false  directives.register("export.css.textalign",    function(v) css_textalign    = v end)
61-- local css_bodyfontsize  = false  directives.register("export.css.bodyfontsize", function(v) css_bodyfontsize = v end)
62-- local css_textwidth     = false  directives.register("export.css.textwidth",    function(v) css_textwidth    = v end)
63
64local report_export     = logs.reporter("backend","export")
65
66local nodes             = nodes
67local attributes        = attributes
68
69local variables         = interfaces.variables
70local v_yes             = variables.yes
71local v_no              = variables.no
72local v_xml             = variables.xml
73local v_hidden          = variables.hidden
74
75local implement         = interfaces.implement
76
77local tasks             = nodes.tasks
78local fontchar          = fonts.hashes.characters
79local fontquads         = fonts.hashes.quads
80local languagenames     = languages.numbers
81
82local texgetcount       = tex.getcount
83
84local references        = structures.references
85local structurestags    = structures.tags
86local taglist           = structurestags.taglist
87local specifications    = structurestags.specifications
88local properties        = structurestags.properties
89local locatedtag        = structurestags.locatedtag
90
91structurestags.usewithcare = { }
92
93local starttiming       = statistics.starttiming
94local stoptiming        = statistics.stoptiming
95
96local characterdata     = characters.data
97local overloads         = fonts.mappings.overloads
98
99-- todo: more locals (and optimize)
100
101local exportversion     <const> = "0.35"
102local mathmlns          <const> = "http://www.w3.org/1998/Math/MathML"
103local contextns         <const> = "http://www.contextgarden.net/context/export" -- whatever suits
104local cssnamespaceurl   <const> = "@namespace context url('%namespace%') ;"
105local cssnamespace      <const> = "context|"
106----- cssnamespacenop   <const> = "/* no namespace */"
107
108local usecssnamespace   = false
109
110local nofcurrentcontent = 0 -- so we don't free (less garbage collection)
111local currentcontent    = { }
112local currentnesting    = nil
113local currentattribute  = nil
114local last              = nil
115local currentparagraph  = nil
116
117local noftextblocks     = 0
118
119----- hyphencode        = 0xAD
120local hyphen            = utfchar(0xAD) -- todo: also emdash etc
121local tagsplitter       = structurestags.patterns.splitter
122----- colonsplitter     = lpeg.splitat(":")
123----- dashsplitter      = lpeg.splitat("-")
124local threshold         = 65536
125local indexing          = false
126local keephyphens       = false
127local exportproperties  = false
128
129local finetuning        = { }
130
131local treestack         = { }
132local nesting           = { }
133local currentdepth      = 0
134
135local wrapups           = { }
136
-- The root of the export tree: children are collected in 'data'. The tag is a
-- field assignment; a comparison here would store 'false' at index 1 instead.
local tree              = { data = { }, fulltag = "root" } -- root
138local treehash          = { }
139local extras            = { }
140local checks            = { }
141local fixes             = { }
142local finalizers        = { }
143local nofbreaks         = 0
144local used              = { }
145local exporting         = false
146local restart           = false
147local specialspaces     = { [0x20] = " "  }               -- for conversion
148local somespace         = { [0x20] = true, [" "] = true } -- for testing
149local entities          = { ["&"] = "&amp;", [">"] = "&gt;", ["<"] = "&lt;" }
-- Replacements for attribute values: on top of the three content entities the
-- double quote is mapped too (as a real entity, so including the ampersand).
local attribentities    = { ["&"] = "&amp;", [">"] = "&gt;", ["<"] = "&lt;", ['"'] = "&quot;" }
151
152local p_entity          = lpeg.replacer(entities) -- was: entityremapper = utf.remapper(entities)
153local p_attribute       = lpeg.replacer(attribentities)
154local p_escaped         = lpeg.patterns.xml.escaped
155
156local f_tagid           = formatters["%s-%04i"]
157
158-- local alignmapping = {
159--     flushright = "right",
160--     middle     = "center",
161--     flushleft  = "left",
162-- }
163
164local defaultnature = "mixed" -- "inline"
165
-- Autovivification: accessing 'used' with a new (non false, non nil) key creates,
-- stores and returns an empty subtable; other keys give nothing.
setmetatableindex(used, function(hash,key)
    if not key then
        return
    end
    local fresh = { }
    hash[key] = fresh
    return fresh
end)
173
174local f_entity    = formatters["&#x%X;"]
175local f_attribute = formatters[" %s=%q"]
176local f_property  = formatters[" %s%s=%q"]
177
-- On first access of a code point we cache its utf string, mark both the number
-- and the string as a space in 'somespace', and register a hexadecimal entity for
-- the string in 'entities'.
setmetatableindex(specialspaces, function(cache,code)
    local character = utfchar(code)
    cache[code] = character
    entities[character] = f_entity(code)
    somespace[code]      = true
    somespace[character] = true
    return character
end)
186
187
188local namespaced = {
189    -- filled on
190}
191
-- Elements that get a namespace prefix when serialized (used by the 'namespaced'
-- resolver). Each key is listed once: 'mrow' used to be present twice.
local namespaces = {
    msubsup      = "m",
    msub         = "m",
    msup         = "m",
    mn           = "m",
    mi           = "m",
    ms           = "m",
    mo           = "m",
    mtext        = "m",
    mrow         = "m",
    mfrac        = "m",
    mroot        = "m",
    msqrt        = "m",
    munderover   = "m",
    munder       = "m",
    mover        = "m",
    merror       = "m",
    math         = "m",
    mtable       = "m",
    mtr          = "m",
    mtd          = "m",
    mfenced      = "m",
    maction      = "m",
    mspace       = "m",
    -- only when testing
    mstacker     = "m",
    mstackertop  = "m",
    mstackermid  = "m",
    mstackerbot  = "m",
}
223
-- Resolves (and caches) the serialized element name: elements listed in
-- 'namespaces' become "<prefix>:<name>", all others are used as they are.
setmetatableindex(namespaced, function(cache,tag)
    if not tag then
        return
    end
    local prefix   = namespaces[tag]
    local resolved = tag
    if prefix then
        resolved = prefix .. ":" .. tag
    end
    cache[tag] = resolved
    return resolved
end)
232
-- Serializes one key/value pair with the attribute formatter (which starts with a
-- space) after the value went through the attribute entity replacer. A nil, false
-- or empty value gives an empty string.
local function attribute(key,value)
    if not value or value == "" then
        return ""
    end
    return f_attribute(key,lpegmatch(p_attribute,value))
end
240
-- Stores value under key in di.attributes (the table is created when needed).
-- Nothing happens for a nil, false or empty value. When 'escaped' is set the
-- value first goes through the xml escaping pattern.
local function setattribute(di,key,value,escaped)
    if not value or value == "" then
        return
    end
    if escaped then
        value = lpegmatch(p_escaped,value)
    end
    local current = di.attributes
    if current then
        current[key] = value
    else
        di.attributes = { [key] = value }
    end
end
254
255local listdata = { } -- this has to be done otherwise: each element can just point back to ...
256
-- Fills 'listdata' from the collected list entries: each entry that has a
-- reference tag is registered under "<metadata kind>><tag>".
function wrapups.hashlistdata()
    local collected = structures.lists.collected
    for index=1,#collected do
        local entry = collected[index]
        local tag   = entry.references.tag
        if tag then
            -- todo: use internal (see strc-lst.lua where it's set)
            local metadata = entry.metadata
            listdata[metadata.kind .. ">" .. tag] = entry
        end
    end
end
269
-- Sets a field in the tag specification that belongs to the given attribute
-- value; an unknown attribute is silently ignored.
function structurestags.setattributehash(attr,key,value) -- public hash
    local specification = taglist[attr]
    if not specification then
        -- some kind of error
        return
    end
    specification[key] = value
end
278
279local usedstyles      = { }
280local usedimages      = { }
281local referencehash   = { } -- move ?
282local destinationhash = { } -- move ?
283
284structurestags.backend = {
285    setattribute    = setattribute,
286    extras          = extras,
287    checks          = checks,
288    fixes           = fixes,
289    listdata        = listdata,
290    finalizers      = finalizers,
291    usedstyles      = usedstyles,
292    usedimages      = usedimages,
293    referencehash   = referencehash,
294    destinationhash = destinationhash,
295}
296
297local namespacetemplate <const> = [[
298/* %what% for file %filename% */
299
300%cssnamespaceurl%
301]]
302
303do
304
305    -- experiment: styles and images
306    --
307    -- officially we should convert to bp but we round anyway
308
309    -- /* padding      : ; */
310    -- /* text-justify : inter-word ; */
311    -- /* text-align : justify ; */
312
313local documenttemplate <const> = [[
314document,
315%namespace%div.document {
316    font-size  : %size% !important ;
317    max-width  : %width% !important ;
318    text-align : %align% !important ;
319    hyphens    : %hyphens% !important ;
320}]]
321
322local styletemplate <const> = [[
323%element%[detail="%detail%"],
324%namespace%div.%element%.%detail% {
325    display      : inline ;
326    font-style   : %style% ;
327    font-variant : %variant% ;
328    font-weight  : %weight% ;
329    font-family  : %family% ;
330    color        : %color% ;
331}]]
332
333    local numbertoallign = {
334        [0] = "justify", ["0"] = "justify", [variables.normal    ] = "justify",
335              "right",   ["1"] = "right",   [variables.flushright] = "right",
336              "center",  ["2"] = "center",  [variables.middle    ] = "center",
337              "left",    ["3"] = "left",    [variables.flushleft ] = "left",
338    }
339
340    function wrapups.allusedstyles(filename)
341        local result = { replacetemplate(namespacetemplate, {
342            what            = "styles",
343            filename        = filename,
344            namespace       = contextns,
345         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or cssnamespacenop,
346            cssnamespaceurl = cssnamespaceurl,
347        },false,true) }
348        --
349        local bodyfont = finetuning.bodyfont
350        local width    = finetuning.width
351        local hyphen   = finetuning.hyphen
352        local align    = finetuning.align
353        --
354        if type(bodyfont) == "number" then
355            bodyfont = todimen(bodyfont)
356        else
357            bodyfont = "12pt"
358        end
359        if type(width) == "number" then
360            width = todimen(width) or "50em"
361        else
362            width = "50em"
363        end
364        if hyphen == v_yes then
365            hyphen = "manual"
366        else
367            hyphen = "inherited"
368        end
369        if align then
370            align = numbertoallign[align]
371        end
372        if not align then
373            align = hyphen and "justify" or "inherited"
374        end
375        --
376        result[#result+1] = replacetemplate(documenttemplate,{
377            size    = bodyfont,
378            width   = width,
379            align   = align,
380            hyphens = hyphen
381        })
382        --
383        local colorspecification = xml.css.colorspecification
384        local fontspecification  = xml.css.fontspecification
385        for element, details in sortedhash(usedstyles) do
386            for detail, data in sortedhash(details) do
387                local s = fontspecification(data.style)
388                local c = colorspecification(data.color)
389                detail = gsub(detail,"[^A-Za-z0-9]+","-")
390                result[#result+1] = replacetemplate(styletemplate,{
391                    namespace = usecssnamespace and cssnamespace or "",
392                    element   = element,
393                    detail    = detail,
394                    style     = s.style   or "inherit",
395                    variant   = s.variant or "inherit",
396                    weight    = s.weight  or "inherit",
397                    family    = s.family  or "inherit",
398                    color     = c         or "inherit",
399                    display   = s.display and "block" or nil,
400                })
401            end
402        end
403        return concat(result,"\n\n")
404    end
405
406end
407
408do
409
410local imagetemplate <const> = [[
411%element%[id="%id%"], %namespace%div.%element%[id="%id%"] {
412    display           : block ;
413    background-image  : url('%url%') ;
414    background-size   : 100%% auto ;
415    background-repeat : no-repeat ;
416    width             : %width% ;
417    height            : %height% ;
418}]]
419
420    local f_svgname = formatters["%s.svg"]
421    local f_svgpage = formatters["%s-page-%s.svg"]
422    local collected = { }
423
424    local function usedname(name,page)
425        if filesuffix(name) == "pdf" then
426            -- temp hack .. we will have a remapper
427            if page and page > 1 then
428                name = f_svgpage(nameonly(name),page)
429            else
430                name = f_svgname(nameonly(name))
431            end
432        end
433        local scheme = url.hasscheme(name)
434        if not scheme or scheme == "file" then
435            -- or can we just use the name ?
436            return joinfile("../images",basename(url.filename(name)))
437        else
438            return name
439        end
440    end
441
442    function wrapups.allusedimages(filename)
443        local result = { replacetemplate(namespacetemplate, {
444            what            = "images",
445            filename        = filename,
446            namespace       = contextns,
447         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or "",
448            cssnamespaceurl = cssnamespaceurl,
449        },false,true) }
450        for element, details in sortedhash(usedimages) do
451            for detail, data in sortedhash(details) do
452                local name = data.name
453                local page = tonumber(data.page) or 1
454                local spec = {
455                    element   = element,
456                    id        = data.id,
457                    name      = name,
458                    page      = page,
459                    url       = usedname(name,page),
460                    width     = data.width,
461                    height    = data.height,
462                    used      = data.used,
463                    namespace = usecssnamespace and cssnamespace or "",
464                }
465                result[#result+1] = replacetemplate(imagetemplate,spec)
466                collected[detail] = spec
467            end
468        end
469        return concat(result,"\n\n")
470    end
471
    -- Returns the table with image specifications (keyed by detail) that gets
    -- filled by wrapups.allusedimages, so it is empty before that one has run.
    function wrapups.uniqueusedimages() -- todo: combine these two
        return collected
    end
475
476end
477
478--
479
480properties.vspace = { export = "break",     nature = "display" }
481----------------- = { export = "pagebreak", nature = "display" }
482
-- Returns a new specification { taglist = ... } that holds a copy of the taglist
-- of 'list' (when given) with a fresh "break>n" tag appended. The global break
-- counter is incremented.
local function makebreaklist(list)
    nofbreaks = nofbreaks + 1
    local t = { }
    local n = 0
    local l = list and list.taglist
    if l then
        -- we need the length of the taglist itself and not of the table that wraps
        -- it (that one has no array part so nothing would get copied)
        n = #l
        for i=1,n do
            t[i] = l[i]
        end
    end
    t[n+1] = "break>" .. nofbreaks -- maybe no number or 0
    return { taglist = t }
end
495
496local breakattributes = {
497    type = "collapse"
498}
499
-- Creates a tree entry for a break element (nature display) that gets the next
-- break number; the attributes table is optional.
local function makebreaknode(attributes) -- maybe no fulltag
    local number = nofbreaks + 1
    nofbreaks = number
    local node = {
        tg      = "break",
        element = "break",
        nature  = "display",
        n       = number,
        fulltag = "break>" .. number,
     -- data      = { }, -- not needed
     -- attribute = 0, -- not needed
     -- parnumber = 0,
    }
    if attributes then
        node.attributes = attributes
    end
    return node
end
514
515do
516
517    local fields = { "title", "subtitle", "author", "keywords", "url", "version" }
518
519    local ignoredelements = false
520
521    local function checkdocument(root)
522        local data = root.data
523        if data then
524            for i=1,#data do
525                local di = data[i]
526                local tg = di.tg
527                if tg == "noexport" then
528                    local s = specifications[di.fulltag]
529                    local u = s and s.userdata
530                    if u then
531                        local comment = u.comment
532                        if comment then
533                            di.element = "comment"
534                            di.data = { { content = comment } }
535                            u.comment = nil
536                        else
537                            data[i] = false
538                        end
539                    else
540                        data[i] = false
541                    end
542                elseif di.content then
543                    -- okay
544                elseif tg == "ignore" then
545                    di.element = ""
546                    checkdocument(di)
547                elseif ignoredelements and ignoredelements[tg] then
548                    di.element = ""
549                    checkdocument(di)
550                else
551                    checkdocument(di) -- new, else no noexport handling
552                end
553            end
554        end
555    end
556
557    function extras.document(di,element,n,fulltag)
558        setattribute(di,"language",languagenames[texgetcount("mainlanguagenumber")])
559        if not less_state then
560            setattribute(di,"file",tex.jobname)
561            setattribute(di,"date",os.fulltime())
562            setattribute(di,"context",environment.version)
563            setattribute(di,"version",exportversion)
564            setattribute(di,"xmlns:m",mathmlns)
565            local identity = interactions.general.getidentity()
566            for i=1,#fields do
567                local key   = fields[i]
568                local value = identity[key]
569                if value and value ~= "" then
570                    setattribute(di,key,value)
571                end
572            end
573        end
574        checkdocument(di)
575    end
576
577    implement {
578        name      = "ignoretagsinexport",
579        arguments = "string",
580        actions   = function(list)
581            for tag in string.gmatch(list,"[a-z]+") do
582                if ignoredelements then
583                    ignoredelements[tag] = true
584                else
585                    ignoredelements = { [tag] = true }
586                end
587            end
588        end,
589    }
590
591end
592
593-- flusher
594
595do
596
597    local f_detail                     = formatters[' detail="%s"']
598    local f_chain                      = formatters[' chain="%s"']
599    local f_index                      = formatters[' n="%s"']
600    local f_spacing                    = formatters['<c p="%s">%s</c>']
601
602    local f_empty_inline               = formatters["<%s/>"]
603    local f_empty_mixed                = formatters["%w<%s/>\n"]
604    local f_empty_display              = formatters["\n%w<%s/>\n"]
605    local f_empty_inline_attr          = formatters["<%s%s/>"]
606    local f_empty_mixed_attr           = formatters["%w<%s%s/>"]
607    local f_empty_display_attr         = formatters["\n%w<%s%s/>\n"]
608
609    local f_begin_inline               = formatters["<%s>"]
610    local f_begin_mixed                = formatters["%w<%s>"]
611    local f_begin_display              = formatters["\n%w<%s>\n"]
612    local f_begin_inline_attr          = formatters["<%s%s>"]
613    local f_begin_mixed_attr           = formatters["%w<%s%s>"]
614    local f_begin_display_attr         = formatters["\n%w<%s%s>\n"]
615
616    local f_end_inline                 = formatters["</%s>"]
617    local f_end_mixed                  = formatters["</%s>\n"]
618    local f_end_display                = formatters["%w</%s>\n"]
619
620    local f_begin_inline_comment       = formatters["<!-- %s --><%s>"]
621    local f_begin_mixed_comment        = formatters["%w<!-- %s --><%s>"]
622    local f_begin_display_comment      = formatters["\n%w<!-- %s -->\n%w<%s>\n"]
623    local f_begin_inline_attr_comment  = formatters["<!-- %s --><%s%s>"]
624    local f_begin_mixed_attr_comment   = formatters["%w<!-- %s --><%s%s>"]
625    local f_begin_display_attr_comment = formatters["\n%w<!-- %s -->\n%w<%s%s>\n"]
626
627    local f_comment_begin_inline       = formatters["<!-- begin %s -->"]
628    local f_comment_begin_mixed        = formatters["%w<!-- begin %s -->"]
629    local f_comment_begin_display      = formatters["\n%w<!-- begin %s -->\n"]
630
631    local f_comment_end_inline         = formatters["<!-- end %s -->"]
632    local f_comment_end_mixed          = formatters["<!-- end %s -->\n"]
633    local f_comment_end_display        = formatters["%w<!-- end %s -->\n"]
634
635    local f_metadata_begin             = formatters["\n%w<metadata>\n"]
636    local f_metadata                   = formatters["%w<metavariable name=%q>%s</metavariable>\n"]
637    local f_metadata_end               = formatters["%w</metadata>\n"]
638
639    local function attributes(a)
640        local r = { }
641        local n = 0
642        for k, v in next, a do
643            n = n + 1
644            r[n] = f_attribute(k,tostring(v)) -- tostring because of %q
645        end
646        sort(r)
647        return concat(r,"")
648    end
649
650    local function properties(a)
651        local r = { }
652        local n = 0
653        for k, v in next, a do
654            n = n + 1
655            r[n] = f_property(exportproperties,k,tostring(v)) -- tostring because of %q
656        end
657        sort(r)
658        return concat(r,"")
659    end
660
661    local depth  = 0
662    local inline = 0
663
664    local function emptytag(result,element,nature,di) -- currently only break but at some point
665        local a = di.attributes                       -- we might add detail etc
666        if a then -- happens seldom
667            if nature == "display" then
668                result[#result+1] = f_empty_display_attr(depth,namespaced[element],attributes(a))
669            elseif nature == "mixed" then
670                result[#result+1] = f_empty_mixed_attr(depth,namespaced[element],attributes(a))
671            else
672                result[#result+1] = f_empty_inline_attr(namespaced[element],attributes(a))
673            end
674        else
675            if nature == "display" then
676                result[#result+1] = f_empty_display(depth,namespaced[element])
677            elseif nature == "mixed" then
678                result[#result+1] = f_empty_mixed(depth,namespaced[element])
679            else
680                result[#result+1] = f_empty_inline(namespaced[element])
681            end
682        end
683    end
684
685 -- local function stripspaces(di)
686 --     local d = di.data
687 --     local n = #d
688 --     local m = 0
689 --     for i=1,n do
690 --         local di = d[i]
691 --         if di.tg then
692 --             m = m + 1
693 --             d[m] = di
694 --         end
695 --     end
696 --     for i=n,m+1,-1 do
697 --         d[i] = nil
698 --     end
699 -- end
700 --
701 -- -- simpler:
702
703    local function stripspaces(di)
704        local d = di.data
705        for i=1,#d do
706            local di = d[i]
707            if not di.tg then
708                di.content = ""
709            end
710        end
711    end
712
    -- Appends the opening of an element to result and updates the shared 'depth'
    -- and 'inline' counters.
    --
    -- result  : the array that collects the serialized snippets
    -- element : the element name (resolved via 'namespaced' when serialized)
    -- nature  : "inline", "mixed" or anything else (handled as display)
    -- di      : the tree entry (n, fulltag, comment, attributes, record, parnumber are used)
    -- skip    : "comment" results in only a begin comment (when comments are enabled),
    --           any other true value in no opening tag at all
    --
    -- Independent of skip the element/detail combination is registered in 'used'
    -- and a non empty metadata table of the specification is flushed as a
    -- <metadata> block.
    local function begintag(result,element,nature,di,skip)
        local index         = di.n
        local fulltag       = di.fulltag
        local specification = specifications[fulltag] or { } -- we can have a dummy
        local comment       = di.comment
        local detail        = specification.detail
        if skip == "comment" then
            if show_comment then
                -- same bookkeeping as with a real tag so that endtag stays in sync
                if nature == "inline" or inline > 0 then
                    result[#result+1] = f_comment_begin_inline(namespaced[element])
                    inline = inline + 1
                elseif nature == "mixed" then
                    result[#result+1] = f_comment_begin_mixed(depth,namespaced[element])
                    depth = depth + 1
                    inline = 1
                else
                    result[#result+1] = f_comment_begin_display(depth,namespaced[element])
                    depth = depth + 1
                end
            end
        elseif skip then
            -- ignore
        else

            -- r collects the n serialized attribute snippets
            local n = 0
            local r = { } -- delay this
            if detail then
                -- anything that is not a letter or digit becomes a dash
                detail = gsub(detail,"[^A-Za-z0-9]+","-")
                specification.detail = detail -- we use it later in for the div
                n = n + 1
                r[n] = f_detail(detail)
            end
            local parents = specification.parents
            if parents then
                -- as with detail but spaces are kept
                parents = gsub(parents,"[^A-Za-z0-9 ]+","-")
                specification.parents = parents -- we use it later in for the div
                n = n + 1
                r[n] = f_chain(parents)
            end
            if indexing and index then
                n = n + 1
                r[n] = f_index(index)
            end
            --
            -- an element specific handler can adapt di (for instance set attributes)
            -- so this has to come before di.attributes is serialized
            local extra = extras[element]
            if extra then
                extra(di,element,index,fulltag)
            end
            --
            if di.record then
                stripspaces(di)
            end
            --
            -- userdata is exported as plain attributes (when set to yes) or with the
            -- value of exportproperties as prefix
            if exportproperties then
                local p = specification.userdata
                if not p then
                    -- skip
                elseif exportproperties == v_yes then
                    n = n + 1
                    r[n] = attributes(p)
                else
                    n = n + 1
                    r[n] = properties(p)
                end
            end
            local a = di.attributes
            if a then
                if trace_spacing then
                    -- watch out: this adds the paragraph number to the attributes of di
                    a.p = di.parnumber or 0
                end
                n = n + 1
                r[n] = attributes(a)
            elseif trace_spacing then
                n = n + 1
                r[n] = attributes { p = di.parnumber or 0 }
            end
            -- the formatter depends on the nature and on having attributes and/or a
            -- comment; once we're inline (inline > 0) nested elements stay inline
            if n == 0 then
                if nature == "inline" or inline > 0 then
                    if show_comment and comment then
                        result[#result+1] = f_begin_inline_comment(comment,namespaced[element])
                    else
                        result[#result+1] = f_begin_inline(namespaced[element])
                    end
                    inline = inline + 1
                elseif nature == "mixed" then
                    if show_comment and comment then
                        result[#result+1] = f_begin_mixed_comment(depth,comment,namespaced[element])
                    else
                        result[#result+1] = f_begin_mixed(depth,namespaced[element])
                    end
                    depth = depth + 1
                    inline = 1
                else
                    if show_comment and comment then
                        result[#result+1] = f_begin_display_comment(depth,comment,depth,namespaced[element])
                    else
                        result[#result+1] = f_begin_display(depth,namespaced[element])
                    end
                    depth = depth + 1
                end
            else
                -- from here on r is the string with all attributes
                r = concat(r,"",1,n)
                if nature == "inline" or inline > 0 then
                    if show_comment and comment then
                        result[#result+1] = f_begin_inline_attr_comment(comment,namespaced[element],r)
                    else
                        result[#result+1] = f_begin_inline_attr(namespaced[element],r)
                    end
                    inline = inline + 1
                elseif nature == "mixed" then
                    if show_comment and comment then
                        result[#result+1] = f_begin_mixed_attr_comment(depth,comment,namespaced[element],r)
                    else
                        result[#result+1] = f_begin_mixed_attr(depth,namespaced[element],r)
                    end
                    depth = depth + 1
                    inline = 1
                else
                    if show_comment and comment then
                        result[#result+1] = f_begin_display_attr_comment(depth,comment,depth,namespaced[element],r)
                    else
                        result[#result+1] = f_begin_display_attr(depth,namespaced[element],r)
                    end
                    depth = depth + 1
                end
            end
        end
        -- detail and parents are only cleaned up in the branch above, so for skipped
        -- elements the values from the specification are registered as they are
        used[element][detail or ""] = { nature, specification.parents }  -- for template css
        -- also in last else ?
        local metadata = specification.metadata
        if metadata and next(metadata) then
            result[#result+1] = f_metadata_begin(depth)
            for k, v in sortedhash(metadata) do
                if v ~= "" then
                    -- only the value goes through the entity replacer
                    result[#result+1] = f_metadata(depth+1,k,lpegmatch(p_entity,v))
                end
            end
            result[#result+1] = f_metadata_end(depth)
        end
    end
853
854    local function endtag(result,element,nature,di,skip)
855        if skip == "comment" then
856            if show_comment then
857                if nature == "display" and (inline == 0 or inline == 1) then
858                    depth = depth - 1
859                    result[#result+1] = f_comment_end_display(depth,namespaced[element])
860                    inline = 0
861                elseif nature == "mixed" and (inline == 0 or inline == 1) then
862                    depth = depth - 1
863                    result[#result+1] = f_comment_end_mixed(namespaced[element])
864                    inline = 0
865                else
866                    inline = inline - 1
867                    result[#result+1] = f_comment_end_inline(namespaced[element])
868                end
869            end
870        elseif skip then
871            -- ignore
872        else
873            if nature == "display" and (inline == 0 or inline == 1) then
874                depth = depth - 1
875                result[#result+1] = f_end_display(depth,namespaced[element])
876                inline = 0
877            elseif nature == "mixed" and (inline == 0 or inline == 1) then
878                depth = depth - 1
879                result[#result+1] = f_end_mixed(namespaced[element])
880                inline = 0
881            else
882                inline = inline - 1
883                result[#result+1] = f_end_inline(namespaced[element])
884            end
885        end
886    end
887
888    local function flushtree(result,data,nature)
889        local nofdata = #data
890        for i=1,nofdata do
891            local di = data[i]
892            if not di then -- hm, di can be string
893                -- whatever
894            else
895                local content = di.content
896             -- also optimize for content == "" : trace that first
897                if content then
898                    -- already has breaks
899                    local content = lpegmatch(p_entity,content)
900                    if i == nofdata and sub(content,-1) == "\n" then -- move check
901                        -- can be an end of line in par but can also be the last line
902                        if trace_spacing then
903                            result[#result+1] = f_spacing(di.parnumber or 0,sub(content,1,-2))
904                        else
905                            result[#result+1] = sub(content,1,-2)
906                        end
907                        result[#result+1] = " "
908                    else
909                        if trace_spacing then
910                            result[#result+1] = f_spacing(di.parnumber or 0,content)
911                        else
912                            result[#result+1] = content
913                        end
914                    end
915                elseif not di.collapsed then -- ignore collapsed data (is appended, reconstructed par)
916                    local element = di.element
917                    if not element then
918                        -- skip
919                    elseif element == "break" then -- or element == "pagebreak" -- todo: use empty flag
920                        emptytag(result,element,nature,di)
921                    elseif element == "mspace" then -- todo: use empty flag
922                        emptytag(result,element,nature,di)
923                    elseif element == "" or di.skip == "ignore" then
924                        -- skip
925                    else
926                        if di.before then
927                            flushtree(result,di.before,nature)
928                        end
929                        local natu = di.nature
930                        local skip = di.skip
931                        if di.breaknode then
932                            emptytag(result,"break","display",di)
933                        end
934                        begintag(result,element,natu,di,skip)
935                        flushtree(result,di.data,natu)
936                        endtag(result,element,natu,di,skip)
937                        if di.after then
938                            flushtree(result,di.after,nature)
939                        end
940                    end
941                else
942--                     local element = di.element
943--                     if element == "mspace" then -- todo: use empty flag
944--                         emptytag(result,element,nature,di)
945--                     end
946                end
947            end
948        end
949    end
950
951    local function breaktree(tree,parent,parentelement) -- also removes double breaks
952        local data = tree.data
953        if data then
954            local nofdata = #data
955            local prevelement
956            local prevnature
957            local prevparnumber
958            local newdata = { }
959            local nofnewdata = 0
960            for i=1,nofdata do
961                local di = data[i]
962                if not di then
963                    -- skip
964                elseif di.skip == "ignore" then
965                    -- skip (new)
966                elseif di.tg == "ignore" then
967                    -- skip (new)
968                elseif di.content then
969                    if di.samepar then
970                        prevparnumber = false
971                    else
972                        local parnumber = di.parnumber
973                        if prevnature == "inline" and prevparnumber and prevparnumber ~= parnumber then
974                            nofnewdata = nofnewdata + 1
975                            if trace_spacing then
976                                newdata[nofnewdata] = makebreaknode { type = "a", p = prevparnumber, n = parnumber }
977                            else
978                                newdata[nofnewdata] = makebreaknode()
979                            end
980                        end
981                        prevelement = nil
982                        prevparnumber = parnumber
983                    end
984                    prevnature = "inline"
985                    nofnewdata = nofnewdata + 1
986                    newdata[nofnewdata] = di
987                elseif not di.collapsed then
988                    local element = di.element
989                    if element == "break" then -- or element == "pagebreak"
990                        if prevelement == "break" then
991                            di.element = ""
992                        end
993                        prevelement = element
994                        prevnature = "display"
995                        nofnewdata = nofnewdata + 1
996                        newdata[nofnewdata] = di
997                    elseif element == "" or di.skip == "ignore" then
998                        -- skip
999                    else
1000                        if di.samepar then
1001                            prevnature    = "inline"
1002                            prevparnumber = false
1003                        else
1004                            local nature = di.nature
1005                            local parnumber = di.parnumber
1006                            if prevnature == "inline" and nature == "inline" and prevparnumber and prevparnumber ~= parnumber then
1007                                nofnewdata = nofnewdata + 1
1008                                if trace_spacing then
1009                                    newdata[nofnewdata] = makebreaknode { type = "b", p = prevparnumber, n = parnumber }
1010                                else
1011                                    newdata[nofnewdata] = makebreaknode()
1012                                end
1013                            end
1014                            prevnature = nature
1015                            prevparnumber = parnumber
1016                        end
1017                        prevelement = element
1018                        breaktree(di,tree,element)
1019                        nofnewdata = nofnewdata + 1
1020                        newdata[nofnewdata] = di
1021                    end
1022                else
1023                    if di.samepar then
1024                        prevnature    = "inline"
1025                        prevparnumber = false
1026                    else
1027                        local nature = di.nature
1028                        local parnumber = di.parnumber
1029                        if prevnature == "inline" and nature == "inline" and prevparnumber and prevparnumber ~= parnumber then
1030                            nofnewdata = nofnewdata + 1
1031                            if trace_spacing then
1032                                newdata[nofnewdata] = makebreaknode { type = "c", p = prevparnumber, n = parnumber }
1033                            else
1034                                newdata[nofnewdata] = makebreaknode()
1035                            end
1036                        end
1037                        prevnature = nature
1038                        prevparnumber = parnumber
1039                    end
1040                    nofnewdata = nofnewdata + 1
1041                    newdata[nofnewdata] = di
1042                end
1043            end
1044            tree.data = newdata
1045        end
1046    end
1047
1048    -- also tabulaterow reconstruction .. maybe better as a checker
1049    -- i.e cell attribute
1050
1051    local function showtree(data,when,where)
1052        if data then
1053            for i=1,#data do
1054                local d = data[i]
1055                if type(d) == "table" and d.element then
1056                    print(when,where,i,d.element,d.parnumber or 0)
1057                end
1058            end
1059        end
1060    end
1061
1062    local function collapsetree(tree)
1063     -- showtree(data,"before","collapse")
1064     -- for tag, trees in sortedhash(treehash) do
1065        for tag, trees in next, treehash do
1066            local d = trees[1].data
1067            if d then
1068                local nd = #d
1069                if nd > 0 then
1070                    for i=2,#trees do
1071                        local currenttree = trees[i]
1072                        local currentdata = currenttree.data
1073                        local currentpar  = currenttree.parnumber
1074                        local previouspar = trees[i-1].parnumber
1075                        currenttree.collapsed = true
1076                        -- is the next ok?
1077                        if previouspar == 0 or not (di and di.content) then
1078                            previouspar = nil -- no need anyway so no further testing needed
1079                        end
1080                        for j=1,#currentdata do
1081                            local cd = currentdata[j]
1082                            if not cd or cd == "" then
1083                                -- skip
1084                            elseif cd.skip == "ignore" then
1085                                -- skip
1086                            elseif cd.content then
1087                                if not currentpar then
1088                                    -- add space ?
1089                                elseif not previouspar then
1090                                    -- add space ?
1091                                elseif currentpar ~= previouspar then
1092                                    nd = nd + 1
1093                                    if trace_spacing then
1094                                        d[nd] = makebreaknode { type = "d", p = previouspar, n = currentpar }
1095                                    else
1096                                        d[nd] = makebreaknode()
1097                                    end
1098                                end
1099                                previouspar = currentpar
1100                                nd = nd + 1
1101                                d[nd] = cd
1102                            else
1103                                nd = nd + 1
1104                                d[nd] = cd
1105                            end
1106                            currentdata[j] = false
1107                        end
1108                    end
1109                end
1110            end
1111        end
1112     -- showtree(data,"after","collapse")
1113    end
1114
1115    local function finalizetree(tree)
1116     -- showtree(data,"before","finalize")
1117        for _, finalizer in next, finalizers do
1118            finalizer(tree)
1119        end
1120     -- showtree(data,"after","finalize")
1121    end
1122
1123    local function indextree(tree)
1124        local data = tree.data
1125        if data then
1126         -- showtree(data,"before","index")
1127            local n, new = 0, { }
1128            for i=1,#data do
1129                local d = data[i]
1130                if not d then
1131                    -- skip
1132                elseif d.content then
1133                    n = n + 1
1134                    new[n] = d
1135                elseif not d.collapsed then
1136                    n = n + 1
1137                    d.__i__ = n
1138                    d.__p__ = tree
1139                    indextree(d)
1140                    new[n] = d
1141                end
1142            end
1143            tree.data = new
1144         -- showtree(new,"after","index")
1145        end
1146    end
1147
1148    local function checktree(tree)
1149        local data = tree.data
1150        if data then
1151         -- showtree(data,"before","check")
1152            for i=1,#data do
1153                local d = data[i]
1154                if type(d) == "table" then
1155                    local tg = d.tg
1156                    if tg then
1157                        local check = checks[tg]
1158                        if check then
1159                            check(d,data,i)
1160                        end
1161                    end
1162                    checktree(d) -- so parts can pass twice
1163                end
1164            end
1165         -- showtree(data,"after","check")
1166        end
1167    end
1168
1169    local function fixtree(tree)
1170        local data = tree.data
1171        if data then
1172         -- showtree(data,"before","fix")
1173            for i=1,#data do
1174                local d = data[i]
1175                if type(d) == "table" then
1176                    local tg = d.tg
1177                    if tg then
1178                        local fix = fixes[tg]
1179                        if fix then
1180                            fix(d,data,i)
1181                        end
1182                    end
1183                    fixtree(d) -- so parts can pass twice
1184                end
1185            end
1186         -- showtree(data,"after","fix")
1187        end
1188    end
1189
1190    wrapups.flushtree    = flushtree
1191    wrapups.breaktree    = breaktree
1192    wrapups.collapsetree = collapsetree
1193    wrapups.finalizetree = finalizetree
1194    wrapups.indextree    = indextree
1195    wrapups.checktree    = checktree
1196    wrapups.fixtree      = fixtree
1197
1198end
1199
1200-- collector code
1201
-- Opens a new tree node for 'fulltag': the node is appended to the data of the
-- current tree, the current tree is remembered in 'treestack', 'fulltag' is
-- stored in 'nesting' and the new node becomes the current tree. Except for
-- "break" tags the node is also added to the list kept under 'fulltag' in
-- 'treehash'.
--
-- fulltag : key into 'specifications'; when nothing is found there the tag name
--           and index are taken from 'fulltag' itself with 'tagsplitter'
-- depth   : not used here
local function push(fulltag,depth)
    local tg, n, detail, element, nature, record
    local specification = specifications[fulltag]
    if specification then
        tg     = specification.tagname
        n      = specification.tagindex
        detail = specification.detail
    else
        -- a break (more efficient if we don't store those in specifications)
        tg, n = lpegmatch(tagsplitter,fulltag)
        n = tonumber(n) -- to tonumber in tagsplitter
    end
    local p = properties[tg]
    if p then
        element = p.export or tg
        nature  = p.nature or "inline" -- defaultnature
        record  = p.record
    end
    local treedata = tree.data
    local t = { -- maybe we can use the tag table
        tg        = tg,
        fulltag   = fulltag,
        detail    = detail,
        n         = n, -- already a number
        element   = element,
        nature    = nature,
        data      = { },
        attribute = currentattribute,
        parnumber = currentparagraph,
        record    = record, -- we can consider storing properties
    }
    treedata[#treedata+1] = t
    currentdepth = currentdepth + 1
    nesting[currentdepth] = fulltag
    treestack[currentdepth] = tree
    if trace_export then
        -- the values follow the labels: trigger is the current attribute and n is
        -- the tag index (these two used to be passed in swapped order)
        if detail and detail ~= "" then
            report_export("%w<%s trigger=%q n=%q paragraph=%q index=%q detail=%q>",currentdepth-1,tg,currentattribute or 0,n,currentparagraph or 0,#treedata,detail)
        else
            report_export("%w<%s trigger=%q n=%q paragraph=%q index=%q>",currentdepth-1,tg,currentattribute or 0,n,currentparagraph or 0,#treedata)
        end
    end
    tree = t
    if tg == "break" then
        -- no need for this
    else
        local h = treehash[fulltag]
        if h then
            h[#h+1] = t
        else
            treehash[fulltag] = { t }
        end
    end
end
1256
-- Leaves the current tree node: the tree that was stored in 'treestack' for the
-- current depth becomes the current tree again and the depth goes down by one.
-- When the depth is not positive only a message is reported.
local function pop()
    if currentdepth <= 0 then
        report_export("%w<!-- too many pops -->",currentdepth)
        return
    end
    local top = nesting[currentdepth]
    tree = treestack[currentdepth]
    currentdepth = currentdepth - 1
    if not trace_export then
        return
    end
    if top then
        report_export("%w</%s>",currentdepth,match(top,"[^>]+"))
    else
        report_export("</BAD>")
    end
end
1273
-- Adds a single space to the pending content, but only when there already is
-- pending content (used by pushentry when 'restart' is set).
local function continueexport()
    if nofcurrentcontent <= 0 then
        return
    end
    if trace_export then
        report_export("%w<!-- injecting pagebreak space -->",currentdepth)
    end
    nofcurrentcontent = nofcurrentcontent + 1
    currentcontent[nofcurrentcontent] = " " -- pagebreak
end
1283
-- Brings the open element stack ('nesting', 'currentdepth') in line with the
-- taglist of 'current' by popping and pushing levels.
--
-- current : table with a 'taglist' array of fulltags
-- returns : the depth before and the length of the taglist, or nothing when
--           'current' or its taglist is missing
local function pushentry(current)
    local list = current and current.taglist
    if not list then
        -- bad news: no entry or no taglist
        return
    end
    if restart then
        continueexport()
        restart = false
    end
    local newdepth = #list
    local olddepth = currentdepth
    if trace_export then
        report_export("%w<!-- moving from depth %s to %s (%s) -->",currentdepth,olddepth,newdepth,list[newdepth])
    end
    if olddepth <= 0 then
        -- nothing is open yet so we push the whole list
        for i=1,newdepth do
            push(list[i],i)
        end
        return olddepth, newdepth
    end
    -- find the first level, within the shared depth, where the tags differ
    local shared = olddepth < newdepth and olddepth or newdepth
    local difference
    for i=1,shared do
        if list[i] ~= nesting[i] then
            difference = i
            break
        end
    end
    if difference then
        for i=olddepth,difference,-1 do
            pop()
        end
        for i=difference,newdepth do
            push(list[i],i)
        end
    elseif newdepth > olddepth then
        for i=olddepth+1,newdepth do
            push(list[i],i)
        end
    elseif newdepth < olddepth then
        -- NOTE(review): this pops olddepth-newdepth+1 levels, so we end up one
        -- level above newdepth; confirm that this is intended
        for i=olddepth,newdepth,-1 do
            pop()
        end
    elseif trace_export then
        report_export("%w<!-- staying at depth %s (%s) -->",currentdepth,newdepth,nesting[newdepth] or "?")
    end
    return olddepth, newdepth
end
1345
-- Flushes the pending content into the data of the current tree and resets the
-- pending content counter. When 'oldparagraph' is given a trailing newline is
-- dropped first, content that is found in 'somespace' is omitted, and afterwards
-- the break list made from 'currentnesting' is pushed.
--
-- oldparagraph : paragraph number stored with the content (falls back to
--                'currentparagraph'); also enables the break handling
-- newparagraph : only used in the trace message
local function pushcontent(oldparagraph,newparagraph)
    if nofcurrentcontent > 0 then
        if oldparagraph and currentcontent[nofcurrentcontent] == "\n" then
            if trace_export then
                report_export("%w<!-- removing newline -->",currentdepth)
            end
            nofcurrentcontent = nofcurrentcontent - 1
        end
        local content = concat(currentcontent,"",1,nofcurrentcontent)
        -- empty content is always omitted, some space only at a paragraph change;
        -- when oldparagraph we could push, remove spaces, pop
        if content ~= "" and not (somespace[content] and oldparagraph) then
            local olddepth, newdepth
            local list = taglist[currentattribute]
            if list then
                olddepth, newdepth = pushentry(list)
            end
            if tree then
                local td = tree.data
                td[#td+1] = { parnumber = oldparagraph or currentparagraph, content = content }
                if trace_export then
                    report_export("%w<!-- start content with length %s -->",currentdepth,utflen(content))
                    report_export("%w%s",currentdepth,(gsub(content,"\n","\\n")))
                    report_export("%w<!-- stop content -->",currentdepth)
                end
                if olddepth then
                    for i=newdepth-1,olddepth,-1 do
                        pop()
                    end
                end
            end
        end
        nofcurrentcontent = 0
    end
    if oldparagraph then
        pushentry(makebreaklist(currentnesting))
        if trace_export then
            report_export("%w<!-- break added between paragraph %a and %a -->",currentdepth,oldparagraph,newparagraph)
        end
    end
end
1392
-- Wraps up the collecting: pending content is flushed (without a last entry
-- that is found in 'somespace'), all levels that are still open are popped and
-- the content buffer is replaced by a fresh table.
local function finishexport()
    if trace_export then
        report_export("%w<!-- start finalizing -->",currentdepth)
    end
    if nofcurrentcontent > 0 then
        local lastpiece = currentcontent[nofcurrentcontent]
        if somespace[lastpiece] then
            if trace_export then
                report_export("%w<!-- removing space -->",currentdepth)
            end
            nofcurrentcontent = nofcurrentcontent - 1
        end
        pushcontent()
    end
    -- one pop per level that is open at this moment
    for _=1,currentdepth do
        pop()
    end
    currentcontent = { } -- we're nice and do a cleanup
    if trace_export then
        report_export("%w<!-- stop finalizing -->",currentdepth)
    end
end
1414
1415-- inserts ?
1416
1417local collectresults  do -- too many locals otherwise
1418
1419    local nodecodes          = nodes.nodecodes
1420    local gluecodes          = nodes.gluecodes
1421    local listcodes          = nodes.listcodes
1422    local whatsitcodes       = nodes.whatsitcodes
1423
1424    local subtypes           = nodes.subtypes
1425
1426    local hlist_code         = nodecodes.hlist
1427    local vlist_code         = nodecodes.vlist
1428    local glyph_code         = nodecodes.glyph
1429    local glue_code          = nodecodes.glue
1430    local kern_code          = nodecodes.kern
1431    local disc_code          = nodecodes.disc
1432    local whatsit_code       = nodecodes.whatsit
1433    local par_code           = nodecodes.par
1434
1435    local userskip_code      = gluecodes.userskip
1436    local rightskip_code     = gluecodes.rightskip
1437    local parfillskip_code   = gluecodes.parfillskip
1438    local spaceskip_code     = gluecodes.spaceskip
1439    local xspaceskip_code    = gluecodes.xspaceskip
1440    local intermathskip_code = gluecodes.intermathskip
1441
1442    local linelist_code      = listcodes.line
1443
1444    local userdefinedwhatsit_code  = whatsitcodes.userdefined
1445
1446    local privateattribute = attributes.private
1447    local a_image          = privateattribute('image')
1448    local a_reference      = privateattribute('reference')
1449    local a_destination    = privateattribute('destination')
1450    local a_characters     = privateattribute('characters')
1451    local a_exportstatus   = privateattribute('exportstatus')
1452    local a_tagged         = privateattribute('tagged')
1453    local a_taggedpar      = privateattribute("taggedpar")
1454    local a_textblock      = privateattribute("textblock")
1455
1456    local inline_mark      = nodes.pool.userids["margins.inline"]
1457
1458    local nuts             = nodes.nuts
1459
1460    local getnext          = nuts.getnext
1461    local getdisc          = nuts.getdisc
1462    local getlist          = nuts.getlist
1463    local getid            = nuts.getid
1464    local getattr          = nuts.getattr
1465    local setattr          = nuts.setattr -- maybe use properties
1466    local isglyph          = nuts.isglyph
1467    local getkern          = nuts.getkern
1468    local getwidth         = nuts.getwidth
1469
1470    local startofpar       = nuts.startofpar
1471
1472    local nexthlist        = nuts.traversers.hlist
1473    local nextnode         = nuts.traversers.node
1474
1475    local function addtomaybe(maybewrong,c,case)
1476        if trace_export then
1477            report_export("%w<!-- possible paragraph mixup at %C case %i -->",currentdepth,c,case)
1478        else
1479            local s = formatters["%C"](c)
1480            if maybewrong then
1481                maybewrong[#maybewrong+1] = s
1482            else
1483                maybewrong = { s }
1484            end
1485            return maybewrong
1486        end
1487    end
1488
1489    local function showmaybe(maybewrong)
1490        if not trace_export then
1491            report_export("fuzzy paragraph: % t",maybewrong)
1492        end
1493    end
1494
1495    local function showdetail(n,id,subtype)
1496        local a = getattr(n,a_tagged)
1497        local t = taglist[a]
1498        local c = nodecodes[id]
1499        local s = subtypes[id][subtype]
1500        if a and t then
1501            report_export("node %a, subtype %a, tag %a, element %a, tree '% t'",c,s,a,t.tagname,t.taglist)
1502        else
1503            report_export("node %a, subtype %a, untagged",c,s)
1504        end
1505    end
1506
1507    local function collectresults(head,list,pat,pap) -- is last used (we also have currentattribute)
1508        local p
1509        local paragraph
1510        local maybewrong
1511        local pid
1512        for n, id, subtype in nextnode, head do
1513            if trace_details then
1514                showdetail(n,id,subtype)
1515            end
1516            if id == glyph_code then
1517                local c, f = isglyph(n)
1518                local at   = getattr(n,a_tagged) or pat
1519                if not at then
1520                 -- we need to tag the pagebody stuff as being valid skippable
1521                 --
1522                 -- report_export("skipping character: %C (no attribute)",n.char)
1523                else
1524                    if last ~= at then
1525                        local tl = taglist[at]
1526                        local ap = getattr(n,a_taggedpar) or pap
1527                        if paragraph and (not ap or ap < paragraph) then
1528                            maybewrong = addtomaybe(maybewrong,c,1)
1529                        end
1530                        pushcontent()
1531                        currentnesting   = tl
1532                        currentparagraph = ap
1533                        currentattribute = at
1534                        last = at
1535                        pushentry(currentnesting)
1536                        if trace_export then
1537                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,at)
1538                        end
1539                        -- We need to intercept this here; maybe I will also move this
1540                        -- to a regular setter at the tex end.
1541                        local r = getattr(n,a_reference)
1542                        if r then
1543                            local t = tl.taglist
1544                            referencehash[t[#t]] = r -- fulltag
1545                        end
1546                        local d = getattr(n,a_destination)
1547                        if d then
1548                            local t = tl.taglist
1549                            destinationhash[t[#t]] = d -- fulltag
1550                        end
1551                        --
1552                    elseif last then
1553                        -- we can consider tagging the pars (lines) in the parbuilder but then we loose some
1554                        -- information unless we inject a special node (but even then we can run into nesting
1555                        -- issues)
1556                        local ap = getattr(n,a_taggedpar) or pap
1557                        if ap ~= currentparagraph then
1558                            pushcontent(currentparagraph,ap)
1559                            pushentry(currentnesting)
1560                            currentattribute = last
1561                            currentparagraph = ap
1562                        end
1563                        if paragraph and (not ap or ap < paragraph) then
1564                            maybewrong = addtomaybe(maybewrong,c,2)
1565                        end
1566                        if trace_export then
1567                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,last)
1568                        end
1569                    else
1570                        if trace_export then
1571                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,at)
1572                        end
1573                    end
1574                    local s = getattr(n,a_exportstatus)
1575                    if s then
1576                        c = s
1577                    end
1578                    if c == 0 then
1579                        if trace_export then
1580                            report_export("%w<!-- skipping last glyph -->",currentdepth)
1581                        end
1582                    elseif c == 0x20 then
1583                        local a = getattr(n,a_characters)
1584                        nofcurrentcontent = nofcurrentcontent + 1
1585                        if a then
1586                            if trace_export then
1587                                report_export("%w<!-- turning last space into special space %U -->",currentdepth,a)
1588                            end
1589                            currentcontent[nofcurrentcontent] = specialspaces[a] -- special space
1590                        else
1591                            currentcontent[nofcurrentcontent] = " "
1592                        end
1593                    else
1594                        local fc = fontchar[f]
1595                        if fc then
1596                            fc = fc and fc[c]
1597                            if fc then
1598                                local u = fc.unicode
1599                                if not u then
1600                                    nofcurrentcontent = nofcurrentcontent + 1
1601                                    currentcontent[nofcurrentcontent] = utfchar(c)
1602                                elseif type(u) == "table" then
1603                                    for i=1,#u do
1604                                        nofcurrentcontent = nofcurrentcontent + 1
1605                                        currentcontent[nofcurrentcontent] = utfchar(u[i])
1606                                    end
1607                                else
1608                                    nofcurrentcontent = nofcurrentcontent + 1
1609                                    currentcontent[nofcurrentcontent] = utfchar(u)
1610                                end
1611                            elseif c > 0 then
1612                                nofcurrentcontent = nofcurrentcontent + 1
1613                                currentcontent[nofcurrentcontent] = utfchar(c)
1614                            else
1615                                -- we can have -1 as side effect of an explicit hyphen (unless we expand)
1616                            end
1617                        elseif c > 0 then
1618                            nofcurrentcontent = nofcurrentcontent + 1
1619                            currentcontent[nofcurrentcontent] = utfchar(c)
1620                        else
1621                            -- we can have -1 as side effect of an explicit hyphen (unless we expand)
1622                        end
1623                    end
1624                end
1625            elseif id == glue_code then
1626                -- we need to distinguish between hskips and vskips
1627                local ca = getattr(n,a_characters)
1628                if ca == 0 then
1629                    -- skip this one ... already converted special character (node-acc)
1630                elseif ca then
1631                    local a = getattr(n,a_tagged) or pat
1632                    if a then
1633                        local c = specialspaces[ca]
1634                        if last ~= a then
1635                            local tl = taglist[a]
1636                            if trace_export then
1637                                report_export("%w<!-- processing space glyph %U tagged %a case 1 -->",currentdepth,ca,a)
1638                            end
1639                            pushcontent()
1640                            currentnesting = tl
1641                            currentparagraph = getattr(n,a_taggedpar) or pap
1642                            currentattribute = a
1643                            last = a
1644                            pushentry(currentnesting)
1645                            -- no reference check (see above)
1646                        elseif last then
1647                            local ap = getattr(n,a_taggedpar) or pap
1648                            if ap ~= currentparagraph then
1649                                pushcontent(currentparagraph,ap)
1650                                pushentry(currentnesting)
1651                                currentattribute = last
1652                                currentparagraph = ap
1653                            end
1654                            if trace_export then
1655                                report_export("%w<!-- processing space glyph %U tagged %a case 2 -->",currentdepth,ca,last)
1656                            end
1657                        end
1658                        -- if somespace[currentcontent[nofcurrentcontent]] then
1659                        --     if trace_export then
1660                        --         report_export("%w<!-- removing space -->",currentdepth)
1661                        --     end
1662                        --     nofcurrentcontent = nofcurrentcontent - 1
1663                        -- end
1664                        nofcurrentcontent = nofcurrentcontent + 1
1665                        currentcontent[nofcurrentcontent] = c
1666                    end
1667                elseif subtype == userskip_code then
1668-- local at = getattr(n,a_tagged)
1669-- local tl = taglist[at]
1670-- if tl and structurestags.strip(tl.taglist[#tl.taglist]) == "mspace" then
1671--     if nofcurrentcontent > 0 then
1672--         pushcontent()
1673--         pushentry(currentnesting) -- ??
1674--     end
1675--     -- in the past we'd push a space here ... check mkiv : otherwise no threshold with mspace
1676--     pushentry(tl)
1677--     if trace_export then
1678--         report_export("%w<!-- processing mspace tagged %a",currentdepth,at)
1679--     end
1680--     last = nil
1681--     currentparagraph = nil
1682-- else
1683                    if getwidth(n) > threshold then
1684                        if last and not somespace[currentcontent[nofcurrentcontent]] then
1685                            local a = getattr(n,a_tagged) or pat
1686                            if a == last then
1687                                if trace_export then
1688                                    report_export("%w<!-- injecting spacing 5a -->",currentdepth)
1689                                end
1690                                nofcurrentcontent = nofcurrentcontent + 1
1691                                currentcontent[nofcurrentcontent] = " "
1692                            elseif a then
1693                                -- e.g LOGO<space>LOGO
1694                                if trace_export then
1695                                    report_export("%w<!-- processing glue > threshold tagged %s becomes %s -->",currentdepth,last,a)
1696                                end
1697                                pushcontent()
1698                                if trace_export then
1699                                    report_export("%w<!-- injecting spacing 5b -->",currentdepth)
1700                                end
1701                                last = a
1702                                nofcurrentcontent = nofcurrentcontent + 1
1703                                currentcontent[nofcurrentcontent] = " "
1704                                currentnesting = taglist[last]
1705                                pushentry(currentnesting)
1706                                currentattribute = last
1707                            end
1708                        end
1709                    end
1710-- end
1711                elseif subtype == spaceskip_code or subtype == xspaceskip_code then
1712                    if not somespace[currentcontent[nofcurrentcontent]] then
1713                        local a = getattr(n,a_tagged) or pat
1714                        if a == last then
1715                            if trace_export then
1716                                report_export("%w<!-- injecting spacing 7 (stay in element) -->",currentdepth)
1717                            end
1718                            nofcurrentcontent = nofcurrentcontent + 1
1719                            currentcontent[nofcurrentcontent] = " "
1720                        else
1721                            if trace_export then
1722                                report_export("%w<!-- injecting spacing 7 (end of element) -->",currentdepth)
1723                            end
1724                            last = a
1725                            pushcontent()
1726                            nofcurrentcontent = nofcurrentcontent + 1
1727                            currentcontent[nofcurrentcontent] = " "
1728                            currentnesting = taglist[last]
1729                            pushentry(currentnesting)
1730                            currentattribute = last
1731                        end
1732                    end
1733                elseif subtype == intermathskip_code then
1734                    -- put this as attribute when it differs, maybe more ... check mathml
1735                elseif subtype == rightskip_code then
1736                    -- a line
1737                    if nofcurrentcontent > 0 then
1738                        local r = currentcontent[nofcurrentcontent]
1739                        if r == hyphen then
1740                            if not keephyphens then
1741                                nofcurrentcontent = nofcurrentcontent - 1
1742                            end
1743                        elseif pid == disc_code then
1744                            -- go on .. tricky: we should mark the glyhs as coming from a disc
1745                        elseif not somespace[r] then
1746                            local a = getattr(n,a_tagged) or pat
1747                            if a == last then
1748                                if trace_export then
1749                                    report_export("%w<!-- injecting spacing 1 (end of line, stay in element) -->",currentdepth)
1750                                end
1751                                nofcurrentcontent = nofcurrentcontent + 1
1752                                currentcontent[nofcurrentcontent] = " "
1753                            else
1754                                if trace_export then
1755                                    report_export("%w<!-- injecting spacing 1 (end of line, end of element) -->",currentdepth)
1756                                end
1757                                last = a
1758                                pushcontent()
1759                                nofcurrentcontent = nofcurrentcontent + 1
1760                                currentcontent[nofcurrentcontent] = " "
1761                                currentnesting = taglist[last]
1762                                pushentry(currentnesting)
1763                                currentattribute = last
1764                            end
1765                        end
1766                    end
1767                elseif subtype == parfillskip_code then
1768                    -- deal with paragraph endings (crossings) elsewhere and we quit here
1769                    -- as we don't want the rightskip space addition
1770                    if maybewrong then
1771                        showmaybe(maybewrong)
1772                    end
1773                    return
1774                end
1775            elseif id == hlist_code or id == vlist_code then
1776                local ai = getattr(n,a_image)
1777                if ai then
1778                    local at = getattr(n,a_tagged) or pat
1779                    if nofcurrentcontent > 0 then
1780                        pushcontent()
1781                        pushentry(currentnesting) -- ??
1782                    end
1783                    pushentry(taglist[at]) -- has an index, todo: flag empty element
1784                    if trace_export then
1785                        report_export("%w<!-- processing image tagged %a",currentdepth,last)
1786                    end
1787                    last = nil
1788                    currentparagraph = nil
1789                else
1790                    -- we need to determine an end-of-line
1791                    local list = getlist(n)
1792                    if list then
1793                        -- todo: no par checking needed in math
1794                        local at = getattr(n,a_tagged) or pat
1795                        collectresults(list,n,at)
1796                    end
1797                end
1798            elseif id == kern_code then
1799                local kern = getkern(n)
1800                if kern > 0 then
1801                    local a = getattr(n,a_tagged) or pat
1802                    local t = taglist[a]
1803                    if not t or t.tagname ~= "ignore" then -- maybe earlier on top)
1804                        local limit = threshold
1805                        if p then
1806                            local c, f = isglyph(p)
1807                            if c then
1808                                limit = fontquads[f] / 4
1809                            end
1810                        end
1811                        if kern > limit then
1812                            if last and not somespace[currentcontent[nofcurrentcontent]] then
1813                             -- local a = getattr(n,a_tagged) or pat
1814                                if a == last then
1815                                    if not somespace[currentcontent[nofcurrentcontent]] then
1816                                        if trace_export then
1817                                            report_export("%w<!-- injecting spacing 8 (kern %p) -->",currentdepth,kern)
1818                                        end
1819                                        nofcurrentcontent = nofcurrentcontent + 1
1820                                        currentcontent[nofcurrentcontent] = " "
1821                                    end
1822                                elseif a then
1823                                    -- e.g LOGO<space>LOGO
1824                                    if trace_export then
1825                                        report_export("%w<!-- processing kern, threshold %p, tag %s => %s -->",currentdepth,limit,last,a)
1826                                    end
1827                                    last = a
1828                                    pushcontent()
1829                                    if trace_export then
1830                                        report_export("%w<!-- injecting spacing 9 (kern %p) -->",currentdepth,kern)
1831                                    end
1832                                    nofcurrentcontent = nofcurrentcontent + 1
1833                                    currentcontent[nofcurrentcontent] = " "
1834                                 -- currentnesting = taglist[last]
1835                                    currentnesting = t
1836                                    pushentry(currentnesting)
1837                                    currentattribute = last
1838                                end
1839                            end
1840                        end
1841                    end
1842                end
1843            elseif id == whatsit_code then
1844                -- todo (lmtx)
1845                if subtype == userdefinedwhatsit_code then
1846                    -- similar to images, see above
1847                    local at = getattr(n,a_tagged)
1848                    if nofcurrentcontent > 0 then
1849                        pushcontent()
1850                        pushentry(currentnesting) -- ??
1851                    end
1852                    pushentry(taglist[at])
1853                    if trace_export then
1854                        report_export("%w<!-- processing anchor tagged %a",currentdepth,last)
1855                    end
1856                    last = nil
1857                    currentparagraph = nil
1858                end
1859            elseif not paragraph and id == par_code and startofpar(n) then
1860                paragraph = getattr(n,a_taggedpar)
1861            elseif id == disc_code then
1862                -- very unlikely because we stripped them
1863                local pre, post, replace = getdisc(n)
1864                if keephyphens then
1865                    if pre and not getnext(pre) and isglyph(pre) == 0xAD then -- hyphencode then
1866                        nofcurrentcontent = nofcurrentcontent + 1
1867                        currentcontent[nofcurrentcontent] = hyphen
1868                    end
1869                end
1870                if replace then
1871                    collectresults(replace,nil)
1872                end
1873            end
1874            p   = n
1875            pid = id
1876        end
1877        if maybewrong then
1878            showmaybe(maybewrong)
1879        end
1880    end
1881
-- Page builder hook: collects the export results for one flushed page.
--
-- The whole run is timed under treehash. The restart flag is raised before
-- collectresults walks the list, and the list itself is handed back as is.
--
-- head : first node of the list that is being flushed
-- returns head
function nodes.handlers.export(head) -- hooks into the page builder
    starttiming(treehash)
    if trace_export then
        report_export("%w<!-- start flushing page -->",currentdepth)
    end
    -- continueexport()
    restart = true
    collectresults(head)
    if trace_export then
        report_export("%w<!-- stop flushing page -->",currentdepth)
    end
    stoptiming(treehash)
    return head
end
1896
-- Stores the value of the "tagparcounter" count register plus one in the
-- a_taggedpar attribute of the given node.
--
-- p : node that gets the attribute
-- returns p
function nodes.handlers.checkparcounter(p)
    local nextpar = texgetcount("tagparcounter") + 1
    setattr(p,a_taggedpar,nextpar)
    return p
end
1901
-- Starts a new text block and marks the hlists of the given list with it.
--
-- The text block counter is bumped once per call. Lists with the line
-- subtype get the new block number in a_textblock.
--
-- NOTE(review): the second branch compares the subtype delivered by the
-- hlist iterator with glue_code and kern_code; those look like node id codes
-- rather than hlist subtypes, so confirm that this branch does what is meant.
--
-- head : start of the list
-- returns false
function builders.paragraphs.tag(head)
    local block = noftextblocks + 1
    noftextblocks = block
    for n, subtype in nexthlist, head do
        if subtype == linelist_code then
            setattr(n,a_textblock,block)
        elseif subtype == glue_code or subtype == kern_code then -- no need to set fontkerns
            setattr(n,a_textblock,0)
        end
    end
    return false
end
1913
1914end
1915
1916do
1917
-- shortcuts to the xml helpers used in this part of the file

local xmlcollected  = xml.collected
local xmlsetcomment = xml.setcomment

-- The xml declaration without the informative comment; %standalone% is
-- filled in by wholepreamble.

local xmlpreamble_nop = [[
<?xml version="1.0" encoding="UTF-8" standalone="%standalone%" ?>
]]

-- The xml declaration followed by a comment that identifies the job; all
-- %...% fields are filled in by wholepreamble.

local xmlpreamble_yes = [[
<?xml version="1.0" encoding="UTF-8" standalone="%standalone%" ?>

<!--

    input filename   : %filename%
    processing date  : %date%
    context version  : %contextversion%
    exporter version : %exportversion%

-->

]]

-- the tree serializer, used by allcontent

local flushtree = wrapups.flushtree
1940
-- Returns the xml preamble with its template fields filled in.
--
-- standalone : when true the declaration says standalone="yes", else "no"
-- nocomment  : when true the variant without the job comment is used
local function wholepreamble(standalone,nocomment)
    local template  = nocomment and xmlpreamble_nop or xmlpreamble_yes
    local variables = {
        standalone     = standalone and "yes" or "no",
        filename       = tex.jobname,
        date           = os.fulltime(),
        contextversion = environment.version,
        exportversion  = exportversion,
    }
    return replacetemplate(template,variables)
end
1950
1951
-- A stylesheet reference as xml processing instruction; %filename% is the
-- css file.

local csspreamble = [[
<?xml-stylesheet type="text/css" href="%filename%" ?>
]]

-- The same reference as link element; %filename% is the css file.

local cssheadlink = [[
<link type="text/css" rel="stylesheet" href="%filename%" />
]]
1959
-- Collects the references to the stylesheets that are used.
--
-- Each usable entry of cssfiles is reduced to its base name and put on the
-- given path. Entries that are not strings are skipped, and so is the entry
-- "export-example.css". Every resulting file is referenced only once: the
-- bookkeeping is keyed on the resulting name, so both a repeated entry and
-- two entries that end up as the same file on path are filtered. (Before,
-- the check used the original name while the mark was set on the resulting
-- name, so repeated entries were never filtered.)
--
-- cssfiles : list of stylesheet names
-- files    : list that gets the resulting names appended
-- path     : path that the stylesheets are put on
-- returns two strings: the concatenated xml-stylesheet instructions and the
-- concatenated link elements
local function allusedstylesheets(cssfiles,files,path)
    local done   = { }
    local result = { }
    local extras = { }
    for i=1,#cssfiles do
        local cssfile = cssfiles[i]
        if type(cssfile) ~= "string" then
            -- error
        elseif cssfile == "export-example.css" then
            -- ignore
        else
            local target = joinfile(path,basename(cssfile))
            if not done[target] then
                done[target] = true
                report_export("adding css reference '%s'",target)
                files[#files+1]   = target
                result[#result+1] = replacetemplate(csspreamble, { filename = target })
                extras[#extras+1] = replacetemplate(cssheadlink, { filename = target })
            end
        end
    end
    return concat(result), concat(extras)
end
1981
-- The css rule for an element without detail: it addresses the element by
-- name as well as a div with the element name as class.

local elementtemplate <const> = [[
/* element="%element%" detail="%detail%" chain="%chain%" */

%element%,
%namespace%div.%element% {
    display: %display% ;
}]]

-- The css rule for an element with a detail: it addresses the element with a
-- matching detail attribute as well as a div that has both the element name
-- and the detail as class.

local detailtemplate <const> = [[
/* element="%element%" detail="%detail%" chain="%chain%" */

%element%[detail=%detail%],
%namespace%div.%element%.%detail% {
    display: %display% ;
}]]

-- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd" >

-- The html wrapper; the fields are %preamble%, %title%, %style% and %body%.

local htmltemplate <const> = [[
%preamble%

<html xmlns="http://www.w3.org/1999/xhtml" xmlns:math="http://www.w3.org/1998/Math/MathML">

    <head>

        <meta charset="utf-8"/>

        <title>%title%</title>

%style%

    </head>
    <body>
        <div class="document" xmlns="http://www.pragma-ade.com/context/export">

<div class="warning">Rendering can be suboptimal because there is no default/fallback css loaded.</div>

%body%

        </div>
    </body>
</html>
]]
2025
-- Maps the nature of an element onto the value of the css display property.
-- allusedelements uses "block" for a nature that is not listed here.
local displaymapping = {
    display = "block",
    inline  = "inline",
    mixed   = "inline",
}
2031
-- Generates the css template: one display rule per used element and detail.
--
-- The result starts with the filled in namespacetemplate. After that the
-- used elements are handled in sorted order, and per element the details in
-- sorted order; elements that are listed in namespaces are skipped. A rule
-- without detail uses elementtemplate, a rule with detail detailtemplate.
--
-- Both kinds of rules get the same namespace prefix: cssnamespace when
-- usecssnamespace is set and an empty string otherwise. (Before, the rule
-- without detail used a variable namespace instead of cssnamespace, so the
-- two rules were not in sync.)
--
-- filename : passed to the namespace template
-- returns the rules as one string, separated by empty lines
local function allusedelements(filename)
    local result = { replacetemplate(namespacetemplate, {
        what            = "template",
        filename        = filename,
        namespace       = contextns,
     -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or "",
        cssnamespaceurl = cssnamespaceurl,
    },false,true) }
    local prefix = usecssnamespace and cssnamespace or ""
    for element, details in sortedhash(used) do
        if namespaces[element] then
            -- skip math
        else
            for detail, what in sortedhash(details) do
                local nature  = what[1] or "display"
                local chain   = what[2]
                local display = displaymapping[nature] or "block"
                if detail == "" then
                    result[#result+1] = replacetemplate(elementtemplate, {
                        element   = element,
                        display   = display,
                        chain     = chain,
                        namespace = prefix,
                    })
                else
                    result[#result+1] = replacetemplate(detailtemplate, {
                        element   = element,
                        display   = display,
                        detail    = detail,
                        chain     = chain,
                        namespace = prefix,
                    })
                end
            end
        end
    end
    return concat(result,"\n\n")
end
2069
-- Serializes the document part of the tree.
--
-- Every top level entry whose tag is not "document" is replaced by an empty
-- table (so tree.data is changed). The remaining data is flushed in display
-- mode, after which lines that only contain spaces are removed, as well as
-- the leading spaces of a line that does not start with a "<".
--
-- tree : table with the entries in its data field
-- returns the serialized content as a string
local function allcontent(tree)
    local data = tree.data
    for i=1,#data do
        local entry = data[i]
        if entry.tg ~= "document" then
            data[i] = { }
        end
    end
    local result = { }
    flushtree(result,data,"display") -- we need to collect images
    local text = concat(result)
    -- no need to lpeg .. fast enough
    text = gsub(text,"\n *\n","\n")
    text = gsub(text,"\n +([^< ])","\n%1")
    --
    return text
end
2086
2087    -- local xhtmlpreamble = [[
2088    --     <!DOCTYPE html PUBLIC
2089    --         "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
2090    --         "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"
2091    --     >
2092    -- ]]
2093
-- Adds id and href attributes to the elements of the tree.
--
-- First pass: the first element with a given explicit attribute gets that
-- value as id, and when it also has an implicit attribute, that implicit
-- value is registered as being overloaded by the explicit one. An element
-- without explicit attribute gets "aut:" plus its implicit value as id, but
-- only the first one with that implicit value.
--
-- Second pass: an element with an internal attribute gets a href that
-- points to the explicit id registered for its location, or else to "#aut:"
-- plus the internal value. Without internal attribute the href becomes "#"
-- plus the location, or else the url, or else the file, if any.
--
-- xmltree : the tree, changed in place
-- returns the tree, or a small error tree when no tree is given
local function cleanxhtmltree(xmltree)
    if not xmltree then
        return xml.convert('<?xml version="1.0"?>\n<error>invalid xhtml tree</error>')
    end
    local implicits = { }
    local explicits = { }
    local overloads = { }
    -- pass one: identifiers
    for e in xmlcollected(xmltree,"*") do
        local at = e.at
        if at then
            local explicit = at.explicit
            local implicit = at.implicit
            if explicit then
                if not explicits[explicit] then
                    explicits[explicit] = true
                    at.id = explicit
                    if implicit then
                        overloads[implicit] = explicit
                    end
                end
            elseif implicit and not implicits[implicit] then
                implicits[implicit] = true
                at.id = "aut:" .. implicit
            end
        end
    end
    -- pass two: references
    for e in xmlcollected(xmltree,"*") do
        local at = e.at
        if at then
            local internal = at.internal
            local location = at.location
            local href     = nil
            if internal then
                local explicit = location and overloads[location]
                if explicit then
                    href = "#" .. explicit
                else
                    href = "#aut:" .. internal
                end
            elseif location then
                href = "#" .. location
            else
                href = at.url or at.file
            end
            if href then
                at.href = href
            end
        end
    end
    return xmltree
end
2158
2159    -- maybe the reverse: be explicit about what is permitted
2160
-- The attributes that makeclass does not turn into a class; every other
-- attribute ends up as "key-value" in the class of the element.
local private = {
    -- references
    destination = true,
    prefix      = true,
    reference   = true,
    -- set by cleanxhtmltree
    id          = true,
    href        = true,
    -- read by cleanxhtmltree
    implicit    = true,
    explicit    = true,
    url         = true,
    file        = true,
    internal    = true,
    location    = true,
    -- images
    name        = true, -- image name
    used        = true, -- image name
    page        = true, -- image name
    width       = true,
    height      = true,
}
2184
-- When set, remap adds an onclick handler to every element that has a href.
-- The formatter was declared twice; one declaration suffices and it saves a
-- local (of which we have a limited number).

local addclicks   = true
local f_onclick   = formatters[ [[location.href='%s']] ]

-- Used by remap to clean up identifiers: a colon is mapped onto a hyphen
-- (presumably all of them, check lpeg.replacer). For a href the same is
-- done with what comes after a leading "#"; a href that doesn't start with
-- "#" doesn't match.

local p_cleanid   = lpeg.replacer { [":"] = "-" }
local p_cleanhref = lpeg.Cs(lpeg.P("#") * p_cleanid)

-- Splits a string into space separated words and collects them in a table.
-- The table passed as extra argument keeps track of the words that were
-- seen, so a word that shows up again is dropped.

local p_splitter  = lpeg.Ct ( (
    lpeg.Carg(1) * lpeg.C((1-lpeg.P(" "))^1) / function(d,s) if not d[s] then d[s] = true return s end end
  * lpeg.P(" ")^0 )^1 )

-- A cache that maps a string of words onto the same words without the
-- repeated ones, separated by one space.

local classes = table.setmetatableindex(function(t,k)
    local v = concat(lpegmatch(p_splitter,k,1,{})," ")
    t[k] = v
    return v
end)
2202
-- Builds the value of the class attribute of an element.
--
-- The class starts with the tag, followed by the chain and the detail when
-- they add something, followed by the sorted "key-value" words for all the
-- attributes that are not private. As a side effect the detail and chain
-- fields of the attribute table are removed.
--
-- When chain and detail differ, the words come from the classes cache that
-- is defined before this function: it gets "tag chain detail" and returns
-- these words with the repeated ones dropped, so its result already starts
-- with the tag. Before, a local list with the same name shadowed that cache,
-- so the lookup always gave nil and both chain and detail were lost; that
-- also meant that the css rules for a detail never matched.
--
-- tg : the tag of the element
-- at : the attribute table of the element (changed)
-- returns the class as a string
local function makeclass(tg,at)
    local detail  = at.detail
    local chain   = at.chain
    local base    = tg
    local list    = { }
    local noflist = 0
    at.detail     = nil
    at.chain      = nil
    for k, v in next, at do
        if not private[k] then
            noflist = noflist + 1
            list[noflist] = k .. "-" .. v
        end
    end
    if detail and detail ~= "" then
        if chain and chain ~= "" and chain ~= detail then
            -- the cached string starts with tg
            base = classes[tg .. " " .. chain .. " " .. detail]
        elseif tg ~= detail then
            base = tg .. " " .. detail
        end
    elseif chain and chain ~= "" then
        if tg ~= chain then
            base = tg .. " " .. chain
        end
    end
    -- in this order
    if noflist > 0 then
        sort(list)
        return base .. " " .. concat(list," ")
    else
        return base
    end
end
2249
-- Some elements are not supported (well) in css so we need to retain them. For
-- instance, table cells have no colspan, so basically that renders css table div
-- elements quite useless. A side effect is that we now can have conflicts when
-- we mix in with other html (as there is no reset). Of course, when it eventually
-- gets added, there is a chance that those not using the div abstraction will
-- be ridiculed.
--
-- a table tr td th thead tbody tfoot

-- Maps the table and tabulate related tags onto native html tags.

local crappycss = {
    table        = "table",
    tablehead    = "thead",
    tablebody    = "tbody",
    tablefoot    = "tfoot",
    tablerow     = "tr",
    tablecell    = "td",
    --
    tabulate     = "table",
    tabulatehead = "thead",
    tabulatebody = "tbody",
    tabulatefoot = "tfoot",
    tabulaterow  = "tr",
    tabulatecell = "td",
}

-- The mapping that remap uses: false means that every element becomes a div,
-- which is the default. The directive "export.nativetags" enables the
-- mapping above.

local cssmapping = false

directives.register("export.nativetags", function(v)
    if v then
        cssmapping = crappycss
    else
        cssmapping = false
    end
end)
2274
-- Turns the elements of an exported tree into html elements, in place.
--
-- Special elements and elements in the "m" namespace are left alone. Any
-- other element:
--
-- * gets a comment as content when it is empty (the content table made for
--   the first empty element is shared with the later ones),
-- * gets a new attribute table that only has a class (from makeclass, not
--   for the document element), the cleaned up id and href when present, an
--   onclick when there is a href and addclicks is set, and for a
--   metavariable a label with its name,
-- * gets the tag that cssmapping provides or else becomes a div.
--
-- specification, target : not used here
-- source                : the tree that gets remapped
local function remap(specification,source,target)
    local comment = nil -- share comments
    for c in xmlcollected(source,"*") do
        if c.special then
            -- leave as is
        elseif c.ns == "m" then
            -- math: we keep the namespace and the xmlns:m attribute
        else
            local tg = c.tg
            local dt = c.dt
            local nt = #dt
            if nt == 0 or (nt == 1 and dt[1] == "") then
                if comment then
                    c.dt = comment
                else
                    xmlsetcomment(c,"empty")
                    comment = c.dt
                end
            end
            local at    = c.at
            local class = nil
            local label = nil
            if tg == "document" then
                at.href   = nil
                at.detail = nil
                at.chain  = nil
            else
                if tg == "metavariable" then
                    label = at.name
                    at.detail = "metaname-" .. label
                end
                class = makeclass(tg,at)
            end
            -- fields that end up being nil are simply not there
            local id   = at.id
            local href = at.href
            id   = id   and (lpegmatch(p_cleanid,  id)   or id)   or nil
            href = href and (lpegmatch(p_cleanhref,href) or href) or nil
            c.at = {
                class   = class,
                id      = id,
                href    = href,
                onclick = href and addclicks and f_onclick(href) or nil,
                label   = label or nil,
            }
            c.tg = cssmapping and cssmapping[tg] or "div"
        end
    end
end
2355
2356 -- local cssfile = nil  directives.register("backend.export.css", function(v) cssfile = v end)
2357
2358    local embedfile = false  directives.register("export.embed",function(v) embedfile = v end)
2359
2360    local justexport = nodes.handlers.export
2361
2362    local function wrapuptree(tree)
2363        wrapups.fixtree(tree)
2364        wrapups.collapsetree(tree)
2365        wrapups.indextree(tree)
2366        wrapups.checktree(tree)
2367        wrapups.breaktree(tree)
2368        wrapups.finalizetree(tree)
2369    end
2370
2371    local function localexport(head)
2372        starttiming(treehash)
2373
2374        local saved_treestack    = treestack
2375        local saved_nesting      = nesting
2376        local saved_currentdepth = currentdepth
2377        local saved_tree         = tree
2378        local saved_treehash     = treehash
2379        local saved_nofbreaks    = nofbreaks
2380        local saved_show_comment = show_comment
2381
2382        treestack         = { }
2383        nesting           = { }
2384        currentdepth      = 0
2385        tree              = { data = { }, fulltag == "root" } -- root
2386        treehash          = { }
2387        nofbreaks         = 0
2388        show_comment      = false
2389
2390        justexport(head)
2391        finishexport()
2392        wrapuptree(tree)
2393
2394     -- tree.data = tree.data[1].data
2395
2396        local result = concat {
2397            wholepreamble(true,true),
2398            allcontent(tree),
2399        }
2400
2401        treestack    = saved_treestack
2402        nesting      = saved_nesting
2403        currentdepth = saved_currentdepth
2404        tree         = saved_tree
2405        treehash     = saved_treehash
2406        nofbreaks    = saved_nofbreaks
2407        show_comment = saved_show_comment
2408
2409        stoptiming(treehash)
2410
2411        return result
2412
2413    end
2414
2415    structurestags.localexport = localexport
2416
2417    function structures.tags.exportbox(n,filename,buffername)
2418        local list = nodes.nuts.getbox(n)
2419        if n then
2420            local e = localexport(list)
2421            if filename and filename ~= "" then
2422                io.savedata(filename,e)
2423            elseif buffername then
2424                buffers.assign(buffername == interfaces.variables.yes and "" or buffername,e)
2425            else
2426                return e
2427            end
2428        end
2429    end
2430
2431    interfaces.implement {
2432        name      = "exportbox",
2433        arguments = { "integer", "string", "string" },
2434        actions   = structures.tags.exportbox
2435    }
2436
2437    function structurestags.finishexport()
2438
2439        if exporting then
2440            exporting = false
2441        else
2442            return
2443        end
2444
2445        local onlyxml = finetuning.export == v_xml
2446
2447        starttiming(treehash)
2448        --
2449        finishexport()
2450        --
2451        report_export("")
2452        if onlyxml then
2453            report_export("exporting xml, no other files")
2454        else
2455            report_export("exporting xml, xhtml, html and css files")
2456        end
2457        report_export("")
2458        --
2459        wrapuptree(tree)
2460        --
2461        local askedname = finetuning.file
2462        --
2463        -- we use a dedicated subpath:
2464        --
2465        -- ./jobname-export
2466        -- ./jobname-export/images
2467        -- ./jobname-export/styles
2468        -- ./jobname-export/styles
2469        -- ./jobname-export/jobname-export.xml
2470        -- ./jobname-export/jobname-export.xhtml
2471        -- ./jobname-export/jobname-export.html
2472        -- ./jobname-export/jobname-specification.lua
2473        -- ./jobname-export/styles/jobname-defaults.css
2474        -- ./jobname-export/styles/jobname-styles.css
2475        -- ./jobname-export/styles/jobname-images.css
2476        -- ./jobname-export/styles/jobname-templates.css
2477
2478        if type(askedname) ~= "string" or askedname == "" then
2479            askedname = tex.jobname
2480        end
2481
2482        local usedname  = nameonly(askedname)
2483        local basepath  = usedname .. "-export"
2484        local imagepath = joinfile(basepath,"images")
2485        local stylepath = joinfile(basepath,"styles")
2486
2487        local function validpath(what,pathname)
2488            if lfs.isdir(pathname) then
2489                report_export("using existing %s path %a",what,pathname)
2490                return pathname
2491            end
2492            lfs.mkdir(pathname)
2493            if lfs.isdir(pathname) then
2494                report_export("using cretated %s path %a",what,basepath)
2495                return pathname
2496            else
2497                report_export("unable to create %s path %a",what,basepath)
2498                return false
2499            end
2500        end
2501
2502        if not (validpath("export",basepath) and validpath("images",imagepath) and validpath("styles",stylepath)) then
2503            return
2504        end
2505
2506        -- we're now on the dedicated export subpath so we can't clash names
2507        --
2508        -- a xhtml suffix no longer seems to be work well with browsers
2509
2510        local xmlfilebase           = addsuffix(usedname .. "-raw","xml"  )
2511        local xhtmlfilebase         = addsuffix(usedname .. "-tag","xhtml")
2512        local htmlfilebase          = addsuffix(usedname .. "-div","html")
2513        local specificationfilebase = addsuffix(usedname .. "-pub","lua"  )
2514
2515        local xmlfilename           = joinfile(basepath, xmlfilebase          )
2516        local xhtmlfilename         = joinfile(basepath, xhtmlfilebase        )
2517        local htmlfilename          = joinfile(basepath, htmlfilebase         )
2518        local specificationfilename = joinfile(basepath, specificationfilebase)
2519        --
2520        local defaultfilebase       = addsuffix(usedname .. "-defaults", "css")
2521        local imagefilebase         = addsuffix(usedname .. "-images",   "css")
2522        local stylefilebase         = addsuffix(usedname .. "-styles",   "css")
2523        local templatefilebase      = addsuffix(usedname .. "-templates","css")
2524        --
2525        local defaultfilename       = joinfile(stylepath,defaultfilebase )
2526        local imagefilename         = joinfile(stylepath,imagefilebase   )
2527        local stylefilename         = joinfile(stylepath,stylefilebase   )
2528        local templatefilename      = joinfile(stylepath,templatefilebase)
2529
2530        local cssfile               = finetuning.cssfile
2531
2532        -- we keep track of all used files
2533
2534        local files = {
2535        }
2536
2537        -- we always load the defaults and optionally extra css files; we also copy the example
2538        -- css file so that we always have the latest version
2539
2540        local cssfiles = {
2541            defaultfilebase,
2542            imagefilebase,
2543            stylefilebase,
2544        }
2545
2546        local cssextra = cssfile and table.unique(settings_to_array(cssfile)) or { }
2547
2548        -- at this point we're ready for the content; the collector also does some
2549        -- housekeeping and data collecting; at this point we still have an xml
2550        -- representation that uses verbose element names and carries information in
2551        -- attributes
2552
2553        local result = allcontent(tree)
2554
2555        -- ugly but so be it:
2556
2557        local extradata = structures.tags.getextradata()
2558        if extradata then
2559            local t = { "" }
2560            t[#t+1] = "<extradata>"
2561            for name, action in sortedhash(extradata) do
2562                t[#t+1] = action()
2563            end
2564            t[#t+1] = "</extradata>"
2565            t[#t+1] = "</document>"
2566            -- we use a function because otherwise we can have a bad capture index
2567            result = gsub(result,"</document>",function()
2568                return concat(t,"\n")
2569            end)
2570        end
2571
2572        -- done with ugly
2573
2574        if onlyxml then
2575
2576            os.remove(defaultfilename)
2577            os.remove(imagefilename)
2578            os.remove(stylefilename)
2579            os.remove(templatefilename)
2580
2581            for i=1,#cssextra do
2582                os.remove(joinfile(stylepath,basename(source)))
2583            end
2584
2585         -- os.remove(xmlfilename)
2586
2587            os.remove(imagefilename)
2588            os.remove(stylefilename)
2589            os.remove(templatefilename)
2590            os.remove(xhtmlfilename)
2591            os.remove(specificationfilename)
2592            os.remove(htmlfilename)
2593
2594            result = concat {
2595                wholepreamble(true,true),
2596                "<!-- This export file is used for filtering runtime only! -->\n",
2597                result,
2598            }
2599
2600            report_export("saving xml data in %a",xmlfilename)
2601            io.savedata(xmlfilename,result)
2602
2603            return
2604
2605        end
2606
2607        local examplefilename = resolvers.findfile("export-example.css")
2608        if examplefilename then
2609            local data = io.loaddata(examplefilename)
2610            if not data or data == "" then
2611                data = "/* missing css file */"
2612            elseif not usecssnamespace then
2613                data = gsub(data,cssnamespace,"")
2614            end
2615            io.savedata(defaultfilename,data)
2616        end
2617
2618        if cssfile then
2619            for i=1,#cssextra do
2620                local source = addsuffix(cssextra[i],"css")
2621                local target = joinfile(stylepath,basename(source))
2622                cssfiles[#cssfiles+1] = source
2623                if not lfs.isfile(source) then
2624                    source = joinfile("../",source)
2625                end
2626                if lfs.isfile(source) then
2627                    report_export("copying %s",source)
2628                    file.copy(source,target)
2629                end
2630            end
2631        end
2632
2633        local x_styles, h_styles = allusedstylesheets(cssfiles,files,"styles")
2634
2635        local attach = backends.nodeinjections.attachfile
2636
2637        if embedfile and attach then
2638            -- only for testing
2639            attach {
2640                data       = concat{ wholepreamble(true), result },
2641                name       = basename(xmlfilename),
2642                registered = "export",
2643                title      = "raw xml export",
2644                method     = v_hidden,
2645                mimetype   = "application/mathml+xml",
2646            }
2647        end
2648
2649        result = concat {
2650            wholepreamble(true),
2651            x_styles, -- adds to files
2652            result,
2653        }
2654
2655        cssfiles = table.unique(cssfiles)
2656
2657        -- we're now ready for saving the result in the xml file
2658
2659        report_export("saving xml data in %a",xmlfilename)
2660        io.savedata(xmlfilename,result)
2661
2662        report_export("saving css image definitions in %a",imagefilename)
2663        io.savedata(imagefilename,wrapups.allusedimages(usedname))
2664
2665        report_export("saving css style definitions in %a",stylefilename)
2666        io.savedata(stylefilename,wrapups.allusedstyles(usedname))
2667
2668        report_export("saving css template in %a",templatefilename)
2669        io.savedata(templatefilename,allusedelements(usedname))
2670
2671        -- additionally we save an xhtml file; for that we load the file as xml tree
2672
2673        report_export("saving xhtml variant in %a",xhtmlfilename)
2674
2675        local xmltree = cleanxhtmltree(xml.convert(result))
2676
2677     -- local xmltree = xml.convert(result)
2678     -- for c in xml.collected(xmltree,"m:mtext[lastindex()=1]/m:mrow") do
2679     --     print(c)
2680     -- end
2681     -- for c in xml.collected(xmltree,"mtext/mrow") do
2682     --     print(c)
2683     -- end
2684     -- local xmltree = cleanxhtmltree(xmltree)
2685
2686        xml.save(xmltree,xhtmlfilename)
2687
2688        -- now we save a specification file that can b eused for generating an epub file
2689
2690        -- looking at identity is somewhat redundant as we also inherit from interaction
2691        -- at the tex end
2692
2693        local identity  = interactions.general.getidentity()
2694        local metadata  = structures.tags.getmetadata()
2695
2696        local specification = {
2697            name       = usedname,
2698            identifier = os.uuid(),
2699            images     = wrapups.uniqueusedimages(),
2700            imagefile  = joinfile("styles",imagefilebase),
2701            imagepath  = "images",
2702            stylepath  = "styles",
2703            xmlfiles   = { xmlfilebase },
2704            xhtmlfiles = { xhtmlfilebase },
2705            htmlfiles  = { htmlfilebase },
2706            styles     = cssfiles,
2707            htmlroot   = htmlfilebase,
2708            language   = languagenames[texgetcount("mainlanguagenumber")],
2709            title      = validstring(finetuning.title) or validstring(identity.title),
2710            subtitle   = validstring(finetuning.subtitle) or validstring(identity.subtitle),
2711            author     = validstring(finetuning.author) or validstring(identity.author),
2712            firstpage  = validstring(finetuning.firstpage),
2713            lastpage   = validstring(finetuning.lastpage),
2714            metadata   = metadata,
2715        }
2716
2717        report_export("saving specification in %a",specificationfilename,specificationfilename)
2718
2719        xml.wipe(xmltree,"metadata") -- maybe optional
2720
2721        io.savedata(specificationfilename,table.serialize(specification,true))
2722
2723        -- the html export for epub is different in the sense that it uses div's instead of
2724        -- specific tags
2725
2726        report_export("saving div based alternative in %a",htmlfilename)
2727
2728        remap(specification,xmltree)
2729
2730        -- believe it or not, but a <title/> can prevent viewing in browsers
2731
2732        local title = specification.title
2733
2734        if not title or title == "" then
2735            title = metadata.title
2736            if not title or title == "" then
2737                title = usedname -- was: "no title"
2738            end
2739        end
2740
2741        local variables = {
2742            style    = h_styles,
2743            body     = xml.tostring(xml.first(xmltree,"/div")),
2744            preamble = wholepreamble(false),
2745            title    = title,
2746        }
2747
2748        io.savedata(htmlfilename,replacetemplate(htmltemplate,variables,"xml"))
2749
2750        -- finally we report how an epub file can be made (using the specification)
2751
2752        report_export("")
2753        report_export('create epub with: mtxrun --script epub --make "%s" [--purge --rename --svgmath]',usedname)
2754        report_export("")
2755
2756        stoptiming(treehash)
2757    end
2758
2759    local enableaction = nodes.tasks.enableaction
2760
2761    function structurestags.initializeexport()
2762        if not exporting then
2763            report_export("enabling export to xml")
2764            enableaction("shipouts","nodes.handlers.export")
2765            enableaction("shipouts","nodes.handlers.accessibility")
2766            enableaction("math",    "noads.handlers.tags")
2767            enableaction("everypar","nodes.handlers.checkparcounter")
2768            luatex.registerstopactions(structurestags.finishexport)
2769            exporting = true
2770        end
2771    end
2772
2773    function structurestags.setupexport(t)
2774        merge(finetuning,t)
2775        keephyphens      = finetuning.hyphen == v_yes
2776        exportproperties = finetuning.properties
2777        if exportproperties == v_no then
2778            exportproperties = false
2779        end
2780    end
2781
2782    statistics.register("xml exporting time", function()
2783        if exporting then
2784            return string.format("%s seconds, version %s", statistics.elapsedtime(treehash),exportversion)
2785        end
2786    end)
2787
2788end
2789
2790-- These are called at the tex end:
2791
-- setupexport takes one table argument with the keys listed below; bodyfont and
-- width are scanned as dimensions

implement {
    name      = "setupexport",
    arguments = {
        {
            { "align" },
            { "bodyfont", "dimen" },
            { "width", "dimen" },
            { "properties" },
            { "hyphen" },
            { "title" },
            { "subtitle" },
            { "author" },
            { "firstpage" },
            { "lastpage" },
            { "svgstyle" },
            { "cssfile" },
            { "file" },
            { "export" },
        }
    },
    actions   = structurestags.setupexport,
}

-- the next two take no arguments

implement { name = "finishexport",     actions = structurestags.finishexport     }
implement { name = "initializeexport", actions = structurestags.initializeexport }
2824