back-exp.lua /size: 149 Kb    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['back-exp'] = {
2    version   = 1.001,
3    comment   = "companion to back-exp.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
-- Todo: share properties more with tagged pdf (or the reverse)
10
-- Because we run into the 200 local limit we use quite some do .. end wrappers .. not always
-- that nice but it has to be.
13
-- Experiments demonstrated that mapping to <div> and classes is messy because we have to
-- package attributes (some 30) into one set of (space separated but prefixed) classes
-- which only makes things worse .. so if you want something else, use xslt to get there.
17
18-- language       -> only mainlanguage, local languages should happen through start/stoplanguage
19-- tocs/registers -> maybe add a stripper (i.e. just don't flush entries in final tree)
20-- footnotes      -> css 3
21-- bodyfont       -> in styles.css
22
-- Because we need to look ahead we now always build a tree (this was optional in
-- the beginning). The extra overhead in the frontend is negligible.
25--
26-- We can optimize the code ... currently the overhead is some 10% for xml + html so
27-- there is no hurry.
28
-- todo: move critical formatters out of functions
30-- todo: delay loading (apart from basic tag stuff)
31
32-- problem : too many local variables
33
34-- check setting __i__
35
36local next, type, tonumber = next, type, tonumber
37local sub, gsub, match = string.sub, string.gsub, string.match
38local validstring = string.valid
39local lpegmatch = lpeg.match
40local utfchar, utfvalues, utflen = utf.char, utf.values, utf.len
41local concat, insert, remove, merge, sort = table.concat, table.insert, table.remove, table.merge, table.sort
42local sortedhash, sortedkeys = table.sortedhash, table.sortedkeys
43local formatters = string.formatters
44local todimen = number.todimen
45local replacetemplate = utilities.templates.replace
46
47local addsuffix, joinfile, nameonly, basename, filesuffix = file.addsuffix, file.join, file.nameonly, file.basename, file.suffix
48
49local trace_export  = false  trackers.register  ("export.trace",         function(v) trace_export  = v end)
50local trace_spacing = false  trackers.register  ("export.trace.spacing", function(v) trace_spacing = v end)
51local trace_details = false  trackers.register  ("export.trace.details", function(v) trace_details = v end)
52
53local less_state    = false  directives.register("export.lessstate",     function(v) less_state    = v end)
54local show_comment  = true   directives.register("export.comment",       function(v) show_comment  = v end)
55
56show_comment = false -- figure out why break comment
57
58-- maybe we will also support these:
59--
60-- local css_hyphens       = false  directives.register("export.css.hyphens",      function(v) css_hyphens      = v end)
61-- local css_textalign     = false  directives.register("export.css.textalign",    function(v) css_textalign    = v end)
62-- local css_bodyfontsize  = false  directives.register("export.css.bodyfontsize", function(v) css_bodyfontsize = v end)
63-- local css_textwidth     = false  directives.register("export.css.textwidth",    function(v) css_textwidth    = v end)
64
65local report_export     = logs.reporter("backend","export")
66
67local nodes             = nodes
68local attributes        = attributes
69
70local variables         = interfaces.variables
71local v_yes             = variables.yes
72local v_no              = variables.no
73local v_xml             = variables.xml
74local v_hidden          = variables.hidden
75
76local implement         = interfaces.implement
77
78local included          = backends.included
79
80local settings_to_array = utilities.parsers.settings_to_array
81local settings_to_hash  = utilities.parsers.settings_to_hash
82
83local setmetatableindex = table.setmetatableindex
84local tasks             = nodes.tasks
85local fontchar          = fonts.hashes.characters
86local fontquads         = fonts.hashes.quads
87local languagenames     = languages.numbers
88
89local texgetcount       = tex.getcount
90
91local references        = structures.references
92local structurestags    = structures.tags
93local taglist           = structurestags.taglist
94local specifications    = structurestags.specifications
95local properties        = structurestags.properties
96local locatedtag        = structurestags.locatedtag
97
98structurestags.usewithcare = { }
99
100local starttiming       = statistics.starttiming
101local stoptiming        = statistics.stoptiming
102
103local characterdata     = characters.data
104local overloads         = fonts.mappings.overloads
105
106-- todo: more locals (and optimize)
107
108local exportversion     = "0.35"
109local mathmlns          = "http://www.w3.org/1998/Math/MathML"
110local contextns         = "http://www.contextgarden.net/context/export" -- whatever suits
111local cssnamespaceurl   = "@namespace context url('%namespace%') ;"
112local cssnamespace      = "context|"
113----- cssnamespacenop   = "/* no namespace */"
114
115local usecssnamespace   = false
116
117local nofcurrentcontent = 0 -- so we don't free (less garbage collection)
118local currentcontent    = { }
119local currentnesting    = nil
120local currentattribute  = nil
121local last              = nil
122local currentparagraph  = nil
123
124local noftextblocks     = 0
125
126----- hyphencode        = 0xAD
127local hyphen            = utfchar(0xAD) -- todo: also emdash etc
128local tagsplitter       = structurestags.patterns.splitter
129----- colonsplitter     = lpeg.splitat(":")
130----- dashsplitter      = lpeg.splitat("-")
131local threshold         = 65536
132local indexing          = false
133local keephyphens       = false
134local exportproperties  = false
135
136local finetuning        = { }
137
138local treestack         = { }
139local nesting           = { }
140local currentdepth      = 0
141
142local wrapups           = { }
143
-- Root node of the export tree; child nodes are collected in 'data'. The field is
-- assigned here: a comparison ('fulltag == "root"') would only store 'false' at
-- index 1 and leave 'fulltag' unset.
local tree              = { data = { }, fulltag = "root" } -- root
145local treeroot          = tree
146local treehash          = { }
147local extras            = { }
148local checks            = { }
149local fixes             = { }
150local finalizers        = { }
151local nofbreaks         = 0
152local used              = { }
153local exporting         = false
154local restart           = false
155local specialspaces     = { [0x20] = " "  }               -- for conversion
156local somespace         = { [0x20] = true, [" "] = true } -- for testing
157local entities          = { ["&"] = "&amp;", [">"] = "&gt;", ["<"] = "&lt;" }
-- Replacement map for attribute values (turned into an lpeg replacer below); the
-- double quote has to map onto a complete entity, including the leading ampersand.
local attribentities    = { ["&"] = "&amp;", [">"] = "&gt;", ["<"] = "&lt;", ['"'] = "&quot;" }
159
160local p_entity          = lpeg.replacer(entities) -- was: entityremapper = utf.remapper(entities)
161local p_attribute       = lpeg.replacer(attribentities)
162local p_stripper        = lpeg.patterns.stripper
163local p_escaped         = lpeg.patterns.xml.escaped
164
165local f_tagid           = formatters["%s-%04i"]
166
167-- local alignmapping = {
168--     flushright = "right",
169--     middle     = "center",
170--     flushleft  = "left",
171-- }
172
173local defaultnature = "mixed" -- "inline"
174
-- Lookups of a missing key in 'used' create, store and return a fresh empty table
-- (presumably setmetatableindex installs the function as index handler). A false
-- key yields nothing.
setmetatableindex(used, function(registry,key)
    if not key then
        return
    end
    local fresh = { }
    registry[key] = fresh
    return fresh
end)
182
183local f_entity    = formatters["&#x%X;"]
184local f_attribute = formatters[" %s=%q"]
185local f_property  = formatters[" %s%s=%q"]
186
-- First lookup of a code point in 'specialspaces': the utf string is cached, a
-- hexadecimal character entity is registered for it in 'entities', and both the
-- number and the string are flagged in 'somespace'.
setmetatableindex(specialspaces, function(cache,code)
    local char = utfchar(code)
    cache[code]     = char
    entities[char]  = f_entity(code)
    somespace[code] = true
    somespace[char] = true
    return char
end)
195
196
-- Cache of resolved (possibly prefixed) element names.
local namespaced = {
    -- filled on
}

-- Elements that get a namespace prefix; every key occurs exactly once (the
-- second 'mrow' entry was a plain duplicate).
local namespaces = {
    msubsup      = "m",
    msub         = "m",
    msup         = "m",
    mn           = "m",
    mi           = "m",
    ms           = "m",
    mo           = "m",
    mtext        = "m",
    mrow         = "m",
    mfrac        = "m",
    mroot        = "m",
    msqrt        = "m",
    munderover   = "m",
    munder       = "m",
    mover        = "m",
    merror       = "m",
    math         = "m",
    mtable       = "m",
    mtr          = "m",
    mtd          = "m",
    mfenced      = "m",
    maction      = "m",
    mspace       = "m",
    -- only when testing
    mstacker     = "m",
    mstackertop  = "m",
    mstackermid  = "m",
    mstackerbot  = "m",
}
232
-- Resolves an element name on first lookup: names listed in 'namespaces' become
-- "prefix:name", all others are kept as they are. The result is cached.
setmetatableindex(namespaced, function(cache,tag)
    if not tag then
        return
    end
    local prefix   = namespaces[tag]
    local resolved = tag
    if prefix then
        resolved = prefix .. ":" .. tag
    end
    cache[tag] = resolved
    return resolved
end)
241
-- Returns the formatted ' key="value"' string with the value passed through the
-- attribute replacer, or an empty string when there is no (non empty) value.
local function attribute(key,value)
    if not value or value == "" then
        return ""
    end
    return f_attribute(key,lpegmatch(p_attribute,value))
end
249
-- Stores key/value in di.attributes, creating that table when needed. Nil, false
-- and empty values are ignored. When 'escaped' is set the value first goes
-- through the p_escaped pattern.
local function setattribute(di,key,value,escaped)
    if not value or value == "" then
        return
    end
    local current = di.attributes
    if escaped then
        value = lpegmatch(p_escaped,value)
    end
    if current then
        current[key] = value
    else
        di.attributes = { [key] = value }
    end
end
263
local listdata = { } -- this has to be done otherwise: each element can just point back to ...

-- Indexes all collected list entries that have a reference tag under the key
-- "<metadata.kind>><tag>".
function wrapups.hashlistdata()
    local collected = structures.lists.collected
    for index=1,#collected do
        local entry = collected[index]
        local tag   = entry.references.tag
        if tag then
            -- todo: use internal (see strc-lst.lua where it's set)
            local metadata = entry.metadata
            listdata[metadata.kind .. ">" .. tag] = entry
        end
    end
end
278
-- Sets one field in the taglist entry registered for 'attr'; an unknown 'attr'
-- is silently ignored.
function structurestags.setattributehash(attr,key,value) -- public hash
    local specification = taglist[attr]
    if not specification then
        -- some kind of error
        return
    end
    specification[key] = value
end
287
local usedstyles = { }

local namespacetemplate = [[
/* %what% for file %filename% */

%cssnamespaceurl%
]]

do

    -- experiment: styles and images
    --
    -- officially we should convert to bp but we round anyway

    -- /* padding      : ; */
    -- /* text-justify : inter-word ; */
    -- /* text-align : justify ; */

local documenttemplate = [[
document,
%namespace%div.document {
    font-size  : %size% !important ;
    max-width  : %width% !important ;
    text-align : %align% !important ;
    hyphens    : %hyphens% !important ;
}]]

local styletemplate = [[
%element%[detail="%detail%"],
%namespace%div.%element%.%detail% {
    display      : inline ;
    font-style   : %style% ;
    font-variant : %variant% ;
    font-weight  : %weight% ;
    font-family  : %family% ;
    color        : %color% ;
}]]

    -- Maps an alignment given as number (0..3), numeric string or interface
    -- variable onto a css text-align value; the unkeyed entries get index 1..3.
    local numbertoallign = {
        [0] = "justify", ["0"] = "justify", [variables.normal    ] = "justify",
              "right",   ["1"] = "right",   [variables.flushright] = "right",
              "center",  ["2"] = "center",  [variables.middle    ] = "center",
              "left",    ["3"] = "left",    [variables.flushleft ] = "left",
    }

    -- Builds the content of the styles css file: a namespace header, one rule
    -- for the document (sizes, alignment and hyphenation come from 'finetuning')
    -- and one rule per registered element/detail pair in 'usedstyles'.
    --
    -- filename : name that is put in the header comment
    -- returns  : the css as one string, blocks separated by an empty line
    function wrapups.allusedstyles(filename)
        local result = { replacetemplate(namespacetemplate, {
            what            = "styles",
            filename        = filename,
            namespace       = contextns,
         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or cssnamespacenop,
            cssnamespaceurl = cssnamespaceurl,
        },false,true) }
        --
        local bodyfont = finetuning.bodyfont
        local width    = finetuning.width
        local hyphens  = finetuning.hyphen
        local align    = finetuning.align
        --
        if type(bodyfont) == "number" then
            bodyfont = todimen(bodyfont)
        else
            bodyfont = "12pt"
        end
        if type(width) == "number" then
            width = todimen(width) or "50em"
        else
            width = "50em"
        end
        -- 'inherit' is the css keyword; 'inherited' is not a valid value
        if hyphens == v_yes then
            hyphens = "manual"
        else
            hyphens = "inherit"
        end
        if align then
            align = numbertoallign[align]
        end
        if not align then
            -- The former fallback tested the already converted (always non empty)
            -- hyphenation string so it could only ever give "justify".
            align = "justify"
        end
        --
        result[#result+1] = replacetemplate(documenttemplate,{
            size    = bodyfont,
            width   = width,
            align   = align,
            hyphens = hyphens
        })
        --
        local colorspecification = xml.css.colorspecification
        local fontspecification  = xml.css.fontspecification
        for element, details in sortedhash(usedstyles) do
            for detail, data in sortedhash(details) do
                local s = fontspecification(data.style)
                local c = colorspecification(data.color)
                -- anything that is not a letter or digit collapses into a dash
                detail = gsub(detail,"[^A-Za-z0-9]+","-")
                result[#result+1] = replacetemplate(styletemplate,{
                    namespace = usecssnamespace and cssnamespace or "",
                    element   = element,
                    detail    = detail,
                    style     = s.style   or "inherit",
                    variant   = s.variant or "inherit",
                    weight    = s.weight  or "inherit",
                    family    = s.family  or "inherit",
                    color     = c         or "inherit",
                    -- NOTE(review): styletemplate has no %display% slot, so this
                    -- value is currently not used by the template
                    display   = s.display and "block" or nil,
                })
            end
        end
        return concat(result,"\n\n")
    end

end
400
local usedimages = { }

do

local imagetemplate = [[
%element%[id="%id%"], %namespace%div.%element%[id="%id%"] {
    display           : block ;
    background-image  : url('%url%') ;
    background-size   : 100%% auto ;
    background-repeat : no-repeat ;
    width             : %width% ;
    height            : %height% ;
}]]

    local f_svgname = formatters["%s.svg"]
    local f_svgpage = formatters["%s-page-%s.svg"]
    local collected = { }

    -- Returns the name used in the css url: pdf files are renamed to an svg file
    -- (with a page suffix for pages beyond the first), names without scheme or
    -- with the file scheme are relocated to ../images, other names are kept.
    local function usedname(name,page)
        if filesuffix(name) == "pdf" then
            -- temp hack .. we will have a remapper
            local base = nameonly(name)
            if page and page > 1 then
                name = f_svgpage(base,page)
            else
                name = f_svgname(base)
            end
        end
        local scheme = url.hasscheme(name)
        if scheme and scheme ~= "file" then
            return name
        end
        -- or can we just use the name ?
        return joinfile("../images",basename(url.filename(name)))
    end

    -- Builds the content of the images css file: a namespace header followed by
    -- one rule per image registered in 'usedimages'. The specification of each
    -- image is also kept (by detail) for wrapups.uniqueusedimages.
    function wrapups.allusedimages(filename)
        local header = {
            what            = "images",
            filename        = filename,
            namespace       = contextns,
         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or "",
            cssnamespaceurl = cssnamespaceurl,
        }
        local result = { replacetemplate(namespacetemplate,header,false,true) }
        for element, details in sortedhash(usedimages) do
            for detail, data in sortedhash(details) do
                local name = data.name
                local page = tonumber(data.page) or 1
                local spec = {
                    element   = element,
                    id        = data.id,
                    name      = name,
                    page      = page,
                    url       = usedname(name,page),
                    width     = data.width,
                    height    = data.height,
                    used      = data.used,
                    namespace = usecssnamespace and cssnamespace or "",
                }
                result[#result+1] = replacetemplate(imagetemplate,spec)
                collected[detail] = spec
            end
        end
        return concat(result,"\n\n")
    end

    -- Gives the specifications gathered by the last allusedimages run(s).
    function wrapups.uniqueusedimages() -- todo: combine these two
        return collected
    end

end
472
473--
474
475properties.vspace = { export = "break",     nature = "display" }
476----------------- = { export = "pagebreak", nature = "display" }
477
-- Returns a new { taglist = ... } specification: a copy of list.taglist (when
-- given) with a fresh "break>n" tag appended; the break counter is incremented.
local function makebreaklist(list)
    nofbreaks = nofbreaks + 1
    local t = { }
    local l = list and list.taglist
    if l then
        -- the length of the tag list itself is what counts, not that of 'list'
        for i=1,#l do
            t[i] = l[i]
        end
    end
    t[#t+1] = "break>" .. nofbreaks -- maybe no number or 0
    return { taglist = t }
end
490
local breakattributes = {
    type = "collapse"
}

-- Creates a tree node for a break; each call takes the next break number which
-- ends up in both 'n' and 'fulltag'.
local function makebreaknode(attributes) -- maybe no fulltag
    local number = nofbreaks + 1
    nofbreaks = number
    local node = {
        tg         = "break",
        fulltag    = "break>" .. number,
        n          = number,
        element    = "break",
        nature     = "display",
     -- data       = { }, -- not needed
     -- attribute  = 0, -- not needed
     -- parnumber  = 0,
    }
    node.attributes = attributes or nil
    return node
end
509
do

    local fields = { "title", "subtitle", "author", "keywords", "url", "version" }

    local ignoredelements = false

    -- Walks the tree below 'root'. A noexport element becomes a comment element
    -- when its userdata has a comment and is otherwise replaced by false.
    -- Elements tagged "ignore" (or registered in 'ignoredelements') get an
    -- empty element name. Content entries are left alone, everything else is
    -- visited recursively.
    local function checkdocument(root)
        local data = root.data
        if not data then
            return
        end
        for i=1,#data do
            local di = data[i]
            local tg = di.tg
            if tg == "noexport" then
                local spec     = specifications[di.fulltag]
                local userdata = spec and spec.userdata
                local comment  = userdata and userdata.comment
                if comment then
                    di.element = "comment"
                    di.data = { { content = comment } }
                    userdata.comment = nil
                else
                    data[i] = false
                end
            elseif not di.content then
                if tg == "ignore" or (ignoredelements and ignoredelements[tg]) then
                    di.element = ""
                end
                checkdocument(di) -- new, else no noexport handling
            end
        end
    end

    -- Sets the attributes of the document element: always the main language
    -- and, unless the lessstate directive is set, also job, version, namespace
    -- and identity information. Afterwards the tree below it is checked.
    function extras.document(di,element,n,fulltag)
        local language = languagenames[texgetcount("mainlanguagenumber")]
        setattribute(di,"language",language)
        if not less_state then
            setattribute(di,"file",tex.jobname)
            if included.date then
                setattribute(di,"date",os.fulltime())
            end
            setattribute(di,"context",environment.version)
            setattribute(di,"version",exportversion)
            setattribute(di,"xmlns:m",mathmlns)
            local identity = interactions.general.getidentity()
            for index=1,#fields do
                local field = fields[index]
                local value = identity[field]
                if value and value ~= "" then
                    setattribute(di,field,value)
                end
            end
        end
        checkdocument(di)
    end

    -- Registers every run of lowercase letters in the given string as a tag
    -- whose element name is emptied by checkdocument.
    implement {
        name      = "ignoretagsinexport",
        arguments = "string",
        actions   = function(list)
            for tag in string.gmatch(list,"[a-z]+") do
                ignoredelements = ignoredelements or { }
                ignoredelements[tag] = true
            end
        end,
    }

end
589
do

    local marginanchors = { } -- fulltag -> number passed by the implementors below
    local margincontent = { } -- number  -> margintext node

    implement {
        name      = "settagmargintext",
        arguments = "integer",
        actions   = function(n)
            marginanchors[locatedtag("margintext")] = n
        end
    }

    implement {
        name      = "settagmarginanchor",
        arguments = "integer",
        actions   = function(n)
            marginanchors[locatedtag("marginanchor")] = n
        end
    }

    -- Remembers the margin text node under its anchor number. A node without a
    -- registered number is skipped (a nil index would raise an error).
    function checks.margintext(di)
        local i = marginanchors[di.fulltag]
        if i then
            margincontent[i] = di
        end
    end

    -- Lets the anchor node take over the identity and data of the margin text
    -- with the same number; that margin text is then flagged to be skipped.
    -- Nothing happens when there is no number or no matching margin text.
    function checks.marginanchor(di)
        local i = marginanchors[di.fulltag]
        local d = i and margincontent[i]
        if not d then
            return
        end
        --
        di.attribute = d.attribute
        di.data      = d.data
        di.detail    = d.detail
        di.element   = d.element
        di.fulltag   = d.fulltag
        di.nature    = d.nature
        di.samepar   = true
        di.tg        = d.tg
        --
        d.skip       = "ignore"
    end

end
633
do

    local symbols = { }

    -- Stores the symbol under the full tag that locatedtag gives for
    -- "delimitedsymbol".
    function structurestags.settagdelimitedsymbol(symbol)
        local fulltag = locatedtag("delimitedsymbol")
        symbols[fulltag] = { symbol = symbol }
    end

    -- Adds the stored symbol (if any) as attribute to the element.
    function extras.delimitedsymbol(di,element,n,fulltag)
        local entry = symbols[fulltag]
        if not entry then
            return
        end
        setattribute(di,"symbol",entry.symbol or nil)
    end

end
652
do

    local symbols = { }

    -- Stores the symbol under the full tag that locatedtag gives for
    -- "subsentencesymbol".
    function structurestags.settagsubsentencesymbol(symbol)
        local fulltag = locatedtag("subsentencesymbol")
        symbols[fulltag] = { symbol = symbol }
    end

    -- Adds the stored symbol (if any) as attribute to the element.
    function extras.subsentencesymbol(di,element,n,fulltag)
        local entry = symbols[fulltag]
        if not entry then
            return
        end
        setattribute(di,"symbol",entry.symbol or nil)
    end

end
671
do

    -- Keyed by full tag; holds the data of item groups as well as of items.
    local itemgroups = { }

    function structurestags.setitemgroup(packed,level,symbol)
        local fulltag = locatedtag("itemgroup")
        itemgroups[fulltag] = { packed = packed, symbol = symbol, level = level }
    end

    function structurestags.setitem(kind)
        local fulltag = locatedtag("item")
        itemgroups[fulltag] = { kind = kind }
    end

    -- Exports packed ("yes" only when set), symbol and level of an item group.
    function extras.itemgroup(di,element,n,fulltag)
        local entry = itemgroups[fulltag]
        if not entry then
            return
        end
        local packed = nil
        if entry.packed then
            packed = "yes"
        end
        setattribute(di,"packed",packed)
        setattribute(di,"symbol",entry.symbol)
        setattribute(di,"level",entry.level)
    end

    -- Exports the kind of an item when one was registered.
    function extras.item(di,element,n,fulltag)
        local entry = itemgroups[fulltag]
        local kind  = entry and entry.kind
        if kind and kind ~= "" then
            setattribute(di,"kind",kind)
        end
    end

end
710
do

    -- Moves the line number node 'di' (stored at data[i]) to the front of the
    -- first "line" or "verbatimline" element that is found by following the
    -- first child of the next sibling downwards; data[i] then becomes false.
    -- When no such element is reached nothing changes. Nodes without data are
    -- handled gracefully at every level.
    function fixes.linenumber(di,data,i)
        local ni = data[i+1]
        while ni do
            local nd = ni.data
            local d  = nd and nd[1]
            if not d then
                return
            end
            local e = d.element
            if not e then
                return
            end
            if e == "line" or e == "verbatimline" then
                local dd = d.data
                if dd then
                    insert(dd,1,di)
                else
                    d.data = { di }
                end
                data[i] = false
                return
            end
            ni = d
        end
    end

end
741
do

    local synonyms = { }
    local sortings = { }

    -- Shared by both extras: sets the "tag" attribute when a tag is known.
    local function settag(di,tag)
        if tag then
            setattribute(di,"tag",tag)
        end
    end

    function structurestags.setsynonym(tag)
        local fulltag = locatedtag("synonym")
        synonyms[fulltag] = tag
    end

    function extras.synonym(di,element,n,fulltag)
        settag(di,synonyms[fulltag])
    end

    function structurestags.setsorting(tag)
        local fulltag = locatedtag("sorting")
        sortings[fulltag] = tag
    end

    function extras.sorting(di,element,n,fulltag)
        settag(di,sortings[fulltag])
    end

end
770
do

    local strippedtag    = structurestags.strip -- we assume global styles

    local highlight      = { }
    local construct      = { }

    usedstyles.highlight = highlight
    usedstyles.construct = construct

    -- Registers style, color and mode under 'name' unless the name is already
    -- known; the first registration wins. Mode 1 is stored as "display".
    local function register(registry,name,style,color,mode)
        if registry[name] then
            return
        end
        local entry = { style = style, color = color }
        if mode == 1 then
            entry.mode = "display"
        end
        registry[name] = entry
    end

    function structurestags.sethighlight(name,style,color,mode)
        register(highlight,name,style,color,mode)
    end

    function structurestags.setconstruct(name,style,color,mode)
        register(construct,name,style,color,mode)
    end

end
802
do

    local descriptions = { } -- internal number -> full tag of the description
    local symbols      = { } -- internal number -> full tag of the descriptionsymbol
    local linked       = { } -- full tag -> shared number (set by the finalizer)

    -- we could move the notation itself to the first reference (can be an option)

    -- Stores the located tag under the internal reference number of note (tag,n).
    -- Notes without references or without internal number are skipped, as a nil
    -- table index would raise an error.
    local function register(registry,tag,n,tagname)
        local nd = structures.notes.get(tag,n)
        if nd then
            local refs     = nd.references
            local internal = refs and refs.internal
            if internal then
                registry[internal] = locatedtag(tagname)
            end
        end
    end

    function structurestags.setnotation(tag,n) -- needs checking (is tag needed)
        -- we can also use the internals hash or list
        register(descriptions,tag,n,"description")
    end

    function structurestags.setnotationsymbol(tag,n) -- needs checking (is tag needed)
        -- todo: use listdata instead
        register(symbols,tag,n,"descriptionsymbol")
    end

    -- Gives each description that also has a symbol a number (in sorted order
    -- of the internal numbers) and links both full tags to that number.
    function finalizers.descriptions(tree)
        local n = 0
        for id, tag in sortedhash(descriptions) do
            local sym = symbols[id]
            if sym then
                n = n + 1
                linked[tag] = n
                linked[sym] = n
            end
        end
    end

    -- Exports the link number as "insert" attribute when the tag is linked.
    function extras.description(di,element,n,fulltag)
        local id = linked[fulltag]
        if id then
            setattribute(di,"insert",id)
        end
    end

    -- Same as extras.description, for the symbol that belongs to it.
    function extras.descriptionsymbol(di,element,n,fulltag)
        local id = linked[fulltag]
        if id then
            setattribute(di,"insert",id)
        end
    end

end
855
856-- -- todo: ignore breaks
857--
858-- function extras.verbatimline(di,element,n,fulltag)
859--     inspect(di)
860-- end
861
do

    local f_id       = formatters["%s-%s"]
    local image      = { }
    usedimages.image = image

    structurestags.usewithcare.images = image

    -- Registers the figure data under the located image tag. The id is made
    -- from the tag name and index of the specification, the page is only kept
    -- when larger than one and the dimensions are converted with todimen.
    function structurestags.setfigure(name,used,page,width,height,label)
        local fulltag = locatedtag("image")
        local spec    = specifications[fulltag]
        if spec then
            local pagenumber = tonumber(page)
            image[fulltag] = {
                id     = f_id(spec.tagname,spec.tagindex),
                name   = name,
                used   = used,
                page   = pagenumber and pagenumber > 1 and pagenumber or nil,
                width  = todimen(width, "cm","%0.3F%s"),
                height = todimen(height,"cm","%0.3F%s"),
                label  = label,
            }
        else
            -- we ignore images in layers in the background / pagebody
        end
    end

    -- Exports the registered figure data as attributes of the image element.
    function extras.image(di,element,n,fulltag)
        local data = image[fulltag]
        if data then
            setattribute(di,"name",data.name)
            setattribute(di,"page",data.page)
            setattribute(di,"id",data.id)
            setattribute(di,"width",data.width)
            setattribute(di,"height",data.height)
            -- the label attribute takes the registered label, not the height
            setattribute(di,"label",data.label)
        end
    end

end
902
do

    local combinations = { }

    -- Stores nx and ny under the full tag that locatedtag gives for
    -- "combination".
    function structurestags.setcombination(nx,ny)
        local fulltag = locatedtag("combination")
        combinations[fulltag] = { nx = nx, ny = ny }
    end

    -- Exports the stored nx and ny as attributes.
    function extras.combination(di,element,n,fulltag)
        local entry = combinations[fulltag]
        if not entry then
            return
        end
        setattribute(di,"nx",entry.nx)
        setattribute(di,"ny",entry.ny)
    end

end
923
924-- quite some code deals with exporting references  --
925
926-- links:
927--
928-- url      :
929-- file     :
930-- internal : automatic location
931-- location : named reference
932
933-- references:
934--
935-- implicit : automatic reference
936-- explicit : named reference
937
938local evaluators = { }
939local specials   = { }
940local explicits  = { }
941
-- Exports the inner reference (escaped) as "location" attribute.
function evaluators.inner(di,var)
    local target = var.inner
    if not target then
        return
    end
    setattribute(di,"location",target,true)
end
948
-- Exports the outer reference as "url" attribute when it resolves to an url
-- and otherwise as "file" attribute when it resolves to a file.
function evaluators.outer(di,var)
    local outer = var.outer
    local file, address = references.checkedfileorurl(outer,outer)
    if address then
        setattribute(di,"url",address,true)
        return
    end
    if file then
        setattribute(di,"file",file,true)
    end
end
957
-- Exports the checked file as "file" and the inner reference as "inner".
-- NOTE(review): the file comes from var.f while evaluators.outer uses
-- var.outer; confirm that this is the intended field.
evaluators["outer with inner"] = function(di,var)
    local file  = references.checkedfile(var.f)
    if file then
        setattribute(di,"file",file,true)
    end
    local target = var.inner
    if target then
        setattribute(di,"inner",target,true)
    end
end
968
-- Delegates to the handler registered in 'specials' for var.special; unknown
-- specials are ignored.
function evaluators.special(di,var)
    local handler = specials[var.special]
    if not handler then
        return
    end
    handler(di,var)
end
975
976local referencehash   = { }
977local destinationhash = { }
978
979do
980
981    evaluators["special outer with operation"]     = evaluators.special
982    evaluators["special operation"]                = evaluators.special
983    evaluators["special operation with arguments"] = evaluators.special
984
985    function specials.url(di,var)
986        local url = references.checkedurl(var.operation)
987        if url and url ~= "" then
988            setattribute(di,"url",url,true)
989        end
990    end
991
992    function specials.file(di,var)
993        local file = references.checkedfile(var.operation)
994        if file and file ~= "" then
995            setattribute(di,"file",file,true)
996        end
997    end
998
999    function specials.fileorurl(di,var)
1000        local file, url = references.checkedfileorurl(var.operation,var.operation)
1001        if url and url ~= "" then
1002            setattribute(di,"url",url,true)
1003        elseif file and file ~= "" then
1004            setattribute(di,"file",file,true)
1005        end
1006    end
1007
1008    function specials.internal(di,var)
1009        local internal = references.checkedurl(var.operation)
1010        if internal then
1011            setattribute(di,"location",internal)
1012        end
1013    end
1014
1015    local function adddestination(di,references) -- todo: specials -> exporters and then concat
1016        if references then
1017            local reference = references.reference
1018            if reference and reference ~= "" then
1019                local prefix = references.prefix
1020                if prefix and prefix ~= "" then
1021                    setattribute(di,"prefix",prefix,true)
1022                end
1023                setattribute(di,"destination",reference,true)
1024                for i=1,#references do
1025                    local r = references[i]
1026                    local e = evaluators[r.kind]
1027                    if e then
1028                        e(di,r)
1029                    end
1030                end
1031            end
1032        end
1033    end
1034
1035    function extras.addimplicit(di,references)
1036        if references then
1037            local internal = references.internal
1038            if internal then
1039                setattribute(di,"implicit",internal)
1040            end
1041        end
1042    end
1043
1044    function extras.addinternal(di,references)
1045        if references then
1046            local internal = references.internal
1047            if internal then
1048                setattribute(di,"internal",internal)
1049            end
1050        end
1051    end
1052
1053    local p_firstpart = lpeg.Cs((1-lpeg.P(","))^0)
1054
1055    local function addreference(di,references)
1056        if references then
1057            local reference = references.reference
1058            if reference and reference ~= "" then
1059                local prefix = references.prefix
1060                if prefix and prefix ~= "" then
1061                    setattribute(di,"prefix",prefix)
1062                end
1063                setattribute(di,"reference",reference,true)
1064                setattribute(di,"explicit",lpegmatch(p_firstpart,reference),true)
1065            end
1066            local internal = references.internal
1067            if internal and internal ~= "" then
1068                setattribute(di,"implicit",internal)
1069            end
1070        end
1071    end
1072
1073    local function link(di,element,n,fulltag)
1074        -- for instance in lists a link has nested elements and no own text
1075        local reference = referencehash[fulltag]
1076        if reference then
1077            adddestination(di,structures.references.get(reference))
1078            return true
1079        else
1080            local data = di.data
1081            if data then
1082                for i=1,#data do
1083                    local di = data[i]
1084                    if di then
1085                        local fulltag = di.fulltag
1086                        if fulltag and link(di,element,n,fulltag) then
1087                            return true
1088                        end
1089                    end
1090                end
1091            end
1092        end
1093    end
1094
1095    local function reference(di,element,n,fulltag)
1096        local destination = destinationhash[fulltag]
1097        if destination then
1098            local d = structures.references.internals[destination]
1099            if d then
1100                addreference(di,d.references)
1101                return true
1102            else
1103                return false
1104            end
1105        else
1106            local data = di.data
1107            if data then
1108                for i=1,#data do
1109                    local di = data[i]
1110                    if di then
1111                        local fulltag = di.fulltag
1112                        if fulltag and reference(di,element,n,fulltag) then
1113                            return true
1114                        end
1115                    end
1116                end
1117            end
1118        end
1119    end
1120
1121    extras.adddestination = adddestination
1122    extras.addreference   = addreference
1123
1124    extras.link           = link
1125    extras.reference      = reference
1126
1127end
1128
1129-- no settings, as these are obscure ones
1130
1131do
1132
1133    local automathrows   = true  directives.register("export.math.autorows",   function(v) automathrows   = v end)
1134    local automathapply  = true  directives.register("export.math.autoapply",  function(v) automathapply  = v end)
1135    local automathnumber = true  directives.register("export.math.autonumber", function(v) automathnumber = v end)
1136    local automathstrip  = true  directives.register("export.math.autostrip",  function(v) automathstrip  = v end)
1137
1138    local functions      = mathematics.categories.functions
1139
1140    local function collapse(di,i,data,ndata,detail,element)
1141        local collapsing = di.data
1142        if data then
1143            di.element = element
1144            di.detail = nil
1145            i = i + 1
1146            while i <= ndata do
1147                local dn = data[i]
1148                if dn.detail == detail then
1149                    collapsing[#collapsing+1] = dn.data[1]
1150                    dn.skip = "ignore"
1151                    i = i + 1
1152                else
1153                    break
1154                end
1155            end
1156        end
1157        return i
1158    end
1159
1160    local function collapse_mn(di,i,data,ndata)
1161        -- this is tricky ... we need to make sure that we wrap in mrows if we want
1162        -- to bypass this one
1163        local collapsing = di.data
1164        if data then
1165            i = i + 1
1166            while i <= ndata do
1167                local dn = data[i]
1168                local tg = dn.tg
1169                if tg == "mn" then
1170                    collapsing[#collapsing+1] = dn.data[1]
1171                    dn.skip = "ignore"
1172                    i = i + 1
1173                elseif tg == "mo" then
1174                    local d = dn.data[1]
1175                    if d == "." then
1176                        collapsing[#collapsing+1] = d
1177                        dn.skip = "ignore"
1178                        i = i + 1
1179                    else
1180                        break
1181                    end
1182                else
1183                    break
1184                end
1185            end
1186        end
1187        return i
1188    end
1189
1190    -- maybe delay __i__ till we need it
1191
1192    local apply_function = {
1193        {
1194            element = "mo",
1195         -- comment = "apply function",
1196         -- data    = { utfchar(0x2061) },
1197            data    = { "&#x2061;" },
1198            nature  = "mixed",
1199        }
1200    }
1201
1202    local functioncontent = { }
1203
1204    setmetatableindex(functioncontent,function(t,k)
1205        local v = { { content = k } }
1206        t[k] = v
1207        return v
1208    end)
1209
1210    local dummy_nucleus = {
1211        element   = "mtext",
1212        data      = { content = "" },
1213        nature    = "inline",
1214        comment   = "dummy nucleus",
1215        fulltag   = "mtext>0"
1216    }
1217
1218    local function accentchar(d)
1219        for i=1,3 do
1220            d = d.data
1221            if not d then
1222                return
1223            end
1224            d = d[1]
1225            if not d then
1226                return
1227            end
1228            local tg = d.tg
1229            if tg == "mover" then
1230                local s = specifications[d.fulltag]
1231                local t = s.top
1232                if t then
1233                    d = d.data[1]
1234                    local d1 = d.data[1]
1235                    d1.content = utfchar(t)
1236                    d.data = { d1 }
1237                    return d
1238                end
1239            elseif tg == "munder" then
1240                local s = specifications[d.fulltag]
1241                local b = s.bottom
1242                if b then
1243                    d = d.data[1]
1244                    local d1 = d.data[1]
1245                    d1.content = utfchar(b)
1246                    d.data = { d1 }
1247                    return d
1248                end
1249            end
1250        end
1251    end
1252
1253    local no_mrow = {
1254        mrow     = true,
1255        mfenced  = true,
1256        mfrac    = true,
1257        mroot    = true,
1258        msqrt    = true,
1259        mtable   = true,
1260        mi       = true,
1261        mo       = true,
1262        mn       = true,
1263    }
1264
1265    local function checkmath(root) -- we can provide utf.toentities as an option
1266        local data = root.data
1267        if data then
1268            local ndata = #data
1269            local roottg = root.tg
1270            if roottg == "msubsup" then
1271                -- kind of tricky: we have a diufferent order in display mode
1272                local nucleus, superscript, subscript
1273                if ndata > 3 then
1274                    -- error
1275                else
1276                    for i=1,ndata do
1277                        local di = data[i]
1278                        if not di then
1279                            -- weird
1280                        elseif di.content then
1281                            -- text
1282                        else
1283                            local s = specifications[di.fulltag]
1284                            if s.subscript then
1285                                subscript = i
1286                            elseif s.superscript then
1287                                superscript = i
1288                            else
1289                                nucleus = i
1290                            end
1291                        end
1292                    end
1293                    if superscript or subscript then
1294                        -- we probably always have 3 anyway ... needs checking
1295                        local nuc = nucleus     and data[nucleus]
1296                        local sub = subscript   and data[subscript]
1297                        local sup = superscript and data[superscript]
1298                        local n = 0 -- play safe
1299                        if nuc then n = n + 1 ; data[n] = nuc end
1300                        if sub then n = n + 1 ; data[n] = sub end
1301                        if sup then n = n + 1 ; data[n] = sup end
1302                    end
1303                end
1304         -- elseif roottg == "msup" or roottg == "msub" then
1305         --     -- m$^2$
1306         --     if ndata == 1 then
1307         --         local d = data[1]
1308         --         data[2] = d
1309         --         d.__i__ = 2
1310         --         data[1] = dummy_nucleus
1311         --     end
1312            elseif roottg == "mfenced" then
1313                local s = specifications[root.fulltag]
1314                local l, m, r = s.left, s.middle, s.right
1315                if l then
1316                    l = utfchar(l)
1317                end
1318                if m then
1319                    local t = { }
1320                    for i=1,#m do
1321                        t[i] = utfchar(m[i])
1322                    end
1323                    m = concat(t)
1324                end
1325                if r then
1326                    r = utfchar(r)
1327                end
1328                root.attributes = {
1329                    open       = l,
1330                    separators = m,
1331                    close      = r,
1332                }
1333            end
1334            if ndata == 0 then
1335                root.skip = "comment" -- get rid of weird artefacts
1336                root.nota = "weird"
1337                return
1338            elseif ndata == 1 then
1339                local d = data[1]
1340                if not d or d == "" then
1341                    root.skip = "comment"
1342                    return
1343                elseif d.content then
1344                    return
1345                else -- if ndata == 1 then
1346                    local tg = d.tg
1347                    if automathrows and (roottg == "mrow" or roottg == "mtext") then
1348                        -- maybe just always ! check spec first
1349                        -- or we can have chesks.* for each as we then can flatten
1350                        if no_mrow[tg] then
1351                            root.skip = "comment"
1352                        end
1353                    elseif roottg == "mo" then
1354                        if tg == "mo" then
1355                            root.skip = "comment"
1356                        end
1357                    end
1358                end
1359            end
1360            local i = 1
1361            while i <= ndata do                   -- -- -- TOO MUCH NESTED CHECKING -- -- --
1362                local di = data[i]
1363                if di and not di.content then
1364                    local tg = di.tg
1365                    if tg == "math" then
1366                     -- di.element = "mrow" -- when properties
1367                        di.skip = "comment"
1368                        checkmath(di)
1369                        i = i + 1
1370                    elseif tg == "mover" then
1371                        local s = specifications[di.fulltag]
1372                        if s.accent then
1373                            local t = s.top
1374                            local d = di.data
1375                            -- todo: accent = "false" (for scripts like limits)
1376                            di.attributes = {
1377                                accent = "true",
1378                            }
1379                            -- todo: p.topfixed
1380                            if t then
1381                                -- mover
1382                                d[1].data[1].content = utfchar(t)
1383                                di.data = { d[2], d[1] }
1384                            end
1385                        else
1386                            -- can't happen
1387                        end
1388                        checkmath(di)
1389                        i = i + 1
1390                    elseif tg == "munder" then
1391                        local s = specifications[di.fulltag]
1392                        if s.accent then
1393                            local b = s.bottom
1394                            local d = di.data
1395                            -- todo: accent = "false" (for scripts like limits)
1396                            di.attributes = {
1397                                accent = "true",
1398                            }
1399                         -- todo: p.bottomfixed
1400                            if b then
1401                                -- munder
1402                                d[2].data[1].content = utfchar(b)
1403                            end
1404                        else
1405                            -- can't happen
1406                        end
1407                        checkmath(di)
1408                        i = i + 1
1409                    elseif tg == "munderover" then
1410                        local s = specifications[di.fulltag]
1411                        if s.accent then
1412                            local t = s.top
1413                            local b = s.bottom
1414                            local d = di.data
1415                            -- todo: accent      = "false" (for scripts like limits)
1416                            -- todo: accentunder = "false" (for scripts like limits)
1417                            di.attributes = {
1418                                accent      = "true",
1419                                accentunder = "true",
1420                            }
1421                         -- todo: p.topfixed
1422                         -- todo: p.bottomfixed
1423                            if t and b then
1424                                -- munderover
1425                                d[1].data[1].content = utfchar(t)
1426                                d[3].data[1].content = utfchar(b)
1427                                di.data = { d[2], d[3], d[1] }
1428                            else
1429                                -- can't happen
1430                            end
1431                        else
1432                            -- can't happen
1433                        end
1434                        checkmath(di)
1435                        i = i + 1
1436                    elseif tg == "mstacker" then
1437                        local d = di.data
1438                        local d1 = d[1]
1439                        local d2 = d[2]
1440                        local d3 = d[3]
1441                        local t1 = d1 and d1.tg
1442                        local t2 = d2 and d2.tg
1443                        local t3 = d3 and d3.tg
1444                        local m  = nil -- d1.data[1]
1445                        local t  = nil
1446                        local b  = nil
1447                        -- only accent when top / bot have stretch
1448                        -- normally we flush [base under over] which is better for tagged pdf
1449                        if t1 == "mstackermid" then
1450                            m = accentchar(d1) -- or m
1451                            if t2 == "mstackertop" then
1452                                if t3 == "mstackerbot" then
1453                                    t = accentchar(d2)
1454                                    b = accentchar(d3)
1455                                    di.element = "munderover"
1456                                    di.data    = { m or d1.data[1], b or d3.data[1], t or d2.data[1] }
1457                                else
1458                                    t = accentchar(d2)
1459                                    di.element = "mover"
1460                                    di.data    = { m or d1.data[1], t or d2.data[1] }
1461                                end
1462                            elseif t2 == "mstackerbot" then
1463                                if t3 == "mstackertop" then
1464                                    b = accentchar(d2)
1465                                    t = accentchar(d3)
1466                                    di.element = "munderover"
1467                                    di.data    = { m or d1.data[1], t or d3.data[1], m, b or d2.data[1] }
1468                                else
1469                                    b = accentchar(d2)
1470                                    di.element = "munder"
1471                                    di.data    = { m or d1.data[1], b or d2.data[1] }
1472                                end
1473                            else
1474                                -- can't happen
1475                            end
1476                        else
1477                            -- can't happen
1478                        end
1479                        if t or b then
1480                            di.attributes = {
1481                                accent      = t and "true" or nil,
1482                                accentunder = b and "true" or nil,
1483                            }
1484                            di.detail = nil
1485                        end
1486                        checkmath(di)
1487                        i = i + 1
1488                    elseif tg == "mroot" then
1489                        local data = di.data
1490                        local size = #data
1491                        if size == 1 then
1492                            -- else firefox complains ... code in math-tag (for pdf tagging)
1493                            di.element = "msqrt"
1494                        elseif size == 2 then
1495                            data[1], data[2] = data[2], data[1]
1496                        end
1497                        checkmath(di)
1498                        i = i + 1
1499                    elseif tg == "break" then
1500                        di.skip = "comment"
1501                        i = i + 1
1502                    elseif tg == "mtext" then
1503                        -- this is only needed for unboxed mtexts ... all kind of special
1504                        -- tex border cases and optimizations ... trial and error
1505                        local data = di.data
1506                        if #data > 1 then
1507                            for i=1,#data do
1508                                local di = data[i]
1509                                local content = di.content
1510                                if content then
1511                                    data[i] = {
1512                                        element = "mtext",
1513                                        nature  = "inline",
1514                                        data    = { di },
1515                                        n       = 0,
1516                                    }
1517                                elseif di.tg == "math" then
1518                                    local di = di.data[1]
1519                                    if di then
1520                                        data[i] = di
1521                                        checkmath(di)
1522                                    end
1523                                end
1524                            end
1525                            di.element = "mrow"
1526                         -- di.tg = "mrow"
1527                         -- di.nature  = "inline"
1528                        end
1529                        checkmath(di)
1530                        i = i + 1
1531                    elseif tg == "mrow" and detail then -- hm, falls through
1532                        di.detail = nil
1533                        checkmath(di)
1534                        di = {
1535                            element    = "maction",
1536                            nature     = "display",
1537                            attributes = { actiontype = detail },
1538                            data       = { di },
1539                            n          = 0,
1540                        }
1541                        data[i] = di
1542                        i = i + 1
1543                    else
1544                        local category = di.mathcategory
1545                        if category then
1546                         -- no checkmath(di) here
1547                            if category == 1 then -- mo
1548                                i = collapse(di,i,data,ndata,detail,"mo")
1549                            elseif category == 2 then -- mi
1550                                i = collapse(di,i,data,ndata,detail,"mi")
1551                            elseif category == 3 then -- mn
1552                                i = collapse(di,i,data,ndata,detail,"mn")
1553                            elseif category == 4 then -- ms
1554                                i = collapse(di,i,data,ndata,detail,"ms")
1555                            elseif category >= 1000 then
1556                                local apply = category >= 2000
1557                                if apply then
1558                                    category = category - 1000
1559                                end
1560                                if tg == "mi" then -- function
1561                                    if roottg == "mrow" then
1562                                        root.skip = "comment"
1563                                        root.element = "function"
1564                                    end
1565                                    i = collapse(di,i,data,ndata,detail,"mi")
1566                                    local tag = functions[category]
1567                                    if tag then
1568                                        di.data = functioncontent[tag]
1569                                    end
1570                                    if apply then
1571                                        di.after = apply_function
1572                                    elseif automathapply then -- make function
1573                                        local following
1574                                        if i <= ndata then
1575                                            -- normally not the case
1576                                            following = data[i]
1577                                        else
1578                                            local parent = di.__p__ -- == root
1579                                            if parent.tg == "mrow" then
1580                                                parent = parent.__p__
1581                                            end
1582                                            local index = parent.__i__
1583                                            following = parent.data[index+1]
1584                                        end
1585                                        if following then
1586                                            local tg = following.tg
1587                                            if tg == "mrow" or tg == "mfenced" then -- we need to figure out the right condition
1588                                                di.after = apply_function
1589                                            end
1590                                        end
1591                                    end
1592                                else -- some problem
1593                                    checkmath(di)
1594                                    i = i + 1
1595                                end
1596                            else
1597                                checkmath(di)
1598                                i = i + 1
1599                            end
1600                        elseif automathnumber and tg == "mn" then
1601                            checkmath(di)
1602                            i = collapse_mn(di,i,data,ndata)
1603                        else
1604                            checkmath(di)
1605                            i = i + 1
1606                        end
1607                    end
1608                else -- can be string or boolean
1609                    if parenttg ~= "mtext" and di == " " then
1610                        data[i] = false
1611                    end
1612                    i = i + 1
1613                end
1614            end
1615        end
1616    end
1617
1618    local function stripmath(di)
1619        if not di then
1620            --
1621        elseif di.content then
1622            return di
1623        else
1624            local tg = di.tg
1625            if tg == "mtext" or tg == "ms" then
1626                return di
1627            else
1628                local data = di.data
1629                local ndata = #data
1630                local n = 0
1631                for i=1,ndata do
1632                    local d = data[i]
1633                    if d and not d.content then
1634                        d = stripmath(d)
1635                    end
1636                    if d then
1637                        local content = d.content
1638                        if not content then
1639                            n = n + 1
1640                            d.__i__ = n
1641                            data[n] = d
1642                        elseif content == " " or content == "" then
1643                            if d.tg == "mspace" then
1644                                -- we append or prepend a space to a preceding or following mtext
1645                                local parent = di.__p__
1646                                local index  = di.__i__ -- == i
1647                                local data   = parent.data
1648                                if index > 1 then
1649                                    local d = data[index-1]
1650                                    if d.tg == "mtext" then
1651                                        local dd = d.data
1652                                        local dn = dd[#dd]
1653                                        local dc = dn.content
1654                                        if dc then
1655                                            dn.content = dc .. content
1656                                        end
1657                                    end
1658                                elseif index < ndata then
1659                                    local d = data[index+1]
1660                                    if d.tg == "mtext" then
1661                                        local dd = d.data
1662                                        local dn = dd[1]
1663                                        local dc = dn.content
1664                                        if dc then
1665                                            dn.content = content .. dc
1666                                        end
1667                                    end
1668                                end
1669                            end
1670                        else
1671                            n = n + 1
1672                            data[n] = d
1673                        end
1674                    end
1675                end
1676                for i=ndata,n+1,-1 do
1677                    data[i] = nil
1678                end
1679                if #data > 0 then
1680                    return di
1681                end
1682            end
1683        end
1684    end
1685
1686    function checks.math(di)
1687        if di.skip == "comment" then
1688            -- already done, kind of weird, happens in mathmatrix, maybe some collapse
1689            -- issue that i need to look into
1690        else
1691            local specification = specifications[di.fulltag]
1692            local mode = specification and specification.mode == "display" and "block" or "inline"
1693            di.attributes = {
1694                ["display"] = mode,
1695                ["xmlns:m"] = mathmlns,
1696            }
1697            -- can be option if needed:
1698            if mode == "inline" then
1699             -- di.nature = "mixed"  -- else spacing problem (maybe inline)
1700                di.nature = "inline" -- we need to catch x$X$x and x $X$ x
1701            else
1702                di.nature = "display"
1703            end
1704            if automathstrip then
1705                stripmath(di)
1706            end
1707            checkmath(di)
1708        end
1709    end
1710
1711    -- this one can replace some of the previous code .. todo (test on mathmatrix)
1712
1713    -- ignore with no data can be removed
1714
1715    local function checked(d)
1716        local n = #d
1717        if n == 1 then
1718            local di = d[1]
1719            local tg = di.tg
1720            if tg == "ignore" then
1721                -- todo: we can move ignore's data one level up
1722                return 1
1723            elseif di.content then
1724                return 1
1725            else
1726                local dd = di.data
1727                if #dd > 0 and checked(dd) > 0 then
1728                    return 1
1729                else
1730                    return 0
1731                end
1732            end
1733        else
1734            local m = 0
1735            for i=1,n do
1736                local di = d[i]
1737                local tg = di.tg
1738                if tg == "ignore" then
1739                    -- skip
1740                elseif di.content then
1741                    m = m + 1
1742                    d[m] = di
1743                else
1744                    local dd = di.data
1745                    if #dd > 0 and checked(dd) > 0 then
1746                        m = m + 1
1747                        d[m] = di
1748                    end
1749                end
1750            end
1751            if m < n then
1752                for i=n,m+1,-1 do
1753                    d[i] = nil
1754                end
1755            end
1756            return m
1757        end
1758    end
1759
    -- Checker hook for mrow elements. It currently does nothing: the pruning of
    -- the row's data with 'checked' is kept here but disabled.
    function checks.mrow(di)
     -- local d = di.data
     -- if d then
     --     checked(d)
     -- end
    end
1766
1767    -- we can move more checks here
1768
1769    local function flatten(di)
1770        local r = di.__p__
1771        while r do
1772            local d = r.data
1773            local n = #d
1774            if d and n > 1 then
1775                n = checked(d)
1776            end
1777            local tg = r.tg
1778            if n == 1 and (tg == "mtext" or tg == "mrow") then
1779                r.skip = "comment" -- weird error
1780                r = r.__p__
1781            else
1782                break
1783            end
1784        end
1785    end
1786
1787    function checks.mtable(di)
1788        flatten(di)
1789        local d = di.data
1790        for i=1,#d do
1791            local d = d[i]
1792            if d.tg == "mtr" then
1793                local d = d.data
1794                for i=1,#d do
1795                    local d = d[i]
1796                    if d.tg == "mtd" then
1797                        -- okay
1798                    elseif d.content then
1799                        d.content = ""
1800                    else
1801                        d.skip = "comment" -- weird error
1802                    end
1803                end
1804            elseif d.content then
1805                d.content = ""
1806            else
1807                d.skip = "comment" -- weird error
1808            end
1809        end
1810    end
1811
1812    do
1813
1814        local a, z, A, Z = 0x61, 0x7A, 0x41, 0x5A
1815
1816        function extras.mi(di,element,n,fulltag) -- check with content
1817            local str = di.data[1].content
1818            if str and sub(str,1,1) ~= "&" then -- hack but good enough (maybe gsub op eerste)
1819                for v in utfvalues(str) do
1820                    if (v >= a and v <= z) or (v >= A and v <= Z) then
1821                        local a = di.attributes
1822                        if a then
1823                            a.mathvariant = "normal"
1824                        else
1825                            di.attributes = { mathvariant = "normal" }
1826                        end
1827                    end
1828                end
1829            end
1830        end
1831
1832    end
1833
1834    function extras.msub(di,element,n,fulltag)
1835        -- m$^2$
1836        local data = di.data
1837        if #data == 1 then
1838            local d = data[1]
1839            data[2] = d
1840            d.__i__ = 2
1841            data[1] = dummy_nucleus
1842        end
1843    end
1844
1845    extras.msup = extras.msub
1846
1847end
1848
do

    -- formula numbers, indexed by the located formulacontent tag
    local registered = { }

    -- Remember the number 'n' for the formulacontent tag that is currently located.
    function structurestags.setformulacontent(n)
        local fulltag = locatedtag("formulacontent")
        registered[fulltag] = { n = n }
    end

    -- Export the remembered number (if any) as attribute "n".
    function extras.formulacontent(di,element,n,fulltag)
        local entry = registered[fulltag]
        if not entry then
            return
        end
        setattribute(di,"n",entry.n)
    end

end
1867
1868do
1869
1870    local registered = structures.sections.registered
1871
1872    local function resolve(di,element,n,fulltag)
1873        local data = listdata[fulltag]
1874        if data then
1875            extras.addreference(di,data.references)
1876            return true
1877        else
1878            local data = di.data
1879            if data then
1880                for i=1,#data do
1881                    local di = data[i]
1882                    if di then
1883                        local ft = di.fulltag
1884                        if ft and resolve(di,element,n,ft) then
1885                            return true
1886                        end
1887                    end
1888                end
1889            end
1890        end
1891    end
1892
1893    function extras.section(di,element,n,fulltag)
1894        local r = registered[specifications[fulltag].detail]
1895        if r then
1896            setattribute(di,"level",r.level)
1897        end
1898        resolve(di,element,n,fulltag)
1899    end
1900
1901    local floats = { }
1902
1903    function structurestags.setfloat(options,method)
1904        floats[locatedtag("float")] = {
1905            options = options,
1906            method  = method,
1907        }
1908    end
1909
1910    function extras.float(di,element,n,fulltag)
1911        local hash = floats[fulltag]
1912        if hash then
1913            local method  = hash.method
1914            if not method or method == "" then
1915                method = "here"
1916            end
1917            setattribute(di,"method",method)
1918            local options = hash.options
1919            if options and options ~= "" then
1920                options = settings_to_hash(options)
1921                options[method] = nil
1922                options = concat(sortedkeys(options),",")
1923                if #options > 0 then
1924                    setattribute(di,"options",options)
1925                end
1926            end
1927        end
1928        resolve(di,element,n,fulltag)
1929    end
1930
1931    -- todo: internal is already hashed
1932
1933    function structurestags.setlist(n)
1934        local data = structures.lists.getresult(n)
1935        if data then
1936            referencehash[locatedtag("listitem")] = data
1937        end
1938    end
1939
1940    function extras.listitem(di,element,n,fulltag)
1941        local data = referencehash[fulltag]
1942        if data then
1943            extras.addinternal(di,data.references)
1944            return true
1945        end
1946    end
1947
1948end
1949
1950do
1951
1952    -- todo: internal is already hashed
1953
1954    function structurestags.setregister(tag,n) -- check if tag is needed
1955        local data = structures.registers.get(tag,n)
1956        if data then
1957            referencehash[locatedtag("registerlocation")] = data
1958        end
1959    end
1960
1961    function extras.registerlocation(di,element,n,fulltag)
1962        local data = referencehash[fulltag]
1963        if type(data) == "table" then
1964            extras.addinternal(di,data.references)
1965            return true
1966        else
1967            -- needs checking, probably bookmarks
1968        end
1969    end
1970
1971    function extras.registerpages(di,element,n,fulltag) -- ignorebreaks
1972        local data = di.data
1973        for i=1,#data do
1974            local d = data[i]
1975            if d.content == " " then
1976                d.content = ""
1977            end
1978        end
1979    end
1980
1981    function extras.registerseparator(di,element,n,fulltag) -- ignorespaces
1982        local data = di.data
1983        for i=1,#data do
1984            local d = data[i]
1985            local c = d.content
1986            if type(c) == "string" then
1987                d.content = lpegmatch(p_stripper,c)
1988            end
1989        end
1990    end
1991
1992end
1993
1994do
1995
1996    local tabledata = { }
1997
1998    local function hascontent(data)
1999        for i=1,#data do
2000            local di = data[i]
2001            if not di or di.tg == "ignore" then
2002                --
2003            else
2004                local content = di.content
2005                if content == " " then
2006                    --
2007                elseif content then
2008                    return true
2009                else
2010                    local d = di.data
2011                    if d and #d > 0 and hascontent(d) then
2012                        return true
2013                    end
2014                end
2015            end
2016        end
2017    end
2018
2019    function structurestags.settablecell(rows,columns,align)
2020        if align > 0 or rows > 1 or columns > 1 then -- or kind > 0
2021            tabledata[locatedtag("tablecell")] = {
2022                rows    = rows,
2023                columns = columns,
2024                align   = align,
2025            }
2026        end
2027    end
2028
2029    function structurestags.gettablecell(fulltag)
2030        return tabledata[fulltag]
2031    end
2032
2033    function extras.tablecell(di,element,n,fulltag)
2034        local hash = tabledata[fulltag]
2035        if hash then
2036            local columns = hash.columns
2037            if columns and columns > 1 then
2038                setattribute(di,"columns",columns)
2039            end
2040            local rows = hash.rows
2041            if rows and rows > 1 then
2042                setattribute(di,"rows",rows)
2043            end
2044            local align = hash.align
2045            if not align or align == 0 then
2046                -- normal
2047            elseif align == 1 then -- use numbertoalign here
2048                setattribute(di,"align","flushright")
2049            elseif align == 2 then
2050                setattribute(di,"align","middle")
2051            elseif align == 3 then
2052                setattribute(di,"align","flushleft")
2053            end
2054        end
2055    end
2056
2057    local tabulatedata = { }
2058
2059    function structurestags.settabulatecell(align,kind)
2060        if align > 0 or kind > 0 then
2061            tabulatedata[locatedtag("tabulatecell")] = {
2062                align = align,
2063                kind  = kind, -- 1 = bold head
2064            }
2065        end
2066    end
2067
2068    function structurestags.gettabulatecell(fulltag)
2069        return tabulatedata[fulltag]
2070    end
2071
2072    function extras.tabulate(di,element,n,fulltag)
2073        local data = di.data
2074        for i=1,#data do
2075            local di = data[i]
2076            if di.tg == "tabulaterow" and not hascontent(di.data) then
2077                di.element = "" -- or simply remove
2078            end
2079        end
2080    end
2081
2082    function extras.tabulatecell(di,element,n,fulltag)
2083        local hash = tabulatedata[fulltag]
2084        if hash then
2085            local align = hash.align
2086            if not align or align == 0 then
2087                -- normal
2088            elseif align == 1 then
2089                setattribute(di,"align","flushleft")
2090            elseif align == 2 then
2091                setattribute(di,"align","flushright")
2092            elseif align == 3 then
2093                setattribute(di,"align","middle")
2094            end
2095            local kind = hash.kind
2096            if kind == 1 then
2097                setattribute(di,"kind","strong")
2098            elseif kind == 2 then
2099                setattribute(di,"kind","equals")
2100            end
2101        end
2102    end
2103
2104end
2105
2106do
2107
2108    local usedpublications = { }
2109    local tagsindatasets   = setmetatableindex("table")
2110    local serialize        = false
2111
2112    function structurestags.setpublication(dataset,tag,rendering)
2113        usedpublications[locatedtag("publication")] = {
2114            dataset   = dataset,
2115            tag       = tag,
2116            rendering = rendering
2117        }
2118        tagsindatasets[dataset][tag] = true
2119        if not serialize then
2120            structures.tags.registerextradata("btx",function()
2121                local t = { "<btxdata>"}
2122                for dataset, used in sortedhash(tagsindatasets) do
2123                    t[#t+1] = publications.converttoxml(dataset,true,false,true,false,true,true)
2124                end
2125                t[#t+1] = "</btxdata>"
2126                return concat(t,"\n")
2127            end)
2128        end
2129    end
2130
2131    function extras.publication(di,element,n,fulltag)
2132        local hash = usedpublications[fulltag]
2133        if hash then
2134            setattribute(di,"dataset",hash.dataset)
2135            setattribute(di,"tag",hash.tag)
2136        end
2137    end
2138
2139end
2140
do

    -- paragraph settings, indexed by the located paragraph tag
    local usedparagraphs = { }

    -- Remember a non empty alignment for the paragraph tag that is currently
    -- located.
    function structurestags.setparagraph(align)
        if align == "" then
            return
        end
        local fulltag = locatedtag("paragraph")
        usedparagraphs[fulltag] = { align = align }
    end

    -- Export the remembered alignment (if any) as attribute "align".
    function extras.paragraph(di,element,n,fulltag)
        local paragraph = usedparagraphs[fulltag]
        if not paragraph then
            return
        end
        setattribute(di,"align",paragraph.align)
    end

end
2161
2162-- flusher
2163
2164do
2165
    -- Precompiled templates used by the flusher below. The variants differ in
    -- nature: inline (no line breaks), mixed and display (on lines of their own).
    -- The templates that start with %w get the current depth passed as first
    -- argument (presumably %w expands to the indentation for that depth).

    -- attribute fragments and the spacing tracer wrapper
    local f_detail                     = formatters[' detail="%s"']
    local f_chain                      = formatters[' chain="%s"']
    local f_index                      = formatters[' n="%s"']
    local f_spacing                    = formatters['<c p="%s">%s</c>']

    -- empty elements, without and with attributes
    -- NOTE(review): f_empty_mixed ends with a newline while f_empty_mixed_attr
    -- doesn't; confirm whether that difference is intended
    local f_empty_inline               = formatters["<%s/>"]
    local f_empty_mixed                = formatters["%w<%s/>\n"]
    local f_empty_display              = formatters["\n%w<%s/>\n"]
    local f_empty_inline_attr          = formatters["<%s%s/>"]
    local f_empty_mixed_attr           = formatters["%w<%s%s/>"]
    local f_empty_display_attr         = formatters["\n%w<%s%s/>\n"]

    -- begin tags, without and with attributes
    local f_begin_inline               = formatters["<%s>"]
    local f_begin_mixed                = formatters["%w<%s>"]
    local f_begin_display              = formatters["\n%w<%s>\n"]
    local f_begin_inline_attr          = formatters["<%s%s>"]
    local f_begin_mixed_attr           = formatters["%w<%s%s>"]
    local f_begin_display_attr         = formatters["\n%w<%s%s>\n"]

    -- end tags
    local f_end_inline                 = formatters["</%s>"]
    local f_end_mixed                  = formatters["</%s>\n"]
    local f_end_display                = formatters["%w</%s>\n"]

    -- begin tags preceded by a comment, without and with attributes
    local f_begin_inline_comment       = formatters["<!-- %s --><%s>"]
    local f_begin_mixed_comment        = formatters["%w<!-- %s --><%s>"]
    local f_begin_display_comment      = formatters["\n%w<!-- %s -->\n%w<%s>\n"]
    local f_begin_inline_attr_comment  = formatters["<!-- %s --><%s%s>"]
    local f_begin_mixed_attr_comment   = formatters["%w<!-- %s --><%s%s>"]
    local f_begin_display_attr_comment = formatters["\n%w<!-- %s -->\n%w<%s%s>\n"]

    -- comments that replace the begin tag of an element with skip = "comment"
    local f_comment_begin_inline       = formatters["<!-- begin %s -->"]
    local f_comment_begin_mixed        = formatters["%w<!-- begin %s -->"]
    local f_comment_begin_display      = formatters["\n%w<!-- begin %s -->\n"]

    -- comments that replace the end tag of an element with skip = "comment"
    local f_comment_end_inline         = formatters["<!-- end %s -->"]
    local f_comment_end_mixed          = formatters["<!-- end %s -->\n"]
    local f_comment_end_display        = formatters["%w<!-- end %s -->\n"]

    -- metadata block that follows a begin tag
    local f_metadata_begin             = formatters["\n%w<metadata>\n"]
    local f_metadata                   = formatters["%w<metavariable name=%q>%s</metavariable>\n"]
    local f_metadata_end               = formatters["%w</metadata>\n"]
2207
2208    local function attributes(a)
2209        local r = { }
2210        local n = 0
2211        for k, v in next, a do
2212            n = n + 1
2213            r[n] = f_attribute(k,tostring(v)) -- tostring because of %q
2214        end
2215        sort(r)
2216        return concat(r,"")
2217    end
2218
2219    local function properties(a)
2220        local r = { }
2221        local n = 0
2222        for k, v in next, a do
2223            n = n + 1
2224            r[n] = f_property(exportproperties,k,tostring(v)) -- tostring because of %q
2225        end
2226        sort(r)
2227        return concat(r,"")
2228    end
2229
2230    local depth  = 0
2231    local inline = 0
2232
2233    local function emptytag(result,element,nature,di) -- currently only break but at some point
2234        local a = di.attributes                       -- we might add detail etc
2235        if a then -- happens seldom
2236            if nature == "display" then
2237                result[#result+1] = f_empty_display_attr(depth,namespaced[element],attributes(a))
2238            elseif nature == "mixed" then
2239                result[#result+1] = f_empty_mixed_attr(depth,namespaced[element],attributes(a))
2240            else
2241                result[#result+1] = f_empty_inline_attr(namespaced[element],attributes(a))
2242            end
2243        else
2244            if nature == "display" then
2245                result[#result+1] = f_empty_display(depth,namespaced[element])
2246            elseif nature == "mixed" then
2247                result[#result+1] = f_empty_mixed(depth,namespaced[element])
2248            else
2249                result[#result+1] = f_empty_inline(namespaced[element])
2250            end
2251        end
2252    end
2253
2254 -- local function stripspaces(di)
2255 --     local d = di.data
2256 --     local n = #d
2257 --     local m = 0
2258 --     for i=1,n do
2259 --         local di = d[i]
2260 --         if di.tg then
2261 --             m = m + 1
2262 --             d[m] = di
2263 --         end
2264 --     end
2265 --     for i=n,m+1,-1 do
2266 --         d[i] = nil
2267 --     end
2268 -- end
2269 --
2270 -- -- simpler:
2271
2272    local function stripspaces(di)
2273        local d = di.data
2274        for i=1,#d do
2275            local di = d[i]
2276            if not di.tg then
2277                di.content = ""
2278            end
2279        end
2280    end
2281
2282    local function begintag(result,element,nature,di,skip)
2283        local index         = di.n
2284        local fulltag       = di.fulltag
2285        local specification = specifications[fulltag] or { } -- we can have a dummy
2286        local comment       = di.comment
2287        local detail        = specification.detail
2288        if skip == "comment" then
2289            if show_comment then
2290                if nature == "inline" or inline > 0 then
2291                    result[#result+1] = f_comment_begin_inline(namespaced[element])
2292                    inline = inline + 1
2293                elseif nature == "mixed" then
2294                    result[#result+1] = f_comment_begin_mixed(depth,namespaced[element])
2295                    depth = depth + 1
2296                    inline = 1
2297                else
2298                    result[#result+1] = f_comment_begin_display(depth,namespaced[element])
2299                    depth = depth + 1
2300                end
2301            end
2302        elseif skip then
2303            -- ignore
2304        else
2305
2306            local n = 0
2307            local r = { } -- delay this
2308            if detail then
2309                detail = gsub(detail,"[^A-Za-z0-9]+","-")
2310                specification.detail = detail -- we use it later in for the div
2311                n = n + 1
2312                r[n] = f_detail(detail)
2313            end
2314            local parents = specification.parents
2315            if parents then
2316                parents = gsub(parents,"[^A-Za-z0-9 ]+","-")
2317                specification.parents = parents -- we use it later in for the div
2318                n = n + 1
2319                r[n] = f_chain(parents)
2320            end
2321            if indexing and index then
2322                n = n + 1
2323                r[n] = f_index(index)
2324            end
2325            --
2326            local extra = extras[element]
2327            if extra then
2328                extra(di,element,index,fulltag)
2329            end
2330            --
2331            if di.record then
2332                stripspaces(di)
2333            end
2334            --
2335            if exportproperties then
2336                local p = specification.userdata
2337                if not p then
2338                    -- skip
2339                elseif exportproperties == v_yes then
2340                    n = n + 1
2341                    r[n] = attributes(p)
2342                else
2343                    n = n + 1
2344                    r[n] = properties(p)
2345                end
2346            end
2347            local a = di.attributes
2348            if a then
2349                if trace_spacing then
2350                    a.p = di.parnumber or 0
2351                end
2352                n = n + 1
2353                r[n] = attributes(a)
2354            elseif trace_spacing then
2355                n = n + 1
2356                r[n] = attributes { p = di.parnumber or 0 }
2357            end
2358            if n == 0 then
2359                if nature == "inline" or inline > 0 then
2360                    if show_comment and comment then
2361                        result[#result+1] = f_begin_inline_comment(comment,namespaced[element])
2362                    else
2363                        result[#result+1] = f_begin_inline(namespaced[element])
2364                    end
2365                    inline = inline + 1
2366                elseif nature == "mixed" then
2367                    if show_comment and comment then
2368                        result[#result+1] = f_begin_mixed_comment(depth,comment,namespaced[element])
2369                    else
2370                        result[#result+1] = f_begin_mixed(depth,namespaced[element])
2371                    end
2372                    depth = depth + 1
2373                    inline = 1
2374                else
2375                    if show_comment and comment then
2376                        result[#result+1] = f_begin_display_comment(depth,comment,depth,namespaced[element])
2377                    else
2378                        result[#result+1] = f_begin_display(depth,namespaced[element])
2379                    end
2380                    depth = depth + 1
2381                end
2382            else
2383                r = concat(r,"",1,n)
2384                if nature == "inline" or inline > 0 then
2385                    if show_comment and comment then
2386                        result[#result+1] = f_begin_inline_attr_comment(comment,namespaced[element],r)
2387                    else
2388                        result[#result+1] = f_begin_inline_attr(namespaced[element],r)
2389                    end
2390                    inline = inline + 1
2391                elseif nature == "mixed" then
2392                    if show_comment and comment then
2393                        result[#result+1] = f_begin_mixed_attr_comment(depth,comment,namespaced[element],r)
2394                    else
2395                        result[#result+1] = f_begin_mixed_attr(depth,namespaced[element],r)
2396                    end
2397                    depth = depth + 1
2398                    inline = 1
2399                else
2400                    if show_comment and comment then
2401                        result[#result+1] = f_begin_display_attr_comment(depth,comment,depth,namespaced[element],r)
2402                    else
2403                        result[#result+1] = f_begin_display_attr(depth,namespaced[element],r)
2404                    end
2405                    depth = depth + 1
2406                end
2407            end
2408        end
2409        used[element][detail or ""] = { nature, specification.parents }  -- for template css
2410        -- also in last else ?
2411        local metadata = specification.metadata
2412        if metadata then
2413            result[#result+1] = f_metadata_begin(depth)
2414            for k, v in table.sortedpairs(metadata) do
2415                if v ~= "" then
2416                    result[#result+1] = f_metadata(depth+1,k,lpegmatch(p_entity,v))
2417                end
2418            end
2419            result[#result+1] = f_metadata_end(depth)
2420        end
2421    end
2422
2423    local function endtag(result,element,nature,di,skip)
2424        if skip == "comment" then
2425            if show_comment then
2426                if nature == "display" and (inline == 0 or inline == 1) then
2427                    depth = depth - 1
2428                    result[#result+1] = f_comment_end_display(depth,namespaced[element])
2429                    inline = 0
2430                elseif nature == "mixed" and (inline == 0 or inline == 1) then
2431                    depth = depth - 1
2432                    result[#result+1] = f_comment_end_mixed(namespaced[element])
2433                    inline = 0
2434                else
2435                    inline = inline - 1
2436                    result[#result+1] = f_comment_end_inline(namespaced[element])
2437                end
2438            end
2439        elseif skip then
2440            -- ignore
2441        else
2442            if nature == "display" and (inline == 0 or inline == 1) then
2443                depth = depth - 1
2444                result[#result+1] = f_end_display(depth,namespaced[element])
2445                inline = 0
2446            elseif nature == "mixed" and (inline == 0 or inline == 1) then
2447                depth = depth - 1
2448                result[#result+1] = f_end_mixed(namespaced[element])
2449                inline = 0
2450            else
2451                inline = inline - 1
2452                result[#result+1] = f_end_inline(namespaced[element])
2453            end
2454        end
2455    end
2456
2457    local function flushtree(result,data,nature)
2458        local nofdata = #data
2459        for i=1,nofdata do
2460            local di = data[i]
2461            if not di then -- hm, di can be string
2462                -- whatever
2463            else
2464                local content = di.content
2465             -- also optimize for content == "" : trace that first
2466                if content then
2467                    -- already has breaks
2468                    local content = lpegmatch(p_entity,content)
2469                    if i == nofdata and sub(content,-1) == "\n" then -- move check
2470                        -- can be an end of line in par but can also be the last line
2471                        if trace_spacing then
2472                            result[#result+1] = f_spacing(di.parnumber or 0,sub(content,1,-2))
2473                        else
2474                            result[#result+1] = sub(content,1,-2)
2475                        end
2476                        result[#result+1] = " "
2477                    else
2478                        if trace_spacing then
2479                            result[#result+1] = f_spacing(di.parnumber or 0,content)
2480                        else
2481                            result[#result+1] = content
2482                        end
2483                    end
2484                elseif not di.collapsed then -- ignore collapsed data (is appended, reconstructed par)
2485                    local element = di.element
2486                    if not element then
2487                        -- skip
2488                    elseif element == "break" then -- or element == "pagebreak"
2489                        emptytag(result,element,nature,di)
2490                    elseif element == "" or di.skip == "ignore" then
2491                        -- skip
2492                    else
2493                        if di.before then
2494                            flushtree(result,di.before,nature)
2495                        end
2496                        local natu = di.nature
2497                        local skip = di.skip
2498                        if di.breaknode then
2499                            emptytag(result,"break","display",di)
2500                        end
2501                        begintag(result,element,natu,di,skip)
2502                        flushtree(result,di.data,natu)
2503                        endtag(result,element,natu,di,skip)
2504                        if di.after then
2505                            flushtree(result,di.after,nature)
2506                        end
2507                    end
2508                end
2509            end
2510        end
2511    end
2512
2513    local function breaktree(tree,parent,parentelement) -- also removes double breaks
2514        local data = tree.data
2515        if data then
2516            local nofdata = #data
2517            local prevelement
2518            local prevnature
2519            local prevparnumber
2520            local newdata = { }
2521            local nofnewdata = 0
2522            for i=1,nofdata do
2523                local di = data[i]
2524                if not di then
2525                    -- skip
2526                elseif di.skip == "ignore" then
2527                    -- skip (new)
2528elseif di.tg == "ignore" then
2529    -- skip (new)
2530                elseif di.content then
2531                    if di.samepar then
2532                        prevparnumber = false
2533                    else
2534                        local parnumber = di.parnumber
2535                        if prevnature == "inline" and prevparnumber and prevparnumber ~= parnumber then
2536                            nofnewdata = nofnewdata + 1
2537                            if trace_spacing then
2538                                newdata[nofnewdata] = makebreaknode { type = "a", p = prevparnumber, n = parnumber }
2539                            else
2540                                newdata[nofnewdata] = makebreaknode()
2541                            end
2542                        end
2543                        prevelement = nil
2544                        prevparnumber = parnumber
2545                    end
2546                    prevnature = "inline"
2547                    nofnewdata = nofnewdata + 1
2548                    newdata[nofnewdata] = di
2549                elseif not di.collapsed then
2550                    local element = di.element
2551                    if element == "break" then -- or element == "pagebreak"
2552                        if prevelement == "break" then
2553                            di.element = ""
2554                        end
2555                        prevelement = element
2556                        prevnature = "display"
2557                        nofnewdata = nofnewdata + 1
2558                        newdata[nofnewdata] = di
2559                    elseif element == "" or di.skip == "ignore" then
2560                        -- skip
2561                    else
2562                        if di.samepar then
2563                            prevnature    = "inline"
2564                            prevparnumber = false
2565                        else
2566                            local nature = di.nature
2567                            local parnumber = di.parnumber
2568                            if prevnature == "inline" and nature == "inline" and prevparnumber and prevparnumber ~= parnumber then
2569                                nofnewdata = nofnewdata + 1
2570                                if trace_spacing then
2571                                    newdata[nofnewdata] = makebreaknode { type = "b", p = prevparnumber, n = parnumber }
2572                                else
2573                                    newdata[nofnewdata] = makebreaknode()
2574                                end
2575                            end
2576                            prevnature = nature
2577                            prevparnumber = parnumber
2578                        end
2579                        prevelement = element
2580                        breaktree(di,tree,element)
2581                        nofnewdata = nofnewdata + 1
2582                        newdata[nofnewdata] = di
2583                    end
2584                else
2585                    if di.samepar then
2586                        prevnature    = "inline"
2587                        prevparnumber = false
2588                    else
2589                        local nature = di.nature
2590                        local parnumber = di.parnumber
2591                        if prevnature == "inline" and nature == "inline" and prevparnumber and prevparnumber ~= parnumber then
2592                            nofnewdata = nofnewdata + 1
2593                            if trace_spacing then
2594                                newdata[nofnewdata] = makebreaknode { type = "c", p = prevparnumber, n = parnumber }
2595                            else
2596                                newdata[nofnewdata] = makebreaknode()
2597                            end
2598                        end
2599                        prevnature = nature
2600                        prevparnumber = parnumber
2601                    end
2602                    nofnewdata = nofnewdata + 1
2603                    newdata[nofnewdata] = di
2604                end
2605            end
2606            tree.data = newdata
2607        end
2608    end
2609
2610    -- also tabulaterow reconstruction .. maybe better as a checker
2611    -- i.e cell attribute
2612
2613    local function collapsetree(tree)
2614--         for tag, trees in sortedhash(treehash) do
2615        for tag, trees in next, treehash do
2616            local d = trees[1].data
2617-- print("!!!!!!!!",tag)
2618-- inspect(trees)
2619            if d then
2620                local nd = #d
2621                if nd > 0 then
2622                    for i=2,#trees do
2623                        local currenttree = trees[i]
2624                        local currentdata = currenttree.data
2625                        local currentpar  = currenttree.parnumber
2626                        local previouspar = trees[i-1].parnumber
2627                        currenttree.collapsed = true
2628                        -- is the next ok?
2629                        if previouspar == 0 or not (di and di.content) then
2630                            previouspar = nil -- no need anyway so no further testing needed
2631                        end
2632                        for j=1,#currentdata do
2633                            local cd = currentdata[j]
2634                            if not cd or cd == "" then
2635                                -- skip
2636                            elseif cd.skip == "ignore" then
2637                                -- skip
2638                            elseif cd.content then
2639                                if not currentpar then
2640                                    -- add space ?
2641                                elseif not previouspar then
2642                                    -- add space ?
2643                                elseif currentpar ~= previouspar then
2644                                    nd = nd + 1
2645                                    if trace_spacing then
2646                                        d[nd] = makebreaknode { type = "d", p = previouspar, n = currentpar }
2647                                    else
2648                                        d[nd] = makebreaknode()
2649                                    end
2650                                end
2651                                previouspar = currentpar
2652                                nd = nd + 1
2653                                d[nd] = cd
2654                            else
2655                                nd = nd + 1
2656                                d[nd] = cd
2657                            end
2658                            currentdata[j] = false
2659                        end
2660                    end
2661                end
2662            end
2663        end
2664    end
2665
2666    local function finalizetree(tree)
2667        for _, finalizer in next, finalizers do
2668            finalizer(tree)
2669        end
2670    end
2671
2672 -- local function showtree(data,when,where)
2673 --     if data then
2674 --         for i=1,#data do
2675 --             local d = data[i]
2676 --             if type(d) == "table" and d.element then
2677 --                 print(when,where,i,d.element,d.parnumber or 0)
2678 --             end
2679 --         end
2680 --     end
2681 -- end
2682
2683    local function indextree(tree)
2684        local data = tree.data
2685        if data then
2686            local n, new = 0, { }
2687         -- showtree(data,"before","index")
2688            for i=1,#data do
2689                local d = data[i]
2690                if not d then
2691                    -- skip
2692                elseif d.content then
2693                    n = n + 1
2694                    new[n] = d
2695                elseif not d.collapsed then
2696                    n = n + 1
2697                    d.__i__ = n
2698                    d.__p__ = tree
2699                    indextree(d)
2700                    new[n] = d
2701                end
2702            end
2703            tree.data = new
2704         -- showtree(new,"after","index")
2705        end
2706    end
2707
2708    local function checktree(tree)
2709        local data = tree.data
2710        if data then
2711         -- showtree(data,"before","check")
2712            for i=1,#data do
2713                local d = data[i]
2714                if type(d) == "table" then
2715                    local check = checks[d.tg]
2716                    if check then
2717                        check(d,data,i)
2718                    end
2719                    checktree(d) -- so parts can pass twice
2720                end
2721            end
2722         -- showtree(data,"after","check")
2723        end
2724    end
2725
2726    local function fixtree(tree)
2727        local data = tree.data
2728        if data then
2729         -- showtree(data,"before","fix")
2730            for i=1,#data do
2731                local d = data[i]
2732                if type(d) == "table" then
2733                    local fix = fixes[d.tg]
2734                    if fix then
2735                        fix(d,data,i)
2736                    end
2737                    fixtree(d) -- so parts can pass twice
2738                end
2739            end
2740         -- showtree(data,"after","fix")
2741        end
2742    end
2743
2744    wrapups.flushtree    = flushtree
2745    wrapups.breaktree    = breaktree
2746    wrapups.collapsetree = collapsetree
2747    wrapups.finalizetree = finalizetree
2748    wrapups.indextree    = indextree
2749    wrapups.checktree    = checktree
2750    wrapups.fixtree      = fixtree
2751
2752end
2753
2754-- collector code
2755
-- Open a new element for fulltag as child of the current tree node and make it the current
-- node. Tag name, index and detail come from specifications[fulltag] or, when there is no
-- specification (a break), from splitting the fulltag. The element is also registered in
-- treehash (breaks excepted). The depth argument is not used here, the level is tracked
-- in currentdepth.

local function push(fulltag,depth)
    local tg, n, detail, element, nature, record
    local specification = specifications[fulltag]
    if specification then
        tg     = specification.tagname
        n      = specification.tagindex
        detail = specification.detail
    else
        -- a break (more efficient if we don't store those in specifications)
        tg, n = lpegmatch(tagsplitter,fulltag)
        n = tonumber(n) -- to tonumber in tagsplitter
    end
    local p = properties[tg]
    if p then
        element = p.export or tg
        nature  = p.nature or "inline" -- defaultnature
        record  = p.record
    end
    local treedata = tree.data
    local t = { -- maybe we can use the tag table
        tg        = tg,
        fulltag   = fulltag,
        detail    = detail,
        n         = n, -- already a number
        element   = element,
        nature    = nature,
        data      = { },
        attribute = currentattribute,
        parnumber = currentparagraph,
        record    = record, -- we can consider storing properties
    }
    treedata[#treedata+1] = t
    -- remember the way back for pop
    currentdepth = currentdepth + 1
    nesting[currentdepth] = fulltag
    treestack[currentdepth] = tree
    if trace_export then
        -- the arguments have to follow the labels: trigger is the attribute that is stored
        -- with the element, n is the tag index (these two were passed in swapped order)
        local trigger   = currentattribute or 0
        local paragraph = currentparagraph or 0
        if detail and detail ~= "" then
            report_export("%w<%s trigger=%q n=%q paragraph=%q index=%q detail=%q>",currentdepth-1,tg,trigger,n,paragraph,#treedata,detail)
        else
            report_export("%w<%s trigger=%q n=%q paragraph=%q index=%q>",currentdepth-1,tg,trigger,n,paragraph,#treedata)
        end
    end
    tree = t
    if tg == "break" then
        -- no need for this
    else
        -- all elements with the same fulltag are collected so that they can be collapsed
        local h = treehash[fulltag]
        if h then
            h[#h+1] = t
        else
            treehash[fulltag] = { t }
        end
    end
end
2810
-- Close the current element: the tree node that was current before the matching push
-- becomes current again and the depth goes down by one. A pop without open elements is
-- only reported.

local function pop()
    if currentdepth <= 0 then
        report_export("%w<!-- too many pops -->",currentdepth)
        return
    end
    local closed = nesting[currentdepth]
    tree = treestack[currentdepth]
    currentdepth = currentdepth - 1
    if not trace_export then
        return
    end
    if closed then
        -- only show the part of the fulltag before the >
        report_export("%w</%s>",currentdepth,match(closed,"[^>]+"))
    else
        report_export("</BAD>")
    end
end
2827
-- Add a space to the pending content (if there is any) when we continue after a pagebreak.

local function continueexport()
    if nofcurrentcontent <= 0 then
        return
    end
    if trace_export then
        report_export("%w<!-- injecting pagebreak space -->",currentdepth)
    end
    local slot = nofcurrentcontent + 1
    currentcontent[slot] = " " -- pagebreak
    nofcurrentcontent = slot
end
2837
-- Bring the stack of open elements in line with the taglist of the given entry by popping
-- and pushing where needed. Returns the depth before and the length of the taglist, or
-- nothing when there is no entry or no taglist.

local function pushentry(current)
    -- no entry is bad news, no taglist is even worse news
    local wanted = current and current.taglist
    if not wanted then
        return
    end
    if restart then
        continueexport()
        restart = false
    end
    local newdepth = #wanted
    local olddepth = currentdepth
    if trace_export then
        report_export("%w<!-- moving from depth %s to %s (%s) -->",currentdepth,olddepth,newdepth,wanted[newdepth])
    end
    if olddepth <= 0 then
        -- nothing is open so everything has to be pushed
        for i=1,newdepth do
            push(wanted[i],i)
        end
        return olddepth, newdepth
    end
    -- find the first level (in the part that both have) where the open elements differ
    local shared = olddepth < newdepth and olddepth or newdepth
    local difference
    for i=1,shared do
        if wanted[i] ~= nesting[i] then
            difference = i
            break
        end
    end
    if difference then
        -- close everything from the difference on and open what is wanted instead
        for i=olddepth,difference,-1 do
            pop()
        end
        for i=difference,newdepth do
            push(wanted[i],i)
        end
    elseif newdepth > olddepth then
        for i=olddepth+1,newdepth do
            push(wanted[i],i)
        end
    elseif newdepth < olddepth then
        -- mind the bounds: this also pops the element at newdepth
        for i=olddepth,newdepth,-1 do
            pop()
        end
    elseif trace_export then
        report_export("%w<!-- staying at depth %s (%s) -->",currentdepth,newdepth,nesting[newdepth] or "?")
    end
    return olddepth, newdepth
end
2899
-- Flush the collected characters as one content entry into the tree and reset the
-- collector. When oldparagraph is given a trailing newline is dropped, content that is
-- just a space is omitted, and a break entry is pushed afterwards. The newparagraph
-- argument is only used in the trace message.

local function pushcontent(oldparagraph,newparagraph)
    if nofcurrentcontent > 0 then
        if oldparagraph and currentcontent[nofcurrentcontent] == "\n" then
            if trace_export then
                report_export("%w<!-- removing newline -->",currentdepth)
            end
            nofcurrentcontent = nofcurrentcontent - 1
        end
        local content = concat(currentcontent,"",1,nofcurrentcontent)
        -- omit; when oldparagraph we could push, remove spaces, pop
        local omit = content == "" or (somespace[content] and oldparagraph)
        if not omit then
            local olddepth, newdepth
            local list = taglist[currentattribute]
            if list then
                olddepth, newdepth = pushentry(list)
            end
            if tree then
                local treedata = tree.data
                treedata[#treedata+1] = {
                    parnumber = oldparagraph or currentparagraph,
                    content   = content,
                }
                if trace_export then
                    report_export("%w<!-- start content with length %s -->",currentdepth,utflen(content))
                    report_export("%w%s",currentdepth,(gsub(content,"\n","\\n")))
                    report_export("%w<!-- stop content -->",currentdepth)
                end
                if olddepth then
                    for i=newdepth-1,olddepth,-1 do
                        pop()
                    end
                end
            end
        end
        nofcurrentcontent = 0
    end
    if not oldparagraph then
        return
    end
    pushentry(makebreaklist(currentnesting))
    if trace_export then
        report_export("%w<!-- break added between paragraph %a and %a -->",currentdepth,oldparagraph,newparagraph)
    end
end
2946
-- Wrap up the collecting: flush what is still pending (minus a trailing space), close all
-- elements that are still open and start with a fresh content buffer.

local function finishexport()
    if trace_export then
        report_export("%w<!-- start finalizing -->",currentdepth)
    end
    if nofcurrentcontent > 0 then
        local tail = currentcontent[nofcurrentcontent]
        if somespace[tail] then
            if trace_export then
                report_export("%w<!-- removing space -->",currentdepth)
            end
            nofcurrentcontent = nofcurrentcontent - 1
        end
        pushcontent()
    end
    -- the bound is taken once, so we pop as many times as there are open elements now
    for _=1,currentdepth do
        pop()
    end
    currentcontent = { } -- we're nice and do a cleanup
    if trace_export then
        report_export("%w<!-- stop finalizing -->",currentdepth)
    end
end
2968
2969-- inserts ?
2970
2971local collectresults  do -- too many locals otherwise
2972
2973    local nodecodes        = nodes.nodecodes
2974    local gluecodes        = nodes.gluecodes
2975    local listcodes        = nodes.listcodes
2976    local whatsitcodes     = nodes.whatsitcodes
2977
2978    local subtypes         = nodes.subtypes
2979
2980    local hlist_code       = nodecodes.hlist
2981    local vlist_code       = nodecodes.vlist
2982    local glyph_code       = nodecodes.glyph
2983    local glue_code        = nodecodes.glue
2984    local kern_code        = nodecodes.kern
2985    local disc_code        = nodecodes.disc
2986    local whatsit_code     = nodecodes.whatsit
2987    local par_code         = nodecodes.par
2988
2989    local userskip_code    = gluecodes.userskip
2990    local rightskip_code   = gluecodes.rightskip
2991    local parfillskip_code = gluecodes.parfillskip
2992    local spaceskip_code   = gluecodes.spaceskip
2993    local xspaceskip_code  = gluecodes.xspaceskip
2994
2995    local linelist_code    = listcodes.line
2996
2997    local userdefinedwhatsit_code  = whatsitcodes.userdefined
2998
2999    local privateattribute = attributes.private
3000    local a_image          = privateattribute('image')
3001    local a_reference      = privateattribute('reference')
3002    local a_destination    = privateattribute('destination')
3003    local a_characters     = privateattribute('characters')
3004    local a_exportstatus   = privateattribute('exportstatus')
3005    local a_tagged         = privateattribute('tagged')
3006    local a_taggedpar      = privateattribute("taggedpar")
3007    local a_textblock      = privateattribute("textblock")
3008
3009    local inline_mark      = nodes.pool.userids["margins.inline"]
3010
3011    local nuts             = nodes.nuts
3012
3013    local getnext          = nuts.getnext
3014    local getdisc          = nuts.getdisc
3015    local getlist          = nuts.getlist
3016    local getid            = nuts.getid
3017    local getattr          = nuts.getattr
3018    local setattr          = nuts.setattr -- maybe use properties
3019    local isglyph          = nuts.isglyph
3020    local getkern          = nuts.getkern
3021    local getwidth         = nuts.getwidth
3022
3023    local startofpar       = nuts.startofpar
3024
3025    local nexthlist        = nuts.traversers.hlist
3026    local nextnode         = nuts.traversers.node
3027
3028    local function addtomaybe(maybewrong,c,case)
3029        if trace_export then
3030            report_export("%w<!-- possible paragraph mixup at %C case %i -->",currentdepth,c,case)
3031        else
3032            local s = formatters["%C"](c)
3033            if maybewrong then
3034                maybewrong[#maybewrong+1] = s
3035            else
3036                maybewrong = { s }
3037            end
3038            return maybewrong
3039        end
3040    end
3041
3042    local function showmaybe(maybewrong)
3043        if not trace_export then
3044            report_export("fuzzy paragraph: % t",maybewrong)
3045        end
3046    end
3047
3048    local function showdetail(n,id,subtype)
3049        local a = getattr(n,a_tagged)
3050        local t = taglist[a]
3051        local c = nodecodes[id]
3052        local s = subtypes[id][subtype]
3053        if a and t then
3054            report_export("node %a, subtype %a, tag %a, element %a, tree '% t'",c,s,a,t.tagname,t.taglist)
3055        else
3056            report_export("node %a, subtype %a, untagged",c,s)
3057        end
3058    end
3059
3060    local function collectresults(head,list,pat,pap) -- is last used (we also have currentattribute)
3061        local p
3062        local paragraph
3063        local maybewrong
3064        local pid
3065        for n, id, subtype in nextnode, head do
3066            if trace_details then
3067                showdetail(n,id,subtype)
3068            end
3069            if id == glyph_code then
3070                local c, f = isglyph(n)
3071                local at   = getattr(n,a_tagged) or pat
3072                if not at then
3073                 -- we need to tag the pagebody stuff as being valid skippable
3074                 --
3075                 -- report_export("skipping character: %C (no attribute)",n.char)
3076                else
3077                    if last ~= at then
3078                        local tl = taglist[at]
3079                        local ap = getattr(n,a_taggedpar) or pap
3080                        if paragraph and (not ap or ap < paragraph) then
3081                            maybewrong = addtomaybe(maybewrong,c,1)
3082                        end
3083                        pushcontent()
3084                        currentnesting   = tl
3085                        currentparagraph = ap
3086                        currentattribute = at
3087                        last = at
3088                        pushentry(currentnesting)
3089                        if trace_export then
3090                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,at)
3091                        end
3092                        -- We need to intercept this here; maybe I will also move this
3093                        -- to a regular setter at the tex end.
3094                        local r = getattr(n,a_reference)
3095                        if r then
3096                            local t = tl.taglist
3097                            referencehash[t[#t]] = r -- fulltag
3098                        end
3099                        local d = getattr(n,a_destination)
3100                        if d then
3101                            local t = tl.taglist
3102                            destinationhash[t[#t]] = d -- fulltag
3103                        end
3104                        --
3105                    elseif last then
3106                        -- we can consider tagging the pars (lines) in the parbuilder but then we loose some
3107                        -- information unless we inject a special node (but even then we can run into nesting
3108                        -- issues)
3109                        local ap = getattr(n,a_taggedpar) or pap
3110                        if ap ~= currentparagraph then
3111                            pushcontent(currentparagraph,ap)
3112                            pushentry(currentnesting)
3113                            currentattribute = last
3114                            currentparagraph = ap
3115                        end
3116                        if paragraph and (not ap or ap < paragraph) then
3117                            maybewrong = addtomaybe(maybewrong,c,2)
3118                        end
3119                        if trace_export then
3120                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,last)
3121                        end
3122                    else
3123                        if trace_export then
3124                            report_export("%w<!-- processing glyph %C tagged %a -->",currentdepth,c,at)
3125                        end
3126                    end
3127                    local s = getattr(n,a_exportstatus)
3128                    if s then
3129                        c = s
3130                    end
3131                    if c == 0 then
3132                        if trace_export then
3133                            report_export("%w<!-- skipping last glyph -->",currentdepth)
3134                        end
3135                    elseif c == 0x20 then
3136                        local a = getattr(n,a_characters)
3137                        nofcurrentcontent = nofcurrentcontent + 1
3138                        if a then
3139                            if trace_export then
3140                                report_export("%w<!-- turning last space into special space %U -->",currentdepth,a)
3141                            end
3142                            currentcontent[nofcurrentcontent] = specialspaces[a] -- special space
3143                        else
3144                            currentcontent[nofcurrentcontent] = " "
3145                        end
3146                    else
3147                        local fc = fontchar[f]
3148                        if fc then
3149                            fc = fc and fc[c]
3150                            if fc then
3151                                local u = fc.unicode
3152                                if not u then
3153                                    nofcurrentcontent = nofcurrentcontent + 1
3154                                    currentcontent[nofcurrentcontent] = utfchar(c)
3155                                elseif type(u) == "table" then
3156                                    for i=1,#u do
3157                                        nofcurrentcontent = nofcurrentcontent + 1
3158                                        currentcontent[nofcurrentcontent] = utfchar(u[i])
3159                                    end
3160                                else
3161                                    nofcurrentcontent = nofcurrentcontent + 1
3162                                    currentcontent[nofcurrentcontent] = utfchar(u)
3163                                end
3164                            elseif c > 0 then
3165                                nofcurrentcontent = nofcurrentcontent + 1
3166                                currentcontent[nofcurrentcontent] = utfchar(c)
3167                            else
3168                                -- we can have -1 as side effect of an explicit hyphen (unless we expand)
3169                            end
3170                        elseif c > 0 then
3171                            nofcurrentcontent = nofcurrentcontent + 1
3172                            currentcontent[nofcurrentcontent] = utfchar(c)
3173                        else
3174                            -- we can have -1 as side effect of an explicit hyphen (unless we expand)
3175                        end
3176                    end
3177                end
3178            elseif id == glue_code then
3179                -- we need to distinguish between hskips and vskips
3180                local ca = getattr(n,a_characters)
3181                if ca == 0 then
3182                    -- skip this one ... already converted special character (node-acc)
3183                elseif ca then
3184                    local a = getattr(n,a_tagged) or pat
3185                    if a then
3186                        local c = specialspaces[ca]
3187                        if last ~= a then
3188                            local tl = taglist[a]
3189                            if trace_export then
3190                                report_export("%w<!-- processing space glyph %U tagged %a case 1 -->",currentdepth,ca,a)
3191                            end
3192                            pushcontent()
3193                            currentnesting = tl
3194                            currentparagraph = getattr(n,a_taggedpar) or pap
3195                            currentattribute = a
3196                            last = a
3197                            pushentry(currentnesting)
3198                            -- no reference check (see above)
3199                        elseif last then
3200                            local ap = getattr(n,a_taggedpar) or pap
3201                            if ap ~= currentparagraph then
3202                                pushcontent(currentparagraph,ap)
3203                                pushentry(currentnesting)
3204                                currentattribute = last
3205                                currentparagraph = ap
3206                            end
3207                            if trace_export then
3208                                report_export("%w<!-- processing space glyph %U tagged %a case 2 -->",currentdepth,ca,last)
3209                            end
3210                        end
3211                        -- if somespace[currentcontent[nofcurrentcontent]] then
3212                        --     if trace_export then
3213                        --         report_export("%w<!-- removing space -->",currentdepth)
3214                        --     end
3215                        --     nofcurrentcontent = nofcurrentcontent - 1
3216                        -- end
3217                        nofcurrentcontent = nofcurrentcontent + 1
3218                        currentcontent[nofcurrentcontent] = c
3219                    end
3220                elseif subtype == userskip_code then
3221                    if getwidth(n) > threshold then
3222                        if last and not somespace[currentcontent[nofcurrentcontent]] then
3223                            local a = getattr(n,a_tagged) or pat
3224                            if a == last then
3225                                if trace_export then
3226                                    report_export("%w<!-- injecting spacing 5a -->",currentdepth)
3227                                end
3228                                nofcurrentcontent = nofcurrentcontent + 1
3229                                currentcontent[nofcurrentcontent] = " "
3230                            elseif a then
3231                                -- e.g LOGO<space>LOGO
3232                                if trace_export then
3233                                    report_export("%w<!-- processing glue > threshold tagged %s becomes %s -->",currentdepth,last,a)
3234                                end
3235                                pushcontent()
3236                                if trace_export then
3237                                    report_export("%w<!-- injecting spacing 5b -->",currentdepth)
3238                                end
3239                                last = a
3240                                nofcurrentcontent = nofcurrentcontent + 1
3241                                currentcontent[nofcurrentcontent] = " "
3242                                currentnesting = taglist[last]
3243                                pushentry(currentnesting)
3244                                currentattribute = last
3245                            end
3246                        end
3247                    end
3248                elseif subtype == spaceskip_code or subtype == xspaceskip_code then
3249                    if not somespace[currentcontent[nofcurrentcontent]] then
3250                        local a = getattr(n,a_tagged) or pat
3251                        if a == last then
3252                            if trace_export then
3253                                report_export("%w<!-- injecting spacing 7 (stay in element) -->",currentdepth)
3254                            end
3255                            nofcurrentcontent = nofcurrentcontent + 1
3256                            currentcontent[nofcurrentcontent] = " "
3257                        else
3258                            if trace_export then
3259                                report_export("%w<!-- injecting spacing 7 (end of element) -->",currentdepth)
3260                            end
3261                            last = a
3262                            pushcontent()
3263                            nofcurrentcontent = nofcurrentcontent + 1
3264                            currentcontent[nofcurrentcontent] = " "
3265                            currentnesting = taglist[last]
3266                            pushentry(currentnesting)
3267                            currentattribute = last
3268                        end
3269                    end
3270                elseif subtype == rightskip_code then
3271                    -- a line
3272                    if nofcurrentcontent > 0 then
3273                        local r = currentcontent[nofcurrentcontent]
3274                        if r == hyphen then
3275                            if not keephyphens then
3276                                nofcurrentcontent = nofcurrentcontent - 1
3277                            end
3278                        elseif pid == disc_code then
3279                            -- go on .. tricky: we should mark the glyhs as coming from a disc
3280                        elseif not somespace[r] then
3281                            local a = getattr(n,a_tagged) or pat
3282                            if a == last then
3283                                if trace_export then
3284                                    report_export("%w<!-- injecting spacing 1 (end of line, stay in element) -->",currentdepth)
3285                                end
3286                                nofcurrentcontent = nofcurrentcontent + 1
3287                                currentcontent[nofcurrentcontent] = " "
3288                            else
3289                                if trace_export then
3290                                    report_export("%w<!-- injecting spacing 1 (end of line, end of element) -->",currentdepth)
3291                                end
3292                                last = a
3293                                pushcontent()
3294                                nofcurrentcontent = nofcurrentcontent + 1
3295                                currentcontent[nofcurrentcontent] = " "
3296                                currentnesting = taglist[last]
3297                                pushentry(currentnesting)
3298                                currentattribute = last
3299                            end
3300                        end
3301                    end
3302                elseif subtype == parfillskip_code then
3303                    -- deal with paragraph endings (crossings) elsewhere and we quit here
3304                    -- as we don't want the rightskip space addition
3305                    if maybewrong then
3306                        showmaybe(maybewrong)
3307                    end
3308                    return
3309                end
3310            elseif id == hlist_code or id == vlist_code then
3311                local ai = getattr(n,a_image)
3312                if ai then
3313                    local at = getattr(n,a_tagged) or pat
3314                    if nofcurrentcontent > 0 then
3315                        pushcontent()
3316                        pushentry(currentnesting) -- ??
3317                    end
3318                    pushentry(taglist[at]) -- has an index, todo: flag empty element
3319                    if trace_export then
3320                        report_export("%w<!-- processing image tagged %a",currentdepth,last)
3321                    end
3322                    last = nil
3323                    currentparagraph = nil
3324                else
3325                    -- we need to determine an end-of-line
3326                    local list = getlist(n)
3327                    if list then
3328                        -- todo: no par checking needed in math
3329                        local at = getattr(n,a_tagged) or pat
3330                        collectresults(list,n,at)
3331                    end
3332                end
3333            elseif id == kern_code then
3334                local kern = getkern(n)
3335                if kern > 0 then
3336local a = getattr(n,a_tagged) or pat
3337local t = taglist[a]
3338if not t or t.tagname ~= "ignore" then -- maybe earlier on top)
3339                    local limit = threshold
3340                    if p then
3341                        local c, f = isglyph(p)
3342                        if c then
3343                            limit = fontquads[f] / 4
3344                        end
3345                    end
3346                    if kern > limit then
3347                        if last and not somespace[currentcontent[nofcurrentcontent]] then
3348--                             local a = getattr(n,a_tagged) or pat
3349                            if a == last then
3350                                if not somespace[currentcontent[nofcurrentcontent]] then
3351                                    if trace_export then
3352                                        report_export("%w<!-- injecting spacing 8 (kern %p) -->",currentdepth,kern)
3353                                    end
3354                                    nofcurrentcontent = nofcurrentcontent + 1
3355                                    currentcontent[nofcurrentcontent] = " "
3356                                end
3357                            elseif a then
3358                                -- e.g LOGO<space>LOGO
3359                                if trace_export then
3360                                    report_export("%w<!-- processing kern, threshold %p, tag %s => %s -->",currentdepth,limit,last,a)
3361                                end
3362                                last = a
3363                                pushcontent()
3364                                if trace_export then
3365                                    report_export("%w<!-- injecting spacing 9 (kern %p) -->",currentdepth,kern)
3366                                end
3367                                nofcurrentcontent = nofcurrentcontent + 1
3368                                currentcontent[nofcurrentcontent] = " "
3369--                                 currentnesting = taglist[last]
3370currentnesting = t
3371                                pushentry(currentnesting)
3372                                currentattribute = last
3373                            end
3374                        end
3375                    end
3376end
3377                end
3378            elseif id == whatsit_code then
3379                if subtype == userdefinedwhatsit_code then
3380                    -- similar to images, see above
3381                    local at = getattr(n,a_tagged)
3382                    if nofcurrentcontent > 0 then
3383                        pushcontent()
3384                        pushentry(currentnesting) -- ??
3385                    end
3386                    pushentry(taglist[at])
3387                    if trace_export then
3388                        report_export("%w<!-- processing anchor tagged %a",currentdepth,last)
3389                    end
3390                    last = nil
3391                    currentparagraph = nil
3392                end
3393            elseif not paragraph and id == par_code and startofpar(n) then
3394                paragraph = getattr(n,a_taggedpar)
3395            elseif id == disc_code then
3396                -- very unlikely because we stripped them
3397                local pre, post, replace = getdisc(n)
3398                if keephyphens then
3399                    if pre and not getnext(pre) and isglyph(pre) == 0xAD then -- hyphencode then
3400                        nofcurrentcontent = nofcurrentcontent + 1
3401                        currentcontent[nofcurrentcontent] = hyphen
3402                    end
3403                end
3404                if replace then
3405                    collectresults(replace,nil)
3406                end
3407            end
3408            p   = n
3409            pid = id
3410        end
3411        if maybewrong then
3412            showmaybe(maybewrong)
3413        end
3414    end
3415
3416    function nodes.handlers.export(head) -- hooks into the page builder
3417        starttiming(treehash)
3418        if trace_export then
3419            report_export("%w<!-- start flushing page -->",currentdepth)
3420        end
3421     -- continueexport()
3422        restart = true
3423        collectresults(head)
3424        if trace_export then
3425            report_export("%w<!-- stop flushing page -->",currentdepth)
3426        end
3427        stoptiming(treehash)
3428        return head
3429    end
3430
3431    function nodes.handlers.checkparcounter(p)
3432        setattr(p,a_taggedpar,texgetcount("tagparcounter") + 1)
3433        return p
3434    end
3435
3436    function builders.paragraphs.tag(head)
3437        noftextblocks = noftextblocks + 1
3438        for n, subtype in nexthlist, head do
3439            if subtype == linelist_code then
3440                setattr(n,a_textblock,noftextblocks)
3441--             elseif subtype == glue_code or subtype == kern_code then -- weird, no list
3442--                 setattr(n,a_textblock,0)
3443            end
3444        end
3445        return false
3446    end
3447
3448end
3449
3450do
3451
3452    local xmlcollected  = xml.collected
3453    local xmlsetcomment = xml.setcomment
3454
-- Template for the xml declaration followed by an informative comment block;
-- the %...% fields are filled in by wholepreamble.
local xmlpreamble = [[
<?xml version="1.0" encoding="UTF-8" standalone="%standalone%" ?>

<!--

    input filename   : %filename%
    processing date  : %date%
    context version  : %contextversion%
    exporter version : %exportversion%

-->

]]
3468
3469    local flushtree = wrapups.flushtree
3470
3471    local function wholepreamble(standalone)
3472        return replacetemplate(xmlpreamble, {
3473            standalone     = standalone and "yes" or "no",
3474            filename       = tex.jobname,
3475            date           = included.date and os.fulltime(),
3476            contextversion = environment.version,
3477            exportversion  = exportversion,
3478        })
3479    end
3480
3481
-- Stylesheet reference as an xml processing instruction; %filename% is the
-- css file (see allusedstylesheets).
local csspreamble = [[
<?xml-stylesheet type="text/css" href="%filename%" ?>
]]

-- The same stylesheet reference as a <link> element.
local cssheadlink = [[
<link type="text/css" rel="stylesheet" href="%filename%" />
]]
3489
3490    local function allusedstylesheets(cssfiles,files,path)
3491        local done   = { }
3492        local result = { }
3493        local extras = { }
3494        for i=1,#cssfiles do
3495            local cssfile = cssfiles[i]
3496            if type(cssfile) ~= "string" then
3497                -- error
3498            elseif cssfile == "export-example.css" then
3499                -- ignore
3500            elseif not done[cssfile] then
3501                cssfile = joinfile(path,basename(cssfile))
3502                report_export("adding css reference '%s'",cssfile)
3503                files[#files+1]   = cssfile
3504                result[#result+1] = replacetemplate(csspreamble, { filename = cssfile })
3505                extras[#extras+1] = replacetemplate(cssheadlink, { filename = cssfile })
3506                done[cssfile]     = true
3507            end
3508        end
3509        return concat(result), concat(extras)
3510    end
3511
-- Css rule template for an element without detail (see allusedelements).
local elementtemplate = [[
/* element="%element%" detail="%detail%" chain="%chain%" */

%element%,
%namespace%div.%element% {
    display: %display% ;
}]]

-- Css rule template for an element that has a detail (see allusedelements).
local detailtemplate = [[
/* element="%element%" detail="%detail%" chain="%chain%" */

%element%[detail=%detail%],
%namespace%div.%element%.%detail% {
    display: %display% ;
}]]
3527
3528-- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd" >
3529
-- Skeleton of an html document with the template fields %preamble%, %title%,
-- %style% and %body%; the body is wrapped in a div of class "document" and is
-- preceded by a warning div about a missing default/fallback css.
local htmltemplate = [[
%preamble%

<html xmlns="http://www.w3.org/1999/xhtml" xmlns:math="http://www.w3.org/1998/Math/MathML">

    <head>

        <meta charset="utf-8"/>

        <title>%title%</title>

%style%

    </head>
    <body>
        <div class="document" xmlns="http://www.pragma-ade.com/context/export">

<div class="warning">Rendering can be suboptimal because there is no default/fallback css loaded.</div>

%body%

        </div>
    </body>
</html>
]]
3555
    -- Maps the nature of an element onto a css display value; allusedelements
    -- falls back to "block" for a nature that is not listed here.
    local displaymapping = {
        inline  = "inline",
        display = "block",
        mixed   = "inline",
    }
3561
3562    local function allusedelements(filename)
3563        local result = { replacetemplate(namespacetemplate, {
3564            what            = "template",
3565            filename        = filename,
3566            namespace       = contextns,
3567         -- cssnamespaceurl = usecssnamespace and cssnamespaceurl or "",
3568            cssnamespaceurl = cssnamespaceurl,
3569        },false,true) }
3570        for element, details in sortedhash(used) do
3571            if namespaces[element] then
3572                -- skip math
3573            else
3574                for detail, what in sortedhash(details) do
3575                    local nature  = what[1] or "display"
3576                    local chain   = what[2]
3577                    local display = displaymapping[nature] or "block"
3578                    if detail == "" then
3579                        result[#result+1] = replacetemplate(elementtemplate, {
3580                            element   = element,
3581                            display   = display,
3582                            chain     = chain,
3583                            namespace = usecssnamespace and namespace or "",
3584                        })
3585                    else
3586                        result[#result+1] = replacetemplate(detailtemplate, {
3587                            element   = element,
3588                            display   = display,
3589                            detail    = detail,
3590                            chain     = chain,
3591                            namespace = usecssnamespace and cssnamespace or "",
3592                        })
3593                    end
3594                end
3595            end
3596        end
3597        return concat(result,"\n\n")
3598    end
3599
3600    local function allcontent(tree,embed)
3601        local result   = { }
3602        flushtree(result,tree.data,"display") -- we need to collect images
3603        result = concat(result)
3604        -- no need to lpeg .. fast enough
3605        result = gsub(result,"\n *\n","\n")
3606        result = gsub(result,"\n +([^< ])","\n%1")
3607        return result
3608    end
3609
3610    -- local xhtmlpreamble = [[
3611    --     <!DOCTYPE html PUBLIC
3612    --         "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
3613    --         "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"
3614    --     >
3615    -- ]]
3616
3617    local function cleanxhtmltree(xmltree)
3618        if xmltree then
3619            local implicits = { }
3620            local explicits = { }
3621            local overloads = { }
3622            for e in xmlcollected(xmltree,"*") do
3623                local at = e.at
3624                if at then
3625                    local explicit = at.explicit
3626                    local implicit = at.implicit
3627                    if explicit then
3628                        if not explicits[explicit] then
3629                            explicits[explicit] = true
3630                            at.id = explicit
3631                            if implicit then
3632                                overloads[implicit] = explicit
3633                            end
3634                        end
3635                    else
3636                        if implicit and not implicits[implicit] then
3637                            implicits[implicit] = true
3638                            at.id = "aut:" .. implicit
3639                        end
3640                    end
3641                end
3642            end
3643            for e in xmlcollected(xmltree,"*") do
3644                local at = e.at
3645                if at then
3646                    local internal = at.internal
3647                    local location = at.location
3648                    if internal then
3649                        if location then
3650                            local explicit = overloads[location]
3651                            if explicit then
3652                                at.href = "#" .. explicit
3653                            else
3654                                at.href = "#aut:" .. internal
3655                            end
3656                        else
3657                            at.href = "#aut:" .. internal
3658                        end
3659                    else
3660                        if location then
3661                            at.href = "#" .. location
3662                        else
3663                            local url = at.url
3664                            if url then
3665                                at.href = url
3666                            else
3667                                local file = at.file
3668                                if file then
3669                                    at.href = file
3670                                end
3671                            end
3672                        end
3673                    end
3674                end
3675            end
3676            return xmltree
3677        else
3678            return xml.convert('<?xml version="1.0"?>\n<error>invalid xhtml tree</error>')
3679        end
3680    end
3681
3682    -- maybe the reverse: be explicit about what is permitted
3683
    -- Attribute names that makeclass does not turn into "name-value" classes.
    local private = {
        destination = true,
        prefix      = true,
        reference   = true,
        --
        id          = true,
        href        = true,
        --
        implicit    = true,
        explicit    = true,
        --
        url         = true,
        file        = true,
        internal    = true,
        location    = true,
        --
        name        = true, -- image name
        used        = true, -- image name
        page        = true, -- image name
        width       = true,
        height      = true,
        --
    }
3707
3708    local addclicks   = true
3709    local f_onclick   = formatters[ [[location.href='%s']] ]
3710    local f_onclick   = formatters[ [[location.href='%s']] ]
3711
3712    local p_cleanid   = lpeg.replacer { [":"] = "-" }
3713    local p_cleanhref = lpeg.Cs(lpeg.P("#") * p_cleanid)
3714
3715    local p_splitter  = lpeg.Ct ( (
3716        lpeg.Carg(1) * lpeg.C((1-lpeg.P(" "))^1) / function(d,s) if not d[s] then d[s] = true return s end end
3717      * lpeg.P(" ")^0 )^1 )
3718
3719
3720    local classes = table.setmetatableindex(function(t,k)
3721        local v = concat(lpegmatch(p_splitter,k,1,{})," ")
3722        t[k] = v
3723        return v
3724    end)
3725
3726    local function makeclass(tg,at)
3727        local detail     = at.detail
3728        local chain      = at.chain
3729        local extra      = nil
3730        local classes    = { }
3731        local nofclasses = 0
3732        at.detail        = nil
3733        at.chain         = nil
3734        for k, v in next, at do
3735            if not private[k] then
3736                nofclasses = nofclasses + 1
3737                classes[nofclasses] = k .. "-" .. v
3738            end
3739        end
3740        if detail and detail ~= "" then
3741            if chain and chain ~= "" then
3742                if chain ~= detail then
3743                    extra = classes[tg .. " " .. chain .. " " .. detail]
3744                elseif tg ~= detail then
3745                    extra = detail
3746                end
3747            elseif tg ~= detail then
3748                extra = detail
3749            end
3750        elseif chain and chain ~= "" then
3751            if tg ~= chain then
3752                extra = chain
3753            end
3754        end
3755        -- in this order
3756        if nofclasses > 0 then
3757            sort(classes)
3758            classes = concat(classes," ")
3759            if extra then
3760                return tg .. " " .. extra .. " " .. classes
3761            else
3762                return tg .. " " .. classes
3763            end
3764        else
3765            if extra then
3766                return tg .. " " .. extra
3767            else
3768                return tg
3769            end
3770        end
3771    end
3772
    -- Some elements are not supported (well) in css so we need to retain them. For
    -- instance, tablecells have no colspan so basically that renders css table div
    -- elements quite useless. A side effect is that we now can have conflicts when
    -- we mix in with other html (as there is no reset). Of course, when it eventually
    -- gets added, there is a chance then that those not using the div abstraction
    -- will be ridiculed.
    --
    -- a table tr td th thead tbody tfoot
    --
3782
    -- Export element names and the native html table tags they map onto.
    local crappycss = {
        table     = "table", tabulate      = "table",
        tablehead = "thead", tabulatehead  = "thead",
        tablebody = "tbody", tabulatebody  = "tbody",
        tablefoot = "tfoot", tabulatefoot  = "tfoot",
        tablerow  = "tr",    tabulaterow   = "tr",
        tablecell = "td",    tabulatecell  = "td",
    }

    -- Either false (remap turns every element into a div) or the mapping above.
    local cssmapping = false

    -- The "export.nativetags" directive switches the mapping on or off.
    directives.register("export.nativetags", function(v)
        cssmapping = v and crappycss or false
    end)
3797
3798    local function remap(specification,source,target)
3799        local comment = nil -- share comments
3800        for c in xmlcollected(source,"*") do
3801            if not c.special then
3802                local tg = c.tg
3803                local ns = c.ns
3804                if ns == "m" then
3805                    if false then -- yes or no
3806                        c.ns = ""
3807                        c.at["xmlns:m"] = nil
3808                    end
3809             -- elseif tg == "a" then
3810             --     c.ns = ""
3811                else
3812                    local dt = c.dt
3813                    local nt = #dt
3814                    if nt == 0 or (nt == 1 and dt[1] == "") then
3815                        if comment then
3816                            c.dt = comment
3817                        else
3818                            xmlsetcomment(c,"empty")
3819                            comment = c.dt
3820                        end
3821                    end
3822                    local at    = c.at
3823                    local class = nil
3824                    local label = nil
3825                    if tg == "document" then
3826                        at.href   = nil
3827                        at.detail = nil
3828                        at.chain  = nil
3829                    elseif tg == "metavariable" then
3830                        label = at.name
3831                        at.detail = "metaname-" .. label
3832                        class = makeclass(tg,at)
3833                    else
3834                        class = makeclass(tg,at)
3835                    end
3836                    local id   = at.id
3837                    local href = at.href
3838                    local attr = nil
3839                    if id then
3840                        id = lpegmatch(p_cleanid, id) or id
3841                        if href then
3842                            href = lpegmatch(p_cleanhref,href) or href
3843                            attr = {
3844                                class   = class,
3845                                id      = id,
3846                                href    = href,
3847                                onclick = addclicks and f_onclick(href) or nil,
3848                            }
3849                        else
3850                            attr = {
3851                                class = class,
3852                                id    = id,
3853                            }
3854                        end
3855                    else
3856                        if href then
3857                            href = lpegmatch(p_cleanhref,href) or href
3858                            attr = {
3859                                class   = class,
3860                                href    = href,
3861                                onclick = addclicks and f_onclick(href) or nil,
3862                            }
3863                        else
3864                            attr = {
3865                                class = class,
3866                            }
3867                        end
3868                    end
3869                    c.at = attr
3870                    if label then
3871                        attr.label = label
3872                    end
3873                    c.tg = cssmapping and cssmapping[tg] or "div"
3874                end
3875            end
3876        end
3877    end
3878
3879 -- local cssfile = nil  directives.register("backend.export.css", function(v) cssfile = v end)
3880
    -- Set by the "export.embed" directive; when set (and a file attacher is
    -- available) finishexport also attaches the raw xml export (only for testing).
    local embedfile = false  directives.register("export.embed",function(v) embedfile = v end)
3882
3883    function structurestags.finishexport()
3884
3885        if exporting then
3886            exporting = false
3887        else
3888            return
3889        end
3890
3891        local onlyxml = finetuning.export == v_xml
3892
3893        starttiming(treehash)
3894        --
3895        finishexport()
3896        --
3897        report_export("")
3898        if onlyxml then
3899            report_export("exporting xml, no other files")
3900        else
3901            report_export("exporting xml, xhtml, html and css files")
3902        end
3903        report_export("")
3904        --
3905        wrapups.fixtree(tree)
3906        wrapups.collapsetree(tree)
3907        wrapups.indextree(tree)
3908        wrapups.checktree(tree)
3909        wrapups.breaktree(tree)
3910        wrapups.finalizetree(tree)
3911        --
3912        wrapups.hashlistdata()
3913        --
3914        local askedname = finetuning.file
3915        --
3916        -- we use a dedicated subpath:
3917        --
3918        -- ./jobname-export
3919        -- ./jobname-export/images
3920        -- ./jobname-export/styles
3921        -- ./jobname-export/styles
3922        -- ./jobname-export/jobname-export.xml
3923        -- ./jobname-export/jobname-export.xhtml
3924        -- ./jobname-export/jobname-export.html
3925        -- ./jobname-export/jobname-specification.lua
3926        -- ./jobname-export/styles/jobname-defaults.css
3927        -- ./jobname-export/styles/jobname-styles.css
3928        -- ./jobname-export/styles/jobname-images.css
3929        -- ./jobname-export/styles/jobname-templates.css
3930
3931        if type(askedname) ~= "string" or askedname == "" then
3932            askedname = tex.jobname
3933        end
3934
3935        local usedname  = nameonly(askedname)
3936        local basepath  = usedname .. "-export"
3937        local imagepath = joinfile(basepath,"images")
3938        local stylepath = joinfile(basepath,"styles")
3939
3940        local function validpath(what,pathname)
3941            if lfs.isdir(pathname) then
3942                report_export("using existing %s path %a",what,pathname)
3943                return pathname
3944            end
3945            lfs.mkdir(pathname)
3946            if lfs.isdir(pathname) then
3947                report_export("using cretated %s path %a",what,basepath)
3948                return pathname
3949            else
3950                report_export("unable to create %s path %a",what,basepath)
3951                return false
3952            end
3953        end
3954
3955        if not (validpath("export",basepath) and validpath("images",imagepath) and validpath("styles",stylepath)) then
3956            return
3957        end
3958
3959        -- we're now on the dedicated export subpath so we can't clash names
3960        --
3961        -- a xhtml suffix no longer seems to be work well with browsers
3962
3963        local xmlfilebase           = addsuffix(usedname .. "-raw","xml"  )
3964        local xhtmlfilebase         = addsuffix(usedname .. "-tag","xhtml")
3965        local htmlfilebase          = addsuffix(usedname .. "-div","html")
3966        local specificationfilebase = addsuffix(usedname .. "-pub","lua"  )
3967
3968        local xmlfilename           = joinfile(basepath, xmlfilebase          )
3969        local xhtmlfilename         = joinfile(basepath, xhtmlfilebase        )
3970        local htmlfilename          = joinfile(basepath, htmlfilebase         )
3971        local specificationfilename = joinfile(basepath, specificationfilebase)
3972        --
3973        local defaultfilebase       = addsuffix(usedname .. "-defaults", "css")
3974        local imagefilebase         = addsuffix(usedname .. "-images",   "css")
3975        local stylefilebase         = addsuffix(usedname .. "-styles",   "css")
3976        local templatefilebase      = addsuffix(usedname .. "-templates","css")
3977        --
3978        local defaultfilename       = joinfile(stylepath,defaultfilebase )
3979        local imagefilename         = joinfile(stylepath,imagefilebase   )
3980        local stylefilename         = joinfile(stylepath,stylefilebase   )
3981        local templatefilename      = joinfile(stylepath,templatefilebase)
3982
3983        local cssfile               = finetuning.cssfile
3984
3985        -- we keep track of all used files
3986
3987        local files = {
3988        }
3989
3990        -- we always load the defaults and optionally extra css files; we also copy the example
3991        -- css file so that we always have the latest version
3992
3993        local cssfiles = {
3994            defaultfilebase,
3995            imagefilebase,
3996            stylefilebase,
3997        }
3998
3999        local cssextra = cssfile and table.unique(settings_to_array(cssfile)) or { }
4000
4001        -- at this point we're ready for the content; the collector also does some
4002        -- housekeeping and data collecting; at this point we still have an xml
4003        -- representation that uses verbose element names and carries information in
4004        -- attributes
4005
4006        local data = tree.data
4007        for i=1,#data do
4008            if data[i].tg ~= "document" then
4009                data[i] = { }
4010            end
4011        end
4012
4013        local result = allcontent(tree,embedmath) -- embedfile is for testing
4014
4015        -- ugly but so be it:
4016
4017        local extradata = structures.tags.getextradata()
4018        if extradata then
4019            local t = { "" }
4020            t[#t+1] = "<extradata>"
4021            for name, action in sortedhash(extradata) do
4022                t[#t+1] = action()
4023            end
4024            t[#t+1] = "</extradata>"
4025            t[#t+1] = "</document>"
4026            -- we use a function because otherwise we can have a bad capture index
4027            result = gsub(result,"</document>",function()
4028                return concat(t,"\n")
4029            end)
4030        end
4031
4032        -- done with ugly
4033
4034        if onlyxml then
4035
4036            os.remove(defaultfilename)
4037            os.remove(imagefilename)
4038            os.remove(stylefilename)
4039            os.remove(templatefilename)
4040
4041            for i=1,#cssextra do
4042                os.remove(joinfile(stylepath,basename(source)))
4043            end
4044
4045         -- os.remove(xmlfilename)
4046
4047            os.remove(imagefilename)
4048            os.remove(stylefilename)
4049            os.remove(templatefilename)
4050            os.remove(xhtmlfilename)
4051            os.remove(specificationfilename)
4052            os.remove(htmlfilename)
4053
4054            result = concat {
4055                wholepreamble(true),
4056                "<!-- This export file is used for filtering runtime only! -->\n",
4057                result,
4058            }
4059
4060            report_export("saving xml data in %a",xmlfilename)
4061            io.savedata(xmlfilename,result)
4062
4063            return
4064
4065        end
4066
4067        local examplefilename = resolvers.findfile("export-example.css")
4068        if examplefilename then
4069            local data = io.loaddata(examplefilename)
4070            if not data or data == "" then
4071                data = "/* missing css file */"
4072            elseif not usecssnamespace then
4073                data = gsub(data,cssnamespace,"")
4074            end
4075            io.savedata(defaultfilename,data)
4076        end
4077
4078        if cssfile then
4079            for i=1,#cssextra do
4080                local source = addsuffix(cssextra[i],"css")
4081                local target = joinfile(stylepath,basename(source))
4082                cssfiles[#cssfiles+1] = source
4083                if not lfs.isfile(source) then
4084                    source = joinfile("../",source)
4085                end
4086                if lfs.isfile(source) then
4087                    report_export("copying %s",source)
4088                    file.copy(source,target)
4089                end
4090            end
4091        end
4092
4093        local x_styles, h_styles = allusedstylesheets(cssfiles,files,"styles")
4094
4095        local attach = backends.nodeinjections.attachfile
4096
4097        if embedfile and attach then
4098            -- only for testing
4099            attach {
4100                data       = concat{ wholepreamble(true), result },
4101                name       = basename(xmlfilename),
4102                registered = "export",
4103                title      = "raw xml export",
4104                method     = v_hidden,
4105                mimetype   = "application/mathml+xml",
4106            }
4107        end
4108
4109        result = concat {
4110            wholepreamble(true),
4111            x_styles, -- adds to files
4112            result,
4113        }
4114
4115        cssfiles = table.unique(cssfiles)
4116
4117        -- we're now ready for saving the result in the xml file
4118
4119        report_export("saving xml data in %a",xmlfilename)
4120        io.savedata(xmlfilename,result)
4121
4122        report_export("saving css image definitions in %a",imagefilename)
4123        io.savedata(imagefilename,wrapups.allusedimages(usedname))
4124
4125        report_export("saving css style definitions in %a",stylefilename)
4126        io.savedata(stylefilename,wrapups.allusedstyles(usedname))
4127
4128        report_export("saving css template in %a",templatefilename)
4129        io.savedata(templatefilename,allusedelements(usedname))
4130
4131        -- additionally we save an xhtml file; for that we load the file as xml tree
4132
4133        report_export("saving xhtml variant in %a",xhtmlfilename)
4134
4135        local xmltree = cleanxhtmltree(xml.convert(result))
4136
4137     -- local xmltree = xml.convert(result)
4138     -- for c in xml.collected(xmltree,"m:mtext[lastindex()=1]/m:mrow") do
4139     --     print(c)
4140     -- end
4141     -- for c in xml.collected(xmltree,"mtext/mrow") do
4142     --     print(c)
4143     -- end
4144     -- local xmltree = cleanxhtmltree(xmltree)
4145
4146        xml.save(xmltree,xhtmlfilename)
4147
4148        -- now we save a specification file that can be used for generating an epub file
4149
4150        -- looking at identity is somewhat redundant as we also inherit from interaction
4151        -- at the tex end
4152
4153        local identity  = interactions.general.getidentity()
4154        local metadata  = structures.tags.getmetadata()
4155
4156        local specification = {
4157            name       = usedname,
4158            identifier = os.uuid(),
4159            images     = wrapups.uniqueusedimages(),
4160            imagefile  = joinfile("styles",imagefilebase),
4161            imagepath  = "images",
4162            stylepath  = "styles",
4163            xmlfiles   = { xmlfilebase },
4164            xhtmlfiles = { xhtmlfilebase },
4165            htmlfiles  = { htmlfilebase },
4166            styles     = cssfiles,
4167            htmlroot   = htmlfilebase,
4168            language   = languagenames[texgetcount("mainlanguagenumber")],
4169            title      = validstring(finetuning.title) or validstring(identity.title),
4170            subtitle   = validstring(finetuning.subtitle) or validstring(identity.subtitle),
4171            author     = validstring(finetuning.author) or validstring(identity.author),
4172            firstpage  = validstring(finetuning.firstpage),
4173            lastpage   = validstring(finetuning.lastpage),
4174            metadata   = metadata,
4175        }
4176
4177        report_export("saving specification in %a",specificationfilename,specificationfilename)
4178
4179        xml.wipe(xmltree,"metadata") -- maybe optional
4180
4181        io.savedata(specificationfilename,table.serialize(specification,true))
4182
4183        -- the html export for epub is different in the sense that it uses div's instead of
4184        -- specific tags
4185
4186        report_export("saving div based alternative in %a",htmlfilename)
4187
4188        remap(specification,xmltree)
4189
4190        -- believe it or not, but a <title/> can prevent viewing in browsers
4191
4192        local title = specification.title
4193
4194        if not title or title == "" then
4195            title = metadata.title
4196            if not title or title == "" then
4197                title = usedname -- was: "no title"
4198            end
4199        end
4200
4201        local variables = {
4202            style    = h_styles,
4203            body     = xml.tostring(xml.first(xmltree,"/div")),
4204            preamble = wholepreamble(false),
4205            title    = title,
4206        }
4207
4208        io.savedata(htmlfilename,replacetemplate(htmltemplate,variables,"xml"))
4209
4210        -- finally we report how an epub file can be made (using the specification)
4211
4212        report_export("")
4213        report_export('create epub with: mtxrun --script epub --make "%s" [--purge --rename --svgmath]',usedname)
4214        report_export("")
4215
4216        stoptiming(treehash)
4217    end
4218
4219    local enableaction = nodes.tasks.enableaction
4220
4221    function structurestags.initializeexport()
4222        if not exporting then
4223            report_export("enabling export to xml")
4224            enableaction("shipouts","nodes.handlers.export")
4225            enableaction("shipouts","nodes.handlers.accessibility")
4226            enableaction("math",    "noads.handlers.tags")
4227            enableaction("everypar","nodes.handlers.checkparcounter")
4228            luatex.registerstopactions(structurestags.finishexport)
4229            exporting = true
4230        end
4231    end
4232
4233    function structurestags.setupexport(t)
4234        merge(finetuning,t)
4235        keephyphens      = finetuning.hyphen == v_yes
4236        exportproperties = finetuning.properties
4237        if exportproperties == v_no then
4238            exportproperties = false
4239        end
4240    end
4241
4242    statistics.register("xml exporting time", function()
4243        if exporting then
4244            return string.format("%s seconds, version %s", statistics.elapsedtime(treehash),exportversion)
4245        end
4246    end)
4247
4248end
4249
4250-- These are called at the tex end:
4251
-- Export control: setup takes a single table of keyed settings (keys without an
-- explicit type carry no type tag here, "bodyfont" and "width" are declared as
-- dimen); finish and initialize declare no arguments.

implement {
    name      = "setupexport",
    arguments = {
        {
            { "align" },
            { "bodyfont", "dimen" },
            { "width", "dimen" },
            { "properties" },
            { "hyphen" },
            { "title" },
            { "subtitle" },
            { "author" },
            { "firstpage" },
            { "lastpage" },
            { "svgstyle" },
            { "cssfile" },
            { "file" },
            { "export" },
        },
    },
    actions   = structurestags.setupexport,
}

implement {
    name    = "finishexport",
    actions = structurestags.finishexport,
}

implement {
    name    = "initializeexport",
    actions = structurestags.initializeexport,
}
4284
-- Tag setters, part 1: each call registers a name together with the
-- structurestags function used as its action and the declared argument types.

implement {
    name      = "settagitemgroup",
    arguments = { "boolean", "integer", "string" },
    actions   = structurestags.setitemgroup,
}

implement {
    name      = "settagitem",
    arguments = "string",
    actions   = structurestags.setitem,
}

implement {
    name      = "settagfloat",
    arguments = "2 strings",
    actions   = structurestags.setfloat,
}

implement {
    name      = "settagformulacontent",
    arguments = "integer",
    actions   = structurestags.setformulacontent,
}

implement {
    name      = "settagdelimitedsymbol",
    arguments = "string",
    actions   = structurestags.settagdelimitedsymbol,
}

implement {
    name      = "settagsubsentencesymbol",
    arguments = "string",
    actions   = structurestags.settagsubsentencesymbol,
}
4320
-- Tag setters, part 2: synonyms, sorting, notations, highlights, constructs
-- and figures; each entry pairs a name with its structurestags action and
-- the declared argument types.

implement {
    name      = "settagsynonym",
    arguments = "string",
    actions   = structurestags.setsynonym,
}

implement {
    name      = "settagsorting",
    arguments = "string",
    actions   = structurestags.setsorting,
}

implement {
    name      = "settagnotation",
    arguments = { "string", "integer" },
    actions   = structurestags.setnotation,
}

implement {
    name      = "settagnotationsymbol",
    arguments = { "string", "integer" },
    actions   = structurestags.setnotationsymbol,
}

implement {
    name      = "settaghighlight",
    arguments = { "string", "string", "integer", "integer" },
    actions   = structurestags.sethighlight,
}

implement {
    name      = "settagconstruct",
    arguments = { "string", "string", "integer", "integer" },
    actions   = structurestags.setconstruct,
}

implement {
    name      = "settagfigure",
    arguments = { "string", "string", "string", "dimen", "dimen", "string" },
    actions   = structurestags.setfigure,
}
4362
-- Tag setters, part 3: combinations, table and tabulate cells, registers,
-- lists, publications and paragraphs; each entry pairs a name with its
-- structurestags action and the declared argument types.

implement {
    name      = "settagcombination",
    arguments = { "integer", "integer" },
    actions   = structurestags.setcombination,
}

implement {
    name      = "settagtablecell",
    arguments = { "integer", "integer", "integer" },
    actions   = structurestags.settablecell,
}

implement {
    name      = "settagtabulatecell",
    arguments = { "integer", "integer" },
    actions   = structurestags.settabulatecell,
}

implement {
    name      = "settagregister",
    arguments = { "string", "integer" },
    actions   = structurestags.setregister,
}

implement {
    name      = "settaglist",
    arguments = "integer",
    actions   = structurestags.setlist,
}

implement {
    name      = "settagpublication",
    arguments = "2 strings",
    actions   = structurestags.setpublication,
}

implement {
    name      = "settagparagraph",
    arguments = "string",
    actions   = structurestags.setparagraph,
}
4404