lxml-tex.lua /size: 84 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['lxml-tex'] = {
2    version   = 1.001,
3    comment   = "companion to lxml-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
-- Because we split and resolve entities we use the direct printing
-- interface and not the context one. If we ever do that there will
-- be a cldf-xml helper library.
12
13local concat, insert, remove, sortedkeys, reversed = table.concat, table.insert, table.remove, table.sortedkeys, table.reverse
14local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match
15local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select
16local lpegmatch = lpeg.match
17local P, S, C = lpeg.P, lpeg.S, lpeg.C
18local patterns = lpeg.patterns
19local setmetatableindex = table.setmetatableindex
20local formatters, strip = string.formatters, string.strip
21
22local tex, xml = tex, xml
23local lowerchars, upperchars, lettered = characters.lower, characters.upper, characters.lettered
24local basename, dirname, joinfile = file.basename, file.dirname, file.join
25
26lxml = lxml or { }
27local lxml = lxml
28
29local catcodenumbers     = catcodes.numbers
30local ctxcatcodes        = catcodenumbers.ctxcatcodes -- todo: use different method
31local notcatcodes        = catcodenumbers.notcatcodes -- todo: use different method
32
33local commands           = commands
34local context            = context
35local contextsprint      = context.sprint             -- with catcodes (here we use fast variants, but with option for tracing)
36
37local synctex            = luatex.synctex
38
39local implement          = interfaces.implement
40
41local xmlelements        = xml.elements
42local xmlcollected       = xml.collected
43local xmlsetproperty     = xml.setproperty
44local xmlwithelements    = xml.withelements
45local xmlserialize       = xml.serialize
46local xmlcollect         = xml.collect
47local xmltext            = xml.text
48local xmltostring        = xml.tostring
49local xmlapplylpath      = xml.applylpath
50local xmlunspecialized   = xml.unspecialized
51local xmldespecialized   = xml.despecialized -- nicer in expanded xml
52local xmlprivatetoken    = xml.privatetoken
53local xmlstripelement    = xml.stripelement
54local xmlinclusion       = xml.inclusion
55local xmlinclusions      = xml.inclusions
56local xmlbadinclusions   = xml.badinclusions
57local xmlcontent         = xml.content
58local xmllastmatch       = xml.lastmatch
59local xmlpushmatch       = xml.pushmatch
60local xmlpopmatch        = xml.popmatch
61local xmlstring          = xml.string
62local xmlserializetotext = xml.serializetotext
63local xmlrename          = xml.rename
64
65local variables          = interfaces and interfaces.variables or { }
66
67local parsers            = utilities.parsers
68local settings_to_hash   = parsers.settings_to_hash
69local settings_to_set    = parsers.settings_to_set
70local options_to_hash    = parsers.options_to_hash
71local options_to_array   = parsers.options_to_array
72
73local insertbeforevalue  = utilities.tables.insertbeforevalue
74local insertaftervalue   = utilities.tables.insertaftervalue
75
76local resolveprefix      = resolvers.resolve
77
78local starttiming        = statistics.starttiming
79local stoptiming         = statistics.stoptiming
80
81local trace_setups       = false  trackers.register("lxml.setups",   function(v) trace_setups    = v end)
82local trace_loading      = false  trackers.register("lxml.loading",  function(v) trace_loading   = v end)
83local trace_access       = false  trackers.register("lxml.access",   function(v) trace_access    = v end)
84local trace_comments     = false  trackers.register("lxml.comments", function(v) trace_comments  = v end)
85local trace_entities     = false  trackers.register("xml.entities",  function(v) trace_entities  = v end)
86local trace_selectors    = false  trackers.register("lxml.selectors",function(v) trace_selectors = v end)
87
88local report_lxml        = logs.reporter("lxml","tex")
89local report_xml         = logs.reporter("xml","tex")
90
91local forceraw           = false
92
93local p_texescape        = patterns.texescape
94
95local tokenizedxmlw      = context.tokenizedcs and context.tokenizedcs.xmlw
96
97directives.enable("xml.path.keeplastmatch")
98
99-- tex entities
100
-- Entity tables and the lpeg patterns used to resolve numeric entities. The
-- tex entity table is registered with the storage subsystem under the name
-- "lxml/entities".

local texentities     = lxml.entities or { }     -- these are specific for a tex run
lxml.entities         = texentities

storage.register("lxml/entities",texentities,"lxml.entities")

local xmlentities     = xml.entities             -- these are more or less standard entities
local reparsedentity  = xml.reparsedentitylpeg   -- \Ux{...}
local unescapedentity = xml.unescapedentitylpeg
local parsedentity    = reparsedentity           -- the currently active pattern
local useelement      = false                    -- probably no longer needed / used
111
-- Makes lxml.resolvedentity parse unknown entities with the unescaped entity
-- pattern (xml.unescapedentitylpeg) until lxml.stopunescaped is called.
function lxml.startunescaped()
    parsedentity = unescapedentity
end
115
-- Restores the default (reparsed) entity pattern, undoing lxml.startunescaped.
function lxml.stopunescaped()
    parsedentity = reparsedentity
end
119
-- The directive "lxml.entities.useelement" controls whether entities that
-- cannot be resolved are handed over to \xmle by lxml.resolvedentity.
directives.register("lxml.entities.useelement",function(v)
    useelement = v
end)
123
-- Registers (or overwrites) a tex specific entity. The value is stored as
-- given; lxml.resolvedentity later calls it when it is a function and
-- otherwise flushes it with context().
function lxml.registerentity(key,value)
    if trace_entities then
        report_xml("registering tex entity %a as %a",key,value)
    end
    texentities[key] = value
end
130
-- Flushes the entity with name str (the text between & and ;) to TeX.
--
-- The lookup order is:
--
--   1. raw mode     : the entity is passed on unchanged as &str;
--   2. tex entities : functions are called with str, other values are
--                     flushed with context()
--   3. xml entities : functions are called with str and their result is
--                     flushed under notcatcodes
--   4. numeric      : the active entity pattern (reparsed or unescaped)
--   5. useelement   : the name and its uppercased variant go to \xmle
--   6. fallback     : the bare name is flushed under notcatcodes
--
-- Nothing is returned, all output goes to TeX.
function lxml.resolvedentity(str)
    if forceraw then
        -- should not happen as we then can as well bypass this function
        if trace_entities then
            report_xml("passing entity %a as &%s;",str,str)
        end
        context("&%s;",str)
        return
    end
    -- entities registered for this tex run take precedence
    local e = texentities[str]
    if e then
        if type(e) == "function" then
            if trace_entities then
                report_xml("passing entity %a using function",str)
            end
            e(str)
        else
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,e,"ctxcatcodes")
            end
            context(e)
        end
        return
    end
    -- the more or less standard entities
    e = xmlentities[str]
    if e then
        if type(e) == "function" then
            e = e(str)
        end
        if e then
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,e,"notcatcodes")
            end
            contextsprint(notcatcodes,e)
            return
        end
    end
    -- resolve hex and dec, todo: escape # & etc for ctxcatcodes
    -- normally this is already solved while loading the file
    local chr, err = lpegmatch(parsedentity,str)
    if chr then
        if parsedentity == reparsedentity then
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,chr,"ctxcatcodes")
            end
            context(chr)
        else
            contextsprint(notcatcodes,chr)
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,chr,"notcatcodes")
            end
        end
    elseif err then
        if trace_entities then
            report_xml("passing faulty entity %a as %a",str,err)
        end
        context(err)
    elseif useelement then
        local tag = upperchars(str)
        if trace_entities then
            report_xml("passing entity %a to \\xmle using tag %a",str,tag)
        end
        -- The original code used an undefined global 'texcatcodes' here and
        -- flushed 'e', which is always nil or false at this point. We take
        -- the regime from the catcode table and flush the entity name.
        -- NOTE(review): passing the tag as second argument follows the trace
        -- message above; confirm against the definition of \xmle.
        local texcatcodes = catcodenumbers.texcatcodes or ctxcatcodes
        contextsprint(texcatcodes,"\\xmle{")
        contextsprint(notcatcodes,str)
        contextsprint(texcatcodes,"}{")
        contextsprint(notcatcodes,tag)
        contextsprint(texcatcodes,"}")
    else
        if trace_entities then
            report_xml("passing entity %a as %a using %a",str,str,"notcatcodes")
        end
        contextsprint(notcatcodes,str)
    end
end
205
206-- tex interface
207
-- The registry of loaded xml trees, keyed by id. It is shared via lxml.loaded.

local loaded = lxml.loaded or { }
lxml.loaded  = loaded

-- print(contextdirective("context-mathml-directive function reduction yes "))
-- print(contextdirective("context-mathml-directive function "))

xml.defaultprotocol = "tex"

-- Finalizers come in two namespaces; both tables are created on demand.

local finalizers    = xml.finalizers

local xmlfinalizers = finalizers.xml or { }
local texfinalizers = finalizers.tex or { }

finalizers.xml      = xmlfinalizers
finalizers.tex      = texfinalizers
223
224-- serialization with entity handling
225
-- Serialization with entity handling. For each flavour (text, space, line
-- and ctx) we create two lpeg patterns: a "yes" variant that intercepts
-- &name; sequences and hands the name to lxml.resolvedentity, and a "nop"
-- variant that doesn't. The active ones can be swapped by a directive.

local ampersand  = P("&")
local semicolon  = P(";")

local entity     = (ampersand * C((1-semicolon)^1) * semicolon) / lxml.resolvedentity -- context.bold

local xmltextcapture_yes,  xmltextcapture_nop
local xmlspacecapture_yes, xmlspacecapture_nop
local xmllinecapture_yes,  xmllinecapture_nop
local ctxtextcapture_yes,  ctxtextcapture_nop

do

    -- context.newtexthandler returns two values; we only need the second one

    local function newcapture(specification)
        local _, capture = context.newtexthandler(specification)
        return capture
    end

    xmltextcapture_yes = newcapture {
        catcodes  = notcatcodes,
        exception = entity,
    }
    xmltextcapture_nop = newcapture {
        catcodes  = notcatcodes,
    }

    xmlspacecapture_yes = newcapture {
        endofline  = context.xmlcdataobeyedline,
        emptyline  = context.xmlcdataobeyedline,
        simpleline = context.xmlcdataobeyedline,
        space      = context.xmlcdataobeyedspace,
        catcodes   = notcatcodes,
        exception  = entity,
    }
    xmlspacecapture_nop = newcapture {
        endofline  = context.xmlcdataobeyedline,
        emptyline  = context.xmlcdataobeyedline,
        simpleline = context.xmlcdataobeyedline,
        space      = context.xmlcdataobeyedspace,
        catcodes   = notcatcodes,
    }

    xmllinecapture_yes = newcapture {
        endofline  = context.xmlcdataobeyedline,
        emptyline  = context.xmlcdataobeyedline,
        simpleline = context.xmlcdataobeyedline,
        catcodes   = notcatcodes,
        exception  = entity,
    }
    xmllinecapture_nop = newcapture {
        endofline  = context.xmlcdataobeyedline,
        emptyline  = context.xmlcdataobeyedline,
        simpleline = context.xmlcdataobeyedline,
        catcodes   = notcatcodes,
    }

    ctxtextcapture_yes = newcapture {
        catcodes  = ctxcatcodes,
        exception = entity,
    }
    ctxtextcapture_nop = newcapture {
        catcodes  = ctxcatcodes,
    }

end

-- by default entities are intercepted

local xmltextcapture    = xmltextcapture_yes
local xmlspacecapture   = xmlspacecapture_yes
local xmllinecapture    = xmllinecapture_yes
local ctxtextcapture    = ctxtextcapture_yes
281
-- The directive "lxml.entities.escaped" selects, for all four flavours at
-- once, the captures that intercept entities (true) or the ones that leave
-- them alone (false).
directives.register("lxml.entities.escaped",function(v)
    if v then
        xmltextcapture, xmlspacecapture = xmltextcapture_yes, xmlspacecapture_yes
        xmllinecapture, ctxtextcapture  = xmllinecapture_yes, ctxtextcapture_yes
    else
        xmltextcapture, xmlspacecapture = xmltextcapture_nop, xmlspacecapture_nop
        xmllinecapture, ctxtextcapture  = xmllinecapture_nop, ctxtextcapture_nop
    end
end)
295
296-- cdata
297
-- Verbatim flushers for cdata. The local one is used by the cdata handler in
-- this file, the public one differs only in having the strip option set.

local toverbatim = context.newverbosehandler {
    before = context.xmlcdatabefore,
    after  = context.xmlcdataafter,
    line   = context.xmlcdataobeyedline,
    space  = context.xmlcdataobeyedspace,
}

lxml.toverbatim = context.newverbosehandler {
    before = context.xmlcdatabefore,
    after  = context.xmlcdataafter,
    line   = context.xmlcdataobeyedline,
    space  = context.xmlcdataobeyedspace,
    strip  = true,
}
312
313-- raw flushing
314
-- Enables raw mode: lxml.resolvedentity then passes entities on as &name;
-- instead of resolving them.
function lxml.startraw()
    forceraw = true
end
318
-- Disables raw mode again (see lxml.startraw).
function lxml.stopraw()
    forceraw = false
end
322
-- Returns the current value of rawroot.
-- NOTE(review): no local 'rawroot' is declared before this point in the
-- visible source, so this reads a global; confirm where it gets set.
function lxml.rawroot()
    return rawroot
end
326
327-- storage
328
do

    -- We wrap the xml error handler so that we can count the errors per file
    -- and report the problematic files at the end of the run.

    local originalhandler = xml.errorhandler
    local total           = 0
    local perfile         = setmetatableindex("number")

    function xml.errorhandler(message,filename)
        if filename and filename ~= "" then
            total = total + 1
            perfile[filename] = perfile[filename] + 1
        end
        originalhandler(message) -- (filename)
    end

    -- one line per file: the error count followed by the filename

    local function flush(report)
        for name, count in table.sortedhash(perfile) do
            report("%4i  %s",count,name)
        end
    end

    logs.registerfinalactions(function()
        if total <= 0 then
            return
        end
        local report = logs.startfilelogging("lxml","problematic xml files")
        flush(report)
        logs.stopfilelogging()
        --
        if logs.loggingerrors() then
            logs.starterrorlogging(report,"problematic xml files")
            flush(report)
            logs.stoperrorlogging()
        end
    end)

end
362
-- Stores a tree under the given id and tags it with its name and, when
-- given, the filename it came from.
function lxml.store(id,root,filename)
    loaded[id] = root
    xmlsetproperty(root,"name",id)
    if not filename then
        return
    end
    xmlsetproperty(root,"filename",filename)
end
370
-- References to nodes look like "name::index"; this pattern splits at "::".

local splitter = lpeg.splitat("::")

lxml.idsplitter = splitter

-- Splits an id into the part before "::" and the part after it. When the
-- pattern yields no first capture we return an empty string and the id.
-- NOTE(review): if the splitter always produces a first capture (also when
-- there is no "::") the fallback is never reached and an unqualified id
-- comes back as (id, nil); verify against lpeg.splitat.
function lxml.splitid(id)
    local before, after = lpegmatch(splitter,id)
    if not before then
        return "", id
    end
    return before, after
end
383
-- Resolves an id to a tree or node:
--
--   * an id known in the loaded registry gives the stored root
--   * a table is returned as is
--   * otherwise the id is split at "::" and the node is taken from the index
--     of the tree with that name
--
-- With qualified set, an index lookup also returns the name of the tree as
-- second value. On failure nothing is returned; with the lxml.access tracker
-- enabled the reason is reported.
local function getid(id, qualified)
    if not id then
        if trace_access then
            report_lxml("invalid id (nil)")
        end
        return
    end
    local known = loaded[id]
    if known then
        return known
    end
    if type(id) == "table" then
        return id
    end
    local d, i = lpegmatch(splitter,id)
    if not d then
        if trace_access then
            report_lxml("%a is not loaded",i)
        end
        return
    end
    local tree = loaded[d]
    if not tree then
        if trace_access then
            report_lxml("%a is not loaded",d)
        end
        return
    end
    local index = tree.index
    if not index then
        if trace_access then
            report_lxml("%a has no index",d)
        end
        return
    end
    local node = index[tonumber(i)]
    if not node then
        if trace_access then
            report_lxml("%a has no index entry %a",d,i)
        end
        return
    end
    if qualified then -- we need this else two args that confuse others
        return node, d
    end
    return node
end

lxml.id    = getid -- we provide two names as locals can already use such
lxml.getid = getid -- names and we don't want clashes
425
-- Returns the tree stored under id, or nil when there is none. Unlike getid
-- this only consults the loaded registry; no "name::index" resolution.
function lxml.root(id)
    return loaded[id]
end
429
430-- index
431
-- The total number of nodes that got an index entry.

local nofindices = 0

-- Walks the tree identified by name and gives every table node that has no
-- ix field yet the next free index number. The nodes end up in root.index
-- and the highest number used in root.maxindex. A tree that already has an
-- index is left alone unless force is set. The check_sum argument is not
-- used.
local function addindex(name,check_sum,force)
    local root = getid(name)
    if not root or (root.index and not force) then -- weird, only called once
        return
    end
    local index    = root.index    or { }
    local maxindex = root.maxindex or 0
    local check    = root.check    or { }
    local added    = 0
    local function nest(node)
        local children = node.dt
        if not node.ix then
            maxindex        = maxindex + 1
            node.ix         = maxindex
            check[maxindex] = node.tg -- still needed ?
            index[maxindex] = node
            added           = added + 1
        end
        if children then
            for k=1,#children do
                local child = children[k]
                if type(child) == "table" then
                    nest(child)
                end
            end
        end
    end
    nest(root)
    nofindices    = nofindices + added
    root.index    = index
    root.maxindex = maxindex
    if trace_access then
        -- only a string name is worth showing
        local shown = type(name) == "string" and name or "unknown"
        report_lxml("indexed entries %a, found nodes %a",shown,maxindex)
    end
end

lxml.addindex = addindex
471
-- The tex end interface to addindex: one string argument, the name.
implement {
    name      = "xmladdindex",
    public    = true,
    actions   = addindex,
    arguments = "string",
}
478
479-- another cache
480
-- Applies an lpath pattern to the tree or node identified by id and passes
-- on whatever xml.applylpath returns.
local function lxmlapplylpath(id,pattern) -- better inline, saves call
    local root = getid(id)
    return xmlapplylpath(root,pattern)
end

lxml.filter = lxmlapplylpath
486
-- Applies the pattern to each id in a list. The ids are separated by commas
-- and/or spaces. The results are not collected.
function lxml.filterlist(list,pattern)
    for id in gmatch(list,"[^, ]+") do -- we could cache a table
        local root = getid(id)
        xmlapplylpath(root,pattern)
    end
end
492
-- Calls the function registered as xml.functions[name] with the tree or node
-- identified by id and returns its first result. When no such function is
-- registered the (false or nil) table entry is returned instead.
function lxml.applyfunction(id,name)
    local action = xml.functions[name]
    if not action then
        return action
    end
    -- the parentheses keep this a single result, as before
    return (action(getid(id)))
end
497
498-- rather new, indexed storage (backward refs), maybe i will merge this
499
-- Returns the index table of the tree identified by name, or 0 when the tree
-- cannot be found or has no index (yet).
function lxml.checkindex(name)
    local root = getid(name)
    if root then
        local index = root.index
        if index then
            return index
        end
    end
    return 0
end
504
-- Flushes \xmlw{command}{reference}. When n already has the "name::index"
-- form it is used as is, otherwise it is prefixed with name. There are two
-- variants: one that passes the tokenized \xmlw and one that passes the
-- macro as a string. The choice is made once, at load time.

if tokenizedxmlw then

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local _, qualified = lpegmatch(splitter,n)
        if not qualified then
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",name,"::",n,"}")
        else
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",n,"}")
        end
    end

else

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local _, qualified = lpegmatch(splitter,n)
        if not qualified then
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",name,"::",n,"}")
        else
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",n,"}")
        end
    end

end
528
-- Flushes a node reference: n itself when it already has the "name::index"
-- form, otherwise name, "::" and n.
function lxml.getindex(name,n) -- will change as name is always there now
    local _, qualified = lpegmatch(splitter,n)
    if not qualified then
        contextsprint(ctxcatcodes,name,"::",n)
    else
        contextsprint(ctxcatcodes,n)
    end
end
537
538-- loading (to be redone, no overload) .. best use different methods and
539-- keep raw xml (at least as option)
540
-- We keep a reference to the original loader because xml.load is replaced in
-- this file. The counters below are updated by the loaders in this file.

xml.originalload = xml.originalload or xml.load

local noffiles, nofconverted = 0, 0

-- line numbers are only tracked when synctex is enabled

local linenumbers = false

synctex.registerdisabler(function() linenumbers = false end)
synctex.registerenabler (function() linenumbers = true  end)
549
-- Loads a file and converts it into an xml table. The given settings table
-- (a new one when omitted) gets the linenumbers flag set, and carries the
-- filename as currentresource during the conversion. An empty string is
-- converted when the file could not be loaded.
function xml.load(filename,settings)
    noffiles     = noffiles + 1
    nofconverted = nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
    if not settings then
        settings = { }
    end
    settings.linenumbers     = linenumbers
    settings.currentresource = filename
    local content = ""
    if ok and data then
        content = data
    end
    local xmltable = xml.convert(content,settings)
    settings.currentresource = nil
    stoptiming(xml)
    return xmltable
end
562
-- Decides what an entity becomes while converting. Entities known at the tex
-- end become a private token, then the dtd entities (ent) are consulted,
-- then the predefined xml entities. Unknown ones become a private token as
-- well so that the original is kept somehow. The id argument is not used.
local function entityconverter(id,str,ent) -- todo: disable tex entities when raw
    -- tex driven entity
    if texentities[str] then
     -- only once
     -- context.xmlprivate(p,t)
        return (xmlprivatetoken(str))
    end
    -- dtd determined entity
    local fromdtd = ent and ent[str]
    if fromdtd then
        return fromdtd
    end
    -- predefined entity (mathml and so)
    local predefined = xmlentities[str]
    if predefined then
        return predefined
    end
    -- keep original somehow
    return xmlprivatetoken(str)
end
585
-- An optional preprocessor that is passed on to the converter.

lxml.preprocessor = nil

-- Converts a string into an xml table using the settings that we want at the
-- tex end. Entities are resolved by entityconverter. When compress equals
-- the interface variable "yes" the strip_cm_and_dt setting is enabled. The
-- currentresource (or else the id) is passed on as a string.
local function lxmlconvert(id,data,compress,currentresource)
    local function resolver(str,ent)
        return entityconverter(id,str,ent)
    end
    local strip = nil
    if compress and compress == variables.yes then
        strip = true
    end
    return xml.convert(data, { -- we're now roundtrip anyway
        unify_predefined_entities   = false, -- is also default
        utfize_entities             = true,  -- is also default
        resolve_predefined_entities = true,  -- is also default
        resolve_entities            = resolver,
        currentresource             = tostring(currentresource or id),
        preprocessor                = lxml.preprocessor,
        linenumbers                 = linenumbers,
        strip_cm_and_dt             = strip,
    })
end

lxml.convert = lxmlconvert
605
-- Loads a (prepared) file, converts it and stores the result under id. An
-- empty string is converted when the file could not be loaded. Returns the
-- xml table and the filename as reported by ctxrunner.preparedfile.
function lxml.load(id,filename,compress)
    filename = ctxrunner.preparedfile(filename)
    if trace_loading then
        report_lxml("loading file %a as %a",filename,id)
    end
    noffiles     = noffiles + 1
    nofconverted = nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
    local content = ""
    if ok and data then
        content = data
    end
 -- local xmltable = lxmlconvert(id,content,compress,formatters["id: %s, file: %s"](id,filename))
    local xmltable = lxmlconvert(id,content,compress,filename)
    stoptiming(xml)
    lxml.store(id,xmltable,filename)
    return xmltable, filename
end
620
-- Stores an already converted xml table under id (see lxml.store) and hands
-- the same table back to the caller.
function lxml.register(id,xmltable,filename)
    local tree = xmltable
    lxml.store(id,tree,filename)
    return tree
end
625
626-- recurse prepare rootpath resolve basename
627
-- The defaults are sets because the options are tested by key below. They
-- used to be arrays, in which case none of the keys was ever found, so the
-- downward compatible defaults had no effect.

local options_true = { recurse = true, prepare = true, rootpath = true }
local options_nil  = { prepare = true, rootpath = true }

-- Processes the inclusions in the tree identified by id.
--
--   pattern   : the lpath that selects the include elements
--   attribute : passed on to xml.include
--   options   : true (recurse, prepare, rootpath), nil or false (prepare,
--               rootpath) or a settings string
--
-- The options that we act upon:
--
--   recurse  : passed on to xml.include
--   prepare  : let the runner prepare the file first
--   basename : strip the path (handy if we have a flattened structure)
--   resolve  : resolve prefixes in the filename
--   rootpath : files without path are sought on the path of the root file
--
-- The loader returns the content and the found filename; each is an empty
-- string when not available.
function lxml.include(id,pattern,attribute,options)
    starttiming(xml)
    local root = getid(id)
    if options == true then
        -- downward compatible
        options = options_true
    elseif not options then
        -- downward compatible
        options = options_nil
    else
        options = settings_to_hash(options) or { }
    end
    xml.include(root,pattern,attribute,options.recurse,function(filename)
        if filename then
            -- preprocessing
            if options.prepare then
                -- lxml.load uses the ctxrunner variant, so we fall back on
                -- that one when the commands namespace doesn't provide it
                local preparedfile = (commands and commands.preparedfile) or (ctxrunner and ctxrunner.preparedfile)
                if preparedfile then
                    filename = preparedfile(filename) or filename
                end
            end
            -- handy if we have a flattened structure
            if options.basename then
                filename = basename(filename)
            end
            if options.resolve then
                filename = resolveprefix(filename) or filename
            end
            -- some protection
            if options.rootpath and dirname(filename) == "" and root.filename then
                local dn = dirname(root.filename)
                if dn ~= "" then
                    filename = joinfile(dn,filename)
                end
            end
            if trace_loading then
                report_lxml("including file %a",filename)
            end
            noffiles, nofconverted = noffiles + 1, nofconverted + 1
            return
                resolvers.loadtexfile(filename) or "",
                resolvers.findtexfile(filename) or ""
        else
            return ""
        end
    end)
    stoptiming(xml)
end
676
-- Flushes the inclusion reported by xml.inclusion for the given tree, when
-- there is one. With base set only the basename is flushed.
function lxml.inclusion(id,default,base)
    local name = xmlinclusion(getid(id),default)
    if not name then
        return
    end
    if base then
        name = basename(name) or name
    end
    context(name)
end
683
-- Flushes the inclusions of the given tree as a comma separated list.
function lxml.inclusions(id,sorted)
    local list = xmlinclusions(getid(id),sorted)
    if not list then
        return
    end
    local joined = concat(list,",")
    context(joined)
end
690
-- Flushes the bad inclusions of the given tree as a comma separated list.
function lxml.badinclusions(id,sorted)
    local list = xmlbadinclusions(getid(id),sorted)
    if not list then
        return
    end
    local joined = concat(list,",")
    context(joined)
end
697
-- Saves the tree or node identified by id under the given (file) name.
function lxml.save(id,name)
    local root = getid(id)
    xml.save(root,name)
end
701
-- Converts the content of a buffer and returns the serialized result. When
-- no name is given (nil or empty) the buffer named after the job is taken.
--
-- The serialized string used to be computed and then thrown away; returning
-- it is harmless for callers that ignore the result.
function xml.getbuffer(name,compress) -- we need to make sure that commands are processed
    if not name or name == "" then
        name = tex.jobname
    end
    nofconverted = nofconverted + 1
    local data = buffers.getcontent(name)
    return xmltostring(lxmlconvert(name,data,compress,format("buffer: %s",tostring(name or "?")))) -- one buffer
end
710
-- Converts the content of one or more buffers and stores the result under
-- id. When no name is given the id is used as buffer name. Returns the xml
-- table and the buffer name that was used.
function lxml.loadbuffer(id,name,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local source   = name or id
    local data     = buffers.collectcontent(source) -- name can be list
    local resource = format("buffer: %s",tostring(source or "?"))
    local xmltable = lxmlconvert(id,data,compress,resource)
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, source
end
720
-- Converts a string (an empty one when none is given) and stores the result
-- under id. Returns the xml table and the id.
function lxml.loaddata(id,str,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local resource = format("id: %s",id)
    local xmltable = lxmlconvert(id,str or "",compress,resource)
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, id
end
729
730-- e.command:
731--
732-- string   : setup
733-- true     : text (no <self></self>)
734-- false    : ignore
735-- function : call
736
-- The handler for doctype nodes: these produce no output at the tex end.
local function tex_doctype(e,handlers)
    -- ignore
end
740
-- The handler for comment nodes: nothing is flushed, but with the
-- lxml.comments tracker enabled the comment text (e.dt[1]) is reported.
local function tex_comment(e,handlers)
    if trace_comments then
        report_lxml("comment %a",e.dt[1])
    end
end
746
-- Elements without a command are handled by the element function of the
-- "verbose" handlers.

local default_element_handler = xml.gethandlers("verbose").functions["@el@"]

-- With synctex enabled setfilename holds synctex.setfilename, otherwise it
-- is false and no file and line information is passed on.

local setfilename, trace_name = false, false
local report_name             = logs.reporter("lxml")

synctex.registerdisabler(function() setfilename = false end)
synctex.registerenabler (function() setfilename = synctex.setfilename end)
755
-- Passes the file (e.cf) and line (e.cl, default 1) of a node to synctex.
-- Nodes without file information are skipped. The where argument is only
-- used for tracing. Callers check setfilename before calling this.
local function syncfilename(e,where)
    local filename = e.cf
    if not filename then
        return
    end
    local line = e.cl or 1
    if trace_name then
        report_name("set filename, case %a, tag %a, file %a, line %a",where,e.tg,filename,line)
    end
    setfilename(filename,line)
end
765end
766
-- The tracker "system.synctex.xml" makes syncfilename report each file and
-- line that it passes on.
trackers.register("system.synctex.xml",function(v)
    trace_name = v
end)
770
-- The element handler. What happens depends on e.command:
--
--   string   : flush \xmlw{command}{rootname::ix}, indexing the tree first
--              when the element has no ix yet
--   function : call it with the element
--   nil      : use the default (verbose) element handler
--   true     : serialize the content only, so no <self></self>
--   false    : ignore the element
--
-- There are two variants: one that passes the tokenized \xmlw and one that
-- passes the macro as a string. The choice is made once, at load time.

local tex_element

if tokenizedxmlw then

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        local kind    = type(command)
        if kind == "string" then
            local rootname = e.name
            if not rootname then
                report_lxml("fatal error: no index for %a",command)
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",e.ix or 0,"}")
                return
            end
            local ix = e.ix
            if not ix then
                addindex(rootname,false,true)
                ix = e.ix
            end
            -- lmtx only, same performance, a bit more immediate:
            --
            -- expandmacro(tokenizedxmlw,ctxcatcodes,true,command,true,rootname.."::"..ix)
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",rootname,"::",ix,"}")
        elseif kind == "function" then
            command(e)
        elseif kind == "nil" then
            default_element_handler(e,handlers)
        elseif command == true then
            -- text (no <self></self>) / so, no mkii fallback then
            handlers.serialize(e.dt,handlers)
        end
        -- false (and anything else) is ignored
    end

else

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        local kind    = type(command)
        if kind == "string" then
            local rootname = e.name
            if not rootname then
                report_lxml("fatal error: no index for %a",command)
                contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",e.ix or 0,"}")
             -- contextsprint(ctxcatcodes,xmlw[command][false],ix or 0,"}")
                return
            end
            local ix = e.ix
            if not ix then
                addindex(rootname,false,true)
                ix = e.ix
            end
         -- faster than context.xmlw
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",rootname,"::",ix,"}")
         -- contextsprint(ctxcatcodes,xmlw[command][rootname],ix,"}")
        elseif kind == "function" then
            command(e)
        elseif kind == "nil" then
            default_element_handler(e,handlers)
        elseif command == true then
            -- text (no <self></self>) / so, no mkii fallback then
            handlers.serialize(e.dt,handlers)
        end
        -- false (and anything else) is ignored
    end

end
851
852-- <?context-directive foo ... ?>
853-- <?context-foo-directive ... ?>
854
-- Handlers for processing instructions, keyed by category. They are called
-- as handler(category,rest,e) by tex_pi.

local pihandlers = { }  xml.pihandlers = pihandlers

local space    = S(" \n\r")              -- space, newline or return (no tab)
local spaces   = space^0
local class    = C((1-space)^0)          -- captures a run of non-space characters
local key      = class
local rest     = C(P(1)^0)               -- captures whatever is left
local value    = C(P(1-(space * -1))^0)  -- captures up to a space character that ends the string
-- Both forms yield the category as capture:
--   context-<category>-directive ...
--   context-directive <category> ...
local category = P("context-") * (
                    C((1-P("-"))^1) * P("-directive")
                  + P("directive") * spaces * key
                 )

-- c_parser gives: category, value (used by tex_pi on the whole instruction)
-- k_parser gives: class, key, rest (used on the value by installed directives)

local c_parser = category * spaces * value -- rest
local k_parser = class * spaces * key * spaces * rest --value
870
-- Installs a directive handler for the category name. The handler splits
-- what follows the category into class, key and rest and feeds these three
-- to the macro csname. Nothing is installed when csname is missing.
implement {
    name      = "xmlinstalldirective",
    arguments = "2 strings",
    actions   = function(name,csname)
        if not csname then
            return
        end
        local parser = k_parser / context[csname]
        pihandlers[name] = function(category,rest,e)
            lpegmatch(parser,rest)
        end
    end
}
884
-- The handler for processing instructions. Only the context directives are
-- recognized:
--
--   <?context-directive foo ... ?>
--   <?context-foo-directive ... ?>
--
-- When there is a handler for the category (here foo) and the remainder is
-- not empty, the handler is called with the category, the remainder and the
-- node.
local function tex_pi(e,handlers)
    local str = e.dt[1]
    if not str or str == "" then
        return
    end
    local category, rest = lpegmatch(c_parser,str)
    if not (category and rest and #rest > 0) then
        return
    end
    local handler = pihandlers[category]
    if handler then
        handler(category,rest,e)
    end
end
897
-- When obeycdata is true (the default) cdata sections are flushed verbatim
-- by tex_cdata; when false they are skipped.

local obeycdata = true

-- Enables the flushing of cdata sections.
function lxml.setcdata()
    obeycdata = true
end

-- Disables the flushing of cdata sections.
function lxml.resetcdata()
    obeycdata = false
end
907
-- The handler for cdata nodes: the content (e.dt[1]) is flushed with the
-- verbatim handler unless cdata has been disabled with lxml.resetcdata.
local function tex_cdata(e,handlers)
    if obeycdata then
        toverbatim(e.dt[1])
    end
end
913
914-- we could try to merge the conversion and flusher but we don't gain much and it makes tracing
915-- harder: xunspecialized = utf.remapper(xml.specialcodes,"dynamic",lxml.resolvedentity)
916
-- The handler for text: the string is first passed through
-- xml.unspecialized and then fed to the active text capture.
local function tex_text(e)
    -- the parentheses make sure that only the first result is passed on
    lpegmatch(xmltextcapture,(xmlunspecialized(e)))
end
921
922--
923
-- Feeds a string to the active ctx text capture, the variant created with
-- ctxcatcodes. Unlike tex_text the string is not passed through
-- xml.unspecialized first.
local function ctx_text(e) -- can be just context(e) as we split there
    lpegmatch(ctxtextcapture,e)
end
927
-- The generic flusher of the tex handlers: all arguments are printed under
-- the ctxcatcodes regime.
local function tex_handle(...)
    contextsprint(ctxcatcodes,...) -- notcatcodes is active anyway
end
931
-- The default handlers for flushing a tree to tex.

local xmltexhandler = xml.newhandlers {
    name       = "tex",
    handle     = tex_handle,
    functions  = {
     -- ["@dc@"]   = tex_document,
     -- ["@rt@"]   = tex_root,
        ["@el@"]   = tex_element,
        ["@tx@"]   = tex_text,
        ["@cd@"]   = tex_cdata,
        ["@pi@"]   = tex_pi,
        ["@cm@"]   = tex_comment,
        ["@dt@"]   = tex_doctype,
    }
}

lxml.xmltexhandler = xmltexhandler
948
949-- begin of test
950
-- Text handler of the spacewise handler: unspecialize the string and feed
-- it to the space capture.
local function tex_space(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmlspacecapture,str)
end
955
-- Same setup as the regular tex handler, except that text goes through
-- tex_space.
local xmltexspacehandler = xml.newhandlers({
    name      = "texspace",
    handle    = tex_handle,
    functions = {
        ["@dt@"] = tex_doctype,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_space,
    },
})
968
-- Text handler of the linewise handler: unspecialize the string and feed
-- it to the line capture.
local function tex_line(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmllinecapture,str)
end
973
-- Same setup as the regular tex handler, except that text goes through
-- tex_line.
local xmltexlinehandler = xml.newhandlers({
    name      = "texline",
    handle    = tex_handle,
    functions = {
        ["@dt@"] = tex_doctype,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_line,
    },
})
986
-- Serializes the content (dt) of the element identified by id with the
-- spacewise handler (keeps spaces and lines). Nothing happens when the id
-- does not resolve or when there is no content.
function lxml.flushspacewise(id)
    local e = getid(id)
    local content = e and e.dt
    if not content then
        return
    end
    xmlserialize(content,xmltexspacehandler)
end
994
-- Serializes the content (dt) of the element identified by id with the
-- linewise handler (keeps lines). Nothing happens when the id does not
-- resolve or when there is no content.
function lxml.flushlinewise(id)
    local e = getid(id)
    local content = e and e.dt
    if not content then
        return
    end
    xmlserialize(content,xmltexlinehandler)
end
1002
1003-- end of test
1004
-- Serializes the given root with the regular tex handler.
lxml.serialize = function(root)
    xmlserialize(root,xmltexhandler)
end
1008
-- Stores action in the command field of every element that pattern matches
-- in the tree identified by id.
function lxml.setaction(id,pattern,action)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        collected[c].command = action
    end
end
1020
-- Flushes root, which is either a string (can also be the result of an
-- lpath) or an element table; other values are ignored. The optional p is
-- only used for syncing the filename. In forceraw mode a table is turned
-- into a string and despecialized (unspecializing would loose < > & and a
-- plain sprint goes wrong with % etc); that variant goes to the toc.
local function sprint(root,p) -- check rawroot usage
    if not root then
        return
    end
    local kind = type(root)
    if kind == "string" then
        if setfilename and p then
            syncfilename(p,"sprint s")
        end
        local str = xmlunspecialized(root)
        lpegmatch(xmltextcapture,str)
    elseif kind ~= "table" then
        -- nothing to flush
    elseif forceraw then
        rawroot = root
        local str = xmldespecialized(xmltostring(root))
        lpegmatch(xmltextcapture,str)
    else
        if setfilename and p then -- and not root.cl
            syncfilename(p,"sprint t")
        end
        xmlserialize(root,xmltexhandler)
    end
end
1047
1048-- local function tprint(root) -- we can move sprint inline
1049--     local tr = type(root)
1050--     if tr == "table" then
1051--         local n = #root
1052--         if n == 0 then
1053--             -- skip
1054--         else
1055--             for i=1,n do
1056--                 sprint(root[i])
1057--             end
1058--         end
1059--     elseif tr == "string" then
1060--         root = xmlunspecialized(root)
1061--         lpegmatch(xmltextcapture,root)
1062--     end
1063-- end
1064
-- Flushes a list of strings and/or element tables, or a single string.
-- Each list entry is handled the way sprint does it (inlined because of
-- the filename syncing): strings are unspecialized and run through the
-- text capture, tables are serialized, or in forceraw mode converted to a
-- despecialized string first.
--
-- The converted string is kept in a local of its own. It used to be
-- assigned to 'root', which destroyed the list while it was being
-- traversed, and the capture was then fed the unconverted entry (a table
-- in the forceraw case).
local function tprint(root) -- we can move sprint inline
    local tr = type(root)
    if tr == "table" then
        for i=1,#root do
            local ri = root[i]
            local tri = type(ri)
            if tri == "string" then -- can also be result of lpath
                if setfilename then
                    syncfilename(ri,"tprint")
                end
                local str = xmlunspecialized(ri)
                lpegmatch(xmltextcapture,str)
            elseif tri == "table" then
                if forceraw then
                    rawroot = ri
                    local str = xmldespecialized(xmltostring(ri))
                    lpegmatch(xmltextcapture,str) -- goes to toc
                else
                    xmlserialize(ri,xmltexhandler)
                end
            end
        end
    elseif tr == "string" then
        local str = xmlunspecialized(root)
        lpegmatch(xmltextcapture,str)
    end
end
1098
-- Flushes the content of root. A string is unspecialized and run through
-- the text capture. For anything else the filename is synced (when
-- enabled) and either the content (dt, or root itself when there is no dt)
-- is serialized, or, in forceraw mode, root is converted to a string that
-- is unspecialized and captured (that variant goes to the toc).
local function cprint(root) -- content
    if not root then
        return
    end
    if type(root) == 'string' then
        local str = xmlunspecialized(root)
        lpegmatch(xmltextcapture,str)
        return
    end
    if setfilename then
        syncfilename(root,"cprint")
    end
    local content = root.dt
    if forceraw then
        rawroot = root
        local str = xmlunspecialized(xmltostring(root))
        lpegmatch(xmltextcapture,str)
    else
        xmlserialize(content or root,xmltexhandler)
    end
end
1122
-- the public variants

xml.sprint = sprint -- calls ct mathml   -> will be replaced
xml.tprint = tprint -- only used here
xml.cprint = cprint -- calls ct  mathml  -> will be replaced

-- the local shortcuts used in the rest of this file

local xmlsprint = sprint
local xmltprint = tprint
local xmlcprint = cprint
1126
1127-- now we can flush
1128
-- Serializes the tree identified by id, starting at the real root (@rt@).
function lxml.main(id)
    xmlserialize(getid(id),xmltexhandler)
end
1133
1134-- -- lines (untested)
1135--
1136-- local buffer = { }
1137--
1138-- local xmllinescapture = (
1139--     newline^2 / function()  buffer[#buffer+1] = "" end +
1140--     newline   / function()  buffer[#buffer] = buffer[#buffer] .. " " end +
1141--     content   / function(s) buffer[#buffer] = buffer[#buffer] ..  s  end
1142-- )^0
1143--
1144-- local xmllineshandler = table.copy(xmltexhandler)
1145--
1146-- xmllineshandler.handle = function(...) lpegmatch(xmllinescapture,concat{ ... }) end
1147--
1148-- function lines(root)
1149--     if not root then
1150--      -- rawroot = false
1151--      -- quit
1152--     elseif type(root) == 'string' then
1153--      -- rawroot = false
1154--         lpegmatch(xmllinescapture,root)
1155--     elseif next(root) then -- tr == 'table'
1156--         xmlserialize(root,xmllineshandler)
1157--     end
1158-- end
1159--
1160-- function xml.lines(root) -- used at all?
1161--     buffer = { "" }
1162--     lines(root)
1163--     return result
1164-- end
1165
-- Gives an element without a command a default one: false (i.e. skip) for
-- special elements other than the root, true (i.e. no <self></self>) for
-- all others. An already set command is left alone.
local function to_text(e)
    if e.command ~= nil then
        return
    end
    local tg = e.tg
    local skip = tg and e.special and tg ~= "@rt@"
    e.command = not skip
end
1176
-- Marks an element that has no command yet as to be skipped; an already
-- set command (including false) is left alone.
local function to_none(e)
    if e.command ~= nil then
        return
    end
    e.command = false -- i.e. skip
end
1182
1183-- setups
1184
-- The lists of installed setups, stored per document name.
local setups = { }

-- Applies to_text to the elements of the tree identified by id.
lxml.setcommandtotext = function(id)
    xmlwithelements(getid(id),to_text)
end

-- Applies to_none to the elements of the tree identified by id.
lxml.setcommandtonone = function(id)
    xmlwithelements(getid(id),to_none)
end
1194
-- Registers a setup for a document (default "*"). An earlier registration
-- of the same setup is dropped first, after which 'what' determines the
-- position:
--
--   1 : prepend
--   2 : append
--   3 : insert before the entry 'where'
--   4 : insert after the entry 'where'
--
-- Any other value of 'what' only removes the earlier registration.
function lxml.installsetup(what,document,setup,where)
    document = document or "*"
    local sd = setups[document]
    if not sd then
        sd = { }
        setups[document] = sd
    end
    -- we really remove the old entry: just niling it would leave a hole
    -- in the list and then the length (and therefore the inserts below)
    -- can no longer be trusted
    for k=1,#sd do
        if sd[k] == setup then
            remove(sd,k)
            break
        end
    end
    if what == 1 then
        if trace_loading then
            report_lxml("prepending setup %a for %a",setup,document)
        end
        insert(sd,1,setup)
    elseif what == 2 then
        if trace_loading then
            report_lxml("appending setup %a for %a",setup,document)
        end
        insert(sd,setup)
    elseif what == 3 then
        if trace_loading then
            report_lxml("inserting setup %a for %a before %a",setup,document,where)
        end
        insertbeforevalue(sd,setup,where)
    elseif what == 4 then
        if trace_loading then
            report_lxml("inserting setup %a for %a after %a",setup,document,where)
        end
        insertaftervalue(sd,setup,where)
    end
end
1224
-- Applies the setups of the given documents (the varargs) to id by
-- printing an \xmlsetup call for each of them. A setup that shows up in
-- more than one document list is only applied once.
function lxml.flushsetups(id,...)
    local applied = { }
    local nofdocuments = select("#",...)
    for i=1,nofdocuments do
        local document = select(i,...)
        local list = setups[document]
        if not list then
            if trace_loading then
                report_lxml("no setups for %a",document)
            end
        else
            for k=1,#list do
                local setup = list[k]
                if not applied[setup] then
                    if trace_loading then
                        report_lxml("applying setup %02i : %a to %a",k,setup,document)
                    end
                    contextsprint(ctxcatcodes,"\\xmlsetup{",id,"}{",setup,"}")
                    applied[setup] = true
                end
            end
        end
    end
end
1246
-- Replaces the list of setups of a document by a fresh empty one.
lxml.resetsetups = function(document)
    if trace_loading then
        report_lxml("resetting all setups for %a",document)
    end
    setups[document] = { }
end
1253
-- Removes (the first occurrence of) a setup from the list of a document.
-- Nothing happens when the document has no setups or when the setup is
-- not in its list.
function lxml.removesetup(document,setup)
    local s = setups[document]
    if not s then
        return
    end
    for i=1,#s do
        if s[i] == setup then
            if trace_loading then
                report_lxml("removing setup %a for %a",setup,document)
            end
            -- the list we loop over, not some undefined 't'
            remove(s,i)
            break
        end
    end
end
1268
do

    -- Calls action(e,c) for each element (with its position c) that
    -- pattern matches in the tree identified by id. When setups are traced
    -- the absence of matches is reported.
    local function foreachmatch(id,pattern,action)
        local collected = xmlapplylpath(getid(id),pattern)
        if collected then
            local nc = #collected
            if nc > 0 then
                for c=1,nc do
                    action(collected[c],c)
                end
            elseif trace_setups then
                report_lxml("%s lpath matches for pattern: %s","zero",pattern)
            end
        elseif trace_setups then
            report_lxml("%s lpath matches for pattern: %s","no",pattern)
        end
    end

    -- Sets the command of all elements matching pattern. The setup can be:
    --
    --   nil, "" or "*" : the tag of the element becomes the command
    --   "-"            : command false, i.e. skip
    --   "+"            : command true, i.e. text
    --   "prefix:*"     : the prefix (colon included) plus the tag
    --   "prefix:-"     : command false
    --   "prefix:+"     : command true
    --   anything else  : the setup itself
    --
    -- The letters in the trace messages identify the branch taken.
    function lxml.setsetup(id,pattern,setup)
        if not setup or setup == "" or setup == "*" or setup == "-" or setup == "+" then
            foreachmatch(id,pattern,function(e,c)
                if setup == "-" then
                    e.command = false
                    if trace_setups then
                        report_lxml("lpath matched (a) %5i: %s = %s -> skipped",c,e.ix or 0,setup)
                    end
                elseif setup == "+" then
                    e.command = true
                    if trace_setups then
                        report_lxml("lpath matched (b) %5i: %s = %s -> text",c,e.ix or 0,setup)
                    end
                elseif not trace_setups then
                    e.command = e.tg
                else
                    local tg = e.tg
                    if tg then -- to be sure
                        e.command = tg
                        local ix = e.ix or 0
                        local ns = e.rn or e.ns
                        if ns == "" then
                            report_lxml("lpath matched (c) %5i: %s = %s -> %s",c,ix,tg,tg)
                        else
                            report_lxml("lpath matched (d) %5i: %s = %s:%s -> %s",c,ix,ns,tg,tg)
                        end
                    end
                end
            end)
            return
        end
        local a, b = match(setup,"^(.+:)([%*%-%+])$")
        if a and b then
            foreachmatch(id,pattern,function(e,c)
                if not trace_setups then
                    if b == "-" then
                        e.command = false
                    elseif b == "+" then
                        e.command = true
                    else
                        e.command = a .. e.tg
                    end
                    return
                end
                local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
                if b == "-" then
                    e.command = false
                    if ns == "" then
                        report_lxml("lpath matched (e) %5i: %s = %s -> skipped",c,ix,tg)
                    else
                        report_lxml("lpath matched (f) %5i: %s = %s:%s -> skipped",c,ix,ns,tg)
                    end
                elseif b == "+" then
                    e.command = true
                    if ns == "" then
                        report_lxml("lpath matched (g) %5i: %s = %s -> text",c,ix,tg)
                    else
                        report_lxml("lpath matched (h) %5i: %s = %s:%s -> text",c,ix,ns,tg)
                    end
                else
                    e.command = a .. tg
                    if ns == "" then
                        report_lxml("lpath matched (i) %5i: %s = %s -> %s",c,ix,tg,e.command)
                    else
                        report_lxml("lpath matched (j) %5i: %s = %s:%s -> %s",c,ix,ns,tg,e.command)
                    end
                end
            end)
        else
            foreachmatch(id,pattern,function(e,c)
                e.command = setup
                if trace_setups then
                    local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
                    if ns == "" then
                        report_lxml("lpath matched (k) %5i: %s = %s -> %s",c,ix,tg,setup)
                    else
                        report_lxml("lpath matched (l) %5i: %s = %s:%s -> %s",c,ix,ns,tg,setup)
                    end
                end
            end)
        end
    end

end
1402
1403-- finalizers
1404
-- Finalizer: flushes the first collected entry, if there is one.
local function first(collected)
    if not collected or #collected == 0 then
        return
    end
    xmlsprint(collected[1])
end
1410
-- Finalizer: flushes the last collected entry, if there is one.
local function last(collected)
    local nc = collected and #collected or 0
    if nc > 0 then
        xmlsprint(collected[nc])
    end
end
1419
-- Finalizer: flushes all collected entries in order.
local function all(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        xmlsprint(collected[c])
    end
end
1430
-- Finalizer: flushes all collected entries, last one first.
local function reverse(collected)
    if not collected then
        return
    end
    for c=#collected,1,-1 do
        xmlsprint(collected[c])
    end
end
1441
-- Finalizer: prints the number of collected entries (0 when there is no
-- collection at all).
local function count(collected)
    local n = 0
    if collected then
        n = #collected
    end
    contextsprint(ctxcatcodes,n) -- why ctxcatcodes
end
1445
-- Finalizer: flushes the n-th collected entry. A negative n counts from
-- the end (-1 being the last entry); zero, a non-number or an out of range
-- n flushes nothing.
local function position(collected,n)
    -- todo: if not n then == match
    local nc = collected and #collected or 0
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    if n < 0 then
        n = nc + n + 1
    end
    local cn = n > 0 and collected[n]
    if cn then
        xmlsprint(cn)
    end
end
1465
-- Finalizer: prints the mi field of the first collected entry, or 0 when
-- there is no such entry or field. This is the match in the preceding
-- collected; never change this, see bibxml.
local function match(collected)
    local mi = 0
    local m = collected and collected[1]
    if m then
        mi = m.mi or 0
    end
    contextsprint(ctxcatcodes,mi) -- why ctxcatcodes
end
1470
-- Finalizer: prints the ni field of the n-th collected entry. A negative n
-- counts from the end. In all cases where there is no such entry or field
-- a 0 is printed, so there is always a result.
local function index(collected,n)
    local ni = 0
    local nc = collected and #collected or 0
    if nc > 0 then
        n = tonumber(n) or 0
        if n < 0 then
            n = nc + n + 1 -- brrr
        end
        local cn = n > 0 and collected[n]
        if cn then
            ni = cn.ni or 0
        end
    end
    contextsprint(ctxcatcodes,ni) -- why ctxcatcodes
end
1490
-- The number of different commands is often relatively small, but there can be
-- many calls to this finalizer.
1493
-- Finalizer: for each collected element an \xmlw{cmd}{name::ix} call is
-- printed, where each * in cmd is replaced by the tag of the element.
-- Elements that have a name but no index yet get one via addindex first,
-- and elements that end up without name or index are reported. Without
-- collected elements the 'otherwise' command (if given) is applied to #1.
--
-- The variant is chosen once: when tokenizedxmlw is available it is passed
-- instead of the "\\xmlw" string.
local command

if tokenizedxmlw then

    command = function(collected,cmd,otherwise)
        local n = collected and #collected or 0
        if n == 0 then
            if otherwise then
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",otherwise,"}{#1}")
            end
            return
        end
        local wildcard = find(cmd,"*",1,true)
        for c=1,n do -- maybe optimize for n=1
            local e = collected[c]
            local name, ix = e.name, e.ix
            if name and not ix then
                addindex(name,false,true)
                ix = e.ix
            end
            if not (ix and name) then
                report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
            elseif wildcard then
                local resolved = gsub(cmd,"%*",e.tg)
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",resolved,"}{",name,"::",ix,"}")
            else
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",cmd,"}{",name,"::",ix,"}")
            end
        end
    end

else

    command = function(collected,cmd,otherwise)
        local n = collected and #collected or 0
        if n == 0 then
            if otherwise then
                contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}")
            end
            return
        end
        local wildcard = find(cmd,"*",1,true)
        for c=1,n do -- maybe optimize for n=1
            local e = collected[c]
            local name, ix = e.name, e.ix
            if name and not ix then
                addindex(name,false,true)
                ix = e.ix
            end
            if not (ix and name) then
                report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
            elseif wildcard then
                local resolved = gsub(cmd,"%*",e.tg)
                contextsprint(ctxcatcodes,"\\xmlw{",resolved,"}{",name,"::",ix,"}")
            else
                contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}")
            end
        end
    end

end
1551
1552-- local wildcards = setmetatableindex(function(t,k)
1553--     local v = false
1554--     if find(k,"*",1,true) then
1555--         v = setmetatableindex(function(t,kk)
1556--             local v = gsub(k,"%*",kk)
1557--             t[k] = v
1558--          -- report_lxml("wildcard %a key %a value %a",kk,k,v)
1559--             return v
1560--         end)
1561--     end
1562--     t[k] = v
1563--     return v
1564-- end)
1565--
1566-- local function command(collected,cmd,otherwise)
1567--     local n = collected and #collected
1568--     if n and n > 0 then
1569--         local wildcard = wildcards[cmd]
1570--         for c=1,n do -- maybe optimize for n=1
1571--             local e = collected[c]
1572--             local ix = e.ix
1573--             local name = e.name
1574--             if name and not ix then
1575--                 addindex(name,false,true)
1576--                 ix = e.ix
1577--             end
1578--             if not ix or not name then
1579--                 report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
1580--             elseif wildcard then
1581--                 contextsprint(ctxcatcodes,"\\xmlw{",wildcard[e.tg],"}{",name,"::",ix,"}")
1582--             else
1583--                 contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}")
1584--             end
1585--         end
1586--     elseif otherwise then
1587--         contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}")
1588--     end
1589-- end
1590
-- Finalizer: prints attribute a of the first collected element under
-- notcatcodes and falls back on default when the attribute is not there.
-- An empty value found via an element is not printed.
local function attribute(collected,a,default)
    local str
    if collected and #collected > 0 then
        local at = collected[1].at
        str = (at and at[a]) or default
        if str == "" then
            str = nil
        end
    else
        str = default
    end
    if str then
        contextsprint(notcatcodes,str)
    end
end
1602
-- Finalizer: prints entry p of the pa table of the first collected element
-- under notcatcodes and falls back on default when there is no such entry.
-- An empty value found via an element is not printed.
local function parameter(collected,p,default)
    local str
    if collected and #collected > 0 then
        local pa = collected[1].pa
        str = (pa and pa[p]) or default
        if str == "" then
            str = nil
        end
    else
        str = default
    end
    if str then
        contextsprint(notcatcodes,str)
    end
end
1614
-- Finalizer: looks for the attribute named by 'arguments', starting at the
-- first collected element and going up via the parents (__p__). The first
-- value found is printed; when nothing is found (or an element without
-- attribute table is hit) the default gets printed, if given.
local function chainattribute(collected,arguments,default) -- todo: optional levels
    local e = collected and #collected > 0 and collected[1]
    while e do
        local at = e.at
        if not at then
            break -- error
        end
        local a = at[arguments]
        if a then
            contextsprint(notcatcodes,a)
            return
        end
        e = e.__p__
    end
    if default then
        contextsprint(notcatcodes,default)
    end
end
1636
-- Finalizer: prints the path from the root to the first collected element
-- as slash separated steps. A step is the tag, prefixed by "ns:" when
-- there is a namespace (unless nonamespace is set) and followed by the
-- "[ei]" of the element, except for elements directly below the root.
local function chainpath(collected,nonamespace)
    if not collected or #collected == 0 then
        return
    end
    local steps = { }
    local e = collected[1]
    while e do
        local tg = e.tg
        if tg == "@rt@" then
            break
        end
        local parent = e.__p__
        local ns = e.ns
        local step
        if nonamespace or not ns or ns == "" then
            step = tg
        else
            step = ns .. ":" .. tg
        end
        if parent.tg ~= "@rt@" then
            step = step .. "[" .. e.ei .. "]"
        end
        steps[#steps+1] = step
        e = parent
    end
    -- we collected from the inside out
    contextsprint(notcatcodes,concat(reversed(steps),"/"))
end
1665
-- Finalizer: flushes the content of all collected entries.
local function text(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        cprint(collected[c])
    end
end
1678
-- Finalizer: prints the dt of each collected element under ctxcatcodes.
local function ctxtext(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        contextsprint(ctxcatcodes,collected[c].dt)
    end
end
1689
-- Finalizer: flushes the content of each collected element after passing
-- it through xmlstripelement; tricky as we strip in place.
local function stripped(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = xmlstripelement(collected[c])
        cprint(e)
    end
end
1700
-- Finalizer: prints the lowercased first data entry of each collected
-- element under ctxcatcodes. Nothing is printed without a collection.
-- (The test on collected used to be inverted, so this finalizer either
-- crashed or did nothing.)
local function lower(collected)
    if collected then
        for c=1,#collected do
            contextsprint(ctxcatcodes,lowerchars(collected[c].dt[1]))
        end
    end
end
1711
-- Finalizer: prints the uppercased first data entry of each collected
-- element under ctxcatcodes.
local function upper(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local str = collected[c].dt[1]
        contextsprint(ctxcatcodes,upperchars(str))
    end
end
1722
-- Finalizer: prints the sum of the numeric values of the first data entry
-- of all collected elements; without a collection a 0 is printed. Entries
-- that are missing or that are not a number count as zero (the fallback
-- used to sit inside the tonumber call, so non-numeric content resulted in
-- arithmetic on nil).
local function number(collected)
    local n = 0
    if collected then
        for c=1,#collected do
            n = n + (tonumber(collected[c].dt[1]) or 0)
        end
    end
    contextsprint(ctxcatcodes,n)
end
1733
-- Finalizer: flushes the collected entries start..stop with separators in
-- between. An empty or non-numeric start/stop means first/last and a
-- negative stop is relative to the number of entries (-1 == last-1). With
-- textonly only the content of the entries is flushed. After the entry
-- just before the last collected one the lastseparator (defaults to the
-- separator) is used, after the last collected one nothing.
local function concatrange(collected,start,stop,separator,lastseparator,textonly) -- test this on mml
    local nofcollected = collected and #collected or 0
    if nofcollected == 0 then
        return
    end
    separator = separator or ""
    lastseparator = lastseparator or separator or ""
    if start == "" then
        start = 1
    else
        start = tonumber(start) or 1
    end
    if stop == "" then
        stop = nofcollected
    else
        stop = tonumber(stop) or nofcollected
    end
    if stop < 0 then
        stop = nofcollected + stop
    end
    local flush = textonly and xmlcprint or xmlsprint
    for i=start,stop do
        flush(collected[i])
        if i ~= nofcollected then
            if i == nofcollected-1 and lastseparator ~= "" then
                contextsprint(ctxcatcodes,lastseparator)
            elseif separator ~= "" then
                contextsprint(ctxcatcodes,separator)
            end
        end
    end
end
1759
-- Finalizer: concatrange over the whole collection (no start and stop).
local function concatlist(collected,separator,lastseparator,textonly) -- test this on mml
    local start, stop = false, false
    concatrange(collected,start,stop,separator,lastseparator,textonly)
end
1763
-- Finalizer: prints the depth of the first collected element, that is the
-- number of elements in its chain of parents (__p__), itself included.
-- A 0 is printed when there is no collection, when it is empty (this used
-- to crash) or when the first entry has no tag.
local function depth(collected)
    local d = 0
    local c = collected and collected[1]
    if c and c.tg then
        while c do
            d = d + 1
            c = c.__p__
        end
    end
    contextsprint(ctxcatcodes,d)
end
1780
-- Here we register the finalizers defined above. Some are available under
-- two names (param/parameter and ctxtext/context) and 'all' also serves
-- as the default.

texfinalizers.first = first
texfinalizers.last = last
texfinalizers.all = all
texfinalizers.reverse = reverse
texfinalizers.count = count
texfinalizers.command = command
texfinalizers.attribute = attribute
texfinalizers.param = parameter
texfinalizers.parameter = parameter
texfinalizers.text = text
texfinalizers.stripped = stripped
texfinalizers.lower = lower
texfinalizers.upper = upper
texfinalizers.ctxtext = ctxtext
texfinalizers.context = ctxtext
texfinalizers.position = position
texfinalizers.match = match
texfinalizers.index = index
texfinalizers.concat = concatlist
texfinalizers.concatrange = concatrange
texfinalizers.chainattribute = chainattribute
texfinalizers.chainpath = chainpath
texfinalizers.default = all -- !!
texfinalizers.depth = depth
1805
-- Finalizer: prints the tag of the n-th collected element. No n (or 0)
-- means the first one and a negative n counts from the end (-1 being the
-- last), which is how the position and index finalizers treat n. It used
-- to be that n == 1 gave the last element and that a negative n ended up
-- beyond the end of the list. Nothing is printed when n is out of range.
function texfinalizers.tag(collected,n)
    local nc = collected and #collected or 0
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    if c then
        contextsprint(ctxcatcodes,c.tg)
    end
end
1825
-- Finalizer: prints the name of the n-th collected element, that is the
-- tag, prefixed by "ns:" when the element has a namespace. No n (or 0)
-- means the first one and a negative n counts from the end (-1 being the
-- last), which is how the position and index finalizers treat n. As in
-- the tag finalizer n is converted to a number first: a string n used to
-- trigger a comparison error. It also used to be that n == 1 gave the last
-- element and that a negative n ended up beyond the end of the list.
function texfinalizers.name(collected,n)
    local nc = collected and #collected or 0
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    if c then
        local ns = c.ns
        if not ns or ns == "" then
            contextsprint(ctxcatcodes,c.tg)
        else
            contextsprint(ctxcatcodes,ns,":",c.tg)
        end
    end
end
1849
-- Finalizer: prints the tags of all collected elements, each prefixed by
-- "ns:" when the element has a namespace and nonamespace is not set.
function texfinalizers.tags(collected,nonamespace)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        local ns = e.ns
        local plain = nonamespace or (not ns or ns == "")
        if plain then
            contextsprint(ctxcatcodes,e.tg)
        else
            contextsprint(ctxcatcodes,ns,":",e.tg)
        end
    end
end
1866
1867--
1868
-- Flushes the content of the element identified by id verbatim. The
-- optional 'before' is printed first, followed by the tag of the element
-- (or "?") between square brackets; the optional 'after' is printed last.
-- An id that does not resolve flushes nothing.
local function verbatim(id,before,after)
    local e = getid(id)
    if not e then
        return
    end
    if before then
        contextsprint(ctxcatcodes,before,"[",e.tg or "?","]")
    end
    lxml.toverbatim(xmltostring(e.dt)) -- lxml.toverbatim(xml.totext(e.dt))
    if after then
        contextsprint(ctxcatcodes,after)
    end
end
1877
-- Verbatim content of element id wrapped in the inline verbatim macros.
function lxml.inlineverbatim(id)
    local before = "\\startxmlinlineverbatim"
    local after  = "\\stopxmlinlineverbatim"
    verbatim(id,before,after)
end
1881
-- Verbatim content of element id wrapped in the display verbatim macros.
function lxml.displayverbatim(id)
    local before = "\\startxmldisplayverbatim"
    local after  = "\\stopxmldisplayverbatim"
    verbatim(id,before,after)
end

-- the generic variant takes its own before and after wrappers
lxml.verbatim = verbatim
1887
1888-- helpers
1889
-- Hand the element that id resolves to, wrapped in a list, to the depth
-- finalizer.
function lxml.depth(id)
    depth({ getid(id) })
end
1893
-- Apply pattern to element id and pass the matches to the first finalizer.
function lxml.first(id,pattern)
    local matches = xmlapplylpath(getid(id),pattern)
    if not matches then
        return
    end
    first(matches)
end
1900
-- Apply pattern to element id and pass the matches to the last finalizer.
function lxml.last(id,pattern)
    local matches = xmlapplylpath(getid(id),pattern)
    if not matches then
        return
    end
    last(matches)
end
1907
-- Apply pattern to element id and pass the matches to the all finalizer.
function lxml.all(id,pattern)
    local matches = xmlapplylpath(getid(id),pattern)
    if not matches then
        return
    end
    all(matches)
end
1914
-- Pass the matches of pattern on element id to the count finalizer.
function lxml.count(id,pattern)
    local root = getid(id)
    -- always needs to produce a result so the outcome is passed on untested
    count(xmlapplylpath(root,pattern))
end
1919
-- Apply pattern to element id and pass the matches, the attribute name a and
-- the default to the attribute finalizer.
function lxml.attribute(id,pattern,a,default)
    local matches = xmlapplylpath(getid(id),pattern)
    if not matches then
        return
    end
    attribute(matches,a,default)
end
1926
-- Apply pattern to element id and pass the matches, the parameter name p and
-- the default to the parameter finalizer.
function lxml.parameter(id,pattern,p,default)
    local matches = xmlapplylpath(getid(id),pattern)
    if not matches then
        return
    end
    parameter(matches,p,default)
end

lxml.param = lxml.parameter
1935
-- Flush the serialized content of an element under notcatcodes. With a
-- pattern the first match is used; without one the element that id resolves
-- to is used. Nothing is flushed when there is no element or no content.
function lxml.raw(id,pattern) -- the content, untouched by commands
    local e
    if pattern then
        local collected = xmlapplylpath(getid(id),pattern)
        e = collected and collected[1]
    else
        -- the element itself is the target here; it must not be indexed as
        -- if it were a list of matches (lxml.context does the same)
        e = getid(id)
    end
    local dt = e and e.dt
    if dt then
        local s = xmltostring(dt)
        if s ~= "" then
            contextsprint(notcatcodes,s)
        end
    end
end
1945
1946-- templates
1947
-- Like lxml.raw, but the serialized content is passed through p_texescape
-- before it is flushed (the unescaped string is used when that match fails).
-- With a pattern the first match is used; without one the element that id
-- resolves to is used.
function lxml.rawtex(id,pattern) -- the content, untouched by commands
    local e
    if pattern then
        local collected = xmlapplylpath(getid(id),pattern)
        e = collected and collected[1]
    else
        -- the element itself is the target here; it must not be indexed as
        -- if it were a list of matches (lxml.context does the same)
        e = getid(id)
    end
    local dt = e and e.dt
    if dt then
        local s = xmltostring(dt)
        if s ~= "" then
            contextsprint(notcatcodes,lpegmatch(p_texescape,s) or s)
        end
    end
end
1957
-- Pass the first child of an element to ctx_text: the first match when a
-- pattern is given, else the element that id resolves to.
function lxml.context(id,pattern) -- the content, untouched by commands
    local root = getid(id)
    if pattern then
        local matches = xmlapplylpath(root,pattern)
        if matches and #matches > 0 then
            ctx_text(matches[1].dt[1])
        end
    elseif root then
        local data = root.dt
        if data and #data > 0 then
            ctx_text(data[1])
        end
    end
end
1974
-- Pass content to the text finalizer: the list of matches when a pattern is
-- given, else the data of the element that id resolves to.
function lxml.text(id,pattern)
    local root = getid(id)
    if pattern then
        local matches = xmlapplylpath(root,pattern)
        if matches and #matches > 0 then
            text(matches)
        end
    elseif root then
        text(root.dt)
    end
end
1988
-- Same selection as lxml.text, but while the text finalizer runs parsedentity
-- is switched to unescapedentity; afterwards it is set to reparsedentity.
function lxml.pure(id,pattern)
    if not pattern then
        parsedentity = unescapedentity
        local element = getid(id)
        if element then
            text(element.dt)
        end
        parsedentity = reparsedentity
        return
    end
    local matches = xmlapplylpath(getid(id),pattern)
    if matches and #matches > 0 then
        parsedentity = unescapedentity
        text(matches)
        parsedentity = reparsedentity
    end
end

lxml.content = text
2008
-- Pass the matches of pattern on element id plus the numeric n to the
-- position finalizer.
function lxml.position(id,pattern,n)
    local matches = xmlapplylpath(getid(id),pattern)
    position(matches,tonumber(n))
end
2012
-- Pass the matches of pattern on element id, the attribute name a and the
-- default to the chainattribute finalizer.
function lxml.chainattribute(id,pattern,a,default)
    local matches = xmlapplylpath(getid(id),pattern)
    chainattribute(matches,a,default)
end
2016
-- Pass the matches of pattern on element id and the nonamespace flag to the
-- chainpath finalizer.
function lxml.path(id,pattern,nonamespace)
    local matches = xmlapplylpath(getid(id),pattern)
    chainpath(matches,nonamespace)
end
2020
-- Pass the matches of pattern on element id to concatrange together with the
-- range and separator settings.
function lxml.concatrange(id,pattern,start,stop,separator,lastseparator,textonly) -- test this on mml
    local matches = xmlapplylpath(getid(id),pattern)
    concatrange(matches,start,stop,separator,lastseparator,textonly)
end
2024
-- Like lxml.concatrange, with false passed for both start and stop.
function lxml.concat(id,pattern,separator,lastseparator,textonly)
    local matches = xmlapplylpath(getid(id),pattern)
    concatrange(matches,false,false,separator,lastseparator,textonly)
end
2028
-- Pass the matches of "/*" on element id plus the numeric n to the position
-- finalizer.
function lxml.element(id,n)
    local children = xmlapplylpath(getid(id),"/*")
    position(children,tonumber(n)) -- tonumber handy
end

lxml.index = lxml.position
2034
-- Flush the ni field of element id, or 0 when the element or the field is
-- missing.
function lxml.pos(id)
    local element = getid(id)
    local index   = 0
    if element and element.ni then
        index = element.ni
    end
    contextsprint(ctxcatcodes,index)
end
2039
2040do
2041
2042    local att
2043
2044    function lxml.att(id,a,default)
2045        local e = getid(id)
2046        if e then
2047            local at = e.at
2048            if at then
2049                -- normally always true
2050                att = at[a]
2051                if not att then
2052                    if default and default ~= "" then
2053                        att = default
2054                        contextsprint(notcatcodes,default)
2055                    end
2056                elseif att ~= "" then
2057                    contextsprint(notcatcodes,att)
2058                else
2059                    -- explicit empty is valid
2060                end
2061            elseif default and default ~= "" then
2062                att = default
2063                contextsprint(notcatcodes,default)
2064            end
2065        elseif default and default ~= "" then
2066            att = default
2067            contextsprint(notcatcodes,default)
2068        else
2069            att = ""
2070        end
2071    end
2072
2073    function lxml.ifatt(id,a,value)
2074        local e = getid(id)
2075        if e then
2076            local at = e.at
2077            att = at and at[a] or ""
2078        else
2079            att = ""
2080        end
2081        return att == value
2082    end
2083
2084    function lxml.ifattempty(id,a)
2085        local e = getid(id)
2086        if e then
2087            local at = e.at
2088            att = at and at[a] or ""
2089        else
2090            att = ""
2091        end
2092        return att == ""
2093    end
2094
2095    function lxml.refatt(id,a)
2096        local e = getid(id)
2097        if e then
2098            local at = e.at
2099            if at then
2100                att = at[a]
2101                if att and att ~= "" then
2102                    att = gsub(att,"^#+","")
2103                    if att ~= "" then
2104                        contextsprint(notcatcodes,att)
2105                        return
2106                    end
2107                end
2108            end
2109        end
2110        att = ""
2111    end
2112
2113    function lxml.lastatt()
2114        contextsprint(notcatcodes,att)
2115    end
2116
2117    local ctx_doif     = commands.doif
2118    local ctx_doifnot  = commands.doifnot
2119    local ctx_doifelse = commands.doifelse
2120
2121    implement {
2122        name      = "xmldoifatt",
2123        arguments = "3 strings",
2124        public    = true,
2125        actions   = function(id,k,v)
2126            local e = getid(id)
2127            ctx_doif(e and e.at[k] == v or false)
2128        end
2129    }
2130
2131    implement {
2132        name      = "xmldoifnotatt",
2133        arguments = "3 strings",
2134        public    = true,
2135        actions   = function(id,k,v)
2136            local e = getid(id)
2137            ctx_doifnot(e and e.at[k] == v or false)
2138        end
2139    }
2140
2141    implement {
2142        name      = "xmldoifelseatt",
2143        arguments = "3 strings",
2144        public    = true,
2145        actions   = function(id,k,v)
2146            local e = getid(id)
2147            ctx_doifelse(e and e.at[k] == v or false)
2148        end
2149    }
2150
2151end
2152
2153do
2154
2155    local par
2156
2157    function lxml.par(id,p,default)
2158        local e = getid(id)
2159        if e then
2160            local pa = e.pa
2161            if pa then
2162                -- normally always true
2163                par = pa[p]
2164                if not par then
2165                    if default and default ~= "" then
2166                        par = default
2167                        contextsprint(notcatcodes,default)
2168                    end
2169                elseif par ~= "" then
2170                    contextsprint(notcatcodes,par)
2171                else
2172                    -- explicit empty is valid
2173                end
2174            elseif default and default ~= "" then
2175                par = default
2176                contextsprint(notcatcodes,default)
2177            end
2178        elseif default and default ~= "" then
2179            par = default
2180            contextsprint(notcatcodes,default)
2181        else
2182            par = ""
2183        end
2184    end
2185
2186    function lxml.lastpar()
2187        contextsprint(notcatcodes,par)
2188    end
2189
2190end
2191
-- Flush the name of element id: "ns:tg" when rn or ns holds a non-empty
-- namespace (rn is preferred), just "tg" otherwise.
function lxml.name(id)
    local element = getid(id)
    if not element then
        return
    end
    local namespace = element.rn or element.ns
    if not namespace or namespace == "" then
        contextsprint(ctxcatcodes,element.tg)
    else
        contextsprint(ctxcatcodes,namespace,":",element.tg)
    end
end
2203
-- Flush the mi field of element id, or 0 when the element or the field is
-- missing.
function lxml.match(id)
    local element = getid(id)
    local index   = 0
    if element and element.mi then
        index = element.mi
    end
    contextsprint(ctxcatcodes,index)
end
2208
-- Flush the tag of element id when there is a non-empty one.
function lxml.tag(id) -- tag vs name -> also in l-xml tag->name
    local element = getid(id)
    local tag     = element and element.tg
    if tag and tag ~= "" then
        contextsprint(ctxcatcodes,tag)
    end
end
2218
-- Flush the namespace of element id (rn is preferred over ns) when there is
-- a non-empty one.
function lxml.namespace(id)
    local element = getid(id)
    if not element then
        return
    end
    local namespace = element.rn or element.ns
    if namespace and namespace ~= "" then
        contextsprint(ctxcatcodes,namespace)
    end
end
2228
-- Pass the data of element id, with the element itself as second argument,
-- to xmlsprint.
function lxml.flush(id)
    local element = getid(id)
    local data    = element and element.dt
    if data then
        xmlsprint(data,element)
    end
end
2238
-- Pass what xmllastmatch returns (if anything) to the all finalizer.
function lxml.lastmatch()
    local matches = xmllastmatch()
    if not matches then
        return
    end
    all(matches)
end

lxml.pushmatch = xmlpushmatch
lxml.popmatch  = xmlpopmatch
2248
-- Pass the i-th entry in the data of element id, with the element as second
-- argument, to xmlsprint; nothing happens when that entry is missing.
function lxml.snippet(id,i)
    local element = getid(id)
    local data    = element and element.dt
    local snippet = data and data[i]
    if snippet then
        xmlsprint(snippet,element)
    end
end
2261
-- Pass element id itself to xmlsprint.
function lxml.direct(id)
    local element = getid(id)
    if not element then
        return
    end
    xmlsprint(element)
end
2268
do

    -- Build lxml.command(id,pattern,cmd) around a flusher. For every match of
    -- pattern the flusher gets cmd, the root name and the index (ix) of the
    -- match; a match without index triggers addindex first.
    local function newcommand(flush)
        return function(id,pattern,cmd)
            local i, p = getid(id,true)
            local collected = xmlapplylpath(getid(i),pattern) -- again getid?
            local nc = collected and #collected or 0
            if nc > 0 then
                local rootname = p or i.name
                for c=1,nc do
                    local e  = collected[c]
                    local ix = e.ix
                    if not ix then
                        addindex(rootname,false,true)
                        ix = e.ix
                    end
                    flush(cmd,rootname,ix)
                end
            end
        end
    end

    -- the two variants only differ in how the call is started

    if tokenizedxmlw then

        lxml.command = newcommand(function(cmd,rootname,ix)
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",cmd,"}{",rootname,"::",ix,"}")
        end)

    else

        lxml.command = newcommand(function(cmd,rootname,ix)
            contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",rootname,"::",ix,"}")
        end)

    end

end
2314
2315-- loops
2316
-- Resolve id and return whatever xmlcollected returns for it.
function lxml.collected(id,pattern,reverse)
    local root = getid(id)
    return xmlcollected(root,pattern,reverse)
end
2320
-- Resolve id and return whatever xmlelements returns for it.
function lxml.elements(id,pattern,reverse)
    local root = getid(id)
    return xmlelements(root,pattern,reverse)
end
2324
2325-- testers
2326
2327do
2328
2329    local found, empty = xml.found, xml.empty
2330
2331    local doif, doifnot, doifelse = commands.doif, commands.doifnot, commands.doifelse
2332
2333    function lxml.doif         (id,pattern) doif    (found(getid(id),pattern)) end
2334    function lxml.doifnot      (id,pattern) doifnot (found(getid(id),pattern)) end
2335    function lxml.doifelse     (id,pattern) doifelse(found(getid(id),pattern)) end
2336    function lxml.doiftext     (id,pattern) doif    (not empty(getid(id),pattern)) end
2337    function lxml.doifnottext  (id,pattern) doifnot (not empty(getid(id),pattern)) end
2338    function lxml.doifelsetext (id,pattern) doifelse(not empty(getid(id),pattern)) end
2339
2340    -- special case: "*" and "" -> self else lpath lookup
2341
2342    local function checkedempty(id,pattern)
2343        local e = getid(id)
2344        if not pattern or pattern == "" then
2345            local dt = e.dt
2346            local nt = #dt
2347            return (nt == 0) or (nt == 1 and dt[1] == "")
2348        else
2349            return empty(getid(id),pattern)
2350        end
2351    end
2352
2353    xml.checkedempty = checkedempty
2354
2355    function lxml.doifempty    (id,pattern) doif    (checkedempty(id,pattern)) end
2356    function lxml.doifnotempty (id,pattern) doifnot (checkedempty(id,pattern)) end
2357    function lxml.doifelseempty(id,pattern) doifelse(checkedempty(id,pattern)) end
2358
2359end
2360
2361-- status info
2362
-- Report the elapsed xml time and the file counters, but only when at least
-- one file was loaded or converted.
statistics.register("xml load time", function()
    local active = noffiles > 0 or nofconverted > 0
    if not active then
        return nil
    end
    local elapsed = statistics.elapsedtime(xml)
    return format("%s seconds, %s files, %s converted", elapsed, noffiles, nofconverted)
end)
2370
-- Report the elapsed lxml time, the number of indexed nodes and the lpath
-- call counters, but only when files were processed and lpath was used.
statistics.register("lxml preparation time", function()
    if not (noffiles > 0 or nofconverted > 0) then
        -- pretty close to zero so not worth mentioning
        return
    end
    local calls  = xml.lpathcalls()
    local cached = xml.lpathcached()
    if not (calls > 0 or cached > 0) then
        return nil
    end
    return format("%s seconds, %s nodes, %s lpath calls, %s cached calls",
        statistics.elapsedtime(lxml), nofindices, calls, cached)
end)
2385
-- When xml.profiled holds data: write one line per pattern (sorted) with its
-- tested, matched and finalized counts to the log file and return a summary
-- with the totals.
statistics.register("lxml lpath profile", function()
    local profiled = xml.profiled
    if not (profiled and next(profiled)) then
        return nil
    end
    local names = table.sortedkeys(profiled)
    local count = #names
    local tested, matched, finalized = 0, 0, 0
    logs.pushtarget("logfile")
    logs.writer("\nbegin of lxml profile\n")
    logs.writer("\n   tested    matched  finalized    pattern\n\n")
    for index=1,count do
        local name  = names[index]
        local entry = profiled[name]
        local t, m, f = entry.tested, entry.matched, entry.finalized
        tested    = tested + t
        matched   = matched + m
        finalized = finalized + f
        logs.writer(format("%9i  %9i  %9i    %s",t,m,f,name))
    end
    logs.writer("\nend of lxml profile\n")
    logs.poptarget()
    return format("%s patterns, %s tested, %s matched, %s finalized (see log for details)",count,tested,matched,finalized)
end)
2408
2409-- misc
2410
-- Collect the matches of pattern below element id (third argument true) and
-- pass the result to xmltprint.
function lxml.nonspace(id,pattern) -- slow, todo loop
    local root = getid(id)
    xmltprint(xmlcollect(root,pattern,true))
end
2414
-- Resolve id and delegate to xml.strip with the given options.
function lxml.strip(id,pattern,nolines,anywhere,everywhere)
    local root = getid(id)
    xml.strip(root,pattern,nolines,anywhere,everywhere)
end
2418
-- Flush the text found by pattern below element id with leading and trailing
-- whitespace removed; with nolines every whitespace run becomes one space.
function lxml.stripped(id,pattern,nolines)
    local root    = getid(id)
    local content = xmltext(root,pattern) or ""
    local result  = gsub(content,"^%s*(.-)%s*$","%1")
    if nolines then
        result = gsub(result,"%s+"," ")
    end
    xmlsprint(result,root)
end
2428
-- Resolve id and delegate to xml.delete.
function lxml.delete(id,pattern)
    local root = getid(id)
    xml.delete(root,pattern)
end

lxml.obsolete = { }

lxml.get_id = getid   lxml.obsolete.get_id = getid
2436
2437-- goodies:
2438
-- Finalizer: flush the first child of every collected element after passing
-- it through lettered.
function texfinalizers.lettered(collected)
    if not collected then
        return
    end
    for index=1,#collected do
        local element = collected[index]
        contextsprint(ctxcatcodes,lettered(element.dt[1]))
    end
end
2449
2450-- function texfinalizers.apply(collected,what) -- to be tested
2451--     if collected then
2452--         for c=1,#collected do
2453--             contextsprint(ctxcatcodes,what(collected[c].dt[1]))
2454--         end
2455--     end
2456-- end
2457
-- Flush the attributes of element id as a comma separated key=value list;
-- the order follows the table traversal, so it is not fixed.
function lxml.toparameters(id)
    local element    = getid(id)
    local attributes = element and element.at
    if attributes and next(attributes) then
        local list, n = { }, 0
        for key, value in next, attributes do
            n = n + 1
            list[n] = key .. "=" .. value
        end
        -- tracing
        context(concat(list,","))
    end
end
2474
2475local template = '<?xml version="1.0" ?>\n\n<!-- %s -->\n\n%s'
2476
-- Save the first match of pattern below element id to filename, preceded by
-- an xml declaration and a comment line (default: "exported fragment").
-- When nothing matches the file is removed, so that no old content is left.
function lxml.tofile(id,pattern,filename,comment)
    local collected = xmlapplylpath(getid(id),pattern)
    -- an empty list of matches has to be treated like no list at all, else
    -- the string "nil" ends up in the file
    local fragment = collected and collected[1]
    if fragment then
        io.savedata(filename,format(template,comment or "exported fragment",tostring(fragment)))
    else
        os.remove(filename) -- get rid of old content
    end
end
2485
2486texfinalizers.upperall = xmlfinalizers.upperall
2487texfinalizers.lowerall = xmlfinalizers.lowerall
2488
-- Store the first match of pattern below element id in buffer name.
--
-- unescaped == true  : xmlserializetotext of the data (contentonly) or
--                      xmlcontent of the element
-- unescaped == false : the pieces that xmlstring feeds to its handler,
--                      concatenated
-- otherwise          : tostring of the element
--
-- The buffer is erased when there is no match; this includes an empty list
-- of matches, which would otherwise crash or store the string "nil".
function lxml.tobuffer(id,pattern,name,unescaped,contentonly)
    local collected = xmlapplylpath(getid(id),pattern)
    local e = collected and collected[1]
    if not e then
        buffers.erase(name)
        return
    end
    local data
    if unescaped == true then
        -- expanded entities !
        if contentonly then
            data = xmlserializetotext(e.dt)
        else
            data = xmlcontent(e)
        end
    elseif unescaped == false then
        local t, n = { }, 0
        xmlstring(e,function(s) n = n + 1 t[n] = s end)
        data = concat(t)
    else
        data = tostring(e)
    end
    buffers.assign(name,data)
end
2512
2513-- parameters
2514
2515do
2516
2517    local function setatt(id,name,value)
2518        local e = getid(id)
2519        if e then
2520            local a = e.at
2521            if a then
2522                a[name] = value
2523            else
2524                e.at = { [name] = value }
2525            end
2526        end
2527    end
2528
2529    local function setpar(id,name,value)
2530        local e = getid(id)
2531        if e then
2532            local p = e.pa
2533            if p then
2534                p[name] = value
2535            else
2536                e.pa = { [name] = value }
2537            end
2538        end
2539    end
2540
2541    lxml.setatt = setatt
2542    lxml.setpar = setpar
2543
2544    function lxml.setattribute(id,pattern,name,value)
2545        local collected = xmlapplylpath(getid(id),pattern)
2546        if collected then
2547            for i=1,#collected do
2548                setatt(collected[i],name,value)
2549            end
2550        end
2551    end
2552
2553    function lxml.setparameter(id,pattern,name,value)
2554        local collected = xmlapplylpath(getid(id),pattern)
2555        if collected then
2556            for i=1,#collected do
2557                setpar(collected[i],name,value)
2558            end
2559        end
2560    end
2561
2562    lxml.setparam = lxml.setparameter
2563
2564end
2565
2566-- relatively new:
2567
2568do
2569
2570    local permitted        = nil
2571    local ctx_xmlinjector  = context.xmlinjector
2572
2573    xml.pihandlers["injector"] = function(category,rest,e)
2574        local options = options_to_array(rest)
2575        local action  = options[1]
2576        if not action then
2577            return
2578        end
2579        local n = #options
2580        if n > 1 then
2581            local category = options[2]
2582            if category == "*" then
2583                ctx_xmlinjector(action)
2584            elseif permitted then
2585                if n == 2 then
2586                    if permitted[category] then
2587                        ctx_xmlinjector(action)
2588                    end
2589                else
2590                    for i=2,n do
2591                        local category = options[i]
2592                        if category == "*" or permitted[category] then
2593                            ctx_xmlinjector(action)
2594                            return
2595                        end
2596                    end
2597                end
2598            end
2599        else
2600            ctx_xmlinjector(action)
2601        end
2602    end
2603
    -- Matches the content of a processing instruction that starts with
    -- "context-": the first capture is the run of non-whitespace that follows
    -- (the kind, like "select" or "include"), the second one is the rest.
    local pattern = P("context-") * C((1-patterns.whitespace)^1) * C(P(1)^1)

    -- Apply the select and include processing instructions found in the tree
    -- of element id, using the categories in permitted (see xml.setinjectors).
    --
    -- The filter is handed to xmlwithelements and walks the children of an
    -- element. For a select or include instruction that has "begin" among its
    -- options, the children up to the next instruction of the same kind with
    -- "end" are dealt with:
    --
    --   select  : when none of the categories is permitted, the table
    --             children in that range (other instructions excepted) get
    --             their tag changed to "@cm@"
    --   include : when one of the categories is permitted, children in that
    --             range with tag "@cm@" are converted by xml.toelement and
    --             replaced by the result
    --
    -- NOTE(review): "@pi@" and "@cm@" presumably are the tags of processing
    -- instructions and comments; that is not visible in this part of the file.
    function lxml.applyselectors(id)
        local root = getid(id)
        local function filter(e)
            local dt = e.dt
            if not dt then
                report_lxml("error in selector, no data in %a",e.tg or "?")
                return
            end
            local ndt  = #dt
            local done = false
            local i = 1
            while i <= ndt do
                local dti = dt[i]
                if type(dti) == "table" then
                    if dti.tg == "@pi@" then
                        local text = dti.dt[1]
                        local what, rest = lpegmatch(pattern,text)
                        if what == "select" then
                            local categories = options_to_hash(rest)
                            if categories["begin"] then
                                -- okay becomes the first permitted category found
                                local okay = false
                                if permitted then
                                    for k, v in next, permitted do
                                        if categories[k] then
                                            okay = k
                                            break
                                        end
                                    end
                                end
                                if okay then
                                    if trace_selectors then
                                        report_lxml("accepting selector: %s",okay)
                                    end
                                else
                                    categories.begin = false
                                    if trace_selectors then
                                        report_lxml("rejecting selector: % t",sortedkeys(categories))
                                    end
                                end
                                -- scan forward (starting at the begin instruction itself)
                                -- till the matching end
                                for j=i,ndt do
                                    local dtj = dt[j]
                                    if type(dtj) == "table" then
                                        local tg = dtj.tg
                                        if tg == "@pi@" then
                                            local text = dtj.dt[1]
                                            local what, rest = lpegmatch(pattern,text)
                                            if what == "select" then
                                                local categories = options_to_hash(rest)
                                                if categories["end"] then
                                                    -- the outer loop continues after the end instruction
                                                    i = j
                                                    break
                                                else
                                                    -- error
                                                end
                                            end
                                        elseif not okay then
                                            -- rejected: retag the element
                                            dtj.tg = "@cm@"
                                        end
                                    else
    --                                     dt[j] = "" -- okay ?
                                    end
                                end
                            end
                        elseif what == "include" then
                            local categories = options_to_hash(rest)
                            if categories["begin"] then
                                -- okay becomes the first permitted category found
                                local okay = false
                                if permitted then
                                    for k, v in next, permitted do
                                        if categories[k] then
                                            okay = k
                                            break
                                        end
                                    end
                                end
                                if okay then
                                    if trace_selectors then
                                        report_lxml("accepting include: %s",okay)
                                    end
                                else
                                    categories.begin = false
                                    if trace_selectors then
                                        report_lxml("rejecting include: % t",sortedkeys(categories))
                                    end
                                end
                                if okay then
                                    -- scan forward till the matching end
                                    for j=i,ndt do
                                        local dtj = dt[j]
                                        if type(dtj) == "table" then
                                            local tg = dtj.tg
                                            if tg == "@cm@" then
                                                -- replace the "@cm@" node by its converted content
                                                local content = dtj.dt[1]
                                                local element = root and xml.toelement(content,root)
                                                dt[j] = element
                                                -- NOTE(review): fails when element is nil (no root); also
                                                -- the parent is set to the data table here, not to e
                                                element.__p__ = dt -- needs checking
                                                done = true
                                            elseif tg == "@pi@" then
                                                local text = dtj.dt[1]
                                                local what, rest = lpegmatch(pattern,text)
                                                if what == "include" then
                                                    local categories = options_to_hash(rest)
                                                    if categories["end"] then
                                                        -- the outer loop continues after the end instruction
                                                        i = j
                                                        break
                                                    else
                                                        -- error
                                                    end
                                                end
                                            end
                                        end
                                    end
                                end
                            end
                        elseif dti then
                            -- an instruction of another kind (or one that didn't match)
                            filter(dti)
                        end
                    end
                    if done then
                        -- probably not needed
                        xml.reindex(dt)
                    end
                end
                i = i + 1
            end
        end
        xmlwithelements(root,filter)
    end
2733
2734    function xml.setinjectors(set)
2735        local s = settings_to_set(set)
2736        if permitted then
2737            for k, v in next, s do
2738                permitted[k] = true
2739            end
2740        else
2741            permitted = s
2742        end
2743    end
2744
2745    function xml.resetinjectors(set)
2746        if permitted and set and set ~= "" then
2747            local s = settings_to_set(set)
2748            for k, v in next, s do
2749                if v then
2750                    permitted[k] = nil
2751                end
2752            end
2753        else
2754            permitted = nil
2755        end
2756    end
2757
2758end
2759
-- hm, maybe these implements should move to ini too
2761
-- the injector and selector helpers; each takes one string argument

implement {
    name      = "xmlsetinjectors",
    arguments = "string",
    actions   = xml.setinjectors,
}

implement {
    name      = "xmlresetinjectors",
    arguments = "string",
    actions   = xml.resetinjectors,
}

implement {
    name      = "xmlapplyselectors",
    arguments = "string",
    actions   = lxml.applyselectors,
}
2779
2780-- bonus: see x-lmx-html.mkiv
2781
-- Finalizer: rename the first collected element to name, store its stripped
-- serialization in buffer "lmx:<name>" and have that buffer processed by
-- context.xmlprocessbuffer.
--
-- collected : list of matched elements (may be nil, like in the other
--             finalizers)
-- name      : new name of the element, mandatory
-- setup     : setup to apply, defaults to "lmx:<name>:setup"
function texfinalizers.xml(collected,name,setup)
    local root = collected and collected[1]
    if not root then
        return
    end
    if not name or name == "" then
        report_lxml("missing name in xml finalizer")
        return
    end
    xmlrename(root,name)
    name = "lmx:" .. name
    buffers.assign(name,strip(xmltostring(root)))
    context.xmlprocessbuffer(name,name,setup or (name..":setup"))
end
2796
2797-- experiment
2798
2799do
2800
2801    local xmltoelement = xml.toelement
2802    local xmlreindex   = xml.reindex
2803
2804    function lxml.replace(root,pattern,whatever)
2805        if type(root) == "string" then
2806            root = lxml.getid(root)
2807        end
2808        local collected = xmlapplylpath(root,pattern)
2809        if collected then
2810            local isstring = type(whatever) == "string"
2811            for c=1,#collected do
2812                local e = collected[c]
2813                local p = e.__p__
2814                if p then
2815                    local d = p.dt
2816                    local n = e.ni
2817                    local w = isstring and whatever or whatever(e)
2818                    if w then
2819                        local t = xmltoelement(w,root).dt
2820                        if t then
2821                            t.__p__ = p
2822                            if type(t) == "table" then
2823                                local t1 = t[1]
2824                                d[n] = t1
2825                                t1.at.type = e.at.type or t1.at.type
2826                                for i=2,#t do
2827                                    n = n + 1
2828                                    insert(d,n,t[i])
2829                                end
2830                            else
2831                                d[n] = t
2832                            end
2833                            xmlreindex(d) -- probably not needed
2834                        end
2835                    end
2836                end
2837            end
2838        end
2839    end
2840
2841    -- function document.mess_around(root)
2842    --     lxml.replace(
2843    --         root,
2844    --         "p[@variant='foo']",
2845    --         function(c)
2846    --             return (string.gsub(tostring(c),"foo","<bar>%1</bar>"))
2847    --         end
2848    --     )
2849    -- end
2850
2851end
2852