lxml-tex.lua /size: 86 Kb    last modification: 2024-01-16 09:02
-- Make sure the global module registry exists, then register this module's metadata.
modules = modules or { }

modules['lxml-tex'] = {
    version   = 1.001,
    comment   = "companion to lxml-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
-- Because we split and resolve entities we use the direct printing
-- interface and not the context one. If we ever do that there will
-- be a cldf-xml helper library.
12
13local concat, insert, remove, sortedkeys, reversed = table.concat, table.insert, table.remove, table.sortedkeys, table.reverse
14local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match
15local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select
16local lpegmatch = lpeg.match
17local P, S, C = lpeg.P, lpeg.S, lpeg.C
18local patterns = lpeg.patterns
19local setmetatableindex = table.setmetatableindex
20local formatters, strip, collapse = string.formatters, string.strip, utilities.strings.collapse
21
22local tex, xml = tex, xml
23local lowerchars, upperchars, lettered = characters.lower, characters.upper, characters.lettered
24local basename, dirname, joinfile = file.basename, file.dirname, file.join
25
26lxml = lxml or { }
27local lxml = lxml
28
29local catcodenumbers     = catcodes.numbers
30local ctxcatcodes        = catcodenumbers.ctxcatcodes -- todo: use different method
31local notcatcodes        = catcodenumbers.notcatcodes -- todo: use different method
32
33local commands           = commands
34local context            = context
35local contextsprint      = context.sprint             -- with catcodes (here we use fast variants, but with option for tracing)
36
37local ctx_doif           = commands.doif
38local ctx_doifnot        = commands.doifnot
39local ctx_doifelse       = commands.doifelse
40
41local synctex            = luatex.synctex
42
43local implement          = interfaces.implement
44
45local xmlelements        = xml.elements
46local xmlcollected       = xml.collected
47local xmlsetproperty     = xml.setproperty
48local xmlwithelements    = xml.withelements
49local xmlserialize       = xml.serialize
50local xmlcollect         = xml.collect
51local xmltext            = xml.text
52local xmltostring        = xml.tostring
53local xmlapplylpath      = xml.applylpath
54local xmlunspecialized   = xml.unspecialized
55local xmldespecialized   = xml.despecialized -- nicer in expanded xml
56local xmlprivatetoken    = xml.privatetoken
57local xmlstripelement    = xml.stripelement
58local xmlinclusion       = xml.inclusion
59local xmlinclusions      = xml.inclusions
60local xmlbadinclusions   = xml.badinclusions
61local xmlcontent         = xml.content
62local xmllastmatch       = xml.lastmatch
63local xmlpushmatch       = xml.pushmatch
64local xmlpopmatch        = xml.popmatch
65local xmlstring          = xml.string
66local xmlserializetotext = xml.serializetotext
67local xmlrename          = xml.rename
68
69local variables          = interfaces and interfaces.variables or { }
70
71local parsers            = utilities.parsers
72local settings_to_hash   = parsers.settings_to_hash
73local settings_to_set    = parsers.settings_to_set
74local options_to_hash    = parsers.options_to_hash
75local options_to_array   = parsers.options_to_array
76
77local insertbeforevalue  = utilities.tables.insertbeforevalue
78local insertaftervalue   = utilities.tables.insertaftervalue
79
80local resolveprefix      = resolvers.resolve
81
82local starttiming        = statistics.starttiming
83local stoptiming         = statistics.stoptiming
84
-- Trace switches: all start disabled and are flipped by the trackers registered below.
local trace_setups    = false
local trace_loading   = false
local trace_access    = false
local trace_comments  = false
local trace_entities  = false
local trace_selectors = false

trackers.register("lxml.setups",   function(v) trace_setups    = v end)
trackers.register("lxml.loading",  function(v) trace_loading   = v end)
trackers.register("lxml.access",   function(v) trace_access    = v end)
trackers.register("lxml.comments", function(v) trace_comments  = v end)
trackers.register("xml.entities",  function(v) trace_entities  = v end)
trackers.register("lxml.selectors",function(v) trace_selectors = v end)

-- Two log channels: one for this module and one for xml (entity) messages.
local report_lxml = logs.reporter("lxml","tex")
local report_xml  = logs.reporter("xml","tex")

-- When set (see lxml.startraw / lxml.stopraw) entities are passed on as &name;.
local forceraw = false

local p_texescape = patterns.texescape

-- Only available when context.tokenizedcs exists; code further down branches on this.
local tokenizedxmlw = context.tokenizedcs and context.tokenizedcs.xmlw

directives.enable("xml.path.keeplastmatch")
102
103-- tex entities
104
-- The table of tex specific entities is kept across reloads and registered with the storage.
local texentities     = lxml.entities or { }     -- these are specific for a tex run
lxml.entities         = texentities

storage.register("lxml/entities",texentities,"lxml.entities")

local xmlentities     = xml.entities             -- these are more or less standard entities
local reparsedentity  = xml.reparsedentitylpeg   -- \Ux{...}
local unescapedentity = xml.unescapedentitylpeg

-- The active parser for numeric entities; switched by lxml.startunescaped / lxml.stopunescaped.
local parsedentity    = reparsedentity

-- Set by the "lxml.entities.useelement" directive.
local useelement      = false                    -- probably no longer needed / used
115
-- Switch entity parsing to the unescaped lpeg (undone by lxml.stopunescaped).
lxml.startunescaped = function()
    parsedentity = unescapedentity
end
119
-- Switch entity parsing back to the default (reparsed) lpeg.
lxml.stopunescaped = function()
    parsedentity = reparsedentity
end
123
-- The directive value is stored as-is; lxml.resolvedentity consults it for unknown entities.
directives.register(
    "lxml.entities.useelement",
    function(v)
        useelement = v
    end
)
127
-- Register (or overwrite) a tex specific entity. The value is stored unchanged;
-- lxml.resolvedentity either calls it (function) or prints it (anything else).
lxml.registerentity = function(key,value)
    texentities[key] = value
    if not trace_entities then
        return
    end
    report_xml("registering tex entity %a as %a",key,value)
end
134
-- Flush the entity with name 'str' (the text between & and ;) to TeX.
--
-- The lookup order is:
--
--   1. raw mode      : the entity is passed on as &str;
--   2. tex entities  : a function is called with 'str', other values are printed
--   3. xml entities  : a function is called and its result printed under notcatcodes
--   4. numeric forms : parsed by the currently active entity lpeg
--   5. fallback      : \xmle{str} when the useelement directive is set, else the name itself
--
-- Nothing is returned.
function lxml.resolvedentity(str)
    if forceraw then
        -- should not happen as we then can as well bypass this function
        if trace_entities then
            report_xml("passing entity %a as &%s;",str,str)
        end
        context("&%s;",str)
        return
    end
    -- entities registered for this tex run come first
    local texentity = texentities[str]
    if texentity then
        if type(texentity) == "function" then
            if trace_entities then
                report_xml("passing entity %a using function",str)
            end
            texentity(str)
        else
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,texentity,"ctxcatcodes")
            end
            context(texentity)
        end
        return
    end
    -- then the more or less standard entities; a function may decide not to resolve
    local xmlentity = xmlentities[str]
    if type(xmlentity) == "function" then
        xmlentity = xmlentity(str)
    end
    if xmlentity then
        if trace_entities then
            report_xml("passing entity %a as %a using %a",str,xmlentity,"notcatcodes")
        end
        contextsprint(notcatcodes,xmlentity)
        return
    end
    -- resolve hex and dec, todo: escape # & etc for ctxcatcodes
    -- normally this is already solved while loading the file
    local chr, err = lpegmatch(parsedentity,str)
    if chr then
        if parsedentity == reparsedentity then
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,chr,"ctxcatcodes")
            end
            context(chr)
        else
            contextsprint(notcatcodes,chr)
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,chr,"notcatcodes")
            end
        end
    elseif err then
        if trace_entities then
            report_xml("passing faulty entity %a as %a",str,err)
        end
        context(err)
    elseif useelement then
        local tag = upperchars(str)
        if trace_entities then
            report_xml("passing entity %a to \\xmle using tag %a",str,tag)
        end
        -- The catcode table number is looked up here because no 'texcatcodes' local
        -- is declared in the visible part of this file.
        local texcatcodes = catcodenumbers.texcatcodes
        -- We pass the entity name: at this point every lookup above has failed, so
        -- there is no resolved value that could be flushed instead.
        -- NOTE(review): 'tag' is only used for tracing; confirm whether \xmle expects
        -- it as an additional argument.
        contextsprint(texcatcodes,"\\xmle{")
        contextsprint(notcatcodes,str)
        contextsprint(texcatcodes,"}")
    else
        if trace_entities then
            report_xml("passing entity %a as %a using %a",str,str,"notcatcodes")
        end
        contextsprint(notcatcodes,str)
    end
end
209
210-- tex interface
211
-- Registry of loaded xml trees, keyed by id (kept when this file is loaded again).
lxml.loaded  = lxml.loaded or { }
local loaded = lxml.loaded

-- print(contextdirective("context-mathml-directive function reduction yes "))
-- print(contextdirective("context-mathml-directive function "))

xml.defaultprotocol = "tex"

-- Finalizer namespaces for the xml and tex protocols; created when not yet present.
local finalizers    = xml.finalizers

local xmlfinalizers = finalizers.xml or { }
local texfinalizers = finalizers.tex or { }

finalizers.xml      = xmlfinalizers
finalizers.tex      = texfinalizers
227
228-- serialization with entity handling
229
-- An entity reference (&name;) in text: the name is captured and handed to
-- lxml.resolvedentity.
local ampersand  = P("&")
local semicolon  = P(";")

local entity     = (ampersand * C((1-semicolon)^1) * semicolon) / lxml.resolvedentity -- context.bold

-- Each text flusher exists in two flavours: the *_yes one intercepts entities
-- (exception = entity), the *_nop one does not. Only the second value returned
-- by context.newtexthandler is kept.

local xmltextcapture_yes = select(2,context.newtexthandler {
    catcodes  = notcatcodes,
    exception = entity,
})

local xmltextcapture_nop = select(2,context.newtexthandler {
    catcodes  = notcatcodes,
})

-- spaces and lines are passed to the cdata obeyed variants

local xmlspacecapture_yes = select(2,context.newtexthandler {
    endofline  = context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    space      = context.xmlcdataobeyedspace,
    catcodes   = notcatcodes,
    exception  = entity,
})

local xmlspacecapture_nop = select(2,context.newtexthandler {
    endofline  = context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    space      = context.xmlcdataobeyedspace,
    catcodes   = notcatcodes,
})

-- only lines are passed to the cdata obeyed variant

local xmllinecapture_yes = select(2,context.newtexthandler {
    endofline  = context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    catcodes   = notcatcodes,
    exception  = entity,
})

local xmllinecapture_nop = select(2,context.newtexthandler {
    endofline  = context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    catcodes   = notcatcodes,
})

-- text under the regular context catcode regime

local ctxtextcapture_yes = select(2,context.newtexthandler {
    catcodes  = ctxcatcodes,
    exception = entity,
})

local ctxtextcapture_nop = select(2,context.newtexthandler {
    catcodes  = ctxcatcodes,
})

-- The active flushers; entity interception is the default.

local xmltextcapture    = xmltextcapture_yes
local xmlspacecapture   = xmlspacecapture_yes
local xmllinecapture    = xmllinecapture_yes
local ctxtextcapture    = ctxtextcapture_yes

directives.register("lxml.entities.escaped",function(v)
    if not v then
        xmltextcapture  = xmltextcapture_nop
        xmlspacecapture = xmlspacecapture_nop
        xmllinecapture  = xmllinecapture_nop
        ctxtextcapture  = ctxtextcapture_nop
    else
        xmltextcapture  = xmltextcapture_yes
        xmlspacecapture = xmlspacecapture_yes
        xmllinecapture  = xmllinecapture_yes
        ctxtextcapture  = ctxtextcapture_yes
    end
end)
299
300-- cdata
301
-- Two verbose handlers with the same line/space/before/after hooks: the local one
-- is used for cdata in this file, the public one additionally sets 'strip'.
local toverbatim

do

    local newverbosehandler = context.newverbosehandler

    toverbatim = newverbosehandler {
        line   = context.xmlcdataobeyedline,
        space  = context.xmlcdataobeyedspace,
        before = context.xmlcdatabefore,
        after  = context.xmlcdataafter,
    }

    lxml.toverbatim = newverbosehandler {
        line   = context.xmlcdataobeyedline,
        space  = context.xmlcdataobeyedspace,
        before = context.xmlcdatabefore,
        after  = context.xmlcdataafter,
        strip  = true,
    }

end
316
317-- raw flushing
318
-- Enable raw mode: lxml.resolvedentity then passes entities on as &name;.
lxml.startraw = function()
    forceraw = true
end
322
-- Disable raw mode again.
lxml.stopraw = function()
    forceraw = false
end
326
-- Return the value of 'rawroot'.
-- NOTE(review): no 'rawroot' is declared in the visible part of this file, so the
-- name resolves as a global here unless it is defined elsewhere; confirm.
lxml.rawroot = function()
    return rawroot
end
330
331-- storage
332
do

    -- We wrap the existing xml error handler so that errors are counted per file
    -- and can be listed when the run finishes.

    local originalhandler = xml.errorhandler
    local errors          = setmetatableindex("number") -- filename -> count
    local noferrors       = 0

    -- Count the error when a filename is given, then pass the message on to the
    -- original handler (which does not get the filename).
    function xml.errorhandler(message,filename)
        if filename and filename ~= "" then
            errors[filename] = errors[filename] + 1
            noferrors = noferrors + 1
        end
        originalhandler(message) -- (filename)
    end

    -- Write one "<count>  <filename>" line per problematic file, sorted by name.
    local function listerrors(report)
        for filename, count in table.sortedhash(errors) do
            report("%4i  %s",count,filename)
        end
    end

    logs.registerfinalactions(function()
        if noferrors <= 0 then
            return
        end
        local report = logs.startfilelogging("lxml","problematic xml files")
        listerrors(report)
        logs.stopfilelogging()
        -- the same list goes to the error log when that one is active
        if logs.loggingerrors() then
            logs.starterrorlogging(report,"problematic xml files")
            listerrors(report)
            logs.stoperrorlogging()
        end
    end)

end
366
-- Remember 'root' under 'id' and tag it with the "name" property (and with the
-- "filename" property when a filename is given).
function lxml.store(id,root,filename)
    loaded[id] = root
    xmlsetproperty(root,"name",id)
    if not filename then
        return
    end
    xmlsetproperty(root,"filename",filename)
end
374
-- Ids can be qualified as "<document>::<index>"; this pattern splits at the "::".
local splitter = lpeg.splitat("::")

lxml.idsplitter = splitter

-- Split a (possibly qualified) id into its document and index parts.
--
-- Returns the document and index when the id has the "document::index" form and
-- otherwise an empty document plus the id itself.
--
-- We test the second capture because the first one is also set when there is no
-- "::" in the id; that is also how lxml.withindex and lxml.getindex detect a
-- qualified id. Testing the first capture made the fallback unreachable.
-- NOTE(review): relies on the splitter returning only one capture for an
-- unqualified id; confirm against lpeg.splitat.
function lxml.splitid(id)
    local d, i = lpegmatch(splitter,id)
    if i then
        return d, i
    else
        return "", id
    end
end
387
-- Resolve an id to an xml node.
--
--   id        : the name of a loaded tree, a node (table) or a "document::index" string
--   qualified : when set and the id was resolved via the index, the document name
--               is returned as second value
--
-- Returns the node, or nothing when the id can't be resolved (the reason is
-- reported when the lxml.access tracker is enabled).
local function getid(id, qualified)
    if not id then
        if trace_access then
            report_lxml("invalid id (nil)")
        end
        return
    end
    -- a loaded tree wins
    local lid = loaded[id]
    if lid then
        return lid
    end
    -- a node is passed on as-is
    if type(id) == "table" then
        return id
    end
    -- otherwise we expect document::index
    local d, i = lpegmatch(splitter,id)
    if not d then
        if trace_access then
            -- there are no captures here so we report the id itself
            report_lxml("%a is not loaded",id)
        end
        return
    end
    local ld = loaded[d]
    if not ld then
        if trace_access then
            report_lxml("%a is not loaded",d)
        end
        return
    end
    local ldi = ld.index
    if not ldi then
        if trace_access then
            report_lxml("%a has no index",d)
        end
        return
    end
    local root = ldi[tonumber(i)]
    if not root then
        if trace_access then
            report_lxml("%a has no index entry %a",d,i)
        end
        return
    end
    if qualified then -- we need this else two args that confuse others
        return root, d
    end
    return root
end

lxml.id    = getid -- we provide two names as locals can already use such
lxml.getid = getid -- names and we don't want clashes
429
-- Return the tree stored under 'id' (a plain lookup, no "::" resolving).
lxml.root = function(id)
    local root = loaded[id]
    return root
end
433
434-- index
435
-- Total number of nodes that got an index entry.
local nofindices = 0

-- Give every node below 'name' that has no 'ix' field yet a sequence number and
-- record it in the index of the root.
--
--   name      : anything that getid accepts
--   check_sum : not used in this function
--   force     : also index when the root already has an index
--
-- Numbering continues after root.maxindex so existing entries are kept.
local function addindex(name,check_sum,force)
    local root = getid(name)
    if not root or (root.index and not force) then -- weird, only called once
        return
    end
    local index    = root.index    or { }
    local maxindex = root.maxindex or 0
    local check    = root.check    or { }
    local added    = 0
    local function nest(node)
        local children = node.dt
        if not node.ix then
            maxindex        = maxindex + 1
            node.ix         = maxindex
            check[maxindex] = node.tg -- still needed ?
            index[maxindex] = node
            added           = added + 1
        end
        if children then
            for c=1,#children do
                local child = children[c]
                -- strings are text, only tables are nodes
                if type(child) == "table" then
                    nest(child)
                end
            end
        end
    end
    nest(root)
    nofindices    = nofindices + added
    root.index    = index
    root.maxindex = maxindex
    if trace_access then
        -- a non string name (like a node) is reported as unknown
        local shown = type(name) == "string" and name or "unknown"
        report_lxml("indexed entries %a, found nodes %a",tostring(shown),maxindex)
    end
end

lxml.addindex = addindex

implement {
    name      = "xmladdindex",
    public    = true,
    arguments = "string",
    actions   = addindex,
}
482
483-- another cache
484
-- Apply an lpath pattern to the node identified by 'id' and return whatever
-- xml.applylpath returns.
local function lxmlapplylpath(id,pattern) -- better inline, saves call
    local root = getid(id)
    return xmlapplylpath(root,pattern)
end

lxml.filter = lxmlapplylpath
490
-- Apply the pattern to every id in a list; ids are separated by commas and/or
-- spaces. The results of the individual applications are not collected.
function lxml.filterlist(list,pattern)
    for id in gmatch(list,"[^, ]+") do -- we could cache a table
        local root = getid(id)
        xmlapplylpath(root,pattern)
    end
end
496
-- Call the function registered as xml.functions[name] on the node identified by
-- 'id'. Returns the (first) result of that function, or the falsy lookup result
-- when no such function is registered.
function lxml.applyfunction(id,name)
    local action = xml.functions[name]
    if not action then
        return action
    end
    -- the parentheses limit the result to one value
    return (action(getid(id)))
end
501
502-- rather new, indexed storage (backward refs), maybe i will merge this
503
-- Return the index table of the tree identified by 'name', or 0 when the tree
-- can't be resolved or has no index.
function lxml.checkindex(name)
    local root  = getid(name)
    local index = root and root.index
    if index then
        return index
    end
    return 0
end
508
-- Print an \xmlw call for 'command': n is used as-is when it already has the
-- "document::index" form and is prefixed by 'name' otherwise. The first variant
-- prints the tokenized control sequence, the second one the \xmlw string.

if tokenizedxmlw then

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local _, qualified = lpegmatch(splitter,n)
        if not qualified then
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",name,"::",n,"}")
        else
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",n,"}")
        end
    end

else

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local _, qualified = lpegmatch(splitter,n)
        if not qualified then
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",name,"::",n,"}")
        else
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",n,"}")
        end
    end

end
532
-- Print the qualified id: n itself when it already has a "::" part, otherwise
-- name::n.
function lxml.getindex(name,n) -- will change as name is always there now
    local _, qualified = lpegmatch(splitter,n)
    if not qualified then
        contextsprint(ctxcatcodes,name,"::",n)
    else
        contextsprint(ctxcatcodes,n)
    end
end
541
542-- loading (to be redone, no overload) .. best use different methods and
543-- keep raw xml (at least as option)
544
-- Keep a reference to the loader that was active before xml.load gets redefined
-- (only set once).
if not xml.originalload then
    xml.originalload = xml.load
end

-- counters that are bumped by the loaders in this file
local noffiles     = 0
local nofconverted = 0

-- line numbers are only tracked while synctex is enabled
local linenumbers  = false

synctex.registerenabler(function()
    linenumbers = true
end)

synctex.registerdisabler(function()
    linenumbers = false
end)
553
-- Load a file via the resolvers and convert it into an xml table.
--
-- A passed settings table is used (and changed) in place: linenumbers is set and
-- currentresource is set during the conversion and reset afterwards. When the
-- file can't be loaded an empty string is converted.
function xml.load(filename,settings)
    noffiles     = noffiles + 1
    nofconverted = nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
    if not settings then
        settings = { }
    end
    settings.linenumbers     = linenumbers
    settings.currentresource = filename
    local content = ""
    if ok and data then
        content = data
    end
    local xmltable = xml.convert(content,settings)
    settings.currentresource = nil
    stoptiming(xml)
    return xmltable
end
566
-- Decide what an entity becomes while converting:
--
--   tex entity       : a private token (resolved when flushing)
--   dtd entity       : the value from 'ent'
--   predefined entity: the value from xml.entities (mathml and so)
--   otherwise        : a private token, so that the original is kept somehow
--
-- The id argument is not used here.
local function entityconverter(id,str,ent) -- todo: disable tex entities when raw
    if texentities[str] then
        -- only once
        -- context.xmlprivate(p,t)
        local private = xmlprivatetoken(str)
        return private
    end
    local resolved = (ent and ent[str]) or xmlentities[str]
    if resolved then
        return resolved
    end
    return xmlprivatetoken(str)
end
589
-- Optional preprocessor that is passed on to xml.convert; none by default.
lxml.preprocessor = nil

-- Convert a string into an xml table using the settings that the tex end expects.
--
--   id              : passed to the entity converter; also the resource name fallback
--   data            : the xml string
--   compress        : when equal to variables.yes the strip_cm_and_dt setting is enabled
--   currentresource : name stored (as string) in the settings for this conversion
local function lxmlconvert(id,data,compress,currentresource)
    local function resolve(str,ent)
        return entityconverter(id,str,ent)
    end
    local stripped = compress and compress == variables.yes
    -- we're now roundtrip anyway
    return xml.convert(data, {
        unify_predefined_entities   = false, -- is also default
        utfize_entities             = true,  -- is also default
        resolve_predefined_entities = true,  -- is also default
        resolve_entities            = resolve,
        currentresource             = tostring(currentresource or id),
        preprocessor                = lxml.preprocessor,
        linenumbers                 = linenumbers,
        strip_cm_and_dt             = stripped and true or nil, -- nil means: not set
    })
end

lxml.convert = lxmlconvert
609
-- Load a file (after it went through ctxrunner.preparedfile), convert it and
-- store the result under 'id'. Returns the xml table and the filename that was
-- used. When the file can't be loaded an empty string is converted.
function lxml.load(id,filename,compress)
    filename = ctxrunner.preparedfile(filename)
    if trace_loading then
        report_lxml("loading file %a as %a",filename,id)
    end
    noffiles     = noffiles + 1
    nofconverted = nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
    local content = ""
    if ok and data then
        content = data
    end
    local xmltable = lxmlconvert(id,content,compress,filename)
    stoptiming(xml)
    lxml.store(id,xmltable,filename)
    return xmltable, filename
end
624
-- Store an already converted xml table under 'id' and hand it back.
lxml.register = function(id,xmltable,filename)
    lxml.store(id,xmltable,filename)
    return xmltable
end
629
630-- recurse prepare rootpath resolve basename
631
-- The defaults for the old calling conventions. These have to be hashes because
-- the options are consulted by key (options.recurse, options.prepare, ...); as
-- arrays none of the keys was ever set and the defaults had no effect.
local options_true = { recurse = true, prepare = true, rootpath = true }
local options_nil  = { prepare = true, rootpath = true }

-- Expand inclusions in the tree identified by 'id'.
--
--   pattern, attribute : passed on to xml.include
--   options            : true (old convention: recurse), nil/false (old convention:
--                        no recursion) or a settings string with the keys recurse,
--                        prepare, rootpath, resolve and basename
--
-- The callback passed to xml.include returns the content and the resolved name of
-- the file to include, or an empty string when no filename is given.
function lxml.include(id,pattern,attribute,options)
    starttiming(xml)
    local root = getid(id)
    if options == true then
        -- downward compatible
        options = options_true
    elseif not options then
        -- downward compatible
        options = options_nil
    else
        options = settings_to_hash(options) or { }
    end
    xml.include(root,pattern,attribute,options.recurse,function(filename)
        if filename then
            -- preprocessing
            if options.prepare then
                filename = ctxrunner.preparedfile(filename)
            end
            -- handy if we have a flattened structure
            if options.basename then
                filename = basename(filename)
            end
            if options.resolve then
                filename = resolveprefix(filename) or filename
            end
            -- some protection: a name without path is taken relative to the root file
            if options.rootpath and dirname(filename) == "" and root.filename then
                local dn = dirname(root.filename)
                if dn ~= "" then
                    filename = joinfile(dn,filename)
                end
            end
            if trace_loading then
                report_lxml("including file %a",filename)
            end
            noffiles, nofconverted = noffiles + 1, nofconverted + 1
            return
                resolvers.loadtexfile(filename) or "",
                resolvers.findtexfile(filename) or ""
        else
            return ""
        end
    end)
    stoptiming(xml)
end
680
-- Print the 'cf' field of the node identified by 'id' (syncfilename uses the
-- same field as filename); nothing happens when the id can't be resolved.
function lxml.filename(id)
    local e = getid(id)
    if not e then
        return
    end
    context(e.cf)
end
687
-- Print the 'cl' field of the node identified by 'id' (syncfilename uses the
-- same field as line number); nothing happens when the id can't be resolved.
function lxml.fileline(id)
    local e = getid(id)
    if not e then
        return
    end
    context(e.cl)
end
694
-- Print what xml.inclusion reports for the node; when 'base' is set only the
-- base name is printed. Nothing is printed when there is no inclusion.
function lxml.inclusion(id,default,base)
    local inclusion = xmlinclusion(getid(id),default)
    if not inclusion then
        return
    end
    local shortname = base and basename(inclusion)
    context(shortname or inclusion)
end
701
-- Print the list that xml.inclusions returns for the node as a comma separated
-- string.
function lxml.inclusions(id,sorted)
    local list = xmlinclusions(getid(id),sorted)
    if not list then
        return
    end
    context(concat(list,","))
end
708
-- Print the list that xml.badinclusions returns for the node as a comma
-- separated string.
function lxml.badinclusions(id,sorted)
    local list = xmlbadinclusions(getid(id),sorted)
    if not list then
        return
    end
    context(concat(list,","))
end
715
-- Save the tree identified by 'id' under the given file name.
lxml.save = function(id,name)
    local root = getid(id)
    xml.save(root,name)
end
719
-- Convert the content of a buffer and return it in serialized form.
--
--   name     : the buffer name; when nil or empty the job name is used
--   compress : passed on to the converter
--
-- The serialized string used to be computed and then thrown away, which made
-- this getter return nothing; now the result of xml.tostring is returned.
function xml.getbuffer(name,compress) -- we need to make sure that commands are processed
    if not name or name == "" then
        name = tex.jobname
    end
    nofconverted = nofconverted + 1
    local data = buffers.getcontent(name)
    local root = lxmlconvert(name,data,compress,format("buffer: %s",tostring(name or "?"))) -- one buffer
    return xmltostring(root)
end
728
-- Convert the collected content of one or more buffers and store the result
-- under 'id'. When no buffer name is given the id is used as name. Returns the
-- xml table and the name that was used.
function lxml.loadbuffer(id,name,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local used     = name or id
    local data     = buffers.collectcontent(used) -- name can be list
    local resource = format("buffer: %s",tostring(used or "?"))
    local xmltable = lxmlconvert(id,data,compress,resource)
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, used
end
738
-- Convert a string (nil is treated as an empty string) and store the result
-- under 'id'. Returns the xml table and the id.
function lxml.loaddata(id,str,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local data     = str or ""
    local resource = format("id: %s",id)
    local xmltable = lxmlconvert(id,data,compress,resource)
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, id
end
747
748-- e.command:
749--
750-- string   : setup
751-- true     : text (no <self></self>)
752-- false    : ignore
753-- function : call
754
-- Handler for doctype nodes: they produce no output.
local function tex_doctype(e,handlers)
    -- ignore
end
758
-- Handler for comment nodes: nothing is typeset, the comment is only reported
-- when the lxml.comments tracker is enabled.
local function tex_comment(e,handlers)
    if not trace_comments then
        return
    end
    report_lxml("comment %a",e.dt[1])
end
764
-- Elements without a command are serialized by the element function of the
-- verbose handlers.
local default_element_handler = xml.gethandlers("verbose").functions["@el@"]

-- setfilename is the synctex setter while synctex is enabled and false otherwise
local setfilename, trace_name = false, false
local report_name             = logs.reporter("lxml")

synctex.registerenabler(function()
    setfilename = synctex.setfilename
end)

synctex.registerdisabler(function()
    setfilename = false
end)
773
-- Pass the file (cf) and line (cl, default 1) of a node to the synctex setter.
-- Nodes without a file are skipped. The caller has to make sure that
-- setfilename is set; 'where' is only used for tracing.
local function syncfilename(e,where)
    local cf = e.cf
    if not cf then
        return
    end
    local cl = e.cl or 1
    if trace_name then
        report_name("set filename, case %a, tag %a, file %a, line %a",where,e.tg,cf,cl)
    end
    setfilename(cf,cl)
end
784
-- This tracker makes syncfilename report what it sets.
trackers.register(
    "system.synctex.xml",
    function(v)
        trace_name = v
    end
)
788
-- The element handler. What happens depends on e.command:
--
--   nil      : the default (verbose) element handler is used
--   true     : only the content is serialized (no <self></self>)
--   false    : the element is ignored
--   string   : \xmlw{command}{rootname::index} is printed
--   function : the function is called with the element
--
-- Other values are ignored. An element that has a name but no index yet
-- triggers (forced) indexing first. There are two variants: one that prints
-- the tokenized \xmlw and one that prints it as string.

local tex_element

if tokenizedxmlw then

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        local kind    = type(command)
        if kind == "nil" then
            default_element_handler(e,handlers)
        elseif kind == "boolean" then
            if command then
                -- text (no <self></self>) / so, no mkii fallback then
                handlers.serialize(e.dt,handlers)
            end
        elseif kind == "string" then
            local rootname = e.name
            if not rootname then
                report_lxml("fatal error: no index for %a",command)
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",e.ix or 0,"}")
            else
                local ix = e.ix
                if not ix then
                    addindex(rootname,false,true)
                    ix = e.ix
                end
                -- lmtx only, same performance, a bit more immediate:
                --
                -- expandmacro(tokenizedxmlw,ctxcatcodes,true,command,true,rootname.."::"..ix)
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",rootname,"::",ix,"}")
            end
        elseif kind == "function" then
            command(e)
        end
    end

else

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        local kind    = type(command)
        if kind == "nil" then
            default_element_handler(e,handlers)
        elseif kind == "boolean" then
            if command then
                -- text (no <self></self>) / so, no mkii fallback then
                handlers.serialize(e.dt,handlers)
            end
        elseif kind == "string" then
            local rootname = e.name
            if not rootname then
                report_lxml("fatal error: no index for %a",command)
                contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",e.ix or 0,"}")
            else
                local ix = e.ix
                if not ix then
                    addindex(rootname,false,true)
                    ix = e.ix
                end
                -- faster than context.xmlw
                contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",rootname,"::",ix,"}")
            end
        elseif kind == "function" then
            command(e)
        end
    end

end
869
870-- <?context-directive foo ... ?>
871-- <?context-foo-directive ... ?>
872
-- Handlers for processing instructions, keyed by directive category.
local pihandlers = { }

xml.pihandlers = pihandlers

-- building blocks

local space    = S(" \n\r")
local spaces   = space^0
local class    = C((1-space)^0)      -- a run of non spaces (can be empty)
local key      = class
local rest     = C(P(1)^0)           -- whatever is left
local value    = C(P(1-(space * -1))^0) -- whatever is left, stopping before a space that ends the string

-- the category is captured from either context-<category>-directive or
-- context-directive <category>

local category = P("context-") * (
                    C((1-P("-"))^1) * P("-directive")
                  + P("directive") * spaces * key
                 )

-- c_parser captures the category and the remainder of the instruction,
-- k_parser captures a class, a key and the remainder

local c_parser = category * spaces * value -- rest
local k_parser = class * spaces * key * spaces * rest --value
888
-- Install a handler for the directives of category 'name': the remainder of such
-- an instruction is split into class, key and rest and these are passed to the
-- context command 'csname'. Without a csname nothing is installed.
implement {
    name      = "xmlinstalldirective",
    arguments = "2 strings",
    actions   = function(name,csname)
        if not csname then
            return
        end
        local keyvalueparser = k_parser / context[csname]
        pihandlers[name] = function(category,rest,e)
            lpegmatch(keyvalueparser,rest)
        end
    end
}
902
-- Handler for processing instructions: when the content parses as a context
-- directive with a non empty remainder and a handler is installed for its
-- category, that handler is called with the category, the remainder and the
-- node. Everything else is silently skipped.
local function tex_pi(e,handlers)
    local str = e.dt[1]
    if not str or str == "" then
        return
    end
    local category, rest = lpegmatch(c_parser,str)
    if not (category and rest) or #rest == 0 then
        return
    end
    local handler = pihandlers[category]
    if handler then
        handler(category,rest,e)
    end
end
915
-- When set (the default) cdata sections are flushed verbatim by tex_cdata.
local obeycdata = true

-- Enable flushing of cdata sections.
lxml.setcdata = function()
    obeycdata = true
end
921
-- Disable flushing of cdata sections.
lxml.resetcdata = function()
    obeycdata = false
end
925
-- Handler for cdata nodes: the content goes through the verbatim handler unless
-- cdata is disabled (then nothing is flushed).
local function tex_cdata(e,handlers)
    if not obeycdata then
        return
    end
    toverbatim(e.dt[1])
end
931
932-- we could try to merge the conversion and flusher but we don't gain much and it makes tracing
933-- harder: xunspecialized = utf.remapper(xml.specialcodes,"dynamic",lxml.resolvedentity)
934
-- Handler for text: the string is unspecialized first and then flushed by the
-- active xml text capture.
local function tex_text(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmltextcapture,str)
end
939
940--
941
-- Flush a string as-is (no unspecializing) with the active ctx text capture.
local function ctx_text(str) -- can be just context(e) as we split there
    lpegmatch(ctxtextcapture,str)
end
945
-- The 'handle' function of the tex handlers: all arguments are printed under the
-- context catcode regime.
local function tex_handle(...)
    contextsprint(ctxcatcodes,...) -- notcatcodes is active anyway
end
949
-- The default serializer to tex; document and root nodes have no handler of
-- their own here.
local xmltexhandler = xml.newhandlers {
    name      = "tex",
    handle    = tex_handle,
    functions = {
     -- ["@dc@"] = tex_document,
     -- ["@rt@"] = tex_root,
        ["@dt@"] = tex_doctype,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_text,
    },
}

lxml.xmltexhandler = xmltexhandler
966
967-- begin of test
968
-- Text handler that uses the space capture instead of the regular text capture;
-- it is used by lxml.flushspacewise.

local function tex_space(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmlspacecapture,str)
end

-- Same set of functions as the regular tex handler, apart from the text entry.

local xmltexspacehandler = xml.newhandlers {
    name      = "texspace",
    handle    = tex_handle,
    functions = {
        ["@dt@"] = tex_doctype,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_space,
    },
}
986
-- Text handler that uses the line capture instead of the regular text capture;
-- it is used by lxml.flushlinewise.

local function tex_line(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmllinecapture,str)
end

-- Same set of functions as the regular tex handler, apart from the text entry.

local xmltexlinehandler = xml.newhandlers {
    name      = "texline",
    handle    = tex_handle,
    functions = {
        ["@dt@"] = tex_doctype,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_line,
    },
}
1004
-- Serializes the content of the identified element with the space handler.
-- Nothing happens when the id cannot be resolved or when there is no content.

function lxml.flushspacewise(id) -- keeps spaces and lines
    local root = getid(id)
    local dt   = root and root.dt
    if not dt then
        return
    end
    xmlserialize(dt,xmltexspacehandler)
end
1012
-- Serializes the content of the identified element with the line handler.
-- Nothing happens when the id cannot be resolved or when there is no content.

function lxml.flushlinewise(id) -- keeps lines
    local root = getid(id)
    local dt   = root and root.dt
    if not dt then
        return
    end
    xmlserialize(dt,xmltexlinehandler)
end
1020
1021-- end of test
1022
-- Serializes the given tree (not an id) with the regular tex handler.

lxml.serialize = function(root)
    xmlserialize(root,xmltexhandler)
end
1026
-- Assigns 'action' to the command field of every element that matches the
-- given pattern, starting at the element identified by id.

function lxml.setaction(id,pattern,action)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        collected[c].command = action
    end
end
1038
-- Flushes a string or an element. A string (which can also be the result of an
-- lpath) is unspecialized and fed to the text capture. An element is serialized
-- with the tex handler, unless raw flushing is forced, in which case its string
-- representation goes through the text capture. The optional second argument is
-- only used for passing on file information.

local function sprint(root,p) -- check rawroot usage
    if not root then
        return
    end
    local tr = type(root)
    if tr == "string" then
     -- rawroot = false -- ?
        if setfilename and p then
            syncfilename(p,"sprint s")
        end
        lpegmatch(xmltextcapture,xmlunspecialized(root))
    elseif tr == "table" then
        if forceraw then
            rawroot = root
         -- contextsprint(ctxcatcodes,xmltostring(root)) -- goes wrong with % etc
         -- root = xmlunspecialized(xmltostring(root))   -- we lose < > &
            lpegmatch(xmltextcapture,xmldespecialized(xmltostring(root))) -- goes to toc
        else
            if setfilename and p then -- and not root.cl
                syncfilename(p,"sprint t")
            end
            xmlserialize(root,xmltexhandler)
        end
    end
end
1065
1066-- local function tprint(root) -- we can move sprint inline
1067--     local tr = type(root)
1068--     if tr == "table" then
1069--         local n = #root
1070--         if n == 0 then
1071--             -- skip
1072--         else
1073--             for i=1,n do
1074--                 sprint(root[i])
1075--             end
1076--         end
1077--     elseif tr == "string" then
1078--         root = xmlunspecialized(root)
1079--         lpegmatch(xmltextcapture,root)
1080--     end
1081-- end
1082
-- Flushes a list of strings and/or elements, or a single string. Each entry is
-- treated like sprint would do (inlined because of the filename handling):
-- strings are unspecialized and go through the text capture, elements are
-- serialized unless raw flushing is forced.
--
-- The converted string is kept in a local of its own: assigning it to 'root'
-- would replace the list that we are looping over (so later entries get lost)
-- and the capture has to be applied to the converted string, not to the entry.

local function tprint(root) -- we can move sprint inline
    local tr = type(root)
    if tr == "table" then
        for i=1,#root do
            local ri = root[i]
            local ti = type(ri)
            if ti == "string" then -- can also be result of lpath
                if setfilename then
                    syncfilename(ri,"tprint")
                end
                local str = xmlunspecialized(ri)
                lpegmatch(xmltextcapture,str)
            elseif ti == "table" then
                if forceraw then
                    rawroot = ri
                    local str = xmldespecialized(xmltostring(ri))
                    lpegmatch(xmltextcapture,str) -- goes to toc
                else
                    xmlserialize(ri,xmltexhandler)
                end
            end
        end
    elseif tr == "string" then
        local str = xmlunspecialized(root)
        lpegmatch(xmltextcapture,str)
    end
end
1116
-- Flushes the content of an element (or the element itself when it has no data
-- table). Strings are unspecialized and go through the text capture. When raw
-- flushing is forced the string representation of the element is used instead.

local function cprint(root) -- content
    if not root then
     -- rawroot = false
        return
    end
    if type(root) == 'string' then
     -- rawroot = false
        lpegmatch(xmltextcapture,xmlunspecialized(root))
        return
    end
    if setfilename then
        syncfilename(root,"cprint")
    end
    local rootdt = root.dt
    if forceraw then
        rawroot = root
     -- contextsprint(ctxcatcodes,xmltostring(rootdt or root))
        lpegmatch(xmltextcapture,xmlunspecialized(xmltostring(root))) -- goes to toc
    else
        xmlserialize(rootdt or root,xmltexhandler)
    end
end
1140
-- The three printers are exported in the xml namespace and are also available
-- under local aliases for the code that follows.

xml.sprint = sprint -- calls ct mathml   -> will be replaced
xml.tprint = tprint -- only used here
xml.cprint = cprint -- calls ct  mathml  -> will be replaced

local xmlsprint = sprint
local xmltprint = tprint
local xmlcprint = cprint
1144
1145-- now we can flush
1146
-- Serializes whatever the id resolves to with the regular tex handler.

function lxml.main(id)
    xmlserialize(getid(id),xmltexhandler) -- the real root (@rt@)
end
1151
1152-- -- lines (untested)
1153--
1154-- local buffer = { }
1155--
1156-- local xmllinescapture = (
1157--     newline^2 / function()  buffer[#buffer+1] = "" end +
1158--     newline   / function()  buffer[#buffer] = buffer[#buffer] .. " " end +
1159--     content   / function(s) buffer[#buffer] = buffer[#buffer] ..  s  end
1160-- )^0
1161--
1162-- local xmllineshandler = table.copy(xmltexhandler)
1163--
1164-- xmllineshandler.handle = function(...) lpegmatch(xmllinescapture,concat{ ... }) end
1165--
1166-- function lines(root)
1167--     if not root then
1168--      -- rawroot = false
1169--      -- quit
1170--     elseif type(root) == 'string' then
1171--      -- rawroot = false
1172--         lpegmatch(xmllinescapture,root)
1173--     elseif next(root) then -- tr == 'table'
1174--         xmlserialize(root,xmllineshandler)
1175--     end
1176-- end
1177--
1178-- function xml.lines(root) -- used at all?
1179--     buffer = { "" }
1180--     lines(root)
1181--     return result
1182-- end
1183
-- Gives an element without a command a default one: special elements (other than
-- the root) are skipped (false), anything else is flushed as text (true), so
-- without <self></self>. An already assigned command is left untouched.

local function to_text(e)
    if e.command ~= nil then
        return
    end
    local etg  = e.tg
    local skip = etg and e.special and etg ~= "@rt@"
    e.command = not skip
end
1194
-- Marks an element that has no command yet as to be skipped; an already
-- assigned command (true and false included) is left untouched.

local function to_none(e)
    if e.command ~= nil then
        return
    end
    e.command = false -- i.e. skip
end
1200
1201-- setups
1202
-- Per document list of setup names; the key "*" is used when no document is given.

local setups = { }

-- Applies the to_text default to the elements below the one identified by id.

function lxml.setcommandtotext(id)
    local root = getid(id)
    xmlwithelements(root,to_text)
end

-- Applies the to_none default to the elements below the one identified by id.

function lxml.setcommandtonone(id)
    local root = getid(id)
    xmlwithelements(root,to_none)
end
1212
-- Registers a setup for a document (default "*"). An earlier registration of the
-- same setup is dropped first so that a setup occurs only once in the list. The
-- 'what' argument determines where the setup ends up:
--
--   1 : at the start of the list
--   2 : at the end of the list
--   3 : before the entry 'where' (delegated to insertbeforevalue)
--   4 : after the entry 'where'  (delegated to insertaftervalue)
--
-- Any other value only results in the removal of an existing entry.
--
-- The existing entry is taken out with remove: just setting the slot to nil would
-- leave a hole, after which the length operator (used here and when flushing) and
-- insert no longer behave in a predictable way.

function lxml.installsetup(what,document,setup,where)
    document = document or "*"
    local sd = setups[document]
    if not sd then
        sd = { }
        setups[document] = sd
    end
    for k=1,#sd do
        if sd[k] == setup then
            remove(sd,k)
            break
        end
    end
    if what == 1 then
        if trace_loading then
            report_lxml("prepending setup %a for %a",setup,document)
        end
        insert(sd,1,setup)
    elseif what == 2 then
        if trace_loading then
            report_lxml("appending setup %a for %a",setup,document)
        end
        insert(sd,setup)
    elseif what == 3 then
        if trace_loading then
            report_lxml("inserting setup %a for %a before %a",setup,document,where)
        end
        insertbeforevalue(sd,setup,where)
    elseif what == 4 then
        if trace_loading then
            report_lxml("inserting setup %a for %a after %a",setup,document,where)
        end
        insertaftervalue(sd,setup,where)
    end
end
1242
-- Applies the setups that are registered for the given documents to the tree
-- identified by id, by pushing an \xmlsetup call per setup to TeX. A setup that
-- shows up for more than one of the documents is applied only once per call.

function lxml.flushsetups(id,...)
    local done      = { }
    local documents = { ... }
    for i=1,select("#",...) do
        local document = documents[i]
        local sd       = setups[document]
        if not sd then
            if trace_loading then
                report_lxml("no setups for %a",document)
            end
        else
            for k=1,#sd do
                local v = sd[k]
                if not done[v] then
                    if trace_loading then
                        report_lxml("applying setup %02i : %a to %a",k,v,document)
                    end
                    contextsprint(ctxcatcodes,"\\xmlsetup{",id,"}{",v,"}")
                    done[v] = true
                end
            end
        end
    end
end
1264
-- Forgets all setups of a document by starting with a fresh, empty list.

lxml.resetsetups = function(document)
    if trace_loading then
        report_lxml("resetting all setups for %a",document)
    end
    setups[document] = { }
end
1271
-- Removes the first occurrence of a setup from the list of a document. Nothing
-- happens when the document has no setups or when the setup is not in its list.
--
-- The entry has to be removed from the list that we just looked up (s); there is
-- no variable t in scope here, so using that would make remove fail.

function lxml.removesetup(document,setup)
    local s = setups[document]
    if not s then
        return
    end
    for i=1,#s do
        if s[i] == setup then
            if trace_loading then
                report_lxml("removing setup %a for %a",setup,document)
            end
            remove(s,i)
            break
        end
    end
end
1286
-- Assigns commands to the elements that match a pattern. The setup argument
-- determines what gets assigned:
--
--   nil "" "*"  : the tag of the element
--   "-"         : false (skip the element)
--   "+"         : true (flush as text)
--   "prefix:*"  : the prefix (colon included) followed by the tag
--   "prefix:-"  : false
--   "prefix:+"  : true
--   other       : the setup string itself
--
-- With tracing enabled each assignment is reported, as is the absence of matches.

do

    -- Applies the lpath and returns the matches and their number, or nothing
    -- (after an optional trace message) when there is nothing to assign to.

    local function matches(id,pattern)
        local collected = xmlapplylpath(getid(id),pattern)
        if not collected then
            if trace_setups then
                report_lxml("%s lpath matches for pattern: %s","no",pattern)
            end
            return
        end
        local nc = #collected
        if nc > 0 then
            return collected, nc
        end
        if trace_setups then
            report_lxml("%s lpath matches for pattern: %s","zero",pattern)
        end
    end

    function lxml.setsetup(id,pattern,setup)
        if not setup or setup == "" or setup == "*" or setup == "-" or setup == "+" then
            local collected, nc = matches(id,pattern)
            if not collected then
                return
            end
            if trace_setups then
                for c=1,nc do
                    local e  = collected[c]
                    local ix = e.ix or 0
                    if setup == "-" then
                        e.command = false
                        report_lxml("lpath matched (a) %5i: %s = %s -> skipped",c,ix,setup)
                    elseif setup == "+" then
                        e.command = true
                        report_lxml("lpath matched (b) %5i: %s = %s -> text",c,ix,setup)
                    else
                        local tg = e.tg
                        if tg then -- to be sure
                            e.command = tg
                            local ns = e.rn or e.ns
                            if ns == "" then
                                report_lxml("lpath matched (c) %5i: %s = %s -> %s",c,ix,tg,tg)
                            else
                                report_lxml("lpath matched (d) %5i: %s = %s:%s -> %s",c,ix,ns,tg,tg)
                            end
                        end
                    end
                end
            elseif setup == "-" then
                for c=1,nc do
                    collected[c].command = false
                end
            elseif setup == "+" then
                for c=1,nc do
                    collected[c].command = true
                end
            else
                for c=1,nc do
                    local e = collected[c]
                    e.command = e.tg
                end
            end
            return
        end
        -- a is the prefix including the colon, b is one of * - +
        local a, b = match(setup,"^(.+:)([%*%-%+])$")
        local collected, nc = matches(id,pattern)
        if not collected then
            return
        end
        if a and b then
            if trace_setups then
                for c=1,nc do
                    local e = collected[c]
                    local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
                    if b == "-" then
                        e.command = false
                        if ns == "" then
                            report_lxml("lpath matched (e) %5i: %s = %s -> skipped",c,ix,tg)
                        else
                            report_lxml("lpath matched (f) %5i: %s = %s:%s -> skipped",c,ix,ns,tg)
                        end
                    elseif b == "+" then
                        e.command = true
                        if ns == "" then
                            report_lxml("lpath matched (g) %5i: %s = %s -> text",c,ix,tg)
                        else
                            report_lxml("lpath matched (h) %5i: %s = %s:%s -> text",c,ix,ns,tg)
                        end
                    else
                        e.command = a .. tg
                        if ns == "" then
                            report_lxml("lpath matched (i) %5i: %s = %s -> %s",c,ix,tg,e.command)
                        else
                            report_lxml("lpath matched (j) %5i: %s = %s:%s -> %s",c,ix,ns,tg,e.command)
                        end
                    end
                end
            elseif b == "-" then
                for c=1,nc do
                    collected[c].command = false
                end
            elseif b == "+" then
                for c=1,nc do
                    collected[c].command = true
                end
            else
                for c=1,nc do
                    local e = collected[c]
                    e.command = a .. e.tg
                end
            end
        elseif trace_setups then
            for c=1,nc do
                local e = collected[c]
                e.command = setup
                local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
                if ns == "" then
                    report_lxml("lpath matched (k) %5i: %s = %s -> %s",c,ix,tg,setup)
                else
                    report_lxml("lpath matched (l) %5i: %s = %s:%s -> %s",c,ix,ns,tg,setup)
                end
            end
        else
            for c=1,nc do
                collected[c].command = setup
            end
        end
    end

end
1420
1421-- finalizers
1422
-- Finalizer: flushes the first match, if any.

local function first(collected)
    if not collected or #collected == 0 then
        return
    end
    xmlsprint(collected[1])
end
1428
-- Finalizer: flushes the last match, if any.

local function last(collected)
    local nc = collected and #collected or 0
    if nc > 0 then
        xmlsprint(collected[nc])
    end
end
1437
-- Finalizer: flushes all matches in document order.

local function all(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        xmlsprint(collected[c])
    end
end
1448
-- Finalizer: flushes all matches, the last one first.

texfinalizers.reverse = function(collected)
    if not collected then
        return
    end
    for c=#collected,1,-1 do
        xmlsprint(collected[c])
    end
end
1459
-- Finalizer: prints the number of matches; a number is always printed, so
-- zero when there is no match list at all.

local function count(collected)
    local n = 0
    if collected then
        n = #collected
    end
    contextsprint(ctxcatcodes,n) -- why ctxcatcodes
end
1463
-- Finalizer: flushes the n-th match. A negative n counts from the end (-1 being
-- the last match). Nothing is flushed when n is zero, not a number, or out of range.

local function position(collected,n)
    -- todo: if not n then == match
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    if n < 0 then
        n = nc + n + 1
    end
    if n > 0 then
        local cn = collected[n]
        if cn then
            xmlsprint(cn)
        end
    end
end
1483
-- Finalizer: prints the mi field of the first match, or zero when there is none.
-- Watch out: from here on the name 'match' refers to this finalizer and no longer
-- to string.match.

local function match(collected) -- is match in preceding collected, never change, see bibxml
    local mi = 0
    local m  = collected and collected[1]
    if m and m.mi then
        mi = m.mi
    end
    contextsprint(ctxcatcodes,mi) -- why ctxcatcodes
end
1488
-- Finalizer: prints the ni field of the n-th match, where a negative n counts
-- from the end. Zero is printed when there is no such match or no such field.

local function index(collected,n)
    local ni = 0
    if collected then
        local nc = #collected
        if nc > 0 then
            n = tonumber(n) or 0
            if n < 0 then
                n = nc + n + 1 -- brrr
            end
            local cn = n > 0 and collected[n]
            if cn then
                ni = cn.ni or 0
            end
        end
    end
    contextsprint(ctxcatcodes,ni) -- why ctxcatcodes
end
1508
-- the number of commands is often relatively small but there can be many calls
-- to this finalizer
1511
-- Finalizer: applies a command (setup) to each match by pushing an xmlw call with
-- the command and the "name::index" reference of the element to TeX. A "*" in the
-- command is replaced by the tag of the element. Elements that have a name but no
-- index yet trigger an indexing pass first. Without matches the 'otherwise'
-- command, when given, is applied to #1.
--
-- There are two variants: one that passes tokenizedxmlw when that is available
-- and one that spells out \xmlw.

local command

if tokenizedxmlw then

    command = function(collected,cmd,otherwise)
        local n = collected and #collected
        if not (n and n > 0) then
            if otherwise then
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",otherwise,"}{#1}")
            end
            return
        end
        local wildcard = find(cmd,"*",1,true)
        for c=1,n do -- maybe optimize for n=1
            local e = collected[c]
            local name, ix = e.name, e.ix
            if name and not ix then
                addindex(name,false,true)
                ix = e.ix
            end
            if not (ix and name) then
                report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
            else
                local action = wildcard and gsub(cmd,"%*",e.tg) or cmd
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",action,"}{",name,"::",ix,"}")
            end
        end
    end

else

    command = function(collected,cmd,otherwise)
        local n = collected and #collected
        if not (n and n > 0) then
            if otherwise then
                contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}")
            end
            return
        end
        local wildcard = find(cmd,"*",1,true)
        for c=1,n do -- maybe optimize for n=1
            local e = collected[c]
            local name, ix = e.name, e.ix
            if name and not ix then
                addindex(name,false,true)
                ix = e.ix
            end
            if not (ix and name) then
                report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
            else
                local action = wildcard and gsub(cmd,"%*",e.tg) or cmd
                contextsprint(ctxcatcodes,"\\xmlw{",action,"}{",name,"::",ix,"}")
            end
        end
    end

end
1569
1570-- local wildcards = setmetatableindex(function(t,k)
1571--     local v = false
1572--     if find(k,"*",1,true) then
1573--         v = setmetatableindex(function(t,kk)
1574--             local v = gsub(k,"%*",kk)
1575--             t[k] = v
1576--          -- report_lxml("wildcard %a key %a value %a",kk,k,v)
1577--             return v
1578--         end)
1579--     end
1580--     t[k] = v
1581--     return v
1582-- end)
1583--
1584-- local function command(collected,cmd,otherwise)
1585--     local n = collected and #collected
1586--     if n and n > 0 then
1587--         local wildcard = wildcards[cmd]
1588--         for c=1,n do -- maybe optimize for n=1
1589--             local e = collected[c]
1590--             local ix = e.ix
1591--             local name = e.name
1592--             if name and not ix then
1593--                 addindex(name,false,true)
1594--                 ix = e.ix
1595--             end
1596--             if not ix or not name then
1597--                 report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
1598--             elseif wildcard then
1599--                 contextsprint(ctxcatcodes,"\\xmlw{",wildcard[e.tg],"}{",name,"::",ix,"}")
1600--             else
1601--                 contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}")
1602--             end
1603--         end
1604--     elseif otherwise then
1605--         contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}")
1606--     end
1607-- end
1608
-- Finalizer: prints attribute 'a' of the first match, or the default when the
-- attribute is not there. An empty value of a match is not printed. Without
-- matches the default, when given, is printed.

local function attribute(collected,a,default)
    local str
    if collected and #collected > 0 then
        local at = collected[1].at
        str = (at and at[a]) or default
        if str == "" then
            str = nil
        end
    else
        str = default
    end
    if str then
        contextsprint(notcatcodes,str)
    end
end
1620
-- Finalizer: prints entry 'p' of the pa table of the first match, or the default
-- when that entry is not there. An empty value of a match is not printed. Without
-- matches the default, when given, is printed.

local function parameter(collected,p,default)
    local str
    if collected and #collected > 0 then
        local pa = collected[1].pa
        str = (pa and pa[p]) or default
        if str == "" then
            str = nil
        end
    else
        str = default
    end
    if str then
        contextsprint(notcatcodes,str)
    end
end
1632
-- Finalizer: looks for an attribute in the first match and, when it is not set
-- there, in its ancestors; the first value found is printed. When nothing is
-- found (or when an element without attribute table is encountered) the default,
-- if given, is printed.

local function chainattribute(collected,arguments,default) -- todo: optional levels
    local e = collected and #collected > 0 and collected[1]
    while e do
        local at = e.at
        if not at then
            break -- error
        end
        local a = at[arguments]
        if a then
            contextsprint(notcatcodes,a)
            return
        end
        e = e.__p__
    end
    if default then
        contextsprint(notcatcodes,default)
    end
end
1654
-- Finalizer: prints the path from the root down to the first match, with the
-- steps separated by slashes. Each step is the tag, prefixed by the namespace
-- unless that is empty or 'nonamespace' is set. Steps whose parent is not the
-- root also carry the ei field of the element between square brackets.

local function chainpath(collected,nonamespace)
    if not collected or #collected == 0 then
        return
    end
    local e = collected[1]
    local t = { }
    while e do
        local tg = e.tg
        local rt = e.__p__
        local ns = e.ns
        if tg == "@rt@" then
            break
        end
        local step = tg
        if not (nonamespace or not ns or ns == "") then
            step = ns .. ":" .. tg
        end
        if rt.tg ~= "@rt@" then
            step = step .. "[" .. e.ei .. "]"
        end
        t[#t+1] = step
        e = rt
    end
    -- the steps were collected bottom up
    contextsprint(notcatcodes,concat(reversed(t),"/"))
end
1683
-- Finalizer: flushes the content of all matches.

local function text(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        cprint(collected[c])
    end
end
1696
-- Finalizer: prints the data table of each match directly under the context
-- catcode regime, so without going through the serializer.

local function ctxtext(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        contextsprint(ctxcatcodes,collected[c].dt)
    end
end
1707
-- Finalizer: flushes the content of each match after passing the element
-- through xmlstripelement.

texfinalizers.stripped = function(collected) -- tricky as we strip in place
    if not collected then
        return
    end
    for c=1,#collected do
        cprint(xmlstripelement(collected[c]))
    end
end
1718
-- Finalizer: takes the text of the first match, collapses it and flushes the
-- result; an empty text is ignored.

texfinalizers.collapsed = function(collected)
    if not collected or #collected == 0 then
        return
    end
    local s = xmltext(collected[1])
    if s ~= "" then
        sprint(collapse(s))
    end
end
1727
-- Finalizer: prints the lowercased first data entry of each match. This is the
-- counterpart of the upper finalizer and uses the same test: the matches are only
-- processed when there is a match list (with the test the other way around this
-- finalizer either fails on a missing list or does nothing at all).

texfinalizers.lower = function(collected)
    if collected then
        local nc = #collected
        if nc > 0 then
            for c=1,nc do
                contextsprint(ctxcatcodes,lowerchars(collected[c].dt[1]))
            end
        end
    end
end
1738
-- Finalizer: prints the uppercased first data entry of each match.

texfinalizers.upper = function(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local str = collected[c].dt[1]
        contextsprint(ctxcatcodes,upperchars(str))
    end
end
1749
-- Finalizer: prints the sum of the numbers found in the first data entry of each
-- match; zero is printed when there are no matches.
--
-- An entry that is missing or that cannot be converted to a number counts as
-- zero. The fallback is applied to the result of tonumber: applying it to the
-- argument only covers a missing entry, while non numeric content makes tonumber
-- return nil and then the addition fails.

local function number(collected)
    local n = 0
    if collected then
        for c=1,#collected do
            n = n + (tonumber(collected[c].dt[1]) or 0)
        end
    end
    contextsprint(ctxcatcodes,n)
end
1760
-- Finalizer: flushes the matches from start upto stop (defaults: all) with a
-- separator in between. An empty or non numeric start or stop means: use the
-- default. A negative stop is relative to the number of matches (-1 is the one
-- before the last). The last separator (default: the separator) is used after
-- the one but last match of the whole list and no separator follows the very
-- last match of the list. With 'textonly' only the content of the matches is
-- flushed.

local function concatrange(collected,start,stop,separator,lastseparator,textonly) -- test this on mml
    if not collected then
        return
    end
    local nofcollected = #collected
    if nofcollected == 0 then
        return
    end
    local separator     = separator or ""
    local lastseparator = lastseparator or separator or ""
    local first = 1
    if start ~= "" then
        first = tonumber(start) or 1
    end
    local last = nofcollected
    if stop ~= "" then
        last = tonumber(stop) or nofcollected
    end
    if last < 0 then
        last = nofcollected + last -- -1 == last-1
    end
    local flush = textonly and xmlcprint or xmlsprint
    for i=first,last do
        flush(collected[i])
        if i == nofcollected then
            -- nothing
        elseif i == nofcollected-1 and lastseparator ~= "" then
            contextsprint(ctxcatcodes,lastseparator)
        elseif separator ~= "" then
            contextsprint(ctxcatcodes,separator)
        end
    end
end

-- Finalizer: the same as concatrange but always for the whole list.

local function concatlist(collected,separator,lastseparator,textonly) -- test this on mml
    concatrange(collected,false,false,separator,lastseparator,textonly)
end
1790
-- Finalizer: prints the nesting depth of the first match, that is, the number of
-- elements in the chain that starts at the match and follows the parent links.
-- Zero is printed when there is no (first) match or when it has no tag.
--
-- The first match is checked before its tag is consulted, so that an empty
-- match list results in a zero instead of an error.

local function depth(collected)
    local d = 0
    local c = collected and collected[1]
    if c and c.tg then
        while c do
            d = d + 1
            c = c.__p__
        end
    end
    contextsprint(ctxcatcodes,d)
end
1807
1808-- todo just move up as not used local
1809
-- Registration of the finalizers that are defined as local functions above.

-- flushing matches

texfinalizers.first = first
texfinalizers.last = last
texfinalizers.all = all
texfinalizers.default = all -- !!
texfinalizers.position = position
texfinalizers.text = text
texfinalizers.ctxtext = ctxtext
texfinalizers.context = ctxtext
texfinalizers.concat = concatlist
texfinalizers.concatrange = concatrange -- used below
texfinalizers.command = command

-- querying matches

texfinalizers.count = count
texfinalizers.match = match
texfinalizers.index = index
texfinalizers.depth = depth -- used below
texfinalizers.attribute = attribute
texfinalizers.chainattribute = chainattribute
texfinalizers.chainpath = chainpath
texfinalizers.parameter = parameter
texfinalizers.param = parameter -- obsolete
1830
1831--
1832
-- Finalizer: prints the tag of the n-th match. Without n (or with n being zero
-- or not a number) the first match is used. A negative n counts from the end,
-- so -1 is the last match. Nothing is printed when there is no such match.
--
-- The indexing follows the position and index finalizers: every positive n
-- (1 included) is a plain index and a negative n is mapped onto nc + n + 1.

function texfinalizers.tag(collected,n)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    if c then
        contextsprint(ctxcatcodes,c.tg)
    end
end
1852
-- Finalizer: prints the name of the n-th match: the tag, prefixed by the
-- namespace and a colon when the namespace is not empty. Without n (or with n
-- being zero or not a number) the first match is used. A negative n counts from
-- the end, so -1 is the last match. Nothing is printed when there is no such match.
--
-- The index is converted to a number first, so that a string argument can be
-- compared safely, and the indexing follows the position and index finalizers:
-- every positive n (1 included) is a plain index and a negative n is mapped onto
-- nc + n + 1.

function texfinalizers.name(collected,n)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    if c then
        local ns = c.ns
        if not ns or ns == "" then
            contextsprint(ctxcatcodes,c.tg)
        else
            contextsprint(ctxcatcodes,ns,":",c.tg)
        end
    end
end
1876
-- Finalizer: prints the tags of all matches, one after the other. A tag is
-- prefixed by its namespace and a colon, unless the namespace is empty or
-- 'nonamespace' is set.

function texfinalizers.tags(collected,nonamespace)
    if not collected then
        return
    end
    for c=1,#collected do
        local e  = collected[c]
        local ns = e.ns
        local prefixed = not nonamespace and ns and ns ~= ""
        if prefixed then
            contextsprint(ctxcatcodes,ns,":",e.tg)
        else
            contextsprint(ctxcatcodes,e.tg)
        end
    end
end
1893
1894--
1895
-- Flushes the serialized content of an element in verbatim mode. When
-- given, 'before' is flushed first, followed by the tag between square
-- brackets ("?" when there is no tag), and 'after' is flushed at the end.
-- An unknown id is silently ignored.

local function verbatim(id,before,after)
    local e = getid(id)
    if not e then
        return
    end
    if before then
        contextsprint(ctxcatcodes,before,"[",e.tg or "?","]")
    end
    lxml.toverbatim(xmltostring(e.dt)) -- lxml.toverbatim(xml.totext(e.dt))
    if after then
        contextsprint(ctxcatcodes,after)
    end
end
1904
-- Both variants wrap the verbatim content in a start/stop pair of macros;
-- the start macro gets the tag as bracketed argument (see verbatim).

function lxml.inlineverbatim(id)
    local before = "\\startxmlinlineverbatim"
    local after  = "\\stopxmlinlineverbatim"
    verbatim(id,before,after)
end

function lxml.displayverbatim(id)
    local before = "\\startxmldisplayverbatim"
    local after  = "\\stopxmldisplayverbatim"
    verbatim(id,before,after)
end

lxml.verbatim = verbatim
1914
1915-- helpers
1916
-- Hands the element registered under id to the depth finalizer; the
-- finalizer expects a list, so whatever getid returns is wrapped in one.

function lxml.depth(id)
    depth({ getid(id) })
end
1920
-- Applies the pattern to the element registered under id and passes the
-- matches to the 'first' finalizer; no match list means no action.

function lxml.first(id,pattern)
    local root    = getid(id)
    local matches = xmlapplylpath(root,pattern)
    if not matches then
        return
    end
    first(matches)
end
1927
-- Applies the pattern to the element registered under id and passes the
-- matches to the 'last' finalizer; no match list means no action.

function lxml.last(id,pattern)
    local root    = getid(id)
    local matches = xmlapplylpath(root,pattern)
    if not matches then
        return
    end
    last(matches)
end
1934
-- Applies the pattern to the element registered under id and passes the
-- matches to the 'all' finalizer; no match list means no action.

function lxml.all(id,pattern)
    local root    = getid(id)
    local matches = xmlapplylpath(root,pattern)
    if not matches then
        return
    end
    all(matches)
end
1941
-- Passes the result of the lookup straight to the count finalizer. There
-- is deliberately no test on the result here: a count always has to
-- produce something, also when nothing matched.

function lxml.count(id,pattern)
    local root = getid(id)
    count(xmlapplylpath(root,pattern))
end
1946
-- Looks up the pattern and lets the attribute finalizer deal with
-- attribute 'a' (and the given default) of the matches.

function lxml.attribute(id,pattern,a,default)
    local root    = getid(id)
    local matches = xmlapplylpath(root,pattern)
    if not matches then
        return
    end
    attribute(matches,a,default)
end
1953
-- Looks up the pattern and lets the parameter finalizer deal with
-- parameter 'p' (and the given default) of the matches.

function lxml.parameter(id,pattern,p,default)
    local root    = getid(id)
    local matches = xmlapplylpath(root,pattern)
    if not matches then
        return
    end
    parameter(matches,p,default)
end

lxml.param = lxml.parameter
1962
-- Flushes the serialized content of an element untouched by commands
-- (notcatcodes regime).
--
-- id      : registered element (or reference to one)
-- pattern : optional lpath; when given the first match is used
--
-- Without pattern the element itself is the target, as in lxml.context.
-- The original code treated that element as a list of matches, and as the
-- length test then failed nothing was flushed. A list-like result is still
-- honoured first, so existing behaviour with a pattern is unchanged.

function lxml.raw(id,pattern) -- the content, untouched by commands
    local root      = getid(id)
    local collected = (pattern and xmlapplylpath(root,pattern)) or root
    if not collected then
        return
    end
    local e = collected[1]
    if not e and not pattern then
        e = collected -- the element itself
    end
    local dt = e and e.dt
    if dt then
        local s = xmltostring(dt)
        if s ~= "" then
            contextsprint(notcatcodes,s)
        end
    end
end
1972
1973-- templates
1974
-- Like lxml.raw, but the serialized content goes through p_texescape
-- first (the unescaped string is used when that pattern does not match).
--
-- id      : registered element (or reference to one)
-- pattern : optional lpath; when given the first match is used
--
-- Without pattern the element itself is the target, as in lxml.context.
-- The original code treated that element as a list of matches, and as the
-- length test then failed nothing was flushed.

function lxml.rawtex(id,pattern) -- the content, untouched by commands
    local root      = getid(id)
    local collected = (pattern and xmlapplylpath(root,pattern)) or root
    if not collected then
        return
    end
    local e = collected[1]
    if not e and not pattern then
        e = collected -- the element itself
    end
    local dt = e and e.dt
    if dt then
        local s = xmltostring(dt)
        if s ~= "" then
            contextsprint(notcatcodes,lpegmatch(p_texescape,s) or s)
        end
    end
end
1984
-- Passes the first child of an element to ctx_text.
--
-- id      : registered element (or reference to one)
-- pattern : optional lpath; when given the first match is used, otherwise
--           the element itself
--
-- Both routes now share the same check on the content. Before, only the
-- pattern-less route made sure that there was content, so a matched but
-- empty element resulted in ctx_text being called with nil.

function lxml.context(id,pattern) -- the content, untouched by commands
    local e
    if pattern then
        local collected = xmlapplylpath(getid(id),pattern)
        e = collected and collected[1]
    else
        e = getid(id)
    end
    local dt = e and e.dt
    if dt and #dt > 0 then
        ctx_text(dt[1])
    end
end
2001
-- Flushes text: with a pattern the (non empty) list of matches is passed
-- to the text finalizer, without one the content list of the element
-- itself.

function lxml.text(id,pattern)
    if not pattern then
        local e = getid(id)
        if e then
            text(e.dt)
        end
        return
    end
    local matches = xmlapplylpath(getid(id),pattern)
    if matches and #matches > 0 then
        text(matches)
    end
end
2015
-- Same as lxml.text but with parsedentity temporarily set to
-- unescapedentity; afterwards it is set (back) to reparsedentity. With a
-- pattern the switch only happens when there is something to flush.

function lxml.pure(id,pattern)
    if not pattern then
        parsedentity = unescapedentity
        local e = getid(id)
        if e then
            text(e.dt)
        end
        parsedentity = reparsedentity
        return
    end
    local matches = xmlapplylpath(getid(id),pattern)
    if not matches or #matches == 0 then
        return
    end
    parsedentity = unescapedentity
    text(matches)
    parsedentity = reparsedentity
end

lxml.content = text
2035
-- Passes the matches and the index (converted to a number) to the
-- position finalizer; the finalizer also gets called without matches.

function lxml.position(id,pattern,n)
    local matches = xmlapplylpath(getid(id),pattern)
    position(matches,tonumber(n))
end
2039
-- Passes the matches plus attribute name and default to the chainattribute
-- finalizer; the finalizer also gets called without matches.

function lxml.chainattribute(id,pattern,a,default)
    local matches = xmlapplylpath(getid(id),pattern)
    chainattribute(matches,a,default)
end
2043
-- Passes the matches and the nonamespace flag to the chainpath finalizer;
-- the finalizer also gets called without matches.

function lxml.path(id,pattern,nonamespace)
    local matches = xmlapplylpath(getid(id),pattern)
    chainpath(matches,nonamespace)
end
2047
-- Passes the matches and all range/separator arguments unchanged to the
-- concatrange finalizer.

function lxml.concatrange(id,pattern,start,stop,separator,lastseparator,textonly) -- test this on mml
    local matches = xmlapplylpath(getid(id),pattern)
    concatrange(matches,start,stop,separator,lastseparator,textonly)
end
2051
-- Same as lxml.concatrange but with start and stop both set to false.

function lxml.concat(id,pattern,separator,lastseparator,textonly)
    local matches = xmlapplylpath(getid(id),pattern)
    concatrange(matches,false,false,separator,lastseparator,textonly)
end
2055
-- Applies the position finalizer to the matches of the fixed pattern "/*".

function lxml.element(id,n)
    local children = xmlapplylpath(getid(id),"/*")
    position(children,tonumber(n)) -- tonumber handy
end

lxml.index = lxml.position
2061
-- Flushes the ni field of the element, or 0 when the element is unknown
-- or has no such field.

function lxml.pos(id)
    local e  = getid(id)
    local ni = 0
    if e then
        ni = e.ni or 0
    end
    contextsprint(ctxcatcodes,ni)
end
2066
do

    -- The value seen by the most recent accessor below. It is what
    -- lxml.lastatt flushes, so it is always kept a string. Before, some
    -- paths left the previous value in place (no attribute table and no
    -- default) or stored nil (attribute absent and no default), which made
    -- lxml.lastatt flush a stale value or nil.

    local att = ""

    -- Returns the attribute table of the element registered under id, or
    -- nil when the element is unknown or has no attributes.

    local function attributesof(id)
        local e = getid(id)
        return e and e.at or nil
    end

    -- Flushes attribute a of the element. An absent attribute falls back on
    -- the default (when given and not empty); an attribute that is present
    -- but empty is valid and wins over the default.

    function lxml.att(id,a,default)
        local at    = attributesof(id)
        local value = at and at[a]
        if not value then
            if default and default ~= "" then
                value = default
            else
                value = ""
            end
        end
        att = value
        if value ~= "" then
            contextsprint(notcatcodes,value)
        end
    end

    -- Flushes attribute a via context (so it is interpreted as TeX input).
    -- The default is accepted for symmetry but, as before, not used. An
    -- absent attribute no longer results in a call with nil.

    function lxml.texatt(id,a,default)
        local at    = attributesof(id)
        local value = at and at[a] or ""
        att = value
        if value ~= "" then
--             context(ctxcatcodes,value)
            context(value)
        end
    end

    -- Returns true when attribute a equals value; an absent attribute
    -- counts as an empty string.

    function lxml.ifatt(id,a,value)
        local at = attributesof(id)
        att = at and at[a] or ""
        return att == value
    end

    -- Returns true when attribute a is absent or empty.

    function lxml.ifattempty(id,a)
        local at = attributesof(id)
        att = at and at[a] or ""
        return att == ""
    end

    -- Flushes attribute a with leading hashes removed (reference values).

    function lxml.refatt(id,a)
        local at    = attributesof(id)
        local value = at and at[a]
        if value and value ~= "" then
            value = gsub(value,"^#+","")
            if value ~= "" then
                att = value
                contextsprint(notcatcodes,value)
                return
            end
        end
        att = ""
    end

    -- Flushes the value remembered by the last accessor call.

    function lxml.lastatt()
        contextsprint(notcatcodes,att)
    end

    -- Shared test of the three conditionals below: true when the element
    -- is known, has attributes and attribute k equals v. An element without
    -- attribute table gives false instead of an error.

    local function sameattribute(id,k,v)
        local at = attributesof(id)
        return at ~= nil and at[k] == v
    end

    implement {
        name      = "xmldoifatt",
        arguments = "3 strings",
        public    = true,
        actions   = function(id,k,v)
            ctx_doif(sameattribute(id,k,v))
        end
    }

    implement {
        name      = "xmldoifnotatt",
        arguments = "3 strings",
        public    = true,
        actions   = function(id,k,v)
            ctx_doifnot(sameattribute(id,k,v))
        end
    }

    implement {
        name      = "xmldoifelseatt",
        arguments = "3 strings",
        public    = true,
        actions   = function(id,k,v)
            ctx_doifelse(sameattribute(id,k,v))
        end
    }

end
2193
do

    -- The value seen by the most recent lxml.par call. It is what
    -- lxml.lastpar flushes, so it is always kept a string. Before, some
    -- paths left the previous value in place (no parameter table and no
    -- default) or stored nil (parameter absent and no default).

    local par = ""

    -- Flushes parameter p (from the pa table) of the element. An absent
    -- parameter falls back on the default (when given and not empty); a
    -- parameter that is present but empty is valid and wins over the
    -- default.

    function lxml.par(id,p,default)
        local e     = getid(id)
        local pa    = e and e.pa
        local value = pa and pa[p]
        if not value then
            if default and default ~= "" then
                value = default
            else
                value = ""
            end
        end
        par = value
        if value ~= "" then
            contextsprint(notcatcodes,value)
        end
    end

    -- Flushes the value remembered by the last lxml.par call.

    function lxml.lastpar()
        contextsprint(notcatcodes,par)
    end

end
2232
-- Flushes the name of the element: "ns:tag" when there is a non empty
-- namespace (rn takes precedence over ns) and just the tag otherwise.

function lxml.name(id)
    local e = getid(id)
    if not e then
        return
    end
    local ns = e.rn or e.ns
    if not ns or ns == "" then
        contextsprint(ctxcatcodes,e.tg)
    else
        contextsprint(ctxcatcodes,ns,":",e.tg)
    end
end
2244
-- Flushes the mi field of the element, or 0 when the element is unknown
-- or has no such field.

function lxml.match(id)
    local e  = getid(id)
    local mi = 0
    if e then
        mi = e.mi or 0
    end
    contextsprint(ctxcatcodes,mi)
end
2249
-- Flushes the tag of the element when it has a non empty one.

function lxml.tag(id) -- tag vs name -> also in l-xml tag->name
    local e  = getid(id)
    local tg = e and e.tg
    if tg and tg ~= "" then
        contextsprint(ctxcatcodes,tg)
    end
end
2259
-- Flushes the namespace of the element (rn takes precedence over ns) when
-- there is a non empty one.

function lxml.namespace(id)
    local e = getid(id)
    if not e then
        return
    end
    local ns = e.rn or e.ns
    if ns and ns ~= "" then
        contextsprint(ctxcatcodes,ns)
    end
end
2269
-- Flushes the content list of the element through xmlsprint, with the
-- element itself as second argument.

function lxml.flush(id)
    local e  = getid(id)
    local dt = e and e.dt
    if dt then
        xmlsprint(dt,e)
    end
end
2279
-- Passes what xmllastmatch returns (when it returns something) to the
-- 'all' finalizer.

function lxml.lastmatch()
    local matches = xmllastmatch()
    if not matches then
        return
    end
    all(matches)
end

lxml.pushmatch = xmlpushmatch
lxml.popmatch  = xmlpopmatch
2289
-- Flushes the i-th entry of the content list of the element; the index
-- normally arrives as a string and is converted here.

function lxml.snippet(id,i)
    local e  = getid(id)
    local dt = e and e.dt
    if not dt then
        return
    end
    local dti = dt[tonumber(i)] -- string in lxml
    if dti then
        xmlsprint(dti,e)
    end
end
2302
-- Flushes the element itself (not its content list) through xmlsprint.

function lxml.direct(id)
    local e = getid(id)
    if not e then
        return
    end
    xmlsprint(e)
end
2309
do

    -- The two variants only differ in how the \xmlw call is started: with
    -- tokenizedxmlw when that is available, and with the macro name as
    -- string otherwise. The choice is made once, when this file is loaded.

    local flushcommand

    if tokenizedxmlw then

        flushcommand = function(cmd,rootname,ix)
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",cmd,"}{",rootname,"::",ix,"}")
        end

    else

        flushcommand = function(cmd,rootname,ix)
            contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",rootname,"::",ix,"}")
        end

    end

    -- Flushes one \xmlw{cmd}{rootname::index} call for each element that
    -- matches the pattern. Elements without an index trigger addindex for
    -- the root first.

    function lxml.command(id,pattern,cmd)
        local i, p = getid(id,true)
        local matches = xmlapplylpath(getid(i),pattern) -- again getid?
        if not matches then
            return
        end
        local nofmatches = #matches
        if nofmatches == 0 then
            return
        end
        local rootname = p or i.name
        for c=1,nofmatches do
            local e  = matches[c]
            local ix = e.ix
            if not ix then
                addindex(rootname,false,true)
                ix = e.ix
            end
            flushcommand(cmd,rootname,ix)
        end
    end

end
2355
2356-- loops
2357
-- Resolves the id and returns whatever xmlcollected returns for it (meant
-- to be used in a for loop).

function lxml.collected(id,pattern,reverse)
    local root = getid(id)
    return xmlcollected(root,pattern,reverse)
end
2361
-- Resolves the id and returns whatever xmlelements returns for it (meant
-- to be used in a for loop).

function lxml.elements(id,pattern,reverse)
    local root = getid(id)
    return xmlelements(root,pattern,reverse)
end
2365
2366-- testers
2367
do

    local found, empty = xml.found, xml.empty

    -- These feed the outcome of xml.found and xml.empty to the conditional
    -- helpers at the TeX end.

    function lxml.doif         (id,pattern) ctx_doif    (found(getid(id),pattern)) end
    function lxml.doifnot      (id,pattern) ctx_doifnot (found(getid(id),pattern)) end
    function lxml.doifelse     (id,pattern) ctx_doifelse(found(getid(id),pattern)) end
    function lxml.doiftext     (id,pattern) ctx_doif    (not empty(getid(id),pattern)) end
    function lxml.doifnottext  (id,pattern) ctx_doifnot (not empty(getid(id),pattern)) end
    function lxml.doifelsetext (id,pattern) ctx_doifelse(not empty(getid(id),pattern)) end

    -- special case: no pattern or "" -> self else lpath lookup
    --
    -- An element counts as empty when it has no content or only one empty
    -- string as content. An unknown element (or one without content list)
    -- is reported as empty too; before, that case raised an error because
    -- the content list was accessed unconditionally.

    local function checkedempty(id,pattern)
        if not pattern or pattern == "" then
            local e  = getid(id)
            local dt = e and e.dt
            if not dt then
                return true
            end
            local nt = #dt
            return (nt == 0) or (nt == 1 and dt[1] == "")
        else
            return empty(getid(id),pattern)
        end
    end

    xml.checkedempty = checkedempty

    function lxml.doifempty    (id,pattern) ctx_doif    (checkedempty(id,pattern)) end
    function lxml.doifnotempty (id,pattern) ctx_doifnot (checkedempty(id,pattern)) end
    function lxml.doifelseempty(id,pattern) ctx_doifelse(checkedempty(id,pattern)) end

end
2399
2400-- status info
2401
-- Reports the load time and the number of files and conversions, but only
-- when at least one file was loaded or converted.

statistics.register("xml load time", function()
    local active = noffiles > 0 or nofconverted > 0
    if not active then
        return nil
    end
    return format("%s seconds, %s files, %s converted", statistics.elapsedtime(xml), noffiles, nofconverted)
end)
2409
-- Reports the preparation time together with the number of indexed nodes
-- and (cached) lpath calls, but only when files were involved and at least
-- one lpath call took place.

statistics.register("lxml preparation time", function()
    local active = noffiles > 0 or nofconverted > 0
    if not active then
        -- pretty close to zero so not worth mentioning
        return
    end
    local calls  = xml.lpathcalls()
    local cached = xml.lpathcached()
    if calls > 0 or cached > 0 then
        return format("%s seconds, %s nodes, %s lpath calls, %s cached calls",
            statistics.elapsedtime(lxml), nofindices, calls, cached)
    end
    return nil
end)
2424
-- When xml.profiled has entries, this writes a table with the tested,
-- matched and finalized counters per pattern (sorted by pattern) to the
-- log file and returns a one line summary with the totals.

statistics.register("lxml lpath profile", function()
    local profiled = xml.profiled
    if not profiled or not next(profiled) then
        return nil
    end
    local patterns = table.sortedkeys(profiled)
    local nofpatterns = #patterns
    local tested    = 0
    local matched   = 0
    local finalized = 0
    logs.pushtarget("logfile")
    logs.writer("\nbegin of lxml profile\n")
    logs.writer("\n   tested    matched  finalized    pattern\n\n")
    for i=1,nofpatterns do
        local pattern = patterns[i]
        local entry   = profiled[pattern]
        local t, m, f = entry.tested, entry.matched, entry.finalized
        tested    = tested    + t
        matched   = matched   + m
        finalized = finalized + f
        logs.writer(format("%9i  %9i  %9i    %s",t,m,f,pattern))
    end
    logs.writer("\nend of lxml profile\n")
    logs.poptarget()
    return format("%s patterns, %s tested, %s matched, %s finalized (see log for details)",nofpatterns,tested,matched,finalized)
end)
2447
2448-- misc
2449
-- Collects the matches (xmlcollect is called with true as third argument)
-- and prints the result with xmltprint.

function lxml.nonspace(id,pattern) -- slow, todo loop
    local root = getid(id)
    xmltprint(xmlcollect(root,pattern,true))
end
2453
-- Resolves the id and delegates to xml.strip; all options are passed on
-- unchanged.

function lxml.strip(id,pattern,nolines,anywhere,everywhere)
    local root = getid(id)
    xml.strip(root,pattern,nolines,anywhere,everywhere)
end
2457
-- Flushes the text that matches the pattern with leading and trailing
-- whitespace removed; with nolines set, every run of whitespace that
-- remains is replaced by a single space.

function lxml.stripped(id,pattern,nolines)
    local root    = getid(id)
    local content = xmltext(root,pattern) or ""
    local result  = gsub(content,"^%s*(.-)%s*$","%1")
    if nolines then
        result = gsub(result,"%s+"," ")
    end
    xmlsprint(result,root)
end
2467
-- Resolves the id and delegates to xml.delete.

function lxml.delete(id,pattern)
    local root = getid(id)
    xml.delete(root,pattern)
end

-- old names, kept for compatibility

lxml.obsolete = { }

lxml.get_id          = getid
lxml.obsolete.get_id = getid
2475
2476-- goodies:
2477
-- Flushes the first content entry of each collected element after it has
-- been passed through characters.lettered.
--
-- Only string entries are handled: an element without content, or one
-- that starts with a child element, is skipped instead of being passed on
-- as nil or as a table.

function texfinalizers.lettered(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local dt = collected[c].dt
        local s  = dt and dt[1]
        if type(s) == "string" then
            contextsprint(ctxcatcodes,lettered(s))
        end
    end
end
2488
2489-- function texfinalizers.apply(collected,what) -- to be tested
2490--     if collected then
2491--         for c=1,#collected do
2492--             contextsprint(ctxcatcodes,what(collected[c].dt[1]))
2493--         end
2494--     end
2495-- end
2496
-- Flushes the attributes of the element as a comma separated key=value
-- list (via context, so the values are interpreted as TeX input).
--
-- The keys are sorted so that the result is the same in every run; before,
-- the order was the traversal order of the attribute hash, which is not
-- defined. Nothing is flushed for an unknown element or one without
-- attributes.

function lxml.toparameters(id)
    local e = getid(id)
    local a = e and e.at
    if a and next(a) then
        local keys   = sortedkeys(a)
        local setups = { }
        for i=1,#keys do
            local k = keys[i]
            setups[i] = k .. "=" .. a[k]
        end
        -- tracing
        context(concat(setups,","))
    end
end
2513
local template = '<?xml version="1.0" ?>\n\n<!-- %s -->\n\n%s'

-- Saves the first match of the pattern as an xml file.
--
-- id       : registered element (or reference to one)
-- pattern  : lpath that selects the fragment
-- filename : file to write
-- comment  : goes in the comment at the top ("exported fragment" when
--            not given)
--
-- When nothing matches, an existing file is removed so that no old content
-- is left behind. That now also happens for an empty list of matches;
-- before, that case wrote a file with the text "nil" as fragment.

function lxml.tofile(id,pattern,filename,comment)
    local collected = xmlapplylpath(getid(id),pattern)
    local fragment  = collected and collected[1]
    if fragment then
        io.savedata(filename,format(template,comment or "exported fragment",tostring(fragment)))
    else
        os.remove(filename) -- get rid of old content
    end
end
2524
texfinalizers.upperall = xmlfinalizers.upperall
texfinalizers.lowerall = xmlfinalizers.lowerall

-- Stores the first match of the pattern in the buffer with the given name.
--
-- unescaped == true  : expanded entities; with contentonly the content
--                      list is serialized to text, else xmlcontent is used
-- unescaped == false : the pieces that xmlstring hands over are collected
--                      and concatenated
-- otherwise          : the plain tostring serialization
--
-- When nothing matches the buffer is erased. That now also happens for an
-- empty list of matches; before, that case either failed on indexing nil
-- or stored the text "nil".

function lxml.tobuffer(id,pattern,name,unescaped,contentonly)
    local collected = xmlapplylpath(getid(id),pattern)
    local fragment  = collected and collected[1]
    if not fragment then
        buffers.erase(name)
        return
    end
    local data
    if unescaped == true then
        -- expanded entities !
        if contentonly then
            data = xmlserializetotext(fragment.dt)
        else
            data = xmlcontent(fragment)
        end
    elseif unescaped == false then
        local t, n = { }, 0
        xmlstring(fragment,function(s)
            n = n + 1
            t[n] = s
        end)
        data = concat(t)
    else
        data = tostring(fragment)
    end
    buffers.assign(name,data)
end
2551
2552-- parameters
2553
do

    -- Sets key 'name' in the table found in the given slot ("at" or "pa")
    -- of the element; the table is created when the element has none.
    -- Unknown elements are ignored.

    local function assign(id,slot,name,value)
        local e = getid(id)
        if not e then
            return
        end
        local target = e[slot]
        if target then
            target[name] = value
        else
            e[slot] = { [name] = value }
        end
    end

    local function setatt(id,name,value)
        assign(id,"at",name,value)
    end

    local function setpar(id,name,value)
        assign(id,"pa",name,value)
    end

    lxml.setatt = setatt
    lxml.setpar = setpar

    -- The next two set an attribute or parameter on every element that
    -- matches the pattern.

    function lxml.setattribute(id,pattern,name,value)
        local matches = xmlapplylpath(getid(id),pattern)
        if not matches then
            return
        end
        for i=1,#matches do
            setatt(matches[i],name,value)
        end
    end

    function lxml.setparameter(id,pattern,name,value)
        local matches = xmlapplylpath(getid(id),pattern)
        if not matches then
            return
        end
        for i=1,#matches do
            setpar(matches[i],name,value)
        end
    end

    lxml.setparam = lxml.setparameter

end
2604
2605-- relatively new:
2606
2607do
2608
2609    local permitted        = nil
2610    local ctx_xmlinjector  = context.xmlinjector
2611
2612    xml.pihandlers["injector"] = function(category,rest,e)
2613        local options = options_to_array(rest)
2614        local action  = options[1]
2615        if not action then
2616            return
2617        end
2618        local n = #options
2619        if n > 1 then
2620            local category = options[2]
2621            if category == "*" then
2622                ctx_xmlinjector(action)
2623            elseif permitted then
2624                if n == 2 then
2625                    if permitted[category] then
2626                        ctx_xmlinjector(action)
2627                    end
2628                else
2629                    for i=2,n do
2630                        local category = options[i]
2631                        if category == "*" or permitted[category] then
2632                            ctx_xmlinjector(action)
2633                            return
2634                        end
2635                    end
2636                end
2637            end
2638        else
2639            ctx_xmlinjector(action)
2640        end
2641    end
2642
2643    local pattern = P("context-") * C((1-patterns.whitespace)^1) * C(P(1)^1)
2644
2645    function lxml.applyselectors(id)
2646        local root = getid(id)
2647        local function filter(e)
2648            local dt = e.dt
2649            if not dt then
2650                report_lxml("error in selector, no data in %a",e.tg or "?")
2651                return
2652            end
2653            local ndt  = #dt
2654            local done = false
2655            local i = 1
2656            while i <= ndt do
2657                local dti = dt[i]
2658                if type(dti) == "table" then
2659                    if dti.tg == "@pi@" then
2660                        local text = dti.dt[1]
2661                        local what, rest = lpegmatch(pattern,text)
2662                        if what == "select" then
2663                            local categories = options_to_hash(rest)
2664                            if categories["begin"] then
2665                                local okay = false
2666                                if permitted then
2667                                    for k, v in next, permitted do
2668                                        if categories[k] then
2669                                            okay = k
2670                                            break
2671                                        end
2672                                    end
2673                                end
2674                                if okay then
2675                                    if trace_selectors then
2676                                        report_lxml("accepting selector: %s",okay)
2677                                    end
2678                                else
2679                                    categories.begin = false
2680                                    if trace_selectors then
2681                                        report_lxml("rejecting selector: % t",sortedkeys(categories))
2682                                    end
2683                                end
2684                                for j=i,ndt do
2685                                    local dtj = dt[j]
2686                                    if type(dtj) == "table" then
2687                                        local tg = dtj.tg
2688                                        if tg == "@pi@" then
2689                                            local text = dtj.dt[1]
2690                                            local what, rest = lpegmatch(pattern,text)
2691                                            if what == "select" then
2692                                                local categories = options_to_hash(rest)
2693                                                if categories["end"] then
2694                                                    i = j
2695                                                    break
2696                                                else
2697                                                    -- error
2698                                                end
2699                                            end
2700                                        elseif not okay then
2701                                            dtj.tg = "@cm@"
2702                                        end
2703                                    else
2704    --                                     dt[j] = "" -- okay ?
2705                                    end
2706                                end
2707                            end
2708                        elseif what == "include" then
2709                            local categories = options_to_hash(rest)
2710                            if categories["begin"] then
2711                                local okay = false
2712                                if permitted then
2713                                    for k, v in next, permitted do
2714                                        if categories[k] then
2715                                            okay = k
2716                                            break
2717                                        end
2718                                    end
2719                                end
2720                                if okay then
2721                                    if trace_selectors then
2722                                        report_lxml("accepting include: %s",okay)
2723                                    end
2724                                else
2725                                    categories.begin = false
2726                                    if trace_selectors then
2727                                        report_lxml("rejecting include: % t",sortedkeys(categories))
2728                                    end
2729                                end
2730                                if okay then
2731                                    for j=i,ndt do
2732                                        local dtj = dt[j]
2733                                        if type(dtj) == "table" then
2734                                            local tg = dtj.tg
2735                                            if tg == "@cm@" then
2736                                                local content = dtj.dt[1]
2737                                                local element = root and xml.toelement(content,root)
2738                                                dt[j] = element
2739                                                element.__p__ = dt -- needs checking
2740                                                done = true
2741                                            elseif tg == "@pi@" then
2742                                                local text = dtj.dt[1]
2743                                                local what, rest = lpegmatch(pattern,text)
2744                                                if what == "include" then
2745                                                    local categories = options_to_hash(rest)
2746                                                    if categories["end"] then
2747                                                        i = j
2748                                                        break
2749                                                    else
2750                                                        -- error
2751                                                    end
2752                                                end
2753                                            end
2754                                        end
2755                                    end
2756                                end
2757                            end
2758                        elseif dti then
2759                            filter(dti)
2760                        end
2761                    end
2762                    if done then
2763                        -- probably not needed
2764                        xml.reindex(dt)
2765                    end
2766                end
2767                i = i + 1
2768            end
2769        end
2770        xmlwithelements(root,filter)
2771    end
2772
2773    function xml.setinjectors(set)
2774        local s = settings_to_set(set)
2775        if permitted then
2776            for k, v in next, s do
2777                permitted[k] = true
2778            end
2779        else
2780            permitted = s
2781        end
2782    end
2783
2784    function xml.resetinjectors(set)
2785        if permitted and set and set ~= "" then
2786            local s = settings_to_set(set)
2787            for k, v in next, s do
2788                if v then
2789                    permitted[k] = nil
2790                end
2791            end
2792        else
2793            permitted = nil
2794        end
2795    end
2796
2797end
2798
-- hm, maybe these implements should move to the ini file too
2800
-- the TeX end interfaces to the injector and selector functions; each one
-- takes a single string argument

implement {
    name      = "xmlsetinjectors",
    arguments = "string",
    actions   = xml.setinjectors,
}

implement {
    name      = "xmlresetinjectors",
    arguments = "string",
    actions   = xml.resetinjectors,
}

implement {
    name      = "xmlapplyselectors",
    arguments = "string",
    actions   = lxml.applyselectors,
}
2818
2819-- bonus: see x-lmx-html.mkiv
2820
-- Renames the first collected element to 'name', stores its stripped
-- serialization in the buffer "lmx:<name>" and has that buffer processed
-- by \xmlprocessbuffer with the given setup ("lmx:<name>:setup" when none
-- is given). Note that the element is renamed in place.
--
-- Like the other finalizers this one now accepts a missing list of
-- matches; before, that case failed on indexing nil.

function texfinalizers.xml(collected,name,setup)
    local root = collected and collected[1]
    if not root then
        return
    end
    if not name or name == "" then
        report_lxml("missing name in xml finalizer")
        return
    end
    xmlrename(root,name)
    name = "lmx:" .. name
    buffers.assign(name,strip(xmltostring(root)))
    context.xmlprocessbuffer(name,name,setup or (name..":setup"))
end
2835
2836-- experiment
2837
do

    local xmltoelement = xml.toelement
    local xmlreindex   = xml.reindex

    -- Replaces every element that matches the pattern by the content of
    -- 'whatever' converted to xml.
    --
    -- root     : element or the id of a registered one
    -- pattern  : lpath that selects the elements to replace
    -- whatever : string with xml, or a function that gets the matched
    --            element and returns such a string (a false result skips
    --            the element)
    --
    -- The first new node takes the place of the matched element and gets
    -- its type attribute when that one is set; additional nodes are
    -- inserted after it.
    --
    -- Compared to the original: the parent link (__p__) is set on the
    -- inserted nodes themselves and no longer on the temporary list that
    -- holds them, a conversion that does not result in an element is
    -- skipped, and a first node that is text (or missing) is no longer
    -- accessed as if it were an element.

    function lxml.replace(root,pattern,whatever)
        if type(root) == "string" then
            root = lxml.getid(root)
        end
        local collected = xmlapplylpath(root,pattern)
        if not collected then
            return
        end
        local isstring = type(whatever) == "string"
        for c=1,#collected do
            local e = collected[c]
            local p = e.__p__
            if p then
                local d = p.dt
                local n = e.ni
                local w = isstring and whatever or whatever(e)
                if w then
                    local converted = xmltoelement(w,root)
                    local t = type(converted) == "table" and converted.dt
                    if t then
                        if type(t) == "table" then
                            local t1 = t[1]
                            if type(t1) == "table" then
                                t1.__p__ = p
                                local a1 = t1.at
                                local ae = e.at
                                if a1 and ae then
                                    a1.type = ae.type or a1.type
                                end
                                d[n] = t1
                            else
                                -- text, or nothing at all
                                d[n] = t1 or ""
                            end
                            for i=2,#t do
                                local ti = t[i]
                                if type(ti) == "table" then
                                    ti.__p__ = p
                                end
                                n = n + 1
                                insert(d,n,ti)
                            end
                        else
                            d[n] = t
                        end
                        xmlreindex(d) -- probably not needed
                    end
                end
            end
        end
    end

    -- function document.mess_around(root)
    --     lxml.replace(
    --         root,
    --         "p[@variant='foo']",
    --         function(c)
    --             return (string.gsub(tostring(c),"foo","<bar>%1</bar>"))
    --         end
    --     )
    -- end

end
2891
do

    local lpegmatch   = lpegmatch
    local urlunescape = lpeg.patterns.urlunescaper

    -- Attribute a of the first entry of e; false when e is empty and nil when
    -- the attribute is not set.

    local function firstattribute(e,a)
        return #e > 0 and e[1].at[a]
    end

    -- Returns the unescaped value of attribute a of the first entry.

    function xmlfinalizers.url(e,a)
        local url = firstattribute(e,a)
        return url and lpegmatch(urlunescape,url)
    end

    -- How the unescaped url is passed on depends on the engine mode, which is
    -- checked only once, when this file is loaded.

    local flush

    if CONTEXTLMTXMODE > 0 then

        flush = function(url)
            contextsprint(tex.hshcatcodes,string.texhashed(lpegmatch(urlunescape,url)))
        end

    else

        flush = function(url)
         -- context.verbatim(lpegmatch(urlunescape,url)) -- no hash intercept here, verbatim is new per 23-09-06
            context(lpegmatch(urlunescape,url))
        end

    end

    -- Passes the unescaped value of attribute a of the first entry on to TeX;
    -- does nothing when there is no such value.

    function texfinalizers.url(e,a)
        local url = firstattribute(e,a)
        if url then
            flush(url)
        end
    end

end
2924
if CONTEXTLMTXMODE > 0 then

    local setmacro = tokens.setters.macro

    -- Stores the collapsed text of one collected entry in the macro with the
    -- given name, using tex.nilcatcodes. The index is 'first', 'last' or
    -- something that tonumber accepts; anything else selects the first entry.
    -- Without a macro name nothing happens.

    function xmlfinalizers.tomacro(collected,macroname,index)
        if not macroname or macroname == '' then
            return
        end
        local position
        if index == 'first' then
            position = 1
        elseif index == 'last' then
            position = #collected
        else
            position = tonumber(index) or 1
        end
        setmacro(tex.nilcatcodes,macroname,collapse(xmltext(collected[position])))
    end

end
2943