lxml-aux.lua /size: 32 Kb    last modification: 2021-10-28 13:50
-- register this module (version, authorship, license) in the global module list

modules = modules or { }

modules['lxml-aux'] = {
    license   = "see context related readme files",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    comment   = "this module is the basis for the lxml-* ones",
    version   = 1.001,
}
8
-- not all functions here make sense anymore but we keep them for
-- compatibility reasons
11
12local trace_manipulations = false  trackers.register("lxml.manipulations", function(v) trace_manipulations = v end)
13local trace_inclusions    = false  trackers.register("lxml.inclusions",    function(v) trace_inclusions    = v end)
14
15local report_xml = logs.reporter("xml")
16
17local xml = xml
18
19local xmlcopy, xmlname = xml.copy, xml.name
20local xmlinheritedconvert = xml.inheritedconvert
21local xmlapplylpath = xml.applylpath
22
23local type, next, setmetatable, getmetatable = type, next, setmetatable, getmetatable
24local insert, remove, fastcopy, concat = table.insert, table.remove, table.fastcopy, table.concat
25local gmatch, gsub, format, find, strip, match = string.gmatch, string.gsub, string.format, string.find, string.strip, string.match
26local utfbyte = utf.byte
27local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
28local striplinepatterns = utilities.strings.striplinepatterns
29
-- Writes one trace line for a tree manipulation: the action, the name of the
-- element and of its parent, the position in the match list, the element's
-- own index (ni) and the pattern that selected it.
local function report(what,pattern,c,e)
    local name   = xmlname(e)
    local parent = xmlname(e.__p__)
    report_xml("%s element %a, root %a, position %a, index %a, pattern %a",what,name,parent,c,e.ni,pattern)
end
33
-- Depth-first walk over the table entries found in the dt lists below e.
-- For each such child handle(child,depth) is called before its own children
-- are visited; depth starts at the given value (default 0) and grows by one
-- per nesting level. Non-table entries (text) are skipped.
local function withelements(e,handle,depth)
    if not (e and handle) then
        return
    end
    local children = e.dt
    if not children then
        return
    end
    local level = depth or 0
    for index=1,#children do
        local child = children[index]
        if type(child) == "table" then
            handle(child,level)
            withelements(child,handle,level+1)
        end
    end
end
49
50xml.withelements = withelements
51
-- Calls handle on the n-th table entry in the dt list of e. A positive n
-- counts from the start, a negative n counts from the end; string entries
-- are not counted. Nothing happens when n is 0, when there is no such entry
-- or when e, e.dt or handle is missing.
function xml.withelement(e,n,handle) -- slow
    if not (e and n ~= 0 and handle) then
        return
    end
    local edt = e.dt
    if not edt then
        return
    end
    local first, last, step
    if n > 0 then
        first, last, step = 1, #edt, 1
    elseif n < 0 then
        -- walk backwards and count down a positive number instead
        first, last, step = #edt, 1, -1
        n = -n
    else
        return
    end
    for i=first,last,step do
        local ei = edt[i]
        if type(ei) == "table" then
            if n == 1 then
                handle(ei)
                return
            end
            n = n - 1
        end
    end
end
84
-- Applies pattern to root and, when a handle is given, calls it on every
-- collected entry (last to first when reverse is set). The collected list
-- is returned; nothing is returned when the pattern yields no list.
function xml.each(root,pattern,handle,reverse)
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    if handle then
        local first, last, step = 1, #collected, 1
        if reverse then
            first, last, step = last, 1, -1
        end
        for c=first,last,step do
            handle(collected[c])
        end
    end
    return collected
end
102
-- Applies pattern to root and passes the attribute table (at) of every
-- collected entry to handle. Returns the collected list (possibly nil).
function xml.processattributes(root,pattern,handle)
    local collected = xmlapplylpath(root,pattern)
    if not collected or not handle then
        return collected
    end
    for c=1,#collected do
        local e = collected[c]
        handle(e.at)
    end
    return collected
end
112
113--[[ldx--
114<p>The following functions collect elements and texts.</p>
115--ldx]]--
116
117-- are these still needed -> lxml-cmp.lua
118
-- Applies pattern to root by means of xmlapplylpath and hands back its
-- results unchanged (a thin public wrapper around the local shortcut).
function xml.collect(root, pattern)
    return xmlapplylpath(root,pattern)
end
122
-- Applies pattern to root and returns the collected list, or an empty table
-- when nothing was collected. With flatten set, every entry in the list is
-- replaced (in place) by xml.tostring of its dt.
function xml.collecttexts(root, pattern, flatten) -- todo: variant with handle
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return { }
    end
    if flatten then
        local xmltostring = xml.tostring
        for c=1,#collected do
            local e = collected[c]
            collected[c] = xmltostring(e.dt)
        end
    end
    return collected
end
133
-- Applies pattern to root and returns a list with the tag of every collected
-- entry. The tag is prefixed by "ns:" unless nonamespace is set or the
-- namespace is the empty string. Nothing is returned when nothing matched.
function xml.collect_tags(root, pattern, nonamespace)
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    local tags = { }
    for c=1,#collected do
        local e  = collected[c]
        local ns = e.ns
        local tg = e.tg
        if nonamespace or ns == "" then
            tags[c] = tg
        else
            tags[c] = ns .. ":" .. tg
        end
    end
    return tags
end
155
156--[[ldx--
157<p>We've now arrived at the functions that manipulate the tree.</p>
158--ldx]]--
159
160local no_root = { no_root = true }
161
-- Renumbers a dt list: every table entry gets its current position stored
-- in its ni field; string entries are left alone.
local function redo_ni(d)
    local position = 0
    local total    = #d
    while position < total do
        position = position + 1
        local entry = d[position]
        if type(entry) == "table" then
            entry.ni = position
        end
    end
end
170
171xml.reindex = redo_ni
172
-- Turns "whatever" into something that can be put in a tree.
--
-- * nil or false gives nil
-- * a string is converted with xmlinheritedconvert (inheriting from root);
--   when the conversion yields nothing or flags an error the original string
--   is returned instead
-- * anything else is assumed to be an element table and is returned as is
local function xmltoelement(whatever,root)
    if not whatever then
        return nil
    end
    if type(whatever) ~= "string" then
        return whatever -- we assume a table
    end
    local element = xmlinheritedconvert(whatever,root,true) -- beware, not really a root
    if not element or element.error then
        -- guard against a failed conversion before looking at the result
        return whatever -- string
    end
    return element
end
195
196xml.toelement = xmltoelement
197
198-- local function copiedelement(element,newparent)
199--     if type(element) ~= "string" then
200--         element = xmlcopy(element).dt
201--         if newparent and type(element) == "table" then
202--             element.__p__ = newparent
203--         end
204--     end
205--     return element
206-- end
207
-- Returns a string unchanged. For anything else the dt list of a copy (made
-- with xmlcopy) is returned and, when a new parent is given, each table
-- entry in that list gets the new parent as its __p__.
local function copiedelement(element,newparent)
    if type(element) == "string" then
        return element
    end
    local copy = xmlcopy(element).dt
    if newparent and type(copy) == "table" then
        for i=1,#copy do
            local child = copy[i]
            if type(child) == "table" then
                child.__p__ = newparent
            end
        end
    end
    return copy
end
222
-- Removes elements from the tree.
--
-- Without a pattern (nil or "") root itself is removed from the dt list of
-- its parent. Otherwise every entry collected by applying pattern to root is
-- removed from its own parent. After each removal the parent's list is
-- renumbered, so the ni of later matches that share a parent stays valid.
-- Entries without a parent, or without a usable index, are left alone.
function xml.delete(root,pattern)
    if not pattern or pattern == "" then
        local p = root.__p__
        if p then
            if trace_manipulations then
                report('deleting',"--",1,root) -- just one element, so position 1
            end
            local ni = root.ni
            if ni then -- without an index remove() would drop the last sibling
                local d = p.dt
                remove(d,ni)
                redo_ni(d) -- can be made faster and inlined
            end
        end
    else
        local collected = xmlapplylpath(root,pattern)
        if collected then
            for c=1,#collected do
                local e = collected[c]
                local p = e.__p__
                if p then
                    if trace_manipulations then
                        report('deleting',pattern,c,e)
                    end
                    local d  = p.dt
                    local ni = e.ni
                    if ni and ni <= #d then
                        remove(d,ni)
                        redo_ni(d) -- can be made faster and inlined
                    else
                        -- disturbing
                    end
                end
            end
        end
    end
end
262
-- Removes every collected element whose content is exactly one string that
-- consists of whitespace only (or is empty). The parent's list is renumbered
-- after each removal.
function xml.wipe(root,pattern) -- not yet in manual
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        local p = e.__p__
        if p then
            local siblings = p.dt
            local index    = e.ni
            if index <= #siblings then
                local content = e.dt
                local only    = #content == 1 and content[1]
                if type(only) == "string" and match(only,"^%s*$") then
                    if trace_manipulations then
                        report('wiping',pattern,c,e)
                    end
                    remove(siblings,index)
                    redo_ni(siblings) -- can be made faster and inlined
                end
            end
        end
    end
end
289
-- Replaces every element collected by pattern with a fresh copy of
-- "whatever" (a string or an element, see xmltoelement). When the copy is a
-- list, its first entry takes the place of the match and the remaining ones
-- are inserted right after it. The parent's list is renumbered afterwards.
function xml.replace(root,pattern,whatever)
    local element = root and xmltoelement(whatever,root)
    local collected = element and xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        local p = e.__p__
        if p then
            if trace_manipulations then
                report('replacing',pattern,c,e)
            end
            local d = p.dt
            local n = e.ni
            local t = copiedelement(element,p)
            if type(t) ~= "table" then
                d[n] = t
            else
                d[n] = t[1]
                for i=2,#t do
                    insert(d,n+i-1,t[i])
                end
            end
            redo_ni(d) -- probably not needed
        end
    end
end
318
-- Wraps e in place: e itself takes over the name, namespace and a copy of
-- the attributes of wrapper, while a new inner table (sharing e's metatable)
-- takes over e's former name, attributes and content and becomes the only
-- child of e.
--
-- The former children are reparented to the inner table and the inner table
-- gets index 1, so that the __p__ / ni bookkeeping that the manipulators in
-- this file rely on stays consistent after wrapping.
local function wrap(e,wrapper)
    local dt = e.dt
    local t = {
        rn = e.rn,
        tg = e.tg,
        ns = e.ns,
        at = e.at,
        dt = dt,
        ni = 1,
        __p__ = e,
    }
    setmetatable(t,getmetatable(e))
    if dt then
        for i=1,#dt do
            local child = dt[i]
            if type(child) == "table" then
                child.__p__ = t
            end
        end
    end
    e.rn = wrapper.rn or e.rn or ""
    e.tg = wrapper.tg or e.tg or ""
    e.ns = wrapper.ns or e.ns or ""
    e.at = fastcopy(wrapper.at)
    e.dt = { t }
end
335
-- Wraps elements in a wrapper element. With three arguments every element
-- collected by pattern is wrapped in "whatever"; with only two arguments the
-- second one is taken as the wrapper and root itself gets wrapped.
function xml.wrap(root,pattern,whatever)
    if not whatever then
        wrap(root,xmltoelement(pattern))
        return
    end
    local wrapper = xmltoelement(whatever,root)
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        if trace_manipulations then
            report('wrapping',pattern,c,e)
        end
        wrap(e,wrapper)
    end
end
353
-- Adds a copy of "whatever" to the content of every element collected by
-- pattern: at the end by default, at the start when prepend is set.
--
-- The list that receives the copy is the dt of the entry at the parent's ri
-- when the parent has one, otherwise the dt of the matched element itself.
-- A string payload is injected as a single text entry. The list that was
-- changed is renumbered afterwards so that the ni of its entries matches
-- their new positions.
local function inject_element(root,pattern,whatever,prepend)
    local element = root and xmltoelement(whatever,root)
    local collected = element and xmlapplylpath(root,pattern)
    local function inject_e(e)
        local r   = e.__p__
        local d   = r.dt
        local k   = e.ni
        local rri = r.ri
        local edt = (rri and d[rri].dt) or (d and d[k] and d[k].dt)
        if edt then
            local be, af
            local cp = copiedelement(element,e)
            if type(cp) ~= "table" then
                cp = { cp } -- plain text, treat it as a one entry list
            end
            if prepend then
                be, af = cp, edt
            else
                be, af = edt, cp
            end
            local bn = #be
            for i=1,#af do
                bn = bn + 1
                be[bn] = af[i]
            end
            if rri then
                r.dt[rri].dt = be
            else
                d[k].dt = be
            end
            redo_ni(d)
            redo_ni(be) -- this is the list that actually changed
        end
    end
    if not collected then
        -- nothing
    elseif collected.tg then
        -- first or so
        inject_e(collected)
    else
        for c=1,#collected do
            inject_e(collected[c])
        end
    end
end
395
-- Inserts a copy of "whatever" as a sibling of every element collected by
-- pattern: right after the match by default, right before it when "before"
-- is set. The parent's list is renumbered after each insert.
local function insert_element(root,pattern,whatever,before) -- todo: element as function
    local element = root and xmltoelement(whatever,root)
    local collected = element and xmlapplylpath(root,pattern)
    if not collected then
        return -- nothing
    end
    local offset = before and 0 or 1
    local function insert_e(e)
        local parent   = e.__p__
        local siblings = parent.dt
        local position = e.ni + offset
        insert(siblings,position,copiedelement(element,parent))
        redo_ni(siblings)
    end
    if collected.tg then
        -- first or so
        insert_e(collected)
    else
        for c=1,#collected do
            insert_e(collected[c])
        end
    end
end
420
-- public names for the two local manipulators; the "before" variants pass
-- true as fourth argument

xml.insert_element = insert_element
xml.insertafter    = insert_element
xml.injectafter    = inject_element

function xml.insertbefore(r,p,e)
    insert_element(r,p,e,true)
end

function xml.injectbefore(r,p,e)
    inject_element(r,p,e,true)
end
426
427-- loaddata can restrict loading
428
-- Resolves inclusions in place.
--
-- Every element collected by pattern (default 'include') that has content
-- and a parent is replaced in its parent's list by:
--
-- * "" when no name is found or nothing could be loaded or converted
-- * the escaped data when the element has parse="text"
-- * otherwise the body of the converted data, tagged with the resource
--   name (__f__ and cf)
--
-- The resource name comes from the first attribute that is set in the
-- "|" separated attribute list. Without an attribute list the leading text
-- of the element is used and, when there is none, the "href" attribute.
--
-- loaddata(name) defaults to io.loaddata and returns the data plus an
-- optional resolved name, so it can be used to restrict loading. With
-- recursive set, inclusions inside included data are resolved too. Names of
-- included resources are collected in xmldata.settings.inclusions, those
-- with an er flag on their body also in settings.badinclusions.
local function include(xmldata,pattern,attribute,recursive,loaddata,level)
 -- attribute = attribute or 'href'
    pattern   = pattern or 'include'
    loaddata  = loaddata or io.loaddata
    local collected = xmlapplylpath(xmldata,pattern)
    if collected then
        if not level then
            level = 1
        end
        for c=1,#collected do
            local ek = collected[c]
            local name = nil
            local ekdt = ek.dt
            if ekdt then
                local ekat = ek.at
                local ekrt = ek.__p__
                if ekrt then
                    local epdt = ekrt.dt
                    if not attribute or attribute == "" then
                        -- only text can be a name; an empty content list or a
                        -- child element must not end up as resource name
                        local first = (type(ekdt) == "table" and ekdt[1]) or ekdt
                        if type(first) == "string" then
                            name = first
                        end
                    end
                    if not name and ekat then
                        for a in gmatch(attribute or "href","([^|]+)") do
                            name = ekat[a]
                            if name then
                                break
                            end
                        end
                    end
                    local data = nil
                    if name and name ~= "" then
                        local d, n = loaddata(name)
                        data = d or ""
                        name = n or name
                        if trace_inclusions then
                            report_xml("including %s bytes from %a at level %s by pattern %a and attribute %a (%srecursing)",#data,name,level,pattern,attribute or "",recursive and "" or "not ")
                        end
                    end
                    if not data or data == "" then
                        epdt[ek.ni] = "" -- xml.empty(d,k)
                    elseif ekat and ekat["parse"] == "text" then
                        -- for the moment hard coded
                        epdt[ek.ni] = xml.escaped(data) -- d[k] = xml.escaped(data)
                    else
                        -- make sure that there is a settings table before the
                        -- current resource gets saved and restored
                        local settings = xmldata.settings
                        if not settings then
                            settings = { }
                            xmldata.settings = settings
                        end
                        local savedresource = settings.currentresource
                        settings.currentresource = name
                        local xi = xmlinheritedconvert(data,xmldata,true)
                        if not xi then
                            epdt[ek.ni] = "" -- xml.empty(d,k)
                        else
                            if recursive then
                                include(xi,pattern,attribute,recursive,loaddata,level+1)
                            end
                            local child = xml.body(xi) -- xml.assign(d,k,xi)
                            child.__p__ = ekrt
                            child.__f__ = name -- handy for tracing
                            child.cf = name
                            epdt[ek.ni] = child
                            local inclusions = settings.inclusions
                            if inclusions then
                                inclusions[#inclusions+1] = name
                            else
                                settings.inclusions = { name }
                            end
                            if child.er then
                                local badinclusions = settings.badinclusions
                                if badinclusions then
                                    badinclusions[#badinclusions+1] = name
                                else
                                    settings.badinclusions = { name }
                                end
                            end
                        end
                        settings.currentresource = savedresource
                    end
                end
            end
        end
    end
end

xml.include = include
516
-- Walks from e upwards through the __p__ links and returns the first __f__
-- (name of an included resource) that is found, or default when there is
-- none.
function xml.inclusion(e,default)
    local current = e
    while current do
        local f = current.__f__
        if f then
            return f
        end
        current = current.__p__
    end
    return default
end
528
-- Walks from e upwards through the __p__ links until a settings table is
-- found that has a list under key. A table.unique copy of that list is
-- returned (sorted when asked for); nothing is returned when no such list
-- is found.
local function getinclusions(key,e,sorted)
    while e do
        local settings   = e.settings
        local inclusions = settings and settings[key]
        if inclusions then
            local unique = table.unique(inclusions) -- a copy
            if sorted then
                table.sort(unique) -- so we sort the copy
            end
            return unique -- and return the copy
        end
        e = e.__p__
    end
end

-- the names of the resources that were included

function xml.inclusions(e,sorted)
    return getinclusions("inclusions",e,sorted)
end

-- the names of the included resources that were flagged as bad

function xml.badinclusions(e,sorted)
    return getinclusions("badinclusions",e,sorted)
end
556
557local b_collapser  = lpegpatterns.b_collapser
558local m_collapser  = lpegpatterns.m_collapser
559local e_collapser  = lpegpatterns.e_collapser
560local x_collapser  = lpegpatterns.x_collapser
561
562local b_stripper   = lpegpatterns.b_stripper
563local m_stripper   = lpegpatterns.m_stripper
564local e_stripper   = lpegpatterns.e_stripper
565local x_stripper   = lpegpatterns.x_stripper
566
-- Strips spacing from the text entries in the content (dt) of e and returns
-- e. There are three modes:
--
-- * everything : x_collapser is applied to each text entry; entries that
--                end up empty are dropped (a new dt list is assigned)
-- * anywhere   : each text entry is processed; the first entry gets the b_
--                pattern, the last entry the e_ pattern and all others the
--                m_ pattern; entries that end up empty are dropped (a new
--                dt list is assigned)
-- * otherwise  : only the first and last entry are looked at, using the b_
--                and e_ pattern; the dt list is changed in place
--
-- With nolines set the collapser patterns are used instead of the stripper
-- ones. Entries that are not strings are always kept.
local function stripelement(e,nolines,anywhere,everything)
    local edt = e.dt
    if edt then
        local n = #edt
        if n == 0 then
            return e -- convenient
        elseif everything then
            local t = { }
            local m = 0
            for i=1,n do
                local str = edt[i]
                if type(str) ~= "string" then
                    m = m + 1
                    t[m] = str
                elseif str ~= "" then
                    str = lpegmatch(x_collapser,str)
                    if str ~= "" then
                        m = m + 1
                        t[m] = str
                    end
                end
            end
            e.dt = t
        elseif anywhere then
            local t = { }
            local m = 0
            for i=1,n do
                local str = edt[i]
                if type(str) ~= "string" then
                    m = m + 1
                    t[m] = str
                elseif str ~= "" then
                    -- the position in the original list (n entries) decides
                    -- which pattern applies, not the number of kept entries
                    if nolines then
                        str = lpegmatch((i == 1 and b_collapser) or (i == n and e_collapser) or m_collapser,str)
                    else
                        str = lpegmatch((i == 1 and b_stripper) or (i == n and e_stripper) or m_stripper,str)
                    end
                    if str ~= "" then
                        m = m + 1
                        t[m] = str
                    end
                end
            end
            e.dt = t
        else
            local str = edt[1]
            if type(str) == "string" then
                if str ~= "" then
                    str = lpegmatch(nolines and b_collapser or b_stripper,str)
                end
                if str == "" then
                    remove(edt,1)
                    n = n - 1
                else
                    edt[1] = str
                end
            end
            if n > 0 then
                str = edt[n]
                if type(str) == "string" then
                    if str == "" then
                        remove(edt)
                    else
                        str = lpegmatch(nolines and e_collapser or e_stripper,str)
                        if str == "" then
                            remove(edt)
                        else
                            edt[n] = str
                        end
                    end
                end
            end
        end
    end
    return e -- convenient
end
643
644xml.stripelement = stripelement
645
-- Runs stripelement, with the given flags, on every element collected by
-- pattern.
function xml.strip(root,pattern,nolines,anywhere,everything) -- strips all leading and trailing spacing
    local collected = xmlapplylpath(root,pattern) -- beware, indices no longer are valid now
    if not collected then
        return
    end
    for i=1,#collected do
        local e = collected[i]
        stripelement(e,nolines,anywhere,everything)
    end
--  return root
end
655
656-- local function compactelement(e)
657--     local edt = e.dt
658--     if edt then
659--         local t = { }
660--         local m = 0
661--         for e=1,#edt do
662--             local str = edt[e]
663--             if type(str) ~= "string" then
664--                 m = m + 1
665--                 t[m] = str
666--             elseif str ~= "" and find(str,"%S") then
667--                 m = m + 1
668--                 t[m] = str
669--             end
670--         end
671--         e.dt = t
672--     end
673--     return e -- convenient
674-- end
675
-- Replaces every text entry in the content of e that has no non-space
-- character by an empty string. The list keeps its length, so indices stay
-- valid. Returns e.
local function compactelement(e)
    local edt = e.dt
    if not edt then
        return e -- convenient
    end
    for index=1,#edt do
        local entry = edt[index]
        if type(entry) == "string" then
            local hastext = find(entry,"%S")
            if not hastext then
                edt[index] = ""
            end
        end
    end
    return e -- convenient
end
688
689xml.compactelement = compactelement
690
-- Replaces the namespace oldspace by newspace in all elements below root
-- (root itself is not touched). An element that matches gets its ns set and,
-- when it has an rn field, that one too. Text entries are skipped and an
-- element without content (dt) is fine.
local function renamespace(root, oldspace, newspace) -- fast variant
    local rdt = root and root.dt
    if type(rdt) ~= "table" then
        return
    end
    for i=1,#rdt do
        local e = rdt[i] -- the children live in dt, not in the element itself
        if type(e) == "table" then
            if e.ns == oldspace then
                e.ns = newspace
                if e.rn then
                    e.rn = newspace
                end
            end
            if e.dt then
                renamespace(e, oldspace, newspace) -- pass the element, not its dt
            end
        end
    end
end
709
710xml.renamespace = renamespace
711
-- Gives every element collected by pattern the tag newtg.
function xml.remaptag(root, pattern, newtg)
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        e.tg = newtg
    end
end
720
-- Gives every element collected by pattern the namespace newns.
function xml.remapnamespace(root, pattern, newns)
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        e.ns = newns
    end
end
729
-- Sets rn to newns for every collected element that has an empty namespace
-- (ns) and no rn yet (nil, false or the empty string).
function xml.checknamespace(root, pattern, newns)
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local e  = collected[c]
        local rn = e.rn
        local unset = not rn or rn == ""
        if unset and e.ns == "" then
            e.rn = newns
        end
    end
end
741
-- Sets the tag, namespace and rn of every element collected by pattern in
-- one go; a nil value clears the field.
function xml.remapname(root, pattern, newtg, newns, newrn)
    local collected = xmlapplylpath(root,pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        e.tg = newtg
        e.ns = newns
        e.rn = newrn
    end
end
751
752--[[ldx--
753<p>Helper (for q2p).</p>
754--ldx]]--
755
-- When the content of e is exactly one entry and that entry is a cdata node
-- (tag "@cd@"), the content of e is replaced by the content of that node.
function xml.cdatatotext(e)
    local dt = e.dt
    if #dt ~= 1 then
        return -- maybe option
    end
    local first = dt[1]
    if first.tg == "@cd@" then
        e.dt = first.dt
    end
end
767
768-- local x = xml.convert("<x><a>1<b>2</b>3</a></x>")
769-- xml.texttocdata(xml.first(x,"a"))
770-- print(x) -- <x><![CDATA[1<b>2</b>3]]></x>
771
-- Turns e into a cdata node (tag "@cd@") that holds its former content
-- serialized by xml.tostring; the namespace fields are emptied and the
-- attributes are dropped.
function xml.texttocdata(e) -- could be a finalizer
    local text = xml.tostring(e.dt) -- no shortcut?
    e.tg, e.ns, e.rn = "@cd@", "", ""
    e.special = true
    e.at = nil
    e.dt = { text }
end
782
783-- local x = xml.convert("<x><a>1<b>2</b>3</a></x>")
-- xml.elementtocdata(xml.first(x,"a"))
785-- print(x) -- <x><![CDATA[<a>1<b>2</b>3</a>]]></x>
786
-- Turns e into a cdata node (tag "@cd@") that holds the complete former
-- element (so including its own tags) serialized by xml.tostring; the
-- namespace fields are emptied and the attributes are dropped.
function xml.elementtocdata(e) -- could be a finalizer
    local text = xml.tostring(e) -- no shortcut?
    e.tg, e.ns, e.rn = "@cd@", "", ""
    e.special = true
    e.at = nil
    e.dt = { text }
end
797
xml.builtinentities = table.tohash { "amp", "quot", "apos", "lt", "gt" } -- used often so share

local entities        = characters and characters.entities or nil
local builtinentities = xml.builtinentities

-- Adds a doctype node with entity definitions to a root (tag "@rt@") that
-- carries statistics.
--
-- For every entity name in root.statistics.entities.names that is not
-- built in, a line <!ENTITY name "value" > is generated. The value is the
-- entry in characters.entities (loaded on demand from "char-ent"), the
-- hexadecimal character reference of that entry when option is
-- "hexadecimal", or "[name]" for an unknown entity. The node is put at the
-- start of the root, after a leading processing instruction (tag "@pi@")
-- when there is one, and the root's list is renumbered. Nothing is added
-- when no definitions are needed.
function xml.addentitiesdoctype(root,option) -- we could also have a 'resolve' i.e. inline hex
    if not entities then
        require("char-ent")
        entities = characters.entities
    end
    if entities and root and root.tg == "@rt@" and root.statistics then
        local used  = root.statistics.entities
        local names = used and used.names
        if not names then
            return
        end
        local list = { }
        local hexify = option == "hexadecimal"
        for k in table.sortedhash(names) do
            if not builtinentities[k] then
                local e = entities[k]
                if not e then
                    e = format("[%s]",k)
                elseif hexify then
                    -- the code point of the character, as hexadecimal reference
                    e = format("&#x%05X;",utfbyte(e))
                end
                list[#list+1] = format("  <!ENTITY %s %q >",k,e)
            end
        end
        if #list > 0 then
            local dt    = root.dt
            local first = dt[1]
            local n = type(first) == "table" and first.tg == "@pi@" and 2 or 1
            insert(dt, n, { "\n" })
            insert(dt, n, {
               tg      = "@dt@", -- beware, doctype is unparsed
               dt      = { format("Something [\n%s\n] ",concat(list,"\n")) },
               ns      = "",
               special = true,
            })
            insert(dt, n, { "\n\n" })
            redo_ni(dt) -- three entries were added in front of the others
        else
         -- insert(dt, n, { table.serialize(root.statistics) })
        end
    end
end
838
839-- local str = [==[
840-- <?xml version='1.0' standalone='yes' ?>
841-- <root>
842-- <a>test &nbsp; test &#123; test</a>
843-- <b><![CDATA[oeps]]></b>
844-- </root>
845-- ]==]
846--
847-- local x = xml.convert(str)
848-- xml.addentitiesdoctype(x,"hexadecimal")
849-- print(x)
850
851--[[ldx--
852<p>Here are a few synonyms.</p>
853--ldx]]--
854
-- alternative names, grouped by the function they refer to

xml.all,    xml.process = xml.each,        xml.each
xml.insert, xml.after   = xml.insertafter, xml.insertafter
xml.inject              = xml.injectafter
xml.before              = xml.insertbefore
861
862-- obsolete
863
xml.obsolete   = xml.obsolete or { }
local obsolete = xml.obsolete

-- each old (underscored) name is made available in the xml namespace as
-- well as in the obsolete table, as an alias of the current function

do

    local renamed = {
        { "strip_whitespace",      "strip"             },
        { "collect_elements",      "collect"           },
        { "delete_element",        "delete"            },
        { "replace_element",       "replace"           },
        { "each_element",          "each"              },
        { "process_elements",      "process"           },
        { "insert_element_after",  "insertafter"       },
        { "insert_element_before", "insertbefore"      },
        { "inject_element_after",  "injectafter"       },
        { "inject_element_before", "injectbefore"      },
        { "process_attributes",    "processattributes" },
        { "collect_texts",         "collecttexts"      },
        { "inject_element",        "inject"            },
        { "remap_tag",             "remaptag"          },
        { "remap_name",            "remapname"         },
        { "remap_namespace",       "remapnamespace"    },
    }

    for i=1,#renamed do
        local pair    = renamed[i]
        local oldname = pair[1]
        local target  = xml[pair[2]]
        xml[oldname]      = target
        obsolete[oldname] = target
    end

end
883
884-- new (probably ok)
885
-- Returns the text of the cdata node (tag "@cd@") that is the one and only
-- entry in the content of e; in all other cases an empty string is returned.
function xml.cdata(e)
    local dt = e and e.dt
    if dt and #dt == 1 then
        local first = dt[1]
        if first.tg == "@cd@" then
            return first.dt[1] or ""
        end
    end
    return ""
end
896
-- Finalizer variant: looks at the first collected element only and returns
-- the text of the cdata node (tag "@cd@") that is its one and only content
-- entry; in all other cases an empty string is returned.
function xml.finalizers.xml.cdata(collected)
    local e  = collected and collected[1]
    local dt = e and e.dt
    if dt and #dt == 1 then
        local first = dt[1]
        if first.tg == "@cd@" then
            return first.dt[1] or ""
        end
    end
    return ""
end
910
-- Inserts a comment node (tag "@cm@") with text str into the content of e,
-- at position n (default 1).
function xml.insertcomment(e,str,n)
    local comment = {
        tg      = "@cm@",
        ns      = "",
        special = true,
        at      = { },
        dt      = { str },
    }
    insert(e.dt,n or 1,comment)
end
920
-- Inserts a cdata node (tag "@cd@") with text str into the content of e,
-- at position n (default 1).
function xml.insertcdata(e,str,n)
    local cdata = {
        tg      = "@cd@",
        ns      = "",
        special = true,
        at      = { },
        dt      = { str },
    }
    insert(e.dt,n or 1,cdata)
end
930
-- Replaces the content of e by a single comment node (tag "@cm@") with text
-- str. The third argument is accepted but not used.
function xml.setcomment(e,str,n)
    local comment = {
        tg      = "@cm@",
        ns      = "",
        special = true,
        at      = { },
        dt      = { str },
    }
    e.dt = { comment }
end
940
-- Replaces the content of e by a single cdata node (tag "@cd@") with text
-- str.
function xml.setcdata(e,str)
    local cdata = {
        tg      = "@cd@",
        ns      = "",
        special = true,
        at      = { },
        dt      = { str },
    }
    e.dt = { cdata }
end
950
951-- maybe helpers like this will move to an autoloader
952
-- Puts the content entries of every element collected by pattern on lines
-- of their own. Text entries that are only whitespace are dropped, other
-- text entries are stripped (also in the old list), and each kept entry
-- ends up between two newline strings in a new content list that starts
-- and ends with an extra newline. The new list gets the metatable of the
-- old one. Returns x.
function xml.separate(x,pattern)
    local collected = xmlapplylpath(x,pattern)
    if collected then
        for c=1,#collected do
            local e = collected[c]
            local d = e.dt
            if d == x then
                report_xml("warning: xml.separate changes root")
                x = d
            end
            local t = { "\n" }
            local n = 1
            for i=1,#d do
                local di   = d[i]
                local keep = true
                if type(di) == "string" then
                    if di == "\n" or find(di,"^%s+$") then -- first test is speedup
                        keep = false
                    else
                        d[i] = strip(di)
                    end
                end
                if keep then
                    t[n+1] = "\n"
                    t[n+2] = d[i]
                    t[n+3] = "\n"
                    n = n + 3
                end
            end
            t[n+1] = "\n"
            setmetatable(t,getmetatable(d))
            e.dt = t
        end
    end
    return x
end
997
998--
999
-- the helpers namespace is reused when it already exists

local helpers = xml.helpers

if not helpers then
    helpers = { }
end

xml.helpers = helpers
1002
-- replaces each non empty string in the data of e by what action returns
-- for it; nested tables are left untouched

local function normal(e,action)
    local list = e.dt
    if not list then
        return
    end
    for index=1,#list do
        local item = list[index]
        if item ~= "" and type(item) == "string" then
            list[index] = action(item)
        end
    end
end
1014
-- replaces each non empty string in the data of e by what action returns
-- for it and does the same for everything in there that is not a string

local function recurse(e,action)
    local list = e.dt
    if not list then
        return
    end
    for index=1,#list do
        local item = list[index]
        if type(item) == "string" then
            if item ~= "" then
                list[index] = action(item)
            end
        else
            recurse(item,action) -- ,recursive
        end
    end
end
1028
-- applies action to the non empty strings in the data of each collected
-- element and stores the result in place of the string
--
-- collected : list of elements, nil is accepted (nothing happens then)
-- action    : function that gets a string and returns its replacement
-- recursive : when set nested elements are processed too, otherwise only
--             the strings directly in the data of each collected element

function helpers.recursetext(collected,action,recursive)
    if not collected then
        -- a lookup without matches can give us nothing at all
        return
    end
    local apply = recursive and recurse or normal
    for i=1,#collected do
        apply(collected[i],action)
    end
end
1040
1041-- on request ... undocumented ...
1042--
1043-- _tag       : element name
1044-- _type      : node type (_element can be an option)
1045-- _namespace : only if given
1046--
1047-- [1..n]     : text or table
-- key        : value of attribute 'key'
1049--
1050-- local str = [[
1051-- <?xml version="1.0" ?>
1052-- <a one="1">
1053--     <!-- rubish -->
1054--   <b two="1"/>
1055--   <b two="2">
1056--     c &gt; d
1057--   </b>
1058-- </a>
1059-- ]]
1060--
1061-- inspect(xml.totable(xml.convert(str)))
1062-- inspect(xml.totable(xml.convert(str),true))
1063-- inspect(xml.totable(xml.convert(str),true,true))
1064
-- maps the tags of special nodes onto the names reported in _type

local specials = {
    ["@cd@"] = "cdata",
    ["@cm@"] = "comment",
    ["@dt@"] = "declaration",
    ["@pi@"] = "instruction",
    ["@rt@"] = "root",
}

-- turns the element x into a plain table: the attributes become keys and
-- the content ends up in the indexed part
--
-- strip : when set it is matched (lpeg) against each string, strings that
--         come out empty are dropped
-- flat  : when set the (prefixed) name goes into slot 0 and nested special
--         nodes are skipped, otherwise we get _namespace, _tag and _type
--
-- nothing is returned when the resulting table has no entries at all

local function convert(x,strip,flat)
    local ns      = x.ns
    local tg      = x.tg
    local at      = x.at
    local dt      = x.dt
    local special = x.special
    local node
    if flat then
        local name
        if not special then
            name = ns ~= "" and (ns .. ":" .. tg) or tg
        end
        node = {
            [0] = name or nil,
        }
    else
        local namespace
        if ns ~= "" then
            namespace = ns
        end
        node = {
            _namespace = namespace or nil,
            _tag       = not special and tg or nil,
            _type      = specials[tg] or "_element",
        }
    end
    if at then
        for key, value in next, at do
            node[key] = value
        end
    end
    local count = 0
    for i=1,#dt do
        local item = dt[i]
        local skip = false
        if type(item) == "table" then
            if flat and item.special then
                -- ignore
                skip = true
            else
                item = convert(item,strip,flat)
                skip = not item
            end
        elseif strip then
            item = lpegmatch(strip,item)
            skip = item == ""
        end
        if not skip then
            count = count + 1
            node[count] = item
        end
    end
    if not next(node) then
        return
    end
    return node
end
1118
-- converts the tree x into a plain table; a given strip value is used as
-- key into the striplinepatterns table and flat is passed on as is; we
-- return nothing when x is not a table

function xml.totable(x,strip,flat)
    if type(x) ~= "table" then
        return
    end
    local pattern = strip and striplinepatterns[strip]
    return convert(x,pattern,flat)
end
1127
1128-- namespace, name, attributes
1129-- name, attributes
1130-- name
1131
-- gives the element e a new namespace, name and attribute table; when the
-- name is not a string the namespace argument is taken as the name and the
-- namespace becomes empty; without an attribute table we start with an
-- empty one; nothing happens when e is not a table that has a tag

function xml.rename(e,namespace,name,attributes)
    if not (type(e) == "table" and e.tg) then
        return
    end
    local nametype = type(name)
    if nametype ~= "string" then
        -- the namespace was left out so the arguments shift one position
        attributes = nametype == "table" and name or { }
        name       = namespace
        namespace  = ""
    end
    if type(attributes) ~= "table" then
        attributes = { }
    end
    e.ns = namespace
    e.rn = namespace
    e.tg = name
    e.at = attributes
end
1153