-- lxml-xml.lua / size: 10 Kb / last modification: 2020-07-01 14:35
1if not modules then modules = { } end modules ['lxml-xml'] = {
2    version   = 1.001,
3    comment   = "this module is the basis for the lxml-* ones",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local tonumber, next = tonumber, next
10local concat = table.concat
11local find, lower, upper = string.find, string.lower, string.upper
12
13local xml = xml
14
15local finalizers     = xml.finalizers.xml
16local xmlfilter      = xml.filter -- we could inline this one for speed
17local xmltostring    = xml.tostring
18local xmlserialize   = xml.serialize
19local xmlcollected   = xml.collected
20local xmlnewhandlers = xml.newhandlers
21
22local reparsedentity  = xml.reparsedentitylpeg   -- \Ux{...}
23local unescapedentity = xml.unescapedentitylpeg
24local parsedentity    = reparsedentity
25
-- Returns the first entry of a collected list. A falsy 'collected' (nil or
-- false) is handed back as is.
local function first(collected) -- wrong ?
    if not collected then
        return collected
    end
    return collected[1]
end
29
-- Returns the final entry of a collected list. A falsy 'collected' (nil or
-- false) is handed back as is.
local function last(collected)
    if not collected then
        return collected
    end
    local size = #collected
    return collected[size]
end
33
-- Identity finalizer: hands the collected list back untouched.
local function all(collected)
    local everything = collected
    return everything
end
37
38-- local function reverse(collected)
39--     if collected then
40--         local nc = #collected
41--         if nc > 0 then
42--             local reversed, r = { }, 0
43--             for c=nc,1,-1 do
44--                 r = r + 1
45--                 reversed[r] = collected[c]
46--             end
47--             return reversed
48--         else
49--             return collected
50--         end
51--     end
52-- end
53
54local reverse = table.reversed
55
-- Returns attribute 'name' of the first collected entry.
--
-- collected : list of entries (may be nil or empty)
-- name      : attribute key to look up in the 'at' table of entry 1
-- default   : optional value returned when the attribute cannot be found;
--             xml.attribute already passes it, so it is honoured here
--
-- Without a default the result is what it always was: nothing for a missing
-- or empty list, otherwise 'at and at[name]'.
local function attribute(collected,name,default)
    if collected and #collected > 0 then
        local at = collected[1].at
        local value = at and at[name]
        if value ~= nil or default == nil then
            return value
        end
    end
    if default ~= nil then
        return default
    end
end
62
-- Looks up attribute 'name' directly on 'id' (which is indexed for 'at' here,
-- not treated as a list). Returns the falsy 'at' itself when there is no
-- attribute table.
local function att(id,name)
    local at = id.at
    if at then
        return at[name]
    end
    return at
end
67
-- Number of collected entries; 0 when there is no list at all.
local function count(collected)
    if collected then
        return #collected
    end
    return 0
end
71
-- Hybrid lookup on a collected list:
--   n > 0 : the n-th entry
--   n < 0 : the entry counted from the end (-1 is the last one)
--   else  : the 'mi' field of the first entry (0 when it has none)
-- A missing or empty list gives 0. 'n' goes through tonumber, anything that
-- does not convert counts as 0.
local function position(collected,n)
    local nc = collected and #collected or 0
    if nc == 0 then
        return 0
    end
    n = tonumber(n) or 0
    if n > 0 then
        return collected[n]
    elseif n < 0 then
        return collected[nc + n + 1]
    end
    return collected[1].mi or 0
end
89
-- The 'mi' field of the first collected entry, or 0 when there is no entry
-- or the field is not set.
local function match(collected)
    if collected and #collected > 0 then
        return collected[1].mi or 0 -- match
    end
    return 0
end
93
-- The 'ni' field of the first collected entry, or 0 when there is no entry
-- or the field is not set.
local function index(collected)
    if collected and #collected > 0 then
        return collected[1].ni or 0 -- 0 is new
    end
    return 0
end
97
-- Attribute access on the first collected entry.
--
-- collected : list of entries (may be nil or empty)
-- arguments : optional attribute key
--
-- With a key the single attribute value is returned, without one the whole
-- attribute table, but only when it has at least one entry. Nothing is
-- returned for an empty list or an entry without an 'at' table; the latter
-- used to raise an error on indexing nil.
local function attributes(collected,arguments)
    if collected and #collected > 0 then
        local at = collected[1].at
        if not at then
            return
        elseif arguments then
            return at[arguments]
        elseif next(at) then
            return at -- all of them
        end
    end
end
108
-- Searches for attribute 'arguments' starting at the first collected entry
-- and following the '__p__' links upwards. The first truthy value wins; ""
-- is returned when nothing is found or when an entry without an 'at' table
-- is met.
local function chainattribute(collected,arguments) -- todo: optional levels
    if not collected or #collected == 0 then
        return ""
    end
    local node = collected[1]
    while node do
        local at = node.at
        if not at then
            break -- error
        end
        local value = at[arguments]
        if value then
            return value
        end
        node = node.__p__
    end
    return ""
end
127
-- Serializes the first collected entry with xmltostring; a missing list or
-- one of length zero gives "".
local function raw(collected) -- hybrid (not much different from text so it might go)
    if not collected or #collected == 0 then
        return ""
    end
    -- NOTE(review): the fallback to 'collected' itself only triggers when
    -- entry 1 is falsy although the length is positive -- confirm the intent
    local e = collected[1] or collected
    return xmltostring(e) or "" -- only first as we cannot concat function
end
136
137--
138
-- Handler set (created with xmlnewhandlers) that gathers whatever it is fed
-- into one string: 'initialize' starts a fresh buffer, 'handle' appends the
-- concatenation of its arguments and 'finalize' joins the buffer.
local xmltexthandler  do

    -- The buffer is private to the three callbacks. It used to be assigned
    -- without a 'local', which made it a global that any other code could
    -- read or overwrite.
    local result

    xmltexthandler = xmlnewhandlers {
        name       = "string",
        initialize = function()
            result = { }
            return result
        end,
        finalize   = function()
            return concat(result)
        end,
        handle     = function(...)
            result[#result+1] = concat { ... }
        end,
        escape     = false,
    }

end
153
-- Text content of 'root':
--   no 'dt' or an empty one : ""
--   one entry, a string     : that string as is (no escaping of " ' < > &)
--   anything else           : serialization through xmltexthandler
local function xmltotext(root)
    local dt = root.dt
    if not dt then
        return ""
    end
    local nt = #dt -- string or table
    if nt == 0 then
        return ""
    end
    if nt == 1 then
        local only = dt[1]
        if type(only) == "string" then
            return only
        end
    end
    return xmlserialize(root,xmltexthandler) or ""
end
168
-- Serializes 'root' through xmltexthandler; "" when there is no root or the
-- serializer gives a falsy result.
function xml.serializetotext(root)
    if not root then
        return ""
    end
    return xmlserialize(root,xmltexthandler) or ""
end
172
173--
174
-- Text of the first collected entry, or of 'collected' itself when it has
-- no entry 1. A missing argument gives "".
local function text(collected) -- hybrid
    if not collected then
        return ""
    end
    -- no # test here !
    local e = collected[1] or collected -- why fallback to element, how about cdata
    return xmltotext(e) or ""
end
183
-- Gathers the 'dt' field of every collected entry that has one, in order.
-- Always returns a table, an empty one when there is nothing to gather.
local function texts(collected)
    local t = { } -- why no nil
    if collected then
        local n = 0
        for c=1,#collected do
            local e = collected[c]
            local dt = e and e.dt
            if dt then
                n = n + 1
                t[n] = dt
            end
        end
    end
    return t
end
202
-- Returns the 'tg' field of one collected entry.
--
-- collected : list of entries (may be nil or empty, then nothing is returned)
-- n         : optional selector, number or numeric string
--               nil or 0 : first entry
--               n > 0    : n-th entry
--               n < 0    : counted from the end (-1 is the last entry)
--
-- The selection follows the same rules as 'position'. Before, n == 1 picked
-- the last entry, a negative n pointed past the end of the list and a string
-- n raised a comparison error.
local function tag(collected,n)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    return c and c.tg
end
221
-- Returns the name of one collected entry: 'tg' alone when the 'ns' field is
-- empty or absent, otherwise "ns:tg".
--
-- collected : list of entries (may be nil or empty, then nothing is returned)
-- n         : optional selector, number or numeric string
--               nil or 0 : first entry
--               n > 0    : n-th entry
--               n < 0    : counted from the end (-1 is the last entry)
--
-- The selection follows the same rules as 'position'. Before, n == 1 picked
-- the last entry, a negative n pointed past the end of the list and a string
-- n raised a comparison error.
local function name(collected,n)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    if not c then
        return -- sorry
    end
    local ns, tg = c.ns, c.tg
    if not ns or ns == "" then
        return tg
    end
    return ns .. ":" .. tg
end
246
-- Lists the names of all collected entries, in order. An entry contributes
-- its 'tg', prefixed with "ns:" when 'ns' is not empty and 'nonamespace' is
-- not set. Nothing is returned for a missing or empty list.
local function tags(collected,nonamespace)
    local nc = collected and #collected or 0
    if nc == 0 then
        return
    end
    local t = { }
    for c=1,nc do
        local e = collected[c]
        local tg = e.tg
        if not nonamespace and e.ns ~= "" then
            tg = e.ns .. ":" .. tg
        end
        t[c] = tg
    end
    return t
end
268
-- Tells whether none of the collected entries has content.
--
-- collected  : list of entries; a missing or empty list counts as empty
-- spacesonly : when set, a single text that holds only whitespace counts as
--              empty as well
--
-- An entry has content when its 'dt' holds more than one item, a single
-- table, or a single non-empty text. The whitespace test used to sit behind
-- the 'edk ~= ""' branch, so it only ever saw "" and then reported content;
-- it is now applied to the non-empty text it was meant for.
local function empty(collected,spacesonly)
    if not collected then
        return true
    end
    for c=1,#collected do
        local e = collected[c]
        local edt = e and e.dt
        if edt then
            local n = #edt
            if n > 1 then
                return false
            elseif n == 1 then
                local edk = edt[1]
                if type(edk) == "table" then
                    return false
                elseif edk ~= "" then
                    if not spacesonly or find(edk,"%S") then
                        return false
                    end
                end
            end
        end
    end
    return true
end
301
-- expose the local helpers through the finalizers table

-- whole lists and single entries
finalizers.first = first
finalizers.last = last
finalizers.all = all
finalizers.reverse = reverse
finalizers.elements = all -- same function as 'all'
finalizers.default = all -- same function as 'all'

-- attributes and counters
finalizers.attribute = attribute
finalizers.att = att
finalizers.count = count
finalizers.position = position
finalizers.match = match
finalizers.index = index
finalizers.attributes = attributes
finalizers.chainattribute = chainattribute

-- content and names
finalizers.text = text
finalizers.texts = texts
finalizers.tag = tag
finalizers.name = name
finalizers.tags = tags
finalizers.empty = empty
322
323-- shortcuts -- we could support xmlfilter(id,pattern,first)
324
-- First entry that 'pattern' selects below 'id'.
function xml.first(id,pattern)
    local collected = xmlfilter(id,pattern)
    return first(collected)
end
328
-- Last entry that 'pattern' selects below 'id'.
function xml.last(id,pattern)
    local collected = xmlfilter(id,pattern)
    return last(collected)
end
332
-- Number of entries that 'pattern' selects below 'id'.
function xml.count(id,pattern)
    local collected = xmlfilter(id,pattern)
    return count(collected)
end
336
-- Filters 'id' with 'pattern' and hands the result, together with the
-- attribute key 'a' and 'default', to the local 'attribute' helper.
function xml.attribute(id,pattern,a,default)
    local collected = xmlfilter(id,pattern)
    return attribute(collected,a,default)
end
340
-- Applies the local 'raw' helper to what 'pattern' selects below 'id', or to
-- 'id' itself when no pattern is given.
function xml.raw(id,pattern)
    local target = id
    if pattern then
        target = xmlfilter(id,pattern)
    end
    return raw(target)
end
348
-- Text of the first entry that 'pattern' selects below 'id', or of 'id'
-- itself when no pattern is given. "" when there is nothing to convert.
function xml.text(id,pattern) -- brrr either content or element (when cdata)
    if pattern then
        local collected = xmlfilter(id,pattern)
        if collected and #collected > 0 then
            return xmltotext(collected[1]) or ""
        end
        return ""
    end
    if id then
        return xmltotext(id) or ""
    end
    return ""
end
361
-- Like xml.text, but 'parsedentity' is switched to 'unescapedentity' for the
-- duration of the conversion and set back to 'reparsedentity' afterwards.
-- With a pattern that selects nothing the result is "" and nothing is
-- switched.
-- NOTE(review): 'parsedentity' is not read anywhere in the visible part of
-- this file -- confirm that the switch still has an effect.
function xml.pure(id,pattern)
    local target = id
    if pattern then
        local collected = xmlfilter(id,pattern)
        if not collected or #collected == 0 then
            return ""
        end
        target = collected[1]
    end
    parsedentity = unescapedentity
    local s = xmltotext(target) or ""
    parsedentity = reparsedentity
    return s
end
380
-- xml.content is the local 'text' helper itself: it takes a collected list
-- (or a single element) and no pattern, unlike xml.text
xml.content = text
382
383--
384
-- Filters 'id' with 'pattern' and applies the local 'position' helper with
-- selector 'n' to the result.
function xml.position(id,pattern,n) -- element
    local collected = xmlfilter(id,pattern)
    return position(collected,n)
end
388
-- Filters 'id' with 'pattern' and applies the local 'match' helper to the
-- result.
function xml.match(id,pattern) -- number
    local collected = xmlfilter(id,pattern)
    return match(collected)
end
392
-- Filters 'id' with 'pattern' and applies the local 'empty' helper, with the
-- 'spacesonly' flag, to the result.
function xml.empty(id,pattern,spacesonly)
    local collected = xmlfilter(id,pattern)
    return empty(collected,spacesonly)
end
396
-- aliases: 'all' and 'found' are xml.filter, 'index' is xml.position
xml.all, xml.index, xml.found = xml.filter, xml.position, xml.filter
400
401-- a nice one:
402
-- Builds a table that maps the 'tg' of every entry found with "/*" below
-- x[1] (or below x when it has no entry 1) to the xmltostring of its 'dt'.
-- Returns nil when nothing ended up in the table.
local function totable(x)
    local t = { }
    local root = x[1] or x
    for e in xmlcollected(root,"/*") do
        t[e.tg] = xmltostring(e.dt) or ""
    end
    if next(t) then
        return t
    end
    return nil
end
410
-- the same function serves as library call and as finalizer
xml.table, finalizers.table = totable, totable
413
-- Appends to 't' every non-table item found in the 'dt' of 'e', descending
-- into table items depth first. Returns 't'.
local function textonly(e,t)
    local edt = e and e.dt
    if edt then
        for i=1,#edt do
            local child = edt[i]
            if type(child) ~= "table" then
                t[#t+1] = child
            else
                textonly(child,t)
            end
        end
    end
    return t
end
430
-- Joins all the text items below 'e', as gathered by the local 'textonly'
-- helper, into one string.
function xml.textonly(e) -- no pattern
    local pieces = textonly(e,{ })
    return concat(pieces)
end
434
435--
436
437-- local x = xml.convert("<x><a x='+'>1<B>2</B>3</a></x>")
438-- xml.filter(x,"**/lowerall()") print(x)
439-- xml.filter(x,"**/upperall()") print(x)
440
-- Lowercases, in place, the 'tg' and all attribute keys of every collected
-- entry that is not flagged 'special'. The attribute table is replaced by a
-- new one. Returns nothing.
--
-- A missing list is accepted and left alone, as the other finalizers in this
-- file do; taking its length used to raise an error.
function finalizers.lowerall(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        if not e.special then
            e.tg = lower(e.tg)
            local eat = e.at
            if eat then
                local t = { }
                for k,v in next, eat do
                    t[lower(k)] = v
                end
                e.at = t
            end
        end
    end
end
457
-- Uppercases, in place, the 'tg' and all attribute keys of every collected
-- entry that is not flagged 'special'. The attribute table is replaced by a
-- new one. Returns nothing.
--
-- A missing list is accepted and left alone, as the other finalizers in this
-- file do; taking its length used to raise an error.
function finalizers.upperall(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        if not e.special then
            e.tg = upper(e.tg)
            local eat = e.at
            if eat then
                local t = { }
                for k,v in next, eat do
                    t[upper(k)] = v
                end
                e.at = t
            end
        end
    end
end
474