lxml-xml.lua /size: 10 Kb    last modification: 2020-07-01 14:35
1
-- Make sure the global module registry exists, then describe this file in it.
if not modules then
    modules = { }
end

modules['lxml-xml'] = {
    version   = 1.001,
    comment   = "this module is the basis for the lxml-* ones",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
-- Library functions used below, cached in locals.
local tonumber = tonumber
local next     = next
local concat   = table.concat
local find     = string.find
local lower    = string.lower
local upper    = string.upper

-- The xml namespace is expected to exist already when this file is loaded.
local xml = xml

local finalizers     = xml.finalizers.xml
local xmlfilter      = xml.filter -- we could inline this one for speed
local xmltostring    = xml.tostring
local xmlserialize   = xml.serialize
local xmlcollected   = xml.collected
local xmlnewhandlers = xml.newhandlers

local reparsedentity  = xml.reparsedentitylpeg   -- \Ux{...}
local unescapedentity = xml.unescapedentitylpeg
local parsedentity    = reparsedentity           -- switched temporarily by xml.pure
25 26
-- Returns the first entry of a collection; a nil/false collection is passed
-- through unchanged.
local function first(collected) -- wrong ?
    if not collected then
        return collected
    end
    return collected[1]
end
29 30
-- Returns the last entry of a collection; a nil/false collection is passed
-- through unchanged.
local function last(collected)
    if not collected then
        return collected
    end
    return collected[#collected]
end
33 34
-- Identity finalizer: hands back the collection exactly as it came in.
local function all(collected)
    return collected
end
37 38
-- local function reverse(collected)
--     if collected then
--         local nc = #collected
--         if nc > 0 then
--             local reversed, r = { }, 0
--             for c=nc,1,-1 do
--                 r = r + 1
--                 reversed[r] = collected[c]
--             end
--             return reversed
--         else
--             return collected
--         end
--     end
-- end
53 54
-- NOTE(review): table.reversed is not part of stock Lua; presumably it comes
-- from the table extensions loaded before this file -- confirm.
local reverse = table.reversed
55 56
-- Looks up attribute `name` on the first collected element. Nothing is
-- returned for a missing or empty collection.
local function attribute(collected,name)
    if not collected or #collected == 0 then
        return
    end
    local at = collected[1].at
    if at then
        return at[name]
    end
    return at
end
62 63
-- Looks up attribute `name` directly on element `id` (no collection involved).
local function att(id,name)
    local at = id.at
    if at then
        return at[name]
    end
    return at
end
67 68
-- Number of collected entries; 0 when there is no collection at all.
local function count(collected)
    if collected then
        return #collected
    end
    return 0
end
71 72
-- Picks an entry by position. A positive n counts from the start, a negative n
-- counts from the end, and n == 0 (or a non-numeric n) yields the `mi` field of
-- the first entry (0 when that field is unset). A missing or empty collection
-- yields 0.
local function position(collected,n)
    local nc = collected and #collected or 0
    if nc == 0 then
        return 0
    end
    n = tonumber(n) or 0
    if n > 0 then
        return collected[n]
    elseif n < 0 then
        return collected[nc+n+1]
    end
    return collected[1].mi or 0
end
89 90
-- Returns the `mi` field of the first collected entry, or 0 when there is no
-- entry or the field is unset.
local function match(collected)
    if collected and #collected > 0 then
        return collected[1].mi or 0 -- match
    end
    return 0
end
93 94
-- Returns the `ni` field of the first collected entry, or 0 when there is no
-- entry or the field is unset.
local function index(collected)
    if collected and #collected > 0 then
        return collected[1].ni or 0 -- 0 is new
    end
    return 0
end
97 98
-- Attribute access on the first collected element. With `arguments` the single
-- attribute of that name is returned, without it the whole attribute table is
-- returned provided it is not empty. Nothing is returned for a missing or
-- empty collection, or when the element carries no attribute table (that case
-- used to raise an error on indexing nil).
local function attributes(collected,arguments)
    if collected and #collected > 0 then
        local at = collected[1].at
        if not at then
            return
        elseif arguments then
            return at[arguments]
        elseif next(at) then
            return at -- all of them
        end
    end
end
108 109
-- Searches for attribute `arguments` on the first collected element and then
-- upwards along the `__p__` links. The first truthy value wins; the walk stops
-- at an element without an attribute table. Returns "" when nothing is found.
local function chainattribute(collected,arguments) -- todo: optional levels
    if collected and #collected > 0 then
        local node = collected[1]
        while node do
            local at = node.at
            if not at then
                break -- error
            end
            local value = at[arguments]
            if value then
                return value
            end
            node = node.__p__
        end
    end
    return ""
end
127 128
-- Serializes the first collected entry with xmltostring; "" for a missing or
-- empty collection or when the serializer returns nothing.
local function raw(collected) -- hybrid (not much different from text so it might go)
    if not collected or #collected == 0 then
        return ""
    end
    local e = collected[1] or collected
    if e then
        return xmltostring(e) or "" -- only first as we cannot concat function
    end
    return ""
end
136 137
--
138 139
-- Handler set that collects serialized fragments and joins them into one
-- string; escaping is disabled.
local xmltexthandler

do

    -- Fragment buffer shared by the three callbacks. It used to be an
    -- undeclared (global) variable; keeping it local to this scope avoids
    -- leaking it into, or picking it up from, the global environment.
    local result

    xmltexthandler = xmlnewhandlers {
        name       = "string",
        initialize = function()
            result = { }
            return result
        end,
        finalize   = function()
            return concat(result)
        end,
        handle     = function(...)
            result[#result+1] = concat { ... }
        end,
        escape     = false,
    }

end
153 154
-- Text content of an element. An element without content gives "", a single
-- string child is returned as-is, anything else goes through the serializer
-- with the text handler.
local function xmltotext(root)
    local dt = root.dt
    local nt = dt and #dt or 0 -- string or table
    if nt == 0 then
        return ""
    end
    if nt == 1 then
        local only = dt[1]
        if type(only) == "string" then
            return only -- no escaping of " ' < > &
        end
    end
    return xmlserialize(root,xmltexthandler) or ""
end
168 169
-- Serializes `root` with the text handler; "" when there is no root or the
-- serializer returns nothing.
function xml.serializetotext(root)
    if root then
        return xmlserialize(root,xmltexthandler) or ""
    end
    return ""
end
172 173
--
174 175
-- Text of the first collected entry, or of `collected` itself when it has no
-- first entry; "" when there is nothing to work with.
local function text(collected) -- hybrid
    if not collected then -- no # test here !
        return ""
    end
    local e = collected[1] or collected -- why fallback to element, how about cdata
    if e then
        return xmltotext(e) or ""
    end
    return ""
end
183 184
-- Collects the `dt` field of every collected entry that has one. Always
-- returns a fresh table, possibly empty.
local function texts(collected)
    local t = { } -- why no nil
    if collected then
        local n = 0
        for c=1,#collected do
            local e = collected[c]
            local dt = e and e.dt
            if dt then
                n = n + 1
                t[n] = dt
            end
        end
    end
    return t
end
202 203
-- Tag (`tg`) of one collected entry. No n, n == 0 or a non-numeric n selects
-- the first entry, a positive n counts from the start and a negative n counts
-- from the end, which is the same convention as used by position(). Nothing
-- is returned for a missing or empty collection and nil for an index that is
-- out of range.
--
-- Before, n == 1 ended up in the "from the end" branch and returned the last
-- entry, and a negative n was subtracted instead of added so it always landed
-- beyond the end of the list.
local function tag(collected,n)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    return c and c.tg
end
221 222
-- Qualified name of one collected entry: just the tag when the namespace is
-- the empty string, otherwise "namespace:tag". No n, n == 0 or a non-numeric n
-- selects the first entry, a positive n counts from the start and a negative n
-- counts from the end, which is the same convention as used by position().
-- Nothing is returned for a missing or empty collection or an index that is
-- out of range.
--
-- Before, n == 1 ended up in the "from the end" branch and returned the last
-- entry, and a negative n was subtracted instead of added so it always landed
-- beyond the end of the list.
local function name(collected,n)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local c
    if n == 0 then
        c = collected[1]
    elseif n > 0 then
        c = collected[n]
    else
        c = collected[nc+n+1]
    end
    if not c then
        -- sorry
    elseif c.ns == "" then
        return c.tg
    else
        return c.ns .. ":" .. c.tg
    end
end
246 247
-- List with the name of every collected entry: the bare tag when `nonamespace`
-- is set or the namespace is the empty string, "namespace:tag" otherwise.
-- Nothing is returned for a missing or empty collection.
local function tags(collected,nonamespace)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    local t = { }
    for c=1,nc do
        local e = collected[c]
        local ns, tg = e.ns, e.tg
        if not nonamespace and ns ~= "" then
            t[c] = ns .. ":" .. tg
        else
            t[c] = tg
        end
    end
    return t
end
268 269
-- Tells whether all collected entries are empty. An entry counts as empty when
-- it has no `dt`, an empty `dt`, or a `dt` holding just one string that is ""
-- or, when `spacesonly` is set, contains whitespace only. A missing or empty
-- collection is empty too.
--
-- Before, the `spacesonly` test could only be reached for the "" string and
-- then reported "not empty", so whitespace-only content was never accepted and
-- the flag made truly empty elements look non-empty.
local function empty(collected,spacesonly)
    if not collected then
        return true
    end
    for c=1,#collected do
        local e = collected[c]
        local edt = e and e.dt
        if edt then
            local n = #edt
            if n > 1 then
                return false
            elseif n == 1 then
                local edk = edt[1]
                if type(edk) == "table" then
                    return false -- a child element
                elseif edk == "" then
                    -- nothing there
                elseif spacesonly and not find(edk,"%S") then
                    -- only whitespace and the caller accepts that
                else
                    return false
                end
            end
        end
    end
    return true
end
301 302
-- Register the local helpers as finalizers; `elements` and `default` are
-- alternative names for `all`.
finalizers.first          = first
finalizers.last           = last
finalizers.all            = all
finalizers.reverse        = reverse
finalizers.elements       = all
finalizers.default        = all
finalizers.attribute      = attribute
finalizers.att            = att
finalizers.count          = count
finalizers.position       = position
finalizers.match          = match
finalizers.index          = index
finalizers.attributes     = attributes
finalizers.chainattribute = chainattribute
finalizers.text           = text
finalizers.texts          = texts
finalizers.tag            = tag
finalizers.name           = name
finalizers.tags           = tags
finalizers.empty          = empty
322 323
-- shortcuts -- we could support xmlfilter(id,pattern,first)
324 325
-- First match of `pattern` below `id`.
function xml.first(id,pattern)
    local collected = xmlfilter(id,pattern)
    return first(collected)
end
328 329
-- Last match of `pattern` below `id`.
function xml.last(id,pattern)
    local collected = xmlfilter(id,pattern)
    return last(collected)
end
332 333
-- Number of matches of `pattern` below `id`.
function xml.count(id,pattern)
    local collected = xmlfilter(id,pattern)
    return count(collected)
end
336 337
-- Attribute `a` of the first match of `pattern` below `id`. When there is no
-- match or the attribute is not set, `default` is returned. The default used
-- to be passed on to a helper that takes only two arguments, so it was lost.
function xml.attribute(id,pattern,a,default)
    local value = attribute(xmlfilter(id,pattern),a)
    if value == nil then
        return default
    end
    return value
end
340 341
-- Applies raw() to the matches of `pattern` below `id`, or to `id` itself when
-- no pattern is given.
function xml.raw(id,pattern)
    local target = id
    if pattern then
        target = xmlfilter(id,pattern)
    end
    return raw(target)
end
348 349
-- Text of the first match of `pattern` below `id`, or of `id` itself when no
-- pattern is given; "" when there is nothing to convert.
function xml.text(id,pattern) -- brrr either content or element (when cdata)
    if pattern then
        -- return text(xmlfilter(id,pattern))
        local collected = xmlfilter(id,pattern)
        if collected and #collected > 0 then
            return xmltotext(collected[1]) or ""
        end
        return ""
    end
    if id then
        -- return text(id)
        return xmltotext(id) or ""
    end
    return ""
end
361 362
-- Like xml.text, but with the file-local `parsedentity` switched to the
-- unescaped variant for the duration of the conversion and set back to the
-- reparsed variant afterwards. With a pattern that matches nothing, "" is
-- returned and the switch is left alone.
-- NOTE(review): `parsedentity` is not read anywhere in the visible code, so
-- the effect of the switch cannot be confirmed from here.
function xml.pure(id,pattern)
    local e = id
    if pattern then
        local collected = xmlfilter(id,pattern)
        if not collected or #collected == 0 then
            return ""
        end
        e = collected[1]
    end
    parsedentity = unescapedentity
    local s = xmltotext(e) or ""
    parsedentity = reparsedentity
    return s
end
380 381
-- This binds the local text(collected) helper, not xml.text(id,pattern).
xml.content = text
382 383
--
384 385
-- Entry number `n` among the matches of `pattern` below `id`, see position().
function xml.position(id,pattern,n) -- element
    local collected = xmlfilter(id,pattern)
    return position(collected,n)
end
388 389
-- Applies match() to the matches of `pattern` below `id`.
function xml.match(id,pattern) -- number
    local collected = xmlfilter(id,pattern)
    return match(collected)
end
392 393
-- Applies empty() to the matches of `pattern` below `id`.
function xml.empty(id,pattern,spacesonly)
    local collected = xmlfilter(id,pattern)
    return empty(collected,spacesonly)
end
396 397
-- Alternative names for functions that already exist in the xml namespace.
xml.all   = xml.filter
xml.index = xml.position
xml.found = xml.filter
400 401
-- a nice one:
402 403
-- Builds a table that maps the tag of each entry matched by "/*" below x (or
-- below x[1] when that exists) to the serialized form of its `dt`, with "" as
-- fallback. Returns nil when nothing was stored.
local function totable(x)
    local t = { }
    local root = x[1] or x
    for e in xmlcollected(root,"/*") do
        t[e.tg] = xmltostring(e.dt) or ""
    end
    if next(t) then
        return t
    end
    return nil
end
410 411
-- Expose totable both as a direct call and as a finalizer.
xml.table        = totable
finalizers.table = totable
413 414
-- Walks the `dt` list of element e depth first and appends every entry that is
-- not a table to t; table entries are descended into. Returns t.
local function textonly(e,t)
    local edt = e and e.dt
    if edt then
        for i=1,#edt do
            local child = edt[i]
            if type(child) == "table" then
                textonly(child,t)
            else
                t[#t+1] = child
            end
        end
    end
    return t
end
430 431
-- Concatenates everything textonly() gathers below element e.
function xml.textonly(e) -- no pattern
    local pieces = textonly(e,{ })
    return concat(pieces)
end
434 435
--
436 437
-- local x = xml.convert("<x><a x='+'>1<B>2</B>3</a></x>")
438
-- xml.filter(x,"**/lowerall()") print(x)
439
-- xml.filter(x,"**/upperall()") print(x)
440 441
-- Lowercases, in place, the tag and all attribute names of every collected
-- element that is not flagged as `special`. The attribute table is replaced by
-- a new one. A missing collection is ignored (it used to raise an error on
-- taking its length).
function finalizers.lowerall(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        if not e.special then
            e.tg = lower(e.tg)
            local eat = e.at
            if eat then
                local t = { }
                for k, v in next, eat do
                    t[lower(k)] = v
                end
                e.at = t
            end
        end
    end
end
457 458
-- Uppercases, in place, the tag and all attribute names of every collected
-- element that is not flagged as `special`. The attribute table is replaced by
-- a new one. A missing collection is ignored (it used to raise an error on
-- taking its length).
function finalizers.upperall(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        if not e.special then
            e.tg = upper(e.tg)
            local eat = e.at
            if eat then
                local t = { }
                for k, v in next, eat do
                    t[upper(k)] = v
                end
                e.at = t
            end
        end
    end
end
474