-- Register this module's metadata in the global 'modules' table; the table
-- is created first when it does not exist yet.
if not modules then modules = { } end modules ['lxml-lpt'] = {
    version   = 1.001,
    comment   = "this module is the basis for the lxml-* ones",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12local concat, remove, insert = table.concat, table.remove, table.insert
13local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
14local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
15local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
16
17local setmetatableindex = table.setmetatableindex
18local formatters = string.formatters
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
-- Tracing switches; all start disabled and are only changed by the tracker
-- callbacks registered below.
local trace_lpath    = false
local trace_lparse   = false
local trace_lprofile = false
local report_lpath   = logs.reporter("xml","lpath")

-- The trackers table is optional: the callbacks are only registered when it
-- exists.
if trackers then
    trackers.register("xml.path", function(v)
        trace_lpath = v
    end)
    trackers.register("xml.parse", function(v)
        trace_lparse = v
    end)
    -- the profile tracker sets all three switches to the same value
    trackers.register("xml.profile", function(v)
        trace_lpath    = v
        trace_lparse   = v
        trace_lprofile = v
    end)
end
57
58
59
60
61
local xml = xml

-- Usage counters with read-only accessors: lpathcalls counts lookups,
-- lpathcached counts the lookups that were answered from the cache.
local lpathcalls  = 0  function xml.lpathcalls () return lpathcalls  end
local lpathcached = 0  function xml.lpathcached() return lpathcached end

-- Shared registries. Each one reuses the table that is already present on
-- 'xml' (or on the lpeg patterns table) and only creates a new one otherwise.
xml.functions = xml.functions or { }
local functions = xml.functions

xml.expressions = xml.expressions or { }
local expressions = xml.expressions

xml.finalizers = xml.finalizers or { }
local finalizers = xml.finalizers

xml.specialhandler = xml.specialhandler or { }
local specialhandler = xml.specialhandler

lpegpatterns.xml = lpegpatterns.xml or { }
local xmlpatterns = lpegpatterns.xml

-- per-protocol finalizer tables
finalizers.xml = finalizers.xml or { }
finalizers.tex = finalizers.tex or { }
84
-- Index fallback for the per-protocol finalizer tables: looks the name up in
-- the shared 'finalizers' table. A hit is stored in 't' and returned; a miss
-- is reported and answered with a do-nothing function that is not stored.
local function fallback(t, name)
    local found = finalizers[name]
    if not found then
        report_lpath("unknown sub finalizer %a",name)
        return function() end
    end
    t[name] = found
    return found
end
95
-- Missing entries in the protocol specific tables are resolved by 'fallback'.
setmetatableindex(finalizers.xml, fallback)
setmetatableindex(finalizers.tex, fallback)

-- Protocol used when a pattern does not name one.
xml.defaultprotocol = "xml"
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
-- Axis handlers, keyed by axis name. Each handler takes a list of nodes and
-- returns a list of nodes.
local apply_axis = { }

-- root: replaces every node by its topmost ancestor, found by following the
-- __p__ links until there is none left. A node without parent maps to itself.
apply_axis['root'] = function(list)
    local roots = { }
    for index=1,#list do
        local node = list[index]
        local up   = node and node.__p__
        while up do
            node = up
            up   = up.__p__
        end
        roots[index] = node
    end
    return roots
end
149
-- self: the context nodes themselves. The incoming list is handed back as-is
-- (the same table, not a copy).
apply_axis['self'] = function(list)
    return list
end
158
-- child: collects the direct element children (entries of .dt that have a
-- .tg field) of every node in the list.
--
-- Side effects: each collected child gets .ni (its index in the parent's dt)
-- and .ei (its rank among the element children); each parent that has a dt
-- gets .en (its number of element children).
--
-- Fix: a parent whose only child is not an element now gets en = 0, so a
-- value left over from an earlier state of the tree cannot survive. This
-- matches what the descendant helper in this file does for the same case.
apply_axis['child'] = function(list)
    local collected = { }
    local c = 0
    for l=1,#list do
        local ll = list[l]
        local dt = ll.dt
        if dt then
            local n = #dt
            if n == 0 then
                ll.en = 0
            elseif n == 1 then
                -- fast path for the common single child case
                local dk = dt[1]
                if dk.tg then
                    c = c + 1
                    collected[c] = dk
                    dk.ni = 1
                    dk.ei = 1
                    ll.en = 1
                else
                    ll.en = 0
                end
            else
                local en = 0
                for k=1,n do
                    local dk = dt[k]
                    if dk.tg then
                        c = c + 1
                        en = en + 1
                        collected[c] = dk
                        dk.ni = k
                        dk.ei = en
                    end
                end
                ll.en = en
            end
        end
    end
    return collected
end
196
-- Depth-first collector used by the descendant axis.
--
-- Appends every element (entry of list.dt with a .tg field) below 'list' to
-- 'collected', parents before their own children, starting after slot 'c'.
-- Returns the index of the last slot that was filled.
--
-- Side effects: every visited element gets .ni (index in its parent's dt)
-- and .ei (rank among the element siblings); every visited node that has a
-- dt gets .en (number of element children, 0 when there are none).
local function collect(list,collected,c)
    local children = list.dt
    if not children then
        return c
    end
    local elements = 0
    for index=1,#children do
        local child = children[index]
        if child.tg then
            elements = elements + 1
            c = c + 1
            collected[c] = child
            child.ni = index
            child.ei = elements
            c = collect(child,collected,c)
        end
    end
    list.en = elements
    return c
end
233
-- descendant: all elements below the nodes in the list, gathered per node by
-- the 'collect' helper defined just before this function.
apply_axis['descendant'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        total = collect(list[index],found,total)
    end
    return found
end
242
-- Depth-first collector used by the descendant-or-self axis.
--
-- Appends every element (entry of list.dt with a .tg field) below 'list' to
-- 'collected', parents before their own children, starting after slot 'c'.
-- Returns the index of the last slot that was filled.
--
-- Side effects: every visited element gets .ni (index in its parent's dt)
-- and .ei (rank among the element siblings); every visited node that has a
-- dt gets .en (number of element children).
--
-- Fix: a node whose only child is not an element used to keep whatever .en
-- it had before; it is now reset to 0, like the collector that serves the
-- descendant axis does. One loop covers the empty, single and multi child
-- cases, so they cannot drift apart again.
local function collect(list,collected,c)
    local dt = list.dt
    if dt then
        local en = 0
        for k=1,#dt do
            local dk = dt[k]
            if dk.tg then
                c = c + 1
                en = en + 1
                collected[c] = dk
                dk.ni = k
                dk.ei = en
                c = collect(dk,collected,c)
            end
        end
        list.en = en
    end
    return c
end
277
-- descendant-or-self: every node in the list followed by all elements below
-- it. A node whose .special field is exactly true is left out itself, but
-- its descendants are still gathered.
apply_axis['descendant-or-self'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        local node = list[index]
        if not (node.special == true) then
            total = total + 1
            found[total] = node
        end
        total = collect(node,found,total)
    end
    return found
end
291
-- ancestor: for every node in the list, all nodes reachable through the
-- __p__ links, nearest parent first. Shared ancestors are not deduplicated.
apply_axis['ancestor'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        local node = list[index]
        local up   = node and node.__p__
        while up do
            total = total + 1
            found[total] = up
            up = up.__p__
        end
    end
    return found
end
307
-- ancestor-or-self: every node in the list, each followed by its chain of
-- __p__ ancestors (nearest first). Shared ancestors are not deduplicated.
apply_axis['ancestor-or-self'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        local node = list[index]
        total = total + 1
        found[total] = node
        local up = node and node.__p__
        while up do
            total = total + 1
            found[total] = up
            up = up.__p__
        end
    end
    return found
end
325
-- parent: the __p__ of every node in the list; nodes without one contribute
-- nothing. Parents shared by several nodes show up once per node.
apply_axis['parent'] = function(list)
    local parents, total = { }, 0
    for index=1,#list do
        local parent = list[index].__p__
        if parent then
            total = total + 1
            parents[total] = parent
        end
    end
    return parents
end
338
-- attribute: placeholder, the list is ignored and a fresh empty list is
-- returned.
apply_axis['attribute'] = function(list)
    return { }
end
342
-- namespace: placeholder, the list is ignored and a fresh empty list is
-- returned.
apply_axis['namespace'] = function(list)
    return { }
end
346
-- following: placeholder, the list is ignored and a fresh empty list is
-- returned.
apply_axis['following'] = function(list)
    return { }
end
365
-- preceding: placeholder, the list is ignored and a fresh empty list is
-- returned.
apply_axis['preceding'] = function(list)
    return { }
end
385
-- following-sibling: for every node in the list, the table entries that come
-- after it in its parent's dt (string entries are skipped), in document order.
--
-- The position of a node is read from its .ni field, which the child and
-- descendant axes in this file set while collecting.
--
-- Fix: a node without a parent (or whose parent has no dt) has no siblings
-- and is now skipped instead of raising an "attempt to index nil" error.
apply_axis['following-sibling'] = function(list)
    local collected = { }
    local c = 0
    for l=1,#list do
        local ll = list[l]
        local p = ll.__p__
        local d = p and p.dt
        if d then
            for i=ll.ni+1,#d do
                local di = d[i]
                if type(di) == "table" then
                    c = c + 1
                    collected[c] = di
                end
            end
        end
    end
    return collected
end
403
-- preceding-sibling: for every node in the list, the table entries that come
-- before it in its parent's dt (string entries are skipped), in document
-- order.
--
-- The position of a node is read from its .ni field, which the child and
-- descendant axes in this file set while collecting.
--
-- Fix: a node without a parent (or whose parent has no dt) has no siblings
-- and is now skipped instead of raising an "attempt to index nil" error.
apply_axis['preceding-sibling'] = function(list)
    local collected = { }
    local c = 0
    for l=1,#list do
        local ll = list[l]
        local p = ll.__p__
        local d = p and p.dt
        if d then
            for i=1,ll.ni-1 do
                local di = d[i]
                if type(di) == "table" then
                    c = c + 1
                    collected[c] = di
                end
            end
        end
    end
    return collected
end
421
-- reverse-sibling: like preceding-sibling, but the table entries before the
-- node are delivered nearest first (walking the parent's dt backwards).
--
-- The position of a node is read from its .ni field, which the child and
-- descendant axes in this file set while collecting.
--
-- Fix: a node without a parent (or whose parent has no dt) has no siblings
-- and is now skipped instead of raising an "attempt to index nil" error.
apply_axis['reverse-sibling'] = function(list)
    local collected = { }
    local c = 0
    for l=1,#list do
        local ll = list[l]
        local p = ll.__p__
        local d = p and p.dt
        if d then
            for i=ll.ni-1,1,-1 do
                local di = d[i]
                if type(di) == "table" then
                    c = c + 1
                    collected[c] = di
                end
            end
        end
    end
    return collected
end
439
-- The auto-* and initial-* axis names are aliases: they share the handler of
-- the plain axis and only differ in the name recorded in the parsed pattern.
apply_axis['auto-descendant-or-self'] = apply_axis['descendant-or-self']
apply_axis['auto-descendant'] = apply_axis['descendant']
apply_axis['auto-child'] = apply_axis['child']
apply_axis['auto-self'] = apply_axis['self']
apply_axis['initial-child'] = apply_axis['child']
445
-- Filters a node list by a node set.
--
-- list      : nodes to test
-- directive : truthy keeps the nodes that match the set, falsy keeps the
--             nodes that do not match
-- nodes     : flat array of triplets; slots 2 and 3 of each triplet hold the
--             wanted namespace and tag, a false/nil slot acts as wildcard
--
-- A single all-wildcard triplet is a shortcut: the incoming list itself is
-- returned when matching, a fresh empty list when negating. In all other
-- cases a new list is built and every kept node gets .mi, its rank among the
-- kept nodes that share the same parent consecutively.
local function apply_nodes(list,directive,nodes)
    local maxn   = #nodes
    local single = maxn == 3
    local nns    = single and nodes[2]
    local ntg    = single and nodes[3]
    if single and not nns and not ntg then
        return directive and list or { }
    end
    local collected = { }
    local c, m, p = 0, 0, nil
    -- store a node and number it relative to the previously stored parent
    local function keep(ll)
        local llp = ll.__p__
        if llp ~= p then
            p = llp
            m = 1
        else
            m = m + 1
        end
        c = c + 1
        collected[c] = ll
        ll.mi = m
    end
    for l=1,#list do
        local ll = list[l]
        local candidate, ok = false, false
        if not single then
            -- several alternatives: the first matching triplet wins
            local ltg = ll.tg
            if ltg then
                local lns = ll.rn or ll.ns
                candidate = true
                for n=1,maxn,3 do
                    local tns = nodes[n+1]
                    local ttg = nodes[n+2]
                    ok = (not ttg or ltg == ttg) and (not tns or lns == tns)
                    if ok then
                        break
                    end
                end
            end
        elseif not nns then
            -- tag only
            local ltg = ll.tg
            if ltg then
                candidate = true
                ok = ntg == ltg
            end
        elseif not ntg then
            -- namespace only: here a node qualifies by having a namespace
            local lns = ll.rn or ll.ns
            if lns then
                candidate = true
                ok = lns == nns
            end
        else
            -- namespace and tag
            local ltg = ll.tg
            if ltg then
                local lns = ll.rn or ll.ns
                candidate = true
                ok = ltg == ntg and lns == nns
            end
        end
        if candidate then
            if directive then
                if ok then
                    keep(ll)
                end
            elseif not ok then
                keep(ll)
            end
        end
    end
    return collected
end
566
-- Set to true (by code run from within an expression) to stop the filter
-- loop below after the current node.
local quit_expression = false

-- Keeps the nodes for which 'expression' returns a truthy value. The
-- evaluator is called as expression(list,node,position,order). The quit flag
-- is cleared before the loop and checked after every node, so the node that
-- raised it is still judged by its own result.
local function apply_expression(list,expression,order)
    local matched, total = { }, 0
    quit_expression = false
    for index=1,#list do
        local candidate = list[index]
        if expression(list,candidate,index,order) then
            total = total + 1
            matched[total] = candidate
        end
        if quit_expression then
            return matched
        end
    end
    return matched
end
585
-- Applies a selector step. As long as xml.applyselector is not defined the
-- list passes through untouched. On the first call that finds it, this local
-- is rebound to that function, so later calls go to it directly.
local function apply_selector(list,specification)
    local selector = xml.applyselector
    if not selector then
        return list
    end
    apply_selector = selector
    return selector(list,specification)
end
594
595
596
597
598local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb
599
-- Basic building blocks for the expression converter. The patterns with a
-- "/ string" part replace what they match by that string when used inside a
-- substitution capture.
local spaces = S(" \n\r\t\f")^0                                       -- optional whitespace
local lp_space = S(" \n\r\t\f")                                       -- one whitespace character
local lp_any = P(1)                                                   -- any single character
local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")       -- only != is rewritten
local lp_doequal = P("=") / "=="                                      -- single = becomes a comparison
local lp_or = P("|") / " or "
local lp_and = P("&") / " and "
-- Maps the builtin names that may be used in an expression to the Lua code
-- they are replaced by. The snippets run inside the generated evaluator, so
-- they can refer to its parameters: list, ll (current node), l (position in
-- the list) and order.
--
-- Fix: lastindex and lastelement now really fall back to 1 for a node that
-- has no parent. Before, "#ll.__p__.dt or 1" could never reach the "or 1"
-- part (the length operator does not yield nil) and both snippets raised an
-- error when ll.__p__ was missing.
local builtin = {
    text         = "(ll.dt[1] or '')",
    content      = "ll.dt",
    name         = "((ll.ns~='' and ll.ns..':'..ll.tg) or ll.tg)",
    tag          = "ll.tg",
    position     = "l",
    firstindex   = "1",
    firstelement = "1",
    first        = "1",
    lastindex    = "((ll.__p__ and #ll.__p__.dt) or 1)",
    lastelement  = "((ll.__p__ and ll.__p__.en) or 1)",
    last         = "#list",
    list         = "list",
    self         = "ll",
    rootposition = "order",
    order        = "order",
    element      = "(ll.ei or 1)",
    index        = "(ll.ni or 1)",
    match        = "(ll.mi or 1)",
    namespace    = "ll.ns",
    ns           = "ll.ns",
}
633
-- A builtin name directly followed by an empty pair of parentheses: the name
-- is replaced by its entry in 'builtin', the parentheses are dropped.
local lp_builtin = lpeg.utfchartabletopattern(builtin)/builtin * ((spaces * P("(") * spaces * P(")"))/"")

-- @name or attribute::name becomes a guarded lookup in the node's attribute
-- table: (ll.at and ll.at['name'])
local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * ((R("az","AZ") + S("-_:"))^1) * Cc("'])")

-- An expression that is nothing but a number is a position test; a negative
-- number counts from the end of the list.
local lp_fastpos_p = P("+")^0 * R("09")^1 * P(-1) / "l==%0"
local lp_fastpos_n = P("-") * R("09")^1 * P(-1) / "(%0<0 and (#list+%0+1==l))"
local lp_fastpos = lp_fastpos_n + lp_fastpos_p

-- Words that are passed on unchanged.
local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false")

-- A dotted name followed by an opening parenthesis (like string.find( ) is
-- kept as it is.
local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")) / "%0"
655
-- A plain name followed by an opening parenthesis: a name registered in
-- 'expressions' becomes a call into the expr table, any other name becomes a
-- call to expr.error (the original arguments follow in both cases).
local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t)
    local known = expressions[t]
    return known and ("expr." .. t .. "(") or "expr.error("
end
663
-- Balanced parentheses; 'value' captures what is between the outer pair.
local lparent = P("(")
local rparent = P(")")
local noparent = 1 - (lparent+rparent)
local nested = P{lparent * (noparent + V(1))^0 * rparent}
local value = P(lparent * C((noparent + nested)^0) * rparent)

-- A bare word becomes expr.child(ll,'word').
local lp_child = Cc("expr.child(ll,'") * R("az","AZ") * R("az","AZ","--","__")^0 * Cc("')")
-- A signed run of digits.
local lp_number = S("+-") * R("09")^1
-- A bare word, wrapped in single quotes.
local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
-- A quoted string, single or double quotes, passed on untouched.
local lp_content = (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"'))

-- Forward declaration; the pattern is assigned further down in this file.
local cleaner
676
-- name(...), text(...), tag(...), count(...) and child(...) get the current
-- node as first argument. The text between the parentheses is run through
-- 'cleaner' first, which quotes bare words; an empty argument list results
-- in a call with only the node.
--
-- Fix: when the name is not registered in 'expressions', the generated call
-- to expr.error now passes the name as a quoted string. It used to be
-- emitted bare, which made the generated code read an (undefined) global of
-- that name, so the error message could not mention the function.
local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s)
    if expressions[t] then
        s = s and s ~= "" and lpegmatch(cleaner,s)
        if s and s ~= "" then
            return "expr." .. t .. "(ll," .. s ..")"
        else
            return "expr." .. t .. "(ll)"
        end
    else
        -- t is one of the five fixed names above, so plain quoting is safe
        return "expr.error('" .. t .. "')"
    end
end
689
-- One token of an expression. The order matters: the first alternative that
-- matches wins, and lp_any at the end copies any other character as it is.
local content =
    lp_builtin +
    lp_attribute +
    lp_special +
    lp_noequal + lp_doequal +
    lp_or + lp_and +
    lp_reserved +
    lp_lua_function + lp_function +
    lp_content +
    lp_child +
    lp_any

-- Rewrites a whole expression into Lua code: either the position shortcut or
-- a sequence of tokens and parenthesized groups.
local converter = Cs (
    lp_fastpos + (P { lparent * (V(1))^0 * rparent + content } )^0
)

-- Rewrites the argument text of the special functions: reserved words and
-- signed numbers are kept, bare words are quoted, everything else is copied.
cleaner = Cs ( (
    lp_reserved +
    lp_number +
    lp_string +
1 )^1 )
712
-- Source of the chunk that is compiled for an expression step; %s is
-- replaced by the converted expression.
local template_e = [[
 local expr = xml.expressions
 return function(list,ll,l,order)
 return %s
 end
]]

-- Source of the chunk for a finalizer that takes arguments; the three %s
-- slots receive the protocol, the finalizer name and the argument list.
local template_f_y = [[
 local finalizer = xml.finalizers['%s']['%s']
 return function(collection)
 return finalizer(collection,%s)
 end
]]

-- Source of the chunk for a finalizer without arguments: the registered
-- function itself is returned.
local template_f_n = [[
 return xml.finalizers['%s']['%s']
]]
730
731
732
-- Step records for the axes. One table per axis is created here and the same
-- table is inserted into every parsed pattern that uses that axis.
local register_last_match = { kind = "axis", axis = "last-match" }
local register_self = { kind = "axis", axis = "self" }
local register_parent = { kind = "axis", axis = "parent" }
local register_descendant = { kind = "axis", axis = "descendant" }
local register_child = { kind = "axis", axis = "child" }
local register_descendant_or_self = { kind = "axis", axis = "descendant-or-self" }
local register_root = { kind = "axis", axis = "root" }
local register_ancestor = { kind = "axis", axis = "ancestor" }
local register_ancestor_or_self = { kind = "axis", axis = "ancestor-or-self" }
local register_attribute = { kind = "axis", axis = "attribute" }
local register_namespace = { kind = "axis", axis = "namespace" }
local register_following = { kind = "axis", axis = "following" }
local register_following_sibling = { kind = "axis", axis = "following-sibling" }
local register_preceding = { kind = "axis", axis = "preceding" }
local register_preceding_sibling = { kind = "axis", axis = "preceding-sibling" }
local register_reverse_sibling = { kind = "axis", axis = "reverse-sibling" }

-- Axes that the parser fills in itself when the pattern does not name one.
local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" }
local register_auto_descendant = { kind = "axis", axis = "auto-descendant" }
local register_auto_self = { kind = "axis", axis = "auto-self" }
local register_auto_child = { kind = "axis", axis = "auto-child" }

-- Used for a leading slash.
local register_initial_child = { kind = "axis", axis = "initial-child" }

-- Node test with one all-wildcard triplet.
local register_all_nodes = { kind = "nodes", nodetest = true, nodes = { true, false, false } }
758
-- Expressions that were already reported, keyed by their source text.
local skip = { }

-- Reports a broken expression, but only the first time a given source text
-- is seen. Always returns false.
local function errorrunner_e(str,cnv)
    if skip[str] then
        return false
    end
    report_lpath("error in expression: %s => %s",str,cnv)
    skip[str] = cnv or str
    return false
end
768
-- Reports a finalizer that could not be set up; missing arguments are shown
-- as an empty string. Always returns false.
local function errorrunner_f(str,arg)
    local shown = arg or ""
    report_lpath("error in finalizer: %s(%s)",str,shown)
    return false
end
773
-- Builds the step record for a node test.
local function register_nodes(nodetest,nodes)
    local step = { kind = "nodes" }
    step.nodetest = nodetest
    step.nodes    = nodes
    return step
end
777
-- Builds the step record for a selector.
local function register_selector(specification)
    local step = { kind = "selector" }
    step.specification = specification
    return step
end
781
-- Builds the step record for an expression: the source is converted to Lua,
-- wrapped in the evaluator template and compiled. When that does not give a
-- usable evaluator, a stand-in is stored that reports the error when called.
local function register_expression(expression)
    local converted = lpegmatch(converter,expression)
    local chunk     = load(format(template_e,converted))
    local evaluator = chunk and chunk()
    if not evaluator then
        evaluator = function() errorrunner_e(expression,converted) end
    end
    return {
        kind       = "expression",
        expression = expression,
        converted  = converted,
        evaluator  = evaluator,
    }
end
790
-- Builds the step record for a finalizer. The lookup code is generated from
-- one of the two finalizer templates (with or without arguments) and
-- compiled; xml.defaultprotocol is used when no protocol was given. When
-- that does not give a usable function, a stand-in is stored that reports
-- the error when called.
local function register_finalizer(protocol,name,arguments)
    local namespace = protocol or xml.defaultprotocol
    local code
    if arguments and arguments ~= "" then
        code = format(template_f_y,namespace,name,arguments)
    else
        code = format(template_f_n,namespace,name)
    end
    local chunk     = load(code)
    local finalizer = chunk and chunk()
    if not finalizer then
        finalizer = function() errorrunner_f(name,arguments) end
    end
    return {
        kind      = "finalizer",
        name      = name,
        arguments = arguments,
        finalizer = finalizer,
    }
end
801
-- A bracketed expression. The capture is the text between the outer
-- brackets; nested brackets and quoted strings are stepped over.
local expression = P { "ex",
    ex = "[" * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * "]",
    sq = "'" * (1 - S("'"))^0 * "'",
    dq = '"' * (1 - S('"'))^0 * '"',
}

-- A parenthesized argument list. Quoted parts are kept as they are, runs of
-- unquoted text are turned into quoted strings with %q.
local arguments = P { "ar",
    ar = "(" * Cs((V("sq") + V("dq") + V("nq") + P(1-P(")")))^0) * ")",
    nq = ((1 - S("),'\""))^1) / function(s) return format("%q",s) end,
    sq = P("'") * (1 - P("'"))^0 * P("'"),
    dq = P('"') * (1 - P('"'))^0 * P('"'),
}
814
815
816
-- Builds the step record for a part of the pattern that could not be parsed.
local function register_error(str)
    local message = format("unparsed: %s",str)
    return { kind = "error", error = message }
end
820
821
822
-- Patterns that consist of a single symbol (or nothing at all) get a fixed
-- list of steps.
local special_1 = P("*") * Cc(register_auto_descendant) * Cc(register_all_nodes)
local special_2 = P("/") * Cc(register_auto_self)
local special_3 = P("") * Cc(register_auto_self)

-- Lookaheads: succeed at the end of the input or when the next character is
-- not a colon (or not an opening parenthesis).
local no_nextcolon = P(-1) + #(1-P(":"))
local no_nextlparent = P(-1) + #(1-P("("))
829
-- The grammar for a complete path. The match result is a table: the array
-- part holds the step records in order, the named field 'protocol' holds the
-- protocol prefix when there is one.
local pathparser = Ct { "patterns",

    -- optional protocol, then either one of the special one-symbol patterns
    -- or a slash separated sequence of steps
    patterns = spaces * V("protocol") * spaces * (
        ( V("special") * spaces * P(-1) ) +
        ( V("initial") * spaces * V("step") * spaces * (P("/") * spaces * V("step") * spaces)^0 )
    ),

    -- letters followed by "://"; without such a prefix the group is nil
    protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),

    -- one step: an axis (or shortcut, or selector) with optional node tests,
    -- or an error record, followed by any number of expressions and a
    -- finalizer
    step = ((V("shortcuts") + V("selector") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,

    -- named axes; when none matches and there is input left, auto-child is
    -- assumed
    axis = V("last_match")
         + V("descendant")
         + V("child")
         + V("parent")
         + V("self")
         + V("root")
         + V("ancestor")
         + V("descendant_or_self")
         + V("following_sibling")
         + V("following")
         + V("reverse_sibling")
         + V("preceding_sibling")
         + V("preceding")
         + V("ancestor_or_self")
         + #(1-P(-1)) * Cc(register_auto_child),

    special = special_1
            + special_2
            + special_3,

    -- an optional leading slash
    initial = (P("/") * spaces * Cc(register_initial_child))^-1,

    -- takes all remaining input and turns it into an error record
    error = (P(1)^1) / register_error,

    -- symbolic shortcuts; the longer symbols are tried before their prefixes
    shortcuts_a = V("s_descendant_or_self")
                + V("s_descendant")
                + V("s_child")
                + V("s_parent")
                + V("s_self")
                + V("s_root")
                + V("s_ancestor")
                + V("s_lastmatch"),

    shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,

    s_descendant_or_self = (P("***/") + P("/")) * Cc(register_descendant_or_self),
    s_descendant = P("**") * Cc(register_descendant),
    s_child = P("*") * no_nextcolon * Cc(register_child),
    s_parent = P("..") * Cc(register_parent),
    s_self = P("." ) * Cc(register_self),
    s_root = P("^^") * Cc(register_root),
    s_ancestor = P("^") * Cc(register_ancestor),
    s_lastmatch = P("=") * Cc(register_last_match),

    -- spelled out axes
    descendant = P("descendant::") * Cc(register_descendant),
    child = P("child::") * Cc(register_child),
    parent = P("parent::") * Cc(register_parent),
    self = P("self::") * Cc(register_self),
    root = P('root::') * Cc(register_root),
    ancestor = P('ancestor::') * Cc(register_ancestor),
    descendant_or_self = P('descendant-or-self::') * Cc(register_descendant_or_self),
    ancestor_or_self = P('ancestor-or-self::') * Cc(register_ancestor_or_self),

    following = P('following::') * Cc(register_following),
    following_sibling = P('following-sibling::') * Cc(register_following_sibling),
    preceding = P('preceding::') * Cc(register_preceding),
    preceding_sibling = P('preceding-sibling::') * Cc(register_preceding_sibling),
    reverse_sibling = P('reverse-sibling::') * Cc(register_reverse_sibling),
    last_match = P('last-match::') * Cc(register_last_match),

    -- text between braces is handed to the selector as it is
    selector = P("{") * C((1-P("}"))^1) * P("}") / register_selector,

    -- a node set, either bare or wrapped in parentheses; "!" (and "not" in
    -- the parenthesized form) makes the test flag false
    nodes = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,

    expressions = expression / register_expression,

    letters = R("az")^1,
    name = (1-S("/[]()|:*!"))^1,
    negate = P("!") * Cc(false),

    nodefunction = V("negate") + P("not") * Cc(false) + Cc(true),
    nodetest = V("negate") + Cc(true),
    -- every node name yields three values: flag, namespace, tag; a "*" or a
    -- missing namespace yields false
    nodename = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
    wildnodename = (C(V("name")) + P("*") * Cc(false)) * no_nextlparent,
    nodeset = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,

    -- name(arguments) at the very end of the pattern; the protocol group is
    -- passed along as first value
    finalizer = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,

}
926
-- Make the grammar available to other code through the xml pattern table.
xmlpatterns.pathparser = pathparser

-- Parsed patterns, keyed by their source string.
local cache = { }
930
-- Renders a node set (flat array of flag/namespace/tag triplets) as text for
-- tracing. Missing or empty parts show as "*", the "@rt@" tag shows as
-- "[root]", a triplet with a falsy flag is wrapped in not(...), and the
-- alternatives are joined with "|". When nodetest is exactly false the whole
-- result is wrapped in not(...) as well.
local function nodesettostring(set,nodetest)
    local parts, n = { }, 0
    for i=1,#set,3 do
        local directive = set[i]
        local ns        = set[i+1]
        local tg        = set[i+2]
        if not ns or ns == "" then
            ns = "*"
        end
        if not tg or tg == "" then
            tg = "*"
        end
        local shown
        if tg == "@rt@" then
            shown = "[root]"
        else
            shown = format("%s:%s",ns,tg)
        end
        if not directive then
            shown = format("not(%s)",shown)
        end
        n = n + 1
        parts[n] = shown
    end
    local joined = concat(parts,"|")
    if nodetest == false then
        return format("not(%s)",joined)
    end
    return joined
end
946
-- Renders the namespace:tag of every node in the list, separated by spaces,
-- for tracing. Missing or empty parts show as "*", the "@rt@" tag shows as
-- "[root]"; an empty list gives "no elements".
local function tagstostring(list)
    local total = #list
    if total == 0 then
        return "no elements"
    end
    local names = { }
    for i=1,total do
        local element = list[i]
        local ns, tg = element.ns, element.tg
        if not ns or ns == "" then
            ns = "*"
        end
        if not tg or tg == "" then
            tg = "*"
        end
        if tg == "@rt@" then
            names[i] = "[root]"
        else
            names[i] = format("%s:%s",ns,tg)
        end
    end
    return concat(names," ")
end
963
xml.nodesettostring = nodesettostring

-- Forward declaration; the function is assigned further down in this file.
local lpath

-- Reports a parsed pattern (protocol, source and the serialized step table).
-- A string argument is parsed first.
local function lshow(parsed)
    local spec = parsed
    if type(spec) == "string" then
        spec = lpath(spec)
    end
    local protocol = spec.protocol or xml.defaultprotocol
    report_lpath("%s://%s => %s",protocol,spec.pattern,
        table.serialize(spec,false))
end

xml.lshow = lshow
977
-- Appends a remark to the .comment list of a parsed pattern; the list is
-- created on first use.
local function add_comment(p,str)
    local comments = p.comment
    if comments then
        comments[#comments+1] = str
    else
        p.comment = { str }
    end
end
986
-- Turns a pattern into its parsed form (a table with the steps in the array
-- part plus some named fields) and caches the result by source string.
--
-- pattern : a string to parse, or an already parsed table (returned as-is)
-- returns : the parsed table; nil when no pattern was given
--
-- After parsing, the step list is tidied: a leading auto-child step is
-- replaced by auto-descendant, a leading initial-child step is dropped when
-- an axis follows, and a trailing all-wildcard node test is removed. Every
-- such change is noted in the table's comment list. A pattern that parses to
-- zero steps is reported and replaced by a table marked "parsing error".
--
-- Changes: a nil pattern now yields nil instead of raising an error inside
-- the lpeg match (callers such as applylpath already check for a missing
-- result); the branch that could never run ("if false") and the second
-- declaration of np are gone.
lpath = function (pattern)
    lpathcalls = lpathcalls + 1
    if type(pattern) == "table" then
        return pattern
    elseif pattern == nil then
        return nil
    else
        local parsed = cache[pattern]
        if parsed then
            lpathcached = lpathcached + 1
        else
            parsed = lpegmatch(pathparser,pattern)
            if parsed then
                parsed.pattern = pattern
                local np = #parsed
                if np == 0 then
                    parsed = { pattern = pattern, register_self, state = "parsing error" }
                    report_lpath("parsing error in pattern: %s",pattern)
                    lshow(parsed)
                else
                    local pi = parsed[1]
                    if pi.axis == "auto-child" then
                        add_comment(parsed, "auto-child replaced by auto-descendant")
                        parsed[1] = register_auto_descendant
                    elseif pi.axis == "initial-child" and np > 1 and parsed[2].axis then
                        add_comment(parsed, "initial-child removed")
                        remove(parsed,1)
                    end
                    -- the list may have become shorter
                    np = #parsed
                    if np > 1 then
                        local pnp = parsed[np]
                        if pnp.kind == "nodes" and pnp.nodetest == true then
                            local nodes = pnp.nodes
                            if nodes[1] == true and nodes[2] == false and nodes[3] == false then
                                add_comment(parsed, "redundant final wildcard filter removed")
                                remove(parsed,np)
                            end
                        end
                    end
                end
            else
                -- no match at all: cache a table without steps
                parsed = { pattern = pattern }
            end
            cache[pattern] = parsed
            if trace_lparse and not trace_lprofile then
                lshow(parsed)
            end
        end
        return parsed
    end
end

xml.lpath = lpath
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062do
1063
    -- Per pattern counters filled by the profiling variant of apply; also
    -- reachable as xml.profiled.
    local profiled = { }
    xml.profiled = profiled
    -- Result of the most recent applylpath call; only kept while keepmatch
    -- is set.
    local lastmatch = nil
    local keepmatch = nil

    -- The directives table is optional. Changing the directive also drops
    -- the remembered match.
    if directives then
        directives.register("xml.path.keeplastmatch",function(v)
            keepmatch = v
            lastmatch = nil
        end)
    end

    -- last-match axis: ignores the incoming list and delivers the remembered
    -- match, or a fresh empty list when there is none.
    apply_axis["last-match"] = function()
        return lastmatch or { }
    end
1079
1080 local function profiled_apply(list,parsed,nofparsed,order)
1081 local p = profiled[parsed.pattern]
1082 if p then
1083 p.tested = p.tested + 1
1084 else
1085 p = { tested = 1, matched = 0, finalized = 0 }
1086 profiled[parsed.pattern] = p
1087 end
1088 local collected = list
1089 for i=1,nofparsed do
1090 local pi = parsed[i]
1091 local kind = pi.kind
1092 if kind == "axis" then
1093 collected = apply_axis[pi.axis](collected)
1094 elseif kind == "nodes" then
1095 collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1096 elseif kind == "expression" then
1097 collected = apply_expression(collected,pi.evaluator,order)
1098 elseif kind == "selector" then
1099 collected = apply_selector(collected,pi.specification)
1100 elseif kind == "finalizer" then
1101 collected = pi.finalizer(collected)
1102 p.matched = p.matched + 1
1103 p.finalized = p.finalized + 1
1104 return collected
1105 end
1106 if not collected or #collected == 0 then
1107 local pn = i < nofparsed and parsed[nofparsed]
1108 if pn and pn.kind == "finalizer" then
1109 collected = pn.finalizer(collected)
1110 p.finalized = p.finalized + 1
1111 return collected
1112 end
1113 return nil
1114 end
1115 end
1116 if collected then
1117 p.matched = p.matched + 1
1118 end
1119 return collected
1120 end
1121
1122 local function traced_apply(list,parsed,nofparsed,order)
1123 if trace_lparse then
1124 lshow(parsed)
1125 end
1126 report_lpath("collecting: %s",parsed.pattern)
1127 report_lpath("root tags : %s",tagstostring(list))
1128 report_lpath("order : %s",order or "unset")
1129 local collected = list
1130 for i=1,nofparsed do
1131 local pi = parsed[i]
1132 local kind = pi.kind
1133 if kind == "axis" then
1134 collected = apply_axis[pi.axis](collected)
1135 report_lpath("% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
1136 elseif kind == "nodes" then
1137 collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1138 report_lpath("% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
1139 elseif kind == "expression" then
1140 collected = apply_expression(collected,pi.evaluator,order)
1141 report_lpath("% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted)
1142 elseif kind == "selector" then
1143 collected = apply_selector(collected,pi.specification)
1144 report_lpath("% 10i : se : %s ",(collected and #collected) or 0,pi.specification)
1145 elseif kind == "finalizer" then
1146 collected = pi.finalizer(collected)
1147 report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
1148 return collected
1149 end
1150 if not collected or #collected == 0 then
1151 local pn = i < nofparsed and parsed[nofparsed]
1152 if pn and pn.kind == "finalizer" then
1153 collected = pn.finalizer(collected)
1154 report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pn.name,pn.arguments or "")
1155 return collected
1156 end
1157 return nil
1158 end
1159 end
1160 return collected
1161 end
1162
1163 local function normal_apply(list,parsed,nofparsed,order)
1164 local collected = list
1165 for i=1,nofparsed do
1166 local pi = parsed[i]
1167 local kind = pi.kind
1168 if kind == "axis" then
1169 local axis = pi.axis
1170 if axis ~= "self" then
1171 collected = apply_axis[axis](collected)
1172 end
1173 elseif kind == "nodes" then
1174 collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1175 elseif kind == "expression" then
1176 collected = apply_expression(collected,pi.evaluator,order)
1177 elseif kind == "selector" then
1178 collected = apply_selector(collected,pi.specification)
1179 elseif kind == "finalizer" then
1180 return pi.finalizer(collected)
1181 end
1182 if not collected or #collected == 0 then
1183 local pf = i < nofparsed and parsed[nofparsed].finalizer
1184 if pf then
1185 return pf(collected)
1186 end
1187 return nil
1188 end
1189 end
1190 return collected
1191 end
1192
    -- The variant that is currently in use.
    local apply = normal_apply

    -- Whenever one of the three trackers changes, pick the variant that fits
    -- the trace switches: profiling goes before tracing, tracing before the
    -- plain variant. The callback ignores its argument and only looks at the
    -- switches.
    if trackers then
        trackers.register("xml.path,xml.parse,xml.profile",function()
            if trace_lprofile then
                apply = profiled_apply
            elseif trace_lpath then
                apply = traced_apply
            else
                apply = normal_apply
            end
        end)
    end
1219
1220
1221 function xml.applylpath(list,pattern)
1222 if not list then
1223 lastmatch = nil
1224 return
1225 end
1226 local parsed = cache[pattern]
1227 if parsed then
1228 lpathcalls = lpathcalls + 1
1229 lpathcached = lpathcached + 1
1230 elseif type(pattern) == "table" then
1231 lpathcalls = lpathcalls + 1
1232 parsed = pattern
1233 else
1234 parsed = lpath(pattern) or pattern
1235 end
1236 if not parsed then
1237 lastmatch = nil
1238 return
1239 end
1240 local nofparsed = #parsed
1241 if nofparsed == 0 then
1242 lastmatch = nil
1243 return
1244 end
1245 local collected = apply({ list },parsed,nofparsed,list.mi)
1246 lastmatch = keepmatch and collected or nil
1247 return collected
1248 end
1249
1250 function xml.lastmatch()
1251 return lastmatch
1252 end
1253
1254 local stack = { }
1255
1256 function xml.pushmatch()
1257 insert(stack,lastmatch)
1258 end
1259
1260 function xml.popmatch()
1261 lastmatch = remove(stack)
1262 end
1263
1264end
1265
local applylpath = xml.applylpath

-- Applies a pattern to a root node; simply forwards to applylpath and
-- returns whatever that returns.
function xml.filter(root,pattern)
    return applylpath(root,pattern)
end
1273
1274
1275
-- child(e,pattern): the result of applying the pattern to element e, for use
-- as a truth value inside an expression.
expressions.child = function(e,pattern)
    return applylpath(e,pattern)
end
1279
-- count(e,pattern): the number of results the pattern gives for element e;
-- 0 when there are none or when no pattern is given.
--
-- Fix: the pattern is checked before it is applied. The old check came
-- after the applylpath call, which had already failed on a missing pattern
-- by then (a call like count() gets no pattern at all).
expressions.count = function(e,pattern)
    if not pattern then
        return 0
    end
    local collected = applylpath(e,pattern)
    return (collected and #collected) or 0
end
1284
-- attribute(e,name[,value]): without value, the attribute's value (nil when
-- absent); with a value, whether the attribute equals it. Gives nil when e
-- is not a table, when no name is given or when e has no attribute table.
expressions.attribute = function(e,name,value)
    if type(e) ~= "table" or not name then
        return nil
    end
    local attributes = e.at
    if not attributes then
        return nil
    end
    local found = attributes[name]
    if value then
        return found == value
    end
    return found
end
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
-- oneof(s,...): true when s equals one of the remaining arguments, false
-- otherwise (also when there are no further arguments).
expressions.oneof = function(s,...)
    local total      = select("#",...)
    local candidates = { ... }
    for i=1,total do
        if s == candidates[i] then
            return true
        end
    end
    return false
end
1322
-- error(str): hands an "unknown function" message to xml.errorhandler, with
-- "?" standing in for a missing argument. Always returns false, so the
-- expression it is part of does not match.
expressions.error = function(str)
    local name = tostring(str or "?")
    xml.errorhandler(format("unknown function in lpath expression: %s",name))
    return false
end
1327
-- undefined(s): true only when s is nil (false does not count as undefined).
expressions.undefined = function(s)
    return s == nil
end
1331
-- quit(s): raises the quit flag that stops the running expression filter
-- after the current node, unless s is exactly false (no argument counts as a
-- request to quit). Always returns true.
expressions.quit = function(s)
    if s ~= false then
        quit_expression = true
    end
    return true
end
1338
-- Expression helper: passes all arguments to print and returns true.
function expressions.print(...)
    print(...)
    return true
end
1343
-- Expression helper: string.find guarded against a missing subject. A nil or
-- false 'str' is returned as is, otherwise only the first result of find
-- (the start position, or nil when there is no match) is returned.
function expressions.find(str,...)
    if not str then
        return str
    end
    return (find(str,...)) -- the parentheses keep just the first result
end
1347
-- Expression helper: uppercased 'str', or "" when 'str' is nil or false.
function expressions.upper(str)
    if str then
        return upper(str)
    end
    return ""
end

-- Expression helper: lowercased 'str', or "" when 'str' is nil or false.
function expressions.lower(str)
    if str then
        return lower(str)
    end
    return ""
end
1350
-- Conversion helpers exposed to expressions: 'number' is the standard
-- tonumber, 'boolean' is the global toboolean.
expressions.number, expressions.boolean = tonumber, toboolean
1353
-- Expression helper: tells if 'pattern' (handed to string.find, so it is a
-- Lua pattern) is found in 'str'. When 'str' is a table its string entries
-- 1..#str are tried in order; other entries are skipped. Any other type
-- yields false.
expressions.contains = function(str,pattern)
    local kind = type(str)
    if kind == "string" then
        return find(str,pattern) ~= nil
    end
    if kind == "table" then
        for i=1,#str do
            local entry = str[i]
            if type(entry) == "string" and find(entry,pattern) then
                return true
            end
        end
    end
    return false
end
1370
-- Expression helper: 'str' with one leading "#" removed. Anything that is
-- not a string yields "".
expressions.idstring = function(str)
    if type(str) ~= "string" then
        return ""
    end
    local stripped = gsub(str,"^#","") -- drops the replacement count
    return stripped
end
1374
1375
1376
-- Applies 'pattern' to 'root' and calls 'handle' for every collected entry
-- with three arguments: the entry's parent (its __p__ field), that parent's
-- 'dt' table and the entry's 'ni' field. Nothing happens when nothing is
-- collected.
local function traverse(root,pattern,handle)
    local matches = applylpath(root,pattern)
    if not matches then
        return
    end
    for i=1,#matches do
        local element = matches[i]
        local parent = element.__p__
        handle(parent,parent.dt,element.ni)
    end
end
1388
-- Applies 'pattern' to 'root'. With a 'handle' every collected entry is
-- passed to it and nothing is returned; without one the collected table
-- itself is returned. Nothing is returned when nothing is collected.
local function selection(root,pattern,handle)
    local matches = applylpath(root,pattern)
    if not matches then
        return
    end
    if not handle then
        return matches
    end
    for i=1,#matches do
        handle(matches[i])
    end
end
1401
-- Make both helpers available in the xml namespace.
xml.traverse, xml.selection = traverse, selection
1404
1405
1406
1407
1408
1409
1410
-- Finalizer: looks up 'fnc' in the 'functions' table and calls it for every
-- entry of 'collected', passing the extra arguments along. An unknown name
-- is reported and nothing is called; a missing collection is ignored.
local function dofunction(collected,fnc,...)
    if not collected then
        return
    end
    local action = functions[fnc]
    if not action then
        report_lpath("unknown function %a",fnc)
        return
    end
    for i=1,#collected do
        action(collected[i],...)
    end
end

-- The same finalizer serves both the xml and the tex namespace.
finalizers.xml["function"] = dofunction
finalizers.tex["function"] = dofunction
1426
1427
1428
-- Expression helper: entry 'n' of the 'dt' table of the parent of 'e', or ""
-- when that table or that entry is missing.
function expressions.text(e,n)
    local siblings = e.__p__.dt
    if siblings then
        return siblings[n] or ""
    end
    return ""
end
1433
do

    -- Shared lookup for expressions.name and expressions.tag.
    --
    -- e : the element to start from
    -- n : relative position (converted with tonumber, 0 when that fails)
    --
    -- With n == 0 the element itself is returned. A negative n selects the
    -- n-th table entry before 'e' in the 'dt' table of its parent, a positive
    -- n the n-th table entry after it; string entries are not counted. The
    -- result is false when there is no such entry, and also when 'e' is not a
    -- table, has no parent with a 'dt' table or has no numeric 'ni' (so these
    -- cases no longer raise an error).
    local function relative(e,n)
        if type(e) ~= "table" then
            return false
        end
        n = tonumber(n) or 0
        if n == 0 then
            return e
        end
        local p = e.__p__
        local d = p and p.dt
        local k = e.ni
        if type(d) ~= "table" or type(k) ~= "number" then
            return false
        end
        if n < 0 then
            -- walk backwards, counting up towards -1
            for i=k-1,1,-1 do
                local di = d[i]
                if type(di) == "table" then
                    if n == -1 then
                        return di
                    end
                    n = n + 1
                end
            end
        else
            -- walk forwards, counting down towards 1
            for i=k+1,#d do
                local di = d[i]
                if type(di) == "table" then
                    if n == 1 then
                        return di
                    end
                    n = n - 1
                end
            end
        end
        return false
    end

    -- Expression helper: the name of 'e' (n == 0 or omitted) or of the n-th
    -- element before (n < 0) or after (n > 0) it. The name is "ns:tg" when the
    -- element has a non-empty 'rn' or 'ns' field (in that order of
    -- preference) and just 'tg' otherwise. Yields "" when no element is found.
    expressions.name = function(e,n)
        local found = relative(e,n)
        if not found then
            return ""
        end
        local ns = found.rn or found.ns or ""
        local tg = found.tg
        if ns ~= "" then
            return ns .. ":" .. tg
        else
            return tg
        end
    end

    -- Expression helper: the 'tg' field of 'e' (n == 0 or omitted) or of the
    -- n-th element before (n < 0) or after (n > 0) it. Yields "" when no
    -- element is found or when it has no 'tg'.
    expressions.tag = function(e,n)
        local found = relative(e,n)
        return (found and found.tg) or ""
    end

end
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
-- Iterator that ends a generic 'for' loop immediately (it returns nothing).
local function dummy() end
1567
-- Returns an iterator over the entries that 'pattern' collects in 'root'.
-- Each step yields three values: the entry's parent (its __p__ field), that
-- parent's 'dt' table and the entry's 'ni' field. With a true 'reverse' the
-- entries are visited last to first. When nothing is collected an iterator
-- that stops immediately is returned.
function xml.elements(root,pattern,reverse)
    local collected = applylpath(root,pattern)
    local total = collected and #collected or 0
    if total == 0 then
        return dummy
    end
    -- 'at' moves in increments of 'step' until it has reached 'last'
    local at, last, step = 0, total, 1
    if reverse then
        at, last, step = total + 1, 1, -1
    end
    return function()
        if at ~= last then
            at = at + step
            local element = collected[at]
            local parent = element.__p__
            return parent, parent.dt, element.ni
        end
    end
end
1599
-- Returns an iterator over the entries that 'pattern' collects in 'root';
-- each step yields one entry. With a true 'reverse' the entries are visited
-- last to first. When nothing is collected an iterator that stops
-- immediately is returned.
function xml.collected(root,pattern,reverse)
    local collected = applylpath(root,pattern)
    local total = collected and #collected or 0
    if total == 0 then
        return dummy
    end
    if reverse then
        local at = total + 1
        return function()
            if at > 1 then
                at = at - 1
                return collected[at]
            end
        end
    end
    local at = 0
    return function()
        if at < total then
            at = at + 1
            return collected[at]
        end
    end
end
1627
1628
1629
-- Reports every entry that 'pattern' (default ".") collects in 'collection',
-- serialized with xml.tostring and preceded by the pattern.
function xml.inspect(collection,pattern)
    local path = pattern or "."
    for e in xml.collected(collection,path) do
        report_lpath("pattern: %s\n\n%s\n",path,xml.tostring(e))
    end
end
1636
1637
1638
-- Rewrites the line breaks in all text below 'e', in place, and returns 'e'.
--
-- For every string in e.dt the leading and trailing line breaks are removed
-- and each remaining run of line breaks becomes exactly two newlines. Entries
-- that are not strings are processed recursively. An 'e' without 'dt' is
-- returned untouched.
local function split(e)
    local dt = e.dt
    if dt then
        for i=1,#dt do
            local dti = dt[i]
            if type(dti) == "string" then
                -- The "$" anchor is essential: without it the lazy capture
                -- matches the empty string and trailing line breaks survive.
                dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*$","%1")
                dti = gsub(dti,"[\n\r]+","\n\n")
                dt[i] = dti
            else
                split(dti)
            end
        end
    end
    return e
end
1655
-- Finalizer: runs 'split' on every entry of the collection 'c' (which
-- changes those entries in place) and returns 'c'.
xml.finalizers.paragraphs = function(c)
    local total = #c
    for index=1,total do
        split(c[index])
    end
    return c
end
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693 |