lxml-lpt.lua /size: 53 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['lxml-lpt'] = {
2    version   = 1.001,
3    comment   = "this module is the basis for the lxml-* ones",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- e.ni is only valid after a filter run
10-- todo: B/C/[get first match]
11
12local concat, remove, insert = table.concat, table.remove, table.insert
13local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
14local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
15local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
16
17local setmetatableindex = table.setmetatableindex
18local formatters = string.formatters -- no need (yet) as paths are cached anyway
19
20-- beware, this is not xpath ... e.g. position is different (currently) and
21-- we have reverse-sibling as reversed preceding sibling
22
23--[[ldx--
24<p>This module can be used stand alone but also inside <l n='mkiv'/> in
25which case it hooks into the tracker code. Therefore we provide a few
26functions that set the tracers. Here we overload a previously defined
27function.</p>
<p>If I can get in the mood I will make a variant that is XSLT compliant
but I wonder if it makes sense.</p>
30--ldx]]--
31
32--[[ldx--
<p>Especially the lpath code is experimental, we will support some of xpath, but
34only things that make sense for us; as compensation it is possible to hook in your
35own functions. Apart from preprocessing content for <l n='context'/> we also need
36this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
37files.</p>
38
39<typing>
40a/b/c /*/c
41a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
42a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
43</typing>
44--ldx]]--
45
-- Trace switches; all of them start out disabled and are only flipped by the
-- tracker callbacks registered below.

local trace_lpath    = false
local trace_lparse   = false
local trace_lprofile = false
local report_lpath   = logs.reporter("xml","lpath")

-- The tracker subsystem is optional (the module can run stand alone). When it
-- is present three switches are exposed; "xml.profile" drives all three flags
-- at once.

if trackers then
    local register = trackers.register
    register("xml.path",    function(state) trace_lpath  = state end)
    register("xml.parse",   function(state) trace_lparse = state end)
    register("xml.profile", function(state)
        trace_lpath, trace_lparse, trace_lprofile = state, state, state
    end)
end
64
65--[[ldx--
66<p>We've now arrived at an interesting part: accessing the tree using a subset
67of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We
68will explain more about its usage in other documents.</p>
69--ldx]]--
70
local xml = xml

-- Call statistics with read-only accessors. Nothing in this part of the file
-- increments them (presumably the path applier further down does -- TODO confirm).

local lpathcalls  = 0
local lpathcached = 0

function xml.lpathcalls()
    return lpathcalls
end

function xml.lpathcached()
    return lpathcached
end

-- Shared registries: whatever an earlier module already stored is reused,
-- otherwise an empty table is installed.

local functions      = xml.functions or { } -- internal
xml.functions        = functions

local expressions    = xml.expressions or { } -- in expressions
xml.expressions      = expressions

local finalizers     = xml.finalizers or { } -- fast do-with ... (with return value other than collection)
xml.finalizers       = finalizers

local specialhandler = xml.specialhandler or { }
xml.specialhandler   = specialhandler

local xmlpatterns    = lpegpatterns.xml or { }
lpegpatterns.xml     = xmlpatterns

-- per protocol sub tables of finalizers

finalizers.xml = finalizers.xml or { }
finalizers.tex = finalizers.tex or { }
93
-- Index fallback for the per protocol finalizer tables: a name that is missing
-- from finalizers.xml or finalizers.tex is looked up in the shared finalizers
-- table. A hit is stored in the protocol table so that the next access is
-- direct; a miss is reported and answered with a do-nothing function (which is
-- not stored, so every later miss is reported again).

local function fallback(t,name)
    local fn = finalizers[name]
    if not fn then
        report_lpath("unknown sub finalizer %a",name)
        return function() end
    end
    t[name] = fn
    return fn
end

setmetatableindex(finalizers.xml,fallback)
setmetatableindex(finalizers.tex,fallback)

xml.defaultprotocol = "xml"
109
110-- as xsl does not follow xpath completely here we will also
111-- be more liberal especially with regards to the use of | and
112-- the rootpath:
113--
114-- test    : all 'test' under current
115-- /test   : 'test' relative to current
116-- a|b|c   : set of names
117-- (a|b|c) : idem
118-- !       : not
119--
120-- after all, we're not doing transformations but filtering. in
121-- addition we provide filter functions (last bit)
122--
123-- todo: optimizer
124--
125-- .. : parent
126-- *  : all kids
127-- /  : anchor here
128-- // : /**/
129-- ** : all in between
130--
131-- so far we had (more practical as we don't transform)
132--
133-- {/test}   : kids 'test' under current node
134-- {test}    : any kid with tag 'test'
135-- {//test}  : same as above
136
137-- evaluator (needs to be redone, for the moment copied)
138
139-- todo: apply_axis(list,notable) and collection vs single
140
-- Table of axis handlers, keyed by axis name. Each handler takes a list of
-- nodes and returns the list of nodes selected by that axis.

local apply_axis = { }

-- root: for every node follow the __p__ (parent) links until there is no
-- parent left and return that topmost node; the result has one entry per
-- input node, at the same position.

apply_axis['root'] = function(list)
    local collected = { }
    for l=1,#list do
        local top    = list[l]
        local parent = top.__p__
        while parent do
            top    = parent
            parent = top.__p__
        end
        collected[l] = top
    end
    return collected
end
158
apply_axis['self'] = function(list)
    -- The selection is the input itself: the very same table is handed back
    -- (an earlier variant that copied the entries one by one was dropped),
    -- so the caller does not get a private copy.
    return list
end
167
-- child: collect the element children (entries of dt that have a tg field) of
-- every node in the list, in document order.
--
-- As a side effect the positional fields are refreshed:
--
--   child.ni  : index of the child in the parent's dt
--   child.ei  : index of the child counting element children only
--   parent.en : number of element children
--
-- The old variant special cased dt's of length 0 and 1 and in the length 1
-- case forgot to reset parent.en when the only child was not an element, so a
-- stale (or missing) count survived. The single loop below gives the same
-- results for all other cases and always stores the count, which is also what
-- the collector used for the descendant axis does.

apply_axis['child'] = function(list)
    local collected = { }
    local c         = 0
    for l=1,#list do
        local ll = list[l]
        local dt = ll.dt
        if dt then -- weird that this is needed
            local en = 0
            for k=1,#dt do
                local dk = dt[k]
                if dk.tg then
                    c  = c + 1
                    en = en + 1
                    collected[c] = dk
                    dk.ni = k -- refresh
                    dk.ei = en
                end
            end
            ll.en = en
        end
    end
    return collected
end
205
-- Depth first collector used by the descendant axis. Every element (an entry
-- of dt that has a tg field) below the given node is appended to collected,
-- parents before their own children. While walking, the positional fields are
-- refreshed: ni (index in the parent's dt), ei (index among the element
-- children) and, on the parent, en (number of element children).
--
-- list      : the node whose dt is walked
-- collected : array that receives the elements
-- c         : number of entries already in collected
--
-- Returns the new number of entries. A node without dt is left untouched.

local function collect(list,collected,c)
    local dt = list.dt
    if not dt then
        return c
    end
    local en = 0
    for k=1,#dt do
        local dk = dt[k]
        if dk.tg then
            en = en + 1
            c  = c + 1
            collected[c] = dk
            dk.ni = k -- refresh
            dk.ei = en
            c = collect(dk,collected,c)
        end
    end
    list.en = en
    return c
end
242
-- descendant: all elements below the given nodes (the nodes themselves are not
-- included), gathered depth first per input node by the collector defined
-- just above.

apply_axis['descendant'] = function(list)
    local collected = { }
    local total     = 0
    for index=1,#list do
        total = collect(list[index],collected,total)
    end
    return collected
end
251
-- Depth first collector used by the descendant-or-self axis (this definition
-- shadows the earlier local with the same name). Every element (an entry of dt
-- that has a tg field) below the given node is appended to collected, parents
-- before their own children. While walking, the positional fields are
-- refreshed: ni (index in the parent's dt), ei (index among the element
-- children) and, on the parent, en (number of element children).
--
-- list      : the node whose dt is walked
-- collected : array that receives the elements
-- c         : number of entries already in collected
--
-- Returns the new number of entries. A node without dt is left untouched.
--
-- The previous variant special cased a dt of length 1 and then forgot to reset
-- en when that single child was not an element, so a stale (or missing) count
-- survived. The single loop below always stores the count and gives the same
-- results in all other cases.

local function collect(list,collected,c)
    local dt = list.dt
    if not dt then
        return c
    end
    local en = 0
    for k=1,#dt do
        local dk = dt[k]
        if dk.tg then
            c  = c + 1
            en = en + 1
            collected[c] = dk
            dk.ni = k -- refresh
            dk.ei = en
            c = collect(dk,collected,c)
        end
    end
    list.en = en
    return c
end
286
-- descendant-or-self: every input node followed by all elements below it. A
-- node whose special field is exactly true is not added itself (this catches
-- a double root) but its descendants still are.

apply_axis['descendant-or-self'] = function(list)
    local collected = { }
    local count     = 0
    for index=1,#list do
        local node    = list[index]
        local include = node.special ~= true -- catch double root
        if include then
            count = count + 1
            collected[count] = node
        end
        count = collect(node,collected,count)
    end
    return collected
end
300
-- ancestor: for every input node the chain of __p__ links, nearest parent
-- first; the node itself is not part of the result. Shared ancestors show up
-- once per input node.

apply_axis['ancestor'] = function(list)
    local collected = { }
    local count     = 0
    for index=1,#list do
        local parent = list[index].__p__
        while parent do
            count = count + 1
            collected[count] = parent
            parent = parent.__p__
        end
    end
    return collected
end
316
-- ancestor-or-self: every input node followed by the chain of its __p__ links,
-- nearest parent first. Shared ancestors show up once per input node.

apply_axis['ancestor-or-self'] = function(list)
    local collected = { }
    local count     = 0
    for index=1,#list do
        local node = list[index]
        repeat
            count = count + 1
            collected[count] = node
            node = node.__p__
        until not node
    end
    return collected
end
334
-- parent: the __p__ of every input node; nodes without a parent contribute
-- nothing, so the result can be shorter than the input.

apply_axis['parent'] = function(list)
    local collected = { }
    for index=1,#list do
        local parent = list[index].__p__
        if parent then
            collected[#collected+1] = parent
        end
    end
    return collected
end
347
-- The attribute and namespace axes select nothing: whatever the input, each
-- call answers with a fresh empty table.

apply_axis['attribute'] = function(list)
    local nothing = { }
    return nothing
end

apply_axis['namespace'] = function(list)
    local nothing = { }
    return nothing
end
355
apply_axis['following'] = function(list) -- incomplete
    -- Not implemented yet: the selection is always empty. The disabled sketch
    -- below picks, per input node, the first table entry that comes after the
    -- node in its parent's dt.
    --
    -- local collected, c = { }, 0
    -- for l=1,#list do
    --     local ll = list[l]
    --     local p = ll.__p__
    --     local d = p.dt
    --     for i=ll.ni+1,#d do
    --         local di = d[i]
    --         if type(di) == "table" then
    --             c = c + 1
    --             collected[c] = di
    --             break
    --         end
    --     end
    -- end
    -- return collected
    local nothing = { }
    return nothing
end
374
apply_axis['preceding'] = function(list) -- incomplete
    -- Not implemented yet: the selection is always empty. The disabled sketch
    -- below picks, per input node, the nearest table entry that comes before
    -- the node in its parent's dt.
    --
    -- local collected = { }
    -- local c = 0
    -- for l=1,#list do
    --     local ll = list[l]
    --     local p = ll.__p__
    --     local d = p.dt
    --     for i=ll.ni-1,1,-1 do
    --         local di = d[i]
    --         if type(di) == "table" then
    --             c = c + 1
    --             collected[c] = di
    --             break
    --         end
    --     end
    -- end
    -- return collected
    local nothing = { }
    return nothing
end
394
-- following-sibling: for every input node all table entries that come after
-- it in its parent's dt, in document order (string entries are skipped).
--
-- The position of the node is taken from its ni field. That field is only set
-- once the node has been visited by one of the collecting axes, and a root
-- node has neither ni nor a parent. The old code indexed the parent and did
-- arithmetic on ni unconditionally, so such nodes raised an error. Now a node
-- without parent (or a parent without dt) simply has no siblings, and a
-- missing ni is recovered by looking the node up in the parent's dt.

apply_axis['following-sibling'] = function(list)
    local collected = { }
    local c = 0
    for l=1,#list do
        local ll = list[l]
        local p  = ll.__p__
        local d  = p and p.dt
        if d then
            local ni = ll.ni
            if not ni then
                for i=1,#d do
                    if d[i] == ll then
                        ni = i
                        break
                    end
                end
            end
            if ni then
                for i=ni+1,#d do
                    local di = d[i]
                    if type(di) == "table" then
                        c = c + 1
                        collected[c] = di
                    end
                end
            end
        end
    end
    return collected
end
412
-- preceding-sibling: for every input node all table entries that come before
-- it in its parent's dt, in document order (string entries are skipped).
--
-- The position of the node is taken from its ni field. That field is only set
-- once the node has been visited by one of the collecting axes, and a root
-- node has neither ni nor a parent. The old code indexed the parent and did
-- arithmetic on ni unconditionally, so such nodes raised an error. Now a node
-- without parent (or a parent without dt) simply has no siblings, and a
-- missing ni is recovered by looking the node up in the parent's dt.

apply_axis['preceding-sibling'] = function(list)
    local collected = { }
    local c = 0
    for l=1,#list do
        local ll = list[l]
        local p  = ll.__p__
        local d  = p and p.dt
        if d then
            local ni = ll.ni
            if not ni then
                for i=1,#d do
                    if d[i] == ll then
                        ni = i
                        break
                    end
                end
            end
            if ni then
                for i=1,ni-1 do
                    local di = d[i]
                    if type(di) == "table" then
                        c = c + 1
                        collected[c] = di
                    end
                end
            end
        end
    end
    return collected
end
430
-- reverse-sibling: the preceding siblings in reversed order, that is, for
-- every input node all table entries before it in its parent's dt, nearest
-- one first (string entries are skipped).
--
-- The position of the node is taken from its ni field. That field is only set
-- once the node has been visited by one of the collecting axes, and a root
-- node has neither ni nor a parent. The old code indexed the parent and did
-- arithmetic on ni unconditionally, so such nodes raised an error. Now a node
-- without parent (or a parent without dt) simply has no siblings, and a
-- missing ni is recovered by looking the node up in the parent's dt.

apply_axis['reverse-sibling'] = function(list) -- reverse preceding
    local collected = { }
    local c = 0
    for l=1,#list do
        local ll = list[l]
        local p  = ll.__p__
        local d  = p and p.dt
        if d then
            local ni = ll.ni
            if not ni then
                for i=1,#d do
                    if d[i] == ll then
                        ni = i
                        break
                    end
                end
            end
            if ni then
                for i=ni-1,1,-1 do
                    local di = d[i]
                    if type(di) == "table" then
                        c = c + 1
                        collected[c] = di
                    end
                end
            end
        end
    end
    return collected
end
448
-- The "auto-*" axes and "initial-child" share the handler of the plain axis
-- they are named after.

for alias, axis in next, {
    ['auto-descendant-or-self'] = 'descendant-or-self',
    ['auto-descendant']         = 'descendant',
    ['auto-child']              = 'child',
    ['auto-self']               = 'self',
    ['initial-child']           = 'child',
} do
    apply_axis[alias] = apply_axis[axis]
end
454
-- Filter a list of nodes by name.
--
-- list      : the candidate nodes
-- directive : when true the matching nodes are kept, otherwise the ones that
--             do not match
-- nodes     : flat array of triples; of each triple the second slot is a
--             namespace and the third a tag, where a false or nil slot matches
--             anything (the first slot is not looked at here)
--
-- With exactly one triple three cheap cases are distinguished: only a tag,
-- only a namespace, or both. A single triple without tag and namespace is a
-- wildcard: the input list itself is returned (or an empty table when
-- negated) and no mi fields are touched.
--
-- Only entries that have a tag take part, except in the namespace-only case
-- where the entry must have a namespace (rn, else ns) instead. Every node
-- that is kept gets its mi field set: a counter that restarts at 1 each time
-- the parent differs from the parent of the previously kept node.

local function apply_nodes(list,directive,nodes)
    -- todo: nodes[1] etc ... negated node name in set ... when needed
    -- ... currently ignored
    local maxn   = #nodes
    local single = maxn == 3 -- optimized cases
    local nns, ntg
    if single then
        nns, ntg = nodes[2], nodes[3]
        if not nns and not ntg then -- wildcard
            if directive then
                return list
            end
            return { }
        end
    end
    local collected = { }
    local c = 0
    local m = 0
    local p = nil
    for l=1,#list do
        local ll = list[l]
        local considered = false
        local ok         = false
        if single and not ntg then -- only check namespace
            local lns = ll.rn or ll.ns
            if lns then
                considered = true
                ok = lns == nns
            end
        else
            local ltg = ll.tg
            if ltg then
                considered = true
                if not single then -- any of the triples may match
                    local lns = ll.rn or ll.ns
                    for n=1,maxn,3 do
                        local sns = nodes[n+1]
                        local stg = nodes[n+2]
                        ok = (not stg or ltg == stg) and (not sns or lns == sns)
                        if ok then
                            break
                        end
                    end
                elseif nns then -- check both
                    ok = ltg == ntg and (ll.rn or ll.ns) == nns
                else -- only check tag
                    ok = ltg == ntg
                end
            end
        end
        if considered and ((directive and ok) or (not directive and not ok)) then
            local llp = ll.__p__
            if llp ~= p then
                p = llp
                m = 1
            else
                m = m + 1
            end
            c = c + 1
            collected[c] = ll
            ll.mi = m
        end
    end
    return collected
end
575
-- Flag that lets an expression end the scan early: it is cleared at the start
-- of every run and checked after each evaluated node.

local quit_expression = false

-- Keep the nodes for which the compiled expression gives a true value.
--
-- list       : the candidate nodes
-- expression : function called as expression(list,node,index,order)
-- order      : passed through unchanged to the expression
--
-- Returns a new array with the accepted nodes in their original order.

local function apply_expression(list,expression,order)
    local accepted = { }
    quit_expression = false
    for index=1,#list do
        local node = list[index]
        -- nasty: order is only valid when n=1
        if expression(list,node,index,order) then
            accepted[#accepted+1] = node
        end
        if quit_expression then
            break
        end
    end
    return accepted
end
594
-- Selector support is optional. As long as xml.applyselector is not defined
-- the list passes through unchanged; once it is, this local is replaced by
-- that function so that later calls go there directly.

local function apply_selector(list,specification)
    local selector = xml.applyselector
    if not selector then
        return list
    end
    apply_selector = selector
    return selector(list,specification)
end
603
-- this one can be made faster but there are not that many conversions so it doesn't
-- really pay off
606
607local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb
608
-- Building blocks for the expression converter. The substitutions turn the
-- operators of the path language into their Lua spelling.

local lp_space   = S(" \n\r\t\f")
local spaces     = lp_space^0
local lp_any     = P(1)
local lp_noequal = P("!=") / "~=" + S("<>=") * P("=") -- != becomes ~= while <= >= == are kept
local lp_doequal = P("=")  / "=="
local lp_or      = P("|")  / " or "
local lp_and     = P("&")  / " and "
616
-- Names that can be used as name() in an expression, mapped onto the piece of
-- Lua code that replaces them. Inside that code "ll" is the current node, "l"
-- its position in "list" and "order" the value passed along by the caller
-- (these are the parameters of the generated evaluator).

local builtin = {
    -- content and naming
    text         = "(ll.dt[1] or '')", -- fragile
    content      = "ll.dt",
    name         = "((ll.ns~='' and ll.ns..':'..ll.tg) or ll.tg)",
    tag          = "ll.tg",
    -- positions
    position     = "l", -- is element in finalizer
    firstindex   = "1",
    firstelement = "1",
    first        = "1",
    lastindex    = "(#ll.__p__.dt or 1)",
    lastelement  = "(ll.__p__.en or 1)",
    last         = "#list",
    -- context
    list         = "list",
    self         = "ll",
    rootposition = "order",
    order        = "order",
    -- cached indices
    element      = "(ll.ei or 1)",
    index        = "(ll.ni or 1)",
    match        = "(ll.mi or 1)",
    -- namespaces
    namespace    = "ll.ns",
    ns           = "ll.ns",
    -- new
 -- attribute    = "ll.at",
 -- at           = "ll.at",
}

-- a builtin name followed by an empty pair of parentheses; the name is
-- replaced by its code and the parentheses are dropped

local lp_builtin =
    lpeg.utfchartabletopattern(builtin) / builtin
  * ((spaces * P("(") * spaces * P(")")) / "")
644
645-- for the moment we keep namespaces with attributes
646
-- @name or attribute::name becomes a guarded lookup in the attribute table

local lp_attribute =
    (P("@") + P("attribute::")) / ""
  * Cc("(ll.at and ll.at['")
  * (R("az","AZ") + S("-_:"))^1
  * Cc("'])")

-- An expression that is just a number is a position test: a positive number
-- is compared with the position in the list and a negative one counts from
-- the end of the list (hence the +1). Earlier variants did the same with
-- functions instead of string templates.

local lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / "l==%0"
local lp_fastpos_n = (P("-")   * R("09")^1 * P(-1)) / "(%0<0 and (#list+%0+1==l))" -- +1 added
local lp_fastpos   = lp_fastpos_n + lp_fastpos_p

-- words that are captured as they are

local lp_reserved  = C(P("and") + P("or") + P("not") + P("div") + P("mod") + P("true") + P("false"))

-- a dotted name followed by an opening parenthesis, as in a.b(, is taken to be
-- a call to a Lua function and passed on untouched (todo: better . handling)

local lp_lua_function =
    Cs(
        (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")
    ) / "%0"
664
-- A plain name followed by an opening parenthesis is a call to one of the
-- registered expression functions; unknown names are redirected to expr.error.

local lp_function  = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling
    local known = expressions[t]
    return known and ("expr." .. t .. "(") or "expr.error("
end
672
-- balanced parentheses; value captures what is between the outer pair

local lparent  = P("(")
local rparent  = P(")")
local noparent = 1 - (lparent + rparent)
local nested   = P { lparent * (noparent + V(1))^0 * rparent }
local value    = lparent * C((noparent + nested)^0) * rparent -- P{"("*C(((1-S("()"))+V(1))^0)*")"}

-- a bare name is wrapped in a child lookup (lp_child) or in quotes
-- (lp_string); lp_number is a signed integer and lp_content a quoted string
-- that is left alone

local lp_child   = Cc("expr.child(ll,'")
                 * R("az","AZ") * R("az","AZ","--","__")^0
                 * Cc("')")
local lp_number  = S("+-") * R("09")^1
local lp_string  = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
local lp_content = P("'") * (1 - P("'"))^0 * P("'")
                 + P('"') * (1 - P('"'))^0 * P('"')
683
local cleaner -- assigned once the converter has been defined

-- name(..), text(..), tag(..), count(..) and child(..) get the current node
-- as first argument; what was between the parentheses is run through the
-- cleaner and, when something is left, passed as second argument.

local lp_special = C(P("name") + P("text") + P("tag") + P("count") + P("child")) * value / function(t,s)
    if not expressions[t] then
        return "expr.error(" .. t .. ")"
    end
    local cleaned = s and s ~= "" and lpegmatch(cleaner,s)
    if cleaned and cleaned ~= "" then
        return "expr." .. t .. "(ll," .. cleaned .. ")"
    end
    return "expr." .. t .. "(ll)"
end
698
-- One unit of an expression; the alternatives are tried in this order and the
-- last one accepts any character.

local content = lp_builtin
              + lp_attribute
              + lp_special
              + lp_noequal + lp_doequal
              + lp_or + lp_and
              + lp_reserved
              + lp_lua_function + lp_function
              + lp_content -- too fragile
              + lp_child
              + lp_any

-- Turns an expression into Lua code: either the whole input is a position
-- test or it is a sequence of units and parenthesized groups of units.

local converter = Cs (
    lp_fastpos + P { lparent * V(1)^0 * rparent + content }^0
)

-- Used on the arguments of the special functions: bare names get quoted,
-- everything else is copied.

cleaner = Cs (
    (
     -- lp_fastpos +
        lp_reserved
      + lp_number
      + lp_string
      + 1
    )^1
)
721
-- Source templates, filled in with format and compiled with load.

-- expression: %s is the converted expression; the result is an evaluator that
-- is called with the list, the current node, its position and the order
local template_e = [[
    local expr = xml.expressions
    return function(list,ll,l,order)
        return %s
    end
]]

-- finalizer with arguments: %s are the protocol, the name and the argument list
local template_f_y = [[
    local finalizer = xml.finalizers['%s']['%s']
    return function(collection)
        return finalizer(collection,%s)
    end
]]

-- finalizer without arguments: %s are the protocol and the name
local template_f_n = [[
    return xml.finalizers['%s']['%s']
]]
739
740--
741
-- Shared step descriptors: one table per axis, referenced (not copied) by the
-- parser via constant captures.

local register_last_match              = { kind = "axis", axis = "last-match" }
local register_self                    = { kind = "axis", axis = "self" }
local register_parent                  = { kind = "axis", axis = "parent" }
local register_descendant              = { kind = "axis", axis = "descendant" }
local register_child                   = { kind = "axis", axis = "child" }
local register_descendant_or_self      = { kind = "axis", axis = "descendant-or-self" }
local register_root                    = { kind = "axis", axis = "root" }
local register_ancestor                = { kind = "axis", axis = "ancestor" }
local register_ancestor_or_self        = { kind = "axis", axis = "ancestor-or-self" }
local register_attribute               = { kind = "axis", axis = "attribute" }
local register_namespace               = { kind = "axis", axis = "namespace" }
local register_following               = { kind = "axis", axis = "following" }
local register_following_sibling       = { kind = "axis", axis = "following-sibling" }
local register_preceding               = { kind = "axis", axis = "preceding" }
local register_preceding_sibling       = { kind = "axis", axis = "preceding-sibling" }
local register_reverse_sibling         = { kind = "axis", axis = "reverse-sibling" }

-- implicit axes

local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" }
local register_auto_descendant         = { kind = "axis", axis = "auto-descendant" }
local register_auto_self               = { kind = "axis", axis = "auto-self" }
local register_auto_child              = { kind = "axis", axis = "auto-child" }

local register_initial_child           = { kind = "axis", axis = "initial-child" }

-- node test that accepts everything: one triple without namespace and tag

local register_all_nodes               = {
    kind     = "nodes",
    nodetest = true,
    nodes    = { true, false, false },
}
767
-- expressions that have been reported already, so that each is reported once

local skip = { }

-- Report a broken expression (source plus converted form) the first time it
-- is seen; always returns false.

local function errorrunner_e(str,cnv)
    if skip[str] then
        return false
    end
    report_lpath("error in expression: %s => %s",str,cnv)
    skip[str] = cnv or str
    return false
end

-- Report a broken finalizer call (every time); always returns false.

local function errorrunner_f(str,arg)
    local shown = arg or ""
    report_lpath("error in finalizer: %s(%s)",str,shown)
    return false
end
782
-- step descriptor for a node test; nodes is stored as given (not copied)

local function register_nodes(nodetest,nodes)
    local step = { kind = "nodes" }
    step.nodetest = nodetest
    step.nodes    = nodes
    return step
end

-- step descriptor for a selector

local function register_selector(specification)
    local step = { kind = "selector" }
    step.specification = specification
    return step
end
790
-- Compile an expression into an evaluator. The expression is converted to Lua
-- code, wrapped in the expression template and loaded. When that fails (or
-- the chunk yields nothing) the evaluator is a stub that reports the problem
-- when it gets called and returns nothing.

local function register_expression(expression)
    local converted = lpegmatch(converter,expression)
    local chunk     = load(format(template_e,converted))
    local evaluator = chunk and chunk()
    if not evaluator then
        evaluator = function() errorrunner_e(expression,converted) end
    end
    return {
        kind       = "expression",
        expression = expression,
        converted  = converted,
        evaluator  = evaluator,
    }
end
799
-- Compile a finalizer step. The protocol (falling back on
-- xml.defaultprotocol) and the name select the function; when arguments are
-- given they are pasted into a wrapper that passes them after the collection.
-- When loading fails (or the chunk yields nothing) the finalizer is a stub
-- that reports the problem when it gets called and returns nothing.

local function register_finalizer(protocol,name,arguments)
    local scheme = protocol or xml.defaultprotocol
    local code
    if arguments and arguments ~= "" then
        code = format(template_f_y,scheme,name,arguments)
    else
        code = format(template_f_n,scheme,name)
    end
    local chunk  = load(code)
    local runner = chunk and chunk()
    if not runner then
        runner = function() errorrunner_f(name,arguments) end
    end
    return {
        kind      = "finalizer",
        name      = name,
        arguments = arguments,
        finalizer = runner,
    }
end
810
-- [ ... ] with nested brackets; brackets inside quoted strings do not count
-- and what is between the outer brackets is captured

local expression = P { "ex",
    ex = P("[") * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * P("]"),
    sq = P("'") * (1 - P("'"))^0 * P("'"),
    dq = P('"') * (1 - P('"'))^0 * P('"'),
}

-- ( ... ) argument list: quoted strings are kept as they are and runs of
-- anything else (up to a comma, quote or closing parenthesis) are turned into
-- quoted Lua strings with %q

local arguments = P { "ar",
    ar = P("(") * Cs((V("sq") + V("dq") + V("nq") + (1 - P(")")))^0) * P(")"),
    nq = (1 - S("),'\""))^1 / function(s) return format("%q",s) end,
    sq = P("'") * (1 - P("'"))^0 * P("'"),
    dq = P('"') * (1 - P('"'))^0 * P('"'),
}
823
824-- todo: better arg parser
825
-- step descriptor for input that could not be parsed; the offending text ends
-- up in the error message

local function register_error(str)
    local message = format("unparsed: %s",str)
    return { kind = "error", error = message }
end
829
830-- there is a difference in * and /*/ and so we need to catch a few special cases
831
-- a lone "*", a lone "/" and the empty path each map onto fixed steps

local special_1 = P("*") * Cc(register_auto_descendant,register_all_nodes) -- last one not needed
local special_2 = P("/") * Cc(register_auto_self)
local special_3 = P("")  * Cc(register_auto_self)

-- lookaheads: succeed at the end of the input or when the next character is
-- not a colon (or opening parenthesis); nothing is consumed

local no_nextcolon   = P(-1) + #(1 - P(":")) -- newer lpeg needs the P(-1)
local no_nextlparent = P(-1) + #(1 - P("(")) -- newer lpeg needs the P(-1)
838
839local pathparser = Ct { "patterns", -- can be made a bit faster by moving some patterns outside
840
841    patterns             = spaces * V("protocol") * spaces * (
842                              ( V("special") * spaces * P(-1)                                                         ) +
843                              ( V("initial") * spaces * V("step") * spaces * (P("/") * spaces * V("step") * spaces)^0 )
844                           ),
845
846    protocol             = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),
847
848 -- the / is needed for // as descendant or self is somewhat special
849 --
850 -- step                 = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
851    step                 = ((V("shortcuts") + V("selector") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
852
853    axis                 = V("last_match")
854                         + V("descendant")
855                         + V("child")
856                         + V("parent")
857                         + V("self")
858                         + V("root")
859                         + V("ancestor")
860                         + V("descendant_or_self")
861                         + V("following_sibling")
862                         + V("following")
863                         + V("reverse_sibling")
864                         + V("preceding_sibling")
865                         + V("preceding")
866                         + V("ancestor_or_self")
867                         + #(1-P(-1)) * Cc(register_auto_child),
868
869    special              = special_1
870                         + special_2
871                         + special_3,
872
873    initial              = (P("/") * spaces * Cc(register_initial_child))^-1,
874
875    error                = (P(1)^1) / register_error,
876
877    shortcuts_a          = V("s_descendant_or_self")
878                         + V("s_descendant")
879                         + V("s_child")
880                         + V("s_parent")
881                         + V("s_self")
882                         + V("s_root")
883                         + V("s_ancestor")
884                         + V("s_lastmatch"),
885
886    shortcuts            = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,
887
888    s_descendant_or_self = (P("***/") + P("/"))  * Cc(register_descendant_or_self), --- *** is a bonus
889    s_descendant         = P("**")               * Cc(register_descendant),
890    s_child              = P("*") * no_nextcolon * Cc(register_child),
891    s_parent             = P("..")               * Cc(register_parent),
892    s_self               = P("." )               * Cc(register_self),
893    s_root               = P("^^")               * Cc(register_root),
894    s_ancestor           = P("^")                * Cc(register_ancestor),
895    s_lastmatch          = P("=")                * Cc(register_last_match),
896
897    -- we can speed this up when needed but we cache anyway so ...
898
899    descendant           = P("descendant::")         * Cc(register_descendant),
900    child                = P("child::")              * Cc(register_child),
901    parent               = P("parent::")             * Cc(register_parent),
902    self                 = P("self::")               * Cc(register_self),
903    root                 = P('root::')               * Cc(register_root),
904    ancestor             = P('ancestor::')           * Cc(register_ancestor),
905    descendant_or_self   = P('descendant-or-self::') * Cc(register_descendant_or_self),
906    ancestor_or_self     = P('ancestor-or-self::')   * Cc(register_ancestor_or_self),
907 -- attribute            = P('attribute::')          * Cc(register_attribute),
908 -- namespace            = P('namespace::')          * Cc(register_namespace),
909    following            = P('following::')          * Cc(register_following),
910    following_sibling    = P('following-sibling::')  * Cc(register_following_sibling),
911    preceding            = P('preceding::')          * Cc(register_preceding),
912    preceding_sibling    = P('preceding-sibling::')  * Cc(register_preceding_sibling),
913    reverse_sibling      = P('reverse-sibling::')    * Cc(register_reverse_sibling),
914    last_match           = P('last-match::')         * Cc(register_last_match),
915
916    selector             = P("{") * C((1-P("}"))^1) * P("}") / register_selector,
917
918    nodes                = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,
919
920    expressions          = expression / register_expression,
921
922    letters              = R("az")^1,
923    name                 = (1-S("/[]()|:*!"))^1, -- make inline
924    negate               = P("!") * Cc(false),
925
926    nodefunction         = V("negate") + P("not") * Cc(false) + Cc(true),
927    nodetest             = V("negate") + Cc(true),
928    nodename             = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
929    wildnodename         = (C(V("name")) + P("*") * Cc(false)) * no_nextlparent,
930    nodeset              = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,
931
932    finalizer            = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,
933
934}
935
936xmlpatterns.pathparser = pathparser
937
938local cache = { }
939
-- Renders a flat node set as a readable string (used when tracing). The set is
-- read in triplets: directive, namespace, tag. A missing or empty namespace or
-- tag shows up as "*", the tag "@rt@" is shown as "[root]" and a triplet with a
-- false directive is wrapped in not(...). Triplets are joined with "|" and the
-- complete result is wrapped in not(...) when nodetest is exactly false.
local function nodesettostring(set,nodetest)
    local parts, n = { }, 0
    for first=1,#set,3 do
        local wanted    = set[first]
        local namespace = set[first+1]
        local tag       = set[first+2]
        local label
        if tag == "@rt@" then
            label = "[root]"
        else
            if not namespace or namespace == "" then
                namespace = "*"
            end
            if not tag or tag == "" then
                tag = "*"
            end
            label = format("%s:%s",namespace,tag)
        end
        if not wanted then
            label = format("not(%s)",label)
        end
        n = n + 1
        parts[n] = label
    end
    local joined = concat(parts,"|")
    if nodetest == false then
        joined = format("not(%s)",joined)
    end
    return joined
end
955
-- Summarizes a list of elements as space separated "ns:tg" names (used in the
-- trace output). An empty list gives "no elements", a missing or empty ns or tg
-- is shown as "*" and the tag "@rt@" is shown as "[root]".
local function tagstostring(list)
    local total = #list
    if total == 0 then
        return "no elements"
    end
    local names = { }
    for index=1,total do
        local element = list[index]
        local tag = element.tg
        if tag == "@rt@" then
            names[index] = "[root]"
        else
            local namespace = element.ns
            if not namespace or namespace == "" then
                namespace = "*"
            end
            if not tag or tag == "" then
                tag = "*"
            end
            names[index] = format("%s:%s",namespace,tag)
        end
    end
    return concat(names," ")
end
972
973xml.nodesettostring = nodesettostring
974
975local lpath -- we have a harmless kind of circular reference
976
-- Reports a parsed lpath in serialized form. A string argument is first turned
-- into its parsed table by lpath; the protocol falls back to xml.defaultprotocol
-- when the parsed table carries none.
local function lshow(parsed)
    local specification = parsed
    if type(specification) == "string" then
        specification = lpath(specification)
    end
    local protocol   = specification.protocol or xml.defaultprotocol
    local serialized = table.serialize(specification,false)
    report_lpath("%s://%s => %s",protocol,specification.pattern,serialized)
end
984
985xml.lshow = lshow
986
-- Appends str to the list in p.comment; the list is created on first use.
local function add_comment(p,str)
    local comments = p.comment
    if comments then
        comments[#comments+1] = str
    else
        p.comment = { str }
    end
end
995
-- Turns an lpath pattern into its parsed form (a table of steps). A table is
-- assumed to be parsed already and is returned untouched; a string is looked up
-- in the cache first and parsed with pathparser otherwise. Every call bumps
-- lpathcalls and every cache hit bumps lpathcached.
lpath = function (pattern) -- the gain of caching is rather minimal
    lpathcalls = lpathcalls + 1
    if type(pattern) == "table" then
        return pattern
    end
    local parsed = cache[pattern]
    if parsed then
        lpathcached = lpathcached + 1
        return parsed
    end
    parsed = lpegmatch(pathparser,pattern)
    if not parsed then
        -- the parser did not match at all: keep an entry without steps
        parsed = { pattern = pattern }
    else
        parsed.pattern = pattern
        if #parsed == 0 then
            -- no steps were collected: fall back to a single self step
            parsed = { pattern = pattern, register_self, state = "parsing error" }
            report_lpath("parsing error in pattern: %s",pattern)
            lshow(parsed)
        else
            -- these fixups could have been done with a more complex parser but
            -- doing them afterwards is cleaner
            local axis = parsed[1].axis
            if axis == "auto-child" then
                -- (register_auto_descendant_or_self would be the alternative)
                add_comment(parsed, "auto-child replaced by auto-descendant")
                parsed[1] = register_auto_descendant
            elseif axis == "initial-child" and #parsed > 1 and parsed[2].axis then
                add_comment(parsed, "initial-child removed") -- we could also make it an auto-self
                remove(parsed,1)
            end
            local last = #parsed -- the first step can have been removed
            if last > 1 then
                local final = parsed[last]
                if final.kind == "nodes" and final.nodetest == true then
                    local nodes = final.nodes
                    if nodes[1] == true and nodes[2] == false and nodes[3] == false then
                        add_comment(parsed, "redundant final wildcard filter removed")
                        remove(parsed,last)
                    end
                end
            end
        end
    end
    cache[pattern] = parsed
    if trace_lparse and not trace_lprofile then
        lshow(parsed)
    end
    return parsed
end
1052
1053xml.lpath = lpath
1054
1055-- we can move all calls inline and then merge the trace back
1056-- technically we can combine axis and the next nodes which is
1057-- what we did before but this is a bit cleaner (but slower too)
1058-- but interesting is that it's not that much faster when we
1059-- go inline
1060--
1061-- beware: we need to return a collection even when we filter
1062-- else the (simple) cache gets messed up
1063
1064-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
1065-- and it also messes up finalizers
1066
1067-- watch out: when there is a finalizer, it's always called as there
1068-- can be cases that a finalizer returns (or does) something in case
1069-- there is no match; an example of this is count()
1070
1071do
1072
1073    local profiled  = { }
1074    xml.profiled    = profiled
1075    local lastmatch = nil  -- we remember the last one .. drawback: no collection till new collect
1076    local keepmatch = nil  -- we remember the last one .. drawback: no collection till new collect
1077
1078    if directives then
1079        directives.register("xml.path.keeplastmatch",function(v)
1080            keepmatch = v
1081            lastmatch = nil
1082        end)
1083    end
1084
1085    apply_axis["last-match"] = function()
1086        return lastmatch or { }
1087    end
1088
1089    local function profiled_apply(list,parsed,nofparsed,order)
1090        local p = profiled[parsed.pattern]
1091        if p then
1092            p.tested = p.tested + 1
1093        else
1094            p = { tested = 1, matched = 0, finalized = 0 }
1095            profiled[parsed.pattern] = p
1096        end
1097        local collected = list
1098        for i=1,nofparsed do
1099            local pi = parsed[i]
1100            local kind = pi.kind
1101            if kind == "axis" then
1102                collected = apply_axis[pi.axis](collected)
1103            elseif kind == "nodes" then
1104                collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1105            elseif kind == "expression" then
1106                collected = apply_expression(collected,pi.evaluator,order)
1107            elseif kind == "selector" then
1108                collected = apply_selector(collected,pi.specification)
1109            elseif kind == "finalizer" then
1110                collected = pi.finalizer(collected) -- no check on # here
1111                p.matched = p.matched + 1
1112                p.finalized = p.finalized + 1
1113                return collected
1114            end
1115            if not collected or #collected == 0 then
1116                local pn = i < nofparsed and parsed[nofparsed]
1117                if pn and pn.kind == "finalizer" then
1118                    collected = pn.finalizer(collected) -- collected can be nil
1119                    p.finalized = p.finalized + 1
1120                    return collected
1121                end
1122                return nil
1123            end
1124        end
1125        if collected then
1126            p.matched = p.matched + 1
1127        end
1128        return collected
1129    end
1130
1131    local function traced_apply(list,parsed,nofparsed,order)
1132        if trace_lparse then
1133            lshow(parsed)
1134        end
1135        report_lpath("collecting: %s",parsed.pattern)
1136        report_lpath("root tags : %s",tagstostring(list))
1137        report_lpath("order     : %s",order or "unset")
1138        local collected = list
1139        for i=1,nofparsed do
1140            local pi = parsed[i]
1141            local kind = pi.kind
1142            if kind == "axis" then
1143                collected = apply_axis[pi.axis](collected)
1144                report_lpath("% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
1145            elseif kind == "nodes" then
1146                collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1147                report_lpath("% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
1148            elseif kind == "expression" then
1149                collected = apply_expression(collected,pi.evaluator,order)
1150                report_lpath("% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted)
1151            elseif kind == "selector" then
1152                collected = apply_selector(collected,pi.specification)
1153                report_lpath("% 10i : se : %s ",(collected and #collected) or 0,pi.specification)
1154            elseif kind == "finalizer" then
1155                collected = pi.finalizer(collected)
1156                report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
1157                return collected
1158            end
1159            if not collected or #collected == 0 then
1160                local pn = i < nofparsed and parsed[nofparsed]
1161                if pn and pn.kind == "finalizer" then
1162                    collected = pn.finalizer(collected)
1163                    report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pn.name,pn.arguments or "")
1164                    return collected
1165                end
1166                return nil
1167            end
1168        end
1169        return collected
1170    end
1171
1172    local function normal_apply(list,parsed,nofparsed,order)
1173        local collected = list
1174        for i=1,nofparsed do
1175            local pi = parsed[i]
1176            local kind = pi.kind
1177            if kind == "axis" then
1178                local axis = pi.axis
1179                if axis ~= "self" then
1180                    collected = apply_axis[axis](collected)
1181                end
1182            elseif kind == "nodes" then
1183                collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1184            elseif kind == "expression" then
1185                collected = apply_expression(collected,pi.evaluator,order)
1186            elseif kind == "selector" then
1187                collected = apply_selector(collected,pi.specification)
1188            elseif kind == "finalizer" then
1189                return pi.finalizer(collected)
1190            end
1191            if not collected or #collected == 0 then
1192                local pf = i < nofparsed and parsed[nofparsed].finalizer
1193                if pf then
1194                    return pf(collected) -- can be anything
1195                end
1196                return nil
1197            end
1198        end
1199        return collected
1200    end
1201
1202    local apply = normal_apply
1203
1204    if trackers then
1205     -- local function check()
1206     --     if trace_lprofile or then
1207     --         apply = profiled_apply
1208     --     elseif trace_lpath then
1209     --         apply = traced_apply
1210     --     else
1211     --         apply = normal_apply
1212     --     end
1213     -- end
1214     -- trackers.register("xml.path",   check) -- can be "xml.path,xml.parse,xml.profile
1215     -- trackers.register("xml.parse",  check)
1216     -- trackers.register("xml.profile",check)
1217
1218        trackers.register("xml.path,xml.parse,xml.profile",function()
1219            if trace_lprofile then
1220                apply = profiled_apply
1221            elseif trace_lpath then
1222                apply = traced_apply
1223            else
1224                apply = normal_apply
1225            end
1226        end)
1227    end
1228
1229
1230    function xml.applylpath(list,pattern)
1231        if not list then
1232            lastmatch = nil
1233            return
1234        end
1235        local parsed = cache[pattern]
1236        if parsed then
1237            lpathcalls  = lpathcalls + 1
1238            lpathcached = lpathcached + 1
1239        elseif type(pattern) == "table" then
1240            lpathcalls = lpathcalls + 1
1241            parsed = pattern
1242        else
1243            parsed = lpath(pattern) or pattern
1244        end
1245        if not parsed then
1246            lastmatch = nil
1247            return
1248        end
1249        local nofparsed = #parsed
1250        if nofparsed == 0 then
1251            lastmatch = nil
1252            return -- something is wrong
1253        end
1254        local collected = apply({ list },parsed,nofparsed,list.mi)
1255        lastmatch = keepmatch and collected or nil
1256        return collected
1257    end
1258
1259    function xml.lastmatch()
1260        return lastmatch
1261    end
1262
1263    local stack  = { }
1264
1265    function xml.pushmatch()
1266        insert(stack,lastmatch)
1267    end
1268
1269    function xml.popmatch()
1270        lastmatch = remove(stack)
1271    end
1272
1273end
1274
1275local applylpath = xml.applylpath
1276--[[ldx--
1277<p>This is the main filter function. It returns whatever is asked for.</p>
1278--ldx]]--
1279
-- Thin public wrapper: hands root and pattern to applylpath and passes on its
-- result (there is no longer any funny attribute handling here).
xml.filter = function(root,pattern)
    return applylpath(root,pattern)
end
1283
1284-- internal (parsed)
1285
-- Expression helper: applies pattern to element e and returns the result of
-- applylpath as is.
function expressions.child(e,pattern)
    return applylpath(e,pattern) -- todo: cache
end
1289
-- Expression helper: returns the number of matches of pattern relative to
-- element e, and 0 when there is no match. A missing pattern also gives 0; this
-- is checked before the pattern is applied because a nil pattern cannot be
-- parsed (it used to raise an error before the old guard was reached).
expressions.count = function(e,pattern)
    if not pattern then
        return 0
    end
    local collected = applylpath(e,pattern) -- todo: cache
    return (collected and #collected) or 0
end
1294
-- Expression helper for attributes of element e. With a value it returns
-- whether attribute name equals that value, without one it returns the
-- attribute itself. The result is nil when e is not a table, when no name is
-- given or when e has no attribute table.
expressions.attribute = function(e,name,value)
    if type(e) ~= "table" or not name then
        return nil
    end
    local attributes = e.at
    if not attributes then
        return nil
    end
    local found = attributes[name]
    if value then
        return found == value
    end
    return found
end
1309
1310-- external
1311
1312-- expressions.oneof = function(s,...)
1313--     local t = {...}
1314--     for i=1,#t do
1315--         if s == t[i] then
1316--             return true
1317--         end
1318--     end
1319--     return false
1320-- end
1321
1322-- could be a hashed hash
1323
-- Expression helper: true when s equals one of the remaining arguments. The
-- arguments are visited with select so that no table is needed and nil
-- arguments are compared too.
expressions.oneof = function(s,...)
    local total = select("#",...)
    local index = 1
    while index <= total do
        local candidate = select(index,...) -- only the first value is kept
        if s == candidate then
            return true
        end
        index = index + 1
    end
    return false
end
1332
-- Passes a message about an unknown function (str, or "?" when not given) to
-- xml.errorhandler and returns false.
function expressions.error(str)
    local name = tostring(str or "?")
    xml.errorhandler(format("unknown function in lpath expression: %s",name))
    return false
end

-- True when the argument is nil (false does not count as undefined).
function expressions.undefined(s)
    return s == nil
end

-- Sets quit_expression unless the argument is exactly false (so also when no
-- argument is given); always returns true.
function expressions.quit(s)
    if s ~= false then
        quit_expression = true
    end
    return true
end

-- Prints its arguments and returns true so that it can sit in an expression.
function expressions.print(...)
    print(...)
    return true
end

-- existing functions that are made available in expressions under these names

expressions.find, expressions.upper, expressions.lower = find, upper, lower

expressions.number  = tonumber
expressions.boolean = toboolean
1359
-- Expression helper: true when pattern (as used by find) is found in str, where
-- str is either a string or a list of which only the string entries are
-- checked. Anything else gives false.
function expressions.contains(str,pattern)
    local kind = type(str)
    if kind == "string" then
        return find(str,pattern) ~= nil
    end
    if kind == "table" then
        for index=1,#str do
            local entry = str[index]
            if type(entry) == "string" and find(entry,pattern) then
                return true
            end
        end
    end
    return false
end
1376
-- Expression helper: returns str without a leading "#"; anything that is not a
-- string gives an empty string.
function expressions.idstring(str)
    if type(str) ~= "string" then
        return ""
    end
    local stripped = gsub(str,"^#","") -- drops the replacement count
    return stripped
end
1380
1381-- user interface
1382
-- Old style traversal: applies pattern to root and calls handle(r,d,k) for each
-- match, where r is the parent of the match, d the data table of that parent
-- and k the ni field of the match. Nothing happens when there is no result.
local function traverse(root,pattern,handle)
 -- report_lpath("use 'xml.selection' instead for pattern: %s",pattern)
    local collected = applylpath(root,pattern)
    if not collected then
        return
    end
    for index=1,#collected do
        local element = collected[index]
        local parent  = element.__p__
        handle(parent,parent.dt,element.ni)
    end
end
1394
-- New style selection: applies pattern to root. With a handle, the handle is
-- called with each match and nothing is returned; without one the collection
-- itself is returned. Nothing is returned when there is no result.
local function selection(root,pattern,handle)
    local collected = applylpath(root,pattern)
    if not collected then
        return
    end
    if not handle then
        return collected
    end
    for index=1,#collected do
        handle(collected[index])
    end
end
1407
1408xml.traverse      = traverse           -- old method, r, d, k
1409xml.selection     = selection          -- new method, simple handle
1410
1411--~ function xml.cachedpatterns()
1412--~     return cache
1413--~ end
1414
1415-- generic function finalizer (independent namespace)
1416
-- Finalizer that calls functions[fnc] for each collected element, passing the
-- element followed by the extra arguments. An unknown function name is
-- reported; without a collection nothing happens.
local function dofunction(collected,fnc,...)
    if not collected then
        return
    end
    local action = functions[fnc]
    if not action then
        report_lpath("unknown function %a",fnc)
        return
    end
    for index=1,#collected do
        action(collected[index],...)
    end
end
1429
1430finalizers.xml["function"] = dofunction
1431finalizers.tex["function"] = dofunction
1432
1433-- functions
1434
-- Expression helper: returns entry n of the data table of the parent of e, or
-- an empty string when the parent has no data table or no such entry.
expressions.text = function(e,n)
    local siblings = e.__p__.dt
    if siblings then
        local entry = siblings[n]
        if entry then
            return entry
        end
    end
    return ""
end
1439
do

    -- Shared lookup for expressions.name and expressions.tag. With n equal to 0
    -- (or not a number) the element e itself is the result. A negative n selects
    -- the n-th element before e and a positive n the n-th element after e in
    -- the data table of the parent of e; only table entries are counted, so
    -- strings are skipped. The result is false when e is not a table, when
    -- there is no such sibling, or when e has no parent data or no ni field
    -- (e.ni is only valid after a filter run).
    local function relative_element(e,n)
        if type(e) ~= "table" then
            return false
        end
        n = tonumber(n) or 0
        if n == 0 then
            return e
        end
        local parent = e.__p__
        local d = parent and parent.dt
        local k = e.ni
        if not d or not k then
            return false
        end
        local first, last, step
        if n < 0 then
            first, last, step = k - 1, 1, -1
            n = -n -- count down to 1 in both directions
        else
            first, last, step = k + 1, #d, 1
        end
        for i=first,last,step do
            local di = d[i]
            if type(di) == "table" then
                if n == 1 then
                    return di
                end
                n = n - 1
            end
        end
        return false
    end

    -- Returns the name of the selected element as "ns:tg", where the rn field
    -- is preferred over ns, or just the tag when there is no namespace; an
    -- empty string when nothing is selected.
    expressions.name = function(e,n) -- ns + tg
        local found = relative_element(e,n)
        if not found then
            return ""
        end
        local ns = found.rn or found.ns or ""
        local tg = found.tg
        if ns ~= "" then
            return ns .. ":" .. tg
        else
            return tg
        end
    end

    -- Returns the tag of the selected element or an empty string when nothing
    -- is selected.
    expressions.tag = function(e,n) -- only tg
        local found = relative_element(e,n)
        return (found and found.tg) or ""
    end

end
1527
1528--[[ldx--
1529<p>Using an iterator often looks nicer in the code than passing handler
1530functions. The <l n='lua'/> book describes how to use coroutines for that
1531purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
1532code like:</p>
1533
1534<typing>
1535for r, d, k in xml.elements(xml.load('text.xml'),"title") do
1536    print(d[k]) -- old method
1537end
1538for e in xml.collected(xml.load('text.xml'),"title") do
1539    print(e) -- new one
1540end
1541</typing>
1542--ldx]]--
1543
1544-- local wrap, yield = coroutine.wrap, coroutine.yield
1545-- local dummy = function() end
1546--
1547-- function xml.elements(root,pattern,reverse) -- r, d, k
1548--     local collected = applylpath(root,pattern)
1549--     if collected then
1550--         if reverse then
1551--             return wrap(function() for c=#collected,1,-1 do
1552--                 local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
1553--             end end)
1554--         else
1555--             return wrap(function() for c=1,#collected    do
1556--                 local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
1557--             end end)
1558--         end
1559--     end
1560--     return wrap(dummy)
1561-- end
1562--
1563-- function xml.collected(root,pattern,reverse) -- e
1564--     local collected = applylpath(root,pattern)
1565--     if collected then
1566--         if reverse then
1567--             return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end)
1568--         else
1569--             return wrap(function() for c=1,#collected    do yield(collected[c]) end end)
1570--         end
1571--     end
1572--     return wrap(dummy)
1573-- end
1574
1575-- faster:
1576
1577local dummy = function() end
1578
-- Iterator over the matches of pattern in root, old style: each step gives
-- r, d, k, being the parent of the match, the data table of that parent and
-- the ni field of the match. With reverse set the matches are visited from
-- last to first. Without matches the shared do-nothing function is returned.
function xml.elements(root,pattern,reverse) -- r, d, k
    local collected = applylpath(root,pattern)
    local total = collected and #collected or 0
    if total == 0 then
        return dummy
    end
    -- current is the index handed out last; we stop once it reaches last
    local current, last, step
    if reverse then
        current, last, step = total + 1, 1, -1
    else
        current, last, step = 0, total, 1
    end
    return function()
        if current ~= last then
            current = current + step
            local element = collected[current]
            local parent  = element.__p__
            return parent, parent.dt, element.ni
        end
    end
end
1610
-- Iterator over the matches of pattern in root, new style: each step gives the
-- matched element itself. With reverse set the matches are visited from last
-- to first. Without matches the shared do-nothing function is returned.
function xml.collected(root,pattern,reverse) -- e
    local collected = applylpath(root,pattern)
    local total = collected and #collected or 0
    if total == 0 then
        return dummy
    end
    -- current is the index handed out last; we stop once it reaches last
    local current, last, step
    if reverse then
        current, last, step = total + 1, 1, -1
    else
        current, last, step = 0, total, 1
    end
    return function()
        if current ~= last then
            current = current + step
            return collected[current]
        end
    end
end
1638
1639-- handy
1640
-- Reports each match of pattern in collection, serialized with xml.tostring;
-- the pattern defaults to "." when not given.
function xml.inspect(collection,pattern)
    local used = pattern or "."
    for element in xml.collected(collection,used) do
        report_lpath("pattern: %s\n\n%s\n",used,xml.tostring(element))
    end
end
1647
1648-- texy (see xfdf):
1649
-- Normalizes the line breaks in all strings below element e, in place and
-- recursively: leading and trailing newlines and returns are stripped and each
-- remaining run of them becomes one empty line ("\n\n"). Returns e.
local function split(e) -- todo: use helpers / lpeg
    local dt = e.dt
    if dt then
        for i=1,#dt do
            local dti = dt[i]
            if type(dti) == "string" then
                -- the pattern has to be anchored at the end too, otherwise the
                -- lazy capture matches nothing and the trailing part is kept
                dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*$","%1")
                dti = gsub(dti,"[\n\r]+","\n\n")
                dt[i] = dti
            else
                split(dti)
            end
        end
    end
    return e
end
1666
-- Finalizer that normalizes the line breaks in each collected element (see
-- split) and returns the collection. A finalizer is also called when there is
-- no match, in which case the collection can be nil; it is then returned as is
-- instead of raising an error on its length.
function xml.finalizers.paragraphs(c)
    if c then
        for i=1,#c do
            split(c[i])
        end
    end
    return c
end
1673
1674-- local lpegmatch = lpeg.match
1675-- local w = lpeg.patterns.whitespace
1676-- local p = w^0 * lpeg.Cf(lpeg.Ct("") * lpeg.Cg(lpeg.C((1-w)^1) * lpeg.Cc(true) * w^0)^1,rawset)
1677
1678-- function xml.functions.classes(e,class) -- cache
1679--     class = class and e.at[class] or e.at.class
1680--     if class then
1681--         return lpegmatch(p,class)
1682--     else
1683--         return { }
1684--     end
1685-- end
1686
1687-- local gmatch = string.gmatch
1688
1689-- function xml.functions.hasclass(e,c,class)
1690--     class = class and e.at[class] or e.at.class
1691--     if class and class ~= "" then
1692--         if class == c then
1693--             return true
1694--         else
1695--             for s in gmatch(class,"%S+") do
1696--                 if s == c then
1697--                     return true
1698--                 end
1699--             end
1700--         end
1701--     end
1702--     return false
1703-- end
1704