-- lxml-lpt.lua / size: 53 Kb / last modification: 2023-12-21 09:44
-- Register this file in the global module table, creating that table on demand.

modules = modules or { }

modules['lxml-lpt'] = {
    version   = 1.001,
    comment   = "this module is the basis for the lxml-* ones",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- e.ni is only valid after a filter run
10-- todo: B/C/[get first match]
11
12local concat, remove, insert = table.concat, table.remove, table.insert
13local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
14local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
15local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
16
17local setmetatableindex = table.setmetatableindex
18local formatters = string.formatters -- no need (yet) as paths are cached anyway
19
20-- beware, this is not xpath ... e.g. position is different (currently) and
21-- we have reverse-sibling as reversed preceding sibling
22
23-- This module can be used stand alone but also inside ConTeXt in which case it
24-- hooks into the tracker code. Therefore we provide a few functions that set the
25-- tracers. Here we overload a previously defined function.
26--
27-- If I can get in the mood I will make a variant that is XSLT compliant but I
28-- wonder if it makes sense.
29--
-- Especially the lpath code is experimental, we will support some of xpath, but
31-- only things that make sense for us; as compensation it is possible to hook in
32-- your own functions. Apart from preprocessing content for ConTeXt we also need
33-- this module for process management, like handling CTX and RLX files.
34--
35--   a/b/c /*/c
36--   a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
37--   a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
38
-- Tracing switches: all of them are off until a tracker turns them on.

local trace_lpath, trace_lparse, trace_lprofile = false, false, false

local report_lpath = logs.reporter("xml","lpath")

-- The switches are only hooked up when a tracker table is available; without
-- one they simply keep their initial value.

if trackers then
    local register = trackers.register
    register("xml.path",    function(v) trace_lpath  = v end)
    register("xml.parse",   function(v) trace_lparse = v end)
    register("xml.profile", function(v)
        -- profiling sets the two other switches as well
        trace_lpath, trace_lparse, trace_lprofile = v, v, v
    end)
end
57
58-- We've now arrived at an interesting part: accessing the tree using a subset of
59-- XPATH and since we're not compatible we call it LPATH. We will explain more about
60-- its usage in other documents.
61
local xml = xml

-- Two counters, readable from outside through accessor functions.

local lpathcalls  = 0
local lpathcached = 0

function xml.lpathcalls () return lpathcalls  end
function xml.lpathcached() return lpathcached end

-- The sub tables below are created only when they do not exist yet, so that
-- whatever is already registered in them is kept; each one also gets a local
-- shortcut.

local functions      = xml.functions or { } -- internal
xml.functions        = functions

local expressions    = xml.expressions or { } -- in expressions
xml.expressions      = expressions

local finalizers     = xml.finalizers or { } -- fast do-with ... (with return value other than collection)
xml.finalizers       = finalizers

local specialhandler = xml.specialhandler or { }
xml.specialhandler   = specialhandler

local xmlpatterns    = lpegpatterns.xml or { }
lpegpatterns.xml     = xmlpatterns

-- per protocol finalizers

finalizers.xml = finalizers.xml or { }
finalizers.tex = finalizers.tex or { }
84
-- Index handler for the per protocol finalizer tables: a name that is not found
-- there is looked up in the shared finalizers table. A hit is stored in the
-- protocol table `t` so that the next access is direct. A miss is reported and
-- answered with a fresh do-nothing function (which is not stored).

local function fallback (t, name)
    local fn = finalizers[name]
    if not fn then
        report_lpath("unknown sub finalizer %a",name)
        return function() end
    end
    t[name] = fn
    return fn
end

setmetatableindex(finalizers.xml, fallback)
setmetatableindex(finalizers.tex, fallback)

xml.defaultprotocol = "xml"
100
101-- as xsl does not follow xpath completely here we will also
102-- be more liberal especially with regards to the use of | and
103-- the rootpath:
104--
105-- test    : all 'test' under current
106-- /test   : 'test' relative to current
107-- a|b|c   : set of names
108-- (a|b|c) : idem
109-- !       : not
110--
111-- after all, we're not doing transformations but filtering. in
112-- addition we provide filter functions (last bit)
113--
114-- todo: optimizer
115--
116-- .. : parent
117-- *  : all kids
118-- /  : anchor here
119-- // : /**/
120-- ** : all in between
121--
122-- so far we had (more practical as we don't transform)
123--
124-- {/test}   : kids 'test' under current node
125-- {test}    : any kid with tag 'test'
126-- {//test}  : same as above
127
128-- evaluator (needs to be redone, for the moment copied)
129
130-- todo: apply_axis(list,notable) and collection vs single
131
-- Every axis is a function that takes a list of nodes and returns a list.

local apply_axis = { }

-- root: each node is replaced by the topmost node that is reached by following
-- the __p__ (parent) links; a node without parent is its own root.

apply_axis['root'] = function(list)
    local roots = { }
    for index=1,#list do
        local top = list[index]
        local up  = top and top.__p__
        while up do
            top = up
            up  = up.__p__
        end
        roots[index] = top
    end
    return roots
end
149
apply_axis['self'] = function(list)
    -- The list is handed back as it is (the same table, not a copy). The
    -- copying variant is kept for reference:
    --
    -- local collected = { }
    -- for l=1,#list do
    --     collected[l] = list[l]
    -- end
    -- return collected
    return list
end
158
-- child: collects the element children (entries with a tg field) of every node
-- in the list. As a side effect the bookkeeping fields are refreshed:
--
--   child.ni  : index of the child in the data table of its parent
--   child.ei  : index of the child when only elements are counted
--   parent.en : number of element children
--
-- The element count is always written, also when the only child is not an
-- element (for instance a string). Previously that case left an older value of
-- en in place, unlike the collector used for the descendant axis.

apply_axis['child'] = function(list)
    local collected = { }
    local c         = 0
    for l=1,#list do
        local ll = list[l]
        local dt = ll.dt
        if dt then -- weird that this is needed
            local en = 0
            for k=1,#dt do
                local dk = dt[k]
                if dk.tg then
                    c = c + 1
                    en = en + 1
                    collected[c] = dk
                    dk.ni = k -- refresh
                    dk.ei = en
                end
            end
            ll.en = en
        end
    end
    return collected
end
196
-- Depth first collector: appends all element descendants of `list` (a single
-- node, despite its name) to `collected`, starting after slot `c`, and returns
-- the new count. Each element is stored before its own descendants. While
-- walking, ni (index in the parent's data table), ei (index among elements) and
-- en (number of element children) are refreshed.

local function collect(list,collected,c)
    local dt = list.dt
    if not dt then
        return c
    end
    local en = 0
    for k=1,#dt do
        local dk = dt[k]
        if dk.tg then
            en = en + 1
            c  = c + 1
            collected[c] = dk
            dk.ni = k -- refresh
            dk.ei = en
            c = collect(dk,collected,c)
        end
    end
    list.en = en
    return c
end
233
-- descendant: all element descendants of every node in the list, in the order
-- in which the collector visits them.

apply_axis['descendant'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        total = collect(list[index],found,total)
    end
    return found
end
242
-- Depth first collector (used by the descendant-or-self axis): appends all
-- element descendants of `list` (a single node) to `collected`, starting after
-- slot `c`, and returns the new count. Each element is stored before its own
-- descendants. The fields ni (index in the parent's data table), ei (index
-- among elements) and en (number of element children) are refreshed on the way.
--
-- When the only child is not an element, en is now set to zero, as the
-- collector of the descendant axis does; before, an older value was left
-- untouched in that case.

local function collect(list,collected,c)
    local dt = list.dt
    if dt then
        local n = #dt
        if n == 0 then
            list.en = 0
        elseif n == 1 then
            local dk = dt[1]
            if dk.tg then
                c = c + 1
                collected[c] = dk
                dk.ni = 1 -- refresh
                dk.ei = 1
                c = collect(dk,collected,c)
                list.en = 1
            else
                list.en = 0 -- a single child that is no element
            end
        else
            local en = 0
            for k=1,n do
                local dk = dt[k]
                if dk.tg then
                    c = c + 1
                    en = en + 1
                    collected[c] = dk
                    dk.ni = k -- refresh
                    dk.ei = en
                    c = collect(dk,collected,c)
                end
            end
            list.en = en
        end
    end
    return c
end
277
-- descendant-or-self: every node in the list followed by its element
-- descendants. A node that is flagged as special is not stored itself, but its
-- descendants are.

apply_axis['descendant-or-self'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        local node = list[index]
        if node.special ~= true then -- catch double root
            total = total + 1
            found[total] = node
        end
        total = collect(node,found,total)
    end
    return found
end
291
-- ancestor: for every node in the list all nodes on its __p__ (parent) chain,
-- nearest one first. Ancestors that are shared by several nodes show up once
-- for each of them.

apply_axis['ancestor'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        local up = list[index]
        up = up and up.__p__
        while up do
            total = total + 1
            found[total] = up
            up = up.__p__
        end
    end
    return found
end
307
-- ancestor-or-self: every node in the list, directly followed by the nodes on
-- its __p__ (parent) chain, nearest one first.

apply_axis['ancestor-or-self'] = function(list)
    local found, total = { }, 0
    for index=1,#list do
        local node = list[index]
        total = total + 1
        found[total] = node
        node = node and node.__p__
        while node do
            total = total + 1
            found[total] = node
            node = node.__p__
        end
    end
    return found
end
325
-- parent: the __p__ of every node in the list; nodes without one contribute
-- nothing.

apply_axis['parent'] = function(list)
    local parents, total = { }, 0
    for index=1,#list do
        local up = list[index].__p__
        if up then
            total = total + 1
            parents[total] = up
        end
    end
    return parents
end
338
-- The attribute and namespace axes select nothing: whatever the input is, the
-- result is a new empty list.

apply_axis['attribute'] = function(list)
    local nothing = { }
    return nothing
end

apply_axis['namespace'] = function(list)
    local nothing = { }
    return nothing
end
346
-- The following and preceding axes are incomplete: for now both return a new
-- empty list. The sketches of an implementation are kept as comments.

apply_axis['following'] = function(list) -- incomplete
    -- sketch (first following table entry only):
    --
    -- local collected, c = { }, 0
    -- for l=1,#list do
    --     local ll = list[l]
    --     local p = ll.__p__
    --     local d = p.dt
    --     for i=ll.ni+1,#d do
    --         local di = d[i]
    --         if type(di) == "table" then
    --             c = c + 1
    --             collected[c] = di
    --             break
    --         end
    --     end
    -- end
    -- return collected
    local nothing = { }
    return nothing
end

apply_axis['preceding'] = function(list) -- incomplete
    -- sketch (first preceding table entry only):
    --
    -- local collected = { }
    -- local c = 0
    -- for l=1,#list do
    --     local ll = list[l]
    --     local p = ll.__p__
    --     local d = p.dt
    --     for i=ll.ni-1,1,-1 do
    --         local di = d[i]
    --         if type(di) == "table" then
    --             c = c + 1
    --             collected[c] = di
    --             break
    --         end
    --     end
    -- end
    -- return collected
    local nothing = { }
    return nothing
end
385
-- The three sibling axes need the same information: the data table of the
-- parent and the position of the node in it. The shared helper lives in a
-- do-block so that it does not add a file level local.

do

    -- Returns the parent's data table and the index of `ll` in it. Nothing is
    -- returned when `ll` has no parent (a root), when the parent has no data
    -- table, or when `ll` is not found in that table. The cached ni is used
    -- when it (still) points at `ll`; as ni is only valid after a filter run,
    -- a missing or outdated value is replaced by a scan of the data table.
    -- Before, a node without parent or without ni made these axes fail with a
    -- runtime error.

    local function siblings(ll)
        local p = ll.__p__
        local d = p and p.dt
        if not d then
            return
        end
        local ni = ll.ni
        if ni and d[ni] == ll then
            return d, ni
        end
        for i=1,#d do
            if d[i] == ll then
                return d, i
            end
        end
    end

    -- following-sibling: all table entries after the node, in document order.

    apply_axis['following-sibling'] = function(list)
        local collected = { }
        local c = 0
        for l=1,#list do
            local d, ni = siblings(list[l])
            if d then
                for i=ni+1,#d do
                    local di = d[i]
                    if type(di) == "table" then
                        c = c + 1
                        collected[c] = di
                    end
                end
            end
        end
        return collected
    end

    -- preceding-sibling: all table entries before the node, in document order.

    apply_axis['preceding-sibling'] = function(list)
        local collected = { }
        local c = 0
        for l=1,#list do
            local d, ni = siblings(list[l])
            if d then
                for i=1,ni-1 do
                    local di = d[i]
                    if type(di) == "table" then
                        c = c + 1
                        collected[c] = di
                    end
                end
            end
        end
        return collected
    end

    -- reverse-sibling: all table entries before the node, nearest one first.

    apply_axis['reverse-sibling'] = function(list) -- reverse preceding
        local collected = { }
        local c = 0
        for l=1,#list do
            local d, ni = siblings(list[l])
            if d then
                for i=ni-1,1,-1 do
                    local di = d[i]
                    if type(di) == "table" then
                        c = c + 1
                        collected[c] = di
                    end
                end
            end
        end
        return collected
    end

end
439
-- The auto and initial variants use the very same function as the plain axis
-- they are named after.

for alias, axis in next, {
    ['auto-descendant-or-self'] = 'descendant-or-self',
    ['auto-descendant']         = 'descendant',
    ['auto-child']              = 'child',
    ['auto-self']               = 'self',
    ['initial-child']           = 'child',
} do
    apply_axis[alias] = apply_axis[axis]
end
445
-- Filters `list` by node test. `nodes` is a flat array of triplets where the
-- second entry of each triplet is a namespace and the third a tag (a false
-- entry means: don't care). With a true `directive` the matching nodes are
-- kept, otherwise the non matching ones.
--
-- Every node that is kept gets its mi field set: a counter that restarts at 1
-- whenever the parent differs from the parent of the previously kept node.
--
-- A single triplet is handled specially:
--
--   no namespace, no tag : wildcard, the list itself or a new empty list
--   tag only             : only nodes with a tg are candidates
--   namespace only       : only nodes with a namespace (rn or ns) are candidates
--   both                 : only nodes with a tg are candidates
--
-- In all other cases a node with a tg matches when one of the triplets matches.

local function apply_nodes(list,directive,nodes)
    -- todo: nodes[1] etc ... negated node name in set ... when needed
    -- ... currently ignored
    local maxn = #nodes
    local nns, ntg
    local mode -- 1: tag only, 2: namespace only, 3: tag and namespace, 4: set of triplets
    if maxn ~= 3 then
        mode = 4
    else
        nns, ntg = nodes[2], nodes[3]
        if ntg then
            mode = nns and 3 or 1
        elseif nns then
            mode = 2
        elseif directive then
            return list -- wildcard
        else
            return { }
        end
    end
    local collected = { }
    local c = 0
    local m = 0
    local p = nil
    for l=1,#list do
        local ll = list[l]
        local ok = nil -- stays nil when the node is not a candidate at all
        if mode == 2 then
            local lns = ll.rn or ll.ns
            if lns then
                ok = lns == nns
            end
        else
            local ltg = ll.tg
            if ltg then
                if mode == 1 then
                    ok = ltg == ntg
                else
                    local lns = ll.rn or ll.ns
                    if mode == 3 then
                        ok = ltg == ntg and lns == nns
                    else
                        ok = false
                        for n=1,maxn,3 do
                            local sns = nodes[n+1]
                            local stg = nodes[n+2]
                            if (not stg or ltg == stg) and (not sns or lns == sns) then
                                ok = true
                                break
                            end
                        end
                    end
                end
            end
        end
        if ok ~= nil and ((directive and ok) or not (directive or ok)) then
            local llp = ll.__p__
            if llp ~= p then
                p = llp
                m = 1
            else
                m = m + 1
            end
            c = c + 1
            collected[c] = ll
            ll.mi = m
        end
    end
    return collected
end
566
-- Set while a list is filtered to stop that run early; it is reset at the
-- start of every run.

local quit_expression = false

-- Keeps the nodes of `list` for which `expression` returns a true value. The
-- expression is called as expression(list,node,index,order). The walk over the
-- list ends as soon as quit_expression is found set after a call.

local function apply_expression(list,expression,order)
    local kept, total = { }, 0
    quit_expression = false
    local index, last = 1, #list
    while index <= last do
        local node = list[index]
        if expression(list,node,index,order) then -- nasty: order is only valid when n=1
            total = total + 1
            kept[total] = node
        end
        if quit_expression then
            break
        end
        index = index + 1
    end
    return kept
end
585
-- Applies a selector when xml.applyselector is available. The first time that
-- is the case this function replaces itself by xml.applyselector, so later
-- calls go there directly. Without it the list is returned untouched.

local function apply_selector(list,specification)
    local selector = xml.applyselector
    if not selector then
        return list
    end
    apply_selector = selector
    return selector(list,specification)
end
594
-- this one can be made faster but there are not that many conversions so it doesn't
-- really pay off
597
local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb

-- Basic building blocks. The operator patterns carry a replacement string that
-- is the Lua spelling of the operator: != becomes ~=, a single = becomes ==,
-- | becomes or, & becomes and.

local lp_space   = S(" \n\r\t\f")
local spaces     = lp_space^0
local lp_any     = P(1)
local lp_noequal = (P("!=") / "~=") + P("<=") + P(">=") + P("==")
local lp_doequal = (P("=") / "==")
local lp_or      = (P("|") / " or ")
local lp_and     = (P("&") / " and ")
607
-- Names that can be used in an expression, each with the piece of Lua source
-- that replaces it. In that source `list`, `ll`, `l` and `order` are the
-- arguments that the generated evaluator function receives.

local builtin = {
    -- content related
    text         = "(ll.dt[1] or '')", -- fragile
    content      = "ll.dt",
    -- naming
    name         = "((ll.ns~='' and ll.ns..':'..ll.tg) or ll.tg)",
    tag          = "ll.tg",
    namespace    = "ll.ns",
    ns           = "ll.ns",
    -- the current node and the list it is part of
    self         = "ll",
    list         = "list",
    -- positions
    position     = "l", -- is element in finalizer
    first        = "1",
    firstindex   = "1",
    firstelement = "1",
    last         = "#list",
    lastindex    = "(#ll.__p__.dt or 1)",
    lastelement  = "(ll.__p__.en or 1)",
    rootposition = "order",
    order        = "order",
    -- counters kept in the node (with a fallback when not set)
    element      = "(ll.ei or 1)",
    index        = "(ll.ni or 1)",
    match        = "(ll.mi or 1)",
    -- new
 -- attribute    = "ll.at",
 -- at           = "ll.at",
}

-- A builtin name followed by an empty pair of parentheses: the name is replaced
-- by its entry in the table above and the parentheses are dropped.

local lp_builtin   = (lpeg.utfchartabletopattern(builtin) / builtin) * ((spaces * P("(") * spaces * P(")")) / "")
635
636-- for the moment we keep namespaces with attributes
637
-- @name or attribute::name: the prefix is dropped and the name is wrapped in a
-- lookup in ll.at that is guarded against a missing attribute table.

local lp_attribute =
    ((P("@") + P("attribute::")) / "")
  * Cc("(ll.at and ll.at['")
  * (R("az","AZ") + S("-_:"))^1
  * Cc("'])")

----- lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end
----- lp_fastpos_n = (P("-")   * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end

-- An expression that is just a (signed) number is a position test; a negative
-- number counts from the end of the list.

local lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / "l==%0"
local lp_fastpos_n = (P("-")   * R("09")^1 * P(-1)) / "(%0<0 and (#list+%0+1==l))" -- +1 added
local lp_fastpos   = lp_fastpos_n + lp_fastpos_p

-- words that are passed on as they are

local lp_reserved  =
    C("and")
  + C("or")
  + C("not")
  + C("div")
  + C("mod")
  + C("true")
  + C("false")
648
-- local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling
--     return t .. "("
-- end

-- local lp_lua_function = (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / "%0("

-- A dotted name (at least one dot) followed by an opening parenthesis is taken
-- to be a call of a Lua function and is passed on unchanged.

local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * P("(")) / "%0"

-- A plain name followed by an opening parenthesis: when the name is a
-- registered expression function the call is redirected to it, otherwise the
-- call is turned into a call of expr.error.

local lp_function  = (C(R("az","AZ","__")^1) * P("(")) / function(t) -- todo: better . handling
    return expressions[t] and ("expr." .. t .. "(") or "expr.error("
end
663
-- Balanced parentheses; `value` captures what is between the outer pair.

local lparent  = P("(")
local rparent  = P(")")
local noparent = P(1) - (lparent + rparent)
local nested   = P { lparent * (noparent + V(1))^0 * rparent }
local value    = lparent * C((noparent + nested)^0) * rparent -- P{"("*C(((1-S("()"))+V(1))^0)*")"}

-- A bare name becomes a call of expr.child, a bare word (in an argument) is
-- put between single quotes, quoted content is matched as a whole.

local lp_child   = Cc("expr.child(ll,'") * R("az","AZ") * R("az","AZ","--","__")^0 * Cc("')")
local lp_number  = S("+-") * R("09")^1
local lp_string  = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
local lp_content =
    (P("'") * (1 - P("'"))^0 * P("'"))
  + (P('"') * (1 - P('"'))^0 * P('"'))
674
local cleaner -- defined later on, used in the function below

-- name(...), text(...), tag(...), count(...) and child(...) get the current
-- node as first argument; the (cleaned up) user arguments, if any, follow.
--
-- When the function is not registered the generated code calls expr.error
-- with the name as a quoted string. Before, the bare name was emitted, which
-- in the generated code refers to a global variable of that name and not to
-- the name itself.

local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s)
    if expressions[t] then
        s = s and s ~= "" and lpegmatch(cleaner,s)
        if s and s ~= "" then
            return "expr." .. t .. "(ll," .. s ..")"
        else
            return "expr." .. t .. "(ll)"
        end
    else
        return "expr.error('" .. t .. "')"
    end
end
689
-- One piece of an expression. This is an ordered choice, so the sequence of
-- the alternatives matters; anything that is not recognized is passed on
-- character by character.

local content =
    lp_builtin
  + lp_attribute
  + lp_special
  + lp_noequal + lp_doequal
  + lp_or + lp_and
  + lp_reserved
  + lp_lua_function + lp_function
  + lp_content -- too fragile
  + lp_child
  + lp_any

-- Turns an lpath expression into Lua source: either the fast position test or
-- a sequence of pieces in which parenthesized groups may nest.

local converter = Cs (
    lp_fastpos
  + (P { (lparent * V(1)^0 * rparent) + content })^0
)

-- Cleans up the arguments of the special functions.

cleaner = Cs ( (
 -- lp_fastpos +
    lp_reserved
  + lp_number
  + lp_string
  + 1
)^1 )
712
-- Source templates for the chunks that are compiled at registration time.

-- expression evaluator: %s is the converted expression

local template_e =
    "    local expr = xml.expressions\n" ..
    "    return function(list,ll,l,order)\n" ..
    "        return %s\n" ..
    "    end\n"

-- finalizer with arguments: protocol, name, arguments

local template_f_y =
    "    local finalizer = xml.finalizers['%s']['%s']\n" ..
    "    return function(collection)\n" ..
    "        return finalizer(collection,%s)\n" ..
    "    end\n"

-- finalizer without arguments: protocol, name

local template_f_n =
    "    return xml.finalizers['%s']['%s']\n"
730
731--
732
-- One constant descriptor per axis: a table with kind "axis" and the name of
-- the axis. (An `apply = apply_axis[...]` field was considered for these but
-- is not set.)

local register_last_match              = { axis = "last-match",              kind = "axis" }
local register_self                    = { axis = "self",                    kind = "axis" }
local register_parent                  = { axis = "parent",                  kind = "axis" }
local register_descendant              = { axis = "descendant",              kind = "axis" }
local register_child                   = { axis = "child",                   kind = "axis" }
local register_descendant_or_self      = { axis = "descendant-or-self",      kind = "axis" }
local register_root                    = { axis = "root",                    kind = "axis" }
local register_ancestor                = { axis = "ancestor",                kind = "axis" }
local register_ancestor_or_self        = { axis = "ancestor-or-self",        kind = "axis" }
local register_attribute               = { axis = "attribute",               kind = "axis" }
local register_namespace               = { axis = "namespace",               kind = "axis" }
local register_following               = { axis = "following",               kind = "axis" }
local register_following_sibling       = { axis = "following-sibling",       kind = "axis" }
local register_preceding               = { axis = "preceding",               kind = "axis" }
local register_preceding_sibling       = { axis = "preceding-sibling",       kind = "axis" }
local register_reverse_sibling         = { axis = "reverse-sibling",         kind = "axis" }

local register_auto_descendant_or_self = { axis = "auto-descendant-or-self", kind = "axis" }
local register_auto_descendant         = { axis = "auto-descendant",         kind = "axis" }
local register_auto_self               = { axis = "auto-self",               kind = "axis" }
local register_auto_child              = { axis = "auto-child",              kind = "axis" }

local register_initial_child           = { axis = "initial-child",           kind = "axis" }

-- the node test that accepts everything: one triplet without namespace and tag

local register_all_nodes               = { kind = "nodes", nodetest = true, nodes = { true, false, false } }
758
-- expressions for which an error has been reported already

local skip = { }

-- Reports a faulty expression, but only the first time it is seen; the answer
-- is always false.

local function errorrunner_e(str,cnv)
    if skip[str] then
        return false
    end
    report_lpath("error in expression: %s => %s",str,cnv)
    skip[str] = cnv or str
    return false
end

-- Reports a faulty finalizer (every time); the answer is always false.

local function errorrunner_f(str,arg)
    local shown = arg or ""
    report_lpath("error in finalizer: %s(%s)",str,shown)
    return false
end
773
-- Descriptor of a node test step.

local function register_nodes(nodetest,nodes)
    local step = {
        kind     = "nodes",
        nodetest = nodetest,
        nodes    = nodes,
    }
    return step
end

-- Descriptor of a selector step.

local function register_selector(specification)
    local step = {
        kind          = "selector",
        specification = specification,
    }
    return step
end
781
-- Converts an lpath expression into Lua source, wraps that in the evaluator
-- template and compiles the result. When that does not give a function, the
-- evaluator is a stand-in that reports the error and returns nothing.

local function register_expression(expression)
    local converted = lpegmatch(converter,expression)
    local compiled  = load(format(template_e,converted))
 -- print(format(template_e,converted))
    local evaluator = compiled and compiled()
    if not evaluator then
        evaluator = function() errorrunner_e(expression,converted) end
    end
    return {
        kind       = "expression",
        expression = expression,
        converted  = converted,
        evaluator  = evaluator,
    }
end
790
-- Compiles a finalizer step. Without a protocol xml.defaultprotocol is used.
-- With arguments the finalizer is wrapped in a function that passes them on
-- after the collection, without arguments the finalizer itself is used. When
-- compiling does not give a function, the finalizer is a stand-in that
-- reports the error and returns nothing.

local function register_finalizer(protocol,name,arguments)
    protocol = protocol or xml.defaultprotocol
    local source
    if arguments and arguments ~= "" then
        source = format(template_f_y,protocol,name,arguments)
    else
        source = format(template_f_n,protocol,name)
    end
    local compiled  = load(source)
    local finalizer = compiled and compiled()
    if not finalizer then
        finalizer = function() errorrunner_f(name,arguments) end
    end
    return {
        kind      = "finalizer",
        name      = name,
        arguments = arguments,
        finalizer = finalizer,
    }
end
801
-- [ ... ] with nested brackets and quoted strings; what is between the outer
-- brackets is captured.

local expression = P { "ex",
    ex = P("[") * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * P("]"),
    sq = P("'") * (1 - P("'"))^0 * P("'"),
    dq = P('"') * (1 - P('"'))^0 * P('"'),
}

-- ( ... ) up to the first closing parenthesis outside a quoted string; quoted
-- strings are kept as they are and unquoted pieces (between commas) are
-- turned into quoted strings with the %q format.

local arguments = P { "ar",
    ar = P("(") * Cs((V("sq") + V("dq") + V("nq") + (1 - P(")")))^0) * P(")"),
    nq = (1 - S("),'\""))^1 / function(s) return format("%q",s) end,
    sq = P("'") * (1 - P("'"))^0 * P("'"),
    dq = P('"') * (1 - P('"'))^0 * P('"'),
}
814
815-- todo: better arg parser
816
-- Descriptor for a part of a path that could not be parsed.

local function register_error(str)
    local message = format("unparsed: %s",str)
    return {
        kind  = "error",
        error = message,
    }
end
820
821-- there is a difference in * and /*/ and so we need to catch a few special cases
822
-- a lone *, a lone / and the empty path

local special_1 = P("*") * Cc(register_auto_descendant) * Cc(register_all_nodes) -- last one not needed
local special_2 = P("/") * Cc(register_auto_self)
local special_3 = P("")  * Cc(register_auto_self)

-- Succeed (without consuming anything) when the next character is not a colon,
-- respectively not an opening parenthesis, or when the end has been reached.

local no_nextcolon   = P(-1) + #(P(1) - P(":")) -- newer lpeg needs the P(-1)
local no_nextlparent = P(-1) + #(P(1) - P("(")) -- newer lpeg needs the P(-1)
829
830local pathparser = Ct { "patterns", -- can be made a bit faster by moving some patterns outside
831
832    patterns             = spaces * V("protocol") * spaces * (
833                              ( V("special") * spaces * P(-1)                                                         ) +
834                              ( V("initial") * spaces * V("step") * spaces * (P("/") * spaces * V("step") * spaces)^0 )
835                           ),
836
837    protocol             = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),
838
839 -- the / is needed for // as descendant or self is somewhat special
840 --
841 -- step                 = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
842    step                 = ((V("shortcuts") + V("selector") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
843
844    axis                 = V("last_match")
845                         + V("descendant")
846                         + V("child")
847                         + V("parent")
848                         + V("self")
849                         + V("root")
850                         + V("ancestor")
851                         + V("descendant_or_self")
852                         + V("following_sibling")
853                         + V("following")
854                         + V("reverse_sibling")
855                         + V("preceding_sibling")
856                         + V("preceding")
857                         + V("ancestor_or_self")
858                         + #(1-P(-1)) * Cc(register_auto_child),
859
860    special              = special_1
861                         + special_2
862                         + special_3,
863
864    initial              = (P("/") * spaces * Cc(register_initial_child))^-1,
865
866    error                = (P(1)^1) / register_error,
867
868    shortcuts_a          = V("s_descendant_or_self")
869                         + V("s_descendant")
870                         + V("s_child")
871                         + V("s_parent")
872                         + V("s_self")
873                         + V("s_root")
874                         + V("s_ancestor")
875                         + V("s_lastmatch"),
876
877    shortcuts            = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,
878
879    s_descendant_or_self = (P("***/") + P("/"))  * Cc(register_descendant_or_self), --- *** is a bonus
880    s_descendant         = P("**")               * Cc(register_descendant),
881    s_child              = P("*") * no_nextcolon * Cc(register_child),
882    s_parent             = P("..")               * Cc(register_parent),
883    s_self               = P("." )               * Cc(register_self),
884    s_root               = P("^^")               * Cc(register_root),
885    s_ancestor           = P("^")                * Cc(register_ancestor),
886    s_lastmatch          = P("=")                * Cc(register_last_match),
887
888    -- we can speed this up when needed but we cache anyway so ...
889
890    descendant           = P("descendant::")         * Cc(register_descendant),
891    child                = P("child::")              * Cc(register_child),
892    parent               = P("parent::")             * Cc(register_parent),
893    self                 = P("self::")               * Cc(register_self),
894    root                 = P('root::')               * Cc(register_root),
895    ancestor             = P('ancestor::')           * Cc(register_ancestor),
896    descendant_or_self   = P('descendant-or-self::') * Cc(register_descendant_or_self),
897    ancestor_or_self     = P('ancestor-or-self::')   * Cc(register_ancestor_or_self),
898 -- attribute            = P('attribute::')          * Cc(register_attribute),
899 -- namespace            = P('namespace::')          * Cc(register_namespace),
900    following            = P('following::')          * Cc(register_following),
901    following_sibling    = P('following-sibling::')  * Cc(register_following_sibling),
902    preceding            = P('preceding::')          * Cc(register_preceding),
903    preceding_sibling    = P('preceding-sibling::')  * Cc(register_preceding_sibling),
904    reverse_sibling      = P('reverse-sibling::')    * Cc(register_reverse_sibling),
905    last_match           = P('last-match::')         * Cc(register_last_match),
906
907    selector             = P("{") * C((1-P("}"))^1) * P("}") / register_selector,
908
909    nodes                = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,
910
911    expressions          = expression / register_expression,
912
913    letters              = R("az")^1,
914    name                 = (1-S("/[]()|:*!"))^1, -- make inline
915    negate               = P("!") * Cc(false),
916
917    nodefunction         = V("negate") + P("not") * Cc(false) + Cc(true),
918    nodetest             = V("negate") + Cc(true),
919    nodename             = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
920    wildnodename         = (C(V("name")) + P("*") * Cc(false)) * no_nextlparent,
921    nodeset              = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,
922
923    finalizer            = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,
924
925}
926
927xmlpatterns.pathparser = pathparser
928
929local cache = { }
930
-- Turns a nodeset into something readable for tracing. A nodeset is a flat
-- list of triplets (directive, namespace, tag); a false directive negates the
-- entry and a false nodetest negates the whole set.
local function nodesettostring(set,nodetest)
    local parts, n = { }, 0
    for first=1,#set,3 do
        local namespace = set[first+1]
        local tag       = set[first+2]
        if not namespace or namespace == "" then
            namespace = "*"
        end
        if not tag or tag == "" then
            tag = "*"
        end
        local label
        if tag == "@rt@" then
            label = "[root]"
        else
            label = format("%s:%s",namespace,tag)
        end
        if not set[first] then
            label = format("not(%s)",label)
        end
        n = n + 1
        parts[n] = label
    end
    local joined = concat(parts,"|")
    if nodetest == false then
        return format("not(%s)",joined)
    end
    return joined
end
946
-- Summarizes a list of elements as space separated "ns:tg" entries (used in
-- trace messages); missing or empty parts are shown as "*".
local function tagstostring(list)
    local count = #list
    if count == 0 then
        return "no elements"
    end
    local names = { }
    for index=1,count do
        local element = list[index]
        local tag     = element.tg
        if tag == "@rt@" then
            names[index] = "[root]"
        else
            local namespace = element.ns
            if not namespace or namespace == "" then
                namespace = "*"
            end
            if not tag or tag == "" then
                tag = "*"
            end
            names[index] = format("%s:%s",namespace,tag)
        end
    end
    return concat(names," ")
end
963
964xml.nodesettostring = nodesettostring
965
966local lpath -- we have a harmless kind of circular reference
967
-- Reports a parsed lpath (or parses the given pattern string first) in
-- serialized form.
local function lshow(parsed)
    if type(parsed) == "string" then
        parsed = lpath(parsed)
    end
    local protocol   = parsed.protocol or xml.defaultprotocol
    local pattern    = parsed.pattern
    local serialized = table.serialize(parsed,false)
    report_lpath("%s://%s => %s",protocol,pattern,serialized)
end
975
976xml.lshow = lshow
977
-- Appends a remark to the 'comment' list of a parsed pattern; the list is
-- created on first use.
local function add_comment(p,str)
    local comments = p.comment
    if comments then
        comments[#comments+1] = str
    else
        p.comment = { str }
    end
end
986
-- Converts a pattern string into a parsed (and cached) lpath. A table is
-- assumed to be an already parsed pattern and is returned as-is. A pattern
-- that results in no steps is replaced by a 'self' step flagged as a parsing
-- error, and a pattern that cannot be matched at all yields a table with just
-- the pattern.
lpath = function (pattern) -- the gain of caching is rather minimal
    lpathcalls = lpathcalls + 1
    if type(pattern) == "table" then
        return pattern
    end
    local known = cache[pattern]
    if known then
        lpathcached = lpathcached + 1
        return known
    end
    local parsed = lpegmatch(pathparser,pattern)
    if not parsed then
        parsed = { pattern = pattern }
    else
        parsed.pattern = pattern
        if #parsed == 0 then
            parsed = { pattern = pattern, register_self, state = "parsing error" }
            report_lpath("parsing error in pattern: %s",pattern)
            lshow(parsed)
        else
            -- patching the result afterwards is cleaner than a more
            -- complex parser
            local first = parsed[1]
            local axis  = first.axis
            if axis == "auto-child" then
                local descendant_or_self = false -- compile time switch
                if descendant_or_self then
                    add_comment(parsed, "auto-child replaced by auto-descendant-or-self")
                    parsed[1] = register_auto_descendant_or_self
                else
                    add_comment(parsed, "auto-child replaced by auto-descendant")
                    parsed[1] = register_auto_descendant
                end
            elseif axis == "initial-child" and #parsed > 1 and parsed[2].axis then
                add_comment(parsed, "initial-child removed") -- we could also make it a auto-self
                remove(parsed,1)
            end
            local last = #parsed -- the removal above can have changed the size
            if last > 1 then
                local final = parsed[last]
                if final.kind == "nodes" and final.nodetest == true then
                    local nodes = final.nodes
                    if nodes[1] == true and nodes[2] == false and nodes[3] == false then
                        add_comment(parsed, "redundant final wildcard filter removed")
                        remove(parsed,last)
                    end
                end
            end
        end
    end
    cache[pattern] = parsed
    if trace_lparse and not trace_lprofile then
        lshow(parsed)
    end
    return parsed
end
1043
1044xml.lpath = lpath
1045
-- We could move all calls inline and then merge the tracing back in.
-- Technically we can combine the axis and the following nodes step, which
-- is what we did before, but this is a bit cleaner (although slower too).
-- Interestingly, going inline does not make things that much faster.
1051--
1052-- beware: we need to return a collection even when we filter
1053-- else the (simple) cache gets messed up
1054
1055-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
1056-- and it also messes up finalizers
1057
1058-- watch out: when there is a finalizer, it's always called as there
1059-- can be cases that a finalizer returns (or does) something in case
1060-- there is no match; an example of this is count()
1061
1062do
1063
1064    local profiled  = { }
1065    xml.profiled    = profiled
1066    local lastmatch = nil  -- we remember the last one .. drawback: no collection till new collect
1067    local keepmatch = nil  -- we remember the last one .. drawback: no collection till new collect
1068
1069    if directives then
1070        directives.register("xml.path.keeplastmatch",function(v)
1071            keepmatch = v
1072            lastmatch = nil
1073        end)
1074    end
1075
1076    apply_axis["last-match"] = function()
1077        return lastmatch or { }
1078    end
1079
1080    local function profiled_apply(list,parsed,nofparsed,order)
1081        local p = profiled[parsed.pattern]
1082        if p then
1083            p.tested = p.tested + 1
1084        else
1085            p = { tested = 1, matched = 0, finalized = 0 }
1086            profiled[parsed.pattern] = p
1087        end
1088        local collected = list
1089        for i=1,nofparsed do
1090            local pi = parsed[i]
1091            local kind = pi.kind
1092            if kind == "axis" then
1093                collected = apply_axis[pi.axis](collected)
1094            elseif kind == "nodes" then
1095                collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1096            elseif kind == "expression" then
1097                collected = apply_expression(collected,pi.evaluator,order)
1098            elseif kind == "selector" then
1099                collected = apply_selector(collected,pi.specification)
1100            elseif kind == "finalizer" then
1101                collected = pi.finalizer(collected) -- no check on # here
1102                p.matched = p.matched + 1
1103                p.finalized = p.finalized + 1
1104                return collected
1105            end
1106            if not collected or #collected == 0 then
1107                local pn = i < nofparsed and parsed[nofparsed]
1108                if pn and pn.kind == "finalizer" then
1109                    collected = pn.finalizer(collected) -- collected can be nil
1110                    p.finalized = p.finalized + 1
1111                    return collected
1112                end
1113                return nil
1114            end
1115        end
1116        if collected then
1117            p.matched = p.matched + 1
1118        end
1119        return collected
1120    end
1121
1122    local function traced_apply(list,parsed,nofparsed,order)
1123        if trace_lparse then
1124            lshow(parsed)
1125        end
1126        report_lpath("collecting: %s",parsed.pattern)
1127        report_lpath("root tags : %s",tagstostring(list))
1128        report_lpath("order     : %s",order or "unset")
1129        local collected = list
1130        for i=1,nofparsed do
1131            local pi = parsed[i]
1132            local kind = pi.kind
1133            if kind == "axis" then
1134                collected = apply_axis[pi.axis](collected)
1135                report_lpath("% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
1136            elseif kind == "nodes" then
1137                collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1138                report_lpath("% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
1139            elseif kind == "expression" then
1140                collected = apply_expression(collected,pi.evaluator,order)
1141                report_lpath("% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted)
1142            elseif kind == "selector" then
1143                collected = apply_selector(collected,pi.specification)
1144                report_lpath("% 10i : se : %s ",(collected and #collected) or 0,pi.specification)
1145            elseif kind == "finalizer" then
1146                collected = pi.finalizer(collected)
1147                report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
1148                return collected
1149            end
1150            if not collected or #collected == 0 then
1151                local pn = i < nofparsed and parsed[nofparsed]
1152                if pn and pn.kind == "finalizer" then
1153                    collected = pn.finalizer(collected)
1154                    report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pn.name,pn.arguments or "")
1155                    return collected
1156                end
1157                return nil
1158            end
1159        end
1160        return collected
1161    end
1162
1163    local function normal_apply(list,parsed,nofparsed,order)
1164        local collected = list
1165        for i=1,nofparsed do
1166            local pi = parsed[i]
1167            local kind = pi.kind
1168            if kind == "axis" then
1169                local axis = pi.axis
1170                if axis ~= "self" then
1171                    collected = apply_axis[axis](collected)
1172                end
1173            elseif kind == "nodes" then
1174                collected = apply_nodes(collected,pi.nodetest,pi.nodes)
1175            elseif kind == "expression" then
1176                collected = apply_expression(collected,pi.evaluator,order)
1177            elseif kind == "selector" then
1178                collected = apply_selector(collected,pi.specification)
1179            elseif kind == "finalizer" then
1180                return pi.finalizer(collected)
1181            end
1182            if not collected or #collected == 0 then
1183                local pf = i < nofparsed and parsed[nofparsed].finalizer
1184                if pf then
1185                    return pf(collected) -- can be anything
1186                end
1187                return nil
1188            end
1189        end
1190        return collected
1191    end
1192
1193    local apply = normal_apply
1194
1195    if trackers then
1196     -- local function check()
1197     --     if trace_lprofile or then
1198     --         apply = profiled_apply
1199     --     elseif trace_lpath then
1200     --         apply = traced_apply
1201     --     else
1202     --         apply = normal_apply
1203     --     end
1204     -- end
1205     -- trackers.register("xml.path",   check) -- can be "xml.path,xml.parse,xml.profile
1206     -- trackers.register("xml.parse",  check)
1207     -- trackers.register("xml.profile",check)
1208
1209        trackers.register("xml.path,xml.parse,xml.profile",function()
1210            if trace_lprofile then
1211                apply = profiled_apply
1212            elseif trace_lpath then
1213                apply = traced_apply
1214            else
1215                apply = normal_apply
1216            end
1217        end)
1218    end
1219
1220
1221    function xml.applylpath(list,pattern)
1222        if not list then
1223            lastmatch = nil
1224            return
1225        end
1226        local parsed = cache[pattern]
1227        if parsed then
1228            lpathcalls  = lpathcalls + 1
1229            lpathcached = lpathcached + 1
1230        elseif type(pattern) == "table" then
1231            lpathcalls = lpathcalls + 1
1232            parsed = pattern
1233        else
1234            parsed = lpath(pattern) or pattern
1235        end
1236        if not parsed then
1237            lastmatch = nil
1238            return
1239        end
1240        local nofparsed = #parsed
1241        if nofparsed == 0 then
1242            lastmatch = nil
1243            return -- something is wrong
1244        end
1245        local collected = apply({ list },parsed,nofparsed,list.mi)
1246        lastmatch = keepmatch and collected or nil
1247        return collected
1248    end
1249
1250    function xml.lastmatch()
1251        return lastmatch
1252    end
1253
1254    local stack  = { }
1255
1256    function xml.pushmatch()
1257        insert(stack,lastmatch)
1258    end
1259
1260    function xml.popmatch()
1261        lastmatch = remove(stack)
1262    end
1263
1264end
1265
1266local applylpath = xml.applylpath
1267
1268-- This is the main filter function. It returns whatever is asked for.
1269
-- Public entry point: applies the pattern to the root and passes on whatever
-- that produces (there is no special attribute handling here any longer).
function xml.filter(root,pattern)
    return applylpath(root,pattern)
end
1273
1274-- internal (parsed)
1275
-- Expression helper: the result of applying a (sub)pattern to element e.
expressions.child = function(e,pattern)
    -- todo: cache
    return applylpath(e,pattern)
end
1279
-- Expression helper: the number of matches of a (sub)pattern applied to
-- element e. Without a pattern the count is zero; this is tested up front
-- so that a missing pattern is never handed to the path parser.
expressions.count = function(e,pattern)
    if not pattern then
        return 0
    end
    local collected = applylpath(e,pattern) -- todo: cache
    return (collected and #collected) or 0
end
1284
-- Expression helper: without a value the named attribute of e is returned,
-- with a value the outcome of comparing the attribute with that value. The
-- result is nil when e is not a table, no name is given or e has no
-- attributes.
expressions.attribute = function(e,name,value)
    if type(e) ~= "table" or not name then
        return nil
    end
    local attributes = e.at
    if not attributes then
        return nil
    end
    local found = attributes[name]
    if value then
        return found == value
    end
    return found
end
1299
1300-- external
1301
1302-- expressions.oneof = function(s,...)
1303--     local t = {...}
1304--     for i=1,#t do
1305--         if s == t[i] then
1306--             return true
1307--         end
1308--     end
1309--     return false
1310-- end
1311
1312-- could be a hashed hash
1313
-- Expression helper: true when s equals one of the remaining arguments.
expressions.oneof = function(s,...)
    local total = select("#",...)
    local index = 1
    while index <= total do
        local candidate = select(index,...) -- only the first value is kept
        if s == candidate then
            return true
        end
        index = index + 1
    end
    return false
end
1322
-- Expression helper: reports an unknown function via the xml error handler;
-- the expression itself then evaluates to false.
expressions.error = function(str)
    local name = tostring(str or "?")
    xml.errorhandler(format("unknown function in lpath expression: %s",name))
    return false
end
1327
-- Expression helper: true only when the given value is nil.
expressions.undefined = function(s)
    if s == nil then
        return true
    end
    return false
end
1331
-- Expression helper: sets the quit flag unless the argument is an explicit
-- false (so no argument at all also quits); always evaluates to true.
expressions.quit = function(s)
    if s ~= false then
        quit_expression = true
    end
    return true
end
1338
-- Expression helper (debugging): prints its arguments and evaluates to true
-- so that it does not influence the outcome of an expression.
expressions.print = function(...)
    print(...)
    return true
end
1343
-- Expression helper: string.find that tolerates a missing string; only the
-- first result (the start position or nil) is returned.
expressions.find = function(str,...)
    if not str then
        return str
    end
    return (find(str,...))
end
1347
-- Expression helpers: case conversion that maps a missing string onto an
-- empty one (todo: utf).
expressions.upper = function(str)
    if str then
        return upper(str)
    end
    return ""
end

expressions.lower = function(str)
    if str then
        return lower(str)
    end
    return ""
end
1350
1351expressions.number  = tonumber
1352expressions.boolean = toboolean
1353
-- Expression helper: true when the (Lua) pattern is found in the string or,
-- for a table, in one of its string entries.
function expressions.contains(str,pattern)
    local kind = type(str)
    if kind == "string" then
        return find(str,pattern) ~= nil
    elseif kind == "table" then
        for index=1,#str do
            local entry = str[index]
            if type(entry) == "string" and find(entry,pattern) then
                return true
            end
        end
    end
    return false
end
1370
-- Expression helper: the given id without a leading hash; anything that is
-- not a string becomes an empty string.
function expressions.idstring(str)
    if type(str) == "string" then
        local stripped = gsub(str,"^#","") -- drop the replacement count
        return stripped
    end
    return ""
end
1374
1375-- user interface
1376
-- Old style traversal: calls handle(r,d,k) for each match, with r the parent
-- of the match, d the data table of that parent and k the index (ni) of the
-- match.
local function traverse(root,pattern,handle)
 -- report_lpath("use 'xml.selection' instead for pattern: %s",pattern)
    local collected = applylpath(root,pattern)
    if not collected then
        return
    end
    for index=1,#collected do
        local element = collected[index]
        local parent  = element.__p__
        handle(parent,parent.dt,element.ni)
    end
end
1388
-- New style selection: with a handle each match is passed to it, without one
-- the collection itself is returned. Nothing is returned when there is no
-- collection.
local function selection(root,pattern,handle)
    local collected = applylpath(root,pattern)
    if not collected then
        return
    end
    if not handle then
        return collected
    end
    for index=1,#collected do
        handle(collected[index])
    end
end
1401
1402xml.traverse      = traverse           -- old method, r, d, k
1403xml.selection     = selection          -- new method, simple handle
1404
1405--~ function xml.cachedpatterns()
1406--~     return cache
1407--~ end
1408
-- generic function finalizer (independent namespace)
1410
-- Finalizer that calls the registered function 'fnc' on each collected
-- element, passing on the extra arguments; an unknown name gets reported.
local function dofunction(collected,fnc,...)
    if not collected then
        return
    end
    local action = functions[fnc]
    if not action then
        report_lpath("unknown function %a",fnc)
        return
    end
    for index=1,#collected do
        action(collected[index],...)
    end
end
1423
1424finalizers.xml["function"] = dofunction
1425finalizers.tex["function"] = dofunction
1426
1427-- functions
1428
-- Expression helper: entry n in the data table of the parent of e, or an
-- empty string when there is no such table or entry.
expressions.text = function(e,n)
    local siblings = e.__p__.dt
    if siblings then
        return siblings[n] or ""
    end
    return ""
end
1433
-- Expression helper: the name ("ns:tg", or just "tg" without namespace) of
-- an element.
--
--   e : the element that we start from
--   n : 0 or nothing for e itself, a negative number for the n-th preceding
--       sibling element and a positive number for the n-th following one
--
-- An empty string is returned when there is no such element. That is also
-- the case when e is not an element, has no parent or has no index (ni is
-- only valid after a filter run), which is in line with expressions.tag
-- returning an empty string for a missing element.
expressions.name = function(e,n) -- ns + tg
    if type(e) ~= "table" then
        return ""
    end
    local found = false
    n = tonumber(n) or 0
    if n == 0 then
        found = e
    else
        local p = e.__p__
        local d = p and p.dt
        local k = e.ni
        if d and k then
            -- walk away from e in the wanted direction and count elements
            local step, last
            if n < 0 then
                step, last, n = -1, 1, -n
            else
                step, last = 1, #d
            end
            for i=k+step,last,step do
                local di = d[i]
                if type(di) == "table" then
                    n = n - 1
                    if n == 0 then
                        found = di
                        break
                    end
                end
            end
        end
    end
    if not found then
        return ""
    end
    local ns = found.rn or found.ns or ""
    local tg = found.tg
    if ns ~= "" then
        return ns .. ":" .. tg
    else
        return tg
    end
end
1480
-- Expression helper: the tag (without namespace) of e itself (n is 0 or not
-- given), of the n-th preceding sibling element (n < 0) or of the n-th
-- following one (n > 0). An empty string is returned when there is no such
-- element or when e is missing.
expressions.tag = function(e,n) -- only tg
    if not e then
        return ""
    end
    local found = false
    n = tonumber(n) or 0
    if n == 0 then
        found = (type(e) == "table") and e -- seems to fail
    else
        local d = e.__p__.dt
        local k = e.ni
        -- walk away from e in the wanted direction and count elements
        local step = (n < 0) and -1 or 1
        local last = (n < 0) and 1 or #d
        local todo = (n < 0) and -n or n
        for i=k+step,last,step do
            local di = d[i]
            if type(di) == "table" then
                todo = todo - 1
                if todo == 0 then
                    found = di
                    break
                end
            end
        end
    end
    return (found and found.tg) or ""
end
1521
-- Using an iterator often looks nicer in the code than passing handler functions.
-- The Lua book describes how to use coroutines for that purpose (see
-- 'href="http://www.lua.org/pil/9.3.html"'). This permits code like:
1525--
1526--   for r, d, k in xml.elements(xml.load('text.xml'),"title") do
1527--       print(d[k]) -- old method
1528--   end
1529--   for e in xml.collected(xml.load('text.xml'),"title") do
1530--       print(e) -- new one
1531--   end
1532
1533-- local wrap, yield = coroutine.wrap, coroutine.yield
1534-- local dummy = function() end
1535--
1536-- function xml.elements(root,pattern,reverse) -- r, d, k
1537--     local collected = applylpath(root,pattern)
1538--     if collected then
1539--         if reverse then
1540--             return wrap(function() for c=#collected,1,-1 do
1541--                 local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
1542--             end end)
1543--         else
1544--             return wrap(function() for c=1,#collected    do
1545--                 local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
1546--             end end)
1547--         end
1548--     end
1549--     return wrap(dummy)
1550-- end
1551--
1552-- function xml.collected(root,pattern,reverse) -- e
1553--     local collected = applylpath(root,pattern)
1554--     if collected then
1555--         if reverse then
1556--             return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end)
1557--         else
1558--             return wrap(function() for c=1,#collected    do yield(collected[c]) end end)
1559--         end
1560--     end
1561--     return wrap(dummy)
1562-- end
1563
1564-- faster:
1565
1566local dummy = function() end
1567
-- Iterator over the matches of a pattern (old method). Each step returns
-- r, d, k: the parent of the match, the data table of that parent and the
-- index (ni) of the match. With 'reverse' set the matches are visited last
-- to first.
function xml.elements(root,pattern,reverse) -- r, d, k
    local collected = applylpath(root,pattern)
    local total = collected and #collected or 0
    if total == 0 then
        return dummy
    end
    local current, step, final
    if reverse then
        current, step, final = total + 1, -1, 1
    else
        current, step, final = 0, 1, total
    end
    return function()
        if current ~= final then
            current = current + step
            local element = collected[current]
            local parent  = element.__p__
            return parent, parent.dt, element.ni
        end
    end
end
1599
-- Iterator over the matches of a pattern (new method): each step returns the
-- matched element itself. With 'reverse' set the matches are visited last to
-- first.
function xml.collected(root,pattern,reverse) -- e
    local collected = applylpath(root,pattern)
    local total = collected and #collected or 0
    if total == 0 then
        return dummy
    end
    local current, step, final
    if reverse then
        current, step, final = total + 1, -1, 1
    else
        current, step, final = 0, 1, total
    end
    return function()
        if current ~= final then
            current = current + step
            return collected[current]
        end
    end
end
1627
1628-- handy
1629
-- Reports the serialized form of every match of the pattern, which defaults
-- to "." (the collection itself).
function xml.inspect(collection,pattern)
    local wanted = pattern or "."
    for element in xml.collected(collection,wanted) do
        report_lpath("pattern: %s\n\n%s\n",wanted,xml.tostring(element))
    end
end
1636
1637-- texy (see xfdf):
1638
-- Normalizes the text in an element tree in-place: newlines at both ends of
-- a text entry are stripped and each remaining run of newlines becomes a
-- paragraph separator (an empty line). Nested elements are handled
-- recursively. The element itself is returned.
local function split(e) -- todo: use helpers / lpeg
    local dt = e.dt
    if dt then
        for i=1,#dt do
            local dti = dt[i]
            if type(dti) == "string" then
                -- the $ anchor is needed: without it the lazy (.-) matches
                -- nothing and the trailing newlines are never stripped
                dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*$","%1")
                dti = gsub(dti,"[\n\r]+","\n\n")
                dt[i] = dti
            elseif type(dti) == "table" then
                split(dti)
            end
        end
    end
    return e
end
1655
-- Finalizer: normalizes the newlines in all collected elements (in-place)
-- and returns the same collection.
function xml.finalizers.paragraphs(c)
    local total = #c
    local index = 0
    while index < total do
        index = index + 1
        split(c[index])
    end
    return c
end
1662
1663-- local lpegmatch = lpeg.match
1664-- local w = lpeg.patterns.whitespace
1665-- local p = w^0 * lpeg.Cf(lpeg.Ct("") * lpeg.Cg(lpeg.C((1-w)^1) * lpeg.Cc(true) * w^0)^1,rawset)
1666
1667-- function xml.functions.classes(e,class) -- cache
1668--     class = class and e.at[class] or e.at.class
1669--     if class then
1670--         return lpegmatch(p,class)
1671--     else
1672--         return { }
1673--     end
1674-- end
1675
1676-- local gmatch = string.gmatch
1677
1678-- function xml.functions.hasclass(e,c,class)
1679--     class = class and e.at[class] or e.at.class
1680--     if class and class ~= "" then
1681--         if class == c then
1682--             return true
1683--         else
1684--             for s in gmatch(class,"%S+") do
1685--                 if s == c then
1686--                     return true
1687--                 end
1688--             end
1689--         end
1690--     end
1691--     return false
1692-- end
1693