lxml-mis.lua /size: 3542 b    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['lxml-mis'] = {
2    version   = 1.001,
3    comment   = "this module is the basis for the lxml-* ones",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local xml, lpeg, string = xml, lpeg, string
10
11local type = type
12local concat = table.concat
13local format, gsub, match = string.format, string.gsub, string.match
14local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
15local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
16
17lpegpatterns.xml  = lpegpatterns.xml or { }
18local xmlpatterns = lpegpatterns.xml
19
-- The following helper functions best belong to the 'lxml-ini' module. Some are
-- here because we need them in the 'mk' document and other manuals, others came up
-- when playing with this module. Since this module is also used in 'mtxrun' we've
-- put them here instead of loading more modules there than needed.
24
-- Applies gsub(old,new) to every string found in t.dt, descending into the
-- non-string entries (which are handed to this function again). The strings
-- are replaced in place; nothing is returned. A table without a dt field is
-- left untouched.
local function xmlgsub(t,old,new) -- will be replaced
    local children = t.dt
    if not children then
        return
    end
    for i=1,#children do
        local child = children[i]
        if type(child) ~= "string" then
            -- anything that is not text is treated as a nested node
            xmlgsub(child,old,new)
        else
            -- only the first result of gsub (the new string) is stored
            children[i] = gsub(child,old,new)
        end
    end
end
38
39-- xml.gsub = xmlgsub
40
-- Cosmetic helper (for the manual): removes a run of leading spaces from every
-- line in the text of dk.
--
--   dk : node whose strings are rewritten in place (it is passed to xmlgsub)
--   d  : table that is indexed with k-1 to find the preceding entry
--   k  : index into d
--
-- The width of the indentation is taken from the entry that precedes position
-- k in d: the length of the first whitespace run that follows a newline in
-- that string. Nothing happens when d or k is missing, when d[k-1] is not a
-- string, or when that string has no newline followed by whitespace.
-- NOTE(review): presumably d is the dt table of the parent and dk == d[k];
-- confirm against the callers.
function xml.stripleadingspaces(dk,d,k) -- cosmetic, for manual
    if d and k then
        local dkm = d[k-1]
        if type(dkm) == "string" then
            local s = match(dkm,"\n(%s+)")
            -- match returns nil when there is no indentation to measure, so
            -- guard before taking the length
            if s then
                -- string.rep is used explicitly: no 'rep' shortcut is defined
                -- among this file's locals
                xmlgsub(dk,"\n"..string.rep(" ",#s),"\n")
            end
        end
    end
end
50
51-- xml.escapes   = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
52-- xml.unescapes = { } for k,v in next, xml.escapes do xml.unescapes[v] = k end
53
54-- function xml.escaped  (str) return (gsub(str,"(.)"   , xml.escapes  )) end
55-- function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
56-- function xml.cleansed (str) return (gsub(str,"<.->"  , ''           )) end -- "%b<>"
57
58-- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
59--
60-- 1021:0335:0287:0247
61
62-- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
63--
64-- 1559:0257:0288:0190 (last one suggested by roberto)
65
66----- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
67----- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
-- Substitution pattern that rewrites < > & as &lt; &gt; &amp; and copies all
-- other text unchanged. Runs of ordinary characters are consumed in one go,
-- with one replacement per special character in between.
local escaped
do
    local plain  = (1 - S("<&>"))^0
    local entity = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
    escaped = Cs(plain * (entity * plain)^0)
end
71
72-- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
73
-- Substitution pattern that turns the entities &lt; &gt; &amp; back into the
-- characters < > & and copies all other text unchanged.
local normal    = (1 - S"&")^0
-- The trailing P("&") lets an ampersand that does not start one of the three
-- known entities pass through as-is. Without it the match ends at such an
-- ampersand and Cs only returns the text before it, so the rest of the string
-- gets lost.
local special   = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&" + P("&")
local unescaped = Cs(normal * (special * normal)^0)
77
78-- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
79
-- Substitution pattern that drops everything from a "<" up to and including
-- the next ">" and keeps all other characters. A "<" that is never closed is
-- kept as an ordinary character.
local cleansed
do
    local tag = P("<") * (1 - P(">"))^0 * P(">")
    cleansed = Cs((tag/"" + 1)^0)
end
81
-- make the three patterns available in the shared lpeg.patterns.xml table
xmlpatterns.escaped, xmlpatterns.unescaped, xmlpatterns.cleansed = escaped, unescaped, cleansed
85
-- Returns the result of matching str against the 'escaped' pattern, i.e. str
-- with < > & rewritten as &lt; &gt; &amp;.
function xml.escaped(str)
    return lpegmatch(escaped,str)
end
-- Returns the result of matching str against the 'unescaped' pattern, which
-- maps the entities &lt; &gt; &amp; back to < > &.
function xml.unescaped(str)
    return lpegmatch(unescaped,str)
end
-- Returns the result of matching str against the 'cleansed' pattern, i.e. str
-- with every <...> sequence removed.
function xml.cleansed(str)
    return lpegmatch(cleansed,str)
end
89
90-- this might move
91
-- Sets the content of the element that xml.first finds for pattern under root
-- to the single string str.
--
--   root, pattern : passed on to xml.first
--   str           : becomes the only entry of the element's dt table
--   check         : when true-ish, the content is only replaced when the
--                   element is empty (no entries, or one empty string)
--
-- Nothing happens when xml.first finds no element; nothing is returned.
function xml.fillin(root,pattern,str,check)
    local found = xml.first(root,pattern)
    if not found then
        return
    end
    local content = found.dt
    local count   = #content
    if check then
        local blank = count == 0 or (count == 1 and content[1] == "")
        if not blank then
            -- there is already something there, so leave it alone
            return
        end
    end
    found.dt = { str }
end
101