1if not modules then modules = { } end modules ['s-xml-analyzers'] = {
2 version = 1.001,
3 comment = "companion to s-xml-analyzers.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
-- Namespace under which this module publishes its analyzers.
moduledata.xml           = moduledata.xml           or { }
moduledata.xml.analyzers = moduledata.xml.analyzers or { }

-- Frequently used globals and library helpers, cached in locals.
local next, type = next, type
local utfvalues = string.utfvalues
local formatters = string.formatters
local setmetatableindex = table.setmetatableindex
local context = context
local ctxescaped = context.ctxescaped

-- Shortcuts for table building commands. The right hand side must stay in the
-- same order as the names so that each local is bound to the context command
-- that carries the same name.
local NC, NR, HL, FL, LL, SL, TB = context.NC, context.NR, context.HL, context.FL, context.LL, context.SL, context.TB
local sortedhash, sortedkeys, concat, sequenced = table.sortedhash, table.sortedkeys, table.concat, table.sequenced

local chardata = characters.data

-- State shared by the reporting functions; it is (re)filled by analyze().
local tags = { }  -- element tag -> { n, attributes, children }
local char = { }  -- code point  -> number of occurrences in text content
local attr = { }  -- code point  -> number of occurrences in attribute values
local ents = { }  -- entity name -> number of loaded files whose statistics list it
local name = nil  -- key of the set of files that was analyzed last
28
-- Gathers statistics about one or more xml files into the shared tables
-- tags, char, attr and ents.
--
-- filename : one file name (string) or a list of file names; a list is sorted
--            in place, the sorted names joined by "|" act as the cache key
--
-- Nothing is returned. When the key equals the one of the previous call the
-- function returns right away and the earlier results stay in place.
local function analyze(filename)

    if type(filename) == "string" then
        filename = { filename }
    end

    table.sort(filename)

    local hash = concat(filename,"|")

    if hash == name then
        return
    end

    name = hash
    tags = { }
    char = { }
    attr = { }
    ents = { }

    -- attribute name -> table of counters per value, made on first access
    local function att(t,k)
        local v = setmetatableindex("number")
        t[k] = v
        return v
    end

    -- tag -> record, made on first access; the children of a record are
    -- created by this same function so they have the same layout
    local function add(t,k)
        local v = {
            n          = 0,
            attributes = setmetatableindex(att),
            children   = setmetatableindex(add),
        }
        t[k] = v
        return v
    end

    setmetatableindex(tags,add)

    setmetatableindex(ents,"number")
    setmetatableindex(char,"number")

    -- a code point that shows up in an attribute value also gets an entry in
    -- char, so that it is present when the char table is reported
    setmetatableindex(attr,function(t,k)
        char[k] = char[k] or 0
        t[k] = 0
        return 0
    end)

    -- Walks the tree below e; parent is the tag of the enclosing element or
    -- nil when there is none.
    local function collect(e,parent)
        local dt = e.dt
        if e.special then
            -- a special node is not counted itself; nested tables are handled
            -- as if they sit directly below the parent of the special node
            -- (this used to pass an undeclared 'tg', so an unrelated global)
            if dt then
                for i=1,#dt do
                    local d = dt[i]
                    if type(d) == "table" then
                        collect(d,parent)
                    end
                end
            end
        else
            local at  = e.at
            local tg  = e.tg
            local tag = tags[tg]
            tag.n = tag.n + 1
            -- the same element is also registered as child of its parent
            local children = parent and tags[parent].children[tg]
            local childatt = children and children.attributes
            if children then
                children.n = children.n + 1
            end
            if at then
                local attributes = tag.attributes
                for k, v in next, at do
                    local a = attributes[k]
                    a[v] = a[v] + 1
                    if childatt then
                        local a = childatt[k]
                        a[v] = a[v] + 1
                    end
                    for s in utfvalues(v) do
                        attr[s] = attr[s] + 1
                    end
                end
            end
            if dt then
                for i=1,#dt do
                    local d = dt[i]
                    if type(d) == "table" then
                        collect(d,tg)
                    else
                        -- anything that is not a table is scanned as text
                        for s in utfvalues(d) do
                            char[s] = char[s] + 1
                        end
                    end
                end
            end
        end
    end

    for i=1,#filename do
        -- not called 'name' because that is the cache key kept outside
        local fullname = filename[i]
        local root     = xml.load(fullname)

        logs.report("xml analyze","loaded: %s",fullname)

        collect(root)

        -- each name adds one per file, whatever value is stored with it
        local names = root.statistics.entities.names
        for n in next, names do
            ents[n] = ents[n] + 1
        end
    end

    -- drop the auto-creating index handlers of the four top level tables so
    -- that looking up a missing key afterwards does not add an entry
    setmetatableindex(tags,nil)
    setmetatableindex(char,nil)
    setmetatableindex(attr,nil)
    setmetatableindex(ents,nil)

end
146
-- Limit used by structure(): an attribute with more distinct values than this
-- is reported by the number of values only.
moduledata.xml.analyzers.maxnofattributes = 100

-- Typesets a two column table that shows for each element (sorted by name) its
-- frequency, its child elements with their counts and its attributes with the
-- values that were found.
--
-- filename : file name or list of file names, handed over to analyze()
function moduledata.xml.analyzers.structure(filename)
    analyze(filename)
    local limit = tonumber(moduledata.xml.analyzers.maxnofattributes) or 100
    local first = true
    context.starttabulate { "|l|pA{nothyphenated,flushleft,verytolerant,stretch,broad}|" }
    for element, info in sortedhash(tags) do
        -- context.TB() goes between two elements, never before the first one
        if first then
            first = false
        else
            context.TB()
        end
        local kids = info.children
        local atts = info.attributes
        NC() context.bold("element")
        NC() context.darkred(element)
        NC() NR()
        NC() context.bold("frequency")
        NC() context(info.n)
        NC() NR()
        if next(kids) then
            -- reduce each child record to its counter
            local counts = { }
            for tag, child in next, kids do
                counts[tag] = child.n
            end
            NC() context.bold("children")
            NC() context.puretext(sequenced(counts))
            NC() NR()
        end
        if next(atts) then
            NC() context.bold("attributes")
            NC() context.puretext.darkgreen(concat(sortedkeys(atts)," "))
            NC() NR()
            for key, values in sortedhash(atts) do
                local nofvalues = table.count(values)
                -- identifiers and attributes with many values are summarized
                local summarize = key == "id" or key == "xml:id" or nofvalues > limit
                NC() context("@%s",key)
                NC()
                if summarize then
                    context("%s different values",nofvalues)
                else
                    context.puretext(sequenced(values))
                end
                NC() NR()
            end
        end
    end
    context.stoptabulate()
end
189
-- Typesets a five column table with one row per code point in the char table
-- (sorted): its count in char, its count in attr (empty when there is none),
-- the code point formatted with %U, the character itself and the description
-- found in the character database.
--
-- filename : file name or list of file names, handed over to analyze()
function moduledata.xml.analyzers.characters(filename)
    analyze(filename)
    context.starttabulate { "|r|r|l|c|l|" }
    for codepoint, count in sortedhash(char) do
        local inattributes = attr[codepoint] or ""
        NC() context.color({ "darkred" }, count)
        NC() context.color({ "darkgreen" }, inattributes)
        NC() context("%U",codepoint)
        NC() context.char(codepoint)
        NC() context("%s",chardata[codepoint].description)
        NC() NR()
    end
    context.stoptabulate()
end
203
-- Typesets a two column table with the entity names collected by analyze()
-- (sorted) and the counter kept for each of them.
--
-- filename : file name or list of file names, handed over to analyze()
function moduledata.xml.analyzers.entities(filename)
    analyze(filename)
    context.starttabulate { "|l|r|" }
    local names = sortedkeys(ents)
    for i=1,#names do
        local entity = names[i]
        NC() context(entity)
        NC() context(ents[entity])
        NC() NR()
    end
    context.stoptabulate()
end
214
-- Formatters used to build the generated template. For a parent element and
-- for a parent:child pair there are three variants:
--
--   *_s : the name of the setup, as listed in the presets
--   *_n : a setup that only flushes the content
--   *_a : like *_n but with a comment line made from a list (the attribute
--         names) that is passed as last argument
local f_parent_s = formatters["xml:%s"]
local f_parent_n = formatters["\\startxmlsetups xml:%s\n \\xmlflush{#1}\n\\stopxmlsetups"]
local f_parent_a = formatters["\\startxmlsetups xml:%s\n %% @ % t\n \\xmlflush{#1}\n\\stopxmlsetups"]
local f_child_s  = formatters["xml:%s:%s"]
local f_child_n  = formatters["\\startxmlsetups xml:%s:%s\n \\xmlflush{#1}\n\\stopxmlsetups"]
local f_child_a  = formatters["\\startxmlsetups xml:%s:%s\n %% @ % t\n \\xmlflush{#1}\n\\stopxmlsetups"]

-- Skeleton of the file that gets saved. The three %s slots take, in this
-- order, the name(s) of the analyzed file(s), the list of setup names and the
-- generated setups. This is a long string, so a backslash is not an escape
-- character here: control sequences are written with one backslash and a
-- literal percent sign is written as %%.
local f_template = formatters [ [[
%% file: %s

%% Beware, these are all (first level) setups. If you have a complex document
%% it often makes sense to use \xmlfilter or similar local filter options.

%% presets

\startxmlsetups xml:presets:all
 \xmlsetsetup {#1} {
 %s
 }
\stopxmlsetups

%% setups

\xmlregistersetup{xml:presets:all}

\starttext
 \xmlprocessfile{main}{somefile.xml}{}
\stoptext

%s
]] ]
246
-- Saves a template file with one setup per element and one per parent:child
-- combination found by analyze(), plus a preset that lists all setup names.
--
-- filename : file name or list of file names, handed over to analyze(); it
--            also ends up in the first line of the template
-- usedname : name of the file to write, "xml-analyze-template.tex" when it
--            is not given
function moduledata.xml.analyzers.allsetups(filename,usedname)
    analyze(filename)
    local definitions = { } -- the generated setups, in order of creation
    local presets     = { } -- the names of those setups, sorted later on
    for parent, info in sortedhash(tags) do
        local kids = info.children
        local atts = info.attributes
        local definition
        if next(atts) then
            definition = f_parent_a(parent,sortedkeys(atts))
        else
            definition = f_parent_n(parent)
        end
        definitions[#definitions+1] = definition
        presets[#presets+1] = f_parent_s(parent)
        if next(kids) then
            for child, childinfo in sortedhash(kids) do
                local childatts = childinfo.attributes
                if next(childatts) then
                    definition = f_child_a(parent,child,sortedkeys(childatts))
                else
                    definition = f_child_n(parent,child)
                end
                definitions[#definitions+1] = definition
                presets[#presets+1] = f_child_s(parent,child)
            end
        end
    end
    table.sort(presets)

    -- a list of files is shown as one line in the header of the template
    local source = filename
    if type(source) == "table" then
        source = concat(source," | ")
    end

    local target = usedname or "xml-analyze-template.tex"

    io.savedata(target,f_template(source,concat(presets,"|\n "),concat(definitions,"\n\n")))
    logs.report("xml analyze","presets saved in: %s",target)
end
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339 |