if not modules then modules = { } end modules ['s-xml-analyzers'] = {
    -- descriptive metadata stored under the module's name in the global
    -- 'modules' table (which is created first when it does not exist yet)
    version = 1.001,
    comment = "companion to s-xml-analyzers.mkiv",
    author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license = "see context related readme files"
}

-- make sure that the namespace tables exist; the public functions of this
-- file are stored in moduledata.xml.analyzers
moduledata.xml = moduledata.xml or { }
moduledata.xml.analyzers = moduledata.xml.analyzers or { }
11
-- Localized globals and library helpers used by the functions in this file.

local next, type = next, type
local utfvalues = string.utfvalues
local formatters = string.formatters
local setmetatableindex = table.setmetatableindex
local context = context

-- Every shortcut is bound to the context command with the same name. The
-- right hand side has to follow the order of the left hand side, otherwise
-- for instance TB would silently call a different command.

local NC, NR, HL, FL, LL, SL, TB = context.NC, context.NR, context.HL, context.FL, context.LL, context.SL, context.TB
local sortedhash, sortedkeys, concat, sequenced = table.sortedhash, table.sortedkeys, table.concat, table.sequenced

local chardata = characters.data

-- Shared state: replaced by analyze() on every fresh run and read by the
-- reporting functions further down.

local tags = { }  -- per element name: counter, attribute values, children
local char = { }  -- per character value: occurrences in text content
local attr = { }  -- per character value: occurrences in attribute values
local ents = { }  -- per entity name: counter
local name = nil  -- key of the most recently analyzed set of files
27
-- Loads one or more xml files and gathers statistics in the file level
-- tables 'tags', 'char', 'attr' and 'ents'.
--
-- filename : a single file name (string) or a list of file names; a list
--            is sorted in place
--
-- Nothing is returned. When the same set of names was analyzed by the
-- previous call, the function returns right away and keeps the collected
-- data as it is.
--
-- Collected data:
--
--   tags[tg]                  : { n = count, attributes = ..., children = ... }
--   tags[tg].attributes[k][v] : number of times attribute k had value v
--   tags[tg].children[c]      : same layout as tags[tg], counted only for
--                               elements c that sit directly inside tg
--   char[u]                   : number of times character value u was seen
--                               in text content
--   attr[u]                   : number of times character value u was seen
--                               in attribute values
--   ents[e]                   : increased once per loaded file for each name
--                               found in root.statistics.entities.names

local function analyze(filename)

    if type(filename) == "string" then
        filename = { filename }
    end

    -- sorting makes the key below independent of the order of the names

    table.sort(filename)

    local hash = concat(filename,"|")

    if hash == name then
        return
    end

    name = hash
    tags = { }
    char = { }
    attr = { }
    ents = { }

    -- on demand creation of the counters for the values of one attribute

    local function att(t,k)
        local v = setmetatableindex("number")
        t[k] = v
        return v
    end

    -- on demand creation of an element record; the children table uses
    -- this very function so nested records get the same layout

    local function add(t,k)
        local v = {
            n = 0,
            attributes = setmetatableindex(att),
            children = setmetatableindex(add),
        }
        t[k] = v
        return v
    end

    setmetatableindex(tags,add)

    setmetatableindex(ents,"number")
    setmetatableindex(char,"number")

    -- a character that is first seen in an attribute value also gets an
    -- entry in 'char', so a traversal of 'char' covers those characters too

    setmetatableindex(attr,function(t,k)
        char[k] = char[k] or 0
        t[k] = 0
        return 0
    end)

    local function collect(e,parent)
        local dt = e.dt
        if e.special then
            -- A special node is not counted itself. Its table children are
            -- visited with the parent that was handed to this node (the
            -- original code passed an undefined global 'tg' here).
            if dt then
                for i=1,#dt do
                    local d = dt[i]
                    if type(d) == "table" then
                        collect(d,parent)
                    end
                end
            end
        else
            local at = e.at
            local tg = e.tg
            local tag = tags[tg]
            tag.n = tag.n + 1
            -- the record of this element in the context of its parent
            local children = parent and tags[parent].children[tg]
            local childatt = children and children.attributes
            if children then
                children.n = children.n + 1
            end
            if at then
                local attributes = tag.attributes
                for k, v in next, at do
                    local a = attributes[k]
                    a[v] = a[v] + 1
                    if childatt then
                        local a = childatt[k]
                        a[v] = a[v] + 1
                    end
                    for s in utfvalues(v) do
                        attr[s] = attr[s] + 1
                    end
                end
            end
            if dt then
                for i=1,#dt do
                    local d = dt[i]
                    if type(d) == "table" then
                        collect(d,tg)
                    else
                        -- a non table entry is treated as text content
                        for s in utfvalues(d) do
                            char[s] = char[s] + 1
                        end
                    end
                end
            end
        end
    end

    for i=1,#filename do
        local name = filename[i] -- shadows the cache key inside this loop only
        local root = xml.load(name)

        logs.report("xml analyze","loaded: %s",name)

        collect(root)

        local names = root.statistics.entities.names
        for n in next, names do
            ents[n] = ents[n] + 1
        end
    end

    -- Reset the index handlers of the four top level tables now that the
    -- data is complete (presumably so that later lookups of unknown keys no
    -- longer add entries). The nested tables are left as they are.

    setmetatableindex(tags,nil)
    setmetatableindex(char,nil)
    setmetatableindex(attr,nil)
    setmetatableindex(ents,nil)

end
145
-- An attribute with more distinct values than this is reported by its number
-- of values instead of by the values themselves.

moduledata.xml.analyzers.maxnofattributes = 100

-- Typesets a tabulate that shows for each element found in the given file(s)
-- its frequency, its direct children and its attributes with their values.

function moduledata.xml.analyzers.structure(filename)
    analyze(filename)
    local first = true
    local limit = tonumber(moduledata.xml.analyzers.maxnofattributes) or 100
    context.starttabulate { "|l|pA{nothyphenated,flushleft,verytolerant,stretch,broad}|" }
    for element, info in sortedhash(tags) do
        -- every element but the first one is preceded by a separator
        if first then
            first = false
        else
            context.TB()
        end
        local kids = info.children
        local atts = info.attributes
        NC() context.bold("element")
        NC() context.darkred(element)
        NC() NR()
        NC() context.bold("frequency")
        NC() context(info.n)
        NC() NR()
        if next(kids) then
            -- reduce the child records to plain counters
            local counts = { }
            for child, entry in next, kids do
                counts[child] = entry.n
            end
            NC() context.bold("children")
            NC() context.puretext(sequenced(counts))
            NC() NR()
        end
        if next(atts) then
            NC() context.bold("attributes")
            NC() context.puretext.darkgreen(concat(sortedkeys(atts)," "))
            NC() NR()
            for attribute, values in sortedhash(atts) do
                local n = table.count(values)
                -- identifiers and attributes with many values are summarized
                local summarize = attribute == "id" or attribute == "xml:id" or n > limit
                NC() context("@%s",attribute)
                NC()
                if summarize then
                    context("%s different values",n)
                else
                    context.puretext(sequenced(values))
                end
                NC() NR()
            end
        end
    end
    context.stoptabulate()
end
184
-- Typesets a tabulate with one row per entry in 'char': the counter kept in
-- 'char', the counter kept in 'attr', the character value formatted with %U,
-- the character itself and the description found in the character data.

function moduledata.xml.analyzers.characters(filename)
    analyze(filename)
    context.starttabulate { "|r|r|l|c|l|" }
    for unicode, intext in sortedhash(char) do
        NC()
        context.darkred("%s",intext)
        NC()
        context.darkgreen("%s",attr[unicode])
        NC()
        context("%U",unicode)
        NC()
        context.char(unicode)
        NC()
        context("%s",chardata[unicode].description)
        NC()
        NR()
    end
    context.stoptabulate()
end
198
-- Typesets a two column tabulate with each collected entity name and the
-- counter that goes with it.

function moduledata.xml.analyzers.entities(filename)
    analyze(filename)
    context.starttabulate { "|l|r|" }
    for entity, count in sortedhash(ents) do
        NC()
        context(entity)
        NC()
        context(count)
        NC()
        NR()
    end
    context.stoptabulate()
end
209
-- Formatters used by allsetups() to build the generated file.
--
--   *_s : the name of a setup as listed in the presets
--   *_n : a complete setup for an element without attributes
--   *_a : a complete setup that also gets a comment line showing the
--         attributes that are passed as a table
--
-- The parent variants take the element name, the child variants take the
-- name of the parent followed by the name of the child.

local f_parent_s = formatters["xml:%s"]
local f_parent_n = formatters["\\startxmlsetups xml:%s\n \\xmlflush{#1}\n\\stopxmlsetups"]
local f_parent_a = formatters["\\startxmlsetups xml:%s\n %% @ % t\n \\xmlflush{#1}\n\\stopxmlsetups"]
local f_child_s = formatters["xml:%s:%s"]
local f_child_n = formatters["\\startxmlsetups xml:%s:%s\n \\xmlflush{#1}\n\\stopxmlsetups"]
local f_child_a = formatters["\\startxmlsetups xml:%s:%s\n %% @ % t\n \\xmlflush{#1}\n\\stopxmlsetups"]

-- Skeleton of the generated file. The arguments are, in this order: the
-- analyzed file name(s), the list of preset names and the generated setups.
--
-- The presets environment has to be opened with \startxmlsetups so that it
-- pairs with the closing \stopxmlsetups (the original opened it with the
-- singular \startxmlsetup).

local f_template = formatters [ [[
%% file: %s

%% Beware, these are all (first level) setups. If you have a complex document
%% it often makes sense to use \\xmlfilter or similar local filter options.

%% presets

\startxmlsetups xml:presets:all
 \xmlsetsetups {#1} {
 %s
 }
\stopxmlsetups

%% setups

\xmlregistersetup{xml:presets:all}

\starttext
 \xmlprocessfile{main}{somefile.xml}{}
\stoptext

%s
]] ]
241
-- Analyzes the given file(s) and saves a file with one setup per element and
-- one per parent/child combination, plus a presets list naming all of them.
--
-- filename : a file name or a list of file names
-- usedname : name of the file to write; "xml-analyze-template.tex" is used
--            when no name is given

function moduledata.xml.analyzers.allsetups(filename,usedname)
    analyze(filename)
    local bodies = { }
    local presets = { }
    for parent, info in sortedhash(tags) do
        local kids = info.children
        local atts = info.attributes
        -- the setup for the element itself
        local body
        if next(atts) then
            body = f_parent_a(parent,sortedkeys(atts))
        else
            body = f_parent_n(parent)
        end
        bodies[#bodies+1] = body
        presets[#presets+1] = f_parent_s(parent)
        -- followed by the setups for each of its direct children
        if next(kids) then
            for child, entry in sortedhash(kids) do
                local childatts = entry.attributes
                local childbody
                if next(childatts) then
                    childbody = f_child_a(parent,child,sortedkeys(childatts))
                else
                    childbody = f_child_n(parent,child)
                end
                bodies[#bodies+1] = childbody
                presets[#presets+1] = f_child_s(parent,child)
            end
        end
    end
    table.sort(presets)

    -- a list of names is shown as one string in the header of the file
    local shown = filename
    if type(shown) == "table" then
        shown = concat(shown," | ")
    end

    local target = usedname or "xml-analyze-template.tex"

    local data = f_template(shown,concat(presets,"|\n "),concat(bodies,"\n\n"))

    io.savedata(target,data)
    logs.report("xml analyze","presets saved in: %s",target)
end
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334 |