-- Register this script (version, author and license details) in the global
-- module table; the table itself is created first when it is not there yet.
modules = modules or { }

modules["mtx-pdf"] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9local tonumber = tonumber
10local format, gmatch, gsub, match, find = string.format, string.gmatch, string.gsub, string.match, string.find
11local utfchar = utf.char
12local concat = table.concat
13local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
14
-- XML help descriptor that is handed to logs.application as 'helpinfo'. The
-- flags and examples listed here are meant to mirror the options that the
-- dispatcher at the end of this file acts upon (info, metadata, pretty,
-- fonts, object) plus the 'detail' switch that refines 'info' and 'fonts'.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-pdf</entry>
  <entry name="detail">ConTeXt PDF Helpers</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="info"><short>show some info about the given file</short></flag>
    <flag name="metadata"><short>show metadata xml blob</short></flag>
    <flag name="pretty"><short>replace newlines in metadata</short></flag>
    <flag name="fonts"><short>show used fonts (<ref name="detail"/>)</short></flag>
    <flag name="object"><short>show object</short></flag>
    <flag name="detail"><short>show more details (with info and fonts)</short></flag>
   </subcategory>
   <subcategory>
    <example><command>mtxrun --script pdf --info foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata --pretty foo.pdf</command></example>
    <example><command>mtxrun --script pdf --object=4 foo.pdf</command></example>
   </subcategory>
  </category>
 </flags>
</application>
]]
42
-- Application object for this script (name, banner and the XML help blob);
-- its reporter is used for all messages that the script produces.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "ConTeXt PDF Helpers 0.10",
    name     = "mtx-pdf",
})

local report = application.report
50
-- Load the pdf reader support code. Which files are needed depends on the
-- presence of the 'pdfe' global and on the CONTEXTLMTXMODE flag; the files
-- are located with the resolver (format "tex") and loaded in the given order.
do
    local libraries
    if not pdfe then
        libraries = { "lpdf-epd.lua" }
    elseif CONTEXTLMTXMODE then
        libraries = { "util-dim.lua", "lpdf-ini.lmt", "lpdf-pde.lmt" }
    else
        libraries = { "lpdf-pde.lua" }
    end
    for i=1,#libraries do
        dofile(resolvers.findfile(libraries[i],"tex"))
    end
end
60
-- Make sure the (global) namespace for this script exists.
if not scripts then
    scripts = { }
end
if not scripts.pdf then
    scripts.pdf = { }
end

-- Verbose output is requested with --detail (or the alternative --details).
local details = environment.argument("detail")
if not details then
    details = environment.argument("details")
end
65
-- Opens the pdf file with the given name.
--
-- filename : name of the file to load
--
-- Returns whatever lpdf.epdf.load delivers for the file. When no name is
-- given, when the file does not exist or when loading fails, the reason is
-- reported and nothing is returned.
local function loadpdffile(filename)
    if not filename or filename == "" then
        report("no filename given")
        return
    end
    if not lfs.isfile(filename) then
        report("unknown file '%s'",filename)
        return
    end
    local document = lpdf.epdf.load(filename)
    if not document then
        report("no valid pdf file '%s'",filename)
        return
    end
    return document
end
80
-- Reports general information about a pdf file: version, page count, the
-- document information fields, page box dimensions (all box types when
-- details are requested, otherwise only the crop box) and, when present, the
-- number of annotations, destinations, javascripts, widgets and embedded
-- files.
--
-- filename : name of the pdf file to inspect
function scripts.pdf.info(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    -- a document without catalog or info dictionary just shows unset fields
    local catalog  = pdffile.Catalog or { }
    local info     = pdffile.Info or { }
    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages or 0
    local unset    = "<unset>"

    report("%-17s > %s","filename",          filename)
    report("%-17s > %s","pdf version",       catalog.Version or unset)
    report("%-17s > %s","major version",     pdffile.majorversion or unset)
    report("%-17s > %s","minor version",     pdffile.minorversion or unset)
    report("%-17s > %s","number of pages",   nofpages)
    report("%-17s > %s","title",             info.Title or unset)
    report("%-17s > %s","creator",           info.Creator or unset)
    report("%-17s > %s","producer",          info.Producer or unset)
    report("%-17s > %s","author",            info.Author or unset)
    report("%-17s > %s","creation date",     info.CreationDate or unset)
    report("%-17s > %s","modification date", info.ModDate or unset)

    -- Reports the dimensions of the given box type, one line for each run of
    -- consecutive pages that share the same width and height. Pages lacking
    -- the box fall back on their MediaBox.
    local function somebox(what)
        if nofpages < 1 then
            -- no pages means that there is no range to report
            return
        end
        local box = string.lower(what)
        local width, height, start
        for i=1,nofpages do
            local page = pages[i]
            local bbox = page[what] or page.MediaBox or { 0, 0, 0, 0 }
            -- a pdf rectangle is { llx, lly, urx, ury }, so the width is the
            -- horizontal and the height the vertical difference
            local w = bbox[3] - bbox[1]
            local h = bbox[4] - bbox[2]
            if w ~= width or h ~= height then
                if start then
                    report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,i-1,width,height)
                end
                width, height, start = w, h, i
            end
        end
        report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,nofpages,width,height)
    end

    if details then
        somebox("MediaBox")
        somebox("ArtBox")
        somebox("BleedBox")
        somebox("CropBox")
        somebox("TrimBox")
    else
        somebox("CropBox")
    end

    -- annotations are counted over all pages
    local annotations = 0
    for i=1,nofpages do
        local annots = pages[i].Annots
        if annots then
            annotations = annotations + #annots
        end
    end
    if annotations > 0 then
        report("%-17s > %s","annotations",annotations)
    end

    -- the remaining counts are only shown when there is something to show
    local destinations = pdffile.destinations
    local names        = destinations and sortedkeys(destinations)
    if names and #names > 0 then
        report("%-17s > %s","destinations",#names)
    end

    local javascripts = pdffile.javascripts
    names = javascripts and sortedkeys(javascripts)
    if names and #names > 0 then
        report("%-17s > %s","javascripts",#names)
    end

    local widgets = pdffile.widgets
    if widgets and #widgets > 0 then
        report("%-17s > %s","widgets",#widgets)
    end

    local embeddedfiles = pdffile.embeddedfiles
    names = embeddedfiles and sortedkeys(embeddedfiles)
    if names and #names > 0 then
        report("%-17s > %s","embeddedfiles",#names)
    end
end
168
-- Reports the metadata (xml) blob of a pdf file.
--
-- filename : name of the pdf file to inspect
-- pretty   : when set, carriage returns are turned into newlines
function scripts.pdf.metadata(filename,pretty)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    -- a missing catalog is treated like missing metadata
    local catalog  = pdffile.Catalog
    local metadata = catalog and catalog.Metadata
    if not metadata then
        report("no metadata")
        return
    end
    -- the Metadata entry is called to get the actual content
    metadata = metadata()
    if pretty then
        -- a cr/lf pair becomes one newline instead of two, a lone cr becomes
        -- a newline too
        metadata = gsub(metadata,"\r\n?","\n")
    end
    report("metadata > \n\n%s\n",metadata)
end
185
186local expanded = lpdf.epdf.expanded
187
-- Collects the fonts that are referenced by the pages of a pdf file.
--
-- pdffile : loaded pdf document (its 'nofpages' and 'pages' fields are used)
--
-- Returns a table that maps font resource names onto font dictionaries. Fonts
-- found in the resources of an XObject get a name that is prefixed with the
-- (dot separated) names of the XObjects that lead to them.
local function getfonts(pdffile)
    local fonts = { }

    local function scan(container,prefix)
        local resources = container.Resources
        if not resources then
            return
        end
        -- names below an XObject are qualified with the path to that object
        local function qualified(name)
            if prefix then
                return prefix .. "." .. name
            end
            return name
        end
        local fontlist = resources.Font
        if fontlist then
            for name, font in expanded(fontlist) do
                fonts[qualified(name)] = font
            end
        end
        local xobjects = resources.XObject
        if xobjects then
            for name, xobject in expanded(xobjects) do
                scan(xobject,qualified(name))
            end
        end
    end

    for pagenumber=1,pdffile.nofpages do
        scan(pdffile.pages[pagenumber])
    end

    return fonts
end
215
-- Collects the unicode values that the ToUnicode CMap of a font maps onto.
--
-- font : font dictionary; its ToUnicode entry (when present) is called to
--        get the text of the CMap
--
-- Returns nothing when there is no ToUnicode entry or when it does not
-- deliver a string. Otherwise two tables are returned:
--
-- counts  : code point -> number of times that something maps onto it (a
--           key that has not been seen reads as 0)
-- indices : character code -> true, for each code that has a mapping
--
-- Targets in a ToUnicode CMap are strings of UTF-16BE code units, so a
-- character beyond the basic plane shows up as a surrogate pair and a
-- ligature as several units; both are taken care of here. In a bfrange the
-- last code point of the target is incremented for each next character code,
-- and the array variant (<first> <last> [<t1> <t2> ...]) is supported too.
local function getunicodes(font)
    local cid = font.ToUnicode
    if not cid then
        return
    end
    cid = cid()
    if type(cid) ~= "string" then
        return
    end

    local indices = { }
    local counts  = setmetatable({ }, {
        __index = function(t,k) t[k] = 0 return 0 end
    })

    -- Converts a hexadecimal target into a list of code points (and its
    -- size). Surrogate pairs are combined; a string that is not made of four
    -- digit units is taken as one plain hexadecimal number.
    local function decode(hex)
        local codepoints, n, high = { }, 0, nil
        if #hex % 4 == 0 then
            for unit in gmatch(hex,"%x%x%x%x") do
                local u = tonumber(unit,16)
                if high and u >= 0xDC00 and u <= 0xDFFF then
                    -- replace the pending high surrogate by the real value
                    codepoints[n] = 0x10000 + (high - 0xD800) * 0x400 + (u - 0xDC00)
                    high = nil
                else
                    n = n + 1
                    codepoints[n] = u
                    high = (u >= 0xD800 and u <= 0xDBFF) and u or nil
                end
            end
        end
        if n == 0 then
            local value = tonumber(hex,16)
            if value then
                n = 1
                codepoints[1] = value
            end
        end
        return codepoints, n
    end

    -- Registers a mapping from a character code onto a (non empty) list of
    -- code points; 'delta' is added to the last code point of the list.
    local function register(index,codepoints,n,delta)
        for j=1,n-1 do
            local c = codepoints[j]
            counts[c] = counts[c] + 1
        end
        local c = codepoints[n] + delta
        counts[c] = counts[c] + 1
        indices[index] = true
    end

    for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do
        -- The array variant is handled (and removed) first, otherwise the
        -- entries of an array could be mistaken for a regular triplet.
        local rest = gsub(s,"<([^>]+)>%s*<([^>]+)>%s*%[(.-)%]",function(first,last,list)
            local index = tonumber(first,16)
            local final = tonumber(last,16)
            if index and final then
                for target in gmatch(list,"<([^>]+)>") do
                    if index > final then
                        break
                    end
                    local codepoints, n = decode(target)
                    if n > 0 then
                        register(index,codepoints,n,0)
                    end
                    index = index + 1
                end
            end
            return " "
        end)
        for first, last, target in gmatch(rest,"<([^>]+)>%s*<([^>]+)>%s*<([^>]+)>") do
            first = tonumber(first,16)
            last  = tonumber(last,16)
            local codepoints, n = decode(target)
            -- entries that cannot be interpreted are skipped
            if first and last and n > 0 then
                for i=first,last do
                    register(i,codepoints,n,i-first)
                end
            end
        end
    end

    for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do
        for old, new in gmatch(s,"<([^>]+)>%s*<([^>]+)>") do
            local index = tonumber(old,16)
            if index then
                indices[index] = true
                local codepoints, n = decode(new)
                if n > 0 then
                    register(index,codepoints,n,0)
                end
            end
        end
    end

    return counts, indices
end
253
-- Reports the fonts that are used in a pdf file. In details mode each font
-- gets a block with its properties, encoding names, characters and code
-- points, followed by the character codes that are used per base font.
-- Otherwise a table with one row per font is shown.
--
-- filename : name of the pdf file to inspect
function scripts.pdf.fonts(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local usedfonts = getfonts(pdffile)
    local found     = { }
    local common    = setmetatableindex("table") -- base font name -> used character codes

    for id, font in sortedhash(usedfonts) do
        local basefont = font.BaseFont
        local encoding = font.Encoding
        local subtype  = font.Subtype
        local unicode  = font.ToUnicode
        local counts, indices = getunicodes(font)
        local codes = { }
        local chars = { }
        local freqs = { }
        local names = { }
        if counts then
            codes = sortedkeys(counts)
            for i=1,#codes do
                local code = codes[i]
                local mark = counts[code] > 1 and "+" or " "
                if code > 32 then
                    local c = utfchar(code)
                    -- append, so that skipped codes (space and below) leave
                    -- no holes: the list is measured and concatenated later
                    chars[#chars+1] = c
                    freqs[i] = format("U+%05X %s %s",code,mark,c)
                else
                    freqs[i] = format("U+%05X %s --",code,mark)
                end
            end
            if basefont and unicode then
                -- everything up to the last plus sign (as in a subset tag)
                -- is dropped so that subsets of one font are combined
                local name = gsub(basefont,"^.*%+","")
                local used = common[name]
                for index in next, indices do
                    used[index] = true
                end
            end
            for i=1,#codes do
                codes[i] = format("U+%05X",codes[i])
            end
        end
        local differences = encoding and encoding.Differences
        if differences then
            -- only the string entries of the list are kept as names
            for i=1,#differences do
                local di = differences[i]
                if type(di) == "string" then
                    names[#names+1] = di
                end
            end
        end
        found[id] = {
            basefont = basefont or "no basefont",
            encoding = (differences and "custom n=" .. #differences) or "no encoding",
            subtype  = subtype or "no subtype",
            -- this has to test the ToUnicode entry fetched above
            unicode  = unicode and "unicode" or "no vector",
            chars    = chars,
            codes    = codes,
            freqs    = freqs,
            names    = names,
        }
    end

    if details then
        for k, v in sortedhash(found) do
            report("id : %s", k)
            report("basefont : %s", v.basefont)
            report("encoding : % t", v.names)
            report("subtype : %s", v.subtype)
            report("unicode : %s", v.unicode)
            if #v.chars > 0 then
                report("characters : % t", v.chars)
            end
            if #v.codes > 0 then
                report("codepoints : % t", v.codes)
            end
            report("")
        end
        for k, v in sortedhash(common) do
            report("basefont : %s",k)
            report("indices : % t", sortedkeys(v))
            report("")
        end
    else
        -- the characters column is only added when some font has characters
        local haschar = false
        for k, v in sortedhash(found) do
            if #v.chars > 0 then
                haschar = true
                break
            end
        end
        local results = { { "id", "basefont", "encoding", "subtype", "unicode", haschar and "characters" or nil } }
        for k, v in sortedhash(found) do
            results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, haschar and concat(v.chars," ") or nil }
        end
        utilities.formatters.formatcolumns(results)
        report(results[1])
        report("")
        for i=2,#results do
            report(results[i])
        end
        report("")
    end
end
357
-- Prints the verbose representation of one object of a pdf file, or a
-- message when the file has no object with the given number.
--
-- filename : name of the pdf file to inspect
-- n        : object number; nothing happens when it is not given
function scripts.pdf.object(filename,n)
    if not n then
        return
    end
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local verbose = lpdf.epdf.verboseobject(pdffile,n)
    if not verbose then
        verbose = "no object with number " .. n
    end
    print(verbose)
end
366
367
368
369
370
371
-- Command line handling: the first file given on the command line is the pdf
-- file to work on and the first matching option decides what is done with
-- it. Without a file or a known option the help information is shown.
local filename = environment.files[1] or ""

local function main()
    if filename == "" then
        return application.help()
    end
    if environment.argument("info") then
        return scripts.pdf.info(filename)
    end
    if environment.argument("metadata") then
        return scripts.pdf.metadata(filename,environment.argument("pretty"))
    end
    if environment.argument("fonts") then
        return scripts.pdf.fonts(filename)
    end
    if environment.argument("object") then
        return scripts.pdf.object(filename,tonumber(environment.argument("object")))
    end
    if environment.argument("exporthelp") then
        return application.export(environment.argument("exporthelp"),filename)
    end
    return application.help()
end

main()
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410 |