mtx-pdf.lua /size: 13 Kb    last modification: 2021-10-28 13:50
-- Registers the version and ownership information of this script in the
-- global modules table; the table is created when it does not exist yet.
modules = modules or { }

modules["mtx-pdf"] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9local tonumber = tonumber
10local format, gmatch, gsub, match, find = string.format, string.gmatch, string.gsub, string.match, string.find
11local utfchar = utf.char
12local concat = table.concat
13local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
14
-- The help information in xml format: it is handed to logs.application below
-- and is the source of the help shown by the script. Each flag that the
-- dispatcher at the end of this file handles has an entry here.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-pdf</entry>
  <entry name="detail">ConTeXt PDF Helpers</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="info"><short>show some info about the given file</short></flag>
    <flag name="metadata"><short>show metadata xml blob</short></flag>
    <flag name="pretty"><short>replace newlines in metadata</short></flag>
    <flag name="fonts"><short>show used fonts (<ref name="detail"/>)</short></flag>
    <flag name="object"><short>show object</short></flag>
   </subcategory>
   <subcategory>
    <example><command>mtxrun --script pdf --info foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata --pretty foo.pdf</command></example>
    <example><command>mtxrun --script pdf --object=4 foo.pdf</command></example>
   </subcategory>
  </category>
 </flags>
</application>
]]
42
-- The application object is created from the script name, the banner and the
-- help information defined above; further on its help and export functions
-- are used by the dispatcher.
local application = logs.application({
    name     = "mtx-pdf",
    banner   = "ConTeXt PDF Helpers 0.10",
    helpinfo = helpinfo,
})

-- All messages of this script are issued via the reporter of the application.
local report = application.report
50
-- Locates the given file in the "tex" tree and runs it.
local function loadsupport(name)
    dofile(resolvers.findfile(name,"tex"))
end

-- Which pdf reading code gets loaded depends on the presence of the global
-- pdfe and, when it is there, on CONTEXTLMTXMODE being set.
if pdfe then
    if CONTEXTLMTXMODE then
        loadsupport("util-dim.lua")
        loadsupport("lpdf-ini.lmt")
        loadsupport("lpdf-pde.lmt")
    else
        loadsupport("lpdf-pde.lua")
    end
else
    loadsupport("lpdf-epd.lua")
end
60
-- The (global) namespace in which the functions of this script are collected;
-- existing tables are reused.
scripts     = scripts     or { }
scripts.pdf = scripts.pdf or { }

-- More verbose output is triggered by --detail as well as by --details.
local details = environment.argument("detail")
if not details then
    details = environment.argument("details")
end
65
-- Loads a pdf file with lpdf.epdf.load.
--
-- filename : the name of the file to load
--
-- Returns the loaded document. When no name is given, when the file does not
-- exist or when it can't be loaded, a message is reported and nothing is
-- returned.
local function loadpdffile(filename)
    if not (filename and filename ~= "") then
        report("no filename given")
        return
    end
    if not lfs.isfile(filename) then
        report("unknown file '%s'",filename)
        return
    end
    local document = lpdf.epdf.load(filename)
    if not document then
        report("no valid pdf file '%s'",filename)
        return
    end
    return document
end
80
-- Reports general information about a pdf file: versions, number of pages,
-- the entries of the info dictionary, the page boxes (all of them when
-- details are requested, otherwise only the cropbox) and the number of
-- annotations, destinations, javascripts, widgets and embedded files.
--
-- filename : the name of the pdf file
--
-- Nothing is returned; when the file can't be loaded loadpdffile reports why.
function scripts.pdf.info(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    -- NOTE(review): the fallbacks to an empty table assume that Catalog and
    -- Info can be absent (an info dictionary is optional in a pdf file), to
    -- be confirmed against the loader.
    local catalog  = pdffile.Catalog or { }
    local info     = pdffile.Info or { }
    local pages    = pdffile.pages
    -- the page count also serves as loop limit, so it must be a number
    local nofpages = pdffile.nofpages or 0
    local unset    = "<unset>"

    report("%-17s > %s","filename",          filename)
    report("%-17s > %s","pdf version",       catalog.Version      or unset)
    report("%-17s > %s","major version",     pdffile.majorversion or unset)
    report("%-17s > %s","minor version",     pdffile.minorversion or unset)
    report("%-17s > %s","number of pages",   nofpages)
    report("%-17s > %s","title",             info.Title           or unset)
    report("%-17s > %s","creator",           info.Creator         or unset)
    report("%-17s > %s","producer",          info.Producer        or unset)
    report("%-17s > %s","author",            info.Author          or unset)
    report("%-17s > %s","creation date",     info.CreationDate    or unset)
    report("%-17s > %s","modification date", info.ModDate         or unset)

    -- Reports the dimensions of the given box, one line per run of
    -- consecutive pages that share the same width and height. A page that
    -- lacks the box falls back on its mediabox.
    local function somebox(what)
        local box = string.lower(what)
        local width, height, start
        for i=1,nofpages do
            local page = pages[i]
            local bbox = page[what] or page.MediaBox or { 0, 0, 0, 0 }
            -- a pdf rectangle is [ llx lly urx ury ]
            local w = bbox[3] - bbox[1]
            local h = bbox[4] - bbox[2]
            if w ~= width or h ~= height then
                if start then
                    report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,i-1,width,height)
                end
                width, height, start = w, h, i
            end
        end
        -- there is no pending run when the document has no pages
        if start then
            report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,nofpages,width,height)
        end
    end

    if details then
        somebox("MediaBox")
        somebox("ArtBox")
        somebox("BleedBox")
        somebox("CropBox")
        somebox("TrimBox")
    else
        somebox("CropBox")
    end

    -- Only non zero counts are worth a line.
    local function reportcount(what,n)
        if n > 0 then
            report("%-17s > %s",what,n)
        end
    end

    -- The number of keys of an optional hash.
    local function nofkeys(t)
        return t and #sortedkeys(t) or 0
    end

    -- The counts below are reported irrespective of the details flag.

    local annotations = 0
    for i=1,nofpages do
        local a = pages[i].Annots
        if a then
            annotations = annotations + #a
        end
    end
    reportcount("annotations",annotations)

    reportcount("destinations",nofkeys(pdffile.destinations))
    reportcount("javascripts",nofkeys(pdffile.javascripts))

    -- widgets are counted as a list, the others by their keys
    local widgets = pdffile.widgets
    reportcount("widgets",widgets and #widgets or 0)

    reportcount("embeddedfiles",nofkeys(pdffile.embeddedfiles))
end
168
-- Reports the metadata blob that the catalog of a pdf file refers to.
--
-- filename : the name of the pdf file
-- pretty   : when set, carriage returns in the blob are replaced by newlines
--
-- When the catalog has no Metadata entry this is reported instead.
function scripts.pdf.metadata(filename,pretty)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local stream = pdffile.Catalog.Metadata
    if not stream then
        report("no metadata")
        return
    end
    -- calling the entry gives the content
    local blob = stream()
    if pretty then
        blob = gsub(blob,"\r","\n")
    end
    report("metadata > \n\n%s\n",blob)
end
185
local expanded = lpdf.epdf.expanded

-- Collects the fonts that are referenced from the resources of the pages and,
-- recursively, from the resources of the xobjects used there.
--
-- pdffile : a document as returned by loadpdffile
--
-- Returns a hash that maps a font name onto its font dictionary. The name of
-- a font that is found inside an xobject is prefixed by the names of the
-- enclosing xobjects, separated by periods.
local function getfonts(pdffile)
    local usedfonts = { }

    local collect

    collect = function(where,tag)
        local resources = where.Resources
        if not resources then
            return
        end
        -- a name at the page level is used as is, a nested one gets the tag
        local function tagged(name)
            if tag then
                return tag .. "." .. name
            else
                return name
            end
        end
        local fontlist = resources.Font
        if fontlist then
            for name, font in expanded(fontlist) do
                usedfonts[tagged(name)] = font
            end
        end
        local objects = resources.XObject
        if objects then
            for name, object in expanded(objects) do
                collect(object,tagged(name))
            end
        end
    end

    local pages = pdffile.pages
    for i=1,pdffile.nofpages do
        collect(pages[i])
    end

    return usedfonts
end
215
-- Decodes the content of a hexadecimal string that holds UTF-16BE (as used
-- for the targets in a ToUnicode CMap) into a list of code points. A high
-- surrogate that is followed by a low one is combined into one code point,
-- unpaired surrogates are kept as they are. A string without any group of
-- four hexadecimal digits is interpreted as one number when possible.
--
-- Returns the list and the number of entries in it.
local function utf16codes(str)
    local codes, n, high = { }, 0, nil
    for unit in gmatch(str,"%x%x%x%x") do
        local u = tonumber(unit,16)
        if high and u >= 0xDC00 and u <= 0xDFFF then
            -- the pending high surrogate in slot n gets replaced
            codes[n] = 0x10000 + (high - 0xD800) * 0x400 + (u - 0xDC00)
            high = nil
        else
            n = n + 1
            codes[n] = u
            if u >= 0xD800 and u <= 0xDBFF then
                high = u
            else
                high = nil
            end
        end
    end
    if n == 0 then
        local u = tonumber(str,16)
        if u then
            n = 1
            codes[1] = u
        end
    end
    return codes, n
end

-- Collects usage information from the ToUnicode CMap of a font.
--
-- font : a font dictionary; its ToUnicode entry (when present) is called in
--        order to get the CMap as string
--
-- Nothing is returned when there is no ToUnicode entry (or when it doesn't
-- deliver a string), otherwise two tables are returned:
--
-- counts  : maps a unicode code point onto the number of character codes that
--           map onto it; a key that is not present yet reads as 0 (and is
--           stored as such)
-- indices : has a true entry for each character code that has a mapping
--
-- Handled are bfchar entries and both forms of bfrange entries:
--
--   <first> <last> <target>         the last code point of the target is
--                                   incremented for each following code
--   <first> <last> [ <target> .. ]  one target per code
--
-- A target can consist of more than one code point; each of them is counted.
local function getunicodes(font)
    local cid = font.ToUnicode
    if not cid then
        return
    end
    cid = cid()
    if type(cid) ~= "string" then
        return
    end
    local counts  = setmetatable({ }, { __index = function(t,k) t[k] = 0 return 0 end })
    local indices = { }

    -- Registers the n code points in codes as the target of a character
    -- code; delta is added to the last code point (as needed for ranges).
    local function register(index,codes,n,delta)
        for j=1,n do
            local c = codes[j]
            if j == n then
                c = c + delta
            end
            counts[c] = counts[c] + 1
        end
        indices[index] = true
    end

 -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do
 --     for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
 --         print(a,b)
 --     end
 -- end

    for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do
        -- The array form is handled first and removed from the section,
        -- otherwise its elements would be taken for a range specification.
        s = gsub(s,"<([^>]+)>%s*<([^>]+)>%s*%[(.-)%]",function(first,last,list)
            first = tonumber(first,16)
            last  = tonumber(last,16)
            if first and last then
                local index = first
                for target in gmatch(list,"<([^>]+)>") do
                    if index > last then
                        break
                    end
                    local codes, n = utf16codes(target)
                    if n > 0 then
                        register(index,codes,n,0)
                    end
                    index = index + 1
                end
            end
            return ""
        end)
        for first, last, target in gmatch(s,"<([^>]+)>%s*<([^>]+)>%s*<([^>]+)>") do
            first = tonumber(first,16)
            last  = tonumber(last,16)
            local codes, n = utf16codes(target)
            -- entries that can't be interpreted are skipped
            if first and last and n > 0 then
                for i=first,last do
                    register(i,codes,n,i-first)
                end
            end
        end
    end
    for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do
        for old, new in gmatch(s,"<([^>]+)>%s*<([^>]+)>") do
            local index    = tonumber(old,16)
            local codes, n = utf16codes(new)
            if index then
                -- as before the code is flagged even when the target is empty
                if n > 0 then
                    register(index,codes,n,0)
                else
                    indices[index] = true
                end
            end
        end
    end
    return counts, indices
end
253
-- Reports the fonts that are used in a pdf file.
--
-- filename : the name of the pdf file
--
-- For each font the basefont, encoding, subtype and the presence of a
-- ToUnicode vector are collected, plus the characters that the vector maps
-- onto. When details are requested each font is reported separately, followed
-- by the character codes used per basefont (with the part of the name up to
-- the last plus sign removed); otherwise one table with a row per font is
-- reported.
function scripts.pdf.fonts(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local usedfonts = getfonts(pdffile)
    local found     = { }
    local common    = setmetatableindex("table")

    for k, v in sortedhash(usedfonts) do
        local basefont = v.BaseFont
        local encoding = v.Encoding
        local subtype  = v.Subtype
        local unicode  = v.ToUnicode
        local counts,
              indices  = getunicodes(v)
        local codes    = { }
        local chars    = { }
        local freqs    = { }
        local names    = { }
        if counts then
            codes = sortedkeys(counts)
            for i=1,#codes do
                local u = codes[i]
                local m = counts[u] > 1 and "+" or " " -- more than one source
                if u > 32 then
                    local c = utfchar(u)
                    -- appending keeps the list free of holes when some
                    -- code points are skipped, so that it can be
                    -- measured and concatenated later on
                    chars[#chars+1] = c
                    freqs[i] = format("U+%05X  %s  %s",u,m,c)
                else
                    freqs[i] = format("U+%05X  %s  --",u,m)
                end
            end
            if basefont and unicode then
                local b = gsub(basefont,"^.*%+","")
                local c = common[b]
                for index in next, indices do
                    c[index] = true
                end
            end
            for i=1,#codes do
                codes[i] = format("U+%05X",codes[i])
            end
        end
        local d = encoding and encoding.Differences
        if d then
            -- only the (glyph) names are collected
            for i=1,#d do
                local di = d[i]
                if type(di) == "string" then
                    names[#names+1] = di
                end
            end
        end
        found[k] = {
            basefont = basefont or "no basefont",
            encoding = (d and "custom n=" .. #d) or "no encoding",
            subtype  = subtype or "no subtype",
            unicode  = unicode and "unicode" or "no vector",
            chars    = chars,
            codes    = codes,
            freqs    = freqs,
            names    = names,
        }
    end

    if details then
        for k, v in sortedhash(found) do
            report("id         : %s",  k)
            report("basefont   : %s",  v.basefont)
            report("encoding   : % t", v.names)
            report("subtype    : %s",  v.subtype)
            report("unicode    : %s",  v.unicode)
            if #v.chars > 0 then
                report("characters : % t", v.chars)
            end
            if #v.codes > 0 then
                report("codepoints : % t", v.codes)
            end
            report("")
        end
        for k, v in sortedhash(common) do
            report("basefont   : %s",k)
            report("indices    : % t", sortedkeys(v))
            report("")
        end
    else
        -- the characters column is only there when it has content
        local haschar = false
        for k, v in sortedhash(found) do
            if #v.chars > 0 then
                haschar = true
                break
            end
        end
        local results = { { "id", "basefont", "encoding", "subtype", "unicode", haschar and "characters" or nil } }
        for k, v in sortedhash(found) do
            results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, haschar and concat(v.chars," ") or nil }
        end
        utilities.formatters.formatcolumns(results)
        report(results[1])
        report("")
        for i=2,#results do
            report(results[i])
        end
        report("")
    end
end
357
-- Prints the verbose representation of an object of a pdf file.
--
-- filename : the name of the pdf file
-- n        : the number of the object; the dispatcher passes the numeric
--            value of --object, so it is nil when no valid number is given
--
-- The object (or a message when lpdf.epdf.verboseobject delivers nothing) is
-- printed and not reported.
function scripts.pdf.object(filename,n)
    if not n then
        -- don't quit silently when --object has no (valid) value
        report("no valid object number given")
        return
    end
    local pdffile = loadpdffile(filename)
    if pdffile then
        print(lpdf.epdf.verboseobject(pdffile,n) or ("no object with number " .. n))
    end
end
366
367-- scripts.pdf.info("e:/tmp/oeps.pdf")
368-- scripts.pdf.metadata("e:/tmp/oeps.pdf")
369-- scripts.pdf.fonts("e:/tmp/oeps.pdf")
370-- scripts.pdf.linearize("e:/tmp/oeps.pdf")
371
-- The first file on the command line is the one that gets processed; without
-- a file, or without one of the known flags, the help information is shown.
-- The flags are tested in a fixed order and only the first match is handled.

local filename = environment.files[1] or ""
local argument = environment.argument

if filename == "" then
    application.help()
else
    if argument("info") then
        scripts.pdf.info(filename)
    elseif argument("metadata") then
        scripts.pdf.metadata(filename,argument("pretty"))
    elseif argument("fonts") then
        scripts.pdf.fonts(filename)
    elseif argument("object") then
        scripts.pdf.object(filename,tonumber(argument("object")))
    elseif argument("exporthelp") then
        application.export(argument("exporthelp"),filename)
    else
        application.help()
    end
end
389
390-- a variant on an experiment by hartmut
391
392--~ function downloadlinks(filename)
393--~     local document = lpdf.epdf.load(filename)
394--~     if document then
395--~         local pages = document.pages
396--~         for p = 1,#pages do
397--~             local annotations = pages[p].Annots
398--~             if annotations then
399--~                 for a=1,#annotations do
400--~                     local annotation = annotations[a]
401--~                     local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI
402--~                     if uri and string.find(uri,"^http") then
403--~                         os.execute("wget " .. uri)
404--~                     end
405--~                 end
406--~             end
407--~         end
408--~     end
409--~ end
410