-- mtx-pdf.lua / size: 26 Kb / last modification: 2024-01-16 09:02
-- Register this script in the global module registry; the registry table is
-- created here when nothing has set it up yet.
modules = modules or { }

modules['mtx-pdf'] = {
    license   = "see context related readme files",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    comment   = "companion to mtxrun.lua",
    version   = 1.001,
}
8
9local tonumber = tonumber
10local format, gmatch, gsub, match, find = string.format, string.gmatch, string.gsub, string.match, string.find
11local utfchar = utf.char
12local concat, insert, swapped = table.concat, table.insert, table.swapped
13local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
14
-- The help information that is handed to logs.application below. The flags
-- and examples listed here should match the arguments that are checked in the
-- dispatcher at the end of this file: objects are shown with --object (there
-- is no --stream) and --signature is a separate action.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-pdf</entry>
  <entry name="detail">ConTeXt PDF Helpers</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="info"><short>show some info about the given file</short></flag>
    <flag name="metadata"><short>show metadata xml blob</short></flag>
    <flag name="formdata"><short>show formdata</short></flag>
    <flag name="pretty"><short>replace newlines in metadata</short></flag>
    <flag name="fonts"><short>show used fonts (<ref name="detail"/>)</short></flag>
    <flag name="object"><short>show object</short></flag>
    <flag name="links"><short>show links</short></flag>
    <flag name="signature"><short>show signature</short></flag>
    <flag name="sign"><short>sign document (assumes signature template)</short></flag>
    <flag name="verify"><short>verify document</short></flag>
   </subcategory>
   <subcategory>
    <example><command>mtxrun --script pdf --info foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata --pretty foo.pdf</command></example>
    <example><command>mtxrun --script pdf --object=4 foo.pdf</command></example>
    <example><command>mtxrun --script pdf --sign --certificate=somesign.pem --password=test --uselibrary somefile</command></example>
    <example><command>mtxrun --script pdf --verify --certificate=somesign.pem --password=test --uselibrary somefile</command></example>
   </subcategory>
  </category>
 </flags>
</application>
]]
48
-- The application object provides the help/export handlers used by the
-- dispatcher at the end of the file, as well as the reporter used everywhere.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "ConTeXt PDF Helpers 0.10",
    name     = "mtx-pdf",
})

local report = application.report
56
-- Load the pdf support files. Which ones are needed depends on whether the
-- global pdfe is present and on the CONTEXTLMTXMODE flag; the json helper is
-- always loaded last. The files are looked up as "tex" files and run in the
-- order in which they are listed.
do
    local wanted
    if not pdfe then
        wanted = { "lpdf-epd.lua" }
    elseif CONTEXTLMTXMODE then
        wanted = { "util-dim.lua", "lpdf-ini.lmt", "lpdf-pde.lmt", "lpdf-sig.lmt" }
    else
        wanted = { "lpdf-pde.lua" }
    end
    wanted[#wanted+1] = "util-jsn.lua"
    for i=1,#wanted do
        dofile(resolvers.findfile(wanted[i],"tex"))
    end
end

-- The (global) namespace in which the actions of this script are collected.
if not scripts then
    scripts = { }
end
if not scripts.pdf then
    scripts.pdf = { }
end

-- Both --detail and --details enable the more verbose reports.
local details = environment.argument("detail")
if not details then
    details = environment.argument("details")
end
73
-- Loads the given pdf file and returns the document object. When no name is
-- given, when the file does not exist, or when it cannot be loaded, a message
-- is reported and nothing is returned.
local function loadpdffile(filename)
    if not filename or filename == "" then
        report("no filename given")
        return
    end
    if not lfs.isfile(filename) then
        report("unknown file %a",filename)
        return
    end
    local document = lpdf.epdf.load(filename)
    if not document then
        report("no valid pdf file %a",filename)
        return
    end
    return document
end
88
-- Reports general properties of a pdf file: the versions, the number of pages,
-- the entries of the info dictionary, the dimensions of the page boxes and the
-- number of annotations, destinations, javascripts, widgets and embedded files.
--
-- filename : the pdf file to inspect
--
-- With --detail(s) all five page boxes are reported, otherwise only the
-- cropbox. Consecutive pages with the same dimensions are reported as a range.
function scripts.pdf.info(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local catalog  = pdffile.Catalog
    -- the info dictionary is optional in a pdf file, so (assuming the loader
    -- then leaves this field unset) we fall back on an empty table
    local info     = pdffile.Info or { }
    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages or 0

    local unset    = "<unset>"

    report("%-17s > %s","filename",          filename)
    report("%-17s > %s","pdf version",       catalog.Version      or unset)
    report("%-17s > %s","major version",     pdffile.majorversion or unset)
    report("%-17s > %s","minor version",     pdffile.minorversion or unset)
    report("%-17s > %s","number of pages",   nofpages)
    report("%-17s > %s","title",             info.Title           or unset)
    report("%-17s > %s","creator",           info.Creator         or unset)
    report("%-17s > %s","producer",          info.Producer        or unset)
    report("%-17s > %s","author",            info.Author          or unset)
    report("%-17s > %s","creation date",     info.CreationDate    or unset)
    report("%-17s > %s","modification date", info.ModDate         or unset)

    -- Reports the dimensions of the given box; a page that lacks the box falls
    -- back on its mediabox.
    local function somebox(what)
        local box = string.lower(what)
        local width, height, start
        for i=1, nofpages do
            local page = pages[i]
            local bbox = page[what] or page.MediaBox or { 0, 0, 0, 0 }
            -- a box is { llx, lly, urx, ury } so the width is the horizontal
            -- extent (3 - 1) and the height the vertical one (4 - 2)
            local w, h = bbox[3]-bbox[1], bbox[4]-bbox[2]
            if w ~= width or h ~= height then
                if start then
                    report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,i-1,width,height)
                end
                width, height, start = w, h, i
            end
        end
        -- start is only unset when there are no pages, in which case there is
        -- nothing to report
        if start then
            report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,nofpages,width,height)
        end
    end

    if details then
        somebox("MediaBox")
        somebox("ArtBox")
        somebox("BleedBox")
        somebox("CropBox")
        somebox("TrimBox")
    else
        somebox("CropBox")
    end

    -- Only counts larger than zero are shown.
    local function somecount(what,n)
        if n and n > 0 then
            report("%-17s > %s",what,n)
        end
    end

    -- the counts below are always shown (they once depended on --detail)

    local annotations = 0
    for i=1,nofpages do
        local a = pages[i].Annots
        if a then
            annotations = annotations + #a
        end
    end
    somecount("annotations",annotations)

    local destinations  = pdffile.destinations
    local javascripts   = pdffile.javascripts
    local widgets       = pdffile.widgets
    local embeddedfiles = pdffile.embeddedfiles

    -- widgets is a list, the others are hashes so there we count the keys
    somecount("destinations", destinations  and #sortedkeys(destinations))
    somecount("javascripts",  javascripts   and #sortedkeys(javascripts))
    somecount("widgets",      widgets       and #widgets)
    somecount("embeddedfiles",embeddedfiles and #sortedkeys(embeddedfiles))
end
176
-- Converts a bitset into a set of names.
--
-- flag  : integer with the bits to test
-- flags : optional table that maps names onto bit masks
--
-- Returns a (possibly empty) table with an entry name = true for every mask
-- in flags that has at least one bit in common with flag.
local function flagstoset(flag,flags)
    local set = { }
    if not flags then
        return set
    end
    for name, mask in next, flags do
        local isset = (flag & mask) ~= 0
        if isset then
            set[name] = true
        end
    end
    return set
end
188
-- The field flag bits (the /Ff entry of a field) that are tested below, as
-- defined in the pdf reference. These are used when no global widgetflags
-- table is available; without any flag table every text field would be a
-- "line", every button a "check" and every choice field a "choice".
local fieldflags = {
    MultiLine      = 0x0001000, -- bit 13, text fields
    Radio          = 0x0008000, -- bit 16, button fields
    PushButton     = 0x0010000, -- bit 17, button fields
    PopUp          = 0x0020000, -- bit 18, choice fields (called Combo in the reference)
    Edit           = 0x0040000, -- bit 19, choice fields
    RadiosInUnison = 0x2000000, -- bit 26, button fields
}

-- Maps a field type (/FT) and its flag set onto the type that is reported;
-- nothing is returned for an unknown field type.
local function fieldkind(what,wflags)
    if what == "Tx" then
        return wflags.MultiLine and "text" or "line"
    elseif what == "Btn" then
        if wflags.Radio or wflags.RadiosInUnison then
            return "radio"
        elseif wflags.PushButton then
            return "push"
        else
            return "check"
        end
    elseif what == "Ch" then
        -- F Ff FT Opt T | AA OC (rest follows)
        if not wflags.PopUp then
            return "choice"
        elseif wflags.Edit then
            return "combo"
        else
            return "popup"
        end
    elseif what == "Sig" then
        return "signature"
    end
end

-- Reports the form fields (widgets) of a pdf file that have a value, as a
-- table with the columns type, name and value.
--
-- filename : the pdf file to inspect
-- save     : when set the values are also written to <name>-formdata.json
--            (save == "json") or to <name>-formdata.lua (any other value)
--
-- The name, type and flags of a widget are taken from its parent when the
-- widget itself lacks them; the value is always taken from the widget itself.
function scripts.pdf.formdata(filename,save)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local widgets = pdffile.widgets
    if not widgets then
        return
    end
    local results = { { "type", "name", "value" } }
    for i=1,#widgets do
        local annotation = widgets[i]
        local parent = annotation.Parent or { }
        local name   = annotation.T or parent.T
        local what   = annotation.FT or parent.FT
        if name and what then
            local value = annotation.V and tostring(annotation.V) or ""
            if value ~= "" then
                -- a global widgetflags table takes precedence when some loaded
                -- library provides one
                local wflags = flagstoset(annotation.Ff or parent.Ff or 0, widgetflags or fieldflags)
                local kind   = fieldkind(what,wflags)
                if kind then
                    results[#results+1] = { kind, name, value }
                end
            end
        end
    end
    if save then
        -- the first row of results is the header so we start at the second one
        local values = { }
        for i=2,#results do
            local result = results[i]
            values[#values+1] = {
                type  = result[1],
                name  = result[2],
                value = result[3],
            }
        end
        local data = {
            filename = filename,
            values   = values,
        }
        local name = file.nameonly(filename) .. "-formdata"
        if save == "json" then
            name = file.addsuffix(name,"json")
            io.savedata(name,utilities.json.tojson(data))
        else
            name = file.addsuffix(name,"lua")
            table.save(name,data)
        end
        report("")
        report("%i widgets found, %i values saved in %a",#widgets,#results-1,name)
        report("")
    end
    utilities.formatters.formatcolumns(results)
    report(results[1])
    report("")
    for i=2,#results do
        report(results[i])
    end
    report("")
end
279
-- Shows or saves the signature of a pdf file. Only the first signature field
-- (/FT /Sig, possibly inherited from the parent) that has a value is handled.
--
-- filename : the pdf file to inspect
-- save     : when set the signature is written to <name>-signature.bin
--            instead of being reported
--
-- When no such field is found this is reported.
function scripts.pdf.signature(filename,save)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local widgets = pdffile.widgets
    if widgets then
        for i=1,#widgets do
            local annotation = widgets[i]
            local parent = annotation.Parent or { }
            local what   = annotation.FT or parent.FT
            if what == "Sig" then
                local value = annotation.V
                if value then
                    -- tostring always returns a string, so the fallback has to
                    -- be applied before the conversion, otherwise a missing
                    -- entry ends up as the text "nil"
                    local contents = value.Contents
                    contents = contents and tostring(contents) or ""
                    report("")
                    if save then
                        local target = file.nameonly(filename) .. "-signature.bin"
                        report("signature saved in %a",target)
                        io.savedata(target,string.tobytes(contents))
                    else
                        report("signature: %s",contents)
                    end
                    report("")
                    return
                end
            end
        end
    end
    report("there is no signature")
end
311
-- Signs a pdf file by handing it over to lpdf.sign.
--
-- filename : the file to sign; the suffix pdf is added via file.addsuffix
-- save     : not used
--
-- The --certificate and --password arguments are mandatory and have to be
-- strings; --purge and --uselibrary are passed on as given.
function scripts.pdf.sign(filename,save)
    local target = file.addsuffix(filename,"pdf")
    if lfs.isfile(target) then
        local argument    = environment.argument
        local certificate = argument("certificate")
        local password    = argument("password")
        if type(certificate) == "string" and type(password) == "string" then
            lpdf.sign {
                filename    = target,
                certificate = certificate,
                password    = password,
                purge       = argument("purge"),
                uselibrary  = argument("uselibrary"),
            }
        else
            report("provide --certificate and --password")
        end
    else
        report("invalid pdf file %a",target)
    end
end
332
-- Verifies a signed pdf file by handing it over to lpdf.verify.
--
-- filename : the file to verify; the suffix pdf is added via file.addsuffix
-- save     : not used
--
-- The --certificate and --password arguments are mandatory and have to be
-- strings; --uselibrary is passed on as given.
function scripts.pdf.verify(filename,save)
    local target = file.addsuffix(filename,"pdf")
    if lfs.isfile(target) then
        local argument    = environment.argument
        local certificate = argument("certificate")
        local password    = argument("password")
        if type(certificate) == "string" and type(password) == "string" then
            lpdf.verify {
                filename    = target,
                certificate = certificate,
                password    = password,
                uselibrary  = argument("uselibrary"),
            }
        else
            report("provide --certificate and --password")
        end
    else
        report("invalid pdf file %a",target)
    end
end
352
-- Shows the metadata blob that the catalog of a pdf file refers to.
--
-- filename : the pdf file to inspect
-- pretty   : when set every carriage return in the blob becomes a newline
function scripts.pdf.metadata(filename,pretty)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local stream = pdffile.Catalog.Metadata
    if not stream then
        report("no metadata")
        return
    end
    -- calling the entry gives us the content
    local blob = stream()
    if pretty then
        blob = gsub(blob,"\r","\n")
    end
    report("metadata > \n\n%s\n",blob)
end
369
370local expanded = lpdf.epdf.expanded
371
-- Collects the fonts that are referenced from the resources of all pages.
--
-- pdffile : a loaded pdf document
--
-- Returns a table that maps a font id onto its font dictionary. Fonts found in
-- the resources of an xobject or of a Type3 font get an id that is prefixed by
-- the (dot separated) names that lead to them.
local function getfonts(pdffile)
    local usedfonts = { }

    local function collect(where,tag)
        local resources = where.Resources
        if not resources then
            return
        end

        -- the id of an entry: its name, prefixed by the path so far (if any)
        local function tagged(name)
            if tag then
                return tag .. "." .. name
            else
                return name
            end
        end

        local fontlist = resources.Font
        if fontlist then
            for name, font in expanded(fontlist) do
                local id = tagged(name)
                usedfonts[id] = font
                -- a Type3 font can have resources of its own
                if font.Subtype == "Type3" then
                    collect(font,id)
                end
            end
        end

        local objects = resources.XObject
        if objects then
            for name, object in expanded(objects) do
                collect(object,tagged(name))
            end
        end
    end

    local nofpages = pdffile.nofpages
    for pagenumber=1,nofpages do
        collect(pdffile.pages[pagenumber])
    end

    return usedfonts
end
402
403-- todo: fromunicode16
404
-- Analyzes the ToUnicode mapping of a font.
--
-- font : a font dictionary
--
-- Returns two tables: counts maps a unicode onto the number of times that it
-- is the target of a mapping, and indices is the set of character codes
-- that are mapped. Nothing is returned when the font has no ToUnicode entry
-- or when that entry has no textual content.
--
-- Only the <first> <last> <start> form of bfrange and the bfchar entries are
-- handled; a target in a bfchar entry is split in chunks of four hex digits.
-- Entries that don't contain valid hexadecimal numbers are skipped, instead of
-- triggering an error on a nil value.
local function getunicodes(font)
    local cid = font.ToUnicode
    if not cid then
        return
    end
    cid = cid()
    if type(cid) ~= "string" then
        return
    end
    local counts  = { }
    local indices = { }
 -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do
 --     for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
 --         print(a,b)
 --     end
 -- end
    -- a unicode that we haven't seen yet starts out with a zero count
    setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end)
    for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do
        for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
            first  = tonumber(first,16)
            last   = tonumber(last,16)
            offset = tonumber(offset,16)
            if first and last and offset then
                -- from here on offset is the distance between code and unicode
                offset = offset - first
                for i=first,last do
                    local c = i + offset
                    counts[c] = counts[c] + 1
                    indices[i] = true
                end
            end
        end
    end
    for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do
        for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
            local index = tonumber(old,16)
            if index then
                indices[index] = true
                for n in gmatch(new,"....") do
                    local c = tonumber(n,16)
                    if c then
                        counts[c] = counts[c] + 1
                    end
                end
            end
        end
    end
    return counts, indices
end
442
-- Reports the fonts that are used in a pdf file.
--
-- filename : the pdf file to inspect
--
-- With --detail(s) each font gets a multi line entry (id, basefont, the names
-- from the encoding differences, subtype, unicode state, characters and
-- codepoints), followed by the mapped indices per basefont. Otherwise a table
-- with one row per font is shown.
function scripts.pdf.fonts(filename)
    local pdffile = loadpdffile(filename)
    if pdffile then
        local usedfonts = getfonts(pdffile)
        local found     = { }
        -- presumably a table that creates its subtables on demand: common[b]
        -- is indexed below without being assigned first (confirm in l-table)
        local common    = table.setmetatableindex("table")
        for k, v in table.sortedhash(usedfonts) do
            local basefont = v.BaseFont
            local encoding = v.Encoding
            local subtype  = v.Subtype
            local unicode  = v.ToUnicode
            local counts,
                  indices  = getunicodes(v)
            local codes    = { }
            local chars    = { }
         -- local freqs    = { }
            local names    = { }
            if counts then
                -- codes holds the sorted unicodes, chars gets a printable
                -- representation at the same position
                codes = sortedkeys(counts)
                for i=1,#codes do
                    local k = codes[i] -- shadows the font id inside this loop only
                    if k > 32 then
                        local c = utfchar(k)
                        chars[i] = c
                     -- freqs[i] = format("U+%05X  %s  %s",k,counts[k] > 1 and "+" or " ", c)
                    else
                        -- the space and everything below it is shown symbolic
                        chars[i] = k == 32 and "SPACE" or format("U+%03X",k)
                     -- freqs[i] = format("U+%05X  %s  --",k,counts[k] > 1 and "+" or " ")
                    end
                end
                if basefont and unicode then
                    -- everything up to and including the last + is stripped
                    -- (NOTE(review): presumably the subset tag, confirm) so that
                    -- the indices of instances of the same font get merged
                    local b = gsub(basefont,"^.*%+","")
                    local c = common[b]
                    for k in next, indices do
                        c[k] = true
                    end
                end
                -- the numbers are no longer needed so we can format them in place
                for i=1,#codes do
                    codes[i] = format("U+%05X",codes[i])
                end
            end
            -- only the strings in the differences array are collected
            local d = encoding and encoding.Differences
            if d then
                for i=1,#d do
                    local di = d[i]
                    if type(di) == "string" then
                        names[#names+1] = di
                    end
                end
            end
            -- without basefont we fall back on the name in the font descriptor
            if not basefont then
                local fontdescriptor = v.FontDescriptor
                if fontdescriptor then
                    basefont = fontdescriptor.FontName
                end
            end
            found[k] = {
                basefont = basefont or "no basefont",
                encoding = (d and "custom n=" .. #d) or "no encoding",
                subtype  = subtype or "no subtype",
                unicode  = unicode and "unicode" or "no vector",
                chars    = chars,
                codes    = codes,
             -- freqs    = freqs,
                names    = names,
            }
        end

        -- set when at least one font has characters to show
        local haschar = false

        -- list is keyed by the font id with all numbers zero padded to (at
        -- least) five digits so that sorting the keys orders them numerically
        local list = { }
        for k, v in next, found do
            local s = string.gsub(k,"(%d+)",function(s) return string.format("%05i",tonumber(s)) end)
            list[s] = { k, v }
            if #v.chars > 0 then
                haschar = true
            end
        end

        if details then
            -- the detailed report is ordered by the plain (unpadded) font id
            for k, v in sortedhash(found) do
--             for s, f in sortedhash(list) do
--                 local k = f[1]
--                 local v = f[2]
                report("id         : %s",  k)
                report("basefont   : %s",  v.basefont)
                report("encoding   : % t", v.names)
                report("subtype    : %s",  v.subtype)
                report("unicode    : %s",  v.unicode)
                if #v.chars > 0 then
                    report("characters : % t", v.chars)
                end
                if #v.codes > 0 then
                    report("codepoints : % t", v.codes)
                end
                report("")
            end
            for k, v in sortedhash(common) do
                report("basefont   : %s",k)
                report("indices    : % t", sortedkeys(v))
                report("")
            end
        else
            -- the characters column is only present when there is something to show
            local results = { { "id", "basefont", "encoding", "subtype", "unicode", haschar and "characters" or nil } }
            -- the first font with a given basefont shows its characters, later
            -- ones refer to that first one
            local shared  = { }
            for s, f in sortedhash(list) do
                local k = f[1]
                local v = f[2]
                local basefont   = v.basefont
                local characters = shared[basefont] or (haschar and concat(v.chars," ")) or nil
                results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, characters }
                if not shared[basefont] then
                    shared[basefont] = "shared with " .. k
                end
            end
            utilities.formatters.formatcolumns(results)
            report(results[1])
            report("")
            for i=2,#results do
                report(results[i])
            end
            report("")
        end
    end
end
568
-- Prints the verbose representation of an object.
--
-- filename : the pdf file to inspect
-- n        : the object number; nothing happens when it is not given
--
-- When lpdf.epdf.verboseobject returns nothing a message is printed instead.
function scripts.pdf.object(filename,n)
    if not n then
        return
    end
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local verbose = lpdf.epdf.verboseobject(pdffile,n)
    if verbose then
        print(verbose)
    else
        print("no object with number " .. n)
    end
end
577
-- Reports the link annotations and the destinations of a pdf file.
--
-- filename : the pdf file to inspect
-- asked    : optional page number; when given only the links on that page and
--            the destinations that point to that page are reported
--
-- An out of range page number is reported and ends the run. Handled actions
-- are GoTo (also a plain Dest entry), GoToR and URI.
function scripts.pdf.links(filename,asked)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages

    if asked and (asked < 1 or asked > nofpages) then
        report("")
        report("no page %i, last page %i",asked,nofpages)
        report("")
        return
    end

    -- maps a page object onto its page number
    local reverse = swapped(pages)

    local function banner(pagenumber)
        report("")
        report("annotations @ page %i",pagenumber)
        report("")
    end

    -- A destination is either an array that starts with the target (page) or,
    -- presumably, a plain string in case of a named destination; indexing a
    -- string gives nil so in that case we use the name itself.
    local function target(D)
        if type(D) == "string" then
            return D
        else
            return D[1]
        end
    end

    local function show(pagenumber)
        local page   = pages[pagenumber]
        local annots = page.Annots
        if not annots then
            return
        end
        local done = false
        for i=1,#annots do
            local annotation = annots[i]
            local a = annotation.A
            if not a then
                local d = annotation.Dest
                if d then
                    a = { S = "GoTo", D = d } -- no need for a dict
                end
            end
            if a then
                local S = a.S
                if S == "GoTo" then
                    local D = a.D
                    if D then
                        local D1 = target(D)
                        local R1 = reverse[D1]
                        if not done then
                            banner(pagenumber)
                            done = true
                        end
                        if tonumber(R1) then
                            report("intern, page % 4i",R1)
                        else
                            report("intern, name %s",tostring(D1))
                        end
                    end
                elseif S == "GoToR" then
                    local D = a.D
                    if D then
                        -- the file is an entry in the action dictionary (this
                        -- used to index an undefined global A)
                        local F = a.F
                        if F then
                            local D1 = target(D)
                            if not done then
                                banner(pagenumber)
                                done = true
                            end
                            if tonumber(D1) then
                                report("extern, page % 4i, file %s",D1 + 1,F)
                            else
                                report("extern, page % 4i, file %s, name %s",0,F,D1)
                            end
                        end
                    end
                elseif S == "URI" then
                    local URI = a.URI
                    if URI then
                        report("extern, uri   %a",URI)
                    end
                end
            end
        end
    end

    if asked then
        show(asked)
    else
        for pagenumber=1,nofpages do
            show(pagenumber)
        end
    end

    local destinations = pdffile.destinations
    if destinations then
        if asked then
            report("")
            report("destinations to page %i",asked)
            report("")
            for k, v in sortedhash(destinations) do
                local D = v.D
                if D then
                    local p = reverse[D[1]] or 0
                    if p == asked then
                        report(k)
                    end
                end
            end
        else
            report("")
            report("destinations")
            report("")
            local list = setmetatableindex("table")
            for k, v in sortedhash(destinations) do
                local D = v.D
                if D then
                    -- a destination that doesn't resolve to a page is put on
                    -- page 0 (as in the branch above), because nil can neither
                    -- be formatted as number nor be used as key
                    local p = reverse[D[1]] or 0
                    report("tag %s, page % 4i",k,p)
                    insert(list[p],k)
                end
            end
            for k, v in sortedhash(list) do
                report("")
                report("page %i, names % t",k,v)
            end
        end
    end
end
703
704-- scripts.pdf.info("e:/tmp/oeps.pdf")
705-- scripts.pdf.metadata("e:/tmp/oeps.pdf")
706-- scripts.pdf.fonts("e:/tmp/oeps.pdf")
707-- scripts.pdf.linearize("e:/tmp/oeps.pdf")
708
-- The first file on the command line is the one that we operate on.
local filename = environment.files[1] or ""

do

    local argument = environment.argument

    -- The actions are tested in this order and only the first one whose flag
    -- is given gets executed.
    local actions = {
        { "info",       function() scripts.pdf.info(filename) end },
        { "metadata",   function() scripts.pdf.metadata(filename,argument("pretty")) end },
        { "formdata",   function() scripts.pdf.formdata(filename,argument("save")) end },
        { "fonts",      function() scripts.pdf.fonts(filename) end },
        { "object",     function() scripts.pdf.object(filename,tonumber(argument("object"))) end },
        { "links",      function() scripts.pdf.links(filename,tonumber(argument("page"))) end },
        { "signature",  function() scripts.pdf.signature(filename,argument("save")) end },
        { "sign",       function() scripts.pdf.sign(filename) end },
        { "verify",     function() scripts.pdf.verify(filename) end },
        { "exporthelp", function() application.export(argument("exporthelp"),filename) end },
    }

    local done = false

    -- without filename we only show the help information
    if filename ~= "" then
        for i=1,#actions do
            local action = actions[i]
            if argument(action[1]) then
                action[2]()
                done = true
                break
            end
        end
    end

    if not done then
        application.help()
    end

end
736
737-- a variant on an experiment by hartmut
738
739--~ function downloadlinks(filename)
740--~     local document = lpdf.epdf.load(filename)
741--~     if document then
742--~         local pages = document.pages
743--~         for p = 1,#pages do
744--~             local annotations = pages[p].Annots
745--~             if annotations then
746--~                 for a=1,#annotations do
747--~                     local annotation = annotations[a]
748--~                     local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI
749--~                     if uri and string.find(uri,"^http") then
750--~                         os.execute("wget " .. uri)
751--~                     end
752--~                 end
753--~             end
754--~         end
755--~     end
756--~ end
757