-- Register this script in the global table of loaded modules, creating that
-- table when nothing has done so yet.

modules = modules or { }

modules['mtx-pdf'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local tonumber = tonumber
10local format, gmatch, gsub, match, find = string.format, string.gmatch, string.gsub, string.match, string.find
11local utfchar = utf.char
12local concat, insert, swapped = table.concat, table.insert, table.swapped
13local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
14
-- The help information shown by the application. The flags listed here are the
-- ones that the dispatcher at the end of this file (or one of the functions it
-- calls) actually consults.

local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-pdf</entry>
  <entry name="detail">ConTeXt PDF Helpers</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="info"><short>show some info about the given file</short></flag>
    <flag name="metadata"><short>show metadata xml blob</short></flag>
    <flag name="formdata"><short>show formdata</short></flag>
    <flag name="pretty"><short>replace newlines in metadata</short></flag>
    <flag name="fonts"><short>show used fonts (<ref name="detail"/>)</short></flag>
    <flag name="object"><short>show object</short></flag>
    <flag name="links"><short>show links</short></flag>
    <flag name="signature"><short>show signature</short></flag>
    <flag name="sign"><short>sign document (assumes signature template)</short></flag>
    <flag name="verify"><short>verify document</short></flag>
    <flag name="detail"><short>show more details with info and fonts</short></flag>
    <flag name="page"><short>only show the links of the given page</short></flag>
    <flag name="save"><short>save formdata (lua or json) or signature in a file</short></flag>
   </subcategory>
   <subcategory>
    <example><command>mtxrun --script pdf --info foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata --pretty foo.pdf</command></example>
    <example><command>mtxrun --script pdf --object=4 foo.pdf</command></example>
    <example><command>mtxrun --script pdf --sign --certificate=somesign.pem --password=test --uselibrary somefile</command></example>
    <example><command>mtxrun --script pdf --verify --certificate=somesign.pem --password=test --uselibrary somefile</command></example>
   </subcategory>
  </category>
 </flags>
</application>
]]
48
-- The application object provides the reporter and the help machinery.

local application = logs.application {
    name     = "mtx-pdf",
    banner   = "ConTeXt PDF Helpers 0.10",
    helpinfo = helpinfo,
}

local report = application.report

-- Load the supporting libraries. Which pdf related files get loaded depends on
-- whether a global pdfe is present and on CONTEXTLMTXMODE being set; the json
-- helpers are always loaded last.

do

    local libraries

    if not pdfe then
        libraries = { "lpdf-epd.lua" }
    elseif CONTEXTLMTXMODE then
        libraries = { "util-dim.lua", "lpdf-ini.lmt", "lpdf-pde.lmt", "lpdf-sig.lmt" }
    else
        libraries = { "lpdf-pde.lua" }
    end

    libraries[#libraries+1] = "util-jsn.lua"

    for i=1,#libraries do
        dofile(resolvers.findfile(libraries[i],"tex"))
    end

end

scripts     = scripts     or { }
scripts.pdf = scripts.pdf or { }

-- Both spellings of the option are accepted.

local details = environment.argument("detail") or environment.argument("details")
73
-- Open a pdf file with lpdf.epdf.load. A problem (no name, no such file, not
-- loadable) is reported and then nothing is returned.
--
-- filename : path of the pdf file
-- returns  : the loaded document, or nothing when loading failed

local function loadpdffile(filename)
    if not filename or filename == "" then
        report("no filename given")
        return
    end
    if not lfs.isfile(filename) then
        report("unknown file %a",filename)
        return
    end
    local document = lpdf.epdf.load(filename)
    if document then
        return document
    end
    report("no valid pdf file %a",filename)
end
88
-- Report general information about a pdf file: versions, the document info
-- fields, the page box dimensions and the number of annotations, destinations,
-- javascripts, widgets and embedded files. Only the cropbox is reported unless
-- the detail option was given, in which case all five page boxes are shown.
--
-- filename : path of the pdf file

function scripts.pdf.info(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local unset    = "<unset>"
    local catalog  = pdffile.Catalog or { }
    local info     = pdffile.Info or { } -- the info dictionary is optional in a pdf file
    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages or 0

    report("%-17s > %s","filename",          filename)
    report("%-17s > %s","pdf version",       catalog.Version or unset)
    report("%-17s > %s","major version",     pdffile.majorversion or unset)
    report("%-17s > %s","minor version",     pdffile.minorversion or unset)
    report("%-17s > %s","number of pages",   nofpages)
    report("%-17s > %s","title",             info.Title or unset)
    report("%-17s > %s","creator",           info.Creator or unset)
    report("%-17s > %s","producer",          info.Producer or unset)
    report("%-17s > %s","author",            info.Author or unset)
    report("%-17s > %s","creation date",     info.CreationDate or unset)
    report("%-17s > %s","modification date", info.ModDate or unset)

    -- Report the dimensions of one kind of page box. Consecutive pages with the
    -- same dimensions are collapsed into one line with a page range. A page that
    -- lacks the box falls back on its mediabox.

    local function somebox(what)
        local box = string.lower(what)
        local width, height, start
        for i=1,nofpages do
            local page = pages[i]
            local bbox = page[what] or page.MediaBox or { 0, 0, 0, 0 }
            -- a pdf rectangle is { llx, lly, urx, ury }
            local w = bbox[3] - bbox[1]
            local h = bbox[4] - bbox[2]
            if w ~= width or h ~= height then
                if start then
                    report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,i-1,width,height)
                end
                width, height, start = w, h, i
            end
        end
        if start then -- there is no range to flush when there are no pages
            report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,nofpages,width,height)
        end
    end

    if details then
        somebox("MediaBox")
        somebox("ArtBox")
        somebox("BleedBox")
        somebox("CropBox")
        somebox("TrimBox")
    else
        somebox("CropBox")
    end

    -- Counts are only shown when there is something to count.

    local function showcount(what,n)
        if n > 0 then
            report("%-17s > %s",what,n)
        end
    end

    local function nofkeys(t)
        return t and #sortedkeys(t) or 0
    end

    local annotations = 0
    for i=1,nofpages do
        local a = pages[i].Annots
        if a then
            annotations = annotations + #a
        end
    end

    local widgets = pdffile.widgets

    showcount("annotations",  annotations)
    showcount("destinations", nofkeys(pdffile.destinations))
    showcount("javascripts",  nofkeys(pdffile.javascripts))
    showcount("widgets",      widgets and #widgets or 0)
    showcount("embeddedfiles",nofkeys(pdffile.embeddedfiles))
end
176
-- Convert a bitset into a set of names.
--
-- flag    : integer with the bits to test
-- flags   : table mapping names onto bit masks (optional)
-- returns : table with name = true for each mask that has a bit in common
--           with flag; an empty table when no flags table is given

local function flagstoset(flag,flags)
    local set = { }
    if not flags then
        return set
    end
    for name, mask in next, flags do
        if (flag & mask) ~= 0 then
            set[name] = true
        end
    end
    return set
end
188
-- The field flags (Ff) that are tested below. The values follow the field flag
-- tables of the pdf specification, where bit position n has the value 1 << (n-1).

local widgetflags = {
    MultiLine      = 0x0001000, -- bit 13, text fields
    Radio          = 0x0008000, -- bit 16, button fields
    PushButton     = 0x0010000, -- bit 17, button fields
    PopUp          = 0x0020000, -- bit 18, choice fields (combo box)
    Edit           = 0x0040000, -- bit 19, choice fields
    RadiosInUnison = 0x2000000, -- bit 26, button fields
}

-- Show the form fields that have a value as a table with a type, name and value
-- column. The type is derived from the field type (FT) and the field flags (Ff);
-- name, type and flags can come from the parent of a widget.
--
-- filename : path of the pdf file
-- save     : when set the values are also saved in <name>-formdata.json (when
--            the value is "json") or in <name>-formdata.lua (any other value)

function scripts.pdf.formdata(filename,save)
    local pdffile = loadpdffile(filename)
    local widgets = pdffile and pdffile.widgets
    if not widgets then
        return
    end

    local results = { { "type", "name", "value" } }

    for i=1,#widgets do
        local annotation = widgets[i]
        local parent     = annotation.Parent or { }
        local name       = annotation.T  or parent.T
        local what       = annotation.FT or parent.FT
        if name and what then
            local value = annotation.V and tostring(annotation.V) or ""
            if value ~= "" then
                local wflags = flagstoset(annotation.Ff or parent.Ff or 0, widgetflags)
                if what == "Tx" then
                    what = wflags.MultiLine and "text" or "line"
                elseif what == "Btn" then
                    if wflags.Radio or wflags.RadiosInUnison then
                        what = "radio"
                    elseif wflags.PushButton then
                        what = "push"
                    else
                        what = "check"
                    end
                elseif what == "Ch" then
                    if not wflags.PopUp then
                        what = "choice"
                    elseif wflags.Edit then
                        what = "combo"
                    else
                        what = "popup"
                    end
                elseif what == "Sig" then
                    what = "signature"
                else
                    what = nil -- unknown field types are skipped
                end
                if what then
                    results[#results+1] = { what, name, value }
                end
            end
        end
    end

    -- Saving comes first because the results are passed to the column formatter
    -- afterwards.

    if save then
        local values = { }
        for i=2,#results do
            local result = results[i]
            values[#values+1] = {
                type  = result[1],
                name  = result[2],
                value = result[3],
            }
        end
        local data = {
            filename = filename,
            values   = values,
        }
        local name = file.nameonly(filename) .. "-formdata"
        if save == "json" then
            name = file.addsuffix(name,"json")
            io.savedata(name,utilities.json.tojson(data))
        else
            name = file.addsuffix(name,"lua")
            table.save(name,data)
        end
        report("")
        report("%i widgets found, %i values saved in %a",#widgets,#results-1,name)
        report("")
    end

    utilities.formatters.formatcolumns(results)
    report(results[1])
    report("")
    for i=2,#results do
        report(results[i])
    end
    report("")
end
279
-- Show or save the contents of the first signature field that has a value.
--
-- filename : path of the pdf file
-- save     : when set the contents are converted with string.tobytes and saved
--            in <name>-signature.bin, otherwise they are reported

function scripts.pdf.signature(filename,save)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local widgets = pdffile.widgets
    if widgets then
        for i=1,#widgets do
            local annotation = widgets[i]
            local parent     = annotation.Parent or { }
            local what       = annotation.FT or parent.FT
            if what == "Sig" then
                local value = annotation.V
                if value then
                    -- tostring never returns nil, so the fallback on an empty
                    -- string has to be decided before converting
                    local contents = value.Contents
                    contents = contents and tostring(contents) or ""
                    report("")
                    if save then
                        local name = file.nameonly(filename) .. "-signature.bin"
                        report("signature saved in %a",name)
                        io.savedata(name,string.tobytes(contents))
                    else
                        report("signature: %s",contents)
                    end
                    report("")
                    return
                end
            end
        end
    end
    report("there is no signature")
end
311
-- Sign a pdf file by calling lpdf.sign. The certificate and password come from
-- the command line and both have to be strings; the purge and uselibrary options
-- are passed along as given.
--
-- filename : name of the pdf file, the pdf suffix is added when missing
-- save     : not used

function scripts.pdf.sign(filename,save)
    local pdffile = file.addsuffix(filename,"pdf")
    if lfs.isfile(pdffile) then
        local certificate = environment.argument("certificate")
        local password    = environment.argument("password")
        if type(certificate) == "string" and type(password) == "string" then
            lpdf.sign {
                filename    = pdffile,
                certificate = certificate,
                password    = password,
                purge       = environment.argument("purge"),
                uselibrary  = environment.argument("uselibrary"),
            }
        else
            report("provide --certificate and --password")
        end
    else
        report("invalid pdf file %a",pdffile)
    end
end
332
-- Verify a pdf file by calling lpdf.verify. The certificate and password come
-- from the command line and both have to be strings; the uselibrary option is
-- passed along as given.
--
-- filename : name of the pdf file, the pdf suffix is added when missing
-- save     : not used

function scripts.pdf.verify(filename,save)
    local pdffile = file.addsuffix(filename,"pdf")
    if lfs.isfile(pdffile) then
        local certificate = environment.argument("certificate")
        local password    = environment.argument("password")
        if type(certificate) == "string" and type(password) == "string" then
            lpdf.verify {
                filename    = pdffile,
                certificate = certificate,
                password    = password,
                uselibrary  = environment.argument("uselibrary"),
            }
        else
            report("provide --certificate and --password")
        end
    else
        report("invalid pdf file %a",pdffile)
    end
end
352
-- Show the metadata blob that the catalog refers to.
--
-- filename : path of the pdf file
-- pretty   : when set each carriage return in the blob is replaced by a newline

function scripts.pdf.metadata(filename,pretty)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local stream = pdffile.Catalog.Metadata
    if not stream then
        report("no metadata")
        return
    end
    local blob = stream() -- calling the entry gives its content
    if pretty then
        blob = gsub(blob,"\r","\n")
    end
    report("metadata > \n\n%s\n",blob)
end
369
local expanded = lpdf.epdf.expanded

-- Collect the fonts that the pages of a document refer to. The font resources of
-- each page are visited, as are the resources of the XObjects found there and of
-- Type3 fonts. Nested entries get an id with the ids of their parents prepended,
-- separated by periods.
--
-- pdffile : a loaded document
-- returns : table mapping font ids onto font dictionaries

local function getfonts(pdffile)
    local usedfonts = { }

    local function collect(where,tag)
        local resources = where.Resources
        if not resources then
            return
        end

        local function tagged(id)
            return tag and (tag .. "." .. id) or id
        end

        local fontlist = resources.Font
        if fontlist then
            for id, font in expanded(fontlist) do
                local name = tagged(id)
                usedfonts[name] = font
                if font.Subtype == "Type3" then
                    collect(font,name)
                end
            end
        end

        local objects = resources.XObject
        if objects then
            for id, object in expanded(objects) do
                collect(object,tagged(id))
            end
        end
    end

    local pages = pdffile.pages
    for i=1,pdffile.nofpages do
        collect(pages[i])
    end

    return usedfonts
end
402
403
404
-- Analyze the ToUnicode entry of a font. Only bfrange entries made of three hex
-- strings and bfchar entries made of two hex strings are picked up; the target
-- of a bfchar is cut into pieces of four hex digits.
--
-- font    : a font dictionary
-- returns : counts (target code -> number of times it is mapped onto) and
--           indices (source code -> true), or nothing when the font has no
--           ToUnicode entry

local function getunicodes(font)
    local cid = font.ToUnicode
    if not cid then
        return
    end
    cid = cid() -- calling the entry gives its content

    local counts  = { }
    local indices = { }

    -- unseen codes start counting at zero
    setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end)

    local function seen(code)
        counts[code] = counts[code] + 1
    end

    for range in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do
        for first, last, target in gmatch(range,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
            first = tonumber(first,16)
            last  = tonumber(last,16)
            local offset = tonumber(target,16) - first
            for index=first,last do
                seen(index + offset)
                indices[index] = true
            end
        end
    end

    for chars in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do
        for source, target in gmatch(chars,"<([^>]+)>%s+<([^>]+)>") do
            indices[tonumber(source,16)] = true
            for piece in gmatch(target,"....") do
                seen(tonumber(piece,16))
            end
        end
    end

    return counts, indices
end
442
-- Show the fonts used in a pdf file. By default a table with one row per font
-- is shown; with the detail option each font gets a block of lines, followed by
-- the source indices collected per base font name (subset prefix removed).
--
-- filename : path of the pdf file

function scripts.pdf.fonts(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local found  = { }
    local common = table.setmetatableindex("table")

    for id, font in table.sortedhash(getfonts(pdffile)) do
        local basefont = font.BaseFont
        local encoding = font.Encoding
        local subtype  = font.Subtype
        local unicode  = font.ToUnicode
        local counts, indices = getunicodes(font)
        local codes = { }
        local chars = { }
        local names = { }

        if counts then
            if basefont and unicode then
                -- strip everything up to the last plus sign
                local shortname = gsub(basefont,"^.*%+","")
                local collected = common[shortname]
                for index in next, indices do
                    collected[index] = true
                end
            end
            codes = sortedkeys(counts)
            for i=1,#codes do
                local code = codes[i]
                if code > 32 then
                    chars[i] = utfchar(code)
                elseif code == 32 then
                    chars[i] = "SPACE"
                else
                    chars[i] = format("U+%03X",code)
                end
                codes[i] = format("U+%05X",code)
            end
        end

        local differences = encoding and encoding.Differences
        if differences then
            for i=1,#differences do
                local entry = differences[i]
                if type(entry) == "string" then
                    names[#names+1] = entry
                end
            end
        end

        if not basefont then
            local descriptor = font.FontDescriptor
            if descriptor then
                basefont = descriptor.FontName
            end
        end

        found[id] = {
            basefont = basefont or "no basefont",
            encoding = (differences and "custom n=" .. #differences) or "no encoding",
            subtype  = subtype or "no subtype",
            unicode  = unicode and "unicode" or "no vector",
            chars    = chars,
            codes    = codes,
            names    = names,
        }
    end

    -- The numbers in an id are padded with zeros so that sorting on the padded
    -- id puts them in numeric order.

    local haschar = false
    local list    = { }

    local function padded(digits)
        return format("%05i",tonumber(digits))
    end

    for id, entry in next, found do
        local sortkey = gsub(id,"(%d+)",padded)
        list[sortkey] = { id, entry }
        if #entry.chars > 0 then
            haschar = true
        end
    end

    if details then

        for id, entry in sortedhash(found) do
            report("id         : %s", id)
            report("basefont   : %s", entry.basefont)
            report("encoding   : % t", entry.names)
            report("subtype    : %s", entry.subtype)
            report("unicode    : %s", entry.unicode)
            if #entry.chars > 0 then
                report("characters : % t", entry.chars)
            end
            if #entry.codes > 0 then
                report("codepoints : % t", entry.codes)
            end
            report("")
        end

        for shortname, collected in sortedhash(common) do
            report("basefont   : %s",shortname)
            report("indices    : % t", sortedkeys(collected))
            report("")
        end

        return

    end

    local results = { { "id", "basefont", "encoding", "subtype", "unicode", haschar and "characters" or nil } }
    local shared  = { }

    for _, pair in sortedhash(list) do
        local id       = pair[1]
        local entry    = pair[2]
        local basefont = entry.basefont
        -- only the first font with a given basefont lists its characters
        local characters = shared[basefont] or (haschar and concat(entry.chars," ")) or nil
        results[#results+1] = { id, basefont, entry.encoding, entry.subtype, entry.unicode, characters }
        if not shared[basefont] then
            shared[basefont] = "shared with " .. id
        end
    end

    utilities.formatters.formatcolumns(results)
    report(results[1])
    report("")
    for i=2,#results do
        report(results[i])
    end
    report("")
end
568
-- Print the verbose representation of an object, or a message when
-- lpdf.epdf.verboseobject returns nothing for that number.
--
-- filename : path of the pdf file
-- n        : the object number; nothing happens when it is not given

function scripts.pdf.object(filename,n)
    if not n then
        return
    end
    local pdffile = loadpdffile(filename)
    if pdffile then
        local verbose = lpdf.epdf.verboseobject(pdffile,n)
        print(verbose or "no object with number " .. n)
    end
end
577
-- Show the link annotations (internal, to other files, and uri's) per page,
-- followed by the named destinations of the document.
--
-- filename : path of the pdf file
-- asked    : optional page number; when given only the links on that page and
--            the destinations that point to that page are shown

function scripts.pdf.links(filename,asked)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages

    if asked and (asked < 1 or asked > nofpages) then
        report("")
        report("no page %i, last page %i",asked,nofpages)
        report("")
        return
    end

    local reverse = swapped(pages) -- from page object to page number

    -- A destination is either an array (its first entry is the target) or a
    -- plain name; indexing a name would give nil.

    local function target(D)
        if type(D) == "table" then
            return D[1]
        else
            return D
        end
    end

    local function show(pagenumber)
        local annots = pages[pagenumber].Annots
        if not annots then
            return
        end

        local done = false

        -- the banner is shown once, right before the first link of the page
        local function banner()
            if not done then
                report("")
                report("annotations @ page %i",pagenumber)
                report("")
                done = true
            end
        end

        for i=1,#annots do
            local annotation = annots[i]
            local a = annotation.A
            if not a then
                -- a direct destination is treated as a goto action
                local d = annotation.Dest
                if d then
                    a = { S = "GoTo", D = d }
                end
            end
            if a then
                local S = a.S
                if S == "GoTo" then
                    local D = a.D
                    if D then
                        local D1 = target(D)
                        local R1 = reverse[D1]
                        banner()
                        if tonumber(R1) then
                            report("intern, page % 4i",R1)
                        else
                            report("intern, name %s",tostring(D1))
                        end
                    end
                elseif S == "GoToR" then
                    local D = a.D
                    if D then
                        local F = a.F -- the file is part of the action
                        if F then
                            local D1 = target(D)
                            banner()
                            if tonumber(D1) then
                                report("extern, page % 4i, file %s",D1 + 1,F)
                            else
                                report("extern, page % 4i, file %s, name %s",0,F,D1)
                            end
                        end
                    end
                elseif S == "URI" then
                    local URI = a.URI
                    if URI then
                        banner()
                        report("extern, uri %a",URI)
                    end
                end
            end
        end
    end

    if asked then
        show(asked)
    else
        for pagenumber=1,nofpages do
            show(pagenumber)
        end
    end

    local destinations = pdffile.destinations
    if not destinations then
        return
    end

    if asked then
        report("")
        report("destinations to page %i",asked)
        report("")
        for k, v in sortedhash(destinations) do
            local D = v.D
            if D then
                local p = reverse[D[1]] or 0
                if p == asked then
                    report(k)
                end
            end
        end
    else
        report("")
        report("destinations")
        report("")
        local list = setmetatableindex("table")
        for k, v in sortedhash(destinations) do
            local D = v.D
            if D then
                -- page 0 collects the destinations with an unknown page
                local p = reverse[D[1]] or 0
                report("tag %s, page % 4i",k,p)
                insert(list[p],k)
            end
        end
        for k, v in sortedhash(list) do
            report("")
            report("page %i, names % t",k,v)
        end
    end
end
703
704
705
706
707
708
-- The first file on the command line is the one to work on. The options are
-- checked in the order given below and only the first one that is set gets
-- handled. Without a file, or without a known option, the help info is shown.

local filename = environment.files[1] or ""

do

    local argument = environment.argument

    local actions = {
        { "info",       function() scripts.pdf.info(filename) end },
        { "metadata",   function() scripts.pdf.metadata(filename,argument("pretty")) end },
        { "formdata",   function() scripts.pdf.formdata(filename,argument("save")) end },
        { "fonts",      function() scripts.pdf.fonts(filename) end },
        { "object",     function() scripts.pdf.object(filename,tonumber(argument("object"))) end },
        { "links",      function() scripts.pdf.links(filename,tonumber(argument("page"))) end },
        { "signature",  function() scripts.pdf.signature(filename,argument("save")) end },
        { "sign",       function() scripts.pdf.sign(filename) end },
        { "verify",     function() scripts.pdf.verify(filename) end },
        { "exporthelp", function() application.export(argument("exporthelp"),filename) end },
    }

    local handler

    if filename ~= "" then
        for i=1,#actions do
            local action = actions[i]
            if argument(action[1]) then
                handler = action[2]
                break
            end
        end
    end

    if handler then
        handler()
    else
        application.help()
    end

end
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757 |