if not modules then modules = { } end modules ['mtx-pdf'] = {
version = 1.001,
comment = "companion to mtxrun.lua",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files"
}
local tonumber = tonumber
local format, gmatch, gsub, match, find = string.format, string.gmatch, string.gsub, string.match, string.find
local utfchar = utf.char
local concat, insert, swapped = table.concat, table.insert, table.swapped
local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
local helpinfo = [[
mtx-pdf
ConTeXt PDF Helpers
0.10
show some info about the given file
show metadata xml blob
show formdata
replace newlines in metadata
show used fonts ()
show object
show links
sign document (assumes signature template)
verify document
mtxrun --script pdf --info foo.pdf
mtxrun --script pdf --metadata foo.pdf
mtxrun --script pdf --metadata --pretty foo.pdf
mtxrun --script pdf --stream=4 foo.pdf
mtxrun --script pdf --sign --certificate=somesign.pem --password=test --uselibrary somefile
mtxrun --script pdf --verify --certificate=somesign.pem --password=test --uselibrary somefile
]]
local application = logs.application {
name = "mtx-pdf",
banner = "ConTeXt PDF Helpers 0.10",
helpinfo = helpinfo,
}
local report = application.report
if not pdfe then
dofile(resolvers.findfile("lpdf-epd.lua","tex"))
elseif CONTEXTLMTXMODE then
dofile(resolvers.findfile("util-dim.lua","tex"))
dofile(resolvers.findfile("lpdf-ini.lmt","tex"))
dofile(resolvers.findfile("lpdf-pde.lmt","tex"))
dofile(resolvers.findfile("lpdf-sig.lmt","tex"))
else
dofile(resolvers.findfile("lpdf-pde.lua","tex"))
end
dofile(resolvers.findfile("util-jsn.lua","tex"))
scripts = scripts or { }
scripts.pdf = scripts.pdf or { }
local details = environment.argument("detail") or environment.argument("details")
local function loadpdffile(filename)
if not filename or filename == "" then
report("no filename given")
elseif not lfs.isfile(filename) then
report("unknown file %a",filename)
else
local pdffile = lpdf.epdf.load(filename)
if pdffile then
return pdffile
else
report("no valid pdf file %a",filename)
end
end
end
function scripts.pdf.info(filename)
local pdffile = loadpdffile(filename)
if pdffile then
local catalog = pdffile.Catalog
local info = pdffile.Info
local pages = pdffile.pages
local nofpages = pdffile.nofpages
local unset = ""
report("%-17s > %s","filename", filename)
report("%-17s > %s","pdf version", catalog.Version or unset)
report("%-17s > %s","major version", pdffile.majorversion or unset)
report("%-17s > %s","minor version", pdffile.minorversion or unset)
report("%-17s > %s","number of pages", nofpages or 0)
report("%-17s > %s","title", info.Title or unset)
report("%-17s > %s","creator", info.Creator or unset)
report("%-17s > %s","producer", info.Producer or unset)
report("%-17s > %s","author", info.Author or unset)
report("%-17s > %s","creation date", info.CreationDate or unset)
report("%-17s > %s","modification date", info.ModDate or unset)
local function somebox(what)
local box = string.lower(what)
local width, height, start
for i=1, nofpages do
local page = pages[i]
local bbox = page[what] or page.MediaBox or { 0, 0, 0, 0 }
local w, h = bbox[4]-bbox[2],bbox[3]-bbox[1]
if w ~= width or h ~= height then
if start then
report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,i-1,width,height)
end
width, height, start = w, h, i
end
end
report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,nofpages,width,height)
end
if details then
somebox("MediaBox")
somebox("ArtBox")
somebox("BleedBox")
somebox("CropBox")
somebox("TrimBox")
else
somebox("CropBox")
end
-- if details then
local annotations = 0
for i=1,nofpages do
local page = pages[i]
local a = page.Annots
if a then
annotations = annotations + #a
end
end
if annotations > 0 then
report("%-17s > %s", "annotations",annotations)
end
-- end
-- if details then
local d = pdffile.destinations
local k = d and sortedkeys(d)
if k and #k > 0 then
report("%-17s > %s", "destinations",#k)
end
local d = pdffile.javascripts
local k = d and sortedkeys(d)
if k and #k > 0 then
report("%-17s > %s", "javascripts",#k)
end
local d = pdffile.widgets
if d and #d > 0 then
report("%-17s > %s", "widgets",#d)
end
local d = pdffile.embeddedfiles
local k = d and sortedkeys(d)
if k and #k > 0 then
report("%-17s > %s", "embeddedfiles",#k)
end
-- end
end
end
local function flagstoset(flag,flags)
local t = { }
if flags then
for k, v in next, flags do
if (flag & v) ~= 0 then
t[k] = true
end
end
end
return t
end
function scripts.pdf.formdata(filename,save)
local pdffile = loadpdffile(filename)
if pdffile then
local widgets = pdffile.widgets
if widgets then
local results = { { "type", "name", "value" } }
for i=1,#widgets do
local annotation = widgets[i]
local parent = annotation.Parent or { }
local name = annotation.T or parent.T
local what = annotation.FT or parent.FT
if name and what then
local value = annotation.V and tostring(annotation.V) or ""
if value and value ~= "" then
local wflags = flagstoset(annotation.Ff or parent.Ff or 0, widgetflags)
if what == "Tx" then
if wflags.MultiLine then
wflags.MultiLine = nil
what = "text"
else
what = "line"
end
local default = annotation.V or ""
elseif what == "Btn" then
if wflags.Radio or wflags.RadiosInUnison then
what = "radio"
elseif wflags.PushButton then
what = "push"
else
what = "check"
end
elseif what == "Ch" then
-- F Ff FT Opt T | AA OC (rest follows)
if wflags.PopUp then
wflags.PopUp = nil
if wflags.Edit then
what = "combo"
else
what = "popup"
end
else
what = "choice"
end
elseif what == "Sig" then
what = "signature"
else
what = nil
end
if what then
results[#results+1] = { what, name, value }
end
end
end
end
if save then
local values = { }
for i=2,#results do
local result= results[i]
values[#values+1] = {
type = result[1],
name = result[2],
value = result[3],
}
end
local data = {
filename = filename,
values = values,
}
local name = file.nameonly(filename) .. "-formdata"
if save == "json" then
name = file.addsuffix(name,"json")
io.savedata(name,utilities.json.tojson(data))
elseif save then
name = file.addsuffix(name,"lua")
table.save(name,data)
end
report("")
report("%i widgets found, %i values saved in %a",#widgets,#results-1,name)
report("")
end
utilities.formatters.formatcolumns(results)
report(results[1])
report("")
for i=2,#results do
report(results[i])
end
report("")
end
end
end
function scripts.pdf.signature(filename,save)
local pdffile = loadpdffile(filename)
if pdffile then
local widgets = pdffile.widgets
if widgets then
for i=1,#widgets do
local annotation = widgets[i]
local parent = annotation.Parent or { }
local name = annotation.T or parent.T
local what = annotation.FT or parent.FT
if what == "Sig" then
local value = annotation.V
if value then
local contents = tostring(value.Contents) or ""
report("")
if save then
local name = file.nameonly(filename) .. "-signature.bin"
report("signature saved in %a",name)
io.savedata(name,string.tobytes(contents))
else
report("signature: %s",contents)
end
report("")
return
end
end
end
end
report("there is no signature")
end
end
function scripts.pdf.sign(filename,save)
local pdffile = file.addsuffix(filename,"pdf")
if not lfs.isfile(pdffile) then
report("invalid pdf file %a",pdffile)
return
end
local certificate = environment.argument("certificate")
local password = environment.argument("password")
if type(certificate) ~= "string" or type(password) ~= "string" then
report("provide --certificate and --password")
return
end
lpdf.sign {
filename = pdffile,
certificate = certificate,
password = password,
purge = environment.argument("purge"),
uselibrary = environment.argument("uselibrary"),
}
end
function scripts.pdf.verify(filename,save)
local pdffile = file.addsuffix(filename,"pdf")
if not lfs.isfile(pdffile) then
report("invalid pdf file %a",pdffile)
return
end
local certificate = environment.argument("certificate")
local password = environment.argument("password")
if type(certificate) ~= "string" or type(password) ~= "string" then
report("provide --certificate and --password")
return
end
lpdf.verify {
filename = pdffile,
certificate = certificate,
password = password,
uselibrary = environment.argument("uselibrary"),
}
end
function scripts.pdf.metadata(filename,pretty)
local pdffile = loadpdffile(filename)
if pdffile then
local catalog = pdffile.Catalog
local metadata = catalog.Metadata
if metadata then
metadata = metadata()
if pretty then
metadata = gsub(metadata,"\r","\n")
end
report("metadata > \n\n%s\n",metadata)
else
report("no metadata")
end
end
end
local expanded = lpdf.epdf.expanded
local function getfonts(pdffile)
local usedfonts = { }
local function collect(where,tag)
local resources = where.Resources
if resources then
local fontlist = resources.Font
if fontlist then
for k, v in expanded(fontlist) do
usedfonts[tag and (tag .. "." .. k) or k] = v
if v.Subtype == "Type3" then
collect(v,tag and (tag .. "." .. k) or k)
end
end
end
local objects = resources.XObject
if objects then
for k, v in expanded(objects) do
collect(v,tag and (tag .. "." .. k) or k)
end
end
end
end
for i=1,pdffile.nofpages do
collect(pdffile.pages[i])
end
return usedfonts
end
-- todo: fromunicode16
local function getunicodes(font)
local cid = font.ToUnicode
if cid then
cid = cid()
local counts = { }
local indices = { }
-- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do
-- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
-- print(a,b)
-- end
-- end
setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end)
for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do
for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
first = tonumber(first,16)
last = tonumber(last,16)
offset = tonumber(offset,16)
offset = offset - first
for i=first,last do
local c = i + offset
counts[c] = counts[c] + 1
indices[i] = true
end
end
end
for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do
for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
indices[tonumber(old,16)] = true
for n in gmatch(new,"....") do
local c = tonumber(n,16)
counts[c] = counts[c] + 1
end
end
end
return counts, indices
end
end
function scripts.pdf.fonts(filename)
local pdffile = loadpdffile(filename)
if pdffile then
local usedfonts = getfonts(pdffile)
local found = { }
local common = table.setmetatableindex("table")
for k, v in table.sortedhash(usedfonts) do
local basefont = v.BaseFont
local encoding = v.Encoding
local subtype = v.Subtype
local unicode = v.ToUnicode
local counts,
indices = getunicodes(v)
local codes = { }
local chars = { }
-- local freqs = { }
local names = { }
if counts then
codes = sortedkeys(counts)
for i=1,#codes do
local k = codes[i]
if k > 32 then
local c = utfchar(k)
chars[i] = c
-- freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c)
else
chars[i] = k == 32 and "SPACE" or format("U+%03X",k)
-- freqs[i] = format("U+%05X %s --",k,counts[k] > 1 and "+" or " ")
end
end
if basefont and unicode then
local b = gsub(basefont,"^.*%+","")
local c = common[b]
for k in next, indices do
c[k] = true
end
end
for i=1,#codes do
codes[i] = format("U+%05X",codes[i])
end
end
local d = encoding and encoding.Differences
if d then
for i=1,#d do
local di = d[i]
if type(di) == "string" then
names[#names+1] = di
end
end
end
if not basefont then
local fontdescriptor = v.FontDescriptor
if fontdescriptor then
basefont = fontdescriptor.FontName
end
end
found[k] = {
basefont = basefont or "no basefont",
encoding = (d and "custom n=" .. #d) or "no encoding",
subtype = subtype or "no subtype",
unicode = unicode and "unicode" or "no vector",
chars = chars,
codes = codes,
-- freqs = freqs,
names = names,
}
end
local haschar = false
local list = { }
for k, v in next, found do
local s = string.gsub(k,"(%d+)",function(s) return string.format("%05i",tonumber(s)) end)
list[s] = { k, v }
if #v.chars > 0 then
haschar = true
end
end
if details then
for k, v in sortedhash(found) do
-- for s, f in sortedhash(list) do
-- local k = f[1]
-- local v = f[2]
report("id : %s", k)
report("basefont : %s", v.basefont)
report("encoding : % t", v.names)
report("subtype : %s", v.subtype)
report("unicode : %s", v.unicode)
if #v.chars > 0 then
report("characters : % t", v.chars)
end
if #v.codes > 0 then
report("codepoints : % t", v.codes)
end
report("")
end
for k, v in sortedhash(common) do
report("basefont : %s",k)
report("indices : % t", sortedkeys(v))
report("")
end
else
local results = { { "id", "basefont", "encoding", "subtype", "unicode", haschar and "characters" or nil } }
local shared = { }
for s, f in sortedhash(list) do
local k = f[1]
local v = f[2]
local basefont = v.basefont
local characters = shared[basefont] or (haschar and concat(v.chars," ")) or nil
results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, characters }
if not shared[basefont] then
shared[basefont] = "shared with " .. k
end
end
utilities.formatters.formatcolumns(results)
report(results[1])
report("")
for i=2,#results do
report(results[i])
end
report("")
end
end
end
function scripts.pdf.object(filename,n)
if n then
local pdffile = loadpdffile(filename)
if pdffile then
print(lpdf.epdf.verboseobject(pdffile,n) or "no object with number " .. n)
end
end
end
function scripts.pdf.links(filename,asked)
local pdffile = loadpdffile(filename)
if pdffile then
local pages = pdffile.pages
local nofpages = pdffile.nofpages
if asked and (asked < 1 or asked > nofpages) then
report("")
report("no page %i, last page %i",asked,nofpages)
report("")
return
end
local reverse = swapped(pages)
local function banner(pagenumber)
report("")
report("annotations @ page %i",pagenumber)
report("")
end
local function show(pagenumber)
local page = pages[pagenumber]
local annots = page.Annots
if annots then
local done = false
for i=1,#annots do
local annotation = annots[i]
local a = annotation.A
if not a then
local d = annotation.Dest
if d then
a = { S = "GoTo", D = d } -- no need for a dict
end
end
if a then
local S = a.S
if S == "GoTo" then
local D = a.D
if D then
local D1 = D[1]
local R1 = reverse[D1]
if not done then
banner(pagenumber)
done = true
end
if tonumber(R1) then
report("intern, page % 4i",R1 or 0)
else
report("intern, name %s",tostring(D1))
end
end
elseif S == "GoToR" then
local D = a.D
if D then
local F = A.F
if F then
local D1 = D[1]
if not done then
banner(pagenumber)
done = true
end
if tonumber(D1) then
report("extern, page % 4i, file %s",D1 + 1,F)
else
report("extern, page % 4i, file %s, name %s",0,F,D[1])
end
end
end
elseif S == "URI" then
local URI = a.URI
if URI then
report("extern, uri %a",URI)
end
end
end
end
end
end
if asked then
show(asked)
else
for pagenumber=1,nofpages do
show(pagenumber)
end
end
local destinations = pdffile.destinations
if destinations then
if asked then
report("")
report("destinations to page %i",asked)
report("")
for k, v in sortedhash(destinations) do
local D = v.D
if D then
local p = reverse[D[1]] or 0
if p == asked then
report(k)
end
end
end
else
report("")
report("destinations")
report("")
local list = setmetatableindex("table")
for k, v in sortedhash(destinations) do
local D = v.D
if D then
local p = reverse[D[1]]
report("tag %s, page % 4i",k,p)
insert(list[p],k)
end
end
for k, v in sortedhash(list) do
report("")
report("page %i, names % t",k,v)
end
end
end
end
end
-- scripts.pdf.info("e:/tmp/oeps.pdf")
-- scripts.pdf.metadata("e:/tmp/oeps.pdf")
-- scripts.pdf.fonts("e:/tmp/oeps.pdf")
-- scripts.pdf.linearize("e:/tmp/oeps.pdf")
local filename = environment.files[1] or ""
if filename == "" then
application.help()
elseif environment.argument("info") then
scripts.pdf.info(filename)
elseif environment.argument("metadata") then
scripts.pdf.metadata(filename,environment.argument("pretty"))
elseif environment.argument("formdata") then
scripts.pdf.formdata(filename,environment.argument("save"))
elseif environment.argument("fonts") then
scripts.pdf.fonts(filename)
elseif environment.argument("object") then
scripts.pdf.object(filename,tonumber(environment.argument("object")))
elseif environment.argument("links") then
scripts.pdf.links(filename,tonumber(environment.argument("page")))
elseif environment.argument("signature") then
scripts.pdf.signature(filename,environment.argument("save"))
elseif environment.argument("sign") then
scripts.pdf.sign(filename)
elseif environment.argument("verify") then
scripts.pdf.verify(filename)
elseif environment.argument("exporthelp") then
application.export(environment.argument("exporthelp"),filename)
else
application.help()
end
-- a variant on an experiment by hartmut
--~ function downloadlinks(filename)
--~ local document = lpdf.epdf.load(filename)
--~ if document then
--~ local pages = document.pages
--~ for p = 1,#pages do
--~ local annotations = pages[p].Annots
--~ if annotations then
--~ for a=1,#annotations do
--~ local annotation = annotations[a]
--~ local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI
--~ if uri and string.find(uri,"^http") then
--~ os.execute("wget " .. uri)
--~ end
--~ end
--~ end
--~ end
--~ end
--~ end