lpdf-xmp.lua /size: 12 Kb    last modification: 2024-01-16 09:02
-- Registers this file in the global 'modules' table (the table is created here when
-- it does not exist yet).
-- NOTE(review): the 'comment' key occurs twice in this constructor, so only one of the
-- two strings can end up in the table; the Lua reference manual leaves the order of
-- assignments for repeated keys undefined.
if not modules then modules = { } end modules ['lpdf-xmp'] = {
    version   = 1.001,
    comment   = "companion to lpdf-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
    comment   = "with help from Peter Rolf",
}
9
10local tostring, type = tostring, type
11local format, gsub, match, rep, count = string.format, string.gsub, string.match, string.rep, string.count
12local utfchar = utf.char
13local md5HEX = md5.HEX
14local xmlfillin, xmldelete, xmltext = xml.fillin, xml.delete, xml.text
15
16local trace_xmp  = false  trackers.register("backend.xmp",  function(v) trace_xmp  = v end)
17local trace_info = false  trackers.register("backend.info", function(v) trace_info = v end)
18
19local report_xmp  = logs.reporter("backend","xmp")
20local report_info = logs.reporter("backend","info")
21
22local backends, lpdf = backends, lpdf
23
24local codeinjections       = backends.pdf.codeinjections -- normally it is registered
25
26local pdfdictionary        = lpdf.dictionary
27local pdfconstant          = lpdf.constant
28local pdfreference         = lpdf.reference
29local pdfflushstreamobject = lpdf.flushstreamobject
30
31local pdfgetmetadata       = lpdf.getmetadata
32
33-- The XMP packet wrapper is kind of fixed, see page 10 of XMPSpecificationsPart1.pdf from
34-- XMP-Toolkit-SDK-CC201607.zip. So we hardcode the id.
35
-- Template for the packet wrapper. This format call runs once, when the file is loaded:
-- it puts U+FEFF in the begin attribute, while the escaped %%s comes out as a plain %s.
-- The result is therefore itself a format template; flushxmpinfo uses it to wrap the
-- serialized blob.
local xpacket = format ( [[
<?xpacket begin="%s" id="W5M0MpCehiHzreSzNTczkc9d"?>

%%s

<?xpacket end="w"?>]], utfchar(0xFEFF) )
42
-- Maps an info tag onto { category, pattern [, check] }:
--
--   [1] category : key that permitdetail looks up in backends.included
--   [2] pattern  : path handed to the xml helpers (fillin, text, delete) to locate the
--                  element in the xmp tree
--   [3] check    : when true, flushxmpinfo deletes the element (going up one level per
--                  extra path step) when its text is still empty at flush time
--
-- Tags that are not listed here are handled by permitdetail as plain booleans.
local mapping = {
    -- user defined keys (pdfx:)
    ["ConTeXt.Jobname"]      = { "context", "rdf:Description/pdfx:ConTeXt.Jobname" },
    ["ConTeXt.Time"]         = { "date",    "rdf:Description/pdfx:ConTeXt.Time" },
    ["ConTeXt.Url"]          = { "context", "rdf:Description/pdfx:ConTeXt.Url" },
    ["ConTeXt.Support"]      = { "context", "rdf:Description/pdfx:ConTeXt.Support" },
    ["ConTeXt.Version"]      = { "context", "rdf:Description/pdfx:ConTeXt.Version" },
    ["TeX.Support"]          = { "metadata","rdf:Description/pdfx:TeX.Support" },
    ["LuaTeX.Version"]       = { "metadata","rdf:Description/pdfx:LuaTeX.Version" },
    ["LuaTeX.Functionality"] = { "metadata","rdf:Description/pdfx:LuaTeX.Functionality" },
    ["LuaTeX.LuaVersion"]    = { "metadata","rdf:Description/pdfx:LuaTeX.LuaVersion" },
    ["LuaTeX.Platform"]      = { "metadata","rdf:Description/pdfx:LuaTeX.Platform" },
    ["ID"]                   = { "id",      "rdf:Description/pdfx:ID" },                         -- has date
    -- Adobe PDF schema
    ["Keywords"]             = { "metadata","rdf:Description/pdf:Keywords", true },
    ["Producer"]             = { "metadata","rdf:Description/pdf:Producer", true },
 -- ["Trapped"]              = { "pdf",     "rdf:Description/pdf:Trapped" },                     -- '/False' in /Info, but 'False' in XMP
    -- Dublin Core schema
    ["Format"]               = { "metadata","rdf:Description/dc:format" },                       -- optional, but nice to have
    -- see xml file for comment:
 -- ["Author"]               = { "metadata","rdf:Description/dc:creator" },
 -- ["Subject"]              = { "metadata","rdf:Description/dc:description" },
 -- ["Title"]                = { "metadata","rdf:Description/dc:title" },
    ["Author"]               = { "metadata","rdf:Description/dc:creator/rdf:Seq/rdf:li", true },
    ["Subject"]              = { "metadata","rdf:Description/dc:description/rdf:Alt/rdf:li", true },
    ["Title"]                = { "metadata","rdf:Description/dc:title/rdf:Alt/rdf:li", true },
    -- XMP Basic schema
    ["CreateDate"]           = { "date",    "rdf:Description/xmp:CreateDate" },
    ["CreationDate"]         = { "date",    "rdf:Description/xmp:CreationDate" },                -- dummy
    ["CreatorTool"]          = { "metadata","rdf:Description/xmp:CreatorTool" },
 -- ["Creator"]              = { "metadata","rdf:Description/xmp:CreatorTool" },
    ["MetadataDate"]         = { "date",    "rdf:Description/xmp:MetadataDate" },
    ["ModDate"]              = { "date",    "rdf:Description/xmp:ModDate" },                     -- dummy
    ["ModifyDate"]           = { "date",    "rdf:Description/xmp:ModifyDate" },
    -- XMP Media Management schema
    ["DocumentID"]           = { "id",      "rdf:Description/xmpMM:DocumentID" },                -- uuid
    ["InstanceID"]           = { "id",      "rdf:Description/xmpMM:InstanceID" },                -- uuid
    ["RenditionClass"]       = { "pdf",     "rdf:Description/xmpMM:RenditionClass" },            -- PDF/X-4
    ["VersionID"]            = { "pdf",     "rdf:Description/xmpMM:VersionID" },                 -- PDF/X-4
    -- additional entries
    -- PDF/X
    ["GTS_PDFXVersion"]      = { "pdf",     "rdf:Description/pdfxid:GTS_PDFXVersion" },
    -- optional entries
    -- all what is visible in the 'document properties --> additional metadata' window
    -- XMP Rights Management schema (optional)
    ["Marked"]               = { "pdf",      "rdf:Description/xmpRights:Marked" },
 -- ["Owner"]                = { "metadata", "rdf:Description/xmpRights:Owner/rdf:Bag/rdf:li" }, -- maybe useful (not visible)
 -- ["UsageTerms"]           = { "metadata", "rdf:Description/xmpRights:UsageTerms" },           -- maybe useful (not visible)
    ["WebStatement"]         = { "metadata", "rdf:Description/xmpRights:WebStatement" },
    -- Photoshop PDF schema (optional)
    ["AuthorsPosition"]      = { "metadata", "rdf:Description/photoshop:AuthorsPosition" },
    ["Copyright"]            = { "metadata", "rdf:Description/photoshop:Copyright" },
    ["CaptionWriter"]        = { "metadata", "rdf:Description/photoshop:CaptionWriter" },
    --
    ["Placeholder"]          = { "metadata", "pdfaid-placeholder", true }
}
99
-- Passes a bitset to lpdf.setsuppressoptionalinfo. The enabled flags below add up to
-- 511; only the 512 flag (pdfnoid) is commented out. The flag names are the ones given
-- in the trailing comments.
lpdf.setsuppressoptionalinfo (
        0 --
    +   1 -- pdfnofullbanner
    +   2 -- pdfnofilename
    +   4 -- pdfnopagenumber
    +   8 -- pdfnoinfodict
    +  16 -- pdfnocreator
    +  32 -- pdfnocreationdate
    +  64 -- pdfnomoddate
    + 128 -- pdfnoproducer
    + 256 -- pdfnotrapped
 -- + 512 -- pdfnoid
)
113
local included = backends.included
local lpdfid   = lpdf.id

-- Overload of the function from the ini file: the saved original gets called with the
-- current value of included.date and whatever it returns is passed on.

lpdf.id = function()
    local usedate = included.date
    return lpdfid(usedate)
end
120
-- We keep a handle on the current lpdf.settrailerid because the public function gets
-- replaced further down by a setter that only stores the value.

local settrailerid = lpdf.settrailerid -- this is the wrapped one

local trailerid = nil -- set by lpdf.settrailerid (directive backend.trailerid)
local dates     = nil -- set by lpdf.setdates     (directive backend.date)

-- Document finalizer that applies the stored trailer id and date settings.
--
-- Trailer id: when a value was set, it is hashed with md5 and the hash is used for both
-- entries of the id array. A value that toboolean accepts as true, or an empty string,
-- selects a fixed text to hash (a "frozen" id), any other value is stringified and
-- hashed as given.
--
-- Dates: a number or string that converters.totime can convert forces that time and a
-- fake id. Any other string is turned into a boolean that controls whether date
-- information is included; when it is false a fake id is used as well.
--
-- This used to test and assign 'trailer_id', an undeclared global that nothing sets,
-- so the first branch could never be taken. We now read the 'trailerid' local that the
-- setter writes to.
local function update()
    local id = trailerid
    if id then
        local frozen = toboolean(id) or id == ""
        if frozen then
            id = "This file is processed by ConTeXt and LuaTeX."
        else
            id = tostring(id)
        end
        local hash = md5HEX(id)
        if frozen then
            report_info("using frozen trailer id")
        else
            report_info("using hashed trailer id %a (%a)",id,hash)
        end
        settrailerid(format("[<%s> <%s>]",hash,hash))
    end
    --
    local t = type(dates)
    if t == "number" or t == "string" then
        local d = converters.totime(dates)
        if d then
            included.date = true
            included.id   = "fake"
            report_info("forced date/time information %a will be used",lpdf.settime(d))
            settrailerid(false)
            return
        end
        if t == "string" then
            -- not a usable time, so we interpret the string as a boolean
            dates = toboolean(dates)
            included.date = dates
            if dates ~= false then
                included.id = true
            else
                report_info("no date/time but fake id information will be added")
                settrailerid(true)
                included.id = "fake"
            end
        end
    end
end
166
-- The public setters only store the requested value in the trailerid and dates locals.

lpdf.settrailerid = function(v)
    trailerid = v
end

lpdf.setdates = function(v)
    dates = v
end

-- NOTE(review): the argument order used here (function, string, number) differs from
-- the other registerdocumentfinalizer call in this file (function, number, string);
-- confirm against the definition which one is meant.

lpdf.registerdocumentfinalizer(update,"trailer id and dates",1)

directives.register("backend.trailerid", lpdf.settrailerid)
directives.register("backend.date",      lpdf.setdates)
174
-- Tells if a detail may be added. For a tag that is known in the mapping table the
-- category (first entry) is looked up in 'included': when that lookup is set we return
-- the pattern (second entry), otherwise the falsy lookup result itself. Any other tag
-- gives a real boolean that reflects included[what].
local function permitdetail(what)
    local entry = mapping[what]
    if not entry then
        if included[what] then
            return true
        end
        return false
    end
    local allowed = included[entry[1]]
    if allowed then
        return entry[2]
    end
    return allowed
end

lpdf.permitdetail = permitdetail
185
186-- maybe some day we will load the xmp file at runtime
187
local xmp     = nil            -- the converted xml tree, filled by valid_xmp
local xmpfile = nil            -- the file that is (to be) used
local xmpname = "lpdf-pdx.xml" -- used when no file has been set

-- Remembers the file to use. A tree that is already loaded gets discarded (and
-- reported). An empty name stores false instead of the name.
local function setxmpfile(name)
    if xmp then
        report_xmp("discarding loaded file %a",xmpfile)
        xmp = nil
    end
    if name == "" then
        xmpfile = false
    else
        xmpfile = name
    end
end

codeinjections.setxmpfile = setxmpfile

interfaces.implement({
    name      = "setxmpfile",
    arguments = "string",
    actions   = setxmpfile,
})
205
-- Returns the xmp tree and loads it first when that has not happened yet. A file that
-- was set explicitly is located with resolvers.findfile; when there is none, or when it
-- cannot be found, the default name is tried. When nothing is found (or nothing can be
-- loaded) an empty string is converted.
local function valid_xmp()
    if xmp then
        return xmp
    end
    if xmpfile and xmpfile ~= "" then
        xmpfile = resolvers.findfile(xmpfile) or ""
    end
    if not xmpfile or xmpfile == "" then
        xmpfile = resolvers.findfile(xmpname) or ""
    end
    local data = ""
    if xmpfile ~= "" then
        report_xmp("using file %a",xmpfile)
        data = io.loaddata(xmpfile) or ""
    end
    xmp = xml.convert(data)
    return xmp
end
223
-- Fills in the xmp element that belongs to the given tag. Nothing happens unless
-- permitdetail comes up with a pattern (a string) for the tag. The check argument is
-- handed over to the xml fillin helper as is.
function lpdf.addxmpinfo(tag,value,check)
    local pattern = permitdetail(tag)
    if type(pattern) ~= "string" then
        return
    end
    local root = xmp or valid_xmp()
    xmlfillin(root,pattern,value,check)
end
230
231-- redefined
232
local pdfaddtoinfo  = lpdf.addtoinfo
local pdfaddxmpinfo = lpdf.addxmpinfo

-- Replacement for lpdf.addtoinfo that also feeds the xmp tree.
--
--   tag      : the info key
--   pdfvalue : the value that goes to the original lpdf.addtoinfo
--   strvalue : optional string for the xmp side; when not given, the stringified
--              pdfvalue is used with one pair of enclosing parentheses stripped
--
-- The original function is only called when permitdetail returns something truthy for
-- the tag, and the xmp tree is only touched when that result is a pattern (a string).
--
-- The fillin call used to pass 'check', which is neither a parameter nor a local here,
-- so it read an undeclared global. We no longer pass it, which gives the same result
-- as long as no such global happens to be set.
function lpdf.addtoinfo(tag,pdfvalue,strvalue)
    local pattern = permitdetail(tag)
    if pattern then
        pdfaddtoinfo(tag,pdfvalue)
    end
    if type(pattern) == "string" then
        local value = strvalue or gsub(tostring(pdfvalue),"^%((.*)%)$","%1") -- hack
        if trace_info then
            report_info("set %a to %a",tag,value)
        end
        xmlfillin(xmp or valid_xmp(),pattern,value)
    end
end

-- from here on the name refers to the redefined function

local pdfaddtoinfo = lpdf.addtoinfo -- used later
251
-- for the do-it-yourselfers
253
-- Hands the arguments over to xml.insert, applied to the (if needed first loaded) xmp
-- tree.
function lpdf.insertxmpinfo(pattern,whatever,prepend)
    local root = xmp or valid_xmp()
    xml.insert(root,pattern,whatever,prepend)
end
257
-- Hands the arguments over to xml.inject, applied to the (if needed first loaded) xmp
-- tree.
function lpdf.injectxmpinfo(pattern,whatever,prepend)
    local root = xmp or valid_xmp()
    xml.inject(root,pattern,whatever,prepend)
end
261
-- Hands the arguments over to xml.replace, applied to the (if needed first loaded) xmp
-- tree.
function lpdf.replacexmpinfo(pattern,whatever)
    local root = xmp or valid_xmp()
    xml.replace(root,pattern,whatever)
end
265
266-- flushing
267
local add_xmp_blob = true  directives.register("backend.xmp",function(v) add_xmp_blob = v end)

-- This one used to be an (accidental) global that was written by the directive at the
-- end and read in flushxmpinfo. It is now a local that both share.

local verbose = false

-- Document finalizer that adds the producer, creator and date entries to the info
-- dictionary and, unless the backend.xmp directive disabled it, flushes the xmp blob as
-- metadata stream and hooks it into the catalog.
--
-- The random seed is pushed, set from os.time() and popped again around the work, so
-- the uuids that are generated here do not depend on the seed that the document uses.
local function flushxmpinfo()
    commands.pushrandomseed()
    commands.setrandomseed(os.time())

    local documentid = "no unique document id here"
    local instanceid = "no unique instance id here"
    local metadata   = pdfgetmetadata()
    local time       = metadata.time
    local producer   = metadata.producer
    local creator    = metadata.creator

    if included.id ~= "fake" then
        documentid = "uuid:" .. os.uuid()
        instanceid = "uuid:" .. os.uuid()
    end

    pdfaddtoinfo("Producer",producer)
    pdfaddtoinfo("Creator",creator)
    pdfaddtoinfo("CreationDate",time)
    pdfaddtoinfo("ModDate",time)

    if add_xmp_blob then

        pdfaddxmpinfo("DocumentID",documentid)
        pdfaddxmpinfo("InstanceID",instanceid)
        pdfaddxmpinfo("Producer",producer)
        pdfaddxmpinfo("CreatorTool",creator)
        pdfaddxmpinfo("CreateDate",time)
        pdfaddxmpinfo("ModifyDate",time)
        pdfaddxmpinfo("MetadataDate",time)
        pdfaddxmpinfo("LuaTeX.Version",metadata.luatexversion)
        pdfaddxmpinfo("LuaTeX.Functionality",metadata.luatexfunctionality)
        pdfaddxmpinfo("LuaTeX.LuaVersion",metadata.luaversion)
        pdfaddxmpinfo("LuaTeX.Platform",metadata.platform)

        -- The calls above only load the tree when a detail is permitted, so we make
        -- sure that we have one before we start pruning. Before, the pruning loop got
        -- the raw (possibly still unset) xmp variable while the serializer below
        -- loaded the tree on demand.

        local root = xmp or valid_xmp()

        -- checks for empty: entries flagged in the mapping are removed when they have
        -- no text, where we go up one level for each path step beyond the first two

        for _, map in next, mapping do
            if map[3] == true then
                local pattern = map[2]
                if type(pattern) == "string" and xmltext(root,pattern) == "" then
                    xmldelete(root,pattern .. rep("/..",count(pattern,"/")-1))
                end
            end
        end

        local blob = xml.tostring(xml.first(root,"/x:xmpmeta"))
        local md = pdfdictionary {
            Subtype = pdfconstant("XML"),
            Type    = pdfconstant("Metadata"),
        }
        if trace_xmp then
            report_xmp("data flushed, see log file")
            logs.pushtarget("logfile")
            report_xmp("start xmp blob")
            logs.newline()
            logs.writer(blob)
            logs.newline()
            report_xmp("stop xmp blob")
            logs.poptarget()
        end
        blob = format(xpacket,blob)
        -- unless asked otherwise we strip whitespace between tags when the document
        -- is compressed
        if not verbose and lpdf.compresslevel() > 0 then
            blob = gsub(blob,">%s+<","><")
        end
        local r = pdfflushstreamobject(blob,md,false) -- uncompressed
        lpdf.addtocatalog("Metadata",pdfreference(r))
    end

    commands.poprandomseed() -- hack
end

--  this will be enabled when we can inhibit compression for a stream at the lua end

lpdf.registerdocumentfinalizer(flushxmpinfo,1,"metadata")

directives.register("backend.verbosexmp", function(v)
    verbose = v
end)
349