-- lpdf-xmp.lmt / size: 18 Kb / last modification: 2024-01-16 09:02
1if not modules then modules = { } end modules ['lpdf-xmp'] = {
2    version   = 1.001,
3    comment   = "companion to lpdf-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files",
7    comment   = "with help from Peter Rolf",
8}
9
10local tostring, type = tostring, type
11local format, gsub, match, rep, count = string.format, string.gsub, string.match, string.rep, string.count
12local concat = table.concat
13local utfchar = utf.char
14local md5HEX = md5.HEX
15local xmlfillin, xmldelete, xmltext = xml.fillin, xml.delete, xml.text
16local osdate, ostime, ostimezone, osuuid = os.date, os.time, os.timezone, os.uuid
17local settings_to_array = utilities.parsers.settings_to_array
18
-- Trace flags that can be toggled through the tracker mechanism. In this file
-- trace_xmp guards the logging of the flushed xmp blob and trace_info guards
-- the report of values that get set in the info dictionary.
local trace_xmp  = false  trackers.register("backend.xmp",  function(v) trace_xmp  = v end)
local trace_info = false  trackers.register("backend.info", function(v) trace_info = v end)

-- Reporters for the messages issued by this module.
local report_xmp  = logs.reporter("backend","xmp")
local report_info = logs.reporter("backend","info")
24
25local backends             = backends
26local pdfbackend           = backends.registered.pdf
27local codeinjections       = pdfbackend.codeinjections
28
29local lpdf                 = lpdf
30local pdfdictionary        = lpdf.dictionary
31local pdfconstant          = lpdf.constant
32local pdfunicode           = lpdf.unicode
33local pdfstring            = lpdf.string
34local pdfreference         = lpdf.reference
35local pdfflushstreamobject = lpdf.flushstreamobject
36
37-- The XMP packet wrapper is kind of fixed, see page 10 of XMPSpecificationsPart1.pdf from
38-- XMP-Toolkit-SDK-CC201607.zip. So we hardcode the id.
39
-- Template for the final packet. The format call below only fills in the
-- begin attribute with U+FEFF; the escaped %%s survives as a single %s, which
-- is the slot that later receives the serialized xmp tree.
local xpacket = format ( [[
<?xpacket begin="%s" id="W5M0MpCehiHzreSzNTczkc9d"?>

%%s

<?xpacket end="w"?>]], utfchar(0xFEFF) )
46
-- Maps an info tag onto a description with up to three slots:
--
--   [1] a category name (not read anywhere in the code visible in this file)
--   [2] the xml path of the element in the xmp template that receives the value
--   [3] true when the flush code should delete the element (more precisely one
--       of its ancestors) when its text turns out to be empty
--
-- Tags that are not listed resolve to 'unknown' (presumably because
-- table.setmetatableindex installs the given function as index fallback), so
-- mapping[tag][2] can always be indexed and is false for such tags.
local unknown = { false, false }
local mapping = table.setmetatableindex ( {
    -- user defined keys (pdfx:)
    ["ConTeXt.Jobname"]      = { "context", "rdf:Description/pdfx:ConTeXt.Jobname" },
    ["ConTeXt.Time"]         = { "date",    "rdf:Description/pdfx:ConTeXt.Time" },
    ["ConTeXt.Url"]          = { "context", "rdf:Description/pdfx:ConTeXt.Url" },
    ["ConTeXt.Support"]      = { "context", "rdf:Description/pdfx:ConTeXt.Support" },
    ["ConTeXt.Version"]      = { "context", "rdf:Description/pdfx:ConTeXt.Version" },
    ["TeX.Support"]          = { "metadata","rdf:Description/pdfx:TeX.Support" },
    ["LuaTeX.Version"]       = { "metadata","rdf:Description/pdfx:LuaTeX.Version" },
    ["LuaTeX.Functionality"] = { "metadata","rdf:Description/pdfx:LuaTeX.Functionality" },
    ["LuaTeX.LuaVersion"]    = { "metadata","rdf:Description/pdfx:LuaTeX.LuaVersion" },
    ["LuaTeX.Platform"]      = { "metadata","rdf:Description/pdfx:LuaTeX.Platform" },
    ["ID"]                   = { "id",      "rdf:Description/pdfx:ID" },                         -- has date
    -- Adobe PDF schema
    ["Keywords"]             = { "metadata","rdf:Description/pdf:Keywords", true },
    ["Producer"]             = { "metadata","rdf:Description/pdf:Producer", true },
 -- ["Trapped"]              = { "pdf",     "rdf:Description/pdf:Trapped" },                     -- '/False' in /Info, but 'False' in XMP
    -- Dublin Core schema
    ["Format"]               = { "metadata","rdf:Description/dc:format" },                       -- optional, but nice to have
    -- see xml file for comment:
 -- ["Author"]               = { "metadata","rdf:Description/dc:creator" },
 -- ["Subject"]              = { "metadata","rdf:Description/dc:description" },
 -- ["Title"]                = { "metadata","rdf:Description/dc:title" },
    ["Author"]               = { "metadata","rdf:Description/dc:creator/rdf:Seq/rdf:li", true },
    ["Subject"]              = { "metadata","rdf:Description/dc:description/rdf:Alt/rdf:li", true },
    ["Title"]                = { "metadata","rdf:Description/dc:title/rdf:Alt/rdf:li", true },
    -- XMP Basic schema
    ["CreateDate"]           = { "date",    "rdf:Description/xmp:CreateDate" },
    ["CreationDate"]         = { "date",    "rdf:Description/xmp:CreationDate" },                -- dummy
    ["CreatorTool"]          = { "metadata","rdf:Description/xmp:CreatorTool" },
 -- ["Creator"]              = { "metadata","rdf:Description/xmp:CreatorTool" },
    ["MetadataDate"]         = { "date",    "rdf:Description/xmp:MetadataDate" },
    ["ModDate"]              = { "date",    "rdf:Description/xmp:ModDate" },                     -- dummy
    ["ModifyDate"]           = { "date",    "rdf:Description/xmp:ModifyDate" },
    -- XMP Media Management schema
    ["DocumentID"]           = { "id",      "rdf:Description/xmpMM:DocumentID" },                -- uuid
    ["InstanceID"]           = { "id",      "rdf:Description/xmpMM:InstanceID" },                -- uuid
    ["RenditionClass"]       = { "pdf",     "rdf:Description/xmpMM:RenditionClass" },            -- PDF/X-4
    ["VersionID"]            = { "pdf",     "rdf:Description/xmpMM:VersionID" },                 -- PDF/X-4
    -- additional entries
    -- PDF/X
    ["GTS_PDFXVersion"]      = { "pdf",     "rdf:Description/pdfxid:GTS_PDFXVersion" },
    -- optional entries
    -- all what is visible in the 'document properties --> additional metadata' window
    -- XMP Rights Management schema (optional)
    ["Marked"]               = { "pdf",      "rdf:Description/xmpRights:Marked" },
 -- ["Owner"]                = { "metadata", "rdf:Description/xmpRights:Owner/rdf:Bag/rdf:li" }, -- maybe useful (not visible)
 -- ["UsageTerms"]           = { "metadata", "rdf:Description/xmpRights:UsageTerms" },           -- maybe useful (not visible)
    ["WebStatement"]         = { "metadata", "rdf:Description/xmpRights:WebStatement" },
    -- Photoshop PDF schema (optional)
    ["AuthorsPosition"]      = { "metadata", "rdf:Description/photoshop:AuthorsPosition" },
    ["Copyright"]            = { "metadata", "rdf:Description/photoshop:Copyright" },
    ["CaptionWriter"]        = { "metadata", "rdf:Description/photoshop:CaptionWriter" },
    --
    ["Placeholder"]          = { "metadata", "pdfaid-placeholder", true }
}, function() return unknown end )
104
105
-- Module state shared by the functions below.

-- cache that is filled by the first call to pdfgetmetadata
local metadata         = nil
-- true: ids are derived from a fresh uuid, string: use that string, false: no id
local trailerid        = true
-- date strings (set by setdates and pdfsetmetadate) or false when dates are disabled
local creationdate     = false
local modificationdate = false
110
-- Converts a date into a pdf date string ("D:..."). A string has to look like
-- YYYY-MM-DDThh:mm:ss+hh:mm (or with a minus sign) and is rewritten field by
-- field; a string of any other shape yields nil. A number is handed to os.date
-- as time value and anything else results in the current time; in these two
-- cases no zone offset is appended.
local function pdftimestamp(str)
    if type(str) == "string" then
        local year, month, day, hour, minute, second, sign, zonehours, zoneminutes =
            match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$")
        if not year then
            -- not in the expected format
            return nil
        end
        return format("D:%s%s%s%s%s%s%s%s'%s",year,month,day,hour,minute,second,sign,zonehours,zoneminutes)
    end
    local when = type(str) == "number" and str or ostime()
    return osdate("D:%Y%m%d%H%M%S",when) -- maybe "!D..." : universal time
end
120
-- Returns the table with document metadata, building (and caching) it on the
-- first call. The producer, creator, ids and jobname are always present; the
-- version, platform and date fields are only added when a creation date is
-- known at the time of that first call, in which case the id also gets the
-- creation date appended.
local function pdfgetmetadata()
    if metadata then
        return metadata
    end
    local contextversion      = environment.version
    local luatexversion       = LUATEXVERBOSE
    local luatexfunctionality = tostring(LUATEXFUNCTIONALITY)
    local jobname             = environment.jobname or tex.jobname or "unknown"
    local documentid, instanceid
    if trailerid then
        documentid = "uuid:" .. osuuid()
        instanceid = "uuid:" .. osuuid()
    else
        documentid = "no unique document id here"
        instanceid = "no unique instance id here"
    end
    local collected = {
        producer   = "LuaMetaTeX",
        creator    = format("LuaMetaTeX %s %s + ConTeXt LMTX %s",luatexversion,luatexfunctionality,contextversion),
        id         = jobname,
        documentid = documentid,
        instanceid = instanceid,
        jobname    = jobname,
    }
    if creationdate then
        collected.id                  = format("%s | %s",jobname,creationdate)
        collected.luatexversion       = luatexversion
        collected.contextversion      = contextversion
        collected.luatexfunctionality = luatexfunctionality
        collected.luaversion          = tostring(LUAVERSION)
        collected.platform            = os.platform
        collected.creationdate        = creationdate
        collected.modificationdate    = modificationdate
    end
    metadata = collected
    return metadata
end
158
-- Sets the creation date from the given value when converters.totime can make
-- something of it, and copies it to the modification date when 'both' is set.
-- The (possibly unchanged) creation date is returned in all cases.
local function pdfsetmetadate(n,both)
    local converted = n and converters.totime(n)
    if converted then
        local stamp = osdate("%Y-%m-%dT%H:%M:%S",ostime(converted))
        creationdate = stamp .. ostimezone()
        if both then
            modificationdate = creationdate
        end
    end
    return creationdate
end
171
-- make the timestamp converter available to other modules
lpdf.pdftimestamp = pdftimestamp

-- Returns the hashed trailer id: based on a fresh uuid when trailerid is true,
-- based on the configured string when it is a string, and false otherwise.
function lpdf.gettrailerid()
    local seed
    if trailerid == true then
        seed = osuuid()
    elseif type(trailerid) == "string" then
        seed = trailerid
    else
        return false
    end
    return md5.HEX(seed)
end
183
184-- string: use that, true: uuid, false: nothing
185
-- Configures the trailer id. A string value is stored as is, anything else is
-- turned into a boolean. The test has to compare the result of type(v): the
-- bare type(v) always yields a (truthy) string and therefore never
-- distinguished strings from other values.
directives.register("backend.trailerid", function(v)
    trailerid = type(v) == "string" and v or toboolean(v)
end)
189
190-- year-mm-dd : use that for creation and modification
191
-- Handler for the date directive. A number or string that converters.totime
-- accepts forces both dates to that moment. Otherwise a value that toboolean
-- considers true sets both dates to the current time and any other value
-- disables them.
local function setdates(v)
    local kind = type(v)
    if kind == "number" or kind == "string" then
        local forced = converters.totime(v)
        if forced then
            report_info("forced date/time information %a will be used",pdfsetmetadate(forced,true))
            return
        end
    end
    local stamp = false
    if toboolean(v) then
        stamp = osdate("%Y-%m-%dT%H:%M:%S") .. ostimezone()
    end
    creationdate     = stamp
    modificationdate = stamp
end

-- by default the current time is used

setdates(true)

directives.register("backend.date", setdates)
213
214-- maybe some day we will load the xmp file at runtime
215
-- xmp     : the loaded xml tree (nil as long as nothing is loaded)
-- xmpfile : the user supplied or resolved filename
-- xmpname : the template that is used when no (valid) file has been set

local xmp, xmpfile
local xmpname = "lpdf-pdx.xml"

-- Registers the name of the xmp template to use. An already loaded tree is
-- discarded so that the next access loads the new file. An empty name is
-- stored as false.
local function setxmpfile(name)
    if xmp then
        report_xmp("discarding loaded file %a",xmpfile)
        xmp = nil
    end
    if name == "" then
        xmpfile = false
    else
        xmpfile = name
    end
end

codeinjections.setxmpfile = setxmpfile

interfaces.implement {
    name      = "setxmpfile",
    arguments = "string",
    actions   = setxmpfile
}
233
-- Returns the xmp tree, loading it on first use. The configured file is
-- resolved first; when there is none or it cannot be found the default
-- template is looked up. When no file is found at all an empty string is
-- converted.
local function valid_xmp()
    if xmp then
        return xmp
    end
 -- local xmpfile = xmpfile or resolvers.findfile(xmpname) or ""
    if xmpfile and xmpfile ~= "" then
        xmpfile = resolvers.findfile(xmpfile) or ""
    end
    if not xmpfile or xmpfile == "" then
        xmpfile = resolvers.findfile(xmpname) or ""
    end
    local xmpdata = ""
    if xmpfile ~= "" then
        report_xmp("using file %a",xmpfile)
        xmpdata = io.loaddata(xmpfile) or ""
    end
    xmp = xml.convert(xmpdata)
    return xmp
end
251
-- Fills the xmp element that belongs to the given tag with the given value.
-- Tags without a pattern in the mapping table are ignored, as are false or
-- nil values. The 'check' argument is passed on to the xml fill-in helper.
function lpdf.addxmpinfo(tag,value,check)
    local pattern = mapping[tag][2]
    if type(pattern) ~= "string" then
        return
    end
    local root = xmp or valid_xmp() -- valid_xmp also stores the tree in xmp
    if root and value then
        xmlfillin(root,pattern,value,check)
    end
end
263
264-- redefined
265
-- keep a reference to the original info setter, it is wrapped below

local pdfaddtoinfo  = lpdf.addtoinfo
local pdfaddxmpinfo = lpdf.addxmpinfo

-- Adds an entry to the info dictionary and mirrors it in the xmp tree.
--
-- tag      : the info key; it is only passed to the original setter when the
--            mapping table has a pattern for it or when strvalue is true
-- pdfvalue : the value for the info dictionary
-- strvalue : when a string it is used as the xmp text, otherwise the text is
--            derived from pdfvalue by stripping a pair of outer parentheses
--
-- The xmp element is always overwritten. The original code passed an
-- undeclared variable 'check' to the fill-in helper, which read a global
-- (normally nil); that stray reference has been removed.
function lpdf.addtoinfo(tag,pdfvalue,strvalue)
    local pattern = mapping[tag][2]
    if pattern or strvalue == true then
        pdfaddtoinfo(tag,pdfvalue)
    end
    if type(pattern) == "string" then
        local value = (type(strvalue) == "string" and strvalue) or gsub(tostring(pdfvalue),"^%((.*)%)$","%1") -- hack
        if trace_info then
            report_info("set %a to %a",tag,value)
        end
        xmlfillin(xmp or valid_xmp(),pattern,value)
    end
end

local pdfaddtoinfo = lpdf.addtoinfo -- used later
284
-- for the do-it-yourselfers
286
-- Hands the arguments to xml.insert, applied to the (if needed loaded) xmp tree.
function lpdf.insertxmpinfo(pattern,whatever,prepend)
    local root = xmp or valid_xmp()
    xml.insert(root,pattern,whatever,prepend)
end
290
-- Hands the arguments to xml.inject, applied to the (if needed loaded) xmp tree.
function lpdf.injectxmpinfo(pattern,whatever,prepend)
    local root = xmp or valid_xmp()
    xml.inject(root,pattern,whatever,prepend)
end
294
-- Hands the arguments to xml.replace, applied to the (if needed loaded) xmp tree.
function lpdf.replacexmpinfo(pattern,whatever)
    local root = xmp or valid_xmp()
    xml.replace(root,pattern,whatever)
end
298
299-- flushing
300
-- Flush options. Both are set by directives registered at the end of this
-- file. The verbose flag is declared here so that the flush code and the
-- directive share a local instead of reading and writing a global.

local add_xmp_blob   = true   -- directive: backend.xmp
local verbose        = false  -- directive: backend.verbosexmp
--   identity_done = false  -- using "setupidentity = function() end" fails as the meaning is frozen in register
303
-- Copies the identity fields into the given metadata table. This is a one
-- shot action: the function disables itself by setting its own variable to
-- false, which is what the caller tests before calling it.
local function checkidentity(target)
    local identity = interactions.general.getidentity()
    for _, key in ipairs { "title", "subtitle", "author", "date", "keywords" } do
        target[key] = identity[key]
    end
    checkidentity = false
end
315
-- Collects the document metadata (merging in the identity fields once) and
-- registers the resulting entries with the info dictionary. The metadata
-- table is returned so that the caller can reuse it.
local function setupidentity()
    local data = pdfgetmetadata()

    if checkidentity then
        checkidentity(data)
    end

    -- text entries are skipped when absent or empty
    local function addtext(tag,text)
        if text and text ~= "" then
            pdfaddtoinfo(tag,pdfunicode(text),text)
        end
    end

    -- date entries get a pdf timestamp in the info dictionary
    local function adddate(tag,stamp)
        if stamp then
            pdfaddtoinfo(tag,pdfstring(pdftimestamp(stamp)),stamp)
        end
    end

    -- plain entries are passed as they are
    local function addplain(tag,value)
        if value then
            pdfaddtoinfo(tag,value)
        end
    end

    local date = data.date
    if date and date ~= "" then
        pdfsetmetadate(date)
    end

    local keywords = data.keywords
    if keywords then
        keywords = concat(settings_to_array(keywords), " ")
    end

    local creator  = data.creator
    local creation = data.creationdate
    local id       = data.id

    if creator then
        pdfaddtoinfo("Creator",pdfunicode(creator),creator)
    end
    adddate("CreationDate",creation)
    adddate("ModDate",data.modificationdate)
    if id then
        pdfaddtoinfo("ID",pdfstring(id),id) -- needed for pdf/x
    end

    addtext("Title",data.title)
    addtext("Subject",data.subtitle)
    addtext("Author",data.author)
    addtext("Keywords",keywords)

    addplain("ConTeXt.Version",data.contextversion)
    addplain("ConTeXt.Time",creation)
    addplain("ConTeXt.Jobname",data.jobname)

 -- pdfaddtoinfo("ConTeXt.Url","www.pragma-ade.com")
    pdfaddtoinfo("ConTeXt.Url","github.com/contextgarden/context")
    pdfaddtoinfo("ConTeXt.Support","contextgarden.net")
    pdfaddtoinfo("TeX.Support","tug.org")

    return data
end
392
-- Document finalizer: completes the info dictionary and, unless disabled by
-- the backend.xmp directive, fills in the xmp template, removes fields that
-- stayed empty, wraps the result in an xpacket and adds it as metadata stream
-- to the catalog.
--
-- Changes compared to the original code:
--
-- * the Keywords entry is filled from the keywords and no longer from the
--   author (copy and paste error)
-- * the xmp tree is resolved once before the empty field check instead of
--   relying on it being loaded already
local function flushxmpinfo()
    -- the random seed is saved, set from the clock and restored at the end
    -- (marked as a hack in the original code)
    commands.pushrandomseed()
    commands.setrandomseed(ostime())

    local metadata = setupidentity()            -- todo: merge into here and save code

    local creation     = metadata.time or metadata.creationdate     or creationdate
    local modification = metadata.time or metadata.modificationdate or modificationdate or creation
    local producer     = metadata.producer
    local creator      = metadata.creator
    local documentid   = metadata.documentid
    local instanceid   = metadata.instanceid

    pdfaddtoinfo("Producer",producer)
    pdfaddtoinfo("Creator",creator)
    pdfaddtoinfo("CreationDate",creation)
    pdfaddtoinfo("ModDate",modification)

    if add_xmp_blob then

        pdfaddxmpinfo("DocumentID",documentid)
        pdfaddxmpinfo("InstanceID",instanceid)
        pdfaddxmpinfo("Producer",producer)
        pdfaddxmpinfo("CreatorTool",creator)
        pdfaddxmpinfo("CreateDate",creation)
        pdfaddxmpinfo("ModifyDate",modification)
        pdfaddxmpinfo("MetadataDate",creation)
        pdfaddxmpinfo("LuaTeX.Version",metadata.luatexversion)
        pdfaddxmpinfo("LuaTeX.Functionality",metadata.luatexfunctionality)
        pdfaddxmpinfo("LuaTeX.LuaVersion",metadata.luaversion)
        pdfaddxmpinfo("LuaTeX.Platform",metadata.platform)

        local title    = metadata.title
        local subtitle = metadata.subtitle
        local author   = metadata.author
        local keywords = metadata.keywords

        -- The third argument makes these calls conditional ('check').

        if title and title ~= "" then
            pdfaddxmpinfo("Title",pdfunicode(title),title)
        end
        if subtitle and subtitle ~= "" then
            pdfaddxmpinfo("Subject",pdfunicode(subtitle),subtitle)
        end
        if author and author ~= "" then
            pdfaddxmpinfo("Author",pdfunicode(author),author)
        end
        if keywords and keywords ~= "" then
            pdfaddxmpinfo("Keywords",pdfunicode(keywords),keywords)
        end

        -- We need to wipe some fields in the xml because otherwise validators
        -- complain ... they don't see an empty (nonexistent, default) info field
        -- as being the same as an empty element.

        local root = xmp or valid_xmp()

        for _, map in next, mapping do
            if map[3] == true then
                local pattern = map[2]
                if type(pattern) == "string" and xmltext(root,pattern) == "" then
                    -- step up one level less than the number of slashes in the pattern
                    xmldelete(root,pattern .. rep("/..",count(pattern,"/")-1))
                end
            end
        end

        local blob = xml.tostring(xml.first(root,"/x:xmpmeta"))
        local md = pdfdictionary {
            Subtype = pdfconstant("XML"),
            Type    = pdfconstant("Metadata"),
        }
        if trace_xmp then
            report_xmp("data flushed, see log file")
            logs.pushtarget("logfile")
            report_xmp("start xmp blob")
            logs.newline()
            logs.writer(blob)
            logs.newline()
            report_xmp("stop xmp blob")
            logs.poptarget()
        end
        blob = format(xpacket,blob)
        if not verbose and lpdf.compresslevel() > 0 then
            -- compact: drop all whitespace between elements
            blob = gsub(blob,">%s+<","><")
        else
            -- readable: repeatedly remove lines that only contain spaces
            -- todo: lpeg
            while true do
                local b = gsub(blob,"\n +\n( +<)","\n%1")
                if b == blob then
                    break
                else
                    blob = b
                end
            end
        end
        local r = pdfflushstreamobject(blob,md,false) -- uncompressed
        lpdf.addtocatalog("Metadata",pdfreference(r))

    end

    commands.poprandomseed() -- hack
end
497
-- lpdf.registerpagefinalizer(setupidentity,"identity")

-- Registers flushxmpinfo as document finalizer under the name "metadata"
-- (NOTE(review): the second argument is presumably an ordering hint, confirm
-- against lpdf.registerdocumentfinalizer).
lpdf.registerdocumentfinalizer(flushxmpinfo,1,"metadata")

-- backend.xmp        : the value decides if flushxmpinfo adds the xmp blob
-- backend.verbosexmp : the value is stored in verbose, which flushxmpinfo
--                      tests before stripping whitespace between elements
directives.register("backend.xmp",        function(v) add_xmp_blob = v end)
directives.register("backend.verbosexmp", function(v) verbose = v end)
503