lpdf-xmp.lmt /size: 18 Kb    last modification: 2025-02-21 11:03
-- Register this module's descriptive record in the global "modules" table, which is
-- created on demand when it does not exist yet.
if not modules then modules = { } end modules ['lpdf-xmp'] = {
    version   = 1.001,
    comment   = "companion to lpdf-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
    -- NOTE(review): the key "comment" occurs twice in this constructor, so only one of
    -- the two strings can end up in the table; confirm which one is meant to be kept.
    comment   = "with help from Peter Rolf",
}
9
10local tostring, type = tostring, type
11local format, gsub, match, rep, count = string.format, string.gsub, string.match, string.rep, string.count
12local concat = table.concat
13local utfchar = utf.char
14local md5HEX = md5.HEX
15local xmlfillin, xmldelete, xmltext = xml.fillin, xml.delete, xml.text
16local osdate, ostime, ostimezone, osuuid = os.date, os.time, os.timezone, os.uuid
17local settings_to_array = utilities.parsers.settings_to_array
18
-- Tracing flags; each one is updated by the callback registered under the given tracker name.
local trace_xmp  = false  trackers.register("backend.xmp",  function(v) trace_xmp  = v end)
local trace_info = false  trackers.register("backend.info", function(v) trace_info = v end)

-- Reporters used for the "xmp" and "info" messages emitted further down in this file.
local report_xmp  = logs.reporter("backend","xmp")
local report_info = logs.reporter("backend","info")
24
25local backends             = backends
26local pdfbackend           = backends.registered.pdf
27local codeinjections       = pdfbackend.codeinjections
28
29local lpdf                 = lpdf
30local pdfdictionary        = lpdf.dictionary
31local pdfconstant          = lpdf.constant
32local pdfunicode           = lpdf.unicode
33local pdfstring            = lpdf.string
34local pdfreference         = lpdf.reference
35local pdfflushstreamobject = lpdf.flushstreamobject
36
-- The XMP packet wrapper is kind of fixed, see page 10 of XMPSpecificationsPart1.pdf from
-- XMP-Toolkit-SDK-CC201607.zip. So we hardcode the id.
--
-- The template is formatted in two steps: the call below puts U+FEFF in the begin attribute
-- and turns the doubled %%s into a plain %s, which flushxmpinfo later fills with the
-- serialized metadata by a second format call.

local xpacket = format ( [[
<?xpacket begin="%s" id="W5M0MpCehiHzreSzNTczkc9d"?>

%%s

<?xpacket end="w"?>]], utfchar(0xFEFF) )
46
-- Lookup table from info tags to their place in the XMP template.
--
-- Each entry is { category, pattern [, true] }:
--   [1] a category string; it is not consulted anywhere in this file (presumably informational)
--   [2] the xml pattern that addtoinfo/addxmpinfo hand to xml.fillin
--   [3] when true, flushxmpinfo deletes the element if its text is still empty at flush time
--
-- Tags that are not listed resolve (through the index function at the end) to the shared
-- "unknown" entry, so that mapping[tag][2] is false instead of raising an error.
local unknown = { false, false }
local mapping = table.setmetatableindex ( {
    -- user defined keys (pdfx:)
    ["ConTeXt.Jobname"]      = { "context",  "rdf:Description/pdfx:ConTeXt.Jobname" },
    ["ConTeXt.Time"]         = { "date",     "rdf:Description/pdfx:ConTeXt.Time" },
    ["ConTeXt.Url"]          = { "context",  "rdf:Description/pdfx:ConTeXt.Url" },
    ["ConTeXt.Support"]      = { "context",  "rdf:Description/pdfx:ConTeXt.Support" },
    ["ConTeXt.Version"]      = { "context",  "rdf:Description/pdfx:ConTeXt.Version" },
    ["TeX.Support"]          = { "metadata", "rdf:Description/pdfx:TeX.Support" },
    ["LuaTeX.Version"]       = { "metadata", "rdf:Description/pdfx:LuaTeX.Version" },
    ["LuaTeX.Functionality"] = { "metadata", "rdf:Description/pdfx:LuaTeX.Functionality" },
    ["LuaTeX.LuaVersion"]    = { "metadata", "rdf:Description/pdfx:LuaTeX.LuaVersion" },
    ["LuaTeX.Platform"]      = { "metadata", "rdf:Description/pdfx:LuaTeX.Platform" },
    ["ID"]                   = { "id",       "rdf:Description/pdfx:ID" },                         -- has date
    -- Adobe PDF schema
    ["Keywords"]             = { "metadata", "rdf:Description/pdf:Keywords", true },
    ["Producer"]             = { "metadata", "rdf:Description/pdf:Producer", true },
 -- ["Trapped"]              = { "pdf",      "rdf:Description/pdf:Trapped" },                     -- '/False' in /Info, but 'False' in XMP
    -- Dublin Core schema
    ["Format"]               = { "metadata", "rdf:Description/dc:format" },                       -- optional, but nice to have
    -- see xml file for comment:
 -- ["Author"]               = { "metadata", "rdf:Description/dc:creator" },
 -- ["Subject"]              = { "metadata", "rdf:Description/dc:description" },
 -- ["Title"]                = { "metadata", "rdf:Description/dc:title" },
    ["Author"]               = { "metadata", "rdf:Description/dc:creator/rdf:Seq/rdf:li", true },
    ["Subject"]              = { "metadata", "rdf:Description/dc:description/rdf:Alt/rdf:li", true },
    ["Title"]                = { "metadata", "rdf:Description/dc:title/rdf:Alt/rdf:li", true },
    -- XMP Basic schema
    ["CreateDate"]           = { "date",     "rdf:Description/xmp:CreateDate" },
    ["CreationDate"]         = { "date",     "rdf:Description/xmp:CreationDate" },                -- dummy
    ["CreatorTool"]          = { "metadata", "rdf:Description/xmp:CreatorTool" },
 -- ["Creator"]              = { "metadata", "rdf:Description/xmp:CreatorTool" },
    ["MetadataDate"]         = { "date",     "rdf:Description/xmp:MetadataDate" },
    ["ModDate"]              = { "date",     "rdf:Description/xmp:ModDate" },                     -- dummy
    ["ModifyDate"]           = { "date",     "rdf:Description/xmp:ModifyDate" },
    -- XMP Media Management schema
    ["DocumentID"]           = { "id",       "rdf:Description/xmpMM:DocumentID" },                -- uuid
    ["InstanceID"]           = { "id",       "rdf:Description/xmpMM:InstanceID" },                -- uuid
    ["RenditionClass"]       = { "pdf",      "rdf:Description/xmpMM:RenditionClass" },            -- PDF/X-4
    ["VersionID"]            = { "pdf",      "rdf:Description/xmpMM:VersionID" },                 -- PDF/X-4
    -- additional entries
    -- PDF/X
    ["GTS_PDFXVersion"]      = { "pdf",      "rdf:Description/pdfxid:GTS_PDFXVersion" },
    -- optional entries
    -- all what is visible in the 'document properties --> additional metadata' window
    -- XMP Rights Management schema (optional)
    ["Marked"]               = { "pdf",      "rdf:Description/xmpRights:Marked" },
 -- ["Owner"]                = { "metadata", "rdf:Description/xmpRights:Owner/rdf:Bag/rdf:li" }, -- maybe useful (not visible)
 -- ["UsageTerms"]           = { "metadata", "rdf:Description/xmpRights:UsageTerms" },           -- maybe useful (not visible)
 -- ["WebStatement"]         = { "metadata", "rdf:Description/xmpRights:WebStatement" },
    -- Photoshop PDF schema (optional)
 -- ["AuthorsPosition"]      = { "metadata", "rdf:Description/photoshop:AuthorsPosition" },
 -- ["Copyright"]            = { "metadata", "rdf:Description/photoshop:Copyright" },
 -- ["CaptionWriter"]        = { "metadata", "rdf:Description/photoshop:CaptionWriter" },
    -- placeholder elements; these carry the "delete when empty" flag as well
    ["Placeholder1"]          = { "metadata", "pdfaid-placeholder",  true },
    ["Placeholder2"]          = { "metadata", "pdfuaid-placeholder", true },
    ["Placeholder3"]          = { "metadata", "wtpdf-placeholder",   true },
}, function() return unknown end )
106
107
-- Module state shared by the functions below.
local metadata         = nil   -- built once by pdfgetmetadata and reused afterwards
local trailerid        = true  -- true: uuid based, string: derived from that string, else none (see lpdf.gettrailerid)
local creationdate     = false -- timestamp string or false; assigned by setdates and pdfsetmetadate
local modificationdate = false -- timestamp string or false; assigned by setdates and pdfsetmetadate
112
-- Convert a date into the "D:..." notation used in the pdf info dictionary.
--
-- str : an ISO like string "YYYY-MM-DDThh:mm:ss+hh:mm" (sign can be + or -), a number
--       that is passed to os.date as time value, or anything else for the current time
--
-- Returns the formatted string; a string that does not match the expected layout gives
-- nil. The zone offset is only emitted for string input.
local function pdftimestamp(str)
    if type(str) ~= "string" then
        local seconds = type(str) == "number" and str or ostime()
        return osdate("D:%Y%m%d%H%M%S",seconds) -- maybe "!D..." : universal time
    end
    local year, month, day, hour, minute, second, sign, zonehours, zoneminutes =
        match(str,"^(%d%d%d%d)%-(%d%d)%-(%d%d)T(%d%d):(%d%d):(%d%d)([%+%-])(%d%d):(%d%d)$")
    if year then
        return format("D:%s%s%s%s%s%s%s%s'%s",year,month,day,hour,minute,second,sign,zonehours,zoneminutes)
    end
    return nil
end
122
-- Return the shared metadata table, creating it on first use.
--
-- The basic set (producer, creator, id, document/instance id, jobname) is always there.
-- When a creation date is known at that moment, the id gets the date appended and the
-- engine, version, platform and date fields are added too. The table is cached in the
-- module level "metadata" variable, so later changes of the dates are not reflected.
local function pdfgetmetadata()
    if metadata then
        return metadata
    end
    local contextversion      = environment.version
    local luatexversion       = LUATEXVERBOSE
    local luatexfunctionality = tostring(LUATEXFUNCTIONALITY)
    local jobname             = environment.jobname or tex.jobname or "unknown"
    -- without a trailer id we use descriptive placeholders instead of uuids
    local documentid          = trailerid and ("uuid:" .. osuuid()) or "no unique document id here"
    local instanceid          = trailerid and ("uuid:" .. osuuid()) or "no unique instance id here"
    local data = {
        producer   = "LuaMetaTeX",
        creator    = format("LuaMetaTeX %s %s + ConTeXt LMTX %s",luatexversion,luatexfunctionality,contextversion),
        id         = jobname,
        documentid = documentid,
        instanceid = instanceid,
        jobname    = jobname,
    }
    if creationdate then
        data.id                  = format("%s | %s",jobname,creationdate)
        data.luatexversion       = luatexversion
        data.contextversion      = contextversion
        data.luatexfunctionality = luatexfunctionality
        data.luaversion          = tostring(LUAVERSION)
        data.platform            = os.platform
        data.creationdate        = creationdate
        data.modificationdate    = modificationdate
    end
 -- inspect(data)
    metadata = data
    return metadata
end
160
-- Set the creation date (and with "both" also the modification date) from a user value.
--
-- n    : whatever converters.totime accepts; a false value or a failed conversion leaves
--        the dates untouched
-- both : when true the modification date gets the same value
--
-- Returns the (possibly unchanged) creation date.
local function pdfsetmetadate(n,both)
    local when = n and converters.totime(n)
    if when then
        local stamp = osdate("%Y-%m-%dT%H:%M:%S",ostime(when)) .. ostimezone()
        creationdate = stamp
        if both then
            modificationdate = stamp
        end
    end
    return creationdate
end
173
lpdf.pdftimestamp = pdftimestamp

-- Return the md5 based trailer id, or false when no id is wanted.
--
-- With trailerid being true a fresh uuid is hashed (so every call gives another id), a
-- string value is hashed as is, and any other value disables the id.
function lpdf.gettrailerid()
    local seed
    if trailerid == true then
        seed = osuuid()
    elseif type(trailerid) == "string" then
        seed = trailerid
    else
        return false
    end
    return md5.HEX(seed)
end
185
-- string: use that, true: uuid, false: nothing

directives.register("backend.trailerid", function(v)
    -- A string is kept as the seed for the trailer id, anything else is reduced to a
    -- boolean. We have to compare against "string" explicitly: a bare type(v) is always
    -- a (true) string, which made the toboolean branch depend on the truthiness of v
    -- instead of on its type.
    trailerid = type(v) == "string" and v or toboolean(v)
end)
191
-- year-mm-dd : use that for creation and modification

-- Handler for the "backend.date" directive.
--
-- A number or string that converters.totime can convert forces both dates to that moment
-- (and reports it). Otherwise a value for which toboolean gives true sets both dates to
-- the current time, and any other value disables them.
local function setdates(v)
    local kind   = type(v)
    local forced = (kind == "number" or kind == "string") and converters.totime(v)
    if forced then
        report_info("forced date/time information %a will be used",pdfsetmetadate(forced,true))
    elseif toboolean(v) then
        local now = osdate("%Y-%m-%dT%H:%M:%S") .. ostimezone()
        creationdate     = now
        modificationdate = now
    else
        creationdate     = false
        modificationdate = false
    end
end

-- by default we use the current time

setdates(true)

directives.register("backend.date", setdates)
215
-- maybe some day we will load the xmp file at runtime

-- xmp     : the converted xml tree (nil as long as nothing is loaded)
-- xmpfile : the user specified template file (resolved in valid_xmp)
-- xmpname : the default template that is used when no file is set or found

local xmp, xmpfile, xmpname = nil, nil, "lpdf-pdx.xml"

-- Register another template file. An already loaded tree is dropped (with a message) so
-- that the next access loads the new file; an empty name resets to the default.
local function setxmpfile(name)
    if xmp then
        report_xmp("discarding loaded file %a",xmpfile)
        xmp = nil
    end
    if name ~= "" then
        xmpfile = name
    else
        xmpfile = false
    end
end

codeinjections.setxmpfile = setxmpfile

interfaces.implement {
    name      = "setxmpfile",
    arguments = "string",
    actions   = setxmpfile
}
235
-- Return the xml tree of the xmp template, loading it when needed.
--
-- A user file has preference; when it is not set or cannot be found we fall back on the
-- default template. When nothing can be located (or loaded) an empty string is converted.
-- As a side effect xmpfile ends up being the resolved name or "".
local function valid_xmp()
    if xmp then
        return xmp
    end
    local found = xmpfile
    if found and found ~= "" then
        found = resolvers.findfile(found) or ""
    end
    if not found or found == "" then
        found = resolvers.findfile(xmpname) or ""
    end
    xmpfile = found
    local data = ""
    if found ~= "" then
        report_xmp("using file %a",found)
        data = io.loaddata(found) or ""
    end
    xmp = xml.convert(data, { strip_cm_and_dt = true })
    return xmp
end
253
-- Fill the xmp element that belongs to a tag.
--
-- tag   : a key of the mapping table; tags without a pattern are silently ignored
-- value : the content; a false or nil value leaves the element untouched
-- check : passed on as is to xml.fillin
function lpdf.addxmpinfo(tag,value,check)
    local pattern = mapping[tag][2]
    if type(pattern) ~= "string" then
        return
    end
    if not xmp then
        -- the template is loaded on demand, also when there is no value
        xmp = valid_xmp()
    end
    if not (xmp and value) then
        return
    end
    xmlfillin(xmp,pattern,value,check)
end
265
-- redefined

local pdfaddtoinfo  = lpdf.addtoinfo
local pdfaddxmpinfo = lpdf.addxmpinfo

-- Replacement for lpdf.addtoinfo that keeps the info dictionary and the xmp tree in sync.
--
-- tag      : the info key
-- pdfvalue : the value for the info dictionary (handed to the original addtoinfo)
-- strvalue : a string to be used in the xmp tree; when it is not a string the value is
--            derived from tostring(pdfvalue) with the outer parentheses removed; the
--            value true forces an info entry for tags that are not in the mapping table
--
-- Tags without a mapping entry only reach the info dictionary when strvalue is true.
function lpdf.addtoinfo(tag,pdfvalue,strvalue)
    local pattern = mapping[tag][2]
    if pattern or strvalue == true then
        pdfaddtoinfo(tag,pdfvalue)
    end
    if type(pattern) == "string" then
        local value = (type(strvalue) == "string" and strvalue) or gsub(tostring(pdfvalue),"^%((.*)%)$","%1") -- hack
        if trace_info then
            report_info("set %a to %a",tag,value)
        end
        -- This function has no "check" parameter, so the fourth argument that was passed
        -- here before was an (undefined) global; we always (over)write the element.
        xmlfillin(xmp or valid_xmp(),pattern,value)
    end
end

local pdfaddtoinfo = lpdf.addtoinfo -- used later
286
287-- for the do-it-yourselvers
288
-- Hand the arguments to xml.insert, applied to the (on demand loaded) xmp tree.
function lpdf.insertxmpinfo(pattern,whatever,prepend)
    local root = xmp or valid_xmp()
    xml.insert(root,pattern,whatever,prepend)
end
292
-- Hand the arguments to xml.inject, applied to the (on demand loaded) xmp tree.
function lpdf.injectxmpinfo(pattern,whatever,prepend)
    local root = xmp or valid_xmp()
    xml.inject(root,pattern,whatever,prepend)
end
296
-- Replace what matches the pattern in the xmp tree; an empty string as replacement
-- deletes the match instead.
function lpdf.replacexmpinfo(pattern,whatever)
    local root = xmp or valid_xmp()
    if whatever ~= "" then
        xml.replace(root,pattern,whatever)
    else
        xml.delete(root,pattern)
    end
end
305
-- flushing

-- Both flags are set by the directives that are registered at the end of this file.

local add_xmp_blob   = true   -- when false no xmp stream is added to the catalog
local verbose        = false  -- when true the xmp packet keeps its whitespace; declared here so that it is no longer a global
--    identity_done  = false  -- using "setupidentity = function() end" fails as the meaning is frozen in register
310
local checkidentity

-- Copy the identity fields into the metadata table. This happens only once: afterwards
-- the variable is set to false, which is what callers test for.
checkidentity = function(metadata)
    local identity = interactions.general.getidentity()
    local fields   = { "title", "subtitle", "author", "date", "keywords" }
    for i=1,#fields do
        local field = fields[i]
        metadata[field] = identity[field]
    end
    checkidentity = false
end
322
-- Push the document identity and the ConTeXt specific entries to the info dictionary
-- (and thereby to the xmp tree, as pdfaddtoinfo is the redefined lpdf.addtoinfo).
--
-- Returns the metadata table so that the caller can use the same data.
local function setupidentity()
    local metadata = pdfgetmetadata()

    if checkidentity then
        checkidentity(metadata)
    end

    -- a date given in the identity overloads the creation date

    local date = metadata.date
    if date and date ~= "" then
        pdfsetmetadate(date)
    end

    -- keywords are normalized to a space separated list

    local keywords = metadata.keywords
    if keywords then
        keywords = concat(settings_to_array(keywords), " ")
    end

    -- the fixed entries

    local creator      = metadata.creator
    local creation     = metadata.creationdate
    local modification = metadata.modificationdate
    local id           = metadata.id

    if creator then
        pdfaddtoinfo("Creator",pdfunicode(creator),creator)
    end
    if creation then
        pdfaddtoinfo("CreationDate",pdfstring(pdftimestamp(creation)),creation)
    end
    if modification then
        pdfaddtoinfo("ModDate",pdfstring(pdftimestamp(modification)),modification)
    end
    if id then
        pdfaddtoinfo("ID",pdfstring(id),id) -- needed for pdf/x
    end

    -- the user entries, only when they have content

    local entries = {
        { "Title",    metadata.title    },
        { "Subject",  metadata.subtitle },
        { "Author",   metadata.author   },
        { "Keywords", keywords          },
    }
    for i=1,#entries do
        local entry = entries[i]
        local value = entry[2]
        if value and value ~= "" then
            pdfaddtoinfo(entry[1],pdfunicode(value),value)
        end
    end

    -- the ConTeXt specific entries

    local contextversion = metadata.contextversion
    local jobname        = metadata.jobname

    if contextversion then
        pdfaddtoinfo("ConTeXt.Version",contextversion)
    end
    if creation then
        pdfaddtoinfo("ConTeXt.Time",creation)
    end
    if jobname then
        pdfaddtoinfo("ConTeXt.Jobname",jobname)
    end

 -- pdfaddtoinfo("ConTeXt.Url","www.pragma-ade.com")
    pdfaddtoinfo("ConTeXt.Url","github.com/contextgarden/context")
    pdfaddtoinfo("ConTeXt.Support","contextgarden.net")
    pdfaddtoinfo("TeX.Support","tug.org")

    return metadata
end
399
-- Document finalizer: complete the info dictionary and, unless disabled, flush the xmp
-- packet as metadata stream and register it in the catalog.
--
-- The random seed is pushed, set from the current time, and popped at the end, so that
-- what happens in between does not depend on (or disturb) the seed used by the document.
local function flushxmpinfo()
    commands.pushrandomseed()
    commands.setrandomseed(ostime())

    local metadata = setupidentity()            -- todo: merge into here and save code
 -- local metadata     = pdfgetmetadata()
 -- if checkidentity then
 --     checkidentity(metadata)
 -- end
    local creation     = metadata.time or metadata.creationdate     or creationdate
    local modification = metadata.time or metadata.modificationdate or modificationdate or creation
    local producer     = metadata.producer
    local creator      = metadata.creator
    local documentid   = metadata.documentid
    local instanceid   = metadata.instanceid

    pdfaddtoinfo("Producer",producer)
    pdfaddtoinfo("Creator",creator)
    pdfaddtoinfo("CreationDate",creation)
    pdfaddtoinfo("ModDate",modification)

    if add_xmp_blob then

        pdfaddxmpinfo("DocumentID",documentid)
        pdfaddxmpinfo("InstanceID",instanceid)
        pdfaddxmpinfo("Producer",producer)
        pdfaddxmpinfo("CreatorTool",creator)
        pdfaddxmpinfo("CreateDate",creation)
        pdfaddxmpinfo("ModifyDate",modification)
        pdfaddxmpinfo("MetadataDate",creation)
        pdfaddxmpinfo("LuaTeX.Version",metadata.luatexversion)
        pdfaddxmpinfo("LuaTeX.Functionality",metadata.luatexfunctionality)
        pdfaddxmpinfo("LuaTeX.LuaVersion",metadata.luaversion)
        pdfaddxmpinfo("LuaTeX.Platform",metadata.platform)

        local title    = metadata.title
        local subtitle = metadata.subtitle
        local author   = metadata.author
        local keywords = metadata.keywords

        -- We need to wipe some fields in the xml because otherwise validators
        -- complain ... they don't see an empty (nonexistent, default) info field
        -- as being the same as an empty element.

        -- The third argument ends up as the "check" argument of xml.fillin.

        if title and title ~= "" then
            pdfaddxmpinfo("Title",pdfunicode(title),title)
        end
        if subtitle and subtitle ~= "" then
            pdfaddxmpinfo("Subject",pdfunicode(subtitle),subtitle)
        end
        if author and author ~= "" then
            pdfaddxmpinfo("Author",pdfunicode(author),author)
        end
        if keywords and keywords ~= "" then
            -- this used to pass the author (a copy/paste leftover)
            pdfaddxmpinfo("Keywords",pdfunicode(keywords),keywords)
        end

        -- checks for empty: entries flagged with true in the mapping table get their
        -- element removed when it has no text; we climb up one level per path separator
        -- but the first, so for nested patterns the element directly below
        -- rdf:Description goes

        for tag, map in next, mapping do
            if map[3] == true then
                local pattern = map[2]
                if type(pattern) == "string" and xmltext(xmp,pattern) == "" then
                    xmldelete(xmp,pattern .. rep("/..",count(pattern,"/")-1))
                end
            end
        end

        local blob = xml.tostring(xml.first(xmp or valid_xmp(),"/x:xmpmeta"))
        local md = pdfdictionary {
            Subtype = pdfconstant("XML"),
            Type    = pdfconstant("Metadata"),
        }
        if trace_xmp then
            report_xmp("data flushed, see log file")
            logs.pushtarget("logfile")
            report_xmp("start xmp blob")
            logs.newline()
            logs.writer(blob)
            logs.newline()
            report_xmp("stop xmp blob")
            logs.poptarget()
        end
        -- second formatting step: put the data in the packet wrapper
        blob = format(xpacket,blob)
        if not verbose and lpdf.compresslevel() > 0 then
            -- compact: no whitespace between elements
            blob = gsub(blob,">%s+<","><")
        else
            -- readable: repeatedly drop lines that only contain spaces
            -- todo: lpeg
            while true do
                local b = gsub(blob,"\n +\n( +<)","\n%1")
                if b == blob then
                    break
                else
                    blob = b
                end
            end
        end
        local r = pdfflushstreamobject(blob,md,false) -- uncompressed
        lpdf.addtocatalog("Metadata",pdfreference(r))

    end

    commands.poprandomseed() -- hack
end
504
-- lpdf.registerpagefinalizer(setupidentity,"identity")
-- The info entries and the xmp stream are produced by flushxmpinfo, which is registered
-- here as document finalizer under the name "metadata".
lpdf.registerdocumentfinalizer(flushxmpinfo,1,"metadata")

-- "backend.xmp" sets the flag that decides if flushxmpinfo adds the xmp stream, and
-- "backend.verbosexmp" sets the flag that flushxmpinfo tests before it removes the
-- whitespace between elements.
directives.register("backend.xmp",        function(v) add_xmp_blob = v end)
directives.register("backend.verbosexmp", function(v) verbose = v end)
510