lpdf-pde.lmt /size: 56 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['lpdf-epd'] = {
2    version   = 1.001,
3    comment   = "companion to lpdf-epa.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files",
7    history   = "this one replaces the poppler/pdfe binding",
8}
9
-- This was never designed as a manipulative interface so that bit is kind of messy. I will
-- provide a better interface for changing values. We need to keep in mind that pass through
-- is meant to be fast so we might have to sacrifice some there.
13
14-- \enabledirectives[graphics.pdf.uselua]
15-- \enabledirectives[graphics.pdf.recompress]
16
17-- maximum integer : +2^32
18-- maximum real    : +2^15
19-- minimum real    : 1/(2^16)
20
21-- get_flagged : does that still work
22
23-- ppdoc_permissions (ppdoc *pdf);
24
25-- PPSTRING_ENCODED        1 <<  0
26-- PPSTRING_DECODED        1 <<  1
27-- PPSTRING_EXEC           1 <<  2   postscript only
28-- PPSTRING_PLAIN                0
29-- PPSTRING_BASE16         1 <<  3
30-- PPSTRING_BASE85         1 <<  4
31-- PPSTRING_UTF16BE        1 <<  5
32-- PPSTRING_UTF16LE        1 <<  6
33
34-- PPDOC_ALLOW_PRINT       1 <<  2   printing
35-- PPDOC_ALLOW_MODIFY      1 <<  3   filling form fields, signing, creating template pages
36-- PPDOC_ALLOW_COPY        1 <<  4   copying, copying for accessibility
37-- PPDOC_ALLOW_ANNOTS      1 <<  5   filling form fields, copying, signing
38-- PPDOC_ALLOW_EXTRACT     1 <<  9   contents copying for accessibility
39-- PPDOC_ALLOW_ASSEMBLY    1 << 10   no effect
40-- PPDOC_ALLOW_PRINT_HIRES 1 << 11   no effect
41
42-- PPCRYPT_NONE                  0   no encryption, go ahead
43-- PPCRYPT_DONE                  1   encryption present but password succeeded, go ahead
44-- PPCRYPT_PASS                 -1   encryption present, need non-empty password
45-- PPCRYPT_FAIL                 -2   invalid or unsupported encryption (eg. undocumented in pdf spec)
46
47local setmetatable, type, next = setmetatable, type, next
48local tostring, tonumber, unpack = tostring, tonumber, unpack
49local char, byte, find = string.char, string.byte, string.find
50local abs = math.abs
51local concat, swapped, sortedhash, sortedkeys = table.concat, table.swapped, table.sortedhash, table.sortedkeys
-- utf.char encodes a code point as utf-8; string.char only accepts values in
-- the range 0..255 and would fail on the larger code points that the hex
-- string converters in this file pass to utfchar
local utfchar = utf.char
53local setmetatableindex = table.setmetatableindex
54local ioopen = io.open
55local octtointeger, dectointeger, hextointeger = string.octtointeger, string.dectointeger, string.hextointeger
56
57local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
58local P, C, S, R, Ct, Cc, V, Carg, Cs, Cf, Cg = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V, lpeg.Carg, lpeg.Cs, lpeg.Cf, lpeg.Cg
59
60if not lpdf then
61    require("lpdf-aux")
62end
63
64if not (number and number.dimenfactors) then
65    require("util-dim")
66end
67
68local pdfe              = pdfe
69      lpdf              = lpdf or { }
70local lpdf              = lpdf
71local lpdf_epdf         = { }
72      lpdf.epdf         = lpdf_epdf
73
74local pdfopenfile       = pdfe.openfile
75local pdfnew            = pdfe.new
76local pdfclose          = pdfe.close
77
78local getcatalog        = pdfe.getcatalog
79local getpermissions    = pdfe.getpermissions
80local getinfo           = pdfe.getinfo
81local gettrailer        = pdfe.gettrailer
82local getnofpages       = pdfe.getnofpages
83local getversion        = pdfe.getversion
84local getbox            = pdfe.getbox
85local getstatus         = pdfe.getstatus
86local unencrypt         = pdfe.unencrypt
87local dictionarytotable = pdfe.dictionarytotable
88local arraytotable      = pdfe.arraytotable
89local pagestotable      = pdfe.pagestotable
90local readwholestream   = pdfe.readwholestream
91local getfromreference  = pdfe.getfromreference
92local getfromobject     = pdfe.getfromobject
93local getobjectrange    = pdfe.getobjectrange
94
95local report_epdf       = logs.reporter("epdf")
96
97local allocate          = utilities.storage.allocate
98
99local bpfactor          <const> = number.dimenfactors.bp
100
-- Symbolic names of the object kinds, indexed by the numeric code (starting
-- at zero) that is found in the first slot of raw values.

local objectcodes = { [0] =
    "none",
    "null",
    "bool",
    "integer",
    "number",
    "name",
    "string",
    "array",
    "dictionary",
    "stream",
    "reference",
    "lpdf",
}

-- Symbolic names of the encryption states; the negative ones are treated as
-- "cannot continue" by the loader (it tests for a status below zero).

local encryptioncodes = {
     [0] = "notencrypted",
     [1] = "unencrypted",
    [-1] = "protected",
    [-2] = "failure",
}

-- After swapping and merging, both tables are used in two directions in this
-- file: code to name (objectcodes[kind]) and name to code (objectcodes.none).

objectcodes                  = allocate(swapped(objectcodes,objectcodes))
encryptioncodes              = allocate(swapped(encryptioncodes,encryptioncodes))

lpdf_epdf.objectcodes        = objectcodes
lpdf_epdf.encryptioncodes    = encryptioncodes

-- Numeric codes as constants so that the comparisons below stay cheap.

local none_object_code       <const> = objectcodes.none
local null_object_code       <const> = objectcodes.null
local bool_object_code       <const> = objectcodes.bool
local integer_object_code    <const> = objectcodes.integer
local number_object_code     <const> = objectcodes.number
local name_object_code       <const> = objectcodes.name
local string_object_code     <const> = objectcodes.string
local array_object_code      <const> = objectcodes.array
local dictionary_object_code <const> = objectcodes.dictionary
local stream_object_code     <const> = objectcodes.stream
local reference_object_code  <const> = objectcodes.reference
local lpdf_object_code       <const> = objectcodes.lpdf
141
142local recompress  = false
143
-- We need to convert the string from utf16 although there is no way to
-- check if we have a regular string starting with a bom. So, we have
-- a dilemma here: a pdf doc encoded string can be invalid utf.
147
148-- <hex encoded>   : implicit 0 appended if odd
149-- (byte encoded)  : \( \) \\ escaped
150--
151-- <FE><FF> : utf16be
152--
-- \n \r \t \b \f \( \) \\ \NNN and \<newline> : append next line
154--
155-- the getString function gives back bytes so we don't need to worry about
156-- the hex aspect.
157
158local some_dictionary
159local some_array
160local some_stream
161local some_reference
162
163local some_string = lpdf.frombytes
164
-- Returns the symbolic name that belongs to the __type__ code of a wrapped
-- object. Anything that is not a table gives no result at all.

function lpdf_epdf.objecttype(object)
    if type(object) ~= "table" then
        return
    end
    local code = object.__type__
    if code then
        return objectcodes[code]
    end
    return code
end
171
-- Resolves entry 'key' of the raw table 't'. Raw entries that are tables carry
-- their kind in slot 1: names give their value, strings get converted, and
-- arrays, dictionaries, streams and references are wrapped on demand. All
-- other entries are passed on as they are; missing entries give no result.

local function get_value(document,t,key)
    if t and key then
        local raw = t[key]
        if raw then
            if type(raw) == "table" then
                local code = raw[1]
                if code == name_object_code then
                    return raw[2]
                end
                if code == string_object_code then
                    return some_string(raw[2],raw[3])
                end
                if code == array_object_code then
                    return some_array(raw[2],document)
                end
                if code == dictionary_object_code then
                    return some_dictionary(raw[2],document)
                end
                if code == stream_object_code then
                    return some_stream(raw,raw[2],document) -- needs checking
                end
                if code == reference_object_code then
                    return some_reference(raw,document)
                end
            end
            return raw
        end
    end
end
199
200local checked_access
201local get_flagged     -- from pdfe -> lpdf
202
if lpdf.dictionary then

    -- these are used in mtx-pdf.lua

    local pdfdictionary  = lpdf.dictionary
    local pdfarray       = lpdf.array
    local pdfconstant    = lpdf.constant
    local pdfreference   = lpdf.reference
    local pdfliteral     = lpdf.literal
    local pdfnull        = lpdf.null    -- was called below without being defined
    local pdfunicode     = lpdf.unicode -- idem

    local copyarray      = nil
    local copydictionary = nil

    -- Converts one raw entry into its lpdf counterpart. When no raw value is
    -- passed it is looked up in the raw table of 'object'. Arrays and
    -- dictionaries are copied via the resolved (wrapped) entry object[key].
    -- Kinds that are not handled give no result.

    local function copyobject(document,object,key,value)
        if not value then
            value = object.__raw__[key]
        end
        local t = type(value)
        if t == "string" then
            return pdfconstant(value)
        elseif t ~= "table" then
            return value
        end
        local kind = value[1]
        if kind == name_object_code then
            return pdfconstant(value[2])
        elseif kind == string_object_code then
            return pdfliteral(value[2],false)
        elseif kind == array_object_code then
            return copyarray(document,object[key])
        elseif kind == dictionary_object_code then
            return copydictionary(document,object[key])
        elseif kind == null_object_code then
            return pdfnull()
        elseif kind == reference_object_code then
            return pdfreference(value[3])
        else
         -- report("weird: %s", objectcodes[kind] or "?")
        end
    end

    -- Copies a wrapped array into a fresh lpdf array, entry by entry.

    copyarray = function(document,object)
        local target = pdfarray()
        local source = object.__raw__
        for i=1,#source do
            target[i] = copyobject(document,object,i,source[i])
        end
        return target
    end

    -- Copies a wrapped dictionary into a fresh lpdf dictionary; the keys are
    -- visited in sorted order.

    copydictionary = function(document,object)
        local target = pdfdictionary()
        local source = object.__raw__
        for key, value in sortedhash(source) do
            target[key] = copyobject(document,object,key,value)
        end
        return target
    end

    -- Called when a wrapped object is itself called: 't' is the wrapper and
    -- 'f' its raw table. The result is the lpdf representation that matches
    -- the __type__ of the wrapper; unknown kinds give the raw table back.

    get_flagged = function(document,t,f)
        local kind = t.__type__
        if kind == name_object_code then
            return pdfconstant(f)
        elseif kind == array_object_code then
            return copyarray(document,t)
        elseif kind == dictionary_object_code then
            return copydictionary(document,t)
        elseif kind == stream_object_code then
            return copydictionary(document,t)
        elseif kind == string_object_code then
            return pdfunicode(f)
        elseif kind == null_object_code then
            return pdfnull()
        elseif kind == reference_object_code then
            return pdfreference(t[3])
        else
            return f
        end
    end

    -- Serializes object 'n' of the document in the "n 0 obj ... endobj"
    -- format; for streams the dictionary and the uncompressed data are given.
    -- There is no result when the document, the number or the object is
    -- missing.

    function lpdf_epdf.verboseobject(document,n)
        if document and n then
            local object = document.objects[n]
            if object then
                local t = { n .. " 0 obj" }
                if lpdf.epdf.objecttype(object) == "stream" then
                    t[#t+1] = object("dictionary")()
                    t[#t+1] = "stream"
                    t[#t+1] = tostring(object(true))
                    t[#t+1] = "endstream"
                else
                    t[#t+1] = tostring(object())
                end
                t[#t+1] = "endobj"
                return concat(t,"\n")
            end
        end
    end

else

    -- Without the lpdf object constructors nothing can be converted. This used
    -- to index the wrapper with an undefined global 'k' (so it always gave
    -- nil); we now hand back the raw table, which is also what the variant
    -- above does for kinds that it cannot convert.

    get_flagged = function(document,t,f)
        return f
    end

end
309
-- Wraps a dictionary: entries are resolved when they are accessed and calling
-- the wrapper gives the flagged (lpdf) variant. The second result is the
-- string "dictionary".

some_dictionary = function(d,document)
    local raw     = dictionarytotable(d,true)
    local wrapper = { __raw__ = raw, __type__ = dictionary_object_code }
    local meta    = { }
    function meta.__index(t,k)
        return get_value(document,raw,k)
    end
    function meta.__call(t)
        return get_flagged(document,t,raw)
    end
    return setmetatable(wrapper,meta), "dictionary"
end
322
-- Wraps an array: entries are resolved when they are accessed, calling the
-- wrapper gives the flagged (lpdf) variant and the length operator reports the
-- size of the raw table (also kept in field n). The second result is the
-- string "array".

some_array = function(a,document)
    local raw     = arraytotable(a,true)
    local size    = #raw
    local wrapper = { __raw__ = raw, __type__ = array_object_code, n = size }
    local meta    = { }
    function meta.__index(t,k)
        return get_value(document,raw,k)
    end
    function meta.__call(t)
        return get_flagged(document,t,raw)
    end
    function meta.__len()
        return size
    end
    return setmetatable(wrapper,meta), "array"
end
339
-- Wraps a stream: entries of the stream dictionary are resolved when they are
-- accessed. Calling the wrapper depends on the argument:
--
--   "dictionary" : the flagged (lpdf) variant of the stream dictionary
--   false        : the stream data as stored in the file (original)
--   other        : the stream data, uncompressed
--
-- The second result is the string "stream".

some_stream = function(s,d,document)
    local raw     = dictionarytotable(d,true)
    local wrapper = { __raw__ = raw, __type__ = stream_object_code }
    local meta    = { }
    function meta.__index(t,k)
        return get_value(document,raw,k)
    end
    function meta.__call(t,how)
        if how == "dictionary" then
            return get_flagged(document,t,raw)
        end
        -- maybe some day: how == false and not recompress
        return readwholestream(s,how ~= false)
    end
    return setmetatable(wrapper,meta), "stream"
end
366
367-- Just temporarily put this in a font resource and test if it gets copied:
368--
369-- local nA = pdfflushobject("/FOO")
370-- local nB = pdfflushobject("true")
371-- local nC = pdfflushobject("1234")
372-- local nD = pdfflushobject("(12abc)")
373-- local nE = pdfflushobject("<4E6F762073686D6F7A206B6120706F702E>")
374-- local nF = pdfflushobject("null")
375-- ByHansA = pdfreference(nA),
376-- ByHansB = pdfreference(nB),
377-- ByHansC = pdfreference(nC),
378-- ByHansD = pdfreference(nD),
379-- ByHansE = pdfreference(nE),
380-- ByHansF = pdfreference(nF),
381
-- Resolves a raw reference (object number in slot 3) via the cache of the
-- document. On a miss the target is fetched, wrapped when it is a dictionary,
-- array or stream (otherwise kept as a plain kind/value table) and registered
-- in both the cache and the reverse lookup table.

some_reference = function(r,document)
    local objnum = r[3]
    local cache  = document.__cache__
    local found  = cache[objnum]
    if found then
        return found
    end
    local kind, object, b, c = getfromreference(r[2])
    if kind == dictionary_object_code then
        found = some_dictionary(object,document)
    elseif kind == array_object_code then
        found = some_array(object,document)
    elseif kind == stream_object_code then
        found = some_stream(object,b,document)
    else
        -- really cache this? strings also end up here
        found = { kind, object, b, c }
    end
    cache[objnum] = found
    document.__xrefs__[found] = objnum
    return found
end
404
-- Fetches object number 'n' from the document. Dictionaries, arrays and
-- streams are wrapped, anything else comes back as a plain kind/value table.
-- Nothing gets cached here.

local function some_object(document,n) -- name obj?
    local kind, object, b, c = getfromobject(document.__data__,n)
    if kind == dictionary_object_code then
        return some_dictionary(object,document)
    end
    if kind == array_object_code then
        return some_array(object,document)
    end
    if kind == stream_object_code then
        return some_stream(object,b,document)
    end
    -- really cache this?
    return { kind, object, b, c }
end
418
419local resolvers     = { }
420lpdf_epdf.resolvers = resolvers
421
-- Index handler for documents: when there is a resolver for key 'k' it is run
-- once and its result is stored in the document. Keys without a resolver give
-- no result.

local function resolve(document,k)
    local resolver = resolvers[k]
    if not resolver then
        return
    end
    local entry = resolver(document)
    document[k] = entry
    return entry
end
430
431-- A plain \TEX\ test case for duplicates:
432--
433-- abc \pdfdest name {a\string\040b c}           fit \relax \eject
434-- abc \pdfdest name {a b\string\040c}           fit \relax \eject
435-- abc \pdfdest name {a\string\040b\string\040c} fit \relax \eject
436-- def \pdfdest name {a b c}                     fit \relax \eject
437-- \bye
438
-- Collects the name/value pairs of a name tree node into 'target' (created on
-- demand). A node with a Names array contributes its pairs; a name that is
-- already present gets a " [duplicate.N]" suffix. When Limits are given they
-- are compared with the first and last name and a mismatch is reported. A
-- node without Names is traversed via its Kids. No node means no result.

local function getnames(document,n,target) -- direct
    if not n then
        return
    end
    local Names = n.Names
    if not Names then
        local Kids = n.Kids
        if Kids then
            for i=1,#Kids do
                target = getnames(document,Kids[i],target)
            end
        end
        return target
    end
    target = target or { }
    local seen  = { }
    local count = #Names
    for i=1,count,2 do
        local name = lpdf.fromeight(Names[i])
        if target[name] then
            local d = (seen[name] or 0) + 1
            seen[name] = d
            name = name .. " [duplicate." .. d .. "]"
        end
        target[name] = Names[i+1]
    end
    local Limits = n.Limits
    if Limits then
        local first, last  = Limits[1], Limits[2]
        local fname, lname = Names[1], Names[count-1]
        if not (fname == first and lname == last) then
            report_epdf()
            report_epdf("check file %a, first value %a, first limit %a, last value %a, last limit %a",document.filename,fname,first,lname,last)
            report_epdf()
        end
    end
    return target
end
481
-- Flattens a Kids tree: every node without Kids is appended to 'target'
-- (created on demand), nodes with Kids are traversed in order. No node means
-- no result.

local function getkids(document,n,target) -- direct
    if not n then
        return
    end
    local Kids = n.Kids
    if not Kids then
        if not target then
            return { n }
        end
        target[#target+1] = n
        return target
    end
    for i=1,#Kids do
        target = getkids(document,Kids[i],target)
    end
    return target
end
497
-- The named destinations: the Dests name tree of the catalog, flattened.

function resolvers.destinations(document)
    local names = document.Catalog.Names
    local tree  = names and names.Dests
    return getnames(document,tree)
end
502
-- The named scripts: the JavaScript name tree of the catalog, flattened.

function resolvers.javascripts(document)
    local names = document.Catalog.Names
    local tree  = names and names.JavaScript
    return getnames(document,tree)
end
507
-- The form fields: the Fields entry of the AcroForm dictionary in the catalog.

function resolvers.widgets(document)
    local acroform = document.Catalog.AcroForm
    return acroform and acroform.Fields
end
512
-- The attachments: the EmbeddedFiles name tree of the catalog, flattened.

function resolvers.embeddedfiles(document)
    local names = document.Catalog.Names
    local tree  = names and names.EmbeddedFiles
    return getnames(document,tree)
end
517
518-- /OCProperties <<
519--     /OCGs [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
520--     /D <<
521--         /Order [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
522--         /ON    [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
523--         /OFF   [ ]
524--     >>
525-- >>
526
-- The names of the optional content groups (layers), in the order of the OCGs
-- array. There is no result when the catalog has no OCProperties or when
-- these have no OCGs.

function resolvers.layers(document)
    local properties = document.Catalog.OCProperties
    local layers     = properties and properties.OCGs
    if not layers then
        return
    end
    local names = { }
    for i=1,#layers do
        names[i] = layers[i].Name
    end
 -- names.n = #layers
    return names
end
542
-- The structure tree root of the catalog (this might become a tree).

function resolvers.structure(document)
    local catalog = document.Catalog
    return catalog.StructTreeRoot
end
547
-- Builds the list of pages. Each page is a wrapped dictionary that also gets
-- its page number and the five page boxes as direct fields. Pages are
-- registered in the cache and in the reverse lookup table of the document
-- under their object reference. Pages without data are reported and skipped.
--
-- The page object is now checked before it gets wrapped; the wrapper itself
-- is always a table so checking afterwards came too late.

function resolvers.pages(document)
    local __data__  = document.__data__
    local __xrefs__ = document.__xrefs__
    local __cache__ = document.__cache__
    --
    local nofpages = document.nofpages
    local pages    = { }
    local rawpages = pagestotable(__data__)
    document.pages = pages
    --
    for pagenumber=1,nofpages do
        local rawpagedata = rawpages and rawpages[pagenumber]
        if rawpagedata then
            local pagereference = rawpagedata[3]
            local pageobject    = rawpagedata[1]
            if pageobject then
                local pagedata    = some_dictionary(pageobject,document)
                pagedata.number   = pagenumber
                pagedata.MediaBox = getbox(pageobject,"MediaBox")
                pagedata.CropBox  = getbox(pageobject,"CropBox")
                pagedata.BleedBox = getbox(pageobject,"BleedBox")
                pagedata.ArtBox   = getbox(pageobject,"ArtBox")
                pagedata.TrimBox  = getbox(pageobject,"TrimBox")
                pages[pagenumber] = pagedata
                __xrefs__[pagedata]      = pagereference
                __cache__[pagereference] = pagedata
            else
                report_epdf("missing pagedata for page %i, case %i",pagenumber,1)
            end
        else
            report_epdf("missing pagedata for page %i, case %i",pagenumber,2)
        end
    end
    --
 -- pages.n = nofpages
    --
    return pages
end
586
587local loaded    = { }
588local nofloaded = 0
589
-- Loads a pdf document and returns its wrapper, or nil when that fails.
--
--   filename      : the name of the file, or the pdf data itself when
--                   fromstring is set
--   userpassword  : optional, tried first when the document is encrypted
--   ownerpassword : optional, tried when the document is still encrypted
--   fromstring    : when set, filename is used as the data to load
--
-- Documents are remembered by filename, so a second load gives the same
-- wrapper. The wrapper has the catalog, info and trailer dictionaries, some
-- status fields and an 'objects' proxy that resolves object numbers (or raw
-- references) on demand. Other fields, like 'pages', come from the resolvers.

function lpdf_epdf.load(filename,userpassword,ownerpassword,fromstring)
    local document = loaded[filename]
    if not document then
        statistics.starttiming(lpdf_epdf)
        local __data__
        if fromstring then
            __data__ = pdfnew(filename,#filename)
        else
            local f = ioopen(filename,"rb")
            __data__ = f and pdfopenfile(f)
        end
        if __data__ then
            -- a negative status means that we cannot access the content (yet)
            if userpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,userpassword,nil)
            end
            if ownerpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,nil,ownerpassword)
            end
            if getstatus(__data__) < 0 then
                report_epdf("the document is encrypted, provide proper passwords")
                __data__ = false
            end
            if __data__ then
                local __cache__ = { }
                local __xrefs__ = { }
                document = {
                    filename   = filename,
                    nofcopied  = 0,
                    copied     = { },
                    __cache__  = __cache__,
                    __xrefs__  = __xrefs__,
                    __fonts__  = { },
                    __copied__ = { },
                    __data__   = __data__,
                }
                document.Catalog     = some_dictionary(getcatalog(__data__),document)
                document.Info        = some_dictionary(getinfo(__data__),document)
                document.Trailer     = some_dictionary(gettrailer(__data__),document)
                document.catalog     = document.Catalog
                document.info        = document.Info
                document.trailer     = document.Trailer
                --
                document.encrypted   = document.Trailer.Encrypt and true or false
                local permissions    = getpermissions(__data__)
                document.permissions = permissions and permissions >= 0 and lpdf.topermissions(permissions) or nil
                --
                setmetatableindex(document,resolve)
                --
                document.majorversion, document.minorversion = getversion(__data__)
                --
                document.nofpages = getnofpages(__data__)
                -- we could also use cached but this proxy hides it
                -- setmetatableindex(__cache__,function(t,objnum)
                document.objects = setmetatableindex(function(t,objnum)
                    local kind = type(objnum)
                    if kind == "table" and objnum[1] == reference_object_code then
                        objnum = objnum[3]
                        kind   = type(objnum)
                    end
                    if kind == "number" then
                        local cached = __cache__[objnum]
                        if not cached then
                            cached = some_object(document,objnum)
                            __cache__[objnum] = cached
                            __xrefs__[cached] = objnum
                        end
                        return cached
                    end
                end)
            else
                document = false
            end
        else
            -- we only end up here when nothing could be opened; the test that
            -- used to be here looked at a mistyped (so undefined) variable
            report_epdf("the document is damaged or empty")
            document = false
        end
        -- a failed load is remembered as false, so the next call tries again
        loaded[filename] = document
        if document then
            loaded[document] = document
        end
        statistics.stoptiming(lpdf_epdf)
     -- print(statistics.elapsedtime(lpdf_epdf))
    end
    if document then
        nofloaded = nofloaded + 1
    end
    return document or nil
end
679
-- Passes the object range request for object 'n' on to the library, but only
-- for a file that has been loaded; otherwise there is no result.

function lpdf_epdf.objectrange(filename,n)
    local document = loaded[filename]
    if not document then
        return
    end
    return getobjectrange(document.__data__,n)
end
686
-- Forgets and closes a loaded document. The argument is either the filename
-- or a document, in which case its filename field is used. Unknown names are
-- ignored.

function lpdf_epdf.unload(filename)
    local name = filename
    if type(name) == "table" then
        name = name.filename
    end
    if type(name) ~= "string" then
        return
    end
    local document = loaded[name]
    if not document then
        return
    end
    loaded[document] = nil
    loaded[name]     = nil
    pdfclose(document.__data__)
end
700
-- Forgets and closes a document that was loaded here; other values are
-- ignored.

function lpdf.close(document)
    if not loaded[document] then
        return
    end
    local name = document.filename
    loaded[document] = nil
    loaded[name]     = nil
    pdfclose(document.__data__)
end
708
709-- for k, v in expanded(t) do
710
-- Iterator over a wrapped object: the keys come from the raw table and the
-- values are the resolved ones (fetched via the wrapper).
--
-- The traversal ends when there are no more keys. It used to end at the first
-- value that was false, which could silently cut the loop short.

local function expanded(t) -- maybe also a sorted variant
    local function iterator(raw,k)
        local key = next(raw,k)
        if key ~= nil then
            return key, t[key]
        end
    end
    return iterator, t.__raw__, nil
end
720
721---------.expand   = expand
722lpdf_epdf.expanded = expanded
723
724-- we could resolve the text stream in one pass if we directly handle the
725-- font but why should we complicate things
726
727-- local unescape = Cs((
728--     p_remap / remapper
729--   + P("\\")/"" * (lpegpatterns.octdigit * lpegpatterns.octdigit^-2) / remapper
730--   + P(1) / h_hex_2
731-- )^0)
732
733-- decstring  = Ct(Cc("dec")   * P("(")  * Cs((numchar+1-(P")"))^1) * P(")")), -- untested
734--     decstring  = Ct(Cc("dec")   * P("(")  * (Cs((P("\\")*P(1)+1-(P")"))^1)/lpdf.fromeight) * P(")")), -- untested
735--     decstring  = Ct(Cc("dec")   * P("(")  * (Cs((P("\\")*P(1)+1-(P")"))^1)) * P(")")), -- untested
736--     decstring  = Ct(Cc("dec")   * P("(")  * (C((P("\\)")+1-(P")"))^1)/lpdf.fromeight) * P(")")), -- untested
737--     decstring  = Ct(Cc("dec")   * P("(")  * (lpegpatterns.pdffromeight - P(")")) * P(")")), -- untested
738
739-- We normalize to hex because when strings move through lua we sometimes get escaping
740-- (ff) that I can't figure out.
741
742
-- The lookup table that is used to turn a single character into the form
-- that we collect for normalized strings (presumably two hex digits, but
-- that is defined in the lpdf helpers).

local h_hex_2 = lpdf.h_hex_2

-- The escape sequences that can show up in a literal string, mapped onto
-- their normalized form. A backslash followed by a newline or return is a
-- line continuation and gives nothing.

local remapper = {
    ["\\("]  = h_hex_2["("],
    ["\\)"]  = h_hex_2[")"],
    ["\\n"]  = h_hex_2["\n"],
    ["\\r"]  = h_hex_2["\r"],
    ["\\t"]  = h_hex_2["\t"],
    ["\\b"]  = h_hex_2["\b"],
    ["\\f"]  = h_hex_2["\f"],
    ["\\\n"] = "",
    ["\\\r"] = "",
    ["\\\\"] = h_hex_2["\\"],
}

-- The pattern is made before we add the index handler, so it only matches
-- the escape sequences listed above.

local p_remap = lpeg.utfchartabletopattern(remapper)

-- Other keys are the digits of an octal escape (see the pattern below); we
-- convert these once and remember the result.

setmetatableindex(remapper,function(t,k)
    local v = h_hex_2[char(octtointeger(k))]
    t[k] = v
    return v
end)

-- <...> : we get { "hex", <everything between the angle brackets> }

local p_hex_string = Ct(Cc("hex") *
    P("<")
  * Cs((1 - P(">"))^0)
  * P(">")
)

-- (...) : also tagged as "hex" because the content is normalized: known
-- escapes and octal escapes (one to three digits, the backslash is dropped)
-- go via the remapper, any other character goes via h_hex_2. We stop at the
-- first right parenthesis that is not escaped.

local p_dec_string = Ct(Cc("hex") *
    P("(")
  * Cs(
        (
            p_remap / remapper
          + P("\\")/"" * ((lpegpatterns.octdigit * lpegpatterns.octdigit^-2) / remapper)
          + P(1) / h_hex_2
          - P(")")
        )^0
    )
  * P(")")
)
784
-- The building blocks of the content stream parser.

local spaces    = lpegpatterns.whitespace^1
local optspaces = lpegpatterns.whitespace^0
local comment   = P("%") * (1 - lpegpatterns.newline)^0 -- till the end of the line

-- This one is only referenced in the commented decstring variants above.

local numchar   = P("\\")/"" * (R("09")^3/function(s) return char(octtointeger(s)) end)
                + P("\\") * P(1)

-- A name: a slash followed by anything that is not whitespace or a delimiter;
-- the slash is not part of the capture.

----- key       = P("/") * C(R("AZ","az","09","__","--")^1)
local key       = P("/") * C((1 - lpegpatterns.whitespace - S("<>/[]()"))^1)

local number    = Ct(Cc("number") * (lpegpatterns.number/dectointeger)) -- { "number", <converted> }
local keyword   = Ct(Cc("name") * key)                                  -- { "name", <name> }
local operator  = C((R("AZ","az")+P("*")+P("'")+P('"'))^1)              -- letters, * ' and "

-- The operands: each one becomes a tagged table ("name", "number", "dict",
-- "array" or "hex"); comments and spaces are skipped. Dictionaries and arrays
-- can be nested.

local grammar = P { "start",
    start      = (comment + keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces)^1,
    keyvalue   = optspaces * keyword * optspaces * V("start"),
    dictionary = Ct(Cc("dict")  * P("<<") * Ct(V("keyvalue")^0) * P(">>")),
--     dictionary = Ct(Cc("dict")  * P("<<") * (Ct(V("keyvalue"))^0) * P(">>")),
    array      = Ct(Cc("array") * P("[")  * Ct(V("start")^0) * P("]")),
    hexstring  = p_hex_string,
    decstring  = p_dec_string,
}

-- An operation is a table with the operands followed by the operator (or only
-- the operator). The parser collects all operations and skips, one character
-- at a time, whatever is not part of one.

local operation = Ct(grammar^1 * operator + operator)
local parser    = Ct((operation + P(1))^1)

-- The fast variant has the same rules but keeps numbers as plain strings
-- (so there is no tagged table and no conversion).

local number = C(lpegpatterns.number)

local fastgrammar = P { "start",
    start      = (comment + keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces)^1,
    keyvalue   = optspaces * keyword * optspaces * V("start"),
    dictionary = Ct(Cc("dict")  * P("<<") * Ct(V("keyvalue")^0) * P(">>")),
--     dictionary = Ct(Cc("dict")  * P("<<") * (Ct(V("keyvalue"))^0) * P(">>")),
    array      = Ct(Cc("array") * P("[")  * Ct(V("start")^0) * P("]")),
    hexstring  = p_hex_string,
    decstring  = p_dec_string,
}

local fastoperation = Ct(fastgrammar^1 * operator + operator)
local fastparser    = Ct((fastoperation + P(1))^1)
826
-- Parses a content stream (a string) into a list of operations. With 'fast'
-- set the variant that leaves numbers unconverted is used.

function lpdf_epdf.parsecontent(str,fast)
    local pattern = parser
    if fast then
        pattern = fastparser
    end
    return lpegmatch(pattern,str)
end
830
831---------------------------------------------------------------------------
832
833-- beginbfrange : <start> <stop> <firstcode>
834--                <start> <stop> [ <firstsequence> <firstsequence> <firstsequence> ]
835-- beginbfchar  : <code> <newcodes>
836
837local fromsixteen = lpdf.fromsixteen -- maybe inline the lpeg ... but not worth it
838
-- A bfchar entry: hex code 'a' maps onto the utf16 hex sequence 'b'; the
-- converted result is stored in 't' under the code as integer.

local function f_bfchar(t,a,b)
    local code = hextointeger(a)
    t[code] = fromsixteen(b)
end
842
-- The two bfrange variants are not implemented yet: they only print what they
-- get and leave the mapping table 't' untouched.

-- <start> <stop> <firstcode>

local function f_bfrange_1(t,a,b,c)
    print("todo 1",a,b,c)
    -- c is string
    -- todo t[hextointeger(a)] = fromsixteen(b)
end

-- <start> <stop> [ <sequence> ... ]

local function f_bfrange_2(t,a,b,c)
    print("todo 2",a,b,c)
    -- c is table
    -- todo t[hextointeger(a)] = fromsixteen(b)
end
854
-- The extra argument that is passed to the match (Carg(1)) is the table that
-- collects the mapping; it is handed to the functions above as first argument
-- and it is also the result of the match.

local optionals   = spaces^0
local hexstring   = optionals * P("<") * C((1-P(">"))^1) * P(">") -- the content of <...>
local bfchar      = Carg(1) * hexstring * hexstring / f_bfchar
local bfrange     = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1
                  + Carg(1) * hexstring * hexstring * optionals * P("[") * Ct(hexstring^1) * optionals * P("]") / f_bfrange_2

-- We only look at the bfchar and bfrange sections and skip everything else.

local fromunicode = (
    P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" ) +
    P("beginbfrange") * bfrange^1 * optionals * P("endbfrange") +
    spaces +
    P(1)
)^1  * Carg(1)
866
lpdf_epdf.helpers = { }

-- Converts the content of a tounicode stream (a string) into a table that
-- maps codes onto strings. A missing (nil or false) argument is passed back
-- as it is.

function lpdf_epdf.helpers.tounicodetable(tounicode)
    if not tounicode then
        return tounicode
    end
    local mapping = { }
    return (lpegmatch(fromunicode,tounicode,1,mapping))
end
872
873---------------------------------------------------------------------------
874
875-- when there is no tounicode we can look at the encoding
876
-- Registers the fonts of a resource dictionary in the font table of the
-- document and returns that table. Each new entry gets a 'tounicode' table
-- that is filled from the ToUnicode stream of the font, or is empty when that
-- gives nothing. Fonts that are already registered are left alone.
--
-- A font without a ToUnicode entry is now handled; the entry used to be
-- called right away, which fails when it is missing.

local function analyzefonts(document,resources) -- unfinished, see mtx-pdf for better code
    local fonts = document.__fonts__
    if resources then
        local fontlist = resources.Font
        if fontlist then
            for id, data in expanded(fontlist) do
                if not fonts[id] then
                    -- a quick hack ... I will look into it in more detail if I
                    -- find a real application for it
                    local stream    = data.ToUnicode
                    local tounicode = stream and stream() -- the stream content
                    if tounicode then
                        tounicode = lpegmatch(fromunicode,tounicode,1,{})
                    end
                    fonts[id] = {
                        tounicode = type(tounicode) == "table" and tounicode or { }
                    }
                    setmetatableindex(fonts[id],"self")
                end
            end
        end
    end
    return fonts
end
900
901lpdf_epdf.analyzefonts = analyzefonts
902
-- Converters from the codes in a string to utf. Both use two shared variables:
--
-- more : a pending high surrogate (0 when there is none)
-- unic : the code to unicode mapping of the current font, set by the caller
--        (cheaper than passing it each time as Carg(1))
--
-- A code is looked up in unic and when that fails it is taken as a unicode.

local more = 0
local unic = nil

-- NOTE(review): codes are always read as four hex digits, so strings that use
-- two digits per character are not handled here - confirm with the parser.

local p_hex_unit = C(4) / function(s)
    local now = hextointeger(s)
    if more > 0 then
        local high = more
        more = 0
        if now >= 0xDC00 and now <= 0xDFFF then
            -- regular utf-16 decoding, the 0x10000 offset belongs here
            now = (high-0xD800)*0x400 + (now-0xDC00) + 0x10000
            return unic and unic[now] or utfchar(now)
        end
        -- the high surrogate had no partner so we drop it and handle this code
        -- on its own
    end
    if now >= 0xD800 and now <= 0xDBFF then
        more = now
        -- we need to return a value because otherwise the substitution capture
        -- keeps the four hex digits in the result
        return ""
    end
    return unic and unic[now] or utfchar(now)
end

local p_dec_unit = C(1) / function(s) -- not needed now that we always go hex
    local now = byte(s)
    return unic and unic[now] or utfchar(now)
end

-- each match starts with resetting the pending surrogate

local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_unit^1)
local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_unit^1)
927
928-- also xform?
929
-- Returns the content of a page as one string: a stream is called to get its
-- data, the streams in an array are called one by one and joined with a space.
-- Anything else is returned untouched.

local function allcontent(content)
    if type(content) ~= "table" then
        return content
    end
    local ctype = content.__type__
    if ctype == stream_object_code then
        return (content())
    end
    if ctype == array_object_code then
        local parts = { }
        for index=1,#content do
            parts[index] = content[index]()
        end
        return concat(parts," ")
    end
    return content
end

lpdf_epdf.allcontent = allcontent
947
-- Parses the content stream of a page into a list of entries, where each entry
-- is a table with the operands followed by the operator.
--
-- document   : a loaded document
-- pagenumber : index into document.pages
-- asis       : when set the parsed list is returned without further processing
-- fast       : when set the fast parser is used
--
-- Nothing is returned when the page does not exist or the content can't be
-- parsed. Unless asis is set the text operands get converted in place, using the
-- tounicode table of the font that was selected with Tf:
--
-- TJ       : each item in the array becomes a string (text) or a number (kern)
-- Tj, ', " : the string operand (the one before the operator) becomes a string
--
-- Strings are what contenttotext and getstructure test for.

function lpdf_epdf.getpagecontent(document,pagenumber,asis,fast)

    local page = document.pages[pagenumber]

    if not page then
        return
    end

    local content = allcontent(page.Contents or "")
    local list    = lpegmatch(fast and fastparser or parser,content)

    if asis or not list then
        return list -- , fonts
    end

    local fonts = analyzefonts(document,page.Resources)
    local font  = nil

    unic = { } -- text can show up before the first Tf

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            font = fonts[entry[1][2]]
            unic = font and font.tounicode or { }
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = data[2]  -- { { ... }, { ... } }
            for j=1,#items do
                local item = items[j]
                local kind = item[1]
                -- a failed conversion gives an empty string so that we don't
                -- end up with holes in the array
                if kind == "hex" then
                    items[j] = lpegmatch(p_hex_to_utf,item[2]) or ""
                elseif kind == "string" then
                    items[j] = lpegmatch(p_dec_to_utf,item[2]) or ""
                else
                    items[j] = item[2] -- kern
                end
            end
        elseif operator == "Tj" or operator == "'" or operator == '"' then
            -- { string,  Tj } { string, ' } { n, m, string, " }
            local data = entry[size-1]
            local kind = type(data) == "table" and data[1]
            if kind == "hex" then
                entry[size-1] = lpegmatch(p_hex_to_utf,data[2]) or ""
            elseif kind == "string" then
                entry[size-1] = lpegmatch(p_dec_to_utf,data[2]) or ""
            end
        end
    end

    unic = nil -- can be collected

    return list

end
1011
-- This is also an experiment. When I really need it I can improve it, for instance
-- with proper position calculation. It might be useful for some search or so.
1014
local softhyphen = utfchar(0xAD) .. "$" -- a pattern: soft hyphen at the end
local linefactor = 1.3

-- Turns a converted content list (as returned by getpagecontent without asis)
-- into plain text.
--
-- document : not used (yet)
-- list     : the list of entries, text operands have to be strings already
--
-- Strings from TJ and Tj are collected. A kern below -50 in a TJ array counts as
-- a space. When the vertical position set by cm or Tm moves more than linefactor
-- times the current font size we start a new line, unless the collected text
-- ends with a soft hyphen.

function lpdf_epdf.contenttotext(document,list) -- maybe signal fonts
    local last_y = 0
    local last_f = 0
    local text   = { }
    local last   = 0

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            last_f = entry[2][2] -- size
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = data[2]  -- { string | number, ... }
            for j=1,#items do
                local item = items[j]
                local kind = type(item)
                if kind == "string" then
                    last = last + 1
                    text[last] = item
                elseif kind == "number" and item < -50 then
                    last = last + 1
                    text[last] = " "
                end
            end
        elseif operator == "Tj" then
            -- we only bump the counter when we really store something because
            -- holes in the table break the concat at the end
            local item = entry[size-1]
            if type(item) == "string" then
                last = last + 1
                text[last] = item
            end
        elseif operator == "cm" or operator == "Tm" then
            local ty = entry[6][2]
            local dy = abs(last_y - ty)
            if dy > linefactor*last_f then
                -- no plain find here because the softhyphen has an anchor
                if last > 0 and not find(text[last],softhyphen) then
                    last = last + 1
                    text[last] = "\n"
                end
            end
            last_y = ty
        end
    end

    return concat(text)
end
1072
-- Serializes a parsed content list (as returned by getpagecontent with asis set)
-- back into a string. Each non empty entry ends up on its own line with the
-- tokens separated by a space. Operands are { kind, value } tables:
--
-- array : [ ... ]         dict   : << ... >>       hex : <...>
-- dec   : lpdf.toeight    number : the value       else : /name
--
-- The array of a TJ operator is packed without spaces. Only between two numbers
-- that follow each other a space is added because otherwise they would become
-- one number.

function lpdf_epdf.contenttostring(contents)
    local r       = 0
    local result  = { }
    local compact = false

    local flatten ; flatten = function(t)
        local nt = #t
        compact = t[nt] == "TJ"
        for i=1,nt do
            local ti = t[i]
            if type(ti) == "table" then
                local t1 = ti[1]
                local t2 = ti[2]
                if t1 == "array" then
                    if compact then
                        -- we collect in a temporary table and push the packed
                        -- result as one token
                        local sr, sresult = r, result
                        r, result = 1, { "[" }
                        flatten(t2)
                        r = r + 1 ; result[r] = "]"
                        local n, packed = 0, { }
                        for j=1,r do
                            local rj = result[j]
                            if type(rj) == "number" and type(result[j-1]) == "number" then
                                n = n + 1 ; packed[n] = " "
                            end
                            n = n + 1 ; packed[n] = rj
                        end
                        sr = sr + 1 ; sresult[sr] = concat(packed)
                        r, result = sr, sresult
                    else
                        r = r + 1 ; result[r] = "["
                        flatten(t2)
                        r = r + 1 ; result[r] = "]"
                    end
                elseif t1 == "dict" then
                    r = r + 1 ; result[r] = "<<"
                    flatten(t2)
                    r = r + 1 ; result[r] = ">>"
                elseif t1 == "hex" then
                    r = r + 1 ; result[r] = "<" .. t2 .. ">"
                elseif t1 == "dec" then -- can't happen anymore
                    r = r + 1 ; result[r] = lpdf.toeight(t2)
                elseif type(t2) == "number" then
                    r = r + 1 ; result[r] = t2 -- todo formatter %N
                else -- name
                    r = r + 1 ; result[r] = "/" .. t2
                end
            else
                r = r + 1 ; result[r] = ti
            end
        end
    end

    for i=1,#contents do
        local c = contents[i]
        if #c > 0 then
            flatten(c)
            r = r + 1 ; result[r] = "\n"
        end
    end
    result[#result] = nil -- the last newline
    -- the concat also puts a space after each newline
    return (string.gsub(concat(result," "),"\n ","\n"))
end
1139
-- Reports the marked content structure of a converted content list (as returned
-- by getpagecontent without asis): BDC opens a level, EMC closes one, and the
-- text of TJ and Tj is shown indented by the current level. Nothing is returned.
--
-- document : not used
-- list     : the list of entries
--
-- NOTE(review): the BDC line passes the operands as they come from the parser to
-- the reporter - confirm that this gives the intended tag and MCID.

function lpdf_epdf.getstructure(document,list) -- just a test
    local depth = 0
    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "BDC" then
            report_epdf("%w%s : %s",depth,entry[1] or "?",entry[2] and entry[2].MCID or "?")
            depth = depth + 1
        elseif operator == "EMC" then
            if depth > 0 then -- we can have unbalanced content
                depth = depth - 1
            end
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = type(data) == "table" and data[2] or { }
            for j=1,#items do
                local item = items[j]
                local kind = type(item)
                if kind == "string" then
                    report_epdf("%w > %s",depth,item)
                elseif kind == "number" and item < -50 then
                    report_epdf("%w >",depth)
                end
            end
        elseif operator == "Tj" then
            report_epdf("%w > %s",depth,entry[size-1])
        end
    end
end
1166
1167if images then do
1168
    -- This can be made a bit faster (just get raw data and pass it) but I will
    -- do that later. In the end the benefit is probably negligible.
1171
1172    local copydictionary       = nil
1173    local copyarray            = nil
1174
1175    local pdfreference         = lpdf.reference
1176    local pdfconstant          = lpdf.constant
1177    local pdfarray             = lpdf.array
1178    local pdfdictionary        = lpdf.dictionary
1179    local pdfnull              = lpdf.null
1180    local pdfliteral           = lpdf.literal
1181
1182    local pdfreserveobject     = lpdf.reserveobject
1183    local shareobjectreference = lpdf.shareobjectreference
1184    local pdfflushobject       = lpdf.flushobject
1185    local pdfflushstreamobject = lpdf.flushstreamobject
1186
1187    local report               = logs.reporter("backend","xobjects")
1188
1189    local factor               = 65536 / (7200/7227) -- 1/number.dimenfactors.bp
1190
1191    local createimage          = images.create
1192
1193    directives.register("graphics.pdf.recompress",  function(v) recompress  = v end)
1194
1195    local function scaledbbox(b)
1196        return { b[1]*factor, b[2]*factor, b[3]*factor, b[4]*factor }
1197    end
1198
1199    local codecs = {
1200        ["/ASCIIHexDecode"]  = true,
1201        ["/ASCII85Decode"]   = true,
1202        ["/RunLengthDecode"] = true,
1203        ["/FlateDecode"]     = true,
1204        ["/LZWDecode"]       = true,
1205    }
1206
1207    local function deepcopyobject(pdfdoc,xref,copied,value)
1208        -- no need for tables, just nested loop with obj
1209        local objnum = xref[value]
1210        if objnum then
1211            local usednum = copied[objnum]
1212            if usednum then
1213             -- report("%s object %i is reused",kind,objnum)
1214            else
1215                usednum = pdfreserveobject()
1216                copied[objnum] = usednum
1217                local entry = value
1218                local kind  = entry.__type__
1219                if kind == array_object_code then
1220                    local a = copyarray(pdfdoc,xref,copied,entry)
1221                    pdfflushobject(usednum,tostring(a))
1222                elseif kind == dictionary_object_code then
1223                    local d = copydictionary(pdfdoc,xref,copied,entry)
1224                    pdfflushobject(usednum,tostring(d))
1225                elseif kind == stream_object_code then
1226                    local d = copydictionary(pdfdoc,xref,copied,entry)
1227                    local filter = d.Filter
1228                    -- watch out we have a lpdf object now so we have the "/" prepended to names
1229                    if filter and codecs[tostring(filter)] and recompress then
1230                        -- recompress
1231                        d.Filter      = nil
1232                        d.Length      = nil
1233                        d.DecodeParms = nil -- relates to filter
1234                        d.DL          = nil -- needed?
1235                        local s = entry()                        -- get uncompressed stream
1236                        pdfflushstreamobject(s,d,true,usednum)   -- compress stream
1237                    else
1238                        -- keep as-is, even Length which indicates the decompressed length
1239                        local s = entry(false)                        -- get compressed stream
1240                     -- pdfflushstreamobject(s,d,false,usednum,true)  -- don't compress stream
1241                        pdfflushstreamobject(s,d,"raw",usednum)       -- don't compress stream
1242                    end
1243                else
1244                    local t = type(value)
1245                    if t == "table" then
1246                        local kind  = value[1]
1247                        local entry = value[2]
1248                        if kind == name_object_code then
1249                            value = pdfconstant(entry)
1250                        elseif kind == string_object_code then
1251                            value = pdfliteral(entry,false)
1252                        elseif kind == null_object_code then
1253                            value = pdfnull()
1254                        elseif kind == reference_object_code then
1255                            value = deepcopyobject(pdfdoc,xref,copied,entry)
1256                        elseif entry == nil then
1257                            value = pdfnull()
1258                        else
1259                            value = tostring(entry)
1260                        end
1261                        -- I need to make a few testcases for objects with whatever in it.
1262                 -- elseif t == "string" then
1263                 --  -- value = pdfconstant(value) -- name is handled elsewhere
1264                    end
1265                    pdfflushobject(usednum,value)
1266                end
1267            end
1268            return pdfreference(usednum)
1269        elseif kind == stream_object_code then
1270            report("stream not done: %s", objectcodes[kind] or "?")
1271        else
1272            report("object not done: %s", objectcodes[kind] or "?")
1273        end
1274    end
1275
1276    local function copyobject(pdfdoc,xref,copied,object,key,value)
1277        if not value then
1278            value = object.__raw__[key]
1279        end
1280        local t = type(value)
1281        if t == "string" then
1282            return pdfconstant(value)
1283        elseif t ~= "table" then
1284            return value
1285        end
1286        local kind = value[1]
1287        if kind == name_object_code then
1288            return pdfconstant(value[2])
1289        elseif kind == string_object_code then
1290            return pdfliteral(value[2],false)
1291        elseif kind == array_object_code then
1292            return copyarray(pdfdoc,xref,copied,object[key])
1293        elseif kind == dictionary_object_code then
1294            return copydictionary(pdfdoc,xref,copied,object[key]) -- value
1295        elseif kind == null_object_code then
1296            return pdfnull()
1297        elseif kind == reference_object_code then
1298            return deepcopyobject(pdfdoc,xref,copied,object[key])
1299        elseif kind == lpdf_object_code then
1300            return value[2]
1301        else
1302         -- report("weird: %s", objectcodes[kind] or "?")
1303        end
1304    end
1305
1306    copyarray = function(pdfdoc,xref,copied,object)
1307        local target = pdfarray()
1308        local source = object.__raw__
1309        for i=1,#source do
1310            target[i] = copyobject(pdfdoc,xref,copied,object,i,source[i])
1311        end
1312        return target
1313    end
1314
1315    copydictionary = function (pdfdoc,xref,copied,object)
1316        local target = pdfdictionary()
1317        local source = object.__raw__
1318     -- for key, value in next, source do -- sorting is easier on checking
1319        for key, value in sortedhash(source) do
1320            target[key] = copyobject(pdfdoc,xref,copied,object,key,value)
1321        end
1322        return target
1323    end
1324
1325    local openpdf  = lpdf_epdf.load
1326    local closepdf = lpdf_epdf.unload
1327
1328    -- todo: keep track of already open files
1329
1330    local function newpdf(str,userpassword,ownerpassword)
1331        return openpdf(str,userpassword,ownerpassword,true)
1332    end
1333
1334    local sizes = {
1335        crop  = "CropBox",
1336        media = "MediaBox",
1337        bleed = "BleedBox",
1338        art   = "ArtBox",
1339        trim  = "TrimBox",
1340    }
1341
1342    local function querypdf(pdfdoc,pagenumber,size,pagelabel)
1343        if pdfdoc then
1344            local root = pdfdoc.Catalog
1345            if type(pagelabel) == "string" and pagelabel ~= "" then
1346                local pagedata = root.LMTX_Pages
1347                if pagedata then
1348                    local labels = pagedata.Labels
1349                    if labels then
1350                        local found = labels[pagelabel]
1351                        if found then
1352                            pagenumber = tonumber(type(found) == "table" and found[1] or found)
1353                        end
1354                    end
1355                end
1356            end
1357            if not pagenumber then
1358                pagenumber = 1
1359            end
1360            local page = pdfdoc.pages[pagenumber]
1361            if page then
1362                local sizetag  = sizes[size or "crop"] or sizes.crop
1363                local mediabox = page.MediaBox or { 0, 0, 0, 0 }
1364                local cropbox  = page[sizetag] or mediabox
1365                local filename = pdfdoc.filename
1366                if cropbox[4] then
1367                    return {
1368                        filename    = filename,
1369                        pagenumber  = pagenumber,
1370                        nofpages    = pdfdoc.nofpages,
1371                        boundingbox = scaledbbox(cropbox),
1372                        cropbox     = cropbox,
1373                        mediabox    = mediabox,
1374                        bleedbox    = page.BleedBox or cropbox,
1375                        trimbox     = page.TrimBox or cropbox,
1376                        artbox      = page.ArtBox or cropbox,
1377                        rotation    = page.Rotate or 0,
1378                        xsize       = cropbox[3] - cropbox[1],
1379                        ysize       = cropbox[4] - cropbox[2],
1380                    }
1381                else
1382                    report("bad page %i in file %a",pagenumber,filename or "?")
1383                end
1384            end
1385        end
1386    end
1387
1388    local function copyresources(pdfdoc,xref,copied,Resources)
1389        if Resources then
1390            local d = copydictionary(pdfdoc,xref,copied,Resources)
1391            return shareobjectreference(d)
1392        else
1393            return lpdf.checkedresources()
1394        end
1395    end
1396
1397    local variables = interfaces.variables
1398
1399    local function getinclusion(pdfdoc)
1400        local inclusion = pdfdoc.lmtxinclusion
1401        if not inclusion then
1402            local catalog  = pdfdoc.Catalog
1403            local info     = pdfdoc.Info
1404            local metadata = nil
1405            local function checked(tag,pattern)
1406                local str = info[tag]
1407                if not str then
1408                    if metadata == nil then
1409                        metadata = catalog.Metadata or false
1410                        if metadata then
1411                            metadata = metadata() or false
1412                        end
1413                        if metadata then
1414                            metadata = xml.convert(metadata)
1415                        end
1416                    end
1417                    if metadata then
1418                        str = xml.text(metadata,pattern)
1419                    end
1420                end
1421                return str and str ~= "" and str or "unknown"
1422            end
1423            -- We don't want to claim to be the producer so we add this to the
1424            -- image.
1425            inclusion = pdfreference(pdfflushobject(pdfdictionary {
1426             -- Title            = checked("Title",       "Description/title/**/*"),
1427             -- Author           = checked("Author",      "Description/author/**/*"),
1428                Creator          = checked("Creator",     "Description/CreatorTool"),
1429                Producer         = checked("Producer",    "Description/Producer"),
1430             -- Creationdate     = checked("CreationDate","Description/CreateDate"),
1431             -- Modificationdate = checked("ModDate",     "Description/ModifyDate"),
1432            }))
1433            pdfdoc.lmtxinclusion = inclusion
1434        end
1435        return inclusion
1436    end
1437
1438    local function copypage(pdfdoc,pagenumber,attributes,compact,width,height,attr,copymeta)
1439        if pdfdoc then
1440            local root     = pdfdoc.Catalog
1441            local page     = pdfdoc.pages[pagenumber or 1]
1442            local pageinfo = querypdf(pdfdoc,pagenumber)
1443            if pageinfo then
1444                local contents  = page.Contents
1445                local xref      = pdfdoc.__xrefs__
1446                local copied    = pdfdoc.__copied__
1447                local resources = page.Resources
1448                if compact and resources and lpdf_epdf.pageplugin then
1449                    lpdf_epdf.pageplugin(pdfdoc,page,pagenumber,resources,compact)
1450                    contents = page.Contents -- can now be a string
1451                end
1452                local metadata = nil
1453                -- page     : only page (default, compatibility)
1454                -- document : only document
1455                -- yes      : page or document
1456                if copymeta == variables.page or copymeta == variables.yes then
1457                    -- We seldom have metadata with a page.
1458                    metadata = copyobject(pdfdoc,xref,copied,page,"Metadata")
1459                end
1460                if not metadata and (copymeta == variables.document or copymeta == variables.yes) then
1461                    -- For our own documents we don't need this.
1462                    metadata = copyobject(pdfdoc,xref,copied,root,"Metadata")
1463                end
1464                local xobject = pdfdictionary {
1465                    Type           = pdfconstant("XObject"),
1466                    Subtype        = pdfconstant("Form"),
1467                    FormType       = 1,
1468                    Group          = copyobject(pdfdoc,xref,copied,page,"Group"),
1469                    LastModified   = copyobject(pdfdoc,xref,copied,page,"LastModified"),
1470                 -- Metadata       = copyobject(pdfdoc,xref,copied,page,"Metadata"),
1471                    Metadata       = metadata,
1472                 -- PieceInfo      = copyobject(pdfdoc,xref,copied,page,"PieceInfo"), -- useless as meant for driver
1473                    Resources      = copyresources(pdfdoc,xref,copied,resources),
1474                 -- Resources      = copyobject(pdfdoc,xref,copied,page,"Resources"),
1475                    SeparationInfo = copyobject(pdfdoc,xref,copied,page,"SeparationInfo"),
1476                    LMTX_Inclusion = getinclusion(pdfdoc),
1477                } + attr
1478                if attributes then
1479                    for k, v in expanded(attributes) do
1480                        page[k] = v -- maybe nested
1481                    end
1482                end
1483                local content  = ""
1484                local nolength = nil
1485                if type(contents) == "string" then
1486                    content = contents -- can be result of plugin
1487                elseif contents then
1488                    local ctype = contents.__type__
1489                    -- we always recompress because image object streams can not be
1490                    -- influenced (yet)
1491                    if ctype == stream_object_code then
1492                        if recompress then
1493                            content = contents() -- uncompressed
1494                        else
1495                            local Filter = copyobject(pdfdoc,xref,copied,contents,"Filter")
1496                            local Length = copyobject(pdfdoc,xref,copied,contents,"Length")
1497                            if Length and Filter then
1498                                nolength = true
1499                                xobject.Length = Length
1500                                xobject.Filter = Filter
1501                                content = contents(false) -- uncompressed
1502                            else
1503                                content = contents() -- uncompressed
1504                            end
1505                        end
1506                    elseif ctype == array_object_code then
1507                        content = { }
1508                        for i=1,#contents do
1509                            content[i] = contents[i]() -- uncompressed
1510                        end
1511                        content = concat(content," ")
1512                    end
1513                else
1514                    content = ""
1515                end
1516                -- still not nice: we double wrap now
1517--                 plugins = nil
1518                local rotation    = pageinfo.rotation
1519                local boundingbox = pageinfo.boundingbox
1520                local transform   = nil
1521                if rotation == 90 then
1522                    transform = 3
1523                elseif rotation == 180 then
1524                    transform = 2
1525                elseif rotation == 270 then
1526                    transform = 1
1527                elseif rotation > 1 and rotation < 4 then
1528                    transform = rotation
1529                end
1530                xobject.BBox = pdfarray {
1531                    boundingbox[1] * bpfactor,
1532                    boundingbox[2] * bpfactor,
1533                    boundingbox[3] * bpfactor,
1534                    boundingbox[4] * bpfactor,
1535                }
1536                -- maybe like bitmaps
1537                return createimage { -- beware: can be a img.new or a dummy
1538                    bbox      = boundingbox,
1539                    transform = transform,
1540                    nolength  = nolength,
1541                    nobbox    = true,
1542                    notype    = true,
1543                    stream    = content, -- todo: no compress, pass directly also length, filter etc
1544                    attr      = xobject(),
1545                    kind      = images.types.stream,
1546                }
1547            else
1548                report("bad page %i in file %a",pagenumber or "0",pdfdoc.filename or "?")
1549            end
1550        end
1551    end
1552
1553    lpdf_epdf.image = {
1554        open   = openpdf,
1555        close  = closepdf,
1556        new    = newpdf,
1557        query  = querypdf,
1558        copy   = copypage,
1559    }
1560
1561--     lpdf.injectors.pdf = function(specification)
1562--         local d = lpdf_epdf.load(specification.filename)
1563--         print(d)
1564--     end
1565
1566
1567end end
1568
-- Returns the producer of a document as found in the Info dictionary or, when it
-- is not there, in the Metadata stream of the catalog. The result is an empty
-- string when there is no document or no producer. The Info dictionary, the
-- catalog and the metadata are all optional here.

function lpdf_epdf.producer(pdfdoc)
    local producer = false
    if pdfdoc then
        local info = pdfdoc.Info
        if info then
            producer = info.Producer
        end
        if not producer then
            local catalog  = pdfdoc.Catalog
            local metadata = catalog and catalog.Metadata
            local data     = metadata and metadata()
            if data and data ~= "" then
                local x = xml.convert(data)
                if x then
                    producer = xml.text(x,"rdf:Description/pdf:Producer")
                    if not producer or producer == "" then
                        producer = xml.text(x,"Producer")
                    end
                end
            end
        end
    end
    return producer or ""
end
1591
-- Expands a compact list of widths into a table that is indexed by code.
--
-- widths   : a list that consists of two kinds of entries
--              first { w1 w2 ... } : consecutive codes starting at first
--              first last w        : all codes first..last get width w
-- expanded : an optional table to store the widths in
--
-- Returns the (new or given) table, the lowest code and the highest code; these
-- two are 0 when nothing was expanded. The entries can come in any order.
-- An incomplete entry at the end is ignored.

function lpdf_epdf.expandwidths(widths,expanded)
    if not expanded then
        expanded = { }
    end
    local min = false
    local max = false
    local i = 1
    local n = #widths
    while i < n do
        local first = widths[i] ; i = i + 1
        local w2    = widths[i] ; i = i + 1
        local last  = false
        if type(w2) == "table" then
            last = first + #w2 - 1
            local k = 1
            for j=first,last do
                expanded[j] = w2[k]
                k = k + 1
            end
        else
            local w3 = widths[i] ; i = i + 1
            if w3 then
                last = w2
                for j=first,last do
                    expanded[j] = w3
                end
            end
        end
        if last then
            if not min then
                min = first
                max = last
            else
                if first < min then
                    min = first
                end
                if last > max then
                    max = last
                end
            end
        end
    end
    return expanded, min or 0, max or 0
end
1633
-- Copies a plain list of widths into the (new or given) table and returns that
-- table, the first index (always 1) and the last index (the length of the list).

function lpdf_epdf.mergewidths(widths,expanded)
    local target = expanded or { }
    local count  = #widths
    for index=1,count do
        target[index] = widths[index]
    end
    return target, 1, count
end
1645
1646-- local d = lpdf_epdf.load("e:/tmp/oeps.pdf")
1647-- inspect(d)
1648-- inspect(d.Catalog.Lang)
1649-- inspect(d.Catalog.OCProperties.D.AS[1].Event)
1650-- inspect(d.Catalog.Metadata())
1651-- inspect(d.Catalog.Pages.Kids[1])
1652-- inspect(d.layers)
1653-- inspect(d.pages)
1654-- inspect(d.destinations)
1655-- inspect(lpdf_epdf.getpagecontent(d,1))
1656-- inspect(lpdf_epdf.contenttotext(document,lpdf_epdf.getpagecontent(d,1)))
1657-- inspect(lpdf_epdf.getstructure(document,lpdf_epdf.getpagecontent(d,1)))
1658