lpdf-pde.lmt /size: 52 Kb    last modification: 2024-01-16 09:02
1if not modules then modules = { } end modules ['lpdf-epd'] = {
2    version   = 1.001,
3    comment   = "companion to lpdf-epa.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files",
7    history   = "this one replaces the poppler/pdfe binding",
8}
9
10-- \enabledirectives[graphics.pdf.uselua]
11-- \enabledirectives[graphics.pdf.recompress]
12-- \enabledirectives[graphics.pdf.stripmarked]
13
14-- maximum integer : +2^32
15-- maximum real    : +2^15
16-- minimum real    : 1/(2^16)
17
18-- get_flagged : does that still work
19
20-- ppdoc_permissions (ppdoc *pdf);
21
22-- PPSTRING_ENCODED        1 <<  0
23-- PPSTRING_DECODED        1 <<  1
24-- PPSTRING_EXEC           1 <<  2   postscript only
25-- PPSTRING_PLAIN                0
26-- PPSTRING_BASE16         1 <<  3
27-- PPSTRING_BASE85         1 <<  4
28-- PPSTRING_UTF16BE        1 <<  5
29-- PPSTRING_UTF16LE        1 <<  6
30
31-- PPDOC_ALLOW_PRINT       1 <<  2   printing
32-- PPDOC_ALLOW_MODIFY      1 <<  3   filling form fields, signing, creating template pages
33-- PPDOC_ALLOW_COPY        1 <<  4   copying, copying for accessibility
34-- PPDOC_ALLOW_ANNOTS      1 <<  5   filling form fields, copying, signing
35-- PPDOC_ALLOW_EXTRACT     1 <<  9   contents copying for accessibility
36-- PPDOC_ALLOW_ASSEMBLY    1 << 10   no effect
37-- PPDOC_ALLOW_PRINT_HIRES 1 << 11   no effect
38
39-- PPCRYPT_NONE                  0   no encryption, go ahead
40-- PPCRYPT_DONE                  1   encryption present but password succeeded, go ahead
41-- PPCRYPT_PASS                 -1   encryption present, need non-empty password
42-- PPCRYPT_FAIL                 -2   invalid or unsupported encryption (eg. undocumented in pdf spec)
43
44local setmetatable, type, next = setmetatable, type, next
45local tostring, unpack = tostring, unpack
46local char, byte, find = string.char, string.byte, string.find
47local abs = math.abs
48local concat, swapped, sortedhash, sortedkeys = table.concat, table.swapped, table.sortedhash, table.sortedkeys
49local utfchar = string.char
50local setmetatableindex = table.setmetatableindex
51local ioopen = io.open
52local octtointeger, dectointeger, hextointeger = string.octtointeger, string.dectointeger, string.hextointeger
53
54local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
55local P, C, S, R, Ct, Cc, V, Carg, Cs, Cf, Cg = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V, lpeg.Carg, lpeg.Cs, lpeg.Cf, lpeg.Cg
56
57if not lpdf then
58    require("lpdf-aux")
59end
60
61if not (number and number.dimenfactors) then
62    require("util-dim")
63end
64
65local pdfe              = pdfe
66      lpdf              = lpdf or { }
67local lpdf              = lpdf
68local lpdf_epdf         = { }
69      lpdf.epdf         = lpdf_epdf
70
71local pdfopenfile       = pdfe.openfile
72local pdfnew            = pdfe.new
73local pdfclose          = pdfe.close
74
75local getcatalog        = pdfe.getcatalog
76local getinfo           = pdfe.getinfo
77local gettrailer        = pdfe.gettrailer
78local getnofpages       = pdfe.getnofpages
79local getversion        = pdfe.getversion
80local getbox            = pdfe.getbox
81local getstatus         = pdfe.getstatus
82local unencrypt         = pdfe.unencrypt
83local dictionarytotable = pdfe.dictionarytotable
84local arraytotable      = pdfe.arraytotable
85local pagestotable      = pdfe.pagestotable
86local readwholestream   = pdfe.readwholestream
87local getfromreference  = pdfe.getfromreference
88local getfromobject     = pdfe.getfromobject
89local getobjectrange    = pdfe.getobjectrange
90
91local report_epdf       = logs.reporter("epdf")
92
93local allocate          = utilities.storage.allocate
94
95local bpfactor          = number.dimenfactors.bp
96
-- Names of the object types, indexed by numeric code starting at zero. The
-- codes are compared further down against the first field of the raw entries
-- that come back from the pdfe conversion helpers.
local objectcodes = { [0] =
    "none",
    "null",
    "bool",
    "integer",
    "number",
    "name",
    "string",
    "array",
    "dictionary",
    "stream",
    "reference",
    "lpdf",
}

-- Names of the states that getstatus can report; the negative ones are
-- treated as "still encrypted" by the loader.
local encryptioncodes = {
     [0] = "notencrypted",
     [1] = "unencrypted",
    [-1] = "protected",
    [-2] = "failure",
}

-- After this step both tables are used in two directions in this file: by
-- name (objectcodes.none below) and by code (objectcodes[kind] in objecttype).
-- NOTE(review): this relies on table.swapped(t,t) merging the swapped and the
-- original entries -- confirm against the table helpers.
objectcodes                  = allocate(swapped(objectcodes,objectcodes))
encryptioncodes              = allocate(swapped(encryptioncodes,encryptioncodes))

lpdf_epdf.objectcodes             = objectcodes
lpdf_epdf.encryptioncodes         = encryptioncodes

-- Numeric codes cached as locals so that the dispatchers below compare
-- against upvalues instead of doing a table lookup each time.
local none_object_code       = objectcodes.none
local null_object_code       = objectcodes.null
local bool_object_code       = objectcodes.bool
local integer_object_code    = objectcodes.integer
local number_object_code     = objectcodes.number
local name_object_code       = objectcodes.name
local string_object_code     = objectcodes.string
local array_object_code      = objectcodes.array
local dictionary_object_code = objectcodes.dictionary
local stream_object_code     = objectcodes.stream
local reference_object_code  = objectcodes.reference
local lpdf_object_code       = objectcodes.lpdf
137
138-- We need to convert the string from utf16 although there is no way to
139-- check if we have a regular string starting with a bom. So, we have
-- a dilemma here: a pdf doc encoded string can be invalid utf.
141
142-- <hex encoded>   : implicit 0 appended if odd
143-- (byte encoded)  : \( \) \\ escaped
144--
145-- <FE><FF> : utf16be
146--
-- \n \r \t \b \f \( \) \\ \NNN and \<newline> : append next line
148--
149-- the getString function gives back bytes so we don't need to worry about
150-- the hex aspect.
151
152local some_dictionary
153local some_array
154local some_stream
155local some_reference
156
157local some_string = lpdf.frombytes
158
-- Returns the symbolic type of a wrapped object by looking up its __type__
-- field in objectcodes. Anything that is not a table gives no result.
function lpdf_epdf.objecttype(object)
    if type(object) ~= "table" then
        return
    end
    local code = object.__type__
    return code and objectcodes[code]
end
165
-- Fetches entry 'key' from the raw table 't' and turns it into something
-- usable: plain values are passed on as-is, while tagged entries (a table
-- with the object code in slot one) are dispatched to the matching wrapper.
-- A missing key or a missing (or false) entry gives no result.
local function get_value(document,t,key)
    local entry = key and t[key]
    if not entry then
        return
    end
    if type(entry) ~= "table" then
        return entry
    end
    -- we can assume names to be simple and strings to be tables
    local code = entry[1]
    if code == reference_object_code then
        return some_reference(entry,document)
    end
    if code == dictionary_object_code then
        return some_dictionary(entry[2],document)
    end
    if code == array_object_code then
        return some_array(entry[2],document)
    end
    if code == stream_object_code then
        return some_stream(entry,entry[2],document) -- needs checking
    end
    if code == string_object_code then
        return some_string(entry[2],entry[3])
    end
    if code == name_object_code then
        return entry[2]
    end
    -- anything else is handed back in its raw tagged form
    return entry
end
194
195local checked_access
196local get_flagged     -- from pdfe -> lpdf
197
-- When the lpdf object constructors are available we can convert a wrapped
-- pdfe object back into lpdf objects (dictionaries, arrays, constants ...),
-- which is what calling a wrapper does (see the __call handlers).

if lpdf.dictionary then

    -- these are used in mtx-pdf.lua

    local pdfdictionary = lpdf.dictionary
    local pdfarray      = lpdf.array
    local pdfconstant   = lpdf.constant
    local pdfreference  = lpdf.reference
    local pdfliteral    = lpdf.literal
    local pdfnull       = lpdf.null    -- was used below without being defined
    local pdfunicode    = lpdf.unicode -- idem

    -- Forward declarations: the names have to match the assignments below,
    -- otherwise the two copiers end up as globals.

    local copyarray, copydictionary

    -- Converts one raw entry of a wrapped object into its lpdf counterpart.
    --
    -- object : the wrapper that owns the entry
    -- key    : the key or index of the entry
    -- value  : the raw entry; fetched from object.__raw__ when not given
    --
    -- Object kinds that are not handled give no result.

    local function copyobject(object,key,value)
        if not value then
            value = object.__raw__[key]
        end
        local t = type(value)
        if t == "string" then
            return pdfconstant(value)
        elseif t ~= "table" then
            return value
        end
        local kind = value[1]
        if kind == name_object_code then
            return pdfconstant(value[2])
        elseif kind == string_object_code then
            return pdfliteral(value[2],value[3])
        elseif kind == array_object_code then
            -- indexing the wrapper gives the resolved (wrapped) child
            return copyarray(object[key])
        elseif kind == dictionary_object_code then
            return copydictionary(object[key])
        elseif kind == null_object_code then
            return pdfnull()
        elseif kind == reference_object_code then
            return pdfreference(value[3])
        else
         -- report("weird: %s", objectcodes[kind] or "?")
        end
    end

    -- Copies a wrapped array into a fresh lpdf array.

    copyarray = function(object)
        local target = pdfarray()
        local source = object.__raw__
        for i=1,#source do
            target[i] = copyobject(object,i,source[i])
        end
        return target
    end

    -- Copies a wrapped dictionary into a fresh lpdf dictionary; the keys are
    -- visited in sorted order.

    copydictionary = function(object)
        local target = pdfdictionary()
        local source = object.__raw__
        for key, value in sortedhash(source) do
            target[key] = copyobject(object,key,value)
        end
        return target
    end

    -- Called by the __call handlers of the wrappers: 't' is the wrapper and
    -- 'f' its raw table. The result depends on the wrapper's __type__.

    get_flagged = function(t,f)
        local kind = t.__type__
        if kind == name_object_code then
            return pdfconstant(f)
        elseif kind == array_object_code then
            return copyarray(t)
        elseif kind == dictionary_object_code then
            return copydictionary(t)
        elseif kind == stream_object_code then
            -- only the dictionary part of a stream is copied here
            return copydictionary(t)
        elseif kind == string_object_code then
            return pdfunicode(f)
        elseif kind == null_object_code then
            return pdfnull()
        elseif kind == reference_object_code then
            return pdfreference(t[3])
        else
            return f
        end
    end

    -- Gives a textual "n 0 obj ... endobj" rendering of object 'n' of the
    -- given document; nothing is returned when the document, the number or
    -- the object is missing.

    function lpdf_epdf.verboseobject(document,n)
        if document and n then
            local object = document.objects[n]
            if object then
                local t = { n .. " 0 obj" }
                if lpdf.epdf.objecttype(object) == "stream" then
                    t[#t+1] = object("dictionary")()
                    t[#t+1] = "stream"
                    t[#t+1] = tostring(object(true))
                    t[#t+1] = "endstream"
                else
                    t[#t+1] = tostring(object())
                end
                t[#t+1] = "endobj"
                return concat(t,"\n")
            end
        end
    end

else

    -- Without the lpdf constructors there is nothing to convert to, so we
    -- hand back the raw table, like the fallback branch above does. The old
    -- code indexed the wrapper with an undefined global 'k', which always
    -- gave nil.
    -- NOTE(review): confirm that no caller depends on the old nil result.

    get_flagged = function(t,f)
        return f
    end

end
303
-- Wraps a pdfe dictionary: the raw table is kept in __raw__, indexing the
-- wrapper resolves entries on demand via get_value, and calling the wrapper
-- goes through get_flagged. The second result is the string "dictionary".
some_dictionary = function(d,document)
    local raw     = dictionarytotable(d,true)
    local wrapper = {
        __raw__  = raw,
        __type__ = dictionary_object_code,
    }
    local meta = {
        __index = function(_,key)
            return get_value(document,raw,key)
        end,
        __call = function(self)
            return get_flagged(self,raw)
        end,
    }
    return setmetatable(wrapper,meta), "dictionary"
end
316
-- Wraps a pdfe array. The raw table goes into __raw__ and its size is
-- stored in field n, which is also what the length operator reports.
-- Indexing resolves entries via get_value, calling goes through
-- get_flagged. The second result is the string "array".
some_array = function(a,document)
    local raw     = arraytotable(a,true)
    local size    = #raw
    local wrapper = {
        __raw__  = raw,
        __type__ = array_object_code,
        n        = size,
    }
    local meta = {
        __index = function(_,key)
            return get_value(document,raw,key)
        end,
        __call = function(self)
            return get_flagged(self,raw)
        end,
        __len = function()
            return size
        end,
    }
    return setmetatable(wrapper,meta), "array"
end
333
-- Wraps a pdfe stream 's' with dictionary 'd'. Indexing the wrapper resolves
-- dictionary entries; calling it depends on the argument:
--
--   "dictionary" : the dictionary part, via get_flagged
--   false        : readwholestream(s,false) -- original
--   other        : readwholestream(s,true)  -- uncompressed
--
-- The second result is the string "stream".
some_stream = function(s,d,document)
    local raw     = dictionarytotable(d,true)
    local wrapper = {
        __raw__  = raw,
        __type__ = stream_object_code,
    }
    local meta = {
        __index = function(_,key)
            return get_value(document,raw,key)
        end,
        __call = function(self,how)
            if how == "dictionary" then
                return get_flagged(self,raw)
            end
            -- only an explicit false asks for the original data
            return readwholestream(s,how ~= false)
        end,
    }
    return setmetatable(wrapper,meta), "stream"
end
352
-- Resolves a raw reference entry 'r' (the pdfe reference sits in slot two,
-- the object number in slot three) and caches the result per document under
-- the object number. The reverse mapping, from result to object number, is
-- kept in document.__xrefs__.
some_reference = function(r,document)
    local objnum = r[3]
    local cached = document.__cache__[objnum]
    if not cached then
        local kind, object, b, c = getfromreference(r[2])
        if kind == dictionary_object_code then
            cached = some_dictionary(object,document)
        elseif kind == array_object_code then
            cached = some_array(object,document)
        elseif kind == stream_object_code then
            -- optionally not cached
            cached = some_stream(object,b,document)
        elseif kind == string_object_code then
            -- The second argument of some_string is the flag that get_value
            -- takes from the third slot of a raw string entry. Here that is
            -- the detail value 'b' and not the document, which, being a
            -- table, would always count as true.
            cached = some_string(object,b)
        else
            -- really cache this?
            cached = { kind, object, b, c }
        end
        document.__cache__[objnum] = cached
        document.__xrefs__[cached] = objnum
    end
-- local handles = document.handles
-- if handles then
--     local h = handles[objnum]
--     if h then
--      -- handles[objnum] = nil
--         h(document,cached,objnum) -- todo: pass less
--     end
-- end
    return cached
end
384
-- Fetches object number 'n' from the document and wraps it when it is a
-- dictionary, array or stream. Other kinds are returned as a plain table
-- holding whatever getfromobject gave us.
local function some_object(document,n)
    local kind, object, b, c = getfromobject(document.__data__,n)
    if kind == stream_object_code then
        return some_stream(object,b,document)
    end
    if kind == array_object_code then
        return some_array(object,document)
    end
    if kind == dictionary_object_code then
        return some_dictionary(object,document)
    end
    -- really cache this?
    return { kind, object, b, c }
end
398
-- Resolvers compute a document property on first access; the result is
-- stored in the document so that the resolver is not consulted again as
-- long as the stored value is not nil.
local resolvers     = { }
lpdf_epdf.resolvers = resolvers

-- Index handler for documents: runs the resolver registered under 'k' (if
-- any), stores its result in the document and returns it.
local function resolve(document,k)
    local resolver = resolvers[k]
    if not resolver then
        return
    end
    local entry = resolver(document)
    document[k] = entry
    return entry
end
410
-- Flattens a name tree into a hash. A node with a Names array contributes
-- its key/value pairs, otherwise its Kids (if any) are visited recursively.
-- The target table is created when the first Names array is seen, so a tree
-- without names gives nil. A nil node gives no result.
local function getnames(document,n,target) -- direct
    if not n then
        return
    end
    local pairs_ = n.Names
    if pairs_ then
        target = target or { }
        for i=1,#pairs_,2 do
            target[pairs_[i]] = pairs_[i+1]
        end
        return target
    end
    local kids = n.Kids
    if kids then
        for i=1,#kids do
            target = getnames(document,kids[i],target)
        end
    end
    return target
end
432
-- Collects the leaves of a Kids tree in order of appearance. A node that has
-- a Kids array is descended into, any other node is appended to the target
-- list (which is created on demand). A nil node gives no result.
local function getkids(document,n,target) -- direct
    if not n then
        return
    end
    local kids = n.Kids
    if not kids then
        if not target then
            return { n }
        end
        target[#target+1] = n
        return target
    end
    for i=1,#kids do
        target = getkids(document,kids[i],target)
    end
    return target
end
448
-- Named destinations: the flattened Dests name tree of the catalog.
function resolvers.destinations(document)
    local nametrees = document.Catalog.Names
    local tree      = nametrees and nametrees.Dests
    return getnames(document,tree)
end
453
-- Document level scripts: the flattened JavaScript name tree of the catalog.
function resolvers.javascripts(document)
    local nametrees = document.Catalog.Names
    local tree      = nametrees and nametrees.JavaScript
    return getnames(document,tree)
end
458
-- Form fields: the Fields entry of the catalog's AcroForm (when present).
function resolvers.widgets(document)
    local form = document.Catalog.AcroForm
    if form then
        return form.Fields
    end
    return form
end
463
-- Attachments: the flattened EmbeddedFiles name tree of the catalog.
function resolvers.embeddedfiles(document)
    local nametrees = document.Catalog.Names
    local tree      = nametrees and nametrees.EmbeddedFiles
    return getnames(document,tree)
end
468
469-- /OCProperties <<
470--     /OCGs [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
471--     /D <<
472--         /Order [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
473--         /ON    [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
474--         /OFF   [ ]
475--     >>
476-- >>
477
-- Layers: a list with the Name of each entry in OCProperties.OCGs. There is
-- no result when the catalog has no such properties or no OCGs array.
function resolvers.layers(document)
    local properties = document.Catalog.OCProperties
    local layers     = properties and properties.OCGs
    if not layers then
        return
    end
    local names = { }
    for i=1,#layers do
        names[i] = layers[i].Name
    end
 -- names.n = n
    return names
end
493
-- Structure: for now just the StructTreeRoot of the catalog.
function resolvers.structure(document)
    -- this might become a tree
    local catalog = document.Catalog
    return catalog.StructTreeRoot
end
498
-- Pages: builds the list of wrapped page dictionaries. Each page gets its
-- page number and the five boxes (as reported by getbox) stored as direct
-- fields, and is registered in the document's cache and cross reference
-- tables under its page reference. Pages for which no data can be found are
-- reported and left out of the list.
function resolvers.pages(document)
    local __data__  = document.__data__
    local __xrefs__ = document.__xrefs__
    local __cache__ = document.__cache__
    --
    local nofpages = document.nofpages
    local pages    = { }
    local rawpages = pagestotable(__data__)
    document.pages = pages
    --
    for pagenumber=1,nofpages do
        local rawpagedata = rawpages[pagenumber]
        if rawpagedata then
            local pagereference = rawpagedata[3]
            local pageobject    = rawpagedata[1]
            -- only convert when there is an object: the check used to come
            -- after the conversion, so a missing object was handed to the
            -- converter before the report below could be reached
            local pagedata      = pageobject and some_dictionary(pageobject,document)
            if pagedata then
                pagedata.number   = pagenumber
                pagedata.MediaBox = getbox(pageobject,"MediaBox")
                pagedata.CropBox  = getbox(pageobject,"CropBox")
                pagedata.BleedBox = getbox(pageobject,"BleedBox")
                pagedata.ArtBox   = getbox(pageobject,"ArtBox")
                pagedata.TrimBox  = getbox(pageobject,"TrimBox")
                pages[pagenumber] = pagedata
                __xrefs__[pagedata]      = pagereference
                __cache__[pagereference] = pagedata
            else
                report_epdf("missing pagedata for page %i, case %i",pagenumber,1)
            end
        else
            report_epdf("missing pagedata for page %i, case %i",pagenumber,2)
        end
    end
    --
 -- pages.n = nofpages
    --
    return pages
end
537
538local loaded    = { }
539local nofloaded = 0
540
-- Loads a pdf document and returns a document table, or nil on failure.
--
-- filename      : the file to open, or the pdf data itself when 'fromstring'
--                 is set
-- userpassword  : optional, tried when the document reports a negative
--                 (encrypted) status
-- ownerpassword : optional, tried when the status is still negative
-- fromstring    : when true, 'filename' holds the data and not a name
--
-- Documents are remembered under 'filename', so a second call with the same
-- argument returns the same table. The Catalog, Info and Trailer are wrapped
-- right away; other properties are resolved on first access and objects can
-- be fetched by number (or by raw reference entry) through document.objects.
function lpdf_epdf.load(filename,userpassword,ownerpassword,fromstring)
    local document = loaded[filename]
    if not document then
        statistics.starttiming(lpdf_epdf)
        local __data__
        if fromstring then
            __data__ = pdfnew(filename,#filename)
        else
            local f = ioopen(filename,"rb")
            __data__ = f and pdfopenfile(f)
        end
        if __data__ then
            if userpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,userpassword,nil)
            end
            if ownerpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,nil,ownerpassword)
            end
            if getstatus(__data__) < 0 then
                report_epdf("the document is encrypted, provide proper passwords")
                __data__ = false
            end
            if __data__ then
                local __cache__ = { }
                local __xrefs__ = { }
                document = {
                    filename   = filename,
                    nofcopied  = 0,
                    copied     = { },
                    __cache__  = __cache__,
                    __xrefs__  = __xrefs__,
                    __fonts__  = { },
                    __copied__ = { },
                    __data__   = __data__,
                }
                document.Catalog = some_dictionary(getcatalog(__data__),document)
                document.Info    = some_dictionary(getinfo(__data__),document)
                document.Trailer = some_dictionary(gettrailer(__data__),document)
                --
                setmetatableindex(document,resolve)
                --
                document.majorversion, document.minorversion = getversion(__data__)
                --
                document.nofpages = getnofpages(__data__)
                -- we could also use cached but this proxy hides it
                -- setmetatableindex(__cache__,function(t,objnum)
                document.objects = setmetatableindex(function(t,objnum)
                    local kind = type(objnum)
                    if kind == "table" and objnum[1] == reference_object_code then
                        -- a raw reference entry: use its object number
                        objnum = objnum[3]
                        kind   = type(objnum)
                    end
                    if kind == "number" then
                        local cached = __cache__[objnum]
                        if not cached then
                            cached = some_object(document,objnum)
                            __cache__[objnum] = cached
                            __xrefs__[cached] = objnum
                        end
                        return cached
                    end
                end)
            else
                document = false
            end
        else
            -- we only get here when opening failed (the check used to test a
            -- misspelled and therefore always undefined variable)
            report_epdf("the document is damaged or empty")
            document = false
        end
        -- a failed load is remembered as false, which makes the next call
        -- try again
        loaded[filename] = document
        if document then
            loaded[document] = document
        end
        statistics.stoptiming(lpdf_epdf)
     -- print(statistics.elapsedtime(lpdf_epdf))
    end
    if document then
        nofloaded = nofloaded + 1
    end
    return document or nil
end
623
-- Passes on what getobjectrange reports for object 'n' of a loaded document;
-- there is no result when nothing is loaded under 'filename'.
function lpdf_epdf.objectrange(filename,n)
    local document = loaded[filename]
    if not document then
        return
    end
    return getobjectrange(document.__data__,n)
end
630
-- Forgets and closes a loaded document. The argument is either the name the
-- document was loaded under or a table with a filename field (like the
-- document itself). Anything else, or an unknown name, is silently ignored.
function lpdf_epdf.unload(filename)
    local name = filename
    if type(name) == "table" then
        name = name.filename
    end
    if type(name) ~= "string" then
        return
    end
    local document = loaded[name]
    if document then
        loaded[document] = nil
        loaded[name]     = nil
        pdfclose(document.__data__)
    end
end
644
-- Forgets and closes a document given the document table itself; unknown
-- documents are ignored.
function lpdf.close(document)
    if not loaded[document] then
        return
    end
    loaded[document]          = nil
    loaded[document.filename] = nil
    pdfclose(document.__data__)
end
652
653-- for k, v in expanded(t) do
654
-- Iterator over a wrapped object: the keys come from the raw table while the
-- values are fetched from the wrapper itself, so they arrive resolved. The
-- traversal stops at the end of the raw table or at the first raw value
-- that is false.
local function expanded(t)
    local function step(raw,previous)
        local key, value = next(raw,previous)
        if value then
            return key, t[key]
        end
    end
    return step, t.__raw__, nil
end
664
665---------.expand   = expand
666lpdf_epdf.expanded = expanded
667
668-- we could resolve the text stream in one pass if we directly handle the
669-- font but why should we complicate things
670
671-- local unescape = Cs((
672--     p_remap / remapper
673--   + P("\\")/"" * (lpegpatterns.octdigit * lpegpatterns.octdigit^-2) / remapper
674--   + P(1) / h_hex_2
675-- )^0)
676
677-- decstring  = Ct(Cc("dec")   * P("(")  * Cs((numchar+1-(P")"))^1) * P(")")), -- untested
678--     decstring  = Ct(Cc("dec")   * P("(")  * (Cs((P("\\")*P(1)+1-(P")"))^1)/lpdf.fromeight) * P(")")), -- untested
679--     decstring  = Ct(Cc("dec")   * P("(")  * (Cs((P("\\")*P(1)+1-(P")"))^1)) * P(")")), -- untested
680--     decstring  = Ct(Cc("dec")   * P("(")  * (C((P("\\)")+1-(P")"))^1)/lpdf.fromeight) * P(")")), -- untested
681--     decstring  = Ct(Cc("dec")   * P("(")  * (lpegpatterns.pdffromeight - P(")")) * P(")")), -- untested
682
683-- We normalize to hex because when strings move through lua we sometimes get escaping
684-- (ff) that I can't figure out.
685
686
-- Lookup table from lpdf, indexed below with single characters.
-- NOTE(review): presumably it gives the two digit hex representation of a
-- byte, as the name and the "normalize to hex" remark above suggest --
-- confirm in the lpdf helpers.
local h_hex_2 = lpdf.h_hex_2

-- Replacements for the escape sequences that can show up in a (...) string:
-- the named escapes map onto the entry for the character they stand for and
-- an escaped line end maps onto nothing.
local remapper = {
    ["\\("]  = h_hex_2["("],
    ["\\)"]  = h_hex_2[")"],
    ["\\n"]  = h_hex_2["\n"],
    ["\\r"]  = h_hex_2["\r"],
    ["\\t"]  = h_hex_2["\t"],
    ["\\b"]  = h_hex_2["\b"],
    ["\\f"]  = h_hex_2["\f"],
    ["\\\n"] = "",
    ["\\\r"] = "",
    ["\\\\"] = h_hex_2["\\"],
}

-- The pattern has to be made before the index handler is installed, so it
-- only covers the sequences listed above.
local p_remap = lpeg.utfchartabletopattern(remapper)

-- Any other key is taken to be a run of octal digits (see p_dec_string): it
-- is converted to a character, looked up and remembered for next time.
setmetatableindex(remapper,function(t,k)
    local v = h_hex_2[char(octtointeger(k))]
    t[k] = v
    return v
end)

-- <...> : gives { "hex", <what sits between the angle brackets> }
local p_hex_string = Ct(Cc("hex") *
    P("<")
  * Cs((1 - P(">"))^1)
  * P(">")
)

-- (...) : also tagged "hex", because the content is rewritten on the fly:
-- known escapes and octal escapes (one to three digits, the backslash is
-- dropped) go through the remapper and any other character through h_hex_2.
-- The trailing subtraction applies to the whole choice, so the loop stops at
-- the first closing parenthesis that is not part of an escape.
local p_dec_string = Ct(Cc("hex") *
    P("(")
  * Cs(
        (
            p_remap / remapper
          + P("\\")/"" * ((lpegpatterns.octdigit * lpegpatterns.octdigit^-2) / remapper)
          + P(1) / h_hex_2
          - P(")")
        )^0
    )
  * P(")")
)
728
-- Building blocks for the content stream parser.

local spaces    = lpegpatterns.whitespace^1
local optspaces = lpegpatterns.whitespace^0
local comment   = P("%") * (1 - lpegpatterns.newline)^0

-- NOTE(review): this numchar is not referenced by the grammar below and the
-- name is redefined further down for the stripper -- looks like a leftover.
local numchar   = P("\\")/"" * (R("09")^3/function(s) return char(octtointeger(s)) end)
                + P("\\") * P(1)

-- key      : the name after a slash, as a plain capture
-- number   : { "number", <converted value> }
-- keyword  : { "name", <name> }
-- operator : a run of letters, "*", "'" or '"', as a plain capture
local key       = P("/") * C(R("AZ","az","09","__","--")^1)
local number    = Ct(Cc("number") * (lpegpatterns.number/dectointeger))
local keyword   = Ct(Cc("name") * key)
local operator  = C((R("AZ","az")+P("*")+P("'")+P('"'))^1)

-- A run of operands: comments and spaces give no captures, names, numbers
-- and strings give tagged tables, and dictionaries and arrays give
-- { "dict", { ... } } and { "array", { ... } }.
local grammar = P { "start",
    start      = (comment + keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces)^1,
    keyvalue   = key * optspaces * V("start"),
    dictionary = Ct(Cc("dict")  * P("<<") * Ct(V("keyvalue")^1) * P(">>")),
    array      = Ct(Cc("array") * P("[")  * Ct(V("start")^1) * P("]")),
    hexstring  = p_hex_string,
    decstring  = p_dec_string,
}

-- An operation is a table with the operands followed by the operator, or
-- just an operator. The parser collects all operations in one table and
-- steps over anything that does not fit.
local operation = Ct(grammar^1 * operator + operator)
local parser    = Ct((operation + P(1))^1)
752
-- The fast variant only differs in how numbers are treated: they are kept as
-- the matched string instead of being converted and wrapped in a tagged
-- table. The rest of the grammar is the same as above.

local number = C(lpegpatterns.number)

local fastgrammar = P { "start",
    start      = (comment + keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces)^1,
    keyvalue   = key * optspaces * V("start"),
    dictionary = Ct(Cc("dict")  * P("<<") * Ct(V("keyvalue")^1) * P(">>")),
    array      = Ct(Cc("array") * P("[")  * Ct(V("start")^1) * P("]")),
    hexstring  = p_hex_string,
    decstring  = p_dec_string,
}

-- same shape as operation and parser above
local fastoperation = Ct(fastgrammar^1 * operator + operator)
local fastparser    = Ct((fastoperation + P(1))^1)
766
-- Parses a content stream into a list of operations; with 'fast' set the
-- variant that keeps numbers as strings is used.
function lpdf_epdf.parsecontent(str,fast)
    local pattern = parser
    if fast then
        pattern = fastparser
    end
    return lpegmatch(pattern,str)
end
770
771-- this will go as we can better run over the list --
772
-- Capture-free variants of the token patterns; the stripper only needs to
-- recognize tokens, not to collect them.

local numchar   = P("\\") * (R("09")^3 + P(1))
local number    = lpegpatterns.number
local keyword   = P("/") * R("AZ","az","09","__")^1
local operator  = (R("AZ","az")+P("*")+P("'")+P('"'))^1

-- the marked content operators that get removed
local skipstart = P("BDC") + P("BMC") + P("DP") + P("MP")
local skipstop  = P("EMC")
-- a property dictionary that has this key is left alone
local skipkeep  = P("/ActualText")

-- The initial rule is "skip". Its first alternative is the only one that
-- replaces something (by nothing): either a name, a dictionary without an
-- /ActualText key and one of the start operators, or the stop operator.
-- The other alternatives just step over a complete token, so that operator
-- names inside strings and such are not seen as operators.
-- NOTE(review): because * binds tighter than +, the leading optspaces
-- belongs to the start alternative only; a stop operator is matched without
-- it -- confirm that this is intended.
local grammar   = P { "skip",
    start      = keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces,
    keyvalue   = optspaces * (keyword * optspaces * V("start") * optspaces)^1,
    xeyvalue   = optspaces * ((keyword - skipkeep) * optspaces * V("start") * optspaces)^1,
    array      = P("[")  * V("start")^0         * P("]"),
    dictionary = P("<<") * V("keyvalue")^0      * P(">>"),
    xictionary = P("<<") * V("xeyvalue")^0      * P(">>"),
    hexstring  = P("<")  * (        1-P(">"))^0 * P(">"),
    decstring  = P("(")  * (numchar+1-(P")"))^0 * P(")"),
    skip       = (optspaces * ( keyword * optspaces * V("xictionary") * optspaces * skipstart + skipstop) / "")
               + V("start")
               + operator
}

-- everything that the grammar does not match is copied one character at a time
local stripper = Cs((grammar + P(1))^1)
797
-- Removes marked content operators from a content stream. A stream in which
-- "EMC" does not occur at all is returned untouched, so the substitution is
-- only run when there can be something to strip.
function lpdf_epdf.stripcontent(str)
    if not find(str,"EMC") then
        return str
    end
    return lpegmatch(stripper,str)
end
805
806---------------------------------------------------------------------------
807
808-- beginbfrange : <start> <stop> <firstcode>
809--                <start> <stop> [ <firstsequence> <firstsequence> <firstsequence> ]
810-- beginbfchar  : <code> <newcodes>
811
812local fromsixteen = lpdf.fromsixteen -- maybe inline the lpeg ... but not worth it
813
-- beginbfchar entry: registers the converted <newcodes> string 'b' under the
-- numeric value of the hex <code> 'a'.
local function f_bfchar(t,a,b)
    local code = hextointeger(a)
    t[code] = fromsixteen(b)
end
817
-- beginbfrange entry of the form <start> <stop> <firstcode>: the codes from
-- start upto stop map onto firstcode, firstcode + 1 and so on. This was a
-- stub that printed its arguments to the console and registered nothing.
--
-- t : the table that collects the mapping
-- a : hex string, first code of the range
-- b : hex string, last code of the range
-- c : hex string, the target of the first code
--
-- Only the last four hex digits of the target are incremented; what comes
-- before them is kept as it is. Each target is converted with fromsixteen,
-- in the same way as f_bfchar does it.
-- NOTE(review): assumes the hex strings contain no whitespace -- confirm
-- against real world ToUnicode streams.
local function f_bfrange_1(t,a,b,c)
    local first = hextointeger(a)
    local last  = hextointeger(b)
    local base  = hextointeger(string.sub(c,-4))
    if not (first and last and base) then
        return
    end
    local prefix = string.sub(c,1,-5)
    for i=0,last-first do
        local unit = base + i
        if unit > 0xFFFF then
            -- the increment may not overflow the last unit; ignore the rest
            break
        end
        t[first+i] = fromsixteen(prefix .. string.format("%04X",unit))
    end
end
823
-- beginbfrange entry of the form <start> <stop> [ <sequence> ... ]: every
-- code in the range gets its own target from the list. This was a stub that
-- printed its arguments to the console and registered nothing.
--
-- t : the table that collects the mapping
-- a : hex string, first code of the range
-- b : hex string, last code of the range
-- c : list of hex strings, one target per code
--
-- Targets beyond the end of the range are ignored; codes for which the list
-- has no target stay unmapped.
local function f_bfrange_2(t,a,b,c)
    local first = hextointeger(a)
    local last  = hextointeger(b)
    if not (first and last) then
        return
    end
    for i=1,#c do
        local code = first + i - 1
        if code > last then
            break
        end
        t[code] = fromsixteen(c[i])
    end
end
829
-- Patterns for the bfchar and bfrange sections of a ToUnicode stream. The
-- table that collects the mapping is passed as extra argument to the match
-- (hence the Carg(1)) and is handed to the handlers as first argument.

local optionals   = spaces^0
-- optional spaces followed by <...>, captures what sits between the brackets
local hexstring   = optionals * P("<") * C((1-P(">"))^1) * P(">")
-- * and / have the same precedence, so each handler gets all the captures of
-- the sequence in front of it: the table followed by the hex strings (and,
-- for the second bfrange form, a table with the bracketed hex strings)
local bfchar      = Carg(1) * hexstring * hexstring / f_bfchar
local bfrange     = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1
                  + Carg(1) * hexstring * hexstring * optionals * P("[") * Ct(hexstring^1) * optionals * P("]") / f_bfrange_2
-- Runs over the whole stream: sections are processed, everything else is
-- skipped, and at the end the table itself is returned as result of the match.
local fromunicode = (
    P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" ) +
    P("beginbfrange") * bfrange^1 * optionals * P("endbfrange") +
    spaces +
    P(1)
)^1  * Carg(1)
841
lpdf_epdf.helpers = { }

-- Converts the content of a ToUnicode stream into a mapping table; a missing
-- (nil or false) argument is passed back as it is.
function lpdf_epdf.helpers.tounicodetable(tounicode)
    if not tounicode then
        return tounicode
    end
    -- the parentheses limit the result to the first value of the match
    return (lpegmatch(fromunicode,tounicode,1,{}))
end
847
848---------------------------------------------------------------------------
849
-- Collects per font information for the fonts listed in a resource
-- dictionary and stores it in the document (document.__fonts__), which is
-- also what gets returned. For now the information is limited to a
-- tounicode table, which stays empty when a font has no (usable) ToUnicode
-- stream. Fonts that were seen before are not analyzed again.
--
-- document  : a loaded document
-- resources : a wrapped resource dictionary, can be nil
local function analyzefonts(document,resources) -- unfinished, see mtx-pdf for better code
    local fonts    = document.__fonts__
    local fontlist = resources and resources.Font
    if fontlist then
        for id, data in expanded(fontlist) do
            if not fonts[id] then
                --  a quick hack ... I will look into it more detail if I find a real
                -- -application for it
                --
                -- not every font has a ToUnicode entry, so we check before we
                -- call it (calling a missing entry used to raise an error)
                local stream    = data.ToUnicode
                local tounicode = nil
                if type(stream) == "table" then
                    tounicode = stream()
                end
                if tounicode then
                    tounicode = lpegmatch(fromunicode,tounicode,1,{})
                end
                fonts[id] = {
                    tounicode = type(tounicode) == "table" and tounicode or { }
                }
                setmetatableindex(fonts[id],"self")
            end
        end
    end
    return fonts
end
873
874lpdf_epdf.analyzefonts = analyzefonts
875
-- State shared by the converters below: 'more' holds a pending high
-- surrogate (zero when there is none) and 'unic' is the tounicode table of
-- the current font, which has to be set before these patterns are used.

local more = 0
local unic = nil -- cheaper than passing each time as Carg(1)

-- Converts one unit of four hex digits. Codes are looked up in the current
-- tounicode table first and otherwise taken to be code points.
--
-- Two things changed compared to the earlier version:
--
-- * A high surrogate now gives an empty string. Without a value the
--   substitution capture keeps the matched text, so the four hex digits
--   ended up in the result.
-- * The fallback encodes the code point as utf-8 when the utf8 library is
--   around. string.char, which is what utfchar refers to, only accepts
--   values upto 255 and therefore fails on most four digit codes and on all
--   combined surrogate pairs.
--
-- The 0x10000 offset is right: a surrogate pair encodes the code point minus
-- 0x10000.
-- NOTE(review): a high surrogate that is not followed by a low one is not
-- detected.

local p_hex_to_utf = C(4) / function(s) -- needs checking !
    local toutf = utf8 and utf8.char or utfchar
    local now   = hextointeger(s)
    if more > 0 then
        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000
        more = 0
        return unic[now] or toutf(now)
    elseif now >= 0xD800 and now <= 0xDBFF then
        more = now
        return ""
    else
        return unic[now] or toutf(now)
    end
end

-- Converts one byte: a lookup in the tounicode table with the byte itself as
-- fallback.

local p_dec_to_utf = C(1) / function(s) -- needs checking ! not needed now that we always go hex
    local now = byte(s)
    return unic[now] or utfchar(now)
end

-- The final patterns: reset the surrogate state and convert the whole string.

local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_to_utf^1)
local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_to_utf^1)
900
901-- also xform?
902
-- Gives the content of a page as one string. A wrapped stream is read, a
-- wrapped array has each of its entries called and the results joined with
-- a space. Anything else is returned as it came in.
local function allcontent(content)
    if type(content) ~= "table" then
        return content
    end
    local ctype = content.__type__
    if ctype == stream_object_code then
        -- the parentheses keep only the first result
        return (content())
    end
    if ctype == array_object_code then
        local parts = { }
        for i=1,#content do
            parts[i] = content[i]()
        end
        return concat(parts," ")
    end
    return content
end
918
919lpdf_epdf.allcontent = allcontent
920
-- Parses the content stream of a page into a list of entries. Each entry is an
-- indexed table with the operands first and the operator (a string) last.
--
--   document   : a loaded document (its 'pages' table is consulted)
--   pagenumber : index into document.pages
--   asis       : when set the parsed list is returned without further processing
--   fast       : when set 'fastparser' is used instead of 'parser'
--
-- Nothing is returned when there is no such page and nil is returned when the
-- content can't be parsed. Unless 'asis' is set, the strings used by the text
-- operators are converted to utf using the tounicode data of the current font:
--
--   TJ        : the members of the array are replaced by strings (text) and
--               whatever is in the second slot of other members (kerns)
--   Tj ' "    : the string operand itself is replaced by a string
--
-- which is the shape that contenttotext and getstructure expect.

function lpdf_epdf.getpagecontent(document,pagenumber,asis,fast)

    local page = document.pages[pagenumber]

    if not page then
        return
    end

    local content = allcontent(page.Contents or "")
    local list    = lpegmatch(fast and fastparser or parser,content)

    if asis or not list then
        return list -- , fonts
    end

    local fonts = analyzefonts(document,page.Resources)
    local font  = nil

    unic = { } -- so that text that comes before the first Tf doesn't crash

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            font = fonts[entry[1][2]]
            unic = font and font.tounicode or { }
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = data[2]  -- { { ... }, { ... } }
         -- inspect(items)
            for j=1,#items do
                local item = items[j]
                if type(item) == "table" then
                    local kind = item[1]
                    if kind == "hex" then
                        -- a failed match would leave a hole in the list
                        items[j] = lpegmatch(p_hex_to_utf,item[2]) or ""
                    elseif kind == "string" then
                        items[j] = lpegmatch(p_dec_to_utf,item[2]) or ""
                    else
                        items[j] = item[2] -- kern
                    end
                end
            end
        elseif operator == "Tj" or operator == "'" or operator == '"' then
            -- { string,  Tj } { string, ' } { n, m, string, " }
            local index = size - 1
            local data  = entry[index] -- { "hex", "..." } or { "string", "..." }
            if type(data) == "table" then
                local kind = data[1]
                if kind == "hex" then
                    entry[index] = lpegmatch(p_hex_to_utf,data[2]) or ""
                elseif kind == "string" then
                    entry[index] = lpegmatch(p_dec_to_utf,data[2]) or ""
                end
            end
        end
    end

    unic = nil -- can be collected

    return list

end
984
-- This is also an experiment. When I really need it I can improve it, for instance
-- with proper position calculation. It might be useful for some search or so.
987
-- A soft hyphen at the end of a string; this is a pattern (the $ anchors it) so
-- it must not be used in a plain find.

local softhyphen = utfchar(0xAD) .. "$"

-- A vertical move of more than this many times the current font size is seen as
-- a line break.

local linefactor = 1.3

-- Turns a content list, as returned by getpagecontent (with strings converted),
-- into one string.
--
--   document : not used here
--   list     : the content list
--
-- The strings of TJ and Tj are collected. In a TJ array a number below -50
-- becomes a space. A cm or Tm whose sixth operand differs more than linefactor
-- times the last Tf size from the previous one adds a newline, unless the text
-- collected so far ends with a soft hyphen.

function lpdf_epdf.contenttotext(document,list) -- maybe signal fonts
    local last_y = 0
    local last_f = 0
    local text   = { }
    local last   = 0

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            last_f = entry[2][2] -- size
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = data[2]  -- { "...", number, "...", ... }
            for j=1,#items do
                local item = items[j]
                local kind = type(item)
                if kind == "string" then
                    last = last + 1
                    text[last] = item
                elseif kind == "number" and item < -50 then
                    last = last + 1
                    text[last] = " "
                end
            end
        elseif operator == "Tj" then
            -- only bump the counter when we really store something, otherwise
            -- we get a hole that makes concat fail
            local item = entry[size-1]
            if type(item) == "string" then
                last = last + 1
                text[last] = item
            end
        elseif operator == "cm" or operator == "Tm" then
            local ty = entry[6][2]
            local dy = abs(last_y - ty)
            if dy > linefactor*last_f then
                if last > 0 and not find(text[last],softhyphen) then
                    last = last + 1
                    text[last] = "\n"
                end
            end
            last_y = ty
        end
    end

    return concat(text)
end
1045
-- Serializes a content list (entries with operands first and the operator last)
-- back into a string. Operands are { kind, value } pairs:
--
--   array : [ ... ]   (the array of a TJ is packed without spaces)
--   dict  : << ... >>
--   hex   : <...>
--   dec   : passed through lpdf.toeight
--
-- A pair with a number as value gives that number and any other pair is written
-- as a name (/value). Everything that is not a table is added as it is. The
-- pieces are joined with spaces.

function lpdf_epdf.contenttostring(contents)
    local buffer  = { }
    local size    = 0
    local compact = false

    -- appends to whatever buffer is current
    local function add(value)
        size = size + 1
        buffer[size] = value
    end

    local serialize

    serialize = function(entry)
        local count = #entry
        compact = entry[count] == "TJ"
        for index=1,count do
            local item = entry[index]
            if type(item) ~= "table" then
                add(item)
            else
                local kind  = item[1]
                local value = item[2]
                if kind == "array" then
                    if compact then
                        -- collect the array in a buffer of its own so that it
                        -- can be added as one piece
                        local outersize, outerbuffer = size, buffer
                        size, buffer = 1, { "[" }
                        serialize(value)
                        add("]")
                        local packed = concat(buffer)
                        size, buffer = outersize, outerbuffer
                        add(packed)
                    else
                        add("[")
                        serialize(value)
                        add("]")
                    end
                elseif kind == "dict" then
                    add("<<")
                    serialize(value)
                    add(">>")
                elseif kind == "hex" then
                    add("<" .. value .. ">")
                elseif kind == "dec" then -- can't happen anymore
                    add(lpdf.toeight(value))
                elseif type(value) == "number" then
                    add(value) -- todo formatter %N
                else -- name
                    add("/" .. value)
                end
            end
        end
    end

    for index=1,#contents do
        serialize(contents[index])
    end
    return concat(buffer," ")
end
1098
-- Reports the marked content structure of a content list, as returned by
-- getpagecontent (with strings converted). The nesting depth follows the BDC
-- and EMC operators and the text of TJ and Tj is reported at the current depth.
--
--   document : not used here
--   list     : the content list
--
-- Nothing is returned; all output goes through report_epdf.

function lpdf_epdf.getstructure(document,list) -- just a test
    local depth = 0
    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "BDC" then
            report_epdf("%w%s : %s",depth,entry[1] or "?",entry[2] and entry[2].MCID or "?")
            depth = depth + 1
        elseif operator == "EMC" then
            depth = depth - 1
        elseif operator == "TJ" then
            -- the operand is { "array", { ... } } so the members live in the
            -- second slot
            local data  = entry[1]
            local items = type(data) == "table" and data[2]
            if type(items) == "table" then
                for j=1,#items do
                    local li   = items[j]
                    local kind = type(li)
                    if kind == "string" then
                        report_epdf("%w > %s",depth,li)
                    elseif kind == "number" and li < -50 then
                        report_epdf("%w >",depth,li)
                    end
                end
            end
        elseif operator == "Tj" then
            report_epdf("%w > %s",depth,entry[size-1])
        end
    end
end
1125
1126if images then do
1127
    -- This can be made a bit faster (just get raw data and pass it) but I will
    -- do that later. In the end the benefit is probably negligible.
1130
1131    local recompress           = false
1132    local stripmarked          = false
1133
1134    local copydictionary       = nil
1135    local copyarray            = nil
1136
1137    local pdfreference         = lpdf.reference
1138    local pdfconstant          = lpdf.constant
1139    local pdfarray             = lpdf.array
1140    local pdfdictionary        = lpdf.dictionary
1141    local pdfnull              = lpdf.null
1142    local pdfliteral           = lpdf.literal
1143
1144    local pdfreserveobject     = lpdf.reserveobject
1145    local shareobjectreference = lpdf.shareobjectreference
1146    local pdfflushobject       = lpdf.flushobject
1147    local pdfflushstreamobject = lpdf.flushstreamobject
1148
1149    local report               = logs.reporter("backend","xobjects")
1150
1151    local factor               = 65536 / (7200/7227) -- 1/number.dimenfactors.bp
1152
1153    local createimage          = images.create
1154
1155    directives.register("graphics.pdf.recompress",  function(v) recompress  = v end)
1156    directives.register("graphics.pdf.stripmarked", function(v) stripmarked = v end)
1157
1158    local function scaledbbox(b)
1159        return { b[1]*factor, b[2]*factor, b[3]*factor, b[4]*factor }
1160    end
1161
1162    local codecs = {
1163        ASCIIHexDecode  = true,
1164        ASCII85Decode   = true,
1165        RunLengthDecode = true,
1166        FlateDecode     = true,
1167        LZWDecode       = true,
1168    }
1169
1170    local function deepcopyobject(xref,copied,value)
1171        -- no need for tables, just nested loop with obj
1172        local objnum = xref[value]
1173        if objnum then
1174            local usednum = copied[objnum]
1175            if usednum then
1176             -- report("%s object %i is reused",kind,objnum)
1177            else
1178                usednum = pdfreserveobject()
1179                copied[objnum] = usednum
1180                local entry = value
1181                local kind  = entry.__type__
1182                if kind == array_object_code then
1183                    local a = copyarray(xref,copied,entry)
1184                    pdfflushobject(usednum,tostring(a))
1185                elseif kind == dictionary_object_code then
1186                    local d = copydictionary(xref,copied,entry)
1187                    pdfflushobject(usednum,tostring(d))
1188                elseif kind == stream_object_code then
1189                    local d = copydictionary(xref,copied,entry)
1190                    local filter = d.Filter
1191                    if filter and codecs[filter] and recompress then
1192                        -- recompress
1193                        d.Filter      = nil
1194                        d.Length      = nil
1195                        d.DecodeParms = nil -- relates to filter
1196                        d.DL          = nil -- needed?
1197                        local s = entry()                        -- get uncompressed stream
1198                        pdfflushstreamobject(s,d,true,usednum)   -- compress stream
1199                    else
1200                        -- keep as-is, even Length which indicates the
1201                        -- decompressed length
1202                        local s = entry(false)                        -- get compressed stream
1203                     -- pdfflushstreamobject(s,d,false,usednum,true)  -- don't compress stream
1204                        pdfflushstreamobject(s,d,"raw",usednum)       -- don't compress stream
1205                    end
1206                else
1207                    local t = type(value)
1208                    if t == "string" then
1209                        value = pdfconstant(value)
1210                    elseif t == "table" then
1211                        local kind  = value[1]
1212                        local entry = value[2]
1213                        if kind == name_object_code then
1214                            value = pdfconstant(entry)
1215                        elseif kind == string_object_code then
1216                            value = pdfliteral(entry,value[3])
1217                        elseif kind == null_object_code then
1218                            value = pdfnull()
1219                        elseif kind == reference_object_code then
1220                            value = deepcopyobject(xref,copied,entry)
1221                        elseif entry == nil then
1222                            value = pdfnull()
1223                        else
1224                            value = tostring(entry)
1225                        end
1226                    end
1227                    pdfflushobject(usednum,value)
1228                end
1229            end
1230            return pdfreference(usednum)
1231        elseif kind == stream_object_code then
1232            report("stream not done: %s", objectcodes[kind] or "?")
1233        else
1234            report("object not done: %s", objectcodes[kind] or "?")
1235        end
1236    end
1237
1238    local function copyobject(xref,copied,object,key,value)
1239        if not value then
1240            value = object.__raw__[key]
1241        end
1242        local t = type(value)
1243        if t == "string" then
1244            return pdfconstant(value)
1245        elseif t ~= "table" then
1246            return value
1247        end
1248        local kind = value[1]
1249        if kind == name_object_code then
1250            return pdfconstant(value[2])
1251        elseif kind == string_object_code then
1252            return pdfliteral(value[2],value[3])
1253        elseif kind == array_object_code then
1254            return copyarray(xref,copied,object[key])
1255        elseif kind == dictionary_object_code then
1256            return copydictionary(xref,copied,object[key]) -- value
1257        elseif kind == null_object_code then
1258            return pdfnull()
1259        elseif kind == reference_object_code then
1260            return deepcopyobject(xref,copied,object[key])
1261        elseif kind == lpdf_object_code then
1262            return value[2]
1263        else
1264         -- report("weird: %s", objectcodes[kind] or "?")
1265        end
1266    end
1267
1268    copyarray = function(xref,copied,object)
1269        local target = pdfarray()
1270        local source = object.__raw__
1271        for i=1,#source do
1272            target[i] = copyobject(xref,copied,object,i,source[i])
1273        end
1274        return target
1275    end
1276
1277--     local plugins = nil
1278
    -- Sorting the hash slows things down by up to 5% but it is still as fast as the
    -- C code. We could loop over the index instead but sorting might be nicer in
    -- the end.
1282
1283    -- This is fragile because keys can be the same in different objects.
1284
1285    copydictionary = function (xref,copied,object)
1286        local target = pdfdictionary()
1287        local source = object.__raw__
1288        -- hm .. no need to sort here as we create a hash
1289        for key, value in next, source do
1290     -- for key, value in sortedhash(source) do
1291--             if plugins then
1292--                 local p = plugins[key]
1293--                 if p then
1294--                     target[key] = p(xref,copied,object,key,value,copyobject) -- maybe a table of methods
1295--                 else
1296--                     target[key] = copyobject(xref,copied,object,key,value)
1297--                 end
1298--             else
1299                target[key] = copyobject(xref,copied,object,key,value)
1300--             end
1301        end
1302        return target
1303    end
1304
1305    local openpdf  = lpdf_epdf.load
1306    local closepdf = lpdf_epdf.unload
1307
1308    -- todo: keep track of already open files
1309
    -- Opens a pdf file by calling the loader (lpdf_epdf.load) with the given
    -- name and passwords and with true as fourth argument. Whatever the loader
    -- returns is passed on.

    local function newpdf(str,userpassword,ownerpassword)
        return openpdf(str,userpassword,ownerpassword,true)
    end
1313
1314    local sizes = {
1315        crop  = "CropBox",
1316        media = "MediaBox",
1317        bleed = "BleedBox",
1318        art   = "ArtBox",
1319        trim  = "TrimBox",
1320    }
1321
1322    local function querypdf(pdfdoc,pagenumber,size)
1323        if pdfdoc then
1324            if not pagenumber then
1325                pagenumber = 1
1326            end
1327            local root = pdfdoc.Catalog
1328            local page = pdfdoc.pages[pagenumber]
1329            if page then
1330                local sizetag  = sizes[size or "crop"] or sizes.crop
1331                local mediabox = page.MediaBox or { 0, 0, 0, 0 }
1332                local cropbox  = page[sizetag] or mediabox
1333                return {
1334                    filename    = pdfdoc.filename,
1335                    pagenumber  = pagenumber,
1336                    nofpages    = pdfdoc.nofpages,
1337                    boundingbox = scaledbbox(cropbox),
1338                    cropbox     = cropbox,
1339                    mediabox    = mediabox,
1340                    bleedbox    = page.BleedBox or cropbox,
1341                    trimbox     = page.TrimBox or cropbox,
1342                    artbox      = page.ArtBox or cropbox,
1343                    rotation    = page.Rotate or 0,
1344                    xsize       = cropbox[3] - cropbox[1],
1345                    ysize       = cropbox[4] - cropbox[2],
1346                }
1347            end
1348        end
1349    end
1350
1351    local function copyresources(pdfdoc,xref,copied,Resources)
1352        if Resources then
1353            local d = copydictionary(xref,copied,Resources)
1354            return shareobjectreference(d)
1355        end
1356    end
1357
1358    local variables = interfaces.variables
1359
1360    local function copypage(pdfdoc,pagenumber,attributes,compact,width,height,attr,copymeta)
1361        if pdfdoc then
1362            local root     = pdfdoc.Catalog
1363            local page     = pdfdoc.pages[pagenumber or 1]
1364            local pageinfo = querypdf(pdfdoc,pagenumber)
1365            local contents = page.Contents
1366            if contents then
1367                local xref      = pdfdoc.__xrefs__
1368                local copied    = pdfdoc.__copied__
1369                local resources = page.Resources
1370                if compact and resources and lpdf_epdf.pageplugin then
1371                    lpdf_epdf.pageplugin(pdfdoc,page,pagenumber,resources,compact)
1372                    contents = page.Contents
1373                end
1374                local metadata = nil
1375                -- page     : only page (default, compatibility)
1376                -- document : only document
1377                -- yes      : page or document
1378                if copymeta == variables.page or copymeta == variables.yes then
1379                    -- We seldom have metadata with a page.
1380                    metadata = copyobject(xref,copied,page,"Metadata")
1381                end
1382                if not metadata and (copymeta == variables.document or copymeta == variables.yes) then
1383                    -- For our own documents we don't need this.
1384                    metadata = copyobject(xref,copied,root,"Metadata")
1385                end
1386                local xobject = pdfdictionary {
1387                    Type           = pdfconstant("XObject"),
1388                    Subtype        = pdfconstant("Form"),
1389                    FormType       = 1,
1390                    Group          = copyobject(xref,copied,page,"Group"),
1391                    LastModified   = copyobject(xref,copied,page,"LastModified"),
1392                 -- Metadata       = copyobject(xref,copied,page,"Metadata"),
1393                    Metadata       = metadata,
1394                    PieceInfo      = copyobject(xref,copied,page,"PieceInfo"),
1395                    Resources      = copyresources(pdfdoc,xref,copied,resources),
1396                 -- Resources      = copyobject(xref,copied,page,"Resources"),
1397                    SeparationInfo = copyobject(xref,copied,page,"SeparationInfo"),
1398                } + attr
1399                if attributes then
1400                    for k, v in expanded(attributes) do
1401                        page[k] = v -- maybe nested
1402                    end
1403                end
1404                local content  = ""
1405                local nolength = nil
1406                if type(contents) == "string" then
1407                    content = contents -- can be result of plugin
1408                else
1409                    local ctype = contents.__type__
1410                    -- we always recompress because image object streams can not be
1411                    -- influenced (yet)
1412                    if ctype == stream_object_code then
1413                        if stripmarked then
1414                            content = contents() -- uncompressed
1415                            local stripped = lpdf_epdf.stripcontent(content)
1416                            if stripped ~= content then
1417                             -- report("%i bytes stripped on page %i",#content-#stripped,pagenumber or 1)
1418                                content = stripped
1419                            end
1420                        elseif recompress then
1421                            content = contents() -- uncompressed
1422                        else
1423                            local Filter = copyobject(xref,copied,contents,"Filter")
1424                            local Length = copyobject(xref,copied,contents,"Length")
1425                            if Length and Filter then
1426                                nolength = true
1427                                xobject.Length = Length
1428                                xobject.Filter = Filter
1429                                content = contents(false) -- uncompressed
1430                            else
1431                                content = contents() -- uncompressed
1432                            end
1433                        end
1434                    elseif ctype == array_object_code then
1435                        content = { }
1436                        for i=1,#contents do
1437                            content[i] = contents[i]() -- uncompressed
1438                        end
1439                        content = concat(content," ")
1440                    end
1441                end
1442                -- still not nice: we double wrap now
1443--                 plugins = nil
1444                local rotation    = pageinfo.rotation
1445                local boundingbox = pageinfo.boundingbox
1446                local transform   = nil
1447                if rotation == 90 then
1448                    transform = 3
1449                elseif rotation == 180 then
1450                    transform = 2
1451                elseif rotation == 270 then
1452                    transform = 1
1453                elseif rotation > 1 and rotation < 4 then
1454                    transform = rotation
1455                end
1456                xobject.BBox = pdfarray {
1457                    boundingbox[1] * bpfactor,
1458                    boundingbox[2] * bpfactor,
1459                    boundingbox[3] * bpfactor,
1460                    boundingbox[4] * bpfactor,
1461                }
1462                -- maybe like bitmaps
1463-- TESTTEST = content
1464                    return createimage { -- beware: can be a img.new or a dummy
1465                    bbox      = boundingbox,
1466                    transform = transform,
1467                    nolength  = nolength,
1468                    nobbox    = true,
1469                    notype    = true,
1470                    stream    = content, -- todo: no compress, pass directly also length, filter etc
1471                    attr      = xobject(),
1472                    kind      = images.types.stream,
1473                }
1474            else
1475                -- maybe report an error
1476            end
1477        end
1478    end
1479
1480    lpdf_epdf.image = {
1481        open   = openpdf,
1482        close  = closepdf,
1483        new    = newpdf,
1484        query  = querypdf,
1485        copy   = copypage,
1486    }
1487
1488--     lpdf.injectors.pdf = function(specification)
1489--         local d = lpdf_epdf.load(specification.filename)
1490--         print(d)
1491--     end
1492
1493
1494end end
1495
-- Returns the producer of a document or an empty string when there is none (or
-- no document). The Producer entry of the Info dictionary comes first. When
-- that gives nothing the metadata stream of the catalog is converted to xml and
-- queried: first rdf:Description/pdf:Producer and then Producer.

function lpdf_epdf.producer(pdfdoc)
    if not pdfdoc then
        return ""
    end
    local info     = pdfdoc.Info
    local producer = false
    if info then
        producer = info.Producer
    end
    if producer then
        return producer
    end
    local metadata = pdfdoc.Catalog.Metadata
    if not metadata then
        return ""
    end
    local root = xml.convert(metadata())
    if not root then
        return ""
    end
    producer = xml.text(root,"rdf:Description/pdf:Producer")
    if not producer or producer == "" then
        producer = xml.text(root,"Producer")
    end
    return producer or ""
end
1518
-- Expands a widths specification into a table indexed by code. Two forms can
-- be mixed in the specification:
--
--   first { w1 w2 ... }  : consecutive codes starting at first
--   first last w         : the same width for the codes first upto last
--
--   widths   : the specification (nothing is expanded when it is absent)
--   expanded : optional table to store the result in
--
-- Returned are the expanded table and the lowest and highest code that got a
-- width (both zero when nothing was set). The entries can come in any order.

function lpdf_epdf.expandwidths(widths,expanded)
    if not expanded then
        expanded = { }
    end
    local min = false
    local max = false
    -- keeps track of the range of codes seen so far; empty ranges don't count
    local function register(first,last)
        if last < first then
            return
        elseif not min then
            min = first
            max = last
        else
            if first < min then
                min = first
            end
            if last > max then
                max = last
            end
        end
    end
    local i = 1
    local n = widths and #widths or 0 -- - 1
    while i < n do
        local w1 = widths[i] ; i = i + 1
        local w2 = widths[i] ; i = i + 1
        if type(w2) == "table" then
            local k = 1
            local wn = w1 + #w2 - 1
            for j=w1,wn do
                expanded[j] = w2[k]
                k = k + 1
            end
            register(w1,wn)
        else
            local w3 = widths[i] ; i = i + 1
            if w3 then
                for j=w1,w2 do
                    expanded[j] = w3
                end
                register(w1,w2)
            end
        end
    end
    return expanded, min or 0, max or 0
end
1560
-- Copies the indexed entries of 'widths' to the same slots in 'expanded' (a new
-- table when not given). Returned are that table, 1 and the number of entries.

function lpdf_epdf.mergewidths(widths,expanded)
    local target = expanded or { }
    local count  = #widths
    for index=1,count do
        target[index] = widths[index]
    end
    return target, 1, count
end
1572
1573-- local d = lpdf_epdf.load("e:/tmp/oeps.pdf")
1574-- inspect(d)
1575-- inspect(d.Catalog.Lang)
1576-- inspect(d.Catalog.OCProperties.D.AS[1].Event)
1577-- inspect(d.Catalog.Metadata())
1578-- inspect(d.Catalog.Pages.Kids[1])
1579-- inspect(d.layers)
1580-- inspect(d.pages)
1581-- inspect(d.destinations)
1582-- inspect(lpdf_epdf.getpagecontent(d,1))
-- inspect(lpdf_epdf.contenttotext(d,lpdf_epdf.getpagecontent(d,1)))
-- inspect(lpdf_epdf.getstructure(d,lpdf_epdf.getpagecontent(d,1)))
1585