1if not modules then modules = { } end modules ['lpdf-epd'] = {
2 version = 1.001,
3 comment = "companion to lpdf-epa.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files",
7 history = "this one replaces the poppler/pdfe binding",
8}
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
-- Localized standard library functions and ConTeXt table helpers.
local setmetatable, type, next = setmetatable, type, next
local tostring, tonumber, unpack = tostring, tonumber, unpack
local char, byte, find = string.char, string.byte, string.find
local abs = math.abs
local concat, swapped, sortedhash, sortedkeys = table.concat, table.swapped, table.sortedhash, table.sortedkeys
-- The hex string converter further down calls utfchar with code points far
-- beyond 255 (including values rebuilt from surrogate pairs), so this has to
-- be a real UTF-8 encoder: string.char raises an error for anything above 255.
-- Prefer the "utf" namespace when present, then the Lua 5.3 "utf8" library,
-- and only fall back to the byte-only string.char when neither exists.
local utfchar = (utf and utf.char) or (utf8 and utf8.char) or char
local setmetatableindex = table.setmetatableindex
local ioopen = io.open
52
53local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
54local P, C, S, R, Ct, Cc, V, Carg, Cs, Cf, Cg = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V, lpeg.Carg, lpeg.Cs, lpeg.Cf, lpeg.Cg
55
56if not lpdf then
57 require("lpdf-aux")
58end
59
60if not (number and number.dimenfactors) then
61 require("util-dim")
62end
63
64local pdfe = pdfe
65 lpdf = lpdf or { }
66local lpdf = lpdf
67local lpdf_epdf = { }
68 lpdf.epdf = lpdf_epdf
69
70local pdfopen = pdfe.open
71local pdfopenfile = pdfe.openfile
72local pdfnew = pdfe.new
73local pdfclose = pdfe.close
74
75local getcatalog = pdfe.getcatalog
76local getinfo = pdfe.getinfo
77local gettrailer = pdfe.gettrailer
78local getnofpages = pdfe.getnofpages
79local getversion = pdfe.getversion
80local getbox = pdfe.getbox
81local getstatus = pdfe.getstatus
82local unencrypt = pdfe.unencrypt
83
84local dictionarytotable = pdfe.dictionarytotable
85local arraytotable = pdfe.arraytotable
86local pagestotable = pdfe.pagestotable
87local readwholestream = pdfe.readwholestream
88
89local getfromreference = pdfe.getfromreference
90
91local report_epdf = logs.reporter("epdf")
92
93local allocate = utilities.storage.allocate
94
95local bpfactor = number.dimenfactors.bp
96
-- Numeric object type codes (index 0 upward) and their names.
local objectcodes = { [0] =
    "none",
    "null",
    "bool",
    "integer",
    "number",
    "name",
    "string",
    "array",
    "dictionary",
    "stream",
    "reference",
}

-- Encryption status codes and their names.
local encryptioncodes = {
    [0]  = "notencrypted",
    [1]  = "unencrypted",
    [-1] = "protected",
    [-2] = "failure",
}

-- Each table is passed to swapped() together with itself; the name based
-- lookups below (objectcodes.null and friends) rely on the result also
-- mapping names to codes.
objectcodes     = allocate(swapped(objectcodes,objectcodes))
encryptioncodes = allocate(swapped(encryptioncodes,encryptioncodes))

pdfe.objectcodes     = objectcodes
pdfe.encryptioncodes = encryptioncodes

-- One local per object code; every code is declared exactly once (the
-- null and reference codes used to be declared twice).
local none_object_code       = objectcodes.none
local null_object_code       = objectcodes.null
local bool_object_code       = objectcodes.bool
local integer_object_code    = objectcodes.integer
local number_object_code     = objectcodes.number
local name_object_code       = objectcodes.name
local string_object_code     = objectcodes.string
local array_object_code      = objectcodes.array
local dictionary_object_code = objectcodes.dictionary
local stream_object_code     = objectcodes.stream
local reference_object_code  = objectcodes.reference
138
local checked_access
local get_flagged

if lpdf.dictionary then

    -- Maps the flag found in the raw table to the lpdf constructor that has
    -- to wrap the resolved value. A "dictionary" flag now really produces a
    -- dictionary; it used to be wrapped by the array constructor.
    local wrappers = {
        name       = lpdf.constant,
        array      = lpdf.array,
        dictionary = lpdf.dictionary,
        rawtext    = lpdf.string,
        unicode    = lpdf.unicode,
    }

    -- Returns t[k], wrapped according to the flag stored in f[k].
    --
    -- t : wrapper table (indexing it resolves the value)
    -- f : raw table holding the flags
    -- k : key to look up
    --
    -- Unknown flags, a missing flag and non-string raw entries all return
    -- the resolved value unchanged.
    get_flagged = function(t,f,k)
        local tk = t[k] -- resolve first, as before
        local fk = f[k]
        if not fk then
            return tk
        end
        local wrap = wrappers[fk]
        if wrap then
            return wrap(tk)
        end
        return tk
    end

else

    -- Without the lpdf constructors we can only return the resolved value.
    get_flagged = function(t,f,k)
        return t[k]
    end

end
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
-- Forward declarations: the wrappers and get_value call each other.
local some_dictionary
local some_array
local some_stream
local some_reference

local some_string = lpdf.frombytes

-- Resolves entry "key" of the raw table "t" into something usable.
--
-- Returns nothing when the key or the entry is missing, the entry itself
-- when it is not a table, and otherwise a value that depends on the object
-- code stored in slot 1 of the entry: the name, a converted string, or a
-- lazy wrapper for arrays, dictionaries, streams and references. Entries
-- with any other code are returned as they are.
local function get_value(document,t,key)
    if not key then
        return
    end
    local value = t[key]
    if not value then
        return
    end
    if type(value) ~= "table" then
        return value
    end
    local kind = value[1]
    if kind == name_object_code then
        return value[2]
    elseif kind == string_object_code then
        return some_string(value[2],value[3])
    elseif kind == array_object_code then
        return some_array(value[2],document)
    elseif kind == dictionary_object_code then
        return some_dictionary(value[2],document)
    elseif kind == stream_object_code then
        -- some_stream expects (stream, dictionary, document); the old call
        -- passed the whole entry as stream and the document as dictionary.
        -- NOTE(review): assumes slot 3 of a stream entry is its dictionary,
        -- mirroring what some_reference passes on - confirm against pdfe.
        return some_stream(value[2],value[3],document)
    elseif kind == reference_object_code then
        return some_reference(value,document)
    end
    return value
end
229
-- Wraps a pdfe dictionary in a lazy table: indexing resolves entries via
-- get_value, calling the wrapper goes through get_flagged. The raw table and
-- the object code are kept in the __raw__ and __type__ fields. Returns the
-- wrapper and the string "dictionary".
some_dictionary = function (d,document)
    local raw     = dictionarytotable(d,true)
    local wrapper = {
        __raw__  = raw,
        __type__ = dictionary_object_code,
    }
    local meta = { }
    function meta.__index(_,key)
        return get_value(document,raw,key)
    end
    function meta.__call(self,key)
        return get_flagged(self,raw,key)
    end
    return setmetatable(wrapper,meta), "dictionary"
end
242
-- Wraps a pdfe array in a lazy table. Besides __raw__ and __type__ the
-- wrapper carries the element count in field "n", which is also what the
-- length operator reports. Returns the wrapper and the string "array".
some_array = function (a,document)
    local raw     = arraytotable(a,true)
    local size    = #raw
    local wrapper = {
        __raw__  = raw,
        __type__ = array_object_code,
        n        = size,
    }
    local meta = { }
    function meta.__index(_,index)
        return get_value(document,raw,index)
    end
    function meta.__call(self,index)
        return get_flagged(self,raw,index)
    end
    function meta.__len()
        return size
    end
    return setmetatable(wrapper,meta), "array"
end
259
-- Wraps a pdfe stream "s" with its dictionary "d". Indexing resolves
-- dictionary entries; calling the wrapper reads the whole stream, where an
-- explicit false argument is passed on as false and anything else as true.
-- Returns the wrapper and the string "stream".
some_stream = function(s,d,document)
    local raw     = dictionarytotable(d,true)
    local wrapper = {
        __raw__  = raw,
        __type__ = stream_object_code,
    }
    local meta = { }
    function meta.__index(_,key)
        return get_value(document,raw,key)
    end
    function meta.__call(_,raw_flag)
        return readwholestream(s,raw_flag ~= false)
    end
    return setmetatable(wrapper,meta), "stream"
end
276
-- Resolves a reference entry "r" (reference in slot 2, object number in
-- slot 3). Results are remembered per object number in document.__cache__
-- and the reverse mapping (object -> number) is stored in
-- document.__xrefs__.
some_reference = function(r,document)
    local cache  = document.__cache__
    local objnum = r[3]
    local known  = cache[objnum]
    if known then
        return known
    end
    local resolved
    local kind, object, b, c = getfromreference(r[2])
    if kind == dictionary_object_code then
        resolved = some_dictionary(object,document)
    elseif kind == array_object_code then
        resolved = some_array(object,document)
    elseif kind == stream_object_code then
        resolved = some_stream(object,b,document)
    else
        -- anything else is kept as a plain tuple
        resolved = { kind, object, b, c }
    end
    cache[objnum] = resolved
    document.__xrefs__[resolved] = objnum
    return resolved
end
297
-- Registry of lazily computed document fields, keyed by field name.
local resolvers = { }
lpdf_epdf.resolvers = resolvers

-- Index handler for documents: when a resolver exists for key "k" it is
-- run once, its result is stored in the document and returned. Unknown keys
-- yield nothing.
local function resolve(document,k)
    local resolver = resolvers[k]
    if not resolver then
        return
    end
    local entry = resolver(document)
    document[k] = entry
    return entry
end
309
-- Flattens a name tree node "n" into "target" (created on demand).
--
-- A node with a Names list contributes its key/value pairs (stored
-- alternately in that list); otherwise its Kids are visited recursively.
-- Returns the target, or nothing when there is no node.
local function getnames(document,n,target)
    if not n then
        return
    end
    local pairslist = n.Names
    if pairslist then
        target = target or { }
        for index=1,#pairslist,2 do
            target[pairslist[index]] = pairslist[index+1]
        end
        return target
    end
    local children = n.Kids
    if children then
        for index=1,#children do
            target = getnames(document,children[index],target)
        end
    end
    return target
end
331
-- Collects the leaves below node "n" (nodes without Kids) into the list
-- "target", which is created when the first leaf is found. Returns the
-- target, or nothing when there is no node.
local function getkids(document,n,target)
    if not n then
        return
    end
    local children = n.Kids
    if children then
        for index=1,#children do
            target = getkids(document,children[index],target)
        end
        return target
    end
    if not target then
        return { n }
    end
    target[#target+1] = n
    return target
end
347
-- Flattens the name tree stored under "key" in the catalog's Names
-- dictionary; yields nothing when the catalog has no Names entry.
local function catalognames(document,key)
    local Names = document.Catalog.Names
    return getnames(document,Names and Names[key])
end

-- Named destinations.
function resolvers.destinations(document)
    return catalognames(document,"Dests")
end

-- Named JavaScript actions.
function resolvers.javascripts(document)
    return catalognames(document,"JavaScript")
end

-- The Fields entry of the AcroForm dictionary, if there is one.
function resolvers.widgets(document)
    local form = document.Catalog.AcroForm
    return form and form.Fields
end

-- Embedded files.
function resolvers.embeddedfiles(document)
    return catalognames(document,"EmbeddedFiles")
end
367
368
369
370
371
372
373
374
375
376
-- Returns the list of Name entries of the optional content groups, or
-- nothing when the catalog has no OCProperties or no OCGs.
function resolvers.layers(document)
    local properties = document.Catalog.OCProperties
    if not properties then
        return
    end
    local groups = properties.OCGs
    if not groups then
        return
    end
    local names = { }
    for index=1,#groups do
        names[index] = groups[index].Name
    end
    return names
end

-- Returns the StructTreeRoot entry of the catalog.
function resolvers.structure(document)
    local catalog = document.Catalog
    return catalog.StructTreeRoot
end
397
-- Builds the page list of a document. Each page is a dictionary wrapper
-- that additionally gets its page number and the five boxes as plain
-- fields; the page is also registered in the xref and cache tables under
-- the value found in slot 3 of the raw page entry. Pages without data are
-- reported and left out.
function resolvers.pages(document)
    local handle   = document.__data__
    local xrefs    = document.__xrefs__
    local cache    = document.__cache__
    local nofpages = document.nofpages
    local pages    = { }
    local rawpages = pagestotable(handle)
    local boxes    = { "MediaBox", "CropBox", "BleedBox", "ArtBox", "TrimBox" }

    -- set early, so that the document no longer triggers the resolver
    document.pages = pages

    for pagenumber=1,nofpages do
        local rawpage = rawpages[pagenumber]
        if not rawpage then
            report_epdf("missing pagedata for page %i, case %i",pagenumber,2)
        else
            local reference = rawpage[3]
            local object    = rawpage[1]
            local pagedata  = some_dictionary(object,document)
            if not (pagedata and object) then
                report_epdf("missing pagedata for page %i, case %i",pagenumber,1)
            else
                pagedata.number = pagenumber
                for index=1,#boxes do
                    local box = boxes[index]
                    pagedata[box] = getbox(object,box)
                end
                pages[pagenumber] = pagedata
                xrefs[pagedata]   = reference
                cache[reference]  = pagedata
            end
        end
    end

    return pages
end
436
-- Documents that have been opened, stored under their filename and under
-- the document table itself.
local loaded    = { }
local nofloaded = 0

-- Opens a pdf document and returns a document table, or nil on failure.
--
-- filename      : the file to open, or the pdf data itself when fromstring
--                 is set
-- userpassword  : tried first when the document reports a negative status
-- ownerpassword : tried next when the status is still negative
-- fromstring    : treat filename as in-memory pdf data
--
-- A document that has been loaded before is returned from the cache; a
-- failed load is remembered as false and tried again on the next call.
function lpdf_epdf.load(filename,userpassword,ownerpassword,fromstring)
    local document = loaded[filename]
    if not document then
        statistics.starttiming(lpdf_epdf)
        local __data__
        if fromstring then
            __data__ = pdfnew(filename,#filename)
        elseif pdfopenfile then
            -- io.open returns nil for a missing or unreadable file; only
            -- hand a real file handle to pdfe so that such a file results
            -- in a failed load (nil) instead of an error that would also
            -- leave the timer running
            local handle = ioopen(filename,"rb")
            if handle then
                __data__ = pdfopenfile(handle)
            end
        else
            __data__ = pdfopen(filename)
        end
        if __data__ then
            if userpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,userpassword,nil)
            end
            if ownerpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,nil,ownerpassword)
            end
            if getstatus(__data__) < 0 then
                report_epdf("the document is encrypted, provide proper passwords",getstatus(__data__))
                __data__ = false
            end
            if __data__ then
                document = {
                    filename   = filename,
                    nofcopied  = 0,
                    copied     = { },
                    __cache__  = { },
                    __xrefs__  = { },
                    __fonts__  = { },
                    __copied__ = { },
                    __data__   = __data__,
                }
                document.Catalog = some_dictionary(getcatalog(__data__),document)
                document.Info    = some_dictionary(getinfo(__data__),document)
                document.Trailer = some_dictionary(gettrailer(__data__),document)
                -- remaining fields (pages, destinations, ...) are resolved
                -- on first access
                setmetatableindex(document,resolve)
                document.majorversion, document.minorversion = getversion(__data__)
                document.nofpages = getnofpages(__data__)
            else
                document = false
            end
        else
            document = false
        end
        loaded[filename] = document
        loaded[document] = document
        statistics.stoptiming(lpdf_epdf)
    end
    if document then
        nofloaded = nofloaded + 1
    end
    return document or nil
end
500
-- Closes a loaded document and removes it from the cache. The argument is
-- either the filename or a document table, in which case its "filename"
-- field is used. Anything that is not loaded is silently ignored.
function lpdf_epdf.unload(filename)
    local name = filename
    if type(name) == "table" then
        name = name.filename
    end
    if type(name) ~= "string" then
        return
    end
    local document = loaded[name]
    if not document then
        return
    end
    loaded[document] = nil
    loaded[name]     = nil
    pdfclose(document.__data__)
end
514
515
516
-- Iterator factory for wrapper tables: walks the keys of t.__raw__ but
-- yields the value found by indexing the wrapper itself. The traversal stops
-- at the first raw entry that is false (or when the raw table is exhausted).
local function expanded(t)
    local function step(raw,previous)
        local key, rawvalue = next(raw,previous)
        if not rawvalue then
            return
        end
        return key, t[key]
    end
    return step, t.__raw__, nil
end
526
527
528lpdf_epdf.expanded = expanded
529
530
531
532
-- Whitespace patterns, shared with the patterns defined further down.
local spaces    = lpegpatterns.whitespace^1
local optspaces = lpegpatterns.whitespace^0

-- Content stream parser. The result is a list of operations; each operation
-- is a list of tagged operands ({ "number", n }, { "name", s },
-- { "array", { ... } }, { "dict", { ... } }, { "hex", s }, { "dec", s })
-- followed by the operator string. Input that does not fit an operation is
-- skipped one character at a time.
local parser

do

    local comment  = P("%") * (1 - lpegpatterns.newline)^0
    -- a backslash followed by (at least) three digits is taken as an octal
    -- character code, any other escape is kept as it is
    local octal    = R("09")^3/function(s) return char(tonumber(s,8)) end
    local numchar  = P("\\")/"" * octal
                   + P("\\") * P(1)
    local key      = P("/") * C(R("AZ","az","09","__")^1)
    local number   = Ct(Cc("number") * (lpegpatterns.number/tonumber))
    local keyword  = Ct(Cc("name") * key)
    local operator = C((R("AZ","az")+P("*")+P("'")+P('"'))^1)

    local grammar = P { "start",
        start      = (comment + keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces)^1,
        keyvalue   = key * optspaces * V("start"),
        array      = Ct(Cc("array") * P("[")  * Ct(V("start")^1)    * P("]")),
        dictionary = Ct(Cc("dict")  * P("<<") * Ct(V("keyvalue")^1) * P(">>")),
        hexstring  = Ct(Cc("hex")   * P("<")  * Cs(( 1-P(">"))^1)   * P(">")),
        decstring  = Ct(Cc("dec")   * P("(")  * Cs((numchar+1-(P")"))^1) * P(")")),
    }

    local operation = Ct(grammar^1 * operator)

    parser = Ct((operation + P(1))^1)

end
554
555
556
-- Substitution pattern that removes marked content operators from a content
-- stream: a tag with a property dictionary followed by BDC, BMC, DP or MP,
-- and every EMC, is replaced by nothing. Property dictionaries that contain
-- /ActualText do not match and are therefore kept. All other input is
-- copied as it is.
local stripper

do

    local numchar   = P("\\") * (R("09")^3 + P(1))
    local number    = lpegpatterns.number
    local keyword   = P("/") * R("AZ","az","09","__")^1
    local operator  = (R("AZ","az")+P("*")+P("'")+P('"'))^1

    local skipstart = P("BDC") + P("BMC") + P("DP") + P("MP")
    local skipstop  = P("EMC")
    local skipkeep  = P("/ActualText")

    local grammar = P { "skip",
        start      = keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces,
        keyvalue   = optspaces * (keyword * optspaces * V("start") * optspaces)^1,
        -- same as keyvalue, but refusing the key that has to be kept
        xeyvalue   = optspaces * ((keyword - skipkeep) * optspaces * V("start") * optspaces)^1,
        array      = P("[")  * V("start")^0    * P("]"),
        dictionary = P("<<") * V("keyvalue")^0 * P(">>"),
        xictionary = P("<<") * V("xeyvalue")^0 * P(">>"),
        hexstring  = P("<")  * ( 1-P(">"))^0   * P(">"),
        decstring  = P("(")  * (numchar+1-(P")"))^0 * P(")"),
        skip       = (optspaces * ( keyword * optspaces * V("xictionary") * optspaces * skipstart + skipstop) / "")
                   + V("start")
                   + operator
    }

    stripper = Cs((grammar + P(1))^1)

end
581
-- Parses a content stream into a list of operations (see the parser).
function lpdf_epdf.parsecontent(str)
    local operations = lpegmatch(parser,str)
    return operations
end

-- Returns the content stream with marked content operators removed. The
-- stripper only runs when the stream contains "EMC" at all; otherwise the
-- string is returned untouched.
function lpdf_epdf.stripcontent(str)
    if not find(str,"EMC") then
        return str
    end
    return lpegmatch(stripper,str)
end
593
594
595
596
597
local fromsixteen = lpdf.fromsixteen

-- Handler for one bfchar entry: maps character code "a" (hex) to the
-- converted value of "b".
local function f_bfchar(t,a,b)
    local code = tonumber(a,16)
    t[code] = fromsixteen(b)
end

-- Handler for a bfrange entry with a single target: not implemented yet,
-- the arguments are only printed.
local function f_bfrange_1(t,a,b,c)
    print("todo 1",a,b,c)
end

-- Handler for a bfrange entry with a list of targets: not implemented yet,
-- the arguments are only printed.
local function f_bfrange_2(t,a,b,c)
    print("todo 2",a,b,c)
end

-- Patterns for the bfchar and bfrange sections of a ToUnicode cmap. The
-- extra match argument is the table that collects the mapping; it is also
-- what a successful match returns.
local optionals   = spaces^0
local hexstring   = optionals * P("<") * C((1-P(">"))^1) * P(">")
local hexlist     = optionals * P("[") * Ct(hexstring^1) * optionals * P("]")
local bfchar      = Carg(1) * hexstring * hexstring / f_bfchar
local bfrange     = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1
                  + Carg(1) * hexstring * hexstring * hexlist / f_bfrange_2
local bfcharlist  = P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" )
local bfrangelist = P("beginbfrange") * bfrange^1 * optionals * P("endbfrange")
local fromunicode = (bfcharlist + bfrangelist + spaces + P(1))^1 * Carg(1)
627
-- Collects per-font information for the fonts in a resource dictionary.
--
-- For every font in resources.Font that is not yet known, the ToUnicode
-- stream (when there is one) is parsed into a code -> string table and
-- stored as the "tounicode" field of the font record in
-- document.__fonts__. Returns that (shared) font table.
local function analyzefonts(document,resources)
    local fonts    = document.__fonts__
    local fontlist = resources and resources.Font
    if fontlist then
        for id, data in expanded(fontlist) do
            if not fonts[id] then
                -- not every font has a ToUnicode stream: only call the
                -- entry when it is there (it used to be called blindly,
                -- which fails on a nil entry)
                local stream    = data.ToUnicode
                local tounicode = type(stream) == "table" and stream() or nil
                if tounicode then
                    tounicode = lpegmatch(fromunicode,tounicode,1,{})
                end
                local font = {
                    tounicode = type(tounicode) == "table" and tounicode or { }
                }
                fonts[id] = font
                setmetatableindex(font,"self")
            end
        end
    end
    return fonts
end
651
652lpdf_epdf.analyzefonts = analyzefonts
653
-- State shared by the converters below: "more" holds a pending high
-- surrogate, "unic" the code -> string mapping of the current font.
local more = 0
local unic = nil

-- Converts one group of four hex digits. A high surrogate is remembered
-- (and yields no value); the next group is then combined with it into one
-- code point. The font mapping wins over the plain utf conversion.
local function hex_to_utf(s)
    local now = tonumber(s,16)
    if more > 0 then
        now  = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000
        more = 0
    elseif now >= 0xD800 and now <= 0xDBFF then
        more = now
        return
    end
    return unic[now] or utfchar(now)
end

-- Converts a single byte, again preferring the font mapping.
local function dec_to_utf(s)
    local now = byte(s)
    return unic[now] or utfchar(now)
end

-- Clears a pending surrogate at the start of each string.
local function reset()
    more = 0
end

local p_hex_to_utf = P(true) / reset * Cs((C(4) / hex_to_utf)^1)
local p_dec_to_utf = P(true) / reset * Cs((C(1) / dec_to_utf)^1)
678
-- Converts one tagged string operand ({ "hex", s } or { "dec", s }) into
-- text using the current font mapping; other operands yield their plain
-- value. When a conversion does not match, the raw string is kept.
local function decodedoperand(operand)
    local kind  = operand[1]
    local value = operand[2]
    if kind == "hex" then
        return lpegmatch(p_hex_to_utf,value) or value
    elseif kind == "dec" then
        -- the parser tags literal strings as "dec"; the old test for
        -- "string" could never succeed
        return lpegmatch(p_dec_to_utf,value) or value
    else
        return value
    end
end

-- Parses the content of a page and converts the operands of the text
-- showing operators in place.
--
-- For TJ the elements of the array become plain strings and numbers; for
-- Tj, ' and " the string operand (the one just before the operator) becomes
-- a plain string. Returns the list of operations, or nothing when the page
-- does not exist.
function lpdf_epdf.getpagecontent(document,pagenumber)

    local page = document.pages[pagenumber]

    if not page then
        return
    end

    local fonts = analyzefonts(document,page.Resources)

    -- Contents is either a single stream or an array of streams; it can
    -- also be absent
    local contents = page.Contents
    local content  = ""
    if type(contents) == "table" then
        local ctype = contents.__type__
        if ctype == stream_object_code then
            content = contents() or ""
        elseif ctype == array_object_code then
            local parts = { }
            for i=1,(contents.n or #contents) do
                local stream = contents[i]
                local data   = type(stream) == "table" and stream() or nil
                if data then
                    parts[#parts+1] = data
                end
            end
            content = concat(parts," ")
        end
    end

    -- the parser does not match an empty stream
    local list = lpegmatch(parser,content) or { }

    -- text can show up before the first Tf, so start with an empty mapping
    unic = { }

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            local name = entry[1]
            local font = type(name) == "table" and fonts[name[2]] or nil
            unic = font and font.tounicode or { }
        elseif operator == "TJ" then
            local data  = entry[1]
            local items = type(data) == "table" and data[2] or nil
            if type(items) == "table" then
                for j=1,#items do
                    local item = items[j]
                    if type(item) == "table" then
                        items[j] = decodedoperand(item)
                    end
                end
            end
        elseif operator == "Tj" or operator == "'" or operator == '"' then
            -- { string, Tj } { string, ' } { n, m, string, " }
            local data = entry[size-1]
            if type(data) == "table" then
                entry[size-1] = decodedoperand(data)
            end
        end
    end

    unic = nil

    return list

end
737
738
739
740
-- A soft hyphen at the end of a string; the "$" is a pattern anchor.
local softhyphen = utfchar(0xAD) .. "$"
-- A vertical move of more than linefactor times the font size starts a new
-- line.
local linefactor = 1.3

-- Turns a list of operations (as returned by getpagecontent) into text.
--
-- Strings shown by TJ and Tj are collected; inside a TJ array a number
-- below -50 becomes a space. A cm or Tm whose sixth operand differs from
-- the previous one by more than linefactor times the last font size adds a
-- newline, unless the text collected so far ends with a soft hyphen.
function lpdf_epdf.contenttotext(document,list)
    local last_y = 0
    local last_f = 0
    local text   = { }
    local last   = 0

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            -- { name, size, Tf }
            local fontsize = entry[2]
            if type(fontsize) == "table" and type(fontsize[2]) == "number" then
                last_f = fontsize[2]
            end
        elseif operator == "TJ" then
            local data  = entry[1]
            local items = type(data) == "table" and data[2] or nil
            if type(items) == "table" then
                for j=1,#items do
                    local li   = items[j]
                    local kind = type(li)
                    if kind == "string" then
                        last = last + 1
                        text[last] = li
                    elseif kind == "number" and li < -50 then
                        last = last + 1
                        text[last] = " "
                    end
                end
            end
        elseif operator == "Tj" then
            local li = entry[size-1]
            if type(li) == "table" then
                -- still a tagged operand: take its string
                li = li[2]
            end
            if type(li) == "string" then
                -- only one increment per string: the former extra one left
                -- a hole in the list, which makes concat fail
                last = last + 1
                text[last] = li
            end
        elseif operator == "cm" or operator == "Tm" then
            local operand = entry[6]
            local ty      = type(operand) == "table" and operand[2] or nil
            if type(ty) == "number" then
                local dy = abs(last_y - ty)
                if dy > linefactor*last_f then
                    if last > 0 then
                        -- pattern search, so that "$" anchors at the end; a
                        -- plain search would look for a literal dollar sign
                        if not find(text[last],softhyphen) then
                            last = last + 1
                            text[last] = "\n"
                        end
                    end
                end
                last_y = ty
            end
        end
    end

    return concat(text)
end
798
-- Reports the marked content structure of a list of operations (as
-- returned by getpagecontent): BDC opens a level, EMC closes one, and the
-- strings shown by TJ and Tj are reported at the current depth.
-- NOTE(review): the BDC line passes the first two operands as they are;
-- with the operand tables produced by the parser the MCID lookup probably
-- never finds anything - confirm what is wanted here.
function lpdf_epdf.getstructure(document,list)
    local depth = 0
    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "BDC" then
            report_epdf("%w%s : %s",depth,entry[1] or "?",entry[2] and entry[2].MCID or "?")
            depth = depth + 1
        elseif operator == "EMC" then
            depth = depth - 1
        elseif operator == "TJ" then
            -- the operand is { "array", items }: walk the items, not the
            -- wrapper (which made the number comparison fail on a table)
            local data  = entry[1]
            local items = type(data) == "table" and data[2] or nil
            if type(items) == "table" then
                for j=1,#items do
                    local li   = items[j]
                    local kind = type(li)
                    if kind == "string" then
                        report_epdf("%w > %s",depth,li)
                    elseif kind == "number" and li < -50 then
                        report_epdf("%w >",depth,li)
                    end
                end
            end
        elseif operator == "Tj" then
            report_epdf("%w > %s",depth,entry[size-1])
        end
    end
end
825
826if images then do
827
828
829
830
-- Options, both controlled by directives.
local recompress  = false
local stripmarked = false

directives.register("graphics.pdf.recompress",  function(v) recompress  = v end)
directives.register("graphics.pdf.stripmarked", function(v) stripmarked = v end)

-- Forward declarations: the copiers are mutually recursive.
local copydictionary = nil
local copyarray      = nil

-- Backend helpers.
local pdfreserveobject     = lpdf.reserveobject
local shareobjectreference = lpdf.shareobjectreference
local pdfflushobject       = lpdf.flushobject
local pdfflushstreamobject = lpdf.flushstreamobject
local pdfreference         = lpdf.reference
local pdfconstant          = lpdf.constant
local pdfarray             = lpdf.array
local pdfdictionary        = lpdf.dictionary
local pdfnull              = lpdf.null
local pdfliteral           = lpdf.literal

local report      = logs.reporter("backend","xobjects")
local createimage = images.create

-- Scale factor applied to box coordinates.
local factor = 65536 / (7200/7227)

-- Returns a new four element box with each coordinate multiplied by factor.
local function scaledbbox(b)
    local llx, lly, urx, ury = b[1], b[2], b[3], b[4]
    return { llx*factor, lly*factor, urx*factor, ury*factor }
end

-- Filters that are looked up when a stream is considered for recompression.
local codecs = {
    ASCIIHexDecode  = true,
    ASCII85Decode   = true,
    RunLengthDecode = true,
    FlateDecode     = true,
    LZWDecode       = true,
}
868
-- Copies an indirect object into the output and returns a reference to it.
--
-- xref   : maps resolved objects to their object number in the source
-- copied : maps source object numbers to the numbers reserved for the copy
-- value  : the resolved object
--
-- An object is copied only once; later calls return a reference to the
-- number reserved the first time. Objects that have no source number are
-- reported and nothing is returned.
local function deepcopyobject(xref,copied,value)
    local objnum = xref[value]
    if not objnum then
        -- determine the kind from the value itself; this branch used to
        -- read a variable that does not exist in this scope
        local kind = type(value) == "table" and (value.__type__ or value[1]) or nil
        if kind == stream_object_code then
            report("stream not done: %s", objectcodes[kind] or "?")
        else
            report("object not done: %s", objectcodes[kind] or "?")
        end
        return
    end
    local usednum = copied[objnum]
    if not usednum then
        -- register the number before descending, so that circular
        -- references end up at this branch only once
        usednum = pdfreserveobject()
        copied[objnum] = usednum
        local entry = value
        local kind  = entry.__type__
        if kind == array_object_code then
            local a = copyarray(xref,copied,entry)
            pdfflushobject(usednum,tostring(a))
        elseif kind == dictionary_object_code then
            local d = copydictionary(xref,copied,entry)
            pdfflushobject(usednum,tostring(d))
        elseif kind == stream_object_code then
            local d = copydictionary(xref,copied,entry)
            -- NOTE(review): the filter is taken from the copied dictionary
            -- while codecs is keyed by plain names - confirm that this
            -- lookup can succeed
            local filter = d.Filter
            if filter and codecs[filter] and recompress then
                -- flush the decoded data and let the backend compress it
                d.Filter      = nil
                d.Length      = nil
                d.DecodeParms = nil
                d.DL          = nil
                local s = entry()
                pdfflushstreamobject(s,d,true,usednum)
            else
                -- pass the stream on as it is
                local s = entry(false)
                pdfflushstreamobject(s,d,"raw",usednum)
            end
        else
            local t = type(value)
            if t == "string" then
                value = pdfconstant(value)
            elseif t == "table" then
                local kind  = value[1]
                local entry = value[2]
                if kind == name_object_code then
                    value = pdfconstant(entry)
                elseif kind == string_object_code then
                    value = pdfliteral(entry,value[3])
                elseif kind == null_object_code then
                    value = pdfnull()
                elseif kind == reference_object_code then
                    value = deepcopyobject(xref,copied,entry)
                elseif entry == nil then
                    value = pdfnull()
                else
                    value = tostring(entry)
                end
            end
            pdfflushobject(usednum,value)
        end
    end
    return pdfreference(usednum)
end
936
-- Copies entry "key" of the wrapper "object" into something the backend
-- can serialize.
--
-- value is the raw entry; when it is not given it is taken from
-- object.__raw__. Strings become constants, other non-table values are
-- returned as they are, and tagged entries are converted according to their
-- object code. Arrays, dictionaries and references are copied from the
-- resolved entry (object[key]). Unknown codes are reported and yield
-- nothing.
local function copyobject(xref,copied,object,key,value)
    if not value then
        value = object.__raw__[key]
    end
    local t = type(value)
    if t == "string" then
        return pdfconstant(value)
    elseif t ~= "table" then
        return value
    end
    local kind = value[1]
    if kind == name_object_code then
        return pdfconstant(value[2])
    elseif kind == string_object_code then
        return pdfliteral(value[2],value[3])
    elseif kind == array_object_code then
        return copyarray(xref,copied,object[key])
    elseif kind == dictionary_object_code then
        return copydictionary(xref,copied,object[key])
    elseif kind == null_object_code then
        return pdfnull()
    elseif kind == reference_object_code then
        return deepcopyobject(xref,copied,object[key])
    else
        -- the code table is called objectcodes; the name used here before
        -- does not exist, so the report itself raised an error
        report("weird: %s", objectcodes[kind] or "?")
    end
end
965
-- Copies an array wrapper element by element into a new backend array.
copyarray = function (xref,copied,object)
    local raw    = object.__raw__
    local result = pdfarray()
    for index=1,#raw do
        local element = raw[index]
        result[index] = copyobject(xref,copied,object,index,element)
    end
    return result
end
974
-- Optional per-key handlers, only set while a page is being copied.
local plugins = nil

-- Copies a dictionary wrapper into a new backend dictionary, visiting the
-- keys in sorted order. A key that has a plugin is handled by that plugin
-- (which also gets copyobject passed); all other keys are copied the normal
-- way.
copydictionary = function (xref,copied,object)
    local raw    = object.__raw__
    local result = pdfdictionary()
    for key, value in sortedhash(raw) do
        local plugin = plugins and plugins[key]
        if plugin then
            result[key] = plugin(xref,copied,object,key,value,copyobject)
        else
            result[key] = copyobject(xref,copied,object,key,value)
        end
    end
    return result
end
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
-- Copies the Resources dictionary of a page and returns a shared object
-- reference to the copy; yields nothing when the page has no Resources.
local function copyresources(pdfdoc,xref,copied,pagedata)
    local resources = pagedata.Resources
    if not resources then
        return
    end
    local copy = copydictionary(xref,copied,resources)
    return shareobjectreference(copy)
end
1030
-- The image interface reuses the generic loader and unloader.
local openpdf  = lpdf_epdf.load
local closepdf = lpdf_epdf.unload

-- Opens a document from a string holding the pdf data.
local function newpdf(str,userpassword,ownerpassword)
    local fromstring = true
    return openpdf(str,userpassword,ownerpassword,fromstring)
end

-- Maps the size keywords to the names of the page boxes.
local sizes = {
    media = "MediaBox",
    crop  = "CropBox",
    bleed = "BleedBox",
    trim  = "TrimBox",
    art   = "ArtBox",
}
1047
-- Returns a table describing one page of a document: file name, page
-- number, number of pages, the boxes, the rotation and the dimensions.
--
-- pagenumber defaults to 1. size selects the box that acts as crop box
-- ("crop" when omitted or unknown); a missing box falls back to the media
-- box, and a missing media box to { 0, 0, 0, 0 }. Yields nothing without a
-- document or when the page does not exist.
local function querypdf(pdfdoc,pagenumber,size)
    if not pdfdoc then
        return
    end
    pagenumber = pagenumber or 1
    local page = pdfdoc.pages[pagenumber]
    if not page then
        return
    end
    local sizetag  = sizes[size or "crop"] or sizes.crop
    local mediabox = page.MediaBox or { 0, 0, 0, 0 }
    local cropbox  = page[sizetag] or mediabox
    local info = { }
    info.filename    = pdfdoc.filename
    info.pagenumber  = pagenumber
    info.nofpages    = pdfdoc.nofpages
    info.boundingbox = scaledbbox(cropbox)
    info.cropbox     = cropbox
    info.mediabox    = mediabox
    info.bleedbox    = page.BleedBox or cropbox
    info.trimbox     = page.TrimBox or cropbox
    info.artbox      = page.ArtBox or cropbox
    info.rotation    = page.Rotate or 0
    info.xsize       = cropbox[3] - cropbox[1]
    info.ysize       = cropbox[4] - cropbox[2]
    return info
end
1076
-- Turns one page of a document into a form xobject image.
--
-- pdfdoc     : a loaded document
-- pagenumber : the page to copy (defaults to 1)
-- attributes : optional wrapper whose entries are set on the page
-- compact    : when set and lpdf_epdf.plugin exists, plugins are installed
--              while the page is copied
-- width,
-- height     : not used here
-- attr       : added to the xobject dictionary
--
-- Returns the result of createimage, or nothing when there is no document,
-- no such page, or the page has no Contents.
local function copypage(pdfdoc,pagenumber,attributes,compact,width,height,attr)
    if not pdfdoc then
        return
    end
    -- a page number that is out of range used to fail on page.Contents;
    -- treat it like querypdf does and return nothing
    local page     = pdfdoc.pages[pagenumber or 1]
    local pageinfo = page and querypdf(pdfdoc,pagenumber)
    if not pageinfo then
        return
    end
    local contents = page.Contents
    if not contents then
        return
    end
    local xref   = pdfdoc.__xrefs__
    local copied = pdfdoc.__copied__
    if compact and lpdf_epdf.plugin then
        plugins = lpdf_epdf.plugin(pdfdoc,xref,copied,page)
    end
    local xobject = pdfdictionary {
        Type           = pdfconstant("XObject"),
        Subtype        = pdfconstant("Form"),
        FormType       = 1,
        Group          = copyobject(xref,copied,page,"Group"),
        LastModified   = copyobject(xref,copied,page,"LastModified"),
        Metadata       = copyobject(xref,copied,page,"Metadata"),
        PieceInfo      = copyobject(xref,copied,page,"PieceInfo"),
        Resources      = copyresources(pdfdoc,xref,copied,page),
        SeparationInfo = copyobject(xref,copied,page,"SeparationInfo"),
    } + attr
    if attributes then
        for k, v in expanded(attributes) do
            page[k] = v
        end
    end
    local content  = ""
    local nolength = nil
    local ctype    = contents.__type__
    if ctype == stream_object_code then
        if stripmarked then
            content = contents()
            local stripped = lpdf_epdf.stripcontent(content)
            if stripped ~= content then
                content = stripped
            end
        elseif recompress then
            content = contents()
        else
            local Filter = copyobject(xref,copied,contents,"Filter")
            local Length = copyobject(xref,copied,contents,"Length")
            if Length and Filter then
                -- pass the data on as it is, with its own length and filter
                nolength = true
                xobject.Length = Length
                xobject.Filter = Filter
                content = contents(false)
            else
                content = contents()
            end
        end
    elseif ctype == array_object_code then
        -- several streams: read each of them and join the results
        content = { }
        for i=1,#contents do
            content[i] = contents[i]()
        end
        content = concat(content," ")
    end
    -- the plugins only apply to this page
    plugins = nil
    local rotation    = pageinfo.rotation
    local boundingbox = pageinfo.boundingbox
    local transform   = nil
    if rotation == 90 then
        transform = 3
    elseif rotation == 180 then
        transform = 2
    elseif rotation == 270 then
        transform = 1
    elseif rotation > 1 and rotation < 4 then
        transform = rotation
    end
    xobject.BBox = pdfarray {
        boundingbox[1] * bpfactor,
        boundingbox[2] * bpfactor,
        boundingbox[3] * bpfactor,
        boundingbox[4] * bpfactor,
    }
    return createimage {
        bbox      = boundingbox,
        transform = transform,
        nolength  = nolength,
        nobbox    = true,
        notype    = true,
        stream    = content,
        attr      = xobject(),
        kind      = images.types.stream,
    }
end
1175
-- Public image interface of this module.
local image = { }

image.open  = openpdf
image.close = closepdf
image.new   = newpdf
image.query = querypdf
image.copy  = copypage

lpdf_epdf.image = image
1183
1184
1185
1186
1187
1188
1189
1190end end
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204 |