lpdf-pde.lua /size: 40 Kb    last modification: 2020-07-01 14:35
1
if
not
modules
then
modules
=
{
}
end
modules
[
'
lpdf-epd
'
]
=
{
2
version
=
1
.
001
,
3
comment
=
"
companion to lpdf-epa.mkiv
"
,
4
author
=
"
Hans Hagen, PRAGMA-ADE, Hasselt NL
"
,
5
copyright
=
"
PRAGMA ADE / ConTeXt Development Team
"
,
6
license
=
"
see context related readme files
"
,
7
history
=
"
this one replaces the poppler/pdfe binding
"
,
8
}
9 10
-- \enabledirectives[graphics.pdf.uselua]
11
-- \enabledirectives[graphics.pdf.recompress]
12
-- \enabledirectives[graphics.pdf.stripmarked]
13 14
-- maximum integer : +2^32
15
-- maximum real : +2^15
16
-- minimum real : 1/(2^16)
17 18
-- get_flagged : does that still work
19 20
-- ppdoc_permissions (ppdoc *pdf);
21 22
-- PPSTRING_ENCODED 1 << 0
23
-- PPSTRING_DECODED 1 << 1
24
-- PPSTRING_EXEC 1 << 2 postscript only
25
-- PPSTRING_PLAIN 0
26
-- PPSTRING_BASE16 1 << 3
27
-- PPSTRING_BASE85 1 << 4
28
-- PPSTRING_UTF16BE 1 << 5
29
-- PPSTRING_UTF16LE 1 << 6
30 31
-- PPDOC_ALLOW_PRINT 1 << 2 printing
32
-- PPDOC_ALLOW_MODIFY 1 << 3 filling form fields, signing, creating template pages
33
-- PPDOC_ALLOW_COPY 1 << 4 copying, copying for accessibility
34
-- PPDOC_ALLOW_ANNOTS 1 << 5 filling form fields, copying, signing
35
-- PPDOC_ALLOW_EXTRACT 1 << 9 contents copying for accessibility
36
-- PPDOC_ALLOW_ASSEMBLY 1 << 10 no effect
37
-- PPDOC_ALLOW_PRINT_HIRES 1 << 11 no effect
38 39
-- PPCRYPT_NONE 0 no encryption, go ahead
40
-- PPCRYPT_DONE 1 encryption present but password succeeded, go ahead
41
-- PPCRYPT_PASS -1 encryption present, need non-empty password
42
-- PPCRYPT_FAIL -2 invalid or unsupported encryption (eg. undocumented in pdf spec)
43 44
local
setmetatable
,
type
,
next
=
setmetatable
,
type
,
next
45
local
tostring
,
tonumber
,
unpack
=
tostring
,
tonumber
,
unpack
46
local
char
,
byte
,
find
=
string
.
char
,
string
.
byte
,
string
.
find
47
local
abs
=
math
.
abs
48
local
concat
,
swapped
,
sortedhash
,
sortedkeys
=
table
.
concat
,
table
.
swapped
,
table
.
sortedhash
,
table
.
sortedkeys
49
local
utfchar
=
string
.
char
50
local
setmetatableindex
=
table
.
setmetatableindex
51
local
ioopen
=
io
.
open
52 53
local
lpegmatch
,
lpegpatterns
=
lpeg
.
match
,
lpeg
.
patterns
54
local
P
,
C
,
S
,
R
,
Ct
,
Cc
,
V
,
Carg
,
Cs
,
Cf
,
Cg
=
lpeg
.
P
,
lpeg
.
C
,
lpeg
.
S
,
lpeg
.
R
,
lpeg
.
Ct
,
lpeg
.
Cc
,
lpeg
.
V
,
lpeg
.
Carg
,
lpeg
.
Cs
,
lpeg
.
Cf
,
lpeg
.
Cg
55 56
if
not
lpdf
then
57
require
(
"
lpdf-aux
"
)
58
end
59 60
if
not
(
number
and
number
.
dimenfactors
)
then
61
require
(
"
util-dim
"
)
62
end
63 64
local
pdfe
=
pdfe
65
lpdf
=
lpdf
or
{
}
66
local
lpdf
=
lpdf
67
local
lpdf_epdf
=
{
}
68
lpdf
.
epdf
=
lpdf_epdf
69 70
local
pdfopen
=
pdfe
.
open
71
local
pdfopenfile
=
pdfe
.
openfile
72
local
pdfnew
=
pdfe
.
new
73
local
pdfclose
=
pdfe
.
close
74 75
local
getcatalog
=
pdfe
.
getcatalog
76
local
getinfo
=
pdfe
.
getinfo
77
local
gettrailer
=
pdfe
.
gettrailer
78
local
getnofpages
=
pdfe
.
getnofpages
79
local
getversion
=
pdfe
.
getversion
80
local
getbox
=
pdfe
.
getbox
81
local
getstatus
=
pdfe
.
getstatus
82
local
unencrypt
=
pdfe
.
unencrypt
83 84
local
dictionarytotable
=
pdfe
.
dictionarytotable
85
local
arraytotable
=
pdfe
.
arraytotable
86
local
pagestotable
=
pdfe
.
pagestotable
87
local
readwholestream
=
pdfe
.
readwholestream
88 89
local
getfromreference
=
pdfe
.
getfromreference
90 91
local
report_epdf
=
logs
.
reporter
(
"
epdf
"
)
92 93
local
allocate
=
utilities
.
storage
.
allocate
94 95
local
bpfactor
=
number
.
dimenfactors
.
bp
96 97
-- Object type codes: "none" is stored at index 0 and the remaining names
-- follow positionally at 1, 2, 3 ...
local objectcodes = { [0] =
    "none",
    "null",
    "bool",
    "integer",
    "number",
    "name",
    "string",
    "array",
    "dictionary",
    "stream",
    "reference",
}

-- Encryption states, compare the PPCRYPT_* values listed at the top of this file.
local encryptioncodes = {
     [0] = "notencrypted",
     [1] = "unencrypted",
    [-1] = "protected",
    [-2] = "failure",
}

-- After this step the tables are used in both directions in this file: by
-- name (objectcodes.null) and by code (objectcodes[kind]).
objectcodes     = allocate(swapped(objectcodes,objectcodes))
encryptioncodes = allocate(swapped(encryptioncodes,encryptioncodes))

pdfe.objectcodes     = objectcodes
pdfe.encryptioncodes = encryptioncodes

-- Each code is bound to a local exactly once (the original declared
-- null_object_code and reference_object_code twice).
local none_object_code       = objectcodes.none
local null_object_code       = objectcodes.null
local bool_object_code       = objectcodes.bool
local integer_object_code    = objectcodes.integer
local number_object_code     = objectcodes.number
local name_object_code       = objectcodes.name
local string_object_code     = objectcodes.string
local array_object_code      = objectcodes.array
local dictionary_object_code = objectcodes.dictionary
local stream_object_code     = objectcodes.stream
local reference_object_code  = objectcodes.reference
138 139
local
checked_access
140
local
get_flagged
-- from pdfe -> lpdf
141 142
-- get_flagged(t,f,k) fetches t[k] (which resolves the entry) and, when running
-- inside context, wraps the result in the lpdf object that matches the flag
-- found in f[k]. Outside context the plain value is returned.
if lpdf.dictionary then

    -- we're in context

    local pdfdictionary = lpdf.dictionary
    local pdfarray      = lpdf.array
    local pdfconstant   = lpdf.constant
    local pdfstring     = lpdf.string
    local pdfunicode    = lpdf.unicode

    get_flagged = function(t,f,k)
        local tk = t[k] -- triggers resolve
        local fk = f[k]
        if not fk then
            return tk
        elseif fk == "name" then
            return pdfconstant(tk)
        elseif fk == "array" then
            return pdfarray(tk)
        elseif fk == "dictionary" then
            -- was pdfarray(tk): a dictionary has to become a dictionary
            return pdfdictionary(tk)
        elseif fk == "rawtext" then
            return pdfstring(tk)
        elseif fk == "unicode" then
            return pdfunicode(tk)
        else
            return tk
        end
    end

else

    get_flagged = function(t,f,k)
        return t[k]
    end

end
179 180
-- We need to convert the string from utf16 although there is no way to
181
-- check if we have a regular string starting with a bom. So, we have
182
-- a dilemma here: a pdf doc encoded string can be invalid utf.
183 184
-- <hex encoded> : implicit 0 appended if odd
185
-- (byte encoded) : \( \) \\ escaped
186
--
187
-- <FE><FF> : utf16be
188
--
189
-- \n \r \t \b \f \( \) \\ \NNN and \<newline> : append next line
190
--
191
-- the getString function gives back bytes so we don't need to worry about
192
-- the hex aspect.
193 194
local
some_dictionary
195
local
some_array
196
local
some_stream
197
local
some_reference
198 199
local
some_string
=
lpdf
.
frombytes
200 201
-- Returns the entry 'key' of the raw table 't' in a usable form. Simple values
-- are passed on as they are, tagged entries ({ kind, value, detail }) are
-- unwrapped or turned into a lazy wrapper. Nothing is returned when the key
-- or the entry is missing.
local function get_value(document,t,key)
    if not key then
        return
    end
    local value = t[key]
    if not value then
        return
    end
    if type(value) ~= "table" then
        return value
    end
    -- we can assume names to be simple and strings to be tables
    local kind = value[1]
    if kind == name_object_code then
        return value[2]
    elseif kind == string_object_code then
        return some_string(value[2],value[3])
    elseif kind == array_object_code then
        return some_array(value[2],document)
    elseif kind == dictionary_object_code then
        return some_dictionary(value[2],document)
    elseif kind == stream_object_code then
        -- some_stream expects (stream,dictionary,document); the original passed
        -- (value,document), which left 'document' nil inside the wrapper.
        -- NOTE(review): assumes the entry is { kind, stream, dictionary } like
        -- the other tagged entries -- confirm against the pdfe library.
        return some_stream(value[2],value[3],document)
    elseif kind == reference_object_code then
        return some_reference(value,document)
    end
    return value
end
229 230
-- Wraps a pdfe dictionary in a table that resolves its entries on demand:
-- indexing goes through get_value, calling the wrapper goes through
-- get_flagged. The raw table and the type code are kept as fields. Returns
-- the wrapper and the string "dictionary".
some_dictionary = function(d,document)
    local raw  = dictionarytotable(d,true)
    local meta = {
        __index = function(_,key)
            return get_value(document,raw,key)
        end,
        __call = function(self,key)
            return get_flagged(self,raw,key)
        end,
    }
    local wrapper = {
        __raw__  = raw,
        __type__ = dictionary_object_code,
    }
    return setmetatable(wrapper,meta), "dictionary"
end
242 243
-- Wraps a pdfe array in a table that resolves its entries on demand. The
-- number of raw entries is stored in field n and also reported by the length
-- operator. Returns the wrapper and the string "array".
some_array = function(a,document)
    local raw   = arraytotable(a,true)
    local count = #raw
    local meta  = {
        __index = function(_,key)
            return get_value(document,raw,key)
        end,
        __call = function(self,key)
            return get_flagged(self,raw,key)
        end,
        __len = function()
            return count
        end,
    }
    local wrapper = {
        __raw__  = raw,
        __type__ = array_object_code,
        n        = count,
    }
    return setmetatable(wrapper,meta), "array"
end
259 260
-- Wraps a pdfe stream: indexing resolves entries of the stream dictionary d,
-- calling the wrapper reads the stream data. Only an explicit false asks for
-- the original data, any other argument (or none) gives the uncompressed
-- data. Returns the wrapper and the string "stream".
some_stream = function(s,d,document)
    local raw  = dictionarytotable(d,true)
    local meta = {
        __index = function(_,key)
            return get_value(document,raw,key)
        end,
        __call = function(_,decode)
            -- false : original, everything else : uncompressed
            return readwholestream(s,decode ~= false)
        end,
    }
    local wrapper = {
        __raw__  = raw,
        __type__ = stream_object_code,
    }
    return setmetatable(wrapper,meta), "stream"
end
276 277
-- Resolves the reference entry r (object number in r[3], reference in r[2]).
-- The result is kept in document.__cache__ under the object number, and
-- document.__xrefs__ maps the result back to that number.
some_reference = function(r,document)
    local objnum = r[3]
    local cache  = document.__cache__
    local cached = cache[objnum]
    if cached then
        return cached
    end
    local kind, object, b, c = getfromreference(r[2])
    if kind == dictionary_object_code then
        cached = some_dictionary(object,document)
    elseif kind == array_object_code then
        cached = some_array(object,document)
    elseif kind == stream_object_code then
        -- for a stream the third result is handed over as its dictionary
        cached = some_stream(object,b,document)
    else
        cached = { kind, object, b, c }
        -- really cache this?
    end
    cache[objnum] = cached
    document.__xrefs__[cached] = objnum
    return cached
end
297 298
-- Functions registered here compute a document field on first access.
local resolvers     = { }
lpdf_epdf.resolvers = resolvers

-- Looks up the resolver for key k, stores its result in the document and
-- returns it; returns nothing when no resolver is registered for k.
local function resolve(document,k)
    local resolver = resolvers[k]
    if not resolver then
        return
    end
    local entry = resolver(document)
    document[k] = entry
    return entry
end
309 310
-- Collects the key/value pairs of a name tree node into target (created on
-- demand): a node with a Names list contributes its pairs, otherwise its
-- Kids are visited recursively.
--
-- document : not used here, passed on while recursing
-- n        : the node, may be nil or false
-- target   : optional table to collect into
--
-- Returns target, which stays nil when nothing was collected. A missing node
-- now hands back the target untouched; the original returned nothing there,
-- so one unresolved kid threw away everything gathered before it.
local function getnames(document,n,target) -- direct
    if not n then
        return target
    end
    local Names = n.Names
    if Names then
        target = target or { }
        for i=1,#Names,2 do
            target[Names[i]] = Names[i+1]
        end
    else
        local Kids = n.Kids
        if Kids then
            for i=1,#Kids do
                target = getnames(document,Kids[i],target)
            end
        end
    end
    return target
end
331 332
-- Collects the leaves of a Kids tree in traversal order: a node with Kids is
-- descended into, any other node is appended to target (created on demand).
--
-- document : not used here, passed on while recursing
-- n        : the node, may be nil or false
-- target   : optional list to append to
--
-- Returns target. A missing node hands back the target untouched; the
-- original returned nothing there and so lost the leaves found so far.
local function getkids(document,n,target) -- direct
    if not n then
        return target
    end
    local Kids = n.Kids
    if Kids then
        for i=1,#Kids do
            target = getkids(document,Kids[i],target)
        end
    elseif target then
        target[#target+1] = n
    else
        target = { n }
    end
    return target
end
347 348
-- Flattens the Dests name tree of the catalog, when there is one.
function resolvers.destinations(document)
    local nametrees = document.Catalog.Names
    local root      = nametrees and nametrees.Dests
    return getnames(document,root)
end
352 353
-- Flattens the JavaScript name tree of the catalog, when there is one.
function resolvers.javascripts(document)
    local nametrees = document.Catalog.Names
    local root      = nametrees and nametrees.JavaScript
    return getnames(document,root)
end
357 358
-- Gives the Fields entry of the catalog's AcroForm; without an AcroForm the
-- (false or nil) AcroForm value itself is returned.
function resolvers.widgets(document)
    local AcroForm = document.Catalog.AcroForm
    if AcroForm then
        return AcroForm.Fields
    end
    return AcroForm
end
362 363
-- Flattens the EmbeddedFiles name tree of the catalog, when there is one.
function resolvers.embeddedfiles(document)
    local nametrees = document.Catalog.Names
    local root      = nametrees and nametrees.EmbeddedFiles
    return getnames(document,root)
end
367 368
-- /OCProperties <<
369
-- /OCGs [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
370
-- /D <<
371
-- /Order [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
372
-- /ON [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
373
-- /OFF [ ]
374
-- >>
375
-- >>
376 377
-- Lists the Name of every optional content group (OCProperties.OCGs) in the
-- catalog; returns nothing when the catalog has no such list.
function resolvers.layers(document)
    local properties = document.Catalog.OCProperties
    local layers     = properties and properties.OCGs
    if not layers then
        return
    end
    local names = { }
    for i=1,#layers do
        names[i] = layers[i].Name
    end
    -- names.n = #layers
    return names
end
392 393
-- Hands over the structure tree root of the catalog as it is.
function resolvers.structure(document)
    -- this might become a tree
    local catalog = document.Catalog
    return catalog.StructTreeRoot
end
397 398
-- Builds the list of page wrappers. Each page gets its page number and the
-- five boxes as fields, and is registered in the document's cache and xref
-- tables under its page reference. Pages for which no data can be found are
-- reported and left out.
function resolvers.pages(document)
    local __data__  = document.__data__
    local __xrefs__ = document.__xrefs__
    local __cache__ = document.__cache__
    --
    local nofpages = document.nofpages
    local pages    = { }
    local rawpages = pagestotable(__data__)
    document.pages = pages
    --
    for pagenumber=1,nofpages do
        local rawpagedata = rawpages[pagenumber]
        if rawpagedata then
            local pagereference = rawpagedata[3]
            local pageobject    = rawpagedata[1]
            -- only wrap when there is an object; the original wrapped first
            -- and checked afterwards
            local pagedata      = pageobject and some_dictionary(pageobject,document)
            if pagedata then
                pagedata.number   = pagenumber
                pagedata.MediaBox = getbox(pageobject,"MediaBox")
                pagedata.CropBox  = getbox(pageobject,"CropBox")
                pagedata.BleedBox = getbox(pageobject,"BleedBox")
                pagedata.ArtBox   = getbox(pageobject,"ArtBox")
                pagedata.TrimBox  = getbox(pageobject,"TrimBox")
                pages[pagenumber]        = pagedata
                __xrefs__[pagedata]      = pagereference
                __cache__[pagereference] = pagedata
            else
                report_epdf("missing pagedata for page %i, case %i",pagenumber,1)
            end
        else
            report_epdf("missing pagedata for page %i, case %i",pagenumber,2)
        end
    end
    --
    -- pages.n = nofpages
    --
    return pages
end
436 437
-- Documents that have been opened, stored by filename and by document table.
local loaded    = { }
-- Incremented on every call of load that delivers a document (cached or new).
local nofloaded = 0

-- Opens a pdf and returns its document table, or nil on failure.
--
-- filename      : file name, or the pdf data itself when fromstring is set
-- userpassword  : optional, tried when the document reports a negative status
-- ownerpassword : optional, tried when the status is still negative after that
-- fromstring    : when true, filename holds the data instead of a name
--
-- A document that was loaded before is returned from the cache. A file that
-- cannot be opened now counts as a failed load; the original handed the nil
-- result of io.open straight to pdfe.openfile.
function lpdf_epdf.load(filename,userpassword,ownerpassword,fromstring)
    local document = loaded[filename]
    if not document then
        statistics.starttiming(lpdf_epdf)
        local __data__
        if fromstring then
            __data__ = pdfnew(filename,#filename)
        elseif pdfopenfile then
            local handle = ioopen(filename,"rb")
            if handle then
                __data__ = pdfopenfile(handle)
            end
        else
            __data__ = pdfopen(filename)
        end
        if __data__ then
            if userpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,userpassword,nil)
            end
            if ownerpassword and getstatus(__data__) < 0 then
                unencrypt(__data__,nil,ownerpassword)
            end
            if getstatus(__data__) < 0 then
                report_epdf("the document is encrypted, provide proper passwords",getstatus(__data__))
                __data__ = false
            end
            if __data__ then
                document = {
                    filename   = filename,
                    nofcopied  = 0,
                    copied     = { },
                    __cache__  = { },
                    __xrefs__  = { },
                    __fonts__  = { },
                    __copied__ = { },
                    __data__   = __data__,
                }
                document.Catalog = some_dictionary(getcatalog(__data__),document)
                document.Info    = some_dictionary(getinfo(__data__),document)
                document.Trailer = some_dictionary(gettrailer(__data__),document)
                -- fields that are not set yet are computed by the resolvers
                setmetatableindex(document,resolve)
                --
                document.majorversion, document.minorversion = getversion(__data__)
                --
                document.nofpages = getnofpages(__data__)
            else
                document = false
            end
        else
            document = false
        end
        -- a failure is stored as false, so the next call tries again
        loaded[filename] = document
        if document then
            loaded[document] = document
        end
        statistics.stoptiming(lpdf_epdf)
     -- print(statistics.elapsedtime(lpdf_epdf))
    end
    if document then
        nofloaded = nofloaded + 1
    end
    return document or nil
end
500 501
-- Closes a loaded document and forgets it. The argument is either the file
-- name or the document table (whose filename field is then used); anything
-- else, or a name that is not loaded, is ignored.
function lpdf_epdf.unload(filename)
    local name = filename
    if type(name) == "table" then
        name = name.filename
    end
    if type(name) ~= "string" then
        return
    end
    local document = loaded[name]
    if document then
        loaded[document] = nil
        loaded[name]     = nil
        pdfclose(document.__data__)
    end
end
514 515
-- for k, v in expanded(t) do
516 517
-- Iterator factory: walks the keys of t.__raw__ but delivers t[key] as value,
-- so each entry is fetched through the wrapper. The walk ends as soon as the
-- raw traversal has no (true) value left.
local function expanded(t)
    local function step(raw,previous)
        local key, value = next(raw,previous)
        if not value then
            return
        end
        return key, t[key]
    end
    return step, t.__raw__, nil
end
526 527
---------.expand = expand
528
lpdf_epdf
.
expanded
=
expanded
529 530
-- we could resolve the text stream in one pass if we directly handle the
531
-- font but why should we complicate things
532 533
-- Patterns for the content stream parser. A match of 'parser' gives a list of
-- operations; each operation is a table holding the captured operands
-- followed by the operator string.

local spaces    = lpegpatterns.whitespace^1
local optspaces = lpegpatterns.whitespace^0
-- a comment runs from a percent sign up to the end of the line
local comment   = P("%") * (1 - lpegpatterns.newline)^0
-- meant for use inside a Cs: either the backslash is dropped and the digits
-- after it are read in base 8 and replaced by that character, or the
-- backslash and the character after it are taken as they are
-- NOTE(review): R("09")^3 wants three or more digits and also accepts 8 and
-- 9, for which tonumber(s,8) gives nil -- confirm this is intended
local numchar   = P("\\")/"" * (R("09")^3/function(s) return char(tonumber(s,8)) end)
                + P("\\") * P(1)
-- a name: the slash is matched, only the part after it is captured
local key       = P("/") * C(R("AZ","az","09","__")^1)
-- operands are captured as { tag, value } pairs
local number    = Ct(Cc("number") * (lpegpatterns.number/tonumber))
local keyword   = Ct(Cc("name") * key)
-- an operator is a run of letters, stars and quotes
local operator  = C((R("AZ","az")+P("*")+P("'")+P('"'))^1)

local grammar   = P { "start",
    -- one or more operands, comments or whitespace
    start      = (comment + keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces)^1,
    -- the key is captured as a plain string, the value as whatever start captures
    keyvalue   = key * optspaces * V("start"),
    array      = Ct(Cc("array")  * P("[")  * Ct(V("start")^1)    * P("]")),
    dictionary = Ct(Cc("dict")   * P("<<") * Ct(V("keyvalue")^1) * P(">>")),
    hexstring  = Ct(Cc("hex")    * P("<")  * Cs(( 1-P(">"))^1)   * P(">")),
    decstring  = Ct(Cc("dec")    * P("(")  * Cs((numchar+ 1-(P")"))^1) * P(")")), -- untested
}

-- operands followed by their operator, collected in one table
local operation = Ct(grammar^1 * operator)
-- anything that is not an operation is skipped one character at a time
local parser    = Ct((operation + P(1))^1)
554 555
-- todo: speed this one up
556 557
-- Patterns for the stripper. These locals shadow the capturing ones defined
-- for the parser (which has already been built at this point); the versions
-- here only match, the single substitution sits in the 'skip' rule.

local numchar   = P("\\") * (R("09")^3 + P(1))
local number    = lpegpatterns.number
local keyword   = P("/") * R("AZ","az","09","__")^1
local operator  = (R("AZ","az")+P("*")+P("'")+P('"'))^1

-- operators that may follow a tag and property dictionary, and the closing one
local skipstart = P("BDC") + P("BMC") + P("DP") + P("MP")
local skipstop  = P("EMC")
-- dictionaries that have this key are not stripped
local skipkeep  = P("/ActualText")

local grammar   = P { "skip",
    start      = keyword + number + V("dictionary") + V("array") + V("hexstring") + V("decstring") + spaces,
    keyvalue   = optspaces * (keyword * optspaces * V("start") * optspaces)^1,
    -- as keyvalue, but a key that starts with /ActualText makes it fail
    xeyvalue   = optspaces * ((keyword - skipkeep) * optspaces * V("start") * optspaces)^1,
    array      = P("[")  * V("start")^0    * P("]"),
    dictionary = P("<<") * V("keyvalue")^0 * P(">>"),
    -- as dictionary, but built from xeyvalue
    xictionary = P("<<") * V("xeyvalue")^0 * P(">>"),
    hexstring  = P("<")  * (1-P(">"))^0    * P(">"),
    decstring  = P("(")  * (numchar+ 1-(P")"))^0 * P(")"),
    -- leading whitespace plus either 'name dictionary start-operator' or the
    -- stop operator is replaced by nothing, everything else is kept as it is
    skip       = (optspaces * ( keyword * optspaces * V("xictionary") * optspaces * skipstart + skipstop) / "")
               + V("start")
               + operator
}

-- the substituted copy of the whole input
local stripper = Cs((grammar + P(1))^1)
581 582
-- Parses a content stream string into a list of operations (see the parser
-- pattern); the result is nil when the pattern does not match.
function lpdf_epdf.parsecontent(str)
    local operations = lpegmatch(parser,str)
    return operations
end
585 586
-- Runs the stripper over a content stream. A string without "EMC" in it is
-- returned as it came in, without running the pattern at all.
function lpdf_epdf.stripcontent(str)
    if not find(str,"EMC") then
        return str
    end
    return lpegmatch(stripper,str)
end
593 594
-- beginbfrange : <start> <stop> <firstcode>
595
-- <start> <stop> [ <firstsequence> <firstsequence> <firstsequence> ]
596
-- beginbfchar : <code> <newcodes>
597 598
local
fromsixteen
=
lpdf
.
fromsixteen
-- maybe inline the lpeg ... but not worth it
599 600
-- Handler for one bfchar entry: the source code a (hex digits) becomes the
-- numeric key under which the converted target b is stored in t.
local function f_bfchar(t,a,b)
    local code = tonumber(a,16)
    t[code] = fromsixteen(b)
end
603 604
-- Handler for a bfrange entry of the form <start> <stop> <firstcode>.
-- Not implemented: it only prints its arguments and leaves t untouched, so
-- ranges of this form add nothing to the mapping.
local function f_bfrange_1(t,a,b,c)
    print("todo 1",a,b,c)
    -- c is string
    -- todo t[tonumber(a,16)] = fromsixteen(b)
end
609 610
-- Handler for a bfrange entry of the form <start> <stop> [ <sequence> ... ].
-- Not implemented: it only prints its arguments and leaves t untouched, so
-- ranges of this form add nothing to the mapping.
local function f_bfrange_2(t,a,b,c)
    print("todo 2",a,b,c)
    -- c is table
    -- todo t[tonumber(a,16)] = fromsixteen(b)
end
615 616
-- Patterns that scan a ToUnicode stream. The table to fill is passed as the
-- first extra argument of the match (Carg(1)) and is also what a successful
-- match returns.

local optionals   = spaces^0
-- <...> with optional whitespace in front; what is between the brackets is captured
local hexstring   = optionals * P("<") * C((1-P(">"))^1) * P(">")
-- two hex strings: code and replacement
local bfchar      = Carg(1) * hexstring * hexstring / f_bfchar
-- three hex strings, or two hex strings followed by a bracketed list of them
local bfrange     = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1
                  + Carg(1) * hexstring * hexstring * optionals * P("[") * Ct(hexstring^1) * optionals * P("]") / f_bfrange_2
-- handles the bfchar and bfrange sections and steps over everything else
local fromunicode = (
    P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" ) +
    P("beginbfrange") * bfrange^1 * optionals * P("endbfrange") +
    spaces +
    P(1)
)^1  * Carg(1)
627 628
-- Registers the fonts of a resource dictionary in document.__fonts__, keyed by
-- the resource name. Each new entry gets a tounicode table made from the
-- font's ToUnicode stream, or an empty table when there is no usable one.
-- Returns document.__fonts__.
--
-- The ToUnicode entry is only called when it is there and is a table; the
-- original called data.ToUnicode() unconditionally and so failed on every
-- font without such an entry.
local function analyzefonts(document,resources) -- unfinished, see mtx-pdf for better code
    local fonts = document.__fonts__
    if resources then
        local fontlist = resources.Font
        if fontlist then
            for id, data in expanded(fontlist) do
                if not fonts[id] then
                    --  a quick hack ... I will look into it more detail if I find a real
                    -- -application for it
                    local stream    = data and data.ToUnicode
                    local tounicode = nil
                    if type(stream) == "table" then
                        tounicode = stream() -- the stream data
                    end
                    if tounicode then
                        tounicode = lpegmatch(fromunicode,tounicode,1,{})
                    end
                    fonts[id] = {
                        tounicode = type(tounicode) == "table" and tounicode or { }
                    }
                    setmetatableindex(fonts[id],"self")
                end
            end
        end
    end
    return fonts
end

lpdf_epdf.analyzefonts = analyzefonts
653 654
-- State shared by the two converters below: 'more' holds a pending high
-- surrogate, 'unic' the mapping of the current font (set by getpagecontent).
local more = 0
local unic = nil -- cheaper than passing each time as Carg(1)

-- Converts one group of four hex digits. A mapping in unic wins, otherwise
-- the code goes through utfchar.
-- NOTE(review): utfchar is bound to string.char at the top of this file,
-- which only takes values up to 255 -- confirm that this is intended.
local p_hex_to_utf = C(4) / function(s) -- needs checking !
    local now = tonumber(s,16)
    if more > 0 then
        -- combine with the pending high surrogate; the 0x10000 offset is part
        -- of the regular utf16 surrogate pair formula
        now  = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000
        more = 0
        return (unic and unic[now]) or utfchar(now)
    elseif now >= 0xD800 and now <= 0xDBFF then
        more = now
        -- an explicit empty string: without a value the enclosing Cs keeps
        -- the four hex digits in its result
        return ""
    else
        return (unic and unic[now]) or utfchar(now)
    end
end

-- Converts one byte: a mapping in unic wins, otherwise the byte value goes
-- through utfchar.
local p_dec_to_utf = C(1) / function(s) -- needs checking !
    local now = byte(s)
    return (unic and unic[now]) or utfchar(now)
end

-- The patterns that are actually used: reset the surrogate state, then
-- substitute one or more groups.
local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_to_utf^1)
local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_to_utf^1)
678 679
-- Parses the content stream of a page and converts the strings shown by the
-- text operators, using the tounicode mapping of the font selected by the
-- last Tf.
--
-- Returns the list of operations, or nothing when the page does not exist.
-- In a TJ array every item is replaced by its converted string (or, for
-- other items, by its value); the string operand of Tj, ' and " keeps its
-- { kind, text } table with the text converted in place.
--
-- Fixed with respect to the original: the Tj branch used an undefined 'li'
-- and looked one level too deep for the kind; byte strings are tagged "dec"
-- by the parser but were tested against "string"; a missing Contents entry
-- and a failed parse no longer raise an error.
function lpdf_epdf.getpagecontent(document,pagenumber)

    local page = document.pages[pagenumber]

    if not page then
        return
    end

    local fonts    = analyzefonts(document,page.Resources)

    local contents = page.Contents
    local content  = contents and contents() or ""
    local list     = lpegmatch(parser,content) or { }
    local font     = nil

    unic = { } -- text can show up before the first Tf

    -- converts one parsed operand { kind, value }; a failed conversion
    -- (for instance fewer than four hex digits) keeps the value as it is
    local function converted(item)
        local kind  = item[1]
        local value = item[2]
        if kind == "hex" then
            return lpegmatch(p_hex_to_utf,value) or value
        elseif kind == "dec" or kind == "string" then
            return lpegmatch(p_dec_to_utf,value) or value
        else
            return value -- kern
        end
    end

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            font = fonts[entry[1][2]]
            unic = font and font.tounicode or { }
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = data[2]  -- { { ... }, { ... } }
            for j=1,#items do
                items[j] = converted(items[j])
            end
        elseif operator == "Tj" or operator == "'" or operator == '"' then
            -- { string, Tj } { string, ' } { n, m, string, " }
            local data = entry[size-1] -- { kind, text }
            if type(data) == "table" then
                data[2] = converted(data)
            end
        end
    end

    unic = nil -- can be collected

    return list

end
737 738
-- This is also an experiment. When I really need it I can improve it, for instance
739
-- with proper position calculating. It might be useful for some search or so.
740 741
-- A soft hyphen at the very end of a text fragment (the $ anchors the match).
local softhyphen = utfchar(0xAD) .. "$"
-- A vertical move of more than this factor times the font size is a new line.
local linefactor = 1.3

-- Turns a list of operations (as delivered by getpagecontent) into one string.
-- Strings shown by TJ and Tj are collected, a TJ kern below -50 gives a
-- space, and a cm or Tm that moves further vertically than linefactor times
-- the current font size gives a newline, unless the text so far ends in a
-- soft hyphen.
--
-- Fixed with respect to the original: the Tj branch bumped the index twice
-- and so left a hole in the collected text; a Tj operand that is still a
-- { kind, text } table is unwrapped; the soft hyphen test ran find in plain
-- mode, which made the $ a literal character; a cm or Tm without a numeric
-- sixth operand is skipped.
function lpdf_epdf.contenttotext(document,list) -- maybe signal fonts
    local last_y = 0
    local last_f = 0
    local text   = { }
    local last   = 0

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            last_f = entry[2][2] -- size
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = data[2]  -- { { ... }, { ... } }
            for j=1,#items do
                local li   = items[j]
                local kind = type(li)
                if kind == "string" then
                    last = last + 1
                    text[last] = li
                elseif kind == "number" and li < -50 then
                    last = last + 1
                    text[last] = " "
                end
            end
        elseif operator == "Tj" then
            local li = entry[size-1]
            if type(li) == "table" then
                li = li[2] -- the parser delivers { kind, text } here
            end
            if type(li) == "string" then
                last = last + 1
                text[last] = li
            end
        elseif operator == "cm" or operator == "Tm" then
            local operand = entry[6]
            local ty      = type(operand) == "table" and operand[2] or nil
            if type(ty) == "number" then
                local dy = abs(last_y - ty)
                if dy > linefactor*last_f then
                    if last > 0 then
                        if find(text[last],softhyphen) then
                            -- ignore
                        else
                            last = last + 1
                            text[last] = "\n"
                        end
                    end
                end
                last_y = ty
            end
        end
    end

    return concat(text)
end
798 799
-- Reports the marked content nesting of a list of operations together with
-- the text shown inside it: BDC opens a level, EMC closes one, TJ and Tj
-- report their strings at the current depth.
--
-- Fixed with respect to the original: the TJ branch walked the
-- { "array", items } wrapper instead of the items and compared whatever it
-- found with a number; operands that are still { kind, value } tables are
-- unwrapped before they are reported.
-- NOTE(review): entry[2].MCID is looked up as a field, while the parser
-- stores dictionaries as { "dict", { key, value, ... } } -- it looks like
-- this always ends up as "?"; confirm.
function lpdf_epdf.getstructure(document,list) -- just a test
    local depth = 0
    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "BDC" then
            local tag = entry[1]
            if type(tag) == "table" then
                tag = tag[2] -- { "name", tag }
            end
            report_epdf("%w%s : %s",depth,tag or "?",entry[2] and entry[2].MCID or "?")
            depth = depth + 1
        elseif operator == "EMC" then
            depth = depth - 1
        elseif operator == "TJ" then
            local data  = entry[1] -- { "array", { ... } }
            local items = type(data) == "table" and data[2] or { }
            for j=1,#items do
                local li = items[j]
                if type(li) == "table" then
                    li = li[2]
                end
                if type(li) == "string" then
                    report_epdf("%w > %s",depth,li)
                elseif type(li) == "number" and li < -50 then
                    report_epdf("%w >",depth,li)
                end
            end
        elseif operator == "Tj" then
            local li = entry[size-1]
            if type(li) == "table" then
                li = li[2]
            end
            report_epdf("%w > %s",depth,li)
        end
    end
end
825 826
if
images
then
do
827 828
-- This can be made a bit faster (just get raw data and pass it) but I will
829
-- do that later. In the end the benefit is probably negligible.
830 831
local
recompress
=
false
832
local
stripmarked
=
false
833 834
local
copydictionary
=
nil
835
local
copyarray
=
nil
836 837
local
pdfreserveobject
=
lpdf
.
reserveobject
838
local
shareobjectreference
=
lpdf
.
shareobjectreference
839
local
pdfflushobject
=
lpdf
.
flushobject
840
local
pdfflushstreamobject
=
lpdf
.
flushstreamobject
841
local
pdfreference
=
lpdf
.
reference
842
local
pdfconstant
=
lpdf
.
constant
843
local
pdfarray
=
lpdf
.
array
844
local
pdfdictionary
=
lpdf
.
dictionary
845
local
pdfnull
=
lpdf
.
null
846
local
pdfliteral
=
lpdf
.
literal
847 848
local
report
=
logs
.
reporter
(
"
backend
"
,
"
xobjects
"
)
849 850
local
factor
=
65536
/
(
7200
/
7227
)
-- 1/number.dimenfactors.bp
851 852
local
createimage
=
images
.
create
853 854
-- Both switches start out false (see the locals above); the value passed by
-- the directive replaces the local as it is.
directives.register("graphics.pdf.recompress", function(v) recompress  = v end)
directives.register("graphics.pdf.stripmarked",function(v) stripmarked = v end)
856 857
-- Gives a new four element box with every coordinate of b multiplied by factor.
local function scaledbbox(b)
    local scaled = { }
    for i=1,4 do
        scaled[i] = b[i]*factor
    end
    return scaled
end
860 861
-- Stream filters for which deepcopyobject is willing to unpack and repack a
-- stream when the recompress switch is on; streams with another filter are
-- passed on untouched.
local codecs = {
    ASCIIHexDecode  = true,
    ASCII85Decode   = true,
    RunLengthDecode = true,
    FlateDecode     = true,
    LZWDecode       = true,
}
868 869
-- Copies a resolved indirect object into the output and returns a reference
-- to the copy.
--
-- xref   : maps resolved objects to their object number in the source
-- copied : maps source object numbers to the reserved output object numbers
-- value  : the resolved object
--
-- An object that was copied before is not copied again, only its reference
-- is returned. An object that is not in xref is reported and nothing is
-- returned. The original read an out-of-scope 'kind' for that report (the
-- local of that name lives in the other branch), so the test for a stream
-- could never be true.
local function deepcopyobject(xref,copied,value)
    -- no need for tables, just nested loop with obj
    local objnum = xref[value]
    if objnum then
        local usednum = copied[objnum]
        if usednum then
            -- report("%s object %i is reused",kind,objnum)
        else
            usednum = pdfreserveobject()
            -- registered before the content is copied
            copied[objnum] = usednum
            local entry = value
            local kind  = entry.__type__
            if kind == array_object_code then
                local a = copyarray(xref,copied,entry)
                pdfflushobject(usednum,tostring(a))
            elseif kind == dictionary_object_code then
                local d = copydictionary(xref,copied,entry)
                pdfflushobject(usednum,tostring(d))
            elseif kind == stream_object_code then
                local d = copydictionary(xref,copied,entry)
                local filter = d.Filter
                if filter and codecs[filter] and recompress then
                    -- recompress
                    d.Filter      = nil
                    d.Length      = nil
                    d.DecodeParms = nil -- relates to filter
                    d.DL          = nil -- needed?
                    local s = entry()                         -- get uncompressed stream
                    pdfflushstreamobject(s,d,true,usednum)    -- compress stream
                else
                    -- keep as-is, even Length which indicates the
                    -- decompressed length
                    local s = entry(false)                        -- get compressed stream
                 -- pdfflushstreamobject(s,d,false,usednum,true)  -- don't compress stream
                    pdfflushstreamobject(s,d,"raw",usednum)       -- don't compress stream
                end
            else
                local t = type(value)
                if t == "string" then
                    value = pdfconstant(value)
                elseif t == "table" then
                    local kind  = value[1]
                    local entry = value[2]
                    if kind == name_object_code then
                        value = pdfconstant(entry)
                    elseif kind == string_object_code then
                        value = pdfliteral(entry,value[3])
                    elseif kind == null_object_code then
                        value = pdfnull()
                    elseif kind == reference_object_code then
                        value = deepcopyobject(xref,copied,entry)
                    elseif entry == nil then
                        value = pdfnull()
                    else
                        value = tostring(entry)
                    end
                end
                pdfflushobject(usednum,value)
            end
        end
        return pdfreference(usednum)
    else
        -- not a known indirect object: find out what it is for the report
        local kind = type(value) == "table" and value.__type__ or nil
        local name = kind and objectcodes[kind] or "?"
        if kind == stream_object_code then
            report("stream not done: %s",name)
        else
            report("object not done: %s",name)
        end
    end
end
936 937
-- Converts one entry of a dictionary or array into its lpdf counterpart.
--
-- xref, copied : passed on to the container and reference copiers
-- object       : the wrapper the entry belongs to
-- key          : key or index of the entry in object
-- value        : the raw entry; fetched from object.__raw__ when not given
--
-- Strings become constants, other simple values are passed on as they are,
-- tagged entries are handled by kind. An unknown kind is reported and gives
-- nothing; the original indexed an undefined global 'objecttypes' for that
-- report, which raised an error instead.
local function copyobject(xref,copied,object,key,value)
    if not value then
        value = object.__raw__[key]
    end
    local t = type(value)
    if t == "string" then
        return pdfconstant(value)
    elseif t ~= "table" then
        return value
    end
    local kind = value[1]
    if kind == name_object_code then
        return pdfconstant(value[2])
    elseif kind == string_object_code then
        return pdfliteral(value[2],value[3])
    elseif kind == array_object_code then
        -- object[key] gives the resolved wrapper
        return copyarray(xref,copied,object[key])
    elseif kind == dictionary_object_code then
        return copydictionary(xref,copied,object[key])
    elseif kind == null_object_code then
        return pdfnull()
    elseif kind == reference_object_code then
        -- expand
        return deepcopyobject(xref,copied,object[key])
    else
        report("weird: %s",kind and objectcodes[kind] or "?")
    end
end
965 966
-- Builds a new lpdf array with a copy of every raw entry of the wrapper
-- 'object', in the same order.
copyarray = function(xref,copied,object)
    local result = pdfarray()
    local raw    = object.__raw__
    for index=1,#raw do
        local item = raw[index]
        result[index] = copyobject(xref,copied,object,index,item)
    end
    return result
end
974 975
local
plugins
=
nil
976 977
-- Sorting the hash slows things down by up to 5% but it is still as fast as the C
978
-- code. We could loop over the index instead but sorting might be nicer in
979
-- the end.
980 981
-- Builds a new lpdf dictionary with a copy of every raw entry of the wrapper
-- 'object', visiting the keys in sorted order. When a plugin is registered
-- for a key it produces the value (it gets copyobject as an extra argument),
-- otherwise copyobject does.
copydictionary = function(xref,copied,object)
    local target = pdfdictionary()
    local source = object.__raw__
 -- for key, value in next, source do
    for key, value in sortedhash(source) do
        local plugin = plugins and plugins[key]
        if plugin then
            target[key] = plugin(xref,copied,object,key,value,copyobject) -- maybe a table of methods
        else
            target[key] = copyobject(xref,copied,object,key,value)
        end
    end
    return target
end
999 1000
-- local function copyresources(pdfdoc,xref,copied,pagedata)
1001
-- local Resources = pagedata.Resources
1002
-- if Resources then
1003
-- local r = pdfreserveobject()
1004
-- local d = copydictionary(xref,copied,Resources)
1005
-- pdfflushobject(r,tostring(d))
1006
-- return pdfreference(r)
1007
-- end
1008
-- end
1009 1010
-- Copies the Resources dictionary of a page and returns the outcome of
-- shareobjectreference for that copy. Nothing is returned when the page has
-- no Resources entry of its own. The pdfdoc argument is not used here.
--
-- Only pagedata.Resources is looked at; a Resources entry that lives in a
-- Parent node is not picked up. An (untested) sketch of such a lookup:
--
-- if not resources then
--     local Parent = pagedata.Parent
--     while Parent and (Parent.__type__ == dictionary_object_code or Parent.__type__ == reference_object_code) do
--         resources = Parent.Resources
--         if resources then
--             break
--         end
--         Parent = Parent.Parent
--     end
-- end

local function copyresources(pdfdoc,xref,copied,pagedata)
    local resources = pagedata.Resources
    if not resources then
        return
    end
    local dictionary = copydictionary(xref,copied,resources)
    return shareobjectreference(dictionary)
end
1030 1031
-- Local shortcuts for the loader and unloader of lpdf_epdf; they are used by
-- newpdf and exported again in lpdf_epdf.image.

local openpdf, closepdf = lpdf_epdf.load, lpdf_epdf.unload
1033 1034
-- todo: keep track of already open files
1035 1036
-- Opens a document through openpdf (lpdf_epdf.load). Compared to calling
-- openpdf directly, a fourth argument 'true' is always passed along.
--
-- NOTE(review): the name of the first parameter suggests that the pdf comes
-- as a string instead of a filename and that 'true' signals this - confirm
-- against lpdf_epdf.load.
--
-- str           : passed on as first argument
-- userpassword  : passed on unchanged
-- ownerpassword : passed on unchanged
--
-- Returns whatever openpdf returns.

local function newpdf(str,userpassword,ownerpassword)
    return openpdf(
        str,
        userpassword,
        ownerpassword,
        true
    )
end
1039 1040
-- Maps the short size names accepted by querypdf onto the page keys that hold
-- the corresponding box.

local sizes = {
    art   = "ArtBox",
    bleed = "BleedBox",
    crop  = "CropBox",
    media = "MediaBox",
    trim  = "TrimBox",
}
1047 1048
-- Collects the properties of one page of an opened document.
--
-- pdfdoc     : the opened document; without one nothing is returned
-- pagenumber : index into pdfdoc.pages, defaults to 1
-- size       : one of the keys of the sizes table ("crop", "media", ...);
--              anything else, including nil, selects the crop box
--
-- Returns a table, or nothing when there is no document or no such page. The
-- box selected by size falls back on the media box and the media box in turn
-- on { 0, 0, 0, 0 }. Beware: the selected box is what ends up in the cropbox
-- field, and it is also the fallback for the bleed, trim and art boxes as
-- well as the source of boundingbox, xsize and ysize.

local function querypdf(pdfdoc,pagenumber,size)
    if not pdfdoc then
        return
    end
    if not pagenumber then
        pagenumber = 1
    end
    local root = pdfdoc.Catalog -- not used below, the lookup is kept as it was
    local page = pdfdoc.pages[pagenumber]
    if not page then
        return
    end
    local boxname  = size and sizes[size] or sizes.crop
    local mediabox = page.MediaBox or { 0, 0, 0, 0 }
    local box      = page[boxname] or mediabox
    return {
        filename    = pdfdoc.filename,
        pagenumber  = pagenumber,
        nofpages    = pdfdoc.nofpages,
        boundingbox = scaledbbox(box),
        cropbox     = box,
        mediabox    = mediabox,
        bleedbox    = page.BleedBox or box,
        trimbox     = page.TrimBox or box,
        artbox      = page.ArtBox or box,
        rotation    = page.Rotate or 0,
        xsize       = box[3] - box[1],
        ysize       = box[4] - box[2],
    }
end
1076 1077
-- Wraps one page of an opened pdf document in a form xobject and passes the
-- result to createimage.
--
-- pdfdoc     : the opened document; without one nothing happens
-- pagenumber : index into pdfdoc.pages, defaults to 1
-- attributes : optional; the key/value pairs delivered by expanded(attributes)
--              are stored in the page (see the review note below)
-- compact    : when set and lpdf_epdf.plugin is available, the plugins that
--              it returns are active while the page is being copied
-- width      : not used here
-- height     : not used here
-- attr       : added to the xobject dictionary with the + operator
--
-- Returns whatever createimage returns. Nothing is returned when there is no
-- document, when the document has no such page (this used to trigger an
-- "attempt to index a nil value" error) or when the page has no Contents.
--
-- The stripmarked and recompress flags are defined elsewhere in this file,
-- presumably driven by the graphics.pdf.stripmarked and
-- graphics.pdf.recompress directives mentioned at the top.

local function copypage(pdfdoc,pagenumber,attributes,compact,width,height,attr)
    if not pdfdoc then
        return
    end
    local page = pdfdoc.pages[pagenumber or 1]
    if not page then
        -- a page number outside the document: bail out instead of crashing on
        -- page.Contents below
        return
    end
    local pageinfo = querypdf(pdfdoc,pagenumber)
    local contents = page.Contents
    if not contents then
        -- maybe report an error
        return
    end
    local xref   = pdfdoc.__xrefs__
    local copied = pdfdoc.__copied__
    if compact and lpdf_epdf.plugin then
        -- module level state that copydictionary consults; it is reset once
        -- the content has been fetched
        plugins = lpdf_epdf.plugin(pdfdoc,xref,copied,page)
    end
    local xobject = pdfdictionary {
        Type           = pdfconstant("XObject"),
        Subtype        = pdfconstant("Form"),
        FormType       = 1,
        Group          = copyobject(xref,copied,page,"Group"),
        LastModified   = copyobject(xref,copied,page,"LastModified"),
        Metadata       = copyobject(xref,copied,page,"Metadata"),
        PieceInfo      = copyobject(xref,copied,page,"PieceInfo"),
        Resources      = copyresources(pdfdoc,xref,copied,page),
        SeparationInfo = copyobject(xref,copied,page,"SeparationInfo"),
    } + attr
    if attributes then
        -- NOTE(review): the values end up in the source page and not in the
        -- xobject that was built above - confirm that this is intended
        for k, v in expanded(attributes) do
            page[k] = v -- maybe nested
        end
    end
    local content  = ""
    local nolength = nil
    local ctype    = contents.__type__
    -- we always recompress because image object streams can not be
    -- influenced (yet)
    if ctype == stream_object_code then
        if stripmarked then
            content = contents() -- uncompressed
            local stripped = lpdf_epdf.stripcontent(content)
            if stripped ~= content then
             -- report("%i bytes stripped on page %i",#content-#stripped,pagenumber or 1)
                content = stripped
            end
        elseif recompress then
            content = contents() -- uncompressed
        else
            local Filter = copyobject(xref,copied,contents,"Filter")
            local Length = copyobject(xref,copied,contents,"Length")
            if Length and Filter then
                -- the original Length and Filter are passed on, so the image
                -- backend is told not to add a length of its own
                nolength = true
                xobject.Length = Length
                xobject.Filter = Filter
                -- NOTE(review): presumably the stream as stored in the file
                -- (not decoded), given that its Filter is kept - confirm
                content = contents(false)
            else
                content = contents() -- uncompressed
            end
        end
    elseif ctype == array_object_code then
        -- an array of streams: fetch them all and join them with a space so
        -- that tokens at the boundaries stay separated
        content = { }
        for i=1,#contents do
            content[i] = contents[i]() -- uncompressed
        end
        content = concat(content," ")
    end
    -- still not nice: we double wrap now
    plugins = nil
    local rotation    = pageinfo.rotation
    local boundingbox = pageinfo.boundingbox
    local transform   = nil
    -- rotations in degrees are mapped onto a transform code, while the values
    -- 2 and 3 are passed on as they are
    if rotation == 90 then
        transform = 3
    elseif rotation == 180 then
        transform = 2
    elseif rotation == 270 then
        transform = 1
    elseif rotation > 1 and rotation < 4 then
        transform = rotation
    end
    xobject.BBox = pdfarray {
        boundingbox[1] * bpfactor,
        boundingbox[2] * bpfactor,
        boundingbox[3] * bpfactor,
        boundingbox[4] * bpfactor,
    }
    -- maybe like bitmaps
    return createimage { -- beware: can be a img.new or a dummy
        bbox      = boundingbox,
        transform = transform,
        nolength  = nolength,
        nobbox    = true,
        notype    = true,
        stream    = content, -- todo: no compress, pass directly also length, filter etc
        attr      = xobject(),
        kind      = images.types.stream,
    }
end
1175 1176
-- The interface for including pdf pages as images: the functions defined
-- above, collected under short names.

lpdf_epdf.image = {
    new   = newpdf,   -- openpdf with an extra 'true' argument
    open  = openpdf,  -- lpdf_epdf.load
    close = closepdf, -- lpdf_epdf.unload
    query = querypdf, -- page properties (boxes, rotation, sizes)
    copy  = copypage, -- page wrapped as form xobject image
}
1183 1184
-- lpdf.injectors.pdf = function(specification)
1185
-- local d = lpdf_epdf.load(specification.filename)
1186
-- print(d)
1187
-- end
1188 1189 1190
end
end
1191 1192
-- local d = lpdf_epdf.load("e:/tmp/oeps.pdf")
1193
-- inspect(d)
1194
-- inspect(d.Catalog.Lang)
1195
-- inspect(d.Catalog.OCProperties.D.AS[1].Event)
1196
-- inspect(d.Catalog.Metadata())
1197
-- inspect(d.Catalog.Pages.Kids[1])
1198
-- inspect(d.layers)
1199
-- inspect(d.pages)
1200
-- inspect(d.destinations)
1201
-- inspect(lpdf_epdf.getpagecontent(d,1))
1202
-- inspect(lpdf_epdf.contenttotext(document,lpdf_epdf.getpagecontent(d,1)))
1203
-- inspect(lpdf_epdf.getstructure(document,lpdf_epdf.getpagecontent(d,1)))
1204