lpdf-epd.lua /size: 41 Kb    last modification: 2020-07-01 14:35
1
if
not
modules
then
modules
=
{
}
end
modules
[
'
lpdf-epd
'
]
=
{
2
version
=
1
.
001
,
3
comment
=
"
companion to lpdf-epa.mkiv
"
,
4
author
=
"
Hans Hagen, PRAGMA-ADE, Hasselt NL
"
,
5
copyright
=
"
PRAGMA ADE / ConTeXt Development Team
"
,
6
license
=
"
see context related readme files
"
7
}
8 9
-- This is an experimental layer around the epdf library. The reason for this layer is that
10
-- I want to be independent of the library (which implements a selection of what a file
11
-- provides) and also because I want an interface closer to Lua's table model while the API
12
-- stays close to the original xpdf library. Of course, after prototyping a solution, we can
13
-- optimize it using the low level epdf accessors. However, not all are accessible (this will
14
-- be fixed).
15
--
16
-- It will be handy when we have a __length and __next that can trigger the resolve till then
17
-- we will provide .n as #; maybe in Lua 5.3 or later.
18
--
19
-- As there can be references to the parent we cannot expand a tree. I played with some
20
-- expansion variants but it does not pay off; adding extra checks is not worth the trouble.
21
--
22
-- The document stays open. In order to free memory one has to explicitly unload the loaded
23
-- document.
24
--
25
-- We have much more checking than needed in the prepare functions because occasionally
26
-- we run into bugs in poppler or the epdf interface. It took us a while to realize that
27
-- there was a long standing gc issue that on long runs with including many pages could
28
-- crash the analyzer.
29
--
30
-- Normally a value is fetched by key, as in foo.Title but as it can be in pdfdoc encoding
31
-- a safer bet is foo("Title") which will return a decoded string (or the original if it
32
-- already was unicode).
33 34
local
setmetatable
,
rawset
,
rawget
,
type
,
next
=
setmetatable
,
rawset
,
rawget
,
type
,
next
35
local
tostring
,
tonumber
,
unpack
=
tostring
,
tonumber
,
unpack
36
local
lower
,
match
,
char
,
byte
,
find
=
string
.
lower
,
string
.
match
,
string
.
char
,
string
.
byte
,
string
.
find
37
local
abs
=
math
.
abs
38
local
concat
=
table
.
concat
39
local
toutf
,
toeight
,
utfchar
=
string
.
toutf
,
utf
.
toeight
,
utf
.
char
40
local
setmetatableindex
=
table
.
setmetatableindex
41 42
local
lpegmatch
,
lpegpatterns
=
lpeg
.
match
,
lpeg
.
patterns
43
local
P
,
C
,
S
,
R
,
Ct
,
Cc
,
V
,
Carg
,
Cs
,
Cf
,
Cg
=
lpeg
.
P
,
lpeg
.
C
,
lpeg
.
S
,
lpeg
.
R
,
lpeg
.
Ct
,
lpeg
.
Cc
,
lpeg
.
V
,
lpeg
.
Carg
,
lpeg
.
Cs
,
lpeg
.
Cf
,
lpeg
.
Cg
44 45
local
epdf
=
epdf
46
lpdf
=
lpdf
or
{
}
47
local
lpdf
=
lpdf
48
local
lpdf_epdf
=
{
}
49
lpdf
.
epdf
=
lpdf_epdf
50 51
-- local getDict, getArray, getReal, getNum, getString, getBool, getName, getRef, getRefNum
52
-- local getType, getTypeName
53
-- local dictGetLength, dictGetVal, dictGetValNF, dictGetKey
54
-- local arrayGetLength, arrayGetNF, arrayGet
55
-- local streamReset, streamGetDict, streamGetChar
56 57
-- We use as little as possible and also not an object interface. After all, we
58
-- don't know how the library (and its api) evolves so we better can be prepared
59
-- for wrappers.
60 61
-- The library registers its metatables in the Lua registry. We pick up the
-- few accessors that we need and quit right away when one of them is missing,
-- because then we deal with an incompatible binding. The number in the message
-- tells which group of accessors failed.

local registry = debug.getregistry()

local object, dictionary, array = registry["epdf.Object"], registry["epdf.Dict"], registry["epdf.Array"]
local xref, catalog, pdfdoc     = registry["epdf.XRef"], registry["epdf.Catalog"], registry["epdf.PDFDoc"]

if not object or not dictionary or not array or not xref or not catalog or not pdfdoc then
    logs.report("fatal error","invalid pdf inclusion library (%s)",1)
    os.exit()
end

-- document level

local openPDF = epdf.open

local getMajorVersion, getMinorVersion = pdfdoc.getPDFMajorVersion, pdfdoc.getPDFMinorVersion
local getXRef, getRawCatalog           = pdfdoc.getXRef, pdfdoc.getCatalog

if not openPDF or not getMajorVersion or not getMinorVersion or not getXRef or not getRawCatalog then
    logs.report("fatal error","invalid pdf inclusion library (%s)",2)
    os.exit()
end

-- generic objects

local getDict, getArray, getReal, getInt = object.getDict, object.getArray, object.getReal, object.getInt
local getString, getBool, getName        = object.getString, object.getBool, object.getName
local getRef, getRefNum, getType         = object.getRef, object.getRefNum, object.getType

if not getDict or not getArray or not getReal or not getInt or not getString
    or not getBool or not getName or not getRef or not getRefNum or not getType then
    logs.report("fatal error","invalid pdf inclusion library (%s)",3)
    os.exit()
end

-- streams (streamGetAll is optional, getstream falls back on streamGetChar)

local streamReset, streamGetDict  = object.streamReset, object.streamGetDict
local streamGetChar, streamGetAll = object.streamGetChar, object.streamGetAll

if not streamReset or not streamGetDict or not streamGetChar then
    logs.report("fatal error","invalid pdf inclusion library (%s)",3)
    os.exit()
end

-- dictionaries

local dictGetLength, dictGetVal = dictionary.getLength, dictionary.getVal
local dictGetValNF, dictGetKey  = dictionary.getValNF, dictionary.getKey

if not dictGetLength or not dictGetVal or not dictGetValNF or not dictGetKey then
    logs.report("fatal error","invalid pdf inclusion library (%s)",4)
    os.exit()
end

-- arrays

local arrayGetLength, arrayGetNF, arrayGet = array.getLength, array.getNF, array.get

if not arrayGetLength or not arrayGetNF or not arrayGet then
    logs.report("fatal error","invalid pdf inclusion library (%s)",5)
    os.exit()
end

-- these are kind of weird as they can't be accessed by (root) object

local getNumPages, getPageRef       = catalog.getNumPages, catalog.getPageRef
local fetch, getCatalog, getDocInfo = xref.fetch, xref.getCatalog, xref.getDocInfo

if not getNumPages or not getPageRef or not fetch or not getCatalog or not getDocInfo then
    logs.report("fatal error","invalid pdf inclusion library (%s)",6)
    os.exit()
end
146 147
-- we're done with library shortcuts
148 149
-- Object type codes (as reported by getType) mapped onto readable names. The
-- first name belongs to code 0, the following ones to 1, 2, 3 and so on.

local typenames = { }

do
    local ordered = {
        "boolean", "integer", "real", "string", "name", "null", "array",
        "dictionary", "stream", "ref", "cmd", "error", "eof", "none", "integer64",
    }
    for i=1,#ordered do
        typenames[i-1] = ordered[i]
    end
end

-- the reverse mapping: name to code

local typenumbers = table.swapped(typenames)

local null_object_code = typenumbers["null"]
local ref_object_code  = typenumbers["ref"]
171 172
local
report_epdf
=
logs
.
reporter
(
"
epdf
"
)
173 174
-- Reports the given (format plus arguments) message, announces that the job
-- is aborted and then terminates the process, so this function never returns.

local function fatal_error(...)
    report_epdf(...)
    report_epdf("aborting job in order to avoid crash")
    os.exit()
end
179 180
-- epdf is the built-in library
181 182
-- Returns the lowercased first word of tostring(o), that is everything up to
-- the first space or colon, or "?" when there is no such word. The match is
-- done before lowercasing because lower(nil) raises an error, which made the
-- "?" fallback unreachable.

function epdf.type(o)
    local t = match(tostring(o),"[^ :]+")
    return t and lower(t) or "?"
end
186 187
local
checked_access
188 189
-- dictionaries (can be optimized: ... resolve and redefine when all locals set)
190 191
local
frompdfdoc
=
lpdf
.
frompdfdoc
192 193
local
get_flagged
194 195
-- get_flagged(t,f,k) fetches t[k] (which triggers the lazy resolver) and, when
-- the flags table f has an entry for k, wraps or converts the value according
-- to that flag. Without a flag, or with an unknown one, the value is returned
-- as it is.
--
-- When the lpdf backend objects are available the value is wrapped in the
-- matching lpdf object, otherwise we only decode pdfdoc encoded strings.

if lpdf.dictionary then

    local pdfdictionary = lpdf.dictionary
    local pdfarray      = lpdf.array
    local pdfconstant   = lpdf.constant
    local pdfstring     = lpdf.string
    local pdfunicode    = lpdf.unicode

    get_flagged = function(t,f,k)
        local tk = t[k] -- triggers resolve
        local fk = f[k]
        if not fk then
            return tk
        elseif fk == "name" then
            return pdfconstant(tk)
        elseif fk == "array" then
            return pdfarray(tk)
        elseif fk == "dictionary" then
            -- this used to be pdfarray (a copy and paste leftover)
            return pdfdictionary(tk)
        elseif fk == "rawtext" then
            return pdfstring(tk)
        elseif fk == "unicode" then
            return pdfunicode(tk)
        else
            return tk
        end
    end

else

    get_flagged = function(t,f,k)
        local tk = t[k] -- triggers resolve
        local fk = f[k]
        if not fk then
            return tk
        elseif fk == "rawtext" then
            return frompdfdoc(tk)
        else
            return tk
        end
    end

end
238 239
-- Resolves all n entries of the library dictionary d into the Lua table t and
-- returns t[k], the entry that triggered the resolving.
--
-- document : the loaded document (provides __cache__, __xrefs__ and filename)
-- d, n     : the library dictionary and its length
-- t, k     : the table being filled and the requested key
-- mt       : an optional metatable that replaces the resolver afterwards
-- flags    : an optional table that gets the kind of flagged values per key
--
-- Values that are references are resolved once and shared via the cache of the
-- document. Null objects are skipped. An index without a value is fatal.

local function prepare(document,d,t,n,k,mt,flags)
    for i=1,n do
        local v = dictGetVal(d,i)
        if v then
            local r    = dictGetValNF(d,i)
            local kind = getType(v)
            if kind == null_object_code then
                -- ignore
            elseif kind then
                local key = dictGetKey(d,i)
                if r and getType(r) == ref_object_code then
                    local objnum = getRefNum(r)
                    local cached = document.__cache__[objnum]
                    if not cached then
                        cached = checked_access[kind](v,document,objnum,mt)
                        if cached then
                            document.__cache__[objnum] = cached
                            document.__xrefs__[cached] = objnum
                        end
                    end
                    t[key] = cached
                else
                    local v, flag = checked_access[kind](v,document)
                    t[key] = v
                    if flag and flags then
                        flags[key] = flag -- flags
                    end
                end
            else
                -- the key is not known in this branch so we fetch it here; before
                -- we reported an undefined (global) variable
                report_epdf("warning: nil value for key %a in dictionary",dictGetKey(d,i))
            end
        else
            fatal_error("error: invalid value at index %a in dictionary of %a",i,document.filename)
        end
    end
    if mt then
        setmetatable(t,mt)
    else
        -- everything is resolved, so further lookups are plain ones
        getmetatable(t).__index = nil
    end
    return t[k]
end
281 282
-- local function prepare(document,d,t,n,k,mt,flags)
283
-- for i=1,n do
284
-- local v = dictGetValNF(d,i)
285
-- if v then
286
-- local key = dictGetKey(d,i)
287
-- local kind = getType(v)
288
-- if kind == ref_object_code then
289
-- local objnum = getRefNum(v)
290
-- local cached = document.__cache__[objnum]
291
-- if not cached then
292
-- local v = dictGetVal(d,i)
293
-- local kind = getType(v)
294
-- cached = checked_access[kind](v,document,objnum,mt)
295
-- if cached then
296
-- document.__cache__[objnum] = cached
297
-- document.__xrefs__[cached] = objnum
298
-- end
299
-- end
300
-- t[key] = cached
301
-- else
302
-- local v, flag = checked_access[kind](v,document)
303
-- t[key] = v
304
-- if flag and flags then
305
-- flags[key] = flag -- flags
306
-- end
307
-- end
308
-- end
309
-- end
310
-- if mt then
311
-- setmetatable(t,mt)
312
-- else
313
-- getmetatable(t).__index = nil
314
-- end
315
-- return t[k]
316
-- end
317 318
-- Wraps an already fetched library dictionary d in a lazy Lua table. The first
-- key lookup resolves all entries; calling the table, as in t("Title"), gives
-- the flagged variant of a value (see get_flagged). Nothing is returned for a
-- missing or empty dictionary.
--
-- The resolver is called with an explicit nil metatable and the flags table.
-- The old call passed "_,_,f": that is one argument too many, so the flags
-- were dropped and the (global) variable _ ended up as metatable and flags.

local function some_dictionary(d,document)
    local n = d and dictGetLength(d) or 0
    if n > 0 then
        local t = { }
        local f = { }
        setmetatable(t, {
            __index = function(t,k)
                return prepare(document,d,t,n,k,nil,f)
            end,
            __call = function(t,k)
                return get_flagged(t,f,k)
            end,
         -- __kind = function(k)
         --     return f[k] or type(t[k])
         -- end,
        } )
        return t, "dictionary"
    end
end
337 338
-- Turns a library object that holds a dictionary into a lazy Lua table. Like
-- some_dictionary, but the dictionary is taken from the object and an optional
-- metatable mt is installed once the entries are resolved. The r argument is
-- accepted but not used. An empty dictionary results in no return value.

local function get_dictionary(object,document,r,mt)
    local d = getDict(object)
    local n = 0
    if d then
        n = dictGetLength(d) or 0
    end
    if n <= 0 then
        return
    end
    local t = { }
    local f = { }
    local m = {
        -- __kind = function(k) return f[k] or type(t[k]) end,
    }
    function m.__index(t,k)
        return prepare(document,d,t,n,k,mt,f)
    end
    function m.__call(t,k)
        return get_flagged(t,f,k)
    end
    setmetatable(t,m)
    return t, "dictionary"
end
358 359
-- arrays (can be optimized: ... resolve and redefine when all locals set)
360 361
-- Resolves all n entries of the library array a into the Lua table t and, when
-- a key k is given, returns t[k]. Afterwards the lazy __index and __len hooks
-- are removed from the metatable of t.
--
-- References are resolved once and shared via the cache of the document. Null
-- objects are skipped (so they leave a hole). An index without value is fatal.

local function prepare(document,a,t,n,k)
    for i=1,n do
        local v = arrayGet(a,i)
        if v then
            local kind = getType(v)
            if kind == null_object_code then
                -- ignore
            elseif kind then
                local r = arrayGetNF(a,i)
                if r and getType(r) == ref_object_code then
                    local objnum = getRefNum(r)
                    local cached = document.__cache__[objnum]
                    if not cached then
                        cached = checked_access[kind](v,document,objnum)
                        -- an accessor can come back empty handed (for instance
                        -- for an empty array or dictionary) and a nil can't be
                        -- used as key in the xrefs table
                        if cached ~= nil then
                            document.__cache__[objnum] = cached
                            document.__xrefs__[cached] = objnum
                        end
                    end
                    t[i] = cached
                else
                    t[i] = checked_access[kind](v,document)
                end
            else
                report_epdf("warning: nil value for index %a in array",i)
            end
        else
            fatal_error("error: invalid value at index %a in array of %a",i,document.filename)
        end
    end
    local m = getmetatable(t)
    if m then
        m.__index = nil
        m.__len   = nil
    end
    if k then
        return t[k]
    end
end
398 399
-- local function prepare(document,a,t,n,k)
400
-- for i=1,n do
401
-- local v = arrayGetNF(a,i)
402
-- if v then
403
-- local kind = getType(v)
404
-- if kind == ref_object_code then
405
-- local objnum = getRefNum(v)
406
-- local cached = document.__cache__[objnum]
407
-- if not cached then
408
-- local v = arrayGet(a,i)
409
-- local kind = getType(v)
410
-- cached = checked_access[kind](v,document,objnum)
411
-- document.__cache__[objnum] = cached
412
-- document.__xrefs__[cached] = objnum
413
-- end
414
-- t[i] = cached
415
-- else
416
-- t[i] = checked_access[kind](v,document)
417
-- end
418
-- end
419
-- end
420
-- local m = getmetatable(t)
421
-- if m then
422
-- m.__index = nil
423
-- m.__len = nil
424
-- end
425
-- if k then
426
-- return t[k]
427
-- end
428
-- end
429 430
-- Wraps an already fetched library array a in a lazy Lua table with the length
-- in the field n. The first index lookup or length request resolves all the
-- entries. Nothing is returned for a missing or empty array.
--
-- The flags table f was never declared here, so calling the table ended up in
-- indexing an undefined global. The surplus "_,_,f" arguments are gone too: the
-- array resolver takes (document,a,t,n,k), so in __len they made the global
-- variable _ the key.

local function some_array(a,document)
    local n = a and arrayGetLength(a) or 0
    if n > 0 then
        local t = { n = n }
        local f = { }
        setmetatable(t, {
            __index = function(t,k)
                return prepare(document,a,t,n,k)
            end,
            __len = function(t)
                prepare(document,a,t,n)
                return n
            end,
            __call = function(t,k)
                return get_flagged(t,f,k)
            end,
         -- __kind = function(k)
         --     return f[k] or type(t[k])
         -- end,
        } )
        return t, "array"
    end
end
452 453
-- Turns a library object that holds an array into a lazy Lua table with the
-- length in the field n. Entries are resolved on the first index lookup or
-- length request. An empty array results in no return value.

local function get_array(object,document)
    local a = getArray(object)
    local n = 0
    if a then
        n = arrayGetLength(a) or 0
    end
    if n <= 0 then
        return
    end
    local t = { n = n }
    local f = { } -- never filled for arrays, so a call returns the plain value
    local m = {
        -- __kind = function(k) return f[k] or type(t[k]) end,
    }
    function m.__index(t,k)
        return prepare(document,a,t,n,k)
    end
    function m.__len(t)
        prepare(document,a,t,n)
        return n
    end
    function m.__call(t,k)
        return get_flagged(t,f,k)
    end
    setmetatable(t,m)
    return t, "array"
end
477 478
-- todo: collect chunks
479 480
-- local function streamaccess(s,_,what)
481
-- if not what or what == "all" or what == "*all" then
482
-- local t, n = { }, 0
483
-- streamReset(s)
484
-- while true do
485
-- local c = streamGetChar(s)
486
-- if c < 0 then
487
-- break
488
-- else
489
-- n = n + 1
490
-- t[n] = char(c)
491
-- end
492
-- end
493
-- return concat(t,"",1,n)
494
-- end
495
-- end
496 497
-- Returns the content of stream s as one string. When the library provides
-- streamGetAll we use that, otherwise we collect the bytes one by one and
-- convert them in chunks of 2000 (unpack can't handle an unlimited number of
-- values).
--
-- After flushing a chunk the counter has to restart at zero. It used to be set
-- to one, which repeated the first byte of the previous chunk and shifted the
-- data that followed.

local function getstream(s)
    streamReset(s)
    if streamGetAll then
        return streamGetAll(s)
    end
    local t, b, n = { }, { }, 0
    while true do
        local c = streamGetChar(s)
        if not c or c < 0 then
            break
        end
        n = n + 1
        b[n] = c
        if n == 2000 then
            t[#t+1] = char(unpack(b,1,n))
            n = 0
        end
    end
    if n > 0 then
        t[#t+1] = char(unpack(b,1,n))
    end
    return concat(t)
end
520 521
-- The call handler of a stream table: s is the library stream, the second
-- argument (the table itself) is ignored. Without a request, or with "all" or
-- "*all", the whole stream is returned; any other request gives nothing.

local function streamaccess(s,_,what)
    if what and what ~= "all" and what ~= "*all" then
        return
    end
    return getstream(s)
end
526 527
-- Turns a library stream object d into a lazy table of its dictionary entries.
-- Calling that table returns the content of the stream (see streamaccess).
-- Nothing is returned when no object is given.
--
-- An empty stream dictionary makes some_dictionary come back empty handed, and
-- setting the call handler then failed on a nil metatable; in that case we use
-- an empty table so that the content can still be fetched.

local function get_stream(d,document)
    if d then
        streamReset(d)
        local s = some_dictionary(streamGetDict(d),document)
        if not s then
            s = setmetatable({ },{ })
        end
        getmetatable(s).__call = function(...) return streamaccess(d,...) end
        return s
    end
end
535 536
-- We need to convert the string from utf16 although there is no way to
537
-- check if we have a regular string starting with a bom. So, we have
538
-- a dilemma here: a pdf doc encoded string can be invalid utf.
539 540
-- <hex encoded> : implicit 0 appended if odd
541
-- (byte encoded) : \( \) \\ escaped
542
--
543
-- <FE><FF> : utf16be
544
--
545
-- \n \r \t \b \f \( \) \\ \NNN and \<newline> : append next line
546
--
547
-- the getString function gives back bytes so we don't need to worry about
548
-- the hex aspect.
549 550
local
u_pattern
=
lpeg
.
patterns
.
utfbom_16_be
*
lpeg
.
patterns
.
utf16_to_utf8_be
551
----- b_pattern = lpeg.patterns.hextobytes
552 553
-- Returns the string value of object v plus a flag that tells what we got:
--
--   utf16 (big endian, with bom) : converted to utf8, flag "unicode"
--   anything else                : the bytes as they are, flag "rawtext"
--
-- A missing or empty string gives an empty string and no flag. One might want
-- to apply lpdf.frompdfdoc to a "rawtext" result. Guessing if a string is hex
-- encoded turned out to be too tricky, so we don't try that here.

local function get_string(v)
    local s = getString(v)
    if s and s ~= "" then
        local u = lpegmatch(u_pattern,s)
        if u then
            return u, "unicode"
        end
        return s, "rawtext"
    end
    return ""
end
572 573
-- Returns the name held by object v and the flag "name".

local function get_name(v)
    local name = getName(v)
    return name, "name"
end
576 577
-- The accessor for null objects: always one (nil) value.

local function get_null()
    return nil
end
580 581
-- we have dual access: by typenumber and by typename
582 583
-- Aborts the run because an accessor was asked for an unsupported object type
-- k. When the document is known its name is mentioned in the message.
--
-- Documents made by the loader carry a filename field (there is no fullname),
-- so we look at that one first; otherwise the file was never reported.

local function invalidaccess(k,document)
    local name = type(document) == "table" and (document.filename or document.fullname)
    if name then
        fatal_error("error, asking for key %a in checker of %a",k,name)
    else
        fatal_error("error, asking for key %a in checker",k)
    end
end
591 592
-- The accessors per object type. An unknown type gives a function that aborts
-- the run with a message (see invalidaccess).

checked_access = setmetatableindex(function(t,k)
    return function(v,document)
        invalidaccess(k,document)
    end
end)

checked_access[typenumbers.boolean]    = getBool
checked_access[typenumbers.integer]    = getInt
checked_access[typenumbers.real]       = getReal
checked_access[typenumbers.string]     = get_string     -- getString
checked_access[typenumbers.name]       = get_name
checked_access[typenumbers.null]       = get_null
checked_access[typenumbers.array]      = get_array      -- d,document,r
checked_access[typenumbers.dictionary] = get_dictionary -- d,document,r
checked_access[typenumbers.stream]     = get_stream
checked_access[typenumbers.ref]        = getRef

-- We have dual access: by type number and by type name. A raw lookup is needed
-- here because a normal one always gives a function (the fallback above), which
-- made the "not checker" test a dead branch; that branch also stored a function
-- that returned a function instead of one that reports.

for i=0,#typenames do
    local checker = rawget(checked_access,i)
    if not checker then
        checker = function(v,document)
            invalidaccess(i,document)
        end
        checked_access[i] = checker
    end
    checked_access[typenames[i]] = checker
end
621 622
-- Flattens a name tree into the table target (made when not given): the Names
-- array of a node holds name and value pairs, the Kids array holds the nodes
-- below it. The target is returned; nothing is returned when no node is given.
--
-- A kid can be missing (null objects are skipped by the resolver) and then the
-- recursive call returns nothing; we keep what we collected so far instead of
-- starting all over. A missing name is skipped because nil can't be a key.

local function getnames(document,n,target) -- direct
    if n then
        local Names = n.Names
        if Names then
            if not target then
                target = { }
            end
            for i=1,Names.n or 0,2 do
                local name = Names[i]
                if name ~= nil then
                    target[name] = Names[i+1]
                end
            end
        else
            local Kids = n.Kids
            if Kids then
                for i=1,Kids.n or 0 do
                    target = getnames(document,Kids[i],target) or target
                end
            end
        end
        return target
    end
end
643 644
-- Collects the leaves of a Kids tree in the list target (made when not given):
-- a node with Kids is traversed, a node without Kids is added to the list. The
-- list is returned; nothing is returned when no node is given.
--
-- A kid can be missing (null objects are skipped by the resolver) and then the
-- recursive call returns nothing; we keep what we collected so far instead of
-- starting all over.

local function getkids(document,n,target) -- direct
    if n then
        local Kids = n.Kids
        if Kids then
            for i=1,Kids.n or 0 do
                target = getkids(document,Kids[i],target) or target
            end
        elseif target then
            target[#target+1] = n
        else
            target = { n }
        end
        return target
    end
end
659 660
-- /OCProperties <<
661
-- /OCGs [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
662
-- /D <<
663
-- /Order [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
664
-- /ON [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
665
-- /OFF [ ]
666
-- >>
667
-- >>
668 669
-- Returns a list with the names of the optional content groups (the OCGs in
-- OCProperties of the catalog) with the number of entries in the field n, or
-- nothing when the document has no such groups.

local function getlayers(document)
    local properties = document.Catalog.OCProperties
    local layers     = properties and properties.OCGs
    if not layers then
        return
    end
    local n = layers.n
    local t = { n = n }
    for i=1,n do
        t[i] = layers[i].Name
    end
    return t
end
685 686
-- Returns the StructTreeRoot entry of the catalog (this might become a tree).

local function getstructure(document)
    local root = document.Catalog.StructTreeRoot
    return root
end
690 691
-- This is the only messy helper. We can't access the root as any object (it seems)
692
-- so we need a few low level accessors. It's anyway sort of simple enough to deal
693
-- with but it won't win a beauty contest.
694 695
-- Builds the list of pages of a document. Each page is a lazy dictionary that
-- falls back on Catalog.Pages (via its metatable) and that gets a number and
-- object field. The pages are also registered in the caches of the document.
-- As a side effect the pages table (with the page count in n) and the major and
-- minor version of the file are stored in the document. The list is returned.

local function getpages(document,Catalog)
    local __data__  = document.__data__
    local __xrefs__ = document.__xrefs__
    local __cache__ = document.__cache__
    local __xref__  = document.__xref__
    --
    local rawcatalog = getRawCatalog(__data__)
    local nofpages   = getNumPages(rawcatalog)
    --
    local majorversion = getMajorVersion(__data__)
    local minorversion = getMinorVersion(__data__)
    --
    local pages     = { }
    local metatable = { __index = Catalog.Pages } -- somewhat empty
    --
    for pagenumber=1,nofpages do
        local pagereference = getPageRef(rawcatalog,pagenumber).num
        local pageobject    = fetch(__xref__,pagereference,0)
        local pagedata      = get_dictionary(pageobject,document,pagereference,metatable)
        if pagedata then
         -- rawset(pagedata,"number",pagenumber)
            pagedata.number           = pagenumber
            pagedata.object           = pageobject
            pages[pagenumber]         = pagedata
            __xrefs__[pagedata]       = pagereference
            __cache__[pagereference]  = pagedata
        else
            -- this used to report an undefined variable instead of the page
            report_epdf("missing pagedata at slot %i",pagenumber)
        end
    end
    --
    pages.n = nofpages
    --
    document.pages        = pages
    document.majorversion = majorversion
    document.minorversion = minorversion
    --
    return pages
end
734 735
-- The lazy resolver of document fields: the value of a known field is made on
-- demand, stored in the document and returned. Unknown fields give nil.

local resolve  do

    -- fields that are flattened name trees, with the entry in Catalog.Names

    local nametrees = {
        destinations  = "Dests",
        javascripts   = "JS",
        widgets       = "AcroForm",
        embeddedfiles = "EmbeddedFiles",
    }

    resolve = function(document,k)
        local Catalog = document.Catalog
        local Names   = Catalog.Names
        local entry   = nil
        local tree    = nametrees[k]
        if tree then
            entry = getnames(document,Names and Names[tree])
        elseif k == "pages" then
            entry = getpages(document,Catalog)
        elseif k == "layers" then
            entry = getlayers(document)
        elseif k == "structure" then
            entry = getstructure(document)
        end
        document[k] = entry
        return entry
    end

end
757 758
local
loaded
=
{
}
759 760
-- Opens a pdf file and returns a document table, or nil when the file can't be
-- opened. A document is loaded only once per filename; a failed attempt is
-- tried again on the next call. The document has lazy Catalog and Info
-- dictionaries, the number of pages, and resolves fields like pages on demand.

function lpdf_epdf.load(filename)
    local document = loaded[filename]
    if document then
        return document
    end
    statistics.starttiming(lpdf_epdf)
    local __data__ = openPDF(filename) -- maybe resolvers.find_file
    if not __data__ then
        document = false
    else
        local __xref__ = getXRef(__data__)
        document = {
            filename  = filename,
            __cache__ = { },
            __xrefs__ = { },
            __fonts__ = { },
            __data__  = __data__,
            __xref__  = __xref__,
        }
        document.Catalog = some_dictionary(getDict(getCatalog(__xref__)),document)
        document.Info    = some_dictionary(getDict(getDocInfo(__xref__)),document)
        setmetatableindex(document,resolve)
        --
        document.nofpages = getNumPages(getRawCatalog(__data__))
    end
    loaded[filename] = document
    loaded[document] = document
    statistics.stoptiming(lpdf_epdf)
 -- print(statistics.elapsedtime(lpdf_epdf))
    return document or nil
end
790 791
-- Forgets a loaded document (by filename) so that it can be collected.

function lpdf_epdf.unload(filename)
    local document = loaded[filename]
    if not document then
        return
    end
    loaded[document], loaded[filename] = nil, nil
end
798 799
-- for k, v in next, expand(t) do
800 801
-- Forces a lazy table to resolve its entries (by asking for a key that is not
-- there) and returns the argument, as in: for k, v in next, expand(t) do

local function expand(t)
    if type(t) == "table" then
        local _ = t.dummy
    end
    return t
end
807 808
-- for k, v in expanded(t) do
809 810
-- Forces a lazy table to resolve its entries (by asking for a key that is not
-- there) and returns an iterator, as in: for k, v in expanded(t) do

local function expanded(t)
    if type(t) == "table" then
        local _ = t.dummy
    end
    return next, t
end
816 817
lpdf_epdf
.
expand
=
expand
818
lpdf_epdf
.
expanded
=
expanded
819 820
-- we could resolve the text stream in one pass if we directly handle the
821
-- font but why should we complicate things
822 823
-- A simple parser for page content streams. The result is a list of operations
-- and each operation is a list of operands followed by the operator:
--
--   /Name        : the name without slash
--   number       : a number
--   << ... >>    : a table with the key value pairs
--   [ ... ]      : a list of operands
--   <...>        : { "hex", <the hex digits> }
--   (...)        : { "dec", <the string> } (untested)
--
-- NOTE(review): the last alternative of numchar accepts any character, also the
-- closing parenthesis, so the string rule looks like it can't succeed; this
-- needs checking with real content.

local hexdigit  = R("09","AF")
local numchar   = P("\\") * (R("09")^3/tonumber + C(1)) + C(1)
local number    = lpegpatterns.number / tonumber
local spaces    = lpegpatterns.whitespace^1
local optspaces = lpegpatterns.whitespace^0
local keyword   = P("/") * C(R("AZ","az","09")^1)
local operator  = C((R("AZ","az") + S([['"]]))^1)

local grammar = P {
    [1]        = "start",
    start      = (keyword + number + V("dictionary") + V("unicode") + V("string") + V("array") + spaces)^1,
 -- keyvalue   = (keyword * spaces * V("start") + spaces)^1,
    keyvalue   = optspaces * Cf(Ct("") * Cg(keyword * optspaces * V("start") * optspaces)^1,rawset),
    dictionary = P("<<") * V("keyvalue") * ">>",
    array      = P("[") * Ct(V("start")^1) * "]",
    unicode    = P("<") * Ct(Cc("hex") * C((1 - P(">"))^1)) * ">",
    string     = P("(") * Ct(Cc("dec") * C((V("string") + numchar)^1)) * ")", -- untested
}

local operation = Ct(grammar^1 * operator)
local parser    = Ct((operation + P(1))^1)
843 844
-- beginbfrange : <start> <stop> <firstcode>
845
-- <start> <stop> [ <firstsequence> <firstsequence> <firstsequence> ]
846
-- beginbfchar : <code> <newcodes>
847 848
local
fromsixteen
=
lpdf
.
fromsixteen
-- maybe inline the lpeg ... but not worth it
849 850
-- Handles a bfchar entry: the hex code a maps onto the utf16 (hex) string b,
-- which is stored (converted by fromsixteen) in the mapping t.

local function f_bfchar(t,a,b)
    local code = tonumber(a,16)
    t[code] = fromsixteen(b)
end
853 854
-- Handles a bfrange entry with a first code: not implemented yet, we only show
-- what we got (here c is a string).

local function f_bfrange_1(t,a,b,c)
    -- todo t[tonumber(a,16)] = fromsixteen(b)
    print("todo 1",a,b,c)
end
859 860
-- Handles a bfrange entry with a list of sequences: not implemented yet, we
-- only show what we got (here c is a table).

local function f_bfrange_2(t,a,b,c)
    -- todo t[tonumber(a,16)] = fromsixteen(b)
    print("todo 2",a,b,c)
end
865 866
-- The parser for ToUnicode cmaps: it feeds the bfchar and bfrange sections to
-- the handlers and skips everything else. The mapping table is passed as first
-- extra argument to the match and is also what the match returns.

local optionals = spaces^0
local hexstring = optionals * "<" * C((1 - P(">"))^1) * ">"

local bfchar    = (Carg(1) * hexstring * hexstring) / f_bfchar

local bfrange   = (Carg(1) * hexstring * hexstring * hexstring) / f_bfrange_1
                + (Carg(1) * hexstring * hexstring * optionals * "[" * Ct(hexstring^1) * optionals * "]") / f_bfrange_2

local fromunicode = (
    P("beginbfchar")  * bfchar^1  * optionals * "endbfchar"
  + P("beginbfrange") * bfrange^1 * optionals * "endbfrange"
  + spaces
  + P(1)
)^1 * Carg(1)
877 878
-- Registers the fonts found in the given resources in the font cache of the
-- document (by font id) and returns that cache. For each new font the ToUnicode
-- stream, when there is one, is parsed into a tounicode mapping; otherwise the
-- mapping is empty.
--
-- Many fonts have no ToUnicode stream. It used to be called unconditionally,
-- which fails on such fonts, so we check before we call.

local function analyzefonts(document,resources) -- unfinished, see mtx-pdf for better code
    local fonts = document.__fonts__
    if resources then
        local fontlist = resources.Font
        if fontlist then
            for id, data in expanded(fontlist) do
                if not fonts[id] then
                    -- a quick hack ... I will look into it in more detail if I
                    -- find a real application for it
                    local stream    = type(data) == "table" and data.ToUnicode
                    local tounicode = stream and stream()
                    if tounicode then
                        tounicode = lpegmatch(fromunicode,tounicode,1,{ })
                    end
                    fonts[id] = {
                        tounicode = type(tounicode) == "table" and tounicode or { }
                    }
                    setmetatableindex(fonts[id],"self")
                end
            end
        end
    end
    return fonts
end
901 902
-- The converters from (hex or byte) encoded strings to utf. They use two
-- shared variables: unic is the current tounicode mapping (set by the caller
-- before matching) and more remembers a pending high surrogate.

local more = 0
local unic = nil -- cheaper than passing each time as Carg(1)

-- Converts four hex digits. A high surrogate is remembered and combined with
-- the next code. Inside a substitution capture a function that returns nothing
-- leaves the matched text as it is, so for a pending surrogate we have to
-- return an empty string, otherwise its hex digits end up in the result. Input
-- that is not hex is kept as it is.

local p_hex_to_utf = C(4) / function(s) -- needs checking !
    local now = tonumber(s,16)
    if not now then
        return s
    elseif more > 0 then
        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the regular surrogate pair formula
        more = 0
        return unic[now] or utfchar(now)
    elseif now >= 0xD800 and now <= 0xDBFF then
        more = now
        return ""
    else
        return unic[now] or utfchar(now)
    end
end

-- Converts one byte.

local p_dec_to_utf = C(1) / function(s) -- needs checking !
    local now = byte(s)
    return unic[now] or utfchar(now)
end

-- The patterns that we actually use: they reset the surrogate state and then
-- convert as much as possible, with the converted string as result.

local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_to_utf^1)
local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_to_utf^1)
926 927
-- Parses the content stream of a page and returns the list of operations (see
-- the parser), with the strings of the text operators (TJ, Tj, ' and ")
-- converted to utf using the tounicode mapping of the current font. Nothing is
-- returned for a page that doesn't exist.
--
-- Compared to the first version we can handle text before a font is set,
-- unknown fonts, pages without (parsable) content and operators without string
-- operand; the Tj branch also used an undefined variable.

function lpdf_epdf.getpagecontent(document,pagenumber)

    local page = document.pages[pagenumber]

    if not page then
        return
    end

    local fonts    = analyzefonts(document,page.Resources)

    local contents = page.Contents
    local content  = type(contents) == "table" and contents() or ""
    local list     = type(content) == "string" and lpegmatch(parser,content) or { }
    local font     = nil

    unic = { } -- the converters need a mapping, also when no font is set yet

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            font = fonts[entry[1]]
            unic = font and font.tounicode or { }
        elseif operator == "TJ" then -- { array, TJ }
            local list = entry[1]
            if type(list) == "table" then
                for i=1,#list do
                    local li = list[i]
                    if type(li) == "table" then
                        -- no holes in the list when nothing could be converted
                        if li[1] == "hex" then
                            list[i] = lpegmatch(p_hex_to_utf,li[2]) or ""
                        else
                            list[i] = lpegmatch(p_dec_to_utf,li[2]) or ""
                        end
                    else
                        -- kern
                    end
                end
            end
        elseif operator == "Tj" or operator == "'" or operator == '"' then -- { string, Tj } { string, ' } { n, m, string, " }
            local list = entry[size-1]
            if type(list) == "table" then
                if list[1] == "hex" then
                    list[2] = lpegmatch(p_hex_to_utf,list[2])
                else
                    list[2] = lpegmatch(p_dec_to_utf,list[2])
                end
            end
        end
    end

    unic = nil -- can be collected

    return list

end
978 979
-- This is also an experiment. When I really need it I can improve it, for instance
980
-- with proper position calculating. It might be useful for some search or so.
981 982
local
softhyphen
=
utfchar
(
0xAD
)
.
.
"
$
"
983
local
linefactor
=
1
.
3
984 985
-- Converts the operations of a page (see getpagecontent) into a text string.
-- A kern of less than -50 in a TJ array is seen as a word space. A vertical
-- move (cm or Tm) of more than linefactor times the font size starts a new
-- line, unless the text so far ends with a soft hyphen.
--
-- Compared to the first version: the leftover (global) line variable is gone
-- (the concat could fail), a Tj string is taken from its { kind, string } pair,
-- the soft hyphen test is a pattern match again (a plain find turned the $
-- into a literal) and operands are checked before we calculate with them.

function lpdf_epdf.contenttotext(document,list) -- maybe signal fonts
    local last_y = 0
    local last_f = 0
    local text   = { }
    local last   = 0

    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "Tf" then
            local size = entry[2]
            if type(size) == "number" then
                last_f = size
            end
        elseif operator == "TJ" then
            local list = entry[1]
            if type(list) == "table" then
                for i=1,#list do
                    local li = list[i]
                    local tl = type(li)
                    if tl == "string" then
                        last = last + 1
                        text[last] = li
                    elseif tl == "number" and li < -50 then
                        -- a word gap (an empty string would make no sense here)
                        last = last + 1
                        text[last] = " "
                    end
                end
            end
        elseif operator == "Tj" then
            local str = entry[size-1]
            if type(str) == "table" then
                str = str[2] -- { "hex" or "dec", string }
            end
            if type(str) == "string" then
                last = last + 1
                text[last] = str
            end
        elseif operator == "cm" or operator == "Tm" then
            local ty = entry[6]
            if type(ty) == "number" then
                local dy = abs(last_y - ty)
                if dy > linefactor*last_f then
                    if last > 0 then
                        if find(text[last],softhyphen) then
                            -- ignore
                        else
                            last = last + 1
                            text[last] = "\n"
                        end
                    end
                end
                last_y = ty
            end
        end
    end

    return concat(text)
end
1032 1033
-- Reports the marked content structure of a list of operations (see
-- getpagecontent): each BDC is shown with its tag and MCID, the text inside is
-- shown indented by the nesting depth. Nothing is returned.
--
-- The properties of a BDC can be a name instead of a dictionary and a TJ array
-- can have entries that are neither string nor number, so we check the types
-- before we use them. Unbalanced EMC operators don't make the depth negative.

function lpdf_epdf.getstructure(document,list) -- just a test
    local depth = 0
    for i=1,#list do
        local entry    = list[i]
        local size     = #entry
        local operator = entry[size]
        if operator == "BDC" then
            local properties = entry[2]
            local mcid       = type(properties) == "table" and properties.MCID
            report_epdf("%w%s : %s",depth,entry[1] or "?",mcid or "?")
            depth = depth + 1
        elseif operator == "EMC" then
            if depth > 0 then
                depth = depth - 1
            end
        elseif operator == "TJ" then
            local list = entry[1]
            if type(list) == "table" then
                for i=1,#list do
                    local li = list[i]
                    local tl = type(li)
                    if tl == "string" then
                        report_epdf("%w > %s",depth,li)
                    elseif tl == "number" and li < -50 then
                        report_epdf("%w >",depth,li)
                    end
                end
            end
        elseif operator == "Tj" then
            local str = entry[size-1]
            if type(str) == "table" then
                str = str[2] -- { "hex" or "dec", string }
            end
            report_epdf("%w > %s",depth,str)
        end
    end
end
1059 1060
-- document.Catalog.StructTreeRoot.ParentTree.Nums[2][1].A.P[1])
1061 1062
-- helpers
1063 1064
-- function lpdf_epdf.getdestinationpage(document,name)
1065
-- local destination = document.__data__:findDest(name)
1066
-- return destination and destination.number
1067
-- end
1068 1069
-- This is experimental code that we need for testing the transition from
1070
-- poppler to a new lightweight library. Don't rely on this code to remain
1071
-- as it is now. Interesting is that performance of this variant is the same
1072
-- as the natural page includer.
1073 1074
if
img
then
do
1075 1076
local
copydictionary
=
nil
1077
local
copyarray
=
nil
1078 1079
local
ref_object_code
=
typenumbers
.
ref
1080
local
boolean_object_code
=
typenumbers
.
boolean
1081
local
integer_object_code
=
typenumbers
.
integer
1082
local
real_object_code
=
typenumbers
.
real
1083
local
string_object_code
=
typenumbers
.
string
1084
local
name_object_code
=
typenumbers
.
name
1085
local
null_object_code
=
typenumbers
.
null
1086
local
array_object_code
=
typenumbers
.
array
1087
local
dictionary_object_code
=
typenumbers
.
dictionary
1088
local
stream_object_code
=
typenumbers
.
stream
1089
local
cmd_object_code
=
typenumbers
.
cmd
1090 1091
local
pdfreserveobject
=
lpdf
.
reserveobject
1092
local
pdfflushobject
=
lpdf
.
flushobject
1093
local
pdfflushstreamobject
=
lpdf
.
flushstreamobject
1094
local
pdfreference
=
lpdf
.
reference
1095
local
pdfconstant
=
lpdf
.
constant
1096
local
pdfarray
=
lpdf
.
array
1097
local
pdfdictionary
=
lpdf
.
dictionary
1098
local
pdfunicode
=
lpdf
.
unicode
1099
local
pdfstring
=
lpdf
.
string
1100
local
pdfnull
=
lpdf
.
null
1101 1102
local
report
=
logs
.
reporter
(
"
backend
"
,
"
xobjects
"
)
1103 1104
local
factor
=
65536
/
(
7200
/
7227
)
-- 1/number.dimenfactors.bp
1105 1106
local
createimage
=
images
.
create
1107 1108
-- Returns a new four element array with the first four entries of b, each
-- multiplied by factor.
local function scaledbbox(b)
    local scaled = { }
    for i=1,4 do
        scaled[i] = b[i] * factor
    end
    return scaled
end
1111 1112
-- Copies one object of the source document into something that can be put in
-- an lpdf array or dictionary.
--
-- xref   : passed on to fetch and to the recursive calls
-- copied : table that maps source object numbers onto reserved object numbers,
--          so that an object is flushed only once
-- kind   : the type code of the (resolved) value
-- r      : the unresolved value; when it is a reference the target is copied
--          as an indirect object
-- v      : the resolved value
--
-- Returns a reference for indirect objects and otherwise the converted value.
-- For a kind that is not handled a message is reported and nothing is returned.
local function copyobject(xref,copied,kind,r,v)
    if kind == null_object_code then
        return pdfnull()
    elseif r and getType(r) == ref_object_code then
        local objnum = getRefNum(r)
        local r      = copied[objnum]
        if not r then
            r = pdfreserveobject()
            -- registered before we descend, so that a nested reference to the
            -- same object finds the reserved number in copied
            copied[objnum] = r
            if kind == array_object_code then
                local a = copyarray(xref,copied,fetch(xref,objnum,0))
                -- no result must not end up as the string "nil" in the file
                pdfflushobject(r,tostring(a or pdfarray()))
            elseif kind == dictionary_object_code then
                local d = copydictionary(xref,copied,fetch(xref,objnum,0))
                pdfflushobject(r,tostring(d or pdfdictionary()))
            elseif kind == stream_object_code then
                local f = fetch(xref,objnum,0)
                local d = copydictionary(xref,copied,false,streamGetDict(f)) or pdfdictionary()
                local s = getstream(f)
                -- these keys describe the encoding of the source stream and are
                -- not taken over (presumably because getstream returns decoded
                -- data -- to be confirmed)
                d.Filter      = nil
                d.Length      = nil
                d.DecodeParms = nil
                d.DL          = nil
                --
                pdfflushstreamobject(s,d,true,r)
            else
                report("reference not done: %s",kind)
            end
        end
        return pdfreference(r)
    elseif kind == array_object_code then
        return copyarray(xref,copied,v)
    elseif kind == dictionary_object_code then
        return copydictionary(xref,copied,v)
    elseif kind == integer_object_code then
        return getInt(v)
    elseif kind == real_object_code then
        return getReal(v)
    elseif kind == name_object_code then
        return pdfconstant(getName(v))
    elseif kind == string_object_code then
        local s = getString(v)
        if not s or s == "" then
            return ""
        end
        -- a match with u_pattern selects the unicode variant
        if lpegmatch(u_pattern,s) then
            return pdfunicode(s)
        end
        return pdfstring(s)
    elseif kind == boolean_object_code then
        return getBool(v)
    elseif kind == stream_object_code then
        -- hm ...
        return getStream(v)
    else
        report("object not done: %s",kind)
    end
end
1175 1176
-- Copies an array object into a new lpdf array, entry by entry, by means of
-- copyobject.
--
-- xref   : passed on to copyobject
-- copied : passed on to copyobject
-- object : the source object that getArray is applied to
--
-- Returns the new array. An array without entries gives an empty lpdf array,
-- so that it is not lost in the parent. Nothing is returned when getArray
-- gives no array.
copyarray = function(xref,copied,object)
    local a = getArray(object)
    if not a then
        return
    end
    local target = pdfarray()
    for i=1,arrayGetLength(a) or 0 do
        local v = arrayGet(a,i)
        if v then
            local kind = getType(v)
            local r    = arrayGetNF(a,i) -- the unresolved entry, might be a reference
            target[i] = copyobject(xref,copied,kind,r,v)
        end
    end
    return target
end
1192 1193
-- Copies a dictionary into a new lpdf dictionary, key by key, by means of
-- copyobject.
--
-- xref   : passed on to copyobject
-- copied : passed on to copyobject
-- object : the source object that getDict is applied to; only used when d is
--          not given
-- d      : optional, an already fetched dictionary
--
-- Returns the new dictionary. A dictionary without entries gives an empty lpdf
-- dictionary. Nothing is returned when there is no dictionary at all.
copydictionary = function(xref,copied,object,d)
    local d = d or getDict(object)
    if not d then
        return
    end
    local target = pdfdictionary()
    for i=1,dictGetLength(d) or 0 do
        local v = dictGetVal(d,i)
        if v then
            local kind = getType(v)
            local key  = dictGetKey(d,i)
            local r    = dictGetValNF(d,i) -- the unresolved value, might be a reference
            target[key] = copyobject(xref,copied,kind,r,v)
        end
    end
    return target
end
1210 1211
-- Looks up the "Resources" entry in the dictionary of a page and copies it.
--
-- pdfdoc   : not used in this function
-- xref     : passed on to copyobject
-- copied   : passed on to copyobject
-- pagedata : the page; its object field is handed to getDict
--
-- Returns what copyobject returns for the entry, or nothing when the page has
-- no object, no dictionary or no (usable) "Resources" entry.
local function copy_resources(pdfdoc,xref,copied,pagedata)
    local object = pagedata.object
    if not object then
        return
    end
    local d = getDict(object)
    local n = d and dictGetLength(d) or 0
    for i=1,n do
        if dictGetKey(d,i) == "Resources" then
            -- the value has to be fetched before it can be tested
            local v = dictGetVal(d,i)
            if v then
                local kind = getType(v)
                local r    = dictGetValNF(d,i)
                return copyobject(xref,copied,kind,r,v)
            end
        end
    end
end
1227 1228
-- Loads a document with lpdf_epdf.load, makes sure that it has a __copied__
-- table and stores the filename in it. Returns the document, or nothing when
-- loading gives no document.
local function openpdf(filename)
    local pdfdoc = lpdf_epdf.load(filename)
    if not pdfdoc then
        return
    end
    local copied = pdfdoc.__copied__ or { }
    pdfdoc.__copied__ = copied
    pdfdoc.filename   = filename
    return pdfdoc
end
1236 1237
-- Unloads a document by passing its filename to lpdf_epdf.unload. Nothing
-- happens when no document is given.
local function closepdf(pdfdoc)
    if not pdfdoc then
        return
    end
    lpdf_epdf.unload(pdfdoc.filename)
end
1242 1243
-- Collects the properties of one page of a document.
--
-- pdfdoc     : the document; without one nothing is returned
-- pagenumber : the page to look at, 1 when not given
--
-- Returns a table with filename, pagenumber, nofpages, boundingbox (the scaled
-- cropbox) and the cropbox, mediabox, bleedbox, trimbox and artbox, or nothing
-- when the page is not there. The mediabox falls back on { 0, 0, 0, 0 }, the
-- cropbox on the mediabox and the other boxes on the cropbox.
local function querypdf(pdfdoc,pagenumber)
    if not pdfdoc then
        return
    end
    pagenumber = pagenumber or 1
    local _    = pdfdoc.Catalog -- the value itself is not used
    local page = pdfdoc.pages[pagenumber]
    if not page then
        return
    end
    local media = page.MediaBox or { 0, 0, 0, 0 }
    local crop  = page.CropBox or media
    local info  = {
        filename    = pdfdoc.filename,
        pagenumber  = pagenumber,
        nofpages    = pdfdoc.nofpages,
        boundingbox = scaledbbox(crop),
        cropbox     = crop,
        mediabox    = media,
    }
    info.bleedbox = page.BleedBox or crop
    info.trimbox  = page.TrimBox or crop
    info.artbox   = page.ArtBox or crop
    return info
end
1267 1268
-- Turns a page of a loaded document into an image made with createimage.
--
-- pdfdoc     : the document; without one nothing is returned
-- pagenumber : the page to copy, 1 when not given
-- attributes : optional; the key/value pairs that expand(attributes) gives are
--              added to the form xobject dictionary
--
-- Returns the result of createimage, called with the scaled bounding box, the
-- page content and the serialized xobject dictionary. Nothing is returned when
-- the page is not there.
local function copypage(pdfdoc,pagenumber,attributes)
    if not pdfdoc then
        return
    end
    -- not used here; the access is kept as it was
    -- NOTE(review): can go when it has no side effect
    local root     = pdfdoc.Catalog
    local page     = pdfdoc.pages[pagenumber or 1]
    local pageinfo = page and querypdf(pdfdoc,pagenumber)
    if not pageinfo then
        -- no such page, so there is nothing to copy
        return
    end
    local contents = page.Contents
    local xref     = pdfdoc.__xref__
    local copied   = pdfdoc.__copied__
    --
    local xobject = pdfdictionary {
        Type           = pdfconstant("XObject"),
        Subtype        = pdfconstant("Form"),
        -- image attributes
        FormType       = 1,
        BBox           = pageinfo.cropbox,
     -- Metadata       = copy(xref,copied,root,"Metadata"),
     -- Group          = copy(xref,copied,page,"Group"),
     -- LastModified   = copy(xref,copied,page,"LastModified"),
     -- Metadata       = copy(xref,copied,page,"Metadata"),
     -- PieceInfo      = copy(xref,copied,page,"PieceInfo"),
        Resources      = copy_resources(pdfdoc,xref,copied,page),
     -- SeparationInfo = copy(xref,copied,page,"SeparationInfo"),
    }
    if attributes then
        -- the attributes belong to the xobject that gets written, not to the
        -- page of the source document
        for k, v in next, expand(attributes) do
            xobject[k] = v -- maybe nested
        end
    end
    return createimage {
        bbox   = pageinfo.boundingbox,
        stream = contents(),
        attr   = xobject(),
    }
end
1303 1304
-- todo: codeinjections
1305 1306
-- The page inclusion interface: open, close, query and copy.
local image = { }

image.open  = openpdf
image.close = closepdf
image.query = querypdf
image.copy  = copypage

lpdf_epdf.image = image
1312 1313
end
end
1314