mtx-pdf.lua /size: 13 Kb    last modification: 2021-10-28 13:50
1
-- Register this script in the global module table; the table is created
-- here when no other script has done so yet.
modules = modules or { }

do
    local moduledata = {
        version   = 1.001,
        comment   = "companion to mtxrun.lua",
        author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
        copyright = "PRAGMA ADE / ConTeXt Development Team",
        license   = "see context related readme files"
    }

    modules['mtx-pdf'] = moduledata
end
8 9
local
tonumber
=
tonumber
10
local
format
,
gmatch
,
gsub
,
match
,
find
=
string
.
format
,
string
.
gmatch
,
string
.
gsub
,
string
.
match
,
string
.
find
11
local
utfchar
=
utf
.
char
12
local
concat
=
table
.
concat
13
local
setmetatableindex
,
sortedhash
,
sortedkeys
=
table
.
setmetatableindex
,
table
.
sortedhash
,
table
.
sortedkeys
14 15
-- Help text (an xml blob) that is handed to logs.application below. The
-- flags listed here mirror the options that the dispatcher at the end of
-- this file understands: info, metadata (with pretty), fonts, object and
-- the detail modifier that is looked up via environment.argument.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-pdf</entry>
  <entry name="detail">ConTeXt PDF Helpers</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="info"><short>show some info about the given file</short></flag>
    <flag name="metadata"><short>show metadata xml blob</short></flag>
    <flag name="pretty"><short>replace newlines in metadata</short></flag>
    <flag name="fonts"><short>show used fonts (<ref name="detail"/>)</short></flag>
    <flag name="object"><short>show object</short></flag>
    <flag name="detail"><short>show more detail (used by info and fonts)</short></flag>
   </subcategory>
   <subcategory>
    <example><command>mtxrun --script pdf --info foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata foo.pdf</command></example>
    <example><command>mtxrun --script pdf --metadata --pretty foo.pdf</command></example>
    <example><command>mtxrun --script pdf --object=4 foo.pdf</command></example>
   </subcategory>
  </category>
 </flags>
</application>
]]
42 43
-- The application object is created from the script name, banner and the
-- help blob defined above.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "ConTeXt PDF Helpers 0.10",
    name     = "mtx-pdf",
})

-- All messages in this script are emitted via the application's reporter.
local report = application.report
50 51
-- Load the pdf reading code. Without a global pdfe we fall back on
-- lpdf-epd.lua; with pdfe we load lpdf-pde, where the .lmt variant and the
-- two files loaded before it are used when CONTEXTLMTXMODE is set.
do
    -- locate a file in the "tex" category and run it
    local function loadtexfile(name)
        dofile(resolvers.findfile(name,"tex"))
    end

    if pdfe then
        if CONTEXTLMTXMODE then
            loadtexfile("util-dim.lua")
            loadtexfile("lpdf-ini.lmt")
            loadtexfile("lpdf-pde.lmt")
        else
            loadtexfile("lpdf-pde.lua")
        end
    else
        loadtexfile("lpdf-epd.lua")
    end
end
60 61
-- Make sure that the global scripts table and our own pdf subtable exist;
-- tables that are already there are kept as they are.
if not scripts then
    scripts = { }
end

if not scripts.pdf then
    scripts.pdf = { }
end
63 64
-- The detail switch can be given as --detail or as --details; the first
-- one that is set wins.
local details = environment.argument("detail")

if not details then
    details = environment.argument("details")
end
65 66
-- Open a pdf file with lpdf.epdf.load.
--
-- filename : name of the file to open
--
-- Returns the loaded document. When no name is given, when the file does
-- not exist or when it cannot be loaded, a message is reported and nothing
-- is returned.
local function loadpdffile(filename)
    if not (filename and filename ~= "") then
        report("no filename given")
        return
    end
    if not lfs.isfile(filename) then
        report("unknown file '%s'",filename)
        return
    end
    local document = lpdf.epdf.load(filename)
    if not document then
        report("no valid pdf file '%s'",filename)
        return
    end
    return document
end
80 81
-- Report general information about a pdf file: version, page count, the
-- entries of the Info dictionary, page box dimensions (all boxes when the
-- detail switch is set, otherwise only the CropBox) and the number of
-- annotations, destinations, javascripts, widgets and embedded files.
--
-- filename : name of the pdf file to inspect
--
-- Nothing is returned; problems with loading are reported by loadpdffile.
function scripts.pdf.info(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local catalog  = pdffile.Catalog or { }
    local info     = pdffile.Info or { } -- the Info dictionary is optional in a pdf file
    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages or 0

    local unset = "<unset>"

    local function field(label,value)
        report("%-17s > %s",label,value)
    end

    field("filename",         filename)
    field("pdf version",      catalog.Version or unset)
    field("major version",    pdffile.majorversion or unset)
    field("minor version",    pdffile.minorversion or unset)
    field("number of pages",  nofpages)
    field("title",            info.Title or unset)
    field("creator",          info.Creator or unset)
    field("producer",         info.Producer or unset)
    field("author",           info.Author or unset)
    field("creation date",    info.CreationDate or unset)
    field("modification date",info.ModDate or unset)

    -- Report the dimensions of the given box, one line per run of
    -- consecutive pages that share the same dimensions. A page without the
    -- requested box falls back on its MediaBox.
    local function somebox(what)
        local box = string.lower(what)
        local width, height, start
        local function flush(last)
            report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,last,width,height)
        end
        for i=1,nofpages do
            local page = pages[i]
            local bbox = page[what] or page.MediaBox or { 0, 0, 0, 0 }
            -- a pdf rectangle is { llx, lly, urx, ury }, so the width comes
            -- from the x coordinates and the height from the y coordinates
            local w = bbox[3] - bbox[1]
            local h = bbox[4] - bbox[2]
            if w ~= width or h ~= height then
                if start then
                    flush(i-1)
                end
                width, height, start = w, h, i
            end
        end
        -- without pages there is no run to report
        if start then
            flush(nofpages)
        end
    end

    if details then
        somebox("MediaBox")
        somebox("ArtBox")
        somebox("BleedBox")
        somebox("CropBox")
        somebox("TrimBox")
    else
        somebox("CropBox")
    end

    -- the counts below are only shown when they are larger than zero

    local function count(label,n)
        if n and n > 0 then
            field(label,n)
        end
    end

    local function nofkeys(t)
        return t and #sortedkeys(t)
    end

    local annotations = 0
    for i=1,nofpages do
        local a = pages[i].Annots
        if a then
            annotations = annotations + #a
        end
    end
    count("annotations",annotations)

    count("destinations",nofkeys(pdffile.destinations))
    count("javascripts",nofkeys(pdffile.javascripts))

    local widgets = pdffile.widgets
    count("widgets",widgets and #widgets)

    count("embeddedfiles",nofkeys(pdffile.embeddedfiles))
end
168 169
-- Show the metadata of a pdf file as found in the Metadata entry of the
-- catalog.
--
-- filename : name of the pdf file to inspect
-- pretty   : when set, every "\r" in the blob is replaced by "\n"
--
-- Nothing is returned; a message is reported when there is no metadata.
function scripts.pdf.metadata(filename,pretty)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local stream = pdffile.Catalog.Metadata
    if not stream then
        report("no metadata")
        return
    end
    -- the entry is called in order to get the actual content
    local blob = stream()
    if pretty then
        blob = gsub(blob,"\r","\n")
    end
    report("metadata > \n\n%s\n",blob)
end
185 186
local
expanded
=
lpdf
.
epdf
.
expanded
187 188
-- Collect the fonts that are referenced from the resources of all pages,
-- including those in the resources of (nested) xobjects.
--
-- pdffile : a loaded document with the fields nofpages and pages
--
-- Returns a table that maps a font resource name onto its font entry. A
-- font that is found inside an xobject gets the names of the enclosing
-- xobjects as prefix, separated by periods.
local function getfonts(pdffile)
    local fonts = { }

    -- prefix a resource name with the path that led to it, if any
    local function qualified(tag,name)
        if tag then
            return tag .. "." .. name
        end
        return name
    end

    local function collect(where,tag)
        local resources = where.Resources
        if not resources then
            return
        end
        local fontlist = resources.Font
        if fontlist then
            for name, font in expanded(fontlist) do
                fonts[qualified(tag,name)] = font
            end
        end
        local xobjects = resources.XObject
        if xobjects then
            for name, xobject in expanded(xobjects) do
                collect(xobject,qualified(tag,name))
            end
        end
    end

    for pagenumber=1,pdffile.nofpages do
        collect(pdffile.pages[pagenumber])
    end

    return fonts
end
215 216
-- Analyze the ToUnicode entry of a font.
--
-- font : a font entry; its ToUnicode field, when present, is called in
--        order to get the mapping as a string
--
-- Returns two tables: counts, which maps a unicode onto the number of
-- times it shows up as a target, and indices, which has a true entry for
-- every source code that is mapped. Only the bfrange entries of the form
-- <first> <last> <target> and the bfchar entries are interpreted. Nothing
-- is returned when the font has no ToUnicode entry.
local function getunicodes(font)
    local tounicode = font.ToUnicode
    if not tounicode then
        return
    end

    local cmap    = tounicode()
    local counts  = { }
    local indices = { }

    -- a unicode that has not been seen before starts out at zero
    setmetatableindex(counts,function(t,k) t[k] = 0 return 0 end)

    local function seen(unicode)
        counts[unicode] = counts[unicode] + 1
    end

    for ranges in gmatch(cmap,"beginbfrange%s*(.-)%s*endbfrange") do
        for first, last, target in gmatch(ranges,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
            local from  = tonumber(first,16)
            local upto  = tonumber(last,16)
            local delta = tonumber(target,16) - from
            for index=from,upto do
                seen(index + delta)
                indices[index] = true
            end
        end
    end

    for mappings in gmatch(cmap,"beginbfchar%s*(.-)%s*endbfchar") do
        for source, target in gmatch(mappings,"<([^>]+)>%s+<([^>]+)>") do
            indices[tonumber(source,16)] = true
            -- a target can have several units of four hex digits
            for unit in gmatch(target,"....") do
                seen(tonumber(unit,16))
            end
        end
    end

    return counts, indices
end
253 254
-- Report the fonts that are used in a pdf file.
--
-- filename : name of the pdf file to inspect
--
-- With the detail switch set, each font is listed with its id, basefont,
-- the names found in the encoding differences, subtype, unicode status,
-- characters and codepoints, followed by the indices that are mapped per
-- basefont. Otherwise a table with one line per font is shown. Nothing is
-- returned; problems with loading are reported by loadpdffile.
function scripts.pdf.fonts(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end

    local usedfonts = getfonts(pdffile)
    local found     = { }
    local common    = setmetatableindex("table") -- basefont -> set of mapped indices

    for k, v in sortedhash(usedfonts) do
        local basefont = v.BaseFont
        local encoding = v.Encoding
        local subtype  = v.Subtype
        local unicode  = v.ToUnicode
        local counts, indices = getunicodes(v)
        local codes = { }
        local chars = { }
        local freqs = { }
        local names = { }
        if counts then
            codes = sortedkeys(counts)
            for i=1,#codes do
                local code = codes[i]
                if code > 32 then
                    local c = utfchar(code)
                    -- append, so that skipped control codes leave no holes
                    -- (the length operator and concat need a proper sequence)
                    chars[#chars+1] = c
                    freqs[i] = format("U+%05X %s %s",code,counts[code] > 1 and "+" or "",c)
                else
                    freqs[i] = format("U+%05X %s --",code,counts[code] > 1 and "+" or "")
                end
            end
            if basefont and unicode then
                -- subsets of the same font share a name after the prefix
                -- that ends with a plus sign has been removed
                local b = gsub(basefont,"^.*%+","")
                local c = common[b]
                for index in next, indices do
                    c[index] = true
                end
            end
            for i=1,#codes do
                codes[i] = format("U+%05X",codes[i])
            end
        end
        local d = encoding and encoding.Differences
        if d then
            -- only the string entries of the differences array are kept
            for i=1,#d do
                local di = d[i]
                if type(di) == "string" then
                    names[#names+1] = di
                end
            end
        end
        found[k] = {
            basefont = basefont or "no basefont",
            encoding = (d and "custom n=" .. #d) or "no encoding",
            subtype  = subtype or "no subtype",
            -- test the local that holds v.ToUnicode
            unicode  = unicode and "unicode" or "no vector",
            chars    = chars,
            codes    = codes,
            freqs    = freqs,
            names    = names,
        }
    end

    if details then
        for k, v in sortedhash(found) do
            report("id : %s",k)
            report("basefont : %s",v.basefont)
            report("encoding : % t",v.names)
            report("subtype : %s",v.subtype)
            report("unicode : %s",v.unicode)
            if #v.chars > 0 then
                report("characters : % t",v.chars)
            end
            if #v.codes > 0 then
                report("codepoints : % t",v.codes)
            end
            report("")
        end
        for k, v in sortedhash(common) do
            report("basefont : %s",k)
            report("indices : % t",sortedkeys(v))
            report("")
        end
    else
        -- the characters column is only added when some font has characters
        local haschar = false
        for k, v in sortedhash(found) do
            if #v.chars > 0 then
                haschar = true
                break
            end
        end
        local results = { { "id", "basefont", "encoding", "subtype", "unicode", haschar and "characters" or nil } }
        for k, v in sortedhash(found) do
            -- characters are separated by a space, as in the detailed listing
            results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, haschar and concat(v.chars," ") or nil }
        end
        utilities.formatters.formatcolumns(results)
        report(results[1])
        report("")
        for i=2,#results do
            report(results[i])
        end
        report("")
    end
end
357 358
-- Print a verbose representation of one object of a pdf file.
--
-- filename : name of the pdf file to inspect
-- n        : number of the object to show
--
-- The result of lpdf.epdf.verboseobject is printed, or a message when that
-- call returns nothing. A missing object number is reported instead of
-- being ignored silently. Nothing is returned.
function scripts.pdf.object(filename,n)
    if not n then
        report("no object number given")
        return
    end
    local pdffile = loadpdffile(filename)
    if pdffile then
        print(lpdf.epdf.verboseobject(pdffile,n) or ("no object with number " .. n))
    end
end
366 367
-- scripts.pdf.info("e:/tmp/oeps.pdf")
368
-- scripts.pdf.metadata("e:/tmp/oeps.pdf")
369
-- scripts.pdf.fonts("e:/tmp/oeps.pdf")
370
-- scripts.pdf.linearize("e:/tmp/oeps.pdf")
371 372
-- The first file on the command line is the one we operate on.
local filename = environment.files[1] or ""

-- Run the first action that was asked for; without a filename, or without
-- a known action, the help information is shown.
do
    local argument = environment.argument
    local actions  = scripts.pdf

    if filename == "" then
        application.help()
    elseif argument("info") then
        actions.info(filename)
    elseif argument("metadata") then
        actions.metadata(filename,argument("pretty"))
    elseif argument("fonts") then
        actions.fonts(filename)
    elseif argument("object") then
        actions.object(filename,tonumber(argument("object")))
    elseif argument("exporthelp") then
        application.export(argument("exporthelp"),filename)
    else
        application.help()
    end
end
389 390
-- a variant on an experiment by hartmut
391 392
--~ function downloadlinks(filename)
393
--~ local document = lpdf.epdf.load(filename)
394
--~ if document then
395
--~ local pages = document.pages
396
--~ for p = 1,#pages do
397
--~ local annotations = pages[p].Annots
398
--~ if annotations then
399
--~ for a=1,#annotations do
400
--~ local annotation = annotations[a]
401
--~ local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI
402
--~ if uri and string.find(uri,"^http") then
403
--~ os.execute("wget " .. uri)
404
--~ end
405
--~ end
406
--~ end
407
--~ end
408
--~ end
409
--~ end
410