1eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}' && eval 'exec perl -S $0 $argv:q'
2 if 0;
3
4#D \module
5#D [ file=pdftrimwhite.pl,
6#D version=2000.07.13,
7#D title=PDF postprocessing,
8#D subtitle=cropping whitespace from pdf files,
9#D author=Hans Hagen,
10#D date=\currentdate,
11#D copyright=PRAGMA ADE]
12
13#C This module is part of the \CONTEXT\ macro||package and is
14#C therefore copyrighted by \PRAGMA. See readme.pdf for
15#C details.
16
17#D This script can be used to crop margins that contain
18#D useless information from a \PDF\ image. It does so by:
19#D
20#D \startitemize[packed,n]
21#D \som cropping the image into an alternative file
22#D \som determining the boundingbox of the alternative
23#D \som cropping the image into a resulting file
#D \stopitemize
25#D
26#D In the process, some checks are carried out. Step~1 is
27#D taken care of by \PDFTEX, step~2 by \GHOSTSCRIPT, using a
28#D file generated by \PDFTOPS, and \PDFTEX\ is responsible
29#D for step~3.
30#D
31#D \startuseMPgraphic{original}
32#D numeric n ; n = 1cm ;
33#D path p ; p := fullsquare xyscaled (8n,12n) ;
34#D path q ; q := fullsquare xyscaled (2n,3n) shifted (n,n) ;
35#D path r ; r := ((0,0)--(3n,0)) shifted (0, 5.5n) ;
36#D path s ; s := ((0,0)--(3n,0)) shifted (0,-5.5n) ;
37#D path t ; t := (-2n,-4n) ;
38#D path u ; u := p enlarged -.75n ;
39#D path v ; v := p enlarged (-1.75n,-2n) shifted (n,1.25n) ;
40#D path w ; w := q enlarged .25n ;
41#D fill p withcolor .7white ;
42#D fill q withcolor .7green ;
43#D draw r withpen pencircle scaled .25n withcolor .7green ;
44#D draw s withpen pencircle scaled .25n withcolor .7green ;
45#D draw t withpen pencircle scaled .50n withcolor .7green ;
46#D draw u withpen pencircle scaled .10n withcolor white ;
47#D draw v withpen pencircle scaled .10n withcolor .7red ;
48#D draw w withpen pencircle scaled .10n ;
49#D verbatimtex \tttf \setupframed[frame=off,align=left] etex ;
50#D label (btex \framed{crap} etex, center r) ;
51#D label (btex \framed{crap} etex, center s) ;
52#D label (btex \framed{crap} etex, center t) ;
53#D label (btex \framed{graphic} etex, center q) ;
54#D label.urt(btex \framed{page} etex, llcorner p) ;
55#D label.urt(btex \framed{crop} etex, llcorner u) ;
56#D label.lft(btex \framed{leftcrop\\
57#D rightcrop\\
58#D topcrop\\
59#D bottomcrop} etex, .5[ulcorner v,llcorner v]) ;
60#D label.bot(btex \framed{offset} etex, .5[llcorner w,lrcorner w]) ;
61#D \stopuseMPgraphic
62#D
63#D \placefigure
64#D [here][fig:pdftrimwhite]
65#D {Crops and offsets.}
66#D {\useMPgraphic{original}}
67#D
68#D The \TEX\ part has two alternatives, one using \CONTEXT, and
69#D another using plain \TEX. The \CONTEXT\ method is slower but
70#D can be extended more easily.
71#D
72#D The script is executed as follows:
73#D
74#D \starttyping
#D pdftrimwhite [switches] filename [result]
76#D \stoptyping
77#D
78#D The next call crops \type {test.pdf} to its natural
79#D boundingbox.
80#D
81#D \starttyping
82#D pdftrimwhite test
83#D \stoptyping
84#D
85#D If the file has some crap at the bottom, you can say:
86#D
87#D \starttyping
88#D pdftrimwhite test --bottomcrop=2cm
89#D \stoptyping
90#D
91#D This clips 2cm from the bottom. You can clip on all sides
92#D individually, in combination or at once, like in:
93#D
94#D \starttyping
95#D pdftrimwhite test --bottomcrop=2cm --crop=1cm
96#D \stoptyping
97#D
98#D The final result is a tightly cropped image. In order to get
99#D a 5mm margin around this image, you can say:
100#D
101#D \starttyping
102#D pdftrimwhite test --bottomcrop=2cm --offset=5mm
103#D \stoptyping
104#D
105#D By default, the script intercepts logging messages and
106#D writes them to a logfile with the same name as the
#D resulting image and the suffix \type {log}. If no name is
108#D given, the name \type {pdftrimwhite} is used for all resulting
109#D files.
110#D
111#D By default, \CONTEXT\ is used. When installed properly, you
112#D can also use plain \TEX, by adding a switch \type
#D {--plain}. Partial switches are accepted, so the next call
114#D is valid:
115#D
116#D \starttyping
117#D pdftrimwhite test result --bot=2cm --off=5mm --plain
118#D \stoptyping
119#D
120#D The current implementation uses an intermediate \POSTSCRIPT\
121#D file. This may change as \GHOSTSCRIPT\ gets more clever with
122#D \PDF\ files.
123#D
124#D In \in {figure} [fig:pdftrimwhite] the green rectangle is the
125#D picture we want to keep. Around this picture, we want a
126#D margin, represented by the black rectangle, and specified by
127#D \type {--offset}. The white rectangle is the cropbox
128#D defined by \type {--crop}. That way we get rid of header
129#D and footerlines. The red rectangle results from an
#D additional \type {--leftcrop} and \type {--bottomcrop} and
131#D takes care of some content, as represented by the green
132#D dot.
133#D
134#D The \type {--verbose} switch can be used to disable the
135#D interception of log messages.
136
137#D We load a few \PERL\ modules \unknown\
138
139use Config ;
140use Getopt::Long ;
141
142use strict ;
143
144#D \unknown\ and initialize them.
145
# Switches may be abbreviated and are matched case-insensitively; anything
# that is not recognised as a switch is left in @ARGV untouched.
Getopt::Long::Configure(qw(auto_abbrev ignore_case pass_through)) ;
150
151#D Before fetching the switches, we initialize the
152#D variables.
153
# Crop and offset amounts; each one is pasted verbatim into the generated
# TeX source as a dimension.
my $Crop = "0cm" ;
my ($LeftCrop, $RightCrop, $TopCrop, $BottomCrop) = ("0cm") x 4 ;
my $Offset = "0cm" ;

# Remaining command line state.
my ($GSbin, $Verbose, $Help, $UsePlain) = ("", 0, 0, 0) ;
my $Page = 1 ;

#D On \MSWINDOWS\ and \UNIX\ the following defaults, combined
#D with the check later, should work out okay.

my ($pdfps, $gs) = ("pdftops", "gs") ;

# True unless the osname reported by Config looks like DOS or MS Windows.
my $thisisunix = ! ($Config{osname} =~ /dos|mswin/i) ;

#D When no resulting file is given, we use \type {pdftrimwhite}
#D as name (checked later).

my ($figurefile, $resultfile, $tempfile) = ("", "", "") ;

my $programname = "pdftrimwhite" ;

#D Messages are temporarily saved and written to a log file
#D afterwards.

my ($results, $pipe, $result) = ("", "", "") ;

#D Unfortunately we need this information, first since
#D \PDFTOPS\ does not honor the cropbox, and second because
#D the vertical coordinates are swapped.

my ($pwidth, $pheight) = (597, 847) ;
my ($hoffset, $voffset) = (0, 0) ;

#D A few more variables.

my ($width, $height, $llx, $lly, $urx, $ury) = (0) x 6 ;
206
207#D Here are the switches we accept. The \type {--gsbin} switch
208#D is a bonus one, and the \type {--help} switch comes
209#D naturally.
210
# Bind every switch to the variable that receives its value. Switches taking
# "=s" expect a string (a TeX dimension or a program name), "=i" an integer,
# and the others are plain flags.
GetOptions(
    "crop=s"       => \$Crop,
    "leftcrop=s"   => \$LeftCrop,
    "rightcrop=s"  => \$RightCrop,
    "topcrop=s"    => \$TopCrop,
    "bottomcrop=s" => \$BottomCrop,
    "offset=s"     => \$Offset,
    "gsbin=s"      => \$GSbin,
    "page=i"       => \$Page,
    "plain"        => \$UsePlain,
    "verbose"      => \$Verbose,
    "help"         => \$Help,
) ;
223
224#D If asked for, or if no file is given, we provide some
225#D help information.
226
# Print the usage summary on the currently selected output handle.
sub PrintHelp {
    # One entry per output line; empty entries produce the blank lines.
    my @help = (
        "This is PdfTrimWhite",
        "",
        "usage:",
        "",
        "pdftrimwhite [switches] filename result",
        "",
        "switches:",
        "",
        "--crop=",
        "--offset=",
        "--leftcrop=",
        "--rightcrop=",
        "--topcrop=",
        "--bottomcrop=",
        "--gsbin=",
        "--page=",
        "--plain",
        "--verbose",
    ) ;
    print join("\n", @help) . "\n" ;
}
242
243#D The preparations:
244
# Validate the command line and derive the file names and program names
# used by the rest of the script.
#
# Reads:  $Help, @ARGV, $GSbin, $thisisunix, $programname
# Sets:   $figurefile, $resultfile, $tempfile, $gs, $pipe
# Exits (after printing help or a message) when help was requested, when no
# figure name was given, or when "<figure>.pdf" does not exist.
sub GetItRight {
    if ($Help) {
        PrintHelp() ;
        exit ;
    }

    # A missing argument becomes an empty string, so the substitutions and
    # comparisons below always work on defined values.
    $figurefile = defined $ARGV[0] ? $ARGV[0] : '' ;
    $resultfile = defined $ARGV[1] ? $ARGV[1] : '' ;
    $figurefile =~ s/\.pdf$//i ;
    $resultfile =~ s/\.pdf$//i ;

    if ($figurefile eq '') {
        PrintHelp() ;
        exit ;
    }

    $gs = "gswin32c" unless $thisisunix ;
    $gs = $GSbin     if $GSbin ne '' ;   # an explicit --gsbin always wins

    unless (-e "$figurefile.pdf") {
        print "Something is terribly wrong: no file found\n" ;
        exit ;
    }

    # A result name that is empty, starts with a dash (a passed-through
    # switch) or still contains a dot falls back to the program name.
    if ($resultfile eq '' || $resultfile =~ /(^\-|\.)/) {
        $resultfile = $programname ;
    }

    # Derive the temporary name only now, from the validated result name,
    # so it never ends in a bare dash or carries a rejected name.
    $tempfile = "pdftrimwhite-$resultfile" ;

    # The same redirection is used on every platform.
    $pipe = "2>&1" ;
}
265
266#D Something common.
267
# Return the TeX fragment that writes the four whole-bigpoint values
# (horizontal offset, vertical offset, width, height) to "<figure>.tmp".
sub SavePageData {
    my @tex = (
        "% saving page data",
        "\\immediate\\openout\\scratchwrite=$figurefile.tmp",
        "\\immediate\\write\\scratchwrite",
        " {\\HOffsetBP\\space\\VOffsetBP\\space",
        " \\FigureWidthBP\\space\\FigureHeightBP}",
        "\\immediate\\closeout\\scratchwrite",
    ) ;
    return join("\n", @tex) . "\n" ;
}
275
# Return the ConTeXt source that places page $Page of "<figure>.pdf" on a
# paper of \FigureWidth by \FigureHeight, clipped at \HOffset and \VOffset.
sub MakePageConTeXt {
    my @tex = (
        "% the real work",
        "\\definepapersize",
        " [Crap]",
        " [width=\\FigureWidth,",
        " height=\\FigureHeight]",
        "\\setuppapersize",
        " [Crap][Crap]",
        "\\setuplayout",
        " [topspace=0cm,backspace=0pt,",
        " height=middle,width=middle,",
        " header=0pt,footer=0pt]",
        "\\starttext",
        " \\startstandardmakeup",
        " \\clip",
        " [voffset=\\VOffset,",
        " hoffset=\\HOffset,",
        " width=\\FigureWidth,",
        " height=\\FigureHeight]",
        " {\\externalfigure[$figurefile.pdf][page=$Page]\\hss}",
        " \\stopstandardmakeup",
        "\\stoptext",
    ) ;
    return join("\n", @tex) . "\n" ;
}
298
# Return the plain TeX source that ships out box 0 on a page of
# \FigureWidth by \FigureHeight, shifted by -\HOffset and -\VOffset.
sub MakePagePlainTeX {
    my @tex = (
        "% the real work",
        "\\output{}",
        "\\hoffset=-1in",
        "\\voffset=\\hoffset",
        "\\pdfpageheight=\\FigureHeight",
        "\\pdfpagewidth=\\FigureWidth",
        "\\vbox to \\pdfpageheight",
        " {\\offinterlineskip",
        " \\vskip-\\VOffset",
        " \\hbox to \\pdfpagewidth{\\hskip-\\HOffset\\box0\\hss}",
        " \\vss}",
        "\\end",
    ) ;
    return join("\n", @tex) . "\n" ;
}
312
# Return the first-pass TeX calculations. \dimen0 and \dimen2 start as the
# figure width and height and are reduced by the crops on both sides;
# \dimen4 and \dimen6 collect the left and top crop. The results are stored
# in \FigureWidth, \FigureHeight, \HOffset and \VOffset, and passed through
# \ScaledPointsToWholeBigPoints into the matching ...BP macros.
sub CalculateClip {
    my @tex = (
        "% some calculations",
        "\\dimen0=\\figurewidth",
        "\\dimen2=\\figureheight",
        "\\dimen4=$Crop",
        "\\dimen6=$Crop",
    ) ;

    # [register, amount] pairs, emitted in this order as \advance lines.
    my @steps = (
        [4, $LeftCrop],
        [6, $TopCrop],
        [0, "-\\dimen4"],
        [0, "-$Crop"],
        [0, "-$RightCrop"],
        [2, "-\\dimen6"],
        [2, "-$Crop"],
        [2, "-$BottomCrop"],
    ) ;
    for my $step (@steps) {
        my ($register, $amount) = @$step ;
        push @tex, "\\advance\\dimen$register by $amount" ;
    }

    push @tex,
        "\\edef\\FigureWidth {\\the\\dimen0}",
        "\\edef\\FigureHeight{\\the\\dimen2}",
        "\\edef\\HOffset {\\the\\dimen4}",
        "\\edef\\VOffset {\\the\\dimen6}" ;

    # [register, macro] pairs for the conversion to whole big points.
    my @conversions = (
        [0, "FigureWidthBP"],
        [2, "FigureHeightBP"],
        [4, "HOffsetBP"],
        [6, "VOffsetBP"],
    ) ;
    for my $conversion (@conversions) {
        my ($register, $macro) = @$conversion ;
        push @tex, "\\ScaledPointsToWholeBigPoints{\\number\\dimen$register}\\$macro" ;
    }

    return join("\n", @tex) . "\n" ;
}
335
# Return the second-pass TeX calculations, based on the boundingbox found
# for the first-pass result: the box size grows by $Offset on every side,
# the horizontal offset is $hoffset plus $llx minus $Offset, and the
# vertical offset is counted down from $pheight.
#
# NOTE(review): only $TopCrop is added to the vertical offset, while the
# first pass also includes $Crop in it, and the $voffset value read back
# from the first pass is not used here - confirm that this is intended.
sub RecalculateClip {
    my @tex = (
        "% some calculations",
        "\\dimen0=${width}bp",
        "\\dimen2=${height}bp",
        "\\dimen4=${hoffset}bp",
        "\\dimen6=${pheight}bp",
    ) ;

    # [register, amount] pairs, emitted in this order as \advance lines.
    my @steps = (
        [0, $Offset],
        [0, $Offset],
        [2, $Offset],
        [2, $Offset],
        [4, "${llx}bp"],
        [4, "-$Offset"],
        [6, "-${lly}bp"],
        [6, $Offset],
        [6, "-\\dimen2"],
        [6, $TopCrop],
    ) ;
    for my $step (@steps) {
        my ($register, $amount) = @$step ;
        push @tex, "\\advance\\dimen$register by $amount" ;
    }

    push @tex,
        "\\edef\\FigureWidth {\\the\\dimen0}",
        "\\edef\\FigureHeight{\\the\\dimen2}",
        "\\edef\\HOffset {\\the\\dimen4}",
        "\\edef\\VOffset {\\the\\dimen6}" ;

    return join("\n", @tex) . "\n" ;
}
356
357#D The previous scripts could be more sparse, but for the
358#D moment we prefer readability. Both scripts save some
#D information in a temporary file. We choose between them with
360#D the following sub routine.
361
362#D The first pass:
363
# Return the ConTeXt preamble of the first pass: select pdfTeX output and
# ask for the dimensions of page $Page of "<figure>.pdf".
sub PrepareConTeXt {
    my $setup   = "% interface=en\n\\setupoutput[pdftex]\n" ;
    my $measure = "\\getfiguredimensions[$figurefile.pdf][page=$Page]\n" ;
    return $setup . $measure ;
}
368
# Return the plain TeX preamble of the first pass: load supp-mis, put page
# $Page of "<figure>.pdf" in box 0 and copy its width and height into
# \figurewidth and \figureheight.
sub PreparePlainTeX {
    my @tex = (
        "% plain tex alternative, needs recent supp-mis",
        "\\input supp-mis",
        "\\pdfoutput=1",
        "\\newdimen\\figurewidth",
        "\\newdimen\\figureheight",
        "\\setbox0=\\hbox",
        " {\\immediate\\pdfximage page $Page {$figurefile.pdf}\\pdfrefximage\\pdflastximage}",
        "\\figurewidth=\\wd0",
        "\\figureheight=\\ht0",
    ) ;
    return join("\n", @tex) . "\n" ;
}
379
# Write the first-pass TeX file "$tempfile.tex": the preamble, the crop
# calculations, the code that saves the page data, and the page itself,
# in the plain TeX or the ConTeXt variant depending on $UsePlain.
#
# Dies when the file cannot be written, because nothing that follows can
# work without it.
sub PrepareFirstPass {
    my $source = $UsePlain
        ? PreparePlainTeX() . CalculateClip() . SavePageData() . MakePagePlainTeX()
        : PrepareConTeXt()  . CalculateClip() . SavePageData() . MakePageConTeXt() ;

    my $texname = "$tempfile.tex" ;
    open(my $tex, '>', $texname)
        or die "Something is terribly wrong: cannot write $texname: $!\n" ;
    print {$tex} $source ;
    # Buffered write errors only show up when the handle is closed.
    close($tex)
        or die "Something is terribly wrong: cannot write $texname: $!\n" ;
}
395
396#D The second pass looks much like the first one, but this
397#D time we don't save information, use the natural
398#D boundingbox, and provide the offset.
399
# Return the ConTeXt preamble of the second pass, which only selects
# pdfTeX output.
sub SetupConTeXt {
    return join("\n", "% interface=en", "\\setupoutput[pdftex]", "") ;
}
403
# Return the plain TeX preamble of the second pass: put page $Page of
# "<figure>.pdf" in box 0.
sub SetupPlainTeX {
    my @tex = (
        "% plain tex alternative",
        "\\pdfoutput=1",
        "\\setbox0=\\hbox",
        " {\\immediate\\pdfximage page $Page {$figurefile.pdf}\\pdfrefximage\\pdflastximage}",
    ) ;
    return join("\n", @tex) . "\n" ;
}
409
# Write the second-pass TeX file "$tempfile.tex": the preamble, the
# calculations based on the boundingbox, and the page itself, in the plain
# TeX or the ConTeXt variant depending on $UsePlain.
#
# Dies when the file cannot be written, because the final run would
# otherwise process the stale first-pass source.
sub PrepareSecondPass {
    my $source = $UsePlain
        ? SetupPlainTeX() . RecalculateClip() . MakePagePlainTeX()
        : SetupConTeXt()  . RecalculateClip() . MakePageConTeXt() ;

    my $texname = "$tempfile.tex" ;
    open(my $tex, '>', $texname)
        or die "Something is terribly wrong: cannot write $texname: $!\n" ;
    print {$tex} $source ;
    # Buffered write errors only show up when the handle is closed.
    close($tex)
        or die "Something is terribly wrong: cannot write $texname: $!\n" ;
}
423
424#D The information we save in the first pass, is loaded here.
425
# Read the page data saved by the first pass from "<figure>.tmp". The first
# line holding four whole numbers sets $hoffset, $voffset, $pwidth and
# $pheight, in that order.
#
# When the file cannot be opened or holds no such line, the variables keep
# the values they already have.
sub FetchPaperSize {
    open(my $tmp, '<', "$figurefile.tmp") or return ;
    # Read line by line; the loop ends at the first match or at end of file.
    while (my $line = <$tmp>) {
        chomp $line ;
        if ($line =~ /^(\d+) (\d+) (\d+) (\d+) *$/) {
            ($hoffset, $voffset, $pwidth, $pheight) = ($1, $2, $3, $4) ;
            last ;
        }
    }
    close($tmp) ;
}
437
438#D Here we try to find the natural boundingbox. We need to
439#D pick up the page dimensions here.
440
# Run pdfTeX (plain) or texexec (ConTeXt) on "$tempfile". The output goes
# into $result, is echoed when $Verbose is set, and is appended to $results.
sub RunTeX {
    my $command = $UsePlain
        ? "pdftex -prog=pdftex -fmt=plain -int=batchmode $tempfile"
        : "texexec --batch --once --purge $tempfile" ;
    $result = `$command` ;
    if ($Verbose) {
        print $result ;
    }
    $results .= "$result\n" ;
}
447
# Run Ghostscript with the bbox device on "$tempfile.pdf". The output goes
# into $result, is echoed when $Verbose is set, and is appended to $results.
sub FindBoundingBox {
    my $command = join(" ", $gs, "-sDEVICE=bbox", "-dNOPAUSE", "-dBATCH", "$tempfile.pdf", $pipe) ;
    $result = `$command` ;
    if ($Verbose) {
        print $result ;
    }
    $results .= "$result\n" ;
}
451
# Process the current TeX file and collect what the next step needs.
sub IdentifyCropBox {
    RunTeX() ;            # typeset "$tempfile"
    FetchPaperSize() ;    # pick up the saved page data
    FindBoundingBox() ;   # leaves the Ghostscript output in $result
}
456
457#D Just to be sure, we check if there is some image data, so
458#D that we can retry if something went wrong. Unfortunately we cannot
459#D safely check on a high res boundingbox.
460
# Capturing pattern for one boundingbox coordinate: digits, dots and minus
# signs.
my $digits = '([\-\d\.]+)' ;

# Take the first boundingbox from $result and store it in $llx, $lly, $urx
# and $ury, with its size in $width and $height.
#
# Returns 1 when both width and height are non-zero. Otherwise the crops
# for the empty direction (and $Crop) are reset to "0cm", a message is
# printed, and 0 is returned. Exits when $result holds no boundingbox.
sub ValidatedCropBox {
    my @corners = $result =~ /BoundingBox:\s*$digits\s+$digits\s+$digits\s+$digits\s*/mois ;
    unless (@corners) {
        print "Something is terribly wrong: no boundingbox:\n$result\n" ;
        exit ;
    }
    ($llx, $lly, $urx, $ury) = @corners ;

    $width  = abs($urx - $llx) ;
    $height = abs($ury - $lly) ;
    return 1 if $width && $height ;

    if (! $width) {
        print "Something seems wrong: no width\n" ;
        ($LeftCrop, $RightCrop, $Crop) = ("0cm", "0cm", "0cm") ;
    }
    if (! $height) {
        print "Something seems wrong: no height\n" ;
        ($TopCrop, $BottomCrop, $Crop) = ("0cm", "0cm", "0cm") ;
    }
    return 0 ;
}
480
481#D This is the main cropping routine.
482
# The final crop is simply another TeX run on the current "$tempfile".
sub FixCropBox {
    return RunTeX() ;
}
485
486#D For error tracing we save the log information in a file.
487
# Move "$tempfile.pdf" to "$resultfile.pdf", replacing an older result.
#
# A failing rename is reported instead of being ignored; the script carries
# on so that the log file is still written and the temporary files are
# still removed.
sub RenameResult {
    my ($from, $to) = ("$tempfile.pdf", "$resultfile.pdf") ;
    # Remove any existing result before renaming.
    unlink $to ;
    unless (rename($from, $to)) {
        print "Something is terribly wrong: cannot rename $from to $to: $!\n" ;
    }
}
491
# Write the collected program output ($results) to "$resultfile.log".
#
# A log file that cannot be written is reported but is not fatal, so the
# clean-up that follows still takes place.
sub SaveLogInfo {
    my $logname = "$resultfile.log" ;
    my $log ;
    unless (open($log, '>', $logname)) {
        print "Something seems wrong: cannot write $logname: $!\n" ;
        return ;
    }
    print {$log} $results ;
    # Buffered write errors only show up when the handle is closed.
    close($log)
        or print "Something seems wrong: cannot write $logname: $!\n" ;
}
496
497#D We remove all temporary files.
498
# Remove the temporary TeX files and the saved page data. Nothing is
# removed when $Verbose is set.
sub CleanUp {
    return if $Verbose ;
    for my $suffix (qw(tex tuo tui)) {
        unlink "$tempfile.$suffix" ;
    }
    unlink "$figurefile.tmp" ;
}
505
506#D Here it all comes together.
507
# Check the command line, then make the first pass and look for the
# boundingbox of its result.
GetItRight() ;
PrepareFirstPass() ;
IdentifyCropBox() ;

# An empty box makes ValidatedCropBox reset the crops involved, so the
# first pass is repeated once with those new settings.
if (! ValidatedCropBox()) {
    PrepareFirstPass() ;
    IdentifyCropBox() ;
}

# Only a usable box leads to the second, final pass.
if (ValidatedCropBox()) {
    PrepareSecondPass() ;
    FixCropBox() ;
}

RenameResult() ;
SaveLogInfo() ;
CleanUp() ;
526