# pdftrimwhite.pl (size: 14 Kb, last modification: 2020-07-01 14:35)
eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}' && eval 'exec perl -S $0 $argv:q'
        if 0;

# Launcher idiom: a shell that is handed this file executes the first
# line and restarts the script under perl (the second eval is the sh
# form, the third one the csh form). Perl itself treats both lines as
# one statement and never runs it because of the trailing 'if 0'. The
# chain of evals has to stay on a single physical line, otherwise the
# shell cannot parse it.
3
4#D \module
5#D   [       file=pdftrimwhite.pl,
6#D        version=2000.07.13,
7#D          title=PDF postprocessing,
8#D       subtitle=cropping whitespace from pdf files,
9#D         author=Hans Hagen,
10#D           date=\currentdate,
11#D      copyright=PRAGMA ADE]
12
13#C This module is part of the \CONTEXT\ macro||package and is
14#C therefore copyrighted by \PRAGMA. See readme.pdf for
15#C details.
16
17#D This script can be used to crop margins that contain
18#D useless information from a \PDF\ image. It does so by:
19#D
20#D \startitemize[packed,n]
21#D \som  cropping the image into an alternative file
22#D \som  determining the boundingbox of the alternative
23#D \som  cropping the image into a resulting file
#D \stopitemize
25#D
26#D In the process, some checks are carried out. Step~1 is
27#D taken care of by \PDFTEX, step~2 by \GHOSTSCRIPT, using a
28#D file generated by \PDFTOPS, and \PDFTEX\ is responsible
29#D for step~3.
30#D
31#D \startuseMPgraphic{original}
32#D   numeric n ; n = 1cm ;
33#D   path p ; p := fullsquare xyscaled (8n,12n) ;
34#D   path q ; q := fullsquare xyscaled (2n,3n) shifted (n,n) ;
35#D   path r ; r := ((0,0)--(3n,0)) shifted (0, 5.5n) ;
36#D   path s ; s := ((0,0)--(3n,0)) shifted (0,-5.5n) ;
37#D   path t ; t := (-2n,-4n) ;
38#D   path u ; u := p enlarged -.75n ;
39#D   path v ; v := p enlarged (-1.75n,-2n) shifted (n,1.25n) ;
40#D   path w ; w := q enlarged .25n ;
41#D   fill p                               withcolor .7white ;
42#D   fill q                               withcolor .7green ;
43#D   draw r withpen pencircle scaled .25n withcolor .7green ;
44#D   draw s withpen pencircle scaled .25n withcolor .7green ;
45#D   draw t withpen pencircle scaled .50n withcolor .7green ;
46#D   draw u withpen pencircle scaled .10n withcolor   white ;
47#D   draw v withpen pencircle scaled .10n withcolor .7red   ;
48#D   draw w withpen pencircle scaled .10n ;
49#D   verbatimtex \tttf \setupframed[frame=off,align=left] etex ;
50#D   label    (btex \framed{crap}       etex, center   r) ;
51#D   label    (btex \framed{crap}       etex, center   s) ;
52#D   label    (btex \framed{crap}       etex, center   t) ;
53#D   label    (btex \framed{graphic}    etex, center   q) ;
54#D   label.urt(btex \framed{page}       etex, llcorner p) ;
55#D   label.urt(btex \framed{crop}       etex, llcorner u) ;
56#D   label.lft(btex \framed{leftcrop\\
57#D                          rightcrop\\
58#D                          topcrop\\
59#D                          bottomcrop} etex, .5[ulcorner v,llcorner v]) ;
60#D   label.bot(btex \framed{offset}     etex, .5[llcorner w,lrcorner w]) ;
61#D \stopuseMPgraphic
62#D
63#D \placefigure
64#D   [here][fig:pdftrimwhite]
65#D   {Crops and offsets.}
66#D   {\useMPgraphic{original}}
67#D
68#D The \TEX\ part has two alternatives, one using \CONTEXT, and
69#D another using plain \TEX. The \CONTEXT\ method is slower but
70#D can be extended more easily.
71#D
72#D The script is executed as follows:
73#D
74#D \starttyping
#D pdftrimwhite <filename> [<result>] [<switches>]
76#D \stoptyping
77#D
78#D The next call crops \type {test.pdf} to its natural
79#D boundingbox.
80#D
81#D \starttyping
82#D pdftrimwhite test
83#D \stoptyping
84#D
85#D If the file has some crap at the bottom, you can say:
86#D
87#D \starttyping
88#D pdftrimwhite test --bottomcrop=2cm
89#D \stoptyping
90#D
91#D This clips 2cm from the bottom. You can clip on all sides
92#D individually, in combination or at once, like in:
93#D
94#D \starttyping
95#D pdftrimwhite test --bottomcrop=2cm --crop=1cm
96#D \stoptyping
97#D
98#D The final result is a tightly cropped image. In order to get
99#D a 5mm margin around this image, you can say:
100#D
101#D \starttyping
102#D pdftrimwhite test --bottomcrop=2cm --offset=5mm
103#D \stoptyping
104#D
105#D By default, the script intercepts logging messages and
106#D writes them to a logfile with the same name as the
#D resulting image and the suffix \type {log}. If no name is
108#D given, the name \type {pdftrimwhite} is used for all resulting
109#D files.
110#D
111#D By default, \CONTEXT\ is used. When installed properly, you
112#D can also use plain \TEX, by adding a switch \type
#D {--plain}. Partial switches are accepted, so the next call
114#D is valid:
115#D
116#D \starttyping
117#D pdftrimwhite test result --bot=2cm --off=5mm --plain
118#D \stoptyping
119#D
120#D The current implementation uses an intermediate \POSTSCRIPT\
121#D file. This may change as \GHOSTSCRIPT\ gets more clever with
122#D \PDF\ files.
123#D
124#D In \in {figure} [fig:pdftrimwhite] the green rectangle is the
125#D picture we want to keep. Around this picture, we want a
126#D margin, represented by the black rectangle, and specified by
127#D \type {--offset}. The white rectangle is the cropbox
128#D defined by \type {--crop}. That way we get rid of header
129#D and footerlines. The red rectangle results from an
#D additional \type {--leftcrop} and \type {--bottomcrop} and
131#D takes care of some content, as represented by the green
132#D dot.
133#D
134#D The \type {--verbose} switch can be used to disable the
135#D interception of log messages.
136
137#D We load a few \PERL\ modules \unknown\
138
139use Config ;
140use Getopt::Long ;
141
142use strict ;
143
144#D \unknown\ and initialize them.
145
Getopt::Long::Configure (qw(auto_abbrev ignore_case pass_through)) ;

#D Every switch gets its default before the command line is
#D parsed. The crop and offset values are strings that end up
#D verbatim in the generated \TEX\ code.

my $Crop = "0cm" ;

my ($LeftCrop, $RightCrop, $TopCrop, $BottomCrop) = ("0cm") x 4 ;

my $Offset = "0cm" ;

my ($GSbin, $Verbose, $Help, $UsePlain) = ("", 0, 0, 0) ;

my $Page = 1 ;

#D On \MSWINDOWS\ and \UNIX\ the following program names,
#D combined with the check later, should work out okay.
#D (The name in \type {$pdfps} is not used by the code shown
#D here.)

my ($pdfps, $gs) = ("pdftops", "gs") ;

my $thisisunix = $Config{'osname'} !~ /dos|mswin/i ;

#D The file names are filled in later. When no resulting file
#D is given, \type {pdftrimwhite} is used as name.

my ($figurefile, $resultfile, $tempfile) = ("", "", "") ;

my $programname = "pdftrimwhite" ;

#D Messages are collected here and written to a log file
#D afterwards.

my ($results, $pipe, $result) = ("", "", "") ;

#D Unfortunately we need the page dimensions and offsets,
#D first since \PDFTOPS\ does not honor the cropbox, and
#D second because the vertical coordinates are swapped.

my ($pwidth,  $pheight) = (597, 847) ;
my ($hoffset, $voffset) = (  0,   0) ;

#D The boundingbox and the dimensions derived from it.

my ($width, $height, $llx, $lly, $urx, $ury) = (0) x 6 ;

#D Here are the switches we accept. The \type {--gsbin} switch
#D is a bonus one, and the \type {--help} switch comes
#D naturally.

GetOptions (
    "leftcrop=s"   => \$LeftCrop,
    "rightcrop=s"  => \$RightCrop,
    "topcrop=s"    => \$TopCrop,
    "bottomcrop=s" => \$BottomCrop,
    "crop=s"       => \$Crop,
    "offset=s"     => \$Offset,
    "verbose"      => \$Verbose,
    "gsbin=s"      => \$GSbin,
    "plain"        => \$UsePlain,
    "page=i"       => \$Page,
    "help"         => \$Help,
) ;
223
224#D If asked for, or if no file is given, we provide some
225#D help information.
226
sub PrintHelp {
    # Print the usage summary on standard output. Switches that take a
    # value show a placeholder for it: the crop and offset switches are
    # string options whose values are used as TeX dimensions, --gsbin
    # takes the name of the Ghostscript program and --page an integer.
    my @switches =
      ( "--crop=<dimen>",
        "--offset=<dimen>",
        "--leftcrop=<dimen>",
        "--rightcrop=<dimen>",
        "--topcrop=<dimen>",
        "--bottomcrop=<dimen>",
        "--gsbin=<string>",
        "--page=<number>",
        "--plain",
        "--verbose" ) ;
    print "This is PdfTrimWhite\n\n" .
          "usage:\n\n" .
          "pdftrimwhite [switches] filename result\n\n" .
          "switches:\n\n" .
          join ("", map { "$_\n" } @switches) ;
}
242
243#D The preparations:
244
sub GetItRight {
    # Check the command line and derive the file names and program
    # settings that the rest of the script works with. Exits after
    # printing the help text or an error message when there is
    # nothing to do.
    if ($Help) {
        PrintHelp () ;
        exit ;
    }
    # Both names may be absent, so never touch an undefined value.
    $figurefile = defined $ARGV[0] ? $ARGV[0] : '' ;
    $resultfile = defined $ARGV[1] ? $ARGV[1] : '' ;
    $figurefile =~ s/\.pdf$//i ;
    $resultfile =~ s/\.pdf$//i ;
    if ($figurefile eq '') {
        PrintHelp () ;
        exit ;
    }
    # Ghostscript has another name on MS Windows and DOS; an explicit
    # --gsbin always wins.
    $gs = "gswin32c" unless $thisisunix ;
    $gs = $GSbin     if     $GSbin ne '' ;
    unless (-e "$figurefile.pdf") {
        print "Something is terribly wrong: no file found\n" ;
        exit ;
    }
    # An empty name, a left-over switch (options are passed through)
    # or a name with a period in it is replaced by the program name.
    if (($resultfile eq '') || ($resultfile =~ /(^\-|\.)/)) {
        $resultfile = $programname ;
    }
    # The scratch name is derived only now, so that it is always built
    # from a validated result name.
    $tempfile = "pdftrimwhite-$resultfile" ;
    # The same redirection is used on all platforms.
    $pipe = "2>&1" ;
}
265
266#D Something common.
267
sub SavePageData {
    # Return the TeX code that writes \HOffsetBP, \VOffsetBP,
    # \FigureWidthBP and \FigureHeightBP, separated by spaces, to the
    # file $figurefile.tmp.
    my @tex =
      ( '% saving page data',
        "\\immediate\\openout\\scratchwrite=$figurefile.tmp",
        '\immediate\write\scratchwrite',
        '   {\HOffsetBP\space\VOffsetBP\space',
        '    \FigureWidthBP\space\FigureHeightBP}',
        '\immediate\closeout\scratchwrite',
        '' ) ;
    return join ("\n", @tex) ;
}
275
sub MakePageConTeXt {
    # Return the ConTeXt code that sets up a page of \FigureWidth by
    # \FigureHeight and places page $Page of $figurefile.pdf on it,
    # clipped at \HOffset and \VOffset.
    my $tex = <<"END_OF_TEX" ;
% the real work
\\definepapersize
  [Crap]
  [width=\\FigureWidth,
   height=\\FigureHeight]
\\setuppapersize
  [Crap][Crap]
\\setuplayout
  [topspace=0cm,backspace=0pt,
   height=middle,width=middle,
   header=0pt,footer=0pt]
\\starttext
  \\startstandardmakeup
    \\clip
      [voffset=\\VOffset,
       hoffset=\\HOffset,
       width=\\FigureWidth,
       height=\\FigureHeight]
      {\\externalfigure[$figurefile.pdf][page=$Page]\\hss}
  \\stopstandardmakeup
\\stoptext
END_OF_TEX
    return $tex ;
}
298
sub MakePagePlainTeX {
    # Return the plain TeX code that sets the pdf page size to
    # \FigureWidth by \FigureHeight and ships out box 0, shifted by
    # \HOffset and \VOffset. The text is constant.
    my @tex =
      ( '% the real work',
        '\output{}',
        '\hoffset=-1in',
        '\voffset=\hoffset',
        '\pdfpageheight=\FigureHeight',
        '\pdfpagewidth=\FigureWidth',
        '\vbox to \pdfpageheight',
        '  {\offinterlineskip',
        '   \vskip-\VOffset',
        '   \hbox to \pdfpagewidth{\hskip-\HOffset\box0\hss}',
        '   \vss}',
        '\end',
        '' ) ;
    return join ("\n", @tex) ;
}
312
sub CalculateClip {
    # Return the TeX code of the first pass calculations. Registers:
    # \dimen0 width, \dimen2 height, \dimen4 horizontal offset and
    # \dimen6 vertical offset. The results are stored in macros, both
    # as dimensions and as whole big points (the ...BP variants).
    my @tex =
      ( '% some calculations',
        '\dimen0=\figurewidth',
        '\dimen2=\figureheight',
        "\\dimen4=$Crop",
        "\\dimen6=$Crop" ) ;
    # register number and the amount that is added to it, in order
    my @steps =
      ( [ 4, $LeftCrop      ],
        [ 6, $TopCrop       ],
        [ 0, "-\\dimen4"    ],
        [ 0, "-$Crop"       ],
        [ 0, "-$RightCrop"  ],
        [ 2, "-\\dimen6"    ],
        [ 2, "-$Crop"       ],
        [ 2, "-$BottomCrop" ] ) ;
    for my $step (@steps) {
        push @tex, "\\advance\\dimen$step->[0] by $step->[1]" ;
    }
    # macro name and the register that holds its value
    my @macros =
      ( [ FigureWidth  => 0 ],
        [ FigureHeight => 2 ],
        [ HOffset      => 4 ],
        [ VOffset      => 6 ] ) ;
    for my $macro (@macros) {
        push @tex, sprintf ("\\edef\\%-12s{\\the\\dimen%d}", @$macro) ;
    }
    for my $macro (@macros) {
        my ($name, $register) = @$macro ;
        push @tex, "\\ScaledPointsToWholeBigPoints{\\number\\dimen$register}\\${name}BP" ;
    }
    return join ("\n", @tex, '') ;
}
335
sub RecalculateClip {
    # Return the TeX code of the second pass calculations. The clip
    # rectangle is the boundingbox found in the first pass, enlarged
    # by $Offset on all sides and expressed relative to the original
    # figure:
    #
    #   width   = $width  + 2 * $Offset
    #   height  = $height + 2 * $Offset
    #   hoffset = $hoffset + $llx - $Offset
    #   voffset = $voffset + ($pheight - $ury) - $Offset
    #
    # $hoffset and $voffset are the offsets at which the first pass
    # clipped the figure (crop plus left or top crop, as saved in the
    # .tmp file). The vertical offset has to start from $voffset just
    # like the horizontal one starts from $hoffset; using $TopCrop
    # alone would ignore the --crop value. Because the boundingbox is
    # measured from the bottom of the page, the vertical value is
    # calculated as $pheight - $lly + $Offset - height.
    my $tex = <<"END_OF_TEX" ;
% some calculations
\\dimen0=${width}bp
\\dimen2=${height}bp
\\dimen4=${hoffset}bp
\\dimen6=${pheight}bp
\\advance\\dimen0 by  $Offset
\\advance\\dimen0 by  $Offset
\\advance\\dimen2 by  $Offset
\\advance\\dimen2 by  $Offset
\\advance\\dimen4 by  ${llx}bp
\\advance\\dimen4 by -$Offset
\\advance\\dimen6 by -${lly}bp
\\advance\\dimen6 by  $Offset
\\advance\\dimen6 by -\\dimen2
\\advance\\dimen6 by  ${voffset}bp
\\edef\\FigureWidth {\\the\\dimen0}
\\edef\\FigureHeight{\\the\\dimen2}
\\edef\\HOffset     {\\the\\dimen4}
\\edef\\VOffset     {\\the\\dimen6}
END_OF_TEX
    return $tex ;
}
356
#D The previous scripts could be more sparse, but for the
#D moment we prefer readability. Both scripts save some
#D information in a temporary file. We choose between them
#D with the following subroutine.
361
362#D The first pass:
363
sub PrepareConTeXt {
    # Return the ConTeXt preamble of the first pass: select the pdftex
    # output and fetch the dimensions of page $Page of the figure.
    my $figure = "$figurefile.pdf" ;
    return join ("\n",
        '% interface=en',
        '\setupoutput[pdftex]',
        "\\getfiguredimensions[$figure][page=$Page]",
        '') ;
}
368
sub PreparePlainTeX {
    # Return the plain TeX preamble of the first pass: load supp-mis,
    # put page $Page of the figure in box 0 and copy the width and
    # height of that box to \figurewidth and \figureheight.
    my $image = "\\immediate\\pdfximage page $Page {$figurefile.pdf}" .
                "\\pdfrefximage\\pdflastximage" ;
    return join ("\n",
        '% plain tex alternative, needs recent supp-mis',
        '\input supp-mis',
        '\pdfoutput=1',
        '\newdimen\figurewidth',
        '\newdimen\figureheight',
        '\setbox0=\hbox',
        "   {$image}",
        '\figurewidth=\wd0',
        '\figureheight=\ht0',
        '') ;
}
379
sub PrepareFirstPass {
    # Write the TeX file of the first pass to $tempfile.tex: the
    # preamble, the clip calculations, the code that saves the page
    # data and the page itself, either in the plain TeX or in the
    # ConTeXt flavour. Dies when the file cannot be written, because
    # nothing useful can be done without it.
    my $document = $UsePlain
      ? PreparePlainTeX () . CalculateClip () . SavePageData () . MakePagePlainTeX ()
      : PrepareConTeXt  () . CalculateClip () . SavePageData () . MakePageConTeXt  () ;
    open (my $tex, '>', "$tempfile.tex")
      or die "Something is terribly wrong: cannot write $tempfile.tex: $!\n" ;
    print $tex $document ;
    # write errors of buffered output only show up at close time
    close ($tex)
      or die "Something is terribly wrong: cannot close $tempfile.tex: $!\n" ;
}
395
396#D The second pass looks much like the first one, but this
397#D time we don't save information, use the natural
398#D boundingbox, and provide the offset.
399
sub SetupConTeXt {
    # Return the ConTeXt preamble of the second pass, which only
    # selects the pdftex output. The text is constant.
    return join ("\n", '% interface=en', '\setupoutput[pdftex]', '') ;
}
403
sub SetupPlainTeX {
    # Return the plain TeX preamble of the second pass: enable pdf
    # output and put page $Page of the figure in box 0.
    my $image = "\\immediate\\pdfximage page $Page {$figurefile.pdf}" .
                "\\pdfrefximage\\pdflastximage" ;
    return join ("\n",
        '% plain tex alternative',
        '\pdfoutput=1',
        '\setbox0=\hbox',
        "  {$image}",
        '') ;
}
409
sub PrepareSecondPass {
    # Write the TeX file of the second pass to $tempfile.tex: the
    # preamble, the recalculated clip and the page itself, either in
    # the plain TeX or in the ConTeXt flavour. This overwrites the
    # file of the first pass. Dies when the file cannot be written.
    my $document = $UsePlain
      ? SetupPlainTeX () . RecalculateClip () . MakePagePlainTeX ()
      : SetupConTeXt  () . RecalculateClip () . MakePageConTeXt  () ;
    open (my $tex, '>', "$tempfile.tex")
      or die "Something is terribly wrong: cannot write $tempfile.tex: $!\n" ;
    print $tex $document ;
    # write errors of buffered output only show up at close time
    close ($tex)
      or die "Something is terribly wrong: cannot close $tempfile.tex: $!\n" ;
}
423
424#D The information we save in the first pass, is loaded here.
425
sub FetchPaperSize {
    # Read the page data that the first pass saved in $figurefile.tmp.
    # The first line that consists of four whole numbers provides the
    # horizontal offset, vertical offset, width and height. When the
    # file cannot be opened or has no such line, the current values
    # stay in effect.
    open (my $tmp, '<', "$figurefile.tmp") or return ;
    # read from the file handle, line by line
    while (my $line = <$tmp>) {
        chomp $line ;
        if ($line =~ /^(\d+) (\d+) (\d+) (\d+) *$/) {
            ($hoffset, $voffset, $pwidth, $pheight) = ($1, $2, $3, $4) ;
            last ;
        }
    }
    close ($tmp) ;
}
437
438#D Here we try to find the natural boundingbox. We need to
439#D pick up the page dimensions here.
440
sub RunTeX {
    # Process $tempfile with pdftex (plain format) or with texexec,
    # keep the captured output in $result, show it when --verbose is
    # given and add it to the collected log.
    my $command = $UsePlain
      ? "pdftex -prog=pdftex -fmt=plain -int=batchmode $tempfile"
      : "texexec --batch --once --purge $tempfile" ;
    $result = qx{$command} ;
    if ($Verbose) {
        print $result ;
    }
    $results .= "$result\n" ;
}
447
sub FindBoundingBox {
    # Let Ghostscript's bbox device process $tempfile.pdf and keep the
    # captured output in $result. The redirection in $pipe merges the
    # error channel into that output (the bbox device reports there).
    my $command = join (" ",
        $gs, "-sDEVICE=bbox", "-dNOPAUSE", "-dBATCH", "$tempfile.pdf", $pipe) ;
    $result = qx{$command} ;
    if ($Verbose) {
        print $result ;
    }
    $results .= "$result\n" ;
}
451
sub IdentifyCropBox {
    # One complete first pass. The order matters: the TeX run produces
    # both the pdf file and the saved page data, and the boundingbox
    # run comes last so that its output is what is left in $result.
    RunTeX () ;
    FetchPaperSize () ;
    return FindBoundingBox () ;
}
456
457#D Just to be sure, we check if there is some image data, so
458#D that we can retry if something went wrong. Unfortunately we cannot
459#D safely check on a high res boundingbox.
460
my $digits = '([\-\d\.]+)' ;

sub ValidatedCropBox {
    # Take the four boundingbox values from $result and derive the
    # width and height. Returns 1 when both are non-zero. Otherwise
    # the crop settings of the failing direction are reset to 0cm, so
    # that a retry runs without them, and 0 is returned. Exits when
    # there is no boundingbox at all.
    my @box = $result =~ /BoundingBox:\s*$digits\s+$digits\s+$digits\s+$digits\s*/mois ;
    unless (@box) {
        print "Something is terribly wrong: no boundingbox:\n$result\n" ;
        exit ;
    }
    ($llx, $lly, $urx, $ury) = @box ;
    $width  = abs ($urx - $llx) ;
    $height = abs ($ury - $lly) ;
    return 1 if $width && $height ;
    if (! $width) {
        print "Something seems wrong: no width\n" ;
        ($LeftCrop, $RightCrop, $Crop) = ("0cm") x 3 ;
    }
    if (! $height) {
        print "Something seems wrong: no height\n" ;
        ($TopCrop, $BottomCrop, $Crop) = ("0cm") x 3 ;
    }
    return 0 ;
}
480
481#D This is the main cropping routine.
482
sub FixCropBox {
    # The second pass is just another TeX run on the file written by
    # PrepareSecondPass.
    return RunTeX () ;
}
485
486#D For error tracing we save the log information in a file.
487
sub RenameResult {
    # Move $tempfile.pdf to $resultfile.pdf. Returns 1 on success and
    # 0 otherwise. An existing result is only removed when there is a
    # new file to take its place, so a failed run does not destroy
    # the result of an earlier one.
    unless (-e "$tempfile.pdf") {
        print "Something is terribly wrong: no result in $tempfile.pdf\n" ;
        return 0 ;
    }
    # remove the old result first; not every platform lets rename
    # replace an existing file
    unlink ("$resultfile.pdf") if -e "$resultfile.pdf" ;
    unless (rename ("$tempfile.pdf", "$resultfile.pdf")) {
        print "Something is terribly wrong: cannot rename $tempfile.pdf to $resultfile.pdf: $!\n" ;
        return 0 ;
    }
    return 1 ;
}
491
sub SaveLogInfo {
    # Write the collected messages to $resultfile.log. The log is an
    # extra, so a failure is reported but does not stop the script.
    # Returns 1 on success and 0 otherwise.
    my $log ;
    unless (open ($log, '>', "$resultfile.log")) {
        print "Something seems wrong: cannot write $resultfile.log: $!\n" ;
        return 0 ;
    }
    print $log $results ;
    # write errors of buffered output only show up at close time
    unless (close ($log)) {
        print "Something seems wrong: cannot close $resultfile.log: $!\n" ;
        return 0 ;
    }
    return 1 ;
}
496
497#D We remove all temporary files.
498
sub CleanUp {
    # Remove the scratch files. With --verbose they are left in place.
    return if $Verbose ;
    for my $scratch ("$tempfile.tex", "$tempfile.tuo", "$tempfile.tui", "$figurefile.tmp") {
        unlink $scratch ;
    }
}
505
506#D Here it all comes together.
507
GetItRight () ;

# First pass: crop as asked for and look for the boundingbox. When
# the result has no width or height, the validation has reset the
# crop settings involved and the first pass is tried once more.

PrepareFirstPass () ;
IdentifyCropBox  () ;

my $validbox = ValidatedCropBox () ;

unless ($validbox) {
    PrepareFirstPass () ;
    IdentifyCropBox  () ;
    $validbox = ValidatedCropBox () ;
}

# Second pass: only when there is a usable boundingbox.

if ($validbox) {
    PrepareSecondPass () ;
    FixCropBox () ;
}

# In all cases the last pdf file becomes the result, the messages
# are saved and the scratch files are removed.

RenameResult () ;
SaveLogInfo  () ;

CleanUp () ;
526