1if not modules then modules = { } end modules ['lpdf-fix-imp-contents'] = {
2 version = 1.001,
3 comment = "companion to lpdf-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14
15
-- Runtime tracing switches; the registered callbacks flip them when the
-- corresponding tracker is enabled or disabled.
local trace_fixes = false trackers.register("graphics.fixes", function(v) trace_fixes = v end)
local trace_operators = false trackers.register("graphics.operators",function(v) trace_operators = v end)

-- Reporter used for all messages emitted from this file.
local report_fixes = logs.reporter("graphics","fixes")

-- Local aliases for library functions used below.
local type, tonumber = type, tonumber
local char = string.char
local setmetatableindex, setmetatablecall, sortedhash, concat, insert = table.setmetatableindex, table.setmetatablecall, table.sortedhash, table.concat, table.insert
local round = math.round
local numbertostring = string.f6 -- presumably a fixed six decimal number formatter; confirm against the string helpers

-- Iterator over the entries of a pdf dictionary (used on XObject and Font below).
local expanded = lpdf.epdf.expanded
28
-- Tells whether an rgb triplet is unusable: returns true as soon as one of
-- the components is not a number or lies outside the range 0..1, and false
-- when all three components are fine.
local function invalid_rgb(r, g, b)
    if type(r) ~= "number" or r < 0 or r > 1 then
        return true
    end
    if type(g) ~= "number" or g < 0 or g > 1 then
        return true
    end
    return type(b) ~= "number" or b < 0 or b > 1
end
35
-- Tells whether a cmyk quadruple is unusable: returns true as soon as one of
-- the components is not a number or lies outside the range 0..1, and false
-- when all four components are fine.
local function invalid_cmyk(c, m, y, k)
    if type(c) ~= "number" or c < 0 or c > 1 then
        return true
    end
    if type(m) ~= "number" or m < 0 or m > 1 then
        return true
    end
    if type(y) ~= "number" or y < 0 or y > 1 then
        return true
    end
    return type(k) ~= "number" or k < 0 or k > 1
end
43
-- Replaces a device colorspace of an image object by a reference to the
-- default profile with the matching number of components (4 for DeviceCMYK,
-- 3 for DeviceRGB, 1 for DeviceGray). Objects that are already flagged as
-- remapped and objects with any other colorspace are left alone. Returns
-- true when the colorspace has been replaced and nothing otherwise.
local function fix_image_colorspace(v)
    if v.__content_remapped__ then
        return
    end
    local components = {
        DeviceCMYK = 4,
        DeviceRGB  = 3,
        DeviceGray = 1,
    }
    local wanted = components[v.__raw__.ColorSpace]
    if not wanted then
        -- not a device colorspace that we know how to map
        return
    end
    local objref = backends.registered.pdf.codeinjections.defaultprofile(wanted)
    if not objref then
        return
    end
    v.__raw__.ColorSpace = { lpdf.epdf.objectcodes.lpdf, lpdf.reference(objref) }
    v.__content_remapped__ = true
    return true
end
65
66
67
-- Forces the colorspace of a form object to the device space that belongs
-- to the given keyword ("cmyk", "rgb" or "gray"); any other keyword makes
-- this a no-op. An existing ColorSpace entry is always overwritten; without
-- such an entry the CS of the transparency group is changed, but only when
-- it differs from the wanted space. Returns true when something changed.
local function fix_form_colorspace(v,newspace)
    local devicespaces = {
        cmyk = "DeviceCMYK",
        rgb  = "DeviceRGB",
        gray = "DeviceGray",
    }
    local wanted = devicespaces[newspace]
    if not wanted then
        return
    end
    if v.__raw__.ColorSpace then
        v.__raw__.ColorSpace = wanted
        v.__content_remapped__ = true
        return true
    end
    local group = v.Group
    if group and group.CS ~= wanted then
        group.__raw__.CS = wanted
        -- assign the group back to the object after changing it
        v.Group = group
        v.__content_remapped__ = true
        return true
    end
end
95
96
97
98do
99
    -- Page manipulator that removes selected entries from the page resources
    -- and, recursively, from the XObjects and Type3 fonts found there. What
    -- gets removed is controlled by compactor.strip and compactor.cleanup;
    -- compactor.report.smask only counts SMask entries and removes nothing.
    -- Does nothing when there are no resources.
    function document.pdf_strip_page(pdfdoc,page,pagenumber,resources,compactor)
        if resources then
            -- each counter starts at 0 when its action is enabled and is false
            -- otherwise, so it doubles as the enable flag
            local group_done = compactor.strip.group and 0 or false
            local extgstate_done = compactor.strip.extgstate and 0 or false
            local metadata_done = compactor.strip.metadata and 0 or false
            local properties_done = compactor.strip.properties and 0 or false
            local colorspace_done = compactor.strip.colorspace and 0 or false
            local procset_done = compactor.cleanup.procset and 0 or false
            local pieceinfo_done = compactor.cleanup.pieceinfo and 0 or false
            local smask_done = compactor.report.smask and 0 or false
            -- the Group entry of the resources is only cleared when the page
            -- itself has a Group; both removals are counted
            if group_done and page.__raw__.Group then
                page .__raw__.Group = nil group_done = group_done + 1
                resources.__raw__.Group = nil group_done = group_done + 1
            end
            if extgstate_done and resources.__raw__.ExtGState then resources.__raw__.ExtGState = nil extgstate_done = extgstate_done + 1 end
            if properties_done and resources.__raw__.Properties then resources.__raw__.Properties = nil properties_done = properties_done + 1 end
            if smask_done and resources.__raw__.SMask then smask_done = smask_done + 1 end
            if procset_done and resources.__raw__.ProcSet then resources.__raw__.ProcSet = nil procset_done = procset_done + 1 end
            if pieceinfo_done and resources.__raw__.PieceInfo then resources.__raw__.PieceInfo = nil pieceinfo_done = pieceinfo_done + 1 end

            local x = resources.XObject
            local f = resources.Font
            if x or f then
                -- cleans up one object and then walks the XObjects in its own
                -- resources; the counters above are shared as upvalues
                local function strip(v)
                    if group_done and v.__raw__.Group then v.__raw__.Group = nil group_done = group_done + 1 end
                    if extgstate_done and v.__raw__.ExtGState then v.__raw__.ExtGState = nil extgstate_done = extgstate_done + 1 end
                    if metadata_done and v.__raw__.Metadata then v.__raw__.Metadata = nil metadata_done = metadata_done + 1 end
                    if smask_done and v.__raw__.SMask then smask_done = smask_done + 1 end
                    if pieceinfo_done and v.__raw__.PieceInfo then v.__raw__.PieceInfo = nil pieceinfo_done = pieceinfo_done + 1 end

                    -- images get a profile based colorspace, forms the device
                    -- space named by compactor.strip.colorspace
                    local subtype = v.__raw__.Subtype
                    if subtype == "Image" then
                        if colorspace_done then
                            if fix_image_colorspace(v) then
                                colorspace_done = colorspace_done + 1
                            end
                        end
                    elseif subtype == "Form" then
                        if colorspace_done then
                            if fix_form_colorspace(v,compactor.strip.colorspace) then
                                colorspace_done = colorspace_done + 1
                            end
                        end
                    end

                    local r = v.Resources
                    if r then
                        if procset_done and r.__raw__.ProcSet then r.__raw__.ProcSet = nil procset_done = procset_done + 1 end
                        local x = r.XObject
                        if x then
                            for k, v in expanded(x) do
                                strip(v)
                            end
                        end
                        -- assign the resources back to the object after changing them
                        v.Resources = r
                    elseif subtype == "Form" then
                        -- a form without resources; this message is not
                        -- guarded by trace_fixes
                        report_fixes("todo: here we have a test case")

                    end
                end
                if x then
                    for k, v in expanded(x) do
                        strip(v)
                    end
                end
                if f then
                    -- of the fonts only Type3 ones are handled
                    for k, v in expanded(f) do
                        if v.Type == "Font" and v.Subtype == "Type3" then
                            strip(v)
                        end
                    end
                end
            end
            -- an enabled counter is 0 or more and therefore true in Lua, so
            -- with tracing on the report shows up as soon as one action is
            -- enabled, also when all counts are zero
            if trace_fixes and (group_done or extgstate_done or metadata_done or properties_done or smask_done or colorspace_done or procset_done or pieceinfo_done) then
                report_fixes(
                    "page %i of %a cleaned up resources, %i groups, %i graphic states, %i metadata, %i properties, %i colorspaces, %i smasks, %i procsets, %i pieceinfo",
                    pagenumber,file.basename(pdfdoc.filename),
                    group_done or 0,
                    extgstate_done or 0,
                    metadata_done or 0,
                    properties_done or 0,
                    colorspace_done or 0,
                    smask_done or 0,
                    procset_done or 0,
                    pieceinfo_done or 0
                )
            end
        end





    end
194
195end
196
197do
198
199 local tocidsetdictionary = lpdf.tocidset
200
201
202
    -- Walks the fonts in the given resources and, for each Type0 font with a
    -- single descendant font, removes the CIDSet from its font descriptor
    -- (action "remove") or adds one that is built from the W array (action
    -- "add"). A missing CIDToGIDMap is set to Identity on the descendant for
    -- either action. The XObjects in the resources are handled recursively.
    local function pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor,action)
        local f = resources.Font
        local x = resources.XObject
        if f then
            -- object numbers that have been handled already
            -- NOTE(review): the table is kept in the global 'document'
            -- namespace and keyed by object number only, so it looks like it
            -- is shared between different pdf files — confirm this is intended
            local done = document.cidsetdone or { }
            document.cidsetdone = done
            for k, v in next, f.__raw__ do
                -- only entries that are indirect references are looked at;
                -- the third field is used as key into pdfdoc.objects
                local objref = v[1] == lpdf.epdf.objectcodes.reference and v[3]
                if objref and not done[objref] then
                    done[objref] = true

                    local v = pdfdoc.objects[objref]
                    if v.Subtype ~= "Type0" then
                        goto DONE
                    end
                    local d = v.DescendantFonts
                    if not d then
                        goto DONE
                    end
                    local vd = false
                    local fd = false
                    -- d is always set at this point (see the goto above), so
                    -- the else branch below is not reached
                    if d then
                        if #d == 1 then
                            vd = d[1]
                            fd = vd.FontDescriptor
                        end
                    else
                        vd = v
                        fd = vd.FontDescriptor
                    end
                    if not fd then
                        goto DONE
                    end
                    -- NOTE(review): the raw FontDescriptor entry itself is used
                    -- as key into pdfdoc.objects here, while the font above is
                    -- looked up by the third field of its reference — confirm
                    -- that pdfdoc.objects resolves such a key
                    if action == "remove" and fd.CIDSet then
                        local object = pdfdoc.objects[vd.__raw__.FontDescriptor]
                        if object then
                            object.__raw__.CIDSet = nil
                        end
                    elseif action == "add" and not fd.CIDSet then
                        local w = vd.W
                        if w then
                            -- build the cidset stream from the expanded widths
                            -- and flush it as a new object
                            local u, min, max = lpdf.epdf.expandwidths(w())
                            local c = tocidsetdictionary(u,min,max)
                            local o = lpdf.flushstreamobject(c)
                            local r = lpdf.reference(o)

                            local object = pdfdoc.objects[vd.__raw__.FontDescriptor]
                            if object then
                                if trace_fixes then
                                    report_fixes(
                                        "page %i of %a, font %a, adding CIDSet",
                                        pagenumber,file.basename(pdfdoc.filename),v.BaseFont
                                    )
                                end
                                object.__raw__.CIDSet = { lpdf.epdf.objectcodes.lpdf, r }
                            end
                        end
                    end
                    -- done for both actions
                    if not vd.__raw__.CIDToGIDMap then
                        if trace_fixes then
                            report_fixes(
                                "page %i of %a, font %a, adding CIDToGIDMap",
                                pagenumber,file.basename(pdfdoc.filename),v.BaseFont
                            )
                        end
                        vd.__raw__.CIDToGIDMap = { lpdf.epdf.objectcodes.lpdf, lpdf.constant("Identity") }
                    end

                end
              ::DONE::
            end
        end
        if x then
            for k, v in expanded(x) do
                local resources = v.Resources
                if resources then
                    pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor,action)
                end
            end
        end
    end
284
285 function document.pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor)
286 if resources then
287 local action = false
288 if lpdf.majorversion() > 1 then
289 action = "remove"
290 elseif compactor.cleanup.cidset then
291 action = "remove"
292 elseif compactor.add.cidset then
293 action = "add"
294 end
295 if action then
296 pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor,action)
297 end
298 end
299 end
300
301end
302
303do
304
305 local function pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
306 resources.__raw__.ProcSet = nil
307
308 local x = resources.XObject
309 if x then
310 for k, v in expanded(x) do
311 local resources = v.Resources
312 if resources then
313 pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
314 end
315 end
316 end
317 end
318
319 function document.pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
320 if resources then
321 if lpdf.majorversion() > 1 or lpdf.minorversion() > 3 then
322 pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
323 end
324 end
325 end
326
327end
328
329do
330
331 local cmyktorgb = attributes.colors.cmyktorgb
332 local cmyktogray = attributes.colors.cmyktogray
333 local rgbtocmyk = attributes.colors.rgbtocmyk
334 local rgbtogray = attributes.colors.rgbtogray
335
336
337
338 local function reducecmyk(c,op)
339 local c1 = tonumber(c[1])
340 local c2 = tonumber(c[2])
341 local c3 = tonumber(c[3])
342 local c4 = tonumber(c[4])
343 local cc = c1 == c2 and c2 == c3
344 if cc then
345 if c1 == 0 then
346
347 c[1] = numbertostring(1 - c4)
348 elseif c1 == 1 then
349
350 c[1] = "0"
351 else
352 c4 = c4 + c1
353 c[1] = c4 > 1 and "0" or numbertostring(1 - c4)
354 end
355 c[2] = op == "K" and "G" or "g"
356 c[3] = nil
357 c[4] = nil
358 c[5] = nil
359
360
361
362
363
364
365
366 end
367 end
368
369 local function reducergb(c,op)
370 local c1 = c[1]
371 local c2 = c[2]
372 local c3 = c[3]
373 if c1 == c2 and c2 == c3 then
374 c[1] = c1
375 c[2] = op == "rg" and "g" or "G"
376 c[3] = nil
377 c[4] = nil
378 end
379 end
380
    -- State shared by the converters below and (re)set by useactions: a map
    -- is used when the convert option is a table, a function when it is a
    -- function; both stay false otherwise.
    local cmykmap = false
    local cmykfun = false
    local rgbmap = false
    local rgbfun = false
385
386 local function convertcmyk(c,op)
387 local c1 = tonumber(c[1])
388 local c2 = tonumber(c[2])
389 local c3 = tonumber(c[3])
390 local c4 = tonumber(c[4])
391 local cc = c1 == c2 and c2 == c3
392 if cc then
393 if c1 == 0 then
394
395 c[1] = numbertostring(1 - c4)
396 elseif c1 == 1 then
397
398 c[1] = "0"
399 else
400 c4 = c4 + c1
401 c[1] = c4 > 1 and "0" or numbertostring(1 - c4)
402 end
403 c[2] = op == "K" and "G" or "g"
404 c[3] = nil
405 c[4] = nil
406 c[5] = nil
407 else
408 local r, g, b
409 if cmykmap then
410
411
412
413 for i=1,#cmykmap do
414 local map = cmykmap[i]
415 local factor = map[1]
416 local r1 = round(c1*factor)
417 local r2 = round(c2*factor)
418 local r3 = round(c3*factor)
419 local r4 = round(c4*factor)
420 if map[2] == r1 and map[3] == r2 and map[4] == r3 and map[5] == r4 then
421 r = (map[6] or 0)/factor
422 g = (map[7] or 0)/factor
423 b = (map[8] or 0)/factor
424 goto DONE
425 end
426 end
427 elseif cmykfun then
428 r, g, b = cmykfun(c1,c2,c3,c4)
429 if invalid_rgb(r,g,b) then
430
431 else
432 goto DONE
433 end
434 end
435 r, g, b = cmyktorgb(c1,c2,c3,c4)
436 ::DONE::
437 c[1] = numbertostring(r)
438 c[2] = numbertostring(g)
439 c[3] = numbertostring(b)
440 c[4] = op == "K" and "RG" or "rg"
441 c[5] = nil
442 end
443 end
444
445 local function convertrgb(z,op)
446 local c1 = z[1]
447 local c2 = z[2]
448 local c3 = z[3]
449 if c1 == c2 and c2 == c3 then
450 z[1] = c1
451 z[2] = op == "rg" and "g" or "G"
452 z[3] = nil
453 z[4] = nil
454 else
455 local c, m, y, k
456 if rgbmap then
457 for i=1,#rgbmap do
458 local map = rgbmap[i]
459 local factor = map[1]
460 local r1 = round(c1*factor)
461 local r2 = round(c2*factor)
462 local r3 = round(c3*factor)
463 if map[2] == r1 and map[3] == r2 and map[4] == r3 then
464 c = (map[5] or 0)/factor
465 m = (map[6] or 0)/factor
466 y = (map[7] or 0)/factor
467 k = (map[8] or 0)/factor
468 goto DONE
469 end
470 end
471 elseif rgbfun then
472 c, m, y, k = rgbfun(c1,c2,c3)
473 if invalid_cmyk(c, m, y, k) then
474
475 else
476 goto DONE
477 end
478 end
479 c, m, y, k = rgbtocmyk(c1,c2,c3)
480 ::DONE::
481 z[1] = numbertostring(c)
482 z[2] = numbertostring(m)
483 z[3] = numbertostring(y)
484 z[4] = numbertostring(k)
485 z[5] = op == "RG" and "K" or "k"
486 end
487 end
488
489 local g1, g2, g3, g4
490
491 local function recolorcmyk_gray(c,op)
492 local s = 1 - cmyktogray(c[1],c[2],c[3],c[4])
493 c[1] = numbertostring(s)
494 c[2] = op == "K" and "G" or "g"
495 c[3] = nil
496 c[4] = nil
497 c[5] = nil
498 end
499 local function recolorcmyk_rgb(c,op)
500 local s = 1 - cmyktogray(c[1],c[2],c[3],c[4])
501 c[1] = numbertostring(g1*s)
502 c[2] = numbertostring(g2*s)
503 c[3] = numbertostring(g3*s)
504 c[4] = op == "K" and "RG" or "rg"
505 c[5] = nil
506 end
507 local function recolorcmyk_cmyk(c,op)
508 local s = 1 - cmyktogray(c[1],c[2],c[3],c[4])
509 c[1] = numbertostring(g1*s)
510 c[2] = numbertostring(g2*s)
511 c[3] = numbertostring(g3*s)
512 c[4] = numbertostring(g4*s)
513 c[5] = op
514 end
515
516 local function recolorrgb_gray(c,op)
517 local s = 1 - rgbtogray(c[1],c[2],c[3])
518 c[1] = numbertostring(s)
519 c[2] = op == "RG" and "G" or "g"
520 c[3] = nil
521 c[4] = nil
522 c[5] = nil
523 end
524 local function recolorrgb_rgb(c,op)
525 local s = 1 - rgbtogray(c[1],c[2],c[3])
526 c[1] = numbertostring(g1*s)
527 c[2] = numbertostring(g2*s)
528 c[3] = numbertostring(g3*s)
529 c[4] = op
530 c[5] = nil
531 end
532 local function recolorrgb_cmyk(c,op)
533 local s = 1 - rgbtogray(c[1],c[2],c[3])
534 c[1] = numbertostring(g1*s)
535 c[2] = numbertostring(g2*s)
536 c[3] = numbertostring(g3*s)
537 c[4] = numbertostring(g4*s)
538 c[5] = op == "RG" and "K" or "k"
539 end
540
541 local function recolorgray_gray(c,op)
542 local s = 1 - tonumber(c[1])
543 c[1] = numbertostring(g1*s)
544 c[2] = op
545 c[3] = nil
546 c[4] = nil
547 c[5] = nil
548 end
549 local function recolorgray_rgb(c,op)
550 local s = 1 - tonumber(c[1])
551 c[1] = numbertostring(g1*s)
552 c[2] = numbertostring(g2*s)
553 c[3] = numbertostring(g3*s)
554 c[4] = op == "G" and "RG" or "rg"
555 c[5] = nil
556 end
557 local function recolorgray_cmyk(c,op)
558 local s = 1 - tonumber(c[1])
559 c[1] = numbertostring(g1*s)
560 c[2] = numbertostring(g2*s)
561 c[3] = numbertostring(g3*s)
562 c[4] = numbertostring(g4*s)
563 c[5] = op == "G" and "K" or "k"
564 end
565
566 local function removestate(c,op,contents,i)
567
568 contents[i] = { }
569 end
570
571 local removed = false
572
573 local function removetags(c,op,contents,i)
574 local ci = contents[i]
575 local one = ci[1]
576 if one then
577 local what = one[2]
578 if what == "Artifact" then
579
580 return
581 elseif what == "Span" then
582
583 local two = ci[2]
584 if two then
585 if two[1] == "dict" then
586 local list = two[2]
587 for i=1,#list,2 do
588 local l = list[i]
589 if l[2] == "ActualText" then
590
591 return
592 end
593 end
594 end
595 end
596 else
597
598 end
599 end
600 removed = true
601 contents[i] = { }
602 if op == "BMC" or op == "BDC" then
603 local level = 1
604 for ii=i+1,#contents do
605 local c = contents[ii]
606 local o = c[#c]
607 if o == "BMC" or o == "BDC" then
608 level = level + 1
609 elseif o == "EMC" then
610 level = level - 1
611 if level <= 0 then
612 contents[ii] = { }
613 break
614 end
615 end
616 end
617 end
618 end
619
620 local contenttostring = lpdf.epdf.contenttostring
621 local getpagecontent = lpdf.epdf.getpagecontent
622 local parsecontent = lpdf.epdf.parsecontent
623
    -- Second pass action for BT: wipes this BT and the entry before it when
    -- that entry qualifies as an ET.
    -- NOTE(review): the previous entry is indexed with i, the position in
    -- the content list, while checkQ uses c[#c] to fetch the operator. This
    -- looks like a typo that keeps the test from succeeding; before changing
    -- it, confirm that merging ET/BT is wanted, as each BT starts a new text
    -- object.
    local function checkbt(c,op,contents,i)

        local c = contents[i-1]
        if c and c[i] == "ET" then
            contents[i] = { }
            contents[i-1] = { }
        end
    end
632
633 local function checkQ(c,op,contents,i)
634 local c = contents[i-1]
635 if c and c[#c] == "q" then
636 contents[i] = { }
637 contents[i-1] = { }
638 end
639 end
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664 local passone = { }
665 local passtwo = { }
666
667 local function useactions(compactor)
668 local used = { }
669 local strip = compactor.strip
670 local reduce = compactor.reduce
671 local convert = compactor.convert
672 local recolor = compactor.recolor
673 cmykmap = false
674 rgbmap = false
675 cmykfun = false
676 rgbfun = false
677 removed = false
678 passone = { }
679 passtwo = { }
680 if strip.marked then
681
682
683 passone.MP = removetags
684 passone.DP = removetags
685 passone.BMC = removetags
686 passone.BDC = removetags
687
688 passtwo.BT = checkbt
689 end
690 if strip.extgstate then
691 passone.gs = removestate
692 passone.GS = removestate
693 end
694
695
696
697 if reduce.color then
698 passone.rg = reducergb
699 passone.RG = reducergb
700 passone.k = reducecmyk
701 passone.K = reducecmyk
702 elseif reduce.rgb then
703 passone.rg = reducergb
704 passone.RG = reducergb
705 elseif reduce.cmyk then
706 passone.k = reducecmyk
707 passone.K = reducecmyk
708 end
709 if convert.cmyk then
710 passone.k = convertcmyk
711 passone.K = convertcmyk
712 cmykmap = type(convert.cmyk) == "table" and convert.cmyk or false
713 cmykfun = type(convert.cmyk) == "function" and convert.cmyk or false
714 elseif convert.rgb then
715 passone.rg = convertrgb
716 passone.RG = convertrgb
717 rgbmap = type(convert.rgb) == "table" and convert.rgb or false
718 rgbfun = type(convert.rgb) == "function" and convert.rgb or false
719 end
720 local viagray = recolor.viagray
721 if viagray then
722 g1 = viagray[1]
723 g2 = viagray[2]
724 g3 = viagray[3]
725 g4 = viagray[4]
726 if g4 then
727 passone.k = recolorcmyk_cmyk
728 passone.K = recolorcmyk_cmyk
729 passone.rg = recolorrgb_cmyk
730 passone.RG = recolorrgb_cmyk
731 passone.g = recolorgray_cmyk
732 passone.G = recolorgray_cmyk
733 elseif g3 then
734 passone.k = recolorcmyk_rgb
735 passone.K = recolorcmyk_rgb
736 passone.rg = recolorrgb_rgb
737 passone.RG = recolorrgb_rgb
738 passone.g = recolorgray_rgb
739 passone.G = recolorgray_rgb
740 elseif g1 then
741 passone.k = recolorcmyk_gray
742 passone.K = recolorcmyk_gray
743 passone.rg = recolorrgb_gray
744 passone.RG = recolorrgb_gray
745 passone.g = recolorgray_gray
746 passone.G = recolorgray_gray
747 end
748 else
749 g1, g2, g3, g4 = nil, nil, nil, nil
750 end
751
752
753
754 if compactor.identify == "all" then
755 compactor.identify= {
756 content = true,
757 resources = true,
758 page = true,
759 }
760 end
761 end
762
763 local identify_content
764
765 identify_content = function(pdfdoc,contents,fonts,xobjects,counts)
766 if contents then
767 for i=1,#contents do
768 local ci = contents[i]
769 if ci then
770 local op = ci[#ci]
771 if op then
772 counts[op] = counts[op] + 1
773 if xobjects and op == "Do" then
774
775 local object = xobjects[ci[1][2]]
776 if object then
777 local subtype = object.Subtype
778 if subtype == "Form" then
779 if not object.__content_remapped__ then
780 local r = object.Resources
781 if r then
782 local contents = object()
783 local fonts = r.Font
784 local xobjects = r.XObject
785 if contents then
786 contents = parsecontent(contents,true)
787 if contents then
788 identify_content(pdfdoc,contents,fonts,xobjects,counts)
789 end
790 end
791 end
792 end
793 end
794 end
795 end
796 end
797 end
798 end
799 end
800 end
801
802 local function countoperators(pdfdoc,contents,fonts,xobjects,pagenumber,when)
803 local counts = setmetatableindex("number")
804 identify_content(pdfdoc,contents,fonts,xobjects,counts)
805 report_fixes("page %i of file %a: %s",pagenumber,pdfdoc.filename,when)
806 for k, v in sortedhash(counts) do
807 report_fixes("%4i : %s",v,k)
808 end
809 end
810
811 local strip_content
812
813 local function form(pdfdoc,object,pagenumber,compactor)
814 if not object.__content_remapped__ then
815 local r = object.Resources
816 if r then
817 local contents = object()
818 local fonts = r.Font
819 local xobjects = r.XObject
820 if contents then
821 contents = parsecontent(contents,true)
822 if contents then
823 contents = strip_content(pdfdoc,contents,fonts,xobjects,pagenumber,compactor)
824 contents = contenttostring(contents)
825 object.__raw__.Length = #contents
826 object.__raw__.Filter = nil
827 getmetatable(object).__call = function() return contents end
828 object.__content_remapped__ = true
829 end
830 end
831 end
832 end
833 end
834
835 local function image(pdfdoc,object,pagenumber,compactor)
836 if compactor.strip.colorspace and fix_image_colorspace(object) then
837
838 end
839 end
840
841 local function collapse(contents)
842 local j = false
843 for i=1,#contents do
844 local c = contents[i]
845 if not c or #c == 0 then
846 if not j then
847 j = i - 1
848 end
849 elseif j then
850 j = j + 1
851 contents[j] = c
852 end
853 end
854 if j then
855 for i=#contents,j+1,-1 do
856 contents[i] = nil
857 end
858 end
859 end
860
    -- Operators that the pollution pass in strip_content does not regard as
    -- content: a q that is followed by only these operators up to the next Q
    -- is wiped together with everything in between.
    local nocontent = {
        k = true, K = true,
        g = true, G = true,
        rg = true, RG = true,
        gs = true,
        cm = true,
        w = true,
        q = true, Q = true,
        cs = true, CS = true,
        d = true,
        i = true,
        j = true,
        J = true,
        sc = true, SC = true,
        scn = true, SCN = true,
        Tc = true,
        TL = true,
        Tr = true,
        Ts = true,
        Tw = true,
        Tz = true,
    }
883
    -- Applies the actions that useactions has set up to a parsed content list
    -- and returns that list. Each entry holds the operands followed by the
    -- operator. Nothing is returned when no contents are passed.
    strip_content = function(pdfdoc,contents,fonts,xobjects,pagenumber,compactor)
        if contents then
            -- first pass: per operator actions; a Do without such an action
            -- makes us handle the form or image it refers to
            for i=1,#contents do
                local ci = contents[i]
                local op = ci[#ci]
                local action = passone[op]
                if action then
                    action(ci,op,contents,i)
                elseif xobjects and op == "Do" then
                    -- the first operand holds the name of the xobject
                    local object = xobjects[ci[1][2]]
                    if object then
                        local subtype = object.Subtype
                        if subtype == "Form" then
                            form(pdfdoc,object,pagenumber,compactor)
                        end
                        if subtype == "Image" then
                            image(pdfdoc,object,pagenumber,compactor)
                        end
                    end
                end
            end
            -- second pass: only when there are actions for it; entries that
            -- got wiped are removed first so that neighbours are adjacent
            if next(passtwo) then
                if removed then
                    collapse(contents)
                end
                for i=1,#contents do
                    local ci = contents[i]
                    local op = ci[#ci]
                    local action = passtwo[op]
                    if action then
                        action(ci,op,contents,i)
                    end
                end

            end

            -- optional cleanup of useless operations

            local strip = compactor.strip
            if strip and (strip.identitycm or strip.pollution) then
                local last = false
                -- this local hides the shared 'removed' flag in this branch
                local removed = false

                if strip.pollution then
                    -- 'last' is the position of the most recent q that has
                    -- only been followed by operators from nocontent
                    for i=1,#contents do
                        local ci = contents[i]
                        local op = ci[#ci]
                        if op == "q" then
                            last = i
                        elseif op == "Q" then
                            if last then
                                if last == i - 1 then
                                    contents[last] = { }
                                    contents[i] = { }
                                else
                                    for j=last,i do
                                        contents[j] = { }
                                    end
                                end
                                removed = true
                                last = false
                            end
                        elseif not nocontent[op] then
                            -- this includes entries that were wiped before
                            last = false
                        end
                    end
                end
                if strip.identitycm then
                    -- a cm with operands 1 0 0 1 0 0 does nothing
                    for i=1,#contents do
                        local ci = contents[i]
                        local op = ci[#ci]
                        if op == "cm" then
                            if tonumber(ci[1]) == 1 and tonumber(ci[4]) == 1 and tonumber(ci[5]) == 0 and tonumber(ci[6]) == 0 and tonumber(ci[2]) == 0 and tonumber(ci[3]) == 0 then
                                contents[i] = { }
                                removed = true
                            end
                        end
                    end
                end

                if removed then
                    collapse(contents)
                end
            end

            return contents
        end
    end
973
974 local function strip_content_needed(pdfdoc,page,pagenumber,resources,compactor)
975 compactor = table.fastcopy(compactor)
976 setmetatableindex(compactor,"table")
977 local strip = compactor.strip
978 local marked = strip.marked
979 if marked == "force" then
980
981 elseif marked and pdfdoc.Catalog.StructTreeRoot then
982 report_fixes("page %i of file %a: %s (structure tree %s)",pagenumber,pdfdoc.filename,"stripping tags","found")
983 elseif marked == "page" then
984 local contents = lpdf.epdf.allcontent(page.Contents or "")
985 if string.find(contents,"/MCID%s%d+*") then
986 report_fixes("page %i of file %a: %s (structure tree %s)",pagenumber,pdfdoc.filename,"stripping tags","missing")
987 else
988 strip.marked = nil
989 end
990 else
991 strip.marked = nil
992 end
993 return
994 compactor, (
995 next(compactor.strip)
996 or next(compactor.reduce)
997 or next(compactor.convert)
998 or next(compactor.recolor)
999 ) and true or false
1000 end
1001
1002
1003
1004 function document.pdf_identify_content(pdfdoc,page,pagenumber,resources,compactor)
1005
1006 local function getcontents()
1007 local contents = pdfdoc.currentcontents
1008 if not contents then
1009 contents = getpagecontent(pdfdoc,pagenumber,true,true)
1010 pdfdoc.currentcontents = contents
1011 end
1012 return contents
1013 end
1014
1015 local function setcontents(contents)
1016 if contents then
1017 pdfdoc.currentcontents = contents
1018 end
1019 end
1020
1021 pdfdoc.getcontents = getcontents
1022 pdfdoc.setcontents = setcontents
1023
1024 if compactor.identify.content or compactor.identify == "all" then
1025 local fonts = resources.Font
1026 local xobjects = resources.XObject
1027 local contents = pdfdoc.getcontents()
1028 if contents then
1029 if trace_operators then
1030 countoperators(pdfdoc,contents,fonts,xobjects,pagenumber,"before")
1031 end
1032 end
1033 end
1034 end
1035
1036 function document.pdf_strip_content(pdfdoc,page,pagenumber,resources,compactor)
1037 local compactor, needed = strip_content_needed(pdfdoc,page,pagenumber,resources,compactor)
1038 if needed then
1039 local contents = pdfdoc.getcontents()
1040 if contents then
1041 local fonts = resources.Font
1042 local xobjects = resources.XObject
1043 useactions(compactor)
1044 if g1 then
1045 insert(contents,1, { 0, "G" })
1046 insert(contents,1, { 0, "g" })
1047 end
1048 contents = strip_content(pdfdoc,contents,fonts,xobjects,pagenumber,compactor)
1049 resources.Font = fonts
1050 pdfdoc.setcontents(contents)
1051 end
1052 end
1053 end
1054
1055 function document.pdf_serialize_content(pdfdoc,page,pagenumber,resources,compactor)
1056 local contents = pdfdoc.getcontents()
1057 if contents then
1058 if trace_operators then
1059 countoperators(pdfdoc,contents,fonts,xobjects,pagenumber,"after")
1060 end
1061 page.Contents = contenttostring(contents)
1062 pdfdoc.currentcontents = nil
1063 pdfdoc.getcontents = nil
1064 pdfdoc.setcontents = nil
1065 end
1066 end
1067
1068end
1069
-- Register the content manipulators in their "before", "system" and "after"
-- groups: identify first, then strip, then serialize.
utilities.sequencers.appendaction("pdfcontentmanipulators","before","document.pdf_identify_content")
utilities.sequencers.appendaction("pdfcontentmanipulators","system","document.pdf_strip_content")
utilities.sequencers.appendaction("pdfcontentmanipulators","after", "document.pdf_serialize_content")

-- All three are enabled right away.
utilities.sequencers.enableaction("pdfcontentmanipulators","document.pdf_identify_content")
utilities.sequencers.enableaction("pdfcontentmanipulators","document.pdf_strip_content")
utilities.sequencers.enableaction("pdfcontentmanipulators","document.pdf_serialize_content")

-- Register the page (resource) manipulators, all in the "after" group.
utilities.sequencers.appendaction("pdfpagemanipulators","after","document.pdf_strip_page")
utilities.sequencers.appendaction("pdfpagemanipulators","after","document.pdf_cleanup_cidsets")
utilities.sequencers.appendaction("pdfpagemanipulators","after","document.pdf_cleanup_procsets")

-- These are enabled right away too.
utilities.sequencers.enableaction("pdfpagemanipulators","document.pdf_strip_page")
utilities.sequencers.enableaction("pdfpagemanipulators","document.pdf_cleanup_cidsets")
utilities.sequencers.enableaction("pdfpagemanipulators","document.pdf_cleanup_procsets")
1085 |