lpdf-fix-imp-contents.lmt /size: 39 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['lpdf-fix-imp-contents'] = {
2    version   = 1.001,
3    comment   = "companion to lpdf-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- This is preliminary code. The \PDF\ inclusion interface has never been designed
10-- for manipulation so we need to cheat every now and then and signal what objects
11-- are adapted. It's okay but a nicer interface is on the agenda so that the __raw__
12-- trickery gets hidden.
13
14-- See compactors-preset.lua for examples of compactor specifications.
15
16local trace_fixes      = false  trackers.register("graphics.fixes",    function(v) trace_fixes     = v end)
17local trace_operators  = false  trackers.register("graphics.operators",function(v) trace_operators = v end)
18
19local report_fixes = logs.reporter("graphics","fixes")
20
21local type, tonumber = type, tonumber
22local char = string.char
23local setmetatableindex, setmetatablecall, sortedhash, concat, insert = table.setmetatableindex, table.setmetatablecall, table.sortedhash, table.concat, table.insert
24local round = math.round
25local numbertostring = string.f6
26
27local expanded = lpdf.epdf.expanded
28
-- Reports whether an rgb triplet is unusable: true as soon as one of the
-- components is not a number or lies outside the closed range 0 .. 1,
-- false otherwise.
local function invalid_rgb(r, g, b)
    if type(r) ~= "number" or r < 0 or r > 1 then
        return true
    end
    if type(g) ~= "number" or g < 0 or g > 1 then
        return true
    end
    return type(b) ~= "number" or b < 0 or b > 1
end
35
-- Reports whether a cmyk quadruple is unusable: true as soon as one of the
-- components is not a number or lies outside the closed range 0 .. 1,
-- false otherwise.
local function invalid_cmyk(c, m, y, k)
    if type(c) ~= "number" or c < 0 or c > 1 then
        return true
    end
    if type(m) ~= "number" or m < 0 or m > 1 then
        return true
    end
    if type(y) ~= "number" or y < 0 or y > 1 then
        return true
    end
    return type(k) ~= "number" or k < 0 or k > 1
end
43
-- Replaces a plain device colorspace of an image object by a reference to the
-- default profile with the matching number of components (1 for DeviceGray,
-- 3 for DeviceRGB, 4 for DeviceCMYK). Objects that are already flagged as
-- remapped and other colorspaces (indexed, for instance) are left alone.
-- Returns true when the object has been changed, nothing otherwise.
local function fix_image_colorspace(v)
    if v.__content_remapped__ then
        return
    end
    local components
    local space = v.__raw__.ColorSpace
    if space == "DeviceCMYK" then
        components = 4
    elseif space == "DeviceRGB" then
        components = 3
    elseif space == "DeviceGray" then
        components = 1
    else
        -- indexed and the like has to be done with the global intent
        return
    end
    local objref = backends.registered.pdf.codeinjections.defaultprofile(components)
    if not objref then
        return
    end
    v.__raw__.ColorSpace = { lpdf.epdf.objectcodes.lpdf, lpdf.reference(objref) }
    v.__content_remapped__ = true
    return true
end
65
66-- compactor.strip.colorspace = "cmyk"
67
-- Forces the colorspace of a form object to the device space that goes with
-- the compactor key "cmyk", "rgb" or "gray"; any other key is ignored. When
-- the form has a ColorSpace entry that one is overwritten, otherwise the CS
-- entry of its Group is changed when it differs from the wanted space.
-- Returns true when the object has been changed, nothing otherwise.
local function fix_form_colorspace(v,newspace)
    local wanted
    if newspace == "cmyk" then
        wanted = "DeviceCMYK"
    elseif newspace == "rgb" then
        wanted = "DeviceRGB"
    elseif newspace == "gray" then
        wanted = "DeviceGray"
    else
        return
    end
    if v.__raw__.ColorSpace then
        -- an explicit colorspace is always replaced, also when it is the same
        v.__raw__.ColorSpace = wanted
        v.__content_remapped__ = true
        return true
    end
    local group = v.Group
    if not group then
        return
    end
    if group.CS == wanted then
        return
    end
    group.__raw__.CS = wanted
    v.Group = group
    v.__content_remapped__ = true
    return true
end
95
-- mask clean up code has been removed .. too big of a mess
97
98do
99
100    function document.pdf_strip_page(pdfdoc,page,pagenumber,resources,compactor)
101        if resources then
102            local group_done      = compactor.strip.group       and 0 or false
103            local extgstate_done  = compactor.strip.extgstate   and 0 or false
104            local metadata_done   = compactor.strip.metadata    and 0 or false
105            local properties_done = compactor.strip.properties  and 0 or false
106            local colorspace_done = compactor.strip.colorspace  and 0 or false
107            local procset_done    = compactor.cleanup.procset   and 0 or false
108            local pieceinfo_done  = compactor.cleanup.pieceinfo and 0 or false
109            local smask_done      = compactor.report.smask      and 0 or false
110            if group_done and page.__raw__.Group then
111                page     .__raw__.Group = nil group_done = group_done + 1
112                resources.__raw__.Group = nil group_done = group_done + 1
113            end
114            if extgstate_done  and resources.__raw__.ExtGState  then resources.__raw__.ExtGState  = nil extgstate_done  = extgstate_done  + 1 end
115            if properties_done and resources.__raw__.Properties then resources.__raw__.Properties = nil properties_done = properties_done + 1 end
116            if smask_done      and resources.__raw__.SMask      then                                    smask_done      = smask_done      + 1 end
117            if procset_done    and resources.__raw__.ProcSet    then resources.__raw__.ProcSet    = nil procset_done    = procset_done    + 1 end
118            if pieceinfo_done  and resources.__raw__.PieceInfo  then resources.__raw__.PieceInfo  = nil pieceinfo_done  = pieceinfo_done  + 1 end
119            -- todo : recursely do xforms, only do when not yet done
120            local x = resources.XObject
121            local f = resources.Font
122            if x or f then
123                local function strip(v)
124                    if group_done     and v.__raw__.Group     then v.__raw__.Group     = nil group_done     = group_done     + 1 end
125                    if extgstate_done and v.__raw__.ExtGState then v.__raw__.ExtGState = nil extgstate_done = extgstate_done + 1 end
126                    if metadata_done  and v.__raw__.Metadata  then v.__raw__.Metadata  = nil metadata_done  = metadata_done  + 1 end
127                    if smask_done     and v.__raw__.SMask     then                           smask_done     = smask_done     + 1 end
128                    if pieceinfo_done and v.__raw__.PieceInfo then v.__raw__.PieceInfo = nil pieceinfo_done = pieceinfo_done + 1 end
129                    --
130                    local subtype = v.__raw__.Subtype
131                    if subtype == "Image" then
132                        if colorspace_done then
133                            if fix_image_colorspace(v) then
134                               colorspace_done = colorspace_done + 1
135                            end
136                        end
137                    elseif subtype == "Form" then
138                        if colorspace_done then
139                            if fix_form_colorspace(v,compactor.strip.colorspace) then
140                               colorspace_done = colorspace_done + 1
141                            end
142                        end
143                    end
144                    --
145                    local r = v.Resources
146                    if r then
147                        if procset_done and r.__raw__.ProcSet then r.__raw__.ProcSet = nil procset_done = procset_done + 1 end
148                        local x = r.XObject
149                        if x then
150                            for k, v in expanded(x) do
151                                strip(v)
152                            end
153                        end
154                        v.Resources = r
155                    elseif subtype == "Form" then
156                        report_fixes("todo: here we have a test case")
157                     -- v.Resources = { lpdf.epdf.objectcodes.lpdf, lpdf.checkedresources() }
158                    end
159                end
160                if x then
161                    for k, v in expanded(x) do
162                        strip(v)
163                    end
164                end
165                if f then
166                    for k, v in expanded(f) do
167                        if v.Type == "Font" and v.Subtype == "Type3" then
168                            strip(v)
169                        end
170                    end
171                end
172            end
173            if trace_fixes and (group_done or extgstate_done or metadata_done or properties_done or smask_done or colorspace_done or procset_done or pieceinfo_done) then
174                report_fixes(
175                    "page %i of %a cleaned up resources, %i groups, %i graphic states, %i metadata, %i properties, %i colorspaces, %i smasks, %i procsets, %i pieceinfo",
176                    pagenumber,file.basename(pdfdoc.filename),
177                    group_done      or 0,
178                    extgstate_done  or 0,
179                    metadata_done   or 0,
180                    properties_done or 0,
181                    colorspace_done or 0,
182                    smask_done      or 0,
183                    procset_done    or 0,
184                    pieceinfo_done  or 0
185                )
186            end
187        end
188
189        -- test
190-- if pagenumber == 1 then
191--     print(pdfdoc.Catalog.StructTreeRoot)
192-- end
193    end
194
195end
196
197do
198
199    local tocidsetdictionary = lpdf.tocidset
200
201    -- todo: recurse into xforms
202
203    local function pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor,action)
204        local f = resources.Font
205        local x = resources.XObject
206        if f then
207            local done = document.cidsetdone or { }
208            document.cidsetdone = done
209            for k, v in next, f.__raw__ do
210                local objref = v[1] == lpdf.epdf.objectcodes.reference and v[3]
211                if objref and not done[objref] then
212                    done[objref] = true
213                    --
214                    local v = pdfdoc.objects[objref]
215                    if v.Subtype ~= "Type0" then
216                        goto DONE
217                    end
218                    local d = v.DescendantFonts
219                    if not d then
220                        goto DONE
221                    end
222                    local vd = false
223                    local fd = false
224                    if d then
225                        if #d == 1 then
226                            vd = d[1]
227                            fd = vd.FontDescriptor
228                        end
229                    else
230                        vd = v
231                        fd = vd.FontDescriptor
232                    end
233                    if not fd then
234                        goto DONE
235                    end
236                    if action == "remove" and fd.CIDSet then
237                        local object = pdfdoc.objects[vd.__raw__.FontDescriptor]
238                        if object then
239                            object.__raw__.CIDSet = nil -- maybe just { } as signal
240                        end
241                    elseif action == "add" and not fd.CIDSet then
242                        local w = vd.W
243                        if w then
244                            local u, min, max = lpdf.epdf.expandwidths(w())
245                            local c = tocidsetdictionary(u,min,max)
246                            local o = lpdf.flushstreamobject(c)
247                            local r = lpdf.reference(o)
248                            -- load the object
249                            local object = pdfdoc.objects[vd.__raw__.FontDescriptor]
250                            if object then
251                                if trace_fixes then
252                                    report_fixes(
253                                        "page %i of %a, font %a, adding CIDSet",
254                                        pagenumber,file.basename(pdfdoc.filename),v.BaseFont
255                                    )
256                                end
257                                object.__raw__.CIDSet = { lpdf.epdf.objectcodes.lpdf, r }
258                            end
259                        end
260                    end
261                    if not vd.__raw__.CIDToGIDMap then
262                        if trace_fixes then
263                            report_fixes(
264                                "page %i of %a, font %a, adding CIDToGIDMap",
265                                pagenumber,file.basename(pdfdoc.filename),v.BaseFont
266                            )
267                        end
268                        vd.__raw__.CIDToGIDMap = { lpdf.epdf.objectcodes.lpdf, lpdf.constant("Identity") }
269                    end
270                    --
271                end
272              ::DONE::
273            end
274        end
275        if x then
276            for k, v in expanded(x) do
277                local resources = v.Resources
278                if resources then
279                    pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor,action)
280                end
281            end
282        end
283    end
284
285    function document.pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor)
286        if resources then
287            local action = false
288            if lpdf.majorversion() > 1 then
289                action = "remove"
290            elseif compactor.cleanup.cidset then
291                action = "remove"
292            elseif compactor.add.cidset then
293                action = "add"
294            end
295            if action then
296                pdf_cleanup_cidsets(pdfdoc,page,pagenumber,resources,compactor,action)
297            end
298        end
299    end
300
301end
302
303do
304
305    local function pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
306        resources.__raw__.ProcSet = nil
307        --
308        local x = resources.XObject
309        if x then
310            for k, v in expanded(x) do
311                local resources = v.Resources
312                if resources then
313                    pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
314                end
315            end
316        end
317    end
318
319    function document.pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
320        if resources then
321            if lpdf.majorversion() > 1 or lpdf.minorversion() > 3 then
322                pdf_cleanup_procsets(pdfdoc,page,pagenumber,resources,compactor)
323            end
324        end
325    end
326
327end
328
329do
330
331    local cmyktorgb  = attributes.colors.cmyktorgb
332    local cmyktogray = attributes.colors.cmyktogray
333    local rgbtocmyk  = attributes.colors.rgbtocmyk
334    local rgbtogray  = attributes.colors.rgbtogray
335
336--     local remapcmyk = { }
337
338    local function reducecmyk(c,op)
339        local c1 = tonumber(c[1])
340        local c2 = tonumber(c[2])
341        local c3 = tonumber(c[3])
342        local c4 = tonumber(c[4])
343        local cc = c1 == c2 and c2 == c3
344        if cc then
345            if c1 == 0 then
346                -- no color, only black
347                c[1] = numbertostring(1 - c4)
348            elseif c1 == 1 then
349                -- brownish, so assume black
350                c[1] = "0"
351            else
352                c4 = c4 + c1
353                c[1] = c4 > 1 and "0" or numbertostring(1 - c4)
354            end
355            c[2] = op == "K" and "G" or "g"
356            c[3] = nil
357            c[4] = nil
358            c[5] = nil
359     -- else -- can be an option
360     --     local r, g, b = cmyktorgb(c1,c2,c3,c4)
361     --     c[1] = numbertostring(r)
362     --     c[2] = numbertostring(g)
363     --     c[3] = numbertostring(b)
364     --     c[4] = op == "K" and "RG" or "rg"
365     --     c[5] = nil
366        end
367    end
368
369    local function reducergb(c,op)
370        local c1 = c[1]
371        local c2 = c[2]
372        local c3 = c[3]
373        if c1 == c2 and c2 == c3 then
374            c[1] = c1
375            c[2] = op == "rg" and "g" or "G"
376            c[3] = nil
377            c[4] = nil
378        end
379    end
380
381    local cmykmap = false
382    local cmykfun = false
383    local rgbmap = false
384    local rgbfun = false
385
386    local function convertcmyk(c,op)
387        local c1 = tonumber(c[1])
388        local c2 = tonumber(c[2])
389        local c3 = tonumber(c[3])
390        local c4 = tonumber(c[4])
391        local cc = c1 == c2 and c2 == c3
392        if cc then
393            if c1 == 0 then
394                -- no color, only black
395                c[1] = numbertostring(1 - c4)
396            elseif c1 == 1 then
397                -- brownish, so assume black
398                c[1] = "0"
399            else
400                c4 = c4 + c1
401                c[1] = c4 > 1 and "0" or numbertostring(1 - c4)
402            end
403            c[2] = op == "K" and "G" or "g"
404            c[3] = nil
405            c[4] = nil
406            c[5] = nil
407        else -- can be an option
408            local r, g, b
409            if cmykmap then
410                -- cmykmap = {
411                --     { 1, 1, 0, 0, .5, .6. 7 } -- todo: speed up
412                -- }
413                for i=1,#cmykmap do
414                    local map    = cmykmap[i]
415                    local factor = map[1]
416                    local r1 = round(c1*factor)
417                    local r2 = round(c2*factor)
418                    local r3 = round(c3*factor)
419                    local r4 = round(c4*factor)
420                    if map[2] == r1 and map[3] == r2 and map[4] == r3 and map[5] == r4 then
421                        r = (map[6] or 0)/factor
422                        g = (map[7] or 0)/factor
423                        b = (map[8] or 0)/factor
424                        goto DONE
425                    end
426                end
427            elseif cmykfun then
428                r, g, b = cmykfun(c1,c2,c3,c4)
429                if invalid_rgb(r,g,b) then
430                    -- todo: report failed conversion
431                else
432                    goto DONE
433                end
434            end
435            r, g, b = cmyktorgb(c1,c2,c3,c4)
436         ::DONE::
437            c[1] = numbertostring(r)
438            c[2] = numbertostring(g)
439            c[3] = numbertostring(b)
440            c[4] = op == "K" and "RG" or "rg"
441            c[5] = nil
442        end
443    end
444
445    local function convertrgb(z,op)
446        local c1 = z[1]
447        local c2 = z[2]
448        local c3 = z[3]
449        if c1 == c2 and c2 == c3 then
450            z[1] = c1
451            z[2] = op == "rg" and "g" or "G"
452            z[3] = nil
453            z[4] = nil
454        else
455            local c, m, y, k
456            if rgbmap then
457                for i=1,#rgbmap do
458                    local map    = rgbmap[i]
459                    local factor = map[1]
460                    local r1 = round(c1*factor)
461                    local r2 = round(c2*factor)
462                    local r3 = round(c3*factor)
463                    if map[2] == r1 and map[3] == r2 and map[4] == r3 then
464                        c = (map[5] or 0)/factor
465                        m = (map[6] or 0)/factor
466                        y = (map[7] or 0)/factor
467                        k = (map[8] or 0)/factor
468                        goto DONE
469                    end
470                end
471            elseif rgbfun then
472                c, m, y, k = rgbfun(c1,c2,c3)
473                if invalid_cmyk(c, m, y, k) then
474                    -- todo: report failed conversion
475                else
476                    goto DONE
477                end
478            end
479            c, m, y, k = rgbtocmyk(c1,c2,c3)
480          ::DONE::
481            z[1] = numbertostring(c)
482            z[2] = numbertostring(m)
483            z[3] = numbertostring(y)
484            z[4] = numbertostring(k)
485            z[5] = op == "RG" and "K" or "k"
486        end
487    end
488
489    local g1, g2, g3, g4
490
491    local function recolorcmyk_gray(c,op)
492        local s = 1 - cmyktogray(c[1],c[2],c[3],c[4])
493        c[1] = numbertostring(s)
494        c[2] = op == "K" and "G" or "g"
495        c[3] = nil
496        c[4] = nil
497        c[5] = nil
498    end
499    local function recolorcmyk_rgb(c,op)
500        local s = 1 - cmyktogray(c[1],c[2],c[3],c[4])
501        c[1] = numbertostring(g1*s)
502        c[2] = numbertostring(g2*s)
503        c[3] = numbertostring(g3*s)
504        c[4] = op == "K" and "RG" or "rg"
505        c[5] = nil
506    end
507    local function recolorcmyk_cmyk(c,op)
508        local s = 1 - cmyktogray(c[1],c[2],c[3],c[4])
509        c[1] = numbertostring(g1*s)
510        c[2] = numbertostring(g2*s)
511        c[3] = numbertostring(g3*s)
512        c[4] = numbertostring(g4*s)
513        c[5] = op
514    end
515
516    local function recolorrgb_gray(c,op)
517        local s = 1 - rgbtogray(c[1],c[2],c[3])
518        c[1] = numbertostring(s)
519        c[2] = op == "RG" and "G" or "g"
520        c[3] = nil
521        c[4] = nil
522        c[5] = nil
523    end
524    local function recolorrgb_rgb(c,op)
525        local s = 1 - rgbtogray(c[1],c[2],c[3])
526        c[1] = numbertostring(g1*s)
527        c[2] = numbertostring(g2*s)
528        c[3] = numbertostring(g3*s)
529        c[4] = op
530        c[5] = nil
531    end
532    local function recolorrgb_cmyk(c,op)
533        local s = 1 - rgbtogray(c[1],c[2],c[3])
534        c[1] = numbertostring(g1*s)
535        c[2] = numbertostring(g2*s)
536        c[3] = numbertostring(g3*s)
537        c[4] = numbertostring(g4*s)
538        c[5] = op == "RG" and "K" or "k"
539    end
540
541    local function recolorgray_gray(c,op)
542        local s = 1 - tonumber(c[1])
543        c[1] = numbertostring(g1*s)
544        c[2] = op
545        c[3] = nil
546        c[4] = nil
547        c[5] = nil
548    end
549    local function recolorgray_rgb(c,op)
550        local s = 1 - tonumber(c[1])
551        c[1] = numbertostring(g1*s)
552        c[2] = numbertostring(g2*s)
553        c[3] = numbertostring(g3*s)
554        c[4] = op == "G" and "RG" or "rg"
555        c[5] = nil
556    end
557    local function recolorgray_cmyk(c,op)
558        local s = 1 - tonumber(c[1])
559        c[1] = numbertostring(g1*s)
560        c[2] = numbertostring(g2*s)
561        c[3] = numbertostring(g3*s)
562        c[4] = numbertostring(g4*s)
563        c[5] = op == "G" and "K" or "k"
564    end
565
566    local function removestate(c,op,contents,i)
567        -- can be made more clever
568        contents[i] = { }
569    end
570
571    local removed = false
572
573    local function removetags(c,op,contents,i)
574        local ci  = contents[i]
575        local one = ci[1]
576        if one then
577            local what = one[2]
578            if what == "Artifact" then
579             -- print("keeping artifact")
580                return -- hopefully no indirect references here
581            elseif what == "Span" then
582                -- we need a proper tohash for this
583                local two = ci[2]
584                if two then
585                    if two[1] == "dict" then -- will become "dictionary"
586                        local list = two[2]
587                        for i=1,#list,2 do
588                            local l = list[i]
589                            if l[2] == "ActualText" then
590                             -- print("keeping actualtext")
591                                return
592                            end
593                        end
594                    end
595                end
596            else
597             -- maybe also check when /MCID and then convert to /Span
598            end
599        end
600        removed = true
601        contents[i] = { }
602        if op == "BMC" or op == "BDC" then
603            local level = 1
604            for ii=i+1,#contents do
605                local c = contents[ii]
606                local o = c[#c]
607                if o == "BMC" or o == "BDC" then
608                    level = level + 1
609                elseif o == "EMC" then
610                    level = level - 1
611                    if level <= 0 then
612                        contents[ii] = { }
613                        break
614                    end
615                end
616            end
617        end
618    end
619
620    local contenttostring = lpdf.epdf.contenttostring
621    local getpagecontent  = lpdf.epdf.getpagecontent
622    local parsecontent    = lpdf.epdf.parsecontent
623
624    local function checkbt(c,op,contents,i)
625        -- ET followed by BT
626        local c = contents[i-1]
627        if c and c[i] == "ET" then
628            contents[i]   = { }
629            contents[i-1] = { }
630        end
631    end
632
633    local function checkQ(c,op,contents,i)
634        local c = contents[i-1]
635        if c and c[#c] == "q" then
636            contents[i]   = { }
637            contents[i-1] = { }
638        end
639    end
640
641--     local function checkcm(c,op,contents,i)
642--         if c and tonumber(c[1]) == 1 and tonumber(c[4]) == 1
643--              and tonumber(c[5]) == 0 and tonumber(c[6]) == 0
644--              and tonumber(c[2]) == 0 and tonumber(c[3]) == 0 then
645--             contents[i] = { }
646--         end
647--     end
648
649--  local actions = {
650--      rg  = reducergb,
651--      RG  = reducergb,
652--      k   = reducecmyk,
653--      K   = reducecmyk,
654--      gs  = removestate,
655--      GS  = removestate,
656--      BMC = removetags,
657--      EMC = removetags,
658--      BDC = removetags,
659--   -- BT  = checkbt,
660--   -- Q   = checkQ,
661--      cm  = checkcm,
662--  }
663
664    local passone = { }
665    local passtwo = { }
666
667    local function useactions(compactor)
668        local used    = { }
669        local strip   = compactor.strip
670        local reduce  = compactor.reduce
671        local convert = compactor.convert
672        local recolor = compactor.recolor
673        cmykmap = false
674        rgbmap  = false
675        cmykfun = false
676        rgbfun  = false
677        removed = false
678        passone = { }
679        passtwo = { }
680        if strip.marked then
681            -- property lists: optional content, tagged content, object metadata, associated files
682            -- if there are direct objects these need to be named in Properties in Resource
683            passone.MP  = removetags -- marked content point
684            passone.DP  = removetags -- marked content point    with properties list
685            passone.BMC = removetags -- marked content sequence
686            passone.BDC = removetags -- marked content sequence with properties list
687        --  passone.EMC = removetags -- marked content sequence end, handled in BMC BDC
688            passtwo.BT  = checkbt    -- with matching ET
689        end
690        if strip.extgstate then
691            passone.gs = removestate
692            passone.GS = removestate
693        end
694     -- if strip.cm then
695     --     passone.cm = checkcm
696     -- end
697        if reduce.color then
698            passone.rg = reducergb
699            passone.RG = reducergb
700            passone.k  = reducecmyk
701            passone.K  = reducecmyk
702        elseif reduce.rgb then
703            passone.rg = reducergb
704            passone.RG = reducergb
705        elseif reduce.cmyk then
706            passone.k = reducecmyk
707            passone.K = reducecmyk
708        end
709        if convert.cmyk then
710            passone.k = convertcmyk
711            passone.K = convertcmyk
712            cmykmap   = type(convert.cmyk) == "table"    and convert.cmyk or false
713            cmykfun   = type(convert.cmyk) == "function" and convert.cmyk or false
714        elseif convert.rgb then
715            passone.rg = convertrgb
716            passone.RG = convertrgb
717            rgbmap     = type(convert.rgb) == "table"    and convert.rgb or false
718            rgbfun     = type(convert.rgb) == "function" and convert.rgb or false
719        end
720        local viagray = recolor.viagray
721        if viagray then
722            g1 = viagray[1]
723            g2 = viagray[2]
724            g3 = viagray[3]
725            g4 = viagray[4]
726            if g4 then
727                passone.k  = recolorcmyk_cmyk
728                passone.K  = recolorcmyk_cmyk
729                passone.rg = recolorrgb_cmyk
730                passone.RG = recolorrgb_cmyk
731                passone.g  = recolorgray_cmyk
732                passone.G  = recolorgray_cmyk
733            elseif g3 then
734                passone.k  = recolorcmyk_rgb
735                passone.K  = recolorcmyk_rgb
736                passone.rg = recolorrgb_rgb
737                passone.RG = recolorrgb_rgb
738                passone.g  = recolorgray_rgb
739                passone.G  = recolorgray_rgb
740            elseif g1 then
741                passone.k  = recolorcmyk_gray
742                passone.K  = recolorcmyk_gray
743                passone.rg = recolorrgb_gray
744                passone.RG = recolorrgb_gray
745                passone.g  = recolorgray_gray
746                passone.G  = recolorgray_gray
747            end
748        else
749            g1, g2, g3, g4 = nil, nil, nil, nil
750        end
751     -- if strip.redundant then
752     --     passtwo.Q   = checkQ
753     -- end
754        if compactor.identify == "all" then
755            compactor.identify= {
756                content   = true,
757                resources = true,
758                page      = true,
759            }
760        end
761    end
762
763    local identify_content
764
765    identify_content = function(pdfdoc,contents,fonts,xobjects,counts)
766        if contents then
767            for i=1,#contents do
768                local ci = contents[i]
769                if ci then
770                    local op = ci[#ci]
771                    if op then
772                        counts[op] = counts[op] + 1
773                        if xobjects and op == "Do" then
774                            -- can be recursive
775                            local object = xobjects[ci[1][2]]
776                            if object then
777                                local subtype = object.Subtype
778                                if subtype == "Form" then
779                                    if not object.__content_remapped__ then
780                                        local r = object.Resources
781                                        if r then
782                                            local contents = object()
783                                            local fonts    = r.Font
784                                            local xobjects = r.XObject
785                                            if contents then
786                                                contents = parsecontent(contents,true)
787                                                if contents then
788                                                    identify_content(pdfdoc,contents,fonts,xobjects,counts)
789                                                end
790                                            end
791                                        end
792                                    end
793                                end
794                            end
795                        end
796                    end
797                end
798            end
799        end
800    end
801
-- Reports how often each operator occurs on a page (used for tracing). The
-- tally is collected by identify_content and then listed sorted by operator
-- name; 'when' is only used as label in the header line.

local function countoperators(pdfdoc,contents,fonts,xobjects,pagenumber,when)
    local tally = setmetatableindex("number") -- identify_content increments entries without initializing them
    identify_content(pdfdoc,contents,fonts,xobjects,tally)
    report_fixes("page %i of file %a: %s",pagenumber,pdfdoc.filename,when)
    for operator, count in sortedhash(tally) do
        report_fixes("%4i : %s",count,operator)
    end
end
810
local strip_content

-- Strips the content stream of a form xobject in place. Forms that are
-- already flagged as remapped, that have no Resources or whose stream can't
-- be parsed are left alone. Otherwise the stripped and serialized stream
-- replaces the original: Length is updated, Filter is dropped, calling the
-- object from now on returns the new string and the object gets flagged.

local function form(pdfdoc,object,pagenumber,compactor)
    if object.__content_remapped__ then
        return
    end
    local resources = object.Resources
    if not resources then
        return
    end
    local stream   = object()
    local fonts    = resources.Font
    local xobjects = resources.XObject
    local parsed   = stream and parsecontent(stream,true)
    if not parsed then
        return
    end
    local stripped   = strip_content(pdfdoc,parsed,fonts,xobjects,pagenumber,compactor)
    local serialized = contenttostring(stripped)
    local raw        = object.__raw__
    raw.Length = #serialized
    raw.Filter = nil -- Filter is dropped together with the Length update
    getmetatable(object).__call = function() return serialized end
    object.__content_remapped__ = true
end
834
-- Handles an image xobject: when the compactor asks for colorspace stripping
-- the image is handed to fix_image_colorspace. The result of that call is
-- not used (the original counter is still disabled).

local function image(pdfdoc,object,pagenumber,compactor)
    local strip = compactor.strip
    if strip.colorspace then
        fix_image_colorspace(object)
     -- c_done = c_done + 1
    end
end
840
-- Compacts a content list in place: falsy entries and entries of length
-- zero are dropped, the remaining ones keep their order and are moved to
-- the front, and the leftover tail is cleared.

local function collapse(contents)
    local total = #contents
    local kept  = 0
    for i=1,total do
        local entry = contents[i]
        if entry and #entry > 0 then
            kept = kept + 1
            if kept ~= i then
                contents[kept] = entry
            end
        end
    end
    for i=total,kept+1,-1 do
        contents[i] = nil
    end
end
860
-- Operators that only change state and don't render anything themselves.
-- The pollution pass in strip_content uses this set: a q .. Q group that
-- only has these operators in between gets removed.

local nocontent = {
    -- graphics state stack, transformation, extended graphics state
    q   = true,
    Q   = true,
    cm  = true,
    gs  = true,
    -- line width, cap, join, dash and flatness
    w   = true,
    J   = true,
    j   = true,
    d   = true,
    i   = true,
    -- gray, rgb and cmyk colors
    g   = true,
    G   = true,
    rg  = true,
    RG  = true,
    k   = true,
    K   = true,
    -- color spaces and generic color setters
    cs  = true,
    CS  = true,
    sc  = true,
    SC  = true,
    scn = true,
    SCN = true,
    -- text state
    Tc  = true,
    TL  = true,
    Tr  = true,
    Ts  = true,
    Tw  = true,
    Tz  = true,
}
883
do

    -- Blanks every q .. Q group that has nothing but state setting operators
    -- (see nocontent) between the q and the Q. Returns true when at least
    -- one group got blanked. Nested groups are only handled from the
    -- innermost q on.

    local function strip_pollution(contents)
        local opened  = false
        local changed = false
        for i=1,#contents do
            local entry    = contents[i]
            local operator = entry[#entry]
            if operator == "q" then
                opened = i
            elseif operator == "Q" then
                if opened then
                    for j=opened,i do
                        contents[j] = { }
                    end
                    changed = true
                    opened  = false
                end
            elseif not nocontent[operator] then
                opened = false
            end
        end
        return changed
    end

    -- Blanks every cm operator whose six operands are 1 0 0 1 0 0. Returns
    -- true when at least one entry got blanked.

    local function strip_identitycm(contents)
        local changed = false
        for i=1,#contents do
            local entry = contents[i]
            if entry[#entry] == "cm" then
                local identity =
                    tonumber(entry[1]) == 1 and tonumber(entry[4]) == 1 and
                    tonumber(entry[5]) == 0 and tonumber(entry[6]) == 0 and
                    tonumber(entry[2]) == 0 and tonumber(entry[3]) == 0
                if identity then
                    contents[i] = { }
                    changed = true
                end
            end
        end
        return changed
    end

    -- Runs the stripping passes over a parsed content list and returns that
    -- same (modified) list; nothing is returned when there is no list.
    --
    -- The handlers in passone and passtwo are looked up by operator and get
    -- called with (entry,operator,contents,index). Both tables, as well as
    -- the 'removed' flag that is checked before pass two, are defined
    -- outside this function (presumably they are set up by useactions and
    -- the handlers; to be confirmed). Operators without a pass one handler
    -- that are "Do" make us descend into the referenced form or image.

    strip_content = function(pdfdoc,contents,fonts,xobjects,pagenumber,compactor)
        if not contents then
            return
        end
        --
        for i=1,#contents do
            local entry    = contents[i]
            local operator = entry[#entry]
            local action   = passone[operator]
            if action then
                action(entry,operator,contents,i)
            elseif xobjects and operator == "Do" then
                -- can be recursive
                local object = xobjects[entry[1][2]]
                if object then
                    local subtype = object.Subtype
                    if subtype == "Form" then
                        form(pdfdoc,object,pagenumber,compactor)
                    elseif subtype == "Image" then
                        image(pdfdoc,object,pagenumber,compactor)
                    end
                end
            end
        end
        --
        if next(passtwo) then
            if removed then
                collapse(contents)
            end
            for i=1,#contents do
                local entry    = contents[i]
                local operator = entry[#entry]
                local action   = passtwo[operator]
                if action then
                    action(entry,operator,contents,i)
                end
            end
         -- collapse(contents)
        end
        --
        -- not yet done: q q ... Q Q
        --
        local strip = compactor.strip
        if strip and (strip.identitycm or strip.pollution) then
            local changed = false
            if strip.pollution and strip_pollution(contents) then
                changed = true
            end
            if strip.identitycm and strip_identitycm(contents) then
                changed = true
            end
            if changed then
                collapse(contents)
            end
        end
        --
        return contents
    end

end
973
-- Prepares the compactor for the stripping stage and tells if there is
-- anything to do at all.
--
-- We work on a copy of the compactor in which missing sections show up as
-- empty tables, so the caller's specification is not touched. The
-- strip.marked setting is resolved per page:
--
--   "force"                : kept as is
--   set, with structure    : kept when the catalog has a StructTreeRoot
--   "page"                 : kept only when the page content has an /MCID entry
--   anything else          : reset
--
-- Returns the (copied) compactor and a boolean that is true when one of
-- the strip, reduce, convert or recolor sections has an entry.

local function strip_content_needed(pdfdoc,page,pagenumber,resources,compactor)
    compactor = table.fastcopy(compactor)
    setmetatableindex(compactor,"table")
    local strip  = compactor.strip
    local marked = strip.marked
    if marked == "force" then
        -- always
    elseif marked and pdfdoc.Catalog.StructTreeRoot then
        report_fixes("page %i of file %a: %s (structure tree %s)",pagenumber,pdfdoc.filename,"stripping tags","found")
    elseif marked == "page" then
        local contents = lpdf.epdf.allcontent(page.Contents or "") -- unparsed string
        -- The pattern used to end in a stray "*" which is taken as a literal
        -- asterisk at that spot, so it never matched something like
        -- "/MCID 12". We look for the key, whitespace and an integer.
        if string.find(contents,"/MCID%s+%d+") then
            report_fixes("page %i of file %a: %s (structure tree %s)",pagenumber,pdfdoc.filename,"stripping tags","missing")
        else
            strip.marked = nil
        end
    else
        strip.marked = nil
    end
    return
        compactor, (
            next(compactor.strip)
         or next(compactor.reduce)
         or next(compactor.convert)
         or next(compactor.recolor)
        ) and true or false
end
1001
1002    -- two compactors: one for content, one for resources etc
1003
-- First step of the content manipulators: it installs the getcontents and
-- setcontents accessors on the document for the current page and, when
-- identification is enabled and operator tracing is on, reports the
-- operator counts as they are before any stripping.
--
-- Identification is enabled when compactor.identify is the string "all" or
-- a table with a true 'content' field. A missing compactor or any other
-- value of 'identify' simply disables it (an unguarded lookup would raise
-- an error in those cases).

function document.pdf_identify_content(pdfdoc,page,pagenumber,resources,compactor)

    -- The parsed content of this page is fetched on first use and then kept
    -- in pdfdoc.currentcontents.

    local function getcontents()
        local contents = pdfdoc.currentcontents
        if not contents then
            contents = getpagecontent(pdfdoc,pagenumber,true,true)
            pdfdoc.currentcontents = contents
        end
        return contents
    end

    -- A falsy value never replaces the content that we already have.

    local function setcontents(contents)
        if contents then
            pdfdoc.currentcontents = contents
        end
    end

    pdfdoc.getcontents = getcontents
    pdfdoc.setcontents = setcontents

    local identify = type(compactor) == "table" and compactor.identify
    if identify == "all" or (type(identify) == "table" and identify.content) then
        local fonts    = resources.Font
        local xobjects = resources.XObject
        local contents = pdfdoc.getcontents()
        if contents and trace_operators then
            countoperators(pdfdoc,contents,fonts,xobjects,pagenumber,"before")
        end
    end
end
1035
-- Second step of the content manipulators: strips the parsed page content
-- when the (page specific) compactor has something to do. The stripped
-- content is stored again with pdfdoc.setcontents so that the serializer
-- picks it up.

function document.pdf_strip_content(pdfdoc,page,pagenumber,resources,compactor)
    local specification, needed = strip_content_needed(pdfdoc,page,pagenumber,resources,compactor)
    if not needed then
        return
    end
    local contents = pdfdoc.getcontents()
    if not contents then
        return
    end
    local fonts    = resources.Font
    local xobjects = resources.XObject
    useactions(specification)
    -- g1 is a flag that lives outside this function (presumably set by
    -- useactions; to be confirmed): when set the content gets prefixed with
    -- "0 g" followed by "0 G".
    if g1 then
        insert(contents,1, { 0, "g" })
        insert(contents,2, { 0, "G" })
    end
    contents = strip_content(pdfdoc,contents,fonts,xobjects,pagenumber,specification)
    resources.Font = fonts -- really needed (or maybe not here)
    pdfdoc.setcontents(contents)
end
1054
-- Last step of the content manipulators: the parsed (and maybe stripped)
-- content is serialized into page.Contents and the per page state that was
-- set up by document.pdf_identify_content is removed from the document.
-- Nothing happens when there is no parsed content.

function document.pdf_serialize_content(pdfdoc,page,pagenumber,resources,compactor)
    local contents = pdfdoc.getcontents()
    if contents then
        if trace_operators then
            -- These have to come from the page resources, just as in the
            -- "before" report; they are not defined otherwise in this
            -- function so the xobjects of the page were not seen here.
            local fonts    = resources and resources.Font
            local xobjects = resources and resources.XObject
            countoperators(pdfdoc,contents,fonts,xobjects,pagenumber,"after")
        end
        page.Contents = contenttostring(contents)
        pdfdoc.currentcontents = nil
        pdfdoc.getcontents = nil
        pdfdoc.setcontents = nil
    end
end
1067
1068end
1069
-- Here we hook the manipulators into their sequencers and enable them. The
-- three content manipulators are defined above; the page manipulators
-- (strip_page, cleanup_cidsets, cleanup_procsets) are not defined in this
-- part of the file. The order of the calls is kept as it was.

do

    local sequencers   = utilities.sequencers
    local appendaction = sequencers.appendaction
    local enableaction = sequencers.enableaction

    appendaction("pdfcontentmanipulators","before","document.pdf_identify_content")
    appendaction("pdfcontentmanipulators","system","document.pdf_strip_content")
    appendaction("pdfcontentmanipulators","after", "document.pdf_serialize_content")

    enableaction("pdfcontentmanipulators","document.pdf_identify_content")
    enableaction("pdfcontentmanipulators","document.pdf_strip_content")
    enableaction("pdfcontentmanipulators","document.pdf_serialize_content")

    appendaction("pdfpagemanipulators","after","document.pdf_strip_page")
    appendaction("pdfpagemanipulators","after","document.pdf_cleanup_cidsets")
    appendaction("pdfpagemanipulators","after","document.pdf_cleanup_procsets")

    enableaction("pdfpagemanipulators","document.pdf_strip_page")
    enableaction("pdfpagemanipulators","document.pdf_cleanup_cidsets")
    enableaction("pdfpagemanipulators","document.pdf_cleanup_procsets")

end
1085