lpdf-fix-imp-fonts.lmt /size: 50 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['lpdf-fix-imp-fonts'] = {
2    version   = 1.001,
3    comment   = "companion to lpdf-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
-- In LMTX we have to do this differently than in MkIV. We also prepare ourselves
-- for variable fonts and such. In LuaTeX we use the original index but in LMTX we
11-- use a decent sequence which means that we need to resolve the original. This
12-- kind of hackery is fragile anyway, so we only merge files that are produced
13-- by ConTeXt.
14--
15-- It's a stepwise process to get this done and it will never be perfect for all
16-- inclusions. After BT 2024 I managed to handle some of the awfully bad pdf
17-- files that VRWS had to deal with merged and cleaned up. After that I rewarded
18-- myself by watching Camille Bigeault's Mental Web drumming video (which makes
19-- the usual musical timestamp).
20
21local next, type, getmetatable, unpack = next, type, getmetatable, unpack
22local gsub, format, match, find, gmatch = string.gsub, string.format, string.match, string.find, string.gmatch
23local setmetatableindex, sortedhash, sequenced = table.setmetatableindex, table.sortedhash, table.sequenced
24local nameonly, basename = file.nameonly, file.basename
25local hextointeger, chrtointeger = string.hextointeger, string.chrtointeger
26local f6 = string.f6
27local concat, insert, remove = table.concat, table.insert, table.remove
28
29local pdfe             = lpdf.epdf
30local pdfreference     = lpdf.reference
31local pdfreserveobject = lpdf.reserveobject
32
33local trace_merge      = false  trackers.register("graphics.fonts",        function(v) trace_merge   = v end)
34local trace_details    = false  trackers.register("graphics.fonts.details",function(v) trace_details = v end)
35local report_merge     = logs.reporter("graphics","fonts")
36
37local expanded         = pdfe.expanded
38local contenttostring  = pdfe.contenttostring
39local getpagecontent   = pdfe.getpagecontent
40local parsecontent     = pdfe.parsecontent
41
42----- definefont       = fonts.definers.internal
43local definefont       = fonts.definers.define
44local getstreamhash    = fonts.handlers.otf.getstreamhash
45local loadstreamdata   = fonts.handlers.otf.loadstreamdata
46local cleanfontname    = fonts.names.cleanname
47local chardata         = fonts.hashes.characters
48
-- Three level cache used by isdefinedlmtx: defined[fontname][subfont] gives a
-- plain table in which font identifiers (or false) are stored. The first two
-- levels are created on demand by the index metamethods below.

local defined  do

    -- second level: a fresh table per subfont
    local function newsubfont(subfonts,subfont)
        local ids = { }
        subfonts[subfont] = ids
        return ids
    end

    -- first level: a self-extending table per font name
    defined = setmetatableindex(function(fontnames,filename)
        local subfonts = setmetatableindex(newsubfont)
        fontnames[filename] = subfonts
        return subfonts
    end)

end
58
-- Normalizes an instance specification into two values: instance, features.
--
--   table            : nil, "axis={...}" built from the table's __raw__ field
--   non empty value  : the value itself, nil
--   nil, false or "" : nil, nil

local function toinstance(instance)
    if type(instance) == "table" then
        local axis = sequenced(instance.__raw__,",")
        return nil, "axis={" .. axis .. "}"
    end
    if not instance or instance == "" then
        return nil, nil
    end
    return instance, nil
end
68
69-- This is a bit of a hack ... we need to be able to set the instance directly
70-- on a file.
71
-- Defines (once per fontname, subfont and feature string) the font that an
-- LMTX registry entry refers to and checks that it is consistent with what the
-- included file expects. Returns the font id when all checks pass, otherwise
-- false. The checks are, in this order: stream hash, loadable stream data,
-- font version (when given and known) and glyph count (when given and known).

local function isdefinedlmtx(filename,subfont,instance,hash,version,glyphcount)
    local fontname = "file:" .. filename
    local instance, features = toinstance(instance)
    if instance then
        -- a named instance is folded into the name, so nothing is left to pass on
        fontname = "name:" .. nameonly(filename) .. instance -- not ok as it's not fontname
        instance = nil
    end
    local known = defined[fontname][subfont]
    local slot  = instance or features or false
    local id    = known[slot]
    if not id then
        -- we can try to avoid this
        id = definefont {
            name     = fontname,
            instance = instance,
            detail   = features,
        }
        if id > 0 then
            local dummy = lpdf.usedcharacters[id] -- force embedding
        else
            id = false
        end
        known[slot] = id
    end
    if not id then
        return false
    end
    local function inconsistent(what)
        report_merge("inconsistent %s in %a",what,filename)
        return false
    end
    -- We double check here!
    local shash, sdata = getstreamhash(id)
    if hash ~= shash then
        return inconsistent("hashes")
    end
    sdata = loadstreamdata(sdata)
    if not sdata then
        return inconsistent("streamdata")
    end
    local fontheader = sdata.fontheader
    if version and fontheader and version ~= fontheader.fontversion then
        return inconsistent("versions")
    end
    local streams = sdata.streams
    if glyphcount and streams then
        -- the stream list may have an entry at index zero that # does not count
        local found = #streams + (streams[0] and 1 or 0)
        if glyphcount ~= found then
            return inconsistent("glyphcount")
        end
    end
    return id
end
120
121-- todo: check some more
122
123local cleanname = fonts.names.cleanname
124
125local remap = { }
126
-- Stores a remap specification under the cleaned up name of its source field.
-- Specifications without a source are ignored.

local function registerfont(specification)
    local source = specification.source
    if not source then
        return
    end
    remap[cleanfontname(source)] = specification
end
133
134backends.codeinjections.registerfont = registerfont
135
-- Public wrapper: hands all its arguments over to registerfont, which itself
-- returns nothing.

function graphics.registerpdffont(...)
    -- todo: message
    return registerfont(...)
end
140
-- Defines a font that we only know by name (no LMTX registry available).
--
--   fontname : the name as found in the pdf file (subset prefix already removed)
--   remap    : table indexed by cleaned font name; when the entry has a target
--              field that one is used as font name instead
--
-- Returns the stream hash and the font id on success, and just false when the
-- definition fails, so callers must be prepared for a missing second value.

local function isdefinedunknown(fontname,remap)
    local mapped = remap[cleanfontname(fontname)]
    local target = mapped and mapped.target
    local name -- local: the original assignment leaked into the global namespace
    if target then
        report_merge("remapping %a to %a",fontname,target)
        name = target
    else
        name = "name:" .. fontname
    end
    local id = definefont {
        name = name,
    }
    if id > 0 then
        local dummy = lpdf.usedcharacters[id] -- force embedding
        -- only the hash is used, there is no checking of the stream data here
        local shash = getstreamhash(id)
        return shash, id
    end
    return false
end
167
168-- todo: we can share the map within a pdfdoc .. using the object number
169
170local status = {
171    files     = { },
172    pages     = 0,
173    xobjects  = 0,
174    charprocs = 0,
175    merged    = 0,
176    notmerged = 0,
177    indices   = 0,
178}
179
-- Reports the counters collected in status, but only when at least one page,
-- xobject or charproc was processed; otherwise nothing is returned.

statistics.register("compact font inclusion", function()
    local active = status.pages > 0 or status.xobjects > 0 or status.charprocs > 0
    if not active then
        return
    end
    local template = "%i files, %i pages, %i indices, %i xobjects, %i chrprocs, %i times merged, %i times not merged"
    return format(template,
        table.count(status.files),
        status.pages,
        status.indices,
        status.xobjects,
        status.charprocs,
        status.merged,
        status.notmerged
    )
end)
194
-- Returns a reference to object o when the entry (key,value) is the same as
-- (k,v): identical keys, both entries have 10 in slot 1 (presumably the type
-- code of a reference, to be confirmed) and the same value in slot 3. In all
-- other cases the given value is returned untouched.

local function checkedfontreference(k,v,key,value,o)
    local same = key == k
        and v[1] == 10 and value[1] == 10 -- otherwise: different objects
        and v[3] == value[3]              -- otherwise: different values
    if same then
        return pdfreference(o)
    end
    return value
end
209
210local getstates, getindexstate_composite, getindexstate_simple  do
211
212    local fromunicode16 = fonts.mappings.fromunicode16
213    local expandwidths  = pdfe.expandwidths
214    local mergewidths   = pdfe.mergewidths
215
216    local function initialize(t,k)
217        local v = {
218            unicodes = { },
219            widths   = { },
220            fontname = k,
221        }
222        t[k] = v
223        return v
224    end
225
226    getstates = function(pdfdoc)
227        local states = pdfdoc.fontstates
228        if not states then
229            states = {
230                Type1    = setmetatableindex(initialize), -- simple fonts, 1 byte index
231                Type3    = setmetatableindex(initialize), -- idem
232                TrueType = setmetatableindex(initialize), -- idem
233                OpenType = setmetatableindex(initialize), -- composite fonts, 2 byte index
234            }
235            pdfdoc.fontstates = states
236        end
237        return states
238    end
239
240    local splitsixteen  do
241
242        local lpegmatch = lpeg.match
243
244        local more = 0
245
246     -- local pattern = lpeg.P(true) / function() more = 0 end * (
247        local pattern = (
248            lpeg.C(4) / function(s) -- needs checking !
249                local now = hextointeger(s)
250                if more > 0 then
251                    now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000
252                    more = 0
253                    return now
254                elseif now >= 0xD800 and now <= 0xDBFF then
255                    more = now
256                else
257                    return now
258                end
259            end
260        )^0
261
262        splitsixteen = function(str)
263            if str and str ~= "" then
264                more = 0
265                return lpegmatch(pattern,str)
266            end
267        end
268
269    end
270
271    -- This could be an lpeg but there is not that much to gain here.
272
273    local function register1(pdfdoc,unicodes,index,uni,fontname)
274        local old = unicodes[index]
275        if not old then
276            unicodes[index] = uni
277        elseif old ~= uni then
278            report_merge("inconsistent unicode file %a, font %a, index 0x%04X, old %U, new %U, %s",
279                pdfdoc.filename,fontname,index,old,uni,"bfrange")
280        end
281    end
282
283    local function register2(pdfdoc,unicodes,index,uni,fontname)
284        local old = unicodes[index]   -- unicode
285        local new, more = splitsixteen(uni) -- unicode16 or ligature
286        if more then
287            if not old then
288                unicodes[index] = uni -- string
289            elseif old ~= uni then
290                report_merge("inconsistent unicode file %a, font %a, index 0x%04X, old %a, new %a, %s",
291                    pdfdoc.filename,fontname,index,old,uni,"bfchar")
292            end
293        else
294            if not old then
295                unicodes[index] = new
296            elseif old ~= new then
297                report_merge("inconsistent unicode file %a, font %a, index 0x%04X, old %U, new %U, %s",
298                    pdfdoc.filename,fontname,index,old,uni,"bfchar")
299            end
300        end
301    end
302
303    local function getunicodes(pdfdoc,fontname,str,unicodes)
304        -- <0000> <005E> <0020> : first index, last index, first unicode
305        for s in gmatch(str,"beginbfrange%s*(.-)%s*endbfrange") do
306            for first, last, offset in gmatch(s,"<([^>]+)>%s*<([^>]+)>%s*<([^>]+)>") do
307                local first = tonumber(first,16)    -- index
308                local last  = tonumber(last,16)     -- index
309                local uni   = fromunicode16(offset) -- unicode16
310                for index=first,last do
311                    register1(pdfdoc,unicodes,index,uni,fontname)
312                    uni = uni + 1
313                end
314            end
315        end
316        -- <005F> <0061> [<00660066> <00660069> <00660066006C>] -- untested as not seen yet
317        for s in gmatch(str,"beginbfrange%s*(.-)%s*endbfrange") do
318            for first, last, offset in gmatch(s,"<([^>]+)>%s*<([^>]+)>%s*%[([^%]]+)%]") do
319                local index = tonumber(first,16) -- index
320                for uni in gmatch("%s*<([^>]+)>") do
321                    register2(pdfdoc,unicodes,index,uni,fontname)
322                    index = index + 1
323                end
324            end
325        end
326        -- <0000> <0020>     : index, single
327        -- <005F> <00660066> : index, ligature
328        for s in gmatch(str,"beginbfchar%s*(.-)%s*endbfchar") do
329            for idx, uni in gmatch(s,"<([^>]+)>%s*<([^>]+)>") do
330                local index = tonumber(idx,16)  -- index
331                register2(pdfdoc,unicodes,index,uni,fontname)
332            end
333        end
334    end
335
336    local function isembedded(descriptor)
337        return descriptor and (descriptor.FontFile or descriptor.FontFile2 or descriptor.FontFile3) and true or false
338    end
339
340    getindexstate_composite = function(pdfdoc,somefont,descendant,states)
341        local basefont = somefont.BaseFont
342        if basefont then
343            local fontname = match(basefont,"^[A-Z]+%+(.+)$")
344            if fontname then
345                local descriptor = descendant.FontDescriptor
346                if descriptor then
347                    local widths    = descendant.W
348                    local tounicode = somefont.ToUnicode
349                    -- todo: when no tounicode, maybe just use the index
350                    if widths and tounicode then
351                        local fontstate  = states[fontname]
352                        local f_widths   = fontstate.widths
353                        local f_unicodes = fontstate.unicodes
354                        expandwidths(widths,f_widths)
355                        getunicodes(pdfdoc,fontname,tounicode(),f_unicodes)
356                        fontstate.embedded = isembedded(descriptor)
357                        return fontstate
358                    end
359                end
360            end
361        end
362    end
363
364    getindexstate_simple = function(pdfdoc,somefont,states,kind,remap)
365        local basefont = somefont.BaseFont
366        if basefont then
367            local fontname = match(basefont,"^[A-Z]+%+(.+)$") or basefont
368            if fontname then
369                local descriptor = somefont.FontDescriptor
370-- maybe encoding should win
371                if descriptor then
372                    local widths    = somefont.Widths
373                    local tounicode = somefont.ToUnicode
374                    if widths and tounicode then
375                        local fontstate  = states[fontname]
376                        local f_widths   = fontstate.widths
377                        local f_unicodes = fontstate.unicodes
378                        fontstate.narrow = true
379                        mergewidths(widths,f_widths)
380                        getunicodes(pdfdoc,fontname,tounicode(),f_unicodes)
381                        fontstate.embedded = isembedded(descriptor)
382                        return fontstate
383                    end
384                end
385                -- tricky when we have the same fontname twice, once as type 1 or truetype
386                -- and once as opentype .. it really happens
387                local encoding = somefont.Encoding
388                if encoding == "WinAnsiEncoding" then
389                    local r = table.load(resolvers.findfile("regi-cp1252.lua"))
390                    local fontstate  = states[fontname]
391                    fontstate.unicodes = r
392                    fontstate.narrow = true
393                    fontstate.embedded = isembedded(descriptor)
394                    return fontstate
395                elseif descriptor then
396                    local fontfile = descriptor.FontFile
397                    if fontfile then
398                        local data  = fontfile()
399                        local first = fontfile.Length1
400                        if first and data then
401                            data = string.sub(data,1,first)
402                            local metadata = fonts.handlers.pfb.filtermetadata(data)
403                            if metadata then
404                                local fullname  = metadata.fullname
405                                if fontname == fullname then
406                                    local encoding = fonts.handlers.pfb.filterencoding(data)
407                                    if encoding then
408                                        local name, id = isdefinedunknown(fullname,remap)
409                                        if id > 0 then
410                                            local unicodes = { }
411                                            local nametoslot = fonts.helpers.nametoslot
412                                            for index, name in next, encoding do
413                                                -- todo: check for missing hits
414                                                unicodes[index] = nametoslot(name,id)
415                                            end
416                                            local fontstate = states[fontname]
417                                            fontstate.custom   = encoding
418                                            fontstate.fullname = fullname
419                                            fontstate.narrow   = true
420                                            fontstate.unicodes = unicodes
421                                            fontstate.embedded = isembedded(descriptor)
422                                            return fontstate
423                                        end
424                                    end
425                                end
426                            end
427                        end
428                    end
429                else
430                    report_merge("no encoding or descriptor in file %a for font %a",pdfdoc.filename,fontname)
431                end
432           end
433        end
434    end
435
436end
437
-- Builds the table that maps an index used in the pdf file onto an index in
-- font id.
--
-- With unicode set, each registered unicode is looked up in the character
-- data of the font and its index field is used; entries without character
-- data are left out. Otherwise every registered index maps onto itself. A
-- registered remap entry for this font with a non nil unicode field
-- overrules the unicode argument.

local function makemap(fontname,id,state,unicode)
    local override = remap[cleanfontname(fontname)]
    if override and override.unicode ~= nil then
        unicode = override.unicode
    end
    local map      = { }
    local unicodes = state.unicodes
    if not unicode then
        for index in next, unicodes do
            map[index] = index
        end
        return map
    end
    local characters = chardata[id]
    for index, slot in next, unicodes do
        local character = characters[slot]
        if character then
            map[index] = character.index
        else
            -- issue
        end
    end
    return map
end
462
-- Returns true only when the font is embedded in the file and the embedding
-- argument is not set; callers leave such a font alone. A font that is not
-- embedded is always reported.

local function dontembed(basefont,state,embedding)
    local embedded = state.embedded
    if not embedded then
        report_merge("font %a is not embedded",basefont)
    end
    return not embedding and embedded and true or false
end
475
-- Sets up the sharing state of a composite font (v) with descendant d.
--
-- Returns false when dontembed tells us to leave the font alone, a table
-- (id, map, streamhash, filename, used) when a matching font could be
-- defined, and nothing in all other cases.

local function getstate_OpenType(pdfdoc,v,d,embedding,remap)
    local state = getindexstate_composite(pdfdoc,v,d,getstates(pdfdoc).OpenType)
    if not state then
        return
    end
    local basefont = d.BaseFont
    if not basefont then
        return
    end
    if dontembed(basefont,state,embedding) then
        return false
    end
    -- strip the subset prefix (the unused cleaned name of the original is gone)
    local fontname = match(basefont,"^[A-Z]+%+(.+)$") or basefont
    local streamhash, id = isdefinedunknown(fontname,remap)
    if not streamhash then
        return
    end
    return {
        id         = id,
        map        = makemap(fontname,id,state,false),
        streamhash = streamhash,
        filename   = fontname,
     -- subfont    = subfont,
     -- instance   = instance,
        used       = lpdf.usedindices[streamhash],
    }
end
501
-- Sets up the sharing state of a simple truetype font (v).
--
-- Returns false when dontembed tells us to leave the font alone, a table
-- (id, map, narrow, streamhash, filename, used) when a matching font could be
-- defined, and nothing in all other cases.

local function getstate_TrueType(pdfdoc,v,embedding,remap)
    local state = getindexstate_simple(pdfdoc,v,getstates(pdfdoc).TrueType,"truetype",remap)
    if not state then
        return
    end
    -- needs checking when unicode ... NOT OK
    local basefont = v.BaseFont
    if not basefont then
        return
    end
    if dontembed(basefont,state,embedding) then
        return false
    end
    -- strip the subset prefix (the unused cleaned name of the original is gone)
    local fontname = match(basefont,"^[A-Z]+%+(.+)$") or basefont
    local streamhash, id = isdefinedunknown(fontname,remap)
    if not streamhash then
        return
    end
    return {
        id         = id,
        map        = makemap(fontname,id,state,true),
        narrow     = state.narrow,
        streamhash = streamhash,
        filename   = fontname,
     -- subfont    = subfont,
     -- instance   = instance,
        used       = lpdf.usedindices[streamhash],
    }
end
529
-- Sets up the sharing state of a simple type one font (v).
--
-- Returns false when dontembed tells us to leave the font alone, a table
-- (id, map, narrow, streamhash, filename, used) when a matching font could be
-- defined and at least one index could be mapped, and nothing in all other
-- cases.

local function getstate_Type1(pdfdoc,v,embedding,remap)
    local state = getindexstate_simple(pdfdoc,v,getstates(pdfdoc).Type1,"type1",remap)
    if not state then
        return
    end
    local basefont = v.BaseFont
    if not basefont then
        return
    end
    if dontembed(basefont,state,embedding) then
        return false
    end
    -- strip the subset prefix (the unused cleaned name of the original is gone)
    local fontname = match(basefont,"^[A-Z]+%+(.+)$") or basefont
    local streamhash, id = isdefinedunknown(fontname,remap)
    if not streamhash then
        return
    end
    local map = makemap(fontname,id,state,true)
    if not next(map) then
        -- an empty map is of no use
        return
    end
    return {
        id         = id,
        map        = map,
        narrow     = state.narrow,
        streamhash = streamhash,
        filename   = fontname,
     -- subfont    = subfont,
     -- instance   = instance,
        used       = lpdf.usedindices[streamhash],
    }
end
559
-- Sets up the sharing state from an LMTX registry entry r.
--
-- The index map is a list in which a number sets the current index and a
-- table provides the values for that index and the ones following it. A state
-- table is only returned when there is an index map and isdefinedlmtx accepts
-- the font; otherwise nothing is returned.

local function getstate_LMTX(pdfdoc,r)
    local indexmap   = r.IndexMap
    local streamhash = r.StreamHash
    local filename   = r.FileName
    local subfont    = r.SubFont or 1
    local instance   = r.Instance or ""
    local version    = r.Version or "0"
    local glyphcount = r.GlyphCount or 0
    if not indexmap then
        return
    end
    local map     = { }
    local current = -1
    for i=1,#indexmap do
        local entry = indexmap[i]
        if type(entry) == "number" then
            current = entry
        else
            for j=1,#entry do
                map[current] = entry[j]
                current = current + 1
            end
        end
    end
    if not isdefinedlmtx(filename,subfont,instance,streamhash,version,glyphcount) then
        return
    end
    return {
        map        = map,
        streamhash = streamhash,
        filename   = filename,
        subfont    = subfont,
        instance   = instance,
        used       = lpdf.usedindices[streamhash],
    }
end
594
595-- yes    : merge when we have a context file
596-- always : merge and assume original indices
597-- embed  : add missing fonts
598-- fix    : convert decimal into hexadecimal
599
600do
601
602    local h_hex_2 = lpdf.h_hex_2
603    local h_hex_4 = lpdf.h_hex_4
604
605    local function report_sharing(pdfdoc,what,v,shared,pagenumber,lmtx)
606        local encoding = v.Encoding
607        report_merge("page %i of %a, font %a, type %a, encoding %a, %sshared%s",
608            pagenumber,
609            basename(pdfdoc.filename),
610            v.BaseFont or "?",
611            what,
612            type(encoding) == "string" and encoding or "custom",
613            shared and "" or "not ",
614            lmtx and ", lmtx registry found" or ""
615        )
616    end
617
618    local function plugin_Type0(pdfdoc,k,v,sharedfonts,data,compactor,pagenumber,remap)
619        -- The v table is unique and can be shared
620        local shared = sharedfonts[v]
621        if type(shared) == "table" then
622            data[k] = shared
623        elseif shared == nil then
624            shared = false
625            if v.Encoding == "Identity-H"  then
626                local d = v.DescendantFonts[1] -- how about more
627                if d and d.Subtype == "CIDFontType0" or d.Subtype == "CIDFontType2" then
628                    local r = d.LMTX_Registry or d.LMTXRegistry
629                    if r then
630                     -- if compactor.merge and (compactor.merge.lmtx or compactor.merge.LMTX) then
631                        if compactor.merge.lmtx or compactor.merge.LMTX then
632                            shared = getstate_LMTX(pdfdoc,r)
633                            data[k] = shared
634                        end
635                    elseif find(pdfe.producer(pdfdoc),"^LuaMetaTeX") then
636                        -- This is a no go because for sure we have a different index order. Older
637                        -- versions append the version to the producer string.
638                    elseif compactor.merge.type0 or compactor.embed.type0 then
639                        shared = getstate_OpenType(pdfdoc,v,d,compactor.merge.type0,remap)
640                        data[k] = shared
641                    end
642                    if trace_merge then
643                        report_sharing(pdfdoc,"type0",v,shared,pagenumber,r and true or false)
644                    end
645                end
646                sharedfonts[v] = shared
647            end
648        else
649            -- what ...
650         -- print("case 1",encoding)
651        end
652    end
653
654    local function plugin_TrueType(pdfdoc,k,v,sharedfonts,data,compactor,pagenumber,remap)
655        local shared = sharedfonts[v]
656        if type(shared) == "table" then
657            data[k] = shared
658        elseif shared == nil then
659            shared = false
660            if compactor.embed.truetype or compactor.merge.truetype then
661                shared = getstate_TrueType(pdfdoc,v,compactor.merge.truetype,remap)
662                data[k] = shared
663            end
664            if trace_merge then
665                report_sharing(pdfdoc,"truetype",v,shared,pagenumber)
666            end
667            sharedfonts[v] = shared
668        end
669    end
670
671    local function plugin_Type1(pdfdoc,k,v,sharedfonts,data,compactor,pagenumber,remap)
672        local shared = sharedfonts[v]
673        if type(shared) == "table" then
674            data[k] = shared
675        elseif shared == nil then
676            shared = false
677            if compactor.embed.type1 or compactor.merge.type1 then
678                shared = getstate_Type1(pdfdoc,v,compactor.merge.type1,remap)
679                data[k] = shared
680            end
681            if trace_merge then
682                report_sharing(pdfdoc,"type1",v,shared,pagenumber)
683            end
684        end
685        sharedfonts[v] = shared
686    end
687
688    local plugin -- defined after the next one
689
690    local function plugin_Type3(pdfdoc,k,v,sharedfonts,data,compactor,pagenumber,remap)
691        local charprocs = v.CharProcs
692        if charprocs then
693            local resources = v.Resources
694            if resources then
695                local fonts    = resources.Font
696                local xobjects = resources.XObject
697                if fonts or xobjects then
698                    for k, object in expanded(charprocs) do
699                        if not object.__fonts_remapped__ then
700                            local contents = object()
701                            contents = parsecontent(contents,true)
702                            if plugin(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,{}) then
703                                contents = contenttostring(contents)
704                                object.__raw__.Length = #contents -- nil
705                                object.__raw__.Filter = nil
706                                getmetatable(object).__call = function() return contents end
707                            end
708                            object.__fonts_remapped__ = true
709                         -- v.resources = resources
710                         -- resources.Font    = fonts
711                         -- resources.XObject = xobjects
712                            status.charprocs = status.charprocs + 1
713                            status.indices   = status.indices + indices
714                        end
715                    end
716                end
717            end
718        end
719    end
720
721    local handlers = {
722        Type0    = plugin_Type0,
723        TrueType = plugin_TrueType,
724        Type1    = plugin_Type1,
725        Type3    = plugin_Type3,
726    }
727
728    -- not always ok .. every page can have different font references but let's
729    -- assume it for now
730
731    -- ... Tw : what a mess, why not via tounicode ...
732    --
733    -- Word spacing shall be applied to every occurrence of the single-byte character code 32 in a string
734    -- when using a simple font (including Type 3) or a composite font that defines code 32 as a single-byte
735    -- code. It shall not apply to occurrences of the byte value 32 in multiple-byte codes.
736
737    local mainplugin = function(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,adapted,depth,objtag)
738
739        local data    = { }
740        local indices = 0
741        local remap   = compactor.files or remap
742
743        local sharedfonts  = pdfdoc.sharedfonts or { }
744        pdfdoc.sharedfonts = sharedfonts
745        if fonts or xobjects then
746
747            if fonts then
748
749                -- check if ref for k is the same
750                for k, v in expanded(fonts) do
751                    local subtype = v.Subtype
752                    local handler = subtype and handlers[subtype]
753                    if handler then
754                        handler(pdfdoc,k,v,sharedfonts,data,compactor,pagenumber,remap)
755                    else
756                        -- weird
757                    end
758                end
759
760            end
761
762            local r = false
763            local f = false
764            local n = false
765            local m = false
766            local u = false
767
768            local new     = { }
769            local old     = { }
770
771            local process_hex = false
772
773            -- if we move h and m outside the function we can use lpegs .. todo
774
775            local function process_hex_hexified(h)
776                local b = hextointeger(h)
777                local i = m[b]
778                if i then
779                    local n = h_hex_4[u[i]]
780                    if h ~= n then
781                        indices = indices + 1
782                    end
783                    return n
784                else
785                    return h_hex_4[b]
786                end
787            end
788
789            local function process_hex_narrow(s) return (gsub(s,"..",  process_hex_hexified)) end
790            local function process_hex_wide  (s) return (gsub(s,"....",process_hex_hexified)) end
791
792            local Tw     = false -- a horrible pdf feature
793            local spaces = { }
794            local stack  = { }   -- Q q stack for fonts
795
796            -- if we have Tw then we also have 0x20 as space
797
798            for i=1,#contents do
799                local ti = contents[i]
800                local op = ti[#ti]
801                if op == "Tf" then
802                    -- maybe use /R<N> for replacement
803                    f = ti[1][2]
804                    d = data[f]
805                    if d then
806                        m = d.map
807                        u = d.used
808                        r = i
809                        new[f] = d.streamhash
810                        if d.narrow then
811                            process_hex = process_hex_narrow
812                        else
813                            process_hex = process_hex_wide
814                        end
815                        if not spaces[f] then
816                            spaces[f] = process_hex_wide("0020")
817                        end
818                    else
819                        if d == false then
820                            old[f] = true
821                        end
822                        f = false
823                    end
824                elseif op == "q" then
825                    -- This happens seldom but we need to be prepared (one of VS's documents).
826                    insert(stack, { f, d, m, u, r, process_hex, Tw })
827                elseif op == "Q" then
828                    f, d, m, u, r, process_hex, Tw = unpack(remove(stack))
829                elseif op == "Tj" then
830                    if f then
831                        local ci = ti[1]
832                        if type(ci) == "table" then
833                            local tp = ci[1]
834                            if tp == "hex" then
835                                ci[2] = process_hex(ci[2])
836                            end
837                        end
838                    end
839                elseif op == "TJ" then
840                     if f then
841                        local c = ti[1][2]
842                        if c then
843                            for i=1,#c do
844                                local ci = c[i]
845                                if type(ci) == "table" then
846                                    local tp = ci[1]
847                                    if tp == "hex" then
848                                        ci[2] = process_hex(ci[2])
849                                    end
850                                end
851                            end
852                        end
853                    end
854                elseif op == "Tw" then
855                    Tw = true
856                elseif xobjects and op == "Do" then
857                    -- can be recursive
858                    local objtag = ti[1][2]
859                    local object = xobjects[objtag]
860                    if object and object.Subtype == "Form" then
861                        local r = object.Resources
862                        if r then
863                            if not object.__fonts_remapped__ then
864                                local contents = object()
865                                local fonts    = r.Font
866                                local xobjects = r.XObject
867                                contents = parsecontent(contents,true)
868                                if plugin(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,adapted,depth+1,objtag) then
869                                    contents = contenttostring(contents)
870                                    object.__raw__.Length = #contents
871                                    object.__raw__.Filter = nil
872                                    getmetatable(object).__call = function() return contents end
873                                    object.__fonts_remapped__ = true
874                                    status.xobjects = status.xobjects + 1
875                                    r.Font = fonts -- really needed
876                                    object.Resources = r     -- really needed
877                                    if trace_details then
878                                        for k, v in next, fonts.__raw__ do -- we need the raw values here
879                                            local d = fonts.__raw__[k]
880                                            if d[1] == pdfe.objectcodes.lpdf then
881                                                report_merge("updated font, object %a, font id %a, object %a",objtag,k,d[2][1])
882                                            end
883                                        end
884                                    end
885                                end
886                            else
887                            end
888                        end
889                    end
890                end
891            end
892
893--             if Tw then -- can be optimized
894            if f and Tw and Tw ~= 0 then -- can be optimized
895                local value
896                local space
897                local hexed = setmetatableindex(function(t,k)
898                    local v = { "hex", k }
899                    t[k] = v
900                    return v
901                end)
902
903                local function collapse(cc)
904                    local c = { }
905                    local n = 0
906                    local t = { }
907                    local m = 0
908                    for i=1,#cc do
909                        local ci = cc[i]
910                        if type(ci) == "table" then
911                            m = m + 1 ; t[m] = ci[2]
912                        else
913                            if m > 0 then
914                                n = n + 1 ; c[n] = { "hex", concat(t,"",1,m) }
915                                m = 0
916                            end
917                            n = n + 1 ; c[n] = ci
918                        end
919                    end
920                    if m > 0 then
921                        n = n + 1 ; c[n] = { "hex", concat(t,"",1,m) }
922                    end
923                    return c
924                end
925
926                local scale = 1
927
928                for i=1,#contents do
929                    local ti = contents[i]
930                    local op = ti[#ti]
931                    if op == "Tf" then
932                        scale = tonumber(ti[2]) or 1
933                        f = ti[1][2]
934                        space = spaces[f]
935                    elseif op == "Tj" then
936                        if value and space then
937                            local ci = ti[1]
938                            local tp = ci[1]
939                            if tp == "hex" then
940                                local cc = { }
941                                local nn = 0
942                                local ok = false
943                                local ci2 = ci[2]
944                                for s in gmatch(ci2,"....") do -- todo: narrow
945                                    nn = nn + 1 ; cc[nn] = hexed[s]
946                                    if s == space then
947                                        nn = nn + 1 ; cc[nn]  = f6(value/scale)
948                                        ok = true
949                                    end
950                                end
951                                if ok then
952                                    contents[i] = { { "array", collapse(cc) }, "TJ" }
953                                end
954                            end
955                        end
956                    elseif op == "TJ" then
957                        if value and space then
958                            local c = ti[1][2]
959                            if c then
960                                local cc = { }
961                                local nn = 0
962                                local nm = false
963                                local ok = false
964                                -- combine these loops
965                                for i=1,#c do
966                                    local ci = c[i]
967                                    if type(ci) == "table" then
968                                        local tp = ci[1]
969                                        if tp == "hex" then
970                                            local ci2 = ci[2]
971                                            for s in gmatch(ci2,"....") do -- todo: narrow
972                                                nn = nn + 1 ; cc[nn]  = { "hex", s } -- cache this
973                                                if s == space then
974                                                    nn = nn + 1 ; cc[nn]  = f6(value/scale)
975                                                    nm = true
976                                                    ok = true
977                                                else
978                                                    nm = false
979                                                end
980                                            end
981                                        else
982                                            -- can't happen
983                                            nn = nn + 1 ; cc[nn] = ci
984                                            nm = false
985                                        end
986                                    elseif nm then
987                                     -- print("collapse")
988                                        cc[nn] = f6(tonumber(ci) + tonumber(cc[nn]))
989                                    else
990                                        nn = nn + 1 ; cc[nn] = ci
991                                    end
992                                end
993                                if ok then
994                                 -- ti[1][2] = cc
995                                    ti[1][2] = collapse(cc)
996                                end
997                            end
998                        end
999                    elseif op == "Tw" then
1000                        value = ti[1] * -1000
1001                        if value == 0 then
1002                            value = false
1003                        end
1004                        contents[i] = { } -- constant
1005--                     elseif op == "BT" or op == "ET" then
1006--                         value = false
1007                    end
1008                end
1009
1010
1011            end
1012
1013            local state = trace_merge and { } or false
1014
1015            if fonts then
1016                for k, v in next, fonts.__raw__ do -- we need the raw values here
1017             -- for k, v in expanded(fonts) do -- we need the raw values here
1018
1019-- -- goes wrong on nested objects
1020--                     if adapted[k] then
1021--                         -- already done
1022--                     else
1023-- local trace_details = true
1024                    if new[k] then
1025                        local x = fonts.__raw__[k][3]
1026                        local o = lpdf.getfontobjectnumber(new[k])
1027                        fonts.__raw__[k] = { pdfe.objectcodes.lpdf, pdfreference(o) }
1028                        if trace_details then
1029                            report_merge(
1030                                "%s %a, font id %a, old object %a, new object %a, name %a",
1031                                depth > 1 and "object" or "page",depth > 1 and objtag or pagenumber,k,x,o,data[k].filename
1032                            )
1033                        end
1034                        adapted[k] = true
1035                     -- adapted[k] = function(_,_,_,key,value)
1036                     --     local ref = checkedfontreference(k,v,key,value,o)
1037                     --     return ref
1038                     -- end
1039                        if state then
1040                            state[k] = true
1041                        end
1042                    elseif old[k] then
1043                        if trace_details then
1044                            local x = fonts.__raw__[k][3]
1045                            report_merge(
1046                                "%s %a, font id %a, kept object %a, name %a",
1047                                depth > 1 and "object" or "page",depth > 1 and objtag or pagenumber,k,x,data[k].filename
1048                            )
1049                        end
1050                        if state then
1051                            state[k] = false
1052                        end
1053                    else
1054                        if trace_details then
1055                            local x = fonts.__raw__[k][3]
1056                            report_merge(
1057                                "%s %a, font id %a, weird object %a",
1058                                depth > 1 and "object" or "page",depth > 1 and objtag or pagenumber,k,x
1059                            )
1060                        end
1061                    end
1062                end
1063            end
1064
1065            if state then
1066                local filename = basename(pdfdoc.filename)
1067                for k, v in sortedhash(state) do
1068                    if v then
1069                        local d = data[k]
1070                        report_merge(
1071                            "page %i of %a, font reference %a to %a, subfont %a, instance %a, merged",
1072                            pagenumber,filename,k,d.filename,d.subfont,toinstance(d.instance)
1073                        )
1074                        status.merged = status.merged + 1
1075                    else
1076                        report_merge(
1077                            "page %i of %a, font reference %a, not merged",
1078                            pagenumber,filename,k
1079                        )
1080                        status.notmerged = status.notmerged + 1
1081                    end
1082                end
1083            end
1084
1085            status.indices = status.indices + indices
1086
1087        end
1088
1089        return indices ~= 0
1090    end
1091
1092    -- faster one, for context files --
1093
1094    local lmtxplugin = function(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,adapted,depth,objtag)
1095
1096        local data    = { }
1097        local indices = 0
1098        local remap   = compactor.files or remap -- not relevant
1099
1100        local sharedfonts  = pdfdoc.sharedfonts or { }
1101        pdfdoc.sharedfonts = sharedfonts
1102        if fonts or xobjects then
1103
1104            if fonts then
1105
1106                for k, v in expanded(fonts) do
1107                    local subtype = v.Subtype
1108                    local handler = subtype and handlers[subtype]
1109                    if handler then
1110                        handler(pdfdoc,k,v,sharedfonts,data,compactor,pagenumber,remap)
1111                    else
1112                        -- weird
1113                    end
1114                end
1115
1116            end
1117
1118            local r = false
1119            local f = false
1120            local n = false
1121            local m = false
1122            local u = false
1123
1124            local new = { }
1125
1126            local process_hex = false
1127
1128            local function process_hex_hexified(h)
1129                local b = hextointeger(h)
1130                local i = m[b]
1131                if i then
1132                    local n = h_hex_4[u[i]]
1133                    if h ~= n then
1134                        indices = indices + 1
1135                    end
1136                    return n
1137                else
1138                    return h_hex_4[b]
1139                end
1140            end
1141
1142            local function process_hex_narrow(s) return (gsub(s,"..",  process_hex_hexified)) end
1143            local function process_hex_wide  (s) return (gsub(s,"....",process_hex_hexified)) end
1144
1145            local spaces = { }
1146
1147            -- if we have Tw then we also have 0x20 as space
1148
1149            for i=1,#contents do
1150                local ti = contents[i]
1151                local op = ti[#ti]
1152                if op == "Tf" then
1153                    -- maybe use /R<N> for replacement
1154                    f = ti[1][2]
1155                    d = data[f]
1156                    if d then
1157                        m = d.map
1158                        u = d.used
1159                        r = i
1160                        new[f] = d.streamhash
1161                        if d.narrow then
1162                            process_hex = process_hex_narrow
1163                        else
1164                            process_hex = process_hex_wide
1165                        end
1166                        if not spaces[f] then
1167                            spaces[f] = process_hex_wide("0020")
1168                        end
1169                    else
1170                        if d == false then
1171                            old[f] = true
1172                        end
1173                        f = false
1174                    end
1175                elseif op == "TJ" then
1176                     if f then
1177                        local c = ti[1][2]
1178                        if c then
1179                            for i=1,#c do
1180                                local ci = c[i]
1181                                if type(ci) == "table" then
1182                                    local tp = ci[1]
1183                                    if tp == "hex" then
1184                                        ci[2] = process_hex(ci[2])
1185                                    end
1186                                end
1187                            end
1188                        end
1189                    end
1190                elseif xobjects and op == "Do" then
1191                    -- can be recursive
1192                    local objtag = ti[1][2]
1193                    local object = xobjects[objtag]
1194                    if object and object.Subtype == "Form" then
1195                        local r = object.Resources
1196                        if r then
1197                            if not object.__fonts_remapped__ then
1198                                local contents = object()
1199                                local fonts    = r.Font
1200                                local xobjects = r.XObject
1201                                contents = parsecontent(contents,true)
1202                                if plugin(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,adapted,depth+1,objtag) then
1203                                    contents = contenttostring(contents)
1204                                    object.__raw__.Length = #contents
1205                                    object.__raw__.Filter = nil
1206                                    getmetatable(object).__call = function() return contents end
1207                                    object.__fonts_remapped__ = true
1208                                    status.xobjects = status.xobjects + 1
1209                                    r.Font           = fonts -- really needed
1210                                    object.Resources = r     -- really needed
1211                                    if trace_details then
1212                                        for k, v in next, fonts.__raw__ do -- we need the raw values here
1213                                            local d = fonts.__raw__[k]
1214                                            if d[1] == pdfe.objectcodes.lpdf then
1215                                                report_merge("updated font, object %a, font id %a, object %a",objtag,k,d[2][1])
1216                                            end
1217                                        end
1218                                    end
1219                                end
1220                            else
1221                            end
1222                        end
1223                    end
1224                end
1225            end
1226
1227            if fonts then
1228                for k, v in next, fonts.__raw__ do -- we need the raw values here
1229                    if new[k] then
1230                        local x = fonts.__raw__[k][3]
1231                        local o = lpdf.getfontobjectnumber(new[k])
1232                        fonts.__raw__[k] = { pdfe.objectcodes.lpdf, pdfreference(o) }
1233                        adapted[k] = true
1234                    end
1235                end
1236            end
1237
1238            status.indices = status.indices + indices
1239
1240        end
1241
1242        return indices ~= 0
1243    end
1244
1245    -- done --
1246
1247    local function hasfonts(xobject)
1248        if xobject then
1249            for k, v in expanded(xobject) do
1250                local r = v.Resources
1251                if r then
1252                    if r.Font then
1253                        return true
1254                    end
1255                    local x = r.Xobject
1256                    if x then
1257                        return hasfonts(x)
1258                    end
1259                end
1260            end
1261        end
1262    end
1263
1264    local function onlylmtx(compactor)
1265        if next(compactor.embed) then
1266            return false
1267        else
1268            local lmtx  = false
1269            for k, v in next, compactor.merge do
1270                if k == "lmtx" then
1271                    lmtx = true
1272                else
1273                    return false
1274                end
1275            end
1276            return lmtx
1277        end
1278    end
1279
1280    local function noregistries(pdfdoc)
1281        local statistics = pdfdoc.Catalog.LMTX_Statistics
1282        if not statistics then
1283            return false
1284        end
1285        local registries = statistics.FontRegistries
1286        if not registries then
1287            return false
1288        end
1289        return registries == 0
1290    end
1291
1292
1293    function pdfe.fontplugin(pdfdoc,page,pagenumber,resources,compactor)
1294        if next(compactor.merge) or next(compactor.embed) then
1295            local fonts    = resources.Font
1296            local xobjects = resources.XObject
1297            if fonts or hasfonts(xobjects) then
1298                if not onlylmtx(compactor) then
1299                    -- we check all fonts
1300                    plugin = mainplugin
1301                elseif noregistries(pdfdoc) then
1302                    -- we don't need to check
1303                    plugin = false
1304                else
1305                    -- we need to check
1306                    plugin = lmtxplugin
1307                end
1308                if plugin then
1309                    local contents = pdfdoc.getcontents()
1310                    if contents then
1311                        plugin(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,{},1)
1312                        resources.Font = fonts -- really needed
1313                    end
1314                end
1315                status.pages = status.pages + 1
1316                status.files[pdfdoc.filename] = (status.files[pdfdoc.filename] or 0) + 1
1317            end
1318            plugin = mainplugin -- just to be sure
1319        end
1320    end
1321
1322    utilities.sequencers.appendaction("pdfcontentmanipulators","system","lpdf.epdf.fontplugin")
1323    utilities.sequencers.enableaction("pdfcontentmanipulators","lpdf.epdf.fontplugin")
1324
1325end
1326