if not modules then modules = { } end modules ['lpdf-fnt'] = {
    version   = 1.001,
    comment   = "companion to lpdf-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- In LMTX we have to do this differently than in MkIV. We also prepare ourselves
-- for variable fonts and such. In LuaTeX we use the original index but in LMTX we
-- use a decent sequence which means that we need to resolve the original. This
-- kind of hackery is fragile anyway, so we only merge files that are produced
-- by ConTeXt.

local next, type, getmetatable = next, type, getmetatable
local gsub, format, match, find, gmatch = string.gsub, string.format, string.match, string.find, string.gmatch
local setmetatableindex, sortedhash, sequenced = table.setmetatableindex, table.sortedhash, table.sequenced
local nameonly, basename = file.nameonly, file.basename
local hextointeger, chrtointeger = string.hextointeger, string.chrtointeger

local pdfe             = lpdf.epdf
local pdfreference     = lpdf.reference
local pdfreserveobject = lpdf.reserveobject

local trace_merge  = false  trackers.register("graphics.fonts",function(v) trace_merge = v end)
local report_merge = logs.reporter("graphics","fonts")

local expanded         = pdfe.expanded
local contenttostring  = pdfe.contenttostring
local getpagecontent   = pdfe.getpagecontent
local parsecontent     = pdfe.parsecontent

----- definefont       = fonts.definers.internal
local definefont       = fonts.definers.define
local getstreamhash    = fonts.handlers.otf.getstreamhash
local loadstreamdata   = fonts.handlers.otf.loadstreamdata
local cleanfontname    = fonts.names.cleanname
local chardata         = fonts.hashes.characters

-- Two level autovivifying cache: defined[fontname][subfont] is created on first
-- access; the innermost table maps an instance/feature key to a font id, or to
-- false when defining the font failed.

local defined = setmetatableindex(function(t,filename)
    local v = setmetatableindex(function(t,subfont)
        local v = { }
        t[subfont] = v
        return v
    end)
    t[filename] = v
    return v
end)

-- Normalizes an instance specification into two values: instance, features.
--
--   table        : nil, "axis={...}" built from the sequenced raw table
--   filled string: the string, nil
--   otherwise    : nil, nil

local function toinstance(instance)
    if type(instance) == "table" then
        return nil, "axis={" .. sequenced(instance.__raw__,",") .. "}"
    elseif instance and instance ~= "" then
        return instance, nil
    else
        return nil, nil
    end
end

-- This is a bit of a hack ... we need to be able to set the instance directly
-- on a file.
--
-- Defines (once) the font that belongs to filename/subfont/instance and checks
-- it against the stream hash, font version and glyph count that were found in
-- the included file. Returns the font id when all checks pass and false
-- otherwise.

local function isdefinedlmtx(filename,subfont,instance,hash,version,glyphcount)
    local fontname = "file:" .. filename
    local instance, features = toinstance(instance)
    if instance then
        fontname = "name:" .. nameonly(filename) .. instance -- not ok as it's not fontname
        instance = nil
    end
    local cache = defined[fontname][subfont]
    local key   = instance or features or false
    local id    = cache[key]
    -- We test for nil and not for falsiness: a failed definition is cached as
    -- false and must not be retried on every call.
    if id == nil then
        -- we can try to avoid this
        id = definefont {
            name     = fontname,
            instance = instance,
            detail   = features,
        }
        if id and id > 0 then
            local dummy = lpdf.usedcharacters[id] -- force embedding
        else
            id = false
        end
        cache[key] = id
    end
    if id then
        -- We double check here!
        local shash, sdata = getstreamhash(id)
        if hash ~= shash then
            report_merge("inconsistent %s in %a","hashes",filename)
            return false
        end
        sdata = loadstreamdata(sdata)
        if not sdata then
            report_merge("inconsistent %s in %a","streamdata",filename)
            return false
        end
        local fontheader = sdata.fontheader
        if version and fontheader and version ~= fontheader.fontversion then
            report_merge("inconsistent %s in %a","versions",filename)
            return false
        end
        local streams = sdata.streams
        -- the stream list can start at index zero, which # does not count
        if glyphcount and streams and glyphcount ~= (#streams + (streams[0] and 1 or 0)) then
            report_merge("inconsistent %s in %a","glyphcount",filename)
            return false
        end
        return id
    end
    return false
end

-- todo: check some more

local cleanname = fonts.names.cleanname

-- Maps a cleaned up font name onto a specification; only the fields "target"
-- (here) and "unicode" (when building an index map) are consulted in this file.

local remap = {
 -- ["helvetica"]      = { target = "file:uhvr8a.afm" },
 -- ["helvetica-bold"] = { target = "file:uhvb8a.afm" },
 -- ["helveticabold"]  = { target = "file:uhvb8a.afm" },
 -- ["courier"]        = { target = "file:ucrr8a.afm" }, -- notdef issues width
 -- ["courier-bold"]   = { target = "file:ucrb8a.afm" },
 -- ["courierbold"]    = { target = "file:ucrb8a.afm" },
    ["helvetica"]            = { target = "file:texgyre-heros-regular.otf" },
    ["helveticabold"]        = { target = "file:texgyre-heros-bold.otf" },
    ["helveticaitalic"]      = { target = "file:texgyre-heros-italic.otf" },
    ["helveticabolditalic"]  = { target = "file:texgyre-heros-bolditalic.otf" },
    ["courier"]              = { target = "file:texgyre-cursor-regular.otf" },
    ["courierbold"]          = { target = "file:texgyre-cursor-bold.otf" },
    ["courieritalic"]        = { target = "file:texgyre-cursor-italic.otf" },
    ["courierbolditalic"]    = { target = "file:texgyre-cursor-bolditalic.otf" },
    ["timesroman"]           = { target = "file:texgyre-termes-regular.otf" },
    ["timesromanbold"]       = { target = "file:texgyre-termes-bold.otf" },
    ["timesromanitalic"]     = { target = "file:texgyre-termes-italic.otf" },
    ["timesromanbolditalic"] = { target = "file:texgyre-termes-bolditalic.otf" },
}

-- Registers (or overloads) a remap entry, keyed by the cleaned up value of
-- specification.source. A specification without source is ignored.

function backends.codeinjections.registerfont(specification)
    local source = specification.source
    if source then
        remap[cleanname(source)] = specification
    end
end

-- Defines a font for which we only know the name, honouring the remap table.
-- Returns streamhash, id on success and false when the font cannot be defined.

local function isdefinedunknown(fontname)
    local m = remap[cleanname(fontname)]
    local r = m and m.target
    local name -- was an accidental global
    if r then
        report_merge("remapping %a to %a",fontname,r)
        name = r
    else
        name = "name:" .. fontname
    end
    local id = definefont {
        name = name,
    }
    if not (id and id > 0) then
        return false
    end
    local dummy = lpdf.usedcharacters[id] -- force embedding
    local shash = getstreamhash(id)
 -- sdata = loadstreamdata(sdata)
    -- no checking here
    return shash, id
end

-- todo: we can share the map within a pdfdoc ..
--       using the object number

-- Counters that feed the statistics line below.

local status = {
    files     = { },
    pages     = 0,
    xobjects  = 0,
    charprocs = 0,
    merged    = 0,
    notmerged = 0,
    indices   = 0,
}

statistics.register("compact font inclusion", function()
    if status.pages > 0 or status.xobjects > 0 or status.charprocs > 0 then
        return string.format(
            "%i files, %i pages, %i indices, %i xobjects, %i chrprocs, %i times merged, %i times not merged",
            table.count(status.files),
            status.pages,
            status.indices,
            status.xobjects,
            status.charprocs,
            status.merged,
            status.notmerged
        )
    end
end)

-- Returns a reference to object o when key/value denote the same entry as k/v
-- and the original value otherwise. Only referenced from disabled code in this
-- file.

local function checkedfontreference(k,v,key,value,o)
    if key ~= k then
     -- print("different keys",key,k)
        return value -- different keys
    elseif v[1] ~= 10 or value[1] ~= 10 then
     -- print("different objects",key,k)
        return value -- different objects
    elseif v[3] ~= value[3] then
     -- print("different values",key,k)
        return value -- different values
    else
        return pdfreference(o)
    end
end

local getstates, getindexstate_composite, getindexstate_simple  do

    local fromunicode16 = fonts.mappings.fromunicode16
    local expandwidths  = pdfe.expandwidths
    local mergewidths   = pdfe.mergewidths

    -- Creates an empty per font state the first time a fontname is accessed.

    local function initialize(t,k)
        local v = {
            unicodes = { },
            widths   = { },
            fontname = k,
        }
        t[k] = v
        return v
    end

    -- Returns the font state registry of a document, creating it on demand.

    getstates = function(pdfdoc)
        local states = pdfdoc.fontstates
        if not states then
            states = {
                Type1    = setmetatableindex(initialize), -- simple fonts, 1 byte index
                Type3    = setmetatableindex(initialize), -- idem
                TrueType = setmetatableindex(initialize), -- idem
                OpenType = setmetatableindex(initialize), -- composite fonts, 2 byte index
            }
            pdfdoc.fontstates = states
        end
        return states
    end

    -- Splits a string of hexadecimal utf16 code units (four digits each) into
    -- code points, combining surrogate pairs. The code points come back as
    -- multiple return values, so a second value indicates a ligature.

    local splitsixteen  do

        local lpegmatch = lpeg.match
        local more      = 0 -- pending high surrogate, shared with the pattern

     -- local pattern   = lpeg.P(true) / function() more = 0 end * (
        local pattern   = (
            lpeg.C(4) / function(s) -- needs checking !
                local now = hextointeger(s)
                if more > 0 then
                    now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000
                    more = 0
                    return now
                elseif now >= 0xD800 and now <= 0xDBFF then
                    more = now
                else
                    return now
                end
            end
        )^0

        splitsixteen = function(str)
            if str and str ~= "" then
                more = 0
                return lpegmatch(pattern,str)
            end
        end

    end

    -- This could be an lpeg but there is not that much to gain here.

    -- Registers a numeric unicode for an index (bfrange entries). The first
    -- registration wins; a conflicting later one is only reported. The font
    -- name is passed explicitly because the reports need it.

    local function register1(pdfdoc,fontname,unicodes,index,uni)
        local old = unicodes[index]
        if not old then
            unicodes[index] = uni
        elseif old ~= uni then
            report_merge("inconsistent unicode file %a, font %a, index 0x%04X, old %U, new %U, %s",pdfdoc.filename,fontname,index,old,uni,"range")
        end
    end

    -- Registers a utf16 hex string for an index (bfchar and bfrange arrays). A
    -- single code point is stored as number, a ligature as the original hex
    -- string.

    local function register2(pdfdoc,fontname,unicodes,index,uni)
        local old       = unicodes[index]   -- unicode
        local new, more = splitsixteen(uni) -- unicode16 or ligature
        if more then
            if not old then
                unicodes[index] = uni -- string
            elseif old ~= uni then
                report_merge("inconsistent unicode file %a, font %a, index 0x%04X, old %a, new %a, %s",pdfdoc.filename,fontname,index,old,uni,"bfchar")
            end
        else
            if not old then
                unicodes[index] = new
            elseif old ~= new then
                report_merge("inconsistent unicode file %a, font %a, index 0x%04X, old %U, new %U, %s",pdfdoc.filename,fontname,index,old,new,"bfchar")
            end
        end
    end

    -- Collects the index to unicode mapping from the text of a ToUnicode cmap
    -- into the given unicodes table.

    local function getunicodes(pdfdoc,fontname,str,unicodes)
        -- <0000> <005E> <0020> : first index, last index, first unicode
        for s in gmatch(str,"beginbfrange%s*(.-)%s*endbfrange") do
            -- We blank out the [...] arrays first, otherwise three consecutive
            -- array members are mistaken for a first/last/unicode triplet.
            local plain = gsub(s,"%b[]","[]")
            for first, last, offset in gmatch(plain,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
                local first = tonumber(first,16)    -- index
                local last  = tonumber(last,16)     -- index
                local uni   = fromunicode16(offset) -- unicode16
                for index=first,last do
                    register1(pdfdoc,fontname,unicodes,index,uni)
                    uni = uni + 1
                end
            end
        end
        -- <005F> <0061> [<00660066> <00660069> <00660066006C>] -- untested as not seen yet
        for s in gmatch(str,"beginbfrange%s*(.-)%s*endbfrange") do
            for first, _, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+%[([^%]]+)%]") do
                local index = tonumber(first,16) -- index
                -- the array text is the subject here (it was missing)
                for uni in gmatch(offset,"%s*<([^>]+)>") do
                    register2(pdfdoc,fontname,unicodes,index,uni)
                    index = index + 1
                end
            end
        end
        -- <0000> <0020>     : index, single
        -- <005F> <00660066> : index, ligature
        for s in gmatch(str,"beginbfchar%s*(.-)%s*endbfchar") do
            for idx, uni in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
                local index = tonumber(idx,16) -- index
                register2(pdfdoc,fontname,unicodes,index,uni)
            end
        end
    end

    -- True when the descriptor carries one of the font file entries.

    local function isembedded(descriptor)
        return descriptor and (descriptor.FontFile or descriptor.FontFile2 or descriptor.FontFile3) and true or false
    end

    -- Composite fonts: only names with a subset prefix (ABCDEF+name) qualify.
    -- Returns the accumulated state for the font, or nothing when name,
    -- descriptor, widths or ToUnicode are missing.

    getindexstate_composite = function(pdfdoc,somefont,descendant,states)
        local basefont = somefont.BaseFont
        if basefont then
            local fontname = match(basefont,"^[A-Z]+%+(.+)$")
            if fontname then
                local descriptor = descendant.FontDescriptor
                if descriptor then
                    local widths    = descendant.W
                    local tounicode = somefont.ToUnicode
                    if widths and tounicode then
                        local fontstate  = states[fontname]
                        local f_widths   = fontstate.widths
                        local f_unicodes = fontstate.unicodes
                        expandwidths(widths,f_widths)
                        getunicodes(pdfdoc,fontname,tounicode(),f_unicodes)
                        fontstate.embedded = isembedded(descriptor)
                        return fontstate
                    end
                end
            end
        end
    end

    -- Simple fonts: the subset prefix is optional. When there is no usable
    -- descriptor/widths/ToUnicode combination we fall back on the cp1252
    -- regime for fonts that use WinAnsiEncoding.

    getindexstate_simple = function(pdfdoc,somefont,states)
        local basefont = somefont.BaseFont
        if basefont then
            local fontname = match(basefont,"^[A-Z]+%+(.+)$") or basefont
            if fontname then
                local descriptor = somefont.FontDescriptor
                if descriptor then
                    local widths    = somefont.Widths
                    local tounicode = somefont.ToUnicode
                    if widths and tounicode then
                        local fontstate  = states[fontname]
                        local f_widths   = fontstate.widths
                        local f_unicodes = fontstate.unicodes
                        fontstate.narrow = true
                        mergewidths(widths,f_widths)
                        getunicodes(pdfdoc,fontname,tounicode(),f_unicodes)
                        fontstate.embedded = isembedded(descriptor)
                        return fontstate
                    end
                end
                -- tricky when we have the same fontname twice, once as type 1 or truetype
                -- and once as opentype .. it really happens
                local encoding = somefont.Encoding
                if encoding == "WinAnsiEncoding" then
                    -- loaded per font because the state owns (and may extend) the table
                    local r = table.load(resolvers.findfile("regi-cp1252.lua"))
                    local fontstate = states[fontname]
                    fontstate.unicodes = r
                    fontstate.narrow   = true
                    return fontstate
                else
                    -- todo: custom encoding, load from pfb if Type 1
                end
            end
        end
    end

end

-- Builds the map from indices used in the included file to indices in the font
-- with the given id. With unicode set (or forced by a remap entry) the mapping
-- goes via the unicodes in the state, otherwise indices map onto themselves.

local function makemap(fontname,id,state,unicode)
    local map = { }
    local r   = remap[cleanname(fontname)]
    if r and r.unicode ~= nil then
        unicode = r.unicode
    end
    if unicode then
        local chr = chardata[id]
        for k, v in next, state.unicodes do
            local d = chr[v]
            if d then
                map[k] = d.index
            else
                -- issue
            end
        end
    else
        for k, v in next, state.unicodes do
            map[k] = k
        end
    end
    return map
end

-- Reports fonts that are not embedded. Returns false only when embedding is
-- requested and the state says the font is embedded, true in all other cases.

local function dontembed(basefont,state,embedding)
    if not state.embedded then
        report_merge("font %a is not embedded",basefont)
    end
    if embedding and state.embedded then
        return false
    else
        return true
    end
end

-- Shared tail of the three getstate_* functions below. Returns nil when there
-- is no basefont or no font can be defined, false when dontembed rejects the
-- font, and the merge specification otherwise. The simple flag selects the
-- unicode based map and passes on the narrow property of the state.

local function resolvestate(state,basefont,embedding,simple)
    if not basefont then
        return nil
    end
    if dontembed(basefont,state,embedding) then
        return false
    end
    local fontname = match(basefont,"^[A-Z]+%+(.+)$") or basefont
    local streamhash, id = isdefinedunknown(fontname)
    if not streamhash then
        return nil
    end
    local result = {
        id         = id,
        map        = makemap(fontname,id,state,simple),
        streamhash = streamhash,
        filename   = fontname,
     -- subfont    = subfont,
     -- instance   = instance,
        used       = lpdf.usedindices[streamhash],
    }
    if simple then
        result.narrow = state.narrow
    end
    return result
end

local function getstate_OpenType(pdfdoc,v,d,embedding)
    local state = getindexstate_composite(pdfdoc,v,d,getstates(pdfdoc).OpenType)
    if state then
        return resolvestate(state,d.BaseFont,embedding,false)
    end
end

local function getstate_TrueType(pdfdoc,v,embedding)
    local state = getindexstate_simple(pdfdoc,v,getstates(pdfdoc).TrueType)
    if state then
        -- needs checking when unicode ... NOT OK
        return resolvestate(state,v.BaseFont,embedding,true)
    end
end

local function getstate_Type1(pdfdoc,v,embedding)
    local state = getindexstate_simple(pdfdoc,v,getstates(pdfdoc).Type1)
    if state then
        return resolvestate(state,v.BaseFont,embedding,true)
    end
end

-- Builds the merge specification from the LMTX registry r found in the included
-- file. The index map is a list in which a number sets the current index and a
-- table provides the values for consecutive indices. Returns nothing when the
-- registry is incomplete or when the font fails the checks in isdefinedlmtx.

local function getstate_LMTX(pdfdoc,r)
    local indexmap   = r.IndexMap
    local streamhash = r.StreamHash
    local filename   = r.FileName
    local subfont    = r.SubFont or 1
    local instance   = r.Instance or ""
    local version    = r.Version or "0"
    local glyphcount = r.GlyphCount or 0
    if indexmap and filename then -- without a filename we cannot define anything
        local index = -1
        local map   = { }
        for i=1,#indexmap do
            local li = indexmap[i]
            if type(li) == "number" then
                index = li
            else
                for j=1,#li do
                    map[index] = li[j]
                    index = index + 1
                end
            end
        end
        if isdefinedlmtx(filename,subfont,instance,streamhash,version,glyphcount) then
            return {
                map        = map,
                streamhash = streamhash,
                filename   = filename,
                subfont    = subfont,
                instance   = instance,
                used       = lpdf.usedindices[streamhash],
            }
        end
    end
end

-- yes    : merge when we have a context file
-- always : merge and assume original indices
-- embed  : add missing fonts
-- fix    : convert decimal
--          into hexadecimal

do

    local h_hex_2 = lpdf.h_hex_2
    local h_hex_4 = lpdf.h_hex_4

    -- Trace helper: reports whether a font of an included page got shared.

    local function report_sharing(pdfdoc,what,v,shared,pagenumber,lmtx)
        report_merge("page %i of %a, font %a, type %a, encoding %a, %sshared%s",
            pagenumber,
            basename(pdfdoc.filename),
            v.BaseFont or "?",
            what,
            v.Encoding or "?",
            shared and "" or "not ",
            lmtx and ", lmtx registry found" or ""
        )
    end

    -- The handlers below share one signature. They resolve the merge state of
    -- font dictionary v (resource name k), store it in data[k] and cache it in
    -- sharedfonts[v]: a table when the font can be merged, false when it was
    -- checked and rejected.

    local function plugin_Type0(pdfdoc,k,v,sharedfonts,data,wide,compactor,pagenumber)
        if v.Encoding == "Identity-H" then
            -- The v table is unique and can be shared
            local shared = sharedfonts[v]
            if type(shared) == "table" then
                data[k] = shared
            elseif shared == nil then
                shared = false
                local descendants = v.DescendantFonts
                local d = descendants and descendants[1] -- how about more
                -- the parentheses matter: without them a missing descendant got indexed
                if d and (d.Subtype == "CIDFontType0" or d.Subtype == "CIDFontType2") then
                    local r = d.LMTXRegistry
                    local producer = not r and pdfe.producer(pdfdoc)
                    if r then
                        if compactor.merge.lmtx or compactor.merge.LMTX then
                            shared = getstate_LMTX(pdfdoc,r)
                            data[k] = shared
                        end
                    elseif type(producer) == "string" and find(producer,"^LuaMetaTeX") then
                        -- This is a no go because for sure we have a different index order. Older
                        -- versions append the version to the producer string.
                    elseif compactor.merge.type0 then
                        shared = getstate_OpenType(pdfdoc,v,d)
                        data[k] = shared
                    elseif compactor.embed.type0 then
                        shared = getstate_OpenType(pdfdoc,v,d,true)
                        data[k] = shared
                    end
                    if trace_merge then
                        report_sharing(pdfdoc,"type0",v,shared,pagenumber,r and true or false)
                    end
                end
                sharedfonts[v] = shared
            end
        else
            -- what ...
        end
        wide[k] = true
    end

    local function plugin_TrueType(pdfdoc,k,v,sharedfonts,data,wide,compactor,pagenumber)
        local shared = sharedfonts[v]
        if type(shared) == "table" then
            data[k] = shared
        elseif shared == nil then
            shared = false
            if compactor.merge.truetype then
                shared = getstate_TrueType(pdfdoc,v)
                data[k] = shared
            elseif compactor.embed.truetype then
                shared = getstate_TrueType(pdfdoc,v,true)
                data[k] = shared
            end
            if trace_merge then
                report_sharing(pdfdoc,"truetype",v,shared,pagenumber)
            end
            sharedfonts[v] = shared
        end
        wide[k] = true
    end

    local function plugin_Type1(pdfdoc,k,v,sharedfonts,data,wide,compactor,pagenumber)
        local shared = sharedfonts[v]
        if type(shared) == "table" then
            data[k] = shared
        elseif shared == nil then
            shared = false
            if compactor.merge.type1 then
                shared = getstate_Type1(pdfdoc,v)
                data[k] = shared
            elseif compactor.embed.type1 then
                shared = getstate_Type1(pdfdoc,v,true)
                data[k] = shared
            end
            if trace_merge then
                report_sharing(pdfdoc,"type1",v,shared,pagenumber)
            end
            sharedfonts[v] = shared -- now in the same spot as in the truetype handler
        end
        wide[k] = false
    end

    local plugin -- defined after the next one

    -- Type3 fonts have no font program to merge, but their glyph procedures can
    -- use fonts and xobjects of their own, so each procedure is run through the
    -- plugin once.

    local function plugin_Type3(pdfdoc,k,v,sharedfonts,data,wide,compactor,pagenumber)
        local charprocs = v.CharProcs
        local resources = charprocs and v.Resources
        if resources then
            local fonts    = resources.Font
            local xobjects = resources.XObject
            if fonts or xobjects then
                for _, object in expanded(charprocs) do
                    if not object.__remapped__ then
                        local contents = parsecontent(object(),true)
                        local indices
                        -- the plugin returns nothing when there are no fonts
                        contents, indices = plugin(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,{},1)
                        if contents and indices and indices > 0 then
                            -- Same treatment as form xobjects get in the plugin: the
                            -- object has to return a string again.
                            -- NOTE(review): assumes charproc streams expose __raw__ the
                            -- way form xobjects do, hence the check.
                            contents = contenttostring(contents)
                            local raw = object.__raw__
                            if raw then
                                raw.Length = #contents
                            end
                            getmetatable(object).__call = function() return contents end
                        end
                        object.__remapped__ = true
                        status.charprocs = status.charprocs + 1
                        -- status.indices is already updated by the plugin itself
                    end
                end
            end
        end
        wide[k] = false
    end

    local handlers = {
        Type0    = plugin_Type0,
        TrueType = plugin_TrueType,
        Type1    = plugin_Type1,
        Type3    = plugin_Type3,
    }

    -- not always ok .. every page can have different font references but let's
    -- assume it for now

    -- Remaps the hexadecimal strings in a parsed content stream to the indices
    -- of the merged fonts, recurses into form xobjects and replaces the font
    -- references in the resource dictionary.
    --
    -- contents : parsed content stream (list of operations)
    -- fonts    : font resource dictionary or nil
    -- xobjects : xobject resource dictionary or nil
    -- adapted  : set of resource names that already got a new reference
    -- depth    : nesting level, defaults to 1
    --
    -- Returns the (in place modified) contents and the number of changed
    -- indices, or nothing when there are no fonts or no contents.
    -- NOTE(review): without page level fonts the xobjects are not visited at
    -- all; this is how the original behaves, confirm that it is intended.

    plugin = function(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,adapted,depth)
        local data        = { }
        local wide        = { }
        local sharedfonts = pdfdoc.sharedfonts or { }
        pdfdoc.sharedfonts = sharedfonts
        depth = depth or 1
        if not fonts or not contents then
            return
        end
        -- check if ref for k is the same
        for k, v in expanded(fonts) do
            local subtype = v.Subtype
            local handler = subtype and handlers[subtype]
            if handler then
                handler(pdfdoc,k,v,sharedfonts,data,wide,compactor,pagenumber)
            else
                -- weird
            end
        end
        local f = false -- current font resource name, false when not merged
        local m = false -- index map of the current font
        local u = false -- used indices of the current font
        local new = { }
        local old = { }
        local indices = 0
        local process_hex = false
     -- local process_dec = false
     -- local convert_dec = false

        -- if we move h and m outside the function we can use lpegs .. todo

        local function process_hex_hexified(h)
            local b = hextointeger(h)
            local i = m[b]
            if i then
                local n = h_hex_4[u[i]]
                if h ~= n then
                    indices = indices + 1
                end
                return n
            else
                return h_hex_4[b]
            end
        end

     -- local function process_dec_hexified(h)
     --     local b = chrtointeger(h)
     --     local i = m[b]
     --     if i then
     --         local n = h_hex_4[u[i]]
     --         if h ~= n then
     --             indices = indices + 1
     --         end
     --         return n
     --     else
     --         return h_hex_4[b]
     --     end
     -- end

        -- use helper

     -- local function convert_dec_hexified(h)
     --     local b = chrtointeger(h)
     --     return h_hex_2[b]
     -- end

        local function process_hex_narrow(s) return (gsub(s,"..",  process_hex_hexified)) end
        local function process_hex_wide  (s) return (gsub(s,"....",process_hex_hexified)) end

     -- local function process_dec_narrow(s) return (gsub(s,".",   process_dec_hexified)) end
     -- local function process_dec_wide  (s) return (gsub(s,"..",  process_dec_hexified)) end

     -- local function convert_dec_one   (s) return (gsub(s,".",   convert_dec_hexified)) end

     -- local fix = compactor.convert.hexadecimal

        for i=1,#contents do
            local ti = contents[i]
            if ti[3] == "Tf" then
                -- maybe use /R for replacement
                f = ti[1][2]
                local d = data[f] -- was an accidental global
                if d then
                    m = d.map
                    u = d.used
                    new[f] = d.streamhash
                    if d.narrow then
                        process_hex = process_hex_narrow
                     -- process_dec = process_dec_narrow
                    else
                        process_hex = process_hex_wide
                     -- process_dec = process_dec_wide
                    end
                else
                    if d == false then
                        old[f] = true
                    end
                 -- if fix then
                 --     if w == wide[f] then
                 --         convert_dec = false
                 --     else
                 --         convert_dec = convert_dec_one
                 --     end
                 -- end
                    f = false
                end
            elseif ti[2] == "Tj" then
                if f then
                    local ci = ti[1]
                    if type(ci) == "table" then
                        local tp = ci[1]
                        if tp == "hex" then
                            ci[2] = process_hex(ci[2])
                     -- elseif tp == "dec" then
                     --     ci[1] = "hex"
                     --     ci[2] = process_dec(ci[2])
                        end
                 -- elseif fix and convert_dec then
                 --     local ci = ti[1]
                 --     if type(ci) == "table" then
                 --         local tp = ci[1]
                 --         if tp == "dec" then
                 --             ci[1] = "hex"
                 --             ci[2] = convert_dec(ci[2])
                 --         end
                    end
                end
            elseif ti[2] == "TJ" then
                if f then
                    local c = ti[1][2]
                    for i=1,#c do
                        local ci = c[i]
                        if type(ci) == "table" then
                            local tp = ci[1]
                            if tp == "hex" then
                                ci[2] = process_hex(ci[2])
                         -- elseif tp == "dec" then
                         --     ci[1] = "hex"
                         --     ci[2] = process_dec(ci[2])
                            end
                        end
                 -- elseif fix and convert_dec then
                 --     local c = ti[1][2]
                 --     for i=1,#c do
                 --         local ci = c[i]
                 --         if type(ci) == "table" then
                 --             local tp = ci[1]
                 --             if tp == "dec" then
                 --                 ci[1] = "hex"
                 --                 ci[2] = convert_dec(ci[2])
                 --             end
                 --         end
                    end
                end
            elseif xobjects and ti[2] == "Do" then
                -- can be recursive
                local object = xobjects[ti[1][2]]
                if object and not object.__remapped__ and object.Subtype == "Form" then
                    local r = object.Resources
                    if r then
                        local nested = parsecontent(object(),true)
                        nested = plugin(pdfdoc,nested,r.Font,r.XObject,pagenumber,compactor,adapted,depth+1)
                        if nested then
                            nested = contenttostring(nested)
                            object.__raw__.Length = #nested
                            getmetatable(object).__call = function() return nested end
                            object.__remapped__ = true
                            status.xobjects = status.xobjects + 1
                        else
                            -- some issue
                        end
                    end
                end
            end
        end
        -- The merge counters are kept up to date here so that the statistics
        -- don't depend on the tracker; the state table only serves reporting.
        local state = trace_merge and { } or false
        for k, v in next, fonts.__raw__ do -- we need the raw values here
     -- for k, v in expanded(fonts) do -- we need the raw values here
            if adapted[k] then
                -- already done
            elseif new[k] then
                local o = lpdf.getfontobjectnumber(new[k])
                fonts.__raw__[k] = { pdfe.objectcodes.lpdf, pdfreference(o) }
                adapted[k] = true
             -- adapted[k] = function(_,_,_,key,value)
             --     local ref = checkedfontreference(k,v,key,value,o)
             --     return ref
             -- end
                status.merged = status.merged + 1
                if state then
                    state[k] = true
                end
            elseif old[k] then
                status.notmerged = status.notmerged + 1
                if state then
                    state[k] = false
                end
            end
        end
        if state then
            local filename = basename(pdfdoc.filename)
            for k, v in sortedhash(state) do
                if v then
                    local d = data[k]
                    -- toinstance returns two values, we report whichever is set
                    local instance, features = toinstance(d.instance)
                    report_merge(
                        "page %i of %a, font reference %a to %a, subfont %a, instance %a, merged",
                        pagenumber,filename,k,d.filename,d.subfont,instance or features
                    )
                else
                    report_merge(
                        "page %i of %a, font reference %a, not merged",
                        pagenumber,filename,k
                    )
                end
            end
        end
        status.indices = status.indices + indices
        return contents, indices
    end

    -- Content manipulator that gets registered below: merges the fonts of one
    -- included page and replaces the page content when the plugin delivers one.

    function pdfe.fontplugin(pdfdoc,page,pagenumber,resources,compactor)
        local fonts    = resources.Font
        local xobjects = resources.XObject
        if fonts or xobjects then
            local contents = getpagecontent(pdfdoc,pagenumber,true,true)
            contents = plugin(pdfdoc,contents,fonts,xobjects,pagenumber,compactor,{},1)
            resources.Font = fonts -- really needed
            if contents then
                page.Contents = contenttostring(contents)
            end
            status.pages = status.pages + 1
            status.files[pdfdoc.filename] = (status.files[pdfdoc.filename] or 0) + 1
        end
    end

    utilities.sequencers.appendaction("pdfcontentmanipulators","system","lpdf.epdf.fontplugin")
    utilities.sequencers.enableaction("pdfcontentmanipulators","lpdf.epdf.fontplugin")

end