font-phb.lmt /size: 18 Kb    last modification: 2023-12-21 09:44
-- Make sure the global module registry exists and file this module's
-- identification record in it.

modules = modules or { }

modules["font-phb"] = {
    version   = 1.000, -- 2016.10.10,
    comment   = "companion to font-txt.mkiv",
    original  = "derived from a prototype by Kai Eigner",
    author    = "Hans Hagen", -- so don't blame KE
    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
9
-- Some (historic) explanation can be found in the font-phb.lua file. To summarize:
-- this code is kind of old and originates from the times that Idris was making a font
-- that should work with context and uniscribe. When we started with mkiv there were
-- no libraries, but at some point Kai Eigner made an ffi interface to the harfbuzz
-- library that showed up. His code was adapted to ConTeXt so that we could test
-- Idris fonts (the library could use uniscribe which served as reference for his
-- fonts). Some experiences were wrapped up in articles. Interesting was that
-- sometimes context, uniscribe and/or native hb could not agree on how to interpret
-- font features and subtle differences could occur.
19--
20-- This file is made from font-phb.lua and I stripped the components code because
21-- it made no sense. The files were eventually added because I did some cleanup and
22-- didn't want to carry old stuff around without also sort of maintaining it. I can
23-- probably strip away even more code. I might pick up this thread when Idris picks
24-- up his font making.
25--
26-- Todo: use the new (context) advance and offset features. On the other hand, it is
27-- too much of a moving target and I don't want to keep track of it. It has become
28-- one of the most frequently updated optional components in luatex which then sort
29-- of contradicts long term stability and makes it very dependent too, apart from
30-- possible make complications.
31
32local next, tonumber, pcall, rawget = next, tonumber, pcall, rawget
33
34local concat        = table.concat
35local sortedhash    = table.sortedhash
36local formatters    = string.formatters
37
38local fonts         = fonts
39local otf           = fonts.handlers.otf
40local texthandler   = otf.texthandler
41
42local fontdata      = fonts.hashes.identifiers
43
44local nuts          = nodes.nuts
45local tonode        = nuts.tonode
46local tonut         = nuts.tonut
47
48local remove_node   = nuts.remove
49
50local getboth       = nuts.getboth
51local getnext       = nuts.getnext
52local setnext       = nuts.setnext
53local getprev       = nuts.getprev
54local setprev       = nuts.setprev
55local getid         = nuts.getid
56local getchar       = nuts.getchar
57local setchar       = nuts.setchar
58local setlink       = nuts.setlink
59local setoffsets    = nuts.setoffsets
60local getwidth      = nuts.getwidth
61local setwidth      = nuts.setwidth
62
63local copy_node     = nuts.copy
64local find_tail     = nuts.tail
65
66local nodepool      = nuts.pool
67local new_kern      = nodepool.fontkern
68local new_glyph     = nodepool.glyph
69
70local nodecodes     = nodes.nodecodes
71local glyph_code    = nodecodes.glyph
72local glue_code     = nodecodes.glue
73
-- Feature keys that are never forwarded to the shaper (see the feature loop in
-- the initializer). We assume that only valid features are set but maybe we need
-- a list of valid hb features as there can be many context specific ones.

local skipped = {
    features = true,
    language = true,
    mode     = true,
    script   = true,
}

-- Code points that we accept as "space" when they come back at the edges of a
-- shaped run. Yes, tab, line feed and carriage return do show up.

local seenspaces = {
    [0x0009] = true, -- indeed
    [0x000A] = true, -- indeed
    [0x000D] = true, -- indeed
    [0x0020] = true,
    [0x00A0] = true,
}

-- helpers

-- The third table caches the per font data; we don't pollute the shared table.

local helpers, methods, initialized = { }, { }, { }

-- Defaults that apply when a font doesn't set the "method" or "shaper" feature.
-- The method name is a bit misleading: "internal" is the optional module. The
-- alternative shaper is "uniscribe".

local method, shaper = "internal", "native"

local report = logs.reporter("font plugin","hb")

-- What the method implementations (loaded on demand) get access to.

utilities.hb = {
    report  = report,
    helpers = helpers,
    methods = methods,
}
106
do

    local toutf8  = string.toutf8
    local toutf32 = string.toutf32

    -- Adds the optional boundary spaces (code point 32) to the given table of
    -- code points. The table is changed in place: a leading space goes into
    -- slot 0, a trailing space is appended after the last entry.
    -- NOTE(review): this presumes that the converters below honour slot 0;
    -- that cannot be verified from this file.

    local function padded(text,leading,trailing)
        if leading then
            text[0] = 32
        end
        if trailing then
            text[#text+1] = 32
        end
        return text
    end

    -- Packs the (padded) code points with string.toutf8.

    function helpers.packtoutf8(text,leading,trailing)
        return toutf8(padded(text,leading,trailing))
    end

    -- Packs the (padded) code points with string.toutf32.

    function helpers.packtoutf32(text,leading,trailing)
        return toutf32(padded(text,leading,trailing))
    end

end
133
-- Collects everything the shaper needs to know about a font and caches it in
-- the 'initialized' table under the font id. The returned table carries the
-- language and script, the feature list (as table and as comma separated
-- string), the glyph index to unicode mapping, the unscaled space width, the
-- filename, the marks, the scale factor, the loaded characters and the
-- method and shaper to use.

local function initialize(font)

    local tfmdata      = fontdata[font]
    local shared       = tfmdata.shared
    local resources    = tfmdata.resources
    local features     = shared.features
    local descriptions = shared.rawdata.descriptions

    -- The index to unicode mapping is stored in the shared table so that it is
    -- built only once. It could be shared more but why care about a few extra
    -- tables.

    local copytochar = shared.copytochar -- indextounicode

    if not copytochar then
        copytochar = { }
        -- first we fill all slots upto the highest index, which makes sure that
        -- we have an indexed table and not a hash
        local highest = 0
        for _, description in next, descriptions do
            local index = description.index
            if index > highest then
                highest = index
            end
        end
        for index=0,highest do
            copytochar[index] = index
        end
        -- then we overload the known ones with their unicode
        for unicode, description in next, descriptions do
            copytochar[description.index] = unicode
        end
        shared.copytochar = copytochar
    end

    -- The space width is unscaled; we prefer the regular space and fall back on
    -- the nobreak space. It stays nil when the font has neither.

    local spacewidth = nil
    local space      = descriptions[0x0020] or descriptions[0x00A0]

    if space then
        spacewidth = space.width
    end

    -- Only short keys are passed on: unknown ones are ignored anyway but we can
    -- assume that the current (and future) extra context features use more
    -- verbose names. We also don't want to pass language and such, so these are
    -- blocked explicitly. Values that are neither boolean like nor a number
    -- count as unset.

    local featureset = { }
    local nofset     = 0

    for name, value in sortedhash(features) do
        if #name <= 4 and not skipped[name] then
            local setting = nil
            if value == "yes" or value == true then
                setting = name .. "=1"
            elseif value == "no" or value == false then
                setting = name .. "=0"
            elseif type(value) == "number" then
                setting = name .. "=" .. value -- alternate
            end
            if setting then
                nofset = nofset + 1
                featureset[nofset] = setting
            end
        end
    end

    local data = {
        language   = features.language, -- do we need to uppercase and padd to 4 ?
        script     = features.script,   -- do we need to uppercase and padd to 4 ?
        features   = "",                -- hash
        copytochar = copytochar,
        spacewidth = spacewidth,
        filename   = resources.filename,
        marks      = resources.marks or { },
        factor     = tfmdata.parameters.factor,
        characters = tfmdata.characters, -- the loaded font (we use its metrics which is more accurate)
        method     = features.method or method,
        shaper     = features.shaper or shaper,
    }

    if nofset > 0 then
        data.features   = concat(featureset,",")
        data.featureset = featureset
    end

    initialized[font] = data

    return data
end
213
214-- In many cases this gives compatible output but especially with respect to spacing and user
215-- discretionaries that mix fonts there can be different outcomes. We also have no possibility
216-- to tweak and cheat. Of course one can always run a normal node mode pass with specific
217-- features first but then one can as well do all in node mode. So .. after a bit of playing
218-- around I redid this one from scratch and also added tracing.
219
-- Tracing flags; the first two are driven by trackers, the third one is a
-- hard coded switch for the sanity check on the start and stop nodes.

local trace_colors  = false
local trace_details = false
local check_id      = false

trackers.register("fonts.plugins.hb.colors", function(v)
    trace_colors = v
end)

trackers.register("fonts.plugins.hb.details",function(v)
    trace_details = v
end)

local setcolor, resetcolor  do

    local colors = nodes.tracers.colors

    setcolor   = colors.set
    resetcolor = colors.reset

end
226
-- Methods are loaded on demand: the first access to methods[name] runs the file
-- "font-phb-imp-<name>.lmt", which is expected to store its implementation in
-- the methods table (reachable via utilities.hb.methods). When that doesn't
-- happen we install a dummy that returns an empty result, so that the shaper
-- leaves the node list untouched.
--
-- A missing file or an error raised by the implementation file must end up in
-- that fallback instead of aborting the run, so we check the resolved name and
-- load in protected mode. Also, calling dofile without a filename would read
-- from standard input, which we definitely don't want here.

table.setmetatableindex(methods,function(t,k)
    local l = "font-phb-imp-" .. k .. ".lmt"
    report("start loading method %a from %a",k,l)
    local f = resolvers.findfile(l)
    if f and f ~= "" then
        local okay, message = pcall(dofile,f)
        if not okay then
            report("error while loading method %a: %s",k,tostring(message))
        end
    else
        report("method %a: file %a not found",k,l)
    end
    local v = rawget(t,k)
    if v then
        report("loading method %a succeeded",k)
    else
        report("loading method %a failed",k)
        v = function() return { } end
    end
    -- cache the outcome (also a failure) so that we try only once
    t[k] = v
    return v
end)
241
local inandout  do

    local utfchar = utf.char

    -- Tracing helper: returns three lists, namely the input as characters, the
    -- input as formatted code points and the formatted unicodes that belong to
    -- the glyph indices in result[first] upto result[last].

    inandout = function(text,result,first,last,copytochar)
        local f_unicode = formatters["%05U"]
        local asstring  = { }
        local astext    = { }
        local asresult  = { }
        for i=1,#text do
            local unicode = text[i]
            asstring[i] = utfchar(unicode)
            astext  [i] = f_unicode(unicode)
        end
        local n = 0
        for i=first,last do
            n = n + 1
            asresult[n] = f_unicode(copytochar[result[i][1]])
        end
        return asstring, astext, asresult
    end

end
266
-- Adapts the glue that sits next to a run when the shaper gave the boundary
-- space a different advance than the font's own space width.
--
--   r         : the result entry of the boundary space (glyph index in r[1],
--               advance in r[5])
--   data      : the initialized font data
--   anchor    : the node at the edge of the run
--   neighbour : getprev or getnext, used to locate the glue seen from the anchor
--   where     : "leading" or "trailing" (for the messages)
--
-- We see all kind of interesting spaces come back (like tabs in cambria) so we
-- do a bit of extra testing here.

local function compensatespace(r,data,anchor,neighbour,where)
    local unicode = data.copytochar[r[1]]
    if not seenspaces[unicode] then
        return
    end
    local spacewidth = data.spacewidth
    if not spacewidth then
        -- the font has neither a space nor a nobreak space so there is nothing
        -- to compare with
        return
    end
    local delta = r[5] - spacewidth
    if delta == 0 then
        return
    end
    local glue = neighbour(anchor)
    if glue and getid(glue) == glue_code then
        local dx = delta * data.factor
        setwidth(glue,getwidth(glue) + dx)
        if trace_details then
            report("compensating %s glue by %p due to codepoint %U",where,dx,unicode)
        end
    else
        report("no valid %s glue node",where)
    end
end

-- The actual plugin: it feeds one run of glyphs to the configured method and maps
-- the outcome back onto the node list.
--
--   head              : the head of the node list (returned, possibly changed)
--   font              : the font id
--   dynamic           : not used here
--   rlmode            : a negative value selects the right to left branches
--   start, stop       : the first and last node of the run
--   text              : the code points of the run (without boundary spaces)
--   leading, trailing : whether a space has to be added before and/or after the
--                       text; the matching result entries are not turned into
--                       glyphs but are used to adapt the neighbouring glue
--
-- Each result entry has the glyph index in [1], the zero based cluster in [2],
-- the x and y offset in [3] and [4] and the x advance in [5].
--
-- The text analyzer is only partially clever so we must assume that we get
-- inconsistent lists. We could check if something has been done (replacement or
-- kern or so) but then we pass around more information and need to check a lot
-- and spaces are kind of spoiling that game (we need a different table then) ..
-- more pain than gain. We could also play with 0xFFFE as boundary.

local function harfbuzz(head,font,dynamic,rlmode,start,stop,text,leading,trailing)
    local data = initialized[font] or initialize(font)

    if check_id then
        if getid(start) ~= glyph_code then
            report("error: start is not a glyph")
            return head
        elseif getid(stop) ~= glyph_code then
            report("error: stop is not a glyph")
            return head
        end
    end

    local size   = #text -- original text, without spaces
    local result = methods[data.method](font,data,rlmode,text,leading,trailing)
    local length = result and #result or 0

    if length == 0 then
     -- report("warning: no result")
        return head
    end

    local factor     = data.factor
    local marks      = data.marks
    local copytochar = data.copytochar
    local characters = data.characters

    -- the boundary spaces are part of the result but not of the node list

    local first = leading  and 2 or 1
    local last  = trailing and (length - 1) or length

    local current  = start
    local position = first
    local cluster  = 0
    local glyph    = nil -- the glyph that started the current cluster
    local index    = 0
    local count    = 1

    if trace_details then
        report("start run, original size: %i, result index: %i upto %i",size,first,last)
        local s, t, r = inandout(text,result,first,last,copytochar)
        report("method : %s",data.method)
        report("shaper : %s",data.shaper)
        report("string : %t",s)
        report("text   : % t",t)
        report("result : % t",r)
    end

    -- okay, after some experiments, it became clear that more complex code aimed at
    -- optimization doesn't pay off as complexity also demands more testing

    for i=first,last do
        local r = result[i]
        local unicode = copytochar[r[1]] -- can be private of course
        --
        cluster = r[2] + 1 -- starts at zero
        --
        if position == cluster then
            -- (another) glyph in the current cluster
            if i == first then
                index = 1
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: %i, starting",i,position,cluster,index)
                end
            else
                index = index + 1
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: %i, next step",i,position,cluster,index)
                end
            end
        elseif position < cluster then
            -- a new cluster: we step to the next node and drop the nodes of the
            -- positions that were skipped (they got merged into the previous cluster)
            current  = getnext(current)
            position = position + 1
            size     = size - 1
            for p=position,cluster-1 do
                head, current = remove_node(head,current,true)
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: -, removing node",i,p,cluster)
                end
                size = size - 1
            end
            position = cluster
            index    = 1
            glyph    = nil
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, arriving",i,cluster,position,index)
            end
        else -- maybe a space got properties
            if trace_details then
                report("position: %i, cluster: %i, index: %i, quitting due to fatal inconsistency",position,cluster,index)
            end
            return head
        end
        local copied = false
        if glyph then
            -- one input position results in more glyphs so we need an extra node
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, copying glyph, unicode %U",i,position,cluster,index,unicode)
            end
            local g = copy_node(glyph)
            if trace_colors then
                resetcolor(g)
            end
            setlink(current,g,getnext(current))
            current = g
            copied  = true
        else
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, using glyph, unicode %U",i,position,cluster,index,unicode)
            end
            glyph = current
        end
        --
        if not current then
            if trace_details then
                report("quitting due to unexpected end of node list")
            end
            return head
        end
        --
        if getid(current) ~= glyph_code then
            if trace_details then
                report("glyph expected in node list")
            end
            return head
        end
        --
        -- really, we can get a tab (9), lf (10), or cr(13) back in cambria .. don't ask me why
        --
        local before, after = getboth(current)
        --
        -- assign glyph: first in run
        --
        setchar(current,unicode)
        if trace_colors then
            count = (count == 8) and 1 or count + 1
            setcolor(current,"trace:"..count)
        end
        --
        local x_offset  = r[3] -- r.dx
        local y_offset  = r[4] -- r.dy
        local x_advance = r[5] -- r.ax
        ----- y_advance = r[6] -- r.ay
        local left  = 0
        local right = 0
        local dx    = 0
        local dy    = 0
        -- NOTE(review): characters[unicode] is assumed to exist below; that is not
        -- checked here
        if trace_details then
            if x_offset ~= 0 or y_offset ~= 0 or x_advance ~= 0 then -- or y_advance ~= 0
                report("[%i] position: %i, cluster: %i, index: %i, old, xoffset: %p, yoffset: %p, xadvance: %p, width: %p",
                    i,position,cluster,index,x_offset*factor,y_offset*factor,x_advance*factor,characters[unicode].width)
            end
        end
        if y_offset ~= 0 then
            dy = y_offset * factor
        end
        if rlmode >= 0 then
            -- l2r marks and rest
            if x_offset ~= 0 then
                dx = x_offset * factor
            end
            local width = characters[unicode].width
            local delta = x_advance * factor
            if delta ~= width then
             -- right = -(delta - width)
                right = delta - width
            end
        elseif marks[unicode] then -- why not just the next loop
            -- r2l marks
            if x_offset ~= 0 then
                dx = -x_offset * factor
            end
        else
            -- r2l rest
            local width = characters[unicode].width
            local delta = (x_advance - x_offset) * factor
            if delta ~= width then
                left = delta - width
            end
            if x_offset ~= 0 then
                right = x_offset * factor
            end
        end
        if copied or dx ~= 0 or dy ~= 0 then
            setoffsets(current,dx,dy)
        end
        if left ~= 0 then
            local kern = new_kern(left)
            setlink(before,kern,current) -- insertbefore
            if current == head then
                -- the kern (and not whatever preceded the glyph) is the new head
                head = kern
            end
        end
        if right ~= 0 then
            local kern = new_kern(right)
            setlink(current,kern,after)
            current = kern
        end
        if trace_details then
            if dy ~= 0 or dx ~= 0 or left ~= 0 or right ~= 0 then
                report("[%i] position: %i, cluster: %i, index: %i, new, xoffset: %p, yoffset: %p, left: %p, right: %p",i,position,cluster,index,dx,dy,left,right)
            end
        end
    end
    --
    if trace_details then
        report("[-] position: %i, cluster: %i, index: -, at end",position,cluster)
    end
    -- what is left of the input went into the last cluster
    if size > 1 then
        current = getnext(current)
        for i=1,size-1 do
            if trace_details then
                report("[-] position: %i + %i, cluster: -, index: -, removing node",position,i)
            end
            head, current = remove_node(head,current,true)
        end
    end
    --
    -- NOTE(review): the stop node can be among the nodes removed above, in which
    -- case looking at its successor is unreliable; this is kept as it was.
    --
    if leading then
        compensatespace(result[1],data,start,getprev,"leading")
    end
    if trailing then
        compensatespace(result[length],data,stop,getnext,"trailing")
    end
    --
    if trace_details then
        report("run done")
    end
    return head
end
561
-- The plugin is registered under the name "harfbuzz". The generic text handler
-- gets our shaper as its last argument.

local function plugin(head,font,dynamic,direction)
    return texthandler(head,font,dynamic,direction,harfbuzz)
end

otf.registerplugin("harfbuzz",plugin)
565