font-phb.lmt /size: 20 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['font-phb'] = {
2    version   = 1.000, -- 2016.10.10,
3    comment   = "companion to font-txt.mkiv",
4    original  = "derived from a prototype by Kai Eigner",
5    author    = "Hans Hagen", -- so don't blame KE
6    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files",
8}
9
-- Some (historic) explanation can be found in the font-phb.lua file. To summarize:
-- this code is kind of old and originates from the times that Idris was making a font
-- that should work with context and uniscribe. When we started with mkiv there were
-- no libraries, but at some point Kai Eigner made an ffi interface to the harfbuzz
-- library that showed up. His code was adapted to ConTeXt so that we could test
-- Idris fonts (the library could use uniscribe which served as reference for his
-- fonts). Some experiences were wrapped up in articles. Interesting was that
-- sometimes context, uniscribe and/or native hb could not agree on how to interpret
-- font features and subtle differences could occur.
19--
20-- This file is made from font-phb.lua and I stripped the components code because
21-- it made no sense. The files were eventually added because I did some cleanup and
22-- didn't want to carry old stuff around without also sort of maintaining it. I can
23-- probably strip away even more code. I might pick up this thread when Idris picks
24-- up his font making.
25--
26-- Todo: use the new (context) advance and offset features. On the other hand, it is
27-- too much of a moving target and I don't want to keep track of it. It has become
28-- one of the most frequently updated optional components in luatex which then sort
29-- of contradicts long term stability and makes it very dependent too, apart from
30-- possible make complications.
31--
-- Just for the record: there's also LuahbTeX which was not done by the LuaTeX team
-- but a contribution that came from (and which interface is maintained by) the LaTeX
-- folk which is (likely) derived from e.g. harftex. We just accepted the patches as
-- they came and one can consider it to be a derived engine that is maintained in the
-- same code base. I actually never ran it in ConTeXt, because we have neither reason
-- nor interface but we understand and appreciate the effort for LaTeX users.
38--
-- We have several reasons for not using that engine in ConTeXt: (1) we already had
-- the interface that Kai Eigner made and was optimized a bit later on, (2) we use
-- external libraries because we don't want to keep checking compatibility in the
-- perspective of maintenance so one can use frozen libraries instead (HB is a
-- moving target in the tex live repository), and (3) we don't want to get involved
-- in the unpleasant politics around this library in the perspective of usage in
-- TeX: we do what we do for fun. Argument (4) is that we now use LuaMetaTeX which
-- has a simple optional interface similar to the ffi binding that Kai came up with.
47--
-- Of course argument (5) is that in TeX it's all about control and less about
-- fashion (and religious debates about what to wear). And (6) we need the Lua
-- interface anyway because it made it possible to be an early adopter of new
-- interesting possibilities, extend math support, come up with special text
-- solutions, fix and patch fonts runtime, and of course play around just for fun. As
-- (7) I want to mention that we also have to deal with the typical complications of
-- for instance hyphenation and in the perspective of the Oriental TeX Project (with
-- Idris Hamid) with advanced linebreaks in combination with fine-tuned Arabic fonts.
56--
-- So, don't bother us with discussions about the how and why: what is needed and
-- works well on the web or a word processor doesn't always fit the tex bill. Just
-- let everyone have their own way and we're fine. And I anyway don't (want to)
-- understand non context users' beef with that macro package doing things its own
-- way. Also, a standard (opentype, pdf, html, etc) is not a real standard if there
-- are not multiple implementations using it.
63
64local next, tonumber, pcall, rawget = next, tonumber, pcall, rawget
65
66local concat        = table.concat
67local sortedhash    = table.sortedhash
68local formatters    = string.formatters
69
70local fonts         = fonts
71local otf           = fonts.handlers.otf
72local texthandler   = otf.texthandler
73
74local fontdata      = fonts.hashes.identifiers
75
76local nuts          = nodes.nuts
77local tonode        = nuts.tonode
78local tonut         = nuts.tonut
79
80local remove_node   = nuts.remove
81
82local getboth       = nuts.getboth
83local getnext       = nuts.getnext
84local setnext       = nuts.setnext
85local getprev       = nuts.getprev
86local setprev       = nuts.setprev
87local getid         = nuts.getid
88local getchar       = nuts.getchar
89local setchar       = nuts.setchar
90local setlink       = nuts.setlink
91local setoffsets    = nuts.setoffsets
92local getwidth      = nuts.getwidth
93local setwidth      = nuts.setwidth
94
95local copy_node     = nuts.copy
96local find_tail     = nuts.tail
97
98local nodepool      = nuts.pool
99local new_kern      = nodepool.fontkern
100local new_glyph     = nodepool.glyph
101
102local glyph_code    <const> = nodes.nodecodes.glyph
103local glue_code     <const> = nodes.nodecodes.glue
104
-- Feature keys that initialize() never adds to the feature set. We assume that
-- only valid features are set but maybe we need a list of valid hb features as
-- there can be many context specific ones.

local skipped = {
    script   = true,
    language = true,
    features = true,
    mode     = true,
}

-- Codepoints that harfbuzz() accepts as space when it compensates the glue
-- before and after a run.

local seenspaces = {
    [0x0009] = true, -- indeed
    [0x000A] = true, -- indeed
    [0x000D] = true, -- indeed
    [0x0020] = true,
    [0x00A0] = true,
}

-- helpers

local initialized = { } -- data per font id, we don't pollute the shared table
local methods     = { }
local helpers     = { }

local method      = "internal" -- a bit misleading name: it's the optional module
local shaper      = "native"   -- "uniscribe"
local report      = logs.reporter("font plugin","hb")

utilities.hb = {
    report  = report,
    helpers = helpers,
    methods = methods,
}
137
do

    local toutf32 = string.toutf32
    local toutf8  = string.toutf8

    -- Puts the optional boundary spaces (codepoint 32) in the given list: a
    -- leading one goes in slot 0 and a trailing one is appended. The list is
    -- changed in place and handed back.

    local function padded(text,leading,trailing)
        if leading then
            text[0] = 32
        end
        if trailing then
            text[#text+1] = 32
        end
        return text
    end

    -- Both packers take the same arguments and only differ in the converter
    -- that is applied to the (padded) list.

    function helpers.packtoutf8(text,leading,trailing)
        return toutf8(padded(text,leading,trailing))
    end

    function helpers.packtoutf32(text,leading,trailing)
        return toutf32(padded(text,leading,trailing))
    end

end
164
-- Collects what the plugin needs to know about a font and stores the result in
-- the 'initialized' table so that harfbuzz() can pick it up the next time.
--
-- font : font id, the index in fonts.hashes.identifiers
--
-- Returns the collected data: feature string and set, the glyph index to
-- character mapping, the unscaled space width, filename, marks, factor, the
-- characters of the loaded font, and the method and shaper to use.

local function initialize(font)

    local tfmdata      = fontdata[font]
    local shared       = tfmdata.shared
    local resources    = tfmdata.resources
    local features     = shared.features
    local descriptions = shared.rawdata.descriptions
    local copytochar   = shared.copytochar -- indextounicode
    local featureset   = { }
    local spacewidth   = nil -- unscaled

    -- could be shared but why care about a few extra tables

    if not copytochar then
        local highest = 0
        for _, description in next, descriptions do
            local index = description.index
            if index > highest then
                highest = index
            end
        end
        -- filling all slots first makes sure that we have an indexed table and
        -- not a hash; unmapped indices map onto themselves
        copytochar = { }
        for index=0,highest do
            copytochar[index] = index
        end
        -- the normal mapper
        for unicode, description in next, descriptions do
            copytochar[description.index] = unicode
        end
        shared.copytochar = copytochar
    end

    -- independent from loop as we have unordered hashes: the regular space wins
    -- over the nobreak space

    local space = descriptions[0x0020] or descriptions[0x00A0]
    if space then
        spacewidth = space.width
    end

    -- Unknown features are ignored anyway but we can assume that the current (and
    -- future) extra context features use more verbose names, so only names of at
    -- most four characters are considered. We also don't want to pass language
    -- and such so a few features are blocked explicitly.

    for name, value in sortedhash(features) do
        if #name <= 4 and not skipped[name] then
            local setting = nil
            if value == "yes" or value == true then
                setting = "=1"          -- cf command line (false)
            elseif value == "no" or value == false then
                setting = "=0"          -- cf command line (true)
            elseif type(value) == "number" then
                setting = "=" .. value  -- cf command line (alternate)
            end
            -- anything else counts as unset
            if setting then
                featureset[#featureset+1] = name .. setting
            end
        end
    end

    local nofsettings = #featureset

    local data = {
        method     = features.method or method,
        shaper     = features.shaper or shaper,
        language   = features.language, -- do we need to uppercase and padd to 4 ?
        script     = features.script,   -- do we need to uppercase and padd to 4 ?
        features   = nofsettings > 0 and concat(featureset,",") or "", -- hash
        featureset = nofsettings > 0 and featureset or nil,
        copytochar = copytochar,
        spacewidth = spacewidth,
        filename   = resources.filename,
        marks      = resources.marks or { },
        factor     = tfmdata.parameters.factor,
        characters = tfmdata.characters, -- the loaded font (we use its metrics which is more accurate)
    }

    initialized[font] = data

    return data
end
244
245-- In many cases this gives compatible output but especially with respect to spacing and user
246-- discretionaries that mix fonts there can be different outcomes. We also have no possibility
247-- to tweak and cheat. Of course one can always run a normal node mode pass with specific
248-- features first but then one can as well do all in node mode. So .. after a bit of playing
249-- around I redid this one from scratch and also added tracing.
250
-- Tracing: with colors enabled harfbuzz() colors the glyphs that it sets, with
-- details enabled it reports every step. The check_id flag makes harfbuzz()
-- verify that start and stop are glyphs.

local trace_colors  = false
local trace_details = false
local check_id      = false

trackers.register("fonts.plugins.hb.colors", function(v) trace_colors  = v end)
trackers.register("fonts.plugins.hb.details",function(v) trace_details = v end)

local resetcolor    = nodes.tracers.colors.reset
local setcolor      = nodes.tracers.colors.set
257
-- Methods are loaded on demand. Accessing an unknown method loads the file
-- font-phb-imp-<name>.lmt, which is expected to register itself under that name
-- in the methods table. When that doesn't happen a dummy method is installed
-- that returns an empty result, so that harfbuzz() just leaves the run alone.
--
-- t : the methods table
-- k : the name of the method
--
-- Returns the method (a function), which is also stored in the table so that
-- loading is tried only once.

table.setmetatableindex(methods,function(t,k)
    local l = "font-phb-imp-" .. k .. ".lmt"
    report("start loading method %a from %a",k,l)
    local f = resolvers.findfile(l)
    -- Not finding the file must bring us to the fallback below instead of
    -- raising an error in dofile (no valid filename to load).
    if f and f ~= "" then
        dofile(f)
    else
        report("file %a for method %a is not found",l,k)
    end
    local v = rawget(t,k)
    if v then
        report("loading method %a succeeded",k)
    else
        report("loading method %a failed",k)
        v = function() return { } end
    end
    t[k] = v
    return v
end)
272
local inandout  do

    local utfchar = utf.char

    -- Prepares three lists for the detailed trace of a run:
    --
    --   the characters of the input (as utf strings)
    --   the codepoints of the input (formatted with %05U)
    --   the codepoints that the result slots first upto last map onto, using
    --   the copytochar mapping (also formatted with %05U)

    inandout = function(text,result,first,last,copytochar)
        local asunicode  = formatters["%05U"]
        local characters = { }
        local codepoints = { }
        local shaped     = { }
        for i=1,#text do
            local c = text[i]
            characters[i] = utfchar(c)
            codepoints[i] = asunicode(c)
        end
        local n = 0
        for i=first,last do
            n = n + 1
            shaped[n] = asunicode(copytochar[result[i][1]])
        end
        return characters, codepoints, shaped
    end

end
297
-- Applies the result of a shaper run to the glyphs start .. stop in the node list.
--
-- head     : head of the node list, the (possibly changed) head is returned
-- font     : font id, used to fetch (or collect) the data of the font
-- dynamic  : not used here
-- rlmode   : direction, values >= 0 are handled as l2r, others as r2l
-- start    : first node of the run (a glyph is expected)
-- stop     : last node of the run (a glyph is expected)
-- text     : list of codepoints of the run, without the boundary spaces
-- leading  : when set, the first slot of the result belongs to a leading space
-- trailing : when set, the last slot of the result belongs to a trailing space
--
-- The method that does the shaping returns a list with per slot:
--
--   [1] glyph index (mapped to a character with copytochar)
--   [2] cluster (starts at zero)
--   [3] x offset, [4] y offset, [5] x advance (all unscaled)
--
-- Nodes that have no cluster in the result are removed, extra glyphs in a cluster
-- are copies of the glyph that started the cluster, and differences in advance
-- end up in font kerns. When the result is empty or inconsistent with the node
-- list we quit and return the head as it is at that moment.

local function harfbuzz(head,font,dynamic,rlmode,start,stop,text,leading,trailing)
    local data = initialized[font]

    if not data then
        data = initialize(font)
    end

    if check_id then
        if getid(start) ~= glyph_code then
            report("error: start is not a glyph")
            return head
        elseif getid(stop) ~= glyph_code then
            report("error: stop is not a glyph")
            return head
        end
    end
    local size   = #text -- original text, without spaces
    local result = methods[data.method](font,data,rlmode,text,leading,trailing)
    local length = result and #result or 0

    if length == 0 then
     -- report("warning: no result")
        return head
    end

    local factor     = data.factor
    local marks      = data.marks
    local spacewidth = data.spacewidth
    local copytochar = data.copytochar
    local characters = data.characters

    -- the text analyzer is only partially clever so we must assume that we get
    -- inconsistent lists

    -- we could check if something has been done (replacement or kern or so) but
    -- then we pass around more information and need to check a lot and spaces
    -- are kind of spoiling that game (we need a different table then) .. more
    -- pain than gain

    -- we could play with 0xFFFE as boundary

    local current  = start
    local first    = 1
    local last     = length

    -- the slots of the boundary spaces are dealt with after the loop

    if leading then
        first = first + 1
    end
    if trailing then
        last = last - 1
    end

    local position = first -- the cluster that the current node belongs to
    local cluster  = 0
    local glyph    = nil   -- the glyph that started the current cluster
    local index    = 0     -- the n-th glyph in the current cluster
    local count    = 1     -- cycles through the trace colors

    if trace_details then
        report("start run, original size: %i, result index: %i upto %i",size,first,last)
        local s, t, r = inandout(text,result,first,last,copytochar)
        report("method : %s",data.method)
        report("shaper : %s",data.shaper)
        report("string : %t",s)
        report("text   : % t",t)
        report("result : % t",r)
    end

    -- okay, after some experiments, it became clear that more complex code aimed at
    -- optimization doesn't pay off as complexity also demands more testing

    for i=first,last do
        local r = result[i]
        local unicode = copytochar[r[1]] -- can be private of course
        --
        cluster = r[2] + 1 -- starts at zero
        --
        if position == cluster then
            if i == first then
                index = 1
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: %i, starting",i,position,cluster,index)
                end
            else
                index = index + 1
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: %i, next step",i,position,cluster,index)
                end
            end
        elseif position < cluster then
            -- a new cluster: we move on and remove the nodes that got no cluster
            current  = getnext(current)
            position = position + 1
            size     = size - 1
            for p=position,cluster-1 do
                head, current = remove_node(head,current,true)
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: -, removing node",i,p,cluster)
                end
                size = size - 1
            end
            position = cluster
            index    = 1
            glyph    = nil
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, arriving",i,position,cluster,index)
            end
        else -- maybe a space got properties
            if trace_details then
                report("position: %i, cluster: %i, index: %i, quitting due to fatal inconsistency",position,cluster,index)
            end
            return head
        end
        local copied = false
        if glyph then
            -- an additional glyph in the same cluster
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, copying glyph, unicode %U",i,position,cluster,index,unicode)
            end
            local g = copy_node(glyph)
            if trace_colors then
                resetcolor(g)
            end
            setlink(current,g,getnext(current))
            current = g
            copied  = true
        else
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, using glyph, unicode %U",i,position,cluster,index,unicode)
            end
            glyph = current
        end
        --
        if not current then
            if trace_details then
                report("quitting due to unexpected end of node list")
            end
            return head
        end
        --
        local id = getid(current)
        if id ~= glyph_code then
            if trace_details then
                report("glyph expected in node list")
            end
            return head
        end
        --
        -- really, we can get a tab (9), lf (10), or cr(13) back in cambria .. don't ask me why
        --
        -- the neighbours are needed when we inject kerns (the names avoid a clash
        -- with the next iterator)
        --
        local before, after = getboth(current)
        --
        -- assign glyph: first in run
        --
        setchar(current,unicode)
        if trace_colors then
            count = (count == 8) and 1 or count + 1
            setcolor(current,"trace:"..count)
        end
        --
        local x_offset  = r[3] -- r.dx
        local y_offset  = r[4] -- r.dy
        local x_advance = r[5] -- r.ax
        ----- y_advance = r[6] -- r.ay
        local left  = 0
        local right = 0
        local dx    = 0
        local dy    = 0
        if trace_details then
            if x_offset ~= 0 or y_offset ~= 0 or x_advance ~= 0 then -- or y_advance ~= 0
                report("[%i] position: %i, cluster: %i, index: %i, old, xoffset: %p, yoffset: %p, xadvance: %p, width: %p",
                    i,position,cluster,index,x_offset*factor,y_offset*factor,x_advance*factor,characters[unicode].width)
            end
        end
        if y_offset ~= 0 then
            dy = y_offset * factor
        end
        if rlmode >= 0 then
            -- l2r marks and rest
            if x_offset ~= 0 then
                dx = x_offset * factor
            end
            local width = characters[unicode].width
            local delta = x_advance * factor
            if delta ~= width then
             -- right = -(delta - width)
                right = delta - width
            end
        elseif marks[unicode] then -- why not just the next loop
            -- r2l marks
            if x_offset ~= 0 then
                dx = -x_offset * factor
            end
        else
            -- r2l rest
            local width = characters[unicode].width
            local delta = (x_advance - x_offset) * factor
            if delta ~= width then
                left = delta - width
            end
            if x_offset ~= 0 then
                right = x_offset * factor
            end
        end
        if copied or dx ~= 0 or dy ~= 0 then
            setoffsets(current,dx,dy)
        end
        if left ~= 0 then
            local kern = new_kern(left)
            setlink(before,kern,current) -- insertbefore
            if current == head then
                -- the kern sits in front of the old head now so it becomes the new
                -- head (and not the node before the old head, which is not part of
                -- this list)
                head = kern
            end
        end
        if right ~= 0 then
            local kern = new_kern(right)
            setlink(current,kern,after)
            current = kern
        end
        if trace_details then
            if dy ~= 0 or dx ~= 0 or left ~= 0 or right ~= 0 then
                report("[%i] position: %i, cluster: %i, index: %i, new, xoffset: %p, yoffset: %p, left: %p, right: %p",i,position,cluster,index,dx,dy,left,right)
            end
        end
    end
    --
    if trace_details then
        report("[-] position: %i, cluster: %i, index: -, at end",position,cluster)
    end
    -- what is left of the original run got no cluster so it has to go
    if size > 1 then
        current = getnext(current)
        for i=1,size-1 do
            if trace_details then
                report("[-] position: %i + %i, cluster: -, index: -, removing node",position,i)
            end
            head, current = remove_node(head,current,true)
        end
    end
    --
    -- We see all kind of interesting spaces come back (like tabs in cambria) so we do a bit of
    -- extra testing here. When the font has no space at all (no spacewidth) there is nothing to
    -- compare with so then we don't compensate.
    --
    if leading then
        local r = result[1]
        local unicode = copytochar[r[1]]
        if spacewidth and seenspaces[unicode] then
            local x_advance = r[5]
            local delta     = x_advance - spacewidth
            if delta ~= 0 then
                -- nothing to do but jump one slot ahead
                local glue = getprev(start)
                if getid(glue) == glue_code then
                    local dx = delta * factor
                    setwidth(glue,getwidth(glue) + dx)
                    if trace_details then
                        report("compensating leading glue by %p due to codepoint %U",dx,unicode)
                    end
                else
                    report("no valid leading glue node")
                end
            end
        end
    end
    --
    if trailing then
        local r = result[length]
        local unicode = copytochar[r[1]]
        if spacewidth and seenspaces[unicode] then
            local x_advance = r[5]
            local delta     = x_advance - spacewidth
            if delta ~= 0 then
                local glue = getnext(stop)
                if getid(glue) == glue_code then
                    local dx = delta * factor
                    setwidth(glue,getwidth(glue) + dx)
                    if trace_details then
                        report("compensating trailing glue by %p due to codepoint %U",dx,unicode)
                    end
                else
                    report("no valid trailing glue node")
                end
            end
        end
    end
    --
    if trace_details then
        report("run done")
    end
    return head
end
592
-- The plugin itself is just the text handler of the otf handler with harfbuzz()
-- as the function that deals with the runs.

local function plugin(head,font,dynamic,direction)
    return texthandler(head,font,dynamic,direction,harfbuzz)
end

otf.registerplugin("harfbuzz",plugin)
596