font-txt.lmt /size: 22 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['font-txt'] = {
2    version   = 1.001,
3    comment   = "companion to font-ini.mkiv",
4    original  = "derived from a prototype by Kai Eigner",
5    author    = "Hans Hagen", -- so don't blame KE
6    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files"
8}
9
-- The next code is derived from a snippet handler prototype by Kai Eigner and
-- resembles the main loop of the Lua font handler but I decided to use a more generic
-- (and pluggable) approach and not hook it into the already present opentype
-- handler. This is cleaner as it cannot interfere with the Lua font processor
-- (which does some more things) and is also better performance wise. It also makes
-- it possible to support other handlers as history has proven that there is no
-- universal solution in computer land. Most of the disc logic is kept but done
-- slightly differently.
18--
19-- The code is heavily optimized and generalized so there can be errors. As
20-- mentioned, the plug mode can be used for alternative font handlers. A font is
21-- still loaded but the node and base mode handlers are ignored. Plugins are
22-- unlikely to work well in context as they can mess up attribute driven subsystem,
23-- so they are not officially supported. The language and script options are
24-- available in the usual way.
25--
-- The code collects snippets, with or without spacing around them, and partially
-- runs over disc nodes. The r2l "don't assume disc and collect larger chunks" is
-- not robust so I got rid of that branch. This is somewhat similar to the Lua font
-- handler.
30--
31-- An alternative is to run over longer strings with dummy chars (unicode objects) as
32-- markers for whatever is in the list but that becomes tricky with mixed fonts and
33-- reconstruction becomes a bit of a mess then, especially because disc nodes force
34-- us to backtrack and look at several solutions. It also has a larger memory
35-- footprint. Some tests demonstrated that it has no gain and only adds complexity.
36--
-- This (derived) variant is better suited for context and seems to work ok in the
-- generic variant. I also added some context specific tracing to the code. This
-- variant uses the plug model provided in the context font system. So, in context,
-- using the built in Lua handler is the better alternative, also because it has
-- extensive tracing features. Context users would lose additional functionality
-- that has been provided for a decade and therefore plugins are not officially
-- supported (at least not by me, unless I use them myself).
44--
-- There is no checking here for already processed characters so best not mix this
-- variant with code that does similar things. Whether this code evolves depends on
-- its usability. Kai's code can now be found on github where it is used with a
-- harfbuzz library. We add this kind of stuff because occasionally we compare
-- engines and Kai sends me examples and I then need to check context.
50--
51-- One important difference between Kai's approach and the one used in ConTeXt is
52-- that we use utf-32 instead of utf-8. Once I figured out that clusters were just
53-- indices into the original text that made more sense. The first implementation
54-- used the command line tool (binary), then I went for ffi (library).
55--
56-- Beware: this file only implements the framework for plugins. Plugins themselves
57-- are in other files (e.g. font-phb*.lua). On the todo list is a uniscribe plugin
58-- because that is after all the reference for opentype support, but that interface
59-- needs a bit more work (so it might never happen).
60--
-- Usage: see m-fonts-plugins.mkiv. As it's a nice test for ffi support that file
-- might be added to the distribution somewhere in the middle of 2017 when the ffi
-- interface has been tested a bit more. Okay, it's 2012 now and we're way past that
-- date but we never had a reason for adding it to the ConTeXt distribution. It
-- should still work okay because I occasionally checked it against progress made in
-- the engines and used newer helpers.
67--
68-- Here is an example of usage:
69--
70-- \starttext
71--     \definefontfeature[test][mode=plug,features=text]
72--     \start
73--         \showfontkerns
74--         \definedfont[Serif*test]
75--         \input tufte \par
76--     \stop
77-- \stoptext
78
79local fonts            = fonts
80local otf              = fonts.handlers.otf
81local nodes            = nodes
82
83local utfchar          = utf.char
84
85local nuts             = nodes.nuts
86
87local getnext          = nuts.getnext
88local setnext          = nuts.setnext
89local getprev          = nuts.getprev
90local setprev          = nuts.setprev
91local getid            = nuts.getid
92local getfont          = nuts.getfont
93local getchar          = nuts.getchar
94local getdisc          = nuts.getdisc
95local setdisc          = nuts.setdisc
96local getboth          = nuts.getboth
97local getscales        = nuts.getscales
98local setlink          = nuts.setlink
99local getkern          = nuts.getkern
100local getwidth         = nuts.getwidth
101
102local ischar           = nuts.ischar
103local isglyph          = nuts.isglyph
104local usesfont         = nuts.usesfont
105
106local copy_node_list   = nuts.copylist
107local find_node_tail   = nuts.tail
108local flushlist        = nuts.flushlist
109local freenode         = nuts.free
110local endofmath        = nuts.endofmath
111
112local startofpar       = nuts.startofpar
113
114local nodecodes        = nodes.nodecodes
115
116local glyph_code       <const> = nodecodes.glyph
117local glue_code        <const> = nodecodes.glue
118local disc_code        <const> = nodecodes.disc
119local kern_code        <const> = nodecodes.kern
120local math_code        <const> = nodecodes.math
121local dir_code         <const> = nodecodes.dir
122local par_code         <const> = nodecodes.par
123
124local righttoleft_code <const> = tex.directioncodes.righttoleft
125
126local txtdirstate      = otf.helpers.txtdirstate
127local pardirstate      = otf.helpers.pardirstate
128
129local fonthashes       = fonts.hashes
130local fontdata         = fonthashes.identifiers
131
-- Removes all discretionary nodes from the list that starts at head and returns
-- the (possibly new) head. The pre and post lists of each disc are flushed and
-- the replace list, when present, takes the place of the disc node itself.

local function deletedisc(head)
    local node = head
    while node do
        -- fetch the successor first because the node can get freed below
        local following = getnext(node)
        if getid(node) == disc_code then
            -- the extra argument makes getdisc return the tails too
            local pre, post, replace, _, _, replacetail = getdisc(node,true)
            -- clear the fields before the disc itself is freed
            setdisc(node)
            if pre then
                flushlist(pre)
            end
            if post then
                flushlist(post)
            end
            local before, after = getboth(node)
            local athead = node == head
            if replace then
                -- splice the replace list in where the disc was
                if athead then
                    head = replace
                    setprev(replace) -- already nil
                else
                    setlink(before,replace)
                end
                setlink(replacetail,after)
            elseif athead then
                -- nothing to splice in: the list now starts after the disc
                head = after
                setprev(after)
            else
                setlink(before,after)
            end
            freenode(node)
        end
        node = following
    end
    return head
end
167
168-- As we know that we have the same font we can probably optimize this a bit more.
169-- Although we can have more in disc nodes than characters and kerns we only support
170-- those two types.
171
-- Compares two nodes from discretionary sublists. Only glyphs and kerns are
-- supported: glyphs are equal when m is a character in the font of n with the
-- same code, kerns are equal when both are kerns with the same amount. Anything
-- else is reported as different.
--
-- The second result of isglyph has to be captured here: without it the kern
-- test compared an undefined (nil) n_id and could never succeed.

local function eqnode(n,m) -- no real improvement in speed
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,getfont(n))
    elseif n_id == kern_code then
        -- make sure that we really compare two kerns
        return getid(m) == kern_code and getkern(n) == getkern(m)
    end
    return false
end
180
-- Walks two sublists of a discretionary in parallel and reports if they have
-- the same length and pairwise equal nodes (according to eqnode).

local function eqnodelist(n,m)
    while n and m do
        if not eqnode(n,m) then
            return false
        end
        n = getnext(n)
        m = getnext(m)
    end
    -- a leftover on either side means that the lengths differ
    return not (n or m)
end

-- Reports if the nodes n and m can be considered the same:
--
--   * two missing nodes are equal, one missing node is not
--   * glyphs are equal when m is a character in the font of n with the same code
--   * glue only matches glue (the amount is not compared)
--   * kerns match when the amounts are the same
--   * discretionaries match when their pre, post and replace lists match
--   * everything else (whatsits included) is different
--
-- For non glyphs we first check that m is of the same type as n. Before, a glue
-- matched any node and the whatsit test compared against an undefined variable
-- (the final return catches whatsits anyway).

local function equalnode(n,m)
    if not n then
        return not m
    elseif not m then
        return false
    end
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,n_id) -- n_id == n_font
    elseif getid(m) ~= n_id then
        return false
    elseif n_id == glue_code then
        return true
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    elseif n_id == disc_code then
        local n_pre, n_post, n_replace = getdisc(n)
        local m_pre, m_post, m_replace = getdisc(m)
        return eqnodelist(n_pre,m_pre)
           and eqnodelist(n_post,m_post)
           and eqnodelist(n_replace,m_replace)
    end
    return false
end
233
234-- The spacing hackery is not nice. The text can get leading and trailing spaces
235-- and even mid spaces while the start and stop nodes not always are glues then
236-- so the plugin really needs to do some testing there. We could pass more context
237-- but it doesn't become much better.
238--
-- The attribute gets passed for tracing purposes. We could support it (not that
-- hard to do) but as we don't test strictly for fonts (in disc nodes) we are not
-- compatible anyway. It would also mean more testing. So, don't use this mixed
-- with node and base mode in context.
243--
-- We don't distinguish between modes in treatment (so no r2l assumptions) and
-- no cheats for scripts that might not use discretionaries. Such hacks can work
-- in predictable cases but in context one can use a mix of all kinds of things and
-- users do that. On the other hand, we do support longer glyph runs in both modes
-- so there we gain a bit.
249
do

    -- The scales that are passed to ischar in the main loop. They are shared by
    -- all runs (nested ones included) and get reset by the public entry point.

    local currentscale, currentxscale, currentyscale

    -- Collects runs of characters in the given font and feeds them to handler.
    --
    --   head         : first node of the list (nothing is returned when it is nil)
    --   font,dynamic : passed to ischar and to the handler
    --   rlmode       : at the outer level a direction code (only righttoleft is
    --                  looked at), in nested runs the already normalized state
    --   handler      : function(head,font,dynamic,rlmode,start,stop,text,leading,trailing)
    --                  where text is a list of character codes; its first result
    --                  becomes the new head
    --   startspacing : true when the first run is preceded by a space (default false)
    --   stopspacing  : true when the last run is followed by a space (default false)
    --   nesting      : "pre", "post" or "replace" in the recursive calls for the
    --                  sublists of a discretionary, nil otherwise
    --
    -- Returns the (possibly new) head and true.

    local function texthandler(head,font,dynamic,rlmode,handler,startspacing,stopspacing,nesting)
        if not head then
            return
        end
        if startspacing == nil then
            startspacing = false
        end
        if stopspacing == nil then
            stopspacing = false
        end

        if nesting then
            -- The outer run passes its normalized state so we must not interpret
            -- it as a direction code again (that mapped -1 onto 0).
        elseif getid(head) == par_code and startofpar(head) then
            -- NOTE(review): the disabled code at the end of the main loop takes two
            -- results (rlparmode, rlmode) from pardirstate; here only the first one
            -- is used and rlparmode stays 0 -- confirm that this is intended.
            rlmode = pardirstate(head)
        elseif rlmode == righttoleft_code then
            rlmode = -1
        else
            rlmode = 0
        end

        local dirstack  = { }
        local rlparmode = 0
        local topstack  = 0
        local text      = { }   -- the character codes of the current run
        local size      = 0     -- the number of entries in text
        local current   = head
        local start     = nil   -- the first glyph of the current run
        local stop      = nil   -- only used in the disc branch

        -- Hands over the collected run and starts a new one. This function assigns
        -- the new head itself and returns nothing, so never assign its result.

        local function handle(leading,trailing) -- what gets passed can become configurable: e.g. utf 8
            -- NOTE(review): at the end of the list current is nil and then last
            -- equals start, also when the run has more glyphs; when current is a
            -- glyph that ischar rejected, last is that glyph (hm, what with the
            -- outer stop).
            local last = current or start
            if getid(last) ~= glyph_code then
                last = getprev(last)
            end
            head  = handler(head,font,dynamic,rlmode,start,last,text,leading,trailing) -- handler can adapt text
            size  = 0
            text  = { }
            start = nil
        end

        while current do
            local char, id = ischar(current,font,dynamic,currentscale,currentxscale,currentyscale)
            if char then
                if not start then
                    start = current
                end
                size = size + 1
                text[size] = getchar(current)
                current = getnext(current)
            elseif char == false then
                -- so a mixed font
                if start and size > 0 then
                    handle(startspacing,false)
                end
                startspacing = false
                local s, sx, sy = getscales(current)
                if s ~= currentscale or sx ~= currentxscale or sy ~= currentyscale then
                    -- We adopt the scales of this glyph and test the same node
                    -- again, so we don't move on here.
                    currentscale, currentxscale, currentyscale = s, sx, sy
                    -- todo: safeguard against a loop
                else
                    current = getnext(current)
                    currentscale, currentxscale, currentyscale = false, false, false
                end
            elseif id == glue_code then
                -- making this branch optional i.e. always use the else doesn't really
                -- make a difference in performance (in hb) .. tricky anyway as we can
                local width = getwidth(current)
                if width > 0 then
                    if start and size > 0 then
                        handle(startspacing,true)
                    end
                    startspacing = true
                    stopspacing  = false
                else
                    if start and size > 0 then
                        -- This was "head = handle(startspacing)" which made head
                        -- nil because handle has no results.
                        handle(startspacing,false)
                    end
                    startspacing = false
                    stopspacing  = false
                end
                current = getnext(current)
            elseif id == disc_code and usesfont(current,font) then -- foo|-|bar : has hbox
                -- This looks much like the original code but I don't see a need to optimize
                -- for e.g. deva or r2l fonts. If there are no disc nodes then we won't see
                -- this branch anyway and if there are, we should just deal with them.
                --
                -- There is still some weird code here ... start/stop and such. When I'm in
                -- the mood (or see a need) I'll rewrite this bit.

                -- bug: disc in last word moves to end (in practice not an issue as one
                -- doesn't want a break there)

                local pre         = nil
                local post        = nil
                local currentnext = getnext(current)
                local current_pre, current_post, current_replace = getdisc(current)
                setdisc(current) -- why, we set it later
                if start then
                    -- The pending run moves to the front of the pre list and a copy
                    -- of it ends up in front of the replace list.
                    pre  = copy_node_list(start,current)
                    stop = getprev(current)
                    -- why also current and not:
                 -- pre  = copy_node_list(start,stop)
                    if start == head then
                        head = current
                    end
                    setlink(getprev(start),current)
                    setlink(stop,current_pre)
                    current_pre = start
                    setprev(current_pre)
                    start = nil
                    stop  = nil
                end
                -- Find out how far the text after the disc runs and if it is
                -- followed by a space.
                while currentnext do
                    local char, id = ischar(currentnext,font)
                    if char or id == disc_code then
                        stop        = currentnext
                        currentnext = getnext(currentnext)
                    elseif id == glue_code then
                        local width = getwidth(currentnext)
                        if width and width > 0 then
                            stopspacing = true
                        else
                            stopspacing = false
                        end
                        break
                    else
                        break
                    end
                end
                if stop then
                    -- The text after the disc moves to the end of the post list and
                    -- a copy of it is appended to the replace list below.
                    local first    = getnext(current)
                    local stopnext = getnext(stop)
                    post = copy_node_list(first,stopnext)
                    if current_post then
                        setlink(find_node_tail(current_post),first)
                    else
                        setprev(first)
                        current_post = first
                    end
                    setlink(current,stopnext)
                    setnext(stop)
                    stop = nil
                end
                if pre then
                    setlink(find_node_tail(pre),current_replace)
                    current_replace = pre
                    pre = nil
                end
                if post then
                    if current_replace then
                        setlink(find_node_tail(current_replace),post)
                    else
                        current_replace = post
                    end
                    post = nil
                end
                size = 0   -- hm, ok, start is also nil now
                text = { }
                if current_pre then
                    current_pre = texthandler(current_pre,font,dynamic,rlmode,handler,startspacing,false,"pre")
                end
                if current_post then
                    current_post = texthandler(current_post,font,dynamic,rlmode,handler,false,stopspacing,"post")
                end
                if current_replace then
                    current_replace = texthandler(current_replace,font,dynamic,rlmode,handler,startspacing,stopspacing,"replace")
                end
                startspacing = false
                stopspacing  = false
                -- Nodes at the end of post and replace that are the same are taken
                -- out of the disc: the ones in post are flushed and the ones in
                -- replace are linked in after the disc.
                local cpost       = current_post and find_node_tail(current_post)
                local creplace    = current_replace and find_node_tail(current_replace)
                local cpostnew    = nil
                local creplacenew = nil
                while cpost and equalnode(cpost,creplace) do
                    cpostnew    = cpost
                    creplacenew = creplace
                    if creplace then
                        creplace = getprev(creplace)
                    end
                    cpost = getprev(cpost)
                end
                if cpostnew then
                    if cpostnew == current_post then
                        current_post = nil
                    else
                        setnext(getprev(cpostnew))
                    end
                    flushlist(cpostnew)
                    if creplacenew == current_replace then
                        current_replace = nil
                    else
                        setnext(getprev(creplacenew))
                    end
                    local c = getnext(current)
                    setlink(current,creplacenew)
                    setlink(find_node_tail(creplacenew),c)
                end
                current_post    = current_post and deletedisc(current_post)
                current_replace = current_replace and deletedisc(current_replace)
                -- Likewise nodes at the start of pre and replace that are the same
                -- are taken out: the ones in pre are flushed and the ones in replace
                -- are linked in before the disc.
                local cpre    = current_pre
                local cprenew = nil
                creplace      = current_replace
                creplacenew   = nil
                while cpre and equalnode(cpre,creplace) do
                    cprenew     = cpre
                    creplacenew = creplace
                    if creplace then
                        creplace = getnext(creplace)
                    end
                    cpre = getnext(cpre)
                end
                if cprenew then
                    cpre = current_pre
                    current_pre = getnext(cprenew)
                    if current_pre then
                        setprev(current_pre)
                    end
                    setnext(cprenew)
                    flushlist(cpre)
                    creplace = current_replace
                    current_replace = getnext(creplacenew)
                    if current_replace then
                        setprev(current_replace)
                    end
                    setlink(getprev(current),creplace)
                    if current == head then
                        head = creplace
                    end
                    setlink(creplacenew,current)
                end
                setdisc(current,current_pre,current_post,current_replace)
                current = currentnext
            else
                if start and size > 0 then
                    handle(startspacing,stopspacing)
                end
                startspacing = false
                stopspacing  = false
                if id == math_code then
                    -- skip the formula
                    current = getnext(endofmath(current))
                elseif id == dir_code then
                    startspacing = false
                    topstack, rlmode = txtdirstate(current,dirstack,topstack,rlparmode)
                    current = getnext(current)
             -- elseif id == par_code and startofpar(current) then
             --     startspacing = false
             --     rlparmode, rlmode = pardirstate(current)
             --     current = getnext(current)
                else
                    current = getnext(current)
                end
            end
        end
        if start and size > 0 then
            handle(startspacing,stopspacing)
        end
        return head, true
    end

    -- The public entry point: it resets the shared scale state and runs action
    -- over the text in the list. Without action we return the head and false.
    --
    -- The direction is passed as it is because the function above compares it
    -- with righttoleft_code itself. Before, we mapped it onto -1 and 0 here as
    -- well and then that comparison could not match any more (the plugin below
    -- also passes the direction untouched).

    function fonts.handlers.otf.texthandler(head,font,dynamic,direction,action)
        currentscale  = false
        currentxscale = false
        currentyscale = false
        if action then
            return texthandler(head,font,dynamic,direction,action)
        else
            return head, false
        end
    end

    -- Next comes a tracer plug into context.

    ----- texthandler = fonts.handlers.otf.texthandler
    local report_text = logs.reporter("otf plugin","text")
    local nofruns     = 0
    local nofsnippets = 0
    local f_unicode   = string.formatters["%U"]

    -- A handler that only reports the run: the characters, their formatted code
    -- points and if there is spacing before and after. Beware: the codes in list
    -- are replaced by strings. The list itself is not touched so we return the
    -- head and false.

    local function showtext(head,font,dynamic,rlmode,start,stop,list,before,after)
        if list then
            nofsnippets = nofsnippets + 1
            local plus = { }
            for i=1,#list do
                local u = list[i]
                list[i] = utfchar(u)
                plus[i] = f_unicode(u)
            end
            report_text("%03i : [%s] %t [%s]-> % t", nofsnippets, before and "+" or "-", list, after and "+" or "-", plus)
        else
            report_text()
            report_text("invalid list")
            report_text()
        end
        return head, false
    end

    -- The "text" plugin wraps each run in a start and stop message. It calls the
    -- local function and not the public one, so the scale state is not reset here.

    fonts.handlers.otf.registerplugin("text",function(head,font,dynamic,direction)
        nofruns     = nofruns + 1
        nofsnippets = 0
        report_text("start run %i",nofruns)
        local h, d = texthandler(head,font,dynamic,direction,showtext)
        report_text("stop run %i",nofruns)
        return h, d
    end)

end

570