font-txt.lmt /size: 22 Kb    last modification: 2023-12-21 09:44
-- Registers the metadata of this file under the 'font-txt' key of the global
-- modules table; the table is created first when it does not exist yet.
if not modules then modules = { } end modules ['font-txt'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    original  = "derived from a prototype by Kai Eigner",
    author    = "Hans Hagen", -- so don't blame KE
    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
9
-- The next code is derived from a snippet handler prototype by Kai Eigner and
-- resembles the main loop of the Lua font handler but I decided to use a more generic
-- (and pluggable) approach and not hook it into the already present opentype
-- handler. This is cleaner as it cannot interfere with the Lua font processor
-- (which does some more things) and is also better performance wise. It also makes
-- it possible to support other handlers as history has proven that there is no
-- universal solution in computer land. Most of the disc logic is kept but done
-- slightly differently.
18--
-- The code is heavily optimized and generalized so there can be errors. As
-- mentioned, the plug mode can be used for alternative font handlers. A font is
-- still loaded but the node and base mode handlers are ignored. Plugins are
-- unlikely to work well in context as they can mess up attribute driven subsystems,
-- so they are not officially supported. The language and script options are
-- available in the usual way.
25--
26-- The code collects snippets, either or not with spacing around them and partially
27-- running over disc nodes. The r2l "don't assume disc and collect larger chunks" is
28-- not robust so I got rid of that branch. This is somewhat similar to the Lua font
29-- handler.
30--
31-- An alternative is to run over longer strings with dummy chars (unicode objects) as
32-- markers for whatever is in the list but that becomes tricky with mixed fonts and
33-- reconstruction becomes a bit of a mess then, especially because disc nodes force
34-- us to backtrack and look at several solutions. It also has a larger memory
35-- footprint. Some tests demonstrated that it has no gain and only adds complexity.
36--
-- This (derived) variant is better suited for context and seems to work ok in the
-- generic variant. I also added some context specific tracing to the code. This
-- variant uses the plug model provided in the context font system. So, in context,
-- using the built in Lua handler is the better alternative, also because it has
-- extensive tracing features. Context users would lose additional functionality
-- that has been provided for a decade and therefore plugins are not officially
-- supported (at least not by me, unless I use them myself).
44--
-- There is no checking here for already processed characters so best not mix this
-- variant with code that does similar things. Whether this code evolves depends on
-- its usability. Kai's code can now be found on github where it is used with a harfbuzz
-- library. We add this kind of stuff because occasionally we compare engines and
-- Kai sends me examples and I then need to check context.
50--
51-- One important difference between Kai's approach and the one used in ConTeXt is
52-- that we use utf-32 instead of utf-8. Once I figured out that clusters were just
53-- indices into the original text that made more sense. The first implementation
54-- used the command line tool (binary), then I went for ffi (library).
55--
56-- Beware: this file only implements the framework for plugins. Plugins themselves
57-- are in other files (e.g. font-phb*.lua). On the todo list is a uniscribe plugin
58-- because that is after all the reference for opentype support, but that interface
59-- needs a bit more work (so it might never happen).
60--
-- Usage: see m-fonts-plugins.mkiv. As it's a nice test for ffi support that file
-- might be added to the distribution somewhere in the middle of 2017 when the ffi
-- interface has been tested a bit more. Okay, it's 2012 now and we're way past that
-- date but we never had a reason for adding it to the ConTeXt distribution. It
-- should still work okay because I occasionally checked it against progress made in
-- the engines and used newer helpers.
67--
68-- Here is an example of usage:
69--
70-- \starttext
71--     \definefontfeature[test][mode=plug,features=text]
72--     \start
73--         \showfontkerns
74--         \definedfont[Serif*test]
75--         \input tufte \par
76--     \stop
77-- \stoptext
78
79local fonts            = fonts
80local otf              = fonts.handlers.otf
81local nodes            = nodes
82
83local utfchar          = utf.char
84
85local nuts             = nodes.nuts
86
87local getnext          = nuts.getnext
88local setnext          = nuts.setnext
89local getprev          = nuts.getprev
90local setprev          = nuts.setprev
91local getid            = nuts.getid
92local getsubtype       = nuts.getsubtype
93local getfont          = nuts.getfont
94local getchar          = nuts.getchar
95local getdisc          = nuts.getdisc
96local setdisc          = nuts.setdisc
97local getboth          = nuts.getboth
98local getscales        = nuts.getscales
99local setlink          = nuts.setlink
100local getkern          = nuts.getkern
101local getwidth         = nuts.getwidth
102
103local ischar           = nuts.ischar
104local isglyph          = nuts.isglyph
105local usesfont         = nuts.usesfont
106
107local copy_node_list   = nuts.copylist
108local find_node_tail   = nuts.tail
109local flushlist        = nuts.flushlist
110local freenode         = nuts.free
111local endofmath        = nuts.endofmath
112
113local startofpar       = nuts.startofpar
114
115local nodecodes        = nodes.nodecodes
116
117local glyph_code       = nodecodes.glyph
118local glue_code        = nodecodes.glue
119local disc_code        = nodecodes.disc
120local kern_code        = nodecodes.kern
121local math_code        = nodecodes.math
122local dir_code         = nodecodes.dir
123local par_code         = nodecodes.par
124
125local righttoleft_code = tex.directioncodes.righttoleft
126
127local txtdirstate      = otf.helpers.txtdirstate
128local pardirstate      = otf.helpers.pardirstate
129
130local fonthashes       = fonts.hashes
131local fontdata         = fonthashes.identifiers
132
-- Flattens a node list: every disc node is removed, its pre and post lists
-- are flushed and its replace list (when present) is spliced into the place
-- where the disc node was. Returns the (possibly changed) head of the list.
local function deletedisc(head)
    local node = head
    while node do
        -- fetch the successor first because the node itself might get freed
        local following = getnext(node)
        if getid(node) == disc_code then
            local pre, post, replace, _, _, replace_tail = getdisc(node,true)
            setdisc(node) -- detach the three lists from the disc node
            if pre then
                flushlist(pre)
            end
            if post then
                flushlist(post)
            end
            local before, after = getboth(node)
            local athead = node == head
            if replace then
                if athead then
                    head = replace
                    setprev(replace) -- already nil
                else
                    setlink(before,replace)
                end
                setlink(replace_tail,after)
            elseif athead then
                head = after
                setprev(after)
            else
                setlink(before,after)
            end
            freenode(node)
        end
        node = following
    end
    return head
end
168
169-- As we know that we have the same font we can probably optimize this a bit more.
170-- Although we can have more in disc nodes than characters and kerns we only support
171-- those two types.
172
-- Compares two nodes that live inside the pre, post or replace list of a disc
-- node. Only glyphs and kerns are supported:
--
--   glyph : equal when m is a character in the font of n with the same code
--   kern  : equal when both kern amounts are the same
--
-- Anything else is reported as different. Both n and m are expected to be
-- valid nodes. The second return value of isglyph (the node id for non glyphs)
-- has to be captured here, otherwise the kern test compares an undefined
-- global with kern_code and never matches.
local function eqnode(n,m) -- no real improvement in speed
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,getfont(n))
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    end
    return false
end
181
-- Compares two node lists element by element with eqnode. The lists are equal
-- when all pairs match and both lists end at the same time.
local function equallist(n,m)
    while n and m do
        if not eqnode(n,m) then
            return false
        end
        n = getnext(n)
        m = getnext(m)
    end
    return not (n or m)
end

-- Checks if node n can be considered the same as node m:
--
--   both nil : equal
--   one nil  : different
--   glyph    : equal when m is a character in the same font with the same code
--   glue     : always reported as equal (m is not inspected)
--   kern     : equal when both kern amounts are the same
--   disc     : equal when the pre, post and replace lists match pairwise
--
-- All other nodes (whatsits included) are different. The former explicit
-- whatsit test referred to an undefined whatsit_code and therefore never
-- matched; such nodes always ended up in the final "return false", which is
-- what still happens now.
local function equalnode(n,m)
    if not n then
        return not m
    elseif not m then
        return false
    end
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,n_id) -- n_id == n_font
    elseif n_id == glue_code then
        return true
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    elseif n_id == disc_code then
        local n_pre, n_post, n_replace = getdisc(n)
        local m_pre, m_post, m_replace = getdisc(m)
        return equallist(n_pre,m_pre)
           and equallist(n_post,m_post)
           and equallist(n_replace,m_replace)
    end
    return false
end
234
-- The spacing hackery is not nice. The text can get leading and trailing spaces
-- and even mid spaces while the start and stop nodes are not always glues then
-- so the plugin really needs to do some testing there. We could pass more context
-- but it doesn't become much better.
--
-- The attribute gets passed for tracing purposes. We could support it (not that
-- hard to do) but as we don't test strictly for fonts (in disc nodes) we are not
-- compatible anyway. It would also mean more testing. So, don't use this mixed
-- with node and base mode in context.
--
-- We don't distinguish between modes in treatment (so no r2l assumptions) and
-- no cheats for scripts that might not use discretionaries. Such hacks can work
-- in predictable cases but in context one can use a mix of all kinds of things and
-- users do that. On the other hand, we do support longer glyph runs in both modes
-- so there we gain a bit.
250
251do
252
    -- Scale state shared by the functions in this block: the three values are
    -- passed to ischar when characters are collected, get updated from
    -- getscales when a glyph is rejected, and are reset to false by
    -- fonts.handlers.otf.texthandler before a run starts.
    local currentscale, currentxscale, currentyscale
254
255    local function texthandler(head,font,dynamic,rlmode,handler,startspacing,stopspacing,nesting)
256        if not head then
257            return
258        end
259        if startspacing == nil then
260            startspacing = false
261        end
262        if stopspacing == nil then
263            stopspacing = false
264        end
265
266        if getid(head) == par_code and startofpar(head) then
267            rlmode = pardirstate(head)
268        elseif rlmode == righttoleft_code then
269            rlmode = -1
270        else
271            rlmode = 0
272        end
273
274        local dirstack    = { }
275        local rlparmode   = 0
276        local topstack    = 0
277        local text        = { }
278        local size        = 0
279        local current     = head
280        local start       = nil
281        local stop        = nil
282        local startrlmode = rlmode
283
284        local function handle(leading,trailing) -- what gets passed can become configureable: e.g. utf 8
285            local stop = current or start -- hm, what with outer stop
286            if getid(stop) ~= glyph_code then
287                stop = getprev(stop)
288            end
289            head  = handler(head,font,dynamic,rlmode,start,stop,text,leading,trailing) -- handler can adapt text
290            size  = 0
291            text  = { }
292            start = nil
293        end
294
295        while current do
296            local char, id = ischar(current,font,dynamic,currentscale,currentxscale,currentyscale)
297            if char then
298                if not start then
299                    start = current
300                    startrlmode = rlmode
301                end
302                local char = getchar(current)
303                size = size + 1
304                text[size] = char
305                current = getnext(current)
306            elseif char == false then
307                -- so a mixed font
308                if start and size > 0 then
309                    handle(startspacing,false)
310                end
311                startspacing = false
312                local s, sx, sy = getscales(current)
313                if s ~= currentscale or sx ~= currentxscale or sy ~= currentyscale then
314                    if start and size > 0 then
315                        handle(startspacing,false)
316                    end
317                    startspacing = false
318                    currentscale, currentxscale, currentyscale = s, sx, sy
319                    -- todo: safeguard against a loop
320                else
321                    current = getnext(current)
322                    currentscale, currentxscale, currentyscale = false, false, false
323                end
324            elseif id == glue_code then
325                -- making this branch optional i.e. always use the else doesn't really
326                -- make a difference in performance (in hb) .. tricky anyway as we can
327                local width = getwidth(current)
328                if width > 0 then
329                    if start and size > 0 then
330                        handle(startspacing,true)
331                    end
332                    startspacing = true
333                    stopspacing  = false
334                else
335                    if start and size > 0 then
336                        head = handle(startspacing)
337                    end
338                    startspacing = false
339                    stopspacing  = false
340                end
341                current = getnext(current)
342            elseif id == disc_code and usesfont(current,font) then -- foo|-|bar : has hbox
343                -- This looks much like the original code but I don't see a need to optimize
344                -- for e.g. deva or r2l fonts. If there are no disc nodes then we won't see
345                -- this branch anyway and if there are, we should just deal with them.
346                --
347                -- There is still some weird code here ... start/stop and such. When I'm in
348                -- the mood (or see a need) I'll rewrite this bit.
349
350                -- bug: disc in last word moves to end (in practice not an issue as one
351                -- doesn't want a break there)
352
353                local pre         = nil
354                local post        = nil
355                local currentnext = getnext(current)
356                local current_pre, current_post, current_replace = getdisc(current)
357                setdisc(current) -- why, we set it later
358                if start then
359                    pre  = copy_node_list(start,current)
360                    stop = getprev(current)
361                    -- why also current and not:
362                 -- pre  = copy_node_list(start,stop)
363                    if start == head then
364                        head = current
365                    end
366                    setlink(getprev(start),current)
367                    setlink(stop,current_pre)
368                    current_pre = start
369                    setprev(current_pre)
370                    start       = nil
371                    stop        = nil
372                    startrlmode = rlmode
373                end
374                while currentnext do
375                    local char, id = ischar(currentnext,font)
376                    if char or id == disc_code then
377                        stop        = currentnext
378                        currentnext = getnext(currentnext)
379                    elseif id == glue_code then
380                        local width = getwidth(currentnext)
381                        if width and width > 0 then
382                            stopspacing = true
383                        else
384                            stopspacing = false
385                        end
386                        break
387                    else
388                        break
389                    end
390                end
391                if stop then
392                    local currentnext = getnext(current)
393                    local stopnext    = getnext(stop)
394                    post = copy_node_list(currentnext,stopnext)
395                    if current_post then
396                        setlink(find_node_tail(current_post),currentnext)
397                    else
398                        setprev(currentnext)
399                        current_post = currentnext
400                    end
401                    setlink(current,stopnext)
402                    setnext(stop)
403                    stop = nil
404                end
405                if pre then
406                    setlink(find_node_tail(pre),current_replace)
407                    current_replace = pre
408                    pre = nil
409                end
410                if post then
411                    if current_replace then
412                        setlink(find_node_tail(current_replace),post)
413                    else
414                        current_replace = post
415                    end
416                    post = nil
417                end
418                size = 0   -- hm, ok, start is also nil now
419                text = { }
420                if current_pre then
421                    current_pre = texthandler(current_pre,font,dynamic,rlmode,handler,startspacing,false,"pre")
422                end
423                if current_post then
424                    current_post = texthandler(current_post,font,dynamic,rlmode,handler,false,stopspacing,"post")
425                end
426                if current_replace then
427                    current_replace = texthandler(current_replace,font,dynamic,rlmode,handler,startspacing,stopspacing,"replace")
428                end
429                startspacing = false
430                stopspacing  = false
431                local cpost       = current_post and find_node_tail(current_post)
432                local creplace    = current_replace and find_node_tail(current_replace)
433                local cpostnew    = nil
434                local creplacenew = nil
435                local newcurrent  = nil
436                while cpost and equalnode(cpost,creplace) do
437                    cpostnew    = cpost
438                    creplacenew = creplace
439                    if creplace then
440                        creplace = getprev(creplace)
441                    end
442                    cpost = getprev(cpost)
443                end
444                if cpostnew then
445                    if cpostnew == current_post then
446                        current_post = nil
447                    else
448                        setnext(getprev(cpostnew))
449                    end
450                    flushlist(cpostnew)
451                    if creplacenew == current_replace then
452                        current_replace = nil
453                    else
454                        setnext(getprev(creplacenew))
455                    end
456                    local c = getnext(current)
457                    setlink(current,creplacenew)
458                    local creplacenewtail = find_node_tail(creplacenew)
459                    setlink(creplacenewtail,c)
460                    newcurrent = creplacenewtail
461                end
462                current_post      = current_post and deletedisc(current_post)
463                current_replace   = current_replace and deletedisc(current_replace)
464                local cpre        = current_pre
465                local creplace    = current_replace
466                local cprenew     = nil
467                local creplacenew = nil
468                while cpre and equalnode(cpre, creplace) do
469                    cprenew = cpre
470                    creplacenew = creplace
471                    if creplace then
472                        creplace = getnext(creplace)
473                    end
474                    cpre = getnext(cpre)
475                end
476                if cprenew then
477                    cpre = current_pre
478                    current_pre = getnext(cprenew)
479                    if current_pre then
480                        setprev(current_pre)
481                    end
482                    setnext(cprenew)
483                    flushlist(cpre)
484                    creplace = current_replace
485                    current_replace = getnext(creplacenew)
486                    if current_replace then
487                        setprev(current_replace)
488                    end
489                    setlink(getprev(current),creplace)
490                    if current == head then
491                        head = creplace
492                    end
493                    setlink(creplacenew,current)
494                end
495                setdisc(current,current_pre,current_post,current_replace)
496                current = currentnext
497            else
498                if start and size > 0 then
499                    handle(startspacing,stopspacing)
500                end
501                startspacing = false
502                stopspacing  = false
503                if id == math_code then
504                    current = getnext(endofmath(current))
505                elseif id == dir_code then
506                    startspacing = false
507                    topstack, rlmode = txtdirstate(current,dirstack,topstack,rlparmode)
508                    current = getnext(current)
509             -- elseif id == par_code and startofpar(current) then
510             --     startspacing = false
511             --     rlparmode, rlmode = pardirstate(current)
512             --     current = getnext(current)
513                else
514                    current = getnext(current)
515                end
516            end
517        end
518        if start and size > 0 then
519            handle(startspacing,stopspacing)
520        end
521        return head, true
522    end
523
524    function fonts.handlers.otf.texthandler(head,font,dynamic,direction,action)
525        currentscale  = false
526        currentxscale = false
527        currentyscale = false
528        if action then
529            return texthandler(head,font,dynamic,direction == righttoleft_code and -1 or 0,action)
530        else
531            return head, false
532        end
533    end
534
535    -- Next comes a tracer plug into context.
536
537    ----- texthandler = fonts.handlers.otf.texthandler
    -- State of the tracer plugin below: a reporter for the log, a counter for
    -- the runs (calls of the plugin), a counter for the snippets seen in the
    -- current run and a formatter that is used to show the character codes.
    local report_text = logs.reporter("otf plugin","text")
    local nofruns     = 0
    local nofsnippets = 0
    local f_unicode   = string.formatters["%U"]
542
543    local function showtext(head,font,dynamic,rlmode,start,stop,list,before,after)
544        if list then
545            nofsnippets = nofsnippets + 1
546            local plus = { }
547            for i=1,#list do
548                local u = list[i]
549                list[i] = utfchar(u)
550                plus[i] = f_unicode(u)
551            end
552            report_text("%03i : [%s] %t [%s]-> % t", nofsnippets, before and "+" or "-", list, after and "+" or "-", plus)
553        else
554            report_text()
555            report_text("invalid list")
556            report_text()
557        end
558        return head, false
559    end
560
561    fonts.handlers.otf.registerplugin("text",function(head,font,dynamic,direction)
562        nofruns     = nofruns + 1
563        nofsnippets = 0
564        report_text("start run %i",nofruns)
565        local h, d = texthandler(head,font,dynamic,direction,showtext)
566        report_text("stop run %i",nofruns)
567        return h, d
568    end)
569
570end
571