font-txt.lua /size: 21 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['font-txt'] = {
2    version   = 1.001,
3    comment   = "companion to font-ini.mkiv",
4    original  = "derived from a prototype by Kai Eigner",
5    author    = "Hans Hagen", -- so don't blame KE
6    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files"
8}
9
10-- The next code is derived from a snippet handler prototype by Kai Eigner and
-- resembles the main loop of the Lua font handler but I decided to use a more generic
12-- (and pluggable) approach and not hook it into the already present opentype
13-- handler. This is cleaner as it cannot interfere with the Lua font processor
14-- (which does some more things) and is also better performance wise. It also makes
-- it possible to support other handlers as history has proven that there is no
16-- universal solution in computer land. Most of the disc logic is kept but done
17-- slightly different.
18--
19-- The code is heavily optimized and generalized so there can be errors. As
20-- mentioned, the plug mode can be used for alternative font handlers. A font is
21-- still loaded but the node and base mode handlers are ignored. Plugins are
22-- unlikely to work well in context as they can mess up attribute driven subsystem,
23-- so they are not officially supported. The language and script options are
24-- available in the usual way.
25--
26-- The code collects snippets, either or not with spacing around them and partially
27-- running over disc nodes. The r2l "don't assume disc and collect larger chunks" is
28-- not robust so I got rid of that branch. This is somewhat similar to the Lua font
29-- handler.
30--
31-- An alternative is to run over longer strings with dummy chars (unicode objects) as
32-- markers for whatever is in the list but that becomes tricky with mixed fonts and
33-- reconstruction becomes a bit of a mess then, especially because disc nodes force
34-- us to backtrack and look at several solutions. It also has a larger memory
35-- footprint. Some tests demonstrated that it has no gain and only adds complexity.
36--
37-- This (derived) variant is better suited for context and seems to work ok in the
38-- generic variant. I also added some context specific tracing to the code. This
39-- variant uses the plug model provided in the context font system. So, in context,
40-- using the built in Lua handler is the better alternative, also because it has
-- extensive tracing features. Context users would lose additional functionality
42-- that has been provided for a decade and therefore plugins are not officially
43-- supported (at least not by me, unless I use them myself).
44--
45-- There is no checking here for already processed characters so best not mix this
-- variant with code that does similar things. Whether this code evolves depends on
-- its usability. Kai's code can now be found on github where it is used with a harfbuzz
48-- library. We add this kind of stuff because occasionally we compare engines and
49-- Kai sends me examples and I then need to check context.
50--
51-- One important difference between Kai's approach and the one used in ConTeXt is
52-- that we use utf-32 instead of utf-8. Once I figured out that clusters were just
53-- indices into the original text that made more sense. The first implementation
54-- used the command line tool (binary), then I went for ffi (library).
55--
56-- Beware: this file only implements the framework for plugins. Plugins themselves
57-- are in other files (e.g. font-phb*.lua). On the todo list is a uniscribe plugin
58-- because that is after all the reference for opentype support, but that interface
59-- needs a bit more work (so it might never happen).
60--
61-- Usage: see m-fonts-plugins.mkiv. As it's a nice test for ffi support that file
-- might be added to the distribution somewhere in the middle of 2017 when the ffi
-- interface has been tested a bit more. Okay, it's 2021 now and we're way past that
64-- date but we never had a reason for adding it to the ConTeXt distribution. It
65-- should still work okay because I occasionally checked it against progress made in
66-- the engines and used newer helpers.
67--
68-- Here is an example of usage:
69--
70-- \starttext
71--     \definefontfeature[test][mode=plug,features=text]
72--     \start
73--         \showfontkerns
74--         \definedfont[Serif*test]
75--         \input tufte \par
76--     \stop
77-- \stoptext
78
79local fonts            = fonts
80local otf              = fonts.handlers.otf
81local nodes            = nodes
82
83local utfchar          = utf.char
84
85local nuts             = nodes.nuts
86
87local getnext          = nuts.getnext
88local setnext          = nuts.setnext
89local getprev          = nuts.getprev
90local setprev          = nuts.setprev
91local getid            = nuts.getid
92local getsubtype       = nuts.getsubtype
93local getfont          = nuts.getfont
94local getchar          = nuts.getchar
95local getdisc          = nuts.getdisc
96local setdisc          = nuts.setdisc
97local getboth          = nuts.getboth
98local setlink          = nuts.setlink
99local getkern          = nuts.getkern
100local getwidth         = nuts.getwidth
101
102local ischar           = nuts.ischar
103local isglyph          = nuts.isglyph
104local usesfont         = nuts.usesfont
105
106local copy_node_list   = nuts.copylist
107local find_node_tail   = nuts.tail
108local flushlist        = nuts.flushlist
109local freenode         = nuts.free
110local endofmath        = nuts.endofmath
111
112local startofpar       = nuts.startofpar
113
114local nodecodes        = nodes.nodecodes
115
116local glyph_code       = nodecodes.glyph
117local glue_code        = nodecodes.glue
118local disc_code        = nodecodes.disc
119local kern_code        = nodecodes.kern
120local math_code        = nodecodes.math
121local dir_code         = nodecodes.dir
122local par_code         = nodecodes.par
123
124local righttoleft_code = nodes.dirvalues.righttoleft
125
126local txtdirstate      = otf.helpers.txtdirstate
127local pardirstate      = otf.helpers.pardirstate
128
129local fonthashes       = fonts.hashes
130local fontdata         = fonthashes.identifiers
131
-- Removes every disc node from the list that starts at head. The replace list of
-- a disc (when present) takes the place of the disc in the main list, while its
-- pre and post lists are flushed. The (possibly changed) head is returned.

local function deletedisc(head)
    local node = head
    while node do
        -- remember the successor first because the node itself can be freed below
        local following = getnext(node)
        if getid(node) == disc_code then
            local pre, post, replace, _, _, replacetail = getdisc(node,true)
            local before, after = getboth(node)
            -- detach the three lists from the disc before disposing them
            setdisc(node)
            if pre then
                flushlist(pre)
            end
            if post then
                flushlist(post)
            end
            local athead = node == head
            if replace then
                -- splice the replace list in where the disc was
                if athead then
                    head = replace
                    setprev(replace)
                else
                    setlink(before,replace)
                end
                setlink(replacetail,after)
            elseif athead then
                -- nothing to splice in: the list now starts at the successor
                head = after
                setprev(after)
            else
                setlink(before,after)
            end
            freenode(node)
        end
        node = following
    end
    return head
end
167
168-- As we know that we have the same font we can probably optimize this a bit more.
169-- Although we can have more in disc nodes than characters and kerns we only support
170-- those two types.
171
-- Compares two nodes that live inside the pre, post or replace list of a disc
-- node. Only glyphs and kerns are supported: two glyphs are equal when m is a
-- character in the font of n with the same code, two kerns are equal when their
-- amounts match. Everything else is reported as not equal.
--
-- The second return value of isglyph has to be captured: without it the kern test
-- below compared an undefined (global) n_id and could never succeed.

local function eqnode(n,m) -- no real improvement in speed
    local n_char, n_id = isglyph(n) -- n_id is the node id when n is not a glyph
    if n_char then
        return n_char == ischar(m,getfont(n))
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    end
    return false
end
180
-- Walks two disc sublists in parallel and reports if they match node by node
-- (using eqnode) and end at the same time.

local function equallist(n,m)
    while n and m do
        if not eqnode(n,m) then
            return false
        end
        n = getnext(n)
        m = getnext(m)
    end
    -- one list being longer than the other means: not equal
    return not (n or m)
end

-- Compares two nodes from the main list (either can be nil):
--
--   both nil : equal
--   one nil  : not equal
--   glyph    : m must be a character in the same font with the same code
--   glue     : always reported as equal
--   kern     : the kern amounts must match
--   disc     : the pre, post and replace lists must match pairwise
--   others   : not equal
--
-- The former whatsit branch has been dropped: it tested against whatsit_code, a
-- name that is not defined in this file, and it returned the same false as the
-- fallthrough at the end.
--
-- NOTE(review): in the glue case m is not inspected at all, so a glue in n matches
-- any non nil m; confirm that this is intended.

local function equalnode(n,m)
    if not n then
        return not m
    elseif not m then
        return false
    end
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,n_id) -- n_id == n_font
    elseif n_id == glue_code then
        return true
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    elseif n_id == disc_code then
        local n_pre, n_post, n_replace = getdisc(n)
        local m_pre, m_post, m_replace = getdisc(m)
        return equallist(n_pre,m_pre)
           and equallist(n_post,m_post)
           and equallist(n_replace,m_replace)
    end
    return false
end
233
234-- The spacing hackery is not nice. The text can get leading and trailing spaces
235-- and even mid spaces while the start and stop nodes not always are glues then
236-- so the plugin really needs to do some testing there. We could pass more context
237-- but it doesn't become much better.
238--
239-- The attribute gets passed for tracing purposes. We could support it (not that
-- hard to do) but as we don't test strictly for fonts (in disc nodes) we are not
241-- compatible anyway. It would also mean more testing. So, don't use this mixed
242-- with node and base mode in context.
243--
244-- We don't distinguish between modes in treatment (so no r2l assumptions) and
245-- no cheats for scripts that might not use discretionaries. Such hacks can work
-- in predictable cases but in context one can use a mix of all kinds of things and
247-- users do that. On the other hand, we do support longer glyph runs in both modes
248-- so there we gain a bit.
249
250do
251
252    local function texthandler(head,font,dynamic,rlmode,handler,startspacing,stopspacing,nesting)
253        if not head then
254            return
255        end
256        if startspacing == nil then
257            startspacing = false
258        end
259        if stopspacing == nil then
260            stopspacing = false
261        end
262
263        if getid(head) == par_code and startofpar(head) then
264            rlmode = pardirstate(head)
265        elseif rlmode == righttoleft_code then
266            rlmode = -1
267        else
268            rlmode = 0
269        end
270
271        local dirstack    = { }
272        local rlparmode   = 0
273        local topstack    = 0
274        local text        = { }
275        local size        = 0
276        local current     = head
277        local start       = nil
278        local stop        = nil
279        local startrlmode = rlmode
280
281        local function handle(leading,trailing) -- what gets passed can become configureable: e.g. utf 8
282            local stop = current or start -- hm, what with outer stop
283            if getid(stop) ~= glyph_code then
284                stop = getprev(stop)
285            end
286            head  = handler(head,font,dynamic,rlmode,start,stop,text,leading,trailing) -- handler can adapt text
287            size  = 0
288            text  = { }
289            start = nil
290        end
291
292        while current do
293            local char, id = ischar(current,font)
294            if char then
295                if not start then
296                    start = current
297                    startrlmode = rlmode
298                end
299                local char = getchar(current)
300                size = size + 1
301                text[size] = char
302                current = getnext(current)
303            elseif char == false then
304                -- so a mixed font
305                if start and size > 0 then
306                    handle(startspacing,false)
307                end
308                startspacing = false
309                current = getnext(current)
310            elseif id == glue_code then
311                -- making this branch optional i.e. always use the else doesn't really
312                -- make a difference in performance (in hb) .. tricky anyway as we can
313                local width = getwidth(current)
314                if width > 0 then
315                    if start and size > 0 then
316                        handle(startspacing,true)
317                    end
318                    startspacing = true
319                    stopspacing  = false
320                else
321                    if start and size > 0 then
322                        head = handle(startspacing)
323                    end
324                    startspacing = false
325                    stopspacing  = false
326                end
327                current = getnext(current)
328            elseif id == disc_code and usesfont(current,font) then -- foo|-|bar : has hbox
329                -- This looks much like the original code but I don't see a need to optimize
330                -- for e.g. deva or r2l fonts. If there are no disc nodes then we won't see
331                -- this branch anyway and if there are, we should just deal with them.
332                --
333                -- There is still some weird code here ... start/stop and such. When I'm in
334                -- the mood (or see a need) I'll rewrite this bit.
335
336                -- bug: disc in last word moves to end (in practice not an issue as one
337                -- doesn't want a break there)
338
339                local pre         = nil
340                local post        = nil
341                local currentnext = getnext(current)
342                local current_pre, current_post, current_replace = getdisc(current)
343                setdisc(current) -- why, we set it later
344                if start then
345                    pre  = copy_node_list(start,current)
346                    stop = getprev(current)
347                    -- why also current and not:
348                 -- pre  = copy_node_list(start,stop)
349                    if start == head then
350                        head = current
351                    end
352                    setlink(getprev(start),current)
353                    setlink(stop,current_pre)
354                    current_pre = start
355                    setprev(current_pre)
356                    start       = nil
357                    stop        = nil
358                    startrlmode = rlmode
359                end
360                while currentnext do
361                    local char, id = ischar(currentnext,font)
362                    if char or id == disc_code then
363                        stop        = currentnext
364                        currentnext = getnext(currentnext)
365                    elseif id == glue_code then
366                        local width = getwidth(currentnext)
367                        if width and width > 0 then
368                            stopspacing = true
369                        else
370                            stopspacing = false
371                        end
372                        break
373                    else
374                        break
375                    end
376                end
377                if stop then
378                    local currentnext = getnext(current)
379                    local stopnext    = getnext(stop)
380                    post = copy_node_list(currentnext,stopnext)
381                    if current_post then
382                        setlink(find_node_tail(current_post),currentnext)
383                    else
384                        setprev(currentnext)
385                        current_post = currentnext
386                    end
387                    setlink(current,stopnext)
388                    setnext(stop)
389                    stop = nil
390                end
391                if pre then
392                    setlink(find_node_tail(pre),current_replace)
393                    current_replace = pre
394                    pre = nil
395                end
396                if post then
397                    if current_replace then
398                        setlink(find_node_tail(current_replace),post)
399                    else
400                        current_replace = post
401                    end
402                    post = nil
403                end
404                size = 0   -- hm, ok, start is also nil now
405                text = { }
406                if current_pre then
407                    current_pre = texthandler(current_pre,font,dynamic,rlmode,handler,startspacing,false,"pre")
408                end
409                if current_post then
410                    current_post = texthandler(current_post,font,dynamic,rlmode,handler,false,stopspacing,"post")
411                end
412                if current_replace then
413                    current_replace = texthandler(current_replace,font,dynamic,rlmode,handler,startspacing,stopspacing,"replace")
414                end
415                startspacing = false
416                stopspacing  = false
417                local cpost       = current_post and find_node_tail(current_post)
418                local creplace    = current_replace and find_node_tail(current_replace)
419                local cpostnew    = nil
420                local creplacenew = nil
421                local newcurrent  = nil
422                while cpost and equalnode(cpost,creplace) do
423                    cpostnew    = cpost
424                    creplacenew = creplace
425                    if creplace then
426                        creplace = getprev(creplace)
427                    end
428                    cpost = getprev(cpost)
429                end
430                if cpostnew then
431                    if cpostnew == current_post then
432                        current_post = nil
433                    else
434                        setnext(getprev(cpostnew))
435                    end
436                    flushlist(cpostnew)
437                    if creplacenew == current_replace then
438                        current_replace = nil
439                    else
440                        setnext(getprev(creplacenew))
441                    end
442                    local c = getnext(current)
443                    setlink(current,creplacenew)
444                    local creplacenewtail = find_node_tail(creplacenew)
445                    setlink(creplacenewtail,c)
446                    newcurrent = creplacenewtail
447                end
448                current_post      = current_post and deletedisc(current_post)
449                current_replace   = current_replace and deletedisc(current_replace)
450                local cpre        = current_pre
451                local creplace    = current_replace
452                local cprenew     = nil
453                local creplacenew = nil
454                while cpre and equalnode(cpre, creplace) do
455                    cprenew = cpre
456                    creplacenew = creplace
457                    if creplace then
458                        creplace = getnext(creplace)
459                    end
460                    cpre = getnext(cpre)
461                end
462                if cprenew then
463                    cpre = current_pre
464                    current_pre = getnext(cprenew)
465                    if current_pre then
466                        setprev(current_pre)
467                    end
468                    setnext(cprenew)
469                    flushlist(cpre)
470                    creplace = current_replace
471                    current_replace = getnext(creplacenew)
472                    if current_replace then
473                        setprev(current_replace)
474                    end
475                    setlink(getprev(current),creplace)
476                    if current == head then
477                        head = creplace
478                    end
479                    setlink(creplacenew,current)
480                end
481                setdisc(current,current_pre,current_post,current_replace)
482                current = currentnext
483            else
484                if start and size > 0 then
485                    handle(startspacing,stopspacing)
486                end
487                startspacing = false
488                stopspacing  = false
489                if id == math_code then
490                    current = getnext(endofmath(current))
491                elseif id == dir_code then
492                    startspacing = false
493                    topstack, rlmode = txtdirstate(current,dirstack,topstack,rlparmode)
494                    current = getnext(current)
495             -- elseif id == par_code and startofpar(current) then
496             --     startspacing = false
497             --     rlparmode, rlmode = pardirstate(current)
498             --     current = getnext(current)
499                else
500                    current = getnext(current)
501                end
502            end
503        end
504        if start and size > 0 then
505            handle(startspacing,stopspacing)
506        end
507        return head, true
508    end
509
510    function fonts.handlers.otf.texthandler(head,font,dynamic,direction,action)
511        if action then
512            return texthandler(head,font,dynamic,direction == righttoleft_code and -1 or 0,action)
513        else
514            return head, false
515        end
516    end
517
518    -- Next comes a tracer plug into context.
519
520    ----- texthandler = fonts.handlers.otf.texthandler
521    local report_text = logs.reporter("otf plugin","text")
522    local nofruns     = 0
523    local nofsnippets = 0
524    local f_unicode   = string.formatters["%U"]
525
526    local function showtext(head,font,dynamic,rlmode,start,stop,list,before,after)
527        if list then
528            nofsnippets = nofsnippets + 1
529            local plus = { }
530            for i=1,#list do
531                local u = list[i]
532                list[i] = utfchar(u)
533                plus[i] = f_unicode(u)
534            end
535            report_text("%03i : [%s] %t [%s]-> % t", nofsnippets, before and "+" or "-", list, after and "+" or "-", plus)
536        else
537            report_text()
538            report_text("invalid list")
539            report_text()
540        end
541        return head, false
542    end
543
544    fonts.handlers.otf.registerplugin("text",function(head,font,dynamic,direction)
545        nofruns     = nofruns + 1
546        nofsnippets = 0
547        report_text("start run %i",nofruns)
548        local h, d = texthandler(head,font,dynamic,direction,showtext)
549        report_text("stop run %i",nofruns)
550        return h, d
551    end)
552
553end
554