spac-twi.lmt /size: 15 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['spac-twi'] = {
2    version   = 1.001,
3    comment   = "companion to spac-brk.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
-- Here we use two demerit parameters and a bit of tracing. In an article that he
-- sent to us, Didier Verna describes some experiences with looking at similar
-- words using an introspective tex-alike framework. We had discussed this before so
-- below is a follow up. The article suggests to quit at a discretionary but below we
-- just handle them. In complex opentype features there can be pre, post and replace
-- parts that have stretches of words so we have to look at them. Here we just
-- compare the indices and stay within a font, but one can consider variants where
-- we look at the unicodes, ignore the fonts, but then we also have the danger that
-- for instance a small capped word matches a normally capitalized one. We also
-- distinguish between left and right collapsed. It's easy to add a 'length' variant
-- but coming up with a good criterion is hard.
20
21local concat, reverse = table.concat, table.reverse
22local utfchar = utf.char
23
24local nuts                = nodes.nuts
25local tonut               = nodes.tonut
26
27local getid               = nuts.getid
28local getprev             = nuts.getprev
29local getnext             = nuts.getnext
30local getsubtype          = nuts.getsubtype
31local getdisc             = nuts.getdisc
32local getreplace          = nuts.getreplace
33local getpre              = nuts.getpre
34local getpost             = nuts.getpost
35local isprevglyph         = nuts.isprevglyph
36local isnextglyph         = nuts.isnextglyph
37local getnormalizedline   = nuts.getnormalizedline
38
39local setcolor            = nodes.tracers.colors.set
40
41local nodecodes           = nodes.nodecodes
42
43local glyph_code          <const> = nodecodes.glyph
44local glue_code           <const> = nodecodes.glue
45local disc_code           <const> = nodecodes.disc
46local kern_code           <const> = nodecodes.kern
47local math_code           <const> = nodecodes.math
48
49local fontkern_code       = nodes.kerncodes.fontkern
50
51local breakcodes          = tex.breakcodes
52
53local texgetcount         = tex.getcount
54local texget              = tex.get
55local texgetnest          = tex.getnest
56
57local c_twinslimit        = tex.iscount("twinslimit")
58----- c_righttwindemerits = tex.iscount("righttwindemerits")
59----- c_lefttwindemerits  = tex.iscount("lefttwindemerits")
60
-- State shared by the linebreak check actions below. The three tables are created
-- by the initialize/start actions and set back to false by the wrapup action.

local serials             = false -- break serial number -> cached entry (set in the report action)
local cached              = false -- breakpoint nut -> entry { f_before, f_after, s_before, s_after, n_before, n_after [, first/last nodes] }
local traced              = false -- break serial number -> "before", "after" or "both" (only filled when tracing)
local nestlevel           = 0     -- value of texgetnest("ptr") as sampled by the handler
local righttwindemerits   = 0     -- refreshed from the tex parameter on every handler call
local lefttwindemerits    = 0     -- refreshed from the tex parameter on every handler call
local twinslimit          = 100   -- maximum number of characters that is compared, refreshed on every handler call
local trace               = false -- toggled by the tracker registered below

local report              = logs.reporter("twins")

-- enable with: \enabletrackers[typesetters.twindemerits]
trackers.register("typesetters.twindemerits", function(v) trace = v end)
73
74local unicodes = fonts.hashes.unicodes
75local chars    = fonts.hashes.characters
76
-- Resolve character index c of font f to the unicode stored in the font's
-- character data; when there is no data or no unicode field we fall back to the
-- index itself.

local function chr(f,c)
    local data    = chars[f][c]
    local unicode = data and data.unicode
    if unicode then
        return unicode
    end
    return c
end
81
82-- We really have to compare the raw char fields because different shapes can have
83-- the same unicode. But for tracing we need to remap the privates. We can consider
84-- a practical limit in length, say 16. We can also decide to keep the small tables
85-- and delay the unicoding.
86--
87-- A first variant converted to string which is just as fast but using arrays and
88-- comparing them also is efficient and comes closer to how a built-in variant would
89-- look. It needs a bit more code.
90
-- Collect the word that ends right before a potential breakpoint.
--
-- The breakpoint is either a glue (we then start at the node in front of it) or a
-- discretionary (we then start with its pre part). Because we walk backwards the
-- snippet holds the character indices in reverse order: snippet[1] is the last
-- character before the break. Only glyphs from one and the same font, font kerns
-- and the replace parts of discretionaries are accepted; the walk succeeds when we
-- hit a glue that itself is preceded by a glyph or discretionary.
--
-- Returns: font, snippet, count, first, last. Here first and last are the outer
-- nodes of the collected stretch in the main list (used for coloring when tracing)
-- and count is clipped to twinslimit. Nothing is returned when there is no usable
-- word.

local function getbefore(breakpoint)
    local last = nil
    local id   = getid(breakpoint)
    if id == glue_code then
        last = getprev(breakpoint)
    elseif id == disc_code then
        last = breakpoint
    end
    if last then
        local first   = last
        local font    = false
        local current = last
        local snippet = { }
        local count   = 0
        if last == breakpoint then
            -- a discretionary: the line ends with the pre part, walked from its tail
            local _, r = getpre(current)
            while r do
                local prv, char, id = isprevglyph(r)
                if char then
                    if font and font ~= id then
                        -- we stay within one font, also inside the pre part
                        return
                    end
                    font  = id
                    count = count + 1
                    snippet[count] = char
                elseif id == kern_code and getsubtype(r) == fontkern_code then
                    -- font kerns are ignored
                else
                    return
                end
                r = prv
            end
            current = getprev(current)
        else
            local prv, char, id = isprevglyph(last)
            if char then
                count   = 1
                current = prv
                font    = id
                snippet[count] = char
            else
                return
            end
        end
        if font then
            while current do
                local prv, char, id = isprevglyph(current)
                if char then
                    if id == font then
                        count = count + 1
                        snippet[count] = char
                    else
                        return
                    end
                elseif id == disc_code then
                    local _, r = getreplace(current,true) -- we go back from tail
                    while r do
                        local prv, char, id = isprevglyph(r)
                        if char then
                            if id ~= font then
                                -- a glyph from another font never qualifies
                                return
                            end
                            count = count + 1
                            snippet[count] = char
                        elseif id == kern_code and getsubtype(r) == fontkern_code then
                            -- font kerns are ignored
                        else
                            return
                        end
                        r = prv
                    end
                elseif id == kern_code and getsubtype(current) == fontkern_code then
                    -- continue
                elseif id == glue_code then -- maybe only spaces
                    if prv and count > 0 then
                        local id = getid(prv)
                        if id == glyph_code or id == disc_code then
                            -- snippet[1] is the character just before the break:
                            -- drop it when it is punctuation
                            if count > 1 then
                                local u = chr(font,snippet[1])
                                if u and characters.is_punctuation[u] then
                                    table.remove(snippet,1)
                                    count = count - 1
                                end
                            end
                            if count > twinslimit then
                                count = twinslimit
                            end
                            return font, snippet, count, first, last
                        end
                    end
                    break
                else
                    break
                end
                first   = current
                current = prv
            end
        end
    end
end
187
-- Collect the word that starts right after a potential breakpoint.
--
-- The breakpoint is either a glue (we then start at the node after it) or a
-- discretionary (we then start with its post part). We walk forward so the
-- snippet holds the character indices in reading order: snippet[count] is the
-- last character of the word. Only glyphs from one and the same font, font kerns
-- and the replace parts of discretionaries are accepted; the walk succeeds when we
-- hit a glue that itself is followed by a glyph or discretionary.
--
-- Returns: font, snippet, count, first, last. Here first and last are the outer
-- nodes of the collected stretch in the main list (used for coloring when tracing)
-- and count is clipped to twinslimit. Nothing is returned when there is no usable
-- word.

local function getafter(breakpoint)
    local first = nil
    local id    = getid(breakpoint)
    if id == glue_code then
        first = getnext(breakpoint)
    elseif id == disc_code then
        first = breakpoint
    end
    if first then
        local last    = first
        local font    = false
        local current = first
        local snippet = { }
        local count   = 0
        if first == breakpoint then
            -- a discretionary: the next line starts with the post part
            local r = getpost(current)
            while r do
                local nxt, char, id = isnextglyph(r)
                if char then
                    if font and font ~= id then
                        -- we stay within one font, also inside the post part
                        return
                    end
                    font  = id
                    count = count + 1
                    snippet[count] = char
                elseif id == kern_code and getsubtype(r) == fontkern_code then
                    -- font kerns are ignored
                else
                    return
                end
                r = nxt
            end
            current = getnext(current)
        else
            local nxt, char, id = isnextglyph(first)
            if char then
                count   = 1
                current = nxt
                font    = id
                snippet[count] = char
            else
                return
            end
        end
        if font then
            while current do
                local nxt, char, id = isnextglyph(current)
                if char then
                    if id == font then
                        count = count + 1
                        snippet[count] = char
                    else
                        return
                    end
                elseif id == disc_code then
                    local r = getreplace(current)
                    while r do
                        local nxt, char, id = isnextglyph(r)
                        if char then
                            if id ~= font then
                                -- same rule as when looking back: one font only
                                return
                            end
                            count = count + 1
                            snippet[count] = char
                        elseif id == kern_code and getsubtype(r) == fontkern_code then
                            -- font kerns are ignored
                        else
                            return
                        end
                        r = nxt
                    end
                elseif id == kern_code and getsubtype(current) == fontkern_code then
                    -- continue
                elseif id == glue_code then -- maybe only spaces
                    if nxt and count > 0 then
                        local id = getid(nxt)
                        if id == glyph_code or id == disc_code then
                            -- snippet[count] is the last character of the word:
                            -- drop it when it is punctuation
                            if count > 1 then
                                local u = chr(font,snippet[count])
                                if u and characters.is_punctuation[u] then
                                    snippet[count] = nil
                                    count = count - 1
                                end
                            end
                            if count > twinslimit then
                                count = twinslimit
                            end
                            return font, snippet, count, first, last
                        end
                    end
                    break
                else
                    break
                end
                last    = current
                current = nxt
            end
        end
    end
end
284
-- Tracing helper: report a matching snippet. The n character indices in s (font
-- f) are mapped onto unicodes first; a "before" snippet was collected backwards
-- so it gets reversed before it is shown.

local function showa(what,current,previous,pass,subpass,line,f,s,n,demerits,extra)
    local codes = { }
    local index = 1
    while index <= n do
        local raw = s[index]
        codes[index] = chr(f,raw) or raw
        index = index + 1
    end
    if what == "before" then
        reverse(codes)
    end
    local word = utfchar(codes)
    report(
        "pass %i, subpass %i, line %i, current %i, previous %i, %s %a, demerits %i + %i",
        pass,subpass,line,current,previous,what,word,demerits,extra
    )
end
296
-- Tracing helper: remember for both serials involved on which side ("before" or
-- "after") a twin was seen. A serial that is seen on both sides ends up as "both".
--
-- A pair can be reported more than once, so marking has to be stable: the same
-- side twice stays that side and "both" is never downgraded again.

local function showb(what,current,previous)
    local function mark(serial)
        local state = traced[serial]
        if not state then
            traced[serial] = what
        elseif state ~= what then
            traced[serial] = "both"
        end
    end
    mark(current)
    mark(previous)
end
309
-- Tracing helper: color the glyphs in the stretch first .. last, including those
-- in the pre, post and replace parts of discretionaries. At most twinslimit glyphs
-- are colored; n is the number of glyphs colored so far (used when recursing) and
-- the updated number is returned.
--
-- The loop stops when we run out of nodes, so a missing first node or a last node
-- that cannot be reached does not result in accessing a nil node.

local function showc(first,last,color,n)
    if not n then
        n = 0
    end
    while first do
        local id = getid(first)
        if id == disc_code then
            local pre, post, replace, tpre, tpost, treplace = getdisc(first,true)
            if pre     and n < twinslimit then n = showc(pre,tpre,color,n)         end
            if post    and n < twinslimit then n = showc(post,tpost,color,n)       end
            if replace and n < twinslimit then n = showc(replace,treplace,color,n) end
        elseif id == glyph_code then
            n = n + 1
            setcolor(first,color)
        end
        if first == last or n >= twinslimit then
            break
        end
        first = getnext(first)
    end
    return n
end
333
-- Compare the first n entries of the arrays a and b; true when all are equal
-- (which is also the case when n is zero).

local function same(a,b,n)
    local index = 1
    while index <= n do
        if a[index] ~= b[index] then
            return false
        end
        index = index + 1
    end
    return true
end
342
-- The actions are indexed by break code and called by the handler at the end of
-- this file. They only act when the current nest pointer equals the nestlevel
-- sampled by that handler.
--
-- initialize : fresh serials, cache and trace tables
-- start      : fresh serials and trace tables (the cache is kept)
-- report     : compares the words around the reported breakpoint with those around
--              the previous one and returns the (possibly increased) demerits
-- collect    : when tracing, colors the words that were flagged
-- wrapup     : drops all tables

local actions = {
    [breakcodes.initialize] = function()
        if texgetnest("ptr") == nestlevel then
            serials = { }
            cached  = { }
            traced  = { }
        end
    end,
    [breakcodes.start] = function()
        if texgetnest("ptr") == nestlevel then
            serials = { }
            traced  = { }
        end
    end,
    [breakcodes.report] = function(checks,pass,subpass,currentserial,previousserial,line,kind,class,classes,badness,demerits,breakpoint,short,glue,width)
        if breakpoint and texgetnest("ptr") == nestlevel then
            local s_before, s_after, f_before, f_after, n_before, n_after
            breakpoint = tonut(breakpoint)
            local current = cached[breakpoint]
            -- we can be selective, no need for left or right when demerits are zero
            if current then
                f_before = current[1]
                f_after  = current[2]
                s_before = current[3]
                s_after  = current[4]
                n_before = current[5]
                n_after  = current[6]
            elseif trace then
                -- when tracing we also keep the outer nodes of both words (slots 7..10)
                local fb, fa, lb, la
                f_before, s_before, n_before, fb, lb = getbefore(breakpoint)
                f_after,  s_after,  n_after,  fa, la = getafter(breakpoint)
                current = { f_before, f_after, s_before, s_after, n_before, n_after, fb, lb, fa, la }
                cached[breakpoint] = current
            else
                f_before, s_before, n_before = getbefore(breakpoint)
                f_after,  s_after,  n_after  = getafter(breakpoint)
                current = { f_before, f_after, s_before, s_after, n_before, n_after }
                cached[breakpoint] = current
            end
            serials[currentserial] = current -- successive passes, so we need to set it again
            local previous = serials[previousserial]
            if previous then
                local extra = 0
                if righttwindemerits > 0 and f_before and f_before == previous[1] and n_before == previous[5] and same(s_before,previous[3],n_before) then
                    if trace then
                        showa("before",currentserial,previousserial,pass,subpass,line,f_before,s_before,n_before,demerits,righttwindemerits)
                        showb("before",currentserial,previousserial)
                    end
                    extra = righttwindemerits
                end
                -- a number is always true in Lua, so we need an explicit test here
                if lefttwindemerits > 0 and f_after and f_after == previous[2] and n_after == previous[6] and same(s_after,previous[4],n_after) then
                    if trace then
                        showa("after",currentserial,previousserial,pass,subpass,line,f_after,s_after,n_after,demerits,lefttwindemerits)
                        showb("after",currentserial,previousserial)
                    end
                    -- maybe take the max of the two
                 -- if lefttwindemerits > extra then
                 --     extra = lefttwindemerits
                 -- end
                    extra = extra + lefttwindemerits
                end
                if extra > 0 then
                    return demerits + extra
                end
            end
        end
        return demerits
    end,
    [breakcodes.collect] = function()
        if texgetnest("ptr") == nestlevel then
            for k, v in next, traced do
                local t = serials[k]
                if t then
                    -- the node ranges are only there when the entry was created
                    -- while tracing and when a word was found at that side
                    local fb, lb, fa, la = t[7], t[8], t[9], t[10]
                    if v == "before" then
                        if fb then
                            showc(fb,lb,"trace:dr")
                        end
                    elseif v == "after" then
                        if fa then
                            showc(fa,la,"trace:dg")
                        end
                    else
                        if fb then
                            showc(fb,lb,"trace:dy")
                        end
                        if fa then
                            showc(fa,la,"trace:dy")
                        end
                    end
                end
            end
        end
    end,
    [breakcodes.wrapup] = function()
        if texgetnest("ptr") == nestlevel then
            serials = false
            cached  = false
            traced  = false
        end
    end,
}
435
-- The handler that is installed as linebreak check 2. On every call it refreshes
-- the demerit parameters and the limit and, when at least one of the demerits is
-- positive and the limit is positive too, dispatches to the action that belongs to
-- the given break code. Whatever the action returns is passed on; nothing is
-- returned when the mechanism is inactive or when there is no such action.
--
-- Keep in mind that 'and' binds tighter than 'or', so the demerit test needs its
-- parentheses: a zero limit would otherwise make every pair of words match.

nodes.handlers.linebreakchecks[2] = function(what,checks,...)
 -- righttwindemerits = texgetcount(c_righttwindemerits)
 -- lefttwindemerits  = texgetcount(c_lefttwindemerits)
    righttwindemerits = texget("righttwindemerits")
    lefttwindemerits  = texget("lefttwindemerits")
    twinslimit        = texgetcount(c_twinslimit)
    if (righttwindemerits > 0 or lefttwindemerits > 0) and twinslimit > 0 then
        nestlevel = texgetnest("ptr")
        local a = actions[what]
        if a then
            return a(checks,...)
        end
    end
end
450