-- typo-brk.lmt / size: 16 Kb / last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['typo-brk'] = {
2    version   = 1.001,
3    comment   = "companion to typo-brk.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
-- This code dates from the early days and is still somewhat experimental; it
-- is due to be optimized and improved.
11
12local next, type, tonumber, tostring = next, type, tonumber, tostring
13local utfbyte, utfchar = utf.byte, utf.char
14local format = string.format
15
16local trace_breakpoints = false  trackers.register("typesetters.breakpoints", function(v) trace_breakpoints = v end)
17
18local report_breakpoints = logs.reporter("typesetting","breakpoints")
19
20local nodes, node = nodes, node
21
22local settings_to_array  = utilities.parsers.settings_to_array
23
24local nuts               = nodes.nuts
25local tonut              = nuts.tonut
26
27local getnext            = nuts.getnext
28local getprev            = nuts.getprev
29local getboth            = nuts.getboth
30local getsubtype         = nuts.getsubtype
31local getfont            = nuts.getfont
32local getid              = nuts.getid
33local getattrlist        = nuts.getattrlist
34local getattr            = nuts.getattr
35local getlanguage        = nuts.getlanguage
36local isglyph            = nuts.isglyph
37
38local setattr            = nuts.setattr
39local setattrlist        = nuts.setattrlist
40local setlink            = nuts.setlink
41local setchar            = nuts.setchar
42local setdisc            = nuts.setdisc
43local setnext            = nuts.setnext
44local setprev            = nuts.setprev
45local setboth            = nuts.setboth
46local setsubtype         = nuts.setsubtype
47
48local copy_node          = nuts.copy
49local copy_node_list     = nuts.copylist
50local flushnode          = nuts.flushnode
51local insertnodebefore   = nuts.insertbefore
52local insertnodeafter    = nuts.insertafter
53local remove_node        = nuts.remove
54local endofmath          = nuts.endofmath
55local findattribute      = nuts.findattribute
56local unsetattributes    = nuts.unsetattributes
57
58local tonodes            = nuts.tonodes
59
60local texsetattribute    = tex.setattribute
61
62local unsetvalue         <const> = attributes.unsetvalue
63
64local nodepool           = nuts.pool
65local enableaction       = nodes.tasks.enableaction
66
67local v_reset            <const> = interfaces.variables.reset
68local v_yes              <const> = interfaces.variables.yes
69
70local implement          = interfaces.implement
71
72local new_penalty        = nodepool.penalty
73local new_glue           = nodepool.glue
74local new_disc           = nodepool.disc
75local new_wordboundary   = nodepool.wordboundary
76
77local nodecodes          = nodes.nodecodes
78local kerncodes          = nodes.kerncodes
79
80local kern_code          <const> = nodecodes.kern
81local math_code          <const> = nodecodes.math
82
83local fontkern_code            <const> = kerncodes.fontkern
84local italiccorrection_code    <const> = kerncodes.italiccorrection
85local leftcorrectionkern_code  <const> = kerncodes.leftcorrectionkern
86local rightcorrectionkern_code <const> = kerncodes.rightcorrectionkern
87
88local is_letter          = characters.is_letter
89
90local typesetters        = typesetters
91
-- Namespace setup. Tables that already hang off typesetters.breakpoints are
-- reused, otherwise fresh ones are created.

local breakpoints        = typesetters.breakpoints or { }
local mapping            = breakpoints.mapping     or { }
local methods            = breakpoints.methods     or { }

typesetters.breakpoints  = breakpoints
breakpoints.mapping      = mapping
breakpoints.numbers      = breakpoints.numbers or { }
breakpoints.methods      = methods

local a_breakpoints      <const> = attributes.private("breakpoint")

storage.register("typesetters/breakpoints/mapping", mapping, "typesetters.breakpoints.mapping")

-- Mind that 'numbers' is the very same table as 'mapping' (and not the table
-- kept in breakpoints.numbers): a class record can be reached both by its
-- index and by its name, which is what breakpoints.set depends on when it
-- resolves a class name through 'mapping'.

local numbers            = mapping

-- Make every indexed class record reachable by name as well.

for index=1,#mapping do
    local class = mapping[index]
    numbers[class.name] = class
end
112
-- This needs a cleanup ... maybe turn all of them into disc nodes.

-- Todo: use boundaries.
116
-- Wraps the span start..stop in penalty/glue pairs that carry the attribute
-- list of 'start'. The calls below insert so that the list reads
--
--   penalty(before) glue start ... stop penalty(after) glue
--
-- The leading pair is left out when 'kern' is set. Nothing is returned and the
-- results of the insert calls are not used, so a changed head is not passed
-- back to the caller.

local function insert_break(head,start,stop,before,after,kern)
    if not kern then
        local leadpenalty = new_penalty(before)
        local leadglue    = new_glue()
        setattrlist(leadpenalty,start)
        setattrlist(leadglue,start)
        -- both go in front of start, so the penalty ends up before the glue
        insertnodebefore(head,start,leadpenalty)
        insertnodebefore(head,start,leadglue)
    end
    local tailpenalty = new_penalty(after)
    local tailglue    = new_glue()
    setattrlist(tailpenalty,start)
    setattrlist(tailglue,start)
    -- both go right after stop: the one inserted last (the penalty) comes first
    insertnodeafter(head,stop,tailglue)
    insertnodeafter(head,stop,tailpenalty)
end
133
-- Method 1: keep the character(s) and permit a break after them by wrapping
-- the span start..stop with insert_break (penalty 10000 in front, 0 after).
--
-- head     : head of the node list
-- start    : first node of the matched span
-- stop     : last node of the matched span (equals start unless a range matched)
-- settings : not used by this method
-- kern     : when set, insert_break leaves out the leading penalty/glue pair
--
-- Returns head and stop.

methods[1] = function(head,start,stop,settings,kern)
    -- We look at the neighbours of the whole span, like method 6 does. Asking
    -- for both neighbours of 'stop' would, for a multi glyph range, test a node
    -- inside the range and thereby never notice that nothing precedes 'start'.
    local p = getprev(start)
    local n = getnext(stop)
    if p and n then
        insert_break(head,start,stop,10000,0,kern)
    end
    return head, stop
end
141
-- Method 6: move the span start..stop into a discretionary that sits between
-- two word boundary nodes. The span itself is passed as the second argument of
-- setdisc, nil as the third and a copy of the span as the fourth. When a kern
-- was seen in front of the span we fall back on a plain insert_break.
--
-- Returns head and the node after which the caller may continue: the trailing
-- word boundary when a discretionary was made, otherwise stop.

methods[6] = function(head,start,stop,settings,kern)
    local before = getprev(start)
    local after  = getnext(stop)
    if not (before and after) then
        return head, stop
    end
    if kern then
        insert_break(head,start,stop,10000,0,kern)
        return head, stop
    end
    -- replace this
    local leftboundary  = new_wordboundary()
    local disc          = new_disc()
    local rightboundary = new_wordboundary()
    setattrlist(disc,start) -- otherwise basemode is forced and we crash
    setlink(before,leftboundary,disc,rightboundary,after)
    -- detach the span from its old neighbours before it goes into the disc
    local copy
    if start == stop then
        setboth(start)
        copy = copy_node(start)
    else
        setprev(start)
        setnext(stop)
        copy = copy_node_list(start)
    end
    setdisc(disc,start,nil,copy)
    return head, rightboundary
end
168
-- Method 2: replace the character by a discretionary whose second list is a
-- copy of the character followed by the language's prehyphenchar, and whose
-- third list is the original character. Nothing happens when the character
-- has no node before or after it.
--
-- Returns head and the new discretionary (or the untouched start).

methods[2] = function(head,start) -- ( => (-
    local before, after = getboth(start)
    if not before or not after then
        return head, start
    end
    local original
    head, start, original = remove_node(head,start)
    local post   = copy_node(original)
    local hyphen = copy_node(post)
    setchar(hyphen,languages.prehyphenchar(getlanguage(post)))
    setlink(post,hyphen)
    local disc = new_disc(nil,post,original)
    head, start = insertnodebefore(head,start,disc)
    setattrlist(start,original)
    insert_break(head,start,start,10000,10000)
    return head, start
end
184
-- Method 3: replace the character by a discretionary whose first list is the
-- language's prehyphenchar followed by a copy of the character, and whose
-- third list is the original character. Nothing happens when the character
-- has no node before or after it.
--
-- head  : head of the node list
-- start : the glyph that triggered the breakpoint
--
-- Returns head and the new discretionary (or the untouched start).

methods[3] = function(head,start) -- ) => -)
    local p, n = getboth(start)
    if p and n then
        local replace
        head, start, replace = remove_node(head,start)
        local pre    = copy_node(replace)
        local hyphen = copy_node(pre)
        setchar(hyphen,languages.prehyphenchar(getlanguage(pre)))
        setlink(hyphen,pre)
        head, start = insertnodebefore(head,start,new_disc(hyphen,nil,replace)) -- so not pre !
        -- The attributes come from the removed glyph. This used to refer to an
        -- undefined 'tmp' (so nil), which left the discretionary without the
        -- attribute list of the character that it replaces.
        setattrlist(start,replace)
        insert_break(head,start,start,10000,10000)
    end
    return head, start
end
200
-- Method 4: replace the character by a discretionary in which the character
-- shows up three times: two fresh copies plus the original glyph, passed to
-- setdisc in that order. Nothing happens when the character has no node
-- before or after it.
--
-- Returns head and the new discretionary (or the untouched start).

methods[4] = function(head,start) -- - => - - -
    local before, after = getboth(start)
    if not (before and after) then
        return head, start
    end
    local glyph
    head, start, glyph = remove_node(head,start)
    local disc
    head, disc = insertnodebefore(head,start,new_disc())
    setattrlist(disc,glyph)
    setdisc(disc,copy_node(glyph),copy_node(glyph),glyph)
    insert_break(head,disc,disc,10000,10000)
    return head, disc
end
213
-- Method 5: replace the character by a discretionary whose three lists are
-- built from the strings settings.left, settings.right and settings.middle
-- (in that order for setdisc), using the font and attribute list of the
-- character. A missing string results in an empty slot. The original glyph is
-- flushed. Nothing happens when the character has no node before or after it.
--
-- Returns head and the new discretionary (or the untouched start).

methods[5] = function(head,start,stop,settings) -- x => p q r
    local before, after = getboth(start)
    if not (before and after) then
        return head, start
    end
    local glyph
    head, start, glyph = remove_node(head,start)
    head, start = insertnodebefore(head,start,new_disc())
    -- the removed glyph tells us what the replacement text should look like
    local attr = getattrlist(glyph)
    local font = getfont(glyph)
    local left = settings.left
    if left then
        left = tonodes(tostring(left),font,attr)
    end
    local right = settings.right
    if right then
        right = tonodes(tostring(right),font,attr)
    end
    local middle = settings.middle
    if middle then
        middle = tonodes(tostring(middle),font,attr)
    end
    setdisc(start,left,right,middle)
    setattrlist(start,attr)
    flushnode(glyph)
    insert_break(head,start,start,10000,10000)
    return head, start
end
241
-- We know that we have a limited set of characters.
-- What if characters are replaced by the font handler?
-- Do we need to go into disc nodes (or do it as a first step, but then we need a pre/post font handler)?
245
-- Tells whether a kern node is of a kind that the letter counting below is
-- willing to step over: a font kern, an italic correction or a right
-- correction kern.

local function is_transparent_kern(n)
    local s = getsubtype(n)
    return s == fontkern_code or s == italiccorrection_code or s == rightcorrectionkern_code
end

-- First pass: collect the glyphs for which the active breakpoint class has an
-- entry that applies to the language of the glyph. Each hit is stored as
-- { start, stop, cmap, smap } where stop differs from start only when the
-- entry has 'skip' set and the same character repeats. Math is jumped over.
--
-- Returns the list of hits (nil when there are none) and their number.

local function collect_hits(head)
    local hits    = nil
    local nofhits = 0
    local attr    = nil -- last attribute value seen, so we resolve 'map' only on a change
    local map     = nil
    local current = head
    while current do
        local char, id = isglyph(current)
        if char then
            local cmap = nil
            local smap = nil
            local a    = getattr(current,a_breakpoints)
            if a and a > 0 then
                if a ~= attr then
                    local data = mapping[a]
                    map  = data and data.characters or nil
                    attr = a
                end
                cmap = map and map[char]
                if cmap then
                    -- for now we collect but when found ok we can move the handler here
                    -- although it saves nothing in terms of performance
                    local lang = getlanguage(current)
                    smap = lang and lang >= 0 and lang < 0x7FFF and (cmap[languages.numbers[lang]] or cmap[""])
                end
            end
            if smap then
                local start = current
                local stop  = current
                current = getnext(current)
                if smap.skip then
                    -- a run of the same character is treated as one span
                    while current and isglyph(current) == char do
                        stop    = current
                        current = getnext(current)
                    end
                end
                if not hits then
                    hits = { }
                end
                nofhits = nofhits + 1
                hits[nofhits] = { start, stop, cmap, smap }
            else
                current = getnext(current)
            end
        elseif id == math_code then
            attr    = nil
            current = endofmath(current)
            if current then
                current = getnext(current)
            end
        else
            current = getnext(current)
        end
    end
    return hits, nofhits
end

-- Counts the letters that directly precede 'start', going backwards until
-- 'wanted' letters are seen. A transparent kern is stepped over only as long
-- as no letter has been counted; the last such kern is returned as second
-- value. A glyph that is not a letter gives a count of -1, anything else stops
-- the counting.

local function count_letters_before(start,wanted)
    local count = 0
    local kern  = nil
    local prev  = getprev(start)
    while prev and count ~= wanted do
        local char, id = isglyph(prev)
        if char then
            if not is_letter[char] then
                return -1, kern
            end
            count = count + 1
        elseif id == kern_code and count == 0 and is_transparent_kern(prev) then
            kern = prev
        else
            break
        end
        prev = getprev(prev)
    end
    return count, kern
end

-- Counts the letters that directly follow 'stop', the mirror image of the
-- function above. The kern test used to look at the left count instead of the
-- right one, so with the default nleft of 1 a kern right after the character
-- always blocked the breakpoint; now a transparent kern is stepped over as
-- long as no letter has been counted on this side.

local function count_letters_after(stop,wanted,cmap)
    local count = 0
    local nxt   = getnext(stop)
    while nxt and count ~= wanted do
        local char, id = isglyph(nxt)
        if char then
            if not is_letter[char] then
                return -1
            end
            -- let's not make it too messy
            -- NOTE(review): cmap is indexed by language tag elsewhere in this
            -- file, so indexing it by character code here probably never
            -- matches; kept as it was, to be confirmed.
            if count == 1 and cmap[char] then
                break
            end
            count = count + 1
        elseif id == kern_code and count == 0 and is_transparent_kern(nxt) then
            -- step over it
        else
            break
        end
        nxt = getnext(nxt)
    end
    return count
end

-- The node list handler. When the breakpoint attribute shows up somewhere in
-- the list, all candidate characters are collected first and after that each
-- candidate that has exactly smap.nleft letters in front and smap.nright
-- letters after it is handed to the method registered under smap.type.
--
-- head : head of the node list
--
-- Returns the (possibly changed) head.

function breakpoints.handler(head)
    local _, found = findattribute(head,a_breakpoints)
    if not found then
        return head
    end
    local hits, nofhits = collect_hits(head)
    if not hits then
        return head
    end
    -- we have hits
    for i=1,nofhits do
        local hit   = hits[i]
        local start = hit[1]
        local stop  = hit[2]
        local cmap  = hit[3]
        local smap  = hit[4]
        local cleft, kern = count_letters_before(start,smap.nleft)
        if cleft == smap.nleft then
            local cright = count_letters_after(stop,smap.nright,cmap)
            if cright == smap.nright then
                local method = methods[smap.type]
                if method then
                    -- only the head matters here, the second result is not used
                    head = method(head,start,stop,smap,kern)
                end
            end
        end
    end
    return head
end
406
-- This flag is consulted by breakpoints.set before it enables the handler.

local enabled = false

-- Registers a new, empty breakpoints class under the given name. The class
-- gets the next free index in 'mapping' and can be found by name in 'numbers'.
-- An already known name only results in a message.

function breakpoints.define(name)
    if numbers[name] then
        report_breakpoints("there is already a breakpoints class %a",name)
        return
    end
    local index = #mapping + 1
    local class = {
        name       = name,
        number     = index,
        characters = { },
    }
    mapping[index] = class
    numbers[name]  = class
end
424
-- Gives back the value unless it is empty (or false/nil), in which case we
-- get nil.

local function nonempty(str)
    if str and str ~= "" then
        return str
    end
    return nil
end

-- Stores the break behaviour of one character in a breakpoints class.
--
-- name     : name of a class made with breakpoints.define
-- char     : the character, as utf string
-- language : language tag that the entry applies to; nil ends up under ""
-- settings : table with type, nleft, nright (numbers or number strings, each
--            defaults to 1), left, right, middle (strings, empty means unset)
--            and range (the entry gets skip = true when this equals v_yes)
--
-- An unknown class only results in a message.

function breakpoints.setreplacement(name,char,language,settings)
    char = utfbyte(char)
    local class = numbers[name]
    if not class then
        report_breakpoints("there is no breakpoints class %a",name)
        return
    end
    local characters = class.characters
    local cmap       = characters[char]
    if not cmap then
        cmap = { }
        characters[char] = cmap
    end
    local entry = {
        type   = tonumber(settings.type)   or 1,
        nleft  = tonumber(settings.nleft)  or 1,
        nright = tonumber(settings.nright) or 1,
        left   = nonempty(settings.left),
        right  = nonempty(settings.right),
        middle = nonempty(settings.middle),
        skip   = settings.range == v_yes,
    } -- was { type or 1, before or 1, after or 1 }
    cmap[language or ""] = entry
end
449
-- Activates a breakpoints class by setting the breakpoint attribute to the
-- number of that class.
--
-- n : the name of a class, or v_reset
--
-- With v_reset or an unknown name the attribute is unset. The first time that
-- a known class is set, the handler gets enabled in the "processors" task
-- list.

function breakpoints.set(n)
    if n == v_reset then
        n = unsetvalue
    else
        local data = mapping[n]
        if data then
            if not enabled then
                if trace_breakpoints then
                    report_breakpoints("enabling breakpoints handler")
                end
                enableaction("processors","typesetters.breakpoints.handler")
                -- remember this, otherwise we enable (and report) again on
                -- each call because the flag never changed
                enabled = true
            end
            n = data.number
        else
            n = unsetvalue
        end
    end
    texsetattribute(a_breakpoints,n)
end
469
470-- interface
471
-- "definebreakpoints" takes one string and calls breakpoints.define with it.

implement {
    name      = "definebreakpoints",
    arguments = "string",
    actions   = breakpoints.define,
}

-- "definebreakpoint" takes three strings followed by a set of named values
-- and calls breakpoints.setreplacement with them; type, nleft and nright are
-- declared as integers.

implement {
    name      = "definebreakpoint",
    arguments = {
        "string",
        "string",
        "string",
        {
            { "type", "integer" },
            { "nleft", "integer" },
            { "nright", "integer" },
            { "right" },
            { "left" },
            { "middle" },
            { "range" },
        }
    },
    actions   = breakpoints.setreplacement,
}

-- "setbreakpoints" takes one string and calls breakpoints.set with it.

implement {
    name      = "setbreakpoints",
    arguments = "string",
    actions   = breakpoints.set,
}
502