spac-chr.lmt /size: 12 Kb    last modification: 2023-12-21 09:44
-- Make sure the global module registry exists before registering this file.
if not modules then
    modules = { }
end

-- Metadata describing this module.
modules['spac-chr'] = {
    version   = 1.001,
    optimize  = true,
    comment   = "companion to spac-chr.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
9
-- String helpers.

local byte  = string.byte
local lower = string.lower

-- beware: attribute copying is bugged ... there will be a proper luatex helper
-- for this

-- to be redone: characters will become tagged spaces instead as then we keep track of
-- spaceskip etc

-- todo: only setattr when export / use properties

local next = next

-- Tracing flags; the registered tracker callbacks flip them.

local trace_characters = false
local trace_nbsp       = false

trackers.register("typesetters.characters", function(v) trace_characters = v end)
trackers.register("typesetters.nbsp",       function(v) trace_nbsp       = v end)

local report_characters = logs.reporter("typesetting","characters")

-- Node library entry points.

local nodes = nodes
local node  = node

local nuts = nodes.nuts

-- Field accessors.

local getid       = nuts.getid
local getsubtype  = nuts.getsubtype
local setsubtype  = nuts.setsubtype
local getboth     = nuts.getboth
local getnext     = nuts.getnext
local getprev     = nuts.getprev
local getattr     = nuts.getattr
local setattr     = nuts.setattr
local getlanguage = nuts.getlanguage
local setchar     = nuts.setchar
local setattrlist = nuts.setattrlist
local getfont     = nuts.getfont
local getxscale   = nuts.getxscale
local isglyph     = nuts.isglyph

local setcolor = nodes.tracers.colors.set

-- List manipulation and traversal.

local insertnodebefore = nuts.insertbefore
local insertnodeafter  = nuts.insertafter
local remove_node      = nuts.remove
local nextchar         = nuts.traversers.char
local nextglyph        = nuts.traversers.glyph

local copy_node = nuts.copy

-- Node constructors.

local nodepool    = nuts.pool
local new_penalty = nodepool.penalty
local new_glue    = nodepool.glue
local new_kern    = nodepool.kern
local new_rule    = nodepool.rule

-- Node and glue codes.

local nodecodes = nodes.nodecodes
local gluecodes = nodes.gluecodes

local glyph_code     = nodecodes.glyph
local glue_code      = nodecodes.glue
local spaceskip_code = gluecodes.spaceskip

-- Character data; these are read from the global characters table, so they
-- have to come before the local with the same name defined further down.

local chardata      = characters.data
local ispunctuation = characters.is_punctuation
local canhavespace  = characters.can_have_space

local typesetters = typesetters

local unicodeblocks = characters.blocks

-- From here on characters refers to typesetters.characters.

local characters       = typesetters.characters or { } -- can be predefined
typesetters.characters = characters

-- Font hashes.

local fonthashes     = fonts.hashes
local fontparameters = fonthashes.parameters
local fontcharacters = fonthashes.characters
local fontquads      = fonthashes.quads

local setmetatableindex = table.setmetatableindex

-- Private attributes.

local a_character  = attributes.private("characters")
local a_alignstate = attributes.private("alignstate")

-- Characters whose width is used for the figure and punctuation spaces.

local c_zero   = byte('0')
local c_period = byte('.')
92
-- Inserts a glue after current whose width is a fraction of the quad of the
-- font of current, multiplied by the xscale of current. A zero fraction gives
-- a zero width glue without consulting the font. The glue gets the attribute
-- list of current and is tagged with the given unicode; the return values are
-- those of insertnodeafter.
local function inject_quad_space(unicode,head,current,fraction)
    local width = fraction
    if width ~= 0 then
        width = width * fontquads[getfont(current)]
    end
    local xscale = getxscale(current)
    local space  = new_glue(xscale * width)
    setattrlist(space,current)
    setattrlist(current) -- why reset all
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end
104
-- Inserts a glue after current that is as wide as the character parent in the
-- font of current. When the font lacks that character (or it has no width)
-- the space parameter of the font is used instead. The width is multiplied by
-- the xscale of current and the glue is tagged with the given unicode.
local function inject_char_space(unicode,head,current,parent)
    local font   = getfont(current)
    local data   = fontcharacters[font][parent]
    local xscale = getxscale(current)
    local width  = data and data.width
    if not width then
        width = fontparameters[font].space
    end
    local space = new_glue(xscale * width)
    setattrlist(space,current)
    setattrlist(current) -- why reset all
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end
115
-- Inserts a penalty of 10000 followed by a glue after current. The glue
-- components are the given space, stretch and shrink multiplied by the xscale
-- of current. Both nodes get the attribute list of current and the glue is
-- tagged with the given unicode. With the nbsp tracker enabled an orange rule,
-- a compensating negative kern and one more penalty are put between the first
-- penalty and the glue. The return values are those of the final
-- insertnodeafter call.
local function inject_nobreak_space(unicode,head,current,space,spacestretch,spaceshrink)
    local xscale  = getxscale(current)
    local skip    = new_glue(xscale*space,xscale*spacestretch,xscale*spaceshrink)
    local nobreak = new_penalty(10000)
    setattrlist(skip,current)
    setattrlist(nobreak,current)
    setattrlist(current) -- why reset all
    setattr(skip,a_character,unicode) -- bombs
    head, current = insertnodeafter(head,current,nobreak)
    if trace_nbsp then
        -- visual marker: the rule uses the unscaled space and the kern undoes it
        local marker = new_rule(space)
        local undo   = new_kern(-space)
        local guard  = new_penalty(10000)
        setcolor(marker,"orange")
        head, current = insertnodeafter(head,current,marker)
        head, current = insertnodeafter(head,current,undo)
        head, current = insertnodeafter(head,current,guard)
    end
    return insertnodeafter(head,current,skip)
end
136
-- Puts a non breakable space (U+00A0) after current, using the space
-- parameters of the font of current. When the alignstate attribute of current
-- lies in the range 1..3 the space gets no stretch and no shrink. The node
-- that comes back from the injector gets the spaceskip subtype and is returned
-- together with the head.
local function nbsp(head,current)
    local parameters = fontparameters[getfont(current)]
    local alignstate = getattr(current,a_alignstate) or 0
    local stretch    = 0
    local shrink     = 0
    if alignstate < 1 or alignstate > 3 then
        -- not one of the rigid states (the original marks 1..3 as flushright)
        stretch = parameters.spacestretch
        shrink  = parameters.spaceshrink
    end
    local glue
    head, glue = inject_nobreak_space(0x00A0,head,current,parameters.space,stretch,shrink)
    setsubtype(glue,spaceskip_code)
    return head, glue
end
148
149-- assumes nuts or nodes, depending on callers .. so no tonuts here
150
-- Replaces the node original by a non breakable space construct: the
-- replacement is injected after it and then the original is removed. The
-- return values are those of remove_node.
function characters.replacenbsp(head,original)
    head = nbsp(head,original) -- only the head is of interest here
    return remove_node(head,original,true)
end
155
-- Replaces every glyph with character U+00A0 in the list by a non breakable
-- space construct and returns the (possibly new) head.
--
-- The replaced glyph is not removed immediately: it is remembered in wipe and
-- removed when the next candidate shows up or when the traversal has finished,
-- which is the same scheme that characters.handler uses.
--
-- Fixed: the removal inside the loop used an undefined variable as head and
-- removed the glyph being visited instead of the pending one, and the removal
-- after the loop referred to the loop variable, which is out of scope there.
function characters.replacenbspaces(head)
    -- todo: wiping as in characters.handler(head)
    local wipe = false
    for current, char, font in nextglyph, head do -- can be anytime so no traversechar
        if char == 0x00A0 then
            if wipe then
                head = remove_node(head,wipe,true)
                wipe = false
            end
            local h = nbsp(head,current)
            if h then
                wipe = current
            end
        end
    end
    if wipe then
        head = remove_node(head,wipe,true)
    end
    return head
end
176
177-- This initialization might move someplace else if we need more of it. The problem is that
178-- this module depends on fonts so we have an order problem.
179
-- Set of characters next to which a non breakable space is left alone. The
-- table starts empty; the first lookup of a missing key fills it with the
-- devanagari and kannada blocks, removes the index handler again and then
-- answers the lookup from the filled table.
local nbsphash = { }

setmetatableindex(nbsphash,function(hash,key)
    -- this needs checking !
    local block = unicodeblocks.devanagari
    for unicode=block.first,block.last do
        nbsphash[unicode] = true
    end
    block = unicodeblocks.kannada
    for unicode=block.first,block.last do
        nbsphash[unicode] = true
    end
    setmetatableindex(nbsphash,nil)
    return nbsphash[key]
end)
187
-- Inserts a glue before current that is built from the space, spacestretch
-- and spaceshrink parameters of the given font, each multiplied by the xscale
-- of current. The return values are those of insertnodebefore.
local function inject_regular_space(head,current,font)
    local parameters = fontparameters[font]
    local xscale     = getxscale(current)
    local width      = xscale * parameters.space
    local stretch    = xscale * parameters.spacestretch
    local shrink     = xscale * parameters.spaceshrink
    return insertnodebefore(head,current,new_glue(width,stretch,shrink))
end
197
-- Handlers for special characters, indexed by unicode. Each handler is called
-- as method(head,current) with current being the glyph that carries the
-- character.

local methods = {

    -- The next one uses an attribute assigned to the character but still we
    -- don't have the 'local' value.

    -- maybe also 0x0008 : backspace

    -- Watch out: a return value means "remove"! A handler that wants to keep
    -- the glyph has to return false (or nothing).

    [0x001E] = function(head,current) -- kind of special
        local following = getnext(current)
        if not following then
            -- nothing follows so there is nothing to remove or to insert
        elseif getid(following) == glue_code and getsubtype(following) == spaceskip_code then
            -- remove when no valid character following
            local afterglue = getnext(following)
            if afterglue then
                local char = isglyph(afterglue)
                if char and not canhavespace[char] then
                    remove_node(head,following,true)
                end
            end
        else
            -- insert when valid character following
            local char, font = isglyph(following)
            if char and canhavespace[char] then
                head, current = inject_regular_space(head,current,font)
            end
        end
        return head, current
    end,

    [0x001F] = function(head,current) -- kind of special
        local following = getnext(current)
        if following then
            -- a space is inserted unless punctuation follows
            local char, font = isglyph(following)
            if char and not ispunctuation[char] then
                head, current = inject_regular_space(head,current,font)
            end
        end
        return head, current
    end,

    [0x00A0] = function(head,current) -- nbsp
        local prev, next = getboth(current)
        if next then
            -- Look at the character that follows the nbsp (this used to test
            -- current itself, which is always 0x00A0 here, so none of the
            -- checks below could ever succeed).
            local char = isglyph(next)
            if not char then
                -- move on
            elseif char == 0x200C or char == 0x200D then -- nzwj zwj
                -- skip the joiner and test the character after it
                next = getnext(next)
                if next then
                    char = isglyph(next)
                    if char and nbsphash[char] then
                        return false
                    end
                end
            elseif nbsphash[char] then
                return false
            end
        end
        if prev then
            local char = isglyph(prev)
            if char and nbsphash[char] then
                return false
            end
        end
        return nbsp(head,current)
    end,

    [0x00AD] = function(head,current) -- softhyphen
        return insertnodeafter(head,current,languages.explicithyphen(current))
    end,

    -- The fixed width spaces are expressed as a fraction of the font quad.

    [0x2000] = function(head,current) -- enquad
        return inject_quad_space(0x2000,head,current,1/2)
    end,

    [0x2001] = function(head,current) -- emquad
        return inject_quad_space(0x2001,head,current,1)
    end,

    [0x2002] = function(head,current) -- enspace
        return inject_quad_space(0x2002,head,current,1/2)
    end,

    [0x2003] = function(head,current) -- emspace
        return inject_quad_space(0x2003,head,current,1)
    end,

    [0x2004] = function(head,current) -- threeperemspace
        return inject_quad_space(0x2004,head,current,1/3)
    end,

    [0x2005] = function(head,current) -- fourperemspace
        return inject_quad_space(0x2005,head,current,1/4)
    end,

    [0x2006] = function(head,current) -- sixperemspace
        return inject_quad_space(0x2006,head,current,1/6)
    end,

    -- These two take the width of a reference character in the current font.

    [0x2007] = function(head,current) -- figurespace
        return inject_char_space(0x2007,head,current,c_zero)
    end,

    [0x2008] = function(head,current) -- punctuationspace
        return inject_char_space(0x2008,head,current,c_period)
    end,

    [0x2009] = function(head,current) -- breakablethinspace
        return inject_quad_space(0x2009,head,current,1/8) -- same as next
    end,

    [0x200A] = function(head,current) -- hairspace
        return inject_quad_space(0x200A,head,current,1/8) -- same as previous (todo)
    end,

    [0x200B] = function(head,current) -- zerowidthspace
        return inject_quad_space(0x200B,head,current,0)
    end,

    -- Rigid non breakable spaces: no stretch and no shrink.

    [0x202F] = function(head,current) -- narrownobreakspace
        return inject_nobreak_space(0x202F,head,current,fontquads[getfont(current)]/8,0,0)
    end,

    [0x205F] = function(head,current) -- math thinspace
        return inject_nobreak_space(0x205F,head,current,4*fontquads[getfont(current)]/18,0,0)
    end,

    -- The next one is also a bom so maybe only when we have glyphs around it

 -- [0xFEFF] = function(head,current) -- zerowidthnobreakspace
 --     return head, current
 -- end,

}

characters.methods = methods
336
-- Runs over the characters in the list and calls the method registered for a
-- character, if there is one. When a method returns a true value the glyph is
-- scheduled for removal. The removal is postponed until the next character
-- with a method is seen (or until the traversal has ended) so the glyph is
-- still in the list while the traverser moves on. Returns the head.
function characters.handler(head)
    local pending = false -- glyph that still has to be taken out of the list
    for current, char in nextchar, head do
        local method = methods[char]
        if method then
            if pending then
                head    = remove_node(head,pending,true)
                pending = false
            end
            if trace_characters then
                local description = lower(chardata[char].description)
                report_characters("replacing character %C, description %a",char,description)
            end
            if method(head,current) then
                pending = current
            end
        end
    end
    if pending then
        head = remove_node(head,pending,true)
    end
    return head
end
360
361-- function characters.handler(head)
362--     local wiped = false
363--     for current, char in nextchar, head do
364--         local method = methods[char]
365--         if method then
366--             if wiped then
367--                 wiped[#wiped+1] = current
368--             else
369--                 wiped = { current }
370--             end
371--             if trace_characters then
372--                 report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
373--             end
374--             local h = method(head,current)
375--             if h then
376--                 head = h
377--             end
378--         end
379--     end
380--     if wiped then
381--         for i=1,#wiped do
382--             head = remove_node(head,wiped[i],true)
383--         end
384--     end
385--     return head
386-- end
387