-- spac-chr.lua / size: 11 Kb / last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['spac-chr'] = {
2    version   = 1.001,
3    optimize  = true,
4    comment   = "companion to spac-chr.mkiv",
5    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
6    copyright = "PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files"
8}
9
10local byte, lower = string.byte, string.lower
11
12-- beware: attribute copying is bugged ... there will be a proper luatex helper
13-- for this
14
15-- to be redone: characters will become tagged spaces instead as then we keep track of
16-- spaceskip etc
17
18-- todo: only setattr when export / use properties
19
20local next = next
21
22local trace_characters = false  trackers.register("typesetters.characters", function(v) trace_characters = v end)
23local trace_nbsp       = false  trackers.register("typesetters.nbsp",       function(v) trace_nbsp       = v end)
24
25local report_characters = logs.reporter("typesetting","characters")
26
27local nodes, node = nodes, node
28
29local nuts               = nodes.nuts
30
31local getid              = nuts.getid
32local getboth            = nuts.getboth
33local getsubtype         = nuts.getsubtype
34local setsubtype         = nuts.setsubtype
35local getnext            = nuts.getnext
36local getprev            = nuts.getprev
37local getattr            = nuts.getattr
38local setattr            = nuts.setattr
39local getlanguage        = nuts.getlanguage
40local setchar            = nuts.setchar
41local setattrlist        = nuts.setattrlist
42local getfont            = nuts.getfont
43local isglyph            = nuts.isglyph
44
45local setcolor           = nodes.tracers.colors.set
46
47local insertnodebefore   = nuts.insertbefore
48local insertnodeafter    = nuts.insertafter
49local remove_node        = nuts.remove
50local nextchar           = nuts.traversers.char
51local nextglyph          = nuts.traversers.glyph
52
53local copy_node          = nuts.copy
54
55local nodepool           = nuts.pool
56local new_penalty        = nodepool.penalty
57local new_glue           = nodepool.glue
58local new_kern           = nodepool.kern
59local new_rule           = nodepool.rule
60
61local nodecodes          = nodes.nodecodes
62local gluecodes          = nodes.gluecodes
63
64local glyph_code         = nodecodes.glyph
65local glue_code          = nodecodes.glue
66local spaceskip_code     = gluecodes.spaceskip
67
68local chardata           = characters.data
69local ispunctuation      = characters.is_punctuation
70local canhavespace       = characters.can_have_space
71
72local typesetters        = typesetters
73
74local unicodeblocks      = characters.blocks
75
76local characters         = typesetters.characters or { } -- can be predefined
77typesetters.characters   = characters
78
79local fonthashes         = fonts.hashes
80local fontparameters     = fonthashes.parameters
81local fontcharacters     = fonthashes.characters
82local fontquads          = fonthashes.quads
83
84local setmetatableindex  = table.setmetatableindex
85
86local a_character        = attributes.private("characters") -- this will become a property (or maybe even a field)
87local a_alignstate       = attributes.private("alignstate")
88
89local c_zero   = byte('0')
90local c_period = byte('.')
91
-- Inserts glue after 'current' as replacement for a fixed width space character.
-- A non-zero 'fraction' is multiplied by the quad of the font of 'current'; a zero
-- fraction is passed on as-is without a font lookup. The glue gets the attribute
-- list of 'current' and is tagged with 'unicode' in the 'characters' attribute.
-- Returns whatever the insertion returns.

local function inject_quad_space(unicode,head,current,fraction)
    local width = fraction
    if width ~= 0 then
        local quad = fontquads[getfont(current)]
        width = width * quad
    end
    local space = new_glue(width)
    setattrlist(space,current)
    setattrlist(current) -- called without a list (the original asks: why reset all)
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end
102
-- Inserts glue after 'current' that is as wide as character 'parent' in the font
-- of 'current'. When the font has no such character (or no width for it) the
-- 'space' font parameter is used instead. The glue gets the attribute list of
-- 'current' and is tagged with 'unicode' in the 'characters' attribute. Returns
-- whatever the insertion returns.

local function inject_char_space(unicode,head,current,parent)
    local font  = getfont(current)
    local data  = fontcharacters[font][parent]
    local width = data and data.width
    if not width then
        width = fontparameters[font].space
    end
    local space = new_glue(width)
    setattrlist(space,current)
    setattrlist(current) -- called without a list (the original asks: why reset all)
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end
112
-- Inserts a penalty of 10000 followed by glue after 'current'. The glue is made
-- from 'space', 'spacestretch' and 'spaceshrink' (the last two may be omitted by
-- the caller), gets the attribute list of 'current' and is tagged with 'unicode'.
-- When the "typesetters.nbsp" tracker is enabled an orange rule of width 'space',
-- a kern of '-space' and one more penalty are put between the first penalty and
-- the glue. Returns whatever the final insertion returns.

local function inject_nobreak_space(unicode,head,current,space,spacestretch,spaceshrink)
    local skip  = new_glue(space,spacestretch,spaceshrink)
    local guard = new_penalty(10000)
    setattrlist(skip,current)
    setattrlist(current) -- called without a list (the original asks: why reset all)
    setattr(skip,a_character,unicode) -- marked as "bombs" in the original
    head, current = insertnodeafter(head,current,guard)
    if trace_nbsp then
        -- visual marker: rule, then kern back over it, then a second penalty
        local marker  = new_rule(space)
        local backup  = new_kern(-space)
        local reguard = new_penalty(10000)
        setcolor(marker,"orange")
        head, current = insertnodeafter(head,current,marker)
        head, current = insertnodeafter(head,current,backup)
        head, current = insertnodeafter(head,current,reguard)
    end
    return insertnodeafter(head,current,skip)
end
131
-- Injects a non breakable space (0x00A0) after 'current' using the space related
-- parameters of its font. When the 'alignstate' attribute of 'current' is in the
-- range 1..3 (labelled "flushright" in the original) the glue gets zero stretch
-- and shrink. The injected glue gets the spaceskip subtype. Returns the head and
-- the injected glue.

local function nbsp(head,current)
    local parameters = fontparameters[getfont(current)]
    local state      = getattr(current,a_alignstate) or 0
    local stretch    = 0
    local shrink     = 0
    if state < 1 or state > 3 then
        stretch = parameters.spacestretch
        shrink  = parameters.spaceshrink
    end
    head, current = inject_nobreak_space(0x00A0,head,current,parameters.space,stretch,shrink)
    setsubtype(current,spaceskip_code)
    return head, current
end
143
144-- assumes nuts or nodes, depending on callers .. so no tonuts here
145
-- Replaces the single node 'original' by a non breakable space: the penalty and
-- glue are injected after it and then the node itself is removed (third argument
-- 'true' is passed to the remover). Returns whatever the remover returns.

function characters.replacenbsp(head,original)
    head = nbsp(head,original) -- only the first result (the head) is needed here
    return remove_node(head,original,true)
end
150
-- Replaces every 0x00A0 glyph in the list starting at 'head' by a non breakable
-- space (penalty plus glue, see nbsp) and returns the possibly changed head.
--
-- A replaced glyph is not removed right away because the traverser is still
-- positioned on it. Instead it is remembered in 'wipe' and removed either when
-- the next 0x00A0 is found or after the loop has finished.
--
-- Fixed: the deferred removal used an undefined 'h' as head and removed 'current'
-- (the glyph just found, or an out of scope variable after the loop) instead of
-- the remembered 'wipe' node.

function characters.replacenbspaces(head)
    local wipe = false
    for current, char in nextglyph, head do -- can be anytime so no traversechar
        if char == 0x00A0 then
            if wipe then
                -- flush the previously replaced glyph before handling this one
                head = remove_node(head,wipe,true)
                wipe = false
            end
            local h = nbsp(head,current)
            if h then
                wipe = current
            end
        end
    end
    if wipe then
        -- flush the last replaced glyph
        head = remove_node(head,wipe,true)
    end
    return head
end
170
171-- This initialization might move someplace else if we need more of it. The problem is that
172-- this module depends on fonts so we have an order problem.
173
-- Set of characters next to which a nbsp glyph is kept as-is. The table is filled
-- on first access to a missing key: all code points of the devanagari and kannada
-- blocks are marked, the index handler is removed again, and the (now plain)
-- lookup is answered.

local nbsphash = { }

setmetatableindex(nbsphash,function(t,k)
    -- this needs checking !
    local function mark(block)
        for unicode=block.first,block.last do
            nbsphash[unicode] = true
        end
    end
    mark(unicodeblocks.devanagari)
    mark(unicodeblocks.kannada)
    setmetatableindex(nbsphash,nil) -- one time initialization only
    return nbsphash[k]
end)
181
-- Handlers per character, indexed by unicode. Each handler is called by
-- characters.handler as method(head,current) where 'current' is the node that
-- carries the character.
--
-- Watch out: a (non false) first return value means "remove": the handler then
-- takes 'current' out of the list afterwards. Returning false keeps the node.
--
-- Fixed: the 0x00A0 handler inspected 'current' (always the nbsp itself) instead
-- of 'next', so the checks on the following character could never trigger.

local methods = {

    -- The next one uses an attribute assigned to the character but still we
    -- don't have the 'local' value.

    -- maybe also 0x0008 : backspace

    -- When followed by spaceskip glue and then a glyph that is not flagged in
    -- 'canhavespace', that glue is removed.

    [0x001E] = function(head,current) -- kind of special
        local next = getnext(current)
        if next and getid(next) == glue_code and getsubtype(next) == spaceskip_code then
            local nextnext = getnext(next)
            if nextnext then
                local char, font = isglyph(nextnext)
                if char and not canhavespace[char] then
                    head, current = remove_node(head,next,true)
                end
            end
        end
        return head, current
    end,

    -- When followed by a glyph that is not punctuation, glue based on the space
    -- parameters of the font of that glyph is inserted before 'current'.

    [0x001F] = function(head,current) -- kind of special
        local next = getnext(current)
        if next then
            local char, font = isglyph(next)
            if char then
                if not ispunctuation[char] then
                    local p = fontparameters[font]
                    head, current = insertnodebefore(head,current,new_glue(p.space,p.space_stretch,p.space_shrink))
                end
            end
        end
        return head, current
    end,

    -- The nbsp glyph is kept (false is returned) when the following glyph, or the
    -- glyph after a following zwnj/zwj, or the preceding glyph is in 'nbsphash'.
    -- Otherwise it is replaced by a penalty and glue.

    [0x00A0] = function(head,current) -- nbsp
        local prev, next = getboth(current)
        if next then
            local char = isglyph(next)
            if not char then
                -- move on
            elseif char == 0x200C or char == 0x200D then -- zwnj zwj
                next = getnext(next)
                if next then
                    char = isglyph(next)
                    if char and nbsphash[char] then
                        return false
                    end
                end
            elseif nbsphash[char] then
                return false
            end
        end
        if prev then
            local char = isglyph(prev)
            if char and nbsphash[char] then
                return false
            end
        end
        return nbsp(head,current)
    end,

    [0x00AD] = function(head,current) -- softhyphen
        return insertnodeafter(head,current,languages.explicithyphen(current))
    end,

    -- The following ones become glue with a width that is a fraction of the quad.

    [0x2000] = function(head,current) -- enquad
        return inject_quad_space(0x2000,head,current,1/2)
    end,

    [0x2001] = function(head,current) -- emquad
        return inject_quad_space(0x2001,head,current,1)
    end,

    [0x2002] = function(head,current) -- enspace
        return inject_quad_space(0x2002,head,current,1/2)
    end,

    [0x2003] = function(head,current) -- emspace
        return inject_quad_space(0x2003,head,current,1)
    end,

    [0x2004] = function(head,current) -- threeperemspace
        return inject_quad_space(0x2004,head,current,1/3)
    end,

    [0x2005] = function(head,current) -- fourperemspace
        return inject_quad_space(0x2005,head,current,1/4)
    end,

    [0x2006] = function(head,current) -- sixperemspace
        return inject_quad_space(0x2006,head,current,1/6)
    end,

    -- These two take the width of a reference character ('0' and '.').

    [0x2007] = function(head,current) -- figurespace
        return inject_char_space(0x2007,head,current,c_zero)
    end,

    [0x2008] = function(head,current) -- punctuationspace
        return inject_char_space(0x2008,head,current,c_period)
    end,

    [0x2009] = function(head,current) -- breakablethinspace
        return inject_quad_space(0x2009,head,current,1/8) -- same as next
    end,

    [0x200A] = function(head,current) -- hairspace
        return inject_quad_space(0x200A,head,current,1/8) -- same as previous (todo)
    end,

    [0x200B] = function(head,current) -- zerowidthspace
        return inject_quad_space(0x200B,head,current,0)
    end,

    -- Non breakable variants: no stretch or shrink is passed for these.

    [0x202F] = function(head,current) -- narrownobreakspace
        return inject_nobreak_space(0x202F,head,current,fontquads[getfont(current)]/8)
    end,

    [0x205F] = function(head,current) -- math thinspace
        return inject_nobreak_space(0x205F,head,current,4*fontquads[getfont(current)]/18)
    end,

    -- The next one is also a bom so maybe only when we have glyphs around it

 -- [0xFEFF] = function(head,current) -- zerowidthnobreakspace
 --     return head, current
 -- end,

}

characters.methods = methods
315
316-- this also works ok in math as we run over glyphs and these stay glyphs ... not sure
317-- about scripts and such but that is not important anyway ... some day we can consider
318-- special definitions in math
319
-- Runs over the characters in the list starting at 'head' and calls the method
-- registered for a character, if any. When a method returns a non false value
-- the character node is scheduled for removal; the removal itself is postponed
-- till the next method hit (or the end of the loop) so that the node the
-- traverser sits on is not taken away. Returns the possibly changed head.

function characters.handler(head)
    local pending = false
    for current, char in nextchar, head do
        local replace = methods[char]
        if replace then
            if pending then
                head = remove_node(head,pending,true)
                pending = false
            end
            if trace_characters then
                local description = lower(chardata[char].description)
                report_characters("replacing character %C, description %a",char,description)
            end
            if replace(head,current) then
                pending = current
            end
        end
    end
    if pending then
        head = remove_node(head,pending,true)
    end
    return head
end
343
344-- function characters.handler(head)
345--     local wiped = false
346--     for current, char in nextchar, head do
347--         local method = methods[char]
348--         if method then
349--             if wiped then
350--                 wiped[#wiped+1] = current
351--             else
352--                 wiped = { current }
353--             end
354--             if trace_characters then
355--                 report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
356--             end
357--             local h = method(head,current)
358--             if h then
359--                 head = h
360--             end
361--         end
362--     end
363--     if wiped then
364--         for i=1,#wiped do
365--             head = remove_node(head,wiped[i],true)
366--         end
367--     end
368--     return head
369-- end
370