spac-chr.lua /size: 10 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['spac-chr'] = {
2    version   = 1.001,
3    optimize  = true,
4    comment   = "companion to spac-chr.mkiv",
5    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
6    copyright = "PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files"
8}
9
10local byte, lower = string.byte, string.lower
11
12-- beware: attribute copying is bugged ... there will be a proper luatex helper
13-- for this
14
15-- to be redone: characters will become tagged spaces instead as then we keep track of
16-- spaceskip etc
17
18-- todo: only setattr when export / use properties
19
20local next = next
21
22local trace_characters = false  trackers.register("typesetters.characters", function(v) trace_characters = v end)
23local trace_nbsp       = false  trackers.register("typesetters.nbsp",       function(v) trace_nbsp       = v end)
24
25local report_characters = logs.reporter("typesetting","characters")
26
27local nodes, node = nodes, node
28
29local nuts               = nodes.nuts
30
31local getboth            = nuts.getboth
32local getnext            = nuts.getnext
33local getprev            = nuts.getprev
34local getattr            = nuts.getattr
35local setattr            = nuts.setattr
36local getlanguage        = nuts.getlanguage
37local setchar            = nuts.setchar
38local setattrlist        = nuts.setattrlist
39local getfont            = nuts.getfont
40local setsubtype         = nuts.setsubtype
41local isglyph            = nuts.isglyph
42
43local setcolor           = nodes.tracers.colors.set
44
45local insertnodebefore   = nuts.insertbefore
46local insertnodeafter    = nuts.insertafter
47local remove_node        = nuts.remove
48local nextchar           = nuts.traversers.char
49local nextglyph          = nuts.traversers.glyph
50
51local copy_node          = nuts.copy
52
53local nodepool           = nuts.pool
54local new_penalty        = nodepool.penalty
55local new_glue           = nodepool.glue
56local new_kern           = nodepool.kern
57local new_rule           = nodepool.rule
58
59local nodecodes          = nodes.nodecodes
60local gluecodes          = nodes.gluecodes
61
62local glyph_code         = nodecodes.glyph
63local spaceskip_code     = gluecodes.spaceskip
64
65local chardata           = characters.data
66local is_punctuation     = characters.is_punctuation
67
68local typesetters        = typesetters
69
70local unicodeblocks      = characters.blocks
71
72local characters         = typesetters.characters or { } -- can be predefined
73typesetters.characters   = characters
74
75local fonthashes         = fonts.hashes
76local fontparameters     = fonthashes.parameters
77local fontcharacters     = fonthashes.characters
78local fontquads          = fonthashes.quads
79
80local setmetatableindex  = table.setmetatableindex
81
82local a_character        = attributes.private("characters") -- this will become a property (or maybe even a field)
83local a_alignstate       = attributes.private("alignstate")
84
85local c_zero   = byte('0')
86local c_period = byte('.')
87
-- Inserts a glue after current whose width is the given fraction of the quad of
-- the font of current (a fraction of zero gives a zero width glue and needs no
-- font lookup). The glue gets its attribute list from current and is tagged with
-- the unicode of the space it stands for. Returns whatever insertnodeafter
-- returns.

local function inject_quad_space(unicode,head,current,fraction)
    -- only consult the font when there is something to scale
    local width  = fraction ~= 0 and fraction * fontquads[getfont(current)] or fraction
    local spacer = new_glue(width)
    setattrlist(spacer,current)
    setattrlist(current) -- why reset all
    setattr(spacer,a_character,unicode)
    return insertnodeafter(head,current,spacer)
end
98
-- Inserts a glue after current that is as wide as the character 'parent' in the
-- font of current. When the font has no such character (or the character has no
-- width) the space parameter of the font is used instead. The glue gets its
-- attribute list from current and is tagged with the given unicode. Returns
-- whatever insertnodeafter returns.

local function inject_char_space(unicode,head,current,parent)
    local id    = getfont(current)
    local data  = fontcharacters[id][parent]
    local width = data and data.width
    if not width then
        -- fallback: the regular interword space of this font
        width = fontparameters[id].space
    end
    local spacer = new_glue(width)
    setattrlist(spacer,current)
    setattrlist(current) -- why reset all
    setattr(spacer,a_character,unicode)
    return insertnodeafter(head,current,spacer)
end
108
-- Appends a non breakable space after current. The sequence that ends up in the
-- list is: a penalty of 10000, then (only when the typesetters.nbsp tracker is
-- enabled) an orange rule of width 'space' followed by a kern of '-space' and
-- another penalty of 10000, and finally the glue. The glue gets its attribute
-- list from current and is tagged with the given unicode. Returns whatever
-- insertnodeafter returns for the glue.

local function inject_nobreak_space(unicode,head,current,space,spacestretch,spaceshrink)
    local spacer  = new_glue(space,spacestretch,spaceshrink)
    local nobreak = new_penalty(10000)
    setattrlist(spacer,current)
    setattrlist(current) -- why reset all
    setattr(spacer,a_character,unicode) -- bombs
    head, current = insertnodeafter(head,current,nobreak)
    if trace_nbsp then
        -- the rule makes the space visible and the negative kern compensates
        -- for its width
        local marker = new_rule(space)
        local back   = new_kern(-space)
        local guard  = new_penalty(10000)
        setcolor(marker,"orange")
        head, current = insertnodeafter(head,current,marker)
        head, current = insertnodeafter(head,current,back)
        head, current = insertnodeafter(head,current,guard)
    end
    return insertnodeafter(head,current,spacer)
end
127
-- Injects a non breakable space (U+00A0) after current, using the space
-- parameters of the font of current. When the alignstate attribute of current is
-- 1, 2 or 3 the glue gets no stretch and shrink. The subtype of the injected
-- glue is set to spaceskip. Returns the head and the injected glue.

local function nbsp(head,current)
    local parameters = fontparameters[getfont(current)]
    local alignstate = getattr(current,a_alignstate) or 0
    local stretch, shrink = 0, 0
    if alignstate < 1 or alignstate > 3 then -- not flushright
        stretch, shrink = parameters.spacestretch, parameters.spaceshrink
    end
    local glue
    head, glue = inject_nobreak_space(0x00A0,head,current,parameters.space,stretch,shrink)
    setsubtype(glue,spaceskip_code)
    return head, glue
end
139
140-- assumes nuts or nodes, depending on callers .. so no tonuts here
141
-- Replaces one nbsp glyph: the penalty and glue are injected after 'original'
-- and then 'original' itself is removed from the list (the third argument of
-- remove_node is true). Returns whatever remove_node returns.

function characters.replacenbsp(head,original)
    head = nbsp(head,original) -- only the (possibly new) head is needed here
    return remove_node(head,original,true)
end
146
-- Replaces every nbsp glyph (U+00A0) in the list by a penalty plus glue and
-- returns the (possibly changed) head.
--
-- The glyph that has been handled is not removed right away because the
-- traverser still has to step over it. Instead it is remembered in 'wipe' and
-- removed either when the next nbsp shows up or after the loop has finished.
-- This is the same approach as used in characters.handler.

function characters.replacenbspaces(head)
    local wipe = false
    for current, char in nextglyph, head do -- can be anytime so no traversechar
        if char == 0x00A0 then
            if wipe then
                -- remove the previously handled glyph, not the one we're at
                head = remove_node(head,wipe,true)
                wipe = false
            end
            local h = nbsp(head,current)
            if h then
                wipe = current
            end
        end
    end
    if wipe then
        -- the loop variable is gone here, so we need the remembered node
        head = remove_node(head,wipe,true)
    end
    return head
end
166
167-- This initialization might move someplace else if we need more of it. The problem is that
168-- this module depends on fonts so we have an order problem.
169
-- A set of characters next to which a nbsp is left untouched (see the 0x00A0
-- method). It is filled on first access with the slots of the devanagari and
-- kannada blocks, after which the index metamethod is removed so that later
-- lookups are plain table accesses.

local nbsphash = { }

setmetatableindex(nbsphash,function(t,k)
    -- this needs checking !
    local function register(block)
        for unicode=block.first,block.last do
            nbsphash[unicode] = true
        end
    end
    register(unicodeblocks.devanagari)
    register(unicodeblocks.kannada)
    -- one time initialization: from now on no more callbacks
    setmetatableindex(nbsphash,nil)
    return nbsphash[k]
end)
177
-- The methods are indexed by unicode and get called by characters.handler as
-- method(head,current) where current is the glyph with that unicode. When the
-- first return value is true-ish the handler removes the glyph afterwards. When
-- false (or nothing) is returned the handler leaves the glyph alone.

local methods = {

    -- The next one uses an attribute assigned to the character but still we
    -- don't have the 'local' value.

    -- maybe also 0x0008 : backspace

    -- This one removes the glyph itself (and only when a glyph follows) and
    -- inserts a space unless that following glyph is punctuation. Nothing is
    -- returned so the handler won't remove anything.

    [0x001F] = function(head,current) -- kind of special
        local next = getnext(current)
        if next then
            local char, font = isglyph(next)
            if char then
                head, current = remove_node(head,current,true)
                if not is_punctuation[char] then
                    local p = fontparameters[font]
                    head, current = insertnodebefore(head,current,new_glue(p.space,p.space_stretch,p.space_shrink))
                end
            end
        end
    end,

    -- A nbsp is kept as glyph when it sits next to a character from nbsphash:
    -- directly before it, directly after it, or after it with a (non) joiner
    -- in between. In all other cases it becomes a penalty plus glue.

    [0x00A0] = function(head,current) -- nbsp
        local prev, next = getboth(current)
        if next then
            -- we have to look at what follows, current itself is always 0x00A0
            local char = isglyph(next)
            if not char then
                -- move on
            elseif char == 0x200C or char == 0x200D then -- zwnj zwj
                next = getnext(next)
                if next then
                    char = isglyph(next)
                    if char and nbsphash[char] then
                        return false
                    end
                end
            elseif nbsphash[char] then
                return false
            end
        end
        if prev then
            local char = isglyph(prev)
            if char and nbsphash[char] then
                return false
            end
        end
        return nbsp(head,current)
    end,

    [0x00AD] = function(head,current) -- softhyphen
        return insertnodeafter(head,current,languages.explicithyphen(current))
    end,

    -- The following spaces are fractions of the quad of the current font.

    [0x2000] = function(head,current) -- enquad
        return inject_quad_space(0x2000,head,current,1/2)
    end,

    [0x2001] = function(head,current) -- emquad
        return inject_quad_space(0x2001,head,current,1)
    end,

    [0x2002] = function(head,current) -- enspace
        return inject_quad_space(0x2002,head,current,1/2)
    end,

    [0x2003] = function(head,current) -- emspace
        return inject_quad_space(0x2003,head,current,1)
    end,

    [0x2004] = function(head,current) -- threeperemspace
        return inject_quad_space(0x2004,head,current,1/3)
    end,

    [0x2005] = function(head,current) -- fourperemspace
        return inject_quad_space(0x2005,head,current,1/4)
    end,

    [0x2006] = function(head,current) -- sixperemspace
        return inject_quad_space(0x2006,head,current,1/6)
    end,

    -- These two take the width of a reference character: the digit zero and
    -- the period.

    [0x2007] = function(head,current) -- figurespace
        return inject_char_space(0x2007,head,current,c_zero)
    end,

    [0x2008] = function(head,current) -- punctuationspace
        return inject_char_space(0x2008,head,current,c_period)
    end,

    [0x2009] = function(head,current) -- breakablethinspace
        return inject_quad_space(0x2009,head,current,1/8) -- same as next
    end,

    [0x200A] = function(head,current) -- hairspace
        return inject_quad_space(0x200A,head,current,1/8) -- same as previous (todo)
    end,

    [0x200B] = function(head,current) -- zerowidthspace
        return inject_quad_space(0x200B,head,current,0)
    end,

    -- The non breakable ones get no stretch and shrink passed.

    [0x202F] = function(head,current) -- narrownobreakspace
        return inject_nobreak_space(0x202F,head,current,fontquads[getfont(current)]/8)
    end,

    [0x205F] = function(head,current) -- math thinspace
        return inject_nobreak_space(0x205F,head,current,4*fontquads[getfont(current)]/18)
    end,

    -- The next one is also a bom so maybe only when we have glyphs around it

 -- [0xFEFF] = function(head,current) -- zerowidthnobreakspace
 --     return head, current
 -- end,

}

characters.methods = methods
295
296-- this also works ok in math as we run over glyphs and these stay glyphs ... not sure
297-- about scripts and such but that is not important anyway ... some day we can consider
298-- special definitions in math
299
-- Runs over the characters in the list and calls the method registered for a
-- character, if there is one. When a method returns a true-ish value the glyph
-- that triggered it has to go, but as the traverser still has to step over that
-- glyph the removal is postponed: it happens when the next method is about to be
-- applied or, for the last one, after the loop. Returns the (possibly changed)
-- head.

function characters.handler(head)
    local pending = false -- glyph that still has to be removed
    for current, char in nextchar, head do
        local action = methods[char]
        if action then
            if pending then
                head = remove_node(head,pending,true)
                pending = false
            end
            if trace_characters then
                report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
            end
            if action(head,current) then
                pending = current
            end
        end
    end
    if pending then
        head = remove_node(head,pending,true)
    end
    return head
end
323