spac-chr.lmt /size: 11 Kb    last modification: 2021-10-28 13:51
1
-- Make sure the global module registry exists; this file may be the first
-- one that gets loaded.

if not modules then
    modules = { }
end

-- Bookkeeping record for this file, stored under the file's base name.

modules["spac-chr"] = {
    version   = 1.001,
    optimize  = true,
    comment   = "companion to spac-chr.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
9 10
local
byte
,
lower
=
string
.
byte
,
string
.
lower
11 12
-- beware: attribute copying is bugged ... there will be a proper luatex helper
13
-- for this
14 15
-- to be redone: characters will become tagged spaces instead as then we keep track of
16
-- spaceskip etc
17 18
-- todo: only setattr when export / use properties
19 20
local
next
=
next
21 22
-- Trace flags. Both start out disabled; the registered callbacks store
-- whatever value the tracker mechanism hands them.

local trace_characters = false
local trace_nbsp       = false

trackers.register("typesetters.characters", function(state)
    trace_characters = state
end)

trackers.register("typesetters.nbsp", function(state)
    trace_nbsp = state
end)
24 25
local
report_characters
=
logs
.
reporter
(
"
typesetting
"
,
"
characters
"
)
26 27
local
nodes
,
node
=
nodes
,
node
28 29
local
nuts
=
nodes
.
nuts
30 31
local
getboth
=
nuts
.
getboth
32
local
getnext
=
nuts
.
getnext
33
local
getprev
=
nuts
.
getprev
34
local
getattr
=
nuts
.
getattr
35
local
setattr
=
nuts
.
setattr
36
local
getlanguage
=
nuts
.
getlanguage
37
local
setchar
=
nuts
.
setchar
38
local
setattrlist
=
nuts
.
setattrlist
39
local
getfont
=
nuts
.
getfont
40
local
setsubtype
=
nuts
.
setsubtype
41
local
isglyph
=
nuts
.
isglyph
42 43
local
setcolor
=
nodes
.
tracers
.
colors
.
set
44 45
local
insertnodebefore
=
nuts
.
insertbefore
46
local
insertnodeafter
=
nuts
.
insertafter
47
local
remove_node
=
nuts
.
remove
48
local
nextchar
=
nuts
.
traversers
.
char
49
local
nextglyph
=
nuts
.
traversers
.
glyph
50 51
local
copy_node
=
nuts
.
copy
52 53
local
nodepool
=
nuts
.
pool
54
local
new_penalty
=
nodepool
.
penalty
55
local
new_glue
=
nodepool
.
glue
56
local
new_kern
=
nodepool
.
kern
57
local
new_rule
=
nodepool
.
rule
58 59
local
nodecodes
=
nodes
.
nodecodes
60
local
gluecodes
=
nodes
.
gluecodes
61 62
local
glyph_code
=
nodecodes
.
glyph
63
local
spaceskip_code
=
gluecodes
.
spaceskip
64 65
local
chardata
=
characters
.
data
66
local
is_punctuation
=
characters
.
is_punctuation
67 68
local
typesetters
=
typesetters
69 70
local
unicodeblocks
=
characters
.
blocks
71 72
-- From here on 'characters' refers to the typesetters.characters namespace
-- (which can be predefined) and no longer to the global character tables.

local characters = typesetters.characters

if not characters then
    characters = { }
end

typesetters.characters = characters
74 75
local
fonthashes
=
fonts
.
hashes
76
local
fontparameters
=
fonthashes
.
parameters
77
local
fontcharacters
=
fonthashes
.
characters
78
local
fontquads
=
fonthashes
.
quads
79 80
local
setmetatableindex
=
table
.
setmetatableindex
81 82
local
a_character
=
attributes
.
private
(
"
characters
"
)
83
local
a_alignstate
=
attributes
.
private
(
"
alignstate
"
)
84 85
local
c_zero
=
byte
(
'
0
'
)
86
local
c_period
=
byte
(
'
.
'
)
87 88
-- Insert a glue after 'current' whose width is 'fraction' times the quad of
-- the font of 'current'. A fraction of 0 is used as-is, without consulting
-- the font. The glue takes over the attribute list of 'current' and is tagged
-- with 'unicode' in the a_character attribute.
--
-- Returns whatever insertnodeafter returns for (head, current, glue).

local function inject_quad_space(unicode,head,current,fraction)
    local width = fraction
    if width ~= 0 then
        local quad = fontquads[getfont(current)]
        width = width * quad
    end
    local space = new_glue(width)
    setattrlist(space,current)
    setattrlist(current) -- why reset all
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end
98 99
-- Insert a glue after 'current' that is as wide as character 'parent' in the
-- font of 'current'. When the font has no such character, the font's space
-- parameter is used instead. The glue takes over the attribute list of
-- 'current' and is tagged with 'unicode' in the a_character attribute.
--
-- Returns whatever insertnodeafter returns for (head, current, glue).

local function inject_char_space(unicode,head,current,parent)
    local id    = getfont(current)
    local data  = fontcharacters[id][parent]
    local space = new_glue(data and data.width or fontparameters[id].space)
    setattrlist(space,current)
    setattrlist(current) -- why reset all
    setattr(space,a_character,unicode)
    return insertnodeafter(head,current,space)
end
108 109
-- Insert a penalty of 10000 followed by a glue (space, spacestretch,
-- spaceshrink) after 'current'. The glue takes over the attribute list of
-- 'current' and is tagged with 'unicode' in the a_character attribute.
--
-- When the typesetters.nbsp tracker is enabled, an orange rule of width
-- 'space', a kern of -space and one more penalty of 10000 are put between
-- the first penalty and the glue.
--
-- Returns whatever the final insertnodeafter returns for the glue.

local function inject_nobreak_space(unicode,head,current,space,spacestretch,spaceshrink)
    local skip    = new_glue(space,spacestretch,spaceshrink)
    local nobreak = new_penalty(10000)
    setattrlist(skip,current)
    setattrlist(current) -- why reset all
    setattr(skip,a_character,unicode) -- bombs
    head, current = insertnodeafter(head,current,nobreak)
    if trace_nbsp then
        local marker = new_rule(space)
        local back   = new_kern(-space)
        local guard  = new_penalty(10000)
        setcolor(marker,"orange")
        -- appended one after the other: rule, kern, penalty
        local extras = { marker, back, guard }
        for i=1,#extras do
            head, current = insertnodeafter(head,current,extras[i])
        end
    end
    return insertnodeafter(head,current,skip)
end
127 128
-- Put a non breakable space (penalty plus glue) after 'current', using the
-- space parameters of the font of 'current'. When the a_alignstate attribute
-- of 'current' lies in the range 1..3 the glue gets no stretch and shrink.
-- The node that comes back from the injector gets the spaceskip subtype.
--
-- Returns head and that node.

local function nbsp(head,current)
    local para  = fontparameters[getfont(current)]
    local state = getattr(current,a_alignstate) or 0
    local width = para.space
    local stretch, shrink = 0, 0
    if state < 1 or state > 3 then
        -- not one of the states 1..3 (flushright), so we keep the font values
        stretch, shrink = para.space_stretch, para.space_shrink
    end
    head, current = inject_nobreak_space(0x00A0,head,current,width,stretch,shrink)
    setsubtype(current,spaceskip_code)
    return head, current
end
139 140
-- assumes nuts or nodes, depending on callers .. so no tonuts here
141 142
-- Replace one nbsp glyph: the penalty and glue are injected after 'original'
-- and then 'original' itself is removed from the list (third argument true).
--
-- Returns whatever remove_node returns.

function characters.replacenbsp(head,original)
    head = nbsp(head,original) -- only the first result (head) is needed
    return remove_node(head,original,true)
end
146 147
-- Replace every nbsp glyph (0x00A0) in the list starting at 'head' by a
-- penalty plus glue and return the (possibly changed) head.
--
-- A handled glyph is not removed right away: it is remembered in 'wipe' and
-- removed when the next nbsp is found or when the loop has finished. This is
-- the same deferred removal as in characters.handler; presumably the
-- traverser still needs the node in order to step to its successor.
--
-- Fixed: the deferred removals used an out-of-scope 'h' as head and removed
-- 'current' instead of the remembered node, and after the loop 'current' was
-- no longer in scope at all. Both removals now use (head, wipe).

function characters.replacenbspaces(head)
    local wipe = false
    for current, char in nextglyph, head do -- can be anytime so no traversechar
        if char == 0x00A0 then
            if wipe then
                head = remove_node(head,wipe,true)
                wipe = false
            end
            local h = nbsp(head,current)
            if h then
                wipe = current
            end
        end
    end
    if wipe then
        head = remove_node(head,wipe,true)
    end
    return head
end
166 167
-- This initialization might move someplace else if we need more of it. The problem is that
168
-- this module depends on fonts so we have an order problem.
169 170
-- Set of characters next to which an nbsp is left alone. The table is filled
-- on first access: the index function marks all code points of the listed
-- unicode blocks, removes itself and then answers the lookup from the now
-- populated table.

local nbsphash = { }

setmetatableindex(nbsphash,function(t,k)
    -- this needs checking !
    local names = { "devanagari", "kannada" }
    for n=1,#names do
        local block = unicodeblocks[names[n]]
        for i=block.first,block.last do
            nbsphash[i] = true
        end
    end
    setmetatableindex(nbsphash,nil)
    return nbsphash[k]
end)
177 178
-- Handlers for special characters, indexed by code point. Each one is called
-- as method(head,current) with 'current' being the glyph that carries the
-- character. In characters.handler a truthy first return value means that the
-- glyph has been dealt with and can be removed; nil or false leaves it alone.

local methods = {

    -- The next one uses an attribute assigned to the character but still we
    -- don't have the 'local' value.

    -- maybe also 0x0008 : backspace

    [0x001F] = function(head,current) -- kind of special
        local nextnode = getnext(current)
        if nextnode then
            local char, font = isglyph(nextnode)
            if char then
                -- the glyph is removed here already and nothing is returned
                head, current = remove_node(head,current,true)
                if not is_punctuation[char] then
                    local p = fontparameters[font]
                    head, current = insertnodebefore(head,current,new_glue(p.space,p.space_stretch,p.space_shrink))
                end
            end
        end
    end,

    [0x00A0] = function(head,current) -- nbsp
        local prevnode, nextnode = getboth(current)
        if nextnode then
            -- Look at the character that follows. This used to test 'current',
            -- which is the nbsp itself, so none of the checks below could
            -- ever match.
            local char = isglyph(nextnode)
            if not char then
                -- move on
            elseif char == 0x200C or char == 0x200D then -- nzwj zwj
                nextnode = getnext(nextnode)
                if nextnode then
                    char = isglyph(nextnode)
                    if char and nbsphash[char] then
                        return false
                    end
                end
            elseif nbsphash[char] then
                return false
            end
        end
        if prevnode then
            local char = isglyph(prevnode)
            if char and nbsphash[char] then
                return false
            end
        end
        return nbsp(head,current)
    end,

    [0x00AD] = function(head,current) -- softhyphen
        return insertnodeafter(head,current,languages.explicithyphen(current))
    end,

    [0x2000] = function(head,current) -- enquad
        return inject_quad_space(0x2000,head,current,1/2)
    end,

    [0x2001] = function(head,current) -- emquad
        return inject_quad_space(0x2001,head,current,1)
    end,

    [0x2002] = function(head,current) -- enspace
        return inject_quad_space(0x2002,head,current,1/2)
    end,

    [0x2003] = function(head,current) -- emspace
        return inject_quad_space(0x2003,head,current,1)
    end,

    [0x2004] = function(head,current) -- threeperemspace
        return inject_quad_space(0x2004,head,current,1/3)
    end,

    [0x2005] = function(head,current) -- fourperemspace
        return inject_quad_space(0x2005,head,current,1/4)
    end,

    [0x2006] = function(head,current) -- sixperemspace
        return inject_quad_space(0x2006,head,current,1/6)
    end,

    [0x2007] = function(head,current) -- figurespace
        return inject_char_space(0x2007,head,current,c_zero)
    end,

    [0x2008] = function(head,current) -- punctuationspace
        return inject_char_space(0x2008,head,current,c_period)
    end,

    [0x2009] = function(head,current) -- breakablethinspace
        return inject_quad_space(0x2009,head,current,1/8) -- same as next
    end,

    [0x200A] = function(head,current) -- hairspace
        return inject_quad_space(0x200A,head,current,1/8) -- same as previous (todo)
    end,

    [0x200B] = function(head,current) -- zerowidthspace
        return inject_quad_space(0x200B,head,current,0)
    end,

    [0x202F] = function(head,current) -- narrownobreakspace
        return inject_nobreak_space(0x202F,head,current,fontquads[getfont(current)]/8)
    end,

    [0x205F] = function(head,current) -- math thinspace
        return inject_nobreak_space(0x205F,head,current,4*fontquads[getfont(current)]/18)
    end,

    -- The next one is also a bom so maybe only when we have glyphs around it

    -- [0xFEFF] = function(head,current) -- zerowidthnobreakspace
    --     return head, current
    -- end,

}

characters.methods = methods
295 296
-- function characters.handler(head) -- todo: use traverseid
297
-- local current = head
298
-- while current do
299
-- local char, id = isglyph(current)
300
-- if char then
301
-- local next = getnext(current)
302
-- local method = methods[char]
303
-- if method then
304
-- if trace_characters then
305
-- report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
306
-- end
307
-- local h = method(head,current)
308
-- if h then
309
-- head = remove_node(h,current,true)
310
-- end
311
-- end
312
-- current = next
313
-- else
314
-- current = getnext(current)
315
-- end
316
-- end
317
-- return head
318
-- end
319 320
-- this also works ok in math as we run over glyphs and these stay glyphs ... not sure
321
-- about scripts and such but that is not important anyway ... some day we can consider
322
-- special definitions in math
323 324
-- Run over the characters in the list starting at 'head' and apply the
-- matching entry of 'methods', if there is one. A glyph for which the method
-- returns a truthy value is not removed immediately: it is kept in 'pending'
-- and removed when the next character with a method shows up, or after the
-- loop (presumably because the traverser still has to step past that node).
--
-- Returns the (possibly changed) head.

function characters.handler(head)
    local pending = false
    for current, char in nextchar, head do
        local method = methods[char]
        if method then
            if pending then
                -- flush the glyph that the previous match left behind
                head = remove_node(head,pending,true)
            end
            if trace_characters then
                report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
            end
            pending = method(head,current) and current or false
        end
    end
    if pending then
        head = remove_node(head,pending,true)
    end
    return head
end
347