spac-chr.lua /size: 11 Kb    last modification: 2020-07-01 14:35
1
-- Module registration: make sure the global registry table exists and
-- store the metadata of this file under its module name.
modules = modules or { }

modules['spac-chr'] = {
    version   = 1.001,
    comment   = "companion to spac-chr.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
local byte, lower = string.byte, string.lower

-- beware: attribute copying is bugged ... there will be a proper luatex helper
-- for this

-- to be redone: characters will become tagged spaces instead as then we keep
-- track of spaceskip etc

-- todo: only setattr when export / use properties

local next = next

-- Tracing switches; each one is flipped by the callback registered with the
-- tracker of the same name.

local trace_characters = false
local trace_nbsp       = false

trackers.register("typesetters.characters", function(v) trace_characters = v end)
trackers.register("typesetters.nbsp",       function(v) trace_nbsp       = v end)

-- Reporter used for the trace messages of this module.

local report_characters = logs.reporter("typesetting","characters")
25 26
-- Local shortcuts to the node libraries.

local nodes, node = nodes, node

local nuts               = nodes.nuts

-- accessors

local getboth            = nuts.getboth
local getnext            = nuts.getnext
local getprev            = nuts.getprev
local getattr            = nuts.getattr
local setattr            = nuts.setattr
local getlang            = nuts.getlang
local setchar            = nuts.setchar
local setattrlist        = nuts.setattrlist
local getfont            = nuts.getfont
local setsubtype         = nuts.setsubtype
local setdisc            = nuts.setdisc
local isglyph            = nuts.isglyph

-- used to color the visual marker when nbsp tracing is enabled

local setcolor           = nodes.tracers.colors.set

-- list manipulation and traversal

local insert_node_before = nuts.insert_before
local insert_node_after  = nuts.insert_after
local remove_node        = nuts.remove
----- traverse_id        = nuts.traverse_id
----- traverse_char      = nuts.traverse_char
local nextchar           = nuts.traversers.char
local nextglyph          = nuts.traversers.glyph

local copy_node          = nuts.copy

-- node constructors

local nodepool           = nuts.pool
local new_penalty        = nodepool.penalty
local new_glue           = nodepool.glue
local new_kern           = nodepool.kern
local new_rule           = nodepool.rule
local new_disc           = nodepool.disc
61 62
-- Node and glue code tables plus the two codes used in this file.

local nodecodes         = nodes.nodecodes
local gluecodes         = nodes.gluecodes

local glyph_code        = nodecodes.glyph
local spaceskip_code    = gluecodes.spaceskip

-- These are taken from the global 'characters' table; this has to happen
-- before the name is redefined as a local a few lines down.

local chardata          = characters.data
local is_punctuation    = characters.is_punctuation

local typesetters       = typesetters

local unicodeblocks     = characters.blocks

-- From here on 'characters' refers to the typesetters.characters namespace.

local characters        = typesetters.characters or { } -- can be predefined
typesetters.characters  = characters

-- Font lookup tables, indexed by font id.

local fonthashes        = fonts.hashes
local fontparameters    = fonthashes.parameters
local fontcharacters    = fonthashes.characters
local fontquads         = fonthashes.quads

local setmetatableindex = table.setmetatableindex

-- Private attributes used below.

local a_character       = attributes.private("characters")
local a_alignstate      = attributes.private("alignstate")

-- Byte values of the characters whose width defines the figure space and
-- the punctuation space.

local c_zero            = byte('0')
local c_period          = byte('.')
90 91
-- Insert a glue after 'current' with a width of 'fraction' times the quad of
-- the font of 'current'. A zero fraction gives a zero width without a font
-- lookup. The glue gets the attribute list of 'current' and has the
-- character attribute set to 'unicode'. Afterwards setattrlist is called on
-- 'current' without a list (the original note asks: why reset all).
--
-- unicode  : code point stored in the character attribute of the glue
-- head     : head of the node list
-- current  : glyph node that triggers the injection
-- fraction : width as a fraction of the font quad
--
-- Returns the result of insert_node_after.
local function inject_quad_space(unicode,head,current,fraction)
    local width = fraction
    if width ~= 0 then
        local quad = fontquads[getfont(current)]
        width = width * quad
    end
    local glue = new_glue(width)
    setattrlist(glue,current)
    setattrlist(current) -- why reset all
    setattr(glue,a_character,unicode)
    return insert_node_after(head,current,glue)
end
101 102
-- Insert a glue after 'current' that is as wide as the character 'parent'
-- in the font of 'current'. When the font has no such character, or the
-- character has no width, the space parameter of the font is used instead.
-- Attribute handling is the same as in the other injectors: the glue gets
-- the attribute list of 'current' and the character attribute 'unicode',
-- and setattrlist is called on 'current' without a list.
--
-- unicode : code point stored in the character attribute of the glue
-- head    : head of the node list
-- current : glyph node that triggers the injection
-- parent  : code point of the character that determines the width
--
-- Returns the result of insert_node_after.
local function inject_char_space(unicode,head,current,parent)
    local font  = getfont(current)
    local data  = fontcharacters[font][parent]
    local width = data and data.width
    if not width then
        width = fontparameters[font].space
    end
    local glue = new_glue(width)
    setattrlist(glue,current)
    setattrlist(current) -- why reset all
    setattr(glue,a_character,unicode)
    return insert_node_after(head,current,glue)
end
111 112
-- Insert a penalty of 10000 followed by a glue after 'current'. The glue is
-- built from the given space, stretch and shrink, gets the attribute list of
-- 'current' and has the character attribute set to 'unicode'. With nbsp
-- tracing enabled an orange rule of width 'space', a kern of '-space' and a
-- second penalty of 10000 are put between the first penalty and the glue.
--
-- unicode      : code point stored in the character attribute of the glue
-- head         : head of the node list
-- current      : glyph node that triggers the injection
-- space        : width of the glue
-- spacestretch : stretch of the glue (may be omitted)
-- spaceshrink  : shrink of the glue (may be omitted)
--
-- Returns the result of the final insert_node_after, the one that adds the
-- glue.
local function inject_nobreak_space(unicode,head,current,space,spacestretch,spaceshrink)
    local glue    = new_glue(space,spacestretch,spaceshrink)
    local nobreak = new_penalty(10000)
    setattrlist(glue,current)
    setattrlist(current) -- why reset all
    setattr(glue,a_character,unicode) -- bombs
    head, current = insert_node_after(head,current,nobreak)
    if trace_nbsp then
        -- visual marker: the kern moves back over the rule
        local marker = new_rule(space)
        local back   = new_kern(-space)
        local again  = new_penalty(10000)
        setcolor(marker,"orange")
        head, current = insert_node_after(head,current,marker)
        head, current = insert_node_after(head,current,back)
        head, current = insert_node_after(head,current,again)
    end
    return insert_node_after(head,current,glue)
end
130 131
-- Inject a no-break space (0x00A0) after 'current', using the space
-- parameters of the font of 'current'. When the alignstate attribute of
-- 'current' equals 1 (flushright) the glue gets neither stretch nor shrink.
-- The second value that comes back from the injector gets the spaceskip
-- subtype.
--
-- Returns the head and node as returned by inject_nobreak_space.
local function nbsp(head,current)
    local para    = fontparameters[getfont(current)]
    local stretch = 0
    local shrink  = 0
    if getattr(current,a_alignstate) ~= 1 then -- 1 is flushright
        stretch = para.spacestretch
        shrink  = para.spaceshrink
    end
    head, current = inject_nobreak_space(0x00A0,head,current,para.space,stretch,shrink)
    setsubtype(current,spaceskip_code)
    return head, current
end
141 142
-- assumes nuts or nodes, depending on callers .. so no tonuts here
143 144
-- Replace one no-break space glyph: inject the penalty and glue after
-- 'original' and then remove 'original' from the list.
--
-- head     : head of the node list
-- original : the glyph node to replace
--
-- Returns the result of remove_node.
function characters.replacenbsp(head,original)
    -- only the head is needed, the second value of nbsp is not used here
    local newhead = nbsp(head,original)
    return remove_node(newhead,original,true)
end
148 149
-- Replace every no-break space glyph (0x00A0) in the list by a penalty and
-- a glue.
--
-- A glyph is not removed while the traverser still points at it. It is
-- remembered in 'wipe' and removed when the next no-break space is found or,
-- for the last one, after the loop. This is the same scheme as the one used
-- in characters.handler.
--
-- The removal calls now pass the remembered node 'wipe' and the real head.
-- Before, the call inside the loop used 'h' ahead of its local declaration
-- and removed 'current' (the glyph being visited), and the call after the
-- loop used the loop variable 'current', which is out of scope there.
--
-- head : head of the node list
--
-- Returns the possibly changed head of the list.
function characters.replacenbspaces(head)
    local wipe = false
    for current, char in nextglyph, head do -- can be anytime so no traverse_char
        if char == 0x00A0 then
            if wipe then
                head = remove_node(head,wipe,true)
                wipe = false
            end
            local h = nbsp(head,current)
            if h then
                wipe = current
            end
        end
    end
    if wipe then
        head = remove_node(head,wipe,true)
    end
    return head
end
168 169
-- This initialization might move someplace else if we need more of it. The problem is that
170
-- this module depends on fonts so we have an order problem.
171 172
-- Set of code points next to which a no-break space is kept as a glyph (see
-- the 0x00A0 method). The table starts empty. The index function runs on the
-- first lookup of a missing key: it marks every code point of the devanagari
-- and kannada blocks, calls setmetatableindex with nil (presumably removing
-- this initializer again) and then answers the lookup from the filled table.

local nbsphash = { }

setmetatableindex(nbsphash,function(t,k)
    -- this needs checking !
    local devanagari = unicodeblocks.devanagari
    for i=devanagari.first,devanagari.last do
        nbsphash[i] = true
    end
    local kannada = unicodeblocks.kannada
    for i=kannada.first,kannada.last do
        nbsphash[i] = true
    end
    setmetatableindex(nbsphash,nil)
    return nbsphash[k]
end)
179 180
-- Handlers for special characters, indexed by code point. Each handler is
-- called with the head of the list and the glyph node of the character.
-- characters.handler removes that glyph when the first value returned by the
-- handler is true and keeps it otherwise.

local methods = {

    -- The next one uses an attribute assigned to the character but still we
    -- don't have the 'local' value.

    -- maybe also 0x0008 : backspace

    -- When a glyph follows, the marker is removed right here and, unless
    -- that glyph is punctuation, a glue with the space parameters of the
    -- font of that glyph is inserted. Nothing is returned.

    [0x001F] = function(head,current) -- kind of special
        local next = getnext(current)
        if next then
            local char, font = isglyph(next)
            if char then
                head, current = remove_node(head,current,true)
                if not is_punctuation[char] then
                    local p = fontparameters[font]
                    head, current = insert_node_before(head,current,new_glue(p.space,p.space_stretch,p.space_shrink))
                end
            end
        end
    end,

    -- A no-break space is kept as a glyph (false is returned) when the
    -- character after it, or the one after an intermediate zwnj / zwj, or
    -- the character before it is in nbsphash. Otherwise it is replaced by a
    -- penalty and a glue.

    [0x00A0] = function(head,current) -- nbsp
        local prev, next = getboth(current)
        if next then
            -- Look at the follower. This used to test 'current', which is
            -- always the 0x00A0 glyph here, so none of the branches below
            -- could ever match.
            local char = isglyph(next)
            if not char then
                -- move on
            elseif char == 0x200C or char == 0x200D then -- nzwj zwj
                next = getnext(next)
                if next then
                    char = isglyph(next)
                    if char and nbsphash[char] then
                        return false
                    end
                end
            elseif nbsphash[char] then
                return false
            end
        end
        if prev then
            local char = isglyph(prev)
            if char and nbsphash[char] then
                return false
            end
        end
        return nbsp(head,current)
    end,

    [0x00AD] = function(head,current) -- softhyphen
        return insert_node_after(head,current,languages.explicithyphen(current))
    end,

    -- The following spaces become a glue with a width that is a fraction of
    -- the quad of the current font.

    [0x2000] = function(head,current) -- enquad
        return inject_quad_space(0x2000,head,current,1/2)
    end,

    [0x2001] = function(head,current) -- emquad
        return inject_quad_space(0x2001,head,current,1)
    end,

    [0x2002] = function(head,current) -- enspace
        return inject_quad_space(0x2002,head,current,1/2)
    end,

    [0x2003] = function(head,current) -- emspace
        return inject_quad_space(0x2003,head,current,1)
    end,

    [0x2004] = function(head,current) -- threeperemspace
        return inject_quad_space(0x2004,head,current,1/3)
    end,

    [0x2005] = function(head,current) -- fourperemspace
        return inject_quad_space(0x2005,head,current,1/4)
    end,

    [0x2006] = function(head,current) -- sixperemspace
        return inject_quad_space(0x2006,head,current,1/6)
    end,

    -- These two take the width of a character: the digit zero and the
    -- period.

    [0x2007] = function(head,current) -- figurespace
        return inject_char_space(0x2007,head,current,c_zero)
    end,

    [0x2008] = function(head,current) -- punctuationspace
        return inject_char_space(0x2008,head,current,c_period)
    end,

    [0x2009] = function(head,current) -- breakablethinspace
        return inject_quad_space(0x2009,head,current,1/8) -- same as next
    end,

    [0x200A] = function(head,current) -- hairspace
        return inject_quad_space(0x200A,head,current,1/8) -- same as previous (todo)
    end,

    [0x200B] = function(head,current) -- zerowidthspace
        return inject_quad_space(0x200B,head,current,0)
    end,

    -- These get a penalty in front and a glue without stretch and shrink
    -- arguments.

    [0x202F] = function(head,current) -- narrownobreakspace
        return inject_nobreak_space(0x202F,head,current,fontquads[getfont(current)]/8)
    end,

    [0x205F] = function(head,current) -- math thinspace
        return inject_nobreak_space(0x205F,head,current,4*fontquads[getfont(current)]/18)
    end,

    -- The next one is also a bom so maybe only when we have glyphs around it

    -- [0xFEFF] = function(head,current) -- zerowidthnobreakspace
    --     return head, current
    -- end,

}

characters.methods = methods
297 298
-- function characters.handler(head) -- todo: use traverse_id
299
-- local current = head
300
-- while current do
301
-- local char, id = isglyph(current)
302
-- if char then
303
-- local next = getnext(current)
304
-- local method = methods[char]
305
-- if method then
306
-- if trace_characters then
307
-- report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
308
-- end
309
-- local h = method(head,current)
310
-- if h then
311
-- head = remove_node(h,current,true)
312
-- end
313
-- end
314
-- current = next
315
-- else
316
-- current = getnext(current)
317
-- end
318
-- end
319
-- return head
320
-- end
321 322
-- this also works ok in math as we run over glyphs and these stay glyphs ... not sure
323
-- about scripts and such but that is not important anyway ... some day we can consider
324
-- special definitions in math
325 326
-- Run over the characters in the list and call the method registered for a
-- character, if there is one. When a method returns a true first value the
-- glyph is scheduled for removal. The removal itself is postponed until the
-- next character with a method shows up, or until the loop has finished, so
-- that the node that the traverser points at is not removed.
--
-- head : head of the node list
--
-- Returns the possibly changed head of the list.
function characters.handler(head)
    local pending = false
    for current, char in nextchar, head do
        local action = methods[char]
        if action then
            if pending then
                head = remove_node(head,pending,true)
                pending = false
            end
            if trace_characters then
                report_characters("replacing character %C, description %a",char,lower(chardata[char].description))
            end
            if action(head,current) then
                pending = current
            end
        end
    end
    if pending then
        head = remove_node(head,pending,true)
    end
    return head
end
349