lang-rep.lua /size: 15 Kb    last modification: 2021-10-28 13:50
1
if
not
modules
then
modules
=
{
}
end
modules
[
'
lang-rep
'
]
=
{
2
version
=
1
.
001
,
3
comment
=
"
companion to lang-rep.mkiv
"
,
4
author
=
"
Hans Hagen, PRAGMA-ADE, Hasselt NL
"
,
5
copyright
=
"
PRAGMA ADE / ConTeXt Development Team
"
,
6
license
=
"
see context related readme files
"
7
}
8 9
-- A BachoTeX 2013 experiment, probably not that useful. Eventually I used a simpler
10
-- more generic example. I'm sure no one ever notices of even needs this code.
11
--
12
-- As a follow up on a question by Alan about special treatment of dropped caps I wonder
13
-- if I can make this one more clever (probably in a few more dev steps). For instance
14
-- injecting nodes or replacing nodes. It's a prelude to a kind of lpeg for nodes,
15
-- although (given experiences so far) we don't really need that. After all, each problem
16
-- is somewhat unique.
17 18
local
type
,
tonumber
,
next
=
type
,
tonumber
,
next
19
local
gmatch
,
gsub
=
string
.
gmatch
,
string
.
gsub
20
local
utfbyte
,
utfsplit
=
utf
.
byte
,
utf
.
split
21
local
P
,
C
,
U
,
Cc
,
Ct
,
Cs
,
lpegmatch
=
lpeg
.
P
,
lpeg
.
C
,
lpeg
.
patterns
.
utf8character
,
lpeg
.
Cc
,
lpeg
.
Ct
,
lpeg
.
Cs
,
lpeg
.
match
22
local
find
=
string
.
find
23 24
local
zwnj
=
0x200C
25
local
grouped
=
P
(
"
{
"
)
*
(
Ct
(
(
U
/
utfbyte
-
P
(
"
}
"
)
)
^
1
)
+
Cc
(
false
)
)
*
P
(
"
}
"
)
-- grouped
26
local
splitter
=
Ct
(
(
27
#
P
(
"
{
"
)
*
(
28
P
(
"
{}
"
)
/
function
(
)
return
zwnj
end
29
+
Ct
(
Cc
(
"
discretionary
"
)
*
grouped
*
grouped
*
grouped
)
30
+
Ct
(
Cc
(
"
noligature
"
)
*
grouped
)
31
)
32
+
U
/
utfbyte
33
)
^
1
)
34 35
local
stripper
=
P
(
"
{
"
)
*
Cs
(
(
1
-
P
(
-2
)
)
^
0
)
*
P
(
"
}
"
)
*
P
(
-1
)
36 37
local
trace_replacements
=
false
trackers
.
register
(
"
languages.replacements
"
,
function
(
v
)
trace_replacements
=
v
end
)
38
local
trace_details
=
false
trackers
.
register
(
"
languages.replacements.details
"
,
function
(
v
)
trace_details
=
v
end
)
39 40
local
report_replacement
=
logs
.
reporter
(
"
languages
"
,
"
replacements
"
)
41 42
local
glyph_code
=
nodes
.
nodecodes
.
glyph
43
local
glue_code
=
nodes
.
nodecodes
.
glue
44 45
local
spaceskip_code
=
nodes
.
gluecodes
.
spaceskip
46
local
xspaceskip_code
=
nodes
.
gluecodes
.
xspaceskip
47 48
local
nuts
=
nodes
.
nuts
49 50
local
getnext
=
nuts
.
getnext
51
local
getprev
=
nuts
.
getprev
52
local
getattr
=
nuts
.
getattr
53
local
getid
=
nuts
.
getid
54
local
getsubtype
=
nuts
.
getsubtype
55
local
getchar
=
nuts
.
getchar
56
local
isglyph
=
nuts
.
isglyph
57 58
local
setattr
=
nuts
.
setattr
59
local
setlink
=
nuts
.
setlink
60
local
setnext
=
nuts
.
setnext
61
local
setprev
=
nuts
.
setprev
62
local
setchar
=
nuts
.
setchar
63
local
setattrlist
=
nuts
.
setattrlist
64 65
local
insertbefore
=
nuts
.
insertbefore
66
local
insertafter
=
nuts
.
insertafter
67
local
remove_node
=
nuts
.
remove
68
local
copy_node
=
nuts
.
copy
69
local
flushlist
=
nuts
.
flushlist
70 71
local
nodepool
=
nuts
.
pool
72
local
new_disc
=
nodepool
.
disc
73 74
local
texsetattribute
=
tex
.
setattribute
75
local
unsetvalue
=
attributes
.
unsetvalue
76 77
local
enableaction
=
nodes
.
tasks
.
enableaction
78 79
local
v_reset
=
interfaces
.
variables
.
reset
80 81
local
implement
=
interfaces
.
implement
82 83
local
processors
=
typesetters
.
processors
84
local
splitprocessor
=
processors
.
split
85 86
local
replacements
=
languages
.
replacements
or
{
}
87
languages
.
replacements
=
replacements
88 89
local
a_replacements
=
attributes
.
private
(
"
replacements
"
)
90
local
a_noligature
=
attributes
.
private
(
"
noligature
"
)
-- to be adapted to lmtx !
91 92
local
lists
=
{
}
93
local
last
=
0
94
local
trees
=
{
}
95 96
table
.
setmetatableindex
(
lists
,
function
(
lists
,
name
)
97
last
=
last
+
1
98
local
list
=
{
}
99
local
data
=
{
name
=
name
,
list
=
list
,
attribute
=
last
}
100
lists
[
last
]
=
data
101
lists
[
name
]
=
data
102
trees
[
last
]
=
list
103
return
data
104
end
)
105 106
lists
[
v_reset
]
.
attribute
=
unsetvalue
-- so we discard 0
107 108
-- todo: glue kern attr
109 110
local
function
add
(
root
,
word
,
replacement
)
111
local
processor
,
replacement
=
splitprocessor
(
replacement
,
true
)
-- no check
112
replacement
=
lpegmatch
(
stripper
,
replacement
)
or
replacement
113
local
list
=
utfsplit
(
word
)
-- ,true)
114
local
size
=
#
list
115
for
i
=
1
,
size
do
116
local
l
=
utfbyte
(
list
[
i
]
)
117
if
not
root
[
l
]
then
118
root
[
l
]
=
{
}
119
end
120
if
i
=
=
size
then
121
local
special
=
find
(
replacement
,
"
{
"
,
1
,
true
)
122
local
newlist
=
lpegmatch
(
splitter
,
replacement
)
123
root
[
l
]
.
final
=
{
124
word
=
word
,
125
replacement
=
replacement
,
126
processor
=
processor
,
127
oldlength
=
size
,
128
newcodes
=
newlist
,
129
special
=
special
,
130
}
131
end
132
root
=
root
[
l
]
133
end
134
end
135 136
function
replacements
.
add
(
category
,
word
,
replacement
)
137
local
root
=
lists
[
category
]
.
list
138
if
type
(
word
)
=
=
"
table
"
then
139
for
word
,
replacement
in
next
,
word
do
140
add
(
root
,
word
,
replacement
)
141
end
142
else
143
add
(
root
,
word
,
replacement
or
"
"
)
144
end
145
end
146 147
-- local strip = lpeg.stripper("{}")
148 149
function
languages
.
replacements
.
addlist
(
category
,
list
)
150
local
root
=
lists
[
category
]
.
list
151
if
type
(
list
)
=
=
"
string
"
then
152
for
new
in
gmatch
(
list
,
"
%S+
"
)
do
153
local
old
=
gsub
(
new
,
"
[{}]
"
,
"
"
)
154
-- local old = lpegmatch(strip,new)
155
add
(
root
,
old
,
new
)
156
end
157
else
158
for
i
=
1
,
#
list
do
159
local
new
=
list
[
i
]
160
local
old
=
gsub
(
new
,
"
[{}]
"
,
"
"
)
161
-- local old = lpegmatch(strip,new)
162
add
(
root
,
old
,
new
)
163
end
164
end
165
end
166 167
local
function
tonodes
(
list
,
template
)
168
local
head
,
current
169
for
i
=
1
,
#
list
do
170
local
new
=
copy_node
(
template
)
171
setchar
(
new
,
list
[
i
]
)
172
if
head
then
173
head
,
current
=
insertafter
(
head
,
current
,
new
)
174
else
175
head
,
current
=
new
,
new
176
end
177
end
178
return
head
179
end
180 181
local
is_punctuation
=
characters
.
is_punctuation
182 183
-- We can try to be clever and use the fact that there is no match to skip
184
-- over to the next word but it is gives fuzzy code so for now I removed
185
-- that optimization (when I really need a high performance version myself
186
-- I will look into it (but so far I never used this mechanism myself).
187
--
188
-- We used to have the hit checker as function but is got messy when checks
189
-- for punctuation was added.
190 191
local
function
replace
(
head
,
first
,
last
,
final
,
hasspace
,
overload
)
192
local
current
=
first
193
local
prefirst
=
getprev
(
first
)
or
head
194
local
postlast
=
getnext
(
last
)
195
local
oldlength
=
final
.
oldlength
196
local
newcodes
=
final
.
newcodes
197
local
newlength
=
newcodes
and
#
newcodes
or
0
198
if
trace_replacements
then
199
report_replacement
(
"
replacing word %a by %a
"
,
final
.
word
,
final
.
replacement
)
200
end
201
if
hasspace
or
final
.
special
then
202
-- It's easier to delete and insert so we do just that. On the todo list is
203
-- turn injected spaces into glue but easier might be to let the char break
204
-- handler do that ...
205
local
prev
=
getprev
(
current
)
206
local
next
=
getnext
(
last
)
207
local
list
=
current
208
setnext
(
last
)
209
setlink
(
prev
,
next
)
210
current
=
prev
211
if
not
current
then
212
head
=
nil
213
end
214
local
i
=
1
215
while
i
<
=
newlength
do
216
local
codes
=
newcodes
[
i
]
217
if
type
(
codes
)
=
=
"
table
"
then
218
local
method
=
codes
[
1
]
219
if
method
=
=
"
discretionary
"
then
220
local
pre
,
post
,
replace
=
codes
[
2
]
,
codes
[
3
]
,
codes
[
4
]
221
if
pre
then
222
pre
=
tonodes
(
pre
,
first
)
223
end
224
if
post
then
225
post
=
tonodes
(
post
,
first
)
226
end
227
if
replace
then
228
replace
=
tonodes
(
replace
,
first
)
229
end
230
-- todo: also set attr
231
local
new
=
new_disc
(
pre
,
post
,
replace
)
232
setattrlist
(
new
,
first
)
233
head
,
current
=
insertafter
(
head
,
current
,
new
)
234
elseif
method
=
=
"
noligature
"
then
235
-- not that efficient to copy but ok for testing
236
local
list
=
codes
[
2
]
237
if
list
then
238
for
i
=
1
,
#
list
do
239
local
new
=
copy_node
(
first
)
240
setchar
(
new
,
list
[
i
]
)
241
setattr
(
new
,
a_noligature
,
1
)
242
head
,
current
=
insertafter
(
head
,
current
,
new
)
243
end
244
else
245
local
new
=
copy_node
(
first
)
246
setchar
(
new
,
zwnj
)
247
head
,
current
=
insertafter
(
head
,
current
,
new
)
248
end
249
else
250
report_replacement
(
"
unknown method %a
"
,
method
or
"
?
"
)
251
end
252
else
253
local
new
=
copy_node
(
first
)
254
setchar
(
new
,
codes
)
255
head
,
current
=
insertafter
(
head
,
current
,
new
)
256
end
257
i
=
i
+
1
258
end
259
flushlist
(
list
)
260
elseif
newlength
=
=
0
then
261
-- we overload
262
elseif
oldlength
=
=
newlength
then
263
if
final
.
word
~
=
final
.
replacement
then
264
for
i
=
1
,
newlength
do
265
setchar
(
current
,
newcodes
[
i
]
)
266
current
=
getnext
(
current
)
267
end
268
end
269
current
=
getnext
(
final
)
270
elseif
oldlength
<
newlength
then
271
for
i
=
1
,
newlength
-
oldlength
do
272
local
n
=
copy_node
(
current
)
273
setchar
(
n
,
newcodes
[
i
]
)
274
head
,
current
=
insertbefore
(
head
,
current
,
n
)
275
current
=
getnext
(
current
)
276
end
277
for
i
=
newlength
-
oldlength
+
1
,
newlength
do
278
setchar
(
current
,
newcodes
[
i
]
)
279
current
=
getnext
(
current
)
280
end
281
else
282
for
i
=
1
,
oldlength
-
newlength
do
283
head
,
current
=
remove_node
(
head
,
current
,
true
)
284
end
285
for
i
=
1
,
newlength
do
286
setchar
(
current
,
newcodes
[
i
]
)
287
current
=
getnext
(
current
)
288
end
289
end
290
if
overload
then
291
overload
(
final
,
getnext
(
prefirst
)
,
getprev
(
postlast
)
)
292
end
293
return
head
,
postlast
294
end
295 296
-- we handle just one space
297 298
function
replacements
.
handler
(
head
)
299
local
current
=
head
300
local
overload
=
attributes
.
applyoverloads
301
local
mode
=
false
-- we're in word or punctuation mode
302
local
wordstart
=
false
303
local
wordend
=
false
304
local
prevend
=
false
305
local
prevfinal
=
false
306
local
tree
=
false
307
local
root
=
false
308
local
hasspace
=
false
309
while
current
do
310
local
id
=
getid
(
current
)
-- or use the char getter
311
if
id
=
=
glyph_code
then
312
local
a
=
getattr
(
current
,
a_replacements
)
313
if
a
then
314
-- we have a run
315
tree
=
trees
[
a
]
316
if
tree
then
317
local
char
=
getchar
(
current
)
318
local
punc
=
is_punctuation
[
char
]
319
if
mode
=
=
"
punc
"
then
320
if
not
punc
then
321
if
root
then
322
local
final
=
root
.
final
323
if
final
then
324
head
=
replace
(
head
,
wordstart
,
wordend
,
final
,
hasspace
,
overload
)
325
elseif
prevfinal
then
326
head
=
replace
(
head
,
wordstart
,
prevend
,
prevfinal
,
hasspace
,
overload
)
327
end
328
prevfinal
=
false
329
root
=
false
330
end
331
mode
=
"
word
"
332
end
333
elseif
mode
=
=
"
word
"
then
334
if
punc
then
335
if
root
then
336
local
final
=
root
.
final
337
if
final
then
338
head
=
replace
(
head
,
wordstart
,
wordend
,
final
,
hasspace
,
overload
)
339
elseif
prevfinal
then
340
head
=
replace
(
head
,
wordstart
,
prevend
,
prevfinal
,
hasspace
,
overload
)
341
end
342
prevfinal
=
false
343
root
=
false
344
end
345
mode
=
"
punc
"
346
end
347
else
348
mode
=
punc
and
"
punc
"
or
"
word
"
349
end
350
if
root
then
351
root
=
root
[
char
]
352
if
root
then
353
wordend
=
current
354
end
355
else
356
if
prevfinal
then
357
head
=
replace
(
head
,
wordstart
,
prevend
,
prevfinal
,
hasspace
,
overload
)
358
prevfinal
=
false
359
end
360
root
=
tree
[
char
]
361
if
root
then
362
wordstart
=
current
363
wordend
=
current
364
prevend
=
false
365
hasspace
=
false
366
end
367
end
368
else
369
root
=
false
370
end
371
else
372
tree
=
false
373
end
374
current
=
getnext
(
current
)
375
elseif
root
then
376
local
final
=
root
.
final
377
if
mode
=
=
"
word
"
and
id
=
=
glue_code
then
378
local
s
=
getsubtype
(
current
)
379
if
s
=
=
spaceskip_code
or
s
=
=
xspaceskip_code
then
380
local
r
=
root
[
32
]
-- maybe more types
381
if
r
then
382
if
not
prevend
then
383
local
f
=
root
.
final
384
if
f
then
385
prevend
=
wordend
386
prevfinal
=
f
387
end
388
end
389
wordend
=
current
390
root
=
r
391
hasspace
=
true
392
goto
moveon
393
end
394
end
395
end
396
if
final
then
397
head
,
current
=
replace
(
head
,
wordstart
,
wordend
,
final
,
hasspace
,
overload
)
398
elseif
prevfinal
then
399
head
,
current
=
replace
(
head
,
wordstart
,
prevend
,
prevfinal
,
hasspace
,
overload
)
400
end
401
prevfinal
=
false
402
root
=
false
403
::
moveon
::
404
current
=
getnext
(
current
)
405
else
406
current
=
getnext
(
current
)
407
end
408
end
409
if
root
then
410
local
final
=
root
.
final
411
if
final
then
412
head
=
replace
(
head
,
wordstart
,
wordend
,
final
,
hasspace
,
overload
)
413
elseif
prevfinal
then
414
head
=
replace
(
head
,
wordstart
,
prevend
,
prevfinal
,
hasspace
,
overload
)
415
end
416
end
417
return
head
418
end
419 420
local
enabled
=
false
421 422
function
replacements
.
set
(
n
)
423
if
n
=
=
v_reset
then
424
n
=
unsetvalue
425
else
426
n
=
lists
[
n
]
.
attribute
427
if
not
enabled
then
428
enableaction
(
"
processors
"
,
"
languages.replacements.handler
"
)
429
if
trace_replacements
then
430
report_replacement
(
"
enabling replacement handler
"
)
431
end
432
enabled
=
true
433
end
434
end
435
texsetattribute
(
a_replacements
,
n
)
436
end
437 438
-- interface
439 440
implement
{
441
name
=
"
setreplacements
"
,
442
actions
=
replacements
.
set
,
443
arguments
=
"
string
"
444
}
445 446
implement
{
447
name
=
"
addreplacements
"
,
448
actions
=
replacements
.
add
,
449
arguments
=
"
3 strings
"
,
450
}
451 452
implement
{
453
name
=
"
addreplacementslist
"
,
454
actions
=
replacements
.
addlist
,
455
arguments
=
"
2 strings
"
,
456
}
457