lang-hyp.lua /size: 67 Kb    last modification: 2020-07-01 14:35
1
if
not
modules
then
modules
=
{
}
end
modules
[
'
lang-hyp
'
]
=
{
2
version
=
1
.
001
,
3
comment
=
"
companion to lang-ini.mkiv
"
,
4
author
=
"
Hans Hagen, PRAGMA-ADE, Hasselt NL
"
,
5
copyright
=
"
PRAGMA ADE / ConTeXt Development Team
"
,
6
license
=
"
see context related readme files
"
7
}
8 9
-- In an automated workflow hyphenation of long titles can be somewhat problematic
10
-- especially when demands conflict. For that reason I played a bit with a Lua based
11
-- variant of the traditional hyphenation machinery. This mechanism has been extended
12
-- several times in projects, of which a good description can be found in TUGboat,
13
-- Volume 27 (2006), No. 2 — Proceedings of EuroTEX2006: Automatic non-standard
14
-- hyphenation in OpenOffice.org by László Németh.
15
--
16
-- Being the result of two days experimenting the following implementation is probably
17
-- not completely okay yet. If there is demand I might add some more features and plugs.
18
-- The performance is quite okay but can probably be improved a bit, although this is not
19
-- the most critical code. For instance, on a metafun manual run the overhead is about
20
-- 0.3 seconds on 19 seconds which is not that bad.
21
--
22
-- In the process of wrapping up (for the ctx conference proceedings) I cleaned up
23
-- and extended the code a bit. It can be used in production.
24
--
25
-- . a l g o r i t h m .
26
-- 4l1g4
27
-- l g o3
28
-- 1g o
29
-- 2i t h
30
-- 4h1m
31
-- ---------------------
32
-- 4 1 4 3 2 0 4 1
33
-- a l-g o-r i t h-m
34 35
-- . a s s z o n n y a l .
36
-- s1s z/sz=sz,1,3
37
-- n1n y/ny=ny,1,3
38
-- -----------------------
39
-- 0 1 0 0 0 1 0 0 0/sz=sz,2,3,ny=ny,6,3
40
-- a s-s z o n-n y a l/sz=sz,2,3,ny=ny,6,3
41
--
42
-- ab1cd/ef=gh,2,2 : acd - efd (pattern/replacement,start,length)
43
--
44
-- todo : support hjcodes (<32 == length) like luatex does now (no need/demand so far)
45
-- maybe : support hyphenation over range (can already be done using attributes/language)
46
-- maybe : reset dictionary.hyphenated when a pattern is added and/or forced reset option
47
-- todo : check subtypes (because they have subtle meanings in the line breaking)
48
--
49
-- word start (in tex engine):
50
--
51
-- boundary : yes when wordboundary
52
-- hlist : when hyphenationbounds 1 or 3
53
-- vlist : when hyphenationbounds 1 or 3
54
-- rule : when hyphenationbounds 1 or 3
55
-- dir : when hyphenationbounds 1 or 3
56
-- whatsit : when hyphenationbounds 1 or 3
57
-- glue : yes
58
-- math : skipped
59
-- glyph : exhyphenchar (one only) : yes (so no -- ---)
60
-- otherwise : yes
61
--
62
-- word end (in tex engine):
63
--
64
-- boundary : yes
65
-- glyph : yes when different language
66
-- glue : yes
67
-- penalty : yes
68
-- kern : yes when not italic (for some historic reason)
69
-- hlist : when hyphenationbounds 2 or 3
70
-- vlist : when hyphenationbounds 2 or 3
71
-- rule : when hyphenationbounds 2 or 3
72
-- dir : when hyphenationbounds 2 or 3
73
-- whatsit : when hyphenationbounds 2 or 3
74
-- ins : when hyphenationbounds 2 or 3
75
-- adjust : when hyphenationbounds 2 or 3
76 77
local
type
,
rawget
,
rawset
,
tonumber
,
next
=
type
,
rawget
,
rawset
,
tonumber
,
next
78 79
local
P
,
R
,
S
,
Cg
,
Cf
,
Ct
,
Cc
,
C
,
Carg
,
Cs
=
lpeg
.
P
,
lpeg
.
R
,
lpeg
.
S
,
lpeg
.
Cg
,
lpeg
.
Cf
,
lpeg
.
Ct
,
lpeg
.
Cc
,
lpeg
.
C
,
lpeg
.
Carg
,
lpeg
.
Cs
80
local
lpegmatch
=
lpeg
.
match
81 82
local
context
=
context
83 84
local
concat
=
table
.
concat
85
local
insert
=
table
.
insert
86
local
remove
=
table
.
remove
87
local
formatters
=
string
.
formatters
88
local
utfchar
=
utf
.
char
89
local
utfbyte
=
utf
.
byte
90 91
if
not
characters
then
92
require
(
"
char-ini
"
)
93
end
94 95
local
setmetatableindex
=
table
.
setmetatableindex
96 97
-- \enabletrackers[hyphenator.steps=silent] will not write to the terminal
98 99
-- Tracker flags. Each one is flipped by the tracker callback registered below;
-- trace_steps is compared against 'true' as well as tested for truthiness later on.

local trace_steps     = false
local trace_visualize = false

trackers.register("hyphenator.steps", function(value)
    trace_steps = value
end)

trackers.register("hyphenator.visualize", function(value)
    trace_visualize = value
end)

local report = logs.reporter("hyphenator")

-- When no interface layer is available, registrations silently do nothing.

local implement = interfaces and interfaces.implement
if not implement then
    implement = function() end
end

-- The namespaces are created on demand so that existing ones get reused.

if not languages then
    languages = { }
end

local hyphenators = languages.hyphenators
if not hyphenators then
    hyphenators = { }
end
languages.hyphenators = hyphenators

local traditional = hyphenators.traditional
if not traditional then
    traditional = { }
end
hyphenators.traditional = traditional
111 112
-- Dictionaries are created the first time a language is indexed. Each one starts
-- out empty and is flagged as not yet loaded.

local dictionaries = setmetatableindex(function(store,language)
    local dictionary = { loaded = false }
    dictionary.patterns   = { }
    dictionary.hyphenated = { }
    dictionary.specials   = { }
    dictionary.exceptions = { }
    store[language] = dictionary
    return dictionary
end)

hyphenators.dictionaries = dictionaries
125 126
-- Building blocks for parsing pattern and exception strings.

local character      = lpeg.patterns.utf8character
local digit          = R("09")
local weight         = (digit / tonumber) + Cc(0) -- an explicit digit or else a zero
local fence          = P(".")
local hyphen         = P("-")
-- NOTE(review): a single space is assumed here (an empty string would make 'char' below
-- never match) -- confirm against the pristine file
local space          = P(" ")
local char           = character - space
local validcharacter = character - S("./")
local keycharacter   = character - S("/")

----- basepart       = Ct( (Cc(0) * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc(0))^-1)

-- The optional "/before=after,start,length" tail of a pattern is folded into one table
-- with the fields before, after, start and length (the last two are optional and numeric).

local specpart

do

    local comma    = P(",")
    local equal    = P("=")

    local p_before = Cg(Cc("before") * C((1 - equal)^1) * equal)
    local p_after  = Cg(Cc("after")  * C((1 - comma)^1))
    local p_start  = Cg(Cc("start")  * ((1 - comma)^1 / tonumber) * comma)
    local p_length = Cg(Cc("length") * ((1 - P(-1))^1 / tonumber))

    specpart = (P("/") * Cf(Ct("") * p_before * p_after * (comma * p_start * p_length)^-1, rawset))^-1

end

-- A pattern key is the pattern with its digits removed, an exception key is the word
-- with its hyphens removed.

local make_hashkey_p = Cs((digit / "" + keycharacter)^1)
----- make_pattern_p = basepart * specpart
local make_hashkey_e = Cs((hyphen / "" + keycharacter)^1)

-- An exception becomes a list of booleans: true when a hyphen precedes the next
-- character, false otherwise.

local make_pattern_e

do

    local letter = P(char)

    make_pattern_e = Ct(letter * (hyphen * Cc(true) * letter + letter * Cc(false))^1) -- catch . and char after -

end
149 150
-- local make_hashkey_c = Cs((digit + keycharacter/"")^1)
151
-- local make_pattern_c = Ct((P(1)/tonumber)^1)
152 153
-- local cache = setmetatableindex(function(t,k)
154
-- local n = lpegmatch(make_hashkey_c,k)
155
-- local v = lpegmatch(make_pattern_c,n)
156
-- t[k] = v
157
-- return v
158
-- end)
159
--
160
-- local weight_n = digit + Cc("0")
161
-- local basepart_n = Cs( (Cc("0") * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc("0"))^-1) / cache
162
-- local make_pattern_n = basepart_n * specpart
163 164
-- A string of digits is turned into a table of numbers, one entry per character.

local make_pattern_c = Ct((P(1) / tonumber)^1)

-- us + nl: 17664 entries -> 827 unique (saves some 3M)

-- Identical digit strings share one table: the first lookup converts and stores it.

local cache = setmetatableindex(function(store,digits)
    local weights = lpegmatch(make_pattern_c,digits)
    store[digits] = weights
    return weights
end)

-- The '_n' variants reduce a pattern to its digit string: a missing weight and a fence
-- both become "0" and the characters themselves are dropped. The resulting string is
-- then resolved via the cache.

local weight_n = digit + Cc("0")
local fence_n  = fence / "0"
local char_n   = validcharacter / ""

local basepart_n

do

    local optionalfence = fence_n^-1
    local weightedchars = (weight_n * char_n)^1

    basepart_n = Cs(optionalfence * weightedchars * weight_n * optionalfence) / cache

end

local make_pattern_n = basepart_n * specpart
179 180
-- Stores one pattern. The key is the pattern stripped from its digits, the value is the
-- (shared) table of weights. An explicitly passed specification wins over a special
-- part that is embedded in the pattern string itself.

local function register_pattern(patterns,specials,str,specification)
    local key = lpegmatch(make_hashkey_p,str)
 -- local weights, embedded = lpegmatch(make_pattern_p,str)
    local weights, embedded = lpegmatch(make_pattern_n,str)
    patterns[key] = weights -- is this key still ok for complex patterns
    local special = specification or embedded
    if special then
        specials[key] = special
    end
end
191 192
-- Forgets a pattern: both the weights and a possible special entry that are stored
-- under the digit-less key are wiped.

local function unregister_pattern(patterns,specials,str)
    local key = lpegmatch(make_hashkey_p,str)
    specials[key] = nil
    patterns[key] = nil
end
197 198
local p_lower = lpeg.patterns.utf8lower

-- Stores one exception. The string is first passed through p_lower; the key is the
-- result without hyphens and the value is the list of booleans produced by
-- make_pattern_e. The specification argument is accepted but not used.

local function register_exception(exceptions,str,specification)
    local lowered = lpegmatch(p_lower,str)
    local key     = lpegmatch(make_hashkey_e,lowered)
    exceptions[key] = lpegmatch(make_pattern_e,lowered)
end
206 207
-- Bulk loaders: every run of non-space characters in the data is handed over to the
-- register function, together with the extra arguments that were passed to the match
-- (the target tables). Anything else is skipped one position at a time.

local p_pattern
local p_exception

do

    local entry = C(char^1)

    p_pattern   = ((Carg(1) * Carg(2) * entry) / register_pattern   + 1)^1
    p_exception = ((Carg(1)           * entry) / register_exception + 1)^1

end

-- Splits a string into a table of its (utf) characters.

local p_split = Ct(C(character)^1)
210 211
-- Loads the patterns and exceptions of a language from a Lua file (once) and returns
-- the dictionary. Without a filename "lang-<language>" is used; the "lua" suffix is
-- added via file.addsuffix. The dictionary is flagged as loaded after the first
-- attempt, whether or not a file was found.

function traditional.loadpatterns(language,filename)
    local dictionary = dictionaries[language]
    if dictionary.loaded then
        return dictionary
    end
    if not filename or filename == "" then
        filename = "lang-" .. language
    end
    filename = file.addsuffix(filename,"lua")
    local fullname      = resolvers.findfile(filename)
    local specification = fullname and fullname ~= "" and dofile(fullname)
    if specification then
        -- patterns first ...
        local patterns    = specification.patterns
        local patterndata = patterns and patterns.data
        if patterndata and patterndata ~= "" then
            lpegmatch(p_pattern,patterndata,1,dictionary.patterns,dictionary.specials)
        end
        -- ... then the exceptions
        local exceptions    = specification.exceptions
        local exceptiondata = exceptions and exceptions.data
        if exceptiondata and exceptiondata ~= "" then
            lpegmatch(p_exception,exceptiondata,1,dictionary.exceptions)
        end
    end
    dictionary.loaded = true
    return dictionary
end
242 243
local
lcchars
=
characters
.
lcchars
244
local
uccodes
=
characters
.
uccodes
245
local
categories
=
characters
.
categories
246
local
nofwords
=
0
247
local
nofhashed
=
0
248 249
local
steps
=
nil
250
local
f_show
=
formatters
[
"
%w%s
"
]
251 252
-- Writes the collected steps to the log, but only when the tracker value is exactly
-- 'true' (any other value, like "silent", keeps the terminal quiet). The second column
-- is aligned using the width of the first entry.

local function show_log()
    if trace_steps ~= true then
        return
    end
    report()
    local width = #steps[1][1]
    for index=1,#steps do
        local step  = steps[index]
        local first = step[1]
        report("%s%w%S %S",first,width - #first + 3,step[2],step[3] or "")
    end
    report()
end
263 264
-- Starts a new trace: the first step shows the (fenced) word in both columns.
-- NOTE(review): the separator is assumed to be a single space, in line with the spaced
-- out words in the comment at the top of the file -- confirm

local function show_1(wsplit)
    local spaced = f_show(0,concat(wsplit," "))
    steps = { { spaced, spaced } }
end
268 269
-- Adds a trace step for one matching pattern.
--
--   c      : the matched substring of the fenced word
--   m      : the weights of the pattern (one more than there are characters in c)
--   wsplit : the fenced word as a table of characters
--   done   : the accumulated weights so far
--   i      : the position in wsplit where the match starts
--   spec   : the special (replacement) specification of the pattern, if any
--
-- A step is { pattern column, weights column, specification }. The specification used
-- to be passed as a surplus argument to f_show (whose format has just two fields) and
-- therefore never ended up in the step, although show_log prints the third entry.
--
-- NOTE(review): the weights are assumed to be joined by a single space, in line with
-- the spaced out examples in the comment at the top of the file -- confirm

local function show_2(c,m,wsplit,done,i,spec)
    local chars = lpegmatch(p_split,c)
    local mixed = { }
    local size  = 0
    -- interleave weights and characters: w c w c ... w
    for j=1,#m do
        size = size + 1
        mixed[size] = m[j]
        local char = chars[j]
        if char ~= nil then
            size = size + 1
            mixed[size] = char
        end
    end
    local indent  = 2*i - 2
    local details = spec and table.sequenced(spec) or ""
    local pattern
    if indent == 0 then
        -- at the left edge the leading weight is dropped
        pattern = f_show(indent,concat(mixed,"",2))
    elseif i + 1 == #wsplit then
        -- here the trailing weight is dropped
        pattern = f_show(indent-1,concat(mixed,"",1,size-1))
    else
        pattern = f_show(indent-1,concat(mixed))
    end
    local weights = f_show(1,concat(done," ",2,#done))
    steps[#steps+1] = { pattern, weights, details }
end
289 290
-- Adds the final step: the first column interleaves the characters with the resolved
-- weights, the second one shows the word with a "-" at every odd weight (never directly
-- after the leading fence). Afterwards the whole trace is flushed to the log.
-- NOTE(review): the filler between characters is assumed to be a single space -- confirm

local function show_3(wsplit,done)
    local hyphenated, nofhyphenated = { }, 0
    local weighted,   nofweighted   = { }, 0
    for index=1,#wsplit do
        if index > 1 then
            local weight = done[index]
            nofhyphenated = nofhyphenated + 1
            hyphenated[nofhyphenated] = index > 2 and weight % 2 == 1 and "-" or " "
            nofweighted = nofweighted + 1
            weighted[nofweighted] = weight
        end
        local char = wsplit[index]
        nofhyphenated = nofhyphenated + 1
        hyphenated[nofhyphenated] = char
        nofweighted = nofweighted + 1
        weighted[nofweighted] = char
    end
    steps[#steps+1] = { f_show(0,concat(weighted)), f_show(0,concat(hyphenated)) }
    show_log()
end
307 308
-- Used when a word is resolved by an exception: the trace then only has one step with
-- the (fenced) word itself. The second argument is not used.
-- NOTE(review): the separator is assumed to be a single space -- confirm

local function show_4(wsplit,done)
    local only = { concat(wsplit," ") }
    steps = { only }
    show_log()
end
312 313
-- Returns the steps collected by the most recent traced run (nil when nothing has been
-- traced so far).

function traditional.lasttrace()
    local last = steps
    return last
end
316 317
-- We could reuse the w table but as we cache the resolved words there is not much gain in
318
-- that complication.
319
--
320
-- Beware: word can be a table and when n is passed too we can assume reuse so we need to
321
-- honor that n then.
322
--
323
-- todo: a fast variant for tex ... less lookups (we could check if dictionary has changed)
324
-- ... although due to caching the already done words, we don't do much here
325 326
-- Resolves the hyphenation points of a word.
--
--   dictionary : table with the fields hyphenated (cache), exceptions, patterns, specials
--   word       : a string or a table of characters
--   n          : optional number of characters to use when word is a table
--
-- The result is cached in dictionary.hyphenated and is either false (no hyphenation
-- points) or a table with one entry per character: false, true, or in case of a special
-- pattern a table { specification, offset }. A matching exception is returned as is.

local function hyphenate(dictionary,word,n) -- odd is okay
    nofwords = nofwords + 1
    local hyphenated = dictionary.hyphenated
    local isstring   = type(word) == "string"
    -- the key under which the word as given is cached
    local key
    if isstring then
        key = word
    elseif n then
        key = concat(word,"",1,n)
    else
        key = concat(word)
    end
    local known = hyphenated[key]
    if known ~= nil then
        return known
    end
    if isstring then
        word = lpegmatch(p_split,word)
    end
    if not n then
        n = #word
    end
    -- the lowercased word between fences
    local last   = 1
    local fenced = { "." }
 -- local d = dictionary.codehash
    for index=1,n do
        local char = word[index]
     -- last = last + (d[char] or 1)
        last = last + 1
        fenced[last] = lcchars[char] or char
    end
    last = last + 1
    fenced[last] = "."
    local lowered = concat(fenced,"",2,last-1)
    -- maybe the lowercased variant has been seen already
    local done = hyphenated[lowered]
    if done ~= nil then
        hyphenated[key] = done
        nofhashed = nofhashed + 1
        return done
    end
    -- exceptions win over patterns
    local exception = dictionary.exceptions[lowered]
    if exception then
        if trace_steps then
            show_4(fenced,exception)
        end
        hyphenated[key] = exception
        nofhashed = nofhashed + 1
        return exception
    end
    --
    if trace_steps then
        show_1(fenced)
    end
    --
    local specials = dictionary.specials
    local patterns = dictionary.patterns
    --
    local spec
    -- every substring of the fenced word is a pattern candidate
    for first=1,last do
        for final=first,last do
            local snippet = concat(fenced,"",first,final)
            local weights = patterns[snippet]
            if weights then
                local special = specials[snippet]
                if not done then
                    done = { }
                    spec = nil
                    -- the string that we resolve has explicit fences (.) so done starts at
                    -- the first fence and runs upto the last one so we need one slot less
                    for slot=1,last do
                        done[slot] = 0
                    end
                end
                -- we run over the pattern that always has a (zero) value for each character
                -- plus one more as we look at both sides
                for k=1,#weights do
                    local new = weights[k]
                    if not new then
                        break
                    elseif new == true then
                        report("fatal error")
                        break
                    elseif new > 0 then
                        local pos = first + k - 1
                        local old = done[pos]
                        if not old then
                            -- break ?
                        elseif new > old then
                            -- the highest weight wins
                            done[pos] = new
                            if special then
                                local b = first + (special.start or 1) - 1
                                if b > 0 then
                                    local e = b + (special.length or 2) - 1
                                    if e > 0 and pos >= b and pos <= e then
                                        local entry = { special, k - 1 }
                                        if spec then
                                            spec[pos] = entry
                                        else
                                            spec = { [pos] = entry }
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
                if trace_steps and done then
                    show_2(snippet,weights,fenced,done,first,special)
                end
            end
        end
    end
    if trace_steps and done then
        show_3(fenced,done)
    end
    if not done then
        done = false
    else
        -- odd weights become hyphenation points; the entries are shifted two slots to
        -- the left and the two surplus slots at the end are removed afterwards
        local okay = false
        for slot=3,#done do
            if done[slot] % 2 == 1 then
                done[slot-2] = spec and spec[slot] or true
                okay = true
            else
                done[slot-2] = false
            end
        end
        if okay then
            done[#done] = nil
            done[#done] = nil
        else
            done = false
        end
    end
    hyphenated[key] = done
    nofhashed = nofhashed + 1
    return done
end
479 480
-- Hyphenates the given word again (its cache entry is wiped first) and returns the
-- table of steps. Nothing is returned for an empty word or when there is no dictionary.

function traditional.gettrace(language,word)
    if word and word ~= "" then
        local dictionary = dictionaries[language]
        if dictionary then
            dictionary.hyphenated[word] = nil
            hyphenate(dictionary,word)
            return steps
        end
    end
end
492 493
-- Unknown method names fall back on (and get bound to) the default hyphenator.

local methods = setmetatableindex(function(registry,name)
    registry[name] = hyphenate
    return hyphenate
end)
494 495
-- Installs a hyphenation method under the given name. A name that is already taken is
-- kept as it is and a message is reported instead.

function traditional.installmethod(name,f)
    if not rawget(methods,name) then
        methods[name] = f
        return
    end
    report("overloading %a is not permitted",name)
end
502 503
local s_detail_1 = "-"
local f_detail_2 = formatters["%s-%s"]
local f_detail_3 = formatters["{%s}{%s}{}"]
local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"]

-- Returns the word as a string with its hyphenation points made visible.
--
--   dictionary    : the dictionary passed on to the hyphenator
--   word          : the string to hyphenate (false is returned when it is missing)
--   specification : table with the optional fields leftcharmin (default 2), rightcharmin
--                   (default leftcharmin), leftchar and rightchar; without it the word is
--                   returned untouched
--
-- When both leftchar and rightchar are given the points are written in the three part
-- {}{}{} form, otherwise a plain "-" is used. A word without hyphenation points is
-- returned as it came in.

function traditional.injecthyphens(dictionary,word,specification)
    if not word then
        return false
    elseif not specification then
        return word
    end
    local hyphens = hyphenate(dictionary,word)
    if not hyphens then
        return word
    end

    -- the following code is similar to code later on but here we have strings while there
    -- we have hyphen specs

    local chars     = lpegmatch(p_split,word)
    local size      = #chars

    local leftmin   = specification.leftcharmin or 2
    local rightmin  = size - (specification.rightcharmin or leftmin)
    local leftchar  = specification.leftchar
    local rightchar = specification.rightchar
    local explicit  = leftchar and rightchar

    local result    = { }
    local rsize     = 0
    local position  = 1

    -- copies the current character and moves on
    local function copy()
        rsize = rsize + 1
        result[rsize] = chars[position]
        position = position + 1
    end

    while position <= size do
        -- outside the left and right minima nothing gets injected
        local hyphen = position >= leftmin and position <= rightmin and hyphens[position]
        if not hyphen then
            copy()
        elseif hyphen == true then
            -- a regular point directly after this character
            copy()
            rsize = rsize + 1
            if explicit then
                result[rsize] = f_detail_3(rightchar,leftchar)
            else
                result[rsize] = s_detail_1
            end
        else
            -- a special: either { specification, offset } or the specification itself
            local offset  = hyphen[2]
            local special
            if offset then
                special = hyphen[1]
            else
                special = hyphen
                offset  = 1
            end
            local b = position - offset + (special.start or 1)
            local e = b + (special.length or 2) - 1
            if b > 0 and e >= b then
                -- the characters before the replaced range are taken as they are
                for _=1,b-position do
                    copy()
                end
                rsize = rsize + 1
                if explicit then
                    result[rsize] = f_detail_4(special.before,rightchar,leftchar,special.after,concat(chars,"",b,e))
                else
                    result[rsize] = f_detail_2(special.before,special.after)
                end
                position = e + 1
            else
                -- error
                copy()
            end
        end
    end
    return concat(result)
end
590 591
do

    -- A registration string can hold several space separated entries. Each entry is
    -- passed to the (un)register function together with the extra match arguments.

    local entry     = C((1-space)^1)
    local gap       = space^1
    local targets   = Carg(1) * Carg(2) -- patterns and specials

    local u_pattern = (targets * entry           / unregister_pattern + gap)^1
    local r_pattern = (targets * entry * Carg(3) / register_pattern   + gap)^1
    local e_pattern = (Carg(1) * entry           / register_exception + gap)^1

    -- Registers the patterns in str for the given language. When the specification is
    -- false they are removed instead; a table specification is stored as special for each
    -- of them and any other value is ignored.

    function traditional.registerpattern(language,str,specification)
        local dictionary = dictionaries[language]
        local patterns   = dictionary.patterns
        local specials   = dictionary.specials
        if specification == false then
            lpegmatch(u_pattern,str,1,patterns,specials)
         -- unregister_pattern(dictionary.patterns,dictionary.specials,str)
            return
        end
        if type(specification) ~= "table" then
            specification = false
        end
        lpegmatch(r_pattern,str,1,patterns,specials,specification)
     -- register_pattern(dictionary.patterns,dictionary.specials,str,specification)
    end

    -- Registers the exceptions in str for the given language.

    function traditional.registerexception(language,str)
        local exceptions = dictionaries[language].exceptions
        lpegmatch(e_pattern,str,1,exceptions)
    end

end
616 617
-- todo: unicodes or utfhash ?
618 619
if
context
then
620 621
local
nodecodes
=
nodes
.
nodecodes
622
local
disccodes
=
nodes
.
disccodes
623 624
local
glyph_code
=
nodecodes
.
glyph
625
local
disc_code
=
nodecodes
.
disc
626
local
math_code
=
nodecodes
.
math
627
local
hlist_code
=
nodecodes
.
hlist
628 629
local
automaticdisc_code
=
disccodes
.
automatic
630
local
regulardisc_code
=
disccodes
.
regular
631 632
local
nuts
=
nodes
.
nuts
633
local
tonode
=
nodes
.
tonode
634
local
nodepool
=
nuts
.
pool
635 636
local
new_disc
=
nodepool
.
disc
637
local
new_penalty
=
nodepool
.
penalty
638 639
local
getfield
=
nuts
.
getfield
640
local
getfont
=
nuts
.
getfont
641
local
getid
=
nuts
.
getid
642
local
getattr
=
nuts
.
getattr
643
local
getnext
=
nuts
.
getnext
644
local
getprev
=
nuts
.
getprev
645
local
getsubtype
=
nuts
.
getsubtype
646
local
getlist
=
nuts
.
getlist
647
local
getlang
=
nuts
.
getlang
648
local
getattrlist
=
nuts
.
getattrlist
649
local
setattrlist
=
nuts
.
setattrlist
650
local
isglyph
=
nuts
.
isglyph
651
local
ischar
=
nuts
.
ischar
652 653
local
setchar
=
nuts
.
setchar
654
local
setdisc
=
nuts
.
setdisc
655
local
setlink
=
nuts
.
setlink
656
local
setprev
=
nuts
.
setprev
657
local
setnext
=
nuts
.
setnext
658 659
local
insert_before
=
nuts
.
insert_before
660
local
insert_after
=
nuts
.
insert_after
661
local
copy_node
=
nuts
.
copy
662
local
copy_list
=
nuts
.
copy_list
663
local
remove_node
=
nuts
.
remove
664
local
end_of_math
=
nuts
.
end_of_math
665
local
node_tail
=
nuts
.
tail
666 667
local
nexthlist
=
nuts
.
traversers
.
hlist
668
local
nextdisc
=
nuts
.
traversers
.
disc
669 670
local
setcolor
=
nodes
.
tracers
.
colors
.
set
671 672
local
variables
=
interfaces
.
variables
673
local
v_reset
=
variables
.
reset
674
local
v_yes
=
variables
.
yes
675
local
v_word
=
variables
.
word
676
local
v_all
=
variables
.
all
677 678
local
settings_to_array
=
utilities
.
parsers
.
settings_to_array
679 680
local
unsetvalue
=
attributes
.
unsetvalue
681
local
texsetattribute
=
tex
.
setattribute
682 683
local
prehyphenchar
=
lang
.
prehyphenchar
684
local
posthyphenchar
=
lang
.
posthyphenchar
685
local
preexhyphenchar
=
lang
.
preexhyphenchar
686
local
postexhyphenchar
=
lang
.
postexhyphenchar
687 688
local
a_hyphenation
=
attributes
.
private
(
"
hyphenation
"
)
689 690
local
interwordpenalty
=
5000
691 692
-- Here a dictionary gets filled when it is accessed for the first time, so loading
-- boils down to indexing the dictionaries table.

function traditional.loadpatterns(language)
    local dictionary = dictionaries[language]
    return dictionary
end
695 696
-- for the moment we use an independent data structure
697 698
-- Builds the dictionary of a language on first access. The patterns, exceptions and
-- used characters come from the resources of the language specification; a resource is
-- only processed once (it gets flagged with in_dictionary). The dictionary is stored
-- under the key that was asked for and, when there is a specification, also under the
-- language number.
--
-- A resource without patterns, or patterns without (or with an empty) characters
-- string, no longer triggers an error: such a resource just doesn't contribute used
-- characters. The same is true for a character without a known uppercase code.

setmetatableindex(dictionaries,function(t,k)
    if type(k) == "string" then
        -- this will force a load if not yet loaded (we need a nicer way) for the moment
        -- that will do (needed for examples that register a pattern specification)
        languages.getnumber(k)
    end
    local specification = languages.getdata(k)
    local dictionary = {
        patterns   = { },
        exceptions = { },
        hyphenated = { },
        specials   = { },
        instance   = false,
        characters = { },
        unicodes   = { },
    }
    if specification then
        local resources = specification.resources
        if resources then
            local characters = dictionary.characters or { }
            local unicodes   = dictionary.unicodes   or { }
            for i=1,#resources do
                local r = resources[i]
                if not r.in_dictionary then
                    r.in_dictionary = true
                    local patterns = r.patterns
                    if patterns then
                        local data = patterns.data
                        if data then
                            -- regular patterns
                            lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
                        end
                        local extra = patterns.extra
                        if extra then
                            -- special patterns
                            lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials)
                        end
                    end
                    local exceptions = r.exceptions
                    if exceptions then
                        local data = exceptions.data
                        if data and data ~= "" then
                            lpegmatch(p_exception,data,1,dictionary.exceptions)
                        end
                    end
                    -- the used characters are optional: no patterns, no characters
                    local used      = patterns and patterns.characters
                    local usedchars = used and used ~= "" and lpegmatch(p_split,used)
                    if usedchars then
                        for j=1,#usedchars do
                            local char  = usedchars[j]
                            local code  = utfbyte(char)
                            local upper = uccodes[code]
                            characters[char] = code
                            unicodes  [code] = char
                            if type(upper) == "table" then
                                -- multiple uppercase codes
                                for u=1,#upper do
                                    local uc = upper[u]
                                    unicodes[uc] = utfchar(uc)
                                end
                            elseif upper then
                                unicodes[upper] = utfchar(upper)
                            end
                        end
                    end
                end
            end
            dictionary.characters = characters
            dictionary.unicodes   = unicodes
            -- characters that are not in the list resolve to their code on demand
            setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end)
        end
        t[specification.number] = dictionary
        dictionary.instance = specification.instance -- needed for hyphenchars
    end
    t[k] = dictionary
    return dictionary
end)
771 772
-- Beware: left and right min doesn't mean that in a 1 mmm hsize there can be snippets
773
-- with less characters than either of them! This could be an option but such a narrow
774
-- hsize doesn't make sense anyway.
775 776
-- We assume that featuresets are defined global ... local definitions (also mid paragraph)
777
-- make not much sense anyway. For the moment we assume no predefined sets so we don't need
778
-- to store them. Nor do we need to hash them in order to save space ... no sane user will
779
-- define many of them.
780 781
local featuresets = hyphenators.featuresets
if not featuresets then
    featuresets = { }
end
hyphenators.featuresets = featuresets

-- The counter lives in the shared storage; it starts at zero when it is not there yet.

local noffeaturesets = storage.shared.noflanguagesfeaturesets or 0
storage.shared.noflanguagesfeaturesets = noffeaturesets

storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets")

----- hash = table.sequenced(featureset,",") -- no need now

-- Stores a featureset under the next free number as well as under its name. The number
-- is also put in the attribute field of the featureset and is returned.

local function register(name,featureset)
    local attribute = noffeaturesets + 1
    noffeaturesets        = attribute
    featureset.attribute  = attribute
    featuresets[attribute] = featureset -- access by attribute
    featuresets[name]      = featureset -- access by name
    storage.shared.noflanguagesfeaturesets = attribute
    return attribute
end
800 801
-- Combines the given lists into one set that maps character codes onto characters.
--
-- Each argument can be:
--
--   * a string     : every character in it is added (the empty string adds nothing)
--   * the 'all' key: a table is returned right away that resolves any code on demand
--                    (whatever was collected so far is dropped)
--   * an array     : { code | character, ... }
--   * a hash       : { [code] = true | value, ... } where true means: the character
--                    that belongs to the code
--
-- Arrays used to end up in the hash branch, because a filled array also has a first
-- key, which made the array branch unreachable and stored the entries under their
-- indices. A table with an entry at index 1 is now taken to be an array.

local function makeset(...)
    -- a bit overkill, supporting variants but who cares
    local set = { }
    for i=1,select("#",...) do
        local list = select(i,...)
        local kind = type(list)
        if kind == "string" then
            if list == v_all then
                -- not ok ... now all get ignored
                return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end)
            elseif list ~= "" then
                local used = lpegmatch(p_split,list)
                for j=1,#used do
                    local char = used[j]
                    set[utfbyte(char)] = char
                end
            end
        elseif kind == "table" then
            if list[1] ~= nil then
                -- array: codes and/or characters
                for j=1,#list do
                    local l = list[j]
                    if type(l) == "number" then
                        set[l] = utfchar(l)
                    else
                        set[utfbyte(l)] = l
                    end
                end
            else
                -- hash: code to value (an empty table adds nothing)
                for byte, char in next, list do
                    set[byte] = char == true and utfchar(byte) or char
                end
            end
        end
    end
    return set
end
841 842
-- category pd (tex also sees --- and -- as hyphens but do we really want that?)
843 844
local
defaulthyphens
=
{
845
[
0x002D
]
=
true
,
-- HYPHEN-MINUS
846
[
0x00AD
]
=
0x002D
,
-- SOFT HYPHEN (active in ConTeXt)
847
-- [0x058A] = true, -- ARMENIAN HYPHEN
848
-- [0x1400] = true, -- CANADIAN SYLLABICS HYPHEN
849
-- [0x1806] = true, -- MONGOLIAN TODO SOFT HYPHEN
850
[
0x2010
]
=
true
,
-- HYPHEN
851
-- [0x2011] = true, -- NON-BREAKING HYPHEN
852
-- [0x2012] = true, -- FIGURE DASH
853
[
0x2013
]
=
true
,
-- EN DASH
854
[
0x2014
]
=
true
,
-- EM DASH
855
-- [0x2015] = true, -- HORIZONTAL BAR
856
-- [0x2027] = true, -- HYPHENATION POINT
857
-- [0x2E17] = true, -- DOUBLE OBLIQUE HYPHEN
858
-- [0x2E1A] = true, -- HYPHEN WITH DIAERESIS
859
-- [0x2E3A] = true, -- TWO-EM DASH
860
-- [0x2E3B] = true, -- THREE-EM DASH
861
-- [0x2E40] = true, -- DOUBLE HYPHEN
862
-- [0x301C] = true, -- WAVE DASH
863
-- [0x3030] = true, -- WAVY DASH
864
-- [0x30A0] = true, -- KATAKANA-HIRAGANA DOUBLE HYPHEN
865
-- [0xFE31] = true, -- PRESENTATION FORM FOR VERTICAL EM DASH
866
-- [0xFE32] = true, -- PRESENTATION FORM FOR VERTICAL EN DASH
867
-- [0xFE58] = true, -- SMALL EM DASH
868
-- [0xFE63] = true, -- SMALL HYPHEN-MINUS
869
-- [0xFF0D] = true, -- FULLWIDTH HYPHEN-MINUS
870
}
871 872
local
defaultjoiners
=
{
873
[
0x200C
]
=
true
,
-- zwnj
874
[
0x200D
]
=
true
,
-- zwj
875
}
876 877
-- Converts a hyphen character specification into a number. Zero and anything that
-- is not a number result in nil.

local function somehyphenchar(c)
    local code = tonumber(c)
    if code == 0 then
        return nil
    end
    return code
end
881 882
-- Normalizes a featureset in place and registers it under the given name; the number
-- assigned by the registry is returned.
--
-- The fields characters, hyphens and joiners are turned into the sets extrachars and
-- hyphenchars ('yes' selects the default joiners or hyphens). The minima only survive
-- when they are positive numbers, rightchars is either true (for 'word') or a number,
-- and autohyphen and hyphenonly are always enabled for now.

local function definefeatures(name,featureset)

    -- a positive number or nothing
    local function positive(value)
        local number = tonumber(value)
        if number and number > 0 then
            return number
        end
        return nil
    end

    local extrachars  = featureset.characters -- "[]()"
    local hyphenchars = featureset.hyphens
    local joinerchars = featureset.joiners
    local rightchars  = featureset.rightchars

    if joinerchars == v_yes then
        joinerchars = defaultjoiners -- table
    end
    if hyphenchars == v_yes then
        hyphenchars = defaulthyphens -- table
    end
    if rightchars == v_word then
        rightchars = true
    else
        rightchars = tonumber(rightchars)
    end

    -- not yet ok: extrachars have to be ignored so it cannot be all)
    featureset.extrachars   = makeset(joinerchars or "",extrachars or "")
    featureset.hyphenchars  = makeset(hyphenchars or "")
    featureset.alternative  = featureset.alternative or "hyphenate"
    featureset.rightwordmin = positive(featureset.rightwordmin)
    featureset.charmin      = positive(featureset.charmin) -- luatex now also has hyphenationmin
    featureset.leftcharmin  = positive(featureset.leftcharmin)
    featureset.rightcharmin = positive(featureset.rightcharmin)
    featureset.rightchars   = rightchars
    featureset.leftchar     = somehyphenchar(featureset.leftchar)
    featureset.rightchar    = somehyphenchar(featureset.rightchar)
 -- featureset.strict       = featureset.rightedge == "tex"
    featureset.autohyphen   = true -- featureset.autohyphen == v_yes -- insert disc
    featureset.hyphenonly   = true -- featureset.hyphenonly == v_yes -- don't hyphenate around
    return register(name,featureset)
end
916 917
-- Sets the hyphenation attribute to the number of the given featureset. Nothing or
-- 'reset' unsets the attribute. An unknown name is treated as a list of known
-- featuresets: their fields are merged (later ones win) and the result is registered
-- under the combined name.

local function setfeatures(n)
    local attribute = false
    if n and n ~= v_reset then
        local known = featuresets[n]
        if known then
            attribute = known.attribute
        elseif type(n) == "string" then
            local names  = settings_to_array(n)
            local merged = { }
            for i=1,#names do
                local featureset = featuresets[names[i]]
                if featureset then
                    for key, value in next, featureset do
                        merged[key] = value
                    end
                end
            end
            attribute = register(n,merged)
        end
    end
    texsetattribute(a_hyphenation,attribute or unsetvalue)
end
941 942
-- The feature related functions are available in the namespace as well as via the
-- interface layer.

traditional.definefeatures, traditional.setfeatures = definefeatures, setfeatures

implement {
    name      = "definehyphenationfeatures",
    arguments = {
        "string",
        {
            { "characters" },
            { "hyphens" },
            { "joiners" },
            { "rightchars" },
            { "rightwordmin", "integer" },
            { "charmin",      "integer" },
            { "leftcharmin",  "integer" },
            { "rightcharmin", "integer" },
            { "leftchar",     "integer" },
            { "rightchar",    "integer" },
            { "alternative" },
            { "rightedge" },
        }
    },
    actions   = definefeatures,
}

implement {
    name      = "sethyphenationfeatures",
    arguments = "string",
    actions   = setfeatures,
}

implement {
    name      = "registerhyphenationpattern",
    arguments = { "string", "string", "boolean" },
    actions   = traditional.registerpattern,
}

implement {
    name      = "registerhyphenationexception",
    arguments = "2 strings",
    actions   = traditional.registerexception,
}
984 985
-- This is a relatively large function with local variables and local functions. A previous
986
-- implementation had the functions outside but this is cleaner and as efficient. The test
987
-- runs 100 times over tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower
988
-- and uppercase with a 1mm hsize.
989
--
990
-- language=0 language>0 4 | 3 * slower
991
--
992
-- tex 2.34 | 1.30 2.55 | 1.45 0.21 | 0.15
993
-- lua 2.42 | 1.38 3.30 | 1.84 0.88 | 0.46
994
--
995
-- Of course we have extra overhead (virtual Lua machine) but also we check attributes and
996
-- support specific local options. The test puts the typeset text in boxes and discards
997
-- it. If we also flush the runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative
998
-- difference is (somehow) smaller. The test has 536 pages. There is a little bit of extra
999
-- overhead because we store the patterns in a different way.
1000
--
1001
-- As usual I will look for speedups. Some 0.01 seconds could be gained by sharing patterns
1002
-- which is not impressive but it does save some 3M memory on this test. (Some optimizations
1003
-- already brought the 3.30 seconds down to 3.14 but it all depends on aggressive caching.)
1004 1005
-- As we kick in the hyphenator before fonts get handled, we don't look at implicit (font)
1006
-- kerns or ligatures.
1007 1008
local
starttiming
=
statistics
.
starttiming
1009
local
stoptiming
=
statistics
.
stoptiming
1010 1011
-- local strictids = {
1012
-- [nodecodes.hlist] = true,
1013
-- [nodecodes.vlist] = true,
1014
-- [nodecodes.rule] = true,
1015
-- [nodecodes.dir] = true,
1016
-- [nodecodes.whatsit] = true,
1017
-- [nodecodes.ins] = true,
1018
-- [nodecodes.adjust] = true,
1019
--
1020
-- [nodecodes.math] = true,
1021
-- [nodecodes.disc] = true,
1022
--
1023
-- [nodecodes.accent] = true, -- never used in context
1024
-- }
1025 1026
-- a lot of overhead when only one char
1027 1028
function
traditional
.
hyphenate
(
head
)
1029 1030
-- Shared state of one hyphenation pass. All nested helpers (insertpenalty,
-- synchronizefeatureset, flush, inject, injectseries) and the main loop read and
-- write these locals as upvalues.
--
-- The original declared 'leftchar' and 'rightchar' twice (first as false, later as
-- nil); the later declarations shadowed the earlier ones, which were therefore dead.
-- Only one declaration of each is kept, with the value (nil) that was effective.

local first           = head  -- head of the list, updated when nodes are inserted or removed
local tail            = nil   -- cached node_tail(first), looked up on demand
local last            = nil   -- node at which the main loop stops (nil: run to the end)
local current         = first -- node currently being inspected
local dictionary      = nil   -- dictionaries[language]
local instance        = nil   -- dictionary.instance
local characters      = nil   -- dictionary.characters
local unicodes        = nil   -- dictionary.unicodes
local exhyphenchar    = tex.exhyphenchar
local extrachars      = nil
local hyphenchars     = nil
local language        = nil   -- language of the previous glyph
local lastfont        = nil   -- font of the previous glyph
local start           = nil   -- first glyph of the word being collected
local stop            = nil   -- last glyph seen
local word            = { }   -- we reuse this table
local size            = 0     -- number of entries of 'word' in use
local leftexchar      = false
local rightexchar     = false -- utfbyte("-")
local leftmin         = 0
local rightmin        = 0
local charmin         = 1
local leftcharmin     = nil
local rightcharmin    = nil
----- leftwordmin     = nil
local rightwordmin    = nil
local rightchars      = nil
local leftchar        = nil
local rightchar       = nil   -- utfbyte("-")
local attr            = nil   -- value of a_hyphenation that the settings below belong to
local lastwordlast    = nil
local hyphenated      = hyphenate
----- strict          = nil
local exhyphenpenalty = tex.exhyphenpenalty
local hyphenpenalty   = tex.hyphenpenalty
local autohyphen      = false
local hyphenonly      = false

-- We cannot use an 'enabled' boolean (false when no characters or extras) because we
-- can have plugins that set a characters metatable and so) ... it doesn't save much
-- anyway. Using (unicodes and unicodes[code]) and a nil table when no characters also
-- doesn't save much. So there is not that much to gain for languages that don't hyphenate.
--
-- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes)))
--        or (extrachars and next(extrachars))
--
-- This can be used to not add characters i.e. keep size 0 but then we need to check for
-- attributes that change it, which costs time too. Not much to gain there.

starttiming(traditional)
1082 1083
-- Inserts an interword penalty after the node that precedes 'last'. The penalty
-- gets the attribute list of 'last'. Afterwards 'first' and 'last' hold the two
-- values returned by insert_after.
local function insertpenalty()
    local anchor  = last
    local penalty = new_penalty(interwordpenalty)
    setattrlist(penalty,anchor)
    if trace_visualize then
        nuts.setvisual(penalty,"penalty")
    end
    first, last = insert_after(first,getprev(anchor),penalty)
end
1092 1093
-- Copies the settings of feature set 'a' into the shared state of this pass, or
-- resets that state to its defaults when 'a' is nil/false or unknown.
--
-- When the set has a positive rightwordmin (different from the value remembered in
-- lastwordlast) the list is walked backwards from its tail: 'last' ends up before
-- the final rightwordmin words, which makes the main loop stop there, and penalties
-- are inserted between those words depending on rightchars.
--
-- Returns 'a' unchanged so that the caller can store it as the current attribute.
local function synchronizefeatureset(a)
    local f = a and featuresets[a]
    if not f then
        -- no usable feature set: defaults (rightchars is deliberately left as it is,
        -- like in the original code)
        hyphenated   = methods.hyphenate
        extrachars   = false
        hyphenchars  = false
        rightwordmin = false
        charmin      = 1
        leftcharmin  = false
        rightcharmin = false
        leftchar     = false
        rightchar    = false
     -- strict       = false
        autohyphen   = false
        hyphenonly   = false
        return a
    end
    hyphenated   = methods[f.alternative or "hyphenate"]
    extrachars   = f.extrachars
    hyphenchars  = f.hyphenchars
    rightwordmin = f.rightwordmin
    charmin      = f.charmin
    leftcharmin  = f.leftcharmin
    rightcharmin = f.rightcharmin
    leftchar     = f.leftchar
    rightchar    = f.rightchar
 -- strict       = f.strict and strictids
    rightchars   = f.rightchars
    autohyphen   = f.autohyphen
    hyphenonly   = f.hyphenonly
    if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then
        -- so we can change mid paragraph but it's kind of unpredictable then
        if not tail then
            tail = node_tail(first)
        end
        last = tail
        local inside    = false -- true while we walk through a run of glyphs
        local nofglyphs = 0     -- glyphs seen so far (not reset per word)
        while last and rightwordmin > 0 do
            if getid(last) == glyph_code then
                nofglyphs = nofglyphs + 1
                inside    = true
                if trace_visualize then
                    setcolor(last,"darkgreen")
                end
            elseif inside then
                -- we just left a word (seen from the right)
                inside       = false
                rightwordmin = rightwordmin - 1
                local wanted
                if rightchars == true then
                    wanted = rightwordmin > 0
                else
                    wanted = rightchars and nofglyphs <= rightchars
                end
                if wanted then
                    insertpenalty()
                end
            end
            last = getprev(last)
        end
        lastwordlast = rightwordmin
    end
    if not charmin or charmin == 0 then
        charmin = 1
    end
    return a
end
1160 1161
-- Applies the hyphenation result 'hyphens' to the word that runs from node 'start'
-- to node 'stop' (characters in word[1..size]).
--
-- hyphens[position] is either nil/false (no break after that character), true
-- (a regular break) or a table describing a non standard break: either the
-- specification itself or { specification, offset }, where the specification can
-- have start, length, before, after, right and left fields.
--
-- The work is done in two steps: first a flat 'result' list is built (characters,
-- true for a regular break, a five entry table for a special break), then that
-- list is mapped onto the node list by inserting discretionaries, overwriting the
-- characters of the existing glyphs and removing the glyphs that are left over.
local function flush(hyphens) -- todo: no need for result

    local lastposition = size - rightmin -- no breaks after this position
    local result       = { }
    local rsize        = 0
    local position     = 1

    -- todo: remember last disc and don't go back to before that (plus message) ...
    -- for simplicity we also assume that we don't start with a disc node
    --
    -- there can be a conflict: if we backtrack then we can end up in another disc
    -- and get out of sync (dup chars and so)

    while position <= size do
        -- positions outside the left/right minima are treated as 'no hyphen'
        local hyphen = position >= leftmin and position <= lastposition and hyphens[position]
        if not hyphen then
            rsize = rsize + 1
            result[rsize] = word[position]
            position = position + 1
        elseif hyphen == true then
            result[rsize+1] = word[position]
            rsize = rsize + 2
            result[rsize] = true
            position = position + 1
        else
            local spec   = hyphen    -- plain specification
            local offset = hyphen[2]
            if offset then
                spec = hyphen[1]     -- { specification, offset }
            else
                offset = 1
            end
            local b = position - offset + (spec.start  or 1)
            local e = b                 + (spec.length or 2) - 1
            if b > 0 and e >= b then
                -- copy the characters before the replaced range
                for _=1,b-position do
                    rsize = rsize + 1
                    result[rsize] = word[position]
                    position = position + 1
                end
                rsize = rsize + 1
                result[rsize] = {
                    spec.before or "",   -- pre
                    spec.after or "",    -- post
                    concat(word,"",b,e), -- replace
                    spec.right,          -- optional after pre
                    spec.left,           -- optional before post
                }
                position = e + 1
            else
                -- error: treat it as a normal character
                rsize = rsize + 1
                result[rsize] = word[position]
                position = position + 1
            end
        end
    end

    -- Turns a replacement into a node (list) made of copies of 'stop'. With true
    -- as replacement a single glyph with leftchar (or else rightchar) is returned;
    -- with a string its characters are wrapped in the optional leftchar and
    -- rightchar glyphs.
    local function serialize(replacement,leftchar,rightchar)
        if not replacement then
            return
        end
        if replacement == true then
            local single = copy_node(stop)
            setchar(single,leftchar or rightchar)
            return single
        end
        local head    = nil
        local current = nil
        local function append(char)
            local glyph = copy_node(stop)
            setchar(glyph,char)
            if head then
                insert_after(current,current,glyph)
            else
                head = glyph
            end
            current = glyph
        end
        if leftchar then
            append(leftchar)
        end
        local length = #replacement -- bytes, so 1 means a single ascii character
        if length == 1 then
            append(characters[replacement])
        elseif length > 0 then
            local list = lpegmatch(p_split,replacement) -- this is an utf split (could be cached)
            for index=1,#list do
                append(characters[list[index]])
            end
        end
        if rightchar then
            local glyph = copy_node(stop)
            insert_after(current,current,glyph)
            setchar(glyph,rightchar)
        end
        return head
    end

    local target   = start -- glyph that receives the next character
    local attrnode = start -- will be different, just the first char

    for index=1,rsize do
        local entry = result[index]
        if entry == true then
            local disc = new_disc()
            local pre  = nil
            local post = nil
            if rightchar then
                pre = serialize(true,rightchar)
            end
            if leftchar then
                post = serialize(true,leftchar)
            end
            setdisc(disc,pre,post,nil,regulardisc_code,hyphenpenalty)
            if attrnode then
                setattrlist(disc,attrnode)
            end
            -- could be a replace as well
            insert_before(first,target,disc)
        elseif type(entry) == "table" then
            local disc    = new_disc()
            local pre     = entry[1]
            local post    = entry[2]
            local replace = entry[3]
            local right   = entry[4] ~= false and rightchar
            local left    = entry[5] ~= false and leftchar
            if pre == "" then
                pre = nil
            elseif pre then
                pre = serialize(pre,false,right)
            end
            if post == "" then
                post = nil
            elseif post then
                post = serialize(post,left,false)
            end
            if replace == "" then
                replace = nil
            elseif replace then
                replace = serialize(replace)
            end
            -- maybe regular code
            setdisc(disc,pre,post,replace,regulardisc_code,hyphenpenalty)
            if attrnode then
                setattrlist(disc,attrnode)
            end
            insert_before(first,target,disc)
        else
            setchar(target,characters[entry])
            if index < rsize then
                target = getnext(target)
            end
        end
    end

    -- glyphs that are no longer needed (their characters went into a replace) go
    if target and target ~= stop then
        local surplus = getnext(target)
        local beyond  = getnext(stop)
        while surplus ~= beyond do
            first, surplus = remove_node(first,surplus,true)
        end
    end

end
1345 1346
-- Replaces the glyph at 'current' (an explicit hyphen or one of the hyphenchars)
-- by an automatic discretionary that has that glyph as replace text.
--
-- leftchar  : character for the post break text (only used when > 0)
-- rightchar : character for the pre break text (when not > 0, 'code' is used)
-- code      : fallback character for the pre break text
-- attrnode  : node whose attribute list is copied to the discretionary (optional)
--
-- Nothing happens when 'current' is the first node of the list. Returns the
-- (possibly updated) current node.
--
-- Fix: 'glyph' used to be assigned without a local declaration and therefore
-- leaked into the global namespace on every call.
local function inject(leftchar,rightchar,code,attrnode)
    if first ~= current then
        local disc  = new_disc()
        local glyph = nil
        first, current, glyph = remove_node(first,current)
        first, current = insert_before(first,current,disc)
        if trace_visualize then
            setcolor(glyph,"darkred")  -- these get checked
            setcolor(disc,"darkgreen") -- in the colorizer
        end
        local pre     = nil
        local post    = nil
        local replace = glyph
        if leftchar and leftchar > 0 then
            post = copy_node(glyph)
            setchar(post,leftchar)
        end
        pre = copy_node(glyph)
        setchar(pre,rightchar and rightchar > 0 and rightchar or code)
        setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty) -- ex ?
        if attrnode then
            setattrlist(disc,attrnode)
        end
    end
    return current
end
1371 1372
-- Wraps a run of identical hyphen glyphs (from 'current' up to and including
-- 'last') in one automatic discretionary: the run itself becomes the replace text
-- and a copy of it the pre break text; there is no post break text.
--
-- next     : node following the run (may be nil)
-- attrnode : node whose attribute list is copied to the discretionary (optional)
--
-- Returns the second value of insert_before, which is then linked to 'next'.
local function injectseries(current,last,next,attrnode)
    local series = current
    local disc   = new_disc()
    first, current = insert_before(first,current,disc)
    -- cut the run loose from the main list ...
    setprev(series)
    setnext(last)
    -- ... and close the gap
    if next then
        setlink(current,next)
    else
        setnext(current)
    end
    setdisc(disc,copy_list(series),nil,series,automaticdisc_code,hyphenpenalty) -- ex ?
    if attrnode then
        setattrlist(disc,attrnode)
    end
    return current
end
1392 1393
-- Main loop of the hyphenator: collects words from runs of glyphs and hands them
-- to the active method.
--
-- Changes compared to the original:
--
-- * math: "id == math_code and getnext(end_of_math(current)) or getnext(current)"
--   fell back to the node after the begin math node when the end math node was the
--   last node of the list (getnext gives nil there), so the math content was scanned
--   as text; now we only fall back when no end math node is found at all
-- * the four copies of the 'hyphenate the collected word' code and the two copies
--   of the attribute synchronization are local helpers now (same logic)

-- Hyphenates word[1..size] when a dictionary is active and the word is long enough.
local function flushword()
    if dictionary and size > charmin and leftmin + rightmin <= size then
        -- only german has many words starting with an uppercase character
        if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
            -- skip
        else
            local hyphens = hyphenated(dictionary,word,size)
            if hyphens then
                flush(hyphens)
            end
        end
    end
end

-- Makes feature set 'a' current and refreshes the values that depend on the
-- language instance and on the glyph at 'current'.
--
-- NOTE(review): leftchar/rightchar take the post/pre hyphen character, while
-- leftexchar/rightexchar take the pre/post ex hyphen character, which looks swapped
-- given how inject uses them; kept as it was -- confirm.
local function applyfeatures(a)
    attr        = synchronizefeatureset(a) -- influences extrachars
    leftchar    = leftchar     or (instance and posthyphenchar  (instance)) -- we can make this more
    rightchar   = rightchar    or (instance and prehyphenchar   (instance)) -- efficient if needed
    leftexchar  =                 (instance and preexhyphenchar (instance))
    rightexchar =                 (instance and postexhyphenchar(instance))
    leftmin     = leftcharmin  or getfield(current,"left")
    rightmin    = rightcharmin or getfield(current,"right")
    if not leftchar or leftchar < 0 then
        leftchar = false
    end
    if not rightchar or rightchar < 0 then
        rightchar = false
    end
end

local a = getattr(first,a_hyphenation)
if a ~= attr then
    attr = synchronizefeatureset(a)
end

-- The first attribute in a word determines the way a word gets hyphenated and if
-- relevant, other properties are also set then. We could optimize for silly one-char
-- cases but it has no priority as the code is still not that much slower than the
-- native hyphenator and this variant also provides room for extensions.

local skipping = false -- true while glyphs after a hyphen or disc are skipped (hyphenonly)

-- In "word word word." the sequences "word" and "." can be a different font!

while current and current ~= last do -- and current
    local code, id = isglyph(current)
    if code then
        if skipping then
            current = getnext(current)
        else
            local lang = getlang(current)
            local font = getfont(current)
            if lang ~= language or font ~= lastfont then
                -- a language or font switch ends the current word
                flushword()
                lastfont = font
                if language ~= lang and lang > 0 then
                    --
                    dictionary = dictionaries[lang]
                    instance   = dictionary.instance
                    characters = dictionary.characters
                    unicodes   = dictionary.unicodes
                    --
                    local a = getattr(current,a_hyphenation)
                    applyfeatures(a)
                    --
                    local char = unicodes[code] or (extrachars and extrachars[code])
                    if char then
                        word[1] = char
                        size    = 1
                        start   = current
                    else
                        size = 0
                    end
                else
                    size = 0
                end
                language = lang
            elseif language <= 0 then
                -- no hyphenation for this language
            elseif size > 0 then
                -- we are in a word
                local char = unicodes[code] or (extrachars and extrachars[code])
                if char then
                    size = size + 1
                    word[size] = char
                elseif dictionary then
                    -- not a word character, so the word ends here
                    if not hyphenonly or code ~= exhyphenchar then
                        flushword()
                    end
                    size = 0
                    if code == exhyphenchar then -- normally the -
                        -- find the end of a run of the same character in the same font
                        local following  = getnext(current)
                        local lasthyphen = current
                        while following and ischar(following,font) == code do
                            lasthyphen = following
                            following  = getnext(following)
                        end
                        if not autohyphen then
                            current = lasthyphen
                        elseif current == lasthyphen then
                            current = inject(leftexchar,rightexchar,code,current)
                        else
                            current = injectseries(current,lasthyphen,following,current)
                        end
                        if hyphenonly then
                            skipping = true
                        end
                    elseif hyphenchars then
                        local char = hyphenchars[code]
                        if char == true then
                            char = code
                        end
                        if char then
                            current = inject(leftchar and char or nil,rightchar and char or nil,char,current)
                        end
                    end
                end
            else
                -- a (possible) start of a new word
                local a = getattr(current,a_hyphenation)
                if a ~= attr then
                    applyfeatures(a)
                end
                --
                local char = unicodes[code] or (extrachars and extrachars[code])
                if char then
                    word[1] = char
                    size    = 1
                    start   = current
                end
            end
            stop    = current
            current = getnext(current)
        end
    else
        skipping = false
        if id == disc_code then
            size = 0
            current = getnext(current)
            if hyphenonly then
                skipping = true
            end
     -- elseif strict and strict[id] then
     --     current = id == math_code and getnext(end_of_math(current)) or getnext(current)
     --     size = 0
        elseif id == math_code then
            -- jump over the math; when no end math node is found we just move on
            local stopmath = end_of_math(current)
            current = getnext(stopmath or current)
        else
            current = getnext(current)
        end
        if size > 0 then
            flushword()
            size = 0
        end
    end
end

-- we can have quit due to last so we need to flush the last seen word, we could move
-- this in the loop and test for current but ... messy

flushword()

stoptiming(traditional)

return head
1586
end
1587 1588
-- Reports the number of hyphenated words and the time spent in the traditional
-- hyphenator; nothing is reported when the hyphenator was not used.
do

    local function report()
        if nofwords > 0 or statistics.elapsed(traditional) > 0 then
            local seconds = statistics.elapsedseconds(traditional) or 0
            return string.format("%s words hyphenated, %s unique, used time %s",nofwords,nofhashed,seconds)
        end
    end

    statistics.register("hyphenation",report)

end
1594 1595
local
texmethod
=
"
builders.kernel.hyphenation
"
1596
local
oldmethod
=
texmethod
1597
local
newmethod
=
texmethod
1598 1599
-- local newmethod = "languages.hyphenators.traditional.hyphenate"
1600
--
1601
-- nodes.tasks.prependaction("processors","words",newmethod)
1602
-- nodes.tasks.disableaction("processors",oldmethod)
1603
--
1604
-- nodes.tasks.replaceaction("processors","words",oldmethod,newmethod)
1605 1606
-- \enabledirectives[hyphenators.method=traditional]
1607
-- \enabledirectives[hyphenators.method=builtin]
1608 1609
-- push / pop ? check first attribute
1610 1611
-- local replaceaction = nodes.tasks.replaceaction -- no longer overload this way (too many local switches)
1612 1613
local
hyphenate
=
lang
.
hyphenate
1614
local
hyphenating
=
nuts
.
hyphenating
1615
local
methods
=
{
}
1616
local
usedmethod
=
false
1617
local
stack
=
{
}
1618 1619
-- The built-in hyphenator wrapped as a method that takes and returns a nut: when
-- nuts.hyphenating is available it is used (only its first result is passed on),
-- otherwise we fall back on lang.hyphenate, which needs a regular node.
local original

if hyphenating then
    original = function(head)
        local result = hyphenating(head)
        return result
    end
else
    original = function(head)
        hyphenate(tonode(head))
        return head -- a nut
    end
end
1628 1629
-- local has_language = lang.has_language
1630
--
1631
-- local function original(head) -- kernel.hyphenation(head)
1632
-- local h = tonode(head)
1633
-- if has_language(h) then
1634
-- hyphenate(h)
1635
-- end
1636
-- return head
1637
-- end
1638 1639
local
getcount
=
tex
.
getcount
1640 1641
hyphenators
.
methods
=
methods
1642
-- When the "hyphenator.optimize" directive is set, hboxes are only hyphenated
-- when the hyphenstate counter is positive.
local optimize = false

directives.register("hyphenator.optimize", function(v) optimize = v end)

-- Runs the currently selected hyphenation method over 'head'.
--
-- head      : head of the node list
-- groupcode : group code passed along by the caller; only "hbox" and
--             "adjusted_hbox" matter here
--
-- Returns the result of the method, or 'head' untouched when no method is active
-- (usedmethod is false) or when the optimization lets us skip this box.
--
-- Fix: the original assigned "forced = false" in the optimized branch; 'forced' is
-- not declared anywhere in the visible code, so this only created a global. The
-- assignment has been removed.
function hyphenators.handler(head,groupcode)
    if not usedmethod then
        return head
    end
    if optimize and (groupcode == "hbox" or groupcode == "adjusted_hbox") then
        if getcount("hyphenstate") > 0 then
            return usedmethod(head)
        else
            return head
        end
    end
    return usedmethod(head)
end
1662 1663
-- The available methods. A false entry makes the handler leave the list as it is.

methods.none        = false -- function(head) return head, false end
methods.traditional = languages.hyphenators.traditional.hyphenate

-- these are all the built-in hyphenator
for _, name in next, { "tex", "original", "expanded" } do -- expanded: was expanded before 1.005
    methods[name] = original
end

-- we start out with the built-in one
usedmethod = original
1670 1671
-- Selects the hyphenation method used by the handler.
--
-- method : name of an entry in 'methods'; an unknown name selects methods.tex,
--          "none" (false in the table) disables hyphenation
--
-- NOTE(review): a non-string argument also ends up as false and therefore
-- disables hyphenation -- confirm that this is intended.
local function setmethod(method)
    local chosen = false
    if type(method) == "string" then
        chosen = methods[method]
        if chosen == nil then
            chosen = methods.tex
        end
    end
    usedmethod = chosen
end
1677
-- Saves the active method on the stack and then selects 'method' in the same way
-- as setmethod does: an unknown name gives methods.tex, a false entry or a
-- non-string argument gives false.
local function pushmethod(method)
    insert(stack,usedmethod)
    local chosen = false
    if type(method) == "string" then
        chosen = methods[method]
        if chosen == nil then
            chosen = methods.tex
        end
    end
    usedmethod = chosen
end
1684
-- Restores the method that was active before the matching pushmethod. With an
-- empty stack we fall back on methods.tex.
--
-- Fix: the original used "remove(stack) or methods.tex", so a saved false (the
-- "none" method, i.e. hyphenation disabled) was never restored but replaced by
-- methods.tex. We now check the stack size instead of the popped value.
local function popmethod()
    if #stack > 0 then
        usedmethod = remove(stack)
    else
        usedmethod = methods.tex
    end
end
1687 1688
-- The method can be selected with a directive ...

directives.register("hyphenators.method",setmethod)

-- ... or from Lua.

hyphenators.popmethod  = popmethod
hyphenators.pushmethod = pushmethod
hyphenators.setmethod  = setmethod
1693 1694
-- Configures the hyphenator from a specification table; only the 'method' field
-- is looked at and nothing changes when it is absent.
function hyphenators.setup(specification)
    local method = specification.method
    if not method then
        return
    end
    setmethod(method)
end
1700 1701
-- TeX side interfaces to the method selection.

implement {
    name      = "sethyphenationmethod",
    arguments = "string",
    actions   = setmethod,
}

implement {
    name      = "pushhyphenation",
    arguments = "string",
    actions   = pushmethod,
}

implement {
    name    = "pophyphenation",
    actions = popmethod,
}
1704 1705
-- can become a runtime loaded one:
1706 1707
local
context
=
context
1708
local
ctx_NC
=
context
.
NC
1709
local
ctx_NR
=
context
.
NR
1710
local
ctx_verbatim
=
context
.
verbatim
1711 1712
-- Typesets the hyphenation steps of 'word' in 'language' as a four column
-- tabulate: a step number (left empty in the first and last row) followed by the
-- three entries of each step, verbatim. Nothing is typeset for an empty word or
-- when there are no steps.
function hyphenators.showhyphenationtrace(language,word)
    if not word or word == "" then
        return
    end
    -- collect the steps without the tracer reporting them
    local saved = trace_steps
    trace_steps = "silent"
    local steps = traditional.gettrace(language,word)
    trace_steps = saved
    if not steps then
        return
    end
    local nofsteps = #steps
    if nofsteps == 0 then
        return
    end
    context.starttabulate { "|r|l|l|l|" }
    for row=1,nofsteps do
        local step = steps[row]
        ctx_NC()
        if row > 1 and row < nofsteps then
            context(row-1)
        end
        for column=1,3 do
            ctx_NC()
            ctx_verbatim(step[column])
        end
        ctx_NC()
        ctx_NR()
    end
    context.stoptabulate()
end

implement {
    name      = "showhyphenationtrace",
    arguments = "2 strings",
    actions   = hyphenators.showhyphenationtrace,
}
1743 1744
-- Removes (and frees) all discretionary nodes at the top level of every hlist
-- found in the list that starts at 'head'. Returns 'head'.
--
-- Fix: the original called remove_node(h,false,true), where 'h' is an undefined
-- variable and no node is passed, so nothing was ever removed. We now pass the
-- list and the disc node, and store the list again when it changed because the
-- first node of a list can be a disc. The next pointer is fetched before a node
-- is freed, so we walk the list ourselves instead of using the disc iterator.
--
-- NOTE(review): freeing a disc presumably also discards its pre, post and replace
-- lists -- confirm that this is wanted for discs that carry a replace text.
function nodes.stripdiscretionaries(head)
    for l in nexthlist, head do
        local list    = getlist(l)
        local current = list
        local changed = false
        while current do
            local following = getnext(current)
            if getid(current) == disc_code then
                list    = remove_node(list,current,true)
                changed = true
            end
            current = following
        end
        if changed then
            nuts.setlist(l,list)
        end
    end
    return head
end
1752 1753 1754
else
1755 1756
-- traditional.loadpatterns("nl","lang-nl")
1757
-- traditional.loadpatterns("de","lang-de")
1758
-- traditional.loadpatterns("us","lang-us")
1759 1760
-- traditional.registerpattern("nl","e1ë", { start = 1, length = 2, before = "e", after = "e" } )
1761
-- traditional.registerpattern("nl","oo7ë", { start = 2, length = 3, before = "o", after = "e" } )
1762
-- traditional.registerpattern("de","qqxc9xkqq",{ start = 3, length = 4, before = "ab", after = "cd" } )
1763 1764
-- local specification = {
1765
-- leftcharmin = 2,
1766
-- rightcharmin = 2,
1767
-- leftchar = "<",
1768
-- rightchar = ">",
1769
-- }
1770 1771
-- print("reëel", traditional.injecthyphens(dictionaries.nl,"reëel", specification),"r{e>}{<e}{eë}el")
1772
-- print("reeëel", traditional.injecthyphens(dictionaries.nl,"reeëel", specification),"re{e>}{<e}{eë}el")
1773
-- print("rooëel", traditional.injecthyphens(dictionaries.nl,"rooëel", specification),"r{o>}{<e}{ooë}el")
1774 1775
-- print( "qxcxkq", traditional.injecthyphens(dictionaries.de, "qxcxkq", specification),"")
1776
-- print( "qqxcxkqq", traditional.injecthyphens(dictionaries.de, "qqxcxkqq", specification),"")
1777
-- print( "qqqxcxkqqq", traditional.injecthyphens(dictionaries.de, "qqqxcxkqqq", specification),"")
1778
-- print("qqqqxcxkqqqq",traditional.injecthyphens(dictionaries.de,"qqqqxcxkqqqq",specification),"")
1779 1780
-- print("kunstmatig", traditional.injecthyphens(dictionaries.nl,"kunstmatig", specification),"")
1781
-- print("kunststofmatig", traditional.injecthyphens(dictionaries.nl,"kunststofmatig", specification),"")
1782
-- print("kunst[stof]matig", traditional.injecthyphens(dictionaries.nl,"kunst[stof]matig", specification),"")
1783 1784
-- traditional.loadpatterns("us","lang-us")
1785 1786
-- local specification = {
1787
-- leftcharmin = 2,
1788
-- rightcharmin = 2,
1789
-- leftchar = false,
1790
-- rightchar = false,
1791
-- }
1792 1793
-- trace_steps = true
1794 1795
-- print("components", traditional.injecthyphens(dictionaries.us,"components", specification),"")
1796
-- print("single", traditional.injecthyphens(dictionaries.us,"single", specification),"sin-gle")
1797
-- print("everyday", traditional.injecthyphens(dictionaries.us,"everyday", specification),"every-day")
1798
-- print("associate", traditional.injecthyphens(dictionaries.us,"associate", specification),"as-so-ciate")
1799
-- print("philanthropic", traditional.injecthyphens(dictionaries.us,"philanthropic", specification),"phil-an-thropic")
1800
-- print("projects", traditional.injecthyphens(dictionaries.us,"projects", specification),"projects")
1801
-- print("Associate", traditional.injecthyphens(dictionaries.us,"Associate", specification),"As-so-ciate")
1802
-- print("Philanthropic", traditional.injecthyphens(dictionaries.us,"Philanthropic", specification),"Phil-an-thropic")
1803
-- print("Projects", traditional.injecthyphens(dictionaries.us,"Projects", specification),"Projects")
1804 1805
end
1806 1807