mtx-patterns.lua /size: 30 Kb    last modification: 2021-10-28 13:50
1
-- Make sure the global module registry exists, then record the metadata
-- of this script under its own name.
modules = modules or { }

modules['mtx-patterns'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
local
format
,
find
,
concat
,
gsub
,
match
,
gmatch
=
string
.
format
,
string
.
find
,
table
.
concat
,
string
.
gsub
,
string
.
match
,
string
.
gmatch
10
local
byte
,
char
=
utf
.
byte
,
utf
.
char
11
local
addsuffix
=
file
.
addsuffix
12
local
lpegmatch
,
lpegsplit
,
lpegpatterns
,
validutf8
=
lpeg
.
match
,
lpeg
.
split
,
lpeg
.
patterns
,
lpeg
.
patterns
.
validutf8
13
local
P
,
V
,
Cs
=
lpeg
.
P
,
lpeg
.
V
,
lpeg
.
Cs
14 15
-- Help text for this script in XML form: metadata (name, detail, version),
-- the supported flags with a short description each, and usage examples.
-- It is passed as the helpinfo field when the application object is created.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-patterns</entry>
  <entry name="detail">ConTeXt Pattern File Management</entry>
  <entry name="version">0.20</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="convert"><short>generate context language files (mnemonic driven, if not given then all)</short></flag>
    <flag name="check"><short>check pattern file (or those used by context when no file given)</short></flag>
    <flag name="path"><short>source path where hyph-foo.tex files are stored</short></flag>
    <flag name="destination"><short>destination path</short></flag>
    <flag name="specification"><short>additional patterns: e.g.: =cy,hyph-cy,welsh</short></flag>
    <flag name="compress"><short>compress data</short></flag>
    <flag name="words"><short>update words in given file</short></flag>
    <flag name="hyphenate"><short>show hyphenated words</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script pattern --check hyph-*.tex</command></example>
    <example><command>mtxrun --script pattern --check --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns</command></example>
    <example><command>mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/tex --destination=e:/tmp/patterns</command></example>
    <example><command>mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns</command></example>
    <example><command>mtxrun --script pattern --hyphenate --language=nl --left=3 nogalwiedes inderdaad</command></example>
   </subcategory>
  </category>
 </examples>
</application>
]]
51 52
-- Create the application object for this script; it carries the name, the
-- banner line and the XML help text defined above.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "ConTeXt Pattern File Management 0.20",
    name     = "mtx-patterns",
})

-- All messages of this script go through the application's reporter.
local report = application.report

-- Set up the global namespace that holds the functions of this script,
-- reusing the tables when they already exist.
if not scripts then
    scripts = { }
end
if not scripts.patterns then
    scripts.patterns = { }
end
62 63
-- Code points that are always accepted in pattern and exception entries,
-- without consulting the character database; they are registered as used.
local permitted_characters = table.tohash({
    0x0009, -- U+0009 tab
    0x0027, -- U+0027 apostrophe
    0x02BC, -- U+02BC modifier letter apostrophe (used in greek)
    0x002D, -- U+002D hyphen
    0x200C, -- U+200C zero width non-joiner
    0x2019, -- U+2019 right single quote
    0x1FBD, -- U+1FBD greek symbol modifier, not a letter
    0x1FBF, -- U+1FBF greek symbol modifier, not a letter
})
73 74
-- Code points that make an ancient greek entry unusable for the "old"
-- pattern set; the set is attached to the "agr" entry of the language list.
local ignored_ancient_greek = table.tohash({
    0x1FD3, -- U+1FD3 greekiotadialytikatonos (also 0x0390)
    0x1FE3, -- U+1FE3 greekupsilondialytikatonos (also 0x03B0)
    0x1FBD, -- U+1FBD greek symbol modifier, not a letter
    0x1FBF, -- U+1FBF greek symbol modifier, not a letter
    0x03F2, -- U+03F2 greeksigmalunate
    0x02BC, -- U+02BC modifier letter apostrophe
})
82 83
-- Code points that make a french entry unusable for the "old" pattern set;
-- the set is attached to the "fr" entry of the language list.
local ignored_french = table.tohash({
    0x02BC, -- U+02BC modifier letter apostrophe
})
86 87
-- Character substitutions applied to every language when the "old" sets are
-- built: the key (a character string) is rewritten to the value.
local replaced_whatever = { }

replaced_whatever[char(0x2019)] = char(0x0027) -- right single quote becomes apostrophe
90 91
-- The languages that get processed. Each entry is an array:
--
--   [1] mnemonic (used in the names of the generated files and for selection)
--   [2] base name of the source file(s)
--   [3] description
--   [4] optional set of code points that disqualify an entry for the old set
--   [5] optional list of base names whose txt files are merged
--
-- Entries that are commented out are known source files that are not converted.
scripts.patterns.list = {
    { "af",  "hyph-af",             "afrikaans" },
 -- { "ar",  "hyph-ar",             "arabic" },
 -- { "as",  "hyph-as",             "assamese" },
    { "bg",  "hyph-bg",             "bulgarian" },
 -- { "bn",  "hyph-bn",             "bengali" },
    { "ca",  "hyph-ca",             "catalan" },
 -- { "??",  "hyph-cop",            "coptic" },
    { "cs",  "hyph-cs",             "czech" },
    { "cy",  "hyph-cy",             "welsh" },
    { "da",  "hyph-da",             "danish" },
    { "deo", "hyph-de-1901",        "german, old spelling" },
    { "de",  "hyph-de-1996",        "german, new spelling" },
 -- { "??",  "hyph-de-ch-1901",     "swiss german" },
 -- { "??",  "hyph-el-monoton",     "greek" },
 -- { "gr",  "hyph-el-polyton",     "greek" },
    { "agr", "hyph-grc",            "ancient greek", ignored_ancient_greek },
    { "gb",  "hyph-en-gb",          "british english" },
    { "us",  "hyph-en-us",          "american english" },
    { "eo",  "hyph-eo",             "esperanto" },
    { "es",  "hyph-es",             "spanish" },
    { "et",  "hyph-et",             "estonian" },
    { "eu",  "hyph-eu",             "basque" },
 -- { "fa",  "hyph-fa",             "farsi" },
    { "fi",  "hyph-fi",             "finnish" },
    { "fr",  "hyph-fr",             "french", ignored_french },
 -- { "??",  "hyph-ga",             "irish" },
 -- { "??",  "hyph-gl",             "galician" },
 -- { "gu",  "hyph-gu",             "gujarati" },
 -- { "hi",  "hyph-hi",             "hindi" },
    { "hr",  "hyph-hr",             "croatian" },
 -- { "??",  "hyph-hsb",            "upper sorbian" },
    { "hu",  "hyph-hu",             "hungarian" },
 -- { "hy",  "hyph-hy",             "armenian" },
 -- { "??",  "hyph-ia",             "interlingua" },
 -- { "??",  "hyph-id",             "indonesian" },
    { "is",  "hyph-is",             "icelandic" },
    { "it",  "hyph-it",             "italian" },
 -- { "??",  "hyph-kmr",            "kurmanji" },
 -- { "kn",  "hyph-kn",             "kannada" },
    { "la",  "hyph-la",             "latin" },
    { "ala", "hyph-la-x-classic",   "ancient latin" },
 -- { "lo",  "hyph-lo",             "lao" },
    { "lt",  "hyph-lt",             "lithuanian" },
    { "lv",  "hyph-lv",             "latvian" },
    { "mk",  "hyph-mk",             "macedonian" },
    { "ml",  "hyph-ml",             "malayalam" },
    { "mn",  "hyph-mn-cyrl",        "mongolian, cyrillic script" },
 -- { "mr",  "hyph-mr",             "..." },
    { "nb",  "hyph-nb",             "norwegian bokmål" },
    { "nl",  "hyph-nl",             "dutch" },
    { "nn",  "hyph-nn",             "norwegian nynorsk" },
 -- { "or",  "hyph-or",             "oriya" },
 -- { "pa",  "hyph-pa",             "panjabi" },
 -- { "",    "hyph-",               "" },
    { "pl",  "hyph-pl",             "polish" },
    { "pt",  "hyph-pt",             "portuguese" },
    { "ro",  "hyph-ro",             "romanian" },
    { "ru",  "hyph-ru",             "russian" },
 -- { "sa",  "hyph-sa",             "sanskrit" },
    { "sk",  "hyph-sk",             "slovak" },
    { "sl",  "hyph-sl",             "slovenian" },
    { "sq",  "hyph-sq",             "albanian" },
    -- serbian is assembled from the cyrillic and latin source files
    { "sr",  "hyph-sr",             "serbian", false, { "hyph-sr-cyrl", "hyph-sr-latn" }, },
 -- { "sr",  "hyph-sr-cyrl",        "serbian", false },
 -- { "sr",  "hyph-sr-latn",        "serbian" },
    { "sv",  "hyph-sv",             "swedish" },
 -- { "ta",  "hyph-ta",             "tamil" },
 -- { "te",  "hyph-te",             "telugu" },
    { "th",  "hyph-th",             "thai" },
    { "tk",  "hyph-tk",             "turkmen" },
    { "tr",  "hyph-tr",             "turkish" },
    { "uk",  "hyph-uk",             "ukrainian" },
    { "zh",  "hyph-zh-latn-pinyin", "zh-latn, chinese pinyin" },
}
166 167
-- stripped down from lpeg example:
168 169
-- Match the given string against the validutf8 lpeg pattern and return
-- whatever that match yields (a false value means the match failed).
function utf.check(str)
    local pattern = lpegpatterns.validutf8
    return lpegmatch(pattern, str)
end
172 173
-- *.tex
174
-- *.hyp.txt *.pat.txt *.lic.txt *.chr.txt
175 176
-- Character categories (as found in characters.data) that count as letters
-- or marks and therefore get registered as used characters.
local letter_categories = { lu = true, ll = true, lo = true, mn = true, mc = true } -- hm, really mn and mc ?

-- Read the raw text for one language. Sources are tried in this order: the
-- merged list of txt files (when given), <name>.pat.txt with its companions,
-- and finally <name>.tex (with \input files pulled in).
--
-- Returns: okay, patterns, hyphenations, comment (the last three are strings).
local function read_sources(path, name, merged)
    local fullname = file.join(path, name)
    local texfile  = addsuffix(fullname, "tex")
    local patfile  = addsuffix(fullname, "pat.txt")
    local patterns, hyphenations, comment = "", "", ""
    if merged then
        -- no version info
        report("using merged txt files %s.[hyp|pat|lic].txt", name)
        for i = 1, #merged do
            local partname = file.join(path, merged[i])
            comment      = comment      .. (io.loaddata(addsuffix(partname, "lic.txt")) or "") .. "\n\n"
            patterns     = patterns     .. (io.loaddata(addsuffix(partname, "pat.txt")) or "") .. "\n\n"
            hyphenations = hyphenations .. (io.loaddata(addsuffix(partname, "hyp.txt")) or "") .. "\n\n"
        end
    elseif lfs.isfile(patfile) then
        -- no version info
        report("using txt files %s.[hyp|pat|lic].txt", name)
        comment      = io.loaddata(addsuffix(fullname, "lic.txt")) or ""
        patterns     = io.loaddata(patfile) or ""
        hyphenations = io.loaddata(addsuffix(fullname, "hyp.txt")) or ""
    elseif lfs.isfile(texfile) then
        -- version info in comment blob
        report("using tex file %s.tex", name)
        local data = io.loaddata(texfile) or ""
        if data == "" then
            return false, patterns, hyphenations, comment
        end
        local texpath = file.dirname(texfile)
        -- replace each \input at the start of a line by the content of that file
        data = gsub(data, "([\n\r])\\input ([^ \n\r]+)", function(previous, subname)
            local subfile = addsuffix(subname, "tex")
            local subdata = io.loaddata(file.join(texpath, subfile)) or ""
            if subdata == "" then
                report("%s: no subfile %s", name, subfile)
            end
            return previous .. subdata
        end)
        -- drop tex comments, then trailing spaces and empty lines
        data = gsub(data, "%%.-[\n\r]", "")
        data = gsub(data, " *[\n\r]+", "\n")
        patterns     = match(data, "\\patterns[%s]*{[%s]*(.-)[%s]*}") or ""
        hyphenations = match(data, "\\hyphenation[%s]*{[%s]*(.-)[%s]*}") or ""
        comment      = match(data, "^(.-)[\n\r]\\patterns") or ""
    else
        return false, patterns, hyphenations, comment
    end
    return true, patterns, hyphenations, comment
end

-- Cut everything from a percent sign onwards out of each entry (in place).
-- The raw data is tested first so that clean input skips the loop.
local function strip_comments(data, entries, basename)
    if not find(data, "%", 1, true) then
        return
    end
    for i = 1, #entries do
        local entry = entries[i]
        if find(entry, "%", 1, true) then
            entries[i] = gsub(entry, "%%.*$", "")
            report("%s: removing comment: %s", basename, entry)
        end
    end
end

-- Blank every entry that contains a backslash, i.e. a tex command (in place).
local function strip_commands(data, entries, basename)
    if not find(data, "\\", 1, true) then
        return
    end
    for i = 1, #entries do
        local entry = entries[i]
        if find(entry, "\\", 1, true) then
            entries[i] = ""
            report("%s: removing line with command: %s", basename, entry)
        end
    end
end

-- Blank every entry that does not match the validutf8 pattern (in place).
local function strip_invalid_utf(entries, basename)
    for i = 1, #entries do
        local entry = entries[i]
        if not lpegmatch(validutf8, entry) then
            entries[i] = ""
            report("%s: removing line with invalid utf: %s", basename, entry)
        end
    end
end

-- Walk over all characters of all entries, blank entries with a character
-- of an unexpected category (in place) and return the set of characters in
-- use, keyed by character string. The code point 'special' (the pattern dot
-- or the exception hyphen) is skipped and digits are accepted silently.
local function collect_used(entries, special, basename)
    local chardata = characters.data
    local used = { }
    for i = 1, #entries do
        local entry = entries[i]
        for b in entry:utfvalues() do -- could be an lpeg
            if b == special then
                -- not registered
            elseif permitted_characters[b] then
                used[char(b)] = true
            else
                local description = chardata[b]
                if not description then
                    report("%s: no entry in chardata for character %C", basename, b)
                else
                    local category = description.category
                    if letter_categories[category] then
                        used[char(b)] = true
                    elseif category == "nd" then
                        -- number
                    elseif category == "cf" then
                        report("%s: %s line with suspected utf character %C, category %s: %s", basename, "keeping", b, category, entry)
                        used[char(b)] = true
                    else -- maybe accent cf (200D)
                        report("%s: %s line with suspected utf character %C, category %s: %s", basename, "removing", b, category, entry)
                        entries[i] = ""
                        break
                    end
                end
            end
        end
    end
    return used
end

-- Build the "old" variant of a list: entries that contain one of the ignored
-- code points (per language) are dropped, and the replacements that apply to
-- all languages are carried out on the remaining ones. Returns a new list.
local function make_old_entries(entries, ignored, basename)
    -- ignored (per language): a grammar that finds any of the characters anywhere
    local conflicting = nil
    if ignored then
        for codepoint in next, ignored do
            local p = P(char(codepoint))
            conflicting = conflicting and (conflicting + p) or p
        end
        if conflicting then -- an empty set must not end up in the grammar
            conflicting = P { conflicting + 1 * V(1) }
        end
    end
    -- replaced (all languages): substitute while copying the rest verbatim
    local replacements = nil
    for original, replacement in next, replaced_whatever do
        local p = P(original) / replacement
        replacements = replacements and (replacements + p) or p
    end
    local sanitizer = Cs(((replacements and (replacements + 1)) or P(1))^0)
    local result = { }
    for i = 1, #entries do
        local entry = entries[i]
        if conflicting and lpegmatch(conflicting, entry) then
            report("%s: discarding conflicting pattern: %s", basename, entry)
        else -- we can speed this up by testing for replacements in the string
            local sanitized = lpegmatch(sanitizer, entry)
            if sanitized ~= entry then
                report("%s: sanitizing pattern: %s -> %s (for old patterns)", basename, entry, sanitized)
            end
            result[#result+1] = sanitized
        end
    end
    return result
end

-- Return a new list without empty entries and without repeated entries;
-- the first occurrence of an entry is the one that is kept.
local function drop_duplicates(entries, basename)
    local seen, collected = { }, { }
    for i = 1, #entries do
        local entry = entries[i]
        if entry == "" then
            -- discard
        elseif seen[entry] then
            report("%s: discarding duplicate pattern: %s", basename, entry)
        else
            seen[entry] = true
            collected[#collected+1] = entry
        end
    end
    return collected
end

-- Derive the "old" used-character set from the new one: characters that are
-- ignored for this language or that get replaced are left out. The used set
-- is keyed by character string while 'ignored' is keyed by code point, so
-- the character is converted before the lookup.
local function filter_used(used, ignored)
    local kept = { }
    for character, value in next, used do
        if ignored and ignored[byte(character)] then
            -- ignored for this language
        elseif replaced_whatever[character] then
            -- replaced in the old set
        else
            kept[character] = value
        end
    end
    return kept
end

-- Load and clean the hyphenation patterns and exceptions of one language.
--
-- Parameters:
--   path     : directory that holds the source files
--   name     : base name of the source file(s), also used as prefix in reports
--   mnemonic : language mnemonic (not used here)
--   ignored  : optional set of code points (number keys) that disqualify an
--              entry for the "old" sets
--   merged   : optional list of base names whose txt files are concatenated
--
-- Returns, in this order:
--   okay, new patterns, new exceptions, old patterns, old exceptions,
--   comment, stripset (always ""), used characters in new patterns,
--   used characters in new exceptions, and both of those for the old sets.
-- When no usable source is found okay is false and the lists are empty.
function scripts.patterns.load(path,name,mnemonic,ignored,merged)
    local basename = name
    local stripset = ""
    local okay, patterns, hyphenations, comment = read_sources(path, name, merged)
    local splitpatternsnew, splithyphenationsnew = { }, { }
    local splitpatternsold, splithyphenationsold = { }, { }
    local usedpatterncharactersnew, usedhyphenationcharactersnew = { }, { }
    if okay then
        -- split into entries
        local separator = lpegpatterns.whitespace^1
        splitpatternsnew     = lpegsplit(separator, patterns)
        splithyphenationsnew = lpegsplit(separator, hyphenations)
        -- remove comments
        strip_comments(patterns, splitpatternsnew, basename)
        strip_comments(hyphenations, splithyphenationsnew, basename)
        -- remove lines with commands
        strip_commands(patterns, splitpatternsnew, basename)
        strip_commands(hyphenations, splithyphenationsnew, basename)
        -- check for valid utf
        strip_invalid_utf(splitpatternsnew, basename)
        strip_invalid_utf(splithyphenationsnew, basename)
        -- remove funny lines and register the characters in use
        usedpatterncharactersnew     = collect_used(splitpatternsnew, byte("."), basename)
        usedhyphenationcharactersnew = collect_used(splithyphenationsnew, byte("-"), basename)
        -- derive the old sets
        splitpatternsold     = make_old_entries(splitpatternsnew, ignored, basename)
        splithyphenationsold = make_old_entries(splithyphenationsnew, ignored, basename)
        -- discarding duplicates
        splitpatternsnew     = drop_duplicates(splitpatternsnew, basename)
        splithyphenationsnew = drop_duplicates(splithyphenationsnew, basename)
        splitpatternsold     = drop_duplicates(splitpatternsold, basename)
        splithyphenationsold = drop_duplicates(splithyphenationsold, basename)
    else
        report("no valid file %s.*", name)
    end

    local usedpatterncharactersold     = filter_used(usedpatterncharactersnew, ignored)
    local usedhyphenationcharactersold = filter_used(usedhyphenationcharactersnew, ignored)

    return okay,
        splitpatternsnew, splithyphenationsnew, splitpatternsold, splithyphenationsold, comment, stripset,
        usedpatterncharactersnew, usedhyphenationcharactersnew, usedpatterncharactersold, usedhyphenationcharactersold
end
415 416
-- Write the converted data of one language to the destination path.
--
-- Four files are (re)created, all named lang-<mnemonic> with a suffix:
--   .rme : the comment / license text
--   .pat : the old patterns wrapped in \patterns{}
--   .hyp : the old exceptions wrapped in \hyphenation{}
--   .lua : a serialized table with metadata plus the new patterns and
--          exceptions (zlib compressed when --compress is given)
--
-- Parameters:
--   destination     : target directory; "." is used when it is not a string
--   mnemonic        : language mnemonic; nothing is saved for "??"
--   name            : source name, stored in the metadata
--   patternsnew     : list of patterns for the lua file
--   hyphenationsnew : list of exceptions for the lua file
--   patternsold     : list of patterns for the pat file
--   hyphenationsold : list of exceptions for the hyp file
--   comment         : comment / license text
--   stripped        : not used here
--   pusednew, husednew, pusedold, husedold : sets of used characters
--   ignored         : not used here
function scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,
        pusednew,husednew,pusedold,husedold,ignored)
    local nofpatternsnew, nofhyphenationsnew = #patternsnew, #hyphenationsnew
    local nofpatternsold, nofhyphenationsold = #patternsold, #hyphenationsold
    report("language %s has %s old and %s new patterns and %s old and %s new exceptions",mnemonic,nofpatternsold,nofpatternsnew,nofhyphenationsold,nofhyphenationsnew)
    if mnemonic ~= "??" then
        -- the fallback has to be in place before the file names are built
        if type(destination) ~= "string" then
            destination = "."
        end

        local puold = concat(table.sortedkeys(pusedold), " ")
        local huold = concat(table.sortedkeys(husedold), " ")

        local rmefile = file.join(destination,"lang-"..mnemonic..".rme")
        local patfile = file.join(destination,"lang-"..mnemonic..".pat")
        local hypfile = file.join(destination,"lang-"..mnemonic..".hyp")
        local luafile = file.join(destination,"lang-"..mnemonic..".lua") -- suffix might change to llg

        local topline = "% generated by mtxrun --script pattern --convert"
        local banner  = "% for comment and copyright, see " .. file.basename(rmefile)
        report("saving language data for %s",mnemonic)
        if not comment or comment == "" then
            comment = "% no comment"
        end

        local compression = environment.arguments.compress and "zlib" or nil

        -- in the lua file every line of the comment becomes a tex comment
        local lines = string.splitlines(comment)
        for i=1,#lines do
            if not find(lines[i],"^%%") then
                lines[i] = "% " .. lines[i]
            end
        end

        local metadata = {
         -- texcomment = comment,
            texcomment = concat(lines,"\n"),
            source     = name,
            mnemonic   = mnemonic,
        }

        local patterndata, hyphenationdata
        if nofpatternsnew > 0 then
            local data = concat(patternsnew," ")
            patterndata = {
                n              = nofpatternsnew,
                compression    = compression,
                length         = #data, -- size before compression
                data           = compression and zlib.compress(data,9) or data,
                characters     = concat(table.sortedkeys(pusednew),""),
                lefthyphenmin  = 1, -- determined by pattern author
                righthyphenmax = 1, -- determined by pattern author
            }
        else
            patterndata = {
                n = 0,
            }
        end
        if nofhyphenationsnew > 0 then
            local data = concat(hyphenationsnew," ")
            hyphenationdata = {
                n           = nofhyphenationsnew,
                compression = compression,
                length      = #data, -- size before compression
                data        = compression and zlib.compress(data,9) or data,
                characters  = concat(table.sortedkeys(husednew),""),
            }
        else
            hyphenationdata = {
                n = 0,
            }
        end
        local data = {
            -- a prelude to language goodies, like we have font goodies and in
            -- mkiv we can use this file directly
            version    = "1.001",
            comment    = topline,
            metadata   = metadata,
            patterns   = patterndata,
            exceptions = hyphenationdata,
        }

        -- get rid of files from a previous run before writing the new ones
        os.remove(rmefile)
        os.remove(patfile)
        os.remove(hypfile)
        os.remove(luafile)

        io.savedata(rmefile,format("%s\n\n%s",topline,comment))
        io.savedata(patfile,format("%s\n\n%s\n\n%% used: %s\n\n\\patterns{\n%s}",topline,banner,puold,concat(patternsold,"\n")))
        io.savedata(hypfile,format("%s\n\n%s\n\n%% used: %s\n\n\\hyphenation{\n%s}",topline,banner,huold,concat(hyphenationsold,"\n")))
        io.savedata(luafile,table.serialize(data,true))
    end
end
506 507
-- Common preparation for checking and converting: run char-def.lua and,
-- when --specification is given, put that extra language at the front of
-- the language list. A specification must have exactly three components.
function scripts.patterns.prepare()
    dofile(resolvers.findfile("char-def.lua"))

    local specification = environment.argument("specification")
    if not specification then
        return
    end

    local components = utilities.parsers.settings_to_array(specification)
    if #components ~= 3 then
        report('invalid specification: %q, "xx,lang-yy,zzzz" expected', specification)
        return
    end

    table.insert(scripts.patterns.list, 1, components)
    report("specification added: %s %s %s", table.unpack(components))
end
522 523
-- Load every language of the list (or only those whose mnemonic is given
-- on the command line) from --path and report when loading fails. Nothing
-- is written.
function scripts.patterns.check()
    local sourcepath = environment.argument("path") or "."
    local requested  = environment.files
    -- false means: no selection, handle all languages
    local selection  = #requested > 0 and table.tohash(requested)
    for _, entry in next, scripts.patterns.list do
        local mnemonic, name = entry[1], entry[2]
        if not selection or selection[mnemonic] then
            report("checking language %s, file %s", mnemonic, name)
            local loaded = scripts.patterns.load(sourcepath, name, mnemonic, entry[4], entry[5])
            if not loaded then
                report("there are errors that need to be fixed")
            end
            report()
        end
    end
end
542 543
-- Convert every language of the list (or only those whose mnemonic is given
-- on the command line): load the sources from --path and save the result in
-- --destination. Both paths default to "." but have to differ.
function scripts.patterns.convert()
    local sourcepath = environment.argument("path") or "."
    if sourcepath == "" then
        report("provide sourcepath using --path")
        return
    end

    local destination = environment.argument("destination") or "."
    if sourcepath == destination then
        report("source path and destination path should differ (use --path and/or --destination)")
        return
    end

    local requested = environment.files
    -- false means: no selection, handle all languages
    local selection = #requested > 0 and table.tohash(requested)

    for _, entry in next, scripts.patterns.list do
        local mnemonic, name, ignored, merged = entry[1], entry[2], entry[4], entry[5]
        if not selection or selection[mnemonic] then
            report("converting language %s, file %s", mnemonic, name)
            local okay, patternsnew, hyphenationsnew, patternsold, hyphenationsold, comment, stripped,
                pusednew, husednew, pusedold, husedold = scripts.patterns.load(sourcepath, name, mnemonic, ignored, merged)
            if not okay then
                report("convertion aborted due to error(s)")
            else
                scripts.patterns.save(destination, mnemonic, name, patternsnew, hyphenationsnew, patternsold, hyphenationsold, comment, stripped,
                    pusednew, husednew, pusedold, husedold, ignored)
            end
            report()
        end
    end
end
575 576
-- Load a word specification file. Returns the specification and its 'lists'
-- field, or false when the file cannot be loaded or has no lists.
local function valid(filename)
    local specification = table.load(filename)
    local lists = specification and specification.lists
    if lists then
        return specification, lists
    end
    return false
end
587 588
-- Fill the data, compression and length fields of one list entry from the
-- word file that entry.filename refers to. Runs of whitespace in the file
-- are collapsed to single spaces. When there is no filename, or the file
-- cannot be resolved, the entry is set to an empty uncompressed string.
local function update_entry(entry, compress)
    local wordfile = entry.filename
    local wordpath = wordfile and resolvers.findfile(wordfile)
    -- same test as for the specification file: an empty result is not a hit
    if wordpath and wordpath ~= "" then
        report("adding words from %a", wordpath)
        local data = io.loaddata(wordpath) or ""
        data = string.strip(data)
        data = string.gsub(data, "%s+", " ")
        entry.length = #data -- size before compression
        if compress then
            entry.data        = zlib.compress(data, 9)
            entry.compression = "zlib"
        else
            entry.data        = data
            entry.compression = nil
        end
    else
        entry.data        = ""
        entry.compression = nil
        entry.length      = 0
    end
end

-- Update the word lists in the specification files given on the command
-- line. This only happens when --update is given; with --compress the word
-- data is stored zlib compressed. Each updated specification gets a version
-- and a timestamp and is saved under the file name as it was given.
function scripts.patterns.words()
    local arguments = environment.arguments
    if not arguments.update then
        report("provide --update")
        return
    end
    local compress = arguments.compress
    local files    = environment.files
    for i = 1, #files do
        local filename = files[i]
        local fullname = resolvers.findfile(filename)
        if fullname and fullname ~= "" then
            report("checking file %a", fullname)
            local specification, lists = valid(fullname)
            if specification and #lists > 0 then
                report("updating %a of language %a", filename, specification.language)
                for j = 1, #lists do
                    update_entry(lists[j], compress)
                end
                specification.version   = "1.00"
                specification.timestamp = os.localtime()
                report("updated file %a is saved", filename)
                table.save(filename, specification)
            else
                report("no file %a", filename)
            end
        else
            report("nothing done")
        end
    end
end
644 645
-- mtxrun --script patterns --hyphenate --language=nl nogalwiedes --left=3
646
--
647
-- hyphenator |
648
-- hyphenator | . n o g a l w i e d e s . . n o g a l w i e d e s .
649
-- hyphenator | .0n4 0 4 0 0 0 0 0 0 0 0 0 0
650
-- hyphenator | 0o0g0a4l0 0 4 0 0 4 0 0 0 0 0 0 0
651
-- hyphenator | 1g0a0 0 4 1 0 4 0 0 0 0 0 0 0
652
-- hyphenator | 0l1w0 0 4 1 0 4 1 0 0 0 0 0 0
653
-- hyphenator | 4i0e0 0 4 1 0 4 1 4 0 0 0 0 0
654
-- hyphenator | 0i0e3d0e0 0 4 1 0 4 1 4 0 3 0 0 0
655
-- hyphenator | 0e1d0 0 4 1 0 4 1 4 0 3 0 0 0
656
-- hyphenator | 1d0e0 0 4 1 0 4 1 4 0 3 0 0 0
657
-- hyphenator | 0d0e2s0 0 4 1 0 4 1 4 0 3 0 2 0
658
-- hyphenator | 4s0. 0 4 1 0 4 1 4 0 3 0 4 0
659
-- hyphenator | .0n4o1g0a4l1w4i0e3d0e4s0. . n o-g a l-w i e-d e s .
660
-- hyphenator |
661
-- mtx-patterns | nl 3 3 : nogalwiedes : nogal-wie-des
662 663
-- Show how the words given on the command line get hyphenated with the
-- patterns of --language (default "us"), using --left and --right (default
-- 3 each) as minimum character counts. The hyphenator.steps tracker is
-- switched on before the words are processed.
function scripts.patterns.hyphenate()
    require("lang-hyp")
    local hyphenator = languages.hyphenators.traditional
    local arguments  = environment.arguments
    local leftmin    = tonumber(arguments.left)  or 3
    local rightmin   = tonumber(arguments.right) or 3
    local language   = arguments.language or "us"
    local dictionary = hyphenator.loadpatterns(language)
    local words      = environment.files
    local settings   = {
        leftcharmin  = leftmin,
        rightcharmin = rightmin,
        leftchar     = false,
        rightchar    = false,
    }
    trackers.enable("hyphenator.steps")
    for index = 1, #words do
        local word = words[index]
        -- one line per word: language, both minima, the word and its hyphenated form
        report("%s %s %s : %s : %s", language, leftmin, rightmin, word,
            hyphenator.injecthyphens(dictionary, word, settings))
    end
end
687 688
-- Command line dispatch: the first action flag that is set wins; without
-- any of them the help text is shown.
local argument = environment.argument

if argument("check") then
    scripts.patterns.prepare()
    scripts.patterns.check()
elseif argument("convert") then
    scripts.patterns.prepare()
    scripts.patterns.convert()
elseif argument("words") then
    scripts.patterns.words() -- for the moment here
elseif argument("hyphenate") then
    scripts.patterns.hyphenate() -- for the moment here
elseif argument("exporthelp") then
    local exporthelp = argument("exporthelp")
    application.export(exporthelp, environment.files[1])
else
    application.help()
end
703 704
-- mtxrun --script pattern --check hyph-*.tex
705
-- mtxrun --script pattern --check --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns
706
-- mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns
707
-- mtxrun --script pattern --convert --path=c:/data/repositories/tex-hyphen/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns
708
--
709
-- use this call:
710
--
711
-- setlocal
712
--
713
-- rem tugsvn checkout:
714
--
715
-- set patternsroot=c:/data/develop/svn-hyphen/trunk
716
--
717
-- rem github checkout:
718
--
719
-- set patternsroot=c:/data/repositories/tex-hyphen
720
--
721
-- del /q c:\data\develop\tex-context\tex\texmf-local\tex\context\patterns\*
722
-- del /q c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns\*
723
-- del /q c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns\*
724
--
725
-- mtxrun --script pattern --convert --path=%patternsroot%/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=c:/data/develop/tmp/patterns
726
--
727
-- copy /Y lang*.hyp c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
728
-- copy /Y lang*.pat c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
729
-- copy /Y lang*.rme c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
730
-- copy /Y lang*.lua c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
731
--
732
-- move /Y lang*.hyp c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
733
-- move /Y lang*.pat c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
734
-- move /Y lang*.rme c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
735
-- move /Y lang*.lua c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
736
--
737
-- mtxrun --script pattern --words --update word-th.lua --compress
738
--
739
-- copy /Y word*.lua c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
740
-- move /Y word*.lua c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
741
--
742
-- mtxrun --generate
743
--
744
-- endlocal
745