mtx-spell.lua /size: 11 Kb    last modification: 2021-10-28 13:50
1
-- Make sure the global registry of module metadata exists.
if not modules then
    modules = { }
end

-- Register this script under its own name. The entry used to be stored under
-- the key 'mtx-patterns', which does not match this file (mtx-spell) nor the
-- application name that is used further down.
modules['mtx-spell'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
local
find
,
gsub
,
match
=
string
.
find
,
string
.
gsub
,
string
.
match
10
local
concat
=
table
.
concat
11
local
P
,
R
,
S
,
C
,
Ct
,
Cmt
,
Cc
,
Cs
=
lpeg
.
P
,
lpeg
.
R
,
lpeg
.
S
,
lpeg
.
C
,
lpeg
.
Ct
,
lpeg
.
Cmt
,
lpeg
.
Cc
,
lpeg
.
Cs
12
local
patterns
=
lpeg
.
patterns
13
local
lpegmatch
=
lpeg
.
match
14 15
-- Help text of the script as an XML document; it is passed on as the
-- 'helpinfo' field of the application definition. The document has to start
-- with the XML declaration and must not contain anything but the markup
-- itself, so it is kept as a clean multi-line long string.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-spell</entry>
  <entry name="detail">ConTeXt Word Filtering</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="expand"><short>expand hunspell dics and aff files</short></flag>
    <flag name="dictionary"><short>word file (.dics)</short></flag>
    <flag name="specification"><short>affix specification file (.aff)</short></flag>
    <flag name="result"><short>destination file</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script spell --expand --dictionary="en_US.dic" --specification="en_US.txt" --result="data-us.txt"</command></example>
   </subcategory>
  </category>
 </examples>
</application>
]]
43 44 45
-- Application object of this script: it carries the name, the banner and the
-- help text and provides the reporter and the help/export entry points that
-- are used elsewhere in this file.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "ConTeXt Word Filtering 0.10",
    name     = "mtx-spell",
})
50 51
-- Reporter of this application, used for messages to the user.
local report = application.report

-- When set to true the affix handlers print every word that they derive.
local trace = false

-- Make sure that the global script namespace and our own slot in it exist,
-- without replacing what may already be there.
if not scripts then
    scripts = { }
end
if not scripts.spell then
    scripts.spell = { }
end
56 57
---------------
58 59
require
(
"
char-def
"
)
60
require
(
"
char-utf
"
)
61 62
-- nl: ĳ (U+0133, the single-character ij ligature) => ij (two plain letters)
63 64
do
65 66
-- State that is shared by the functions below; resetall() has to be called
-- before a run because the variables are empty until then.
local prefixes  -- affix tag -> list of prefix handlers
local suffixes  -- affix tag -> list of suffix handlers
local affixes   -- affix tag -> list of all handlers (prefixes and suffixes)
local continue  -- affix tag -> boolean taken from the Y/N field of the header
local collected -- set of words while collecting, sorted list afterwards

-- Starts a fresh run by replacing all shared state with new, empty tables.
-- The three handler tables come from table.setmetatableindex("table");
-- presumably such a table creates a sub-table on first access, as the
-- register functions insert into entries that they never create themselves.
local function resetall()
    local newindexed = table.setmetatableindex
    prefixes, suffixes, affixes = newindexed("table"), newindexed("table"), newindexed("table")
    continue, collected = { }, { }
end
75 76
-- Set of all characters whose category in the character database is "lu",
-- keyed by the UTF-8 string of the character.
local uppers   = { }
local chardata = characters.data
for unicode, entry in next, chardata do
    if entry.category == "lu" then
        uppers[utf.char(unicode)] = true
    end
end

-- Building blocks for the dictionary parser:
local newline = patterns.newline
local digit   = patterns.digit
-- an entry that starts with a digit or with one of the characters above
local skipped = digit + lpeg.utfchartabletopattern(uppers)
-- anything that is not a line ending
local ignored = 1 - newline
-- characters that end the part of a line that is taken as a word
local garbage = S("'-")
89 90
-- Normalizes the content of a dictionary or affix file before it is parsed.
--
-- data: the file content as one string
--
-- Returns the string with every ij ligature (U+0133) replaced by the two
-- letters "ij". The ligature is written with byte escapes ("\196\179" is its
-- UTF-8 encoding) so that it cannot get lost when this source file is
-- normalized or converted; replacing "ij" by "ij" would be a no-op.
local function fixeddata(data)
    -- the parentheses drop the substitution count that gsub returns as well
    return (gsub(data,"\196\179","ij"))
end
94 95
-- Appends handler f for affix tag 'tag' to the list of suffix handlers and to
-- the combined list of all handlers of that tag.
local function registersuffix(tag,f)
    local bysuffix = suffixes[tag]
    local byaffix  = affixes[tag]
    bysuffix[#bysuffix+1] = f
    byaffix[#byaffix+1] = f
end
99 100
-- Appends handler f for affix tag 'tag' to the list of prefix handlers and to
-- the combined list of all handlers of that tag.
local function registerprefix(tag,f)
    local byprefix = prefixes[tag]
    local byaffix  = affixes[tag]
    byprefix[#byprefix+1] = f
    byaffix[#byaffix+1] = f
end
104 105
-- Loads an affix file and registers a handler for every rule that this script
-- knows how to apply.
--
-- specification: name of the affix file; when io.loaddata gives nothing an
--                empty string is parsed and no handler gets registered
--
-- The loop looks for header lines
--
--     PFX <tag> <Y|N> <count>        SFX <tag> <Y|N> <count>
--
-- and treats the <count> lines that follow as rules of the form
--
--     PFX|SFX <tag> <strip> <add>[/...] <condition>
--
-- The Y/N field ends up as a boolean in continue[tag]. Rule lines that have a
-- "-" or "'" in them are skipped. A handler is registered for
--
--     prefixes with strip "0" and condition "."   (the word gets <add> in front)
--     suffixes with strip "0" and condition "."   (the word gets <add> appended)
--     suffixes with strip "0" and a condition     (idem, when the word end matches)
--     suffixes with a strip and a condition       (the word end is replaced)
--
-- Each handler takes a word and returns the derived word, or nothing when the
-- rule does not apply. All other rules are ignored (and shown when 'trace' is
-- set). Nothing is returned.
--
-- NOTE(review): <add> is used literally, so an <add> of "0" puts the character
-- 0 in the word; check the affix file format to see if that is intended.
local function getfixes(specification)

    local data   = fixeddata(io.loaddata(specification) or "")
    local lines  = string.splitlines(data)
    local nlines = #lines

    -- /* in two
    -- Y/N continuation

    -- A condition like "[^aeiou]y" is compiled piece by piece; every piece
    -- that is recognized gets appended to the accumulator p0:
    --
    --     [^...]   one character that is not in the set
    --     [...]    one character that is in the set
    --     ...      literal text

    local p0 = nil

    local function append(p)
        p0 = p0 and (p0 * p) or p
    end

    local content = Cs((1-P("]"))^1)

    local p1 = P("[^") * content * P("]") / function(s)
        local t = utf.split(s)
        append(1 - lpeg.utfchartabletopattern(t))
    end
    local p2 = P("[") * content * P("]") / function(s)
        local t = utf.split(s)
        append(lpeg.utfchartabletopattern(t))
    end
    local p3 = (patterns.utf8char - S("[]"))^1 / function(s)
        append(P(s))
    end

    local parser = (p1 + p2 + p3)^1

    -- Compiles a condition; the result is nil when no piece of the condition
    -- could be parsed, so callers have to check before they use it.
    local function makepattern(s)
        p0 = nil
        lpegmatch(parser,s)
        return p0
    end

    local i = 1
    while i <= nlines do
        local line = lines[i]

        local tag, continuation, n = match(line,"PFX%s+(%S+)%s+(%S+)%s+(%d+)")
        if tag then
            continue[tag] = continuation == "Y"
            for _=1,tonumber(n) or 0 do
                if i >= nlines then
                    -- the header announces more rules than there are lines
                    -- left; stop here instead of passing nil to find
                    break
                end
                i = i + 1
                line = lines[i]
                if not find(line,"[-']") then
                    -- all four captures are mandatory, so they are either
                    -- all present or all nil
                    local ruletag, strip, add, condition = match(line,"PFX%s+(%S+)%s+(%S+)%s+([^%s/]+)%S*%s+(%S+)")
                    if ruletag then
                        if strip == "0" and condition == "." then
                            -- simple case: PFX A 0 re .
                            registerprefix(ruletag,function(str)
                                local new = add .. str
                                if trace then
                                    print("p 1",str,new)
                                end
                                return new
                            end)
                        elseif trace then
                            -- '2': strip begin (conditional prefix), '3': the
                            -- prefix strips text; neither gets a handler
                            print(strip == "0" and '2' or '3',line)
                        end
                    end
                end
            end
        end

        -- After a PFX group 'line' holds the last line of that group, which
        -- cannot be an SFX header, so testing it here does no harm.
        local tag, continuation, n = match(line,"SFX%s+(%S+)%s+(%S+)%s+(%S+)")
        if tag then
            continue[tag] = continuation == "Y"
            for _=1,tonumber(n) or 0 do
                if i >= nlines then
                    -- see the PFX loop: never read past the last line
                    break
                end
                i = i + 1
                line = lines[i]
                if not find(line,"[-']") then
                    local ruletag, strip, add, condition = match(line,"SFX%s+(%S+)%s+(%S+)%s+([^%s/]+)%S*%s+(%S+)")
                    if ruletag then
                        if strip == "0" and condition == "." then
                            -- SFX Y 0 ly .
                            registersuffix(ruletag,function(str)
                                local new = str .. add
                                if trace then
                                    print("s 1",str,new)
                                end
                                return new
                            end)
                        elseif strip == "0" then
                            -- SFX G 0 ing [^e]
                            local ending = makepattern(condition)
                            if ending then
                                local final = ending * P(-1)
                                local check = (1-final)^0 * final
                                registersuffix(ruletag,function(str)
                                    if lpegmatch(check,str) then
                                        local new = str .. add
                                        if trace then
                                            print("s 2",str,new)
                                        end
                                        return new
                                    end
                                end)
                            elseif trace then
                                -- the condition could not be compiled
                                print('4',line)
                            end
                        else
                            -- SFX G match$ suffix old$ (dutch has sloppy matches, use english as reference)
                            local ending   = makepattern(condition)
                            local stripped = makepattern(strip)
                            if ending and stripped then
                                local final   = ending * P(-1)
                                local check   = (1-final)^1 * final
                                -- the stripped text has to be at the end of
                                -- the word as well; it is replaced by <add>
                                local tail    = stripped * P(-1)
                                local replace = Cs((1-tail)^1 * (tail/add))
                                registersuffix(ruletag,function(str)
                                    if lpegmatch(check,str) then
                                        local new = lpegmatch(replace,str)
                                        if new then
                                            if trace then
                                                print("s 3",str,new)
                                            end
                                            return new
                                        end
                                    end
                                end)
                            elseif trace then
                                -- the condition or the strip could not be compiled
                                print('4',line)
                            end
                        end
                    end
                end
            end
        end

        i = i + 1
    end
end
241 242
-- Match-time callback of the dictionary parser (see the Cmt call in getwords).
--
-- The first two arguments that a match-time capture passes (subject and
-- position) are not used.
--
-- word: the text that was captured as the entry
-- spec: list of affix tags that follow the entry, or nil when there are none
--
-- With tags, the word itself and every form that the handlers of those tags
-- produce are added to 'collected'; per tag the handlers are tried in order
-- and, unless continue[tag] is set, only the first form is taken. Without
-- tags the word is added unless it contains a "/". The result is always true,
-- so the match never fails here.
local function expand(_,_,word,spec)
    if not spec then
        if not find(word,"/") then
            collected[word] = true
        end
        return true
    end
    local forms = { word }
    local count = 1
    for s=1,#spec do
        local tag      = spec[s]
        local handlers = affixes[tag]
        if handlers then
            for h=1,#handlers do
                local form = handlers[h](word)
                if form then
                    count = count + 1
                    forms[count] = form
                    if not continue[tag] then
                        break
                    end
                end
            end
        end
    end
    for f=1,count do
        collected[forms[f]] = true
    end
    return true
end
271 272
-- Loads a dictionary file, expands every entry with the registered affix
-- handlers and turns 'collected' into a sorted list of words.
--
-- dictionary: name of the word file; when io.loaddata gives nothing an empty
--             string is parsed
--
-- Returns the sorted list (which is also left in 'collected').
--
-- Per line the parser tries, in this order:
--
--   * empty lines are passed
--   * an entry that starts with a digit or a character of category "lu" is
--     skipped up to the end of the line
--   * otherwise the text up to a specifier, a line ending, "'" or "-" is
--     taken as word and handed to 'expand', together with the affix tags of
--     a specifier ("/" followed by known tags) when there is one
--   * whatever is left on the line is ignored
--
-- A specifier has to be followed by a line ending or by the end of the data.
-- The latter matters for files without a final newline: without it the last
-- entry was never expanded and was dropped because of its "/".
local function getwords(dictionary)
    local data = fixeddata(io.loaddata(dictionary) or "")

    -- every tag that has at least one handler is a valid tag
    local keys = { }
    for tag in next, prefixes do
        keys[tag] = true
    end
    for tag in next, suffixes do
        keys[tag] = true
    end

    local validkeys = lpeg.utfchartabletopattern(keys)
    local lineend   = newline + P(-1)
    local specifier = P("/") * Ct(C(validkeys)^1)^0 * lineend
    local word      = C((1-specifier-newline-garbage)^1)
    local pattern   = (
        newline^1
      + skipped * (1-newline)^0
      + Cmt(word * specifier^0,expand)
      + ignored^1 * newline^1
    )^0

    lpegmatch(pattern,data)

    -- from here on 'collected' is a list and no longer a set
    collected = table.keys(collected)
    table.sort(collected)
    return collected
end
294 295
-- Writes the collected words, one per line, to the file named by 'result';
-- nothing happens when no name is given.
local function saveall(result)
    if not result then
        return
    end
    local text = concat(collected,"\n")
    io.savedata(result,text)
end
300 301
-- Entry point of --expand: reads the affix file, expands the dictionary with
-- it and saves the result.
--
-- arguments: only checked for presence; the actual values are read from
--            environment.arguments (dictionary, specification, result)
--
-- Returns the sorted list of collected words, or nothing when 'arguments' is
-- missing or when the dictionary or specification name is missing (the
-- latter is reported).
--
-- The expansion used to sit in the branch that is taken when --result is
-- missing or empty, so a proper --result=name made the call do nothing at
-- all. Now the expansion always runs once both input names are known and the
-- list is saved when a usable result name is given. Without such a name the
-- list is still returned, as before, but the missing name is reported and
-- nothing is written.
function scripts.spell.expand(arguments)
    if not arguments then
        return
    end
    local dictionary    = environment.arguments.dictionary
    local specification = environment.arguments.specification
    local result        = environment.arguments.result
    if type(dictionary) ~= "string" or dictionary == "" then
        report("missing --dictionary=name")
    elseif type(specification) ~= "string" or specification == "" then
        report("missing --specification=name")
    else
        resetall()
        getfixes(specification)
        getwords(dictionary)
        if type(result) == "string" and result ~= "" then
            saveall(result)
        else
            report("missing --result=name, nothing is saved")
        end
        return collected
    end
end
319 320
end
321 322
-- spell.dicaff {
323
-- dictionary = "e:/context/spell/lo/en_US.dic.txt",
324
-- specification = "e:/context/spell/lo/en_US.aff.txt",
325
-- result = "e:/context/spell/lo/data-en.txt",
326
-- }
327 328
-- spell.dicaff {
329
-- dictionary = "e:/context/spell/lo/en_GB.dic.txt",
330
-- specification = "e:/context/spell/lo/en_GB.aff.txt",
331
-- result = "e:/context/spell/lo/data-uk.txt",
332
-- }
333 334
-- spell.dicaff {
335
-- dictionary = "e:/context/spell/lo/nl_NL.dic.txt",
336
-- specification = "e:/context/spell/lo/nl_NL.aff.txt",
337
-- result = "e:/context/spell/lo/data-nl.txt",
338
-- }
339 340
-- Command dispatch: --expand runs the expansion, --exporthelp exports the
-- help information and anything else shows the help.
local argument = environment.argument

if argument("expand") then
    scripts.spell.expand(environment.arguments)
elseif argument("exporthelp") then
    local target = argument("exporthelp")
    application.export(target,environment.files[1])
else
    application.help()
end
347