-- Registers this script's metadata in the global `modules` registry
-- (created here when it does not exist yet). The entry is keyed by the
-- script's own name, "mtx-spell", which matches the application name and
-- the `scripts.spell` namespace used in the rest of this file; the key
-- used to be 'mtx-patterns', which belongs to a different script.
if not modules then modules = { } end modules ['mtx-spell'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local find, gsub, match = string.find, string.gsub, string.match
10local concat = table.concat
11local P, R, S, C, Ct, Cmt, Cc, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cs
12local patterns = lpeg.patterns
13local lpegmatch = lpeg.match
14
-- XML description of the script: metadata, the supported command line
-- flags and a usage example. It is handed to logs.application, which
-- uses it for the help/export output. The file extensions mentioned in
-- the flag descriptions and in the example are kept in agreement
-- (.dic for the word file, .aff for the affix specification).
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
 <entry name="name">mtx-spell</entry>
 <entry name="detail">ConTeXt Word Filtering</entry>
 <entry name="version">0.10</entry>
 </metadata>
 <flags>
 <category name="basic">
 <subcategory>
 <flag name="expand"><short>expand hunspell dics and aff files</short></flag>
 <flag name="dictionary"><short>word file (.dic)</short></flag>
 <flag name="specification"><short>affix specification file (.aff)</short></flag>
 <flag name="result"><short>destination file</short></flag>
 </subcategory>
 </category>
 </flags>
 <examples>
 <category>
 <title>Examples</title>
 <subcategory>
 <example><command>mtxrun --script spell --expand --dictionary="en_US.dic" --specification="en_US.aff" --result="data-us.txt"</command></example>
 </subcategory>
 </category>
 </examples>
</application>
]]
43
44
-- The application object provides the reporter plus the help/export
-- entry points that are used at the end of this file.
local application = logs.application({
    name     = "mtx-spell",
    banner   = "ConTeXt Word Filtering 0.10",
    helpinfo = helpinfo,
})

-- Set to true to print every generated word form while expanding.
local trace  = false
local report = application.report

-- Make sure the global script namespace and our own slot in it exist,
-- without clobbering anything that is already there.
if not scripts then
    scripts = { }
end
if not scripts.spell then
    scripts.spell = { }
end
56
57
58
59require("char-def")
60require("char-utf")
61
62
63
64do
65
-- Shared state of one expansion run:
--   prefixes / suffixes : flag -> list of rewrite functions of that kind
--   affixes             : flag -> list of all rewrite functions
--   continue            : flag -> boolean taken from the affix header
--   collected           : set of words found so far (later a sorted list)
local prefixes, suffixes, affixes, continue, collected

-- Starts a run with empty state. The three rule tables are created with
-- table.setmetatableindex("table"), so that indexing a flag that is not
-- present yet can be followed directly by table.insert (see the register
-- helpers).
local function resetall()
    local newtable = table.setmetatableindex
    prefixes, suffixes, affixes = newtable("table"), newtable("table"), newtable("table")
    continue, collected = { }, { }
end
75
-- Set (utf string -> true) of every character whose entry in the
-- character database has category "lu" (presumably: uppercase letter).
local uppers   = { }
local chardata = characters.data
for unicode, description in next, chardata do
    if description.category == "lu" then
        uppers[utf.char(unicode)] = true
    end
end

-- Building blocks for the dictionary grammar:
--   skipped : an entry starting with a digit or one of the characters above
--   ignored : any single character that is not a newline
--   garbage : the characters ' and -, which end a word capture
local newline = patterns.newline
local digit   = patterns.digit
local garbage = S("'-")
local ignored = 1 - newline
local skipped = digit + lpeg.utfchartabletopattern(uppers)
89
-- Normalizes the raw content of a dictionary or affix file before it is
-- parsed: the single-character ligature U+0133 (LATIN SMALL LIGATURE IJ)
-- is replaced by the two letters "ij".
--
-- The ligature is spelled as its UTF-8 byte sequence ("\196\179") on
-- purpose: when the literal character gets folded to plain "ij" by an
-- editor or a normalizing tool, the substitution silently degrades to
-- replacing "ij" by "ij", which does nothing.
--
-- @param data  string with the file content
-- @return      the normalized string (one value only)
local function fixeddata(data)
    -- the parentheses drop gsub's second result (the replacement count)
    return (gsub(data,"\196\179","ij"))
end
94
-- Stores rewrite function f for flag `tag`, both in the suffix list and
-- in the combined affix list of that flag. The per-flag lists come into
-- existence when they are indexed.
local function registersuffix(tag,f)
    local bykind = suffixes[tag]
    bykind[#bykind+1] = f
    local all = affixes[tag]
    all[#all+1] = f
end
99
-- Stores rewrite function f for flag `tag`, both in the prefix list and
-- in the combined affix list of that flag. The per-flag lists come into
-- existence when they are indexed.
local function registerprefix(tag,f)
    local bykind = prefixes[tag]
    bykind[#bykind+1] = f
    local all = affixes[tag]
    all[#all+1] = f
end
104
-- Reads a hunspell style affix specification and registers one rewrite
-- function per usable rule (see registerprefix / registersuffix).
--
-- Recognized lines:
--
--   PFX|SFX <flag> <Y|N> <count>                      header
--   PFX|SFX <flag> <strip> <affix>[/flags] <condition>  rule, <count> times
--
-- The header's second field is stored as continue[flag] (true for "Y").
-- Rule lines that contain a ' or - are skipped. In a rule, a <strip> of
-- "0" means that nothing is removed, an <affix> of "0" means that nothing
-- is added and a <condition> of "." means that the rule always applies;
-- any other condition is a sequence of literal characters, [abc] sets and
-- [^abc] negated sets that has to match the end of the word.
--
-- Supported rule shapes:
--
--   prefix, strip "0", condition "."  : affix .. word
--   suffix, strip "0", condition "."  : word .. affix
--   suffix, strip "0", some condition : word .. affix when the condition holds
--   suffix, some strip                : trailing <strip> replaced by affix
--                                       (when the condition holds)
--
-- Other prefix shapes are only reported when tracing is enabled. Rules
-- whose condition or strip string cannot be turned into a pattern are
-- skipped instead of raising an error, and a header that announces more
-- rules than there are lines left simply ends at the last line.
--
-- @param specification  name of the affix file; a file that cannot be
--                       loaded is treated as empty
local function getfixes(specification)

    local data  = fixeddata(io.loaddata(specification) or "")
    local lines = string.splitlines(data)

    -- Condition compiler: the three alternatives below do not capture
    -- anything useful; instead each of them appends a sub pattern to p0.

    local p0 = nil

    local function chain(p)
        p0 = p0 and (p0 * p) or p
    end

    -- [^abc] : anything but one of the listed characters
    local p1 = P("[^") * Cs((1-P("]"))^1) * P("]") / function(s)
        chain(1 - lpeg.utfchartabletopattern(utf.split(s)))
    end
    -- [abc] : one of the listed characters
    local p2 = P("[") * Cs((1-P("]"))^1) * P("]") / function(s)
        chain(lpeg.utfchartabletopattern(utf.split(s)))
    end
    -- a run of characters outside brackets : matched literally
    local p3 = (patterns.utf8char - S("[]"))^1 / function(s)
        chain(P(s))
    end

    local p = (p1 + p2 + p3)^1

    -- Returns the lpeg pattern for condition/strip string s, or nil when
    -- nothing of s could be parsed (for instance a lone bracket).
    local function makepattern(s)
        p0 = nil
        lpegmatch(p,s)
        return p0
    end

    -- Registers the rewrite function for one prefix rule.
    local function addprefix(tag,line,one,two,three)
        if one ~= "0" then
            -- stripping prefixes are not implemented
            if trace then
                print('3',line)
            end
        elseif three == "." then
            registerprefix(tag,function(str)
                local new = two .. str
                if trace then
                    print("p 1",str,new)
                end
                return new
            end)
        else
            -- conditional prefixes are not implemented
            if trace then
                print('2',line)
            end
        end
    end

    -- Registers the rewrite function for one suffix rule.
    local function addsuffix(tag,line,one,two,three)
        local always = three == "."
        if one == "0" then
            if always then
                registersuffix(tag,function(str)
                    local new = str .. two
                    if trace then
                        print("s 1",str,new)
                    end
                    return new
                end)
            else
                local tail = makepattern(three)
                if tail then
                    local final = tail * P(-1)
                    -- succeeds when the word ends with the condition
                    local check = (1 - final)^0 * final
                    registersuffix(tag,function(str)
                        if lpegmatch(check,str) then
                            local new = str .. two
                            if trace then
                                print("s 2",str,new)
                            end
                            return new
                        end
                    end)
                elseif trace then
                    print('4',line)
                end
            end
        else
            local strip = makepattern(one)
            local tail  = not always and makepattern(three)
            if strip and (always or tail) then
                -- without a real condition there is nothing to check: the
                -- replacement below already demands the trailing <strip>
                local check = false
                if tail then
                    local final = tail * P(-1)
                    check = (1 - final)^1 * final
                end
                local final   = strip * P(-1)
                local replace = Cs((1 - final)^1 * (final/two))
                registersuffix(tag,function(str)
                    if not check or lpegmatch(check,str) then
                        local new = lpegmatch(replace,str)
                        if new then
                            if trace then
                                print("s 3",str,new)
                            end
                            return new
                        end
                    end
                end)
            elseif trace then
                print('4',line)
            end
        end
    end

    local nofLines = #lines
    local i        = 1

    -- Consumes the (at most) count rule lines that follow the header at
    -- lines[i] and feeds every usable one to handler; i ends up at the
    -- last line that was consumed.
    local function readrules(kind,count,handler)
        local rule = kind .. "%s+(%S+)%s+(%S+)%s+([^%s/]+)%S*%s+(%S+)"
        for _=1,count do
            i = i + 1
            local line = lines[i]
            if not line then
                -- the header promised more rules than the file contains
                break
            end
            if not find(line,"[-']") then
                local tag, one, two, three = match(line,rule)
                if tag then
                    if two == "0" then
                        -- a zero affix stands for the empty string
                        two = ""
                    end
                    handler(tag,line,one,two,three)
                end
            end
        end
    end

    while i <= nofLines do
        local header  = lines[i]
        local kind    = "PFX"
        local handler = addprefix
        local tag, continuation, n = match(header,"PFX%s+(%S+)%s+(%S+)%s+(%d+)")
        if not tag then
            -- the count has to be numeric here too, otherwise a stray
            -- rule line would be taken for a header
            kind    = "SFX"
            handler = addsuffix
            tag, continuation, n = match(header,"SFX%s+(%S+)%s+(%S+)%s+(%d+)")
        end
        if tag then
            continue[tag] = continuation == "Y"
            readrules(kind,tonumber(n) or 0,handler)
        end
        i = i + 1
    end
end
241
-- Match-time callback (it is passed to Cmt in the dictionary grammar),
-- so the first two arguments, subject and position, are not used.
--
-- @param word  the captured word
-- @param spec  list of recognized flags that followed the word, or nil
-- @return      always true, so the match simply continues
--
-- With a flag list, the word itself and every form produced by the
-- rewrite functions of those flags are added to `collected`; for a flag
-- whose `continue` entry is not set only the first form that is produced
-- is used. Without a flag list the capture is only kept when it contains
-- a slash: the part in front of the last slash is added when it is longer
-- than one byte.
local function expand(_,_,word,spec)
    if not spec then
        local stem = match(word,"^(.+)/")
        if stem and #stem > 1 then
            collected[stem] = true
        end
        return true
    end
    collected[word] = true
    for f=1,#spec do
        local flag  = spec[f]
        local rules = affixes[flag]
        if rules then
            local more = continue[flag]
            for r=1,#rules do
                local form = rules[r](word)
                if form then
                    collected[form] = true
                    if not more then
                        break
                    end
                end
            end
        end
    end
    return true
end
276
-- Runs the dictionary grammar over the given word file and turns the set
-- of collected words into a sorted list.
--
-- @param dictionary  name of the word file; a file that cannot be loaded
--                    is treated as empty
-- @return            the sorted list, which is also left in `collected`
local function getwords(dictionary)
    local data = fixeddata(io.loaddata(dictionary) or "")

    -- every flag that has at least one registered rule is a valid flag
    local flags = { }
    for tag in next, prefixes do
        flags[tag] = true
    end
    for tag in next, suffixes do
        flags[tag] = true
    end

    local validkeys = lpeg.utfchartabletopattern(flags)
    -- a slash, a run of valid flags (captured as one list) and a line end
    local specifier = P("/") * Ct(C(validkeys)^1)^0 * newline

    local blanks   = newline^1
    -- entries that start with a digit or an uppercase character
    local unwanted = skipped * (1-newline)^0
    -- a word, optionally followed by its flags, handed over to expand
    local entry    = Cmt(C((1-specifier-newline-garbage)^1) * specifier^0, expand)
    -- whatever is left on a line
    local leftover = ignored^1 * newline^1

    lpegmatch((blanks + unwanted + entry + leftover)^0,data)

    collected = table.keys(collected)
    table.sort(collected)
    return collected
end
299
-- Writes the collected words, one per line, to the file named `result`;
-- nothing happens when no name is given.
local function saveall(result)
    if not result then
        return
    end
    io.savedata(result,concat(collected,"\n"))
end
305
-- Entry point for --expand: checks the three file name options, expands
-- the dictionary with the affix specification and saves the word list.
--
-- @param arguments  only tested for being set; the options themselves
--                   are read from environment.arguments
-- @return           the sorted word list, or nothing when `arguments` is
--                   not set or an option is missing (which is reported)
function scripts.spell.expand(arguments)
    if not arguments then
        return
    end
    local dictionary    = environment.arguments.dictionary
    local specification = environment.arguments.specification
    local result        = environment.arguments.result
    local function missing(value)
        return type(value) ~= "string" or value == ""
    end
    if missing(dictionary) then
        report("missing --dictionary=name")
        return
    end
    if missing(specification) then
        report("missing --specification=name")
        return
    end
    if missing(result) then
        report("missing --result=name")
        return
    end
    resetall()
    getfixes(specification)
    getwords(dictionary)
    saveall(result)
    return collected
end
326
327end
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
-- Command line dispatch: --expand runs the expansion, --exporthelp
-- exports the help information (to the first file argument), anything
-- else shows the help.
local argument = environment.argument

if argument("expand") then
    scripts.spell.expand(environment.arguments)
else
    if argument("exporthelp") then
        application.export(argument("exporthelp"),environment.files[1])
    else
        application.help()
    end
end
354 |