-- Register this script's metadata in the global `modules` table (created on
-- demand). The entry is keyed by the script's own name, "mtx-spell", which is
-- the name used by the application object and the help info further down;
-- registering under another script's name would overwrite that script's entry.
if not modules then modules = { } end modules ['mtx-spell'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local find, gsub, match = string.find, string.gsub, string.match
10local concat = table.concat
11local P, R, S, C, Ct, Cmt, Cc, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cs
12local patterns = lpeg.patterns
13local lpegmatch = lpeg.match
14
-- XML description of the script (name, flags, usage example). It is handed
-- to logs.application below as the `helpinfo` field.
-- The file extensions mentioned in the flag descriptions and in the example
-- agree with each other: a .dic word file and an .aff affix specification.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
 <entry name="name">mtx-spell</entry>
 <entry name="detail">ConTeXt Word Filtering</entry>
 <entry name="version">0.10</entry>
 </metadata>
 <flags>
 <category name="basic">
 <subcategory>
 <flag name="expand"><short>expand hunspell dics and aff files</short></flag>
 <flag name="dictionary"><short>word file (.dic)</short></flag>
 <flag name="specification"><short>affix specification file (.aff)</short></flag>
 <flag name="result"><short>destination file</short></flag>
 </subcategory>
 </category>
 </flags>
 <examples>
 <category>
 <title>Examples</title>
 <subcategory>
 <example><command>mtxrun --script spell --expand --dictionary="en_US.dic" --specification="en_US.aff" --result="data-us.txt"</command></example>
 </subcategory>
 </category>
 </examples>
</application>
]]
43
44
-- Application object for this script; it provides the reporter used below
-- and the help/export entry points used by the command dispatch at the end
-- of the file.
local application = logs.application({
    name     = "mtx-spell",
    banner   = "ConTeXt Word Filtering 0.10",
    helpinfo = helpinfo,
})

-- Message reporter of the application.
local report = application.report

-- When set to true the affix rule functions print every derivation they make.
local trace = false

-- Global script namespace; only created when it does not exist yet.
scripts = scripts or { }
if not scripts.spell then
    scripts.spell = { }
end
56
57
58
59require("char-def")
60require("char-utf")
61
62
63
64do
65
-- Shared state of one expansion run:
--   prefixes / suffixes : rule functions per affix flag, by kind
--   affixes             : all rule functions per affix flag
--   continue            : per flag, true when its header field was "Y"
--   collected           : set of resulting words (later turned into a list)
local prefixes, suffixes, affixes, continue, collected

-- (Re)initialises the shared state above. The three rule tables are created
-- with table.setmetatableindex("table"); the register functions index them
-- by flag without creating the entry first, so indexing presumably creates
-- the sub table on demand.
local function resetall()
    local newindexed = table.setmetatableindex
    prefixes, suffixes, affixes = newindexed("table"), newindexed("table"), newindexed("table")
    continue, collected = { }, { }
end
75
-- Set (utf string -> true) of all characters whose category in the character
-- database is "lu".
local uppers   = { }
local chardata = characters.data
for unicode, description in next, chardata do
    local isupper = description.category == "lu"
    if isupper then
        uppers[utf.char(unicode)] = true
    end
end

-- Building blocks for the dictionary parser in getwords:
--   skipped : a digit or one of the characters collected in `uppers`
--   ignored : any single character that does not start a newline
--   garbage : an apostrophe or a hyphen
local newline = patterns.newline
local digit   = patterns.digit
local skipped = digit + lpeg.utfchartabletopattern(uppers)
local ignored = P(1) - newline
local garbage = S("'-")
89
-- Normalises the raw text of a dictionary or affix file before it is parsed.
--
-- The Dutch "ij" can be encoded as the single ligature character U+0133;
-- it is rewritten to the two plain letters "i" and "j" so that words and
-- affix rules agree on one spelling. (Replacing "ij" by "ij" itself, as was
-- done before, did nothing.)
--
-- data    : string with the file content
-- returns : the normalised string (only the string, not the gsub count)
local function fixeddata(data)
    -- "\196\179" is the utf-8 byte sequence of U+0133
    data = gsub(data,"\196\179","ij")
    return data
end
94
-- Appends the suffix rule function `f` to the rules of affix flag `tag`,
-- both in the suffix-only table and in the combined affix table.
local function registersuffix(tag,f)
    local bykind   = suffixes[tag]
    local combined = affixes[tag]
    bykind  [#bykind  +1] = f
    combined[#combined+1] = f
end
99
-- Appends the prefix rule function `f` to the rules of affix flag `tag`,
-- both in the prefix-only table and in the combined affix table.
local function registerprefix(tag,f)
    local bykind   = prefixes[tag]
    local combined = affixes[tag]
    bykind  [#bykind  +1] = f
    combined[#combined+1] = f
end
104
-- Reads an affix specification file and registers one rule function per
-- supported PFX/SFX rule line.
--
-- specification : name of the affix file; when it cannot be loaded an empty
--                 string is parsed instead, so nothing gets registered
--
-- Side effects: sets continue[tag] for every PFX/SFX header (true when the
-- second header field is "Y") and adds rule functions through registerprefix
-- and registersuffix. A rule function takes a word and returns the derived
-- word, or nothing when the rule does not apply. Nothing is returned here.
--
-- Rule lines that contain a hyphen or an apostrophe are skipped.
local function getfixes(specification)

    local data  = fixeddata(io.loaddata(specification) or "")
    local lines = string.splitlines(data)

    -- Accumulator for the pattern under construction: the capture handlers of
    -- p1, p2 and p3 append their piece to it while makepattern runs.
    local p0 = nil

    -- "[^abc]" : one character that is not in the listed set
    local p1 = P("[^") * Cs((1-P("]"))^1) * P("]") / function(s)
        local t = utf.split(s)
        local p = 1 - lpeg.utfchartabletopattern(t)
        p0 = p0 and (p0 * p) or p
    end
    -- "[abc]" : one character out of the listed set
    local p2 = P("[") * Cs((1-P("]"))^1) * P("]") / function(s)
        local t = utf.split(s)
        local p = lpeg.utfchartabletopattern(t)
        p0 = p0 and (p0 * p) or p
    end
    -- anything else : a literal run of characters
    -- NOTE(review): a "." is matched literally here as well, while the
    -- hunspell documentation describes "." in a condition as "any
    -- character" -- confirm whether that is intended.
    local p3 = (patterns.utf8char - S("[]"))^1 / function(s)
        local p = P(s)
        p0 = p0 and (p0 * p) or p
    end

    local p = (p1 + p2 + p3)^1

    -- Converts a condition or strip string into an lpeg pattern; the result
    -- is nil when nothing of `s` could be converted.
    local function makepattern(s)
        p0 = nil
        lpegmatch(p,s)
        return p0
    end

    local i = 1
    while i <= #lines do
        local line = lines[i]
        -- header: PFX <flag> <Y|N> <number of rule lines that follow>
        local tag, continuation, n = match(line,"PFX%s+(%S+)%s+(%S+)%s+(%d+)")
        if tag then
            n = tonumber(n) or 0
            continue[tag] = continuation == "Y"
            for j=1,n do
                i = i + 1
                line = lines[i]
                if not line then
                    -- the header announced more rules than the file contains
                    break
                end
                if not find(line,"[-']") then
                    -- rule: PFX <flag> <strip> <affix>[/flags] <condition>
                    local tag, one, two, three = match(line,"PFX%s+(%S+)%s+(%S+)%s+([^%s/]+)%S*%s+(%S+)")
                    if tag then
                        if one == "0" and two and three == "." then
                            -- nothing to strip, no condition: just prepend
                            registerprefix(tag,function(str)
                                local new = two .. str
                                if trace then
                                    print("p 1",str,new)
                                end
                                return new
                            end)
                        elseif one == "0" and two and three then
                            -- conditional prefix: not supported, only traced
                            if trace then
                                print('2',line)
                            end
                        elseif one and two and three then
                            -- stripping prefix: not supported, only traced
                            if trace then
                                print('3',line)
                            end
                        else
                            if trace then
                                print('4',line)
                            end
                        end
                    end
                end
            end
        end
        -- `line` is nil here only when the prefix loop above ran off the end
        -- of the file; there is nothing left to inspect in that case.
        if line then
            -- header: SFX <flag> <Y|N> <number of rule lines that follow>;
            -- the count has to be numeric, like in the PFX header, so that a
            -- stray rule line is never taken for a header (which would reset
            -- continue[tag])
            local tag, continuation, n = match(line,"SFX%s+(%S+)%s+(%S+)%s+(%d+)")
            if tag then
                n = tonumber(n) or 0
                continue[tag] = continuation == "Y"
                for j=1,n do
                    i = i + 1
                    line = lines[i]
                    if not line then
                        -- the header announced more rules than the file contains
                        break
                    end
                    if not find(line,"[-']") then
                        -- rule: SFX <flag> <strip> <affix>[/flags] <condition>
                        local tag, one, two, three = match(line,"SFX%s+(%S+)%s+(%S+)%s+([^%s/]+)%S*%s+(%S+)")
                        if tag then
                            if one == "0" and two and three == "." then
                                -- nothing to strip, no condition: just append
                                registersuffix(tag,function(str)
                                    local new = str .. two
                                    if trace then
                                        print("s 1",str,new)
                                    end
                                    return new
                                end)
                            elseif one == "0" and two and three then
                                -- nothing to strip: append when the end of the
                                -- word satisfies the condition
                                local final = makepattern(three) * P(-1)
                                local check = (1 - final)^0 * final
                                registersuffix(tag,function(str)
                                    if lpegmatch(check,str) then
                                        local new = str .. two
                                        if trace then
                                            print("s 2",str,new)
                                        end
                                        return new
                                    end
                                end)
                            elseif one and two and three then
                                -- when the end of the word satisfies the
                                -- condition, replace the trailing strip
                                -- string by the suffix
                                local final   = makepattern(three) * P(-1)
                                local check   = (1 - final)^1 * final
                                local final   = makepattern(one) * P(-1)
                                local replace = Cs((1 - final)^1 * (final/two))
                                registersuffix(tag,function(str)
                                    if lpegmatch(check,str) then
                                        local new = lpegmatch(replace,str)
                                        if new then
                                            if trace then
                                                print("s 3",str,new)
                                            end
                                            return new
                                        end
                                    end
                                end)
                            else
                                if trace then
                                    print('4',line)
                                end
                            end
                        end
                    end
                end
            end
        end
        i = i + 1
    end
end
241
-- Match-time (Cmt) callback of the dictionary parser: stores one word and
-- the words derived from it in the `collected` set.
--
-- word : the word as captured from the dictionary
-- spec : list of affix flags attached to the word, or nil when there is none
--
-- With flags: the word itself is stored, and for every flag the registered
-- rule functions are tried in order; each result is stored, and unless
-- continue[flag] is set the remaining rules of that flag are not tried once
-- one of them produced a result.
-- Without flags: the word is stored, unless it still contains a "/".
--
-- Always returns true, so the match succeeds.
local function expand(_,_,word,spec)
    if not spec then
        if not find(word,"/") then
            collected[word] = true
        end
        return true
    end
    collected[word] = true
    for si=1,#spec do
        local flag  = spec[si]
        local rules = affixes[flag]
        if rules then
            for ri=1,#rules do
                local derived = rules[ri](word)
                if derived then
                    collected[derived] = true
                    if not continue[flag] then
                        break
                    end
                end
            end
        end
    end
    return true
end
271
-- Parses a dictionary (word list) file and expands every accepted word with
-- the registered affix rules.
--
-- dictionary : name of the word file; when it cannot be loaded an empty
--              string is parsed instead
-- returns    : the collected words as a sorted list; the `collected` upvalue
--              is replaced by that list as well
--
-- Per line one of these alternatives applies, tried in this order:
--   * empty lines are dropped
--   * a line that starts with a digit or an uppercase character is dropped
--   * a run of characters up to a flag specifier, newline, apostrophe or
--     hyphen is passed to `expand`, together with the flags when present
--   * whatever remains of a line is dropped
local function getwords(dictionary)
    local data = fixeddata(io.loaddata(dictionary) or "")
    -- every flag for which at least one prefix or suffix table entry exists
    local known = { }
    for tag in next, prefixes do
        known[tag] = true
    end
    for tag in next, suffixes do
        known[tag] = true
    end
    local flag     = lpeg.utfchartabletopattern(known)
    -- "/" followed by known flags, up to and including the end of the line
    local flags    = P("/") * Ct(C(flag)^1)^0 * newline
    local tillend  = (1-newline)^0
    local word     = C((1-flags-newline-garbage)^1)
    local entry    = Cmt(word * flags^0, expand)
    local leftover = ignored^1 * newline^1
    local parser   = (newline^1 + skipped * tillend + entry + leftover)^0
    lpegmatch(parser,data)
    -- from set to sorted list
    collected = table.keys(collected)
    table.sort(collected)
    return collected
end
294
-- Writes the collected words, one per line, to the file named `result`.
-- Nothing is written when `result` is nil or false. Returns nothing.
local function saveall(result)
    if not result then
        return
    end
    local text = concat(collected,"\n")
    io.savedata(result,text)
end
300
-- Entry point of the --expand action: expands a word file with the rules of
-- an affix specification file and saves the resulting word list.
--
-- arguments : only tested for presence; the actual values are taken from
--             environment.arguments (dictionary, specification, result)
-- returns   : the sorted list of collected words after a successful run,
--             nothing when `arguments` is absent or an option is missing
--
-- Each of the three options has to be a non-empty string; the first one that
-- is not gets reported and nothing is processed. The expansion runs only
-- when all three are valid.
function scripts.spell.expand(arguments)
    if arguments then
        local dictionary    = environment.arguments.dictionary
        local specification = environment.arguments.specification
        local result        = environment.arguments.result
        if type(dictionary) ~= "string" or dictionary == "" then
            report("missing --dictionary=name")
        elseif type(specification) ~= "string" or specification == "" then
            report("missing --specification=name")
        elseif type(result) ~= "string" or result == "" then
            report("missing --result=name")
        else
            resetall()
            getfixes(specification)
            getwords(dictionary)
            saveall(result)
            return collected
        end
    end
end
319
320end
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
-- Command dispatch: --expand runs the expansion, --exporthelp exports the
-- help info (its value and the first file argument are passed on), and
-- anything else shows the help.
local argument = environment.argument

if argument("expand") then
    scripts.spell.expand(environment.arguments)
else
    local exporthelp = argument("exporthelp")
    if exporthelp then
        application.export(exporthelp,environment.files[1])
    else
        application.help()
    end
end
347 |