1if not modules then modules = { } end modules ['lang-wrd'] = {
2 version = 1.001,
3 comment = "companion to lang-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local next, tonumber = next, tonumber
10local lower = string.lower
11local utfchar = utf.char
12local concat, setmetatableindex = table.concat, table.setmetatableindex
13local lpegmatch = lpeg.match
14local P, S, Cs, Cf, Cg, Cc, C = lpeg.P, lpeg.S, lpeg.Cs, lpeg.Cf, lpeg.Cg, lpeg.Cc, lpeg.C
15
16local report_words = logs.reporter("languages","words")
17
18local nodes = nodes
19local languages = languages
20
21local implement = interfaces.implement
22
23languages.words = languages.words or { }
24local words = languages.words
25
26words.data = words.data or { }
27words.enables = false
28words.threshold = 4
29
30local numbers = languages.numbers
31local registered = languages.registered
32
33local nuts = nodes.nuts
34
35
36local getnext = nuts.getnext
37local getid = nuts.getid
38
39local getchar = nuts.getchar
40local setattr = nuts.setattr
41
42local getlanguage = nuts.getlanguage
43local ischar = nuts.ischar
44
45local nextnode = nuts.traversers.node
46
47
48local wordsdata = words.data
49local chardata = characters.data
50local enableaction = nodes.tasks.enableaction
51
52local unsetvalue = attributes.unsetvalue
53
54local nodecodes = nodes.nodecodes
55
56
57local glyph_code = nodecodes.glyph
58
59
60
61
62
63local lowerchar = characters.lower
64
65local a_color = attributes.private('color')
66local colist = attributes.list[a_color]
67
68local is_letter = characters.is_letter
69
-- Patterns used to read plain text word lists.
--
-- spacing : the separators between words
-- markup  : "-" and "=" characters, dropped from the word
-- snippet : a braced group, the braces themselves are dropped
-- disc    : three braced groups in a row, only the content of the last one
--           is kept
-- word    : a run of non spacing characters with the substitutions applied

local spacing = S(" \n\r\t")
local markup  = S("-=") / ""
local lbrace  = P("{") / ""
local rbrace  = P("}") / ""
local snippet = lbrace * (P(1) - rbrace)^0 * rbrace
local disc    = ((snippet * snippet) / "") * snippet
local word    = Cs((markup + disc + (P(1) - spacing))^1)
79
80
81
-- Word lists that have been read already, keyed by the resolved filename.

local loaded  = { }

-- Loaders are selected by file suffix. Each one gets the list to fill and the
-- resolved filename; every word found becomes a key with value true.

local loaders = { }

-- A plain text file: words separated by spacing.

function loaders.txt(list,fullname)
    local data = io.loaddata(fullname)
    if not data or data == "" then
        return
    end
    local function register(s)
        list[s] = true
    end
    lpegmatch((spacing + word/register)^0,data)
end

-- A Lua file that returns a table; the keys of its 'words' subtable are the
-- words.

function loaders.lua(list,fullname)
    local data = dofile(fullname)
    if type(data) ~= "table" then
        return
    end
    local entries = data.words
    if entries then
        for entry in next, entries do
            list[entry] = true
        end
    end
end

loaders.luc = loaders.lua
106
-- Loads the word file 'filename' and makes its list the word list of 'tag'.
-- A file is only read once: later loads of the same resolved file reuse the
-- list that was filled the first time. When the file has not been read yet
-- the words are added to the list that 'tag' already has (if any). The loader
-- is chosen by suffix and defaults to the plain text one.

function words.load(tag,filename)
    local fullname = resolvers.findfile(filename,'other text file') or ""
    if fullname == "" then
        report_words("missing word file %a",filename)
        return
    end
    report_words("loading word file %a",fullname)
    statistics.starttiming(languages)
    local list = loaded[fullname]
    if not list then
        list = wordsdata[tag] or { }
        local loader = loaders[file.suffix(fullname)] or loaders.txt
        loader(list,fullname)
        loaded[fullname] = list
    end
    wordsdata[tag] = list
    statistics.stoptiming(languages)
end
126
-- Looks up 'str' in the word list that belongs to language number 'id'.
--
-- Returns 1 when the word is found as given, 2 when only its lowercased form
-- is found, and nothing when the language has no list or the word is unknown.
--
-- The lowercased form is made with characters.lower and not with string.lower:
-- the latter works on bytes and leaves non ascii capitals untouched, so a
-- capitalized word starting with (say) an accented character would never
-- match its lowercase entry in the list.

function words.found(id, str)
    local tag  = languages.numbers[id]
    local data = tag and wordsdata[tag]
    if not data then
        return
    end
    if data[str] then
        return 1
    end
    if data[lowerchar(str)] then
        return 2
    end
end
140
141
142
143
144
145
-- Walks the node list that starts at 'head' and collects runs of letters that
-- share a language into words. Each word is passed to whenfound(language,word)
-- and when that call returns a function, the function is applied to every
-- node of the word. The head is returned as it came in.
--
-- A word ends at a node that is not a character, at a character that is not a
-- letter, and at a language change.

local function mark_words(head,whenfound)
    local current  = head
    local language = nil
    local str, s   = { }, 0 -- the characters of the pending word
    local nds, n   = { }, 0 -- the nodes of the pending word
    -- flushes the pending word (if any) and starts a fresh one
    local function action()
        if s > 0 then
            local word = concat(str,"",1,s)
            local mark = whenfound(language,word)
            if mark then
                for i=1,n do
                    mark(nds[i])
                end
            end
        end
        n, s = 0, 0
    end
    while current do
        local code = ischar(current)
        if code then
            local a = getlanguage(current)
            if a then
                if a ~= language then
                    if s > 0 then
                        action()
                    end
                    language = a
                end
            elseif s > 0 then
                action()
                language = a
            end
            local data = chardata[code]
            -- a character without data can't be classified as a letter, so
            -- it is treated like any other non letter and ends the word
            -- instead of triggering an error on data.category
            if data and is_letter[data.category] then
                n = n + 1
                nds[n] = current
                s = s + 1
                str[s] = utfchar(code)
            elseif s > 0 then
                action()
            end
        elseif s > 0 then
            action()
        end
        current = getnext(current)
    end
    if s > 0 then
        action()
    end
    return head
end
220
-- The registered checkers and their (optional) setup functions, both indexed
-- by method number. They are exported so that they can be extended.

local methods    = { }
words.methods    = methods

local enablers   = { }
words.enablers   = enablers

local wordmethod = 1     -- the method that words.check dispatches to
local enabled    = false -- toggled by words.enable and words.disable

-- The node list processor. When checking is enabled the list is handed to the
-- current method, otherwise the head is returned together with false.
--
-- When no method is registered under the current number we report that once
-- and disable checking, instead of failing on a call to nil for every list
-- that passes by.

function words.check(head)
    if enabled then
        local method = methods[wordmethod]
        if method then
            return method(head)
        end
        report_words("unknown method %a, checking is disabled",wordmethod)
        enabled = false
    end
    return head, false
end
239
-- Switches checking on. When settings.method converts to a number it becomes
-- the current method, otherwise the current one is kept. The enabler that
-- goes with the method (if there is one) gets the settings, and the processor
-- action is enabled.

function words.enable(settings)
    local method = settings.method
    if method then
        wordmethod = tonumber(method) or wordmethod or 1
    else
        wordmethod = wordmethod or 1
    end
    local enabler = enablers[wordmethod]
    if enabler then
        enabler(settings)
    end
    enableaction("processors","languages.words.check")
    enabled = true
end
250
-- Switches checking off: words.check passes lists on untouched from now on.
-- Only the flag is reset here.

words.disable = function()
    enabled = false
end
254
255
256
-- A cache of node markers. A marker is a function that sets the color
-- attribute of a node, or false when there is no color to set. Entries are
-- made on demand:
--
-- string key       : the color list entry with that name
-- negative number  : the "word:unset" entry
-- other number     : the "word:<tag>" entry of that language number, with
--                    "unset" as tag for an unknown number and "word:unknown"
--                    as fallback entry

local cache = { }

setmetatableindex(cache, function(t,k)
    local color
    if type(k) == "string" then
        color = colist[k]
    elseif k < 0 then
        color = colist["word:unset"]
    else
        local tag = numbers[k] or "unset"
        color = colist["word:" .. tag] or colist["word:unknown"]
    end
    local marker = false
    if color then
        marker = function(n)
            setattr(n,a_color,color)
        end
    end
    t[k] = marker
    return marker
end)
272
273
274
-- Method 1: mark words as known or unknown.
--
-- Words shorter than the threshold (in bytes) get no marker, other words get
-- the "word:yes" or "word:no" marker depending on the lookup in the list of
-- the language.

local function sweep(language,str)
    if #str >= words.threshold then
        local name = words.found(language,str) and "word:yes" or "word:no"
        return cache[name]
    end
    return false
end

-- First the color attribute of all nodes is reset, then the words get marked.

local function checkwords(head)
    for node in nextnode, head do
        setattr(node,a_color,unsetvalue)
    end
    return mark_words(head,sweep)
end

methods[1] = checkwords
291
292
293
-- The state of the word collector (method 2).

local dumpname = nil        -- the file to save to, set on demand or by directive
local dumpthem = false      -- becomes true once method 2 has been used
local listname = "document" -- the name of the current category

local category = { }        -- the current category, set by enablers[2]

-- Categories are created on demand and keyed by list name. Each category has
-- a total and a table of per language records, also created on demand and
-- keyed by language tag. A record takes what it can from the registry entry
-- of the language and keeps the word counts.
--
-- The number used to be taken from an undefined global 'language' and was
-- therefore always nil; now it comes from the registry entry like the other
-- fields (this assumes that the entry has a 'number' field, when it has none
-- the value stays nil as before).

local categories = setmetatableindex(function(lists,name)
    local perlanguage = setmetatableindex(function(records,tag)
        local r = registered[tag]
        local v = {
            number   = r and r.number   or nil,
            parent   = r and r.parent   or nil,
            patterns = r and r.patterns or nil,
            tag      = r and r.tag      or nil,
            list     = { },
            total    = 0,
            unique   = 0,
        }
        records[tag] = v
        return v
    end)
    local v = {
        languages = perlanguage,
        total     = 0,
    }
    lists[name] = v
    return v
end)

-- This is what gets saved at the end of the run.

local collected = {
    total      = 0,
    version    = 1.000,
    categories = categories,
}
328
-- The setup for method 2: settings.list selects the category that words are
-- collected in; without a (non empty) name the "document" category is used.

enablers[2] = function(settings)
    local name = settings.list
    if not name or name == "" then
        name = "document"
    end
    listname = name
    category = collected.categories[listname]
end
334
-- The collector used by method 2. Words of at least the threshold length (in
-- bytes) are lowercased and counted in the record of their language in the
-- current category; a language number without tag ends up under "unset". The
-- totals of the record, the category and the collection are updated. Nothing
-- is returned, so no nodes get marked.

local function sweep(language,str)
    if #str < words.threshold then
        return
    end
    str = lowerchar(str)
    local record = category.languages[numbers[language] or "unset"]
    local list   = record.list
    local count  = list[str]
    if not count then
        -- first time that we see this word
        record.unique = record.unique + 1
        count = 0
    end
    list[str] = count + 1
    record.total    = record.total    + 1
    category.total  = category.total  + 1
    collected.total = collected.total + 1
end
352
-- Method 2: collect the words that are used. We also flag that there is
-- something to save at the end of the run.

local function collectwords(head)
    dumpthem = true
    return mark_words(head,sweep)
end

methods[2] = collectwords
357
-- Saves the collected words in serialized form, but only when method 2 has
-- been used. The threshold is stored with the data. Unless a name has been
-- set already, the file is named after the job, with suffix "words".

local function dumpusedwords()
    if not dumpthem then
        return
    end
    collected.threshold = words.threshold
    if not dumpname then
        dumpname = file.addsuffix(tex.jobname,"words")
    end
    report_words("saving list of used words in %a",dumpname)
    local serialized = table.serialize(collected,true)
    io.savedata(dumpname,serialized)
end
367
-- The directive sets the name of the file that the used words are saved in;
-- anything but a non empty string leaves the current name as it is.

directives.register("languages.words.dump", function(v)
    if type(v) == "string" and v ~= "" then
        dumpname = v
    end
end)

-- Saving is registered as a stop action.

luatex.registerstopactions(dumpusedwords)
373
374
375
-- Method 3: mark words by language. Every word gets the marker that the cache
-- has for its language, the word itself plays no role.

local function sweep(language,str)
    local marker = cache[language]
    return marker
end

-- First the color attribute of all nodes is reset, then the words get marked.

local function showlanguages(head)
    for node in nextnode, head do
        setattr(node,a_color,unsetvalue)
    end
    return mark_words(head,sweep)
end

methods[3] = showlanguages
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
-- The interface to the TeX end. Enabling takes a set of settings with the
-- keys "method" and "list", loading takes two strings that are passed on to
-- words.load (tag and filename), disabling takes no arguments.

implement {
    name      = "enablespellchecking",
    arguments = { { { "method" }, { "list" } } },
    actions   = words.enable,
}

implement {
    name    = "disablespellchecking",
    actions = words.disable,
}

implement {
    name      = "loadspellchecklist",
    arguments = "2 strings",
    actions   = words.load,
}
424 |