1if not modules then modules = { } end modules ['lang-wrd'] = {
2 version = 1.001,
3 comment = "companion to lang-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local next, tonumber = next, tonumber
10local lower = string.lower
11local utfchar = utf.char
12local concat, setmetatableindex = table.concat, table.setmetatableindex
13local lpegmatch = lpeg.match
14local P, S, Cs, Cc, C = lpeg.P, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.C
15
16local report_words = logs.reporter("languages","words")
17
18local nodes = nodes
19local languages = languages
20
21local implement = interfaces.implement
22
-- Namespace for the word related (spell checking) support; an already
-- existing table is reused.
local words = languages.words or { }
languages.words = words

words.data      = words.data or { } -- word lists, indexed by language tag
words.enables   = false             -- NOTE(review): not read anywhere in this file
words.threshold = 4                 -- words for which #str is below this are skipped
29
30local numbers = languages.numbers
31local registered = languages.registered
32
33local nuts = nodes.nuts
34
35
36local getnext = nuts.getnext
37local getid = nuts.getid
38
39local getchar = nuts.getchar
40local setattr = nuts.setattr
41
42local getlanguage = nuts.getlanguage
43local ischar = nuts.ischar
44
45local nextnode = nuts.traversers.node
46
47
48local wordsdata = words.data
49local chardata = characters.data
50local enableaction = nodes.tasks.enableaction
51
52local unsetvalue = attributes.unsetvalue
53
54local nodecodes = nodes.nodecodes
55
56
57local glyph_code = nodecodes.glyph
58
59
60
61
62
63local lowerchar = characters.lower
64
65local a_color = attributes.private('color')
66local colist = attributes.list[a_color]
67
68local is_letter = characters.is_letter
69
-- Patterns used for plain text word lists. Within the substitution capture
-- "-" and "=" are removed, and of three consecutive braced groups only the
-- content of the third one is kept (the first two vanish completely).

local spacing = S(" \n\r\t")
local markup  = S("-=") / ""
local lbrace  = P("{") / ""
local rbrace  = P("}") / ""
local snippet = lbrace * (1 - rbrace)^0 * rbrace
-- explicit grouping: "/" and "*" share the same precedence in Lua
local disc    = (snippet / "") * (snippet / "") * snippet
local word    = Cs((markup + disc + (1 - spacing))^1)
79
80
81
-- Lists that have been read already, indexed by resolved filename.
local loaded = { }

-- Plain text format: whitespace separated words; every parsed word becomes a
-- key of 'list'.
local function loadtextlist(list,fullname)
    local data = io.loaddata(fullname)
    if not data or data == "" then
        return
    end
    local function register(s)
        list[s] = true
    end
    lpegmatch((spacing + word/register)^0,data)
end

-- Lua format: the file returns a table with a 'words' subtable of which the
-- keys are registered in 'list'.
-- NOTE(review): dofile executes the file, so it must come from a trusted place.
local function loadlualist(list,fullname)
    local data = dofile(fullname)
    if type(data) ~= "table" then
        return
    end
    local entries = data.words
    if entries then
        for key in next, entries do
            list[key] = true
        end
    end
end

-- Loaders indexed by file suffix; luc files are handled like lua files.
local loaders = {
    txt = loadtextlist,
    lua = loadlualist,
    luc = loadlualist,
}
105
-- Loads the word list 'filename' and makes it the list for language 'tag'.
-- A file is read only once; a later request for the same file reuses the
-- table that was filled the first time. A file that cannot be located is
-- only reported.
function words.load(tag,filename)
    local fullname = resolvers.findfile(filename,'other text file') or ""
    if fullname == "" then
        report_words("missing word file %a",filename)
        return
    end
    report_words("loading word file %a",fullname)
    statistics.starttiming(languages)
    local list = loaded[fullname]
    if not list then
        -- new words are added to what is already known for this tag
        list = wordsdata[tag] or { }
        local loader = loaders[file.suffix(fullname)] or loaders.txt
        loader(list,fullname)
        loaded[fullname] = list
    end
    wordsdata[tag] = list
    statistics.stoptiming(languages)
end
125
-- Looks up 'str' in the word list of the language with number 'id'.
-- Returns 1 for an exact match, 2 when only the lowercased string matches,
-- and nothing when there is no list or no match.
function words.found(id, str)
    local tag  = languages.numbers[id]
    local data = tag and wordsdata[tag]
    if not data then
        return
    end
    if data[str] then
        return 1
    end
    if data[lower(str)] then
        return 2
    end
end
139
140
141
142
143
144
-- Walks the node list starting at 'head' and collects runs of consecutive
-- letter characters that share the same language. For each finished run
-- 'whenfound(language,word)' is called; when that returns a function it is
-- applied to every node that contributed to the word. A run ends at a
-- non-letter, at a node for which ischar yields no code, at a language
-- change, and at the end of the list.
--
-- head      : first node of the list
-- whenfound : function(language,word) returning a marker function or a
--             false value
--
-- Returns 'head' unchanged.
local function mark_words(head,whenfound)
    local current  = head
    local language = nil
    local str, s   = { }, 0 -- utf characters of the word being collected
    local nds, n   = { }, 0 -- the nodes that belong to those characters

    -- Flushes the collected word (if any) and resets both counters.
    local function action()
        if s > 0 then
            local word = concat(str,"",1,s)
            local mark = whenfound(language,word)
            if mark then
                for i=1,n do
                    mark(nds[i])
                end
            end
        end
        n, s = 0, 0
    end

    while current do
        local code = ischar(current)
        if code then
            local a = getlanguage(current)
            if a then
                if a ~= language then
                    -- a language switch terminates the current word
                    if s > 0 then
                        action()
                    end
                    language = a
                end
            elseif s > 0 then
                action()
                language = a
            end
            -- a character without data is treated as a non-letter instead of
            -- triggering an error on a nil index
            local data = chardata[code]
            if data and is_letter[data.category] then
                n = n + 1
                nds[n] = current
                s = s + 1
                str[s] = utfchar(code)
            elseif s > 0 then
                action()
            end
        elseif s > 0 then
            action()
        end
        current = getnext(current)
    end
    if s > 0 then
        action()
    end
    return head
end
219
-- Registries indexed by method number: 'methods' holds the node list
-- handlers, 'enablers' the optional setup functions run by words.enable.
local methods  = { }
local enablers = { }

words.methods  = methods
words.enablers = enablers

-- Currently selected method and the on/off state used by words.check.
local wordmethod = 1
local enabled    = false
228
-- Entry point that is enabled as the "languages.words.check" action. When
-- checking is enabled and a handler exists for the selected method, the
-- handler's results are returned. Otherwise the list is passed through as
-- 'head, false'.
--
-- A method number without a registered handler no longer raises an error on
-- every processed list; such a list is simply left untouched.
function words.check(head)
    local method = enabled and methods[wordmethod]
    if method then
        return method(head)
    end
    return head, false
end
238
-- Switches word processing on. 'settings.method' (when it converts to a
-- number) selects the method, otherwise the current one is kept. The enabler
-- registered for that method, if any, gets the settings table before the
-- processor action is activated.
function words.enable(settings)
    local requested = settings.method
    local number    = requested and tonumber(requested)
    if number then
        wordmethod = number
    else
        wordmethod = wordmethod or 1
    end
    local enabler = enablers[wordmethod]
    if enabler then
        enabler(settings)
    end
    enableaction("processors","languages.words.check")
    enabled = true
end
249
-- Switches word processing off; only the local flag is cleared, so from then
-- on words.check passes lists through untouched.
function words.disable()
    enabled = false
end
253
254
255
-- Lazily filled table of marker functions. A key is either a color name
-- (string) or a language number; the value is a function that sets the color
-- attribute of a node, or false when no matching color is defined.
local cache = { }

do

    -- Resolves a key to an entry of the color list (or nil).
    local function resolve(k)
        if type(k) == "string" then
            return colist[k]
        end
        if k < 0 then
            return colist["word:unset"]
        end
        return colist["word:" .. (numbers[k] or "unset")] or colist["word:unknown"]
    end

    table.setmetatableindex(cache, function(t,k)
        local c = resolve(k)
        local v = false
        if c then
            v = function(n)
                setattr(n,a_color,c)
            end
        end
        t[k] = v -- remember the result, also when it is false
        return v
    end)

end
271
272
273
-- Method 1: color words depending on whether they are found in the loaded
-- word list of their language.

-- Returns false for words below the threshold, otherwise the marker for
-- "word:yes" (known) or "word:no" (unknown).
-- NOTE(review): #str is a byte count, so words with multibyte characters
-- reach the threshold with fewer characters -- confirm this is intended.
local function sweep(language,str)
    if #str < words.threshold then
        return false
    end
    if words.found(language,str) then
        return cache["word:yes"]
    end
    return cache["word:no"]
end

methods[1] = function(head)
    -- start from a clean slate: reset the color attribute on every node
    for current in nextnode, head do
        setattr(current,a_color,unsetvalue)
    end
    return mark_words(head,sweep)
end
290
291
292
-- State for method 2 (collecting used words): the name of the dump file, a
-- flag telling if there is something to dump, the name of the active list,
-- and the category that currently receives the counts.
local dumpname, dumpthem, listname = nil, false, "document"

local category = { }
298
-- Auto-vivifying storage for collected words. Indexing 'categories' with a
-- list name creates a category record; indexing its 'languages' table with
-- a language tag creates the per-language record that holds the word list
-- and its counters.
--
-- The 'number' field used to be filled from an undefined global 'language',
-- which always gave nil; it is now taken from the registered language
-- record.
-- NOTE(review): assumes entries of languages.registered carry a 'number'
-- field; when they don't the field stays nil, exactly as before.
local categories = setmetatableindex(function(categories,name)
    local perlanguage = setmetatableindex(function(perlanguage,tag)
        local r = registered[tag]
        local v = {
            number   = r and r.number   or nil,
            parent   = r and r.parent   or nil,
            patterns = r and r.patterns or nil,
            tag      = r and r.tag      or nil,
            list     = { },
            total    = 0,
            unique   = 0,
        }
        perlanguage[tag] = v
        return v
    end)
    local v = {
        languages = perlanguage,
        total     = 0,
    }
    categories[name] = v
    return v
end)
321
-- Root of the collected data; this is the table that gets serialized when
-- the used words are dumped.
local collected = {
    version    = 1.000,
    total      = 0,
    categories = categories,
}
327
-- Enabler for method 2: selects the category that receives the counts, using
-- 'settings.list' or "document" when no (or an empty) name is given.
enablers[2] = function(settings)
    local name = settings.list
    if not name or name == "" then
        name = "document"
    end
    listname = name
    category = collected.categories[listname]
end
333
-- Method 2: count the words that are used, per category and per language.

-- Registers the lowercased word in the active category and updates the
-- counters; words below the threshold are ignored. Nothing is returned, so
-- no nodes get marked.
local function sweep(language,str)
    if #str < words.threshold then
        return
    end
    local key    = lowerchar(str)
    local record = category.languages[numbers[language] or "unset"]
    local list   = record.list
    local count  = list[key]
    if count then
        list[key] = count + 1
    else
        -- first occurrence of this word
        list[key] = 1
        record.unique = record.unique + 1
    end
    collected.total = collected.total + 1
    category.total  = category.total + 1
    record.total    = record.total + 1
end

methods[2] = function(head)
    dumpthem = true -- there is something to save at the end of the run
    return mark_words(head,sweep)
end
356
-- Saves the collected words (including the threshold that was used) when
-- method 2 has been active. Without an explicitly set name the file is named
-- after the job, with suffix "words".
local function dumpusedwords()
    if not dumpthem then
        return
    end
    collected.threshold = words.threshold
    if not dumpname then
        dumpname = file.addsuffix(tex.jobname,"words")
    end
    report_words("saving list of used words in %a",dumpname)
    io.savedata(dumpname,table.serialize(collected,true))
end
366
-- A non-empty string value of this directive overrides the name of the dump
-- file; any other value leaves the name as it is.
directives.register("languages.words.dump", function(v)
    if type(v) == "string" and v ~= "" then
        dumpname = v
    end
end)

-- The dump is written as one of the stop actions.
luatex.registerstopactions(dumpusedwords)
372
373
374
-- Method 3: color every word according to its language.

-- The word itself is not looked at; the marker only depends on the language
-- number.
local function sweep(language,str)
    local marker = cache[language]
    return marker
end

methods[3] = function(head)
    -- reset the color attribute on every node before marking
    for current in nextnode, head do
        setattr(current,a_color,unsetvalue)
    end
    return mark_words(head,sweep)
end
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
-- Interface to the TeX end.

-- Takes a settings table with the keys "method" and "list".
implement {
    name      = "enablespellchecking",
    arguments = { { { "method" }, { "list" } } },
    actions   = words.enable,
}

implement {
    name    = "disablespellchecking",
    actions = words.disable,
}

-- Takes two strings that are passed on to words.load (tag and filename).
implement {
    name      = "loadspellchecklist",
    arguments = "2 strings",
    actions   = words.load,
}
423 |