-- Registers this file's metadata under the key 'lang-rep' in the global
-- `modules` table, creating that table first when it does not exist yet.
if not modules then modules = { } end modules ['lang-rep'] = {
    version   = 1.001,
    comment   = "companion to lang-rep.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14
15
16
17
18local type, tonumber, next = type, tonumber, next
19local gmatch, gsub = string.gmatch, string.gsub
20local utfbyte, utfsplit = utf.byte, utf.split
21local P, C, U, Cc, Ct, Cs, lpegmatch = lpeg.P, lpeg.C, lpeg.patterns.utf8character, lpeg.Cc, lpeg.Ct, lpeg.Cs, lpeg.match
22local find = string.find
23
-- Code point U+200C; the splitter below emits it for an empty "{}" in a
-- replacement string.
local zwnj     = 0x200C

-- One braced group "{...}". A non-empty group is captured as a table holding
-- the utfbyte result of every character before the closing brace; an empty
-- group is captured as the value false.
local grouped  = P("{") * ( Ct((U/utfbyte-P("}"))^1) + Cc(false) ) * P("}")

-- Converts a replacement string into a table of items. When the input is at
-- a "{" the alternatives are tried in this order:
--
--   "{}"           -> the number zwnj
--   three groups   -> { "discretionary", group, group, group }
--   one group      -> { "noligature", group }
--
-- In all other cases (also when none of the above matches at a "{") a single
-- character is consumed and captured as its utfbyte value.
local splitter = Ct((
    #P("{") * (
        P("{}") / function() return zwnj end
      + Ct(Cc("discretionary") * grouped * grouped * grouped)
      + Ct(Cc("noligature") * grouped)
    )
  + U/utfbyte
)^1)

-- Matches a string whose first character is "{" and whose last character is
-- "}" and captures everything in between: (1-P(-2))^0 consumes characters as
-- long as at least two remain, so exactly one is left for the closing brace,
-- and P(-1) asserts the end of the string.
-- NOTE(review): only the first and last character are inspected, so a string
-- like "{a}{b}" matches too and yields "a}{b" - confirm that callers never
-- pass such input unwrapped.
local stripper = P("{") * Cs((1-P(-2))^0) * P("}") * P(-1)
36
-- Tracing flags; each one is toggled by the tracker registered under the
-- given name.
local trace_replacements = false
local trace_details      = false

trackers.register("languages.replacements", function(state)
    trace_replacements = state
end)

trackers.register("languages.replacements.details", function(state)
    trace_details = state
end)

-- Reporter used for all messages issued by this module.
local report_replacement = logs.reporter("languages","replacements")
41
42local glyph_code = nodes.nodecodes.glyph
43local glue_code = nodes.nodecodes.glue
44
45local spaceskip_code = nodes.gluecodes.spaceskip
46local xspaceskip_code = nodes.gluecodes.xspaceskip
47
48local nuts = nodes.nuts
49
50local getnext = nuts.getnext
51local getprev = nuts.getprev
52local getattr = nuts.getattr
53local getid = nuts.getid
54local getsubtype = nuts.getsubtype
55local getchar = nuts.getchar
56local isglyph = nuts.isglyph
57
58local setlink = nuts.setlink
59local setnext = nuts.setnext
60local setprev = nuts.setprev
61local setchar = nuts.setchar
62local setattrlist = nuts.setattrlist
63local setoptions = nuts.setoptions
64
65local glyphoptioncodes = tex.glyphoptioncodes
66local norightligature_option = glyphoptioncodes.norightligature
67local noleftligature_option = glyphoptioncodes.noleftligature
68
69local insertbefore = nuts.insertbefore
70local insertafter = nuts.insertafter
71local remove_node = nuts.remove
72local copy_node = nuts.copy
73local flushlist = nuts.flushlist
74
75local nodepool = nuts.pool
76local new_disc = nodepool.disc
77
78local texsetattribute = tex.setattribute
79local unsetvalue = attributes.unsetvalue
80
81local enableaction = nodes.tasks.enableaction
82
83local v_reset = interfaces.variables.reset
84
85local implement = interfaces.implement
86
87local processors = typesetters.processors
88local splitprocessor = processors.split
89
90local replacements = languages.replacements or { }
91languages.replacements = replacements
92
93local a_replacements = attributes.private("replacements")
94
-- Registry of replacement sets:
--
--   lists[name] and lists[number] -> { name = ..., list = tree, attribute = number }
--   trees[number]                 -> the same tree, looked up by attribute value
--
-- Entries are created on first access through the index metamethod below;
-- `last` is the most recently handed out number.
local lists = { }
local last  = 0
local trees = { }

table.setmetatableindex(lists,function(registry,name)
    local number = last + 1
    local tree   = { }
    local entry  = {
        name      = name,
        list      = tree,
        attribute = number,
    }
    last             = number
    registry[number] = entry
    registry[name]   = entry
    trees[number]    = tree
    return entry
end)

-- Accessing the reset entry creates it; its attribute is then overwritten
-- with the unset value.
lists[v_reset].attribute = unsetvalue
110
111
112
--- Registers one word and its replacement in a replacement tree.
--
-- The tree is a trie keyed by the utfbyte value of each character of `word`;
-- the node reached by the last character gets a `final` record describing
-- the replacement. An empty `word` registers nothing.
--
-- @param root        table: the trie to extend
-- @param word        string: the text to look for
-- @param replacement string or function. A function is stored as
--                    `final.replacer` and is later called with the word. A
--                    string is passed through splitprocessor, stripped of
--                    one outer pair of braces and pre-split into
--                    `final.newcodes`.
local function add(root,word,replacement)
    local final
    if type(replacement) == "function" then
        -- The original also wrote `processor = processor` here, which read an
        -- undefined global (there is no processor in this branch); the field
        -- is simply left out now.
        final = {
            word     = word,
            replacer = replacement,
        }
    else
        -- NOTE(review): splitprocessor is assumed to return an optional
        -- processor name followed by the remaining text - confirm.
        local processor, text = splitprocessor(replacement,true)
        text = lpegmatch(stripper,text) or text
        local special  = find(text,"{",1,true)
        local newcodes = lpegmatch(splitter,text)
        final = {
            word        = word,
            replacement = text,
            processor   = processor,
            newcodes    = newcodes,
            special     = special,
        }
    end
    local list = utfsplit(word)
    local size = #list
    if size == 0 then
        -- nothing to hang the record on
        return
    end
    final.oldlength = size
    -- Walk (and where needed create) the path for the word, one shared loop
    -- for both kinds of replacement.
    local node = root
    for i=1,size do
        local code  = utfbyte(list[i])
        local child = node[code]
        if not child then
            child = { }
            node[code] = child
        end
        node = child
    end
    node.final = final
end
158
--- Adds one or more replacements to the set named `category`.
--
-- @param category    name of the replacement set (created on first use)
-- @param word        either a string, or a table that maps words to their
--                    replacements
-- @param replacement replacement for a string `word`; defaults to the empty
--                    string and is ignored when `word` is a table
function replacements.add(category,word,replacement)
    local tree = lists[category].list
    if type(word) ~= "table" then
        add(tree,word,replacement or "")
        return
    end
    for old, new in next, word do
        add(tree,old,new)
    end
end
169
170
171
--- Adds a list of replacement specifications to the set named `category`.
--
-- Each specification is registered as the replacement for the word obtained
-- by deleting all braces from it.
--
-- @param category name of the replacement set (created on first use)
-- @param list     either a string with specifications separated by
--                 whitespace, or an indexed table of specifications
function languages.replacements.addlist(category,list)
    local tree = lists[category].list
    local function register(specification)
        -- only the first result of gsub (the string) is wanted
        local plain = gsub(specification,"[{}]","")
        add(tree,plain,specification)
    end
    if type(list) ~= "string" then
        for index=1,#list do
            register(list[index])
        end
    else
        for specification in gmatch(list,"%S+") do
            register(specification)
        end
    end
end
189
--- Builds a node list with one copy of `template` per entry of `list`, each
-- copy getting the corresponding entry as its character.
--
-- @param list     indexed table of character codes
-- @param template node that is copied for every entry
-- @return the first node of the new list, or nil when `list` is empty
local function tonodes(list,template)
    local first, tail
    for index=1,#list do
        local glyph = copy_node(template)
        setchar(glyph,list[index])
        if not first then
            first, tail = glyph, glyph
        else
            first, tail = insertafter(first,tail,glyph)
        end
    end
    return first
end
203
204local ispunctuation = characters.is_punctuation
205
206
207
208
209
210
211
212
213
--- Replaces the nodes first..last by the replacement described in `final`.
--
-- @param head     head of the node list
-- @param first    first node of the matched word
-- @param last     last node of the matched word
-- @param final    record stored by `add` (word, replacement or replacer,
--                 oldlength, newcodes, special, ...)
-- @param hasspace true when the match spans a space glue
-- @param overload optional function, called as overload(final,start,stop)
--                 with the boundaries of the range that follows the node
--                 before `first` and precedes the node after `last`
-- @return the (possibly changed) head and the node that followed `last`;
--         both are also returned when a replacer function does not deliver
--         a string, so callers never receive nil for the head
local function replace(head,first,last,final,hasspace,overload)
    local current     = first
    local before      = getprev(first)
    local postlast    = getnext(last)
    local oldlength   = final.oldlength
    local newcodes    = final.newcodes
    local word        = final.word
    local replacement = final.replacement
    local replacer    = final.replacer
    local special     = final.special
    if type(replacer) == "function" then
        replacement = replacer(word)
        if type(replacement) == "string" then
            special  = find(replacement,"{",1,true)
            newcodes = lpegmatch(splitter,replacement)
        else
            -- Nothing to replace. The original used a bare `return` here,
            -- which made every caller assign nil to its head (and, for two
            -- of them, to the node it continues from).
            return head, postlast
        end
    end
    local newlength = newcodes and #newcodes or 0
    if trace_replacements then
        report_replacement("replacing word %a by %a",word,replacement)
    end
    if hasspace or special then
        -- Unlink first..last as a whole, then build the replacement node by
        -- node after the node that preceded the word; `first` serves as the
        -- template for all new nodes and is flushed at the end.
        local list = first
        setnext(last)
        setlink(before,postlast)
        current = before
        if not current then
            head = nil
        end
        for i=1,newlength do
            local codes = newcodes[i]
            if type(codes) == "table" then
                local method = codes[1]
                if method == "discretionary" then
                    local pre, post, replace = codes[2], codes[3], codes[4]
                    if pre then
                        pre = tonodes(pre,first)
                    end
                    if post then
                        post = tonodes(post,first)
                    end
                    if replace then
                        replace = tonodes(replace,first)
                    end
                    local new = new_disc(pre,post,replace)
                    setattrlist(new,first)
                    head, current = insertafter(head,current,new)
                elseif method == "noligature" then
                    local group = codes[2]
                    if group then
                        local n = #group
                        for j=1,n do
                            local new = copy_node(first)
                            setchar(new,group[j])
                            if j == 1 then
                                setoptions(new,norightligature_option)
                            elseif j == n then
                                setoptions(new,noleftligature_option | norightligature_option)
                            else
                                setoptions(new,noleftligature_option)
                            end
                            head, current = insertafter(head,current,new)
                        end
                    elseif current then
                        -- empty group: flag the node inserted so far, if any
                        setoptions(current,norightligature_option)
                    end
                else
                    report_replacement("unknown method %a",method or "?")
                end
            else
                local new = copy_node(first)
                setchar(new,codes)
                head, current = insertafter(head,current,new)
            end
        end
        flushlist(list)
    elseif newlength == 0 then
        -- an empty replacement leaves the word untouched
    elseif oldlength == newlength then
        -- same length: overwrite the characters in place
        if word ~= replacement then
            for i=1,newlength do
                setchar(current,newcodes[i])
                current = getnext(current)
            end
        end
    elseif oldlength < newlength then
        -- longer: insert the extra nodes in front, then overwrite the rest
        for i=1,newlength-oldlength do
            local n = copy_node(current)
            setchar(n,newcodes[i])
            head, current = insertbefore(head,current,n)
            current = getnext(current)
        end
        for i=newlength-oldlength+1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    else
        -- shorter: drop the surplus nodes, then overwrite what is left
        for i=1,oldlength-newlength do
            head, current = remove_node(head,current,true)
        end
        for i=1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    end
    if overload then
        -- The new range starts right after the node that preceded the word,
        -- or at the head when the word started the list. The original fell
        -- back to the old head and then took its successor, which skipped
        -- the first new node and could look at a node that had already been
        -- flushed or removed above.
        local start
        if before then
            start = getnext(before)
        else
            start = head
        end
        overload(final,start,getprev(postlast))
    end
    return head, postlast
end
339
340
341
--- Node list processor: scans the list for registered words and replaces them.
--
-- Only glyphs that carry the replacements attribute take part; the attribute
-- value selects the tree in `trees`. While scanning, `root` is the trie node
-- reached so far (false when no match is in progress), wordstart/wordend
-- delimit the nodes matched so far, and prevend/prevfinal remember a shorter
-- complete match that was seen before the match was extended over a space.
--
-- @param head head of the node list
-- @return the (possibly changed) head
function replacements.handler(head)
    local current   = head
    local overload  = attributes.applyoverloads -- handed on to replace
    local mode      = false -- false, "punc" or "word"
    local wordstart = false
    local wordend   = false
    local prevend   = false
    local prevfinal = false
    local tree      = false
    local root      = false
    local hasspace  = false -- set once the current match spans a space glue
    while current do
        local id = getid(current)
        if id == glyph_code then
            local a = getattr(current,a_replacements)
            if a then
                -- the attribute value is the index of the tree to use
                tree = trees[a]
                if tree then
                    local char = getchar(current)
                    local punc = ispunctuation[char]
                    -- A switch between punctuation and non punctuation ends
                    -- a match in progress: the complete match at the current
                    -- trie node wins, otherwise the remembered shorter one.
                    if mode == "punc" then
                        if not punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "word"
                        end
                    elseif mode == "word" then
                        if punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "punc"
                        end
                    else
                        -- first relevant glyph: just pick the mode
                        mode = punc and "punc" or "word"
                    end
                    if root then
                        -- continue the match; when the trie has no branch
                        -- for this character root becomes nil and the match
                        -- in progress is given up
                        root = root[char]
                        if root then
                            wordend = current
                        end
                    else
                        -- no match in progress: first settle a remembered
                        -- shorter match, then try to start a new one here
                        if prevfinal then
                            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                            prevfinal = false
                        end
                        root = tree[char]
                        if root then
                            wordstart = current
                            wordend   = current
                            prevend   = false
                            hasspace  = false
                        end
                    end
                else
                    root= false
                end
            else
                tree = false
            end
            current = getnext(current)
        elseif root then
            -- a non glyph node while a match is in progress
            local final = root.final
            if mode == "word" and id == glue_code then
                local s = getsubtype(current)
                if s == spaceskip_code or s == xspaceskip_code then
                    -- a space glue continues the match when the trie has a
                    -- branch for character 32
                    local r = root[32]
                    if r then
                        if not prevend then
                            -- remember the complete match reached before the
                            -- first space as a fallback
                            local f = root.final
                            if f then
                                prevend   = wordend
                                prevfinal = f
                            end
                        end
                        wordend  = current
                        root     = r
                        hasspace = true
                        goto moveon
                    end
                end
            end
            -- the match ends here; replace also returns the node that
            -- followed the replaced range, from which scanning continues
            if final then
                head, current = replace(head,wordstart,wordend,final,hasspace,overload)
            elseif prevfinal then
                head, current = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
            end
            prevfinal = false
            root = false
          ::moveon::
            current = getnext(current)
        else
            current = getnext(current)
        end
    end
    -- settle a match that runs up to the end of the list
    if root then
        local final = root.final
        if final then
            head = replace(head,wordstart,wordend,final,hasspace,overload)
        elseif prevfinal then
            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
        end
    end
    return head
end
463
-- Becomes true once the handler has been enabled; this happens only once.
local enabled = false

--- Selects the replacement set for what follows by setting the replacements
-- attribute.
--
-- @param n name of a replacement set, or the reset keyword to unset the
--          attribute. Any other name enables the handler on first use.
function replacements.set(n)
    local value = unsetvalue
    if n ~= v_reset then
        value = lists[n].attribute
        if not enabled then
            enableaction("processors","languages.replacements.handler")
            if trace_replacements then
                report_replacement("enabling replacement handler")
            end
            enabled = true
        end
    end
    texsetattribute(a_replacements,value)
end
481
482
483
-- Interface registrations: each entry binds a name to one of the functions
-- above and declares the arguments it receives.

-- select a replacement set (or reset)
implement {
    name      = "setreplacements",
    arguments = "string",
    actions   = replacements.set,
}

-- category, word, replacement
implement {
    name      = "addreplacements",
    arguments = "3 strings",
    actions   = replacements.add,
}

-- category, list of specifications
implement {
    name      = "addreplacementslist",
    arguments = "2 strings",
    actions   = replacements.addlist,
}
501 |