1if not modules then modules = { } end modules ['lang-rep'] = {
2 version = 1.001,
3 comment = "companion to lang-rep.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14
15
16
17
18local type, tonumber, next = type, tonumber, next
19local gmatch, gsub = string.gmatch, string.gsub
20local utfbyte, utfsplit = utf.byte, utf.split
21local P, C, U, Cc, Ct, Cs, lpegmatch = lpeg.P, lpeg.C, lpeg.patterns.utf8character, lpeg.Cc, lpeg.Ct, lpeg.Cs, lpeg.match
22local find = string.find
23
-- Character code that is injected when a replacement contains an empty "{}".
local zwnj <const> = 0x200C

-- One braced group. The utf characters between "{" and "}" are passed through
-- utfbyte and collected in a table; an empty group yields false instead.
local grouped = P("{") * ( Ct((U/utfbyte-P("}"))^1) + Cc(false) ) * P("}")

-- Turns a replacement string into a list. When positioned at a "{" the
-- alternatives are tried in this order:
--
--   {}            -> the number zwnj
--   {..}{..}{..}  -> { "discretionary", group, group, group }
--   {..}          -> { "noligature", group }
--
-- Any other utf character (including a "{" that matches none of the above)
-- is stored as the result of utfbyte. The match fails (nil) on an empty string.
local splitter = Ct((
    #P("{") * (
        P("{}") / function() return zwnj end
      + Ct(Cc("discretionary") * grouped * grouped * grouped)
      + Ct(Cc("noligature") * grouped)
    )
  + U/utfbyte
)^1)

-- Matches a string that starts with "{" and ends with "}" and captures what is
-- in between: (1-P(-2))^0 consumes characters as long as at least two remain,
-- so exactly one character is left for the closing brace.
local stripper = P("{") * Cs((1-P(-2))^0) * P("}") * P(-1)
37
-- Tracing flags; each is kept in sync with its tracker through the callback
-- that is registered on the same line.
local trace_replacements = false trackers.register("languages.replacements", function(v) trace_replacements = v end)
local trace_details = false trackers.register("languages.replacements.details", function(v) trace_details = v end)

-- Reporter used for all messages issued by this module.
local report_replacement = logs.reporter("languages","replacements")
42
43local glyph_code <const> = nodes.nodecodes.glyph
44local glue_code <const> = nodes.nodecodes.glue
45
46local spaceskip_code <const> = nodes.gluecodes.spaceskip
47local xspaceskip_code <const> = nodes.gluecodes.xspaceskip
48
49local nuts = nodes.nuts
50
51local getnext = nuts.getnext
52local getprev = nuts.getprev
53local getattr = nuts.getattr
54local getid = nuts.getid
55local getsubtype = nuts.getsubtype
56local getchar = nuts.getchar
57local isglyph = nuts.isglyph
58
59local setlink = nuts.setlink
60local setnext = nuts.setnext
61local setprev = nuts.setprev
62local setchar = nuts.setchar
63local setattrlist = nuts.setattrlist
64local setoptions = nuts.setoptions
65
66local glyphoptioncodes = tex.glyphoptioncodes
67
68local norightligature_option <const> = glyphoptioncodes.norightligature
69local noleftligature_option <const> = glyphoptioncodes.noleftligature
70
71local insertbefore = nuts.insertbefore
72local insertafter = nuts.insertafter
73local remove_node = nuts.remove
74local copy_node = nuts.copy
75local flushlist = nuts.flushlist
76
77local nodepool = nuts.pool
78local new_disc = nodepool.disc
79
80local texsetattribute = tex.setattribute
81
82local unsetvalue <const> = attributes.unsetvalue
83
84local enableaction = nodes.tasks.enableaction
85
86local v_reset <const> = interfaces.variables.reset
87
88local implement = interfaces.implement
89
90local processors = typesetters.processors
91local splitprocessor = processors.split
92
93local replacements = languages.replacements or { }
94languages.replacements = replacements
95
96local a_replacements <const> = attributes.private("replacements")
97
-- Registry of replacement sets. Each set is reachable by name and by its
-- numeric attribute value; 'trees' maps the attribute value straight to the
-- lookup tree so that the handler needs only one indexing step.
local lists = { }
local last = 0
local trees = { }

-- Sets are allocated on first access: indexing 'lists' with an unknown name
-- creates an empty set and assigns it the next free attribute value.
table.setmetatableindex(lists,function(registry,name)
    local index = last + 1
    local tree = { }
    local entry = {
        name = name,
        list = tree,
        attribute = index,
    }
    last = index
    registry[index] = entry
    registry[name] = entry
    trees[index] = tree
    return entry
end)

-- The reset set is allocated like any other one, but its attribute value is
-- overwritten by the unset value.
lists[v_reset].attribute = unsetvalue
113
114
115
-- Walks (and where needed extends) the lookup tree for the characters of
-- 'word'. Each level is keyed by utfbyte of one character.
--
-- Returns the node that belongs to the last character plus the number of
-- characters in the word; the node is nil when the word is empty.
local function locate(root,word)
    local list = utfsplit(word)
    local size = #list
    local node = nil
    for i=1,size do
        local code = utfbyte(list[i])
        node = root[code]
        if not node then
            node = { }
            root[code] = node
        end
        root = node
    end
    return node, size
end

-- Registers one replacement in the tree 'root'.
--
-- root        : lookup tree of a replacement set (the 'list' field of a set)
-- word        : the string that has to be matched
-- replacement : either a function, which is stored as 'replacer' and called
--               at replacement time, or a string that is run through
--               splitprocessor, stripped from one pair of outer braces and
--               split into 'newcodes' right away
--
-- The resulting descriptor is stored in the 'final' field of the tree node of
-- the last character. An empty word registers nothing.
local function add(root,word,replacement)
    if type(replacement) == "function" then
        local node, size = locate(root,word)
        if node then
            -- There is no processor in this case. The field used to be set
            -- from an undeclared global of that name, which only worked
            -- because that global does not exist.
            node.final = {
                word = word,
                replacer = replacement,
                oldlength = size,
            }
        end
    else
        local processor, replacement = splitprocessor(replacement,true)
        replacement = lpegmatch(stripper,replacement) or replacement
        local node, size = locate(root,word)
        if node then
            -- A brace means that the replacement carries discretionaries or
            -- ligature blockers, which need the slow path in 'replace'.
            local special = find(replacement,"{",1,true)
            local newlist = lpegmatch(splitter,replacement)
            node.final = {
                word = word,
                replacement = replacement,
                processor = processor,
                oldlength = size,
                newcodes = newlist,
                special = special,
            }
        end
    end
end
161
-- Adds replacements to the set 'category' (the set is created on demand).
-- When 'word' is a table its keys are the words and its values the
-- replacements; otherwise one word is added and a missing replacement
-- defaults to the empty string.
function replacements.add(category,word,replacement)
    local root = lists[category].list
    if type(word) ~= "table" then
        add(root,word,replacement or "")
        return
    end
    for old, new in next, word do
        add(root,old,new)
    end
end
172
173
174
-- Adds a series of replacements to the set 'category'. The list is either a
-- string with entries separated by whitespace or an indexed table of
-- entries. Each entry is a replacement; the word to be matched is that same
-- entry with all braces removed.
function languages.replacements.addlist(category,list)
    local root = lists[category].list
    local function register(new)
        -- the parentheses drop the substitution count that gsub also returns
        local old = (gsub(new,"[{}]",""))
        add(root,old,new)
    end
    if type(list) == "string" then
        for entry in gmatch(list,"%S+") do
            register(entry)
        end
    else
        for i=1,#list do
            register(list[i])
        end
    end
end
192
-- Builds a node list with one copy of 'template' per entry in 'list', where
-- each copy gets the corresponding entry as its character. Returns the head
-- of the new list, or nil when 'list' is empty.
local function tonodes(list,template)
    local head, tail
    for i=1,#list do
        local glyph = copy_node(template)
        setchar(glyph,list[i])
        if not head then
            head = glyph
            tail = glyph
        else
            head, tail = insertafter(head,tail,glyph)
        end
    end
    return head
end
206
207local ispunctuation = characters.is_punctuation
208
209
210
211
212
213
214
215
216
-- Replaces the run of nodes first..last by what the descriptor 'final' asks
-- for.
--
-- head     : head of the node list
-- first    : first node of the matched run
-- last     : last node of the matched run
-- final    : descriptor as stored by 'add' (word, replacement or replacer,
--            oldlength, newcodes, special)
-- hasspace : true when the handler matched a space glue inside the run
-- overload : optional function that is called afterwards with the descriptor,
--            the node after the one that preceded the run and the node
--            before the one that followed it
--
-- Returns the (possibly changed) head and the node that followed the run.
-- This also holds when a replacer function returns something that is not a
-- string: in that case nothing is changed. Callers assign the results to
-- their own head and current, so returning nothing there would make them
-- lose the list.
local function replace(head,first,last,final,hasspace,overload)
    local current = first
    local prefirst = getprev(first) or head
    local postlast = getnext(last)
    local oldlength = final.oldlength
    local newcodes = final.newcodes
    local word = final.word
    local replacement = final.replacement
    local replacer = final.replacer
    local special = final.special
    if type(replacer) == "function" then
        -- dynamic replacement: ask the replacer and split its answer now
        replacement = replacer(word)
        if type(replacement) == "string" then
            special = find(replacement,"{",1,true)
            newcodes = lpegmatch(splitter,replacement)
        else
            return head, postlast
        end
    end
    local newlength = newcodes and #newcodes or 0
    if trace_replacements then
        report_replacement("replacing word %a by %a",word,replacement)
    end
    if hasspace or special then
        -- Slow path: the old run is unlinked as a whole and the replacement
        -- is built from copies of the first node, which serves as template.
        local before = getprev(current)
        local after = getnext(last)
        local old = current
        setnext(last)
        setlink(before,after)
        current = before
        if not current then
            -- the run started at the head of the list
            head = nil
        end
        for i=1,newlength do
            local codes = newcodes[i]
            if type(codes) == "table" then
                local method = codes[1]
                if method == "discretionary" then
                    -- the three groups are false when they were empty
                    local pre, post, rep = codes[2], codes[3], codes[4]
                    if pre then
                        pre = tonodes(pre,first)
                    end
                    if post then
                        post = tonodes(post,first)
                    end
                    if rep then
                        rep = tonodes(rep,first)
                    end
                    local new = new_disc(pre,post,rep)
                    setattrlist(new,first)
                    head, current = insertafter(head,current,new)
                elseif method == "noligature" then
                    local list = codes[2]
                    if list then
                        local n = #list
                        for j=1,n do
                            local new = copy_node(first)
                            setchar(new,list[j])
                            if j == 1 then
                                setoptions(new,norightligature_option)
                            elseif j == n then
                                setoptions(new,noleftligature_option | norightligature_option)
                            else
                                setoptions(new,noleftligature_option)
                            end
                            head, current = insertafter(head,current,new)
                        end
                    else
                        -- empty group: flag the node that was handled last
                        setoptions(current,norightligature_option)
                    end
                else
                    report_replacement("unknown method %a",method or "?")
                end
            else
                local new = copy_node(first)
                setchar(new,codes)
                head, current = insertafter(head,current,new)
            end
        end
        flushlist(old)
    elseif newlength == 0 then
        -- nothing to put in place, the run is left as it is
    elseif oldlength == newlength then
        -- same size: only the characters have to change
        if word ~= replacement then
            for i=1,newlength do
                setchar(current,newcodes[i])
                current = getnext(current)
            end
        end
    elseif oldlength < newlength then
        -- longer: insert the surplus in front and then reuse the old nodes
        for i=1,newlength-oldlength do
            local n = copy_node(current)
            setchar(n,newcodes[i])
            head, current = insertbefore(head,current,n)
            current = getnext(current)
        end
        for i=newlength-oldlength+1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    else
        -- shorter: drop the surplus and reuse what remains
        for i=1,oldlength-newlength do
            head, current = remove_node(head,current,true)
        end
        for i=1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    end
    if overload then
        overload(final,getnext(prefirst),getprev(postlast))
    end
    return head, postlast
end
342
343
344
-- Node list processor: scans the list for runs of glyphs that match a word in
-- the replacement set selected by the replacements attribute, and calls
-- 'replace' for each match. Returns the (possibly changed) head.
--
-- State that is kept while scanning:
--
--   mode      : false, "word" or "punc", the class of the last glyph seen
--   tree      : the lookup tree that belongs to the current attribute value
--   root      : the current position in that tree, false when not matching
--   wordstart : first node of the current candidate
--   wordend   : last node of the current candidate
--   prevfinal : descriptor of a complete match that ended before a space
--   prevend   : last node of that shorter match
--   hasspace  : true when the candidate contains a space glue
function replacements.handler(head)
    local current = head
    local overload = attributes.applyoverloads
    local mode = false
    local wordstart = false
    local wordend = false
    local prevend = false
    local prevfinal = false
    local tree = false
    local root = false
    local hasspace = false
    while current do
        local id = getid(current)
        if id == glyph_code then
            local a = getattr(current,a_replacements)
            if a then
                tree = trees[a]
                if tree then
                    local char = getchar(current)
                    local punc = ispunctuation[char]
                    -- a switch between punctuation and other characters ends
                    -- the current candidate
                    if mode == "punc" then
                        if not punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "word"
                        end
                    elseif mode == "word" then
                        if punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "punc"
                        end
                    else
                        mode = punc and "punc" or "word"
                    end
                    if root then
                        -- continue the current candidate
                        root = root[char]
                        if root then
                            wordend = current
                        end
                    else
                        -- no candidate: first apply a pending shorter match
                        -- and then try to start a new candidate here
                        if prevfinal then
                            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                            prevfinal = false
                        end
                        root = tree[char]
                        if root then
                            wordstart = current
                            wordend = current
                            prevend = false
                            hasspace = false
                        end
                    end
                else
                    root = false
                end
            else
                tree = false
            end
            current = getnext(current)
        elseif root then
            -- a non glyph while a candidate is active
            local final = root.final
            if mode == "word" and id == glue_code then
                local s = getsubtype(current)
                if s == spaceskip_code or s == xspaceskip_code then
                    -- a space glue continues the candidate when the tree has
                    -- an entry for character 32 at this point
                    local r = root[32]
                    if r then
                        if not prevend then
                            -- remember the match up to here as fallback
                            local f = root.final
                            if f then
                                prevend = wordend
                                prevfinal = f
                            end
                        end
                        wordend = current
                        root = r
                        hasspace = true
                        goto moveon
                    end
                end
            end
            -- the candidate ends here; replace continues after the run
            if final then
                head, current = replace(head,wordstart,wordend,final,hasspace,overload)
            elseif prevfinal then
                head, current = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
            end
            prevfinal = false
            root = false
          ::moveon::
            current = getnext(current)
        else
            current = getnext(current)
        end
    end
    -- a candidate that runs up to the end of the list
    if root then
        local final = root.final
        if final then
            head = replace(head,wordstart,wordend,final,hasspace,overload)
        elseif prevfinal then
            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
        end
    end
    return head
end
466
-- Becomes true once the handler has been enabled, which happens only once.
local enabled = false

-- Selects the replacement set 'n' by setting the replacements attribute. The
-- reset value unsets the attribute; any other name selects (and on demand
-- creates) the set with that name and enables the handler on first use.
function replacements.set(n)
    local value = unsetvalue
    if n ~= v_reset then
        value = lists[n].attribute
        if not enabled then
            enableaction("processors","languages.replacements.handler")
            if trace_replacements then
                report_replacement("enabling replacement handler")
            end
            enabled = true
        end
    end
    texsetattribute(a_replacements,value)
end
484
485
486
-- Interface registrations: each entry publishes one of the functions above
-- under the given name, together with the argument specification that
-- interfaces.implement uses.

-- replacements.set, one string argument
implement {
    name = "setreplacements",
    actions = replacements.set,
    arguments = "string"
}

-- replacements.add, three string arguments
implement {
    name = "addreplacements",
    actions = replacements.add,
    arguments = "3 strings",
}

-- replacements.addlist, two string arguments
implement {
    name = "addreplacementslist",
    actions = replacements.addlist,
    arguments = "2 strings",
}
504 |