-- Module registration record: make sure the global registry exists and
-- store this file's metadata under its own name.
if not modules then
    modules = { }
end

modules['lang-rep'] = {
    version   = 1.001,
    comment   = "companion to lang-rep.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14
15
16
17
18local type, tonumber, next = type, tonumber, next
19local gmatch, gsub = string.gmatch, string.gsub
20local utfbyte, utfsplit = utf.byte, utf.split
21local P, C, U, Cc, Ct, Cs, lpegmatch = lpeg.P, lpeg.C, lpeg.patterns.utf8character, lpeg.Cc, lpeg.Ct, lpeg.Cs, lpeg.match
22local find = string.find
23
-- Patterns used to parse a replacement specification.

local zwnj      = 0x200C       -- code inserted for an empty "{}" group
local lbrace    = P("{")
local rbrace    = P("}")
local codepoint = U/utfbyte    -- one utf character, captured as its code

-- A braced group "{...}": captures a table with the codes of the characters
-- between the braces, or false when the group is empty.
local grouped   = lbrace * ( Ct((codepoint - rbrace)^1) + Cc(false) ) * rbrace

-- What may start with a brace inside a replacement (tried in this order):
--
--   {}           : a single zwnj code
--   {..}{..}{..} : { "discretionary", <group>, <group>, <group> }
--   {..}         : { "noligature", <group> }
local braced    = P("{}") * Cc(zwnj)
                + Ct(Cc("discretionary") * grouped * grouped * grouped)
                + Ct(Cc("noligature") * grouped)

-- Splits a replacement into a table of codes (numbers) and special entries
-- (tables as described above); it needs at least one item to match.
local splitter  = Ct(( #lbrace * braced + codepoint )^1)

-- Matches a string that starts with "{" and ends with "}" and returns what
-- sits between that first and last character.
local stripper  = lbrace * Cs((1-P(-2))^0) * rbrace * P(-1)
36
-- Tracing flags, toggled through the tracker mechanism.
local trace_replacements = false
local trace_details      = false

trackers.register("languages.replacements", function(v)
    trace_replacements = v
end)

trackers.register("languages.replacements.details", function(v)
    trace_details = v
end)

-- Reporter used for all messages of this module.
local report_replacement = logs.reporter("languages","replacements")
41
42local glyph_code = nodes.nodecodes.glyph
43local glue_code = nodes.nodecodes.glue
44
45local spaceskip_code = nodes.gluecodes.spaceskip
46local xspaceskip_code = nodes.gluecodes.xspaceskip
47
48local nuts = nodes.nuts
49
50local getnext = nuts.getnext
51local getprev = nuts.getprev
52local getattr = nuts.getattr
53local getid = nuts.getid
54local getsubtype = nuts.getsubtype
55local getchar = nuts.getchar
56local isglyph = nuts.isglyph
57
58local setattr = nuts.setattr
59local setlink = nuts.setlink
60local setnext = nuts.setnext
61local setprev = nuts.setprev
62local setchar = nuts.setchar
63local setattrlist = nuts.setattrlist
64
65local insertbefore = nuts.insertbefore
66local insertafter = nuts.insertafter
67local remove_node = nuts.remove
68local copy_node = nuts.copy
69local flushlist = nuts.flushlist
70
71local nodepool = nuts.pool
72local new_disc = nodepool.disc
73
74local texsetattribute = tex.setattribute
75local unsetvalue = attributes.unsetvalue
76
77local enableaction = nodes.tasks.enableaction
78
79local v_reset = interfaces.variables.reset
80
81local implement = interfaces.implement
82
83local processors = typesetters.processors
84local splitprocessor = processors.split
85
-- The public namespace; an already existing table is reused.
local replacements     = languages.replacements or { }
languages.replacements = replacements

local a_replacements = attributes.private("replacements")
local a_noligature   = attributes.private("noligature")

-- Registry of replacement lists. Each entry is reachable by name and by
-- its attribute number; 'trees' maps the attribute number directly onto the
-- lookup tree of that list.
local lists = { }
local last  = 0
local trees = { }

-- Index function: creates the entry for a not yet known list on first
-- access and hands out the next attribute number.
local function newlist(registry,name)
    last = last + 1
    local tree  = { }
    local entry = { name = name, list = tree, attribute = last }
    registry[last] = entry
    registry[name] = entry
    trees[last]    = tree
    return entry
end

table.setmetatableindex(lists,newlist)

-- Accessing the reset entry creates it; its attribute then becomes the
-- unset value.
lists[v_reset].attribute = unsetvalue
107
108
109
-- Register one word in the lookup tree 'root'.
--
-- The tree is keyed by character code, one level per character of 'word'.
-- The node reached by the last character gets a 'final' record describing
-- the replacement. An empty word registers nothing.
--
-- root        : lookup tree of a replacement list
-- word        : the word to look for
-- replacement : replacement specification, optionally prefixed by a
--               processor (split off by splitprocessor) and optionally
--               wrapped in one pair of braces (removed by stripper)
local function add(root,word,replacement)
    local processor, stripped = splitprocessor(replacement,true)
    stripped = lpegmatch(stripper,stripped) or stripped
    local chars = utfsplit(word)
    local size  = #chars
    if size == 0 then
        return
    end
    -- descend, creating missing branches on the way
    local node = root
    for i=1,size do
        local code   = utfbyte(chars[i])
        local branch = node[code]
        if not branch then
            branch = { }
            node[code] = branch
        end
        node = branch
    end
    -- 'node' now belongs to the last character of the word
    node.final = {
        word        = word,
        replacement = stripped,
        processor   = processor,
        oldlength   = size,
        newcodes    = lpegmatch(splitter,stripped),
        special     = find(stripped,"{",1,true),
    }
end
135
-- Add replacements to the list named 'category'.
--
-- word        : either a single word (string) or a table that maps words
--               onto their replacements
-- replacement : the replacement for a single word; a missing value is
--               treated as an empty string (ignored when 'word' is a table)
function replacements.add(category,word,replacement)
    local root = lists[category].list
    if type(word) ~= "table" then
        add(root,word,replacement or "")
        return
    end
    for old, new in next, word do
        add(root,old,new)
    end
end
146
147
148
-- Add a series of replacements to the list named 'category'.
--
-- Each entry is a replacement specification; the word it replaces is that
-- same entry with all braces removed.
--
-- list : either a string with whitespace separated entries or an indexed
--        table of entries
function languages.replacements.addlist(category,list)
    local root = lists[category].list

    local function register(new)
        local old = gsub(new,"[{}]","")
        add(root,old,new)
    end

    if type(list) == "string" then
        for new in gmatch(list,"%S+") do
            register(new)
        end
    else
        for i=1,#list do
            register(list[i])
        end
    end
end
166
-- Turn a table of character codes into a linked run of nodes. Every node
-- is a copy of 'template' with its character set to the corresponding
-- code. Returns the first node, or nil when 'list' is empty.
local function tonodes(list,template)
    local count = #list
    if count == 0 then
        return nil
    end
    local head = copy_node(template)
    setchar(head,list[1])
    local tail = head
    for i=2,count do
        local glyph = copy_node(template)
        setchar(glyph,list[i])
        head, tail = insertafter(head,tail,glyph)
    end
    return head
end
180
181local is_punctuation = characters.is_punctuation
182
183
184
185
186
187
188
189
190
-- Replace the matched run of nodes first..last by the replacement that is
-- described in 'final'.
--
-- head     : head of the node list that contains the run
-- first    : first node of the matched run
-- last     : last node of the matched run
-- final    : record created by 'add' (word, replacement, oldlength,
--            newcodes, special, processor)
-- hasspace : true when the run also contains space glue
-- overload : optional function, called afterwards as
--            overload(final,start,stop) with the nodes that now sit between
--            the untouched neighbours of the run
--
-- Returns the (possibly changed) head and the node that followed 'last'.
--
-- Strategy:
--
--   * runs with spaces or special entries (discretionary, noligature, zwnj)
--     are taken out as a whole and rebuilt from copies of 'first'
--   * an empty replacement leaves the nodes alone (only overload is applied)
--   * otherwise the existing nodes are reused: characters are overwritten
--     and nodes are added or removed to make up for the length difference
local function replace(head,first,last,final,hasspace,overload)
    local current   = first
    local prefirst  = getprev(first) -- nil when the run starts at the head
    local postlast  = getnext(last)
    local oldlength = final.oldlength
    local newcodes  = final.newcodes
    local newlength = newcodes and #newcodes or 0
    if trace_replacements then
        report_replacement("replacing word %a by %a",final.word,final.replacement)
    end
    if hasspace or final.special then
        -- Deleting and inserting is the easy way out here: unhook the old
        -- run, close the gap and insert the new nodes after 'prefirst'.
        local oldlist = first
        setnext(last)
        setlink(prefirst,postlast)
        current = prefirst
        if not current then
            -- nothing to insert after: the new nodes start a fresh list
            head = nil
        end
        for i=1,newlength do
            local codes = newcodes[i]
            if type(codes) == "table" then
                local method = codes[1]
                if method == "discretionary" then
                    local pre, post, rep = codes[2], codes[3], codes[4]
                    if pre then
                        pre = tonodes(pre,first)
                    end
                    if post then
                        post = tonodes(post,first)
                    end
                    if rep then
                        rep = tonodes(rep,first)
                    end
                    local disc = new_disc(pre,post,rep)
                    setattrlist(disc,first)
                    head, current = insertafter(head,current,disc)
                elseif method == "noligature" then
                    local chars = codes[2]
                    if chars then
                        for j=1,#chars do
                            local glyph = copy_node(first)
                            setchar(glyph,chars[j])
                            setattr(glyph,a_noligature,1)
                            head, current = insertafter(head,current,glyph)
                        end
                    else
                        -- an empty group: just a zwnj
                        local glyph = copy_node(first)
                        setchar(glyph,zwnj)
                        head, current = insertafter(head,current,glyph)
                    end
                else
                    report_replacement("unknown method %a",method or "?")
                end
            else
                local glyph = copy_node(first)
                setchar(glyph,codes)
                head, current = insertafter(head,current,glyph)
            end
        end
        if not prefirst and postlast then
            -- The run started at the head, so the nodes inserted above form
            -- a list of their own (assumes insertafter starts a new list
            -- when it gets no head) that is not connected to what followed
            -- the run. Hook up the remainder, otherwise it gets lost.
            if current then
                setlink(current,postlast)
            else
                -- nothing was inserted: the remainder is the whole list
                head = postlast
            end
        end
        -- the old run is only released now because 'first' served as the
        -- template for all new nodes
        flushlist(oldlist)
    elseif newlength == 0 then
        -- nothing to replace, we only overload
    elseif oldlength == newlength then
        if final.word ~= final.replacement then
            for i=1,newlength do
                setchar(current,newcodes[i])
                current = getnext(current)
            end
        end
    elseif oldlength < newlength then
        -- put the additional characters in front of the run ...
        for i=1,newlength-oldlength do
            local n = copy_node(current)
            setchar(n,newcodes[i])
            head, current = insertbefore(head,current,n)
            current = getnext(current)
        end
        -- ... and overwrite the existing nodes with the remaining ones
        for i=newlength-oldlength+1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    else
        -- drop the superfluous nodes from the start of the run ...
        for i=1,oldlength-newlength do
            head, current = remove_node(head,current,true)
        end
        -- ... and overwrite what is left
        for i=1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    end
    if overload then
        -- The replaced range starts right after the untouched predecessor.
        -- Without a predecessor it starts at the (new) head; the old head
        -- can't be used for that as it was part of the run and might have
        -- been removed or released by now.
        local start
        if prefirst then
            start = getnext(prefirst)
        else
            start = head
        end
        -- there is no successor when the run ended the list
        overload(final,start,postlast and getprev(postlast))
    end
    return head, postlast
end
295
296
297
-- Apply the match that is pending for 'root', if any.
--
-- The complete match (root.final, ending at 'wordend') is preferred; when
-- there is none the shorter match remembered before a space ('prevfinal',
-- ending at 'prevend') is used.
--
-- Returns the head, the node that followed the replaced run and true when a
-- replacement took place; otherwise the unchanged head, nil and false.
local function applypending(head,root,wordstart,wordend,prevend,prevfinal,hasspace,overload)
    local final, stop = root.final, wordend
    if not final then
        final, stop = prevfinal, prevend
    end
    if not final then
        return head, nil, false
    end
    local newhead, postlast = replace(head,wordstart,stop,final,hasspace,overload)
    return newhead, postlast, true
end

-- Node list handler: walks over the list and replaces the words that are
-- registered in the list that is selected by the replacements attribute of
-- the glyphs. Returns the (possibly changed) head.
--
-- State while scanning:
--
--   mode      : false, "word" or "punc", depending on the last seen glyph
--   root      : current position in the lookup tree (false: no match going)
--   wordstart : first node of the current match
--   wordend   : last node of the current match
--   prevfinal : match that was complete before the current match got
--               extended over a space
--   prevend   : last node of that shorter match
--   hasspace  : the current match contains space glue
function replacements.handler(head)
    local current   = head
    local overload  = attributes.applyoverloads
    local mode      = false
    local wordstart = false
    local wordend   = false
    local prevend   = false
    local prevfinal = false
    local tree      = false
    local root      = false
    local hasspace  = false
    while current do
        local id = getid(current)
        if id == glyph_code then
            local a = getattr(current,a_replacements)
            if not a then
                tree = false
            else
                tree = trees[a]
                if not tree then
                    root = false
                else
                    local char    = getchar(current)
                    local newmode = is_punctuation[char] and "punc" or "word"
                    -- going from word to punctuation or the reverse ends a
                    -- match that is in progress
                    if mode and mode ~= newmode and root then
                        head = applypending(head,root,wordstart,wordend,prevend,prevfinal,hasspace,overload)
                        prevfinal = false
                        root      = false
                    end
                    mode = newmode
                    if root then
                        -- try to extend the current match
                        root = root[char]
                        if root then
                            wordend = current
                        end
                    else
                        -- settle a left over shorter match first
                        if prevfinal then
                            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                            prevfinal = false
                        end
                        -- and see if a new match starts here
                        root = tree[char]
                        if root then
                            wordstart = current
                            wordend   = current
                            prevend   = false
                            hasspace  = false
                        end
                    end
                end
            end
        elseif root then
            -- a non glyph ends the match unless it is a space that the tree
            -- can continue with
            local extended = false
            if mode == "word" and id == glue_code then
                local s = getsubtype(current)
                if s == spaceskip_code or s == xspaceskip_code then
                    local r = root[32]
                    if r then
                        if not prevend then
                            -- remember the match we have so far
                            local f = root.final
                            if f then
                                prevend   = wordend
                                prevfinal = f
                            end
                        end
                        wordend  = current
                        root     = r
                        hasspace = true
                        extended = true
                    end
                end
            end
            if not extended then
                local newhead, resume, applied = applypending(head,root,wordstart,wordend,prevend,prevfinal,hasspace,overload)
                head = newhead
                if applied then
                    -- continue after the node that followed the replaced run
                    current = resume
                end
                prevfinal = false
                root      = false
            end
        end
        current = getnext(current)
    end
    -- a match can still be pending at the end of the list
    if root then
        head = applypending(head,root,wordstart,wordend,prevend,prevfinal,hasspace,overload)
    end
    return head
end
419
-- The handler is only enabled when a list gets selected for the first time.
local enabled = false

-- Select the replacement list named 'n' for what follows by setting the
-- replacements attribute; the reset keyword unsets the attribute.
function replacements.set(n)
    local attribute = unsetvalue
    if n ~= v_reset then
        attribute = lists[n].attribute
        if not enabled then
            enableaction("processors","languages.replacements.handler")
            if trace_replacements then
                report_replacement("enabling replacement handler")
            end
            enabled = true
        end
    end
    texsetattribute(a_replacements,attribute)
end
437
438
439
-- Interface registrations: each entry couples a name to one of the functions
-- above and states the kind of arguments that get passed.

implement({
    name      = "setreplacements",
    arguments = "string",
    actions   = replacements.set,
})

implement({
    name      = "addreplacements",
    arguments = "3 strings",
    actions   = replacements.add,
})

implement({
    name      = "addreplacementslist",
    arguments = "2 strings",
    actions   = replacements.addlist,
})
457 |