-- Module registration record: makes sure the global 'modules' table exists and
-- stores the descriptive metadata of this file under the key 'lang-rep'.
modules = modules or { }

modules['lang-rep'] = {
    version   = 1.001,
    comment   = "companion to lang-rep.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9
10
11
12
13
14
15
16
17
18local type , tonumber , next = type , tonumber , next
19local gmatch , gsub = string . gmatch , string . gsub
20local utfbyte , utfsplit = utf . byte , utf . split
21local P , C , U , Cc , Ct , Cs , lpegmatch = lpeg . P , lpeg . C , lpeg . patterns . utf8character , lpeg . Cc , lpeg . Ct , lpeg . Cs , lpeg . match
22local find = string . find
23
-- Code point that is inserted for an empty brace pair "{}" in a replacement.
local zwnj      = 0x200C

-- Building blocks for the replacement parsers.
local lbrace    = P("{")
local rbrace    = P("}")
local codepoint = U / utfbyte -- one utf character, captured as its code point

-- "{...}" : captures a table with the code points between the braces, or the
-- value false when the group is empty.
local grouped   = lbrace * (Ct((codepoint - rbrace)^1) + Cc(false)) * rbrace

-- What may start with a left brace inside a replacement (tried in this order):
--
--   {}              : the zwnj code point
--   {...}{...}{...} : { "discretionary", pre, post, replace }
--   {...}           : { "noligature", codes }
local braced    = P("{}") * Cc(zwnj)
                + Ct(Cc("discretionary") * grouped * grouped * grouped)
                + Ct(Cc("noligature") * grouped)

-- Splits a replacement string into a list in which each entry is either a code
-- point (number) or one of the tables described above.
local splitter  = Ct((#lbrace * braced + codepoint)^1)

-- Matches a string that is wrapped in one pair of braces and captures what is
-- inside: P(-2) only succeeds when fewer than two characters are left, so the
-- repetition stops right before the last character, which has to be "}".
local notlast   = P(1) - P(-2)
local stripper  = lbrace * Cs(notlast^0) * rbrace * P(-1)
36
-- Tracing flags; both start out disabled and are toggled by the callbacks that
-- are registered with the trackers below.
local trace_replacements = false
local trace_details      = false

trackers.register("languages.replacements",         function(v) trace_replacements = v end)
trackers.register("languages.replacements.details", function(v) trace_details      = v end)

-- Reporter used for all messages of this module.
local report_replacement = logs.reporter("languages","replacements")
41
42local glyph_code = nodes . nodecodes . glyph
43local glue_code = nodes . nodecodes . glue
44
45local spaceskip_code = nodes . gluecodes . spaceskip
46local xspaceskip_code = nodes . gluecodes . xspaceskip
47
48local nuts = nodes . nuts
49
50local getnext = nuts . getnext
51local getprev = nuts . getprev
52local getattr = nuts . getattr
53local getid = nuts . getid
54local getsubtype = nuts . getsubtype
55local getchar = nuts . getchar
56local isglyph = nuts . isglyph
57
58local setattr = nuts . setattr
59local setlink = nuts . setlink
60local setnext = nuts . setnext
61local setprev = nuts . setprev
62local setchar = nuts . setchar
63local setattrlist = nuts . setattrlist
64
65local insertbefore = nuts . insertbefore
66local insertafter = nuts . insertafter
67local remove_node = nuts . remove
68local copy_node = nuts . copy
69local flushlist = nuts . flushlist
70
71local nodepool = nuts . pool
72local new_disc = nodepool . disc
73
74local texsetattribute = tex . setattribute
75local unsetvalue = attributes . unsetvalue
76
77local enableaction = nodes . tasks . enableaction
78
79local v_reset = interfaces . variables . reset
80
81local implement = interfaces . implement
82
83local processors = typesetters . processors
84local splitprocessor = processors . split
85
-- The public namespace: reuse languages.replacements when it already exists.
local replacements     = languages.replacements or { }
languages.replacements = replacements

-- Private attributes: the first one carries the number of the active
-- replacement list, the second one is set on glyphs made by "noligature".
local a_replacements   = attributes.private("replacements")
local a_noligature     = attributes.private("noligature")

-- Registry of replacement lists:
--
--   lists[name] and lists[number] : { name = , list = , attribute = number }
--   trees[number]                 : the list (trie root) of that entry
--
-- Entries are created on first access by the index function below; 'last' is
-- the number that was handed out most recently.
local lists = { }
local trees = { }
local last  = 0

table.setmetatableindex(lists, function(t,name)
    local number = last + 1
    local root   = { }
    local entry  = { name = name, list = root, attribute = number }
    last         = number
    trees[number] = root
    t[number]     = entry
    t[name]       = entry
    return entry
end)

-- Accessing the reset entry creates it; its attribute is then overloaded so
-- that selecting it unsets the attribute.
lists[v_reset].attribute = unsetvalue
107
108
109
-- Registers one word in the trie 'root'.
--
-- The replacement is first passed through splitprocessor and, when what is left
-- is wrapped in one pair of braces, those braces are stripped. The word is then
-- split into utf characters and the trie is walked (and extended where needed)
-- using the code points as keys. The node that is reached for the last
-- character gets a 'final' record:
--
--   word        : the word as given
--   replacement : the (stripped) replacement string
--   processor   : first result of splitprocessor
--   oldlength   : number of characters in the word
--   newcodes    : result of the splitter (nil when it does not match)
--   special     : position of the first "{" in the replacement, or nil
--
-- An empty word registers nothing.
local function add(root,word,replacement)
    local processor, stripped = splitprocessor(replacement,true)
    stripped = lpegmatch(stripper,stripped) or stripped
    local chars = utfsplit(word)
    local size  = #chars
    if size == 0 then
        return
    end
    local node = root
    for i=1,size do
        local code   = utfbyte(chars[i])
        local branch = node[code]
        if not branch then
            branch = { }
            node[code] = branch
        end
        node = branch
    end
    node.final = {
        word        = word,
        replacement = stripped,
        processor   = processor,
        oldlength   = size,
        special     = find(stripped,"{",1,true),
        newcodes    = lpegmatch(splitter,stripped),
    }
end
135
-- Adds replacements to the list 'category' (created on first use).
--
-- When 'word' is a table it is taken as a mapping from words to replacements
-- and the third argument is ignored. Otherwise the single pair word/replacement
-- is added, where a missing replacement becomes the empty string.
function replacements.add(category,word,replacement)
    local root = lists[category].list
    if type(word) ~= "table" then
        add(root,word,replacement or "")
        return
    end
    for from, to in next, word do
        add(root,from,to)
    end
end
146
147
148
-- Adds a list of replacements to the list 'category' (created on first use).
--
-- The list is either a string with entries separated by whitespace or an
-- indexed table with entries. Each entry is the replacement; the word that it
-- replaces is the same entry with all braces removed.
--
-- The function is stored in the 'replacements' table, which is the same table
-- as languages.replacements.
function replacements.addlist(category,list)
    local root = lists[category].list
    if type(list) == "string" then
        -- turn the string into an indexed table first
        local entries = { }
        for entry in gmatch(list,"%S+") do
            entries[#entries+1] = entry
        end
        list = entries
    end
    for i=1,#list do
        local new = list[i]
        -- the parentheses drop the substitution count that gsub also returns
        add(root,(gsub(new,"[{}]","")),new)
    end
end
166
-- Turns a list of code points into a linked list of nodes. Each node is a copy
-- of 'template' that gets the code point assigned as character. The head of the
-- new list is returned, which is nil when the list is empty.
local function tonodes(list,template)
    local head, tail
    for i=1,#list do
        local glyph = copy_node(template)
        setchar(glyph,list[i])
        if not head then
            head, tail = glyph, glyph
        else
            head, tail = insertafter(head,tail,glyph)
        end
    end
    return head
end
180
181local is_punctuation = characters . is_punctuation
182
183
184
185
186
187
188
189
190
-- Replaces the matched range first .. last in the node list by the replacement
-- described by 'final' (a record as made by 'add').
--
--   head     : head of the node list
--   first    : first node of the match
--   last     : last node of the match
--   final    : record with word, replacement, oldlength, newcodes and special
--   hasspace : true when the match runs over one or more space glues
--   overload : optional function, called afterwards as
--              overload(final,firstnode,lastnode)
--
-- Returns the (possibly changed) head and the node that followed 'last' before
-- anything was changed.
--
-- There are five cases:
--
--   * the match has spaces or the replacement has braces: the old range is
--     unlinked, new nodes (copies of 'first') are inserted and the old range
--     is flushed
--   * there are no new codes: the nodes are left alone
--   * equal lengths: only the characters are set
--   * the replacement is longer: extra copies are inserted in front
--   * the replacement is shorter: the surplus nodes are removed first
local function replace(head,first,last,final,hasspace,overload)
    local current   = first
    -- NOTE(review): when 'first' has no predecessor this falls back to 'head',
    -- so the overload call at the end then starts one node further than in the
    -- other cases; confirm that this is intended.
    local prefirst  = getprev(first) or head
    local postlast  = getnext(last)
    local oldlength = final.oldlength
    local newcodes  = final.newcodes
    local newlength = newcodes and #newcodes or 0
    if trace_replacements then
        report_replacement("replacing word %a by %a",final.word,final.replacement)
    end
    if hasspace or final.special then
        -- Take the old range out of the list and build the replacement from
        -- scratch. The range is only flushed at the end because 'first' serves
        -- as template for all new nodes.
        local prev = getprev(current)
        local list = current
        setnext(last)
        setlink(prev,postlast)
        current = prev
        if not current then
            head = nil
        end
        for i=1,newlength do
            local codes = newcodes[i]
            if type(codes) == "table" then
                local method = codes[1]
                if method == "discretionary" then
                    -- each part is a list of code points or false
                    local pre, post, rep = codes[2], codes[3], codes[4]
                    if pre then
                        pre = tonodes(pre,first)
                    end
                    if post then
                        post = tonodes(post,first)
                    end
                    if rep then
                        rep = tonodes(rep,first)
                    end
                    local new = new_disc(pre,post,rep)
                    setattrlist(new,first)
                    head, current = insertafter(head,current,new)
                elseif method == "noligature" then
                    local chars = codes[2]
                    if chars then
                        for j=1,#chars do
                            local new = copy_node(first)
                            setchar(new,chars[j])
                            setattr(new,a_noligature,1)
                            head, current = insertafter(head,current,new)
                        end
                    else
                        -- an empty group
                        local new = copy_node(first)
                        setchar(new,zwnj)
                        head, current = insertafter(head,current,new)
                    end
                else
                    report_replacement("unknown method %a",method or "?")
                end
            else
                -- a plain code point
                local new = copy_node(first)
                setchar(new,codes)
                head, current = insertafter(head,current,new)
            end
        end
        flushlist(list)
    elseif newlength == 0 then
        -- nothing to replace, only the overload below is applied
    elseif oldlength == newlength then
        if final.word ~= final.replacement then
            for i=1,newlength do
                setchar(current,newcodes[i])
                current = getnext(current)
            end
        end
        -- Here the original code ended with "current = getnext(final)", which
        -- passed the record (a table, not a node) to a node accessor and never
        -- used the result, so that call has been dropped.
    elseif oldlength < newlength then
        -- the first codes go into new nodes in front of the old ones ...
        for i=1,newlength-oldlength do
            local n = copy_node(current)
            setchar(n,newcodes[i])
            head, current = insertbefore(head,current,n)
            current = getnext(current)
        end
        -- ... and the remaining ones reuse the old nodes
        for i=newlength-oldlength+1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    else
        -- drop the surplus nodes at the start and reuse what is left
        for i=1,oldlength-newlength do
            head, current = remove_node(head,current,true)
        end
        for i=1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    end
    if overload then
        overload(final,getnext(prefirst),getprev(postlast))
    end
    return head, postlast
end
295
296
297
-- Picks the match that has to be applied when a run ends at the trie node
-- 'root': the one that ends at 'wordend' when the node has a final record,
-- else the remembered shorter one (prevend/prevfinal). Returns the last node of
-- the match and its record, or nothing when there is no match.
local function pendingmatch(root,wordend,prevend,prevfinal)
    local final = root.final
    if final then
        return wordend, final
    elseif prevfinal then
        return prevend, prevfinal
    end
end

-- The node list handler. It walks over the list and follows, for glyphs that
-- carry the replacements attribute, the trie that belongs to the attribute
-- value. The state that is kept:
--
--   mode      : false, "word" or "punc" (kind of the previous glyph)
--   root      : current trie node (false or nil when nothing is being matched)
--   wordstart : node where the current match started
--   wordend   : last node that extended the current match
--   prevend   : end of a complete match that was seen before a space was added
--   prevfinal : the final record that belongs to prevend
--   hasspace  : true when the current match runs over a space glue
--
-- Returns the (possibly changed) head of the list.
function replacements.handler(head)
    local current   = head
    local overload  = attributes.applyoverloads
    local mode      = false
    local wordstart = false
    local wordend   = false
    local prevend   = false
    local prevfinal = false
    local tree      = false
    local root      = false
    local hasspace  = false
    while current do
        local id = getid(current)
        if id == glyph_code then
            local a = getattr(current,a_replacements)
            if not a then
                tree = false
            else
                tree = trees[a]
                if not tree then
                    root = false
                else
                    local char = getchar(current)
                    local kind = is_punctuation[char] and "punc" or "word"
                    if mode ~= kind then
                        -- A switch between word and punctuation ends a pending
                        -- match; the very first glyph only sets the mode.
                        if mode and root then
                            local stop, final = pendingmatch(root,wordend,prevend,prevfinal)
                            if final then
                                head = replace(head,wordstart,stop,final,hasspace,overload)
                            end
                            prevfinal = false
                            root      = false
                        end
                        mode = kind
                    end
                    if root then
                        -- try to extend the current match
                        root = root[char]
                        if root then
                            wordend = current
                        end
                    else
                        -- apply what was remembered and try to start a new match
                        if prevfinal then
                            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                            prevfinal = false
                        end
                        root = tree[char]
                        if root then
                            wordstart = current
                            wordend   = current
                            prevend   = false
                            hasspace  = false
                        end
                    end
                end
            end
            current = getnext(current)
        elseif root then
            -- A non glyph while matching: a space glue can extend the match when
            -- the trie continues with a space (code point 32).
            local extended = false
            if mode == "word" and id == glue_code then
                local s = getsubtype(current)
                if s == spaceskip_code or s == xspaceskip_code then
                    local r = root[32]
                    if r then
                        if not prevend then
                            -- remember the complete match that we have so far
                            local f = root.final
                            if f then
                                prevend   = wordend
                                prevfinal = f
                            end
                        end
                        wordend  = current
                        root     = r
                        hasspace = true
                        extended = true
                    end
                end
            end
            if not extended then
                -- the match ends here; continue after the replaced range
                local stop, final = pendingmatch(root,wordend,prevend,prevfinal)
                if final then
                    head, current = replace(head,wordstart,stop,final,hasspace,overload)
                end
                prevfinal = false
                root      = false
            end
            current = getnext(current)
        else
            current = getnext(current)
        end
    end
    -- a match can still be pending at the end of the list
    if root then
        local stop, final = pendingmatch(root,wordend,prevend,prevfinal)
        if final then
            head = replace(head,wordstart,stop,final,hasspace,overload)
        end
    end
    return head
end
419
-- Becomes true as soon as the handler has been enabled.
local enabled = false

-- Selects the replacement list 'n' by setting the replacements attribute to the
-- number of that list; the list is created when it does not exist yet. The
-- reset value unsets the attribute. The first time that a list is selected the
-- handler gets enabled in the "processors" task list.
function replacements.set(n)
    local attribute
    if n == v_reset then
        attribute = unsetvalue
    else
        attribute = lists[n].attribute
        if not enabled then
            enableaction("processors","languages.replacements.handler")
            if trace_replacements then
                report_replacement("enabling replacement handler")
            end
            enabled = true
        end
    end
    texsetattribute(a_replacements,attribute)
end
437
438
439
-- Registration of the three functions above with interfaces.implement, each
-- with a name and the kind of arguments that it gets.
implement { name = "setreplacements",     arguments = "string",    actions = replacements.set     }
implement { name = "addreplacements",     arguments = "3 strings", actions = replacements.add     }
implement { name = "addreplacementslist", arguments = "2 strings", actions = replacements.addlist }
457 |