-- Module bookkeeping: make sure the global registry exists and record the
-- metadata of this file under its module name.
modules = modules or { }

modules['char-utf'] = {
    version   = 1.001,
    comment   = "companion to char-utf.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23local next, type = next, type
24local gsub, find = string.gsub, string.find
25local concat, sortedhash, keys, sort = table.concat, table.sortedhash, table.keys, table.sort
26local utfchar, utfbyte, utfcharacters = utf.char, utf.byte, utf.characters
27local P, Cs, Cmt, Ct = lpeg.P, lpeg.Cs, lpeg.Cmt, lpeg.Ct
28
29if not characters then require("char-def") end
30if not characters.blocks then require("char-ini") end
31
32local lpegmatch = lpeg.match
33local lpegpatterns = lpeg.patterns
34local p_utf8character = lpegpatterns.utf8character
35local p_utf8byte = lpegpatterns.utf8byte
36local utfchartabletopattern = lpeg.utfchartabletopattern
37
38local formatters = string.formatters
39
-- Storage helpers. When the storage module offers no allocator we fall back
-- to a function that hands back the table it was given (or a fresh one when
-- called without argument), so that "allocate { ... }" keeps its content.
-- The same fallback serves as "mark", where it simply returns its argument.
local allocate = utilities.storage.allocate or function(t) return t or { } end
local mark     = utilities.storage.mark     or allocate
42
-- Make sure the shared namespace exists and keep a local handle on it.
characters       = characters or { }
local characters = characters

-- Accessors used further down in this file.
local charfromnumber = characters.fromnumber
local data           = characters.data

-- Filter registry: the functions defined in this file live in
-- characters.filters.utf.
local utffilters = { }
local filters    = allocate()

characters.filters     = filters
characters.filters.utf = utffilters
55
56
57
58
59
60
61
62
-- Ligature characters and the plain sequences that utffilters.decompose puts
-- in their place. The keys are spelled as code points instead of literal
-- glyphs: a tool that expands Latin ligatures in source text would otherwise
-- turn an entry into a mapping of a string onto itself, which makes the entry
-- a no-op.
local decomposed = allocate {
    [utfchar(0x0132)] = "IJ",  -- LATIN CAPITAL LIGATURE IJ
    [utfchar(0x0133)] = "ij",  -- LATIN SMALL LIGATURE IJ
    [utfchar(0x0587)] = "եւ",  -- ARMENIAN SMALL LIGATURE ECH YIWN
    [utfchar(0xFB00)] = "ff",  -- LATIN SMALL LIGATURE FF
    [utfchar(0xFB01)] = "fi",  -- LATIN SMALL LIGATURE FI
    [utfchar(0xFB02)] = "fl",  -- LATIN SMALL LIGATURE FL
    [utfchar(0xFB03)] = "ffi", -- LATIN SMALL LIGATURE FFI
    [utfchar(0xFB04)] = "ffl", -- LATIN SMALL LIGATURE FFL
    [utfchar(0xFB05)] = "ſt",  -- LATIN SMALL LIGATURE LONG S T
    [utfchar(0xFB06)] = "st",  -- LATIN SMALL LIGATURE ST
    [utfchar(0xFB13)] = "մն",  -- ARMENIAN SMALL LIGATURE MEN NOW
    [utfchar(0xFB14)] = "մե",  -- ARMENIAN SMALL LIGATURE MEN ECH
    [utfchar(0xFB15)] = "մի",  -- ARMENIAN SMALL LIGATURE MEN INI
    [utfchar(0xFB16)] = "վն",  -- ARMENIAN SMALL LIGATURE VEW NOW
    [utfchar(0xFB17)] = "մխ",  -- ARMENIAN SMALL LIGATURE MEN XEH
}

characters.decomposed = decomposed
82
-- Composition tables derived from the "specials" and "mathlist" fields of
-- the character data:
--
--   graphemes[first][second] : composed character for a two character pair
--   collapsed[sequence]      : composed character for a flat utf sequence
--   combined[sequence]       : composed character for "with"/"compat" pairs
--   mathlists                : nested tables keyed by the successive code
--                              points of a list, with the resulting unicode
--                              stored under the category name at the end
--
-- When characters.graphemes is already present the existing tables are only
-- handed to the storage marker, otherwise they are built here.

local graphemes = characters.graphemes
local collapsed = characters.collapsed
local combined  = characters.combined
local mathlists = characters.mathlists

if graphemes then

    mark(graphemes)
    mark(collapsed)
    mark(combined)
    mark(mathlists)

else

    graphemes = allocate()
    collapsed = allocate()
    combined  = allocate()
    mathlists = allocate()

    characters.graphemes = graphemes
    characters.collapsed = collapsed
    characters.combined  = combined
    characters.mathlists = mathlists

    -- Walks down the decomposition of a base character: when the base (v)
    -- is itself a two component "char" or "with" special, the fully spelled
    -- out sequence also collapses into target. A missing entry (v == nil)
    -- ends the walk instead of raising an error.
    local function backtrack(v,last,target)
        local vs = v and v.specials
        if vs and #vs == 3 then
            local kind = vs[1]
            if kind == "char" or kind == "with" then
                local one    = vs[2]
                local two    = vs[3]
                local first  = utfchar(one)
                local second = utfchar(two) .. last
                collapsed[first..second] = target
                backtrack(data[one],second,target)
            end
        end
    end

    -- Stores unicode in the mathlists tree under the path list[start..#list],
    -- using category as the key at the end of the path. Lists that start with
    -- a space (0x20) are skipped.
    local function setlist(unicode,list,start,category)
        if list[start] ~= 0x20 then
            local t = mathlists
            for i=start,#list do
                local l = list[i]
                local f = t[l]
                if f then
                    t = f
                else
                    f = { }
                    t[l] = f
                    t = f
                end
            end
            t[category] = unicode
        end
    end

    local mlists = { }

    for unicode, v in next, data do
        local vs = v.specials
        if vs then
            local kind = vs[1]
            local size = #vs
            -- This used to test the undefined global "char" instead of
            -- "kind", so "with" specials never got here although backtrack
            -- treats them like "char" ones.
            if kind == "char" or kind == "with" then
                if size == 3 then
                    local one         = vs[2]
                    local two         = vs[3]
                    local first       = utfchar(one)
                    local second      = utfchar(two)
                    local combination = utfchar(unicode)
                    --
                    collapsed[first..second] = combination
                    backtrack(data[one],second,combination)
                    --
                    local cgf = graphemes[first]
                    if not cgf then
                        cgf = { [second] = combination }
                        graphemes[first] = cgf
                    else
                        cgf[second] = combination
                    end
                    -- "with" pairs were (and still are) registered in
                    -- combined too, so that table keeps its content
                    if kind == "with" then
                        combined[first..second] = combination
                    end
                end
                if size > 2 and (v.mathclass or v.mathspec) then
                    setlist(unicode,vs,2,"specials")
                end
            elseif kind == "compat" then
                if size == 3 then
                    combined[utfchar(vs[2],vs[3])] = utfchar(unicode)
                end
                if size > 2 and (v.mathclass or v.mathspec) then
                    setlist(unicode,vs,2,"specials")
                end
            end
        end
        local ml = v.mathlist
        if ml then
            mlists[unicode] = ml
        end
    end

    -- the math lists are registered after all specials have been handled

    for unicode, ml in next, mlists do
        setlist(unicode,ml,1,"mathlist")
    end

    mlists = nil

    if storage then
        storage.register("characters/graphemes", graphemes, "characters.graphemes")
        storage.register("characters/collapsed", collapsed, "characters.collapsed")
        storage.register("characters/combined",  combined,  "characters.combined")
        storage.register("characters/mathlists", mathlists, "characters.mathlists")
    end

end
206
-- Empty hook: calling it does nothing.
characters.initialize = function() end
208
-- File suffixes for which the filename aware filters return their input
-- untouched.
local skippable  = { }
local filesuffix = file.suffix

-- Flags one suffix (string) or several suffixes (list) as skippable. The
-- flag defaults to true when no value is given; passing false clears it.
function utffilters.setskippable(suffix,value)
    local state = value
    if state == nil then
        state = true
    end
    if type(suffix) ~= "table" then
        skippable[suffix] = state
        return
    end
    for index=1,#suffix do
        skippable[suffix[index]] = state
    end
end
224
-- Substitution pattern for utffilters.collapse, compiled on first use.
local p_collapse = nil

-- Compiles the pattern: every sequence that is a key of "collapsed" is
-- replaced by its value, any other utf character is copied as is.
local function preparecollapse()
    local sequences = utfchartabletopattern(collapsed)
    local replaced  = sequences / collapsed
    p_collapse = Cs((replaced + p_utf8character)^0)
end

-- Returns str with the sequences known to "collapsed" replaced. Nil, empty
-- and one byte strings are returned as they are, and so is the content of a
-- file whose suffix is flagged as skippable. When the pattern does not match
-- the original string is returned.
function utffilters.collapse(str,filename)
    if not p_collapse then
        preparecollapse()
    end
    if not str or str == "" or #str == 1 then
        return str
    end
    if filename and skippable[filesuffix(filename)] then
        return str
    end
    return lpegmatch(p_collapse,str) or str
end
245
-- Substitution pattern for utffilters.combine, compiled on first use.
local p_combine = nil

-- Compiles the pattern: every sequence that is a key of "combined" is
-- replaced by its value, any other utf character is copied as is.
local function preparecombine()
    local sequences = utfchartabletopattern(combined)
    local replaced  = sequences / combined
    p_combine = Cs((replaced + p_utf8character)^0)
end

-- Returns str with the sequences known to "combined" replaced. Nil, empty
-- and one byte strings are returned as they are; when the pattern does not
-- match the original string is returned.
function utffilters.combine(str)
    if not p_combine then
        preparecombine()
    end
    if not str or str == "" or #str == 1 then
        return str
    end
    return lpegmatch(p_combine,str) or str
end
264
-- Substitution pattern for utffilters.decompose, compiled on first use.
local p_decompose = nil

-- Compiles the pattern: every key of "decomposed" is replaced by its value,
-- any other utf character is copied; the whole string has to be consumed.
local function prepare()
    local tree = utfchartabletopattern(decomposed)
    p_decompose = Cs((tree/decomposed + p_utf8character)^0 * P(-1))
end

-- Returns str with the characters known to "decomposed" replaced by their
-- expansion.
--
-- Nil, empty and one byte strings are returned as they are, and so is the
-- content of a file whose suffix is flagged as skippable. When the pattern
-- does not match (it has to consume the whole string) the original string
-- is returned.
--
-- An earlier unconditional "return lpegmatch(...)" in front of these checks
-- made the skippable test and the fallback unreachable, so the function
-- could return nil and ignored setskippable; it now behaves like the collapse
-- and reorder filters.
function utffilters.decompose(str,filename)
    if not p_decompose then
        prepare()
    end
    if not str or str == "" or #str < 2 then
        return str
    elseif filename and skippable[filesuffix(filename)] then
        return str
    else
        return lpegmatch(p_decompose,str) or str
    end
end
288
289
290
291
-- Registers an additional grapheme: the pair first, second composes into
-- result. Each argument is first converted with characters.fromnumber.
--
-- The pair is always stored in "graphemes". It is added to "collapsed" only
-- when that sequence is not known there yet; in that case the compiled
-- collapse pattern is dropped so that the next utffilters.collapse call
-- rebuilds it with the new pair included.
function utffilters.addgrapheme(result,first,second)
    result = charfromnumber(result)
    first  = charfromnumber(first)
    second = charfromnumber(second)
    local known = graphemes[first]
    if known then
        known[second] = result
    else
        graphemes[first] = { [second] = result }
    end
    local pair = first .. second
    if not collapsed[pair] then
        collapsed[pair] = result
        -- this used to assign to an undefined global (p_composed), which
        -- left the cached pattern in place and created a stray global
        p_collapse = nil
    end
end
307
-- Expose utffilters.addgrapheme through the interface layer when that layer
-- is available.
if interfaces then

    interfaces.implement({
        name      = "addgrapheme",
        arguments = "3 strings",
        actions   = utffilters.addgrapheme,
    })

end
317
318
319
-- Pattern used by utffilters.reorder; it stays nil until it is compiled.
local p_reorder
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
-- Order function for entries of the form { character, weight, ... }: the
-- entry with the higher weight comes first.
local function sorter(a,b)
    return b[2] < a[2]
end

-- Match time callback: sorts the captured entries with "sorter" and returns
-- the current position together with the characters joined in their new
-- order.
local function swapper(subject,position,entries)
    sort(entries,sorter)
    local count = #entries
    for index=1,count do
        local entry = entries[index]
        entries[index] = entry[1]
    end
    return position, concat(entries)
end
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
-- Sequences that are replaced as a whole by the reorder pattern before the
-- generic sorter gets a chance to look at them.
--
-- The entry is spelled with code points because its two sides are the same
-- two marks in opposite order and thereby canonically equivalent: any tool
-- that normalizes the source text rewrites a literal so that key and value
-- become one and the same string.
--
-- NOTE(review): the direction used here (FATHA, SHADDA in; SHADDA, FATHA out)
-- matches the order the sorter produces (higher combining value first) --
-- confirm against the reference data.
local exceptions = {
    -- ARABIC FATHA (U+064E) + ARABIC SHADDA (U+0651) => SHADDA + FATHA
    [utfchar(0x064E,0x0651)] = utfchar(0x0651,0x064E),
}
385
-- Compiles the reorder pattern. At each position it tries, in this order:
-- a sequence listed in "exceptions" (replaced by its value), a run of two or
-- more characters that have a "combining" field in the character data
-- (collected and handed to "swapper"), or any single utf character (copied).
-- The whole string has to be consumed.
local function preparereorder()
    local combiners = { }
    for unicode, entry in sortedhash(characters.data) do
        local weight = entry.combining
        if weight then
            local character = utfchar(unicode)
            combiners[character] = { character, weight, 0 }
        end
    end
    local p_exception = utfchartabletopattern(exceptions)
    local p_combiner  = utfchartabletopattern(combiners)
    local p_run       = Cmt(Ct((p_combiner/combiners)^2),swapper)
    p_reorder = Cs((p_exception/exceptions + p_run + p_utf8character)^0) * P(-1)
end

-- Returns str with runs of combining characters reordered. Nil, empty and
-- one byte strings are returned as they are, and so is the content of a file
-- whose suffix is flagged as skippable. When the pattern does not match the
-- original string is returned.
function utffilters.reorder(str,filename)
    if not p_reorder then
        preparereorder()
    end
    if not str or str == "" or #str < 2 then
        return str
    end
    if filename and skippable[filesuffix(filename)] then
        return str
    end
    return lpegmatch(p_reorder,str) or str
end
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
-- Formatters for the verbose representation of one character: its
-- description when the character data has one, its code point otherwise.
local f_default     = formatters["[%U] "]
local f_description = formatters["[%s] "]

-- Turns a code point into its bracketed verbose form.
local function convert(n)
    local entry       = data[n]
    local description = entry and entry.description
    if not description then
        return f_default(n)
    end
    return f_description(description)
end

local pattern = Cs((p_utf8byte / convert)^1)

-- Returns the verbose form of every character in the given string, or an
-- empty string when there is no input or the pattern does not match.
function utffilters.verbose(data)
    if not data then
        return ""
    end
    return lpegmatch(pattern,data) or ""
end
475
476return characters
477 |