1if not modules then modules = { } end modules ['regi-ini'] = {
2 version = 1.001,
3 comment = "companion to regi-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
14
15
16
17local commands, context = commands, context
18
19
20local tostring = tostring
21local utfchar = utf.char
22local P, Cs, Cc, lpegmatch = lpeg.P, lpeg.Cs, lpeg.Cc, lpeg.match
23local char, gsub, format, gmatch, byte, match, lower = string.char, string.gsub, string.format, string.gmatch, string.byte, string.match, string.lower
24local next = next
25local insert, remove, fastcopy = table.insert, table.remove, table.fastcopy
26local concat = table.concat
27local totable = string.totable
28
29local allocate = utilities.storage.allocate
30local sequencers = utilities.sequencers
31local textlineactions = resolvers.openers.helpers.textlineactions
32local setmetatableindex = table.setmetatableindex
33
34
37
38local trace_translating = false trackers.register("regimes.translating", function(v) trace_translating = v end)
39
40local report_loading = logs.reporter("regimes","loading")
41local report_translating = logs.reporter("regimes","translating")
42
43regimes = regimes or { }
44local regimes = regimes
45
46local mapping = allocate {
47 utf = false
48}
49
50local backmapping = allocate {
51}
52
53
54
-- Alias table: maps the regime names users may type onto the canonical name
-- that is used to build the "regi-<name>.lua" vector file name (see
-- loadregime). Keys that are valid identifiers use the short field syntax,
-- the others need the bracketed string form.
local synonyms = {

    -- windows code pages
    ["windows-1250"] = "cp1250",
    ["windows-1251"] = "cp1251",
    ["windows-1252"] = "cp1252",
    ["windows-1253"] = "cp1253",
    ["windows-1254"] = "cp1254",
    ["windows-1255"] = "cp1255",
    ["windows-1256"] = "cp1256",
    ["windows-1257"] = "cp1257",
    ["windows-1258"] = "cp1258",
    windows          = "cp1252",

    -- iso 8859 parts, short "il" names
    il1  = "8859-1",
    il2  = "8859-2",
    il3  = "8859-3",
    il4  = "8859-4",
    il5  = "8859-9",
    il6  = "8859-10",
    il7  = "8859-13",
    il8  = "8859-14",
    il9  = "8859-15",
    il10 = "8859-16",

    -- iso 8859 parts, official names
    ["iso-8859-1"]  = "8859-1",
    ["iso-8859-2"]  = "8859-2",
    ["iso-8859-3"]  = "8859-3",
    ["iso-8859-4"]  = "8859-4",
    ["iso-8859-9"]  = "8859-9",
    ["iso-8859-10"] = "8859-10",
    ["iso-8859-13"] = "8859-13",
    ["iso-8859-14"] = "8859-14",
    ["iso-8859-15"] = "8859-15",
    ["iso-8859-16"] = "8859-16",

    -- iso 8859 parts, "latin" names
    latin1  = "8859-1",
    latin2  = "8859-2",
    latin3  = "8859-3",
    latin4  = "8859-4",
    latin5  = "8859-9",
    latin6  = "8859-10",
    latin7  = "8859-13",
    latin8  = "8859-14",
    latin9  = "8859-15",
    latin10 = "8859-16",

    -- utf variants; the empty name also means utf
    ["utf-8"] = "utf",
    utf8      = "utf",
    [""]      = "utf",

    -- miscellaneous
    pdf     = "pdfdoc",
    ["437"] = "ibm",
}
110
111local currentregime = "utf"
112
-- Index handler for the `mapping` table: resolves a regime name to its
-- conversion vector, loading "regi-<name>.lua" on demand.
--
-- mapping : the table the vector is stored in (the table being indexed)
-- regime  : requested regime name; it is stringified, lowercased and passed
--           through `synonyms` (also trying a "windows-" prefix)
--
-- Returns a table that maps single byte strings onto utf strings, or false
-- when no vector file can be found. The result is stored under the
-- normalised name.
local function loadregime(mapping,regime)
    regime = lower(tostring(regime))
    regime = synonyms[regime] or synonyms["windows-"..regime] or regime
    -- When we get here via an alias (or a differently cased name) the vector
    -- may already be present under its normalised name. Reusing it avoids
    -- running the data file again on every lookup of the alias.
    local vector = rawget(mapping,regime)
    if vector ~= nil then
        return vector
    end
    local name = resolvers.findfile(format("regi-%s.lua",regime)) or ""
    local data = name ~= "" and dofile(name)
    if data then
        -- `vector` is a local now; it used to leak into the global namespace
        vector = { }
        -- the data file is used as: byte value -> unicode code point
        for eightbit, unicode in next, data do
            vector[char(eightbit)] = utfchar(unicode)
        end
        report_loading("vector %a is loaded",regime)
    else
        -- false (not nil) so that the failure is remembered
        vector = false
        report_loading("vector %a is unknown",regime)
    end
    mapping[regime] = vector
    return vector
end
131
-- Index handler for the `backmapping` table: builds the inverse of the
-- vector found in mapping[regime] (utf string -> single byte string). When
-- there is no vector the inverse is an empty table. The result is stored in
-- `backmapping` under the requested key. The first argument (the indexed
-- table) is not used.
local function loadreverse(_,regime)
    local reverse = { }
    local forward = mapping[regime]
    if forward then
        for eightbit, utf in next, forward do
            reverse[utf] = eightbit
        end
    end
    backmapping[regime] = reverse
    return reverse
end
143
144setmetatableindex(mapping, loadregime)
145setmetatableindex(backmapping,loadreverse)
146
147regimes.mapping = mapping
148regimes.backmapping = backmapping
149
-- Converts a line from the given regime (or, when none is given, the
-- current one) to utf by replacing each character that has an entry in the
-- regime vector. Empty or missing lines, and regimes without a vector, are
-- passed through untouched.
local function fromregime(regime,line)
    if not line or #line == 0 then
        return line
    end
    local vector = mapping[regime or currentregime]
    if not vector then
        return line
    end
    -- keep only the first result of gsub (the second one is the match count)
    local converted = gsub(line,".",vector)
    return converted
end
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
-- Per regime cache used by toregime. Entries are created on first access and
-- start out with an empty `remappers` table.
local cache = { }

setmetatableindex(cache, function(entries,vector)
    local entry = { }
    entry.remappers = { }
    entries[vector] = entry
    return entry
end)
190
-- Converts a utf string to the given regime.
--
-- vector  : regime name, used as key into `backmapping` and `cache`
-- str     : the string to convert; nil and "" give ""
-- default : replacement for anything that has no entry in the reverse
--           vector (defaults to "?")
--
-- A remapper function is built once per regime and default and kept in the
-- cache.
local function toregime(vector,str,default)
    local fallback  = default or "?"
    local remappers = cache[vector].remappers
    local remap     = remappers[fallback]
    if not remap then
        local reverse = fastcopy(backmapping[vector])
        -- alternatives are tried in this order: an entry of the reverse
        -- vector, any other utf character, any other single byte
        -- (presumably utfchartabletopattern matches the keys of the table)
        local known   = lpeg.utfchartabletopattern(reverse) / reverse
        local unknown = lpeg.patterns.utf8character / fallback
        local invalid = P(1) / fallback
        local pattern = Cs((known + unknown + invalid)^0)
        remap = function(s)
            if s and s ~= "" then
                return lpegmatch(pattern,s)
            end
            return ""
        end
        remappers[fallback] = remap
    end
    return remap(str)
end
210
-- Switches back to utf input: resets the current regime and disables the
-- "regimes.process" text line action. Returns the (new) current regime.
local function disable()
    local action = "regimes.process"
    currentregime = "utf"
    sequencers.disableaction(textlineactions,action)
    return currentregime
end
216
-- Makes the given regime (after synonym lookup) the current one and enables
-- the "regimes.process" text line action. A regime whose vector lookup gives
-- false results in a disable instead. Returns the resulting current regime.
local function enable(regime)
    local name = synonyms[regime] or regime
    if mapping[name] ~= false then
        currentregime = name
        sequencers.enableaction(textlineactions,"regimes.process")
        return currentregime
    end
    -- disable() returns the current regime after resetting it
    return disable()
end
227
-- public interface
regimes.enable     = enable
regimes.disable    = disable
regimes.toregime   = toregime
regimes.fromregime = fromregime

-- Same as fromregime, but with the string first and the regime second.
function regimes.translate(str,regime)
    return fromregime(regime,str)
end
233
234
235
236
237local level = 0
238
-- Text line action: converts a line from the current regime to utf. Nothing
-- is done when a push is pending (level is not zero) or when the coding is
-- reported as "utf-8". Only `str` and `coding` are looked at; the other
-- arguments are ignored here.
function regimes.process(str,filename,currentline,noflines,coding)
    if level ~= 0 or coding == "utf-8" then
        return str
    end
    local converted = fromregime(currentregime,str)
    if trace_translating then
        report_translating("utf: %s",converted)
    end
    return converted
end
248
-- Increments the nesting level; regimes.process only converts at level zero.
local function push()
    local newlevel = level + 1
    level = newlevel
    if trace_translating then
        report_translating("pushing level %s",newlevel)
    end
end
255
-- Decrements the nesting level, but never below zero. The level that is
-- being left is the one that gets reported.
local function pop()
    if level <= 0 then
        return
    end
    if trace_translating then
        report_translating("popping level %s",level)
    end
    level = level - 1
end
264
265regimes.push = push
266regimes.pop = pop
267
-- Returns a sorted list with the names of the regimes for which a
-- "regi-<name>.lua" file sits next to "regi-ini.lua". The list is empty when
-- "regi-ini.lua" cannot be located.
function regimes.list()
    local okay = { }
    -- findfile can fail; normalise that to "" and test for it explicitly
    -- (a plain truth test never fails because "" is true in Lua)
    local name = resolvers.findfile("regi-ini.lua") or ""
    if name ~= "" then
        local list = dir.glob(file.join(file.dirname(name),"regi-*.lua"))
        for i=1,#list do
            -- The glob gives paths, so we only look at the last path
            -- component: the capture cannot contain a directory separator
            -- and the match is anchored at the end of the string.
            local regime = match(list[i],"regi%-([^/\\]-)%.lua$")
            -- "regi-ini.lua" is this module, not a regime vector
            if regime and regime ~= "ini" then
                okay[#okay+1] = regime
            end
        end
        -- sort once, after everything has been collected
        table.sort(okay)
    end
    return okay
end
283
-- Register the line processor as the first "system" text line action and
-- leave it disabled until a regime gets enabled.
if sequencers then

    local action = "regimes.process"

    sequencers.prependaction(textlineactions,"system",action)
    sequencers.disableaction(textlineactions,action)

end
290
291
292
293
294
295local patterns = { }
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
-- Repairs text in which characters of the given regime show up as a sequence
-- of utf characters, one per byte of the real utf encoding (so, text that was
-- converted to utf twice).
--
-- regime : regime name; synonyms are resolved and the current regime is used
--          when none is given
-- str    : the string to clean up; nil and "" are returned as they are
--
-- Returns the repaired string, or `str` itself when the regime has no vector
-- (utf included). The pattern is built once per regime.
function regimes.cleanup(regime,str)
    if not str or str == "" then
        return str
    end
    -- Normalise first so that the cache is read with the same key that it is
    -- written with; otherwise an alias or a nil regime never hits the cache
    -- and the pattern gets rebuilt on each call.
    regime = regime and synonyms[regime] or regime or currentregime
    local p = patterns[regime]
    if p == nil then
        local vector = regime ~= "utf" and regime ~= "utf-8" and mapping[regime]
        if vector then
            -- garbled sequence -> proper utf character
            local repair = { }
            for _, v in next, vector do
                -- presumably totable gives the individual bytes of v as
                -- one character strings (that is how they are used here)
                local split = totable(v)
                for i=1,#split do
                    split[i] = utfchar(byte(split[i]))
                end
                local garbled = concat(split)
                -- for ascii both forms are equal, so nothing to repair
                if v ~= garbled then
                    repair[garbled] = v
                end
            end
            p = Cs((lpeg.utfchartabletopattern(repair)/repair+P(1))^0)
        else
            -- false (not nil) so that we don't try again
            p = false
        end
        patterns[regime] = p
    end
    return p and lpegmatch(p,str) or str
end
407
408
409
410
411
412
413
414
415
416
-- Bindings for the TeX end; these are only set up when the interfaces
-- namespace is present. Each handler that changes the regime also updates
-- the "currentregime" macro.
if interfaces then

    local implement = interfaces.implement
    local setmacro  = interfaces.setmacro

    -- regimes that were active when startregime got called
    local stack = { }

    local function enableregime(regime)
        setmacro("currentregime",enable(regime))
    end

    local function disableregime()
        setmacro("currentregime",disable())
    end

    -- remembers the current regime and switches to the given one
    local function startregime(regime)
        insert(stack,currentregime)
        if trace_translating then
            report_translating("start using %a",regime)
        end
        setmacro("currentregime",enable(regime))
    end

    -- switches back to the most recently remembered regime, if any
    local function stopregime()
        if #stack == 0 then
            return
        end
        local regime = remove(stack)
        if trace_translating then
            report_translating("stop using %a",regime)
        end
        setmacro("currentregime",enable(regime))
    end

    implement { name = "enableregime",  arguments = "string", actions = enableregime  }
    implement { name = "disableregime",                       actions = disableregime }
    implement { name = "pushregime",                          actions = push          }
    implement { name = "popregime",                           actions = pop           }
    implement { name = "startregime",   arguments = "string", actions = startregime   }
    implement { name = "stopregime",                          actions = stopregime    }

end
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489 |