1if not modules then modules = { } end modules ['regi-ini'] = {
2 version = 1.001,
3 comment = "companion to regi-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14local commands, context = commands, context
15
16local tostring = tostring
17local utfchar = utf.char
18local P, Cs, Cc, lpegmatch = lpeg.P, lpeg.Cs, lpeg.Cc, lpeg.match
19local char, gsub, format, gmatch, byte, match, lower = string.char, string.gsub, string.format, string.gmatch, string.byte, string.match, string.lower
20local next = next
21local insert, remove, fastcopy = table.insert, table.remove, table.fastcopy
22local concat = table.concat
23local totable = string.totable
24
25local allocate = utilities.storage.allocate
26local sequencers = utilities.sequencers
27local textlineactions = resolvers.openers.helpers.textlineactions
28local setmetatableindex = table.setmetatableindex
29
30
31
-- Tracing flag; it follows the "regimes.translating" tracker.
local trace_translating = false

trackers.register("regimes.translating", function(v)
    trace_translating = v
end)

-- Reporters for the two message categories used in this file.
local report_loading     = logs.reporter("regimes","loading")
local report_translating = logs.reporter("regimes","translating")
36
-- The global namespace is created on demand and then kept in a local.
regimes = regimes or { }
local regimes = regimes

-- Regime name -> translation vector. The "utf" entry is preset to false so
-- that no vector is ever looked up for it.
local mapping = allocate({ utf = false })

-- Regime name -> reversed vector; it starts out empty.
local backmapping = allocate({ })
47
48
-- Alternative regime names mapped onto the names that are used for the
-- vector lookup. The regular families (windows code pages, iso-8859 parts
-- and their il/latin aliases) are generated, the odd ones are listed.
local synonyms = {
    ["utf-8"] = "utf",
    utf8      = "utf",
    [""]      = "utf",
    windows   = "cp1252",
    pdf       = "pdfdoc",
    ["437"]   = "ibm",
}

do
    -- windows-1250 .. windows-1258 -> cp1250 .. cp1258
    for codepage = 1250, 1258 do
        synonyms["windows-" .. codepage] = "cp" .. codepage
    end

    -- The n-th entry is the iso-8859 part behind il<n> and latin<n>.
    local parts = { 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 }

    for index = 1, #parts do
        local target = "8859-" .. parts[index]
        synonyms["il"    .. index ] = target
        synonyms["latin" .. index ] = target
        synonyms["iso-"  .. target] = target
    end
end
104
-- The regime that is currently in effect; it starts out as "utf".
local currentregime = "utf"

-- Loader behind the mapping table (it is installed as index handler with
-- setmetatableindex): resolves a regime name to its translation vector and
-- stores the result in mapping under the normalized name.
--
-- mapping : the table that receives the vector
-- regime  : the requested name; it is lowercased and passed through the
--           synonyms (also tried with a "windows-" prefix) before the lookup
--
-- Returns a table that maps char(eightbit) onto utfchar(unicode) for every
-- pair found in regi-<name>.lua, or false when no such file gets resolved.
local function loadregime(mapping,regime)
    regime = lower(tostring(regime))
    regime = synonyms[regime] or synonyms["windows-"..regime] or regime
    -- Only the normalized name is stored, so a lookup by alias ends up here
    -- each time; reuse the earlier result instead of loading the file again.
    local known = rawget(mapping,regime)
    if known ~= nil then
        return known
    end
    local name = resolvers.findfile(format("regi-%s.lua",regime)) or ""
    local data = name ~= "" and dofile(name)
    -- Declared local on purpose: a bare assignment would create a global.
    local vector = false
    if data then
        vector = { }
        for eightbit, unicode in next, data do
            vector[char(eightbit)] = utfchar(unicode)
        end
        report_loading("vector %a is loaded",regime)
    else
        report_loading("vector %a is unknown",regime)
    end
    mapping[regime] = vector
    return vector
end
125
-- Index handler for backmapping: builds the reverse of mapping[k], so each
-- value of the vector becomes a key that points back at its original key.
-- When mapping[k] is false the result is an empty table. The result is
-- stored in backmapping[k] and returned.
local function loadreverse(t,k)
    local reverse = { }
    local forward = mapping[k]
    if forward then
        for eightbit, unicode in next, forward do
            reverse[unicode] = eightbit
        end
    end
    backmapping[k] = reverse
    return reverse
end
137
-- Both tables are exposed and get filled on demand by their loaders.
regimes.mapping     = mapping
regimes.backmapping = backmapping

setmetatableindex(mapping,    loadregime)
setmetatableindex(backmapping,loadreverse)
143
-- Translates a line with the vector of the given regime (or of the current
-- regime when none is given). Each character of the line is looked up in the
-- vector by gsub. The line is returned untouched when it is nil or empty, or
-- when there is no vector for the regime.
local function fromregime(regime,line)
    if not line or #line == 0 then
        return line
    end
    local map = mapping[regime or currentregime]
    if not map then
        return line
    end
    -- the parentheses drop the substitution count that gsub returns as well
    return (gsub(line,".",map))
end
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
-- Per vector name storage; an entry with an empty remappers table is created
-- the first time a name is accessed.
local cache = { }

setmetatableindex(cache, function(store,name)
    local entry = { remappers = { } }
    store[name] = entry
    return entry
end)
184
-- Converts a string with the reverse mapping of the given vector name.
--
-- vector  : regime name, used as key into backmapping and the cache
-- str     : the string to convert; nil and "" both give ""
-- default : replacement for everything that is not in the reverse mapping
--           (defaults to "?")
--
-- The converter is built once per vector and default and kept in the cache.
local function toregime(vector,str,default)
    local fallback  = default or "?"
    local remappers = cache[vector].remappers
    local remap     = remappers[fallback]
    if not remap then
        local reverse = fastcopy(backmapping[vector])
        -- alternatives in order: a known sequence, some other utf character
        -- (as matched by lpeg.patterns.utf8character), any remaining byte
        local known   = lpeg.utfchartabletopattern(reverse) / reverse
        local unknown = lpeg.patterns.utf8character / fallback
        local invalid = P(1) / fallback
        local pattern = Cs((known + unknown + invalid)^0)
        remap = function(s)
            if s and s ~= "" then
                return lpegmatch(pattern,s)
            end
            return ""
        end
        remappers[fallback] = remap
    end
    return remap(str)
end
204
-- Switches back to "utf" and disables the "regimes.process" action in the
-- text line actions. Returns the (new) current regime.
local function disable()
    currentregime = "utf"
    sequencers.disableaction(
        textlineactions,
        "regimes.process"
    )
    return currentregime
end
210
-- Makes the given regime (or the name its synonym points at) the current one
-- and enables the "regimes.process" action. When the mapping for that name is
-- false, translation is disabled instead. Returns the resulting current
-- regime.
local function enable(regime)
    local name = synonyms[regime] or regime
    if mapping[name] == false then
        return disable()
    end
    currentregime = name
    sequencers.enableaction(textlineactions,"regimes.process")
    return currentregime
end
221
-- Public interface.
regimes.enable     = enable
regimes.disable    = disable
regimes.toregime   = toregime
regimes.fromregime = fromregime

-- Same as fromregime but with the arguments swapped.
function regimes.translate(str,regime)
    return fromregime(regime,str)
end
227
228
229
230
-- Nesting depth maintained by push and pop; lines are only translated while
-- it is zero.
local level = 0

-- The line action: converts str from the current regime unless the level is
-- non-zero or the coding is reported as "utf-8". The other arguments are not
-- used here. Returns the (possibly converted) line.
function regimes.process(str,filename,currentline,noflines,coding)
    if level ~= 0 or coding == "utf-8" then
        return str
    end
    local converted = fromregime(currentregime,str)
    if trace_translating then
        report_translating("utf: %s",converted)
    end
    return converted
end
242
-- Goes one level deeper; regimes.process leaves lines alone while the level
-- is above zero.
local function push()
    local depth = level + 1
    level = depth
    if trace_translating then
        report_translating("pushing level %s",depth)
    end
end
249
-- Goes one level up; does nothing when the level is already zero. The level
-- that gets reported is the one being left.
local function pop()
    if level <= 0 then
        return
    end
    if trace_translating then
        report_translating("popping level %s",level)
    end
    level = level - 1
end
258
-- Export the level helpers.
regimes.push, regimes.pop = push, pop
261
-- Returns a sorted list with the names of the regime files (regi-<name>.lua)
-- that live next to regi-ini.lua. The list is empty when regi-ini.lua cannot
-- be located.
function regimes.list()
    local name = resolvers.findfile("regi-ini.lua") or ""
    local okay = { }
    -- an empty string is true in Lua, so test for it explicitly
    if name ~= "" then
        local list = dir.glob(file.join(file.dirname(name),"regi-*.lua")) or { }
        for i=1,#list do
            -- The glob results are compared by their last path component
            -- only, so that regi-ini.lua itself can be recognized and skipped.
            local regime = match(list[i],"regi%-([^/\\]-)%.lua$")
            if regime and regime ~= "ini" then
                okay[#okay+1] = regime
            end
        end
        -- sorting once, after collecting, is enough
        table.sort(okay)
    end
    return okay
end
277
-- Register the line processor in the "system" group of the text line
-- actions; it stays disabled until a regime gets enabled.
if sequencers then

    local action = "regimes.process"

    sequencers.prependaction(textlineactions,"system",action)
    sequencers.disableaction(textlineactions,action)

end
284
285
286
287
288
-- Cleanup patterns per normalized regime name; false marks a regime that has
-- no vector (or is utf) so that nothing needs to be done for it.
local patterns = { }

-- Repairs a string in which characters of the given regime ended up as
-- separately converted pieces.
--
-- For every value v in the regime vector a "broken" variant is made by
-- passing each element of totable(v) through utfchar(byte(...)) (presumably
-- that is one element per byte of v -- confirm against string.totable).
-- Occurrences of such a variant in str are replaced by v; everything else is
-- copied as is.
--
-- regime : regime name or synonym; the current regime is used when omitted
-- str    : the string to clean; nil and "" are returned untouched
function regimes.cleanup(regime,str)
    if not str or str == "" then
        return str
    end
    -- Normalize before consulting the cache: patterns are stored under the
    -- normalized name, so a lookup with the raw name would miss for synonyms
    -- and for an omitted regime, and the pattern would be rebuilt every call.
    regime = regime and synonyms[regime] or regime or currentregime
    local p = patterns[regime]
    if p == nil then
        local vector = regime ~= "utf" and regime ~= "utf-8" and mapping[regime]
        if vector then
            -- broken sequence -> intended character
            local repair = { }
            for _, v in next, vector do
                local split = totable(v)
                for i=1,#split do
                    split[i] = utfchar(byte(split[i]))
                end
                split = concat(split)
                if v ~= split then
                    repair[split] = v
                end
            end
            p = Cs((lpeg.utfchartabletopattern(repair)/repair+P(1))^0)
        else
            p = false
        end
        patterns[regime] = p
    end
    return p and lpegmatch(p,str) or str
end
401
402
403
404
405
406
407
408
409
410
-- The commands below are only defined when the interfaces table is present.
if interfaces then

    local implement = interfaces.implement
    local setmacro  = interfaces.setmacro

    -- Regimes that were active when startregime got called, most recent last.
    local stack = { }

    -- Enables a regime and stores the resulting name in the "currentregime"
    -- macro.
    local function activate(regime)
        setmacro("currentregime",enable(regime))
    end

    -- Disables translation and stores the resulting name in the same macro.
    local function deactivate()
        setmacro("currentregime",disable())
    end

    -- Remembers the current regime before switching to the given one.
    local function start(regime)
        insert(stack,currentregime)
        if trace_translating then
            report_translating("start using %a",regime)
        end
        activate(regime)
    end

    -- Switches back to the most recently remembered regime, if there is one.
    local function stop()
        if #stack == 0 then
            return
        end
        local regime = remove(stack)
        if trace_translating then
            report_translating("stop using %a",regime)
        end
        activate(regime)
    end

    implement { name = "enableregime",  arguments = "string", actions = activate   }
    implement { name = "disableregime",                       actions = deactivate }
    implement { name = "pushregime",                          actions = push       }
    implement { name = "popregime",                           actions = pop        }
    implement { name = "startregime",   arguments = "string", actions = start      }
    implement { name = "stopregime",                          actions = stop       }

end
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483 |