-- Register this file's metadata in the global "modules" table, creating the
-- table first when no other module has done so yet.
if not modules then
    modules = { }
end

modules['regi-ini'] = {
    version   = 1.001,
    comment   = "companion to regi-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12local tostring = tostring
13local utfchar = utf.char
14local P, Cs, Cc, lpegmatch = lpeg.P, lpeg.Cs, lpeg.Cc, lpeg.match
15local char, gsub, format, gmatch, byte, match, lower = string.char, string.gsub, string.format, string.gmatch, string.byte, string.match, string.lower
16local next = next
17local insert, remove, fastcopy = table.insert, table.remove, table.fastcopy
18local concat = table.concat
19local totable = string.totable
20
21local allocate = utilities.storage.allocate
22local sequencers = utilities.sequencers
23local textlineactions = resolvers.openers.helpers.textlineactions
24local setmetatableindex = table.setmetatableindex
25
26
27
-- Tracing flag; it is switched by the "regimes.translating" tracker, which
-- hands the new value to the setter below.
local trace_translating = false

trackers.register("regimes.translating", function(v)
    trace_translating = v
end)

-- Log channels used for vector loading and for line translation.
local report_loading     = logs.reporter("regimes","loading")
local report_translating = logs.reporter("regimes","translating")
32
-- Global namespace of this module (reused when it already exists) plus a
-- local alias for fast access.
if not regimes then
    regimes = { }
end
local regimes = regimes

-- mapping[regime]     : eight bit character -> utf string (false: nothing to convert)
-- backmapping[regime] : utf string -> eight bit character
-- The "utf" entry is preset to false, so looking it up never triggers a load.
local mapping     = allocate({ utf = false })
local backmapping = allocate({ })
42
43
44
-- Alternative regime names, mapped onto the canonical name that is used in
-- the "regi-<name>.lua" vector file names.
local synonyms = {
    ["utf-8"]   = "utf",
    ["utf8"]    = "utf",
    [""]        = "utf",

    ["windows"] = "cp1252",

    ["pdf"]     = "pdfdoc",

    ["437"]     = "ibm",
}

do
    -- windows-1250 .. windows-1258 -> cp1250 .. cp1258
    for codepage=1250,1258 do
        synonyms["windows-" .. codepage] = "cp" .. codepage
    end

    -- il<n> and latin<n> (n = 1..10) name the n-th entry of this list of
    -- iso 8859 parts; iso-8859-<part> is the long form of the same vector.
    local parts = { 1, 2, 3, 4, 9, 10, 13, 14, 15, 16 }
    for index=1,#parts do
        local part   = parts[index]
        local target = "8859-" .. part
        synonyms["il"        .. index] = target
        synonyms["latin"     .. index] = target
        synonyms["iso-8859-" .. part ] = target
    end
end

-- The regime that is applied when no explicit regime is passed.
local currentregime = "utf"
102
-- Index handler for the "mapping" table: resolves and loads the vector of a
-- regime on first access.
--
-- mapping : the table that is being indexed (results are stored in it)
-- regime  : the requested key; it is lowercased and resolved through the
--           synonyms table (also with a "windows-" prefix) before loading
--
-- Returns a table mapping eight bit characters to utf strings, or false when
-- no "regi-<regime>.lua" file can be found.
--
-- The vector is kept in a local (it used to leak into a global "vector") and
-- is stored under the canonical name as well as under the key that was asked
-- for, so that an alias or differently cased name does not trigger a new
-- file lookup and load on every access.
local function loadregime(mapping,regime)
    local requested = regime
    regime = lower(tostring(regime))
    regime = synonyms[regime] or synonyms["windows-"..regime] or regime
    -- rawget: we are inside the index handler, so avoid calling ourselves
    local vector = rawget(mapping,regime)
    if vector == nil then
        local name = resolvers.findfile(format("regi-%s.lua",regime)) or ""
        local data = name ~= "" and dofile(name)
        if data then
            vector = { }
            for eightbit, unicode in next, data do
                vector[char(eightbit)] = utfchar(unicode)
            end
            report_loading("vector %a is loaded",regime)
        else
            vector = false
            report_loading("vector %a is unknown",regime)
        end
        mapping[regime] = vector
    end
    -- a nil key cannot be stored, any other alias is remembered
    if requested ~= nil and requested ~= regime then
        mapping[requested] = vector
    end
    return vector
end
121
-- Index handler for the "backmapping" table: builds the inverse of
-- mapping[k] (utf string -> eight bit character), stores it under k and
-- returns it. When the regime has no vector the inverse is an empty table.
local function loadreverse(t,k)
    local reverse = { }
    local vector  = mapping[k]
    if vector then
        for eightbit, utfstring in next, vector do
            reverse[utfstring] = eightbit
        end
    end
    backmapping[k] = reverse
    return reverse
end
133
-- Expose both tables and make them load their entries on demand: a missing
-- key is handed to loadregime / loadreverse.
regimes.mapping     = mapping
regimes.backmapping = backmapping

setmetatableindex(mapping,    loadregime)
setmetatableindex(backmapping,loadreverse)
139
-- Converts a line from the given regime (or the current one when regime is
-- nil or false) to utf.
--
-- Every single character of the line is looked up in the regime vector;
-- characters without an entry are kept as they are. A nil or empty line, or
-- a regime without vector, gives back the line untouched.
local function fromregime(regime,line)
    if not line or #line <= 0 then
        return line
    end
    local vector = mapping[regime or currentregime]
    if not vector then
        return line
    end
    -- only the converted string is passed on, not the substitution count
    local converted = gsub(line,".",vector)
    return converted
end
150
-- Per regime cache; an entry is created on first access and holds the
-- remappers that toregime builds (keyed by the replacement string).
local cache = { }

local function newcacheentry(store,key)
    local entry = { remappers = { } }
    store[key] = entry
    return entry
end

setmetatableindex(cache,newcacheentry)
158
-- Converts an utf string to the eight bit encoding of a regime.
--
-- vector  : the regime name (used as key in backmapping and in the cache)
-- str     : the string to convert; nil or "" gives ""
-- default : replacement for everything that has no eight bit counterpart,
--           "?" when not given
--
-- The converter is built once for each regime and replacement and then
-- reused.
local function toregime(vector,str,default)
    local fallback  = default or "?"
    local remappers = cache[vector].remappers
    local remapper  = remappers[fallback]
    if remapper then
        return remapper(str)
    end
    local reverse = fastcopy(backmapping[vector])
    -- presumably utfchartabletopattern matches any key of the table (helper
    -- defined elsewhere); a match is replaced by its eight bit character
    local known   = lpeg.utfchartabletopattern(reverse) / reverse
    -- any other utf character, and finally any stray byte, becomes the fallback
    local unknown = lpeg.patterns.utf8character / fallback
    local invalid = P(1) / fallback
    local pattern = Cs((known + unknown + invalid)^0)
    remapper = function(str)
        if str and str ~= "" then
            return lpegmatch(pattern,str)
        end
        return ""
    end
    remappers[fallback] = remapper
    return remapper(str)
end
178
-- Falls back to utf input: resets the current regime and disables the
-- "regimes.process" text line action. Returns the (new) current regime.
local function disable()
    local fallback = "utf"
    currentregime = fallback
    sequencers.disableaction(textlineactions,"regimes.process")
    return currentregime
end
184
-- Makes the given regime (or the one it is a synonym for) the current one
-- and enables the "regimes.process" text line action. When the regime has
-- no vector (the mapping entry is false) translation is disabled instead.
-- Returns the regime that is in effect afterwards.
local function enable(regime)
    local resolved = synonyms[regime] or regime
    if mapping[resolved] == false then
        disable()
        return currentregime
    end
    currentregime = resolved
    sequencers.enableaction(textlineactions,"regimes.process")
    return currentregime
end
195
-- Public interface of the converters and switches defined above.
regimes.enable     = enable
regimes.disable    = disable
regimes.fromregime = fromregime
regimes.toregime   = toregime

-- Same as fromregime but with the string as first argument.
function regimes.translate(str,regime)
    return fromregime(regime,str)
end
201
202
203
204
-- Nesting level maintained by push and pop; lines are only translated at
-- level zero.
local level = 0

-- Text line action: converts a line from the current regime to utf unless
-- translation is suspended (level above zero) or the coding is reported as
-- "utf-8". The arguments filename, currentline and noflines are not used.
function regimes.process(str,filename,currentline,noflines,coding)
    if level ~= 0 or coding == "utf-8" then
        return str
    end
    local converted = fromregime(currentregime,str)
    if trace_translating then
        report_translating("utf: %s",converted)
    end
    return converted
end
216
-- Enters a nested level; while the level is above zero regimes.process
-- leaves lines untouched.
local function push()
    local deeper = level + 1
    level = deeper
    if trace_translating then
        report_translating("pushing level %s",deeper)
    end
end
223
-- Leaves a nested level; does nothing when we are already at level zero.
local function pop()
    if level <= 0 then
        return
    end
    if trace_translating then
        -- the level that is being left is reported
        report_translating("popping level %s",level)
    end
    level = level - 1
end

regimes.pop  = pop
regimes.push = push
235
-- Returns a sorted list with the names of the regimes for which a
-- "regi-<name>.lua" file sits next to "regi-ini.lua". The list is empty when
-- "regi-ini.lua" cannot be located.
--
-- Compared to the previous version:
--   * no (undefined) global "regime" is passed to format
--   * a failed lookup ("" was always true) no longer results in a glob
--   * "regi-ini.lua" itself is skipped by testing the extracted name; the old
--     comparison of the globbed name with the bare file name presumably never
--     matched, as the glob pattern carries the directory
--   * the name is taken from the last path segment only
--   * the list is sorted once, after the loop
function regimes.list()
    local name = resolvers.findfile("regi-ini.lua") or ""
    local okay = { }
    if name ~= "" then
        local list = dir.glob(file.join(file.dirname(name),"regi-*.lua"))
        for i=1,#list do
            -- no path separator is permitted inside the captured name
            local regime = match(list[i],"regi%-([^/\\]-)%.lua$")
            if regime and regime ~= "ini" then
                okay[#okay+1] = regime
            end
        end
        table.sort(okay)
    end
    return okay
end
251
-- Hook the translator into the "system" group of the text line actions. It
-- is disabled right away; enable switches it on.
do
    local action = "regimes.process"
    sequencers.prependaction(textlineactions,"system",action)
    sequencers.disableaction(textlineactions,action)
end
254
255
256
257
258
-- Cleanup patterns, one for each (resolved) regime; false means that there
-- is nothing to clean up for that regime.
local patterns = { }

-- Repairs utf sequences that were encoded twice.
--
-- For every utf string in the regime vector the variant is determined that
-- results from converting each of its bytes to utf once more; occurrences of
-- such a variant in str are replaced by the proper utf string.
--
-- regime : regime name or synonym; the current regime when nil
-- str    : the string to clean up; nil and "" are returned as they are
--
-- The regime is resolved before the cache is consulted. The lookup used to
-- happen with the unresolved name while the pattern was stored under the
-- resolved one, so for a synonym or a nil regime the pattern was rebuilt on
-- every call.
function regimes.cleanup(regime,str)
    if not str or str == "" then
        return str
    end
    regime = regime and synonyms[regime] or regime or currentregime
    local p = patterns[regime]
    if p == nil then
        local vector = regime ~= "utf" and regime ~= "utf-8" and mapping[regime]
        if vector then
            -- not called "mapping", that would hide the regime vectors
            local remap = { }
            for _, v in next, vector do
                local split = totable(v)
                for i=1,#split do
                    split[i] = utfchar(byte(split[i]))
                end
                split = concat(split)
                -- identical strings need no replacement
                if v ~= split then
                    remap[split] = v
                end
            end
            p = Cs((lpeg.utfchartabletopattern(remap)/remap+P(1))^0)
        else
            p = false
        end
        patterns[regime] = p
    end
    return p and lpegmatch(p,str) or str
end
289
290
291
292
293
294
295
296
297
298
-- Commands that are registered through interfaces.implement; this only
-- happens when the "interfaces" table is present.
if interfaces then

    local implement = interfaces.implement
    local setmacro  = interfaces.setmacro

    -- Stores the regime that is in effect in the "currentregime" macro.
    local function setcurrent(regime)
        setmacro("currentregime",regime)
    end

    local function enableregime(regime)
        setcurrent(enable(regime))
    end

    local function disableregime()
        setcurrent(disable())
    end

    implement {
        name      = "enableregime",
        public    = true,
        protected = true,
        arguments = "optional",
        actions   = enableregime
    }

    implement {
        name      = "disableregime",
        public    = true,
        protected = true,
        actions   = disableregime
    }

    implement {
        name      = "pushregime",
        public    = true,
        protected = true,
        actions   = push
    }

    implement {
        name      = "popregime",
        public    = true,
        protected = true,
        actions   = pop
    }

    -- Regimes that were current at the time of a startregime, most recent last.
    local stack = { }

    -- Remembers the current regime and then switches to the given one.
    local function startregime(regime)
        insert(stack,currentregime)
        if trace_translating then
            report_translating("start using %a",regime)
        end
        setcurrent(enable(regime))
    end

    -- Switches back to the regime saved by the matching startregime; nothing
    -- happens when the stack is empty. The trace message shows the regime
    -- that is restored.
    local function stopregime()
        if #stack == 0 then
            return
        end
        local previous = remove(stack)
        if trace_translating then
            report_translating("stop using %a",previous)
        end
        setcurrent(enable(previous))
    end

    implement {
        name      = "startregime",
        public    = true,
        protected = true,
        arguments = "optional",
        actions   = startregime
    }

    implement {
        name      = "stopregime",
        public    = true,
        protected = true,
        actions   = stopregime
    }

end
365 |