-- lang-tra.lmt / size: 11 Kb / last modification: 2024-01-16 10:22
-- Register the metadata of this file in the global module registry; the
-- registry is created here when no other module has done so yet.
if not modules then
    modules = { }
end

modules["lang-tra"] = {
    version   = 1.001,
    comment   = "companion to lang-tra.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
-- The Indic transliterations were researched by kauśika, and after some experiments
-- we settled on the current approach (mappings and a more specific lpeg).
11
-- Todo: add initial and final (in addition to mapping) so that we can do Hebrew
-- and such.
14
15local concat, setmetatableindex = table.concat, table.setmetatableindex
16local nospaces = string.nospaces
17local utfbyte, utfchar, utfsplit, utfvalues = utf.byte, utf.char, utf.split, utf.values
18local C, Cc, Cs, lpegmatch = lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.match
19local utfchartabletopattern = lpeg.utfchartabletopattern
20local utfcharacterpattern = lpeg.patterns.utf8character
21
22local nuts                = nodes.nuts
23
24local nextchar            = nuts.traversers.char
25
26local getattr             = nuts.getattr
27local setchar             = nuts.setchar
28local getnext             = nuts.getnext
29local isnextchar          = nuts.isnextchar
30
31local insertafter         = nuts.insertafter
32local copynode            = nuts.copy
33local removenode          = nuts.remove
34
35local texsetattribute     = tex.setattribute
36
37local registervalue       = attributes.registervalue
38local getvalue            = attributes.getvalue
39
40local transliteration     = { }
41languages.transliteration = transliteration
42
43local a_transliteration   = attributes.private("transliteration")
44local unsetvalue          = attributes.unsetvalue
45
46local implement           = interfaces.implement
47local context             = context
48
-- Zero width joiner; at the moment only mentioned in the disabled
-- "iast to deva" converter.
local zwj = utfchar(0x200D)

-- local lastmapping = 0

-- Bookkeeping shared by the functions in this file.
local loadedmappings  = { } -- vector name (with and without spaces) -> mapping data
local loadedlibraries = { } -- library name -> loaded data, false when it was no table
local exceptions      = { } -- name -> table with exceptions

local report = logs.reporter("transliteration")

-- Tracing is switched by the "languages.transliteration" tracker.
local trace = false

trackers.register("languages.transliteration", function(v)
    trace = v
end)
58
-- Factories that turn the data of one transliteration (as found in a library)
-- into a converter. A converter is called as converter(s,e): 's' is the string
-- to convert and 'e' an optional table with exceptions that is consulted before
-- the exceptions that come with the transliteration itself.

local converters = {
--     ["iast to deva"] = function(m)
--         local t_svara          = m.svara
--         local p_svara          = utfchartabletopattern(t_svara)
--         local t_vyanjana       = m.vyanjana
--         local p_vyanjana       = utfchartabletopattern(t_vyanjana)
--         local t_maatra         = m.maatra
--         local p_maatra         = utfchartabletopattern(t_maatra)
--         local t_viraama        = m.viraama
--         local p_viraama        = utfchartabletopattern(t_viraama)
--         local t_boundary       = m.boundary
--         local p_boundary       = utfchartabletopattern(t_boundary)
--         local t_yogavaaha      = m.yogavaaha
--         local p_yogavaaha      = utfchartabletopattern(t_yogavaaha)
--         local p_svara_boundary = 1 - p_svara - p_vyanjana - p_yogavaaha
--         local p = Cs ( (
--             p_svara                     / t_svara
--           + p_vyanjana                  / t_vyanjana
--           + p_viraama                   / t_viraama
--           + p_yogavaaha                 / t_yogavaaha
--           + C(utfcharacterpattern)
--         )^0 )
--         return function(s)
--             -- for now
-- --             s = zwj .. s
--             --
--             return lpegmatch(p,s) or s
--         end
--     end,

    -- Replaces every key of m.mapping that is found in the string by its value
    -- and copies all other characters. Gives false when there is no mapping
    -- table, so that the caller can fall back on another converter.
    ["mapping"] = function(m)
        local t_mapping = m.mapping
     -- inspect(t_mapping)
        if not t_mapping then
            return false
        end
        local t_exceptions = m.exceptions
        local p_mapped     = utfchartabletopattern(t_mapping) / t_mapping
        local p_copied     = C(utfcharacterpattern)
        local p            = Cs((p_mapped + p_copied)^0)
     -- lpeg.print(p)
        return function(s,e)
            -- a complete string that is an exception wins over the mapping
            local result = e and e[s]
            if not result then
                result = t_exceptions[s] or lpegmatch(p,s)
            end
            return result or s
        end
    end,

    -- Only applies the exceptions; anything else is passed unchanged.
    ["default"] = function(m)
        local t_exceptions = m.exceptions
        return function(s,e)
            local result = e and e[s]
            if not result then
                result = t_exceptions[s]
            end
            return result or s
        end
    end,
}
112
-- Loads the transliteration library "lang-imp-<library>" (once) and registers
-- every transliteration that it provides under its name, as well as under that
-- name with the spaces removed.
--
-- library : the part of the module name that follows "lang-imp-"
--
-- An entry that has no 'vector' (the converter) gets one: a converter that is
-- specific for the name when there is one, otherwise the generic mapping based
-- one, and when that one gives false (no mapping table) the default one that
-- only handles exceptions. Nothing is returned; a library that doesn't give a
-- table is remembered as false so that it isn't tried again.

function transliteration.use(library)
    local lib = loadedlibraries[library]
    if lib == nil then
        -- todo: use library loader
        local data = require("lang-imp-" .. library)
        if type(data) == "table" then
            local transliterations = data.transliterations
            if transliterations then
                for name, d in table.sortedhash(transliterations) do
                    local vector = d.vector
                    -- the converters (and transliteration.exception) expect this table
                    if not d.exceptions then
                        d.exceptions = { }
                    end
                    if vector then
                        -- the space has to follow the word, otherwise we get "alreadyloaded"
                        report("vector %a in %a is %sloaded",name,library,"already ")
                    else
                        d.vector = (converters[name] or converters.mapping or converters.default)(d)
                                or (converters.default)(d)
                        report("vector %a in set %a is %sloaded",name,library,"")
                    end
                    d.library   = library
                    d.name      = name
                    d.mapping   = nil -- for now, saves memory
                    loadedmappings[name] = d
                    loadedmappings[nospaces(name)] = d
                end
            else
                report("library %a has no transliterations",library)
            end
            loadedlibraries[library] = data
        else
            loadedlibraries[library] = false
        end
    end
end
148
-- Enables the node handler. The function replaces itself by false, so callers
-- that test "if enable then" only trigger the enabling once.

local function enable()
    nodes.tasks.enableaction("processors", "languages.transliteration.handler")
    enable = false
end
155
-- Creates a fresh table with exceptions for 'name'. When 'parent' already has
-- exceptions, lookups that fail in the new table are delegated to those of the
-- parent; otherwise the new table stands on its own.

function transliteration.register(parent,name)
    local inherited = exceptions[parent]
    if trace then
        if inherited then
            report("%a has exceptions that default to %a",name,parent)
        else
            report("%a has independent exceptions",name)
        end
    end
    if inherited then
        exceptions[name] = setmetatableindex({ },inherited)
    else
        exceptions[name] = { }
    end
end
170
-- Makes the combination of a loaded vector and the exceptions registered for
-- 'name' the current transliteration: the pair is stored as an attribute value
-- and the transliteration attribute is set to the resulting number. The first
-- call also enables the node handler.

function transliteration.set(name,vector)
    if enable then
        enable()
    end
    local data = {
        m = loadedmappings[vector],
        e = exceptions[name],
    }
    local index = registervalue(a_transliteration,data)
    if trace then
        report("setting transliteration %i, name %a, vector %a",index,name,vector)
    end
    texsetattribute(a_transliteration,index)
end
184
-- Registers that the string 'old' has to become 'new'. When 'name' is a loaded
-- vector the exception is stored with that vector, otherwise it ends up in the
-- exceptions that go by that name (the table is created when needed).

function transliteration.exception(name,old,new)
    local mapping = loadedmappings[name]
    local target
    if mapping then
        target = mapping.exceptions
    else
        target = exceptions[name]
        if not target then
            target = { }
            exceptions[name] = target
        end
    end
    target[old] = new
end
198
199-- When there is need I will improve the performance of the next handler.
200
-- The node list handler. It collects runs of successive characters that carry
-- the same transliteration attribute, converts the text of such a run with the
-- vector (and exceptions) that belong to the attribute value and, when that
-- gives a different string, writes the result back into the nodes of the run.
--
-- head : the first node of the list
--
-- Returns the (possibly changed) head of the list.

function transliteration.handler(head)
    local aprev   = nil      -- attribute value seen on the previous character
    local vector  = nil      -- converter that goes with aprev (nil: none)
    local except  = nil      -- exceptions that go with aprev
    local current = head
    local first   = nil      -- first node of the pending run (nil: no run)
    local last    = nil      -- last node of the pending run
    local list    = { }      -- the characters of the pending run, as utf strings
    local size    = 0        -- the number of entries in list that is in use

    -- we need a more clever one: run over small ranges in order to keep colors etc

    -- actually we can generalize the replacer elsewhere

    -- Converts the pending run (first .. last) and forgets about it afterwards.

    local function flush()
        if vector then
            -- we can do some optimization here by having the split as replacement
            local old = concat(list,"",1,size)
            local new = vector(old,except)
            if old ~= new then
                if trace then
                    report("old: %s",old)
                    report("new: %s",new)
                end
                -- The run ends at 'last', so that is where a removal has to stop.
                -- We can't use the successor of the node that the main loop is
                -- looking at: when the run is ended by a character that is not
                -- part of it, that would be one node too far and the character
                -- that ended the run would be removed (and freed) too.
                local stop    = getnext(last)
                local c       = first
                local overrun = false -- true: all nodes of the run have been reused
                for s in utfvalues(new) do
                    if overrun then
                        -- more characters than nodes: add copies of the first one
                        head, c = insertafter(head,c,copynode(first))
                        setchar(c,s)
                    else
                        setchar(c,s)
                        if c == last then
                            overrun = true
                        else
                            c = getnext(c)
                        end
                    end
                end
                if not overrun then
                    -- fewer characters than nodes: remove what is left of the run
                    while c and c ~= stop do
                        head, c = removenode(head,c,true)
                    end
                end
            end
        end
        first = nil
        size  = 0
    end

    while current do
        -- nxt is where we continue, chr is only set for a character and more
        -- tells us if we should keep collecting after this character
        local nxt, chr, more = isnextchar(current)
        if chr then
            local a = getattr(current,a_transliteration)
            if a then
                if a ~= aprev then
                    -- another transliteration: finish the run with the old vector
                    if first then
                        flush()
                    end
                    aprev = a
                    local data = getvalue(a_transliteration,a)
                    local m    = data and data.m
                    if m then
                        vector = m.vector
                        except = data.e
                    else
                        vector = nil
                        except = nil
                    end
                end
                if not first then
                    first = current
                end
                last = current
                size = size + 1
                list[size] = utfchar(chr)
                if not more then
                    flush()
                    -- we can go ahead one next
                end
            elseif first then
                -- a character without the attribute ends the run
                flush()
            end
        end
        current = nxt
    end
    if first then
        flush()
    end
    return head
end
306
-- The interface to the TeX end: each entry couples a name to one of the
-- functions defined above.

implement {
    name      = "usetransliteration",
    public    = true,
    protected = true,
    arguments = "optional",
    actions   = transliteration.use,
}

implement {
    name      = "settransliteration",
    arguments = "2 strings",
    actions   = transliteration.set,
}

-- NOTE(review): three strings are picked up here while transliteration.register
-- only has two parameters (parent, name) - to be checked against the TeX end.

implement {
    name      = "registertransliteration",
    arguments = "3 strings",
    actions   = transliteration.register,
}

implement {
    name      = "transliterationexception",
    arguments = "3 strings",
    actions   = transliteration.exception,
}

-- The handler is hooked into the processors but starts out disabled; it gets
-- enabled by the first transliteration.set.

nodes.tasks.prependaction("processors", "normalizers", "languages.transliteration.handler", nil, "nut", "disabled" )
334
-- Pipes 'str' to TeX, converted with the vector that is loaded under the name
-- 'scheme'; when there is no such vector the string is passed as it is. An
-- empty or missing string results in nothing being piped.

local function transliterate(scheme,str)
    if not str or str == "" then
        return
    end
    local mapping = loadedmappings[scheme]
    local convert = mapping and mapping.vector
    if convert then
        context(convert(str) or str)
    else
        context(str)
    end
end
342
local getbuffer = buffers.getcontent

-- The action shared by the buffer based commands: the content of the buffer
-- 'name' is transliterated according to 'scheme'.

local function transliteratebuffer(scheme,name)
    transliterate(scheme,getbuffer(name))
end

-- The first pair of commands is registered as protected, the second pair
-- (the names that end in "d") is not; apart from that they do the same.

implement {
    name      = "transliterate",
    public    = true,
    protected = true,
    arguments = { "optional", "string" },
    actions   = transliterate,
}

implement {
    name      = "transliteratebuffer",
    public    = true,
    protected = true,
    arguments = { "optional", "string" },
    actions   = transliteratebuffer,
}

implement {
    name      = "transliterated",
    public    = true,
    arguments = { "optional", "string" },
    actions   = transliterate,
}

implement {
    name      = "transliteratedbuffer",
    public    = true,
    arguments = { "optional", "string" },
    actions   = transliteratebuffer,
}
374