if not modules then modules = { } end modules ['lang-tra'] = {
    version   = 1.001,
    comment   = "companion to lang-tra.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- The indic transliterations were researched by kauśika and after some experiments
-- we settled on the current approach (mappings and a more specific lpeg).

-- Todo: initial and final in addition to mapping so that we can do hebrew and
-- such.

local concat, setmetatableindex = table.concat, table.setmetatableindex
local nospaces = string.nospaces
local utfbyte, utfchar, utfsplit, utfvalues = utf.byte, utf.char, utf.split, utf.values
local C, Cc, Cs, lpegmatch = lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.match
local utfchartabletopattern = lpeg.utfchartabletopattern
local utfcharacterpattern = lpeg.patterns.utf8character

local nuts            = nodes.nuts

local nextchar        = nuts.traversers.char

local getattr         = nuts.getattr
local setchar         = nuts.setchar
local getnext         = nuts.getnext
local isnextchar      = nuts.isnextchar

local insertafter     = nuts.insertafter
local copynode        = nuts.copy
local removenode      = nuts.remove

local texsetattribute = tex.setattribute

local registervalue   = attributes.registervalue
local getvalue        = attributes.getvalue

local transliteration = { }
languages.transliteration = transliteration

local a_transliteration = attributes.private("transliteration")
local unsetvalue        = attributes.unsetvalue

local implement = interfaces.implement
local context   = context

local zwj = utf.char(0x200D)

-- local lastmapping  = 0

-- Bookkeeping shared by the functions below:
--
--   loadedmappings  : transliteration name (also its space-less variant) -> definition table
--   loadedlibraries : library name -> loaded data table, or false when loading gave no table
--   exceptions      : user level name -> table of old -> new word exceptions

local loadedmappings  = { }
local loadedlibraries = { }
local exceptions      = { }

local report = logs.reporter("transliteration")
local trace  = false

trackers.register("languages.transliteration", function(v) trace = v end)

-- Each converter receives a transliteration definition 'm' and returns a function
-- f(s,e) that maps the string 's' to its transliteration; 'e' is an optional table
-- with exceptions that take precedence over the ones stored in 'm.exceptions'.

local converters = {
--     ["iast to deva"] = function(m)
--         local t_svara     = m.svara
--         local p_svara     = utfchartabletopattern(t_svara)
--         local t_vyanjana  = m.vyanjana
--         local p_vyanjana  = utfchartabletopattern(t_vyanjana)
--         local t_maatra    = m.maatra
--         local p_maatra    = utfchartabletopattern(t_maatra)
--         local t_viraama   = m.viraama
--         local p_viraama   = utfchartabletopattern(t_viraama)
--         local t_boundary  = m.boundary
--         local p_boundary  = utfchartabletopattern(t_boundary)
--         local t_yogavaaha = m.yogavaaha
--         local p_yogavaaha = utfchartabletopattern(t_yogavaaha)
--         local p_svara_boundary = 1 - p_svara - p_vyanjana - p_yogavaaha
--         local p = Cs ( (
--             p_svara     / t_svara
--           + p_vyanjana  / t_vyanjana
--           + p_viraama   / t_viraama
--           + p_yogavaaha / t_yogavaaha
--           + C(utfcharacterpattern)
--         )^0 )
--         return function(s)
--             -- for now
--          -- s = zwj .. s
--             --
--             return lpegmatch(p,s) or s
--         end
--     end,
    -- Generic converter driven by 'm.mapping'. It returns false when the definition
    -- has no mapping table so that the caller can fall back on the default.
    ["mapping"] = function(m)
        local t_mapping = m.mapping
     -- inspect(t_mapping)
        if t_mapping then
            local t_exceptions = m.exceptions
            -- a substitution capture: whatever the mapping pattern matches is
            -- looked up in the mapping table, any other utf character is kept
            local p = Cs ( (
                utfchartabletopattern(t_mapping) / t_mapping
              + C(utfcharacterpattern)
            )^0 )
         -- lpeg.print(p)
            return function(s,e)
                -- whole string exceptions win over the character mapping
                return (e and e[s]) or t_exceptions[s] or lpegmatch(p,s) or s
            end
        else
            return false
        end
    end,
    -- Fallback converter: only whole string exceptions are applied, otherwise the
    -- string is passed unchanged.
    ["default"] = function(m)
        local t_exceptions = m.exceptions
        return function(s,e)
            return (e and e[s]) or t_exceptions[s] or s
        end
    end,
}

-- Load the transliterations of a library, once per library name. The data comes
-- from the module "lang-imp-<library>" which is expected to return a table with a
-- 'transliterations' subtable. Each entry gets an 'exceptions' table when it has
-- none, a 'vector' (the converter function) when it has none, and is registered in
-- 'loadedmappings' under its name as well as under its name with spaces removed.

function transliteration.use(library)
    local lib = loadedlibraries[library]
    if lib == nil then
        -- todo: use library loader
        local data = require("lang-imp-" .. library)
        if type(data) == "table" then
            local transliterations = data.transliterations
            if transliterations then
                for name, d in table.sortedhash(transliterations) do
                    local vector = d.vector
                    -- the converters capture this table, so it has to exist
                    -- before a converter is created
                    if not d.exceptions then
                        d.exceptions = { }
                    end
                    if vector then
                        -- the third argument fills the %s slot and therefore
                        -- carries its own trailing space
                        report("vector %a in %a is %sloaded",name,library,"already ")
                    else
                        -- a converter with the same name as the transliteration
                        -- wins, then the mapping one; when that one returns false
                        -- (no mapping table) the default converter is used
                        d.vector = (converters[name] or converters.mapping or converters.default)(d)
                                or (converters.default)(d)
                        report("vector %a in set %a is %sloaded",name,library,"")
                    end
                    d.library = library
                    d.name    = name
                    d.mapping = nil -- for now, saves memory
                    loadedmappings[name]           = d
                    loadedmappings[nospaces(name)] = d
                end
            else
                report("library %a has no transliterations",library)
            end
            loadedlibraries[library] = data
        else
            -- remember the failure so that we don't try again
            loadedlibraries[library] = false
        end
    end
end

-- The node handler is registered as disabled; this one shot function enables it
-- the first time a transliteration is set and then replaces itself by false.

local enable = false

enable = function()
    nodes.tasks.enableaction("processors", "languages.transliteration.handler")
    enable = false
end

-- Create the exception table for 'name'. When 'parent' already has exceptions the
-- new table falls back on them (via a metatable index), otherwise it starts empty
-- and independent.

function transliteration.register(parent,name)
    local p = exceptions[parent]
    if p then
        if trace then
            report("%a has exceptions that default to %a",name,parent)
        end
        exceptions[name] = setmetatableindex({ },p)
    else
        if trace then
            report("%a has independent exceptions",name)
        end
        exceptions[name] = { }
    end
end

-- Activate the transliteration 'vector' (a key in 'loadedmappings') together with
-- the exceptions registered under 'name'. The pair is stored as a value of the
-- private transliteration attribute and that attribute is set at the tex end.

function transliteration.set(name,vector)
    if enable then
        enable()
    end
    local a = registervalue(a_transliteration, {
        m = loadedmappings[vector],
        e = exceptions[name],
    })
    if trace then
        report("setting transliteration %i, name %a, vector %a",a,name,vector)
    end
    texsetattribute(a_transliteration,a)
end

-- Register the exception old -> new. When 'name' is a loaded mapping the exception
-- is stored with that mapping, otherwise it goes into the exception table of
-- 'name', which is created when it does not exist yet.

function transliteration.exception(name,old,new)
    local m = loadedmappings[name]
    if m then
        m.exceptions[old] = new
    else
        local e = exceptions[name]
        if not e then
            e = { }
            exceptions[name] = e
        end
        e[old] = new
    end
end

-- When there is need I will improve the performance of the next handler.
-- Node list processor: collects runs of character nodes that carry the same
-- transliteration attribute value, converts the text of each run with the vector
-- that belongs to that value, and writes the result back into the nodes. Nodes are
-- added (as copies of the first node of the run) when the result is longer and
-- removed when it is shorter. Returns the (possibly changed) head of the list.

function transliteration.handler(head)
    local aprev   = nil   -- attribute value of the run being collected
    local vector  = nil   -- converter that belongs to aprev (or nil)
    local except  = nil   -- exceptions that belong to aprev (or nil)
    local current = head
    local first   = nil   -- first node of the run being collected
    local last    = nil   -- last node of the run being collected
    local list    = { }   -- characters of the run, one utf string per node
    local size    = 0     -- number of used entries in list

    -- we need a more clever one: run over small ranges in order to keep colors etc
    -- actually we can generalize the replacer elsewhere

    -- Convert the collected run first .. last. The argument is the node that
    -- follows the run (nil at the end of the list); it is only used as sentinel
    -- when superfluous nodes get removed and must never be part of the run.

    local function flush(stop)
        if vector then
            -- we can do some optimization here by having the split as replacement
            local old = concat(list,"",1,size)
            local new = vector(old,except)
            if old ~= new then
                if trace then
                    report("old: %s",old)
                    report("new: %s",new)
                end
                local c = first
                local x = false -- true as soon as the original nodes are used up
                for s in utfvalues(new) do
                    if x then
                        head, c = insertafter(head,c,copynode(first))
                        setchar(c,s)
                    else
                        setchar(c,s)
                        if c == last then
                            x = true
                        else
                            c = getnext(c)
                        end
                    end
                end
                if not x then
                    -- the replacement is shorter: drop what is left of the run
                    while c do
                        head, c = removenode(head,c,true)
                        if c == stop then
                            break
                        end
                    end
                end
            end
        end
    end

    while current do
        -- NOTE(review): 'chr' is taken to be the character when current is a
        -- character node and 'more' to tell that the next node is one too;
        -- confirm against the isnextchar helper
        local nxt, chr, more = isnextchar(current)
        if chr then
            local a = getattr(current,a_transliteration)
            if a then
                if a ~= aprev then
                    if first then
                        -- the pending run ends right before current, so current
                        -- (and not its successor) is the node to stop at,
                        -- otherwise a shorter replacement would remove current
                        flush(current)
                        first = nil
                        size  = 0
                    end
                    aprev = a
                    local data = getvalue(a_transliteration,a)
                    if data then
                        local m = data.m
                        if m then
                            vector = m.vector
                            except = data.e
                        else
                            vector = nil
                            except = nil
                        end
                    else
                        vector = nil
                        except = nil
                    end
                end
                if not first then
                    first = current
                end
                last = current
                size = size + 1
                list[size] = utfchar(chr)
                if not more then
                    -- current is the last node of the run
                    flush(nxt)
                    first = nil
                    size  = 0
                    -- we can go ahead one next
                end
            else
                if first then
                    -- see above: current is not part of the pending run
                    flush(current)
                    first = nil
                    size  = 0
                end
            end
        end
        current = nxt
    end
    if first then
        -- end of list: nothing follows the run
        flush()
    end
    return head
end

interfaces.implement {
    name      = "usetransliteration",
    public    = true,
    protected = true,
    arguments = "optional",
    actions   = transliteration.use,
}

implement {
    name      = "settransliteration",
    arguments = "2 strings",
    actions   = transliteration.set,
}

implement {
    name      = "registertransliteration",
    arguments = "3 strings",
    actions   = transliteration.register,
}

implement {
    name      = "transliterationexception",
    arguments = "3 strings",
    actions   = transliteration.exception,
}

-- the handler starts out disabled, it gets enabled when a transliteration is set

nodes.tasks.prependaction("processors", "normalizers", "languages.transliteration.handler", nil, "nut", "disabled" )

-- Transliterate 'str' with the loaded mapping 'scheme' and pipe the result to
-- tex; the string is passed as-is when the scheme is unknown. Nothing is done
-- for a missing or empty string.

local function transliterate(scheme,str)
    if str and str ~= "" then
        local m = loadedmappings[scheme]
        local c = m and m.vector
        context(c and c(str) or str)
    end
end

local getbuffer = buffers.getcontent

implement {
    name      = "transliterate",
    public    = true,
    protected = true,
    arguments = { "optional", "string" },
    actions   = transliterate,
}

implement {
    name      = "transliteratebuffer",
    public    = true,
    protected = true,
    arguments = { "optional", "string" },
    actions   = function(scheme,name) transliterate(scheme,getbuffer(name)) end,
}

implement {
    name      = "transliterated",
    public    = true,
    arguments = { "optional", "string" },
    actions   = transliterate,
}

implement {
    name      = "transliteratedbuffer",
    public    = true,
    arguments = { "optional", "string" },
    actions   = function(scheme,name) transliterate(scheme,getbuffer(name)) end,
}