if not modules then modules = { } end modules ['char-tex'] = {
    version   = 1.001,
    comment   = "companion to char-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- Localized library functions (fast access, and they document what this file uses).

local lpeg = lpeg
local tonumber, next, type = tonumber, next, type
local format, find, gmatch, match, gsub = string.format, string.find, string.gmatch, string.match, string.gsub
local utfchar, utfbyte = utf.char, utf.byte
local concat, tohash = table.concat, table.tohash
local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc

local lpegpatterns          = lpeg.patterns
local lpegmatch             = lpeg.match
local utfchartabletopattern = lpeg.utfchartabletopattern

local allocate              = utilities.storage.allocate
local mark                  = utilities.storage.mark

local context               = context
local commands              = commands

-- When the character tables are not there yet we load them ourselves.

if not characters then
    require("char-ini")
    require("char-utf")
end

local characters            = characters
local texcharacters         = { }
characters.tex              = texcharacters
local utffilters            = characters.filters.utf

-- Same two helpers again, this time with a fallback for when the storage
-- functions are missing.

local allocate              = utilities.storage.allocate or function() return { } end
local mark                  = utilities.storage.mark     or allocate

local is_character          = characters.is_character
local is_letter             = characters.is_letter
local is_command            = characters.is_command
local is_spacing            = characters.is_spacing
local is_mark               = characters.is_mark

local data                  = characters.data

-- Without character data there is nothing for this file to do.

if not data then return end

local blocks                = characters.blocks

-- The tracker has to flip the local flag that the rest of this file tests; assigning
-- to some other (global) name would leave tracing permanently disabled.

local trace_defining        = false  trackers.register("characters.defining", function(v) trace_defining = v end)

local report_defining       = logs.reporter("characters")

-- In order to deal with 8-bit output, we need to find a way to go from UTF to
-- 8-bit. This is handled in the 32 bit engine itself. This leaves us problems with
-- characters that are specific to TeX, like curly braces and dollars.
-- We can remap some chars that tex input files are sensitive for to a private area
-- (while writing to a utility file) and revert then to their original slot when we
-- read in such a file. Instead of reverting, we can (when we resolve characters to
-- glyphs) map them to their right glyph there. For this purpose we can use the
-- private planes 0x0F0000 and 0x100000.

local low     = allocate() -- special character -> private slot
local high    = allocate() -- private slot      -> special character
local escapes = allocate() -- special character -> backslash escaped character
local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"

local private = {
    low     = low,
    high    = high,
    escapes = escapes,
}

utffilters.private = private

for ch in gmatch(special,".") do
    local cb
    if type(ch) == "number" then
        cb, ch = ch, utfchar(ch)
    else
        cb = utfbyte(ch)
    end
    if cb < 256 then
        escapes[ch] = "\\" .. ch
        low[ch] = utfchar(0x0F0000 + cb)
        if ch == "%" then
            ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
        end
        high[utfchar(0x0F0000 + cb)] = ch
    end
end

local tohigh = lpeg.replacer(low)  -- frozen, only for basic tex
local tolow  = lpeg.replacer(high) -- frozen, only for basic tex

lpegpatterns.utftohigh = tohigh
lpegpatterns.utftolow  = tolow

-- Applies the 'low' replacement table (special character to private slot) to str.

function utffilters.harden(str)
    return lpegmatch(tohigh,str)
end

-- Applies the 'high' replacement table (private slot back to character) to str.

function utffilters.soften(str)
    return lpegmatch(tolow,str)
end

private.escape  = utf.remapper(escapes) -- maybe: ,"dynamic"
private.replace = utf.remapper(low)     -- maybe: ,"dynamic"
private.revert  = utf.remapper(high)    -- maybe: ,"dynamic"

-- Per accent command (the key without backslash) a mapping from base character to
-- the composed character; the empty string key gives the standalone accent.

local accentmapping = allocate {
    ['"'] = {
        [""] = "¨",
        A = "Ä", a = "ä",
        E = "Ë", e = "ë",
        I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
        O = "Ö", o = "ö",
        U = "Ü", u = "ü",
        Y = "Ÿ", y = "ÿ",
    },
    ["'"] = {
        [""] = "´",
        A = "Á", a = "á",
        C = "Ć", c = "ć",
        E = "É", e = "é",
        I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
        L = "Ĺ", l = "ĺ",
        N = "Ń", n = "ń",
        O = "Ó", o = "ó",
        R = "Ŕ", r = "ŕ",
        S = "Ś", s = "ś",
        U = "Ú", u = "ú",
        Y = "Ý", y = "ý",
        Z = "Ź", z = "ź",
    },
    ["."] = {
        [""] = "˙",
        C = "Ċ", c = "ċ",
        E = "Ė", e = "ė",
        G = "Ġ", g = "ġ",
        I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
        Z = "Ż", z = "ż",
    },
    ["="] = {
        [""] = "¯",
        A = "Ā", a = "ā",
        E = "Ē", e = "ē",
        I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
        O = "Ō", o = "ō",
        U = "Ū", u = "ū",
    },
    ["H"] = {
        [""] = "˝",
        O = "Ő", o = "ő",
        U = "Ű", u = "ű",
    },
    ["^"] = {
        [""] = "ˆ",
        A = "Â", a = "â",
        C = "Ĉ", c = "ĉ",
        E = "Ê", e = "ê",
        G = "Ĝ", g = "ĝ",
        H = "Ĥ", h = "ĥ",
        I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
        J = "Ĵ", j = "ĵ",
        O = "Ô", o = "ô",
        S = "Ŝ", s = "ŝ",
        U = "Û", u = "û",
        W = "Ŵ", w = "ŵ",
        Y = "Ŷ", y = "ŷ",
    },
    ["`"] = {
        [""] = "`",
        A = "À", a = "à",
        E = "È", e = "è",
        I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
        O = "Ò", o = "ò",
        U = "Ù", u = "ù",
        Y = "Ỳ", y = "ỳ",
    },
    ["c"] = {
        [""] = "¸",
        C = "Ç", c = "ç",
        K = "Ķ", k = "ķ",
        L = "Ļ", l = "ļ",
        N = "Ņ", n = "ņ",
        R = "Ŗ", r = "ŗ",
        S = "Ş", s = "ş",
        T = "Ţ", t = "ţ",
    },
    ["k"] = {
        [""] = "˛",
        A = "Ą", a = "ą",
        E = "Ę", e = "ę",
        I = "Į", i = "į",
        U = "Ų", u = "ų",
    },
    ["r"] = {
        [""] = "˚",
        A = "Å", a = "å",
        U = "Ů", u = "ů",
    },
    ["u"] = {
        [""] = "˘",
        A = "Ă", a = "ă",
        E = "Ĕ", e = "ĕ",
        G = "Ğ", g = "ğ",
        I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
        O = "Ŏ", o = "ŏ",
        U = "Ŭ", u = "ŭ",
    },
    ["v"] = {
        [""] = "ˇ",
        C = "Č", c = "č",
        D = "Ď", d = "ď",
        E = "Ě", e = "ě",
        L = "Ľ", l = "ľ",
        N = "Ň", n = "ň",
        R = "Ř", r = "ř",
        S = "Š", s = "š",
        T = "Ť", t = "ť",
        Z = "Ž", z = "ž",
    },
    ["~"] = {
        [""] = "˜",
        A = "Ã", a = "ã",
        I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
        N = "Ñ", n = "ñ",
        O = "Õ", o = "õ",
        U = "Ũ", u = "ũ",
    },
}

texcharacters.accentmapping = accentmapping

-- Accent command to combining mark; only referenced from the commented-out fallback
-- in remap_accent below.

local accent_map = allocate { -- incomplete
    ['~'] = "̃" , --  ̃ Ẽ
    ['"'] = "̈" , --  ̈ Ë
    ["`"] = "̀" , --  ̀ È
    ["'"] = "́" , --  ́ É
    ["^"] = "̂" , --  ̂ Ê
    --  ̄ Ē
    --  ̆ Ĕ
    --  ̇ Ė
    --  ̉ Ẻ
    --  ̌ Ě
    --  ̏ Ȅ
    --  ̑ Ȇ
    --  ̣ Ẹ
    --  ̧ Ȩ
    --  ̨ Ę
    --  ̭ Ḙ
    --  ̰ Ḛ
}

-- local accents = concat(table.keys(accentmapping)) -- was _map

-- Returns the composed character for accent a on base c when accentmapping knows
-- the combination; otherwise the TeX input is rebuilt, braced or space separated
-- depending on 'braced'.

local function remap_accent(a,c,braced)
    local m = accentmapping[a]
    if m then
        local n = m[c]
        if n then
            return n
        end
    end
 -- local m = accent_map[a]
 -- if m then
 --     return c .. m
 -- elseif braced then -- or #c > 0
    if braced then -- or #c > 0
        return "\\" .. a .. "{" .. c .. "}"
    else
        return "\\" .. a .. " " .. c
    end
end

-- Command name (without backslash) to the character it stands for.

local commandmapping = allocate {
    ["aa"] = "å", ["AA"] = "Å",
    ["ae"] = "æ", ["AE"] = "Æ",
    ["cc"] = "ç", ["CC"] = "Ç",
    ["i"]  = "ı", ["j"]  = "ȷ",
    ["ij"] = "ij", ["IJ"] = "IJ",
    ["l"]  = "ł", ["L"]  = "Ł",
    ["o"]  = "ø", ["O"]  = "Ø",
    ["oe"] = "œ", ["OE"] = "Œ",
    ["sz"] = "ß", ["SZ"] = "SZ",
    ["ss"] = "ß", ["SS"] = "ß", -- uppercase: ẞ
}

texcharacters.commandmapping = commandmapping

-- local ligaturemapping = allocate {
--     ["''"]  = "”",
--     ["``"]  = "“",
--     ["--"]  = "–",
--     ["---"] = "—",
-- }

-- Older accent handling code can be found in char-def.lua but in the meantime
-- we moved on. First the one with commands:

local untex, pattern

-- Builds (once) and returns an lpeg pattern that matches the backslashed accent
-- and command forms and replaces each by its utf character.

local function toutfpattern()
    if not untex then
        local hash = { }
        for k, v in next, accentmapping do
            for kk, vv in next, v do
                if (k >= "a" and k <= "z") or (k >= "A" and k <= "Z") then
                    -- a letter accent needs a space before its argument
                    hash[ "\\"..k.." "..kk     ] = vv
                    hash["{\\"..k.." "..kk.."}"] = vv
                else
                    hash["\\" ..k     ..kk     ] = vv
                    hash["{\\"..k     ..kk.."}"] = vv
                end
                hash["\\" ..k.."{"..kk.."}" ] = vv
                hash["{\\"..k.."{"..kk.."}}"] = vv
            end
        end
        for k, v in next, commandmapping do
            hash["\\"..k.." "]  = v
            hash["{\\"..k.."}"] = v
            hash["{\\"..k.." }"] = v
        end
     -- for k, v in next, ligaturemapping do
     --     hash[k] = v
     -- end
        untex = utfchartabletopattern(hash) / hash
    end
    return untex
end

-- Wraps the replacement pattern so that it is applied over a whole string.

local function prepare()
    pattern = Cs((toutfpattern() + P(1))^0)
    return pattern
end

-- Converts the TeX accent and command sequences in str to utf. A string without
-- a backslash (this includes the empty string) is returned untouched. The 'strip'
-- argument is accepted but currently not used.

local function textoutf(str,strip)
    if str == "" then
        return str
    elseif not find(str,"\\",1,true) then
        return str
 -- elseif strip then
    else
        return lpegmatch(pattern or prepare(),str)
    end
end

texcharacters.toutfpattern = toutfpattern
texcharacters.toutf        = textoutf

-- print(texcharacters.toutf([[\~{Z}]],true))
-- print(texcharacters.toutf([[\'\i]],true))
-- print(texcharacters.toutf([[\'{\i}]],true))
-- print(texcharacters.toutf([[\"{e}]],true))
-- print(texcharacters.toutf([[\" {e}]],true))
-- print(texcharacters.toutf([[{\"{e}}]],true))
-- print(texcharacters.toutf([[{\" {e}}]],true))
-- print(texcharacters.toutf([[{\l}]],true))
-- print(texcharacters.toutf([[{\l }]],true))
-- print(texcharacters.toutf([[\v{r}]],true))
-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true))
-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))

-- Next the ones without backslash. Mind that from here on the local names
-- toutfpattern, prepare and textoutf refer to these variants.

local untex, pattern

-- Builds (once) and returns an lpeg pattern that matches accent plus character
-- and command names without a leading backslash.

local function toutfpattern()
    if not untex then
        local hash = { }
        for k, v in next, accentmapping do
            for kk, vv in next, v do
                hash[k..kk] = vv
            end
        end
        for k, v in next, commandmapping do
            hash[k] = v
        end
     -- for k, v in next, ligaturemapping do
     --     hash[k] = v
     -- end
        untex = utfchartabletopattern(hash) / hash
    end
    return untex
end

local function prepare()
    pattern = Cs((toutfpattern() + P(1))^0)
    return pattern
end

-- Applies the backslash-less replacement pattern to str.

local function textoutf(str)
    return lpegmatch(pattern or prepare(),str)
end

texcharacters.strtoutfpattern = toutfpattern
texcharacters.strtextoutf     = textoutf

local collapse = utffilters.collapse
local combine  = utffilters.combine

-- The remainder needs the interface layer.

if not interfaces then return end

local implement = interfaces.implement

local pattern1, pattern2

-- Verbose names of marks, as accepted by \chr, mapped onto characters.

local verbosemarks = characters.verbosemarks

if verbosemarks then

    mark(verbosemarks)

else

    verbosemarks = allocate {
        ["stroke"]               = utfchar(0x02F),
        ["slash"]                = utfchar(0x02F),
        ["middle dot"]           = utfchar(0x0B7),

        ["grave"]                = utfchar(0x300),
        ["acute"]                = utfchar(0x301),
        ["circumflex"]           = utfchar(0x302), -- was listed twice with the same value
        ["tilde"]                = utfchar(0x303),
        ["macron"]               = utfchar(0x304),
        ["line"]                 = utfchar(0x304),
        ["overline"]             = utfchar(0x305),
        ["breve"]                = utfchar(0x306),
        ["dot"]                  = utfchar(0x307),
        ["dieresis"]             = utfchar(0x308),
        ["diaeresis"]            = utfchar(0x308),
        ["hook"]                 = utfchar(0x309),
        ["ring"]                 = utfchar(0x30A),
        ["double acute"]         = utfchar(0x30B),
        ["hungarumlaut"]         = utfchar(0x30B), -- tex speak
        ["caron"]                = utfchar(0x30C),
        ["vertical line"]        = utfchar(0x30D),
        ["double vertical line"] = utfchar(0x30E),
        ["double grave"]         = utfchar(0x30F),
        ["inverted breve"]       = utfchar(0x311),
        ["dot below"]            = utfchar(0x323),
        ["ring below"]           = utfchar(0x325),
        ["cedilla"]              = utfchar(0x327),
        ["comma below"]          = utfchar(0x327), -- NOTE(review): same slot as cedilla; U+0326 is the combining comma below - confirm intent
        ["ogonek"]               = utfchar(0x328),
        ["caron below"]          = utfchar(0x32C),
        ["circumflex below"]     = utfchar(0x32D),
        ["tilde below"]          = utfchar(0x330),
        ["macron below"]         = utfchar(0x331),
        ["line below"]           = utfchar(0x331),

        ["hook below"]           = utfchar(0x1FA9D), -- NOTE(review): not in the combining marks range like the others - confirm intent
    }

    characters.verbosemarks = verbosemarks

    if storage then
        storage.register("characters/verbosemarks", verbosemarks, "characters.verbosemarks")
    end

end

-- First pass over a \chr argument: drop the backslash and trailing space around
-- known commands (replacing the command), replace verbose mark names, remove
-- spaces and pass every other utf character.

local function prepare1()
    pattern1 = Cs(
        (
            P("\\")/"" * (utfchartabletopattern(commandmapping) / commandmapping) * (P(" ")/"")
          + utfchartabletopattern(verbosemarks) / verbosemarks
          + lpegpatterns.space/""
          + lpegpatterns.utf8character
        )^0
    )
    return pattern1
end

-- Second pass: dotless i and j become their dotted base characters.

local function prepare2()
    local back = {
        ["ı"] = "i",
        ["ȷ"] = "j",
    }
    pattern2 = Cs(
        (
            utfchartabletopattern(back) / back
          + lpegpatterns.utf8character
        )^0
    )
    return pattern2
end

-- Memoizing resolver behind \chr. The key is normalized in two passes and then
-- tried against collapse, combine, the command mapping and finally the backslash
-- free accent converter. The result is reported and remembered. It is stored under
-- the normalized key as well as under the key that was actually looked up: with
-- only the normalized key, every lookup that normalization rewrites would miss
-- the cache and be resolved and reported again.

local hash = table.setmetatableindex(function(t,k)
    local asked = k -- the key this lookup was triggered with
    local f     = k -- how we show the input in the report
    k = lpegmatch(pattern1 or prepare1(),k) or k
    k = lpegmatch(pattern2 or prepare2(),k) or k
    local v = collapse(k) or k -- char specials
    if k ~= v then
        goto DONE
    end
    v = combine(k) or k -- with specials
    if k ~= v then
        goto DONE
    end
    v = commandmapping[k] or k
    if k ~= v then
        f = "\\" .. f
        goto DONE
    end
    v = textoutf(k) or k
    if k ~= v then
        f = "\\" .. f
        goto DONE
    end
  ::DONE::
    report_defining("instead of old school '%s' you can input the utf sequence %s",f,v)
    t[k] = v
    if asked ~= k then
        t[asked] = v
    end
    return v
end)

implement {
    name      = "chr",
 -- arguments = "argument", -- not here
    arguments = "string",
    public    = true,
    actions   = function(str)
        local hsh = hash[str]
        context(hsh) -- expandable
    end
}

-- Returns the backslashed context name of slot n when the character data has
-- one, and the utf character otherwise.

function texcharacters.safechar(n) -- was characters.safechar
    local c = data[n]
    if c and c.contextname then
        return "\\" .. c.contextname
    else
        return utfchar(n)
    end
end

if not context or not commands then
    -- used in e.g. mtx-bibtex
    return
end

-- all kind of initializations

local tex            = tex
local texsetlccode   = tex.setlccode
local texsetsfcode   = tex.setsfcode
local texsetcatcode  = tex.setcatcode

local contextsprint  = context.sprint
local ctxcatcodes    = catcodes.numbers.ctxcatcodes

local texsetmacro    = tokens.setters.macro
local texsetchar     = tokens.setters.char

-- function texcharacters.defineaccents()
--     local ctx_dodefineaccentcommand = context.dodefineaccent
--     local ctx_dodefineaccent        = context.dodefineaccent
--     local ctx_dodefinecommand       = context.dodefinecommand
--     for accent, group in next, accentmapping do
--         ctx_dodefineaccentcommand(accent)
--         for character, mapping in next, group do
--             ctx_dodefineaccent(accent,character,mapping)
--         end
--     end
--     for command, mapping in next, commandmapping do
--         ctx_dodefinecommand(command,mapping)
--     end
--     os.exit()
-- end

-- Passes every verbose mark (name with the spaces removed) and every command
-- mapping to the TeX end.

function texcharacters.defineaccents()
    local ctx_dodefinecombine = context.dodefinecombine
    local ctx_dodefinecommand = context.dodefinecommand
    for verbose, combining in next, verbosemarks do
        -- the extra parentheses drop the count that gsub also returns
        ctx_dodefinecombine((gsub(verbose," ","")),combining)
    end
    for command, mapping in next, commandmapping do
        ctx_dodefinecommand(command,mapping)
    end
end

implement { -- a waste of scanner but consistent
    name    = "defineaccents",
    actions = texcharacters.defineaccents
}
-- Instead of using a TeX file to define the named glyphs, we use the table. After
-- all, we have this information available anyway.

-- Converts a string to a number. Plain numbers go through tonumber; a string that
-- starts with a double quote is read as hexadecimal (TeX notation). Anything else,
-- including hex that does not parse, gives 0. The hex capture is tested first
-- because tonumber with an explicit base raises an error when handed nil.

local function to_number(s)
    local n = tonumber(s)
    if n then
        return n
    end
    local hex = match(s,'^"(.*)$')
    return hex and tonumber(hex,16) or 0
end

implement {
    name      = "utfchar",
    actions   = { to_number, utfchar, contextsprint },
    arguments = "string"
}

implement {
    name      = "safechar",
    actions   = { to_number, texcharacters.safechar, contextsprint },
    arguments = "string"
}

implement {
    name      = "uchar",
    arguments = "2 integers",
    actions   = function(h,l)
        context(utfchar(h*256+l))
    end
}

tex.uprint = commands.utfchar

-- in context we don't use lc and uc codes (in fact in luatex we should have a hf code)
-- so at some point we might drop this

-- The following get set at the TeX end:

local forbidden = tohash {
    0x000A0, -- zs nobreakspace
    0x000AD, -- cf softhyphen
 -- 0x00600, -- cf arabicnumber
 -- 0x00601, -- cf arabicsanah
 -- 0x00602, -- cf arabicfootnotemarker
 -- 0x00603, -- cf arabicsafha
 -- 0x00604, -- cf arabicsamvat
 -- 0x00605, -- cf arabicnumberabove
 -- 0x0061C, -- cf arabiclettermark
 -- 0x006DD, -- cf arabicendofayah
 -- 0x008E2, -- cf arabicdisputedendofayah
    0x02000, -- zs enquad
    0x02001, -- zs emquad
    0x02002, -- zs enspace \kern .5\emwidth
    0x02003, -- zs emspace \hskip \emwidth
    0x02004, -- zs threeperemspace
    0x02005, -- zs fourperemspace
    0x02006, -- zs sixperemspace
    0x02007, -- zs figurespace
    0x02008, -- zs punctuationspace
    0x02009, -- zs breakablethinspace
    0x0200A, -- zs hairspace
    0x0200B, -- cf zerowidthspace
    0x0200C, -- cf zwnj
    0x0200D, -- cf zwj
    0x0202F, -- zs narrownobreakspace
    0x0205F, -- zs medspace \textormathspace +\medmuskip 2
 -- 0x03000, -- zs ideographicspace
 -- 0x0FEFF, -- cf zerowidthnobreakspace \penalty \plustenthousand \kern \zeropoint
}

local csletters = characters.csletters -- also a signal that we have initialized
local activated = { }
local sfmode    = "unset" -- unset, traditional, normal
local block_too = false
directives.register("characters.blockstoo",function(v) block_too = v end)

-- If this is something that is not documentwide and used a lot, then we
-- need a more clever approach (trivial but not now).

-- Remembers the requested space factor mode v. Unless this is the first request
-- (the mode is still "unset") all uppercase letters (category "lu") get space
-- factor n right away.

local function setuppersfcodes(v,n)
    if sfmode ~= "unset" then
        report_defining("setting uppercase sf codes to %a",n)
        for u, chr in next, data do
            if chr.category == "lu" then
                texsetsfcode(u,n)
            end
        end
    end
    sfmode = v
end

directives.register("characters.spaceafteruppercase",function(v)
    if v == "traditional" then
        setuppersfcodes(v,999)
    elseif v == "normal" then
        setuppersfcodes(v,1000)
    end
end)

if not csletters then

    -- No csletters table yet, so this is the run in which we set things up.

    csletters            = allocate()
    characters.csletters = csletters

    report_defining("setting up character related codes and commands")

    if sfmode == "unset" then
        sfmode = "traditional"
    end

    local traditional = sfmode == "traditional"

    -- Sets the lc and uc code of letter u. A missing code defaults to the slot
    -- itself (and is then also stored in the character data); a table valued
    -- code is replaced by the slot for the engine only. In traditional mode an
    -- uppercase letter also gets space factor 999.

    local function setcasecodes(u,chr,category)
        local lc = chr.lccode
        local uc = chr.uccode
        if not lc then
            chr.lccode = u
            lc = u
        elseif type(lc) == "table" then
            lc = u
        end
        if not uc then
            chr.uccode = u
            uc = u
        elseif type(uc) == "table" then
            uc = u
        end
        texsetlccode(u,lc,uc)
        if traditional and category == "lu" then
            texsetsfcode(u,999)
        end
    end

    for u, chr in next, data do -- will move up
        local contextname = chr.contextname
        local category    = chr.category
        local isletter    = is_letter[category]
        if contextname then
            if is_character[category] then
                if chr.unicodeslot < 128 then
                    if isletter then
                        local c = utfchar(u)
                        csletters[c] = u
                    end
                else
                    local c = utfchar(u)
                    if isletter and u >= 32 and u <= 65536 then
                        csletters[c] = u
                    end
                end
                if isletter then
                    setcasecodes(u,chr,category)
                end
            elseif is_command[category] and not forbidden[u] then
                -- skip
            elseif is_mark[category] then
                texsetlccode(u,u,u) -- for hyphenation
            end
        elseif isletter then
            csletters[utfchar(u)] = u
            setcasecodes(u,chr,category)
        elseif is_mark[category] then
            texsetlccode(u,u,u) -- for hyphenation
        end
    end

    if block_too then
        -- this slows down format generation by over 10 percent
        for k, v in next, blocks do
            if v.catcode == "letter" then
                local first = v.first
                local last  = v.last
                local gaps  = v.gaps
                if first and last then
                    for u=first,last do
                        csletters[utfchar(u)] = u
                        --
                     -- texsetlccode(u,u,u) -- self self
                        --
                    end
                end
                if gaps then
                    for i=1,#gaps do
                        local u = gaps[i]
                        csletters[utfchar(u)] = u
                        --
                     -- texsetlccode(u,u,u) -- self self
                        --
                    end
                end
            end
        end
    end

    if storage then
        storage.register("characters/csletters", csletters, "characters.csletters")
    end

    -- These can go now:
    --
    -- .acute .apostrophe .bar .breve .breveacute .brevedotbelow .brevegrave .brevehook
    -- .brevetilde .caron .cedilla .circumflex .circumflexacute .circumflexdotbelow
    -- .circumflexgrave .circumflexhook .circumflextilde .commaaccent .curl .diaeresis
    -- .diaeresisacute .diaeresiscaron .diaeresisgrave .diaeresismacron .dotaccent
    -- .dotaccentmacron .dotbelow .dotmiddle .doublegrave .grave .horn .hornacute
    -- .horndotbelow .horngrave .hornhook .horntilde .hungarumlaut .invertedbreve
    -- .macron .ogonek .ogonekmacron .ring .ringacute .sharp .stroke .strokeacute .tail
    -- .tilde .tildemacron

    -- Names that start with one of these script prefixes are not defined as macros.

    local pattern = P("greek") + P("arabic") + P("hebrew") + P("cyrillic") + P("roman")

    -- Only keep these:
    --
    -- copyright dotlessI dotlessi dotlessJ dotlessj emdash endash exclamdown figuredash
    -- guilsingleleft guilsingleright periodcentered perthousand questiondown quotedbl
    -- quotedblbase quotedblleft quotedblright quoteleft quoteright quotesingle
    -- quotesinglebase registered rightguillemot sectionmark textacute textampersand
    -- textAngstrom textasciicircum textasciitilde textat textbackslash textbar
    -- textbottomcomma textbottomdot textbraceleft textbraceright textbreve
    -- textbrokenbar textbullet textcaron textcedilla textcelsius textcent textcircledP
    -- textcircumflex textcomma textcontrolspace textcurrency textdag textddag
    -- textdegree textdiaeresis textdiv textdollar textdong textdotaccent textellipsis
    -- texteuro textfraction textgrave texthash texthorizontalbar texthungarumlaut
    -- texthyphen textkelvin textlognot textmacron textmho textminus textmp textmu
    -- textmultiply textnumero textogonek textohm textounce textpercent textperiod
    -- textpm textring textslash textsterling texttilde textunderscore textyen

    -- Defines contextname as an immutable macro (or, for non letters below slot
    -- 128, as a character definition) for slot u, unless the name is filtered out
    -- by the prefix pattern above.

    local function setname(category,chr,u,contextname)
        if lpegmatch(pattern,contextname) then
         -- print("ignored",contextname)
        elseif is_character[category] then
            if chr.unicodeslot < 128 then
                if is_letter[category] then
                    texsetmacro(contextname,utfchar(u),"immutable")
                else
                    texsetchar(contextname,u,"immutable")
                end
            else
                texsetmacro(contextname,utfchar(u),"immutable")
            end
        elseif is_command[category] and not forbidden[u] then
            texsetmacro(contextname,utfchar(u),"immutable")
        end
    end

    -- Defines the context name and the extra names (contextspec) of every
    -- character in the data table. The argument is accepted but not used.

    function characters.setcharacternames(ctt)
        for u, chr in next, data do -- will move up
            local contextname = chr.contextname
            local contextspec = chr.contextspec
            local category    = chr.category
            if contextname then
                setname(category,chr,u,contextname)
            end
            if contextspec then
                for i=1,#contextspec do
                    local extraname = contextspec[i]
                    if extraname ~= contextname then
                        setname(category,chr,u,extraname)
                    end
                end
            end
        end
    end

else
    mark(csletters)
end

lpegpatterns.csletter = utfchartabletopattern(csletters)

-- The engine presets the letters to 11 (always).
-- Gives the zero width (non) joiner and all collected csletters catcode 11 in
-- catcode table cct. The current catcode table is restored afterwards.

function characters.setlettercatcodes(cct)
    if trace_defining then
        report_defining("assigning letter catcodes to catcode table %a",cct)
    end
    local saved = tex.catcodetable
    tex.catcodetable = cct
    texsetcatcode(0x200C,11) -- non-joiner
    texsetcatcode(0x200D,11) -- joiner
    for c, u in next, csletters do
        texsetcatcode(u,11)
    end
    tex.catcodetable = saved
end

-- Gives A-Z and a-z catcode 12 in catcode table cct.

function characters.setothercatcodes(cct)
    if trace_defining then
        report_defining("assigning other catcodes to catcode table %a",cct)
    end
    local saved = tex.catcodetable
    tex.catcodetable = cct
    for u=65,90 do
        texsetcatcode(u,12)
    end
    for u=97,122 do
        texsetcatcode(u,12)
    end
    tex.catcodetable = saved
end

-- Gives every slot in the 'activated' list catcode 13 in catcode table cct.

function characters.setactivecatcodes(cct)
    local saved = tex.catcodetable
    tex.catcodetable = cct
    for i=1,#activated do
        local u = activated[i]
        texsetcatcode(u,13)
        if trace_defining then
            local d = data[u] -- a slot without data must not break tracing
            report_defining("character %U (%s) is active in set %a",u,d and d.description,cct)
        end
    end
    tex.catcodetable = saved
end

implement {
    name      = "chardescription",
    arguments = "integer",
    actions   = function(slot)
        local d = data[slot]
        if d then
            context(d.description)
        end
    end,
}

if characters.setcharacternames then -- only in ini mode

    implement { name = "setlettercatcodes", scope = "private", actions = characters.setlettercatcodes, arguments = "integer" }
    implement { name = "setothercatcodes",  scope = "private", actions = characters.setothercatcodes,  arguments = "integer" }
    implement { name = "setactivecatcodes", scope = "private", actions = characters.setactivecatcodes, arguments = "integer" }
    implement { name = "setcharacternames", scope = "private", actions = characters.setcharacternames, arguments = "integer" }

end

-- experiment (some can move to char-ini.lua)

-- Overloads field 'code' ("uccode" or "lccode") of character c and clears the
-- matching entry in characters[codes].
--
-- c     : slot as a string that tonumber understands
-- u     : settings list; one entry is stored as a number, more entries are
--         stored as a table of numbers
-- code  : field name in the character data entry
-- codes : name of the table in 'characters' whose entry for c gets cleared
--
-- Nothing happens when c is not a number, when u is empty or when a single
-- entry is not a number. An unknown slot is reported instead of raising an error.

local function overload(c,u,code,codes)
    local slot = tonumber(c)
    if not slot then
        return
    end
    local list = utilities.parsers.settings_to_array(u)
    local n    = #list
    if n == 0 then
        return
    end
    local t = nil
    if n == 1 then
        t = tonumber(list[1])
    else
        t = { }
        for i=1,n do
            t[#t+1] = tonumber(list[i])
        end
    end
    if t then
        local chr = data[slot]
        if not chr then
            report_defining("no character %s, %s is not overloaded",slot,code)
            return
        end
        chr[code] = t
        local cache = characters[codes]
        if cache then
            cache[slot] = nil
        end
    end
end

implement {
    name      = "overloaduppercase",
    arguments = "2 strings",
    actions   = function(c,u)
        overload(c,u,"uccode","uccodes")
    end
}

implement {
    name      = "overloadlowercase",
    arguments = "2 strings",
    actions   = function(c,u)
        overload(c,u,"lccode","lccodes")
    end
}

-- Just for fun we support keywords:
--
-- \startTEXpage[offset=10pt]
--     abg"
--     \sl \showboxes
--     \accent               `" h%
--     \accent               `" x%
--     \accent yoffset  .2ex `" x
--     \accent yoffset 1.1ex `x x%
-- \stopTEXpage
--
-- We could do this:
--
-- \startTEXpage[offset=10pt]
--     abg"
--     \sl \showboxes
--     \withaccent               `" h%
--     \withaccent               `" x%
--     \withaccent yoffset  .2ex `" x
--     \withaccent yoffset 1.1ex accent `x base `x%
-- \stopTEXpage
--
-- But only when users demand it:
--
-- do
--
--     local new_glyph = nodes.pool.glyph
--
--     local scankeyword   = tokens.scanners.keyword
--     local scaninteger   = tokens.scanners.integer
--     local scandimension = tokens.scanners.dimension
--     local scantoken     = tokens.scanners.token
--
--     implement {
--         name      = "withaccent",
--         public    = true,
--         protected = true,
--         actions   = function()
--             local xoffset = 0
--             local yoffset = 0
--             local accent  = false
--             local base    = false
--             local zwj     = 0x200D
--             while true do
--                 if scankeyword("xoffset") then
--                     xoffset = scandimension()
--                 elseif scankeyword("yoffset") then
--                     yoffset = scandimension()
--                 elseif scankeyword("accent") then
--                     accent = scaninteger()
--                 elseif scankeyword("base") then
--                     base = scaninteger()
--                 else
--                     break
--                 end
--             end
--             if not accent then
--                 accent = scaninteger()
--             end
--             if not base then
--                 local nxttok = scantoken()
--                 base = nxttok.cmdname == "char_number" and scaninteger() or nxttok.index
--             end
--             if base and accent and base > 0 and accent > 0 then
--                 base   = new_glyph(true,base)
--                 zwj    = new_glyph(true,zwj)
--                 accent = new_glyph(true,accent)
--                 local slant   = fonts.hashes.parameters[true].slant / 65536 -- a la tex
--                 local xheight = fonts.hashes.parameters[true].xheight -- hm, compensated for glyphscale?
--                 accent.xoffset = xoffset - .5*(base.width -accent.width) + .5*(base.height-accent.height) * slant
--                 accent.yoffset = yoffset - (xheight - accent.height)
--                 accent.left    = accent.width
--                 accent.options = accent.options | 0x40 | 0x80
--                 context.dontleavehmode()
--                 context(base)
--                 context(zwj)
--                 context(accent)
--             end
--         end,
--     }
--
-- end