if not modules then modules = { } end modules ['char-tex'] = {
    version   = 1.001,
    comment   = "companion to char-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

local lpeg = lpeg
local tonumber, next, type = tonumber, next, type
local format, find, gmatch, match, gsub = string.format, string.find, string.gmatch, string.match, string.gsub
local utfchar, utfbyte = utf.char, utf.byte
local concat, tohash = table.concat, table.tohash
local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc

local lpegpatterns          = lpeg.patterns
local lpegmatch             = lpeg.match
local utfchartabletopattern = lpeg.utfchartabletopattern

local allocate              = utilities.storage.allocate
local mark                  = utilities.storage.mark

local context               = context
local commands              = commands

if not characters then require("char-ini") require("char-utf")  end

local characters            = characters
local texcharacters         = { }
characters.tex              = texcharacters
local utffilters            = characters.filters.utf

local allocate              = utilities.storage.allocate or function() return { } end
local mark                  = utilities.storage.mark     or allocate

local is_character          = characters.is_character
local is_letter             = characters.is_letter
local is_command            = characters.is_command
local is_spacing            = characters.is_spacing
local is_mark               = characters.is_mark

local data                  = characters.data  if not data then return end
local blocks                = characters.blocks

-- Tracing flag read by the catcode setters further down. The tracker callback has
-- to assign the local; assigning another name would create an unused global and
-- leave the flag permanently false.
local trace_defining        = false  trackers.register("characters.defining", function(v) trace_defining = v end)

local report_defining       = logs.reporter("characters")

-- In order to deal with 8-bit output, we need to find a way to go from UTF to
-- 8-bit. This is handled in the 32 bit engine itself. This leaves us with problems
-- for characters that are specific to TeX, like curly braces and dollars. We can
-- remap some characters that TeX input files are sensitive to into a private area
-- (while writing to a utility file) and then revert them to their original slot
-- when we read in such a file. Instead of reverting, we can (when we resolve
-- characters to glyphs) map them to their right glyph there. For this purpose we
-- can use the private planes 0x0F0000 and 0x100000.

-- Tables used for moving TeX-sensitive characters to a private plane and back:
--
--   low     : character          -> private slot
--   high    : private slot       -> character
--   escapes : character          -> backslash escaped character
--
-- They are filled by the loop over 'special' that follows.
local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"
local low     = allocate()
local high    = allocate()
local escapes = allocate()

local private = { low = low, high = high, escapes = escapes }

utffilters.private = private

-- Fill 'escapes', 'low' and 'high' for every character in 'special'. As gmatch
-- only yields strings there is no need to handle numeric entries here.
for ch in gmatch(special,".") do
    local cb = utfbyte(ch)
    if cb < 256 then
        local moved = utfchar(0x0F0000 + cb) -- same slot offset in the private plane
        escapes[ch] = "\\" .. ch
        low[ch]     = moved
        -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
        high[moved] = ch == "%" and "%%" or ch
    end
end

-- Replacement patterns made from the tables as they are at this point: 'tohigh'
-- is built from 'low' and 'tolow' from 'high'.
local tohigh = lpeg.replacer(low)   -- frozen, only for basic tex
local tolow  = lpeg.replacer(high)  -- frozen, only for basic tex

-- The same patterns are also published in the shared table of lpeg patterns.
lpegpatterns.utftohigh = tohigh
lpegpatterns.utftolow  = tolow

-- Runs the 'tohigh' replacer over str and returns the result of the match.
utffilters.harden = function(str)
    return lpegmatch(tohigh,str)
end

-- Runs the 'tolow' replacer over str and returns the result of the match.
utffilters.soften = function(str)
    return lpegmatch(tolow,str)
end

-- Remappers created by utf.remapper from the three tables; they are stored next
-- to the tables in 'private'.
private.escape  = utf.remapper(escapes) -- maybe: ,"dynamic"
private.replace = utf.remapper(low)     -- maybe: ,"dynamic"
private.revert  = utf.remapper(high)    -- maybe: ,"dynamic"

-- Precomposed characters per accent command. The outer key is the name of the
-- accent command without backslash, the inner key is the base character (or the
-- command "\i", or the dotless "ı") and the value is the resulting character. The
-- empty inner key holds a standalone (spacing) variant of the accent.
local accentmapping = allocate {
    ['"'] = { [""] = "¨",
        A = "Ä", a = "ä",
        E = "Ë", e = "ë",
        I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
        O = "Ö", o = "ö",
        U = "Ü", u = "ü",
        Y = "Ÿ", y = "ÿ",
    },
    ["'"] = { [""] = "´",
        A = "Á", a = "á",
        C = "Ć", c = "ć",
        E = "É", e = "é",
        I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
        L = "Ĺ", l = "ĺ",
        N = "Ń", n = "ń",
        O = "Ó", o = "ó",
        R = "Ŕ", r = "ŕ",
        S = "Ś", s = "ś",
        U = "Ú", u = "ú",
        Y = "Ý", y = "ý",
        Z = "Ź", z = "ź",
    },
    ["."] = { [""] = "˙",
        C = "Ċ", c = "ċ",
        E = "Ė", e = "ė",
        G = "Ġ", g = "ġ",
        I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
        Z = "Ż", z = "ż",
    },
    ["="] = { [""] = "¯",
        A = "Ā", a = "ā",
        E = "Ē", e = "ē",
        I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
        O = "Ō", o = "ō",
        U = "Ū", u = "ū",
    },
    ["H"] = { [""] = "˝",
        O = "Ő", o = "ő",
        U = "Ű", u = "ű",
    },
    ["^"] = { [""] = "ˆ",
        A = "Â", a = "â",
        C = "Ĉ", c = "ĉ",
        E = "Ê", e = "ê",
        G = "Ĝ", g = "ĝ",
        H = "Ĥ", h = "ĥ",
        I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
        J = "Ĵ", j = "ĵ",
        O = "Ô", o = "ô",
        S = "Ŝ", s = "ŝ",
        U = "Û", u = "û",
        W = "Ŵ", w = "ŵ",
        Y = "Ŷ", y = "ŷ",
    },
    ["`"] = { [""] = "`",
        A = "À", a = "à",
        E = "È", e = "è",
        I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
        O = "Ò", o = "ò",
        U = "Ù", u = "ù",
        Y = "Ỳ", y = "ỳ",
    },
    ["c"] = { [""] = "¸",
        C = "Ç", c = "ç",
        K = "Ķ", k = "ķ",
        L = "Ļ", l = "ļ",
        N = "Ņ", n = "ņ",
        R = "Ŗ", r = "ŗ",
        S = "Ş", s = "ş",
        T = "Ţ", t = "ţ",
    },
    ["k"] = { [""] = "˛",
        A = "Ą", a = "ą",
        E = "Ę", e = "ę",
        I = "Į", i = "į",
        U = "Ų", u = "ų",
    },
    ["r"] = { [""] = "˚",
        A = "Å", a = "å",
        U = "Ů", u = "ů",
    },
    ["u"] = { [""] = "˘",
        A = "Ă", a = "ă",
        E = "Ĕ", e = "ĕ",
        G = "Ğ", g = "ğ",
        I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
        O = "Ŏ", o = "ŏ",
        U = "Ŭ", u = "ŭ",
        },
    ["v"] = { [""] = "ˇ",
        C = "Č", c = "č",
        D = "Ď", d = "ď",
        E = "Ě", e = "ě",
        L = "Ľ", l = "ľ",
        N = "Ň", n = "ň",
        R = "Ř", r = "ř",
        S = "Š", s = "š",
        T = "Ť", t = "ť",
        Z = "Ž", z = "ž",
        },
    ["~"] = { [""] = "˜",
        A = "Ã", a = "ã",
        I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
        N = "Ñ", n = "ñ",
        O = "Õ", o = "õ",
        U = "Ũ", u = "ũ",
    },
}

-- Made available to other modules.
texcharacters.accentmapping = accentmapping

-- Accent command name to combining mark. Within this file the table is only
-- referenced from commented-out code in remap_accent.
local accent_map = allocate { -- incomplete
   ['~'] = "̃" , --  ̃ Ẽ
   ['"'] = "̈" , --  ̈ Ë
   ["`"] = "̀" , --  ̀ È
   ["'"] = "́" , --  ́ É
   ["^"] = "̂" , --  ̂ Ê
    --  ̄ Ē
    --  ̆ Ĕ
    --  ̇ Ė
    --  ̉ Ẻ
    --  ̌ Ě
    --  ̏ Ȅ
    --  ̑ Ȇ
    --  ̣ Ẹ
    --  ̧ Ȩ
    --  ̨ Ę
    --  ̭ Ḙ
    --  ̰ Ḛ
}

-- local accents = concat(table.keys(accentmapping)) -- was _map

-- Resolves accent command 'a' applied to 'c' using accentmapping. When there is
-- no precomposed character the TeX input is rebuilt instead: "\a{c}" when 'braced'
-- is set and "\a c" otherwise.
local function remap_accent(a,c,braced)
    local group    = accentmapping[a]
    local composed = group and group[c]
    if composed then
        return composed
    end
    local command = "\\" .. a
    if braced then -- or #c > 0
        return command .. "{" .. c .. "}"
    end
    return command .. " " .. c
end

-- Character commands (name without backslash) and the character they stand for.
local commandmapping = allocate {
    ["aa"] = "å", ["AA"] = "Å",
    ["ae"] = "æ", ["AE"] = "Æ",
    ["cc"] = "ç", ["CC"] = "Ç",
    ["i"]  = "ı", ["j"]  = "ȷ",
    ["ij"] = "ĳ", ["IJ"] = "Ĳ",
    ["l"]  = "ł", ["L"]  = "Ł",
    ["o"]  = "ø", ["O"]  = "Ø",
    ["oe"] = "œ", ["OE"] = "Œ",
    ["sz"] = "ß", ["SZ"] = "SZ", ["ss"] = "ß", ["SS"] = "ß", -- uppercase: ẞ
}

-- Made available to other modules.
texcharacters.commandmapping = commandmapping

-- local ligaturemapping = allocate {
--     ["''"]  = "”",
--     ["``"]  = "“",
--     ["--"]  = "–",
--     ["---"] = "—",
-- }

-- Older accent handling code can be found in char-def.lua but in the meantime
-- we moved on. First the one with commands:

-- Caches for the patterns that deal with input that has backslashes.
local untex, pattern

-- Returns an lpeg pattern that matches the backslash forms of the accent and
-- command mappings and replaces a match by the mapped string. The pattern is
-- built on the first call and reused afterwards.
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        -- an accent command that is a letter is separated from an unbraced
        -- argument by a space, other accents are directly followed by it
        local isletter  = (accent >= "a" and accent <= "z") or (accent >= "A" and accent <= "Z")
        local separator = isletter and " " or ""
        local command   = "\\" .. accent
        for base, result in next, group do
            hash[       command .. separator .. base       ] = result
            hash["{" .. command .. separator .. base .. "}"] = result
            hash[       command .. "{" .. base .. "}"      ] = result
            hash["{" .. command .. "{" .. base .. "}}"     ] = result
        end
    end
    for name, result in next, commandmapping do
        local command = "\\" .. name
        hash[       command .. " "  ] = result
        hash["{" .. command .. "}"  ] = result
        hash["{" .. command .. " }" ] = result
    end
 -- for k, v in next, ligaturemapping do
 --     hash[k] = v
 -- end
    untex = utfchartabletopattern(hash) / hash
    return untex
end

-- Builds the substituting pattern (a known sequence or else any single character,
-- repeated), stores it in 'pattern' and returns it.
local function prepare()
    local anything = P(1)
    pattern = Cs((toutfpattern() + anything)^0)
    return pattern
end

-- Converts the TeX accent and character commands in str to utf. A string that is
-- empty or has no backslash is returned as-is, without building the pattern. The
-- 'strip' argument is accepted but not used.
local function textoutf(str,strip)
    if str == "" or not find(str,"\\",1,true) then
        return str
    end
 -- elseif strip then
    return lpegmatch(pattern or prepare(),str)
end

-- Public names for the backslash aware pattern builder and converter.
texcharacters.toutfpattern = toutfpattern
texcharacters.toutf        = textoutf

-- print(texcharacters.toutf([[\~{Z}]],true))
-- print(texcharacters.toutf([[\'\i]],true))
-- print(texcharacters.toutf([[\'{\i}]],true))
-- print(texcharacters.toutf([[\"{e}]],true))
-- print(texcharacters.toutf([[\" {e}]],true))
-- print(texcharacters.toutf([[{\"{e}}]],true))
-- print(texcharacters.toutf([[{\" {e}}]],true))
-- print(texcharacters.toutf([[{\l}]],true))
-- print(texcharacters.toutf([[{\l }]],true))
-- print(texcharacters.toutf([[\v{r}]],true))
-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true))
-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))

-- Next the ones without backslash

-- Caches for the patterns that deal with input without backslashes; these
-- locals shadow the ones of the backslash variant.
local untex, pattern

-- Returns an lpeg pattern that matches an accent name directly followed by its
-- argument, or a command name, and replaces the match by the mapped string. The
-- pattern is built on the first call and reused afterwards.
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        for base, result in next, group do
            hash[accent .. base] = result
        end
    end
    for name, result in next, commandmapping do
        hash[name] = result
    end
 -- for k, v in next, ligaturemapping do
 --     hash[k] = v
 -- end
    untex = utfchartabletopattern(hash) / hash
    return untex
end

-- Builds the substituting pattern for input without backslashes, stores it in
-- 'pattern' and returns it.
local function prepare()
    local anything = P(1)
    pattern = Cs((toutfpattern() + anything)^0)
    return pattern
end

-- Applies the pattern for input without backslashes to str; the pattern is
-- compiled when this is the first use.
local function textoutf(str)
    local compiled = pattern or prepare()
    return lpegmatch(compiled,str)
end

-- Public names for the pattern builder and converter that work without backslashes.
texcharacters.strtoutfpattern = toutfpattern
texcharacters.strtextoutf     = textoutf

local collapse = utffilters.collapse
local combine  = utffilters.combine

-- What follows registers commands through interfaces.implement, so loading stops
-- here when that namespace is not available.
if not interfaces then return end

local implement = interfaces.implement

-- Compiled on first use by prepare1 and prepare2.
local pattern1, pattern2

-- Verbose names of marks and the character that each name stands for. When the
-- table is already present in 'characters' it is only marked, otherwise it is
-- created here, stored in 'characters' and (when possible) registered for storage.
local verbosemarks = characters.verbosemarks

if verbosemarks then

    mark(verbosemarks)

else

    verbosemarks = allocate {
        ["stroke"]               = utfchar(0x02F), ["slash"]        = utfchar(0x02F),
        ["middle dot"]           = utfchar(0x0B7),

        ["grave"]                = utfchar(0x300),
        ["acute"]                = utfchar(0x301),
        ["circumflex"]           = utfchar(0x302), -- was listed twice with the same value
        ["tilde"]                = utfchar(0x303),
        ["macron"]               = utfchar(0x304), ["line"]         = utfchar(0x304),
        ["overline"]             = utfchar(0x305),
        ["breve"]                = utfchar(0x306),
        ["dot"]                  = utfchar(0x307),
        ["dieresis"]             = utfchar(0x308), ["diaeresis"]    = utfchar(0x308),
        ["hook"]                 = utfchar(0x309),
        ["ring"]                 = utfchar(0x30A),
        ["double acute"]         = utfchar(0x30B), ["hungarumlaut"] = utfchar(0x30B), -- tex speak
        ["caron"]                = utfchar(0x30C),
        ["vertical line"]        = utfchar(0x30D),
        ["double vertical line"] = utfchar(0x30E),
        ["double grave"]         = utfchar(0x30F),
        ["inverted breve"]       = utfchar(0x311),
        ["dot below"]            = utfchar(0x323),
        ["ring below"]           = utfchar(0x325),
        ["cedilla"]              = utfchar(0x327), ["comma below"]  = utfchar(0x327),
        ["ogonek"]               = utfchar(0x328),
        ["caron below"]          = utfchar(0x32C),
        ["circumflex below"]     = utfchar(0x32D),
        ["tilde below"]          = utfchar(0x330),
        ["macron below"]         = utfchar(0x331), ["line below"]   = utfchar(0x331),

        -- NOTE(review): this slot lies outside the 0x300 block used by the other
        -- marks; confirm that it is intended.
        ["hook below"]           = utfchar(0x1FA9D),

    }

    characters.verbosemarks = verbosemarks

    if storage then
        storage.register("characters/verbosemarks", verbosemarks, "characters.verbosemarks")
    end

end

-- Builds pattern1, the first normalization step for \chr input: a backslashed
-- command followed by a space becomes its mapped character, verbose mark names
-- become their mapped character, spaces are dropped and any other utf character
-- is kept.
local function prepare1()
    local command = P("\\")/"" * (utfchartabletopattern(commandmapping) / commandmapping) * (P(" ")/"")
    local verbose = utfchartabletopattern(verbosemarks) / verbosemarks
    local nospace = lpegpatterns.space/""
    pattern1 = Cs((command + verbose + nospace + lpegpatterns.utf8character)^0)
    return pattern1
end

-- Builds pattern2, the second normalization step for \chr input: the dotless i
-- and j are turned back into a regular i and j, other characters are kept.
local function prepare2()
    local dotted = {
      ["ı"] = "i",
      ["ȷ"] = "j",
    }
    local undotless = utfchartabletopattern(dotted) / dotted
    pattern2 = Cs((undotless + lpegpatterns.utf8character)^0)
    return pattern2
end

-- Memoizing resolver for \chr. A key is normalized with pattern1 and pattern2 and
-- then fed, in this order and until one of them changes it, to: collapse, combine,
-- the command mapping and the backslash-less tex-to-utf converter. The outcome is
-- reported and cached.
--
-- The result is stored under the normalized key and also under the key that was
-- asked for. Storing only the normalized one means that a repeated lookup with the
-- same (unnormalized) input never finds the cached value, so that it gets resolved
-- and reported again every time.
local hash = table.setmetatableindex(function(t,k)
    local key   = k -- what was asked for
    local shown = k -- what we mention in the report
    k = lpegmatch(pattern1 or prepare1(),k) or k
    k = lpegmatch(pattern2 or prepare2(),k) or k
    local v = collapse(k) or k -- char specials
    if v == k then
        v = combine(k) or k -- with specials
        if v == k then
            v = commandmapping[k] or k
            if v == k then
                v = textoutf(k) or k
            end
            if v ~= k then
                -- resolved as a command, so we show it as one
                shown = "\\" .. shown
            end
        end
    end
    report_defining("instead of old school '%s' you can input the utf sequence %s",shown,v)
    t[k] = v
    if key ~= k then
        t[key] = v
    end
    return v
end)

-- The public command "chr": its string argument is resolved via 'hash' and the
-- result is passed to context.
implement {
    name      = "chr",
 -- arguments = "argument", -- not here
    arguments = "string",
    public    = true,
    actions   = function(str)
        context(hash[str]) -- expandable
    end
}

-- Returns the backslashed context name of slot n when the character data has
-- one, otherwise the utf character itself.
function texcharacters.safechar(n) -- was characters.safechar
    local entry = data[n]
    local name  = entry and entry.contextname
    if name then
        return "\\" .. name
    end
    return utfchar(n)
end

-- The rest of the file uses the tex, context and tokens interfaces, so we quit
-- here when context or commands is missing.
if not context or not commands then
    -- used in e.g. mtx-bibtex
    return
end

-- all kind of initializations

local tex           = tex
local texsetlccode  = tex.setlccode
local texsetsfcode  = tex.setsfcode
local texsetcatcode = tex.setcatcode

local contextsprint = context.sprint
local ctxcatcodes   = catcodes.numbers.ctxcatcodes

local texsetmacro   = tokens.setters.macro
local texsetchar    = tokens.setters.char

-- function texcharacters.defineaccents()
--     local ctx_dodefineaccentcommand  = context.dodefineaccent
--     local ctx_dodefineaccent         = context.dodefineaccent
--     local ctx_dodefinecommand        = context.dodefinecommand
--     for accent, group in next, accentmapping do
--         ctx_dodefineaccentcommand(accent)
--         for character, mapping in next, group do
--             ctx_dodefineaccent(accent,character,mapping)
--         end
--     end
--     for command, mapping in next, commandmapping do
--         ctx_dodefinecommand(command,mapping)
--     end
--     os.exit()
-- end

-- Calls context.dodefinecombine for every verbose mark (name with the spaces
-- removed) and context.dodefinecommand for every entry in the command mapping.
function texcharacters.defineaccents()
    local definecombine = context.dodefinecombine
    local definecommand = context.dodefinecommand
    for verbose, combining in next, verbosemarks do
        local name = gsub(verbose," ","") -- only the string, not the count
        definecombine(name,combining)
    end
    for command, mapping in next, commandmapping do
        definecommand(command,mapping)
    end
end

-- Registers texcharacters.defineaccents as the action of "defineaccents".
implement { -- a waste of scanner but consistent
    name    = "defineaccents",
    actions = texcharacters.defineaccents
}

-- Instead of using a TeX file to define the named glyphs, we use the table. After
-- all, we have this information available anyway.

-- Converts s to a number. Everything that tonumber accepts is returned as such;
-- a string with a leading double quote is taken as TeX style hexadecimal ("41 is
-- 65). Anything else gives 0.
--
-- The hexadecimal conversion is only attempted when the quote is found: calling
-- tonumber with a base and a nil value raises an error instead of returning nil,
-- so without that check the fallback to 0 is never reached.
local function to_number(s)
    local n = tonumber(s)
    if n then
        return n
    end
    local hex = type(s) == "string" and match(s,'^"(.*)$')
    return hex and tonumber(hex,16) or 0
end

-- "utfchar": the string argument goes through to_number and utfchar and the
-- result is printed with context.sprint.
implement {
    name      = "utfchar",
    actions   = { to_number, utfchar, contextsprint },
    arguments = "string"
}

-- "safechar": as above, but texcharacters.safechar decides what gets printed.
implement {
    name      = "safechar",
    actions   = { to_number, texcharacters.safechar, contextsprint },
    arguments = "string"
}

-- "uchar": takes two integers and prints the character in slot h*256+l.
implement {
    name      = "uchar",
    arguments = "2 integers",
    actions   = function(h,l)
        context(utfchar(h*256+l))
    end
}

-- NOTE(review): this assumes that implement makes the action available as
-- commands.utfchar; confirm, as otherwise nil is assigned here.
tex.uprint = commands.utfchar

-- In ConTeXt we don't use lc and uc codes (in fact in luatex we should have a hf code)
-- so at some point we might drop this.

-- The following get set at the TeX end:

-- Slots of space and format characters, as a hash. A character of a command
-- category that is listed here is left alone when the codes and the character
-- names are set up.
local forbidden = tohash {
    0x000A0, -- zs nobreakspace            <self>
    0x000AD, -- cf softhyphen              <self>
 -- 0x00600, -- cf arabicnumber            <self>
 -- 0x00601, -- cf arabicsanah             <self>
 -- 0x00602, -- cf arabicfootnotemarker    <self>
 -- 0x00603, -- cf arabicsafha             <self>
 -- 0x00604, -- cf arabicsamvat            <self>
 -- 0x00605, -- cf arabicnumberabove       <self>
 -- 0x0061C, -- cf arabiclettermark        <self>
 -- 0x006DD, -- cf arabicendofayah         <self>
 -- 0x008E2, -- cf arabicdisputedendofayah <self>
    0x02000, -- zs enquad                  <self>
    0x02001, -- zs emquad                  <self>
    0x02002, -- zs enspace                 \kern .5\emwidth
    0x02003, -- zs emspace                 \hskip \emwidth
    0x02004, -- zs threeperemspace         <self>
    0x02005, -- zs fourperemspace          <self>
    0x02006, -- zs sixperemspace           <self>
    0x02007, -- zs figurespace             <self>
    0x02008, -- zs punctuationspace        <self>
    0x02009, -- zs breakablethinspace      <self>
    0x0200A, -- zs hairspace               <self>
    0x0200B, -- cf zerowidthspace          <self>
    0x0200C, -- cf zwnj                    <self>
    0x0200D, -- cf zwj                     <self>
    0x0202F, -- zs narrownobreakspace      <self>
    0x0205F, -- zs medspace                \textormathspace +\medmuskip 2
 -- 0x03000, -- zs ideographicspace        <self>
 -- 0x0FEFF, -- cf zerowidthnobreakspace   \penalty \plustenthousand \kern \zeropoint
}

-- State for setting up the character codes. The names of these locals have to
-- match the names used in the code below: a mismatch silently turns the uses into
-- globals, which means that the "unset" default and the blocks directive are
-- never seen.
local csletters  = characters.csletters -- also a signal that we have initialized
local activated  = { }
local sfstate    = "unset" -- unset, traditional, normal
local blocks_too = false

-- The flag is only consulted while the letter table is built below.
directives.register("characters.blockstoo",function(v) blocks_too = v end)

-- If this is something that is not documentwide and used a lot, then we
-- need a more clever approach (trivial but not now).

-- Makes v the current mode. Unless the mode was still "unset", all characters
-- of category "lu" first get space factor code n.
local function setuppersfcodes(v,n)
    if sfstate ~= "unset" then
        report_defining("setting uppercase sf codes to %a",n)
        for u, chr in next, data do
            if chr.category == "lu" then
                texsetsfcode(u,n)
            end
        end
    end
    sfstate = v
end

directives.register("characters.spaceafteruppercase",function(v)
    if v == "traditional" then
        setuppersfcodes(v,999)
    elseif v == "normal" then
        setuppersfcodes(v,1000)
    end
end)

if not csletters then

    csletters            = allocate()
    characters.csletters = csletters

    report_defining("setting up character related codes and commands")

    if sfstate == "unset" then
        sfstate = "traditional"
    end

    local traditional = sfstate == "traditional"

    -- Sets the lc and uc codes of letter u. A missing code is set to the slot
    -- itself (also in the data), a code that is a table is replaced by the slot
    -- for the engine only. In traditional mode uppercase letters get sf code 999.
    local function setcasecodes(u,chr,category)
        local lc = chr.lccode
        local uc = chr.uccode
        if not lc then
            chr.lccode = u
            lc = u
        elseif type(lc) == "table" then
            lc = u
        end
        if not uc then
            chr.uccode = u
            uc = u
        elseif type(uc) == "table" then
            uc = u
        end
        texsetlccode(u,lc,uc)
        if traditional and category == "lu" then
            texsetsfcode(u,999) -- the slot, there is no 'code' variable here
        end
    end

    for u, chr in next, data do -- will move up
        local contextname = chr.contextname
        local category    = chr.category
        local isletter    = is_letter[category]
        if contextname then
            if is_character[category] then
                if chr.unicodeslot < 128 then
                    if isletter then
                        csletters[utfchar(u)] = u
                    end
                elseif isletter and u >= 32 and u <= 65536 then
                    csletters[utfchar(u)] = u
                end
                if isletter then
                    setcasecodes(u,chr,category)
                end
            elseif is_command[category] and not forbidden[u] then
                -- skip
            elseif is_mark[category] then
                texsetlccode(u,u,u) -- for hyphenation
            end
        elseif isletter then
            csletters[utfchar(u)] = u
            setcasecodes(u,chr,category)
        elseif is_mark[category] then
            texsetlccode(u,u,u) -- for hyphenation
        end
    end

    if blocks_too then
        -- this slows down format generation by over 10 percent
        for k, v in next, blocks do
            if v.catcode == "letter" then
                local first = v.first
                local last  = v.last
                local gaps  = v.gaps
                if first and last then
                    for u=first,last do
                        csletters[utfchar(u)] = u
                        --
                     -- texsetlccode(u,u,u) -- self self
                        --
                    end
                end
                if gaps then
                    for i=1,#gaps do
                        local u = gaps[i]
                        csletters[utfchar(u)] = u
                        --
                     -- texsetlccode(u,u,u) -- self self
                        --
                    end
                end
            end
        end
    end

    if storage then
        storage.register("characters/csletters", csletters, "characters.csletters")
    end

 -- These can go now:
 --
 -- .acute .apostrophe .bar .breve .breveacute .brevedotbelow .brevegrave .brevehook
 -- .brevetilde .caron .cedilla .circumflex .circumflexacute .circumflexdotbelow
 -- .circumflexgrave .circumflexhook .circumflextilde .commaaccent .curl .diaeresis
 -- .diaeresisacute .diaeresiscaron .diaeresisgrave .diaeresismacron .dotaccent
 -- .dotaccentmacron .dotbelow .dotmiddle .doublegrave .grave .horn .hornacute
 -- .horndotbelow .horngrave .hornhook .horntilde .hungarumlaut .invertedbreve
 -- .macron .ogonek .ogonekmacron .ring .ringacute .sharp .stroke .strokeacute .tail
 -- .tilde .tildemacron

    -- names that start with one of these are not defined as commands
    local skipped = P("greek") + P("arabic") + P("hebrew") + P("cyrillic") + P("roman")

 -- Only keep these:
 --
 -- copyright dotlessI dotlessi dotlessJ dotlessj emdash endash exclamdown figuredash
 -- guilsingleleft guilsingleright periodcentered perthousand questiondown quotedbl
 -- quotedblbase quotedblleft quotedblright quoteleft quoteright quotesingle
 -- quotesinglebase registered rightguillemot sectionmark textacute textampersand
 -- textAngstrom textasciicircum textasciitilde textat textbackslash textbar
 -- textbottomcomma textbottomdot textbraceleft textbraceright textbreve
 -- textbrokenbar textbullet textcaron textcedilla textcelsius textcent textcircledP
 -- textcircumflex textcomma textcontrolspace textcurrency textdag textddag
 -- textdegree textdiaeresis textdiv textdollar textdong textdotaccent textellipsis
 -- texteuro textfraction textgrave texthash texthorizontalbar texthungarumlaut
 -- texthyphen textkelvin textlognot textmacron textmho textminus textmp textmu
 -- textmultiply textnumero textogonek textohm textounce textpercent textperiod
 -- textpm textring textslash textsterling texttilde textunderscore textyen

    -- Defines 'contextname' as an immutable macro (or, for a non-letter below
    -- slot 128, as a character definition) for slot u, unless the name is skipped
    -- or the category does not qualify.
    local function setname(category,chr,u,contextname)
        if lpegmatch(skipped,contextname) then
         -- print("ignored",contextname)
        elseif is_character[category] then
            if chr.unicodeslot < 128 then
                if is_letter[category] then
                    texsetmacro(contextname,utfchar(u),"immutable")
                else
                    texsetchar(contextname,u,"immutable")
                end
            else
                texsetmacro(contextname,utfchar(u),"immutable")
            end
        elseif is_command[category] and not forbidden[u] then
            texsetmacro(contextname,utfchar(u),"immutable")
        end
    end

    -- Defines the context name and the additional names (contextspec) of every
    -- character in the data. The argument is not used.
    function characters.setcharacternames(ctt)
        for u, chr in next, data do -- will move up
            local contextname = chr.contextname
            local contextspec = chr.contextspec
            local category    = chr.category
            if contextname then
                setname(category,chr,u,contextname)
            end
            if contextspec then
                for i=1,#contextspec do
                    local extraname = contextspec[i]
                    if extraname ~= contextname then
                        setname(category,chr,u,extraname)
                    end
                end
            end
        end
    end

else
    mark(csletters)
end

-- A shared pattern built from the keys (utf characters) of csletters.
lpegpatterns.csletter = utfchartabletopattern(csletters)

-- The engine presets the letters to 11 (always).

-- Gives the joiners and all slots in csletters catcode 11 in catcode table cct.
-- The active catcode table is switched for the duration and restored afterwards.
function characters.setlettercatcodes(cct)
    if trace_defining then
        report_defining("assigning letter catcodes to catcode table %a",cct)
    end
    local letter   = 11
    local previous = tex.catcodetable
    tex.catcodetable = cct
    texsetcatcode(0x200C,letter) -- non-joiner
    texsetcatcode(0x200D,letter) -- joiner
    for _, slot in next, csletters do
        texsetcatcode(slot,letter)
    end
    tex.catcodetable = previous
end

-- Gives the slots 65-90 and 97-122 (A-Z and a-z) catcode 12 in catcode table
-- cct. The active catcode table is switched for the duration and restored
-- afterwards.
function characters.setothercatcodes(cct)
    if trace_defining then
        report_defining("assigning other catcodes to catcode table %a",cct)
    end
    local other    = 12
    local previous = tex.catcodetable
    tex.catcodetable = cct
    for upper=65,90 do
        texsetcatcode(upper,other)
    end
    for lower=97,122 do
        texsetcatcode(lower,other)
    end
    tex.catcodetable = previous
end

-- Gives every slot in the 'activated' list catcode 13 in catcode table cct and
-- reports each one when tracing is on. The active catcode table is switched for
-- the duration and restored afterwards.
function characters.setactivecatcodes(cct)
    local active   = 13
    local previous = tex.catcodetable
    tex.catcodetable = cct
    for index=1,#activated do
        local slot = activated[index]
        texsetcatcode(slot,active)
        if trace_defining then
            report_defining("character %U (%s) is active in set %a",slot,data[slot].description,cct)
        end
    end
    tex.catcodetable = previous
end

-- "chardescription": passes the description of the given slot to context;
-- nothing happens for a slot without data.
implement {
    name      = "chardescription",
    arguments = "integer",
    actions   = function(slot)
        local entry = data[slot]
        if not entry then
            return
        end
        context(entry.description)
    end,
}

-- The catcode and name setters become private commands, but only when
-- characters.setcharacternames has been defined.
if characters.setcharacternames then -- only in ini mode

    implement { name = "setlettercatcodes", scope = "private", actions = characters.setlettercatcodes, arguments = "integer" }
    implement { name = "setothercatcodes",  scope = "private", actions = characters.setothercatcodes,  arguments = "integer" }
    implement { name = "setactivecatcodes", scope = "private", actions = characters.setactivecatcodes, arguments = "integer" }
    implement { name = "setcharacternames", scope = "private", actions = characters.setcharacternames, arguments = "integer" }

end

-- experiment (some can move to char-ini.lua)

-- Overloads a case code of a character.
--
--   c     : the slot, as a string that tonumber accepts
--   u     : one replacement slot or a list of them (parsed by settings_to_array)
--   code  : the field to set in the character data ("uccode", "lccode")
--   codes : the name of the table in 'characters' that has the derived value for
--           the slot; that entry is wiped
--
-- One value is stored as a number, more values as a table of numbers. Nothing
-- happens when c is not a number, when there is no data for that slot (indexing
-- it would raise an error) or when u has no usable content.
local function overload(c,u,code,codes)
    local slot  = tonumber(c)
    local entry = slot and data[slot]
    if not entry then
        return
    end
    local list  = utilities.parsers.settings_to_array(u)
    local count = #list
    if count == 0 then
        return
    end
    local target
    if count == 1 then
        target = tonumber(list[1])
    else
        target = { }
        for i=1,count do
            -- entries that are not numbers are skipped
            target[#target+1] = tonumber(list[i])
        end
    end
    if target then
        entry[code] = target
        characters[codes][slot] = nil
    end
end

-- "overloaduppercase": sets the uccode of a slot and wipes its entry in
-- characters.uccodes.
implement {
    name      = "overloaduppercase",
    arguments = "2 strings",
    actions   = function(c,u)
        overload(c,u,"uccode","uccodes")
    end
}

-- "overloadlowercase": sets the lccode of a slot and wipes its entry in
-- characters.lccodes.
implement {
    name      = "overloadlowercase",
    arguments = "2 strings",
    actions   = function(c,u)
        overload(c,u,"lccode","lccodes")
    end
}

-- Just for fun we support keywords:
--
-- \startTEXpage[offset=10pt]
--     abg"
--     \sl \showboxes
--     \accent               `" h%
--     \accent               `" x%
--     \accent yoffset  .2ex `" x
--     \accent yoffset 1.1ex `x x%
-- \stopTEXpage
--
-- We could do this:
--
-- \startTEXpage[offset=10pt]
--     abg"
--     \sl \showboxes
--     \withaccent               `" h%
--     \withaccent               `" x%
--     \withaccent yoffset  .2ex `" x
--     \withaccent yoffset 1.1ex accent `x base `x%
-- \stopTEXpage
--
-- But only when users demand it:
--
-- do
--
--     local new_glyph = nodes.pool.glyph
--
--     local scankeyword   = tokens.scanners.keyword
--     local scaninteger   = tokens.scanners.integer
--     local scandimension = tokens.scanners.dimension
--     local scantoken     = tokens.scanners.token
--
--     implement {
--         name      = "withaccent",
--         public    = true,
--         protected = true,
--         actions   = function()
--             local xoffset = 0
--             local yoffset = 0
--             local accent  = false
--             local base    = false
--             local zwj     = 0x200D
--             while true do
--                 if scankeyword("xoffset") then
--                     xoffset = scandimension()
--                 elseif scankeyword("yoffset") then
--                     yoffset = scandimension()
--                 elseif scankeyword("accent") then
--                     accent = scaninteger()
--                 elseif scankeyword("base") then
--                     base = scaninteger()
--                 else
--                     break
--                 end
--             end
--             if not accent then
--                 accent = scaninteger()
--             end
--             if not base then
--                 local nxttok = scantoken()
--                 base = nxttok.cmdname == "char_number" and scaninteger() or nxttok.index
--             end
--             if base and accent and base > 0 and accent > 0 then
--                 base   = new_glyph(true,base)
--                 zwj    = new_glyph(true,zwj)
--                 accent = new_glyph(true,accent)
--                 local slant   = fonts.hashes.parameters[true].slant / 65536 -- a la tex
--                 local xheight = fonts.hashes.parameters[true].xheight -- hm, compensated for glyphscale?
--                 accent.xoffset = xoffset - .5*(base.width -accent.width) + .5*(base.height-accent.height) * slant
--                 accent.yoffset = yoffset - (xheight - accent.height)
--                 accent.left    = accent.width
--                 accent.options = accent.options | 0x40 | 0x80
--                 context.dontleavehmode()
--                 context(base)
--                 context(zwj)
--                 context(accent)
--             end
--         end,
--     }
--
-- end