char-tex.lua /size: 26 Kb    last modification: 2021-10-28 13:50
-- Register the metadata of this module in the global modules table, creating
-- that table when no other module has done so yet.
modules = modules or { }

modules['char-tex'] = {
    version   = 1.001,
    comment   = "companion to char-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local lpeg = lpeg
10local tonumber, next, type = tonumber, next, type
11local format, find, gmatch, match = string.format, string.find, string.gmatch, string.match
12local utfchar, utfbyte = utf.char, utf.byte
13local concat, tohash = table.concat, table.tohash
14local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
15
16local lpegpatterns          = lpeg.patterns
17local lpegmatch             = lpeg.match
18local utfchartabletopattern = lpeg.utfchartabletopattern
19
20local allocate              = utilities.storage.allocate
21local mark                  = utilities.storage.mark
22
23local context               = context
24local commands              = commands
25
26local characters            = characters
27local texcharacters         = { }
28characters.tex              = texcharacters
29local utffilters            = characters.filters.utf
30
31local is_character          = characters.is_character
32local is_letter             = characters.is_letter
33local is_command            = characters.is_command
34local is_spacing            = characters.is_spacing
35local is_mark               = characters.is_mark
36local is_punctuation        = characters.is_punctuation
37
38local data                  = characters.data  if not data then return end
39local blocks                = characters.blocks
40
-- Tracing flag for the character setup code, toggled by the tracker
-- "characters.defining". The callback has to assign this local: the original
-- assigned a global called characters_defining, so the flag tested by the
-- report calls further down could never become true.
local trace_defining = false

trackers.register("characters.defining", function(v) trace_defining = v end)
42
43local report_defining       = logs.reporter("characters")
44
--[[ldx--
<p>In order to deal with 8-bit output, we need to find a way to go from <l n='utf'/> to
8-bit. This is handled in the <l n='luatex'/> engine itself.</p>

<p>This leaves us with problems with characters that are specific to <l n='tex'/> like
<type>{}</type>, <type>$</type> and the like. We can remap some chars that tex input files
are sensitive to to a private area (while writing to a utility file) and revert them
to their original slot when we read in such a file. Instead of reverting, we can (when
we resolve characters to glyphs) map them to their right glyph there. For this purpose
we can use the private planes 0x0F0000 and 0x100000.</p>
--ldx]]--
56
-- The characters that get relocated to the private area.
local special = "~#$%^&_{}\\|" -- "~#$%{}\\|"

-- Three lookup tables, filled by the loop that follows:
--   low     : special character -> its private counterpart
--   high    : private counterpart -> special character
--   escapes : special character -> backslash escaped form
local low, high, escapes = allocate(), allocate(), allocate()

-- The tables are published together as the private utf filter.
local private = {
    escapes = escapes,
    high    = high,
    low     = low,
}

utffilters.private = private
69
-- Fill the escapes, low and high tables for every character in special.
-- gmatch only ever yields strings, so the former branch that handled a numeric
-- ch was unreachable and has been dropped; the private character is now
-- computed once per entry instead of twice.
for ch in gmatch(special,".") do
    local cb = utfbyte(ch)
    if cb < 256 then
        local relocated = utfchar(0x0F0000 + cb)
        escapes[ch] = "\\" .. ch
        low[ch]     = relocated
        if ch == "%" then
            ch = "%%" -- nasty, but we need this as in replacements (also in lpeg) % is interpreted
        end
        high[relocated] = ch
    end
end
86
-- Replacer patterns built once from the tables above (frozen, only for basic
-- tex): tohigh applies the low table, tolow applies the high table.
local tohigh = lpeg.replacer(low)
local tolow  = lpeg.replacer(high)

lpegpatterns.utftolow  = tolow
lpegpatterns.utftohigh = tohigh

-- Run the tohigh replacer over str.
utffilters.harden = function(str)
    return lpegmatch(tohigh,str)
end

-- Run the tolow replacer over str.
utffilters.soften = function(str)
    return lpegmatch(tolow,str)
end

-- The same three tables, wrapped as utf remappers (maybe: ,"dynamic").
private.escape  = utf.remapper(escapes)
private.replace = utf.remapper(low)
private.revert  = utf.remapper(high)
104
105--[[ldx--
106<p>We get a more efficient variant of this when we integrate
107replacements in collapser. This more or less renders the previous
108private code redundant. The following code is equivalent but the
109first snippet uses the relocated dollars.</p>
110
111<typing>
112[󰀤x󰀤] [$x$]
113</typing>
114--ldx]]--
115
-- using the tree-lpeg-mapper would be nice but we also need to deal with end-of-string
-- cases: "\"\i" and don't want "\relax" to be seen as "\r e lax" (for which we need to mess
-- with spaces)
119
-- Accent command -> (base character -> precomposed character). The key is the
-- name of the accent command without the backslash; the "" entry of each group
-- is the result for an empty base. The base can also be the dotless i, either
-- as character ("ı") or as command ("\\i").
--
-- The grave accent group had empty strings for Y and y, which made the grave
-- accent on y produce nothing; these now map to the precomposed characters.
local accentmapping = allocate {
    ['"'] = { [""] = "¨",
        A = "Ä", a = "ä",
        E = "Ë", e = "ë",
        I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
        O = "Ö", o = "ö",
        U = "Ü", u = "ü",
        Y = "Ÿ", y = "ÿ",
    },
    ["'"] = { [""] = "´",
        A = "Á", a = "á",
        C = "Ć", c = "ć",
        E = "É", e = "é",
        I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
        L = "Ĺ", l = "ĺ",
        N = "Ń", n = "ń",
        O = "Ó", o = "ó",
        R = "Ŕ", r = "ŕ",
        S = "Ś", s = "ś",
        U = "Ú", u = "ú",
        Y = "Ý", y = "ý",
        Z = "Ź", z = "ź",
    },
    ["."] = { [""] = "˙",
        C = "Ċ", c = "ċ",
        E = "Ė", e = "ė",
        G = "Ġ", g = "ġ",
        I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
        Z = "Ż", z = "ż",
    },
    ["="] = { [""] = "¯",
        A = "Ā", a = "ā",
        E = "Ē", e = "ē",
        I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
        O = "Ō", o = "ō",
        U = "Ū", u = "ū",
    },
    ["H"] = { [""] = "˝",
        O = "Ő", o = "ő",
        U = "Ű", u = "ű",
    },
    ["^"] = { [""] = "ˆ",
        A = "Â", a = "â",
        C = "Ĉ", c = "ĉ",
        E = "Ê", e = "ê",
        G = "Ĝ", g = "ĝ",
        H = "Ĥ", h = "ĥ",
        I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
        J = "Ĵ", j = "ĵ",
        O = "Ô", o = "ô",
        S = "Ŝ", s = "ŝ",
        U = "Û", u = "û",
        W = "Ŵ", w = "ŵ",
        Y = "Ŷ", y = "ŷ",
    },
    ["`"] = { [""] = "`",
        A = "À", a = "à",
        E = "È", e = "è",
        I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
        O = "Ò", o = "ò",
        U = "Ù", u = "ù",
        Y = "Ỳ", y = "ỳ",
    },
    ["c"] = { [""] = "¸",
        C = "Ç", c = "ç",
        K = "Ķ", k = "ķ",
        L = "Ļ", l = "ļ",
        N = "Ņ", n = "ņ",
        R = "Ŗ", r = "ŗ",
        S = "Ş", s = "ş",
        T = "Ţ", t = "ţ",
    },
    ["k"] = { [""] = "˛",
        A = "Ą", a = "ą",
        E = "Ę", e = "ę",
        I = "Į", i = "į",
        U = "Ų", u = "ų",
    },
    ["r"] = { [""] = "˚",
        A = "Å", a = "å",
        U = "Ů", u = "ů",
    },
    ["u"] = { [""] = "˘",
        A = "Ă", a = "ă",
        E = "Ĕ", e = "ĕ",
        G = "Ğ", g = "ğ",
        I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
        O = "Ŏ", o = "ŏ",
        U = "Ŭ", u = "ŭ",
    },
    ["v"] = { [""] = "ˇ",
        C = "Č", c = "č",
        D = "Ď", d = "ď",
        E = "Ě", e = "ě",
        L = "Ľ", l = "ľ",
        N = "Ň", n = "ň",
        R = "Ř", r = "ř",
        S = "Š", s = "š",
        T = "Ť", t = "ť",
        Z = "Ž", z = "ž",
    },
    ["~"] = { [""] = "˜",
        A = "Ã", a = "ã",
        I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
        N = "Ñ", n = "ñ",
        O = "Õ", o = "õ",
        U = "Ũ", u = "ũ",
    },
}

texcharacters.accentmapping = accentmapping
231
-- Accent command -> combining character (incomplete). Only the commented-out
-- fallback in remap_accent refers to this table. The example after each entry
-- shows the combining character applied to E.
local accent_map = allocate {
    ["~"] = "̃", -- Ẽ
    ['"'] = "̈", -- Ë
    ["`"] = "̀", -- È
    ["'"] = "́", -- É
    ["^"] = "̂", -- Ê
    -- combining characters that have no entry yet:
    --  ̄ Ē
    --  ̆ Ĕ
    --  ̇ Ė
    --  ̉ Ẻ
    --  ̌ Ě
    --  ̏ Ȅ
    --  ̑ Ȇ
    --  ̣ Ẹ
    --  ̧ Ȩ
    --  ̨ Ę
    --  ̭ Ḙ
    --  ̰ Ḛ
}
251
252-- local accents = concat(table.keys(accentmapping)) -- was _map
253
-- Resolve accent command a applied to base c.
--
-- Returns the precomposed character from accentmapping when there is one.
-- Otherwise the TeX input is rebuilt: as \a{c} when braced is true, else as
-- "\a c" with a space between command and base.
local function remap_accent(a,c,braced)
    local group    = accentmapping[a]
    local composed = group and group[c]
    if composed then
        return composed
    end
    -- a former fallback (disabled) appended the combining character:
    --
    --     local m = accent_map[a]
    --     if m then
    --         return c .. m
    --     elseif braced then -- or #c > 0
    --
    if braced then -- or #c > 0
        return "\\" .. a .. "{" .. c .. "}"
    end
    return "\\" .. a .. " " .. c
end
272
-- Command name (without backslash) -> the character it stands for.
--
-- The entry for AA was an empty string, so the uppercase counterpart of \aa
-- produced nothing; it now maps to the uppercase of "å".
local commandmapping = allocate {
    ["aa"] = "å", ["AA"] = "Å",
    ["ae"] = "æ", ["AE"] = "Æ",
    ["cc"] = "ç", ["CC"] = "Ç",
    ["i"]  = "ı", ["j"]  = "ȷ",
    ["ij"] = "ij", ["IJ"] = "IJ",
    ["l"]  = "ł", ["L"]  = "Ł",
    ["o"]  = "ø", ["O"]  = "Ø",
    ["oe"] = "œ", ["OE"] = "Œ",
    ["sz"] = "ß", ["SZ"] = "SZ", ["ss"] = "ß", ["SS"] = "ß", -- uppercase: ẞ
}

texcharacters.commandmapping = commandmapping
286
-- TeX input ligatures -> the character they stand for. toutfpattern adds these
-- to its replacement hash, so an empty value (as all four entries had) deletes
-- the ligature from the input instead of converting it.
local ligaturemapping = allocate {
    ["''"]  = "”",
    ["``"]  = "“",
    ["--"]  = "–",
    ["---"] = "—",
}
293
294-- local achar    = R("az","AZ") + P("ı") + P("\\i")
295--
296-- local spaces   = P(" ")^0
297-- local no_l     = P("{") / ""
298-- local no_r     = P("}") / ""
299-- local no_b     = P('\\') / ""
300--
301-- local lUr      = P("{") * C(achar) * P("}")
302--
303-- local accents_1 = [["'.=^`~]]
304-- local accents_2 = [[Hckruv]]
305--
306-- local accent   = P('\\') * (
307--     C(S(accents_1)) * (lUr * Cc(true) + C(achar) * Cc(false)) + -- we need achar for ı etc, could be sped up
308--     C(S(accents_2)) *  lUr * Cc(true)
309-- ) / remap_accent
310--
311-- local csname  = P('\\') * C(R("az","AZ")^1)
312--
313-- local command  = (
314--     csname +
315--     P("{") * csname * spaces * P("}")
316-- ) / commandmapping -- remap_commands
317--
318-- local both_1 = Cs { "run",
319--     accent  = accent,
320--     command = command,
321--     run     = (V("accent") + no_l * V("accent") * no_r + V("command") + P(1))^0,
322-- }
323--
324-- local both_2 = Cs { "run",
325--     accent  = accent,
326--     command = command,
327--     run     = (V("accent") + V("command") + no_l * ( V("accent") + V("command") ) * no_r + P(1))^0,
328-- }
329--
330-- function texcharacters.toutf(str,strip)
331--     if not find(str,"\\",1,true) then
332--         return str
333--     elseif strip then
334--         return lpegmatch(both_1,str)
335--     else
336--         return lpegmatch(both_2,str)
337--     end
338-- end
339
-- Cached result of toutfpattern, built on first use.
local untex

-- Return the lpeg pattern that replaces the TeX spelling of accented
-- characters, character commands and ligatures by the mapped character.
--
-- For every accent/base pair these spellings are registered, each also in a
-- variant wrapped in an extra pair of braces:
--
--     \a{c}      always
--     \a c       when the accent name is an ASCII letter (space required)
--     \ac        when the accent name is not a letter
--
-- Commands are registered as "\name " (trailing space), "{\name}" and
-- "{\name }"; ligatures are registered as they are.
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        local isletter = (accent >= "a" and accent <= "z") or (accent >= "A" and accent <= "Z")
        local command  = "\\" .. accent
        local unbraced = isletter and (command .. " ") or command
        for base, result in next, group do
            hash[unbraced .. base]                      = result
            hash["{" .. unbraced .. base .. "}"]        = result
            hash[command .. "{" .. base .. "}"]         = result
            hash["{" .. command .. "{" .. base .. "}}"] = result
        end
    end
    for name, result in next, commandmapping do
        local command = "\\" .. name
        hash[command .. " "]         = result
        hash["{" .. command .. "}"]  = result
        hash["{" .. command .. " }"] = result
    end
    for ligature, result in next, ligaturemapping do
        hash[ligature] = result
    end
    untex = utfchartabletopattern(hash) / hash
    return untex
end

texcharacters.toutfpattern = toutfpattern
372
-- Substitution pattern over a whole string, created on the first call that
-- needs it.
local pattern = nil

-- Convert the TeX spellings known to toutfpattern in str to utf.
--
-- A string without a backslash (which includes the empty string) is returned
-- untouched without building or running the pattern. The strip argument is
-- accepted but not used.
function texcharacters.toutf(str,strip)
    if str == "" or not find(str,"\\",1,true) then
        return str
    end
    if not pattern then
        pattern = Cs((toutfpattern() + P(1))^0)
    end
    return lpegmatch(pattern,str)
end
390
391-- print(texcharacters.toutf([[\~{Z}]],true))
392-- print(texcharacters.toutf([[\'\i]],true))
393-- print(texcharacters.toutf([[\'{\i}]],true))
394-- print(texcharacters.toutf([[\"{e}]],true))
395-- print(texcharacters.toutf([[\" {e}]],true))
396-- print(texcharacters.toutf([[{\"{e}}]],true))
397-- print(texcharacters.toutf([[{\" {e}}]],true))
398-- print(texcharacters.toutf([[{\l}]],true))
399-- print(texcharacters.toutf([[{\l }]],true))
400-- print(texcharacters.toutf([[\v{r}]],true))
401-- print(texcharacters.toutf([[fo{\"o}{\ss}ar]],true))
402-- print(texcharacters.toutf([[H{\'a}n Th\^e\llap{\raise 0.5ex\hbox{\'{\relax}}} Th{\'a}nh]],true))
403
-- Return a TeX-safe representation of slot n: the command "\<contextname>"
-- when the character data has a contextname for n, else the utf character
-- itself. (was characters.safechar)
function texcharacters.safechar(n)
    local entry = data[n]
    local name  = entry and entry.contextname
    if name then
        return "\\" .. name
    end
    return utfchar(n)
end
412
-- The rest of this file is all kind of initializations that need the TeX end.
-- Stop loading here when context or commands is missing (this file is also
-- used in e.g. mtx-bibtex) or when there are no interfaces.

if not context or not commands or not interfaces then
    return
end
421
422local implement     = interfaces.implement
423
424local tex           = tex
425local texsetlccode  = tex.setlccode
426local texsetsfcode  = tex.setsfcode
427local texsetcatcode = tex.setcatcode
428
429local contextsprint = context.sprint
430local ctxcatcodes   = catcodes.numbers.ctxcatcodes
431
432local texsetmacro   = tokens.setters.macro
433local texsetchar    = tokens.setters.char
434
-- Define the accent commands, every accent/character combination and the
-- character commands at the TeX end, by calling dodefineaccentcommand,
-- dodefineaccent and dodefinecommand for the entries of accentmapping and
-- commandmapping.
function texcharacters.defineaccents()
    local defineaccentcommand = context.dodefineaccentcommand
    local defineaccent        = context.dodefineaccent
    local definecommand       = context.dodefinecommand
    for accent, group in next, accentmapping do
        defineaccentcommand(accent)
        for character, mapping in next, group do
            defineaccent(accent,character,mapping)
        end
    end
    for command, mapping in next, commandmapping do
        definecommand(command,mapping)
    end
end

-- a waste of scanner but consistent
implement {
    actions = texcharacters.defineaccents,
    name    = "defineaccents",
}
454
455--[[ldx--
456<p>Instead of using a <l n='tex'/> file to define the named glyphs, we
457use the table. After all, we have this information available anyway.</p>
458--ldx]]--
459
-- Print TeX code (under ctxcatcodes) that gives slot n catcode 13 and defines
-- that character as an unexpanded macro expanding to \<name>. (not used)
function commands.makeactive(n,name)
    local code = format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
    contextsprint(ctxcatcodes,code)
 -- context("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
end
464
-- Convert the string s to a number.
--
-- Anything tonumber accepts is returned as such. Otherwise a TeX style
-- hexadecimal number (a double quote followed by hex digits) is tried, and 0
-- is returned when that fails too.
--
-- The original passed the match result straight to tonumber(...,16); for a
-- string without a leading quote that is tonumber(nil,16), which raises an
-- error instead of reaching the "or 0" fallback.
local function to_number(s)
    local n = tonumber(s)
    if n then
        return n
    end
    local hex = match(s,'^"(.*)$')
    return hex and tonumber(hex,16) or 0
end
472
-- utfchar: string argument -> number -> utf character -> printed
implement {
    name      = "utfchar",
    arguments = "string",
    actions   = { to_number, utfchar, contextsprint },
}

-- safechar: string argument -> number -> command or utf character -> printed
implement {
    name      = "safechar",
    arguments = "string",
    actions   = { to_number, texcharacters.safechar, contextsprint },
}

-- uchar: two integers, combined as first*256 + second into one slot
implement {
    name      = "uchar",
    arguments = { "integer", "integer" },
    actions   = function(hi,lo)
        local slot = hi*256 + lo
        context(utfchar(slot))
    end,
}

tex.uprint = commands.utfchar
494
-- in context we don't use lc and uc codes (in fact in luatex we should have a hf code)
-- so at some point we might drop this
497
498-- The following get set at the TeX end:
499
-- Set of slots that get set at the TeX end. The code below skips command
-- category characters that are in this set. Per entry: category, context name
-- and the definition used (<self> or the TeX code).
local forbidden = tohash {
    0x00A0, -- zs nobreakspace            <self>
    0x00AD, -- cf softhyphen              <self>
 -- 0x0600, -- cf arabicnumber            <self>
 -- 0x0601, -- cf arabicsanah             <self>
 -- 0x0602, -- cf arabicfootnotemarker    <self>
 -- 0x0603, -- cf arabicsafha             <self>
 -- 0x0604, -- cf arabicsamvat            <self>
 -- 0x0605, -- cf arabicnumberabove       <self>
 -- 0x061C, -- cf arabiclettermark        <self>
 -- 0x06DD, -- cf arabicendofayah         <self>
 -- 0x08E2, -- cf arabicdisputedendofayah <self>
    0x2000, -- zs enquad                  <self>
    0x2001, -- zs emquad                  <self>
    0x2002, -- zs enspace                 \kern .5\emwidth
    0x2003, -- zs emspace                 \hskip \emwidth
    0x2004, -- zs threeperemspace         <self>
    0x2005, -- zs fourperemspace          <self>
    0x2006, -- zs sixperemspace           <self>
    0x2007, -- zs figurespace             <self>
    0x2008, -- zs punctuationspace        <self>
    0x2009, -- zs breakablethinspace      <self>
    0x200A, -- zs hairspace               <self>
    0x200B, -- cf zerowidthspace          <self>
    0x200C, -- cf zwnj                    <self>
    0x200D, -- cf zwj                     <self>
    0x202F, -- zs narrownobreakspace      <self>
    0x205F, -- zs medspace                \textormathspace +\medmuskip 2
 -- 0x3000, -- zs ideographicspace        <self>
 -- 0xFEFF, -- cf zerowidthnobreakspace   \penalty \plustenthousand \kern \zeropoint
}
531
-- State shared by the setup code that follows.
--
--   csletters : the letters table of a previous initialization, if any; its
--               presence is also the signal that we have initialized
--   activated : list of slots that setactivecatcodes makes active
--   sfmode    : unset, traditional, normal
--   block_too : toggled by the directive "characters.blockstoo"
--
-- NOTE(review): sfmode is not referenced anywhere else in this file; the space
-- factor code further down uses the name sfstate instead -- confirm which of
-- the two names is intended.
local csletters = characters.csletters
local activated, sfmode, block_too = { }, "unset", false

directives.register("characters.blockstoo",function(v)
    block_too = v
end)
538
539-- If this is something that is not documentwide and used a lot, then we
540-- need a more clever approach (trivial but not now).
541
-- Switch the space factor state to v and, unless the current state is
-- "unset", give every character of category "lu" (uppercase) the sf code n.
--
-- NOTE(review): sfstate is not declared local anywhere in this file (the local
-- declared before this function is called sfmode), so it is read and written
-- as a global here -- confirm whether that is intended.
local function setuppersfcodes(v,n)
    local initialized = sfstate ~= "unset"
    if initialized then
        report_defining("setting uppercase sf codes to %a",n)
        for slot, entry in next, data do
            if entry.category == "lu" then
                texsetsfcode(slot,n)
            end
        end
    end
    sfstate = v
end
553
-- Directive "characters.spaceafteruppercase": the value "traditional" selects
-- sf code 999 for uppercase characters, "normal" selects 1000; any other value
-- is ignored.
directives.register("characters.spaceafteruppercase",function(v)
    local sfcodes = { traditional = 999, normal = 1000 }
    local n = sfcodes[v]
    if n then
        setuppersfcodes(v,n)
    end
end)
561
-- One-time setup, done when characters.csletters does not exist yet: collect
-- the letters in csletters (utf character -> slot), pass lc/uc codes to TeX and
-- define characters.setcharacternames. When csletters is already there it is
-- only marked.
--
-- Changes compared to the original:
--   * the sf code was set with texsetsfcode(code,999) where code is not
--     defined anywhere; the slot u is meant
--   * the blocks loop tested blocks_too while the directive
--     "characters.blockstoo" sets block_too, so it could never be enabled
--   * the duplicated lc/uc code handling is shared in one local helper
if not csletters then

    csletters            = allocate()
    characters.csletters = csletters

    report_defining("setting up character related codes and commands")

    -- NOTE(review): sfstate is not declared local in this file (the local
    -- declared next to csletters is called sfmode), so this tests a global;
    -- when nothing has set it, traditional ends up false -- confirm the intent
    -- before changing it.
    if sfstate == "unset" then
        sfstate = "traditional"
    end

    local traditional = sfstate == "traditional"

    -- Pass the lc and uc code of letter slot u to TeX. A missing code is also
    -- stored in the character entry as the slot itself; a code that is a table
    -- is left alone in the entry but TeX gets the slot itself.
    local function setcasecodes(u,chr,category)
        local lc = chr.lccode
        local uc = chr.uccode
        if not lc then
            chr.lccode = u
            lc = u
        elseif type(lc) == "table" then
            lc = u
        end
        if not uc then
            chr.uccode = u
            uc = u
        elseif type(uc) == "table" then
            uc = u
        end
        texsetlccode(u,lc,uc)
        if traditional and category == "lu" then
            texsetsfcode(u,999)
        end
    end

    for u, chr in next, data do -- will move up
        local contextname = chr.contextname
        local category    = chr.category
        local isletter    = is_letter[category]
        if contextname then
            if is_character[category] then
                -- below 128 every letter counts, above that only slots in
                -- the range 32 .. 65536
                local ascii = chr.unicodeslot < 128
                if isletter then
                    if ascii or (u >= 32 and u <= 65536) then
                        csletters[utfchar(u)] = u
                    end
                    setcasecodes(u,chr,category)
                end
            elseif is_command[category] and not forbidden[u] then
                -- skip
            elseif is_mark[category] then
                texsetlccode(u,u,u) -- for hyphenation
            end
        elseif isletter then
            csletters[utfchar(u)] = u
            setcasecodes(u,chr,category)
        elseif is_mark[category] then
            texsetlccode(u,u,u) -- for hyphenation
        end
    end

    if block_too then
        -- this slows down format generation by over 10 percent
        for k, v in next, blocks do
            if v.catcode == "letter" then
                local first = v.first
                local last  = v.last
                local gaps  = v.gaps
                if first and last then
                    for u=first,last do
                        csletters[utfchar(u)] = u
                        --
                     -- texsetlccode(u,u,u) -- self self
                        --
                    end
                end
                if gaps then
                    for i=1,#gaps do
                        local u = gaps[i]
                        csletters[utfchar(u)] = u
                        --
                     -- texsetlccode(u,u,u) -- self self
                        --
                    end
                end
            end
        end
    end

    if storage then
        storage.register("characters/csletters", csletters, "characters.csletters")
    end

    -- Define the context names at the TeX end, all as "immutable": a macro
    -- holding the utf character, except for non-letters below slot 128, which
    -- are defined with the char setter. Command category characters are
    -- skipped when they are in the forbidden set. The ctt argument is not used.
    function characters.setcharacternames(ctt)
        for u, chr in next, data do -- will move up
            local contextname = chr.contextname
            local category    = chr.category
            local isletter    = is_letter[category]
            if contextname then
                if is_character[category] then
                    if chr.unicodeslot < 128 then
                        if isletter then
                            texsetmacro(contextname,utfchar(u),"immutable")
                        else
                            texsetchar(contextname,u,"immutable")
                        end
                    else
                        texsetmacro(contextname,utfchar(u),"immutable")
                    end
                elseif is_command[category] and not forbidden[u] then
                    texsetmacro(contextname,utfchar(u),"immutable")
                end
            end
        end
    end

else
    mark(csletters)
end
699
-- Publish the pattern that utfchartabletopattern builds from the csletters
-- table.
do
    local csletter = utfchartabletopattern(csletters)
    lpegpatterns.csletter = csletter
end
701
702-- todo: get rid of activated
703-- todo: move first loop out ,merge with above
704
-- Give the non-joiner, the joiner and every slot in csletters catcode 11 in
-- catcode table cct. The catcode table that was current on entry is made
-- current again afterwards.
function characters.setlettercatcodes(cct)
    if trace_defining then
        report_defining("assigning letter catcodes to catcode table %a",cct)
    end
    local letter   = 11
    local previous = tex.catcodetable
    tex.catcodetable = cct
    texsetcatcode(0x200C,letter) -- non-joiner
    texsetcatcode(0x200D,letter) -- joiner
    for _, slot in next, csletters do
        texsetcatcode(slot,letter)
    end
 -- older approaches, kept for reference:
 --
 -- for u, chr in next, data do
 --     if not chr.fallback and is_letter[chr.category] and u >= 32 and u <= 65536 then
 --         texsetcatcode(u,11)
 --     end
 --     local range = chr.range
 --     if range then
 --         for i=1,range.first,range.last do -- tricky as not all are letters
 --             texsetcatcode(i,11)
 --         end
 --     end
 -- end
 -- for k, v in next, blocks do
 --     if v.catcode == "letter" then
 --         for u=v.first,v.last do
 --             texsetcatcode(u,11)
 --         end
 --     end
 -- end
    tex.catcodetable = previous
end
736
-- Give every slot in the activated list catcode 13 in catcode table cct,
-- reporting each one when tracing is on. The catcode table that was current
-- on entry is made current again afterwards.
function characters.setactivecatcodes(cct)
    local active   = 13
    local previous = tex.catcodetable
    tex.catcodetable = cct
    for index=1,#activated do
        local slot = activated[index]
        texsetcatcode(slot,active)
        if trace_defining then
            report_defining("character %U (%s) is active in set %a",slot,data[slot].description,cct)
        end
    end
    tex.catcodetable = previous
end
749
750--[[ldx--
751<p>Setting the lccodes is also done in a loop over the data table.</p>
752--ldx]]--
753
754-- function characters.setcodes() -- we could loop over csletters
755--     if trace_defining then
756--         report_defining("defining lc and uc codes")
757--     end
758--     local traditional = sfstate == "traditional" or sfstate == "unset"
759--     for code, chr in next, data do
760--         local cc = chr.category
761--         if is_letter[cc] then
762--             local range = chr.range
763--             if range then
764--                 for i=range.first,range.last do
765--                     texsetlccode(i,i,i) -- self self
766--                 end
767--             else
768--                 local lc, uc = chr.lccode, chr.uccode
769--                 if not lc then
770--                     chr.lccode, lc = code, code
771--                 elseif type(lc) == "table" then
772--                     lc = code
773--                 end
774--                 if not uc then
775--                     chr.uccode, uc = code, code
776--                 elseif type(uc) == "table" then
777--                     uc = code
778--                 end
779--                 texsetlccode(code,lc,uc)
780--                 if traditional and cc == "lu" then
781--                     texsetsfcode(code,999)
782--                 end
783--             end
784--         elseif is_mark[cc] then
785--             texsetlccode(code,code,code) -- for hyphenation
786--         end
787--     end
788--     if traditional then
789--         sfstate = "traditional"
790--     end
791-- end
792
793-- tex
794
-- chardescription: print the description of the given slot; nothing is printed
-- for a slot without character data.
implement {
    name      = "chardescription",
    arguments = "integer",
    actions   = function(slot)
        local entry = data[slot]
        if not entry then
            return
        end
        context(entry.description)
    end,
}
805
806-- xml
807
-- these will be remapped in that byte range
characters.activeoffset = 0x10000

-- Print a group in which slot gets catcode 13 and the character at that slot
-- is globally defined (\xdef) as \string followed by chr. (not used)
function commands.remapentity(chr,slot)
    local code = format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)
    contextsprint(code)
end
813
814-- xml.entities = xml.entities or { }
815--
816-- storage.register("xml/entities",xml.entities,"xml.entities") -- this will move to lxml
817--
818-- function characters.setmkiventities()
819--     local entities = xml.entities
820--     entities.lt  = "<"
821--     entities.amp = "&"
822--     entities.gt  = ">"
823-- end
824--
825-- function characters.setmkiientities()
826--     local entities = xml.entities
827--     entities.lt  = utfchar(characters.activeoffset + utfbyte("<"))
828--     entities.amp = utfchar(characters.activeoffset + utfbyte("&"))
829--     entities.gt  = utfchar(characters.activeoffset + utfbyte(">"))
830-- end
831
-- Only in ini mode (setcharacternames is defined just then): register the
-- three setters as private commands that take one integer.
if characters.setcharacternames then

    local names = { "setlettercatcodes", "setactivecatcodes", "setcharacternames" }

    for i=1,#names do
        local name = names[i]
        implement {
            name      = name,
            scope     = "private",
            actions   = characters[name],
            arguments = "integer",
        }
    end

end
839
840-- experiment (some can move to char-ini.lua)
841
-- Overload a case code of one character.
--
--   c     : the slot, as number or numeric string
--   u     : a settings list with the new value(s); one value is stored as a
--           number, several values as a table of numbers
--   code  : field to set in the character entry ("uccode" or "lccode")
--   codes : name of the table in characters whose entry for the slot is
--           cleared ("uccodes" or "lccodes")
--
-- Nothing happens when c is not a number, when no value in u converts to a
-- number, or when there is no character data for the slot. The original
-- indexed data[c] without checking it (an error for an unknown slot) and
-- stored an empty table when none of several values was a number.
local function overload(c,u,code,codes)
    local slot = tonumber(c)
    if not slot then
        return
    end
    local list = utilities.parsers.settings_to_array(u)
    local n = #list
    if n == 0 then
        return
    end
    local target = nil
    if n == 1 then
        target = tonumber(list[1])
    else
        target = { }
        for i=1,n do
            -- values that are not numbers are skipped
            target[#target+1] = tonumber(list[i])
        end
        if #target == 0 then
            target = nil
        end
    end
    if not target then
        return
    end
    local entry = data[slot]
    if not entry then
        return
    end
    entry[code] = target
    characters[codes][slot] = nil
end
866
-- overloaduppercase / overloadlowercase: both take two strings (the slot and a
-- settings list with the new values) and hand them to overload, for the uccode
-- and lccode fields respectively.
do

    local function register(name,code,codes)
        interfaces.implement {
            name      = name,
            arguments = "2 strings",
            actions   = function(c,u)
                overload(c,u,code,codes)
            end
        }
    end

    register("overloaduppercase","uccode","uccodes")
    register("overloadlowercase","lccode","lccodes")

end
882