1if not modules then modules = { } end modules ['char-tex'] = {
2 version = 1.001,
3 comment = "companion to char-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local lpeg = lpeg
10local tonumber, next, type = tonumber, next, type
11local format, find, gmatch, match = string.format, string.find, string.gmatch, string.match
12local utfchar, utfbyte = utf.char, utf.byte
13local concat, tohash = table.concat, table.tohash
14local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
15
16local lpegpatterns = lpeg.patterns
17local lpegmatch = lpeg.match
18local utfchartabletopattern = lpeg.utfchartabletopattern
19
20local allocate = utilities.storage.allocate
21local mark = utilities.storage.mark
22
23local context = context
24local commands = commands
25
26local characters = characters
27local texcharacters = { }
28characters.tex = texcharacters
29local utffilters = characters.filters.utf
30
31local is_character = characters.is_character
32local is_letter = characters.is_letter
33local is_command = characters.is_command
34local is_spacing = characters.is_spacing
35local is_mark = characters.is_mark
36local is_punctuation = characters.is_punctuation
37
38local data = characters.data if not data then return end
39local blocks = characters.blocks
40
-- Tracing flag for the reports issued while catcodes are assigned; it is
-- switched by the "characters.defining" tracker.
local trace_defining = false

trackers.register("characters.defining", function(v)
    -- Assign to the local flag that the functions in this file test. Writing
    -- to any other (undeclared) name would only create a global and leave
    -- the tracing permanently disabled.
    trace_defining = v
end)

-- Reporter used for all messages of this module (category "characters").
local report_defining = logs.reporter("characters")
44
45
46
47
48
49
50
51
52
53
-- Tables used to move a handful of characters out of the way and back:
--
--   escapes : character            -> backslash followed by the character
--   low     : character            -> character at 0x0F0000 + byte value
--   high    : shifted character    -> original character
--
-- The three tables are exported as fields of utffilters.private.

local low     = allocate()
local high    = allocate()
local escapes = allocate()
local special = "~#$%^&_{}\\|"

local private = {
    low     = low,
    high    = high,
    escapes = escapes,
}

utffilters.private = private

-- gmatch with "." yields one-character strings only, so there is no need to
-- handle numeric input here.
for ch in gmatch(special,".") do
    local cb = utfbyte(ch)
    if cb < 256 then
        local shifted = utfchar(0x0F0000 + cb)
        escapes[ch] = "\\" .. ch
        low[ch]     = shifted
        -- The percent sign is stored doubled in the reverse table.
        -- NOTE(review): presumably because the value is used as a replacement
        -- string in which % acts as an escape character; confirm against
        -- lpeg.replacer.
        if ch == "%" then
            high[shifted] = "%%"
        else
            high[shifted] = ch
        end
    end
end
83
-- Patterns built from the low/high tables; both are also published in
-- lpeg.patterns under the names utftohigh and utftolow.
local tohigh = lpeg.replacer(low)
local tolow  = lpeg.replacer(high)

lpegpatterns.utftohigh = tohigh
lpegpatterns.utftolow  = tolow

-- Apply the "low" table to a string (characters -> shifted characters).
utffilters.harden = function(str)
    return lpegmatch(tohigh,str)
end

-- Apply the "high" table to a string (shifted characters -> characters).
utffilters.soften = function(str)
    return lpegmatch(tolow,str)
end

-- Remappers built from the same three tables.
private.escape  = utf.remapper(escapes)
private.replace = utf.remapper(low)
private.revert  = utf.remapper(high)
101
-- Accent table: the outer key is the name of a TeX accent command (without
-- the backslash), the inner key is the base character the accent is applied
-- to and the value is the resulting precomposed character. Where present, the
-- dotless i and its command form "\\i" are listed as extra inner keys next
-- to the regular i. The empty-string inner key holds the character that is
-- used when there is no base character (presumably the standalone accent).
-- The table is exported as characters.tex.accentmapping.
local accentmapping = allocate {
    ['"'] = { [""] = "¨",
        A = "Ä", a = "ä",
        E = "Ë", e = "ë",
        I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
        O = "Ö", o = "ö",
        U = "Ü", u = "ü",
        Y = "Ÿ", y = "ÿ",
    },
    ["'"] = { [""] = "´",
        A = "Á", a = "á",
        C = "Ć", c = "ć",
        E = "É", e = "é",
        I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
        L = "Ĺ", l = "ĺ",
        N = "Ń", n = "ń",
        O = "Ó", o = "ó",
        R = "Ŕ", r = "ŕ",
        S = "Ś", s = "ś",
        U = "Ú", u = "ú",
        Y = "Ý", y = "ý",
        Z = "Ź", z = "ź",
    },
    ["."] = { [""] = "˙",
        C = "Ċ", c = "ċ",
        E = "Ė", e = "ė",
        G = "Ġ", g = "ġ",
        I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
        Z = "Ż", z = "ż",
    },
    ["="] = { [""] = "¯",
        A = "Ā", a = "ā",
        E = "Ē", e = "ē",
        I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
        O = "Ō", o = "ō",
        U = "Ū", u = "ū",
    },
    ["H"] = { [""] = "˝",
        O = "Ő", o = "ő",
        U = "Ű", u = "ű",
    },
    ["^"] = { [""] = "ˆ",
        A = "Â", a = "â",
        C = "Ĉ", c = "ĉ",
        E = "Ê", e = "ê",
        G = "Ĝ", g = "ĝ",
        H = "Ĥ", h = "ĥ",
        I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
        J = "Ĵ", j = "ĵ",
        O = "Ô", o = "ô",
        S = "Ŝ", s = "ŝ",
        U = "Û", u = "û",
        W = "Ŵ", w = "ŵ",
        Y = "Ŷ", y = "ŷ",
    },
    ["`"] = { [""] = "`",
        A = "À", a = "à",
        E = "È", e = "è",
        I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
        O = "Ò", o = "ò",
        U = "Ù", u = "ù",
        Y = "Ỳ", y = "ỳ",
    },
    ["c"] = { [""] = "¸",
        C = "Ç", c = "ç",
        K = "Ķ", k = "ķ",
        L = "Ļ", l = "ļ",
        N = "Ņ", n = "ņ",
        R = "Ŗ", r = "ŗ",
        S = "Ş", s = "ş",
        T = "Ţ", t = "ţ",
    },
    ["k"] = { [""] = "˛",
        A = "Ą", a = "ą",
        E = "Ę", e = "ę",
        I = "Į", i = "į",
        U = "Ų", u = "ų",
    },
    ["r"] = { [""] = "˚",
        A = "Å", a = "å",
        U = "Ů", u = "ů",
    },
    ["u"] = { [""] = "˘",
        A = "Ă", a = "ă",
        E = "Ĕ", e = "ĕ",
        G = "Ğ", g = "ğ",
        I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
        O = "Ŏ", o = "ŏ",
        U = "Ŭ", u = "ŭ",
    },
    ["v"] = { [""] = "ˇ",
        C = "Č", c = "č",
        D = "Ď", d = "ď",
        E = "Ě", e = "ě",
        L = "Ľ", l = "ľ",
        N = "Ň", n = "ň",
        R = "Ř", r = "ř",
        S = "Š", s = "š",
        T = "Ť", t = "ť",
        Z = "Ž", z = "ž",
    },
    ["~"] = { [""] = "˜",
        A = "Ã", a = "ã",
        I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
        N = "Ñ", n = "ñ",
        O = "Õ", o = "õ",
        U = "Ũ", u = "ũ",
    },
}

texcharacters.accentmapping = accentmapping
213
-- Accent command name -> string holding a mark for that accent (the values
-- look like combining characters; verify the exact code points before
-- relying on them). Only five accents are listed and nothing in the visible
-- part of this file reads the table.
local accent_map = allocate {
    ['~'] = "̃" ,
    ['"'] = "̈" ,
    ["`"] = "̀" ,
    ["'"] = "́" ,
    ["^"] = "̂" ,
}
233
234
235
-- Resolve accent command `a` applied to base `c`.
--
-- When accentmapping has an entry for the combination that entry is returned.
-- Otherwise the TeX input is reconstructed: "\a{c}" when `braced` is true,
-- "\a c" when it is not.
local function remap_accent(a,c,braced)
    local group  = accentmapping[a]
    local mapped = group and group[c]
    if mapped then
        return mapped
    end
    if braced then
        return "\\" .. a .. "{" .. c .. "}"
    end
    return "\\" .. a .. " " .. c
end
254
-- Command name (without the backslash) -> replacement text. The table is
-- exported as characters.tex.commandmapping.
local commandmapping = allocate {
    ["aa"] = "å", ["AA"] = "Å",
    ["ae"] = "æ", ["AE"] = "Æ",
    ["cc"] = "ç", ["CC"] = "Ç",
    ["i"]  = "ı", ["j"]  = "ȷ",
    ["ij"] = "ij", ["IJ"] = "IJ",
    ["l"]  = "ł", ["L"]  = "Ł",
    ["o"]  = "ø", ["O"]  = "Ø",
    ["oe"] = "œ", ["OE"] = "Œ",
    ["sz"] = "ß", ["SZ"] = "SZ", ["ss"] = "ß", ["SS"] = "ß",
}

texcharacters.commandmapping = commandmapping

-- Plain input sequences (no backslash involved) -> replacement character:
-- doubled quotes and runs of two or three hyphens.
local ligaturemapping = allocate {
    ["''"]  = "”",
    ["``"]  = "“",
    ["--"]  = "–",
    ["---"] = "—",
}
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
-- Cached pattern, built on first use by toutfpattern.
local untex

-- Return an lpeg pattern that matches the TeX input forms of the entries in
-- accentmapping, commandmapping and ligaturemapping and replaces each match
-- by its mapped value. The pattern is built once and then reused.
--
-- Input forms registered per accent/base pair:
--   \a b  and {\a b}   when the accent name is an ASCII letter
--   \ab   and {\ab}    when it is not
--   \a{b} and {\a{b}}  in both cases
-- Per command: "\name " (with trailing space), "{\name}" and "{\name }".
-- Ligature keys are registered as they are.
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        local isletter  = (accent >= "a" and accent <= "z") or (accent >= "A" and accent <= "Z")
        local command   = "\\" .. accent
        local separator = isletter and " " or ""
        for base, result in next, group do
            local plain  = command .. separator .. base
            local braced = command .. "{" .. base .. "}"
            hash[plain]                 = result
            hash["{" .. plain .. "}"]   = result
            hash[braced]                = result
            hash["{" .. braced .. "}"]  = result
        end
    end
    for name, result in next, commandmapping do
        local command = "\\" .. name
        hash[command .. " "]          = result
        hash["{" .. command .. "}"]   = result
        hash["{" .. command .. " }"]  = result
    end
    for sequence, result in next, ligaturemapping do
        hash[sequence] = result
    end
    untex = utfchartabletopattern(hash) / hash
    return untex
end

texcharacters.toutfpattern = toutfpattern
354
-- Substitution pattern used by texcharacters.toutf; created lazily.
local pattern = nil

-- Build (and remember) the pattern that applies toutfpattern() wherever it
-- matches and copies every other character unchanged.
local function prepare()
    pattern = Cs((toutfpattern() + P(1))^0)
    return pattern
end

-- Convert TeX accent/command input in `str` to the mapped characters.
--
-- Empty strings and strings that contain no backslash are returned as they
-- are; as a consequence the backslash-free ligature sequences are only
-- replaced in strings that also contain a backslash. The `strip` argument is
-- accepted but not used.
function texcharacters.toutf(str,strip)
    if str == "" or not find(str,"\\",1,true) then
        return str
    end
    return lpegmatch(pattern or prepare(),str)
end
372
373
374
375
376
377
378
379
380
381
382
383
384
385
-- Return a representation of code point `n`: a backslash followed by the
-- contextname from the character data when there is one, otherwise the
-- character itself.
function texcharacters.safechar(n)
    local entry = data[n]
    local name  = entry and entry.contextname
    if name then
        return "\\" .. name
    end
    return utfchar(n)
end
394
395if not context or not commands then
396
397 return
398end
399
400
401
402if not interfaces then return end
403
404local implement = interfaces.implement
405
406local tex = tex
407local texsetlccode = tex.setlccode
408local texsetsfcode = tex.setsfcode
409local texsetcatcode = tex.setcatcode
410
411local contextsprint = context.sprint
412local ctxcatcodes = catcodes.numbers.ctxcatcodes
413
414local texsetmacro = tokens.setters.macro
415local texsetchar = tokens.setters.char
416
-- Pass the accent and command tables on to the TeX end: every accent name
-- goes to \dodefineaccentcommand, every accent/base/result triplet to
-- \dodefineaccent and every command/result pair to \dodefinecommand.
function texcharacters.defineaccents()
    local define_accent_command = context.dodefineaccentcommand
    local define_accent         = context.dodefineaccent
    local define_command        = context.dodefinecommand
    for accent, group in next, accentmapping do
        define_accent_command(accent)
        for base, result in next, group do
            define_accent(accent,base,result)
        end
    end
    for name, result in next, commandmapping do
        define_command(name,result)
    end
end

implement {
    name    = "defineaccents",
    actions = texcharacters.defineaccents,
}
436
437
438
439
-- Print TeX code (under the ctx catcode regime) that gives character `n`
-- catcode 13 and defines it as an unexpanded macro calling \<name>.
function commands.makeactive(n,name)
    local definition = format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
    contextsprint(ctxcatcodes,definition)
end
444
-- Convert a number specification to a number.
--
-- Anything accepted by tonumber is returned as that number. A string that
-- starts with a double quote is read as hexadecimal (the TeX "41 notation).
-- Every other input yields 0.
--
-- The hexadecimal conversion is only attempted when there actually is a
-- quoted part: calling tonumber(nil,16) raises an error instead of returning
-- nil, which would make the fallback to 0 unreachable.
local function to_number(s)
    local n = tonumber(s)
    if n then
        return n
    end
    local hex = type(s) == "string" and match(s,'^"(.*)$')
    return hex and tonumber(hex,16) or 0
end
452
-- utfchar: string -> number -> character, printed to TeX.
implement {
    name      = "utfchar",
    arguments = "string",
    actions   = { to_number, utfchar, contextsprint },
}

-- safechar: string -> number -> safe representation, printed to TeX.
implement {
    name      = "safechar",
    arguments = "string",
    actions   = { to_number, texcharacters.safechar, contextsprint },
}

-- uchar: combine a high and a low byte into one code point and typeset it.
implement {
    name      = "uchar",
    arguments = { "integer", "integer" },
    actions   = function(hi,lo)
        local codepoint = hi*256 + lo
        context(utfchar(codepoint))
    end,
}

tex.uprint = commands.utfchar
474
475
476
477
478
479
-- Code points that get no name macro: command-category characters in this
-- set are skipped when the character names are defined.
local forbidden = tohash {
    0x00A0, 0x00AD,
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006,
    0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D,
    0x202F,
    0x205F,
}
511
-- State shared by the initialisation below and by the directives.
--
--   csletters  : character -> code point for everything that may be used as
--                a letter; when it already exists the setup is skipped
--   activated  : list of code points made active by setactivecatcodes
--   sfstate    : "unset", "traditional" or "normal"
--   blocks_too : also register whole blocks flagged as letters
--
-- The last two are deliberately declared under the names that the rest of
-- the code uses; a mismatch silently creates globals and disables both the
-- default space factor setup and the "characters.blockstoo" directive.

local csletters  = characters.csletters
local activated  = { }
local sfstate    = "unset"
local blocks_too = false

directives.register("characters.blockstoo",function(v) blocks_too = v end)

-- Remember the requested space factor mode `v` and, unless the mode was
-- still unset, assign space factor `n` to every character of category "lu".
local function setuppersfcodes(v,n)
    if sfstate ~= "unset" then
        report_defining("setting uppercase sf codes to %a",n)
        for u, chr in next, data do
            if chr.category == "lu" then
                texsetsfcode(u,n)
            end
        end
    end
    sfstate = v
end

directives.register("characters.spaceafteruppercase",function(v)
    if v == "traditional" then
        setuppersfcodes(v,999)
    elseif v == "normal" then
        setuppersfcodes(v,1000)
    end
end)

if not csletters then

    csletters = allocate()
    characters.csletters = csletters

    report_defining("setting up character related codes and commands")

    if sfstate == "unset" then
        sfstate = "traditional"
    end

    local traditional = sfstate == "traditional"

    -- Set the lc/uc codes of letter `u`. Missing codes default to the
    -- character itself (and are stored back in the data entry); codes that
    -- are tables are replaced by the character itself for this call only.
    -- In traditional mode uppercase letters ("lu") get space factor 999.
    local function setcasecodes(u,chr,category)
        local lc = chr.lccode
        local uc = chr.uccode
        if not lc then
            chr.lccode = u
            lc = u
        elseif type(lc) == "table" then
            lc = u
        end
        if not uc then
            chr.uccode = u
            uc = u
        elseif type(uc) == "table" then
            uc = u
        end
        texsetlccode(u,lc,uc)
        if traditional and category == "lu" then
            -- the code point itself, not some undefined variable
            texsetsfcode(u,999)
        end
    end

    for u, chr in next, data do
        local contextname = chr.contextname
        local category    = chr.category
        local isletter    = is_letter[category]
        if contextname then
            if is_character[category] then
                if chr.unicodeslot < 128 then
                    if isletter then
                        csletters[utfchar(u)] = u
                    end
                elseif isletter and u >= 32 and u <= 65536 then
                    csletters[utfchar(u)] = u
                end
                if isletter then
                    setcasecodes(u,chr,category)
                end
            elseif is_command[category] and not forbidden[u] then
                -- nothing to set up here; the name itself is defined by
                -- characters.setcharacternames
            elseif is_mark[category] then
                texsetlccode(u,u,u)
            end
        elseif isletter then
            csletters[utfchar(u)] = u
            setcasecodes(u,chr,category)
        elseif is_mark[category] then
            texsetlccode(u,u,u)
        end
    end

    if blocks_too then
        -- Register every slot (range and listed gaps) of the blocks whose
        -- catcode is "letter".
        for k, v in next, blocks do
            if v.catcode == "letter" then
                local first = v.first
                local last  = v.last
                local gaps  = v.gaps
                if first and last then
                    for u=first,last do
                        csletters[utfchar(u)] = u
                    end
                end
                if gaps then
                    for i=1,#gaps do
                        local u = gaps[i]
                        csletters[utfchar(u)] = u
                    end
                end
            end
        end
    end

    if storage then
        storage.register("characters/csletters", csletters, "characters.csletters")
    end

    -- Define the named characters at the TeX end. ASCII non-letters become
    -- character definitions, everything else with a contextname (characters
    -- and non-forbidden commands) becomes a macro expanding to the character.
    -- The `ctt` argument is accepted but not used.
    function characters.setcharacternames(ctt)
        for u, chr in next, data do
            local contextname = chr.contextname
            local category    = chr.category
            local isletter    = is_letter[category]
            if contextname then
                if is_character[category] then
                    if chr.unicodeslot < 128 then
                        if isletter then
                            texsetmacro(contextname,utfchar(u),"immutable")
                        else
                            texsetchar(contextname,u,"immutable")
                        end
                    else
                        texsetmacro(contextname,utfchar(u),"immutable")
                    end
                elseif is_command[category] and not forbidden[u] then
                    texsetmacro(contextname,utfchar(u),"immutable")
                end
            end
        end
    end

else
    mark(csletters)
end

lpegpatterns.csletter = utfchartabletopattern(csletters)
681
682
683
684
-- Give catcode 11 to U+200C, U+200D and every code point in csletters,
-- inside catcode table `cct`. The catcode table that was current on entry is
-- restored afterwards.
function characters.setlettercatcodes(cct)
    if trace_defining then
        report_defining("assigning letter catcodes to catcode table %a",cct)
    end
    local letter   = 11
    local previous = tex.catcodetable
    tex.catcodetable = cct
    texsetcatcode(0x200C,letter)
    texsetcatcode(0x200D,letter)
    for _, codepoint in next, csletters do
        texsetcatcode(codepoint,letter)
    end
    tex.catcodetable = previous
end
716
-- Give catcode 13 to every code point in the `activated` list, inside
-- catcode table `cct`, optionally reporting each one. The catcode table that
-- was current on entry is restored afterwards.
function characters.setactivecatcodes(cct)
    local active   = 13
    local previous = tex.catcodetable
    tex.catcodetable = cct
    for index=1,#activated do
        local codepoint = activated[index]
        texsetcatcode(codepoint,active)
        if trace_defining then
            report_defining("character %U (%s) is active in set %a",codepoint,data[codepoint].description,cct)
        end
    end
    tex.catcodetable = previous
end
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
-- chardescription: typeset the description of the character in `slot`;
-- nothing is typeset for slots without character data.
implement {
    name      = "chardescription",
    arguments = "integer",
    actions   = function(slot)
        local entry = data[slot]
        if not entry then
            return
        end
        context(entry.description)
    end,
}
783
784
785
characters.activeoffset = 0x10000

-- Print a TeX group that gives character `slot` catcode 13 and globally
-- defines it (with \xdef) as \string followed by `chr`.
function commands.remapentity(chr,slot)
    local definition = format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)
    contextsprint(definition)
end
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
-- The three setters are only exposed (as private commands taking one integer)
-- when characters.setcharacternames has been defined.
if characters.setcharacternames then

    implement {
        name      = "setlettercatcodes",
        scope     = "private",
        arguments = "integer",
        actions   = characters.setlettercatcodes,
    }

    implement {
        name      = "setactivecatcodes",
        scope     = "private",
        arguments = "integer",
        actions   = characters.setactivecatcodes,
    }

    implement {
        name      = "setcharacternames",
        scope     = "private",
        arguments = "integer",
        actions   = characters.setcharacternames,
    }

end
817
818
819
-- Replace a case code of a character.
--
--   c     : the code point, as a number or numeric string
--   u     : comma separated list of replacement code points
--   code  : field to set in the character data ("uccode" or "lccode")
--   codes : name of the table in `characters` whose entry for the code point
--           is cleared afterwards ("uccodes" or "lccodes")
--
-- A single replacement is stored as a number, several as a list. Nothing is
-- changed when `c` is not a number, when there is no data for that code
-- point, or when the list holds no usable number.
local function overload(c,u,code,codes)
    local slot = tonumber(c)
    if not slot then
        return
    end
    local entry = data[slot]
    if not entry then
        -- unknown character: nothing to overload (and nothing to index)
        return
    end
    local list = utilities.parsers.settings_to_array(u)
    local n = #list
    if n == 0 then
        return
    end
    local target = nil
    if n == 1 then
        target = tonumber(list[1])
    else
        target = { }
        for i=1,n do
            -- entries that are not numbers are skipped
            target[#target+1] = tonumber(list[i])
        end
        if #target == 0 then
            -- don't install an empty list as case code
            target = nil
        end
    end
    if target then
        entry[code] = target
        characters[codes][slot] = nil
    end
end
844
-- overloaduppercase: set the uccode of a character (both arguments strings).
interfaces.implement {
    name      = "overloaduppercase",
    arguments = "2 strings",
    actions   = function(character,replacement)
        overload(character,replacement,"uccode","uccodes")
    end,
}

-- overloadlowercase: set the lccode of a character (both arguments strings).
interfaces.implement {
    name      = "overloadlowercase",
    arguments = "2 strings",
    actions   = function(character,replacement)
        overload(character,replacement,"lccode","lccodes")
    end,
}
860 |