1if not modules then modules = { } end modules ['char-tex'] = {
2 version = 1.001,
3 comment = "companion to char-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local lpeg = lpeg
10local tonumber, next, type = tonumber, next, type
11local format, find, gmatch, match = string.format, string.find, string.gmatch, string.match
12local utfchar, utfbyte = utf.char, utf.byte
13local concat, tohash = table.concat, table.tohash
14local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
15
16local lpegpatterns = lpeg.patterns
17local lpegmatch = lpeg.match
18local utfchartabletopattern = lpeg.utfchartabletopattern
19
20local allocate = utilities.storage.allocate
21local mark = utilities.storage.mark
22
23local context = context
24local commands = commands
25
26local characters = characters
27local texcharacters = { }
28characters.tex = texcharacters
29local utffilters = characters.filters.utf
30
31local is_character = characters.is_character
32local is_letter = characters.is_letter
33local is_command = characters.is_command
34local is_spacing = characters.is_spacing
35local is_mark = characters.is_mark
36local is_punctuation = characters.is_punctuation
37
38local data = characters.data if not data then return end
39local blocks = characters.blocks
40
-- Tracing flag for character definitions. It is toggled through the
-- "characters.defining" tracker and consulted by the catcode setters in this
-- file, so the callback has to update this local (and not some global).
local trace_defining = false

trackers.register("characters.defining", function(v)
    trace_defining = v
end)
42
43local report_defining = logs.reporter("characters")
44
45
46
47
48
49
50
51
52
53
-- Lookup tables for hiding characters that are special to TeX:
--
--   escapes : character -> backslash followed by that character
--   low     : character -> code point 0x0F0000 + byte value of the character
--   high    : that code point (as utf string) -> the character again
--
-- The tables are exported as characters.filters.utf.private.
local low     = allocate()
local high    = allocate()
local escapes = allocate()
local special = "~#$%^&_{}\\|"

local private = {
    low     = low,
    high    = high,
    escapes = escapes,
}

utffilters.private = private

-- gmatch with "." yields one-character strings, so each item is a character
-- (never a number) and its code is obtained with utfbyte.
for ch in gmatch(special,".") do
    local cb = utfbyte(ch)
    if cb < 256 then
        local hidden = utfchar(0x0F0000 + cb)
        escapes[ch] = "\\" .. ch
        low[ch]     = hidden
        -- The percent sign is stored doubled in the reverse table; presumably
        -- because the replacement is interpreted as a template -- TODO confirm.
        if ch == "%" then
            high[hidden] = "%%"
        else
            high[hidden] = ch
        end
    end
end
85
-- Replacers built from the private tables: one direction turns the special
-- characters into their private code points, the other one reverts that.
local tohigh = lpeg.replacer(low)
local tolow  = lpeg.replacer(high)

lpegpatterns.utftohigh = tohigh
lpegpatterns.utftolow  = tolow

-- Returns str with the special characters replaced by their private code points.
utffilters.harden = function(str)
    return lpegmatch(tohigh,str)
end

-- Returns str with the private code points replaced by the original characters.
utffilters.soften = function(str)
    return lpegmatch(tolow,str)
end

-- The same three mappings, exposed as utf remappers.
private.escape  = utf.remapper(escapes)
private.replace = utf.remapper(low)
private.revert  = utf.remapper(high)
103
-- Maps a TeX accent command (the key is the command name without backslash) to
-- a table that maps a base character to the precomposed character. The empty
-- string key holds the character used when there is no base character, and
-- the "ı" and "\\i" keys cover the dotless i variants of the input.
-- The table is exported as characters.tex.accentmapping.
local accentmapping = allocate {
    ['"'] = { [""] = "¨",
        A = "Ä", a = "ä",
        E = "Ë", e = "ë",
        I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
        O = "Ö", o = "ö",
        U = "Ü", u = "ü",
        Y = "Ÿ", y = "ÿ",
    },
    ["'"] = { [""] = "´",
        A = "Á", a = "á",
        C = "Ć", c = "ć",
        E = "É", e = "é",
        I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
        L = "Ĺ", l = "ĺ",
        N = "Ń", n = "ń",
        O = "Ó", o = "ó",
        R = "Ŕ", r = "ŕ",
        S = "Ś", s = "ś",
        U = "Ú", u = "ú",
        Y = "Ý", y = "ý",
        Z = "Ź", z = "ź",
    },
    ["."] = { [""] = "˙",
        C = "Ċ", c = "ċ",
        E = "Ė", e = "ė",
        G = "Ġ", g = "ġ",
        I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
        Z = "Ż", z = "ż",
    },
    ["="] = { [""] = "¯",
        A = "Ā", a = "ā",
        E = "Ē", e = "ē",
        I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
        O = "Ō", o = "ō",
        U = "Ū", u = "ū",
    },
    ["H"] = { [""] = "˝",
        O = "Ő", o = "ő",
        U = "Ű", u = "ű",
    },
    ["^"] = { [""] = "ˆ",
        A = "Â", a = "â",
        C = "Ĉ", c = "ĉ",
        E = "Ê", e = "ê",
        G = "Ĝ", g = "ĝ",
        H = "Ĥ", h = "ĥ",
        I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
        J = "Ĵ", j = "ĵ",
        O = "Ô", o = "ô",
        S = "Ŝ", s = "ŝ",
        U = "Û", u = "û",
        W = "Ŵ", w = "ŵ",
        Y = "Ŷ", y = "ŷ",
    },
    ["`"] = { [""] = "`",
        A = "À", a = "à",
        E = "È", e = "è",
        I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
        O = "Ò", o = "ò",
        U = "Ù", u = "ù",
        Y = "Ỳ", y = "ỳ",
    },
    ["c"] = { [""] = "¸",
        C = "Ç", c = "ç",
        K = "Ķ", k = "ķ",
        L = "Ļ", l = "ļ",
        N = "Ņ", n = "ņ",
        R = "Ŗ", r = "ŗ",
        S = "Ş", s = "ş",
        T = "Ţ", t = "ţ",
    },
    ["k"] = { [""] = "˛",
        A = "Ą", a = "ą",
        E = "Ę", e = "ę",
        I = "Į", i = "į",
        U = "Ų", u = "ų",
    },
    ["r"] = { [""] = "˚",
        A = "Å", a = "å",
        U = "Ů", u = "ů",
    },
    ["u"] = { [""] = "˘",
        A = "Ă", a = "ă",
        E = "Ĕ", e = "ĕ",
        G = "Ğ", g = "ğ",
        I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
        O = "Ŏ", o = "ŏ",
        U = "Ŭ", u = "ŭ",
    },
    ["v"] = { [""] = "ˇ",
        C = "Č", c = "č",
        D = "Ď", d = "ď",
        E = "Ě", e = "ě",
        L = "Ľ", l = "ľ",
        N = "Ň", n = "ň",
        R = "Ř", r = "ř",
        S = "Š", s = "š",
        T = "Ť", t = "ť",
        Z = "Ž", z = "ž",
    },
    ["~"] = { [""] = "˜",
        A = "Ã", a = "ã",
        I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
        N = "Ñ", n = "ñ",
        O = "Õ", o = "õ",
        U = "Ũ", u = "ũ",
    },
}

texcharacters.accentmapping = accentmapping
215
-- Maps an accent command name to a combining accent character.
-- NOTE(review): nothing in the visible part of this file reads this table;
-- confirm whether it is still needed.
local accent_map = allocate {
    ['~'] = "̃" ,
    ['"'] = "̈" ,
    ["`"] = "̀" ,
    ["'"] = "́" ,
    ["^"] = "̂" ,
}
235
236
237
-- Resolves accent command a applied to character c.
--
-- When accentmapping knows the combination the precomposed character is
-- returned. Otherwise the TeX input is rebuilt: with the character in braces
-- when braced is true, else separated from the accent command by a space.
local function remap_accent(a,c,braced)
    local group  = accentmapping[a]
    local mapped = group and group[c]
    if mapped then
        return mapped
    end
    if not braced then
        return "\\" .. a .. " " .. c
    end
    return "\\" .. a .. "{" .. c .. "}"
end
256
-- Maps a TeX character command (name without backslash) to the character it
-- stands for. The table is exported as characters.tex.commandmapping.
-- NOTE(review): "ij" and "IJ" map to two-letter strings and "SS" maps to the
-- lowercase "ß" -- confirm that these are intended.
local commandmapping = allocate {
    ["aa"] = "å", ["AA"] = "Å",
    ["ae"] = "æ", ["AE"] = "Æ",
    ["cc"] = "ç", ["CC"] = "Ç",
    ["i"] = "ı", ["j"] = "ȷ",
    ["ij"] = "ij", ["IJ"] = "IJ",
    ["l"] = "ł", ["L"] = "Ł",
    ["o"] = "ø", ["O"] = "Ø",
    ["oe"] = "œ", ["OE"] = "Œ",
    ["sz"] = "ß", ["SZ"] = "SZ", ["ss"] = "ß", ["SS"] = "ß",
}

texcharacters.commandmapping = commandmapping
270
-- Maps TeX input ligatures (doubled quotes, double and triple hyphens) to the
-- corresponding quotation marks and dashes.
local ligaturemapping = allocate {
    ["''"] = "”",
    ["``"] = "“",
    ["--"] = "–",
    ["---"] = "—",
}
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
-- Cached result of toutfpattern; it is built on first use.
local untex

-- Returns an lpeg pattern that matches the TeX spellings of the accents,
-- character commands and ligatures known in this file and replaces each match
-- by the mapped character. The pattern is constructed once and then reused.
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        -- An accent command that is a letter needs a space before its
        -- argument, a non-letter accent command takes the argument directly.
        local isletter = (accent >= "a" and accent <= "z") or (accent >= "A" and accent <= "Z")
        local command  = "\\" .. accent
        local plain    = isletter and (command .. " ") or command
        for base, result in next, group do
            hash[plain .. base]                       = result
            hash["{" .. plain .. base .. "}"]         = result
            hash[command .. "{" .. base .. "}"]       = result
            hash["{" .. command .. "{" .. base .. "}}"] = result
        end
    end
    for name, result in next, commandmapping do
        local command = "\\" .. name
        hash[command .. " "]         = result
        hash["{" .. command .. "}"]  = result
        hash["{" .. command .. " }"] = result
    end
    for ligature, result in next, ligaturemapping do
        hash[ligature] = result
    end
    untex = utfchartabletopattern(hash) / hash
    return untex
end

texcharacters.toutfpattern = toutfpattern
356
-- Substitution pattern used by texcharacters.toutf, compiled on demand.
local pattern = nil

-- Compiles the substitution pattern: the known TeX spellings get replaced and
-- any other character is copied as it is.
local function prepare()
    local compiled = Cs((toutfpattern() + P(1))^0)
    pattern = compiled
    return compiled
end

-- Returns str with TeX accent and character commands converted to utf
-- characters. A string without a backslash (which includes the empty string)
-- is returned untouched. The strip argument is not used.
function texcharacters.toutf(str,strip)
    if str == "" or not find(str,"\\",1,true) then
        return str
    end
    return lpegmatch(pattern or prepare(),str)
end
374
375
376
377
378
379
380
381
382
383
384
385
386
387
-- Returns the ConTeXt command (backslash plus contextname) for code point n
-- when the character data provides a name, and the utf character otherwise.
function texcharacters.safechar(n)
    local entry = data[n]
    local name  = entry and entry.contextname
    if name then
        return "\\" .. name
    end
    return utfchar(n)
end
396
-- The remainder of this file talks to the TeX end, so loading stops here when
-- the context or commands namespaces are not available; everything defined
-- above remains usable.
if not context or not commands then

    return
end

-- The interfaces namespace provides implement, which is used below.
if not interfaces then return end
405
406local implement = interfaces.implement
407
408local tex = tex
409local texsetlccode = tex.setlccode
410local texsetsfcode = tex.setsfcode
411local texsetcatcode = tex.setcatcode
412
413local contextsprint = context.sprint
414local ctxcatcodes = catcodes.numbers.ctxcatcodes
415
416local texsetmacro = tokens.setters.macro
417local texsetchar = tokens.setters.char
418
-- Passes the accent and command tables to the TeX end: for every accent the
-- accent command is defined, followed by one definition per base character;
-- after that each character command is defined.
function texcharacters.defineaccents()
    local defineaccentcommand = context.dodefineaccentcommand
    local defineaccent        = context.dodefineaccent
    local definecommand       = context.dodefinecommand
    for accent, group in next, accentmapping do
        defineaccentcommand(accent)
        for base, result in next, group do
            defineaccent(accent,base,result)
        end
    end
    for name, result in next, commandmapping do
        definecommand(name,result)
    end
end

implement {
    actions = texcharacters.defineaccents,
    name    = "defineaccents",
}
438
439
440
441
-- Gives code point n catcode 13 (active) and defines that active character
-- as an unexpanded macro that calls \name, using the ctx catcode regime.
function commands.makeactive(n,name)
    local definition = format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
    contextsprint(ctxcatcodes,definition)
end
446
-- Converts a string to a number.
--
-- Anything that tonumber accepts is returned as such. Otherwise the TeX
-- notation for hexadecimal numbers (a leading double quote, as in "41) is
-- tried. When neither applies the result is 0.
--
-- The hexadecimal digits are only handed to tonumber when the leading quote
-- was found, because calling tonumber with a nil value and an explicit base
-- raises an error instead of returning nil.
local function to_number(s)
    local n = tonumber(s)
    if n then
        return n
    end
    local digits = match(s,'^"(.*)$')
    if digits then
        return tonumber(digits,16) or 0
    end
    return 0
end
454
-- Prints the utf character for a number given as string (decimal or the TeX
-- hexadecimal notation with a leading double quote).
implement {
    name      = "utfchar",
    arguments = "string",
    actions   = { to_number, utfchar, contextsprint },
}

-- As above, but the character is passed through texcharacters.safechar first.
implement {
    name      = "safechar",
    arguments = "string",
    actions   = { to_number, texcharacters.safechar, contextsprint },
}

-- Prints the character whose code is built from two integers: the first one
-- is multiplied by 256 and the second one is added.
implement {
    name      = "uchar",
    arguments = { "integer", "integer" },
    actions   = function(msb,lsb)
        local code = msb*256 + lsb
        context(utfchar(code))
    end,
}

tex.uprint = commands.utfchar
476
477
478
479
480
481
-- Code points for which no character command is defined even when the
-- character data provides a name and classifies them as command: the
-- no-break space, the soft hyphen and a series of space-like and zero width
-- characters.
local forbidden = tohash {
    0x000A0, -- no-break space
    0x000AD, -- soft hyphen
    0x02000, -- en quad
    0x02001, -- em quad
    0x02002, -- en space
    0x02003, -- em space
    0x02004, -- three-per-em space
    0x02005, -- four-per-em space
    0x02006, -- six-per-em space
    0x02007, -- figure space
    0x02008, -- punctuation space
    0x02009, -- thin space
    0x0200A, -- hair space
    0x0200B, -- zero width space
    0x0200C, -- zero width non-joiner
    0x0200D, -- zero width joiner
    0x0202F, -- narrow no-break space
    0x0205F, -- medium mathematical space
}
513
-- State shared by the setup code below and by the catcode setters that follow.
-- The names used in the declarations and in the code have to be the same:
-- otherwise the code silently reads and writes globals instead.
local csletters  = characters.csletters -- when present the setup below has been done already
local activated  = { }                  -- code points handled by characters.setactivecatcodes
local sfstate    = "unset"              -- "unset", "traditional" or "normal"
local blocks_too = false                -- when set, whole blocks of letters are registered too

directives.register("characters.blockstoo",function(v) blocks_too = v end)

-- Remembers space factor mode v. Unless no mode had been chosen before, every
-- uppercase letter (category "lu") gets space factor code n.
local function setuppersfcodes(v,n)
    if sfstate ~= "unset" then
        report_defining("setting uppercase sf codes to %a",n)
        for u, chr in next, data do
            if chr.category == "lu" then
                texsetsfcode(u,n)
            end
        end
    end
    sfstate = v
end

directives.register("characters.spaceafteruppercase",function(v)
    if v == "traditional" then
        setuppersfcodes(v,999)
    elseif v == "normal" then
        setuppersfcodes(v,1000)
    end
end)

if not csletters then

    -- First run: collect the letters that can be used in control sequences
    -- and set the case and space factor codes.

    csletters = allocate()
    characters.csletters = csletters

    report_defining("setting up character related codes and commands")

    if sfstate == "unset" then
        sfstate = "traditional"
    end

    local traditional = sfstate == "traditional"

    -- Sets the lowercase and uppercase codes of letter u. A missing code is
    -- stored in the character data as the letter itself; a code that is a
    -- table (more than one character) makes TeX map the letter onto itself.
    -- In traditional mode an uppercase letter also gets space factor 999.
    local function setcasecodes(u,chr,category)
        local lc = chr.lccode
        local uc = chr.uccode
        if not lc then
            chr.lccode = u
            lc = u
        elseif type(lc) == "table" then
            lc = u
        end
        if not uc then
            chr.uccode = u
            uc = u
        elseif type(uc) == "table" then
            uc = u
        end
        texsetlccode(u,lc,uc)
        if traditional and category == "lu" then
            texsetsfcode(u,999)
        end
    end

    for u, chr in next, data do
        local contextname = chr.contextname
        local category    = chr.category
        local isletter    = is_letter[category]
        if contextname then
            if is_character[category] then
                if isletter then
                    -- below 128 every letter counts, above that only the
                    -- letters in the range 32 .. 65536
                    if chr.unicodeslot < 128 or (u >= 32 and u <= 65536) then
                        csletters[utfchar(u)] = u
                    end
                    setcasecodes(u,chr,category)
                end
            elseif is_command[category] and not forbidden[u] then
                -- nothing to set here; the command itself is defined by
                -- characters.setcharacternames
            elseif is_mark[category] then
                texsetlccode(u,u,u)
            end
        elseif isletter then
            csletters[utfchar(u)] = u
            setcasecodes(u,chr,category)
        elseif is_mark[category] then
            texsetlccode(u,u,u)
        end
    end

    if blocks_too then

        -- Register all slots (and the listed gaps) of blocks that are
        -- flagged as letter.

        for _, block in next, blocks do
            if block.catcode == "letter" then
                local first = block.first
                local last  = block.last
                local gaps  = block.gaps
                if first and last then
                    for u=first,last do
                        csletters[utfchar(u)] = u
                    end
                end
                if gaps then
                    for i=1,#gaps do
                        local u = gaps[i]
                        csletters[utfchar(u)] = u
                    end
                end
            end
        end
    end

    if storage then
        storage.register("characters/csletters", csletters, "characters.csletters")
    end

    -- Defines the named character commands at the TeX end. Named characters
    -- below 128 that are not letters become char definitions, all others
    -- become macros that expand to the utf character. The ctt argument is
    -- not used.
    function characters.setcharacternames(ctt)
        for u, chr in next, data do
            local contextname = chr.contextname
            local category    = chr.category
            local isletter    = is_letter[category]
            if contextname then
                if is_character[category] then
                    if chr.unicodeslot < 128 then
                        if isletter then
                            texsetmacro(contextname,utfchar(u),"immutable")
                        else
                            texsetchar(contextname,u,"immutable")
                        end
                    else
                        texsetmacro(contextname,utfchar(u),"immutable")
                    end
                elseif is_command[category] and not forbidden[u] then
                    texsetmacro(contextname,utfchar(u),"immutable")
                end
            end
        end
    end

else
    mark(csletters)
end

lpegpatterns.csletter = utfchartabletopattern(csletters)
683
684
685
686
-- Gives all registered control sequence letters, plus the zero width
-- non-joiner and joiner, catcode 11 (letter) in catcode table cct. The
-- catcode table that was active on entry is restored afterwards.
function characters.setlettercatcodes(cct)
    local letter = 11
    if trace_defining then
        report_defining("assigning letter catcodes to catcode table %a",cct)
    end
    local previous = tex.catcodetable
    tex.catcodetable = cct
    texsetcatcode(0x200C,letter)
    texsetcatcode(0x200D,letter)
    for _, u in next, csletters do
        texsetcatcode(u,letter)
    end
    tex.catcodetable = previous
end
718
-- Gives every code point in the activated list catcode 13 (active) in catcode
-- table cct and reports each one when tracing is enabled. The catcode table
-- that was active on entry is restored afterwards.
function characters.setactivecatcodes(cct)
    local active   = 13
    local previous = tex.catcodetable
    tex.catcodetable = cct
    local count = #activated
    for index=1,count do
        local u = activated[index]
        texsetcatcode(u,active)
        if trace_defining then
            report_defining("character %U (%s) is active in set %a",u,data[u].description,cct)
        end
    end
    tex.catcodetable = previous
end
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
-- Prints the description of the character in the given slot; nothing is
-- printed when the slot is not in the character data.
implement {
    name      = "chardescription",
    arguments = "integer",
    actions   = function(slot)
        local entry = data[slot]
        if not entry then
            return
        end
        context(entry.description)
    end,
}
785
786
787
characters.activeoffset = 0x10000

-- Makes the character in slot active inside a group and globally defines it
-- (with \xdef) as the stringified chr.
function commands.remapentity(chr,slot)
    local definition = format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)
    contextsprint(definition)
end
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
-- The setters are only made available to the TeX end when the setup code has
-- defined characters.setcharacternames.
if characters.setcharacternames then

    implement {
        name      = "setlettercatcodes",
        scope     = "private",
        arguments = "integer",
        actions   = characters.setlettercatcodes,
    }

    implement {
        name      = "setactivecatcodes",
        scope     = "private",
        arguments = "integer",
        actions   = characters.setactivecatcodes,
    }

    implement {
        name      = "setcharacternames",
        scope     = "private",
        arguments = "integer",
        actions   = characters.setcharacternames,
    }

end
819
820
821
-- Overloads a case code of a character.
--
--   c     : the code point, as number or string
--   u     : a comma separated list of replacement code points
--   code  : the field to set in the character data ("uccode" or "lccode")
--   codes : the name of the table in characters that holds derived values
--           for that field; its entry for c is reset
--
-- A single replacement is stored as a number, several are stored as a list.
-- Nothing happens when c is not a number, when the character data has no
-- entry for c, or when no replacement turns out to be a number.
local function overload(c,u,code,codes)
    local slot = tonumber(c)
    if not slot then
        return
    end
    local entry = data[slot]
    if not entry then
        -- unknown character: there is nothing to overload
        return
    end
    local list = utilities.parsers.settings_to_array(u)
    local n = #list
    if n == 0 then
        return
    end
    local target = nil
    if n == 1 then
        target = tonumber(list[1])
    else
        target = { }
        for i=1,n do
            -- a value that is not a number yields nil and is thereby skipped
            target[#target+1] = tonumber(list[i])
        end
        if #target == 0 then
            target = nil
        end
    end
    if target then
        entry[code] = target
        local derived = characters[codes]
        if derived then
            derived[slot] = nil
        end
    end
end
846
-- Overloads the uppercase code(s) of a character: the first argument is the
-- code point, the second one a comma separated list of replacements.
interfaces.implement {
    name      = "overloaduppercase",
    arguments = "2 strings",
    actions   = function(slot,replacements)
        overload(slot,replacements,"uccode","uccodes")
    end,
}

-- Same as above, but for the lowercase code(s).
interfaces.implement {
    name      = "overloadlowercase",
    arguments = "2 strings",
    actions   = function(slot,replacements)
        overload(slot,replacements,"lccode","lccodes")
    end,
}
862 |