1if not modules then modules = { } end modules ['char-tex'] = {
2 version = 1.001,
3 comment = "companion to char-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local lpeg = lpeg
10local tonumber, next, type = tonumber, next, type
11local format, find, gmatch, match = string.format, string.find, string.gmatch, string.match
12local utfchar, utfbyte = utf.char, utf.byte
13local concat, tohash = table.concat, table.tohash
14local P, C, R, S, V, Cs, Cc = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs, lpeg.Cc
15
16local lpegpatterns = lpeg.patterns
17local lpegmatch = lpeg.match
18local utfchartabletopattern = lpeg.utfchartabletopattern
19
20local allocate = utilities.storage.allocate
21local mark = utilities.storage.mark
22
23local context = context
24local commands = commands
25
26local characters = characters
27local texcharacters = { }
28characters.tex = texcharacters
29local utffilters = characters.filters.utf
30
31local is_character = characters.is_character
32local is_letter = characters.is_letter
33local is_command = characters.is_command
34local is_spacing = characters.is_spacing
35local is_mark = characters.is_mark
36local is_punctuation = characters.is_punctuation
37
38local data = characters.data if not data then return end
39local blocks = characters.blocks
40
-- Tracing flag consulted by the catcode setters further down. The tracker
-- callback has to assign the local itself; assigning any other name creates
-- an unrelated global and leaves the flag permanently false.
local trace_defining = false  trackers.register("characters.defining", function(v) trace_defining = v end)
42
43local report_defining = logs.reporter("characters")
44
45
56
-- Bookkeeping for characters that need protection. The tables are filled by
-- the loop that follows and are exposed as utffilters.private:
--
--   escapes : character         -> backslash followed by the character
--   low     : character         -> stand-in at 0x0F0000 + byte value
--   high    : stand-in character -> original character
local special = "~#$%^&_{}\\|"

local escapes = allocate()
local low     = allocate()
local high    = allocate()

local private = { escapes = escapes, high = high, low = low }

utffilters.private = private
69
-- Register every character of 'special' in the three lookup tables. The
-- iterator only ever hands over one-character strings, so there is no numeric
-- case to deal with and the code point is always derived from the character.
for ch in gmatch(special,".") do
    local cb = utfbyte(ch)
    if cb < 256 then
        local hc = utfchar(0x0F0000 + cb) -- stand-in in the private area
        escapes[ch] = "\\" .. ch
        low[ch]     = hc
        -- The percent sign is stored doubled in the reverse table.
        -- NOTE(review): presumably because this table feeds lpeg.replacer,
        -- where % is an escape in replacement strings -- confirm.
        high[hc]    = ch == "%" and "%%" or ch
    end
end
86
-- Replacers built from the two lookup tables; both are also published as
-- shared lpeg patterns.
local tohigh = lpeg.replacer(low)
lpegpatterns.utftohigh = tohigh

local tolow = lpeg.replacer(high)
lpegpatterns.utftolow = tolow
92
-- Returns str run through the 'tohigh' replacer, which is built from the
-- 'low' table (special character -> private stand-in).
function utffilters.harden(str)
    local hardened = lpegmatch(tohigh,str)
    return hardened
end
96
-- Returns str run through the 'tolow' replacer, which is built from the
-- 'high' table (private stand-in -> original character).
function utffilters.soften(str)
    local softened = lpegmatch(tolow,str)
    return softened
end
100
-- Remappers for the three lookup tables, stored next to the tables themselves.
do
    local remapper = utf.remapper
    private.escape  = remapper(escapes)
    private.replace = remapper(low)
    private.revert  = remapper(high)
end
104
105
115
116
117
118
119
-- Composed characters per accent command. The outer key is the accent command
-- (without backslash), the inner key is the base character; the empty inner
-- key holds the accent on its own. Where present, the dotless i and the "\i"
-- spelling share the entry of the plain i.
local accentmapping = allocate {
    -- diaeresis
    ["\""] = {
        [""] = "¨",
        A = "Ä", a = "ä",
        E = "Ë", e = "ë",
        I = "Ï", i = "ï", ["ı"] = "ï", ["\\i"] = "ï",
        O = "Ö", o = "ö",
        U = "Ü", u = "ü",
        Y = "Ÿ", y = "ÿ",
    },
    -- acute
    ["'"] = {
        [""] = "´",
        A = "Á", a = "á",
        C = "Ć", c = "ć",
        E = "É", e = "é",
        I = "Í", i = "í", ["ı"] = "í", ["\\i"] = "í",
        L = "Ĺ", l = "ĺ",
        N = "Ń", n = "ń",
        O = "Ó", o = "ó",
        R = "Ŕ", r = "ŕ",
        S = "Ś", s = "ś",
        U = "Ú", u = "ú",
        Y = "Ý", y = "ý",
        Z = "Ź", z = "ź",
    },
    -- dot above
    ["."] = {
        [""] = "˙",
        C = "Ċ", c = "ċ",
        E = "Ė", e = "ė",
        G = "Ġ", g = "ġ",
        I = "İ", i = "i", ["ı"] = "i", ["\\i"] = "i",
        Z = "Ż", z = "ż",
    },
    -- macron
    ["="] = {
        [""] = "¯",
        A = "Ā", a = "ā",
        E = "Ē", e = "ē",
        I = "Ī", i = "ī", ["ı"] = "ī", ["\\i"] = "ī",
        O = "Ō", o = "ō",
        U = "Ū", u = "ū",
    },
    -- double acute
    H = {
        [""] = "˝",
        O = "Ő", o = "ő",
        U = "Ű", u = "ű",
    },
    -- circumflex
    ["^"] = {
        [""] = "ˆ",
        A = "Â", a = "â",
        C = "Ĉ", c = "ĉ",
        E = "Ê", e = "ê",
        G = "Ĝ", g = "ĝ",
        H = "Ĥ", h = "ĥ",
        I = "Î", i = "î", ["ı"] = "î", ["\\i"] = "î",
        J = "Ĵ", j = "ĵ",
        O = "Ô", o = "ô",
        S = "Ŝ", s = "ŝ",
        U = "Û", u = "û",
        W = "Ŵ", w = "ŵ",
        Y = "Ŷ", y = "ŷ",
    },
    -- grave
    ["`"] = {
        [""] = "`",
        A = "À", a = "à",
        E = "È", e = "è",
        I = "Ì", i = "ì", ["ı"] = "ì", ["\\i"] = "ì",
        O = "Ò", o = "ò",
        U = "Ù", u = "ù",
        Y = "Ỳ", y = "ỳ",
    },
    -- cedilla
    c = {
        [""] = "¸",
        C = "Ç", c = "ç",
        K = "Ķ", k = "ķ",
        L = "Ļ", l = "ļ",
        N = "Ņ", n = "ņ",
        R = "Ŗ", r = "ŗ",
        S = "Ş", s = "ş",
        T = "Ţ", t = "ţ",
    },
    -- ogonek
    k = {
        [""] = "˛",
        A = "Ą", a = "ą",
        E = "Ę", e = "ę",
        I = "Į", i = "į",
        U = "Ų", u = "ų",
    },
    -- ring above
    r = {
        [""] = "˚",
        A = "Å", a = "å",
        U = "Ů", u = "ů",
    },
    -- breve
    u = {
        [""] = "˘",
        A = "Ă", a = "ă",
        E = "Ĕ", e = "ĕ",
        G = "Ğ", g = "ğ",
        I = "Ĭ", i = "ĭ", ["ı"] = "ĭ", ["\\i"] = "ĭ",
        O = "Ŏ", o = "ŏ",
        U = "Ŭ", u = "ŭ",
    },
    -- caron
    v = {
        [""] = "ˇ",
        C = "Č", c = "č",
        D = "Ď", d = "ď",
        E = "Ě", e = "ě",
        L = "Ľ", l = "ľ",
        N = "Ň", n = "ň",
        R = "Ř", r = "ř",
        S = "Š", s = "š",
        T = "Ť", t = "ť",
        Z = "Ž", z = "ž",
    },
    -- tilde
    ["~"] = {
        [""] = "˜",
        A = "Ã", a = "ã",
        I = "Ĩ", i = "ĩ", ["ı"] = "ĩ", ["\\i"] = "ĩ",
        N = "Ñ", n = "ñ",
        O = "Õ", o = "õ",
        U = "Ũ", u = "ũ",
    },
}

texcharacters.accentmapping = accentmapping
231
-- Accent command -> combining mark. Nothing in the code visible here reads
-- this table.
local accent_map = allocate {
    ["~"] = "̃", -- U+0303 combining tilde
    ["\""] = "̈", -- U+0308 combining diaeresis
    ["`"] = "̀", -- U+0300 combining grave
    ["'"] = "́", -- U+0301 combining acute
    ["^"] = "̂", -- U+0302 combining circumflex
}
251
252
253
-- Resolves accent command 'a' applied to 'c'. When accentmapping knows the
-- combination the composed character is returned. Otherwise the input is
-- rebuilt as TeX source: "\a{c}" when 'braced' is set, "\a c" when it is not.
local function remap_accent(a,c,braced)
    local group    = accentmapping[a]
    local composed = group and group[c]
    if composed then
        return composed
    end
    local command = "\\" .. a
    if not braced then
        return command .. " " .. c
    end
    return command .. "{" .. c .. "}"
end
272
-- Command name (without backslash) -> the character(s) it stands for.
local commandmapping = allocate {
    aa = "å", AA = "Å",
    ae = "æ", AE = "Æ",
    cc = "ç", CC = "Ç",
    i  = "ı", j  = "ȷ",
    ij = "ij", IJ = "IJ",
    l  = "ł", L  = "Ł",
    o  = "ø", O  = "Ø",
    oe = "œ", OE = "Œ",
    sz = "ß", SZ = "SZ",
    ss = "ß", SS = "ß",
}

texcharacters.commandmapping = commandmapping
286
-- Input ligatures -> the character they stand for (quotes and dashes).
local ligaturemapping = allocate {
    ["``"]  = "“",
    ["''"]  = "”",
    ["--"]  = "–",
    ["---"] = "—",
}
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
local untex -- built on first use and then reused

-- Returns an lpeg pattern that matches the TeX spellings covered by the
-- accent, command and ligature tables and yields the mapped character.
--
-- Spellings registered per accent/base pair:
--   \a{c}  {\a{c}}              always
--   \a c   {\a c}               when the accent command is a letter
--   \ac    {\ac}                otherwise
-- Per command: "\name ", "{\name}" and "{\name }". Ligatures are taken as is.
local function toutfpattern()
    if untex then
        return untex
    end
    local hash = { }
    for accent, group in next, accentmapping do
        -- a letter command needs a space before its argument
        local isletter = (accent >= "a" and accent <= "z") or (accent >= "A" and accent <= "Z")
        local command  = "\\" .. accent
        local spacer   = isletter and " " or ""
        for base, composed in next, group do
            local plain  = command .. spacer .. base
            local braced = command .. "{" .. base .. "}"
            hash[plain]                = composed
            hash["{" .. plain .. "}"]  = composed
            hash[braced]               = composed
            hash["{" .. braced .. "}"] = composed
        end
    end
    for name, character in next, commandmapping do
        local command = "\\" .. name
        hash[command .. " "]         = character
        hash["{" .. command .. "}"]  = character
        hash["{" .. command .. " }"] = character
    end
    for ligature, character in next, ligaturemapping do
        hash[ligature] = character
    end
    untex = utfchartabletopattern(hash) / hash
    return untex
end

texcharacters.toutfpattern = toutfpattern
372
local pattern = nil -- substitution pattern, created by prepare() on demand

-- Wraps the lookup pattern so that it is tried at every position of the
-- input, with any other character copied through unchanged.
local function prepare()
    local anything = P(1)
    pattern = Cs((toutfpattern() + anything)^0)
    return pattern
end

-- Converts the TeX spellings in str to the mapped characters. A string that
-- is empty or contains no backslash is returned untouched, without running
-- the pattern at all. The 'strip' argument is accepted but not used.
function texcharacters.toutf(str,strip)
    if str == "" or not find(str,"\\",1,true) then
        return str
    end
    return lpegmatch(pattern or prepare(),str)
end
390
391
392
393
394
395
396
397
398
399
400
401
402
403
-- Returns "\<contextname>" when the character data has a context name for
-- slot n, and the character itself otherwise.
function texcharacters.safechar(n)
    local chr  = data[n]
    local name = chr and chr.contextname
    if name then
        return "\\" .. name
    end
    return utfchar(n)
end
412
-- Everything below needs the context, commands and interfaces namespaces;
-- stop loading here when any of them is missing.
if not (context and commands and interfaces) then
    return
end
421
422local implement = interfaces.implement
423
424local tex = tex
425local texsetlccode = tex.setlccode
426local texsetsfcode = tex.setsfcode
427local texsetcatcode = tex.setcatcode
428
429local contextsprint = context.sprint
430local ctxcatcodes = catcodes.numbers.ctxcatcodes
431
432local texsetmacro = tokens.setters.macro
433local texsetchar = tokens.setters.char
434
-- Hands the accent and command tables over to the TeX end: one call per
-- accent command, one per accent/character pair and one per named command.
function texcharacters.defineaccents()
    local defineaccentcommand = context.dodefineaccentcommand
    local defineaccent        = context.dodefineaccent
    local definecommand       = context.dodefinecommand
    for accent, group in next, accentmapping do
        defineaccentcommand(accent)
        for base, composed in next, group do
            defineaccent(accent,base,composed)
        end
    end
    for name, character in next, commandmapping do
        definecommand(name,character)
    end
end

implement {
    name    = "defineaccents",
    actions = texcharacters.defineaccents,
}
454
455
459
-- Prints TeX code (under the ctx catcodes) that gives slot n catcode 13 and
-- defines that character as an unexpanded macro calling \<name>.
function commands.makeactive(n,name)
    local definition = format("\\catcode%s=13\\unexpanded\\def %s{\\%s}",n,utfchar(n),name)
    contextsprint(ctxcatcodes,definition)
end
464
-- Converts a string to a number. Anything tonumber accepts is returned as is;
-- a string starting with a double quote is read as TeX style hexadecimal
-- ("41 gives 65). Everything else yields 0.
--
-- The hexadecimal part is only converted when the quote was really found:
-- calling tonumber with an explicit base and a nil value raises an error
-- instead of returning nil, so the fallback would never be reached.
local function to_number(s)
    local n = tonumber(s)
    if n then
        return n
    end
    local hex = type(s) == "string" and match(s,'^"(.*)$')
    return hex and tonumber(hex,16) or 0
end
472
-- Both take a string, turn it into a slot number with to_number and print
-- the result of the conversion function.
implement {
    name      = "utfchar",
    arguments = "string",
    actions   = { to_number, utfchar, contextsprint },
}

implement {
    name      = "safechar",
    arguments = "string",
    actions   = { to_number, texcharacters.safechar, contextsprint },
}

-- Prints the character whose slot is given as a high and a low byte.
local function uchar(h,l)
    context(utfchar(h*256+l))
end

implement {
    name      = "uchar",
    arguments = { "integer", "integer" },
    actions   = uchar,
}

tex.uprint = commands.utfchar
494
495
496
497
498
499
-- Slots that never get a command definition even when they have a context
-- name: U+00A0, U+00AD, the range U+2000 up to U+200D, U+202F and U+205F.
local forbidden = { 0x000A0, 0x000AD }

for u=0x02000,0x0200D do
    forbidden[#forbidden+1] = u
end

forbidden[#forbidden+1] = 0x0202F
forbidden[#forbidden+1] = 0x0205F

forbidden = tohash(forbidden)
531
-- Setup of the letter table and of the lc, uc and sf codes.
--
-- The state variables must carry the names that the code below really uses.
-- A mismatch does not fail: the reads and writes end up in globals, so the
-- directive has no effect and the default never applies.

local csletters  = characters.csletters -- when set the setup below is skipped
local activated  = { }
local sfstate    = "unset" -- "unset", "traditional" or "normal"
local blocks_too = false

directives.register("characters.blockstoo",function(v) blocks_too = v end)

-- Records the requested mode and sets sf code n on all uppercase letters.
-- As long as the setup below has not run (no csletters yet) the mode is only
-- recorded, because the setup loop honours it anyway. Testing csletters
-- rather than sfstate makes sure that a request is still applied when the
-- setup block is skipped because csletters is already present.
local function setuppersfcodes(v,n)
    if csletters then
        report_defining("setting uppercase sf codes to %a",n)
        for u, chr in next, data do
            if chr.category == "lu" then
                texsetsfcode(u,n)
            end
        end
    end
    sfstate = v
end

directives.register("characters.spaceafteruppercase",function(v)
    if v == "traditional" then
        setuppersfcodes(v,999)
    elseif v == "normal" then
        setuppersfcodes(v,1000)
    end
end)

if not csletters then

    csletters            = allocate()
    characters.csletters = csletters

    report_defining("setting up character related codes and commands")

    if sfstate == "unset" then
        sfstate = "traditional"
    end

    local traditional = sfstate == "traditional"

    -- Sets the lc and uc code of letter u. A missing code is stored in the
    -- character data as the letter itself; a table valued code is left alone
    -- in the data but the letter itself is what gets passed on. In traditional
    -- mode an uppercase letter also gets sf code 999.
    local function setcasecodes(u,chr,category)
        local lc = chr.lccode
        local uc = chr.uccode
        if not lc then
            chr.lccode = u
            lc = u
        elseif type(lc) == "table" then
            lc = u
        end
        if not uc then
            chr.uccode = u
            uc = u
        elseif type(uc) == "table" then
            uc = u
        end
        texsetlccode(u,lc,uc)
        if traditional and category == "lu" then
            texsetsfcode(u,999)
        end
    end

    for u, chr in next, data do
        local contextname = chr.contextname
        local category    = chr.category
        local isletter    = is_letter[category]
        if contextname then
            if is_character[category] then
                if isletter then
                    -- below 128 always, otherwise only in the 32 .. 65536 range
                    if chr.unicodeslot < 128 or (u >= 32 and u <= 65536) then
                        csletters[utfchar(u)] = u
                    end
                    setcasecodes(u,chr,category)
                end
            elseif is_command[category] and not forbidden[u] then
                -- nothing to set here, but these never reach the mark test
            elseif is_mark[category] then
                texsetlccode(u,u,u)
            end
        elseif isletter then
            csletters[utfchar(u)] = u
            setcasecodes(u,chr,category)
        elseif is_mark[category] then
            texsetlccode(u,u,u)
        end
    end

    if blocks_too then
        -- also register every slot (and listed gap) of the letter blocks
        for k, v in next, blocks do
            if v.catcode == "letter" then
                local first = v.first
                local last  = v.last
                local gaps  = v.gaps
                if first and last then
                    for u=first,last do
                        csletters[utfchar(u)] = u
                    end
                end
                if gaps then
                    for i=1,#gaps do
                        local u = gaps[i]
                        csletters[utfchar(u)] = u
                    end
                end
            end
        end
    end

    if storage then
        storage.register("characters/csletters", csletters, "characters.csletters")
    end

    -- Defines the context names as immutable: a character definition for
    -- non letters below 128, a macro holding the character in all other
    -- cases. The argument is not used.
    function characters.setcharacternames(ctt)
        for u, chr in next, data do
            local contextname = chr.contextname
            if contextname then
                local category = chr.category
                if is_character[category] then
                    if chr.unicodeslot < 128 and not is_letter[category] then
                        texsetchar(contextname,u,"immutable")
                    else
                        texsetmacro(contextname,utfchar(u),"immutable")
                    end
                elseif is_command[category] and not forbidden[u] then
                    texsetmacro(contextname,utfchar(u),"immutable")
                end
            end
        end
    end

else
    mark(csletters)
end

lpegpatterns.csletter = utfchartabletopattern(csletters)
701
702
703
704
-- Gives catcode 11 to U+200C, U+200D and every slot in csletters, in catcode
-- table cct. The current catcode table is restored afterwards.
function characters.setlettercatcodes(cct)
    local letter = 11
    if trace_defining then
        report_defining("assigning letter catcodes to catcode table %a",cct)
    end
    local saved = tex.catcodetable
    tex.catcodetable = cct
    texsetcatcode(0x200C,letter)
    texsetcatcode(0x200D,letter)
    for _, u in next, csletters do
        texsetcatcode(u,letter)
    end
    tex.catcodetable = saved
end
736
-- Gives catcode 13 to every slot collected in 'activated', in catcode table
-- cct, reporting each one when tracing is on. The current catcode table is
-- restored afterwards.
function characters.setactivecatcodes(cct)
    local active = 13
    local saved  = tex.catcodetable
    tex.catcodetable = cct
    for i=1,#activated do
        local slot = activated[i]
        texsetcatcode(slot,active)
        if trace_defining then
            report_defining("character %U (%s) is active in set %a",slot,data[slot].description,cct)
        end
    end
    tex.catcodetable = saved
end
749
750
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
-- Prints the description of the given slot; unknown slots print nothing.
implement {
    name      = "chardescription",
    arguments = "integer",
    actions   = function(slot)
        local chr = data[slot]
        if not chr then
            return
        end
        context(chr.description)
    end,
}
805
806
807
characters.activeoffset = 0x10000

-- Prints a group in which 'slot' gets catcode 13 and the character at that
-- slot is globally defined (\xdef) as the stringified 'chr'.
function commands.remapentity(chr,slot)
    local definition = format("{\\catcode%s=13\\xdef%s{\\string%s}}",slot,utfchar(slot),chr)
    contextsprint(definition)
end
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
-- The three setters are only made available when the character names setter
-- exists, which is the case when the setup above has run in this session.
if characters.setcharacternames then

    for _, name in ipairs { "setlettercatcodes", "setactivecatcodes", "setcharacternames" } do
        implement { name = name, scope = "private", actions = characters[name], arguments = "integer" }
    end

end
839
840
841
-- Overloads a case code of one character.
--
--   c     : the slot, as a string that converts to a number
--   u     : comma separated list of replacement slots
--   code  : field to set in the character data ("uccode" or "lccode")
--   codes : name of the table in 'characters' whose entry for the slot is
--           reset so that it no longer holds the old value
--
-- One replacement is stored as a number, several as a table. Nothing happens
-- when the slot is not a number, when there is no data for the slot, or when
-- none of the replacements converts to a number.
local function overload(c,u,code,codes)
    local slot = tonumber(c)
    local chr  = slot and data[slot]
    if not chr then
        return
    end
    local list = utilities.parsers.settings_to_array(u)
    local n    = #list
    if n == 0 then
        return
    end
    local target = nil
    if n == 1 then
        target = tonumber(list[1])
    else
        target = { }
        for i=1,n do
            local v = tonumber(list[i])
            if v then
                target[#target+1] = v
            end
        end
        if #target == 0 then
            -- don't store an empty table as case code
            target = nil
        end
    end
    if target then
        chr[code] = target
        characters[codes][slot] = nil
    end
end
866
-- Both commands take the slot and a list of replacement slots as strings.
implement {
    name      = "overloaduppercase",
    arguments = "2 strings",
    actions   = function(slot,list)
        overload(slot,list,"uccode","uccodes")
    end,
}

implement {
    name      = "overloadlowercase",
    arguments = "2 strings",
    actions   = function(slot,list)
        overload(slot,list,"lccode","lccodes")
    end,
}
882 |