textextcodes.c /size: 17 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7/*tex
8
9    Contrary to traditional \TEX\ we have catcode tables so that we can switch catcode regimes very
10    fast. We can have many such regimes and they're stored in trees.
11
12*/
13
/*tex
    Constants for the catcode trees. |CATCODESTACK| is the first argument handed to |sa_new_tree|,
    |CATCODEDEFAULT| is the default catcode (12) of a single character and |CATCODEDEFAULTS| has
    that same value in each of its four bytes; it is used as the |dflt| value in the |sa| struct.
*/

# define CATCODESTACK    8
# define CATCODEDEFAULT  12
# define CATCODEDEFAULTS 0x0C0C0C0C
17
18typedef struct catcode_state_info {
19    sa_tree       *catcode_heads;
20    unsigned char *catcode_valid;
21    int            catcode_max;
22    int            padding;
23} catcode_state_info;
24
25static catcode_state_info lmt_catcode_state = {
26    .catcode_heads = NULL,
27    .catcode_valid = NULL,
28    .catcode_max   = 0,
29    .padding       = 0,
30} ;
31
32static void tex_aux_allocate_catcodes(void)
33{
34    lmt_catcode_state.catcode_heads = sa_malloc_array(sizeof(sa_tree), max_n_of_catcode_tables);
35    lmt_catcode_state.catcode_valid = sa_malloc_array(sizeof(unsigned char), max_n_of_catcode_tables);
36    if (lmt_catcode_state.catcode_heads && lmt_catcode_state.catcode_valid) {
37        sa_wipe_array(lmt_catcode_state.catcode_heads, sizeof(sa_tree), max_n_of_catcode_tables);
38        sa_wipe_array(lmt_catcode_state.catcode_valid, sizeof(unsigned char), max_n_of_catcode_tables);
39    } else {
40        tex_overflow_error("catcodes", max_n_of_catcode_tables);
41    }
42}
43
44static void tex_aux_initialize_catcodes(void)
45{
46    sa_tree_item item = { .uint_value = CATCODEDEFAULTS };
47    lmt_catcode_state.catcode_max = 0;
48    tex_aux_allocate_catcodes();
49    lmt_catcode_state.catcode_valid[0] = 1;
50    lmt_catcode_state.catcode_heads[0] = sa_new_tree(CATCODESTACK, 1, item);
51}
52
53void tex_set_cat_code(int h, int n, halfword v, int gl)
54{
55    sa_tree_item item = { .uint_value = CATCODEDEFAULTS };
56    sa_tree tree = lmt_catcode_state.catcode_heads[h];
57    if (! tree) {
58        if (h > lmt_catcode_state.catcode_max) {
59            lmt_catcode_state.catcode_max = h;
60        }
61        tree = sa_new_tree(CATCODESTACK, 1, item);
62        lmt_catcode_state.catcode_heads[h] = tree;
63    }
64    sa_set_item_1(tree, n, v, gl);
65}
66
67halfword tex_get_cat_code(int h, int n)
68{
69    sa_tree_item item = { .uint_value = CATCODEDEFAULTS };
70    sa_tree tree = lmt_catcode_state.catcode_heads[h];
71    if (! tree) {
72        if (h > lmt_catcode_state.catcode_max) {
73            lmt_catcode_state.catcode_max = h;
74        }
75        tree = sa_new_tree(CATCODESTACK, 1, item);
76        lmt_catcode_state.catcode_heads[h] = tree;
77    }
78    return sa_return_item_1(tree, n);
79}
80
81void tex_unsave_cat_codes(int h, int gl)
82{
83    if (h > lmt_catcode_state.catcode_max) {
84        lmt_catcode_state.catcode_max = h;
85    }
86    for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
87        if (lmt_catcode_state.catcode_heads[k]) {
88            sa_restore_stack(lmt_catcode_state.catcode_heads[k], gl);
89        }
90    }
91}
92
93static void tex_aux_dump_catcodes(dumpstream f)
94{
95    int total = 0;
96    for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
97        if (lmt_catcode_state.catcode_valid[k]) {
98            total++;
99        }
100    }
101    dump_int(f, lmt_catcode_state.catcode_max);
102    dump_int(f, total);
103    for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
104        if (lmt_catcode_state.catcode_valid[k]) {
105            dump_int(f, k);
106            sa_dump_tree(f, lmt_catcode_state.catcode_heads[k]);
107        }
108    }
109}
110
111static void tex_aux_undump_catcodes(dumpstream f)
112{
113    int total;
114    sa_free_array(lmt_catcode_state.catcode_heads);
115    sa_free_array(lmt_catcode_state.catcode_valid);
116    tex_aux_allocate_catcodes();
117    undump_int(f, lmt_catcode_state.catcode_max);
118    undump_int(f, total);
119    for (int k = 0; k < total; k++) {
120        int x;
121        undump_int(f, x);
122        lmt_catcode_state.catcode_heads[x] = sa_undump_tree(f);
123        lmt_catcode_state.catcode_valid[x] = 1;
124    }
125}
126
127int tex_valid_catcode_table(int h)
128{
129    return (h >= 0 && h < max_n_of_catcode_tables && lmt_catcode_state.catcode_valid[h]);
130}
131
132void tex_copy_cat_codes(int from, int to)
133{
134    if (from < 0 || from >= max_n_of_catcode_tables || lmt_catcode_state.catcode_valid[from] == 0) {
135        exit(EXIT_FAILURE);
136    } else {
137        if (to > lmt_catcode_state.catcode_max) {
138            lmt_catcode_state.catcode_max = to;
139        }
140        sa_destroy_tree(lmt_catcode_state.catcode_heads[to]);
141        lmt_catcode_state.catcode_heads[to] = sa_copy_tree(lmt_catcode_state.catcode_heads[from]);
142        lmt_catcode_state.catcode_valid[to] = 1;
143    }
144}
145
146/*
147void set_cat_code_table_default(int h, int dflt)
148{
149    if (valid_catcode_table(h)) {
150        catcode_state.catcode_heads[h]->dflt.uchar_value[0] = (unsigned char) dflt;
151        catcode_state.catcode_heads[h]->dflt.uchar_value[1] = (unsigned char) dflt;
152        catcode_state.catcode_heads[h]->dflt.uchar_value[2] = (unsigned char) dflt;
153        catcode_state.catcode_heads[h]->dflt.uchar_value[3] = (unsigned char) dflt;
154    }
155}
156
157int get_cat_code_table_default(int h)
158{
159    if (valid_catcode_table(h)) {
160        return catcode_state.catcode_heads[h]->dflt.uchar_value[0];
161    } else {
162        return CATCODEDEFAULT;
163    }
164}
165*/
166
167void tex_initialize_cat_codes(int h)
168{
169    if (h > lmt_catcode_state.catcode_max) {
170        lmt_catcode_state.catcode_max = h;
171    }
172    sa_destroy_tree(lmt_catcode_state.catcode_heads[h]);
173    lmt_catcode_state.catcode_heads[h] = NULL;
174    tex_set_cat_code(h, '\r', end_line_cmd, 1);
175    tex_set_cat_code(h, ' ', spacer_cmd, 1);
176    tex_set_cat_code(h, '\\', escape_cmd, 1);
177    tex_set_cat_code(h, '%', comment_cmd, 1);
178    tex_set_cat_code(h, 127, invalid_char_cmd, 1);
179    tex_set_cat_code(h, 0, ignore_cmd, 1);
180    tex_set_cat_code(h, 0xFEFF, ignore_cmd, 1);
181    for (int k = 'A'; k <= 'Z'; k++) {
182        tex_set_cat_code(h, k, letter_cmd, 1);
183        tex_set_cat_code(h, k + 'a' - 'A', letter_cmd, 1);
184    }
185    lmt_catcode_state.catcode_valid[h] = 1;
186}
187
188static void tex_aux_free_catcodes(void)
189{
190    for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) {
191        if (lmt_catcode_state.catcode_valid[k]) {
192            sa_destroy_tree(lmt_catcode_state.catcode_heads[k]);
193        }
194    }
195    lmt_catcode_state.catcode_heads = sa_free_array(lmt_catcode_state.catcode_heads);
196    lmt_catcode_state.catcode_valid = sa_free_array(lmt_catcode_state.catcode_valid);
197}
198
199/*tex
200
201    The lowercase mapping codes are also stored in a tree. Let's keep them close for cache hits,
202    maybe also with hjcodes.
203
204*/
205
/*tex
    Per code family two constants: the |STACK| one is the first argument handed to |sa_new_tree|
    and the |DEFAULT| one is the value that the default item of the tree starts out with.
*/

# define LCCODESTACK      8
# define LCCODEDEFAULT    0

# define UCCODESTACK      8
# define UCCODEDEFAULT    0

# define SFCODESTACK      8
# define SFCODEDEFAULT    scaling_factor

# define HCCODESTACK      8
# define HCCODEDEFAULT    0

# define HMCODESTACK      8
# define HMCODEDEFAULT    0

# define AMCODESTACK      8
# define AMCODEDEFAULT    0
223
224typedef struct luscode_state_info {
225    sa_tree uccode_head;
226    sa_tree lccode_head;
227    sa_tree sfcode_head;
228    sa_tree hccode_head;
229    sa_tree hmcode_head;
230    sa_tree amcode_head;
231} luscode_state_info;
232
233static luscode_state_info lmt_luscode_state = {
234    .uccode_head = NULL,
235    .lccode_head = NULL,
236    .sfcode_head = NULL,
237    .hccode_head = NULL,
238    .hmcode_head = NULL,
239    .amcode_head = NULL
240};
241
242void tex_set_lc_code(int n, halfword v, int gl)
243{
244    sa_tree_item item = { .int_value = v };
245    sa_set_item_4(lmt_luscode_state.lccode_head, n, item, gl);
246}
247
248halfword tex_get_lc_code(int n)
249{
250    return sa_return_item_4(lmt_luscode_state.lccode_head, n);
251}
252
253static void tex_aux_unsave_lccodes(int gl)
254{
255    sa_restore_stack(lmt_luscode_state.lccode_head, gl);
256}
257
258static void tex_aux_initialize_lccodes(void)
259{
260    sa_tree_item item = {.int_value = LCCODEDEFAULT };
261    lmt_luscode_state.lccode_head = sa_new_tree(LCCODESTACK, 4, item);
262}
263
264static void tex_aux_dump_lccodes(dumpstream f)
265{
266    sa_dump_tree(f, lmt_luscode_state.lccode_head);
267}
268
269static void tex_aux_undump_lccodes(dumpstream f)
270{
271    lmt_luscode_state.lccode_head = sa_undump_tree(f);
272}
273
274static void tex_aux_free_lccodes(void)
275{
276    sa_destroy_tree(lmt_luscode_state.lccode_head);
277}
278
279/*tex
280
281    And the uppercase mapping codes are again stored in a tree.
282
283*/
284
285void tex_set_uc_code(int n, halfword v, int gl)
286{
287    sa_tree_item item = { .int_value = v };
288    sa_set_item_4(lmt_luscode_state.uccode_head, n, item, gl);
289}
290
291halfword tex_get_uc_code(int n)
292{
293    return sa_return_item_4(lmt_luscode_state.uccode_head, n);
294}
295
296static void tex_aux_unsave_uccodes(int gl)
297{
298    sa_restore_stack(lmt_luscode_state.uccode_head, gl);
299}
300
301static void tex_aux_initialize_uccodes(void)
302{
303    sa_tree_item item = { .int_value = UCCODEDEFAULT };
304    lmt_luscode_state.uccode_head = sa_new_tree(UCCODESTACK, 4, item);
305}
306
307static void tex_aux_dump_uccodes(dumpstream f)
308{
309    sa_dump_tree(f,lmt_luscode_state.uccode_head);
310}
311
312static void tex_aux_undump_uccodes(dumpstream f)
313{
314    lmt_luscode_state.uccode_head = sa_undump_tree(f);
315}
316
317static void tex_aux_free_uccodes(void)
318{
319    sa_destroy_tree(lmt_luscode_state.uccode_head);
320}
321
322/*tex
323
324    By now it will be no surprise that the space factors get stored in a tree.
325
326*/
327
328void tex_set_sf_code(int n, halfword v, int gl)
329{
330    sa_tree_item item = { .int_value = v };
331    sa_set_item_4(lmt_luscode_state.sfcode_head, n, item, gl);
332}
333
334halfword tex_get_sf_code(int n)
335{
336    return sa_return_item_4(lmt_luscode_state.sfcode_head, n);
337}
338
339static void tex_aux_unsave_sfcodes(int gl)
340{
341    sa_restore_stack(lmt_luscode_state.sfcode_head, gl);
342}
343
344static void tex_aux_initialize_sfcodes(void)
345{
346    sa_tree_item item = { .int_value = SFCODEDEFAULT };
347    lmt_luscode_state.sfcode_head = sa_new_tree(SFCODESTACK, 4, item);
348}
349
350static void tex_aux_dump_sfcodes(dumpstream f)
351{
352    sa_dump_tree(f, lmt_luscode_state.sfcode_head);
353}
354
355static void tex_aux_undump_sfcodes(dumpstream f)
356{
357    lmt_luscode_state.sfcode_head = sa_undump_tree(f);
358}
359
360static void tex_aux_free_sfcodes(void)
361{
362    sa_destroy_tree(lmt_luscode_state.sfcode_head);
363}
364
365/*tex
366
    Finally the hyphen character codes, a rather small sparse array.
368
369*/
370
371void tex_set_hc_code(int n, halfword v, int gl)
372{
373    sa_tree_item item = { .int_value = v };
374    sa_set_item_4(lmt_luscode_state.hccode_head, n, item, gl);
375}
376
377halfword tex_get_hc_code(int n)
378{
379    return sa_return_item_4(lmt_luscode_state.hccode_head, n);
380}
381
382static void tex_aux_unsave_hccodes(int gl)
383{
384    sa_restore_stack(lmt_luscode_state.hccode_head, gl);
385}
386
387static void tex_aux_initialize_hccodes(void)
388{
389    sa_tree_item item = { .int_value = HCCODEDEFAULT };
390    lmt_luscode_state.hccode_head = sa_new_tree(HCCODESTACK, 4, item);
391}
392
393static void tex_aux_dump_hccodes(dumpstream f)
394{
395    sa_dump_tree(f, lmt_luscode_state.hccode_head);
396}
397
398static void tex_aux_undump_hccodes(dumpstream f)
399{
400    lmt_luscode_state.hccode_head = sa_undump_tree(f);
401}
402
403static void tex_aux_free_hccodes(void)
404{
405    sa_destroy_tree(lmt_luscode_state.hccode_head);
406}
407
408/*tex 
409    The same is true for math hyphenation but here we have a small options set. 
410*/
411
412void tex_set_hm_code(int n, halfword v, int gl)
413{
414    sa_set_item_1(lmt_luscode_state.hmcode_head, n, v, gl);
415}
416
417halfword tex_get_hm_code(int n)
418{
419    return sa_return_item_1(lmt_luscode_state.hmcode_head, n);
420}
421
422static void tex_aux_unsave_hmcodes(int gl)
423{
424    sa_restore_stack(lmt_luscode_state.hmcode_head, gl);
425}
426
427static void tex_aux_initialize_hmcodes(void)
428{
429    sa_tree_item item = { .int_value = HMCODEDEFAULT };
430    lmt_luscode_state.hmcode_head = sa_new_tree(HMCODESTACK, 1, item);
431}
432
433static void tex_aux_dump_hmcodes(dumpstream f)
434{
435    sa_dump_tree(f, lmt_luscode_state.hmcode_head);
436}
437
438static void tex_aux_undump_hmcodes(dumpstream f)
439{
440    lmt_luscode_state.hmcode_head = sa_undump_tree(f);
441}
442
443static void tex_aux_free_hmcodes(void)
444{
445    sa_destroy_tree(lmt_luscode_state.hmcode_head);
446}
447
448/*tex Experiment. */
449
450
451void tex_set_am_code(int n, halfword v, int gl)
452{
453    sa_set_item_1(lmt_luscode_state.amcode_head, n, v, gl);
454}
455
456halfword tex_get_am_code(int n)
457{
458    return sa_return_item_1(lmt_luscode_state.amcode_head, n);
459}
460
461static void tex_aux_unsave_amcodes(int gl)
462{
463    sa_restore_stack(lmt_luscode_state.amcode_head, gl);
464}
465
466static void tex_aux_initialize_amcodes(void)
467{
468    sa_tree_item item = { .int_value = AMCODEDEFAULT };
469    lmt_luscode_state.amcode_head = sa_new_tree(AMCODESTACK, 1, item);
470}
471
472static void tex_aux_dump_amcodes(dumpstream f)
473{
474    sa_dump_tree(f, lmt_luscode_state.amcode_head);
475}
476
477static void tex_aux_undump_amcodes(dumpstream f)
478{
479    lmt_luscode_state.amcode_head = sa_undump_tree(f);
480}
481
482static void tex_aux_free_amcodes(void)
483{
484    sa_destroy_tree(lmt_luscode_state.amcode_head);
485}
486
487/*tex
488
    The hyphenation codes are indeed stored in a tree and are used instead of lowercase codes when
    deciding what characters to take into account when hyphenating. They are bound to up to
    |HJCODE_MAX| languages. In the end I decided to put the hash pointer in the language record
    so that we can do better lean memory management. Actually, the hjcode handling already was more
    efficient than in \LUATEX\ because I kept track of usage and allocated (dumped) only the
    languages that were used. A typical example of nicely cleaned up code that in the end was
    ditched but that happens often (and of course goes unnoticed). Actually, in \CONTEXT\ we don't
    dump language info at all, so I might as well drop language dumping, just like fonts.
497
498*/
499
/*tex The first argument for |sa_new_tree| and the default item value of a hjcode tree. */

# define HJCODESTACK   8
# define HJCODEDEFAULT 0
502
503void tex_set_hj_code(int h, int n, halfword v, int gl)
504{
505    if (h >= 0 && h <= lmt_language_state.language_data.top) {
506        sa_tree_item item = { .int_value = HJCODEDEFAULT };
507        sa_tree tree = lmt_language_state.languages[h]->hjcode_head;
508        if (! tree) {
509            tree = sa_new_tree(HJCODESTACK, 4, item);
510            lmt_language_state.languages[h]->hjcode_head = tree;
511        }
512        if (tree) {
513            item.int_value = (int) v;
514            sa_set_item_4(tree, n, item, gl);
515        }
516    }
517}
518
519/*tex We just return the lccodes when nothing is set. */
520
521halfword tex_get_hj_code(int h, int n)
522{
523    if (h >= 0 && h <= lmt_language_state.language_data.top) {
524        sa_tree tree = lmt_language_state.languages[h]->hjcode_head;
525        if (! tree) {
526            tree = lmt_luscode_state.lccode_head;
527        }
528        return sa_return_item_4(tree, n);
529    } else {
530        return 0;
531    }
532}
533
534void tex_dump_language_hj_codes(dumpstream f, int h)
535{
536    if (h >= 0 && h <= lmt_language_state.language_data.top) {
537        sa_tree tree = lmt_language_state.languages[h]->hjcode_head;
538        if (tree) {
539            dump_via_int(f, 1);
540            sa_dump_tree(f, tree);
541        } else {
542            dump_via_int(f, 0);
543        }
544    } else {
545       /* error */
546    }
547}
548
549void tex_undump_language_hj_codes(dumpstream f, int h)
550{
551    if (h >= 0 && h <= lmt_language_state.language_data.top) {
552        int x;
553        undump_int(f, x);
554        if (x) {
555            sa_free_array(lmt_language_state.languages[h]->hjcode_head);
556            lmt_language_state.languages[h]->hjcode_head = sa_undump_tree(f);
557        } else {
558            lmt_language_state.languages[h]->hjcode_head = NULL;
559        }
560    } else {
561       /* error */
562    }
563}
564
565void tex_hj_codes_from_lc_codes(int h)
566{
567    if (h >= 0 && h <= lmt_language_state.language_data.top) {
568        sa_tree tree = lmt_language_state.languages[h]->hjcode_head;
569        if (tree) {
570            sa_destroy_tree(tree);
571        }
572        tree = sa_copy_tree(lmt_luscode_state.lccode_head);
573        lmt_language_state.languages[h]->hjcode_head = tree ? tree : NULL;
574    }
575}
576
577/*tex The public management functions. */
578
579void tex_unsave_text_codes(int grouplevel)
580{
581    tex_aux_unsave_lccodes(grouplevel);
582    tex_aux_unsave_uccodes(grouplevel);
583    tex_aux_unsave_sfcodes(grouplevel);
584    tex_aux_unsave_hccodes(grouplevel);
585    tex_aux_unsave_hmcodes(grouplevel);
586    tex_aux_unsave_amcodes(grouplevel);
587}
588
589void tex_initialize_text_codes(void)
590{
591    tex_aux_initialize_catcodes();
592    tex_aux_initialize_lccodes();
593    tex_aux_initialize_uccodes();
594    tex_aux_initialize_sfcodes();
595    tex_aux_initialize_hccodes();
596    tex_aux_initialize_hmcodes();
597    tex_aux_initialize_amcodes();
598 /* initializehjcodes(); */
599}
600
601void tex_free_text_codes(void)
602{
603    tex_aux_free_catcodes();
604    tex_aux_free_lccodes();
605    tex_aux_free_uccodes();
606    tex_aux_free_sfcodes();
607    tex_aux_free_hccodes();
608    tex_aux_free_hmcodes();
609    tex_aux_free_amcodes();
610 /* freehjcodes(); */
611}
612
613void tex_dump_text_codes(dumpstream f)
614{
615    tex_aux_dump_catcodes(f);
616    tex_aux_dump_lccodes(f);
617    tex_aux_dump_uccodes(f);
618    tex_aux_dump_sfcodes(f);
619    tex_aux_dump_hccodes(f);
620    tex_aux_dump_hmcodes(f);
621    tex_aux_dump_amcodes(f);
622 /* dumphjcodes(f); */
623}
624
625void tex_undump_text_codes(dumpstream f)
626{
627    tex_aux_undump_catcodes(f);
628    tex_aux_undump_lccodes(f);
629    tex_aux_undump_uccodes(f);
630    tex_aux_undump_sfcodes(f);
631    tex_aux_undump_hccodes(f);
632    tex_aux_undump_hmcodes(f);
633    tex_aux_undump_amcodes(f);
634 /* undumphjcodes(f); */
635}
636
637void tex_initialize_xx_codes(void)
638{
639    /*tex We're compatible. */
640    for (int u = 'A'; u <= 'Z'; u++) {
641        int l = u + 32;
642        tex_set_lc_code(u, l, level_one);
643        tex_set_lc_code(l, l, level_one);
644        tex_set_uc_code(u, u, level_one);
645        tex_set_uc_code(l, u, level_one);
646        tex_set_sf_code(u, 999, level_one);
647    }
648    /*tex A good start but not compatible. */
649 /* set_hc_code(0x002D, 0x002D, level_one); */
650 /* set_hc_code(0x2010, 0x2010, level_one); */
651}
652
653void tex_run_case_shift(halfword code)
654{
655    int upper = code == upper_case_code;
656    halfword l = tex_scan_toks_normal(0, NULL);
657    halfword p = token_link(l);
658    while (p) {
659        halfword t = token_info(p);
660        if (t < cs_token_flag) {
661            halfword c = t % cs_offset_value;
662            halfword i = upper ? tex_get_uc_code(c) : tex_get_lc_code(c);
663            if (i) {
664                set_token_info(p, t - c + i);
665            }
666        } else if (tex_is_active_cs(cs_text(t - cs_token_flag))) {
667            halfword c = active_cs_value(cs_text(t - cs_token_flag));
668            halfword i = upper ? tex_get_uc_code(c) : tex_get_lc_code(c);
669            if (i) {
670                set_token_info(p, tex_active_to_cs(i, 1) + cs_token_flag);
671            }
672        }
673        p = token_link(p);
674    }
675    if (token_link(l)) {
676        tex_begin_backed_up_list(token_link(l));
677    }
678    tex_put_available_token(l);
679}