texexpand.c /size: 85 Kb    last modification: 2025-02-21 11:03
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7/*tex
8
9    Only a dozen or so command codes |> max_command| can possibly be returned by |get_next|; in
10    increasing order, they are |undefined_cs|, |expand_after|, |no_expand|, |input|, |if_test|,
11    |fi_or_else|, |cs_name|, |convert|, |the|, |get_mark|, |call|, |long_call|, |outer_call|,
12    |long_outer_call|, and |end_template|.
13
14    Sometimes, recursive calls to the following |expand| routine may cause exhaustion of the
15    run-time calling stack, resulting in forced execution stops by the operating system. To
16    diminish the chance of this happening, a counter is used to keep track of the recursion depth,
17    in conjunction with a constant called |expand_depth|.
18
19    Note that this does not catch all possible infinite recursion loops, just the ones that
20    exhaust the application calling stack. The actual maximum value of |expand_depth| is outside
21    of our control, but the initial setting of |100| should be enough to prevent problems.
22
23*/
24
25expand_state_info lmt_expand_state = {
26    .limits           = {
27        .minimum = min_expand_depth,
28        .maximum = max_expand_depth,
29        .size    = min_expand_depth,
30        .top     = 0,
31    },
32    .depth            = 0,
33    .cs_name_level    = 0,
34    .arguments        = 0,
35    .match_token_head = null,
36    .padding          = 0,
37};
38
39       static void tex_aux_macro_call                (halfword cs, halfword cmd, halfword chr);
40static inline void tex_aux_manufacture_csname        (void);
41static inline void tex_aux_manufacture_csname_use    (void);
42static inline void tex_aux_manufacture_csname_future (void);
43static inline void tex_aux_inject_last_tested_cs     (void);
44
/*tex

    We no longer store |match_token_head| in the format file. It is a bit cleaner to just
    initialize it at startup, which also means that we have to free it when we clean up.

*/
51
52void tex_initialize_expansion(void)
53{
54    lmt_expand_state.match_token_head = tex_get_available_token(null);
55}
56
57void tex_cleanup_expansion(void)
58{
59    tex_put_available_token(lmt_expand_state.match_token_head);
60}
61
62halfword tex_expand_match_token_head(void)
63{
64    return lmt_expand_state.match_token_head;
65}
66
67/*tex
68
69    The |expand| subroutine is used when |cur_cmd > max_command|. It removes a \quote {call} or a
70    conditional or one of the other special operations just listed. It follows that |expand| might
71    invoke itself recursively. In all cases, |expand| destroys the current token, but it sets things
72    up so that the next |get_next| will deliver the appropriate next token. The value of |cur_tok|
73    need not be known when |expand| is called.
74
75    Since several of the basic scanning routines communicate via global variables, their values are
76    saved as local variables of |expand| so that recursive calls don't invalidate them.
77
78*/
79
80static inline void tex_aux_expand_after(void)
81{
82    /*tex
83        Expand the token after the next token. It takes only a little shuffling to do what \TEX\
84        calls |\expandafter|.
85    */
86    halfword t1 = tex_get_token();
87    halfword t2 = tex_get_token();
88    if (cur_cmd > max_command_cmd) {
89        tex_expand_current_token();
90    } else {
91        tex_back_input(t2);
92       /* token_link(t1) = t2; */ /* no gain, rarely happens */
93    }
94    tex_back_input(t1);
95}
96
97static inline void tex_aux_expand_toks_after(void)
98{
99    halfword t1 = tex_scan_toks_normal(0, NULL);
100    halfword l1 = token_link(t1);
101    if (l1) {
102        halfword t2 = tex_get_token();
103        if (cur_cmd > max_command_cmd) {
104            tex_expand_current_token();
105        } else {
106            tex_back_input(t2);
107        }
108        tex_begin_backed_up_list(l1);
109    }
110    tex_put_available_token(t1);
111}
112
/*tex
    Here we deal with stuff that is not in the big switch. Where that switch is discussed it is
    mentioned that it is all a bit messy, also because that switch (or actually a lookup table)
    uses the mode for determining what to do. We see no reason to change this model.
*/
118
119void tex_inject_parameter(halfword n)
120{
121    if (n >= 0 && n < lmt_input_state.parameter_stack_data.ptr) {
122        halfword p = lmt_input_state.parameter_stack[n];
123        if (p) {
124            tex_begin_parameter_list(p);
125        }
126    }
127}
128
129void tex_expand_current_token(void)
130{
131    ++lmt_expand_state.depth;
132    if (lmt_expand_state.depth > lmt_expand_state.limits.top) {
133        if (lmt_expand_state.depth >= lmt_expand_state.limits.size) {
134            tex_overflow_error("expansion depth", lmt_expand_state.limits.size);
135        } else {
136            lmt_expand_state.limits.top += 1;
137        }
138    }
139    /*tex We're okay. */
140    {
141        halfword saved_cur_val = cur_val;
142        halfword saved_cur_val_level = cur_val_level;
143     // halfword saved_head = token_link(token_data.backup_head);
144        if (cur_cmd < first_call_cmd) {
145            /*tex Expand a nonmacro. */
146            halfword code = cur_chr;
147            if (tracing_commands_par > 1) {
148                tex_show_cmd_chr(cur_cmd, cur_chr);
149            }
150            switch (cur_cmd) {
151                case expand_after_cmd:
152                    switch (code) {
153                        case expand_after_code:
154                            tex_aux_expand_after();
155                            break;
156                        /*
157                        case expand_after_3_code:
158                            tex_aux_expand_after();
159                            // fall-through
160                        case expand_after_2_code:
161                            tex_aux_expand_after();
162                            tex_aux_expand_after();
163                            break;
164                        */
165                        case expand_unless_code:
166                            tex_conditional_unless();
167                            break;
168                        case future_expand_code:
169                            /*tex
170                                This is an experiment: |\futureexpand| (2) which takes |\check \yes
171                                \nop| as arguments. It's not faster, but gives less tracing noise
172                                than a macro. The variant |\futureexpandis| (3) alternative doesn't
173                                inject the gobbles space(s).
174                            */
175                            tex_get_token();
176                            {
177                                halfword spa = null;
178                                halfword chr = cur_chr;
179                                halfword cmd = cur_cmd;
180                                halfword yes = tex_get_token(); /* when match */
181                                halfword nop = tex_get_token(); /* when no match */
182                                while (1) {
183                                    halfword t = tex_get_token();
184                                    if (cur_cmd == spacer_cmd) {
185                                        spa = t;
186                                    } else {
187                                        tex_back_input(t);
188                                        break;
189                                    }
190                                }
191                                /*tex The value 1 means: same input level. */
192                                if (cur_cmd == cmd && cur_chr == chr) {
193                                    tex_reinsert_token(yes);
194                                } else {
195                                    if (spa) {
196                                        tex_reinsert_token(space_token);
197                                    }
198                                    tex_reinsert_token(nop);
199                                }
200                            }
201                            break;
202                        case future_expand_is_code:
203                            tex_get_token();
204                            {
205                                halfword chr = cur_chr;
206                                halfword cmd = cur_cmd;
207                                halfword yes = tex_get_token(); /* when match */
208                                halfword nop = tex_get_token(); /* when no match */
209                                while (1) {
210                                    halfword t = tex_get_token();
211                                    if (cur_cmd != spacer_cmd) {
212                                        tex_back_input(t);
213                                        break;
214                                    }
215                                }
216                                tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
217                            }
218                            break;
219                        case future_expand_is_ap_code:
220                            tex_get_token();
221                            {
222                                halfword chr = cur_chr;
223                                halfword cmd = cur_cmd;
224                                halfword yes = tex_get_token(); /* when match */
225                                halfword nop = tex_get_token(); /* when no match */
226                                while (1) {
227                                    halfword t = tex_get_token();
228                                    if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
229                                        tex_back_input(t);
230                                        break;
231                                    }
232                                }
233                                /*tex We stay at the same input level. */
234                                tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
235                            }
236                            break;
237                        case expand_after_spaces_code:
238                            {
239                                /* maybe two variants: after_spaces and after_par like in the ignores */
240                                halfword t1 = tex_get_token();
241                                while (1) {
242                                    halfword t2 = tex_get_token();
243                                    if (cur_cmd != spacer_cmd) {
244                                        tex_back_input(t2);
245                                        break;
246                                    }
247                                }
248                                tex_reinsert_token(t1);
249                                break;
250                            }
251                        case expand_after_pars_code:
252                            {
253                                halfword t1 = tex_get_token();
254                                while (1) {
255                                    halfword t2 = tex_get_token();
256                                    if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
257                                        tex_back_input(t2);
258                                        break;
259                                    }
260                                }
261                                tex_reinsert_token(t1);
262                                break;
263                            }
264                        case expand_token_code:
265                            {
266                                /* we can share code with lmtokenlib .. todo */
267                                halfword cat = tex_scan_category_code(0);
268                                halfword chr = tex_scan_char_number(0);
269                                /* too fragile:
270                                    halfword tok = null;
271                                    switch (cat) {
272                                        case letter_cmd:
273                                        case other_char_cmd:
274                                        case ignore_cmd:
275                                        case spacer_cmd:
276                                            tok = token_val(cat, chr);
277                                            break;
278                                        case active_char_cmd:
279                                            {
280                                                halfword cs = tex_active_to_cs(chr, ! lmt_hash_state.no_new_cs);
281                                                if (cs) {
282                                                    chr = eq_value(cs);
283                                                    tok = cs_token_flag + cs;
284                                                    break;
285                                                }
286                                            }
287                                        default:
288                                            tok = token_val(other_char_cmd, chr);
289                                            break;
290                                    }
291                                */
292                                switch (cat) {
293                                    case letter_cmd:
294                                    case other_char_cmd:
295                                    case ignore_cmd:
296                                    case spacer_cmd:
297                                        break;
298                                    default:
299                                        cat = other_char_cmd;
300                                        break;
301                                }
302                                tex_back_input(token_val(cat, chr));
303                                break;
304                            }
305                        case expand_cs_token_code:
306                            {
307                                tex_get_token();
308                                if (cur_tok >= cs_token_flag) {
309                                    halfword cmd = eq_type(cur_cs);
310                                    switch (cmd) {
311                                        case left_brace_cmd:
312                                        case right_brace_cmd:
313                                        case math_shift_cmd:
314                                        case alignment_tab_cmd:
315                                        case superscript_cmd:
316                                        case subscript_cmd:
317                                        case spacer_cmd:
318                                        case letter_cmd:
319                                        case other_char_cmd:
320                                        case active_char_cmd: /* new */
321                                            cur_tok = token_val(cmd, eq_value(cur_cs));
322                                            break;
323                                    }
324                                }
325                                tex_back_input(cur_tok);
326                                break;
327                            }
328                        case expand_code:
329                            {
330                                /*tex
331                                    These can be used instead of |\the<tok register [ref]>| but
332                                    that next token is not expanded so it doesn't accept |\if|.
333                                */
334                                tex_get_token();
335                                switch (cur_cmd) {
336                                    case call_cmd:
337                                    case protected_call_cmd:
338                                    case semi_protected_call_cmd:
339                                    case constant_call_cmd:
340                                    case tolerant_call_cmd:
341                                    case tolerant_protected_call_cmd:
342                                    case tolerant_semi_protected_call_cmd:
343                                        tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
344                                        break;
345                                    case internal_toks_reference_cmd:
346                                    case register_toks_reference_cmd:
347                                        if (cur_chr) {
348                                            tex_begin_token_list(cur_chr, token_text);
349                                        }
350                                        break;
351                                    case register_cmd:
352                                        if (cur_chr == token_val_level) {
353                                            halfword n = tex_scan_toks_register_number();
354                                            halfword p = eq_value(register_toks_location(n));
355                                            if (p) {
356                                                tex_begin_token_list(p, token_text);
357                                            }
358                                        } else {
359                                            tex_back_input(cur_tok);
360                                        }
361                                        break;
362                                    case internal_toks_cmd:
363                                    case register_toks_cmd:
364                                        {
365                                            halfword p = eq_value(cur_chr);
366                                            if (p) {
367                                                tex_begin_token_list(p, token_text);
368                                            }
369                                        }
370                                        break;
371                                    case index_cmd:
372                                        tex_inject_parameter(cur_chr);
373                                        break;
374                                    case case_shift_cmd:
375                                        tex_run_case_shift(cur_chr);
376                                        break;
377                                    default:
378                                        /* Use expand_current_token so that protected lua call are dealt with too? */
379                                        tex_back_input(cur_tok);
380                                        break;
381                                }
382                                break;
383                            }
384                        case expand_toks_code:
385                            {
386                                /*tex
387                                    These can be used instead of |\the<tok register [ref]>| and
388                                    contrary to above here the next token is expanded so it works
389                                    with a following |\if|.
390                                */
391                                tex_get_x_token();
392                                switch (cur_cmd) {
393                                    case internal_toks_reference_cmd:
394                                    case register_toks_reference_cmd:
395                                        if (cur_chr) {
396                                            tex_begin_token_list(cur_chr, token_text);
397                                        }
398                                        break;
399                                    case register_cmd:
400                                        if (cur_chr == token_val_level) {
401                                            halfword n = tex_scan_toks_register_number();
402                                            halfword p = eq_value(register_toks_location(n));
403                                            if (p) {
404                                                tex_begin_token_list(p, token_text);
405                                            }
406                                        } else {
407                                            tex_back_input(cur_tok);
408                                        }
409                                        break;
410                                    case internal_toks_cmd:
411                                    case register_toks_cmd:
412                                        {
413                                            halfword p = eq_value(cur_chr);
414                                            if (p) {
415                                                tex_begin_token_list(p, token_text);
416                                            }
417                                        }
418                                        break;
419                                    default:
420                                        /* Issue an error message? */
421                                        tex_back_input(cur_tok);
422                                        break;
423                                }
424                                break;
425                            }
426                        case expand_active_code:
427                            {
428                                tex_get_token();
429                                if (cur_cmd == active_char_cmd) {
430                                    cur_cs = tex_active_to_cs(cur_chr, ! lmt_hash_state.no_new_cs);
431                                    if (cur_cs) {
432                                        cur_tok = cs_token_flag + cur_cs;
433                                    } else {
434                                        cur_tok = token_val(cur_cmd, cur_chr);
435                                    }
436                                }
437                                tex_back_input(cur_tok);
438                                break;
439                            }
440                        case expand_semi_code:
441                            {
442                                tex_get_token();
443                                switch (cur_cmd) {
444                                    case semi_protected_call_cmd:
445                                    case tolerant_semi_protected_call_cmd:
446                                        tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
447                                        break;
448                                    case lua_semi_protected_call_cmd:
449                                        tex_aux_lua_call(cur_cmd, cur_chr);
450                                        break;
451                                    default:
452                                        tex_back_input(cur_tok);
453                                        break;
454                                }
455                                break;
456                            }
457                        case expand_after_toks_code:
458                            {
459                                tex_aux_expand_toks_after();
460                                break;
461                            }
462                        case expand_parameter_code:
463                            {
464                                halfword n = tex_scan_integer(0, NULL, NULL);
465                                if (n >= 0 && n < lmt_input_state.parameter_stack_data.ptr) {
466                                    halfword p = lmt_input_state.parameter_stack[n];
467                                    if (p) {
468                                        tex_begin_parameter_list(p);
469                                    }
470                                }
471                                break;
472                            }
473                        /* keep as reference */ /*
474                        case expand_after_fi_code:
475                            {
476                                tex_conditional_after_fi();
477                                break;
478                            }
479                        */
480                    }
481                    break;
482                case cs_name_cmd:
483                    /*tex Manufacture a control sequence name. */
484                    switch (code) {
485                        case cs_name_code:
486                            tex_aux_manufacture_csname();
487                            break;
488                        case last_named_cs_code:
489                            tex_aux_inject_last_tested_cs();
490                            break;
491                        case begin_cs_name_code:
492                            tex_aux_manufacture_csname_use();
493                            break;
494                        case future_cs_name_code:
495                            tex_aux_manufacture_csname_future();
496                            break;
497                    }
498                    break;
499                case no_expand_cmd:
500                    {
501                        /*tex
502                            Suppress expansion of the next token. The implementation of |\noexpand|
503                            is a bit trickier, because it is necessary to insert a special
504                            |dont_expand| marker into \TEX's reading mechanism. This special marker
505                            is processed by |get_next|, but it does not slow down the inner loop.
506
507                            Since |\outer| macros might arise here, we must also clear the
508                            |scanner_status| temporarily.
509                        */
510                        halfword t;
511//                        halfword save_scanner_status = lmt_input_state.scanner_status;
512//                        lmt_input_state.scanner_status = scanner_is_normal;
513                        t = tex_get_token();
514//                        lmt_input_state.scanner_status = save_scanner_status;
515                        tex_back_input(t);
516                        /*tex Now |start| and |loc| point to the backed-up token |t|. */
517                        if (t >= cs_token_flag) {
518                            halfword p = tex_get_available_token(deep_frozen_dont_expand_token);
519                            set_token_link(p, lmt_input_state.cur_input.loc);
520                            lmt_input_state.cur_input.start = p;
521                            lmt_input_state.cur_input.loc = p;
522                        }
523                    }
524                    break;
525                case if_test_cmd:
526                    if (code < first_real_if_test_code) {
527                        tex_conditional_fi_or_else();
528                    } else if (code != if_condition_code) {
529                        tex_conditional_if(code, 0);
530                    } else {
531                        /*tex The |\ifcondition| primitive is a no-op unless we're in skipping mode. */
532                    }
533                    break;
534                case the_cmd:
535                    {
536                        halfword h = tex_the_toks(code, NULL);
537                        if (h) {
538                            tex_begin_inserted_list(h);
539                        }
540                        break;
541                    }
542                case lua_call_cmd:
543                    if (code > 0) {
544                        strnumber u = tex_save_cur_string();
545                        lmt_token_state.luacstrings = 0;
546                        lmt_function_call(code, 0);
547                        tex_restore_cur_string(u);
548                        if (lmt_token_state.luacstrings > 0) {
549                            tex_lua_string_start();
550                        }
551                    } else {
552                        tex_normal_error("luacall", "invalid number in expansion");
553                    }
554                    break;
555                case lua_local_call_cmd:
556                    if (code > 0) {
557                        lua_State *L = lmt_lua_state.lua_instance;
558                        strnumber u = tex_save_cur_string();
559                        lmt_token_state.luacstrings = 0;
560                        /* todo: use a private table as we can overflow, unless we register early */
561                        lua_rawgeti(L, LUA_REGISTRYINDEX, code);
562                        if (lua_pcall(L, 0, 0, 0)) {
563                            tex_formatted_warning("luacall", "local call error: %s", lua_tostring(L, -1));
564                        } else {
565                            tex_restore_cur_string(u);
566                            if (lmt_token_state.luacstrings > 0) {
567                                tex_lua_string_start();
568                            }
569                        }
570                    } else {
571                        tex_normal_error("luacall", "invalid local number in expansion");
572                    }
573                    break;
574                case begin_local_cmd:
575                    tex_begin_local_control();
576                    break;
577                case convert_cmd:
578                    tex_run_convert_tokens(code);
579                    break;
580                case input_cmd:
581                    /*tex Initiate or terminate input from a file */
582                    switch (code) {
583                        case normal_input_code:
584                        case eof_input_code:
585                            if (lmt_fileio_state.name_in_progress) {
586                                tex_insert_relax_and_cur_cs();
587                            } else if (code == normal_input_code) {
588                                tex_start_input(tex_read_file_name(0, NULL, texinput_extension), null);
589                            } else {
590                                halfword t = tex_scan_toks_normal(0, NULL);
591                                tex_start_input(tex_read_file_name(0, NULL, texinput_extension), t);
592                            }
593                            break;
594                        case end_of_input_code:
595                            lmt_token_state.force_eof = 1;
596                            break;
597                        case quit_loop_code:
598                            lmt_main_control_state.quit_loop = 1;
599                            break;
600                        case quit_loop_now_code:
601                            if (lmt_main_control_state.loop_nesting) {
602                                while (1) {
603                                    tex_get_token();
604                                    if (cur_cmd == end_local_cmd) {
605                                        lmt_main_control_state.quit_loop = 1;
606                                        tex_back_input(cur_tok);
607                                        break;
608                                    }
609                                }
610                            } else {
611                                /*tex We're not in a loop and end up at some fuzzy error. */
612                            }
613                            break;
614                     /* case quit_fi_now_code: */ /*tex |\if ... \quitfinow\ignorerest \else .. \fi| */
615                     /*     tex_quit_fi();     */
616                     /*     break;             */
617                        case token_input_code:
618                            tex_tex_string_start(io_token_eof_input_code, cat_code_table_par);
619                            break;
620                        case tex_token_input_code:
621                            tex_tex_string_start(io_token_input_code, cat_code_table_par);
622                            break;
623                        case tokenized_code:
624                        case retokenized_code:
625                            {
626                                /*tex
627                                    This variant complements the other expandable primitives but
628                                    also supports an optional keyword, who knows when that comes in
629                                    handy; what goes in is detokenized anyway. For now it is an
630                                    undocumented feature. It is likely that there is a |cct| passed
631                                    so we don't need to optimize. If needed we can make a version
632                                    where this is mandate.
633                                */
634                                int cattable = (code == retokenized_code || tex_scan_optional_keyword("catcodetable")) ? tex_scan_integer(0, NULL, NULL) : cat_code_table_par; /* no _optional_ here ? */
635                                full_scanner_status saved_full_status = tex_save_full_scanner_status();
636                                strnumber u = tex_save_cur_string();
637                                halfword s = tex_scan_toks_expand(0, NULL, 0, 0);
638                                tex_unsave_full_scanner_status(saved_full_status);
639                                if (token_link(s)) {
640                                     tex_begin_inserted_list(tex_wrapped_token_list(s));
641                                     tex_tex_string_start(io_token_input_code, cattable);
642                                }
643                                tex_put_available_token(s);
644                                tex_restore_cur_string(u);
645                            }
646                            break;
647                        default:
648                            break;
649                    }
650                    break;
651                case get_mark_cmd:
652                    {
653                        /*tex Insert the appropriate mark text into the scanner. */
654                        halfword num = 0;
655                        switch (code) {
656                            case top_marks_code:
657                            case first_marks_code:
658                            case bot_marks_code:
659                            case split_first_marks_code:
660                            case split_bot_marks_code:
661                            case current_marks_code:
662                                num = tex_scan_mark_number();
663                                break;
664                        }
665                        if (tex_valid_mark(num)) {
666                            halfword ptr = tex_get_some_mark(code, num);
667                            if (ptr) {
668                                tex_begin_token_list(ptr, mark_text);
669                            }
670                        }
671                        break;
672                    }
673                case index_cmd: /* not needed here */
674                    tex_inject_parameter(code);
675                    break;
676                default:
677                    /* Maybe ... or maybe an option */
678                 // if (lmt_expand_state.cs_name_level == 0) {
679                        if (tex_cs_state(cur_cs) == cs_undefined_error) {
680                            /*tex Complain about an undefined macro */
681                            tex_handle_error(
682                                normal_error_type,
683                             // "Undefined control sequence %m", cur_cs,
684                                "Undefined control sequence",
685                                "The control sequence at the end of the top line of your error message was never\n"
686                                "\\def'ed. You can just continue as I'll forget about whatever was undefined."
687                            );
688                        } else {
689                            /*tex We ended up in a situation that is unlikely to happen in traditional \TEX. */
690                            tex_handle_error(
691                                normal_error_type,
692                                "Control sequence expected instead of %C", cur_cmd, code,
693                                "You injected something that confused the parser, maybe by using some Lua call."
694                            );
695                        }
696                 // }
697                    break;
698            }
699        } else if (cur_cmd <= last_call_cmd) {
700             tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
701        } else {
702            /*tex
703                Insert a token containing |frozen_endv|. An |end_template| command is effectively
704                changed to an |endv| command by the following code. (The reason for this is discussed
705                below; the |frozen_end_template| at the end of the template has passed the
706                |check_outer_validity| test, so its mission of error detection has been accomplished.)
707            */
708         // tex_back_input(deep_frozen_end_template_2_token); /* we never come here */
709            tex_back_input(deep_frozen_end_template_token); /* we never come here */
710        }
711        cur_val = saved_cur_val;
712        cur_val_level = saved_cur_val_level;
713     // set_token_link(token_data.backup_head, saved_head);
714    }
715    --lmt_expand_state.depth;
716}
717
/*tex
    Report a |\csname| that was not properly closed. The error is raised with |back_error_type|
    and the help text tells the user that the token shown as "to be read again" is the one that
    does not belong between |\csname| and |\endcsname|.
*/

static void tex_aux_complain_missing_csname(void)
{
    tex_handle_error(
        back_error_type,
        "Missing \\endcsname inserted",
        "The control sequence marked <to be read again> should not appear between \\csname\n"
        "and \\endcsname."
    );
}
727
728// static inline int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)
729// {
730//     if (c <= 0x7F) {
731//         b[m++] = (unsigned char) c;
732//     } else if (c <= 0x7FF) {
733//         b[m++] = (unsigned char) (0xC0 + c / 0x40);
734//         b[m++] = (unsigned char) (0x80 + c % 0x40);
735//     } else if (c <= 0xFFFF) {
736//         b[m++] = (unsigned char) (0xE0 +  c / 0x1000);
737//         b[m++] = (unsigned char) (0x80 + (c % 0x1000) / 0x40);
738//         b[m++] = (unsigned char) (0x80 + (c % 0x1000) % 0x40);
739//     } else {
740//         b[m++] = (unsigned char) (0xF0 +   c / 0x40000);
741//         b[m++] = (unsigned char) (0x80 + ( c % 0x40000) / 0x1000);
742//         b[m++] = (unsigned char) (0x80 + ((c % 0x40000) % 0x1000) / 0x40);
743//         b[m++] = (unsigned char) (0x80 + ((c % 0x40000) % 0x1000) % 0x40);
744//     }
745//     return m;
746// }
747
748// static inline int tex_aux_chr_to_buffer(unsigned char *b, int m, int c)
749// {
750//     b[m++] = (unsigned char) c;
751//     return m;
752// }
753
/*tex
    Write the \UTF-8 encoding of |c| into |b|, starting at index |m|, and return the index just
    past the last byte written. Values up to |0x7F| take one byte, up to |0x7FF| two, up to
    |0xFFFF| three and anything larger four bytes. Nothing is checked here: the caller has to
    make sure that |b| has room for (at most) four more bytes.
*/

static inline int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)
{
    if (c <= 0x7F) {
        b[m] = (unsigned char) c;
        return m + 1;
    }
    if (c <= 0x7FF) {
        b[m]     = (unsigned char) (0xC0 | (c >> 6));
        b[m + 1] = (unsigned char) (0x80 | (c & 0x3F));
        return m + 2;
    }
    if (c <= 0xFFFF) {
        b[m]     = (unsigned char) (0xE0 | (c >> 12));
        b[m + 1] = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
        b[m + 2] = (unsigned char) (0x80 | (c & 0x3F));
        return m + 3;
    }
    {
        /*tex Split into the plane number (bits 16 and up, plus one) and the lower 16 bits. */
        int rest  = c - 0x10000;
        int plane = ((rest >> 16) & 0xF) + 1;
        b[m]     = (unsigned char) (0xF0 | (plane >> 2));
        b[m + 1] = (unsigned char) (0x80 | ((plane & 0x3) << 4) | ((rest >> 12) & 0xF));
        b[m + 2] = (unsigned char) (0x80 | ((rest >> 6) & 0x3F));
        b[m + 3] = (unsigned char) (0x80 | (rest & 0x3F));
        return m + 4;
    }
}
776
777/*tex
778    We also quit on a protected macro call, which is different from \LUATEX\ (and \PDFTEX) but makes
    much sense. It also avoids long token lists that never (should) match anyway.
780*/
781
/*tex
    Collect the tokens that make up a control sequence name. Tokens are fetched with
    |tex_get_next| and appended after |*p| (which is updated to the new tail) while |*n| is
    increased along the way. The function returns 1 when |end_cs_name_cmd| is seen and 0 when a
    token shows up that is neither stored nor expanded here; in that case the offending token is
    still in |cur_cmd|, |cur_chr| and |cur_cs|.
*/

static int tex_aux_collect_cs_tokens(halfword *p, int *n)
{
    while (1) {
        tex_get_next();
        switch (cur_cmd) {
            /*tex Plain character-like tokens are stored as they are. */
            case left_brace_cmd:
            case right_brace_cmd:
            case math_shift_cmd:
            case alignment_tab_cmd:
         /* case end_line_cmd: */
            case parameter_cmd:
            case superscript_cmd:
            case subscript_cmd:
         /* case ignore_cmd: */
            case spacer_cmd:
            case letter_cmd:
            case other_char_cmd:
            case active_char_cmd: /* new, here we don't expand */
                 *p = tex_store_new_token(*p, token_val(cur_cmd, cur_chr));
                 *n += 1;
                 break;
         /* case comment_cmd: */
         /* case invalid_char_cmd: */
         /*      break; */
            /*tex Only these two call commands are expanded; other call commands end up in the default branch. */
            case call_cmd:
            case tolerant_call_cmd:
                tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
                break;
            case constant_call_cmd:
                {
                    halfword h = token_link(cur_chr);
                    if (h) {
                        if (token_link(h)) {
                            if (cur_chr > max_data_value) {
                                 /*tex The reference can't be used as token character, so we copy the tokens. */
                                 while (h) {
                                     *p = tex_store_new_token(*p, token_info(h));
                                     h = token_link(h);
                                     *n += 1;
                                 }
                            } else {
                                /*tex
                                    We store a single token that refers to the list; it is unpacked
                                    in |tex_aux_cs_tokens_to_string|.

                                    NOTE(review): only 1 is added to |*n| here, while the callers
                                    size the buffer as |n * 4| and the unpacking writes every
                                    character of the referenced list. Confirm that the room check
                                    is sufficient for long constants.
                                */
                                *p = tex_store_new_token(*p, token_val(deep_frozen_keep_constant_cmd, cur_chr));
                                *n += 1;
                            }
                        } else {
                            /*tex A list with just one token. */
                            *p = tex_store_new_token(*p, token_info(h));
                            *n += 1;
                        }
                    }
                }
                break;
            case end_cs_name_cmd:
                return 1;
            case convert_cmd:
                if (cur_chr == cs_lastname_code) {
                    if (lmt_scanner_state.last_cs_name != null_cs) {
                        /*tex We cheat and abuse the |convert_cmd| as carrier for the current string. */
                        *n += (int) str_length(cs_text(lmt_scanner_state.last_cs_name));
                        cur_chr = cs_text(lmt_scanner_state.last_cs_name) - cs_offset_value + 0xFF;
                        *p = tex_store_new_token(*p, token_val(cur_cmd, cur_chr));
                    }
                    break;
                }
                /*tex Any other |convert_cmd| deliberately falls through and gets expanded below. */
            default:
                if (cur_cmd > max_command_cmd && cur_cmd < first_call_cmd) {
                    tex_expand_current_token();
                } else {
                    return 0;
                }
         }
     }
}
853
854/* why do we use different methods here */
855
856static inline halfword tex_aux_cs_tokens_to_string(halfword h, halfword f)
857{
858    int m = f;
859    halfword l = token_link(h);
860    while (l) {
861        halfword info = token_info(l);
862        if (token_cmd(info) == deep_frozen_keep_constant_cmd) {
863            halfword h = token_link(token_chr(info));
864            while (h) {
865                m = tex_aux_uni_to_buffer(lmt_fileio_state.io_buffer, m, token_chr(token_info(h)));
866                h = token_link(h);
867            }
868        } else if (token_cmd(info) == convert_cmd) {
869         // if (token_chr(info) >= 0xFF) {
870                /*tex We know that we have something here. */
871                strnumber t = token_chr(info) + cs_offset_value - 0xFF;
872                memcpy(lmt_fileio_state.io_buffer + m,  str_string(t), str_length(t));
873                m += (int) str_length(t);
874         // }
875        } else {
876            m = tex_aux_uni_to_buffer(lmt_fileio_state.io_buffer, m, token_chr(info));
877        }
878        l = token_link(l);
879    }
880    return m;
881}
882
883int tex_is_valid_csname(void)
884{
885    halfword cs = null_cs;
886    halfword h = tex_get_available_token(null);
887    halfword p = h;
888    int b = 0;
889    int n = 0;
890    lmt_expand_state.cs_name_level += 1;
891    if (! tex_aux_collect_cs_tokens(&p, &n)) {
892         /*tex We seldom end up here so there is no gain in optimizing. */
893     //  if (1) {
894     //      int level = 1;
895     //      while (level) {
896     //          tex_get_next();
897     //          switch (cur_cmd) {
898     //              case end_cs_name_cmd:
899     //                  level--;
900     //                  break;
901     //              case cs_name_cmd:
902     //                  level++;
903     //                  break;
904     //              case if_test_cmd:
905     //                  if (cur_chr == if_csname_code) {
906     //                      level++;
907     //                  }
908     //                  break;
909     //          }
910     //      }
911     //  } else {
912            do {
913                tex_get_x_or_protected(); /* we skip unprotected ! */
914            } while (cur_cmd != end_cs_name_cmd);
915     // }
916    } else if (n) {
917        /*tex Look up the characters of list |n| in the hash table, and set |cur_cs|. */
918        int f = lmt_fileio_state.io_first;
919        if (tex_room_in_buffer(f + n * 4)) {
920            int m = tex_aux_cs_tokens_to_string(h, f);
921            cs = tex_id_locate_only(f, m - f);
922            b = (cs != undefined_control_sequence) && (eq_type(cs) != undefined_cs_cmd);
923        }
924    } else {
925        /*tex Safeguard in case we accidentally redefined |null_cs|. */
926     // copy_eqtb_entry(null_cs, undefined_control_sequence);
927    }
928    tex_flush_token_list_head_tail(h, p, n + 1);
929    lmt_scanner_state.last_cs_name = cs;
930    lmt_expand_state.cs_name_level -= 1;
931    cur_cs = cs;
932    return b;
933}
934
935static inline halfword tex_aux_get_cs_name(void)
936{
937    halfword h = tex_get_available_token(null); /* hm */
938    halfword p = h;
939    int n = 0;
940    lmt_expand_state.cs_name_level += 1;
941    if (tex_aux_collect_cs_tokens(&p, &n)) {
942        /*tex
943            Here we have to make a choice wrt duplicating hashes. In pdftex the hashes are
944            duplicated when we csname a meaning of a macro with |#1| and |##1| or just |##|
945            but in the token list these are actually references of single hashes. Therefore
946            we do as in luatex: we go single hash. In the end it doesn't matter much as such
947            weird control sequences are less likely to happen than embedded hashes (with
948            catcode parameter) so single is then more natural.
949        */
950        int f = lmt_fileio_state.io_first;
951        if (n && tex_room_in_buffer(f + n * 4)) {
952            int m = tex_aux_cs_tokens_to_string(h, f);
953            cur_cs = tex_id_locate(f, m - f, 1);
954        } else {
955            cur_cs = null_cs;
956        }
957    } else {
958        tex_aux_complain_missing_csname();
959    }
960    lmt_scanner_state.last_cs_name = cur_cs;
961    lmt_expand_state.cs_name_level -= 1;
962    tex_flush_token_list_head_tail(h, p, n);
963    return cur_cs;
964}
965
966static inline void tex_aux_manufacture_csname(void)
967{
968    halfword cs = tex_aux_get_cs_name();
969    if (eq_type(cs) == undefined_cs_cmd) {
970        /*tex The control sequence will now match |\relax|. The savestack might change. */
971        tex_eq_define(cs, relax_cmd, relax_code);
972    }
973    tex_back_input(cs + cs_token_flag);
974}
975
976static inline void tex_aux_manufacture_csname_use(void)
977{
978    if (tex_is_valid_csname()) {
979        tex_back_input(cur_cs + cs_token_flag);
980    } else {
981        lmt_scanner_state.last_cs_name = deep_frozen_relax_token;
982    }
983}
984
985static inline void tex_aux_manufacture_csname_future(void)
986{
987    halfword t = tex_get_token();
988    if (tex_is_valid_csname()) {
989        tex_back_input(cur_cs + cs_token_flag);
990    } else {
991        lmt_scanner_state.last_cs_name = deep_frozen_relax_token;
992        tex_back_input(t);
993    }
994}
995
996halfword tex_create_csname(void)
997{
998    halfword cs = tex_aux_get_cs_name();
999    if (eq_type(cs) == undefined_cs_cmd) {
1000        tex_eq_define(cs, relax_cmd, relax_code);
1001    }
1002    return cs; // cs + cs_token_flag;
1003}
1004
1005static inline void tex_aux_inject_last_tested_cs(void)
1006{
1007    if (lmt_scanner_state.last_cs_name != null_cs) {
1008        tex_back_input(lmt_scanner_state.last_cs_name + cs_token_flag);
1009    }
1010}
1011
1012/*tex
1013
1014    Sometimes the expansion looks too far ahead, so we want to insert a harmless |\relax| into the
1015    user's input.
1016*/
1017
1018void tex_insert_relax_and_cur_cs(void)
1019{
1020    tex_back_input(cs_token_flag + cur_cs);
1021    tex_reinsert_token(deep_frozen_relax_token);
1022    lmt_input_state.cur_input.token_type = inserted_text;
1023}
1024
1025/*tex
1026
1027    Here is a recursive procedure that is \TEX's usual way to get the next token of input. It has
1028    been slightly optimized to take account of common cases.
1029
1030*/
1031
1032halfword tex_get_x_token(void)
1033{
1034    /*tex This code sets |cur_cmd|, |cur_chr|, |cur_tok|, and expands macros. */
1035    while (1) {
1036        tex_get_next();
1037        if (cur_cmd <= max_command_cmd) {
1038            break;
1039        } else if (cur_cmd < first_call_cmd) {
1040            tex_expand_current_token();
1041        } else if (cur_cmd <= last_call_cmd) {
1042            tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
1043        } else {
1044         // cur_cs = deep_frozen_cs_end_template_2_code;
1045            cur_cs = deep_frozen_cs_end_template_code;
1046            cur_cmd = end_template_cmd;
1047            /*tex Now |cur_chr = token_state.null_list|. */
1048            break;
1049        }
1050    }
1051    if (cur_cs) {
1052        cur_tok = cs_token_flag + cur_cs;
1053    } else {
1054        cur_tok = token_val(cur_cmd, cur_chr);
1055    }
1056    return cur_tok;
1057}
1058
1059/*tex
1060
    The |get_x_token| procedure is equivalent to two consecutive procedure calls: |get_next; x_token|.
    So |x_token|, defined below, is |get_x_token| without the initial |get_next|.
1063
1064*/
1065
1066void tex_x_token(void)
1067{
1068    while (cur_cmd > max_command_cmd) {
1069        tex_expand_current_token();
1070        tex_get_next();
1071    }
1072    if (cur_cs) {
1073        cur_tok = cs_token_flag + cur_cs;
1074    } else {
1075        cur_tok = token_val(cur_cmd, cur_chr);
1076    }
1077}
1078
1079/*tex
1080
1081    A control sequence that has been |\def|'ed by the user is expanded by \TEX's |macro_call|
1082    procedure. Here we also need to deal with marks, but these are  discussed elsewhere.
1083
1084    So let's consider |macro_call| itself, which is invoked when \TEX\ is scanning a control
1085    sequence whose |cur_cmd| is either |call|, |long_call|, |outer_call|, or |long_outer_call|. The
1086    control sequence definition appears in the token list whose reference count is in location
1087    |cur_chr| of |mem|.
1088
1089    The global variable |long_state| will be set to |call| or to |long_call|, depending on whether
1090    or not the control sequence disallows |\par| in its parameters. The |get_next| routine will set
1091    |long_state| to |outer_call| and emit |\par|, if a file ends or if an |\outer| control sequence
1092    occurs in the midst of an argument.
1093
1094    The parameters, if any, must be scanned before the macro is expanded. Parameters are token
1095    lists without reference counts. They are placed on an auxiliary stack called |pstack| while
1096    they are being scanned, since the |param_stack| may be losing entries during the matching
1097    process. (Note that |param_stack| can't be gaining entries, since |macro_call| is the only
1098    routine that puts anything onto |param_stack|, and it is not recursive.)
1099
1100    After parameter scanning is complete, the parameters are moved to the |param_stack|. Then the
1101    macro body is fed to the scanner; in other words, |macro_call| places the defined text of the
1102    control sequence at the top of \TEX's input stack, so that |get_next| will proceed to read it
1103    next.
1104
1105    The global variable |cur_cs| contains the |eqtb| address of the control sequence being expanded,
1106    when |macro_call| begins. If this control sequence has not been declared |\long|, i.e., if its
1107    command code in the |eq_type| field is not |long_call| or |long_outer_call|, its parameters are
1108    not allowed to contain the control sequence |\par|. If an illegal |\par| appears, the macro call
1109    is aborted, and the |\par| will be rescanned.
1110
1111    Beware: we cannot use |cur_cmd| here because for instance |\bgroup| can be part of an argument
1112    without there being an |\egroup|. We really need to check raw brace tokens (|{}|) here when we
1113    pick up an argument!
1114
1115 */
1116
1117/*tex
1118
    In \LUAMETATEX\ we have an extended argument definition system. The approach is still the same
1120    and the additional code kind of fits in. There is a bit more testing going on but the overhead
1121    is kept at a minimum so performance is not hit. Macro packages like \CONTEXT\ spend a lot of
1122    time expanding and the extra overhead of the extensions is compensated by some gain in using
1123    them. However, the most important motive is in readability of macro code on the one hand and
1124    the wish for less tracing (due to all this multi-step processing) on the other. It suits me
1125    well. This is definitely a case of |goto| abuse.
1126
1127*/
1128
1129static halfword tex_aux_prune_list(halfword h)
1130{
1131    halfword t = h;
1132    halfword p = null;
1133    bool done = 0;
1134    int last = null;
1135    while (t) {
1136        halfword l = token_link(t);
1137        halfword i = token_info(t);
1138        halfword c = token_cmd(i);
1139        if (c != spacer_cmd && c != end_paragraph_cmd && i != lmt_token_state.par_token) { // c != 0xFF
1140            done = true;
1141            last = null;
1142        } else if (done) {
1143            if (! last) {
1144                last = p; /* before space */
1145            }
1146        } else {
1147            h = l;
1148            tex_put_available_token(t);
1149        }
1150        p = t;
1151        t = l;
1152    }
1153    if (last) {
1154        halfword l = token_link(last);
1155        token_link(last) = null;
1156        tex_flush_token_list(l);
1157    }
1158    return h;
1159}
1160
1161int tex_get_parameter_count(void)
1162{
1163    int n = 0;
1164    for (int i = lmt_input_state.cur_input.parameter_start; i < lmt_input_state.parameter_stack_data.ptr; i++) {
1165        if (lmt_input_state.parameter_stack[i]) {
1166            ++n;
1167        } else {
1168            break;
1169        }
1170    }
1171    return n;
1172}
1173
1174int tex_get_parameter_index(int n)
1175{
1176    n = lmt_input_state.cur_input.parameter_start + n - 1;
1177    if (n < lmt_input_state.parameter_stack_data.ptr) {
1178        return n;
1179    }
1180    return -1;
1181}
1182
1183/*tex
1184    We can avoid the copy of parameters to the stack but it complicates the code because we also need
1185    to clean up the previous set of parameters etc. It's not worth the effort. However, there are
1186    plenty of optimizations compared to the original. Some are measurable on an average run, others
1187    are more likely to increase performance when thousands of successive runs happen in e.g. a virtual
    environment where threads fight for memory access and cpu cache. And because \CONTEXT\ is used
1189    that way we keep looking into ways to gain performance, but not at the cost of dirty hacks (that
1190    I tried out of curiosity but rejected in the end).
1191
1192    The arguments counter is a bit fuzzy and might disappear. I might rewrite this again using states.
1193*/
1194
1195// halfword tex_get_token(void)
1196// {
1197//     lmt_hash_state.no_new_cs = 0;
1198//     tex_get_next();
1199//     lmt_hash_state.no_new_cs = 1;
1200//     cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr);
1201//     return cur_tok;
1202// }
1203
1204static inline void tex_aux_macro_grab_left_right(halfword lefttoken, halfword righttoken, int match)
1205{
1206    halfword tail = lmt_expand_state.match_token_head;
1207    int unbalance = 0;
1208    int nesting = 1;
1209    while (1) {
1210        halfword t = tex_get_token();
1211        if (cur_tok < right_brace_limit) {
1212            if (cur_tok < left_brace_limit) {
1213                ++unbalance;
1214            } else if (unbalance) {
1215                --unbalance;
1216            }
1217        } else if (unbalance) {
1218            /* just add */
1219        } else if (t == lefttoken) {
1220            ++nesting;
1221        } else if (t == righttoken) {
1222            --nesting;
1223            if (! nesting) {
1224                break;
1225            }
1226        }
1227        if (match) {
1228            tail = tex_store_new_token(tail, t);
1229        }
1230    }
1231}
1232
1233static inline void tex_aux_macro_grab_upto_par(int match)
1234{
1235    halfword tail = lmt_expand_state.match_token_head;
1236    int unbalance = 0;
1237    while (1) {
1238        halfword t = tex_get_token();
1239        if (cur_tok < right_brace_limit) {
1240            if (cur_tok < left_brace_limit) {
1241                ++unbalance;
1242            } else if (unbalance) {
1243                --unbalance;
1244            }
1245        } else if (unbalance) {
1246            /* just add */
1247        } else if (cur_cmd == end_paragraph_cmd) {
1248            break;
1249        }
1250        if (match) {
1251            tail = tex_store_new_token(tail, t);
1252        }
1253    }
1254}
1255
1256static inline void tex_aux_macro_gobble_upto(halfword gobbletoken, bool gobblemore)
1257{
1258    if (gobblemore) {
1259        while (1) {
1260            halfword t = tex_get_token();
1261            if (! (t == gobbletoken || cur_cmd == spacer_cmd)) {
1262                break;
1263            }
1264        }
1265    } else {
1266        do {
1267        } while (tex_get_token() == gobbletoken);
1268    }
1269}
1270
1271static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
1272{
1273    bool tracing = tracing_macros_par > 0;
1274    if (tracing) {
1275        /*tex
1276            Setting |\tracingmacros| to 2 means that elsewhere marks etc are shown so in fact a bit
1277            more detail. However, as we turn that on anyway, using a value of 3 is not that weird
1278            for less info here. Introducing an extra parameter makes no sense.
1279        */
1280        tex_begin_diagnostic();
1281        tex_print_cs_checked(cs);
1282        if (is_untraced(eq_flag(cs))) {
1283            tracing = false;
1284        } else {
1285            if (! get_token_preamble(chr)) {
1286                tex_print_str("->");
1287            } else {
1288                /* maybe move the preamble scanner to here */
1289            }
1290            tex_token_show(chr);
1291        }
1292        tex_end_diagnostic();
1293    }
1294    if (! get_token_preamble(chr)) {
1295        /*tex Happens more often (about two times). */
1296        tex_cleanup_input_state();
1297        if (token_link(chr)) {
1298            tex_begin_macro_list(chr);
1299            lmt_expand_state.arguments = 0;
1300            lmt_input_state.cur_input.name = lmt_input_state.warning_index;
1301            lmt_input_state.cur_input.loc = token_link(chr);
1302        } else {
1303            /* We ignore empty bodies. */
1304        }
1305    } else {
1306        halfword matchpointer = token_link(chr);
1307        halfword matchtoken = token_info(matchpointer);
1308        int save_scanner_status = lmt_input_state.scanner_status;
1309        halfword save_warning_index = lmt_input_state.warning_index;
1310        int nofscanned = 0;
1311        int nofarguments = 0;
1312        halfword pstack[max_match_count] = { null };
1313        /*tex
1314            Scan the parameters and make |link(r)| point to the macro body; but |return| if an
1315            illegal |\par| is detected.
1316
1317            At this point, the reader will find it advisable to review the explanation of token
1318            list format that was presented earlier, since many aspects of that format are of
1319            importance chiefly in the |macro_call| routine.
1320
1321            The token list might begin with a string of compulsory tokens before the first
1322            |match| or |end_match|. In that case the macro name is supposed to be followed by
1323            those tokens; the following program will set |s=null| to represent this restriction.
1324            Otherwise |s| will be set to the first token of a string that will delimit the next
1325            parameter.
1326        */
1327        int tolerant = is_tolerant_cmd(cmd);
1328        /*tex the number of tokens or groups (usually) */
1329        halfword count = 0;
1330        /*tex one step before the last |right_brace| token */
1331        halfword rightbrace = null;
1332        /*tex the state, currently the character used in parameter */
1333        int match = 0;
1334        bool thrash = false;
1335        bool last = false;
1336        bool spacer = false;
1337        bool gobblemore = false;
1338        bool nested = false;
1339        int quitting = 0; /* multiple values */
1340        /*tex current node in parameter token list being built */
1341        halfword p = null;
1342        /*tex backup pointer for parameter matching */
1343        halfword s = null;
1344        halfword lefttoken = null;
1345        halfword righttoken = null;
1346        halfword gobbletoken = null;
1347        halfword leftparent = null;
1348        halfword rightparent = null;
1349        halfword leftbracket = null;
1350        halfword rightbracket = null;
1351        halfword leftangle = null;
1352        halfword rightangle = null;
1353        /*tex
1354             One day I will check the next code for too many tests, no that much branching that it.
1355             The numbers in |#n| are match tokens except the last one, which is has a different
1356             token info.
1357        */
1358        lmt_input_state.warning_index = cs;
1359        lmt_input_state.scanner_status = tolerant ? scanner_is_tolerant : scanner_is_matching;
1360        /* */
1361        do {
1362            /*tex
1363                So, can we use a local head here? After all, there is no expansion going on here,
1364                so no need to access |temp_token_head|. On the other hand, it's also used as a
1365                signal, so not now.
1366            */
1367          RESTART:
1368            set_token_link(lmt_expand_state.match_token_head, null);
1369          AGAIN:
1370            spacer = false;
1371          LATER:
1372            if (matchtoken < match_token || matchtoken >= end_match_token) {
1373                s = null;
1374            } else {
1375                switch (matchtoken) {
1376                    case spacer_match_token:
1377                        matchpointer = token_link(matchpointer);
1378                        matchtoken = token_info(matchpointer);
1379                        do {
1380                            tex_get_token();
1381                        } while (cur_cmd == spacer_cmd);
1382                        last = true;
1383                        goto AGAIN;
1384                    case mandate_match_token:
1385                        match = match_mandate;
1386                        goto MANDATE;
1387                    case mandate_keep_match_token:
1388                        match = match_bracekeeper;
1389                      MANDATE:
1390                        if (last) {
1391                            last = false;
1392                        } else {
1393                            tex_get_token();
1394                            last = true;
1395                        }
1396                        if (cur_tok < left_brace_limit) {
1397                            matchpointer = token_link(matchpointer);
1398                            matchtoken = token_info(matchpointer);
1399                            s = matchpointer;
1400                            p = lmt_expand_state.match_token_head;
1401                            count = 0;
1402                            last = false;
1403                            goto GROUPED;
1404                        } else if (tolerant) {
1405                            last = false;
1406                            nofarguments = nofscanned;
1407                            tex_back_input(cur_tok);
1408                            goto QUITTING;
1409                        } else {
1410                            last = false;
1411                            tex_back_input(cur_tok);
1412                            s = null;
1413                            goto BAD;
1414                        }
1415                     // break;
1416                    case thrash_match_token:
1417                        match = 0;
1418                        thrash = true;
1419                        break;
1420                    case leading_match_token:
1421                        match = match_spacekeeper;
1422                        break;
1423                    case prune_match_token:
1424                        match = match_pruner;
1425                        break;
1426                    case continue_match_token:
1427                        matchpointer = token_link(matchpointer);
1428                        matchtoken = token_info(matchpointer);
1429                        goto AGAIN;
1430                    case quit_match_token:
1431                        match = match_quitter;
1432                        if (tolerant) {
1433                            last = false;
1434                            nofarguments = nofscanned;
1435                            matchpointer = token_link(matchpointer);
1436                            matchtoken = token_info(matchpointer);
1437                            goto QUITTING;
1438                        } else {
1439                            break;
1440                        }
1441                    case par_spacer_match_token:
1442                        matchpointer = token_link(matchpointer);
1443                        matchtoken = token_info(matchpointer);
1444                        do {
1445                            /* discard as we go */
1446                            tex_get_token();
1447                        } while (cur_cmd == spacer_cmd || cur_cmd == end_paragraph_cmd);
1448                        last = true;
1449                        goto AGAIN;
1450                    case keep_spacer_match_token:
1451                        matchpointer = token_link(matchpointer);
1452                        matchtoken = token_info(matchpointer);
1453                        do {
1454                            tex_get_token();
1455                            if (cur_cmd == spacer_cmd) {
1456                                spacer = true;
1457                            } else {
1458                                break;
1459                            }
1460                        } while (1);
1461                        last = true;
1462                        goto LATER;
1463                    case left_match_token:
1464                        matchpointer = token_link(matchpointer);
1465                        lefttoken = token_info(matchpointer);
1466                        matchpointer = token_link(matchpointer);
1467                        matchtoken = token_info(matchpointer);
1468                     // match = match_token;
1469                        goto AGAIN;
1470                    case right_match_token:
1471                        matchpointer = token_link(matchpointer);
1472                        righttoken = token_info(matchpointer);
1473                        matchpointer = token_link(matchpointer);
1474                        matchtoken = token_info(matchpointer);
1475                     // match = match_token;
1476                        goto AGAIN;
1477                    case gobble_more_match_token:
1478                        gobblemore = true;
1479                    case gobble_match_token:
1480                        matchpointer = token_link(matchpointer);
1481                        gobbletoken = token_info(matchpointer);
1482                        matchpointer = token_link(matchpointer);
1483                        matchtoken = token_info(matchpointer);
1484                     // match = match_token;
1485                        goto AGAIN;
1486                    case brackets_match_token:
1487                        leftbracket = left_bracket_token;
1488                        rightbracket = right_bracket_token;
1489                        matchpointer = token_link(matchpointer);
1490                        matchtoken = token_info(matchpointer);
1491                        nested = true;
1492                     // match = match_token;
1493                        goto AGAIN;
1494                    case parentheses_match_token:
1495                        leftparent = left_parent_token;
1496                        rightparent = right_parent_token;
1497                        matchpointer = token_link(matchpointer);
1498                        matchtoken = token_info(matchpointer);
1499                        nested = true;
1500                     // match = match_token;
1501                        goto AGAIN;
1502                    case angles_match_token:
1503                        leftangle= left_angle_token;
1504                        rightangle = right_angle_token;
1505                        matchpointer = token_link(matchpointer);
1506                        matchtoken = token_info(matchpointer);
1507                        nested = true;
1508                     // match = match_token;
1509                        goto AGAIN;
1510# if (match_experiment)
1511/* 
1512    This is a proof of concept that kind of works but we need a storage model that permits the
1513    larger values. But that is currently not worth the trouble because we seldom need this. Think 
1514    of dimension_value_cmd and integer_value_cmd where the next token pointed to is the value
1515    but that is actually also kind of alien to tex (not really a token list then). Typing the
1516    stack is overkill too. We can do the same as node_cmd: have a lsb/msb in a follow up ignore_cmd
1517    token. 
1518*/
1519case dimension_match_token:
1520    {
1521        if (last) { 
1522            tex_back_input(cur_tok);
1523        }
1524        halfword v = tex_scan_dimension(0, 0, 0, 0, NULL, NULL);
1525        halfword p ;
1526        if (node_token_overflow(v)) {
1527            p = tex_store_new_token(null, token_val(dimension_reference_cmd, node_token_msb(v)));
1528            tex_store_new_token(p, token_val(ignore_cmd, node_token_lsb(v)));
1529        } else {
1530            p = tex_store_new_token(null, token_val(dimension_reference_cmd, v));
1531        }
1532        pstack[nofscanned] = p;
1533        ++nofscanned;
1534        matchpointer = token_link(matchpointer);
1535        matchtoken = token_info(matchpointer);
1536        last = false;
1537        goto OEPS;
1538    }
1539case integer_match_token:
1540    {
1541        if (last) { 
1542            tex_back_input(cur_tok);
1543        }
1544        halfword v = tex_scan_integer(0, NULL, NULL);
1545        halfword p ;
1546        if (node_token_overflow(v)) {
1547            p = tex_store_new_token(null, token_val(integer_reference_cmd, node_token_msb(v)));
1548            tex_store_new_token(p, token_val(ignore_cmd, node_token_lsb(v)));
1549        } else {
1550            p = tex_store_new_token(null, token_val(integer_reference_cmd, v));
1551        }       
1552        pstack[nofscanned] = p;
1553        ++nofscanned;
1554        matchpointer = token_link(matchpointer);
1555        matchtoken = token_info(matchpointer);
1556        last = false;
1557        goto OEPS;
1558    }
1559# endif 
1560                    default:
1561                        match = matchtoken - match_token;
1562                        break;
1563                }
1564                matchpointer = token_link(matchpointer);
1565                matchtoken = token_info(matchpointer);
1566                s = matchpointer;
1567                p = lmt_expand_state.match_token_head;
1568                count = 0;
1569            }
1570            /*tex
1571                Scan an argument delimited by two tokens that can be nested. The right only case is
1572                basically just a simple delimited variant but a bit faster.
1573
1574                todo: when gobble ...
1575            */
1576            if (lefttoken && righttoken) {
1577                tex_aux_macro_grab_left_right(lefttoken, righttoken, match);
1578                lefttoken = null;
1579                righttoken = null;
1580                if (nested) {
1581                    leftparent = null;
1582                    rightparent = null;
1583                    leftbracket = null;
1584                    rightbracket = null;
1585                    leftangle = null;
1586                    rightangle = null;
1587                    nested = false;
1588                }
1589                goto FOUND;
1590            } else if (gobbletoken) {
1591                tex_aux_macro_gobble_upto(gobbletoken, gobblemore);
1592                last = true;
1593                gobbletoken = null;
1594                gobblemore = false;
1595            } else if (matchtoken == par_command_match_token) {
1596                tex_aux_macro_grab_upto_par(match);
1597                cur_tok = matchtoken;
1598                goto DELIMITER;
1599            }
1600            /*tex
1601                Scan a parameter until its delimiter string has been found; or, if |s = null|,
1602                simply scan the delimiter string. If |info(r)| is a |match| or |end_match|
1603                command, it cannot be equal to any token found by |get_token|. Therefore an
1604                undelimited parameter --- i.e., a |match| that is immediately followed by
1605                |match| or |end_match| --- will always fail the test |cur_tok=info(r)| in the
1606                following algorithm.
1607            */
1608          CONTINUE:
1609            /*tex Set |cur_tok| to the next token of input. */
1610            if (last) {
1611                last = false;
1612            } else {
1613                tex_get_token();
1614            }
1615            /* is token_cmd reliable here? */
1616            if (! count && token_cmd(matchtoken) == ignore_cmd) {
1617                if (cur_cmd < ignore_cmd || cur_cmd > other_char_cmd || cur_chr != token_chr(matchtoken)) {
1618                    /*tex We could optimize this but it doesn't pay off now. */
1619                    tex_back_input(cur_tok);
1620                }
1621                matchpointer = token_link(matchpointer);
1622                matchtoken = token_info(matchpointer);
1623                if (s) {
1624                    s = matchpointer;
1625                }
1626                goto AGAIN;
1627            }
1628            if (cur_tok == matchtoken) {
1629                /*tex
1630                    When we end up here we have a match on a delimiter. Advance |r|; |goto found|
1631                    if the parameter delimiter has been fully matched, otherwise |goto continue|.
1632                    A slightly subtle point arises here: When the parameter delimiter ends with
1633                    |#|, the token list will have a left brace both before and after the
1634                    |end_match|. Only one of these should affect the |align_state|, but both will
1635                    be scanned, so we must make a correction.
1636                */
1637              DELIMITER:
1638                matchpointer = token_link(matchpointer);
1639                matchtoken = token_info(matchpointer);
1640                if (matchtoken >= match_token && matchtoken <= end_match_token) {
1641                    if (cur_tok < left_brace_limit) {
1642                        --lmt_input_state.align_state;
1643                    }
1644                    goto FOUND;
1645                } else {
1646                    goto CONTINUE;
1647                }
1648            } else if (cur_cmd == ignore_something_cmd && cur_chr == ignore_argument_code) {
1649                quitting = count ? 1 : count ? 2 : 3;
1650                goto FOUND;
1651            }
1652            /*tex
1653                Contribute the recently matched tokens to the current parameter, and |goto continue|
1654                if a partial match is still in effect; but abort if |s = null|.
1655
1656                When the following code becomes active, we have matched tokens from |s| to the
1657                predecessor of |r|, and we have found that |cur_tok <> info(r)|. An interesting
1658                situation now presents itself: If the parameter is to be delimited by a string such
1659                as |ab|, and if we have scanned |aa|, we want to contribute one |a| to the current
1660                parameter and resume looking for a |b|. The program must account for such partial
1661                matches and for others that can be quite complex. But most of the time we have
1662                |s = r| and nothing needs to be done.
1663
1664                Incidentally, it is possible for |\par| tokens to sneak in to certain parameters of
1665                non-|\long| macros. For example, consider a case like |\def\a#1\par!{...}| where
1666                the first |\par| is not followed by an exclamation point. In such situations it
1667                does not seem appropriate to prohibit the |\par|, so \TEX\ keeps quiet about this
1668                bending of the rules.
1669            */
1670            if (s != matchpointer) {
1671              BAD:
1672                if (tolerant) {
1673                    quitting = nofscanned ? 1 : count ? 2 : 3;
1674                    tex_back_input(cur_tok);
1675                 // last = false;
1676                    goto FOUND;
1677                } else if (s) {
1678                    /*tex cycle pointer for backup recovery */
1679                    halfword t = s;
1680                    do {
1681                        halfword u, v;
1682                        if (match) {
1683                            p = tex_store_new_token(p, token_info(t));
1684                        }
1685                        ++count; /* why */
1686                        u = token_link(t);
1687                        v = s;
1688                        while (1) {
1689                            if (u == matchpointer) {
1690                                if (cur_tok != token_info(v)) {
1691                                    break;
1692                                } else {
1693                                    matchpointer = token_link(v);
1694                                    matchtoken = token_info(matchpointer);
1695                                    goto CONTINUE;
1696                                }
1697                            } else if (token_info(u) != token_info(v)) {
1698                                break;
1699                            } else {
1700                                u = token_link(u);
1701                                v = token_link(v);
1702                            }
1703                        }
1704                        t = token_link(t);
1705                    } while (t != matchpointer);
1706                    matchpointer = s;
1707                    matchtoken = token_info(matchpointer);
1708                    /*tex At this point, no tokens are recently matched. */
1709                } else {
1710                    tex_handle_error(
1711                        normal_error_type,
1712                        "Use of %S doesn't match its definition",
1713                        lmt_input_state.warning_index,
1714                        "If you say, e.g., '\\def\\a1{...}', then you must always put '1' after '\\a',\n"
1715                        "since control sequence names are made up of letters only. The macro here has not\n"
1716                        "been followed by the required stuff, so I'm ignoring it."
1717                    );
1718                    goto EXIT;
1719                }
1720            }
1721          GROUPED:
1722            /*tex We could check |cur_cmd| instead but then we also have to check |cur_cs| later on. */
1723            if (cur_tok < left_brace_limit) {
1724                /*tex Contribute an entire group to the current parameter. */
1725                int unbalance = 0;
1726                while (1) {
1727                    if (match) {
1728                        p = tex_store_new_token(p, cur_tok);
1729                    }
1730                    if (last) {
1731                        last = false;
1732                    } else {
1733                        tex_get_token();
1734                    }
1735                    if (cur_tok < right_brace_limit) {
1736                        if (cur_tok < left_brace_limit) {
1737                            ++unbalance;
1738                        } else if (unbalance) {
1739                            --unbalance;
1740                        } else {
1741                            break;
1742                        }
1743                    }
1744                }
1745                rightbrace = p;
1746                if (match) {
1747                    p = tex_store_new_token(p, cur_tok);
1748                }
1749            } else if (cur_tok < right_brace_limit) {
1750                /*tex Report an extra right brace and |goto continue|. */
1751                tex_back_input(cur_tok);
1752                /* moved up: */
1753                ++lmt_input_state.align_state;
1754                tex_insert_paragraph_token();
1755                /* till here */
1756                tex_handle_error(
1757                    insert_error_type,
1758                    "Argument of %S has an extra }",
1759                    lmt_input_state.warning_index,
1760                    "I've run across a '}' that doesn't seem to match anything. For example,\n"
1761                    "'\\def\\a#1{...}' and '\\a}' would produce this error. The '\\par' that I've just\n"
1762                    "inserted will cause me to report a runaway argument that might be the root of the\n"
1763                    "problem." );
1764                goto CONTINUE;
1765                /*tex A white lie; the |\par| won't always trigger a runaway. */
1766            } else {
1767                /*tex
1768                    Store the current token, but |goto continue| if it is a blank space that would
1769                    become an undelimited parameter.
1770                */
1771                if (cur_tok == space_token && matchtoken <= end_match_token && matchtoken >= match_token && matchtoken != leading_match_token) {
1772                    goto CONTINUE;
1773                }
1774                if (nested && (cur_tok == leftbracket || cur_tok == leftparent || cur_tok == leftangle)) {
1775                    int unbalance = 0;
1776                    int pairing = 1;
1777                    if (match) {
1778                        p = tex_store_new_token(p, cur_tok);
1779                    }
1780                    while (1) {
1781                        halfword t = tex_get_token();
1782                        if (t < right_brace_limit) {
1783                            if (t < left_brace_limit) {
1784                                ++unbalance;
1785                            } else if (unbalance) {
1786                                --unbalance;
1787                            }
1788                        } else if (unbalance) {
1789                            /* just add */
1790                        } else if (t == leftbracket || t == leftparent || t == leftangle) {
1791                            ++pairing;
1792                        } else if (pairing && (t == rightbracket || t == rightparent || t == rightangle)) {
1793                            --pairing;
1794                            if (! pairing && ! righttoken) {
1795                                if (match) {
1796                                    p = tex_store_new_token(p, t);
1797                                }
1798                                break;
1799                            }
1800                        } else if (t == righttoken) {
1801                            break;
1802                        }
1803                        if (match) {
1804                            p = tex_store_new_token(p, t);
1805                        }
1806                        /* align stuff */
1807                    }
1808                } else {
1809                    if (match) {
1810                        p = tex_store_new_token(p, cur_tok);
1811                    }
1812                }
1813            }
1814            ++count;
1815            if (matchtoken > end_match_token || matchtoken < match_token) {
1816                goto CONTINUE;
1817            }
1818          FOUND:
1819            if (s) {
1820                /*
1821                    Tidy up the parameter just scanned, and tuck it away. If the parameter consists
1822                    of a single group enclosed in braces, we must strip off the enclosing braces.
1823                    That's why |rightbrace| was introduced. Actually, in most cases |m == 1|.
1824                */
1825                if (! thrash) {
1826                    halfword n = token_link(lmt_expand_state.match_token_head);
1827                    if (n) {
1828                        if (token_info(p) < right_brace_limit && count == 1 && p != lmt_expand_state.match_token_head && match != match_bracekeeper) {
1829                            set_token_link(rightbrace, null);
1830                            tex_put_available_token(p);
1831                            p = n;
1832                            pstack[nofscanned] = token_link(p);
1833                            tex_put_available_token(p);
1834                        } else {
1835                            pstack[nofscanned] = n;
1836                        }
1837                        if (match == match_pruner) {
1838                            pstack[nofscanned] = tex_aux_prune_list(pstack[nofscanned]);
1839                        }
1840                    }
1841                    ++nofscanned;
1842                    if (tracing) {
1843                        tex_begin_diagnostic();
1844                        tex_print_format("%c%c<-", match_visualizer, '0' + nofscanned + (nofscanned > 9 ? gap_match_count : 0));
1845                        tex_show_token_list(pstack[nofscanned - 1], 0, 0);
1846                        tex_end_diagnostic();
1847                    }
1848                } else {
1849                    thrash = false;
1850                }
1851                lefttoken = null;
1852                righttoken = null;
1853                if (nested) {
1854                    leftparent = null;
1855                    rightparent = null;
1856                    leftbracket = null;
1857                    rightbracket = null;
1858                    leftangle = null;
1859                    rightangle = null;
1860                    nested = false;
1861                }
1862            }
1863            /*tex
1864                Now |info(r)| is a token whose command code is either |match| or |end_match|.
1865            */
1866            if (quitting) {
1867                nofarguments = quitting == 3 ? 0 : quitting == 2 && count == 0 ? 0 : nofscanned;
1868              QUITTING:
1869                if (spacer) {
1870                    tex_back_input(space_token); /* experiment */
1871                }
1872                while (1) {
1873                    switch (matchtoken) {
1874                        case end_match_token:
1875                            goto QUITDONE;
1876                        case spacer_match_token:
1877                        case thrash_match_token:
1878                        case par_spacer_match_token:
1879                        case keep_spacer_match_token:
1880                            goto NEXTMATCH;
1881                        case mandate_match_token:
1882                        case leading_match_token:
1883                         /* pstack[nofscanned] = null; */ /* zeroed anyway */
1884                            break;
1885                        case mandate_keep_match_token:
1886                            p = tex_store_new_token(null, left_brace_token);
1887                            pstack[nofscanned] = p;
1888                            p = tex_store_new_token(p, right_brace_token);
1889                            break;
1890                        case continue_match_token:
1891                            matchpointer = token_link(matchpointer);
1892                            matchtoken = token_info(matchpointer);
1893                            quitting = 0;
1894                            goto RESTART;
1895                        case quit_match_token:
1896                            if (quitting) {
1897                                matchpointer = token_link(matchpointer);
1898                                matchtoken = token_info(matchpointer);
1899                                quitting = 0;
1900                                goto RESTART;
1901                            } else {
1902                                goto NEXTMATCH;
1903                            }
1904                        case left_match_token:
1905                        case right_match_token:
1906                        case gobble_match_token:
1907                        case gobble_more_match_token:
1908                            matchpointer = token_link(matchpointer);
1909                            matchtoken = token_info(matchpointer);
1910                            goto NEXTMATCH;
1911                        case brackets_match_token:
1912                        case parentheses_match_token:
1913                        case angles_match_token:
1914                            goto NEXTMATCH;
1915                        default:
1916                            if (matchtoken >= match_token && matchtoken < end_match_token) {
1917                             /* pstack[nofscanned] = null; */ /* zeroed anyway */
1918                                break;
1919                            } else {
1920                                goto NEXTMATCH;
1921                            }
1922                    }
1923                    nofscanned++;
1924                    if (tracing) {
1925                        tex_begin_diagnostic();
1926                        tex_print_format("%c%i--", match_visualizer, nofscanned);
1927                        tex_end_diagnostic();
1928                    }
1929                  NEXTMATCH:
1930                    matchpointer = token_link(matchpointer);
1931                    matchtoken = token_info(matchpointer);
1932                }
1933            }
1934# if (match_experiment)
1935  OEPS:
1936# endif 
1937        } while (matchtoken != end_match_token);
1938        nofarguments = nofscanned;
1939      QUITDONE:
1940        matchpointer = token_link(matchpointer);
1941        /*tex
1942            Feed the macro body and its parameters to the scanner. Before we put a new token list on the
1943            input stack, it is wise to clean off all token lists that have recently been depleted. Then
1944            a user macro that ends with a call to itself will not require unbounded stack space.
1945
1946            We could ignore this when |lmt_expand_state.cs_name_level > 0| but there is no gain.
1947        */
1948        tex_cleanup_input_state();
1949        /*tex
1950            We don't really start a list, it's more housekeeping. The starting point is the body and
1951            the later set |loc| reflects that.
1952        */
1953        tex_begin_macro_list(chr);
1954        /*tex
1955            Beware: here the |name| is used for symbolic locations but also for macro indices but these
1956            are way above the symbolic |token_types| that we use. Better would be to have a dedicated
1957            variable but let's not open up a can of worms now. We can't use |warning_index| combined
1958            with a symbolic name either. We're at |end_match_token| now so we need to advance.
1959        */
1960        lmt_input_state.cur_input.name = cs;
1961        lmt_input_state.cur_input.loc = matchpointer;
1962        /*tex
1963            This comes last, after the cleanup and the start of the macro list.
1964        */
1965        if (nofscanned) {
1966            tex_copy_to_parameter_stack(&pstack[0], nofscanned);
1967        }
1968      EXIT:
1969        lmt_expand_state.arguments = nofarguments;
1970        lmt_input_state.scanner_status = save_scanner_status;
1971        lmt_input_state.warning_index = save_warning_index;
1972    }
1973}
1974