texexpand.c /size: 82 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7/*tex
8
9    Only a dozen or so command codes |> max_command| can possibly be returned by |get_next|; in
10    increasing order, they are |undefined_cs|, |expand_after|, |no_expand|, |input|, |if_test|,
11    |fi_or_else|, |cs_name|, |convert|, |the|, |get_mark|, |call|, |long_call|, |outer_call|,
12    |long_outer_call|, and |end_template|.
13
14    Sometimes, recursive calls to the following |expand| routine may cause exhaustion of the
15    run-time calling stack, resulting in forced execution stops by the operating system. To
16    diminish the chance of this happening, a counter is used to keep track of the recursion depth,
17    in conjunction with a constant called |expand_depth|.
18
19    Note that this does not catch all possible infinite recursion loops, just the ones that
20    exhaust the application calling stack. The actual maximum value of |expand_depth| is outside
21    of our control, but the initial setting of |100| should be enough to prevent problems.
22
23*/
24
25expand_state_info lmt_expand_state = {
26    .limits           = {
27        .minimum = min_expand_depth,
28        .maximum = max_expand_depth,
29        .size    = min_expand_depth,
30        .top     = 0,
31    },
32    .depth            = 0,
33    .cs_name_level    = 0,
34    .arguments        = 0,
35    .match_token_head = null,
36    .padding          = 0,
37};
38
39       static void tex_aux_macro_call                (halfword cs, halfword cmd, halfword chr);
40inline static void tex_aux_manufacture_csname        (void);
41inline static void tex_aux_manufacture_csname_use    (void);
42inline static void tex_aux_manufacture_csname_future (void);
43inline static void tex_aux_inject_last_tested_cs     (void);
44
/*tex

    We no longer store |match_token_head| in the format file. It is a bit cleaner to just
    initialize it at startup, so we also have to free it when we clean up.

*/
51
52void tex_initialize_expansion(void)
53{
54    lmt_expand_state.match_token_head = tex_get_available_token(null);
55}
56
57void tex_cleanup_expansion(void)
58{
59    tex_put_available_token(lmt_expand_state.match_token_head);
60}
61
62halfword tex_expand_match_token_head(void)
63{
64    return lmt_expand_state.match_token_head;
65}
66
67/*tex
68
69    The |expand| subroutine is used when |cur_cmd > max_command|. It removes a \quote {call} or a
70    conditional or one of the other special operations just listed. It follows that |expand| might
71    invoke itself recursively. In all cases, |expand| destroys the current token, but it sets things
72    up so that the next |get_next| will deliver the appropriate next token. The value of |cur_tok|
73    need not be known when |expand| is called.
74
75    Since several of the basic scanning routines communicate via global variables, their values are
76    saved as local variables of |expand| so that recursive calls don't invalidate them.
77
78*/
79
80inline static void tex_aux_expand_after(void)
81{
82    /*tex
83        Expand the token after the next token. It takes only a little shuffling to do what \TEX\
84        calls |\expandafter|.
85    */
86    halfword t1 = tex_get_token();
87    halfword t2 = tex_get_token();
88    if (cur_cmd > max_command_cmd) {
89        tex_expand_current_token();
90    } else {
91        tex_back_input(t2);
92       /* token_link(t1) = t2; */ /* no gain, rarely happens */
93    }
94    tex_back_input(t1);
95}
96
97inline static void tex_aux_expand_toks_after(void)
98{
99    halfword t1 = tex_scan_toks_normal(0, NULL);
100    halfword l1 = token_link(t1);
101    if (l1) {
102        halfword t2 = tex_get_token();
103        if (cur_cmd > max_command_cmd) {
104            tex_expand_current_token();
105        } else {
106            tex_back_input(t2);
107        }
108        tex_begin_backed_up_list(l1);
109    }
110    tex_put_available_token(t1);
111}
112
/*tex
    Here we deal with stuff that is not in the big switch. Where that switch is discussed it is
    mentioned that it is all a bit messy, also because that switch (or actually a lookup table)
    also uses the mode to determine what to do. We see no reason to change this model.
*/
118
119void tex_inject_parameter(halfword n)
120{
121    if (n >= 0 && n < lmt_input_state.parameter_stack_data.ptr) {
122        halfword p = lmt_input_state.parameter_stack[n];
123        if (p) {
124            tex_begin_parameter_list(p);
125        }
126    }
127}
128
129void tex_expand_current_token(void)
130{
131    ++lmt_expand_state.depth;
132    if (lmt_expand_state.depth > lmt_expand_state.limits.top) {
133        if (lmt_expand_state.depth >= lmt_expand_state.limits.size) {
134            tex_overflow_error("expansion depth", lmt_expand_state.limits.size);
135        } else {
136            lmt_expand_state.limits.top += 1;
137        }
138    }
139    /*tex We're okay. */
140    {
141        halfword saved_cur_val = cur_val;
142        halfword saved_cur_val_level = cur_val_level;
143     // halfword saved_head = token_link(token_data.backup_head);
144        if (cur_cmd < first_call_cmd) {
145            /*tex Expand a nonmacro. */
146            halfword code = cur_chr;
147            if (tracing_commands_par > 1) {
148                tex_show_cmd_chr(cur_cmd, cur_chr);
149            }
150            switch (cur_cmd) {
151                case expand_after_cmd:
152                    {
153                        switch (code) {
154                            case expand_after_code:
155                                tex_aux_expand_after();
156                                break;
157                            /*
158                            case expand_after_3_code:
159                                tex_aux_expand_after();
160                                // fall-through
161                            case expand_after_2_code:
162                                tex_aux_expand_after();
163                                tex_aux_expand_after();
164                                break;
165                            */
166                            case expand_unless_code:
167                                tex_conditional_unless();
168                                break;
169                            case future_expand_code:
170                                /*tex
171                                    This is an experiment: |\futureexpand| (2) which takes |\check \yes
172                                    \nop| as arguments. It's not faster, but gives less tracing noise
173                                    than a macro. The variant |\futureexpandis| (3) alternative doesn't
174                                    inject the gobbles space(s).
175                                */
176                                tex_get_token();
177                                {
178                                    halfword spa = null;
179                                    halfword chr = cur_chr;
180                                    halfword cmd = cur_cmd;
181                                    halfword yes = tex_get_token(); /* when match */
182                                    halfword nop = tex_get_token(); /* when no match */
183                                    while (1) {
184                                        halfword t = tex_get_token();
185                                        if (cur_cmd == spacer_cmd) {
186                                            spa = t;
187                                        } else {
188                                            tex_back_input(t);
189                                            break;
190                                        }
191                                    }
192                                    /*tex The value 1 means: same input level. */
193                                    if (cur_cmd == cmd && cur_chr == chr) {
194                                        tex_reinsert_token(yes);
195                                    } else {
196                                        if (spa) {
197                                            tex_reinsert_token(space_token);
198                                        }
199                                        tex_reinsert_token(nop);
200                                    }
201                                }
202                                break;
203                            case future_expand_is_code:
204                                tex_get_token();
205                                {
206                                    halfword chr = cur_chr;
207                                    halfword cmd = cur_cmd;
208                                    halfword yes = tex_get_token(); /* when match */
209                                    halfword nop = tex_get_token(); /* when no match */
210                                    while (1) {
211                                        halfword t = tex_get_token();
212                                        if (cur_cmd != spacer_cmd) {
213                                            tex_back_input(t);
214                                            break;
215                                        }
216                                    }
217                                    tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
218                                }
219                                break;
220                            case future_expand_is_ap_code:
221                                tex_get_token();
222                                {
223                                    halfword chr = cur_chr;
224                                    halfword cmd = cur_cmd;
225                                    halfword yes = tex_get_token(); /* when match */
226                                    halfword nop = tex_get_token(); /* when no match */
227                                    while (1) {
228                                        halfword t = tex_get_token();
229                                        if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
230                                            tex_back_input(t);
231                                            break;
232                                        }
233                                    }
234                                    /*tex We stay at the same input level. */
235                                    tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
236                                }
237                                break;
238                            case expand_after_spaces_code:
239                                {
240                                    /* maybe two variants: after_spaces and after_par like in the ignores */
241                                    halfword t1 = tex_get_token();
242                                    while (1) {
243                                        halfword t2 = tex_get_token();
244                                        if (cur_cmd != spacer_cmd) {
245                                            tex_back_input(t2);
246                                            break;
247                                        }
248                                    }
249                                    tex_reinsert_token(t1);
250                                    break;
251                                }
252                            case expand_after_pars_code:
253                                {
254                                    halfword t1 = tex_get_token();
255                                    while (1) {
256                                        halfword t2 = tex_get_token();
257                                        if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
258                                            tex_back_input(t2);
259                                            break;
260                                        }
261                                    }
262                                    tex_reinsert_token(t1);
263                                    break;
264                                }
265                            case expand_token_code:
266                                {
267                                    /* we can share code with lmtokenlib .. todo */
268                                    halfword cat = tex_scan_category_code(0);
269                                    halfword chr = tex_scan_char_number(0);
270                                    /* too fragile: 
271                                        halfword tok = null;
272                                        switch (cat) {
273                                            case letter_cmd:
274                                            case other_char_cmd:
275                                            case ignore_cmd:
276                                            case spacer_cmd:
277                                                tok = token_val(cat, chr);
278                                                break;
279                                            case active_char_cmd:
280                                                {
281                                                    halfword cs = tex_active_to_cs(chr, ! lmt_hash_state.no_new_cs);
282                                                    if (cs) { 
283                                                        chr = eq_value(cs);
284                                                        tok = cs_token_flag + cs;
285                                                        break;
286                                                    }
287                                                }
288                                            default:
289                                                tok = token_val(other_char_cmd, chr);
290                                                break;
291                                        }
292                                    */
293                                    switch (cat) {
294                                        case letter_cmd:
295                                        case other_char_cmd:
296                                        case ignore_cmd:
297                                        case spacer_cmd:
298                                            break;
299                                        default:
300                                            cat = other_char_cmd;
301                                            break;
302                                    }
303                                    tex_back_input(token_val(cat, chr));
304                                    break;
305                                }
306                            case expand_cs_token_code:
307                                {
308                                    tex_get_token();
309                                    if (cur_tok >= cs_token_flag) {
310                                        halfword cmd = eq_type(cur_cs);
311                                        switch (cmd) {
312                                            case left_brace_cmd:
313                                            case right_brace_cmd:
314                                            case math_shift_cmd:
315                                            case alignment_tab_cmd:
316                                            case superscript_cmd:
317                                            case subscript_cmd:
318                                            case spacer_cmd:
319                                            case letter_cmd:
320                                            case other_char_cmd:
321                                            case active_char_cmd: /* new */
322                                                cur_tok = token_val(cmd, eq_value(cur_cs));
323                                                break;
324                                        }
325                                    }
326                                    tex_back_input(cur_tok);
327                                    break;
328                                }
329                            case expand_code:
330                                {
331                                    /*tex 
332                                        These can be used instead of |\the<tok register [ref]>| but 
333                                        that next token is not expanded so it doesn't accept |\if|. 
334                                    */
335                                    tex_get_token();
336                                    switch (cur_cmd) { 
337                                        case call_cmd:
338                                        case protected_call_cmd:               
339                                        case semi_protected_call_cmd:
340                                        case constant_call_cmd:
341                                        case tolerant_call_cmd:
342                                        case tolerant_protected_call_cmd:
343                                        case tolerant_semi_protected_call_cmd:
344                                            tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
345                                            break;
346                                        case internal_toks_reference_cmd:
347                                        case register_toks_reference_cmd:
348                                            if (cur_chr) {
349                                                tex_begin_token_list(cur_chr, token_text);
350                                            }
351                                            break;
352                                        case register_cmd:
353                                            if (cur_chr == token_val_level) {
354                                                halfword n = tex_scan_toks_register_number();
355                                                halfword p = eq_value(register_toks_location(n));
356                                                if (p) {
357                                                    tex_begin_token_list(p, token_text);
358                                                }
359                                            } else { 
360                                                tex_back_input(cur_tok);
361                                            }
362                                            break;
363                                        case internal_toks_cmd:
364                                        case register_toks_cmd:
365                                            { 
366                                                halfword p = eq_value(cur_chr);   
367                                                if (p) {
368                                                    tex_begin_token_list(p, token_text);
369                                                }
370                                            }
371                                            break;
372                                        case index_cmd:
373                                            tex_inject_parameter(cur_chr);
374                                            break;
375                                        case case_shift_cmd:
376                                            tex_run_case_shift(cur_chr);
377                                            break;
378                                        default: 
379                                            /* Use expand_current_token so that protected lua call are dealt with too? */
380                                            tex_back_input(cur_tok);
381                                            break;
382                                    }                                            
383                                    break;
384                                }
385                            case expand_toks_code:
386                                {
387                                    /*tex 
388                                        These can be used instead of |\the<tok register [ref]>| and 
389                                        contrary to above here the next token is expanded so it works 
390                                        with a following |\if|. 
391                                    */
392                                    tex_get_x_token();
393                                    switch (cur_cmd) { 
394                                        case internal_toks_reference_cmd:
395                                        case register_toks_reference_cmd:
396                                            if (cur_chr) {
397                                                tex_begin_token_list(cur_chr, token_text);
398                                            }
399                                            break;
400                                        case register_cmd:
401                                            if (cur_chr == token_val_level) {
402                                                halfword n = tex_scan_toks_register_number();
403                                                halfword p = eq_value(register_toks_location(n));
404                                                if (p) {
405                                                    tex_begin_token_list(p, token_text);
406                                                }
407                                            } else { 
408                                                tex_back_input(cur_tok);
409                                            }
410                                            break;
411                                        case internal_toks_cmd:
412                                        case register_toks_cmd:
413                                            { 
414                                                halfword p = eq_value(cur_chr);   
415                                                if (p) {
416                                                    tex_begin_token_list(p, token_text);
417                                                }
418                                            }
419                                            break;
420                                        default: 
421                                            /* Issue an error message? */
422                                            tex_back_input(cur_tok);
423                                            break;
424                                    }                                            
425                                    break;
426                                }
427                            case expand_active_code:
428                                {
429                                    tex_get_token();
430                                    if (cur_cmd == active_char_cmd) {
431                                        cur_cs = tex_active_to_cs(cur_chr, ! lmt_hash_state.no_new_cs);
432                                        if (cur_cs) {
433                                            cur_tok = cs_token_flag + cur_cs;
434                                        } else {
435                                            cur_tok = token_val(cur_cmd, cur_chr);
436                                        }
437                                    }
438                                    tex_back_input(cur_tok);
439                                    break;
440                                }
441                            case expand_semi_code:
442                                {
443                                    tex_get_token();
444                                    switch (cur_cmd) {
445                                        case semi_protected_call_cmd:
446                                        case tolerant_semi_protected_call_cmd:
447                                            tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
448                                            break;
449                                        case lua_semi_protected_call_cmd:
450                                            tex_aux_lua_call(cur_cmd, cur_chr);
451                                            break;
452                                        default:
453                                            tex_back_input(cur_tok);
454                                            break;
455                                    }
456                                    break;
457                                }
458                            case expand_after_toks_code:
459                                {
460                                    tex_aux_expand_toks_after();
461                                    break;
462                                }
463                            case expand_parameter_code:
464                                {
465                                    halfword n = tex_scan_integer(0, NULL);
466                                    if (n >= 0 && n < lmt_input_state.parameter_stack_data.ptr) {
467                                        halfword p = lmt_input_state.parameter_stack[n];
468                                        if (p) {
469                                            tex_begin_parameter_list(p);
470                                        }
471                                    }
472                                    break;
473                                }
474                            /* keep as reference */ /*
475                            case expand_after_fi_code:
476                                {
477                                    tex_conditional_after_fi();
478                                    break;
479                                }
480                            */
481                        }
482                    }
483                    break;
484                case cs_name_cmd:
485                    /*tex Manufacture a control sequence name. */
486                    switch (code) {
487                        case cs_name_code:
488                            tex_aux_manufacture_csname();
489                            break;
490                        case last_named_cs_code:
491                            tex_aux_inject_last_tested_cs();
492                            break;
493                        case begin_cs_name_code:
494                            tex_aux_manufacture_csname_use();
495                            break;
496                        case future_cs_name_code:
497                            tex_aux_manufacture_csname_future();
498                            break;
499                    }
500                    break;
501                case no_expand_cmd:
502                    {
503                        /*tex
504                            Suppress expansion of the next token. The implementation of |\noexpand|
505                            is a bit trickier, because it is necessary to insert a special
506                            |dont_expand| marker into \TEX's reading mechanism. This special marker
507                            is processed by |get_next|, but it does not slow down the inner loop.
508
509                            Since |\outer| macros might arise here, we must also clear the
510                            |scanner_status| temporarily.
511                        */
512                        halfword t;
513//                        halfword save_scanner_status = lmt_input_state.scanner_status;
514//                        lmt_input_state.scanner_status = scanner_is_normal;
515                        t = tex_get_token();
516//                        lmt_input_state.scanner_status = save_scanner_status;
517                        tex_back_input(t);
518                        /*tex Now |start| and |loc| point to the backed-up token |t|. */
519                        if (t >= cs_token_flag) {
520                            halfword p = tex_get_available_token(deep_frozen_dont_expand_token);
521                            set_token_link(p, lmt_input_state.cur_input.loc);
522                            lmt_input_state.cur_input.start = p;
523                            lmt_input_state.cur_input.loc = p;
524                        }
525                    }
526                    break;
527                case if_test_cmd:
528                    if (code < first_real_if_test_code) {
529                        tex_conditional_fi_or_else();
530                    } else if (code != if_condition_code) {
531                        tex_conditional_if(code, 0);
532                    } else {
533                        /*tex The |\ifcondition| primitive is a no-op unless we're in skipping mode. */
534                    }
535                    break;
536                case the_cmd:
537                    {
538                        halfword h = tex_the_toks(code, NULL);
539                        if (h) { 
540                            tex_begin_inserted_list(h);
541                        }
542                        break;
543                    }
544                case lua_call_cmd:
545                    if (code > 0) {
546                        strnumber u = tex_save_cur_string();
547                        lmt_token_state.luacstrings = 0;
548                        lmt_function_call(code, 0);
549                        tex_restore_cur_string(u);
550                        if (lmt_token_state.luacstrings > 0) {
551                            tex_lua_string_start();
552                        }
553                    } else {
554                        tex_normal_error("luacall", "invalid number in expansion");
555                    }
556                    break;
557                case lua_local_call_cmd:
558                    if (code > 0) {
559                        lua_State *L = lmt_lua_state.lua_instance;
560                        strnumber u = tex_save_cur_string();
561                        lmt_token_state.luacstrings = 0;
562                        /* todo: use a private table as we can overflow, unless we register early */
563                        lua_rawgeti(L, LUA_REGISTRYINDEX, code);
564                        if (lua_pcall(L, 0, 0, 0)) {
565                            tex_formatted_warning("luacall", "local call error: %s", lua_tostring(L, -1));
566                        } else {
567                            tex_restore_cur_string(u);
568                            if (lmt_token_state.luacstrings > 0) {
569                                tex_lua_string_start();
570                            }
571                        }
572                    } else {
573                        tex_normal_error("luacall", "invalid local number in expansion");
574                    }
575                    break;
576                case begin_local_cmd:
577                    tex_begin_local_control();
578                    break;
579                case convert_cmd:
580                    tex_run_convert_tokens(code);
581                    break;
582                case input_cmd:
583                    /*tex Initiate or terminate input from a file */
584                    switch (code) {
585                        case normal_input_code:
586                        case eof_input_code:
587                            if (lmt_fileio_state.name_in_progress) {
588                                tex_insert_relax_and_cur_cs();
589                            } else if (code == normal_input_code) {
590                                tex_start_input(tex_read_file_name(0, NULL, texinput_extension), null);
591                            } else { 
592                                halfword t = tex_scan_toks_normal(0, NULL);
593                                tex_start_input(tex_read_file_name(0, NULL, texinput_extension), t);
594                            }
595                            break;
596                        case end_of_input_code:
597                            lmt_token_state.force_eof = 1;
598                            break;
599                        case quit_loop_code:
600                            lmt_main_control_state.quit_loop = 1;
601                            break;
602                        case quit_loop_now_code:
603                            if (lmt_main_control_state.loop_nesting) { 
604                                while (1) { 
605                                    tex_get_token();
606                                    if (cur_cmd == end_local_cmd) {
607                                        lmt_main_control_state.quit_loop = 1;
608                                        tex_back_input(cur_tok);
609                                        break;
610                                    }
611                                }
612                            } else { 
613                                /*tex We're not in a loop and end up at some fuzzy error. */
614                            }
615                            break;                            
616                     /* case quit_fi_now_code: */ /*tex |\if ... \quitfinow\ignorerest \else .. \fi| */
617                     /*     tex_quit_fi();     */
618                     /*     break;             */
619                        case token_input_code:
620                            tex_tex_string_start(io_token_eof_input_code, cat_code_table_par);
621                            break;
622                        case tex_token_input_code:
623                            tex_tex_string_start(io_token_input_code, cat_code_table_par);
624                            break;
625                        case tokenized_code:
626                        case retokenized_code:
627                            {
628                                /*tex
629                                    This variant complements the other expandable primitives but
630                                    also supports an optional keyword, who knows when that comes in
631                                    handy; what goes in is detokenized anyway. For now it is an
632                                    undocumented feature. It is likely that there is a |cct| passed
633                                    so we don't need to optimize. If needed we can make a version
634                                    where this is mandate.
635                                */
636                                int cattable = (code == retokenized_code || tex_scan_optional_keyword("catcodetable")) ? tex_scan_integer(0, NULL) : cat_code_table_par;
637                                full_scanner_status saved_full_status = tex_save_full_scanner_status();
638                                strnumber u = tex_save_cur_string();
639                                halfword s = tex_scan_toks_expand(0, NULL, 0, 0);
640                                tex_unsave_full_scanner_status(saved_full_status);
641                                if (token_link(s)) {
642                                     tex_begin_inserted_list(tex_wrapped_token_list(s));
643                                     tex_tex_string_start(io_token_input_code, cattable);
644                                }
645                                tex_put_available_token(s);
646                                tex_restore_cur_string(u);
647                            }
648                            break;
649                        default:
650                            break;
651                    }
652                    break;
653                case get_mark_cmd:
654                    {
655                        /*tex Insert the appropriate mark text into the scanner. */
656                        halfword num = 0;
657                        switch (code) {
658                            case top_marks_code:
659                            case first_marks_code:
660                            case bot_marks_code:
661                            case split_first_marks_code:
662                            case split_bot_marks_code:
663                            case current_marks_code:
664                                num = tex_scan_mark_number();
665                                break;
666                        }
667                        if (tex_valid_mark(num)) {
668                            halfword ptr = tex_get_some_mark(code, num);
669                            if (ptr) {
670                                tex_begin_token_list(ptr, mark_text);
671                            }
672                        }
673                        break;
674                    }
675                case index_cmd: /* not needed here */
676                    tex_inject_parameter(code); 
677                    break;
678                default:
679                    /* Maybe ... or maybe an option */
680                 // if (lmt_expand_state.cs_name_level == 0) {
681                        if (tex_cs_state(cur_cs) == cs_undefined_error) { 
682                            /*tex Complain about an undefined macro */
683                            tex_handle_error(
684                                normal_error_type,
685                             // "Undefined control sequence %m", cur_cs,
686                                "Undefined control sequence",
687                                "The control sequence at the end of the top line of your error message was never\n"
688                                "\\def'ed. You can just continue as I'll forget about whatever was undefined."
689                            );
690                        } else { 
691                            /*tex We ended up in a situation that is unlikely to happen in traditional \TEX. */
692                            tex_handle_error(
693                                normal_error_type,
694                                "Control sequence expected instead of %C", cur_cmd, code,
695                                "You injected something that confused the parser, maybe by using some Lua call."
696                            );
697                        }
698                 // }
699                    break;
700            }
701        } else if (cur_cmd <= last_call_cmd) {
702             tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
703        } else {
704            /*tex
705                Insert a token containing |frozen_endv|. An |end_template| command is effectively
706                changed to an |endv| command by the following code. (The reason for this is discussed
707                below; the |frozen_end_template| at the end of the template has passed the
708                |check_outer_validity| test, so its mission of error detection has been accomplished.)
709            */
710         // tex_back_input(deep_frozen_end_template_2_token); /* we never come here */
711            tex_back_input(deep_frozen_end_template_token); /* we never come here */
712        }
713        cur_val = saved_cur_val;
714        cur_val_level = saved_cur_val_level;
715     // set_token_link(token_data.backup_head, saved_head);
716    }
717    --lmt_expand_state.depth;
718}
719
720static void tex_aux_complain_missing_csname(void)
721{
722    tex_handle_error(
723        back_error_type,
724        "Missing \\endcsname inserted",
725        "The control sequence marked <to be read again> should not appear between \\csname\n"
726        "and \\endcsname."
727    );
728}
729
730// inline static int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)
731// {
732//     if (c <= 0x7F) {
733//         b[m++] = (unsigned char) c;
734//     } else if (c <= 0x7FF) {
735//         b[m++] = (unsigned char) (0xC0 + c / 0x40);
736//         b[m++] = (unsigned char) (0x80 + c % 0x40);
737//     } else if (c <= 0xFFFF) {
738//         b[m++] = (unsigned char) (0xE0 +  c / 0x1000);
739//         b[m++] = (unsigned char) (0x80 + (c % 0x1000) / 0x40);
740//         b[m++] = (unsigned char) (0x80 + (c % 0x1000) % 0x40);
741//     } else {
742//         b[m++] = (unsigned char) (0xF0 +   c / 0x40000);
743//         b[m++] = (unsigned char) (0x80 + ( c % 0x40000) / 0x1000);
744//         b[m++] = (unsigned char) (0x80 + ((c % 0x40000) % 0x1000) / 0x40);
745//         b[m++] = (unsigned char) (0x80 + ((c % 0x40000) % 0x1000) % 0x40);
746//     }
747//     return m;
748// }
749
/*tex
    Store character |c| as an \UTF-8 byte sequence in buffer |b|, starting at index |m|. One to
    four bytes are written, depending on the magnitude of |c|, and the index just past the last
    written byte is returned. The caller has to make sure that there is room for four bytes.
*/

inline static int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)
{
    unsigned char *out = b + m;
    if (c <= 0x7F) {
        /*tex A single byte. */
        *out++ = (unsigned char) c;
    } else if (c <= 0x7FF) {
        /*tex Two bytes: five plus six bits. */
        *out++ = (unsigned char) (0xC0 | (c >> 6));
        *out++ = (unsigned char) (0x80 | (c & 0x3F));
    } else if (c <= 0xFFFF) {
        /*tex Three bytes: four plus six plus six bits. */
        *out++ = (unsigned char) (0xE0 | (c >> 12));
        *out++ = (unsigned char) (0x80 | ((c >> 6) & 0x3F));
        *out++ = (unsigned char) (0x80 | (c & 0x3F));
    } else {
        /*tex
            Four bytes. The offset above |0x10000| is split in a plane part (|upper|, bits 16
            and up plus one) and the lower sixteen bits.
        */
        int rest = c - 0x10000;
        int upper = (int) (((rest & 0xf0000) >> 16) + 1);
        *out++ = (unsigned char) (0xF0 | (upper >> 2));
        *out++ = (unsigned char) (0x80 | ((upper & 3) << 4) | ((rest & 0xF000) >> 12));
        *out++ = (unsigned char) (0x80 | ((rest & 0xFC0) >> 6));
        *out++ = (unsigned char) (0x80 | (rest & 0x3F));
    }
    return (int) (out - b);
}
772
773/*tex
    We also quit on a protected macro call, which is different from \LUATEX\ (and \PDFTEX) but makes
    a lot of sense. It also avoids long token lists that never (should) match anyway.
776*/
777
778static int tex_aux_collect_cs_tokens(halfword *p, int *n)
779{
780    while (1) {
781        tex_get_next();
782        switch (cur_cmd) {
783            case left_brace_cmd:
784            case right_brace_cmd:
785            case math_shift_cmd:
786            case alignment_tab_cmd:
787         /* case end_line_cmd: */
788            case parameter_cmd:
789            case superscript_cmd:
790            case subscript_cmd:
791         /* case ignore_cmd: */
792            case spacer_cmd:
793            case letter_cmd:
794            case other_char_cmd:
795            case active_char_cmd: /* new, here we don't expand */
796                 *p = tex_store_new_token(*p, token_val(cur_cmd, cur_chr));
797                 *n += 1;
798                 break;
799         /* case comment_cmd: */
800         /* case invalid_char_cmd: */
801         /*      break; */
802            case call_cmd:
803            case tolerant_call_cmd:
804                tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
805                break;
806            case constant_call_cmd:
807                {
808                  halfword h = token_link(cur_chr);
809                  if (h) { 
810                      if (token_link(h)) { 
811                          if (cur_chr > max_data_value) {
812                               while (h) {
813                                   *p = tex_store_new_token(*p, token_info(h));
814                                   *n += 1;
815                                   h = token_link(h);
816                               }
817                          } else {
818                              *p = tex_store_new_token(*p, token_val(deep_frozen_keep_constant_cmd, cur_chr));
819                          }
820                      } else { 
821                          *p = tex_store_new_token(*p, token_info(h));
822                      }
823                      *n += 1;
824                  }
825                }
826                break;
827            case end_cs_name_cmd:
828                return 1;
829            default:
830                if (cur_cmd > max_command_cmd && cur_cmd < first_call_cmd) {
831                    tex_expand_current_token();
832                } else {
833                    return 0;
834                }
835         }
836     }
837}
838
839int tex_is_valid_csname(void)
840{
841    halfword cs = null_cs;
842    int b = 0;
843    int n = 0;
844    halfword h = tex_get_available_token(null);
845    halfword p = h;
846    lmt_expand_state.cs_name_level += 1;
847    if (! tex_aux_collect_cs_tokens(&p, &n)) {
848        /*tex We seldom end up here so there is no gain in optimizing. */
849        do {
850            tex_get_x_or_protected(); /* we skip unprotected ! */
851        } while (cur_cmd != end_cs_name_cmd);
852    } else if (n) {
853        /*tex Look up the characters of list |n| in the hash table, and set |cur_cs|. */
854        int f = lmt_fileio_state.io_first;
855        if (tex_room_in_buffer(f + n * 4)) {
856            int m = f;
857            halfword l = token_link(h);
858            while (l) {
859                if (token_cmd(token_info(l)) == deep_frozen_keep_constant_cmd) {
860                    halfword h = token_link(token_chr(token_info(l)));
861                    while (h) {
862                        m = tex_aux_uni_to_buffer(lmt_fileio_state.io_buffer, m, token_chr(token_info(h)));
863                        h = token_link(h);
864                    }
865                } else {
866                    m = tex_aux_uni_to_buffer(lmt_fileio_state.io_buffer, m, token_chr(token_info(l)));
867                }
868                l = token_link(l);
869            }
870            cs = tex_id_locate_only(f, m - f); 
871            b = (cs != undefined_control_sequence) && (eq_type(cs) != undefined_cs_cmd);
872        }
873    }
874    tex_flush_token_list_head_tail(h, p, n + 1);
875    lmt_scanner_state.last_cs_name = cs;
876    lmt_expand_state.cs_name_level -= 1;
877    cur_cs = cs;
878    return b;
879}
880
881inline static halfword tex_aux_get_cs_name(void)
882{
883    halfword h = tex_get_available_token(null); /* hm */
884    halfword p = h;
885    int n = 0;
886    lmt_expand_state.cs_name_level += 1;
887    if (tex_aux_collect_cs_tokens(&p, &n)) {
888        /*tex 
889            Here we have to make a choice wrt duplicating hashes. In pdftex the hashes are 
890            duplicated when we csname a meaning of a macro with |#1| and |##1| or just |##| 
891            but in the token list these are actually references of single hashes. Therefore 
892            we do as in luatex: we go single hash. In the end it doesn't matter much as such 
893            weird control sequences are less likely to happen than embedded hashes (with 
894            catcode parameter) so single is then more natural. 
895        */
896        int siz;
897        char *s = tex_tokenlist_to_tstring(h, 1, &siz, 0, 0, 0, 0, 1); /* single hashes */
898        /*tex 
899            Now we can look up the characters of list |h| in the hash table, and set |cur_cs| 
900            accordingly. 
901        */
902        cur_cs = (siz > 0) ? tex_string_locate((char *) s, siz, 1) : null_cs;
903    } else {
904        tex_aux_complain_missing_csname();
905    }
906    lmt_scanner_state.last_cs_name = cur_cs;
907    lmt_expand_state.cs_name_level -= 1;
908    tex_flush_token_list_head_tail(h, p, n);
909    return cur_cs;
910}
911
912inline static void tex_aux_manufacture_csname(void)
913{
914    halfword cs = tex_aux_get_cs_name();
915    if (eq_type(cs) == undefined_cs_cmd) {
916        /*tex The |save_stack| might change! */
917        tex_eq_define(cs, relax_cmd, relax_code);
918    }
919    /*tex The control sequence will now match |\relax| */
920    tex_back_input(cs + cs_token_flag);
921}
922
923inline static void tex_aux_manufacture_csname_use(void)
924{
925    if (tex_is_valid_csname()) {
926        tex_back_input(cur_cs + cs_token_flag);
927    } else {
928        lmt_scanner_state.last_cs_name = deep_frozen_relax_token;
929    }
930}
931
932inline static void tex_aux_manufacture_csname_future(void)
933{
934    halfword t = tex_get_token();
935    if (tex_is_valid_csname()) {
936        tex_back_input(cur_cs + cs_token_flag);
937    } else {
938        lmt_scanner_state.last_cs_name = deep_frozen_relax_token;
939        tex_back_input(t);
940    }
941}
942
943halfword tex_create_csname(void)
944{
945    halfword cs = tex_aux_get_cs_name();
946    if (eq_type(cs) == undefined_cs_cmd) {
947        tex_eq_define(cs, relax_cmd, relax_code);
948    }
949    return cs; // cs + cs_token_flag;
950}
951
952inline static void tex_aux_inject_last_tested_cs(void)
953{
954    if (lmt_scanner_state.last_cs_name != null_cs) {
955        tex_back_input(lmt_scanner_state.last_cs_name + cs_token_flag);
956    }
957}
958
959/*tex
960
961    Sometimes the expansion looks too far ahead, so we want to insert a harmless |\relax| into the
962    user's input.
963*/
964
965void tex_insert_relax_and_cur_cs(void)
966{
967    tex_back_input(cs_token_flag + cur_cs);
968    tex_reinsert_token(deep_frozen_relax_token);
969    lmt_input_state.cur_input.token_type = inserted_text;
970}
971
972/*tex
973
974    Here is a recursive procedure that is \TEX's usual way to get the next token of input. It has
975    been slightly optimized to take account of common cases.
976
977*/
978
979halfword tex_get_x_token(void)
980{
981    /*tex This code sets |cur_cmd|, |cur_chr|, |cur_tok|, and expands macros. */
982    while (1) {
983        tex_get_next();
984        if (cur_cmd <= max_command_cmd) {
985            break;
986        } else if (cur_cmd < first_call_cmd) {
987            tex_expand_current_token();
988        } else if (cur_cmd <= last_call_cmd) {
989            tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
990        } else {
991         // cur_cs = deep_frozen_cs_end_template_2_code;
992            cur_cs = deep_frozen_cs_end_template_code;
993            cur_cmd = end_template_cmd;
994            /*tex Now |cur_chr = token_state.null_list|. */
995            break;
996        }
997    }
998    if (cur_cs) {
999        cur_tok = cs_token_flag + cur_cs;
1000    } else {
1001        cur_tok = token_val(cur_cmd, cur_chr);
1002    }
1003    return cur_tok;
1004}
1005
1006/*tex
1007
    The |get_x_token| procedure is equivalent to two consecutive procedure calls: |get_next; x_token|.
    In other words, |x_token| is |get_x_token| without the initial |get_next|.
1010
1011*/
1012
1013void tex_x_token(void)
1014{
1015    while (cur_cmd > max_command_cmd) {
1016        tex_expand_current_token();
1017        tex_get_next();
1018    }
1019    if (cur_cs) {
1020        cur_tok = cs_token_flag + cur_cs;
1021    } else {
1022        cur_tok = token_val(cur_cmd, cur_chr);
1023    }
1024}
1025
1026/*tex
1027
1028    A control sequence that has been |\def|'ed by the user is expanded by \TEX's |macro_call|
1029    procedure. Here we also need to deal with marks, but these are  discussed elsewhere.
1030
1031    So let's consider |macro_call| itself, which is invoked when \TEX\ is scanning a control
1032    sequence whose |cur_cmd| is either |call|, |long_call|, |outer_call|, or |long_outer_call|. The
1033    control sequence definition appears in the token list whose reference count is in location
1034    |cur_chr| of |mem|.
1035
1036    The global variable |long_state| will be set to |call| or to |long_call|, depending on whether
1037    or not the control sequence disallows |\par| in its parameters. The |get_next| routine will set
1038    |long_state| to |outer_call| and emit |\par|, if a file ends or if an |\outer| control sequence
1039    occurs in the midst of an argument.
1040
1041    The parameters, if any, must be scanned before the macro is expanded. Parameters are token
1042    lists without reference counts. They are placed on an auxiliary stack called |pstack| while
1043    they are being scanned, since the |param_stack| may be losing entries during the matching
1044    process. (Note that |param_stack| can't be gaining entries, since |macro_call| is the only
1045    routine that puts anything onto |param_stack|, and it is not recursive.)
1046
1047    After parameter scanning is complete, the parameters are moved to the |param_stack|. Then the
1048    macro body is fed to the scanner; in other words, |macro_call| places the defined text of the
1049    control sequence at the top of \TEX's input stack, so that |get_next| will proceed to read it
1050    next.
1051
1052    The global variable |cur_cs| contains the |eqtb| address of the control sequence being expanded,
1053    when |macro_call| begins. If this control sequence has not been declared |\long|, i.e., if its
1054    command code in the |eq_type| field is not |long_call| or |long_outer_call|, its parameters are
1055    not allowed to contain the control sequence |\par|. If an illegal |\par| appears, the macro call
1056    is aborted, and the |\par| will be rescanned.
1057
1058    Beware: we cannot use |cur_cmd| here because for instance |\bgroup| can be part of an argument
1059    without there being an |\egroup|. We really need to check raw brace tokens (|{}|) here when we
1060    pick up an argument!
1061
1062 */
1063
1064/*tex
1065
    In \LUAMETATEX\ we have an extended argument definition system. The approach is still the same
1067    and the additional code kind of fits in. There is a bit more testing going on but the overhead
1068    is kept at a minimum so performance is not hit. Macro packages like \CONTEXT\ spend a lot of
1069    time expanding and the extra overhead of the extensions is compensated by some gain in using
1070    them. However, the most important motive is in readability of macro code on the one hand and
1071    the wish for less tracing (due to all this multi-step processing) on the other. It suits me
1072    well. This is definitely a case of |goto| abuse.
1073
1074*/
1075
1076static halfword tex_aux_prune_list(halfword h)
1077{
1078    halfword t = h;
1079    halfword p = null;
1080    bool done = 0;
1081    int last = null;
1082    while (t) {
1083        halfword l = token_link(t);
1084        halfword i = token_info(t);
1085        halfword c = token_cmd(i);
1086        if (c != spacer_cmd && c != end_paragraph_cmd && i != lmt_token_state.par_token) { // c != 0xFF
1087            done = true;
1088            last = null;
1089        } else if (done) {
1090            if (! last) {
1091                last = p; /* before space */
1092            }
1093        } else {
1094            h = l;
1095            tex_put_available_token(t);
1096        }
1097        p = t;
1098        t = l;
1099    }
1100    if (last) {
1101        halfword l = token_link(last);
1102        token_link(last) = null;
1103        tex_flush_token_list(l);
1104    }
1105    return h;
1106}
1107
1108int tex_get_parameter_count(void)
1109{
1110    int n = 0;
1111    for (int i = lmt_input_state.cur_input.parameter_start; i < lmt_input_state.parameter_stack_data.ptr; i++) {
1112        if (lmt_input_state.parameter_stack[i]) {
1113            ++n;
1114        } else {
1115            break;
1116        }
1117    }
1118    return n;
1119}
1120
1121int tex_get_parameter_index(int n)
1122{
1123    n = lmt_input_state.cur_input.parameter_start + n - 1;
1124    if (n < lmt_input_state.parameter_stack_data.ptr) {
1125        return n; 
1126    }
1127    return -1;
1128}
1129
1130/*tex 
1131    We can avoid the copy of parameters to the stack but it complicates the code because we also need 
1132    to clean up the previous set of parameters etc. It's not worth the effort. However, there are 
1133    plenty of optimizations compared to the original. Some are measurable on an average run, others
1134    are more likely to increase performance when thousands of successive runs happen in e.g. a virtual 
    environment where threads fight for memory access and cpu cache. And because \CONTEXT\ is used
1136    that way we keep looking into ways to gain performance, but not at the cost of dirty hacks (that 
1137    I tried out of curiosity but rejected in the end). 
1138
1139    The arguments counter is a bit fuzzy and might disappear. I might rewrite this again using states. 
1140*/
1141
1142// halfword tex_get_token(void)
1143// {
1144//     lmt_hash_state.no_new_cs = 0;
1145//     tex_get_next();
1146//     lmt_hash_state.no_new_cs = 1;
1147//     cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr);
1148//     return cur_tok;
1149// }
1150
1151inline static void tex_aux_macro_grab_left_right(halfword lefttoken, halfword righttoken, int match)
1152{
1153    halfword tail = lmt_expand_state.match_token_head;
1154    int unbalance = 0;
1155    int nesting = 1;
1156    while (1) {
1157        halfword t = tex_get_token();
1158        if (cur_tok < right_brace_limit) {
1159            if (cur_tok < left_brace_limit) {
1160                ++unbalance;
1161            } else if (unbalance) {
1162                --unbalance;
1163            }
1164        } else if (unbalance) {
1165            /* just add */
1166        } else if (t == lefttoken) {
1167            ++nesting;  
1168        } else if (t == righttoken) {
1169            --nesting;
1170            if (! nesting) { 
1171                break;
1172            }
1173        }
1174        if (match) { 
1175            tail = tex_store_new_token(tail, t);
1176        }
1177    }
1178}
1179
1180inline static void tex_aux_macro_grab_upto_par(int match)
1181{
1182    halfword tail = lmt_expand_state.match_token_head;
1183    int unbalance = 0;
1184    while (1) {
1185        halfword t = tex_get_token();
1186        if (cur_tok < right_brace_limit) {
1187            if (cur_tok < left_brace_limit) {
1188                ++unbalance;
1189            } else if (unbalance) {
1190                --unbalance;
1191            }
1192        } else if (unbalance) {
1193            /* just add */
1194        } else if (cur_cmd == end_paragraph_cmd) {
1195            break;
1196        }
1197        if (match) { 
1198            tail = tex_store_new_token(tail, t);
1199        }
1200    }
1201}
1202
1203inline static void tex_aux_macro_gobble_upto(halfword gobbletoken, bool gobblemore)
1204{
1205    if (gobblemore) { 
1206        while (1) { 
1207            halfword t = tex_get_token();
1208            if (! (t == gobbletoken || cur_cmd == spacer_cmd)) {
1209                break;
1210            }
1211        }
1212    } else { 
1213        do {
1214        } while (tex_get_token() == gobbletoken);
1215    }
1216}
1217
1218static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
1219{
1220    bool tracing = tracing_macros_par > 0;
1221    if (tracing) {
1222        /*tex
1223            Setting |\tracingmacros| to 2 means that elsewhere marks etc are shown so in fact a bit
1224            more detail. However, as we turn that on anyway, using a value of 3 is not that weird
1225            for less info here. Introducing an extra parameter makes no sense.
1226        */
1227        tex_begin_diagnostic();
1228        tex_print_cs_checked(cs);
1229        if (is_untraced(eq_flag(cs))) {
1230            tracing = false;
1231        } else {
1232            if (! get_token_preamble(chr)) {
1233                tex_print_str("->");
1234            } else {
1235                /* maybe move the preamble scanner to here */
1236            }
1237            tex_token_show(chr);
1238        }
1239        tex_end_diagnostic();
1240    }
1241    if (! get_token_preamble(chr)) {
1242        /*tex Happens more often (about two times). */
1243        tex_cleanup_input_state();
1244        if (token_link(chr)) {
1245            tex_begin_macro_list(chr);
1246            lmt_expand_state.arguments = 0;
1247            lmt_input_state.cur_input.name = lmt_input_state.warning_index;
1248            lmt_input_state.cur_input.loc = token_link(chr);
1249        } else { 
1250            /* We ignore empty bodies. */
1251        }
1252    } else {
1253        halfword matchpointer = token_link(chr);
1254        halfword matchtoken = token_info(matchpointer);
1255        int save_scanner_status = lmt_input_state.scanner_status;
1256        halfword save_warning_index = lmt_input_state.warning_index;
1257        int nofscanned = 0;
1258        int nofarguments = 0;
1259        halfword pstack[max_match_count] = { null }; 
1260        /*tex
1261            Scan the parameters and make |link(r)| point to the macro body; but |return| if an
1262            illegal |\par| is detected.
1263
1264            At this point, the reader will find it advisable to review the explanation of token
1265            list format that was presented earlier, since many aspects of that format are of
1266            importance chiefly in the |macro_call| routine.
1267
1268            The token list might begin with a string of compulsory tokens before the first
1269            |match| or |end_match|. In that case the macro name is supposed to be followed by
1270            those tokens; the following program will set |s=null| to represent this restriction.
1271            Otherwise |s| will be set to the first token of a string that will delimit the next
1272            parameter.
1273        */
1274        int tolerant = is_tolerant_cmd(cmd);
1275        /*tex the number of tokens or groups (usually) */
1276        halfword count = 0;
1277        /*tex one step before the last |right_brace| token */
1278        halfword rightbrace = null;
1279        /*tex the state, currently the character used in parameter */
1280        int match = 0;
1281        bool thrash = false;
1282        bool last = false;
1283        bool spacer = false;
1284        bool gobblemore = false;
1285        bool nested = false;
1286        int quitting = 0; /* multiple values */
1287        /*tex current node in parameter token list being built */
1288        halfword p = null;
1289        /*tex backup pointer for parameter matching */
1290        halfword s = null;
1291        halfword lefttoken = null;
1292        halfword righttoken = null;
1293        halfword gobbletoken = null;
1294        halfword leftparent = null;
1295        halfword rightparent = null;
1296        halfword leftbracket = null;
1297        halfword rightbracket = null;
1298        halfword leftangle = null;
1299        halfword rightangle = null;
1300        /*tex
1301             One day I will check the next code for too many tests, no that much branching that it.
1302             The numbers in |#n| are match tokens except the last one, which is has a different
1303             token info.
1304        */
1305        lmt_input_state.warning_index = cs;
1306        lmt_input_state.scanner_status = tolerant ? scanner_is_tolerant : scanner_is_matching;
1307        /* */
1308        do {
1309            /*tex
1310                So, can we use a local head here? After all, there is no expansion going on here,
1311                so no need to access |temp_token_head|. On the other hand, it's also used as a
1312                signal, so not now.
1313            */
1314          RESTART:
1315            set_token_link(lmt_expand_state.match_token_head, null);
1316          AGAIN:
1317            spacer = false;
1318          LATER:
1319            if (matchtoken < match_token || matchtoken >= end_match_token) {
1320                s = null;
1321            } else {
1322                switch (matchtoken) {
1323                    case spacer_match_token:
1324                        matchpointer = token_link(matchpointer);
1325                        matchtoken = token_info(matchpointer);
1326                        do {
1327                            tex_get_token();
1328                        } while (cur_cmd == spacer_cmd);
1329                        last = true;
1330                        goto AGAIN;
1331                    case mandate_match_token:
1332                        match = match_mandate;
1333                        goto MANDATE;
1334                    case mandate_keep_match_token:
1335                        match = match_bracekeeper;
1336                      MANDATE:
1337                        if (last) {
1338                            last = false;
1339                        } else {
1340                            tex_get_token();
1341                            last = true;
1342                        }
1343                        if (cur_tok < left_brace_limit) {
1344                            matchpointer = token_link(matchpointer);
1345                            matchtoken = token_info(matchpointer);
1346                            s = matchpointer;
1347                            p = lmt_expand_state.match_token_head;
1348                            count = 0;
1349                            last = false;
1350                            goto GROUPED;
1351                        } else if (tolerant) {
1352                            last = false;
1353                            nofarguments = nofscanned;
1354                            tex_back_input(cur_tok);
1355                            goto QUITTING;
1356                        } else {
1357                            last = false;
1358                            tex_back_input(cur_tok);
1359                            s = null;
1360                            goto BAD;
1361                        }
1362                     // break;
1363                    case thrash_match_token:
1364                        match = 0;
1365                        thrash = true;
1366                        break;
1367                    case leading_match_token:
1368                        match = match_spacekeeper;
1369                        break;
1370                    case prune_match_token:
1371                        match = match_pruner;
1372                        break;
1373                    case continue_match_token:
1374                        matchpointer = token_link(matchpointer);
1375                        matchtoken = token_info(matchpointer);
1376                        goto AGAIN;
1377                    case quit_match_token:
1378                        match = match_quitter;
1379                        if (tolerant) {
1380                            last = false;
1381                            nofarguments = nofscanned;
1382                            matchpointer = token_link(matchpointer);
1383                            matchtoken = token_info(matchpointer);
1384                            goto QUITTING;
1385                        } else {
1386                            break;
1387                        }
1388                    case par_spacer_match_token:
1389                        matchpointer = token_link(matchpointer);
1390                        matchtoken = token_info(matchpointer);
1391                        do {
1392                            /* discard as we go */
1393                            tex_get_token();
1394                        } while (cur_cmd == spacer_cmd || cur_cmd == end_paragraph_cmd);
1395                        last = true;
1396                        goto AGAIN;
1397                    case keep_spacer_match_token:
1398                        matchpointer = token_link(matchpointer);
1399                        matchtoken = token_info(matchpointer);
1400                        do {
1401                            tex_get_token();
1402                            if (cur_cmd == spacer_cmd) {
1403                                spacer = true;
1404                            } else {
1405                                break;
1406                            }
1407                        } while (1);
1408                        last = true;
1409                        goto LATER;
1410                    case left_match_token:
1411                        matchpointer = token_link(matchpointer);
1412                        lefttoken = token_info(matchpointer);
1413                        matchpointer = token_link(matchpointer);
1414                        matchtoken = token_info(matchpointer);
1415                     // match = match_token;
1416                        goto AGAIN;
1417                    case right_match_token:
1418                        matchpointer = token_link(matchpointer);
1419                        righttoken = token_info(matchpointer);
1420                        matchpointer = token_link(matchpointer);
1421                        matchtoken = token_info(matchpointer);
1422                     // match = match_token;
1423                        goto AGAIN;
1424                    case gobble_more_match_token:
1425                        gobblemore = true;
1426                    case gobble_match_token:
1427                        matchpointer = token_link(matchpointer);
1428                        gobbletoken = token_info(matchpointer);
1429                        matchpointer = token_link(matchpointer);
1430                        matchtoken = token_info(matchpointer);
1431                     // match = match_token;
1432                        goto AGAIN;
1433                    case brackets_match_token:
1434                        leftbracket = left_bracket_token;
1435                        rightbracket = right_bracket_token;
1436                        matchpointer = token_link(matchpointer);
1437                        matchtoken = token_info(matchpointer);
1438                        nested = true;
1439                     // match = match_token;
1440                        goto AGAIN;
1441                    case parentheses_match_token:
1442                        leftparent = left_parent_token;
1443                        rightparent = right_parent_token;
1444                        matchpointer = token_link(matchpointer);
1445                        matchtoken = token_info(matchpointer);
1446                        nested = true;
1447                     // match = match_token;
1448                        goto AGAIN;
1449                    case angles_match_token:
1450                        leftangle= left_angle_token;
1451                        rightangle = right_angle_token;
1452                        matchpointer = token_link(matchpointer);
1453                        matchtoken = token_info(matchpointer);
1454                        nested = true;
1455                     // match = match_token;
1456                        goto AGAIN;
1457                    default:
1458                        match = matchtoken - match_token;
1459                        break;
1460                }
1461                matchpointer = token_link(matchpointer);
1462                matchtoken = token_info(matchpointer);
1463                s = matchpointer;
1464                p = lmt_expand_state.match_token_head;
1465                count = 0;
1466            }
1467            /*tex 
1468                Scan an argument delimited by two tokens that can be nested. The right only case is 
1469                basically just a simple delimited variant but a bit faster. 
1470
1471                todo: when gobble ... 
1472            */
1473            if (lefttoken && righttoken) { 
1474                tex_aux_macro_grab_left_right(lefttoken, righttoken, match);
1475                lefttoken = null;
1476                righttoken = null;
1477                if (nested) {
1478                    leftparent = null;
1479                    rightparent = null;
1480                    leftbracket = null;
1481                    rightbracket = null;
1482                    leftangle = null;
1483                    rightangle = null;
1484                    nested = false;
1485                }
1486                goto FOUND;
1487            } else if (gobbletoken) { 
1488                tex_aux_macro_gobble_upto(gobbletoken, gobblemore);
1489                last = true; 
1490                gobbletoken = null;
1491                gobblemore = false;
1492            } else if (matchtoken == par_command_match_token) {
1493                tex_aux_macro_grab_upto_par(match);
1494                cur_tok = matchtoken; 
1495                goto DELIMITER;
1496            } 
1497            /*tex
1498                Scan a parameter until its delimiter string has been found; or, if |s = null|,
1499                simply scan the delimiter string. If |info(r)| is a |match| or |end_match|
1500                command, it cannot be equal to any token found by |get_token|. Therefore an
1501                undelimited parameter --- i.e., a |match| that is immediately followed by
1502                |match| or |end_match| --- will always fail the test |cur_tok=info(r)| in the
1503                following algorithm.
1504            */
1505          CONTINUE:
1506            /*tex Set |cur_tok| to the next token of input. */
1507            if (last) {
1508                last = false;
1509            } else {
1510                tex_get_token();
1511            }
1512            /* is token_cmd reliable here? */
1513            if (! count && token_cmd(matchtoken) == ignore_cmd) {
1514                if (cur_cmd < ignore_cmd || cur_cmd > other_char_cmd || cur_chr != token_chr(matchtoken)) {
1515                    /*tex We could optimize this but it doesn't pay off now. */
1516                    tex_back_input(cur_tok);
1517                }
1518                matchpointer = token_link(matchpointer);
1519                matchtoken = token_info(matchpointer);
1520                if (s) {
1521                    s = matchpointer;
1522                }
1523                goto AGAIN;
1524            }
1525            if (cur_tok == matchtoken) {
1526                /*tex
1527                    When we end up here we have a match on a delimiter. Advance |r|; |goto found|
1528                    if the parameter delimiter has been fully matched, otherwise |goto continue|.
1529                    A slightly subtle point arises here: When the parameter delimiter ends with
1530                    |#|, the token list will have a left brace both before and after the
1531                    |end_match|. Only one of these should affect the |align_state|, but both will
1532                    be scanned, so we must make a correction.
1533                */
1534              DELIMITER:
1535                matchpointer = token_link(matchpointer);
1536                matchtoken = token_info(matchpointer);
1537                if (matchtoken >= match_token && matchtoken <= end_match_token) {
1538                    if (cur_tok < left_brace_limit) {
1539                        --lmt_input_state.align_state;
1540                    }
1541                    goto FOUND;
1542                } else {
1543                    goto CONTINUE;
1544                }
1545            } else if (cur_cmd == ignore_something_cmd && cur_chr == ignore_argument_code) {
1546                quitting = count ? 1 : count ? 2 : 3;
1547                goto FOUND;
1548            }
1549            /*tex
1550                Contribute the recently matched tokens to the current parameter, and |goto continue|
1551                if a partial match is still in effect; but abort if |s = null|.
1552
1553                When the following code becomes active, we have matched tokens from |s| to the
1554                predecessor of |r|, and we have found that |cur_tok <> info(r)|. An interesting
1555                situation now presents itself: If the parameter is to be delimited by a string such
1556                as |ab|, and if we have scanned |aa|, we want to contribute one |a| to the current
1557                parameter and resume looking for a |b|. The program must account for such partial
1558                matches and for others that can be quite complex. But most of the time we have
1559                |s = r| and nothing needs to be done.
1560
1561                Incidentally, it is possible for |\par| tokens to sneak in to certain parameters of
1562                non-|\long| macros. For example, consider a case like |\def\a#1\par!{...}| where
1563                the first |\par| is not followed by an exclamation point. In such situations it
1564                does not seem appropriate to prohibit the |\par|, so \TEX\ keeps quiet about this
1565                bending of the rules.
1566            */
1567            if (s != matchpointer) {
1568              BAD:
1569                if (tolerant) {
1570                    quitting = nofscanned ? 1 : count ? 2 : 3;
1571                    tex_back_input(cur_tok);
1572                 // last = false;
1573                    goto FOUND;
1574                } else if (s) {
1575                    /*tex cycle pointer for backup recovery */
1576                    halfword t = s;
1577                    do {
1578                        halfword u, v;
1579                        if (match) {
1580                            p = tex_store_new_token(p, token_info(t));
1581                        }
1582                        ++count; /* why */
1583                        u = token_link(t);
1584                        v = s;
1585                        while (1) {
1586                            if (u == matchpointer) {
1587                                if (cur_tok != token_info(v)) {
1588                                    break;
1589                                } else {
1590                                    matchpointer = token_link(v);
1591                                    matchtoken = token_info(matchpointer);
1592                                    goto CONTINUE;
1593                                }
1594                            } else if (token_info(u) != token_info(v)) {
1595                                break;
1596                            } else {
1597                                u = token_link(u);
1598                                v = token_link(v);
1599                            }
1600                        }
1601                        t = token_link(t);
1602                    } while (t != matchpointer);
1603                    matchpointer = s;
1604                    matchtoken = token_info(matchpointer);
1605                    /*tex At this point, no tokens are recently matched. */
1606                } else {
1607                    tex_handle_error(
1608                        normal_error_type,
1609                        "Use of %S doesn't match its definition",
1610                        lmt_input_state.warning_index,
1611                        "If you say, e.g., '\\def\\a1{...}', then you must always put '1' after '\\a',\n"
1612                        "since control sequence names are made up of letters only. The macro here has not\n"
1613                        "been followed by the required stuff, so I'm ignoring it."
1614                    );
1615                    goto EXIT;
1616                }
1617            }
1618          GROUPED:
1619            /*tex We could check |cur_cmd| instead but then we also have to check |cur_cs| later on. */
1620            if (cur_tok < left_brace_limit) {
1621                /*tex Contribute an entire group to the current parameter. */
1622                int unbalance = 0;
1623                while (1) {
1624                    if (match) {
1625                        p = tex_store_new_token(p, cur_tok);
1626                    }
1627                    if (last) {
1628                        last = false;
1629                    } else {
1630                        tex_get_token();
1631                    }
1632                    if (cur_tok < right_brace_limit) {
1633                        if (cur_tok < left_brace_limit) {
1634                            ++unbalance;
1635                        } else if (unbalance) {
1636                            --unbalance;
1637                        } else {
1638                            break;
1639                        }
1640                    }
1641                }
1642                rightbrace = p;
1643                if (match) {
1644                    p = tex_store_new_token(p, cur_tok);
1645                }
1646            } else if (cur_tok < right_brace_limit) {
1647                /*tex Report an extra right brace and |goto continue|. */
1648                tex_back_input(cur_tok);
1649                /* moved up: */
1650                ++lmt_input_state.align_state;
1651                tex_insert_paragraph_token();
1652                /* till here */
1653                tex_handle_error(
1654                    insert_error_type,
1655                    "Argument of %S has an extra }",
1656                    lmt_input_state.warning_index,
1657                    "I've run across a '}' that doesn't seem to match anything. For example,\n"
1658                    "'\\def\\a#1{...}' and '\\a}' would produce this error. The '\\par' that I've just\n"
1659                    "inserted will cause me to report a runaway argument that might be the root of the\n"
1660                    "problem." );
1661                goto CONTINUE;
1662                /*tex A white lie; the |\par| won't always trigger a runaway. */
1663            } else {
1664                /*tex
1665                    Store the current token, but |goto continue| if it is a blank space that would
1666                    become an undelimited parameter.
1667                */
1668                if (cur_tok == space_token && matchtoken <= end_match_token && matchtoken >= match_token && matchtoken != leading_match_token) {
1669                    goto CONTINUE;
1670                }
1671                if (nested && (cur_tok == leftbracket || cur_tok == leftparent || cur_tok == leftangle)) {
1672                    int unbalance = 0;
1673                    int pairing = 1;
1674                    if (match) { 
1675                        p = tex_store_new_token(p, cur_tok);
1676                    }
1677                    while (1) {
1678                        halfword t = tex_get_token();
1679                        if (t < right_brace_limit) {
1680                            if (t < left_brace_limit) {
1681                                ++unbalance;
1682                            } else if (unbalance) {
1683                                --unbalance;
1684                            }
1685                        } else if (unbalance) {
1686                            /* just add */
1687                        } else if (t == leftbracket || t == leftparent || t == leftangle) {
1688                            ++pairing;
1689                        } else if (pairing && (t == rightbracket || t == rightparent || t == rightangle)) { 
1690                            --pairing;
1691                            if (! pairing && ! righttoken) { 
1692                                if (match) { 
1693                                    p = tex_store_new_token(p, t);
1694                                }
1695                                break;
1696                            }
1697                        } else if (t == righttoken) {
1698                            break;
1699                        }
1700                        if (match) {
1701                            p = tex_store_new_token(p, t);
1702                        }
1703                        /* align stuff */
1704                    }
1705                } else { 
1706                    if (match) {
1707                        p = tex_store_new_token(p, cur_tok);
1708                    }
1709                }
1710            }
1711            ++count; 
1712            if (matchtoken > end_match_token || matchtoken < match_token) {
1713                goto CONTINUE;
1714            }
1715          FOUND:
1716            if (s) {
1717                /*
1718                    Tidy up the parameter just scanned, and tuck it away. If the parameter consists
1719                    of a single group enclosed in braces, we must strip off the enclosing braces.
1720                    That's why |rightbrace| was introduced. Actually, in most cases |m == 1|.
1721                */
1722                if (! thrash) {
1723                    halfword n = token_link(lmt_expand_state.match_token_head);
1724                    if (n) {
1725                        if (token_info(p) < right_brace_limit && count == 1 && p != lmt_expand_state.match_token_head && match != match_bracekeeper) {
1726                            set_token_link(rightbrace, null);
1727                            tex_put_available_token(p);
1728                            p = n;
1729                            pstack[nofscanned] = token_link(p);
1730                            tex_put_available_token(p);
1731                        } else {
1732                            pstack[nofscanned] = n;
1733                        }
1734                        if (match == match_pruner) {
1735                            pstack[nofscanned] = tex_aux_prune_list(pstack[nofscanned]);
1736                        }
1737                    }
1738                    ++nofscanned;
1739                    if (tracing) {
1740                        tex_begin_diagnostic();
1741                        tex_print_format("%c%c<-", match_visualizer, '0' + nofscanned + (nofscanned > 9 ? gap_match_count : 0));
1742                        tex_show_token_list(pstack[nofscanned - 1], 0, 0);
1743                        tex_end_diagnostic();
1744                    }
1745                } else {
1746                    thrash = false;
1747                }
1748                lefttoken = null;
1749                righttoken = null;
1750                if (nested) { 
1751                    leftparent = null;
1752                    rightparent = null;
1753                    leftbracket = null;
1754                    rightbracket = null;
1755                    leftangle = null;
1756                    rightangle = null;
1757                    nested = false;
1758                }
1759            }
1760            /*tex
1761                Now |info(r)| is a token whose command code is either |match| or |end_match|.
1762            */
1763            if (quitting) {
1764                nofarguments = quitting == 3 ? 0 : quitting == 2 && count == 0 ? 0 : nofscanned;
1765              QUITTING:
1766                if (spacer) {
1767                    tex_back_input(space_token); /* experiment */
1768                }
1769                while (1) {
1770                    switch (matchtoken) {
1771                        case end_match_token:
1772                            goto QUITDONE;
1773                        case spacer_match_token:
1774                        case thrash_match_token:
1775                        case par_spacer_match_token:
1776                        case keep_spacer_match_token:
1777                            goto NEXTMATCH;
1778                        case mandate_match_token:
1779                        case leading_match_token:
1780                         /* pstack[nofscanned] = null; */ /* zeroed anyway */
1781                            break;
1782                        case mandate_keep_match_token:
1783                            p = tex_store_new_token(null, left_brace_token);
1784                            pstack[nofscanned] = p;
1785                            p = tex_store_new_token(p, right_brace_token);
1786                            break;
1787                        case continue_match_token:
1788                            matchpointer = token_link(matchpointer);
1789                            matchtoken = token_info(matchpointer);
1790                            quitting = 0;
1791                            goto RESTART;
1792                        case quit_match_token:
1793                            if (quitting) {
1794                                matchpointer = token_link(matchpointer);
1795                                matchtoken = token_info(matchpointer);
1796                                quitting = 0;
1797                                goto RESTART;
1798                            } else {
1799                                goto NEXTMATCH;
1800                            }
1801                        case left_match_token:
1802                        case right_match_token:
1803                        case gobble_match_token:
1804                        case gobble_more_match_token:
1805                            matchpointer = token_link(matchpointer);
1806                            matchtoken = token_info(matchpointer);
1807                            goto NEXTMATCH;
1808                        case brackets_match_token:
1809                        case parentheses_match_token:
1810                        case angles_match_token:
1811                            goto NEXTMATCH;
1812                        default:
1813                            if (matchtoken >= match_token && matchtoken < end_match_token) {
1814                             /* pstack[nofscanned] = null; */ /* zeroed anyway */
1815                                break;
1816                            } else {
1817                                goto NEXTMATCH;
1818                            }
1819                    }
1820                    nofscanned++;
1821                    if (tracing) {
1822                        tex_begin_diagnostic();
1823                        tex_print_format("%c%i--", match_visualizer, nofscanned);
1824                        tex_end_diagnostic();
1825                    }
1826                  NEXTMATCH:
1827                    matchpointer = token_link(matchpointer);
1828                    matchtoken = token_info(matchpointer);
1829                }
1830            }
1831        } while (matchtoken != end_match_token);
1832        nofarguments = nofscanned;
1833      QUITDONE:
1834        matchpointer = token_link(matchpointer);
1835        /*tex
1836            Feed the macro body and its parameters to the scanner. Before we put a new token list on the
1837            input stack, it is wise to clean off all token lists that have recently been depleted. Then
1838            a user macro that ends with a call to itself will not require unbounded stack space.
1839        */
1840        tex_cleanup_input_state();
1841        /*tex
1842            We don't really start a list, it's more housekeeping. The starting point is the body and
1843            the later set |loc| reflects that.
1844        */
1845        tex_begin_macro_list(chr);
1846        /*tex
1847            Beware: here the |name| is used for symbolic locations but also for macro indices but these
1848            are way above the symbolic |token_types| that we use. Better would be to have a dedicated
1849            variable but let's not open up a can of worms now. We can't use |warning_index| combined
1850            with a symbolic name either. We're at |end_match_token| now so we need to advance.
1851        */
1852        lmt_input_state.cur_input.name = cs;
1853        lmt_input_state.cur_input.loc = matchpointer;
1854        /*tex
1855            This comes last, after the cleanup and the start of the macro list.
1856        */
1857        if (nofscanned) {
1858            tex_copy_to_parameter_stack(&pstack[0], nofscanned);
1859        }
1860      EXIT:
1861        lmt_expand_state.arguments = nofarguments;
1862        lmt_input_state.scanner_status = save_scanner_status;
1863        lmt_input_state.warning_index = save_warning_index;
1864    }
1865}
1866