textoken.c /size: 158 Kb    last modification: 2025-02-21 11:03
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7/*tex Todo: move some helpers to other places. */
8
9static inline int tex_aux_the_cat_code(halfword b)
10{
11    return (lmt_input_state.cur_input.cattable == default_catcode_table_preset) ?
12        tex_get_cat_code(cat_code_table_par, b)
13    : ( (lmt_input_state.cur_input.cattable > -0xFF) ?
14        tex_get_cat_code(lmt_input_state.cur_input.cattable, b)
15    : (
16        - lmt_input_state.cur_input.cattable - 0xFF
17    ) ) ;
18}
19
20/*tex
21
22    The \TEX\ system does nearly all of its own memory allocation, so that it can readily be
23    transported into environments that do not have automatic facilities for strings, garbage
24    collection, etc., and so that it can be in control of what error messages the user receives.
25    The dynamic storage requirements of \TEX\ are handled by providing two large arrays called
26    |fixmem| and |varmem| in which consecutive blocks of words are used as nodes by the \TEX\
27    routines.
28
29    Pointer variables are indices into this array, or into another array called |eqtb| that
30    will be explained later. A pointer variable might also be a special flag that lies outside
31    the bounds of |mem|, so we allow pointers to assume any |halfword| value. The minimum
32    halfword value represents a null pointer. \TEX\ does not assume that |mem[null]| exists.
33
34    Locations in |fixmem| are used for storing one-word records; a conventional |AVAIL| stack is
35    used for allocation in this array.
36
    One can make an argument to switch to standard \CCODE\ allocation but the current approach is
    very efficient in memory usage and performance so we stay with it. On the average memory
    consumption of \TEX\ is not that large, definitely not compared to other programs that deal
    with text.
41
42    The big dynamic storage area is named |fixmem| where the smallest location of one|-|word
43    memory in use is |fix_mem_min| and the largest location of one|-|word memory in use is
44    |fix_mem_max|.
45
    The |dyn_used| variable keeps track of how much memory is in use. The head of the list of
    available one|-|word nodes is registered in |avail|. The last one|-|word node used in |mem|
    is |fix_mem_end|.
49
50    All these variables are packed in the structure |token_memory_state|.
51
52*/
53
/*tex
    The bookkeeping of the token memory pool. The |tokens| array is allocated in
    |tex_initialize_token_mem| and grows in |tex_aux_bump_token_memory|.
*/
token_memory_state_info lmt_token_memory_state = {
    .tokens      = NULL,                 /*tex the pool itself, allocated during initialization */
    .tokens_data = {
        .minimum   = min_token_size,     /*tex the size that is allocated when we start in ini mode */
        .maximum   = max_token_size,
        .size      = siz_token_size,     /*tex the limit that |allocated| may not exceed when we grow */
        .step      = stp_token_size,     /*tex the number of entries added each time we grow */
        .allocated = 0,                  /*tex the number of entries currently allocated */
        .itemsize  = sizeof(memoryword),
        .top       = 0,                  /*tex the highest token index handed out so far */
        .ptr       = 0, /* used to register usage */
        .initial   = 0,                  /*tex set to |ptr| when we don't start in ini mode */
        .offset    = 0,
        .extra     = 0, 
    },
    .available  = 0,                     /*tex the head of the linked list of freed tokens */
    .padding    = 0,
};
72
73/*tex
74
75    Token data has its own memory space. Again we have some state variables: |temp_token_head| is
76    the head of a (temporary) list of some kind as are |hold_token_head| and |omit_template|. A
77    permanently empty list is available in |null_list| and the head of the token list built by
78    |scan_keyword| is registered in |backup_head|. All these variables are packed in the structure
79    |token_data| but some have been moved to a more relevant state (so omit and hold are now in the
80    alignment state).
81
82*/
83
/*tex
    The token related state. Only some of these fields are touched in this part of the file:
    |null_list| is set in |tex_initialize_tokens| and |empty| is remapped when the token memory
    gets compacted.
*/
token_state_info lmt_token_state = {
    .null_list      = null, /*tex assigned in |tex_initialize_tokens| */
    .force_eof      = 0,
    .luacstrings    = 0,
    .par_loc        = null,
    .par_token      = null,
 /* .line_par_loc   = null, */ /* removed because not really used and useful */
 /* .line_par_token = null, */ /* idem */
    .buffer         = NULL,
    .bufloc         = 0,
    .bufmax         = 0,
    .empty          = null, /*tex a token list index that |tex_compact_tokens| maps to its new location */
    .padding        = 0,
};
98
/*tex Some properties are dumped in the format so these are set already! */
100
# define reserved_token_mem_slots 2 /*tex extra slots passed to the array allocators: play safe for slight overruns */
102
103void tex_initialize_token_mem(void)
104{
105    memoryword *tokens = NULL;
106    int size = 0;
107    if (lmt_main_state.run_state == initializing_state) {
108        size = lmt_token_memory_state.tokens_data.minimum;
109    } else {
110        size = lmt_token_memory_state.tokens_data.allocated;
111        lmt_token_memory_state.tokens_data.initial = lmt_token_memory_state.tokens_data.ptr;
112    }
113    if (size > 0) {
114        tokens = aux_allocate_clear_array(sizeof(memoryword), size, reserved_token_mem_slots);
115    }
116    if (tokens) {
117        lmt_token_memory_state.tokens = tokens;
118        lmt_token_memory_state.tokens_data.allocated = size;
119    } else {
120        tex_overflow_error("tokens", size);
121    }
122}
123
124static void tex_aux_bump_token_memory(void)
125{
126    /*tex We need to manage the big dynamic storage area. */
127    int size = lmt_token_memory_state.tokens_data.allocated + lmt_token_memory_state.tokens_data.step;
128    if (size > lmt_token_memory_state.tokens_data.size) {
129        lmt_run_memory_callback("token", 0);
130        tex_show_runaway();
131        tex_overflow_error("token memory size", lmt_token_memory_state.tokens_data.allocated);
132    } else {
133        memoryword *tokens = aux_reallocate_array(lmt_token_memory_state.tokens, sizeof(memoryword), size, reserved_token_mem_slots);
134        lmt_run_memory_callback("token", tokens ? 1 : 0);
135        if (tokens) {
136            lmt_token_memory_state.tokens = tokens;
137        } else {
138            /*tex If memory is exhausted, display possible runaway text. */
139            tex_show_runaway();
140            tex_overflow_error("token memory size", lmt_token_memory_state.tokens_data.allocated);
141        }
142    }
143    memset((void *) (lmt_token_memory_state.tokens + lmt_token_memory_state.tokens_data.allocated + 1), 0, ((size_t) lmt_token_memory_state.tokens_data.step + reserved_token_mem_slots) * sizeof(memoryword));
144    lmt_token_memory_state.tokens_data.allocated = size;
145}
146
147void tex_initialize_tokens(void)
148{
149    lmt_token_memory_state.available = null;
150    lmt_token_memory_state.tokens_data.top = 0;
151    lmt_token_state.null_list = tex_get_available_token(null);
152 /* lmt_token_state.in_lua_escape = 0; */
153}
154
155/*tex
156    Experiment. It saves some 512K on the \CONTEXT\ format of October 2020. It makes me wonder if I
157    should spend some time on optimizing token lists (kind of cisc commands as we're currently kind
158    of risc).
159
160    A mixed token/file model (for permanent macros) could avoid the link and result in less memory
161    which in turn is easier on the cache. We could save at most 2M (for 35K) macros in \CONTEXT\
162    so it is not worth the trouble.
163*/
164
165void tex_compact_tokens(void)
166{
167    int nc = 0;
168 // memoryword *target = allocate_array(sizeof(memoryword), (size_t) token_memory_state.tokens_data.allocated, 0);
169    memoryword *target = aux_allocate_clear_array(sizeof(memoryword), lmt_token_memory_state.tokens_data.allocated, 0);
170    halfword *mapper = aux_allocate_array(sizeof(halfword), lmt_token_memory_state.tokens_data.allocated, 0);
171    int nofluacmds = 0;
172    if (target && mapper) {
173        memoryword *tokens = lmt_token_memory_state.tokens;
174        memset((void *) mapper, -1, ((size_t) lmt_token_memory_state.tokens_data.allocated) * sizeof(halfword));
175        for (int cs = 0; cs < (eqtb_size + lmt_hash_state.hash_data.ptr + 1); cs++) {
176            switch (eq_type(cs)) {
177                case call_cmd:
178                case protected_call_cmd:
179                case semi_protected_call_cmd:
180                case constant_call_cmd:
181                case tolerant_call_cmd:
182                case tolerant_protected_call_cmd:
183                case tolerant_semi_protected_call_cmd:
184                case internal_toks_reference_cmd:
185                case register_toks_reference_cmd:
186                    {
187                        halfword v = eq_value(cs); /* ref count token*/
188                        if (v) {
189                            if (mapper[v] < 0) {
190                             // printf("before =>"); { halfword tt = v; while (tt) { printf("%7d ",tt); tt = token_link(tt); } } printf("\n");
191                                halfword t = v;
192                                nc++;
193                                mapper[v] = nc; /* new ref count token index */
194                                while (1) {
195                                    target[nc].half1 = tokens[t].half1; /* info cq. ref count */
196                                    t = tokens[t].half0;
197                                    if (t) {
198                                        nc++;
199                                        target[nc-1].half0 = nc; /* link to next */
200                                    } else {
201                                        target[nc].half0 = null; /* link to next */
202                                        break;
203                                    }
204                                }
205                             // printf("after  =>"); { halfword tt = mapper[v]; while (tt) { printf("%7d ",tt); tt = target[tt].half0; } } printf("\n");
206                            }
207                            eq_value(cs) = mapper[v];
208                        }
209                        break;
210                    }
211                case lua_value_cmd:
212                case lua_call_cmd:
213                case lua_local_call_cmd:
214                    {
215                        ++nofluacmds;
216                        break;
217                    }
218            }
219        }
220        lmt_token_state.empty = mapper[lmt_token_state.empty];
221     // print(dump_state.format_identifier);
222        tex_print_format("tokenlist compacted from %i to %i entries, ", lmt_token_memory_state.tokens_data.top, nc);
223        if (nofluacmds) {
224            /*tex
225                We just mention them because when these are aliased the macro package needs to make
226                sure that after loading that happens again because registered funciton references
227                can have changed between format generation and run!
228            */
229            tex_print_format("%i potentially aliased lua call/value entries, ", nofluacmds);
230        }
231        lmt_token_memory_state.tokens_data.top = nc;
232        lmt_token_memory_state.tokens_data.ptr = nc;
233        aux_deallocate_array(lmt_token_memory_state.tokens);
234        lmt_token_memory_state.tokens = target;
235        lmt_token_memory_state.available = null;
236    } else {
237        tex_overflow_error("token compaction size", lmt_token_memory_state.tokens_data.allocated);
238    }
239}
240
241/*tex
242
243    The function |get_avail| returns a pointer (index) to a new one word node whose |link| field is
244    |null| (which is just 0). However, \TEX\ will halt if there is no more room left.
245
246    If the available space list is empty, i.e., if |avail = null|, we try first to increase
247    |fix_mem_end|. If that cannot be done, i.e., if |fix_mem_end = fix_mem_max|, we try to reallocate
248    array |fixmem|. If, that doesn't work, we have to quit. Users can configure \TEX\ to use a lot of
249    memory but in some scenarios limitations make sense.
250
    Remark: we can have a pool of chunks where we get from or just allocate per token (as we have lots
    of them that is slow). But then format loading becomes much slower as we need to recreate the
    linked list. A no go. In today's terms \TEX\ memory usage is low anyway.
254
    The freed tokens are kept in a linked list. First we check if we can quickly get one of these. If
    that fails, we try to get one from the available pool. If that fails too, we enlarge the pool and
    try again. We keep track of the used number of tokens. We also make sure that the new token links
    to nothing.
259
260    One problem is of course that tokens can be scattered over memory. We could have some sorter that
261    occasionally kicks in but it doesn't pay off. Normally definitions (in the format) are in sequence
262    but a normal run \unknown\ it would be interesting to know if this impacts the cache.
263
264*/
265
266halfword tex_get_available_token(halfword t)
267{
268    halfword p = lmt_token_memory_state.available;
269    if (p) {
270        lmt_token_memory_state.available = token_link(p);
271    } else if (lmt_token_memory_state.tokens_data.top < lmt_token_memory_state.tokens_data.allocated) {
272        p = ++lmt_token_memory_state.tokens_data.top;
273    } else {
274        tex_aux_bump_token_memory();
275        p = ++lmt_token_memory_state.tokens_data.top;
276    }
277    ++lmt_token_memory_state.tokens_data.ptr;
278    token_link(p) = null;
279    token_info(p) = t;
280    return p;
281}
282
283/*tex
284
285    Because we only have forward links, a freed token ends up at the head of the list of available
286    tokens.
287
288*/
289
290void tex_put_available_token(halfword p)
291{
292    token_link(p) = lmt_token_memory_state.available;
293    lmt_token_memory_state.available = p;
294    --lmt_token_memory_state.tokens_data.ptr;
295}
296
297halfword tex_store_new_token(halfword p, halfword t)
298{
299    halfword q = tex_get_available_token(t);
300    token_link(p) = q;
301    return q;
302}
303
304/*tex
305
    The procedure |flush_list (p)| frees an entire linked list of one|-|word nodes that starts at
    position |p|. It makes a list of single word nodes available again. The second variant is in
    principle faster but in practice this goes unnoticed. Of course there is a little price to pay
    for keeping track of memory usage.
310
311*/
312
313void tex_flush_token_list(halfword head)
314{
315    if (head) {
316        if (! token_link(head)) {
317            /* This happens more frequently (6.2M vs 1.7M). */
318            token_link(head) = lmt_token_memory_state.available;
319            --lmt_token_memory_state.tokens_data.ptr;
320        } else {
321            halfword current = head;
322            halfword tail;
323            int i = 0;
324            do {
325                ++i;
326                tail = current;
327                current = token_link(tail);
328            } while (current);
329            token_link(tail) = lmt_token_memory_state.available;
330            lmt_token_memory_state.tokens_data.ptr -= i;
331        }
332        lmt_token_memory_state.available = head;
333    }
334}
335
336void tex_flush_token_list_head_tail(halfword head, halfword tail, int n)
337{
338    if (head) {
339        lmt_token_memory_state.tokens_data.ptr -= n;
340        token_link(tail) = lmt_token_memory_state.available;
341        lmt_token_memory_state.available = head;
342    }
343}
344
345void tex_add_token_reference(halfword p)
346{
347    if (get_token_reference(p) < max_token_reference) {
348        add_token_reference(p);
349 // } else {
350 //     tex_overflow_error("reference count", max_token_reference);
351    }
352}
353
354void tex_increment_token_reference(halfword p, int n)
355{
356    if ((get_token_reference(p) + n) < max_token_reference) {
357        inc_token_reference(p, n);
358    } else {
359        inc_token_reference(p, max_token_reference - get_token_reference(p));
360 // } else {
361 //     tex_overflow_error("reference count", max_token_reference);
362    }
363}
364
365void tex_delete_token_reference(halfword p)
366{
367    if (p) {
368        halfword r = get_token_reference(p);
369        if (! r) {
370            tex_flush_token_list(p);
371        } else if (r < max_token_reference) {
372            sub_token_reference(p);
373        }
374    }
375}
376
377/*tex
378
379    A \TEX\ token is either a character or a control sequence, and it is represented internally in
380    one of two ways:
381
382    \startitemize[n]
383        \startitem
384            A character whose ASCII code number is |c| and whose command code is |m| is represented
385            as the number $2^{21}m+c$; the command code is in the range |1 <= m <= 14|.
386        \stopitem
387        \startitem
388            A control sequence whose |eqtb| address is |p| is represented as the number
389            |cs_token_flag+p|. Here |cs_token_flag = t =| $2^{25}-1$ is larger than $2^{21}m+c$, yet
390            it is small enough that |cs_token_flag + p < max_halfword|; thus, a token fits
391            comfortably in a halfword.
392        \stopitem
393    \stopitemize
394
395    A token |t| represents a |left_brace| command if and only if |t < left_brace_limit|; it
396    represents a |right_brace| command if and only if we have |left_brace_limit <= t <
397    right_brace_limit|; and it represents a |match| or |end_match| command if and only if
398    |match_token <= t <= end_match_token|. The following definitions take care of these
399    token-oriented constants and a few others.
400
401    A token list is a singly linked list of one-word nodes in |mem|, where each word contains a token
402    and a link. Macro definitions, output routine definitions, marks, |\write| texts, and a few other
403    things are remembered by \TEX\ in the form of token lists, usually preceded by a node with a
404    reference count in its |token_ref_count| field. The token stored in location |p| is called
405    |info(p)|.
406
407    Three special commands appear in the token lists of macro definitions. When |m = match|, it means
408    that \TEX\ should scan a parameter for the current macro; when |m = end_match|, it means that
409    parameter matching should end and \TEX\ should start reading the macro text; and when |m =
410    out_param|, it means that \TEX\ should insert parameter number |c| into the text at this point.
411
412    The enclosing |\char'173| and |\char'175| characters of a macro definition are omitted, but the
413    final right brace of an output routine is included at the end of its token list.
414
415    Here is an example macro definition that illustrates these conventions. After \TEX\ processes
416    the text:
417
418    \starttyping
419    \def\mac a#1#2 \b {#1\-a ##1#2 \#2\}
420    \stoptyping
421
422    The definition of |\mac| is represented as a token list containing:
423
424    \starttyping
    (reference count) letter a match # match # spacer \b end_match
    out_param 1 \- letter a spacer mac_param # other_char 1
    out_param 2 spacer out_param 2
428    \stoptyping
429
430    The procedure |scan_toks| builds such token lists, and |macro_call| does the parameter matching.
431
432    Examples such as |\def \m {\def \m {a} b}| explain why reference counts would be needed even if
433    \TEX\ had no |\let| operation: When the token list for |\m| is being read, the redefinition of
434    |\m| changes the |eqtb| entry before the token list has been fully consumed, so we dare not
435    simply destroy a token list when its control sequence is being redefined.
436
437    If the parameter-matching part of a definition ends with |#{}|, the corresponding token list
438    will have |{| just before the |end_match| and also at the very end. The first |{| is used to
439    delimit the parameter; the second one keeps the first from disappearing.
440
441    The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form, including the
442    expansion of a macro or mark.
443
444*/
445
/*tex
    Print the meaning of the current token, as found in |cur_cmd|, |cur_chr| and |cur_cs|. The
    |code| selects the variant (|meaning_code|, |meaning_full_code|, |meaning_ful_code|,
    |meaning_asis_code| or |meaning_les_code|) and thereby how much is shown.
*/
void tex_print_meaning(halfword code)
{
    /*tex

    This would make sense but some macro packages don't like it:

    \starttyping
    if (cur_cmd == math_given_cmd) {
        cur_cmd = math_xgiven_cmd ;
    }
    \stoptyping

    Eventually we might just do it that way. We also can have |\meaningonly| that omits the
    |macro:| and arguments.
    */
    int untraced = is_untraced(eq_flag(cur_cs));
    /*tex The flags are only printed for traced control sequences; |meaning_les_code| never prints them. */
    if (! untraced) {
        switch (code) {
            case meaning_code:
            case meaning_full_code:
            case meaning_ful_code:
            case meaning_asis_code:
                tex_print_cmd_flags(cur_cs, cur_cmd, (code != meaning_code), code == meaning_asis_code);
                break;
        }
    }
    switch (cur_cmd) {
        case call_cmd:
        case protected_call_cmd:
        case semi_protected_call_cmd:
        case constant_call_cmd:
        case tolerant_call_cmd:
        case tolerant_protected_call_cmd:
        case tolerant_semi_protected_call_cmd:
            if (untraced) {
                /*tex An untraced macro only shows its name. */
                tex_print_cs(cur_cs);
                return;
            } else {
                switch (code) {
                    case meaning_code:
                    case meaning_full_code:
                    case meaning_ful_code:
                        tex_print_str("macro");
                        /*tex The |ful| variant stops after the keyword, the others add a colon and the body. */
                        if (code == meaning_ful_code) {
                            return;
                        } else {
                            goto FOLLOWUP;
                        }
                    case meaning_asis_code:
                     // tex_print_format("%e%C %S ", def_cmd, def_code, cur_cs);
                        /*tex We print the |def_cmd| primitive, the name and the list, or empty braces when there is no list. */
                        tex_print_cmd_chr(def_cmd, def_code);
                        tex_print_char(' ');
                        tex_print_cs(cur_cs);
                        tex_print_char(' ');
                        if (cur_chr && token_link(cur_chr)) {
                            tex_show_token_list(token_link(cur_chr), get_token_preamble(cur_chr) ? 1 : 3, 0);
                        } else {
                            tex_print_char('{');
                            tex_print_char('}');
                        }
                        return;
                    case meaning_les_code:
                        /*tex Mode 2 makes |tex_show_token_list| quit at the |end_match| token. */
                        if (cur_chr && token_link(cur_chr)) {
                            tex_show_token_list(token_link(cur_chr), 2, 0);
                        }
                        return;
                }
                /*tex Any other code: no keyword and no colon, just the token list. */
                goto DETAILS;
            }
        case get_mark_cmd:
            tex_print_cmd_chr((singleword) cur_cmd, cur_chr);
            tex_print_char(':');
            tex_print_nlp();
            tex_token_show(tex_get_some_mark(cur_chr, 0));
            return;
        case lua_value_cmd:
        case lua_call_cmd:
        case lua_local_call_cmd:
        case lua_protected_call_cmd:
        case lua_semi_protected_call_cmd:
            if (untraced) {
                tex_print_cs(cur_cs);
                return;
            } else {
                goto DEFAULT;
            }
        case if_test_cmd:
            /*tex Codes beyond |last_if_test_code| are shown by name. */
            if (cur_chr > last_if_test_code) {
                tex_print_cs(cur_cs);
                return;
            } else {
                goto DEFAULT;
            }
        default:
         DEFAULT:
            tex_print_cmd_chr((singleword) cur_cmd, cur_chr);
            /*tex Commands below |call_cmd| are done here, the rest continues with a colon and a token list. */
            if (cur_cmd < call_cmd) {
                return;
            } else {
                /* all kind of reference cmds */
                break;
            }
    }
  FOLLOWUP:
    tex_print_char(':');
  DETAILS:
    tex_print_nlp();
    tex_token_show(cur_chr);
}
555
556/*tex
557
558    The procedure |show_token_list|, which prints a symbolic form of the token list that starts at
559    a given node |p|, illustrates these conventions. The token list being displayed should not begin
560    with a reference count. However, the procedure is intended to be robust, so that if the memory
561    links are awry or if |p| is not really a pointer to a token list, nothing catastrophic will
562    happen.
563
564    An additional parameter |q| is also given; this parameter is either null or it points to a node
565    in the token list where a certain magic computation takes place that will be explained later.
566    Basically, |q| is non-null when we are printing the two-line context information at the time of
567    an error message; |q| marks the place corresponding to where the second line should begin.
568
569    For example, if |p| points to the node containing the first |a| in the token list above, then
570    |show_token_list| will print the string
571
572    \starttyping
573    a#1#2 \b ->#1-a ##1#2 #2
574    \stoptyping
575
576    and if |q| points to the node containing the second |a|, the magic computation will be performed
577    just before the second |a| is printed.
578
579    The generation will stop, and |\ETC.| will be printed, if the length of printing exceeds a given
580    limit~|l|. Anomalous entries are printed in the form of control sequences that are not followed
581    by a blank space, e.g., |\BAD.|; this cannot be confused with actual control sequences because a
582    real control sequence named |BAD| would come out |\BAD |.
583
584    In \LUAMETATEX\ we have some more node types and token types so we also have additional tracing.
585    Because there is some more granularity in for instance nodes (subtypes) more detail is reported.
586
    It made sense to split the |tex_show_token_list| function in two, one specialized for showing
    the context. That saves some testing and passing arguments.
589
590*/
591
592static const char *tex_aux_special_cmd_string(halfword cmd, halfword chr, const char *unknown)
593{
594    switch (cmd) {
595        case node_cmd                    : return "[[special cmd: node pointer]]";
596        case lua_protected_call_cmd      : return "[[special cmd: lua protected call]]";
597        case lua_semi_protected_call_cmd : return "[[special cmd: lua semi protected call]]";
598        case lua_value_cmd               : return "[[special cmd: lua value call]]";
599        case iterator_value_cmd          : return "[[special cmd: iterator value]]";
600        case lua_call_cmd                : return "[[special cmd: lua call]]";
601        case lua_local_call_cmd          : return "[[special cmd: lua local call]]";
602        case begin_local_cmd             : return "[[special cmd: begin local call]]";
603        case end_local_cmd               : return "[[special cmd: end local call]]";
604     // case prefix_cmd                  : return "[[special cmd: enforced]]";
605        case prefix_cmd                  : return "\\always ";
606# if (match_experiment)
607        case integer_reference_cmd       : return "[[special cmd: integer pointer]]"; 
608        case dimension_reference_cmd     : return "[[special cmd: dimension pointer]]"; 
609# endif 
610        default                          : printf("[[unknown cmd: (%i,%i)]]\n", cmd, chr); return unknown;
611    }
612}
613
614void tex_show_token_list(halfword p, int asis, int single)
615{
616    if (p) {
617        unsigned char n = 0;
618        int max = lmt_token_memory_state.tokens_data.top;
619        if (asis == 3) {
620            tex_print_char('{');
621        }
622        while (p) {
623            if (p < 0 || p > max) {
624                tex_print_str(error_string_clobbered(41));
625                return;
626            } else if (token_info(p) >= cs_token_flag) {
627                tex_print_cs_checked(token_info(p) - cs_token_flag);
628            } else if (token_info(p) > 0) {
629                int cmd = token_cmd(token_info(p));
630                int chr = token_chr(token_info(p));
631                switch (cmd) {
632                    case left_brace_cmd:
633                    case right_brace_cmd:
634                    case math_shift_cmd:
635                    case alignment_tab_cmd:
636                    case superscript_cmd:
637                    case subscript_cmd:
638                    case spacer_cmd:
639                    case letter_cmd:
640                    case other_char_cmd:
641                    case active_char_cmd:
642                    case ignore_cmd:
643                        tex_print_tex_str(chr);
644                        break;
645                    case parameter_cmd:
646                        /*
647                            Here we need to duplicate because a nested definition is parsed and
648                            these |##1| are two tokens |parameter + 1| while |#1| is a one token
649                            |parameter ref 1|.
650                        */
651                        if (! single) {
652                            tex_print_tex_str(chr);
653                        }
654                        tex_print_tex_str(chr);
655                        break;
656                    case parameter_reference_cmd:
657                        tex_print_tex_str(match_visualizer);
658                        if (chr <= 9) {
659                            tex_print_char(chr + '0');
660                        } else if (chr <= max_match_count) {
661                            tex_print_char(chr + '0' + gap_match_count);
662                        } else {
663                            tex_print_char('!');
664                            return;
665                        }
666                        break;
667                    case match_cmd:
668                        tex_print_char(match_visualizer);
669                        if (is_valid_match_ref(chr)) {
670                            ++n;
671                        }
672                        tex_print_char(chr ? chr : '0');
673                        if (n > max_match_count) {
674                            return;
675                        } else {
676                            break;
677                        }
678                    case end_match_cmd:
679                        switch (asis) {
680                            case 1:
681                                tex_print_char('{');
682                                break;
683                            case 2:
684                                return;
685                            default:
686                                if (chr == 0) {
687                                    tex_print_str("->");
688                                }
689                                break;
690                        }
691                        break;
692                    case ignore_something_cmd:
693                        break;
694                    case set_font_cmd:
695                        tex_print_format("[font->%s]", font_original(cur_val));
696                        break;
697                    case end_paragraph_cmd:
698                     /* tex_print_format("%e%s", "par "); */
699                        tex_print_str_esc("par ");
700                        break;
701                    default:
702                        tex_print_str(tex_aux_special_cmd_string(cmd, chr, error_string_bad(43)));
703                        break;
704                }
705            } else {
706                tex_print_str(error_string_bad(42));
707            }
708            p = token_link(p);
709        }
710        if (asis == 1 || asis == 3) {
711            tex_print_char('}');
712        }
713    }
714}
715
716void tex_show_token_list_context(halfword p, halfword q)
717{
718    if (p) {
719        /*tex the highest parameter number, as an \ASCII\ digit */
720        unsigned char n = 0;
721        int max = lmt_token_memory_state.tokens_data.top;
722        lmt_print_state.tally = 0;
723        while (p) {
724            if (p == q) {
725                /*tex Do magic computation. We only end up here in context showing. */
726                tex_set_trick_count();
727            }
728            /*tex Display token |p|, and |return| if there are problems. */
729            if (p < 0 || p > max) {
730                tex_print_str(error_string_clobbered(41));
731                return;
732            } else if (token_info(p) >= cs_token_flag) {
733             // if (! ((print_state.inhibit_par_tokens) && (token_info(p) == token_state.par_token))) {
734                    tex_print_cs_checked(token_info(p) - cs_token_flag);
735             // }
736            } else if (token_info(p) > 0) {
737                int cmd = token_cmd(token_info(p));
738                int chr = token_chr(token_info(p));
739                /*
740                    Display the token (|cmd|,|chr|). The procedure usually \quote {learns} the character
741                    code used for macro parameters by seeing one in a |match| command before it runs
742                    into any |out_param| commands. This is probably not true any longer.
743                */
744                switch (cmd) {
745                    case left_brace_cmd:
746                    case right_brace_cmd:
747                    case math_shift_cmd:
748                    case alignment_tab_cmd:
749                    case superscript_cmd:
750                    case subscript_cmd:
751                    case spacer_cmd:
752                    case letter_cmd:
753                    case other_char_cmd:
754                    case active_char_cmd: /* new */
755                    case ignore_cmd: /* new */
756                        tex_print_tex_str(chr);
757                        break;
758                    case parameter_cmd:
759                        /*tex
760                            When we show a context we alwasy duplicate the hashes.
761                        */
762                        tex_print_tex_str(chr);
763                        tex_print_tex_str(chr);
764                        break;
765                    case parameter_reference_cmd:
766                        tex_print_tex_str(match_visualizer);
767                        if (chr <= 9) {
768                            tex_print_char(chr + '0');
769                        } else if (chr <= max_match_count) {
770                            tex_print_char(chr + '0' + gap_match_count);
771                        } else {
772                            tex_print_char('!');
773                            return;
774                        }
775                        break;
776                    case match_cmd:
777                        tex_print_char(match_visualizer);
778                        if (is_valid_match_ref(chr)) {
779                            ++n;
780                        }
781                        tex_print_char(chr ? chr : '0');
782                        if (n > max_match_count) {
783                            /*tex Can this happen at all? */
784                            return;
785                        } else {
786                            break;
787                        }
788                    case end_match_cmd:
789                        tex_print_str("->");
790                        break;
791                    case ignore_something_cmd:
792                        break;
793                    case set_font_cmd:
794                        tex_print_format("[font->%s]", font_original(cur_val));
795                        break;
796                    case end_paragraph_cmd:
797                     /* tex_print_format("%e%s", "par "); */
798                        tex_print_str_esc("par ");
799                        break;
800                    default:
801                        tex_print_str(tex_aux_special_cmd_string(cmd, chr, error_string_bad(53)));
802                        break;
803                }
804         // } else if (token_info(p) == 0) {
805         //     tex_print_str(error_string_bad(44));
806            } else {
807                tex_print_str(error_string_bad(42));
808            }
809            p = token_link(p);
810        }
811        if (p) {
812            tex_print_str_esc("ETC.");
813        }
814    }
815}
816
817/*
818# define do_buffer_to_unichar(a,b) do { \
819    a = (halfword)str2uni(fileio_state.io_buffer+b); \
820    b += utf8_size(a); \
821} while (0)
822*/
823
824static inline halfword get_unichar_from_buffer(int *b)
825{
826    halfword a = (halfword) ((const unsigned char) *(lmt_fileio_state.io_buffer + *b));
827    if (a <= 0x80) {
828        *b += 1;
829    } else {
830        int al;
831        a = (halfword) aux_str2uni_len(lmt_fileio_state.io_buffer + *b, &al);
832        *b += al;
833    }
834    return a;
835}
836
837/*tex
838
839    Here's the way we sometimes want to display a token list, given a pointer to its reference count;
840    the pointer may be null.
841
842*/
843
844void tex_token_show(halfword p)
845{
846    if (p && token_link(p)) {
847        tex_show_token_list(token_link(p), 0, 0);
848    }
849}
850
851/*tex
852
853    The next function, |delete_token_ref|, is called when a pointer to a token list's reference
854    count is being removed. This means that the token list should disappear if the reference count
855    was |null|, otherwise the count should be decreased by one. Variable |p| points to the reference
856    count of a token list that is losing one reference.
857
858*/
859
/*tex The public entry point for |tex_aux_the_cat_code|: the category code of |c| as seen by the current input. */

int tex_get_char_cat_code(int c)
{
    int catcode = tex_aux_the_cat_code(c);
    return catcode;
}
864
865static void tex_aux_invalid_character_error(void)
866{
867    tex_handle_error(
868        normal_error_type,
869        "Text line contains an invalid character",
870        "A funny symbol that I can't read has just been input. Continue, and I'll forget\n"
871        "that it ever happened."
872    );
873}
874
/*tex Defined later on; the file reader calls this when it sees a superscript character. */

static int tex_aux_process_sup_mark(void);

/*tex Defined later on; the file reader stores its result as the new input state. */

static int tex_aux_scan_control_sequence(void);

/*tex
    What |tex_aux_next_line| tells the file reader: with |next_line_ok| reading simply goes on,
    with |next_line_return| the reader returns 1 and with |next_line_restart| it returns 0.
*/

typedef enum next_line_retval {
    next_line_ok,
    next_line_return,
    next_line_restart
} next_line_retval;

static inline next_line_retval tex_aux_next_line(void);
886
887/*tex
888
889    In case you are getting bored, here is a slightly less trivial routine: Given a string of
890    lowercase letters, like |pt| or |plus| or |width|, the |scan_keyword| routine checks to see
891    whether the next tokens of input match this string. The match must be exact, except that
    uppercase letters will match their lowercase counterparts; uppercase equivalents are determined
893    by subtracting |"a" - "A"|, rather than using the |uc_code| table, since \TEX\ uses this
894    routine only for its own limited set of keywords.
895
896    If a match is found, the characters are effectively removed from the input and |true| is
897    returned. Otherwise |false| is returned, and the input is left essentially unchanged (except
898    for the fact that some macros may have been expanded, etc.).
899
900    In \LUATEX\ and its follow up we have more keywords and for instance when scanning a box
901    specification that is noticeable because the |scan_keyword| function is a little inefficient
902    in the sense that when there is no match, it will push back what got read so far. So there is
903    token allocation, pushing a level etc involved. Keep in mind that expansion happens here so what
    gets pushed back is not always literally what we started with.
905
906    In \LUAMETATEX\ we now have a bit different approach. The |scan_mandate_keyword| follows up on
907    |scan_character| so we have a two step approach. We could actually pass a list of valid keywords
908    but that would make for a complex function with no real benefits.
909
910*/
911
912halfword tex_scan_character(const char *s, int left_brace, int skip_space, int skip_relax) // int skip_endpar
913{
914    halfword save_cur_cs = cur_cs;
915    while (1) {
916        tex_get_x_token();
917        switch (cur_cmd) {
918            case spacer_cmd:
919                if (skip_space) {
920                    break;
921                } else {
922                    goto DONE;
923                }
924            case relax_cmd:
925                if (skip_relax) {
926                    break;
927                } else {
928                    goto DONE;
929                }
930         // case end_paragraph_cmd:
931         //   if (skip_space) { /* skip_endpar */
932         //       break;
933         //   } else {
934         //       goto DONE;
935         //   }
936            case letter_cmd:
937            case other_char_cmd:
938                if (cur_chr <= 'z' && strchr(s, cur_chr)) {
939                    cur_cs = save_cur_cs;
940                    return cur_chr;
941                } else {
942                    goto DONE;
943                }
944            case left_brace_cmd:
945                if (left_brace) {
946                    cur_cs = save_cur_cs;
947                    return '{';
948                } else {
949                    goto DONE;
950                }
951            default:
952                goto DONE;
953        }
954    }
955  DONE:
956    tex_back_input(cur_tok);
957    cur_cs = save_cur_cs;
958    return 0;
959}
960
961void tex_aux_show_keyword_error(const char *s)
962{
963    tex_handle_error(
964        normal_error_type,
965        "Valid keyword expected, likely '%s'",
966        s,
967        "You started a keyword but it seems to be an invalid one. The first character(s)\n"
968        "might give you a clue. You might want to quit unwanted lookahead with \\relax."
969    );
970}
971
972/*tex
973    Scanning an optional keyword starts at the beginning. This means that we can also (for instance)
974    have a minus or plus sign which means that we have a different loop than with the alternative
975    that already checked the first character.
976*/
977
978int tex_scan_optional_keyword(const char *s)
979{
980    halfword save_cur_cs = cur_cs;
981    int done = 0;
982    const char *p = s;
983    while (*p) {
984        tex_get_x_token();
985        switch (cur_cmd) {
986            case letter_cmd:
987            case other_char_cmd:
988                if ((cur_chr == *p) || (cur_chr == *p - 'a' + 'A')) {
989                    if (*(++p)) {
990                        done = 1;
991                    } else {
992                        cur_cs = save_cur_cs;
993                        return 1;
994                    }
995                } else if (done) {
996                    goto BAD_NEWS;
997                } else {
998                    // can be a minus or so ! as in \advance\foo -10
999                    tex_back_input(cur_tok);
1000                    cur_cs = save_cur_cs;
1001                    return 1;
1002                }
1003                break;
1004            case spacer_cmd:  /* normally spaces are not pushed back */
1005                if (done) {
1006                    goto BAD_NEWS;
1007                } else {
1008                    break;
1009                }
1010                // fall through
1011            default:
1012                tex_back_input(cur_tok);
1013                if (done) {
1014                    /* unless we accept partial keywords */
1015                    goto BAD_NEWS;
1016                } else {
1017                    cur_cs = save_cur_cs;
1018                    return 0;
1019                }
1020        }
1021    }
1022  BAD_NEWS:
1023    tex_aux_show_keyword_error(s);
1024    cur_cs = save_cur_cs;
1025    return 0;
1026}
1027
1028/*tex
1029    Here we know that the first character(s) matched so we are in the middle of a keyword already
1030    which means a different loop than the previous one.
1031*/
1032
1033int tex_scan_mandate_keyword(const char *s, int offset)
1034{
1035    halfword save_cur_cs = cur_cs;
1036    int done = 0;
1037 // int done = offset > 0;
1038    const char *p = s + offset; /* offset always > 0 so no issue with +/- */
1039    while (*p) {
1040        tex_get_x_token();
1041        switch (cur_cmd) {
1042            case letter_cmd:
1043            case other_char_cmd:
1044                if ((cur_chr == *p) || (cur_chr == *p - 'a' + 'A')) {
1045                    if (*(++p)) {
1046                        done = 1;
1047                    } else {
1048                        cur_cs = save_cur_cs;
1049                        return 1;
1050                    }
1051                } else {
1052                    goto BAD_NEWS;
1053                }
1054                break;
1055         // case spacer_cmd: /* normally spaces are not pushed back */
1056         // case relax_cmd:  /* normally not, should be option  */
1057         //     if (done) {
1058         //         back_input(cur_tok);
1059         //         goto BAD_NEWS;
1060         //     } else {
1061         //         break;
1062         //     }
1063         // default:
1064         //     goto BAD_NEWS;
1065            case spacer_cmd: /* normally spaces are not pushed back */
1066                if (done) {
1067                    goto BAD_NEWS;
1068                } else {
1069                    break;
1070                }
1071                // fall through
1072            default:
1073                tex_back_input(cur_tok);
1074                /* unless we accept partial keywords */
1075                goto BAD_NEWS;
1076        }
1077    }
1078  BAD_NEWS:
1079    tex_aux_show_keyword_error(s);
1080    cur_cs = save_cur_cs;
1081    return 0;
1082}
1083
1084/*
1085    This is the original scanner with push|-|back. It's a matter of choice: we are more restricted
1086    on the one hand and more loose on the other.
1087*/
1088
1089int tex_scan_keyword(const char *s)
1090{
1091    if (*s) {
1092        halfword h = null;
1093        halfword p = null;
1094        halfword save_cur_cs = cur_cs;
1095        int n = 0;
1096        while (*s) {
1097            /*tex Recursion is possible here! */
1098            tex_get_x_token();
1099            if ((cur_cmd == letter_cmd || cur_cmd == other_char_cmd) && ((cur_chr == *s) || (cur_chr == *s - 'a' + 'A'))) {
1100                p = tex_store_new_token(p, cur_tok);
1101                if (! h) {
1102                    h = p;
1103                }
1104                n++;
1105                s++;
1106            } else if ((p != h) || (cur_cmd != spacer_cmd)) {
1107                tex_back_input(cur_tok);
1108                if (h) {
1109                    tex_begin_backed_up_list(h);
1110                }
1111                cur_cs = save_cur_cs;
1112                return 0;
1113            }
1114        }
1115        if (h) {
1116            tex_flush_token_list_head_tail(h, p, n);
1117        }
1118        cur_cs = save_cur_cs;
1119        return 1;
1120    } else {
1121        /*tex but not with newtokenlib zero keyword simply doesn't match  */
1122        return 0 ;
1123    }
1124}
1125
1126int tex_scan_partial_keyword(const char *s)
1127{
1128    if (*s) {
1129        halfword save_cur_cs = cur_cs;
1130        int n = 0;
1131        while (*s) {
1132            tex_get_x_token();
1133            if ((cur_cmd == letter_cmd || cur_cmd == other_char_cmd) && ((cur_chr == *s) || (cur_chr == *s - 'a' + 'A'))) {
1134                n++;
1135                s++;
1136            } else if (cur_cmd != spacer_cmd) {
1137                tex_back_input(cur_tok);
1138                cur_cs = save_cur_cs;
1139                return n > 0;
1140            }
1141        }
1142        cur_cs = save_cur_cs;
1143        return 1;
1144    } else {
1145        return 0 ;
1146    }
1147}
1148
1149int tex_scan_keyword_case_sensitive(const char *s)
1150{
1151    if (*s) {
1152        halfword h = null;
1153        halfword p = null;
1154        halfword save_cur_cs = cur_cs;
1155        int n = 0;
1156        while (*s) {
1157            tex_get_x_token();
1158            if ((cur_cmd == letter_cmd || cur_cmd == other_char_cmd) && (cur_chr == *s)) {
1159                p = tex_store_new_token(p, cur_tok);
1160                if (! h) {
1161                    h = p;
1162                }
1163                n++;
1164                s++;
1165            } else if ((p != h) || (cur_cmd != spacer_cmd)) {
1166                tex_back_input(cur_tok);
1167                if (h) {
1168                    tex_begin_backed_up_list(h);
1169                }
1170                cur_cs = save_cur_cs;
1171                return 0;
1172            }
1173        }
1174        if (h) {
1175            tex_flush_token_list_head_tail(h, p, n);
1176        }
1177        cur_cs = save_cur_cs;
1178        return 1;
1179    } else {
1180        return 0 ;
1181    }
1182}
1183
1184/*tex
1185
1186    We can not return |undefined_control_sequence| under some conditions (inside |shift_case|,
1187    for example). This needs thinking.
1188
1189*/
1190
1191halfword tex_active_to_cs(int c, int force)
1192{
1193    halfword cs = -1;
1194    if (c >= 0 && c <= max_character_code) {
1195        char utfbytes[8] = { active_character_first, active_character_second, active_character_third, 0 };
1196        aux_uni2string((char *) &utfbytes[3], c);
1197        cs = tex_string_locate(utfbytes, (size_t) utf8_size(c) + 3, force);
1198    }
1199    if (cs < 0) {
1200        cs = tex_string_locate(active_character_unknown, 4, force); /*tex Including the zero sentinel. */
1201    }
1202    return cs;
1203}
1204
1205/*tex
1206
1207    The heart of \TEX's input mechanism is the |get_next| procedure, which we shall develop in the
1208    next few sections of the program. Perhaps we shouldn't actually call it the \quote {heart},
1209    however, because it really acts as \TEX's eyes and mouth, reading the source files and
1210    gobbling them up. And it also helps \TEX\ to regurgitate stored token lists that are to be
1211    processed again.
1212
1213    The main duty of |get_next| is to input one token and to set |cur_cmd| and |cur_chr| to that
1214    token's command code and modifier. Furthermore, if the input token is a control sequence, the
1215    |eqtb| location of that control sequence is stored in |cur_cs|; otherwise |cur_cs| is set to
1216    zero.
1217
1218    Underlying this simple description is a certain amount of complexity because of all the cases
1219    that need to be handled. However, the inner loop of |get_next| is reasonably short and fast.
1220
1221    When |get_next| is asked to get the next token of a |\read| line, it sets |cur_cmd = cur_chr
1222    = cur_cs = 0| in the case that no more tokens appear on that line. (There might not be any
1223    tokens at all, if the |end_line_char| has |ignore| as its catcode.)
1224
1225    The value of |par_loc| is the |eqtb| address of |\par|. This quantity is needed because a
1226    blank line of input is supposed to be exactly equivalent to the appearance of |\par|; we must
1227    set |cur_cs := par_loc| when detecting a blank line.
1228
    Parts of |get_next| are executed more often than any other instructions of \TEX. The global
1230    variable |force_eof| is normally |false|; it is set |true| by an |\endinput| command.
1231    |luacstrings| is the number of lua print statements waiting to be input, it is changed by
1232    |lmt_token_call|.
1233
1234    If the user has set the |pausing| parameter to some positive value, and if nonstop mode has
1235    not been selected, each line of input is displayed on the terminal and the transcript file,
1236    followed by |=>|. \TEX\ waits for a response. If the response is simply |carriage_return|,
1237    the line is accepted as it stands, otherwise the line typed is used instead of the line in the
1238    file.
1239
1240*/
1241
1242/*tex
1243
1244    The other variant gives less clutter in tracing cache usage when profiling and for some files
    (like the manual) also a bit of a speedup. Splitting the switch gives 10 times less Bim
    in valgrind! See the \LUATEX\ source for that code.
1247
1248    The big switch changes the state if necessary, and |goto switch| if the current character
1249    should be ignored, or |goto reswitch| if the current character changes to another.
1250
1251    The n-way switch accomplishes the scanning quickly, assuming that a decent \CCODE\ compiler
1252    has translated the code. Note that the numeric values for |mid_line|, |skip_blanks|, and
1253    |new_line| are spaced apart from each other by |max_char_code+1|, so we can add a character's
1254    command code to the state to get a single number that characterizes both.
1255
1256    Remark: checking performance indicated that this switch was the cause of many branch prediction
1257    errors but changing it to:
1258
1259    \starttyping
1260    c = istate + cur_cmd;
1261    if (c == (mid_line_state + letter_cmd) || c == (mid_line_state + other_char_cmd)) {
1262        return 1;
1263    } else if (c >= new_line_state) {
1264        switch (c) {
1265        }
1266    } else if (c >= skip_blanks_state) {
1267        switch (c) {
1268        }
1269    } else if (c >= mid_line_state) {
1270        switch (c) {
1271        }
1272    } else {
1273        istate = mid_line_state;
1274        return 1;
1275    }
1276    \stoptyping
1277
1278    This gives as many prediction errors. So, we can indeed assume that the compiler does the right
1279    job, or that there is simply no other way.
1280
    When a line is finished a space is emitted. When a character of type |spacer| gets through, its
1282    character code is changed to |\ =040|. This means that the \ASCII\ codes for tab and space, and
1283    for the space inserted at the end of a line, will be treated alike when macro parameters are
1284    being matched. We do this since such characters are indistinguishable on most computer terminal
1285    displays.
1286
1287*/
1288
1289/*
1290
1291    c = istate + cur_cmd;
1292    if (c == (mid_line_state + letter_cmd) || c == (mid_line_state + other_char_cmd)) {
1293        return 1;
1294    } else if (c >= new_line_state) {
1295        ....
1296    }
1297
1298*/
1299
1300/*tex
1301
1302    This trick has been dropped when the wrapup mechanism had proven to be useful. The idea was
1303    to backport this to \LUATEX\ but some other \PDFTEX\ compatible parstuff made it there and
1304    backporting par related features becomes too messy.
1305
1306    \starttyping
1307    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
1308    cur_cs = lmt_token_state.line_par_loc;
1309    cur_cmd = eq_type(cur_cs);
1310    if (cur_cmd == undefined_cs_cmd) {
1311        cur_cs = lmt_token_state.par_loc;
1312        cur_cmd = eq_type(cur_cs);
1313    }
1314    cur_chr = eq_value(cur_cs);
1315    \stoptyping
1316
1317*/
1318
/*tex

    Get the next token from the current line in the io buffer. As long as |loc| has not passed
    |limit| we fetch a character, look up its category code (a detokenized line only knows spaces
    and other characters) and then switch on the sum of the input state and that code. When the
    line is exhausted we ask |tex_aux_next_line| for more.

    The result is 1 when |cur_cmd|, |cur_chr| (and possibly |cur_cs|) have been set, or when
    |tex_aux_next_line| says that we should return. The result is 0 after an invalid character
    has been reported or when |tex_aux_next_line| asks for a restart; presumably the caller then
    tries again.

*/

static int tex_aux_get_next_file(void)
{
  SWITCH:
    if (lmt_input_state.cur_input.loc <= lmt_input_state.cur_input.limit) {
        /*tex current line not yet finished */
        cur_chr = get_unichar_from_buffer(&lmt_input_state.cur_input.loc);
      RESWITCH:
        /*tex We also come here when the superscript handling changed |cur_chr|. */
        if (lmt_input_state.cur_input.cattable == no_catcode_table_preset) {
            /* happens seldom: detokenized line */
            cur_cmd = cur_chr == ' ' ? spacer_cmd : other_char_cmd;
        } else {
            cur_cmd = tex_aux_the_cat_code(cur_chr);
        }
        switch (lmt_input_state.cur_input.state + cur_cmd) {
            case mid_line_state    + ignore_cmd:
            case skip_blanks_state + ignore_cmd:
            case new_line_state    + ignore_cmd:
            case skip_blanks_state + spacer_cmd:
            case new_line_state    + spacer_cmd:
                /*tex Cases where character is ignored. */
                goto SWITCH;
            case mid_line_state    + escape_cmd:
            case new_line_state    + escape_cmd:
            case skip_blanks_state + escape_cmd:
                /*tex Scan a control sequence; the scanner tells us what the next state is. */
                lmt_input_state.cur_input.state = (unsigned char) tex_aux_scan_control_sequence();
                break;
            case mid_line_state    + active_char_cmd:
            case new_line_state    + active_char_cmd:
            case skip_blanks_state + active_char_cmd:
                /*tex Process an active-character. */
                if ((lmt_input_state.scanner_status == scanner_is_tolerant || lmt_input_state.scanner_status == scanner_is_matching) && tex_pass_active_math_char(cur_chr)) {
                    /*tex We need to intercept a delimiter in arguments. */
                } else if ((lmt_input_state.scanner_status == scanner_is_defining || lmt_input_state.scanner_status == scanner_is_absorbing) && tex_pass_active_math_char(cur_chr)) {
                    /*tex We are storing stuff in a token list or macro body. */
                } else if ((cur_mode == mmode || lmt_nest_state.math_mode) && tex_check_active_math_char(cur_chr)) {
                    /*tex We have an intercept. */
                } else if (lmt_scanner_state.expression_depth) {
                    /*tex Inside an expression the character is passed on as other character. */
                    cur_tok = other_token + cur_chr;
                    cur_cmd = other_char_cmd;
                } else {
                    /*tex The regular case: we continue with the meaning of the active character. */
                    cur_cs = tex_active_to_cs(cur_chr, ! lmt_hash_state.no_new_cs);
                    cur_cmd = eq_type(cur_cs);
                    cur_chr = eq_value(cur_cs);
                }
                lmt_input_state.cur_input.state = mid_line_state;
                break;
            case mid_line_state    + superscript_cmd:
            case new_line_state    + superscript_cmd:
            case skip_blanks_state + superscript_cmd:
                /*tex We need to check for multiple ^:
                    (0) always check for ^^ ^^^^ ^^^^^^^
                    (1) only check in text mode
                    (*) never
                */
                if (sup_mark_mode_par) {
                    if (sup_mark_mode_par == 1 && cur_mode != mmode && tex_aux_process_sup_mark()) {
                        goto RESWITCH;
                    }
                } else if (tex_aux_process_sup_mark()) {
                    goto RESWITCH;
                } else {
                    /*tex
                        We provide prescripts and shifted script in math mode and avoid fancy |^|
                        processing in text mode (which is what we do in \CONTEXT).
                    */
                }
                lmt_input_state.cur_input.state = mid_line_state;
                break;
            case mid_line_state    + invalid_char_cmd:
            case new_line_state    + invalid_char_cmd:
            case skip_blanks_state + invalid_char_cmd:
                /*tex Decry the invalid character and |goto restart|. */
                tex_aux_invalid_character_error();
                /*tex Because state may be |token_list| now: */
                return 0;
            case mid_line_state + spacer_cmd:
                /*tex Enter |skip_blanks| state, emit a space. */
                lmt_input_state.cur_input.state = skip_blanks_state;
                cur_chr = ' ';
                break;
            case mid_line_state + end_line_cmd:
                /*tex Finish the line. See note above about dropped |\linepar|. */
                lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
                cur_cmd = spacer_cmd;
                cur_chr = ' ';
                break;
            case skip_blanks_state + end_line_cmd:
            case mid_line_state    + comment_cmd:
            case new_line_state    + comment_cmd:
            case skip_blanks_state + comment_cmd:
                /*tex Finish line, |goto switch|; */
                lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
                goto SWITCH;
            case new_line_state + end_line_cmd:
                /*tex The rest of the line is only dropped when the go on option is not set. */
                if (! auto_paragraph_mode(auto_paragraph_go_on)) {
                    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
                }
                /*tex Finish line, emit a |\par|; */
                if (auto_paragraph_mode(auto_paragraph_text))  {
                    cur_cs = null;
                    cur_cmd = end_paragraph_cmd;
                    cur_chr = new_line_end_paragraph_code;
                 // cur_chr = normal_end_paragraph_code;
                } else {
                    cur_cs = lmt_token_state.par_loc;
                    cur_cmd = eq_type(cur_cs);
                    cur_chr = eq_value(cur_cs);
                }
                break;
            case skip_blanks_state + left_brace_cmd:
            case new_line_state    + left_brace_cmd:
                lmt_input_state.cur_input.state = mid_line_state;
                ++lmt_input_state.align_state;
                break;
            case mid_line_state + left_brace_cmd:
                ++lmt_input_state.align_state;
                break;
            case skip_blanks_state + right_brace_cmd:
            case new_line_state    + right_brace_cmd:
                lmt_input_state.cur_input.state = mid_line_state;
                --lmt_input_state.align_state;
                break;
            case mid_line_state + right_brace_cmd:
                --lmt_input_state.align_state;
                break;
            case mid_line_state + math_shift_cmd:
            case mid_line_state + alignment_tab_cmd:
            case mid_line_state + parameter_cmd:
            case mid_line_state + subscript_cmd:
            case mid_line_state + letter_cmd:
            case mid_line_state + other_char_cmd:
                /*tex We're already in the right state, so there is nothing to do. */
                break;
            /*
            case skip_blanks_state + math_shift_cmd:
            case skip_blanks_state + alignment_tab_cmd:
            case skip_blanks_state + parameter_cmd:
            case skip_blanks_state + subscript_cmd:
            case skip_blanks_state + letter_cmd:
            case skip_blanks_state + other_char_cmd:
            case new_line_state    + math_shift_cmd:
            case new_line_state    + alignment_tab_cmd:
            case new_line_state    + parameter_cmd:
            case new_line_state    + subscript_cmd:
            case new_line_state    + letter_cmd:
            case new_line_state    + other_char_cmd:
            */
            default:
                /*tex All that is left (see the list above) just brings us in the mid line state. */
                lmt_input_state.cur_input.state = mid_line_state;
                break;
        }
    } else {
        if (! io_token_input(lmt_input_state.cur_input.name)) {
            lmt_input_state.cur_input.state = new_line_state;
        }
        /*tex

           Move to next line of file, or |goto restart| if there is no next line, or |return| if a
           |\read| line has finished.

        */
        do {
            next_line_retval r = tex_aux_next_line();
            if (r == next_line_restart) {
                /*tex This happens more often. */
                return 0;
            } else if (r == next_line_return) {
                return 1;
            }
        } while (0);
     /* check_interrupt(); */
        goto SWITCH;
    }
    return 1;
}
1495
1496/*tex
1497
1498    Notice that a code like |^^8| becomes |x| if not followed by a hex digit. We only support a
1499    limited set:
1500
1501    \starttyping
1502    ^^^^^^XXXXXX
    ^^^^XXXX
1504    ^^XX ^^<char>
1505    \stoptyping
1506
1507*/
1508
/*tex
    Only digits and the lowercase letters |a| upto |f| count as hex digit here. The argument is
    wrapped in parentheses so that an expression can be passed safely, but keep in mind that it
    is evaluated more than once.
*/

# define is_hex(a) (((a) >= '0' && (a) <= '9') || ((a) >= 'a' && (a) <= 'f'))
1510
1511static inline halfword tex_aux_two_hex_to_cur_chr(int c1, int c2)
1512 {
1513   return
1514        0x10 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
1515      + 0x01 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10);
1516 }
1517
1518static inline halfword tex_aux_four_hex_to_cur_chr(int c1, int c2,int c3, int c4)
1519 {
1520   return
1521         0x1000 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
1522       + 0x0100 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10)
1523       + 0x0010 * (c3 <= '9' ? c3 - '0' : c3 - 'a' + 10)
1524       + 0x0001 * (c4 <= '9' ? c4 - '0' : c4 - 'a' + 10);
1525}
1526
1527static inline halfword tex_aux_six_hex_to_cur_chr(int c1, int c2, int c3, int c4, int c5, int c6)
1528{
1529   return
1530         0x100000 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
1531       + 0x010000 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10)
1532       + 0x001000 * (c3 <= '9' ? c3 - '0' : c3 - 'a' + 10)
1533       + 0x000100 * (c4 <= '9' ? c4 - '0' : c4 - 'a' + 10)
1534       + 0x000010 * (c5 <= '9' ? c5 - '0' : c5 - 'a' + 10)
1535       + 0x000001 * (c6 <= '9' ? c6 - '0' : c6 - 'a' + 10);
1536
1537}
1538
1539static int tex_aux_process_sup_mark(void)
1540{
1541    if (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc]) {
1542        if (lmt_input_state.cur_input.loc < lmt_input_state.cur_input.limit) {
1543            if ((cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1]) && (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 2])) {
1544                if ((cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 3]) && (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 4])) {
1545                    if ((lmt_input_state.cur_input.loc + 10) <= lmt_input_state.cur_input.limit) {
1546                        /*tex |^^^^^^XXXXXX| */
1547                        int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  5];
1548                        int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  6];
1549                        int c3 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  7];
1550                        int c4 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  8];
1551                        int c5 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  9];
1552                        int c6 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 10];
1553                        if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1554                            lmt_input_state.cur_input.loc += 11;
1555                            cur_chr = tex_aux_six_hex_to_cur_chr(c1, c2, c3, c4, c5, c6);
1556                            return 1;
1557                        } else {
1558                            tex_handle_error(
1559                                normal_error_type,
1560                                "^^^^^^ needs six hex digits",
1561                                NULL
1562                            );
1563                        }
1564                    } else {
1565                        tex_handle_error(
1566                            normal_error_type,
1567                            "^^^^^^ needs six hex digits, end of input",
1568                            NULL
1569                        );
1570                    }
1571                } else if ((lmt_input_state.cur_input.loc + 6) <= lmt_input_state.cur_input.limit) {
1572                /*tex |^^^^XXXX| */
1573                    int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 3];
1574                    int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 4];
1575                    int c3 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 5];
1576                    int c4 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 6];
1577                    if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1578                        lmt_input_state.cur_input.loc += 7;
1579                        cur_chr = tex_aux_four_hex_to_cur_chr(c1, c2, c3, c4);
1580                        return 1;
1581                    } else {
1582                        tex_handle_error(
1583                            normal_error_type,
1584                            "^^^^ needs four hex digits",
1585                            NULL
1586                        );
1587                    }
1588                } else {
1589                    tex_handle_error(
1590                        normal_error_type,
1591                        "^^^^ needs four hex digits, end of input",
1592                        NULL
1593                    );
1594                }
1595            } else if ((lmt_input_state.cur_input.loc + 2) <= lmt_input_state.cur_input.limit) {
1596                /*tex |^^XX| */
1597                int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1];
1598                int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 2];
1599                if (is_hex(c1) && is_hex(c2)) {
1600                    lmt_input_state.cur_input.loc += 3;
1601                    cur_chr = tex_aux_two_hex_to_cur_chr(c1, c2);
1602                    return 1;
1603                }
1604            }
1605            /*tex The single character case: */
1606            {
1607                int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1];
1608                if (c1 < 0x80) {
1609                    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.loc + 2;
1610                 // if (is_hex(c1) && (iloc <= ilimit)) {
1611                 //     int c2 = fileio_state.io_buffer[iloc];
1612                 //     if (is_hex(c2)) {
1613                 //         ++iloc;
1614                 //         cur_chr = two_hex_to_cur_chr(c1, c2);
1615                 //         return 1;
1616                 //     }
1617                 // }
1618                 // /*tex The somewhat odd cases, often special control characters: */
1619                    cur_chr = (c1 < 0x40 ? c1 + 0x40 : c1 - 0x40);
1620                    return 1;
1621                }
1622            }
1623        }
1624    }
1625    return 0;
1626}
1627
1628/*tex
1629
1630    Control sequence names are scanned only when they appear in some line of a file. Once they have
1631    been scanned the first time, their |eqtb| location serves as a unique identification, so \TEX\
1632    doesn't need to refer to the original name any more except when it prints the equivalent in
1633    symbolic form.
1634
1635    The program that scans a control sequence has been written carefully in order to avoid the
1636    blowups that might otherwise occur if a malicious user tried something like |\catcode'15 = 0|.
1637    The algorithm might look at |buffer[ilimit + 1]|, but it never looks at |buffer[ilimit + 2]|.
1638
1639    If expanded characters like |^^A| or |^^df| appear in or just following a control sequence name,
1640    they are converted to single characters in the buffer and the process is repeated, slowly but
1641    surely.
1642
1643*/
1644
1645/*tex
1646
1647    Whenever we reach the following piece of code, we will have |cur_chr = buffer[k - 1]| and |k <=
1648    ilimit + 1| and |cat = get_cat_code(cat_code_table, cur_chr)|. If an expanded code like |^^A| or
1649    |^^df| appears in |buffer[(k - 1) .. (k + 1)]| or |buffer[(k - 1) .. (k + 2)]|, we will store
1650    the corresponding code in |buffer[k - 1]| and shift the rest of the buffer left two or three
1651    places.
1652
1653*/
1654
1655static int tex_aux_check_expanded_code(int *kk, halfword *chr)
1656{
1657    if (sup_mark_mode_par > 1 || (sup_mark_mode_par == 1 && cur_mode == mmode)) {
1658        return 0;
1659    } else {
1660        int k = *kk;
1661        /* chr is the ^ character or an equivalent one */
1662        if (lmt_fileio_state.io_buffer[k] == *chr && k < lmt_input_state.cur_input.limit) {
1663            int d = 1;
1664            int l;
1665            if ((*chr == lmt_fileio_state.io_buffer[k + 1]) && (*chr == lmt_fileio_state.io_buffer[k + 2])) {
1666                if ((*chr == lmt_fileio_state.io_buffer[k + 3]) && (*chr == lmt_fileio_state.io_buffer[k + 4])) {
1667                    if ((k + 10) <= lmt_input_state.cur_input.limit) {
1668                        int c1 = lmt_fileio_state.io_buffer[k + 6 - 1];
1669                        int c2 = lmt_fileio_state.io_buffer[k + 6    ];
1670                        int c3 = lmt_fileio_state.io_buffer[k + 6 + 1];
1671                        int c4 = lmt_fileio_state.io_buffer[k + 6 + 2];
1672                        int c5 = lmt_fileio_state.io_buffer[k + 6 + 3];
1673                        int c6 = lmt_fileio_state.io_buffer[k + 6 + 4];
1674                        if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1675                            d = 6;
1676                            *chr = tex_aux_six_hex_to_cur_chr(c1, c2, c3, c4, c5, c6);
1677                        } else {
1678                            tex_handle_error(
1679                                normal_error_type,
1680                                "^^^^^^ needs six hex digits",
1681                                NULL
1682                            );
1683                        }
1684                    } else {
1685                        tex_handle_error(
1686                            normal_error_type,
1687                            "^^^^^^ needs six hex digits, end of input",
1688                            NULL
1689                        );
1690                    }
1691                } else if ((k + 6) <= lmt_input_state.cur_input.limit) {
1692                    int c1 = lmt_fileio_state.io_buffer[k + 4 - 1];
1693                    int c2 = lmt_fileio_state.io_buffer[k + 4    ];
1694                    int c3 = lmt_fileio_state.io_buffer[k + 4 + 1];
1695                    int c4 = lmt_fileio_state.io_buffer[k + 4 + 2];
1696                    if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1697                        d = 4;
1698                        *chr = tex_aux_four_hex_to_cur_chr(c1, c2, c3, c4);
1699                    } else {
1700                        tex_handle_error(
1701                            normal_error_type,
1702                            "^^^^ needs four hex digits",
1703                            NULL
1704                        );
1705                    }
1706                } else {
1707                    tex_handle_error(
1708                        normal_error_type,
1709                        "^^^^ needs four hex digits, end of input",
1710                        NULL
1711                    );
1712                }
1713            } else {
1714                int c1 = lmt_fileio_state.io_buffer[k + 1];
1715                if (c1 < 0x80) { /* really ? */
1716                    d = 1;
1717                    if (is_hex(c1) && (k + 2) <= lmt_input_state.cur_input.limit) {
1718                        int c2 = lmt_fileio_state.io_buffer[k + 2];
1719                        if (is_hex(c2)) {
1720                            d = 2;
1721                            *chr = tex_aux_two_hex_to_cur_chr(c1, c2);
1722                        } else {
1723                            *chr = (c1 < 0x40 ? c1 + 0x40 : c1 - 0x40);
1724                        }
1725                    } else {
1726                        *chr = (c1 < 0x40 ? c1 + 0x40 : c1 - 0x40);
1727                    }
1728                }
1729            }
1730            if (d > 2) {
1731                d = 2 * d - 1;
1732            } else {
1733                d++;
1734            }
1735            if (*chr <= 0x7F) {
1736                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) *chr;
1737            } else if (*chr <= 0x7FF) {
1738                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xC0 + *chr / 0x40);
1739                k++;
1740                d--;
1741                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + *chr % 0x40);
1742            } else if (*chr <= 0xFFFF) {
1743                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xE0 + *chr / 0x1000);
1744                k++;
1745                d--;
1746                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x1000) / 0x40);
1747                k++;
1748                d--;
1749                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x1000) % 0x40);
1750            } else {
1751                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xF0 + *chr / 0x40000);
1752                k++;
1753                d--;
1754                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x40000) / 0x1000);
1755                k++;
1756                d--;
1757                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + ((*chr % 0x40000) % 0x1000) / 0x40);
1758                k++;
1759                d--;
1760                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + ((*chr % 0x40000) % 0x1000) % 0x40);
1761            }
1762            l = k;
1763            lmt_input_state.cur_input.limit -= d;
1764            while (l <= lmt_input_state.cur_input.limit) {
1765                lmt_fileio_state.io_buffer[l] = lmt_fileio_state.io_buffer[l + d];
1766                l++;
1767            }
1768            *kk = k;
1769            cur_chr = *chr; /* hm */
1770            return 1;
1771        } else {
1772            return 0;
1773        }
1774    }
1775}
1776
1777static int tex_aux_scan_control_sequence(void)
1778{
1779    int state = mid_line_state;
1780    if (lmt_input_state.cur_input.loc > lmt_input_state.cur_input.limit) {
1781        /*tex |state| is irrelevant in this case. */
1782        cur_cs = null_cs;
1783    } else {
1784        /*tex |cat_code(cur_chr)|, usually: */
1785        while (1) {
1786            int loc = lmt_input_state.cur_input.loc;
1787            halfword chr = get_unichar_from_buffer(&loc);
1788            halfword cat = tex_aux_the_cat_code(chr);
1789            if (cat != letter_cmd || loc > lmt_input_state.cur_input.limit) {
1790                if (cat == spacer_cmd) {
1791                    state = skip_blanks_state;
1792                } else {
1793                    state = mid_line_state;
1794                    if (cat == superscript_cmd && tex_aux_check_expanded_code(&loc, &chr)) {
1795                        continue;
1796                    }
1797                }
1798            } else {
1799                state = skip_blanks_state;
1800# if 0
1801                do {
1802                    chr = get_unichar_from_buffer(&loc);
1803                    cat = tex_aux_the_cat_code(chr);
1804                } while (cat == letter_cmd && loc <= lmt_input_state.cur_input.limit);
1805# else
1806                while (cat == letter_cmd && loc <= lmt_input_state.cur_input.limit) {
1807                    chr = get_unichar_from_buffer(&loc);
1808                    cat = tex_aux_the_cat_code(chr);
1809                }
1810# endif
1811                /*tex If an expanded \unknown: */
1812                if (cat == superscript_cmd && tex_aux_check_expanded_code(&loc, &chr)) {
1813                    continue;
1814                } else if (cat != letter_cmd) {
1815                    /*tex Backtrack one character which can be \UTF. */
1816                    if (chr <= 0x7F) {
1817                        loc -= 1; /* in most cases */
1818                    } else if (chr > 0xFFFF) {
1819                        loc -= 4;
1820                    } else if (chr > 0x7FF) {
1821                        loc -= 3;
1822                    } else /* if (cur_chr > 0x7F) */ {
1823                        loc -= 2;
1824                    }
1825                    /*tex Now |k| points to first nonletter. */
1826                }
1827            }
1828            cur_cs = tex_id_locate(lmt_input_state.cur_input.loc, loc - lmt_input_state.cur_input.loc, ! lmt_hash_state.no_new_cs);
1829            lmt_input_state.cur_input.loc = loc;
1830            break;
1831        }
1832    }
1833    cur_cmd = eq_type(cur_cs);
1834    cur_chr = eq_value(cur_cs);
1835    return state;
1836}
1837
1838static void tex_aux_check_validity(void)
1839{
1840    switch (lmt_input_state.scanner_status) {
1841        case scanner_is_normal:
1842            break;
1843        case scanner_is_skipping:
1844            tex_handle_error(
1845                condition_error_type,
1846                "The file ended while I was skipping conditional text.",
1847                "This kind of error happens when you say '\\if...' and forget the\n"
1848                "matching '\\fi'. It can also be that you  use '\\orelse' or '\\orunless\n'"
1849                "in the wrong way. Or maybe a forbidden control sequence was encountered."
1850            );
1851            break;
1852        case scanner_is_defining:
1853            tex_handle_error(runaway_error_type, "The file ended when scanning a definition.", NULL);
1854            break;
1855        case scanner_is_matching:
1856            tex_handle_error(runaway_error_type, "The file ended when scanning an argument.", NULL);
1857            break;
1858        case scanner_is_tolerant:
1859            break;
1860        case scanner_is_aligning:
1861            tex_handle_error(runaway_error_type, "The file ended when scanning an alignment preamble.", NULL);
1862            break;
1863        case scanner_is_absorbing:
1864            tex_handle_error(runaway_error_type, "The file ended when absorbing something.", NULL);
1865            break;
1866    }
1867}
1868
1869static inline int tex_aux_every_eof(void)
1870{
1871    halfword t = lmt_input_state.in_stack[lmt_input_state.cur_input.index].at_end_of_file;
1872    if (t) {
1873        /* tex Fake one empty line. Never happens in \CONTEXT. */
1874        lmt_input_state.cur_input.limit = lmt_fileio_state.io_first - 1;
1875        lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen = 1;
1876        lmt_input_state.in_stack[lmt_input_state.cur_input.index].at_end_of_file = null;
1877        tex_begin_token_list(t, end_file_text);
1878        tex_delete_token_reference(t);
1879        return 1;
1880    } else if (every_eof_par) {
1881        /* tex Fake one empty line. Never happens in \CONTEXT. */
1882        lmt_input_state.cur_input.limit = lmt_fileio_state.io_first - 1;
1883        lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen = 1;
1884        tex_begin_token_list(every_eof_par, every_eof_text);
1885        return 1;
1886    } else {
1887        return 0;
1888    }
1889}
1890
1891static inline next_line_retval tex_aux_next_line(void)
1892{
1893    if (lmt_input_state.cur_input.name > io_initial_input_code) {
1894        /*tex Read next line of file into |buffer|, or |goto restart| if the file has ended. */
1895        unsigned inhibit_eol = 0;
1896        ++lmt_input_state.input_line;
1897        lmt_fileio_state.io_first = lmt_input_state.cur_input.start;
1898        if (! lmt_token_state.force_eof) {
1899            switch (lmt_input_state.cur_input.name) {
1900                case io_lua_input_code:
1901                    {
1902                        halfword result = null;
1903                        int cattable = 0;
1904                        int partial = 0;
1905                        int finalline = 0;
1906                        int type = lmt_cstring_input(&result, &cattable, &partial, &finalline);
1907                        switch (type) {
1908                            case eof_tex_input:
1909                                lmt_token_state.force_eof = 1;
1910                                break;
1911                            case string_tex_input:
1912                                /*tex string */
1913                                lmt_input_state.cur_input.limit = lmt_fileio_state.io_last;
1914                                lmt_input_state.cur_input.cattable = (short) cattable;
1915                                lmt_input_state.cur_input.partial = (signed char) partial;
1916                                if (finalline || partial || cattable == no_catcode_table_preset) {
1917                                    inhibit_eol = 1;
1918                                }
1919                                if (! partial) {
1920                                    lmt_input_state.cur_input.state = new_line_state;
1921                                }
1922                                break;
1923                            case token_tex_input:
1924                                /*tex token */
1925                                {
1926                                    halfword t = result - cs_token_flag;
1927                                    if (t >= 0 && eq_type(t) == input_cmd && eq_value(t) == end_of_input_code && lmt_input_state.cur_input.index > 0) {
1928                                        tex_end_file_reading();
1929                                    }
1930                                    tex_back_input(result);
1931                                    return next_line_restart;
1932                                }
1933                            case token_list_tex_input:
1934                                /*tex token */
1935                                if (result) {
1936                                    tex_begin_backed_up_list(result);
1937                                }
1938                                return next_line_restart;
1939                            case node_tex_input:
1940                                /*tex node */
1941                                if (node_token_overflow(result)) {
1942                                    /* we could link them and avoid one input level */
1943                                    tex_back_input(token_val(ignore_cmd, node_token_lsb(result)));
1944                                    tex_reinsert_token(token_val(node_cmd, node_token_msb(result)));
1945                                    return next_line_restart;
1946                                } else {
1947                                    /*tex |0x10FFFF == 1114111| */
1948                                    tex_back_input(token_val(node_cmd, result));
1949                                    return next_line_restart;
1950                                }
1951                            default:
1952                                lmt_token_state.force_eof = 1;
1953                                break;
1954                        }
1955                        break;
1956                    }
1957                case io_token_input_code:
1958                case io_token_eof_input_code:
1959                    {
1960                        /* can be simplified but room for extensions now */
1961                        halfword result = null;
1962                        int cattable = 0;
1963                        int partial = 0;
1964                        int finalline = 0;
1965                        int type = lmt_cstring_input(&result, &cattable, &partial, &finalline);
1966                        switch (type) {
1967                            case eof_tex_input:
1968                                lmt_token_state.force_eof = 1;
1969                                if (lmt_input_state.cur_input.name == io_token_eof_input_code && tex_aux_every_eof()) {
1970                                    return next_line_restart;
1971                                }
1972                                break;
1973                            case string_tex_input:
1974                                /*tex string */
1975                                lmt_input_state.cur_input.limit = lmt_fileio_state.io_last;
1976                                lmt_input_state.cur_input.cattable = (short) cattable;
1977                                lmt_input_state.cur_input.partial = (signed char) partial;
1978                                inhibit_eol = lmt_input_state.cur_input.name != io_token_eof_input_code;
1979                                if (! partial) {
1980                                    lmt_input_state.cur_input.state = new_line_state;
1981                                }
1982                                break;
1983                            default:
1984                                if (result) {
1985                                    /*tex Can't happen: lua token and node output mixed in here */
1986                                }
1987                                lmt_token_state.force_eof = 1;
1988                                break;
1989                        }
1990                        break;
1991                    }
1992                case io_tex_macro_code:
1993                    /* this can't happen and will fail with the next line check */
1994             /* case io_file_input_code: */
1995                default:
1996                    if (tex_lua_input_ln()) {
1997                        /*tex Not end of file, set |ilimit|. */
1998                        lmt_input_state.cur_input.limit = lmt_fileio_state.io_last;
1999                        lmt_input_state.cur_input.cattable = default_catcode_table_preset;
2000                        break;
2001                    } else if (! lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen && tex_aux_every_eof()) {
2002                        return next_line_restart;
2003                    } else {
2004                        tex_aux_check_validity();
2005                        lmt_token_state.force_eof = 1;
2006                        break;
2007                    }
2008            }
2009        }
2010        /*tex
2011            All of the easy branches of |get_next| have now been taken care of. There is one more 
2012            branch. Conversely, the |file_warning| procedure is invoked when a file ends and some 
2013            groups entered or conditionals started while reading from that file are still incomplete.
2014        */
2015        if (lmt_token_state.force_eof) {
2016            if (tracing_nesting_par > 0) {
2017                if ((lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].group != cur_boundary) || (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].if_ptr != lmt_condition_state.cond_ptr)) {
2018                    if (! io_token_input(lmt_input_state.cur_input.name)) {
2019                        /*tex Check for unfinished groups: */
2020                        tex_save_stack_catch_up();
2021                        /*tex Check for unfinished conditionals: */
2022                        tex_conditional_catch_up();
2023                        /*tex Show the context, when asked for: */
2024                        tex_print_nlp();
2025                        if (tracing_nesting_par > 1) {
2026                            tex_show_context();
2027                        }
2028                        /*tex Recover if needed: */
2029                        if (lmt_error_state.history == spotless) {
2030                            lmt_error_state.history = warning_issued;
2031                        }
2032                    }
2033                }
2034            }
2035            if (io_file_input(lmt_input_state.cur_input.name)) {
2036                tex_report_stop_file();
2037                --lmt_input_state.open_files;
2038            }
2039            lmt_token_state.force_eof = 0;
2040            tex_end_file_reading();
2041            return next_line_restart;
2042        } else {
2043            if (inhibit_eol || end_line_char_inactive) {
2044                lmt_input_state.cur_input.limit--;
2045            } else {
2046                lmt_fileio_state.io_buffer[lmt_input_state.cur_input.limit] = (unsigned char) end_line_char_par;
2047            }
2048            lmt_fileio_state.io_first = lmt_input_state.cur_input.limit + 1;
2049            lmt_input_state.cur_input.loc = lmt_input_state.cur_input.start;
2050            /*tex We're ready to read. */
2051        }
2052    } else if (lmt_input_state.input_stack_data.ptr > 0) {
2053        cur_cmd = 0;
2054        cur_chr = 0;
2055        return next_line_return;
2056    } else {
2057        /*tex A somewhat weird check: */
2058        switch (lmt_print_state.selector) {
2059            case no_print_selector_code:
2060            case terminal_selector_code:
2061                tex_open_log_file();
2062                break;
2063        }
2064        tex_handle_error(eof_error_type, "end of file encountered", NULL);
2065        /*tex Just in case it is not handled in a callback: */
2066        if (lmt_error_state.interaction > nonstop_mode) {
2067            tex_fatal_error("aborting job");
2068        }
2069    }
2070    /*tex We're in a loop and restart: */
2071    return next_line_ok;
2072}
2073
2074halfword tex_get_at_end_of_file(void)
2075{
2076    for (int i = lmt_input_state.input_stack_data.ptr; i > 0; i--) {
2077        if (lmt_input_state.input_stack[i].name == io_file_input_code) {
2078            return lmt_input_state.in_stack[lmt_input_state.input_stack[i].index].at_end_of_file;
2079        }
2080    }
2081    return null;
2082}
2083
2084void tex_set_at_end_of_file(halfword h)
2085{
2086    for (int i = lmt_input_state.input_stack_data.ptr; i > 0; i--) {
2087        if (lmt_input_state.input_stack[i].name == io_file_input_code) {
2088            lmt_input_state.in_stack[lmt_input_state.input_stack[i].index].at_end_of_file = h;
2089            return;
2090        }
2091    }
2092    tex_flush_token_list(h);
2093}
2094
2095/*tex
2096    Let's consider now what happens when |get_next| is looking at a token list.
2097*/
2098
2099static int tex_aux_get_next_tokenlist(void)
2100{
2101    halfword t = token_info(lmt_input_state.cur_input.loc);
2102    /*tex Move to next. */
2103    lmt_input_state.cur_input.loc = token_link(lmt_input_state.cur_input.loc);
2104    if (t >= cs_token_flag) {
2105        /*tex A control sequence token */
2106        cur_cs = t - cs_token_flag;
2107        cur_cmd = eq_type(cur_cs);
2108        if (cur_cmd == deep_frozen_dont_expand_cmd) {
2109            /*tex
2110
2111                Get the next token, suppressing expansion. The present point in the program is
2112                reached only when the |expand| routine has inserted a special marker into the
2113                input. In this special case, |token_info(iloc)| is known to be a control sequence
2114                token, and |token_link(iloc) = null|.
2115
2116            */
2117            cur_cs = token_info(lmt_input_state.cur_input.loc) - cs_token_flag;
2118            lmt_input_state.cur_input.loc = null;
2119            cur_cmd = eq_type(cur_cs);
2120            if (cur_cmd > max_command_cmd) {
2121                cur_cmd = relax_cmd;
2122             // cur_chr = no_expand_flag;
2123                cur_chr = no_expand_relax_code;
2124                return 1;
2125            }
2126        }
2127        cur_chr = eq_value(cur_cs);
2128    } else {
2129        cur_cmd = token_cmd(t);
2130        cur_chr = token_chr(t);
2131        switch (cur_cmd) {
2132            case left_brace_cmd:
2133                ++lmt_input_state.align_state;
2134                break;
2135            case right_brace_cmd:
2136                --lmt_input_state.align_state;
2137                break;
2138            case active_char_cmd:
2139                if ((cur_mode == mmode || lmt_nest_state.math_mode) && tex_check_active_math_char(cur_chr)) {
2140                    /*tex We have an intercept. */
2141                } else if (lmt_scanner_state.expression_depth) {
2142                    /*tex well */
2143                    cur_tok = other_token + cur_chr;
2144                    cur_cmd = other_char_cmd;
2145                }
2146                break;
2147            case parameter_reference_cmd:
2148                {
2149                    /*tex Insert macro parameter and |goto restart|. */
2150                    halfword p = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + cur_chr - 1];
2151                    if (p) {
2152                        tex_begin_parameter_list(p);
2153                    }
2154                    return 0;
2155                }
2156        }
2157    }
2158    return 1;
2159}
2160
2161/*tex
2162
2163    Now we're ready to take the plunge into |get_next| itself. Parts of this routine are executed
2164    more often than any other instructions of \TEX. This sets |cur_cmd|, |cur_chr|, |cur_cs| to
2165    next token.
2166
2167    Handling alignments is interwoven because there we switch between constructing cells and rows
2168    (node lists) based on templates that are token lists. This is why in several places we find
2169    checks for |align_state|.
2170
2171*/
2172
2173void tex_get_next(void)
2174{
2175    while (1) {
2176        cur_cs = 0;
2177        if (lmt_input_state.cur_input.state != token_list_state) {
2178            /*tex Input from external file, |goto restart| if no input found. */
2179            if (! tex_aux_get_next_file()) {
2180                continue;
2181            } else {
2182                /*tex Check align state later on! */
2183            }
2184        } else if (! lmt_input_state.cur_input.loc) {
2185            /*tex List exhausted, resume previous level. */
2186            tex_end_token_list();
2187            continue;
2188        } else if (! tex_aux_get_next_tokenlist()) {
2189            /*tex Parameter needs to be expanded. */
2190            continue;
2191        }
2192     // if ((! lmt_input_state.align_state) && (cur_cmd == alignment_tab_cmd || cur_cmd == alignment_cmd)) {
2193     //     /*tex If an alignment entry has just ended, take appropriate action. */
2194     //     tex_insert_alignment_template();
2195     //     continue;
2196     // } else {
2197     //     break;
2198     // }
2199        switch (cur_cmd) {
2200            case alignment_tab_cmd:
2201            case alignment_cmd:
2202                /*tex If an alignment entry has just ended, take appropriate action. */
2203                if (lmt_input_state.align_state) {
2204                    return;
2205                } else {
2206                    tex_insert_alignment_template();
2207                    continue;
2208                }
2209            default:
2210                return;
2211        }
2212    }
2213}
2214
2215// void tex_get_next(void)
2216// {
2217//     while (1) {
2218//         cur_cs = 0;
2219//         if (lmt_input_state.cur_input.state != token_list_state) {
2220//             /*tex Input from external file, |goto restart| if no input found. */
2221//             if (tex_aux_get_next_file()) {
2222//                 switch (cur_cmd) {
2223//                     case alignment_tab_cmd:
2224//                     case alignment_cmd:
2225//                         /*tex If an alignment entry has just ended, take appropriate action. */
2226//                         if (lmt_input_state.align_state) {
2227//                             return;
2228//                         } else {
2229//                             tex_insert_alignment_template();
2230//                             break;
2231//                         }
2232//                     default:
2233//                         return;
2234//                 }
2235//             }
2236//         } else if (! lmt_input_state.cur_input.loc) {
2237//             /*tex List exhausted, resume previous level. */
2238//             tex_end_token_list();
2239//         } else if (tex_aux_get_next_tokenlist()) {
2240//             switch (cur_cmd) {
2241//                 case alignment_tab_cmd:
2242//                 case alignment_cmd:
2243//                     /*tex If an alignment entry has just ended, take appropriate action. */
2244//                     if (lmt_input_state.align_state) {
2245//                         return;
2246//                     } else {
2247//                         tex_insert_alignment_template();
2248//                         break;
2249//                     }
2250//                 default:
2251//                     return;
2252//             }
2253//         }
2254//     }
2255// }
2256
2257void tex_get_next_non_spacer(void)
2258{
2259    while (1) {
2260        cur_cs = 0;
2261        if (lmt_input_state.cur_input.state != token_list_state) {
2262            /*tex Input from external file, |goto restart| if no input found. */
2263            if (! tex_aux_get_next_file()) {
2264                continue;
2265            } else {
2266                /*tex Check align state later on! */
2267            }
2268        } else if (! lmt_input_state.cur_input.loc) {
2269            /*tex List exhausted, resume previous level. */
2270            tex_end_token_list();
2271            continue;
2272        } else if (! tex_aux_get_next_tokenlist()) {
2273            /*tex Parameter needs to be expanded. */
2274            continue;
2275        }
2276        switch (cur_cmd) {
2277            case spacer_cmd:
2278                continue;
2279            case alignment_tab_cmd:
2280            case alignment_cmd:
2281                /*tex If an alignment entry has just ended, take appropriate action. */
2282                if (lmt_input_state.align_state) {
2283                    return;
2284                } else {
2285                    tex_insert_alignment_template();
2286                    continue;
2287                }
2288            default:
2289                return;
2290        }
2291    }
2292}
2293
2294/*tex
2295
2296    Since |get_next| is used so frequently in \TEX, it is convenient to define three related
2297    procedures that do a little more:
2298
2299    \startitemize
2300        \startitem
2301            |get_token| not only sets |cur_cmd| and |cur_chr|, it also sets |cur_tok|, a packed
2302            halfword version of the current token.
2303        \stopitem
2304        \startitem
2305            |get_x_token|, meaning \quote {get an expanded token}, is like |get_token|, but if the
2306            current token turns out to be a user-defined control sequence (i.e., a macro call), or
2307            a conditional, or something like |\topmark| or |\expandafter| or |\csname|, it is
2308            eliminated from the input by beginning the expansion of the macro or the evaluation of
2309            the conditional.
2310        \stopitem
2311        \startitem
2312            |x_token| is like |get_x_token| except that it assumes that |get_next| has already been
2313            called.
2314        \stopitem
2315    \stopitemize
2316
2317    In fact, these three procedures account for almost every use of |get_next|. No new control
2318    sequences will be defined except during a call of |get_token|, or when |\csname| compresses a
2319    token list, because |no_new_control_sequence| is always |true| at other times.
2320
2321    This sets |cur_cmd|, |cur_chr|, |cur_tok|. For convenience we also return the token because in
2322    some places we store it and then some direct assignment looks a bit nicer.
2323
2324*/
2325
2326halfword tex_get_token(void)
2327{
2328    lmt_hash_state.no_new_cs = 0;
2329    tex_get_next();
2330    lmt_hash_state.no_new_cs = 1;
2331    cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr);
2332    return cur_tok;
2333}
2334
2335/*tex
2336
2337    The |get_x_or_protected| procedure is like |get_x_token| except that protected macros are not
2338    expanded. It sets |cur_cmd|, |cur_chr|, |cur_tok|, and expands non-protected macros.
2339
2340*/
2341
2342void tex_get_x_or_protected(void)
2343{
2344    lmt_hash_state.no_new_cs = 0;
2345    while (1) {
2346        tex_get_next();
2347        if (cur_cmd <= max_command_cmd || is_protected_cmd(cur_cmd)) {
2348            break;
2349        } else {
2350            tex_expand_current_token();
2351        }
2352    }
2353    cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr); /* needed afterwards ? */
2354    lmt_hash_state.no_new_cs = 1;
2355}
2356
2357/*tex This changes the string |s| to a token list. */
2358
2359// halfword tex_string_to_toks(const char *ss)
2360// {
2361//     const char *s = ss;
2362//     const char *se = ss + strlen(s);
2363//     /*tex tail of the token list */
2364//     halfword h = null;
2365//     halfword p = null;
2366//     /*tex new node being added to the token list via |store_new_token| */
2367//     while (s < se) {
2368//         int tl;
2369//         halfword t = (halfword) aux_str2uni_len((const unsigned char *) s, &tl);
2370//         s += tl;
2371//         if (t == ' ') {
2372//             t = space_token;
2373//         } else {
2374//             t += other_token;
2375//         }
2376//         p = tex_store_new_token(p, t);
2377//         if (! h) {
2378//             h = p;
2379//         }
2380//     }
2381//     return h;
2382// }
2383
2384/*tex
2385
2386    The token lists for macros and for other things like |\mark| and |\output| and |\write| are
2387    produced by a procedure called |scan_toks|.
2388
2389    Before we get into the details of |scan_toks|, let's consider a much simpler task, that of
2390    converting the current string into a token list. The |str_toks| function does this; it
2391    classifies spaces as type |spacer| and everything else as type |other_char|.
2392
2393    The token list created by |str_toks| begins at |link(temp_token_head)| and ends at the value
2394    |p| that is returned. If |p = temp_token_head|, the list is empty.
2395
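    |lmt_str_toks| is almost identical, but it also escapes the characters that \LUA\ considers
    special while scanning a literal string: a backslash and both quote characters get an escape
    token in front, while a newline and a carriage return become an escape token followed by
    |n| or |r|.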
2398*/
2399
2400static halfword lmt_str_toks(lstring b) /* returns head */
2401{
2402    unsigned char *k = (unsigned char *) b.s;
2403    halfword head = null;
2404    halfword tail = head;
2405    while (k < (unsigned char *) b.s + b.l) {
2406        int tl;
2407        halfword t = aux_str2uni_len(k, &tl);
2408        k += tl;
2409        if (t == ' ') {
2410            t = space_token;
2411        } else {
2412            if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13)) {
2413                tail = tex_store_new_token(tail, escape_token);
2414                if (! head) {
2415                    head = tail;
2416                }
2417                if (t == 10) {
2418                    t = 'n';
2419                } else if (t == 13) {
2420                    t = 'r';
2421                }
2422            }
2423            t += other_token;
2424        }
2425        tail = tex_store_new_token(tail, t);
2426        if (! head) {
2427            head = tail;
2428        }
2429    }
2430    return head;
2431}
2432
2433/*tex
2434
2435    Incidentally, the main reason for wanting |str_toks| is the function |the_toks|, which has
2436    similar input/output characteristics. This changes the string |str_pool[b .. pool_ptr]| to a
2437    token list:
2438
2439*/
2440
2441halfword tex_str_toks(lstring s, halfword *tail)
2442{
2443    halfword h = null;
2444    halfword p = null;
2445    if (s.s) {
2446        unsigned char *k = s.s;
2447        unsigned char *l = k + s.l;
2448        while (k < l) {
2449            int tl;
2450            halfword t = aux_str2uni_len(k, &tl);
2451            if (t == ' ') {
2452                t = space_token;
2453            } else {
2454                t += other_token;
2455            }
2456            k += tl;
2457            p = tex_store_new_token(p, t);
2458            if (! h) {
2459                h = p;
2460            }
2461        }
2462    }
2463    if (tail) {
2464        *tail = null;
2465    }
2466    return h;
2467}
2468
2469halfword tex_cur_str_toks(halfword *tail)
2470{
2471    halfword h = null;
2472    halfword p = null;
2473    unsigned char *k = (unsigned char *) lmt_string_pool_state.string_temp;
2474    if (k) {
2475        unsigned char *l = k + lmt_string_pool_state.string_temp_top;
2476        /*tex tail of the token list */
2477        while (k < l) {
2478            /*tex token being appended */
2479            int tl;
2480            halfword t = aux_str2uni_len(k, &tl);
2481            if (t == ' ') {
2482                t = space_token;
2483            } else {
2484                t += other_token;
2485            }
2486            k += tl;
2487            p = tex_store_new_token(p, t);
2488            if (! h) {
2489                h = p;
2490            }
2491        }
2492    }
2493    tex_reset_cur_string();
2494    if (tail) {
2495        *tail = p;
2496    }
2497    return h;
2498}
2499
2500/*tex
2501
2502    Most of the converter is similar to the one I made for macro so at some point I can make a
2503    helper; also todo: there is no need to go through the pool.
2504
2505*/
2506
2507halfword tex_str_scan_toks(int ct, lstring ls)
2508{
2509    /*tex index into string */
2510    unsigned char *k = ls.s;
2511    unsigned char *l = k + ls.l;
2512    /*tex tail of the token list */
2513    halfword h = null;
2514    halfword p = null;
2515    while (k < l) {
2516        int cc;
2517        /*tex token being appended */
2518        int lt;
2519        halfword t = aux_str2uni_len(k, &lt);
2520        k += lt;
2521        cc = tex_get_cat_code(ct, t);
2522        if (cc == 0) {
2523            /*tex We have a potential control sequence so we check for it. */
2524            int lname = 0 ;
2525            int s = 0 ;
2526            int c = 0 ;
2527            unsigned char *name = k ;
2528            while (k < l) {
2529                t = (halfword) aux_str2uni_len((const unsigned char *) k, &s);
2530                c = tex_get_cat_code(ct, t);
2531                if (c == 11) {
2532                    k += s ;
2533                    lname += s ;
2534                } else if (c == 10) {
2535                    /*tex We ignore a trailing space like normal scanning does. */
2536                    k += s ;
2537                    break ;
2538                } else {
2539                    break ;
2540                }
2541            }
2542            if (s > 0) {
2543                /*tex We have a potential |\cs|. */
2544                halfword cs = tex_string_locate_only((const char *) name, lname);
2545                if (cs == undefined_control_sequence) {
2546                    /*tex Let's play safe and backtrack. */
2547                    t += cc * (1<<21);
2548                    k = name ;
2549                } else {
2550                    t = cs_token_flag + cs;
2551                }
2552            } else {
2553                /*tex
2554                    Just a character with some meaning, so |\unknown| becomes effectively
2555                    |\unknown| assuming that |\\| has some useful meaning of course.
2556                */
2557                t += cc * (1 << 21);
2558                k = name ;
2559            }
2560        } else {
2561            /*tex
2562                Whatever token, so for instance $x^2$ just works given a \TEX\ catcode regime.
2563            */
2564            t += cc * (1 << 21);
2565        }
2566        p = tex_store_new_token(p, t);
2567        if (! h) {
2568            h = p;
2569        }
2570    }
2571    return h;
2572}
2573
2574/* these two can be combined, then we can avoid the h check  */
2575
2576static void tex_aux_set_toks_register(halfword loc, singleword cmd, halfword t, int g)
2577{
2578    halfword ref = get_reference_token();
2579    set_token_link(ref, t);
2580    tex_define((g > 0) ? global_flag_bit : 0, loc, cmd == internal_toks_cmd ? internal_toks_reference_cmd : register_toks_reference_cmd, ref);
2581}
2582
2583static halfword tex_aux_append_copied_toks_list(halfword loc, singleword cmd, int g, halfword s, halfword t, halfword *tail)
2584{
2585    halfword ref = get_reference_token();
2586    halfword p = ref;
2587    while (s) {
2588        p = tex_store_new_token(p, token_info(s));
2589        s = token_link(s);
2590    }
2591    while (t) {
2592        p = tex_store_new_token(p, token_info(t));
2593        t = token_link(t);
2594    }
2595    tex_define((g > 0) ? global_flag_bit : 0, loc, cmd == internal_toks_cmd ? internal_toks_reference_cmd : register_toks_reference_cmd, ref);
2596    if (tail) {
2597        *tail = p;
2598    }
2599    return ref;
2600}
2601
2602/*tex Public helper: */
2603
2604halfword tex_copy_token_list(halfword h1, halfword *t)
2605{
2606    halfword h2 = tex_store_new_token(null, token_info(h1));
2607    halfword t1 = token_link(h1);
2608    halfword t2 = h2;
2609    while (t1) {
2610        t2 = tex_store_new_token(t2, token_info(t1));
2611        t1 = token_link(t1);
2612    }
2613    if (t) {
2614        *t = t2;
2615    }
2616    return h2;
2617}
2618
2619/*tex
2620
2621    At some point I decided to implement the following primitives:
2622
2623    \starttabulate[|T||T||]
2624    \NC 0 \NC \type {toksapp}   \NC 1 \NC \type {etoksapp} \NC \NR
2625    \NC 2 \NC \type {tokspre}   \NC 3 \NC \type {etokspre} \NC \NR
2626    \NC 4 \NC \type {gtoksapp}  \NC 5 \NC \type {xtoksapp} \NC \NR
2627    \NC 6 \NC \type {gtokspre}  \NC 7 \NC \type {xtokspre} \NC \NR
2628    \stoptabulate
2629
2630    These append and prepend tokens to token lists. In \CONTEXT\ we always had macros doing something
2631    like that. It was only a few years later that I ran again into an article that Taco and I wrote
2632    in 1999 in the NTG Maps about an extension to \ETEX\ (called eetex). The first revelation was
    that I had completely forgotten about it, which can be explained by the two decade time lapse. The
    second was that Taco actually added that to the program at that time, so I could have used (parts
    of) that code. Anyway, among the other proposed (and implemented) features were manipulating
    lists and ways to output packed data to the \DVI\ files (numbers packed into 1 up to 4 bytes).
    Maybe some day I'll have a go at lists, although with today's computers there is not that much to
    gain. Also, \CONTEXT\ progressed to different internals so the urge is no longer there. The \SGML\
    mode that was discussed as well is also no longer that relevant given that we have \LUA.
2640
2641    If we want to handle macros too we really need to distinguish between toks and macros with
2642    |cur_chr| above, but not now. We can't expand, and have to use |get_r_token| or so. I don't need
2643    it anyway.
2644
2645    \starttyping
2646    get_r_token();
2647    if (cur_cmd == call_cmd) {
2648        nt = cur_cs;
2649        target = equiv(nt);
2650    } else {
2651        // some error message
2652    }
2653    \stoptyping
2654*/
2655
/*tex

    A token list can be changed in place when the register at |loc| was set at the current
    level and the reference count of the list |target| is zero.

*/

# define immediate_permitted(loc,target) \
    ((eq_level(loc) == cur_level) && (get_token_reference(target) == 0))

/*tex The three ways in which a source can be combined with a target token list. */

typedef enum combine_operations {
    combine_assign  = 0, /*tex the source replaces the target */
    combine_append  = 1, /*tex the source goes after the target */
    combine_prepend = 2, /*tex the source goes before the target */
} combine_operations;
2663
2664void tex_run_combine_the_toks(void)
2665{
2666    halfword source = null;
2667    halfword target = null;
2668    halfword append, expand, global;
2669    halfword nt, ns;
2670    singleword cmd;
2671    /* */
2672    switch (cur_chr) {
2673        case expanded_toks_code:                append = combine_assign;  global = 0; expand = 1; break;
2674        case append_toks_code:                  append = combine_append;  global = 0; expand = 0; break;
2675        case append_expanded_toks_code:         append = combine_append;  global = 0; expand = 1; break;
2676        case prepend_toks_code:                 append = combine_prepend; global = 0; expand = 0; break;
2677        case prepend_expanded_toks_code:        append = combine_prepend; global = 0; expand = 1; break;
2678        case global_expanded_toks_code:         append = combine_assign;  global = 1; expand = 1; break;
2679        case global_append_toks_code:           append = combine_append;  global = 1; expand = 0; break;
2680        case global_append_expanded_toks_code:  append = combine_append;  global = 1; expand = 1; break;
2681        case global_prepend_toks_code:          append = combine_prepend; global = 1; expand = 0; break;
2682        case global_prepend_expanded_toks_code: append = combine_prepend; global = 1; expand = 1; break;
2683        default:                                append = combine_assign;  global = 0; expand = 0; break;
2684    }
2685    /*tex The target. */
2686    tex_get_x_token();
2687    if (cur_cmd == register_toks_cmd || cur_cmd == internal_toks_cmd) {
2688        nt = eq_value(cur_cs);
2689        cmd = (singleword) cur_cmd;
2690    } else {
2691        /*tex Maybe a number. */
2692        tex_back_input(cur_tok);
2693        nt = register_toks_location(tex_scan_toks_register_number());
2694        cmd = register_toks_cmd;
2695    }
2696    target = eq_value(nt);
2697    /*tex The source. */
2698    do {
2699        tex_get_x_token();
2700    } while (cur_cmd == spacer_cmd);
2701    if (cur_cmd == left_brace_cmd) {
2702        source = expand ? tex_scan_toks_expand(1, NULL, 0, 0) : tex_scan_toks_normal(1, NULL);
2703        /*tex The action. */
2704        if (source) {
2705            if (target) {
2706                halfword s = token_link(source);
2707                if (s) {
2708                    halfword t = token_link(target);
2709                    if (! t) {
2710                        /*tex Can this happen? */
2711                        set_token_link(target, s);
2712                        token_link(source) = null;
2713                    } else {
2714                        switch (append) {
2715                            case combine_assign:
2716                                goto ASSIGN_1;
2717                            case 1:
2718                                /*append */
2719                                if (immediate_permitted(nt,target)) {
2720                                    halfword p = t;
2721                                    while (token_link(p)) {
2722                                        p = token_link(p);
2723                                    }
2724                                    token_link(p) = s;
2725                                    token_link(source) = null;
2726                                } else {
2727                                    tex_aux_append_copied_toks_list(nt, cmd, global, t, s, NULL);
2728                                }
2729                                break;
2730                            case 2:
2731                                /* prepend */
2732                                if (immediate_permitted(nt,target)) {
2733                                    halfword p = s;
2734                                    while (token_link(p)) {
2735                                        p = token_link(p);
2736                                    }
2737                                    token_link(source) = null;
2738                                    set_token_link(p, t);
2739                                    set_token_link(target, s);
2740                                } else {
2741                                    tex_aux_append_copied_toks_list(nt, cmd, global, s, t, NULL);
2742                                }
2743                                break;
2744                        }
2745                    }
2746                }
2747            } else {
2748                ASSIGN_1:
2749                tex_aux_set_toks_register(nt, cmd, token_link(source), global);
2750                token_link(source) = null;
2751            }
2752            tex_flush_token_list(source);
2753        }
2754    } else {
2755        /* cf luatex we don't handle expand here */
2756        if (cur_cmd == register_toks_cmd) {
2757            ns = register_toks_number(eq_value(cur_cs));
2758        } else if (cur_cmd == internal_toks_cmd) {
2759            ns = internal_toks_number(eq_value(cur_cs));
2760        } else {
2761            ns = tex_scan_toks_register_number();
2762        }
2763        /*tex The action. */
2764        source = toks_register(ns);
2765        if (source) {
2766            if (target) {
2767                if (expand) {
2768                    halfword defref = lmt_input_state.def_ref;
2769                    tex_back_input(right_brace_token + '}');
2770                    tex_begin_token_list(source, token_text);
2771                    source = tex_scan_toks_expand(1, NULL, 0, 1);
2772                    lmt_input_state.def_ref = defref;
2773                    switch (append) {
2774                        case combine_assign:
2775                            eq_value(nt) = source;
2776                            break;
2777                        case combine_append:
2778                            if (immediate_permitted(nt, target)) {
2779                                halfword p = tex_tail_of_token_list(token_link(target));
2780                                token_link(p) = token_link(source);
2781                            } else {
2782                                halfword tail;
2783                                tex_aux_append_copied_toks_list(nt, cmd, global, target, null, &tail);
2784                                token_link(tail) = token_link(source);
2785                            }
2786                            tex_put_available_token(source);
2787                            break;
2788                        case combine_prepend:
2789                            if (immediate_permitted(nt, target)) {
2790                                halfword p = tex_tail_of_token_list(token_link(source));
2791                                token_link(p) = token_link(target);
2792                                token_link(target) = token_link(source);
2793                            } else {
2794                                halfword head = tex_aux_append_copied_toks_list(nt, cmd, global, target, null, NULL);
2795                                halfword tail = tex_tail_of_token_list(token_link(source));
2796                                token_link(tail) = token_link(head);
2797                                token_link(head) = token_link(source);
2798                            }
2799                            tex_put_available_token(source);
2800                            break;
2801                    }
2802                } else {
2803                    halfword t = token_link(target);
2804                    halfword s = token_link(source);
2805                    switch (append) {
2806                        case combine_assign:
2807                            tex_add_token_reference(source);
2808                            eq_value(nt) = source;
2809                            break;
2810                        case combine_append:
2811                            if (immediate_permitted(nt, target)) {
2812                                halfword p = tex_tail_of_token_list(t);
2813                                while (s) {
2814                                    p = tex_store_new_token(p, token_info(s));
2815                                    s = token_link(s);
2816                                }
2817                            } else {
2818                                tex_aux_append_copied_toks_list(nt, cmd, global, t, s, NULL);
2819                            }
2820                            break;
2821                        case combine_prepend:
2822                            if (immediate_permitted(nt, target)) {
2823                                halfword h = null;
2824                                halfword p = null;
2825                                while (s) {
2826                                    p = tex_store_new_token(p, token_info(s));
2827                                    if (! h) {
2828                                        h = p;
2829                                    }
2830                                    s = token_link(s);
2831                                }
2832                                set_token_link(p, t);
2833                                set_token_link(target, h);
2834                            } else {
2835                                tex_aux_append_copied_toks_list(nt, cmd, global, s, t, NULL);
2836                            }
2837                            break;
2838                    }
2839                }
2840            } else if (expand) {
2841                halfword defref = lmt_input_state.def_ref;
2842                tex_back_input(right_brace_token + '}');
2843                tex_begin_token_list(source, token_text);
2844                source = tex_scan_toks_expand(1, NULL, 0, 1);
2845                eq_value(nt) = source;
2846                lmt_input_state.def_ref = defref;
2847            } else {
2848                // set_toks_register(nt, source, global);
2849                tex_add_token_reference(source);
2850                eq_value(nt) = source;
2851            }
2852        }
2853    }
2854}
2855
2856/*tex
2857
2858    This routine, used in the next one, prints the job name, possibly modified by the
2859    |process_jobname| callback.
2860
2861*/
2862
2863static void tex_aux_print_job_name(void)
2864{
2865    if (lmt_fileio_state.job_name) {
2866        /*tex \CCODE\ strings for jobname before and after processing. */
2867        char *s = lmt_fileio_state.job_name;
2868        int callback_id = lmt_callback_defined(process_jobname_callback);
2869        if (callback_id > 0) {
2870            char *ss;
2871            int lua_retval = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "S->S", s, &ss);
2872            if (lua_retval && ss) {
2873                s = ss;
2874            }
2875        }
2876        tex_print_str(s);
2877    }
2878}
2879
2880/*tex
2881
2882    The procedure |run_convert_tokens| uses |str_toks| to insert the token list for |convert|
2883    functions into the scanner; |\outer| control sequences are allowed to follow |\string| and
2884    |\meaning|.
2885
2886*/
2887
2888/*tex Codes not really needed but cleaner when testing */
2889
/*tex

    These two belong together: the first one remembers the current print selector in the local
    variable |saved_selector| and switches to |new_string_selector_code|, the second one
    switches back to the remembered selector.

*/

# define push_selector \
    { saved_selector = lmt_print_state.selector; lmt_print_state.selector = new_string_selector_code; }

# define pop_selector \
    { lmt_print_state.selector = saved_selector; }
2898
2899void tex_run_convert_tokens(halfword code)
2900{
2901    /*tex Scan the argument for command |c|. */
2902    switch (code) {
2903        /*tex
2904            The |number_code| is quite popular. Beware, when used with a lua none function, a zero
2905            is injected. We could intercept it at the cost of messy code, but on the other hand,
2906            nothing guarantees that the call returns a number so this side effect can be defended
2907            as a recovery measure.
2908        */
2909        case number_code:
2910            {
2911                int saved_selector;
2912                halfword v = tex_scan_integer(0, NULL, NULL);
2913                push_selector;
2914                tex_print_int(v);
2915                pop_selector;
2916                break;
2917            }
2918        case to_integer_code:
2919        case to_hexadecimal_code:
2920            {
2921                int saved_selector;
2922                int grouped = 0;
2923                halfword v = tex_scan_integer(0, NULL, &grouped);
2924                if (! grouped) {
2925                    tex_get_x_token(); /* maybe not x here */
2926                    if (cur_cmd != relax_cmd) {
2927                       tex_back_input(cur_tok);
2928                    }
2929                }
2930                push_selector;
2931                if (code == to_integer_code) {
2932                    tex_print_int(v);
2933                } else {
2934                    tex_print_hex(v);
2935                }
2936                pop_selector;
2937                break;
2938            }
2939        case to_scaled_code:
2940        case to_sparse_scaled_code:
2941        case to_dimension_code:
2942        case to_sparse_dimension_code:
2943            {
2944                int saved_selector;
2945                int grouped = 0;
2946                halfword v = tex_scan_dimension(0, 0, 0, 0, NULL, &grouped);
2947                if (! grouped) {
2948                    tex_get_x_token(); /* maybe not x here */
2949                    if (cur_cmd != relax_cmd) {
2950                       tex_back_input(cur_tok);
2951                    }
2952                }
2953                push_selector;
2954                switch (code) {
2955                    case to_sparse_dimension_code:
2956                    case to_sparse_scaled_code:
2957                        tex_print_sparse_dimension(v, no_unit);
2958                        break;
2959                    default:
2960                        tex_print_dimension(v, no_unit);
2961                        break;
2962                }
2963                switch (code) {
2964                    case to_dimension_code:
2965                    case to_sparse_dimension_code:
2966                        tex_print_unit(pt_unit);
2967                        break;
2968                }
2969                pop_selector;
2970                break;
2971            }
2972        case to_mathstyle_code:
2973            {
2974                int saved_selector;
2975                halfword v = tex_scan_math_style_identifier(1, 0);
2976                push_selector;
2977                tex_print_int(v);
2978                pop_selector;
2979                break;
2980            }
2981        case lua_function_code:
2982            {
2983             /* We can use:  tex_aux_lua_call(convert_cmd, v); */
2984                halfword v = tex_scan_integer(0, NULL, NULL);
2985                if (v > 0) {
2986                    strnumber u = tex_save_cur_string();
2987                    lmt_token_state.luacstrings = 0;
2988                    lmt_function_call(v, 0);
2989                    tex_restore_cur_string(u);
2990                    if (lmt_token_state.luacstrings > 0) {
2991                        tex_lua_string_start();
2992                    }
2993                } else {
2994                    tex_normal_error("luafunction", "invalid number");
2995                }
2996                return;
2997            }
2998        case lua_bytecode_code:
2999            {
3000                halfword v = tex_scan_integer(0, NULL, NULL);
3001                if (v < 0 || v > 65535) {
3002                    tex_normal_error("luabytecode", "invalid number");
3003                } else {
3004                    strnumber u = tex_save_cur_string();
3005                    lmt_token_state.luacstrings = 0;
3006                    lmt_bytecode_call(v);
3007                    tex_restore_cur_string(u);
3008                    if (lmt_token_state.luacstrings > 0) {
3009                        tex_lua_string_start();
3010                    }
3011                }
3012                return;
3013            }
3014        case lua_code:
3015            {
3016                full_scanner_status saved_full_status = tex_save_full_scanner_status();
3017                strnumber u = tex_save_cur_string();
3018                halfword s = tex_scan_toks_expand(0, NULL, 0, 1); // maybe expandconstant
3019                tex_unsave_full_scanner_status(saved_full_status);
3020                lmt_token_state.luacstrings = 0;
3021                lmt_token_call(s);
3022                tex_delete_token_reference(s); /* boils down to flush_list */
3023                tex_restore_cur_string(u);
3024                if (lmt_token_state.luacstrings > 0) {
3025                    tex_lua_string_start();
3026                }
3027                /*tex No further action. */
3028                return;
3029            }
3030        case expanded_code:
3031        case semi_expanded_code:
3032            {
3033                full_scanner_status saved_full_status = tex_save_full_scanner_status();
3034                strnumber u = tex_save_cur_string();
3035                halfword s = tex_scan_toks_expand(0, NULL, code == semi_expanded_code, 0);
3036                tex_unsave_full_scanner_status(saved_full_status);
3037                if (token_link(s)) {
3038                    tex_begin_inserted_list(token_link(s));
3039                    token_link(s) = null;
3040                }
3041                tex_put_available_token(s);
3042                tex_restore_cur_string(u);
3043                /*tex No further action. */
3044                return;
3045            }
3046        /*tex
3047            This one makes no sense because |\expandaftercs\foo{{#1}}| vs |\expanded{\foo{#1}}|
3048            runs in a ratio of 2.2:1.5 due to {#1} being three input levels. (Keep as example of
3049            a rejected feature.)
3050        */ /*
3051        case expanded_after_cs_code:
3052            {
3053                halfword token = tex_get_token();
3054                full_scanner_status saved_full_status = tex_save_full_scanner_status();
3055                strnumber u = tex_save_cur_string();
3056                halfword s = tex_scan_toks_expand(0, NULL, 0);
3057                tex_unsave_full_scanner_status(saved_full_status);
3058                token_info(s) = token;
3059                tex_begin_inserted_list(s);
3060                tex_restore_cur_string(u);
3061                return;
3062            }
3063        */
3064     /* case immediate_assignment_code: */
3065     /* case immediate_assigned_code:   */
3066        /*tex
3067             These two were an on-the-road-to-bachotex brain-wave. A first variant did more in
3068             sequence till a relax or spacer was seen. These commands permits for instance setting
3069             counters in full expansion. However, as we have the more powerful local control
3070             mechanisms available these two commands have been dropped in \LUAMETATEX. Performance
3071             wise there is not that much to gain from |\immediateassigned| and it's even somewhat
3072             limited. So, they're gone now. Actually, one can also use the local control feature in
3073             an |\edef|, which {\em is} rather efficient, so we're good anyway. The upgraded code
3074             can be found in the archive.
3075        */
3076        case string_code:
3077        case cs_string_code:
3078        case cs_active_code:
3079            {
3080                int saved_selector;
3081                int saved_scanner_status = lmt_input_state.scanner_status;
3082                lmt_input_state.scanner_status = scanner_is_normal;
3083                tex_get_token();
3084                lmt_input_state.scanner_status = saved_scanner_status;
3085                push_selector;
3086                if (code == cs_active_code) {
3087                    tex_print_str(active_character_namespace);
3088                }
3089                if (cur_cs) {
3090                    if (code == cs_string_code) {
3091                        tex_print_cs_name(cur_cs);
3092                    } else {
3093                        tex_print_cs(cur_cs);
3094                    }
3095                } else {
3096                    tex_print_tex_str(cur_chr);
3097                }
3098                pop_selector;
3099                break;
3100            }
3101//        case string_code:
3102//            {
3103//                int saved_selector;
3104//                int saved_scanner_status = lmt_input_state.scanner_status;
3105//                lmt_input_state.scanner_status = scanner_is_normal;
3106//                tex_get_token();
3107//                lmt_input_state.scanner_status = saved_scanner_status;
3108//                push_selector;
3109//                if (cur_cs) {
3110//                    tex_print_cs(cur_cs);
3111//                } else {
3112//                    tex_print_tex_str(cur_chr);
3113//                }
3114//                pop_selector;
3115//                break;
3116//            }
3117//        case cs_string_code:
3118//            {
3119//                int saved_selector;
3120//                int saved_scanner_status = lmt_input_state.scanner_status;
3121//                lmt_input_state.scanner_status = scanner_is_normal;
3122//                tex_get_token();
3123//                lmt_input_state.scanner_status = saved_scanner_status;
3124//                push_selector;
3125//                if (cur_cs) {
3126//                    tex_print_cs_name(cur_cs);
3127//                } else {
3128//                    tex_print_tex_str(cur_chr);
3129//                }
3130//                pop_selector;
3131//                break;
3132//            }
3133//        case cs_active_code:
3134//            {
3135//                /*tex
3136//                    We cannot pick up the token and see what character it is because it will be
3137//                    replaced by its meaning.
3138//                */
3139//                int saved_selector;
3140//                int saved_scanner_status = lmt_input_state.scanner_status;
3141//                lmt_input_state.scanner_status = scanner_is_normal;
3142//                tex_get_token();
3143//                lmt_input_state.scanner_status = saved_scanner_status;
3144//                push_selector;
3145//                tex_print_str(active_character_namespace);
3146//                if (cur_cs) {
3147//                    tex_print_cs(cur_cs);
3148//                } else {
3149//                    tex_print_tex_str(cur_chr);
3150//                }
3151//                pop_selector;
3152//                break;
3153//            }
3154        case cs_lastname_code:
3155            if (lmt_scanner_state.last_cs_name != null_cs) {
3156                int saved_selector;
3157                push_selector;
3158                tex_print_cs_name(lmt_scanner_state.last_cs_name);
3159                pop_selector;
3160            }
3161            break;
3162        case detokenized_code:
3163            /*tex Sort of like |\meaningles| but without the explanationary text. */
3164            {
3165                int saved_selector;
3166                int saved_scanner_status = lmt_input_state.scanner_status;
3167                halfword t = null;
3168                lmt_input_state.scanner_status = scanner_is_normal;
3169                tex_get_token();
3170                lmt_input_state.scanner_status = saved_scanner_status;
3171                t = tex_get_available_token(cur_tok);
3172                push_selector;
3173                tex_show_token_list(t, 0, 0);
3174                tex_put_available_token(t);
3175                pop_selector;
3176                break;
3177            }
3178        case detokened_code:
3179            /*tex Takes a control sequence or token list. Probably a bad name but so be it. */
3180            {
3181                int saved_selector;
3182                int saved_scanner_status = lmt_input_state.scanner_status;
3183                halfword list = null;
3184                lmt_input_state.scanner_status = scanner_is_normal;
3185                tex_get_token();
3186                lmt_input_state.scanner_status = saved_scanner_status;
3187                switch (cur_cmd) {
3188                    case call_cmd:
3189                    case protected_call_cmd:
3190                    case semi_protected_call_cmd:
3191                    case constant_call_cmd:
3192                    case tolerant_call_cmd:
3193                    case tolerant_protected_call_cmd:
3194                    case tolerant_semi_protected_call_cmd:
3195                       if (! get_token_preamble(cur_chr)) {
3196                           /* We only serialize macros with no arguments. */
3197                           list = token_link(cur_chr);
3198                           break;
3199                       } else {
3200                           goto WHATEVER;
3201                       }
3202                    case internal_toks_cmd:
3203                    case register_toks_cmd:
3204                        list = token_link(eq_value(cur_chr));
3205                        break;
3206                    case register_cmd:
3207                        if (cur_chr == token_val_level) {
3208                            halfword n = tex_scan_toks_register_number();
3209                            list = token_link(toks_register(n));
3210                            break;
3211                        } else {
3212                            goto WHATEVER;
3213                        }
3214                    default:
3215                      WHATEVER:
3216                        {
3217                            halfword t = tex_get_available_token(cur_tok);
3218                            push_selector;
3219                            tex_show_token_list(t, 0, 0);
3220                            pop_selector;
3221                            tex_put_available_token(t);
3222                        }
3223                        break;
3224                }
3225                if (list) {
3226                    push_selector;
3227                    tex_show_token_list(list, 2, 0);
3228                    pop_selector;
3229                }
3230                break;
3231            }
3232        case roman_numeral_code:
3233            {
3234                int saved_selector;
3235                halfword v = tex_scan_integer(0, NULL, NULL);
3236                push_selector;
3237                tex_print_roman_int(v);
3238                pop_selector;
3239                break;
3240            }
3241        case meaning_code:
3242        case meaning_full_code:
3243        case meaning_less_code:
3244        case meaning_ful_code:
3245        case meaning_les_code:
3246        case meaning_asis_code:
3247            {
3248                int saved_selector;
3249                int saved_scanner_status = lmt_input_state.scanner_status;
3250                lmt_input_state.scanner_status = scanner_is_normal;
3251                tex_get_token();
3252                lmt_input_state.scanner_status = saved_scanner_status;
3253                push_selector;
3254                tex_print_meaning(code);
3255                pop_selector;
3256                break;
3257            }
3258        case to_character_code:
3259            {
3260                int saved_selector;
3261                int chr = tex_scan_char_number(0);
3262                push_selector;
3263                tex_print_tex_str(chr);
3264                pop_selector;
3265                break;
3266            }
3267        case lua_escape_string_code:
3268     /* case lua_token_string_code: */ /* for now rejected: could also be keyword */
3269            {
3270                /* tex
3271                    If I would need it I could probably add support for catcode tables and verbose
3272                    serialization. Maybe we can use some of the other (more efficient) helpers when
3273                    we have a detokenize variant. We make sure that the escape character is a
3274                    backslash because these conversions can occur anywhere and are very much
3275                    related to \LUA\ calls. (Maybe it makes sense to pass it a argument to the
3276                    serializer.)
3277
3278                    A |\luatokenstring| primitive doesn't really make sense because \LUATEX\ lacks
3279                    it and |\luaescapestring| is a compatibility primitive.
3280                */
3281                lstring str;
3282                int length = 0;
3283             /* int saved_in_lua_escape = lmt_token_state.in_lua_escape; */
3284                halfword saved_escape_char = escape_char_par;
3285                full_scanner_status saved_full_status = tex_save_full_scanner_status();
3286                halfword result = tex_scan_toks_expand(0, NULL, 0, 0);
3287             /* halfword result = tex_scan_toks_expand(0, NULL, code == lua_token_string_code); */
3288             /* lmt_token_state.in_lua_escape = 1; */
3289                escape_char_par = '\\';
3290                str.s = (unsigned char *) tex_tokenlist_to_tstring(result, 0, &length, 0, 0, 0, 0, 1); /* single hashes */
3291                str.l = (unsigned) length;
3292             /* lmt_token_state.in_lua_escape = saved_in_lua_escape; */
3293                escape_char_par = saved_escape_char;
3294                tex_delete_token_reference(result); /* boils down to flush_list */
3295                tex_unsave_full_scanner_status(saved_full_status);
3296                if (str.l) {
3297                    result = lmt_str_toks(str);
3298                    tex_begin_inserted_list(result);
3299                }
3300                return;
3301            }
3302        case font_name_code:
3303            {
3304                int saved_selector;
3305                halfword fnt = tex_scan_font_identifier(NULL);
3306                push_selector;
3307                tex_print_font(fnt);
3308                pop_selector;
3309                break;
3310            }
3311        case font_specification_code:
3312            {
3313                int saved_selector;
3314                halfword fnt = tex_scan_font_identifier(NULL);
3315                push_selector;
3316                tex_append_string((const unsigned char *) font_original(fnt), (unsigned) strlen(font_original(fnt)));
3317                pop_selector;
3318                break;
3319            }
3320        case job_name_code:
3321            {
3322                int saved_selector;
3323                if (! lmt_fileio_state.job_name) {
3324                    tex_open_log_file();
3325                }
3326                push_selector;
3327                tex_aux_print_job_name();
3328                pop_selector;
3329                break;
3330            }
3331        case format_name_code:
3332            {
3333                int saved_selector;
3334                if (! lmt_fileio_state.job_name) {
3335                    tex_open_log_file();
3336                }
3337                push_selector;
3338                tex_print_str(lmt_engine_state.dump_name);
3339                pop_selector;
3340                break;
3341            }
3342        case luatex_banner_code:
3343            {
3344                int saved_selector;
3345                push_selector;
3346                tex_print_str(lmt_engine_state.luatex_banner);
3347                pop_selector;
3348                break;
3349            }
3350        case font_identifier_code:
3351            {
3352                int saved_selector;
3353                halfword fnt = tex_scan_font_identifier(NULL);
3354                push_selector;
3355                tex_print_font_identifier(fnt);
3356                pop_selector;
3357                break;
3358            }
3359        default:
3360            tex_confusion("convert tokens");
3361            break;
3362    }
3363    {
3364        halfword head = tex_cur_str_toks(NULL);
3365        tex_begin_inserted_list(head);
3366    }
3367}
3368
3369/*tex
3370    The boolean |in_lua_escape| is keeping track of the lua string escape state.
3371*/
3372
3373strnumber tex_the_convert_string(halfword c, int i)
3374{
3375    int saved_selector = lmt_print_state.selector;
3376    strnumber ret = 0;
3377    int done = 1 ;
3378    lmt_print_state.selector = new_string_selector_code;
3379    switch (c) {
3380        case number_code:
3381        case to_integer_code:
3382            tex_print_int(i);
3383            break;
3384        case to_hexadecimal_code:
3385            tex_print_hex(i);
3386            break;
3387        case to_scaled_code:
3388            tex_print_dimension(i, no_unit);
3389            break;
3390        case to_sparse_scaled_code:
3391            tex_print_sparse_dimension(i, no_unit);
3392            break;
3393        case to_dimension_code:
3394            tex_print_dimension(i, pt_unit);
3395            break;
3396        case to_sparse_dimension_code:
3397            tex_print_sparse_dimension(i, pt_unit);
3398            break;
3399        case roman_numeral_code:
3400            tex_print_roman_int(i);
3401            break;
3402        case to_character_code:
3403            tex_print_tex_str(i);
3404            break;
3405        case font_name_code:
3406            tex_print_font(i);
3407            break;
3408        case font_specification_code:
3409            tex_print_str(font_original(i));
3410            break;
3411        case job_name_code:
3412            tex_aux_print_job_name();
3413            break;
3414        case format_name_code:
3415            tex_print_str(lmt_engine_state.dump_name);
3416            break;
3417        case luatex_banner_code:
3418            tex_print_str(lmt_engine_state.luatex_banner);
3419            break;
3420        case font_identifier_code:
3421            tex_print_font_identifier(i);
3422            break;
3423        default:
3424            done = 0;
3425            break;
3426    }
3427    if (done) {
3428        ret = tex_make_string();
3429    }
3430    lmt_print_state.selector = saved_selector;
3431    return ret;
3432}
3433
3434/*tex Return a string from tokens list: */
3435
3436strnumber tex_tokens_to_string(halfword p)
3437{
3438    if (lmt_print_state.selector == new_string_selector_code) {
3439        tex_normal_error("tokens", "tokens_to_string() called while selector = new_string");
3440        return get_nullstr();
3441    } else {
3442        int saved_selector = lmt_print_state.selector;
3443        lmt_print_state.selector = new_string_selector_code;
3444        tex_token_show(p);
3445        lmt_print_state.selector = saved_selector;
3446        return tex_make_string();
3447    }
3448}
3449
/*tex

    The actual token conversion in this function is now functionally equivalent to |show_token_list|,
    except that it always prints the whole token list. Often the result is not that large, for
    instance |\directlua| is seldom large. However, this converter is also used for patterns
    and exceptions where size is more of an issue. For that reason we used to have three variants,
    one of which (experimentally) used a buffer. At some point, in the manual we were talking of
    millions of allocations but times have changed.

    Macros were used to inline the appending code (in the three variants), but in the end I decided
    to just merge all into one function, with a bit more overhead because we need to optionally
    skip a macro preamble.

    Values like 512 and 128 also work ok. There is not much to gain in optimization here. We used
    to have 3 mostly overlapping functions, one of which used a buffer. We can probably use a
    larger default buffer size and larger step and only free when we think it's too large.

*/
3468
/*tex
    The number of bytes that |tex_tokenlist_to_tstring| allocates when it starts with a fresh
    buffer. This used to be 256.
*/
# define default_buffer_size  512
/*tex
    The number of bytes by which |tex_aux_make_room_in_buffer| enlarges the buffer. When we're
    larger, we always are much larger.
*/
# define default_buffer_step 4096
3471
3472// todo: check ret
3473
3474static inline void tex_aux_make_room_in_buffer(int a)
3475{
3476    if (lmt_token_state.bufloc + a + 1 > lmt_token_state.bufmax) {
3477        char *tmp = aux_reallocate_array(lmt_token_state.buffer, sizeof(unsigned char), lmt_token_state.bufmax + default_buffer_step, 1);
3478        if (tmp) {
3479            lmt_token_state.bufmax += default_buffer_step;
3480        } else {
3481            // error
3482        }
3483        lmt_token_state.buffer = tmp;
3484    }
3485}
3486
3487static void tex_aux_append_uchar_to_buffer(int s)
3488{
3489    tex_aux_make_room_in_buffer(4);
3490    if (s <= 0x7F) {
3491        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (s);
3492    } else if (s <= 0x7FF) {
3493        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xC0 + (s / 0x40));
3494        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (s % 0x40));
3495    } else if (s <= 0xFFFF) {
3496        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xE0 +  (s / 0x1000));
3497        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + ((s % 0x1000) / 0x40));
3498        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + ((s % 0x1000) % 0x40));
3499    } else {
3500        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xF0 +   (s / 0x40000));
3501        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 +  ((s % 0x40000) / 0x1000));
3502        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (((s % 0x40000) % 0x1000) / 0x40));
3503        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (((s % 0x40000) % 0x1000) % 0x40));
3504    }
3505}
3506
3507static void tex_aux_append_char_to_buffer(int c)
3508{
3509    tex_aux_make_room_in_buffer(1);
3510    lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (c);
3511}
3512
3513/*tex Only errors and unknowns. */
3514
3515static void tex_aux_append_str_to_buffer(const char *s)
3516{
3517    const char *v = s;
3518    tex_aux_make_room_in_buffer((int) strlen(v));
3519    /*tex Using memcpy will inline and give a larger binary ... and we seldom need this. */
3520    while (*v) {
3521        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (*v);
3522        v++;
3523    }
3524}
3525
3526/*tex Only bogus csnames. */
3527
3528static void tex_aux_append_esc_to_buffer(const char *s)
3529{
3530    int e = escape_char_par;
3531    if (e > 0 && e < cs_offset_value) {
3532        tex_aux_append_uchar_to_buffer(e);
3533    }
3534    tex_aux_append_str_to_buffer(s);
3535}
3536
/*tex
    True when the character that |aux_str2uni| gets from the string |a| (presumably its first
    character, to be confirmed) has category code |letter_cmd| according to
    |tex_aux_the_cat_code|, so in the catcode regime of the current input.
*/
# define is_cat_letter(a)  (tex_aux_the_cat_code(aux_str2uni(str_string((a)))) == letter_cmd)
3538
3539/* make two versions: macro and not */
3540
3541char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamble, int nospace, int strip, int wipe, int single)
3542{
3543    if (pp) {
3544        /*tex We need to go beyond the reference. */
3545        int p = token_link(pp);
3546        if (p) {
3547            int e = escape_char_par;  /*tex The serialization of the escape, normally a backlash. */
3548            int n = 0;                /*tex The character after |#|, so |#0| upto |#9| */
3549            int min = 0;
3550            int max = lmt_token_memory_state.tokens_data.top;
3551            int skip = 0;
3552            int tail = p;
3553            int count = 0;
3554            if (lmt_token_state.bufmax > default_buffer_size) {
3555                /* Let's start fresh and small. */
3556                aux_deallocate_array(lmt_token_state.buffer);
3557                lmt_token_state.buffer = aux_allocate_clear_array(sizeof(unsigned char), default_buffer_size, 1);
3558                lmt_token_state.bufmax = default_buffer_size;
3559            } else if (! lmt_token_state.buffer) {
3560                /* Let's start. */
3561                lmt_token_state.buffer = aux_allocate_clear_array(sizeof(unsigned char), default_buffer_size, 1);
3562                lmt_token_state.bufmax = default_buffer_size;
3563            }
3564            lmt_token_state.bufloc = 0;
3565            if (skippreamble == 1) {
3566                skip = get_token_preamble(pp);
3567            }
3568            while (p) {
3569                if (p < min || p > max) {
3570                    tex_aux_append_str_to_buffer(error_string_clobbered(31));
3571                    break;
3572                } else {
3573                    int info = token_info(p);
3574                    if (info < 0) {
3575                        /*tex Unlikely, will go after checking (maybe \LUA\ user mess up). */
3576                        tex_aux_append_str_to_buffer(error_string_bad(32));
3577                    } else if (info < cs_token_flag) {
3578                        /*tex We nearly always end up here because otherwise we have an error. */
3579                        int cmd = token_cmd(info);
3580                        int chr = token_chr(info);
3581                        switch (cmd) {
3582                            case left_brace_cmd:
3583                            case right_brace_cmd:
3584                            case math_shift_cmd:
3585                            case alignment_tab_cmd:
3586                            case end_line_cmd:
3587                            case superscript_cmd:
3588                            case subscript_cmd:
3589                            case spacer_cmd:
3590                            case letter_cmd:
3591                            case other_char_cmd:
3592                            case active_char_cmd:
3593                                if (! skip) {
3594                                    tex_aux_append_uchar_to_buffer(chr);
3595                                }
3596                                break;
3597                            case parameter_cmd:
3598                                if (! skip) {
3599                                 /* if (! single && ! nospace && (! lmt_token_state.in_lua_escape && (lmt_expand_state.cs_name_level == 0))) { */
3600                                    if (! single && ! nospace && lmt_expand_state.cs_name_level == 0) {
3601                                        tex_aux_append_uchar_to_buffer(chr);
3602                                    }
3603                                    tex_aux_append_uchar_to_buffer(chr);
3604                                }
3605                                break;
3606                            case parameter_reference_cmd:
3607                                if (! skip) {
3608                                    tex_aux_append_char_to_buffer(match_visualizer);
3609                                    if (chr <= 9) {
3610                                        tex_aux_append_char_to_buffer(chr + '0');
3611                                    } else if (chr <= max_match_count) {
3612                                        tex_aux_append_char_to_buffer(chr + '0' + gap_match_count);
3613                                    } else {
3614                                        tex_aux_append_char_to_buffer('!');
3615                                        goto EXIT;
3616                                    }
3617                                } else {
3618                                    if (chr > max_match_count) {
3619                                        goto EXIT;
3620                                    }
3621                                }
3622                                break;
3623                            case match_cmd:
3624                                if (! skip) {
3625                                    tex_aux_append_char_to_buffer(match_visualizer);
3626                                }
3627                                if (is_valid_match_ref(chr)) {
3628                                    ++n;
3629                                }
3630                                if (! skip) {
3631                                    tex_aux_append_char_to_buffer(chr ? chr : '0');
3632                                 // if (chr <= 9) {
3633                                 //     tex_aux_append_char_to_buffer(chr + '0');
3634                                 // } else if (chr <= max_match_count) {
3635                                 //     tex_aux_append_char_to_buffer(chr + '0' + gap_match_count);
3636                                 // }
3637                                }
3638                                if (n > max_match_count) {
3639                                    goto EXIT;
3640                                }
3641                                break;
3642                            case end_match_cmd:
3643                                if (skippreamble == 2) {
3644                                    goto EXIT;
3645                                } else if (chr == 0) {
3646                                    if (! skip) {
3647                                        tex_aux_append_char_to_buffer('-');
3648                                        tex_aux_append_char_to_buffer('>');
3649                                    }
3650                                    skip = 0 ;
3651                                }
3652                                break;
3653                            case end_paragraph_cmd:
3654                                if (! inhibit_par && (auto_paragraph_mode(auto_paragraph_text))) {
3655                                    tex_aux_append_esc_to_buffer("par");
3656                                }
3657                                break;
3658                            case deep_frozen_keep_constant_cmd:
3659                                if (! skip) {
3660                                    halfword h = token_link(chr);
3661                                    while (h) {
3662                                        tex_aux_append_uchar_to_buffer(token_chr(token_info(h)));
3663                                        h = token_link(h);
3664                                    }
3665                                }
3666                                break;
3667                            case ignore_cmd:
3668                                break;
3669                            default:
3670                                tex_aux_append_str_to_buffer(tex_aux_special_cmd_string(cmd, chr, error_string_bad(33)));
3671                                break;
3672                        }
3673                    } else if (! (inhibit_par && info == lmt_token_state.par_token)) {
3674                        int q = info - cs_token_flag;
3675                        if (q < hash_base) {
3676                            if (q == null_cs) {
3677                                tex_aux_append_esc_to_buffer("csname");
3678                                tex_aux_append_esc_to_buffer("endcsname");
3679                            } else {
3680                                tex_aux_append_str_to_buffer(error_string_impossible(34));
3681                            }
3682                        } else if (eqtb_out_of_range(q)) {
3683                            tex_aux_append_str_to_buffer(error_string_impossible(35));
3684                        } else {
3685                            strnumber txt = cs_text(q);
3686                            if (txt  < 0 || txt  >= lmt_string_pool_state.string_pool_data.ptr) {
3687                                tex_aux_append_str_to_buffer(error_string_nonexistent(36));
3688                            } else {
3689                                int allocated = 0;
3690                                char *sh = tex_makecstring(txt, &allocated);
3691                                char *s = sh;
3692                                if (tex_is_active_cs(txt)) {
3693                                    s = s + 3;
3694                                    while (*s) {
3695                                        tex_aux_append_char_to_buffer(*s);
3696                                        s++;
3697                                    }
3698                                } else {
3699                                    if (e >= 0) {
3700                                        tex_aux_append_uchar_to_buffer(e);
3701                                    }
3702                                    while (*s) {
3703                                        tex_aux_append_char_to_buffer(*s);
3704                                        s++;
3705                                    }
3706                                    if ((! nospace) && ((! tex_single_letter(txt)) || is_cat_letter(txt))) {
3707                                        tex_aux_append_char_to_buffer(' ');
3708                                    }
3709                                }
3710                                if (allocated) {
3711                                    lmt_memory_free(sh);
3712                                }
3713                            }
3714                        }
3715                    }
3716                    tail = p;
3717                    ++count;
3718                    p = token_link(p);
3719                }
3720            }
3721          EXIT:
3722            if (strip && lmt_token_state.bufloc > 1) {
3723                if (lmt_token_state.buffer[lmt_token_state.bufloc-1] == strip) {
3724                    lmt_token_state.bufloc -= 1;
3725                }
3726                if (lmt_token_state.bufloc > 1 && lmt_token_state.buffer[0] == strip) {
3727                    memcpy(&lmt_token_state.buffer[0], &lmt_token_state.buffer[1], lmt_token_state.bufloc-1);
3728                    lmt_token_state.bufloc -= 1;
3729                }
3730            }
3731            lmt_token_state.buffer[lmt_token_state.bufloc] = '\0';
3732            if (siz) {
3733                *siz = lmt_token_state.bufloc;
3734            }
3735            if (wipe) {
3736                tex_flush_token_list_head_tail(pp, tail, count);
3737            }
3738            return lmt_token_state.buffer;
3739        } else {
3740            if (wipe) {
3741                 tex_put_available_token(pp);
3742            }
3743        }
3744    }
3745    if (siz) {
3746        *siz = 0;
3747    }
3748    return NULL;
3749}
3750
3751/*tex
3752
3753    The \LUA\ interface needs some extra functions. The functions themselves are quite boring, but
3754    they are handy because otherwise this internal stuff has to be accessed from \CCODE\ directly,
3755    where lots of the defines are not available.
3756
    It doesn't make sense to listen to |\globaldefs| in these helpers, so that feature has been removed.
3758
3759*/
3760
3761/* The bin gets 1.2K smaller if we inline these. */
3762
3763halfword tex_get_tex_dimension_register (int j, int internal) { return internal ? dimension_parameter(j) : dimension_register(j) ; }
3764halfword tex_get_tex_skip_register      (int j, int internal) { return internal ? glue_parameter(j) : skip_register(j) ; }
3765halfword tex_get_tex_muskip_register    (int j, int internal) { return internal ? muglue_parameter(j) : muskip_register(j); }
3766halfword tex_get_tex_count_register     (int j, int internal) { return internal ? count_parameter(j) : count_register(j)  ; }
3767halfword tex_get_tex_posit_register     (int j, int internal) { return internal ? posit_parameter(j) : posit_register(j)  ; }
3768halfword tex_get_tex_attribute_register (int j, int internal) { return internal ? attribute_parameter(j) : attribute_register(j) ; }
3769halfword tex_get_tex_box_register       (int j, int internal) { return internal ? box_parameter(j) : box_register(j) ; }
3770
3771void tex_set_tex_dimension_register(int j, halfword v, int flags, int internal)
3772{
3773 // if (global_defs_par) {
3774 //     flags = add_global_flag(flags);
3775 // }
3776    if (internal) {
3777        tex_assign_internal_dimension_value(flags, internal_dimension_location(j), v);
3778    } else {
3779        tex_word_define(flags, register_dimension_location(j), v);
3780    }
3781}
3782
3783void tex_set_tex_skip_register(int j, halfword v, int flags, int internal)
3784{
3785 // if (global_defs_par) {
3786 //     flags = add_global_flag(flags);
3787 // }
3788    if (internal) {
3789        tex_assign_internal_skip_value(flags, internal_glue_location(j), v);
3790    } else {
3791        tex_word_define(flags, register_glue_location(j), v);
3792    }
3793}
3794
3795void tex_set_tex_muskip_register(int j, halfword v, int flags, int internal)
3796{
3797 // if (global_defs_par) {
3798 //     flags = add_global_flag(flags);
3799 // }
3800    tex_word_define(flags, internal ? internal_muglue_location(j) : register_muglue_location(j), v);
3801}
3802
3803void tex_set_tex_count_register(int j, halfword v, int flags, int internal)
3804{
3805 // if (global_defs_par) {
3806 //     flags = add_global_flag(flags);
3807 // }
3808    if (internal) {
3809        tex_assign_internal_integer_value(flags, internal_integer_location(j), v);
3810    } else {
3811        tex_word_define(flags, register_integer_location(j), v);
3812    }
3813}
3814void tex_set_tex_posit_register(int j, halfword v, int flags, int internal)
3815{
3816 // if (global_defs_par) {
3817 //     flags = add_global_flag(flags);
3818 // }
3819    if (internal) {
3820        tex_assign_internal_posit_value(flags, internal_posit_location(j), v);
3821    } else {
3822        tex_word_define(flags, register_posit_location(j), v);
3823    }
3824}
3825
3826
3827void tex_set_tex_attribute_register(int j, halfword v, int flags, int internal)
3828{
3829 // if (global_defs_par) {
3830 //     flags = add_global_flag(flags);
3831 // }
3832    if (j > lmt_node_memory_state.max_used_attribute) {
3833        lmt_node_memory_state.max_used_attribute = j;
3834    }
3835    tex_change_attribute_register(flags, register_attribute_location(j), v);
3836    tex_word_define(flags, internal ? internal_attribute_location(j) : register_attribute_location(j), v);
3837}
3838
3839void tex_set_tex_box_register(int j, halfword v, int flags, int internal)
3840{
3841 // if (global_defs_par) {
3842 //     flags = add_global_flag(flags);
3843 // }
3844    if (internal) {
3845        tex_define(flags, internal_box_location(j), internal_box_reference_cmd, v);
3846    } else {
3847        tex_define(flags, register_box_location(j), register_box_reference_cmd, v);
3848    }
3849}
3850
3851void tex_set_tex_toks_register(int j, lstring s, int flags, int internal)
3852{
3853    halfword ref = get_reference_token();
3854    halfword head = tex_str_toks(s, NULL);
3855    set_token_link(ref, head);
3856 // if (global_defs_par) {
3857 //     flags = add_global_flag(flags);
3858 // }
3859    if (internal) {
3860        tex_define(flags, internal_toks_location(j), internal_toks_reference_cmd, ref);
3861    } else {
3862        tex_define(flags, register_toks_location(j), register_toks_reference_cmd, ref);
3863    }
3864}
3865
3866void tex_scan_tex_toks_register(int j, int c, lstring s, int flags, int internal)
3867{
3868    halfword ref = get_reference_token();
3869    halfword head = tex_str_scan_toks(c, s);
3870    set_token_link(ref, head);
3871 // if (global_defs_par) {
3872 //     flags = add_global_flag(flags);
3873 // }
3874    if (internal) {
3875        tex_define(flags, internal_toks_location(j), internal_toks_reference_cmd, ref);
3876    } else {
3877        tex_define(flags, register_toks_location(j), register_toks_reference_cmd, ref);
3878    }
3879}
3880
3881int tex_get_tex_toks_register(int j, int internal)
3882{
3883    halfword t = internal ? toks_parameter(j) : toks_register(j);
3884    if (t) {
3885        return tex_tokens_to_string(t);
3886    } else {
3887        return get_nullstr();
3888    }
3889}
3890
3891/* Options: (0) error when undefined [bad], (1) create [but undefined], (2) ignore [discard] */
3892
3893halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option)
3894{
3895    halfword p = null;
3896    if (! head) {
3897        head = get_reference_token();
3898    }
3899    p = (tail && *tail) ? *tail : head;
3900    if (lstr > 0) {
3901        const char *se = str + lstr;
3902        while (str < se) {
3903            /*tex hh: |str2uni| could return len too (also elsewhere) */
3904            int ul;
3905            halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &ul);
3906            halfword t = null;
3907            halfword cc = tex_get_cat_code(ct, u);
3908            str += ul;
3909            /*tex
3910                This is a relative simple converter; if more is needed one can just use |tex.print|
3911                with a regular |\def| or |\gdef| and feed the string into the regular scanner.
3912            */
3913            switch (cc) {
3914                case escape_cmd:
3915                    {
3916                        /*tex We have a potential control sequence so we check for it. */
3917                        int lname = 0;
3918                        const char *name  = str;
3919                        while (str < se) {
3920                            int s;
3921                            halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &s);
3922                            int c = tex_get_cat_code(ct, u);
3923                            if (c == letter_cmd) {
3924                                str += s;
3925                                lname += s;
3926                            } else if (c == spacer_cmd) {
3927                                /*tex We ignore a trailing space like normal scanning does. */
3928                                if (lname == 0) {
3929                             // if (u == 32) {
3930                                    lname += s;
3931                                }
3932                                str += s;
3933                                break ;
3934                            } else {
3935                                if (lname == 0) {
3936                                    lname += s;
3937                                    str += s;
3938                                }
3939                                break ;
3940                            }
3941                        }
3942                        if (lname > 0) {
3943                            /*tex We have a potential |\cs|. */
3944                            halfword cs = tex_string_locate(name, lname, option == 1 ? 1 : 0); /* 1 == create */
3945                            if (cs == undefined_control_sequence) {
3946                                if (option == 2) {
3947                                    /*tex We ignore unknown commands. */
3948                                 // t = null;
3949                                } else {
3950                                    /*tex We play safe and backtrack, as we have option 0, but never used anyway. */
3951                                    t = u + (cc * (1<<21));
3952                                    str = name;
3953                                }
3954                            } else {
3955                                /* We end up here when option is 1. */
3956                                t = cs_token_flag + cs;
3957                            }
3958                        } else {
3959                            /*tex
3960                                Just a character with some meaning, so |\unknown| becomes effectively
3961                                |\unknown| assuming that |\\| has some useful meaning of course.
3962                            */
3963                            t = u + (cc * (1 << 21));
3964                            str = name;
3965                        }
3966                        break;
3967                    }
3968                case comment_cmd:
3969                    goto DONE;
3970                case ignore_cmd:
3971                    break;
3972                case spacer_cmd:
3973                 /* t = u + (cc * (1<<21)); */
3974                    t = token_val(spacer_cmd, ' ');
3975                    break;
3976                default:
3977                    /*tex
3978                        Whatever token, so for instance $x^2$ just works given a tex catcode regime.
3979                    */
3980                    t = u + (cc * (1<<21));
3981                    break;
3982            }
3983            if (t) {
3984                p = tex_store_new_token(p, t);
3985            }
3986        }
3987    }
3988  DONE:
3989    if (tail) {
3990        *tail = p;
3991    }
3992    return head;
3993}
3994
3995/*tex So far for the helpers. */
3996
3997int tex_used_token_count(void) {
3998    return lmt_token_memory_state.tokens_data.ptr;
3999}
4000
4001void tex_dump_token_mem(dumpstream f)
4002{
4003    /*tex
4004        It doesn't pay off to prune the available list. We save less than 10K if we do this and
4005        it assumes a sequence at the end. It doesn't help that the list is in reverse order so
4006        we just dump the lot. But we do check the allocated size. We cheat a bit in reducing
4007        the ptr so that we can set the the initial counter on loading.
4008    */
4009    halfword p = lmt_token_memory_state.available;
4010    halfword u = lmt_token_memory_state.tokens_data.top + 1;
4011    while (p) {
4012        --u;
4013        p = token_link(p);
4014    }
4015    lmt_token_memory_state.tokens_data.ptr = u;
4016    dump_int(f, lmt_token_state.null_list); /* the only one left */
4017    dump_int(f, lmt_token_memory_state.tokens_data.allocated);
4018    dump_int(f, lmt_token_memory_state.tokens_data.top);
4019    dump_int(f, lmt_token_memory_state.tokens_data.ptr);
4020    dump_int(f, lmt_token_memory_state.available);
4021    dump_things(f, lmt_token_memory_state.tokens[0], lmt_token_memory_state.tokens_data.top + 1);
4022}
4023
4024void tex_undump_token_mem(dumpstream f)
4025{
4026    undump_int(f, lmt_token_state.null_list); /* the only one left */
4027    undump_int(f, lmt_token_memory_state.tokens_data.allocated);
4028    undump_int(f, lmt_token_memory_state.tokens_data.top);
4029    undump_int(f, lmt_token_memory_state.tokens_data.ptr);
4030    undump_int(f, lmt_token_memory_state.available);
4031    tex_initialize_token_mem(); /* maybe only ptr upto top */
4032    undump_things(f, lmt_token_memory_state.tokens[0], lmt_token_memory_state.tokens_data.top + 1);
4033}
4034