SourceBrowser

textoken.c /size: 157 Kb last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7/*tex Todo: move some helpers to other places. */
8
9inline static int tex_aux_the_cat_code(halfword b)
10{
11    return (lmt_input_state.cur_input.cattable == default_catcode_table_preset) ?
12        tex_get_cat_code(cat_code_table_par, b)
13    : ( (lmt_input_state.cur_input.cattable > -0xFF) ?
14        tex_get_cat_code(lmt_input_state.cur_input.cattable, b)
15    : (
16        - lmt_input_state.cur_input.cattable - 0xFF
17    ) ) ;
18}
19
20/*tex
21
22    The \TEX\ system does nearly all of its own memory allocation, so that it can readily be
23    transported into environments that do not have automatic facilities for strings, garbage
24    collection, etc., and so that it can be in control of what error messages the user receives.
25    The dynamic storage requirements of \TEX\ are handled by providing two large arrays called
26    |fixmem| and |varmem| in which consecutive blocks of words are used as nodes by the \TEX\
27    routines.
28
29    Pointer variables are indices into this array, or into another array called |eqtb| that
30    will be explained later. A pointer variable might also be a special flag that lies outside
31    the bounds of |mem|, so we allow pointers to assume any |halfword| value. The minimum
32    halfword value represents a null pointer. \TEX\ does not assume that |mem[null]| exists.
33
34    Locations in |fixmem| are used for storing one-word records; a conventional |AVAIL| stack is
35    used for allocation in this array.
36
37    One can make an argument to switch to standard \CCODE\ allocation but the current approach is
    very efficient in memory usage and performance so we stay with it. On the average memory
    consumption of \TEX\ is not that large, definitely not compared to other programs that deal
40    with text.
41
42    The big dynamic storage area is named |fixmem| where the smallest location of one|-|word
43    memory in use is |fix_mem_min| and the largest location of one|-|word memory in use is
44    |fix_mem_max|.
45
46    The |dyn_used| variable keeps track of how much memory is in use. The head of the list of
    available one|-|word nodes is registered in |avail|. The last one|-|word node used in |mem|
48    is |fix_mem_end|.
49
50    All these variables are packed in the structure |token_memory_state|.
51
52*/
53
/*tex
    The administration of the token pool. The pool itself is not allocated here, that happens
    in |tex_initialize_token_mem|; this initializer only installs the configured sizes and
    zeroes the counters.
*/

token_memory_state_info lmt_token_memory_state = {
    .tokens      = NULL,              /* the pool, set by |tex_initialize_token_mem| */
    .tokens_data = {
        .minimum   = min_token_size,  /* number of slots allocated in an initializing run */
        .maximum   = max_token_size,
        .size      = siz_token_size,  /* growing beyond this triggers an overflow error */
        .step      = stp_token_size,  /* number of slots added each time the pool is bumped */
        .allocated = 0,               /* number of slots currently allocated */
        .itemsize  = sizeof(memoryword),
        .top       = 0,               /* highest slot index handed out so far */
        .ptr       = 0, /* used to register usage */
        .initial   = 0,               /* copy of |ptr| taken at startup of a non initializing run */
        .offset    = 0,
    },
    .available  = 0,                  /* head of the linked list of freed tokens */
    .padding    = 0,
};
71
72/*tex
73
74    Token data has its own memory space. Again we have some state variables: |temp_token_head| is
75    the head of a (temporary) list of some kind as are |hold_token_head| and |omit_template|. A
76    permanently empty list is available in |null_list| and the head of the token list built by
77    |scan_keyword| is registered in |backup_head|. All these variables are packed in the structure
78    |token_data| but some have been moved to a more relevant state (so omit and hold are now in the
79    alignment state).
80
81*/
82
/*tex
    The initial token state. Only |null_list| and |empty| are touched in the nearby code:
    |null_list| gets its token in |tex_initialize_tokens| and |empty| is remapped when the token
    memory is compacted. The remaining fields are managed elsewhere.
*/

token_state_info lmt_token_state = {
    .null_list      = null,
    .in_lua_escape  = 0, /* obsolete */
    .force_eof      = 0,
    .luacstrings    = 0,
    .par_loc        = null,
    .par_token      = null,
 /* .line_par_loc   = null, */ /* removed because not really used and useful */
 /* .line_par_token = null, */ /* idem */
    .buffer         = NULL,
    .bufloc         = 0,
    .bufmax         = 0,
    .empty          = null, 
};
97
/*tex Some properties are dumped in the format so these are set already! */
99
# define reserved_token_mem_slots 2 // play safe for slight overruns
101
102void tex_initialize_token_mem(void)
103{
104    memoryword *tokens = NULL;
105    int size = 0;
106    if (lmt_main_state.run_state == initializing_state) {
107        size = lmt_token_memory_state.tokens_data.minimum;
108    } else {
109        size = lmt_token_memory_state.tokens_data.allocated;
110        lmt_token_memory_state.tokens_data.initial = lmt_token_memory_state.tokens_data.ptr;
111    }
112    if (size > 0) {
113        tokens = aux_allocate_clear_array(sizeof(memoryword), size, reserved_token_mem_slots);
114    }
115    if (tokens) {
116        lmt_token_memory_state.tokens = tokens;
117        lmt_token_memory_state.tokens_data.allocated = size;
118    } else {
119        tex_overflow_error("tokens", size);
120    }
121}
122
123static void tex_aux_bump_token_memory(void)
124{
125    /*tex We need to manage the big dynamic storage area. */
126    int size = lmt_token_memory_state.tokens_data.allocated + lmt_token_memory_state.tokens_data.step;
127    if (size > lmt_token_memory_state.tokens_data.size) {
128        lmt_run_memory_callback("token", 0);
129        tex_show_runaway();
130        tex_overflow_error("token memory size", lmt_token_memory_state.tokens_data.allocated);
131    } else {
132        memoryword *tokens = aux_reallocate_array(lmt_token_memory_state.tokens, sizeof(memoryword), size, reserved_token_mem_slots);
133        lmt_run_memory_callback("token", tokens ? 1 : 0);
134        if (tokens) {
135            lmt_token_memory_state.tokens = tokens;
136        } else {
137            /*tex If memory is exhausted, display possible runaway text. */
138            tex_show_runaway();
139            tex_overflow_error("token memory size", lmt_token_memory_state.tokens_data.allocated);
140        }
141    }
142    memset((void *) (lmt_token_memory_state.tokens + lmt_token_memory_state.tokens_data.allocated + 1), 0, ((size_t) lmt_token_memory_state.tokens_data.step + reserved_token_mem_slots) * sizeof(memoryword));
143    lmt_token_memory_state.tokens_data.allocated = size;
144}
145
146void tex_initialize_tokens(void)
147{
148    lmt_token_memory_state.available = null;
149    lmt_token_memory_state.tokens_data.top = 0;
150    lmt_token_state.null_list = tex_get_available_token(null);
151 /* lmt_token_state.in_lua_escape = 0; */
152}
153
154/*tex
155    Experiment. It saves some 512K on the \CONTEXT\ format of October 2020. It makes me wonder if I
156    should spend some time on optimizing token lists (kind of cisc commands as we're currently kind
157    of risc).
158
159    A mixed token/file model (for permanent macros) could avoid the link and result in less memory 
160    which in turn is easier on the cache. We could save at most 2M (for 35K) macros in \CONTEXT\ 
161    so it is not worth the trouble. 
162*/
163
164void tex_compact_tokens(void)
165{
166    int nc = 0;
167 // memoryword *target = allocate_array(sizeof(memoryword), (size_t) token_memory_state.tokens_data.allocated, 0);
168    memoryword *target = aux_allocate_clear_array(sizeof(memoryword), lmt_token_memory_state.tokens_data.allocated, 0);
169    halfword *mapper = aux_allocate_array(sizeof(halfword), lmt_token_memory_state.tokens_data.allocated, 0);
170    int nofluacmds = 0;
171    if (target && mapper) {
172        memoryword *tokens = lmt_token_memory_state.tokens;
173        memset((void *) mapper, -1, ((size_t) lmt_token_memory_state.tokens_data.allocated) * sizeof(halfword));
174        /* also reset available */
175        for (int cs = 0; cs < (eqtb_size + lmt_hash_state.hash_data.ptr + 1); cs++) {
176            switch (eq_type(cs)) {
177                case call_cmd:
178                case protected_call_cmd:
179                case semi_protected_call_cmd:
180                case constant_call_cmd:
181                case tolerant_call_cmd:
182                case tolerant_protected_call_cmd:
183                case tolerant_semi_protected_call_cmd:
184                case internal_toks_reference_cmd:
185                case register_toks_reference_cmd:
186                    {
187                        halfword v = eq_value(cs); /* ref count token*/
188                        if (v) {
189                            if (mapper[v] < 0) {
190                             // printf("before =>"); { halfword tt = v; while (tt) { printf("%7d ",tt); tt = token_link(tt); } } printf("\n");
191                                halfword t = v;
192                                nc++;
193                                mapper[v] = nc; /* new ref count token index */
194                                while (1) {
195                                    target[nc].half1 = tokens[t].half1; /* info cq. ref count */
196                                    t = tokens[t].half0;
197                                    if (t) {
198                                        nc++;
199                                        target[nc-1].half0 = nc;        /* link to next */
200                                    } else {
201                                        target[nc].half0 = null;        /* link to next */
202                                        break;
203                                    }
204                                }
205                             // printf("after  =>"); { halfword tt = mapper[v]; while (tt) { printf("%7d ",tt); tt = target[tt].half0; } } printf("\n");
206                            }
207                            eq_value(cs) = mapper[v];
208                        }
209                        break;
210                    }
211                case lua_value_cmd:
212                case lua_call_cmd:
213                case lua_local_call_cmd:
214                    {
215                        ++nofluacmds;
216                        break;
217                    }
218            }
219        }
220        lmt_token_state.empty = mapper[lmt_token_state.empty];
221     // print(dump_state.format_identifier);
222        tex_print_format("tokenlist compacted from %i to %i entries, ", lmt_token_memory_state.tokens_data.top, nc);
223        if (nofluacmds) {
224            /*tex
225                We just mention them because when these are aliased the macro package needs to make
226                sure that after loading that happens again because registered funciton references
227                can have changed between format generation and run!
228            */
229            tex_print_format("%i potentially aliased lua call/value entries, ", nofluacmds);
230        }
231        lmt_token_memory_state.tokens_data.top = nc;
232        lmt_token_memory_state.tokens_data.ptr = nc;
233        aux_deallocate_array(lmt_token_memory_state.tokens);
234        lmt_token_memory_state.tokens = target;
235        lmt_token_memory_state.available = null;
236    } else {
237        tex_overflow_error("token compaction size", lmt_token_memory_state.tokens_data.allocated);
238    }
239}
240
241
242/*tex
243
244    The function |get_avail| returns a pointer (index) to a new one word node whose |link| field is
245    |null| (which is just 0). However, \TEX\ will halt if there is no more room left.
246
247    If the available space list is empty, i.e., if |avail = null|, we try first to increase
248    |fix_mem_end|. If that cannot be done, i.e., if |fix_mem_end = fix_mem_max|, we try to reallocate
249    array |fixmem|. If, that doesn't work, we have to quit. Users can configure \TEX\ to use a lot of
250    memory but in some scenarios limitations make sense.
251
252    Remark: we can have a pool of chunks where we get from or just allocate per token (as we have lots
253    of them that is slow). But then format loading becomes much slower as we need to recreate the
254    linked list. A no go. In todays terms \TEX\ memory usage is low anyway.
255
256    The freed tokens are kept in a linked list. First we check if we can quickly get one of these. If
257    that fails, we try to get one from the available pool. If that fails too, we enlarge the pool and
258    try again. We keep track of the used number of tokens. We also make sure that the tokens links to
259    nothing.
260
261    One problem is of course that tokens can be scattered over memory. We could have some sorter that
262    occasionally kicks in but it doesn't pay off. Normally definitions (in the format) are in sequence
263    but a normal run \unknown\ it would be interesting to know if this impacts the cache.
264
265*/
266
267halfword tex_get_available_token(halfword t)
268{
269    halfword p = lmt_token_memory_state.available;
270    if (p) {
271        lmt_token_memory_state.available = token_link(p);
272    } else if (lmt_token_memory_state.tokens_data.top < lmt_token_memory_state.tokens_data.allocated) {
273        p = ++lmt_token_memory_state.tokens_data.top;
274    } else {
275        tex_aux_bump_token_memory();
276        p = ++lmt_token_memory_state.tokens_data.top;
277    }
278    ++lmt_token_memory_state.tokens_data.ptr;
279    token_link(p) = null;
280    token_info(p) = t;
281    return p;
282}
283
284/*tex
285
286    Because we only have forward links, a freed token ends up at the head of the list of available
287    tokens.
288
289*/
290
291void tex_put_available_token(halfword p)
292{
293    token_link(p) = lmt_token_memory_state.available;
294    lmt_token_memory_state.available = p;
295    --lmt_token_memory_state.tokens_data.ptr;
296}
297
298halfword tex_store_new_token(halfword p, halfword t)
299{
300    halfword q = tex_get_available_token(t);
301    token_link(p) = q;
302    return q;
303}
304
305/*tex
306
307    The procedure |flush_list (p)| frees an entire linked list of oneword nodes that starts at
308    position |p|. It makes list of single word nodes available. The second variant in principle
309    is faster but in practice this goes unnoticed. Of course there is a little price to pay for
310    keeping track of memory usage.
311
312*/
313
314void tex_flush_token_list(halfword head)
315{
316    if (head) {
317        if (! token_link(head)) {
318            /* This happens more frequently (6.2M vs 1.7M). */
319            token_link(head) = lmt_token_memory_state.available;
320            --lmt_token_memory_state.tokens_data.ptr;
321        } else {
322            halfword current = head;
323            halfword tail;
324            int i = 0;
325            do {
326                ++i;
327                tail = current;
328                current = token_link(tail);
329            } while (current);
330            token_link(tail) = lmt_token_memory_state.available;
331            lmt_token_memory_state.tokens_data.ptr -= i;
332        }
333        lmt_token_memory_state.available = head;
334    }
335}
336
337void tex_flush_token_list_head_tail(halfword head, halfword tail, int n)
338{
339    if (head) {
340        lmt_token_memory_state.tokens_data.ptr -= n;
341        token_link(tail) = lmt_token_memory_state.available;
342        lmt_token_memory_state.available = head;
343    }
344}
345
346void tex_add_token_reference(halfword p)
347{
348    if (get_token_reference(p) < max_token_reference) {
349        add_token_reference(p);
350 // } else {
351 //     tex_overflow_error("reference count", max_token_reference);
352    }
353}
354
355void tex_increment_token_reference(halfword p, int n)
356{
357    if ((get_token_reference(p) + n) < max_token_reference) {
358        inc_token_reference(p, n);
359    } else { 
360        inc_token_reference(p, max_token_reference - get_token_reference(p));
361 // } else {
362 //     tex_overflow_error("reference count", max_token_reference);
363    }
364}
365
366void tex_delete_token_reference(halfword p)
367{
368    if (p) {
369        halfword r = get_token_reference(p);
370        if (! r) {
371            tex_flush_token_list(p);
372        } else if (r < max_token_reference) {
373            sub_token_reference(p);
374        }
375    }
376}
377
378/*tex
379
380    A \TEX\ token is either a character or a control sequence, and it is represented internally in
381    one of two ways:
382
383    \startitemize[n]
384        \startitem
385            A character whose ASCII code number is |c| and whose command code is |m| is represented
386            as the number $2^{21}m+c$; the command code is in the range |1 <= m <= 14|.
387        \stopitem
388        \startitem
389            A control sequence whose |eqtb| address is |p| is represented as the number
390            |cs_token_flag+p|. Here |cs_token_flag = t =| $2^{25}-1$ is larger than $2^{21}m+c$, yet
391            it is small enough that |cs_token_flag + p < max_halfword|; thus, a token fits
392            comfortably in a halfword.
393        \stopitem
394    \stopitemize
395
396    A token |t| represents a |left_brace| command if and only if |t < left_brace_limit|; it
397    represents a |right_brace| command if and only if we have |left_brace_limit <= t <
398    right_brace_limit|; and it represents a |match| or |end_match| command if and only if
399    |match_token <= t <= end_match_token|. The following definitions take care of these
400    token-oriented constants and a few others.
401
402    A token list is a singly linked list of one-word nodes in |mem|, where each word contains a token
403    and a link. Macro definitions, output routine definitions, marks, |\write| texts, and a few other
404    things are remembered by \TEX\ in the form of token lists, usually preceded by a node with a
405    reference count in its |token_ref_count| field. The token stored in location |p| is called
406    |info(p)|.
407
408    Three special commands appear in the token lists of macro definitions. When |m = match|, it means
409    that \TEX\ should scan a parameter for the current macro; when |m = end_match|, it means that
410    parameter matching should end and \TEX\ should start reading the macro text; and when |m =
411    out_param|, it means that \TEX\ should insert parameter number |c| into the text at this point.
412
413    The enclosing |\char'173| and |\char'175| characters of a macro definition are omitted, but the
414    final right brace of an output routine is included at the end of its token list.
415
416    Here is an example macro definition that illustrates these conventions. After \TEX\ processes
417    the text:
418
419    \starttyping
420    \def\mac a#1#2 \b {#1\-a ##1#2 \#2\}
421    \stoptyping
422
423    The definition of |\mac| is represented as a token list containing:
424
425    \starttyping
426    (reference count) letter a match # match # spacer \b end_match
427    out_param1 \- letter a spacer, mac_param # other_char 1
428    out_param2 spacer out_param 2
429    \stoptyping
430
431    The procedure |scan_toks| builds such token lists, and |macro_call| does the parameter matching.
432
433    Examples such as |\def \m {\def \m {a} b}| explain why reference counts would be needed even if
434    \TEX\ had no |\let| operation: When the token list for |\m| is being read, the redefinition of
435    |\m| changes the |eqtb| entry before the token list has been fully consumed, so we dare not
436    simply destroy a token list when its control sequence is being redefined.
437
438    If the parameter-matching part of a definition ends with |#{}|, the corresponding token list
439    will have |{| just before the |end_match| and also at the very end. The first |{| is used to
440    delimit the parameter; the second one keeps the first from disappearing.
441
442    The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form, including the
443    expansion of a macro or mark.
444
445*/
446
/*tex
    Print the meaning of the current token, as described by the globals |cur_cs|, |cur_cmd| and
    |cur_chr|. The |code| argument selects the variant (|meaning_code|, |meaning_full_code|,
    |meaning_ful_code|, |meaning_asis_code| or |meaning_les_code|). Control sequences that are
    flagged as untraced only show their name when they are macros or \LUA\ calls.
*/

void tex_print_meaning(halfword code)
{
    /*tex

    This would make sense but some macro packages don't like it:

    \starttyping
    if (cur_cmd == math_given_cmd) {
        cur_cmd = math_xgiven_cmd ;
    }
    \stoptyping

    Eventually we might just do it that way. We also can have |\meaningonly| that omits the
    |macro:| and arguments.
    */
    int untraced = is_untraced(eq_flag(cur_cs));
    /*tex The flags (prefixes) are shown first, but not for |meaning_les_code| or when untraced. */
    if (! untraced) {
        switch (code) {
            case meaning_code:
            case meaning_full_code:
            case meaning_ful_code:
            case meaning_asis_code:
                tex_print_cmd_flags(cur_cs, cur_cmd, (code != meaning_code), code == meaning_asis_code);
                break;
        }
    }
    switch (cur_cmd) {
        case call_cmd:
        case protected_call_cmd:
        case semi_protected_call_cmd:
        case constant_call_cmd:
        case tolerant_call_cmd:
        case tolerant_protected_call_cmd:
        case tolerant_semi_protected_call_cmd:
            /*tex All the macro variants end up here. */
            if (untraced) {
                tex_print_cs(cur_cs);
                return;
            } else {
                switch (code) {
                    case meaning_code:
                    case meaning_full_code:
                    case meaning_ful_code:
                        tex_print_str("macro");
                        /*tex The |ful| variant stops after the word, the others add colon and body. */
                        if (code == meaning_ful_code) { 
                            return; 
                        } else { 
                            goto FOLLOWUP;
                        }
                    case meaning_asis_code:
                        /*tex Here we print the def command, the name and the token list. */
                     // tex_print_format("%e%C %S ", def_cmd, def_code, cur_cs);
                        tex_print_cmd_chr(def_cmd, def_code);
                        tex_print_char(' ');
                        tex_print_cs(cur_cs);
                        tex_print_char(' ');
                        if (cur_chr && token_link(cur_chr)) {
                            /*tex Mode 1 when there is a preamble, mode 3 (braces around all) otherwise. */
                            tex_show_token_list(token_link(cur_chr), get_token_preamble(cur_chr) ? 1 : 3, 0);
                        } else { 
                            tex_print_char('{');
                            tex_print_char('}');
                        }
                        return;
                    case meaning_les_code:
                        /*tex Mode 2 makes the list printer quit at the end match token. */
                        if (cur_chr && token_link(cur_chr)) {
                            tex_show_token_list(token_link(cur_chr), 2, 0);
                        }
                        return;
                }
                /*tex Any other code: show the token list without the |macro:| prefix. */
                goto DETAILS;
            }
        case get_mark_cmd:
            tex_print_cmd_chr((singleword) cur_cmd, cur_chr);
            tex_print_char(':');
            tex_print_nlp();
            tex_token_show(tex_get_some_mark(cur_chr, 0));
            return;
        case lua_value_cmd:
        case lua_call_cmd:
        case lua_local_call_cmd:
        case lua_protected_call_cmd:
        case lua_semi_protected_call_cmd:
            if (untraced) {
                tex_print_cs(cur_cs);
                return;
            } else {
                goto DEFAULT;
            }
        case if_test_cmd:
            /*tex Tests beyond |last_if_test_code| are shown by name only. */
            if (cur_chr > last_if_test_code) {
                tex_print_cs(cur_cs);
                return;
            } else {
                goto DEFAULT;
            }
        default:
         DEFAULT:
            tex_print_cmd_chr((singleword) cur_cmd, cur_chr);
            /*tex Commands below |call_cmd| are done now, the rest gets colon and token list. */
            if (cur_cmd < call_cmd) {
                return;
            } else {
                /* all kind of reference cmds */
                break;
            }
    }
    /*tex The labels fall through: |FOLLOWUP| adds the colon and then continues with |DETAILS|. */
  FOLLOWUP:
    tex_print_char(':');
  DETAILS:
    tex_print_nlp();
    tex_token_show(cur_chr);
}
556
557/*tex
558
559    The procedure |show_token_list|, which prints a symbolic form of the token list that starts at
560    a given node |p|, illustrates these conventions. The token list being displayed should not begin
561    with a reference count. However, the procedure is intended to be robust, so that if the memory
562    links are awry or if |p| is not really a pointer to a token list, nothing catastrophic will
563    happen.
564
565    An additional parameter |q| is also given; this parameter is either null or it points to a node
566    in the token list where a certain magic computation takes place that will be explained later.
567    Basically, |q| is non-null when we are printing the two-line context information at the time of
568    an error message; |q| marks the place corresponding to where the second line should begin.
569
570    For example, if |p| points to the node containing the first |a| in the token list above, then
571    |show_token_list| will print the string
572
573    \starttyping
574    a#1#2 \b ->#1-a ##1#2 #2
575    \stoptyping
576
577    and if |q| points to the node containing the second |a|, the magic computation will be performed
578    just before the second |a| is printed.
579
580    The generation will stop, and |\ETC.| will be printed, if the length of printing exceeds a given
581    limit~|l|. Anomalous entries are printed in the form of control sequences that are not followed
582    by a blank space, e.g., |\BAD.|; this cannot be confused with actual control sequences because a
583    real control sequence named |BAD| would come out |\BAD |.
584
585    In \LUAMETATEX\ we have some more node types and token types so we also have additional tracing.
586    Because there is some more granularity in for instance nodes (subtypes) more detail is reported.
587
    It made sense to split the |tex_show_token_list| function in two, one specialized for showing 
589    the context. That saves some testing and passing arguments. 
590
591*/
592
593static const char *tex_aux_special_cmd_string(halfword cmd, halfword chr, const char *unknown)
594{
595    switch (cmd) {
596        case node_cmd                    : return "[[special cmd: node pointer]]";
597        case lua_protected_call_cmd      : return "[[special cmd: lua protected call]]";
598        case lua_semi_protected_call_cmd : return "[[special cmd: lua semi protected call]]";
599        case lua_value_cmd               : return "[[special cmd: lua value call]]";
600        case iterator_value_cmd          : return "[[special cmd: iterator value]]";
601        case lua_call_cmd                : return "[[special cmd: lua call]]";
602        case lua_local_call_cmd          : return "[[special cmd: lua local call]]";
603        case begin_local_cmd             : return "[[special cmd: begin local call]]";
604        case end_local_cmd               : return "[[special cmd: end local call]]";
605     // case prefix_cmd                  : return "[[special cmd: enforced]]";
606        case prefix_cmd                  : return "\\always ";
607        default                          : printf("[[unknown cmd: (%i,%i)]]\n", cmd, chr); return unknown;
608    }
609}
610
/*tex
    Print the token list that starts at |p| (which should not be the reference count node). The
    |asis| argument controls how a macro body is wrapped:

    \startitemize
        \startitem |1|: a |{| is printed at the end match token and a |}| after the list \stopitem
        \startitem |2|: printing stops at the end match token \stopitem
        \startitem |3|: the whole list is wrapped in |{| and |}| \stopitem
        \startitem otherwise: |->| is printed at an end match token with a zero |chr| \stopitem
    \stopitemize

    When |single| is zero, parameter characters are printed twice. A pointer outside the range
    of handed out tokens makes us print an error string and quit. Be aware that all the early
    returns skip the closing brace of modes 1 and 3.
*/

void tex_show_token_list(halfword p, int asis, int single)
{
    if (p) {
        /*tex The number of valid match tokens seen so far. */
        unsigned char n = 0;
        /*tex Indices beyond |top| have never been handed out. */
        int max = lmt_token_memory_state.tokens_data.top;
        if (asis == 3) {
            tex_print_char('{');
        }
        while (p) {
            if (p < 0 || p > max) {
                tex_print_str(error_string_clobbered(41));
                return;
            } else if (token_info(p) >= cs_token_flag) {
                /*tex A control sequence. */
                tex_print_cs_checked(token_info(p) - cs_token_flag);
            } else if (token_info(p) > 0) {
                /*tex A (command, character) pair. */
                int cmd = token_cmd(token_info(p));
                int chr = token_chr(token_info(p));
                switch (cmd) {
                    case left_brace_cmd:
                    case right_brace_cmd:
                    case math_shift_cmd:
                    case alignment_tab_cmd:
                    case superscript_cmd:
                    case subscript_cmd:
                    case spacer_cmd:
                    case letter_cmd:
                    case other_char_cmd:
                    case active_char_cmd:
                    case ignore_cmd: 
                        tex_print_tex_str(chr);
                        break;
                    case parameter_cmd:
                        /* 
                            Here we need to duplicate because a nested definition is parsed and 
                            these |##1| are two tokens |parameter + 1| while |#1| is a one token 
                            |parameter ref 1|. 
                        */
                        if (! single) { 
                            tex_print_tex_str(chr);
                        }
                        tex_print_tex_str(chr);
                        break;
                    case parameter_reference_cmd:
                        tex_print_tex_str(match_visualizer);
                        /*tex One to nine are shown as digits, higher ones are shifted by a gap. */
                        if (chr <= 9) {
                            tex_print_char(chr + '0');
                        } else if (chr <= max_match_count) {
                            tex_print_char(chr + '0' + gap_match_count);
                        } else {
                            tex_print_char('!');
                            return;
                        }
                        break;
                    case match_cmd:
                        tex_print_char(match_visualizer);
                        if (is_valid_match_ref(chr)) {
                            ++n;
                        }
                        tex_print_char(chr ? chr : '0');
                        /*tex More matches than are permitted: we give up. */
                        if (n > max_match_count) {
                            return;
                        } else {
                            break;
                        }
                    case end_match_cmd:
                        switch (asis) { 
                            case 1:
                                tex_print_char('{');
                                break;
                            case 2:
                                return;
                            default: 
                                if (chr == 0) {
                                    tex_print_str("->");
                                }
                                break;
                        }
                        break;
                    case ignore_something_cmd:
                        break;
                    case set_font_cmd:
                        /*tex
                            NOTE(review): this uses the global |cur_val| and not the |chr| of
                            the token that is printed; it looks like |chr| was meant, to be
                            confirmed.
                        */
                        tex_print_format("[font->%s]", font_original(cur_val));
                        break;
                    case end_paragraph_cmd:
                     /* tex_print_format("%e%s", "par "); */
                        tex_print_str_esc("par ");
                        break;
                    default:
                        tex_print_str(tex_aux_special_cmd_string(cmd, chr, error_string_bad(43)));
                        break;
                }
            } else {
                /*tex A zero or negative info field is not a valid token. */
                tex_print_str(error_string_bad(42));
            }
            p = token_link(p);
        }
        if (asis == 1 || asis == 3) {
            tex_print_char('}');
        }
    }
}
712
713void tex_show_token_list_context(halfword p, halfword q)
714{
715    if (p) {
716        /*tex the highest parameter number, as an \ASCII\ digit */
717        unsigned char n = 0;
718        int max = lmt_token_memory_state.tokens_data.top;
719        lmt_print_state.tally = 0;
720        while (p) {
721            if (p == q) {
722                /*tex Do magic computation. We only end up here in context showing. */
723                tex_set_trick_count();
724            }
725            /*tex Display token |p|, and |return| if there are problems. */
726            if (p < 0 || p > max) {
727                tex_print_str(error_string_clobbered(41));
728                return;
729            } else if (token_info(p) >= cs_token_flag) {
730             // if (! ((print_state.inhibit_par_tokens) && (token_info(p) == token_state.par_token))) {
731                    tex_print_cs_checked(token_info(p) - cs_token_flag);
732             // }
733            } else if (token_info(p) > 0) {
734                int cmd = token_cmd(token_info(p));
735                int chr = token_chr(token_info(p));
736                /*
737                    Display the token (|cmd|,|chr|). The procedure usually \quote {learns} the character
738                    code used for macro parameters by seeing one in a |match| command before it runs
739                    into any |out_param| commands. This is probably not true any longer. 
740                */
741                switch (cmd) {
742                    case left_brace_cmd:
743                    case right_brace_cmd:
744                    case math_shift_cmd:
745                    case alignment_tab_cmd:
746                    case superscript_cmd:
747                    case subscript_cmd:
748                    case spacer_cmd:
749                    case letter_cmd:
750                    case other_char_cmd:
751                    case active_char_cmd: /* new */
752                    case ignore_cmd: /* new */
753                        tex_print_tex_str(chr);
754                        break;
755                    case parameter_cmd:
756                        /*tex 
757                            When we show a context we alwasy duplicate the hashes.
758                        */      
759                        tex_print_tex_str(chr);
760                        tex_print_tex_str(chr);
761                        break;
762                    case parameter_reference_cmd:
763                        tex_print_tex_str(match_visualizer);
764                        if (chr <= 9) {
765                            tex_print_char(chr + '0');
766                        } else if (chr <= max_match_count) {
767                            tex_print_char(chr + '0' + gap_match_count);
768                        } else {
769                            tex_print_char('!');
770                            return;
771                        }
772                        break;
773                    case match_cmd:
774                        tex_print_char(match_visualizer);
775                        if (is_valid_match_ref(chr)) {
776                            ++n;
777                        }
778                        tex_print_char(chr ? chr : '0');
779                        if (n > max_match_count) {
780                            /*tex Can this happen at all? */
781                            return;
782                        } else {
783                            break;
784                        }
785                    case end_match_cmd:
786                        tex_print_str("->");
787                        break;
788                    case ignore_something_cmd:
789                        break;
790                    case set_font_cmd:
791                        tex_print_format("[font->%s]", font_original(cur_val));
792                        break;
793                    case end_paragraph_cmd:
794                     /* tex_print_format("%e%s", "par "); */
795                        tex_print_str_esc("par ");
796                        break;
797                    default:
798                        tex_print_str(tex_aux_special_cmd_string(cmd, chr, error_string_bad(43)));
799                        break;
800                }
801         // } else if (token_info(p) == 0) {
802         //     tex_print_str(error_string_bad(44));
803            } else {
804                tex_print_str(error_string_bad(42));
805            }
806            p = token_link(p);
807        }
808        if (p) {
809            tex_print_str_esc("ETC.");
810        }
811    }
812}
813
814/*
815# define do_buffer_to_unichar(a,b) do { \
816    a = (halfword)str2uni(fileio_state.io_buffer+b); \
817    b += utf8_size(a); \
818} while (0)
819*/
820
821inline static halfword get_unichar_from_buffer(int *b)
822{
823    halfword a = (halfword) ((const unsigned char) *(lmt_fileio_state.io_buffer + *b));
824    if (a <= 0x80) {
825        *b += 1;
826    } else {
827        int al; 
828        a = (halfword) aux_str2uni_len(lmt_fileio_state.io_buffer + *b, &al);
829        *b += al;
830    }
831    return a;
832}
833
834/*tex
835
836    Here's the way we sometimes want to display a token list, given a pointer to its reference count;
837    the pointer may be null.
838
839*/
840
841void tex_token_show(halfword p)
842{
843    if (p && token_link(p)) {
844        tex_show_token_list(token_link(p), 0, 0);
845    }
846}
847
848/*tex
849
850    The next function, |delete_token_ref|, is called when a pointer to a token list's reference
851    count is being removed. This means that the token list should disappear if the reference count
852    was |null|, otherwise the count should be decreased by one. Variable |p| points to the reference
853    count of a token list that is losing one reference.
854
855*/
856
/*tex
    Public accessor: the category code of character |c| as seen by the current input level
    (this simply forwards to the local |tex_aux_the_cat_code| helper).
*/

int tex_get_char_cat_code(int c)
{
    const int catcode = tex_aux_the_cat_code(c);
    return catcode;
}
861
862static void tex_aux_invalid_character_error(void)
863{
864    tex_handle_error(
865        normal_error_type,
866        "Text line contains an invalid character",
867        "A funny symbol that I can't read has just been input. Continue, and I'll forget\n"
868        "that it ever happened."
869    );
870}
871
/*tex Helpers of the file reader that are defined further down. */

static int tex_aux_process_sup_mark(void);

static int tex_aux_scan_control_sequence(void);

/*tex
    What |tex_aux_next_line| tells the file reader: with |next_line_ok| the reader goes on with
    the new line, with |next_line_return| it returns |1| and with |next_line_restart| it
    returns |0|.
*/

typedef enum next_line_retval {
    next_line_ok      = 0,
    next_line_return  = 1,
    next_line_restart = 2
} next_line_retval;

inline static next_line_retval tex_aux_next_line(void);
883
884/*tex
885
886    In case you are getting bored, here is a slightly less trivial routine: Given a string of
887    lowercase letters, like |pt| or |plus| or |width|, the |scan_keyword| routine checks to see
888    whether the next tokens of input match this string. The match must be exact, except that
889    uppercase letters will match their lowercase counterparts; uppercase equivalents are determined
890    by subtracting |"a" - "A"|, rather than using the |uc_code| table, since \TEX\ uses this
891    routine only for its own limited set of keywords.
892
893    If a match is found, the characters are effectively removed from the input and |true| is
894    returned. Otherwise |false| is returned, and the input is left essentially unchanged (except
895    for the fact that some macros may have been expanded, etc.).
896
897    In \LUATEX\ and its follow up we have more keywords and for instance when scanning a box
898    specification that is noticeable because the |scan_keyword| function is a little inefficient
899    in the sense that when there is no match, it will push back what got read so far. So there is
900    token allocation, pushing a level etc involved. Keep in mind that expansion happens here so what
901    gets pushed back is not always literally what we started with.
902
903    In \LUAMETATEX\ we now have a bit different approach. The |scan_mandate_keyword| follows up on
904    |scan_character| so we have a two step approach. We could actually pass a list of valid keywords
905    but that would make for a complex function with no real benefits.
906
907*/
908
909halfword tex_scan_character(const char *s, int left_brace, int skip_space, int skip_relax)
910{
911    halfword save_cur_cs = cur_cs;
912    while (1) {
913        tex_get_x_token();
914        switch (cur_cmd) {
915            case spacer_cmd:
916                if (skip_space) {
917                    break;
918                } else {
919                    goto DONE;
920                }
921            case relax_cmd:
922                if (skip_relax) {
923                    break;
924                } else {
925                    goto DONE;
926                }
927            case letter_cmd:
928            case other_char_cmd:
929                if (cur_chr <= 'z' && strchr(s, cur_chr)) {
930                    cur_cs = save_cur_cs;
931                    return cur_chr;
932                } else {
933                    goto DONE;
934                }
935            case left_brace_cmd:
936                if (left_brace) {
937                    cur_cs = save_cur_cs;
938                    return '{';
939                } else {
940                    goto DONE;
941                }
942            default:
943                goto DONE;
944        }
945    }
946  DONE:
947    tex_back_input(cur_tok);
948    cur_cs = save_cur_cs;
949    return 0;
950}
951
952void tex_aux_show_keyword_error(const char *s)
953{
954    tex_handle_error(
955        normal_error_type,
956        "Valid keyword expected, likely '%s'",
957        s,
958        "You started a keyword but it seems to be an invalid one. The first character(s)\n"
959        "might give you a clue. You might want to quit unwanted lookahead with \\relax."
960    );
961}
962
963/*tex
964    Scanning an optional keyword starts at the beginning. This means that we can also (for instance)
965    have a minus or plus sign which means that we have a different loop than with the alternative
966    that already checked the first character.
967*/
968
969int tex_scan_optional_keyword(const char *s)
970{
971    halfword save_cur_cs = cur_cs;
972    int done = 0;
973    const char *p = s;
974    while (*p) {
975        tex_get_x_token();
976        switch (cur_cmd) {
977            case letter_cmd:
978            case other_char_cmd:
979                if ((cur_chr == *p) || (cur_chr == *p - 'a' + 'A')) {
980                    if (*(++p)) {
981                        done = 1;
982                    } else {
983                        cur_cs = save_cur_cs;
984                        return 1;
985                    }
986                } else if (done) {
987                    goto BAD_NEWS;
988                } else {
989                    // can be a minus or so ! as in \advance\foo -10
990                    tex_back_input(cur_tok);
991                    cur_cs = save_cur_cs;
992                    return 1;
993                }
994                break;
995            case spacer_cmd:  /* normally spaces are not pushed back */
996                if (done) {
997                    goto BAD_NEWS;
998                } else {
999                    break;
1000                }
1001                // fall through
1002            default:
1003                tex_back_input(cur_tok);
1004                if (done) {
1005                    /* unless we accept partial keywords */
1006                    goto BAD_NEWS;
1007                } else {
1008                    cur_cs = save_cur_cs;
1009                    return 0;
1010                }
1011        }
1012    }
1013  BAD_NEWS:
1014    tex_aux_show_keyword_error(s);
1015    cur_cs = save_cur_cs;
1016    return 0;
1017}
1018
1019/*tex
1020    Here we know that the first character(s) matched so we are in the middle of a keyword already
1021    which means a different loop than the previous one. 
1022*/
1023
1024int tex_scan_mandate_keyword(const char *s, int offset)
1025{
1026    halfword save_cur_cs = cur_cs;
1027    int done = 0;
1028 // int done = offset > 0;
1029    const char *p = s + offset; /* offset always > 0 so no issue with +/- */
1030    while (*p) {
1031        tex_get_x_token();
1032        switch (cur_cmd) {
1033            case letter_cmd:
1034            case other_char_cmd:
1035                if ((cur_chr == *p) || (cur_chr == *p - 'a' + 'A')) {
1036                    if (*(++p)) {
1037                        done = 1;
1038                    } else {
1039                        cur_cs = save_cur_cs;
1040                        return 1;
1041                    }
1042                } else {
1043                    goto BAD_NEWS;
1044                }
1045                break;
1046         // case spacer_cmd: /* normally spaces are not pushed back */
1047         // case relax_cmd:  /* normally not, should be option  */
1048         //     if (done) {
1049         //         back_input(cur_tok);
1050         //         goto BAD_NEWS;
1051         //     } else {
1052         //         break;
1053         //     }
1054         // default:
1055         //     goto BAD_NEWS;
1056            case spacer_cmd: /* normally spaces are not pushed back */
1057                if (done) {
1058                    goto BAD_NEWS;
1059                } else {
1060                    break;
1061                }
1062                // fall through
1063            default:
1064                tex_back_input(cur_tok);
1065                /* unless we accept partial keywords */
1066                goto BAD_NEWS;
1067        }
1068    }
1069  BAD_NEWS:
1070    tex_aux_show_keyword_error(s);
1071    cur_cs = save_cur_cs;
1072    return 0;
1073}
1074
1075/*
1076    This is the original scanner with push|-|back. It's a matter of choice: we are more restricted
1077    on the one hand and more loose on the other.
1078*/
1079
1080int tex_scan_keyword(const char *s)
1081{
1082    if (*s) {
1083        halfword h = null;
1084        halfword p = null;
1085        halfword save_cur_cs = cur_cs;
1086        int n = 0;
1087        while (*s) {
1088            /*tex Recursion is possible here! */
1089            tex_get_x_token();
1090            if ((cur_cmd == letter_cmd || cur_cmd == other_char_cmd) && ((cur_chr == *s) || (cur_chr == *s - 'a' + 'A'))) {
1091                p = tex_store_new_token(p, cur_tok);
1092                if (! h) {
1093                    h = p;
1094                }
1095                n++;
1096                s++;
1097            } else if ((p != h) || (cur_cmd != spacer_cmd)) {
1098                tex_back_input(cur_tok);
1099                if (h) {
1100                    tex_begin_backed_up_list(h);
1101                }
1102                cur_cs = save_cur_cs;
1103                return 0;
1104            }
1105        }
1106        if (h) {
1107            tex_flush_token_list_head_tail(h, p, n);
1108        }
1109        cur_cs = save_cur_cs;
1110        return 1;
1111    } else {
1112        /*tex but not with newtokenlib zero keyword simply doesn't match  */
1113        return 0 ;
1114    }
1115}
1116
1117int tex_scan_keyword_case_sensitive(const char *s)
1118{
1119    if (*s) {
1120        halfword h = null;
1121        halfword p = null;
1122        halfword save_cur_cs = cur_cs;
1123        int n = 0;
1124        while (*s) {
1125            tex_get_x_token();
1126            if ((cur_cmd == letter_cmd || cur_cmd == other_char_cmd) && (cur_chr == *s)) {
1127                p = tex_store_new_token(p, cur_tok);
1128                if (! h) {
1129                    h = p;
1130                }
1131                n++;
1132                s++;
1133            } else if ((p != h) || (cur_cmd != spacer_cmd)) {
1134                tex_back_input(cur_tok);
1135                if (h) {
1136                    tex_begin_backed_up_list(h);
1137                }
1138                cur_cs = save_cur_cs;
1139                return 0;
1140            }
1141        }
1142        if (h) {
1143            tex_flush_token_list_head_tail(h, p, n);
1144        }
1145        cur_cs = save_cur_cs;
1146        return 1;
1147    } else {
1148        return 0 ;
1149    }
1150}
1151
1152/*tex
1153
1154    We can not return |undefined_control_sequence| under some conditions (inside |shift_case|,
1155    for example). This needs thinking.
1156
1157*/
1158
1159halfword tex_active_to_cs(int c, int force)
1160{
1161    halfword cs = -1;
1162    if (c >= 0 && c <= max_character_code) {
1163        char utfbytes[8] = { active_character_first, active_character_second, active_character_third, 0 };
1164        aux_uni2string((char *) &utfbytes[3], c);
1165        cs = tex_string_locate(utfbytes, (size_t) utf8_size(c) + 3, force);
1166    }
1167    if (cs < 0) {
1168        cs = tex_string_locate(active_character_unknown, 4, force); /*tex Including the zero sentinel. */
1169    }
1170    return cs;
1171}
1172
1173/*tex
1174
1175    The heart of \TEX's input mechanism is the |get_next| procedure, which we shall develop in the
1176    next few sections of the program. Perhaps we shouldn't actually call it the \quote {heart},
1177    however, because it really acts as \TEX's eyes and mouth, reading the source files and
1178    gobbling them up. And it also helps \TEX\ to regurgitate stored token lists that are to be
1179    processed again.
1180
1181    The main duty of |get_next| is to input one token and to set |cur_cmd| and |cur_chr| to that
1182    token's command code and modifier. Furthermore, if the input token is a control sequence, the
1183    |eqtb| location of that control sequence is stored in |cur_cs|; otherwise |cur_cs| is set to
1184    zero.
1185
1186    Underlying this simple description is a certain amount of complexity because of all the cases
1187    that need to be handled. However, the inner loop of |get_next| is reasonably short and fast.
1188
1189    When |get_next| is asked to get the next token of a |\read| line, it sets |cur_cmd = cur_chr
1190    = cur_cs = 0| in the case that no more tokens appear on that line. (There might not be any
1191    tokens at all, if the |end_line_char| has |ignore| as its catcode.)
1192
1193    The value of |par_loc| is the |eqtb| address of |\par|. This quantity is needed because a
1194    blank line of input is supposed to be exactly equivalent to the appearance of |\par|; we must
1195    set |cur_cs := par_loc| when detecting a blank line.
1196
1197    Parts of |get_next| are executed more often than any other instructions of \TEX. The global
1198    variable |force_eof| is normally |false|; it is set |true| by an |\endinput| command.
1199    |luacstrings| is the number of lua print statements waiting to be input, it is changed by
1200    |lmt_token_call|.
1201
1202    If the user has set the |pausing| parameter to some positive value, and if nonstop mode has
1203    not been selected, each line of input is displayed on the terminal and the transcript file,
1204    followed by |=>|. \TEX\ waits for a response. If the response is simply |carriage_return|,
1205    the line is accepted as it stands, otherwise the line typed is used instead of the line in the
1206    file.
1207
1208    We no longer need the following:
1209
1210*/
1211
1212// void firm_up_the_line(void)
1213// {
1214//     ilimit = fileio_state.io_last;
1215// }
1216
1217/*tex
1218
1219    The other variant gives less clutter in tracing cache usage when profiling and for some files
1220    (like the manual) also a bit of a speedup. Splitting the switch which gives 10 times less Bim
1221    in valgrind! See the \LUATEX\ source for that code.
1222
1223    The big switch changes the state if necessary, and |goto switch| if the current character
1224    should be ignored, or |goto reswitch| if the current character changes to another.
1225
1226    The n-way switch accomplishes the scanning quickly, assuming that a decent \CCODE\ compiler
1227    has translated the code. Note that the numeric values for |mid_line|, |skip_blanks|, and
1228    |new_line| are spaced apart from each other by |max_char_code+1|, so we can add a character's
1229    command code to the state to get a single number that characterizes both.
1230
1231    Remark: checking performance indicated that this switch was the cause of many branch prediction
1232    errors but changing it to:
1233
1234    \starttyping
1235    c = istate + cur_cmd;
1236    if (c == (mid_line_state + letter_cmd) || c == (mid_line_state + other_char_cmd)) {
1237        return 1;
1238    } else if (c >= new_line_state) {
1239        switch (c) {
1240        }
1241    } else if (c >= skip_blanks_state) {
1242        switch (c) {
1243        }
1244    } else if (c >= mid_line_state) {
1245        switch (c) {
1246        }
1247    } else {
1248        istate = mid_line_state;
1249        return 1;
1250    }
1251    \stoptyping
1252
1253    This gives as many prediction errors. So, we can indeed assume that the compiler does the right
1254    job, or that there is simply no other way.
1255
1256    When a line is finished a space is emitted. When a character of type |spacer| gets through, its
1257    character code is changed to |\ =040|. This means that the \ASCII\ codes for tab and space, and
1258    for the space inserted at the end of a line, will be treated alike when macro parameters are
1259    being matched. We do this since such characters are indistinguishable on most computer terminal
1260    displays.
1261
1262*/
1263
1264/*
1265
1266    c = istate + cur_cmd;
1267    if (c == (mid_line_state + letter_cmd) || c == (mid_line_state + other_char_cmd)) {
1268        return 1;
1269    } else if (c >= new_line_state) {
1270        ....
1271    }
1272
1273*/
1274
1275/*tex
1276
1277    This trick has been dropped when the wrapup mechanism had proven to be useful. The idea was
1278    to backport this to \LUATEX\ but some other \PDFTEX\ compatible parstuff made it there and
1279    backporting par related features becomes too messy.
1280
1281    \starttyping
1282    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
1283    cur_cs = lmt_token_state.line_par_loc;
1284    cur_cmd = eq_type(cur_cs);
1285    if (cur_cmd == undefined_cs_cmd) {
1286        cur_cs = lmt_token_state.par_loc;
1287        cur_cmd = eq_type(cur_cs);
1288    }
1289    cur_chr = eq_value(cur_cs);
1290    \stoptyping
1291
1292*/
1293
1294static int tex_aux_get_next_file(void)
1295{
1296  SWITCH:
1297    if (lmt_input_state.cur_input.loc <= lmt_input_state.cur_input.limit) {
1298        /*tex current line not yet finished */
1299        cur_chr = get_unichar_from_buffer(&lmt_input_state.cur_input.loc);
1300      RESWITCH:
1301        if (lmt_input_state.cur_input.cattable == no_catcode_table_preset) {
1302            /* happens seldom: detokenized line */
1303            cur_cmd = cur_chr == ' ' ? spacer_cmd : other_char_cmd;
1304        } else {
1305            cur_cmd = tex_aux_the_cat_code(cur_chr);
1306        }
1307        switch (lmt_input_state.cur_input.state + cur_cmd) {
1308            case mid_line_state    + ignore_cmd:
1309            case skip_blanks_state + ignore_cmd:
1310            case new_line_state    + ignore_cmd:
1311            case skip_blanks_state + spacer_cmd:
1312            case new_line_state    + spacer_cmd:
1313                /*tex Cases where character is ignored. */
1314                goto SWITCH;
1315            case mid_line_state    + escape_cmd:
1316            case new_line_state    + escape_cmd:
1317            case skip_blanks_state + escape_cmd:
1318                /*tex Scan a control sequence. */
1319                lmt_input_state.cur_input.state = (unsigned char) tex_aux_scan_control_sequence();
1320                break;
1321            case mid_line_state    + active_char_cmd:
1322            case new_line_state    + active_char_cmd:
1323            case skip_blanks_state + active_char_cmd:
1324                /*tex Process an active-character. */
1325                if ((lmt_input_state.scanner_status == scanner_is_tolerant || lmt_input_state.scanner_status == scanner_is_matching) && tex_pass_active_math_char(cur_chr)) {
1326                    /*tex We need to intercept a delimiter in arguments. */
1327                } else if ((lmt_input_state.scanner_status == scanner_is_defining || lmt_input_state.scanner_status == scanner_is_absorbing) && tex_pass_active_math_char(cur_chr)) {
1328                    /*tex We are storing stuff in a token list or macro body. */
1329                } else if ((cur_mode == mmode || lmt_nest_state.math_mode) && tex_check_active_math_char(cur_chr)) {
1330                    /*tex We have an intercept. */
1331                } else { 
1332                    cur_cs = tex_active_to_cs(cur_chr, ! lmt_hash_state.no_new_cs);
1333                    cur_cmd = eq_type(cur_cs);
1334                    cur_chr = eq_value(cur_cs);
1335                }
1336                lmt_input_state.cur_input.state = mid_line_state;
1337                break;
1338            case mid_line_state    + superscript_cmd:
1339            case new_line_state    + superscript_cmd:
1340            case skip_blanks_state + superscript_cmd:
1341                /*tex We need to check for multiple ^:
1342                    (0) always check for ^^ ^^^^ ^^^^^^^
1343                    (1) only check in text mode
1344                    (*) never
1345                */
1346                if (sup_mark_mode_par) {
1347                    if (sup_mark_mode_par == 1 && cur_mode != mmode && tex_aux_process_sup_mark()) {
1348                        goto RESWITCH;
1349                    }
1350                } else if (tex_aux_process_sup_mark()) {
1351                    goto RESWITCH;
1352                } else {
1353                    /*tex
1354                        We provide prescripts and shifted script in math mode and avoid fance |^|
1355                        processing in text mode (which is what we do in \CONTEXT).
1356                    */
1357                }
1358                lmt_input_state.cur_input.state = mid_line_state;
1359                break;
1360            case mid_line_state    + invalid_char_cmd:
1361            case new_line_state    + invalid_char_cmd:
1362            case skip_blanks_state + invalid_char_cmd:
1363                /*tex Decry the invalid character and |goto restart|. */
1364                tex_aux_invalid_character_error();
1365                /*tex Because state may be |token_list| now: */
1366                return 0;
1367            case mid_line_state + spacer_cmd:
1368                /*tex Enter |skip_blanks| state, emit a space. */
1369                lmt_input_state.cur_input.state = skip_blanks_state;
1370                cur_chr = ' ';
1371                break;
1372            case mid_line_state + end_line_cmd:
1373                /*tex Finish the line. See note above about dropped |\linepar|. */
1374                lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
1375                cur_cmd = spacer_cmd;
1376                cur_chr = ' ';
1377                break;
1378            case skip_blanks_state + end_line_cmd:
1379            case mid_line_state    + comment_cmd:
1380            case new_line_state    + comment_cmd:
1381            case skip_blanks_state + comment_cmd:
1382                /*tex Finish line, |goto switch|; */
1383                lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
1384                goto SWITCH;
1385            case new_line_state + end_line_cmd:
1386                if (! auto_paragraph_mode(auto_paragraph_go_on)) {
1387                    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
1388                }
1389                /*tex Finish line, emit a |\par|; */
1390                if (auto_paragraph_mode(auto_paragraph_text))  {
1391                    cur_cs = null;
1392                    cur_cmd = end_paragraph_cmd;
1393                    cur_chr = new_line_end_paragraph_code;
1394                 // cur_chr = normal_end_paragraph_code;
1395                } else {
1396                    cur_cs = lmt_token_state.par_loc;
1397                    cur_cmd = eq_type(cur_cs);
1398                    cur_chr = eq_value(cur_cs);
1399                }
1400                break;
1401            case skip_blanks_state + left_brace_cmd:
1402            case new_line_state    + left_brace_cmd:
1403                lmt_input_state.cur_input.state = mid_line_state;
1404                ++lmt_input_state.align_state;
1405                break;
1406            case mid_line_state + left_brace_cmd:
1407                ++lmt_input_state.align_state;
1408                break;
1409            case skip_blanks_state + right_brace_cmd:
1410            case new_line_state    + right_brace_cmd:
1411                lmt_input_state.cur_input.state = mid_line_state;
1412                --lmt_input_state.align_state;
1413                break;
1414            case mid_line_state + right_brace_cmd:
1415                --lmt_input_state.align_state;
1416                break;
1417            case mid_line_state + math_shift_cmd:
1418            case mid_line_state + alignment_tab_cmd:
1419            case mid_line_state + parameter_cmd:
1420            case mid_line_state + subscript_cmd:
1421            case mid_line_state + letter_cmd:
1422            case mid_line_state + other_char_cmd:
1423                break;
1424            /*
1425            case skip_blanks_state + math_shift_cmd:
1426            case skip_blanks_state + alignment_tab_cmd:
1427            case skip_blanks_state + parameter_cmd:
1428            case skip_blanks_state + subscript_cmd:
1429            case skip_blanks_state + letter_cmd:
1430            case skip_blanks_state + other_char_cmd:
1431            case new_line_state    + math_shift_cmd:
1432            case new_line_state    + alignment_tab_cmd:
1433            case new_line_state    + parameter_cmd:
1434            case new_line_state    + subscript_cmd:
1435            case new_line_state    + letter_cmd:
1436            case new_line_state    + other_char_cmd:
1437            */
1438            default:
1439                lmt_input_state.cur_input.state = mid_line_state;
1440                break;
1441        }
1442    } else {
1443        if (! io_token_input(lmt_input_state.cur_input.name)) {
1444            lmt_input_state.cur_input.state = new_line_state;
1445        }
1446        /*tex
1447
1448           Move to next line of file, or |goto restart| if there is no next line, or |return| if a
1449           |\read| line has finished.
1450
1451        */
1452        do {
1453            next_line_retval r = tex_aux_next_line();
1454            if (r == next_line_restart) {
1455                /*tex This happens more often. */
1456                return 0;
1457            } else if (r == next_line_return) {
1458                return 1;
1459            }
1460        } while (0);
1461     /* check_interrupt(); */
1462        goto SWITCH;
1463    }
1464    return 1;
1465}
1466
1467/*tex
1468
1469    Notice that a code like |^^8| becomes |x| if not followed by a hex digit. We only support a
1470    limited set:
1471
1472    \starttyping
1473    ^^^^^^XXXXXX
1474    ^^^^XXXX
1475    ^^XX ^^<char>
1476    \stoptyping
1477
1478*/
1479
/*tex Only digits and lowercase |a| .. |f| count; the argument is evaluated more than once. */

# define is_hex(a) (((a) >= '0' && (a) <= '9') || ((a) >= 'a' && (a) <= 'f'))
1481
1482 inline static halfword tex_aux_two_hex_to_cur_chr(int c1, int c2)
1483 {
1484   return
1485        0x10 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
1486      + 0x01 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10);
1487 }
1488
1489 inline static halfword tex_aux_four_hex_to_cur_chr(int c1, int c2,int c3, int c4)
1490 {
1491   return
1492         0x1000 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
1493       + 0x0100 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10)
1494       + 0x0010 * (c3 <= '9' ? c3 - '0' : c3 - 'a' + 10)
1495       + 0x0001 * (c4 <= '9' ? c4 - '0' : c4 - 'a' + 10);
1496}
1497
1498inline static halfword tex_aux_six_hex_to_cur_chr(int c1, int c2, int c3, int c4, int c5, int c6)
1499{
1500   return
1501         0x100000 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
1502       + 0x010000 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10)
1503       + 0x001000 * (c3 <= '9' ? c3 - '0' : c3 - 'a' + 10)
1504       + 0x000100 * (c4 <= '9' ? c4 - '0' : c4 - 'a' + 10)
1505       + 0x000010 * (c5 <= '9' ? c5 - '0' : c5 - 'a' + 10)
1506       + 0x000001 * (c6 <= '9' ? c6 - '0' : c6 - 'a' + 10);
1507
1508}
1509
1510static int tex_aux_process_sup_mark(void)
1511{
1512    if (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc]) {
1513        if (lmt_input_state.cur_input.loc < lmt_input_state.cur_input.limit) {
1514            if ((cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1]) && (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 2])) {
1515                if ((cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 3]) && (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 4])) {
1516                    if ((lmt_input_state.cur_input.loc + 10) <= lmt_input_state.cur_input.limit) {
1517                        /*tex |^^^^^^XXXXXX| */
1518                        int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  5];
1519                        int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  6];
1520                        int c3 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  7];
1521                        int c4 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  8];
1522                        int c5 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  9];
1523                        int c6 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 10];
1524                        if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1525                            lmt_input_state.cur_input.loc += 11;
1526                            cur_chr = tex_aux_six_hex_to_cur_chr(c1, c2, c3, c4, c5, c6);
1527                            return 1;
1528                        } else {
1529                            tex_handle_error(
1530                                normal_error_type,
1531                                "^^^^^^ needs six hex digits",
1532                                NULL
1533                            );
1534                        }
1535                    } else {
1536                        tex_handle_error(
1537                            normal_error_type,
1538                            "^^^^^^ needs six hex digits, end of input",
1539                            NULL
1540                        );
1541                    }
1542                } else if ((lmt_input_state.cur_input.loc + 6) <= lmt_input_state.cur_input.limit) {
1543                /*tex |^^^^XXXX| */
1544                    int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 3];
1545                    int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 4];
1546                    int c3 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 5];
1547                    int c4 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 6];
1548                    if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1549                        lmt_input_state.cur_input.loc += 7;
1550                        cur_chr = tex_aux_four_hex_to_cur_chr(c1, c2, c3, c4);
1551                        return 1;
1552                    } else {
1553                        tex_handle_error(
1554                            normal_error_type,
1555                            "^^^^ needs four hex digits",
1556                            NULL
1557                        );
1558                    }
1559                } else {
1560                    tex_handle_error(
1561                        normal_error_type,
1562                        "^^^^ needs four hex digits, end of input",
1563                        NULL
1564                    );
1565                }
1566            } else if ((lmt_input_state.cur_input.loc + 2) <= lmt_input_state.cur_input.limit) {
1567                /*tex |^^XX| */
1568                int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1];
1569                int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 2];
1570                if (is_hex(c1) && is_hex(c2)) {
1571                    lmt_input_state.cur_input.loc += 3;
1572                    cur_chr = tex_aux_two_hex_to_cur_chr(c1, c2);
1573                    return 1;
1574                }
1575            }
1576            /*tex The single character case: */
1577            {
1578                int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1];
1579                if (c1 < 0x80) {
1580                    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.loc + 2;
1581                 // if (is_hex(c1) && (iloc <= ilimit)) {
1582                 //     int c2 = fileio_state.io_buffer[iloc];
1583                 //     if (is_hex(c2)) {
1584                 //         ++iloc;
1585                 //         cur_chr = two_hex_to_cur_chr(c1, c2);
1586                 //         return 1;
1587                 //     }
1588                 // }
1589                 // /*tex The somewhat odd cases, often special control characters: */
1590                    cur_chr = (c1 < 0x40 ? c1 + 0x40 : c1 - 0x40);
1591                    return 1;
1592                }
1593            }
1594        }
1595    }
1596    return 0;
1597}
1598
1599/*tex
1600
1601    Control sequence names are scanned only when they appear in some line of a file. Once they have
1602    been scanned the first time, their |eqtb| location serves as a unique identification, so \TEX\
1603    doesn't need to refer to the original name any more except when it prints the equivalent in
1604    symbolic form.
1605
1606    The program that scans a control sequence has been written carefully in order to avoid the
1607    blowups that might otherwise occur if a malicious user tried something like |\catcode'15 = 0|.
1608    The algorithm might look at |buffer[ilimit + 1]|, but it never looks at |buffer[ilimit + 2]|.
1609
1610    If expanded characters like |^^A| or |^^df| appear in or just following a control sequence name,
1611    they are converted to single characters in the buffer and the process is repeated, slowly but
1612    surely.
1613
1614*/
1615
1616/*tex
1617
1618    Whenever we reach the following piece of code, we will have |cur_chr = buffer[k - 1]| and |k <=
1619    ilimit + 1| and |cat = get_cat_code(cat_code_table, cur_chr)|. If an expanded code like |^^A| or
1620    |^^df| appears in |buffer[(k - 1) .. (k + 1)]| or |buffer[(k - 1) .. (k + 2)]|, we will store
1621    the corresponding code in |buffer[k - 1]| and shift the rest of the buffer left two or three
1622    places.
1623
1624*/
1625
1626static int tex_aux_check_expanded_code(int *kk, halfword *chr)
1627{
1628    if (sup_mark_mode_par > 1 || (sup_mark_mode_par == 1 && cur_mode == mmode)) {
1629        return 0;
1630    } else {
1631        int k = *kk;
1632        /* chr is the ^ character or an equivalent one */
1633        if (lmt_fileio_state.io_buffer[k] == *chr && k < lmt_input_state.cur_input.limit) {
1634            int d = 1;
1635            int l;
1636            if ((*chr == lmt_fileio_state.io_buffer[k + 1]) && (*chr == lmt_fileio_state.io_buffer[k + 2])) {
1637                if ((*chr == lmt_fileio_state.io_buffer[k + 3]) && (*chr == lmt_fileio_state.io_buffer[k + 4])) {
1638                    if ((k + 10) <= lmt_input_state.cur_input.limit) {
1639                        int c1 = lmt_fileio_state.io_buffer[k + 6 - 1];
1640                        int c2 = lmt_fileio_state.io_buffer[k + 6    ];
1641                        int c3 = lmt_fileio_state.io_buffer[k + 6 + 1];
1642                        int c4 = lmt_fileio_state.io_buffer[k + 6 + 2];
1643                        int c5 = lmt_fileio_state.io_buffer[k + 6 + 3];
1644                        int c6 = lmt_fileio_state.io_buffer[k + 6 + 4];
1645                        if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
1646                            d = 6;
1647                            *chr = tex_aux_six_hex_to_cur_chr(c1, c2, c3, c4, c5, c6);
1648                        } else {
1649                            tex_handle_error(
1650                                normal_error_type,
1651                                "^^^^^^ needs six hex digits",
1652                                NULL
1653                            );
1654                        }
1655                    } else {
1656                        tex_handle_error(
1657                            normal_error_type,
1658                            "^^^^^^ needs six hex digits, end of input",
1659                            NULL
1660                        );
1661                    }
1662                } else if ((k + 6) <= lmt_input_state.cur_input.limit) {
1663                    int c1 = lmt_fileio_state.io_buffer[k + 4 - 1];
1664                    int c2 = lmt_fileio_state.io_buffer[k + 4    ];
1665                    int c3 = lmt_fileio_state.io_buffer[k + 4 + 1];
1666                    int c4 = lmt_fileio_state.io_buffer[k + 4 + 2];
1667                    if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
1668                        d = 4;
1669                        *chr = tex_aux_four_hex_to_cur_chr(c1, c2, c3, c4);
1670                    } else {
1671                        tex_handle_error(
1672                            normal_error_type,
1673                            "^^^^ needs four hex digits",
1674                            NULL
1675                        );
1676                    }
1677                } else {
1678                    tex_handle_error(
1679                        normal_error_type,
1680                        "^^^^ needs four hex digits, end of input",
1681                        NULL
1682                    );
1683                }
1684            } else {
1685                int c1 = lmt_fileio_state.io_buffer[k + 1];
1686                if (c1 < 0x80) { /* really ? */
1687                    d = 1;
1688                    if (is_hex(c1) && (k + 2) <= lmt_input_state.cur_input.limit) {
1689                        int c2 = lmt_fileio_state.io_buffer[k + 2];
1690                        if (is_hex(c2)) {
1691                            d = 2;
1692                            *chr = tex_aux_two_hex_to_cur_chr(c1, c2);
1693                        } else {
1694                            *chr = (c1 < 0x40 ? c1 + 0x40 : c1 - 0x40);
1695                        }
1696                    } else {
1697                        *chr = (c1 < 0x40 ? c1 + 0x40 : c1 - 0x40);
1698                    }
1699                }
1700            }
1701            if (d > 2) {
1702                d = 2 * d - 1;
1703            } else {
1704                d++;
1705            }
1706            if (*chr <= 0x7F) {
1707                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) *chr;
1708            } else if (*chr <= 0x7FF) {
1709                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xC0 + *chr / 0x40);
1710                k++;
1711                d--;
1712                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + *chr % 0x40);
1713            } else if (*chr <= 0xFFFF) {
1714                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xE0 + *chr / 0x1000);
1715                k++;
1716                d--;
1717                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x1000) / 0x40);
1718                k++;
1719                d--;
1720                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x1000) % 0x40);
1721            } else {
1722                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xF0 + *chr / 0x40000);
1723                k++;
1724                d--;
1725                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x40000) / 0x1000);
1726                k++;
1727                d--;
1728                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + ((*chr % 0x40000) % 0x1000) / 0x40);
1729                k++;
1730                d--;
1731                lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + ((*chr % 0x40000) % 0x1000) % 0x40);
1732            }
1733            l = k;
1734            lmt_input_state.cur_input.limit -= d;
1735            while (l <= lmt_input_state.cur_input.limit) {
1736                lmt_fileio_state.io_buffer[l] = lmt_fileio_state.io_buffer[l + d];
1737                l++;
1738            }
1739            *kk = k;
1740            cur_chr = *chr; /* hm */
1741            return 1;
1742        } else {
1743            return 0;
1744        }
1745    }
1746}
1747
1748static int tex_aux_scan_control_sequence(void)
1749{
1750    int state = mid_line_state;
1751    if (lmt_input_state.cur_input.loc > lmt_input_state.cur_input.limit) {
1752        /*tex |state| is irrelevant in this case. */
1753        cur_cs = null_cs;
1754    } else {
1755        /*tex |cat_code(cur_chr)|, usually: */
1756        while (1) {
1757            int loc = lmt_input_state.cur_input.loc;
1758            halfword chr = get_unichar_from_buffer(&loc);
1759            halfword cat = tex_aux_the_cat_code(chr);
1760            if (cat != letter_cmd || loc > lmt_input_state.cur_input.limit) {
1761                if (cat == spacer_cmd) {
1762                    state = skip_blanks_state;
1763                } else {
1764                    state = mid_line_state;
1765                    if (cat == superscript_cmd && tex_aux_check_expanded_code(&loc, &chr)) {
1766                        continue;
1767                    }
1768                }
1769            } else {
1770                state = skip_blanks_state;
1771//                do {
1772//                    chr = get_unichar_from_buffer(&loc);
1773//                    cat = tex_aux_the_cat_code(chr);
1774//                } while (cat == letter_cmd && loc <= lmt_input_state.cur_input.limit);
1775
1776                while (cat == letter_cmd && loc <= lmt_input_state.cur_input.limit) {
1777                    chr = get_unichar_from_buffer(&loc);
1778                    cat = tex_aux_the_cat_code(chr);
1779                }
1780
1781                /*tex If an expanded \unknown */
1782                if (cat == superscript_cmd && tex_aux_check_expanded_code(&loc, &chr)) {
1783                    continue;
1784                } else if (cat != letter_cmd) {
1785                    /*tex Backtrack one character which can be \UTF. */
1786                    if (chr <= 0x7F) {
1787                        loc -= 1; /* in most cases */
1788                    } else if (chr > 0xFFFF) {
1789                        loc -= 4;
1790                    } else if (chr > 0x7FF) {
1791                        loc -= 3;
1792                    } else /* if (cur_chr > 0x7F) */ {
1793                        loc -= 2;
1794                    }
1795                    /*tex Now |k| points to first nonletter. */
1796                }
1797            }
1798            cur_cs = tex_id_locate(lmt_input_state.cur_input.loc, loc - lmt_input_state.cur_input.loc, ! lmt_hash_state.no_new_cs);
1799            lmt_input_state.cur_input.loc = loc;
1800            break;
1801        }
1802    }
1803    cur_cmd = eq_type(cur_cs);
1804    cur_chr = eq_value(cur_cs);
1805    return state;
1806}
1807
1808/*tex
1809
1810    All of the easy branches of |get_next| have now been taken care of. There is one more branch.
1811    Conversely, the |file_warning| procedure is invoked when a file ends and some groups entered or
1812    conditionals started while reading from that file are still incomplete.
1813
1814*/
1815
1816static void tex_aux_file_warning(void)
1817{
1818    {
1819     // save_state_info saved_save_stack_data = lmt_save_state;
1820        halfword saved_stack_ptr = lmt_save_state.save_stack_data.ptr;
1821        quarterword saved_group = cur_group;
1822        quarterword saved_level = cur_level;
1823        lmt_save_state.save_stack_data.ptr = cur_boundary;
1824        while (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].group != lmt_save_state.save_stack_data.ptr) {
1825            --cur_level;
1826            tex_print_nlp();
1827            tex_print_format("Warning: end of file when %G is incomplete", 1);
1828            cur_group = save_level(lmt_save_state.save_stack_data.ptr);
1829            lmt_save_state.save_stack_data.ptr = save_value(lmt_save_state.save_stack_data.ptr);
1830        }
1831     // lmt_save_state = saved_save_stack_data;
1832        lmt_save_state.save_stack_data.ptr = saved_stack_ptr;
1833        cur_level = saved_level;
1834        cur_group = saved_group;
1835    }
1836    {
1837        condition_state_info saved_condition_state = lmt_condition_state;
1838        while (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].if_ptr != lmt_condition_state.cond_ptr) {
1839            /* todo, more info */
1840            tex_print_nlp();
1841            tex_print_format("Warning: end of file when %C", if_test_cmd, lmt_condition_state.cur_if);
1842            if (lmt_condition_state.if_limit == fi_code) {
1843                tex_print_str_esc("else");
1844            }
1845            if (lmt_condition_state.if_line) {
1846                tex_print_format(" entered on line %i", lmt_condition_state.if_line);
1847            }
1848            tex_print_str(" is incomplete");
1849            lmt_condition_state.cur_if = if_limit_subtype(lmt_condition_state.cond_ptr);
1850            lmt_condition_state.cur_unless = if_limit_unless(lmt_condition_state.cond_ptr);
1851            lmt_condition_state.if_step = if_limit_step(lmt_condition_state.cond_ptr);
1852            lmt_condition_state.if_unless = if_limit_stepunless(lmt_condition_state.cond_ptr);
1853            lmt_condition_state.if_limit = if_limit_type(lmt_condition_state.cond_ptr);
1854            lmt_condition_state.if_line = if_limit_line(lmt_condition_state.cond_ptr);
1855            lmt_condition_state.cond_ptr = node_next(lmt_condition_state.cond_ptr);
1856        }
1857        lmt_condition_state = saved_condition_state;
1858    }
1859    tex_print_nlp();
1860    if (tracing_nesting_par > 1) {
1861        tex_show_context();
1862    }
1863    if (lmt_error_state.history == spotless) {
1864        lmt_error_state.history = warning_issued;
1865    }
1866}
1867
1868static void tex_aux_check_validity(void)
1869{
1870    switch (lmt_input_state.scanner_status) {
1871        case scanner_is_normal:
1872            break;
1873        case scanner_is_skipping:
1874            tex_handle_error(
1875                condition_error_type,
1876                "The file ended while I was skipping conditional text.",
1877                "This kind of error happens when you say '\\if...' and forget the\n"
1878                "matching '\\fi'. It can also be that you  use '\\orelse' or '\\orunless\n'"
1879                "in the wrong way. Or maybe a forbidden control sequence was encountered."
1880            );
1881            break;
1882        case scanner_is_defining:
1883            tex_handle_error(runaway_error_type, "The file ended when scanning a definition.", NULL);
1884            break;
1885        case scanner_is_matching:
1886            tex_handle_error(runaway_error_type, "The file ended when scanning an argument.", NULL);
1887            break;
1888        case scanner_is_tolerant:
1889            break;
1890        case scanner_is_aligning:
1891            tex_handle_error(runaway_error_type, "The file ended when scanning an alignment preamble.", NULL);
1892            break;
1893        case scanner_is_absorbing:
1894            tex_handle_error(runaway_error_type, "The file ended when absorbing something.", NULL);
1895            break;
1896    }
1897}
1898
1899static inline int tex_aux_every_eof(void)
1900{
1901    halfword t = lmt_input_state.in_stack[lmt_input_state.cur_input.index].at_end_of_file;
1902    if (t) {
1903        /* tex Fake one empty line. Never happens in \CONTEXT. */
1904        lmt_input_state.cur_input.limit = lmt_fileio_state.io_first - 1;
1905        lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen = 1;
1906        lmt_input_state.in_stack[lmt_input_state.cur_input.index].at_end_of_file = null;
1907        tex_begin_token_list(t, end_file_text);
1908        tex_delete_token_reference(t);
1909        return 1;
1910    } else if (every_eof_par) {
1911        /* tex Fake one empty line. Never happens in \CONTEXT. */
1912        lmt_input_state.cur_input.limit = lmt_fileio_state.io_first - 1;
1913        lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen = 1;
1914        tex_begin_token_list(every_eof_par, every_eof_text);
1915        return 1;
1916    } else { 
1917        return 0;
1918    }
1919}
1920
/*tex
    Move on to the next line of the current input level. There are three possible outcomes:

    \startitemize
    \startitem
        |next_line_restart|: something was pushed onto the input (a token, token list, node or
        an end-of-file token list) or reading from this level was ended, so the caller has to
        start over.
    \stopitem
    \startitem
        |next_line_return|: the current level is not something we read lines from while the
        input stack is not empty; |cur_cmd| and |cur_chr| are set to zero.
    \stopitem
    \startitem
        |next_line_ok|: a new line is in the buffer and |loc|, |limit| and |io_first| have been
        set up for reading it. This value is also returned after the \quote {end of file
        encountered} error has been reported.
    \stopitem
    \stopitemize
*/

inline static next_line_retval tex_aux_next_line(void)
{
    if (lmt_input_state.cur_input.name > io_initial_input_code) {
        /*tex Read next line of file into |buffer|, or |goto restart| if the file has ended. */
        /*tex When set, no end of line character is stored below; the limit is lowered instead. */
        unsigned inhibit_eol = 0;
        ++lmt_input_state.input_line;
        lmt_fileio_state.io_first = lmt_input_state.cur_input.start;
        if (! lmt_token_state.force_eof) {
            switch (lmt_input_state.cur_input.name) {
                case io_lua_input_code:
                    {
                        halfword result = null;
                        int cattable = 0;
                        int partial = 0;
                        int finalline = 0;
                        int type = lmt_cstring_input(&result, &cattable, &partial, &finalline);
                        switch (type) {
                            case eof_tex_input:
                                lmt_token_state.force_eof = 1;
                                break;
                            case string_tex_input:
                                /*tex string */
                                lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. */
                                lmt_input_state.cur_input.cattable = (short) cattable;
                                lmt_input_state.cur_input.partial = (signed char) partial;
                                if (finalline || partial || cattable == no_catcode_table_preset) {
                                    inhibit_eol = 1;
                                }
                                if (! partial) {
                                    lmt_input_state.cur_input.state = new_line_state;
                                }
                                break;
                            case token_tex_input:
                                /*tex token */
                                {
                                    halfword t = result - cs_token_flag;
                                    /*tex An end of input control sequence ends reading from this level first. */
                                    if (t >= 0 && eq_type(t) == input_cmd && eq_value(t) == end_of_input_code && lmt_input_state.cur_input.index > 0) {
                                        tex_end_file_reading(); 
                                    }
                                    tex_back_input(result);
                                    return next_line_restart;
                                }
                            case token_list_tex_input:
                                /*tex token list */
                                if (result) {
                                    tex_begin_backed_up_list(result);
                                }
                                return next_line_restart;
                            case node_tex_input:
                                /*tex node */
                                if (node_token_overflow(result)) {
                                    /* we could link them and avoid one input level */
                                    tex_back_input(token_val(ignore_cmd, node_token_lsb(result)));
                                    tex_reinsert_token(token_val(node_cmd, node_token_msb(result)));
                                    return next_line_restart;
                                } else {
                                    /*tex |0x10FFFF == 1114111| */
                                    tex_back_input(token_val(node_cmd, result));
                                    return next_line_restart;
                                }
                            default:
                                lmt_token_state.force_eof = 1;
                                break;
                        }
                        break;
                    }
                case io_token_input_code:
                case io_token_eof_input_code:
                    {
                        /* can be simplified but room for extensions now */
                        halfword result = null;
                        int cattable = 0;
                        int partial = 0;
                        int finalline = 0;
                        int type = lmt_cstring_input(&result, &cattable, &partial, &finalline);
                        switch (type) {
                            case eof_tex_input:
                                lmt_token_state.force_eof = 1;
                                if (lmt_input_state.cur_input.name == io_token_eof_input_code && tex_aux_every_eof()) {
                                    return next_line_restart;
                                } 
                                break;
                            case string_tex_input:
                                /*tex string */
                                lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. */
                                lmt_input_state.cur_input.cattable = (short) cattable;
                                lmt_input_state.cur_input.partial = (signed char) partial;
                                /*tex Only the |io_token_eof_input_code| variant keeps the end of line. */
                                inhibit_eol = lmt_input_state.cur_input.name != io_token_eof_input_code;
                                if (! partial) {
                                    lmt_input_state.cur_input.state = new_line_state;
                                }
                                break;
                            default:
                                if (result) {
                                    /*tex Can't happen: lua token and node output mixed in here */
                                }
                                lmt_token_state.force_eof = 1;
                                break;
                        }
                        break;
                    }
                case io_tex_macro_code:
                    /* this can't happen and will fail with the next line check */
             // case io_file_input_code:
                default:
                    if (tex_lua_input_ln()) {
                        /*tex Not end of file, set |ilimit|. */
                        lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. */
                        lmt_input_state.cur_input.cattable = default_catcode_table_preset;
                        break;
                    } else if (! lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen && tex_aux_every_eof()) {
                        /*tex An end-of-file token list was started, which happens once per file. */
                        return next_line_restart;
                    } else {
                        tex_aux_check_validity();
                        lmt_token_state.force_eof = 1;
                        break;
                    }
            }
        }
        if (lmt_token_state.force_eof) {
            if (tracing_nesting_par > 0) {
                if ((lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].group != cur_boundary) || (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].if_ptr != lmt_condition_state.cond_ptr)) {
                    if (! io_token_input(lmt_input_state.cur_input.name)) {
                        /*tex Give warning for some unfinished groups and/or conditionals. */
                        tex_aux_file_warning();
                    }
                }
            }
            if (io_file_input(lmt_input_state.cur_input.name)) {
                tex_report_stop_file();
                --lmt_input_state.open_files;
            }
            /*tex We're done with this level, so the flag is reset and the caller restarts. */
            lmt_token_state.force_eof = 0;
            tex_end_file_reading();
            return next_line_restart;
        } else {
            /*tex Either drop the last position of the line or put the end of line character there. */
            if (inhibit_eol || end_line_char_inactive) {
                lmt_input_state.cur_input.limit--;
            } else {
                lmt_fileio_state.io_buffer[lmt_input_state.cur_input.limit] = (unsigned char) end_line_char_par;
            }
            lmt_fileio_state.io_first = lmt_input_state.cur_input.limit + 1;
            lmt_input_state.cur_input.loc = lmt_input_state.cur_input.start;
            /*tex We're ready to read. */
        }
    } else if (lmt_input_state.input_stack_data.ptr > 0) {
        cur_cmd = 0;
        cur_chr = 0;
        return next_line_return;
    } else {
        /*tex A somewhat weird check: */
        switch (lmt_print_state.selector) {
            case no_print_selector_code:
            case terminal_selector_code:
                tex_open_log_file();
                break;
        }
        tex_handle_error(eof_error_type, "end of file encountered", NULL);
        /*tex Just in case it is not handled in a callback: */
        if (lmt_error_state.interaction > nonstop_mode) {
            tex_fatal_error("aborting job");
        }
    }
    /*tex We're in a loop and restart: */
    return next_line_ok;
}
2087
2088halfword tex_get_at_end_of_file(void)
2089{
2090    for (int i = lmt_input_state.input_stack_data.ptr; i > 0; i--) {
2091        if (lmt_input_state.input_stack[i].name == io_file_input_code) { 
2092            return lmt_input_state.in_stack[lmt_input_state.input_stack[i].index].at_end_of_file;
2093        }
2094    }
2095    return null;
2096}
2097
2098void tex_set_at_end_of_file(halfword h)
2099{
2100    for (int i = lmt_input_state.input_stack_data.ptr; i > 0; i--) {
2101        if (lmt_input_state.input_stack[i].name == io_file_input_code) { 
2102            lmt_input_state.in_stack[lmt_input_state.input_stack[i].index].at_end_of_file = h;
2103            return;
2104        }
2105    }
2106    tex_flush_token_list(h);
2107}
2108
2109/*tex
2110    Let's consider now what happens when |get_next| is looking at a token list.
2111*/
2112
2113static int tex_aux_get_next_tokenlist(void)
2114{
2115    halfword t = token_info(lmt_input_state.cur_input.loc);
2116    /*tex Move to next. */
2117    lmt_input_state.cur_input.loc = token_link(lmt_input_state.cur_input.loc);
2118    if (t >= cs_token_flag) {
2119        /*tex A control sequence token */
2120        cur_cs = t - cs_token_flag;
2121        cur_cmd = eq_type(cur_cs);
2122        if (cur_cmd == deep_frozen_dont_expand_cmd) {
2123            /*tex
2124
2125                Get the next token, suppressing expansion. The present point in the program is
2126                reached only when the |expand| routine has inserted a special marker into the
2127                input. In this special case, |token_info(iloc)| is known to be a control sequence
2128                token, and |token_link(iloc) = null|.
2129
2130            */
2131            cur_cs = token_info(lmt_input_state.cur_input.loc) - cs_token_flag;
2132            lmt_input_state.cur_input.loc = null;
2133            cur_cmd = eq_type(cur_cs);
2134            if (cur_cmd > max_command_cmd) {
2135                cur_cmd = relax_cmd;
2136             // cur_chr = no_expand_flag;
2137                cur_chr = no_expand_relax_code;
2138                return 1;
2139            }
2140        }
2141        cur_chr = eq_value(cur_cs);
2142    } else {
2143        cur_cmd = token_cmd(t);
2144        cur_chr = token_chr(t);
2145        switch (cur_cmd) {
2146            case left_brace_cmd:
2147                ++lmt_input_state.align_state;
2148                break;
2149            case right_brace_cmd:
2150                --lmt_input_state.align_state;
2151                break;
2152            case active_char_cmd:
2153                if ((cur_mode == mmode || lmt_nest_state.math_mode) && tex_check_active_math_char(cur_chr)) {
2154                    /*tex We have an intercept. */
2155                }
2156                break;
2157            case parameter_reference_cmd:
2158                { 
2159                    /*tex Insert macro parameter and |goto restart|. */
2160                    halfword p = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + cur_chr - 1];
2161                    if (p) {
2162                        tex_begin_parameter_list(p);
2163                    }
2164                    return 0;
2165                }
2166        }
2167    }
2168    return 1;
2169}
2170
2171/*tex
2172
2173    Now we're ready to take the plunge into |get_next| itself. Parts of this routine are executed
2174    more often than any other instructions of \TEX. This sets |cur_cmd|, |cur_chr|, |cur_cs| to
2175    next token.
2176
2177    Handling alignments is interwoven because there we switch between constructing cells and rows
2178    (node lists) based on templates that are token lists. This is why in several places we find
2179    checks for |align_state|.
2180
2181*/
2182
2183void tex_get_next(void)
2184{
2185    while (1) {
2186        cur_cs = 0;
2187        if (lmt_input_state.cur_input.state != token_list_state) {
2188            /*tex Input from external file, |goto restart| if no input found. */
2189            if (! tex_aux_get_next_file()) {
2190                continue;
2191            } else {
2192                /*tex Check align state later on! */
2193            }
2194        } else if (! lmt_input_state.cur_input.loc) {
2195            /*tex List exhausted, resume previous level. */
2196            tex_end_token_list();
2197            continue;
2198        } else if (! tex_aux_get_next_tokenlist()) {
2199            /*tex Parameter needs to be expanded. */
2200            continue;
2201        }
2202//        if ((! lmt_input_state.align_state) && (cur_cmd == alignment_tab_cmd || cur_cmd == alignment_cmd)) {
2203//            /*tex If an alignment entry has just ended, take appropriate action. */
2204//            tex_insert_alignment_template();
2205//            continue;
2206//        } else {
2207//            break;
2208//        }
2209        switch (cur_cmd) { 
2210            case alignment_tab_cmd:
2211            case alignment_cmd:
2212                /*tex If an alignment entry has just ended, take appropriate action. */
2213                if (lmt_input_state.align_state) {
2214                    return;
2215                } else {
2216                    tex_insert_alignment_template();
2217                    continue;
2218                }
2219            default:
2220                return;
2221        }
2222    }
2223}
2224
2225void tex_get_next_non_spacer(void)
2226{
2227    while (1) {
2228        cur_cs = 0;
2229        if (lmt_input_state.cur_input.state != token_list_state) {
2230            /*tex Input from external file, |goto restart| if no input found. */
2231            if (! tex_aux_get_next_file()) {
2232                continue;
2233            } else {
2234                /*tex Check align state later on! */
2235            }
2236        } else if (! lmt_input_state.cur_input.loc) {
2237            /*tex List exhausted, resume previous level. */
2238            tex_end_token_list();
2239            continue;
2240        } else if (! tex_aux_get_next_tokenlist()) {
2241            /*tex Parameter needs to be expanded. */
2242            continue;
2243        }
2244        switch (cur_cmd) { 
2245            case spacer_cmd:
2246                continue;
2247            case alignment_tab_cmd:
2248            case alignment_cmd:
2249                /*tex If an alignment entry has just ended, take appropriate action. */
2250                if (lmt_input_state.align_state) {
2251                    return;
2252                } else {
2253                    tex_insert_alignment_template();
2254                    continue;
2255                }
2256            default:
2257                return;
2258        }
2259    }
2260}
2261
2262/*tex
2263
2264    Since |get_next| is used so frequently in \TEX, it is convenient to define three related
2265    procedures that do a little more:
2266
2267    \startitemize
2268        \startitem
2269            |get_token| not only sets |cur_cmd| and |cur_chr|, it also sets |cur_tok|, a packed
2270            halfword version of the current token.
2271        \stopitem
2272        \startitem
2273            |get_x_token|, meaning \quote {get an expanded token}, is like |get_token|, but if the
2274            current token turns out to be a user-defined control sequence (i.e., a macro call), or
2275            a conditional, or something like |\topmark| or |\expandafter| or |\csname|, it is
2276            eliminated from the input by beginning the expansion of the macro or the evaluation of
2277            the conditional.
2278        \stopitem
2279        \startitem
2280            |x_token| is like |get_x_token| except that it assumes that |get_next| has already been
2281            called.
2282        \stopitem
2283    \stopitemize
2284
2285    In fact, these three procedures account for almost every use of |get_next|. No new control
2286    sequences will be defined except during a call of |get_token|, or when |\csname| compresses a
2287    token list, because |no_new_control_sequence| is always |true| at other times.
2288
2289    This sets |cur_cmd|, |cur_chr|, |cur_tok|. For convenience we also return the token because in
2290    some places we store it and then some direct assignment looks a bit nicer.
2291
2292*/
2293
2294halfword tex_get_token(void)
2295{
2296    lmt_hash_state.no_new_cs = 0;
2297    tex_get_next();
2298    lmt_hash_state.no_new_cs = 1;
2299    cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr);
2300    return cur_tok;
2301}
2302
2303/*tex
2304
2305    The |get_x_or_protected| procedure is like |get_x_token| except that protected macros are not
2306    expanded. It sets |cur_cmd|, |cur_chr|, |cur_tok|, and expands non-protected macros.
2307
2308*/
2309
2310void tex_get_x_or_protected(void)
2311{
2312    lmt_hash_state.no_new_cs = 0;
2313    while (1) {
2314        tex_get_next();
2315        if (cur_cmd <= max_command_cmd || is_protected_cmd(cur_cmd)) {
2316            break;
2317        } else {
2318            tex_expand_current_token();
2319        }
2320    }
2321    cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr); /* needed afterwards ? */
2322    lmt_hash_state.no_new_cs = 1;
2323}
2324
2325/*tex This changes the string |s| to a token list. */
2326
2327halfword tex_string_to_toks(const char *ss)
2328{
2329    const char *s = ss;
2330    const char *se = ss + strlen(s);
2331    /*tex tail of the token list */
2332    halfword h = null;
2333    halfword p = null;
2334    /*tex new node being added to the token list via |store_new_token| */
2335    while (s < se) {
2336        int tl; 
2337        halfword t = (halfword) aux_str2uni_len((const unsigned char *) s, &tl);
2338        s += tl;
2339        if (t == ' ') {
2340            t = space_token;
2341        } else {
2342            t += other_token;
2343        }
2344        p = tex_store_new_token(p, t);
2345        if (! h) {
2346            h = p;
2347        }
2348    }
2349    return h;
2350}
2351
2352/*tex
2353
2354    The token lists for macros and for other things like |\mark| and |\output| and |\write| are
2355    produced by a procedure called |scan_toks|.
2356
2357    Before we get into the details of |scan_toks|, let's consider a much simpler task, that of
2358    converting the current string into a token list. The |str_toks| function does this; it
2359    classifies spaces as type |spacer| and everything else as type |other_char|.
2360
2361    The token list created by |str_toks| begins at |link(temp_token_head)| and ends at the value
2362    |p| that is returned. If |p = temp_token_head|, the list is empty.
2363
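    |lmt_str_toks| is almost identical, but it also escapes the characters that \LUA\ considers
    special while scanning a literal string: a backslash and both quote characters get an
    |escape_token| in front, and a newline or carriage return becomes an escaped |n| or |r|.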
2366*/
2367
2368static halfword lmt_str_toks(lstring b) /* returns head */
2369{
2370    unsigned char *k = (unsigned char *) b.s;
2371    halfword head = null;
2372    halfword tail = head;
2373    while (k < (unsigned char *) b.s + b.l) {
2374        int tl; 
2375        halfword t = aux_str2uni_len(k, &tl);
2376        k += tl;
2377        if (t == ' ') {
2378            t = space_token;
2379        } else {
2380            if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13)) {
2381                tail = tex_store_new_token(tail, escape_token);
2382                if (! head) {
2383                    head = tail;
2384                }
2385                if (t == 10) {
2386                    t = 'n';
2387                } else if (t == 13) {
2388                    t = 'r';
2389                }
2390            }
2391            t += other_token;
2392        }
2393        tail = tex_store_new_token(tail, t);
2394        if (! head) {
2395            head = tail;
2396        }
2397    }
2398    return head;
2399}
2400
2401/*tex
2402
2403    Incidentally, the main reason for wanting |str_toks| is the function |the_toks|, which has
2404    similar input/output characteristics. This changes the string |str_pool[b .. pool_ptr]| to a
2405    token list:
2406
2407*/
2408
2409halfword tex_str_toks(lstring s, halfword *tail)
2410{
2411    halfword h = null;
2412    halfword p = null;
2413    if (s.s) {
2414        unsigned char *k = s.s;
2415        unsigned char *l = k + s.l;
2416        while (k < l) {
2417            int tl;
2418            halfword t = aux_str2uni_len(k, &tl);
2419            if (t == ' ') {
2420                t = space_token;
2421            } else {
2422                t += other_token;
2423            }
2424            k += tl;
2425            p = tex_store_new_token(p, t);
2426            if (! h) {
2427                h = p;
2428            }
2429        }
2430    }
2431    if (tail) {
2432        *tail = null;
2433    }
2434    return h;
2435}
2436
2437halfword tex_cur_str_toks(halfword *tail)
2438{
2439    halfword h = null;
2440    halfword p = null;
2441    unsigned char *k = (unsigned char *) lmt_string_pool_state.string_temp;
2442    if (k) {
2443        unsigned char *l = k + lmt_string_pool_state.string_temp_top;
2444        /*tex tail of the token list */
2445        while (k < l) {
2446            /*tex token being appended */
2447            int tl;
2448            halfword t = aux_str2uni_len(k, &tl);
2449            if (t == ' ') {
2450                t = space_token;
2451            } else {
2452                t += other_token;
2453            }
2454            k += tl;
2455            p = tex_store_new_token(p, t);
2456            if (! h) {
2457                h = p;
2458            }
2459        }
2460    }
2461    tex_reset_cur_string();
2462    if (tail) {
2463        *tail = p;
2464    }
2465    return h;
2466}
2467
2468/*tex
2469
2470    Most of the converter is similar to the one I made for macro so at some point I can make a
2471    helper; also todo: there is no need to go through the pool.
2472
2473*/
2474
2475halfword tex_str_scan_toks(int ct, lstring ls)
2476{
2477    /*tex index into string */
2478    unsigned char *k = ls.s;
2479    unsigned char *l = k + ls.l;
2480    /*tex tail of the token list */
2481    halfword h = null;
2482    halfword p = null;
2483    while (k < l) {
2484        int cc;
2485        /*tex token being appended */
2486        int lt;
2487        halfword t = aux_str2uni_len(k, &lt);
2488        k += lt;
2489        cc = tex_get_cat_code(ct, t);
2490        if (cc == 0) {
2491            /*tex We have a potential control sequence so we check for it. */
2492            int lname = 0 ;
2493            int s = 0 ;
2494            int c = 0 ;
2495            unsigned char *name = k ;
2496            while (k < l) {
2497                t = (halfword) aux_str2uni_len((const unsigned char *) k, &s);
2498                c = tex_get_cat_code(ct, t);
2499                if (c == 11) {
2500                    k += s ;
2501                    lname += s ;
2502                } else if (c == 10) {
2503                    /*tex We ignore a trailing space like normal scanning does. */
2504                    k += s ;
2505                    break ;
2506                } else {
2507                    break ;
2508                }
2509            }
2510            if (s > 0) {
2511                /*tex We have a potential |\cs|. */
2512                halfword cs = tex_string_locate_only((const char *) name, lname);
2513                if (cs == undefined_control_sequence) {
2514                    /*tex Let's play safe and backtrack. */
2515                    t += cc * (1<<21);
2516                    k = name ;
2517                } else {
2518                    t = cs_token_flag + cs;
2519                }
2520            } else {
2521                /*tex
2522                    Just a character with some meaning, so |\unknown| becomes effectively
2523                    |\unknown| assuming that |\\| has some useful meaning of course.
2524                */
2525                t += cc * (1 << 21);
2526                k = name ;
2527            }
2528        } else {
2529            /*tex
2530                Whatever token, so for instance $x^2$ just works given a \TEX\ catcode regime.
2531            */
2532            t += cc * (1 << 21);
2533        }
2534        p = tex_store_new_token(p, t);
2535        if (! h) {
2536            h = p;
2537        }
2538    }
2539    return h;
2540}
2541
2542/* these two can be combined, then we can avoid the h check  */
2543
2544static void tex_aux_set_toks_register(halfword loc, singleword cmd, halfword t, int g)
2545{
2546    halfword ref = get_reference_token();
2547    set_token_link(ref, t);
2548    tex_define((g > 0) ? global_flag_bit : 0, loc, cmd == internal_toks_cmd ? internal_toks_reference_cmd : register_toks_reference_cmd, ref);
2549}
2550
2551static halfword tex_aux_append_copied_toks_list(halfword loc, singleword cmd, int g, halfword s, halfword t, halfword *tail)
2552{
2553    halfword ref = get_reference_token();
2554    halfword p = ref;
2555    while (s) {
2556        p = tex_store_new_token(p, token_info(s));
2557        s = token_link(s);
2558    }
2559    while (t) {
2560        p = tex_store_new_token(p, token_info(t));
2561        t = token_link(t);
2562    }
2563    tex_define((g > 0) ? global_flag_bit : 0, loc, cmd == internal_toks_cmd ? internal_toks_reference_cmd : register_toks_reference_cmd, ref);
2564    if (tail) { 
2565        *tail = p;
2566    }
2567    return ref;
2568}
2569
2570/*tex Public helper: */
2571
2572halfword tex_copy_token_list(halfword h1, halfword *t)
2573{
2574    halfword h2 = tex_store_new_token(null, token_info(h1));
2575    halfword t1 = token_link(h1);
2576    halfword t2 = h2;
2577    while (t1) {
2578        t2 = tex_store_new_token(t2, token_info(t1));
2579        t1 = token_link(t1);
2580    }
2581    if (t) {
2582        *t = t2;
2583    }
2584    return h2;
2585}
2586
2587/*tex
2588
2589    At some point I decided to implement the following primitives:
2590
2591    \starttabulate[|T||T||]
2592    \NC 0 \NC \type {toksapp}   \NC 1 \NC \type {etoksapp} \NC \NR
2593    \NC 2 \NC \type {tokspre}   \NC 3 \NC \type {etokspre} \NC \NR
2594    \NC 4 \NC \type {gtoksapp}  \NC 5 \NC \type {xtoksapp} \NC \NR
2595    \NC 6 \NC \type {gtokspre}  \NC 7 \NC \type {xtokspre} \NC \NR
2596    \stoptabulate
2597
    These append and prepend tokens to token lists. In \CONTEXT\ we always had macros doing something
    like that. It was only a few years later that I ran again into an article that Taco and I wrote
    in 1999 in the NTG Maps about an extension to \ETEX\ (called eetex). The first revelation was
    that I had completely forgotten about it, which can be explained by the two decade time lapse.
    The second was that Taco actually added that to the program at that time, so I could have used
    (parts of) that code. Anyway, among the other proposed (and implemented) features were
    manipulating lists and ways to output packed data to the \DVI\ files (numbers packed into 1 up
    to 4 bytes). Maybe some day I'll have a go at lists, although with today's computers there is
    not that much to gain. Also, \CONTEXT\ progressed to different internals so the urge is no
    longer there. The \SGML\ mode that was discussed there is also no longer that relevant given
    that we have \LUA.
2608
2609    If we want to handle macros too we really need to distinguish between toks and macros with
2610    |cur_chr| above, but not now. We can't expand, and have to use |get_r_token| or so. I don't need
2611    it anyway.
2612
2613    \starttyping
2614    get_r_token();
2615    if (cur_cmd == call_cmd) {
2616        nt = cur_cs;
2617        target = equiv(nt);
2618    } else {
2619        // some error message
2620    }
2621    \stoptyping
2622*/
2623
/*tex
    We can only change a list in place when the register at |loc| was set at the current level
    and the reference count of the list |target| is zero.
*/

# define immediate_permitted(loc,target) ( \
    (eq_level((loc)) == cur_level) && \
    (get_token_reference((target)) == 0) \
)
2625
/*tex The ways in which a source list can be combined with a target token register. */

typedef enum combine_operations {
    combine_assign  = 0, /*tex replace the target */
    combine_append  = 1, /*tex add the source after the target */
    combine_prepend = 2, /*tex add the source before the target */
} combine_operations;
2631
2632void tex_run_combine_the_toks(void)
2633{
2634    halfword source = null;
2635    halfword target = null;
2636    halfword append, expand, global;
2637    halfword nt, ns;
2638    singleword cmd;
2639    /* */
2640    switch (cur_chr) {
2641        case expanded_toks_code:                append = combine_assign;  global = 0; expand = 1; break;
2642        case append_toks_code:                  append = combine_append;  global = 0; expand = 0; break;
2643        case append_expanded_toks_code:         append = combine_append;  global = 0; expand = 1; break;
2644        case prepend_toks_code:                 append = combine_prepend; global = 0; expand = 0; break;
2645        case prepend_expanded_toks_code:        append = combine_prepend; global = 0; expand = 1; break;
2646        case global_expanded_toks_code:         append = combine_assign;  global = 1; expand = 1; break;
2647        case global_append_toks_code:           append = combine_append;  global = 1; expand = 0; break;
2648        case global_append_expanded_toks_code:  append = combine_append;  global = 1; expand = 1; break;
2649        case global_prepend_toks_code:          append = combine_prepend; global = 1; expand = 0; break;
2650        case global_prepend_expanded_toks_code: append = combine_prepend; global = 1; expand = 1; break;
2651        default:                                append = combine_assign;  global = 0; expand = 0; break;
2652    }
2653    /*tex The target. */
2654    tex_get_x_token();
2655    if (cur_cmd == register_toks_cmd || cur_cmd == internal_toks_cmd) {
2656        nt = eq_value(cur_cs);
2657        cmd = (singleword) cur_cmd;
2658    } else {
2659        /*tex Maybe a number. */
2660        tex_back_input(cur_tok);
2661        nt = register_toks_location(tex_scan_toks_register_number());
2662        cmd = register_toks_cmd;
2663    }
2664    target = eq_value(nt);
2665    /*tex The source. */
2666    do {
2667        tex_get_x_token();
2668    } while (cur_cmd == spacer_cmd);
2669    if (cur_cmd == left_brace_cmd) {
2670        source = expand ? tex_scan_toks_expand(1, NULL, 0, 0) : tex_scan_toks_normal(1, NULL);
2671        /*tex The action. */
2672        if (source) {
2673            if (target) {
2674                halfword s = token_link(source);
2675                if (s) {
2676                    halfword t = token_link(target);
2677                    if (! t) {
2678                        /*tex Can this happen? */
2679                        set_token_link(target, s);
2680                        token_link(source) = null;
2681                    } else {
2682                        switch (append) {
2683                            case combine_assign:
2684                                goto ASSIGN_1;
2685                            case 1:
2686                                /*append */
2687                                if (immediate_permitted(nt,target)) {
2688                                    halfword p = t;
2689                                    while (token_link(p)) {
2690                                        p = token_link(p);
2691                                    }
2692                                    token_link(p) = s;
2693                                    token_link(source) = null;
2694                                } else {
2695                                    tex_aux_append_copied_toks_list(nt, cmd, global, t, s, NULL);
2696                                }
2697                                break;
2698                            case 2:
2699                                /* prepend */
2700                                if (immediate_permitted(nt,target)) {
2701                                    halfword p = s;
2702                                    while (token_link(p)) {
2703                                        p = token_link(p);
2704                                    }
2705                                    token_link(source) = null;
2706                                    set_token_link(p, t);
2707                                    set_token_link(target, s);
2708                                } else {
2709                                    tex_aux_append_copied_toks_list(nt, cmd, global, s, t, NULL);
2710                                }
2711                                break;
2712                        }
2713                    }
2714                }
2715            } else {
2716                ASSIGN_1:
2717                tex_aux_set_toks_register(nt, cmd, token_link(source), global);
2718                token_link(source) = null;
2719            }
2720            tex_flush_token_list(source);
2721        }
2722    } else {
2723        /* cf luatex we don't handle expand here */
2724        if (cur_cmd == register_toks_cmd) {
2725            ns = register_toks_number(eq_value(cur_cs));
2726        } else if (cur_cmd == internal_toks_cmd) {
2727            ns = internal_toks_number(eq_value(cur_cs));
2728        } else {
2729            ns = tex_scan_toks_register_number();
2730        }
2731        /*tex The action. */
2732        source = toks_register(ns);
2733        if (source) {
2734            if (target) {
2735                if (expand) { 
2736                    halfword defref = lmt_input_state.def_ref;
2737                    tex_back_input(right_brace_token + '}');
2738                    tex_begin_token_list(source, token_text);
2739                    source = tex_scan_toks_expand(1, NULL, 0, 1);
2740                    lmt_input_state.def_ref = defref;
2741                    switch (append) {
2742                        case combine_assign:
2743                            eq_value(nt) = source;
2744                            break;
2745                        case combine_append:
2746                            if (immediate_permitted(nt, target)) {
2747                                halfword p = tex_tail_of_token_list(token_link(target));
2748                                token_link(p) = token_link(source);
2749                            } else {
2750                                halfword tail; 
2751                                tex_aux_append_copied_toks_list(nt, cmd, global, target, null, &tail);
2752                                token_link(tail) = token_link(source);
2753                            }
2754                            tex_put_available_token(source);
2755                            break;
2756                        case combine_prepend:
2757                            if (immediate_permitted(nt, target)) {
2758                                halfword p = tex_tail_of_token_list(token_link(source));
2759                                token_link(p) = token_link(target);
2760                                token_link(target) = token_link(source);
2761                            } else {
2762                                halfword head = tex_aux_append_copied_toks_list(nt, cmd, global, target, null, NULL);
2763                                halfword tail = tex_tail_of_token_list(token_link(source));
2764                                token_link(tail) = token_link(head);
2765                                token_link(head) = token_link(source);
2766                            }
2767                            tex_put_available_token(source);
2768                            break;
2769                    }
2770                } else { 
2771                    halfword t = token_link(target);
2772                    halfword s = token_link(source);
2773                    switch (append) {
2774                        case combine_assign:
2775                            tex_add_token_reference(source);
2776                            eq_value(nt) = source;
2777                            break;
2778                        case combine_append:
2779                            if (immediate_permitted(nt, target)) {
2780                                halfword p = tex_tail_of_token_list(t);
2781                                while (s) {
2782                                    p = tex_store_new_token(p, token_info(s));
2783                                    s = token_link(s);
2784                                }
2785                            } else {
2786                                tex_aux_append_copied_toks_list(nt, cmd, global, t, s, NULL);
2787                            }
2788                            break;
2789                        case combine_prepend:
2790                            if (immediate_permitted(nt, target)) {
2791                                halfword h = null;
2792                                halfword p = null;
2793                                while (s) {
2794                                    p = tex_store_new_token(p, token_info(s));
2795                                    if (! h) {
2796                                        h = p;
2797                                    }
2798                                    s = token_link(s);
2799                                }
2800                                set_token_link(p, t);
2801                                set_token_link(target, h);
2802                            } else {
2803                                tex_aux_append_copied_toks_list(nt, cmd, global, s, t, NULL);
2804                            }
2805                            break;
2806                    }
2807                }
2808            } else if (expand) { 
2809                halfword defref = lmt_input_state.def_ref;
2810                tex_back_input(right_brace_token + '}');
2811                tex_begin_token_list(source, token_text);
2812                source = tex_scan_toks_expand(1, NULL, 0, 1);
2813                eq_value(nt) = source;
2814                lmt_input_state.def_ref = defref;
2815            } else {
2816                // set_toks_register(nt, source, global);
2817                tex_add_token_reference(source);
2818                eq_value(nt) = source;
2819            }
2820        }
2821    }
2822}
2823
2824/*tex
2825
2826    This routine, used in the next one, prints the job name, possibly modified by the
2827    |process_jobname| callback.
2828
2829*/
2830
2831static void tex_aux_print_job_name(void)
2832{
2833    if (lmt_fileio_state.job_name) {
2834        /*tex \CCODE\ strings for jobname before and after processing. */
2835        char *s = lmt_fileio_state.job_name;
2836        int callback_id = lmt_callback_defined(process_jobname_callback);
2837        if (callback_id > 0) {
2838            char *ss;
2839            int lua_retval = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "S->S", s, &ss);
2840            if (lua_retval && ss) {
2841                s = ss;
2842            }
2843        }
2844        tex_print_str(s);
2845    }
2846}
2847
2848/*tex
2849
2850    The procedure |run_convert_tokens| uses |str_toks| to insert the token list for |convert|
2851    functions into the scanner; |\outer| control sequences are allowed to follow |\string| and
2852    |\meaning|.
2853
2854*/
2855
2856/*tex Codes not really needed but cleaner when testing */
2857
/*tex
    Remember the current print selector in the local variable |saved_selector| (which the user
    of this macro has to declare) and switch to |new_string_selector_code|.
*/

# define push_selector { saved_selector = lmt_print_state.selector; lmt_print_state.selector = new_string_selector_code; }
2862
/*tex Restore the print selector that was remembered by |push_selector|. */

# define pop_selector { lmt_print_state.selector = saved_selector; }
2866
2867void tex_run_convert_tokens(halfword code)
2868{
2869    /*tex Scan the argument for command |c|. */
2870    switch (code) {
2871        /*tex
2872            The |number_code| is quite popular. Beware, when used with a lua none function, a zero
2873            is injected. We could intercept it at the cost of messy code, but on the other hand,
2874            nothing guarantees that the call returns a number so this side effect can be defended
2875            as a recovery measure.
2876        */
2877        case number_code:
2878            {
2879                int saved_selector;
2880                halfword v = tex_scan_integer(0, NULL);
2881                push_selector;
2882                tex_print_int(v);
2883                pop_selector;
2884                break;
2885            }
2886        case to_integer_code:
2887        case to_hexadecimal_code:
2888            {
2889                int saved_selector;
2890                halfword v = tex_scan_integer(0, NULL);
2891                tex_get_x_token(); /* maybe not x here */
2892                if (cur_cmd != relax_cmd) {
2893                    tex_back_input(cur_tok);
2894                }
2895                push_selector;
2896                if (code == to_integer_code) {
2897                    tex_print_int(v);
2898                } else { 
2899                    tex_print_hex(v);
2900                }
2901                pop_selector;
2902                break;
2903            }
2904        case to_scaled_code:
2905        case to_sparse_scaled_code:
2906        case to_dimension_code:
2907        case to_sparse_dimension_code:
2908            {
2909                int saved_selector;
2910                halfword v = tex_scan_dimension(0, 0, 0, 0, NULL);
2911                tex_get_x_token(); /* maybe not x here */
2912                if (cur_cmd != relax_cmd) {
2913                    tex_back_input(cur_tok);
2914                }
2915                push_selector;
2916                switch (code) {
2917                    case to_sparse_dimension_code:
2918                    case to_sparse_scaled_code:
2919                        tex_print_sparse_dimension(v, no_unit);
2920                        break;
2921                    default:
2922                        tex_print_dimension(v, no_unit);
2923                        break;
2924                }
2925                switch (code) {
2926                    case to_dimension_code:
2927                    case to_sparse_dimension_code:
2928                        tex_print_unit(pt_unit);
2929                        break;
2930                }
2931                pop_selector;
2932                break;
2933            }
2934        case to_mathstyle_code:
2935            {
2936                int saved_selector;
2937                halfword v = tex_scan_math_style_identifier(1, 0);
2938                push_selector;
2939                tex_print_int(v);
2940                pop_selector;
2941                break;
2942            }
2943        case lua_function_code:
2944            {
2945             /* We can use:  tex_aux_lua_call(convert_cmd, v); */
2946                halfword v = tex_scan_integer(0, NULL);
2947                if (v > 0) {
2948                    strnumber u = tex_save_cur_string();
2949                    lmt_token_state.luacstrings = 0;
2950                    lmt_function_call(v, 0);
2951                    tex_restore_cur_string(u);
2952                    if (lmt_token_state.luacstrings > 0) {
2953                        tex_lua_string_start();
2954                    }
2955                } else {
2956                    tex_normal_error("luafunction", "invalid number");
2957                }
2958                return;
2959            }
2960        case lua_bytecode_code:
2961            {
2962                halfword v = tex_scan_integer(0, NULL);
2963                if (v < 0 || v > 65535) {
2964                    tex_normal_error("luabytecode", "invalid number");
2965                } else {
2966                    strnumber u = tex_save_cur_string();
2967                    lmt_token_state.luacstrings = 0;
2968                    lmt_bytecode_call(v);
2969                    tex_restore_cur_string(u);
2970                    if (lmt_token_state.luacstrings > 0) {
2971                        tex_lua_string_start();
2972                    }
2973                }
2974                return;
2975            }
2976        case lua_code:
2977            {
2978                full_scanner_status saved_full_status = tex_save_full_scanner_status();
2979                strnumber u = tex_save_cur_string();
2980                halfword s = tex_scan_toks_expand(0, NULL, 0, 1); // maybe expandconstant 
2981                tex_unsave_full_scanner_status(saved_full_status);
2982                lmt_token_state.luacstrings = 0;
2983                lmt_token_call(s);
2984                tex_delete_token_reference(s); /* boils down to flush_list */
2985                tex_restore_cur_string(u);
2986                if (lmt_token_state.luacstrings > 0) {
2987                    tex_lua_string_start();
2988                }
2989                /*tex No further action. */
2990                return;
2991            }
2992        case expanded_code:
2993        case semi_expanded_code:
2994            {
2995                full_scanner_status saved_full_status = tex_save_full_scanner_status();
2996                strnumber u = tex_save_cur_string();
2997                halfword s = tex_scan_toks_expand(0, NULL, code == semi_expanded_code, 0);
2998                tex_unsave_full_scanner_status(saved_full_status);
2999                if (token_link(s)) {
3000                    tex_begin_inserted_list(token_link(s));
3001                    token_link(s) = null;
3002                }
3003                tex_put_available_token(s);
3004                tex_restore_cur_string(u);
3005                /*tex No further action. */
3006                return;
3007            }
3008        /*tex
3009            This one makes no sense because |\expandaftercs\foo{{#1}}| vs |\expanded{\foo{#1}}| 
3010            runs in a ratio of 2.2:1.5 due to {#1} being three input levels. (Keep as example of 
3011            a rejected feature.) 
3012        */ /*
3013        case expanded_after_cs_code:
3014            {
3015                halfword token = tex_get_token();
3016                full_scanner_status saved_full_status = tex_save_full_scanner_status();
3017                strnumber u = tex_save_cur_string();
3018                halfword s = tex_scan_toks_expand(0, NULL, 0);
3019                tex_unsave_full_scanner_status(saved_full_status);
3020                token_info(s) = token;
3021                tex_begin_inserted_list(s);
3022                tex_restore_cur_string(u);
3023                return;
3024            } 
3025        */ 
3026     /* case immediate_assignment_code: */
3027     /* case immediate_assigned_code:   */
3028        /*tex
3029             These two were an on-the-road-to-bachotex brain-wave. A first variant did more in
3030             sequence till a relax or spacer was seen. These commands permits for instance setting
3031             counters in full expansion. However, as we have the more powerful local control
3032             mechanisms available these two commands have been dropped in \LUAMETATEX. Performance
3033             wise there is not that much to gain from |\immediateassigned| and it's even somewhat
3034             limited. So, they're gone now. Actually, one can also use the local control feature in
3035             an |\edef|, which {\em is} rather efficient, so we're good anyway. The upgraded code
3036             can be found in the archive.
3037        */
3038        case string_code:
3039        case cs_string_code:
3040        case cs_active_code:
3041            {
3042                int saved_selector;
3043                int saved_scanner_status = lmt_input_state.scanner_status;
3044                lmt_input_state.scanner_status = scanner_is_normal;
3045                tex_get_token();
3046                lmt_input_state.scanner_status = saved_scanner_status;
3047                push_selector;
3048                if (code == cs_active_code) {
3049                    tex_print_str(active_character_namespace);
3050                }
3051                if (cur_cs) {
3052                    if (code == cs_string_code) {
3053                        tex_print_cs_name(cur_cs);
3054                    } else { 
3055                        tex_print_cs(cur_cs);
3056                    }
3057                } else {
3058                    tex_print_tex_str(cur_chr);
3059                }
3060                pop_selector;
3061                break;
3062            }
3063//        case string_code:
3064//            {
3065//                int saved_selector;
3066//                int saved_scanner_status = lmt_input_state.scanner_status;
3067//                lmt_input_state.scanner_status = scanner_is_normal;
3068//                tex_get_token();
3069//                lmt_input_state.scanner_status = saved_scanner_status;
3070//                push_selector;
3071//                if (cur_cs) {
3072//                    tex_print_cs(cur_cs);
3073//                } else {
3074//                    tex_print_tex_str(cur_chr);
3075//                }
3076//                pop_selector;
3077//                break;
3078//            }
3079//        case cs_string_code:
3080//            {
3081//                int saved_selector;
3082//                int saved_scanner_status = lmt_input_state.scanner_status;
3083//                lmt_input_state.scanner_status = scanner_is_normal;
3084//                tex_get_token();
3085//                lmt_input_state.scanner_status = saved_scanner_status;
3086//                push_selector;
3087//                if (cur_cs) {
3088//                    tex_print_cs_name(cur_cs);
3089//                } else {
3090//                    tex_print_tex_str(cur_chr);
3091//                }
3092//                pop_selector;
3093//                break;
3094//            }
3095//        case cs_active_code:
3096//            {
3097//                /*tex 
3098//                    We cannot pick up the token and see what character it is because it will be
3099//                    replaced by its meaning.
3100//                */
3101//                int saved_selector;
3102//                int saved_scanner_status = lmt_input_state.scanner_status;
3103//                lmt_input_state.scanner_status = scanner_is_normal;
3104//                tex_get_token();
3105//                lmt_input_state.scanner_status = saved_scanner_status;
3106//                push_selector;
3107//                tex_print_str(active_character_namespace);
3108//                if (cur_cs) {
3109//                    tex_print_cs(cur_cs);
3110//                } else {
3111//                    tex_print_tex_str(cur_chr);
3112//                }
3113//                pop_selector;
3114//                break;
3115//            }
3116        /*
3117        case cs_lastname_code:
3118            if (lmt_scanner_state.last_cs_name != null_cs) {
3119                int saved_selector;
3120                push_selector;
3121                tex_print_cs_name(lmt_scanner_state.last_cs_name);
3122                pop_selector;
3123            }
3124            break;
3125        */
3126        case detokenized_code:
3127            /*tex Sort of like |\meaningles| but without the explanationary text. */
3128            {
3129                int saved_selector;
3130                int saved_scanner_status = lmt_input_state.scanner_status;
3131                halfword t = null; 
3132                lmt_input_state.scanner_status = scanner_is_normal;
3133                tex_get_token();
3134                lmt_input_state.scanner_status = saved_scanner_status;
3135                t = tex_get_available_token(cur_tok);
3136                push_selector;
3137                tex_show_token_list(t, 0, 0);
3138                tex_put_available_token(t);
3139                pop_selector;
3140                break;
3141            }
3142        case detokened_code:
3143            /*tex Takes a control sequence or token list. Probably a bad name but so be it. */
3144            {
3145                int saved_selector;
3146                int saved_scanner_status = lmt_input_state.scanner_status;
3147                halfword list = null;
3148                lmt_input_state.scanner_status = scanner_is_normal;
3149                tex_get_token();
3150                lmt_input_state.scanner_status = saved_scanner_status;
3151                switch (cur_cmd) {
3152                    case call_cmd:                        
3153                    case protected_call_cmd:              
3154                    case semi_protected_call_cmd:
3155                    case constant_call_cmd:
3156                    case tolerant_call_cmd:               
3157                    case tolerant_protected_call_cmd:     
3158                    case tolerant_semi_protected_call_cmd:
3159                       if (! get_token_preamble(cur_chr)) {
3160                           /* We only serialize macros with no arguments. */
3161                           list = token_link(cur_chr);
3162                           break;
3163                       } else {
3164                           goto WHATEVER;
3165                       }
3166                    case internal_toks_cmd:
3167                    case register_toks_cmd:
3168                        list = token_link(eq_value(cur_chr));
3169                        break;
3170                    case register_cmd:
3171                        if (cur_chr == token_val_level) {
3172                            halfword n = tex_scan_toks_register_number();
3173                            list = token_link(toks_register(n));
3174                            break;
3175                        } else { 
3176                            goto WHATEVER;
3177                        }
3178                        break;
3179                    default:             
3180                      WHATEVER:
3181                        {
3182                            halfword t = tex_get_available_token(cur_tok);
3183                            push_selector;
3184                            tex_show_token_list(t, 0, 0);
3185                            pop_selector;
3186                            tex_put_available_token(t);
3187                        }
3188                        break;
3189                }
3190                if (list) {
3191                    push_selector;
3192                    tex_show_token_list(list, 2, 0);
3193                    pop_selector;
3194                }
3195                break;
3196            }
3197        case roman_numeral_code:
3198            {
3199                int saved_selector;
3200                halfword v = tex_scan_integer(0, NULL);
3201                push_selector;
3202                tex_print_roman_int(v);
3203                pop_selector;
3204                break;
3205            }
3206        case meaning_code:
3207        case meaning_full_code:
3208        case meaning_less_code:
3209        case meaning_ful_code:
3210        case meaning_les_code:
3211        case meaning_asis_code:
3212            {
3213                int saved_selector;
3214                int saved_scanner_status = lmt_input_state.scanner_status;
3215                lmt_input_state.scanner_status = scanner_is_normal;
3216                tex_get_token();
3217                lmt_input_state.scanner_status = saved_scanner_status;
3218                push_selector;
3219                tex_print_meaning(code);
3220                pop_selector;
3221                break;
3222            }
3223        case to_character_code:
3224            {
3225                int saved_selector;
3226                int chr = tex_scan_char_number(0);
3227                push_selector;
3228                tex_print_tex_str(chr);
3229                pop_selector;
3230                break;
3231            }
3232        case lua_escape_string_code:
3233     /* case lua_token_string_code: */ /* for now rejected: could also be keyword */
3234            {
3235                /* tex 
3236                    If I would need it I could probably add support for catcode tables and verbose 
3237                    serialization. Maybe we can use some of the other (more efficient) helpers when
3238                    we have a detokenize variant. We make sure that the escape character is a 
3239                    backslash because these conversions can occur anywhere and are very much 
3240                    related to \LUA\ calls. (Maybe it makes sense to pass it a argument to the
3241                    serializer.) 
3242
3243                    A |\luatokenstring| primitive doesn't really make sense because \LUATEX\ lacks
3244                    it and |\luaescapestring| is a compatibility primitive.
3245                */
3246                lstring str;
3247                int length = 0;
3248             /* int saved_in_lua_escape = lmt_token_state.in_lua_escape; */
3249                halfword saved_escape_char = escape_char_par; 
3250                full_scanner_status saved_full_status = tex_save_full_scanner_status();
3251                halfword result = tex_scan_toks_expand(0, NULL, 0, 0); 
3252             /* halfword result = tex_scan_toks_expand(0, NULL, code == lua_token_string_code); */
3253             /* lmt_token_state.in_lua_escape = 1; */
3254                escape_char_par = '\\';
3255                str.s = (unsigned char *) tex_tokenlist_to_tstring(result, 0, &length, 0, 0, 0, 0, 1); /* sinmgle hashes */
3256                str.l = (unsigned) length;
3257             /* lmt_token_state.in_lua_escape = saved_in_lua_escape; */
3258                escape_char_par = saved_escape_char; 
3259                tex_delete_token_reference(result); /* boils down to flush_list */
3260                tex_unsave_full_scanner_status(saved_full_status);
3261                if (str.l) {
3262                    result = lmt_str_toks(str);
3263                    tex_begin_inserted_list(result);
3264                }
3265                return;
3266            }
3267        case font_name_code:
3268            {
3269                int saved_selector;
3270                halfword fnt = tex_scan_font_identifier(NULL);
3271                push_selector;
3272                tex_print_font(fnt);
3273                pop_selector;
3274                break;
3275            }
3276        case font_specification_code:
3277            {
3278                int saved_selector;
3279                halfword fnt = tex_scan_font_identifier(NULL);
3280                push_selector;
3281                tex_append_string((const unsigned char *) font_original(fnt), (unsigned) strlen(font_original(fnt)));
3282                pop_selector;
3283                break;
3284            }
3285        case job_name_code:
3286            {
3287                int saved_selector;
3288                if (! lmt_fileio_state.job_name) {
3289                    tex_open_log_file();
3290                }
3291                push_selector;
3292                tex_aux_print_job_name();
3293                pop_selector;
3294                break;
3295            }
3296        case format_name_code:
3297            {
3298                int saved_selector;
3299                if (! lmt_fileio_state.job_name) {
3300                    tex_open_log_file();
3301                }
3302                push_selector;
3303                tex_print_str(lmt_engine_state.dump_name);
3304                pop_selector;
3305                break;
3306            }
3307        case luatex_banner_code:
3308            {
3309                int saved_selector;
3310                push_selector;
3311                tex_print_str(lmt_engine_state.luatex_banner);
3312                pop_selector;
3313                break;
3314            }
3315        default:
3316            tex_confusion("convert tokens");
3317            break;
3318    }
3319    {
3320        halfword head = tex_cur_str_toks(NULL);
3321        tex_begin_inserted_list(head);
3322    }
3323}
3324
3325/*tex
3326    The boolean |in_lua_escape| is keeping track of the lua string escape state.
3327*/
3328
3329strnumber tex_the_convert_string(halfword c, int i)
3330{
3331    int saved_selector = lmt_print_state.selector;
3332    strnumber ret = 0;
3333    int done = 1 ;
3334    lmt_print_state.selector = new_string_selector_code;
3335    switch (c) {
3336        case number_code:
3337        case to_integer_code:
3338            tex_print_int(i);
3339            break;
3340        case to_hexadecimal_code:
3341            tex_print_hex(i);
3342            break;
3343        case to_scaled_code:
3344            tex_print_dimension(i, no_unit);
3345            break;
3346        case to_sparse_scaled_code:
3347            tex_print_sparse_dimension(i, no_unit);
3348            break;
3349        case to_dimension_code:
3350            tex_print_dimension(i, pt_unit);
3351            break;
3352        case to_sparse_dimension_code:
3353            tex_print_sparse_dimension(i, pt_unit);
3354            break;
3355        case roman_numeral_code:
3356            tex_print_roman_int(i);
3357            break;
3358        case to_character_code:
3359            tex_print_tex_str(i);
3360            break;
3361        case font_name_code:
3362            tex_print_font(i);
3363            break;
3364        case font_specification_code:
3365            tex_print_str(font_original(i));
3366            break;
3367        case job_name_code:
3368            tex_aux_print_job_name();
3369            break;
3370        case format_name_code:
3371            tex_print_str(lmt_engine_state.dump_name);
3372            break;
3373        case luatex_banner_code:
3374            tex_print_str(lmt_engine_state.luatex_banner);
3375            break;
3376        case font_identifier_code:
3377            tex_print_font_identifier(i);
3378            break;
3379        default:
3380            done = 0;
3381            break;
3382    }
3383    if (done) {
3384        ret = tex_make_string();
3385    }
3386    lmt_print_state.selector = saved_selector;
3387    return ret;
3388}
3389
3390/*tex Return a string from tokens list: */
3391
3392strnumber tex_tokens_to_string(halfword p)
3393{
3394    if (lmt_print_state.selector == new_string_selector_code) {
3395        tex_normal_error("tokens", "tokens_to_string() called while selector = new_string");
3396        return get_nullstr();
3397    } else {
3398        int saved_selector = lmt_print_state.selector;
3399        lmt_print_state.selector = new_string_selector_code;
3400        tex_token_show(p);
3401        lmt_print_state.selector = saved_selector;
3402        return tex_make_string();
3403    }
3404}
3405
/*tex

    The actual token conversion in this function is now functionally equivalent to |show_token_list|,
    except that it always prints the whole token list. Often the result is not that large, for
    instance |\directlua| is seldom large. However, this converter is also used for patterns
    and exceptions where size is more of an issue. For that reason we used to have three variants,
    one of which (experimentally) used a buffer. At some point, in the manual we were talking of
    millions of allocations but times have changed.

    Macros were used to inline the appending code (in the three variants), but in the end I decided
    to just merge all into one function, with a bit more overhead because we need to optionally
    skip a macro preamble.

    Values like 512 and 128 also work ok. There is not much to gain in optimization here. We used
    to have 3 mostly overlapping functions, one of which used a buffer. We can probably use a
    larger default buffer size and larger step and only free when we think it's too large.

*/
3424
3425# define default_buffer_size  512 /*tex This used to be 256 */
3426# define default_buffer_step 4096 /*tex When we're larger, we always are much larger. */
3427
3428// todo: check ret
3429
3430static void tex_aux_make_room_in_buffer(int a)
3431{
3432    if (lmt_token_state.bufloc + a + 1 > lmt_token_state.bufmax) {
3433        char *tmp = aux_reallocate_array(lmt_token_state.buffer, sizeof(unsigned char), lmt_token_state.bufmax + default_buffer_step, 1);
3434        if (tmp) {
3435            lmt_token_state.bufmax += default_buffer_step;
3436        } else {
3437            // error
3438        }
3439        lmt_token_state.buffer = tmp;
3440    }
3441}
3442
3443static void tex_aux_append_uchar_to_buffer(int s)
3444{
3445    tex_aux_make_room_in_buffer(4);
3446    if (s <= 0x7F) {
3447        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (s);
3448    } else if (s <= 0x7FF) {
3449        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xC0 + (s / 0x40));
3450        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (s % 0x40));
3451    } else if (s <= 0xFFFF) {
3452        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xE0 +  (s / 0x1000));
3453        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + ((s % 0x1000) / 0x40));
3454        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + ((s % 0x1000) % 0x40));
3455    } else {
3456        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xF0 +   (s / 0x40000));
3457        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 +  ((s % 0x40000) / 0x1000));
3458        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (((s % 0x40000) % 0x1000) / 0x40));
3459        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (((s % 0x40000) % 0x1000) % 0x40));
3460    }
3461}
3462
3463static void tex_aux_append_char_to_buffer(int c)
3464{
3465    tex_aux_make_room_in_buffer(1);
3466    lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (c);
3467}
3468
3469/*tex Only errors and unknowns. */
3470
3471static void tex_aux_append_str_to_buffer(const char *s)
3472{
3473    const char *v = s;
3474    tex_aux_make_room_in_buffer((int) strlen(v));
3475    /*tex Using memcpy will inline and give a larger binary ... and we seldom need this. */
3476    while (*v) {
3477        lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (*v);
3478        v++;
3479    }
3480}
3481
3482/*tex Only bogus csnames. */
3483
3484static void tex_aux_append_esc_to_buffer(const char *s)
3485{
3486    int e = escape_char_par;
3487    if (e > 0 && e < cs_offset_value) {
3488        tex_aux_append_uchar_to_buffer(e);
3489    }
3490    tex_aux_append_str_to_buffer(s);
3491}
3492
3493# define is_cat_letter(a)  (tex_aux_the_cat_code(aux_str2uni(str_string((a)))) == letter_cmd)
3494
3495/* make two versions: macro and not */
3496
3497char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamble, int nospace, int strip, int wipe, int single)
3498{
3499    if (pp) {
3500        /*tex We need to go beyond the reference. */
3501        int p = token_link(pp);
3502        if (p) {
3503            int e = escape_char_par;  /*tex The serialization of the escape, normally a backlash. */
3504            int n = 0;                /*tex The character after |#|, so |#0| upto |#9| */
3505            int min = 0;
3506            int max = lmt_token_memory_state.tokens_data.top;
3507            int skip = 0;
3508            int tail = p; 
3509            int count = 0;
3510            if (lmt_token_state.bufmax > default_buffer_size) {
3511                /* Let's start fresh and small. */
3512                aux_deallocate_array(lmt_token_state.buffer);
3513                lmt_token_state.buffer = aux_allocate_clear_array(sizeof(unsigned char), default_buffer_size, 1);
3514                lmt_token_state.bufmax = default_buffer_size;
3515            } else if (! lmt_token_state.buffer) {
3516                /* Let's start. */
3517                lmt_token_state.buffer = aux_allocate_clear_array(sizeof(unsigned char), default_buffer_size, 1);
3518                lmt_token_state.bufmax = default_buffer_size;
3519            }
3520            lmt_token_state.bufloc = 0;
3521            if (skippreamble == 1) {
3522                skip = get_token_preamble(pp);
3523            }
3524            while (p) {
3525                if (p < min || p > max) {
3526                    tex_aux_append_str_to_buffer(error_string_clobbered(31));
3527                    break;
3528                } else {
3529                    int info = token_info(p);
3530                    if (info < 0) {
3531                        /*tex Unlikely, will go after checking (maybe \LUA\ user mess up). */
3532                        tex_aux_append_str_to_buffer(error_string_bad(32));
3533                    } else if (info < cs_token_flag) {
3534                        /*tex We nearly always end up here because otherwise we have an error. */
3535                        int cmd = token_cmd(info);
3536                        int chr = token_chr(info);
3537                        switch (cmd) {
3538                            case left_brace_cmd:
3539                            case right_brace_cmd:
3540                            case math_shift_cmd:
3541                            case alignment_tab_cmd:
3542                            case superscript_cmd:
3543                            case subscript_cmd:
3544                            case spacer_cmd:
3545                            case letter_cmd:
3546                            case other_char_cmd:
3547                            case active_char_cmd:
3548                                if (! skip) {
3549                                    tex_aux_append_uchar_to_buffer(chr);
3550                                }
3551                                break;
3552                            case parameter_cmd:
3553                                if (! skip) {
3554                                 /* if (! single && ! nospace && (! lmt_token_state.in_lua_escape && (lmt_expand_state.cs_name_level == 0))) { */
3555                                    if (! single && ! nospace && lmt_expand_state.cs_name_level == 0) {
3556                                        tex_aux_append_uchar_to_buffer(chr);
3557                                    }
3558                                    tex_aux_append_uchar_to_buffer(chr);
3559                                }
3560                                break;
3561                            case parameter_reference_cmd:
3562                                if (! skip) {
3563                                    tex_aux_append_char_to_buffer(match_visualizer);
3564                                    if (chr <= 9) {
3565                                        tex_aux_append_char_to_buffer(chr + '0');
3566                                    } else if (chr <= max_match_count) {
3567                                        tex_aux_append_char_to_buffer(chr + '0' + gap_match_count);
3568                                    } else {
3569                                        tex_aux_append_char_to_buffer('!'); 
3570                                        goto EXIT;
3571                                    }
3572                                } else {
3573                                    if (chr > max_match_count) {
3574                                        goto EXIT;
3575                                    }
3576                                }
3577                                break;
3578                            case match_cmd:
3579                                if (! skip) {
3580                                    tex_aux_append_char_to_buffer(match_visualizer);
3581                                }
3582                                if (is_valid_match_ref(chr)) {
3583                                    ++n;
3584                                }
3585                                if (! skip) {
3586                                    tex_aux_append_char_to_buffer(chr ? chr : '0');
3587                                 // if (chr <= 9) {
3588                                 //     tex_aux_append_char_to_buffer(chr + '0');
3589                                 // } else if (chr <= max_match_count) {
3590                                 //     tex_aux_append_char_to_buffer(chr + '0' + gap_match_count);
3591                                 // }
3592                                }
3593                                if (n > max_match_count) {
3594                                    goto EXIT;
3595                                }
3596                                break;
3597                            case end_match_cmd:
3598                                if (skippreamble == 2) {
3599                                    goto EXIT;
3600                                } else if (chr == 0) {
3601                                    if (! skip) {
3602                                        tex_aux_append_char_to_buffer('-');
3603                                        tex_aux_append_char_to_buffer('>');
3604                                    }
3605                                    skip = 0 ;
3606                                }
3607                                break;
3608                            case end_paragraph_cmd:
3609                                if (! inhibit_par && (auto_paragraph_mode(auto_paragraph_text))) {
3610                                    tex_aux_append_esc_to_buffer("par");
3611                                }
3612                                break;
3613                            case deep_frozen_keep_constant_cmd:
3614                                if (! skip) {
3615                                    halfword h = token_link(chr);
3616                                    while (h) {
3617                                        tex_aux_append_uchar_to_buffer(token_chr(token_info(h)));
3618                                        h = token_link(h);
3619                                    }
3620                                }
3621                                break;
3622                            default:
3623                                tex_aux_append_str_to_buffer(tex_aux_special_cmd_string(cmd, chr, error_string_bad(33)));
3624                                break;
3625                        }
3626                    } else if (! (inhibit_par && info == lmt_token_state.par_token)) {
3627                        int q = info - cs_token_flag;
3628                        if (q < hash_base) {
3629                            if (q == null_cs) {
3630                                tex_aux_append_esc_to_buffer("csname");
3631                                tex_aux_append_esc_to_buffer("endcsname");
3632                            } else {
3633                                tex_aux_append_str_to_buffer(error_string_impossible(34));
3634                            }
3635                        } else if (eqtb_out_of_range(q)) {
3636                            tex_aux_append_str_to_buffer(error_string_impossible(35));
3637                        } else {
3638                            strnumber txt = cs_text(q);
3639                            if (txt  < 0 || txt  >= lmt_string_pool_state.string_pool_data.ptr) {
3640                                tex_aux_append_str_to_buffer(error_string_nonexistent(36));
3641                            } else {
3642                                int allocated = 0;
3643                                char *sh = tex_makecstring(txt, &allocated);
3644                                char *s = sh;
3645                                if (tex_is_active_cs(txt)) {
3646                                    s = s + 3;
3647                                    while (*s) {
3648                                        tex_aux_append_char_to_buffer(*s);
3649                                        s++;
3650                                    }
3651                                } else {
3652                                    if (e >= 0) {
3653                                        tex_aux_append_uchar_to_buffer(e);
3654                                    }
3655                                    while (*s) {
3656                                        tex_aux_append_char_to_buffer(*s);
3657                                        s++;
3658                                    }
3659                                    if ((! nospace) && ((! tex_single_letter(txt)) || is_cat_letter(txt))) {
3660                                        tex_aux_append_char_to_buffer(' ');
3661                                    }
3662                                }
3663                                if (allocated) {
3664                                    lmt_memory_free(sh);    
3665                                }
3666                            }
3667                        }
3668                    }
3669                    tail = p; 
3670                    ++count;
3671                    p = token_link(p);
3672                }
3673            }
3674          EXIT:
3675            if (strip && lmt_token_state.bufloc > 1) { 
3676                if (lmt_token_state.buffer[lmt_token_state.bufloc-1] == strip) {
3677                    lmt_token_state.bufloc -= 1;
3678                }
3679                if (lmt_token_state.bufloc > 1 && lmt_token_state.buffer[0] == strip) {
3680                    memcpy(&lmt_token_state.buffer[0], &lmt_token_state.buffer[1], lmt_token_state.bufloc-1);
3681                    lmt_token_state.bufloc -= 1;
3682                }
3683            }
3684            lmt_token_state.buffer[lmt_token_state.bufloc] = '\0';
3685            if (siz) {
3686                *siz = lmt_token_state.bufloc;
3687            }
3688            if (wipe) { 
3689                tex_flush_token_list_head_tail(pp, tail, count);
3690            }
3691            return lmt_token_state.buffer;
3692        } else { 
3693            if (wipe) {
3694                 tex_put_available_token(pp);
3695            }
3696        }
3697    }
3698    if (siz) {
3699        *siz = 0;
3700    }
3701    return NULL;
3702}
3703
3704/*tex
3705
3706    The \LUA\ interface needs some extra functions. The functions themselves are quite boring, but
3707    they are handy because otherwise this internal stuff has to be accessed from \CCODE\ directly,
3708    where lots of the defines are not available.
3709
3710    It doesn't make sense to listen to |\globaldefs| here, so that feature has been removed here.
3711
3712*/
3713
3714/* The bin gets 1.2K smaller if we inline these. */
3715
3716halfword tex_get_tex_dimension_register (int j, int internal) { return internal ? dimension_parameter(j) : dimension_register(j) ; }
3717halfword tex_get_tex_skip_register      (int j, int internal) { return internal ? glue_parameter(j) : skip_register(j) ; }
3718halfword tex_get_tex_muskip_register    (int j, int internal) { return internal ? muglue_parameter(j) : muskip_register(j); }
3719halfword tex_get_tex_count_register     (int j, int internal) { return internal ? count_parameter(j) : count_register(j)  ; }
3720halfword tex_get_tex_posit_register     (int j, int internal) { return internal ? posit_parameter(j) : posit_register(j)  ; }
3721halfword tex_get_tex_attribute_register (int j, int internal) { return internal ? attribute_parameter(j) : attribute_register(j) ; }
3722halfword tex_get_tex_box_register       (int j, int internal) { return internal ? box_parameter(j) : box_register(j) ; }
3723
3724void tex_set_tex_dimension_register(int j, halfword v, int flags, int internal)
3725{
3726 // if (global_defs_par) {
3727 //     flags = add_global_flag(flags);
3728 // }
3729    if (internal) {
3730        tex_assign_internal_dimension_value(flags, internal_dimension_location(j), v);
3731    } else {
3732        tex_word_define(flags, register_dimension_location(j), v);
3733    }
3734}
3735
3736void tex_set_tex_skip_register(int j, halfword v, int flags, int internal)
3737{
3738 // if (global_defs_par) {
3739 //     flags = add_global_flag(flags);
3740 // }
3741    if (internal) {
3742        tex_assign_internal_skip_value(flags, internal_glue_location(j), v);
3743    } else {
3744        tex_word_define(flags, register_glue_location(j), v);
3745    }
3746}
3747
3748void tex_set_tex_muskip_register(int j, halfword v, int flags, int internal)
3749{
3750 // if (global_defs_par) {
3751 //     flags = add_global_flag(flags);
3752 // }
3753    tex_word_define(flags, internal ? internal_muglue_location(j) : register_muglue_location(j), v);
3754}
3755
3756void tex_set_tex_count_register(int j, halfword v, int flags, int internal)
3757{
3758 // if (global_defs_par) {
3759 //     flags = add_global_flag(flags);
3760 // }
3761    if (internal) {
3762        tex_assign_internal_integer_value(flags, internal_integer_location(j), v);
3763    } else {
3764        tex_word_define(flags, register_integer_location(j), v);
3765    }
3766}
3767void tex_set_tex_posit_register(int j, halfword v, int flags, int internal)
3768{
3769 // if (global_defs_par) {
3770 //     flags = add_global_flag(flags);
3771 // }
3772    if (internal) {
3773        tex_assign_internal_posit_value(flags, internal_posit_location(j), v);
3774    } else {
3775        tex_word_define(flags, register_posit_location(j), v);
3776    }
3777}
3778
3779
3780void tex_set_tex_attribute_register(int j, halfword v, int flags, int internal)
3781{
3782 // if (global_defs_par) {
3783 //     flags = add_global_flag(flags);
3784 // }
3785    if (j > lmt_node_memory_state.max_used_attribute) {
3786        lmt_node_memory_state.max_used_attribute = j;
3787    }
3788    tex_change_attribute_register(flags, register_attribute_location(j), v);
3789    tex_word_define(flags, internal ? internal_attribute_location(j) : register_attribute_location(j), v);
3790}
3791
3792void tex_set_tex_box_register(int j, halfword v, int flags, int internal)
3793{
3794 // if (global_defs_par) {
3795 //     flags = add_global_flag(flags);
3796 // }
3797    if (internal) {
3798        tex_define(flags, internal_box_location(j), internal_box_reference_cmd, v);
3799    } else {
3800        tex_define(flags, register_box_location(j), register_box_reference_cmd, v);
3801    }
3802}
3803
3804void tex_set_tex_toks_register(int j, lstring s, int flags, int internal)
3805{
3806    halfword ref = get_reference_token();
3807    halfword head = tex_str_toks(s, NULL);
3808    set_token_link(ref, head);
3809 // if (global_defs_par) {
3810 //     flags = add_global_flag(flags);
3811 // }
3812    if (internal) {
3813        tex_define(flags, internal_toks_location(j), internal_toks_reference_cmd, ref);
3814    } else {
3815        tex_define(flags, register_toks_location(j), register_toks_reference_cmd, ref);
3816    }
3817}
3818
3819void tex_scan_tex_toks_register(int j, int c, lstring s, int flags, int internal)
3820{
3821    halfword ref = get_reference_token();
3822    halfword head = tex_str_scan_toks(c, s);
3823    set_token_link(ref, head);
3824 // if (global_defs_par) {
3825 //     flags = add_global_flag(flags);
3826 // }
3827    if (internal) {
3828        tex_define(flags, internal_toks_location(j), internal_toks_reference_cmd, ref);
3829    } else {
3830        tex_define(flags, register_toks_location(j), register_toks_reference_cmd, ref);
3831    }
3832}
3833
3834int tex_get_tex_toks_register(int j, int internal)
3835{
3836    halfword t = internal ? toks_parameter(j) : toks_register(j);
3837    if (t) {
3838        return tex_tokens_to_string(t);
3839    } else {
3840        return get_nullstr();
3841    }
3842}
3843
3844/* Options: (0) error when undefined [bad], (1) create [but undefined], (2) ignore [discard] */
3845
3846halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option)
3847{
3848    halfword p = null;
3849    if (! head) {
3850        head = get_reference_token();
3851    }
3852    p = (tail && *tail) ? *tail : head;
3853    if (lstr > 0) {
3854        const char *se = str + lstr;
3855        while (str < se) {
3856            /*tex hh: |str2uni| could return len too (also elsewhere) */
3857            int ul;
3858            halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &ul);
3859            halfword t = null;
3860            halfword cc = tex_get_cat_code(ct, u);
3861            str += ul;
3862            /*tex
3863                This is a relative simple converter; if more is needed one can just use |tex.print|
3864                with a regular |\def| or |\gdef| and feed the string into the regular scanner.
3865            */
3866            switch (cc) {
3867                case escape_cmd:
3868                    {
3869                        /*tex We have a potential control sequence so we check for it. */
3870                        int lname = 0;
3871                        const char *name  = str;
3872                        while (str < se) {
3873                            int s; 
3874                            halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &s);
3875                            int c = tex_get_cat_code(ct, u);
3876                            if (c == letter_cmd) {
3877                                str += s;
3878                                lname += s;
3879                            } else if (c == spacer_cmd) {
3880                                /*tex We ignore a trailing space like normal scanning does. */
3881                                if (lname == 0) {
3882                             // if (u == 32) {
3883                                    lname += s;
3884                                }
3885                                str += s;
3886                                break ;
3887                            } else {
3888                                if (lname == 0) {
3889                                    lname += s;
3890                                    str += s;
3891                                }
3892                                break ;
3893                            }
3894                        }
3895                        if (lname > 0) {
3896                            /*tex We have a potential |\cs|. */
3897                            halfword cs = tex_string_locate(name, lname, option == 1 ? 1 : 0); /* 1 == create */
3898                            if (cs == undefined_control_sequence) {
3899                                if (option == 2) {
3900                                    /*tex We ignore unknown commands. */
3901                                 // t = null;
3902                                } else {
3903                                    /*tex We play safe and backtrack, as we have option 0, but never used anyway. */
3904                                    t = u + (cc * (1<<21));
3905                                    str = name;
3906                                }
3907                            } else {
3908                                /* We end up here when option is 1. */
3909                                t = cs_token_flag + cs;
3910                            }
3911                        } else {
3912                            /*tex
3913                                Just a character with some meaning, so |\unknown| becomes effectively
3914                                |\unknown| assuming that |\\| has some useful meaning of course.
3915                            */
3916                            t = u + (cc * (1 << 21));
3917                            str = name;
3918                        }
3919                        break;
3920                    }
3921                case comment_cmd:
3922                    goto DONE;
3923                case ignore_cmd:
3924                    break;
3925                case spacer_cmd:
3926                 /* t = u + (cc * (1<<21)); */
3927                    t = token_val(spacer_cmd, ' ');
3928                    break;
3929                default:
3930                    /*tex
3931                        Whatever token, so for instance $x^2$ just works given a tex catcode regime.
3932                    */
3933                    t = u + (cc * (1<<21));
3934                    break;
3935            }
3936            if (t) {
3937                p = tex_store_new_token(p, t);
3938            }
3939        }
3940    }
3941  DONE:
3942    if (tail) {
3943        *tail = p;
3944    }
3945    return head;
3946}
3947
3948/*tex So far for the helpers. */
3949
3950int tex_used_token_count(void) {
3951    return lmt_token_memory_state.tokens_data.ptr;
3952}
3953
3954void tex_dump_token_mem(dumpstream f)
3955{
3956    /*tex
3957        It doesn't pay off to prune the available list. We save less than 10K if we do this and
3958        it assumes a sequence at the end. It doesn't help that the list is in reverse order so
3959        we just dump the lot. But we do check the allocated size. We cheat a bit in reducing
3960        the ptr so that we can set the the initial counter on loading.
3961    */
3962    halfword p = lmt_token_memory_state.available;
3963    halfword u = lmt_token_memory_state.tokens_data.top + 1;
3964    while (p) {
3965        --u;
3966        p = token_link(p);
3967    }
3968    lmt_token_memory_state.tokens_data.ptr = u;
3969    dump_int(f, lmt_token_state.null_list); /* the only one left */
3970    dump_int(f, lmt_token_memory_state.tokens_data.allocated);
3971    dump_int(f, lmt_token_memory_state.tokens_data.top);
3972    dump_int(f, lmt_token_memory_state.tokens_data.ptr);
3973    dump_int(f, lmt_token_memory_state.available);
3974    dump_things(f, lmt_token_memory_state.tokens[0], lmt_token_memory_state.tokens_data.top + 1);
3975}
3976
3977void tex_undump_token_mem(dumpstream f)
3978{
3979    undump_int(f, lmt_token_state.null_list); /* the only one left */
3980    undump_int(f, lmt_token_memory_state.tokens_data.allocated);
3981    undump_int(f, lmt_token_memory_state.tokens_data.top);
3982    undump_int(f, lmt_token_memory_state.tokens_data.ptr);
3983    undump_int(f, lmt_token_memory_state.available);
3984    tex_initialize_token_mem(); /* maybe only ptr upto top */
3985    undump_things(f, lmt_token_memory_state.tokens[0], lmt_token_memory_state.tokens_data.top + 1);
3986}
3987
Source Browser ?