texprimitive.c /size: 37 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7/*tex
8
9    Control sequences are stored and retrieved by means of a fairly standard hash table algorithm
10    called the method of \quote {coalescing lists} (cf.\ Algorithm 6.4C in {\em The Art of
11    Computer Programming}). Once a control sequence enters the table, it is never removed, because
12    there are complicated situations involving |\gdef| where the removal of a control sequence at
13    the end of a group would be a mistake preventable only by the introduction of a complicated
14    reference-count mechanism.
15
16    The actual sequence of letters forming a control sequence identifier is stored in the |str_pool|
17    array together with all the other strings. An auxiliary array |hash| consists of items with two
18    halfword fields per word. The first of these, called |next(p)|, points to the next identifier
19    belonging to the same coalesced list as the identifier corresponding to~|p|; and the other,
20    called |text(p)|, points to the |str_start| entry for |p|'s identifier. If position~|p| of the
21    hash table is empty, we have |text(p)=0|; if position |p| is either empty or the end of a
22    coalesced hash list, we have |next(p) = 0|. An auxiliary pointer variable called |hash_used| is
23    maintained in such a way that all locations |p >= hash_used| are nonempty. The global variable
24    |cs_count| tells how many multiletter control sequences have been defined, if statistics are
25    being kept.
26
27    A boolean variable called |no_new_control_sequence| is set to |true| during the time that new
28    hash table entries are forbidden.
29
30    The other variables in the following state structure are: the hash table: |hash|, the allocation
31    pointer |hash_used| for |hash|, |hash_extra| above |eqtb_size|, the maximum of the hash array
32    |hash_top|, the pointer to the next high hash location |hash_high|, the mentioned flag that says
33    if new identifiers are legal |no_new_control_sequence| and the total number of known identifiers:
34    |cs_count|.
35
36*/
37
38hash_state_info lmt_hash_state = {
39    .hash      = NULL,
40    .hash_data = {
41        .minimum   = min_hash_size,
42        .maximum   = max_hash_size,
43        .size      = siz_hash_size,
44        .step      = stp_hash_size,
45        .allocated = 0,
46        .itemsize  = sizeof(memoryword) + sizeof(memoryword),
47        .top       = 0,
48        .ptr       = 0,
49        .initial   = 0,
50        .offset    = 0, // eqtb_size,
51    },
52    .eqtb_data = {
53        .minimum   = min_hash_size,
54        .maximum   = max_hash_size,
55        .size      = siz_hash_size,
56        .step      = stp_hash_size,
57        .allocated = memory_data_unset,
58        .itemsize  = memory_data_unset,
59        .top       = frozen_control_sequence,
60        .ptr       = 0,
61        .initial   = 0,
62        .offset    = 0,
63    },
64    .eqtb      = NULL,
65    .no_new_cs = 1,
66    .padding   = 0,
67};
68
69/*tex
70
71    The arrays |prim| and |prim_eqtb| are used for |name -> cmd, chr| lookups. The are modelled
72    after |hash| and |eqtb|, except that primitives do not have an |eq_level|, that field is
73    replaced by |origin|. Furthermore we have a link for coalesced lists: |prim_next (a)|; the
74    string number for control sequence name: |prim_text (a)|; test if all positions are occupied:
75    |prim_is_full|; some fields: |prim_origin_field (a)|, |prim_eq_type_field (a)| and
76    |prim_equiv_field(a)|; the level of definition: |prim_origin (a)|; the command code for
77    equivalent: |prim_eq_type(a)|; the equivalent value: |prim_equiv(a)|; the allocation pointer
78    for |prim|: |prim_used|; the primitives tables: |two_halves prim [(prim_size + 1)]| and
79    |memoryword prim_eqtb [(prim_size + 1)]|. The array |prim_data| works the other way around, it
80    is used for |cmd, chr| to name lookups.
81
82*/
83
84primitive_state_info lmt_primitive_state;
85
86/*tex Test if all positions are occupied: */
87
88# define prim_base           1
89# define reserved_hash_slots 1
90
91/*tex Initialize the memory arrays: */
92
93void tex_initialize_primitives(void)
94{
95    memset(lmt_primitive_state.prim_data, 0, sizeof(prim_info)  * (last_cmd  + 1));
96    memset(lmt_primitive_state.prim,      0, sizeof(memoryword) * (prim_size + 1));
97    memset(lmt_primitive_state.prim_eqtb, 0, sizeof(memoryword) * (prim_size + 1));
98    for (int k = 0; k <= prim_size; k++) {
99        prim_eq_type(k) = undefined_cs_cmd;
100    }
101    lmt_primitive_state.prim_used = prim_size;
102}
103
104void tex_initialize_hash_mem(void)
105{
106    if (lmt_main_state.run_state == initializing_state) {
107        if (lmt_hash_state.hash_data.minimum == 0) {
108            tex_emergency_message("startup error", "you need at least some hash size");
109        } else {
110            lmt_hash_state.hash_data.allocated = lmt_hash_state.hash_data.minimum;
111            lmt_hash_state.hash_data.top = eqtb_size + lmt_hash_state.hash_data.minimum;
112        }
113    }
114    {
115        int size = lmt_hash_state.hash_data.top + 1;
116        memoryword *hash = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
117        memoryword *eqtb = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
118        if (hash && eqtb) {
119            lmt_hash_state.hash = hash;
120            lmt_hash_state.eqtb = eqtb;
121            if (lmt_main_state.run_state == initializing_state) {
122                /*tex Initialization happens elsewhere. */
123            } else {
124                tex_initialize_undefined_cs();
125                for (int i = eqtb_size + 1; i <= lmt_hash_state.hash_data.top; i++) {
126                    copy_eqtb_entry(i, undefined_control_sequence);
127                }
128            }
129        } else {
130            tex_overflow_error("hash", size);
131        }
132    }
133}
134
135static int tex_aux_room_in_hash(void)
136{
137    if (lmt_hash_state.hash_data.allocated + lmt_hash_state.hash_data.step <= lmt_hash_state.hash_data.size) {
138        int size = lmt_hash_state.hash_data.top + lmt_hash_state.hash_data.step + 1;
139        memoryword *hash = aux_reallocate_array(lmt_hash_state.hash, sizeof(memoryword), size, reserved_hash_slots);
140        memoryword *eqtb = aux_reallocate_array(lmt_hash_state.eqtb, sizeof(memoryword), size, reserved_hash_slots);
141        if (hash && eqtb) {
142            memset(hash + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
143            memset(eqtb + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
144            lmt_hash_state.hash = hash;
145            lmt_hash_state.eqtb = eqtb;
146            /*tex
147                This is not really needed because we now dp this when a new id is created which
148                is a better place anyway. But we play safe and still do it:
149            */
150            for (int i = lmt_hash_state.hash_data.top + 1; i <= size; i++) {
151                copy_eqtb_entry(i, undefined_control_sequence);
152            }
153            lmt_hash_state.hash_data.allocated += lmt_hash_state.hash_data.step;
154            lmt_hash_state.hash_data.top += lmt_hash_state.hash_data.step;
155            lmt_run_memory_callback("hash", 1);
156            return 1;
157        } else {
158            lmt_run_memory_callback("hash", 0);
159            tex_overflow_error("hash", size);
160        }
161    }
162    return 0;
163}
164
165/*tex
166
167    The value of |hash_prime| should be roughly 85\%! of |hash_size|, and it should be a prime
168    number. The theory of hashing tells us to expect fewer than two table probes, on the average,
169    when the search is successful. [See J.~S. Vitter, {\sl Journal of the ACM\/ \bf30} (1983),
170    231--258.]
171
172    https://en.wikipedia.org/wiki/Coalesced_hashing
173    https://programming.guide/coalesced-hashing.html
174
175    Because we seldom use uppercase we get many misses, multiplying a chr j[k] by k actually gives
176    a better spread.
177
178    Making a \CONTEXT\ format takes some 250.000 hash calculations while the \LUAMETATEX\ manual 
179    needs some 1.7 million for just over 250 pages (with an average string length of 15).
180
181    The primitive hash lookups are needed when we initialize and when we lookup an internal
182    variable.
183
184*/
185
186inline static halfword tex_aux_compute_hash(const char *j, unsigned l)
187{
188    halfword h = (unsigned const char) j[0];
189    for (unsigned k = 1; k < l; k++) {
190        h = (h + h + (unsigned const char) j[k]) % hash_prime;
191    }
192    return h;
193}
194
195inline static halfword tex_aux_compute_prim(const char *j, unsigned l)
196{
197    halfword h = (unsigned const char) j[0];
198    for (unsigned k = 1; k < l; k++) {
199        h = (h + h + (unsigned const char) j[k]) % prim_prime;
200    }
201    return h;
202}
203
204halfword tex_prim_lookup(strnumber s)
205{
206    /*tex The index in the |hash| array: */
207    if (s >= cs_offset_value) {
208        unsigned char *j = str_string(s);
209     // unsigned l = (unsigned) str_length(s);
210        halfword l = (halfword) str_length(s);
211        halfword h = tex_aux_compute_prim((char *) j, l);
212        /*tex We start searching here; note that |0 <= h < hash_prime|. */
213        halfword p = h + 1;
214        while (1) {
215         /* When using |halfword text = prim_text(p)| no intellisense warning for first test in: */
216            if ((prim_text(p) > 0) && (str_length(prim_text(p)) == (size_t) l) && tex_str_eq_str(prim_text(p), s)) {
217                return p;
218            } else if (prim_next(p)) {
219                p = prim_next(p);
220            } else if (lmt_hash_state.no_new_cs) {
221                return undefined_primitive;
222            } else {
223                /*tex Insert a new primitive after |p|, then make |p| point to it. */
224                if (prim_text(p) > 0) {
225                    /*tex Search for an empty location in |prim| */
226                    do {
227                        if (lmt_primitive_state.prim_used > prim_base) {
228                            --lmt_primitive_state.prim_used;
229                        } else {
230                            tex_overflow_error("primitive size", prim_size);
231                        }
232                    } while (prim_text(lmt_primitive_state.prim_used));
233                    prim_next(p) = lmt_primitive_state.prim_used;
234                    p = lmt_primitive_state.prim_used;
235                }
236                prim_text(p) = s;
237                break;
238            }
239        }
240        return p;
241    } else if ((s < 0) || (s == undefined_control_sequence)) {
242        return undefined_primitive;
243    } else {
244        return s;
245    }
246}
247
248/*tex How to test a csname for primitive-ness? */
249
250/*
251int tex_cs_is_primitive(strnumber csname)
252{
253    int m = prim_lookup(csname);
254    if (m != undefined_primitive) {
255        char *ss = makecstring(csname);
256        int n = string_locate(ss, str_length(csname), 0);
257        lmt_memory_free(ss);
258        return ((n != undefined_cs_cmd) && (eq_type(n) == prim_eq_type(m)) && (eq_value(n) == prim_equiv(m)));
259    } else {
260        return 0;
261    }
262}
263*/
264
265/*tex Dumping and undumping. */
266
267/* We cheat! It should be dump_things(f, prim_state.prim[p], 1); */
268
269void tex_dump_primitives(dumpstream f)
270{
271    /*
272    for (int p = 0; p <= prim_size; p++) {
273        dump_mem(f, prim_state.prim[p]);
274    }
275    for (int p = 0; p <= prim_size; p++) {
276        dump_mem(f, prim_state.prim_eqtb[p]);
277    }
278    */
279    dump_things(f, lmt_primitive_state.prim[0], prim_size + 1);
280    dump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1);
281    for (int p = 0; p <= last_cmd; p++) {
282        dump_int(f, lmt_primitive_state.prim_data[p].offset);
283        dump_int(f, lmt_primitive_state.prim_data[p].subids);
284        for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) {
285            dump_int(f, lmt_primitive_state.prim_data[p].names[q]);
286        }
287    }
288}
289
290void tex_undump_primitives(dumpstream f)
291{
292    undump_things(f, lmt_primitive_state.prim[0], prim_size + 1);
293    undump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1);
294    for (int p = 0; p <= last_cmd; p++) {
295        undump_int(f, lmt_primitive_state.prim_data[p].offset);
296        undump_int(f, lmt_primitive_state.prim_data[p].subids);
297        if (lmt_primitive_state.prim_data[p].subids > 0) {
298            int size = lmt_primitive_state.prim_data[p].subids;
299            strnumber *names = aux_allocate_clear_array(sizeof(strnumber *), size, 1);
300            if (names) {
301                lmt_primitive_state.prim_data[p].names = names;
302                for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) {
303                    undump_int(f, names[q]);
304                }
305            } else {
306                tex_overflow_error("primitives", size * sizeof(strnumber *));
307            }
308        }
309    }
310}
311
312/*tex
313
314    Dump the hash table, A different scheme is used to compress the hash table, since its lower
315    region is usually sparse. When |text (p) <> 0| for |p <= hash_used|, we output two words,
316    |p| and |hash[p]|. The hash table is, of course, densely packed for |p >= hash_used|, so the
317    remaining entries are output in a~block.
318
319*/
320
321void tex_dump_hashtable(dumpstream f)
322{
323    dump_int(f, lmt_hash_state.eqtb_data.top);
324    lmt_hash_state.eqtb_data.ptr = frozen_control_sequence - 1 - lmt_hash_state.eqtb_data.top + lmt_hash_state.hash_data.ptr;
325    /* the root entries, i.e. the direct hash slots */
326    for (halfword p = hash_base; p <= lmt_hash_state.eqtb_data.top; p++) {
327        if (cs_text(p)) {
328            dump_int(f, p);
329            dump_int(f, lmt_hash_state.hash[p]);
330            ++lmt_hash_state.eqtb_data.ptr;
331        }
332    }
333    /* the chain entries, i.e. the follow up list slots => eqtb */
334    dump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top);
335    if (lmt_hash_state.hash_data.ptr > 0) {
336        dump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr);
337    }
338    dump_int(f, lmt_hash_state.eqtb_data.ptr);
339}
340
341void tex_undump_hashtable(dumpstream f)
342{
343    undump_int(f, lmt_hash_state.eqtb_data.top);
344    if (lmt_hash_state.eqtb_data.top >= hash_base && lmt_hash_state.eqtb_data.top <= frozen_control_sequence) {
345        halfword p = hash_base - 1;
346        do {
347            halfword q;
348            undump_int(f, q);
349            if (q >= (p + 1) && q <= lmt_hash_state.eqtb_data.top) {
350                undump_int(f, lmt_hash_state.hash[q]);
351                p = q;
352            } else {
353                goto BAD;
354            }
355        } while (p != lmt_hash_state.eqtb_data.top);
356        undump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top);
357        if (lmt_hash_state.hash_data.ptr > 0) {
358            /* we get a warning on possible overrun here */
359            undump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr);
360        }
361        undump_int(f, lmt_hash_state.eqtb_data.ptr);
362        lmt_hash_state.eqtb_data.initial = lmt_hash_state.eqtb_data.ptr;
363        return;
364    }
365  BAD:
366    tex_fatal_undump_error("hash");
367}
368
369/*tex
370
371    We need to put \TEX's \quote {primitive} control sequences into the hash table, together with
372    their command code (which will be the |eq_type|) and an operand (which will be the |equiv|).
373    The |primitive| procedure does this, in a way that no \TEX\ user can. The global value |cur_val|
374    contains the new |eqtb| pointer after |primitive| has acted.
375
376    Because the definitions of the actual user-accessible name of a primitive can be postponed until
377    runtime, the function |primitive_def| is needed that does nothing except creating the control
378    sequence name.
379
380*/
381
382void tex_primitive_def(const char *str, size_t length, singleword cmd, halfword chr)
383{
384    /*tex This creates the |text()| string: */
385    cur_val = tex_string_locate(str, length, 1);
386    set_eq_level(cur_val, level_one);
387    set_eq_type(cur_val, cmd);
388    set_eq_flag(cur_val, primitive_flag_bit);
389    set_eq_value(cur_val, chr);
390}
391
392/*tex
393
394    The function |store_primitive_name| sets up the bookkeeping for the reverse lookup. It is
395    quite paranoid, because it is easy to mess this up accidentally.
396
397    The |offset| is needed because sometimes character codes (in |o|) are indices into |eqtb|
398    or are offset by a magical value to make sure they do not conflict with something else. We
399    don't want the |prim_data[c].names| to have too many entries as it will just be wasted room,
400    so |offset| is substracted from |o| before creating or accessing the array.
401
402*/
403
404static void tex_aux_store_primitive_name(strnumber s, singleword cmd, halfword chr, halfword offset)
405{
406    lmt_primitive_state.prim_data[cmd].offset = offset;
407    if (lmt_primitive_state.prim_data[cmd].subids < (chr + 1)) {
408        /*tex Not that efficient as each primitive triggers this now but only at ini time so ... */
409        strnumber *newstr = aux_allocate_clear_array(sizeof(strnumber *), chr + 1, 1);
410        if (lmt_primitive_state.prim_data[cmd].names) {
411            memcpy(newstr, lmt_primitive_state.prim_data[cmd].names, (unsigned) (lmt_primitive_state.prim_data[cmd].subids) * sizeof(strnumber));
412            aux_deallocate_array(lmt_primitive_state.prim_data[cmd].names);
413        }
414        lmt_primitive_state.prim_data[cmd].names = newstr;
415        lmt_primitive_state.prim_data[cmd].subids = chr + 1;
416    }
417    lmt_primitive_state.prim_data[cmd].names[chr] = s;
418}
419
420/*tex
421
422    Compared to \TEX82, |primitive| has two extra parameters. The |off| is an offset that will be
423    passed on to |store_primitive_name|, the |cmd_origin| is the bit that is used to group
424    primitives by originator. So the next function is called for each primitive and fills |prim_eqtb|.
425
426    Contrary to \LUATEX\ we define (using |primitive_def|) all primitives beforehand, so not only
427    those with |cmd_origin| values |core| and |tex|. As side effect, we don't get redundant string
428    entries as in \LUATEX.
429
430*/
431
432void tex_primitive(int cmd_origin, const char *str, singleword cmd, halfword chr, halfword offset)
433{
434    int prim_val;
435    strnumber ss;
436    if (cmd_origin != no_command) {
437        tex_primitive_def(str, strlen(str), cmd, offset + chr);
438        /*tex Indeed, |cur_val| has the latest primitive. */
439        ss = cs_text(cur_val);
440    } else {
441        ss = tex_maketexstring(str);
442    }
443    prim_val = tex_prim_lookup(ss);
444    prim_origin(prim_val) = (quarterword) cmd_origin;
445    prim_eq_type(prim_val) = cmd;
446    prim_equiv(prim_val) = offset + chr;
447    tex_aux_store_primitive_name(ss, cmd, chr, offset);
448}
449
450/*tex
451
452    Here is a helper that does the actual hash insertion. This code far from ideal: the existence
453    of |hash_extra| changes all the potential (short) coalesced lists into a single (long) one.
454    This will create a slowdown.
455
456    Here |hash_state.hash_used| starts out as the maximum \quote {normal} hash, not extra.
457
458*/
459
460static halfword tex_aux_insert_id(halfword p, const unsigned char *j, unsigned int l)
461{
462    if (cs_text(p) > 0) {
463     RESTART:
464        if (lmt_hash_state.hash_data.ptr < lmt_hash_state.hash_data.allocated) {
465            ++lmt_hash_state.hash_data.ptr;
466            cs_next(p) = lmt_hash_state.hash_data.ptr + eqtb_size;
467            p = cs_next(p);
468        } else if (tex_aux_room_in_hash()) {
469            goto RESTART;
470        } else {
471            /*tex
472                Search for an empty location in |hash|. This actually makes the direct first hit
473                in such a hash slot invalid but we check for the string anyway. As we now use a
474                hash size that is rather minimal, we don't really need this branch. It is a last
475                resort anyway.
476            */
477            do {
478                if (lmt_hash_state.eqtb_data.top == hash_base) {
479                    /*tex We cannot go lower than this. */
480                    tex_overflow_error("hash size", hash_size + lmt_hash_state.hash_data.allocated);
481                }
482                --lmt_hash_state.eqtb_data.top;
483            } while (cs_text(lmt_hash_state.eqtb_data.top) != 0);
484            cs_next(p) = lmt_hash_state.eqtb_data.top;
485            p = lmt_hash_state.eqtb_data.top;
486        }
487    }
488    cs_text(p) = tex_push_string(j, l);
489    copy_eqtb_entry(p, undefined_control_sequence);
490    ++lmt_hash_state.eqtb_data.ptr;
491    return p;
492}
493
494/*tex
495
496    Here is the subroutine that searches the hash table for an identifier that matches a given
497    string of length |l > 1| appearing in |buffer[j .. (j + l - 1)]|. If the identifier is found,
498    the corresponding hash table address is returned. Otherwise, if the global variable
499    |no_new_control_sequence| is |true|, the dummy address |undefined_control_sequence| is returned.
500    Otherwise the identifier is inserted into the hash table and its location is returned.
501
502    On the \LUAMETATEX\ manual we have 250K hits and 400K misses. Adapting the max and prime does
503    bring down the misses but also no gain in performance. In practice we seldom follow the chain.
504
505*/
506
507halfword tex_id_locate(int j, int l, int create)
508{
509    /*tex The index in |hash| array: */
510    halfword p = tex_aux_compute_hash((char *) (lmt_fileio_state.io_buffer + j), (unsigned) l) + hash_base;
511    /*tex We start searching here. Note that |0 <= h < hash_prime|: */
512    while (1) {
513        strnumber s = cs_text(p);
514        if ((s > 0) && (str_length(s) == (unsigned) l) && tex_str_eq_buf(s, j, l)) {
515            return p;
516        } else {
517            /*tex The next one in a chain: */
518            halfword n = cs_next(p);
519            if (n) {
520                p = n;
521            } else if (create) {
522                return tex_aux_insert_id(p, (lmt_fileio_state.io_buffer + j), (unsigned) l);
523            } else {
524                break;
525            }
526        }
527    }
528    return undefined_control_sequence;
529}
530
531halfword tex_id_locate_only(int j, int l)
532{
533    halfword p = tex_aux_compute_hash((char *) (lmt_fileio_state.io_buffer + j), (unsigned) l) + hash_base;
534    while (p) {
535        strnumber s = cs_text(p);
536        if ((s > 0) && (str_length(s) == (unsigned) l) && tex_str_eq_buf(s, j, l)) {
537     // if ((s > 0) && (str_length(s) == (unsigned) l) && memcmp(str_string(s), &lmt_fileio_state.io_buffer[j], l) == 0) {
538            return p;
539        } else {
540            p = cs_next(p);
541        }
542    }
543    return undefined_control_sequence;
544}
545
546/*tex
547
548    Here is a similar subroutine for finding a primitive in the hash. This one is based on a \CCODE\
549    string.
550
551*/
552
553halfword tex_string_locate(const char *s, size_t l, int create)
554{
555    /*tex The hash code: */
556    halfword h = tex_aux_compute_hash(s, (unsigned) l);
557    /*tex The index in |hash| array. We start searching here. Note that |0 <= h < hash_prime|: */
558    halfword p = h + hash_base;
559    while (1) {
560        if (cs_text(p) > 0 && tex_str_eq_cstr(cs_text(p), s, (int) l)) {
561            return p;
562        } else {
563            halfword n = cs_next(p);
564            if (n) {
565                p = n;
566            } else if (create) {
567                return tex_aux_insert_id(p, (const unsigned char *) s, (unsigned) l);
568            } else {
569                break;
570            }
571        }
572    }
573    return undefined_control_sequence;
574}
575
576halfword tex_string_locate_only(const char *s, size_t l)
577{
578    halfword p = tex_aux_compute_hash(s, (unsigned) l) + hash_base;
579    while (p) {
580        if (cs_text(p) > 0 && tex_str_eq_cstr(cs_text(p), s, (int) l)) {
581            return p;
582        } else {
583            p = cs_next(p);
584        }
585    }
586    return undefined_control_sequence;
587}
588
589halfword tex_located_string(const char *s)
590{
591    return tex_string_locate_only(s, strlen(s));
592}
593
594/*tex
595
596    The |print_cmd_chr| routine prints a symbolic interpretation of a command code and its modifier.
597    This is used in certain \quotation {You can\'t} error messages, and in the implementation of
598    diagnostic routines like |\show|.
599
600    The body of |print_cmd_chr| use to be a rather tedious listing of print commands, and most of it
601    was essentially an inverse to the |primitive| routine that enters a \TEX\ primitive into |eqtb|.
602
603    Thanks to |prim_data|, there is no need for all that tediousness. What is left of |primt_cnd_chr|
604    are just the exceptions to the general rule that the |cmd,chr_code| pair represents in a single
605    primitive command.
606
607*/
608
609static void tex_aux_print_chr_cmd(const char *s, halfword cmd, halfword chr)
610{
611    tex_print_str(s);
612    if (chr) {
613        tex_print_str(cmd == letter_cmd ? " letter " : " character ");
614        tex_print_uhex(chr);
615        tex_print_char(' ');
616        /*
617            By using the the unicode (ascii) names for some we can better support syntax
618            highlighting (which often involves parsing). The names are enclused in single
619            quotes. For the chr codes above 128 we assume \UNICODE\ support.
620        */
621        /*tex
622            We already intercepted the line feed here so that it doesn't give a side effect here
623            in the original |tex_print_tex_str(chr)| call but we have now inlined similar code
624            but without side effects.
625        */
626        if (chr < 32 || chr == 127) {
627            return;
628        } else if (chr <= 0x7F) {
629            switch (chr) {
630                case '\n' : tex_print_str("'line feed'");            return;
631                case '\r' : tex_print_str("'carriage return'");      return;
632                case ' '  : tex_print_str("'space'");                return;
633                case '!'  : tex_print_str("'exclamation mark'");     return;
634                case '\"' : tex_print_str("'quotation mark'");       return;
635                case '#'  : tex_print_str("'hash tag'");             return;
636                case '$'  : tex_print_str("'dollar sign'");          return;
637                case '%'  : tex_print_str("'percent sign'");         return;
638                case '&'  : tex_print_str("'ampersand'");            return;
639                case '\'' : tex_print_str("'apostrophe'");           return;
640                case '('  : tex_print_str("'left parenthesis'");     return;
641                case ')'  : tex_print_str("'right parenthesis'");    return;
642                case '*'  : tex_print_str("'asterisk'");             return;
643                case '+'  : tex_print_str("'plus sign'");            return;
644                case ','  : tex_print_str("'comma'");                return;
645                case '-'  : tex_print_str("'hyphen minus'");         return;
646                case '.'  : tex_print_str("'full stop'");            return;
647                case '/'  : tex_print_str("'slash'");                return;
648                case ':'  : tex_print_str("'colon'");                return;
649                case ';'  : tex_print_str("'semicolon'");            return;
650                case '<'  : tex_print_str("'less than sign'");       return;
651                case '='  : tex_print_str("'equal sign'");           return;
652                case '>'  : tex_print_str("'more than sign'");       return;
653                case '?'  : tex_print_str("'question mark'");        return;
654                case '@'  : tex_print_str("'at sign'");              return;
655                case '['  : tex_print_str("'left square bracket'");  return;
656                case '\\' : tex_print_str("'backslash'");            return;
657                case ']'  : tex_print_str("'right square bracket'"); return;
658                case '^'  : tex_print_str("'circumflex accent'");    return;
659                case '_'  : tex_print_str("'low line'");             return;
660                case '`'  : tex_print_str("'grave accent'");         return;
661                case '{'  : tex_print_str("'left curly bracket'");   return;
662                case '|'  : tex_print_str("'vertical bar'");         return;
663                case '}'  : tex_print_str("'right curly bracket'");  return;
664                case '~'  : tex_print_str("'tilde'");                return;
665            }
666            tex_print_char(chr);
667        } else if (chr <= 0x7FF) {
668            tex_print_char(0xC0 + (chr / 0x40));
669            tex_print_char(0x80 + (chr % 0x40));
670        } else if (chr <= 0xFFFF) {
671            tex_print_char(0xE0 +  (chr / 0x1000));
672            tex_print_char(0x80 + ((chr % 0x1000) / 0x40));
673            tex_print_char(0x80 + ((chr % 0x1000) % 0x40));
674        } else if (chr <= 0x10FFFF) {
675            tex_print_char(0xF0 +   (chr / 0x40000));
676            tex_print_char(0x80 +  ((chr % 0x40000) / 0x1000));
677            tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) / 0x40));
678            tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) % 0x40));
679        }
680    }
681}
682
683/*tex |\TEX82| Didn't print the |cmd,idx| information, but it may be useful. */
684
685static void tex_aux_prim_cmd_chr(quarterword cmd, halfword chr)
686{
687    if (cmd <= last_visible_cmd) {
688        int idx = chr - lmt_primitive_state.prim_data[cmd].offset;
689        if (idx >= 0 && idx < lmt_primitive_state.prim_data[cmd].subids) {
690            if (lmt_primitive_state.prim_data[cmd].names && lmt_primitive_state.prim_data[cmd].names[idx]) {
691                tex_print_tex_str_esc(lmt_primitive_state.prim_data[cmd].names[idx]);
692            } else {
693                tex_print_format("[warning: cmd %i, chr %i, no name]", cmd, idx);
694            }
695        } else if (cmd == internal_integer_cmd && idx < number_integer_pars) {
696            /* a special case */
697            tex_print_format("[integer: chr %i, class specific]", cmd);
698        } else {
699            tex_print_format("[warning: cmd %i, chr %i, out of range]", cmd, idx);
700        }
701    } else {
702        tex_print_format("[warning: cmd %i, invalid]", cmd);
703    }
704}
705
706static void tex_aux_show_lua_call(const char *what, int slot)
707{
708    int callback_id = lmt_callback_defined(show_lua_call_callback);
709    if (callback_id) {
710        char *ss = NULL;
711        int lua_retval = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Sd->S", what, slot, &ss);
712        if (lua_retval && ss && strlen(ss) > 0) {
713            tex_print_str(ss);
714            lmt_memory_free(ss);
715            return;
716        }
717    }
718    tex_print_format("%s %i", what, slot);
719}
720
721void tex_print_cmd_flags(halfword cs, halfword cmd, int flags, int escaped)
722{
723    if (flags) {
724        flags = eq_flag(cs);
725        if (is_frozen   (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("frozen "   ); }
726        if (is_permanent(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("permanent "); }
727        if (is_immutable(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("immutable "); }
728        if (is_primitive(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("primitive "); }
729        if (is_mutable  (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("mutable "  ); }
730        if (is_noaligned(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("noaligned "); }
731        if (is_instance (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("instance " ); }
732        if (is_untraced (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("untraced " ); }
733    }
734    if (is_constant_cmd(cmd)) {
735        (escaped ? tex_print_str_esc : tex_print_str)("constant " );
736    }
737    if (is_tolerant_cmd(cmd)) {
738        (escaped ? tex_print_str_esc : tex_print_str)("tolerant " );
739    }
740    if (is_protected_cmd(cmd)) {
741        (escaped ? tex_print_str_esc : tex_print_str)("protected ");
742    } else if (is_semi_protected_cmd(cmd)) {
743        (escaped ? tex_print_str_esc : tex_print_str)("semiprotected ");
744    }
745}
746
747void tex_print_cmd_chr(singleword cmd, halfword chr)
748{
749    switch (cmd) {
750        case left_brace_cmd:
751            tex_aux_print_chr_cmd("begin group", cmd, chr);
752            break;
753        case right_brace_cmd:
754            tex_aux_print_chr_cmd("end group", cmd, chr);
755            break;
756        case math_shift_cmd:
757            tex_aux_print_chr_cmd("math shift", cmd, chr);
758            break;
759        case alignment_tab_cmd:
760            tex_aux_print_chr_cmd("alignment tab", cmd, chr);
761            break;
762        case parameter_cmd:
763            tex_aux_print_chr_cmd("parameter", cmd, chr);
764            break;
765        case superscript_cmd:
766            tex_aux_print_chr_cmd("superscript", cmd, chr);
767            break;
768        case subscript_cmd:
769            tex_aux_print_chr_cmd("subscript", cmd, chr);
770            break;
771        case spacer_cmd:
772            tex_aux_print_chr_cmd("blank space", cmd, chr);
773            break;
774        case letter_cmd:
775        case other_char_cmd:
776            tex_aux_print_chr_cmd("the", cmd, chr);
777            break;
778        case active_char_cmd:
779            tex_aux_print_chr_cmd("active", cmd, chr);
780            break;
781        /*
782        case comment_cmd:
783        case invalid_char_cmd:
784            break;
785        */
786        case end_template_cmd:
787            /*tex Kind of special: |chr| points to |null_list). */
788            tex_print_str_esc("endtemplate");
789         // tex_print_str("end of alignment template");
790            break;
791        case if_test_cmd:
792            if (chr <= last_if_test_code) {
793                tex_aux_prim_cmd_chr(cmd, chr);
794            } else {
795                tex_aux_show_lua_call("luacondition", chr - last_if_test_code);
796            }
797            break;
798        case char_given_cmd:
799            tex_print_str_esc("char");
800            tex_print_qhex(chr);
801            break;
802        case lua_call_cmd:
803            tex_aux_show_lua_call("luacall", chr);
804            break;
805        case lua_local_call_cmd:
806            tex_aux_show_lua_call("local luacall", chr);
807            break;
808        case lua_protected_call_cmd:
809            tex_aux_show_lua_call("protected luacall", chr);
810            break;
811        case lua_semi_protected_call_cmd:
812            tex_aux_show_lua_call("semiprotected luacall", chr);
813            break;
814        case lua_value_cmd:
815            tex_aux_show_lua_call("luavalue", chr);
816            break;
817        case set_font_cmd:
818            tex_print_str("select font ");
819            tex_print_font(chr);
820            break;
821        case undefined_cs_cmd:
822            tex_print_str("undefined");
823            break;
824        case call_cmd:
825        case protected_call_cmd:
826        case semi_protected_call_cmd:
827        case constant_call_cmd:
828        case tolerant_call_cmd:
829        case tolerant_protected_call_cmd:
830        case tolerant_semi_protected_call_cmd:
831            tex_print_cmd_flags(cur_cs, cur_cmd, 1, 0);
832            tex_print_str("macro");
833            break;
834        case internal_toks_cmd:
835            tex_aux_prim_cmd_chr(cmd, chr);
836            break;
837        case register_toks_cmd:
838            tex_print_str_esc("toks");
839            tex_print_int(register_toks_number(chr));
840            break;
841        case internal_integer_cmd:
842            tex_aux_prim_cmd_chr(cmd, chr);
843            break;
844        case register_integer_cmd:
845            tex_print_str_esc("count");
846            tex_print_int(register_integer_number(chr));
847            break;
848        case internal_attribute_cmd:
849            tex_aux_prim_cmd_chr(cmd, chr);
850            break;
851        case register_attribute_cmd:
852            tex_print_str_esc("attribute");
853            tex_print_int(register_attribute_number(chr));
854            break;
855        case register_posit_cmd:
856            tex_print_str_esc("posit");
857            tex_print_int(register_posit_number(chr));
858            break;
859        case internal_posit_cmd:
860            tex_aux_prim_cmd_chr(cmd, chr);
861            break;
862        case internal_dimension_cmd:
863            tex_aux_prim_cmd_chr(cmd, chr);
864            break;
865        case register_dimension_cmd:
866            tex_print_str_esc("dimen");
867            tex_print_int(register_dimension_number(chr));
868            break;
869        case internal_glue_cmd:
870            tex_aux_prim_cmd_chr(cmd, chr);
871            break;
872        case register_glue_cmd:
873            tex_print_str_esc("skip");
874            tex_print_int(register_glue_number(chr));
875            break;
876        case internal_muglue_cmd:
877            tex_aux_prim_cmd_chr(cmd, chr);
878            break;
879        case register_muglue_cmd:
880            tex_print_str_esc("muskip");
881            tex_print_int(register_muglue_number(chr));
882            break;
883        case node_cmd:
884            tex_print_str(node_token_flagged(chr) ? "large" : "small");
885            tex_print_str(" node reference");
886            break;
887        case integer_cmd:
888            tex_print_str("integer ");
889            tex_print_int(chr);
890            break;
891        case index_cmd:
892            tex_print_str("parameter ");
893            tex_print_int(chr);
894            break;
895        case dimension_cmd:
896            tex_print_str("dimension ");
897            tex_print_dimension(chr, pt_unit);
898            break;
899        case posit_cmd:
900            tex_print_str("posit ");
901            tex_print_posit(chr);
902            break;
903        case gluespec_cmd:
904            tex_print_str("gluespec ");
905            tex_print_spec(chr, pt_unit);
906            break;
907        case mugluespec_cmd:
908            tex_print_str("mugluespec ");
909            tex_print_spec(chr, mu_unit);
910            break;
911        case mathspec_cmd:
912            switch (node_subtype(chr)) {
913                case tex_mathcode:
914                    tex_print_str_esc("mathchar");
915                    break;
916                case umath_mathcode:
917             /* case umathnum_mathcode: */
918                    tex_print_str_esc("Umathchar");
919                    break;
920                case mathspec_mathcode:
921                    tex_print_str("mathspec ");
922            }
923            tex_print_mathspec(chr);
924            break;
925        case fontspec_cmd:
926            {
927                /* We don't check for validity here. */
928                tex_print_str("fontspec ");
929                tex_print_fontspec(chr);
930            }
931            break;
932        case deep_frozen_end_template_cmd:
933            /*tex Kind of special: |chr| points to |null_list). */
934            tex_print_str_esc("endtemplate");
935            break;
936        case deep_frozen_dont_expand_cmd:
937            /*tex Kind of special. */
938            tex_print_str_esc("notexpanded");
939            break;
940        case deep_frozen_keep_constant_cmd:
941            /*tex Kind of special. */
942            tex_print_str_esc("keepconstant");
943            break;
944        case internal_box_reference_cmd:
945            tex_print_str_esc("hiddenlocalbox");
946            break;
947        default:
948            /*tex These are most commands, actually. Todo: local boxes*/
949            tex_aux_prim_cmd_chr(cmd, chr);
950            break;
951    }
952}
953