textypes.h /size: 45 Kb    last modification: 2025-02-21 11:03
1/*
2    See license.txt in the root of this project.
3*/
4
5# ifndef LMT_TEXTYPES_H
6# define LMT_TEXTYPES_H
7
8# include <stdio.h>
9
/*tex
    Two step stringification: |LMT_TOSTRING| lets the preprocessor expand its argument first and
    then hands the result to |LMT_TOSTRING_INDEED|, which turns those tokens into a string literal.
*/

# define LMT_TOSTRING_INDEED(x) #x
# define LMT_TOSTRING(x) LMT_TOSTRING_INDEED(x)
12
13/*tex
14
15    Here is the comment from the engine(s) that we started with. Keep in mind that \TEX\ originates
16    on other architectures and that it was written in \PASCAL.
17
18    In order to make efficient use of storage space, \TEX\ bases its major data structures on a
19    |memoryword|, which contains either a (signed) integer, possibly scaled, or a (signed)
20    |glue_ratio|, or a small number of fields that are one half or one quarter of the size used for
21    storing integers. More details about how we pack data in a memory word can be found in the
22    |memoryword| files.
23
24    If |x| is a variable of type |memoryword|, it contains up to four fields that can be referred
25    to as follows (\LUATEX\ differs a bit here but the principles remain):
26
27    \starttabulate
28        \NC |x.int|                           \NC an |integer|            \NC \NR
29        \NC |x.sc |                           \NC a |scaled| integer      \NC \NR
30        \NC |x.gr|                            \NC a |glueratio|           \NC \NR
        \NC |x.hh.lh|, |x.hh.rh|              \NC two halfword fields     \NC \NR
32        \NC |x.hh.b0|, |x.hh.b1|              \NC two quarterword fields  \NC \NR
33        \NC |x.qqqq.b0| \unknown\ |x.qqqq.b3| \NC four quarterword fields \NC \NR
34    \stoptabulate
35
36    This is somewhat cumbersome to write, and not very readable either, but macros will be used to
37    make the notation shorter and more transparent. The |memoryword| file gives a formal definition
38    of |memoryword| and its subsidiary types, using packed variant records. \TEX\ makes no
39    assumptions about the relative positions of the fields within a word.
40
41    We are assuming 32-bit integers, a halfword must contain at least 32 bits, and a quarterword
42    must contain at least 16 bits.
43
44    The present implementation tries to accommodate as many variations as possible, so it makes few
45    assumptions. If integers having the subrange |min_quarterword .. max_quarterword| can be packed
46    into a quarterword, and if integers having the subrange |min_halfword .. max_halfword| can be
47    packed into a halfword, everything should work satisfactorily.
48
49    It is usually most efficient to have |min_quarterword = min_halfword = 0|, so one should try to
50    achieve this unless it causes a severe problem. The values defined here are recommended for most
51    32-bit computers.
52
53    We cannot use the full range of 32 bits in a halfword, because we have to allow negative values
54    for potential backend tricks like web2c's dynamic allocation, and parshapes pointers have to be
55    able to store at least twice the value |max_halfword| (see below). Therefore, |max_halfword| is
56    $2^{30}-1$
57
    Via the intermediate step of \WEBC\ we went from \PASCAL\ to \CCODE. As in the meantime we also
    live in a 64 bit world the above model has been adapted a bit but the principles and names remain.
60
61    A |halfword| is a 32 bit integer and a |quarterword| a 16 bit one. The |scaled| type is used for
62    scaled integers but it's just another name for |halfword| or |int|. The code sometimes uses an
63    |int| instead of |scaled| or |halfword| (which might get fixed). By using the old type names we
64    sort of get an indication what we're dealing with.
65
66    If we even bump scaled to 64 bit we need to redo some code that now assumes that a scaled and
67    halfword are the same size (as in values). Instead we can then decide to go 64 bit for both.
68
69    The |internal_font_number| type is now also a |halfword| so it's no longer used as such.
70
    We now use 64 bit memory words split into whatever pieces we need. This also means that we can use
    a double as glueratio which saves us some casting.
73
    In principle we can widen up the engine to use long instead of int because it is relatively easy
    to adapt the nodes but it will take much more memory and we gain nothing. I might (re)introduce
    the pointer as type instead of halfword just for clarity but the mixed usage doesn't really make
    it better. It's more about perception. I will do that when I have reason to check some code and
    am in edit mode.
79
80*/
81
/*tex
    The basic scalar types. The old names are kept because they tell what kind of value we are
    dealing with, even when several of them are the same \CCODE\ type.
*/

typedef int             strnumber;
typedef int             halfword;
typedef long long       fullword;
typedef unsigned short  quarterword;   /*tex It really is an unsigned one! But \MPLIB\ had it signed. */
typedef unsigned char   singleword;
typedef int             scaled;
typedef double          glueratio;     /*tex This looks better in our (tex specific) syntax highlighting. */
typedef int             pointer;       /*tex Maybe I'll replace halfwords that act as pointer some day. */
typedef FILE           *dumpstream;

/*tex
    The packing of these types into 64 bit memory words relies on the sizes below, so we let the
    compiler refuse a platform where they do not hold instead of failing at runtime.
*/

_Static_assert(sizeof(halfword)    == 4, "halfword must be a 32 bit integer");
_Static_assert(sizeof(scaled)      == sizeof(halfword), "scaled and halfword must have the same size");
_Static_assert(sizeof(quarterword) == 2, "quarterword must be a 16 bit integer");
_Static_assert(sizeof(singleword)  == 1, "singleword must be a single byte");
_Static_assert(sizeof(fullword)    == 8, "fullword must be a 64 bit integer");
_Static_assert(sizeof(glueratio)   == 8, "glueratio must be a 64 bit double");
91
92/*      glueratio       glue_ratio; */ /*tex one-word representation of a glue expansion factor */
93/*      unsigned char   glue_ord;   */ /*tex infinity to the 0, 1, 2, 3, or 4 power */
94/*      unsigned short  group_code; */ /*tex |save_level| for a level boundary */
95
96/*tex
97
98    The documentation refers to pointers and halfwords and scaled and all are in fact just integers.
    Okay, one can wonder about negative pointers but we never reach the limits so we're okay wrt
    wraparound. At some point we might just replace all by int as some of the helpers already do
101    that. For now we keep halfword and scaled but we removed (the not so often used) pointers
102    because they were already mixed with halfwords in similar usage.
103
104    So, again we use constants that reflect the original naming and also the related comments.
105
    Here are some more constants. Other definitions can be found alongside where they make most
    sense. For instance, these are used all over the place: |null|, |normal|, etc. However, over
108    time, with all these extensions it was not used consistently. So, I replaced the usage of
109    |normal| by more explicit identifiers, also because we have more subtypes in this engine. But
110    we kept most constants (but most in enums)!
111
112    Characters of text that have been converted to \TEX's internal form are said to be of type
113    |unsigned char|, which is a subrange of the integers. We are assuming that our runtime system
114    is able to read and write \UTF-8.
115
116    If constants in this file change, one also must change the format identifier!
117
118*/
119
/*tex
    A bundle of four |scaled| values. The fourth slot is an anonymous union, so |ic| and |ns| are
    two names for the same storage and only one of them is meaningful at a time.

    NOTE(review): |wd|, |ht| and |dp| presumably are width, height and depth and |ic| the italic
    correction; confirm against the code that fills this struct.
*/

typedef struct scaledwhd {
    scaled wd;
    scaled ht;
    scaled dp;
    union { 
        scaled ic; /* padding anyway */
        scaled ns; /* natural size */
    };
} scaledwhd;
129
/*tex
    Four |scaled| values that belong together.

    NOTE(review): going by the names this holds corner kerns (|bl| bottom left, |br| bottom right,
    |tl| top left, |tr| top right); that is not established here, so confirm at the point of use.
*/

typedef struct scaledkrn {
    scaled bl;
    scaled br;
    scaled tl;
    scaled tr;
} scaledkrn;
136
137extern halfword tex_badness(
138    scaled t,
139    scaled s
140);
141
142/*tex
    We could use the 4 leftmost bits in tokens for [protected frozen tolerant permanent] flags but
    it would mean way more shifting and checking so we don't do that. However, we already use
145    one nibble for the cstokenflag: 0x1FFFFFFF so we actually have no room. We also have a signed
146    unsigned issue because halfwords are integers so quite a bit needs to be adapted if we use all
147    32 bits. We have between 128 and 256 cmd codes so we need one byte for that. We also have to
148    deal with the max utf / unicode values.
149*/
150
/*tex
    The three |cs_offset| constants describe the same 21 bit boundary and have to be kept in sync
    by hand, because they are spelled out as literals.
*/

# define cs_offset_bits                            21
# define cs_offset_value                   0x00200000  // (1 << cs_offset_bits)
# define cs_offset_max                     0x001FFFFF  // ((1 << cs_offset_bits) - 1)
# define cs_token_flag                     0x1FFFFFFF
155
/*tex
    Numeric limits. The negative limits are the mirrored positive ones, so the most negative
    two's complement value is deliberately left out.

    NOTE(review): |max_longinteger| has 14 hex digits, which is $2^{55}-1$ and not the full 63 bit
    range of a |long long|; confirm that this is intended.
*/

# define max_cardinal                      0xFFFFFFFF
# define min_cardinal                               0
# define max_integer                       0x7FFFFFFF /*tex aka |infinity| */
# define min_integer                      -0x7FFFFFFF /*tex aka |min_infinity| */
# define max_longinteger             0x7FFFFFFFFFFFFF
# define min_longinteger            -0x7FFFFFFFFFFFFF

/*tex
    The casts are wrapped in parentheses so that the macros behave as a single operand wherever
    they are used (for instance after |sizeof| without parentheses).
*/

# define max_doubleinteger ((double) max_longinteger)
# define min_doubleinteger ((double) min_longinteger)
# define max_posit                       max_cardinal
# define min_posit                       min_cardinal
# define max_dimension                     0x3FFFFFFF
# define min_dimension                    -0x3FFFFFFF
# define max_dimen                      max_dimension
# define min_dimen                      min_dimension
# define min_data_value                             0
# define max_data_value                 cs_offset_max
# define max_half_value                         32767 /*tex For instance sf codes.*/
173
/*tex
    One big point in scaled points: $65536 \times 7227 / 7200 = 65781.76$, truncated.
*/

# define one_bp                                 65781

/*tex
    Badness, penalty and demerits related constants. Keep in mind that |eject_penalty| expands to
    |-infinite_bad|, so it follows any change made to |infinite_bad|.
*/

# define max_infinity                      0x7FFFFFFF /*tex the largest positive value that \TEX\ knows */
# define min_infinity                     -0x7FFFFFFF /*tex the mirrored value, not the most negative |int| */
# define awful_bad                         0x3FFFFFFF /*tex more than a billion demerits |07777777777| */ 
# define infinite_bad                           10000 /*tex infinitely bad value */
# define infinite_penalty                infinite_bad /*tex infinite penalty value */
# define eject_penalty              -infinite_penalty /*tex negatively infinite penalty value */
# define final_penalty                    -0x40000000 /*tex in the output routine */
# define deplorable                            100000 /*tex more than |inf_bad|, but less than |awful_bad| */
# define extremely_deplorable               100000000
# define large_width_excess                   7230584 /*tex about 110.33pt when read as scaled points */
# define large_height_excess                  7230584 /*tex same as |large_width_excess| */
# define small_stretchability                 1663497 /*tex about 25.38pt when read as scaled points */
# define loose_criterion                           99 
# define decent_criterion                          12 
# define tight_criterion                           12 /*tex same as |decent_criterion| */
# define max_calculated_badness                  8189
# define emergency_adj_demerits                 10000

# define default_rule                           26214 /*tex 0.4pt */
# define ignore_depth                       -65536000 /*tex The magic dimension value to mean \quote {ignore me}: -1000pt */
196
/*tex
    The value ranges of the two packed field types. A |quarterword| uses its full unsigned 16 bit
    range, a |halfword| deliberately stays within plus and minus $2^{30}-1$.
*/

# define min_quarterword                            0 /*tex The smallest allowable value in a |quarterword|. */
# define max_quarterword                        65535 /*tex The largest allowable value in a |quarterword|. */

# define min_halfword                     -0x3FFFFFFF /*tex The smallest allowable value in a |halfword|. */
# define max_halfword                      0x3FFFFFFF /*tex The largest allowable value in a |halfword|. */

/*tex
    Here |null_flag| is $-2^{30}$, which is one below |min_halfword| and therefore can never be a
    regular halfword value.
*/

# define null_flag                        -0x40000000
# define zero_glue                                  0
# define unity                                0x10000 /*tex |0200000| or $2^{16}$, represents 1.00000 */
# define two                                  0x20000 /*tex |0400000| or $2^{17}$, represents 2.00000 */
# define null                                       0
# define null_font                                  0
209
/*tex
    Sentinels that signal \quote {not set}. They are chosen outside (or at the edge of) the range
    of values that can be assigned.
*/

# define unused_attribute_value           -0x7FFFFFFF /*tex as low as it goes */
# define unused_state_value                         0 /*tex 0 .. 0xFFFF */
# define unused_script_value                        0 /*tex 0 .. 0xFFFF */
# define unused_scale_value                      1000

# define unused_math_style                       0xFF
# define unused_math_family                      0xFF

/*tex This is $2^{30}$, so one more than |max_halfword| (and |max_dimension|). */

# define preset_rule_thickness             0x40000000 /*tex denotes |unset_rule_thickness|: |010000000000|. */
219
/*tex
    Limits and defaults for space factors, scale factors and a few parameters that get a preset
    value.
*/

# define min_space_factor                           0 /*tex watch out: |\spacefactor| cannot be zero but the sf code can!*/
# define max_space_factor                      0x7FFF /*tex |077777|, that is 32767 */
# define min_scale_factor                           0 
# define max_scale_factor                      100000 /*tex for now */
# define default_space_factor                    1000
# define space_factor_threshold                  2000
# define default_tolerance                      10000
# define default_hangafter                          1
# define default_deadcycles                        25
# define default_pre_display_gap                 2000
# define default_eqno_gap_step                   1000
231
# define default_output_box                       255

/*tex
    The integer and floating point variants of the scaling factor have to be kept in sync by hand:
    |scaling_factor_squared| is |scaling_factor| times itself and |scaling_factor_double| is the
    same value as a double.
*/

# define scaling_factor                          1000
# define scaling_factor_squared               1000000
# define scaling_factor_double                   1000.0
//define scaling_multiplier_double               0.001

# define max_math_scaling_factor                 5000

# define max_font_adjust_step                     100
# define max_font_adjust_stretch_factor          1000
# define max_font_adjust_shrink_factor            500

/*tex One more than |infinite_penalty|, so with the values in this file: 10001. */

# define math_default_penalty    (infinite_penalty+1)

/*tex
    The two alignment states are each others negation; the threshold is half of the busy value.
*/

# define initial_alignment_state             -1000000
# define busy_alignment_state                 1000000
# define interwoven_alignment_threshold        500000
250
251/*tex
252
253    For practical reasons all these registers were max'd to 64K but that really makes no sense for
254    e.g. glue and mu glue and even attributes. Imagine using more than 8K attributes: we get long
255    linked lists, slow lookup, lots of copying, need plenty node memory. These large ranges also
256    demand more memory as we need these eqtb entries. So, when I was pondering specific ex and em
257    glue (behaving like mu glue in math) I realized that we can do that at no cost at all: we just
258    make some register ranges smaller. Keep in mind that we already have cheap integer, dimension,
259    and glue shortcuts that can be used instead of registers for storing constant values.
260
261    large  : 7 * 64                           = 448   3.584 Kb
262    medium : 4 * 64 + 2 * 32 + 1 * 16         = 336   2.688 Kb
263    small  :          4 * 32          + 3 * 8 = 152   1.216 Kb
264
265    The memory saving is not that large but keep in mind that we have these huge eqtb arrays and
266    registers are accessed frequently so the more we have in the CPU cache the better. (We already
267    use less than in \LUATEX\ because we got rid of some parallel array so there it would have more
268    impact).
269
    At some point we might actually drop these maxima indeed as we really don't need that many
    of these registers and if (say) 16K is not enough, then nothing is.
272
273*/
274
/*tex
    The highest valid index per register class. The first branch is the active one and gives all
    classes the full 64K range; the trailing comments list the alternatives that were considered
    (as index and as size in K). The second branch is a smaller configuration that is kept for
    reference.
*/

# if 1

    # define max_toks_register_index      0xFFFF /* 0xFFFF 0xFFFF 0x7FFF */ /* 64 64 32 */
    # define max_box_register_index       0xFFFF /* 0xFFFF 0xFFFF 0x7FFF */ /* 64 64 32 */
    # define max_integer_register_index   0xFFFF /* 0xFFFF 0xFFFF 0x3FFF */ /* 64 64 16 */
    # define max_dimension_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x3FFF */ /* 64 64 16 */
    # define max_posit_register_index     0xFFFF /* 0xFFFF 0x7FFF 0x1FFF */ /* 64 32  8 */
    # define max_attribute_register_index 0xFFFF /* 0xFFFF 0x7FFF 0x1FFF */ /* 64 32  8 */
    # define max_glue_register_index      0xFFFF /* 0xFFFF 0x7FFF 0x1FFF */ /* 64 32  8 */
    # define max_muglue_register_index    0xFFFF /* 0xFFFF 0x3FFF 0x1FFF */ /* 64 16  8 */

# else

    # define max_toks_register_index      0x1FFF //  8K
    # define max_box_register_index       0x7FFF // 32K /* less if we use a lua stack */
    # define max_integer_register_index   0x1FFF //  8k
    # define max_dimension_register_index 0x1FFF //  8k
    # define max_posit_register_index     0x1FFF //  8k
    # define max_attribute_register_index 0x1FFF //  8k
    # define max_glue_register_index      0x0FFF //  4k
    # define max_muglue_register_index    0x0FFF //  4k

# endif

/*tex
    The product is parenthesized so that the macro is a single operand: without the parentheses
    |x % max_unit_register_index| would be evaluated as |(x % 26) * 26|.
*/

# define max_unit_register_index       (26 * 26)

/*tex The number of registers per class: the highest index plus one. */

# define max_n_of_toks_registers      (max_toks_register_index      + 1)
# define max_n_of_box_registers       (max_box_register_index       + 1)
# define max_n_of_integer_registers   (max_integer_register_index   + 1)
# define max_n_of_dimension_registers (max_dimension_register_index + 1)
# define max_n_of_attribute_registers (max_attribute_register_index + 1)
# define max_n_of_posit_registers     (max_posit_register_index     + 1)
# define max_n_of_glue_registers      (max_glue_register_index      + 1)
# define max_n_of_muglue_registers    (max_muglue_register_index    + 1)
# define max_n_of_unit_registers      (max_unit_register_index      + 1)
310
/*tex Counts: the number of entries, so the highest index is one less (see the |_index| macros). */

# define max_n_of_bytecodes                   65536 /* dynamic */
# define max_n_of_math_families                  64
# define max_n_of_math_classes                   64
# define max_n_of_catcode_tables                256
# define max_n_of_box_indices          max_halfword

# define min_n_list_stack_entries                 7 
# define max_n_list_stack_entries                15 

/*tex Upper (and some lower) bounds of values that can be scanned or stored. */

# define max_character_code                0x10FFFF /*tex 1114111, the largest allowed character number; must be |< max_halfword| */
//define max_math_character_code           0x0FFFFF /*tex 1048575, for now this is plenty, otherwise we need to store differently */
# define max_math_character_code max_character_code /*tex part gets clipped when we convert to a number */
# define max_function_reference       cs_offset_max
# define min_iterator_value                -0xFFFFF /* When we decide to generalize it might become 0xFFFF0 with */
# define max_iterator_value                 0xFFFFF /* 0x0000F being a classifier so that we save cmd's          */
# define max_category_code                       15
# define max_newline_character                  127 /*tex This is an old constraint but there is no reason to change it. */
# define max_endline_character                  127 /*tex To keep it simple we stick to the maximum single UTF character. */
# define max_box_axis                           255
# define max_size_of_word                      1000 /*tex More than enough (esp. since this can end up on the stack). Includes {}{}{} exception stuff. */
# define min_limited_scale                        0 /*tex Zero is a signal too. */
# define max_limited_scale                     1000
# define min_math_style_scale                     0 /*tex Zero is a signal too. */
# define max_math_style_scale                  2000
# define max_parameter_index                     15

/*tex With |max_size_of_word| being 1000 this comes down to 4005 bytes. */

# define max_size_of_word_buffer (4 * max_size_of_word + 2 + 1 + 2) /* utf + two_periods + sentinel_zero + some_slack */

/*tex
    NOTE(review): |max_n_of_marks| and |max_n_of_inserts| are not defined in the part of this file
    that is visible here (there are |max_mark_size| and |max_insert_size| further down); confirm
    that they are defined before |max_mark_index| and |max_insert_index| get used.
*/

# define max_mark_index          (max_n_of_marks         - 1)
# define max_insert_index        (max_n_of_inserts       - 1)
# define max_box_index           (max_n_of_box_indices   - 1)
# define max_bytecode_index      (max_n_of_bytecodes     - 1)

# define max_math_family_index   (max_n_of_math_families - 1)
# define max_math_class_code     (max_n_of_math_classes  - 1)
# define max_math_property       0xFFFF
# define max_math_group          0xFFFF
# define max_math_index          max_character_code
# define max_math_discretionary  0xFF

# define max_classification_code 0xFFFF
352
# define ascii_space  32
# define ascii_max   127

/*tex
    The |default_space_factor| (1000) is defined once, together with the other space factor
    constants earlier in this file, so it is not repeated here: two definitions can silently
    drift apart.
*/

# define special_space_factor  999
358
359/*tex 
360    We started out with 32 but it makes no sense to initialize that many every time we need to do
361    that. In \CONTEXT\ we have a granular setup with nine values. The maximum practical value is 
362    actually 99 and one needs step sizes that are reasonable. 
363*/
364
/*tex
    The number of fitness values is bounded at both ends.
    NOTE(review): |all_fitness_values| looks like a \quote {match any} marker, confirm at the
    place where it is used.
*/

# define default_fitness              0
# define min_n_of_fitness_values      5
# define max_n_of_fitness_values     15 
# define all_fitness_values        0xFF
369
370/*tex
371
    This is very math specific: we used to pack info into an unsigned 32 bit integer: class, family
    and character. We now use a node for that (which also opened up the possibility to store more
    info) but in case of a zero family we can also decide to use the older method of packing
    a number: |FF+10FFFF| but the gain (at least on \CONTEXT) is little: around 10K so here we only
    mention it as consideration. We can consider anyway to omit the class part when we need a
    numeric representation, although we don't really need (or like) that kind of abuse.
378
379*/
380
/*tex The widths of the three fields in a packed math character: together 32 bits. */

# define math_class_bits      6
# define math_family_bits     6
# define math_character_bits 20

/*tex
    Unpacking. Every macro argument is wrapped in parentheses so that an expression like |a | b|
    can be passed safely; without them the shift or mask would bind to the last operand only.
    Arguments are evaluated once.
*/

# define math_class_part(a)     (((a) >> 26) & 0x3F)
# define math_family_part(a)    (((a) >> 20) & 0x3F)
# define math_character_part(a)  ((a)        & 0xFFFFF)

/*tex The same for the old 16 bit layout: four bits class, four bits family, eight bits character. */

# define math_old_class_part(a)     (((a) >> 12) & 0x0F)
# define math_old_family_part(a)    (((a) >>  8) & 0x0F)
# define math_old_character_part(a)  ((a)        & 0xFF)

/*tex These only clip a value to the width of the old field, they don't shift. */

# define math_old_class_mask(a)     ((a) & 0x0F)
# define math_old_family_mask(a)    ((a) & 0x0F)
# define math_old_character_mask(a) ((a) & 0xFF)

/*tex
    Packing: each component is clipped to its field width first. Watch out: with a signed |int|
    class of 32 or more the shift by 26 reaches the sign bit, so callers that need those classes
    should pass an unsigned value.
*/

# define math_packed_character(c,f,v)     ((((c) & 0x3F) << 26) + (((f) & 0x3F) << 20) + ((v) & 0xFFFFF))
# define math_old_packed_character(c,f,v) ((((c) & 0x0F) << 12) + (((f) & 0x0F) <<  8) + ((v) & 0x000FF))
399
# define rule_font_fam_offset 0xFFFFFF

/*tex
    We put these here for consistency. They are the first three values above
    |max_character_code| and therefore can never clash with a real character.
*/

# define too_big_char (max_character_code + 1) /*tex 1114112, |biggest_char + 1| */
# define special_char (max_character_code + 2) /*tex 1114113, |biggest_char + 2| */
# define number_chars (max_character_code + 3) /*tex 1114114, |biggest_char + 3| */
407
408/*tex
409
410    As mentioned, because we're now in \CCODE\ we use a bit simplified memory mode. We don't do any
411    byte swapping related to endian properties as we don't share formats between architectures
412    anyway. A memory word is 64 bits and interpreted in several ways. So the memoryword is a bit
413    different. We also use the opportunity to squeeze eight characters into the word.
414
415    halfword    : 32 bit integer       (2)
416    quarterword : 16 bit integer       (4)
417    singlechar  :  8 bit unsigned char (8)
418    int         : 32 bit integer       (2)
419    glue        : 64 bit double        (1)
420
    The names below still reflect the original \TEX\ names but we have simplified the model a bit.
    Watch out: we still make |B0| and |B1| overlap |LH| which for instance is needed when we
    store the size of a node in the type and subtype field. The same is true for the overlapping
    |CINT|s! Don't change this without also checking the macros elsewhere.
425
426    \starttyping
427    typedef union memoryword {
428        struct {
429            halfword H0, H1;
430        } h;
431        struct {
432            quarterword B0, B1, B2, B3;
433        } q;
434        struct {
435            unsigned char C0, C1, C2, C3, C4, C5, C6, C7;
436        } s;
437        struct {
438            glueratio GLUE;
439        } g;
440    } memoryword;
441    \stoptyping
442
443    The dual 32 bit model suits tokens well and for nodes is only needed because we store a double but
444    when we'd store a 32 bit float instead (which is cf tex) we could use a smaller single 32 bit word.
445
    On the other hand, it might even make sense for nodes to move to a quad 32 bit variant because it
    makes smaller node identifiers which might remove some limits. But as many nodes have an odd size
    we will waste more memory. Of course for nodes we can at some point decide to go full dynamic and
    use a pointer table but then we need to abstract the embedded subnodes (in disc and insert) first.

    It is a bit tricky if we want to use a [8][8][16][32], [16][16][32] or similar mixing because of
    endianness, which is why we use a more stepwise definition of memoryword. This mixed scheme permits
    packing more data in a node.
454
455*/
456
457// typedef union memoryword {
458//     halfword      H[2];  /* 2 * 32 bit */
459//     unsigned int  U[2];
460//     quarterword   Q[4];  /* 4 * 16 bit */
461//     unsigned char C[8];  /* 8 *  8 bit */
462//     glueratio     GLUE;  /* 1 * 64 bit */
463//     short         X;
464//     long long     L;
465//     double        D;
466//     void          *P;    /* 1 * 64 bit or 32 bit */
467// } memoryword;
468
/*tex
    The memory word is built up in three steps so that every level can be accessed in more than one
    way. The smallest piece is a |memorysplit|: 16 bits that can be read as an unsigned quarterword,
    as a signed short or as two single bytes.
*/

typedef union memorysplit {
    quarterword  Q;
    short        X;
    singleword   S[2];
} memorysplit;

/*tex
    Two of those splits make a |memoryalias|: 32 bits that can also be read as a signed halfword or
    as an unsigned integer. Which split holds the low order bits depends on the byte order of the
    machine.
*/

typedef union memoryalias {
    halfword     H;
    unsigned int U;
 /* quarterword  Q[2]; */
 /* singleword   S[4]; */
    memorysplit  X[2];
} memoryalias;

/*tex
    Two aliases make the |memoryword|. All members share the same storage: the two aliases, eight
    bytes, a glue ratio, a long long, a double or a pointer. The commented out members show the
    direct arrays that the nested unions replace.
*/

typedef union memoryword {
 /* halfword      H[2]; */
 /* unsigned int  U[2]; */
 /* quarterword   Q[4]; */
    memoryalias   A[2];
    unsigned char C[8];
    glueratio     GLUE;
    long long     L;
    double        D;
    void          *P;
} memoryword;
494
/*tex
    An alternative view on a token: |info| and |val| name the same halfword and the anonymous
    struct splits that storage into an 8 bit |cmd| and a 24 bit |chr| field.

    NOTE(review): the outer type is a |union| too, so |link| shares its storage with |info|, |val|
    and the bit fields. If |info| and |link| are meant to be two separate fields (as with |half0|
    and |half1| in a memory word) this has to be a |struct|; that changes the size of the type, so
    check how it is used before changing it.

    NOTE(review): whether a plain |int| bit field is signed and how bit fields are ordered within
    the word is up to the implementation, so |cmd| and |chr| are not portable as they are.
*/

typedef union tokenword {
    union { 
        halfword info;
        halfword val;
        struct  { 
            int cmd:8; 
            int chr:24; 
        };
    };
    halfword link; 
} tokenword;
506
507/*tex
508
509    These symbolic names will be used in the definitions of tokens and nodes, the core data
510    structures of the \TEX\ machinery. In some cases halfs and quarters overlap.
511
512*/
513
/*tex
    These are member paths into a |memoryword|, to be used after a dot, as in |w.half0|. The first
    digit selects the 32 bit alias (0 or 1), the second digit the piece within that alias.
*/

/*tex The two signed halfwords. */

# define half0   A[0].H
# define half1   A[1].H

/*tex The same two halves, read as unsigned integers. */

# define hulf0   A[0].U
# define hulf1   A[1].U

// # define quart00  A[0].Q[0]
// # define quart01  A[0].Q[1]
// # define quart10  A[1].Q[0]
// # define quart11  A[1].Q[1]

/*tex The four unsigned quarterwords: two per half. */

# define quart00  A[0].X[0].Q
# define quart01  A[0].X[1].Q
# define quart10  A[1].X[0].Q
# define quart11  A[1].X[1].Q

/*tex The same four quarters, read as signed shorts. */

# define short00  A[0].X[0].X
# define short01  A[0].X[1].X
# define short10  A[1].X[0].X
# define short11  A[1].X[1].X

// # define single00 A[0].S[0]
// # define single01 A[0].S[1]
// # define single02 A[0].S[2]
// # define single03 A[0].S[3]
// # define single10 A[1].S[0]
// # define single11 A[1].S[1]
// # define single12 A[1].S[2]
// # define single13 A[1].S[3]

/*tex The eight single bytes: four per half, where each quarter provides two of them. */

# define single00 A[0].X[0].S[0]
# define single01 A[0].X[0].S[1]
# define single02 A[0].X[1].S[0]
# define single03 A[0].X[1].S[1]
# define single10 A[1].X[0].S[0]
# define single11 A[1].X[0].S[1]
# define single12 A[1].X[1].S[0]
# define single13 A[1].X[1].S[1]

/*tex The views that take the whole word. */

# define glue0   GLUE
# define long0   L
# define double0 D
556
557/*tex
558
    We're coming from \PASCAL\ which has a boolean type, while in \CCODE\ an |int| is used. However,
    as we often have callbacks and a connection with the \LUA\ end using |boolean|, |true| and
    |false| is often somewhat inconsistent. For that reason we now use |int| instead. It also prevents
    interference with a different definition of |boolean|, something that we ran into a few times in
    the past with external code.
564
565    There were not that many explicit booleans used anyway so better be consistent in using integers
566    than have an inconsistent mix.
567
568*/
569
570/*tex
571
572    The following parameters can be changed at compile time to extend or reduce \TEX's capacity.
573    They may have different values in |INITEX| and in production versions of \TEX. Some values can
574    be adapted at runtime. We start with those that influence memory management. Anyhow, some day
575    I will collect some statistics from runs and come up with (probably) lower defaults.
576
577*/
578
579/*tex These do a stepwise allocation. */
580
581/*tex The buffer is way too large ... only lines ... we could start out smaller */
582
/*tex
    Each table below has up to four constants. The |max_| and |min_| values are the bounds and
    |stp_| is the step that is used when a table grows.
    NOTE(review): |siz_| presumably is the size that we start out with, confirm in the allocators.
*/

/*define magic_maximum         2097151 */ /* (max string) Because we step 500K we will always be below this. */
//define magic_maximum         2000000    /* Looks nicer and we never need the real maximum anyway. */
# define magic_maximum cs_offset_value    /* Follows the cs offset, which is 0x00200000 or 2097152 in this file. */

# define max_hash_size   magic_maximum    /* This is one of these magic numbers. */
# define min_hash_size          150000    /* A reasonable default. */
# define siz_hash_size          250000
# define stp_hash_size          100000    /* Often we have enough. */

# define max_pool_size   magic_maximum    /* stringsize ! */
# define min_pool_size          150000
# define siz_pool_size          500000
# define stp_pool_size          100000
596
/*tex The larger pools: string bodies, node memory, token memory and the input buffer. */

# define max_body_size       100000000    /* poolsize */
# define min_body_size        10000000
# define siz_body_size        20000000
# define stp_body_size         1000000

# define max_node_size       100000000    /* Currently these are the memory words! */
# define min_node_size        10000000    /* Currently these are the memory words! */
# define siz_node_size        25000000
# define stp_node_size         5000000    /* Currently these are the memory words! */

# define max_token_size       10000000    /* If needed we can go much larger. */
# define min_token_size        2000000    /* The original 10000 is a bit cheap. */
# define siz_token_size       10000000
# define stp_token_size        1000000

# define max_buffer_size     100000000    /* Let's be generous */
# define min_buffer_size       1000000    /* We often need quite a bit. */
# define siz_buffer_size      10000000
# define stp_buffer_size       1000000    /* We use this step when we increase the table. */
616
/*tex
    The stacks: nesting, main vertical lists, open input files, macro parameters, the save stack
    and the input stack.
*/

# define max_nest_size           10000    /* The table will grow dynamically but the file system might have limitations. */
# define min_nest_size            1000    /* Quite a bit more than the old default 50. */
# define siz_nest_size           10000    /* Quite a bit more than the old default 50. */
# define stp_nest_size            1000    /* We use this step when we increase the table. */

# define max_mvl_size              500
# define min_mvl_size               10
# define stp_mvl_size               10

# define max_mvl_index    max_mvl_size    /* The indices run from 1 upto (and including) the size. */
# define min_mvl_index               1

# define max_in_open              2000    /* The table will grow dynamically but the file system might have limitations. */
# define min_in_open               500    /* This used to be 100, but who knows what users load. */
# define siz_in_open              2000    /* This used to be 100, but who knows what users load. */
# define stp_in_open               250    /* We use this step when we increase the table. */

# define max_parameter_size     100000    /* This should be plenty and if not there probably is an issue in the macro package. */
# define min_parameter_size      20000    /* The original value of 60 is definitely not enough when we nest macro calls. */
# define siz_parameter_size     100000    /* The original value of 60 is definitely not enough when we nest macro calls. */
# define stp_parameter_size      10000    /* We use this step when we increase the table. */

# define max_save_size          500000    /* The table will grow dynamically. */
# define min_save_size          100000    /* The original value was 5000, which is not that large for today's usage. */
# define siz_save_size          500000    /* The original value was 5000, which is not that large for today's usage. */
# define stp_save_size           10000    /* We use this step when we increase the table. */

# define max_stack_size         100000    /* The table will grow dynamically. */
# define min_stack_size          10000    /* The original value was 500, okay long ago, but not now. */
# define siz_stack_size         100000    /* The original value was 500, okay long ago, but not now. */
# define stp_stack_size          10000    /* We use this step when we increase the table. */
648
649# define max_mark_size           10000    /*tex The 64K was rediculous (5 64K arrays of halfword). */
650# define min_mark_size              50
651# define stp_mark_size              50
652
653# define max_insert_size           500
654# define min_insert_size            25
655# define stp_insert_size            25
656
657# define max_font_size          100000    /* We're now no longer hooked into the eqtb (saved 500+ K in the format too). */
658# define min_font_size             250
659# define stp_font_size             250
660
661# define max_language_size       10000    /* We could bump this (as we merged the hj codes) but it makes no sense. */
662# define min_language_size         250
663# define stp_language_size         250
664
665/*tex
666    Units. At some point these will be used in texscanning and lmtexlib (3 times replacement).
667*/
668
669# define bp_numerator   7227  // big point (7227/7200 equals 72.27/72)
670# define bp_denonimator 7200  // NOTE(review): "denonimator" (sic) is part of the macro names in this block, so the spelling is kept
671
672# define cc_numerator  14856  // cicero
673# define cc_denonimator 1157
674
675# define cm_numerator   7227  // centimeter
676# define cm_denonimator  254
677
678# define dd_numerator   1238  // didot
679# define dd_denonimator 1157
680
681# define dk_numerator  49838  // knuth
682# define dk_denonimator 7739
683
684# define es_numerator   9176  // edith
685# define es_denonimator  129
686
687# define in_numerator   7227  // inch
688# define in_denonimator  100
689
690# define mm_numerator   7227  // millimeter
691# define mm_denonimator 2540
692
693# define pc_numerator     12  // pica
694# define pc_denonimator    1
695
696# define pt_numerator      1  // point
697# define pt_denonimator    1
698
699# define sp_numerator      1  // scaled point
700# define sp_denonimator    1
701
702# define ts_numerator   4588  // tove
703# define ts_denonimator  645
704
705# define eu_min_factor     1  // lower bound of the eu factor
706# define eu_max_factor    50  // upper bound of the eu factor
707# define eu_def_factor    10  // NOTE(review): presumably the default factor; it lies between the two bounds above
708
709/*tex 1 font id in slot 0 + 16 characters after that */
710
711# define max_twin_length  16                      /*tex the number of characters that follow the font id */
712# define max_twin_snippet (max_twin_length + 1)  /*tex the characters plus one extra slot (slot 0) for the font id */
713
714/*tex
715
716    These are used in the code, so when we want them to adapt, which is needed when we make them
717    configurable, we need to change this.
718
719*/
720
721# define max_n_of_marks      max_mark_size      /*tex alias, currently fixed to the maximum mark table size */
722# define max_n_of_inserts    max_insert_size    /*tex alias, currently fixed to the maximum insert table size */
723# define max_n_of_fonts      max_font_size      /*tex alias, currently fixed to the maximum font table size */
724# define max_n_of_languages  max_language_size  /*tex alias, currently fixed to the maximum language table size */
725
726/*tex
727
728    The following settings are not related to memory management. Some day I will probably change
729    the error half stuff. There is already an indent related frozen setting here.
730
731*/
732
733# define max_expand_depth     1000000      /* Just a number, no allocation. */
734# define min_expand_depth       10000      /* Lower bound that goes with the maximum above. */
735
736# define max_error_line           255      /* This also determines size of a (static) array */
737# define min_error_line           132      /* Good old \TEX\ uses a value of 79. */
738
739# define max_half_error_line      255      /* Same upper bound as |max_error_line|. */
740# define min_half_error_line       80      /* Good old \TEX\ uses a value of 50. */
741
742# define memory_data_unset         -1      /* NOTE(review): presumably marks a |memory_data| value that has not been set; confirm at the use sites. */
743
744typedef struct memory_data { /* bookkeeping for one table that can grow; every field is a plain |int| */
745    int ptr;       /* the current pointer */
746    int top;       /* the maximum used pointer */
747    int size;      /* the used (optionally user asked) value */
748    int allocated; /* the currently allocated amount */
749    int step;      /* the step used for growing */
750    int minimum;   /* the default minimum allocated, also the step (NOTE(review): there is a separate |step| field, confirm which one is used) */
751    int maximum;   /* the maximum possible */
752    int itemsize;  /* the itemsize */
753    int initial;   /* NOTE(review): undocumented, presumably the initial allocation; confirm */
754    int offset;    /* offset of ptr and top */
755    int extra;     /* NOTE(review): undocumented, the purpose is not visible in this header; confirm */
756} memory_data;
757
758typedef struct limits_data { /* bounds and usage of one limit, without the allocation details that |memory_data| has */
759    int size;      /* the used (optionally user asked) value */
760    int minimum;   /* the default minimum allocated */
761    int maximum;   /* the maximum possible */
762    int top;       /* the maximum used */
763} limits_data;
764
765extern void tex_dump_constants   (dumpstream f); /* NOTE(review): presumably writes the constants to the dump stream |f|; the implementation is not visible here */
766extern void tex_undump_constants (dumpstream f); /* NOTE(review): presumably the reading counterpart of |tex_dump_constants|; confirm in the implementation */
767
768/*tex
769
770This is an experimental feature, different approaches to the main command dispatcher:
771
772\starttabulate[|l|l|l|l|l|l]
773\BC n  \BC method          \BC [vhm]mode   \BC binary    \BC manual \BC comment \NC \NR
774\ML
775\NC 0  \NC jump table      \NC cmd offsets \NC 2.691.584 \NC 10.719 \NC original method, selector: (cmd + mode) \NC \NR
776\NC 1  \NC case with modes \NC sequential  \NC 2.697.216 \NC 10.638 \NC nicer modes, we can delegate more to runners \NC \NR
777\NC 2  \NC flat case       \NC cmd offsets \NC 2.695.168 \NC 10.562 \NC variant on original \NC \NR
778\stoptabulate
779
780The second method can be coded differently where we can delegate more to runners (that then can get
781called with a mode argument). Maybe for a next iteration. Concerning performance: the differences
782can be neglected (no differences on the test suite) because the bottleneck in \CONTEXT\ is at the
783\LUA\ end.
784
785I occasionally test the variants. The last test showed that mode 1 gives a bit larger binary. There
786is no real difference in performance.
787
788Well, per end December 2022 we only have the case with modes left but one can always find the old 
789code in the archive. 
790
791*/
792
793/*tex For the moment here. */
794
795typedef struct line_break_properties { /*tex Bundles the settings named below; NOTE(review): presumably filled per paragraph for the line breaker, confirm at the call sites. */
796    halfword initial_par;
797    halfword group_context;
798    halfword par_context;
799    halfword tracing_paragraphs;
800    halfword tracing_fitness;
801    halfword tracing_toddlers;
802    halfword tracing_orphans;
803    halfword tracing_passes;
804    halfword paragraph_dir;
805    halfword paragraph_options;
806    halfword parfill_left_skip;
807    halfword parfill_right_skip;
808    halfword parinit_left_skip;
809    halfword parinit_right_skip;
810    halfword emergency_left_skip;
811    halfword emergency_right_skip;
812    halfword pretolerance;
813    halfword tolerance;
814    halfword emergency_stretch;
815    halfword emergency_original; 
816    halfword emergency_extra_stretch;
817    halfword looseness;
818    halfword adjust_spacing;
819    halfword protrude_chars;
820    halfword adj_demerits;
821    halfword max_adj_demerits;
822    halfword line_penalty;
823    halfword last_line_fit;
824    halfword double_hyphen_demerits;
825    halfword final_hyphen_demerits;
826    scaled   hsize;                   /*tex declared |scaled|, unlike the surrounding |halfword| fields */
827    halfword left_skip;
828    halfword right_skip;
829    scaled   hang_indent;             /*tex declared |scaled|, unlike the surrounding |halfword| fields */
830    halfword hang_after;
831    halfword par_shape;
832    halfword inter_line_penalty;
833    halfword inter_line_penalties;
834    halfword club_penalty;
835    halfword club_penalties;
836    halfword widow_penalty;
837    halfword widow_penalties;
838    halfword display_widow_penalty;
839    halfword display_widow_penalties;
840    halfword orphan_penalties;
841    halfword toddler_penalties;
842    halfword left_twin_demerits;
843    halfword right_twin_demerits;
844    halfword fitness_classes;
845    halfword adjacent_demerits;
846    halfword orphan_line_factors;
847    halfword broken_penalty;
848    halfword broken_penalties;
849    halfword baseline_skip;
850    halfword line_skip;
851    halfword line_skip_limit;
852    halfword adjust_spacing_step;
853    halfword adjust_spacing_shrink;
854    halfword adjust_spacing_stretch;
855    halfword hyphenation_mode;
856    halfword shaping_penalties_mode;
857    halfword shaping_penalty;
858    halfword par_passes;
859    halfword line_break_checks;
860    halfword extra_hyphen_penalty; 
861    halfword line_break_optional;
862    halfword single_line_penalty;
863    halfword hyphen_penalty;
864    halfword ex_hyphen_penalty;
865    /*tex Only in par passes (for now). */
866    halfword math_penalty_factor;
867    halfword sf_factor;
868    halfword sf_stretch_factor;
869} line_break_properties;
870
871typedef struct balance_properties { /*tex Bundles the settings named below; NOTE(review): presumably the balancing counterpart of |line_break_properties|, confirm at the call sites. */
872    halfword tracing_balancing;
873    halfword tracing_fitness;
874    halfword tracing_passes;
875    halfword pretolerance;
876    halfword tolerance;
877    halfword emergency_stretch;
878    halfword emergency_shrink;
879    halfword original_stretch; 
880    halfword original_shrink; 
881    halfword looseness;
882    halfword adj_demerits;
883    halfword max_adj_demerits;
884    scaled   vsize;      /*tex the three dimensions here are |scaled|, the other fields are |halfword| */
885    scaled   topskip;
886    scaled   bottomskip;
887    halfword shape;
888    halfword fitness_classes;
889    halfword checks;
890    halfword passes;
891    halfword penalty;
892    halfword packing;
893    halfword trial; /* packing */
894} balance_properties;
895
896typedef enum sparse_identifiers { /*tex No explicit values: the identifiers run from 0 upto 14 in declaration order, so reordering renumbers them. */
897    unknown_sparse_identifier, /*tex the first member and therefore 0 */
898    catcode_sparse_identifier,
899    lccode_sparse_identifier,
900    uccode_sparse_identifier,
901    sfcode_sparse_identifier,
902    hjcode_sparse_identifier,
903    hmcode_sparse_identifier,
904    hccode_sparse_identifier,
905    amcode_sparse_identifier,
906    fontchar_sparse_identifier,
907    mathcode_sparse_identifier,
908    delcode_sparse_identifier,
909    mathfont_sparse_identifier, 
910    mathparam_sparse_identifier, 
911    user_sparse_identifier,
912} sparse_identifiers;
913
914/*tex
915
916    Here are the group codes that are used to discriminate between different kinds of groups. They
917    allow \TEX\ to decide what special actions, if any, should be performed when a group ends.
918
919    Some groups are not supposed to be ended by right braces. For example, the |$| that begins a
920    math formula causes a |math_shift_group| to be started, and this should be terminated by a
921    matching |$|. Similarly, a group that starts with |\left| should end with |\right|, and one
922    that starts with |\begingroup| should end with |\endgroup|.
923
924*/
925
926typedef enum tex_group_codes { /*tex No explicit values: the codes run from 0 upto 34 in declaration order, so reordering renumbers them. */
927    bottom_level_group,  /*tex group code for the outside world */
928    simple_group,        /*tex group code for local structure only */
929    hbox_group,          /*tex code for |\hbox| */
930    adjusted_hbox_group, /*tex code for |\hbox| in vertical mode */
931    vbox_group,          /*tex code for |\vbox| */
932    vtop_group,          /*tex code for |\vtop| */
933    dbox_group,          /*tex code for |\dbox| */
934    align_group,         /*tex code for |\halign|, |\valign| */
935    no_align_group,      /*tex code for |\noalign| */
936    output_group,        /*tex code for output routine */
937    math_group,          /*tex code for, e.g., |\char'136| */
938    math_stack_group,
939    math_component_group,
940    discretionary_group, /*tex code for |\discretionary| */
941    insert_group,        /*tex code for |\insert| */
942    vadjust_group,       /*tex code for |\vadjust| */
943    vcenter_group,       /*tex code for |\vcenter| */
944    math_fraction_group, /*tex code for |\over| and friends */
945    math_operator_group,
946    math_radical_group,
947    math_choice_group,   /*tex code for |\mathchoice| */
948    also_simple_group,   /*tex code for |\begingroup|\unknown|\egroup| */
949    semi_simple_group,   /*tex code for |\begingroup|\unknown|\endgroup| */
950    math_simple_group,   /*tex code for |\beginmathgroup|\unknown|\endmathgroup| */
951    math_fence_group,    /*tex code for fences |\left|\unknown|\right| */
952    math_inline_group,   
953    math_display_group,  
954    math_number_group,     
955    local_box_group,     /*tex code for |\localleftbox|\unknown|\localrightbox| */
956    split_off_group,     /*tex box code for the top part of a |\vsplit| */
957    split_keep_group,    /*tex box code for the bottom part of a |\vsplit| */
958    preamble_group,      /*tex box code for the preamble processing in an alignment */
959    align_set_group,     /*tex box code for the final item pass in an alignment */
960    finish_row_group,    /*tex box code for a provisional line in an alignment */
961    lua_group,
962} tex_group_codes;
963
964/*
965    In the end I decided to split them into context and begin, but maybe some day
966    they all merge into one (easier on tracing and reporting in shared helpers).
967*/
968
969typedef enum tex_par_context_codes { /*tex No explicit values: the codes start at 0 and follow the declaration order. */
970    normal_par_context,
971    vmode_par_context,
972    vbox_par_context,
973    vtop_par_context,
974    dbox_par_context,
975    vcenter_par_context,
976    vadjust_par_context,
977    insert_par_context,
978    output_par_context,
979    align_par_context,
980    no_align_par_context,
981    span_par_context,
982    math_par_context,
983    lua_par_context,
984    reset_par_context,
985    n_of_par_context_codes, /*tex the last member, so it equals the number of codes above (15); keep it last */
986} tex_par_context_codes;
987
988typedef enum tex_alignment_context_codes { /*tex No explicit values: the codes run from 0 upto 3 in declaration order. */
989    preamble_pass_alignment_context,
990    preroll_pass_alignment_context,
991    package_pass_alignment_context,
992    wrapup_pass_alignment_context,
993} tex_alignment_context_codes;
994
995typedef enum tex_breaks_context_codes { /*tex No explicit values: the codes run from 0 upto 8 in declaration order. */
996    initialize_line_break_context,
997    start_line_break_context,
998    list_line_break_context,
999    stop_line_break_context,
1000    collect_line_break_context,
1001    line_line_break_context,
1002    delete_line_break_context,
1003    report_line_break_context,
1004    wrapup_line_break_context,
1005} tex_breaks_context_codes;
1006
1007typedef enum tex_build_context_codes { /*tex No explicit values: the codes run from 0 upto 6 in declaration order. */
1008    initialize_show_build_context,
1009    step_show_build_context,
1010    check_show_build_context,
1011    skip_show_build_context,
1012    move_show_build_context,
1013    fireup_show_build_context,
1014    wrapup_show_build_context,
1015} tex_build_context_codes;
1016
1017typedef enum tex_vsplit_context_codes { /*tex No explicit values: the codes run from 0 upto 4 in declaration order. */
1018    initialize_show_vsplit_context,
1019    continue_show_vsplit_context,
1020    check_show_vsplit_context,
1021    quit_show_vsplit_context,
1022    wrapup_show_vsplit_context,
1023} tex_vsplit_context_codes;
1024
1025typedef enum tex_page_context_codes { /*tex No explicit values: the codes run from 0 upto 13 in declaration order. */
1026    box_page_context,
1027    end_page_context,
1028    vadjust_page_context,
1029    penalty_page_context,
1030    boundary_page_context,
1031    insert_page_context,
1032    hmode_par_page_context,
1033    vmode_par_page_context,
1034    begin_paragraph_page_context,
1035    before_display_page_context,
1036    after_display_page_context,
1037    after_output_page_context,
1038    alignment_page_context,
1039    triggered_page_context /*tex the only final member in these enumerations without a trailing comma */
1040} tex_page_context_codes;
1041
1042typedef enum tex_append_line_context_codes { /*tex No explicit values: the codes run from 0 upto 5 in declaration order. */
1043    box_append_line_context,
1044    pre_box_append_line_context,
1045    pre_adjust_append_line_context,
1046    post_adjust_append_line_context,
1047    pre_migrate_append_line_context,
1048    post_migrate_append_line_context,
1049} tex_append_line_context_codes;
1050
1051typedef enum tex_par_trigger_codes { /*tex No explicit values: the codes run from 0 upto 13 in declaration order. */
1052    normal_par_trigger,
1053    force_par_trigger,
1054    indent_par_trigger,
1055    no_indent_par_trigger,
1056    math_char_par_trigger,
1057    char_par_trigger,
1058    boundary_par_trigger,
1059    space_par_trigger,
1060    math_par_trigger,
1061    kern_par_trigger,
1062    hskip_par_trigger,
1063    un_hbox_char_par_trigger,
1064    valign_char_par_trigger,
1065    vrule_char_par_trigger,
1066} tex_par_trigger_codes;
1067
1068/*tex 
1069    In the end we don't go granular because all we need is some control over specific features and 
1070    we keep these generic and independent of whatever unicode provides. Otherwise we'd also have to 
1071    bloat the format file. 
1072*/
1073
1074// typedef enum tex_character_classification_codes { 
1075//     letter_classification_code      = 0x0001,
1076//     other_classification_code       = 0x0002,
1077//     punctuation_classification_code = 0x0004,
1078//     spacing_classification_code     = 0x0008,
1079//                                     
1080//     lowercase_classification_code   = 0x0010,
1081//     uppercase_classification_code   = 0x0020,
1082//     titlecase_classification_code   = 0x0030, /* ! */
1083//     accent_classification_code      = 0x0040, 
1084//     digit_classification_code       = 0x0080,
1085//                                     
1086//     open_classification_code        = 0x0100,
1087//     close_classification_code       = 0x0200, 
1088//     middle_classification_code      = 0x0300, /* ! */
1089//     quote_classification_code       = 0x0400,
1090//     dash_classification_code        = 0x0800,
1091//                                     
1092//     symbol_classification_code      = 0x1000,
1093//     math_classification_code        = 0x2000,
1094//     control_classification_code     = 0x4000, 
1095//     currency_classification_code    = 0x8000, /* or reserve this one, maybe generic unit */
1096// } tex_character_classification_codes;
1097
1098typedef enum tex_character_control_codes { /*tex Explicit values, unlike the enumerations above; they are tested with a bitwise and by |has_character_control|. */
1099    ignore_twin_character_control_code = 0x0001, /*tex the only code so far, occupying bit 0 */
1100} tex_character_control_codes;
1101
# define default_character_control 0 /*tex no control bits set */

/*tex
    Yields 1 when |a| and |b| have at least one bit in common and 0 otherwise. Both arguments are
    parenthesized: without that a call like |has_character_control(x, p | q)| expands to
    |((x & p | q) != 0)|, which binds as |(x & p) | q| and tests the wrong thing. Each argument
    is evaluated exactly once.
*/

# define has_character_control(a,b) (((a) & (b)) != 0)
1105
1106# endif
1107
1108