texinputstack.h /size: 20 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# ifndef LMT_INPUTSTACK_H
6# define LMT_INPUTSTACK_H
7
8/*tex
9
10    The state of \TEX's input mechanism appears in the input stack, whose entries are records with
11    six fields, called |state|, |index|, |start|, |loc|, |limit|, and |name|.
12
13*/
14
15/* todo: there is no need to be sparse here */
16
17typedef struct in_state_record {
18    halfword       start;
19    halfword       loc;
20    unsigned short state;
21    union          { unsigned short index; unsigned short token_type;      }; /*tex: So, no macro but name. */
22    union          { halfword       limit; halfword       parameter_start; }; /*tex: So, no macro but name. */
23    halfword       name;
24    short          cattable;   /*tex The category table used by the current line (see |textoken.c|). */
25    unsigned short partial;    /*tex Is the current line partial (see |textoken.c|)? */
26    int            state_file; /*tex Here we stack the tag of the current file. */
27    int            state_line; /*tex Not used. */
28} in_state_record;
29
30typedef struct input_stack_record {
31    halfword  input_file_callback_id;
32    halfword  line;
33    halfword  end_of_file_seen;
34    halfword  group;
35    halfword  if_ptr;
36    halfword  at_end_of_file;
37    char     *full_source_filename;
38} input_stack_record;
39
40// todo: better names for in_state_record and input_stack_record ... now mixed up
41
42typedef struct input_state_info {
43    in_state_record    *input_stack;
44    memory_data         input_stack_data;
45    input_stack_record *in_stack;
46    memory_data         in_stack_data;
47    halfword           *parameter_stack;
48    memory_data         parameter_stack_data;
49    in_state_record     cur_input;            /*tex The \quote {top} input state. Why not just pointing. */
50    int                 input_line;
51    int                 scanner_status;
52    halfword            def_ref;              /*tex Has to be set for error recovery etc. */
53    int                 align_state;
54    int                 base_ptr;
55    halfword            warning_index;
56    int                 open_files;
57    int                 padding;
58} input_state_info;
59
60extern input_state_info lmt_input_state;
61
62typedef struct input_file_state_info {
63    int      forced_file;
64    int      forced_line;
65    halfword mode;
66    halfword line;
67} input_file_state_info;
68
69extern input_file_state_info input_file_state;
70
71static inline int input_file_value(void)
72{
73    return input_file_state.forced_file ? input_file_state.forced_file : lmt_input_state.cur_input.state_file;
74}
75
76static inline int input_line_value(void)
77{
78    return input_file_state.forced_line ? input_file_state.forced_line : (input_file_state.line ? input_file_state.line : lmt_input_state.input_line);
79}
80
81/*tex
82
83    In \LUAMETATEX\ the io model was stepwise changed a bit, mostly in the \LUA\ feedback area.
84    Support for nodes, tokens, short and long string were improved. Around 2.06.17 specification
85    nodes became dynamic and that left the pseudo files as only variable node type. By removing
86    variable nodes we can avoid some code in node management so getting rid of pseudo files made
87    sense. The token scan macros used these but now use a lightweight variant of the \LUA\ scanner,
88    which we had anyway. The only complication is the |\everyeof| of |\scantokens|. Also, tracing
89    (if at all) is now different but these three scanners are seldom used and were introduced in
90    \ETEX\ (|scantokens|), \LUATEX\ (|\scantextokens|) and \LUAMETATEX\ (|tokenized|). The new
91    approach also gives more room for future extensions.
92
93    All this has been a very stepwise process, because we know that there are users who use \LMTX\
94    in production and small steps are easier to test. Experiments mostly happen in parts of the
95    code that are less critical ... after all \LUAMETATEX\ is also an experimental engine ... but
96    io related code changes are kind of critical.
97
98    Just to remember what we came from: the first 15 were reserved read channels but that is now
99    delegated to \LUA, so we had an offset of 16 in:
100
101*/
102
/*tex
    The order matters: |io_token_input| tests for the range from |io_lua_input_code| up to
    |io_token_eof_input_code| and |io_file_input| tests for |io_file_input_code| and beyond.
*/

typedef enum io_codes {
    io_initial_input_code   = 0,
    io_lua_input_code       = 1,
    io_token_input_code     = 2,
    io_token_eof_input_code = 3,
    io_tex_macro_code       = 4,
    io_file_input_code      = 5,
} io_codes;
111
112/*tex
113
114    Now, these |io_codes| are used in the name field but that field can also be a way larger number,
115    i.e.\ the string index of the file. That also assumes that the first used index is above the last
116    io_code. It can be the warning index too, just for the sake of an error context message. So:
117    symbolic (small) number, tex string being the filename, and macro name. But, because we also
118    have that information in other places (partly as side effect of luafication) a simpler model is
119    used now where we use a few dedicated codes. It also means that we no longer store the filename
120    in the string pool.
121
122*/
123
/*tex
    Range tests on the |io_codes| that can end up in the |name| field. The argument is wrapped in
    parentheses so that an expression like |a & b| or |x ? y : z| is compared as a whole. Watch
    out: |io_token_input| evaluates its argument twice, so don't pass something with side effects.
*/

# define io_token_input(c) ((c) >= io_lua_input_code && (c) <= io_token_eof_input_code)
# define io_file_input(c)  ((c) >= io_file_input_code)
126
127/*tex
128
129    Let's look more closely now at the control variables (|state|, |index|, |start|, |loc|, |limit|,
130    |name|), assuming that \TEX\ is reading a line of characters that have been input from some file
131    or from the user's terminal. There is an array called |buffer| that acts as a stack of all lines
132    of characters that are currently being read from files, including all lines on subsidiary levels
133    of the input stack that are not yet completed. \TEX\ will return to the other lines when it is
134    finished with the present input file.
135
136    (Incidentally, on a machine with byte-oriented addressing, it might be appropriate to combine
137    |buffer| with the |str_pool| array, letting the buffer entries grow downward from the top of the
138    string pool and checking that these two tables don't bump into each other.)
139
140    The line we are currently working on begins in position |start| of the buffer; the next character
141    we are about to read is |buffer[loc]|; and |limit| is the location of the last character present.
142    If |loc > limit|, the line has been completely read. Usually |buffer[limit]| is the
143    |end_line_char|, denoting the end of a line, but this is not true if the current line is an
144    insertion that was entered on the user's terminal in response to an error message.
145
146    The |name| variable is a string number that designates the name of the current file, if we are
147    reading a text file. It is zero if we are reading from the terminal; it is |n+1| if we are reading
148    from input stream |n|, where |0 <= n <= 16|. (Input stream 16 stands for an invalid stream number;
149    in such cases the input is actually from the terminal, under control of the procedure |read_toks|.)
150    Finally |18 <= name <=20| indicates that we are reading a pseudo file created by the |\scantokens|
151    or |\scantextokens| command. A larger value is reserved for input coming from \LUA.
152
153    The |state| variable has one of three values, when we are scanning such files:
154
155    \startitemize
156        \startitem
157            |mid_line| is the normal state.
158        \stopitem
159        \startitem
160            |skip_blanks| is like |mid_line|, but blanks are ignored.
161        \stopitem
162        \startitem
163            |new_line| is the state at the beginning of a line.
164        \stopitem
165    \stopitemize
166
167    These state values are assigned numeric codes so that if we add the state code to the next
168    character's command code, we get distinct values. For example, |mid_line + spacer| stands for the
169    case that a blank space character occurs in the middle of a line when it is not being ignored;
170    after this case is processed, the next value of |state| will be |skip_blanks|.
171
172    As with other constants, we only add some prefix or suffix but keep the normal name as much as
173    possible, so that the original documentation still applies.
174
175*/
176
177/* 
178    We could have |token_array_state| for a packed representation of really permanent macros if we 
179    freeze permanent. If we don't freeze we need an extra bit to flag a macro as using the array 
180    but we don't have a bit left. Packing could happen before we dump and would make the body half 
181    the size. Fetching from an array is a middleground between a token list and a file and could be a
182    bit faster and would definitely make for a smaller format file. In the end it might not really pay
183    off and it is also a bit un-TeX. 
184*/
185
186typedef enum state_codes {
187    token_list_state  = 0,
188    /*tex when scanning a line of characters */
189    mid_line_state    = 1,
190    /*tex when ignoring blanks */
191    skip_blanks_state = 2 + max_category_code,
192    /*tex at the start of a line */
193    new_line_state    = 3 + max_category_code + max_category_code,
194} state_codes;
195
196/*tex
197
198    Additional information about the current line is available via the |index| variable, which
199    counts how many lines of characters are present in the buffer below the current level. We
200    have |index = 0| when reading from the terminal and prompting the user for each line; then if
201    the user types, e.g., |\input paper|, we will have |index = 1| while reading the file
202    |paper.tex|. However, it does not follow that |index| is the same as the input stack pointer,
203    since many of the levels on the input stack may come from token lists. For example, the
204    instruction |\input paper| might occur in a token list.
205
206    The global variable |in_open| is equal to the |index| value of the highest \quote {non token
207    list} level. Thus, the number of partially read lines in the buffer is |in_open + 1|, and we
208    have |in_open = index| when we are not reading a token list.
209
210    If we are not currently reading from the terminal, or from an input stream, we are reading from
211    the file variable |input_file [index]|. We use the notation |terminal_input| as a convenient
212    abbreviation  for |name = 0|, and |cur_file| as an abbreviation for |input_file [index]|.
213
214    The global variable |line| contains the line number in the topmost open file, for use in error
215    messages. If we are not reading from the terminal, |line_stack [index]| holds the line number
216    of the enclosing level, so that |line| can be restored when the current file has been read.
217    Line numbers should never be negative, since the negative of the current line number is used to
218    identify the user's output routine in the |mode_line| field of the semantic nest entries.
219
220    If more information about the input state is needed, it can be included in small arrays like
221    those shown here. For example, the current page or segment number in the input file might be
222    put into a variable |page|, maintained for enclosing levels in |page_stack: array [1 ..
223    max_input_open] of integer| by analogy with |line_stack|.
224
225    Users of \TEX\ sometimes forget to balance left and right braces properly, and one of the ways
226    \TEX\ tries to spot such errors is by considering an input file as broken into subfiles by
227    control sequences that are declared to be |\outer|.
228
229    A variable called |scanner_status| tells \TEX\ whether or not to complain when a subfile ends.
230    This variable has six possible values:
231
232    \startitemize
233
234    \startitem
235        |normal|, means that a subfile can safely end here without incident.
236    \stopitem
237
238    \startitem
239        |skipping|, means that a subfile can safely end here, but not a file, because we're reading
240        past some conditional text that was not selected.
241    \stopitem
242
243    \startitem
244        |defining|, means that a subfile shouldn't end now because a macro is being defined.
245    \stopitem
246
247    \startitem
248        |matching|, means that a subfile shouldn't end now because a macro is being used and we are
249        searching for the end of its arguments.
250    \stopitem
251
252    \startitem
253        |aligning|, means that a subfile shouldn't end now because we are not finished with the
254        preamble of an |\halign| or |\valign|.
255    \stopitem
256
257    \startitem
258        |absorbing|, means that a subfile shouldn't end now because we are reading a balanced token
259        list for |\message|, |\write|, etc.
260    \stopitem
261
262    \stopitemize
263
264    If the |scanner_status| is not |normal|, the variable |warning_index| points to the |eqtb|
265    location for the relevant control sequence name to print in an error message.
266
267*/
268
/*tex The possible values of |scanner_status|. */

typedef enum scanner_states {
    /*tex a subfile can safely end here */
    scanner_is_normal = 0,
    /*tex passing conditional text */
    scanner_is_skipping = 1,
    /*tex reading a macro definition */
    scanner_is_defining = 2,
    /*tex reading macro arguments */
    scanner_is_matching = 3,
    /*tex reading tolerant macro arguments */
    scanner_is_tolerant = 4,
    /*tex reading an alignment preamble */
    scanner_is_aligning = 5,
    /*tex reading a balanced text */
    scanner_is_absorbing = 6,
} scanner_states;

/*tex This is only used when running out of token memory. */

extern void tex_show_runaway(void);
280
281/*tex
282
283    However, the discussion about input state really applies only to the case that we are inputting
284    from a file. There is another important case, namely when we are currently getting input from a
285    token list. In this case |state = token_list|, and the conventions about the other state
286    variables are
287    different:
288
289    \startitemize
290
291    \startitem
292        |loc| is a pointer to the current node in the token list, i.e., the node that will be read
293        next. If |loc=null|, the token list has been fully read.
294    \stopitem
295
296    \startitem
297        |start| points to the first node of the token list; this node may or may not contain a
298        reference count, depending on the type of token list involved.
299    \stopitem
300
301    \startitem
302        |token_type|, which takes the place of |index| in the discussion above, is a code number
303        that explains what kind of token list is being scanned.
304    \stopitem
305
306    \startitem
307        |name| points to the |eqtb| address of the control sequence being expanded, if the current
308        token list is a macro.
309    \stopitem
310
311    \startitem
312        |param_start|, which takes the place of |limit|, tells where the parameters of the current
313        macro begin in the |param_stack|, if the current token list is a macro.
314    \stopitem
315
316    \stopitemize
317
318    The |token_type| can take several values, depending on where the current token list came from:
319
320    \startitemize
321
322    \startitem
323        |parameter|, if a parameter is being scanned;
324    \stopitem
325
326    \startitem
327        |u_template|, if the |u_j| part of an alignment template is being scanned;
328    \stopitem
329
330    \startitem
331        |v_template|, if the |v_j| part of an alignment template is being scanned;
332    \stopitem
333
334    \startitem
335        |backed_up|, if the token list being scanned has been inserted as \quotation {to be read
336        again}.
337    \stopitem
338
339    \startitem
340        |inserted|, if the token list being scanned has been inserted as the text expansion of a
341        |\count| or similar variable;
342    \stopitem
343
344    \startitem
345        |macro|, if a user-defined control sequence is being scanned;
346    \stopitem
347
348    \startitem
349        |output_text|, if an |\output| routine is being scanned;
350    \stopitem
351
352    \startitem
353        |every_par_text|, if the text of |\everypar| is being scanned;
354    \stopitem
355
356    \startitem
357        |every_math_text|, if the text of |\everymath| is being scanned;
358    \stopitem
359
360    \startitem
361        |every_display_text|, if the text of |\everydisplay| is being scanned;
362    \stopitem
363
364    \startitem
365        |every_hbox_text|, if the text of |\everyhbox| is being scanned;
366    \stopitem
367
368    \startitem
369        |every_vbox_text|, if the text of |\everyvbox| is being scanned;
370    \stopitem
371
372    \startitem
373        |every_job_text|, if the text of |\everyjob| is being scanned;
374    \stopitem
375
376    \startitem
377        |every_cr_text|, if the text of |\everycr| is being scanned;
378    \stopitem
379
380    \startitem
381        |mark_text|, if the text of a |\mark| is being scanned;
382    \stopitem
383
384    \startitem
385        |write_text|, if the text of a |\write| is being scanned.
386    \stopitem
387
388    \stopitemize
389
390    The codes for |output_text|, |every_par_text|, etc., are equal to a constant plus the
391    corresponding codes for token list parameters |output_routine_loc|, |every_par_loc|, etc.
392
393    The token list begins with a reference count if and only if |token_type >= macro|.
394
395    Since \ETEX's additional token list parameters precede |toks_base|, the corresponding token
396    types must precede |write_text|. However, in \LUAMETATEX\ we delegate all the read and write
397    primitives to \LUA\ so that model has been simplified.
398
399*/
400
401/* #define token_type  input_state.cur_input.token_type  */ /*tex type of current token list */
402/* #define param_start input_state.cur_input.param_start */ /*tex base of macro parameters in |param_stack| */
403
/*tex
    The kinds of token lists that can be on the input stack. The order is significant: the
    groups below differ in whether a list is reference counted and in who flushes it.
*/

typedef enum token_types {

    /*tex Unreferenced, and always flushed at the end of a macro: */

    /*tex parameter */
    parameter_text,

    /*tex Managed and flushed by the node handlers: */

    /*tex |u_j| template */
    template_pre_text,
    /*tex |v_j| template */
    template_post_text,

    /*tex Rather special too, and never flushed: */

    /*tex used in units */
    associated_text,

    /*tex Unreferenced, and always flushed at the end: */

    /*tex text to be reread */
    backed_up_text,
    /*tex inserted texts */
    inserted_text,

    /*tex Referenced in the begin call and dereferenced at the end: */

    /*tex defined control sequences */
    macro_text,

    /*tex Also referenced in the begin call and dereferenced at the end: */

    /*tex output routines */
    output_text,
    /*tex |\everypar| */
    every_par_text,
    /*tex |\everymath| */
    every_math_text,
    /*tex |\everydisplay| */
    every_display_text,
    /*tex |\everyhbox| */
    every_hbox_text,
    /*tex |\everyvbox| */
    every_vbox_text,
    /*tex |\everymathatom| */
    every_math_atom_text,
    /*tex |\everyjob| */
    every_job_text,
    /*tex |\everycr| */
    every_cr_text,
    /*tex |\everytab| */
    every_tab_text,
    /*tex |\errhelp| */
    error_help_text,
    /*tex |\everybeforepar| */
    every_before_par_text,
    /*tex |\everyeof| */
    every_eof_text,

    /*tex These could become unreferenced and always flushed (that needs a different begin call): */

    /*tex |\atendofgroup| */
    end_of_group_text,
    /*tex |\topmark|, etc. */
    mark_text,
    token_text,
    loop_text,
    /*tex |\everyendpar| */
    end_paragraph_text,
    end_file_text,
    /*tex |\write| */
    write_text,
    local_text,
    local_loop_text,

} token_types;
457
458extern void        tex_initialize_input_state  (void);
459/*     int         tex_room_on_parameter_stack (void); */
460/*     int         tex_room_on_in_stack        (void); */
461/*     int         tex_room_on_input_stack     (void); */
462extern void        tex_copy_to_parameter_stack (halfword *pstack, int n);
463extern void        tex_show_context            (void);
464extern void        tex_show_validity           (void);
465extern void        tex_set_trick_count         (void);
466extern void        tex_begin_token_list        (halfword t, quarterword kind); /* include some tracing */
467extern void        tex_begin_parameter_list    (halfword t);
468extern void        tex_begin_backed_up_list    (halfword t);
469extern void        tex_begin_inserted_list     (halfword t);
470extern void        tex_begin_associated_list   (halfword t);
471extern void        tex_begin_macro_list        (halfword t);
472extern void        tex_end_token_list          (void);
473extern void        tex_quit_token_list         (void);
474extern void        tex_cleanup_input_state     (void);
475extern void        tex_back_input              (halfword t);
476extern void        tex_reinsert_token          (halfword t);
477extern void        tex_insert_input            (halfword h);
478extern void        tex_append_input            (halfword h);
479extern void        tex_begin_file_reading      (void);
480extern void        tex_end_file_reading        (void);
481extern void        tex_initialize_inputstack   (void);
482extern void        tex_lua_string_start        (void);
483extern void        tex_tex_string_start        (int iotype, int cattable);
484extern void        tex_any_string_start        (char *s);
485extern halfword    tex_wrapped_token_list      (halfword h);
486extern const char *tex_current_input_file_name (void);
487
488# endif
489