1/* 2 See license.txt in the root of this project. 3*/ 4 5# ifndef LMT_INPUTSTACK_H 6# define LMT_INPUTSTACK_H 7 8/*tex 9 10 The state of \TEX's input mechanism appears in the input stack, whose entries are records with 11 six fields, called |state|, |index|, |start|, |loc|, |limit|, and |name|. 12 13*/ 14 15/* todo: there is no need to be sparse here */ 16 17typedef struct in_state_record { 18 halfword start; 19 halfword loc; 20 unsigned short state; 21 union { unsigned short index; unsigned short token_type; }; /*tex: So, no macro but name. */ 22 union { halfword limit; halfword parameter_start; }; /*tex: So, no macro but name. */ 23 halfword name; 24 short cattable; /*tex The category table used by the current line (see |textoken.c|). */ 25 unsigned short partial; /*tex Is the current line partial (see |textoken.c|)? */ 26 int state_file; /*tex Here we stack the tag of the current file. */ 27 int state_line; /*tex Not used. */ 28} in_state_record; 29 30typedef struct input_stack_record { 31 halfword input_file_callback_id; 32 halfword line; 33 halfword end_of_file_seen; 34 halfword group; 35 halfword if_ptr; 36 halfword at_end_of_file; 37 char *full_source_filename; 38} input_stack_record; 39 40// todo: better names for in_state_record and input_stack_record ... now mixed up 41 42typedef struct input_state_info { 43 in_state_record *input_stack; 44 memory_data input_stack_data; 45 input_stack_record *in_stack; 46 memory_data in_stack_data; 47 halfword *parameter_stack; 48 memory_data parameter_stack_data; 49 in_state_record cur_input; /*tex The \quote {top} input state. Why not just pointing. */ 50 int input_line; 51 int scanner_status; 52 halfword def_ref; /*tex Has to be set for error recovery etc. */ 53 int align_state; 54 int base_ptr; 55 halfword warning_index; 56 int open_files; 57 int padding; 58} input_state_info; 59 60extern input_state_info lmt_input_state; 61 62typedef struct input_file_state_info { 63 int forced_file; 64 int forced_line; 65 halfword mode; 66 halfword line; 67} input_file_state_info; 68 69extern input_file_state_info input_file_state; 70 71static inline int input_file_value(void) 72{ 73 return input_file_state.forced_file ? input_file_state.forced_file : lmt_input_state.cur_input.state_file; 74} 75 76static inline int input_line_value(void) 77{ 78 return input_file_state.forced_line ? input_file_state.forced_line : (input_file_state.line ? input_file_state.line : lmt_input_state.input_line); 79} 80 81/*tex 82 83 In \LUAMETATEX\ the io model was stepwise changed a bit, mostly in the \LUA\ feedback area. 84 Support for nodes, tokens, short and long string were improved. Around 2.06.17 specification 85 nodes became dynamic and that left the pseudo files as only variable node type. By removing 86 variable nodes we can avoid some code in node management so getting rid of pseudo files made 87 sense. The token scan macros used these but now use a lightweight varian tof the \LUA\ scanner, 88 which we had anyway. The only complication is the |\everyeof| of |\scantokens|. Also, tracing 89 (if at all) is now different but these three scanners are seldom used and were introduced in 90 \ETEX\ (|scantokens|), \LUATEX\ (|\scantextokens|) and \LUAMETATEX\ (|tokenized|). The new 91 approach also gives more room for future extensions. 92 93 All this has been a very stepwise process, because we know that there are users who use \LMTX\ 94 in production and small steps are easier to test. Experiments mostly happen in parts of the 95 code that is less critital ... after all \LUAMETATEX\ is also an experimental engine ... but 96 io related code changes are kind of critital. 97 98 Just to remember wahat we came from: the first 15 were reserved read channels but that is now 99 delegated to \LUA, so we had an offset of 16 in: 100 101*/ 102 103typedef enum io_codes { 104 io_initial_input_code, 105 io_lua_input_code, 106 io_token_input_code, 107 io_token_eof_input_code, 108 io_tex_macro_code, 109 io_file_input_code, 110} io_codes; 111 112/* 113* 114 Now, these |io_codes| are used in the name field but that field can also be a way larger number, 115 i.e.\ the string index of the file. That also assumes that the first used index is above the last 116 io_code. It can be the warning index too, just for the sake of an error context message. So: 117 symbolic (small) number, tex string being the filename, and macro name. But, because we also 118 have that information in other places (partly as side effect of luafication) a simpler model is 119 used now where we use a few dedicates codes. It also means that we no longer store the filename 120 in the string pool. 121 122*/ 123 124# define io_token_input(c) (c >= io_lua_input_code && c <= io_token_eof_input_code) 125# define io_file_input(c) (c >= io_file_input_code) 126 127/*tex 128 129 Let's look more closely now at the control variables (|state|, |index|, |start|, |loc|, |limit|, 130 |name|), assuming that \TEX\ is reading a line of characters that have been input from some file 131 or from the user's terminal. There is an array called |buffer| that acts as a stack of all lines 132 of characters that are currently being read from files, including all lines on subsidiary levels 133 of the input stack that are not yet completed. \TEX\ will return to the other lines when it is 134 finished with the present input file. 135 136 (Incidentally, on a machine with byte-oriented addressing, it might be appropriate to combine 137 |buffer| with the |str_pool| array, letting the buffer entries grow downward from the top of the 138 string pool and checking that these two tables don't bump into each other.) 139 140 The line we are currently working on begins in position |start| of the buffer; the next character 141 we are about to read is |buffer[loc]|; and |limit| is the location of the last character present. 142 If |loc > limit|, the line has been completely read. Usually |buffer[limit]| is the 143 |end_line_char|, denoting the end of a line, but this is not true if the current line is an 144 insertion that was entered on the user's terminal in response to an error message. 145 146 The |name| variable is a string number that designates the name of the current file, if we are 147 reading a text file. It is zero if we are reading from the terminal; it is |n+1| if we are reading 148 from input stream |n|, where |0 <= n <= 16|. (Input stream 16 stands for an invalid stream number; 149 in such cases the input is actually from the terminal, under control of the procedure |read_toks|.) 150 Finally |18 <= name <=20| indicates that we are reading a pseudo file created by the |\scantokens| 151 or |\scantextokens| command. A larger value is reserved for input coming from \LUA. 152 153 The |state| variable has one of three values, when we are scanning such files: 154 155 \startitemize 156 \startitem 157 |mid_line| is the normal state. 158 \stopitem 159 \startitem 160 |skip_blanks| is like |mid_line|, but blanks are ignored. 161 \stopitem 162 \startitem 163 |new_line| is the state at the beginning of a line. 164 \stopitem 165 \stopitemize 166 167 These state values are assigned numeric codes so that if we add the state code to the next 168 character's command code, we get distinct values. For example, |mid_line + spacer| stands for the 169 case that a blank space character occurs in the middle of a line when it is not being ignored; 170 after this case is processed, the next value of |state| will be |skip_blanks|. 171 172 As with other constants, we only add some prefix or suffix but keep the normal name as much as 173 possible, so that the original documentation still applies. 174 175*/ 176 177/* 178 We could have |token_array_state| for a packed representation of really permanent macros if we 179 freeze permanent. If we don't freeze we need an extra bit to flag a macro as using the array 180 but we don't have a bit left. Packing could happen before we dump and would make the body half 181 the size. Fetching from an array is a middleground between a token list and a file and could a 182 bit faster and definitely make for a smaller format file. In the end it might not really pay 183 off and it is also a bit un-TeX. 184*/ 185 186typedef enum state_codes { 187 token_list_state = 0, 188 /*tex when scanning a line of characters */ 189 mid_line_state = 1, 190 /*tex when ignoring blanks */ 191 skip_blanks_state = 2 + max_category_code, 192 /*tex at the start of a line */ 193 new_line_state = 3 + max_category_code + max_category_code, 194} state_codes; 195 196/*tex 197 198 Additional information about the current line is available via the |index| variable, which 199 counts how many lines of characters are present in the buffer below the current level. We 200 have |index = 0| when reading from the terminal and prompting the user for each line; then if 201 the user types, e.g., |\input paper|, we will have |index = 1| while reading the file 202 |paper.tex|. However, it does not follow that |index| is the same as the input stack pointer, 203 since many of the levels on the input stack may come from token lists. For example, the 204 instruction |\input paper| might occur in a token list. 205 206 The global variable |in_open| is equal to the |index| value of the highest \quote {non token 207 list} level. Thus, the number of partially read lines in the buffer is |in_open + 1|, and we 208 have |in_open = index| when we are not reading a token list. 209 210 If we are not currently reading from the terminal, or from an input stream, we are reading from 211 the file variable |input_file [index]|. We use the notation |terminal_input| as a convenient 212 abbreviation for |name = 0|, and |cur_file| as an abbreviation for |input_file [index]|. 213 214 The global variable |line| contains the line number in the topmost open file, for use in error 215 messages. If we are not reading from the terminal, |line_stack [index]| holds the line number 216 or the enclosing level, so that |line| can be restored when the current file has been read. 217 Line numbers should never be negative, since the negative of the current line number is used to 218 identify the user's output routine in the |mode_line| field of the semantic nest entries. 219 220 If more information about the input state is needed, it can be included in small arrays like 221 those shown here. For example, the current page or segment number in the input file might be 222 put into a variable |page|, maintained for enclosing levels in ||page_stack:array [1 .. 223 max_input_open] of integer| by analogy with |line_stack|. 224 225 Users of \TEX\ sometimes forget to balance left and right braces properly, and one of the ways 226 \TEX\ tries to spot such errors is by considering an input file as broken into subfiles by 227 control sequences that are declared to be |\outer|. 228 229 A variable called |scanner_status| tells \TEX\ whether or not to complain when a subfile ends. 230 This variable has six possible values: 231 232 \startitemize 233 234 \startitem 235 |normal|, means that a subfile can safely end here without incident. 236 \stopitem 237 238 \startitem 239 |skipping|, means that a subfile can safely end here, but not a file, because we're reading 240 past some conditional text that was not selected. 241 \stopitem 242 243 \startitem 244 |defining|, means that a subfile shouldn't end now because a macro is being defined. 245 \stopitem 246 247 \startitem 248 |matching|, means that a subfile shouldn't end now because a macro is being used and we are 249 searching for the end of its arguments. 250 \stopitem 251 252 \startitem 253 |aligning|, means that a subfile shouldn't end now because we are not finished with the 254 preamble of an |\halign| or |\valign|. 255 \stopitem 256 257 \startitem 258 |absorbing|, means that a subfile shouldn't end now because we are reading a balanced token 259 list for |\message|, |\write|, etc. 260 \stopitem 261 262 \stopitemize 263 264 If the |scanner_status| is not |normal|, the variable |warning_index| points to the |eqtb| 265 location for the relevant control sequence name to print in an error message. 266 267*/ 268 269typedef enum scanner_states { 270 scanner_is_normal, /*tex passing conditional text */ 271 scanner_is_skipping, /*tex passing conditional text */ 272 scanner_is_defining, /*tex reading a macro definition */ 273 scanner_is_matching, /*tex reading macro arguments */ 274 scanner_is_tolerant, /*tex reading tolerant macro arguments */ 275 scanner_is_aligning, /*tex reading an alignment preamble */ 276 scanner_is_absorbing, /*tex reading a balanced text */ 277} scanner_states; 278 279extern void tex_show_runaway(void); /*tex This is only used when running out of token memory. */ 280 281/*tex 282 283 However, the discussion about input state really applies only to the case that we are inputting 284 from a file. There is another important case, namely when we are currently getting input from a 285 token list. In this case |state = token_list|, and the conventions about the other state 286 variables are 287 different: 288 289 \startitemize 290 291 \startitem 292 |loc| is a pointer to the current node in the token list, i.e., the node that will be read 293 next. If |loc=null|, the token list has been fully read. 294 \stopitem 295 296 \startitem 297 |start| points to the first node of the token list; this node may or may not contain a 298 reference count, depending on the type of token list involved. 299 \stopitem 300 301 \startitem 302 |token_type|, which takes the place of |index| in the discussion above, is a code number 303 that explains what kind of token list is being scanned. 304 \stopitem 305 306 \startitem 307 |name| points to the |eqtb| address of the control sequence being expanded, if the current 308 token list is a macro. 309 \stopitem 310 311 \startitem 312 |param_start|, which takes the place of |limit|, tells where the parameters of the current 313 macro begin in the |param_stack|, if the current token list is a macro. 314 \stopitem 315 316 \stopitemize 317 318 The |token_type| can take several values, depending on where the current token list came from: 319 320 \startitemize 321 322 \startitem 323 |parameter|, if a parameter is being scanned; 324 \stopitem 325 326 \startitem 327 |u_template|, if the |u_j| part of an alignment template is being scanned; 328 \stopitem 329 330 \startitem 331 |v_template|, if the |v_j| part of an alignment template is being scanned; 332 \stopitem 333 334 \startitem 335 |backed_up|, if the token list being scanned has been inserted as \quotation {to be read 336 again}. 337 \stopitem 338 339 \startitem 340 |inserted|, if the token list being scanned has been inserted as the text expansion of a 341 |\count| or similar variable; 342 \stopitem 343 344 \startitem 345 |macro|, if a user-defined control sequence is being scanned; 346 \stopitem 347 348 \startitem 349 |output_text|, if an |\output| routine is being scanned; 350 \stopitem 351 352 \startitem 353 |every_par_text|, if the text of |\everypar| is being scanned; 354 \stopitem 355 356 \startitem 357 |every_math_text|, if the text of |\everymath| is being scanned; 358 \stopitem 359 360 \startitem 361 |every_display_text|, if the text of \everydisplay| is being scanned; 362 \stopitem 363 364 \startitem 365 |every_hbox_text|, if the text of |\everyhbox| is being scanned; 366 \stopitem 367 368 \startitem 369 |every_vbox_text|, if the text of |\everyvbox| is being scanned; 370 \stopitem 371 372 \startitem 373 |every_job_text|, if the text of |\everyjob| is being scanned; 374 \stopitem 375 376 \startitem 377 |every_cr_text|, if the text of |\everycr| is being scanned; 378 \stopitem 379 380 \startitem 381 |mark_text|, if the text of a |\mark| is being scanned; 382 \stopitem 383 384 \startitem 385 |write_text|, if the text of a |\write| is being scanned. 386 \stopitem 387 388 \stopitemize 389 390 The codes for |output_text|, |every_par_text|, etc., are equal to a constant plus the 391 corresponding codes for token list parameters |output_routine_loc|, |every_par_loc|, etc. 392 393 The token list begins with a reference count if and only if |token_type >= macro|. 394 395 Since \ETEX's additional token list parameters precede |toks_base|, the corresponding token 396 types must precede |write_text|. However, in \LUAMETATEX\ we delegate all the read and write 397 primitives to \LUA\ so that model has been simplified. 398 399*/ 400 401/* #define token_type input_state.cur_input.token_type */ /*tex type of current token list */ 402/* #define param_start input_state.cur_input.param_start */ /*tex base of macro parameters in |param_stack| */ 403 404typedef enum token_types { 405 406 /*tex This one is unreferenced and always flushed at the end of a macro. */ 407 408 parameter_text, /*tex parameter */ 409 410 /*tex These are managed by the node handlers and flushed there. */ 411 412 template_pre_text, /*tex |u_j| template */ 413 template_post_text, /*tex |v_j| template */ 414 415 /*tex This one is rather special too and never flushed. */ 416 417 associated_text, /*tex used in units */ 418 419 /*tex These are unreferenced and always flushed en the end. */ 420 421 backed_up_text, /*tex text to be reread */ 422 inserted_text, /*tex inserted texts */ 423 424 /*tex This one referenced in the begin call and dereferenced at the end. */ 425 426 macro_text, /*tex defined control sequences */ 427 428 /*tex These are referenced in the begin call and dereferenced at the end. */ 429 430 output_text, /*tex output routines */ 431 every_par_text, /*tex |\everypar| */ 432 every_math_text, /*tex |\everymath| */ 433 every_display_text, /*tex |\everydisplay| */ 434 every_hbox_text, /*tex |\everyhbox| */ 435 every_vbox_text, /*tex |\everyvbox| */ 436 every_math_atom_text, /*tex |\everymathatom| */ 437 every_job_text, /*tex |\everyjob| */ 438 every_cr_text, /*tex |\everycr| */ 439 every_tab_text, /*tex |\everytab| */ 440 error_help_text, /*tex |\errhelp| */ 441 every_before_par_text, /*tex |\everybeforeeof| */ 442 every_eof_text, /*tex |\everyeof| */ 443 444 /*tex These could be unreferenced and always flush (different begin call). */ 445 446 end_of_group_text, /*tex |\atendofgroup| */ 447 mark_text, /*tex |\topmark|, etc. */ 448 token_text, /*tex */ 449 loop_text, /*tex */ 450 end_paragraph_text, /*tex |\everyendpar| */ 451 end_file_text, 452 write_text, /*tex |\write| */ 453 local_text, /*tex */ 454 local_loop_text, /*tex */ 455 456} token_types; 457 458extern void tex_initialize_input_state (void); 459/* int tex_room_on_parameter_stack (void); */ 460/* int tex_room_on_in_stack (void); */ 461/* int tex_room_on_input_stack (void); */ 462extern void tex_copy_to_parameter_stack (halfword *pstack, int n); 463extern void tex_show_context (void); 464extern void tex_show_validity (void); 465extern void tex_set_trick_count (void); 466extern void tex_begin_token_list (halfword t, quarterword kind); /* include some tracing */ 467extern void tex_begin_parameter_list (halfword t); 468extern void tex_begin_backed_up_list (halfword t); 469extern void tex_begin_inserted_list (halfword t); 470extern void tex_begin_associated_list (halfword t); 471extern void tex_begin_macro_list (halfword t); 472extern void tex_end_token_list (void); 473extern void tex_quit_token_list (void); 474extern void tex_cleanup_input_state (void); 475extern void tex_back_input (halfword t); 476extern void tex_reinsert_token (halfword t); 477extern void tex_insert_input (halfword h); 478extern void tex_append_input (halfword h); 479extern void tex_begin_file_reading (void); 480extern void tex_end_file_reading (void); 481extern void tex_initialize_inputstack (void); 482extern void tex_lua_string_start (void); 483extern void tex_tex_string_start (int iotype, int cattable); 484extern void tex_any_string_start (char *s); 485extern halfword tex_wrapped_token_list (halfword h); 486extern const char *tex_current_input_file_name (void); 487 488# endif 489 |