textoken.h /size: 29 Kb    last modification: 2025-02-21 11:03
1/*
2    See license.txt in the root of this project.
3*/
4
5# ifndef LMT_TEXTOKEN_H
6# define LMT_TEXTOKEN_H
7
8# include "luametatex.h"
9
/*tex

    These are constants that can be added to a chr value and then give a token with the right cmd
    and chr combination, which is then equivalent to |token_val (cmd, chr)|. The cmd results from
    shifting right 21 bits. The following tokens therefore should match the order of the (first
    bunch of) cmd codes!

    \TEX\ stores the specific match character which defaults to |#|. When tokens get serialized the
    machinery starts with |match_chr = '#'| but overloads that by the last stored variant. So the
    last (!) seen |match_chr| in the macro preamble determines what gets used in showing the body.
    One could argue that this is a buglet but I see it more as a side effect. In practice there is
    never a mix of such characters used. Anyway, one could as well use the first one seen in the
    preamble and use that for the rest because consistency is better than confusion. Even better is
    to just always use |#| and store the numbers in preamble match tokens, which opens up
    possibilities (for strict or tolerant matching, skipping spaces, optional delimiters and even
    more arguments).

*/
28
29//define cs_token_flag            0x1FFFFFFF
30
/*tex
    Helpers for values that exceed |node_token_max|: such a value is split in a lower 16 bit part
    (|node_token_lsb|) and an upper 16 bit part to which |node_token_flag| is added
    (|node_token_msb|); |node_token_sum| glues the two parts together again. All arguments are
    parenthesized so that compound expressions (like |a \| b|) can be passed safely.
*/

# define node_token_max           0x0FFFFF
# define node_token_flag          0x100000
# define node_token_lsb(sum)      ((sum) & 0x0000FFFF)
# define node_token_msb(sum)      ((((sum) & 0xFFFF0000) >> 16) + node_token_flag)
# define node_token_sum(msb,lsb)  ((((msb) & 0x0000FFFF) << 16) + (lsb))
# define node_token_overflow(sum) ((sum) > node_token_max)
# define node_token_flagged(sum)  ((sum) > node_token_flag)
38
39/*tex
40    Instead of |fixmem| we use |tokens| because it is dynamic anyway and we then better match variables
41    that deal with managing that. Most was already hidden in a few files anyway.
42*/
43
44typedef struct token_memory_state_info {
45    memoryword  *tokens;      /*tex |memoryword *volatile fixmem;| */
46    memory_data  tokens_data;
47    halfword     available;
48    int          padding;
49} token_memory_state_info;
50
51extern token_memory_state_info lmt_token_memory_state;
52
/*tex The reading states; the values are spelled out but equal the implicit ones. */

typedef enum read_states {
    reading_normal      = 0, /*tex we're going ahead */
    reading_just_opened = 1, /*tex newly opened, first line not yet read */
    reading_closed      = 2, /*tex not open, or at end of file */
} read_states;
58
/*tex The kinds of \LUA\ input; the values are spelled out but equal the implicit ones. */

typedef enum lua_input_types {
    unset_lua_input      = 0,
    string_lua_input     = 1,
    packed_lua_input     = 2,
    token_lua_input      = 3,
    token_list_lua_input = 4,
    node_lua_input       = 5,
} lua_input_types;
67
/*tex The kinds of \TEX\ input; the values are spelled out but equal the implicit ones. */

typedef enum tex_input_types {
    eof_tex_input        = 0,
    string_tex_input     = 1,
    token_tex_input      = 2,
    token_list_tex_input = 3,
    node_tex_input       = 4,
} tex_input_types;
75
/*tex Negative values that stand for a preset instead of a catcode table number. */

typedef enum catcode_table_presets {
    no_catcode_table_preset      = -2,
    default_catcode_table_preset = -1,
} catcode_table_presets;
80
/*tex

    There are a few temporary head pointers, one is |temp_token_head|. This one we keep because
    when we expand, we can run into situations where we need that pointer. But, |backup_head| is
    a real temporary one: we can replace that with local variables. Okay, it is kind of kept in
    the format file but if it ends up there we're in some kind of trouble anyway. So,
    |backup_head| is now local and |temp_token_head| only global when we are scanning; in cases
    where we serialize token lists it has been replaced by local variables (and the related
    functions now keep track of head and tail). This makes sense because in \LUAMETATEX\ we often
    go between \TEX\ and \LUA\ and this keeps it kind of simple. This also makes clear when we
    are scanning (the global head is used) and when we are doing something simple with a list. The
    same is true for |match_token_head| that moved to the expand state. The |backup_head| variable
    is gone because we now use locals.

*/
96
97typedef struct token_state_info {
98    halfword  null_list;     /*tex permanently empty list */
99    int       force_eof;
100    int       luacstrings;
101    /*tex These are pseudo constants, their value depends on the number of primitives etc. */
102    halfword  par_loc;
103    halfword  par_token;
104 /* halfword  line_par_loc;   */ /*tex See note in textoken.c|. */
105 /* halfword  line_par_token; */ /*tex See note in textoken.c|. */
106    char     *buffer;
107    int       bufloc;
108    int       bufmax;
109    int       empty;
110    int       padding;
111} token_state_info;
112
113extern token_state_info lmt_token_state;
114
/*tex

    We now can have 15 parameters but if needed we can go higher. However, we then also need to
    cache more and change the |preamble| and |count| to some funny bit ranges. If needed we can
    bump the reference count maximum but quite likely one has run out of something else by then
    already.

    \starttyping
    preamble  = 0xF0000000 : 1 when we have one, including trailing #
    count     = 0x0F000000
    reference = 0x00FFFFFF
    \stoptyping

*/
129
/*tex The values that can be stored in the preamble bits of a macro's reference word. */

typedef enum macro_preamble_states {
    macro_without_preamble = 0,
    macro_with_preamble    = 1,
    macro_is_packed        = 2, /*tex not yet, maybe some day an array instead of a list */
} macro_preamble_states;
135
/*tex
    The |hulf1| field of token |a| packs three values: the preamble state in bits 28..31, the
    parameter count in bits 24..27 and the reference count in bits 0..23 (|max_token_reference|).

    The |set_| macros add to the field instead of overwriting it, so they only give the expected
    result when the related bits are still zero. The macros that change the reference count do not
    check for overflow into the parameter bits. All macros that modify the field are wrapped in
    parentheses so that they behave as one expression wherever they are used.
*/

# define max_match_count 15
# define gap_match_count  7

# define max_token_reference 0x00FFFFFF

# define get_token_preamble(a)   ((lmt_token_memory_state.tokens[a].hulf1 >> 28) & 0xF)
# define get_token_parameters(a) ((lmt_token_memory_state.tokens[a].hulf1 >> 24) & 0xF)
# define get_token_reference(a)  ((lmt_token_memory_state.tokens[a].hulf1      ) & max_token_reference)

# define set_token_preamble(a,b)   (lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28)) /* normally the variable is still zero here */
# define set_token_parameters(a,b) (lmt_token_memory_state.tokens[a].hulf1 += ((b) << 24)) /* normally the variable is still zero here */

# define set_token_reference(a,b)  (lmt_token_memory_state.tokens[a].hulf1 += (b))         /* adds, so the count should still be zero here */
# define add_token_reference(a)    (lmt_token_memory_state.tokens[a].hulf1 += 1)           /* we are way off the parameter count */
# define sub_token_reference(a)    (lmt_token_memory_state.tokens[a].hulf1 -= 1)           /* we are way off the parameter count */
# define inc_token_reference(a,b)  (lmt_token_memory_state.tokens[a].hulf1 += (b))         /* we are way off the parameter count */
# define dec_token_reference(a,b)  (lmt_token_memory_state.tokens[a].hulf1 -= (b))         /* we are way off the parameter count */
153
154/* */
155
/*tex
    Access to the two halves of token memory word |a|: |half1| holds the info and |half0| the link.
    The plain variants can also be used as assignment targets; the |get_| and |set_| variants are
    defined in terms of them.
*/

# define token_info(a)       lmt_token_memory_state.tokens[a].half1
# define token_link(a)       lmt_token_memory_state.tokens[a].half0

# define get_token_info(a)   token_info(a)
# define get_token_link(a)   token_link(a)

# define set_token_info(a,b) token_info(a) = (b)
# define set_token_link(a,b) token_link(a) = (b)

/*tex
    A token value has the cmd code above the lowest |cs_offset_bits| bits and the chr code in
    those lowest bits.
*/

# define token_val(cmd,chr) (((cmd) << cs_offset_bits) + (chr))
# define token_cmd(tok)     ((tok) >> cs_offset_bits)
# define token_chr(tok)     ((tok) &  cs_offset_max)
166
/*tex
    Sometimes we add a value directly. Instead we could use |token_val| on the spot but then we
    also need different range checkers. We use numbers because we don't have the cmd codes defined
    yet when we're here, so we can't use for instance |token_val (spacer_cmd, 20)| yet.
*/
172
/*tex
    The base tokens per command code. The number is the value of the command that is mentioned
    in the comment.
*/

# define left_brace_token   token_val( 1,  0) /* left_brace_cmd    */
# define right_brace_token  token_val( 2,  0) /* right_brace_cmd   */
# define math_shift_token   token_val( 3,  0) /* math_shift_cmd    */
# define alignment_token    token_val( 4,  0) /* alignment_tab_cmd */
# define endline_token      token_val( 5,  0) /* end_line_cmd      */
# define parameter_token    token_val( 6,  0) /* parameter_cmd     */
# define superscript_token  token_val( 7,  0) /* superscript_cmd   */
# define subscript_token    token_val( 8,  0) /* subscript_cmd     */
# define ignore_token       token_val( 9,  0) /* ignore_cmd        */
# define space_token        token_val(10, 32) /* spacer_cmd, chr 32 */
# define letter_token       token_val(11,  0) /* letter_cmd        */
# define other_token        token_val(12,  0) /* other_char_cmd    */
# define active_token       token_val(13,  0) /* active_char_cmd   */

# define match_token        token_val(19,  0) /* match_cmd         */
# define end_match_token    token_val(20,  0) /* end_match_cmd     */

/*tex
    Testing for |left_brace_limit| and |right_brace_limit| is convenient because then we don't
    need to check |cur_cmd| as well as |cur_cs| when we check for balanced |{}|. However, as
    soon as we need to check |cur_cmd| anyway it becomes nicer to check for |cur_cs| afterwards.
    Using a |switch| is then a bit more efficient too.
*/

# define left_brace_limit   right_brace_token
# define right_brace_limit  math_shift_token
199
/*tex
    Tokens for specific characters. Without suffix, or with the suffix |_o|, the character has
    category other. The suffix |_l| means letter, |_s| sub- or superscript, |_t| alignment tab
    and |_m| math shift.
*/

/*tex Prefixes of numeric constants and decimal separators: */

# define octal_token              (other_token + '\'') /*tex apostrophe, indicates an octal constant */
# define hex_token                (other_token + '"')  /*tex double quote, indicates a hex constant */
# define alpha_token              (other_token + '`')  /*tex reverse apostrophe, precedes alpha constants */
# define point_token              (other_token + '.')  /*tex decimal point */
# define period_token             (other_token + '.')  /*tex decimal point, same value as |point_token| */
# define continental_point_token  (other_token + ',')  /*tex decimal point, Eurostyle */
# define comma_token              (other_token + ',')  /*tex decimal comma, same value as |continental_point_token| */

/*tex Digits: */

# define zero_token               (other_token + '0')  /*tex zero, the smallest digit */
# define one_token                (other_token + '1')
# define two_token                (other_token + '2')
# define three_token              (other_token + '3')
# define four_token               (other_token + '4')
# define five_token               (other_token + '5')
# define six_token                (other_token + '6')
# define seven_token              (other_token + '7')
# define eight_token              (other_token + '8')
# define nine_token               (other_token + '9')  /*tex nine, the largest digit */

/*tex Operators, relations and punctuation: */

# define plus_token               (other_token + '+')
# define minus_token              (other_token + '-')
# define slash_token              (other_token + '/')
# define asterisk_token           (other_token + '*')
# define colon_token              (other_token + ':')
# define semi_colon_token         (other_token + ';')
# define equal_token              (other_token + '=')
# define less_token               (other_token + '<')
# define more_token               (other_token + '>')
# define escape_token             (other_token + '\\')
# define hash_token               (other_token + '#')
# define percentage_token         (other_token + '%')

/*tex Fences: */

# define left_parent_token        (other_token + '(')
# define right_parent_token       (other_token + ')')
# define left_bracket_token       (other_token + '[')
# define right_bracket_token      (other_token + ']')
# define left_angle_token         (other_token + '<')
# define right_angle_token        (other_token + '>')

/*tex Characters that come in more than one category: */

# define exclamation_token_o      (other_token + '!')
# define exclamation_token_l      (letter_token + '!')

# define underscore_token         (other_token + '_')
# define underscore_token_o       (other_token + '_')
# define underscore_token_l       (letter_token + '_')
# define underscore_token_s       (subscript_token + '_')

# define circumflex_token         (other_token + '^')
# define circumflex_token_o       (other_token + '^')
# define circumflex_token_l       (letter_token + '^')
# define circumflex_token_s       (superscript_token + '^')

# define bar_token                (other_token + '|')
# define bar_token_o              (other_token + '|')
# define bar_token_l              (letter_token + '|')

# define ampersand_token          (other_token + '&')
# define ampersand_token_o        (other_token + '&')
# define ampersand_token_l        (letter_token + '&')
# define ampersand_token_t        (alignment_token + '&')

# define tilde_token              (other_token + '~')
# define tilde_token_o            (other_token + '~')
# define tilde_token_l            (letter_token + '~')

# define at_sign_token_o          (other_token + '@')
# define at_sign_token_l          (letter_token + '@')

# define dollar_token             (other_token + '$')
# define dollar_token_o           (other_token + '$')
# define dollar_token_l           (letter_token + '$')
# define dollar_token_m           (math_shift_token + '$')
261
/*tex Tokens (category other) for some \UNICODE\ operators; the code point is the added value. */

/*tex Relations: */

# define element_token            (other_token + 0x2208) // ∈
# define not_element_token        (other_token + 0x2209) // ∉
# define not_equal_token          (other_token + 0x2260) // ≠
# define less_or_equal_token      (other_token + 0x2264) // ≤
# define more_or_equal_token      (other_token + 0x2265) // ≥
# define not_less_or_equal_token  (other_token + 0x2270) // ≰
# define not_more_or_equal_token  (other_token + 0x2271) // ≱

/*tex Signs: */

# define plus_minus_token         (other_token + 0x00B1) // ± plus minus
# define minus_plus_token         (other_token + 0x2213) // ∓ minus plus

/*tex Logical operators: */

# define logical_nor_token        (other_token + 0x22BD) // ⊽
# define logical_nand_token       (other_token + 0x22BC) // ⊼
# define logical_xnor_token       (other_token + 0x2299) // ⊙

/*tex Conditional operators: */

# define conditional_and_token    (other_token + 0x2227) // ∧
# define conditional_or_token     (other_token + 0x2228) // ∨
278
/*tex
    The lowercase letters, each one as letter (|_l|) and as other character (|_o|). The range
    |a| upto |f| covers the special hex digits.
*/

# define a_token_l  (letter_token + 'a') /*tex the smallest special hex digit */
# define a_token_o  (other_token  + 'a')
# define b_token_l  (letter_token + 'b')
# define b_token_o  (other_token  + 'b')
# define c_token_l  (letter_token + 'c')
# define c_token_o  (other_token  + 'c')
# define d_token_l  (letter_token + 'd')
# define d_token_o  (other_token  + 'd')
# define e_token_l  (letter_token + 'e')
# define e_token_o  (other_token  + 'e')
# define f_token_l  (letter_token + 'f') /*tex the largest special hex digit */
# define f_token_o  (other_token  + 'f')
# define g_token_l  (letter_token + 'g')
# define g_token_o  (other_token  + 'g')
# define h_token_l  (letter_token + 'h')
# define h_token_o  (other_token  + 'h')
# define i_token_l  (letter_token + 'i')
# define i_token_o  (other_token  + 'i')
# define j_token_l  (letter_token + 'j')
# define j_token_o  (other_token  + 'j')
# define k_token_l  (letter_token + 'k')
# define k_token_o  (other_token  + 'k')
# define l_token_l  (letter_token + 'l')
# define l_token_o  (other_token  + 'l')
# define m_token_l  (letter_token + 'm')
# define m_token_o  (other_token  + 'm')
# define n_token_l  (letter_token + 'n')
# define n_token_o  (other_token  + 'n')
# define o_token_l  (letter_token + 'o')
# define o_token_o  (other_token  + 'o')
# define p_token_l  (letter_token + 'p')
# define p_token_o  (other_token  + 'p')
# define q_token_l  (letter_token + 'q')
# define q_token_o  (other_token  + 'q')
# define r_token_l  (letter_token + 'r')
# define r_token_o  (other_token  + 'r')
# define s_token_l  (letter_token + 's')
# define s_token_o  (other_token  + 's')
# define t_token_l  (letter_token + 't')
# define t_token_o  (other_token  + 't')
# define u_token_l  (letter_token + 'u')
# define u_token_o  (other_token  + 'u')
# define v_token_l  (letter_token + 'v')
# define v_token_o  (other_token  + 'v')
# define w_token_l  (letter_token + 'w')
# define w_token_o  (other_token  + 'w')
# define x_token_l  (letter_token + 'x')
# define x_token_o  (other_token  + 'x')
# define y_token_l  (letter_token + 'y')
# define y_token_o  (other_token  + 'y')
# define z_token_l  (letter_token + 'z')
# define z_token_o  (other_token  + 'z')
356
/*tex
    The uppercase letters, each one as letter (|_l|) and as other character (|_o|). The range
    |A| upto |F| covers the special hex digits.
*/

# define A_token_l  (letter_token + 'A') /*tex the smallest special hex digit */
# define A_token_o  (other_token  + 'A')
# define B_token_l  (letter_token + 'B')
# define B_token_o  (other_token  + 'B')
# define C_token_l  (letter_token + 'C')
# define C_token_o  (other_token  + 'C')
# define D_token_l  (letter_token + 'D')
# define D_token_o  (other_token  + 'D')
# define E_token_l  (letter_token + 'E')
# define E_token_o  (other_token  + 'E')
# define F_token_l  (letter_token + 'F') /*tex the largest special hex digit */
# define F_token_o  (other_token  + 'F')
# define G_token_l  (letter_token + 'G')
# define G_token_o  (other_token  + 'G')
# define H_token_l  (letter_token + 'H')
# define H_token_o  (other_token  + 'H')
# define I_token_l  (letter_token + 'I')
# define I_token_o  (other_token  + 'I')
# define J_token_l  (letter_token + 'J')
# define J_token_o  (other_token  + 'J')
# define K_token_l  (letter_token + 'K')
# define K_token_o  (other_token  + 'K')
# define L_token_l  (letter_token + 'L')
# define L_token_o  (other_token  + 'L')
# define M_token_l  (letter_token + 'M')
# define M_token_o  (other_token  + 'M')
# define N_token_l  (letter_token + 'N')
# define N_token_o  (other_token  + 'N')
# define O_token_l  (letter_token + 'O')
# define O_token_o  (other_token  + 'O')
# define P_token_l  (letter_token + 'P')
# define P_token_o  (other_token  + 'P')
# define Q_token_l  (letter_token + 'Q')
# define Q_token_o  (other_token  + 'Q')
# define R_token_l  (letter_token + 'R')
# define R_token_o  (other_token  + 'R')
# define S_token_l  (letter_token + 'S')
# define S_token_o  (other_token  + 'S')
# define T_token_l  (letter_token + 'T')
# define T_token_o  (other_token  + 'T')
# define U_token_l  (letter_token + 'U')
# define U_token_o  (other_token  + 'U')
# define V_token_l  (letter_token + 'V')
# define V_token_o  (other_token  + 'V')
# define W_token_l  (letter_token + 'W')
# define W_token_o  (other_token  + 'W')
# define X_token_l  (letter_token + 'X')
# define X_token_o  (other_token  + 'X')
# define Y_token_l  (letter_token + 'Y')
# define Y_token_o  (other_token  + 'Y')
# define Z_token_l  (letter_token + 'Z')
# define Z_token_o  (other_token  + 'Z')
434
/*tex The at sign as letter and as other character. */

# define at_token_l            (letter_token + '@')
# define at_token_o            (other_token  + '@')

/*tex Some more characters with category other, among them whitespace and quotes. */

# define hash_token_o          (other_token + '#')
# define space_token_o         (other_token + ' ')
# define tab_token_o           (other_token + '\t')
# define newline_token_o       (other_token + '\n')
# define return_token_o        (other_token + '\r')
# define backslash_token_o     (other_token + '\\')
# define double_quote_token_o  (other_token + '"')
# define single_quote_token_o  (other_token + '\'')
446
447//define nbsp_token_o            (other_token  + 0x202F)
448//define zws_token_o             (other_token  + 0x200B)
449
/*tex
    The characters that can be stored in a match token. The comments tell what each of them asks
    for.
*/

# define match_visualizer    '#'
# define match_spacer        '*'  /* ignore spaces */
# define match_bracekeeper   '+'  /* keep (honor) the braces */
# define match_thrasher      '-'  /* discard (wipe) and don't count the argument */
# define match_par_spacer    '.'  /* ignore pars and spaces */
# define match_keep_spacer   ','  /* push back space when no match */
# define match_pruner        '/'  /* remove leading and trailing spaces and pars */
# define match_continuator   ':'  /* pick up scanning here */
# define match_quitter       ';'  /* quit scanning */
# define match_mandate       '='  /* braces are mandate */
# define match_spacekeeper   '^'  /* keep leading spaces */
# define match_mandate_keep  '_'  /* braces are mandate and kept (obey) */
# define match_par_command   '@'  /* par delimiter, only internal */
# define match_left          'L'
# define match_right         'R'
# define match_gobble        'G'
# define match_gobble_more   'M'
# define match_brackets      'S'  /* square brackets */
# define match_angles        'X'  /* angle brackets */
# define match_parentheses   'P'  /* parentheses */

/*tex Set this to a nonzero value in order to get the experimental match characters. */

# define match_experiment 0

# if (match_experiment)
# define match_dimension     'd'  /* dimension */
# define match_integer       'i'  /* integer */
# endif

# define single_quote        '\''
# define double_quote        '\"'

/*tex The match tokens: |match_token| with the match character as chr code. */

# define spacer_match_token        (match_token + match_spacer)
# define keep_match_token          (match_token + match_bracekeeper)
# define thrash_match_token        (match_token + match_thrasher)
# define par_spacer_match_token    (match_token + match_par_spacer)
# define keep_spacer_match_token   (match_token + match_keep_spacer)
# define prune_match_token         (match_token + match_pruner)
# define continue_match_token      (match_token + match_continuator)
# define quit_match_token          (match_token + match_quitter)
# define mandate_match_token       (match_token + match_mandate)
# define leading_match_token       (match_token + match_spacekeeper)
# define mandate_keep_match_token  (match_token + match_mandate_keep)
# define par_command_match_token   (match_token + match_par_command)
# define left_match_token          (match_token + match_left)
# define right_match_token         (match_token + match_right)
# define gobble_match_token        (match_token + match_gobble)
# define gobble_more_match_token   (match_token + match_gobble_more)
# define brackets_match_token      (match_token + match_brackets)
# define angles_match_token        (match_token + match_angles)
# define parentheses_match_token   (match_token + match_parentheses)

# if (match_experiment)
# define dimension_match_token     (match_token + match_dimension)
# define integer_match_token       (match_token + match_integer)
# endif

/*tex
    This one is true unless |r| is one of the thrash, spacer, keep spacer, continue or quit match
    tokens. The argument is parenthesized so that any expression can be passed, but keep in mind
    that it is evaluated more than once, so it should have no side effects.
*/

# define is_valid_match_ref(r) ( \
    (r) != thrash_match_token      && \
    (r) != spacer_match_token      && \
    (r) != keep_spacer_match_token && \
    (r) != continue_match_token    && \
    (r) != quit_match_token           \
)
507
508/*tex
509    Managing the head of the list of available one-word nodes. The |get_avail| function has been
510    given a more verbose name. It gets from the pool and should not be confused with |get_token|
511    which reads from the input or token list. The |free_avail| function got renamed to
512    |put_available_token| so we have some symmetry here.
513*/
514
515extern void     tex_compact_tokens            (void);
516extern void     tex_initialize_tokens         (void);
517extern void     tex_initialize_token_mem      (void);
518extern halfword tex_get_available_token       (halfword t);
519extern void     tex_put_available_token       (halfword p);
520extern halfword tex_store_new_token           (halfword p, halfword t);
521extern void     tex_delete_token_reference    (halfword p);
522extern void     tex_add_token_reference       (halfword p);
523extern void     tex_increment_token_reference (halfword p, int n);
524
525# define get_reference_token() tex_get_available_token(null)
526
527/*tex
528
529    The |no_expand_flag| is a special character value that is inserted by |get_next| if it wants to
530    suppress expansion.
531
532*/
533
# define no_expand_flag special_char /* no_expand_relax_code */

/*tex
    A few special values. According to the original remark they are no longer used because we
    always go for maxima.
*/

# define default_token_show_min  32
# define default_token_show_max  2500
# define extreme_token_show_max  0x3FFFFFFF
541
542/*tex  All kind of helpers: */
543
544extern void       tex_dump_token_mem              (dumpstream f);
545extern void       tex_undump_token_mem            (dumpstream f);
546extern int        tex_used_token_count            (void);
547extern void       tex_print_meaning               (halfword code);
548extern void       tex_flush_token_list            (halfword p);
549extern void       tex_flush_token_list_head_tail  (halfword h, halfword t, int n);
550extern void       tex_show_token_list_context     (halfword p, halfword q);
551extern void       tex_show_token_list             (halfword p, int asis, int single);
552extern void       tex_token_show                  (halfword p);
553/*     void       tex_add_token_ref               (halfword p); */
554/*     void       tex_delete_token_ref            (halfword p); */
555extern void       tex_get_next                    (void);
556extern void       tex_get_next_non_spacer         (void);
557extern halfword   tex_scan_character              (const char *s, int left_brace, int skip_space, int skip_relax);
558extern int        tex_scan_optional_keyword       (const char *s);
559extern int        tex_scan_mandate_keyword        (const char *s, int offset);
560extern void       tex_aux_show_keyword_error      (const char *s);
561extern int        tex_scan_keyword                (const char *s);
562extern int        tex_scan_partial_keyword        (const char *s);
563extern int        tex_scan_keyword_case_sensitive (const char *s);
564extern halfword   tex_active_to_cs                (int c, int force);
565/*     halfword   tex_string_to_toks              (const char *s); */
566extern int        tex_get_char_cat_code           (int c);
567extern halfword   tex_get_token                   (void);
568extern void       tex_get_x_or_protected          (void);
569extern halfword   tex_str_toks                    (lstring s, halfword *tail); /* returns head */
570extern halfword   tex_cur_str_toks                (halfword *tail);            /* returns head */
571extern halfword   tex_str_scan_toks               (int c, lstring b);          /* returns head */
572extern void       tex_run_combine_the_toks        (void);
573extern void       tex_run_convert_tokens          (halfword code);
574extern strnumber  tex_the_convert_string          (halfword c, int i);
575extern strnumber  tex_tokens_to_string            (halfword p);
576extern char      *tex_tokenlist_to_tstring        (int p, int inhibit_par, int *siz, int skip, int nospace, int strip, int wipe, int single);
577
578extern halfword   tex_get_tex_dimension_register  (int j, int internal);
579extern halfword   tex_get_tex_skip_register       (int j, int internal);
580extern halfword   tex_get_tex_muskip_register     (int j, int internal);
581extern halfword   tex_get_tex_count_register      (int j, int internal);
582extern halfword   tex_get_tex_posit_register      (int j, int internal);
583extern halfword   tex_get_tex_attribute_register  (int j, int internal);
584extern halfword   tex_get_tex_box_register        (int j, int internal);
585extern halfword   tex_get_tex_toks_register       (int j, int internal);
586
587extern void       tex_set_tex_dimension_register  (int j, halfword v, int flags, int internal);
588extern void       tex_set_tex_skip_register       (int j, halfword v, int flags, int internal);
589extern void       tex_set_tex_muskip_register     (int j, halfword v, int flags, int internal);
590extern void       tex_set_tex_count_register      (int j, halfword v, int flags, int internal);
591extern void       tex_set_tex_posit_register      (int j, halfword v, int flags, int internal);
592extern void       tex_set_tex_attribute_register  (int j, halfword v, int flags, int internal);
593extern void       tex_set_tex_box_register        (int j, halfword v, int flags, int internal);
594
595extern void       tex_set_tex_toks_register       (int j,        lstring s, int flags, int internal);
596extern void       tex_scan_tex_toks_register      (int j, int c, lstring s, int flags, int internal);
597
598extern halfword   tex_copy_token_list             (halfword h, halfword *t);
599
600extern halfword   tex_parse_str_to_tok            (halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option);
601
602extern halfword   tex_get_at_end_of_file          (void);
603extern void       tex_set_at_end_of_file          (halfword h);
604
605static inline int      tex_valid_token            (int t) { return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); }
606static inline halfword tex_tail_of_token_list     (halfword t) { while (token_link(t)) { t = token_link(t); } return t; }
607
/*tex

    This is also a sort of documentation. Active characters are stored in the hash using a prefix
    which assumes that users don't use that one. So far we've seen no clashes which is due to the
    fact that the namespace prefix U+FFFF is an invalid \UNICODE\ character and it's kind of hard
    to get that one into the input anyway.

    The replacement character U+FFFD is a kind of fallback when we run into some troubles or when
    a control sequence is expected (and undefined is unacceptable).

    U+FFFD  REPLACEMENT CHARACTER
    U+FFFE  NOT A CHARACTER
    U+FFFF  NOT A CHARACTER

    I experimented with a namespace character (catcode table id) as fourth character but there are
    some unwanted side effects, for instance in testing an active character as separator (in
    arguments) so that code was eventually removed. I might come back to this one day (active
    characters in the catcode regime namespace).

*/
628
/*tex The replacement character, which also serves as the unknown active character. */

# define utf_fffd_string             "\xEF\xBF\xBD" /* U+FFFD : 65533 */
# define active_character_unknown    utf_fffd_string

/*tex The three byte \UTF-8\ namespace prefix of active characters, as a string: */

# define active_character_namespace  "\xEF\xBF\xBF" /* U+FFFF : 65535 */

/*tex The same three bytes as character constants: */

# define active_character_first      '\xEF'
# define active_character_second     '\xBF'
# define active_character_third      '\xBF'

/*tex And once more as plain numbers: */

# define active_first                0xEF
# define active_second               0xBF
# define active_third                0xBF

/*tex
    The offset of 3 matches the length in bytes of |active_character_namespace|, so presumably
    this decodes the character that comes after the prefix in the string of |A|.
*/

# define active_cs_value(A) aux_str2uni(str_string(A)+3)
644
645# endif
646