textoken.h /size: 25 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# ifndef LMT_TEXTOKEN_H
6# define LMT_TEXTOKEN_H
7
8# include "luametatex.h"
9
/*tex

    These are constants that can be added to a chr value and then give a token with the right cmd
    and chr combination, which is then equivalent to |token_val (cmd, chr)|. The cmd results from
    shifting right 21 bits. The following tokens therefore should match the order of the (first
    bunch) of cmd codes!

    \TEX\ stores the specific match character which defaults to |#|. When tokens get serialized the
    machinery starts with |match_chr = '#'| but overloads that by the last stored variant. So the
    last (!) seen |match_chr| in the macro preamble determines what gets used in showing the body.
    One could argue that this is a buglet but I more see it as a side effect. In practice there is
    never a mix of such characters used. Anyway, one could as well use the first seen in the
    preamble and use that for the rest because consistency is better than confusion. Even better is
    to just always use |#| and store the numbers in preamble match tokens, which opens up
    possibilities (for strict or tolerant matching, skipping spaces, optional delimiters and even
    more arguments).

*/
28
29//define cs_token_flag            0x1FFFFFFF
30
/*tex
    A value above |node_token_max| does not fit in one piece, so it can be split in two halves:
    |node_token_lsb| keeps the lower 16 bits and |node_token_msb| takes the upper 16 bits, shifts
    them down and adds |node_token_flag|. |node_token_sum| strips the flag again (by masking the
    lower 16 bits of |msb|) and glues both halves together.

    Every macro parameter is parenthesized so that an expression argument, for instance
    |node_token_lsb(a | b)|, is used as a whole instead of being torn apart by operator precedence.
*/

# define node_token_max           0x0FFFFF
# define node_token_flag          0x100000
# define node_token_lsb(sum)      ((sum) & 0x0000FFFF)
# define node_token_msb(sum)      ((((sum) & 0xFFFF0000) >> 16) + node_token_flag)
# define node_token_sum(msb,lsb)  ((((msb) & 0x0000FFFF) << 16) + (lsb))
# define node_token_overflow(sum) ((sum) > node_token_max)
# define node_token_flagged(sum)  ((sum) > node_token_flag)
38
39/*tex
40    Instead of |fixmem| we use |tokens| because it is dynamic anyway and we then better match variables
41    that deal with managing that. Most was already hidden in a few files anyway.
42*/
43
44typedef struct token_memory_state_info {
45    memoryword  *tokens;      /*tex |memoryword *volatile fixmem;| */
46    memory_data  tokens_data;
47    halfword     available;
48    int          padding;
49} token_memory_state_info;
50
51extern token_memory_state_info lmt_token_memory_state;
52
/*tex The states that a read channel can be in. */

enum read_states {
    reading_normal      = 0, /*tex we're going ahead */
    reading_just_opened = 1, /*tex newly opened, the first line has not been read yet */
    reading_closed      = 2, /*tex not open, or at the end of the file */
};

typedef enum read_states read_states;
58
/*tex The kinds of input that are distinguished on the \LUA\ side; the values count up from zero. */

enum lua_input_types {
    unset_lua_input      = 0,
    string_lua_input     = 1,
    packed_lua_input     = 2,
    token_lua_input      = 3,
    token_list_lua_input = 4,
    node_lua_input       = 5,
};

typedef enum lua_input_types lua_input_types;
67
/*tex The kinds of input that are distinguished on the \TEX\ side; the values count up from zero. */

enum tex_input_types {
    eof_tex_input        = 0,
    string_tex_input     = 1,
    token_tex_input      = 2,
    token_list_tex_input = 3,
    node_tex_input       = 4,
};

typedef enum tex_input_types tex_input_types;
75
/*tex Negative pseudo values that stand for a preset instead of a catcode table number. */

enum catcode_table_presets {
    no_catcode_table_preset      = -2,
    default_catcode_table_preset = -1,
};

typedef enum catcode_table_presets catcode_table_presets;
80
/*tex

    There are a few temporary head pointers, one is |temp_token_head|. This one we keep because
    when we expand, we can run into situations where we need that pointer. But, |backup_head| is
    a real temporary one: we can replace that with local variables. Okay, it is kind of kept in
    the format file but if it ends up there we're in some kind of troubles anyway. So,
    |backup_head| is now local and |temp_token_head| only global when we are scanning; in cases
    where we serialize tokens lists it has been replaced by local variables (and the related
    functions now keep track of head and tail). This makes sense because in \LUAMETATEX\ we often
    go between \TEX\ and \LUA\ and this keeps it kind of simple. This also makes clear when we
    are scanning (the global head is used) and doing something simple with a list. The same is
    true for |match_token_head| that moved to the expand state. The |backup_head| variable is gone
    because we now use locals.

*/
96
97typedef struct token_state_info {
98    halfword  null_list;     /*tex permanently empty list */
99    int       in_lua_escape; /*tex obsolete, controlled differently */
100    int       force_eof;
101    int       luacstrings;
102    /*tex These are pseudo constants, their value depends on the number of primitives etc. */
103    halfword  par_loc;
104    halfword  par_token;
105 /* halfword  line_par_loc;   */ /*tex See note in textoken.c|. */
106 /* halfword  line_par_token; */ /*tex See note in textoken.c|. */
107    /* */
108    char     *buffer;
109    int       bufloc;
110    int       bufmax;
111    int       empty;
112} token_state_info;
113
114extern token_state_info lmt_token_state;
115
/*tex

    We now can have 15 parameters but if needed we can go higher. However, we then also need to
    cache more and change the |preamble| and |count| to some funny bit ranges. If needed we can
    bump the reference count maximum but quite likely one has run out of something else by then.

    \starttyping
    preamble  = 0xF0000000 : 1 when we have one, including trailing #
    count     = 0x0F000000
    reference = 0x00FFFFFF
    \stoptyping

*/
130
/*tex The values that can be kept in the preamble bits of a macro's reference word. */

enum macro_preamble_states {
    macro_without_preamble = 0,
    macro_with_preamble    = 1,
    macro_is_packed        = 2, /*tex not yet, maybe some day an array instead of a list */
};

typedef enum macro_preamble_states macro_preamble_states;
136
# define max_match_count 15
# define gap_match_count  7

# define max_token_reference 0x00FFFFFF

/*tex
    The |hulf1| field of a token memory word is shared by three values: bits 28 and up keep the
    preamble state, bits 24 upto 27 the parameter count and the lower 24 bits the reference count.
    The getters shift and mask the field accordingly.
*/

# define get_token_preamble(a)   ((lmt_token_memory_state.tokens[a].hulf1 >> 28) & 0xF)
# define get_token_parameters(a) ((lmt_token_memory_state.tokens[a].hulf1 >> 24) & 0xF)
# define get_token_reference(a)  ((lmt_token_memory_state.tokens[a].hulf1      ) & max_token_reference)

/*tex
    The setters add to the field instead of overwriting it, so they assume that the bits involved
    are still zero (which normally is the case at the moment they get used). The expansions are
    wrapped in parentheses so that a macro behaves as one expression wherever it is used; without
    them something like |sub_token_reference(p) == 0| would subtract the outcome of |1 == 0|.
*/

# define set_token_preamble(a,b)   (lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28))
# define set_token_parameters(a,b) (lmt_token_memory_state.tokens[a].hulf1 += ((b) << 24))
# define set_token_reference(a,b)  (lmt_token_memory_state.tokens[a].hulf1 += (b))

/*tex
    Changing the reference count is plain arithmetic on the whole field: the counts are way off
    the bits that keep the parameter count.
*/

# define add_token_reference(a)    (lmt_token_memory_state.tokens[a].hulf1 += 1)
# define sub_token_reference(a)    (lmt_token_memory_state.tokens[a].hulf1 -= 1)
# define inc_token_reference(a,b)  (lmt_token_memory_state.tokens[a].hulf1 += (b))
# define dec_token_reference(a,b)  (lmt_token_memory_state.tokens[a].hulf1 -= (b))
154
155/* */
156
/*tex
    Access to the two halves of a token memory word: |half1| keeps the token (info) and |half0|
    the link to the next token. The |token_info| and |token_link| variants can also be assigned
    to. The two setters are wrapped in parentheses so that they act as a single expression; a
    bare |x = (b)| would let a trailing operator, as in |set_token_link(p, q) == q|, bind to the
    assigned value first.
*/

# define token_info(a)       lmt_token_memory_state.tokens[a].half1
# define token_link(a)       lmt_token_memory_state.tokens[a].half0
# define get_token_info(a)   lmt_token_memory_state.tokens[a].half1
# define get_token_link(a)   lmt_token_memory_state.tokens[a].half0
# define set_token_info(a,b) (lmt_token_memory_state.tokens[a].half1 = (b))
# define set_token_link(a,b) (lmt_token_memory_state.tokens[a].half0 = (b))

/*tex
    A token combines a cmd and a chr: the cmd sits above the lower |cs_offset_bits| bits and the
    chr occupies those lower bits.
*/

# define token_cmd(A)    ((A) >> cs_offset_bits)
# define token_chr(A)    ((A) &  cs_offset_max)
# define token_val(A,B) (((A) << cs_offset_bits) + (B))
167
/*tex
    Sometimes we add a value directly. Instead we could use |token_val| on the spot but then we
    also need different range checkers. We use numbers because we don't have the cmd codes defined
    yet when we're here, so we can't use for instance |token_val (spacer_cmd, 20)| yet.
*/
173
/*tex
    The cmd codes are given as numbers here; the comment after each definition shows the symbolic
    equivalent. Adding a character code to one of these gives the token for that character with
    that cmd.
*/

# define left_brace_token   token_val( 1,  0) /* token_val(left_brace_cmd,    0) */
# define right_brace_token  token_val( 2,  0) /* token_val(right_brace_cmd,   0) */
# define math_shift_token   token_val( 3,  0) /* token_val(math_shift_cmd,    0) */
# define alignment_token    token_val( 4,  0) /* token_val(alignment_tab_cmd, 0) */
# define endline_token      token_val( 5,  0) /* token_val(end_line_cmd,      0) */
# define parameter_token    token_val( 6,  0) /* token_val(parameter_cmd,     0) */
# define superscript_token  token_val( 7,  0) /* token_val(superscript_cmd,   0) */
# define subscript_token    token_val( 8,  0) /* token_val(subscript_cmd,     0) */
# define ignore_token       token_val( 9,  0) /* token_val(ignore_cmd,        0) */
# define space_token        token_val(10, 32) /* token_val(spacer_cmd,       32) */
# define letter_token       token_val(11,  0) /* token_val(letter_cmd,        0) */
# define other_token        token_val(12,  0) /* token_val(other_char_cmd,    0) */
# define active_token       token_val(13,  0) /* token_val(active_char_cmd,   0) */

# define match_token        token_val(19,  0) /* token_val(match_cmd,         0) */
# define end_match_token    token_val(20,  0) /* token_val(end_match_cmd,     0) */

/*tex
    The two limits are the first token values past the left brace and right brace ranges: every
    token below |left_brace_limit| has a cmd code below the right brace one, and every token
    below |right_brace_limit| has a cmd code below the math shift one. Comparing against them is
    convenient when checking for balanced |{}| because it avoids looking at both |cur_cmd| and
    |cur_cs|. Once |cur_cmd| has to be checked anyway, it is nicer (and in a |switch| also a bit
    more efficient) to check |cur_cs| afterwards.
*/

# define left_brace_limit   right_brace_token
# define right_brace_limit  math_shift_token
200
/*tex Prefixes of numeric constants. */

# define octal_token             (other_token + '\'') /*tex apostrophe, indicates an octal constant */
# define hex_token               (other_token + '"')  /*tex double quote, indicates a hex constant */
# define alpha_token             (other_token + '`')  /*tex reverse apostrophe, precedes alpha constants */

/*tex Decimal separators; some characters are known under more than one name. */

# define point_token             (other_token + '.')  /*tex decimal point */
# define period_token            (other_token + '.')  /*tex decimal point */
# define continental_point_token (other_token + ',')  /*tex decimal point, Eurostyle */
# define comma_token             (other_token + ',')  /*tex decimal comma */

/*tex Operators and punctuation. */

# define plus_token              (other_token + '+')
# define minus_token             (other_token + '-')
# define slash_token             (other_token + '/')
# define asterisk_token          (other_token + '*')
# define colon_token             (other_token + ':')
# define semi_colon_token        (other_token + ';')
# define equal_token             (other_token + '=')
# define less_token              (other_token + '<')
# define more_token              (other_token + '>')
# define escape_token            (other_token + '\\')

/*tex Characters that come in an other (|_o|) as well as a letter (|_l|) variant. */

# define exclamation_token_o     (other_token  + '!')
# define exclamation_token_l     (letter_token + '!')
# define underscore_token        (other_token  + '_')
# define underscore_token_o      (other_token  + '_')
# define underscore_token_l      (letter_token + '_')
# define circumflex_token        (other_token  + '^')
# define circumflex_token_o      (other_token  + '^')
# define circumflex_token_l      (letter_token + '^')

/*tex Paired delimiters. */

# define left_parent_token       (other_token + '(')
# define right_parent_token      (other_token + ')')
# define left_bracket_token      (other_token + '[')
# define right_bracket_token     (other_token + ']')
# define left_angle_token        (other_token + '<')
# define right_angle_token       (other_token + '>')

/*tex Some digits. */

# define zero_token              (other_token + '0')  /*tex zero, the smallest digit */
# define one_token               (other_token + '1')
# define five_token              (other_token + '5')
# define seven_token             (other_token + '7')
# define nine_token              (other_token + '9')  /*tex nine, the largest digit */

/*tex The dollar as math shift character. */

# define dollar_token_m          (math_shift_token + '$')
239
/*tex
    Lowercase characters, each as letter (|_l|) and as other character (|_o|). The |a| is the
    smallest and the |f| the largest special hex digit.
*/

# define a_token_l (letter_token + 'a')
# define a_token_o (other_token  + 'a')
# define b_token_l (letter_token + 'b')
# define b_token_o (other_token  + 'b')
# define d_token_l (letter_token + 'd')
# define d_token_o (other_token  + 'd')
# define e_token_l (letter_token + 'e')
# define e_token_o (other_token  + 'e')
# define f_token_l (letter_token + 'f')
# define f_token_o (other_token  + 'f')
# define i_token_l (letter_token + 'i')
# define i_token_o (other_token  + 'i')
# define l_token_l (letter_token + 'l')
# define l_token_o (other_token  + 'l')
# define m_token_l (letter_token + 'm')
# define m_token_o (other_token  + 'm')
# define n_token_l (letter_token + 'n')
# define n_token_o (other_token  + 'n')
# define o_token_l (letter_token + 'o')
# define o_token_o (other_token  + 'o')
# define p_token_l (letter_token + 'p')
# define p_token_o (other_token  + 'p')
# define r_token_l (letter_token + 'r')
# define r_token_o (other_token  + 'r')
# define s_token_l (letter_token + 's')
# define s_token_o (other_token  + 's')
# define t_token_l (letter_token + 't')
# define t_token_o (other_token  + 't')
# define u_token_l (letter_token + 'u')
# define u_token_o (other_token  + 'u')
# define x_token_l (letter_token + 'x')
# define x_token_o (other_token  + 'x')
287
/*tex
    Uppercase characters, each as letter (|_l|) and as other character (|_o|). The |A| is the
    smallest and the |F| the largest special hex digit.
*/

# define A_token_l (letter_token + 'A')
# define A_token_o (other_token  + 'A')
# define E_token_l (letter_token + 'E')
# define E_token_o (other_token  + 'E')
# define F_token_l (letter_token + 'F')
# define F_token_o (other_token  + 'F')
# define G_token_l (letter_token + 'G')
# define G_token_o (other_token  + 'G')
# define H_token_l (letter_token + 'H')
# define H_token_o (other_token  + 'H')
# define I_token_l (letter_token + 'I')
# define I_token_o (other_token  + 'I')
# define K_token_l (letter_token + 'K')
# define K_token_o (other_token  + 'K')
# define L_token_l (letter_token + 'L')
# define L_token_o (other_token  + 'L')
# define M_token_l (letter_token + 'M')
# define M_token_o (other_token  + 'M')
# define N_token_l (letter_token + 'N')
# define N_token_o (other_token  + 'N')
# define P_token_l (letter_token + 'P')
# define P_token_o (other_token  + 'P')
# define Q_token_l (letter_token + 'Q')
# define Q_token_o (other_token  + 'Q')
# define R_token_l (letter_token + 'R')
# define R_token_o (other_token  + 'R')
# define S_token_l (letter_token + 'S')
# define S_token_o (other_token  + 'S')
# define T_token_l (letter_token + 'T')
# define T_token_o (other_token  + 'T')
# define X_token_l (letter_token + 'X')
# define X_token_o (other_token  + 'X')
# define Z_token_l (letter_token + 'Z')
# define Z_token_o (other_token  + 'Z')
338
/*tex The at sign, as letter and as other character. */

# define at_token_l           (letter_token + '@')
# define at_token_o           (other_token  + '@')

/*tex The hash, the whitespace characters, the backslash and both quotes as other character. */

# define hash_token_o         (other_token + '#')
# define space_token_o        (other_token + ' ')
# define tab_token_o          (other_token + '\t')
# define newline_token_o      (other_token + '\n')
# define return_token_o       (other_token + '\r')
# define backslash_token_o    (other_token + '\\')
# define double_quote_token_o (other_token + '"')
# define single_quote_token_o (other_token + '\'')
350
351//define nbsp_token_o            (other_token  + 0x202F)
352//define zws_token_o             (other_token  + 0x200B)
353
/*tex The character that is used when a match is shown. */

# define match_visualizer   '#'

/*tex The characters that select a variant of argument matching. */

# define match_spacer       '*' /*tex ignore spaces */
# define match_bracekeeper  '+' /*tex keep (honor) the braces */
# define match_thrasher     '-' /*tex discard (wipe) and don't count the argument */
# define match_par_spacer   '.' /*tex ignore pars and spaces */
# define match_keep_spacer  ',' /*tex push back space when no match */
# define match_pruner       '/' /*tex remove leading and trailing spaces and pars */
# define match_continuator  ':' /*tex pick up scanning here */
# define match_quitter      ';' /*tex quit scanning */
# define match_mandate      '=' /*tex braces are mandate */
# define match_spacekeeper  '^' /*tex keep leading spaces */
# define match_mandate_keep '_' /*tex braces are mandate and kept (obey) */
# define match_par_command  '@' /*tex par delimiter, only internal */

/*tex The variants that are selected by an uppercase character. */

# define match_left         'L'
# define match_right        'R'
# define match_gobble       'G'
# define match_gobble_more  'M'
# define match_brackets     'S' /*tex square brackets */
# define match_angles       'X' /*tex angle brackets */
# define match_parentheses  'P' /*tex parentheses */

/*tex Both quote characters. */

# define single_quote       '\''
# define double_quote       '"'
377
/*tex Each match character combined with |match_token| gives the token for that match variant. */

# define spacer_match_token        (match_token + match_spacer)
# define keep_match_token          (match_token + match_bracekeeper)
# define thrash_match_token        (match_token + match_thrasher)
# define par_spacer_match_token    (match_token + match_par_spacer)
# define keep_spacer_match_token   (match_token + match_keep_spacer)
# define prune_match_token         (match_token + match_pruner)
# define continue_match_token      (match_token + match_continuator)
# define quit_match_token          (match_token + match_quitter)
# define mandate_match_token       (match_token + match_mandate)
# define leading_match_token       (match_token + match_spacekeeper)
# define mandate_keep_match_token  (match_token + match_mandate_keep)
# define par_command_match_token   (match_token + match_par_command)
# define left_match_token          (match_token + match_left)
# define right_match_token         (match_token + match_right)
# define gobble_match_token        (match_token + match_gobble)
# define gobble_more_match_token   (match_token + match_gobble_more)
# define brackets_match_token      (match_token + match_brackets)
# define angles_match_token        (match_token + match_angles)
# define parentheses_match_token   (match_token + match_parentheses)

/*tex
    This one is true unless |r| is the thrash, spacer, keep spacer, continue or quit match token.
    The parameter is parenthesized so that an expression, for instance a conditional one, is
    compared as a whole. Watch out: |r| is evaluated up to five times, so it should not have side
    effects.
*/

# define is_valid_match_ref(r) ( \
    (r) != thrash_match_token      && \
    (r) != spacer_match_token      && \
    (r) != keep_spacer_match_token && \
    (r) != continue_match_token    && \
    (r) != quit_match_token           \
)
399
400/*tex
401    Managing the head of the list of available one-word nodes. The |get_avail| function has been
402    given a more verbose name. It gets from the pool and should not be confused with |get_token|
403    which reads from the input or token list. The |free_avail| function got renamed to
404    |put_available_token| so we have some symmetry here.
405*/
406
407extern void     tex_compact_tokens            (void);
408extern void     tex_initialize_tokens         (void);
409extern void     tex_initialize_token_mem      (void);
410extern halfword tex_get_available_token       (halfword t);
411extern void     tex_put_available_token       (halfword p);
412extern halfword tex_store_new_token           (halfword p, halfword t);
413extern void     tex_delete_token_reference    (halfword p);
414extern void     tex_add_token_reference       (halfword p);
415extern void     tex_increment_token_reference (halfword p, int n);
416
417# define get_reference_token() tex_get_available_token(null)
418
419/*tex
420
421    The |no_expand_flag| is a special character value that is inserted by |get_next| if it wants to
422    suppress expansion.
423
424*/
425
/*tex The flag that suppresses expansion is just |special_char| (think |no_expand_relax_code|). */

# define no_expand_flag         special_char

/*tex
    Limits for showing tokens. These special values are no longer used because we always go for
    the maxima.
*/

# define default_token_show_min 32
# define default_token_show_max 2500
# define extreme_token_show_max 0x3FFFFFFF
433
434/*tex  All kind of helpers: */
435
436extern void       tex_dump_token_mem              (dumpstream f);
437extern void       tex_undump_token_mem            (dumpstream f);
438extern int        tex_used_token_count            (void);
439extern void       tex_print_meaning               (halfword code);
440extern void       tex_flush_token_list            (halfword p);
441extern void       tex_flush_token_list_head_tail  (halfword h, halfword t, int n);
442extern void       tex_show_token_list_context     (halfword p, halfword q);
443extern void       tex_show_token_list             (halfword p, int asis, int single);
444extern void       tex_token_show                  (halfword p);
445/*     void       tex_add_token_ref               (halfword p); */
446/*     void       tex_delete_token_ref            (halfword p); */
447extern void       tex_get_next                    (void);
448extern void       tex_get_next_non_spacer         (void);
449extern halfword   tex_scan_character              (const char *s, int left_brace, int skip_space, int skip_relax);
450extern int        tex_scan_optional_keyword       (const char *s);
451extern int        tex_scan_mandate_keyword        (const char *s, int offset);
452extern void       tex_aux_show_keyword_error      (const char *s);
453extern int        tex_scan_keyword                (const char *s);
454extern int        tex_scan_keyword_case_sensitive (const char *s);
455extern halfword   tex_active_to_cs                (int c, int force);
456extern halfword   tex_string_to_toks              (const char *s);
457extern int        tex_get_char_cat_code           (int c);
458extern halfword   tex_get_token                   (void);
459extern void       tex_get_x_or_protected          (void);
460extern halfword   tex_str_toks                    (lstring s, halfword *tail); /* returns head */
461extern halfword   tex_cur_str_toks                (halfword *tail);            /* returns head */
462extern halfword   tex_str_scan_toks               (int c, lstring b);          /* returns head */
463extern void       tex_run_combine_the_toks        (void);
464extern void       tex_run_convert_tokens          (halfword code);
465extern strnumber  tex_the_convert_string          (halfword c, int i);
466extern strnumber  tex_tokens_to_string            (halfword p);
467extern char      *tex_tokenlist_to_tstring        (int p, int inhibit_par, int *siz, int skip, int nospace, int strip, int wipe, int single);
468
469extern halfword   tex_get_tex_dimension_register  (int j, int internal);
470extern halfword   tex_get_tex_skip_register       (int j, int internal);
471extern halfword   tex_get_tex_muskip_register     (int j, int internal);
472extern halfword   tex_get_tex_count_register      (int j, int internal);
473extern halfword   tex_get_tex_posit_register      (int j, int internal);
474extern halfword   tex_get_tex_attribute_register  (int j, int internal);
475extern halfword   tex_get_tex_box_register        (int j, int internal);
476extern halfword   tex_get_tex_toks_register       (int j, int internal);
477
478extern void       tex_set_tex_dimension_register  (int j, halfword v, int flags, int internal);
479extern void       tex_set_tex_skip_register       (int j, halfword v, int flags, int internal);
480extern void       tex_set_tex_muskip_register     (int j, halfword v, int flags, int internal);
481extern void       tex_set_tex_count_register      (int j, halfword v, int flags, int internal);
482extern void       tex_set_tex_posit_register      (int j, halfword v, int flags, int internal);
483extern void       tex_set_tex_attribute_register  (int j, halfword v, int flags, int internal);
484extern void       tex_set_tex_box_register        (int j, halfword v, int flags, int internal);
485
486extern void       tex_set_tex_toks_register       (int j,        lstring s, int flags, int internal);
487extern void       tex_scan_tex_toks_register      (int j, int c, lstring s, int flags, int internal);
488
489extern halfword   tex_copy_token_list             (halfword h, halfword *t);
490
491extern halfword   tex_parse_str_to_tok            (halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option);
492
493static inline int tex_valid_token                 (int t) { return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); }
494
495extern halfword   tex_get_at_end_of_file          (void);
496extern void       tex_set_at_end_of_file          (halfword h);
497
498inline halfword   tex_tail_of_token_list          (halfword t) { while (token_link(t)) { t = token_link(t); } return t; }
499
/*tex

    This is also a sort of documentation. Active characters are stored in the hash using a prefix
    which assumes that users don't use that one. So far we've seen no clashes which is due to the
    fact that the namespace prefix U+FFFF is an invalid \UNICODE\ character and it's kind of hard
    to get that one into the input anyway.

    The replacement character U+FFFD is a kind of fallback when we run into some troubles or when
    a control sequence is expected (and undefined is unacceptable).

    U+FFFD  REPLACEMENT CHARACTER
    U+FFFE  NOT A CHARACTER
    U+FFFF  NOT A CHARACTER

    I experimented with a namespace character (catcode table id) as fourth character but there are
    some unwanted side effects, for instance in testing an active character as separator (in
    arguments) so that code was eventually removed. I might come back to this one day (active
    characters in the catcode regime namespace).

*/
520
/*tex The \UTF-8\ encoding of the replacement character U+FFFD (65533). */

# define utf_fffd_string            "\xEF\xBF\xBD"

/*tex
    The three bytes that encode U+FFFF (65535), the prefix of active characters: as a string, as
    separate characters and as numbers.
*/

# define active_character_namespace "\xEF\xBF\xBF"

# define active_character_first     '\xEF'
# define active_character_second    '\xBF'
# define active_character_third     '\xBF'

# define active_first               0xEF
# define active_second              0xBF
# define active_third               0xBF

/*tex An unknown active character is represented by the replacement character. */

# define active_character_unknown   utf_fffd_string

/*tex The character code comes from what follows the three prefix bytes in the string of |A|. */

# define active_cs_value(A)         aux_str2uni(str_string(A)+3)
536
537# endif
538