texdumpdata.c /size: 11 Kb    last modification: 2025-02-21 11:03
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7dump_state_info lmt_dump_state = {
8    .fingerprint = luametatex_format_fingerprint,
9    .padding     = 0
10};
11
12/*tex
13
14    After \INITEX\ has seen a collection of fonts and macros, it can write all the necessary
15    information on an auxiliary file so that production versions of \TEX\ are able to initialize
16    their memory at high speed. The present section of the program takes care of such output and
17    input. We shall consider simultaneously the processes of storing and restoring, so that the
18    inverse relation between them is clear.
19
20    The global variable |format_ident| is a string that is printed right after the |banner| line
21    when \TEX\ is ready to start. For \INITEX\ this string says simply |(INITEX)|; for other
22    versions of \TEX\ it says, for example, |(preloaded format = plain 1982.11.19)|, showing the
23    year, month, and day that the format file was created. We have |format_ident = 0| before \TEX's
24    tables are loaded. |FORMAT_ID| is a new field of type int suitable for the identification of a
25    format: values between 0 and 256 (included) can not be used because in the previous format they
26    are used for the length of the name of the engine.
27
28    Because most used processors are little endian, we flush that way, but after that we just stick
29    to the architecture. This also lets it come out as a readable 12 character (not nul terminated)
30    string on a little endian machine. By using integers we can be sure that when it's generated on
31    a different architecture the format is not seen as valid.
32
33*/
34
35/*
36
37    In \LUAMETATEX\ the code has been overhauled. The sections are better separated and we write
38    less to the file because we try to be sparse. Also, a more dynamic approach is used. In the
39    \CONTEXT\ macro package most of what goes into the format is \LUA\ bytecode.
40
    We no longer handle endian related code here, which saves swapping bytes on the most popular
42    architectures. We also maintain some statistics and have several points where we check if
43    we're still okay.
44
45    Here we only have the main chunk. The specific data sections are implemented where it makes
46    most sense.
47
48*/
49
/*tex
    The three magic words that open a format file. Written as integers on a little endian machine
    their bytes read |LMTX|, |-TEX| and |-FMT|. Each trailing comment gives that text and the byte
    swapped value.
*/

# define MAGIC_FORMAT_NUMBER_LE_1 0x58544D4C /* "LMTX", swapped: 0x4C4D5458 */
# define MAGIC_FORMAT_NUMBER_LE_2 0x5845542D /* "-TEX", swapped: 0x2D544558 */
# define MAGIC_FORMAT_NUMBER_LE_3 0x544D462D /* "-FMT", swapped: 0x2D464D54 */
53
54static int tex_aux_report_dump_state(dumpstream f, int pos, const char *what)
55{
56    int tmp = ftell(f);
57    tex_print_format("%i %s", tmp - pos, what);
58    fflush(stdout);
59    return tmp;
60}
61
62/* todo: move more dumping to other files, then also the sizes. */
63
64static void tex_aux_dump_fingerprint(dumpstream f)
65{
66    dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_1);
67    dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_2);
68    dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_3);
69    dump_via_int(f, luametatex_format_fingerprint);
70}
71
72static void tex_aux_undump_fingerprint(dumpstream f)
73{
74    int x;
75    undump_int(f, x);
76    if (x == MAGIC_FORMAT_NUMBER_LE_1) {
77        undump_int(f, x);
78        if (x == MAGIC_FORMAT_NUMBER_LE_2) {
79            undump_int(f, x);
80            if (x == MAGIC_FORMAT_NUMBER_LE_3) {
81                undump_int(f, x);
82                if (x == luametatex_format_fingerprint) {
83                    return;
84                } else {
85                    tex_fatal_undump_error("version id");
86                }
87            }
88        }
89    }
90    tex_fatal_undump_error("initial fingerprint");
91}
92
93static void tex_aux_dump_final_check(dumpstream f)
94{
95    dump_via_int(f, luametatex_format_fingerprint);
96}
97
98static void tex_aux_undump_final_check(dumpstream f)
99{
100    int x;
101    undump_int(f, x);
102    if (x == luametatex_format_fingerprint) {
103        return;
104    } else {
105        tex_fatal_undump_error("final fingerprint");
106    }
107}
108
109static void tex_aux_create_fmt_name(void)
110{
111    lmt_print_state.selector = new_string_selector_code;
112    tex_print_format("%s %i.%i.%i %s", lmt_fileio_state.fmt_name, year_par, month_par, day_par, lmt_fileio_state.job_name);
113    lmt_print_state.selector = terminal_and_logfile_selector_code;
114}
115
116/*tex 
117    Dumping the |number_tex_commands| is just a safeguard for when we experiment with (temporary) 
118    extensions. 
119*/
120
121static void tex_aux_dump_preamble(dumpstream f)
122{
123    dump_via_int(f, hash_size);
124    dump_via_int(f, hash_prime);
125    dump_via_int(f, prim_size);
126    dump_via_int(f, prim_prime);
127    dump_via_int(f, number_tex_commands);
128    dump_int(f, lmt_hash_state.hash_data.allocated);
129    dump_int(f, lmt_hash_state.hash_data.ptr);
130    dump_int(f, lmt_hash_state.hash_data.top);
131}
132
133static void tex_aux_undump_preamble(dumpstream f)
134{
135    int x;
136    undump_int(f, x);
137    if (x != hash_size) {
138        goto BAD;
139    }
140    undump_int(f, x);
141    if (x != hash_prime) {
142        goto BAD;
143    }
144    undump_int(f, x);
145    if (x != prim_size) {
146        goto BAD;
147    }
148    undump_int(f, x);
149    if (x != prim_prime) {
150        goto BAD;
151    }
152    undump_int(f, x);
153    if (x != number_tex_commands) {
154        goto BAD;
155    }
156    undump_int(f, lmt_hash_state.hash_data.allocated);
157    undump_int(f, lmt_hash_state.hash_data.ptr);
158    undump_int(f, lmt_hash_state.hash_data.top);
159    /*tex
160        We can consider moving all these allocaters to the start instead of this exception.
161    */
162    tex_initialize_hash_mem();
163    return;
164  BAD:
165    tex_fatal_undump_error("preamble");
166}
167
168void tex_store_fmt_file(void)
169{
170    int pos = 0;
171    dumpstream f = NULL;
172
173    /*tex
174        If dumping is not allowed, abort. The user is not allowed to dump a format file unless
175        |save_ptr = 0|. This condition implies that |cur_level=level_one|, hence the |xeq_level|
176        array is constant and it need not be dumped.
177    */
178
179    if (lmt_save_state.save_stack_data.ptr != 0) {
180        tex_handle_error(
181            succumb_error_type,
182            "You can't dump inside a group",
183            "'{...\\dump}' is a no-no."
184        );
185    }
186
187    /*tex
188        We don't store some things.
189    */
190
191    tex_dispose_specification_nodes();
192
193    /*tex
194        Create the |format_ident|, open the format file, and inform the user that dumping has begun.
195    */
196
197    {
198        int callback_id = lmt_callback_defined(pre_dump_callback);
199        if (callback_id > 0) {
200            (void) lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->");
201        }
202    }
203
204    /*tex
205        We report the usual plus some more statistics. When something is wrong the machine just
206        quits, hopefully with some meaningful error. We always create the format in normal log and
207        terminal mode. We create a format name first because we also use that in error reporting.
208    */
209
210    tex_aux_create_fmt_name();
211
212    f = tex_open_fmt_file(1);
213    if (! f) {
214        tex_formatted_error("system", "format file '%s' cannot be opened for writing", lmt_fileio_state.fmt_name);
215        return;
216    }
217
218    tex_print_nlp();
219    tex_print_format("Dumping format in file '%s': ", lmt_fileio_state.fmt_name);
220    fflush(stdout);
221
222    tex_compact_tokens();
223    tex_compact_string_pool();
224
225    tex_aux_dump_fingerprint(f);    pos = tex_aux_report_dump_state(f, pos, "fingerprint + ");
226    lmt_dump_engine_info(f);        pos = tex_aux_report_dump_state(f, pos, "engine + ");
227    tex_aux_dump_preamble(f);       pos = tex_aux_report_dump_state(f, pos, "preamble + ");
228    tex_dump_constants(f);          pos = tex_aux_report_dump_state(f, pos, "constants + ");
229    tex_dump_string_pool(f);        pos = tex_aux_report_dump_state(f, pos, "stringpool + ");
230 // tex_print_format("(%i used and %i free) ", tex_used_node_count(), tex_free_node_count());
231    tex_dump_node_mem(f);           pos = tex_aux_report_dump_state(f, pos, "nodes + ");
232 // tex_print_format("(%i used and free) ", tex_used_token_count());
233    tex_dump_token_mem(f);          pos = tex_aux_report_dump_state(f, pos, "tokens + ");
234    tex_dump_equivalents_mem(f);    pos = tex_aux_report_dump_state(f, pos, "equivalents + ");
235    tex_dump_specification_data(f); pos = tex_aux_report_dump_state(f, pos, "specifications + ");
236    tex_dump_math_codes(f);         pos = tex_aux_report_dump_state(f, pos, "math codes + ");
237    tex_dump_text_codes(f);         pos = tex_aux_report_dump_state(f, pos, "text codes + ");
238    tex_dump_primitives(f);         pos = tex_aux_report_dump_state(f, pos, "primitives + ");
239    tex_dump_hashtable(f);          pos = tex_aux_report_dump_state(f, pos, "hashtable + ");
240    tex_dump_font_data(f);          pos = tex_aux_report_dump_state(f, pos, "fonts + ");
241    tex_dump_math_data(f);          pos = tex_aux_report_dump_state(f, pos, "math + ");
242    tex_dump_language_data(f);      pos = tex_aux_report_dump_state(f, pos, "language + ");
243    tex_dump_insert_data(f);        pos = tex_aux_report_dump_state(f, pos, "insert + ");
244    lmt_dump_registers(f);          pos = tex_aux_report_dump_state(f, pos, "bytecodes + ");
245    tex_aux_dump_final_check(f);    pos = tex_aux_report_dump_state(f, pos, "housekeeping = ");
246
247    tex_aux_report_dump_state(f, 0, "total.");
248    tex_close_fmt_file(f);
249    tex_print_ln();
250
251}
252
253/*tex
254
255    Corresponding to the procedure that dumps a format file, we have a function that reads one in.
256    The function returns |false| if the dumped format is incompatible with the present \TEX\ table
257    sizes, etc.
258
259    The inverse macros are slightly more complicated, since we need to check the range of the values
260    we are reading in. We say |undump (a) (b) (x)| to read an integer value |x| that is supposed to
261    be in the range |a <= x <= b|.
262
263*/
264
265int tex_fatal_undump_error(const char *s)
266{
267    tex_emergency_message("system", "fatal format error, loading file '%s' failed with bad '%s' data, remake the format", emergency_fmt_name, s);
268    return tex_emergency_exit();
269}
270
/*tex
    A tracing aid for the loader. As defined here |undumping| expands to nothing. The variant in
    the line comment prints the section name and flushes |stdout| instead; use that one to find
    out in which section loading goes wrong.
*/

// # define undumping(s) printf("undumping: %s\n",s); fflush(stdout);
# define undumping(s)
273
274static void tex_aux_undump_fmt_data(dumpstream f)
275{
276    undumping("warmingup")
277
278    undumping("fingerprint")    tex_aux_undump_fingerprint(f);
279    undumping("engineinfo")     lmt_undump_engine_info(f);
280    undumping("preamble")       tex_aux_undump_preamble(f);
281    undumping("constants")      tex_undump_constants(f);
282    undumping("strings")        tex_undump_string_pool(f);
283    undumping("nodes")          tex_undump_node_mem(f);
284    undumping("tokens")         tex_undump_token_mem(f);
285    undumping("equivalents")    tex_undump_equivalents_mem(f);
286    undumping("specifications") tex_undump_specification_data(f);
287    undumping("mathcodes")      tex_undump_math_codes(f);
288    undumping("textcodes")      tex_undump_text_codes(f);
289    undumping("primitives")     tex_undump_primitives(f);
290    undumping("hashtable")      tex_undump_hashtable(f);
291    undumping("fonts")          tex_undump_font_data(f);
292    undumping("math")           tex_undump_math_data(f);
293    undumping("languages")      tex_undump_language_data(f);
294    undumping("inserts")        tex_undump_insert_data(f);
295    undumping("bytecodes")      lmt_undump_registers(f);
296    undumping("finalcheck")     tex_aux_undump_final_check(f);
297
298    undumping("done")
299
300    /*tex This should go elsewhere. */
301
302    cur_list.prev_depth = ignore_depth_criterion_par;
303}
304
305/*
306    The next code plays nice but on an error we exit anyway so some code is never reached in that
307    case.
308*/
309
310int tex_load_fmt_file(void)
311{
312    dumpstream f = tex_open_fmt_file(0);
313    if (f) {
314        tex_aux_undump_fmt_data(f);
315        tex_close_fmt_file(f);
316        return 1;
317    } else {
318        return tex_fatal_undump_error("filehandle");
319    }
320}
321
322void tex_initialize_dump_state(void)
323{
324    if (! lmt_engine_state.dump_name) {
325        lmt_engine_state.dump_name = lmt_memory_strdup("initex");
326    }
327}
328