texdumpdata.c /size: 11 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7dump_state_info lmt_dump_state = {
8    .fingerprint = luametatex_format_fingerprint,
9    .padding     = 0
10};
11
12/*tex
13
14    After \INITEX\ has seen a collection of fonts and macros, it can write all the necessary
15    information on an auxiliary file so that production versions of \TEX\ are able to initialize
16    their memory at high speed. The present section of the program takes care of such output and
17    input. We shall consider simultaneously the processes of storing and restoring, so that the
18    inverse relation between them is clear.
19
20    The global variable |format_ident| is a string that is printed right after the |banner| line
21    when \TEX\ is ready to start. For \INITEX\ this string says simply |(INITEX)|; for other
22    versions of \TEX\ it says, for example, |(preloaded format = plain 1982.11.19)|, showing the
23    year, month, and day that the format file was created. We have |format_ident = 0| before \TEX's
24    tables are loaded. |FORMAT_ID| is a new field of type int suitable for the identification of a
25    format: values between 0 and 256 (included) can not be used because in the previous format they
26    are used for the length of the name of the engine.
27
28    Because most used processors are little endian, we flush that way, but after that we just stick
29    to the architecture. This also lets it come out as a readable 12 character (not nul terminated)
30    string on a little endian machine. By using integers we can be sure that when it's generated on
31    a different architecture the format is not seen as valid.
32
33*/
34
35/*
36
37    In \LUAMETATEX\ the code has been overhauled. The sections are better separated and we write
38    less to the file because we try to be sparse. Also, a more dynamic approach is used. In the
39    \CONTEXT\ macro package most of what goes into the format is \LUA\ bytecode.
40
41    We no longer handle endian related code here which saves swapping bytes on the most popular
42    architectures. We also maintain some statistics and have several points where we check if
43    we're still okay.
44
45    Here we only have the main chunk. The specific data sections are implemented where it makes
46    most sense.
47
48*/
49
/*tex
    The three magic words that open a format file. When each word is stored with its least
    significant byte first, the bytes spell the text in the right hand comment; the hexadecimal
    number in the comment is the same text read with the most significant byte first.
*/

# define MAGIC_FORMAT_NUMBER_LE_1 0x58544D4C /* 0x4C4D5458 : LMTX */
# define MAGIC_FORMAT_NUMBER_LE_2 0x5845542D /* 0x2D544558 : -TEX */
# define MAGIC_FORMAT_NUMBER_LE_3 0x544D462D /* 0x2D464D54 : -FMT */
53
54static int tex_aux_report_dump_state(dumpstream f, int pos, const char *what)
55{
56    int tmp = ftell(f);
57    tex_print_format("%i %s", tmp - pos, what);
58    fflush(stdout);
59    return tmp;
60}
61
62/* todo: move more dumping to other files, then also the sizes. */
63
64static void tex_aux_dump_fingerprint(dumpstream f)
65{
66    dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_1);
67    dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_2);
68    dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_3);
69    dump_via_int(f, luametatex_format_fingerprint);
70}
71
72static void tex_aux_undump_fingerprint(dumpstream f)
73{
74    int x;
75    undump_int(f, x);
76    if (x == MAGIC_FORMAT_NUMBER_LE_1) {
77        undump_int(f, x);
78        if (x == MAGIC_FORMAT_NUMBER_LE_2) {
79            undump_int(f, x);
80            if (x == MAGIC_FORMAT_NUMBER_LE_3) {
81                undump_int(f, x);
82                if (x == luametatex_format_fingerprint) {
83                    return;
84                } else {
85                    tex_fatal_undump_error("version id");
86                }
87            }
88        }
89    }
90    tex_fatal_undump_error("initial fingerprint");
91}
92
93static void tex_aux_dump_final_check(dumpstream f)
94{
95    dump_via_int(f, luametatex_format_fingerprint);
96}
97
98static void tex_aux_undump_final_check(dumpstream f)
99{
100    int x;
101    undump_int(f, x);
102    if (x == luametatex_format_fingerprint) {
103        return;
104    } else {
105        tex_fatal_undump_error("final fingerprint");
106    }
107}
108
109static void tex_aux_create_fmt_name(void)
110{
111    lmt_print_state.selector = new_string_selector_code;
112//    lmt_dump_state.format_identifier = tex_make_string();
113//    lmt_dump_state.format_name = tex_make_string();
114    tex_print_format("%s %i.%i.%i %s",lmt_fileio_state.fmt_name, year_par, month_par, day_par, lmt_fileio_state.job_name);
115    lmt_print_state.selector = terminal_and_logfile_selector_code;
116}
117
118static void tex_aux_dump_preamble(dumpstream f)
119{
120    dump_via_int(f, hash_size);
121    dump_via_int(f, hash_prime);
122    dump_via_int(f, prim_size);
123    dump_via_int(f, prim_prime);
124    dump_int(f, lmt_hash_state.hash_data.allocated);
125    dump_int(f, lmt_hash_state.hash_data.ptr);
126    dump_int(f, lmt_hash_state.hash_data.top);
127}
128
129static void tex_aux_undump_preamble(dumpstream f)
130{
131    int x;
132    undump_int(f, x);
133    if (x != hash_size) {
134        goto BAD;
135    }
136    undump_int(f, x);
137    if (x != hash_prime) {
138        goto BAD;
139    }
140    undump_int(f, x);
141    if (x != prim_size) {
142        goto BAD;
143    }
144    undump_int(f, x);
145    if (x != prim_prime) {
146        goto BAD;
147    }
148    undump_int(f, lmt_hash_state.hash_data.allocated);
149    undump_int(f, lmt_hash_state.hash_data.ptr);
150    undump_int(f, lmt_hash_state.hash_data.top);
151    /*tex
152        We can consider moving all these allocaters to the start instead of this exception.
153    */
154    tex_initialize_hash_mem();
155    return;
156  BAD:
157    tex_fatal_undump_error("preamble");
158}
159
160void tex_store_fmt_file(void)
161{
162    int pos = 0;
163    dumpstream f = NULL;
164
165    /*tex
166        If dumping is not allowed, abort. The user is not allowed to dump a format file unless
167        |save_ptr = 0|. This condition implies that |cur_level=level_one|, hence the |xeq_level|
168        array is constant and it need not be dumped.
169    */
170
171    if (lmt_save_state.save_stack_data.ptr != 0) {
172        tex_handle_error(
173            succumb_error_type,
174            "You can't dump inside a group",
175            "'{...\\dump}' is a no-no."
176        );
177    }
178
179    /*tex
180        We don't store some things.
181    */
182
183    tex_dispose_specification_nodes();
184
185    /*tex
186        Create the |format_ident|, open the format file, and inform the user that dumping has begun.
187    */
188
189    {
190        int callback_id = lmt_callback_defined(pre_dump_callback);
191        if (callback_id > 0) {
192            (void) lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->");
193        }
194    }
195
196    /*tex
197        We report the usual plus some more statistics. When something is wrong the machine just
198        quits, hopefully with some meaningful error. We always create the format in normal log and
199        terminal mode. We create a format name first because we also use that in error reporting.
200    */
201
202    tex_aux_create_fmt_name();
203
204    f = tex_open_fmt_file(1);
205    if (! f) {
206        tex_formatted_error("system", "format file '%s' cannot be opened for writing", lmt_fileio_state.fmt_name);
207        return;
208    }
209
210    tex_print_nlp();
211    tex_print_format("Dumping format in file '%s': ", lmt_fileio_state.fmt_name);
212    fflush(stdout);
213
214    tex_compact_tokens();
215    tex_compact_string_pool();
216
217    tex_aux_dump_fingerprint(f); pos = tex_aux_report_dump_state(f, pos, "fingerprint + ");
218    lmt_dump_engine_info(f);     pos = tex_aux_report_dump_state(f, pos, "engine + ");
219    tex_aux_dump_preamble(f);    pos = tex_aux_report_dump_state(f, pos, "preamble + ");
220    tex_dump_constants(f);       pos = tex_aux_report_dump_state(f, pos, "constants + ");
221    tex_dump_string_pool(f);     pos = tex_aux_report_dump_state(f, pos, "stringpool + ");
222 // tex_print_format("(%i used and %i free) ", tex_used_node_count(), tex_free_node_count());
223    tex_dump_node_mem(f);        pos = tex_aux_report_dump_state(f, pos, "nodes + ");
224 // tex_print_format("(%i used and free) ", tex_used_token_count());
225    tex_dump_token_mem(f);       pos = tex_aux_report_dump_state(f, pos, "tokens + ");
226    tex_dump_equivalents_mem(f); pos = tex_aux_report_dump_state(f, pos, "equivalents + ");
227    tex_dump_math_codes(f);      pos = tex_aux_report_dump_state(f, pos, "math codes + ");
228    tex_dump_text_codes(f);      pos = tex_aux_report_dump_state(f, pos, "text codes + ");
229    tex_dump_primitives(f);      pos = tex_aux_report_dump_state(f, pos, "primitives + ");
230    tex_dump_hashtable(f);       pos = tex_aux_report_dump_state(f, pos, "hashtable + ");
231    tex_dump_font_data(f);       pos = tex_aux_report_dump_state(f, pos, "fonts + ");
232    tex_dump_math_data(f);       pos = tex_aux_report_dump_state(f, pos, "math + ");
233    tex_dump_language_data(f);   pos = tex_aux_report_dump_state(f, pos, "language + ");
234    tex_dump_insert_data(f);     pos = tex_aux_report_dump_state(f, pos, "insert + ");
235    lmt_dump_registers(f);       pos = tex_aux_report_dump_state(f, pos, "bytecodes + ");
236    tex_aux_dump_final_check(f); pos = tex_aux_report_dump_state(f, pos, "housekeeping = ");
237
238    tex_aux_report_dump_state(f, 0, "total.");
239    tex_close_fmt_file(f);
240    tex_print_ln();
241
242}
243
244/*tex
245
246    Corresponding to the procedure that dumps a format file, we have a function that reads one in.
247    The function returns |false| if the dumped format is incompatible with the present \TEX\ table
248    sizes, etc.
249
250    The inverse macros are slightly more complicated, since we need to check the range of the values
251    we are reading in. We say |undump (a) (b) (x)| to read an integer value |x| that is supposed to
252    be in the range |a <= x <= b|.
253
254*/
255
256int tex_fatal_undump_error(const char *s)
257{
258    tex_emergency_message("system", "fatal format error, loading file '%s' failed with bad '%s' data, remake the format", emergency_fmt_name, s);
259    return tex_emergency_exit();
260}
261
/*tex
    A tracing hook that marks the stages of undumping. Normally it expands to nothing. When one
    wants to see how far loading gets, the definition can temporarily be replaced by:

    # define undumping(s) printf("undumping: %s\n",s); fflush(stdout);
*/

# define undumping(s)
264
265static void tex_aux_undump_fmt_data(dumpstream f)
266{
267    undumping("warmingup")
268
269    undumping("fingerprint") tex_aux_undump_fingerprint(f);
270    undumping("engineinfo")  lmt_undump_engine_info(f);
271    undumping("preamble")    tex_aux_undump_preamble(f);
272    undumping("constants")   tex_undump_constants(f);
273    undumping("strings")     tex_undump_string_pool(f);
274    undumping("nodes")       tex_undump_node_mem(f);
275    undumping("tokens")      tex_undump_token_mem(f);
276    undumping("equivalents") tex_undump_equivalents_mem(f);
277    undumping("mathcodes")   tex_undump_math_codes(f);
278    undumping("textcodes")   tex_undump_text_codes(f);
279    undumping("primitives")  tex_undump_primitives(f);
280    undumping("hashtable")   tex_undump_hashtable(f);
281    undumping("fonts")       tex_undump_font_data(f);
282    undumping("math")        tex_undump_math_data(f);
283    undumping("languages")   tex_undump_language_data(f);
284    undumping("inserts")     tex_undump_insert_data(f);
285    undumping("bytecodes")   lmt_undump_registers(f);
286    undumping("finalcheck")  tex_aux_undump_final_check(f);
287
288    undumping("done")
289
290    /*tex This should go elsewhere. */
291
292    cur_list.prev_depth = ignore_depth_criterion_par;
293}
294
295/*
296    The next code plays nice but on an error we exit anyway so some code is never reached in that
297    case.
298*/
299
300int tex_load_fmt_file(void)
301{
302    dumpstream f = tex_open_fmt_file(0);
303    if (f) {
304        tex_aux_undump_fmt_data(f);
305        tex_close_fmt_file(f);
306        return 1;
307    } else {
308        return tex_fatal_undump_error("filehandle");
309    }
310}
311
312void tex_initialize_dump_state(void)
313{
314    if (! lmt_engine_state.dump_name) {
315        lmt_engine_state.dump_name = lmt_memory_strdup("initex");
316    }
317}
318