texfileio.c /size: 38 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5# include "luametatex.h"
6
7fileio_state_info lmt_fileio_state = {
8   .io_buffer        = NULL,
9   .io_buffer_data   = {
10        .minimum   = min_buffer_size,
11        .maximum   = max_buffer_size,
12        .size      = siz_buffer_size,
13        .step      = stp_buffer_size,
14        .allocated = 0,
15        .itemsize  = sizeof(unsigned char),
16        .top       = 0,
17        .ptr       = 0,
18        .initial   = memory_data_unset,
19        .offset    = 0,
20   },
21   .io_first         = 0,
22   .io_last          = 0,
23   .name_in_progress = 0,
24   .log_opened       = 0,
25   .job_name         = NULL,
26   .log_name         = NULL,
27   .fmt_name         = NULL
28};
29
30/*tex
31
32    Once \TEX\ is working, you should be able to diagnose most errors with the |\show| commands and
33    other diagnostic features. Because we have made some internal changes the optional debug interface
34    has been removed.
35
36*/
37
38# define reserved_io_buffer_slots 256
39
40void tex_initialize_fileio_state(void)
41{
42    int size = lmt_fileio_state.io_buffer_data.minimum;
43    lmt_fileio_state.io_buffer = aux_allocate_clear_array(sizeof(unsigned char), size, reserved_io_buffer_slots);
44    if (lmt_fileio_state.io_buffer) {
45        lmt_fileio_state.io_buffer_data.allocated = size;
46    } else {
47        tex_overflow_error("buffer", size);
48    }
49}
50
51bool tex_room_in_buffer(int top)
52{
53    /*tex Beware: |top| can exceed the old size plus the step. */
54    if (top > lmt_fileio_state.io_buffer_data.top) {
55       lmt_fileio_state.io_buffer_data.top = top;
56        if (top > lmt_fileio_state.io_buffer_data.allocated) {
57            unsigned char *tmp = NULL;
58            if (top <= lmt_fileio_state.io_buffer_data.size) {
59                if (lmt_fileio_state.io_buffer_data.allocated + lmt_fileio_state.io_buffer_data.step > top) {
60                    top = lmt_fileio_state.io_buffer_data.allocated + lmt_fileio_state.io_buffer_data.step;
61                    if (top > lmt_fileio_state.io_buffer_data.size) {
62                        top = lmt_fileio_state.io_buffer_data.size;
63                    }
64                }
65                if (top > lmt_fileio_state.io_buffer_data.allocated) {
66                    lmt_fileio_state.io_buffer_data.allocated = top;
67                    tmp = aux_reallocate_array(lmt_fileio_state.io_buffer, sizeof(unsigned char), top, reserved_io_buffer_slots);
68                    lmt_fileio_state.io_buffer = tmp;
69                }
70            }
71            lmt_run_memory_callback("buffer", tmp ? 1 : 0);
72            if (! tmp) {
73                tex_overflow_error("buffer", top);
74                return false;
75            }
76        }
77    }
78    return true;
79}
80
/*tex
    Open |name| with the given |mode|. On success the handle is stored in |*f| and 1 is returned;
    on failure |*f| is left untouched and 0 is returned.
*/

static int tex_aux_open_outfile(FILE **f, const char *name, const char *mode)
{
    FILE *handle = aux_utf8_fopen(name, mode);
    if (! handle) {
        return 0;
    }
    *f = handle;
    return 1;
}
90
91/*tex
92
93    We conform to the way \WEBC\ does handle trailing tabs and spaces. This decade old behaviour
94    was changed in September 2017 and can introduce compatibility issues in existing workflows.
95    Because we don't want too many differences with upstream \TEX live we just follow up on that
96    patch and it's up to macro packages to deal with possible issues (which can be done via the
    usual callbacks). One can wonder why we then still prune spaces but we leave that to the reader.
98
99    Patched original comment:
100
101    Make last be one past the last non-space character in \quote {buffer}, ignoring line
102    terminators (but not, e.g., tabs). This is because we are supposed to treat this like a line of
103    TeX input. Although there are pathological cases (|SP CR SC CR|) where this differs from
104    input_line below, and from previous behavior of removing all whitespace, the simplicity of
105    removing all trailing line terminators seems more in keeping with actual command line
106    processing.
107
108    The |IS_SPC_OR_EOL| macro deals with space characters (|SPACE 32|) and newlines (|CR| and |LF|)
109    and no longer looks at tabs (|TAB 9|).
110
111*/
112
113/*
    The terminal input code is gone as is the read related code (that had already been nicely
    cleaned up and abstracted), but that is the price we pay for stepwise progress. That code is
116    still in the git repository of course.
117
118    At some point I might do the same as we do in mplib: four callbacks for open, close, read
    and write (in which case the log goes via write). Part of the management is then moved to
120    \LUA\ and we save a lookup.
121
122    When I adapted the code in this module and the one dealing with errors, I decided to delegate
123    all interaction to \LUA, also because the sometimes tight integration in the scanning and
124    expansion mechanisms. In the 2021 TeX tuneup there have been some patches in the interaction
125    code and some remarks ring a bell: especially the relation between offering feedback and
126    waiting for input. However, because we delegate to \LUA, the engine is no longer responsible
127    for what the macro package lets the user do in case of an error. For instance, in \CONTEXT\ we
128    just abort the run: it makes no sense to carry on the wrong way. Computers are fast enough for
129    a \quotation {Fix and run again.} approach. But we do offer the message and optional help as
130    cue. On the agenda is a further abstraction of error handling. This deviation is fine as we
131    obey Don's wish to not call it \TEX\ but instead add some more letters to the name.
132
133*/
134
135int tex_lua_a_open_in(const char *fn)
136{
137    int callback_id = lmt_callback_defined(open_data_file_callback);
138    if (callback_id > 0) {
139        int k = lmt_run_and_save_callback(lmt_lua_state.lua_instance, callback_id, "S->", fn);
140        lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id = k;
141        return k > 0;
142    } else {
143        tex_emergency_message("startup error", "missing open_data_file callback");
144        tex_emergency_exit();
145        return 0;
146    }
147}
148
149void tex_lua_a_close_in()
150{
151    int k = lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id;
152    if (k > 0) {
153        lmt_run_saved_callback_close(lmt_lua_state.lua_instance, k);
154        lmt_destroy_saved_callback(lmt_lua_state.lua_instance, k);
155        lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id = 0;
156    }
157}
158
159/*tex
160
161    Binary input and output are done with \CCODE's ordinary procedures, so we don't have to make
162    any other special arrangements for binary \IO. Text output is also easy to do with standard
163    routines. The treatment of text input is more difficult, however, because of the necessary
164    translation to |unsigned char| values. \TEX's conventions should be efficient, and they should
165    blend nicely with the user's operating environment.
166
167    Input from text files is read one line at a time, using a routine called |lua_input_ln|. This
168    function is defined in terms of global variables called |buffer|, |first|, and |last| that will
169    be described in detail later; for now, it suffices for us to know that |buffer| is an array of
170    |unsigned char| values, and that |first| and |last| are indices into this array representing
171    the beginning and ending of a line of text.
172
173    The lines of characters being read: |buffer|, the first unused position in |first|, the end of
174    the line just input |last|, the largest index used in |buffer|: |max_buf_stack|.
175
176    The |lua_input_ln| function brings the next line of input from the specified file into available
177    positions of the buffer array and returns the value |true|, unless the file has already been
178    entirely read, in which case it returns |false| and sets |last:=first|. In general, the
179    |unsigned char| numbers that represent the next line of the file are input into |buffer[first]|,
180    |buffer[first + 1]|, \dots, |buffer[last - 1]|; and the global variable |last| is set equal to
181    |first| plus the length of the line. Trailing blanks are removed from the line; thus, either
182    |last = first| (in which case the line was entirely blank) or |buffer[last - 1] <> " "|.
183
184    An overflow error is given, however, if the normal actions of |lua_input_ln| would make |last
185    >= buf_size|; this is done so that other parts of \TEX\ can safely look at the contents of
186    |buffer[last+1]| without overstepping the bounds of the |buffer| array. Upon entry to
187    |lua_input_ln|, the condition |first < buf_size| will always hold, so that there is always room
188    for an \quote {empty} line.
189
190    The variable |max_buf_stack|, which is used to keep track of how large the |buf_size| parameter
191    must be to accommodate the present job, is also kept up to date by |lua_input_ln|.
192
193    If the |bypass_eoln| parameter is |true|, |lua_input_ln| will do a |get| before looking at the
194    first character of the line; this skips over an |eoln| that was in |f^|. The procedure does not
195    do a |get| when it reaches the end of the line; therefore it can be used to acquire input from
196    the user's terminal as well as from ordinary text files.
197
198    Since the inner loop of |lua_input_ln| is part of \TEX's \quote {inner loop} --- each character
199    of input comes in at this place --- it is wise to reduce system overhead by making use of
200    special routines that read in an entire array of characters at once, if such routines are
201    available.
202
203*/
204
205int tex_lua_input_ln(void) /*tex |bypass_eoln| was not used */
206{
207    int callback_id = lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id;
208    if (callback_id > 0) {
209        lua_State *L = lmt_lua_state.lua_instance;
210        int last_ptr = 0;
211        lmt_fileio_state.io_last = lmt_fileio_state.io_first;
212        last_ptr = lmt_run_saved_callback_line(L, callback_id, lmt_fileio_state.io_first);
213        if (last_ptr < 0) {
214            return 0;
215        } else if (last_ptr > 0) {
216            lmt_fileio_state.io_last = last_ptr;
217            if (last_ptr > lmt_fileio_state.io_buffer_data.top) {
218                lmt_fileio_state.io_buffer_data.top = last_ptr;
219            }
220        }
221        return 1;
222    } else {
223        return 0;
224    }
225}
226
227/*tex
228
229    We need a special routine to read the first line of \TEX\ input from the user's terminal.
230    This line is different because it is read before we have opened the transcript file; there is
231    sort of a \quote {chicken and egg} problem here. If the user types |\input paper| on the first
232    line, or if some macro invoked by that line does such an |\input|, the transcript file will be
233    named |paper.log|; but if no |\input| commands are performed during the first line of terminal
234    input, the transcript file will acquire its default name |texput.log|. (The transcript file
235    will not contain error messages generated by the first line before the first |\input| command.)
236
237    The first line is special also because it may be read before \TEX\ has input a format file. In
238    such cases, normal error messages cannot yet be given. The following code uses concepts that
239    will be explained later.
240
241    Different systems have different ways to get started. But regardless of what conventions are
242    adopted, the routine that initializes the terminal should satisfy the following specifications:
243
244    \startitemize[n]
245
246        \startitem
247            It should open file |term_in| for input from the terminal.
248        \stopitem
249
250        \startitem
251            If the user has given a command line, this line should be considered the first line of
252            terminal input. Otherwise the user should be prompted with |**|, and the first line of
253            input should be whatever is typed in response.
254        \stopitem
255
256        \startitem
257            The first line of input, which might or might not be a command line, should appear in
258            locations |first| to |last-1| of the |buffer| array.
259        \stopitem
260
261        \startitem
262            The global variable |loc| should be set so that the character to be read next by \TEX\
263            is in |buffer[loc]|. This character should not be blank, and we should have |loc < last|.
264        \stopitem
265
266    \stopitemize
267
    (It may be necessary to prompt the user several times before a non-blank line comes in. The
269    prompt is |**| instead of the later |*| because the meaning is slightly different: |\input|
270    need not be typed immediately after |**|.)
271
272    The following code does the required initialization. If anything has been specified on the
273    command line, then |t_open_in| will return with |last > first|.
274
275    This code has been adapted and we no longer ask for a name. It makes no sense because one needs
276    to initialize the primitives and backend anyway and no one is going to do that interactively.
277    Of course one can implement a session in \LUA. We keep the \TEX\ trick to push the name into
278    the input buffer and then exercise an |\input| which ensures proper housekeeping. There is a
279    bit overkill in the next function but for now we keep it (as reference).
280
    For a while copying the argument to the buffer lived in the engine lib but it made no sense
282    to duplicate code, so now it's here. Anyway, the following does no longer apply:
283
284    \startquotation
285    This is supposed to open the terminal for input, but what we really do is copy command line
286    arguments into \TEX's buffer, so it can handle them. If nothing is available, or we've been
287    called already (and hence, |argc == 0|), we return with |last = first|.
288    \stopquotation
289
290    In \LUAMETATEX\ we don't really have a terminal. In the \LUATEX\ precursor we used to append
291    all the remaining arguments but now we just take the first one. If one wants filenames with
292    spaces \unknown\ use quotes. Keep in mind that original \TEX\ permits this:
293
294    \starttyping
295    tex ... filename \\hbox{!} \\end
296    \stoptyping
297
298    But we don't follow that route in the situation where \LUA\ is mostly in charge of passing
299    input from files and the console.
300
301    In the end I went for an easier solution: just pass the name to the file reader. But we keep
    this as nostalgic reference to how \TEX\ originally kind of did these things.
303
304    \starttyping
305    int input_file_name_pushed(void)
306    {
307        const char *ptr = engine_input_filename();
308        if (ptr) {
309            int len = strlen(ptr);
310            fileio_state.io_buffer[fileio_state.io_first] = 0;
311            if (len > 0 && room_in_buffer(len + 1)) {
312                // We cannot use strcat, because we have multibyte UTF-8 input. Hm, why not.
313                fileio_state.io_last= fileio_state.io_first;
314                while (*ptr) {
315                    fileio_state.io_buffer[fileio_state.io_last++] = (unsigned char) * (ptr++);
316                }
317                // Backtrack over spaces and newlines.
318                for (
319                    --fileio_state.io_last;
320                    fileio_state.io_last >= fileio_state.io_first && IS_SPC_OR_EOL(fileio_state.io_buffer[fileio_state.io_last]);
321                    --fileio_state.io_last
322                );
323                // Terminate the string.
324                fileio_state.io_buffer[++fileio_state.io_last] = 0;
325                // One more time, this time converting to \TEX's internal character representation.
326                if (fileio_state.io_last > fileio_state.io_first) {
327                    input_state.cur_input.loc = fileio_state.io_first;
328                    while ((input_state.cur_input.loc < fileio_state.io_last) && (fileio_state.io_buffer[input_state.cur_input.loc] == ' ')) {
329                        ++input_state.cur_input.loc;
330                    }
331                    if (input_state.cur_input.loc < fileio_state.io_last) {
332                        input_state.cur_input.limit = fileio_state.io_last;
333                        fileio_state.io_first = fileio_state.io_last + 1;
334                    }
335                    if (input_state.cur_input.loc < input_state.cur_input.limit) {
336                        return 1;
337                    }
338                }
339            }
340        }
341        fileio_state.io_first = 1;
342        fileio_state.io_last = 1;
343        return 0;
344    }
    \stoptyping
346
347    It's this kind of magic that can take lots of time to play with and figure out, also because
348    we cannot break expectations too much.
349
350*/
351
352/*tex
353
354    Per June 22 2020 the terminal code is gone. See |texlegacy.c| for the old, already adapted
    long ago, code. It had already been scheduled for removal for a while. We only keep the update.
356
357*/
358
/*tex Flush what is pending on |stdout|. This one got renamed, else we conflict in |lmplib|. */

void tex_terminal_update(void)
{
    (void) fflush(stdout);
}
363
364/*tex
365
366    It's time now to fret about file names. Besides the fact that different operating systems treat
367    files in different ways, we must cope with the fact that completely different naming conventions
368    are used by different groups of people. The following programs show what is required for one
369    particular operating system; similar routines for other systems are not difficult to devise.
370
371    \TEX\ assumes that a file name has three parts: the name proper; its \quote {extension}; and a
372    \quote {file area} where it is found in an external file system. The extension of an input file
373    or a write file is assumed to be |.tex| unless otherwise specified; it is |transcript_extension|
374    on the transcript file that records each run of \TEX; it is |.tfm| on the font metric files that
375    describe characters in the fonts \TEX\ uses; it is |.dvi| on the output files that specify
376    typesetting information; and it is |format_extension| on the format files written by \INITEX\
377    to initialize \TEX. The file area can be arbitrary on input files, but files are usually output
378    to the user's current area.
379
380    Simple uses of \TEX\ refer only to file names that have no explicit extension or area. For
381    example, a person usually says |\input paper| or |\font \tenrm = helvetica| instead of |\input
382    {paper.new}| or |\font \tenrm = {test}|. Simple file names are best, because they make the \TEX\
383    source files portable; whenever a file name consists entirely of letters and digits, it should be
384    treated in the same way by all implementations of \TEX. However, users need the ability to refer
385    to other files in their environment, especially when responding to error messages concerning
386    unopenable files; therefore we want to let them use the syntax that appears in their favorite
387    operating system.
388
389    The following procedures don't allow spaces to be part of file names; but some users seem to like
390    names that are spaced-out. System-dependent changes to allow such things should probably be made
391    with reluctance, and only when an entire file name that includes spaces is \quote {quoted} somehow.
392
393    Here are the global values that file names will be scanned into.
394
395    \starttyping
396    strnumber cur_name;
397    strnumber cur_area;
398    strnumber cur_ext;
399    \stoptyping
400
401    The file names we shall deal with have the following structure: If the name contains |/| or |:|
402    (for Amiga only), the file area consists of all characters up to and including the final such
403    character; otherwise the file area is null. If the remaining file name contains |.|, the file
404    extension consists of all such characters from the last |.| to the end, otherwise the file
405    extension is null.
406
407    We can scan such file names easily by using two global variables that keep track of the
408    occurrences of area and extension delimiters:
409
410    Input files that can't be found in the user's area may appear in a standard system area called
411    |TEX_area|. Font metric files whose areas are not given explicitly are assumed to appear in a
412    standard system area called |TEX_font_area|. These system area names will, of course, vary from
413    place to place.
414
415    This whole model has been adapted a little but we do keep the |area|, |name|, |ext| distinction
416    for now although we don't use the string pool.
417
418*/
419
/*tex
    Construct a freshly allocated file name. The name is taken from |s| (of length |l|) or, when
    |s| is |NULL| or |l| is not positive, from the fallback |name|. When there is neither, |NULL|
    is returned. When |ext| is given it is appended, but only if the part after the last
    directory separator has no period. The caller owns the result.

    The scan for a suffix has to use the length of the string that we actually picked: |l|
    belongs to |s|, so after falling back on |name| we take the length of |name| instead. That
    way we never look beyond the end of |name| and a fallback that already has a suffix doesn't
    get a second one.
*/

static char *tex_aux_pack_file_name(char *s, int l, const char *name, const char *ext)
{
    const char *fn = s;
    if ((! fn) || (l <= 0)) {
        fn = name;
        l = fn ? (int) strlen(fn) : 0;
    }
    if (! fn) {
        return NULL;
    } else if (! ext) {
        return lmt_memory_strdup(fn);
    } else {
        /*tex The position of the last period in the base name, or -1 when there is none. */
        int e = -1;
        for (int i = 0; i < l; i++) {
            if (IS_DIR_SEP(fn[i])) {
                e = -1;
            } else if (fn[i] == '.') {
                e = i;
            }
        }
        if (e >= 0) {
            return lmt_memory_strdup(fn);
        } else {
            size_t nfn = strlen(fn);
            size_t next = strlen(ext);
            char *f = lmt_memory_malloc(nfn + next + 1);
            if (f) {
                memcpy(f, fn, nfn);
                /*tex This also copies the terminating zero of |ext|. */
                memcpy(f + nfn, ext, next + 1);
            }
            return f;
        }
    }
}
450
451/*tex
452
453    Here is a routine that manufactures the output file names, assuming that |job_name <> 0|. It
454    ignores and changes the current settings of |cur_area| and |cur_ext|; |s = transcript_extension|,
455    |".dvi"|, or |format_extension|
456
457    The packer does split the basename every time but isn't called that often so we can use it in
458    the checker too.
459
460*/
461
462static char *tex_aux_pack_job_name(const char *e, int keeppath, int keepsuffix)
463{
464    char *n = lmt_fileio_state.job_name;
465    int ln = (n) ? (int) strlen(n) : 0;
466    if (! ln) {
467        tex_fatal_error("bad jobname");
468        return NULL;
469    } else {
470        int le = (e) ? (int) strlen(e) : 0;
471        int f = -1; /* first */
472        int l = -1; /* last */
473        char *fn = NULL;
474        int k = 0;
475        for (int i = 0; i < ln; i++) {
476            if (IS_DIR_SEP(n[i])) {
477                f = i;
478                l = -1;
479            } else if (n[i] == '.') {
480                l = i;
481            }
482        }
483        if (keeppath) {
484            f = 0;
485        } else if (f < 0) {
486            f = 0;
487        } else {
488            f += 1;
489        }
490        if (keepsuffix || l < 0) {
491            l = ln;
492        }
493        fn = (char*) lmt_memory_malloc((l - f) + le + 2); /* a bit too much */
494        if (fn) {
495            for (int i = f; i < l; i++) {
496                fn[k++] = n[i];
497            }
498            for (int i = 0; i < le; i++) {
499                fn[k++] = e[i];
500            }
501            fn[k] = 0;
502        }
503        return fn;
504    }
505}
506
507/*tex
508
509    The following comment is obsolete but we keep it as reference because it tells some history.
510
511    \startquotation
512    Because the format is zipped we read and write dump files through zlib. Earlier versions recast
513    |*f| from |FILE *| to |gzFile|, but there is no guarantee that these have the same size, so a
514    static variable is needed.
515
516    We no longer do byte-swapping so formats are generated for the system and not shared. It
517    actually slowed down loading of the format on the majority of used platforms (intel).
518
519    A \CONTEXT\ format is uncompressed some 16 MB but that used to be over 30MB due to more
520    (preallocated) memory usage. A compressed format is 11 MB so the saving is not that much. If
521    we were in lua I'd load the whole file in one go and use a fast decompression after which we
522    could access the bytes in memory. But it's not worth the trouble.
523
    Tests have shown that a level 3 compression is the best tradeoff between file size and
525    load time.
526
527    So, in principle we can undefine |FMT_COMPRESSION| below and experiment a bit with it. With
528    SSD's it makes no dent, but on a network it still might.
529
530    Per end May 2019 the |FMT_COMPRESSION| branch is gone so that we can simplify the opener and
531    closer.
532    \stopquotation
533
534*/
535
536void tex_check_fmt_name(void)
537{
538    if (lmt_engine_state.dump_name) {
539        char *tmp = lmt_fileio_state.job_name;
540        lmt_fileio_state.job_name = lmt_engine_state.dump_name;
541        lmt_fileio_state.fmt_name = tex_aux_pack_job_name(format_extension, 1, 0);
542        lmt_fileio_state.job_name = tmp;
543    } else if (lmt_main_state.run_state != initializing_state) {
544        /*tex For |dump_name| to be NULL is a bug. */
545        tex_emergency_message("startup error", "no format file given, quitting");
546        tex_emergency_exit();
547    }
548}
549
550void tex_check_job_name(char * fn)
551{
552    if (! lmt_fileio_state.job_name) {
553        if (lmt_engine_state.startup_jobname) {
554            lmt_fileio_state.job_name = lmt_engine_state.startup_jobname; /* not freed here */
555            lmt_fileio_state.job_name = tex_aux_pack_job_name(NULL, 0, 0);
556        } else if (fn) {
557            lmt_fileio_state.job_name = fn;
558            lmt_fileio_state.job_name = tex_aux_pack_job_name(NULL, 0, 0); /* not freed here */
559        } else {
560            tex_emergency_message("startup warning", "using fallback jobname 'texput', continuing");
561            lmt_fileio_state.job_name = lmt_memory_strdup("texput");
562        }
563    }
564    if (! lmt_fileio_state.log_name) {
565        lmt_fileio_state.log_name = tex_aux_pack_job_name(transcript_extension, 0, 1);
566    }
567    if (! lmt_fileio_state.fmt_name) {
568        lmt_fileio_state.fmt_name = tex_aux_pack_job_name(format_extension, 0, 1);
569    }
570}
571
572/*tex
573
574    A messier routine is also needed, since format file names must be scanned before \TEX's
575    string mechanism has been initialized. We shall use the global variable |TEX_format_default|
576    to supply the text for default system areas and extensions related to format files.
577
578    Under \UNIX\ we don't give the area part, instead depending on the path searching that will
579    happen during file opening. Also, the length will be set in the main program.
580
581    \starttyping
582    char *TEX_format_default;
583    \stoptyping
584
585    This part of the program becomes active when a \quote {virgin} \TEX\ is trying to get going,
586    just after the preliminary initialization, or when the user is substituting another format file
587    by typing |&| after the initial |**| prompt. The buffer contains the first line of input in
588    |buffer[loc .. (last - 1)]|, where |loc < last| and |buffer[loc] <> " "|.
589
590*/
591
592dumpstream tex_open_fmt_file(int writemode)
593{
594    dumpstream f = NULL;
595    if (! lmt_fileio_state.fmt_name) {
596        /* this can't happen */
597        tex_emergency_message("startup error", "no format output file '%s' given, quitting", emergency_fmt_name);
598        tex_emergency_exit();
599    } else if (writemode) {
600        f = aux_utf8_fopen(lmt_fileio_state.fmt_name, FOPEN_WBIN_MODE);
601        if (! f) {
602            tex_emergency_message("startup error", "invalid format output file '%s' given, quitting", lmt_fileio_state.fmt_name);
603            tex_emergency_exit();
604        }
605    } else {
606        int callbackid = lmt_callback_defined(find_format_file_callback);
607        if (callbackid > 0) {
608            char *fnam = NULL;
609            int test = lmt_run_callback(lmt_lua_state.lua_instance, callbackid, "S->R", lmt_fileio_state.fmt_name, &fnam);
610            if (test && fnam && strlen(fnam) > 0) {
611                lmt_memory_free(lmt_fileio_state.fmt_name);
612                lmt_fileio_state.fmt_name = fnam;
613            } else {
614                lmt_memory_free(fnam);
615            }
616            f = aux_utf8_fopen(lmt_fileio_state.fmt_name, FOPEN_RBIN_MODE);
617            if (! f) {
618                tex_emergency_message("startup error", "invalid format input file '%s' given, quitting", emergency_fmt_name);
619                tex_emergency_exit();
620            }
621        } else {
622            /*tex For the moment we make this mandate! */
623            tex_emergency_message("startup error", "missing find_format_file callback");
624            tex_emergency_exit();
625        }
626    }
627    return f;
628}
629
630void tex_close_fmt_file(dumpstream f)
631{
632    if (f) {
633        fclose(f);
634    }
635}
636
637/*tex
638
639    The variable |name_in_progress| is used to prevent recursive use of |scan_file_name|, since the
640    |begin_name| and other procedures communicate via global variables. Recursion would arise only
641    by devious tricks like |\input \input f|; such attempts at sabotage must be thwarted.
642    Furthermore, |name_in_progress| prevents |\input| from being initiated when a font size
643    specification is being scanned.
644
645    Another variable, |job_name|, contains the file name that was first |\input| by the user. This
646    name is extended by |transcript_extension| and |.dvi| and |format_extension| in the names of
    \TEX's output files. Whether the transcript file has been opened is registered in
648    |log_opened_global|.
649
650    Initially |job_name = 0|; it becomes nonzero as soon as the true name is known. We have
651    |job_name = 0| if and only if the |log| file has not been opened, except of course for a short
652    time just after |job_name| has become nonzero.
653
654    The full name of the log file is stored in |log_name|. The |open_log_file| routine is used to
655    open the transcript file and to help it catch up to what has previously been printed on the
656    terminal.
657
658*/
659
660void tex_open_log_file(void)
661{
662    if (! lmt_fileio_state.log_opened) {
663        int callback_id = lmt_callback_defined(find_log_file_callback);
664        if (callback_id > 0) {
665            char *filename = NULL;
666            int okay = 0;
667            tex_check_job_name(NULL);
668            okay = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "S->R", lmt_fileio_state.log_name, &filename);
669            if (okay && filename && (strlen(filename) > 0)) {
670                lmt_memory_free(lmt_fileio_state.log_name);
671                lmt_fileio_state.log_name = filename;
672            } else {
673                lmt_memory_free(filename);
674            }
675        } else {
676            /*tex For the moment we make this mandate! */
677            tex_emergency_message("startup error", "missing find_log_file callback");
678            tex_emergency_exit();
679        }
680        if (tex_aux_open_outfile(&lmt_print_state.logfile, lmt_fileio_state.log_name, FOPEN_W_MODE)) {
681            /*tex The previous |selector| setting is saved:*/
682            int saved_selector = lmt_print_state.selector;
683            lmt_print_state.selector = logfile_selector_code;
684            lmt_fileio_state.log_opened = 1;
685            /*tex Again we resolve a callback id: */
686            callback_id = lmt_callback_defined(start_run_callback);
687            /*tex There is no need to free |fn|! */
688            if (callback_id == 0) {
689                tex_print_banner();
690                /*tex Print the banner line, including current date and time. */
691                tex_print_log_banner();
692                /*tex Make sure bottom level is in memory. */
693                lmt_input_state.input_stack[lmt_input_state.input_stack_data.ptr] = lmt_input_state.cur_input;
694                /*tex We don't have a first line so that code is gone. */
695                tex_print_ln();
696            } else if (callback_id > 0) {
697                lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->");
698            } else {
699                tex_print_banner();
700            }
701            /*tex should be done always */
702            if (lmt_print_state.loggable_info) {
703                fprintf(lmt_print_state.logfile, "%s\n", lmt_print_state.loggable_info);
704                lmt_memory_free(lmt_print_state.loggable_info);
705                lmt_print_state.loggable_info = NULL;
706            }
707            switch (saved_selector) {
708                case no_print_selector_code : lmt_print_state.selector = logfile_selector_code; break;
709                case terminal_selector_code : lmt_print_state.selector = terminal_and_logfile_selector_code; break;
710                default                     : lmt_print_state.selector = saved_selector; break;
711            }
712        } else {
713            tex_emergency_message("startup error", "log file '%s' cannot be opened, quitting", emergency_log_name);
714            tex_emergency_exit();
715        }
716    }
717}
718
719void tex_close_log_file(void)
720{
721    fclose(lmt_print_state.logfile);
722    lmt_fileio_state.log_opened = 0;
723}
724
725/*tex
726
    Let's turn now to the procedure that is used to initiate file reading when an |\input| command
    is being processed. This function is used for |\input| as well as at startup.
729
730*/
731
732void tex_start_input(char *fn, halfword at_end_of_file)
733{
734    /*tex Set up |cur_file| and new level of input. */
735    tex_begin_file_reading();
736    if (! tex_lua_a_open_in(fn)) {
737        /*tex
738            Normally this is catched earler, as we have lookup callbacks but the first file, the
739            one passed on the command line can fall though this checking.
740        */
741        tex_end_file_reading();
742        tex_emergency_message("runtime error", "input file '%s' is not found, quitting", fn);
743        tex_emergency_exit();
744    }
745    lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].full_source_filename = fn;
746    lmt_input_state.cur_input.name = io_file_input_code;
747    lmt_input_state.in_stack[lmt_input_state.cur_input.index].at_end_of_file = at_end_of_file;
748    /*tex
749        |open_log_file| doesn't |show_context|, so |limit| and |loc| needn't be set to meaningful
750        values yet.
751    */
752    tex_report_start_file((unsigned char *) fn);
753    ++lmt_input_state.open_files;
754    tex_terminal_update();
755    lmt_input_state.cur_input.state = new_line_state;
756    /*tex
757
758        Read the first line of the new file. Here we have to remember to tell the |lua_input_ln|
759        routine not to start with a |get|. If the file is empty, it is considered to contain a
760        single blank line.
761
762    */
763    lmt_input_state.input_line = 1;
764    tex_lua_input_ln();
765    lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. */
766    if (end_line_char_inactive) {
767        --lmt_input_state.cur_input.limit;
768    } else {
769        lmt_fileio_state.io_buffer[lmt_input_state.cur_input.limit] = (unsigned char) end_line_char_par;
770    }
771    lmt_fileio_state.io_first = lmt_input_state.cur_input.limit + 1;
772    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.start;
773}
774
775/*tex
776
777    In order to isolate the system-dependent aspects of file names, the system-independent parts of
778    \TEX\ are expressed in terms of three system-dependent procedures called |begin_name|,
779    |more_name|, and |end_name|. In essence, if the user-specified characters of the file name are
780    |c_1|\unknown|c_n|, the system-independent driver program does the operations
781
782    \starttyping
783    |begin_name|;
784    |more_name|(c_1);
785    .....
786    |more_name|(c_n);
787    |end_name|
788    \stoptyping
789
790    These three procedures communicate with each other via global variables. Afterwards the file
791    name will appear in the string pool as three strings called |cur_name|, |cur_area|, and
792    |cur_ext|; the latter two are null (i.e., |""|), unless they were explicitly specified by the
793    user.
794
795    Actually the situation is slightly more complicated, because \TEX\ needs to know when the file
796    name ends. The |more_name| routine is a function (with side effects) that returns |true| on the
797    calls |more_name (c_1)|, \dots, |more_name (c_{n - 1})|. The final call |more_name(c_n)| returns
798    |false|; or, it returns |true| and the token following |c_n| is something like |\hbox| (i.e.,
799    not a character). In other words, |more_name| is supposed to return |true| unless it is sure that
800    the file name has been completely scanned; and |end_name| is supposed to be able to finish the
801    assembly of |cur_name|, |cur_area|, and |cur_ext| regardless of whether |more_name (c_n)|
802    returned |true| or |false|.
803
804    This code has been adapted and the string pool is no longer used. We also don't ask for another
805    name on the console.
806
807*/
808
809/*tex
810
811    And here's the second. The string pool might change as the file name is being scanned, since a
812    new |\csname| might be entered; therefore we keep |area_delimiter| and |ext_delimiter| relative
813    to the beginning of the current string, instead of assigning an absolute address like |pool_ptr|
814    to them.
815
816    Now let's consider the \quote {driver} routines by which \TEX\ deals with file names in a
817    system-independent manner. First comes a procedure that looks for a file name in the input by
818    calling |get_x_token| for the information.
819
820*/
821
822char *tex_read_file_name(int optionalequal, const char * name, const char* ext)
823{
824    halfword result;
825    if (optionalequal) {
826        tex_scan_optional_equals();
827    }
828    do {
829        tex_get_x_token();
830    } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd);
831    if (cur_cmd == left_brace_cmd) {
832        result = tex_scan_toks_expand(1, NULL, 0, 0);
833    } else {
834        char quote = 0;
835        halfword p = get_reference_token();
836        result = p;
837        while (1) {
838            switch (cur_cmd) {
839                case escape_cmd:
840                case left_brace_cmd:
841                case right_brace_cmd:
842                case math_shift_cmd:
843                case alignment_tab_cmd:
844                case parameter_cmd:
845                case superscript_cmd:
846                case subscript_cmd:
847                case letter_cmd:
848                case other_char_cmd:
849                    switch (cur_chr) { 
850                        case double_quote:
851                            if (quote == double_quote) {
852                                goto DONE;
853                            } else {
854                                quote = double_quote;
855                            }
856                            break;
857                        case single_quote:
858                            if (quote == single_quote) {
859                                goto DONE;
860                            } else {
861                                quote = single_quote;
862                            }
863                            break;
864                        default:
865                            p = tex_store_new_token(p, cur_tok);
866                    }
867                    break;
868                case spacer_cmd:
869                case end_line_cmd:
870                    if (quote) {
871                        p = tex_store_new_token(p, token_val(spacer_cmd, ' '));
872                    } else {
873                        goto DONE;
874                    }
875                case ignore_cmd:
876                    break;
877                default:
878                    tex_back_input(cur_tok);
879                    goto DONE;
880            }
881            tex_get_x_token();
882        }
883    }
884  DONE:
885    {
886        int l = 0;
887        char *s = tex_tokenlist_to_tstring(result, 1, &l, 0, 0, 0, 1, 1); /* single hashes */
888        char *fn = s ? tex_aux_pack_file_name(s, l, name, ext) : NULL;
889        return fn;
890    }
891}
892
/*tex

    Print a file name. A name that contains a space is wrapped in double quotes. Double quotes
    that are part of the name itself are skipped (maybe they should be escaped instead). Nothing
    is printed when |name| is |NULL|.

*/

void tex_print_file_name(unsigned char *name)
{
    if (name) {
        int has_space = 0;
        const unsigned char *c;
        /*tex A first pass decides if we need to quote. */
        for (c = name; *c; c++) {
            if (*c == ' ') {
                has_space = 1;
                break;
            }
        }
        if (has_space) {
            tex_print_char('"');
        }
        /*tex A second pass prints everything but embedded quotes. */
        for (c = name; *c; c++) {
            if (*c != '"') {
                tex_print_char(*c);
            }
        }
        if (has_space) {
            tex_print_char('"');
        }
    }
}
927
928void tex_report_start_file(unsigned char *name)
929{
930    int callback_id = lmt_callback_defined(start_file_callback);
931    if (callback_id) {
932        lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "S->", name);
933    } else {
934        tex_print_char('(');
935        tex_print_file_name((unsigned char *) name);
936    }
937}
938
939void tex_report_stop_file(void)
940{
941    int callback_id = lmt_callback_defined(stop_file_callback);
942    if (callback_id) {
943        lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->");
944    } else {
945        tex_print_char(')');
946    }
947}
948