data-tmp.lua /size: 16 Kb    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['data-tmp'] = {
2    version   = 1.100,
3    comment   = "companion to luat-lib.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- This module deals with caching data. It sets up the paths and implements loaders
10-- and savers for tables. Best is to set the following variable. When not set, the
11-- usual paths will be checked. Personally I prefer the (users) temporary path.
12--
13--   TEXMFCACHE=$TMP;$TEMP;$TMPDIR;$TEMPDIR;$HOME;$TEXMFVAR;$VARTEXMF;.
14--
15-- Currently we do no locking when we write files. This is no real problem because
16-- most caching involves fonts and the chance of them being written at the same time
17-- is small. We also need to extend luatools with a recache feature.
18
19local next, type = next, type
20local pcall, loadfile, collectgarbage = pcall, loadfile, collectgarbage
21local format, lower, gsub = string.format, string.lower, string.gsub
22local concat, serialize, fastserialize, serializetofile = table.concat, table.serialize, table.fastserialize, table.tofile
23local mkdirs, expanddirname, isdir, isfile = dir.mkdirs, dir.expandname, lfs.isdir, lfs.isfile
24local is_writable, is_readable = file.is_writable, file.is_readable
25local collapsepath, joinfile, addsuffix, dirname = file.collapsepath, file.join, file.addsuffix, file.dirname
26local savedata = file.savedata
27local formatters = string.formatters
28local osexit, osdate, osuuid = os.exit, os.date, os.uuid
29local removefile = os.remove
30local md5hex = md5.hex
31
32local trace_locating = false  trackers.register("resolvers.locating", function(v) trace_locating = v end)
33local trace_cache    = false  trackers.register("resolvers.cache",    function(v) trace_cache    = v end)
34
35local report_caches    = logs.reporter("resolvers","caches")
36local report_resolvers = logs.reporter("resolvers","caching")
37
38local resolvers    = resolvers
39local cleanpath    = resolvers.cleanpath
40local resolvepath  = resolvers.resolve
41
42local luautilities = utilities.lua
43
44-- intermezzo
45
do

    -- Fallback values for the optional "cleanup" and "strip" arguments of the
    -- compiler. Both are updated through the directives registered below.

    local cleanup_default = false
    local strip_default   = false

    directives.register("system.compile.cleanup", function(v) cleanup_default = v end)
    directives.register("system.compile.strip",   function(v) strip_default   = v end)

    -- We keep a handle on the compiler that was installed before this file got
    -- loaded and wrap it.

    local original_compile = luautilities.compile

    -- Same call signature as the wrapped function; an argument that is passed
    -- as nil is replaced by the value set by the matching directive.

    function luautilities.compile(luafile,lucfile,cleanup,strip)
        if cleanup == nil then
            cleanup = cleanup_default
        end
        if strip == nil then
            strip = strip_default
        end
        return original_compile(luafile,lucfile,cleanup,strip)
    end

end
60
61-- end of intermezzo
62
-- The global namespace is created when it is not yet there; the remainder of
-- this file works with the local alias.

if not caches then
    caches = { }
end

local caches        = caches
local writable      = nil   -- assigned by identify()
local readables     = { }   -- filled by identify()
local usedreadables = { }   -- indices of readable paths that delivered a file

local compilelua    = luautilities.compile
local luasuffixes   = luautilities.suffixes

-- An already configured base or subpath is kept, otherwise we fall back on a
-- name derived from the engine (when known).

if not caches.base then
    if LUATEXENGINE then
        caches.base = LUATEXENGINE .. "-cache" -- can be local
    else
        caches.base = "luatex-cache"
    end
end

if not caches.more then
    caches.more = "context" -- can be local
end

caches.defaults = { "TMPDIR", "TEMPDIR", "TMP", "TEMP", "HOME", "HOMEPATH" }

local direct_cache  = false -- true is faster but may need huge amounts of memory
local fast_cache    = false
local cache_tree    = false

-- NOTE(review): both callbacks ignore the value that is passed with the
-- directive, so these flags can only be switched on; confirm that this is
-- intended.

directives.register("system.caches.direct",function() direct_cache = true end)
directives.register("system.caches.fast",  function() fast_cache   = true end)
82
83-- we could use a metatable for writable and readable but not yet
84
-- Returns the list reported by resolvers.configurationfiles() as one string
-- with the entries separated by semicolons.

local function configfiles()
    local list = resolvers.configurationfiles()
    return concat(list,";")
end
88
-- Returns the md5 hex digest of the given tree specification. Trailing slashes
-- and backslashes are dropped and the string is lowercased before hashing, so
-- specifications that only differ in those respects get the same hash.

local function hashed(tree)
    local normalized = lower((gsub(tree,"[\\/]+$","")))
    local digest     = md5hex(normalized)
    if trace_cache or trace_locating then
        report_caches("hashing tree %a, hash %a",normalized,digest)
    end
    return digest
end
98
-- Returns the hash of the joined configuration file list, or false when that
-- list is missing or empty.

local function treehash()
    local files = configfiles()
    if files and files ~= "" then
        return hashed(files)
    end
    return false
end

-- The helpers are also accessible via the caches namespace.

caches.hashed      = hashed
caches.treehash    = treehash
caches.configfiles = configfiles
111
-- Determines the single writable cache path and the list of readable cache
-- paths. The result is stored in the file level variables 'writable' and
-- 'readables' and returned as two values. After the first successful call the
-- function replaces itself by a stub that just returns the stored values, so
-- the scan only happens once.
--
-- When no usable path can be found a message is reported and the program
-- exits with a non zero status.

local function identify()
    -- Combining the loops makes it messy. First we check the format cache path
    -- and when the last component is not present we try to create it.
    local texmfcaches = resolvers.cleanpathlist("TEXMFCACHE") -- forward ref
    if texmfcaches then
        for k=1,#texmfcaches do
            local cachepath = texmfcaches[k]
            if cachepath ~= "" then
                cachepath = resolvepath(cachepath)
                cachepath = cleanpath(cachepath)
                cachepath = collapsepath(cachepath)
                if isdir(cachepath) then
                    -- an existing path: every readable one is remembered, the
                    -- first one that is also writable becomes the writable path
                    if is_readable(cachepath) then
                        readables[#readables+1] = cachepath
                        if not writable and is_writable(cachepath) then
                            writable = cachepath
                        end
                    end
                elseif not writable then
                    -- a missing path: we only try to create it as long as we
                    -- have no writable path yet
                    local cacheparent = dirname(cachepath)
                    if is_writable(cacheparent) then -- we go on anyway (needed for mojca's kind of paths)
                        mkdirs(cachepath)
                        if isdir(cachepath) and is_writable(cachepath) then
                            report_caches("path %a created",cachepath)
                            writable = cachepath
                            readables[#readables+1] = cachepath
                        end
                    end
                end
            end
        end
    end
    -- As a last resort we check some temporary paths but this time we don't
    -- create them. (This list used to reuse the name of the one above.)
    local defaults = caches.defaults
    if defaults then
        for k=1,#defaults do
            local cachepath = resolvers.expansion(defaults[k]) -- was getenv
            if cachepath ~= "" then
                cachepath = resolvepath(cachepath)
                cachepath = cleanpath(cachepath)
                if isdir(cachepath) and is_readable(cachepath) then
                    if not writable and is_writable(cachepath) then
                        readables[#readables+1] = cachepath
                        writable = cachepath
                        break
                    end
                end
            end
        end
    end
    -- Some extra checking. If we have no writable or readable path then we simply
    -- quit. We pass an explicit non zero exit code because calling os.exit
    -- without argument signals success to the calling process.
    if not writable then
        report_caches("fatal error: there is no valid writable cache path defined")
        osexit(1)
    elseif #readables == 0 then
        report_caches("fatal error: there is no valid readable cache path defined")
        osexit(1)
    end
    -- why here
    writable = expanddirname(cleanpath(writable)) -- just in case
    -- moved here ( we have only one writable tree)
    local base = caches.base
    local more = caches.more
    local tree = cache_tree or treehash() -- we have only one writable tree
    if tree then
        -- the tree hash is kept so that it is computed only once
        cache_tree = tree
        writable = mkdirs(writable,base,more,tree)
        for i=1,#readables do
            readables[i] = joinfile(readables[i],base,more,tree)
        end
    else
        writable = mkdirs(writable,base,more)
        for i=1,#readables do
            readables[i] = joinfile(readables[i],base,more)
        end
    end
    -- end
    if trace_cache then
        for i=1,#readables do
            report_caches("using readable path %a (order %s)",readables[i],i)
        end
        report_caches("using writable path %a",writable)
    end
    -- from now on we just return what we found
    identify = function()
        return writable, readables
    end
    return writable, readables
end
205
-- Returns a string describing the cache paths in use. With at most one
-- readable path this is just the writable path (or "?" when there is none).
-- Otherwise each readable path that equals the writable one or that has
-- delivered a file is listed, followed by the writable path when it was not
-- among the readable ones. Entries are joined by 'separator' (default " | ").

function caches.usedpaths(separator)
    local writable, readables = identify()
    local count = #readables
    if count <= 1 then
        return writable or "?"
    end
    local entries = { }
    local listed  = false -- true once the writable path showed up as readable
    for index=1,count do
        local readable = readables[index]
        if readable == writable then
            listed = true
            entries[#entries+1] = formatters["readable+writable: %a"](readable)
        elseif usedreadables[index] then
            entries[#entries+1] = formatters["readable: %a"](readable)
        end
    end
    if not listed then
        entries[#entries+1] = formatters["writable: %a"](writable)
    end
    return concat(entries,separator or " | ")
end
229
-- Results of the path lookups below, keyed by the tags joined with a slash.

local r_cache = { }
local w_cache = { }

-- Returns the list of readable cache paths, each extended with the given
-- subpath tags. Without tags the list from identify() itself is returned.
-- The result is remembered per tag combination.

local function getreadablepaths(...)
    local tags   = { ... }
    local key    = concat(tags,"/")
    local cached = r_cache[key]
    if cached then
        return cached
    end
    local _, readables = identify() -- exit if not found
    local paths
    if #tags == 0 then
        paths = readables
    else
        paths = { }
        for index=1,#readables do
            paths[index] = joinfile(readables[index],...)
        end
    end
    r_cache[key] = paths
    return paths
end
251
-- Returns the writable cache path extended with the given subpath tags; with
-- tags the path is passed through mkdirs. A result that is not false or nil is
-- remembered per tag combination.

local function getwritablepath(...)
    local tags   = { ... }
    local key    = concat(tags,"/")
    local cached = w_cache[key]
    if cached then
        return cached
    end
    local path = identify() -- exit if not found
    if #tags > 0 then
        path = mkdirs(path,...)
    end
    w_cache[key] = path
    return path
end
267
-- Returns the full name of 'filename' in the writable cache path (extended
-- with the given tags) and that path itself.

local function setfirstwritablefile(filename,...)
    local path = getwritablepath(...)
    return joinfile(path,filename), path
end
273
-- Returns two file names built as <path>/<name>.<suffix>: the first with the
-- 'tma' suffix and the second with the 'tmc' suffix from luasuffixes.

local function setluanames(path,name)
    local template = "%s/%s.%s"
    local tmaname  = format(template,path,name,luasuffixes.tma)
    local tmcname  = format(template,path,name,luasuffixes.tmc)
    return tmaname, tmcname
end
279
-- Returns the full name and the path of the first readable instance of
-- 'filename'. The writable path is checked first, then the readable paths in
-- order (the index of a path that delivers is flagged in usedreadables). When
-- nothing is found the name in the writable path is returned, assuming that
-- the file will be written there.

local function getfirstreadablefile(filename,...)
    local writename, writepath = setfirstwritablefile(filename,...)
    if not is_readable(writename) then
        -- not written by us before, so we search for a pregenerated one
        local candidates = getreadablepaths(...)
        for index=1,#candidates do
            local readpath = candidates[index]
            local readname = joinfile(readpath,filename)
            if is_readable(readname) then
                usedreadables[index] = true
                return readname, readpath -- , false
            end
        end
    end
    -- either already written once or assumed to be written later
    return writename, writepath -- , true
end

caches.getreadablepaths     = getreadablepaths
caches.getwritablepath      = getwritablepath
caches.setfirstwritablefile = setfirstwritablefile
caches.getfirstreadablefile = getfirstreadablefile
caches.setluanames          = setluanames
305
306-- -- not used:
307--
308-- function caches.define(category,subcategory)
309--     return function()
310--         return getwritablepath(category,subcategory)
311--     end
312-- end
313
314-- This works best if the first writable is the first readable too. In practice
315-- we can have these situations for file databases:
316--
317-- tma in readable
318-- tma + tmb/c in readable
319--
320-- runtime files like fonts are written to the writable cache anyway
321
local checkmemory = utilities and utilities.lua and utilities.lua.checkmemory
local threshold   = 100 -- MB

-- Loads a file as chunk. We only accept the result when the protected call
-- succeeded and delivered a function: when pcall fails its second return
-- value is an error message, and such a (non false) string must not be taken
-- for a loaded chunk.

local function loadchunk(filename)
    local state, chunk = pcall(loadfile,filename)
    if state and type(chunk) == "function" then
        return chunk
    end
    return false
end

-- Loads cached data with the given name.
--
--   readables : a path or a list of paths that are checked in order
--   name      : the base name, suffixes are added by setluanames
--   writable  : the path where a freshly compiled file is written
--
-- For each readable path the compiled (tmc) file is tried first. When that
-- gives nothing and the source (tma) file is present, the source is compiled
-- into the writable path and the result is loaded, with the source itself as
-- last resort. The value returned by the first chunk that could be loaded is
-- returned; when no path delivers a chunk we return false.

function caches.loaddata(readables,name,writable)
    local used = checkmemory and checkmemory()
    if type(readables) == "string" then
        readables = { readables }
    end
    for i=1,#readables do
        local path   = readables[i]
        local loader = false
        local tmaname, tmcname = setluanames(path,name)
        if isfile(tmcname) then
            loader = loadchunk(tmcname)
        end
        if not loader and isfile(tmaname) then
            -- can be different paths when we read a file database from disk
            local _, tmcname = setluanames(writable,name)
            if isfile(tmcname) then
                loader = loadchunk(tmcname)
            end
            compilelua(tmaname,tmcname)
            if isfile(tmcname) then
                loader = loadchunk(tmcname)
            end
            if not loader then
                loader = loadchunk(tmaname)
            end
        end
        if loader then
            -- running the chunk is not protected, errors propagate
            local data = loader()
            if checkmemory then
                checkmemory(used,threshold)
            else -- generic
                collectgarbage("step") -- option, really slows down!
            end
            return data
        end
    end
    return false
end
364
-- Returns what file.is_writable reports for the tma file that belongs to the
-- given path and name.

function caches.is_writable(filepath,filename)
    local tmaname = setluanames(filepath,filename)
    return is_writable(tmaname)
end
369
-- Options passed to the regular serializers.

local saveoptions = {
    compact  = true,
    accurate = not JITSUPPORTED,
}

-- Saves 'data' as tma file in the given path and compiles it to a tmc file.
-- A fresh uuid is stored in data.cache_uuid first, so the passed table gets
-- modified. The serializer depends on the 'fast' argument and on the two
-- cache directives: fast serialization, serializing to a string first
-- (direct), or serializing straight to file.

function caches.savedata(filepath,filename,data,fast)
    local tmaname, tmcname = setluanames(filepath,filename)
    data.cache_uuid = osuuid()
    local quick = fast or fast_cache
    if not quick and not direct_cache then
        serializetofile(tmaname,data,true,saveoptions)
    elseif quick then
        savedata(tmaname,fastserialize(data,true))
    else
        savedata(tmaname,serialize(data,true,saveoptions))
    end
    compilelua(tmaname,tmcname)
end
384
385-- moved from data-res:
386
-- The uuids of the content files that were loaded so far, in loading order.

local content_state = { }

-- Returns the list of collected uuids (or an empty table when there is none).

function caches.contentstate()
    if content_state then
        return content_state
    end
    return { }
end
392
-- Loads the content table of a cached file database.
--
--   cachename : the root that the data belongs to; when no filename is given
--               its hash determines the file name in the "trees" subpath
--   dataname  : the expected value of the 'type' field in the file
--   filename  : optional explicit file name (without suffix)
--
-- The compiled (luc) file is tried first and the lua file after that. The
-- content is only returned when the file has a content field, the type matches
-- and the version equals resolvers.cacheversion; in that case the uuid of the
-- file is added to the content state. In all other cases nothing is returned.

function caches.loadcontent(cachename,dataname,filename)
    if not filename then
        local name = hashed(cachename)
        local _, path = getfirstreadablefile(addsuffix(name,luasuffixes.lua),"trees")
        filename = joinfile(path,name)
    end
    -- When the protected call itself fails, the second return value is an
    -- error message. That string must neither block the fallback to the lua
    -- file nor be called as if it were a chunk, so we check both values.
    local state, blob = pcall(loadfile,addsuffix(filename,luasuffixes.luc))
    if not state or type(blob) ~= "function" then
        state, blob = pcall(loadfile,addsuffix(filename,luasuffixes.lua))
    end
    if state and type(blob) == "function" then
        local data = blob()
        if data and data.content then
            if data.type == dataname then
                if data.version == resolvers.cacheversion then
                    content_state[#content_state+1] = data.uuid
                    if trace_locating then
                        report_resolvers("loading %a for %a from %a",dataname,cachename,filename)
                    end
                    return data.content
                else
                    report_resolvers("skipping %a for %a from %a (version mismatch)",dataname,cachename,filename)
                end
            else
                report_resolvers("skipping %a for %a from %a (datatype mismatch)",dataname,cachename,filename)
            end
        elseif trace_locating then
            report_resolvers("skipping %a for %a from %a (no content)",dataname,cachename,filename)
        end
    elseif trace_locating then
        report_resolvers("skipping %a for %a from %a (invalid file)",dataname,cachename,filename)
    end
end
426
-- Replaces, in place, each value of 'content' that is a table of length one by
-- the single entry of that table. Nothing is returned.

function caches.collapsecontent(content)
    local key, value = next(content)
    while key ~= nil do
        if type(value) == "table" and #value == 1 then
            -- we only overwrite an existing field, which is permitted during
            -- a traversal
            content[key] = value[1]
        end
        key, value = next(content,key)
    end
end
434
-- Saves the content of a file database as lua file and compiles that file.
--
--   cachename : the root that the data belongs to; when no filename is given
--               its hash determines the file name in the writable "trees"
--               subpath
--   dataname  : stored in the 'type' field of the file
--   content   : the table to be saved
--   filename  : optional explicit file name (without suffix)
--
-- The content is wrapped in a table that also carries the root, the cache
-- version, the date, the time and a uuid. We return true when saving as well
-- as compiling succeeded. When compilation fails the compiled file is removed
-- and, as with a failed save, nothing is returned.

function caches.savecontent(cachename,dataname,content,filename)
    if not filename then
        local name = hashed(cachename)
        local _, path = setfirstwritablefile(addsuffix(name,luasuffixes.lua),"trees")
        filename = joinfile(path,name) -- is full
    end
    local luaname = addsuffix(filename,luasuffixes.lua)
    local lucname = addsuffix(filename,luasuffixes.luc)
    if trace_locating then
        report_resolvers("preparing %a for %a",dataname,cachename)
    end
    local wrapper = {
        type    = dataname,
        root    = cachename,
        version = resolvers.cacheversion,
        date    = osdate("%Y-%m-%d"),
        time    = osdate("%H:%M:%S"),
        content = content,
        uuid    = osuuid(),
    }
    local saved = savedata(luaname,serialize(wrapper,true))
    if not saved then
        if trace_locating then
            report_resolvers("unable to save %a in %a (access error)",dataname,luaname)
        end
        return
    end
    if trace_locating then
        report_resolvers("category %a, cachename %a saved in %a",dataname,cachename,luaname)
    end
    if not compilelua(luaname,lucname) then
        if trace_locating then
            report_resolvers("compiling failed for %a, deleting file %a",dataname,lucname)
        end
        removefile(lucname)
        return
    end
    if trace_locating then
        report_resolvers("%a compiled to %a",dataname,lucname)
    end
    return true
end
475