data-tmp.lua /size: 16 Kb    last modification: 2021-10-28 13:50
-- Register this file's metadata in the global modules table, creating that
-- table first when it does not exist yet.
if not modules then modules = { } end modules ['data-tmp'] = {
    version   = 1.100,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9--[[ldx--
10<p>This module deals with caching data. It sets up the paths and implements
11loaders and savers for tables. Best is to set the following variable. When not
12set, the usual paths will be checked. Personally I prefer the (users) temporary
13path.</p>
14
<code>
16TEXMFCACHE=$TMP;$TEMP;$TMPDIR;$TEMPDIR;$HOME;$TEXMFVAR;$VARTEXMF;.
17</code>
18
19<p>Currently we do no locking when we write files. This is no real problem
20because most caching involves fonts and the chance of them being written at the
21same time is small. We also need to extend luatools with a recache feature.</p>
22--ldx]]--
23
-- Local aliases for the global and library functions used in this file.
local next, type = next, type
local pcall, loadfile, collectgarbage = pcall, loadfile, collectgarbage
local format, lower, gsub = string.format, string.lower, string.gsub
local concat, serialize, fastserialize, serializetofile = table.concat, table.serialize, table.fastserialize, table.tofile
local mkdirs, expanddirname, isdir, isfile = dir.mkdirs, dir.expandname, lfs.isdir, lfs.isfile
local is_writable, is_readable = file.is_writable, file.is_readable
local collapsepath, joinfile, addsuffix, dirname = file.collapsepath, file.join, file.addsuffix, file.dirname
local savedata = file.savedata
local formatters = string.formatters
local osexit, osdate, osuuid = os.exit, os.date, os.uuid
local removefile = os.remove
local md5hex = md5.hex

-- Tracing flags; each one follows the value passed to its tracker callback.
local trace_locating = false  trackers.register("resolvers.locating", function(v) trace_locating = v end)
local trace_cache    = false  trackers.register("resolvers.cache",    function(v) trace_cache    = v end)

-- Two reporters: one for cache path handling, one for content loading/saving.
local report_caches    = logs.reporter("resolvers","caches")
local report_resolvers = logs.reporter("resolvers","caching")

local resolvers    = resolvers
local cleanpath    = resolvers.cleanpath
local resolvepath  = resolvers.resolve

local luautilities = utilities.lua
48
49-- intermezzo
50
do

    -- Fallback values for the compile options; each one follows the value
    -- passed to its directive callback.
    local directive_cleanup = false  directives.register("system.compile.cleanup", function(v) directive_cleanup = v end)
    local directive_strip   = false  directives.register("system.compile.strip",   function(v) directive_strip   = v end)

    -- Keep a reference to the compiler that is being wrapped.
    local compilelua = luautilities.compile

    -- Replacement for luautilities.compile: delegates to the wrapped compiler
    -- and substitutes the directive values for cleanup and strip when the
    -- caller passes nil (an explicit false is passed on unchanged).
    function luautilities.compile(luafile,lucfile,cleanup,strip)
        if cleanup == nil then cleanup = directive_cleanup end
        if strip   == nil then strip   = directive_strip   end
        return compilelua(luafile,lucfile,cleanup,strip)
    end

end
65
66-- end of intermezzo
67
-- Shared state for this file: the (global) caches table, the single writable
-- cache path, the list of readable cache paths and a record of which readable
-- paths were actually used.
caches              = caches or { }
local caches        = caches
local writable      = nil
local readables     = { }
local usedreadables = { }

local compilelua    = luautilities.compile
local luasuffixes   = luautilities.suffixes

-- Path components that are appended to every cache root, and the names that
-- are tried as a last resort when looking for a cache root.
caches.base         = caches.base or (LUATEXENGINE and LUATEXENGINE .. "-cache") or "luatex-cache"  -- can be local
caches.more         = caches.more or "context"       -- can be local
caches.defaults     = { "TMPDIR", "TEMPDIR", "TMP", "TEMP", "HOME", "HOMEPATH" }

local direct_cache  = false -- true is faster but may need huge amounts of memory
local fast_cache    = false
local cache_tree    = false

-- The flags follow the value handed to the callback (as the other directive
-- and tracker callbacks in this file do), so that a false value switches the
-- option off instead of on.
directives.register("system.caches.direct",function(v) direct_cache = v and true or false end)
directives.register("system.caches.fast",  function(v) fast_cache   = v and true or false end)
87
88-- we could use a metatable for writable and readable but not yet
89
-- Returns the list given by resolvers.configurationfiles() as one string,
-- with the entries separated by semicolons.
local function configfiles()
    return concat(resolvers.configurationfiles(),";")
end
93
-- Returns the md5 hex digest of a tree name. Before hashing, trailing slashes
-- and backslashes are removed and the name is lowercased, so names that only
-- differ in those respects give the same hash.
local function hashed(tree)
    tree = gsub(tree,"[\\/]+$","")
    tree = lower(tree)
    local hash = md5hex(tree)
    if trace_cache or trace_locating then
        report_caches("hashing tree %a, hash %a",tree,hash)
    end
    return hash
end
103
-- Returns the hash of the joined configuration file list, or false when that
-- list is empty.
local function treehash()
    local tree = configfiles()
    if not tree or tree == "" then
        return false
    else
        return hashed(tree)
    end
end
112
-- Export the hashing helpers.
caches.hashed      = hashed
caches.treehash    = treehash
caches.configfiles = configfiles
116
-- Determines the writable cache path and the list of readable cache paths.
-- Returns: writable (string), readables (table). The process is terminated
-- via os.exit when no writable or no readable path can be found. After the
-- first successful run this function replaces itself by one that just returns
-- the values found, so the scan happens only once.
local function identify()
    -- Combining the loops makes it messy. First we check the format cache path
    -- and when the last component is not present we try to create it.
    local texmfcaches = resolvers.cleanpathlist("TEXMFCACHE") -- forward ref
    if texmfcaches then
        for k=1,#texmfcaches do
            local cachepath = texmfcaches[k]
            if cachepath ~= "" then
                cachepath = resolvepath(cachepath)
                cachepath = cleanpath(cachepath)
                cachepath = collapsepath(cachepath)
                local valid = isdir(cachepath)
                if valid then
                    if is_readable(cachepath) then
                        readables[#readables+1] = cachepath
                        -- only the first writable path becomes the writable one
                        if not writable and is_writable(cachepath) then
                            writable = cachepath
                        end
                    end
                elseif not writable then
                    local cacheparent = dirname(cachepath)
                    if is_writable(cacheparent) then -- we go on anyway (needed for mojca's kind of paths)
                        mkdirs(cachepath)
                        if isdir(cachepath) and is_writable(cachepath) then
                            report_caches("path %a created",cachepath)
                            writable = cachepath
                            readables[#readables+1] = cachepath
                        end
                    end
                end
            end
        end
    end
    -- As a last resort we check some temporary paths but this time we don't
    -- create them. A path is only taken here when there is no writable path
    -- yet, and the first one that qualifies ends the search.
    local defaults = caches.defaults
    if defaults then
        for k=1,#defaults do
            local cachepath = defaults[k]
            cachepath = resolvers.expansion(cachepath) -- was getenv
            if cachepath ~= "" then
                cachepath = resolvepath(cachepath)
                cachepath = cleanpath(cachepath)
                local valid = isdir(cachepath)
                if valid and is_readable(cachepath) then
                    if not writable and is_writable(cachepath) then
                        readables[#readables+1] = cachepath
                        writable = cachepath
                        break
                    end
                end
            end
        end
    end
    -- Some extra checking. If we have no writable or readable path then we simply
    -- quit.
    if not writable then
        report_caches("fatal error: there is no valid writable cache path defined")
        osexit()
    elseif #readables == 0 then
        report_caches("fatal error: there is no valid readable cache path defined")
        osexit()
    end
    -- why here
    writable = expanddirname(cleanpath(writable)) -- just in case
    -- moved here ( we have only one writable tree)
    local base = caches.base
    local more = caches.more
    local tree = cache_tree or treehash() -- we have only one writable tree
    if tree then
        -- remember the tree hash and extend all paths with base/more/tree
        cache_tree = tree
        writable = mkdirs(writable,base,more,tree)
        for i=1,#readables do
            readables[i] = joinfile(readables[i],base,more,tree)
        end
    else
        -- no tree hash available: extend the paths with base/more only
        writable = mkdirs(writable,base,more)
        for i=1,#readables do
            readables[i] = joinfile(readables[i],base,more)
        end
    end
    -- end
    if trace_cache then
        for i=1,#readables do
            report_caches("using readable path %a (order %s)",readables[i],i)
        end
        report_caches("using writable path %a",writable)
    end
    -- from now on simply hand out what we found
    identify = function()
        return writable, readables
    end
    return writable, readables
end
210
-- Returns a string that describes the cache paths in use.
--   separator : optional string put between the entries (default " | ")
-- With more than one readable path the result lists each readable path that
-- equals the writable one or has been marked as used, followed by the writable
-- path when it was not listed yet. Otherwise just the writable path (or "?")
-- is returned.
function caches.usedpaths(separator)
    local writable, readables = identify()
    if #readables > 1 then
        local result = { }
        local done = { }
        for i=1,#readables do
            local readable = readables[i]
            if readable == writable then
                done[readable] = true
                result[#result+1] = formatters["readable+writable: %a"](readable)
            elseif usedreadables[i] then
                done[readable] = true
                result[#result+1] = formatters["readable: %a"](readable)
            end
        end
        if not done[writable] then
            result[#result+1] = formatters["writable: %a"](writable)
        end
        return concat(result,separator or " | ")
    else
        return writable or "?"
    end
end
234
-- Results of getreadablepaths and getwritablepath, keyed by the tags joined
-- with "/".
local r_cache = { }
local w_cache = { }

-- Returns the list of readable cache paths, each extended with the given tags
-- (subdirectory names). Without tags the list from identify() itself is
-- returned. The result is remembered per tag combination.
local function getreadablepaths(...)
    local tags = { ... }
    local hash = concat(tags,"/")
    local done = r_cache[hash]
    if not done then
        local _, readables = identify() -- exit if not found
        if #tags > 0 then
            done = { }
            for i=1,#readables do
                done[i] = joinfile(readables[i],...)
            end
        else
            done = readables
        end
        r_cache[hash] = done
    end
    return done
end
256
-- Returns the writable cache path extended with the given tags (subdirectory
-- names); with tags the path is passed through mkdirs. Without tags the
-- writable path from identify() itself is returned. The result is remembered
-- per tag combination.
local function getwritablepath(...)
    local tags = { ... }
    local hash = concat(tags,"/")
    local done = w_cache[hash]
    if not done then
        local writable = identify() -- exit if not found
        if #tags > 0 then
            done = mkdirs(writable,...)
        else
            done = writable
        end
        w_cache[hash] = done
    end
    return done
end
272
-- Returns the full name of filename inside the writable path for the given
-- tags, followed by that path itself.
local function setfirstwritablefile(filename,...)
    local wr = getwritablepath(...)
    local fullname = joinfile(wr,filename)
    return fullname, wr
end
278
-- Returns two file names built from path and name: first the one with the
-- luasuffixes.tma suffix, then the one with the luasuffixes.tmc suffix.
local function setluanames(path,name)
    return
        format("%s/%s.%s",path,name,luasuffixes.tma),
        format("%s/%s.%s",path,name,luasuffixes.tmc)
end
284
-- Looks for a readable instance of filename under the given tags.
-- Returns: full file name, path.
-- The writable path is checked first, then the readable paths in order (the
-- index of the readable path that delivers the file is marked as used). When
-- nothing is found the name in the writable path is returned, so that the
-- caller can create the file there.
local function getfirstreadablefile(filename,...)
    -- check if we have already written once
    local fullname, path = setfirstwritablefile(filename,...)
    if is_readable(fullname) then
        return fullname, path -- , true
    end
    -- otherwise search for pregenerated
    local rd = getreadablepaths(...)
    for i=1,#rd do
        local readablepath = rd[i]
        local readablename = joinfile(readablepath,filename)
        if is_readable(readablename) then
            usedreadables[i] = true
            return readablename, readablepath -- , false
        end
    end
    -- else assume new written
    return fullname, path -- , true
end
304
-- Export the path helpers.
caches.getreadablepaths     = getreadablepaths
caches.getwritablepath      = getwritablepath
caches.setfirstwritablefile = setfirstwritablefile
caches.getfirstreadablefile = getfirstreadablefile
caches.setluanames          = setluanames
310
311-- -- not used:
312--
313-- function caches.define(category,subcategory)
314--     return function()
315--         return getwritablepath(category,subcategory)
316--     end
317-- end
318
319-- This works best if the first writable is the first readable too. In practice
320-- we can have these situations for file databases:
321--
322-- tma in readable
323-- tma + tmb/c in readable
324--
325-- runtime files like fonts are written to the writable cache anyway
326
local checkmemory = utilities and utilities.lua and utilities.lua.checkmemory
local threshold   = 100 -- MB

-- Loads filename as a chunk. Returns the chunk, or nil when loadfile fails or
-- raises an error. Checking the pcall status matters: on a raised error the
-- second pcall result is the error message, which must not be mistaken for a
-- loaded chunk.
local function loadchunk(filename)
    local state, chunk = pcall(loadfile,filename)
    if state then
        return chunk
    end
    return nil
end

-- Loads cached data with the given name.
--   readables : a path or a list of paths that are tried in order
--   name      : base name of the cache file (without suffix)
--   writable  : path used for the compiled file when only the source variant
--               is found in a readable path
-- Returns what the first loadable file returns when it is run, or false when
-- no file could be loaded.
function caches.loaddata(readables,name,writable)
    local used = checkmemory and checkmemory()
    if type(readables) == "string" then
        readables = { readables }
    end
    for i=1,#readables do
        local path   = readables[i]
        local loader = false
        local tmaname, tmcname = setluanames(path,name)
        -- the compiled variant is preferred
        if isfile(tmcname) then
            loader = loadchunk(tmcname)
        end
        if not loader and isfile(tmaname) then
            -- can be different paths when we read a file database from disk
            local tmacrap, tmcname = setluanames(writable,name)
            if isfile(tmcname) then
                loader = loadchunk(tmcname)
            end
            -- (re)compile the source into the writable path and try again
            compilelua(tmaname,tmcname)
            if isfile(tmcname) then
                loader = loadchunk(tmcname)
            end
            -- last resort: the uncompiled source
            if not loader then
                loader = loadchunk(tmaname)
            end
        end
        if loader then
            loader = loader()
            if checkmemory then
                checkmemory(used,threshold)
            else -- generic
                collectgarbage("step") -- option, really slows down!
            end
            return loader
        end
    end
    return false
end
369
-- Reports whether the source (tma) variant of the cache file filename in
-- filepath passes the is_writable check.
function caches.is_writable(filepath,filename)
    local tmaname, tmcname = setluanames(filepath,filename)
    return is_writable(tmaname)
end
374
-- Options handed to the serializers in the non-fast branches below.
local saveoptions = { compact = true, accurate = not JITSUPPORTED }

-- Saves data as cache file filename in filepath and compiles the result.
--   data : table; a fresh cache_uuid field is stored in it before saving
--   fast : when true (or when the fast cache directive is set) the fast
--          serializer is used
-- Without fast mode the data is either serialized to a string and saved
-- (direct cache) or serialized to the file directly.
function caches.savedata(filepath,filename,data,fast)
    local tmaname, tmcname = setluanames(filepath,filename)
    data.cache_uuid = osuuid()
    if fast or fast_cache then
        savedata(tmaname,fastserialize(data,true))
    elseif direct_cache then
        savedata(tmaname,serialize(data,true,saveoptions))
    else
        serializetofile(tmaname,data,true,saveoptions)
    end
    compilelua(tmaname,tmcname)
end
389
390-- moved from data-res:
391
-- List of the uuid fields of all content files loaded by caches.loadcontent.
local content_state = { }

-- Returns the list of uuids of the content loaded so far.
function caches.contentstate()
    return content_state or { }
end
397
-- Loads the content table of a cached tree.
--   cachename : name of the tree; its hash determines the file name when no
--               filename is given
--   dataname  : expected value of the type field in the file
--   filename  : optional file name without suffix
-- Returns the content field of the loaded data when the type and version
-- match, and nothing otherwise. The uuid of accepted data is appended to the
-- content state list.
function caches.loadcontent(cachename,dataname,filename)
    if not filename then
        local name = hashed(cachename)
        local full, path = getfirstreadablefile(addsuffix(name,luasuffixes.lua),"trees")
        filename = joinfile(path,name)
    end
    -- The compiled file goes first, then the source. The pcall status is
    -- checked too: when loadfile raises, the second result is an error message
    -- and not a chunk that can be called.
    local state, blob = pcall(loadfile,addsuffix(filename,luasuffixes.luc))
    if not state or not blob then
        state, blob = pcall(loadfile,addsuffix(filename,luasuffixes.lua))
    end
    if state and blob then
        local data = blob()
        if data and data.content then
            if data.type == dataname then
                if data.version == resolvers.cacheversion then
                    content_state[#content_state+1] = data.uuid
                    if trace_locating then
                        report_resolvers("loading %a for %a from %a",dataname,cachename,filename)
                    end
                    return data.content
                else
                    report_resolvers("skipping %a for %a from %a (version mismatch)",dataname,cachename,filename)
                end
            else
                report_resolvers("skipping %a for %a from %a (datatype mismatch)",dataname,cachename,filename)
            end
        elseif trace_locating then
            report_resolvers("skipping %a for %a from %a (no content)",dataname,cachename,filename)
        end
    elseif trace_locating then
        report_resolvers("skipping %a for %a from %a (invalid file)",dataname,cachename,filename)
    end
end
431
-- Replaces, in place, every value of content that is a table of length one by
-- its single element. Only existing keys are reassigned, which is permitted
-- while traversing with next.
function caches.collapsecontent(content)
    for k, v in next, content do
        if type(v) == "table" and #v == 1 then
            content[k] = v[1]
        end
    end
end
439
-- Saves the content table of a tree, wrapped in a table with some metadata,
-- and compiles the saved file.
--   cachename : name of the tree; its hash determines the file name when no
--               filename is given
--   dataname  : stored in the type field
--   content   : stored in the content field
--   filename  : optional file name without suffix
-- Returns true when the file has been saved and compiled, nothing otherwise.
-- When compilation fails the compiled file is removed.
function caches.savecontent(cachename,dataname,content,filename)
    if not filename then
        local name = hashed(cachename)
        local full, path = setfirstwritablefile(addsuffix(name,luasuffixes.lua),"trees")
        filename = joinfile(path,name) -- is full
    end
    local luaname = addsuffix(filename,luasuffixes.lua)
    local lucname = addsuffix(filename,luasuffixes.luc)
    if trace_locating then
        report_resolvers("preparing %a for %a",dataname,cachename)
    end
    -- type and version are checked again when the file is loaded
    local data = {
        type    = dataname,
        root    = cachename,
        version = resolvers.cacheversion,
        date    = osdate("%Y-%m-%d"),
        time    = osdate("%H:%M:%S"),
        content = content,
        uuid    = osuuid(),
    }
    local ok = savedata(luaname,serialize(data,true))
    if ok then
        if trace_locating then
            report_resolvers("category %a, cachename %a saved in %a",dataname,cachename,luaname)
        end
        if compilelua(luaname,lucname) then
            if trace_locating then
                report_resolvers("%a compiled to %a",dataname,lucname)
            end
            return true
        else
            if trace_locating then
                report_resolvers("compiling failed for %a, deleting file %a",dataname,lucname)
            end
            removefile(lucname)
        end
    elseif trace_locating then
        report_resolvers("unable to save %a in %a (access error)",dataname,luaname)
    end
end
480