data-sch.lmt /size: 10 Kb    last modification: 2024-01-16 10:22
1if not modules then modules = { } end modules ['data-sch'] = {
2    version   = 1.001,
3    comment   = "companion to luat-lib.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local load, tonumber, require = load, tonumber, require
10local gsub, format = string.gsub, string.format
11local savedata = io.savedata
12local sortedhash, concat = table.sortedhash, table.concat
13local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders
14local addsuffix, suffix, splitbase = file.addsuffix, file.suffix, file.splitbase
15local md5hex = md5.hex
16local removefile, renamefile, fileexists = os.remove, os.rename, io.exists
17
18-- todo: more locals
19
-- Tracing is off by default; the "resolvers.schemes" tracker switches it.
local trace_schemes = false

trackers.register("resolvers.schemes", function(v)
    trace_schemes = v
end)

local report_schemes = logs.reporter("resolvers","schemes")

-- LuaSocket modules, used further down by the built-in http handler.
local http  = require("socket.http")
local ltn12 = require("ltn12")

-- useless and even bugged (helper overwrites lib)
if mbox then
    mbox = nil
end
27
-- The scheme registry lives in resolvers.schemes; an existing table is
-- reused, otherwise a fresh one is created and stored there.
local resolvers = resolvers
local schemes   = resolvers.schemes
if not schemes then
    schemes = { }
end
resolvers.schemes = schemes

-- Name cleaners are kept in a table so that one can be selected by key.
local cleaners   = { }
schemes.cleaners = cleaners

-- Defaults, each of them adjustable with a directive.
local threshold  = 24 * 60 * 60 -- compared with a file age in seconds: one day
local inmemory   = false
local uselibrary = false

-- A value that does not convert to a number leaves the threshold untouched.
directives.register("schemes.threshold", function(v)
    local seconds = tonumber(v)
    if seconds then
        threshold = seconds
    end
end)

directives.register("schemes.inmemory", function(v)
    inmemory = v
end)

directives.register("schemes.uselibrary", function(v)
    uselibrary = v
end)
42
-- Identity cleaner: the original specification string is used as it is.
function cleaners.none(specification)
    local original = specification.original
    return original
end
46
47-- function cleaners.strip(specification)
48--     -- todo: only keep suffix periods, so after the last
49--     return (gsub(specification.original,"[^%a%d%.]+","-")) -- so we keep periods
50-- end
51
-- Flatten the original specification into a single name: each run of
-- characters that are not letters, digits or periods collapses into one "-".
-- Periods in the path part are turned into "-" beforehand, so only periods in
-- the name part survive (this keeps suffixes).
function cleaners.strip(specification)
    local path, name = splitbase(specification.original)
    local flat = name
    if path ~= "" then
        flat = gsub(path,"%.","-") .. "-" .. name
    end
    -- assigning to a local drops the replacement count that gsub also returns
    local cleaned = gsub(flat,"[^%a%d%.]+","-")
    return cleaned
end
60
-- Hash the complete original specification and give the digest the suffix
-- that is found on the path component of the specification.
function cleaners.md5(specification)
    local digest = md5hex(specification.original)
    return addsuffix(digest,suffix(specification.path))
end
64
-- The cleaner in use. The "schemes.cleanmethod" directive selects one by its
-- key in the cleaners table; an unknown key falls back on the strip cleaner.
local cleaner = cleaners.strip

directives.register("schemes.cleanmethod", function(v)
    local chosen = cleaners[v]
    if chosen then
        cleaner = chosen
    else
        cleaner = cleaners.strip
    end
end)
68
-- Apply the currently selected cleaner to a specification and return the
-- resulting name; the mapping is reported when tracing is enabled.
function resolvers.schemes.cleanname(specification)
    local cleaned = cleaner(specification)
    if not trace_schemes then
        return cleaned
    end
    report_schemes("hashing %a to %a",specification.original,cleaned)
    return cleaned
end
76
-- Bookkeeping shared by the functions below:
--
--   cached     : original specification -> cache file name ("" when missing)
--   loaded     : scheme -> number of times a resource was processed
--   reused     : scheme -> number of times an earlier result was reused
--   thresholds : scheme -> threshold registered when the scheme was installed
--   handlers   : scheme -> built-in fetch function, when there is one
local cached, loaded, reused, thresholds, handlers = { }, { }, { }, { }, { }
82
-- Return a function that fetches an url with the curl library, or nothing
-- when the "schemes.uselibrary" directive is off or the library offers no
-- fetch function. The returned function yields the fetched data; on failure
-- it calls report with a message and returns the (false or nil) result.
--
-- NOTE: host and peer verification are switched off here, in line with the
-- --insecure flag in the command line templates of the runners.
local function fetcher(report)
    if not uselibrary then
        return
    end
    local curl  = require("curl") or require("libs-imp-curl") -- we have curl preloaded
    local fetch = curl and curl.fetch
    if not fetch then
        return
    end
    return function(str)
        local data, message = fetch {
            url            = str,
            followlocation = true,
            sslverifyhost  = false,
            sslverifypeer  = false,
        }
        if data then
            return data
        end
        report("some error: %s",message)
        return data
    end
end
103
-- Sandbox runner that stores a resource in a cache file. It is registered as
-- an "execute" runner for the curl program with the template below. The
-- internal hook returns a replacement that fetches with the curl library and
-- saves the result itself (an empty file when fetching fails), or nil when no
-- library fetcher is available.
-- NOTE(review): how the sandbox chooses between the program and the function
-- returned by internal is not visible in this file -- confirm in the sandbox.
local runner = sandbox.registerrunner {
    name     = "to file curl resolver",
    method   = "execute",
    program  = "curl",
    template = '--silent --insecure --create-dirs --output "%cachename%" "%original%"',
    checkers = {
        cachename = "cache",
        original  = "url",
    },
    internal = function(specification)
        local fetch = fetcher(specification.reporter)
        if not fetch then
            return fetch
        end
        return function(name,program,template,checkers,defaults,variables,reporter,finalized)
            local data = fetch(variables.original)
            savedata(variables.cachename,data or "")
        end
    end,
}
121
-- Sandbox runner that hands back the resource as a string. It is registered
-- as a "resultof" runner for the curl program with the template below. The
-- internal hook returns a replacement that fetches with the curl library and
-- returns the data (an empty string when fetching fails), or nil when no
-- library fetcher is available.
-- NOTE(review): how the sandbox chooses between the program and the function
-- returned by internal is not visible in this file -- confirm in the sandbox.
local memrunner = sandbox.registerrunner {
    name     = "in memory curl resolver",
    method   = "resultof",
    program  = "curl",
    template = '--silent --insecure "%original%"',
    checkers = {
        original = "url",
    },
    internal = function(specification)
        local fetch = fetcher(specification.reporter)
        if not fetch then
            return fetch
        end
        return function(name,program,template,checkers,defaults,variables,reporter,finalized)
            return fetch(variables.original) or ""
        end
    end,
}
137
-- Report (when tracing) which method is about to fetch a resource, then flush
-- the log so that the message is out before the transfer starts.
local function reportfetch(original,scheme,method)
    if trace_schemes then
        report_schemes("fetching %a, protocol %a, method %a",original,scheme,method)
    end
    logs.flush()
end

-- Tell whether a cache file has to be fetched (again). That is always the
-- case with a zero threshold or a missing file; otherwise the age of the file
-- is compared with the threshold given when the scheme was installed, falling
-- back on the global threshold.
--
-- The lookup used to be indexed with an undefined global (protocol), which
-- made scheme specific thresholds dead code.
local function expired(cachename,scheme)
    if threshold == 0 or not fileexists(cachename) then
        return true
    end
    local attributes = lfs.attributes(cachename)
    local modified   = attributes and attributes.modification
    if not modified then
        return true -- no usable timestamp: play safe and fetch again
    end
    return os.difftime(os.time(),modified) > (thresholds[scheme] or threshold)
end

-- Make the resource described by specification available locally and return
-- the name under which it can be found.
--
-- With the "schemes.inmemory" directive set the data ends up in a virtual
-- file and is fetched on every call. Otherwise it goes to a file in the
-- "schemes" cache, which is only fetched when it is missing or expired; the
-- outcome is remembered per original specification for the rest of the run,
-- an empty string meaning that no file could be obtained.
--
-- A scheme with a built-in handler uses that handler, any other scheme goes
-- through the curl runners.
local function fetch(specification)
    local original  = specification.original
    local scheme    = specification.scheme
    local cleanname = schemes.cleanname(specification)
    if inmemory then
        statistics.starttiming(schemes)
        local cachename = resolvers.savers.virtualname(cleanname)
        local handler   = handlers[scheme]
     -- if handler and not uselibrary then
        if handler then -- internal sockets are twice as fast as library
            reportfetch(original,scheme,"built-in")
            handler(specification,cachename)
        else
            reportfetch(original,scheme,"curl")
            local result = memrunner {
                original = original,
            }
            resolvers.savers.directvirtual(cachename,result,true) -- persistent
        end
        loaded[scheme] = loaded[scheme] + 1
        statistics.stoptiming(schemes)
        return cachename
    else
        local cachename = caches.setfirstwritablefile(cleanname,"schemes")
        -- a zero threshold disables reuse within the run as well
        if not cached[original] or threshold == 0 then
            statistics.starttiming(schemes)
            if expired(cachename,scheme) then
             -- removefile(cachename)
                cached[original] = cachename
                local handler = handlers[scheme]
                if handler then
                    reportfetch(original,scheme,"built-in")
                    handler(specification,cachename)
                else
                    reportfetch(original,scheme,"curl")
                    runner {
                        original  = original,
                        cachename = cachename,
                    }
                end
            end
            if fileexists(cachename) then
                cached[original] = cachename
                if trace_schemes then
                    report_schemes("using cached %a, protocol %a, cachename %a",original,scheme,cachename)
                end
            else
                -- remember the failure so that we don't try again in this run
                cached[original] = ""
                if trace_schemes then
                    report_schemes("using missing %a, protocol %a",original,scheme)
                end
            end
            loaded[scheme] = loaded[scheme] + 1
            statistics.stoptiming(schemes)
        else
            if trace_schemes then
                report_schemes("reusing %a, protocol %a",original,scheme)
            end
            reused[scheme] = reused[scheme] + 1
        end
        return cached[original]
    end
end

-- Finder shared by all installed schemes: fetch the resource and pass the
-- resulting name on to the regular finders.
local function finder(specification,filetype)
    return resolvers.methodhandler("finders",fetch(specification),filetype)
end

-- Once fetched, a resource is opened and loaded like a file.
local opener = openers.file
local loader = loaders.file

-- Install a scheme:
--
--   scheme       : scheme name, for instance "http"
--   handler      : optional function(specification,cachename) that fetches the
--                  resource; without it the curl runners are used
--   newthreshold : optional threshold for this scheme
--
-- Only an explicitly given threshold is stored. Storing the global value here
-- would freeze it at install time, and a later "schemes.threshold" directive
-- would then no longer influence the age check.
local function install(scheme,handler,newthreshold)
    handlers  [scheme] = handler
    loaded    [scheme] = 0
    reused    [scheme] = 0
    finders   [scheme] = finder
    openers   [scheme] = opener
    loaders   [scheme] = loader
    thresholds[scheme] = newthreshold
end

schemes.install = install
232
-- Built-in handler that fetches specification.original with LuaSocket and
-- returns cachename.
--
-- In memory mode the received chunks are collected in a table and stored as a
-- persistent virtual file, whatever the outcome of the request. In file mode
-- the data is first written to cachename .. ".tmp"; that file replaces the
-- cache file when the request reports success and is removed otherwise, so a
-- failed request does not touch an existing cache file.
local function http_handler(specification,cachename)
    local address = specification.original
    if inmemory then
        local chunks = { }
        http.request {
            url  = address,
            sink = ltn12.sink.table(chunks),
        }
        resolvers.savers.directvirtual(cachename,concat(chunks),true) -- persistent
        return cachename
    end
    local tempname = cachename .. ".tmp"
    local handle   = io.open(tempname,"wb")
    local ok = http.request {
        url  = address,
        sink = ltn12.sink.file(handle),
    }
    if ok then
        removefile(cachename)
        renamefile(tempname,cachename)
    else
        removefile(tempname)
    end
    return cachename
end
257
-- Install the schemes supported out of the box. Only http gets the built-in
-- handler; https (see pod) and ftp are installed without one.
do
    local names   = { "http", "https", "ftp" }
    local builtin = { http = http_handler }
    for i=1,#names do
        local name = names[i]
        install(name,builtin[name])
    end
end
261
-- Statistics line for the scheme handling. Returns nil when no scheme was
-- used, otherwise a summary with the elapsed time, the number of reported
-- entries, the global threshold and "scheme:count" lists for the loaded and
-- the reused resources ("none" for an empty list).
--
-- The reused field used to print the loaded list a second time.
statistics.register("scheme handling time", function()
    -- collect the "scheme:count" entries of all nonzero counters, sorted by key
    local function collect(counters)
        local list, n = { }, 0
        for scheme, count in sortedhash(counters) do
            if count > 0 then
                n = n + 1
                list[n] = scheme .. ":" .. count
            end
        end
        return list, n
    end
    local l, nl = collect(loaded)
    local r, nr = collect(reused)
    local n = nl + nr
    if n > 0 then
        if nl == 0 then l = { "none" } end
        if nr == 0 then r = { "none" } end
        return format("%s seconds, %s processed, threshold %s seconds, loaded: %s, reused: %s",
            statistics.elapsedtime(schemes), n, threshold, concat(l," "), concat(r," "))
    else
        return nil
    end
end)
286
287-- We provide a few more helpers:
288
289----- http        = require("socket.http")
290local httprequest = http.request
291local toquery     = url.toquery
292
-- Request url and return the first result of the http request only. When
-- data is given it is converted into a query string and appended after a "?".
local function fetchstring(url,data)
    local query  = data and toquery(data)
    local target = url
    if query then
        target = url .. "?" .. query
    end
    return (httprequest(target)) -- just one argument
end

schemes.fetchstring = fetchstring
303
-- Fetch a reply with fetchstring, compile it as "return <reply>" and return
-- whatever that chunk returns. Nothing is returned when there is no reply or
-- when the reply does not compile.
--
-- NOTE(review): this runs text received from a remote server as Lua code in
-- the default environment. Only use it with trusted servers; consider loading
-- the chunk with a restricted environment.
function schemes.fetchtable(url,data)
    local reply = fetchstring(url,data)
    if not reply then
        return
    end
    local chunk = load("return " .. reply)
    if not chunk then
        return
    end
    return chunk()
end
313