data-exp.lua /size: 17 Kb    last modification: 2020-07-01 14:35
1if not modules then modules = { } end modules ['data-exp'] = {
2    version   = 1.001,
3    comment   = "companion to luat-lib.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files",
7}
8
9local format, find, gmatch, lower, char, sub = string.format, string.find, string.gmatch, string.lower, string.char, string.sub
10local concat, sort = table.concat, table.sort
11local sortedkeys = table.sortedkeys
12local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
13local Ct, Cs, Cc, Carg, P, C, S = lpeg.Ct, lpeg.Cs, lpeg.Cc, lpeg.Carg, lpeg.P, lpeg.C, lpeg.S
14local type, next = type, next
15local isdir = lfs.isdir
16
17local collapsepath, joinpath, basename = file.collapsepath, file.join, file.basename
18
19local trace_locating   = false  trackers.register("resolvers.locating",   function(v) trace_locating   = v end)
20local trace_expansions = false  trackers.register("resolvers.expansions", function(v) trace_expansions = v end)
21local trace_globbing   = true   trackers.register("resolvers.globbing",   function(v) trace_globbing   = v end)
22
23local report_expansions = logs.reporter("resolvers","expansions")
24local report_globbing   = logs.reporter("resolvers","globbing")
25
26local resolvers     = resolvers
27local resolveprefix = resolvers.resolve
28
29-- As this bit of code is somewhat special it gets its own module. After
30-- all, when working on the main resolver code, I don't want to scroll
31-- past this every time. See data-obs.lua for the gsub variant.
32
33-- local function f_first(a,b)
34--     local t, n = { }, 0
35--     for s in gmatch(b,"[^,]+") do
36--         n = n + 1 ; t[n] = a .. s
37--     end
38--     return concat(t,",")
39-- end
40--
41-- local function f_second(a,b)
42--     local t, n = { }, 0
43--     for s in gmatch(a,"[^,]+") do
44--         n = n + 1 ; t[n] = s .. b
45--     end
46--     return concat(t,",")
47-- end
48
49-- kpsewhich --expand-braces '{a,b}{c,d}'
50-- ac:bc:ad:bd
51
52-- old  {a,b}{c,d} => ac ad bc bd
53--
54-- local function f_both(a,b)
55--     local t, n = { }, 0
56--     for sa in gmatch(a,"[^,]+") do
57--         for sb in gmatch(b,"[^,]+") do
58--             n = n + 1 ; t[n] = sa .. sb
59--         end
60--     end
61--     return concat(t,",")
62-- end
63--
64-- new  {a,b}{c,d} => ac bc ad bd
65
-- Combines every item of the comma separated list a with every item of the
-- comma separated list b. The items of b drive the outer loop so we get
-- {a,b}{c,d} => ac,bc,ad,bd. Empty items are skipped. This gmatch based
-- variant is shadowed by the lpeg based f_both that is defined later on.

local function f_both(a,b)
    local heads, nofheads = { }, 0
    for sa in gmatch(a,"[^,]+") do
        nofheads = nofheads + 1
        heads[nofheads] = sa
    end
    local result, size = { }, 0
    for sb in gmatch(b,"[^,]+") do
        for i=1,nofheads do
            size = size + 1
            result[size] = heads[i] .. sb
        end
    end
    return concat(result,",")
end
75
-- Helpers for comma separated lists. A run of commas is collapsed into one
-- comma. The extra argument that is passed to lpegmatch (Carg(1)) is put
-- after ('before') or in front of ('after') each item of the subject.

local comma   = P(",")
local nocomma = (1 - comma)^1
local docomma = comma^1 / ","

local before  = Cs((nocomma * Carg(1) + docomma)^0)
local after   = Cs((Carg(1) * nocomma + docomma)^0)

-- Here each item of the subject is appended to every item of the list that
-- comes in as extra argument.

local both    = Cs((
    (C(nocomma) * Carg(1)) / function(item,list)
        return lpegmatch(before,list,1,item)
    end
  + docomma
)^0)

-- a{x,y,z} : the string a is put in front of each item of list b

local function f_first(a,b)
    return lpegmatch(after,b,1,a)
end

-- {a,b,c}x : the string b is put after each item of list a

local function f_second(a,b)
    return lpegmatch(before,a,1,b)
end

-- {a,b}{c,d} : all combinations, with the items of b as outer loop; this
-- definition shadows the gmatch based f_both that was defined before

local function f_both(a,b)
    return lpegmatch(both,b,1,a)
end
86
87-- print(f_first ("a",    "x,y,z"))
88-- print(f_second("a,b,c","x"))
89-- print(f_both  ("a,b,c","x,y,z"))
90
-- The building blocks for the brace expander:
--
-- var   : a (possibly empty) run without braces
-- set   : a (possibly empty) run without braces and commas
-- other : any single character, used to step over what doesn't match

local left  = P("{")
local right = P("}")
local var   = (1 - S("{}" ))^0
local set   = (1 - S("{},"))^0
local other = P(1)

-- l_first  : set{var}    is expanded by f_first  and wrapped in braces
-- l_second : {var}set    is expanded by f_second and wrapped in braces
-- l_both   : {var}{var}  is expanded by f_both   and wrapped in braces
-- l_rest   : {var{var}var} loses its inner pair of braces

local l_first = Cs( (
    Cc("{") * (C(set) * left * C(var) * right / f_first) * Cc("}")
  + other
)^0 )

local l_second = Cs( (
    Cc("{") * (left * C(var) * right * C(set) / f_second) * Cc("}")
  + other
)^0 )

local l_both = Cs( (
    Cc("{") * (left * C(var) * right * left * C(var) * right / f_both) * Cc("}")
  + other
)^0 )

local l_rest = Cs( (
    left * var * (left/"") * var * (right/"") * var * right
  + other
)^0 )

-- The stripper and replacer constructors are defined elsewhere. Presumably
-- the replacer marks an empty first or last alternative with an @ and the
-- stripper removes the characters {, } and @ afterwards (to be confirmed
-- against the lpeg helper library).

local stripper_1 = lpeg.stripper ("{}@")
local replacer_1 = lpeg.replacer { { ",}", ",@}" }, { "{,", "{@," }, }
104
-- Applies the given substitution pattern to str over and over again until
-- the result no longer changes.

local function expanduntilstable(pattern,str)
    local old
    repeat
        old = str
        str = lpegmatch(pattern,str)
    until old == str
    return str
end

-- Expands the brace expressions in str and appends the resulting comma
-- separated entries to a list.
--
-- str      : the path expression, for instance a/{b,c}/d
-- newlist  : optional table that the entries are appended to
-- validate : optional function that gets each entry and returns the value
--            to store, or a false value when the entry has to be skipped
--
-- The (extended or fresh) list is returned.

local function splitpathexpr(str, newlist, validate) -- I couldn't resist lpegging it (nice exercise).
    if trace_expansions then
        report_expansions("expanding variable %a",str)
    end
    local t = newlist or { }
    local n = #t
    str = lpegmatch(replacer_1,str)
    -- each pass can create new opportunities for the other passes so we
    -- keep going till a complete round changes nothing
    local old
    repeat
        old = str
        str = expanduntilstable(l_first, str)
        str = expanduntilstable(l_second,str)
        str = expanduntilstable(l_both,  str)
        str = expanduntilstable(l_rest,  str)
    until old == str -- or not find(str,"{",1,true)
    str = lpegmatch(stripper_1,str)
    for s in gmatch(str,"[^,]+") do
        if validate then
            s = validate(s)
        end
        if s then
            n = n + 1
            t[n] = s
        end
    end
    if trace_expansions then
        -- this also reports the entries that were already in newlist
        for k=1,#t do
            report_expansions("% 4i: %s",k,t[k])
        end
    end
    return t
end
153
154-- We could make the previous one public.
155
-- Collapses the given path and returns the result, or false when the
-- collapsed path is empty or is just 'unset' (optionally with leading
-- exclamation marks and trailing slashes).

local function validate(s)
    s = collapsepath(s) -- already keeps the trailing / and //
    if s == "" or find(s,"^!*unset/*$") then
        return false
    end
    return s
end

resolvers.validatedpath = validate -- keeps the trailing //
162
-- Expands each path expression in pathlist and collects all the validated
-- results in one new flat list that is returned.

function resolvers.expandedpathfromlist(pathlist)
    local expanded = { }
    for index=1,#pathlist do
        local expression = pathlist[index]
        splitpathexpr(expression,expanded,validate)
    end
    return expanded
end
170
171-- {a,b,c,d}
172-- a,b,c/{p,q,r},d
173-- a,b,c/{p,q,r}/d/{x,y,z}//
174-- a,b,c/{p,q/{x,y,z},r},d/{p,q,r}
175-- a,b,c/{p,q/{x,y,z},r},d/{p,q,r}
176-- a{b,c}{d,e}f
177-- {a,b,c,d}
178-- {a,b,c/{p,q,r},d}
179-- {a,b,c/{p,q,r}/d/{x,y,z}//}
180-- {a,b,c/{p,q/{x,y,z}},d/{p,q,r}}
181-- {a,b,c/{p,q/{x,y,z},w}v,d/{p,q,r}}
182-- {$SELFAUTODIR,$SELFAUTOPARENT}{,{/share,}/texmf{-local,.local,}/web2c}
183
local usedhomedir = nil                       -- resolved once, then reused
local donegation  = (P("!") /""     )^0       -- drops leading exclamation marks
local doslashes   = (P("\\")/"/" + 1)^0       -- turns backslashes into slashes

-- Returns the home directory with exclamation marks removed and slashes
-- normalized. When environment.homedir is missing, is "~" or "", or is not
-- an existing directory, the current path "." is used instead. The result
-- is remembered in usedhomedir.

local function expandedhome()
    if usedhomedir then
        return usedhomedir
    end
    usedhomedir = lpegmatch(Cs(donegation * doslashes),environment.homedir or "")
    local unusable = usedhomedir == "~" or usedhomedir == "" or not isdir(usedhomedir)
    if unusable then
        if trace_expansions then
            report_expansions("no home dir set, ignoring dependent path using current path")
        end
        usedhomedir = "."
    end
    return usedhomedir
end
200
-- A leading ~, $HOME or %HOME% (after the optional exclamation marks) is
-- replaced by the expanded home directory.

local dohome  = ((P("~") + P("$HOME") + P("%HOME%")) / expandedhome)^0
local cleanup = Cs(donegation * dohome * doslashes)

-- Returns the cleaned up path: leading exclamation marks are removed, a
-- leading home reference is expanded and backslashes become slashes. An
-- empty string is returned when no (true) value is passed.

function resolvers.cleanpath(str)
    if not str then
        return ""
    end
    return lpegmatch(cleanup,str) or ""
end
207
208-- print(resolvers.cleanpath(""))
209-- print(resolvers.cleanpath("!"))
210-- print(resolvers.cleanpath("~"))
211-- print(resolvers.cleanpath("~/test"))
212-- print(resolvers.cleanpath("!~/test"))
213-- print(resolvers.cleanpath("~/test~test"))
214
215-- This one strips quotes and funny tokens.
216
217-- we have several options here:
218--
-- expandhome = P("~") / "$HOME"              : relocatable
-- expandhome = P("~") / "home:"              : relocatable
221-- expandhome = P("~") / environment.homedir  : frozen but unexpanded
222-- expandhome = P("~") = dohome               : frozen and expanded
223
-- A tilde is replaced by the (relocatable) $HOME reference.

local expandhome = P("~") / "$HOME"

-- The quote removal has to be grouped explicitly: / and * have the same
-- precedence and associate to the left, so without the parentheses the
-- trailing / "" applies to the complete quoted sequence and the whole
-- value is replaced by an empty string instead of just losing its quotes.

local dodouble = (P('"') / "") * (expandhome + (1 - P('"')))^0 * (P('"') / "")
local dosingle = (P("'") / "") * (expandhome + (1 - P("'")))^0 * (P("'") / "")
local dostring =                 (expandhome +  1              )^0

-- lpegpatterns.unspacer is defined elsewhere; presumably it removes
-- whitespace at that position (to be confirmed).

local stripper = Cs(
    lpegpatterns.unspacer * (dosingle + dodouble + dostring) * lpegpatterns.unspacer
)

-- Returns the variable value with surrounding single or double quotes
-- removed and each ~ replaced by $HOME. Values that are not strings are
-- returned as they are.

function resolvers.checkedvariable(str)
    return type(str) == "string" and lpegmatch(stripper,str) or str
end
237
238-- The path splitter:
239
240-- A config (optionally) has the paths split in tables. Internally
241-- we join them and split them after the expansion has taken place. This
242-- is more convenient.
243
local cache = { } -- results per (non empty) specification string

local splitter = lpeg.tsplitat(";") -- as we move towards urls, prefixes and use tables we no longer do :

local backslashswapper = lpeg.replacer("\\","/")

-- Splits a configuration path specification at the semicolons after the
-- backslashes have been turned into slashes. Entries that start with
-- 'unset' (optionally preceded by left braces) are dropped. The result for
-- a non empty specification is cached. Nothing is returned when no
-- specification is given.
--
-- NOTE(review): the 'unset' pattern is not anchored at the end, so every
-- entry that merely starts with 'unset' is dropped too; confirm that this
-- is intended.

local function splitconfigurationpath(str) -- beware, this can be either a path or a { specification }
    if not str then
        return
    end
    local found = cache[str]
    if found then
        return found
    end
    found = { }
    if str == "" then
        -- an empty specification gives a fresh empty list that is not cached
        return found
    end
    local parts    = lpegmatch(splitter,lpegmatch(backslashswapper,str)) -- can be combined
    local noffound = 0
    for i=1,#parts do
        local part = parts[i]
        if not find(part,"^{*unset}*") then
            noffound = noffound + 1
            found[noffound] = part
        end
    end
    if trace_expansions then
        report_expansions("splitting path specification %a",str)
        for k=1,noffound do
            report_expansions("% 4i: %s",k,found[k])
        end
    end
    cache[str] = found
    return found
end

resolvers.splitconfigurationpath = splitconfigurationpath
281
-- A table is considered to be split already and is returned untouched,
-- anything else is handed over to splitconfigurationpath.

function resolvers.splitpath(str)
    if type(str) ~= 'table' then
        return splitconfigurationpath(str)
    end
    return str
end
289
-- A table is passed to file.join and whatever that returns is returned,
-- anything else is returned untouched.

function resolvers.joinpath(str)
    if type(str) ~= 'table' then
        return str
    end
    return joinpath(str)
end
297
298-- The next function scans directories and returns a hash where the
299-- entries are either strings or tables.
300--
301-- starting with . or .. etc or funny char
302--
303-- local l_forbidden = S("~`!#$%^&*()={}[]:;\"\'||\\/<>,?\n\r\t")
304-- local l_confusing = P(" ")
305-- local l_character = lpegpatterns.utf8
306-- local l_dangerous = P(".")
307--
308-- local l_normal = (l_character - l_forbidden - l_confusing - l_dangerous) * (l_character - l_forbidden - l_confusing^2)^0 * P(-1)
309-- ----- l_normal = l_normal * Cc(true) + Cc(false)
310--
311-- local function test(str)
312--     print(str,lpegmatch(l_normal,str))
313-- end
314-- test("ヒラギノ明朝 Pro W3")
315-- test("..ヒラギノ明朝 Pro W3")
316-- test(":ヒラギノ明朝 Pro W3;")
317-- test("ヒラギノ明朝 /Pro W3;")
318-- test("ヒラギノ明朝 Pro  W3")
319
320-- a lot of this caching can be stripped away when we have ssd's everywhere
321--
322-- we could cache all the (sub)paths here if needed
323
324local attributes, directory = lfs.attributes, lfs.dir
325
326local weird          = P(".")^1 + lpeg.anywhere(S("~`!#$%^&*()={}[]:;\"\'||<>,?\n\r\t"))
327local lessweird      = P(".")^1 + lpeg.anywhere(S("~`#$%^&*:;\"\'||<>,?\n\r\t"))
328local timer          = { }
329local scanned        = { }
330local nofscans       = 0
331local scancache      = { }
332local fullcache      = { }
333----- simplecache    = { }
334local nofsharedscans = 0
335local addcasecraptoo = true -- experiment to let case matter a  bit (still fuzzy)
336
337-- So, we assume either a lowercase name or a mixed case one but only one such case
338-- as having Foo fOo foo FoO FOo etc on the system is braindead in any sane project.
339
-- Remembers the mixed case name that belongs to a lowercase name. The
-- first name that is seen wins; a different later one is only reported
-- (when globbing is traced). The updated remap counter r is returned.

local function registerremap(remap,lowered,name,r)
    local known = remap[lowered]
    if not known then
        remap[lowered] = name
        r = r + 1
    elseif trace_globbing and known ~= name then
        report_globbing("confusing filename, name: %a, lower: %a, already: %a",name,lowered,known)
    end
    return r
end

-- Scans the directory spec .. path and, recursively, its subdirectories.
--
-- files    : hash that maps a lowercase filename onto the relative path it
--            was found on, or onto a list of such paths
-- remap    : hash that maps a lowercase filename onto its mixed case name
-- spec     : the root of the scan, including the trailing slash
-- path     : the path relative to spec, "" for the root itself
-- n, m, r  : running counters for files, directories and remappings
-- onlyone  : when set, a name that is already known is not registered again
-- tolerant : when set, the less strict 'lessweird' pattern is used to
--            decide which names are skipped
--
-- The updated files, remap, n, m and r are returned. As a side effect the
-- files hash is stored in scancache under the scanned path (without its
-- last character).

local function scan(files,remap,spec,path,n,m,r,onlyone,tolerant)
    local full     = path == "" and spec or (spec .. path .. '/')
    local dirlist  = { }
    local nofdirs  = 0
    local pattern  = tolerant and lessweird or weird
    local filelist = { }
    local noffiles = 0
    for name, mode in directory(full) do
        if not lpegmatch(pattern,name) then
            if not mode then
                -- the iterator gave no mode so we have to ask for it
                mode = attributes(full..name,"mode")
            end
            if mode == "file" then
                n = n + 1
                noffiles = noffiles + 1
                filelist[noffiles] = name
            elseif mode == "directory" then
                m = m + 1
                nofdirs = nofdirs + 1
                if path ~= "" then
                    dirlist[nofdirs] = path .. "/" .. name
                else
                    dirlist[nofdirs] = name
                end
            end
        end
    end
    if noffiles > 0 then
        sort(filelist)
        for i=1,noffiles do
            local name    = filelist[i]
            local lowered = lower(name)
            local paths   = files[lowered]
            if not paths then -- probably unique anyway
                files[lowered] = path
                if name ~= lowered then
                    r = registerremap(remap,lowered,name,r)
                end
            elseif onlyone then
                -- forget about it
            else
                if name ~= lowered then
                    r = registerremap(remap,lowered,name,r)
                    if addcasecraptoo then
                        -- also register the path under the mixed case name
                        local casepaths = files[name]
                        if not casepaths then
                            files[name] = path
                        elseif type(casepaths) == "string" then
                            files[name] = { casepaths, path }
                        else
                            casepaths[#casepaths+1] = path
                        end
                    end
                end
                if type(paths) == "string" then
                    files[lowered] = { paths, path }
                else
                    paths[#paths+1] = path
                end
            end
        end
    end
    if nofdirs > 0 then
        sort(dirlist)
        for i=1,nofdirs do
            -- the flag has to be passed on under the name of the parameter,
            -- any other name refers to a global here and silently drops it
            files, remap, n, m, r = scan(files,remap,spec,dirlist[i],n,m,r,onlyone,tolerant)
        end
    end
    scancache[sub(full,1,-2)] = files
    return files, remap, n, m, r
end
425
-- Scans the tree at path (after resolving its prefix) and returns a table
-- with the fields metadata (path and the number of files, directories and
-- remappings), files and remap. A path that is not a directory results in
-- empty hashes and zero counts.
--
-- path     : the path to scan, can still have a prefix
-- branch   : only used in the trace messages
-- usecache : when set, a previous result for the same resolved path is
--            reused and a new result is stored
-- onlyonce : passed on to the scanner (register known names only once)
-- tolerant : passed on to the scanner (skip fewer names)

local function scanfiles(path,branch,usecache,onlyonce,tolerant)
    local realpath = resolveprefix(path)
    if usecache then
        local cached = fullcache[realpath]
        if cached then
            if trace_locating then
                report_expansions("using cached scan of path %a, branch %a",path,branch or path)
            end
            nofsharedscans = nofsharedscans + 1
            return cached
        end
    end
    --
    statistics.starttiming(timer)
    if trace_locating then
        report_expansions("scanning path %a, branch %a",path,branch or path)
    end
    local files, remap, n, m, r = { }, { }, 0, 0, 0
    local valid = isdir(realpath)
    if valid then
        files, remap, n, m, r = scan(files,remap,realpath .. '/',"",n,m,r,onlyonce,tolerant)
    end
    local content = {
        metadata = {
            path        = path, -- can be selfautoparent:texmf-whatever
            files       = n,
            directories = m,
            remappings  = r,
        },
        files = files,
        remap = remap,
    }
    if trace_locating then
        if valid then
            report_expansions("%s files found on %s directories with %s uppercase remappings",n,m,r)
        else
            report_expansions("invalid path %a",realpath)
        end
    end
    if usecache then
        scanned[#scanned+1] = realpath
        fullcache[realpath] = content
    end
    nofscans = nofscans + 1
    statistics.stoptiming(timer)
    return content
end

resolvers.scanfiles = scanfiles
484
-- The same as scanfiles but with both the onlyonce and tolerant flags set.

function resolvers.simplescanfiles(path,branch,usecache)
    local onlyonce, tolerant = true, true
    return scanfiles(path,branch,usecache,onlyonce,tolerant)
end
488
-- Returns some statistics: the number of scans (n), the number of scans
-- that were served from the cache (shared), the elapsed time and the list
-- of scanned paths. The shared list of paths is sorted in place.

function resolvers.scandata()
    table.sort(scanned)
    local data = { }
    data.n      = nofscans
    data.shared = nofsharedscans
    data.time   = statistics.elapsedtime(timer)
    data.paths  = scanned
    return data
end
498
-- Can be called as (content,path,name) or as (content,name). Nothing is
-- returned when content or its files or remap hash is missing.
--
-- With three arguments the given path is returned together with the
-- remapped (mixed case) name, or the lowercase name when there is no
-- remapping.
--
-- With two arguments the second one is the name: it is looked up in
-- lowercase and, when found, its path entry and the remapped name are
-- returned.

function resolvers.get_from_content(content,path,name) -- or (content,name)
    local files = content and content.files
    local remap = files and content.remap
    if not remap then
        return
    end
    if name then
        -- this one resolves a remapped name
        local lowered = lower(name)
        return path, remap[lowered] or lowered
    end
    -- this one does a lookup and resolves a remapped name
    --
    -- if addcasecraptoo then
    --     local path = files[name]
    --     if path then
    --         return path, name
    --     end
    -- end
    local lowered = lower(path)
    local found   = files[lowered]
    if found then
        return found, remap[lowered] or lowered
    end
end
531
local nothing = function() end -- an iterator that ends right away

-- Returns an iterator over the entries of content.files whose (lowercase)
-- name matches the lowercased pattern, in sorted key order. Each step
-- gives the path entry and the remapped name (or the key itself when
-- there is no remapping). When content is missing, pattern is not a
-- string, or the files or remap hash is missing, the iterator gives
-- nothing.
--
-- NOTE(review): the pattern is lowercased as a whole, so an uppercase
-- pattern class like %A becomes %a; confirm that callers take this into
-- account.

function resolvers.filtered_from_content(content,pattern)
    if not content or type(pattern) ~= "string" then
        return nothing
    end
    local wanted = lower(pattern)
    local files  = content.files -- we could store the sorted list
    local remap  = content.remap
    if not (files and remap) then
        return nothing
    end
    local keys    = sortedkeys(files)
    local nofkeys = #keys
    local current = 0
    return function()
        while current < nofkeys do
            current = current + 1
            local key = keys[current]
            if find(key,wanted) then
                return files[key], remap[key] or key
            end
        end
    end
end
557
558-- inspect(resolvers.simplescanfiles("e:/temporary/mb-mp"))
559-- inspect(resolvers.scanfiles("e:/temporary/mb-mp"))
560