-- l-file.lua /size: 22 Kb    last modification: 2023-12-21 09:44
-- Make sure the global modules table exists and store this file's metadata in it.
if not modules then
    modules = { }
end

modules['l-file'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9-- needs a cleanup
10
11file       = file or { }
12local file = file
13
14if not lfs then
15    lfs = optionalrequire("lfs")
16end
17
18-- -- see later
19--
20-- if not lfs then
21--
22--     lfs = {
23--         getcurrentdir = function()
24--             return "."
25--         end,
26--         attributes = function()
27--             return nil
28--         end,
29--         isfile = function(name)
30--             local f = io.open(name,'rb')
31--             if f then
32--                 f:close()
33--                 return true
34--             end
35--         end,
36--         isdir = function(name)
37--             print("you need to load lfs")
38--             return false
39--         end
40--     }
41--
42-- elseif not lfs.isfile then
43--
44--     local attributes = lfs.attributes
45--
46--     function lfs.isdir(name)
47--         return attributes(name,"mode") == "directory"
48--     end
49--
50--     function lfs.isfile(name)
51--         return attributes(name,"mode") == "file"
52--     end
53--
54--  -- function lfs.isdir(name)
55--  --     local a = attributes(name)
56--  --     return a and a.mode == "directory"
57--  -- end
58--
59--  -- function lfs.isfile(name)
60--  --     local a = attributes(name)
61--  --     return a and a.mode == "file"
62--  -- end
63--
64-- end
65
66local insert, concat = table.insert, table.concat
67local match, find, gmatch = string.match, string.find, string.gmatch
68local lpegmatch = lpeg.match
69local getcurrentdir, attributes = lfs.currentdir, lfs.attributes
70local checkedsplit = string.checkedsplit
71
72local P, R, S, C, Cs, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc, lpeg.Ct
73
74-- better this way:
75
76local attributes = lfs.attributes
77
-- Returns true/false depending on whether the "mode" attribute of name is
-- "directory"; returns nothing when no name is given.
function lfs.isdir(name)
    if not name then
        return
    end
    return attributes(name,"mode") == "directory"
end
83
-- Returns true when the "mode" attribute of name is "file" or "link" and nil
-- otherwise; returns nothing when no name is given.
function lfs.isfile(name)
    if not name then
        return
    end
    local mode = attributes(name,"mode")
    if mode == "file" or mode == "link" then
        return true
    end
    return nil
end
90
-- Like lfs.isfile but hands back the name itself (instead of true) when the
-- "mode" attribute is "file" or "link"; nil otherwise.
function lfs.isfound(name)
    if not name then
        return
    end
    local mode = attributes(name,"mode")
    if mode == "file" or mode == "link" then
        return name
    end
    return nil
end
97
-- Returns the "modification" attribute of name, or nil when there is no name
-- or no such attribute.
function lfs.modification(name)
    if name then
        return attributes(name,"modification") or nil
    end
    return nil
end
101
-- When a sandbox table is present, pass the three lookup helpers to
-- sandbox.redefine together with their qualified names (same order as before).
if sandbox then
    for _, key in ipairs { "isfile", "isdir", "isfound" } do
        sandbox.redefine(lfs[key],"lfs." .. key)
    end
end
107
-- Shared lpeg building blocks for the path and suffix patterns in this file.
local colon, period, periods = P(":"), P("."), P("..")
local fwslash, bwslash       = P("/"), P("\\")
local slashes                = S("\\/")            -- either kind of slash
local noperiod               = P(1) - period
local noslashes              = P(1) - slashes
local name                   = noperiod^1
-- a trailing suffix: the period is mapped onto "" and what follows (no periods
-- or slashes) has to run up to the end of the string
local suffix                 = (period/"") * (P(1) - period - slashes)^1 * P(-1)
118
119----- pattern = C((noslashes^0 * slashes^1)^1)
-- Captures everything in front of the last run of slashes that is followed by
-- a final slash-free component; the P(1) then requires that a slash follows.
local pattern = C((P(1) - slashes^1 * noslashes^1 * P(-1))^1) * P(1) -- there must be a more efficient way

-- Returns the path part of name; when name is missing or has no path part the
-- default is returned, or "" when there is no default either.
local function pathpart(name,default)
    if name then
        local found = lpegmatch(pattern,name)
        if found then
            return found
        end
    end
    return default or ""
end
125
-- Skips one or more slash terminated components and captures the final one.
local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1

-- Returns the last component of name; a name that does not match (for instance
-- one without slashes) is returned as given.
local function basename(name)
    if name then
        local found = lpegmatch(pattern,name)
        if found then
            return found
        end
    end
    return name
end
131
132-- print(pathpart("file"))
133-- print(pathpart("dir/file"))
134-- print(pathpart("/dir/file"))
135-- print(basename("file"))
136-- print(basename("dir/file"))
137-- print(basename("/dir/file"))
138
-- Skips the leading path components and captures what remains up to (but not
-- including) a trailing suffix.
local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0

-- Returns name without path and without its last suffix; falls back on name
-- itself when there is no match.
local function nameonly(name)
    if name then
        local found = lpegmatch(pattern,name)
        if found then
            return found
        end
    end
    return name
end
144
-- Captures the text after the last period, which has to run up to the end.
local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1

-- Returns the last suffix of name (without period) or "" when there is none
-- or when no name is given.
local function suffixonly(name)
    if name then
        local found = lpegmatch(pattern,name)
        if found then
            return found
        end
    end
    return ""
end
150
-- Captures every period separated part after the first one; when that fails
-- the alternative yields a single "".
local pattern = (noslashes^0 * slashes)^0 * noperiod^1 * ((period * C(noperiod^1))^1) * -1 + Cc("")

-- Returns all suffixes of name as separate values, or "" when no name is given.
local function suffixesonly(name)
    if not name then
        return ""
    end
    return lpegmatch(pattern,name)
end
160
-- Public names, grouped by the helper that implements them.

file.pathpart     = pathpart
file.dirname      = pathpart     -- obsolete

file.basename     = basename
file.nameonly     = nameonly

file.suffixonly   = suffixonly
file.suffix       = suffixonly
file.extname      = suffixonly   -- obsolete

file.suffixesonly = suffixesonly
file.suffixes     = suffixesonly
171
172-- actually these are schemes
173
-- Building blocks for the splitters. Each part gets an empty string fallback
-- so that a fixed number of captures comes out.

local drive  = C(R("az","AZ")) * colon
local path   = C((noslashes^0 * slashes)^0) + Cc("")
local suffix = period * C((1-period)^0 * P(-1))
local base   = C((1-suffix)^0) + Cc("") -- uses the suffix pattern before it gets its fallback
local rest   = C(P(1)^0)

drive  = drive  + Cc("")
suffix = suffix + Cc("")

local pattern_b =           path  *   base * suffix  -- path, base, suffix
local pattern_a =   drive * pattern_b                -- drive, path, base, suffix
local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures
local pattern_d =           path  *   rest           -- path, base+suffix
189
-- Splits str into its parts. With splitdrive the results are drive, path,
-- base and suffix; without it they are path, base and suffix. Nothing is
-- returned when str is missing.
function file.splitname(str,splitdrive)
    if not str then
        return -- error
    end
    return lpegmatch(splitdrive and pattern_a or pattern_b,str)
end
199
-- Splits str into path and base+suffix (path has / appended, might change at
-- some point). Without str an empty path and str itself are returned.
function file.splitbase(str)
    if not str then
        return "", str -- assume no path
    end
    return lpegmatch(pattern_d,str)
end
207
208---- stripslash = C((1 - P("/")^1*P(-1))^0)
209
-- Splits str and returns the parts in a table with the fields path, name, base
-- and suffix; with splitdrive the fields drive and subpath are added. Nothing
-- is returned when str is missing.
function file.nametotable(str,splitdrive)
    if not str then
        return
    end
    -- six captures: full path, drive, path without drive, full name, base, suffix
    local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
 -- if path ~= "" then
 --     path = lpegmatch(stripslash,path) -- unfortunate hack, maybe this becomes default
 -- end
    local result = {
        path   = path,
        name   = name,
        base   = base,
        suffix = suffix,
    }
    if splitdrive then
        result.drive   = drive
        result.subpath = subpath
    end
    return result
end
235
236-- print(file.splitname("file"))
237-- print(file.splitname("dir/file"))
238-- print(file.splitname("/dir/file"))
239-- print(file.splitname("file"))
240-- print(file.splitname("dir/file"))
241-- print(file.splitname("/dir/file"))
242
243-- inspect(file.nametotable("file.ext"))
244-- inspect(file.nametotable("dir/file.ext"))
245-- inspect(file.nametotable("/dir/file.ext"))
246-- inspect(file.nametotable("file.ext"))
247-- inspect(file.nametotable("dir/file.ext"))
248-- inspect(file.nametotable("/dir/file.ext"))
249
250----- pattern = Cs(((period * noperiod^1 * -1) / "" + 1)^1)
-- Copies the string while mapping a trailing suffix (period plus a run without
-- periods and slashes up to the end) onto "".
local pattern = Cs(((period * (1-period-slashes)^1 * -1) / "" + 1)^1)

-- Returns name without its last suffix; a missing name is passed back as is.
function file.removesuffix(name)
    if not name then
        return name
    end
    local stripped = lpegmatch(pattern,name)
    return stripped
end
256
257-- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1
258--
259-- function file.addsuffix(name, suffix)
260--     local p = lpegmatch(pattern,name)
261--     if p then
262--         return name
263--     else
264--         return name .. "." .. suffix
265--     end
266-- end
267
-- Two captures: the name up to a trailing suffix and that suffix without its
-- period. The match fails when there is no trailing suffix.
local suffix  = period/"" * (1-period-slashes)^1 * -1
local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix)

-- Adds suffix to filename, steered by criterium:
--
--   nil/false    : only add when the filename has no suffix yet
--   true         : always append
--   string/table : keep the filename when its suffix equals the string or is
--                  in the table, otherwise replace the current suffix
--
-- Without filename or with an empty or missing suffix the filename is
-- returned untouched.
function file.addsuffix(filename,suffix,criterium)
    if not filename or not suffix or suffix == "" then
        return filename
    end
    if criterium == true then
        return filename .. "." .. suffix
    end
    local n, s = lpegmatch(pattern,filename)
    local hassuffix = s and s ~= ""
    if not criterium then
        if hassuffix then
            return filename
        end
        return filename .. "." .. suffix
    end
    if hassuffix then
        local t = type(criterium)
        if t == "string" then
            -- keep if criterium
            if s == criterium then
                return filename
            end
        elseif t == "table" then
            -- keep if in criterium
            for i=1,#criterium do
                if s == criterium[i] then
                    return filename
                end
            end
        end
    end
    return (n or filename) .. "." .. suffix
end
304
305-- print("1 " .. file.addsuffix("name","new")                   .. " -> name.new")
306-- print("2 " .. file.addsuffix("name.old","new")               .. " -> name.old")
307-- print("3 " .. file.addsuffix("name.old","new",true)          .. " -> name.old.new")
308-- print("4 " .. file.addsuffix("name.old","new","new")         .. " -> name.new")
309-- print("5 " .. file.addsuffix("name.old","new","old")         .. " -> name.old")
310-- print("6 " .. file.addsuffix("name.old","new","foo")         .. " -> name.new")
311-- print("7 " .. file.addsuffix("name.old","new",{"foo","bar"}) .. " -> name.new")
312-- print("8 " .. file.addsuffix("name.old","new",{"old","bar"}) .. " -> name.old")
313
-- Copies everything in front of a trailing suffix (the period included in
-- what is left out).
local suffix  = period * (1-period-slashes)^1 * -1
local pattern = Cs((1-suffix)^0)

-- Returns name with its last suffix replaced by the given one (appended when
-- there was none). Without name or with an empty or missing suffix the name
-- is returned untouched.
function file.replacesuffix(name,suffix)
    if not name or not suffix or suffix == "" then
        return name
    end
    return lpegmatch(pattern,name) .. "." .. suffix
end
324
325--
326
-- Replacer that turns each backslash into a forward slash.
local reslasher = lpeg.replacer(P("\\"),"/")

-- Returns str with backslashes replaced by forward slashes; a missing str is
-- passed back as is.
function file.reslash(str)
    if not str then
        return str
    end
    local result = lpegmatch(reslasher,str)
    return result
end
332
333-- We should be able to use:
334--
335-- local writable = P(1) * P("w") * Cc(true)
336--
337-- function file.is_writable(name)
338--     local a = attributes(name) or attributes(pathpart(name,"."))
339--     return a and lpegmatch(writable,a.permissions) or false
340-- end
341--
342-- But after some testing Taco and I came up with the more robust
343-- variant:
344
if lfs.isreadablefile and lfs.iswritablefile then

    -- the library provides the checkers itself

    file.is_readable = lfs.isreadablefile
    file.is_writable = lfs.iswritablefile

else

    -- Tries to open name in the given mode; on success the handle is closed,
    -- the file is removed again when asked for, and true is returned.
    local function canopen(name,mode,cleanup)
        local f = io.open(name,mode)
        if not f then
            return false
        end
        f:close()
        if cleanup then
            os.remove(name)
        end
        return true
    end

    -- Checks writability by trying: in a directory a test file is written and
    -- removed, an existing file is opened in append mode, and anything else is
    -- opened in append mode and removed afterwards. Returns true or false.
    function file.is_writable(name)
        if not name then
            return false -- error
        elseif lfs.isdir(name) then
            return canopen(name .. "/m_t_x_t_e_s_t.tmp","wb",true)
        elseif lfs.isfile(name) then
            return canopen(name,"ab",false)
        else
            return canopen(name,"ab",true)
        end
    end

    -- matches a permissions string that starts with an "r"
    local readable = P("r") * Cc(true)

    -- Returns true when the permissions attribute of name starts with "r" and
    -- false otherwise (also when there is no name or there are no attributes).
    function file.is_readable(name)
        local a = name and attributes(name)
        if a then
            return lpegmatch(readable,a.permissions) or false
        end
        return false
    end

end

file.isreadable = file.is_readable -- deprecated
file.iswritable = file.is_writable -- deprecated
395
-- Returns the size attribute of name, or 0 when there is no name, when there
-- are no attributes or when there is no size.
function file.size(name)
    local a = name and attributes(name)
    return a and a.size or 0
end
404
-- Splits a path list string with string.checkedsplit, using the given
-- separator or io.pathseparator; reslashing is a bonus (we could do a direct
-- split). A missing str is passed back as is.
function file.splitpath(str,separator) -- string
    if not str then
        return str
    end
    local list = checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
    return list
end
408
-- Concatenates the entries of tab with the given separator or
-- io.pathseparator. A missing tab is passed back as is.
function file.joinpath(tab,separator) -- table
    if not tab then
        return tab
    end
    return concat(tab,separator or io.pathseparator) -- can have trailing //
end
412
local someslash = S("\\/")

-- drops leading forward slashes and then applies the reslasher that is in
-- scope at this point, so the one defined before the redefinition below
local stripper  = Cs((fwslash^0)/"" * reslasher)

-- two slashes followed by something else, or a prefix that ends with a colon
local isnetwork = (someslash * someslash * (1-someslash))
                + ((1-fwslash-colon)^1 * colon)
local isroot    = fwslash^1 * P(-1) -- only forward slashes
local hasroot   = fwslash^1         -- starts with a forward slash

-- from here on reslasher maps both kinds of slashes onto "/" while deslasher
-- maps a whole run of slashes onto a single "/"
local reslasher = lpeg.replacer(someslash,"/")
local deslasher = lpeg.replacer(someslash^1,"/")
422
-- If we have a network or prefix then there is a chance that we end up with two
-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2)
-- split and rebuild as url. Of course we could assume no network paths (which
-- makes sense) and assume either mapped drives (windows) or mounts (unix) but
-- then we still have to deal with urls ... anyhow, multiple // are never a real
-- problem but just ugly.
429
430-- function file.join(...)
431--     local lst = { ... }
432--     local one = lst[1]
433--     if lpegmatch(isnetwork,one) then
434--         local one = lpegmatch(reslasher,one)
435--         local two = lpegmatch(deslasher,concat(lst,"/",2))
436--         if lpegmatch(hasroot,two) then
437--             return one .. two
438--         else
439--             return one .. "/" .. two
440--         end
441--     elseif lpegmatch(isroot,one) then
442--         local two = lpegmatch(deslasher,concat(lst,"/",2))
443--         if lpegmatch(hasroot,two) then
444--             return two
445--         else
446--             return "/" .. two
447--         end
448--     elseif one == "" then
449--         return lpegmatch(stripper,concat(lst,"/",2))
450--     else
451--         return lpegmatch(deslasher,concat(lst,"/"))
452--     end
453-- end
454
-- Glues the second and later parts together with "/"; a single part is passed
-- on as it is.
local function jointail(two,three,...)
    return three and concat({ two, three, ... },"/") or two
end

-- Joins path parts with "/":
--
--   one part             : reslashed (an empty string stays as it is)
--   first part empty     : the rest is joined and leading slashes are stripped
--   network or prefix    : the first part is reslashed and the deslashed rest
--                          is appended
--   first part only "/"s : the deslashed rest, starting with one "/"
--   otherwise            : all parts are joined and deslashed
function file.join(one, two, three, ...)
    if not two then
        if one == "" then
            return one
        end
        return lpegmatch(reslasher,one)
    end
    if not one or one == "" then
        return lpegmatch(stripper,jointail(two,three,...))
    end
    if lpegmatch(isnetwork,one) then
        local head = lpegmatch(reslasher,one)
        local tail = lpegmatch(deslasher,jointail(two,three,...))
        if lpegmatch(hasroot,tail) then
            return head .. tail
        end
        return head .. "/" .. tail
    end
    if lpegmatch(isroot,one) then
        local tail = lpegmatch(deslasher,jointail(two,three,...))
        if lpegmatch(hasroot,tail) then
            return tail
        end
        return "/" .. tail
    end
    return lpegmatch(deslasher,concat({  one, two, three, ... },"/"))
end
481
482-- or we can use this:
483--
484-- function file.join(...)
485--     local n = select("#",...)
486--     local one = select(1,...)
487--     if n == 1 then
488--         return one == "" and one or lpegmatch(stripper,one)
489--     end
490--     if one == "" then
491--         return lpegmatch(stripper,n > 2 and concat({ ... },"/",2) or select(2,...))
492--     end
493--     if lpegmatch(isnetwork,one) then
494--         local one = lpegmatch(reslasher,one)
495--         local two = lpegmatch(deslasher,n > 2 and concat({ ... },"/",2) or select(2,...))
496--         if lpegmatch(hasroot,two) then
497--             return one .. two
498--         else
499--             return one .. "/" .. two
500--         end
501--     elseif lpegmatch(isroot,one) then
502--         local two = lpegmatch(deslasher,n > 2 and concat({ ... },"/",2) or select(2,...))
503--         if lpegmatch(hasroot,two) then
504--             return two
505--         else
506--             return "/" .. two
507--         end
508--     else
509--         return lpegmatch(deslasher,concat({ ... },"/"))
510--     end
511-- end
512
513-- print(file.join("c:/whatever"))
514-- print(file.join("c:/whatever","name"))
515-- print(file.join("//","/y"))
516-- print(file.join("/","/y"))
517-- print(file.join("","/y"))
518-- print(file.join("/x/","/y"))
519-- print(file.join("x/","/y"))
520-- print(file.join("http://","/y"))
521-- print(file.join("http://a","/y"))
522-- print(file.join("http:///a","/y"))
523-- print(file.join("//nas-1","/y"))
524-- print(file.join("//nas-1/a/b/c","/y"))
525-- print(file.join("\\\\nas-1\\a\\b\\c","\\y"))
526
527-- The previous one fails on "a.b/c"  so Taco came up with a split based
528-- variant. After some skyping we got it sort of compatible with the old
529-- one. After that the anchoring to currentdir was added in a better way.
530-- Of course there are some optimizations too. Finally we had to deal with
531-- windows drive prefixes and things like sys://. Eventually gsubs and
532-- finds were replaced by lpegs.
533
local drivespec    = R("az","AZ")^1 * colon
local anchors      = fwslash + drivespec                   -- starts with / or a drive
local untouched    = periods + (1-period)^1 * P(-1)        -- starts with .. or has no period at all
local mswindrive   = Cs(drivespec * (bwslash/"/" + fwslash)^0)
local mswinuncpath = slashes * slashes * Cc("//")
-- first result: the normalized drive or unc prefix, or false when there is
-- none; second result: a table with the elements between runs of slashes
local splitstarter = (mswindrive + mswinuncpath + Cc(false))
                   * Ct(lpeg.splitat(slashes^1))
local absolute     = fwslash

-- Collapses "." and ".." elements in str.
--
--   anchor == true : a path that does not start with "/" or a drive gets the
--                    current directory prepended first
--   anchor == "."  : a relative result keeps a leading "./" when str had one
--
-- An empty string or "." gives "."; a string that matches the untouched
-- pattern above is only reslashed; nothing is returned without str.
function file.collapsepath(str,anchor) -- anchor: false|nil, true, "."
    if not str then
        return
    end
    if anchor == true and not lpegmatch(anchors,str) then
        str = getcurrentdir() .. "/" .. str
    end
    if str == "" or str == "." then
        return "."
    end
    if lpegmatch(untouched,str) then
        return lpegmatch(reslasher,str)
    end
    local starter, oldelements = lpegmatch(splitstarter,str)
    local newelements = { }
    -- we go from right to left so that each ".." can cancel the nearest
    -- element to its left that is still alive
    for index=#oldelements,1,-1 do
        local element = oldelements[index]
        if element == '..' then
            local cancelled = false
            for n=index-1,1,-1 do
                local candidate = oldelements[n]
                if candidate ~= '..' and candidate ~= '.' then
                    oldelements[n] = '.' -- will be skipped when we get there
                    cancelled = true
                    break
                end
            end
            if not cancelled then
                -- nothing left to cancel so the ".." stays
                insert(newelements,1,'..')
            end
        elseif element ~= '.' and element ~= "" then
            insert(newelements,1,element)
        end
    end
    if #newelements == 0 then
        return starter or "."
    end
    local collapsed = concat(newelements,'/')
    if starter then
        return starter .. collapsed
    elseif lpegmatch(absolute,str) then
        return "/" .. collapsed
    elseif anchor == "." and find(str,"^%./") then
        return "./" .. collapsed
    else
        return collapsed
    end
end
598
599-- local function test(str,...)
600--    print(string.format("%-20s %-15s %-30s %-20s",str,file.collapsepath(str),file.collapsepath(str,true),file.collapsepath(str,".")))
601-- end
602-- test("a/b.c/d") test("b.c/d") test("b.c/..")
603-- test("/") test("c:/..") test("sys://..")
604-- test("") test("./") test(".") test("..") test("./..") test("../..")
605-- test("a") test("./a") test("/a") test("a/../..")
606-- test("a/./b/..") test("a/aa/../b/bb") test("a/.././././b/..") test("a/./././b/..")
607-- test("a/b/c/../..") test("./a/b/c/../..") test("a/b/c/../..")
608-- test("./a")
609-- test([[\\a.b.c\d\e]])
610
-- characters that are kept as they are: a-z, 0-9, A-Z, "-" and "."
local validchars = R("az","09","AZ","--","..")
-- every other character becomes a "-" (the lpeg.replacer based variant that
-- used to precede this line was overwritten right away and has been dropped)
local pattern_a  = Cs((validchars + P(1)/"-")^1)
-- a (possibly empty) run of dashes, mapped onto nothing
local whatever   = P("-")^0 / ""
-- drops leading dashes and stops in front of a trailing run of dashes
local pattern_b  = Cs(whatever * (1 - whatever * -1)^1)

-- Returns a cleaned up str in which every character that is not in validchars
-- is replaced by "-". With strict leading and trailing dashes are stripped as
-- well. When a pattern does not match, the string is passed on unchanged;
-- nothing is returned without str.
function file.robustname(str,strict)
    if str then
        str = lpegmatch(pattern_a,str) or str
        if strict then
            return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
        else
            return str
        end
    end
end
627
local loaddata, savedata = io.loaddata, io.savedata

-- the io loaders are also available in the file namespace
file.readdata = loaddata
file.savedata = savedata

-- Copies oldname to newname by loading the data and saving it again. Nothing
-- happens when a name is missing or when no (or empty) data comes back.
function file.copy(oldname,newname)
    if not (oldname and newname) then
        return
    end
    local data = loaddata(oldname)
    if data and data ~= "" then
        savedata(newname,data)
    end
end
642
643-- also rewrite previous
644
local letter    = R("az","AZ") + S("_-+")
local separator = P("://")

-- starts at the root: a forward slash or a letter followed by a colon
local rootbased = fwslash
                + letter * colon

-- optional periods and a slash, a letter and a colon, or a run of letters
-- followed by either :// or a slash
local qualified = period^0 * fwslash
                + letter   * colon
                + letter^1 * (separator + fwslash)

lpeg.patterns.qualified = qualified
lpeg.patterns.rootbased = rootbased
657
658-- ./name ../name  /name c: :// name/name
659
-- Returns true or false depending on whether filename starts with something
-- that the qualified pattern matches; a missing filename is passed back as is.
function file.is_qualified_path(filename)
    if not filename then
        return filename
    end
    return lpegmatch(qualified,filename) ~= nil
end
663
-- Returns true or false depending on whether filename starts with something
-- that the rootbased pattern matches; a missing filename is passed back as is.
function file.is_rootbased_path(filename)
    if not filename then
        return filename
    end
    return lpegmatch(rootbased,filename) ~= nil
end
667
668-- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end
669--
670-- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" }
671-- test { "c:", "c:aa", "c:aa/bb", "c:aa/bb/cc", "c:aa/bb/cc.dd", "c:aa/bb/cc.dd.ee" }
672-- test { "/aa", "/aa/bb", "/aa/bb/cc", "/aa/bb/cc.dd", "/aa/bb/cc.dd.ee" }
673-- test { "aa", "aa/bb", "aa/bb/cc", "aa/bb/cc.dd", "aa/bb/cc.dd.ee" }
674
675-- -- maybe:
676--
677-- if os.type == "windows" then
678--     local currentdir = getcurrentdir
679--     function getcurrentdir()
680--         return lpegmatch(reslasher,currentdir())
681--     end
682-- end
683
684-- for myself:
685
-- Returns what comes after the first occurrence of dir in name. Watch out: dir
-- is put into a Lua pattern as it is, so magic characters keep their meaning.
-- When dir is not found or nothing follows it, name itself is returned;
-- nothing is returned without name.
function file.strip(name,dir)
    if not name then
        return
    end
    local _, after = match(name,"^(.-)" .. dir .. "(.*)$")
    if after and after ~= "" then
        return after
    end
    return name
end
692
693-- local debuglist = {
694--     "pathpart", "basename", "nameonly", "suffixonly", "suffix", "dirname", "extname",
695--     "addsuffix", "removesuffix", "replacesuffix", "join",
696--     "strip","collapsepath", "joinpath", "splitpath",
697-- }
698
699-- for i=1,#debuglist do
700--     local name = debuglist[i]
701--     local f = file[name]
702--     file[name] = function(...)
703--         print(name,f(...))
704--         return f(...)
705--     end
706-- end
707
708-- a goodie: a dumb version of mkdirs (not used in context itself, only
709-- in generic usage)
710
-- Calls lfs.mkdir for each successively longer prefix of path, one component
-- at a time.
function lfs.mkdirs(path)
    local sofar = ""
    for component in gmatch(path,"(/*[^\\/]+)") do -- accepts leading c: and /
        sofar = sofar .. component
        -- lfs.isdir("/foo") mistakenly returns true on windows so we
        -- don't test and just make as that one is not too picky
        lfs.mkdir(sofar)
    end
end
720
-- here is one I ran into when messing around with xavante code (kepler project)
-- where it's called in_base .. no gain in using lpeg here
723
-- Walks the components of path while tracking the depth: ".." goes one level
-- up, "." stays, and anything else goes one level down. Returns false as soon
-- as the depth drops below the starting point and true otherwise.
function file.withinbase(path) -- don't go beyond root
    local depth = 0
    if not find(path,"^/") then
        path = "/" .. path
    end
    for dir in gmatch(path,"/([^/]+)") do
        if dir == ".." then
            depth = depth - 1
            -- only going up can bring us below the base
            if depth < 0 then
                return false
            end
        elseif dir ~= "." then
            depth = depth + 1
        end
    end
    return true
end
741
742-- not used in context but was in luatex once:
743
do

    local symlinktarget     = lfs.symlinktarget     -- luametatex (always returns string)
    local symlinkattributes = lfs.symlinkattributes -- luatex     (can return nil)

    -- lfs.readlink(name) returns the target of a link, or nil when there is
    -- none. Which variant gets defined depends on what the library offers.

    if symlinktarget then
        function lfs.readlink(name)
            local target = symlinktarget(name)
            -- a target that equals the name is treated as no link; we return
            -- the target (and not the name, as before) so that this variant
            -- agrees with the symlinkattributes based one below
            return name ~= target and target or nil
        end
    elseif symlinkattributes then
        function lfs.readlink(name)
            return symlinkattributes(name,"target") or nil
        end
    else
        -- no symlink support at all
        function lfs.readlink(name)
            return nil
        end
    end

end
765