util-zip.lua /size: 22 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['util-zip'] = {
2    version   = 1.001,
3    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
4    copyright = "PRAGMA ADE / ConTeXt Development Team",
5    license   = "see context related readme files"
6}
7
-- This module is mostly meant for relatively simple zip and unzip tasks. We can read
9-- and write zip files but with limitations. Performance is quite good and it makes
10-- us independent of zip tools, which (for some reason) are not always installed.
11--
12-- This is an lmtx module and at some point will be lmtx only but for a while we
13-- keep some hybrid functionality.
14
15local type, tostring, tonumber = type, tostring, tonumber
16local sort, concat = table.sort, table.concat
17
18local find, format, sub, gsub = string.find, string.format, string.sub, string.gsub
19local osdate, ostime, osclock = os.date, os.time, os.clock
20local ioopen = io.open
21local loaddata, savedata = io.loaddata, io.savedata
22local filejoin, isdir, dirname, mkdirs = file.join, lfs.isdir, file.dirname, dir.mkdirs
23local suffix, suffixes = file.suffix, file.suffixes
24local openfile = io.open
25
26gzip = gzip or { } -- so in luatex we keep the old ones too
27
28if not zlib then
29    zlib = xzip    -- in luametatex we shadow the old one
30elseif not xzip then
31    xzip = zlib
32end
33
34local files         = utilities.files
35local openfile      = files.open
36local closefile     = files.close
37local readstring    = files.readstring
38local readcardinal2 = files.readcardinal2le
39local readcardinal4 = files.readcardinal4le
40local setposition   = files.setposition
41local getposition   = files.getposition
42
43local band          = bit32.band
44local rshift        = bit32.rshift
45local lshift        = bit32.lshift
46
47local zlibdecompress     = zlib.decompress
48local zlibdecompresssize = zlib.decompresssize
49local zlibchecksum       = zlib.crc32
50
-- Thin wrappers around the zlib entry points. The window value -15 is the one
-- this file lists for flate streams.

local function decompress(source)
    return zlibdecompress(source,-15)
end

local function decompresssize(source,targetsize)
    return zlibdecompresssize(source,targetsize,-15)
end

-- The running checksum (0 when none is given) is passed before the buffer.

local function calculatecrc(buffer,initial)
    return zlibchecksum(initial or 0,buffer)
end
54
55local zipfiles      = { }
56utilities.zipfiles  = zipfiles
57
58local openzipfile, closezipfile, unzipfile, foundzipfile, getziphash, getziplist  do
59
60    function openzipfile(name)
61        return {
62            name   = name,
63            handle = openfile(name,0),
64        }
65    end
66
67    local function collect(z)
68        if not z.list then
69            local list     = { }
70            local hash     = { }
71            local position = 0
72            local index    = 0
73            local handle   = z.handle
74            while true do
75                setposition(handle,position)
76                local signature = readstring(handle,4)
77                if signature == "PK\3\4" then
78                    -- [local file header 1]
79                    -- [encryption header 1]
80                    -- [file data 1]
81                    -- [data descriptor 1]
82                    local version      = readcardinal2(handle)
83                    local flag         = readcardinal2(handle)
84                    local method       = readcardinal2(handle)
85                    local filetime     = readcardinal2(handle)
86                    local filedate     = readcardinal2(handle)
87                    local crc32        = readcardinal4(handle)
88                    local compressed   = readcardinal4(handle)
89                    local uncompressed = readcardinal4(handle)
90                    local namelength   = readcardinal2(handle)
91                    local extralength  = readcardinal2(handle)
92                    local filename     = readstring(handle,namelength)
93                    local descriptor   = band(flag,8) ~= 0
94                    local encrypted    = band(flag,1) ~= 0
95                    local acceptable   = method == 0 or method == 8
96                    -- 30 bytes of header including the signature
97                    local skipped      = 0
98                    local size         = 0
99                    if encrypted then
100                        size = readcardinal2(handle)
101                        skipbytes(size)
102                        skipped = skipped + size + 2
103                        skipbytes(8)
104                        skipped = skipped + 8
105                        size = readcardinal2(handle)
106                        skipbytes(size)
107                        skipped = skipped + size + 2
108                        size = readcardinal4(handle)
109                        skipbytes(size)
110                        skipped = skipped + size + 4
111                        size = readcardinal2(handle)
112                        skipbytes(size)
113                        skipped = skipped + size + 2
114                    end
115                    position = position + 30 + namelength + extralength + skipped
116                    if descriptor then
117                        setposition(handle,position + compressed)
118                        crc32        = readcardinal4(handle)
119                        compressed   = readcardinal4(handle)
120                        uncompressed = readcardinal4(handle)
121                    end
122                    if acceptable then
123                        index = index + 1
124                        local data = {
125                            filename     = filename,
126                            index        = index,
127                            position     = position,
128                            method       = method,
129                            compressed   = compressed,
130                            uncompressed = uncompressed,
131                            crc32        = crc32,
132                            encrypted    = encrypted,
133                        }
134                        hash[filename] = data
135                        list[index]    = data
136                    else
137                        -- maybe a warning when encrypted
138                    end
139                    position = position + compressed
140                else
141                    break
142                end
143                z.list = list
144                z.hash = hash
145            end
146        end
147    end
148
149    function getziplist(z)
150        local list = z.list
151        if not list then
152            collect(z)
153        end
154        return z.list
155    end
156
157    function getziphash(z)
158        local hash = z.hash
159        if not hash then
160            collect(z)
161        end
162        return z.hash
163    end
164
165    function foundzipfile(z,name)
166        return getziphash(z)[name]
167    end
168
169    function closezipfile(z)
170        local f = z.handle
171        if f then
172            closefile(f)
173            z.handle = nil
174        end
175    end
176
177    function unzipfile(z,filename,check)
178        local hash = z.hash
179        if not hash then
180            hash = zipfiles.hash(z)
181        end
182        local data = hash[filename] -- normalize
183        if not data then
184            -- lower and cleanup
185            -- only name
186        end
187        if data then
188            local handle     = z.handle
189            local position   = data.position
190            local compressed = data.compressed
191            if compressed > 0 then
192                setposition(handle,position)
193                local result = readstring(handle,compressed)
194                if data.method == 8 then
195                    if decompresssize then
196                        result = decompresssize(result,data.uncompressed)
197                    else
198                        result = decompress(result)
199                    end
200                end
201                if check and data.crc32 ~= calculatecrc(result) then
202                    print("checksum mismatch")
203                    return ""
204                end
205                return result
206            else
207                return ""
208            end
209        end
210    end
211
212    zipfiles.open  = openzipfile
213    zipfiles.close = closezipfile
214    zipfiles.unzip = unzipfile
215    zipfiles.hash  = getziphash
216    zipfiles.list  = getziplist
217    zipfiles.found = foundzipfile
218
219end
220
221if xzip then -- flate then do
222
223    local writecardinal1 = files.writebyte
224    local writecardinal2 = files.writecardinal2le
225    local writecardinal4 = files.writecardinal4le
226
227    local logwriter      = logs.writer
228
229    local globpattern    = dir.globpattern
230--     local compress       = flate.flate_compress
231--     local checksum       = flate.update_crc32
232    local compress       = xzip.compress
233    local checksum       = xzip.crc32
234
235 -- local function fromdostime(dostime,dosdate)
236 --     return ostime {
237 --         year  = (dosdate >>  9) + 1980, -- 25 .. 31
238 --         month = (dosdate >>  5) & 0x0F, -- 21 .. 24
239 --         day   = (dosdate      ) & 0x1F, -- 16 .. 20
240 --         hour  = (dostime >> 11)       , -- 11 .. 15
241 --         min   = (dostime >>  5) & 0x3F, --  5 .. 10
242 --         sec   = (dostime      ) & 0x1F, --  0 ..  4
243 --     }
244 -- end
245 --
246 -- local function todostime(time)
247 --     local t = osdate("*t",time)
248 --     return
249 --         ((t.year - 1980) <<  9) + (t.month << 5) +  t.day,
250 --          (t.hour         << 11) + (t.min   << 5) + (t.sec >> 1)
251 -- end
252
253    local function fromdostime(dostime,dosdate)
254        return ostime {
255            year  =      rshift(dosdate, 9) + 1980,  -- 25 .. 31
256            month = band(rshift(dosdate, 5),  0x0F), -- 21 .. 24
257            day   = band(      (dosdate   ),  0x1F), -- 16 .. 20
258            hour  = band(rshift(dostime,11)       ), -- 11 .. 15
259            min   = band(rshift(dostime, 5),  0x3F), --  5 .. 10
260            sec   = band(      (dostime   ),  0x1F), --  0 ..  4
261        }
262    end
263
264    local function todostime(time)
265        local t = osdate("*t",time)
266        return
267            lshift(t.year - 1980, 9) + lshift(t.month,5) +        t.day,
268            lshift(t.hour       ,11) + lshift(t.min  ,5) + rshift(t.sec,1)
269    end
270
271    local function openzip(filename,level,comment,verbose)
272        local f = ioopen(filename,"wb")
273        if f then
274            return {
275                filename     = filename,
276                handle       = f,
277                list         = { },
278                level        = tonumber(level) or 3,
279                comment      = tostring(comment),
280                verbose      = verbose,
281                uncompressed = 0,
282                compressed   = 0,
283            }
284        end
285    end
286
287    local function writezip(z,name,data,level,time)
288        local f        = z.handle
289        local list     = z.list
290        local level    = tonumber(level) or z.level or 3
291        local method   = 8
292        local zipped   = compress(data,level)
293        local checksum = checksum(data)
294        local verbose  = z.verbose
295        --
296        if not zipped then
297            method = 0
298            zipped = data
299        end
300        --
301        local start        = f:seek()
302        local compressed   = #zipped
303        local uncompressed = #data
304        --
305        z.compressed   = z.compressed   + compressed
306        z.uncompressed = z.uncompressed + uncompressed
307        --
308        if verbose then
309            local pct = 100 * compressed/uncompressed
310            if pct >= 100 then
311                logwriter(format("%10i        %s",uncompressed,name))
312            else
313                logwriter(format("%10i  %02.1f  %s",uncompressed,pct,name))
314            end
315        end
316        --
317        f:write("\x50\x4b\x03\x04") -- PK..  0x04034b50
318        --
319        writecardinal2(f,0)            -- minimum version
320        writecardinal2(f,0)            -- flag
321        writecardinal2(f,method)       -- method
322        writecardinal2(f,0)            -- time
323        writecardinal2(f,0)            -- date
324        writecardinal4(f,checksum)     -- crc32
325        writecardinal4(f,compressed)   -- compressed
326        writecardinal4(f,uncompressed) -- uncompressed
327        writecardinal2(f,#name)        -- namelength
328        writecardinal2(f,0)            -- extralength
329        --
330        f:write(name)                  -- name
331        f:write(zipped)
332        --
333        list[#list+1] = { #zipped, #data, name, checksum, start, time or 0 }
334    end
335
336    local function closezip(z)
337        local f       = z.handle
338        local list    = z.list
339        local comment = z.comment
340        local verbose = z.verbose
341        local count   = #list
342        local start   = f:seek()
343        --
344        for i=1,count do
345            local l = list[i]
346            local compressed   = l[1]
347            local uncompressed = l[2]
348            local name         = l[3]
349            local checksum     = l[4]
350            local start        = l[5]
351            local time         = l[6]
352            local date, time   = todostime(time)
353            f:write('\x50\x4b\x01\x02')
354            writecardinal2(f,0)            -- version made by
355            writecardinal2(f,0)            -- version needed to extract
356            writecardinal2(f,0)            -- flags
357            writecardinal2(f,8)            -- method
358            writecardinal2(f,time)         -- time
359            writecardinal2(f,date)         -- date
360            writecardinal4(f,checksum)     -- crc32
361            writecardinal4(f,compressed)   -- compressed
362            writecardinal4(f,uncompressed) -- uncompressed
363            writecardinal2(f,#name)        -- namelength
364            writecardinal2(f,0)            -- extralength
365            writecardinal2(f,0)            -- commentlength
366            writecardinal2(f,0)            -- nofdisks -- ?
367            writecardinal2(f,0)            -- internal attr (type)
368            writecardinal4(f,0)            -- external attr (mode)
369            writecardinal4(f,start)        -- local offset
370            f:write(name)                  -- name
371        end
372        --
373        local stop = f:seek()
374        local size = stop - start
375        --
376        f:write('\x50\x4b\x05\x06')
377        writecardinal2(f,0)            -- disk
378        writecardinal2(f,0)            -- disks
379        writecardinal2(f,count)        -- entries
380        writecardinal2(f,count)        -- entries
381        writecardinal4(f,size)         -- dir size
382        writecardinal4(f,start)        -- dir offset
383        if type(comment) == "string" and comment ~= "" then
384            writecardinal2(f,#comment) -- comment length
385            f:write(comment)           -- comemnt
386        else
387            writecardinal2(f,0)
388        end
389        --
390        if verbose then
391            local compressed   = z.compressed
392            local uncompressed = z.uncompressed
393            local filename     = z.filename
394            --
395            local pct = 100 * compressed/uncompressed
396            logwriter("")
397            if pct >= 100 then
398                logwriter(format("%10i        %s",uncompressed,filename))
399            else
400                logwriter(format("%10i  %02.1f  %s",uncompressed,pct,filename))
401            end
402        end
403        --
404        f:close()
405    end
406
407    local function zipdir(zipname,path,level,verbose)
408        if type(zipname) == "table" then
409            verbose = zipname.verbose
410            level   = zipname.level
411            path    = zipname.path
412            zipname = zipname.zipname
413        end
414        if not zipname or zipname == "" then
415            return
416        end
417        if not path or path == "" then
418            path = "."
419        end
420        if not isdir(path) then
421            return
422        end
423        path = gsub(path,"\\+","/")
424        path = gsub(path,"/+","/")
425        local list  = { }
426        local count = 0
427        globpattern(path,"",true,function(name,size,time)
428            count = count + 1
429            list[count] = { name, time }
430        end)
431        sort(list,function(a,b)
432            return a[1] < b[1]
433        end)
434        local zipf = openzip(zipname,level,comment,verbose)
435        if zipf then
436            local p = #path + 2
437            for i=1,count do
438                local li   = list[i]
439                local name = li[1]
440                local time = li[2]
441                local data = loaddata(name)
442                local name = sub(name,p,#name)
443                writezip(zipf,name,data,level,time,verbose)
444            end
445            closezip(zipf)
446        end
447    end
448
449    local function unzipdir(zipname,path,verbose)
450        if type(zipname) == "table" then
451            verbose = zipname.verbose
452            path    = zipname.path
453            zipname = zipname.zipname
454        end
455        if not zipname or zipname == "" then
456            return
457        end
458        if not path or path == "" then
459            path = "."
460        end
461        local z = openzipfile(zipname)
462        if z then
463            local list = getziplist(z)
464            if list then
465                local total = 0
466                local count = #list
467                local step  = number.idiv(count,10)
468                local done  = 0
469                local steps = verbose == "steps"
470                local time  = steps and osclock()
471                for i=1,count do
472                    local l = list[i]
473                    local n = l.filename
474                    local d = unzipfile(z,n) -- true for check
475                    if d then
476                        local p = filejoin(path,n)
477                        if mkdirs(dirname(p)) then
478                            if steps then
479                                total = total + #d
480                                done = done + 1
481                                if done >= step then
482                                    done = 0
483                                    logwriter(format("%4i files of %4i done, %10i bytes, %0.3f seconds",i,count,total,osclock()-time))
484                                end
485                            elseif verbose then
486                                logwriter(n)
487                            end
488                            savedata(p,d)
489                        end
490                    else
491                        logwriter(format("problem with file %s",n))
492                    end
493                end
494                if steps then
495                    logwriter(format("%4i files of %4i done, %10i bytes, %0.3f seconds",count,count,total,osclock()-time))
496                end
497                closezipfile(z)
498                return true
499            else
500                closezipfile(z)
501            end
502        end
503    end
504
505    zipfiles.zipdir   = zipdir
506    zipfiles.unzipdir = unzipdir
507
508end
509
510-- todo: compress/decompress that work with offset in string
511
512-- We only have a few official methods here:
513--
514--   local decompressed = gzip.load       (filename)
515--   local resultsize   = gzip.save       (filename,compresslevel)
516--   local compressed   = gzip.compress   (str,compresslevel)
517--   local decompressed = gzip.decompress (str)
518--   local iscompressed = gzip.compressed (str)
519--   local suffix, okay = gzip.suffix     (filename)
520--
521-- In LuaMetaTeX we have only xzip which implements a very few methods:
522--
523--   compress   (str,level,method,window,memory,strategy)
524--   decompress (str,window)
525--   adler32    (str,checksum)
526--   crc32      (str,checksum)
527
528local pattern   = "^\x1F\x8B\x08"
529local gziplevel = 3
530
-- Returns the first value that file.suffixes reports for the filename, plus a
-- boolean that tells if the second one is "gz".

function gzip.suffix(filename)
    local first, second = suffixes(filename)
    return first, second == "gz"
end
536
-- Tells if the string starts with the gzip signature. The result is the start
-- position of the match (or nil); a false or nil argument is handed back as is.

function gzip.compressed(s)
    if not s then
        return s
    end
    return (find(s,pattern))
end
540
541local getdecompressed
542local putcompressed
543
544if gzip.compress then
545
546    local gzipwindow = 15 + 16 -- +16: gzip, +32: gzip|zlib
547
548    local compress   = zlib.compress
549    local decompress = zlib.decompress
550
551    getdecompressed = function(str)
552        return decompress(str,gzipwindow) -- pass offset
553    end
554
555    putcompressed = function(str,level)
556        return compress(str,level or gziplevel,nil,gzipwindow)
557    end
558
559else
560
561    -- Special window values are: flate: -15, zlib: 15, gzip : -15
562
563    local gzipwindow = -15 -- miniz needs this
564    local identifier = "\x1F\x8B"
565
566    local compress      = zlib.compress
567    local decompress    = zlib.decompress
568    local crc32         = zlib.crc32
569
570    local streams       = utilities.streams
571    local openstream    = streams.openstring
572    local closestream   = streams.close
573    local getposition   = streams.getposition
574    local readbyte      = streams.readbyte
575    local readcardinal4 = streams.readcardinal4le
576    local readcardinal2 = streams.readcardinal2le
577    local readstring    = streams.readstring
578    local readcstring   = streams.readcstring
579    local skipbytes     = streams.skip
580
581    local tocardinal1   = streams.tocardinal1
582    local tocardinal4   = streams.tocardinal4le
583
584    getdecompressed = function(str)
585        local s = openstream(str)
586        local identifier  = readstring(s,2)
587        local method      = readbyte(s,1)
588        local flags       = readbyte(s,1)
589        local timestamp   = readcardinal4(s)
590        local compression = readbyte(s,1)
591        local operating   = readbyte(s,1)
592     -- local isjusttext  = (flags & 0x01 ~= 0) and true             or false
593     -- local extrasize   = (flags & 0x04 ~= 0) and readcardinal2(s) or 0
594     -- local filename    = (flags & 0x08 ~= 0) and readcstring(s)   or ""
595     -- local comment     = (flags & 0x10 ~= 0) and readcstring(s)   or ""
596     -- local checksum    = (flags & 0x02 ~= 0) and readcardinal2(s) or 0
597        local isjusttext  = band(flags,0x01) ~= 0 and true             or false
598        local extrasize   = band(flags,0x04) ~= 0 and readcardinal2(s) or 0
599        local filename    = band(flags,0x08) ~= 0 and readcstring(s)   or ""
600        local comment     = band(flags,0x10) ~= 0 and readcstring(s)   or ""
601        local checksum    = band(flags,0x02) ~= 0 and readcardinal2(s) or 0
602        local compressed  = readstring(s,#str)
603        local data = decompress(compressed,gzipwindow) -- pass offset
604        return data
605    end
606
607    putcompressed = function(str,level,originalname)
608        return concat {
609            identifier,              -- 2 identifier
610            tocardinal1(0x08),       -- 1 method
611            tocardinal1(0x08),       -- 1 flags
612            tocardinal4(os.time()),  -- 4 mtime
613            tocardinal1(0x02),       -- 1 compression (2 or 4)
614            tocardinal1(0xFF),       -- 1 operating
615            (originalname or "unknownname") .. "\0",
616            compress(str,level,nil,gzipwindow),
617            tocardinal4(crc32(str)), -- 4
618            tocardinal4(#str),       -- 4
619        }
620    end
621
622end
623
-- Loads a file and returns its content, decompressed when the suffix is "gz".
-- Nothing is returned when the file cannot be opened or is empty.
--
-- We use io.open here: at this point 'openfile' is files.open, which is called
-- elsewhere in this file as (name,0), so its second argument is not a mode.

function gzip.load(filename)
    local f = ioopen(filename,"rb")
    if not f then
        -- invalid file
        return
    end
    local data = f:read("*all")
    f:close()
    if data and data ~= "" then
        if suffix(filename) == "gz" then
            data = getdecompressed(data)
        end
        return data
    end
end
639
-- Compresses 'data' and writes it to 'filename', adding a ".gz" suffix when it
-- is not there yet. Returns the number of bytes written or nothing when the
-- file cannot be opened.
--
-- We use io.open here: at this point 'openfile' is files.open, which is called
-- elsewhere in this file as (name,0), so "wb" would not be taken as a mode.

function gzip.save(filename,data,level,originalname)
    if suffix(filename) ~= "gz" then
        filename = filename .. ".gz"
    end
    local f = ioopen(filename,"wb")
    if f then
        data = putcompressed(data or "",level or gziplevel,originalname)
        f:write(data)
        f:close()
        return #data
    end
end
652
-- Compresses a string unless it already starts with the gzip signature (then,
-- as with a missing string, nothing is returned). A level of zero or less hands
-- back the string untouched, levels above 9 are clipped, and without a level
-- the module default is used. When compression gives no result we return the
-- original string.

function gzip.compress(s,level)
    if not s or find(s,pattern) then
        return
    end
    local effective = level
    if not effective then
        effective = gziplevel
    elseif effective <= 0 then
        return s
    elseif effective > 9 then
        effective = 9
    end
    local packed = putcompressed(s,effective)
    return packed or s
end
665
-- Decompresses a string that starts with the gzip signature; anything else
-- (including nil) is returned as is.

function gzip.decompress(s)
    if not s or not find(s,pattern) then
        return s
    end
    return getdecompressed(s)
end
673
674zipfiles.gunzipfile = gzip.load
675
676return zipfiles
677