mtx-grep.lua /size: 8292 b    last modification: 2020-07-01 14:35
-- Register this script's metadata in the global module table. The entry is
-- keyed by this script's own name (the previous key named a different script,
-- which looks like a copy-paste leftover).
if not modules then modules = { } end modules ['mtx-grep'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- If needed this one can be optimized for speed as well as use some existing
10-- helpers. We can quit faster on max, and probably use lpeg instead of find.
11
-- XML description of this script: name, version, the supported command line
-- flags and some usage examples. It is handed to logs.application below. The
-- text is data that is read at runtime, so it must be kept literally as it is.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-grep</entry>
  <entry name="detail">Simple Grepper</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="pattern"><short>search for pattern (optional)</short></flag>
    <flag name="count"><short>count matches only</short></flag>
    <flag name="nocomment"><short>skip lines that start with %% or #</short></flag>
    <flag name="n"><short>show at most n matches</short></flag>
    <flag name="first"><short>only show first match</short></flag>
    <flag name="match"><short>return the match (if it is one)</short></flag>
    <flag name="xml"><short>pattern is lpath expression</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script grep --pattern=module *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern="modules.-%['(.-)'%]" char-*.lua --first</command></example>
    <example><command>mtxrun --script grep --pattern=module --count *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --first *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --nocomment *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --n=10 *.mkiv</command></example>
   </subcategory>
  </category>
 </examples>
 <comments>
    <comment>patterns are lua patterns and need to be escaped accordingly</comment>
 </comments>
</application>
]]
51
-- Application descriptor built from the help text above.
local application = logs.application({
    name     = "mtx-grep",
    banner   = "Simple Grepper 0.10",
    helpinfo = helpinfo,
})

local report = application.report

-- Make sure the (global) script namespace exists before we hook into it.
scripts      = scripts      or { }
scripts.grep = scripts.grep or { }

-- Localized library functions.
local find      = string.find
local match     = string.match
local format    = string.format
local lpegmatch = lpeg.match
local P, C      = lpeg.P, lpeg.C

-- Line splitter: 'content' captures one line without its terminator (crlf, cr
-- or lf), or whatever is left of the data when no terminator follows anymore.
local cr       = P("\r")
local lf       = P("\n")
local crlf     = cr * lf
local newline  = crlf + cr + lf
local content  = C((1-newline)^0) * newline + C(P(1)^1)

-- Output goes to the first writer that is available.
local write_nl = (logs and logs.writer) or (texio and texio.write_nl) or print
73
74-- local pattern = "LIJST[@TYPE='BULLET']/LIJSTITEM[contains(text(),'Kern')]"
75
76-- 'Cc%(\\\"\\\"%)'
77
-- Searches the given files for a pattern and reports the hits via write_nl.
--
--   pattern : lua string pattern, or the expression handed to xml.collected
--             when the "xml" flag is set; nil or "" makes this a no-op
--   files   : list of file names / glob specifications (expanded by dir.glob)
--   offset  : optional index of the first entry of files to process (default 1)
--
-- The command line flags "count", "nocomment", "n", "first", "match" and "xml"
-- are read from environment.argument and steer what is reported. Nothing is
-- returned.
function scripts.grep.find(pattern, files, offset)
    if pattern and pattern ~= "" then
        statistics.starttiming(scripts.grep)
        local nofmatches, noffiles, nofmatchedfiles = 0, 0, 0
        local n, m      = 0, 0 -- per file: lines seen so far, matches so far
        local name      = ""
        local count     = environment.argument("count")
        local nocomment = environment.argument("nocomment")
        local max       = tonumber(environment.argument("n")) or (environment.argument("first") and 1) or false
        local domatch   = environment.argument("match")
        local first     = offset or 1
        if environment.argument("xml") then
            for i=first,#files do
                local globbed = dir.glob(files[i])
                for j=1,#globbed do
                    name = globbed[j]
                    local data = xml.load(name)
                    if data and not data.error then
                        n, m, noffiles = 0, 0, noffiles + 1
                        if count then
                            for _ in xml.collected(data,pattern) do
                                m = m + 1
                            end
                            if m > 0 then
                                nofmatches = nofmatches + m
                                nofmatchedfiles = nofmatchedfiles + 1
                                write_nl(format("%5i  %s",m,name))
                                io.flush()
                            end
                        else
                            for c in xml.collected(data,pattern) do
                                m = m + 1
                                if not max or m <= max then
                                    write_nl(format("%s: %s",name,xml.tostring(c)))
                                end
                                if max and m >= max then
                                    -- m is not used after this loop when we
                                    -- are not counting, so we can quit early
                                    break
                                end
                            end
                        end
                    end
                end
            end
        else
            -- Called by lpeg for every line. As a match-time capture it gets
            -- the subject and the current position first, followed by the
            -- captured line. Returning true lets the scan continue with the
            -- next line, returning false ends the scan of the current file.
            local function check(subject, position, line)
                n = n + 1
                if nocomment and find(line,"^[%%#]") then
                    return true -- comment line: counted as a line, not tested
                end
                if find(line,pattern) then
                    m = m + 1
                    if not count then
                        if not max or m <= max then
                            if domatch then
                                write_nl(match(line,pattern))
                            else
                                write_nl(format("%s %6i: %s",name,n,line))
                            end
                            io.flush()
                        end
                        if max and m >= max then
                            -- nothing more will be shown for this file and m
                            -- is only reported when counting, so stop here
                            return false
                        end
                    end
                end
                return true
            end
            local capture = lpeg.Cmt(content,check)^0
            for i=first,#files do
                local globbed = dir.glob(files[i])
                for j=1,#globbed do
                    name = globbed[j]
                    -- skip names with a path component that starts with a period
                    if not find(name,"/%.") then
                        local data = io.loaddata(name)
                        if data then
                            n, m, noffiles = 0, 0, noffiles + 1
                            lpegmatch(capture,data)
                            if count and m > 0 then
                                nofmatches = nofmatches + m
                                nofmatchedfiles = nofmatchedfiles + 1
                                write_nl(format("%5i  %s",m,name))
                                io.flush()
                            end
                        end
                    end
                end
            end
        end
        statistics.stoptiming(scripts.grep)
        if count and nofmatches > 0 then
            write_nl(format("\nfiles: %s, matches: %s, matched files: %s, runtime: %0.3f seconds",noffiles,nofmatches,nofmatchedfiles,statistics.elapsedtime(scripts.grep)))
        end
    end
end
198
-- Command dispatch. When no --pattern is given the first file argument is
-- taken as the pattern and the remaining arguments are the files to search.
local pattern = environment.argument("pattern")
local files   = environment.files and #environment.files > 0 and environment.files

if environment.argument("exporthelp") then
    -- files is false (or nil) when no file names were given, so we must not
    -- index it blindly; in that case no file name is passed along
    application.export(environment.argument("exporthelp"),files and files[1] or nil)
elseif pattern and files then
    scripts.grep.find(pattern, files)
elseif files then
    scripts.grep.find(files[1], files, 2)
else
    application.help()
end
211