mtx-grep.lua /size: 10 Kb    last modification: 2025-02-21 11:03
-- Registers the metadata of this script in the global modules table, which is
-- created when it does not exist yet. The entry is stored under the name of
-- this script (it used to be filed under the name of another script).
if not modules then modules = { } end modules ['mtx-grep'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- If needed this one can be optimized for speed as well as use some existing
10-- helpers. We can quit faster on max, and probably use lpeg instead of find.
11
-- Help information in xml format; it is handed to logs.application as the
-- helpinfo field. The description of the nocomment flag mentions all three
-- line prefixes that the comment filter in scripts.grep.find tests for.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-grep</entry>
  <entry name="detail">Simple Grepper</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="pattern"><short>search for pattern (optional)</short></flag>
    <flag name="count"><short>count matches only</short></flag>
    <flag name="all"><short>count all occurences in a line</short></flag>
    <flag name="nocomment"><short>skip lines that start with %%, # or -</short></flag>
    <flag name="noattic"><short>skip files that hh considers irrelevant</short></flag>
    <flag name="n"><short>show at most n matches</short></flag>
    <flag name="first"><short>only show first match</short></flag>
    <flag name="match"><short>return the match (if it is one)</short></flag>
    <flag name="xml"><short>pattern is lpath expression</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script grep --pattern=module *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern="modules.-%['(.-)'%]" char-*.lua --first</command></example>
    <example><command>mtxrun --script grep --pattern=module --count *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --first *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --nocomment *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --n=10 *.mkiv</command></example>
   </subcategory>
  </category>
 </examples>
 <comments>
    <comment>patterns are lua patterns and need to be escaped accordingly</comment>
 </comments>
</application>
]]
53
-- The application object is created from the script name, the banner and the
-- help information defined above.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "Simple Grepper 0.10",
    name     = "mtx-grep",
})

local report = application.report

-- Make sure that the global scripts table and the grep entry in it exist, while
-- keeping whatever is already there.
if not scripts then
    scripts = { }
end
if not scripts.grep then
    scripts.grep = { }
end
64
-- Shortcuts to the library functions that are used in the search loops.
local find      = string.find
local match     = string.match
local format    = string.format
local lpegmatch = lpeg.match

-- The line splitter: content captures the text up to (but not including) the
-- next line ending, where a line ending is \r\n, \r or \n. When no line ending
-- is left the remaining (non empty) text is captured instead.
local cr, lf, crlf, newline, content
do
    local P, C = lpeg.P, lpeg.C
    cr      = P("\r")
    lf      = P("\n")
    crlf    = cr * lf
    newline = crlf + cr + lf
    content = C((P(1) - newline)^0) * newline + C(P(1)^1)
end

-- Output goes to the first writer that is available: logs.writer, then
-- texio.write_nl and finally print.
local write_nl = print
if logs and logs.writer then
    write_nl = logs.writer
elseif texio and texio.write_nl then
    write_nl = texio.write_nl
end
75
76-- local pattern = "LIJST[@TYPE='BULLET']/LIJSTITEM[contains(text(),'Kern')]"
77
78-- 'Cc%(\\\"\\\"%)'
79
-- Counts the matches of a Lua pattern in a line. Each search resumes right
-- after the previous match, so matches never overlap (restarting one position
-- after the start of a match would count "%a+" three times in "abc"). An empty
-- match still moves the search position forward, which guarantees progress.
local function countmatches(line,pattern)
    local total = 0
    local init  = 1
    local limit = #line + 1 -- an empty match is still possible right after the last byte
    while init <= limit do
        local first, last = find(line,pattern,init)
        if not first then
            break
        end
        total = total + 1
        if last < first then
            init = first + 1
        else
            init = last + 1
        end
    end
    return total
end

-- Searches files for a pattern and writes the hits, or the number of hits per
-- file, with write_nl.
--
-- pattern : a Lua pattern, or an lpath expression when --xml is given; nothing
--           is done when it is nil or an empty string
-- files   : a list of file specifications, each of them is expanded by dir.glob
-- offset  : the index of the first entry in files that is used (default: 1)
--
-- The following command line arguments are consulted:
--
-- noattic   : skip file names that contain attic, backup, old, keep, install or texmf
-- count     : write the number of matching lines per file plus a final summary
-- all       : with count: count every match in a line instead of matching lines
-- nocomment : ignore lines that start with %, # or -
-- n, first  : write at most n matches (or one match) per file
-- match     : write what string.match returns instead of the whole line
-- xml       : load the files as xml and treat the pattern as an lpath expression
--
-- Nothing is returned.
function scripts.grep.find(pattern, files, offset)
    if not pattern or pattern == "" then
        return
    end
    statistics.starttiming(scripts.grep)
    local nofmatches, noffiles, nofmatchedfiles = 0, 0, 0
    local n, m      = 0, 0 -- line number and number of matches in the current file
    local name      = ""   -- current file, shared with the closures below
    local noattic   = environment.argument("noattic")
    local count     = environment.argument("count")
    local nocomment = environment.argument("nocomment")
    local max       = tonumber(environment.argument("n")) or (environment.argument("first") and 1) or false
    local domatch   = environment.argument("match")
    local all       = environment.argument("all")
    local start     = offset or 1
    -- for me:
    local function skip(name)
        return noattic and (find(name,"attic") or find(name,"backup") or find(name,"old") or find(name,"keep") or find(name,"install") or find(name,"texmf"))
    end
    -- adds the matches of the current file to the totals and writes its count
    local function reportcount()
        nofmatches = nofmatches + m
        nofmatchedfiles = nofmatchedfiles + 1
        write_nl(format("%5i  %s",m,name))
        io.flush()
    end
    --
    if environment.argument("xml") then
        for i=start,#files do
            local globbed = dir.glob(files[i])
            for j=1,#globbed do
                name = globbed[j]
                if not skip(name) then
                    local data = xml.load(name)
                    if data and not data.error then
                        n, m, noffiles = 0, 0, noffiles + 1
                        if count then
                            for _ in xml.collected(data,pattern) do
                                m = m + 1
                            end
                            if m > 0 then
                                reportcount()
                            end
                        else
                            for c in xml.collected(data,pattern) do
                                m = m + 1
                                if not max or m <= max then
                                    write_nl(format("%s: %s",name,xml.tostring(c)))
                                end
                            end
                        end
                    end
                end
            end
        end
    else
        -- First we choose what has to happen with a line that is to be looked
        -- at; the choice is made once so that there is no testing per line.
        local scan
        if count then
            -- A pattern that starts with ^ can match a line only once, but a
            -- search with an init position anchors at that position, so such
            -- patterns are handled by the one-match-per-line variant.
            if all and not find(pattern,"^%^") then
                scan = function(line)
                    m = m + countmatches(line,pattern)
                end
            else
                scan = function(line)
                    if find(line,pattern) then
                        m = m + 1
                    end
                end
            end
        else
            scan = function(line)
                if find(line,pattern) then
                    m = m + 1
                    if not max or m <= max then
                        if domatch then
                            write_nl(match(line,pattern))
                        else
                            write_nl(format("%s %6i: %s",name,n,line))
                        end
                        io.flush()
                    end
                end
            end
        end
        -- Every line is counted, also the ones that are skipped as comment. The
        -- checkers return nothing on purpose: values returned by a function
        -- capture become captures of the match.
        local check
        if nocomment then
            check = function(line)
                n = n + 1
                if not find(line,"^[%%#%-]") then
                    scan(line)
                end
            end
        else
            check = function(line)
                n = n + 1
                scan(line)
            end
        end
        local capture = (content/check)^0 -- todo: break out when max
        for i=start,#files do
            local globbed = dir.glob(files[i])
            for j=1,#globbed do
                name = globbed[j]
                -- names with a path component that starts with a period are ignored
                if not find(name,"/%.") and not skip(name) then
                    local data = io.loaddata(name)
                    if data then
                        n, m, noffiles = 0, 0, noffiles + 1
                        lpegmatch(capture,data)
                        if count and m > 0 then
                            reportcount()
                        end
                    end
                end
            end
        end
    end
    statistics.stoptiming(scripts.grep)
    if count and nofmatches > 0 then
        write_nl(format("\nfiles: %s, matches: %s, matched files: %s, runtime: %0.3f seconds",noffiles,nofmatches,nofmatchedfiles,statistics.elapsedtime(scripts.grep)))
    end
end
243
-- Command line dispatch. Here files is either the non empty list of file
-- arguments or a false value, so it has to be tested before it gets indexed.
local pattern = environment.argument("pattern")
local files   = environment.files and #environment.files > 0 and environment.files

if environment.argument("exporthelp") then
    -- without file arguments no file name is passed (indexing files would fail then)
    application.export(environment.argument("exporthelp"),files and files[1] or nil)
elseif pattern and files then
    scripts.grep.find(pattern, files)
elseif files then
    -- no explicit pattern: the first argument is the pattern, the files follow
    scripts.grep.find(files[1], files, 2)
else
    application.help()
end
256