mtx-grep.lua /size: 8847 b    last modification: 2024-01-16 09:02
-- Register this script's metadata in the global modules table, creating the
-- table when it does not exist yet. The key matches the script name (mtx-grep).
if not modules then modules = { } end modules ['mtx-grep'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- If needed this one can be optimized for speed as well as use some existing
10-- helpers. We can quit faster on max, and probably use lpeg instead of find.
11
-- Help information in xml format: the flags, some usage examples and a remark
-- about the kind of patterns that are accepted. It is handed over to
-- logs.application below.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-grep</entry>
  <entry name="detail">Simple Grepper</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="pattern"><short>search for pattern (optional)</short></flag>
    <flag name="count"><short>count matches only</short></flag>
    <flag name="nocomment"><short>skip lines that start with %% or #</short></flag>
    <flag name="noattic"><short>skip files that hh considers irrelevant</short></flag>
    <flag name="n"><short>show at most n matches</short></flag>
    <flag name="first"><short>only show first match</short></flag>
    <flag name="match"><short>return the match (if it is one)</short></flag>
    <flag name="xml"><short>pattern is lpath expression</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script grep --pattern=module *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern="modules.-%['(.-)'%]" char-*.lua --first</command></example>
    <example><command>mtxrun --script grep --pattern=module --count *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --first *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --nocomment *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --n=10 *.mkiv</command></example>
   </subcategory>
  </category>
 </examples>
 <comments>
    <comment>patterns are lua patterns and need to be escaped accordingly</comment>
 </comments>
</application>
]]

-- The application object gives us the help and export entry points used at
-- the end of this file, plus a reporter.
local application = logs.application({
    name = "mtx-grep",
    banner = "Simple Grepper 0.10",
    helpinfo = helpinfo,
})

local report = application.report
60
-- Make sure that the shared scripts namespace and our own slot in it exist.
if not scripts then
    scripts = { }
end
if not scripts.grep then
    scripts.grep = { }
end

-- Localized library functions.
local find      = string.find
local match     = string.match
local format    = string.format
local lpegmatch = lpeg.match

local P, C = lpeg.P, lpeg.C

-- Line splitter: a line is either a (possibly empty) run of characters that
-- ends with a newline (crlf, cr or lf), or the non-empty remainder of the
-- data when there is no final newline. The line itself is captured, the
-- newline is not.
local cr      = P("\r")
local lf      = P("\n")
local crlf    = cr * lf
local newline = crlf + cr + lf
local content = C((P(1) - newline)^0) * newline + C(P(1)^1)

-- Output goes to the first writer that is available, with print as fallback.
local write_nl = print
if logs and logs.writer then
    write_nl = logs.writer
elseif texio and texio.write_nl then
    write_nl = texio.write_nl
end

-- local pattern = "LIJST[@TYPE='BULLET']/LIJSTITEM[contains(text(),'Kern')]"

-- 'Cc%(\\\"\\\"%)'
78
-- Search a list of files for a pattern and report the hits.
--
-- pattern : a Lua string pattern that is matched against each line, or (with
--           --xml) an lpath expression that is passed to xml.collected; when
--           it is nil or empty nothing is done
-- files   : list of file specifications, each one is expanded with dir.glob
-- offset  : optional index of the first entry in files to process (default 1)
--
-- The flags --noattic, --count, --nocomment, --n, --first, --match and --xml
-- are read with environment.argument. Results are written with write_nl and
-- nothing is returned. In count mode a line per matching file is written,
-- followed by a summary when there was at least one match.
function scripts.grep.find(pattern, files, offset)
    if not pattern or pattern == "" then
        return
    end
    statistics.starttiming(scripts.grep)
    local nofmatches, noffiles, nofmatchedfiles = 0, 0, 0
    -- n is the current line number and m the number of matches, both per file;
    -- together with name they are shared with the closures defined below
    local n, m = 0, 0
    local name = ""
    local first = offset or 1
    local noattic = environment.argument("noattic")
    local count = environment.argument("count")
    local nocomment = environment.argument("nocomment")
    -- max is a number when --n or --first is given and false otherwise
    local max = tonumber(environment.argument("n")) or (environment.argument("first") and 1) or false
    local domatch = environment.argument("match")
    -- for me:
    local function skip(name)
        return noattic and (find(name,"attic") or find(name,"backup") or find(name,"old") or find(name,"keep") or find(name,"install") or find(name,"texmf"))
    end
    -- count mode: add the matches of the current file to the totals and show
    -- them, but only when there are any
    local function registerfile()
        if m > 0 then
            nofmatches = nofmatches + m
            nofmatchedfiles = nofmatchedfiles + 1
            write_nl(format("%5i  %s",m,name))
            io.flush()
        end
    end
    --
    if environment.argument("xml") then
        for i=first, #files do
            local globbed = dir.glob(files[i])
            for j=1,#globbed do
                name = globbed[j]
                if not skip(name) then
                    local data = xml.load(name)
                    if data and not data.error then
                        n, m, noffiles = 0, 0, noffiles + 1
                        if count then
                            for _ in xml.collected(data,pattern) do
                                m = m + 1
                            end
                            registerfile()
                        else
                            for c in xml.collected(data,pattern) do
                                m = m + 1
                                if max and m > max then
                                    -- the limit is reached and nothing more
                                    -- would be shown, so we can quit
                                    break
                                end
                                write_nl(format("%s: %s",name,xml.tostring(c)))
                            end
                        end
                    end
                end
            end
        end
    else
        -- what happens with a matching line: just count it, or show it as
        -- long as we are below the maximum
        local register
        if count then
            register = function()
                m = m + 1
            end
        else
            register = function(line)
                m = m + 1
                if not max or m <= max then
                    if domatch then
                        write_nl(match(line,pattern))
                    else
                        write_nl(format("%s %6i: %s",name,n,line))
                    end
                    io.flush()
                end
            end
        end
        -- the per line checker; with --nocomment lines that start with a
        -- percent or hash are not tested against the pattern
        local check
        if nocomment then
            check = function(line)
                n = n + 1
                if not find(line,"^[%%#]") and find(line,pattern) then
                    register(line)
                end
            end
        else
            check = function(line)
                n = n + 1
                if find(line,pattern) then
                    register(line)
                end
            end
        end
        local capture
        if count or not max then
            -- every line has to be seen
            capture = (content/check)^0
        else
            -- a match time capture is called while matching and when it
            -- returns false the repetition ends, so we stop reading lines as
            -- soon as the maximum number of matches has been shown; it has to
            -- return a real boolean (or a position)
            capture = lpeg.Cmt(content, function(_, _, line)
                check(line)
                return m < max
            end)^0
        end
        for i=first, #files do
            local globbed = dir.glob(files[i])
            for j=1,#globbed do
                name = globbed[j]
                -- names with a component that starts with a period are ignored
                if not find(name,"/%.") and not skip(name) then
                    local data = io.loaddata(name)
                    if data then
                        n, m, noffiles = 0, 0, noffiles + 1
                        lpegmatch(capture,data)
                        if count then
                            registerfile()
                        end
                    end
                end
            end
        end
    end
    statistics.stoptiming(scripts.grep)
    if count and nofmatches > 0 then
        write_nl(format("\nfiles: %s, matches: %s, matched files: %s, runtime: %0.3f seconds",noffiles,nofmatches,nofmatchedfiles,statistics.elapsedtime(scripts.grep)))
    end
end
207
-- Entry point: decide what to do based on the command line.
--
-- files is the list of file arguments, or false/nil when none were given.
local pattern = environment.argument("pattern")
local files   = environment.files and #environment.files > 0 and environment.files

if environment.argument("exporthelp") then
    -- files is not a table when no file was given, so the index is guarded;
    -- in that case nil is passed as second argument
    application.export(environment.argument("exporthelp"),files and files[1] or nil)
elseif pattern and files then
    scripts.grep.find(pattern, files)
elseif files then
    -- no --pattern: the first argument is the pattern, the rest are files
    scripts.grep.find(files[1], files, 2)
else
    application.help()
end
220