publ-fnd.lua /size: 9913 b    last modification: 2021-10-28 13:50
-- Register this file in the global module table, creating that table when it
-- does not exist yet.

if not modules then
    modules = { }
end

modules["publ-fnd"] = {
    version   = 1.001,
    comment   = "this module part of publication support",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
-- When the global 'characters' is not set, the two character data files are
-- located with the resolver and executed, in this order.

if not characters then
    local names = { "char-def.lua", "char-utf.lua" }
    for i=1,#names do
        dofile(resolvers.findfile(names[i]))
    end
end
13
-- This tracker is only for real debugging and not for the average user. The
-- flag follows the state of the "publications.match" tracker.

local trace_match = false

trackers.register("publications.match", function(state)
    trace_match = state
end)
17
-- Local aliases for the globals and library functions used in this file.

local publications = publications

local tonumber = tonumber
local next     = next
local type     = type

local find   = string.find
local concat = table.concat

local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
local C, Cs, Cp  = lpeg.C, lpeg.Cs, lpeg.Cp
local Cc, Carg   = lpeg.Cc, lpeg.Carg
local Ct         = lpeg.Ct

local lpegmatch    = lpeg.match
local lpegpatterns = lpeg.patterns

local formatters = string.formatters
local lowercase  = characters.lower
local topattern  = string.topattern

-- Because of the local declared above, this assignment sets the local alias
-- and not the global: it only guarantees that this file has a table to add
-- its functions to when no global 'publications' exists.

publications = publications or { } -- for testing

local report = logs.reporter("publications","match")
33
-- Lpeg building blocks for the search expression parser.
--
-- Earlier revisions defined 'dash', 'valid', 'key', 'wildcard' and 'word'
-- twice; the first definitions were shadowed before anything could use them,
-- so only the effective ones remain here.

local colon    = P(":")
local lparent  = P("(")
local rparent  = P(")")
local space    = lpegpatterns.whitespace
local utf8char = lpegpatterns.utf8character

-- A run of characters that are not a colon, whitespace or a parenthesis. These
-- two are not referenced by the code visible in this file; they are kept so
-- that no existing name disappears.

local simple   = C((1 - colon - space - lparent - rparent)^1)
local number   = C((1 - colon - space - lparent - rparent)^1)

local key      = C(R("az","AZ")^1)                  -- field name: ascii letters only
local contains = S(":~")                            -- operator for a 'contains' test
local exact    = P("=")                             -- operator for an 'exact' test
local valid    = (1 - space - lparent - rparent)^1  -- unquoted value, colons allowed

-- Rewrites of single characters into Lua string pattern syntax.

local wildcard = P("*") / ".*"
local single   = P("?") / "."
local dash     = P("-") / "%."  -- NOTE(review): yields an escaped dot; confirm that "%-" was not meant
local percent  = P("%") / "%%"  -- this used to match "-" (a copy of the dash rule)

-- A value: quoted text, a braced argument, or a plain run of valid characters.

local word     = Cs(lpegpatterns.unquoted + lpegpatterns.argument + valid)

-- A numeric range written as <first last>; both bounds are captured as text.

local range    = P("<") * space^0 * C((1-space)^1) * space^1 * C((1-space- P(">"))^1) * space^0 * P(">")
58
-- Templates for the pieces of Lua source that get generated per search term.
-- The first group declares a local that holds a field value, the second group
-- produces the condition that tests such a local. The prefixes tell them
-- apart: kf_/nf_ for a plain field lookup, ks_/ns_ for a lookup that also
-- passes 'categories'; k* values go through lower(), n* through tonumber().

-- declarations

local f_key_fld      = formatters["  local kf_%s = get(entry,%q)           \n  if kf_%s then kf_%s = lower(kf_%s) end"]
local f_number_fld   = formatters["  local nf_%s = tonumber(get(entry,%q))"]

local f_key_set      = formatters["  local ks_%s = get(entry,%q,categories)\n  if ks_%s then ks_%s = lower(ks_%s) end"]
local f_number_set   = formatters["  local ns_%s = tonumber(get(entry,%q,categories))"]

-- conditions

local f_fld_exact    = formatters["(kf_%s == %q)"]
local f_fld_contains = formatters["(kf_%s and find(kf_%s,%q))"]
local f_fld_between  = formatters["(nf_%s and nf_%s >= %s and nf_%s <= %s)"]

local f_set_exact    = formatters["(ks_%s == %q)"]
local f_set_contains = formatters["(ks_%s and find(ks_%s,%q))"]
local f_set_between  = formatters["(ns_%s and ns_%s >= %s and ns_%s <= %s)"]

-- a test on all fields of an entry

local f_all_match    = formatters["anywhere(entry,%q)"]
72
-- Turns one parsed search term into the source text of a Lua condition.
--
--   keys  : table collecting the local declarations that the returned
--           condition depends on (compile only uses its values)
--   where : "set" when the term was prefixed by "set:", otherwise ""
--   key   : the field name, or "*" for a test on all fields
--   first : false for an exact test, true for a 'contains' test, otherwise
--           the lower bound of a numeric range
--   last  : the value to compare with, or the upper bound of a range
--
-- A declaration is stored under the name of the local that it declares (kf_,
-- ks_, nf_ or ns_ followed by the key) and not under the bare key. With the
-- bare key, using one field in two ways in a single expression (for instance
-- "set:author:knuth and author:hagen") made the later declaration replace the
-- earlier one, after which the first condition tested an undefined variable.

local function test_key_value(keys,where,key,first,last)
    if not key or key == "" then
        return "(false)"
    elseif key == "*" then
        last = "^.*" .. topattern(lowercase(last)) .. ".*$" -- todo: make an lpeg
        return f_all_match(last)
    end
    local inset = where == "set"
    if first == false then
        -- exact
        last = lowercase(last)
        if inset then
            keys["ks_" .. key] = f_key_set(key,key,key,key,key)
            return f_set_exact(key,last)
        else
            keys["kf_" .. key] = f_key_fld(key,key,key,key,key)
            return f_fld_exact(key,last)
        end
    elseif first == true then
        -- contains
        last = "^.*" .. topattern(lowercase(last)) .. ".*$"
        if inset then
            keys["ks_" .. key] = f_key_set(key,key,key,key,key)
            return f_set_contains(key,key,last)
        else
            keys["kf_" .. key] = f_key_fld(key,key,key,key,key)
            return f_fld_contains(key,key,last)
        end
    else
        -- range
        if inset then
            keys["ns_" .. key] = f_number_set(key,key)
            return f_set_between(key,key,tonumber(first),key,tonumber(last))
        else
            keys["nf_" .. key] = f_number_fld(key,key)
            return f_fld_between(key,key,tonumber(first),key,tonumber(last))
        end
    end
end
110
-- Grammar for what follows the "match" keyword: a sequence of terms, the
-- operators and/or/not, parenthesized groups and single spaces. A term is
-- handed to test_key_value as (keys, where, key, how, word) or as
-- (keys, where, key, first, last) for a range, where 'keys' is the extra
-- argument given to lpeg.match.

local p_compare = P { "all",

    all      = (V("one") + V("operator") + V("nested") + C(" "))^1,

    nested   = C("(") * V("all") * C(")"), -- C really needed?

    operator = C(P("and") + P("or") + P("not")),

    one      = (Carg(1) * V("where") * V("key") * (V("how") * V("word") + V("range"))) / test_key_value,

    -- an optional "set:" prefix, reported as "set" or ""
    where    = C("set") * P(":") + Cc(""),

    -- a field name or the "*" wildcard
    key      = key + C("*"),

    -- true signals 'contains', false signals 'exact'
    how      = contains * Cc(true) + exact * Cc(false),

    word     = word,
    range    = range,

}
131
132-- local p_combine = space^0 * (P(",")/" or ") * space^0
133
134-- local  pattern = Cs((P("match")/"" * space^0 * p_compare + p_combine)^1)
135
local comma     = P(",")
local p_spaces  = space^0

-- a comma between two expressions becomes an "or" in the generated code

local p_combine = (p_spaces * comma * p_spaces) / " or "

-- An expression is either "match" followed by a comparison (the keyword is
-- dropped), or a tag given as tag(name) or as a bare name; the latter two are
-- turned into an exact test on the "tag" field.

local p_expression  do

    local p_match   = (P("match") / "") * Cs(p_compare)

    local p_name    = P("tag") * p_spaces * P("(") * Cs((1 - S(")") - space)^1) * p_spaces * P(")")
                    + p_spaces * Cs((1 - space - comma)^1) * p_spaces

    local p_tag     = (Carg(1) * Cc("") * Cc("tag") * Cc(false) * p_name) / test_key_value

    p_expression    = p_match + p_tag

end

-- one or more expressions separated by commas, returned as one string

local pattern = Cs { "list",
    list = V("item") * (p_combine * V("item"))^0,
    item = p_expression,
}
153
154-- -- -- -- -- -- -- -- -- -- -- -- --
155-- -- -- -- -- -- -- -- -- -- -- -- --
156
-- Helper for the generated matchers: returns true as soon as the lowercased
-- value of some field of the entry matches the Lua string pattern 'str'.
-- Nothing is returned when no field matches.

function publications.anywhere(entry,str) -- helpers
    local key, value = next(entry)
    while key ~= nil do
        local lowered = lowercase(value)
        if find(lowered,str) then
            return true
        end
        key, value = next(entry,key)
    end
end
164
165-- todo: use an environment instead of
166
167-- table={
168--  { "match", "((kf_editor and find(kf_editor,\"^.*braslau.*$\")))" },
169--  { "hash", "foo1234" },
170--  { "tag", "bar5678" },
171-- }
172
-- Skeleton of the generated chunk: the first slot takes the declarations that
-- were collected while parsing, the second one the condition. Loading and
-- running the chunk returns the actual matcher function.

local f_template = formatters[ [[
local find = string.find
local lower = characters.lower
local anywhere = publications.anywhere
local get = publications.getfuzzy
local specification = publications.currentspecification
local categories = specification and specification.categories
return function(entry)
%s
  return %s and true or false
end
]] ]

-- Compiles a search expression into a matcher function.
--
--   dataset : not used by this function
--   expr    : the search expression
--
-- Returns the matcher and the generated source, or false when the expression
-- cannot be parsed or the generated source does not load.
--
-- Whether an expression is valid is decided by the parse result. Counting the
-- collected declarations is not reliable for that: an expression that only
-- tests all fields, like match(*:foo), needs no declarations and was therefore
-- reported as invalid although it compiled fine. A failed parse was also
-- reported twice.

local function compile(dataset,expr)
    local keys       = { }
 -- local expression = lpegmatch(pattern,expr,start,keys)
    local expression = lpegmatch(pattern,expr,1,keys)
    if trace_match then
        report("compiling expression: %s",expr)
    end
    if not expression then
        report("invalid expression: %s",expr)
        return false
    end
    local definitions = { }
    for k, v in next, keys do
        definitions[#definitions+1] = v
    end
    if trace_match then
        for i=1,#definitions do
            report("% 3i : %s",i,definitions[i])
        end
    end
    local code = f_template(concat(definitions,"\n"),expression)
    if trace_match then
        report("generated code: %s",code)
    end
    local finder = loadstring(code) -- use an environment
    if type(finder) == "function" then
        -- running the chunk gives us the matcher
        finder = finder()
        if type(finder) == "function" then
            return finder, code
        end
    end
    report("invalid expression: %s",expr)
    return false
end
219
220-- local function test(str)
221--     local keys        = { }
222--     local definitions = { }
223--     local expression  = lpegmatch(pattern,str,1,keys)
224--     for k, v in next, keys do
225--         definitions[#definitions+1] = v
226--     end
227--     definitions = concat(definitions,"\n")
228--     print(f_template(definitions,expression))
229-- end
230
231-- test("match(foo:bar and (foo:bar or foo:bar))")
232-- test("match(foo=bar and (foo=bar or foo=bar))")
233-- test("match(set:foo:bar),match(set:foo:bar)")
234-- test("match(set:foo=bar)")
235-- test("match(foo:{bar bar})")
236-- test("match(foo={bar bar})")
237-- test("match(set:foo:'bar bar')")
238-- test("match(set:foo='bar bar')")
239-- test("match(set:foo<1000 2000>)")
240-- test("match(set:foo<1000 2000>)")
241-- test("match(*:foo)")
242-- test("match(*:*)")
243
-- An expression is only compiled when "match(" or "tag(" occurs somewhere in
-- it; whitespace is permitted before the parenthesis.

local trigger = (P("match") + P("tag")) * p_spaces * P("(")
local check   = (1-trigger)^0 * trigger

-- Returns a matcher function for the expression, or false when the expression
-- has no trigger, does not compile, or when the matcher raises an error on a
-- trial run with an empty entry.

local function finder(dataset,expression)
    if not lpegmatch(check,expression) then
        return false
    end
    local matcher = compile(dataset,expression)
    if not matcher then
        return false
    end
    local okay, message = pcall(matcher,{})
    if okay then
        return matcher
    end
    report("error in match: %s",message)
    return false
end
258
259-- finder("match(author:foo)")
260-- finder("match(author:foo and author:bar)")
261-- finder("match(author:foo or (author:bar and page:123))")
262-- finder("match(author:foo),match(author:foo)")
263
publications.finder = finder

-- Returns a table, indexed by tag, with the entries in dataset.ordered that
-- satisfy the expression. When several matching entries share a tag the first
-- one is kept. An empty table is returned when no matcher can be made.

function publications.search(dataset,expression)
    local matches = finder(dataset,expression)
    if not matches then
        return { } -- { dataset.luadata[expression] } -- ?
    end
    local target  = { }
    local ordered = dataset.ordered
    for i=1,#ordered do
        local entry = ordered[i]
        if matches(entry) then
            local tag = entry.tag
            -- we always take the first
            if not target[tag] then
                target[tag] = entry
            end
        end
    end
    return target
end
286
287-- local d = publications.datasets.default
288--
289-- local d = publications.load {
290--     dataset   = "default",
291--     filename = "t:/manuals/mkiv/hybrid/tugboat.bib"
292-- }
293--
294-- inspect(publications.search(d,[[match(author:hagen)]]))
295-- inspect(publications.search(d,[[match(author:hagen and author:hoekwater and year:1990-2010)]]))
296-- inspect(publications.search(d,[[match(author:"Bogusław Jackowski")]]))
297-- inspect(publications.search(d,[[match(author:"Bogusław Jackowski" and (tonumber(field:year) or 0) > 2000)]]))
298-- inspect(publications.search(d,[[Hagen:TB19-3-304]]))
299