-- util-prs.lmt /size: 27 Kb    last modification: 2024-01-16 10:22
1if not modules then modules = { } end modules ['util-prs'] = {
2    version   = 1.001,
3    comment   = "companion to luat-lib.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local lpeg, table, string = lpeg, table, string
10local P, R, V, S, C, Ct, Cs, Carg, Cc, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cp
11----- Cg, Cf = lpeg.Cg, lpeg.Cf
12local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
13local concat, gmatch, find = table.concat, string.gmatch, string.find
14local tonumber, tostring, type, next, rawset = tonumber, tostring, type, next, rawset
15local mod, div = math.mod, math.div
16
17utilities         = utilities or {}
18local parsers     = utilities.parsers or { }
19utilities.parsers = parsers
20local patterns    = parsers.patterns or { }
21parsers.patterns  = patterns
22
23local setmetatableindex = table.setmetatableindex
24local sortedhash        = table.sortedhash
25local sortedkeys        = table.sortedkeys
26local tohash            = table.tohash
27
28local hashes            = { }
29parsers.hashes          = hashes
30-- we share some patterns
31
32local digit       = R("09")
33local space       = P(' ')
34local equal       = P("=")
35local colon       = P(":")
36local comma       = P(",")
37local lbrace      = P("{")
38local rbrace      = P("}")
39local lparent     = P("(")
40local rparent     = P(")")
41local lbracket    = P("[")
42local rbracket    = P("]")
43local period      = S(".")
44local punctuation = S(".,:;")
45local spacer      = lpegpatterns.spacer
46local whitespace  = lpegpatterns.whitespace
47local newline     = lpegpatterns.newline
48local anything    = lpegpatterns.anything
49local endofstring = lpegpatterns.endofstring
50
51local nobrace     = 1 - (lbrace   + rbrace )
52local noparent    = 1 - (lparent  + rparent)
53local nobracket   = 1 - (lbracket + rbracket)
54
55-- we could use a Cf Cg construct or from now a %
56
57local escape, left, right = P("\\"), P('{'), P('}')
58
59-- lpegpatterns.balanced = P {
60--     [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
61--     [2] = left * V(1) * right
62-- }
63lpegpatterns.balanced = P {
64    ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
65    left * V(1) * right
66}
67
68local nestedbraces   = P { lbrace   * (nobrace   + V(1))^0 * rbrace }
69local nestedparents  = P { lparent  * (noparent  + V(1))^0 * rparent }
70local nestedbrackets = P { lbracket * (nobracket + V(1))^0 * rbracket }
71local spaces         = space^0
72local argument       = Cs((lbrace/"") * ((nobrace + nestedbraces)^0) * (rbrace/""))
73local content        = (1-endofstring)^0
74
75lpegpatterns.nestedbraces   = nestedbraces   -- no capture
76lpegpatterns.nestedparents  = nestedparents  -- no capture
77lpegpatterns.nestedbrackets = nestedbrackets -- no capture
78lpegpatterns.nested         = nestedbraces   -- no capture
79lpegpatterns.argument       = argument       -- argument after e.g. =
80lpegpatterns.content        = content        -- rest after e.g =
81
82local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
83                + C((nestedbraces + (1-comma))^0)
84
85local key       = C((1-equal-comma)^1)
86local pattern_a = (space+comma)^0 * (key * equal * value + key * C(""))
87local pattern_c = (space+comma)^0 * (key * equal * value)
88local pattern_d = (space+comma)^0 * (key * (equal+colon) * value + key * C(""))
89
90local key       = C((1-space-equal-comma)^1)
91local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + C("")))
92
93-- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored
94
-- Shared accumulator for the settings_to_hash* functions: each of them rebinds
-- this upvalue to its target table right before matching, so the patterns that
-- use 'set' always write into the table of the call that is running. This also
-- means that these parsers are not reentrant.

local hash = { }

-- Capture handler for the pattern_?_s patterns: stores one parsed key/value
-- pair in the current accumulator.

local function set(key,value)
    hash[key] = value
end
100
101local pattern_a_s = (pattern_a/set)^1
102local pattern_b_s = (pattern_b/set)^1
103local pattern_c_s = (pattern_c/set)^1
104local pattern_d_s = (pattern_d/set)^1
105
106patterns.settings_to_hash_a = pattern_a_s
107patterns.settings_to_hash_b = pattern_b_s
108patterns.settings_to_hash_c = pattern_c_s
109patterns.settings_to_hash_d = pattern_d_s
110
-- Returns a repeating key/value pattern that hands each parsed pair to the
-- given 'set' function. With how == "strict" only key=value pairs are accepted,
-- with how == "tolerant" spaces around the key and equal sign are skipped, and
-- in all other cases a key without value gets an empty string as value.

function parsers.make_settings_to_hash_pattern(set,how)
    local pattern = pattern_a
    if how == "strict" then
        pattern = pattern_c
    elseif how == "tolerant" then
        pattern = pattern_b
    end
    return (pattern/set)^1
end
120
-- Converts "a=1, b=2, c, d={x,y}" into a hash. A key without value gets an
-- empty string and the outer braces of a value are removed.
--
-- str      : a string to parse, or a table that is passed on (or merged)
-- existing : optional table that receives the pairs
--
-- Returns the filled table. An empty or missing str gives a fresh empty table,
-- also when 'existing' is given. The pairs are collected via the shared 'hash'
-- upvalue that the 'set' handler writes to.

function parsers.settings_to_hash(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            return existing -- used to be the undefined global 'exiting', so nil came back
        else
            return str
        end
    else
        hash = existing or { }
        lpegmatch(pattern_a_s,str)
        return hash
    end
end
139
-- Same idea as settings_to_hash but a key can be followed by either '=' or
-- ':' (pattern_d). A table is returned untouched, an empty or missing string
-- gives an empty table. There is no 'existing' table here: we always start
-- with a fresh hash.

function parsers.settings_to_hash_colon_too(str)
    if type(str) == "table" then
        return str
    end
    if not str or str == "" then
        return { }
    end
    hash = { }
    lpegmatch(pattern_d_s,str)
    return hash
end
151
-- Tolerant variant of settings_to_hash: spaces around keys and around the
-- equal sign are skipped (pattern_b) and a key without value gets an empty
-- string.
--
-- str      : a string to parse, or a table that is passed on (or merged)
-- existing : optional table that receives the pairs
--
-- Returns the filled table; an empty or missing str gives a fresh empty table.

function parsers.settings_to_hash_tolerant(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            return existing -- used to be the undefined global 'exiting', so nil came back
        else
            return str
        end
    else
        hash = existing or { }
        lpegmatch(pattern_b_s,str)
        return hash
    end
end
170
-- Strict variant of settings_to_hash: only key=value pairs are accepted
-- (pattern_c).
--
-- str      : a string to parse, or a table that is passed on (or merged)
-- existing : optional table that receives the pairs
--
-- Returns nil for an empty or missing str and a false value (nil) when the
-- resulting hash has no entries, otherwise the filled table.

function parsers.settings_to_hash_strict(str,existing)
    if not str or str == "" then
        return nil
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            return existing -- used to be the undefined global 'exiting', so nil came back
        else
            return str
        end
    else
        -- at this point str is known to be non empty, so no further test is needed
        hash = existing or { }
        lpegmatch(pattern_c_s,str)
        return next(hash) and hash
    end
end
189
190local separator = comma * space^0
191local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
192                + C((nestedbraces + (1-comma))^0)
193local pattern   = spaces * Ct(value*(separator*value)^0)
194
195-- "aap, {noot}, mies" : outer {} removed, leading spaces ignored
196
197patterns.settings_to_array = pattern
198
199-- we could use a weak table as cache
200
-- Splits a comma separated list into an array. A table is returned as-is and
-- an empty or missing string gives an empty table. The parser is only used
-- when needed: in strict mode when the string has a brace, otherwise when it
-- has a comma. In all other cases the string becomes the only entry.

function parsers.settings_to_array(str,strict)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    local trigger = strict and "{" or ","
    if find(str,trigger,1,true) then
        return lpegmatch(pattern,str)
    end
    return { str }
end
218
-- Turns a comma separated list (or an array) into an array of numbers by
-- applying tonumber to each entry. Beware: a table that is passed gets
-- converted in place and entries that are no number become nil.

function parsers.settings_to_numbers(str)
    if not str or str == "" then
        return { }
    end
    local list
    if type(str) == "table" then
        list = str
    elseif find(str,",",1,true) then
        list = lpegmatch(pattern,str)
    else
        return { tonumber(str) }
    end
    for index=1,#list do
        list[index] = tonumber(list[index])
    end
    return list
end
235
236local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
237                + C((nestedbraces + nestedbrackets + nestedparents + (1-comma))^0)
238local pattern   = spaces * Ct(value*(separator*value)^0)
239
-- Splits at commas but leaves commas inside nested {}, [] and () alone. There
-- is no check for a missing string here.

function parsers.settings_to_array_obey_fences(str)
    local list = lpegmatch(pattern,str)
    return list
end
243
244-- inspect(parsers.settings_to_array_obey_fences("url(http://a,b.c)"))
245
246-- this one also strips end spaces before separators
247--
248-- "{123} , 456  " -> "123" "456"
249
250-- local separator = space^0 * comma * space^0
251-- local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
252--                 + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0)
253-- local withvalue = Carg(1) * value / function(f,s) return f(s) end
254-- local pattern_a = spaces * Ct(value*(separator*value)^0)
255-- local pattern_b = spaces * withvalue * (separator*withvalue)^0
256
257local cache_a = { }
258local cache_b = { }
259
-- Returns a (cached) splitter pattern.
--
-- symbol     : string with the separator characters (used as a set), default ","
-- withaction : when set, each value is passed to the function that is given as
--              first extra argument to lpeg.match, otherwise the values are
--              collected in a table
--
-- A value in braces loses its braces but only when the closing brace is
-- followed by a separator or the end of the string. Other values run up to
-- the optional spaces that precede a separator or the end of the string.

function parsers.groupedsplitat(symbol,withaction)
    symbol = symbol or ","
    local cache   = withaction and cache_b or cache_a
    local pattern = cache[symbol]
    if pattern then
        return pattern
    end
    local symbols   = S(symbol)
    local separator = space^0 * symbols * space^0
    local braced    = lbrace * C((nobrace + nestedbraces)^0) * (rbrace * (#symbols + P(-1))) -- new per 2023-03-11
    local unbraced  = C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0)
    local value     = braced + unbraced
    if withaction then
        local withvalue = Carg(1) * value / function(f,s) return f(s) end
        pattern = spaces * withvalue * (separator*withvalue)^0
    else
        pattern = spaces * Ct(value*(separator*value)^0)
    end
    cache[symbol] = pattern
    return pattern
end
286
287local pattern_a = parsers.groupedsplitat(",",false)
288local pattern_b = parsers.groupedsplitat(",",true)
289
-- Splits a comma separated list with the grouped splitter, so spaces before a
-- separator are stripped too. An empty or missing string gives an empty table.

function parsers.stripped_settings_to_array(str)
    if str and str ~= "" then
        return lpegmatch(pattern_a,str)
    end
    return { }
end
297
-- Calls 'action' for each stripped entry of a comma separated list. What the
-- action returns ends up as capture and is returned by the match. An empty or
-- missing string gives an empty table.

function parsers.process_stripped_settings(str,action)
    if str and str ~= "" then
        return lpegmatch(pattern_b,str,1,action)
    end
    return { }
end
305
306-- parsers.process_stripped_settings("{123} , 456  ",function(s) print("["..s.."]") end)
307-- parsers.process_stripped_settings("123 , 456  ",function(s) print("["..s.."]") end)
308
309local function set(t,v)
310    t[#t+1] = v
311end
312
313local value   = P(Carg(1)*value) / set
314local pattern = value*(separator*value)^0 * Carg(1)
315
-- Appends the entries of a comma separated list to table t. The table travels
-- as extra argument and is also the last capture of the pattern, so on a match
-- it is returned.

function parsers.add_settings_to_array(t,str)
    return lpegmatch(pattern,str,1,t)
end
319
-- Serializes a hash as "key=value" pairs in sorted key order.
--
-- h         : the hash, a missing one gives ""
-- separator : goes between the pairs, default ","
-- yes, no   : when both are given they replace the booleans true and false
-- strict    : when set, pairs with a false value are left out
-- omit      : optional list of keys to skip (fed to tohash)

function parsers.hash_to_string(h,separator,yes,no,strict,omit)
    if not h then
        return ""
    end
    local result = { }
    local count  = 0
    local keys   = sortedkeys(h)
    local skip   = omit and tohash(omit)
    for i=1,#keys do
        local key = keys[i]
        if not (skip and skip[key]) then
            local value = h[key]
            local text
            if type(value) ~= "boolean" then
                text = value
            elseif yes and no then
                if value then
                    text = yes
                elseif not strict then
                    text = no
                end
            elseif value or not strict then
                text = tostring(value)
            end
            if text then
                count = count + 1
                result[count] = key .. '=' .. text
            end
        end
    end
    return concat(result,separator or ",")
end
354
-- Joins the entries of array a with the given separator (default ","); a
-- missing array gives an empty string.

function parsers.array_to_string(a,separator)
    return a and concat(a,separator or ",") or ""
end
362
363-- function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway
364--     if str then
365--         t = t or { }
366--         for s in gmatch(str,"[^, ]+") do -- space added
367--             t[s] = true
368--         end
369--         return t
370--     else
371--         return { }
372--     end
373-- end
374
375----- pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true) )^1,rawset)
-- In Lua '%' binds like '*', so the accumulator below applies to the whole
-- "name, separators, true" sequence: each name is set to true in the table
-- that Ct("") provides.

local pattern = Ct("") * (C((1-S(", "))^1) * S(", ")^0 * Cc(true) % rawset)^1

-- Converts a list of names separated by commas and/or spaces into a set
-- (name -> true). A missing string or a failed match gives an empty table.
-- (This function used to be defined twice, the identical copy has been dropped.)

function parsers.settings_to_set(str)
    return str and lpegmatch(pattern,str) or { }
end

-- Memoized variant: indexing with a list string parses it once and stores the
-- resulting set under that string.

hashes.settings_to_set = setmetatableindex(function(t,k) -- experiment, not public
    local v = k and lpegmatch(pattern,k) or { }
    t[k] = v
    return v
end)
391
-- Names separated by commas and/or spaces, collected in an array.

local pattern = Ct((C((1-S(", "))^1) * S(", ")^0)^1)

-- Memoized: indexing with a list string parses it once and stores the
-- resulting array under that string.

hashes.settings_to_list =  table.setmetatableindex(function(store,list) -- experiment, not public
    local entries = list and lpegmatch(pattern,list) or { }
    store[list] = entries
    return entries
end)
399
400-- inspect(hashes.settings_to_set["a,b, c, d"])
401-- inspect(hashes.settings_to_list["a,b, c, d"])
402
403-- as we use a next, we are not sure when the gc kicks in
404
405getmetatable(hashes.settings_to_set ).__mode = "kv" -- could be an option (maybe sharing makes sense)
406getmetatable(hashes.settings_to_list).__mode = "kv" -- could be an option (maybe sharing makes sense)
407
-- Returns the keys of h that have a true value, in the order that sortedhash
-- delivers them, joined by the separator (default ",").

function parsers.simple_hash_to_string(h, separator)
    local keys = { }
    for key, value in sortedhash(h) do
        if value then
            keys[#keys+1] = key
        end
    end
    return concat(keys,separator or ",")
end
419
420local str      = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1)
421----- setting  = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset)
422local setting  = Carg(1) * (whitespace^0 * (str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))) % rawset)^1
423local splitter = setting^1
424
-- Parses whitespace separated "key" and "key=value" items (keys and values can
-- be quoted) into target, or into a new table when no target is given. A key
-- without value gets an empty string. A missing string or a failed match
-- gives an empty table.

function parsers.options_to_hash(str,target)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str,1,target or { }) or { }
end
428
429-- inspect(parsers.options_to_hash([[aaaa bbbb cccc=dddd eeee=ffff]])) -- mtx-context
430
431local splitter = lpeg.tsplitat(" ")
432
-- Splits a string at spaces; a missing string or a failed match gives an
-- empty table.

function parsers.options_to_array(str)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str) or { }
end
436
437-- for chem (currently one level)
438
439local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
440                + C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
441                + C((nestedbraces + (1-comma))^1)
442                + Cc("") -- new
443local pattern_a = spaces * Ct(value*(separator*value)^0)
444
-- The global 'unpack' is not part of Lua 5.2 and later, so we fall back on
-- table.unpack instead of depending on a compatibility global.

local unpack = unpack or table.unpack

-- Expands "n(list)": the list is split with pattern_a and its entries are
-- returned n times as separate values. Without n the string is passed on as
-- it is.

local function repeater(n,str)
    if not n then
        return str
    else
        local s = lpegmatch(pattern_a,str)
        if n == 1 then
            return unpack(s)
        else
            local t  = { }
            local tn = 0
            for i=1,n do
                for j=1,#s do
                    tn = tn + 1
                    t[tn] = s[j]
                end
            end
            return unpack(t)
        end
    end
end
465
466local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
467                + (C(digit^1)/tonumber * lparent * Cs((noparent + nestedparents)^1) * rparent) / repeater
468                + C((nestedbraces + (1-comma))^1)
469                + Cc("") -- new
470local pattern_b = spaces * Ct(value*(separator*value)^0)
471
-- Splits a comma separated list where entries can look like "3(a,b)". With
-- expand set such an entry is replaced by its repeated content, otherwise it
-- is kept as one string. A failed match gives an empty table.

function parsers.settings_to_array_with_repeat(str,expand)
    local pattern = expand and pattern_b or pattern_a
    return lpegmatch(pattern,str) or { }
end
479
480--
481
482local value   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
483local pattern = Ct((space + value)^0)
484
-- Collects the content of a sequence of braced arguments, like "{a} {b}{c}",
-- in a table. Spaces between the arguments are skipped.

function parsers.arguments_to_table(str)
    local arguments = lpegmatch(pattern,str)
    return arguments
end
488
489-- temporary here (unoptimized)
490
-- Parses settings into self[class]. When that table does not exist yet it is
-- created and, when a parentclass is given, chained to self[parentclass]
-- (which is created too when missing) with setmetatableindex.

function parsers.getparameters(self,class,parentclass,settings)
    local current = self[class]
    if not current then
        current = { }
        self[class] = current
        if parentclass then
            local parent = self[parentclass]
            if not parent then
                parent = { }
                self[parentclass] = parent
            end
            setmetatableindex(current,parent)
        end
    end
    parsers.settings_to_hash(settings,current)
end
507
-- Returns an iterator over the items of a list; items are separated by commas
-- and/or spaces.

function parsers.listitem(str)
    local iterator = gmatch(str,"[^, ]+")
    return iterator
end
511
512--
513
-- Inserts thousands separators. The string has to start with one, two or three
-- digits followed by zero or more groups of three digits, and that integer
-- part has to be followed by a period or the end of the string. The Cc(",")
-- injects a comma before each group in the substitution; what comes after the
-- integer part is passed on untouched. The alternatives are tried in order, so
-- backtracking finds the right number of leading digits.

local pattern = Cs { "start",
    start    = V("one") + V("two") + V("three"),
    rest     = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0,
    thousand = digit * digit * digit,
    one      = digit * V("rest"),
    two      = digit * digit * V("rest"),
    three    = V("thousand") * V("rest"),
}
522
523lpegpatterns.splitthousands = pattern -- maybe better in the parsers namespace ?
524
-- Returns str with commas between the groups of three digits of its integer
-- part; when the string doesn't match it is returned as it is.

function parsers.splitthousands(str)
    local result = lpegmatch(pattern,str)
    if result then
        return result
    end
    return str
end
528
529-- print(parsers.splitthousands("11111111111.11"))
530
531local optionalwhitespace = whitespace^0
532
533lpegpatterns.words      = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1)
534lpegpatterns.sentences  = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1)
535lpegpatterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1)
536
537-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n "
538-- inspect(lpegmatch(lpegpatterns.paragraphs,str))
539-- inspect(lpegmatch(lpegpatterns.sentences,str))
540-- inspect(lpegmatch(lpegpatterns.words,str))
541
542-- handy for k="v" [, ] k="v"
543
544local dquote    = P('"')
545local equal     = P('=')
546local escape    = P('\\')
547local separator = S(' ,')
548
549local key       = C((1-equal)^1)
550local value     = dquote * C((1-dquote-escape*dquote)^0) * dquote
551
552----- pattern   = Cf(Ct("") * Cg(key * equal * value) * separator^0,rawset)^0 * P(-1) -- was wrong
553----- pattern   = Cf(Ct("") * (Cg(key * equal * value) * separator^0)^1,rawset)^0 * P(-1)
554local pattern   = Ct("") * (((key * equal * value) * separator^0) % rawset)^0 * P(-1)
555
-- Parses key="value" pairs, separated by spaces and/or commas, into a hash.
-- The whole string has to match, otherwise nil comes back. An empty or
-- missing string gives an empty table.

function parsers.keq_to_hash(str)
    if not str or str == "" then
        return { }
    end
    return lpegmatch(pattern,str)
end
563
564-- inspect(lpeg.match(pattern,[[key="value" foo="bar"]]))
565
566local defaultspecification = { separator = ",", quote = '"' }
567
568-- this version accepts multiple separators and quotes as used in the
569-- database module
570
-- Returns a function that splits csv data into a table of rows, where each
-- row is a table of fields.
--
-- specification.separator : one or more separator characters (default ",")
-- specification.quote     : one or more quote characters (default '"'), each
--                           of them can wrap a field
-- specification.numbers   : when set, quoted fields are fed to tonumber
--
-- The given specification gets defaultspecification as fallback by means of
-- setmetatableindex. Beware: only rows that end with a newline character are
-- collected.

function parsers.csvsplitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local separators = specification.separator
    local quotes     = specification.quote
    local numbers    = specification.numbers
    local separator  = S(separators ~= "" and separators or ",")
    local field      = C((1 - separator - newline)^0)
    if quotes and quotes ~= "" then
        local quoted = nil
        for chr in gmatch(quotes,".") do
            local quote   = P(chr)
            local content = (1 - quote)^0
            local item    = quote * (numbers and (content/tonumber) or C(content)) * quote
            quoted = quoted and (quoted + item) or item
        end
        -- a quoted field is tried first
        field = quoted + field
    end
    local row    = Ct(field * (separator * field)^0)
    local parser = Ct((row * S("\n\r")^1)^0 )
    return function(data)
        return lpegmatch(parser,data)
    end
end
597
598-- local crap = [[
599-- first,second,third,fourth
600-- "1","2","3","4"
601-- "5","6","7","8"
602-- ]]
603
604-- local mycsvsplitter = parsers.csvsplitter { numbers = true }
605
606-- local list = mycsvsplitter(crap) inspect(list)
607
608-- and this is a slightly patched version of a version posted by Philipp Gesang
609
-- Returns a csv splitter along the lines of rfc 4180: inside a quoted field a
-- doubled quote stands for one quote.
--
-- specification.separator : one or more separator characters (default ",")
-- specification.quote     : the quote string (default '"')
-- specification.numbers   : when set, quoted fields are fed to tonumber
-- specification.strict    : when set, at most one newline is accepted between
--                           records, otherwise one or more
--
-- The returned function takes the data and a getheader flag. With the flag set
-- the first record is split off and we return the remaining records plus that
-- header, otherwise all records are returned in one table. Beware: a record
-- needs at least two fields.

function parsers.rfc4180splitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local numbers     = specification.numbers
    local quote       = specification.quote
    local separators  = specification.separator
    local quotechar   = P(quote)                           -- rfc: DQUOTE
    local dquotechar  = quotechar * quotechar / quote      --      2DQUOTE
    local separator   = S(separators ~= "" and separators or ",") -- COMMA
    local whatever    = (dquotechar + (1 - quotechar))^0
    local content     = numbers and (whatever/tonumber) or Cs(whatever)
    local escaped     = quotechar * content * quotechar
    local non_escaped = C((1 - quotechar - newline - separator)^1)
    local field       = escaped + non_escaped + Cc("")
    local record      = Ct(field * (separator * field)^1)
    local headerline  = record * Cp()
    local morerecords = (newline^(specification.strict and -1 or 1) * record)^0
    local headeryes   = Ct(morerecords)
    local headernop   = Ct(record * morerecords)
    return function(data,getheader)
        if not getheader then
            return lpegmatch(headernop,data)
        end
        local header, position = lpegmatch(headerline,data)
        local records = lpegmatch(headeryes,data,position)
        return records, header
    end
end
639
640-- local mycsvsplitter = parsers.rfc4180splitter { numbers = true }
641--
642-- local crap = [[
643-- first,second,third,fourth
644-- "1","2","3","4"
645-- "a","b","c","d"
646-- "foo","bar""baz","boogie","xyzzy"
647-- ]]
648--
649-- local list, names = mycsvsplitter(crap,true)   inspect(list) inspect(names)
650-- local list, names = mycsvsplitter(crap)        inspect(list) inspect(names)
651
-- Calls action for each number of a range:
--
-- first       : start of the range, when missing nothing happens
-- last        : end of the range, true means "up to n" (or just first when
--               there is no n), a false value means: only first
-- n           : upper limit used when last is true
-- action      : function that gets each number

local function ranger(first,last,n,action)
    if not first then
        return
    end
    if not last then
        action(first)
        return
    end
    local upper = last
    if last == true then
        upper = n or first
    end
    for i=first,upper do
        action(i)
    end
end
667
local cardinal    = (lpegpatterns.hexadecimal + lpegpatterns.cardinal) / tonumber
local spacers     = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring

-- A list of numbers and ranges, like "1-3, 7, 9:12, 24:*". Each item calls
-- ranger with: the first number, then the last number, true (open range:
-- nothing or a * after the range symbol) or false (no range), then the two
-- extra arguments passed to lpeg.match (the limit and the action).
--
-- An older variant without the "*" and the final endofstring used to be
-- defined here too, but it was shadowed right away and therefore never used,
-- so it is gone.

local stepper  = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
               * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring -- we're sort of strict (could do without endofstring)
677
-- Runs an action over the numbers in a specification.
--
-- parsers.stepper(str,action)   : str can be a number (one call), an array
--                                 (one call per entry) or a list/range string
-- parsers.stepper(str,n,action) : str is a list/range string, n is the upper
--                                 limit for open ranges ("7-" or "24:*") and
--                                 the action defaults to print
--
-- Anything else is silently ignored. The two argument variant used to feed a
-- non string (like nil) to lpeg.match, which triggers an error, so now we
-- check for a string there too, like the three argument variant does.

function parsers.stepper(str,n,action)
    local ts = type(str)
    if type(n) == "function" then
        if ts == "number" then
            n(str)
        elseif ts == "table" then
            for i=1,#str do
                n(str[i])
            end
        elseif ts == "string" then
            lpegmatch(stepper,str,1,false,n) -- n is a function here, no fallback needed
        end
    elseif ts == "string" then
        lpegmatch(stepper,str,1,n,action or print)
    end
end
694
695-- parsers.stepper("1,7-",9,function(i) print(">>>",i) end)
696-- parsers.stepper("1-3,7,8,9")
697-- parsers.stepper("1-3,6,7",function(i) print(">>>",i) end)
698-- parsers.stepper(" 1 : 3, ,7 ")
699-- parsers.stepper("1:4,9:13,24:*",30)
700-- parsers.stepper(1,print)
701-- parsers.stepper({1,3,4},print)
702
-- Both variants turn a percent sign into "\percent " and wrap the integer
-- after a ^ in braces; the text variant also replaces the ^ by "\high".

local pattern_math = Cs((P("%")/"\\percent " +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)

-- This used to export 'pattern', which at this point is still the key="value"
-- pattern defined earlier in this file, so we export the math variant instead.

patterns.unittotex = pattern_math
707
-- Converts a unit string to tex input, with the text variant of the pattern
-- when textmode is set and the math variant otherwise.

function parsers.unittotex(str,textmode)
    local pattern = textmode and pattern_text or pattern_math
    return lpegmatch(pattern,str)
end
711
712local pattern = Cs((P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>") + anything)^0)
713
-- Converts a unit string to xml: a ^ followed by an integer becomes that
-- integer wrapped in a sup element.

function parsers.unittoxml(str)
    local result = lpegmatch(pattern,str)
    return result
end
717
718-- print(parsers.unittotex("10^-32 %"),utilities.parsers.unittoxml("10^32 %"))
719
720local cache   = { }
721local spaces  = lpegpatterns.space^0
722local dummy   = function() end
723
-- On demand we create (and store) an iterator pattern for a set of separator
-- characters: skip leading spaces, capture everything up to a separator, skip
-- the separators and report the position where the next item starts.

setmetatableindex(cache,function(store,characters)
    local separator = S(characters) -- was P
    local pattern   = spaces * C((1-separator)^0) * separator^0 * Cp()
    store[characters] = pattern
    return pattern
end)
731
732local commalistiterator = cache[","]
733
-- Returns an iterator over the items in str. The separator is a string with
-- separator characters, with the comma as default. For an empty string we
-- return a dummy function that delivers nothing.

function parsers.iterator(str,separator)
    local length = #str
    if length == 0 then
        return dummy
    end
    local pattern  = separator and cache[separator] or commalistiterator
    local position = 1
    return function()
        if position > length then
            return
        end
        local item, continue = lpegmatch(pattern,str,position)
        if continue then
            position = continue
            return item
        end
    end
end
752
753-- for s in parsers.iterator("a b c,b,c") do
754--     print(s)
755-- end
756
-- Returns a shallow copy of t[name], or an empty table when there is no such
-- entry. The copy is what the merged hashes get collected in.

local function initialize(t,name)
    local copy   = { }
    local source = t[name]
    if source then
        for key, value in next, source do
            copy[key] = value
        end
    end
    return copy
end
769
-- Returns t[name] itself (no copy), or an empty table when there is no such
-- entry.

local function fetch(t,name)
    local found = t[name]
    if found then
        return found
    end
    return { }
end
773
-- Copies all pairs of 'more' into 'result' (existing keys get overwritten)
-- and returns result.

local function process(result,more)
    local key, value = next(more)
    while key ~= nil do
        result[key] = value
        key, value = next(more,key)
    end
    return result
end
780
781----- name   = C((1-S(", "))^1)
782----- parser = (Carg(1) * name / initialize) * (S(", ")^1 * (Carg(1) * name / fetch))^0
783----- merge  = Cf(parser,process)
784local name   = Carg(1) * C((1-S(", "))^1)
785local parser = S(", ")^1 * (name / fetch)
786local merge  = (name / initialize) * (parser % process)^0
787
-- Merges the subtables of 'hash' that are named in 'list' (names separated by
-- commas and/or spaces) into a new table. We start with a copy of the first
-- one and later ones overwrite earlier ones. Names without subtable are
-- harmless.

function parsers.mergehashes(hash,list)
    local merged = lpegmatch(merge,list,1,hash)
    return merged
end
791
792-- local t = {
793--     aa = { alpha = 1, beta = 2, gamma = 3, },
794--     bb = { alpha = 4, beta = 5, delta = 6, },
795--     cc = { epsilon = 3 },
796-- }
797-- inspect(parsers.mergehashes(t,"aa, bb, cc"))
798
-- Splits a time in seconds into days, hours, minutes and seconds (four return
-- values). Without argument os.runtime() is used.

function parsers.runtime(time)
    time = time or os.runtime()
    local day, hour, minute = 24*60*60, 60*60, 60
    local days    = div(time,day)
    time          = mod(time,day)
    local hours   = div(time,hour)
    time          = mod(time,hour)
    local minutes = div(time,minute)
    local seconds = mod(time,minute)
    return days, hours, minutes, seconds
end
811
812--
813
814local spacing = whitespace^0
815local apply   = P("->")
816local method  = C((1-apply)^1)
817local token   = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1)
818
819local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token
820
-- Splits "method -> token" into its two parts; a token in braces loses the
-- braces. When there is no "->" the default (or false) is returned as method.
-- Without string we return the default (or false) and an empty string.

function parsers.splitmethod(str,default)
    default = default or false
    if not str then
        return default, ""
    end
    return lpegmatch(pattern,str,1,default)
end
828
829-- print(parsers.splitmethod(" foo -> {bar} "))
830-- print(parsers.splitmethod("foo->{bar}"))
831-- print(parsers.splitmethod("foo->bar"))
832-- print(parsers.splitmethod("foo"))
833-- print(parsers.splitmethod("{foo}"))
834-- print(parsers.splitmethod())
835
836local p_year = lpegpatterns.digit^4 / tonumber
837
838-- local pattern = Cf( Ct("") *
839--     (
840--         (             Cg(Cc("year")  * p_year  )
841--           * S("-/") * Cg(Cc("month") * cardinal)
842--           * S("-/") * Cg(Cc("day")   * cardinal)
843--         ) +
844--         (             Cg(Cc("day")   * cardinal)
845--           * S("-/") * Cg(Cc("month") * cardinal)
846--           * S("-/") * Cg(Cc("year")  * p_year  )
847--         ) +
848--         (             Cg(Cc("year")  * p_year  )
849--           * S("-/") * Cg(Cc("month") * cardinal)
850--         ) +
851--         (             Cg(Cc("month") * cardinal)
852--           * S("-/") * Cg(Cc("year")  * p_year  )
853--         )
854--     )
855--       *  (
856--          P(" ") * Cg(Cc("hour") * cardinal)
857--       *  P(":") * Cg(Cc("min")  * cardinal)
858--       * (P(":") * Cg(Cc("sec")  * cardinal))^-1
859--       + P(-1) )
860-- , rawset)
861
-- Splits a date with optional time into a table with the fields year, month,
-- day, hour, min and sec (as far as present). Each (Cc(name) * number) pair is
-- folded into the table made by Ct("") with the accumulator capture and
-- rawset. Beware: in Lua the '%' has the same precedence as '*' and both are
-- left associative, so each '% rawset' applies to everything that precedes it
-- inside its parentheses; that still works out because the earlier pairs have
-- already been folded into the table at that point.
--
-- The date alternatives are tried in this order: year-month-day,
-- day-month-year, year-month and month-year, with - or / as separator. After
-- the date we expect either a space and hour:min with an optional :sec, or
-- the end of the string.

local pattern = Ct("") * (
    (
        (             (Cc("year")  * p_year  ) % rawset
          * S("-/") * (Cc("month") * cardinal) % rawset
          * S("-/") * (Cc("day")   * cardinal) % rawset
        ) +
        (             (Cc("day")   * cardinal) % rawset
          * S("-/") * (Cc("month") * cardinal) % rawset
          * S("-/") * (Cc("year")  * p_year  ) % rawset
        ) +
        (             (Cc("year")  * p_year  ) % rawset
          * S("-/") * (Cc("month") * cardinal) % rawset
        ) +
        (             (Cc("month") * cardinal) % rawset
          * S("-/") * (Cc("year")  * p_year  ) % rawset
        )
    )
      *  (
         P(" ") * (Cc("hour") * cardinal) % rawset
      *  P(":") * (Cc("min")  * cardinal) % rawset
      * (P(":") * (Cc("sec")  * cardinal) % rawset)^-1
      + P(-1) )
)
885
886lpegpatterns.splittime = pattern
887
-- Converts a date/time string into a table with numeric fields (year, month,
-- day, hour, min, sec) or returns nil when the string doesn't match.

function parsers.totime(str)
    local fields = lpegmatch(pattern,str)
    return fields
end
891
892-- inspect(parsers.totime("2019-03-05"))
893-- inspect(parsers.totime("2019-03-05 12:12:12"))
894-- print(os.time(parsers.totime("2019-03-05 12:12:12")))
895-- print(os.time(parsers.totime("2019/03/05 12:12:12")))
896-- print(os.time(parsers.totime("05-03-2019 12:12:12")))
897-- print(os.time(parsers.totime("05/03/2019 12:12:12")))
898