util-prs.lmt /size: 27 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['util-prs'] = {
2    version   = 1.001,
3    comment   = "companion to luat-lib.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local lpeg, table, string = lpeg, table, string
10local P, R, V, S, C, Ct, Cs, Carg, Cc, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cp
11----- Cg, Cf = lpeg.Cg, lpeg.Cf
12local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
13local concat, gmatch, find = table.concat, string.gmatch, string.find
14local tonumber, tostring, type, next, rawset = tonumber, tostring, type, next, rawset
15local mod, div = math.mod, math.div
16
-- Namespace setup: an already existing table is reused, otherwise a fresh one
-- is created, so earlier registrations in these tables are kept.

utilities         = utilities or {}
local parsers     = utilities.parsers or { }
utilities.parsers = parsers
local patterns    = parsers.patterns or { }
parsers.patterns  = patterns

-- these are not part of stock Lua; presumably provided by a helper library
-- that is loaded before this file
local setmetatableindex = table.setmetatableindex
local sortedhash        = table.sortedhash
local sortedkeys        = table.sortedkeys
local tohash            = table.tohash

-- container for the self-filling lookup tables defined further on
local hashes            = { }
parsers.hashes          = hashes
-- we share some patterns

-- single character matchers
local digit       = R("09")
local space       = P(' ')
local equal       = P("=")
local colon       = P(":")
local comma       = P(",")
local lbrace      = P("{")
local rbrace      = P("}")
local lparent     = P("(")
local rparent     = P(")")
local lbracket    = P("[")
local rbracket    = P("]")
local period      = S(".")
local punctuation = S(".,:;")
-- predefined matchers taken from the shared lpeg.patterns table
local spacer      = lpegpatterns.spacer
local whitespace  = lpegpatterns.whitespace
local newline     = lpegpatterns.newline
local anything    = lpegpatterns.anything
local endofstring = lpegpatterns.endofstring

-- one character that is neither the opening nor the closing fence
local nobrace     = 1 - (lbrace   + rbrace )
local noparent    = 1 - (lparent  + rparent)
local nobracket   = 1 - (lbracket + rbracket)
54
-- we could use a Cf/Cg construct or, from now on, the % accumulator
56
local escape, left, right = P("\\"), P('{'), P('}')

-- lpegpatterns.balanced = P {
--     [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
--     [2] = left * V(1) * right
-- }

-- Rule 1 matches zero or more of: a backslash-escaped brace, any character
-- that is not a brace, or a braced group (rule 2). Rule 2 is a brace pair
-- wrapped around rule 1, so braces have to balance.

lpegpatterns.balanced = P {
    ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
    left * V(1) * right
}

-- recursive matchers for one complete fenced group, nested groups included
local nestedbraces   = P { lbrace   * (nobrace   + V(1))^0 * rbrace }
local nestedparents  = P { lparent  * (noparent  + V(1))^0 * rparent }
local nestedbrackets = P { lbracket * (nobracket + V(1))^0 * rbracket }
local spaces         = space^0
-- a braced group returned as a string with the outer braces replaced by ""
local argument       = Cs((lbrace/"") * ((nobrace + nestedbraces)^0) * (rbrace/""))
-- zero or more positions at which endofstring does not match
local content        = (1-endofstring)^0

lpegpatterns.nestedbraces   = nestedbraces   -- no capture
lpegpatterns.nestedparents  = nestedparents  -- no capture
lpegpatterns.nestedbrackets = nestedbrackets -- no capture
lpegpatterns.nested         = nestedbraces   -- no capture
lpegpatterns.argument       = argument       -- argument after e.g. =
lpegpatterns.content        = content        -- rest after e.g =
82
-- A value is either a braced group (captured without its outer braces) or a
-- run of characters up to the next comma, where an embedded braced group is
-- consumed as a whole so that commas inside it do not end the value.

local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
                + C((nestedbraces + (1-comma))^0)

-- a key is one or more characters that are neither "=" nor ","
local key       = C((1-equal-comma)^1)
-- a: key=value or a bare key (then the value is the empty string)
local pattern_a = (space+comma)^0 * (key * equal * value + key * C(""))
-- c: only key=value is accepted
local pattern_c = (space+comma)^0 * (key * equal * value)
-- d: like a but a colon may be used instead of the equal sign
local pattern_d = (space+comma)^0 * (key * (equal+colon) * value + key * C(""))

-- for the b variant a key also stops at a space, and spaces are permitted
-- around the equal sign
local key       = C((1-space-equal-comma)^1)
local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + C("")))
93
-- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removed, leading spaces ignored
95
-- The table that currently collects the parsed pairs. The settings_to_hash
-- functions below point this at their target table before they start a match,
-- which means that the collector is shared state.

local hash = { }

-- capture handler: store one key/value pair in the current collector
local function set(key,value)
    hash[key] = value
end

-- one or more pairs, each pair is passed to the setter above
local pattern_a_s = (pattern_a/set)^1
local pattern_b_s = (pattern_b/set)^1
local pattern_c_s = (pattern_c/set)^1
local pattern_d_s = (pattern_d/set)^1

patterns.settings_to_hash_a = pattern_a_s
patterns.settings_to_hash_b = pattern_b_s
patterns.settings_to_hash_c = pattern_c_s
patterns.settings_to_hash_d = pattern_d_s
111
-- Build a repeating key/value pattern that hands each pair to the given
-- setter. With "strict" only key=value is accepted, with "tolerant" spaces
-- around the equal sign are permitted, anything else selects the default.
function parsers.make_settings_to_hash_pattern(set,how)
    local single = pattern_a
    if how == "strict" then
        single = pattern_c
    elseif how == "tolerant" then
        single = pattern_b
    end
    return (single/set)^1
end
121
-- Convert a settings string like "a=1, b={x,y}, c" into a hash.
--
-- str      : string to parse, or an already parsed table
-- existing : optional table that receives the pairs
--
-- An empty or missing string gives a new empty table. A table is returned as
-- is, unless an existing table is given: then the pairs are copied into that
-- table, which is returned.
function parsers.settings_to_hash(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this was "exiting", an undefined global, so nil got returned
            return existing
        else
            return str
        end
    else
        -- the setter used by the pattern writes into this shared collector
        hash = existing or { }
        lpegmatch(pattern_a_s,str)
        return hash
    end
end
140
-- Like settings_to_hash but a colon is also accepted between key and value.
-- An empty or missing string gives a new empty table, a table is returned
-- untouched.
function parsers.settings_to_hash_colon_too(str)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    -- the setter used by the pattern fills the shared collector
    hash = { }
    lpegmatch(pattern_d_s,str)
    return hash
end
152
-- Convert a settings string into a hash, permitting spaces around the equal
-- sign and in front of keys.
--
-- str      : string to parse, or an already parsed table
-- existing : optional table that receives the pairs
--
-- An empty or missing string gives a new empty table. A table is returned as
-- is, unless an existing table is given: then the pairs are copied into that
-- table, which is returned.
function parsers.settings_to_hash_tolerant(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this was "exiting", an undefined global, so nil got returned
            return existing
        else
            return str
        end
    else
        -- the setter used by the pattern writes into this shared collector
        hash = existing or { }
        lpegmatch(pattern_b_s,str)
        return hash
    end
end
171
-- Convert a settings string into a hash, accepting only key=value pairs.
--
-- str      : string to parse, or an already parsed table
-- existing : optional table that receives the pairs
--
-- Unlike the other variants this returns nil for an empty or missing string,
-- and nil when the resulting table has no entries. A table is returned as is,
-- unless an existing table is given: then the pairs are copied into that
-- table, which is returned.
function parsers.settings_to_hash_strict(str,existing)
    if not str or str == "" then
        return nil
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this was "exiting", an undefined global, so nil got returned
            return existing
        else
            return str
        end
    else
        -- the empty and nil cases were handled above, so no extra test is needed
        hash = existing or { }
        lpegmatch(pattern_c_s,str)
        return next(hash) and hash
    end
end
190
-- Items are separated by a comma that may be followed by spaces. An item is
-- either a braced group (captured without the outer braces) or a run up to
-- the next comma in which braced groups are consumed as a whole.

local separator = comma * space^0
local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
                + C((nestedbraces + (1-comma))^0)
-- all items are collected in one table
local pattern   = spaces * Ct(value*(separator*value)^0)

-- "aap, {noot}, mies" : outer {} removed, leading spaces ignored

patterns.settings_to_array = pattern
199
200-- we could use a weak table as cache
201
-- Split a comma separated list into an array. An empty or missing string
-- gives a new empty table and a table is returned untouched. The real parser
-- only runs when the string contains the character that makes it necessary:
-- a "{" in strict mode, a "," otherwise. In all other cases the string
-- becomes the single entry.
function parsers.settings_to_array(str,strict)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    local trigger = strict and "{" or ","
    if find(str,trigger,1,true) then
        return lpegmatch(pattern,str)
    end
    return { str }
end
219
-- Split a comma separated list and convert every entry with tonumber. When a
-- table is passed its entries are converted in place and that same table is
-- returned. An empty or missing string gives a new empty table.
function parsers.settings_to_numbers(str)
    if not str or str == "" then
        return { }
    end
    local list
    if type(str) == "table" then
        list = str
    elseif find(str,",",1,true) then
        list = lpegmatch(pattern,str)
    else
        -- no separator, so there is just one entry
        return { tonumber(str) }
    end
    for i=1,#list do
        list[i] = tonumber(list[i])
    end
    return list
end
236
-- As the plain array splitter, but groups in brackets and parentheses are
-- also consumed as a whole, so commas inside such fences do not split.

local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
                + C((nestedbraces + nestedbrackets + nestedparents + (1-comma))^0)
local pattern   = spaces * Ct(value*(separator*value)^0)

-- Returns the table of items. The string is passed to the matcher directly,
-- there is no special handling of nil or empty input here.
function parsers.settings_to_array_obey_fences(str)
    return lpegmatch(pattern,str)
end
244
245-- inspect(parsers.settings_to_array_obey_fences("url(http://a,b.c)"))
246
247-- this one also strips end spaces before separators
248--
249-- "{123} , 456  " -> "123" "456"
250
251-- local separator = space^0 * comma * space^0
252-- local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
253--                 + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0)
254-- local withvalue = Carg(1) * value / function(f,s) return f(s) end
255-- local pattern_a = spaces * Ct(value*(separator*value)^0)
256-- local pattern_b = spaces * withvalue * (separator*withvalue)^0
257
-- patterns are built once per symbol: cache_a holds the collecting variant,
-- cache_b the variant that calls an action
local cache_a = { }
local cache_b = { }

-- Return a pattern that splits at any of the characters in symbol (default
-- is a comma), with spaces around the separator stripped.
--
-- symbol     : string with the separator characters
-- withaction : when true each value is passed to a function that is given as
--              first extra argument to the match, otherwise the values are
--              collected in a table
function parsers.groupedsplitat(symbol,withaction)
    if not symbol then
        symbol = ","
    end
    local pattern = (withaction and cache_b or cache_a)[symbol]
    if not pattern then
        local symbols   = S(symbol)
        local separator = space^0 * symbols * space^0
        -- a braced group only counts as a group when, after optional spaces,
        -- a separator or the end of the string follows; otherwise the second
        -- alternative takes everything up to the spaces before a separator
        local value     =
                        lbrace
                        * C((nobrace + nestedbraces)^0)
                     -- *  rbrace
                     -- * (rbrace           * (#symbols + P(-1))) -- new per 2023-03-11
                        * (rbrace * space^0 * (#symbols + P(-1))) -- new per 2025-01-29
                        +
                        C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0)
        if withaction then
            -- Carg(1) delivers the action, the value is its only argument
            local withvalue = Carg(1) * value / function(f,s) return f(s) end
            pattern = spaces * withvalue * (separator*withvalue)^0
            cache_b[symbol] = pattern
        else
            pattern = spaces * Ct(value*(separator*value)^0)
            cache_a[symbol] = pattern
        end
    end
    return pattern
end
288
-- comma separated variants: pattern_a collects the values in a table while
-- pattern_b passes each value to an action given as extra match argument
local pattern_a = parsers.groupedsplitat(",",false)
local pattern_b = parsers.groupedsplitat(",",true)
291
-- Split a comma separated list into an array, with the spaces around the
-- separators stripped. An empty or missing string gives a new empty table.
function parsers.stripped_settings_to_array(str)
    if str and str ~= "" then
        return lpegmatch(pattern_a,str)
    end
    return { }
end
299
-- Call action for every entry of a comma separated list, with the spaces
-- around the separators stripped. An empty or missing string gives a new
-- empty table, otherwise whatever the match produces is returned.
function parsers.process_stripped_settings(str,action)
    if str and str ~= "" then
        return lpegmatch(pattern_b,str,1,action)
    end
    return { }
end
307
308-- parsers.process_stripped_settings("{123} , 456  ",function(s) print("["..s.."]") end)
309-- parsers.process_stripped_settings("123 , 456  ",function(s) print("["..s.."]") end)
310
-- capture handler: append a value to the target table
local function set(t,v)
    t[#t+1] = v
end

-- The value on the right hand side is the one defined before this line (the
-- variant that obeys fences); each item is appended to the table that is
-- passed as first extra match argument. The trailing Carg(1) makes the match
-- return that table.
local value   = P(Carg(1)*value) / set
local pattern = value*(separator*value)^0 * Carg(1)

-- Append the items of the comma separated string str to the table t and
-- return the result of the match.
function parsers.add_settings_to_array(t,str)
    return lpegmatch(pattern,str,nil,t)
end
321
-- Serialize a hash as "key=value" entries in sorted key order.
--
-- h         : the hash, a missing hash gives the empty string
-- separator : string between the entries, a comma by default
-- yes, no   : when both are given they replace true and false
-- strict    : when set, entries with a false value are left out
-- omit      : optional list of keys that are to be skipped
function parsers.hash_to_string(h,separator,yes,no,strict,omit)
    if not h then
        return ""
    end
    local result = { }
    local count  = 0
    local keys   = sortedkeys(h)
    local skip   = omit and tohash(omit)
    for i=1,#keys do
        local key = keys[i]
        if not (skip and skip[key]) then
            local text = h[key]
            local keep = true
            if type(text) == "boolean" then
                if yes and no then
                    if text then
                        text = yes
                    elseif strict then
                        keep = false
                    else
                        text = no
                    end
                elseif text or not strict then
                    text = tostring(text)
                else
                    keep = false
                end
            end
            if keep then
                count = count + 1
                result[count] = key .. '=' .. text
            end
        end
    end
    return concat(result,separator or ",")
end
356
-- Join the entries of an array with the given separator (a comma by
-- default); a missing array gives the empty string.
function parsers.array_to_string(a,separator)
    if not a then
        return ""
    end
    return concat(a,separator or ",")
end
364
365-- function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway
366--     if str then
367--         t = t or { }
368--         for s in gmatch(str,"[^, ]+") do -- space added
369--             t[s] = true
370--         end
371--         return t
372--     else
373--         return { }
374--     end
375-- end
376
377----- pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true) )^1,rawset)
-- Every run of characters without a comma or space becomes a key with the
-- value true in a fresh table.
local pattern = Ct("") * (C((1-S(", "))^1) * S(", ")^0 * Cc(true) % rawset)^1

-- Convert "a, b c" into { a = true, b = true, c = true }. A missing string,
-- or one without any item, gives a new empty table. (This function used to
-- be defined twice with identical bodies; one definition is enough.)
function parsers.settings_to_set(str)
    return str and lpegmatch(pattern,str) or { }
end

-- Lookup table that parses a list on first access and remembers the result
-- under the list string.
hashes.settings_to_set =  table.setmetatableindex(function(t,k) -- experiment, not public
    local v = k and lpegmatch(pattern,k) or { }
    -- a nil key can be looked up but not stored: storing it raises an error
    if k ~= nil then
        t[k] = v
    end
    return v
end)
393
-- every run of characters without a comma or space becomes an array entry
local pattern = Ct((C((1-S(", "))^1) * S(", ")^0)^1)

-- Lookup table that parses a list on first access and remembers the result
-- under the list string.
hashes.settings_to_list =  table.setmetatableindex(function(t,k) -- experiment, not public
    local v = k and lpegmatch(pattern,k) or { }
    -- a nil key can be looked up but not stored: storing it raises an error
    if k ~= nil then
        t[k] = v
    end
    return v
end)
401
402-- inspect(hashes.settings_to_set["a,b, c, d"])
403-- inspect(hashes.settings_to_list["a,b, c, d"])
404
-- as we use a next, we are not sure when the gc kicks in

-- Both lookup tables get weak keys and weak values, so that cached results
-- do not have to stay around forever.
getmetatable(hashes.settings_to_set ).__mode = "kv" -- could be an option (maybe sharing makes sense)
getmetatable(hashes.settings_to_list).__mode = "kv" -- could be an option (maybe sharing makes sense)
409
-- Join the keys that have a true (non nil, non false) value, in the order
-- that sortedhash delivers them, using the given separator (a comma by
-- default).
function parsers.simple_hash_to_string(h, separator)
    local keys = { }
    for key, enabled in sortedhash(h) do
        if enabled then
            keys[#keys+1] = key
        end
    end
    return concat(keys,separator or ",")
end
421
-- A word is either whatever lpegpatterns.unquoted matches (as substitution
-- capture) or a run of characters that are neither whitespace nor "=".
local str      = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1)
----- setting  = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset)
-- Each "key = value" (or a bare key, which gets the empty string as value)
-- is stored with rawset in the table that comes in as first extra argument.
local setting  = Carg(1) * (whitespace^0 * (str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))) % rawset)^1
local splitter = setting^1
426
-- Parse "aaaa bbbb cccc=dddd" into a hash; the pairs go into target when
-- that is given. A missing string or a failed match gives a new empty table.
function parsers.options_to_hash(str,target)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str,1,target or { }) or { }
end
430
431-- inspect(parsers.options_to_hash([[aaaa bbbb cccc=dddd eeee=ffff]])) -- mtx-context
432
-- splits at single spaces and collects the parts in a table
local splitter = lpeg.tsplitat(" ")

-- Split a space separated option string into an array; a missing string or
-- a failed match gives a new empty table.
function parsers.options_to_array(str)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str) or { }
end
438
439-- for chem (currently one level)
440
-- for chem (currently one level): an item is, in this order, a braced group
-- (outer braces dropped), digits followed by a parenthesized group (kept as
-- one string), a run up to the next comma, or the empty string
local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
                + C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
                + C((nestedbraces + (1-comma))^1)
                + Cc("") -- new
local pattern_a = spaces * Ct(value*(separator*value)^0)
446
-- Expand "n(list)": the list is split into items and these items are
-- returned n times in a row as separate values. Without a count the string
-- comes back untouched.
local function repeater(n,str)
    if not n then
        return str
    end
    local items = lpegmatch(pattern_a,str)
    if n == 1 then
        return unpack(items)
    end
    local result = { }
    local count  = #items
    for i=0,n-1 do
        -- each round fills the next slice of the result
        local offset = i * count
        for j=1,count do
            result[offset+j] = items[j]
        end
    end
    return unpack(result)
end
467
-- As the previous item definition, but here digits followed by a
-- parenthesized group are handed to the repeater: the digits become a number
-- and the content between the parentheses is passed as string.
local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
                + (C(digit^1)/tonumber * lparent * Cs((noparent + nestedparents)^1) * rparent) / repeater
                + C((nestedbraces + (1-comma))^1)
                + Cc("") -- new
local pattern_b = spaces * Ct(value*(separator*value)^0)
473
-- Split a comma separated list; with expand set, entries of the form
-- "n(list)" are replaced by n copies of the items in the list. A failed
-- match gives a new empty table.
function parsers.settings_to_array_with_repeat(str,expand) -- beware: "" =>  { }
    local splitter = expand and pattern_b or pattern_a
    return lpegmatch(splitter,str) or { }
end
481
482--
483
-- a braced group, captured without its outer braces
local value   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
-- zero or more spaces and braced groups; matching stops at anything else
local pattern = Ct((space + value)^0)

-- Collect the content of successive braced arguments, like "{a} {b}{c}", in
-- a table.
function parsers.arguments_to_table(str)
    return lpegmatch(pattern,str)
end
490
491-- temporary here (unoptimized)
492
-- Parse settings into self[class]. When that table does not exist yet it is
-- created and, if a parent class is given, linked to self[parentclass]
-- (which is also created when missing) with setmetatableindex. Nothing is
-- returned.
function parsers.getparameters(self,class,parentclass,settings)
    local current = self[class]
    if current then
        parsers.settings_to_hash(settings,current)
        return
    end
    current = { }
    self[class] = current
    if parentclass then
        local parent = self[parentclass]
        if not parent then
            parent = { }
            self[parentclass] = parent
        end
        setmetatableindex(current,parent)
    end
    parsers.settings_to_hash(settings,current)
end
509
-- Return an iterator over the runs of characters in str that contain
-- neither a comma nor a space.
function parsers.listitem(str)
    local iterator = gmatch(str,"[^, ]+")
    return iterator
end
513
514--
515
-- Insert a comma in front of every group of three digits in the integer part
-- of a number. The leading group has one, two or three digits; after that a
-- comma is inserted in front of each further group of three. The digits have
-- to be followed by a period or the end of the string, whatever comes after
-- that is copied as is.
local pattern = Cs { "start",
    start    = V("one") + V("two") + V("three"),
    rest     = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0,
    thousand = digit * digit * digit,
    one      = digit * V("rest"),
    two      = digit * digit * V("rest"),
    three    = V("thousand") * V("rest"),
}

lpegpatterns.splitthousands = pattern -- maybe better in the parsers namespace ?

-- Returns the string with separators inserted, or the original string when
-- the pattern does not match.
function parsers.splitthousands(str)
    return lpegmatch(pattern,str) or str
end
530
531-- print(parsers.splitthousands("11111111111.11"))
532
local optionalwhitespace = whitespace^0

-- words      : runs of characters that are neither punctuation nor
--              whitespace are collected, other input is consumed by anything
-- sentences  : after optional whitespace, everything up to and including
--              the next period
-- paragraphs : after optional whitespace, a run that stops where optional
--              spacers and two newlines follow; whitespace that runs up to
--              the end of the string is replaced by the empty string
lpegpatterns.words      = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1)
lpegpatterns.sentences  = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1)
lpegpatterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1)
538
539-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n "
540-- inspect(lpegmatch(lpegpatterns.paragraphs,str))
541-- inspect(lpegmatch(lpegpatterns.sentences,str))
542-- inspect(lpegmatch(lpegpatterns.words,str))
543
544-- handy for k="v" [, ] k="v"
545
local dquote    = P('"')
local equal     = P('=')
local escape    = P('\\')
local separator = S(' ,')
local utfbom    = lpegpatterns.utfbom^0  -- we just intercept

-- a key is everything up to the equal sign
local key       = C((1-equal)^1)
-- a value sits between double quotes; the content stops at a double quote
-- and also at a backslash that is followed by a double quote
local value     = dquote * C((1-dquote-escape*dquote)^0) * dquote

----- pattern   = Cf(Ct("") * Cg(key * equal * value) * separator^0,rawset)^0 * P(-1) -- was wrong
----- pattern   = Cf(Ct("") * (Cg(key * equal * value) * separator^0)^1,rawset)^0 * P(-1)
-- every pair is stored with rawset in a fresh table; the whole string has to
-- be consumed for the match to succeed
local pattern   = Ct("") * (((key * equal * value) * separator^0) % rawset)^0 * P(-1)
558
-- Parse a string like [[key="value" foo="bar"]] into a hash. An empty or
-- missing string gives a new empty table, otherwise the result of the match
-- is returned.
function parsers.keq_to_hash(str)
    if not str or str == "" then
        return { }
    end
    return lpegmatch(pattern,str)
end
566
567-- inspect(lpeg.match(pattern,[[key="value" foo="bar"]]))
568
569
local defaultspecification = { separator = ",", quote = '"' }

-- Build a splitter for csv like data. The specification can have these
-- fields: separator (one or more separator characters), quote (one or more
-- quote characters) and numbers (when set, quoted fields are converted with
-- tonumber). Missing fields are looked up in the defaults above. The
-- returned function takes the data and gives back a table of rows, where
-- each row is a table of fields; every row has to end with a newline.

function parsers.csvsplitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local separators = specification.separator
    local quotes     = specification.quote
    local numbers    = specification.numbers
    local separator  = S(separators ~= "" and separators or ",")
    local field      = C((1 - separator - newline)^0)
    if quotes and quotes ~= "" then
        -- each quote character gets its own alternative
        local quoted = nil
        for chr in gmatch(quotes,".") do
            local fence   = P(chr)
            local inner   = (1 - fence)^0
            local capture = numbers and (inner/tonumber) or C(inner)
            local word    = fence * capture * fence
            quoted = quoted and (quoted + word) or word
        end
        field = quoted + field
    end
    local record = Ct(field * (separator * field)^0)
    local parser = utfbom * Ct((record * S("\n\r")^1)^0 )
    return function(data)
        return lpegmatch(parser,data)
    end
end
601
602-- local crap = [[
603-- first,second,third,fourth
604-- "1","2","3","4"
605-- "5","6","7","8"
606-- ]]
607
608-- local mycsvsplitter = parsers.csvsplitter { numbers = true }
609
610-- local list = mycsvsplitter(crap) inspect(list)
611
612-- and this is a slightly patched version of a version posted by Philipp Gesang
613
-- Build a splitter for csv data along the lines of rfc 4180. The
-- specification can have these fields: separator and quote (missing ones are
-- looked up in the defaults), numbers (convert fields with tonumber), zero
-- (in combination with numbers: use 0 when the conversion fails or a field is
-- empty) and strict (at most one newline between records instead of one or
-- more).
--
-- The returned function takes the data and an optional getheader flag. With
-- the flag set it returns the records after the first one plus that first
-- record as second value, otherwise it returns all records.
function parsers.rfc4180splitter(specification)
    specification     = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local numbers     = specification.numbers
    local zero        = specification.zero
    local separator   = specification.separator --> rfc: COMMA
    local quotechar   = P(specification.quote)  -->      DQUOTE
    -- a doubled quote is replaced by a single one
    local dquotechar  = quotechar * quotechar   -->      2DQUOTE
                      / specification.quote
    local separator   = S(separator ~= "" and separator or ",")
    local whatever    = (dquotechar + (1 - quotechar))^0
    -- a quoted field
    local escaped     = quotechar
                      * (numbers and (zero and (whatever/function(n) return tonumber(n) or 0 end) or whatever/tonumber) or Cs(whatever))
                      * quotechar
 -- local non_escaped = C((1 - quotechar - newline - separator)^1)
    -- an unquoted field: at least one character that is not a quote, newline
    -- or separator
    local whotever    = (1 - quotechar - newline - separator)^1
    local non_escaped = (numbers and (zero and (whotever/function(n) return tonumber(n) or 0 end) or whotever/tonumber) or Cs(whotever))
    -- an empty field gives 0 (numbers and zero both set) or the empty string
    local field       = escaped + non_escaped + (numbers and zero and Cc(0) or Cc(""))
    -- a record needs at least two fields
    local record      = Ct(field * (separator * field)^1)
    -- the position after the header is where the remaining records start
    local headerline  = utfbom * record * Cp()
    local morerecords = (newline^(specification.strict and -1 or 1) * record)^0
    local headeryes   = utfbom * Ct(morerecords)
    local headernop   = utfbom * Ct(record * morerecords)
    return function(data,getheader)
        if getheader then
            local header, position = lpegmatch(headerline,data)
            local data = lpegmatch(headeryes,data,position)
            return data, header
        else
            return lpegmatch(headernop,data)
        end
    end
end
646
647-- local mycsvsplitter = parsers.rfc4180splitter { numbers = true }
648--
649-- local crap = [[
650-- first,second,third,fourth
651-- "1","2","3","4"
652-- "a","b","c","d"
653-- "foo","bar""baz","boogie","xyzzy"
654-- ]]
655--
656-- local list, names = mycsvsplitter(crap,true)   inspect(list) inspect(names)
657-- local list, names = mycsvsplitter(crap)        inspect(list) inspect(names)
658
-- Call action for a single value or for every value in a range.
--
-- first  : start of the range, or the single value (can be a string too);
--          when it is nil or false nothing happens
-- last   : end of the range; true means an open range that runs up to n (or
--          just first when there is no n), nil or false means a single value
-- n      : upper limit for open ranges
-- action : function that gets called with each value
local function ranger(first,last,n,action)
    if not first then
        -- forget about it
        return
    end
    if not last then
        action(first)
        return
    end
    local stop = last
    if last == true then
        stop = n or first
    end
    for i=first,stop do
        action(i)
    end
end
674
-- a hexadecimal or cardinal number, converted with tonumber
local cardinal    = (lpegpatterns.hexadecimal + lpegpatterns.cardinal) / tonumber
local spacers     = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring

----- stepper  = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + Cc(true) ) + Cc(false) )
-----          * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1

-- A list of steps separated by commas and/or spaces. A step is one of:
--
--   number                 : one value (the end of the range is false)
--   number, ":" or "-",
--   number                 : a range
--   number, ":" or "-",
--   then "*" or the end    : an open range (the end of the range is true)
--   anything else          : a run without comma or space, passed as string
--
-- Each step is passed to ranger together with the first two extra match
-- arguments (the upper limit and the action).

local stepper  =
    spacers
  * (
        (
            cardinal
          * (
                spacers * S(":-") * spacers * ( cardinal + (P("*") + endofstring) * Cc(true) )
              + Cc(false)
            )
          + Cs((1-S(", "))^1) * Cc(false)
        )
      * Carg(1) * Carg(2) / ranger
      * S(", ")^0
    )^1
  * endofstring -- we're sort of strict (could do without endofstring)
697
-- Run an action over the steps in a specification.
--
-- parsers.stepper(str,n,action) : str is a string like "1-3,7,9:*", n is the
--     upper limit for open ranges and action defaults to print; nothing
--     happens when str is not a string
-- parsers.stepper(str,action)   : here str can also be a number (one call)
--     or a table (one call per array entry); open ranges have no upper limit
function parsers.stepper(str,n,action)
    local kind = type(str)
    if type(n) ~= "function" then
        if kind == "string" then
            lpegmatch(stepper,str,1,n,action or print)
        end
        return
    end
    -- the second argument is the action
    if kind == "number" then
        n(str)
    elseif kind == "table" then
        for i=1,#str do
            n(str[i])
        end
    else
        lpegmatch(stepper,str,1,false,n)
    end
end
714
715-- parsers.stepper("1,7-",9,function(i) print(">>>",i) end)
716-- parsers.stepper("1-3,7,8,9,one")
717-- parsers.stepper("1-3,6,7",function(i) print(">>>",i) end)
718-- parsers.stepper(" 1 : 3, ,7 ")
719-- parsers.stepper("1:4,9:13,24:*",30)
720-- parsers.stepper(1,print)
721-- parsers.stepper({1,3,4},print)
722
-- Convert a unit string to TeX input: "%" becomes "\percent " and the
-- integer after a "^" gets wrapped in braces. In the math variant the "^" is
-- kept, in the text variant it is replaced by "\high".
local pattern_math = Cs((P("%")/"\\percent " +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)

-- This used to export "pattern", which at this point is a local that belongs
-- to an unrelated parser defined earlier in the file. The math variant is
-- what parsers.unittotex uses by default, so that one is exported.
patterns.unittotex = pattern_math

-- Returns the converted string; with textmode set the text variant is used.
function parsers.unittotex(str,textmode)
    return lpegmatch(textmode and pattern_text or pattern_math,str)
end
731
-- a "^" followed by an integer becomes <sup>integer</sup>, the rest is
-- copied as is
local pattern = Cs((P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>") + anything)^0)

-- Returns the unit string with exponents wrapped in <sup> elements.
function parsers.unittoxml(str)
    return lpegmatch(pattern,str)
end
737
738-- print(parsers.unittotex("10^-32 %"),utilities.parsers.unittoxml("10^32 %"))
739
-- patterns for the iterator below, one per set of separator characters
local cache   = { }
-- note that from here on "spaces" is built from lpegpatterns.space
local spaces  = lpegpatterns.space^0
-- iterator that ends immediately, used for empty input
local dummy   = function() end

-- A missing entry is built on first access: skip leading spaces, capture
-- everything up to a separator character, skip the separator characters and
-- report the position where the next item starts.
setmetatableindex(cache,function(t,k)
    local separator = S(k) -- was P
    local value     = (1-separator)^0
    local pattern   = spaces * C(value) * separator^0 * Cp()
    t[k] = pattern
    return pattern
end)

local commalistiterator = cache[","]
753
-- Return an iterator over the items in str. The separator is a string with
-- the characters to split at, a comma when it is not given. For an empty
-- string the iterator ends immediately.
function parsers.iterator(str,separator)
    local length = #str
    if length == 0 then
        return dummy
    end
    local splitter = separator and cache[separator] or commalistiterator
    local position = 1
    return function()
        if position > length then
            return
        end
        -- the second result is the position where the next item starts
        local item, following = lpegmatch(splitter,str,position)
        if following then
            position = following
            return item
        end
    end
end
772
773-- for s in parsers.iterator("a b c,b,c") do
774--     print(s)
775-- end
776
-- Return a shallow copy of t[name], or a new empty table when there is no
-- such entry. The copy is what the merge accumulates into, so that the
-- source table stays untouched.
local function initialize(t,name)
    local result = { }
    local source = t[name]
    if source then
        -- iterate over the table we already fetched (it used to be looked up
        -- a second time)
        for k, v in next, source do
            result[k] = v
        end
    end
    return result
end
789
-- Return t[name] itself (not a copy), or a new empty table when that entry
-- is nil or false.
local function fetch(t,name)
    local found = t[name]
    if found then
        return found
    end
    return { }
end
793
-- Copy all pairs of more into result (existing keys are overwritten) and
-- return result.
local function process(result,more)
    local k, v = next(more)
    while k ~= nil do
        result[k] = v
        k, v = next(more,k)
    end
    return result
end
800
----- name   = C((1-S(", "))^1)
----- parser = (Carg(1) * name / initialize) * (S(", ")^1 * (Carg(1) * name / fetch))^0
----- merge  = Cf(parser,process)
-- a name is a run without comma or space; it is delivered together with the
-- table that comes in as first extra match argument
local name   = Carg(1) * C((1-S(", "))^1)
-- each following name gives the subtable with that name
local parser = S(", ")^1 * (name / fetch)
-- the first name gives a copy of its subtable and each following subtable
-- is merged into that copy
local merge  = (name / initialize) * (parser % process)^0

-- Merge the subtables of hash that are named in the comma and/or space
-- separated list; entries from later names overwrite those from earlier
-- ones. The subtables themselves are not changed.
function parsers.mergehashes(hash,list)
    return lpegmatch(merge,list,1,hash)
end
811
812-- local t = {
813--     aa = { alpha = 1, beta = 2, gamma = 3, },
814--     bb = { alpha = 4, beta = 5, delta = 6, },
815--     cc = { epsilon = 3 },
816-- }
817-- inspect(parsers.mergehashes(t,"aa, bb, cc"))
818
-- Split a number of seconds into days, hours, minutes and seconds (four
-- return values). Without an argument the value of os.runtime() is used.
function parsers.runtime(time)
    local left    = time or os.runtime()
    local day     = 24*60*60
    local hour    = 60*60
    local days    = div(left,day)
    left          = mod(left,day)
    local hours   = div(left,hour)
    left          = mod(left,hour)
    local minutes = div(left,60)
    local seconds = mod(left,60)
    return days, hours, minutes, seconds
end
831
832--
833
local spacing = whitespace^0
local apply   = P("->")
-- the method is everything in front of the arrow
local method  = C((1-apply)^1)
-- the token is either the content of a brace pair (up to the first closing
-- brace) or the rest of the string
local token   = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1)

-- when there is no "method ->" part the first extra match argument takes the
-- place of the method
local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token
840
-- Split "method->token" (the token can be in braces) into its two parts. A
-- string without method gives the default (or false) as first value. When no
-- string is given the default (or false) and an empty string are returned.
function parsers.splitmethod(str,default)
    local fallback = default or false
    if not str then
        return fallback, ""
    end
    return lpegmatch(pattern,str,1,fallback)
end
848
849-- print(parsers.splitmethod(" foo -> {bar} "))
850-- print(parsers.splitmethod("foo->{bar}"))
851-- print(parsers.splitmethod("foo->bar"))
852-- print(parsers.splitmethod("foo"))
853-- print(parsers.splitmethod("{foo}"))
854-- print(parsers.splitmethod())
855
-- a year: four or more digits (^4 means "at least four"), as number
local p_year = lpegpatterns.digit^4 / tonumber
857
858-- local pattern = Cf( Ct("") *
859--     (
860--         (             Cg(Cc("year")  * p_year  )
861--           * S("-/") * Cg(Cc("month") * cardinal)
862--           * S("-/") * Cg(Cc("day")   * cardinal)
863--         ) +
864--         (             Cg(Cc("day")   * cardinal)
865--           * S("-/") * Cg(Cc("month") * cardinal)
866--           * S("-/") * Cg(Cc("year")  * p_year  )
867--         ) +
868--         (             Cg(Cc("year")  * p_year  )
869--           * S("-/") * Cg(Cc("month") * cardinal)
870--         ) +
871--         (             Cg(Cc("month") * cardinal)
872--           * S("-/") * Cg(Cc("year")  * p_year  )
873--         )
874--     )
875--       *  (
876--          P(" ") * Cg(Cc("hour") * cardinal)
877--       *  P(":") * Cg(Cc("min")  * cardinal)
878--       * (P(":") * Cg(Cc("sec")  * cardinal))^-1
879--       + P(-1) )
880-- , rawset)
881
-- A date with an optional time. The accepted date forms are tried in this
-- order, with "-" or "/" between the fields:
--
--   year-month-day, day-month-year, year-month, month-year
--
-- followed by the compact form of four, two and two characters (yyyymmdd).
-- After the date either the end of the string or a space and hour:min with
-- optional :sec is expected. The fields are stored with rawset under the
-- keys year, month, day, hour, min and sec.
--
-- NOTE(review): in Lua "%" and "*" have the same precedence and associate to
-- the left, so each "% rawset" below syntactically applies to everything to
-- its left within the enclosing parentheses, not just to the pair in front
-- of it -- confirm that this grouping is intended.

local pattern = Ct("") * (
    (
        (             (Cc("year")  * p_year  ) % rawset
          * S("-/") * (Cc("month") * cardinal) % rawset
          * S("-/") * (Cc("day")   * cardinal) % rawset
        ) +
        (             (Cc("day")   * cardinal) % rawset
          * S("-/") * (Cc("month") * cardinal) % rawset
          * S("-/") * (Cc("year")  * p_year  ) % rawset
        ) +
        (             (Cc("year")  * p_year  ) % rawset
          * S("-/") * (Cc("month") * cardinal) % rawset
        ) +
        (             (Cc("month") * cardinal) % rawset
          * S("-/") * (Cc("year")  * p_year  ) % rawset
        ) +
        (             (Cc("year")  * (C(4)/tonumber)) % rawset
          *           (Cc("month") * (C(2)/tonumber)) % rawset
          *           (Cc("day")   * (C(2)/tonumber)) % rawset
        )
    )
      *  (
         P(" ") * (Cc("hour") * cardinal) % rawset
      *  P(":") * (Cc("min")  * cardinal) % rawset
      * (P(":") * (Cc("sec")  * cardinal) % rawset)^-1
      + P(-1) )
)

lpegpatterns.splittime = pattern

-- Returns the result of matching str against the pattern above (nil when it
-- does not match).
function parsers.totime(str)
    return lpegmatch(pattern,str)
end
915
916-- inspect(parsers.totime("20190305"))
917-- inspect(parsers.totime("2019-03-05"))
918-- inspect(parsers.totime("05-03-2019"))
919-- inspect(parsers.totime("2019-03-05 12:12:12"))
920-- print(os.time(parsers.totime("2019-03-05 12:12:12")))
921-- print(os.time(parsers.totime("2019/03/05 12:12:12")))
922-- print(os.time(parsers.totime("05-03-2019 12:12:12")))
923-- print(os.time(parsers.totime("05/03/2019 12:12:12")))
924