-- Register this file's metadata under the 'util-prs' key of the global
-- 'modules' table, creating that table first when it does not exist yet.
if not modules then modules = { } end modules ['util-prs'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local lpeg, table, string = lpeg, table, string
10local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
11local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
12local concat, gmatch, find = table.concat, string.gmatch, string.find
13local tonumber, tostring, type, next, rawset = tonumber, tostring, type, next, rawset
14local mod, div = math.mod, math.div
15
-- Set up the (global) utilities.parsers namespace and its patterns subtable.
-- Existing tables are reused when present.
utilities = utilities or {}
local parsers = utilities.parsers or { }
utilities.parsers = parsers
local patterns = parsers.patterns or { }
parsers.patterns = patterns

-- These helpers are not part of stock Lua; presumably they are supplied by a
-- table extension that is loaded before this file (TODO confirm).
local setmetatableindex = table.setmetatableindex
local sortedhash = table.sortedhash
local sortedkeys = table.sortedkeys
local tohash = table.tohash

-- Container for cached lookups; a fresh table is installed here regardless of
-- any parsers.hashes that existed before.
local hashes = { }
parsers.hashes = hashes
29
30
-- Single character building blocks used by the parsers in this file.
local digit = R("09")
local space = P(' ')
local equal = P("=")
local colon = P(":")
local comma = P(",")
local lbrace = P("{")
local rbrace = P("}")
local lparent = P("(")
local rparent = P(")")
local lbracket = P("[")
local rbracket = P("]")
local period = S(".")
local punctuation = S(".,:;")
-- Shared patterns taken from the lpeg.patterns table (defined elsewhere).
local spacer = lpegpatterns.spacer
local whitespace = lpegpatterns.whitespace
local newline = lpegpatterns.newline
local anything = lpegpatterns.anything
local endofstring = lpegpatterns.endofstring

-- Any single character that is not an opening or closing fence of the given
-- kind.
local nobrace = 1 - (lbrace + rbrace )
local noparent = 1 - (lparent + rparent)
local nobracket = 1 - (lbracket + rbracket)
53
54
55
local escape, left, right = P("\\"), P('{'), P('}')

-- Grammar with two rules. Rule 1 accepts any run of backslash-escaped braces,
-- non-brace characters and braced groups (rule 2). Rule 2 is a braced group
-- whose content is matched by rule 1 again.
lpegpatterns.balanced = P {
    ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
    left * V(1) * right
}

-- Recursive patterns that match one fenced group, including nested groups of
-- the same kind. There is no escape handling in these.
local nestedbraces = P { lbrace * (nobrace + V(1))^0 * rbrace }
local nestedparents = P { lparent * (noparent + V(1))^0 * rparent }
local nestedbrackets = P { lbracket * (nobracket + V(1))^0 * rbracket }
local spaces = space^0
-- A braced argument; the outer braces are replaced by nothing in the result.
local argument = Cs((lbrace/"") * ((nobrace + nestedbraces)^0) * (rbrace/""))
-- Consumes characters for as long as 'endofstring' does not match.
local content = (1-endofstring)^0

-- Exports. Note that 'nested' is an alias for 'nestedbraces' and that
-- 'nestedbrackets' is not exported.
lpegpatterns.nestedbraces = nestedbraces
lpegpatterns.nestedparents = nestedparents
lpegpatterns.nested = nestedbraces
lpegpatterns.argument = argument
lpegpatterns.content = content
79
-- A value is either a braced group (captured without its outer braces) or a
-- run of characters up to the next comma; commas inside nested braces do not
-- end the value. The second alternative can be empty.
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
            + C((nestedbraces + (1-comma))^0)

-- Key used by patterns a, c and d: everything up to '=' or ',' (spaces are
-- part of the key).
local key = C((1-equal-comma)^1)
-- a: key=value, or a bare key that gets an empty string as value
-- c: key=value only
-- d: like a, but ':' is accepted as assignment character too
-- All three skip leading spaces and commas.
local pattern_a = (space+comma)^0 * (key * equal * value + key * C(""))
local pattern_c = (space+comma)^0 * (key * equal * value)
local pattern_d = (space+comma)^0 * (key * (equal+colon) * value + key * C(""))

-- Key used by pattern b: it also stops at a space. Pattern b additionally
-- skips spaces around the '='.
local key = C((1-space-equal-comma)^1)
local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + C("")))
90
91
92
-- Table that receives the parsed key/value pairs. The settings_to_hash*
-- functions further down assign the target table to this variable before
-- they match, because the patterns below write to whatever table it holds.
local hash = { }

-- Capture handler: stores one pair in the current target table.
local function set(key,value)
    hash[key] = value
end

-- One or more settings; each pair is stored through 'set' as a side effect.
local pattern_a_s = (pattern_a/set)^1
local pattern_b_s = (pattern_b/set)^1
local pattern_c_s = (pattern_c/set)^1
local pattern_d_s = (pattern_d/set)^1

patterns.settings_to_hash_a = pattern_a_s
patterns.settings_to_hash_b = pattern_b_s
patterns.settings_to_hash_c = pattern_c_s
patterns.settings_to_hash_d = pattern_d_s
108
-- Build a settings parser that reports each key/value pair to a caller
-- supplied function instead of the module level setter.
--   set : function(key,value)
--   how : "strict" (key=value only), "tolerant" (spaces around '=' allowed),
--         anything else selects the default pattern
function parsers.make_settings_to_hash_pattern(set,how)
    local single = pattern_a
    if how == "strict" then
        single = pattern_c
    elseif how == "tolerant" then
        single = pattern_b
    end
    return (single/set)^1
end
118
-- Parse a "key=value,key,..." string into a table; a bare key gets an empty
-- string as value.
--   str      : settings string, an already parsed table, or nil/""
--   existing : optional table that receives the pairs
-- Returns a new empty table for nil/"" input. For table input it returns the
-- table itself, or 'existing' after the entries were copied into it. For
-- string input it returns 'existing' (or a new table) filled by the parser.
function parsers.settings_to_hash(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this used to return the undefined global 'exiting', i.e. nil
            return existing
        else
            return str
        end
    else
        -- the predefined pattern writes into the module level 'hash'
        hash = existing or { }
        lpegmatch(pattern_a_s,str)
        return hash
    end
end
137
-- Like settings_to_hash, but "key:value" is accepted next to "key=value".
-- A table is returned as is, nil or "" gives a new empty table, and a string
-- is always parsed into a new table.
function parsers.settings_to_hash_colon_too(str)
    if type(str) == "table" then
        return str
    end
    if not str or str == "" then
        return { }
    end
    -- the predefined pattern writes into the module level 'hash'
    hash = { }
    lpegmatch(pattern_d_s,str)
    return hash
end
149
-- Variant of settings_to_hash that uses the tolerant pattern: keys stop at a
-- space and spaces around '=' are skipped.
--   str      : settings string, an already parsed table, or nil/""
--   existing : optional table that receives the pairs
-- Returns a new empty table for nil/"" input. For table input it returns the
-- table itself, or 'existing' after the entries were copied into it. For
-- string input it returns 'existing' (or a new table) filled by the parser.
function parsers.settings_to_hash_tolerant(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this used to return the undefined global 'exiting', i.e. nil
            return existing
        else
            return str
        end
    else
        -- the predefined pattern writes into the module level 'hash'
        hash = existing or { }
        lpegmatch(pattern_b_s,str)
        return hash
    end
end
168
-- Variant of settings_to_hash that only accepts "key=value" pairs.
--   str      : settings string, an already parsed table, or nil/""
--   existing : optional table that receives the pairs
-- Returns nil for nil/"" input. For table input it returns the table itself,
-- or 'existing' after the entries were copied into it. For string input it
-- returns the filled table, or nil when that table ends up empty.
function parsers.settings_to_hash_strict(str,existing)
    if not str or str == "" then
        return nil
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this used to return the undefined global 'exiting', i.e. nil
            return existing
        else
            return str
        end
    else
        -- str is known to be non-empty here, so no further test is needed;
        -- the predefined pattern writes into the module level 'hash'
        hash = existing or { }
        lpegmatch(pattern_c_s,str)
        return next(hash) and hash
    end
end
187
-- Comma separated list splitter. Entries are separated by a comma followed by
-- optional spaces; an entry is a braced group (captured without the outer
-- braces) or the text up to the next comma that is not inside nested braces.
-- Leading spaces of the whole list are skipped and the entries are collected
-- in one table.
local separator = comma * space^0
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
            + C((nestedbraces + (1-comma))^0)
local pattern = spaces * Ct(value*(separator*value)^0)



patterns.settings_to_array = pattern
196
197
198
-- Split a comma separated list into an array.
--   str    : list string, an array (returned as is), or nil/"" (empty table)
--   strict : when set, the string is only split when it contains a '{';
--            otherwise it is only split when it contains a ','
-- A string that is not split is returned as the single entry of a new table.
function parsers.settings_to_array(str,strict)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    local trigger = strict and "{" or ","
    if find(str,trigger,1,true) then
        return lpegmatch(pattern,str)
    end
    return { str }
end
216
-- Convert a comma separated list, or an array, to an array of numbers.
-- Entries that tonumber cannot convert become nil. A table argument is
-- converted in place and returned; nil or "" gives a new empty table.
function parsers.settings_to_numbers(str)
    if not str or str == "" then
        return { }
    end
    local list
    if type(str) == "table" then
        list = str
    elseif find(str,",",1,true) then
        list = lpegmatch(pattern,str)
    else
        -- a single value, no need to run the splitter
        return { tonumber(str) }
    end
    for index=1,#list do
        list[index] = tonumber(list[index])
    end
    return list
end
233
-- Same list splitter as for settings_to_array, except that commas inside
-- [...] and (...) groups do not split an entry either.
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
            + C((nestedbraces + nestedbrackets + nestedparents + (1-comma))^0)
local pattern = spaces * Ct(value*(separator*value)^0)

-- Split 'str' with the fence aware pattern. Unlike settings_to_array there is
-- no special handling of nil, empty or table input here.
function parsers.settings_to_array_obey_fences(str)
    return lpegmatch(pattern,str)
end
241
242
243
244
245
246
247
248
249
250
251
252
253
254
-- Splitter patterns are cached per separator set: cache_a holds the variants
-- that collect entries in a table, cache_b the ones that call an action.
local cache_a = { }
local cache_b = { }

-- Return a pattern that splits a string at any character of 'symbol'
-- (default ","). Spaces around the separators are skipped and an entry can be
-- a braced group, which is captured without its outer braces.
--   withaction : when set, each entry is passed to the function supplied as
--                first extra argument of lpeg.match; otherwise the entries
--                are collected in a table
function parsers.groupedsplitat(symbol,withaction)
    symbol = symbol or ","
    local cache   = withaction and cache_b or cache_a
    local pattern = cache[symbol]
    if pattern then
        return pattern
    end
    local symbols   = S(symbol)
    local separator = space^0 * symbols * space^0
    -- an unbraced entry ends before optional spaces followed by a separator
    -- or by the end of the string
    local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
                    + C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0)
    if withaction then
        local withvalue = Carg(1) * value / function(f,s) return f(s) end
        pattern = spaces * withvalue * (separator*withvalue)^0
    else
        pattern = spaces * Ct(value*(separator*value)^0)
    end
    cache[symbol] = pattern
    return pattern
end
279
-- Comma splitters that skip the spaces around entries: pattern_a collects the
-- entries in a table, pattern_b passes each entry to the function given as
-- first extra match argument.
local pattern_a = parsers.groupedsplitat(",",false)
local pattern_b = parsers.groupedsplitat(",",true)
282
-- Split a comma separated list into an array, skipping spaces around the
-- entries. Returns a new empty table for nil or "".
function parsers.stripped_settings_to_array(str)
    if str and str ~= "" then
        return lpegmatch(pattern_a,str)
    end
    return { }
end
290
-- Call 'action' for every entry of a comma separated list, skipping spaces
-- around the entries. Whatever the match produces is returned; for nil or ""
-- a new empty table is returned instead.
function parsers.process_stripped_settings(str,action)
    if str and str ~= "" then
        return lpegmatch(pattern_b,str,1,action)
    end
    return { }
end
298
299
300
301
-- Appends one value to the array passed as extra match argument. From here on
-- this local 'set' shadows the hash setter defined earlier in the file.
local function set(t,v)
    t[#t+1] = v
end

-- The 'value' on the right-hand side is the fence aware entry pattern that is
-- still in scope at this point. Each matched entry is appended to the table
-- given as first extra argument; that table is also the final capture.
local value = P(Carg(1)*value) / set
local pattern = value*(separator*value)^0 * Carg(1)

-- Split the comma separated list 'str' and append its entries to 't'.
function parsers.add_settings_to_array(t,str)
    return lpegmatch(pattern,str,nil,t)
end
312
-- Serialize a hash as "key=value" pairs in sorted key order.
--   h         : table to serialize; a false/nil value gives ""
--   separator : string between pairs (default ",")
--   yes, no   : when both are given they replace true and false
--   strict    : when set, pairs whose value is false are left out
--   omit      : optional list of keys to skip (turned into a lookup table)
function parsers.hash_to_string(h,separator,yes,no,strict,omit)
    if not h then
        return ""
    end
    local pairs_ = { }
    local count  = 0
    local keys   = sortedkeys(h)
    local skip   = omit and tohash(omit)
    for index=1,#keys do
        local key = keys[index]
        if not (skip and skip[key]) then
            local value = h[key]
            local entry
            if type(value) ~= "boolean" then
                entry = key .. '=' .. value
            elseif value or not strict then
                -- false only gets here when strict is not set
                if yes and no then
                    entry = key .. '=' .. (value and yes or no)
                else
                    entry = key .. '=' .. tostring(value)
                end
            end
            if entry then
                count = count + 1
                pairs_[count] = entry
            end
        end
    end
    return concat(pairs_,separator or ",")
end
347
-- Join the entries of an array with 'separator' (default ","). A false or
-- nil array gives an empty string.
function parsers.array_to_string(a,separator)
    if not a then
        return ""
    end
    return concat(a,separator or ",")
end
355
356
357
358
359
360
361
362
363
364
365
366
367
-- Folds a list of names separated by commas and/or spaces into a table in
-- which every name maps to true.
local pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true))^1,rawset)

-- Convert such a list to a set. A new empty table is returned when there is
-- no string or when the pattern does not match.
function parsers.settings_to_set(str)
    local result = str and lpegmatch(pattern,str)
    if result then
        return result
    end
    return { }
end
373
-- Cached variant of settings_to_set: the handler below parses a settings
-- string on first access and stores the result under that string.
-- NOTE(review): table.setmetatableindex is called with only a function here;
-- presumably it then creates and returns a new table that uses the function
-- as its __index handler -- confirm against its definition.
hashes.settings_to_set = table.setmetatableindex(function(t,k)
    local v = k and lpegmatch(pattern,k) or { }
    t[k] = v
    return v
end)



-- Mark both the keys and the values of the cache as weak.
getmetatable(hashes.settings_to_set).__mode = "kv"
383
-- Join the keys of 'h' that have a true-ish value, in the order delivered by
-- sortedhash, using 'separator' (default ",").
function parsers.simple_hash_to_string(h, separator)
    local names = { }
    for name, enabled in sortedhash(h) do
        if enabled then
            names[#names+1] = name
        end
    end
    return concat(names,separator or ",")
end
395
396
397
-- An option name or value: whatever lpegpatterns.unquoted accepts (presumably
-- a quoted string with its quotes removed), or a run of characters without
-- whitespace and '='.
local str = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1)
-- Folds "key" and "key=value" items into the table passed as first extra
-- match argument; a key without '=' gets an empty string as value.
local setting = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset)
local splitter = setting^1

-- Parse whitespace separated options into 'target' (or into a new table).
-- A new empty table is returned when there is no string or no match.
function parsers.options_to_hash(str,target)
    if str then
        local filled = lpegmatch(splitter,str,1,target or { })
        if filled then
            return filled
        end
    end
    return { }
end
405
-- Splitter built by lpeg.tsplitat for a single space (that helper is defined
-- elsewhere).
local splitter = lpeg.tsplitat(" ")

-- Split 'str' with that splitter. A new empty table is returned when there is
-- no string or no match.
function parsers.options_to_array(str)
    local list = str and lpegmatch(splitter,str)
    if list then
        return list
    end
    return { }
end
411
412
413
-- List entries for the non-expanding variant: a braced group (captured
-- without outer braces), a repeat specification "<digits>(...)" kept as
-- written, or any other non-empty text up to the next comma.
local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
            + C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
            + C((nestedbraces + (1-comma))^1)
local pattern_a = spaces * Ct(value*(separator*value)^0)

-- Expand one repeat specification.
--   n   : repeat count, or nil/false to return 'str' untouched
--   str : the text between the parentheses, itself a comma separated list
-- Returns the entries of 'str', repeated n times, as multiple values.
local function repeater(n,str)
    if not n then
        return str
    end
    local entries = lpegmatch(pattern_a,str)
    if n == 1 then
        return unpack(entries)
    end
    local expanded = { }
    local last     = 0
    for _=1,n do
        for j=1,#entries do
            last = last + 1
            expanded[last] = entries[j]
        end
    end
    return unpack(expanded)
end
439
-- List entries for the expanding variant: like the entries above, but a
-- repeat specification "<digits>(...)" is replaced by its expansion.
local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
            + (C(digit^1)/tonumber * lparent * Cs((noparent + nestedparents)^1) * rparent) / repeater
            + C((nestedbraces + (1-comma))^1)
local pattern_b = spaces * Ct(value*(separator*value)^0)

-- Split a comma separated list that may contain repeat specifications such
-- as "3(a,b)". With 'expand' set these are expanded, otherwise they are kept
-- as written. A new empty table is returned when the list does not match.
function parsers.settings_to_array_with_repeat(str,expand)
    local splitter = expand and pattern_b or pattern_a
    return lpegmatch(splitter,str) or { }
end
452
453
454
-- Collects the contents of braced groups into a table. Spaces between the
-- groups are skipped, and collecting stops at the first character that is
-- neither a space nor the start of a braced group.
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
local pattern = Ct((space + value)^0)

-- Turn "{a} {b} ..." into { "a", "b", ... }.
function parsers.arguments_to_table(str)
    return lpegmatch(pattern,str)
end
461
462
463
-- Store settings in self[class], creating that table when needed.
--   self        : table that holds one subtable per class
--   class       : name of the class that receives the settings
--   parentclass : optional name of a class to fall back on; it is only linked
--                 (through setmetatableindex) when the class table is created
--                 here, and it is created too when it does not exist yet
--   settings    : string or table, handled by parsers.settings_to_hash
function parsers.getparameters(self,class,parentclass,settings)
    local target = self[class]
    if not target then
        target = { }
        -- register the class first, so that a parent lookup can find it
        self[class] = target
        if parentclass then
            local parent = self[parentclass]
            if not parent then
                parent = { }
                self[parentclass] = parent
            end
            setmetatableindex(target,parent)
        end
    end
    parsers.settings_to_hash(settings,target)
end
480
-- Return a string.gmatch iterator over the items of 'str', where an item is a
-- run of characters other than commas and spaces.
function parsers.listitem(str)
    return gmatch(str,"[^, ]+")
end
484
485
486
-- Substitution grammar that inserts a comma before every group of three
-- digits in the leading integer part of a string. The first group has one,
-- two or three digits, and the digit groups must be followed by a period or
-- by the end of the string. Anything after that is copied unchanged. Other
-- input does not match.
local pattern = Cs { "start",
    start    = V("one") + V("two") + V("three"),
    rest     = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0,
    thousand = digit * digit * digit,
    one      = digit * V("rest"),
    two      = digit * digit * V("rest"),
    three    = V("thousand") * V("rest"),
}

lpegpatterns.splitthousands = pattern

-- Return 'str' with thousands separators inserted, or 'str' itself when the
-- pattern does not match.
function parsers.splitthousands(str)
    return lpegmatch(pattern,str) or str
end
501
502
503
local optionalwhitespace = whitespace^0

-- words      : collects runs of characters that are neither punctuation
--              (. , : ;) nor whitespace; any other character is skipped
-- sentences  : collects the text up to and including each period, without
--              the whitespace that precedes the sentence
-- paragraphs : collects pieces of text that end where optional spacers are
--              followed by two newlines; whitespace directly before the end
--              of the string is removed
lpegpatterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1)
lpegpatterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1)
lpegpatterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1)
509
510
511
512
513
514
515
516
-- Building blocks for key="value" lists. Note that 'equal', 'escape' and
-- 'separator' are declared again here, so code further down in the file sees
-- these definitions.
local dquote = P('"')
local equal = P('=')
local escape = P('\\')
local separator = S(' ,')

-- A key is everything up to '='; a value is a double quoted string that is
-- captured without its quotes.
-- NOTE(review): the value body stops in front of a backslash-quote pair
-- without consuming it, so a value that contains \" seems to make the match
-- fail -- confirm whether escaped quotes are meant to be supported.
local key = C((1-equal)^1)
local value = dquote * C((1-dquote-escape*dquote)^0) * dquote


-- Folds the pairs, which may be followed by spaces and/or commas, into a new
-- table. The whole string has to be consumed for the match to succeed.
local pattern = Cf(Ct("") * (Cg(key * equal * value) * separator^0)^1,rawset)^0 * P(-1)
527
-- Parse a list of key="value" pairs. Returns a new empty table for nil or "",
-- and otherwise whatever the match gives.
function parsers.keq_to_hash(str)
    if not str or str == "" then
        return { }
    end
    return lpegmatch(pattern,str)
end
535
536
537
-- Fallback values for the splitter specifications below.
local defaultspecification = { separator = ",", quote = '"' }

-- Create a splitter for separated data.
--   specification.separator : set of separator characters (default ",")
--   specification.quote     : set of quote characters (default '"'); an
--                             empty string disables quoted fields
--   specification.numbers   : when set, quoted fields go through tonumber
-- The passed table is handed to setmetatableindex together with the defaults
-- (presumably so that missing fields fall back on them). The result is a
-- function(data) that returns an array of rows, each row being an array of
-- fields. Every row has to be terminated by at least one \n or \r.
function parsers.csvsplitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local separators = specification.separator
    local quotes     = specification.quote
    local numbers    = specification.numbers
    local separator  = S(separators ~= "" and separators or ",")
    -- an unquoted field runs up to the next separator or newline
    local field      = C((1 - separator - newline)^0)
    if quotes and quotes ~= "" then
        local quoted = nil
        for chr in gmatch(quotes,".") do
            -- a field can be wrapped in any one of the quote characters
            local fence = P(chr)
            local inner = (1 - fence)^0
            local word  = fence * (numbers and (inner/tonumber) or C(inner)) * fence
            quoted = quoted and (quoted + word) or word
        end
        field = quoted + field
    end
    local parser = Ct((Ct(field * (separator * field)^0) * S("\n\r")^1)^0 )
    return function(data)
        return lpegmatch(parser,data)
    end
end
569
570
571
572
573
574
575
576
577
578
579
580
581
-- Create a splitter for separated data in which quoted fields can contain a
-- doubled quote character that stands for a single one.
--   specification.separator : set of separator characters (default ",")
--   specification.quote     : quote string (default '"')
--   specification.numbers   : when set, quoted fields go through tonumber
--   specification.strict    : when set, at most one newline is accepted
--                             between records instead of one or more
-- The result is a function(data,getheader). Without 'getheader' it returns an
-- array of records. With 'getheader' the first record is split off and the
-- function returns the remaining records followed by that header record.
-- A record needs at least two fields.
function parsers.rfc4180splitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local numbers     = specification.numbers
    local separators  = specification.separator
    local quote       = specification.quote
    local quotechar   = P(quote)
    -- two quotes in a row collapse into one
    local dquotechar  = quotechar * quotechar / quote
    local separator   = S(separators ~= "" and separators or ",")
    local whatever    = (dquotechar + (1 - quotechar))^0
    local payload     = numbers and (whatever/tonumber) or Cs(whatever)
    local escaped     = quotechar * payload * quotechar
    local non_escaped = C((1 - quotechar - newline - separator)^1)
    local field       = escaped + non_escaped + Cc("")
    local record      = Ct(field * (separator * field)^1)
    local headerline  = record * Cp()
    local morerecords = (newline^(specification.strict and -1 or 1) * record)^0
    local headeryes   = Ct(morerecords)
    local headernop   = Ct(record * morerecords)
    return function(data,getheader)
        if not getheader then
            return lpegmatch(headernop,data)
        end
        -- continue right after the header record
        local header, position = lpegmatch(headerline,data)
        local records = lpegmatch(headeryes,data,position)
        return records, header
    end
end
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
-- Call 'action' for every number of one range item.
--   first  : start of the range; nothing happens when it is nil/false
--   last   : end of the range, true for an open range, or false/nil for a
--            single number
--   n      : upper limit for an open range (falls back on 'first')
--   action : function(number)
local function ranger(first,last,n,action)
    if not first then
        return
    end
    local stop
    if last == true then
        stop = n or first
    elseif last then
        stop = last
    else
        -- a single number, no loop needed
        action(first)
        return
    end
    for i=first,stop do
        action(i)
    end
end
645
-- Building blocks for the range parser; 'cardinal' yields a number.
local cardinal = lpegpatterns.cardinal / tonumber
local spacers = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring

-- Parses a list of range items separated by commas and/or spaces and hands
-- each item to 'ranger', together with the two extra match arguments (the
-- limit and the action). An item is one of:
--   number                      -> ranger(number,false,...)
--   number:number, number-number -> ranger(first,last,...)
--   number:* or number-*        -> ranger(first,true,...), an open range; the
--                                  '*' may be left out at the end of the
--                                  string
-- The whole string has to be consumed. An earlier definition of this pattern,
-- without '*' and the end-of-string check, was dropped because it was
-- shadowed by this one and never used.
local stepper = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
    * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring
655
-- Run 'action' (default: print) for every number in a range specification
-- such as "1,3-5,8:*".
--   str    : the range specification
--   n      : upper limit for open ranges; when a function is passed here it
--            is used as the action and there is no limit
--   action : function(number)
function parsers.stepper(str,n,action)
    if type(n) == "function" then
        -- called as stepper(str,action)
        n, action = false, n
    end
    lpegmatch(stepper,str,1,n,action or print)
end
663
664
665
-- Unit strings to TeX. Both variants replace '%' by "\percent ". An integer
-- exponent written as ^<integer> becomes {<integer>} after the '^' in the
-- math variant, and \high{<integer>} in the text variant.
local pattern_math = Cs((P("%")/"\\percent " +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)

-- This used to export 'pattern', which at this point is an unrelated parser
-- defined earlier in the file. The math variant is exported because it is
-- what parsers.unittotex uses by default.
patterns.unittotex = pattern_math

-- Convert a unit string; 'textmode' selects the text variant.
function parsers.unittotex(str,textmode)
    return lpegmatch(textmode and pattern_text or pattern_math,str)
end
674
-- Replaces ^<integer> by <sup><integer></sup> and copies everything else.
local pattern = Cs((P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>") + anything)^0)

-- Convert the exponents in a unit string to <sup> elements.
function parsers.unittoxml(str)
    return lpegmatch(pattern,str)
end
680
681
682
-- Iterator patterns per separator set. The handler installed below builds a
-- pattern when a key is looked up that is not in the table yet (presumably
-- setmetatableindex installs it as __index handler -- TODO confirm).
local cache = { }
local spaces = lpegpatterns.space^0
local dummy = function() end

setmetatableindex(cache,function(t,k)
    -- every character of k acts as a separator
    local separator = S(k)
    local value = (1-separator)^0
    -- skip leading spaces, capture one item, consume the separators that
    -- follow it and report the position where the next item starts
    local pattern = spaces * C(value) * separator^0 * Cp()
    t[k] = pattern
    return pattern
end)

-- The default used by parsers.iterator when no separator is given.
local commalistiterator = cache[","]
696
-- Return an iterator function over the items of 'str'.
--   separator : optional set of separator characters (default ",")
-- Each call of the iterator gives the next item, and nothing once the string
-- is used up. For an empty string a function that returns nothing is given.
function parsers.iterator(str,separator)
    local length = #str
    if length == 0 then
        return dummy
    end
    local splitter = separator and cache[separator] or commalistiterator
    local position = 1
    return function()
        if position > length then
            return
        end
        local item, continue = lpegmatch(splitter,str,position)
        if continue then
            position = continue
            return item
        end
    end
end
715
716
717
718
719
-- Return a shallow copy of the table stored under 'name' in 't', or a new
-- empty table when there is no such entry. The copy is made from the value
-- that was looked up once, instead of indexing 't' a second time.
local function initialize(t,name)
    local source = t[name]
    if not source then
        return { }
    end
    local result = { }
    for k, v in next, source do
        result[k] = v
    end
    return result
end
732
-- Return the table stored under 'name' in 't' (the table itself, not a copy),
-- or a new empty table when there is no such entry.
local function fetch(t,name)
    return t[name] or { }
end
736
-- Copy all entries of 'more' into 'result', overwriting existing keys, and
-- return 'result'.
local function process(result,more)
    local key, value = next(more)
    while key ~= nil do
        result[key] = value
        key, value = next(more,key)
    end
    return result
end
743
-- A list of names separated by commas and/or spaces. The first name yields a
-- copy of the table stored under it (via initialize), each following name the
-- stored table itself (via fetch). The fold then copies them from left to
-- right into the first one with 'process', so later names override earlier
-- ones.
local name = C((1-S(", "))^1)
local parser = (Carg(1) * name / initialize) * (S(", ")^1 * (Carg(1) * name / fetch))^0
local merge = Cf(parser,process)

-- Merge the subtables of 'hash' that are named in the string 'list' into one
-- new table.
function parsers.mergehashes(hash,list)
    return lpegmatch(merge,list,1,hash)
end
751
752
753
754
755
756
757
758
759
-- Split a number of seconds into days, hours, minutes and seconds.
--   time : seconds; when omitted os.runtime() is used (that function is not
--          part of stock Lua and has to be provided elsewhere)
-- The 'div' and 'mod' helpers come from math.div and math.mod, as localized
-- at the top of this file.
function parsers.runtime(time)
    local remaining = time or os.runtime()
    local days  = div(remaining,24*60*60)
    remaining   = mod(remaining,24*60*60)
    local hours = div(remaining,60*60)
    remaining   = mod(remaining,60*60)
    return days, hours, div(remaining,60), mod(remaining,60)
end
772
773
774
-- Building blocks for "method->token" specifications.
local spacing = whitespace^0
local apply = P("->")
-- everything in front of the "->"
local method = C((1-apply)^1)
-- the content of a braced group (the first '}' ends it), or else the
-- remainder of the string
local token = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1)

-- When there is no "->" the first extra match argument takes the place of the
-- method.
local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token
781
-- Split "method->token" into its two parts.
--   str     : the specification; without a string the result is the default
--             method and an empty token
--   default : method used when 'str' has no "->" part (false when omitted)
function parsers.splitmethod(str,default)
    local fallback = default or false
    if not str then
        return fallback, ""
    end
    return lpegmatch(pattern,str,1,fallback)
end
789
790
791
792
793
794
795
796
-- A year: four or more digits, converted to a number.
local p_year = lpegpatterns.digit^4 / tonumber

-- Folds a date and time into a table with the fields year, month, day, hour,
-- min and (when present) sec. The date is written as year, month, day or as
-- day, month, year, with '-' or '/' between the parts. A single space
-- separates it from the time, which is written as hour:min or hour:min:sec.
local pattern = Cf( Ct("") *
    (
        (             Cg(Cc("year")  * p_year)
          * S("-/") * Cg(Cc("month") * cardinal)
          * S("-/") * Cg(Cc("day")   * cardinal)
        ) +
        (             Cg(Cc("day")   * cardinal)
          * S("-/") * Cg(Cc("month") * cardinal)
          * S("-/") * Cg(Cc("year")  * p_year)
        )
    )
      * P(" ")  * Cg(Cc("hour")  * cardinal)
      * P(":")  * Cg(Cc("min")   * cardinal)
      * (P(":") * Cg(Cc("sec")   * cardinal))^-1
, rawset)

lpegpatterns.splittime = pattern

-- Parse a date/time string into a table of numbers; whatever the match gives
-- is returned (nil when the string does not match).
function parsers.totime(str)
    return lpegmatch(pattern,str)
end
820
821
822
823
824
825 |