-- Register descriptive metadata for this file in the global 'modules' table,
-- creating that table first when nothing has set it up yet.
if not modules then
    modules = { }
end

modules["util-prs"] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9local lpeg, table, string = lpeg, table, string
10local P, R, V, S, C, Ct, Cs, Carg, Cc, Cg, Cf, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cg, lpeg.Cf, lpeg.Cp
11local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
12local concat, gmatch, find = table.concat, string.gmatch, string.find
13local tonumber, tostring, type, next, rawset = tonumber, tostring, type, next, rawset
14local mod, div = math.mod, math.div
15
-- Helpers taken from the 'table' namespace. They are not part of stock Lua;
-- presumably an earlier loaded library provides them (verify when porting).
local setmetatableindex = table.setmetatableindex
local sortedhash        = table.sortedhash
local sortedkeys        = table.sortedkeys
local tohash            = table.tohash

-- Global namespace; existing content is kept so that the file can be reloaded.
utilities = utilities or { }

local parsers = utilities.parsers
if not parsers then
    parsers = { }
end
utilities.parsers = parsers

local patterns = parsers.patterns
if not patterns then
    patterns = { }
end
parsers.patterns = patterns

-- Container for the self-filling lookup tables defined further down.
local hashes = { }
parsers.hashes = hashes
29
30
-- Single character building blocks shared by the patterns in this file.
local digit       = R("09")
local space       = P(" ")
local equal       = P("=")
local colon       = P(":")
local comma       = P(",")
local lbrace      = P("{")
local rbrace      = P("}")
local lparent     = P("(")
local rparent     = P(")")
local lbracket    = P("[")
local rbracket    = P("]")
local period      = S(".")
local punctuation = S(".,:;")

-- Predefined patterns fetched from lpeg.patterns (defined outside this file).
local spacer      = lpegpatterns.spacer
local whitespace  = lpegpatterns.whitespace
local newline     = lpegpatterns.newline
local anything    = lpegpatterns.anything
local endofstring = lpegpatterns.endofstring

-- Any single character that is not one of the two fences of a pair.
local nobrace   = 1 - lbrace   - rbrace
local noparent  = 1 - lparent  - rparent
local nobracket = 1 - lbracket - rbracket
53
54
55
local escape = P("\\")
local left   = P("{")
local right  = P("}")

-- Matches a run of text in which braces are balanced: a backslash-escaped
-- brace and any non-brace character are consumed as-is, while an unescaped
-- left brace must be followed by balanced content and a closing right brace.
lpegpatterns.balanced = P {
    "content",
    content = ((escape * (left + right)) + (1 - (left + right)) + V("group"))^0,
    group   = left * V("content") * right,
}
66
-- Recursive patterns that match one fenced group, including nested groups
-- of the same kind.
local nestedbraces, nestedparents, nestedbrackets

do
    local function fenced(open,inner,close)
        return P { open * (inner + V(1))^0 * close }
    end
    nestedbraces   = fenced(lbrace,  nobrace,  rbrace)
    nestedparents  = fenced(lparent, noparent, rparent)
    nestedbrackets = fenced(lbracket,nobracket,rbracket)
end

-- Zero or more plain spaces.
local spaces   = space^0
-- A braced argument; the substitution drops the outer pair of braces.
local argument = Cs((lbrace/"") * (nobrace + nestedbraces)^0 * (rbrace/""))
-- Everything up to the end of the string.
local content  = (P(1) - endofstring)^0

lpegpatterns.nestedbraces   = nestedbraces
lpegpatterns.nestedparents  = nestedparents
lpegpatterns.nestedbrackets = nestedbrackets
lpegpatterns.nested         = nestedbraces -- alias
lpegpatterns.argument       = argument
lpegpatterns.content        = content
80
-- A value is either one braced group (outer braces are not captured) or
-- everything up to the next comma, where braced groups may hide commas.
local value = (lbrace * C((nobrace + nestedbraces)^0) * rbrace)
            + C((nestedbraces + (1 - comma))^0)

-- Four variants that each capture one key/value pair:
--   a : key=value, or a lone key that gets "" as value
--   c : key=value only
--   d : like a, but ':' is accepted next to '=' as assignment symbol
--   b : like a, with spaces permitted around the key and the '='
local key, pattern_a, pattern_b, pattern_c, pattern_d

do
    local skipped  = (space + comma)^0
    local novalue  = C("")

    key       = C((1 - equal - comma)^1)
    pattern_a = skipped * (key * equal * value + key * novalue)
    pattern_c = skipped * (key * equal * value)
    pattern_d = skipped * (key * (equal + colon) * value + key * novalue)

    -- the tolerant variant does not accept spaces inside a key
    key       = C((1 - space - equal - comma)^1)
    pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + novalue))
end
91
92
93
-- The table that currently receives parsed pairs. The settings_to_hash
-- functions swap in their own target before they run a match.
local hash = { }

-- Capture handler: stores one parsed pair in the current target table.
local function set(k,v)
    hash[k] = v
end

-- Repeating variants: each matched pair is handed over to 'set'.
local pattern_a_s, pattern_b_s, pattern_c_s, pattern_d_s =
    (pattern_a/set)^1,
    (pattern_b/set)^1,
    (pattern_c/set)^1,
    (pattern_d/set)^1

patterns.settings_to_hash_a = pattern_a_s
patterns.settings_to_hash_b = pattern_b_s
patterns.settings_to_hash_c = pattern_c_s
patterns.settings_to_hash_d = pattern_d_s
109
-- Builds a repeating key/value pattern that feeds every parsed pair to a
-- caller supplied handler.
--
-- set : function(key,value) called for each pair
-- how : "strict" (only key=value), "tolerant" (spaces around key and '='
--       permitted) or anything else for the default variant
function parsers.make_settings_to_hash_pattern(set,how)
    local single = pattern_a
    if how == "strict" then
        single = pattern_c
    elseif how == "tolerant" then
        single = pattern_b
    end
    return (single/set)^1
end
119
-- Converts a "key=value,key=value" specification into a table.
--
-- str      : the specification; a table is accepted too
-- existing : optional table that receives the pairs
--
-- Returns a new empty table for a nil or empty specification. A table
-- passed as 'str' is returned as-is, or merged into 'existing' (which is
-- then returned). Otherwise the string is parsed and the filled table is
-- returned; a key without '=' gets an empty string as value.
function parsers.settings_to_hash(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- fixed: this used to return the undefined global 'exiting'
            return existing
        else
            return str
        end
    else
        -- the match handler writes into the file-level 'hash' variable
        hash = existing or { }
        lpegmatch(pattern_a_s,str)
        return hash
    end
end
138
-- Like settings_to_hash without a target table, but a colon is accepted
-- as assignment symbol next to the equal sign ("a=1,b:2").
--
-- Returns a new empty table for a nil or empty string, a table argument
-- unchanged, and otherwise a fresh table with the parsed pairs.
function parsers.settings_to_hash_colon_too(str)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    -- the match handler writes into the file-level 'hash' variable
    hash = { }
    lpegmatch(pattern_d_s,str)
    return hash
end
150
-- Variant of settings_to_hash that permits spaces around keys and around
-- the equal sign ("a = 1, b = 2").
--
-- str      : the specification; a table is accepted too
-- existing : optional table that receives the pairs
--
-- Returns a new empty table for a nil or empty specification. A table
-- passed as 'str' is returned as-is, or merged into 'existing' (which is
-- then returned). Otherwise the string is parsed and the filled table is
-- returned.
function parsers.settings_to_hash_tolerant(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- fixed: this used to return the undefined global 'exiting'
            return existing
        else
            return str
        end
    else
        -- the match handler writes into the file-level 'hash' variable
        hash = existing or { }
        lpegmatch(pattern_b_s,str)
        return hash
    end
end
169
-- Variant of settings_to_hash that only accepts real key=value pairs.
--
-- str      : the specification; a table is accepted too
-- existing : optional table that receives the pairs
--
-- Returns nil for a nil or empty specification. A table passed as 'str' is
-- returned as-is, or merged into 'existing' (which is then returned).
-- Otherwise the string is parsed; the result is the filled table, or nil
-- when that table is still empty after parsing.
function parsers.settings_to_hash_strict(str,existing)
    if not str or str == "" then
        return nil
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- fixed: this used to return the undefined global 'exiting'
            return existing
        else
            return str
        end
    else
        -- at this point str is known to be neither nil nor empty
        -- the match handler writes into the file-level 'hash' variable
        hash = existing or { }
        lpegmatch(pattern_c_s,str)
        return next(hash) and hash
    end
end
188
-- A comma followed by optional spaces separates list entries.
local separator = comma * space^0

-- An entry is a braced group (outer braces dropped) or the text up to the
-- next comma; the list pattern collects all entries in one table.
local value, pattern

do
    local braced = lbrace * C((nobrace + nestedbraces)^0) * rbrace
    local plain  = C((nestedbraces + (1 - comma))^0)

    value   = braced + plain
    pattern = spaces * Ct(value * (separator * value)^0)
end

patterns.settings_to_array = pattern
197
198
199
-- Splits a comma separated list into an array of strings.
--
-- str    : the list; nil and "" give an empty table, a table is returned
--          unchanged
-- strict : when set the list is only split when it contains a left brace,
--          otherwise it is only split when it contains a comma
--
-- A string that is not split is returned as single entry array.
function parsers.settings_to_array(str,strict)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    local trigger = strict and "{" or ","
    if find(str,trigger,1,true) then
        return lpegmatch(pattern,str)
    end
    return { str }
end
217
-- Converts a comma separated list (or an array) into an array of numbers.
--
-- nil and "" give an empty table. A table argument is converted in place
-- and returned. Entries that tonumber cannot convert become nil.
function parsers.settings_to_numbers(str)
    if not str or str == "" then
        return { }
    end
    local list
    if type(str) == "table" then
        list = str
    elseif find(str,",",1,true) then
        list = lpegmatch(pattern,str)
    else
        -- a single entry needs no splitting
        return { tonumber(str) }
    end
    for i=1,#list do
        list[i] = tonumber(list[i])
    end
    return list
end
234
-- Same list splitter as before, but commas inside nested brackets and
-- parentheses are protected too, not only those inside braces.
local value, pattern

do
    local braced = lbrace * C((nobrace + nestedbraces)^0) * rbrace
    local fenced = nestedbraces + nestedbrackets + nestedparents

    value   = braced + C((fenced + (1 - comma))^0)
    pattern = spaces * Ct(value * (separator * value)^0)
end

-- Splits a comma separated list while respecting {}, [] and () groups.
-- Returns whatever the match delivers (no checking of 'str' is done here).
function parsers.settings_to_array_obey_fences(str)
    return lpegmatch(pattern,str)
end
242
243
244
245
246
247
248
249
250
251
252
253
254
255
-- Patterns are cached per symbol set: cache_a holds the collecting
-- variants, cache_b the ones that call an action.
local cache_a = { }
local cache_b = { }

-- Returns a pattern that splits a list at any of the characters in
-- 'symbol' (default ","), dropping the spaces around the separators.
--
-- An entry that is one braced group directly followed by a separator or
-- the end of the string loses its outer braces. Without 'withaction' the
-- pattern captures one table with all entries; with 'withaction' the
-- pattern expects a function as first extra match argument and calls it
-- with every entry.
function parsers.groupedsplitat(symbol,withaction)
    symbol = symbol or ","
    local cache  = withaction and cache_b or cache_a
    local cached = cache[symbol]
    if cached then
        return cached
    end
    local symbols  = S(symbol)
    local optional = space^0
    local between  = optional * symbols * optional
    local braced   = lbrace * C((nobrace + nestedbraces)^0) * (rbrace * (#symbols + P(-1)))
    local plain    = C((nestedbraces + (1 - (optional * (symbols + P(-1)))))^0)
    local entry    = braced + plain
    local result
    if withaction then
        local function call(f,s)
            return f(s)
        end
        local applied = (Carg(1) * entry) / call
        result = spaces * applied * (between * applied)^0
    else
        result = spaces * Ct(entry * (between * entry)^0)
    end
    cache[symbol] = result
    return result
end
285
-- Comma based splitters: pattern_a collects entries, pattern_b calls an
-- action per entry.
local pattern_a = parsers.groupedsplitat(",",false)
local pattern_b = parsers.groupedsplitat(",",true)

-- Splits a comma separated list into an array, without the spaces around
-- the commas. A nil or empty string gives an empty table.
function parsers.stripped_settings_to_array(str)
    if str and str ~= "" then
        return lpegmatch(pattern_a,str)
    end
    return { }
end
296
-- Calls 'action' with every entry of a comma separated list (spaces around
-- the commas are dropped). A nil or empty string gives an empty table;
-- otherwise the values of the match are returned.
function parsers.process_stripped_settings(str,action)
    if str and str ~= "" then
        return lpegmatch(pattern_b,str,1,action)
    end
    return { }
end
304
305
306
307
-- Capture handler: appends one entry to the target array.
local function set(target,entry)
    local n = #target + 1
    target[n] = entry
end

-- Each entry (as defined by the previous 'value' pattern) is appended to
-- the table passed as first extra match argument; that table is also the
-- final capture.
local value   = (Carg(1) * value) / set
local pattern = value * (separator * value)^0 * Carg(1)

-- Appends the entries of the comma separated list 'str' to the array 't'
-- and returns the result of the match.
function parsers.add_settings_to_array(t,str)
    return lpegmatch(pattern,str,nil,t)
end
318
-- Serializes a table as "key=value" pairs, ordered by sortedkeys.
--
-- h         : the table; nil (or false) gives ""
-- separator : put between the pairs, "," by default
-- yes, no   : when both are given they replace true and false
-- strict    : when set, pairs with a false value are left out
-- omit      : optional collection of keys to skip (converted with tohash)
function parsers.hash_to_string(h,separator,yes,no,strict,omit)
    if not h then
        return ""
    end
    local result = { }
    local n      = 0
    local keys   = sortedkeys(h)
    omit = omit and tohash(omit)
    for i=1,#keys do
        local key = keys[i]
        if not (omit and omit[key]) then
            local value = h[key]
            local shown = value
            local keep  = true
            if type(value) == "boolean" then
                if yes and no then
                    if value then
                        shown = yes
                    elseif strict then
                        keep = false
                    else
                        shown = no
                    end
                elseif value or not strict then
                    shown = tostring(value)
                else
                    keep = false
                end
            end
            if keep then
                n = n + 1
                result[n] = key .. '=' .. shown
            end
        end
    end
    return concat(result,separator or ",")
end
353
-- Joins the entries of array 'a' with 'separator' (default ","); a nil or
-- false 'a' gives an empty string.
function parsers.array_to_string(a,separator)
    return a and concat(a,separator or ",") or ""
end
361
362
363
364
365
366
367
368
369
370
371
372
373
-- Turns a list of names separated by commas and/or spaces into a set:
-- every name becomes a key with the value true.
local pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true))^1,rawset)

-- Returns the set for 'str'; an empty table when 'str' is nil or when the
-- pattern does not match. (A second, identical definition of this function
-- used to follow the hash below; it has been removed.)
function parsers.settings_to_set(str)
    return str and lpegmatch(pattern,str) or { }
end

-- Caching variant: indexing this table with a list string parses the
-- string once and stores the resulting set under that string.
hashes.settings_to_set = setmetatableindex(function(t,k)
    local v = k and lpegmatch(pattern,k) or { }
    t[k] = v
    return v
end)
389
-- Collects the names of a list separated by commas and/or spaces in an
-- array.
local pattern = Ct((C((1 - S(", "))^1) * S(", ")^0)^1)

-- Caching variant: indexing this table with a list string parses the
-- string once and stores the resulting array under that string.
hashes.settings_to_list = table.setmetatableindex(function(cache,list)
    local entries = { }
    if list then
        entries = lpegmatch(pattern,list) or entries
    end
    cache[list] = entries
    return entries
end)

-- Both caches get weak keys and values so that the garbage collector may
-- remove entries.
do
    local mode = "kv"
    getmetatable(hashes.settings_to_set ).__mode = mode
    getmetatable(hashes.settings_to_list).__mode = mode
end
405
-- Joins the keys of 'h' that have a true-ish value, in the order that
-- sortedhash delivers them, using 'separator' (default ",").
function parsers.simple_hash_to_string(h, separator)
    local names = { }
    local count = 0
    for name, enabled in sortedhash(h) do
        if enabled then
            count = count + 1
            names[count] = name
        end
    end
    return concat(names,separator or ",")
end
417
418
419
-- Whitespace separated options of the form key, key=value or
-- key="quoted value" (quotes are handled by lpegpatterns.unquoted, which is
-- defined elsewhere). Pairs are stored with rawset in the table passed as
-- first extra match argument; a key without value gets "".
local str, setting, splitter

do
    local optional = whitespace^0

    str = Cs(lpegpatterns.unquoted) + C((1 - whitespace - equal)^1)

    local pair = Cg(str * optional * (equal * optional * str + Cc("")))

    setting  = Cf(Carg(1) * (optional * pair)^1,rawset)
    splitter = setting^1
end

-- Parses an option string into 'target' (or a new table). Returns an empty
-- table when 'str' is nil or when nothing matches.
function parsers.options_to_hash(str,target)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str,1,target or { }) or { }
end
427
-- Space based splitter made by lpeg.tsplitat (an extension that is defined
-- outside this file).
local splitter = lpeg.tsplitat(" ")

-- Splits 'str' at spaces. Returns an empty table when 'str' is nil or when
-- the splitter delivers nothing.
function parsers.options_to_array(str)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str) or { }
end
433
434
435
-- List entries for the non expanding splitter, tried in this order:
--   a braced group (outer braces dropped)
--   a repeat specification like 3(a,b), kept as it is
--   non empty text up to the next comma
--   nothing, which gives an empty string
local value, pattern_a

do
    local braced   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
    local repeated = C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
    local plain    = C((nestedbraces + (1 - comma))^1)

    value     = braced + repeated + plain + Cc("")
    pattern_a = spaces * Ct(value * (separator * value)^0)
end
441
-- Expands the body of a repeat specification: 'str' is split with
-- pattern_a and the resulting entries are returned 'n' times in a row, as
-- separate values. Without 'n' the string itself is returned.
local function repeater(n,str)
    if not n then
        return str
    end
    local entries = lpegmatch(pattern_a,str)
    if n == 1 then
        return unpack(entries)
    end
    local expanded = { }
    local size     = 0
    for _=1,n do
        for j=1,#entries do
            size = size + 1
            expanded[size] = entries[j]
        end
    end
    return unpack(expanded)
end
462
-- List entries for the expanding splitter. The difference with pattern_a is
-- that a repeat specification like 3(a,b) is handed to 'repeater', with the
-- count converted to a number, so its entries show up multiple times.
local value, pattern_b

do
    local braced   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
    local count    = C(digit^1) / tonumber
    local body     = Cs((noparent + nestedparents)^1)
    local expanded = (count * lparent * body * rparent) / repeater
    local plain    = C((nestedbraces + (1 - comma))^1)

    value     = braced + expanded + plain + Cc("")
    pattern_b = spaces * Ct(value * (separator * value)^0)
end

-- Splits a comma separated list. With 'expand' set, repeat specifications
-- are expanded; otherwise they are kept as single entries. An empty table
-- is returned when the match delivers nothing.
function parsers.settings_to_array_with_repeat(str,expand)
    local splitter = expand and pattern_b or pattern_a
    return lpegmatch(splitter,str) or { }
end
476
477
478
-- A sequence of braced arguments, optionally separated by spaces; each
-- argument is captured without its outer braces.
local value   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
local pattern = Ct((value + space)^0)

-- Returns an array with the content of the leading braced arguments of
-- 'str', as in "{a} {b}{c}".
function parsers.arguments_to_table(str)
    return lpegmatch(pattern,str)
end
485
486
487
-- Stores the key=value pairs of 'settings' in self[class].
--
-- When self[class] does not exist yet it is created, and when a
-- 'parentclass' is given the new table is chained to self[parentclass]
-- (which is created on demand too) with setmetatableindex. An already
-- existing class table is left untouched apart from the added settings.
function parsers.getparameters(self,class,parentclass,settings)
    local current = self[class]
    if not current then
        current = { }
        self[class] = current
        if parentclass then
            local parent = self[parentclass]
            if not parent then
                parent = { }
                self[parentclass] = parent
            end
            setmetatableindex(current,parent)
        end
    end
    parsers.settings_to_hash(settings,current)
end
504
-- Returns an iterator over the entries of 'str', where an entry is a run
-- of characters that are neither a comma nor a space.
function parsers.listitem(str)
    local entry = "[^, ]+"
    return gmatch(str,entry)
end
508
509
510
-- Inserts commas between groups of three digits in the integer part of a
-- number. The leading group has one, two or three digits; after the groups
-- a period or the end of the string is required, and the remainder of the
-- string is copied as it is.
local pattern

do
    local thousand = digit * digit * digit
    local rest     = (Cc(",") * thousand)^0 * (P(".") + endofstring) * anything^0

    pattern = Cs(
        (digit * rest)
      + (digit * digit * rest)
      + (thousand * rest)
    )
end

lpegpatterns.splitthousands = pattern

-- Returns 'str' with thousands separators added, or 'str' itself when it
-- does not have the expected shape.
function parsers.splitthousands(str)
    return lpegmatch(pattern,str) or str
end
525
526
527
local optionalwhitespace = whitespace^0

do
    -- a word is a run without punctuation and whitespace; other characters
    -- are skipped without being captured
    local word      = Cs((1 - punctuation - whitespace)^1)
    -- a sentence runs up to and including the next period
    local sentence  = Cs((1 - period)^0 * period)
    -- whitespace at the very end of the string is removed from a paragraph
    local trailing  = (whitespace^1 * endofstring) / ""
    -- two newlines, optionally preceded by spacers, end a paragraph
    local parbreak  = spacer^0 * newline * newline
    local paragraph = Cs(((trailing + 1) - parbreak)^1)

    lpegpatterns.words      = Ct((word + anything)^1)
    lpegpatterns.sentences  = Ct((optionalwhitespace * sentence)^1)
    lpegpatterns.paragraphs = Ct((optionalwhitespace * paragraph)^1)
end
533
534
535
536
537
538
539
540
-- Pieces for parsing sequences like: key="value" other="value"
local dquote    = P('"')
local equal     = P('=')
local escape    = P('\\')
local separator = S(' ,')

-- a key is everything up to the equal sign
local key   = C((1 - equal)^1)
-- a value is double quoted; the quotes are not part of the capture
local value = dquote * C(((1 - dquote) - (escape * dquote))^0) * dquote

-- pairs may be separated by spaces and commas and are stored in a new table
-- with rawset; the whole string has to be consumed
local pattern = Cf(Ct("") * (Cg(key * equal * value) * separator^0)^1,rawset)^0 * P(-1)

-- Converts a string with key="value" pairs into a table. A nil or empty
-- string gives an empty table; otherwise the result of the match is
-- returned as it is.
function parsers.keq_to_hash(str)
    if not str or str == "" then
        return { }
    end
    return lpegmatch(pattern,str)
end
559
560
561
-- Fallback values for the fields of a splitter specification.
local defaultspecification = { separator = ",", quote = '"' }

-- Returns a function that splits csv-like data into an array of records,
-- each record being an array of fields.
--
-- specification.separator : characters that separate fields ("," when
--                           empty)
-- specification.quote     : characters that may each be used as quote
-- specification.numbers   : when set, quoted fields go through tonumber
--
-- Missing fields in the specification fall back on defaultspecification
-- (the passed table gets chained to it). A record is only recognized when
-- it is followed by at least one "\n" or "\r".
function parsers.csvsplitter(specification)
    if specification then
        specification = setmetatableindex(specification,defaultspecification) or defaultspecification
    else
        specification = defaultspecification
    end
    local separators = specification.separator
    local quotes     = specification.quote
    local numbers    = specification.numbers
    local separator  = S(separators ~= "" and separators or ",")
    local field      = C((1 - separator - newline)^0)
    if quotes and quotes ~= "" then
        -- one alternative per quote character
        local quoted = nil
        for chr in gmatch(quotes,".") do
            local fence   = P(chr)
            local inner   = (1 - fence)^0
            local content = numbers and (inner/tonumber) or C(inner)
            local word    = fence * content * fence
            if quoted then
                quoted = quoted + word
            else
                quoted = word
            end
        end
        field = quoted + field
    end
    local record = Ct(field * (separator * field)^0)
    local parser = Ct((record * S("\n\r")^1)^0)
    return function(data)
        return lpegmatch(parser,data)
    end
end
593
594
595
596
597
598
599
600
601
602
603
604
605
-- Returns a function(data,getheader) that splits csv data where a quote
-- inside a quoted field is written as two quotes.
--
-- specification.separator : characters that separate fields ("," when
--                           empty)
-- specification.quote     : the quote string
-- specification.numbers   : when set, quoted fields go through tonumber
-- specification.strict    : when set, at most one newline is accepted
--                           between records, otherwise one or more
--
-- Missing fields fall back on defaultspecification. A record needs at
-- least two fields. With 'getheader' the first record is split off and the
-- splitter returns the remaining records plus that header; otherwise it
-- returns all records.
function parsers.rfc4180splitter(specification)
    if specification then
        specification = setmetatableindex(specification,defaultspecification) or defaultspecification
    else
        specification = defaultspecification
    end
    local numbers     = specification.numbers
    local separators  = specification.separator
    local quote       = specification.quote
    local quotechar   = P(quote)
    -- a doubled quote stands for one quote
    local dquotechar  = (quotechar * quotechar) / quote
    local separator   = S(separators ~= "" and separators or ",")
    local whatever    = (dquotechar + (1 - quotechar))^0
    local inner       = numbers and (whatever/tonumber) or Cs(whatever)
    local escaped     = quotechar * inner * quotechar
    local non_escaped = C((1 - quotechar - newline - separator)^1)
    local field       = escaped + non_escaped + Cc("")
    local record      = Ct(field * (separator * field)^1)
    local headerline  = record * Cp()
    local linebreak   = newline^(specification.strict and -1 or 1)
    local morerecords = (linebreak * record)^0
    local headeryes   = Ct(morerecords)
    local headernop   = Ct(record * morerecords)
    return function(data,getheader)
        if not getheader then
            return lpegmatch(headernop,data)
        end
        -- the position after the header is where the records start
        local header, position = lpegmatch(headerline,data)
        local records = lpegmatch(headeryes,data,position)
        return records, header
    end
end
635
636
637
638
639
640
641
642
643
644
645
646
647
-- Calls 'action' for the numbers described by one range specification.
--
-- first : start of the range; nothing happens without it
-- last  : true means "up to n" (or just 'first' when there is no n), a
--         number is the end of the range, anything false means that only
--         'first' is reported
local function ranger(first,last,n,action)
    if not first then
        return
    end
    local final
    if last == true then
        final = n or first
    elseif last then
        final = last
    else
        action(first)
        return
    end
    for i=first,final do
        action(i)
    end
end
663
-- A number in hexadecimal or cardinal notation (both patterns come from
-- lpeg.patterns), converted with tonumber.
local cardinal    = (lpegpatterns.hexadecimal + lpegpatterns.cardinal) / tonumber
local spacers     = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring

-- Parses range specifications like "1,3-5,7:9,12-*" and calls 'ranger' for
-- each item with: the first number, the last number (true for an open range
-- that ends with '*' or at the end of the string, false for a single
-- number) and the two extra match arguments. Items may be separated by
-- commas and spaces and the whole string has to be consumed.
--
-- An older definition of 'stepper' that used to precede this one was dead
-- code (it was immediately shadowed) and has been removed.
local stepper = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
    * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring
673
-- Runs an action over the numbers of a range specification.
--
-- Two calling conventions are supported:
--   stepper(str,action)   : 'str' may be a number (reported directly), an
--                           array (each entry is reported) or a range
--                           string, parsed without upper bound
--   stepper(str,n,action) : 'str' must be a string; 'n' is the upper bound
--                           for open ranges and 'action' defaults to print
function parsers.stepper(str,n,action)
    local kind = type(str)
    if type(n) == "function" then
        local handler = n
        if kind == "number" then
            handler(str)
        elseif kind == "table" then
            for i=1,#str do
                handler(str[i])
            end
        else
            lpegmatch(stepper,str,1,false,handler)
        end
    elseif kind == "string" then
        lpegmatch(stepper,str,1,n,action or print)
    end
end
690
691
692
693
694
695
696
697
698
-- Converters for unit strings: a percent sign becomes "\percent " and an
-- integer exponent after '^' gets wrapped in braces. In the text variant
-- the '^' itself is replaced by "\high".
local pattern_math = Cs((P("%")/"\\percent " +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)

-- fixed: this used to export 'pattern', which at this point in the file is
-- an unrelated local (the key="value" parser); the math variant is the one
-- that parsers.unittotex uses by default
patterns.unittotex = pattern_math

-- Returns 'str' prepared for TeX, using the text variant when 'textmode'
-- is set and the math variant otherwise.
function parsers.unittotex(str,textmode)
    return lpegmatch(textmode and pattern_text or pattern_math,str)
end
707
-- An integer exponent after '^' gets wrapped in <sup>..</sup> and the '^'
-- itself disappears; all other characters are copied.
local pattern

do
    local exponent = (P("^") / "<sup>") * lpegpatterns.integer * Cc("</sup>")

    pattern = Cs((exponent + anything)^0)
end

-- Returns 'str' with exponents tagged for xml.
function parsers.unittoxml(str)
    return lpegmatch(pattern,str)
end
713
714
715
-- Per separator set cache of "fetch next entry" patterns.
local cache  = { }
local spaces = lpegpatterns.space^0

-- Iterator that delivers nothing; used for empty input.
local function dummy()
end

-- A missing pattern is made on demand: skip leading spaces, capture the
-- text up to the next separator character, skip the separators and report
-- the position where the next entry starts.
setmetatableindex(cache,function(patterns,symbols)
    local separator = S(symbols)
    local entry     = C((1 - separator)^0)
    local fetcher   = spaces * entry * separator^0 * Cp()
    patterns[symbols] = fetcher
    return fetcher
end)

local commalistiterator = cache[","]
729
-- Returns an iterator function over the entries of 'str'.
--
-- separator : the characters that separate entries; a comma when omitted
--
-- Each call of the iterator returns the next entry and nothing once the
-- string is exhausted. An empty string gives an iterator without results.
function parsers.iterator(str,separator)
    local length = #str
    if length == 0 then
        return dummy
    end
    local fetcher  = separator and cache[separator] or commalistiterator
    local position = 1
    return function()
        if position > length then
            return
        end
        local entry, nextposition = lpegmatch(fetcher,str,position)
        if nextposition then
            position = nextposition
            return entry
        end
    end
end
748
749
750
751
752
-- Returns a shallow copy of t[name], or a new empty table when that entry
-- is missing. The copy protects the stored table against the merging that
-- is applied to the result afterwards.
local function initialize(t,name)
    local source = t[name]
    if source then
        local result = { }
        -- use the entry fetched above instead of indexing 't' a second time
        for k, v in next, source do
            result[k] = v
        end
        return result
    else
        return { }
    end
end
765
-- Returns t[name] itself (not a copy), or a new empty table when that
-- entry is missing.
local function fetch(t,name)
    local found = t[name]
    if found then
        return found
    end
    return { }
end
769
-- Copies all pairs of 'more' into 'result' (existing keys are overwritten)
-- and returns 'result'.
local function process(result,more)
    local key, value = next(more)
    while key ~= nil do
        result[key] = value
        key, value = next(more,key)
    end
    return result
end
776
-- A list of names separated by commas and/or spaces. The first name is
-- looked up with 'initialize' (which delivers a copy), the following ones
-- with 'fetch'; the lookup table is the first extra match argument. The
-- fold merges all fetched tables into the first one.
local name   = C((1 - S(", "))^1)
local parser = ((Carg(1) * name) / initialize) * (S(", ")^1 * ((Carg(1) * name) / fetch))^0
local merge  = Cf(parser,process)

-- Returns a new table with the merged content of the entries of 'hash'
-- that are named in 'list'; later names overrule earlier ones.
function parsers.mergehashes(hash,list)
    return lpegmatch(merge,list,1,hash)
end
784
785
786
787
788
789
790
791
792
-- Splits a number of seconds into days, hours, minutes and seconds, using
-- the div and mod helpers localized at the top of the file. Without
-- argument the value of os.runtime() is used (a function that is defined
-- outside this file).
function parsers.runtime(time)
    local day    = 24*60*60
    local hour   = 60*60
    local minute = 60
    local left   = time or os.runtime()
    local days   = div(left,day)
    left = mod(left,day)
    local hours  = div(left,hour)
    left = mod(left,hour)
    return days, hours, div(left,minute), mod(left,minute)
end
805
806
807
-- Splits "method -> token": the method is the text before "->" and the
-- token is what follows, with one pair of surrounding braces removed when
-- present. When there is no "->" the first extra match argument takes the
-- place of the method.
local spacing = whitespace^0
local apply   = P("->")
local method  = C((1 - apply)^1)
local token   = (lbrace * C((1 - rbrace)^1) * rbrace) + C(anything^1)

local pattern = spacing * ((method * spacing * apply) + Carg(1)) * spacing * token

-- Returns the method and the token found in 'str'; 'default' (or false) is
-- used as method when the string has none. Without 'str' the default
-- method and an empty string are returned.
function parsers.splitmethod(str,default)
    if not str then
        return default or false, ""
    end
    return lpegmatch(pattern,str,1,default or false)
end
822
823
824
825
826
827
828
829
-- A year has at least four digits and is converted to a number.
local p_year = lpegpatterns.digit^4 / tonumber

-- Parses a date with an optional time into a table with the numeric fields
-- year, month, day, hour, min and sec (as far as present). The accepted
-- date layouts, with '-' or '/' between the parts, are tried in this order:
--
--   year month day
--   day month year
--   year month
--   month year
--
-- After the date either " hour:min" with an optional ":sec" or the end of
-- the string is expected.
local pattern

do
    local datesep = S("-/")
    local timesep = P(":")

    local year    = Cg(Cc("year")  * p_year)
    local month   = Cg(Cc("month") * cardinal)
    local day     = Cg(Cc("day")   * cardinal)
    local hour    = Cg(Cc("hour")  * cardinal)
    local min     = Cg(Cc("min")   * cardinal)
    local sec     = Cg(Cc("sec")   * cardinal)

    local date    = (year  * datesep * month * datesep * day )
                  + (day   * datesep * month * datesep * year)
                  + (year  * datesep * month)
                  + (month * datesep * year )

    local time    = P(" ") * hour * timesep * min * (timesep * sec)^-1

    pattern = Cf(Ct("") * date * (time + P(-1)),rawset)
end

lpegpatterns.splittime = pattern

-- Returns the table with date and time fields found in 'str', or nil when
-- the string does not match.
function parsers.totime(str)
    return lpegmatch(pattern,str)
end
862
863
864
865
866
867
868
869 |