1if not modules then modules = { } end modules ['util-prs'] = {
2 version = 1.001,
3 comment = "companion to luat-lib.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local lpeg, table, string = lpeg, table, string
10local P, R, V, S, C, Ct, Cs, Carg, Cc, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cp
11
12local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
13local concat, gmatch, find = table.concat, string.gmatch, string.find
14local tonumber, tostring, type, next, rawset = tonumber, tostring, type, next, rawset
15local mod, div = math.mod, math.div
16
-- Set up the shared namespaces. Existing tables are reused when present, so
-- this file adds to them instead of replacing them.
utilities = utilities or {}
local parsers = utilities.parsers or { }
utilities.parsers = parsers
local patterns = parsers.patterns or { }
parsers.patterns = patterns

-- Table helpers used below.
-- NOTE(review): these are not part of stock Lua's table library; presumably
-- the host environment provides them before this file loads -- confirm.
local setmetatableindex = table.setmetatableindex
local sortedhash = table.sortedhash
local sortedkeys = table.sortedkeys
local tohash = table.tohash

-- Container for the self-filling lookup tables exported further down.
local hashes = { }
parsers.hashes = hashes
30
31
-- Single-character building blocks shared by the grammars in this file.
local digit = R("09")
local space = P(' ')
local equal = P("=")
local colon = P(":")
local comma = P(",")
local lbrace = P("{")
local rbrace = P("}")
local lparent = P("(")
local rparent = P(")")
local lbracket = P("[")
local rbracket = P("]")
local period = S(".")
local punctuation = S(".,:;")
-- Predefined patterns taken from the shared lpeg.patterns table.
local spacer = lpegpatterns.spacer
local whitespace = lpegpatterns.whitespace
local newline = lpegpatterns.newline
local anything = lpegpatterns.anything
local endofstring = lpegpatterns.endofstring

-- Any single character that is not one of the given fence characters.
local nobrace = 1 - (lbrace + rbrace )
local noparent = 1 - (lparent + rparent)
local nobracket = 1 - (lbracket + rbracket)
54
55
56
-- Backslash and the two brace characters, used by the balanced grammar below.
local escape, left, right = P("\\"), P('{'), P('}')

-- Grammar with two rules: rule 1 is a run of escaped braces, non-brace
-- characters or brace groups (rule 2); rule 2 is rule 1 wrapped in braces.
lpegpatterns.balanced = P {
    ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
    left * V(1) * right
}

-- Recursive grammars that match one fenced group, including nested groups of
-- the same kind.
local nestedbraces = P { lbrace * (nobrace + V(1))^0 * rbrace }
local nestedparents = P { lparent * (noparent + V(1))^0 * rparent }
local nestedbrackets = P { lbracket * (nobracket + V(1))^0 * rbracket }
-- Zero or more plain space characters.
local spaces = space^0
-- A braced argument, captured with the outer pair of braces replaced by "".
local argument = Cs((lbrace/"") * ((nobrace + nestedbraces)^0) * (rbrace/""))
-- Everything up to the end of the string.
local content = (1-endofstring)^0

-- Export the patterns through the shared lpeg.patterns table.
lpegpatterns.nestedbraces = nestedbraces
lpegpatterns.nestedparents = nestedparents
lpegpatterns.nestedbrackets = nestedbrackets
lpegpatterns.nested = nestedbraces
lpegpatterns.argument = argument
lpegpatterns.content = content
81
-- A value is either a braced group (captured without the outer braces) or a
-- run of brace groups and non-comma characters (captured as is, may be empty).
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
            + C((nestedbraces + (1-comma))^0)

-- Keys for patterns a, c and d: anything up to "=" or ",".
local key = C((1-equal-comma)^1)
-- a: key=value, or a bare key paired with an empty string.
local pattern_a = (space+comma)^0 * (key * equal * value + key * C(""))
-- c: only key=value pairs; bare keys do not match.
local pattern_c = (space+comma)^0 * (key * equal * value)
-- d: like a, but ":" is accepted in place of "=".
local pattern_d = (space+comma)^0 * (key * (equal+colon) * value + key * C(""))

-- Keys for pattern b additionally stop at a space, which allows spaces around "=".
local key = C((1-space-equal-comma)^1)
local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + C("")))
92
93
94
-- Module-level target table: the settings_to_hash* functions rebind this
-- upvalue before matching, and set() stores every parsed pair into it.
local hash = { }

-- Capture callback: stores one key/value pair in the current target table.
local function set(key,value)
    hash[key] = value
end

-- Repeating variants of the key/value patterns that feed each pair to set().
local pattern_a_s = (pattern_a/set)^1
local pattern_b_s = (pattern_b/set)^1
local pattern_c_s = (pattern_c/set)^1
local pattern_d_s = (pattern_d/set)^1

-- Exported under the shared parsers.patterns table.
patterns.settings_to_hash_a = pattern_a_s
patterns.settings_to_hash_b = pattern_b_s
patterns.settings_to_hash_c = pattern_c_s
patterns.settings_to_hash_d = pattern_d_s
110
-- Builds a repeating key/value pattern that hands each parsed pair to the
-- given callback.
--
-- set : function called with (key, value) for every pair
-- how : "strict" (only key=value pairs), "tolerant" (spaces allowed around
--       "="); any other value selects the default variant
function parsers.make_settings_to_hash_pattern(set,how)
    local base = pattern_a
    if how == "strict" then
        base = pattern_c
    elseif how == "tolerant" then
        base = pattern_b
    end
    return (base/set)^1
end
120
-- Converts a "key=value,key=value" settings string into a hash.
--
-- str      : settings string, an already parsed table, or nil / ""
-- existing : optional table that receives the parsed (or copied) entries
--
-- Returns:
--   * a fresh empty table when str is nil or "" (existing is not consulted);
--   * for a table: existing with the entries of str copied in, or str itself
--     when no existing table was passed;
--   * for a string: existing (or a new table) filled with the parsed pairs.
function parsers.settings_to_hash(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- was "return exiting": an undefined global, so nil was returned
            return existing
        else
            return str
        end
    else
        -- set() writes into the module-level hash, so rebind it before matching
        hash = existing or { }
        lpegmatch(pattern_a_s,str)
        return hash
    end
end
139
-- Variant of settings_to_hash that also accepts ":" between key and value.
-- nil / "" gives a fresh empty table, a table is returned untouched, and a
-- string is parsed into a new table.
function parsers.settings_to_hash_colon_too(str)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    -- the capture callback stores into the module-level hash
    hash = { }
    lpegmatch(pattern_d_s,str)
    return hash
end
151
-- Variant of settings_to_hash that uses the tolerant pattern, which allows
-- spaces around "=".
--
-- str      : settings string, an already parsed table, or nil / ""
-- existing : optional table that receives the parsed (or copied) entries
--
-- Returns a fresh empty table for nil / "", existing (or str) for table
-- input, and existing (or a new table) filled with the parsed pairs for
-- string input.
function parsers.settings_to_hash_tolerant(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- was "return exiting": an undefined global, so nil was returned
            return existing
        else
            return str
        end
    else
        -- set() writes into the module-level hash, so rebind it before matching
        hash = existing or { }
        lpegmatch(pattern_b_s,str)
        return hash
    end
end
170
-- Strict variant of settings_to_hash: only key=value pairs are accepted.
--
-- str      : settings string, an already parsed table, or nil / ""
-- existing : optional table that receives the parsed (or copied) entries
--
-- Returns nil for nil / "" input; existing (or str) for table input; for
-- string input the filled table, or a false value when the resulting table
-- is empty.
function parsers.settings_to_hash_strict(str,existing)
    if not str or str == "" then
        return nil
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- was "return exiting": an undefined global, so nil was returned
            return existing
        else
            return str
        end
    else
        -- str is known to be non-empty here (see the first branch), so the
        -- original's repeated check was redundant
        hash = existing or { }
        lpegmatch(pattern_c_s,str)
        return next(hash) and hash
    end
end
189
-- A comma followed by optional spaces separates list entries.
local separator = comma * space^0
-- An entry is a braced group (captured without the outer braces) or a run of
-- brace groups and non-comma characters (may be empty).
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
            + C((nestedbraces + (1-comma))^0)
-- Skips leading spaces and collects all entries in a table.
local pattern = spaces * Ct(value*(separator*value)^0)

patterns.settings_to_array = pattern
198
199
200
-- Splits a comma separated list into an array.
--
-- str    : list string, an already split table, or nil / ""
-- strict : when true, the string is only split if it contains a "{";
--          otherwise it is only split if it contains a ","
--
-- A string that does not need splitting is returned as a one-element array;
-- a table is returned untouched; nil / "" gives a fresh empty table.
function parsers.settings_to_array(str,strict)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    -- the character whose presence triggers a real parse
    local trigger = strict and "{" or ","
    if find(str,trigger,1,true) then
        return lpegmatch(pattern,str)
    end
    return { str }
end
218
-- Converts a comma separated list (or an array) into an array of numbers.
--
-- A table argument is converted in place and returned. Entries that
-- tonumber() cannot convert become nil. nil / "" gives a fresh empty table.
function parsers.settings_to_numbers(str)
    if not str or str == "" then
        return { }
    end
    local list
    if type(str) == "table" then
        list = str
    elseif find(str,",",1,true) then
        list = lpegmatch(pattern,str)
    else
        -- single entry: no need to run the splitter
        return { tonumber(str) }
    end
    for index=1,#list do
        list[index] = tonumber(list[index])
    end
    return list
end
235
-- Like the plain list entry, but bracket and parenthesis groups are also
-- consumed as units, so commas inside them do not split the entry.
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
            + C((nestedbraces + nestedbrackets + nestedparents + (1-comma))^0)
local pattern = spaces * Ct(value*(separator*value)^0)

-- Splits a comma separated list into an array while keeping {}, [] and ()
-- groups together. Unlike settings_to_array there is no nil / "" / table
-- shortcut: str is passed straight to the matcher.
function parsers.settings_to_array_obey_fences(str)
    return lpegmatch(pattern,str)
end
243
244
245
246
247
248
249
250
251
252
253
254
255
256
-- Built splitters are cached per separator set: cache_a holds the collecting
-- variants, cache_b the variants that call an action.
local cache_a = { }
local cache_b = { }

-- Returns a pattern that splits a string at any character in symbol
-- (default ","), stripping spaces around the separators and removing the
-- outer braces of an entry that is completely wrapped in braces.
--
-- symbol     : string of separator characters, passed to lpeg.S
-- withaction : when true, the pattern applies the function given as the
--              first extra match argument to each entry; otherwise the
--              entries are collected in a table
function parsers.groupedsplitat(symbol,withaction)
    symbol = symbol or ","
    local cache = withaction and cache_b or cache_a
    local cached = cache[symbol]
    if cached then
        return cached
    end
    local symbols = S(symbol)
    local separator = space^0 * symbols * space^0
    -- first alternative: a braced entry directly followed by a separator or
    -- the end of the string; second: anything up to (optional spaces plus)
    -- a separator or the end of the string
    local value =
        lbrace
      * C((nobrace + nestedbraces)^0)
      * (rbrace * (#symbols + P(-1)))
      +
        C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0)
    local built
    if withaction then
        local withvalue = Carg(1) * value / function(f,s) return f(s) end
        built = spaces * withvalue * (separator*withvalue)^0
    else
        built = spaces * Ct(value*(separator*value)^0)
    end
    cache[symbol] = built
    return built
end
286
-- Comma splitters with space stripping: the collecting variant and the
-- variant that calls an action per entry.
local pattern_a = parsers.groupedsplitat(",",false)
local pattern_b = parsers.groupedsplitat(",",true)

-- Splits a comma separated list into an array, stripping spaces around the
-- separators. nil / "" gives a fresh empty table.
function parsers.stripped_settings_to_array(str)
    if str and str ~= "" then
        return lpegmatch(pattern_a,str)
    end
    return { }
end

-- Applies action to every entry of a comma separated list and returns
-- whatever the matcher produces. nil / "" gives a fresh empty table.
function parsers.process_stripped_settings(str,action)
    if str and str ~= "" then
        return lpegmatch(pattern_b,str,1,action)
    end
    return { }
end
305
306
307
308
-- Capture callback: appends one value to the target array.
local function set(t,v)
    t[#t+1] = v
end

-- Each entry is paired with the first extra match argument (the target
-- array) and handed to set(). The inner "value" is the fence-aware entry
-- pattern defined earlier in the file.
local value = P(Carg(1)*value) / set
-- After all entries have been processed the target array itself is produced.
local pattern = value*(separator*value)^0 * Carg(1)

-- Appends the entries of the comma separated list str to the array t and
-- returns what the matcher produces.
function parsers.add_settings_to_array(t,str)
    return lpegmatch(pattern,str,nil,t)
end
319
-- Serializes a hash as "key=value" entries in sorted key order.
--
-- h         : hash to serialize; a false value gives ""
-- separator : string placed between entries (default ",")
-- yes, no   : when both are given they replace true and false
-- strict    : when set, entries whose value is false are left out
-- omit      : optional list of keys to skip (converted with tohash)
function parsers.hash_to_string(h,separator,yes,no,strict,omit)
    if not h then
        return ""
    end
    local entries = { }
    local count = 0
    local keys = sortedkeys(h)
    omit = omit and tohash(omit)
    for index=1,#keys do
        local key = keys[index]
        if not (omit and omit[key]) then
            local value = h[key]
            local entry
            if type(value) ~= "boolean" then
                entry = key .. '=' .. value
            elseif value or not strict then
                -- a false value only gets here in non-strict mode
                if yes and no then
                    if value then
                        entry = key .. '=' .. yes
                    else
                        entry = key .. '=' .. no
                    end
                else
                    entry = key .. '=' .. tostring(value)
                end
            end
            if entry then
                count = count + 1
                entries[count] = entry
            end
        end
    end
    return concat(entries,separator or ",")
end
354
-- Joins the entries of an array with separator (default ","); a false value
-- for a gives "".
function parsers.array_to_string(a,separator)
    if not a then
        return ""
    end
    return concat(a,separator or ",")
end
362
363
364
365
366
367
368
369
370
371
372
373
374
375
-- Starts from an empty table and stores every comma or space separated
-- entry in it as key with value true.
local pattern = Ct("") * (C((1-S(", "))^1) * S(", ")^0 * Cc(true) % rawset)^1

-- Converts a comma and/or space separated list into a set (entry -> true).
-- A false value for str, or a failed match, gives a fresh empty table.
-- (The original defined this function twice with identical bodies; one
-- definition is enough.)
function parsers.settings_to_set(str)
    return str and lpegmatch(pattern,str) or { }
end

-- Self-filling variant: indexing with a list string parses it once and
-- stores the resulting set under that string.
hashes.settings_to_set = table.setmetatableindex(function(t,k)
    local v = k and lpegmatch(pattern,k) or { }
    t[k] = v
    return v
end)
391
-- Collects every comma or space separated entry in an array.
local pattern = Ct((C((1-S(", "))^1) * S(", ")^0)^1)

-- Self-filling variant: indexing with a list string parses it once and
-- stores the resulting array under that string.
hashes.settings_to_list = table.setmetatableindex(function(t,k)
    local v = k and lpegmatch(pattern,k) or { }
    t[k] = v
    return v
end)

-- Mark both lookup tables as weak in keys and values.
getmetatable(hashes.settings_to_set ).__mode = "kv"
getmetatable(hashes.settings_to_list).__mode = "kv"
407
-- Joins the keys of h whose value is neither false nor nil, in the order
-- delivered by sortedhash, using separator (default ",").
function parsers.simple_hash_to_string(h, separator)
    local names = { }
    local count = 0
    for name, enabled in sortedhash(h) do
        if enabled then
            count = count + 1
            names[count] = name
        end
    end
    return concat(names,separator or ",")
end
419
-- A word is either whatever lpegpatterns.unquoted matches or a run of
-- characters that are neither whitespace nor "=".
local str = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1)

-- Starts from the first extra match argument (the target table) and stores
-- each "key" or "key=value" in it via rawset; a bare key gets "".
local setting = Carg(1) * (whitespace^0 * (str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))) % rawset)^1
local splitter = setting^1

-- Parses whitespace separated "key" / "key=value" options into target (or a
-- new table). A false value for str, or a failed match, gives a fresh empty
-- table.
function parsers.options_to_hash(str,target)
    return str and lpegmatch(splitter,str,1,target or { }) or { }
end
428
429
430
-- NOTE(review): lpeg.tsplitat is not part of stock LPeg; presumably it
-- builds a pattern that splits at the given separator into a table -- confirm.
local splitter = lpeg.tsplitat(" ")

-- Splits str at spaces using the splitter above. A false value for str, or a
-- failed match, gives a fresh empty table.
function parsers.options_to_array(str)
    return str and lpegmatch(splitter,str) or { }
end
436
437
438
-- List entry for the non-expanding repeat parser, tried in this order:
--   a braced group (captured without the outer braces),
--   digits followed by a parenthesized group (captured verbatim),
--   a non-empty run of brace groups and non-comma characters,
--   or an empty string.
local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
            + C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
            + C((nestedbraces + (1-comma))^1)
            + Cc("")
local pattern_a = spaces * Ct(value*(separator*value)^0)
444
-- The global unpack only exists in Lua 5.1 style environments; fall back to
-- table.unpack so that this block also works where the global is absent.
local unpack = unpack or table.unpack

-- Expands one "n(list)" construct.
--
-- n   : repeat count, or a false value to return str unchanged
-- str : the text between the parentheses
--
-- Returns the entries of str (split with pattern_a) repeated n times, as
-- multiple values.
local function repeater(n,str)
    if not n then
        return str
    end
    local s = lpegmatch(pattern_a,str)
    if n == 1 then
        -- nothing to duplicate
        return unpack(s)
    end
    local t = { }
    local tn = 0
    for i=1,n do
        for j=1,#s do
            tn = tn + 1
            t[tn] = s[j]
        end
    end
    return unpack(t)
end
465
-- List entry for the expanding repeat parser: same alternatives as the
-- non-expanding one, except that "digits(list)" is converted to a number
-- plus the parenthesized text and handed to repeater.
local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
            + (C(digit^1)/tonumber * lparent * Cs((noparent + nestedparents)^1) * rparent) / repeater
            + C((nestedbraces + (1-comma))^1)
            + Cc("")
local pattern_b = spaces * Ct(value*(separator*value)^0)
471
-- Splits a comma separated list that may contain "n(list)" repeat
-- constructs. With expand set the constructs are expanded, otherwise they
-- are kept verbatim. A failed match gives a fresh empty table.
function parsers.settings_to_array_with_repeat(str,expand)
    local splitter = expand and pattern_b or pattern_a
    return lpegmatch(splitter,str) or { }
end
479
480
481
-- One braced argument, captured without the outer braces.
local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace
-- Any mix of spaces and braced arguments, with the arguments collected in a table.
local pattern = Ct((space + value)^0)

-- Returns the braced arguments found at the start of str as an array.
function parsers.arguments_to_table(str)
    return lpegmatch(pattern,str)
end
488
489
490
-- Parses settings into self[class].
--
-- When self[class] does not exist yet it is created; if parentclass is
-- given, self[parentclass] is created when missing and installed through
-- setmetatableindex for the new class table.
function parsers.getparameters(self,class,parentclass,settings)
    local target = self[class]
    if not target then
        target = { }
        self[class] = target
        if parentclass then
            local parent = self[parentclass]
            if not parent then
                parent = { }
                self[parentclass] = parent
            end
            setmetatableindex(target,parent)
        end
    end
    parsers.settings_to_hash(settings,target)
end
507
-- Returns an iterator over the runs of characters in str that contain
-- neither a comma nor a space.
function parsers.listitem(str)
    return gmatch(str,"[^, ]+")
end
511
512
513
-- Substitution grammar that inserts "," between groups of three digits. The
-- string must start with one, two or three digits followed by complete
-- groups of three; the digit part must end at a "." or the end of the
-- string, and whatever follows is kept unchanged.
local pattern = Cs { "start",
    start = V("one") + V("two") + V("three"),
    rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0,
    thousand = digit * digit * digit,
    one = digit * V("rest"),
    two = digit * digit * V("rest"),
    three = V("thousand") * V("rest"),
}

lpegpatterns.splitthousands = pattern

-- Returns str with thousands separators inserted, or str itself when the
-- grammar does not match.
function parsers.splitthousands(str)
    return lpegmatch(pattern,str) or str
end
528
529
530
local optionalwhitespace = whitespace^0

-- words: collects runs of characters that are neither punctuation nor
-- whitespace; other characters are skipped one at a time.
lpegpatterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1)
-- sentences: collects stretches that end at a period, with leading whitespace dropped.
lpegpatterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1)
-- paragraphs: collects stretches up to a pair of newlines (optionally
-- preceded by spacers); whitespace directly before the end of the string is removed.
lpegpatterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1)
536
537
538
539
540
541
542
543
-- Building blocks for key="value" lists. These locals shadow the earlier
-- equal, escape and separator definitions for the rest of the file.
local dquote = P('"')
local equal = P('=')
local escape = P('\\')
local separator = S(' ,')

-- A key is everything up to "="; a value is a double-quoted string captured
-- without its quotes.
local key = C((1-equal)^1)
local value = dquote * C((1-dquote-escape*dquote)^0) * dquote

-- Starts from an empty table and stores each key="value" pair via rawset;
-- the whole string must be consumed.
local pattern = Ct("") * (((key * equal * value) * separator^0) % rawset)^0 * P(-1)
555
-- Converts a list of key="value" pairs into a hash. nil / "" gives a fresh
-- empty table; otherwise the result of the match is returned (nil when the
-- string does not fit the pattern).
function parsers.keq_to_hash(str)
    if not str or str == "" then
        return { }
    end
    return lpegmatch(pattern,str)
end
563
564
565
-- Fallback values for the splitter specifications.
local defaultspecification = { separator = ",", quote = '"' }

-- Builds a simple CSV splitter.
--
-- specification fields (missing ones fall back to defaultspecification,
-- which is installed through setmetatableindex on the passed table):
--   separator : characters that separate fields ("" selects ",")
--   quote     : characters that may each quote a field; "" disables quoting
--   numbers   : when set, quoted fields are converted with tonumber
--
-- Returns a function that takes the data string and returns an array of
-- records, each an array of fields. Every record must be followed by at
-- least one "\n" or "\r".
function parsers.csvsplitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local separatorchars = specification.separator
    local quotechars = specification.quote
    local asnumbers = specification.numbers
    local separator = S(separatorchars ~= "" and separatorchars or ",")
    -- an unquoted field runs up to the next separator or newline
    local field = C((1 - separator - newline)^0)
    if quotechars and quotechars ~= "" then
        local quoted = nil
        for chr in gmatch(quotechars,".") do
            local quote = P(chr)
            local inner = (1 - quote)^0
            local word = quote * (asnumbers and (inner/tonumber) or C(inner)) * quote
            quoted = quoted and (quoted + word) or word
        end
        -- quoted fields take precedence over unquoted ones
        field = quoted + field
    end
    local parser = Ct((Ct(field * (separator * field)^0) * S("\n\r")^1)^0 )
    return function(data)
        return lpegmatch(parser,data)
    end
end
597
598
599
600
601
602
603
604
605
606
607
608
609
-- Builds a CSV splitter in which a doubled quote character inside a quoted
-- field stands for one quote character.
--
-- specification fields (missing ones fall back to defaultspecification,
-- which is installed through setmetatableindex on the passed table):
--   separator : characters that separate fields ("" selects ",")
--   quote     : the quote string
--   numbers   : when set, quoted fields are converted with tonumber
--   strict    : when set, at most one newline is accepted between records;
--               otherwise at least one is required
--
-- Returns a function (data, getheader). Without getheader it returns an
-- array of all records. With getheader the first record is split off and
-- the function returns the remaining records followed by that header record.
function parsers.rfc4180splitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local numbers = specification.numbers
    local separator = specification.separator
    local quotechar = P(specification.quote)
    -- two quotes in a row are replaced by a single quote
    local dquotechar = quotechar * quotechar
                     / specification.quote
    local separator = S(separator ~= "" and separator or ",")
    local whatever = (dquotechar + (1 - quotechar))^0
    local escaped = quotechar
                  * (numbers and (whatever/tonumber) or Cs(whatever))
                  * quotechar
    local non_escaped = C((1 - quotechar - newline - separator)^1)
    -- an empty field yields ""
    local field = escaped + non_escaped + Cc("")
    -- a record needs at least one separator, i.e. two fields
    local record = Ct(field * (separator * field)^1)
    -- the header record plus the position where the remaining data starts
    local headerline = record * Cp()
    local morerecords = (newline^(specification.strict and -1 or 1) * record)^0
    local headeryes = Ct(morerecords)
    local headernop = Ct(record * morerecords)
    return function(data,getheader)
        if getheader then
            local header, position = lpegmatch(headerline,data)
            -- continue right after the header record
            local data = lpegmatch(headeryes,data,position)
            return data, header
        else
            return lpegmatch(headernop,data)
        end
    end
end
639
640
641
642
643
644
645
646
647
648
649
650
651
-- Calls action for a single number or for every number in a range.
--
-- first  : start of the range; a false value means nothing is done
-- last   : a number (end of the range), true (open range, ends at n or at
--          first when n is missing) or a false value (only first)
-- n      : upper bound used for open ranges
-- action : function called with each number
local function ranger(first,last,n,action)
    if not first then
        return
    end
    if not last then
        action(first)
        return
    end
    local stop = last
    if last == true then
        stop = n or first
    end
    for i=first,stop do
        action(i)
    end
end
667
-- A hexadecimal or decimal cardinal, converted with tonumber.
local cardinal = (lpegpatterns.hexadecimal + lpegpatterns.cardinal) / tonumber
local spacers = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring

-- First version of the range list parser. It is shadowed by the definition
-- that follows, so nothing below refers to it.
local stepper = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + Cc(true) ) + Cc(false) )
              * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1

-- Range list parser: entries are "a", "a-b" / "a:b", or an open range
-- ("a-*", or "a-" at the end of the string), separated by commas and/or
-- spaces. Each entry is passed to ranger together with the two extra match
-- arguments. The whole string must be consumed.
local stepper = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
              * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring
677
-- Walks over a range specification and calls an action for each number.
--
-- Two call forms are supported:
--   stepper(str, action)    : str may be a number, an array of values or a
--                             range string; open ranges have no upper bound
--   stepper(str, n, action) : str must be a string; n is the upper bound for
--                             open ranges and action defaults to print
function parsers.stepper(str,n,action)
    local kind = type(str)
    if type(n) ~= "function" then
        if kind == "string" then
            lpegmatch(stepper,str,1,n,action or print)
        end
        return
    end
    -- the second argument is the action
    if kind == "number" then
        n(str)
    elseif kind == "table" then
        for index=1,#str do
            n(str[index])
        end
    else
        lpegmatch(stepper,str,1,false,n)
    end
end
694
695
696
697
698
699
700
701
702
-- Unit string converters: "%" becomes "\percent " and an integer after "^"
-- is wrapped in braces. In math mode the "^" is kept; in text mode it is
-- replaced by "\high".
local pattern_math = Cs((P("%")/"\\percent " + P("^") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)

-- The original exported "pattern", which at this point is an unrelated
-- local defined earlier in the file. Export the math variant instead, as
-- that is what parsers.unittotex uses by default.
patterns.unittotex = pattern_math

-- Converts a unit string to TeX input; textmode selects the text variant.
function parsers.unittotex(str,textmode)
    return lpegmatch(textmode and pattern_text or pattern_math,str)
end
711
-- Replaces "^" followed by an integer with "<sup>" .. integer .. "</sup>";
-- everything else is copied unchanged.
local pattern = Cs((P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>") + anything)^0)

-- Converts the exponents in a unit string to <sup> elements.
function parsers.unittoxml(str)
    return lpegmatch(pattern,str)
end
717
718
719
-- Entry patterns for parsers.iterator, keyed by separator string.
local cache = { }
-- Shadows the earlier spaces local for the rest of the file.
local spaces = lpegpatterns.space^0
-- Iterator used for empty input: returns nothing.
local dummy = function() end

-- Indexing the cache with a new separator string builds a pattern that skips
-- leading spaces, captures one entry, skips any separators after it and
-- yields the position where the next entry starts.
setmetatableindex(cache,function(t,k)
    local separator = S(k)
    local value = (1-separator)^0
    local pattern = spaces * C(value) * separator^0 * Cp()
    t[k] = pattern
    return pattern
end)

-- The comma variant is the default, so build it once up front.
local commalistiterator = cache[","]
733
-- Returns an iterator function that delivers the entries of str one by one.
--
-- str       : the list to walk over; an empty string gives an iterator that
--             returns nothing
-- separator : string of separator characters (default ",")
function parsers.iterator(str,separator)
    local length = #str
    if length == 0 then
        return dummy
    end
    local splitter = separator and cache[separator] or commalistiterator
    local position = 1
    return function()
        if position > length then
            return
        end
        local entry, following = lpegmatch(splitter,str,position)
        if following then
            -- resume after the separators on the next call
            position = following
            return entry
        end
    end
end
752
753
754
755
756
-- Returns a shallow copy of t[name], or a fresh empty table when there is no
-- such entry.
local function initialize(t,name)
    local copy = { }
    local source = t[name]
    if source then
        for key, value in next, source do
            copy[key] = value
        end
    end
    return copy
end
769
-- Returns t[name] itself (not a copy), or a fresh empty table when that
-- entry is missing or false.
local function fetch(t,name)
    local found = t[name]
    if found then
        return found
    end
    return { }
end
773
-- Copies every entry of more into result, overwriting existing keys, and
-- returns result.
local function process(result,more)
    local key, value = next(more)
    while key ~= nil do
        result[key] = value
        key, value = next(more,key)
    end
    return result
end
780
781
782
783
-- Produces the first extra match argument (the hash of hashes) followed by
-- one name, i.e. a run of characters without comma or space.
local name = Carg(1) * C((1-S(", "))^1)
-- Each following name is looked up with fetch, after at least one separator.
local parser = S(", ")^1 * (name / fetch)
-- The first name gives a copy to start from; each later one is merged into
-- it by process.
local merge = (name / initialize) * (parser % process)^0

-- Merges the sub-hashes of hash that are named in list (comma and/or space
-- separated) into one new table. Later names override earlier ones; names
-- without an entry contribute nothing.
function parsers.mergehashes(hash,list)
    return lpegmatch(merge,list,1,hash)
end
791
792
793
794
795
796
797
798
-- Splits a number of seconds into days, hours, minutes and seconds, returned
-- in that order. Without an argument the value of os.runtime() is used.
-- NOTE(review): os.runtime, math.div and math.mod are not stock Lua;
-- presumably the host environment provides them -- confirm.
function parsers.runtime(time)
    time = time or os.runtime()
    local perminute = 60
    local perhour = 60*60
    local perday = 24*60*60
    local days = div(time,perday)
    local rest = mod(time,perday)
    local hours = div(rest,perhour)
    rest = mod(rest,perhour)
    return days, hours, div(rest,perminute), mod(rest,perminute)
end
811
812
813
-- Building blocks for "method -> token" specifications.
local spacing = whitespace^0
local apply = P("->")
-- The method is everything before "->".
local method = C((1-apply)^1)
-- The token is either the content of a brace pair or the rest of the string.
local token = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1)

-- Yields the method (or the first extra match argument when there is no
-- "method ->" prefix) followed by the token.
local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token
820
-- Splits "method -> token" into its two parts.
--
-- Returns the method and the token. When str has no "method ->" prefix the
-- method is default (or false). A false value for str gives default (or
-- false) and "".
function parsers.splitmethod(str,default)
    local fallback = default or false
    if not str then
        return fallback, ""
    end
    return lpegmatch(pattern,str,1,fallback)
end
828
829
830
831
832
833
834
835
-- A year is four digits, converted with tonumber.
local p_year = lpegpatterns.digit^4 / tonumber

-- Date/time pattern. It starts from an empty table and stores the fields it
-- recognizes under the keys year, month, day, hour, min and sec via rawset.
-- Date parts are separated by "-" or "/" and tried in this order:
--   year-month-day, day-month-year, year-month, month-year
-- The date must be followed either by the end of the string or by a space
-- and hour:min with an optional :sec.
local pattern = Ct("") * (
    (
        ( (Cc("year") * p_year ) % rawset
            * S("-/") * (Cc("month") * cardinal) % rawset
            * S("-/") * (Cc("day") * cardinal) % rawset
        ) +
        ( (Cc("day") * cardinal) % rawset
            * S("-/") * (Cc("month") * cardinal) % rawset
            * S("-/") * (Cc("year") * p_year ) % rawset
        ) +
        ( (Cc("year") * p_year ) % rawset
            * S("-/") * (Cc("month") * cardinal) % rawset
        ) +
        ( (Cc("month") * cardinal) % rawset
            * S("-/") * (Cc("year") * p_year ) % rawset
        )
    )
    * (
        P(" ") * (Cc("hour") * cardinal) % rawset
        * P(":") * (Cc("min") * cardinal) % rawset
        * (P(":") * (Cc("sec") * cardinal) % rawset)^-1
        + P(-1) )
)

lpegpatterns.splittime = pattern

-- Parses a date string (optionally with a time) into a table of numeric
-- fields; returns nil when the string does not match.
function parsers.totime(str)
    return lpegmatch(pattern,str)
end
891
892
893
894
895
896
897
898 |