1if not modules then modules = { } end modules ['util-prs'] = {
2 version = 1.001,
3 comment = "companion to luat-lib.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local lpeg, table, string = lpeg, table, string
10local P, R, V, S, C, Ct, Cs, Carg, Cc, Cp = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc, lpeg.Cp
11
12local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
13local concat, gmatch, find = table.concat, string.gmatch, string.find
14local tonumber, tostring, type, next, rawset = tonumber, tostring, type, next, rawset
15local mod, div = math.mod, math.div
16
-- Namespace setup. Tables that were registered before are reused, so loading
-- this file again extends them instead of replacing them.

if not utilities then
    utilities = { }
end

local parsers = utilities.parsers or { }
utilities.parsers = parsers

local patterns = parsers.patterns or { }
parsers.patterns = patterns

-- always a fresh table, also when the file is loaded more than once

local hashes = { }
parsers.hashes = hashes

-- helpers read from the table namespace (not part of stock Lua)

local setmetatableindex = table.setmetatableindex
local sortedhash        = table.sortedhash
local sortedkeys        = table.sortedkeys
local tohash            = table.tohash
30
31
-- Elementary matchers shared by the parsers in this file.

local digit       = R("09")
local space       = P(" ")
local equal       = P("=")
local colon       = P(":")
local comma       = P(",")
local period      = S(".")
local punctuation = S(".,:;")

-- fence characters, per pair

local lbrace,   rbrace   = P("{"), P("}")
local lparent,  rparent  = P("("), P(")")
local lbracket, rbracket = P("["), P("]")

-- patterns taken over from lpeg.patterns

local spacer      = lpegpatterns.spacer
local whitespace  = lpegpatterns.whitespace
local newline     = lpegpatterns.newline
local anything    = lpegpatterns.anything
local endofstring = lpegpatterns.endofstring

-- one character that is neither the opener nor the closer of a fence pair

local nobrace   = 1 - lbrace   - rbrace
local noparent  = 1 - lparent  - rparent
local nobracket = 1 - lbracket - rbracket
54
55
56
local escape = P("\\")
local left   = P("{")
local right  = P("}")

-- Matches text in which braces are balanced. Rule 1 (the initial rule)
-- accepts escaped braces, any non-brace character, or a braced group; rule 2
-- is such a group: an opening brace, balanced content and a closing brace.

lpegpatterns.balanced = P {
    [1] = (escape * (left + right) + (1 - (left + right)) + V(2))^0,
    [2] = left * V(1) * right,
}
68
-- Recursive matchers for properly nested fences plus a few derived patterns.

local nestedbraces, nestedparents, nestedbrackets

do

    -- opener, then any mix of other characters and nested groups, then closer
    local function nested(open,other,close)
        return P { open * (other + V(1))^0 * close }
    end

    nestedbraces   = nested(lbrace,  nobrace,  rbrace)
    nestedparents  = nested(lparent, noparent, rparent)
    nestedbrackets = nested(lbracket,nobracket,rbracket)

end

local spaces  = space^0
local content = (1 - endofstring)^0

-- a braced argument; the substitution drops the outer pair of braces

local argument = Cs((lbrace/"") * (nobrace + nestedbraces)^0 * (rbrace/""))

lpegpatterns.nested         = nestedbraces
lpegpatterns.nestedbraces   = nestedbraces
lpegpatterns.nestedparents  = nestedparents
lpegpatterns.nestedbrackets = nestedbrackets
lpegpatterns.argument       = argument
lpegpatterns.content        = content
82
-- Patterns that match one "key=value" entry of a comma separated list. Each
-- one produces two captures: the key and the value.
--
--   pattern_a : key or key=value; a missing value becomes an empty string
--   pattern_b : like a, but spaces around the key and the equal sign are skipped
--   pattern_c : only key=value is accepted
--   pattern_d : like a, but a colon may be used instead of the equal sign

local value, key
local pattern_a, pattern_b, pattern_c, pattern_d

do

    local braced   = lbrace * C((nobrace + nestedbraces)^0) * rbrace -- outer braces dropped
    local unbraced = C((nestedbraces + (1 - comma))^0)
    local leading  = (space + comma)^0
    local loosekey = C((1 - equal - comma)^1)                        -- spaces allowed in the key
    local novalue  = C("")

    value = braced + unbraced
    key   = C((1 - space - equal - comma)^1)                         -- no spaces in the key

    pattern_a = leading * (loosekey * equal * value + loosekey * novalue)
    pattern_c = leading * (loosekey * equal * value)
    pattern_d = leading * (loosekey * (equal + colon) * value + loosekey * novalue)

    pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + novalue))

end
93
94
95
-- The table that is currently being filled. The settings_to_hash variants
-- point this upvalue to their target table before they run a match.

local hash = { }

-- called with the two captures (key, value) of every matched entry

local function set(key,value)
    hash[key] = value
end

local pattern_a_s, pattern_b_s, pattern_c_s, pattern_d_s

do

    -- one or more entries, each one stored by means of 'set'
    local function collector(entry)
        return (entry/set)^1
    end

    pattern_a_s = collector(pattern_a)
    pattern_b_s = collector(pattern_b)
    pattern_c_s = collector(pattern_c)
    pattern_d_s = collector(pattern_d)

end

patterns.settings_to_hash_a = pattern_a_s
patterns.settings_to_hash_b = pattern_b_s
patterns.settings_to_hash_c = pattern_c_s
patterns.settings_to_hash_d = pattern_d_s
111
-- Builds a key/value list parser that hands each (key, value) pair to 'set'.
--
-- set : function called with the key and value captures of each entry
-- how : "strict" (only key=value), "tolerant" (spaces around key and equal
--       sign are skipped) or anything else for the default behaviour
--
-- A new pattern is constructed on every call.

function parsers.make_settings_to_hash_pattern(set,how)
    local entry = pattern_a
    if how == "strict" then
        entry = pattern_c
    elseif how == "tolerant" then
        entry = pattern_b
    end
    return (entry/set)^1
end
121
-- Converts a "key=value,key=value" string into a hash.
--
-- str      : the settings string; a table is accepted as already parsed input
-- existing : optional table that receives the entries
--
-- Returns 'existing' when it is given, otherwise a new table (or 'str'
-- itself when that already is a table). A key without a value is stored with
-- an empty string as value.

function parsers.settings_to_hash(str,existing)
    if not str or str == "" then
        -- nothing to add, but still hand back the table that the caller passed
        return existing or { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this used to return the undefined global 'exiting' (so nil)
            return existing
        else
            return str
        end
    else
        -- 'hash' is the upvalue that the 'set' callback of pattern_a_s fills
        hash = existing or { }
        lpegmatch(pattern_a_s,str)
        return hash
    end
end
140
-- Like settings_to_hash but the key and value may also be separated by a
-- colon ("key:value"). A table is returned as is; nil and the empty string
-- give a new empty table.

function parsers.settings_to_hash_colon_too(str)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    -- the 'set' callback of pattern_d_s stores into the shared 'hash' upvalue
    hash = { }
    lpegmatch(pattern_d_s,str)
    return hash
end
152
-- Variant of settings_to_hash that skips spaces around the key and around
-- the equal sign.
--
-- str      : the settings string; a table is accepted as already parsed input
-- existing : optional table that receives the entries
--
-- Returns 'existing' when it is given, otherwise a new table (or 'str'
-- itself when that already is a table).

function parsers.settings_to_hash_tolerant(str,existing)
    if not str or str == "" then
        -- nothing to add, but still hand back the table that the caller passed
        return existing or { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this used to return the undefined global 'exiting' (so nil)
            return existing
        else
            return str
        end
    else
        -- 'hash' is the upvalue that the 'set' callback of pattern_b_s fills
        hash = existing or { }
        lpegmatch(pattern_b_s,str)
        return hash
    end
end
171
-- Variant of settings_to_hash that only accepts "key=value" entries.
--
-- str      : the settings string; a table is accepted as already parsed input
-- existing : optional table that receives the entries
--
-- Returns nil for nil or empty input and also when the resulting table has
-- no entries; otherwise the filled table ('existing' when given).

function parsers.settings_to_hash_strict(str,existing)
    if not str or str == "" then
        return nil
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this used to return the undefined global 'exiting' (so nil)
            return existing
        else
            return str
        end
    else
        -- 'hash' is the upvalue that the 'set' callback of pattern_c_s fills
        hash = existing or { }
        lpegmatch(pattern_c_s,str)
        return next(hash) and hash
    end
end
190
-- Splits a comma separated list into a table of strings. An item that is
-- completely wrapped in braces loses that outer pair; spaces after a comma
-- and at the start of the list are skipped.

local separator = comma * space^0

local value, pattern

do

    local braced   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
    local unbraced = C((nestedbraces + (1 - comma))^0)

    value   = braced + unbraced
    pattern = spaces * Ct(value * (separator * value)^0)

end

patterns.settings_to_array = pattern
199
200
201
-- Converts a comma separated string into an array of strings.
--
-- str    : the list; a table is returned as is, nil and "" give a new table
-- strict : when set, the list is only split when it contains a "{"; without
--          it, the list is only split when it contains a ","
--
-- A string that is not split is returned as the single item of a new table.

function parsers.settings_to_array(str,strict)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    local trigger = strict and "{" or ","
    if find(str,trigger,1,true) then
        return lpegmatch(pattern,str)
    end
    return { str }
end
219
-- Converts a comma separated string (or a table of items) into a table in
-- which every item went through tonumber. A table argument is converted in
-- place and returned; nil and "" give a new empty table.

function parsers.settings_to_numbers(str)
    if not str or str == "" then
        return { }
    end
    local list = str
    if type(str) ~= "table" then
        if not find(str,",",1,true) then
            -- a single item needs no splitting
            return { tonumber(str) }
        end
        list = lpegmatch(pattern,str)
    end
    for index=1,#list do
        list[index] = tonumber(list[index])
    end
    return list
end
236
-- Same splitter as above, but commas inside balanced brackets and
-- parentheses (next to braces) do not end an item.

local value, pattern

do

    local braced   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
    local unbraced = C((nestedbraces + nestedbrackets + nestedparents + (1 - comma))^0)

    value   = braced + unbraced
    pattern = spaces * Ct(value * (separator * value)^0)

end

-- Returns the list of items found in str; there is no shortcut for empty or
-- table input here.

function parsers.settings_to_array_obey_fences(str)
    local list = lpegmatch(pattern,str)
    return list
end
244
245
246
247
248
249
250
251
252
253
254
255
256
257
-- Patterns made by groupedsplitat are kept per symbol set: cache_a has the
-- ones that collect into a table, cache_b the ones that call an action.

local cache_a = { }
local cache_b = { }

-- Returns a pattern that splits a string at any character of 'symbol'
-- (default: a comma). Spaces around the separators are dropped and an item
-- that is wrapped in braces loses that outer pair.
--
-- symbol     : string with the separator characters
-- withaction : when set, the pattern expects a function as first extra match
--              argument and calls it with each item; otherwise the items are
--              collected in a table

function parsers.groupedsplitat(symbol,withaction)
    symbol = symbol or ","
    local cache   = withaction and cache_b or cache_a
    local pattern = cache[symbol]
    if pattern then
        return pattern
    end
    local symbols   = S(symbol)
    local optional  = space^0
    local separator = optional * symbols * optional
    -- the closing brace must be followed by a separator or the end of the string
    local braced    = lbrace * C((nobrace + nestedbraces)^0) * (rbrace * optional * (#symbols + P(-1)))
    -- stops before spaces that precede a separator or the end of the string
    local unbraced  = C((nestedbraces + (1 - (optional * (symbols + P(-1)))))^0)
    local value     = braced + unbraced
    if withaction then
        local withvalue = Carg(1) * value / function(f,s) return f(s) end
        pattern = spaces * withvalue * (separator * withvalue)^0
    else
        pattern = spaces * Ct(value * (separator * value)^0)
    end
    cache[symbol] = pattern
    return pattern
end
288
-- comma splitters: pattern_a collects the items, pattern_b calls an action

local pattern_a = parsers.groupedsplitat(",",false)
local pattern_b = parsers.groupedsplitat(",",true)

-- Splits a comma separated list into a table, without the spaces around the
-- commas. Nil and "" give a new empty table.

function parsers.stripped_settings_to_array(str)
    if str and str ~= "" then
        return lpegmatch(pattern_a,str)
    end
    return { }
end
299
-- Calls 'action' with each item of a comma separated list (spaces around the
-- commas are dropped) and returns whatever the match gives back. Nil and ""
-- give a new empty table and 'action' is not called.

function parsers.process_stripped_settings(str,action)
    if str and str ~= "" then
        return lpegmatch(pattern_b,str,1,action)
    end
    return { }
end
307
308
309
310
-- appends v to the list t

local function set(t,v)
    local n = #t
    t[n+1] = v
end

-- Each item is handed to 'set' together with the table that is passed as
-- first extra match argument; the trailing Carg(1) makes that table the
-- result of the match. The item pattern that gets wrapped here is the fence
-- aware one defined earlier in the file.

local value   = (Carg(1) * value) / set
local pattern = value * (separator * value)^0 * Carg(1)

-- Appends the items of the comma separated list str to t and returns t.

function parsers.add_settings_to_array(t,str)
    return lpegmatch(pattern,str,nil,t)
end
321
-- Serializes a hash as "key=value" entries in sorted key order.
--
-- h         : the hash; when it is nil (or false) an empty string is returned
-- separator : put between the entries (default ",")
-- yes, no   : when both are given they replace true and false
-- strict    : when set, entries with the value false are left out
-- omit      : optional list of keys to skip (converted with tohash)

function parsers.hash_to_string(h,separator,yes,no,strict,omit)
    if not h then
        return ""
    end
    local result = { }
    local count  = 0
    local keys   = sortedkeys(h)
    local skip   = omit and tohash(omit)
    for i=1,#keys do
        local key = keys[i]
        if not (skip and skip[key]) then
            local value = h[key]
            local text  = nil
            if type(value) ~= "boolean" then
                text = value
            elseif value or not strict then
                if yes and no then
                    text = value and yes or no
                else
                    text = tostring(value)
                end
            end
            if text ~= nil then
                count = count + 1
                result[count] = key .. '=' .. text
            end
        end
    end
    return concat(result,separator or ",")
end
356
-- Joins the items of array a with separator (default ","). A nil (or false)
-- array gives an empty string.

function parsers.array_to_string(a,separator)
    if not a then
        return ""
    end
    return concat(a,separator or ",")
end
364
365
366
367
368
369
370
371
372
373
374
375
376
377
-- Turns a list of names separated by commas and/or spaces into a set: a table
-- with the names as keys and true as value. The accumulator stores each name
-- in the table that Ct("") creates.

local pattern = Ct("") * (C((1-S(", "))^1) * S(", ")^0 * Cc(true) % rawset)^1

-- Returns the set for str, or a new empty table when str is nil or when no
-- name can be matched (for instance in an empty string).
--
-- This function was defined twice with the same body (before and after the
-- hash below); only one definition is kept.

function parsers.settings_to_set(str)
    return str and lpegmatch(pattern,str) or { }
end

-- Cached variant: indexing this table with a list string parses that string
-- on first access and stores the result under the same key.

hashes.settings_to_set = setmetatableindex(function(t,k)
    local v = k and lpegmatch(pattern,k) or { }
    t[k] = v
    return v
end)
393
-- Splits a list of names separated by commas and/or spaces into an array.

local pattern

do

    local delimiter = S(", ")
    local item      = C((1 - delimiter)^1)

    pattern = Ct((item * delimiter^0)^1)

end

-- Cached variant: indexing this table with a list string parses that string
-- on first access and stores the result under the same key.

hashes.settings_to_list = setmetatableindex(function(cache,list)
    local result = list and lpegmatch(pattern,list) or { }
    cache[list] = result
    return result
end)

-- both caches hold their keys and values weakly

getmetatable(hashes.settings_to_set ).__mode = "kv"
getmetatable(hashes.settings_to_list).__mode = "kv"
409
-- Joins, in the order given by sortedhash, the keys of h that have a true-ish
-- value. The default separator is ",".

function parsers.simple_hash_to_string(h, separator)
    local keys = { }
    for key, enabled in sortedhash(h) do
        if enabled then
            keys[#keys+1] = key
        end
    end
    return concat(keys,separator or ",")
end
421
-- Whitespace separated options of the form "key" or "key=value", where both
-- parts can be quoted (lpegpatterns.unquoted) or a run of characters without
-- whitespace and equal sign. A key without value gets an empty string. The
-- entries are stored in the table passed as first extra match argument.

local str, setting, splitter

do

    local optional   = whitespace^0
    local word       = Cs(lpegpatterns.unquoted) + C((1 - whitespace - equal)^1)
    local assignment = equal * optional * word
    local entry      = word * optional * (assignment + Cc(""))

    str      = word
    setting  = Carg(1) * ((optional * entry) % rawset)^1
    splitter = setting^1

end

-- Parses str into target (or into a new table) and returns that table. Nil
-- input or a failed match gives a new empty table.

function parsers.options_to_hash(str,target)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str,1,target or { }) or { }
end
430
431
432
-- splitter made by lpeg.tsplitat for a single space

local splitter = lpeg.tsplitat(" ")

-- Splits str with that splitter. Nil input or a failed match gives a new
-- empty table.

function parsers.options_to_array(str)
    if not str then
        return { }
    end
    return lpegmatch(splitter,str) or { }
end
438
439
440
-- The global unpack is gone in Lua 5.2 and later unless a compatibility
-- layer defines it, so fall back to table.unpack.

local unpack = unpack or table.unpack

-- One item of a comma separated list: a braced group (outer braces dropped),
-- a "count(...)" form that is kept as it is, any other run of characters up
-- to the next comma, or nothing at all (which gives an empty string).

local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
            + C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
            + C((nestedbraces + (1-comma))^1)
            + Cc("")

local pattern_a = spaces * Ct(value*(separator*value)^0)

-- Expands the content of a "count(...)" item.
--
-- n   : the repeat count; when it is nil (or false) str is returned unchanged
-- str : the text between the parentheses, itself a comma separated list
--
-- Returns the items of str, repeated n times, as multiple values.

local function repeater(n,str)
    if not n then
        return str
    else
        local s = lpegmatch(pattern_a,str)
        if n == 1 then
            -- no copy needed
            return unpack(s)
        else
            local t = { }
            local tn = 0
            for i=1,n do
                for j=1,#s do
                    tn = tn + 1
                    t[tn] = s[j]
                end
            end
            return unpack(t)
        end
    end
end
467
-- Like the item pattern used for pattern_a, but here a "count(...)" item is
-- expanded by 'repeater' into its repeated content.

local value, pattern_b

do

    local braced   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
    local count    = C(digit^1) / tonumber
    local repeated = (count * lparent * Cs((noparent + nestedparents)^1) * rparent) / repeater
    local plain    = C((nestedbraces + (1 - comma))^1)

    value     = braced + repeated + plain + Cc("")
    pattern_b = spaces * Ct(value * (separator * value)^0)

end

-- Splits a comma separated list into a table.
--
-- str    : the list
-- expand : when set, "count(...)" items are expanded; otherwise they are
--          kept as they are
--
-- A failed match gives a new empty table.

function parsers.settings_to_array_with_repeat(str,expand)
    local splitter = expand and pattern_b or pattern_a
    return lpegmatch(splitter,str) or { }
end
481
482
483
-- a braced argument, captured without its outer braces

local value = lbrace * C((nobrace + nestedbraces)^0) * rbrace

-- any number of such arguments, with optional spaces between them

local pattern = Ct((value + space)^0)

-- Returns a table with the content of each leading braced group in str;
-- matching stops at the first character that is neither a space nor the
-- start of a balanced group.

function parsers.arguments_to_table(str)
    local arguments = lpegmatch(pattern,str)
    return arguments
end
490
491
492
-- Stores settings in self[class].
--
-- self        : table that holds one sub table per class
-- class       : key of the sub table that receives the settings
-- parentclass : optional key of a sub table that becomes the fallback (via
--               setmetatableindex) of a newly created class table
-- settings    : string or table as accepted by parsers.settings_to_hash
--
-- Missing class and parent tables are created. The parent link is only made
-- when the class table itself is created here. Nothing is returned.

function parsers.getparameters(self,class,parentclass,settings)
    local target = self[class]
    if not target then
        target = { }
        self[class] = target
        if parentclass then
            local parent = self[parentclass]
            if not parent then
                parent = { }
                self[parentclass] = parent
            end
            setmetatableindex(target,parent)
        end
    end
    parsers.settings_to_hash(settings,target)
end
509
-- Returns a gmatch iterator over the items of str, that is, over each run of
-- characters that contains no comma and no space.

function parsers.listitem(str)
    local nextitem = gmatch(str,"[^, ]+")
    return nextitem
end
513
514
515
-- Inserts a comma between groups of three digits in the leading digits of a
-- number string. The leading part is one, two or three digits followed by
-- groups of exactly three digits; after that a period or the end of the
-- string must follow and the rest is copied unchanged.

local pattern

do

    local thousand = digit * digit * digit
    local rest     = (Cc(",") * thousand)^0 * (P(".") + endofstring) * anything^0

    local one   = digit * rest
    local two   = digit * digit * rest
    local three = thousand * rest

    pattern = Cs(one + two + three)

end

lpegpatterns.splitthousands = pattern

-- Returns str with the separators inserted, or str itself when it does not
-- have the expected shape.

function parsers.splitthousands(str)
    return lpegmatch(pattern,str) or str
end
530
531
532
local optionalwhitespace = whitespace^0

-- Three splitters that each collect their pieces in a table:
--
--   words      : runs of characters without punctuation and whitespace; any
--                other character is skipped
--   sentences  : runs of characters up to and including a period, without
--                leading whitespace
--   paragraphs : runs of characters up to an empty line (optional spacers and
--                two newlines), without leading whitespace; whitespace right
--                before the end of the string is removed

do

    local word      = Cs((1 - punctuation - whitespace)^1)
    local sentence  = Cs((1 - period)^0 * period)
    local trailing  = whitespace^1 * endofstring / ""
    local parbreak  = spacer^0 * newline * newline
    local paragraph = Cs((trailing + 1 - parbreak)^1)

    lpegpatterns.words      = Ct((word + anything)^1)
    lpegpatterns.sentences  = Ct((optionalwhitespace * sentence)^1)
    lpegpatterns.paragraphs = Ct((optionalwhitespace * paragraph)^1)

end
538
539
540
541
542
543
544
545
-- Parser for lists like: key="value" key="value", with spaces and/or commas
-- between the entries.

local dquote    = P('"')
local equal     = P('=')
local escape    = P('\\')
local separator = S(' ,')
local utfbom    = lpegpatterns.utfbom^0

local key, value, pattern

do

    -- stops at a double quote and also at a backslash followed by one
    local content = (1 - dquote - escape * dquote)^0

    key   = C((1 - equal)^1)
    value = dquote * C(content) * dquote

    local entry = key * equal * value * separator^0

    -- each (key, value) pair is stored in the table made by Ct(""); the
    -- whole string has to be consumed
    pattern = Ct("") * (entry % rawset)^0 * P(-1)

end

-- Returns a hash with the entries found in str, a new empty table for nil or
-- "", and nil when str cannot be parsed completely.

function parsers.keq_to_hash(str)
    if not str or str == "" then
        return { }
    end
    return lpegmatch(pattern,str)
end
566
567
568
569
-- fallback values for fields that a splitter specification does not set

local defaultspecification = { separator = ",", quote = '"' }

-- Returns a function that splits csv-like data into a table of rows, each
-- row being a table of fields.
--
-- specification (optional table; it gets the defaults as fallback):
--   separator : characters that separate fields (default ",")
--   quote     : characters that can each wrap a field (default '"')
--   numbers   : when set, quoted fields are converted with tonumber
--
-- Every row has to end with at least one "\n" or "\r"; splitting stops at
-- the first row that does not.

function parsers.csvsplitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local numbers    = specification.numbers
    local quotechars = specification.quote
    local separators = specification.separator
    local separator  = S(separators ~= "" and separators or ",")
    local field      = C((1 - separator - newline)^0)
    if quotechars and quotechars ~= "" then
        -- one quoted alternative per quote character; these are tried first
        local quoted = nil
        for chr in gmatch(quotechars,".") do
            local quote   = P(chr)
            local inside  = (1 - quote)^0
            local content = numbers and (inside/tonumber) or C(inside)
            local word    = quote * content * quote
            quoted = quoted and (quoted + word) or word
        end
        field = quoted + field
    end
    local row    = Ct(field * (separator * field)^0) * S("\n\r")^1
    local parser = utfbom * Ct(row^0)
    return function(data)
        return lpegmatch(parser,data)
    end
end
601
602
603
604
605
606
607
608
609
610
611
612
613
-- Returns a splitter for csv data in which a quote inside a quoted field is
-- written as two quotes.
--
-- specification (optional table; it gets the defaults as fallback):
--   separator : characters that separate fields (default ",")
--   quote     : the quote string (default '"')
--   numbers   : when set, fields are converted with tonumber
--   zero      : with numbers: fields that are no number or empty become 0
--   strict    : when set, records are separated by at most one newline,
--               otherwise by one or more
--
-- The returned function takes (data,getheader). With getheader set it gives
-- back the records after the first one plus that first record; otherwise it
-- gives back all records. A record has at least two fields.

function parsers.rfc4180splitter(specification)
    specification = specification and setmetatableindex(specification,defaultspecification) or defaultspecification
    local numbers    = specification.numbers
    local zero       = specification.zero
    local quote      = specification.quote
    local separators = specification.separator
    local quotechar  = P(quote)
    local dquotechar = quotechar * quotechar / quote -- doubled quote becomes a single one
    local separator  = S(separators ~= "" and separators or ",")

    -- wraps the text of a field in the conversion that was asked for
    local function convert(text)
        if not numbers then
            return Cs(text)
        elseif zero then
            return text / function(n) return tonumber(n) or 0 end
        else
            return text / tonumber
        end
    end

    local whatever    = (dquotechar + (1 - quotechar))^0
    local whotever    = (1 - quotechar - newline - separator)^1
    local escaped     = quotechar * convert(whatever) * quotechar
    local non_escaped = convert(whotever)
    local empty       = numbers and zero and Cc(0) or Cc("")
    local field       = escaped + non_escaped + empty
    local record      = Ct(field * (separator * field)^1)
    local morerecords = (newline^(specification.strict and -1 or 1) * record)^0
    local headerline  = utfbom * record * Cp()
    local headeryes   = utfbom * Ct(morerecords)
    local headernop   = utfbom * Ct(record * morerecords)

    return function(data,getheader)
        if not getheader then
            return lpegmatch(headernop,data)
        end
        -- the position after the header is where the other records start
        local header, position = lpegmatch(headerline,data)
        local records = lpegmatch(headeryes,data,position)
        return records, header
    end
end
646
647
648
649
650
651
652
653
654
655
656
657
658
-- Calls action for one value or for each value in a range.
--
-- first  : start value; nothing happens when it is nil or false
-- last   : end value; true means "up to n" (or just first when n is not
--          given); nil or false means that only first is passed
-- n      : upper limit that is used when last is true
-- action : function called with each value

local function ranger(first,last,n,action)
    if not first then
        return
    end
    if not last then
        -- no loop here, as first does not have to be a number in this case
        action(first)
        return
    end
    local final = last
    if last == true then
        final = n or first
    end
    for i=first,final do
        action(i)
    end
end
674
local cardinal    = (lpegpatterns.hexadecimal + lpegpatterns.cardinal) / tonumber
local spacers     = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring

-- Parses a list of steps separated by commas and/or spaces and calls
-- 'ranger' for each of them. A step is one of:
--
--   number              : ranger(number,false,...)
--   number:number       : ranger(first,last,...), a "-" works like the ":"
--   number:* or number: : an open range (the latter only at the end of the
--                         string): ranger(first,true,...)
--   anything else       : ranger(text,false,...)
--
-- The two extra match arguments are passed on as third and fourth argument
-- of ranger. The whole string has to be consumed.

local stepper

do

    local delimiter = S(", ")
    local upto      = cardinal + (P("*") + endofstring) * Cc(true)
    local range     = spacers * S(":-") * spacers * upto
    local number    = cardinal * (range + Cc(false))
    local other     = Cs((1 - delimiter)^1) * Cc(false)
    local step      = (number + other) * Carg(1) * Carg(2) / ranger

    stepper = spacers * (step * delimiter^0)^1 * endofstring

end
697
-- Applies an action to the steps described by str. There are two ways to
-- call this function:
--
--   stepper(str,action)   : str can be a number (passed as is), a table (each
--                           item is passed) or a step string; open ranges in
--                           such a string have no upper limit to use
--   stepper(str,n,action) : str has to be a string, n is the upper limit for
--                           open ranges and action defaults to print
--
-- Other combinations do nothing. Nothing is returned.

function parsers.stepper(str,n,action)
    local kind = type(str)
    if type(n) ~= "function" then
        if kind == "string" then
            lpegmatch(stepper,str,1,n,action or print)
        end
        return
    end
    -- here n is the action
    if kind == "number" then
        n(str)
    elseif kind == "table" then
        for index=1,#str do
            n(str[index])
        end
    else
        lpegmatch(stepper,str,1,false,n)
    end
end
714
715
716
717
718
719
720
721
722
-- Rewrites a unit string for TeX: "%" becomes "\percent " and the integer
-- after a "^" is wrapped in braces. In the text variant the "^" itself is
-- replaced by "\high".

local pattern_math = Cs((P("%")/"\\percent " +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
local pattern_text = Cs((P("%")/"\\percent " + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)

-- This used to export 'pattern', which at this point in the file is the
-- unrelated key="value" parser defined earlier. The math variant is exported
-- instead, as that is what parsers.unittotex uses when no text mode is asked
-- for.

patterns.unittotex = pattern_math

-- Returns str converted with the text variant when textmode is set and with
-- the math variant otherwise.

function parsers.unittotex(str,textmode)
    return lpegmatch(textmode and pattern_text or pattern_math,str)
end
731
-- Rewrites a "^" followed by an integer as "<sup>integer</sup>"; all other
-- characters are copied.

local pattern

do

    local exponent = P("^") / "<sup>" * lpegpatterns.integer * Cc("</sup>")

    pattern = Cs((exponent + anything)^0)

end

function parsers.unittoxml(str)
    local converted = lpegmatch(pattern,str)
    return converted
end
737
738
739
-- Patterns for parsers.iterator, made on first use for each separator set.
-- A pattern skips leading spaces, captures everything up to a separator,
-- skips the separators and also gives back the position where the next item
-- starts.

local cache  = { }
local spaces = lpegpatterns.space^0
local dummy  = function() end

setmetatableindex(cache,function(patterns,symbols)
    local separator = S(symbols)
    local item      = (1 - separator)^0
    local splitter  = spaces * C(item) * separator^0 * Cp()
    patterns[symbols] = splitter
    return splitter
end)

-- the default: comma separated lists

local commalistiterator = cache[","]
753
-- Returns an iterator function that gives the next item of str on each call
-- and nothing when the string is exhausted.
--
-- str       : the list; an empty string gives an iterator that yields nothing
-- separator : string with the separator characters (default: a comma)

function parsers.iterator(str,separator)
    local length = #str
    if length == 0 then
        return dummy
    end
    local splitter = separator and cache[separator] or commalistiterator
    local position = 1
    return function()
        if position > length then
            return
        end
        local item, nextposition = lpegmatch(splitter,str,position)
        if nextposition then
            position = nextposition
            return item
        end
    end
end
772
773
774
775
776
-- Helpers for parsers.mergehashes.

-- returns a shallow copy of t[name], or a new empty table when it is not set

local function initialize(t,name)
    local copy   = { }
    local source = t[name]
    if source then
        for key, value in next, source do
            copy[key] = value
        end
    end
    return copy
end

-- returns t[name] itself, or a new empty table when it is not set

local function fetch(t,name)
    local found = t[name]
    if found then
        return found
    end
    return { }
end

-- copies all entries of more into result and returns result

local function process(result,more)
    for key, value in next, more do
        result[key] = value
    end
    return result
end
800
801
802
803
-- A list of names separated by commas and/or spaces. The first name gives a
-- copy of the matching sub table (see 'initialize'); each following name
-- fetches its sub table and merges it into that copy.

local name, parser, merge

do

    local delimiter = S(", ")

    name   = Carg(1) * C((1 - delimiter)^1)
    parser = delimiter^1 * (name / fetch)
    merge  = (name / initialize) * (parser % process)^0

end

-- Returns a new table with the merged entries of the sub tables of hash that
-- are named in list; later names overrule earlier ones.

function parsers.mergehashes(hash,list)
    local merged = lpegmatch(merge,list,1,hash)
    return merged
end
811
812
813
814
815
816
817
818
-- Splits a number of seconds into days, hours, minutes and seconds (four
-- return values). When no time is given, os.runtime() provides it.

function parsers.runtime(time)
    local day, hour, minute = 24*60*60, 60*60, 60
    local remaining = time or os.runtime()
    local days = div(remaining,day)
    remaining = mod(remaining,day)
    local hours = div(remaining,hour)
    remaining = mod(remaining,hour)
    local minutes = div(remaining,minute)
    local seconds = mod(remaining,minute)
    return days, hours, minutes, seconds
end
831
832
833
-- Splits "method -> token" into its two parts. The method is everything in
-- front of the "->"; the token is either the content of a braced group or the
-- rest of the string. Without a "->" the first extra match argument is used
-- as method.

local spacing = whitespace^0
local apply   = P("->")
local method  = C((1 - apply)^1)
local token   = lbrace * C((1 - rbrace)^1) * rbrace + C(anything^1)

local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token

-- Returns the method and the token found in str. The default (or false when
-- none is given) is used as method when str has no "->". A nil str gives
-- that default plus an empty string.

function parsers.splitmethod(str,default)
    local fallback = default or false
    if not str then
        return fallback, ""
    end
    return lpegmatch(pattern,str,1,fallback)
end
848
849
850
851
852
853
854
855
-- a year: four or more digits, converted to a number

local p_year = lpegpatterns.digit^4 / tonumber

-- Parses a date with an optional time into a table with the fields year,
-- month, day, hour, min and sec (as far as they are present). The date parts
-- are separated by "-" or "/". Accepted dates, tried in this order:
--
--   year-month-day
--   day-month-year
--   year-month
--   month-year
--   eight characters: four for the year, two for the month, two for the day
--
-- After the date either the string ends or " hour:min" follows, optionally
-- with ":sec". Each field is stored by the rawset accumulator in the table
-- made by Ct(""). The accumulators are chained without extra parentheses, so
-- the expressions associate from left to right.

local pattern

do

    local datesep = S("-/")

    local year    = Cc("year")  * p_year
    local month   = Cc("month") * cardinal
    local day     = Cc("day")   * cardinal
    local hour    = Cc("hour")  * cardinal
    local min     = Cc("min")   * cardinal
    local sec     = Cc("sec")   * cardinal

    local ymd     = year  % rawset * datesep * month % rawset * datesep * day  % rawset
    local dmy     = day   % rawset * datesep * month % rawset * datesep * year % rawset
    local ym      = year  % rawset * datesep * month % rawset
    local my      = month % rawset * datesep * year  % rawset

    local compact = (Cc("year")  * (C(4)/tonumber)) % rawset
                  * (Cc("month") * (C(2)/tonumber)) % rawset
                  * (Cc("day")   * (C(2)/tonumber)) % rawset

    local time    = P(" ") * hour % rawset
                  * P(":") * min  % rawset
                  * (P(":") * sec % rawset)^-1

    pattern = Ct("") * (
        (ymd + dmy + ym + my + compact)
      * (time + P(-1))
    )

end

lpegpatterns.splittime = pattern

-- Returns the table with the fields found in str, or nil when str does not
-- match.

function parsers.totime(str)
    local fields = lpegmatch(pattern,str)
    return fields
end
915
916
917
918
919
920
921
922
923
924 |