util-prs.lua /size: 24 Kb    last modification: 2021-10-28 13:50
1
-- Register this file in the global "modules" table (created here when absent).
if not modules then modules = { } end modules ['util-prs'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
local
lpeg
,
table
,
string
=
lpeg
,
table
,
string
10
local
P
,
R
,
V
,
S
,
C
,
Ct
,
Cs
,
Carg
,
Cc
,
Cg
,
Cf
,
Cp
=
lpeg
.
P
,
lpeg
.
R
,
lpeg
.
V
,
lpeg
.
S
,
lpeg
.
C
,
lpeg
.
Ct
,
lpeg
.
Cs
,
lpeg
.
Carg
,
lpeg
.
Cc
,
lpeg
.
Cg
,
lpeg
.
Cf
,
lpeg
.
Cp
11
local
lpegmatch
,
lpegpatterns
=
lpeg
.
match
,
lpeg
.
patterns
12
local
concat
,
gmatch
,
find
=
table
.
concat
,
string
.
gmatch
,
string
.
find
13
local
tonumber
,
tostring
,
type
,
next
,
rawset
=
tonumber
,
tostring
,
type
,
next
,
rawset
14
local
mod
,
div
=
math
.
mod
,
math
.
div
15 16
-- Namespace setup: each table is reused when it already exists, otherwise created.
utilities         = utilities or { }
local parsers     = utilities.parsers or { }
utilities.parsers = parsers
local patterns    = parsers.patterns or { }
parsers.patterns  = patterns

-- Helpers taken from the table namespace; they are not part of stock Lua, so they
-- have to be provided before this file is loaded.
local setmetatableindex = table.setmetatableindex
local sortedhash        = table.sortedhash
local sortedkeys        = table.sortedkeys
local tohash            = table.tohash

-- Container for lookup tables published by this file.
local hashes   = { }
parsers.hashes = hashes
29
-- we share some patterns
30 31
-- Single character patterns shared by the parsers below.
local digit       = R("09")
-- A single space: an empty literal would match nothing and turn every "space^0"
-- below into an invalid loop over the empty string.
local space       = P(' ')
local equal       = P("=")
local colon       = P(":")
local comma       = P(",")
local lbrace      = P("{")
local rbrace      = P("}")
local lparent     = P("(")
local rparent     = P(")")
local lbracket    = P("[")
local rbracket    = P("]")
local period      = S(".")
local punctuation = S(".,:;")

-- Predefined patterns taken from the shared lpeg.patterns table.
local spacer      = lpegpatterns.spacer
local whitespace  = lpegpatterns.whitespace
local newline     = lpegpatterns.newline
local anything    = lpegpatterns.anything
local endofstring = lpegpatterns.endofstring

-- Any single character that is not an opening or closing fence of the given kind.
local nobrace     = 1 - (lbrace   + rbrace  )
local noparent    = 1 - (lparent  + rparent )
local nobracket   = 1 - (lbracket + rbracket)
53 54
-- we could use a Cf Cg construct
55 56
local escape, left, right = P("\\"), P('{'), P('}')

-- lpegpatterns.balanced = P {
--     [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
--     [2] = left * V(1) * right
-- }

-- Grammar for text with balanced braces:
--   rule 1: any run of backslash-escaped braces, non-brace characters and nested groups
--   rule 2: a nested group, i.e. "{" rule 1 "}"
lpegpatterns.balanced = P {
    ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
    left * V(1) * right
}
66 67
-- Recursive matchers for one balanced group per fence type; none of them captures.
local nestedbraces   = P { lbrace   * (nobrace   + V(1))^0 * rbrace   }
local nestedparents  = P { lparent  * (noparent  + V(1))^0 * rparent  }
local nestedbrackets = P { lbracket * (nobracket + V(1))^0 * rbracket }
local spaces         = space^0
-- A braced argument; the substitution capture drops the outer braces only.
local argument       = Cs((lbrace/"") * ((nobrace + nestedbraces)^0) * (rbrace/""))
-- Everything up to the end of the string.
local content        = (1-endofstring)^0

lpegpatterns.nestedbraces  = nestedbraces  -- no capture
lpegpatterns.nestedparents = nestedparents -- no capture
lpegpatterns.nested        = nestedbraces  -- no capture
lpegpatterns.argument      = argument      -- argument after e.g. =
lpegpatterns.content       = content       -- rest after e.g =
79 80
-- A value is either a braced group (captured without its outer braces) or everything
-- up to the next comma, where commas inside nested braces do not end the value.
local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
                + C((nestedbraces + (1-comma))^0)

-- A key runs up to the next "=" or ",".
local key       = C((1-equal-comma)^1)
-- a: "key=value" or a bare "key" (which gets the empty string as value)
local pattern_a = (space+comma)^0 * (key * equal * value + key * C(""))
-- c: only "key=value"
local pattern_c = (space+comma)^0 * (key * equal * value)
-- d: as a, but ":" is accepted as well as "="
local pattern_d = (space+comma)^0 * (key * (equal+colon) * value + key * C(""))

-- From here on a key also stops at a space (this redefinition shadows the one above).
local key       = C((1-space-equal-comma)^1)
-- b: as a, but spaces are permitted around the "="
local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + C("")))
90 91
-- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removed, leading spaces ignored
92 93
-- Table that receives the pairs found by the hash patterns; the public functions
-- point this variable at their own target before matching.
local hash = { }

-- Capture handler: file one key/value pair into the current target table.
local function set(name,content)
    hash[name] = content
end
98 99
-- Repeating variants: each matched pair is passed to "set", at least one pair is required.
local pattern_a_s = (pattern_a/set)^1
local pattern_b_s = (pattern_b/set)^1
local pattern_c_s = (pattern_c/set)^1
local pattern_d_s = (pattern_d/set)^1

patterns.settings_to_hash_a = pattern_a_s
patterns.settings_to_hash_b = pattern_b_s
patterns.settings_to_hash_c = pattern_c_s
patterns.settings_to_hash_d = pattern_d_s
108 109
-- Build a repeating key/value pattern that hands every pair to the given "set" function.
-- how: "strict" (only key=value), "tolerant" (spaces around "=" allowed), anything else
-- gives the default variant (key=value or bare key).
function parsers.make_settings_to_hash_pattern(set,how)
    local base = pattern_a
    if how == "strict" then
        base = pattern_c
    elseif how == "tolerant" then
        base = pattern_b
    end
    return (base/set)^1
end
118 119
-- Convert "key=value,key,..." into a table.
--   str      : string to parse; nil or "" gives a new empty table; a table is taken as is
--   existing : optional table that receives the pairs
-- Returns the table holding the pairs.
function parsers.settings_to_hash(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this returned the undefined global "exiting" before, so callers got nil
            return existing
        else
            return str
        end
    else
        -- the shared pattern stores its pairs in the file level "hash"
        hash = existing or { }
        lpegmatch(pattern_a_s,str)
        return hash
    end
end
137 138
-- As settings_to_hash, but ":" may separate key and value too. Nil or "" gives a new
-- empty table, a table is returned unchanged, otherwise a fresh table is filled.
function parsers.settings_to_hash_colon_too(str)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    -- the shared pattern stores its pairs in the file level "hash"
    hash = { }
    lpegmatch(pattern_d_s,str)
    return hash
end
149 150
-- As settings_to_hash, but spaces around the "=" are accepted.
--   str      : string to parse; nil or "" gives a new empty table; a table is taken as is
--   existing : optional table that receives the pairs
-- Returns the table holding the pairs.
function parsers.settings_to_hash_tolerant(str,existing)
    if not str or str == "" then
        return { }
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this returned the undefined global "exiting" before, so callers got nil
            return existing
        else
            return str
        end
    else
        -- the shared pattern stores its pairs in the file level "hash"
        hash = existing or { }
        lpegmatch(pattern_b_s,str)
        return hash
    end
end
168 169
-- As settings_to_hash, but only "key=value" pairs are accepted.
--   str      : string to parse; nil or "" gives nil; a table is taken as is
--   existing : optional table that receives the pairs
-- Returns the table holding the pairs, or nil when a parsed string left it empty.
function parsers.settings_to_hash_strict(str,existing)
    if not str or str == "" then
        return nil
    elseif type(str) == "table" then
        if existing then
            for k, v in next, str do
                existing[k] = v
            end
            -- this returned the undefined global "exiting" before, so callers got nil
            return existing
        else
            return str
        end
    else
        -- str is a non-empty non-table value here, so no further test is needed
        hash = existing or { }
        lpegmatch(pattern_c_s,str)
        return next(hash) and hash
    end
end
187 188
-- Comma separated list: a comma followed by optional spaces separates the entries; an
-- entry is a braced group (outer braces dropped) or the text up to the next comma.
local separator = comma * space^0
local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
                + C((nestedbraces + (1-comma))^0)
-- All entries are collected in one table.
local pattern   = spaces * Ct(value*(separator*value)^0)

-- "aap, {noot}, mies" : outer {} removed, leading spaces ignored

patterns.settings_to_array = pattern
196 197
-- we could use a weak table as cache
198 199
-- Split a comma separated string into an array.
--   str    : nil or "" gives a new empty table; a table is returned unchanged
--   strict : when set, the string is only split when it contains a "{", otherwise
--            only when it contains a ","
-- A string that is not split is returned as a single entry array.
function parsers.settings_to_array(str,strict)
    if not str or str == "" then
        return { }
    end
    if type(str) == "table" then
        return str
    end
    local trigger = strict and "{" or ","
    if find(str,trigger,1,true) then
        return lpegmatch(pattern,str)
    end
    return { str }
end
216 217
-- Turn a comma separated string (or an array) into an array of numbers.
-- Nil or "" gives a new empty table. A table argument is converted in place; entries
-- that tonumber rejects become nil.
function parsers.settings_to_numbers(str)
    if not str or str == "" then
        return { }
    end
    local list = str
    if type(list) ~= "table" then
        if not find(list,",",1,true) then
            -- no comma, so a single value
            return { tonumber(list) }
        end
        list = lpegmatch(pattern,list)
    end
    for index=1,#list do
        list[index] = tonumber(list[index])
    end
    return list
end
233 234
-- As the plain list splitter, but commas inside balanced [] and () groups, as well as
-- inside {} groups, do not split an entry.
local value   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
              + C((nestedbraces + nestedbrackets + nestedparents + (1-comma))^0)
local pattern = spaces * Ct(value*(separator*value)^0)

-- Split a comma separated string into an array while respecting the fences above.
function parsers.settings_to_array_obey_fences(str)
    local entries = lpegmatch(pattern,str)
    return entries
end
241 242
-- inspect(parsers.settings_to_array_obey_fences("url(http://a,b.c)"))
243 244
-- this one also strips end spaces before separators
245
--
246
-- "{123} , 456 " -> "123" "456"
247 248
-- local separator = space^0 * comma * space^0
249
-- local value = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
250
-- + C((nestedbraces + (1-(space^0*(comma+P(-1)))))^0)
251
-- local withvalue = Carg(1) * value / function(f,s) return f(s) end
252
-- local pattern_a = spaces * Ct(value*(separator*value)^0)
253
-- local pattern_b = spaces * withvalue * (separator*withvalue)^0
254 255
local cache_a = { } -- splitters that collect the values in a table
local cache_b = { } -- splitters that feed each value to an action

-- Return a (cached) splitter for values separated by any character in "symbol".
--   symbol     : string with the separator characters, "," when not given
--   withaction : when set, each value is passed to the function given as first extra
--                match argument, otherwise the values are collected in a table
-- Spaces before a separator or before the end of the string are not part of a value;
-- a braced value loses its outer braces.
function parsers.groupedsplitat(symbol,withaction)
    symbol = symbol or ","
    local cache  = withaction and cache_b or cache_a
    local cached = cache[symbol]
    if cached then
        return cached
    end
    local symbols   = S(symbol)
    local separator = space^0 * symbols * space^0
    local value     = lbrace * C((nobrace + nestedbraces)^0) * rbrace
                    + C((nestedbraces + (1-(space^0*(symbols+P(-1)))))^0)
    local splitter
    if withaction then
        local withvalue = Carg(1) * value / function(f,s) return f(s) end
        splitter = spaces * withvalue * (separator*withvalue)^0
    else
        splitter = spaces * Ct(value*(separator*value)^0)
    end
    cache[symbol] = splitter
    return splitter
end
279 280
-- Comma splitters: pattern_a collects the values, pattern_b passes them to an action.
local pattern_a = parsers.groupedsplitat(",",false)
local pattern_b = parsers.groupedsplitat(",",true)

-- Split a comma separated string into an array; nil or "" gives a new empty table.
function parsers.stripped_settings_to_array(str)
    if str and str ~= "" then
        return lpegmatch(pattern_a,str)
    end
    return { }
end
290 291
-- Call "action" for each value in a comma separated string. Nil or "" gives a new empty
-- table, otherwise whatever the match produces is returned.
function parsers.process_stripped_settings(str,action)
    if str and str ~= "" then
        return lpegmatch(pattern_b,str,1,action)
    end
    return { }
end
298 299
-- parsers.process_stripped_settings("{123} , 456 ",function(s) print("["..s.."]") end)
300
-- parsers.process_stripped_settings("123 , 456 ",function(s) print("["..s.."]") end)
301 302
-- Capture handler: append a value to the end of an array.
local function set(t,v)
    local slot = #t + 1
    t[slot] = v
end
305 306
-- Each value is appended to the table passed as first extra match argument; that table
-- is the result of a match.
local value   = P(Carg(1)*value)/set
local pattern = value*(separator*value)^0 * Carg(1)

-- Append the entries of a comma separated string to the array t and return t.
function parsers.add_settings_to_array(t,str)
    local target = lpegmatch(pattern,str,nil,t)
    return target
end
312 313
-- Serialize a table as "key=value" entries in sorted key order.
--   h         : table to serialize; a false or nil value gives ""
--   separator : string between entries, "," when not given
--   yes, no   : when both are given they replace true and false
--   strict    : when set, entries with a false value are left out
--   omit      : optional list of keys to skip (turned into a lookup with tohash)
function parsers.hash_to_string(h,separator,yes,no,strict,omit)
    if not h then
        return ""
    end
    local keys   = sortedkeys(h)
    local skip   = omit and tohash(omit)
    local result = { }
    local count  = 0
    for index=1,#keys do
        local key = keys[index]
        if not (skip and skip[key]) then
            local value = h[key]
            local text
            if type(value) ~= "boolean" then
                text = key .. '=' .. value
            elseif yes and no then
                if value then
                    text = key .. '=' .. yes
                elseif not strict then
                    text = key .. '=' .. no
                end
            elseif value or not strict then
                text = key .. '=' .. tostring(value)
            end
            if text then
                count = count + 1
                result[count] = text
            end
        end
    end
    return concat(result,separator or ",")
end
347 348
-- Join the entries of an array with the separator ("," when not given); a false or nil
-- array gives "".
function parsers.array_to_string(a,separator)
    if not a then
        return ""
    end
    return concat(a,separator or ",")
end
355 356
-- function parsers.settings_to_set(str,t) -- tohash? -- todo: lpeg -- duplicate anyway
357
-- if str then
358
-- t = t or { }
359
-- for s in gmatch(str,"[^, ]+") do -- space added
360
-- t[s] = true
361
-- end
362
-- return t
363
-- else
364
-- return { }
365
-- end
366
-- end
367 368
-- Fold every name into a table as name = true. Names are separated by commas and/or
-- spaces: the separator set holds both, like the "[^, ]+" of the string based
-- predecessor of this parser.
local pattern = Cf(Ct("") * Cg(C((1-S(", "))^1) * S(", ")^0 * Cc(true))^1,rawset)

-- Convert a list of names into a set (name = true). Nil, or a string without any name,
-- gives a new empty table.
function parsers.settings_to_set(str)
    return str and lpegmatch(pattern,str) or { }
end
373 374
-- Self filling lookup: indexing with a list string parses it into a set and remembers
-- the result under that string.
hashes.settings_to_set = table.setmetatableindex(function(cache,spec) -- experiment, not public
    local set = spec and lpegmatch(pattern,spec)
    if not set then
        set = { }
    end
    cache[spec] = set
    return set
end)

-- as we use a next, we are not sure when the gc kicks in

getmetatable(hashes.settings_to_set).__mode = "kv" -- could be an option (maybe sharing makes sense)
383 384
-- Join the keys that have a true-ish value, in the order delivered by sortedhash, with
-- the separator ("," when not given).
function parsers.simple_hash_to_string(h,separator)
    local names = { }
    local count = 0
    for name, enabled in sortedhash(h) do
        if enabled then
            count = count + 1
            names[count] = name
        end
    end
    return concat(names,separator or ",")
end
395 396
-- for mtx-context etc: aaaa bbbb cccc=dddd eeee=ffff
397 398
-- A word is what lpegpatterns.unquoted delivers, or else a run of characters that are
-- neither whitespace nor "=".
local str      = Cs(lpegpatterns.unquoted) + C((1-whitespace-equal)^1)
-- Pairs are stored with rawset in the table passed as first extra match argument; a
-- word without "=" gets the empty string as value.
local setting  = Cf( Carg(1) * (whitespace^0 * Cg(str * whitespace^0 * (equal * whitespace^0 * str + Cc(""))))^1,rawset)
local splitter = setting^1

-- Parse "aaaa bbbb cccc=dddd" into target (or into a new table). Nil input, or input
-- that does not match, gives a new empty table.
function parsers.options_to_hash(str,target)
    if not str then
        return { }
    end
    local result = lpegmatch(splitter,str,1,target or { })
    return result or { }
end
405 406
-- Options are separated by a space.
-- NOTE(review): the separator is written as one space because the options handled here
-- are space separated ("aaaa bbbb cccc=dddd"); confirm against the callers.
local splitter = lpeg.tsplitat(" ")

-- Split a string of options into an array; nil input or a failed match gives a new
-- empty table.
function parsers.options_to_array(str)
    return str and lpegmatch(splitter,str) or { }
end
411 412
-- for chem (currently one level)
413 414
-- List entries for the variant that keeps repeat specifications as they are:
--   a braced group (outer braces dropped),
--   digits followed by a balanced (...) group, kept as one entry,
--   or the text up to the next comma.
local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
                + C(digit^1 * lparent * (noparent + nestedparents)^1 * rparent)
                + C((nestedbraces + (1-comma))^1)
local pattern_a = spaces * Ct(value*(separator*value)^0)
418 419
-- Expand "n(list)": split the list with pattern_a and return its entries n times in a
-- row as separate values. Without n the string itself is returned.
local function repeater(n,str)
    if not n then
        return str
    end
    local items = lpegmatch(pattern_a,str)
    if n == 1 then
        return unpack(items)
    end
    local expanded = { }
    local size     = 0
    for round=1,n do
        for index=1,#items do
            size = size + 1
            expanded[size] = items[index]
        end
    end
    return unpack(expanded)
end
439 440
-- List entries for the expanding variant: "digits(list)" is replaced by the entries that
-- repeater returns for it; the other two alternatives are as in the non expanding case.
local value     = P(lbrace * C((nobrace + nestedbraces)^0) * rbrace)
                + (C(digit^1)/tonumber * lparent * Cs((noparent + nestedparents)^1) * rparent) / repeater
                + C((nestedbraces + (1-comma))^1)
local pattern_b = spaces * Ct(value*(separator*value)^0)

-- Split a comma separated string into an array; with "expand" set, repeat
-- specifications like "3(a,b)" are expanded. A failed match gives a new empty table.
function parsers.settings_to_array_with_repeat(str,expand) -- beware: "" => { }
    local splitter = expand and pattern_b or pattern_a
    return lpegmatch(splitter,str) or { }
end
452 453
--
454 455
-- A run of braced arguments, optionally separated by spaces; each argument is captured
-- without its outer braces.
local value   = lbrace * C((nobrace + nestedbraces)^0) * rbrace
local pattern = Ct((space + value)^0)

-- Collect the leading "{..}{..}" arguments of a string in an array.
function parsers.arguments_to_table(str)
    local arguments = lpegmatch(pattern,str)
    return arguments
end
461 462
-- temporary here (unoptimized)
463 464
-- Store settings under self[class]. A class table that does not exist yet is created
-- and, when a parent class is given, made to fall back on self[parentclass] (which is
-- created too when needed). The settings are then merged into the class table.
function parsers.getparameters(self,class,parentclass,settings)
    local current = self[class]
    if not current then
        current = { }
        self[class] = current
        if parentclass then
            local parent = self[parentclass]
            if not parent then
                parent = { }
                self[parentclass] = parent
            end
            setmetatableindex(current,parent)
        end
    end
    parsers.settings_to_hash(settings,current)
end
480 481
-- Return an iterator over the items of a list, where items are runs of characters other
-- than commas and spaces.
function parsers.listitem(str)
    local nextitem = gmatch(str,"[^, ]+")
    return nextitem
end
484 485
--
486 487
-- Insert commas between groups of three digits. The leading group has one, two or three
-- digits; the remaining digits must form complete groups, followed by a period or the
-- end of the string. What follows is copied unchanged.
local pattern = Cs { "start",
    start    = V("one") + V("two") + V("three"),
    rest     = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0,
    thousand = digit * digit * digit,
    one      = digit * V("rest"),
    two      = digit * digit * V("rest"),
    three    = V("thousand") * V("rest"),
}

lpegpatterns.splitthousands = pattern -- maybe better in the parsers namespace ?

-- Return str with thousands separators added, or str itself when it does not match.
function parsers.splitthousands(str)
    local formatted = lpegmatch(pattern,str)
    return formatted or str
end
501 502
-- print(parsers.splitthousands("11111111111.11"))
503 504
local optionalwhitespace = whitespace^0

-- Each of these collects its pieces in one table.
-- words: runs of characters that are neither punctuation nor whitespace are captured,
-- any other character is skipped.
lpegpatterns.words      = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1)
-- sentences: text up to and including a period, leading whitespace dropped.
lpegpatterns.sentences  = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1)
-- paragraphs: text up to a blank line (optional spacers and two newlines); whitespace
-- right before the end of the string is removed.
lpegpatterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1)
509 510
-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n "
511
-- inspect(lpegmatch(lpegpatterns.paragraphs,str))
512
-- inspect(lpegmatch(lpegpatterns.sentences,str))
513
-- inspect(lpegmatch(lpegpatterns.words,str))
514 515
-- handy for k="v" [, ] k="v"
516 517
local dquote    = P('"')
local equal     = P('=')
local escape    = P('\\')
-- Pairs may be separated by spaces and/or commas; the space is needed for input like
-- key="value" foo="bar".
local separator = S(' ,')

-- The key is everything up to the "=", the value is the text between double quotes.
local key       = C((1-equal)^1)
local value     = dquote * C((1-dquote-escape*dquote)^0) * dquote

----- pattern   = Cf(Ct("") * Cg(key * equal * value) * separator^0,rawset)^0 * P(-1) -- was wrong
-- All pairs are folded into one table with rawset; the whole string has to be consumed.
local pattern   = Cf(Ct("") * (Cg(key * equal * value) * separator^0)^1,rawset)^0 * P(-1)

-- Parse a string of key="value" pairs into a table; nil or "" gives a new empty table.
function parsers.keq_to_hash(str)
    if str and str ~= "" then
        return lpegmatch(pattern,str)
    else
        return { }
    end
end
535 536
-- inspect(lpeg.match(pattern,[[key="value" foo="bar"]]))
537 538
-- Fallback settings for the csv splitters below: comma separated, double quoted fields.
local defaultspecification = { separator = ",", quote = '"' }
539 540
-- this version accepts multiple separators and quotes as used in the
541
-- database module
542 543
-- Build a csv splitter: a function that turns a data string into an array of records,
-- each an array of fields. Every record has to end with one or more newline characters.
--   specification.separator : characters that separate fields ("," when empty)
--   specification.quote     : characters that can quote a field (each one by itself)
--   specification.numbers   : when set, quoted fields are converted with tonumber
-- A given specification falls back on the defaults through its metatable.
function parsers.csvsplitter(specification)
    if specification then
        specification = setmetatableindex(specification,defaultspecification) or defaultspecification
    else
        specification = defaultspecification
    end
    local separators = specification.separator
    local quotes     = specification.quote
    local numbers    = specification.numbers
    if not separators or separators == "" then
        separators = ","
    end
    local separator = S(separators)
    local field     = C((1 - separator - newline)^0)
    if quotes and quotes ~= "" then
        local quoted = nil
        for chr in gmatch(quotes,".") do
            local quote = P(chr)
            local inner = (1 - quote)^0
            local word  = quote * (numbers and (inner/tonumber) or C(inner)) * quote
            if quoted then
                quoted = quoted + word
            else
                quoted = word
            end
        end
        -- quoted fields take precedence over unquoted ones
        field = quoted + field
    end
    local record = Ct(field * (separator * field)^0)
    local parser = Ct((record * S("\n\r")^1)^0 )
    return function(data)
        return lpegmatch(parser,data)
    end
end
569 570
-- local crap = [[
571
-- first,second,third,fourth
572
-- "1","2","3","4"
573
-- "5","6","7","8"
574
-- ]]
575 576
-- local mycsvsplitter = parsers.csvsplitter { numbers = true }
577 578
-- local list = mycsvsplitter(crap) inspect(list)
579 580
-- and this is a slightly patched version of a version posted by Philipp Gesang
581 582
-- Build a splitter for csv data in the style of rfc 4180. The returned function takes
-- (data,getheader): with getheader set it returns the records after the first one plus
-- that first record, otherwise just all records.
--   specification.separator : characters that separate fields ("," when empty)
--   specification.quote     : the quote; a doubled quote inside a quoted field stands
--                             for one quote
--   specification.numbers   : when set, quoted fields are converted with tonumber
--   specification.strict    : when set, at most one newline is accepted between
--                             records, otherwise at least one is required
-- A record needs at least two fields.
function parsers.rfc4180splitter(specification)
    if specification then
        specification = setmetatableindex(specification,defaultspecification) or defaultspecification
    else
        specification = defaultspecification
    end
    local numbers    = specification.numbers
    local separators = specification.separator  --> rfc: COMMA
    local quote      = P(specification.quote)   -->      DQUOTE
    local twoquotes  = quote * quote            -->      2DQUOTE
                     / specification.quote
    if not separators or separators == "" then
        separators = ","
    end
    local separator  = S(separators)
    local inner      = (twoquotes + (1 - quote))^0
    local quoted     = quote
                     * (numbers and (inner/tonumber) or Cs(inner))
                     * quote
    local unquoted   = C((1 - quote - newline - separator)^1)
    local field      = quoted + unquoted + Cc("")
    local record     = Ct(field * (separator * field)^1)
    local headerline = record * Cp()
    local others     = (newline^(specification.strict and -1 or 1) * record)^0
    local withheader = Ct(others)
    local noheader   = Ct(record * others)
    return function(data,getheader)
        if not getheader then
            return lpegmatch(noheader,data)
        end
        -- the position after the header is where the remaining records start
        local header, position = lpegmatch(headerline,data)
        local records = lpegmatch(withheader,data,position)
        return records, header
    end
end
611 612
-- local mycsvsplitter = parsers.rfc4180splitter { numbers = true }
613
--
614
-- local crap = [[
615
-- first,second,third,fourth
616
-- "1","2","3","4"
617
-- "a","b","c","d"
618
-- "foo","bar""baz","boogie","xyzzy"
619
-- ]]
620
--
621
-- local list, names = mycsvsplitter(crap,true) inspect(list) inspect(names)
622
-- local list, names = mycsvsplitter(crap) inspect(list) inspect(names)
623 624
-- parsers.stepper("1,7-",9,function(i) print(">>>",i) end)
625
-- parsers.stepper("1-3,7,8,9")
626
-- parsers.stepper("1-3,6,7",function(i) print(">>>",i) end)
627
-- parsers.stepper(" 1 : 3, ,7 ")
628
-- parsers.stepper("1:4,9:13,24:*",30)
629 630
-- Call action for a single number or for each number in a range.
--   first  : start of the range; nothing happens without it
--   last   : end of the range; true means "up to n" (or just first when n is not
--            given); false or nil means that only first is passed on
local function ranger(first,last,n,action)
    if not first then
        -- forget about it
        return
    end
    if not last then
        action(first)
        return
    end
    local stop = last
    if last == true then
        stop = n or first
    end
    for i=first,stop do
        action(i)
    end
end
645 646
local cardinal    = lpegpatterns.cardinal/tonumber
local spacers     = lpegpatterns.spacer^0
local endofstring = lpegpatterns.endofstring

-- A list of numbers and ranges. A range is "a:b" or "a-b"; "a:*", or a range left open
-- at the end of the string, runs up to the limit passed as first extra match argument.
-- Each item is handed to ranger together with both extra match arguments (limit and
-- action). Items are separated by commas and/or spaces, which is what makes a list like
-- " 1 : 3, ,7 " acceptable. An earlier variant without "*" support was defined right
-- before this one and never used; it is left out here.
local stepper     = spacers * ( cardinal * ( spacers * S(":-") * spacers * ( cardinal + (P("*") + endofstring) * Cc(true) ) + Cc(false) )
                  * Carg(1) * Carg(2) / ranger * S(", ")^0 )^1 * endofstring -- we're sort of strict (could do without endofstring)
655 656
-- Run action for every number in a list like "1-3,7,8" or "1:4,24:*".
--   n      : limit used for open ranges; a function given here is taken as the action
--   action : called with each number, print when not given
function parsers.stepper(str,n,action)
    local limit, handler = n, action
    if type(n) == "function" then
        limit, handler = false, n
    end
    lpegmatch(stepper,str,1,limit,handler or print)
end
663 664
--
665 666
-- Unit string to TeX, math variant: "%" becomes the percent macro and "^" followed by an
-- integer gets braces around that integer; everything else is copied.
-- NOTE(review): the replacement texts are given as they appear here; confirm whether
-- "\\percent" should end with a space so that following letters stay separate.
local pattern_math = Cs((P("%")/"\\percent" +  P("^")           * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
-- Text variant: as above, but the "^" itself is replaced by "\high".
local pattern_text = Cs((P("%")/"\\percent" + (P("^")/"\\high") * Cc("{") * lpegpatterns.integer * Cc("}") + anything)^0)
668 669
-- Publish the default (math) converter. This used to assign "pattern", which at this
-- point in the file is an unrelated local defined for another parser.
patterns.unittotex = pattern_math

-- Convert a unit string to TeX input, using the text variant when textmode is set and
-- the math variant otherwise.
function parsers.unittotex(str,textmode)
    return lpegmatch(textmode and pattern_text or pattern_math,str)
end
674 675
-- "^" followed by an integer becomes <sup>integer</sup>; everything else is copied.
local pattern = Cs((P("^")/"<sup>" * lpegpatterns.integer * Cc("</sup>") + anything)^0)

-- Convert a unit string to xml with superscripted exponents.
function parsers.unittoxml(str)
    local converted = lpegmatch(pattern,str)
    return converted
end
680 681
-- print(parsers.unittotex("10^-32 %"),utilities.parsers.unittoxml("10^32 %"))
682 683
local cache  = { }
local spaces = lpegpatterns.space^0
local dummy  = function() end

-- Patterns are made on demand, one per string of separator characters. A pattern skips
-- leading spaces, captures the text up to the next separator, skips the separators and
-- delivers the position where the next item starts.
setmetatableindex(cache,function(patterns,characters)
    local separator = S(characters) -- was P
    local item      = (1-separator)^0
    local splitter  = spaces * C(item) * separator^0 * Cp()
    patterns[characters] = splitter
    return splitter
end)

local commalistiterator = cache[","]
696 697
-- Return an iterator function that delivers the items of a list one by one.
--   separator : string with the separator characters, "," when not given
-- An empty string gives an iterator that delivers nothing.
function parsers.iterator(str,separator)
    local length = #str
    if length == 0 then
        return dummy
    end
    local splitter = separator and cache[separator] or commalistiterator
    local position = 1
    return function()
        if position > length then
            return
        end
        local item, nextposition = lpegmatch(splitter,str,position)
        if nextposition then
            position = nextposition
            return item
        end
    end
end
715 716
-- for s in parsers.iterator("a b c,b,c") do
717
-- print(s)
718
-- end
719 720
-- Start a merge: return a shallow copy of t[name], or a new empty table when there is
-- no such entry.
local function initialize(t,name)
    local copy = { }
    if t[name] then
        for key, value in next, t[name] do
            copy[key] = value
        end
    end
    return copy
end
732 733
-- Return t[name] itself, or a new empty table when there is no such entry.
local function fetch(t,name)
    local found = t[name]
    if found then
        return found
    end
    return { }
end
736 737
-- Copy all pairs of "more" into "result" (later values win) and return "result".
local function process(result,more)
    local key, value = next(more)
    while key ~= nil do
        result[key] = value
        key, value = next(more,key)
    end
    return result
end
743 744
-- Names are separated by commas and/or spaces; with only the comma in the set a list
-- like "aa, bb, cc" would deliver " bb" (with the space) as a name.
local name   = C((1-S(", "))^1)
-- The first name yields a copy of its table, each following name the table itself.
local parser = (Carg(1) * name / initialize) * (S(", ")^1 * (Carg(1) * name / fetch))^0
-- Fold the tables from left to right into the copy of the first one.
local merge  = Cf(parser,process)

-- Merge the subtables of "hash" named in "list" into a new table and return it; the
-- subtables themselves are not changed.
function parsers.mergehashes(hash,list)
    return lpegmatch(merge,list,1,hash)
end
751 752
-- local t = {
753
-- aa = { alpha = 1, beta = 2, gamma = 3, },
754
-- bb = { alpha = 4, beta = 5, delta = 6, },
755
-- cc = { epsilon = 3 },
756
-- }
757
--
758
-- inspect(parsers.mergehashes(t,"aa, bb, cc"))
759 760
-- Split a number of seconds into days, hours, minutes and seconds. Without an argument
-- the value of os.runtime() is used.
function parsers.runtime(time)
    local remaining = time or os.runtime()
    local days      = div(remaining,24*60*60)
    remaining       = mod(remaining,24*60*60)
    local hours     = div(remaining,60*60)
    remaining       = mod(remaining,60*60)
    local minutes   = div(remaining,60)
    local seconds   = mod(remaining,60)
    return days, hours, minutes, seconds
end
772 773
--
774 775
local spacing = whitespace^0
local apply   = P("->")
-- The method is the text before "->"; the token is the content of a {...} group, or
-- else all remaining text.
local method  = C((1-apply)^1)
local token   = lbrace * C((1-rbrace)^1) * rbrace + C(anything^1)

-- Without a "method ->" prefix the first extra match argument takes the method's place.
local pattern = spacing * (method * spacing * apply + Carg(1)) * spacing * token

-- Split "method->token" into its two parts. When there is no method the default (or
-- false) is returned instead; without a string the result is that default and "".
function parsers.splitmethod(str,default)
    local fallback = default or false
    if not str then
        return fallback, ""
    end
    return lpegmatch(pattern,str,1,fallback)
end
789 790
-- print(parsers.splitmethod(" foo -> {bar} "))
791
-- print(parsers.splitmethod("foo->{bar}"))
792
-- print(parsers.splitmethod("foo->bar"))
793
-- print(parsers.splitmethod("foo"))
794
-- print(parsers.splitmethod("{foo}"))
795
-- print(parsers.splitmethod())
796 797
-- A year has at least four digits.
local p_year = lpegpatterns.digit^4/tonumber

-- Date and time folded into a table with the fields year, month, day, hour, min and
-- (optionally) sec. The date is year-month-day or day-month-year with "-" or "/" between
-- the parts. One space separates date and time, as in "2019-03-05 12:12:12"; without it
-- a string like that cannot match.
local pattern = Cf(Ct("") *
    (
        (             Cg(Cc("year")  * p_year)
          * S("-/") * Cg(Cc("month") * cardinal)
          * S("-/") * Cg(Cc("day")   * cardinal)
        ) +
        (             Cg(Cc("day")   * cardinal)
          * S("-/") * Cg(Cc("month") * cardinal)
          * S("-/") * Cg(Cc("year")  * p_year)
        )
    )
      * P(" ")  * Cg(Cc("hour") * cardinal)
      * P(":")  * Cg(Cc("min")  * cardinal)
      * (P(":") * Cg(Cc("sec")  * cardinal))^-1
, rawset)

lpegpatterns.splittime = pattern

-- Parse a date/time string into a table of numbers; nil when the string does not match.
function parsers.totime(str)
    return lpegmatch(pattern,str)
end
820 821
-- print(os.time(parsers.totime("2019-03-05 12:12:12")))
822
-- print(os.time(parsers.totime("2019/03/05 12:12:12")))
823
-- print(os.time(parsers.totime("05-03-2019 12:12:12")))
824
-- print(os.time(parsers.totime("05/03/2019 12:12:12")))
825