l-url.lua /size: 14 Kb    last modification: 2020-07-01 14:35
1
-- Register this file in the global module registry. The registry table is
-- created on demand when no module has registered itself yet.
modules = modules or { }

modules['l-url'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
local
char
,
format
,
byte
=
string
.
char
,
string
.
format
,
string
.
byte
10
local
concat
=
table
.
concat
11
local
tonumber
,
type
,
next
=
tonumber
,
type
,
next
12
local
P
,
C
,
R
,
S
,
Cs
,
Cc
,
Ct
,
Cf
,
Cg
,
V
=
lpeg
.
P
,
lpeg
.
C
,
lpeg
.
R
,
lpeg
.
S
,
lpeg
.
Cs
,
lpeg
.
Cc
,
lpeg
.
Ct
,
lpeg
.
Cf
,
lpeg
.
Cg
,
lpeg
.
V
13
local
lpegmatch
,
lpegpatterns
,
replacer
=
lpeg
.
match
,
lpeg
.
patterns
,
lpeg
.
replacer
14
local
sortedhash
=
table
.
sortedhash
15 16
-- from wikipedia:
--
--   foo://username:password@example.com:8042/over/there/index.dtb?type=animal;name=narwhal#nose
--   \_/   \_______________/ \_________/ \__/            \___/ \_/ \______________________/ \__/
--    |           |               |       |                |    |            |                |
--    |       userinfo         hostname  port              |    |          query          fragment
--    |    \________________________________/\_____________|____|/
-- scheme                  |                          |    |    |
--    |                authority                    path   |    |
--    |                                                    |    |
--    |            path                       interpretable as filename
--    |   ___________|____________                              |
--   / \ /                        \                             |
--   urn:example:animal:ferret:nose               interpretable as extension
--
-- also nice: http://url.spec.whatwg.org/ (maybe some day ...)
32 33
-- The url namespace: reuse a global table when one is already there and keep
-- a local alias for everything that follows in this file.
url = url or { }
local url = url

-- Lazy cache that maps a hexadecimal string onto the character with that code.
-- A missing key is computed once in __index and then stored in the table.
local unescapes = setmetatable({ }, {
    __index = function(cache, hex)
        local character = char(tonumber(hex, 16))
        cache[hex] = character
        return character
    end
})

-- Lazy cache that maps a character onto a percent sign followed by the
-- two-digit uppercase hexadecimal code of its first byte.
local escapes = setmetatable({ }, {
    __index = function(cache, character)
        local code = format("%%%02X", byte(character))
        cache[character] = code
        return code
    end
})
50 51
-- okay:
52 53
-- Single character building blocks shared by the patterns defined below.
local colon       = P(":")
local qmark       = P("?")
local hash        = P("#")
local slash       = P("/")
local atsign      = P("@")
local percent     = P("%")
local endofstring = P(-1)               -- matches only when no input is left
local hexdigit    = R("09","AF","af")
local plus        = P("+")
local nothing     = Cc("")              -- consumes no input and yields an empty string
-- NOTE(review): 'okay' is not referenced anywhere else in the visible source.
local okay        = R("09","AZ","az") + S("-_.,:=+*~!'()@&$")
64 65
-- Character level substitutions used inside the Cs(...) captures below.
--
-- The replacement for a plus sign is a single space. The extracted source
-- showed an empty literal here because whitespace-only strings got lost,
-- which contradicts the name 'plustospace' and the encoder, which maps a
-- space back onto "+".

-- "%XX" becomes the character with hexadecimal code XX (via the unescapes cache)
local escapedchar   = (percent * C(hexdigit * hexdigit)) / unescapes
-- any single character becomes its "%XX" form (via the escapes cache)
local unescapedchar = P(1) / escapes
-- a plus becomes a space, a "%XX" sequence becomes its character
local escaped       = (plus / " ") + escapedchar -- so no loc://foo++.tex
-- a slash is dropped
local noslash       = P("/") / ""
-- a plus becomes a space
local plustospace   = P("+") / " "
70 71
-- Form style decoding: a plus is replaced through 'plustospace', "%XX" is
-- replaced by its character, "\r\n" is normalized to "\n" and everything
-- else is copied as is.
local decoder = Cs( (
                    plustospace
                  + escapedchar
                  + P("\r\n") / "\n"
                  + P(1)
                ) ^ 0 )

-- The inverse: runs of alphanumerics and of "-./_" are copied, a space
-- becomes "+", a newline becomes "\r\n" and any other character is replaced
-- by its "%XX" form.
--
-- The space alternative has to match a literal space: an empty pattern would
-- make the loop body accept the empty string, which LPeg rejects, and it
-- would not be the counterpart of the plus handling in the decoder.
local encoder = Cs( (
                    R("09","AZ","az")^1
                  + S("-./_")^1
                  + P(" ") / "+"
                  + P("\n") / "\r\n"
                  + unescapedchar
                ) ^ 0 )

-- Make both patterns available to other code through lpeg.patterns.
lpegpatterns.urldecoder = decoder
lpegpatterns.urlencoder = encoder
87 88
-- Apply the decoder pattern to str. A false or nil argument is handed back
-- untouched, and so is the input when the pattern yields nothing.
function url.decode(str)
    if not str then
        return str
    end
    return lpegmatch(decoder, str) or str
end
89
-- Apply the encoder pattern to str. A false or nil argument is handed back
-- untouched, and so is the input when the pattern yields nothing.
function url.encode(str)
    if not str then
        return str
    end
    return lpegmatch(encoder, str) or str
end
90
do

    -- The file-level 'unescaper' local is only declared further down, so at
    -- this point that name would resolve to a (nil) global and every call
    -- with a string argument would fail inside lpeg.match. We therefore build
    -- the same pattern here from 'escapedchar', which is already in scope:
    -- each "%XX" is replaced by its character, anything else is copied.

    local unescaper = Cs((escapedchar + 1)^0)

    -- Replace percent escapes in str. A false or nil argument is returned
    -- as is.
    function url.unescape(str)
        return str and lpegmatch(unescaper, str) or str
    end

end
91 92
-- we assume schemes with more than 1 character (in order to avoid problems with windows disks)
93
-- we also assume that when we have a scheme, we also have an authority
94
--
95
-- maybe we should already split the query (better for unescaping as = & can be part of a value)
96 97
-- Component matchers. All of them are substitution captures, so whatever
-- 'escaped' replaces is already replaced in the captured text. The query is
-- the exception: it is captured raw (see the disabled variant) so that it can
-- be split before it gets unescaped.

local schemestr    = Cs((escaped+(1-colon-slash-qmark-hash))^2) -- at least two characters
local authoritystr = Cs((escaped+(1-      slash-qmark-hash))^0)
local pathstr      = Cs((escaped+(1-            qmark-hash))^0)
----- querystr     = Cs((escaped+(1-                  hash))^0)
local querystr     = Cs((        (1-                  hash))^0)
local fragmentstr  = Cs((escaped+(1-           endofstring))^0)

-- Each component is optional: when its leading delimiter is not found the
-- 'nothing' alternative produces an empty string instead, so a match always
-- yields five values.

local scheme    =                 schemestr    * colon + nothing
local authority = slash * slash * authoritystr         + nothing
local path      = slash         * pathstr              + nothing -- the leading slash is not part of the capture
local query     = qmark         * querystr             + nothing
local fragment  = hash          * fragmentstr          + nothing

local validurl  = scheme * authority * path * query * fragment
local parser    = Ct(validurl) -- collects the five captures in an indexed table

-- Make both patterns available to other code through lpeg.patterns.
lpegpatterns.url         = validurl
lpegpatterns.urlsplitter = parser
115 116
-- Percent escaping: runs of alphanumerics and of "-./_:" are copied, a space
-- becomes "%20" and any other character is replaced by its "%XX" form. (In an
-- LPeg replacement string "%%" stands for one literal percent sign.)
--
-- The space alternative has to match a literal space: an empty pattern would
-- make the loop body accept the empty string, which LPeg rejects, and it
-- would contradict the original remark that the space is the common case.
local escaper    = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_:")^1 + P(1) / escapes)^0) -- space happens most

-- The reverse: each "%XX" is replaced by its character, the rest is copied.
local unescaper  = Cs((escapedchar + 1)^0)

-- Cleanup of GET strings: "+++" becomes "%2B", a remaining "+" becomes "%20"
-- and the rest is copied. At least one character is needed, so the pattern
-- fails on an empty string.
local getcleaner = Cs((P("+++") / "%%2B" + P("+") / "%%20" + P(1))^1)

-- Make the patterns available to other code through lpeg.patterns.
lpegpatterns.urlunescaped  = escapedchar
lpegpatterns.urlescaper    = escaper
lpegpatterns.urlunescaper  = unescaper
lpegpatterns.urlgetcleaner = getcleaner
124 125
-- Run the getcleaner pattern over str and return its result; that is nil
-- when the pattern does not match.
function url.unescapeget(str)
    local cleaned = lpegmatch(getcleaner, str)
    return cleaned
end
128 129
-- todo: reconsider Ct as we can as well have five return values (saves a table)
130
-- so we can have two parsers, one with and one without
131 132
-- Split a url string with the parser pattern, which gives an indexed table
-- holding scheme, authority, path, query and fragment. Anything that is not
-- a string is returned untouched.
local function split(str)
    if type(str) ~= "string" then
        return str
    end
    return lpegmatch(parser, str) or str
end
135 136
-- A scheme only counts when it is followed by "://".
local isscheme = schemestr * colon * slash * slash -- this test also assumes authority

-- Return the scheme of str when it starts with one, otherwise false (also
-- for a nil or false argument).
local function hasscheme(str)
    if not str then
        return false
    end
    local found = lpegmatch(isscheme, str) -- at least one character
    if found == "" then
        return false
    end
    return found or false
end
146 147
--~ print(hasscheme("home:"))
148
--~ print(hasscheme("home://"))
149 150
-- todo: cache them
151 152
-- Patterns for recognizing qualified and root based paths and for swapping
-- characters.
-- NOTE(review): none of the names in this group is referenced elsewhere in
-- the visible source; confirm whether they are leftovers.

local rootletter       = R("az","AZ")
                       + S("_-+")
local separator        = P("://")
local qualified        = P(".")^0 * P("/")          -- optional dots followed by a slash
                       + rootletter * P(":")        -- one root letter and a colon
                       + rootletter^1 * separator   -- root letters followed by "://"
                       + rootletter^1 * P("/")      -- root letters followed by a slash
local rootbased        = P("/")
                       + rootletter * P(":")

-- built with lpeg.replacer from a search string and its replacement
local barswapper       = replacer("|",":")
local backslashswapper = replacer("\\","/")
164 165
-- queries:
166 167
-- Building blocks for splitting a query string into key/value pairs. Keys
-- run up to the first "=" and values up to the next "&" or the end of the
-- string; in both, 'plustospace' and 'escapedchar' substitutions are applied
-- while capturing.

local equal = P("=")
local amp   = P("&")
local key   = Cs(((plustospace + escapedchar + 1) - equal              )^0)
local value = Cs(((plustospace + escapedchar + 1) - amp   - endofstring)^0)

-- The grammar matches one or more pairs separated by "&". Each pair is a
-- group capture (key, value) that is folded with rawset into the table
-- produced by Ct(""), so the result of a successful match is a hash table.
-- The pattern fails (and lpeg.match returns nil) when the first pair has no
-- "=".

local splitquery = Cf ( Ct("") * P { "sequence",
    sequence = V("pair") * (amp * V("pair"))^0,
    pair     = Cg(key * equal * value),
}, rawset)
176 177
-- hasher
178 179
-- Splits an authority string into four captures: username, password, host
-- and port.
--
-- * the user part is optional and must be followed by "@"; it is either
--   "username:password" or just "username" (then the password is nil)
-- * without a user part both username and password are nil
-- * the host is everything up to an optional colon
-- * the text after that colon is passed through tonumber; without a colon
--   the port is nil

local userpart       = (1-atsign-colon)^1
local serverpart     = (1-colon)^1
local splitauthority = ((Cs(userpart) * colon * Cs(userpart) + Cs(userpart) * Cc(nil)) * atsign + Cc(nil) * Cc(nil))
                     * Cs(serverpart) * (colon * (serverpart/tonumber) + Cc(nil))
183 184
-- Split a url (or a plain filename) into a table with named components.
--
-- * nil and "" give a table with scheme "invalid" and the original value
-- * input in which neither a scheme nor a query is found is treated as a
--   plain file: scheme "file", the untouched string as path and filename,
--   and noscheme set to true
-- * anything else gives scheme, authority, path, query (the unescaped query
--   string), queries (the result of the splitquery pattern), fragment,
--   original, noscheme = false, filename, host and port
local function hashed(str) -- not yet ok (/test?test)
    if not str or str == "" then
        return {
            scheme   = "invalid",
            original = str,
        }
    end
    local parts     = split(str)
    local rawscheme = ""
    local rawquery  = ""
    if parts then
        rawscheme = parts[1]
        rawquery  = parts[4]
    end
    if rawscheme == "" and rawquery == "" then
        -- no scheme and no query: a regular filename
        return {
            scheme    = "file",
            authority = "",
            path      = str,
            query     = "",
            fragment  = "",
            original  = str,
            noscheme  = true,
            filename  = str,
        }
    end
    local authority = parts[2]
    local path      = parts[3]
    local filename  -- not always a filename but handy anyway
    local username, password, host, port
    if authority == "" then
        filename = path
    else
        -- these can be invalid
        username, password, host, port = lpegmatch(splitauthority, authority)
        if path == "" then
            filename = ""
        else
            -- this one can be invalid
            filename = authority .. "/" .. path
        end
    end
    return {
        scheme    = rawscheme,
        authority = authority,
        path      = path,
        query     = lpegmatch(unescaper, rawquery),  -- unescaped, but possible conflict with & and =
        queries   = lpegmatch(splitquery, rawquery), -- split first and then unescaped
        fragment  = parts[5],
        original  = str,
        noscheme  = false,
        filename  = filename,
        --
        host      = host,
        port      = port,
     -- username  = username,
     -- password  = password,
    }
end
251 252
-- inspect(hashed())
253
-- inspect(hashed(""))
254
-- inspect(hashed("template:///test"))
255
-- inspect(hashed("template:///test++.whatever"))
256
-- inspect(hashed("template:///test%2B%2B.whatever"))
257
-- inspect(hashed("template:///test%x.whatever"))
258
-- inspect(hashed("tem%2Bplate:///test%x.whatever"))
259 260
-- Here we assume:
261
--
262
-- files: /// = relative
263
-- files: //// = absolute (!)
264 265
--~ table.print(hashed("file://c:/opt/tex/texmf-local")) -- c:/opt/tex/texmf-local
266
--~ table.print(hashed("file://opt/tex/texmf-local" )) -- opt/tex/texmf-local
267
--~ table.print(hashed("file:///opt/tex/texmf-local" )) -- opt/tex/texmf-local
268
--~ table.print(hashed("file:////opt/tex/texmf-local" )) -- /opt/tex/texmf-local
269
--~ table.print(hashed("file:///./opt/tex/texmf-local" )) -- ./opt/tex/texmf-local
270 271
--~ table.print(hashed("c:/opt/tex/texmf-local" )) -- c:/opt/tex/texmf-local
272
--~ table.print(hashed("opt/tex/texmf-local" )) -- opt/tex/texmf-local
273
--~ table.print(hashed("/opt/tex/texmf-local" )) -- /opt/tex/texmf-local
274 275
-- Publish the local helpers defined above in the url namespace.
url.split     = split
url.hasscheme = hasscheme
url.hashed    = hashed
278 279
-- Prefix str with a scheme followed by ":///" unless hasscheme already
-- finds one; the scheme defaults to "file" when none is given.
function url.addscheme(str,scheme) -- no authority
    if hasscheme(str) then
        return str
    end
    return (scheme or "file") .. ":///" .. str
end
288 289
-- Build a url string from a table with the fields scheme, authority, path,
-- queries and fragment. Missing or empty parts are skipped, every part goes
-- through the escaper pattern, and the queries are added in sortedhash order
-- so the result is reproducible.
function url.construct(hash) -- todo: we need to escape !
    local pieces, n = { }, 0
    local function add(piece)
        n = n + 1
        pieces[n] = piece
    end
    local scheme    = hash.scheme
    local authority = hash.authority
    local path      = hash.path
    local queries   = hash.queries
    local fragment  = hash.fragment
    if scheme and scheme ~= "" then
        add(lpegmatch(escaper,scheme))
        add("://")
    end
    if authority and authority ~= "" then
        add(lpegmatch(escaper,authority))
    end
    if path and path ~= "" then
        add("/")
        add(lpegmatch(escaper,path))
    end
    if queries then
        local delimiter = "?" -- the first pair follows a "?", later ones a "&"
        for k, v in sortedhash(queries) do
            add(delimiter)
            add(lpegmatch(escaper,k)) -- is this escaped
            add("=")
            add(lpegmatch(escaper,v)) -- is this escaped
            delimiter = "&"
        end
    end
    if fragment and fragment ~= "" then
        add("#")
        add(lpegmatch(escaper,fragment))
    end
    return concat(pieces)
end
323 324
-- Matches a path that starts with a drive: an optional leading slash (which
-- is dropped), one letter, a colon or bar (both end up as a colon), a slash
-- and then the remainder.
local leadingslash = slash^-1 / ""
local driveletter  = R("az","AZ")
local drivemarker  = (S(":|") / ":") + P(":")
local pattern      = Cs(leadingslash * driveletter * drivemarker * slash * P(1)^0)

-- Return the normalized drive based path when the given name hashes to a
-- "file" scheme whose path matches the pattern above; in all other cases the
-- argument is returned as it was passed.
function url.filename(filename)
    local spec = hashed(filename)
    if spec.scheme == "file" then
        local path = spec.path
        if path then
            local cleaned = lpegmatch(pattern,path)
            if cleaned then
                return cleaned
            end
        end
    end
    return filename
end
331 332
-- print(url.filename("/c|/test"))
333
-- print(url.filename("/c/test"))
334
-- print(url.filename("file:///t:/sources/cow.svg"))
335 336
-- Run the escaper pattern over str and return the escaped string.
local function escapestring(str)
    local result = lpegmatch(escaper,str)
    return result
end

url.escape = escapestring
341 342
-- Split a query string with the splitquery pattern; an empty string is
-- returned when that pattern fails. Anything that is not a string is
-- returned as is.
function url.query(str)
    if type(str) ~= "string" then
        return str
    end
    return lpegmatch(splitquery,str) or ""
end
349 350
-- Turn data into a query string.
--
-- * a non-empty string is escaped as a whole (beware of double escaping)
-- * a non-empty table becomes "key=value" pairs joined by "&"; only the
--   values are escaped and the order is whatever next delivers
-- * everything else (including an empty string or table) gives no value,
--   which signals that there is no query
--
-- The string branch used to test and escape through two names that do not
-- exist ('str' and 'escape'), so it always raised an error; it now looks at
-- 'data' and uses the local escapestring helper.
function url.toquery(data)
    local td = type(data)
    if td == "string" then
        return data ~= "" and escapestring(data) or nil -- beware of double escaping
    elseif td == "table" then
        if next(data) then
            local t, n = { }, 0
            for k, v in next, data do
                n = n + 1
                t[n] = format("%s=%s",k,escapestring(v))
            end
            return concat(t,"&")
        end
    else
        -- nil is a signal that no query
    end
end
366 367
-- /test/ | /test | test/ | test => test
368 369
-- Drops all leading slashes and stops copying right before a slash that is
-- the last character of the input.
local finalslash = noslash * P(-1)
local pattern    = Cs(noslash^0 * (1 - finalslash)^0)

-- Strip the slashes at both ends of path; nil, false and "" give "".
function url.barepath(path)
    if path and path ~= "" then
        return lpegmatch(pattern,path)
    end
    return ""
end
378 379
-- print(url.barepath("/test"),url.barepath("test/"),url.barepath("/test/"),url.barepath("test"))
380
-- print(url.barepath("/x/yz"),url.barepath("x/yz/"),url.barepath("/x/yz/"),url.barepath("x/yz"))
381 382
--~ print(url.filename("file:///c:/oeps.txt"))
383
--~ print(url.filename("c:/oeps.txt"))
384
--~ print(url.filename("file:///oeps.txt"))
385
--~ print(url.filename("file:///etc/test.txt"))
386
--~ print(url.filename("/oeps.txt"))
387 388
--~ from the spec on the web (sort of):
389 390
--~ local function test(str)
391
--~ local t = url.hashed(str)
392
--~ t.constructed = url.construct(t)
393
--~ print(table.serialize(t))
394
--~ end
395 396
--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45"))
397
--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45"))
398 399
--~ test("sys:///./colo-rgb")
400 401
--~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg")
402
--~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
403
--~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
404
--~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
405
--~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
406 407
--~ test("file:///cow%20with%20spaces")
408
--~ test("file:///cow%20with%20spaces.pdf")
409
--~ test("cow%20with%20spaces.pdf")
410
--~ test("some%20file")
411
--~ test("/etc/passwords")
412
--~ test("http://www.myself.com/some%20words.html")
413
--~ test("file:///c:/oeps.txt")
414
--~ test("file:///c|/oeps.txt")
415
--~ test("file:///etc/oeps.txt")
416
--~ test("file://./etc/oeps.txt")
417
--~ test("file:////etc/oeps.txt")
418
--~ test("ftp://ftp.is.co.za/rfc/rfc1808.txt")
419
--~ test("http://www.ietf.org/rfc/rfc2396.txt")
420
--~ test("ldap://[2001:db8::7]/c=GB?objectClass?one#what")
421
--~ test("mailto:John.Doe@example.com")
422
--~ test("news:comp.infosystems.www.servers.unix")
423
--~ test("tel:+1-816-555-1212")
424
--~ test("telnet://192.0.2.16:80/")
425
--~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")
426
--~ test("http://www.pragma-ade.com/spaced%20name")
427 428
--~ test("zip:///oeps/oeps.zip#bla/bla.tex")
429
--~ test("zip:///oeps/oeps.zip?bla/bla.tex")
430 431
--~ table.print(url.hashed("/test?test"))
432