l-string.lua /size: 6644 b    last modification: 2020-07-01 14:35
1
-- Make sure the global 'modules' table exists and store the metadata of this
-- file under the key 'l-string'.
modules = modules or { }

modules['l-string'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
local
string
=
string
10
local
sub
,
gmatch
,
format
,
char
,
byte
,
rep
,
lower
=
string
.
sub
,
string
.
gmatch
,
string
.
format
,
string
.
char
,
string
.
byte
,
string
.
rep
,
string
.
lower
11
local
lpegmatch
,
patterns
=
lpeg
.
match
,
lpeg
.
patterns
12
local
P
,
S
,
C
,
Ct
,
Cc
,
Cs
=
lpeg
.
P
,
lpeg
.
S
,
lpeg
.
C
,
lpeg
.
Ct
,
lpeg
.
Cc
,
lpeg
.
Cs
13 14
-- Some functions are already defined in l-lpeg and maybe some from here will
15
-- move there (unless we also expose caches).
16 17
-- if not string.split then
18
--
19
-- function string.split(str,pattern)
20
-- local t = { }
21
-- if str ~= "" then
22
-- local n = 1
23
-- for s in gmatch(str..pattern,"(.-)"..pattern) do
24
-- t[n] = s
25
-- n = n + 1
26
-- end
27
-- end
28
-- return t
29
-- end
30
--
31
-- end
32 33
-- function string.unquoted(str)
34
-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern
35
-- end
36 37
-- Pattern behind string.unquoted: an opening quote, the captured text in
-- between and the same kind of closing quote. The single quoted form is tried
-- first, the double quoted one second. The squote, nosquote, dquote and
-- nodquote pieces are taken from lpeg.patterns (defined outside this file).
local unquoted
do
    local squote, dquote = patterns.squote, patterns.dquote
    local singlequoted   = squote * C(patterns.nosquote) * squote
    local doublequoted   = dquote * C(patterns.nodquote) * dquote
    unquoted = singlequoted + doublequoted
end
39 40
-- Returns what the 'unquoted' pattern captures from str. When the pattern does
-- not match (or yields a false value) str itself is returned unchanged.
function string.unquoted(str)
    local inner = lpegmatch(unquoted,str)
    if inner then
        return inner
    end
    return str
end
43 44
-- print(string.unquoted("test"))
45
-- print(string.unquoted([["t\"est"]]))
46
-- print(string.unquoted([["t\"est"x]]))
47
-- print(string.unquoted("\'test\'"))
48
-- print(string.unquoted('"test"'))
49
-- print(string.unquoted('"test"'))
50 51
-- Returns str formatted with the "%q" directive of string.format, so the
-- result is always wrapped in double quotes.
function string.quoted(str)
    local quoted = format("%q",str) -- always double quote
    return quoted
end
54 55
-- Counts how many times the Lua pattern 'pattern' matches in str, walking the
-- matches with gmatch. It works on bytes, so it is not meant for utf.
function string.count(str,pattern) -- variant 3
    local nextmatch = gmatch(str,pattern) -- not for utf
    local total     = 0
    while nextmatch() do
        total = total + 1
    end
    return total
end
62 63
-- Shortens str to at most n bytes. When str is longer than n, its tail is
-- replaced by 'sentinel' (default "...") so that the result is exactly n bytes
-- long. Strings that already fit are returned untouched.
--
-- When n is smaller than the sentinel itself there is no room for any part of
-- str: the sentinel is then cut down to n bytes (an empty string for n <= 0).
-- Without that guard the end index passed to sub becomes negative, which makes
-- sub count from the end of str and return a result longer than the input.
function string.limit(str,n,sentinel) -- not utf proof
    if #str <= n then
        return str
    end
    sentinel = sentinel or "..."
    local keep = n - #sentinel
    if keep < 0 then
        return sub(sentinel,1,n > 0 and n or 0)
    end
    return sub(str,1,keep) .. sentinel
end
71 72
-- Local shortcuts to ready made patterns in lpeg.patterns (these are defined
-- outside this file); each one backs the string function of the same name
-- below.
local stripper, fullstripper, collapser, nospacer, longtostring =
    patterns.stripper,
    patterns.fullstripper,
    patterns.collapser,
    patterns.nospacer,
    patterns.longtostring
77 78
-- Runs the 'stripper' pattern (from lpeg.patterns, defined elsewhere) over str
-- and returns its first result. An empty string is returned when str is nil or
-- false, or when the match delivers nothing.
function string.strip(str)
    if not str then
        return ""
    end
    return lpegmatch(stripper,str) or ""
end
81 82
-- Runs the 'fullstripper' pattern (from lpeg.patterns, defined elsewhere) over
-- str and returns its first result. An empty string is returned when str is
-- nil or false, or when the match delivers nothing.
function string.fullstrip(str)
    if not str then
        return ""
    end
    return lpegmatch(fullstripper,str) or ""
end
85 86
-- Runs the 'collapser' pattern (from lpeg.patterns, defined elsewhere) over
-- str and returns its first result. An empty string is returned when str is
-- nil or false, or when the match delivers nothing.
function string.collapsespaces(str)
    if not str then
        return ""
    end
    return lpegmatch(collapser,str) or ""
end
89 90
-- Runs the 'nospacer' pattern (from lpeg.patterns, defined elsewhere) over str
-- and returns its first result. An empty string is returned when str is nil or
-- false, or when the match delivers nothing.
function string.nospaces(str)
    if not str then
        return ""
    end
    return lpegmatch(nospacer,str) or ""
end
93 94
-- Runs the 'longtostring' pattern (from lpeg.patterns, defined elsewhere) over
-- str and returns its first result. An empty string is returned when str is
-- nil or false, or when the match delivers nothing. Inside this function the
-- name 'longtostring' refers to the local pattern, not to this function.
function string.longtostring(str)
    if not str then
        return ""
    end
    return lpegmatch(longtostring,str) or ""
end
97 98
-- function string.is_empty(str)
99
-- return not find(str,"%S")
100
-- end
101 102
-- Succeeds only when the whole subject consists of spaces: zero or more spaces
-- followed by the end of the input (-P(1) is the same pattern as P(-1)).
-- NOTE(review): the literal is taken to be one space; confirm against the
-- upstream file.
local pattern = P(" ")^0 * -P(1) -- maybe also newlines

-- patterns.onlyspaces = pattern

-- Tells whether str has no content: true for nil, false and the empty string,
-- and otherwise true only when str matches 'pattern' in full. The result is
-- always a real boolean.
function string.is_empty(str)
    if str and str ~= "" then
        return not not lpegmatch(pattern,str)
    end
    return true
end
113 114
-- if not string.escapedpattern then
115
--
116
-- local patterns_escapes = {
117
-- ["%"] = "%%",
118
-- ["."] = "%.",
119
-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
120
-- ["["] = "%[", ["]"] = "%]",
121
-- ["("] = "%(", [")"] = "%)",
122
-- -- ["{"] = "%{", ["}"] = "%}"
123
-- -- ["^"] = "%^", ["$"] = "%$",
124
-- }
125
--
126
-- local simple_escapes = {
127
-- ["-"] = "%-",
128
-- ["."] = "%.",
129
-- ["?"] = ".",
130
-- ["*"] = ".*",
131
-- }
132
--
133
-- function string.escapedpattern(str,simple)
134
-- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
135
-- end
136
--
137
-- function string.topattern(str,lowercase,strict)
138
-- if str == "" then
139
-- return ".*"
140
-- else
141
-- str = gsub(str,".",simple_escapes)
142
-- if lowercase then
143
-- str = lower(str)
144
-- end
145
-- if strict then
146
-- return "^" .. str .. "$"
147
-- else
148
-- return str
149
-- end
150
-- end
151
-- end
152
--
153
-- end
154 155
--- needs checking
156 157
local anything = patterns.anything

-- Builds a pattern that matches one character out of 'set' and, when used
-- inside a substitution capture, puts 'prefix' in front of it.
local function prefixed(prefix,set)
    return Cc(prefix) * S(set)
end

local moreescapes  = prefixed("%",".-+%?()[]*$^{}")
local allescapes   = prefixed("%",".-+%?()[]*") -- also {} and ^$ ?
local someescapes  = prefixed("%",".-+%()[]")   -- also {} and ^$ ?
local matchescapes = prefixed(".","*?")         -- wildcard and single match

-- Shared body of the two "simple" converters: escape a few magic characters
-- and put a dot in front of * and ?, leaving every other character as it is.
local simplebody = (someescapes + matchescapes + anything)^0

local pattern_m = Cs((moreescapes + anything)^0)
local pattern_a = Cs((allescapes  + anything)^0)
local pattern_b = Cs(simplebody)
local pattern_c = Cs(Cc("^") * simplebody * Cc("$")) -- anchored at both ends
167 168
-- Converts str with one of two substitution patterns: pattern_b when 'simple'
-- is set (a few magic characters escaped, * and ? get a dot in front), and
-- pattern_a otherwise (the larger set of magic characters escaped with %).
function string.escapedpattern(str,simple)
    local converter
    if simple then
        converter = pattern_b
    else
        converter = pattern_a
    end
    return lpegmatch(converter,str)
end
171 172
-- Turns str into a Lua pattern string.
--
--   str       : text to convert; anything that is not a non-empty string
--               gives the catch-all ".*"
--   lowercase : when set, the converted result is passed through lower
--   strict    : "all" selects pattern_m (escape only), any other true value
--               selects pattern_c (result anchored with ^ and $), and a false
--               value selects pattern_b (unanchored)
function string.topattern(str,lowercase,strict)
    if type(str) ~= "string" or str == "" then
        return ".*"
    end
    local converter
    if strict == "all" then
        converter = pattern_m
    elseif strict then
        converter = pattern_c
    else
        converter = pattern_b
    end
    local result = lpegmatch(converter,str)
    if lowercase then
        result = lower(result)
    end
    return result
end
188 189
-- print(string.escapedpattern("abc*234",true))
190
-- print(string.escapedpattern("12+34*.tex",false))
191
-- print(string.escapedpattern("12+34*.tex",true))
192
-- print(string.topattern ("12+34*.tex",false,false))
193
-- print(string.topattern ("12+34*.tex",false,true))
194 195
-- Returns str when it is a non-empty string. In all other cases 'default' is
-- returned, where a false or missing default becomes nil.
function string.valid(str,default)
    if type(str) == "string" and str ~= "" then
        return str
    end
    return default or nil
end
198 199
-- handy fallback
200 201
-- Identity function: hands back its argument untouched.
function string.itself(s)
    return s
end
202 203
-- also handy (see utf variant)
204 205
-- Both patterns walk the subject one byte at a time and collect the results in
-- a table: pattern_c stores the characters, pattern_b passes each character
-- through byte first and stores the numbers.
local pattern_c = Ct(C(1)^0) -- string and not utf !
local pattern_b = Ct((C(1)/byte)^0)

-- Splits str into a table with one entry per byte: byte values when 'bytes' is
-- set, single character strings otherwise.
function string.totable(str,bytes)
    if bytes then
        return lpegmatch(pattern_b,str)
    else
        return lpegmatch(pattern_c,str)
    end
end
211 212
-- handy from within tex:
213 214
-- Substitution pattern created by lpeg.replacer (an extension that is not part
-- of stock lpeg, presumably provided by l-lpeg) for the pair "@" and "%%".
local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!

-- Like format, but the format string is first run through 'replacer', so that
-- "@" can be used where a format string would otherwise need a percent sign.
function string.tformat(fmt,...)
    local template = lpegmatch(replacer,fmt)
    return format(template,...)
end
219 220
-- obsolete names:
221 222
-- Older spellings, kept as aliases of the current functions.
string.quote, string.unquote = string.quoted, string.unquoted
224 225
-- new
226 227
if not string.bytetable then -- used in font-cff.lua

    -- Largest number of bytes that is fetched with a single call to byte.
    local limit = 5000 -- we can go to 8000 in luajit and much higher in lua if needed

    -- Returns a table with the byte values of str, one entry per byte. Up to
    -- 'limit' bytes are fetched in one go; the bytes beyond that are appended
    -- one at a time.
    function string.bytetable(str) -- from a string
        local n = #str
        if n <= limit then
            return { byte(str,1,n) }
        end
        local t = { byte(str,1,limit) }
        for i=limit+1,n do
            t[i] = byte(str,i)
        end
        return t
    end

end
245