l-string.lua /size: 6955 b    last modification: 2023-12-21 09:44
-- Module registration: make sure the global "modules" table exists and store
-- the metadata of this library in it under the key 'l-string'.

modules = modules or { }

modules['l-string'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9local string = string
10local sub, gmatch, format, char, byte, rep, lower, find = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower, string.find
11local lpegmatch, patterns = lpeg.match, lpeg.patterns
12local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs
13
14-- Some functions are already defined in l-lpeg and maybe some from here will
15-- move there (unless we also expose caches).
16
17-- if not string.split then
18--
19--     function string.split(str,pattern)
20--         local t = { }
21--         if str ~= "" then
22--             local n = 1
23--             for s in gmatch(str..pattern,"(.-)"..pattern) do
24--                 t[n] = s
25--                 n = n + 1
26--             end
27--         end
28--         return t
29--     end
30--
31-- end
32
33-- function string.unquoted(str)
34--     return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern
35-- end
36
-- Either a single quoted or a double quoted run; only the text between the
-- quotes is captured. The building blocks come from lpeg.patterns (defined
-- elsewhere, presumably in l-lpeg).

local unquoted = (patterns.squote * C(patterns.nosquote) * patterns.squote)
               + (patterns.dquote * C(patterns.nodquote) * patterns.dquote)

-- Returns the text between the outer quotes of str, or str itself when the
-- pattern above does not match.
-- NOTE(review): the pattern has no end-of-string anchor, so it looks like text
-- following the closing quote is silently dropped -- confirm this is intended.

function string.unquoted(str)
    local inner = lpegmatch(unquoted,str)
    if inner then
        return inner
    end
    return str
end
43
44-- print(string.unquoted("test"))
45-- print(string.unquoted([["t\"est"]]))
46-- print(string.unquoted([["t\"est"x]]))
47-- print(string.unquoted("\'test\'"))
48-- print(string.unquoted('"test"'))
49-- print(string.unquoted('"test"'))
50
-- Wraps str in double quotes using the "%q" directive of string.format, which
-- also escapes whatever is needed to read the result back as a Lua string.

function string.quoted(str)
    local quoted = format("%q",str) -- always double quote
    return quoted
end
54
55-- function string.count(str,pattern) -- variant 3
56--     local n = 0
57--     for _ in gmatch(str,pattern) do -- not for utf
58--         n = n + 1
59--     end
60--     return n
61-- end
62
-- Counts the non-overlapping matches of a Lua pattern in str.
--
-- The scan resumes right after the end of each match (as reported by find)
-- instead of skipping #pattern bytes, so patterns whose matches are longer or
-- shorter than the pattern text itself (for instance "%d+") are counted
-- properly. An empty match moves the scan forward by one position, which
-- guarantees termination: an empty pattern gives #str + 1 matches instead of
-- an endless loop.
--
-- str     : the string to search (not utf aware)
-- pattern : a Lua pattern (magic characters are not escaped here)
--
-- Returns the number of matches (0 when there is none).

function string.count(str,pattern)
    local n      = 0
    local start  = 1
    local finish = #str + 1 -- an empty match is still possible at this position
    while start <= finish do
        local first, last = find(str,pattern,start)
        if not first then
            break
        end
        n = n + 1
        if last < first then
            start = first + 1 -- empty match: step over one position
        else
            start = last + 1
        end
    end
    return n
end
78
-- Limits str to at most n bytes. When the string is too long it is cut and the
-- sentinel is appended in such a way that the result is exactly n bytes long.
--
-- str      : the string to limit
-- n        : the maximum length in bytes
-- sentinel : the marker that signals truncation (default: "...")
--
-- When there is no room for the sentinel (n < #sentinel) the string is simply
-- cut at n bytes; without this check the negative end position would make sub
-- count from the end of the string and the result could become longer than
-- the input. A non positive n gives an empty string.

function string.limit(str,n,sentinel) -- not utf proof
    if #str <= n then
        return str
    end
    sentinel = sentinel or "..."
    local keep = n - #sentinel
    if keep >= 0 then
        return sub(str,1,keep) .. sentinel
    elseif n > 0 then
        return sub(str,1,n) -- no room for the sentinel
    else
        return ""
    end
end
87
88local stripper     = patterns.stripper
89local fullstripper = patterns.fullstripper
90local collapser    = patterns.collapser
91local nospacer     = patterns.nospacer
92local longtostring = patterns.longtostring
93
-- Applies patterns.stripper to str (presumably removing leading and trailing
-- whitespace; the pattern is defined elsewhere). A nil or false argument, or a
-- failing match, gives an empty string.

function string.strip(str)
    if not str then
        return ""
    end
    return lpegmatch(stripper,str) or ""
end
97
-- Applies patterns.fullstripper to str (the pattern is defined elsewhere;
-- presumably a more thorough variant of the stripper). A nil or false
-- argument, or a failing match, gives an empty string.

function string.fullstrip(str)
    if not str then
        return ""
    end
    return lpegmatch(fullstripper,str) or ""
end
101
-- Applies patterns.collapser to str (presumably reducing runs of spaces to a
-- single one; the pattern is defined elsewhere). A nil or false argument, or a
-- failing match, gives an empty string.

function string.collapsespaces(str)
    if not str then
        return ""
    end
    return lpegmatch(collapser,str) or ""
end
105
-- Applies patterns.nospacer to str (presumably removing all spaces; the
-- pattern is defined elsewhere). A nil or false argument, or a failing match,
-- gives an empty string.

function string.nospaces(str)
    if not str then
        return ""
    end
    return lpegmatch(nospacer,str) or ""
end
109
-- Applies patterns.longtostring to str (the pattern is defined elsewhere;
-- presumably it turns a multi-line string into a single line one). A nil or
-- false argument, or a failing match, gives an empty string.

function string.longtostring(str)
    if not str then
        return ""
    end
    return lpegmatch(longtostring,str) or ""
end
113
114-- function string.is_empty(str)
115--     return not find(str,"%S")
116-- end
117
-- Zero or more spaces followed by the end of the string.

local pattern = P(" ")^0 * P(-1) -- maybe also newlines

-- patterns.onlyspaces = pattern

-- Reports whether str has no content: true for nil, false, the empty string
-- and strings that consist of spaces only, false otherwise.

function string.is_empty(str)
    if str and str ~= "" then
        if lpegmatch(pattern,str) then
            return true
        else
            return false
        end
    end
    return true
end
129
130-- if not string.escapedpattern then
131--
132--     local patterns_escapes = {
133--         ["%"] = "%%",
134--         ["."] = "%.",
135--         ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
136--         ["["] = "%[", ["]"] = "%]",
137--         ["("] = "%(", [")"] = "%)",
138--      -- ["{"] = "%{", ["}"] = "%}"
139--      -- ["^"] = "%^", ["$"] = "%$",
140--     }
141--
142--     local simple_escapes = {
143--         ["-"] = "%-",
144--         ["."] = "%.",
145--         ["?"] = ".",
146--         ["*"] = ".*",
147--     }
148--
149--     function string.escapedpattern(str,simple)
150--         return (gsub(str,".",simple and simple_escapes or patterns_escapes))
151--     end
152--
153--     function string.topattern(str,lowercase,strict)
154--         if str == "" then
155--             return ".*"
156--         else
157--             str = gsub(str,".",simple_escapes)
158--             if lowercase then
159--                 str = lower(str)
160--             end
161--             if strict then
162--                 return "^" .. str .. "$"
163--             else
164--                 return str
165--             end
166--         end
167--     end
168--
169-- end
170
171--- needs checking
172
-- Building blocks for turning user input into Lua patterns. Inside a Cs the
-- constant capture is inserted in front of the matched character, so the
-- escapes below prefix a magic character with "%" and the matchescapes turn
-- "*" and "?" into ".*" and ".?".

local anything     = patterns.anything
local percent      = Cc("%")
local moreescapes  = percent * S(".-+%?()[]*$^{}")
local allescapes   = percent * S(".-+%?()[]*")   -- also {} and ^$ ?
local someescapes  = percent * S(".-+%()[]")     -- also {} and ^$ ?
local matchescapes = Cc(".") * S("*?")           -- wildcard and single match

-- The wildcard aware conversion is shared by the loose and the anchored variant.

local wildcarded = ( someescapes + matchescapes + anything )^0

local pattern_m = Cs ( ( moreescapes + anything )^0 )        -- escape everything, including ^ $ { }
local pattern_a = Cs ( ( allescapes  + anything )^0 )        -- escape, no wildcards
local pattern_b = Cs ( wildcarded )                          -- escape, keep * and ? as wildcards
local pattern_c = Cs ( Cc("^") * wildcarded * Cc("$") )      -- idem, anchored at both ends
183
-- Escapes the magic characters in str so that it can be used in a Lua pattern.
-- With a true simple argument the "*" and "?" characters are not escaped but
-- converted by pattern_b, otherwise pattern_a is applied.

function string.escapedpattern(str,simple)
    local escaper = pattern_a
    if simple then
        escaper = pattern_b
    end
    return lpegmatch(escaper,str)
end
187
-- Converts str into a Lua pattern.
--
-- str       : the specification; anything but a non empty string gives ".*"
-- lowercase : when true the resulting pattern is lowercased
-- strict    : "all" selects pattern_m, any other true value selects the
--             anchored pattern_c, a false value selects pattern_b

function string.topattern(str,lowercase,strict)
    if type(str) ~= "string" or str == "" then
        return ".*"
    end
    local converter
    if strict == "all" then
        converter = pattern_m
    elseif strict then
        converter = pattern_c
    else
        converter = pattern_b
    end
    local result = lpegmatch(converter,str)
    if lowercase then
        return lower(result)
    end
    return result
end
204
205-- print(string.escapedpattern("abc*234",true))
206-- print(string.escapedpattern("12+34*.tex",false))
207-- print(string.escapedpattern("12+34*.tex",true))
208-- print(string.topattern     ("12+34*.tex",false,false))
209-- print(string.topattern     ("12+34*.tex",false,true))
210
-- Returns str when it is a non empty string, otherwise the given default (a
-- missing or false default gives nil).

function string.valid(str,default)
    if type(str) == "string" and str ~= "" then
        return str
    end
    if default then
        return default
    end
    return nil
end
214
215-- handy fallback
216
-- Identity function: hands back its argument untouched.

function string.itself(s)
    return s
end
218
219-- also handy (see utf variant)
220
-- Two collectors: one gathers every single byte as a one character string, the
-- other passes each of them through byte so that numbers are collected. They
-- have their own names so that they do not shadow the escape patterns that
-- are defined earlier in this file.

local tocharacters = Ct( C(1)      ^0) -- string and not utf !
local tobytes      = Ct((C(1)/byte)^0)

-- Splits str into a table with one entry per byte: one character strings by
-- default, byte values when bytes is true.

function string.totable(str,bytes)
    if bytes then
        return lpegmatch(tobytes,str)
    end
    return lpegmatch(tocharacters,str)
end
227
228-- handy from within tex:
229
-- Built with lpeg.replacer (defined elsewhere); presumably it rewrites each
-- "@" in a format specification into "%".

local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!

-- Like string.format, but the format specification is first passed through
-- the replacer above, so directives can be written with "@".

function string.tformat(fmt,...)
    local template = lpegmatch(replacer,fmt)
    return format(template,...)
end
235
236-- obsolete names:
237
-- Old names, kept as aliases of the current functions.

string.quote, string.unquote = string.quoted, string.unquoted
240
241-- new
242
if not string.bytetable then -- used in font-cff.lua

    -- The maximum number of values that is fetched with a single call to byte.

    local limit = 5000 -- we can go to 8000 in luajit and much higher in lua if needed

    -- Converts str into a table that holds the byte value of every position.
    -- Up to limit bytes are fetched in one go, the remaining ones are added
    -- one by one.

    function string.bytetable(str) -- from a string
        local n = #str
        if n <= limit then
            return { byte(str,1,n) }
        end
        local t = { byte(str,1,limit) }
        local i = limit
        while i < n do
            i = i + 1
            t[i] = byte(str,i)
        end
        return t
    end

end
261