-- l-string.lua / size: 6644 b / last modification: 2020-07-01 14:35
-- Register this file in the global 'modules' table, creating that table
-- first when nothing has set it up yet.

modules = modules or { }

modules['l-string'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local string = string
10local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
11local lpegmatch, patterns = lpeg.match, lpeg.patterns
12local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs
13
14-- Some functions are already defined in l-lpeg and maybe some from here will
15-- move there (unless we also expose caches).
16
17-- if not string.split then
18--
19--     function string.split(str,pattern)
20--         local t = { }
21--         if str ~= "" then
22--             local n = 1
23--             for s in gmatch(str..pattern,"(.-)"..pattern) do
24--                 t[n] = s
25--                 n = n + 1
26--             end
27--         end
28--         return t
29--     end
30--
31-- end
32
33-- function string.unquoted(str)
34--     return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern
35-- end
36
-- Either delimiter / content / delimiter sequence; only the content between
-- the delimiters is captured. The squote, dquote, nosquote and nodquote
-- patterns come from the shared patterns table (not defined in this file).

local squote, dquote = patterns.squote, patterns.dquote

local unquoter = squote * C(patterns.nosquote) * squote
               + dquote * C(patterns.nodquote) * dquote

-- Returns the captured content when str matches the pattern above (the match
-- is anchored at the start of str only), otherwise str itself.

function string.unquoted(str)
    local content = lpegmatch(unquoter,str)
    if content then
        return content
    end
    return str
end
43
44-- print(string.unquoted("test"))
45-- print(string.unquoted([["t\"est"]]))
46-- print(string.unquoted([["t\"est"x]]))
47-- print(string.unquoted("\'test\'"))
48-- print(string.unquoted('"test"'))
49-- print(string.unquoted('"test"'))
50
-- Returns str formatted with the %q directive, so the result is always
-- wrapped in double quotes.

function string.quoted(str)
    local quoted = format("%q",str)
    return quoted
end
54
-- Counts how often the Lua pattern matches in str (byte based, so not
-- suitable for utf patterns).

function string.count(str,pattern) -- variant 3
    local total     = 0
    local nextmatch = gmatch(str,pattern)
    while nextmatch() do
        total = total + 1
    end
    return total
end
62
-- Limits str to at most n bytes (not utf proof). A string longer than n is
-- cut and ends with the sentinel (default "..."), so the result is n bytes.
--
-- str      : the string to limit
-- n        : the maximum length in bytes
-- sentinel : optional marker that signals the truncation
--
-- When n is smaller than the sentinel itself, the sentinel is clipped to n
-- bytes. Without that check the end index passed to sub becomes negative,
-- which counts from the end of str and yields a result longer than n.

function string.limit(str,n,sentinel) -- not utf proof
    if #str > n then
        sentinel = sentinel or "..."
        local keep = n - #sentinel
        if keep >= 0 then
            return sub(str,1,keep) .. sentinel
        elseif n > 0 then
            -- no room for the complete sentinel
            return sub(sentinel,1,n)
        else
            return ""
        end
    else
        return str
    end
end
71
-- Local shortcuts to the entries of the shared patterns table that the
-- functions below apply (the patterns themselves are defined elsewhere).

local stripper, fullstripper = patterns.stripper, patterns.fullstripper
local collapser, nospacer    = patterns.collapser, patterns.nospacer
local longtostring           = patterns.longtostring
77
-- Applies the stripper pattern to str. Returns "" when str is nil or false
-- or when the match gives no result.
-- NOTE(review): presumably removes leading and trailing whitespace; confirm
-- against the definition of patterns.stripper.

function string.strip(str)
    if not str then
        return ""
    end
    return lpegmatch(stripper,str) or ""
end
81
-- Applies the fullstripper pattern to str. Returns "" when str is nil or
-- false or when the match gives no result.

function string.fullstrip(str)
    if not str then
        return ""
    end
    return lpegmatch(fullstripper,str) or ""
end
85
-- Applies the collapser pattern to str. Returns "" when str is nil or false
-- or when the match gives no result.

function string.collapsespaces(str)
    if not str then
        return ""
    end
    return lpegmatch(collapser,str) or ""
end
89
-- Applies the nospacer pattern to str. Returns "" when str is nil or false
-- or when the match gives no result.

function string.nospaces(str)
    if not str then
        return ""
    end
    return lpegmatch(nospacer,str) or ""
end
93
-- Applies the longtostring pattern to str. Returns "" when str is nil or
-- false or when the match gives no result.

function string.longtostring(str)
    if not str then
        return ""
    end
    return lpegmatch(longtostring,str) or ""
end
97
98-- function string.is_empty(str)
99--     return not find(str,"%S")
100-- end
101
-- Zero or more spaces followed by the end of the subject.

local onlyspaces = P(" ")^0 * P(-1) -- maybe also newlines

-- patterns.onlyspaces = onlyspaces

-- Returns true when str is nil, false, "" or consists of spaces only, and
-- false otherwise. Only the space character counts: tabs and newlines make
-- a string non-empty.

function string.is_empty(str)
    if str and str ~= "" then
        return not not lpegmatch(onlyspaces,str)
    end
    return true
end
113
114-- if not string.escapedpattern then
115--
116--     local patterns_escapes = {
117--         ["%"] = "%%",
118--         ["."] = "%.",
119--         ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
120--         ["["] = "%[", ["]"] = "%]",
121--         ["("] = "%(", [")"] = "%)",
122--      -- ["{"] = "%{", ["}"] = "%}"
123--      -- ["^"] = "%^", ["$"] = "%$",
124--     }
125--
126--     local simple_escapes = {
127--         ["-"] = "%-",
128--         ["."] = "%.",
129--         ["?"] = ".",
130--         ["*"] = ".*",
131--     }
132--
133--     function string.escapedpattern(str,simple)
134--         return (gsub(str,".",simple and simple_escapes or patterns_escapes))
135--     end
136--
137--     function string.topattern(str,lowercase,strict)
138--         if str == "" then
139--             return ".*"
140--         else
141--             str = gsub(str,".",simple_escapes)
142--             if lowercase then
143--                 str = lower(str)
144--             end
145--             if strict then
146--                 return "^" .. str .. "$"
147--             else
148--                 return str
149--             end
150--         end
151--     end
152--
153-- end
154
155--- needs checking
156
-- Building blocks for turning a plain string into a Lua pattern. Inside a
-- substitution capture (Cs) a constant capture (Cc) consumes no input and
-- contributes its value, so each rule below inserts a character in front of
-- the character that it matches:
--
--   moreescapes / allescapes / someescapes : "%" before a magic character
--   matchescapes                           : "." before "*" or "?", which
--                                            gives ".*" and ".?"
--
-- The three escape sets differ only in which characters they cover.

local anything     = patterns.anything
local percent      = Cc("%")

local moreescapes  = percent * S(".-+%?()[]*$^{}")
local allescapes   = percent * S(".-+%?()[]*")   -- also {} and ^$ ?
local someescapes  = percent * S(".-+%()[]")     -- also {} and ^$ ?
local matchescapes = Cc(".") * S("*?")           -- wildcard and single match

-- the wildcard aware variant is shared by pattern_b and pattern_c

local simple = someescapes + matchescapes + anything

local pattern_m = Cs((moreescapes + anything)^0)
local pattern_a = Cs((allescapes  + anything)^0)
local pattern_b = Cs(simple^0)
local pattern_c = Cs(Cc("^") * simple^0 * Cc("$")) -- anchored at both ends
167
-- Escapes str for use as a Lua pattern. With simple set, "*" and "?" are
-- turned into wildcards (pattern_b); otherwise they are escaped like the
-- other magic characters (pattern_a).

function string.escapedpattern(str,simple)
    if simple then
        return lpegmatch(pattern_b,str)
    else
        return lpegmatch(pattern_a,str)
    end
end
171
-- Converts str into a Lua pattern.
--
-- str       : the string to convert; an empty string or a non string value
--             gives the match-all pattern ".*"
-- lowercase : when set, the resulting pattern is lowercased
-- strict    : "all" escapes the largest set of magic characters (pattern_m),
--             any other true value gives a wildcard aware pattern that is
--             anchored with "^" and "$" (pattern_c), a false value gives the
--             same pattern without anchors (pattern_b)

function string.topattern(str,lowercase,strict)
    if type(str) ~= "string" or str == "" then
        return ".*"
    end
    local converter
    if strict == "all" then
        converter = pattern_m
    elseif strict then
        converter = pattern_c
    else
        converter = pattern_b
    end
    local result = lpegmatch(converter,str)
    if lowercase then
        return lower(result)
    end
    return result
end
188
189-- print(string.escapedpattern("abc*234",true))
190-- print(string.escapedpattern("12+34*.tex",false))
191-- print(string.escapedpattern("12+34*.tex",true))
192-- print(string.topattern     ("12+34*.tex",false,false))
193-- print(string.topattern     ("12+34*.tex",false,true))
194
-- Returns str when it is a non empty string. Otherwise returns default, or
-- nil when default is nil or false.

function string.valid(str,default)
    if type(str) == "string" and str ~= "" then
        return str
    end
    if default then
        return default
    end
    return nil
end
198
199-- handy fallback
200
-- Identity function: returns its (first) argument unchanged.

function string.itself(s)
    return s
end
202
203-- also handy (see utf variant)
204
-- Both patterns walk over the subject one byte at a time and collect the
-- results in a table: either the one byte strings themselves or, by feeding
-- each of them to byte, their numeric values.

local tochars = Ct( C(1)      ^0) -- string and not utf !
local tobytes = Ct((C(1)/byte)^0)

-- Splits str into a table with one entry per byte. With bytes set the
-- entries are numbers, otherwise they are one byte strings.

function string.totable(str,bytes)
    if bytes then
        return lpegmatch(tobytes,str)
    else
        return lpegmatch(tochars,str)
    end
end
211
212-- handy from within tex:
213
-- lpeg.replacer is not defined in this file.
-- NOTE(review): presumably it builds a pattern that replaces each "@" by a
-- single "%" (the doubled one being an escape); confirm in l-lpeg.

local atreplacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!

-- Runs fmt through the replacer above and formats the remaining arguments
-- with the resulting format string.

function string.tformat(fmt,...)
    local converted = lpegmatch(atreplacer,fmt)
    return format(converted,...)
end
219
220-- obsolete names:
221
-- the old names refer to the very same functions

string.quote, string.unquote = string.quoted, string.unquoted
224
225-- new
226
if not string.bytetable then -- used in font-cff.lua

    -- The number of bytes that is fetched with one single call to byte.

    local limit = 5000 -- we can go to 8000 in luajit and much higher in lua if needed

    -- Returns a table with the numeric value of every byte in str. Strings
    -- of at most limit bytes are converted in one go. For longer ones the
    -- first limit bytes are converted in one go and the remaining bytes are
    -- added one by one.

    function string.bytetable(str) -- from a string
        local length = #str
        if length <= limit then
            return { byte(str,1,length) }
        end
        local bytes = { byte(str,1,limit) }
        for index=limit+1,length do
            bytes[index] = byte(str,index)
        end
        return bytes
    end

end
245