util-str.lua /size: 45 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['util-str'] = {
2    version   = 1.001,
3    comment   = "companion to luat-lib.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9utilities         = utilities or { }
10utilities.strings = utilities.strings or { }
11local strings     = utilities.strings
12
13local format, gsub, rep, sub, find, char = string.format, string.gsub, string.rep, string.sub, string.find, string.char
14local load, dump = load, string.dump
15local tonumber, type, tostring, next, setmetatable = tonumber, type, tostring, next, setmetatable
16local unpack, concat = table.unpack, table.concat
17local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc
18local patterns, lpegmatch = lpeg.patterns, lpeg.match
19local tsplitat = lpeg.tsplitat
20local utfchar, utfbyte, utflen = utf.char, utf.byte, utf.len
21
22----- loadstripped = utilities.lua.loadstripped
23----- setmetatableindex = table.setmetatableindex
24
-- Compiles the source string, dumps it with the strip flag set and loads the
-- resulting binary chunk again. When a shortcuts table is given it is passed
-- to load as the environment of the reloaded chunk.
local loadstripped = function(str,shortcuts)
    local bytecode = dump(load(str),true)
    if not shortcuts then
        return load(bytecode)
    end
    return load(bytecode,nil,nil,shortcuts)
end
32
33-- todo: make a special namespace for the formatter
34
35if not number then number = { } end -- temp hack for luatex-fonts
36
37local stripzero   = patterns.stripzero
38local stripzeros  = patterns.stripzeros
39local newline     = patterns.newline
40local endofstring = patterns.endofstring
41local anything    = patterns.anything
42local whitespace  = patterns.whitespace
43local space       = patterns.space
44local spacer      = patterns.spacer
45local spaceortab  = patterns.spaceortab
46local digit       = patterns.digit
47local sign        = patterns.sign
48local period      = patterns.period
49
50-- local function points(n)
51--     n = tonumber(n)
52--     return (not n or n == 0) and "0pt" or lpegmatch(stripzeros,format("%.5fpt",n/65536))
53-- end
54
55-- local function basepoints(n)
56--     n = tonumber(n)
57--     return (not n or n == 0) and "0bp" or lpegmatch(stripzeros,format("%.5fbp", n*(7200/7227)/65536))
58-- end
59
-- Multiplication factors used to turn the incoming value into pt and bp.
local ptf = 1 / 65536
local bpf = (7200/7227) / 65536

-- Formats n (scaled by ptf) as a string with a "pt" suffix. Zero and values
-- that tonumber cannot convert give "0pt". Whole results are printed with %i,
-- other results with five decimals that are then passed through stripzeros.
local function points(n)
    if n ~= 0 then
        n = tonumber(n)
        if n and n ~= 0 then
            n = n * ptf
            if n % 1 ~= 0 then
                -- the plural stripzeros is used as we need to keep the pt
                return lpegmatch(stripzeros,format("%.5fpt",n))
            end
            return format("%ipt",n)
        end
    end
    return "0pt"
end
78
-- Same conversion as points but without a unit: zero or non numeric input
-- gives "0", whole results are printed with %i and all other results with
-- exactly five decimals (nothing is stripped).
local function nupoints(n)
    if n ~= 0 then
        n = tonumber(n)
        if n and n ~= 0 then
            n = n * ptf
            return format(n % 1 == 0 and "%i" or "%.5f",n)
        end
    end
    return "0"
end
94
-- Formats n (scaled by bpf) as a string with a "bp" suffix. Zero and values
-- that tonumber cannot convert give "0bp". Whole results are printed with %i,
-- other results with five decimals that are then passed through stripzeros.
local function basepoints(n)
    if n ~= 0 then
        n = tonumber(n)
        if n and n ~= 0 then
            n = n * bpf
            if n % 1 ~= 0 then
                -- the plural stripzeros is used as we need to keep the bp
                return lpegmatch(stripzeros,format("%.5fbp",n))
            end
            return format("%ibp",n)
        end
    end
    return "0bp"
end
110
-- Same conversion as basepoints but without a unit: zero or non numeric input
-- gives "0", whole results are printed with %i and all other results with
-- exactly five decimals (nothing is stripped).
local function nubasepoints(n)
    if n ~= 0 then
        n = tonumber(n)
        if n and n ~= 0 then
            n = n * bpf
            return format(n % 1 == 0 and "%i" or "%.5f",n)
        end
    end
    return "0"
end
126
-- make the four converters available in the number namespace
number.points,     number.nupoints     = points,     nupoints
number.basepoints, number.nubasepoints = basepoints, nubasepoints
131
132-- str = " \n \ntest  \n test\ntest "
133-- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]")
134
-- Building blocks for collapsecrlf. The order of the alternatives in the final
-- pattern matters (ordered choice), so only the declarations are regrouped.

local anyrubish  = spaceortab + newline                  -- one blank or one line end
local rubish     = spaceortab^0 * newline                -- optional blanks up to a line end
local leading    = rubish^0 / ""                         -- blank lines at the start are dropped
local trailing   = (anyrubish^1 * endofstring) / ""      -- blanks and line ends before the end are dropped
local redundant  = rubish^3 / "\n"                       -- three or more line ends become one "\n"
local stripped   = (spaceortab^1 / "") * newline         -- blanks in front of a line end are dropped

local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0)

-- Applies the substitution pattern above to str and returns what lpeg.match
-- returns for it.
function strings.collapsecrlf(str)
    return lpegmatch(pattern,str)
end
147
148-- The following functions might end up in another namespace.
149
local repeaters = { } -- watch how we also moved the -1 in depth-1 to the creator

-- Returns a table that lazily maps a key k onto str repeated (k + offset)
-- times; non positive counts and a false/nil key give "". Computed strings are
-- stored in the table, and the tables themselves are cached per str and
-- offset so that repeated calls share one repeater.
function strings.newrepeater(str,offset)
    offset = offset or 0
    local byoffset = repeaters[str]
    if not byoffset then
        byoffset = { }
        repeaters[str] = byoffset
    end
    local repeater = byoffset[offset]
    if not repeater then
        local function expand(t,k)
            if not k then
                return ""
            end
            local count  = k + offset
            local result = ""
            if count > 0 then
                result = rep(str,count)
            end
            t[k] = result -- remember it so that __index is only hit once per key
            return result
        end
        repeater = setmetatable({ },{ __index = expand })
        byoffset[offset] = repeater
    end
    return repeater
end
176
177-- local dashes = strings.newrepeater("--",-1)
178-- print(dashes[2],dashes[3],dashes[1])
179
-- State shared by the callbacks of the tab expansion pattern that follows;
-- that pattern assigns all three again at the start of every match. The
-- original statement listed four values for these three names, the surplus
-- one was silently discarded, so it has been dropped here.
local extra, tab, start = 0, 0, 4

-- nspaces[n] is a string of n spaces (empty for n <= 0)
local nspaces = strings.newrepeater(" ")

string.nspaces = nspaces
185
local pattern  do

    -- called once per match with the extra argument: (re)initializes the state
    local function reset(t)
        extra, tab, start = 0, t or 7, 1
    end

    -- called for every tab character with the position where it starts
    local function expand(position)
        -- zero based column, corrected for the spaces that we already added
        local column = (position - start) + extra
        local spaces = tab - column % tab
        if spaces > 0 then
            extra = extra + spaces - 1
            return nspaces[spaces] -- rep(" ",spaces)
        end
        return ""
    end

    -- called after every newline with the position where the next line starts
    local function nextline(position)
        extra, start = 0, position
    end

    pattern = Carg(1) / reset * Cs((
        Cp() * patterns.tab / expand
      + newline * Cp() / nextline
      + anything
    )^1)

end

-- Replaces each tab in str by spaces up to the next multiple of tab columns
-- (7 when tab is not given); columns are counted per line.
function strings.tabtospace(str,tab)
    -- no real gain in first checking if a \t is there
    return lpegmatch(pattern,str,1,tab or 7)
end
211
-- Returns the spaces needed to pad s to a width of |n| characters, where the
-- length of s is measured with utflen. A nil or zero n gives "" and so does a
-- string that is already wide enough (the repeater returns "" for counts <= 0).
function string.utfpadding(s,n)
    if n and n ~= 0 then
        local used  = utflen(s)
        local width = n
        if width < 0 then
            width = -width
        end
        return nspaces[width-used]
    end
    return ""
end
223
224-- local t = {
225--     "1234567123456712345671234567",
226--     "\tb\tc",
227--     "a\tb\tc",
228--     "aa\tbb\tcc",
229--     "aaa\tbbb\tccc",
230--     "aaaa\tbbbb\tcccc",
231--     "aaaaa\tbbbbb\tccccc",
232--     "aaaaaa\tbbbbbb\tcccccc\n       aaaaaa\tbbbbbb\tcccccc",
233--     "one\n	two\nxxx	three\nxx	four\nx	five\nsix",
234-- }
235-- for k=1,#t do
236--     print(strings.tabtospace(t[k]))
237-- end
238
239-- todo: lpeg
240
241-- function strings.striplong(str) -- strips all leading spaces
242--     str = gsub(str,"^%s*","")
243--     str = gsub(str,"[\n\r]+ *","\n")
244--     return str
245-- end
246
-- Building blocks for the line strippers. All of them are substitution
-- fragments that only have an effect inside the Cs(...) patterns below.

local optionalspace = spacer^0
local nospace       = optionalspace/""                  -- drop a run of spacers
local endofline     = nospace * newline                 -- a line end, blanks in front of it dropped

local stripend      = (whitespace^1 * endofstring)/""   -- drop whitespace at the very end

-- the content of one line with the blanks at both sides removed
local normalline    = (nospace * ((1-optionalspace*(newline+endofstring))^1) * nospace)

local stripempty    = endofline^1/""                                -- drop a run of line ends
local normalempty   = endofline^1                                   -- keep a run of line ends
local singleempty   = endofline * (endofline^0/"")                  -- keep one line end, drop the rest
local doubleempty   = endofline * endofline^-1 * (endofline^0/"")   -- keep at most two line ends
local stripstart    = stripempty^0

local intospace     = whitespace^1/" "                  -- a run of whitespace becomes one space
local noleading     = whitespace^1/""                   -- a run of whitespace is dropped
local notrailing    = noleading * endofstring

local p_prune_normal    = Cs ( stripstart * ( stripend   + normalline + normalempty )^0 )
local p_prune_collapse  = Cs ( stripstart * ( stripend   + normalline + doubleempty )^0 )
local p_prune_noempty   = Cs ( stripstart * ( stripend   + normalline + singleempty )^0 )
-- noleading needs at least one whitespace character, so it has to be optional
-- here: otherwise the whole pattern fails (and callers fall back to the
-- untouched input) for every string that does not start with whitespace
local p_prune_intospace = Cs ( noleading^-1 * ( notrailing + intospace + 1          )^0 )
local p_retain_normal   = Cs (              (              normalline + normalempty )^0 )
local p_retain_collapse = Cs (              (              normalline + doubleempty )^0 )
local p_retain_noempty  = Cs (              (              normalline + singleempty )^0 )
272
273-- function striplines(str,prune,collapse,noempty)
274--     if prune then
275--         if noempty then
276--             return lpegmatch(p_prune_noempty,str) or str
277--         elseif collapse then
278--             return lpegmatch(p_prune_collapse,str) or str
279--         else
280--             return lpegmatch(p_prune_normal,str) or str
281--         end
282--     else
283--         if noempty then
284--             return lpegmatch(p_retain_noempty,str) or str
285--         elseif collapse then
286--             return lpegmatch(p_retain_collapse,str) or str
287--         else
288--             return lpegmatch(p_retain_normal,str) or str
289--         end
290--     end
291-- end
292
-- Maps a method name onto the pattern that implements it. Unknown names
-- (and nil) resolve to the "prune and collapse" pattern via __index.
local striplinepatterns = setmetatable( {
    ["prune"]               = p_prune_normal,
    ["prune and collapse"]  = p_prune_collapse, -- default
    ["prune and no empty"]  = p_prune_noempty,
    ["prune and to space"]  = p_prune_intospace,
    ["retain"]              = p_retain_normal,
    ["retain and collapse"] = p_retain_collapse,
    ["retain and no empty"] = p_retain_noempty,
    ["collapse"]            = patterns.collapser,
}, {
    __index = function()
        return p_prune_collapse
    end
} )

strings.striplinepatterns = striplinepatterns
307
-- Applies the stripper selected by how (see striplinepatterns) to str. A nil
-- or false str is returned as is, and so is str when the pattern does not
-- match.
function strings.striplines(str,how)
    if not str then
        return str
    end
    return lpegmatch(striplinepatterns[how],str) or str
end
311
-- Applies the "prune and to space" pattern to str. A nil or false str is
-- returned as is, and so is str when the pattern does not match.
function strings.collapse(str) -- maybe also in strings
    if str then
        return lpegmatch(p_prune_intospace,str) or str
    end
    return str
end

-- also see: string.collapsespaces

strings.striplong = strings.striplines -- for old times sake
319
320-- local str = table.concat( {
321-- "  ",
322-- "    aap",
323-- "  noot mies",
324-- "  ",
325-- "    ",
326-- " zus    wim jet",
327-- "zus    wim jet",
328-- "       zus    wim jet",
329-- "    ",
330-- }, "\n")
331--
332-- local str = table.concat( {
333-- "  aaaa",
334-- "  bb",
335-- "  cccccc",
336-- " ",
337-- }, "\n")
338--
339-- for k, v in table.sortedhash(utilities.strings.striplinepatterns) do
340--     logs.report("stripper","method: %s, result: [[%s]]",k,utilities.strings.striplines(str,k))
341-- end
342
343-- inspect(strings.striplong([[
344--   aaaa
345--   bb
346--   cccccc
347-- ]]))
348
-- Replaces every run of colons, hyphens, plus signs and underscores in str by
-- a single space and returns only the resulting string (not the count).
function strings.nice(str)
    return (gsub(str,"[:%-+_]+"," ")) -- maybe more
end
353
-- Work in progress. Interesting is that compared to the built-in this is faster in
-- luatex than in luajittex where we have a comparable speed. It only makes sense
-- to use the formatter when a (somewhat) complex format is used a lot. Each formatter
-- is a function so there is some overhead and not all formatted output is worth that
-- overhead. Keep in mind that there is an extra function call involved. In principle
-- we end up with a string concatenation so one could inline such a sequence but often
-- at the cost of less readability. So, it's a sort of (visual) compromise. Of course
-- there is the benefit of more variants. (Concerning the speed: a simple format like
-- %05fpt is better off with format than with a formatter, but as soon as you put
-- something in front formatters become faster. Passing the pt as extra argument makes
-- formatters behave better. Of course this is rather implementation dependent. Also,
-- when a specific format is only used a few times the overhead in creating it is not
-- compensated by speed.)
367--
368-- More info can be found in cld-mkiv.pdf so here I stick to a simple list.
369--
370-- integer            %...i   number
371-- integer            %...d   number
372-- unsigned           %...u   number -- not used
373-- character          %...c   number
374-- hexadecimal        %...x   number
375-- HEXADECIMAL        %...X   number
376-- octal              %...o   number
377-- string             %...s   string number
378-- float              %...f   number
379-- checked float      %...F   number
380-- exponential        %...e   number
381-- exponential        %...E   number
382-- stripped e         %...j   number
383-- stripped E         %...J   number
384-- autofloat          %...g   number
385-- autofloat          %...G   number
386-- utf character      %...c   number
387-- force tostring     %...S   any
388-- force tostring     %Q      any
389-- force tonumber     %N      number (strip leading zeros)
390-- signed number      %I      number
391-- rounded number     %r      number
392-- 0xhexadecimal      %...h   character number
393-- 0xHEXADECIMAL      %...H   character number
394-- U+hexadecimal      %...u   character number
395-- U+HEXADECIMAL      %...U   character number
396-- points             %p      number (scaled points)
397-- nupoints           %P      number (scaled points) / without unit / always 5 decimals
398-- basepoints         %b      number (scaled points)
399-- nubasepoints       %B      number (scaled points) / without unit / always 5 decimals
400-- table concat       %...t   table
401-- table concat       %{.}t   table
402-- serialize          %...T   sequenced (no nested tables)
403-- serialize          %{.}T   sequenced (no nested tables)
404-- boolean (logic)    %l      boolean
405-- BOOLEAN            %L      boolean
406-- whitespace         %...w   number
407-- whitespace         %...W   (fixed)
408-- automatic          %...a   'whatever' (string, table, ...)
409-- automatic          %...A   "whatever" (string, table, ...)
410-- zap                %...z   skip
411-- stripped  %...N    %...N
412-- comma/period real  %...m
413-- period/comma real  %...M
414-- formatted float    %...k   n.m
415
416local n = 0
417
418-- we are somewhat sloppy in parsing prefixes as it's not that critical
419
420-- hard to avoid but we can collect them in a private namespace if needed
421
422-- inline the next two makes no sense as we only use this in logging
423
424local sequenced = table.sequenced
425
-- Returns s as text wrapped in double quotes: nil gives '""', numbers are
-- returned unquoted, tables are serialized with sequenced (using sep or ",")
-- and everything else goes through tostring.
function string.autodouble(s,sep)
    if s == nil then
        return '""'
    end
    local kind = type(s)
    if kind == "number" then
        return tostring(s) -- tostring not really needed
    elseif kind == "table" then
        return '"' .. sequenced(s,sep or ",") .. '"'
    else
        return '"' .. tostring(s) .. '"'
    end
end
439
-- Returns s as text wrapped in single quotes: nil gives "''", numbers are
-- returned unquoted, tables are serialized with sequenced (using sep or ",")
-- and everything else goes through tostring.
function string.autosingle(s,sep)
    if s == nil then
        return "''"
    end
    local kind = type(s)
    if kind == "number" then
        return tostring(s) -- tostring not really needed
    elseif kind == "table" then
        return "'" .. sequenced(s,sep or ",") .. "'"
    else
        return "'" .. tostring(s) .. "'"
    end
end
453
-- Readable names for the character codes 0 up to and including 32, indexed by
-- code: tracedchars[0] is "[null]" and tracedchars[32] is "[space]".
local tracedchars = { }

do
    local names = {
        -- the regular bunch
        "null", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
        "bs",   "ht",  "lf",  "vt",  "ff",  "cr",  "so",  "si",
        "dle",  "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
        "can",  "em",  "sub", "esc", "fs",  "gs",  "rs",  "us",
        -- plus space
        "space", -- 0x20
    }
    for i=1,#names do
        tracedchars[i-1] = "[" .. names[i] .. "]"
    end
end

string.tracedchars = tracedchars
strings.tracers    = tracedchars
466
-- Returns a traceable representation of b, which is either a character code
-- (number) or a string. Codes listed in tracedchars give their name, anything
-- else gives "<character> (U+XXXXX)"; a string for which utfbyte gives no
-- code is reported with "?????".
function string.tracedchar(b)
    -- todo: table
    local isnumber = type(b) == "number"
    local code     = isnumber and b or utfbyte(b)
    local name     = tracedchars[code]
    if name then
        return name
    end
    if isnumber then
        return utfchar(b) .. " (U+" .. format("%05X",b) .. ")"
    end
    return b .. " (U+" .. (code and format("%05X",code) or "?????") .. ")"
end
476
-- Splits i into a sign string and its magnitude: returns "+" and i for zero
-- and positive values, "-" and -i for negative ones. Zero used to end up in
-- the negative branch, so that the signed (%I) formatter produced "-0".
function number.signed(i)
    if i >= 0 then
        return "+",  i
    else
        return "-", -i
    end
end
484
485-- maybe to util-num
486
local two    = digit * digit
local three  = two * digit
local prefix = (Carg(1) * three)^1 -- one or more groups of three, each preceded by the first extra argument

-- Used for numbers with a fraction: the integer part gets the first extra
-- argument inserted between groups of three digits, the next character is
-- replaced by the second extra argument and two more characters are kept.
local splitter = Cs (
    (
        ((1 - (three^1 * period))^1 + C(three)) * prefix
      + C((1-period)^1)
    )
  * (anything/"" * Carg(2))
  * C(2)
)

-- Used for plain digit strings. The alternatives are tried in order, so the
-- first three make sure that what follows the initial one to three digits
-- splits into groups of three up to the end of the string.
local splitter3 = Cs (
    three * prefix * endofstring
  + two   * prefix * endofstring
  + digit * prefix * endofstring
  + three
  + two
  + digit
)

patterns.formattednumber = splitter
506
-- Formats n with digit group separators.
--
-- sep1 == false : n (converted with tostring when it is a number) is grouped
--                 with splitter3, using sep2 or "." between the groups
-- sep1 == true  : "." between groups and "," as decimal separator
-- sep1 == "."   : "." between groups and sep2 or "," as decimal separator
-- otherwise     : sep1 or "," between groups and sep2 or "." as decimal
--
-- In all but the first case a number is first formatted with "%0.2f".
function number.formatted(n,sep1,sep2)
    local isnumber = type(n) == "number"
    if sep1 == false then
        return lpegmatch(splitter3,isnumber and tostring(n) or n,1,sep2 or ".")
    end
    if isnumber then
        n = format("%0.2f",n)
    end
    local thousands, decimal
    if sep1 == true then
        thousands, decimal = ".", ","
    elseif sep1 == "." then
        thousands, decimal = ".", sep2 or ","
    else
        thousands, decimal = sep1 or ",", sep2 or "."
    end
    return lpegmatch(splitter,n,1,thousands,decimal)
end
528
529-- print(number.formatted(1))
530-- print(number.formatted(12))
531-- print(number.formatted(123))
532-- print(number.formatted(1234))
533-- print(number.formatted(12345))
534-- print(number.formatted(123456))
535-- print(number.formatted(1234567))
536-- print(number.formatted(12345678))
537-- print(number.formatted(12345678,true))
538-- print(number.formatted(1,false))
539-- print(number.formatted(12,false))
540-- print(number.formatted(123,false))
541-- print(number.formatted(1234,false))
542-- print(number.formatted(12345,false))
543-- print(number.formatted(123456,false))
544-- print(number.formatted(1234567,false))
545-- print(number.formatted(12345678,false))
546-- print(number.formatted(1234.56,"!","?"))
547
-- Substitution pattern for compactfloat: keeps an optional minus sign, drops
-- leading zeros, keeps the integer part, drops a fraction that consists of
-- zeros only and drops the zeros at the end of any other fraction.
local p = Cs(
        P("-")^0
      * (P("0")^1/"")^0
      * (1-period)^0
      * (period * P("0")^1 * endofstring/"" + period^0)
      * P(1-P("0")^1*endofstring)^0
    )

-- Formats n with fmt (default "%0.3f") and compacts the result with the
-- pattern above. The values 0 and 1 are short cut, and a result that is
-- empty or only a period or minus sign is reported as "0".
function number.compactfloat(n,fmt)
    if n == 0 then
        return "0"
    end
    if n == 1 then
        return "1"
    end
    local compact = lpegmatch(p,format(fmt or "%0.3f",n))
    if compact == "." or compact == "" or compact == "-" then
        return "0"
    end
    return compact
end
568
-- Fragments for stripping redundant zeros and plus signs from exponential
-- notation; pattern_a is used for plain %e and %E, pattern_b for the rest.
local zero      = P("0")^1 / ""
local plus      = P("+")   / ""
local minus     = P("-")
local separator = period
local trailing  = zero^1 * #S("eE")
local exponent  = (S("eE") * (plus + Cs((minus * zero^0 * endofstring)/"") + minus) * zero^0 * (endofstring * Cc("0") + anything^1))
local pattern_a = Cs(minus^0 * digit^1 * (separator/"" * trailing + separator * (trailing + digit)^0) * exponent)
local pattern_b = Cs((exponent + anything)^0)

-- Formats n with the format f and compacts the exponent part. When called
-- with one argument that argument is the number and "%e" is used. Strings are
-- converted with tonumber first; whatever is not (convertible to) a number is
-- returned via tostring.
function number.sparseexponent(f,n)
    if not n then
        n, f = f, "%e"
    end
    local value
    local kind = type(n)
    if kind == "number" then
        value = n
    elseif kind == "string" then
        value = tonumber(n) -- cast to number
    end
    if value then
        local sparse = (f == "%e" or f == "%E") and pattern_a or pattern_b
        return lpegmatch(sparse,format(f,value))
    end
    return tostring(n)
end
594
-- Caches of format strings: hf[k] is "%.<k>f" and hs[k] is "%<k>s"; entries
-- are created on first use.
local hf = { }
local hs = { }

setmetatable(hf, { __index = function(t,k)
    local v = "%." .. k .. "f"
    t[k] = v
    return v
end } )

setmetatable(hs, { __index = function(t,k)
    local v = "%" .. k .. "s"
    t[k] = v
    return v
end } )

-- Formats n with a digits after the period and pads the result with leading
-- spaces to a width of b + a + 1 characters; longer results are returned as
-- they are. Both b and a default to 0. The default for a has to be applied
-- before the format lookup: hf[nil] would otherwise raise an error in the
-- concatenation done by the __index function above.
function number.formattedfloat(n,b,a)
    a = a or 0
    local s = format(hf[a],n)
    local l = (b or 0) + a + 1
    if #s < l then
        return format(hs[l],s)
    else
        return s
    end
end
619
620local template = [[
621%s
622%s
623return function(%s) return %s end
624]]
625
626-- We only use fast serialize in controlled cases.
627
-- Wraps a string in double quotes and escapes double quotes, backslashes,
-- newlines and carriage returns with a backslash sequence.
local pattern  do

    local special = S('"\\\n\r')
    local escaped = {
        ['"']  = '\\"',
        ['\\'] = '\\\\',
        ['\n'] = '\\n',
        ['\r'] = '\\r',
    }

    -- the table is indexed with the matched character
    pattern = Cs(Cc('"') * ((1-special)^1 + special / escaped)^0 * Cc('"'))

end

-- -- I need to do more experiments with this (note that the decimal codes used
-- -- for \n and \r look swapped in this variant):
--
-- local pattern = Cs(Cc('"') * (
--     (1-S('"\\\n\r'))^1
--   + P('"')  / '\\034'
--   + P('\\') / '\\092'
--   + P('\n') / '\\013'
--   + P('\r') / '\\010'
-- )^0 * Cc('"'))

patterns.escapedquotes = pattern

-- Returns s quoted and escaped by the pattern above.
function string.escapedquotes(s)
    return lpegmatch(pattern,s)
end
651
-- Replaces backslash plus one to three decimal digits by the byte with that
-- code. As before, the string has to start with something else than a
-- backslash for the pattern to match at all.
--
-- Three cases that used to raise an error or would lose text are handled:
--
-- * at least one digit is required, so a backslash that is not followed by a
--   digit no longer ends up in char(nil)
-- * codes above 255 produce no replacement, which makes Cs keep the original
--   backslash and digits instead of failing in char
-- * a backslash that does not start a valid escape is kept as it is, so the
--   text after it is not cut off
local pattern  do

    local backslash = P("\\")
    local plain     = (1 - backslash)^1

    local escaped   = (backslash * C(digit * digit^-2)) / function(s)
        local code = tonumber(s)
        if code <= 255 then
            return char(code)
        end
        -- no return value: the matched text is kept
    end

    pattern = Cs(plain * (escaped + plain + backslash)^1)

end

patterns.unescapedquotes = pattern

-- Returns s with the decimal escapes resolved, or s itself when the pattern
-- does not match (for instance when there is no backslash in it).
function string.unescapedquotes(s)
    return lpegmatch(pattern,s) or s
end
662
663-- function string.longifneeded(s)
664--     if find(s,'["\\\n\r]') then
665--         return "[===[" .. s .. "]===]"
666--     else
667--         return '"' .. s ..'"'
668--     end
669-- end
670
671string.texnewlines = lpeg.replacer(patterns.newline,"\r",true)
672
673-- print(string.escapedquotes('1\\23\n"'))
674
675-- but for now here
676
local preamble = ""

-- The names that are collected for the generated formatter code. Where this
-- file already has a local alias for a function that alias is used; the
-- values are the same as the namespaced ones.
local environment = {
    -- plain Lua
    global          = global or _G,
    lpeg            = lpeg,
    type            = type,
    tostring        = tostring,
    tonumber        = tonumber,
    format          = format,
    concat          = concat,
    -- numbers and dimensions
    signed          = number.signed,
    points          = points,
    nupoints        = nupoints,
    basepoints      = basepoints,
    nubasepoints    = nubasepoints,
    formattednumber = number.formatted,
    sparseexponent  = number.sparseexponent,
    formattedfloat  = number.formattedfloat,
    -- characters and strings
    utfchar         = utfchar,
    utfbyte         = utfbyte,
    lpegmatch       = lpegmatch,
    nspaces         = nspaces,
    utfpadding      = string.utfpadding,
    tracedchar      = string.tracedchar,
    autosingle      = string.autosingle,
    autodouble      = string.autodouble,
    sequenced       = sequenced,
    stripzero       = stripzero,
    stripzeros      = stripzeros,
    escapedquotes   = string.escapedquotes,

    -- only set when string.f6 is available
    FORMAT          = string.f6,
}
710
711-- -- --
712
-- arguments[k] is the parameter list "a1,a2,...,ak"; missing entries are
-- built from the previous one on first access and then stored.
local arguments = setmetatable({ "a1" }, { -- faster than previously used (select(n,...))
    __index = function(list,index)
        local joined = list[index-1] .. ",a" .. index
        list[index] = joined
        return joined
    end
})
722
-- Prefix parsers for the directives.
--
-- prefix_any : everything made of signs, spaces, periods and digits, as one
--              string (possibly empty)
-- prefix_sub : "<before>.<after>"; each part is a string of signs and digits
--              or the number 0 when the part is absent. The parts need ^1
--              here: with ^0 the capture also succeeds on nothing, the Cc(0)
--              fallback is never reached and an empty string is passed on.
-- prefix_tab : either the text between braces or everything up to the first
--              letter, digit or percent sign
local prefix_any = C((sign + space + period + digit)^0)
local prefix_sub = (C((sign + digit)^1) + Cc(0))
                 * period
                 * (C((sign + digit)^1) + Cc(0))
local prefix_tab = P("{") * C((1-P("}"))^0) * P("}") + C((1-R("az","AZ","09","%%"))^0)
728
729-- we've split all cases as then we can optimize them (let's omit the fuzzy u)
730
731-- todo: replace outer formats in next by ..
732
-- %s : code for the next argument, formatted with the given prefix or, when
-- there is no prefix, just the argument with '' as fallback for nil.
local format_s = function(f)
    n = n + 1
    if not f or f == "" then
        -- best no tostring in order to stay compatible (.. does a selective tostring too)
        return format("(a%s or '')",n) -- goodie: nil check
    end
    return format("format('%%%ss',a%s)",f,n)
end

-- %S : as %s but the argument always goes through tostring.
local format_S = function(f) -- can be optimized
    n = n + 1
    if not f or f == "" then
        return format("tostring(a%s)",n)
    end
    return format("format('%%%ss',tostring(a%s))",f,n)
end
750
-- Code for the next argument padded with utfpadding: a positive width puts
-- the padding in front of the argument, a negative one behind it. Without a
-- (non zero) numeric width the argument is used as is, with '' for nil.
local format_right = function(f)
    n = n + 1
    local width = tonumber(f)
    if width and width ~= 0 then
        if width > 0 then
            return format("utfpadding(a%s,%i)..a%s",n,width,n)
        end
        return format("a%s..utfpadding(a%s,%i)",n,n,width)
    end
    return format("(a%s or '')",n)
end

-- The mirrored variant: a positive width puts the padding behind the
-- argument, a negative one in front of it.
local format_left = function(f)
    n = n + 1
    local width = tonumber(f)
    if width and width ~= 0 then
        if width < 0 then
            return format("utfpadding(a%s,%i)..a%s",n,-width,n)
        end
        return format("a%s..utfpadding(a%s,%i)",n,n,-width)
    end
    return format("(a%s or '')",n)
end
775
-- %q : code that quotes the next argument with %q, or gives '' for nil. When
-- JITSUPPORTED is set the argument is passed through tostring first.
local format_q

if JITSUPPORTED then

    format_q = function()
        n = n + 1
        -- lua 5.3 has a different q than lua 5.2 (which does a tostring on numbers)
     -- return format("(a%s ~= nil and format('%%q',a%s) or '')",n,n)
        return format("(a%s ~= nil and format('%%q',tostring(a%s)) or '')",n,n)
     -- return format("(a%s ~= nil and escapedquotes(tostring(a%s)) or '')",n,n)
    end

else

    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',a%s) or '')",n,n)
    end

end
786
787
-- %Q : code that passes the next argument through tostring and then through
-- the (fast) escapedquotes helper.
local format_Q = function() -- fast escaping
    n = n + 1
    local template = "escapedquotes(tostring(a%s))" -- was: "format('%%q',tostring(a%s))"
    return format(template,n)
end
793
-- %i : code that formats the next argument as integer, with the optional
-- prefix put between the percent sign and the i.
local format_i = function(f)
    n = n + 1
    -- an absent prefix is the same as an empty one (why not just tostring())
    return format("format('%%%si',a%s)",f or "",n)
end

-- %d is handled exactly like %i
local format_d = format_i
804
-- %I : code that formats the sign and the magnitude returned by signed for
-- the next argument.
local format_I = function(f)
    n = n + 1
    local template = "format('%%s%%%si',signed(a%s))"
    return format(template,f,n)
end

-- %f : code that formats the next argument as float with the given prefix.
local format_f = function(f)
    n = n + 1
    local template = "format('%%%sf',a%s)"
    return format(template,f,n)
end
814
815-- The next one formats an integer as integer and very small values as zero. This is needed
816-- for pdf backend code.
817--
818--   1.23 % 1 : 0.23
819-- - 1.23 % 1 : 0.77
820--
821-- We could probably use just %s with integers but who knows what Lua 5.3 will do? So let's
822-- for the moment use %i.
823
-- %F : code that formats whole values with %i and other values as float.
-- Without a prefix nine decimals are used and values very close to zero give
-- '0'; with a prefix that prefix is used for the float case.
local format_F = function(f) -- beware, no cast to number
    n = n + 1
    if f and f ~= "" then
        return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,f,n)
    end
    return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n)
end
832
833-- if string.f9 then
834--     format_F = function(f) -- beware, no cast to number
835--         n = n + 1
836--         if not f or f == "" then
837--             return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or FORMAT(a%s))",n,n,n,n,n)
838--         else
839--             return format("((a%s %% 1 == 0) and format('%%i',a%s) or FORMAT(a%s,'%%%sf'))",n,n,n,f)
840--         end
841--     end
842-- end
843
-- %b.ak : code that calls formattedfloat for the next argument with b digits
-- reserved before and a digits after the period. A missing part counts as 0;
-- this includes the empty string, which is truthy in Lua and would otherwise
-- end up as a hole in the generated argument list (formattedfloat(a1,,2)).
local format_k = function(b,a) -- slow
    n = n + 1
    if not b or b == "" then
        b = 0
    end
    if not a or a == "" then
        a = 0
    end
    return format("formattedfloat(a%s,%s,%s)",n,b,a)
end
848
-- The directives that map directly onto a call with the prefix inserted in
-- the format string: g G e E x X o use format, j J use sparseexponent.
local format_g, format_G, format_e, format_E, format_j, format_J, format_x, format_X, format_o  do

    -- returns a generator that claims the next argument and fills in the
    -- prefix and the argument number
    local function generator(template)
        return function(f)
            n = n + 1
            return format(template,f,n)
        end
    end

    format_g = generator("format('%%%sg',a%s)")
    format_G = generator("format('%%%sG',a%s)")
    format_e = generator("format('%%%se',a%s)")
    format_E = generator("format('%%%sE',a%s)")
    format_j = generator("sparseexponent('%%%se',a%s)")
    format_J = generator("sparseexponent('%%%sE',a%s)")
    format_x = generator("format('%%%sx',a%s)")
    format_X = generator("format('%%%sX',a%s)")
    format_o = generator("format('%%%so',a%s)")

end
893
-- %c : code that converts the next argument with utfchar.
local format_c = function()
    n = n + 1
    local template = "utfchar(a%s)"
    return format(template,n)
end

-- %C : code that converts the next argument with tracedchar.
local format_C = function()
    n = n + 1
    local template = "tracedchar(a%s)"
    return format(template,n)
end

-- %r : code that formats the next argument as float without decimals, which
-- rounds it; the prefix can set the width.
local format_r = function(f)
    n = n + 1
    local template = "format('%%%s.0f',a%s)"
    return format(template,f,n)
end
908
-- %h %H %u %U : code that formats the next argument (a number, or a character
-- that is converted with utfbyte) as hexadecimal number, prefixed by 0x, 0x,
-- u+ and U+ respectively. Without an explicit width "05" is used.
--
-- A leading "-" in the prefix suppresses the 0x/u+ part. This used to work
-- for a bare "-" only because the test compared the whole prefix with "-",
-- which also made the sub(f,2) that follows it a no-op; now the first
-- character is checked, so that for instance %-4h gives four positions
-- without 0x.
local format_h, format_H, format_u, format_U  do

    -- the four directives only differ in the leading text and the letter
    local function hexformatter(leader,letter)
        return function(f)
            n = n + 1
            f = f or ""
            local lead = leader
            if sub(f,1,1) == "-" then
                f    = sub(f,2)
                lead = ""
            end
            if f == "" then
                f = "05"
            end
            return format("format('%s%%%s%s',type(a%s) == 'number' and a%s or utfbyte(a%s))",lead,f,letter,n,n,n)
        end
    end

    format_h = hexformatter("0x","x")
    format_H = hexformatter("0x","X")
    format_u = hexformatter("u+","x")
    format_U = hexformatter("U+","X")

end
948
-- %p %P %b %B : code that passes the next argument to points, nupoints,
-- basepoints and nubasepoints respectively.
local format_p, format_P, format_b, format_B  do

    local function generator(template)
        return function()
            n = n + 1
            return format(template,n)
        end
    end

    format_p = generator("points(a%s)")
    format_P = generator("nupoints(a%s)")
    format_b = generator("basepoints(a%s)")
    format_B = generator("nubasepoints(a%s)")

end
968
-- %t : code that concatenates the next argument (a table) with concat, using
-- the prefix as separator when there is one.
local format_t = function(f)
    n = n + 1
    if not f or f == "" then
        return format("concat(a%s)",n)
    end
    return format("concat(a%s,%q)",n,f)
end

-- %T : code that serializes the next argument with sequenced, using the
-- prefix as separator when there is one.
local format_T = function(f)
    n = n + 1
    if not f or f == "" then
        return format("sequenced(a%s)",n)
    end
    return format("sequenced(a%s,%q)",n,f)
end
986
-- %l : code that turns the next argument into 'true' or 'false'.
local format_l = function()
    n = n + 1
    local template = "(a%s and 'true' or 'false')"
    return format(template,n)
end

-- %L : code that turns the next argument into 'TRUE' or 'FALSE'.
local format_L = function()
    n = n + 1
    local template = "(a%s and 'TRUE' or 'FALSE')"
    return format(template,n)
end

-- Code that formats whole values with %i and everything else with tostring
-- (strips leading and trailing zeros and removes .0, beware: can produce e
-- notation).
local format_n = function()
    n = n + 1
    local template = "((a%s %% 1 == 0) and format('%%i',a%s) or tostring(a%s))"
    return format(template,n,n,n)
end
1001
1002-- local format_N = function() -- strips leading and trailing zeros (also accepts string)
1003--     n = n + 1
1004--     return format("tostring(tonumber(a%s) or a%s)",n,n)
1005-- end
1006
1007-- local format_N = function(f) -- strips leading and trailing zeros
1008--     n = n + 1
1009--     -- stripzero (singular) as we only have a number
1010--     if not f or f == "" then
1011--         return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or ((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%.9f',a%s)))",n,n,n,n,n)
1012--     else
1013--         return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,f,n)
1014--     end
1015-- end
1016
1017-- local format_N = function(f) -- strips leading and trailing zeros
1018--     n = n + 1
1019--     -- stripzero (singular) as we only have a number
1020--     if not f or f == "" then
1021--         return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or ((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or lpegmatch(stripzero,format('%%.9f',a%s)))",n,n,n,n,n)
1022--     else
1023--         return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,f,n)
1024--     end
1025-- end
1026
-- %N: number with stripped zeros. Which generator is used is decided once, when
-- this file is loaded, by the presence of environment.FORMAT.

local format_N

if environment.FORMAT then

    -- delegate to FORMAT: no prefix means '%.9f', a .6 or 0.6 prefix means
    -- calling FORMAT without a format, any other prefix becomes a '%<prefix>f'
    format_N = function(f)
        n = n + 1
        if not f or f == "" then
            return format("FORMAT(a%s,'%%.9f')",n)
        end
        if f == ".6" or f == "0.6" then
            return format("FORMAT(a%s)",n)
        end
        return format("FORMAT(a%s,'%%%sf')",n,f)
    end

else

    -- whole numbers use %i, other values are formatted with '%<prefix>f' (the
    -- prefix defaults to .9) and then cleaned up with stripzero (singular, as
    -- we only have a number)
    format_N = function(f)
        n = n + 1
        local precision = f
        if not precision or precision == "" then
            precision = ".9" -- always a leading number !
        end
        return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,precision,n)
    end

end
1052
-- %a: emits an autosingle() call on the next argument; a non-empty prefix is
-- passed along (quoted) as second argument.
local format_a = function(f)
    n = n + 1
    if not f or f == "" then
        return format("autosingle(a%s)",n)
    end
    return format("autosingle(a%s,%q)",n,f)
end
1061
-- %A: emits an autodouble() call on the next argument; a non-empty prefix is
-- passed along (quoted) as second argument.
local format_A = function(f)
    n = n + 1
    if not f or f == "" then
        return format("autodouble(a%s)",n)
    end
    return format("autodouble(a%s,%q)",n,f)
end
1070
-- %w: looks up a run of spaces in nspaces, indexed by the next argument; a
-- numeric prefix is added to that index (handy when doing depth related indent)
local format_w = function(f)
    n = n + 1
    local offset = tonumber(f)
    if not offset then
        return format("nspaces[a%s]",n) -- no real need for tonumber
    end
    -- not that useful
    return format("nspaces[%s+a%s]",offset,n) -- no real need for tonumber
end
1080
-- %W: looks up a fixed run of spaces in nspaces; the index is the numeric
-- prefix (0 when absent) and no argument is consumed
local format_W = function(f)
    local width = tonumber(f) or 0
    return format("nspaces[%s]",width)
end
1084
-- %m: emits a formattednumber() call on the next argument. A prefix of "0"
-- passes false as second argument; otherwise the prefix (default ",") is passed
-- as second and "." as third argument.
local format_m = function(f)
    n = n + 1
    if f == "0" then
        return format([[formattednumber(a%s,false)]],n)
    end
    local separator = (not f or f == "") and "," or f
    return format([[formattednumber(a%s,%q,".")]],n,separator)
end
1096
-- %M: emits a formattednumber() call on the next argument. A prefix of "0"
-- passes false as second argument; otherwise the prefix (default ".") is passed
-- as second and "," as third argument.
local format_M = function(f)
    n = n + 1
    if f == "0" then
        return format([[formattednumber(a%s,false)]],n)
    end
    local separator = (not f or f == "") and "." or f
    return format([[formattednumber(a%s,%q,",")]],n,separator)
end
1108
1109--
1110
-- %z: skips arguments by advancing the argument counter with the numeric
-- prefix (1 when absent) and contributes an empty string to the result
local format_z = function(f)
    local skip = tonumber(f)
    if not skip then
        skip = 1
    end
    n = n + skip
    return "''" -- okay, not that efficient to append '' but a special case anyway
end
1115
1116--
1117
1118-- local strip
1119--
1120-- local format_Z = function(f)
1121--     n = n + 1
1122--     if not f or f == "" then
1123--         f = ".9"
1124--     end
1125--     return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or (strip and lpegmatch(stripzero,format('%%%sf',a%s))) or format('%%%sf',a%s))",n,n,f,n,f,n)
1126-- end
1127--
1128-- function strings.stripformatterzeros()
1129--     strip = true
1130-- end
1131
1132-- add(formatters,"texexp", [[texexp(...)]], "local texexp = metapost.texexp")
1133--
1134-- add(formatters,"foo:bar",[[foo(...)]], { foo = function(...) print(...) return "!" end })
1135-- print(string.formatters["foo %3!foo:bar! bar"](1,2,3))
1136
1137
-- literal text between directives: turned into a quoted Lua string
local format_rest = function(s)
    local quoted = format("%q",s) -- catches " and \n and such
    return quoted
end
1141
-- %!name!: expands a registered extension template (tostring(%s) when the name
-- is unknown). The numeric prefix (default 1) decides how arguments are used:
--
--   0   : no argument is consumed, a ... in the template is removed
--   1   : the next argument is consumed
--   < 0 : refers to an argument relative to the current counter without
--         advancing it (not supported for ... templates, where ... is removed)
--   > 1 : that many arguments are consumed
local format_extension = function(extensions,f,name)
    local spec     = extensions[name] or "tostring(%s)"
    local count    = tonumber(f) or 1
    local ellipsis = "%.%.%."
    local variadic = find(spec,ellipsis)
    if count == 0 or (count < 0 and variadic) then
        -- nothing to pass, so a ... (when present) is just dropped
        if variadic then
            return (gsub(spec,ellipsis,""))
        end
        return spec
    end
    if count == 1 then
        if variadic then
            spec = gsub(spec,ellipsis,"%%s")
        end
        n = n + 1
        local name = "a" .. n
        return format(spec,name,name) -- maybe more times?
    end
    if count < 0 then
        local name = "a" .. (n + count + 1)
        return format(spec,name,name)
    end
    if variadic then
        spec = gsub(spec,ellipsis,rep("%%s,",count-1).."%%s")
    end
    -- we could fill an array and then n = n + 1 unpack(t,n,n+f) but as we
    -- cache we don't save much and there are hardly any extensions anyway
    local names = { }
    for i=1,count do
        n = n + 1
        names[i] = "a" .. n
    end
    return format(spec,unpack(names))
end
1182
1183-- aA b cC d eE f gG hH iI jJ lL mM N o p qQ r sS tT uU wW xX z
1184
1185-- extensions : %!tag!
1186
1187-- can be made faster but not called that often
1188
-- The builder grammar rewrites a format specification into the source of a Lua
-- expression. Each directive starts with a % (which is dropped) followed by one
-- of the rules below; a rule hands its captured prefix to the matching format_*
-- generator and is replaced by what that generator returns. Text between
-- directives is handled by the "*" rule. After every piece, unless the end of
-- the string is reached, the first extra match argument (Carg(1), the connector)
-- is inserted. The "!" rule additionally receives the second extra match
-- argument (Carg(2), the extensions table).
local builder = Cs { "start",
    start = (
        (
            P("%") / ""
          * (
                V("!") -- new
              + V("s") + V("q")
              + V("i") + V("d")
              + V("f") + V("F") + V("g") + V("G") + V("e") + V("E")
              + V("x") + V("X") + V("o")
              --
              + V("c")
              + V("C")
              + V("S") -- new
              + V("Q") -- new
              + V("n") -- new
              + V("N") -- new
              + V("k") -- new
              --
              + V("r")
              + V("h") + V("H") + V("u") + V("U")
              + V("p") + V("P") + V("b") + V("B")
              + V("t") + V("T")
              + V("l") + V("L")
              + V("I")
              + V("w") -- new
              + V("W") -- new
              + V("a") -- new
              + V("A") -- new
              + V("j") + V("J") -- stripped e E
              + V("m") + V("M") -- new (formatted number)
              + V("z") -- new
              --
              + V(">") -- left padding
              + V("<") -- right padding
              --
           -- + V("?") -- ignored, probably messed up %
            )
          + V("*")
        )
     * (endofstring + Carg(1))
    )^0,
    --
    ["s"] = (prefix_any * P("s")) / format_s, -- %s => regular %s (string)
    ["q"] = (prefix_any * P("q")) / format_q, -- %q => regular %q (quoted string)
    ["i"] = (prefix_any * P("i")) / format_i, -- %i => regular %i (integer)
    ["d"] = (prefix_any * P("d")) / format_d, -- %d => regular %d (integer)
    ["f"] = (prefix_any * P("f")) / format_f, -- %f => regular %f (float)
    ["F"] = (prefix_any * P("F")) / format_F, -- %F => regular %f (float) but 0/1 check
    ["g"] = (prefix_any * P("g")) / format_g, -- %g => regular %g (float)
    ["G"] = (prefix_any * P("G")) / format_G, -- %G => regular %G (float)
    ["e"] = (prefix_any * P("e")) / format_e, -- %e => regular %e (float)
    ["E"] = (prefix_any * P("E")) / format_E, -- %E => regular %E (float)
    ["x"] = (prefix_any * P("x")) / format_x, -- %x => regular %x (hexadecimal)
    ["X"] = (prefix_any * P("X")) / format_X, -- %X => regular %X (HEXADECIMAL)
    ["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal)
    --
    ["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring)
    ["Q"] = (prefix_any * P("Q")) / format_Q, -- %Q => %q (tostring)
    ["n"] = (prefix_any * P("n")) / format_n, -- %n => tonumber (strips leading and trailing zeros, as well as .0, expects number)
    ["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading and trailing zeros, also takes string)
    ["k"] = (prefix_sub * P("k")) / format_k, -- %k => like f but with n.m
    ["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular)
    ["C"] = (prefix_any * P("C")) / format_C, -- %C => U+.... utf character
    --
    ["r"] = (prefix_any * P("r")) / format_r, -- %r => round
    ["h"] = (prefix_any * P("h")) / format_h, -- %h => 0x0a1b2 (when - no 0x) was v
    ["H"] = (prefix_any * P("H")) / format_H, -- %H => 0x0A1B2 (when - no 0x) was V
    ["u"] = (prefix_any * P("u")) / format_u, -- %u => u+0a1b2 (when - no u+)
    ["U"] = (prefix_any * P("U")) / format_U, -- %U => U+0A1B2 (when - no U+)
    ["p"] = (prefix_any * P("p")) / format_p, -- %p => 12.345pt
    ["P"] = (prefix_any * P("P")) / format_P, -- %P => 12.345
    ["b"] = (prefix_any * P("b")) / format_b, -- %b => 12.342bp
    ["B"] = (prefix_any * P("B")) / format_B, -- %B => 12.342
    ["t"] = (prefix_tab * P("t")) / format_t, -- %t => concat
    ["T"] = (prefix_tab * P("T")) / format_T, -- %T => sequenced
    ["l"] = (prefix_any * P("l")) / format_l, -- %l => boolean
    ["L"] = (prefix_any * P("L")) / format_L, -- %L => BOOLEAN
    ["I"] = (prefix_any * P("I")) / format_I, -- %I => signed integer
    --
    ["w"] = (prefix_any * P("w")) / format_w, -- %w => n spaces (optional prefix is added)
    ["W"] = (prefix_any * P("W")) / format_W, -- %W => mandate prefix, no specifier
    --
    ["j"] = (prefix_any * P("j")) / format_j, -- %j => %e (float) stripped exponent (irrational)
    ["J"] = (prefix_any * P("J")) / format_J, -- %J => %E (float) stripped exponent (irrational)
    --
    ["m"] = (prefix_any * P("m")) / format_m, -- %m => xxx.xxx.xxx,xx (optional prefix instead of .)
    ["M"] = (prefix_any * P("M")) / format_M, -- %M => xxx,xxx,xxx.xx (optional prefix instead of ,)
    --
    ["z"] = (prefix_any * P("z")) / format_z, -- %z => skip n arguments
 -- ["Z"] = (prefix_any * P("Z")) / format_Z, -- %Z => optionally strip zeros
    --
    ["a"] = (prefix_any * P("a")) / format_a, -- %a => '...' (forces tostring)
    ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring)
    --
    ["<"] = (prefix_any * P("<")) / format_left,
    [">"] = (prefix_any * P(">")) / format_right,
    --
    ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest, -- rest (including %%, which becomes a single %)
    ["?"] = Cs(((1-P("%"))^1               )^1) / format_rest, -- rest (excluding %%), not referenced as V("?") is disabled in start
    --
    -- extension: the extensions table, the prefix and the name between the ! are passed
    ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,
}
1292
1293-- We can be clever and only alias what is needed:
1294
-- two digit hexadecimal strings (lowercase and uppercase) that are created on
-- first access and then kept in the table

local xx = setmetatable({ }, {
    __index = function(cache,value)
        local hex = format("%02x",value)
        cache[value] = hex
        return hex
    end
})

local XX = setmetatable({ }, {
    __index = function(cache,value)
        local hex = format("%02X",value)
        cache[value] = hex
        return hex
    end
})

-- formats that bypass the builder and use the lookup tables directly

local preset = {
    ["%02x"] = function(n)
        return xx[n]
    end,
    ["%02X"] = function(n)
        return XX[n]
    end,
}
1302
-- Shortcut for a specification that is exactly one plain string.format directive:
-- a % followed by any number of sign, space, period and digit characters and one
-- of s q i d f g G e E x X o, up to the end of the string. A match is replaced by
-- the source of a function that passes its single argument to string.format; the
-- %0 in the replacement is the matched specification itself.
local direct =
    P("%") * (sign + space + period + digit)^0 * S("sqidfgGeExXo") * endofstring
  / [[local format = string.format return function(str) return format("%0",str) end]]
1306
-- The __index handler of a formatter instance: turns the specification str into
-- a function and stores it in t so that the next lookup is a plain table access.
-- Presets are returned as they are (and not stored in t).
local function make(t,str)
    local known = preset[str]
    if known then
        return known
    end
    local formatter
    local code = lpegmatch(direct,str)
    if code then
        -- a single regular directive: a thin wrapper around string.format
     -- print("builder 1 >",code)
        formatter = loadstripped(code)()
    else
        n = 0 -- used in patterns
     -- code = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n
        code = lpegmatch(builder,str,1,t._connector_,t._extensions_) -- after this we know n
        if n > 0 then
            code = format(template,preamble,t._preamble_,arguments[n],code)
         -- print("builder 2 >",code)
            formatter = loadstripped(code,t._environment_)() -- t._environment is not populated (was experiment)
        else
            -- no arguments were consumed so the specification itself is the result
            formatter = function()
                return str
            end
        end
    end
    t[str] = formatter
    return formatter
end
1331
1332-- -- collect periodically
1333--
1334-- local threshold = 1000 -- max nof cached formats
1335--
1336-- local function make(t,str)
1337--     local f = rawget(t,str)
1338--     if f then
1339--         return f
1340--     end
1341--     local parent = t._t_
1342--     if parent._n_ > threshold then
1343--         local m = { _t_ = parent }
1344--         getmetatable(parent).__index = m
1345--         setmetatable(m, { __index = make })
1346--     else
1347--         parent._n_ = parent._n_ + 1
1348--     end
1349--     local f
1350--     local p = lpegmatch(direct,str)
1351--     if p then
1352--         f = loadstripped(p)()
1353--     else
1354--         n = 0
1355--         p = lpegmatch(builder,str,1,"..",parent._extensions_) -- after this we know n
1356--         if n > 0 then
1357--             p = format(template,preamble,parent._preamble_,arguments[n],p)
1358--          -- print("builder>",p)
1359--             f = loadstripped(p)()
1360--         else
1361--             f = function() return str end
1362--         end
1363--     end
1364--     t[str] = f
1365--     return f
1366-- end
1367
-- The __call handler of a formatter instance: looks up (or creates) the
-- formatter for fmt and applies it to the remaining arguments.
local function use(t,fmt,...)
    local formatter = t[fmt]
    return formatter(...)
end
1371
strings.formatters = { }

-- we cannot make these tables weak, unless we start using an indirect
-- table (metatable) in which case we could better keep a count and
-- clear that table when a threshold is reached

-- _connector_ is an experiment

-- Creates a new formatter instance. Indexing the instance with a specification
-- gives a formatter function (made on demand by make) and calling the instance
-- with a specification and arguments applies it (use). With noconcat set the
-- pieces are connected with a comma instead of the concatenation operator.
function strings.formatters.new(noconcat)
    local connector = ".."
    if noconcat then
        connector = ","
    end
    -- better make a copy as we can overload
    local private = { }
    for key, value in next, environment do
        private[key] = value
    end
    local instance = {
        _type_        = "formatter",
        _connector_   = connector,
        _extensions_  = { },
        _preamble_    = "",
        _environment_ = private,
    }
    return setmetatable(instance, { __index = make, __call = use })
end
1395
local formatters   = strings.formatters.new() -- the default instance

string.formatters  = formatters -- in the main string namespace

-- sometimes nicer name: applies the formatter for str to the given arguments
function string.formatter(str,...)
    local formatter = formatters[str]
    return formatter(...)
end
1400
-- Registers the extension name in formatter instance t. The template defaults
-- to "%s". A string preamble is put in front of the instance preamble and the
-- fields of a table preamble are copied into the instance environment. Anything
-- that is not a formatter instance is silently ignored.
local function add(t,name,template,preamble)
    if type(t) ~= "table" or t._type_ ~= "formatter" then
        return
    end
    t._extensions_[name] = template or "%s"
    local kind = type(preamble)
    if kind == "string" then
        t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload !
    elseif kind == "table" then
        local target = t._environment_
        for key, value in next, preamble do
            target[key] = value
        end
    end
end

strings.formatters.add = add
1415
1416-- registered in the default instance (should we fall back on this one?)
1417
-- Escape patterns; each one returns the substituted copy of its subject.
--
-- xmlescape : < > & " become &lt; &gt; &amp; &quot;
-- texescape : each of # $ % \ { } gets a backslash in front
-- luaescape : " becomes \" and a newline becomes \n (backslash and n)
-- luaquoted : as luaescape but with the result wrapped in double quotes
--
-- The newline replacement used to be \n followed by a stray double quote, which
-- made the result of luaquoted (and of the lua extension) an invalid Lua string
-- as soon as the input had a newline.

patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + anything)^0)
patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + anything)^0)
patterns.luaescape = Cs(((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0) -- maybe also \0
patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0 * Cc('"'))

-- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but
-- faster again when other q-escapables are found (the ones we don't need to escape)

-- the patterns are made available to the generated code via the environment

add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],{ xmlescape = patterns.xmlescape })
add(formatters,"tex",[[lpegmatch(texescape,%s)]],{ texescape = patterns.texescape })
add(formatters,"lua",[[lpegmatch(luaescape,%s)]],{ luaescape = patterns.luaescape })
1429
1430-- -- yes or no:
1431--
1432-- local function make(t,str)
1433--     local f
1434--     local p = lpegmatch(direct,str)
1435--     if p then
1436--         f = loadstripped(p)()
1437--     else
1438--         n = 0
1439--         p = lpegmatch(builder,str,1,",") -- after this we know n
1440--         if n > 0 then
1441--             p = format(template,template_shortcuts,arguments[n],p)
1442--             f = loadstripped(p)()
1443--         else
1444--             f = function() return str end
1445--         end
1446--     end
1447--     t[str] = f
1448--     return f
1449-- end
1450--
1451-- local formatteds  = string.formatteds or { }
1452-- string.formatteds = formatteds
1453--
1454-- setmetatable(formatteds, { __index = make, __call = use })
1455
1456-- This is a somewhat silly one used in commandline reconstruction but the older
-- method, using a combination of find, gsub, quoted and unquoted was not that
1458-- reliable.
1459--
1460-- '"foo"bar \"and " whatever"' => "foo\"bar \"and \" whatever"
1461-- 'foo"bar \"and " whatever'   => "foo\"bar \"and \" whatever"
1462
local dquote = patterns.dquote -- P('"')
-- one item of content: whatever patterns.escaped matches is kept (presumably a
-- backslash escape, defined elsewhere), a bare double quote becomes \" and any
-- other character passes as it is
local equote = patterns.escaped + dquote / '\\"' + 1
local cquote = Cc('"') -- adds a double quote without consuming input

-- First alternative: the string starts with a double quote; P(-2) succeeds when
-- fewer than two characters are left, so the content stops right before the last
-- character, which then has to be the closing quote. Second alternative: the
-- string contains a space; it is wrapped in quotes that are added by cquote.
local pattern =
    Cs(dquote * (equote - P(-2))^0 * dquote)                    -- we keep the outer but escape unescaped ones
  + Cs(cquote * (equote - space)^0 * space * equote^0 * cquote) -- we escape unescaped ones

-- Returns the quoted and escaped variant of str when one of the two alternatives
-- above applies and str itself otherwise.
function string.optionalquoted(str)
    return lpegmatch(pattern,str) or str
end
1474
-- every newline is replaced by os.newline (or "\r" when that one is not set),
-- the replacement is determined once, when the pattern is built

local pattern  do
    local replacement = os.newline or "\r"
    pattern = Cs((newline / replacement + 1)^0)
end

-- Returns a copy of str with the newlines replaced.
string.replacenewlines = function(str)
    return lpegmatch(pattern,str)
end
1480
1481--
1482
-- Returns two functions that share a private buffer: the first one appends a
-- string (fmt as it is when there are no further arguments, otherwise the result
-- of the default formatters applied to the arguments), the second one returns
-- the collected strings concatenated with an optional connector and starts
-- a new buffer.
function strings.newcollector()
    local result, r = { }, 0

    local function write(fmt,str,...)
        r = r + 1
        if str == nil and fmt then
            result[r] = fmt
        else
            result[r] = formatters[fmt](str,...)
        end
    end

    local function flush(connector)
        if not result then
            return
        end
        local str = concat(result,connector)
        result, r = { }, 0
        return str
    end

    return write, flush
end
1498
1499--
1500
local f_16_16 = formatters["%0.5N"]

-- Divides n by 65536 and formats the result with the "%0.5N" formatter.
number.to16dot16 = function(n)
    local scaled = n / 65536.0
    return f_16_16(scaled)
end
1506
1507--
1508
if not string.explode then

 -- local tsplitat = lpeg.tsplitat

    local p_utf   = patterns.utf8character
    -- the separator character, optionally followed by a + which sets a flag
    local p_check = C(p_utf) * (P("+") * Cc(true))^0
    -- every utf character becomes an entry
    local p_split = Ct(C(p_utf)^0)
    -- every run of non-space characters becomes an entry; the inner parentheses
    -- matter: 1-P(" ")^1 is parsed as 1-(P(" ")^1) and then each single
    -- character would end up as a separate entry
    local p_space = Ct((C((1-P(" "))^1) + P(" ")^1)^0)

    -- Splits str into a table of strings:
    --
    --   symbol == ""   : a list of the utf characters of str
    --   symbol == "x"  : split at each x
    --   symbol == "x+" : split at each run of one or more x
    --   no symbol      : split at runs of spaces (a list of words)
    function string.explode(str,symbol)
        if symbol == "" then
            return lpegmatch(p_split,str)
        elseif symbol then
            local a, b = lpegmatch(p_check,symbol)
            if b then
                return lpegmatch(tsplitat(P(a)^1),str)
            else
                return lpegmatch(tsplitat(a),str)
            end
        else
            return lpegmatch(p_space,str)
        end
    end

end
1534
1535
do

    local p_whitespace = patterns.whitespace^1

    -- creates, remembers and returns the splitter for separator k: a function
    -- that splits its argument at each k that has whitespace at both sides
    local function newsplitter(t,k)
        local splitter = tsplitat(p_whitespace * P(k) * p_whitespace)
        local function split(s)
            return lpegmatch(splitter,s)
        end
        t[k] = split
        return split
    end

    local cache = setmetatable({ }, { __index = newsplitter })

    -- Returns the (cached) split function for the separator word s.
    string.wordsplitter = function(s)
        return cache[s]
    end

end
1554
1555