1if not modules then modules = { } end modules ['util-str'] = {
2 version = 1.001,
3 comment = "companion to luat-lib.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9utilities = utilities or { }
10utilities.strings = utilities.strings or { }
11local strings = utilities.strings
12
13local format, gsub, rep, sub, find, char = string.format, string.gsub, string.rep, string.sub, string.find, string.char
14local load, dump = load, string.dump
15local tonumber, type, tostring, next, setmetatable = tonumber, type, tostring, next, setmetatable
16local unpack, concat = table.unpack, table.concat
17local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc
18local patterns, lpegmatch = lpeg.patterns, lpeg.match
19local tsplitat = lpeg.tsplitat
20local utfchar, utfbyte, utflen = utf.char, utf.byte, utf.len
21
22
23
24
-- Compile a piece of source, strip it by dumping with the strip flag set and
-- load the resulting binary chunk again. When 'shortcuts' is given it is
-- handed to load as the environment of the stripped chunk.
local loadstripped = function(str,shortcuts)
    local binary = dump(load(str),true)
    if not shortcuts then
        return load(binary)
    end
    return load(binary,nil,nil,shortcuts)
end
32
33
34
-- make sure the global 'number' namespace exists before helpers are added
number = number or { }
36
37local stripzero = patterns.stripzero
38local stripzeros = patterns.stripzeros
39local newline = patterns.newline
40local endofstring = patterns.endofstring
41local anything = patterns.anything
42local whitespace = patterns.whitespace
43local space = patterns.space
44local spacer = patterns.spacer
45local spaceortab = patterns.spaceortab
46local digit = patterns.digit
47local sign = patterns.sign
48local period = patterns.period
49
50
51
52
53
54
55
56
57
58
59
-- Scale factors applied to the incoming values (presumably TeX scaled
-- points, 65536 per point -- confirm against callers); bpf additionally
-- multiplies by 7200/7227.
local ptf = 1 / 65536
local bpf = (7200/7227) / 65536

-- Format n * ptf with a "pt" suffix. Anything that is not a number (or is
-- zero) gives "0pt"; whole values are written as integers, other values with
-- five decimals that are then passed through the stripzeros pattern.
local function points(n)
    n = tonumber(n)
    if not n or n == 0 then
        return "0pt"
    end
    local scaled = n * ptf
    if scaled % 1 ~= 0 then
        return lpegmatch(stripzeros,format("%.5fpt",scaled))
    end
    return format("%ipt",scaled)
end
78
-- Like points but without the unit: returns n * ptf as a string, "0" for
-- zero or non numeric input. Non whole values keep all five decimals (no
-- zero stripping here).
local function nupoints(n)
    n = tonumber(n)
    if n and n ~= 0 then
        local scaled = n * ptf
        return format(scaled % 1 == 0 and "%i" or "%.5f",scaled)
    end
    return "0"
end
94
-- Format n * bpf with a "bp" suffix. Zero or non numeric input gives "0bp";
-- whole values are written as integers, other values with five decimals that
-- are then passed through the stripzeros pattern.
local function basepoints(n)
    n = tonumber(n)
    if not n or n == 0 then
        return "0bp"
    end
    local scaled = n * bpf
    if scaled % 1 ~= 0 then
        return lpegmatch(stripzeros,format("%.5fbp",scaled))
    end
    return format("%ibp",scaled)
end
110
-- Like basepoints but without the unit: returns n * bpf as a string, "0" for
-- zero or non numeric input. Non whole values keep all five decimals.
local function nubasepoints(n)
    n = tonumber(n)
    if n and n ~= 0 then
        local scaled = n * bpf
        return format(scaled % 1 == 0 and "%i" or "%.5f",scaled)
    end
    return "0"
end
126
-- publish the four converters in the number namespace
number.points,     number.nupoints     = points,     nupoints
number.basepoints, number.nubasepoints = basepoints, nubasepoints
131
132
133
134
-- Pieces used by strings.collapsecrlf:
-- rubish    : optional spaces/tabs followed by a newline
-- anyrubish : a single space, tab or newline
-- stripped  : spaces/tabs in front of a newline are removed, the newline stays
-- leading   : any number of rubish lines, removed (used at the start)
-- trailing  : spaces, tabs and newlines up to the end of the string, removed
-- redundant : three or more rubish lines, replaced by one "\n"
local rubish = spaceortab^0 * newline
local anyrubish = spaceortab + newline
local stripped = (spaceortab^1 / "") * newline
local leading = rubish^0 / ""
local trailing = (anyrubish^1 * endofstring) / ""
local redundant = rubish^3 / "\n"

-- the alternatives are tried in this order for every position after the start
local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0)

-- Returns str with the substitutions described above applied.
function strings.collapsecrlf(str)
    return lpegmatch(pattern,str)
end
147
148
149
-- repeaters[str][offset] holds the table that was handed out before
local repeaters = { }

-- Returns a table r for which r[k] is str repeated (k + offset) times, or ""
-- when that count is not positive. Values are computed on first access and
-- then stored in the table; the same table is returned for the same
-- str/offset combination.
function strings.newrepeater(str,offset)
    offset = offset or 0
    local byoffset = repeaters[str]
    if not byoffset then
        byoffset = { }
        repeaters[str] = byoffset
    end
    local known = byoffset[offset]
    if known then
        return known
    end
    local function fill(self,count)
        if not count then
            return ""
        end
        local total = count + offset
        local value = ""
        if total > 0 then
            value = rep(str,total)
        end
        self[count] = value
        return value
    end
    known = setmetatable({ }, { __index = fill })
    byoffset[offset] = known
    return known
end
176
177
178
179
-- state shared by the callbacks of the tab expansion pattern; the original
-- statement carried a fourth initializer that Lua silently discards
local extra, tab, start = 0, 0, 4

-- nspaces[n] is a string of n spaces
local nspaces = strings.newrepeater(" ")

string.nspaces = nspaces
185
-- The first capture only initializes the shared state from the extra match
-- argument (the tab width); the substitution then replaces each tab by the
-- number of spaces needed to reach the next multiple of the width, and
-- resets the bookkeeping after every newline.
local pattern =
    (Carg(1) / function(width)
        extra, tab, start = 0, width or 7, 1
    end)
  * Cs((
        (Cp() * patterns.tab) / function(position)
            -- column in the expanded line: offset in the source line plus
            -- the characters added by earlier expansions
            local column  = (position - start + 1) + extra
            local missing = tab - (column - 1) % tab
            if missing <= 0 then
                return ""
            end
            extra = extra + missing - 1
            return nspaces[missing]
        end
      + (newline * Cp()) / function(position)
            -- no return value: the newline itself is kept as is
            extra, start = 0, position
        end
      + anything
    )^1)

-- Returns str with tabs expanded to spaces; tab is the width and defaults to 7.
function strings.tabtospace(str,tab)
    if not tab then
        tab = 7
    end
    return lpegmatch(pattern,str,1,tab)
end
211
-- Returns the spaces needed to pad s to a width of |n| characters (length as
-- reported by utflen). No or zero n gives an empty string, and so does a
-- string that is already wide enough.
function string.utfpadding(s,n)
    if not n or n == 0 then
        return ""
    end
    local length = utflen(s)
    local width
    if n > 0 then
        width = n
    else
        width = -n
    end
    return nspaces[width - length]
end
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
-- Building blocks for the line stripping patterns below.

-- zero or more spacer characters, and the same run removed from the result
local optionalspace = spacer^0
local nospace = optionalspace/""
-- a newline; spacers in front of it are removed
local endofline = nospace * newline

-- whitespace that runs up to the end of the string is removed
local stripend = (whitespace^1 * endofstring)/""

-- the content of one line: spacers at both ends are removed, the middle part
-- stops before optional spacers that are followed by a newline or the end
local normalline = (nospace * ((1-optionalspace*(newline+endofstring))^1) * nospace)

-- ways of dealing with a run of (otherwise empty) line ends:
-- stripempty  : all of them are removed
-- normalempty : all of them are kept
-- singleempty : the first is kept, the rest removed
-- doubleempty : at most two are kept, the rest removed
local stripempty = endofline^1/""
local normalempty = endofline^1
local singleempty = endofline * (endofline^0/"")
local doubleempty = endofline * endofline^-1 * (endofline^0/"")
-- empty lines at the very start are removed
local stripstart = stripempty^0

-- whitespace runs: replaced by one space, removed, or removed at the end
local intospace = whitespace^1/" "
local noleading = whitespace^1/""
local notrailing = noleading * endofstring

-- the "prune" variants start with stripstart (or noleading) and also apply
-- stripend; the "retain" variants only normalize lines and empty line runs
local p_prune_normal = Cs ( stripstart * ( stripend + normalline + normalempty )^0 )
local p_prune_collapse = Cs ( stripstart * ( stripend + normalline + doubleempty )^0 )
local p_prune_noempty = Cs ( stripstart * ( stripend + normalline + singleempty )^0 )
local p_prune_intospace = Cs ( noleading * ( notrailing + intospace + 1 )^0 )
local p_retain_normal = Cs ( ( normalline + normalempty )^0 )
local p_retain_collapse = Cs ( ( normalline + doubleempty )^0 )
local p_retain_noempty = Cs ( ( normalline + singleempty )^0 )
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
-- Maps the name of a stripping method onto its pattern; any name that is not
-- listed (including nil) resolves to the "prune and collapse" pattern.
local striplinepatterns = setmetatable({
    prune                   = p_prune_normal,
    ["prune and collapse"]  = p_prune_collapse,
    ["prune and no empty"]  = p_prune_noempty,
    ["prune and to space"]  = p_prune_intospace,
    retain                  = p_retain_normal,
    ["retain and collapse"] = p_retain_collapse,
    ["retain and no empty"] = p_retain_noempty,
    collapse                = patterns.collapser,
}, {
    __index = function()
        return p_prune_collapse
    end,
})

strings.striplinepatterns = striplinepatterns
307
-- Applies the stripping pattern selected by 'how' to str. A false or nil str
-- is returned as is, and so is str when the pattern does not match.
function strings.striplines(str,how)
    if not str then
        return str
    end
    return lpegmatch(striplinepatterns[how],str) or str
end
311
-- Applies the "prune and to space" pattern: whitespace at both ends goes and
-- inner whitespace runs become a single space. A false or nil str is
-- returned as is, and so is str when the pattern does not match.
function strings.collapse(str)
    if not str then
        return str
    end
    return lpegmatch(p_prune_intospace,str) or str
end
315
316
317
-- striplong is a second name for the very same function as striplines
strings.striplong = strings.striplines
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
-- Replaces every run of colons, hyphens, plus signs and underscores in str by
-- a single space.
function strings.nice(str)
    -- the parentheses drop the replacement count that gsub returns as well
    return (gsub(str,"[:%-+_]+"," "))
end
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
-- Argument counter shared by the format_* generators further down: each of
-- them bumps it to find out which argument (a1, a2, ...) it consumes, and
-- make() resets it to zero before running the builder.
local n = 0
417
418
419
420
421
422
423
424local sequenced = table.sequenced
425
-- Returns s wrapped in double quotes: nil gives an empty quoted string,
-- numbers are returned unquoted, tables are serialized with sequenced (using
-- sep, default ","), anything else goes through tostring.
function string.autodouble(s,sep)
    if s == nil then
        return '""'
    end
    local kind = type(s)
    if kind == "number" then
        return tostring(s)
    end
    local body
    if kind == "table" then
        body = sequenced(s,sep or ",")
    else
        body = tostring(s)
    end
    return '"' .. body .. '"'
end
439
-- Returns s wrapped in single quotes: nil gives an empty quoted string,
-- numbers are returned unquoted, tables are serialized with sequenced (using
-- sep, default ","), anything else goes through tostring.
function string.autosingle(s,sep)
    if s == nil then
        return "''"
    end
    local kind = type(s)
    if kind == "number" then
        return tostring(s)
    end
    local body
    if kind == "table" then
        body = sequenced(s,sep or ",")
    else
        body = tostring(s)
    end
    return "'" .. body .. "'"
end
453
-- Readable names for the character codes 0 up to and including 32; the
-- positional entries after [0] get the indices 1, 2, 3, ...
local tracedchars = {
    [0] = "[null]",
    -- 1 .. 7
    "[soh]", "[stx]", "[etx]", "[eot]", "[enq]", "[ack]", "[bel]",
    -- 8 .. 15
    "[bs]", "[ht]", "[lf]", "[vt]", "[ff]", "[cr]", "[so]", "[si]",
    -- 16 .. 23
    "[dle]", "[dc1]", "[dc2]", "[dc3]", "[dc4]", "[nak]", "[syn]", "[etb]",
    -- 24 .. 31
    "[can]", "[em]", "[sub]", "[esc]", "[fs]", "[gs]", "[rs]", "[us]",
    -- 32
    "[space]",
}

-- the same table is reachable under two names
string.tracedchars = tracedchars
strings.tracers    = tracedchars
466
-- Returns a readable form of a character given as code (number) or as
-- string: the name from tracedchars when there is one, otherwise the
-- character followed by its code as "(U+XXXXX)". When no code can be
-- determined for a string, question marks are shown instead.
function string.tracedchar(b)
    if type(b) ~= "number" then
        local code  = utfbyte(b)
        local known = tracedchars[code]
        if known then
            return known
        end
        local hex = "?????"
        if code then
            hex = format("%05X",code)
        end
        return b .. " (U+" .. hex .. ")"
    end
    local known = tracedchars[b]
    if known then
        return known
    end
    return utfchar(b) .. " (U+" .. format("%05X",b) .. ")"
end
476
-- Splits a number into a sign string and its magnitude, in that order, so
-- that both can be fed to a '%s%i' style format.
--
-- Zero now gets a plus sign (as printf's '+' flag does); before it took the
-- negative branch and came out as "-" followed by a negated zero.
function number.signed(i)
    if i >= 0 then
        return "+", i
    else
        return "-", -i
    end
end
484
485
486
-- Patterns behind number.formatted. The extra match arguments carry the
-- separators: Carg(1) is put in front of every group of three digits and
-- Carg(2) replaces a single character in 'splitter'.
local two = digit * digit
local three = two * digit
-- one or more groups of three digits, each preceded by the first separator
local prefix = (Carg(1) * three)^1

-- Roughly: the leading part up to a period is regrouped per three digits (or
-- taken as a whole when that fails), then one character is dropped in favour
-- of the second separator and exactly two more characters are taken.
-- NOTE(review): number.formatted feeds numbers through "%0.2f", so the dropped
-- character is presumably the decimal period and the two characters the
-- decimals -- confirm.
local splitter = Cs (
    (((1 - (three^1 * period))^1 + C(three)) * prefix + C((1-period)^1))
  * (anything/"" * Carg(2)) * C(2)
)

-- Variant without decimals: three, two or one leading digits followed by
-- separator prefixed groups up to the end of the string, or else just the
-- leading three, two or one digits.
local splitter3 = Cs (
    three * prefix * endofstring +
    two * prefix * endofstring +
    digit * prefix * endofstring +
    three +
    two +
    digit
)

patterns.formattednumber = splitter
506
-- Formats n with separators by running it through the splitter patterns.
--   sep1 == false : splitter3 is used with sep2 (default ".") as separator
--                   and a number is converted with tostring
--   otherwise     : a number is first formatted with "%0.2f" and splitter is
--                   used with two separators:
--                     sep1 == true : "." and ","
--                     sep1 == "."  : "." and sep2 (default ",")
--                     sep1 == ","  : "," and sep2 (default ".")
--                     else         : sep1 (default ",") and sep2 (default ".")
function number.formatted(n,sep1,sep2)
    local isnumber = type(n) == "number"
    if sep1 == false then
        if isnumber then
            n = tostring(n)
        end
        return lpegmatch(splitter3,n,1,sep2 or ".")
    end
    if isnumber then
        n = format("%0.2f",n)
    end
    local first, second
    if sep1 == true then
        first, second = ".", ","
    elseif sep1 == "." then
        first, second = sep1, sep2 or ","
    elseif sep1 == "," then
        first, second = sep1, sep2 or "."
    else
        first, second = sep1 or ",", sep2 or "."
    end
    return lpegmatch(splitter,n,1,first,second)
end
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
-- Applied to an already formatted number: zeros right after an optional
-- minus sign are removed, and so is a fractional part that consists of zeros
-- only.
local p = Cs(
    P("-")^0
  * (P("0")^1/"")^0
  * (1-period)^0
  * (period * P("0")^1 * endofstring/"" + period^0)
  * P(1-P("0")^1*endofstring)^0
)

-- Returns n formatted with fmt (default "%0.3f") and compacted with the
-- pattern above. Zero and one are answered right away, and a result with no
-- digits left ("", "." or "-") becomes "0".
function number.compactfloat(n,fmt)
    if n == 0 then
        return "0"
    end
    if n == 1 then
        return "1"
    end
    local compact = lpegmatch(p,format(fmt or "%0.3f",n))
    if compact == "" or compact == "." or compact == "-" then
        return "0"
    end
    return compact
end
568
-- Pieces for number.sparseexponent, which cleans up "%e" style output.
-- zero removes a run of zeros, plus removes a plus sign, minus keeps the sign.
local zero = P("0")^1 / ""
local plus = P("+") / ""
local minus = P("-")
local separator = period
-- zeros that are directly followed by the exponent marker
local trailing = zero^1 * #S("eE")
-- the exponent: marker, sign handling, leading zeros removed; when nothing
-- is left after that a single "0" is inserted
local exponent = (S("eE") * (plus + Cs((minus * zero^0 * endofstring)/"") + minus) * zero^0 * (endofstring * Cc("0") + anything^1))
-- pattern_a is used for plain "%e"/"%E" output (mantissa and exponent are both
-- cleaned up), pattern_b for other formats (only the exponent is cleaned up)
local pattern_a = Cs(minus^0 * digit^1 * (separator/"" * trailing + separator * (trailing + digit)^0) * exponent)
local pattern_b = Cs((exponent + anything)^0)
577
-- Formats n with f and strips superfluous characters from the exponent
-- notation. When called with one argument that argument is the number and
-- "%e" is used as format. A string is converted with tonumber first; when
-- there is no usable number the result is simply tostring(n).
function number.sparseexponent(f,n)
    if not n then
        f, n = "%e", f
    end
    local value
    local kind = type(n)
    if kind == "number" then
        value = n
    elseif kind == "string" then
        value = tonumber(n)
    end
    if value == nil then
        return tostring(n)
    end
    -- the stricter pattern only fits the output of a bare %e or %E
    local chosen = (f == "%e" or f == "%E") and pattern_a or pattern_b
    return lpegmatch(chosen,format(f,value))
end
594
-- caches for format strings: hf[k] is "%.<k>f" and hs[k] is "%<k>s"
local hf = { }
local hs = { }

setmetatable(hf, { __index = function(t,k)
    local v = "%." .. k .. "f"
    t[k] = v
    return v
end } )

setmetatable(hs, { __index = function(t,k)
    local v = "%" .. k .. "s"
    t[k] = v
    return v
end } )

-- Formats n with 'a' digits after the period and pads the result with
-- leading spaces to a width of b + a + 1 characters (digits before the
-- period, the period, digits after it).
--
-- n : the number
-- b : digits before the period (default 0)
-- a : digits after the period (default 0)
--
-- Both counts are defaulted up front: before, a missing 'a' was guarded in
-- the width calculation but still used to index the format cache, which
-- raised an error.
function number.formattedfloat(n,b,a)
    a = a or 0
    b = b or 0
    local s = format(hf[a],n)
    local l = b + a + 1
    if #s < l then
        return format(hs[l],s)
    else
        return s
    end
end
619
-- Source skeleton of a generated formatter. make() fills the four slots with
-- the file level preamble, the preamble of the formatter, the argument list
-- (a1,a2,...) and the expression produced by the builder.
local template = [[
%s
%s
return function(%s) return %s end
]]
625
626
627
do

    -- anything that can be copied as is
    local plain   = (1-S('"\\\n\r'))^1
    -- characters that get a backslash escape
    local escaped = P('"')  / '\\"'
                  + P('\\') / '\\\\'
                  + P('\n') / '\\n'
                  + P('\r') / '\\r'

    -- the result is wrapped in double quotes
    local quoted  = Cs(Cc('"') * (plain + escaped)^0 * Cc('"'))

    patterns.escapedquotes = quoted

    -- Returns s between double quotes with double quotes, backslashes,
    -- newlines and returns escaped.
    function string.escapedquotes(s)
        return lpegmatch(quoted,s)
    end

end
651
-- A run of characters without backslash is used twice: as the mandatory start
-- of the string and as the filler between escapes. In an escape the
-- backslash is dropped and the (up to three) digits after it are replaced by
-- the character with that code.
-- NOTE(review): digit^-3 also matches zero digits; tonumber("") gives nil
-- then and char() will presumably raise an error -- confirm whether a
-- backslash that is not followed by digits can occur here.
local pattern = (1 - P("\\"))^1 ; pattern = Cs (
    pattern
  * ( (P("\\") / "" * (digit^-3 / function(s) return char(tonumber(s)) end)) + pattern )^1
)

patterns.unescapedquotes = pattern

-- Returns the converted string, or s itself when the pattern does not match.
function string.unescapedquotes(s)
    return lpegmatch(pattern,s) or s
end
662
663
664
665
666
667
668
669
670
-- lpeg.replacer is defined outside this file; presumably this yields a
-- function that replaces every newline by "\r" -- TODO confirm the meaning of
-- the third argument
string.texnewlines = lpeg.replacer(patterns.newline,"\r",true)
672
673
674
675
676
-- code that is put in front of every generated formatter (nothing by default)
local preamble = ""

-- Names that generated formatter code can refer to. Every formatter made by
-- strings.formatters.new starts out with a private copy of this table.
local environment = {
    -- the outside world
    global          = global or _G,
    lpeg            = lpeg,
    lpegmatch       = lpeg.match,
    -- basic functions
    type            = type,
    tostring        = tostring,
    tonumber        = tonumber,
    format          = string.format,
    concat          = table.concat,
    sequenced       = table.sequenced,
    -- utf
    utfchar         = utf.char,
    utfbyte         = utf.byte,
    -- numbers
    signed          = number.signed,
    points          = number.points,
    nupoints        = number.nupoints,
    basepoints      = number.basepoints,
    nubasepoints    = number.nubasepoints,
    formattednumber = number.formatted,
    sparseexponent  = number.sparseexponent,
    formattedfloat  = number.formattedfloat,
    -- strings
    nspaces         = string.nspaces,
    utfpadding      = string.utfpadding,
    tracedchar      = string.tracedchar,
    autosingle      = string.autosingle,
    autodouble      = string.autodouble,
    escapedquotes   = string.escapedquotes,
    -- patterns
    stripzero       = patterns.stripzero,
    stripzeros      = patterns.stripzeros,
    -- only set when string.f6 exists at load time
    FORMAT          = string.f6,
}
710
711
712
-- arguments[k] is the parameter list "a1,a2,...,ak"; missing entries are
-- derived from the previous one (which may trigger the same lookup) and kept
local arguments = setmetatable({ "a1" }, {
    __index = function(list,count)
        local names = list[count-1] .. ",a" .. count
        list[count] = names
        return names
    end,
})
722
-- What may sit between the percent sign and the directive letter:
-- prefix_any : any mix of signs, spaces, periods and digits, as one capture
-- prefix_sub : two sign/digit runs around a period, as two captures
-- prefix_tab : text between braces (braces not captured), or else a run of
--              characters that are not letters, digits or percent signs
local prefix_any = C((sign + space + period + digit)^0)
local prefix_sub = (C((sign + digit)^0) + Cc(0))
    * period
    * (C((sign + digit)^0) + Cc(0))
local prefix_tab = P("{") * C((1-P("}"))^0) * P("}") + C((1-R("az","AZ","09","%%"))^0)
728
729
730
731
732
-- %s: the argument itself (or '' when it is missing); with a specification
-- the work is handed to string.format
local function format_s(f)
    n = n + 1
    if not f or f == "" then
        return format("(a%s or '')",n)
    end
    return format("format('%%%ss',a%s)",f,n)
end
741
-- %S: like %s but the argument always goes through tostring
local function format_S(f)
    n = n + 1
    if not f or f == "" then
        return format("tostring(a%s)",n)
    end
    return format("format('%%%ss',tostring(a%s))",f,n)
end
750
-- %>: pads the argument with utfpadding; a positive width puts the padding
-- in front, a negative one behind, and no (or zero) width means no padding
local function format_right(f)
    n = n + 1
    local width = tonumber(f)
    if not width or width == 0 then
        return format("(a%s or '')",n)
    end
    if width > 0 then
        return format("utfpadding(a%s,%i)..a%s",n,width,n)
    end
    return format("a%s..utfpadding(a%s,%i)",n,n,width)
end
762
-- %<: the mirror image of %>: a positive width puts the padding behind the
-- argument, a negative one in front; the width is passed on negated
local function format_left(f)
    n = n + 1
    local width = tonumber(f)
    if not width or width == 0 then
        return format("(a%s or '')",n)
    end
    if width >= 0 then
        return format("a%s..utfpadding(a%s,%i)",n,n,-width)
    end
    return format("utfpadding(a%s,%i)..a%s",n,-width,n)
end
775
-- %q: string.format's %q for arguments that are not nil, '' otherwise; when
-- JITSUPPORTED is set the argument is passed through tostring first
local format_q

if JITSUPPORTED then
    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',tostring(a%s)) or '')",n,n)
    end
else
    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',a%s) or '')",n,n)
    end
end
786
787
-- %Q: the tostring'd argument, quoted and escaped by escapedquotes
local function format_Q()
    local index = n + 1
    n = index
    return format("escapedquotes(tostring(a%s))",index)
end
793
-- %i: integer, with an optional specification
local function format_i(f)
    n = n + 1
    if not f or f == "" then
        return format("format('%%i',a%s)",n)
    end
    return format("format('%%%si',a%s)",f,n)
end

-- %d is handled by the same generator
local format_d = format_i
804
-- %I: integer with an explicit sign; signed() delivers the sign and the value
local function format_I(f)
    local index = n + 1
    n = index
    return format("format('%%s%%%si',signed(a%s))",f,index)
end
809
-- %f: float, handed to string.format together with the specification
local function format_f(f)
    local index = n + 1
    n = index
    return format("format('%%%sf',a%s)",f,index)
end
814
815
816
817
818
819
820
821
822
823
-- %F: whole values are written as integer, other values as float. Without a
-- specification nine decimals are used and values very close to zero are
-- written as '0'.
local function format_F(f)
    n = n + 1
    if f and f ~= "" then
        return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,f,n)
    end
    return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n)
end
832
833
834
835
836
837
838
839
840
841
842
843
-- %b.ak: delegates to formattedfloat with the two counts (0 when missing)
local function format_k(b,a)
    local index = n + 1
    n = index
    return format("formattedfloat(a%s,%s,%s)",index,b or 0,a or 0)
end
848
-- %g: handed to string.format together with the specification
local function format_g(f)
    local index = n + 1
    n = index
    return format("format('%%%sg',a%s)",f,index)
end
853
-- %G: handed to string.format together with the specification
local function format_G(f)
    local index = n + 1
    n = index
    return format("format('%%%sG',a%s)",f,index)
end
858
-- %e: handed to string.format together with the specification
local function format_e(f)
    local index = n + 1
    n = index
    return format("format('%%%se',a%s)",f,index)
end
863
-- %E: handed to string.format together with the specification
local function format_E(f)
    local index = n + 1
    n = index
    return format("format('%%%sE',a%s)",f,index)
end
868
-- %j: %e style output that is cleaned up by sparseexponent
local function format_j(f)
    local index = n + 1
    n = index
    return format("sparseexponent('%%%se',a%s)",f,index)
end
873
-- %J: %E style output that is cleaned up by sparseexponent
local function format_J(f)
    local index = n + 1
    n = index
    return format("sparseexponent('%%%sE',a%s)",f,index)
end
878
-- %x: lowercase hexadecimal, handed to string.format
local function format_x(f)
    local index = n + 1
    n = index
    return format("format('%%%sx',a%s)",f,index)
end
883
-- %X: uppercase hexadecimal, handed to string.format
local function format_X(f)
    local index = n + 1
    n = index
    return format("format('%%%sX',a%s)",f,index)
end
888
-- %o: octal, handed to string.format
local function format_o(f)
    local index = n + 1
    n = index
    return format("format('%%%so',a%s)",f,index)
end
893
-- %c: the argument is turned into a character by utfchar
local function format_c()
    local index = n + 1
    n = index
    return format("utfchar(a%s)",index)
end
898
-- %C: the argument is shown in readable form by tracedchar
local function format_C()
    local index = n + 1
    n = index
    return format("tracedchar(a%s)",index)
end
903
-- %r: rounds by formatting as a float without decimals
local function format_r(f)
    local index = n + 1
    n = index
    return format("format('%%%s.0f',a%s)",f,index)
end
908
-- %h: lowercase hexadecimal of a number, or of the code of a character,
-- prefixed by 0x. The default specification is 05. A specification that is
-- exactly "-" drops the prefix (nothing remains of the specification then,
-- so the default applies); any other specification is passed on as is.
local function format_h(f)
    n = n + 1
    local lead = "0x"
    if f == "-" then
        lead, f = "", ""
    end
    if f == "" then
        f = "05"
    end
    return format("format('" .. lead .. "%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f,n,n,n)
end
918
-- %H: uppercase hexadecimal of a number, or of the code of a character,
-- prefixed by 0x. The default specification is 05. A specification that is
-- exactly "-" drops the prefix (nothing remains of the specification then,
-- so the default applies); any other specification is passed on as is.
local function format_H(f)
    n = n + 1
    local lead = "0x"
    if f == "-" then
        lead, f = "", ""
    end
    if f == "" then
        f = "05"
    end
    return format("format('" .. lead .. "%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f,n,n,n)
end
928
-- %u: lowercase hexadecimal of a number, or of the code of a character,
-- prefixed by u+. The default specification is 05. A specification that is
-- exactly "-" drops the prefix (nothing remains of the specification then,
-- so the default applies); any other specification is passed on as is.
local function format_u(f)
    n = n + 1
    local lead = "u+"
    if f == "-" then
        lead, f = "", ""
    end
    if f == "" then
        f = "05"
    end
    return format("format('" .. lead .. "%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f,n,n,n)
end
938
-- %U: uppercase hexadecimal of a number, or of the code of a character,
-- prefixed by U+. The default specification is 05. A specification that is
-- exactly "-" drops the prefix (nothing remains of the specification then,
-- so the default applies); any other specification is passed on as is.
local function format_U(f)
    n = n + 1
    local lead = "U+"
    if f == "-" then
        lead, f = "", ""
    end
    if f == "" then
        f = "05"
    end
    return format("format('" .. lead .. "%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f,n,n,n)
end
948
-- %p: delegates to points()
local function format_p()
    local index = n + 1
    n = index
    return format("points(a%s)",index)
end
953
-- %P: delegates to nupoints()
local function format_P()
    local index = n + 1
    n = index
    return format("nupoints(a%s)",index)
end
958
-- %b: delegates to basepoints()
local function format_b()
    local index = n + 1
    n = index
    return format("basepoints(a%s)",index)
end
963
-- %B: delegates to nubasepoints()
local function format_B()
    local index = n + 1
    n = index
    return format("nubasepoints(a%s)",index)
end
968
-- %t: concatenates a table argument, with the given text (if any) as separator
local function format_t(f)
    n = n + 1
    if not f or f == "" then
        return format("concat(a%s)",n)
    end
    return format("concat(a%s,%q)",n,f)
end
977
-- %T: serializes a table argument with sequenced, with the given text (if
-- any) as separator
local function format_T(f)
    n = n + 1
    if not f or f == "" then
        return format("sequenced(a%s)",n)
    end
    return format("sequenced(a%s,%q)",n,f)
end
986
-- %l: the argument as lowercase boolean text
local function format_l()
    local index = n + 1
    n = index
    return format("(a%s and 'true' or 'false')",index)
end
991
-- %L: the argument as uppercase boolean text
local function format_L()
    local index = n + 1
    n = index
    return format("(a%s and 'TRUE' or 'FALSE')",index)
end
996
-- %n: whole values are written as integer, anything else through tostring
local function format_n()
    local index = n + 1
    n = index
    return format("((a%s %% 1 == 0) and format('%%i',a%s) or tostring(a%s))",index,index,index)
end
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
-- %N: number without superfluous trailing zeros. Which generator is used
-- depends on the environment having a FORMAT function: if so the work is
-- delegated to it, otherwise whole values are written as integer and other
-- values are formatted (default precision .9) and passed through stripzero.
local format_N

if not environment.FORMAT then

    format_N = function(f)
        n = n + 1
        if not f or f == "" then
            f = ".9"
        end
        return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,f,n)
    end

else

    format_N = function(f)
        n = n + 1
        if not f or f == "" then
            return format("FORMAT(a%s,'%%.9f')",n)
        end
        if f == ".6" or f == "0.6" then
            -- FORMAT is called without an explicit format for this precision
            return format("FORMAT(a%s)",n)
        end
        return format("FORMAT(a%s,'%%%sf')",n,f)
    end

end
1052
-- %a: delegates to autosingle, passing the given text (if any) as separator
local function format_a(f)
    n = n + 1
    if not f or f == "" then
        return format("autosingle(a%s)",n)
    end
    return format("autosingle(a%s,%q)",n,f)
end
1061
-- %A: delegates to autodouble, passing the given text (if any) as separator
local function format_A(f)
    n = n + 1
    if not f or f == "" then
        return format("autodouble(a%s)",n)
    end
    return format("autodouble(a%s,%q)",n,f)
end
1070
-- %w: as many spaces as the argument says, plus the given number (if any)
local function format_w(f)
    n = n + 1
    local extra = tonumber(f)
    if not extra then
        return format("nspaces[a%s]",n)
    end
    return format("nspaces[%s+a%s]",extra,n)
end
1080
-- %W: a fixed number of spaces; no argument is consumed, so the shared
-- counter is left alone
local function format_W(f)
    local count = tonumber(f) or 0
    return format("nspaces[%s]",count)
end
1084
-- %m: formatted number with the given first separator (default ",") and "."
-- as second one; a specification of 0 selects the variant without decimals
local function format_m(f)
    n = n + 1
    if not f or f == "" then
        f = ","
    end
    if f ~= "0" then
        return format([[formattednumber(a%s,%q,".")]],n,f)
    end
    return format([[formattednumber(a%s,false)]],n)
end
1096
-- %M: formatted number with the given first separator (default ".") and ","
-- as second one; a specification of 0 selects the variant without decimals
local function format_M(f)
    n = n + 1
    if not f or f == "" then
        f = "."
    end
    if f ~= "0" then
        return format([[formattednumber(a%s,%q,",")]],n,f)
    end
    return format([[formattednumber(a%s,false)]],n)
end
1108
1109
1110
-- %z: skips arguments (one by default, or as many as specified) and
-- contributes an empty string
local function format_z(f)
    local skip = tonumber(f) or 1
    n = n + skip
    return "''"
end
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
-- literal text between directives becomes a quoted Lua string
local function format_rest(s)
    local quoted = format("%q",s)
    return quoted
end
1141
-- %!name!: expands the template registered under 'name' (or tostring(%s)
-- when there is none). The number in front of the first exclamation mark
-- tells how many arguments are used (default 1):
--    0 : none; a "..." in the template is removed
--    1 : the next argument; a "..." becomes a slot for it
--   <0 : no new argument is consumed; an argument seen before is referred
--        to, counted back from the current one (a "..." is just removed)
--   >1 : that many next arguments; a "..." becomes a comma separated list
local function format_extension(extensions,f,name)
    local extension = extensions[name] or "tostring(%s)"
    local count     = tonumber(f) or 1
    local variadic  = find(extension,"%.%.%.")
    if count == 0 then
        if variadic then
            extension = gsub(extension,"%.%.%.","")
        end
        return extension
    end
    if count == 1 then
        if variadic then
            extension = gsub(extension,"%.%.%.","%%s")
        end
        n = n + 1
        local a = "a" .. n
        -- passed twice so that a template can mention its argument twice
        return format(extension,a,a)
    end
    if count < 0 then
        if variadic then
            extension = gsub(extension,"%.%.%.","")
            return extension
        end
        local a = "a" .. (n + count + 1)
        return format(extension,a,a)
    end
    if variadic then
        extension = gsub(extension,"%.%.%.",rep("%%s,",count-1).."%%s")
    end
    local names = { }
    for i=1,count do
        n = n + 1
        names[i] = "a" .. n
    end
    return format(extension,unpack(names))
end
1182
1183
1184
1185
1186
1187
1188
-- The grammar that turns a format specification into the source of a Lua
-- expression. Every directive is replaced by the code that its format_*
-- generator returns and literal text becomes a quoted string; between two
-- pieces the first extra match argument (the connector) is inserted. The
-- second extra argument (the extensions table) is only used by the "!" rule.
local builder = Cs { "start",
    start = (
        (
            -- the percent sign itself is dropped
            P("%") / ""
          * (
                V("!")
              + V("s") + V("q")
              + V("i") + V("d")
              + V("f") + V("F") + V("g") + V("G") + V("e") + V("E")
              + V("x") + V("X") + V("o")

              + V("c")
              + V("C")
              + V("S")
              + V("Q")
              + V("n")
              + V("N")
              + V("k")

              + V("r")
              + V("h") + V("H") + V("u") + V("U")
              + V("p") + V("P") + V("b") + V("B")
              + V("t") + V("T")
              + V("l") + V("L")
              + V("I")
              + V("w")
              + V("W")
              + V("a")
              + V("A")
              + V("j") + V("J")
              + V("m") + V("M")
              + V("z")

              + V(">")
              + V("<")


            )
            -- anything that is not a directive: literal text
          + V("*")
        )
        -- no connector after the last piece
      * (endofstring + Carg(1))
    )^0,

    -- directives with the same meaning as in string.format
    ["s"] = (prefix_any * P("s")) / format_s,
    ["q"] = (prefix_any * P("q")) / format_q,
    ["i"] = (prefix_any * P("i")) / format_i,
    ["d"] = (prefix_any * P("d")) / format_d,
    ["f"] = (prefix_any * P("f")) / format_f,
    ["F"] = (prefix_any * P("F")) / format_F,
    ["g"] = (prefix_any * P("g")) / format_g,
    ["G"] = (prefix_any * P("G")) / format_G,
    ["e"] = (prefix_any * P("e")) / format_e,
    ["E"] = (prefix_any * P("E")) / format_E,
    ["x"] = (prefix_any * P("x")) / format_x,
    ["X"] = (prefix_any * P("X")) / format_X,
    ["o"] = (prefix_any * P("o")) / format_o,

    -- additional directives
    ["S"] = (prefix_any * P("S")) / format_S,
    ["Q"] = (prefix_any * P("Q")) / format_Q,
    ["n"] = (prefix_any * P("n")) / format_n,
    ["N"] = (prefix_any * P("N")) / format_N,
    -- k takes two numbers around a period
    ["k"] = (prefix_sub * P("k")) / format_k,
    ["c"] = (prefix_any * P("c")) / format_c,
    ["C"] = (prefix_any * P("C")) / format_C,

    ["r"] = (prefix_any * P("r")) / format_r,
    ["h"] = (prefix_any * P("h")) / format_h,
    ["H"] = (prefix_any * P("H")) / format_H,
    ["u"] = (prefix_any * P("u")) / format_u,
    ["U"] = (prefix_any * P("U")) / format_U,
    ["p"] = (prefix_any * P("p")) / format_p,
    ["P"] = (prefix_any * P("P")) / format_P,
    ["b"] = (prefix_any * P("b")) / format_b,
    ["B"] = (prefix_any * P("B")) / format_B,
    -- t and T accept a separator, optionally between braces
    ["t"] = (prefix_tab * P("t")) / format_t,
    ["T"] = (prefix_tab * P("T")) / format_T,
    ["l"] = (prefix_any * P("l")) / format_l,
    ["L"] = (prefix_any * P("L")) / format_L,
    ["I"] = (prefix_any * P("I")) / format_I,

    ["w"] = (prefix_any * P("w")) / format_w,
    ["W"] = (prefix_any * P("W")) / format_W,

    ["j"] = (prefix_any * P("j")) / format_j,
    ["J"] = (prefix_any * P("J")) / format_J,

    ["m"] = (prefix_any * P("m")) / format_m,
    ["M"] = (prefix_any * P("M")) / format_M,

    ["z"] = (prefix_any * P("z")) / format_z,


    ["a"] = (prefix_any * P("a")) / format_a,
    ["A"] = (prefix_any * P("A")) / format_A,

    ["<"] = (prefix_any * P("<")) / format_left,
    [">"] = (prefix_any * P(">")) / format_right,

    -- literal text; a doubled percent sign stands for a percent sign
    ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest,
    -- not referred to by the start rule
    ["?"] = Cs(((1-P("%"))^1 )^1) / format_rest,

    -- %[count]!name!: the extensions table, the count and the name are
    -- handed to format_extension
    ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,
}
1292
1293
1294
-- two digit hexadecimal strings (lowercase and uppercase), computed on first
-- use and kept afterwards
local xx = setmetatable({ }, {
    __index = function(cache,byte)
        local hex = format("%02x",byte)
        cache[byte] = hex
        return hex
    end,
})

local XX = setmetatable({ }, {
    __index = function(cache,byte)
        local hex = format("%02X",byte)
        cache[byte] = hex
        return hex
    end,
})

-- formatters that are answered from the tables above instead of being built
local preset = {
    ["%02x"] = function(n)
        return xx[n]
    end,
    ["%02X"] = function(n)
        return XX[n]
    end,
}
1302
-- A specification that consists of exactly one directive that string.format
-- knows is turned into the source of a thin wrapper around string.format;
-- the %0 in the replacement stands for the complete match.
local direct =
    ( P("%") * (sign + space + period + digit)^0 * S("sqidfgGeExXo") * endofstring )
  / [[local format = string.format return function(str) return format("%0",str) end]]
1306
-- The __index handler of a formatter table: builds the function that goes
-- with the specification 'str', stores it in t and returns it.
--   * preset specifications are answered directly (and not stored)
--   * a single plain directive becomes a wrapper around string.format
--   * anything else is compiled from the code that the builder generates;
--     when no argument is consumed at all the function simply returns str
local function make(t,str)
    local known = preset[str]
    if known then
        return known
    end
    local f
    local source = lpegmatch(direct,str)
    if source then
        f = loadstripped(source)()
    else
        -- the generators count the arguments they use in the shared counter
        n = 0
        source = lpegmatch(builder,str,1,t._connector_,t._extensions_)
        if n > 0 then
            source = format(template,preamble,t._preamble_,arguments[n],source)
            f = loadstripped(source,t._environment_)()
        else
            f = function() return str end
        end
    end
    t[str] = f
    return f
end
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
-- The __call handler of a formatter table: looks up (or builds) the function
-- for fmt and applies it to the remaining arguments.
local function use(t,fmt,...)
    local formatter = t[fmt]
    return formatter(...)
end
1371
strings.formatters = { }

-- Creates a new formatter table. Indexing it with a specification gives the
-- matching function (built on demand by make), calling it formats right away
-- (see use). Each formatter has its own extensions, preamble and a private
-- copy of the environment. With 'noconcat' set the generated pieces are
-- separated by commas instead of being concatenated.
function strings.formatters.new(noconcat)
    local private = { }
    for name, value in next, environment do
        private[name] = value
    end
    local connector = ".."
    if noconcat then
        connector = ","
    end
    return setmetatable({
        _type_        = "formatter",
        _connector_   = connector,
        _extensions_  = { },
        _preamble_    = "",
        _environment_ = private,
    }, {
        __index = make,
        __call  = use,
    })
end
1395
-- the default formatter, also reachable as string.formatters
local formatters = strings.formatters.new()

string.formatters = formatters

-- Formats the given arguments according to the specification str, using the
-- default formatter.
function string.formatter(str,...)
    return formatters[str](...)
end
1400
-- Registers an extension (used as %!name!) with a formatter.
--   t        : a formatter table; anything else is silently ignored
--   name     : the name of the extension
--   template : the code template (default "%s")
--   preamble : a string is put in front of the preamble of the formatter, the
--              fields of a table are copied into its environment
local function add(t,name,template,preamble)
    if type(t) ~= "table" or t._type_ ~= "formatter" then
        return
    end
    t._extensions_[name] = template or "%s"
    local kind = type(preamble)
    if kind == "string" then
        t._preamble_ = preamble .. "\n" .. t._preamble_
    elseif kind == "table" then
        for key, value in next, preamble do
            t._environment_[key] = value
        end
    end
end

strings.formatters.add = add
1415
1416
1417
-- Replaces the characters that are special in xml (< > & and the double
-- quote) by their predefined entities. The replacement strings had been
-- reduced to the bare characters, which made this pattern a no-op and left
-- an unterminated string behind for the double quote.
patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + anything)^0)
-- texescape puts a backslash in front of # $ % \ { and }
patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + anything)^0)
-- luaescape escapes double quotes and newlines, luaquoted does the same and
-- wraps the result in double quotes
-- NOTE(review): the replacement for a newline is backslash, n and a double
-- quote; that trailing quote looks unintended -- confirm before relying on it
patterns.luaescape = Cs(((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n"')^0)
patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n"')^0 * Cc('"'))
1422
1423
1424
1425
-- %!xml!, %!tex! and %!lua! for the default formatter: each one runs its
-- argument through the escape pattern of the same name, which is made
-- available in the environment of the formatter
add(formatters,"xml","lpegmatch(xmlescape,%s)",{ xmlescape = patterns.xmlescape })
add(formatters,"tex","lpegmatch(texescape,%s)",{ texescape = patterns.texescape })
add(formatters,"lua","lpegmatch(luaescape,%s)",{ luaescape = patterns.luaescape })
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
local dquote = patterns.dquote
-- an inserted double quote
local cquote = Cc('"')
-- one step inside the string: patterns.escaped is tried first, then a bare
-- double quote (which gets a backslash), then any other character
local equote = patterns.escaped + dquote / '\\"' + 1

-- first alternative : the string starts with a double quote; the content is
--                     processed while at least two characters remain, after
--                     which a double quote is expected
-- second alternative: there is a space somewhere in the string; the whole
--                     string is then wrapped in double quotes
local pattern =
    Cs(dquote * (equote - P(-2))^0 * dquote)
  + Cs(cquote * (equote - space)^0 * space * equote^0 * cquote)

-- Returns the quoted form of str, or str itself when neither alternative
-- matches.
string.optionalquoted = function(str)
    local quoted = lpegmatch(pattern,str)
    return quoted or str
end
1474
do

    -- os.newline when it is set, a carriage return otherwise
    local eol      = os.newline or "\r"
    local replacer = Cs((newline / eol + 1)^0)

    -- Returns str with every newline replaced by the line ending chosen above.
    function string.replacenewlines(str)
        return lpegmatch(replacer,str)
    end

end
1480
1481
1482
-- Returns two functions that share a buffer:
--   collect(fmt,str,...) : appends fmt itself when there is no second
--                          argument, otherwise the result of formatting
--                          str,... according to fmt
--   flush(connector)     : returns everything collected so far, joined with
--                          connector, and starts with an empty buffer again
function strings.newcollector()
    local result, r = { }, 0
    local function collect(fmt,str,...)
        r = r + 1
        if str == nil and fmt then
            result[r] = fmt
        else
            result[r] = formatters[fmt](str,...)
        end
    end
    local function flush(connector)
        if result then
            local str = concat(result,connector)
            result, r = { }, 0
            return str
        end
    end
    return collect, flush
end
1498
1499
1500
-- formatter for %0.5N, fetched once
local f_16_16 = formatters["%0.5N"]

-- Returns n divided by 65536, formatted with the %0.5N directive.
number.to16dot16 = function(n)
    local value = n/65536.0
    return f_16_16(value)
end
1506
1507
1508
if not string.explode then

    local p_utf   = patterns.utf8character
    -- the first character of the symbol, plus true when a + follows it
    local p_check = C(p_utf) * (P("+") * Cc(true))^0
    -- every character on its own
    local p_split = Ct(C(p_utf)^0)
    -- runs of non spaces, separated by runs of spaces; the capture used to
    -- take a single character only, which chopped every word into characters
    local p_space = Ct((C((1-P(" "))^1) + P(" ")^1)^0)

    -- Splits str into a table of pieces.
    --   symbol == ""  : one piece per character
    --   symbol given  : split at the (first character of the) symbol; when
    --                   the symbol is followed by a + a run of them counts
    --                   as one separator
    --   no symbol     : split at runs of spaces
    function string.explode(str,symbol)
        if symbol == "" then
            return lpegmatch(p_split,str)
        elseif symbol then
            local a, b = lpegmatch(p_check,symbol)
            if b then
                return lpegmatch(tsplitat(P(a)^1),str)
            else
                return lpegmatch(tsplitat(a),str)
            end
        else
            return lpegmatch(p_space,str)
        end
    end

end
1534
1535
do

    local p_whitespace = patterns.whitespace^1

    -- builds, stores and returns the splitter for one word: the string is
    -- split where the word occurs with whitespace on both sides
    local function build(cache,word)
        local splitter = tsplitat(p_whitespace * P(word) * p_whitespace)
        local function split(s)
            return lpegmatch(splitter,s)
        end
        cache[word] = split
        return split
    end

    local cache = setmetatable({ }, { __index = build })

    -- Returns the (cached) splitting function for the word s.
    function string.wordsplitter(s)
        return cache[s]
    end

end
1554
1555 |