1if not modules then modules = { } end modules ['util-str'] = {
2 version = 1.001,
3 comment = "companion to luat-lib.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9utilities = utilities or { }
10utilities.strings = utilities.strings or { }
11local strings = utilities.strings
12
13local format, gsub, rep, sub, find, char = string.format, string.gsub, string.rep, string.sub, string.find, string.char
14local load, dump = load, string.dump
15local tonumber, type, tostring, next, setmetatable = tonumber, type, tostring, next, setmetatable
16local unpack, concat = table.unpack, table.concat
17local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc
18local patterns, lpegmatch = lpeg.patterns, lpeg.match
19local tsplitat = lpeg.tsplitat
20local utfchar, utfbyte, utflen = utf.char, utf.byte, utf.len
21
22
23
24
-- Compile a piece of Lua source, dump it with debug information stripped
-- and load the resulting binary chunk again.
--
-- str       : Lua source code
-- shortcuts : optional table handed to load() as the chunk's environment
--
-- Returns whatever load() returns for the stripped chunk.
local function loadstripped(str,shortcuts)
    local bytecode = dump(load(str),true)
    if not shortcuts then
        -- no environment argument at all: passing an explicit nil is not
        -- the same as leaving it out
        return load(bytecode)
    end
    return load(bytecode,nil,nil,shortcuts)
end
32
33
34
35if not number then number = { } end
36
37local stripzero = patterns.stripzero
38local stripzeros = patterns.stripzeros
39local newline = patterns.newline
40local endofstring = patterns.endofstring
41local anything = patterns.anything
42local whitespace = patterns.whitespace
43local space = patterns.space
44local spacer = patterns.spacer
45local spaceortab = patterns.spaceortab
46local digit = patterns.digit
47local sign = patterns.sign
48local period = patterns.period
49
50
51
52
53
54
55
56
57
58
59
-- Conversion factors applied to the incoming dimension: ptf divides by
-- 65536, bpf additionally scales by 7200/7227 (presumably scaled points to
-- pt and bp respectively -- confirm against callers).
local ptf = 1 / 65536
local bpf = (7200/7227) / 65536

local points, nupoints, basepoints, nubasepoints

do

    -- Shared worker for the four converters below.
    --
    -- n      : number or something tonumber() accepts
    -- factor : multiplier applied to n
    -- unit   : suffix appended to the result ("" for the unit-less variants)
    -- strip  : when true the fractional result is passed through stripzeros
    --
    -- Anything that is not a number, or is zero, yields "0" .. unit. Whole
    -- results are printed with %i, others with five decimals.
    local function scaled(n,factor,unit,strip)
        n = tonumber(n)
        if not n or n == 0 then
            return "0" .. unit
        end
        n = n * factor
        if n % 1 == 0 then
            return format("%i",n) .. unit
        end
        local s = format("%.5f",n) .. unit
        if strip then
            return lpegmatch(stripzeros,s)
        end
        return s
    end

    points       = function(n) return scaled(n,ptf,"pt",true ) end
    nupoints     = function(n) return scaled(n,ptf,""  ,false) end
    basepoints   = function(n) return scaled(n,bpf,"bp",true ) end
    nubasepoints = function(n) return scaled(n,bpf,""  ,false) end

end

number.points       = points
number.nupoints     = nupoints
number.basepoints   = basepoints
number.nubasepoints = nubasepoints
131
132
133
134
do

    -- a line that only holds spaces/tabs, including its newline
    local blankline = spaceortab^0 * newline
    -- a single space, tab or newline
    local anyblank  = spaceortab + newline

    local leading   = blankline^0 / ""                   -- drop blank lines at the start
    local trailing  = (anyblank^1 * endofstring) / ""    -- drop whitespace at the end
    local redundant = blankline^3 / "\n"                 -- three or more blank lines become one newline
    local stripped  = (spaceortab^1 / "") * newline      -- drop spaces/tabs right before a newline

    local collapser = Cs(leading * (trailing + redundant + stripped + anything)^0)

    -- Returns str with leading blank lines, trailing whitespace, whitespace
    -- before newlines and runs of blank lines cleaned up as described above.
    function strings.collapsecrlf(str)
        return lpegmatch(collapser,str)
    end

end
147
148
149
-- Cache of repeater tables, indexed by string and then by offset.
local repeaters = { }

-- Returns a table that lazily maps a number k onto str repeated (k + offset)
-- times; non-positive counts give "" and a nil/false key gives "" without
-- being stored. The same table is returned for the same (str,offset) pair.
function strings.newrepeater(str,offset)
    offset = offset or 0
    local byoffset = repeaters[str]
    if byoffset then
        local known = byoffset[offset]
        if known then
            return known
        end
    else
        byoffset = { }
        repeaters[str] = byoffset
    end
    local function compute(t,k)
        if not k then
            return ""
        end
        local count  = k + offset
        local result = ""
        if count > 0 then
            result = rep(str,count)
        end
        t[k] = result -- remember, so __index is not triggered again for k
        return result
    end
    local repeater = setmetatable({ }, { __index = compute })
    byoffset[offset] = repeater
    return repeater
end
176
177
178
179
-- State shared by the captures of the tab expansion pattern. The values are
-- reset at the start of every match (see the Carg(1) capture below), so the
-- initial values only document the defaults. The original statement supplied
-- four values for three variables; the surplus one was silently dropped.
local extra, tab, start = 0, 7, 1

-- nspaces[k] is a string of k spaces (empty for k <= 0)
local nspaces = strings.newrepeater(" ")

string.nspaces = nspaces

local pattern =
    -- reset the state; t is the tab width passed as extra match argument
    Carg(1) / function(t)
        extra, tab, start = 0, t or 7, 1
    end
  * Cs((
        Cp() * patterns.tab / function(position)
            -- column of the tab in the expanded line: position relative to
            -- the line start plus the characters added by earlier tabs
            local current = (position - start + 1) + extra
            local spaces = tab-(current-1) % tab
            if spaces > 0 then
                extra = extra + spaces - 1
                return nspaces[spaces]
            else
                return ""
            end
        end
      + newline * Cp() / function(position)
            -- a new line starts right after the newline
            extra, start = 0, position
        end
      + anything
    )^1)

-- Replaces tabs in str by spaces up to the next multiple of tab columns
-- (default 7), counted per line. The pattern needs at least one character,
-- so an empty string yields nil.
function strings.tabtospace(str,tab)
    return lpegmatch(pattern,str,1,tab or 7)
end
211
-- Returns the spaces needed to pad s to a width of |n| characters, measured
-- with utflen. A nil or zero n, or a string that is already wide enough,
-- gives an empty string.
function string.utfpadding(s,n)
    if not n or n == 0 then
        return ""
    end
    local length = utflen(s)
    local width  = n > 0 and n or -n
    return nspaces[width-length]
end
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
-- Building blocks for the line stripping patterns. Everything replaced by ""
-- is removed from the substitution result.
local optionalspace = spacer^0
local nospace       = optionalspace/""
local endofline     = nospace * newline

local stripend      = (whitespace^1 * endofstring)/""

-- one line of content, with surrounding spaces removed
local normalline    = (nospace * ((1-optionalspace*(newline+endofstring))^1) * nospace)

local stripempty    = endofline^1/""
local normalempty   = endofline^1                                   -- keep all line ends
local singleempty   = endofline * (endofline^0/"")                  -- keep one line end
local doubleempty   = endofline * endofline^-1 * (endofline^0/"")   -- keep at most two
local stripstart    = stripempty^0

local intospace     = whitespace^1/" "
local noleading     = whitespace^1/""
local notrailing    = noleading * endofstring

local p_prune_normal    = Cs ( stripstart * ( stripend + normalline + normalempty )^0 )
local p_prune_collapse  = Cs ( stripstart * ( stripend + normalline + doubleempty )^0 )
local p_prune_noempty   = Cs ( stripstart * ( stripend + normalline + singleempty )^0 )
-- Leading whitespace is optional here. The original demanded at least one
-- leading whitespace character, so the match failed for every string that
-- starts with content and such strings were never collapsed.
local p_prune_intospace = Cs ( noleading^-1 * ( notrailing + intospace + 1 )^0 )
local p_retain_normal   = Cs ( ( normalline + normalempty )^0 )
local p_retain_collapse = Cs ( ( normalline + doubleempty )^0 )
local p_retain_noempty  = Cs ( ( normalline + singleempty )^0 )
local p_collapse_all    = Cs ( stripstart * ( stripend + ((whitespace+newline)^1/" ") + 1)^0 )
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
-- Maps the "how" argument of strings.striplines onto a pattern. Unknown
-- (or missing) keys resolve to the "prune and collapse" pattern.
local striplinepatterns = setmetatable(
    {
        ["prune"]               = p_prune_normal,
        ["prune and collapse"]  = p_prune_collapse,
        ["prune and no empty"]  = p_prune_noempty,
        ["prune and to space"]  = p_prune_intospace,
        ["retain"]              = p_retain_normal,
        ["retain and collapse"] = p_retain_collapse,
        ["retain and no empty"] = p_retain_noempty,
        ["collapse all"]        = p_collapse_all,
        ["collapse"]            = patterns.collapser,
    },
    {
        __index = function()
            return p_prune_collapse
        end
    }
)

strings.striplinepatterns = striplinepatterns
309
-- Applies the pattern selected by how (see striplinepatterns) to str. A
-- nil/false str is returned as is, and so is str when the pattern does not
-- match.
function strings.striplines(str,how)
    if not str then
        return str
    end
    return lpegmatch(striplinepatterns[how],str) or str
end

-- Same as striplines with the "prune and to space" pattern.
function strings.collapse(str)
    if not str then
        return str
    end
    return lpegmatch(p_prune_intospace,str) or str
end

-- alternative name for striplines
strings.striplong = strings.striplines
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
-- Replaces every run of ":", "-", "+" and "_" characters by a single space.
function strings.nice(str)
    -- the parentheses drop gsub's second return value (the match count)
    return (gsub(str,"[:%-+_]+"," "))
end
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
-- Number of arguments consumed so far while a format specification is
-- translated: make() resets it to zero before running the builder and the
-- format_* handlers increment it for each argument they reference.
local n = 0
423
424
425
426
427
428
429
430local sequenced = table.sequenced
431
-- Returns s as a double quoted string: nil gives '""', numbers are returned
-- unquoted, tables are serialized with sequenced (separator sep, default
-- ",") and everything else goes through tostring.
function string.autodouble(s,sep)
    local kind = type(s)
    if kind == "nil" then
        return '""'
    elseif kind == "number" then
        return tostring(s)
    end
    local content
    if kind == "table" then
        content = sequenced(s,sep or ",")
    else
        content = tostring(s)
    end
    return '"' .. content .. '"'
end
445
-- Returns s as a single quoted string: nil gives "''", numbers are returned
-- unquoted, tables are serialized with sequenced (separator sep, default
-- ",") and everything else goes through tostring.
function string.autosingle(s,sep)
    local kind = type(s)
    if kind == "nil" then
        return "''"
    elseif kind == "number" then
        return tostring(s)
    end
    local content
    if kind == "table" then
        content = sequenced(s,sep or ",")
    else
        content = tostring(s)
    end
    return "'" .. content .. "'"
end
459
-- Printable names for the code points 0 up to and including 32.
local tracedchars = { [0] =
    -- C0 control characters
    "[null]", "[soh]", "[stx]", "[etx]", "[eot]", "[enq]", "[ack]", "[bel]",
    "[bs]",   "[ht]",  "[lf]",  "[vt]",  "[ff]",  "[cr]",  "[so]",  "[si]",
    "[dle]",  "[dc1]", "[dc2]", "[dc3]", "[dc4]", "[nak]", "[syn]", "[etb]",
    "[can]",  "[em]",  "[sub]", "[esc]", "[fs]",  "[gs]",  "[rs]",  "[us]",
    -- space
    "[space]",
}

string.tracedchars = tracedchars
strings.tracers    = tracedchars

-- Returns a readable representation of a character for tracing purposes.
-- b is either a code point (number) or a character (string). Known code
-- points give their name from tracedchars, others give the character
-- followed by its code point, as in "a (U+00061)".
function string.tracedchar(b)
    if type(b) == "number" then
        local known = tracedchars[b]
        if known then
            return known
        end
        return utfchar(b) .. " (U+" .. format("%05X",b) .. ")"
    end
    local c = utfbyte(b)
    local known = tracedchars[c]
    if known then
        return known
    end
    -- utfbyte may not have produced a code point
    local code = c and format("%05X",c) or "?????"
    return b .. " (U+" .. code .. ")"
end
482
-- Splits a number into a sign string and its magnitude.
--
-- Returns "+" and i for non-negative numbers and "-" and -i for negative
-- ones. Zero used to end up in the negative branch, so that the %I
-- formatter (which feeds both values to '%s%i') printed "-0"; it is now
-- treated as non-negative, like printf's "%+d".
function number.signed(i)
    if i >= 0 then
        return "+", i
    else
        return "-", -i
    end
end
490
491
492
-- Patterns used by number.formatted. Both are substitution captures that
-- expect the separators as extra match arguments.

local two = digit * digit
local three = two * digit
-- one or more groups of three digits, each preceded by extra argument 1
local prefix = (Carg(1) * three)^1

-- For strings with a fractional part of two characters: the part before the
-- period gets extra argument 1 inserted in front of its groups of three
-- digits (or is copied as is when no grouping applies), the single character
-- that follows is replaced by extra argument 2, and the next two characters
-- are copied.
local splitter = Cs (
    (((1 - (three^1 * period))^1 + C(three)) * prefix + C((1-period)^1))
  * (anything/"" * Carg(2)) * C(2)
)

-- For digit strings without a fractional part: one to three leading digits,
-- optionally followed by groups of three digits that run up to the end of
-- the string, each group preceded by extra argument 1.
local splitter3 = Cs (
    three * prefix * endofstring +
    two * prefix * endofstring +
    digit * prefix * endofstring +
    three +
    two +
    digit
)

patterns.formattednumber = splitter
512
-- Formats n with separators.
--
-- sep1 == false : n (tostring'd when a number) is grouped by splitter3 with
--                 sep2 (default ".") passed as extra argument
-- otherwise     : n (formatted as "%0.2f" when a number) is handled by
--                 splitter with two separators passed as extra arguments:
--                   sep1 == true : ".", "," (sep2 is ignored)
--                   sep1 == "."  : ".", sep2 or ","
--                   anything else: sep1 or ",", sep2 or "."
function number.formatted(n,sep1,sep2)
    local isnumber = type(n) == "number"
    if sep1 == false then
        if isnumber then
            n = tostring(n)
        end
        return lpegmatch(splitter3,n,1,sep2 or ".")
    end
    if isnumber then
        n = format("%0.2f",n)
    end
    local first, second
    if sep1 == true then
        first, second = ".", ","
    elseif sep1 == "." then
        first, second = ".", sep2 or ","
    else
        -- this also covers sep1 == ","
        first, second = sep1 or ",", sep2 or "."
    end
    return lpegmatch(splitter,n,1,first,second)
end
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
-- Cleans up a formatted float: leading zeros are dropped, and so is a
-- fraction that consists of zeros only.
local p = Cs(
    P("-")^0
  * (P("0")^1/"")^0
  * (1-period)^0
  * (period * P("0")^1 * endofstring/"" + period^0)
  * P(1-P("0")^1*endofstring)^0
)

-- Returns n formatted with fmt (default "%0.3f") and compacted by the
-- pattern above. The values 0 and 1 are short-circuited, and a result that
-- is empty or only a period or minus sign becomes "0".
function number.compactfloat(n,fmt)
    if n == 0 then
        return "0"
    end
    if n == 1 then
        return "1"
    end
    local s = lpegmatch(p,format(fmt or "%0.3f",n))
    if s == "" or s == "." or s == "-" then
        return "0"
    end
    return s
end
574
-- Patterns that compact the output of an exponent format: zeros before the
-- exponent marker, the plus sign and leading zeros of the exponent are
-- dropped.
local zero      = P("0")^1 / ""
local plus      = P("+") / ""
local minus     = P("-")
local separator = period
local trailing  = zero^1 * #S("eE")
local exponent  = (S("eE") * (plus + Cs((minus * zero^0 * endofstring)/"") + minus) * zero^0 * (endofstring * Cc("0") + anything^1))
local pattern_a = Cs(minus^0 * digit^1 * (separator/"" * trailing + separator * (trailing + digit)^0) * exponent)
local pattern_b = Cs((exponent + anything)^0)

-- number.sparseexponent(n) or number.sparseexponent(f,n)
--
-- Formats n (a number or a string that tonumber accepts) with f (default
-- "%e") and compacts the result: pattern_a is used for the plain "%e" and
-- "%E" formats, pattern_b for any other format. Values that are not
-- numeric are returned via tostring.
function number.sparseexponent(f,n)
    if not n then
        -- called with just a number
        f, n = "%e", f
    end
    local kind = type(n)
    local value
    if kind == "string" then
        value = tonumber(n)
    elseif kind == "number" then
        value = n
    end
    if not value then
        return tostring(n)
    end
    local compactor = pattern_b
    if f == "%e" or f == "%E" then
        compactor = pattern_a
    end
    return lpegmatch(compactor,format(f,value))
end
600
-- Caches of format strings: hf[k] is "%.<k>f" and hs[k] is "%<k>s".
local hf = { }
local hs = { }

setmetatable(hf, { __index = function(t,k)
    local v = "%." .. k .. "f"
    t[k] = v
    return v
end } )

setmetatable(hs, { __index = function(t,k)
    local v = "%" .. k .. "s"
    t[k] = v
    return v
end } )

-- Formats n with a digits after the period and pads the result on the left
-- with spaces to a width of b + a + 1 characters.
--
-- n : number
-- b : digits reserved before the period (default 0)
-- a : digits after the period (default 0)
--
-- The width computation already treated a missing a as 0, but the format
-- lookup used hf[a] directly, which raised an error for a == nil (a nil key
-- cannot be concatenated nor stored). Both now use the same defaults.
function number.formattedfloat(n,b,a)
    b = b or 0
    a = a or 0
    local s = format(hf[a],n)
    local l = b + a + 1
    if #s < l then
        return format(hs[l],s)
    else
        return s
    end
end
625
-- Skeleton of the source code generated for a formatter. make() fills the
-- four slots with the shared preamble, the preamble of the formatter
-- instance, the argument list (a1,a2,...) and the expression produced by
-- the builder.
local template = [[
%s
%s
return function(%s) return %s end
]]
631
632
633
do

    -- characters that need escaping and what they become
    local escapes = {
        ['"']  = '\\"',
        ['\\'] = '\\\\',
        ['\n'] = '\\n',
        ['\r'] = '\\r',
    }

    local special = S('"\\\n\r')

    -- wraps the subject in double quotes and escapes the special characters
    local escaper = Cs(Cc('"') * ((1-special)^1 + special/escapes)^0 * Cc('"'))

    patterns.escapedquotes = escaper

    -- Returns s between double quotes with double quotes, backslashes,
    -- newlines and returns escaped.
    function string.escapedquotes(s)
        return lpegmatch(escaper,s)
    end

end
657
do

    -- a run of characters without backslash
    local plain = (1 - P("\\"))^1

    -- a backslash followed by up to three digits becomes the byte with that
    -- decimal value
    -- NOTE(review): digit^-3 also matches no digits at all; tonumber("") is
    -- nil, so a backslash that is not followed by a digit makes char() raise
    -- an error when the captures are evaluated -- confirm that callers never
    -- pass such input.
    local escaped = P("\\") / "" * (digit^-3 / function(s) return char(tonumber(s)) end)

    -- the subject has to start with plain text, followed by at least one
    -- escape or further plain run
    local unescaper = Cs(plain * (escaped + plain)^1)

    patterns.unescapedquotes = unescaper

    -- Returns s with its decimal escapes resolved, or s itself when the
    -- pattern does not match.
    function string.unescapedquotes(s)
        return lpegmatch(unescaper,s) or s
    end

end
668
669
670
671
672
673
674
675
676
-- Built with lpeg.replacer from the newline pattern and "\r"; presumably the
-- result maps every newline variant in a string onto a carriage return --
-- confirm against the definition of lpeg.replacer (third argument true).
string.texnewlines = lpeg.replacer(patterns.newline,"\r",true)
678
679
680
681
682
-- Code that is put in front of every generated formatter (nothing by
-- default).
local preamble = ""

-- The names that generated formatter code can refer to. Each formatter
-- instance gets its own copy of this table (see strings.formatters.new).
local environment = {
    -- general access
    global          = global or _G,
    lpeg            = lpeg,
    -- basic functions
    type            = type,
    tostring        = tostring,
    tonumber        = tonumber,
    format          = format,
    concat          = concat,
    sequenced       = sequenced,
    -- lpeg and utf helpers
    lpegmatch       = lpegmatch,
    utfchar         = utfchar,
    utfbyte         = utfbyte,
    stripzero       = stripzero,
    stripzeros      = stripzeros,
    -- number helpers
    signed          = number.signed,
    points          = number.points,
    nupoints        = number.nupoints,
    basepoints      = number.basepoints,
    nubasepoints    = number.nubasepoints,
    formattednumber = number.formatted,
    sparseexponent  = number.sparseexponent,
    formattedfloat  = number.formattedfloat,
    -- string helpers
    nspaces         = nspaces,
    utfpadding      = string.utfpadding,
    tracedchar      = string.tracedchar,
    autosingle      = string.autosingle,
    autodouble      = string.autodouble,
    escapedquotes   = string.escapedquotes,
    -- optional fast float formatter; format_N checks if it is available
    FORMAT          = string.f6,
}
716
717
718
-- arguments[k] is the parameter list "a1,a2,...,ak"; entries are derived
-- from their predecessor on first access and then kept.
local arguments = setmetatable({ "a1" }, {
    __index = function(t,k)
        local list = t[k-1] .. ",a" .. k
        t[k] = list
        return list
    end
})
728
-- What can sit between the percent sign and the directive letter.

-- any mix of signs, spaces, periods and digits, captured as one string
local prefix_any = C((sign + space + period + digit)^0)

-- "<before>.<after>": both parts are captured, a missing part gives 0
local prefix_sub
do
    local part = C((sign + digit)^0) + Cc(0)
    prefix_sub = part * period * part
end

-- either "{...}" (the content is captured) or anything up to the first
-- letter, digit or percent sign
local prefix_tab =
    P("{") * C((1-P("}"))^0) * P("}")
  + C((1-R("az","AZ","09","%%"))^0)
734
735
736
737
738
-- %s : the next argument as string; without a prefix a nil argument gives
-- an empty string, with a prefix string.format does the work
local function format_s(f)
    n = n + 1
    if not f or f == "" then
        return format("(a%s or '')",n)
    end
    return format("format('%%%ss',a%s)",f,n)
end

-- %S : like %s but the argument always goes through tostring
local function format_S(f)
    n = n + 1
    if not f or f == "" then
        return format("tostring(a%s)",n)
    end
    return format("format('%%%ss',tostring(a%s))",f,n)
end
756
local format_right, format_left

do

    -- Generates the code for a padded argument: a positive width puts the
    -- padding in front of the argument, a negative one behind it, and no
    -- (or a zero) width gives the plain argument.
    local function padded(width)
        n = n + 1
        if not width or width == 0 then
            return format("(a%s or '')",n)
        end
        if width > 0 then
            return format("utfpadding(a%s,%i)..a%s",n,width,n)
        end
        return format("a%s..utfpadding(a%s,%i)",n,n,width)
    end

    -- %<width>> : padding goes in front for a positive width
    format_right = function(f)
        return padded(tonumber(f))
    end

    -- %<width>< : the mirrored case, so the sign of the width is flipped
    format_left = function(f)
        local width = tonumber(f)
        return padded(width and -width)
    end

end
781
-- %q : the next argument formatted with string.format's %q, or an empty
-- string when it is nil; with JITSUPPORTED set the argument is passed
-- through tostring first
local format_q
if JITSUPPORTED then
    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',tostring(a%s)) or '')",n,n)
    end
else
    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',a%s) or '')",n,n)
    end
end

-- %Q : the next argument, tostring'd and quoted by escapedquotes
local function format_Q()
    n = n + 1
    return format("escapedquotes(tostring(a%s))",n)
end
799
-- %i : integer; an absent prefix simply leaves the slot in the generated
-- format string empty
local function format_i(f)
    n = n + 1
    return format("format('%%%si',a%s)",f or "",n)
end

-- %d : same as %i
local format_d = format_i

-- %I : integer with explicit sign; signed() delivers the sign and the value
local function format_I(f)
    n = n + 1
    return format("format('%%s%%%si',signed(a%s))",f,n)
end

-- %f : float
local function format_f(f)
    n = n + 1
    return format("format('%%%sf',a%s)",f,n)
end
820
821
822
823
824
825
826
827
828
829
-- %F : float that is printed as integer when it has no fraction; without a
-- prefix nine decimals are used and values very close to zero become '0'
local function format_F(f)
    n = n + 1
    if f and f ~= "" then
        return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,f,n)
    end
    return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n)
end

-- %<b>.<a>k : float handled by formattedfloat; b and a come from the two
-- captures of prefix_sub
local function format_k(b,a)
    n = n + 1
    return format("formattedfloat(a%s,%s,%s)",n,b or 0,a or 0)
end
854
local format_g, format_G, format_e, format_E, format_j, format_J
local format_x, format_X, format_o

do

    -- Returns a handler that generates "<call>('%<prefix><letter>',a<n>)"
    -- for the next argument.
    local function directive(call,letter)
        local spec = call .. "('%%%s" .. letter .. "',a%s)"
        return function(f)
            n = n + 1
            return format(spec,f,n)
        end
    end

    -- passed on to string.format
    format_g = directive("format","g")
    format_G = directive("format","G")
    format_e = directive("format","e")
    format_E = directive("format","E")
    format_x = directive("format","x")
    format_X = directive("format","X")
    format_o = directive("format","o")

    -- %j and %J : exponent notation, compacted by sparseexponent
    format_j = directive("sparseexponent","e")
    format_J = directive("sparseexponent","E")

end
899
-- %c : code point to character
local function format_c()
    n = n + 1
    return "utfchar(a" .. n .. ")"
end

-- %C : character or code point in traced form
local function format_C()
    n = n + 1
    return "tracedchar(a" .. n .. ")"
end

-- %r : rounded number (float format without decimals)
local function format_r(f)
    n = n + 1
    return format("format('%%%s.0f',a%s)",f,n)
end
914
local format_h, format_H, format_u, format_U

do

    -- Returns a handler for a hexadecimal directive. The argument can be a
    -- number or a character (converted with utfbyte). The width defaults to
    -- "05"; a prefix that is exactly "-" suppresses the leader (0x, u+, U+)
    -- and also uses the default width.
    local function hexadecimal(leader,letter)
        local tail   = letter .. "',type(a%s) == 'number' and a%s or utfbyte(a%s))"
        local bare   = "format('%%%s" .. tail
        local tagged = "format('" .. leader .. "%%%s" .. tail
        return function(f)
            n = n + 1
            if f == "-" then
                return format(bare,"05",n,n,n)
            end
            return format(tagged,f == "" and "05" or f,n,n,n)
        end
    end

    format_h = hexadecimal("0x","x")
    format_H = hexadecimal("0x","X")
    format_u = hexadecimal("u+","x")
    format_U = hexadecimal("U+","X")

end
954
local format_p, format_P, format_b, format_B

do

    -- Returns a handler that wraps the next argument in a call to the
    -- function with the given name from the formatter environment.
    local function converter(name)
        local spec = name .. "(a%s)"
        return function()
            n = n + 1
            return format(spec,n)
        end
    end

    format_p = converter("points")       -- %p
    format_P = converter("nupoints")     -- %P
    format_b = converter("basepoints")   -- %b
    format_B = converter("nubasepoints") -- %B

end
974
local format_t, format_T

do

    -- Returns a handler that serializes a table argument with the named
    -- function; a non-empty prefix is passed on as (quoted) separator.
    local function serializer(name)
        local withseparator = name .. "(a%s,%q)"
        local plain         = name .. "(a%s)"
        return function(f)
            n = n + 1
            if f and f ~= "" then
                return format(withseparator,n,f)
            end
            return format(plain,n)
        end
    end

    format_t = serializer("concat")    -- %t
    format_T = serializer("sequenced") -- %T

end
992
-- %l : boolean as lowercase word
local function format_l()
    n = n + 1
    return "(a" .. n .. " and 'true' or 'false')"
end

-- %L : boolean as uppercase word
local function format_L()
    n = n + 1
    return "(a" .. n .. " and 'TRUE' or 'FALSE')"
end

-- %n : number; printed with %i when it has no fraction, with tostring
-- otherwise
local function format_n()
    n = n + 1
    return format("((a%s %% 1 == 0) and format('%%i',a%s) or tostring(a%s))",n,n,n)
end
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
-- %N : float without redundant zeros. When the environment provides FORMAT
-- that function does the work, otherwise string.format is combined with the
-- stripzero pattern. The default precision is nine decimals.
local format_N

if environment.FORMAT then

    format_N = function(f)
        n = n + 1
        if not f or f == "" then
            return format("FORMAT(a%s,'%%.9f')",n)
        end
        if f == ".6" or f == "0.6" then
            -- called without a format for this precision
            return format("FORMAT(a%s)",n)
        end
        return format("FORMAT(a%s,'%%%sf')",n,f)
    end

else

    format_N = function(f)
        n = n + 1
        local precision = f
        if not precision or precision == "" then
            precision = ".9"
        end
        return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,precision,n)
    end

end
1058
local format_a, format_A

do

    -- Returns a handler that quotes the next argument with the named
    -- function; a non-empty prefix is passed on as (quoted) separator.
    local function quoter(name)
        local withseparator = name .. "(a%s,%q)"
        local plain         = name .. "(a%s)"
        return function(f)
            n = n + 1
            if f and f ~= "" then
                return format(withseparator,n,f)
            end
            return format(plain,n)
        end
    end

    format_a = quoter("autosingle") -- %a
    format_A = quoter("autodouble") -- %A

end
1076
-- %w : as many spaces as the next argument says, plus the numeric prefix
-- when there is one
local function format_w(f)
    n = n + 1
    local fixed = tonumber(f)
    if not fixed then
        return format("nspaces[a%s]",n)
    end
    return format("nspaces[%s+a%s]",fixed,n)
end

-- %W : as many spaces as the prefix says; no argument is consumed
local function format_W(f)
    local fixed = tonumber(f) or 0
    return format("nspaces[%s]",fixed)
end
1090
local format_m, format_M

do

    -- Returns a handler for a formatted number. The prefix is the first
    -- separator (with the given default); the prefix "0" selects the
    -- variant of formattednumber that gets false as separator.
    local function grouped(default,spec)
        return function(f)
            n = n + 1
            if not f or f == "" then
                f = default
            end
            if f == "0" then
                return format([[formattednumber(a%s,false)]],n)
            end
            return format(spec,n,f)
        end
    end

    format_m = grouped(",",[[formattednumber(a%s,%q,".")]]) -- %m
    format_M = grouped(".",[[formattednumber(a%s,%q,",")]]) -- %M

end

-- %z : skip arguments (one, or as many as the prefix says); nothing but an
-- empty string ends up in the result
local function format_z(f)
    n = n + (tonumber(f) or 1)
    return "''"
end
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
-- Literal text between directives ends up as a quoted Lua string.
local function format_rest(s)
    return format("%q",s)
end

-- Handles %<count>!name! directives.
--
-- extensions : table that maps names onto code templates (unknown names
--              fall back to "tostring(%s)")
-- f          : prefix; its numeric value (default 1) tells how many
--              arguments the extension takes
-- name       : name of the extension
--
-- A "..." in the template stands for the argument list. A count of zero
-- consumes no argument, a negative count refers back to an argument that
-- was already consumed (again without consuming one).
local function format_extension(extensions,f,name)
    local extension = extensions[name] or "tostring(%s)"
    local count     = tonumber(f) or 1
    local variadic  = find(extension,"%.%.%.")
    if count == 0 then
        if variadic then
            return (gsub(extension,"%.%.%.",""))
        end
        return extension
    elseif count == 1 then
        if variadic then
            extension = gsub(extension,"%.%.%.","%%s")
        end
        n = n + 1
        local argument = "a" .. n
        -- twice, as a template may refer to its argument two times
        return format(extension,argument,argument)
    elseif count < 0 then
        if variadic then
            return (gsub(extension,"%.%.%.",""))
        end
        local argument = "a" .. (n + count + 1)
        return format(extension,argument,argument)
    else
        if variadic then
            extension = gsub(extension,"%.%.%.",rep("%%s,",count-1).."%%s")
        end
        local list = { }
        for i=1,count do
            n = n + 1
            list[i] = "a" .. n
        end
        return format(extension,unpack(list))
    end
end
1188
1189
1190
1191
1192
1193
1194
-- Grammar that translates a format specification into a Lua expression. The
-- first extra match argument is the connector that is put between the
-- pieces, the second one the table with extensions (see make). Each rule
-- hands the captured prefix to the matching format_* handler, which returns
-- the code for that piece.
local builder = Cs { "start",
    start = (
        (
            -- the percent sign itself is dropped
            P("%") / ""
          * (
                V("!") -- extensions, checked first
              + V("s") + V("q")
              + V("i") + V("d")
              + V("f") + V("F") + V("g") + V("G") + V("e") + V("E")
              + V("x") + V("X") + V("o")

              + V("c")
              + V("C")
              + V("S")
              + V("Q")
              + V("n")
              + V("N")
              + V("k")

              + V("r")
              + V("h") + V("H") + V("u") + V("U")
              + V("p") + V("P") + V("b") + V("B")
              + V("t") + V("T")
              + V("l") + V("L")
              + V("I")
              + V("w")
              + V("W")
              + V("a")
              + V("A")
              + V("j") + V("J")
              + V("m") + V("M")
              + V("z")

              + V(">")
              + V("<")

            )
          + V("*") -- literal text
        )
        -- every piece but the last one is followed by the connector
      * (endofstring + Carg(1))
    )^0,

    ["s"] = (prefix_any * P("s")) / format_s,
    ["q"] = (prefix_any * P("q")) / format_q,
    ["i"] = (prefix_any * P("i")) / format_i,
    ["d"] = (prefix_any * P("d")) / format_d,
    ["f"] = (prefix_any * P("f")) / format_f,
    ["F"] = (prefix_any * P("F")) / format_F,
    ["g"] = (prefix_any * P("g")) / format_g,
    ["G"] = (prefix_any * P("G")) / format_G,
    ["e"] = (prefix_any * P("e")) / format_e,
    ["E"] = (prefix_any * P("E")) / format_E,
    ["x"] = (prefix_any * P("x")) / format_x,
    ["X"] = (prefix_any * P("X")) / format_X,
    ["o"] = (prefix_any * P("o")) / format_o,

    ["S"] = (prefix_any * P("S")) / format_S,
    ["Q"] = (prefix_any * P("Q")) / format_Q,
    ["n"] = (prefix_any * P("n")) / format_n,
    ["N"] = (prefix_any * P("N")) / format_N,
    ["k"] = (prefix_sub * P("k")) / format_k,
    ["c"] = (prefix_any * P("c")) / format_c,
    ["C"] = (prefix_any * P("C")) / format_C,

    ["r"] = (prefix_any * P("r")) / format_r,
    ["h"] = (prefix_any * P("h")) / format_h,
    ["H"] = (prefix_any * P("H")) / format_H,
    ["u"] = (prefix_any * P("u")) / format_u,
    ["U"] = (prefix_any * P("U")) / format_U,
    ["p"] = (prefix_any * P("p")) / format_p,
    ["P"] = (prefix_any * P("P")) / format_P,
    ["b"] = (prefix_any * P("b")) / format_b,
    ["B"] = (prefix_any * P("B")) / format_B,
    ["t"] = (prefix_tab * P("t")) / format_t,
    ["T"] = (prefix_tab * P("T")) / format_T,
    ["l"] = (prefix_any * P("l")) / format_l,
    ["L"] = (prefix_any * P("L")) / format_L,
    ["I"] = (prefix_any * P("I")) / format_I,

    ["w"] = (prefix_any * P("w")) / format_w,
    ["W"] = (prefix_any * P("W")) / format_W,

    ["j"] = (prefix_any * P("j")) / format_j,
    ["J"] = (prefix_any * P("J")) / format_J,

    ["m"] = (prefix_any * P("m")) / format_m,
    ["M"] = (prefix_any * P("M")) / format_M,

    ["z"] = (prefix_any * P("z")) / format_z,

    ["a"] = (prefix_any * P("a")) / format_a,
    ["A"] = (prefix_any * P("A")) / format_A,

    ["<"] = (prefix_any * P("<")) / format_left,
    [">"] = (prefix_any * P(">")) / format_right,

    -- literal text; a double percent sign stands for a percent sign
    ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest,
    -- not referenced by the start rule
    ["?"] = Cs(((1-P("%"))^1               )^1) / format_rest,

    -- %!name! : the extensions table (extra argument 2), the prefix and the
    -- name are passed to format_extension
    ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,
}
1298
1299
1300
local xx, XX

do

    -- Returns a table that formats its keys with spec on first access and
    -- remembers the result.
    local function cached(spec)
        return setmetatable({ }, { __index = function(t,k)
            local v = format(spec,k)
            t[k] = v
            return v
        end })
    end

    xx = cached("%02x")
    XX = cached("%02X")

end

-- Formatters for specifications that are served from the caches above
-- instead of being compiled.
local preset = {
    ["%02x"] = function(n) return xx[n] end,
    ["%02X"] = function(n) return XX[n] end,
}
1308
-- A specification that is just one standard directive is turned into a thin
-- wrapper around string.format; %0 in the replacement stands for the whole
-- match, i.e. the specification itself.
local direct =
    P("%") * (sign + space + period + digit)^0 * S("sqidfgGeExXo") * endofstring
  / [[local format = string.format return function(str) return format("%0",str) end]]

-- The __index handler of a formatter table: creates the function for the
-- specification str, stores it in t and returns it. Preset formatters are
-- returned without being stored.
local function make(t,str)
    local f = preset[str]
    if f then
        return f
    end
    local code = lpegmatch(direct,str)
    if code then
        f = loadstripped(code)()
    else
        -- the handlers called by the builder count the arguments in n
        n = 0
        code = lpegmatch(builder,str,1,t._connector_,t._extensions_)
        if n > 0 then
            local source = format(template,preamble,t._preamble_,arguments[n],code)
            f = loadstripped(source,t._environment_)()
        else
            -- no argument is used, so the specification is returned as is
            f = function() return str end
        end
    end
    t[str] = f
    return f
end
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
-- The __call handler of a formatter table: formatter(fmt,...) looks up (or
-- creates) the function for fmt and applies it to the remaining arguments.
local function use(t,fmt,...)
    local f = t[fmt]
    return f(...)
end

strings.formatters = { }

-- Creates a new formatter table. Indexing it with a specification gives the
-- function for that specification. With noconcat set the pieces of a result
-- are separated by commas (so the function returns several values) instead
-- of being concatenated.
function strings.formatters.new(noconcat)
    -- each instance gets its own copy of the environment
    local private = { }
    for k, v in next, environment do
        private[k] = v
    end
    local connector = ".."
    if noconcat then
        connector = ","
    end
    return setmetatable(
        {
            _type_        = "formatter",
            _connector_   = connector,
            _extensions_  = { },
            _preamble_    = "",
            _environment_ = private,
        },
        {
            __index = make,
            __call  = use,
        }
    )
end

-- the default instance
local formatters = strings.formatters.new()

string.formatters = formatters
string.formatter  = function(str,...)
    local f = formatters[str]
    return f(...)
end
1406
-- Registers an extension (used as %!name!) in a formatter table.
--
-- t        : table created by strings.formatters.new; anything else is
--            silently ignored
-- name     : name of the extension
-- template : code template (default "%s")
-- preamble : a string is put in front of the preamble of the instance, a
--            table is merged into its environment
local function add(t,name,template,preamble)
    if type(t) ~= "table" or t._type_ ~= "formatter" then
        return
    end
    t._extensions_[name] = template or "%s"
    local kind = type(preamble)
    if kind == "string" then
        t._preamble_ = preamble .. "\n" .. t._preamble_
    elseif kind == "table" then
        local environment = t._environment_
        for k, v in next, preamble do
            environment[k] = v
        end
    end
end

strings.formatters.add = add
1421
1422
1423
-- Replaces the characters that are special in xml by their predefined
-- entities. The replacement strings had been reduced to the raw characters,
-- which made the pattern a no-op and turned the double quote case into an
-- unterminated string.
patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + anything)^0)
-- Puts a backslash in front of the characters # $ % \ { }.
patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + anything)^0)
-- Escape double quotes and newlines so that the result can be used inside a
-- double quoted Lua string; luaquoted also adds the surrounding quotes. The
-- newline replacement used to carry a stray double quote (\n"), which would
-- end the surrounding string right after the escaped newline.
patterns.luaescape = Cs(((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0)
patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0 * Cc('"'))
1428
1429
1430
1431
-- Register the xml, tex and lua escapers as extensions of the default
-- formatter: %!xml! applies patterns.xmlescape to its argument, and so on.
do
    local names = { "xml", "tex", "lua" }
    for i=1,#names do
        local name    = names[i]
        local escaper = name .. "escape"
        add(formatters,name,"lpegmatch(" .. escaper .. ",%s)",{ [escaper] = patterns[escaper] })
    end
end
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
do

    local dquote = patterns.dquote
    -- one element of the content: an escaped character, a double quote that
    -- gets escaped, or any other character
    local equote = patterns.escaped + dquote / '\\"' + 1
    local cquote = Cc('"')

    -- a string that starts and ends with a double quote: the content in
    -- between is escaped
    local alreadyquoted = Cs(dquote * (equote - P(-2))^0 * dquote)
    -- a string with a space in it: escaped and wrapped in double quotes
    local needsquotes   = Cs(cquote * (equote - space)^0 * space * equote^0 * cquote)

    local quoter = alreadyquoted + needsquotes

    -- Returns str quoted according to the two cases above, or str itself
    -- when neither applies.
    function string.optionalquoted(str)
        return lpegmatch(quoter,str) or str
    end

end
1480
do

    -- what newlines are replaced by, determined once at load time
    local replacement = os.newline or "\r"
    local replacer    = Cs((newline / replacement + 1)^0)

    -- Returns str with every newline replaced by os.newline (or "\r" when
    -- that field is not set).
    function string.replacenewlines(str)
        return lpegmatch(replacer,str)
    end

end
1486
1487
1488
-- Returns two functions that share a buffer:
--
-- collect(fmt,str,...) : appends fmt itself when there is nothing to format
--                        (str is nil), otherwise the result of the default
--                        formatter for fmt applied to str,...
-- flush(connector)     : returns the collected strings concatenated with
--                        connector and starts with an empty buffer again
function strings.newcollector()
    local result, r = { }, 0
    local function collect(fmt,str,...)
        r = r + 1
        if str == nil and fmt then
            result[r] = fmt
        else
            result[r] = formatters[fmt](str,...)
        end
    end
    local function flush(connector)
        if result then
            local str = concat(result,connector)
            result, r = { }, 0
            return str
        end
    end
    return collect, flush
end
1504
1505
1506
-- formatter with five decimals and without redundant zeros
local f_16_16 = formatters["%0.5N"]

-- Returns n divided by 65536 as a string, formatted by the formatter above.
function number.to16dot16(n)
    local value = n/65536.0
    return f_16_16(value)
end
1512
1513
1514
if not string.explode then

    local p_utf   = patterns.utf8character
    -- a separator character, optionally followed by "+" (which gives true
    -- as second capture)
    local p_check = C(p_utf) * (P("+") * Cc(true))^0
    -- all characters
    local p_split = Ct(C(p_utf)^0)
    -- words separated by runs of spaces; each run of non-space characters
    -- is one capture (the inner repetition was missing, so every character
    -- ended up as a separate entry)
    local p_space = Ct((C((1-P(" ")^1)^1) + P(" ")^1)^0)

    -- Splits str into a table.
    --
    -- symbol == ""    : split into utf characters
    -- symbol == "c"   : split at each c
    -- symbol == "c+"  : split at runs of c
    -- symbol == nil   : split into words at runs of spaces
    function string.explode(str,symbol)
        if symbol == "" then
            return lpegmatch(p_split,str)
        elseif symbol then
            local a, b = lpegmatch(p_check,symbol)
            if b then
                return lpegmatch(tsplitat(P(a)^1),str)
            else
                return lpegmatch(tsplitat(a),str)
            end
        else
            return lpegmatch(p_space,str)
        end
    end

end
1540
1541
do

    local p_whitespace = patterns.whitespace^1

    -- Creates (and remembers) the splitter for a word: the subject is split
    -- wherever the word shows up with whitespace on both sides.
    local function newsplitter(t,word)
        local p = tsplitat(p_whitespace * P(word) * p_whitespace)
        local function splitter(s)
            return lpegmatch(p,s)
        end
        t[word] = splitter
        return splitter
    end

    local cache = setmetatable({ }, { __index = newsplitter })

    -- Returns a function that splits a string at the word s.
    function string.wordsplitter(s)
        return cache[s]
    end

end
1560
if CONTEXTLMTXMODE and CONTEXTLMTXMODE > 0 then

    -- characters that are replaced by a hash followed by a letter
    local hashed = {
        ["#"]  = "#H",
        ['"']  = "#Q",
        ["\\"] = "#X",
        [" "]  = "#S",
        ["\t"] = "#T",
        ["\n"] = "#L",
        ["\r"] = "#R",
    }

    -- Returns s with the characters above replaced; the others are kept.
    function string.texhashed(s)
        -- gsub also returns the number of matches, which is dropped here
        local result = gsub(s,".",hashed)
        return result
    end

end
1578 |