1if not modules then modules = { } end modules ['util-str'] = {
2 version = 1.001,
3 comment = "companion to luat-lib.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9utilities = utilities or { }
10utilities.strings = utilities.strings or { }
11local strings = utilities.strings
12
13local format, gsub, rep, sub, find, char = string.format, string.gsub, string.rep, string.sub, string.find, string.char
14local load, dump = load, string.dump
15local tonumber, type, tostring, next, setmetatable = tonumber, type, tostring, next, setmetatable
16local unpack, concat = table.unpack, table.concat
17local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc
18local patterns, lpegmatch = lpeg.patterns, lpeg.match
19local tsplitat = lpeg.tsplitat
20local utfchar, utfbyte, utflen = utf.char, utf.byte, utf.len
21
22
23
24
-- Compile the Lua source in str, round-trip it through string.dump with the
-- strip flag set and load the resulting binary chunk again. When shortcuts is
-- given it is handed to load as the environment of the reloaded chunk.
local loadstripped = function(str,shortcuts)
    local bytecode = dump(load(str),true)
    if not shortcuts then
        return load(bytecode)
    end
    return load(bytecode,nil,nil,shortcuts)
end
32
33
34
35if not number then number = { } end
36
37local stripzero = patterns.stripzero
38local stripzeros = patterns.stripzeros
39local newline = patterns.newline
40local endofstring = patterns.endofstring
41local anything = patterns.anything
42local whitespace = patterns.whitespace
43local space = patterns.space
44local spacer = patterns.spacer
45local spaceortab = patterns.spaceortab
46local digit = patterns.digit
47local sign = patterns.sign
48local period = patterns.period
49
50
51
52
53
54
55
56
57
58
59
-- Scale factors applied before formatting: ptf divides by 65536, bpf also
-- multiplies by 7200/7227 (presumably sp -> pt and sp -> bp; confirm).
local ptf = 1 / 65536
local bpf = (7200/7227) / 65536

-- Return n * ptf as a string with a "pt" suffix. Anything that is not a
-- number (or a numeric string) and zero itself give "0pt". Whole values are
-- printed as integers, others with five decimals run through stripzeros.
local function points(n)
    local value = tonumber(n)
    if not value or value == 0 then
        return "0pt"
    end
    value = value * ptf
    if value % 1 ~= 0 then
        return lpegmatch(stripzeros,format("%.5fpt",value))
    end
    return format("%ipt",value)
end
78
-- Unit-less variant of points: return n * ptf as a string without suffix.
-- Non-numeric input and zero give "0"; whole values are printed as integers,
-- others with exactly five decimals (no zero stripping here).
local function nupoints(n)
    local value = tonumber(n)
    if not value or value == 0 then
        return "0"
    end
    value = value * ptf
    if value % 1 ~= 0 then
        return format("%.5f",value)
    end
    return format("%i",value)
end
94
-- Return n * bpf as a string with a "bp" suffix. Non-numeric input and zero
-- give "0bp"; whole values are printed as integers, others with five
-- decimals run through stripzeros.
local function basepoints(n)
    local value = tonumber(n)
    if not value or value == 0 then
        return "0bp"
    end
    value = value * bpf
    if value % 1 ~= 0 then
        return lpegmatch(stripzeros,format("%.5fbp",value))
    end
    return format("%ibp",value)
end
110
-- Unit-less variant of basepoints: return n * bpf as a string without suffix.
-- Non-numeric input and zero give "0"; whole values are printed as integers,
-- others with exactly five decimals.
local function nubasepoints(n)
    local value = tonumber(n)
    if not value or value == 0 then
        return "0"
    end
    value = value * bpf
    if value % 1 ~= 0 then
        return format("%.5f",value)
    end
    return format("%i",value)
end

-- publish the four converters in the number namespace

number.points       = points
number.nupoints     = nupoints
number.basepoints   = basepoints
number.nubasepoints = nubasepoints
131
132
133
134
-- Building blocks for collapsecrlf. Each "/ ..." replaces the matched text
-- inside the substitution capture below.

local rubish    = spaceortab^0 * newline             -- blanks up to and including a newline
local anyrubish = spaceortab + newline               -- a single blank or newline
local stripped  = (spaceortab^1 / "") * newline      -- blanks before a newline are dropped
local leading   = rubish^0 / ""                      -- blank lines at the start are dropped
local trailing  = (anyrubish^1 * endofstring) / ""   -- blanks and newlines at the end are dropped
local redundant = rubish^3 / "\n"                    -- three or more such lines become one "\n"

local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0)

-- Apply the substitution pattern above to str and return the result.
strings.collapsecrlf = function(str)
    return lpegmatch(pattern,str)
end
147
148
149
-- Cache of repeater tables, indexed first by string and then by offset.
local repeaters = { }

-- Return a table r for which r[k] is str repeated (k + offset) times, or ""
-- when that count is not positive or k is nil/false. Results are stored in
-- the table on first access, and the table itself is shared between calls
-- with the same str and offset.
function strings.newrepeater(str,offset)
    offset = offset or 0
    local byoffset = repeaters[str]
    if not byoffset then
        byoffset = { }
        repeaters[str] = byoffset
    end
    local repeater = byoffset[offset]
    if repeater then
        return repeater
    end
    repeater = setmetatable({ }, {
        __index = function(t,k)
            if not k then
                return ""
            end
            local count = k + offset
            local s
            if count > 0 then
                s = rep(str,count)
            else
                s = ""
            end
            t[k] = s
            return s
        end
    })
    byoffset[offset] = repeater
    return repeater
end
176
177
178
179
-- State shared by the callbacks in the pattern below; it is reset at the
-- start of every match, so the initial values only act as placeholders.
local extra, tab, start = 0, 0, 4

local nspaces = strings.newrepeater(" ")

string.nspaces = nspaces

local pattern =
    -- pick up the tab width passed as extra argument and reset the state
    Carg(1) / function(t)
        extra, tab, start = 0, t or 7, 1
    end
  * Cs((
        -- a tab is replaced by the spaces needed to reach the next tab stop;
        -- extra records how much wider the output line is than the input
        Cp() * patterns.tab / function(position)
            local column = position - start + extra
            local spaces = tab - column % tab
            if spaces > 0 then
                extra = extra + spaces - 1
                return nspaces[spaces]
            end
            return ""
        end
        -- a newline starts a fresh line (no value returned, so it is kept)
      + newline * Cp() / function(position)
            extra, start = 0, position
        end
      + anything
    )^1)

-- Expand the tabs in str to spaces using tab stops of width tab (default 7).
function strings.tabtospace(str,tab)
    return lpegmatch(pattern,str,1,tab or 7)
end
211
-- Return the spaces needed to pad s to a width of |n| characters, where the
-- length of s is taken from utflen. A missing or zero n gives "". The sign
-- of n does not influence the result; callers use it to choose the side.
function string.utfpadding(s,n)
    if n and n ~= 0 then
        local length = utflen(s)
        local width  = n > 0 and n or -n
        return nspaces[width-length]
    end
    return ""
end
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
-- Building blocks for the line stripping patterns. A "/ ''" removes the
-- matched text inside the substitution captures further down.

local optionalspace = spacer^0
local nospace       = optionalspace/""
local endofline     = nospace * newline      -- newline with the blanks in front of it removed

local stripend      = (whitespace^1 * endofstring)/""

-- one line of content with the blanks on both sides removed
local normalline    = (nospace * ((1-optionalspace*(newline+endofstring))^1) * nospace)

local stripempty    = endofline^1/""                                   -- drop a run of line ends
local normalempty   = endofline^1                                      -- keep a run of line ends
local singleempty   = endofline * (endofline^0/"")                     -- keep one line end of a run
local doubleempty   = endofline * endofline^-1 * (endofline^0/"")      -- keep at most two line ends of a run
local stripstart    = stripempty^0

local intospace     = whitespace^1/" "
local noleading     = whitespace^1/""
local notrailing    = noleading * endofstring

-- the "prune" variants strip the start and end, the "retain" variants do not

local p_prune_normal    = Cs ( stripstart * ( stripend + normalline + normalempty )^0 )
local p_prune_collapse  = Cs ( stripstart * ( stripend + normalline + doubleempty )^0 )
local p_prune_noempty   = Cs ( stripstart * ( stripend + normalline + singleempty )^0 )
local p_prune_intospace = Cs ( noleading  * ( notrailing + intospace + 1          )^0 )
local p_retain_normal   = Cs (              (            normalline + normalempty )^0 )
local p_retain_collapse = Cs (              (            normalline + doubleempty )^0 )
local p_retain_noempty  = Cs (              (            normalline + singleempty )^0 )
local p_collapse_all    = Cs ( stripstart * ( stripend + ((whitespace+newline)^1/" ") + 1)^0 )
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
-- Lookup from a method name to the pattern that implements it. Unknown names
-- (including nil) resolve to the "prune and collapse" pattern.
local striplinepatterns = setmetatable({
    ["prune"]               = p_prune_normal,
    ["prune and collapse"]  = p_prune_collapse,
    ["prune and no empty"]  = p_prune_noempty,
    ["prune and to space"]  = p_prune_intospace,
    ["retain"]              = p_retain_normal,
    ["retain and collapse"] = p_retain_collapse,
    ["retain and no empty"] = p_retain_noempty,
    ["collapse all"]        = p_collapse_all,
    ["collapse"]            = patterns.collapser,
}, {
    __index = function(t,k)
        return p_prune_collapse
    end
})

strings.striplinepatterns = striplinepatterns
309
-- Apply the strip pattern selected by how (see striplinepatterns) to str.
-- A nil/false str is returned untouched, and str is also returned when the
-- pattern does not match.
function strings.striplines(str,how)
    if not str then
        return str
    end
    return lpegmatch(striplinepatterns[how],str) or str
end

-- Same as striplines with the "prune and to space" pattern.
function strings.collapse(str)
    if not str then
        return str
    end
    return lpegmatch(p_prune_intospace,str) or str
end

-- alias under the alternative name

strings.striplong = strings.striplines
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
-- Replace every run of ":", "-", "+" and "_" in str by a single space.
-- The parentheses drop the substitution count that gsub also returns.
function strings.nice(str)
    return (gsub(str,"[:%-+_]+"," "))
end
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
-- Argument counter shared by the format_* generators below: each of them
-- bumps it to find the name of "its" argument (a1, a2, ...), and make resets
-- it before a format string is parsed.
local n = 0
423
424
425
426
427
428
429
local sequenced = table.sequenced

-- Return s wrapped in double quotes: nil gives an empty quoted string,
-- numbers are returned unquoted, tables are serialized with sequenced (sep
-- defaults to ",") and anything else goes through tostring.
function string.autodouble(s,sep)
    local kind = type(s)
    if kind == "nil" then
        return '""'
    elseif kind == "number" then
        return tostring(s)
    elseif kind == "table" then
        return '"' .. sequenced(s,sep or ",") .. '"'
    else
        return '"' .. tostring(s) .. '"'
    end
end
445
-- Return s wrapped in single quotes: nil gives an empty quoted string,
-- numbers are returned unquoted, tables are serialized with sequenced (sep
-- defaults to ",") and anything else goes through tostring.
function string.autosingle(s,sep)
    local kind = type(s)
    if kind == "nil" then
        return "''"
    elseif kind == "number" then
        return tostring(s)
    elseif kind == "table" then
        return "'" .. sequenced(s,sep or ",") .. "'"
    else
        return "'" .. tostring(s) .. "'"
    end
end
459
-- Readable names for the character codes 0 up to and including 32.
local tracedchars = { [0] =
    -- C0
    "[null]", "[soh]", "[stx]", "[etx]", "[eot]", "[enq]", "[ack]", "[bel]",
    "[bs]",   "[ht]",  "[lf]",  "[vt]",  "[ff]",  "[cr]",  "[so]",  "[si]",
    "[dle]",  "[dc1]", "[dc2]", "[dc3]", "[dc4]", "[nak]", "[syn]", "[etb]",
    "[can]",  "[em]",  "[sub]", "[esc]", "[fs]",  "[gs]",  "[rs]",  "[us]",
    -- space
    "[space]",
}

string.tracedchars = tracedchars
strings.tracers    = tracedchars

-- Describe a character for tracing. b is either a code (number) or a
-- character (string). Codes listed in tracedchars give their name, other
-- input gives "<char> (U+XXXXX)", with "?????" when utfbyte yields no code.
function string.tracedchar(b)
    local isnumber = type(b) == "number"
    local code = b
    if not isnumber then
        code = utfbyte(b)
    end
    local name = tracedchars[code]
    if name then
        return name
    end
    if isnumber then
        return utfchar(b) .. " (U+" .. format("%05X",b) .. ")"
    end
    return b .. " (U+" .. (code and format("%05X",code) or "?????") .. ")"
end
482
-- Split i into a sign string and its magnitude: returns "+" and i for values
-- that are not negative, "-" and -i otherwise. Zero is reported as "+" so
-- that the %I formatter renders it as "+0" instead of "-0", in line with
-- what the "+" flag of printf does.
function number.signed(i)
    if i >= 0 then
        return "+", i
    else
        return "-", -i
    end
end
490
491
492
-- Patterns that insert separators in a number given as string. The extra
-- match arguments carry the separators: Carg(1) is inserted in front of each
-- group of three digits, Carg(2) replaces the character that follows the
-- grouped part in splitter.

local two    = digit * digit
local three  = two * digit
local prefix = (Carg(1) * three)^1

-- grouped digits, then one character replaced by Carg(2), then two more
-- characters copied as they are
local splitter = Cs (
    (((1 - (three^1 * period))^1 + C(three)) * prefix + C((1-period)^1))
  * (anything/"" * Carg(2)) * C(2)
)

-- only digits: try three, two or one leading digit followed by groups of
-- three up to the end of the string, else take the digits as they come
local splitter3 = Cs (
    three * prefix * endofstring +
    two   * prefix * endofstring +
    digit * prefix * endofstring +
    three +
    two   +
    digit
)

patterns.formattednumber = splitter
512
-- Format n with separators. n may be a number or a string.
--   sep1 == false : n goes through splitter3 with sep2 (default ".");
--                   numbers are converted with tostring first
--   sep1 == true  : "." and "," are passed to splitter (sep2 is ignored)
--   sep1 == "."   : sep2 defaults to ","
--   sep1 == ","   : sep2 defaults to "."
--   otherwise     : sep1 defaults to "," and sep2 to "."
-- In all but the first case numbers are converted with "%0.2f".
function number.formatted(n,sep1,sep2)
    local isnumber = type(n) == "number"
    if sep1 == false then
        if isnumber then
            n = tostring(n)
        end
        return lpegmatch(splitter3,n,1,sep2 or ".")
    end
    if isnumber then
        n = format("%0.2f",n)
    end
    if sep1 == true then
        sep1, sep2 = ".", ","
    elseif sep1 == "." then
        sep2 = sep2 or ","
    elseif sep1 == "," then
        sep2 = sep2 or "."
    else
        sep1, sep2 = sep1 or ",", sep2 or "."
    end
    return lpegmatch(splitter,n,1,sep1,sep2)
end
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
-- Rewrites a formatted float: leading zeros go, a fraction that consists of
-- zeros only goes (including the period), and so do trailing zeros.
local p = Cs(
    P("-")^0
  * (P("0")^1/"")^0                                       -- leading zeros
  * (1-period)^0                                          -- rest of the integer part
  * (period * P("0")^1 * endofstring/"" + period^0)       -- ".000" at the end or the period
  * P(1-P("0")^1*endofstring)^0                           -- fraction up to the trailing zeros
)

-- Return n formatted with fmt (default "%0.3f") and compacted with the
-- pattern above. 0 and 1 are answered directly, and a result that ends up as
-- ".", "" or "-" is reported as "0".
function number.compactfloat(n,fmt)
    if n == 0 then
        return "0"
    end
    if n == 1 then
        return "1"
    end
    local compact = lpegmatch(p,format(fmt or "%0.3f",n))
    if compact == "." or compact == "" or compact == "-" then
        return "0"
    end
    return compact
end
574
-- Pieces used to slim down the output of %e style formats: zeros in front of
-- the exponent mark and in the exponent are removed, as is a "+" sign.

local zero      = P("0")^1 / ""
local plus      = P("+")   / ""
local minus     = P("-")
local separator = period
local trailing  = zero^1 * #S("eE")
local exponent  = (S("eE") * (plus + Cs((minus * zero^0 * endofstring)/"") + minus) * zero^0 * (endofstring * Cc("0") + anything^1))
local pattern_a = Cs(minus^0 * digit^1 * (separator/"" * trailing + separator * (trailing + digit)^0) * exponent)
local pattern_b = Cs((exponent + anything)^0)

-- Format n with f and compact the result. With one argument that argument
-- is the number and "%e" is used. Strings are converted with tonumber first;
-- whatever cannot be treated as a number is returned through tostring.
-- pattern_a handles the plain "%e" and "%E" formats, pattern_b all others.
function number.sparseexponent(f,n)
    if not n then
        n, f = f, "%e"
    end
    local kind  = type(n)
    local value = nil
    if kind == "number" then
        value = n
    elseif kind == "string" then
        value = tonumber(n)
    end
    if not value then
        return tostring(n)
    end
    local compactor = (f == "%e" or f == "%E") and pattern_a or pattern_b
    return lpegmatch(compactor,format(f,value))
end
600
-- Caches of format strings: hf[k] is "%.<k>f" and hs[k] is "%<k>s".
local hf = { }
local hs = { }

setmetatable(hf, { __index = function(t,k)
    local v = "%." .. k .. "f"
    t[k] = v
    return v
end } )

setmetatable(hs, { __index = function(t,k)
    local v = "%" .. k .. "s"
    t[k] = v
    return v
end } )

-- Format n with a digits after the period and pad the result on the left
-- with spaces up to a width of b + a + 1 characters.
--   n : the number to format
--   b : digits reserved before the period (default 0)
--   a : digits after the period (default 0)
-- Both b and a are normalized up front: indexing hf with nil would make the
-- lookup above fail on the concatenation, while the width calculation already
-- treated both as optional.
function number.formattedfloat(n,b,a)
    b = b or 0
    a = a or 0
    local s = format(hf[a],n)
    local l = b + a + 1
    if #s < l then
        return format(hs[l],s)
    end
    return s
end
625
-- Skeleton of the code generated for a formatter. make fills the four slots
-- with: the shared preamble, the preamble of the formatter, the argument
-- list and the expression that builds the result.
local template = [[
%s
%s
return function(%s) return %s end
]]
631
632
633
-- Wraps the input in double quotes and escapes the double quote, the
-- backslash, the newline and the carriage return; everything else is copied.
local pattern = Cs(Cc('"') * (
    (1-S('"\\\n\r'))^1
  + P('"')  / '\\"'
  + P('\\') / '\\\\'
  + P('\n') / '\\n'
  + P('\r') / '\\r'
)^0 * Cc('"'))

patterns.escapedquotes = pattern

-- Return s as a double quoted string with the characters above escaped.
string.escapedquotes = function(s)
    return lpegmatch(pattern,s)
end
657
-- Replaces a backslash followed by up to three digits by the character with
-- that code. The input has to start with at least one character that is not
-- a backslash, otherwise the pattern does not match.
-- NOTE(review): digit^-3 also matches zero digits; tonumber("") is nil in
-- that case and char then raises an error - confirm that callers never pass
-- a backslash that is not followed by a digit.
local pattern do
    local plain   = (1 - P("\\"))^1
    local escaped = P("\\") / "" * (digit^-3 / function(s) return char(tonumber(s)) end)
    pattern = Cs ( plain * ( escaped + plain )^1 )
end

patterns.unescapedquotes = pattern

-- Return s with the numeric escapes resolved, or s itself when the pattern
-- does not match.
string.unescapedquotes = function(s)
    return lpegmatch(pattern,s) or s
end
668
669
670
671
672
673
674
675
676
-- Built with lpeg.replacer from the newline pattern and "\r"; presumably it
-- turns every newline into a carriage return (the meaning of the third
-- argument is defined by lpeg.replacer, which is not part of this file).
string.texnewlines = lpeg.replacer(patterns.newline,"\r",true)
678
679
680
681
682
-- Shared preamble that make puts in front of all generated code.
local preamble = ""

-- Names that the generated formatter code can refer to. Every formatter
-- gets its own copy of this table as environment (see strings.formatters.new).
local environment = {
    global          = global or _G,
    lpeg            = lpeg,
    type            = type,
    tostring        = tostring,
    tonumber        = tonumber,
    format          = string.format,
    concat          = table.concat,
    signed          = number.signed,
    points          = number.points,
    nupoints        = number.nupoints,
    basepoints      = number.basepoints,
    nubasepoints    = number.nubasepoints,
    utfchar         = utf.char,
    utfbyte         = utf.byte,
    lpegmatch       = lpeg.match,
    nspaces         = string.nspaces,
    utfpadding      = string.utfpadding,
    tracedchar      = string.tracedchar,
    autosingle      = string.autosingle,
    autodouble      = string.autodouble,
    sequenced       = table.sequenced,
    formattednumber = number.formatted,
    sparseexponent  = number.sparseexponent,
    formattedfloat  = number.formattedfloat,
    stripzero       = patterns.stripzero,
    stripzeros      = patterns.stripzeros,
    escapedquotes   = string.escapedquotes,

    -- optional helper: when string.f6 is absent this stays nil and format_N
    -- falls back to a pure Lua variant
    FORMAT          = string.f6,
}
716
717
718
-- arguments[k] is the parameter list "a1,a2,...,ak". Entries are derived
-- from the previous one on first access and then stored.
local arguments = setmetatable({ "a1" }, {
    __index = function(t,k)
        local list = t[k-1] .. ",a" .. k
        t[k] = list
        return list
    end
})
728
-- What may sit between the "%" and the letter of a directive:
--   prefix_any : signs, spaces, periods and digits, captured as one string
--   prefix_sub : two optional sign/digit runs around a period, each captured
--                (0 is used for an empty run)
--   prefix_tab : text between braces, or else anything up to a letter,
--                digit or percent sign
local prefix_any = C((sign + space + period + digit)^0)
local prefix_sub = (C((sign + digit)^0) + Cc(0))
                 * period
                 * (C((sign + digit)^0) + Cc(0))
local prefix_tab = P("{") * C((1-P("}"))^0) * P("}") + C((1-R("az","AZ","09","%%"))^0)
734
735
736
737
738
-- Each format_* function claims the next argument by bumping n and returns
-- the Lua expression (as a string) that formats that argument.

-- %s : with a prefix the argument goes through format, without one it is
-- used as it is (or '' when absent).
local format_s = function(spec)
    n = n + 1
    if not spec or spec == "" then
        return format("(a%s or '')",n)
    end
    return format("format('%%%ss',a%s)",spec,n)
end

-- %S : like %s but the argument always goes through tostring.
local format_S = function(spec)
    n = n + 1
    if not spec or spec == "" then
        return format("tostring(a%s)",n)
    end
    return format("format('%%%ss',tostring(a%s))",spec,n)
end
756
-- %> : a positive width puts the padding in front of the argument, a
-- negative one behind it; no (or a zero) width means no padding at all.
local format_right = function(f)
    n = n + 1
    local width = tonumber(f)
    if not width or width == 0 then
        return format("(a%s or '')",n)
    end
    if width > 0 then
        return format("utfpadding(a%s,%i)..a%s",n,width,n)
    end
    return format("a%s..utfpadding(a%s,%i)",n,n,width)
end

-- %< : the mirror image of %>, a positive width puts the padding behind the
-- argument and a negative one in front of it.
local format_left = function(f)
    n = n + 1
    local width = tonumber(f)
    if not width or width == 0 then
        return format("(a%s or '')",n)
    end
    if width < 0 then
        return format("utfpadding(a%s,%i)..a%s",n,-width,n)
    end
    return format("a%s..utfpadding(a%s,%i)",n,n,-width)
end
781
-- %q : quote with the %q directive of string.format, a nil argument gives
-- ''. When JITSUPPORTED is set the argument is converted with tostring first.
local format_q
if JITSUPPORTED then
    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',tostring(a%s)) or '')",n,n)
    end
else
    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',a%s) or '')",n,n)
    end
end

-- %Q : quote with escapedquotes after a tostring.
local format_Q = function()
    n = n + 1
    return format("escapedquotes(tostring(a%s))",n)
end
798
-- %i : integer, with the prefix (if any) passed on to string.format.
local format_i = function(spec)
    n = n + 1
    if not spec or spec == "" then
        return format("format('%%i',a%s)",n)
    end
    return format("format('%%%si',a%s)",spec,n)
end

-- %d : same as %i.
local format_d = format_i

-- %I : integer preceded by the sign string that signed returns.
local format_I = function(spec)
    n = n + 1
    return format("format('%%s%%%si',signed(a%s))",spec,n)
end

-- %f : float, with the prefix passed on to string.format.
local format_f = function(spec)
    n = n + 1
    return format("format('%%%sf',a%s)",spec,n)
end
819
820
821
822
823
824
825
826
827
828
-- %F : whole values are printed with %i. Without a prefix, values very close
-- to zero become '0' and other values get nine decimals; with a prefix that
-- prefix is used for the %f.
local format_F = function(spec)
    n = n + 1
    if spec and spec ~= "" then
        return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,spec,n)
    end
    return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n)
end

-- %b.ak : hand the argument to formattedfloat with the two numbers found
-- around the period (0 when one is absent).
local format_k = function(before,after)
    n = n + 1
    return format("formattedfloat(a%s,%s,%s)",n,before or 0,after or 0)
end
853
-- The directives below map directly onto string.format (or sparseexponent
-- for j and J): the prefix is copied in front of the conversion letter.

-- %g
local format_g = function(spec)
    n = n + 1
    return format("format('%%%sg',a%s)",spec,n)
end

-- %G
local format_G = function(spec)
    n = n + 1
    return format("format('%%%sG',a%s)",spec,n)
end

-- %e
local format_e = function(spec)
    n = n + 1
    return format("format('%%%se',a%s)",spec,n)
end

-- %E
local format_E = function(spec)
    n = n + 1
    return format("format('%%%sE',a%s)",spec,n)
end

-- %j : %e with the result compacted by sparseexponent
local format_j = function(spec)
    n = n + 1
    return format("sparseexponent('%%%se',a%s)",spec,n)
end

-- %J : %E with the result compacted by sparseexponent
local format_J = function(spec)
    n = n + 1
    return format("sparseexponent('%%%sE',a%s)",spec,n)
end

-- %x
local format_x = function(spec)
    n = n + 1
    return format("format('%%%sx',a%s)",spec,n)
end

-- %X
local format_X = function(spec)
    n = n + 1
    return format("format('%%%sX',a%s)",spec,n)
end

-- %o
local format_o = function(spec)
    n = n + 1
    return format("format('%%%so',a%s)",spec,n)
end
898
-- %c : the argument is passed to utfchar.
local format_c = function()
    n = n + 1
    return format("utfchar(a%s)",n)
end

-- %C : the argument is passed to tracedchar.
local format_C = function()
    n = n + 1
    return format("tracedchar(a%s)",n)
end

-- %r : float formatted with a precision of zero (so rounded by format).
local format_r = function(spec)
    n = n + 1
    return format("format('%%%s.0f',a%s)",spec,n)
end
913
-- %h %H %u %U : hexadecimal output of a number, or of the utfbyte of a
-- string argument. The default width is "05". A prefix that is exactly "-"
-- suppresses the lead-in ("0x", "0x", "u+", "U+"); note that for u the bare
-- variant is the same as for h.
-- NOTE(review): only the exact prefix "-" is recognized, so sub(f,2) always
-- yields "" here; a prefix like "-4" takes the other branch - confirm that
-- this is intended.

local format_h = function(f)
    n = n + 1
    local bare = f == "-"
    if bare then
        f = sub(f,2)
    end
    local width = f == "" and "05" or f
    if bare then
        return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
    end
    return format("format('0x%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
end

local format_H = function(f)
    n = n + 1
    local bare = f == "-"
    if bare then
        f = sub(f,2)
    end
    local width = f == "" and "05" or f
    if bare then
        return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
    end
    return format("format('0x%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
end

local format_u = function(f)
    n = n + 1
    local bare = f == "-"
    if bare then
        f = sub(f,2)
    end
    local width = f == "" and "05" or f
    if bare then
        return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
    end
    return format("format('u+%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
end

local format_U = function(f)
    n = n + 1
    local bare = f == "-"
    if bare then
        f = sub(f,2)
    end
    local width = f == "" and "05" or f
    if bare then
        return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
    end
    return format("format('U+%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",width,n,n,n)
end
953
-- %p %P %b %B : delegate to points, nupoints, basepoints and nubasepoints.

local format_p = function()
    n = n + 1
    return format("points(a%s)",n)
end

local format_P = function()
    n = n + 1
    return format("nupoints(a%s)",n)
end

local format_b = function()
    n = n + 1
    return format("basepoints(a%s)",n)
end

local format_B = function()
    n = n + 1
    return format("nubasepoints(a%s)",n)
end
973
-- %t : concat of a table argument, the prefix (if any) is the separator.
local format_t = function(separator)
    n = n + 1
    if not separator or separator == "" then
        return format("concat(a%s)",n)
    end
    return format("concat(a%s,%q)",n,separator)
end

-- %T : sequenced of a table argument, the prefix (if any) is the separator.
local format_T = function(separator)
    n = n + 1
    if not separator or separator == "" then
        return format("sequenced(a%s)",n)
    end
    return format("sequenced(a%s,%q)",n,separator)
end
991
-- %l : 'true' or 'false' depending on the truth of the argument.
local format_l = function()
    n = n + 1
    return format("(a%s and 'true' or 'false')",n)
end

-- %L : 'TRUE' or 'FALSE' depending on the truth of the argument.
local format_L = function()
    n = n + 1
    return format("(a%s and 'TRUE' or 'FALSE')",n)
end

-- %n : whole values are printed with %i, other values with tostring.
local format_n = function()
    n = n + 1
    return format("((a%s %% 1 == 0) and format('%%i',a%s) or tostring(a%s))",n,n,n)
end
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
-- %N : float output (nine decimals unless a prefix says otherwise). When the
-- environment provides FORMAT the work is delegated to it, otherwise whole
-- values are printed with %i and the rest is formatted and then run through
-- stripzero.
local format_N

if environment.FORMAT then

    format_N = function(spec)
        n = n + 1
        if not spec or spec == "" then
            return format("FORMAT(a%s,'%%.9f')",n)
        end
        if spec == ".6" or spec == "0.6" then
            -- FORMAT is called without a format string for these two prefixes
            return format("FORMAT(a%s)",n)
        end
        return format("FORMAT(a%s,'%%%sf')",n,spec)
    end

else

    format_N = function(spec)
        n = n + 1
        if not spec or spec == "" then
            spec = ".9"
        end
        return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,spec,n)
    end

end
1057
-- %a : autosingle, the prefix (if any) is passed on as separator.
local format_a = function(separator)
    n = n + 1
    if not separator or separator == "" then
        return format("autosingle(a%s)",n)
    end
    return format("autosingle(a%s,%q)",n,separator)
end

-- %A : autodouble, the prefix (if any) is passed on as separator.
local format_A = function(separator)
    n = n + 1
    if not separator or separator == "" then
        return format("autodouble(a%s)",n)
    end
    return format("autodouble(a%s,%q)",n,separator)
end
1075
-- %w : as many spaces as the argument says, plus the numeric prefix if given.
local format_w = function(f)
    n = n + 1
    local fixed = tonumber(f)
    if not fixed then
        return format("nspaces[a%s]",n)
    end
    return format("nspaces[%s+a%s]",fixed,n)
end

-- %W : as many spaces as the numeric prefix says; no argument is consumed,
-- so n is left alone.
local format_W = function(f)
    return format("nspaces[%s]",tonumber(f) or 0)
end
1089
-- %m : formattednumber with the prefix (default ",") as first separator and
-- "." as second one; the prefix "0" passes false instead.
local format_m = function(f)
    n = n + 1
    if f == "0" then
        return format([[formattednumber(a%s,false)]],n)
    end
    if not f or f == "" then
        f = ","
    end
    return format([[formattednumber(a%s,%q,".")]],n,f)
end

-- %M : formattednumber with the prefix (default ".") as first separator and
-- "," as second one; the prefix "0" passes false instead.
local format_M = function(f)
    n = n + 1
    if f == "0" then
        return format([[formattednumber(a%s,false)]],n)
    end
    if not f or f == "" then
        f = "."
    end
    return format([[formattednumber(a%s,%q,",")]],n,f)
end
1113
1114
1115
-- %z : skip arguments; the counter advances by the numeric prefix (default
-- 1) and an empty string is put in the result.
local format_z = function(f)
    local skip = tonumber(f) or 1
    n = n + skip
    return "''"
end
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
-- Literal text between directives: turned into a quoted Lua string with %q.
local format_rest = function(text)
    return format("%q",text)
end
1146
-- %!name! : expand the template registered for name (or "tostring(%s)").
-- The numeric prefix f (default 1) tells how many arguments are involved and
-- a "..." in the template stands for the argument list:
--   f == 0 : no argument is used, "..." is removed
--   f == 1 : the next argument is used
--   f <  0 : with "..." that part is removed and the template is returned;
--            otherwise the argument at position n + f + 1 is used and the
--            counter is left alone
--   f >  1 : the next f arguments are used
local format_extension = function(extensions,f,name)
    local extension = extensions[name] or "tostring(%s)"
    local count     = tonumber(f) or 1
    local variadic  = find(extension,"%.%.%.")
    if count == 0 or (count < 0 and variadic) then
        if variadic then
            extension = gsub(extension,"%.%.%.","")
        end
        return extension
    end
    if count < 0 then
        local a = "a" .. (n + count + 1)
        return format(extension,a,a)
    end
    if count == 1 then
        if variadic then
            extension = gsub(extension,"%.%.%.","%%s")
        end
        n = n + 1
        local a = "a" .. n
        return format(extension,a,a)
    end
    if variadic then
        extension = gsub(extension,"%.%.%.",rep("%%s,",count-1).."%%s")
    end
    local names = { }
    for i=1,count do
        n = n + 1
        names[i] = "a" .. n
    end
    return format(extension,unpack(names))
end
1187
1188
1189
1190
1191
1192
1193
-- The grammar that turns a format string into the Lua expression that
-- produces the formatted result. Each directive is replaced by the code that
-- its format_* function returns, literal text is quoted by format_rest, and
-- the connector (first extra match argument) is put between the pieces. The
-- second extra match argument is the table with extensions, used by %!name!.
--
-- The rules are collected in a local scope so that the helper tables do not
-- add to the locals of the main chunk.

local builder do

    -- the directives are tried in exactly this order
    local order = {
        "!",
        "s", "q",
        "i", "d",
        "f", "F", "g", "G", "e", "E",
        "x", "X", "o",
        "c",
        "C",
        "S",
        "Q",
        "n",
        "N",
        "k",
        "r",
        "h", "H", "u", "U",
        "p", "P", "b", "B",
        "t", "T",
        "l", "L",
        "I",
        "w",
        "W",
        "a",
        "A",
        "j", "J",
        "m", "M",
        "z",
        ">",
        "<",
    }

    local directive = V(order[1])
    for i=2,#order do
        directive = directive + V(order[i])
    end

    local grammar = { "start",
        start = (
            (
                P("%") / "" * directive
              + V("*")
            )
          * (endofstring + Carg(1))
        )^0,

        -- directives with their own kind of prefix

        ["k"] = (prefix_sub * P("k")) / format_k,
        ["t"] = (prefix_tab * P("t")) / format_t,
        ["T"] = (prefix_tab * P("T")) / format_T,

        -- literal text; "?" is not referred to by the start rule

        ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest,
        ["?"] = Cs(((1-P("%"))^1               )^1) / format_rest,

        -- extensions

        ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,
    }

    -- all other directives share the same shape: prefix_any followed by the
    -- directive character, handled by the matching generator

    local handlers = {
        ["s"] = format_s, ["q"] = format_q,
        ["i"] = format_i, ["d"] = format_d,
        ["f"] = format_f, ["F"] = format_F,
        ["g"] = format_g, ["G"] = format_G,
        ["e"] = format_e, ["E"] = format_E,
        ["x"] = format_x, ["X"] = format_X,
        ["o"] = format_o,
        ["S"] = format_S, ["Q"] = format_Q,
        ["n"] = format_n, ["N"] = format_N,
        ["c"] = format_c, ["C"] = format_C,
        ["r"] = format_r,
        ["h"] = format_h, ["H"] = format_H,
        ["u"] = format_u, ["U"] = format_U,
        ["p"] = format_p, ["P"] = format_P,
        ["b"] = format_b, ["B"] = format_B,
        ["l"] = format_l, ["L"] = format_L,
        ["I"] = format_I,
        ["w"] = format_w, ["W"] = format_W,
        ["j"] = format_j, ["J"] = format_J,
        ["m"] = format_m, ["M"] = format_M,
        ["z"] = format_z,
        ["a"] = format_a, ["A"] = format_A,
        ["<"] = format_left,
        [">"] = format_right,
    }

    for character, handler in next, handlers do
        grammar[character] = (prefix_any * P(character)) / handler
    end

    builder = Cs(grammar)

end
1297
1298
1299
-- Lookup tables for two digit hexadecimal strings, filled on demand.
local xx = setmetatable({ }, {
    __index = function(t,k)
        local v = format("%02x",k)
        t[k] = v
        return v
    end
})
local XX = setmetatable({ }, {
    __index = function(t,k)
        local v = format("%02X",k)
        t[k] = v
        return v
    end
})

-- Format strings that are served by the lookup tables above.
local preset = {
    ["%02x"] = function(n) return xx[n] end,
    ["%02X"] = function(n) return XX[n] end,
}

-- A format string that consists of one plain string.format directive is
-- turned into code that calls string.format with it; the "%0" in the
-- replacement stands for the matched format string.
local direct =
    (P("%") * (sign + space + period + digit)^0 * S("sqidfgGeExXo") * endofstring)
  / [[local format = string.format return function(str) return format("%0",str) end]]
1311
-- __index handler of a formatter: build the function for format string str.
-- Preset formats are returned as they are (and not stored in t). A single
-- standard directive is compiled through the direct pattern, anything else
-- through the builder grammar. When the grammar consumed no arguments the
-- result is a function that returns str itself. The function is stored in t
-- so that each format string is compiled once.
local function make(t,str)
    local made = preset[str]
    if made then
        return made
    end
    local code = lpegmatch(direct,str)
    if code then
        made = loadstripped(code)()
    else
        n = 0   -- the generators count the arguments in this shared variable
        code = lpegmatch(builder,str,1,t._connector_,t._extensions_)
        if n > 0 then
            code = format(template,preamble,t._preamble_,arguments[n],code)
            made = loadstripped(code,t._environment_)()
        else
            made = function() return str end
        end
    end
    t[str] = made
    return made
end
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
-- __call handler of a formatter: look up (or build) the function for fmt
-- and apply it to the remaining arguments.
local function use(t,fmt,...)
    local formatter = t[fmt]
    return formatter(...)
end
1376
strings.formatters = { }

-- Create a formatter: a table that maps format strings onto functions (built
-- on demand by make) and that can also be called as formatter(fmt,...).
-- With noconcat set the pieces are connected with "," instead of "..", so
-- the generated function returns them as separate values. Each formatter
-- gets a private copy of the environment.
function strings.formatters.new(noconcat)
    local private = { }
    for name, value in next, environment do
        private[name] = value
    end
    return setmetatable({
        _type_        = "formatter",
        _connector_   = noconcat and "," or "..",
        _extensions_  = { },
        _preamble_    = "",
        _environment_ = private,
    }, {
        __index = make,
        __call  = use,
    })
end
1400
-- The default formatter, also reachable as string.formatters.
local formatters = strings.formatters.new()

string.formatters = formatters

-- Format the arguments with format string str using the default formatter.
string.formatter = function(str,...)
    local formatter = formatters[str]
    return formatter(...)
end
1405
-- Register an extension under name in formatter t; it is used by %!name!.
--   template : code with %s for the argument(s), default "%s"
--   preamble : a string is put in front of the preamble of the formatter, a
--              table has its fields copied into the environment of the
--              formatter
-- Nothing happens when t is not a formatter.
local function add(t,name,template,preamble)
    if type(t) ~= "table" or t._type_ ~= "formatter" then
        return
    end
    t._extensions_[name] = template or "%s"
    local kind = type(preamble)
    if kind == "string" then
        t._preamble_ = preamble .. "\n" .. t._preamble_
    elseif kind == "table" then
        local environment = t._environment_
        for key, value in next, preamble do
            environment[key] = value
        end
    end
end

strings.formatters.add = add
1420
1421
1422
-- Escape the characters that are special in xml: "<", ">", "&" and the
-- double quote become their predefined entities, everything else is copied.
-- (The replacement strings have to be the entity names; with the raw
-- characters as replacements the line does not even compile because of the
-- unbalanced double quotes.)
patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + anything)^0)
-- Put a backslash in front of the characters # $ % \ { } (texescape) and
-- additionally | (ctxescape); everything else is copied.
patterns.texescape = Cs ( ( C(S("#$%\\{}"))  / "\\%1" + anything )^0 )
patterns.ctxescape = Cs ( ( C(S("#$%\\{}|")) / "\\%1" + anything )^0 )
-- Escape a string for use inside a double quoted Lua string: a double quote
-- becomes \" and a newline becomes \n. luaquoted also adds the surrounding
-- quotes. The newline replacement used to carry a stray double quote (\n"),
-- which left the quotes in the result unbalanced; it now agrees with what
-- the escapedquotes pattern does for a newline.
patterns.luaescape = Cs(((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0)
patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0 * Cc('"'))
1428
1429
1430
1431
-- Make the escapers available in the default formatter as %!xml!, %!tex!
-- and %!lua!; each pattern is put in the environment under the name that the
-- template refers to.
add(formatters, "xml", [[lpegmatch(xmlescape,%s)]], { xmlescape = patterns.xmlescape })
add(formatters, "tex", [[lpegmatch(texescape,%s)]], { texescape = patterns.texescape })
add(formatters, "lua", [[lpegmatch(luaescape,%s)]], { luaescape = patterns.luaescape })
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
local dquote = patterns.dquote
local equote = patterns.escaped + dquote / '\\"' + 1    -- one item, a double quote gets escaped
local cquote = Cc('"')                                   -- a double quote that is added

-- First alternative: a string that starts and ends with a double quote; the
-- quotes in between are escaped. Second alternative: a string that contains
-- a space; it gets double quotes around it and inner quotes are escaped.
local pattern =
    Cs(dquote * (equote - P(-2))^0 * dquote)
  + Cs(cquote * (equote - space)^0 * space * equote^0 * cquote)

-- Return str processed by the pattern above, or str itself when neither of
-- the alternatives matches.
string.optionalquoted = function(str)
    return lpegmatch(pattern,str) or str
end
1480
-- Replaces each newline by os.newline, or by "\r" when that is not set (the
-- choice is made once, when this pattern is built).
local pattern = Cs((newline / (os.newline or "\r") + 1)^0)

-- Return str with its newlines replaced as described above.
string.replacenewlines = function(str)
    return lpegmatch(pattern,str)
end
1486
1487
1488
-- Return two functions that share a buffer. The first one adds an entry:
-- called with only fmt that value is stored as it is, otherwise the result
-- of formatting str and the other arguments with fmt (by the default
-- formatter) is stored. The second one returns the entries joined with
-- connector and starts a fresh buffer.
function strings.newcollector()
    local result, r = { }, 0
    local function collect(fmt,str,...)
        r = r + 1
        if str == nil and fmt then
            result[r] = fmt
        else
            result[r] = formatters[fmt](str,...)
        end
    end
    local function flush(connector)
        local str = concat(result,connector)
        result, r = { }, 0
        return str
    end
    return collect, flush
end
1504
1505
1506
-- Formatter for "%0.5N", looked up once.
local f_16_16 = formatters["%0.5N"]

-- Return n divided by 65536 formatted with the formatter above.
function number.to16dot16(n)
    local value = n/65536.0
    return f_16_16(value)
end
1512
1513
1514
-- Only defined when string.explode is not there yet.
if not string.explode then

    local p_utf   = patterns.utf8character
    local p_check = C(p_utf) * (P("+") * Cc(true))^0   -- separator, true when followed by "+"
    local p_split = Ct(C(p_utf)^0)                     -- all utf characters
    local p_space = Ct((C(1-P(" ")^1) + P(" ")^1)^0)   -- characters, runs of spaces skipped

    -- Split str into a table.
    --   no symbol    : p_space is applied
    --   symbol == "" : str is split into utf characters
    --   otherwise    : str is split at the first character of symbol, or at
    --                  runs of that character when a "+" follows it
    function string.explode(str,symbol)
        if not symbol then
            return lpegmatch(p_space,str)
        end
        if symbol == "" then
            return lpegmatch(p_split,str)
        end
        local separator, multiple = lpegmatch(p_check,symbol)
        if multiple then
            separator = P(separator)^1
        end
        return lpegmatch(tsplitat(separator),str)
    end

end
1540
1541
do

    local p_whitespace = patterns.whitespace^1

    -- Splitters per separator, created on first use. A splitter cuts a
    -- string at each occurrence of the separator that has whitespace on
    -- both sides.
    local splitters = setmetatable({ }, {
        __index = function(t,separator)
            local p = tsplitat(p_whitespace * P(separator) * p_whitespace)
            local function splitter(str)
                return lpegmatch(p,str)
            end
            t[separator] = splitter
            return splitter
        end
    })

    -- Return the (shared) splitter function for separator s.
    function string.wordsplitter(s)
        return splitters[s]
    end

end
1560
-- Only defined when CONTEXTLMTXMODE is set to a positive value.
if CONTEXTLMTXMODE and CONTEXTLMTXMODE > 0 then

    -- replacements for the characters that get encoded
    local hashed = {
        ["#"]  = "#H",
        ["\n"] = "#L",
        ['"']  = "#Q",
        ["\r"] = "#R",
        [" "]  = "#S",
        ["\t"] = "#T",
        ["\\"] = "#X",
    }

    -- Return s with each character listed above replaced by its two
    -- character code; other characters are kept (gsub leaves a match alone
    -- when the table has no entry for it).
    function string.texhashed(s)
        local result = gsub(s,".",hashed)
        return result
    end

end
1578 |