util-str.lua /size: 42 Kb    last modification: 2020-07-01 14:35
1
if
not
modules
then
modules
=
{
}
end
modules
[
'
util-str
'
]
=
{
2
version
=
1
.
001
,
3
comment
=
"
companion to luat-lib.mkiv
"
,
4
author
=
"
Hans Hagen, PRAGMA-ADE, Hasselt NL
"
,
5
copyright
=
"
PRAGMA ADE / ConTeXt Development Team
"
,
6
license
=
"
see context related readme files
"
7
}
8 9
utilities
=
utilities
or
{
}
10
utilities
.
strings
=
utilities
.
strings
or
{
}
11
local
strings
=
utilities
.
strings
12 13
local
format
,
gsub
,
rep
,
sub
,
find
=
string
.
format
,
string
.
gsub
,
string
.
rep
,
string
.
sub
,
string
.
find
14
local
load
,
dump
=
load
,
string
.
dump
15
local
tonumber
,
type
,
tostring
,
next
,
setmetatable
=
tonumber
,
type
,
tostring
,
next
,
setmetatable
16
local
unpack
,
concat
=
table
.
unpack
,
table
.
concat
17
local
P
,
V
,
C
,
S
,
R
,
Ct
,
Cs
,
Cp
,
Carg
,
Cc
=
lpeg
.
P
,
lpeg
.
V
,
lpeg
.
C
,
lpeg
.
S
,
lpeg
.
R
,
lpeg
.
Ct
,
lpeg
.
Cs
,
lpeg
.
Cp
,
lpeg
.
Carg
,
lpeg
.
Cc
18
local
patterns
,
lpegmatch
=
lpeg
.
patterns
,
lpeg
.
match
19
local
utfchar
,
utfbyte
,
utflen
=
utf
.
char
,
utf
.
byte
,
utf
.
len
20 21
----- loadstripped = utilities.lua.loadstripped
22
----- setmetatableindex = table.setmetatableindex
23 24
-- Compile a chunk of Lua source, strip it with string.dump (strip flag set)
-- and load the resulting binary chunk again. When shortcuts is given it is
-- handed to load as its fourth argument (the environment of the chunk).
local function loadstripped(str,shortcuts)
    local stripped = dump(load(str),true)
    if not shortcuts then
        return load(stripped)
    end
    return load(stripped,nil,nil,shortcuts)
end
31 32
-- todo: make a special namespace for the formatter
33 34
if
not
number
then
number
=
{
}
end
-- temp hack for luatex-fonts
35 36
local
stripzero
=
patterns
.
stripzero
37
local
stripzeros
=
patterns
.
stripzeros
38
local
newline
=
patterns
.
newline
39
local
endofstring
=
patterns
.
endofstring
40
local
anything
=
patterns
.
anything
41
local
whitespace
=
patterns
.
whitespace
42
local
space
=
patterns
.
space
43
local
spacer
=
patterns
.
spacer
44
local
spaceortab
=
patterns
.
spaceortab
45
local
digit
=
patterns
.
digit
46
local
sign
=
patterns
.
sign
47
local
period
=
patterns
.
period
48 49
-- local function points(n)
50
-- n = tonumber(n)
51
-- return (not n or n == 0) and "0pt" or lpegmatch(stripzeros,format("%.5fpt",n/65536))
52
-- end
53 54
-- local function basepoints(n)
55
-- n = tonumber(n)
56
-- return (not n or n == 0) and "0bp" or lpegmatch(stripzeros,format("%.5fbp", n*(7200/7227)/65536))
57
-- end
58 59
local
ptf
=
1
/
65536
60
local
bpf
=
(
7200
/
7227
)
/
65536
61 62
-- Convert a value to a "pt" string: the value is multiplied by ptf, whole
-- results are printed as integers and other results with five decimals that
-- are then passed through the stripzeros pattern. Zero and anything that
-- tonumber cannot convert give "0pt".
local function points(n)
    local value = n ~= 0 and tonumber(n)
    if not value or value == 0 then
        return "0pt"
    end
    value = value * ptf
    if value % 1 ~= 0 then
        -- plural (stripzeros) as we need to keep the pt
        return lpegmatch(stripzeros,format("%.5fpt",value))
    end
    return format("%ipt",value)
end
76 77
-- Convert a value to a "bp" string: the value is multiplied by bpf, whole
-- results are printed as integers and other results with five decimals that
-- are then passed through the stripzeros pattern.
--
-- Zero and anything that tonumber cannot convert give "0bp". This used to be
-- "0pt", which disagreed with the unit of every other result of this function.
local function basepoints(n)
    if n == 0 then
        return "0bp"
    end
    n = tonumber(n)
    if not n or n == 0 then
        return "0bp"
    end
    n = n * bpf
    if n % 1 == 0 then
        return format("%ibp",n)
    end
    return lpegmatch(stripzeros,format("%.5fbp",n)) -- plural as we need to keep the bp
end
91 92
number
.
points
=
points
93
number
.
basepoints
=
basepoints
94 95
-- str = " \n \ntest \n test\ntest "
96
-- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]")
97 98
local
rubish
=
spaceortab
^
0
*
newline
99
local
anyrubish
=
spaceortab
+
newline
100
local
stripped
=
(
spaceortab
^
1
/
"
"
)
*
newline
101
local
leading
=
rubish
^
0
/
"
"
102
local
trailing
=
(
anyrubish
^
1
*
endofstring
)
/
"
"
103
local
redundant
=
rubish
^
3
/
"
\n
"
104 105
local
pattern
=
Cs
(
leading
*
(
trailing
+
redundant
+
stripped
+
anything
)
^
0
)
106 107
-- Run str through the substitution pattern defined just above (leading,
-- trailing, redundant and stripped rules) and return the rewritten string.
function strings.collapsecrlf(str)
    local collapsed = lpegmatch(pattern,str)
    return collapsed
end
110 111
-- The following functions might end up in another namespace.
112 113
local
repeaters
=
{
}
-- watch how we also moved the -1 in depth-1 to the creator
114 115
-- Return a table that lazily maps a count k to str repeated (k + offset)
-- times; results are stored in the table so each count is built only once.
-- A non-positive total gives "", and so does a false/nil key. Tables are
-- cached per (str,offset) pair in repeaters, so equal requests share a table.
function strings.newrepeater(str,offset)
    offset = offset or 0
    local byoffset = repeaters[str]
    if not byoffset then
        byoffset = { }
        repeaters[str] = byoffset
    end
    local cached = byoffset[offset]
    if cached then
        return cached
    end
    local function build(list,k)
        if not k then
            return ""
        end
        local count  = k + offset
        local result = ""
        if count > 0 then
            result = rep(str,count)
        end
        list[k] = result
        return result
    end
    local repeater = setmetatable({ }, { __index = build })
    byoffset[offset] = repeater
    return repeater
end
139 140
-- local dashes = strings.newrepeater("--",-1)
141
-- print(dashes[2],dashes[3],dashes[1])
142 143
-- State shared by the tab expansion pattern below. These are only initial
-- values: the pattern assigns all three again at the start of every match.
-- (A fourth initialiser used to be listed here; Lua silently discarded it.)
local extra, tab, start = 0, 0, 4
144 145
local
nspaces
=
strings
.
newrepeater
(
"
"
)
146 147
string
.
nspaces
=
nspaces
148 149
local
pattern
=
150
Carg
(
1
)
/
function
(
t
)
151
extra
,
tab
,
start
=
0
,
t
or
7
,
1
152
end
153
*
Cs
(
(
154
Cp
(
)
*
patterns
.
tab
/
function
(
position
)
155
local
current
=
(
position
-
start
+
1
)
+
extra
156
local
spaces
=
tab
-
(
current
-1
)
%
tab
157
if
spaces
>
0
then
158
extra
=
extra
+
spaces
-
1
159
return
nspaces
[
spaces
]
-- rep(" ",spaces)
160
else
161
return
"
"
162
end
163
end
164
+
newline
*
Cp
(
)
/
function
(
position
)
165
extra
,
start
=
0
,
position
166
end
167
+
anything
168
)
^
1
)
169 170
-- Apply the tab expansion pattern to str. The tab width is passed as the
-- extra match argument and defaults to 7.
function strings.tabtospace(str,tab)
    -- no real gain in first checking if a \t is there
    local width = tab or 7
    return lpegmatch(pattern,str,1,width)
end
174 175
-- Return the padding needed to bring s up to a width of |n|: the entry of
-- nspaces for |n| minus the utf length of s. A missing or zero n gives "".
function string.utfpadding(s,n)
    if n and n ~= 0 then
        local length = utflen(s)
        local width  = n > 0 and n or -n
        return nspaces[width - length]
    end
    return ""
end
186 187
-- local t = {
188
-- "1234567123456712345671234567",
189
-- "\tb\tc",
190
-- "a\tb\tc",
191
-- "aa\tbb\tcc",
192
-- "aaa\tbbb\tccc",
193
-- "aaaa\tbbbb\tcccc",
194
-- "aaaaa\tbbbbb\tccccc",
195
-- "aaaaaa\tbbbbbb\tcccccc\n aaaaaa\tbbbbbb\tcccccc",
196
-- "one\n two\nxxx three\nxx four\nx five\nsix",
197
-- }
198
-- for k=1,#t do
199
-- print(strings.tabtospace(t[k]))
200
-- end
201 202
-- todo: lpeg
203 204
-- function strings.striplong(str) -- strips all leading spaces
205
-- str = gsub(str,"^%s*","")
206
-- str = gsub(str,"[\n\r]+ *","\n")
207
-- return str
208
-- end
209 210
local
optionalspace
=
spacer
^
0
211
local
nospace
=
optionalspace
/
"
"
212
local
endofline
=
nospace
*
newline
213 214
local
stripend
=
(
whitespace
^
1
*
endofstring
)
/
"
"
215 216
local
normalline
=
(
nospace
*
(
(
1
-
optionalspace
*
(
newline
+
endofstring
)
)
^
1
)
*
nospace
)
217 218
local
stripempty
=
endofline
^
1
/
"
"
219
local
normalempty
=
endofline
^
1
220
local
singleempty
=
endofline
*
(
endofline
^
0
/
"
"
)
221
local
doubleempty
=
endofline
*
endofline
^
-1
*
(
endofline
^
0
/
"
"
)
222
local
stripstart
=
stripempty
^
0
223 224
local
intospace
=
whitespace
^
1
/
"
"
225
local
noleading
=
whitespace
^
1
/
"
"
226
local
notrailing
=
noleading
*
endofstring
227 228
local
p_prune_normal
=
Cs
(
stripstart
*
(
stripend
+
normalline
+
normalempty
)
^
0
)
229
local
p_prune_collapse
=
Cs
(
stripstart
*
(
stripend
+
normalline
+
doubleempty
)
^
0
)
230
local
p_prune_noempty
=
Cs
(
stripstart
*
(
stripend
+
normalline
+
singleempty
)
^
0
)
231
local
p_prune_intospace
=
Cs
(
noleading
*
(
notrailing
+
intospace
+
1
)
^
0
)
232
local
p_retain_normal
=
Cs
(
(
normalline
+
normalempty
)
^
0
)
233
local
p_retain_collapse
=
Cs
(
(
normalline
+
doubleempty
)
^
0
)
234
local
p_retain_noempty
=
Cs
(
(
normalline
+
singleempty
)
^
0
)
235 236
-- function striplines(str,prune,collapse,noempty)
237
-- if prune then
238
-- if noempty then
239
-- return lpegmatch(p_prune_noempty,str) or str
240
-- elseif collapse then
241
-- return lpegmatch(p_prune_collapse,str) or str
242
-- else
243
-- return lpegmatch(p_prune_normal,str) or str
244
-- end
245
-- else
246
-- if noempty then
247
-- return lpegmatch(p_retain_noempty,str) or str
248
-- elseif collapse then
249
-- return lpegmatch(p_retain_collapse,str) or str
250
-- else
251
-- return lpegmatch(p_retain_normal,str) or str
252
-- end
253
-- end
254
-- end
255 256
local
striplinepatterns
=
{
257
[
"
prune
"
]
=
p_prune_normal
,
258
[
"
prune and collapse
"
]
=
p_prune_collapse
,
-- default
259
[
"
prune and no empty
"
]
=
p_prune_noempty
,
260
[
"
prune and to space
"
]
=
p_prune_intospace
,
261
[
"
retain
"
]
=
p_retain_normal
,
262
[
"
retain and collapse
"
]
=
p_retain_collapse
,
263
[
"
retain and no empty
"
]
=
p_retain_noempty
,
264
[
"
collapse
"
]
=
patterns
.
collapser
,
265
}
266 267
setmetatable
(
striplinepatterns
,
{
__index
=
function
(
t
,
k
)
return
p_prune_collapse
end
}
)
268 269
strings
.
striplinepatterns
=
striplinepatterns
270 271
-- Apply the strip pattern registered under how in striplinepatterns (that
-- table falls back to p_prune_collapse for unknown keys). A false/nil str
-- is returned untouched, and str itself is returned when the match fails.
function strings.striplines(str,how)
    if not str then
        return str
    end
    return lpegmatch(striplinepatterns[how],str) or str
end
274 275
-- Apply p_prune_intospace to str. A false/nil str is returned untouched,
-- and str itself is returned when the match fails.
function strings.collapse(str) -- maybe also in strings
    if not str then
        return str
    end
    return lpegmatch(p_prune_intospace,str) or str
end
278 279
-- also see: string.collapsespaces
280 281
strings
.
striplong
=
strings
.
striplines
-- for old times sake
282 283
-- local str = table.concat( {
284
-- " ",
285
-- " aap",
286
-- " noot mies",
287
-- " ",
288
-- " ",
289
-- " zus wim jet",
290
-- "zus wim jet",
291
-- " zus wim jet",
292
-- " ",
293
-- }, "\n")
294
--
295
-- local str = table.concat( {
296
-- " aaaa",
297
-- " bb",
298
-- " cccccc",
299
-- " ",
300
-- }, "\n")
301
--
302
-- for k, v in table.sortedhash(utilities.strings.striplinepatterns) do
303
-- logs.report("stripper","method: %s, result: [[%s]]",k,utilities.strings.striplines(str,k))
304
-- end
305 306
-- inspect(strings.striplong([[
307
-- aaaa
308
-- bb
309
-- cccccc
310
-- ]]))
311 312
function
strings
.
nice
(
str
)
313
str
=
gsub
(
str
,
"
[:%-+_]+
"
,
"
"
)
-- maybe more
314
return
str
315
end
316 317
-- Work in progress. Interesting is that compared to the built-in this is faster in
318
-- luatex than in luajittex where we have a comparable speed. It only makes sense
319
-- to use the formatter when a (somewhat) complex format is used a lot. Each formatter
320
-- is a function so there is some overhead and not all formatted output is worth that
321
-- overhead. Keep in mind that there is an extra function call involved. In principle
322
-- we end up with a string concatenation so one could inline such a sequence but often
323
-- at the cost of less readability. So, it's a sort of (visual) compromise. Of course
324
-- there is the benefit of more variants. (Concerning the speed: a simple format like
325
-- %05fpt is better off with format than with a formatter, but as soon as you put
326
-- something in front formatters become faster. Passing the pt as extra argument makes
327
-- formatters behave better. Of course this is rather implementation dependent. Also,
328
-- when a specific format is only used a few times the overhead in creating it is not
329
-- compensated by speed.)
330
--
331
-- More info can be found in cld-mkiv.pdf so here I stick to a simple list.
332
--
333
-- integer %...i number
334
-- integer %...d number
335
-- unsigned %...u number -- not used
336
-- character %...c number
337
-- hexadecimal %...x number
338
-- HEXADECIMAL %...X number
339
-- octal %...o number
340
-- string %...s string number
341
-- float %...f number
342
-- checked float %...F number
343
-- exponential %...e number
344
-- exponential %...E number
345
-- stripped e %...j number
346
-- stripped E %...J number
347
-- autofloat %...g number
348
-- autofloat %...G number
349
-- utf character %...c number
350
-- force tostring %...S any
351
-- force tostring %Q any
352
-- force tonumber %N number (strip leading zeros)
353
-- signed number %I number
354
-- rounded number %r number
355
-- 0xhexadecimal %...h character number
356
-- 0xHEXADECIMAL %...H character number
357
-- U+hexadecimal %...u character number
358
-- U+HEXADECIMAL %...U character number
359
-- points %p number (scaled points)
360
-- basepoints %b number (scaled points)
361
-- table concat %...t table
362
-- table concat %{.}t table
363
-- serialize %...T sequenced (no nested tables)
364
-- serialize %{.}T sequenced (no nested tables)
365
-- boolean (logic) %l boolean
366
-- BOOLEAN %L boolean
367
-- whitespace %...w number
368
-- whitespace %...W (fixed)
369
-- automatic %...a 'whatever' (string, table, ...)
370
-- automatic %...A "whatever" (string, table, ...)
371
-- zap %...z skip
372
-- stripped %...N %...N
373
-- comma/period real %...m
374
-- period/comma real %...M
375
-- formatted float %...k n.m
376 377
local
n
=
0
378 379
-- we are somewhat sloppy in parsing prefixes as it's not that critical
380 381
-- hard to avoid but we can collect them in a private namespace if needed
382 383
-- inline the next two makes no sense as we only use this in logging
384 385
local
sequenced
=
table
.
sequenced
386 387
-- Render a value for display using double quotes: nil becomes '""', numbers
-- are returned via tostring without quotes, tables are serialized with
-- sequenced (separator sep, default ",") and quoted, and anything else is
-- the quoted tostring of the value.
function string.autodouble(s,sep)
    if s == nil then
        return '""'
    end
    local kind = type(s)
    if kind == "number" then
        return tostring(s) -- tostring not really needed
    end
    local inner
    if kind == "table" then
        inner = sequenced(s,sep or ",")
    else
        inner = tostring(s)
    end
    return '"' .. inner .. '"'
end
400 401
-- Render a value for display using single quotes: nil becomes "''", numbers
-- are returned via tostring without quotes, tables are serialized with
-- sequenced (separator sep, default ",") and quoted, and anything else is
-- the quoted tostring of the value.
function string.autosingle(s,sep)
    if s == nil then
        return "''"
    end
    local kind = type(s)
    if kind == "number" then
        return tostring(s) -- tostring not really needed
    end
    local inner
    if kind == "table" then
        inner = sequenced(s,sep or ",")
    else
        inner = tostring(s)
    end
    return "'" .. inner .. "'"
end
414 415
local
tracedchars
=
{
[
0
]
=
416
-- the regular bunch
417
"
[null]
"
,
"
[soh]
"
,
"
[stx]
"
,
"
[etx]
"
,
"
[eot]
"
,
"
[enq]
"
,
"
[ack]
"
,
"
[bel]
"
,
418
"
[bs]
"
,
"
[ht]
"
,
"
[lf]
"
,
"
[vt]
"
,
"
[ff]
"
,
"
[cr]
"
,
"
[so]
"
,
"
[si]
"
,
419
"
[dle]
"
,
"
[dc1]
"
,
"
[dc2]
"
,
"
[dc3]
"
,
"
[dc4]
"
,
"
[nak]
"
,
"
[syn]
"
,
"
[etb]
"
,
420
"
[can]
"
,
"
[em]
"
,
"
[sub]
"
,
"
[esc]
"
,
"
[fs]
"
,
"
[gs]
"
,
"
[rs]
"
,
"
[us]
"
,
421
-- plus space
422
"
[space]
"
,
-- 0x20
423
}
424 425
string
.
tracedchars
=
tracedchars
426
strings
.
tracers
=
tracedchars
427 428
function
string
.
tracedchar
(
b
)
429
-- todo: table
430
if
type
(
b
)
=
=
"
number
"
then
431
return
tracedchars
[
b
]
or
(
utfchar
(
b
)
.
.
"
(U+
"
.
.
format
(
"
%05X
"
,
b
)
.
.
"
)
"
)
432
else
433
local
c
=
utfbyte
(
b
)
434
return
tracedchars
[
c
]
or
(
b
.
.
"
(U+
"
.
.
(
c
and
format
(
"
%05X
"
,
c
)
or
"
?????
"
)
.
.
"
)
"
)
435
end
436
end
437 438
-- Split a number into a sign string and its magnitude (two return values).
-- Only values greater than zero get "+"; every other value, zero included,
-- gets "-" together with the negated value.
function number.signed(i)
    if not (i > 0) then
        return "-", -i
    end
    return "+", i
end
445 446
-- maybe to util-num
447 448
local
two
=
digit
*
digit
449
local
three
=
two
*
digit
450
local
prefix
=
(
Carg
(
1
)
*
three
)
^
1
451 452
local
splitter
=
Cs
(
453
(
(
(
1
-
(
three
^
1
*
period
)
)
^
1
+
C
(
three
)
)
*
prefix
+
C
(
(
1
-
period
)
^
1
)
)
454
*
(
anything
/
"
"
*
Carg
(
2
)
)
*
C
(
2
)
455
)
456 457
local
splitter3
=
Cs
(
458
three
*
prefix
*
endofstring
+
459
two
*
prefix
*
endofstring
+
460
digit
*
prefix
*
endofstring
+
461
three
+
462
two
+
463
digit
464
)
465 466
patterns
.
formattednumber
=
splitter
467 468
-- Format a number (or numeric string) with separators.
--
--   sep1 == false : n (a number goes through tostring) is matched with
--                   splitter3, with sep2 (default ".") as extra argument
--   sep1 == true  : splitter with "." and ","
--   sep1 == "."   : splitter with "." and sep2 (default ",")
--   otherwise     : splitter with sep1 (default ",") and sep2 (default ".")
--
-- In the splitter cases a number is first formatted with "%0.2f".
function number.formatted(n,sep1,sep2)
    local isnumber = type(n) == "number"
    if sep1 == false then
        if isnumber then
            n = tostring(n)
        end
        return lpegmatch(splitter3,n,1,sep2 or ".")
    end
    if isnumber then
        n = format("%0.2f",n)
    end
    local first, second
    if sep1 == true then
        first, second = ".", ","
    elseif sep1 == "." then
        first, second = sep1, sep2 or ","
    else
        -- this also covers sep1 == ",", which uses the same defaults
        first, second = sep1 or ",", sep2 or "."
    end
    return lpegmatch(splitter,n,1,first,second)
end
489 490
-- print(number.formatted(1))
491
-- print(number.formatted(12))
492
-- print(number.formatted(123))
493
-- print(number.formatted(1234))
494
-- print(number.formatted(12345))
495
-- print(number.formatted(123456))
496
-- print(number.formatted(1234567))
497
-- print(number.formatted(12345678))
498
-- print(number.formatted(12345678,true))
499
-- print(number.formatted(1,false))
500
-- print(number.formatted(12,false))
501
-- print(number.formatted(123,false))
502
-- print(number.formatted(1234,false))
503
-- print(number.formatted(12345,false))
504
-- print(number.formatted(123456,false))
505
-- print(number.formatted(1234567,false))
506
-- print(number.formatted(12345678,false))
507
-- print(number.formatted(1234.56,"!","?"))
508 509
local
p
=
Cs
(
510
P
(
"
-
"
)
^
0
511
*
(
P
(
"
0
"
)
^
1
/
"
"
)
^
0
512
*
(
1
-
period
)
^
0
513
*
(
period
*
P
(
"
0
"
)
^
1
*
endofstring
/
"
"
+
period
^
0
)
514
*
P
(
1
-
P
(
"
0
"
)
^
1
*
endofstring
)
^
0
515
)
516 517
-- Format n with fmt (default "%0.3f") and run the result through the
-- compacting pattern p defined just above. The values 0 and 1 are answered
-- directly, and a result of ".", "" or "-" is reported as "0".
function number.compactfloat(n,fmt)
    if n == 0 then
        return "0"
    end
    if n == 1 then
        return "1"
    end
    local compact = lpegmatch(p,format(fmt or "%0.3f",n))
    if compact == "." or compact == "" or compact == "-" then
        return "0"
    end
    return compact
end
529 530
local
zero
=
P
(
"
0
"
)
^
1
/
"
"
531
local
plus
=
P
(
"
+
"
)
/
"
"
532
local
minus
=
P
(
"
-
"
)
533
local
separator
=
period
534
local
trailing
=
zero
^
1
*
#
S
(
"
eE
"
)
535
local
exponent
=
(
S
(
"
eE
"
)
*
(
plus
+
Cs
(
(
minus
*
zero
^
0
*
endofstring
)
/
"
"
)
+
minus
)
*
zero
^
0
*
(
endofstring
*
Cc
(
"
0
"
)
+
anything
^
1
)
)
536
local
pattern_a
=
Cs
(
minus
^
0
*
digit
^
1
*
(
separator
/
"
"
*
trailing
+
separator
*
(
trailing
+
digit
)
^
0
)
*
exponent
)
537
local
pattern_b
=
Cs
(
(
exponent
+
anything
)
^
0
)
538 539
-- Format a number with format f and compact the result: pattern_a is used
-- when f is exactly "%e" or "%E", pattern_b otherwise. When called with one
-- argument it is taken as the number and f becomes "%e". Strings are
-- converted with tonumber first; anything that is not (convertible to) a
-- number is returned as tostring(n).
function number.sparseexponent(f,n)
    if not n then
        n, f = f, "%e"
    end
    local kind = type(n)
    local value
    if kind == "number" then
        value = n
    elseif kind == "string" then
        value = tonumber(n) -- cast to number
    end
    if not value then
        return tostring(n)
    end
    local compactor = (f == "%e" or f == "%E") and pattern_a or pattern_b
    return lpegmatch(compactor,format(f,value))
end
555 556
local
hf
=
{
}
557
local
hs
=
{
}
558 559
setmetatable
(
hf
,
{
__index
=
function
(
t
,
k
)
560
local
v
=
"
%.
"
.
.
k
.
.
"
f
"
561
t
[
k
]
=
v
562
return
v
563
end
}
)
564 565
setmetatable
(
hs
,
{
__index
=
function
(
t
,
k
)
566
local
v
=
"
%
"
.
.
k
.
.
"
s
"
567
t
[
k
]
=
v
568
return
v
569
end
}
)
570 571
-- Format n with a digits after the period and left-pad the result with
-- spaces to a total width of b + a + 1 (digits before, digits after, and the
-- period). The format strings come from the hf ("%.<a>f") and hs ("%<l>s")
-- caches.
--
-- Both counts default to 0. The default for a is applied before hf is
-- indexed: hf builds its entries by concatenating the key, so indexing it
-- with nil raised an error even though the width calculation already
-- accepted a missing a.
function number.formattedfloat(n,b,a)
    a = a or 0
    b = b or 0
    local s = format(hf[a],n)
    local l = b + a + 1
    if #s < l then
        return format(hs[l],s)
    end
    return s
end
580 581
local
template
=
[[
582%s 583%s 584return function(%s) return %s end 585
]]
586 587
-- this might move
588 589
local
pattern
=
Cs
(
Cc
(
'
"
'
)
*
(
590
(
1
-
S
(
'
"\\\n\r
'
)
)
^
1
591
+
P
(
'
"
'
)
/
'
\\"
'
592
+
P
(
'
\\
'
)
/
'
\\\\
'
593
+
P
(
'
\n
'
)
/
'
\\n
'
594
+
P
(
'
\r
'
)
/
'
\\r
'
595
)
^
0
*
Cc
(
'
"
'
)
)
596 597
patterns
.
escapedquotes
=
pattern
598 599
-- Return s processed by the escaping pattern defined just above (the one
-- also published as patterns.escapedquotes).
function string.escapedquotes(s)
    local quoted = lpegmatch(pattern,s)
    return quoted
end
602 603
-- print(string.escapedquotes('1\\23\n"'))
604 605
-- but for now here
606 607
local
preamble
=
"
"
608 609
local
environment
=
{
610
global
=
global
or
_G
,
611
lpeg
=
lpeg
,
612
type
=
type
,
613
tostring
=
tostring
,
614
tonumber
=
tonumber
,
615
format
=
string
.
format
,
616
concat
=
table
.
concat
,
617
signed
=
number
.
signed
,
618
points
=
number
.
points
,
619
basepoints
=
number
.
basepoints
,
620
utfchar
=
utf
.
char
,
621
utfbyte
=
utf
.
byte
,
622
lpegmatch
=
lpeg
.
match
,
623
nspaces
=
string
.
nspaces
,
624
utfpadding
=
string
.
utfpadding
,
625
tracedchar
=
string
.
tracedchar
,
626
autosingle
=
string
.
autosingle
,
627
autodouble
=
string
.
autodouble
,
628
sequenced
=
table
.
sequenced
,
629
formattednumber
=
number
.
formatted
,
630
sparseexponent
=
number
.
sparseexponent
,
631
formattedfloat
=
number
.
formattedfloat
,
632
stripzero
=
patterns
.
stripzero
,
633
stripzeros
=
patterns
.
stripzeros
,
634
escapedquotes
=
string
.
escapedquotes
,
635 636
FORMAT
=
string
.
f6
,
637
}
638 639
-- -- --
640 641
-- Lazily built argument lists: arguments[k] is "a1,a2,...,ak". Each entry
-- is derived from the previous one and stored on first use.
-- faster than previously used (select(n,...))
local arguments = setmetatable({ "a1" }, {
    __index = function(list,count)
        local joined = list[count-1] .. ",a" .. count
        list[count] = joined
        return joined
    end,
})
650 651
local
prefix_any
=
C
(
(
sign
+
space
+
period
+
digit
)
^
0
)
652
local
prefix_sub
=
(
C
(
(
sign
+
digit
)
^
0
)
+
Cc
(
0
)
)
653
*
period
654
*
(
C
(
(
sign
+
digit
)
^
0
)
+
Cc
(
0
)
)
655
local
prefix_tab
=
P
(
"
{
"
)
*
C
(
(
1
-
P
(
"
}
"
)
)
^
0
)
*
P
(
"
}
"
)
+
C
(
(
1
-
R
(
"
az
"
,
"
AZ
"
,
"
09
"
,
"
%%
"
)
)
^
0
)
656 657
-- we've split all cases as then we can optimize them (let's omit the fuzzy u)
658 659
-- todo: replace outer formats in next by ..
660 661
-- Claim the next argument slot (the shared counter n) and return a Lua
-- source fragment for it: a format call with a '%<f>s' spec when a prefix f
-- is given, otherwise the bare argument with a nil check.
local function format_s(f)
    n = n + 1
    if not f or f == "" then
        -- best no tostring in order to stay compatible (.. does a selective tostring too)
        return format("(a%s or '')",n) -- goodie: nil check
    end
    return format("format('%%%ss',a%s)",f,n)
end
669 670
-- Claim the next argument slot and return a source fragment that applies
-- tostring to it, wrapped in a '%<f>s' format call when a prefix f is given.
local function format_S(f) -- can be optimized
    n = n + 1
    if not f or f == "" then
        return format("tostring(a%s)",n)
    end
    return format("format('%%%ss',tostring(a%s))",f,n)
end
678 679
-- Claim the next argument slot and return a source fragment that combines
-- the argument with a utfpadding call of width f: padding first for a
-- positive f, argument first for a negative f. Without a usable non-zero
-- width the bare argument (with nil check) is emitted.
local function format_right(f)
    n = n + 1
    local width = tonumber(f)
    if not width or width == 0 then
        return format("(a%s or '')",n)
    end
    if width > 0 then
        return format("utfpadding(a%s,%i)..a%s",n,width,n)
    end
    return format("a%s..utfpadding(a%s,%i)",n,n,width)
end
690 691
-- Claim the next argument slot and return a source fragment that combines
-- the argument with a utfpadding call of width -f: padding first for a
-- negative f, argument first for a positive f. Without a usable non-zero
-- width the bare argument (with nil check) is emitted.
local function format_left(f)
    n = n + 1
    local width = tonumber(f)
    if not width or width == 0 then
        return format("(a%s or '')",n)
    end
    if width >= 0 then
        return format("a%s..utfpadding(a%s,%i)",n,n,-width)
    end
    return format("utfpadding(a%s,%i)..a%s",n,-width,n)
end
703 704
-- Claim the next argument slot and return a source fragment that applies
-- '%q' to it, or yields '' when the argument is nil. When JITSUPPORTED is
-- set the argument is passed through tostring first.
local format_q
if JITSUPPORTED then
    format_q = function()
        n = n + 1
        -- lua 5.3 has a different q than lua 5.2 (which does a tostring on numbers)
        return format("(a%s ~= nil and format('%%q',tostring(a%s)) or '')",n,n)
    end
else
    format_q = function()
        n = n + 1
        return format("(a%s ~= nil and format('%%q',a%s) or '')",n,n)
    end
end
714 715 716
-- Claim the next argument slot and return a source fragment that calls
-- escapedquotes on the tostring of the argument.
local function format_Q() -- fast escaping
    local slot = n + 1
    n = slot
    return format("escapedquotes(tostring(a%s))",slot)
end
721 722
-- Claim the next argument slot and return a source fragment that formats
-- it with '%<f>i' (plain '%i' when no prefix f is given).
local function format_i(f)
    n = n + 1
    if not f or f == "" then
        return format("format('%%i',a%s)",n) -- why not just tostring()
    end
    return format("format('%%%si',a%s)",f,n)
end
730 731
local
format_d
=
format_i
732 733
-- Claim the next argument slot and return a source fragment that formats
-- the two results of signed(argument) with '%s%<f>i'.
local function format_I(f)
    local slot = n + 1
    n = slot
    return format("format('%%s%%%si',signed(a%s))",f,slot)
end
737 738
-- Claim the next argument slot and return a source fragment that formats
-- it with '%<f>f'.
local function format_f(f)
    local slot = n + 1
    n = slot
    return format("format('%%%sf',a%s)",f,slot)
end
742 743
-- The next one formats an integer as integer and very small values as zero. This is needed
744
-- for pdf backend code.
745
--
746
-- 1.23 % 1 : 0.23
747
-- - 1.23 % 1 : 0.77
748
--
749
-- We could probably use just %s with integers but who knows what Lua 5.3 will do? So let's
750
-- for the moment use %i.
751 752
-- Claim the next argument slot and return a source fragment for a checked
-- float: whole values are formatted with '%i', other values with '%<f>f'.
-- Without a prefix the fraction format is '%.9f' and values within
-- 0.0000000005 of zero are emitted as '0'.
local function format_F(f) -- beware, no cast to number
    n = n + 1
    if f and f ~= "" then
        return format("format((a%s %% 1 == 0) and '%%i' or '%%%sf',a%s)",n,f,n)
    end
    return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or format((a%s %% 1 == 0) and '%%i' or '%%.9f',a%s))",n,n,n,n)
end
760 761
-- if string.f9 then
762
-- format_F = function(f) -- beware, no cast to number
763
-- n = n + 1
764
-- if not f or f == "" then
765
-- return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or FORMAT(a%s))",n,n,n,n,n)
766
-- else
767
-- return format("((a%s %% 1 == 0) and format('%%i',a%s) or FORMAT(a%s,'%%%sf'))",n,n,n,f)
768
-- end
769
-- end
770
-- end
771 772
-- Claim the next argument slot and return a source fragment that calls
-- formattedfloat on it with the counts b and a (each defaulting to 0).
local function format_k(b,a) -- slow
    local slot = n + 1
    n = slot
    local before = b or 0
    local after  = a or 0
    return format("formattedfloat(a%s,%s,%s)",slot,before,after)
end
776 777
-- Nine generators of the same shape. Each claims the next argument slot and
-- returns a source fragment that passes the argument, together with the
-- prefix f, to format (g G e E x X o) or to sparseexponent (j J).

local function format_g(f)
    local slot = n + 1
    n = slot
    return format("format('%%%sg',a%s)",f,slot)
end

local function format_G(f)
    local slot = n + 1
    n = slot
    return format("format('%%%sG',a%s)",f,slot)
end

local function format_e(f)
    local slot = n + 1
    n = slot
    return format("format('%%%se',a%s)",f,slot)
end

local function format_E(f)
    local slot = n + 1
    n = slot
    return format("format('%%%sE',a%s)",f,slot)
end

local function format_j(f)
    local slot = n + 1
    n = slot
    return format("sparseexponent('%%%se',a%s)",f,slot)
end

local function format_J(f)
    local slot = n + 1
    n = slot
    return format("sparseexponent('%%%sE',a%s)",f,slot)
end

local function format_x(f)
    local slot = n + 1
    n = slot
    return format("format('%%%sx',a%s)",f,slot)
end

local function format_X(f)
    local slot = n + 1
    n = slot
    return format("format('%%%sX',a%s)",f,slot)
end

local function format_o(f)
    local slot = n + 1
    n = slot
    return format("format('%%%so',a%s)",f,slot)
end
821 822
-- Three small generators. Each claims the next argument slot and returns a
-- source fragment: a utfchar call (c), a tracedchar call (C), or a format
-- call with a '%<f>.0f' spec (r).

local function format_c()
    local slot = n + 1
    n = slot
    return format("utfchar(a%s)",slot)
end

local function format_C()
    local slot = n + 1
    n = slot
    return format("tracedchar(a%s)",slot)
end

local function format_r(f)
    local slot = n + 1
    n = slot
    return format("format('%%%s.0f',a%s)",f,slot)
end
836 837
-- Four hexadecimal generators of the same shape. Each claims the next
-- argument slot and returns a source fragment that formats the argument
-- itself when it is a number, and utfbyte(argument) otherwise. The prefix f
-- selects the field spec and defaults to "05". A prefix that is exactly "-"
-- selects the variant without the leading marker (0x, u+ or U+); note that
-- only the exact string "-" is recognised, not a longer prefix starting
-- with "-".

local function format_h(f)
    n = n + 1
    local code = "format('0x%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    if f == "-" then
        f = sub(f,2)
        code = "format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    end
    return format(code,f == "" and "05" or f,n,n,n)
end

local function format_H(f)
    n = n + 1
    local code = "format('0x%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    if f == "-" then
        f = sub(f,2)
        code = "format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    end
    return format(code,f == "" and "05" or f,n,n,n)
end

local function format_u(f)
    n = n + 1
    local code = "format('u+%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    if f == "-" then
        f = sub(f,2)
        code = "format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    end
    return format(code,f == "" and "05" or f,n,n,n)
end

local function format_U(f)
    n = n + 1
    local code = "format('U+%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    if f == "-" then
        f = sub(f,2)
        code = "format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))"
    end
    return format(code,f == "" and "05" or f,n,n,n)
end
876 877
-- Two generators: each claims the next argument slot and returns a source
-- fragment that calls points (p) or basepoints (b) on the argument.

local function format_p()
    local slot = n + 1
    n = slot
    return format("points(a%s)",slot)
end

local function format_b()
    local slot = n + 1
    n = slot
    return format("basepoints(a%s)",slot)
end
886 887
-- Claim the next argument slot and return a source fragment that calls
-- concat on it, with the prefix f (quoted via %q) as separator when given.
local function format_t(f)
    n = n + 1
    if not f or f == "" then
        return format("concat(a%s)",n)
    end
    return format("concat(a%s,%q)",n,f)
end
895 896
-- Claim the next argument slot and return a source fragment that calls
-- sequenced on it, with the prefix f (quoted via %q) as separator when given.
local function format_T(f)
    n = n + 1
    if not f or f == "" then
        return format("sequenced(a%s)",n)
    end
    return format("sequenced(a%s,%q)",n,f)
end
904 905
-- Two generators: each claims the next argument slot and returns a source
-- fragment that turns the truthiness of the argument into the words
-- true/false (l) or TRUE/FALSE (L).

local function format_l()
    local slot = n + 1
    n = slot
    return format("(a%s and 'true' or 'false')",slot)
end

local function format_L()
    local slot = n + 1
    n = slot
    return format("(a%s and 'TRUE' or 'FALSE')",slot)
end
914 915
-- Claim the next argument slot and return a source fragment that formats
-- whole values with '%i' and passes other values to tostring.
-- strips leading and trailing zeros and removes .0, beware: can produce e notation
local function format_n()
    local slot = n + 1
    n = slot
    return format("((a%s %% 1 == 0) and format('%%i',a%s) or tostring(a%s))",slot,slot,slot)
end
919 920
-- local format_N = function() -- strips leading and trailing zeros (also accepts string)
921
-- n = n + 1
922
-- return format("tostring(tonumber(a%s) or a%s)",n,n)
923
-- end
924 925
-- local format_N = function(f) -- strips leading and trailing zeros
926
-- n = n + 1
927
-- -- stripzero (singular) as we only have a number
928
-- if not f or f == "" then
929
-- return format("(((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or ((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%.9f',a%s)))",n,n,n,n,n)
930
-- else
931
-- return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,f,n)
932
-- end
933
-- end
934 935
-- local format_N = function(f) -- strips leading and trailing zeros
936
-- n = n + 1
937
-- -- stripzero (singular) as we only have a number
938
-- if not f or f == "" then
939
-- return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or ((a%s > -0.0000000005 and a%s < 0.0000000005) and '0') or lpegmatch(stripzero,format('%%.9f',a%s)))",n,n,n,n,n)
940
-- else
941
-- return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,f,n)
942
-- end
943
-- end
944 945
-- Claim the next argument slot and return a source fragment for a number
-- without superfluous zeros. Which generator is installed depends on
-- whether environment.FORMAT is available.
local format_N

if not environment.FORMAT then

    -- No FORMAT helper: whole values go through '%i', other values through
    -- '%<f>f' (default ".9") followed by the stripzero pattern.
    format_N = function(f) -- strips leading and trailing zeros
        n = n + 1
        -- stripzero (singular) as we only have a number
        local spec = f
        if not spec or spec == "" then
            spec = ".9" -- always a leading number !
        end
        return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or lpegmatch(stripzero,format('%%%sf',a%s)))",n,n,spec,n)
    end

else

    -- FORMAT helper present: delegate to it. The prefixes ".6" and "0.6" are
    -- emitted as a call without a format argument.
    format_N = function(f)
        n = n + 1
        if not f or f == "" then
            return format("FORMAT(a%s,'%%.9f')",n)
        end
        if f == ".6" or f == "0.6" then
            return format("FORMAT(a%s)",n)
        end
        return format("FORMAT(a%s,'%%%sf')",n,f)
    end

end
970 971
-- Claim the next argument slot and return a source fragment that calls
-- autosingle on it, passing the prefix f (quoted via %q) when given.
local function format_a(f)
    n = n + 1
    if not f or f == "" then
        return format("autosingle(a%s)",n)
    end
    return format("autosingle(a%s,%q)",n,f)
end
979 980
-- Claim the next argument slot and return a source fragment that calls
-- autodouble on it, passing the prefix f (quoted via %q) when given.
local function format_A(f)
    n = n + 1
    if not f or f == "" then
        return format("autodouble(a%s)",n)
    end
    return format("autodouble(a%s,%q)",n,f)
end
988 989
-- Claim the next argument slot and return a source fragment that indexes
-- nspaces with the argument, offset by f when f converts to a number.
local function format_w(f) -- handy when doing depth related indent
    n = n + 1
    local offset = tonumber(f)
    if not offset then
        return format("nspaces[a%s]",n) -- no real need for tonumber
    end
    -- not that useful
    return format("nspaces[%s+a%s]",offset,n) -- no real need for tonumber
end
998 999
-- Return a source fragment that indexes nspaces with the fixed count f
-- (0 when f does not convert to a number). No argument slot is claimed.
local function format_W(f) -- handy when doing depth related indent
    local count = tonumber(f) or 0
    return format("nspaces[%s]",count)
end
1002 1003
-- Claim the next argument slot and return a source fragment that calls
-- formattednumber on it. The prefix f (default ",") is passed as first
-- separator with "." as second; a prefix of "0" passes false instead.
local function format_m(f)
    n = n + 1
    local separator = f
    if not separator or separator == "" then
        separator = ","
    end
    if separator ~= "0" then
        return format([[formattednumber(a%s,%q,".")]],n,separator)
    end
    return format([[formattednumber(a%s,false)]],n)
end
1014 1015
-- Claim the next argument slot and return a source fragment that calls
-- formattednumber on it. The prefix f (default ".") is passed as first
-- separator with "," as second; a prefix of "0" passes false instead.
local function format_M(f)
    n = n + 1
    local separator = f
    if not separator or separator == "" then
        separator = "."
    end
    if separator ~= "0" then
        return format([[formattednumber(a%s,%q,",")]],n,separator)
    end
    return format([[formattednumber(a%s,false)]],n)
end
1026 1027
--
1028 1029
-- Skip argument slots: advance the shared counter n by f (1 when f does not
-- convert to a number) and return the source of an empty string.
local function format_z(f)
    local skipped = tonumber(f) or 1
    n = n + skipped
    return "''" -- okay, not that efficient to append '' but a special case anyway
end
1033 1034
--
1035 1036
-- local strip
1037
--
1038
-- local format_Z = function(f)
1039
-- n = n + 1
1040
-- if not f or f == "" then
1041
-- f = ".9"
1042
-- end
1043
-- return format("(((a%s %% 1 == 0) and format('%%i',a%s)) or (strip and lpegmatch(stripzero,format('%%%sf',a%s))) or format('%%%sf',a%s))",n,n,f,n,f,n)
1044
-- end
1045
--
1046
-- function strings.stripformatterzeros()
1047
-- strip = true
1048
-- end
1049 1050
-- add(formatters,"texexp", [[texexp(...)]], "local texexp = metapost.texexp")
1051
--
1052
-- add(formatters,"foo:bar",[[foo(...)]], { foo = function(...) print(...) return "!" end })
1053
-- print(string.formatters["foo %3!foo:bar! bar"](1,2,3))
1054 1055 1056
-- Literal text between directives: emit it as a quoted Lua string. Using %q
-- catches " and \n and such.
local function format_rest(s)
    local quoted = format("%q",s)
    return quoted
end
1059 1060
-- %!name!: expand a user defined extension.
--
-- extensions : table that maps extension names onto templates; an unknown name
--              falls back on "tostring(%s)"
-- f          : the prefix, interpreted as the number of arguments (default 1)
-- name       : the extension name found between the exclamation marks
--
-- A "..." in the template stands for the argument list. Depending on the count:
--
--   0   : no argument is consumed, "..." is removed
--   1   : the next argument is consumed, "..." becomes one %s
--   < 0 : nothing is consumed; a(n + count + 1) is referenced instead
--         ("..." is not supported here and is simply removed)
--   > 1 : that many arguments are consumed, "..." becomes "%s,%s,...,%s"
local format_extension = function(extensions,f,name)
    local spec     = extensions[name] or "tostring(%s)"
    local count    = tonumber(f) or 1
    local variadic = find(spec,"%.%.%.")
    if count == 0 then
        if variadic then
            spec = gsub(spec,"%.%.%.","")
        end
        return spec
    end
    if count == 1 then
        if variadic then
            spec = gsub(spec,"%.%.%.","%%s")
        end
        n = n + 1
        local a = "a" .. n
        return format(spec,a,a) -- maybe more times?
    end
    if count < 0 then
        if variadic then
            -- not supported
            spec = gsub(spec,"%.%.%.","")
            return spec
        end
        local a = "a" .. (n + count + 1)
        return format(spec,a,a)
    end
    if variadic then
        spec = gsub(spec,"%.%.%.",rep("%%s,",count-1) .. "%%s")
    end
    -- we could fill an array and then n = n + 1 unpack(t,n,n+f) but as we
    -- cache we don't save much and there are hardly any extensions anyway
    local t = { }
    for i=1,count do
        n = n + 1
        t[i] = "a" .. n
    end
    return format(spec,unpack(t))
end
1100 1101
-- aA b cC d eE f gG hH iI jJ lL mM N o p qQ r sS tT uU wW xX z
1102 1103
-- extensions : %!tag!
1104 1105
-- can be made faster but not called that often
1106 1107
-- The builder is a substitution grammar that rewrites a format string into a
-- piece of Lua code. Every directive is handed to its format_* handler, literal
-- text goes through format_rest, and between two pieces the first extra match
-- argument (Carg(1)) is inserted. The second extra argument (Carg(2)) is passed
-- to format_extension as its first parameter.

local builder

do

    -- One entry per directive, in the order in which they are tried:
    --
    --   { specifier, prefix pattern, handler }

    local directives = {
        { "s", prefix_any, format_s     }, -- %s => regular %s (string)
        { "q", prefix_any, format_q     }, -- %q => regular %q (quoted string)
        { "i", prefix_any, format_i     }, -- %i => regular %i (integer)
        { "d", prefix_any, format_d     }, -- %d => regular %d (integer)
        { "f", prefix_any, format_f     }, -- %f => regular %f (float)
        { "F", prefix_any, format_F     }, -- %F => regular %f (float) but 0/1 check
        { "g", prefix_any, format_g     }, -- %g => regular %g (float)
        { "G", prefix_any, format_G     }, -- %G => regular %G (float)
        { "e", prefix_any, format_e     }, -- %e => regular %e (float)
        { "E", prefix_any, format_E     }, -- %E => regular %E (float)
        { "x", prefix_any, format_x     }, -- %x => regular %x (hexadecimal)
        { "X", prefix_any, format_X     }, -- %X => regular %X (HEXADECIMAL)
        { "o", prefix_any, format_o     }, -- %o => regular %o (octal)
        --
        { "c", prefix_any, format_c     }, -- %c => utf character (extension to regular)
        { "C", prefix_any, format_C     }, -- %C => U+.... utf character
        { "S", prefix_any, format_S     }, -- %S => %s (tostring)
        { "Q", prefix_any, format_Q     }, -- %Q => %q (tostring)
        { "n", prefix_any, format_n     }, -- %n => tonumber (strips leading and trailing zeros, as well as .0, expects number)
        { "N", prefix_any, format_N     }, -- %N => tonumber (strips leading and trailing zeros, also takes string)
        { "k", prefix_sub, format_k     }, -- %k => like f but with n.m
        --
        { "r", prefix_any, format_r     }, -- %r => round
        { "h", prefix_any, format_h     }, -- %h => 0x0a1b2 (when - no 0x) was v
        { "H", prefix_any, format_H     }, -- %H => 0x0A1B2 (when - no 0x) was V
        { "u", prefix_any, format_u     }, -- %u => u+0a1b2 (when - no u+)
        { "U", prefix_any, format_U     }, -- %U => U+0A1B2 (when - no U+)
        { "p", prefix_any, format_p     }, -- %p => 12.345pt / maybe: P (and more units)
        { "b", prefix_any, format_b     }, -- %b => 12.342bp / maybe: B (and more units)
        { "t", prefix_tab, format_t     }, -- %t => concat
        { "T", prefix_tab, format_T     }, -- %T => sequenced
        { "l", prefix_any, format_l     }, -- %l => boolean
        { "L", prefix_any, format_L     }, -- %L => BOOLEAN
        { "I", prefix_any, format_I     }, -- %I => signed integer
        --
        { "w", prefix_any, format_w     }, -- %w => n spaces (optional prefix is added)
        { "W", prefix_any, format_W     }, -- %W => mandate prefix, no specifier
        --
        { "a", prefix_any, format_a     }, -- %a => '...' (forces tostring)
        { "A", prefix_any, format_A     }, -- %A => "..." (forces tostring)
        --
        { "j", prefix_any, format_j     }, -- %j => %e (float) stripped exponent (irrational)
        { "J", prefix_any, format_J     }, -- %J => %E (float) stripped exponent (irrational)
        --
        { "m", prefix_any, format_m     }, -- %m => xxx.xxx.xxx,xx (optional prefix instead of .)
        { "M", prefix_any, format_M     }, -- %M => xxx,xxx,xxx.xx (optional prefix instead of ,)
        --
        { "z", prefix_any, format_z     }, -- %z => skip n arguments
     -- { "Z", prefix_any, format_Z     }, -- %Z => optionally strip zeros
        --
        { ">", prefix_any, format_right }, -- left padding
        { "<", prefix_any, format_left  }, -- right padding
    }

    local grammar = {
        "start",
        -- rest (including %%, which collapses into a single %)
        ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_rest,
        -- rest without any % (not referenced by start: ignored, probably messed up %)
        ["?"] = Cs(((1-P("%"))^1)^1) / format_rest,
        -- extensions : %!tag!
        ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,
    }

    -- extensions are tried first, then the directives in the order given above

    local alternatives = V("!")

    for i=1,#directives do
        local directive = directives[i]
        local specifier = directive[1]
        grammar[specifier] = (directive[2] * P(specifier)) / directive[3]
        alternatives = alternatives + V(specifier)
    end

    -- the leading % is dropped; each piece is followed by the connector unless
    -- we're at the end of the string

    local piece = ((P("%")/"") * alternatives) + V("*")

    grammar.start = (piece * (endofstring + Carg(1)))^0

    builder = Cs(grammar)

end
1208 1209
-- We can be clever and only alias what is needed:
1210 1211
-- Two lazily filled lookup tables for the frequently used two digit hexadecimal
-- formats: a missing key is formatted once, stored, and served from the table
-- afterwards. The preset table maps these format strings onto ready made
-- formatters so that no code has to be generated for them.

local xx, XX

do

    local function hexcache(fmt)
        return setmetatable({ }, {
            __index = function(t,k)
                local v = format(fmt,k)
                t[k] = v
                return v
            end
        })
    end

    xx = hexcache("%02x")
    XX = hexcache("%02X")

end

local preset = {
    ["%02x"] = function(n) return xx[n] end,
    ["%02X"] = function(n) return XX[n] end,
}
1218 1219
-- A format string that consists of exactly one regular directive (a %, optional
-- sign, space, period and digit characters, and one of the specifiers that
-- string.format handles) is turned into the source of a function that passes
-- its argument straight to string.format. The %0 in the replacement stands for
-- the matched format string itself.

local direct

do

    local flags     = (sign + space + period + digit)^0
    local specifier = S("sqidfgGeExXo")
    local code      = [[local format = string.format return function(str) return format("%0",str) end]]

    direct = (P("%") * flags * specifier * endofstring) / code

end
1222 1223
-- The __index handler of a formatter table: build the function that belongs to
-- format string 'str', store it in 't' and return it.
--
--   * a preset format is returned as is (and not stored in t)
--   * a single regular directive is compiled from the 'direct' pattern
--   * anything else goes through the builder; when that consumed arguments
--     (n > 0) the code is wrapped in the template and loaded with the
--     environment of t, otherwise the function just returns str
local function make(t,str)
    local known = preset[str]
    if known then
        return known
    end
    local compiled
    local code = lpegmatch(direct,str)
    if code then
     -- print("builder 1 >",code)
        compiled = loadstripped(code)()
    else
        n = 0 -- used in patterns
        -- the connector and extensions are the extra match arguments 1 and 2
        code = lpegmatch(builder,str,1,t._connector_,t._extensions_) -- after this we know n
        if n > 0 then
            code = format(template,preamble,t._preamble_,arguments[n],code)
         -- print("builder 2 >",code)
            compiled = loadstripped(code,t._environment_)() -- t._environment is not populated (was experiment)
        else
            compiled = function() return str end
        end
    end
    t[str] = compiled
    return compiled
end
1247 1248
-- -- collect periodically
1249
--
1250
-- local threshold = 1000 -- max nof cached formats
1251
--
1252
-- local function make(t,str)
1253
-- local f = rawget(t,str)
1254
-- if f then
1255
-- return f
1256
-- end
1257
-- local parent = t._t_
1258
-- if parent._n_ > threshold then
1259
-- local m = { _t_ = parent }
1260
-- getmetatable(parent).__index = m
1261
-- setmetatable(m, { __index = make })
1262
-- else
1263
-- parent._n_ = parent._n_ + 1
1264
-- end
1265
-- local f
1266
-- local p = lpegmatch(direct,str)
1267
-- if p then
1268
-- f = loadstripped(p)()
1269
-- else
1270
-- n = 0
1271
-- p = lpegmatch(builder,str,1,"..",parent._extensions_) -- after this we know n
1272
-- if n > 0 then
1273
-- p = format(template,preamble,parent._preamble_,arguments[n],p)
1274
-- -- print("builder>",p)
1275
-- f = loadstripped(p)()
1276
-- else
1277
-- f = function() return str end
1278
-- end
1279
-- end
1280
-- t[str] = f
1281
-- return f
1282
-- end
1283 1284
-- The __call handler of a formatter table: look up (or, via __index, create)
-- the formatter for 'fmt' and apply it to the remaining arguments.
local function use(t,fmt,...)
    local formatter = t[fmt]
    return formatter(...)
end
1287 1288
strings.formatters = { }

-- we cannot make these tables weak, unless we start using an indirect
-- table (metatable) in which case we could better keep a count and
-- clear that table when a threshold is reached

-- _connector_ is an experiment

-- Create a new formatter instance: a table that builds (and stores) a function
-- per format string when indexed, and that formats directly when called.
--
-- noconcat : when set, the generated pieces are separated by "," instead of
--            being concatenated with ".."
function strings.formatters.new(noconcat)
    -- better make a copy as we can overload
    local env = { }
    for key, value in next, environment do
        env[key] = value
    end
    local connector = ".."
    if noconcat then
        connector = ","
    end
    return setmetatable({
        _type_        = "formatter",
        _connector_   = connector,
        _extensions_  = { },
        _preamble_    = "",
        _environment_ = env,
    }, {
        __index = make,
        __call  = use,
    })
end
1311 1312
local formatters = strings.formatters.new() -- the default instance

string.formatters = formatters -- in the main string namespace

-- sometimes nicer name: format the given arguments with the default instance

function string.formatter(str,...)
    local formatter = formatters[str]
    return formatter(...)
end
1316 1317
-- Register an extension (used as %!name!) in a formatter instance.
--
-- t        : a formatter instance; anything else is silently ignored
-- name     : the extension name
-- template : the code template, "%s" when omitted
-- preamble : a string is put in front of the instance preamble; the entries of
--            a table are copied into the instance environment
local function add(t,name,template,preamble)
    if type(t) ~= "table" or t._type_ ~= "formatter" then
        return
    end
    t._extensions_[name] = template or "%s"
    local kind = type(preamble)
    if kind == "string" then
        t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload !
    elseif kind == "table" then
        local env = t._environment_
        for key, value in next, preamble do
            env[key] = value
        end
    end
end

strings.formatters.add = add
1331 1332
-- registered in the default instance (should we fall back on this one?)
1333 1334
-- Escape patterns; each one is a substitution capture over the whole string.

-- xmlescape: < > & and " become entities
patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + anything)^0)

-- texescape: # $ % \ { } get a backslash in front
patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + anything)^0)

-- luaescape: a double quote becomes \" and a newline becomes \n so that the
-- result can be put between double quotes in Lua code. The newline replacement
-- used to be \n" (with a stray double quote) which unbalanced the result.
patterns.luaescape = Cs(((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0) -- maybe also \0

-- luaquoted: same escaping, with the surrounding double quotes added
patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n')^0 * Cc('"'))
1338 1339
-- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but
1340
-- faster again when other q-escapables are found (the ones we don't need to escape)
1341 1342
-- Register the escape patterns as the extensions %!xml!, %!tex! and %!lua! of
-- the default instance; each pattern is made available in the environment of
-- that instance under the name used in the template.

do

    local escapers = {
        { "xml", [[lpegmatch(xmlescape,%s)]], "xmlescape" },
        { "tex", [[lpegmatch(texescape,%s)]], "texescape" },
        { "lua", [[lpegmatch(luaescape,%s)]], "luaescape" },
    }

    for i=1,#escapers do
        local escaper = escapers[i]
        local key     = escaper[3]
        add(formatters,escaper[1],escaper[2],{ [key] = patterns[key] })
    end

end
1345 1346
-- -- yes or no:
1347
--
1348
-- local function make(t,str)
1349
-- local f
1350
-- local p = lpegmatch(direct,str)
1351
-- if p then
1352
-- f = loadstripped(p)()
1353
-- else
1354
-- n = 0
1355
-- p = lpegmatch(builder,str,1,",") -- after this we know n
1356
-- if n > 0 then
1357
-- p = format(template,template_shortcuts,arguments[n],p)
1358
-- f = loadstripped(p)()
1359
-- else
1360
-- f = function() return str end
1361
-- end
1362
-- end
1363
-- t[str] = f
1364
-- return f
1365
-- end
1366
--
1367
-- local formatteds = string.formatteds or { }
1368
-- string.formatteds = formatteds
1369
--
1370
-- setmetatable(formatteds, { __index = make, __call = use })
1371 1372
-- This is a somewhat silly one used in commandline reconstruction but the older
1373
-- method, using a combination of find, gsub, quoted and unquoted was not that
1374
-- reliable.
1375
--
1376
-- '"foo"bar \"and " whatever"' => "foo\"bar \"and \" whatever"
1377
-- 'foo"bar \"and " whatever' => "foo\"bar \"and \" whatever"
1378 1379
local dquote = patterns.dquote -- P('"')
local equote = patterns.escaped + dquote / '\\"' + 1 -- escaped pair, or " => \", or any character
local cquote = Cc('"')

local pattern

do

    -- starts and ends with a double quote: we keep the outer ones but escape
    -- unescaped ones in between (P(-2) only matches when less than two
    -- characters are left, so the last character is left to the closing quote)
    local quoted = Cs(dquote * (equote - P(-2))^0 * dquote)

    -- contains a space: we add outer quotes and escape unescaped ones
    local spaced = Cs(cquote * (equote - space)^0 * space * equote^0 * cquote)

    pattern = quoted + spaced

end

-- Quote a string when needed. A string that matches neither of the two cases
-- above is returned unchanged.
function string.optionalquoted(str)
    local result = lpegmatch(pattern,str)
    if result then
        return result
    end
    return str
end
1390 1391
-- Every match of the newline pattern is replaced by os.newline (or by "\r"
-- when that field is not set); the replacement is fixed at load time.

local pattern = Cs((newline / (os.newline or "\r") + 1)^0)

string.replacenewlines = function(str)
    local converted = lpegmatch(pattern,str)
    return converted
end
1396 1397
--
1398 1399
-- Create a collector. Two functions that share one buffer are returned:
--
--   write(fmt,str,...) : appends one entry; when str is nil (and fmt is set)
--                        fmt itself is appended, otherwise the result of the
--                        default formatter for fmt applied to str and the rest
--   flush(connector)   : returns the entries concatenated with the connector
--                        and starts with a fresh buffer
function strings.newcollector()
    local result, r = { }, 0
    local function write(fmt,str,...)
        r = r + 1
        if str == nil and fmt then
            result[r] = fmt
        else
            result[r] = formatters[fmt](str,...)
        end
    end
    local function flush(connector)
        if not result then
            return
        end
        local str = concat(result,connector)
        result, r = { }, 0
        return str
    end
    return write, flush
end
1414 1415
--
1416 1417
-- The value is divided by 65536 and formatted with %0.5N, which strips
-- superfluous zeros.

local f_16_16 = formatters["%0.5N"]

function number.to16dot16(n)
    local scaled = n / 65536.0
    return f_16_16(scaled)
end
1422 1423
--
1424 1425
-- A fallback for string.explode, only defined when no such function is present.

if not string.explode then

    local tsplitat = lpeg.tsplitat

    local p_utf   = patterns.utf8character
    -- the first utf character of the symbol, followed by true when a + comes next
    local p_check = C(p_utf) * (P("+") * Cc(true))^0
    -- all utf characters, one per entry
    local p_split = Ct(C(p_utf)^0)
    -- runs of non-spaces, separated by runs of spaces; the inner parentheses
    -- matter: ^ binds tighter than -, so C(1-P(" ")^1) captures only a single
    -- character and splits words into characters
    local p_space = Ct((C((1-P(" "))^1) + P(" ")^1)^0)

    -- str    : the string to split
    -- symbol : ""  => split into utf characters
    --          nil => split at runs of spaces
    --          otherwise the first utf character of symbol is the separator;
    --          when a + follows it, a run of separators counts as one
    --
    -- Returns a table with the parts.
    function string.explode(str,symbol)
        if symbol == "" then
            return lpegmatch(p_split,str)
        elseif symbol then
            local a, b = lpegmatch(p_check,symbol)
            if b then
                return lpegmatch(tsplitat(P(a)^1),str)
            else
                return lpegmatch(tsplitat(a),str)
            end
        else
            return lpegmatch(p_space,str)
        end
    end

end
1450