scrp-ini.lua /size: 36 Kb    last modification: 2020-07-01 14:35
1
if
not
modules
then
modules
=
{
}
end
modules
[
'
scrp-ini
'
]
=
{
2
version
=
1
.
001
,
3
comment
=
"
companion to scrp-ini.mkiv
"
,
4
author
=
"
Hans Hagen, PRAGMA-ADE, Hasselt NL
"
,
5
copyright
=
"
PRAGMA ADE / ConTeXt Development Team
"
,
6
license
=
"
see context related readme files
"
7
}
8 9
-- We need to rewrite this a bit ... rather old code ... will be done when japanese
10
-- is finished.
11 12
local
tonumber
,
next
=
tonumber
,
next
13
local
setmetatableindex
=
table
.
setmetatableindex
14
local
utfbyte
,
utfsplit
=
utf
.
byte
,
utf
.
split
15
local
gmatch
=
string
.
gmatch
16 17
local
trace_analyzing
=
false
trackers
.
register
(
"
scripts.analyzing
"
,
function
(
v
)
trace_analyzing
=
v
end
)
18
local
trace_injections
=
false
trackers
.
register
(
"
scripts.injections
"
,
function
(
v
)
trace_injections
=
v
end
)
19
local
trace_splitting
=
false
trackers
.
register
(
"
scripts.splitting
"
,
function
(
v
)
trace_splitting
=
v
end
)
20
local
trace_splitdetails
=
false
trackers
.
register
(
"
scripts.splitting.details
"
,
function
(
v
)
trace_splitdetails
=
v
end
)
21 22
local
report_preprocessing
=
logs
.
reporter
(
"
scripts
"
,
"
preprocessing
"
)
23
local
report_splitting
=
logs
.
reporter
(
"
scripts
"
,
"
splitting
"
)
24 25 26
local
attributes
=
attributes
27
local
nodes
=
nodes
28
local
context
=
context
29 30
local
nodecodes
=
nodes
.
nodecodes
31 32
local
implement
=
interfaces
.
implement
33 34
local
glyph_code
=
nodecodes
.
glyph
35
local
glue_code
=
nodecodes
.
glue
36 37
local
emwidths
=
fonts
.
hashes
.
emwidths
38
local
exheights
=
fonts
.
hashes
.
exheights
39 40
local
a_script
=
attributes
.
private
(
'
script
'
)
41 42
local
fontdata
=
fonts
.
hashes
.
identifiers
43
local
allocate
=
utilities
.
storage
.
allocate
44
local
setnodecolor
=
nodes
.
tracers
.
colors
.
set
45 46
local
enableaction
=
nodes
.
tasks
.
enableaction
47
local
disableaction
=
nodes
.
tasks
.
disableaction
48 49
local
nuts
=
nodes
.
nuts
50 51
local
getnext
=
nuts
.
getnext
52
local
getchar
=
nuts
.
getchar
53
local
getfont
=
nuts
.
getfont
54
local
getid
=
nuts
.
getid
55
local
getglyphdata
=
nuts
.
getglyphdata
56
local
setglyphdata
=
nuts
.
setglyphdata
57 58
local
isglyph
=
nuts
.
isglyph
59 60
local
insert_node_after
=
nuts
.
insert_after
61
local
insert_node_before
=
nuts
.
insert_before
62 63
local
first_glyph
=
nuts
.
first_glyph
64 65
local
nextglyph
=
nuts
.
traversers
.
glyph
66
local
nextchar
=
nuts
.
traversers
.
char
67 68
local
nodepool
=
nuts
.
pool
69 70
local
new_glue
=
nodepool
.
glue
71
local
new_rule
=
nodepool
.
rule
72
local
new_penalty
=
nodepool
.
penalty
73 74
scripts
=
scripts
or
{
}
75
local
scripts
=
scripts
76 77
scripts
.
hash
=
scripts
.
hash
or
{
}
78
local
hash
=
scripts
.
hash
79 80
local
handlers
=
allocate
(
)
81
scripts
.
handlers
=
handlers
82 83
-- Table that holds injector related functions (scripts.injectors.handler is
-- defined on it further down). It used to be aliased to 'handlers', which made
-- the node list handler show up as an entry in the method registry.
local injectors   = allocate()
scripts.injectors = injectors
85 86
local
splitters
=
allocate
(
)
87
scripts
.
splitters
=
splitters
88 89
-- we need to fake it in luatex
90 91
-- Script tagging accessors. When the engine does not provide them natively we
-- emulate them with the private 'script' attribute and publish the emulation
-- under the same names (nuts.getscript and tex.setglyphscript).
local getscript         = node.direct.getscript
local texsetglyphscript = tex.setglyphscript

if not getscript then

    local getattr         = nuts.getattr
    local texsetattribute = tex.setattribute
    local unsetvalue      = attributes.unsetvalue

    -- Returns the script number of node n, or nothing when the attribute is
    -- unset or not positive.
    local function emulated_getscript(n)
        local a = getattr(n,a_script)
        if not (a and a ~= unsetvalue and a > 0) then
            return
        end
        return a
    end

    -- Sets the current script number; nil, false and zero unset the attribute.
    local function emulated_setglyphscript(a)
        if a and a ~= 0 then
            texsetattribute(a_script,a)
        else
            texsetattribute(a_script,unsetvalue)
        end
    end

    getscript         = emulated_getscript
    texsetglyphscript = emulated_setglyphscript

    nuts.getscript     = getscript
    tex.setglyphscript = texsetglyphscript

end
118 119
-- Preset character classes, keyed by code point. Characters that are not listed
-- here are classified lazily by range (see the metatable index set on this
-- table further down).
--
-- The non starter rows used to contain 0x30F2..0x30F4, 0x30F7..0x30F9 and a
-- second copy of 0x30FC..0x30FE in the middle of the 0x31F0..0x31FF run (small
-- katakana phonetic extensions). Those were typos for 0x31Fx: the regular
-- katakana are not non starters while the small extensions are.

local hash = { -- we could put these presets in char-def.lua
    --
    -- half width opening parenthesis
    --
    [0x0028] = "half_width_open",
    [0x005B] = "half_width_open",
    [0x007B] = "half_width_open",
    [0x2018] = "half_width_open", -- ‘
    [0x201C] = "half_width_open", -- “
    --
    -- full width opening parenthesis
    --
    [0x3008] = "full_width_open", -- 〈 Left book quote
    [0x300A] = "full_width_open", -- 《 Left double book quote
    [0x300C] = "full_width_open", -- 「 left quote
    [0x300E] = "full_width_open", -- 『 left double quote
    [0x3010] = "full_width_open", -- 【 left double book quote
    [0x3014] = "full_width_open", -- 〔 left book quote
    [0x3016] = "full_width_open", --〖 left double book quote
    [0x3018] = "full_width_open", -- left tortoise bracket
    [0x301A] = "full_width_open", -- left square bracket
    [0x301D] = "full_width_open", -- reverse double prime qm
    [0xFF08] = "full_width_open", -- ( left parenthesis
    [0xFF3B] = "full_width_open", -- [ left square brackets
    [0xFF5B] = "full_width_open", -- { left curve bracket
    --
    -- half width closing parenthesis
    --
    [0x0029] = "half_width_close",
    [0x005D] = "half_width_close",
    [0x007D] = "half_width_close",
    [0x2019] = "half_width_close", -- ’ right quote, right
    [0x201D] = "half_width_close", -- ” right double quote
    --
    -- full width closing parenthesis
    --
    [0x3009] = "full_width_close", -- 〉 book quote
    [0x300B] = "full_width_close", -- 》 double book quote
    [0x300D] = "full_width_close", -- 」 right quote, right
    [0x300F] = "full_width_close", -- 』 right double quote
    [0x3011] = "full_width_close", -- 】 right double book quote
    [0x3015] = "full_width_close", -- 〕 right book quote
    [0x3017] = "full_width_close", -- 〗 right double book quote
    [0x3019] = "full_width_close", -- right tortoise bracket
    [0x301B] = "full_width_close", -- right square bracket
    [0x301E] = "full_width_close", -- double prime qm
    [0x301F] = "full_width_close", -- low double prime qm
    [0xFF09] = "full_width_close", -- ) right parenthesis
    [0xFF3D] = "full_width_close", -- ] right square brackets
    [0xFF5D] = "full_width_close", -- } right curve brackets
    --
    [0xFF62] = "half_width_open",  -- left corner bracket
    [0xFF63] = "half_width_close", -- right corner bracket
    --
    -- vertical opening vertical
    --
    -- 0xFE35, 0xFE37, 0xFE39, 0xFE3B, 0xFE3D, 0xFE3F, 0xFE41, 0xFE43, 0xFE47,
    --
    -- vertical closing
    --
    -- 0xFE36, 0xFE38, 0xFE3A, 0xFE3C, 0xFE3E, 0xFE40, 0xFE42, 0xFE44, 0xFE48,
    --
    -- half width opening punctuation
    --
    -- <empty>
    --
    -- full width opening punctuation
    --
    --  0x2236, -- ∶
    --  0xFF0C, -- ,
    --
    -- half width closing punctuation_hw
    --
    [0x0021] = "half_width_close", -- !
    [0x002C] = "half_width_close", -- ,
    [0x002E] = "half_width_close", -- .
    [0x003A] = "half_width_close", -- :
    [0x003B] = "half_width_close", -- ;
    [0x003F] = "half_width_close", -- ?
    [0xFF61] = "half_width_close", -- hw full stop
    --
    -- full width closing punctuation
    --
    [0x3001] = "full_width_close", -- 、
    [0x3002] = "full_width_close", -- 。
    [0xFF0C] = "full_width_close", -- ,
    [0xFF0E] = "full_width_close", --
    --
    -- depends on font
    --
    [0xFF01] = "full_width_close", -- !
    [0xFF1F] = "full_width_close", -- ?
    --
    [0xFF1A] = "full_width_punct", -- :
    [0xFF1B] = "full_width_punct", -- ;
    --
    -- non starter
    --
    [0x3005] = "non_starter", [0x3041] = "non_starter", [0x3043] = "non_starter", [0x3045] = "non_starter", [0x3047] = "non_starter",
    [0x3049] = "non_starter", [0x3063] = "non_starter", [0x3083] = "non_starter", [0x3085] = "non_starter", [0x3087] = "non_starter",
    [0x308E] = "non_starter", [0x3095] = "non_starter", [0x3096] = "non_starter", [0x309B] = "non_starter", [0x309C] = "non_starter",
    [0x309D] = "non_starter", [0x309E] = "non_starter", [0x30A0] = "non_starter", [0x30A1] = "non_starter", [0x30A3] = "non_starter",
    [0x30A5] = "non_starter", [0x30A7] = "non_starter", [0x30A9] = "non_starter", [0x30C3] = "non_starter", [0x30E3] = "non_starter",
    [0x30E5] = "non_starter", [0x30E7] = "non_starter", [0x30EE] = "non_starter", [0x30F5] = "non_starter", [0x30F6] = "non_starter",
    [0x30FC] = "non_starter", [0x30FD] = "non_starter", [0x30FE] = "non_starter", [0x31F0] = "non_starter", [0x31F1] = "non_starter",
    [0x31F2] = "non_starter", [0x31F3] = "non_starter", [0x31F4] = "non_starter", [0x31F5] = "non_starter", [0x31F6] = "non_starter",
    [0x31F7] = "non_starter", [0x31F8] = "non_starter", [0x31F9] = "non_starter", [0x31FA] = "non_starter", [0x31FB] = "non_starter",
    [0x31FC] = "non_starter", [0x31FD] = "non_starter", [0x31FE] = "non_starter", [0x31FF] = "non_starter",
    --
    -- hyphenation
    --
    [0x2026] = "hyphen", -- … ellipsis
    [0x2014] = "hyphen", -- — hyphen
    --
    [0x1361] = "ethiopic_word",
    [0x1362] = "ethiopic_sentence",
    --
    -- tibetan:
    --
    [0x0F0B] = "breaking_tsheg",
    [0x0F0C] = "nonbreaking_tsheg",

}
242 243
-- Lazy classifier used as index fallback: given a table t and a missing key k
-- it determines the class of k by code point range, stores the result in t[k]
-- (so that the lookup only happens once) and returns it. Keys that are not
-- numbers, and code points outside all ranges, are stored as false.

local provide

do

    -- { first, last, class }, checked in this order; the first match wins
    local ranges = {
        { 0x03040, 0x030FF, "katakana" },
        { 0x031F0, 0x031FF, "katakana" },
        { 0x032D0, 0x032FE, "katakana" },
        { 0x0FF00, 0x0FFEF, "katakana" },
        { 0x03400, 0x04DFF, "chinese" },
        { 0x04E00, 0x09FFF, "chinese" },
        { 0x0F900, 0x0FAFF, "chinese" },
        { 0x20000, 0x2A6DF, "chinese" },
        { 0x2F800, 0x2FA1F, "chinese" },
        { 0x0AC00, 0x0D7A3, "korean" },
        { 0x01100, 0x0115F, "jamo_initial" },
        { 0x01160, 0x011A7, "jamo_medial" },
        { 0x011A8, 0x011FF, "jamo_final" },
        { 0x01200, 0x0139F, "ethiopic_syllable" },
        { 0x00F00, 0x00FFF, "tibetan" },
    }

    provide = function(t,k)
        local v = false
        if tonumber(k) then
            for i=1,#ranges do
                local r = ranges[i]
                if k >= r[1] and k <= r[2] then
                    v = r[3]
                    break
                end
            end
        end
        t[k] = v
        return v
    end

end
266 267
setmetatableindex
(
hash
,
provide
)
-- should come from char-def
268 269
scripts
.
hash
=
hash
270 271
local
numbertodataset
=
allocate
(
)
272
local
numbertohandler
=
allocate
(
)
273 274
--~ storage.register("scripts/hash", hash, "scripts.hash")
275 276
scripts
.
numbertodataset
=
numbertodataset
277
scripts
.
numbertohandler
=
numbertohandler
278 279
-- Fallback values for dataset fields; datasets get this table as their index
-- fallback when a method is installed. (The shrink factor used to be listed
-- twice, which was harmless but confusing.)

local defaults = {
    inter_char_shrink_factor          = 0,
    inter_char_stretch_factor         = 0,
    inter_char_half_shrink_factor     = 0,
    inter_char_half_stretch_factor    = 0,
    inter_char_quarter_shrink_factor  = 0,
    inter_char_quarter_stretch_factor = 0,
    inter_char_hangul_penalty         = 0,

    inter_word_stretch_factor         = 0,
}
291 292
scripts
.
defaults
=
defaults
-- so we can add more
293 294
-- todo: copy more efficient than metatable
295 296
-- Registers a script method. The handler table is stored under handler.name,
-- all its datasets get 'defaults' as index fallback, and handler.attributes
-- becomes a lazy map from preset name to a freshly allocated script number
-- (the number indexes numbertodataset and numbertohandler).
--
-- A handler without datasets, or without a default dataset, is reported and
-- gets an empty default. Previously a handler that had no datasets table at
-- all crashed right after the report.

function scripts.installmethod(handler)
    local name = handler.name
    handlers[name] = handler
    local attributes = { }
    local datasets = handler.datasets
    if not datasets then
        report_preprocessing("missing (default) dataset in script %a",name)
        datasets = { default = { } }
        handler.datasets = datasets
    elseif not datasets.default then
        report_preprocessing("missing (default) dataset in script %a",name)
        datasets.default = { } -- slower but an error anyway
    end

    for k, v in next, datasets do
        setmetatableindex(v,defaults)
    end
    setmetatableindex(attributes,function(t,k)
        -- unknown presets fall back on the default dataset
        local v = datasets[k] or datasets.default
        local a = 0
        if v then
            v.name = name -- for tracing
            a = #numbertodataset + 1
            numbertodataset[a] = v
            numbertohandler[a] = handler
        end
        t[k] = a
        return a
    end)
    handler.attributes = attributes
end
323 324
-- Adds (or updates) a named dataset of an installed method. The specification
-- has the fields method, name, dataset and optionally parent.
--
-- The new dataset inherits from the parent dataset when one is given and known,
-- otherwise from the default dataset of the method. When a dataset with that
-- name already exists it is updated in place, so that script numbers that were
-- already handed out keep pointing to the same table.
--
-- Two things were wrong here: the parent was installed as metatable instead of
-- as index fallback (so nothing was inherited), and the in place update stored
-- the whole new dataset table under every existing key instead of copying the
-- new values.

function scripts.installdataset(specification) -- global overload
    local method  = specification.method
    local name    = specification.name
    local dataset = specification.dataset
    if not (method and name and dataset) then
        report_preprocessing("dataset, invalid specification") -- maybe report table
        return
    end
    local handler = handlers[method]
    if not handler then
        report_preprocessing("dataset, no method %a",method)
        return
    end
    local datasets = handler.datasets
    if not datasets then
        report_preprocessing("dataset, no datasets for method %a",method)
        return
    end
    local defaultset = datasets.default
    if not defaultset then
        report_preprocessing("dataset, no default for method %a",method)
        return
    end
    local parent = specification.parent or ""
    if parent ~= "" then
        local p = datasets[parent]
        if p then
            defaultset = p
        else
            report_preprocessing("dataset, unknown parent %a for method %a",parent,method)
        end
    end
    setmetatableindex(dataset,defaultset)
    local existing = datasets[name]
    if existing then
        for k, v in next, dataset do
            existing[k] = v
        end
    else
        datasets[name] = dataset
    end
end
366 367
local
injectorenabled
=
false
368
local
splitterenabled
=
false
369 370
-- Returns the dataset that belongs to the script number of node n, or nothing
-- when the node carries no script.

local function getscriptdata(n)
    local s = getscript(n)
    if not s then
        return
    end
    return numbertodataset[s]
end
376 377
-- Returns the injector function of the handler that belongs to the script
-- number of node n (nil when that handler is unknown or has no injector), or
-- nothing when the node carries no script.

local function getinjector(n)
    local s = getscript(n)
    if not s then
        return
    end
    local handler = numbertohandler[s]
    return handler and handler.injector
end
384 385
-- Returns the 'splitter' field found via the dataset that belongs to the script
-- number of node n, or nothing when the node carries no script.
--
-- NOTE(review): the method installed in this file puts 'splitter' in the
-- handler table, not in a dataset, so this lookup presumably yields nil; check
-- whether numbertohandler was meant here (as in getinjector).

local function getsplitter(n)
    local s = getscript(n)
    if not s then
        return
    end
    local dataset = numbertodataset[s]
    return dataset and dataset.splitter
end
392 393
scripts
.
getdata
=
getscriptdata
394
scripts
.
getinjector
=
getinjector
395
scripts
.
getsplitter
=
getsplitter
396 397
-- Makes 'preset' of 'method' the current script: the script number is looked up
-- (and allocated on first use), the injector and splitter passes are enabled
-- once when the method needs them, a pending initializer is run exactly once,
-- and finally the number is set. An unknown method resets the script. The
-- 'name' argument is not used here.

function scripts.set(name,method,preset)
    local handler = handlers[method]
    if not handler then
        texsetglyphscript()
        return
    end
    local index = handler.attributes[preset]
    if handler.injector and not injectorenabled then
        enableaction("processors","scripts.injectors.handler")
        injectorenabled = true
    end
    if handler.splitter and not splitterenabled then
        enableaction("processors","scripts.splitters.handler")
        splitterenabled = true
    end
    local initializer = handler.initializer
    if initializer then
        initializer(handler)
        handler.initializer = nil
    end
    texsetglyphscript(index)
end
422 423
function
scripts
.
reset
(
)
424
texsetglyphscript
(
)
425
end
426 427
-- 0=gray 1=red 2=green 3=blue 4=yellow 5=magenta 6=cyan 7=x-yellow 8=x-magenta 9=x-cyan
428 429
-- local categories = allocate { -- rather bound to cjk ... will be generalized
430
-- "korean",
431
-- "chinese",
432
-- "katakana",
433
-- "hiragana",
434
-- "full_width_open",
435
-- "full_width_close",
436
-- "half_width_open",
437
-- "half_width_close",
438
-- "full_width_punct",
439
-- "hyphen",
440
-- "non_starter",
441
-- "jamo_initial",
442
-- "jamo_medial",
443
-- "jamo_final",
444
-- "ethiopic_syllable",
445
-- "ethiopic_word",
446
-- "ethiopic_sentence",
447
-- "breaking_tsheg",
448
-- "nonbreaking_tsheg",
449
-- }
450
--
451
-- scripts.categories = categories
452 453
local
scriptcolors
=
allocate
{
454
-- todo: just named colors
455
hyphen
=
"
trace:5
"
,
456
}
457 458
scripts
.
colors
=
scriptcolors
459 460
-- this can become setprop ...
461 462
local
propertydata
=
nodes
.
properties
.
data
463 464
-- Stores status s in the 'scriptstatus' field of the properties of node n; a
-- properties table is created when the node has none yet.

local function setscriptstatus(n,s)
    local p = propertydata[n]
    if not p then
        propertydata[n] = { scriptstatus = s }
        return
    end
    p.scriptstatus = s
end
472 473
-- Returns the 'scriptstatus' field from the properties of node n, or nothing
-- when the node has no properties. This used to be defined as a global by
-- accident; it is exported as scripts.getstatus.

local function getscriptstatus(n)
    local p = propertydata[n]
    if p then
        return p.scriptstatus
    end
end
479 480
scripts
.
setstatus
=
setscriptstatus
481
scripts
.
getstatus
=
getscriptstatus
482 483
--
484 485
-- Tracing helper: colors the glyphs from start up to and including stop
-- according to their script status, for the statuses that have an entry in
-- scriptcolors.

local function colorize(start,stop)
    for n in nextglyph, start do
        local kind = getscriptstatus(n)
        local ac   = kind and scriptcolors[kind]
        if ac then
            setnodecolor(n,ac)
        end
        if n == stop then
            break
        end
    end
end
499 500
-- Runs process(head,first,last) and reports the node sequence first..last
-- before and after. The name in the report comes from the dataset registered
-- under script number a ("?" when unknown).
--
-- There used to be a guard here that compared an undefined global ('start')
-- with 'last'; that test was always true, so it has been dropped. This keeps
-- the traced path doing the same work as the normal one.

local function traced_process(head,first,last,process,a)
    local name = numbertodataset[a]
    name = name and name.name or "?"
    report_preprocessing("before %s: %s",name,nodes.tosequence(first,last))
    process(head,first,last)
    report_preprocessing("after %s: %s", name,nodes.tosequence(first,last))
end
510 511
-- Node list pass that collects runs of glyphs that share a script number and
-- have a known character class, tags each glyph with that class (script status)
-- and hands every run to the injector of the script.
--
-- A run ends at a glyph with another (or no) script, at a glyph without class
-- and at any non glyph node; glue is skipped so that a run can continue over
-- it. The list head is returned unchanged (injectors are called with head,
-- first and last and their results are not used here).
--
-- Changes: the font resources were fetched into 'originals' but then tested
-- via an undefined global, so remapping to original code points never
-- happened; the remap table was also tested with type "number" before being
-- indexed. The five copies of the code that finishes a run are now one local
-- function.
--
-- NOTE(review): 'resources.originals' is assumed to be a table from code point
-- to code point -- confirm against the font loader.

function scripts.injectors.handler(head)
    local start = first_glyph(head) -- we already have glyphs here (subtype 1)
    if not start then
        return head
    end

    local last_a, normal_process, lastfont, originals, first, last
    local ok = false

    -- finish the pending run: process it when it has tagged glyphs, then forget it
    local function flush()
        if ok then
            if trace_analyzing then
                colorize(first,last)
            end
            if trace_injections then
                traced_process(head,first,last,normal_process,last_a)
            else
                normal_process(head,first,last)
            end
            ok = false
        end
        first, last = nil, nil
    end

    while start do
        local char, id = isglyph(start)
        if char then
         -- local a = getinjector(start)
            local a = getscript(start)
            if a then
                if a ~= last_a then
                    if first then
                        flush() -- still uses the previous script and injector
                    end
                    last_a = a
                 -- normal_process = a
                    normal_process = getinjector(start)
                end
                if normal_process then
                    -- id == font
                    if id ~= lastfont then
                        local resources = fontdata[id].resources
                        originals = resources and resources.originals
                        lastfont = id
                    end
                    if type(originals) == "table" then
                        char = originals[char] or char
                    end
                    local h = hash[char]
                    if h then
                        setscriptstatus(start,h)
                        if not first then
                            first = start
                        end
                        last = start
                     -- if cjk == "chinese" or cjk == "korean" then -- we need to prevent too much ( ) processing
                        ok = true
                     -- end
                    elseif first then
                        flush()
                    end
                end
            elseif first then
                flush()
            end
        elseif id == glue_code then
            if not ok and first then
                -- no chinese or korean
                first, last = nil, nil
            end
        elseif first then
            -- some chinese or korean
            flush()
        end
        start = getnext(start)
    end
    if ok then
        flush()
    end
    return head
end
635 636
-- kind of experimental .. might move to it's own module
637 638
-- function scripts.splitters.handler(head)
639
-- return head
640
-- end
641 642
-- Adds the whitespace separated words in 'data' to a character tree and returns
-- the tree (a new one when none is passed). Each level of the tree is keyed by
-- the value utfbyte gives for a character; the node of the last character of a
-- word gets that word in its 'final' field.

local function addwords(tree,data)
    tree = tree or { }
    for word in gmatch(data,"%S+") do
        local node  = tree
        local chars = utfsplit(word,true)
        local count = #chars
        for i=1,count do
            local code  = utfbyte(chars[i])
            local child = node[code]
            if not child then
                child = { }
                node[code] = child
            end
            if i == count then
                child.final = word -- true -- could be something else, like word in case of tracing
            else
                node = child
            end
        end
    end
    return tree
end
665 666
local
loaded
=
{
}
667 668
-- Loads word lists into handler.tree (created when absent) and returns the
-- tree; returns nothing when there are no files to load.
--
-- handler : the method table; its name is used for reporting and statistics
-- files   : optional file name or list of file names; when omitted the list in
--           handler.files is used (a single string there is normalized to a
--           list and stored back)
--
-- A file with suffix "lua" is loaded as a table with a 'lists' array of entries
-- that have a 'data' string (optionally zlib compressed, with 'length' as
-- check); any other file is taken as plain text with words. A file that is not
-- found is also tried with ".gz" appended.
--
-- Changes: the 'files' argument was shadowed by a local and therefore ignored,
-- and a failed decompression made the length check fail on a nil value.

function splitters.load(handler,files)
    local tree = handler.tree or { }
    handler.tree = tree
    local explicit = files ~= nil
    if not explicit then
        files = handler.files
    end
    if not files then
        return
    elseif type(files) == "string" then
        files = { files }
        if not explicit then
            handler.files = files
        end
    end
    if trace_splitting then
        report_splitting("loading splitter data for language/script %a",handler.name)
    end
    local key = handler.name or "unknown"
    loaded[key] = (loaded[key] or 0) + 1
    statistics.starttiming(loaded)
    for i=1,#files do
        local filename = files[i]
        local fullname = resolvers.findfile(filename)
        if fullname == "" then
            fullname = resolvers.findfile(filename .. ".gz")
        end
        if fullname ~= "" then
            if trace_splitting then
                report_splitting("loading file %a",fullname)
            end
            local suffix, gzipped = gzip.suffix(fullname)
            if suffix == "lua" then
                local specification = table.load(fullname,gzipped and gzip.load)
                local lists = specification and specification.lists
                if lists then
                    for j=1,#lists do
                        local entry = lists[j]
                        local data  = entry.data
                        if data then
                            if entry.compression == "zlib" then
                                data = zlib.decompress(data)
                                if not data or (entry.length and entry.length ~= #data) then
                                    report_splitting("compression error in file %a",fullname)
                                end
                            end
                            if data then
                                addwords(tree,data)
                            end
                        end
                    end
                end
            else
                local data = gzipped and io.loadgzip(fullname) or io.loaddata(fullname)
                if data then
                    addwords(tree,data)
                end
            end
        else
            report_splitting("unknown file %a",filename)
        end
    end
    statistics.stoptiming(loaded)
    return tree
end
729 730
statistics
.
register
(
"
loaded split lists
"
,
function
(
)
731
if
next
(
loaded
)
then
732
return
string
.
format
(
"
%s, load time: %s
"
,
table
.
sequenced
(
loaded
)
,
statistics
.
elapsedtime
(
loaded
)
)
733
end
734
end
)
735 736
-- function splitters.addlist(name,filename)
737
-- local handler = scripts.handlers[name]
738
-- if handler and filename then
739
-- local files = handler.files
740
-- if not files then
741
-- files = { }
742
-- elseif type(files) == "string" then
743
-- files = { files }
744
-- end
745
-- handler.files = files
746
-- if type(filename) == "string" then
747
-- filename = utilities.parsers.settings_to_array(filename)
748
-- end
749
-- if type(filename) == "table" then
750
-- for i=1,#filename do
751
-- files[#files+1] = filenames[i]
752
-- end
753
-- end
754
-- end
755
-- end
756
--
757
-- commands.setscriptsplitterlist = splitters.addlist
758 759
local
categories
=
characters
.
categories
or
{
}
760 761
-- The word splitter. These four pieces only work together, which is why they
-- are kept (and were repaired) as one unit:
--
-- hit                   : finds the longest known word that starts at a glyph
-- splitters.handler     : the node list pass that looks for words
-- marker                : tracing aid, puts a colored rule after a node
-- splitters.insertafter : the splitter of the "test" method, adds stretch
--
-- What was wrong: 'hit' never moved to the next node, the handler used the
-- result of getsplitter as index in numbertohandler, used 'handler' outside the
-- scope where it was declared and an undefined 'char' in its reports, and
-- insertafter read its factor from an undefined global 'data' and tested an
-- undefined global 'ignore' (which was always nil, so glue was always added).

-- Follows the glyphs after 'head' through the word tree 'root' (the subtree of
-- the character at head). Returns the last node that completes a word plus the
-- 'final' value stored for that word. Characters with category "mn" that are
-- not in the tree are skipped. Returns false, false when a character breaks the
-- match before any word was completed, and nothing when the glyph run ends
-- without a completed word.

local function hit(root,head)
    local current   = getnext(head)
    local lastrun   = false
    local lastfinal = false
    while current do
        local char = isglyph(current)
        if not char then
            break
        end
        local newroot = root[char]
        if newroot then
            local final = newroot.final
            if final then
                lastrun   = current
                lastfinal = final
            end
            root = newroot
        elseif categories[char] ~= "mn" then
            return lastrun, lastfinal
        end
        current = getnext(current)
    end
    if lastrun then
        return lastrun, lastfinal
    end
end

-- state that is kept between glyphs (and calls) as long as the script number
-- does not change

local tree, attr, proc, handler

-- Scans the list for glyphs that carry a script whose handler has a splitter.
-- When a known word starts at such a glyph and is followed by another glyph,
-- the splitter is called as splitter(handler,head,first,last,detail) with
-- detail 1 when the following character can start a word itself and 2 when it
-- cannot. The splitter returns the new head and the node to continue after.

function splitters.handler(head) -- todo: also first_glyph test
    local current = head
    while current do
        if getid(current) == glyph_code then
            local a = getscript(current)
            if a then
                if a ~= attr then
                    handler = numbertohandler[a]
                    tree    = handler and handler.tree or { }
                    proc    = handler and handler.splitter
                    attr    = a
                end
                if proc then
                    local char = getchar(current)
                    local root = tree[char]
                    if root then
                        -- we don't check for attributes in the hitter (yet)
                        local last, final = hit(root,current)
                        if last then
                            local follower = getnext(last)
                            if follower then
                                local followchar = isglyph(follower)
                                if not followchar then
                                    -- we're done
                                elseif tree[followchar] then
                                    if trace_splitdetails then
                                        if type(final) == "string" then
                                            report_splitting("advance %s processing between <%s> and <%c>","with",final,followchar)
                                        else
                                            report_splitting("advance %s processing between <%c> and <%c>","with",char,followchar)
                                        end
                                    end
                                    head, current = proc(handler,head,current,last,1)
                                else
                                    if trace_splitdetails then
                                        -- could be punctuation
                                        if type(final) == "string" then
                                            report_splitting("advance %s processing between <%s> and <%c>","without",final,followchar)
                                        else
                                            report_splitting("advance %s processing between <%c> and <%c>","without",char,followchar)
                                        end
                                    end
                                    head, current = proc(handler,head,current,last,2)
                                end
                            end
                        end
                    end
                end
            end
        end
        current = getnext(current)
    end
    return head
end

-- Inserts an unbreakable, zero advance colored rule after 'current' (penalty,
-- negative glue, rule); dimensions are derived from the em width and ex height
-- of 'font'. Returns the head and the rule node.

local function marker(head,current,font,color) -- could become: nodes.tracers.marker
    local ex = exheights[font]
    local em = emwidths[font]
    local width = 0.05*em
    head, current = insert_node_after(head,current,new_penalty(10000))
    head, current = insert_node_after(head,current,new_glue(-width))
    head, current = insert_node_after(head,current,new_rule(width,1.5*ex,0.5*ex))
    setnodecolor(current,color)
    return head, current
end

-- cached stretch for the last seen dataset and font

local last_a, last_f, last_s

-- Splitter that puts stretchable glue (natural width zero) after the word
-- first..last. The stretch is the em width of the font of 'first' multiplied
-- by inter_word_stretch_factor of its dataset (zero when the node has no
-- dataset). With splitting traced a marker is added first, colored by detail.
-- Returns what insert_node_after returns for the glue.

function splitters.insertafter(handler,head,first,last,detail)
    local a = getscriptdata(first)
    local f = getfont(first)
    if a ~= last_a or f ~= last_f then
        local factor = a and a.inter_word_stretch_factor or 0
        last_s = emwidths[f] * factor
        last_a = a
        last_f = f
    end
    if trace_splitting then
        head, last = marker(head,last,f,detail == 2 and "trace:r" or "trace:g")
    end
    return insert_node_after(head,last,new_glue(0,last_s))
end
875 876
-- word-xx.lua:
877
--
878
-- return {
879
-- comment = "test",
880
-- copyright = "not relevant",
881
-- language = "en",
882
-- timestamp = "2013-05-20 14:15:21",
883
-- version = "1.00",
884
-- lists = {
885
-- {
886
-- -- data = "we thrive information in thick worlds because of our marvelous and everyday capacity to select edit single out structure highlight group pair merge harmonize synthesize focus organize condense reduce boil down choose categorize catalog classify list abstract scan look into idealize isolate discriminate distinguish screen pigeonhole pick over sort integrate blend inspect filter lump skip smooth chunk average approximate cluster aggregate outline summarize itemize review dip into flip through browse glance into leaf through skim refine enumerate glean synopsize winnow the wheat from the chaff and separate the sheep from the goats",
887
-- data = "abstract aggregate and approximate average because blend boil browse capacity catalog categorize chaff choose chunk classify cluster condense dip discriminate distinguish down edit enumerate everyday filter flip focus from glance glean goats group harmonize highlight idealize in information inspect integrate into isolate itemize leaf list look lump marvelous merge of organize our out outline over pair pick pigeonhole reduce refine review scan screen select separate sheep single skim skip smooth sort structure summarize synopsize synthesize the thick thrive through to we wheat winnow worlds",
888
-- },
889
-- },
890
-- }
891 892
scripts
.
installmethod
{
893
name
=
"
test
"
,
894
splitter
=
splitters
.
insertafter
,
895
initializer
=
splitters
.
load
,
896
files
=
{
897
-- "scrp-imp-word-test.lua",
898
"
word-xx.lua
"
,
899
}
,
900
datasets
=
{
901
default
=
{
902
inter_word_stretch_factor
=
0
.
25
,
-- of quad
903
}
,
904
}
,
905
}
906 907
-- new plugin:
908 909
local
registercontext
=
fonts
.
specifiers
.
registercontext
910
local
mergecontext
=
fonts
.
specifiers
.
mergecontext
911 912
local
otfscripts
=
characters
.
otfscripts
913 914
local
report_scripts
=
logs
.
reporter
(
"
scripts
"
,
"
auto feature
"
)
915
local
trace_scripts
=
false
trackers
.
register
(
"
scripts.autofeature
"
,
function
(
v
)
trace_scripts
=
v
end
)
916 917
local
autofontfeature
=
scripts
.
autofontfeature
or
{
}
918
scripts
.
autofontfeature
=
autofontfeature
919 920
local
cache_yes
=
{
}
921
local
cache_nop
=
{
}
922 923
setmetatableindex
(
cache_yes
,
function
(
t
,
k
)
local
v
=
{
}
t
[
k
]
=
v
return
v
end
)
924
setmetatableindex
(
cache_nop
,
function
(
t
,
k
)
local
v
=
{
}
t
[
k
]
=
v
return
v
end
)
925 926
-- beware: we need to tag a done (otherwise too many extra instances ... but how
927
-- often unpack? wait till we have a bitmap
928
--
929
-- we can consider merging this in handlers.characters(head) at some point as there
930
-- already check for the dynamic attribute so it saves a pass, however, then we also
931
-- need to check for a_scriptinjection there which nils the benefit
932
--
933
-- we can consider cheating: set all glyphs in a word as the first one but it's not
934
-- playing nice
935 936
-- Node list pass that gives every character with a known otf script a font
-- feature context for that script, stored as glyph data. For a glyph that
-- already has glyph data the script is merged into it (mergecontext), otherwise
-- a context is registered for the font (registercontext). Results are cached
-- per font and script. Returns the head unchanged.
--
-- Changes: the merge was called with an undefined global ('name') instead of
-- the script, and the merged value was stored with n[0] = attr, which does not
-- work on the nodes this traverser delivers; both branches now use
-- setglyphdata.
--
-- NOTE(review): the cache for merged contexts is keyed by font and script only
-- while the merged result also depends on the existing glyph data -- confirm
-- that this is intended.

function autofontfeature.handler(head)
    for n, char, font in nextchar, head do
     -- if getscript(n) then
     --     -- already tagged by script feature, maybe some day adapt
     -- else
            local script = otfscripts[char]
            if script then
                local dynamic = getglyphdata(n) or 0
                if dynamic > 0 then
                    local slot = cache_yes[font]
                    local attr = slot[script]
                    if not attr then
                        attr = mergecontext(dynamic,script,2)
                        slot[script] = attr
                        if trace_scripts then
                            report_scripts("script: %s, trigger %C, dynamic: %a, variant: %a",script,char,attr,"extended")
                        end
                    end
                    if attr ~= 0 then
                        setglyphdata(n,attr)
                        -- maybe set scriptinjection when associated
                    end
                else
                    local slot = cache_nop[font]
                    local attr = slot[script]
                    if not attr then
                        attr = registercontext(font,script,2)
                        slot[script] = attr
                        if trace_scripts then
                            report_scripts("script: %s, trigger %C, dynamic: %s, variant: %a",script,char,attr,"normal")
                        end
                    end
                    if attr ~= 0 then
                        setglyphdata(n,attr)
                        -- maybe set scriptinjection when associated
                    end
                end
            end
     -- end
    end
    return head
end
978 979
function
autofontfeature
.
enable
(
)
980
report_scripts
(
"
globally enabled
"
)
981
enableaction
(
"
processors
"
,
"
scripts.autofontfeature.handler
"
)
982
end
983 984
function
autofontfeature
.
disable
(
)
985
report_scripts
(
"
globally disabled
"
)
986
disableaction
(
"
processors
"
,
"
scripts.autofontfeature.handler
"
)
987
end
988 989
implement
{
990
name
=
"
enableautofontscript
"
,
991
actions
=
autofontfeature
.
enable
992
}
993 994
implement
{
995
name
=
"
disableautofontscript
"
,
996
actions
=
autofontfeature
.
disable
}
997 998
implement
{
999
name
=
"
setscript
"
,
1000
actions
=
scripts
.
set
,
1001
arguments
=
"
3 strings
"
,
1002
}
1003 1004
implement
{
1005
name
=
"
resetscript
"
,
1006
actions
=
scripts
.
reset
1007
}
1008 1009
-- some common helpers
1010 1011
do

    -- Helpers that insert spacing around a node. All inserters take the list
    -- head and the current node and return what the node insert function
    -- returns.
    --
    -- The three factors used to be kept between calls, so a node without
    -- dataset silently got the factors of whatever node came before it; they
    -- are now determined per call and default to 1.

    local parameters = fonts.hashes.parameters

    -- font space parameters, cached for the last seen font
    local space, stretch, shrink, lastfont

    -- Returns a new glue node with the space, stretch and shrink of the font of
    -- 'current', each multiplied by the matching inter_character_*_factor of
    -- its dataset.

    local function space_glue(current)
     -- local data = numbertodataset[getattr(current,a_scriptinjection)]
        local data = getscriptdata(current)
        local space_factor   = 1
        local stretch_factor = 1
        local shrink_factor  = 1
        if data then
            space_factor   = data.inter_character_space_factor   or 1
            stretch_factor = data.inter_character_stretch_factor or 1
            shrink_factor  = data.inter_character_shrink_factor  or 1
        end
        local font = getfont(current)
        if lastfont ~= font then
            local pf = parameters[font]
            space    = pf.space
            stretch  = pf.space_stretch
            shrink   = pf.space_shrink
            lastfont = font
        end
        return new_glue(
            space_factor   * space,
            stretch_factor * stretch,
            shrink_factor  * shrink
        )
    end

    scripts.inserters = {

        space_before = function(head,current)
            return insert_node_before(head,current,space_glue(current))
        end,
        space_after = function(head,current)
            return insert_node_after(head,current,space_glue(current))
        end,

        zerowidthspace_before = function(head,current)
            return insert_node_before(head,current,new_glue(0))
        end,
        zerowidthspace_after = function(head,current)
            return insert_node_after(head,current,new_glue(0))
        end,

        nobreakspace_before = function(head,current)
            local g = space_glue(current)
            local p = new_penalty(10000)
            head, current = insert_node_before(head,current,p)
            return insert_node_before(head,current,g)
        end,
        nobreakspace_after = function(head,current)
            local g = space_glue(current)
            local p = new_penalty(10000)
            head, current = insert_node_after(head,current,g)
            return insert_node_after(head,current,p)
        end,

    }

end
1076 1077
-- end of helpers
1078