font-ots.lua /size: 181 Kb    last modification: 2021-10-28 13:50
1
-- Make sure the global module registry exists, then record the metadata of
-- this module under its own name.
if not modules then
    modules = { }
end

modules['font-ots'] = { -- sequences
    version   = 1.001,
    optimize  = true,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
9 10
--[[ldx--
<p>This module is a bit more split up than I'd like but since we also want to test
with plain <l n='tex'/> it has to be so. This module is part of <l n='context'/>
and discussion about improvements and functionality mostly happens on the
<l n='context'/> mailing list.</p>

<p>The specification of OpenType is (or at least decades ago was) kind of vague.
Apart from a lack of a proper free specification there's also the problem that
Microsoft and Adobe may have their own interpretation of how and in what order to
apply features. In general the Microsoft website has more detailed specifications
and is a better reference. There is also some information in the FontForge help
files. In the end we rely most on the Microsoft specification.</p>

<p>Because there is so much possible, fonts might contain bugs and/or be made to
work with certain renderers. These may evolve over time which may have the side
effect that suddenly fonts behave differently. We don't want to catch all font
issues.</p>

<p>After a lot of experiments (mostly by Taco, me and Idris) the first implementation
was already quite useful. When it did most of what we wanted, a more optimized version
evolved. Of course all errors are mine and of course the code can be improved. There
are quite some optimizations going on here and processing speed is currently quite
acceptable and has been improved over time. Many complex scripts are not yet
supported, but I will look into them as soon as <l n='context'/> users ask for it.</p>

<p>The specification leaves room for interpretation. In case of doubt the Microsoft
implementation is the reference as it is the most complete one. As they deal with
lots of scripts and fonts, Kai and Ivo did a lot of testing of the generic code and
their suggestions help improve the code. I'm aware that not all border cases can be
taken care of, unless we accept excessive runtime, and even then the interference
with other mechanisms (like hyphenation) is not trivial.</p>

<p>Especially discretionary handling has been improved much by Kai Eigner who uses complex
(latin) fonts. The current implementation is a compromise between his patches and my code
and in the meantime performance is quite ok. We cannot check all border cases without
compromising speed but so far we're okay. Given good test cases we can probably improve
it here and there. Especially chain lookups are non trivial with discretionaries but
things got much better over time thanks to Kai.</p>

<p>Glyphs are indexed not by unicode but in their own way. This is because there is no
relationship with unicode at all, apart from the fact that a font might cover certain
ranges of characters. One character can have multiple shapes. However, at the
<l n='tex'/> end we use unicode so all extra glyphs are mapped into a private
space. This is needed because we need to access them and <l n='tex'/> has to include
them in the output eventually.</p>

<p>The initial data table is rather close to the open type specification and also not
that different from the one produced by <l n='fontforge'/> but we use hashes instead.
In <l n='context'/> that table is packed (similar tables are shared) and cached on disk
so that successive runs can use the optimized table (after loading the table is
unpacked).</p>

<p>This module is sparsely documented because it has been a moving target. The
table format of the reader changed a bit over time and we experiment a lot with
different methods for supporting features. By now the structures are quite stable.</p>

<p>Incrementing the version number will force a re-cache. We jump the number by one
when there's a fix in the reader or processing code that can result in different
results.</p>

<p>This code is also used outside context but in context it has to work with other
mechanisms. Both put some constraints on the code here.</p>
--ldx]]
--
74 75
-- Remark: We assume that cursives don't cross discretionaries which is okay because it
76
-- is only used in semitic scripts.
77
--
78
-- Remark: We assume that marks precede base characters.
79
--
80
-- Remark: When complex ligatures extend into discs nodes we can get side effects. Normally
81
-- this doesn't happen; ff\d{l}{l}{l} in lm works but ff\d{f}{f}{f}.
82
--
83
-- Todo: check if we copy attributes to disc nodes if needed.
84
--
85
-- Todo: it would be nice if we could get rid of components. In other places we can use
86
-- the unicode properties. We can just keep a lua table.
87
--
88
-- Remark: We do some disc juggling where we need to keep in mind that the pre, post and
89
-- replace fields can have prev pointers to a nesting node ... I wonder if that is still
90
-- needed.
91
--
92
-- Remark: This is not possible:
93
--
94
-- \discretionary {alpha-} {betagammadelta}
95
-- {\discretionary {alphabeta-} {gammadelta}
96
-- {\discretionary {alphabetagamma-} {delta}
97
-- {alphabetagammadelta}}}
98
--
99
-- Remark: Something is messed up: we have two mark / ligature indices, one at the
100
-- injection end and one here ... this is based on KE's patches but there is something
101
-- fishy there as I'm pretty sure that for husayni we need some connection (as it's much
102
-- more complex than an average font) but I need proper examples of all cases, not of
103
-- only some.
104
--
105
-- Remark: I wonder if indexed would be faster than unicoded. It would be a major
106
-- rewrite to have char being unicode + an index field in glyph nodes. Also more
107
-- assignments have to be made in order to keep things in sync. So, it's a no-go.
108
--
109
-- Remark: We can provide a fast loop when there are no disc nodes (tests show a 1%
110
-- gain). Smaller functions might perform better cache-wise. But ... memory becomes
111
-- faster anyway, so ...
112
--
113
-- Remark: Some optimizations made sense for 5.2 but seem less important for 5.3 but
114
-- anyway served their purpose.
115
--
116
-- Todo: just (0=l2r and 1=r2l) or maybe (r2l = true)
117 118
-- Experiments with returning the data with the ischar are positive for lmtx but
119
-- have a performance hit on mkiv because there we need to wrap ischardata (pending
120
-- extensions to luatex which is unlikely to happen for such an experiment because
121
-- we then can't remove it). Actually it might make generic slightly faster. Also,
122
-- there are some corner cases where a data check comes before a char fetch and
123
-- we're talking of millions of calls there. At some point I might make a version
124
-- for lmtx that does it slightly different anyway.
125 126
local
type
,
next
,
tonumber
=
type
,
next
,
tonumber
127
local
random
=
math
.
random
128
local
formatters
=
string
.
formatters
129
local
insert
=
table
.
insert
130 131
local
registertracker
=
trackers
.
register
132 133
local
logs
=
logs
134
local
trackers
=
trackers
135
local
nodes
=
nodes
136
local
attributes
=
attributes
137
local
fonts
=
fonts
138 139
local
otf
=
fonts
.
handlers
.
otf
140
local
tracers
=
nodes
.
tracers
141 142
local
trace_singles
=
false
registertracker
(
"
otf.singles
"
,
function
(
v
)
trace_singles
=
v
end
)
143
local
trace_multiples
=
false
registertracker
(
"
otf.multiples
"
,
function
(
v
)
trace_multiples
=
v
end
)
144
local
trace_alternatives
=
false
registertracker
(
"
otf.alternatives
"
,
function
(
v
)
trace_alternatives
=
v
end
)
145
local
trace_ligatures
=
false
registertracker
(
"
otf.ligatures
"
,
function
(
v
)
trace_ligatures
=
v
end
)
146
local
trace_contexts
=
false
registertracker
(
"
otf.contexts
"
,
function
(
v
)
trace_contexts
=
v
end
)
147
local
trace_marks
=
false
registertracker
(
"
otf.marks
"
,
function
(
v
)
trace_marks
=
v
end
)
148
local
trace_kerns
=
false
registertracker
(
"
otf.kerns
"
,
function
(
v
)
trace_kerns
=
v
end
)
149
local
trace_cursive
=
false
registertracker
(
"
otf.cursive
"
,
function
(
v
)
trace_cursive
=
v
end
)
150
local
trace_preparing
=
false
registertracker
(
"
otf.preparing
"
,
function
(
v
)
trace_preparing
=
v
end
)
151
local
trace_bugs
=
false
registertracker
(
"
otf.bugs
"
,
function
(
v
)
trace_bugs
=
v
end
)
152
local
trace_details
=
false
registertracker
(
"
otf.details
"
,
function
(
v
)
trace_details
=
v
end
)
153
local
trace_steps
=
false
registertracker
(
"
otf.steps
"
,
function
(
v
)
trace_steps
=
v
end
)
154
local
trace_skips
=
false
registertracker
(
"
otf.skips
"
,
function
(
v
)
trace_skips
=
v
end
)
155
local
trace_plugins
=
false
registertracker
(
"
otf.plugins
"
,
function
(
v
)
trace_plugins
=
v
end
)
156
local
trace_chains
=
false
registertracker
(
"
otf.chains
"
,
function
(
v
)
trace_chains
=
v
end
)
157 158
local
trace_kernruns
=
false
registertracker
(
"
otf.kernruns
"
,
function
(
v
)
trace_kernruns
=
v
end
)
159
----- trace_discruns = false registertracker("otf.discruns", function(v) trace_discruns = v end)
160
local
trace_compruns
=
false
registertracker
(
"
otf.compruns
"
,
function
(
v
)
trace_compruns
=
v
end
)
161
local
trace_testruns
=
false
registertracker
(
"
otf.testruns
"
,
function
(
v
)
trace_testruns
=
v
end
)
162 163
local
forcediscretionaries
=
false
164
local
forcepairadvance
=
false
-- for testing
165 166
local
repeatablemultiples
=
context
or
false
167 168
directives
.
register
(
"
otf.forcediscretionaries
"
,
function
(
v
)
169
forcediscretionaries
=
v
170
end
)
171 172
directives
.
register
(
"
otf.forcepairadvance
"
,
function
(
v
)
173
forcepairadvance
=
v
174
end
)
175 176
local
report_direct
=
logs
.
reporter
(
"
fonts
"
,
"
otf direct
"
)
177
local
report_subchain
=
logs
.
reporter
(
"
fonts
"
,
"
otf subchain
"
)
178
local
report_chain
=
logs
.
reporter
(
"
fonts
"
,
"
otf chain
"
)
179
local
report_process
=
logs
.
reporter
(
"
fonts
"
,
"
otf process
"
)
180
local
report_warning
=
logs
.
reporter
(
"
fonts
"
,
"
otf warning
"
)
181
local
report_run
=
logs
.
reporter
(
"
fonts
"
,
"
otf run
"
)
182 183
registertracker
(
"
otf.substitutions
"
,
"
otf.singles
"
,
"
otf.multiples
"
,
"
otf.alternatives
"
,
"
otf.ligatures
"
)
184
registertracker
(
"
otf.positions
"
,
"
otf.marks
"
,
"
otf.kerns
"
,
"
otf.cursive
"
)
185
registertracker
(
"
otf.actions
"
,
"
otf.substitutions
"
,
"
otf.positions
"
)
186
registertracker
(
"
otf.sample
"
,
"
otf.steps
"
,
"
otf.substitutions
"
,
"
otf.positions
"
,
"
otf.analyzing
"
)
187
registertracker
(
"
otf.sample.silent
"
,
"
otf.steps=silent
"
,
"
otf.substitutions
"
,
"
otf.positions
"
,
"
otf.analyzing
"
)
188 189
local
nuts
=
nodes
.
nuts
190 191
local
getnext
=
nuts
.
getnext
192
local
setnext
=
nuts
.
setnext
193
local
getprev
=
nuts
.
getprev
194
local
setprev
=
nuts
.
setprev
195
local
getboth
=
nuts
.
getboth
196
local
setboth
=
nuts
.
setboth
197
local
getid
=
nuts
.
getid
198
local
getstate
=
nuts
.
getstate
199
local
getsubtype
=
nuts
.
getsubtype
200
local
setsubtype
=
nuts
.
setsubtype
201
local
getchar
=
nuts
.
getchar
202
local
setchar
=
nuts
.
setchar
203
local
getdisc
=
nuts
.
getdisc
204
local
setdisc
=
nuts
.
setdisc
205
local
getreplace
=
nuts
.
getreplace
206
local
setlink
=
nuts
.
setlink
207
local
getwidth
=
nuts
.
getwidth
208
local
getattr
=
nuts
.
getattr
209 210
local
getglyphdata
=
nuts
.
getglyphdata
211 212
---------------------------------------------------------------------------------------
213 214
-- Beware: In ConTeXt components no longer are real components. We only keep track of
215
-- their positions because some complex ligatures might need that. For the moment we
216
-- use an x_ prefix because for now generic follows the other approach.
217 218
local
components
=
nuts
.
components
219
local
copynocomponents
=
components
.
copynocomponents
220
local
copyonlyglyphs
=
components
.
copyonlyglyphs
221
local
countcomponents
=
components
.
count
222
local
setcomponents
=
components
.
set
223
local
getcomponents
=
components
.
get
224
local
flushcomponents
=
components
.
flush
225 226
---------------------------------------------------------------------------------------
227 228
local
ischar
=
nuts
.
ischar
229
local
usesfont
=
nuts
.
usesfont
230 231
local
insertnodeafter
=
nuts
.
insertafter
232
local
copy_node
=
nuts
.
copy
233
local
copy_node_list
=
nuts
.
copylist
234
local
remove_node
=
nuts
.
remove
235
local
find_node_tail
=
nuts
.
tail
236
local
flushnodelist
=
nuts
.
flushlist
237
local
flushnode
=
nuts
.
flushnode
238
local
endofmath
=
nuts
.
endofmath
239 240
local
startofpar
=
nuts
.
startofpar
241 242
local
setmetatable
=
setmetatable
243
local
setmetatableindex
=
table
.
setmetatableindex
244 245
local
nextnode
=
nuts
.
traversers
.
node
246 247
----- zwnj = 0x200C
248
----- zwj = 0x200D
249 250
local
nodecodes
=
nodes
.
nodecodes
251
local
glyphcodes
=
nodes
.
glyphcodes
252
local
disccodes
=
nodes
.
disccodes
253 254
local
glyph_code
=
nodecodes
.
glyph
255
local
glue_code
=
nodecodes
.
glue
256
local
disc_code
=
nodecodes
.
disc
257
local
math_code
=
nodecodes
.
math
258
local
dir_code
=
nodecodes
.
dir
259
local
par_code
=
nodecodes
.
par
260 261
local
lefttoright_code
=
nodes
.
dirvalues
.
lefttoright
262
local
righttoleft_code
=
nodes
.
dirvalues
.
righttoleft
263 264
local
discretionarydisc_code
=
disccodes
.
discretionary
265
local
ligatureglyph_code
=
glyphcodes
.
ligature
266 267
local
a_noligature
=
attributes
.
private
(
"
noligature
"
)
268 269
local
injections
=
nodes
.
injections
270
local
setmark
=
injections
.
setmark
271
local
setcursive
=
injections
.
setcursive
272
local
setkern
=
injections
.
setkern
273
local
setmove
=
injections
.
setmove
274
local
setposition
=
injections
.
setposition
275
local
resetinjection
=
injections
.
reset
276
local
copyinjection
=
injections
.
copy
277
local
setligaindex
=
injections
.
setligaindex
278
local
getligaindex
=
injections
.
getligaindex
279 280
local
fontdata
=
fonts
.
hashes
.
identifiers
281
local
fontfeatures
=
fonts
.
hashes
.
features
282 283
local
otffeatures
=
fonts
.
constructors
.
features
.
otf
284
local
registerotffeature
=
otffeatures
.
register
285 286
local
onetimemessage
=
fonts
.
loggers
.
onetimemessage
or
function
(
)
end
287 288
local
getrandom
=
utilities
and
utilities
.
randomizer
and
utilities
.
randomizer
.
get
289 290
otf
.
defaultnodealternate
=
"
none
"
-- first last
291 292
-- We use a few semi-global variables. The handler can be called nested but this assumes
293
-- that the same font is used.
294 295
local
tfmdata
=
false
296
local
characters
=
false
297
local
descriptions
=
false
298
local
marks
=
false
299
local
classes
=
false
300
local
currentfont
=
false
301
local
factor
=
0
302
local
threshold
=
0
303
local
checkmarks
=
false
304 305
local
discs
=
false
306
local
spaces
=
false
307 308
local
sweepnode
=
nil
309
local
sweephead
=
{
}
-- we don't nil entries but false them (no collection and such)
310 311
local
notmatchpre
=
{
}
-- to be checked: can we use false instead of nil / what if a == b tests
312
local
notmatchpost
=
{
}
-- to be checked: can we use false instead of nil / what if a == b tests
313
local
notmatchreplace
=
{
}
-- to be checked: can we use false instead of nil / what if a == b tests
314 315
local
handlers
=
{
}
316 317
local
isspace
=
injections
.
isspace
318
local
getthreshold
=
injections
.
getthreshold
319 320
local
checkstep
=
(
tracers
and
tracers
.
steppers
.
check
)
or
function
(
)
end
321
local
registerstep
=
(
tracers
and
tracers
.
steppers
.
register
)
or
function
(
)
end
322
local
registermessage
=
(
tracers
and
tracers
.
steppers
.
message
)
or
function
(
)
end
323 324
-- Trace helper for processing messages. When step tracing is active the
-- message is handed to the step message collector; it is also sent to the
-- direct reporter unless step tracing is set to "silent".
local function logprocess(...)
    if trace_steps then
        registermessage(...)
    end
    if trace_steps ~= "silent" then
        report_direct(...)
    end
end
333 334
-- Trace helper for warnings: forwards all arguments unchanged to the direct
-- reporter and returns nothing.
local function logwarning(...)
    report_direct(...)
end
337 338
-- gref(n) turns a glyph reference into a string for tracing:
--
--   number : "U+XXXX (name)" when the current descriptions table has a name
--            for it, otherwise "U+XXXX"
--   list   : the same per numeric entry (other entries are skipped), combined
--            by the "% t" formatter
--   nil or false : an error marker string
local gref do

    local f_unicode = formatters["U+%X"]      -- was ["%U"]
    local f_uniname = formatters["U+%X (%s)"] -- was ["%U (%s)"]
    local f_unilist = formatters["% t"]

    -- Format one code point, adding the glyph name when one is known.
    local function describe(unicode)
        local entry = descriptions[unicode]
        local name  = entry and entry.name
        if name then
            return f_uniname(unicode,name)
        end
        return f_unicode(unicode)
    end

    gref = function(n) -- currently the same as in font-otb
        if not n then
            return "<error in node mode tracing>"
        end
        if type(n) == "number" then
            return describe(n)
        end
        local list  = { }
        local count = 0
        for i=1,#n do
            local ni = n[i]
            if tonumber(ni) then -- later we will start at 2
                count       = count + 1
                list[count] = describe(ni)
            end
        end
        return f_unilist(list)
    end

end
374 375
-- Build the tracing prefix for a chain lookup.
--
-- dataset  : feature dataset; entry 4 is reported as the feature
-- sequence : lookup sequence; its type, merged flag and name are reported
-- index    : optional index that gets appended to the message when given
--
-- Returns a fixed message when no dataset is passed.
local function cref(dataset,sequence,index)
    if not dataset then
        return "no valid dataset"
    end
    -- The format puts %s directly in front of "chain lookup", so the marker
    -- has to carry its own trailing space to keep the words separated.
    local merged = sequence.merged and "merged " or ""
    if index then
        return formatters["feature %a, type %a, %schain lookup %a, index %a"](
            dataset[4],sequence.type,merged,sequence.name,index)
    else
        return formatters["feature %a, type %a, %schain lookup %a"](
            dataset[4],sequence.type,merged,sequence.name)
    end
end
388 389
-- Build the tracing prefix for a direct (non chain) lookup.
--
-- dataset  : feature dataset; entry 4 is reported as the feature
-- sequence : lookup sequence; its type, merged flag and name are reported
local function pref(dataset,sequence)
    -- The format puts %s directly in front of "lookup", so the marker has to
    -- carry its own trailing space to keep the words separated.
    local merged = sequence.merged and "merged " or ""
    return formatters["feature %a, type %a, %slookup %a"](
        dataset[4],sequence.type,merged,sequence.name)
end
393 394
-- Direction label for tracing: "l2r" when rlmode is absent (nil or false) or
-- not negative, "r2l" otherwise.
local function mref(rlmode)
    local lefttoright = not rlmode or rlmode >= 0
    return lefttoright and "l2r" or "r2l"
end
401 402
-- The next code is somewhat complicated by the fact that some fonts can have ligatures made
403
-- from ligatures that themselves have marks. This was identified by Kai in for instance
404
-- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes
405
-- KAF LAM-ALEF with a SHADDA on the first and a FATHA on the second component. In a next
406
-- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the
407
-- third component.
408 409
-- We can assume that languages that use marks are not hyphenated. We can also assume
410
-- that at most one discretionary is present.
411 412
-- We do need components in funny kerning mode but maybe I can better reconstruct them
413
-- as we do have the font components info available; removing components makes the
414
-- previous code much simpler. Also, later on copying and freeing becomes easier.
415
-- However, for arabic we need to keep them around for the sake of mark placement
416
-- and indices.
417 418
-- Remove a disc node from the list and put its replace list (if any) in its
-- place. The pre and post lists are flushed together with the disc node.
--
-- Returns the (possibly new) head and the node that now sits where the disc
-- was: the first replace node, or else the node that followed the disc. When
-- the disc was the head and there is neither a replace list nor a following
-- node, nothing is returned.
local function flattendisk(head,disc)
    local pre, post, replace, _, _, replacetail = getdisc(disc,true)
    local before, after = getboth(disc)
    local washead = head == disc
    -- detach the sublists first so that flushing the disc leaves them alone
    setdisc(disc)
    flushnode(disc)
    if pre then
        flushnodelist(pre)
    end
    if post then
        flushnodelist(post)
    end
    if replace then
        if after then
            setlink(replacetail,after)
        end
        if washead then
            return replace, replace
        end
        setlink(before,replace)
        return head, replace
    end
    if not washead then
        setlink(before,after) -- checks for next anyway
        return head, after
    end
    if after then
        return after, after
    end
    -- return -- maybe warning
end
454 455
-- Append a node list to a disc node: the list itself is linked to the end of
-- the post list and a copy of it to the end of the replace list. When a field
-- is empty the (copied) list becomes that field. The pre list is passed back
-- unchanged.
local function appenddisc(disc,list)
    local pre, post, replace, _, posttail, replacetail = getdisc(disc,true)
    -- copy before linking, so the copy is taken from the untouched list
    local duplicate = copy_node_list(list)
    if not post then
        post = list
    else
        setlink(posttail,list)
    end
    if not replace then
        replace = duplicate
    else
        setlink(replacetail,duplicate)
    end
    setdisc(disc,pre,post,replace)
end
471 472
-- Replace the run start..stop by one ligature glyph with character char.
--
-- Nothing is changed when the run is a single glyph that already has that
-- character. Otherwise the run is cut out of the list, a component-free copy
-- of start becomes the ligature glyph, the run is registered as its
-- components and then flushed.
--
-- Returns the (possibly new) head and the resulting glyph.
local function markstoligature(head,start,stop,char)
    if start == stop and getchar(start) == char then
        return head, start
    end
    local before = getprev(start)
    local after  = getnext(stop)
    -- isolate the run from the main list
    setprev(start)
    setnext(stop)
    local ligature = copynocomponents(start,copyinjection)
    if head == start then
        head = ligature
    end
    resetinjection(ligature)
    setchar(ligature,char)
    setsubtype(ligature,ligatureglyph_code)
    setcomponents(ligature,start)
    setlink(before,ligature,after)
    flushcomponents(start)
    return head, ligature
end
493 494
-- Remark for Kai: (some arabic fonts do mark + mark = other mark and such)
495
--
496
-- The hasmarks is needed for ligatures of marks that are part of a ligature in
497
-- which case we assume that we can delete the marks anyway (we can always become
498
-- more clever if needed) .. in fact the whole logic here should be redone. We're
499
-- in the not discfound branch then. We now have skiphash too so we can be more
500
-- selective if needed (todo).
501 502
-- we can have more granularity here but for now we only do a simple check
503 504
local
no_left_ligature_code
=
1
505
local
no_right_ligature_code
=
2
506
local
no_left_kern_code
=
4
507
local
no_right_kern_code
=
8
508 509
-- Simple glyph option check: only the two ligature related codes are handled
-- and both map onto the noligature attribute being set to 1 on the node. Any
-- other code gives false.
local hasglyphoption = function(n,c)
    local ligaturecode = c == no_left_ligature_code or c == no_right_ligature_code
    return ligaturecode and getattr(n,a_noligature) == 1
end
516 517
-- in lmtx we need to check the components and can be slightly more clever
518 519
-- Replace the run start..stop by one ligature glyph with character char.
--
-- head, start, stop : list head and the first and last node of the run
-- char              : character of the resulting ligature
-- dataset, sequence : only used for trace messages
-- skiphash          : when absent, marks inside the run are deleted
-- discfound         : disc node found inside the run, or a false value
-- hasmarks          : when set, marks inside the run are deleted as well
--
-- Returns the head and the node to continue from: normally the new ligature
-- glyph, but in the disc case the node preceding it.
local function toligature(head,start,stop,char,dataset,sequence,skiphash,discfound,hasmarks) -- brr head
    if hasglyphoption(start,no_right_ligature_code) then
        return head, start
    end
    if start == stop and getchar(start) == char then
        resetinjection(start)
        setchar(start,char)
        return head, start
    end
    local before         = getprev(start)
    local after          = getnext(stop)
    local firstcomponent = start
    -- cut the run out of the list and put a component-free copy in its place
    setprev(start)
    setnext(stop)
    local base = copynocomponents(start,copyinjection)
    if start == head then
        head = base
    end
    resetinjection(base)
    setchar(base,char)
    setsubtype(base,ligatureglyph_code)
    setcomponents(base,firstcomponent)
    setlink(before,base,after)
    if discfound then
        -- discfound ... forget about marks .. probably no scripts that hyphenate and have marks
        local discprev, discnext = getboth(discfound)
        if discprev and discnext then
            -- we assume normalization in context, and don't care about generic ... especially
            -- \- can give problems as there we can have a negative char but that won't match
            -- anyway
            local pre, post, replace, _, posttail = getdisc(discfound,true)
            if not replace then
                -- looks like we never come here as it's not okay
                local baseprev = getprev(base)
                local copied   = copyonlyglyphs(firstcomponent)
                if pre then
                    setlink(discprev,pre)
                else
                    setnext(discprev) -- also blocks funny assignments
                end
                pre = firstcomponent -- is start
                if post then
                    setlink(posttail,discnext)
                    setprev(post) -- nil anyway
                else
                    post = discnext
                    setprev(discnext) -- also blocks funny assignments
                end
                setlink(baseprev,discfound,after)
                setboth(base)
                -- here components have a pointer so we can't free it!
                setcomponents(base,copied)
                replace = base
                if forcediscretionaries then
                    setdisc(discfound,pre,post,replace,discretionarydisc_code)
                else
                    setdisc(discfound,pre,post,replace)
                end
                base = baseprev
            end
        end
    else
        local deletemarks    = not skiphash or hasmarks
        local components     = start -- flushed at the end of this branch
        local baseindex      = 0
        local componentindex = 0
        -- local list head for the insertions; the head that gets returned is
        -- not affected by them
        local listhead       = base
        local current        = base
        -- first we loop over the glyphs in start ... stop
        while start do
            local unicode = getchar(start)
            if not marks[unicode] then
                baseindex      = baseindex + componentindex
                componentindex = countcomponents(start,marks)
             -- we can be more clever here: "not deletemarks or (skiphash and not skiphash[char])"
             -- and such:
            elseif not deletemarks then
                -- we can get a loop when the font expects otherwise (i.e. unexpected deletemarks)
                setligaindex(start,baseindex + getligaindex(start,componentindex))
                if trace_marks then
                    logwarning("%s: keep ligature mark %s, gets index %s",pref(dataset,sequence),gref(unicode),getligaindex(start))
                end
                local copy = copy_node(start)
                copyinjection(copy,start) -- is this ok ? we position later anyway
                listhead, current = insertnodeafter(listhead,current,copy) -- unlikely that mark has components
            elseif trace_marks then
                logwarning("%s: delete ligature mark %s",pref(dataset,sequence),gref(unicode))
            end
            start = getnext(start)
        end
        -- we can have one accent as part of a lookup and another following
        local follower = getnext(current)
        while follower do
            local unicode = ischar(follower)
            -- also something skiphash here?
            if not (unicode and marks[unicode]) then
                break
            end
            setligaindex(follower,baseindex + getligaindex(follower,componentindex))
            if trace_marks then
                logwarning("%s: set ligature mark %s, gets index %s",pref(dataset,sequence),gref(unicode),getligaindex(follower))
            end
            follower = getnext(follower)
        end
        flushcomponents(components)
    end
    return head, base
end
633 634
-- Replace the glyph at start by the sequence of glyphs in multiple.
--
-- head, start : list head and the glyph node that gets replaced
-- multiple    : list of replacement characters
-- skiphash    : not used here
-- what        : feature value; when it is not true and repeatable multiples
--               are enabled it can ask for repetition, either as a string
--               "repeatfirst", "repeatmiddle" or "repeatlast" followed by
--               ":" or "=" and a count, or as a table with first, middle
--               and last fields
-- stop        : not used here
--
-- Returns head, the last inserted node and true, or head, start and false
-- when multiple is empty.
local function multiple_glyphs(head,start,multiple,skiphash,what,stop) -- what to do with skiphash matches here
    local nofmultiples = #multiple
    if nofmultiples <= 0 then
        if trace_multiples then
            logprocess("no multiple for %s",gref(getchar(start)))
        end
        return head, start, false
    end
    -- remember the first glyph of the result: start moves along with each
    -- insertion while the repeat code below needs the original node
    local first = start
    resetinjection(start)
    setchar(start,multiple[1])
    if nofmultiples > 1 then
        for k=2,nofmultiples do
            -- untested:
            --
            -- while ignoremarks and marks[getchar(sn)] then
            --     local sn = getnext(sn)
            -- end
            local n = copy_node(start) -- ignore components
            resetinjection(n)
            setchar(n,multiple[k])
            insertnodeafter(head,start,n)
            start = n
        end
        if what ~= true and repeatablemultiples then
            -- This is just some experimental code; we might introduce gsub_extensible
            -- some day instead. Beware: when we have a feature that mixes alternates and
            -- multiples we need to make sure we don't handle the alternate string values
            -- here. This might eventually become an lmtx only feature.
            local kind = type(what)
            local m, f, l
            if kind == "string" then
                local where, count = string.match(what,"^repeat(.-)[:=](%d+)$")
                if where == "middle" then
                    m = tonumber(count)
                elseif where == "first" then
                    f = tonumber(count)
                elseif where == "last" then
                    l = tonumber(count)
                end
            elseif kind == "table" then
                -- won't happen because currently we don't split these values
                m = what.middle
                f = what.first
                l = what.last
            end
            if f or m or l then
                if m and m > 1 and nofmultiples == 3 then
                    local middle = getnext(first)
                    for i=2,m do
                        local n = copy_node(middle) -- ignore components
                        resetinjection(n)
                        insertnodeafter(head,first,n)
                    end
                end
                if f and f > 1 then
                    for i=2,f do
                        local n = copy_node(first) -- ignore components
                        resetinjection(n)
                        insertnodeafter(head,first,n)
                    end
                end
                if l and l > 1 then
                    for i=2,l do
                        local n = copy_node(start) -- ignore components
                        resetinjection(n)
                        insertnodeafter(head,start,n)
                        start = n
                    end
                end
            end
        end
    end
    return head, start, true
end
710 711
-- Select one glyph from a list of alternatives.
--
-- start        : glyph node; only consulted when value is 0 (no change)
-- alternatives : list of candidate characters
-- value        : "random", "first", "last", true (meaning 1) or something
--                that converts to a number
--
-- Returns the chosen character (false when the value is out of range and no
-- default applies) plus a comment for tracing; the comment is a false value
-- when alternatives are not traced.
local function get_alternative_glyph(start,alternatives,value)
    local n = #alternatives
    if n == 1 then
        -- we could actually change that into a gsub and save some memory in the
        -- font loader but it makes tracing more messy
        return alternatives[1], trace_alternatives and "1 (only one present)"
    elseif value == "random" then
        local r = getrandom and getrandom("glyph",1,n) or random(1,n)
        return alternatives[r], trace_alternatives and formatters["value %a, taking %a"](value,r)
    elseif value == "first" then
        return alternatives[1], trace_alternatives and formatters["value %a, taking %a"](value,1)
    elseif value == "last" then
        return alternatives[n], trace_alternatives and formatters["value %a, taking %a"](value,n)
    end
    value = value == true and 1 or tonumber(value)
    if type(value) ~= "number" then
        return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
    end
 -- local a = alternatives[value]
 -- if a then
 --     -- some kind of hash
 --     return a, trace_alternatives and formatters["value %a, taking %a"](value,a)
 -- end
    if value > n then
        -- out of range: fall back according to the configured default; the
        -- returned glyph now agrees with the index reported in the comment
        local defaultalt = otf.defaultnodealternate
        if defaultalt == "first" then
            return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
        elseif defaultalt == "last" then
            return alternatives[n], trace_alternatives and formatters["invalid value %s, taking %a"](value,n)
        else
            return false, trace_alternatives and formatters["invalid value %a, %s"](value,"out of range")
        end
    elseif value == 0 then
        return getchar(start), trace_alternatives and formatters["invalid value %a, %s"](value,"no change")
    elseif value < 1 then
        return alternatives[1], trace_alternatives and formatters["invalid value %a, taking %a"](value,1)
    else
        return alternatives[value], trace_alternatives and formatters["value %a, taking %a"](value,value)
    end
end
751 752
-- handlers
753 754
-- Single substitution: give the glyph at start the replacement character and
-- reset its injection data. Always reports success.
function handlers.gsub_single(head,start,dataset,sequence,replacement)
    if trace_singles then
        local original = getchar(start)
        logprocess("%s: replacing %s by single %s",pref(dataset,sequence),gref(original),gref(replacement))
    end
    resetinjection(start)
    setchar(start,replacement)
    return head, start, true
end
762 763
-- Alternate substitution: pick one of the alternatives for the glyph at start.
--
-- The selector is dataset[1]; when that is true the value registered for the
-- feature (dataset[4]) in the shared features of the current font is used
-- instead. When no alternative can be chosen the glyph is left as it is.
-- Always reports success.
function handlers.gsub_alternate(head,start,dataset,sequence,alternative)
    local kind  = dataset[4]
    local what  = dataset[1]
    local value = what == true and tfmdata.shared.features[kind] or what
    local choice, comment = get_alternative_glyph(start,alternative,value)
    if choice then
        if trace_alternatives then
            -- five directives need five arguments: the requested value fills
            -- the %a slot so that glyph and comment end up in their own slots
            logprocess("%s: replacing %s by alternative %a to %s, %s",pref(dataset,sequence),gref(getchar(start)),value,gref(choice),comment)
        end
        resetinjection(start)
        setchar(start,choice)
    else
        if trace_alternatives then
            logwarning("%s: no variant %a for %s, %s",pref(dataset,sequence),value,gref(getchar(start)),comment)
        end
    end
    return head, start, true
end
781 782
-- Multiple substitution: optionally trace the replacement and leave the real
-- work to multiple_glyphs, passing dataset[1] on as the feature value. The
-- rlmode argument is not used here.
function handlers.gsub_multiple(head,start,dataset,sequence,multiple,rlmode,skiphash)
    if trace_multiples then
        local original = getchar(start)
        logprocess("%s: replacing %s by multiple %s",pref(dataset,sequence),gref(original),gref(multiple))
    end
    return multiple_glyphs(head,start,multiple,skiphash,dataset[1])
end
788 789
-- Don't we deal with disc otherwise now? I need to check if the next one can be
790
-- simplified. Anyway, it can be way messier: marks that get removed as well as
791
-- marks that are kept.
792 793
-- Ligature substitution handler.
--
-- `ligature` is a tree keyed by character: every matching follow-up character
-- selects a subtree, and a finished ligature is either a plain number or the
-- `ligature` field of the subtree that was reached. The scan starts at the node
-- after `start`; `stop` remembers the last node that matched.
--
-- Returns head, start, done and a fourth value that is nil when start has no
-- successor, true when the matched run was moved into a disc node (the caller
-- gets that disc node back as start) and false in all other cases.
function handlers.gsub_ligature(head,start,dataset,sequence,ligature,rlmode,skiphash)
    local current = getnext(start)
    if not current then
        return head, start, false, nil
    end
    local stop      = nil
    local startchar = getchar(start)

    if skiphash and skiphash[startchar] then
        -- the start character itself is in the skip hash: follow the tree over
        -- directly adjacent characters only, nothing is skipped here
        while current do
            local char   = ischar(current,currentfont)
            local branch = char and not tonumber(ligature) and ligature[char]
            if not branch then
                break
            end
            stop     = current
            ligature = branch
            current  = getnext(current)
        end
        if stop then
            local glyph = tonumber(ligature) or ligature.ligature
            if glyph then
                local tracing  = trace_ligatures
                -- fetched before the call, as in the traced path of the original
                local stopchar = tracing and getchar(stop)
                head, start = markstoligature(head,start,stop,glyph)
                if tracing then
                    logprocess("%s: replacing %s upto %s by ligature %s case 1",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(getchar(start)))
                end
                return head, start, true, false
            end
            -- ok, goto next lookup
        end
        return head, start, false, false
    end

    local discfound = false
    local hasmarks  = marks[startchar]
    while current do
        local char, id = ischar(current,currentfont)
        if char then
            if skiphash and skiphash[char] then
                current = getnext(current)
            else
                local branch = not tonumber(ligature) and ligature[char]
                if not branch then
                    break
                end
                if marks[char] then
                    hasmarks = true
                end
                stop     = current -- needed for fake so outside then
                ligature = branch
                current  = getnext(current)
            end
        else
            -- a false result ends the scan right away (kind of weird); for a nil
            -- result a disc node is remembered before the scan ends
            if char == nil and id == disc_code then
                discfound = current
            end
            break
        end
    end

    -- of{f-}{}{f}e  o{f-}{}{f}fe  o{-}{}{ff}e  (oe and ff ligature)
    -- we can end up here when we have a start run .. testruns start at a disc but
    -- so here we have the other case: char + disc
    --
    -- Challenge for Kai (latinmodern): \hyphenation{fii-f-f-iif} fiiffiif

    if discfound then
        -- don't assume marks in a disc and we don't run over a disc (for now)
        local pre, post, replace = getdisc(discfound)

        -- does the tree reached so far continue with the character of node n?
        local function continues(n)
            local char = ischar(n,currentfont)
            return char and (not tonumber(ligature) and ligature[char])
        end

        local match = replace and continues(replace)
        if not match and pre then
            match = continues(pre)
        end
        -- same grouping as the unparenthesised original: the node after the disc
        -- is also looked at whenever there is no replace list
        if (not match and not pre) or not replace then
            if continues(getnext(discfound)) then
                match = true
            end
        end

        if match then
            -- we force a restart
            local prev = getprev(start)
            local copy
            if stop then
                setnext(stop)
                copy = copy_node_list(start)
                local copytail = find_node_tail(copy)
                if pre then
                    setlink(copytail,pre)
                end
                if replace then
                    -- was: getprev(stop) -- Kai: needs checking on your samples
                    setlink(stop,replace)
                end
            else
                setnext(start)
                copy = copy_node(start)
                if pre then
                    setlink(copy,pre)
                end
                if replace then
                    setlink(start,replace)
                end
            end
            -- the copy becomes the pre list, the original run the replace list
            setdisc(discfound,copy,post,start)
            if prev then
                setlink(prev,discfound)
            else
                setprev(discfound)
                head = discfound
            end
            return head, discfound, true, true
        end
    end

    local glyph = tonumber(ligature) or ligature.ligature
    if not glyph then
        -- weird but happens, pseudo ligatures ... just the components
        return head, start, false, false
    end
    if stop then
        local tracing  = trace_ligatures
        local stopchar = tracing and getchar(stop)
        -- head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks)
        head, start = toligature(head,start,stop,glyph,dataset,sequence,skiphash,false,hasmarks)
        if tracing then
            logprocess("%s: replacing %s upto %s by ligature %s case 2",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(glyph))
            -- we can have a rare case of multiple disc in a lig but that makes no sense language wise but if really
            -- needed we could backtrack if we're in a disc node
        end
    else
        -- weird but happens (in some arabic font)
        resetinjection(start)
        setchar(start,glyph)
        if trace_ligatures then
            logprocess("%s: replacing %s by (no real) ligature %s case 3",pref(dataset,sequence),gref(startchar),gref(glyph))
        end
    end
    return head, start, true, false
end
960 961
-- Single positioning handler.
--
-- Nothing is done (third result false) when the glyph carries the
-- no_right_kern_code option. Otherwise `kerns` is applied to `start`: through
-- setposition when the step format is "single" or `kerns` is a table, else
-- through setmove (format "move") or setkern. In those cases the third result
-- is true.
function handlers.gpos_single(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
    if hasglyphoption(start,no_right_kern_code) then
        return head, start, false
    end
    local startchar = getchar(start)
    local format    = step.format
    if format == "single" or type(kerns) == "table" then -- the table check can go
        local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns,injection)
        if trace_kerns then
            logprocess("%s: shifting single %s by %s xy (%p,%p) and wh (%p,%p)",pref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
        end
        return head, start, true
    end
    local setter = format == "move" and setmove or setkern
    local k      = setter(start,factor,rlmode,kerns,injection)
    if trace_kerns then
        logprocess("%s: shifting single %s by %s %p",pref(dataset,sequence),gref(startchar),format,k)
    end
    return head, start, true
end
981 982
-- Pair positioning handler.
--
-- Looks forward from `start` for the first character that is not in the skip
-- hash and applies the entry that `kerns` holds for it. With step format "pair"
-- the entry holds two slots (first and second glyph); a slot that is literally
-- true means "zero" and needs no positioning call. Otherwise a non-zero entry
-- is applied to the second glyph with setmove or setkern.
--
-- Returns head, start, done. After a "pair" hit start is moved to the second
-- glyph when the second slot is set (or when forcepairadvance is set).
function handlers.gpos_pair(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
    if hasglyphoption(start,no_right_kern_code) then
        return head, start, false
    end
    local snext = getnext(start)
    local prev  = start
    -- without a next node the loop body never runs and we report "not done"
    while snext do
        local nextchar = ischar(snext,currentfont)
        if not nextchar then
            break
        end
        if skiphash and skiphash[nextchar] then -- includes marks too when flag
            prev  = snext
            snext = getnext(snext)
        else
            local krn = kerns[nextchar]
            if not krn then
                break
            end
            local format = step.format
            if format == "pair" then
                local first  = krn[1]
                local second = krn[2]
                if first and first ~= true then -- true means zero
                    local x, y, w, h = setposition(1,start,factor,rlmode,first,injection)
                    if trace_kerns then
                        local startchar = getchar(start)
                        logprocess("%s: shifting first of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
                    end
                end
                if second and second ~= true then -- true means zero
                    local x, y, w, h = setposition(2,snext,factor,rlmode,second,injection)
                    if trace_kerns then
                        local startchar = getchar(start)
                        logprocess("%s: shifting second of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
                    end
                end
                if second or forcepairadvance then
                    -- a second slot: cf spec; forcepairadvance: for testing, not cf spec
                    start = snext
                end
                return head, start, true
            end
            if krn == 0 then -- can't happen
                break
            end
            local setter = format == "move" and setmove or setkern
            local k      = setter(snext,factor,rlmode,krn,injection)
            if trace_kerns then
                logprocess("%s: inserting %s %p between %s and %s as %s",pref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar),injection or "injections")
            end
            return head, start, true
        end
    end
    return head, start, false
end
1047 1048
--[[ldx--
<p>We get hits on a mark, but we're not sure if it has to be applied so
we need to explicitly test for basechar, baselig and basemark entries.</p>
--ldx]]
--
1052 1053
-- Mark to base positioning handler.
--
-- `start` must be a mark. The base is the nearest preceding character that is
-- not itself a mark: [glyph] [optional marks] [start=mark]. When
-- markanchors[1] has an entry for that base character the mark is anchored
-- with setmark and the third result is true; in every other case it is false.
function handlers.gpos_mark2base(head,start,dataset,sequence,markanchors,rlmode,skiphash)
    local markchar = getchar(start)
    if not marks[markchar] then
        if trace_bugs then
            logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local base = getprev(start)
    if not base then
        if trace_bugs then
            logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),2)
        end
        return head, start, false
    end
    local basechar = ischar(base,currentfont)
    if not basechar then
        if trace_bugs then
            logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),1)
        end
        return head, start, false
    end
    -- step back over preceding marks until a non-mark character is found
    while marks[basechar] do
        base = getprev(base)
        if not base then
            if trace_bugs then
                logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
            end
            return head, start, false
        end
        basechar = ischar(base,currentfont)
        if not basechar then
            if trace_bugs then
                logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
            end
            return head, start, false
        end
    end
    local ba = markanchors[1][basechar]
    if not ba then
        if trace_bugs then
            -- onetimemessage(currentfont,basechar,"no base anchors")
            logwarning("%s: mark %s is not anchored to %s",pref(dataset,sequence),gref(markchar),gref(basechar))
        end
        return head, start, false
    end
    local ma = markanchors[2]
    local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
    if trace_marks then
        logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
            pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
    end
    return head, start, true
end
1107 1108
-- Mark to ligature positioning handler.
--
-- Like the mark to base case, the base is the nearest preceding character that
-- is not a mark: [glyph] [optional marks] [start=mark]. The base anchors in
-- markanchors[1] are additionally indexed by the value getligaindex returns
-- for the mark. Only when an anchor exists at that index is the mark anchored
-- with setmark and the third result true.
function handlers.gpos_mark2ligature(head,start,dataset,sequence,markanchors,rlmode,skiphash)
    local markchar = getchar(start)
    if not marks[markchar] then
        if trace_bugs then
            logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local base = getprev(start)
    if not base then
        if trace_bugs then
            logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),2)
        end
        return head, start, false
    end
    local basechar = ischar(base,currentfont)
    if not basechar then
        if trace_bugs then
            logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),1)
        end
        return head, start, false
    end
    -- step back over preceding marks until a non-mark character is found
    while marks[basechar] do
        base = getprev(base)
        if not base then
            if trace_bugs then
                logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
            end
            return head, start, false
        end
        basechar = ischar(base,currentfont)
        if not basechar then
            if trace_bugs then
                logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
            end
            return head, start, false
        end
    end
    local anchors = markanchors[1][basechar]
    if not anchors then
        if trace_bugs then
            -- logwarning("%s: char %s is missing in font",pref(dataset,sequence),gref(basechar))
            onetimemessage(currentfont,basechar,"no base anchors")
        end
        return head, start, false
    end
    local ma = markanchors[2]
    if not ma then
        return head, start, false
    end
    local index = getligaindex(start)
    local ba    = anchors[index]
    if not ba then
        if trace_bugs then
            logwarning("%s: no matching anchors for mark %s and baselig %s with index %a",pref(dataset,sequence),gref(markchar),gref(basechar),index)
        end
        return head, start, false
    end
    local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
    if trace_marks then
        logprocess("%s, index %s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
            pref(dataset,sequence),index,bound,gref(markchar),gref(basechar),index,dx,dy)
    end
    return head, start, true
end
1172 1173
-- Mark to mark positioning handler: [glyph] [basemark] [start=mark].
--
-- The base mark is the node before `start`; when `start` has a ligature index,
-- preceding nodes that carry a different ligature index are stepped over
-- first. If markanchors[1] has an entry for the base character the mark is
-- anchored with setmark and the third result is true; otherwise it is false.
function handlers.gpos_mark2mark(head,start,dataset,sequence,markanchors,rlmode,skiphash)
    local markchar = getchar(start)
    if not marks[markchar] then
        if trace_bugs then
            logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local base = getprev(start)
    local slc  = getligaindex(start)
    if slc then -- a rather messy loop ... needs checking with husayni
        while base do
            local blc = getligaindex(base)
            if not blc or blc == slc then
                break
            end
            base = getprev(base)
        end
    end
    if not base then
        return head, start, false
    end
    local basechar = ischar(base,currentfont)
    if not basechar then -- subtype test can go
        return head, start, false
    end
    local ba = markanchors[1][basechar] -- slot 1 has been made copy of the class hash
    if not ba then
        return head, start, false
    end
    local ma = markanchors[2]
    local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],true,checkmarks)
    if trace_marks then
        logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
            pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
    end
    return head, start, true
end
1208 1209
-- Cursive attachment handler (to be checked).
--
-- A mark as start character is ignored. Otherwise the first following
-- character that is not a mark is paired with `start`: the exit anchor comes
-- from exitanchors[3], the entry anchor from slot 2 of that character's entry
-- in exitanchors[1]. When both exist setcursive is called and the third result
-- is true; otherwise it is false.
function handlers.gpos_cursive(head,start,dataset,sequence,exitanchors,rlmode,skiphash,step)
    local startchar = getchar(start)
    if marks[startchar] then
        if trace_cursive then
            logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
        end
        return head, start, false
    end
    local nxt = getnext(start)
    while nxt do
        local nextchar = ischar(nxt,currentfont)
        if not nextchar then
            break
        end
        if not marks[nextchar] then
            -- first non-mark character: either it attaches or we give up
            local exit  = exitanchors[3]
            local entry = exit and exitanchors[1][nextchar]
            entry = entry and entry[2]
            if entry then
                local r2lflag = sequence.flags[4] -- mentioned in the standard
                local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag)
                if trace_cursive then
                    logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
                end
                return head, start, true
            end
            break
        end
        -- marks are stepped over, always sequence.flags[1]
        nxt = getnext(nxt)
    end
    return head, start, false
end
1245 1246
--[[ldx-- 1247<p>I will implement multiple chain replacements once I run into a font that uses 1248it. It's not that complex to handle.</p> 1249--ldx]]
--
1250 1251
-- Registry of the chain processors, filled in below.
local chainprocs = {}

-- Subchain flavour of the trace reporter: when step tracing is enabled the
-- message is registered first, and unless that tracing is set to "silent" the
-- message is also sent to report_subchain.
local function logprocess(...)
    local mode = trace_steps
    if mode then
        registermessage(...)
    end
    if mode ~= "silent" then
        report_subchain(...)
    end
end

-- Warnings go straight to the subchain reporter.
local logwarning = report_subchain
1264 1265
-- Chain flavour of the trace reporter; it shadows the logprocess declared
-- earlier in this file for everything that follows. When step tracing is
-- enabled the message is registered first, and unless that tracing is set to
-- "silent" the message is also sent to report_chain.
local function logprocess(...)
    local mode = trace_steps
    if mode then
        registermessage(...)
    end
    if mode ~= "silent" then
        report_chain(...)
    end
end

-- Warnings go straight to the chain reporter.
local logwarning = report_chain
1276 1277
-- We could share functions but that would lead to extra function calls with many
1278
-- arguments, redundant tests and confusing messages.
1279 1280
-- The reversesub is a special case, which is why we need to store the replacements
1281
-- in a bit weird way. There is no lookup and the replacement comes from the lookup
1282
-- itself. It is meant mostly for dealing with Urdu.
1283 1284
-- Reverse chaining single substitution. The replacement is looked up directly
-- in `replacements` by the character of `start`; the third result tells
-- whether a replacement was found and applied.
local function reversesub(head,start,stop,dataset,sequence,replacements,rlmode,skiphash)
    local oldchar = getchar(start)
    local newchar = replacements[oldchar]
    if not newchar then
        return head, start, false
    end
    if trace_singles then
        logprocess("%s: single reverse replacement of %s by %s",cref(dataset,sequence),gref(oldchar),gref(newchar))
    end
    resetinjection(start)
    setchar(start,newchar)
    return head, start, true
end

chainprocs.reversesub = reversesub
1301 1302
--[[ldx--
<p>This chain stuff is somewhat tricky since we can have a sequence of actions to be
applied: single, alternate, multiple or ligature where ligature can be an invalid
one in the sense that it will replace multiple by one but not necessarily one that
looks like the combination (i.e. it is the counterpart of multiple then). For
example, the following is valid:</p>

<typing>
<line>xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx</line>
</typing>

<p>Therefore we don't really do the replacement here already unless we have the
single lookup case. The efficiency of the replacements can be improved by deleting
as little as needed but that would also make the code even more messy.</p>
--ldx]]
--
1317 1318
--[[ldx-- 1319<p>Here we replace start by a single variant.</p> 1320--ldx]]
--
1321 1322
-- To be done (example needed): what if > 1 steps
1323 1324
-- this is messy: do we need this disc checking also in alternates?
1325 1326
-- Warns that the lookup of this chain has no steps at all.
local function reportzerosteps(dataset,sequence)
    local where = cref(dataset,sequence)
    logwarning("%s: no steps",where)
end
1329 1330
-- Warns that the lookup of this chain has more than one step.
local function reportmoresteps(dataset,sequence)
    local where = cref(dataset,sequence)
    logwarning("%s: more than 1 step",where)
end
1333 1334
-- local function reportbadsteps(dataset,sequence)
1335
-- logwarning("%s: bad step, no proper return values",cref(dataset,sequence))
1336
-- end
1337 1338
-- Resolves the mapping of a chain lookup and caches it on the lookup itself.
--
-- With zero steps a warning is issued, `mapping` is cached as false and false
-- is returned. Otherwise the coverage of the first step becomes the mapping
-- (more than one step only triggers a warning) and the format of that step is
-- stored in `currentlookup.format` as well.
local function getmapping(dataset,sequence,currentlookup)
    local steps    = currentlookup.steps
    local nofsteps = currentlookup.nofsteps
    if nofsteps == 0 then
        reportzerosteps(dataset,sequence)
        currentlookup.mapping = false
        return false
    end
    if nofsteps > 1 then
        reportmoresteps(dataset,sequence)
    end
    local coverage = steps[1].coverage
    currentlookup.mapping = coverage
    currentlookup.format  = steps[1].format
    return coverage
end
1355 1356
-- Chain processor that removes the start node from the list. The second result
-- is what getprev gives for the node that remove_node hands back; the third
-- result is always true.
function chainprocs.gsub_remove(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    if trace_chains then
        local where = cref(dataset,sequence,chainindex)
        logprocess("%s: removing character %s",where,gref(getchar(start)))
    end
    local newhead, current = remove_node(head,start,true)
    return newhead, getprev(current), true
end
1363 1364
-- Chained single substitution.
--
-- The mapping is taken from the lookup (resolved through getmapping on first
-- use). Scanning from `start` up to `stop`, the first node for which ischar
-- gives a character settles the outcome: it is replaced when the mapping has a
-- non-empty entry for it, and the third result is true either way. When no
-- such node is found the third result is false.
function chainprocs.gsub_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local current = start
    while current do
        local currentchar = ischar(current)
        if currentchar then
            local replacement = mapping[currentchar]
            if replacement and replacement ~= "" then
                if trace_singles then
                    logprocess("%s: replacing single %s by %s",cref(dataset,sequence,chainindex),gref(currentchar),gref(replacement))
                end
                resetinjection(current)
                setchar(current,replacement)
            elseif trace_bugs then
                logwarning("%s: no single for %s",cref(dataset,sequence,chainindex),gref(currentchar))
            end
            return head, start, true
        end
        -- a false result can't happen; reaching stop ends the scan as well
        if currentchar == false or current == stop then
            break
        end
        current = getnext(current)
    end
    return head, start, false
end
1399 1400
--[[ldx-- 1401<p>Here we replace start by new glyph. First we delete the rest of the match.</p> 1402--ldx]]
--
1403 1404
-- char_1 mark_1 -> char_x mark_1 (ignore marks)
1405
-- char_1 mark_1 -> char_x
1406 1407
-- to be checked: do we always have just one glyph?
1408
-- we can also have alternates for marks
1409
-- marks come last anyway
1410
-- are there cases where we need to delete the mark
1411 1412
-- Chained alternate substitution.
--
-- The mapping is taken from the lookup (resolved through getmapping on first
-- use). The requested alternate comes from dataset[1]; when that entry is
-- literally true the value stored for the feature (dataset[4]) in the font's
-- shared feature table is used instead.
--
-- Scanning from `start` up to `stop`, the first node for which ischar gives a
-- character settles the outcome: if the mapping has alternatives for it and
-- get_alternative_glyph picks one, that node gets the chosen glyph. The third
-- result is true as soon as such a node was found, false otherwise.
function chainprocs.gsub_alternate(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local kind  = dataset[4]
        local what  = dataset[1]
        local value = what == true and tfmdata.shared.features[kind] or what -- todo: optimize in ctx
        local current = start
        while current do
            local currentchar = ischar(current)
            if currentchar then
                local alternatives = mapping[currentchar]
                if alternatives then
                    local choice, comment = get_alternative_glyph(current,alternatives,value)
                    if choice then
                        if trace_alternatives then
                            logprocess("%s: replacing %s by alternative %a to %s, %s",cref(dataset,sequence),gref(currentchar),choice,gref(choice),comment)
                        end
                        -- the alternative was looked up for current, so that is
                        -- the node that has to change (same as in gsub_single)
                        resetinjection(current)
                        setchar(current,choice)
                    else
                        if trace_alternatives then
                            logwarning("%s: no variant %a for %s, %s",cref(dataset,sequence),value,gref(currentchar),comment)
                        end
                    end
                end
                return head, start, true
            elseif currentchar == false then
                -- can't happen
                break
            elseif current == stop then
                break
            else
                current = getnext(current)
            end
        end
    end
    return head, start, false
end
1453 1454
--[[ldx-- 1455<p>Here we replace start by a sequence of new glyphs.</p> 1456--ldx]]
--
1457 1458
-- Chained multiple substitution.
--
-- The mapping is taken from the lookup (resolved through getmapping on first
-- use). When it has a non-empty entry for the start character the work is
-- delegated to multiple_glyphs and its results are returned as they are. In
-- all other cases the third result is false.
function chainprocs.gsub_multiple(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local startchar   = getchar(start)
    local replacement = mapping[startchar]
    if replacement and replacement ~= "" then
        if trace_multiples then
            logprocess("%s: replacing %s by multiple characters %s",cref(dataset,sequence),gref(startchar),gref(replacement))
        end
        return multiple_glyphs(head,start,replacement,skiphash,dataset[1],stop)
    end
    if trace_bugs then
        logwarning("%s: no multiple for %s",cref(dataset,sequence),gref(startchar))
    end
    return head, start, false
end
1479 1480
--[[ldx-- 1481<p>When we replace ligatures we use a helper that handles the marks. I might change 1482this function (move code inline and handle the marks by a separate function). We 1483assume rather stupid ligatures (no complex disc nodes).</p> 1484--ldx]]
--
1485 1486
-- compare to handlers.gsub_ligature which is more complex ... why
1487 1488
-- Chained ligature substitution.
--
-- The mapping is taken from the lookup (resolved through getmapping on first
-- use) and holds, per start character, a tree keyed by follow-up character. A
-- finished ligature is either a plain number or the `ligature` field of the
-- subtree that was reached. The scan runs from the node after `start` and
-- does not go beyond `stop`; characters in the skip hash are stepped over and
-- the first disc node that is met is remembered.
--
-- Returns head, start, done, the number of matched components and the disc
-- node that was seen (or false). On failure these are false, 0 and false.
function chainprocs.gsub_ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local startchar = getchar(start)
        local ligatures = mapping[startchar]
        if not ligatures then
            if trace_bugs then
                logwarning("%s: no ligatures starting with %s",cref(dataset,sequence,chainindex),gref(startchar))
            end
        else
            local hasmarks        = marks[startchar]
            local current         = getnext(start)
            local discfound       = false
            local last            = stop
            local nofreplacements = 1
            while current do
                -- todo: ischar ... can there really be disc nodes here?
                local id = getid(current)
                if id == disc_code then
                    if not discfound then
                        discfound = current
                    end
                    if current == stop then
                        break -- okay? or before the disc
                    else
                        current = getnext(current)
                    end
                else
                    local schar = getchar(current)
                    if skiphash and skiphash[schar] then -- marks
                        -- if current == stop then -- maybe add this
                        --     break
                        -- else
                            current = getnext(current)
                        -- end
                    else
                        local lg = not tonumber(ligatures) and ligatures[schar]
                        if lg then
                            ligatures       = lg
                            last            = current
                            nofreplacements = nofreplacements + 1
                            -- test the component that just matched; the name
                            -- used here before was not defined in this function
                            if marks[schar] then
                                hasmarks = true
                            end
                            if current == stop then
                                break
                            else
                                current = getnext(current)
                            end
                        else
                            break
                        end
                    end
                end
            end
            local ligature = tonumber(ligatures) or ligatures.ligature
            if ligature then
                if chainindex then
                    -- the ligature ends at the last component that matched
                    stop = last
                end
                if trace_ligatures then
                    if start == stop then
                        logprocess("%s: replacing character %s by ligature %s case 3",cref(dataset,sequence,chainindex),gref(startchar),gref(ligature))
                    else
                        logprocess("%s: replacing character %s upto %s by ligature %s case 4",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)),gref(ligature))
                    end
                end
                head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks)
                return head, start, true, nofreplacements, discfound
            elseif trace_bugs then
                if start == stop then
                    logwarning("%s: replacing character %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar))
                else
                    logwarning("%s: replacing character %s upto %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)))
                end
            end
        end
    end
    return head, start, false, 0, false
end
1571 1572
-- Chained single positioning.
--
-- Nothing is done when the glyph carries the no_right_kern_code option. The
-- mapping is taken from the lookup (resolved through getmapping on first use);
-- when it has an entry for the start character that entry is applied through
-- setposition (lookup format "single") or through setmove / setkern, and the
-- third result is true. In all other cases it is false.
function chainprocs.gpos_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    -- we actually should check no_left_kern_code with next
    if not hasglyphoption(start,no_right_kern_code) then
        local mapping = currentlookup.mapping
        if mapping == nil then
            mapping = getmapping(dataset,sequence,currentlookup)
        end
        if mapping then
            local startchar = getchar(start)
            local kerns     = mapping[startchar]
            if kerns then
                local format = currentlookup.format
                if format == "single" then
                    local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns) -- currentlookup.flags ?
                    if trace_kerns then
                        logprocess("%s: shifting single %s by %s (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
                    end
                else -- needs checking .. maybe no kerns format for single
                    -- no injection argument: this function has no such parameter
                    -- or local, and chainprocs.gpos_pair doesn't pass one either
                    local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns)
                    if trace_kerns then
                        logprocess("%s: shifting single %s by %s %p",cref(dataset,sequence),gref(startchar),format,k)
                    end
                end
                return head, start, true
            end
        end
    end
    return head, start, false
end
1601 1602
-- Chained pair positioning (todo: injections ?).
--
-- Nothing is done when the glyph carries the no_right_kern_code option. The
-- mapping is taken from the lookup (resolved through getmapping on first use)
-- and gives the kern table of the start character. The first following
-- character that is not in the skip hash decides: with lookup format "pair"
-- its entry holds two slots (first and second glyph, a literal true meaning
-- "zero"), otherwise a non-zero entry is applied to the second glyph with
-- setmove or setkern.
--
-- Returns head, start, done. After a "pair" hit start is moved to the second
-- glyph when the second slot is set (or when forcepairadvance is set).
function chainprocs.gpos_pair(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    -- we actually should check no_left_kern_code with next
    if hasglyphoption(start,no_right_kern_code) then
        return head, start, false
    end
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local snext = getnext(start)
    if not snext then
        return head, start, false
    end
    local kerns = mapping[getchar(start)] -- always 1 step
    if not kerns then
        return head, start, false
    end
    local prev = start
    while snext do
        local nextchar = ischar(snext,currentfont)
        if not nextchar then
            break
        end
        if skiphash and skiphash[nextchar] then
            prev  = snext
            snext = getnext(snext)
        else
            local krn = kerns[nextchar]
            if not krn then
                break
            end
            local format = currentlookup.format
            if format == "pair" then
                local first  = krn[1]
                local second = krn[2]
                if first and first ~= true then -- true means zero
                    local x, y, w, h = setposition(1,start,factor,rlmode,first,"injections") -- currentlookups flags?
                    if trace_kerns then
                        local startchar = getchar(start)
                        logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
                    end
                end
                if second and second ~= true then -- true means zero
                    local x, y, w, h = setposition(2,snext,factor,rlmode,second,"injections")
                    if trace_kerns then
                        local startchar = getchar(start)
                        logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
                    end
                end
                if second or forcepairadvance then
                    -- a second slot: cf spec; forcepairadvance: for testing, not cf spec
                    start = snext
                end
                return head, start, true
            end
            if krn == 0 then
                break
            end
            local setter = format == "move" and setmove or setkern
            local k      = setter(snext,factor,rlmode,krn)
            if trace_kerns then
                logprocess("%s: inserting %s %p between %s and %s",cref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar))
            end
            return head, start, true
        end
    end
    return head, start, false
end
1673 1674
-- Chained mark-to-base positioning: anchors the mark glyph at 'start' onto the
-- nearest preceding glyph of the current font that is not itself a mark.
--
-- head, start       : list head and the (mark) glyph that is being positioned
-- stop, skiphash,
-- chainindex        : part of the common chainprocs signature, not used here
-- dataset, sequence : only used to build the references in log messages
-- currentlookup     : the chain step; its mapping is taken from the step or else
--                     resolved with getmapping
-- rlmode            : passed on to setmark
--
-- Returns head, start and true when setmark has been called, otherwise
-- head, start and false.

function chainprocs.gpos_mark2base(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local markchar = getchar(start)
        if marks[markchar] then
            local markanchors = mapping[markchar] -- always 1 step
            if markanchors then
                local base = getprev(start) -- [glyph] [start=mark]
                if base then
                    local basechar = ischar(base,currentfont)
                    if basechar then
                        if marks[basechar] then
                            -- Walk back over the preceding marks. We deliberately update the
                            -- outer basechar (no new local) so that the anchor lookup below
                            -- uses the glyph we stop at and not the first mark we have seen.
                            while base do
                                base = getprev(base)
                                if base then
                                    basechar = ischar(base,currentfont)
                                    if basechar then
                                        if not marks[basechar] then
                                            break
                                        end
                                    else
                                        if trace_bugs then
                                            logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
                                        end
                                        return head, start, false
                                    end
                                else
                                    if trace_bugs then
                                        logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
                                    end
                                    return head, start, false
                                end
                            end
                        end
                        -- markanchors[1] is indexed by base character, markanchors[2] is the mark anchor
                        local ba = markanchors[1][basechar]
                        if ba then
                            local ma = markanchors[2]
                            if ma then
                                local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
                                if trace_marks then
                                    logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
                                        cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
                                end
                                return head, start, true
                            end
                        end
                    elseif trace_bugs then
                        logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),1)
                    end
                elseif trace_bugs then
                    logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),2)
                end
            elseif trace_bugs then
                logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
            end
        elseif trace_bugs then
            logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
        end
    end
    return head, start, false
end
1738 1739
-- Chained mark-to-ligature positioning: anchors the mark glyph at 'start' onto a
-- component of the nearest preceding non-mark glyph of the current font. The
-- component is selected with the index that getligaindex reports for the mark.
--
-- head, start       : list head and the (mark) glyph that is being positioned
-- stop, skiphash,
-- chainindex        : part of the common chainprocs signature, not used here
-- dataset, sequence : only used to build the references in log messages
-- currentlookup     : the chain step; its mapping is taken from the step or else
--                     resolved with getmapping
-- rlmode            : passed on to setmark
--
-- Returns head, start and true when setmark has been called, otherwise
-- head, start and false.

function chainprocs.gpos_mark2ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local markchar = getchar(start)
        if marks[markchar] then
            local markanchors = mapping[markchar] -- always 1 step
            if markanchors then
                local base = getprev(start) -- [glyph] [optional marks] [start=mark]
                if base then
                    local basechar = ischar(base,currentfont)
                    if basechar then
                        if marks[basechar] then
                            -- Walk back over the preceding marks. We deliberately update the
                            -- outer basechar (no new local) so that the anchor lookup below
                            -- uses the glyph we stop at and not the first mark we have seen.
                            while base do
                                base = getprev(base)
                                if base then
                                    basechar = ischar(base,currentfont)
                                    if basechar then
                                        if not marks[basechar] then
                                            break
                                        end
                                    else
                                        if trace_bugs then
                                            logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),gref(markchar),1)
                                        end
                                        return head, start, false
                                    end
                                else
                                    if trace_bugs then
                                        logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),gref(markchar),2)
                                    end
                                    return head, start, false
                                end
                            end
                        end
                        -- markanchors[1][basechar] holds the anchors per ligature component
                        local ba = markanchors[1][basechar]
                        if ba then
                            local ma = markanchors[2]
                            if ma then
                                local index = getligaindex(start)
                                ba = ba[index]
                                if ba then
                                    local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
                                    if trace_marks then
                                        -- we report bound directly (there used to be a stray global here)
                                        logprocess("%s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
                                            cref(dataset,sequence),bound,gref(markchar),gref(basechar),index,dx,dy)
                                    end
                                    return head, start, true
                                end
                            end
                        end
                    elseif trace_bugs then
                        logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),1)
                    end
                elseif trace_bugs then
                    logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),2)
                end
            elseif trace_bugs then
                logwarning("%s, mark %s has no anchors",cref(dataset,sequence),gref(markchar))
            end
        elseif trace_bugs then
            logwarning("%s, mark %s is no mark",cref(dataset,sequence),gref(markchar))
        end
    end
    return head, start, false
end
1807 1808
-- Chained mark-to-mark positioning: anchors the mark glyph at 'start' onto a
-- preceding glyph. When the mark carries a ligature index we first step back
-- over preceding nodes that carry a different (non nil) ligature index.
--
-- stop, skiphash and chainindex belong to the common chainprocs signature and
-- are not used here; dataset and sequence only serve the log references.
--
-- Returns head, start and true when setmark has been called, otherwise
-- head, start and false.

function chainprocs.gpos_mark2mark(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local markchar = getchar(start)
    if not marks[markchar] then
        if trace_bugs then
            logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local markanchors = mapping[markchar] -- always 1 step
    if not markanchors then
        if trace_bugs then
            logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local base       = getprev(start) -- [glyph] [basemark] [start=mark]
    local startindex = getligaindex(start)
    if startindex then
        -- a rather messy loop ... needs checking with husayni
        while base do
            local baseindex = getligaindex(base)
            if not baseindex or baseindex == startindex then
                break
            end
            base = getprev(base)
        end
    end
    if not base then
        if trace_bugs then
            logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),2)
        end
        return head, start, false
    end
    local basechar = ischar(base,currentfont)
    if not basechar then
        if trace_bugs then
            logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),1)
        end
        return head, start, false
    end
    -- we need both the anchor of the base mark and the one of the mark itself
    local baseanchor = markanchors[1][basechar]
    local markanchor = baseanchor and markanchors[2]
    if markanchor then
        local dx, dy, bound = setmark(start,base,factor,rlmode,baseanchor,markanchor,characters[basechar],true,checkmarks)
        if trace_marks then
            logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
                cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
        end
        return head, start, true
    end
    return head, start, false
end
1860 1861
-- Chained cursive attachment: connects the glyph at 'start' with the first
-- following glyph of the current font that is not a mark, using the exit anchor
-- of 'start' and the entry anchor registered for that following glyph.
--
-- stop, skiphash and chainindex belong to the common chainprocs signature and
-- are not used here; dataset and sequence serve the log references and
-- sequence.flags[4] is handed to setcursive.
--
-- Returns head, start and true when setcursive has been called, otherwise
-- head, start and false.

function chainprocs.gpos_cursive(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local startchar   = getchar(start)
    local exitanchors = mapping[startchar] -- always 1 step
    if not exitanchors then
        if trace_cursive and trace_details then
            logprocess("%s, cursive %s is already done",pref(dataset,sequence),gref(getchar(start)),alreadydone)
        end
        return head, start, false
    end
    if marks[startchar] then
        if trace_cursive then
            logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
        end
        return head, start, false
    end
    local nxt = getnext(start)
    while nxt do
        local nextchar = ischar(nxt,currentfont)
        if not nextchar then
            break
        end
        if marks[nextchar] then
            -- should not happen (maybe warning)
            nxt = getnext(nxt)
        else
            -- the first non mark decides: we either attach to it or give up
            local exitanchor = exitanchors[3]
            if not exitanchor then
                if trace_bugs then
                    onetimemessage(currentfont,startchar,"no entry anchors")
                end
                break
            end
            local entryanchor = exitanchors[1][nextchar]
            entryanchor = entryanchor and entryanchor[2]
            if entryanchor then
                local r2lflag = sequence.flags[4] -- mentioned in the standard
                local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exitanchor,entryanchor,characters[startchar],characters[nextchar],r2lflag)
                if trace_cursive then
                    logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
                end
                return head, start, true
            end
            break
        end
    end
    return head, start, false
end
1911 1912
-- what pointer to return, spec says stop
1913
-- to be discussed ... is bidi changer a space?
1914
-- elseif char == zwnj and sequence[n][32] then -- brrr
1915 1916
-- Tracing helper: reports that 'char' (with the given class) is skipped while
-- matching rule 'ck'. The rule id comes from ck[1] and the lookup type from
-- ck[8], falling back to ck[2].

local function show_skip(dataset,sequence,char,ck,class)
    local rule       = ck[1]
    local lookuptype = ck[8] or ck[2]
    logwarning(
        "%s: skipping char %s, class %a, rule %a, lookuptype %a",
        cref(dataset,sequence),
        gref(char),
        class,
        rule,
        lookuptype
    )
end
1919 1920
-- A previous version had disc collapsing code in the (single sub) handler plus some
1921
-- checking in the main loop, but that left the pre/post sequences undone. The best
1922
-- solution is to add some checking there and backtrack when a replace/post matches
1923
-- but it takes a bit of work to figure out an efficient way (this is what the
1924
-- sweep* names refer to). I might look into that variant one day again as it can
1925
-- replace some other code too. In that approach we can have a special version for
1926
-- gsub and gpos which gains some speed. This method does the test and passes info to
1927
-- the handlers. Here collapsing is handled in the main loop which also makes code
1928
-- elsewhere simpler (i.e. no need for the other special runners and disc code in
1929
-- ligature building). I also experimented with pushing preceding glyphs sequences
1930
-- in the replace/pre fields beforehand which saves checking afterwards but at the
1931
-- cost of duplicate glyphs (memory) but it's too much overhead (runtime).
1932
--
1933
-- In the meantime Kai had moved the code from the single chain into a more general
1934
-- handler and this one (renamed to chaindisk) is used now. I optimized the code a
1935
-- bit and brought it in sync with the other code. Hopefully I didn't introduce
1936
-- errors. Note: this somewhat complex approach is meant for fonts that implement
1937
-- (for instance) ligatures by character replacement which to some extent is not
1938
-- that suitable for hyphenation. I also use some helpers. This method passes some
1939
-- states but reparses the list. There is room for a bit of speed up but that will
1940
-- be done in the context version. (In fact a partial rewrite of all code can bring
1941
-- some more efficiency.)
1942
--
1943
-- I didn't test it with extremes but successive disc nodes still can give issues
1944
-- but in order to handle that we need more complex code which also slows down even
1945
-- more. The main loop variant could deal with that: test, collapse, backtrack.
1946 1947
-- userkern(k) returns a kern node for amount k. When nuts.pool.newkern exists
-- (context) we use that one, otherwise (generic) we build the constructor
-- ourselves by copying a template kern node.

local userkern = nuts.pool and nuts.pool.newkern -- context

if not userkern then -- generic

    local kerntemplate = nuts.new("kern",1) -- userkern
    local setkernvalue = nuts.setkern       -- not injections.setkern

    userkern = function(amount)
        local kern = copy_node(kerntemplate)
        setkernvalue(kern,amount)
        return kern
    end

end
1961 1962
-- Walks the list that starts at 'head' and replaces every glue node by a kern
-- (made with userkern) that gets the width of that glue; the glue itself is
-- flushed. Returns the possibly changed head of the list.

local function checked(head)
    local current = head
    while current do
        if getid(current) ~= glue_code then
            current = getnext(current)
        else
            local kern = userkern(getwidth(current))
            local following
            if head == current then
                -- the glue is the first node so the kern becomes the new head
                following = getnext(current)
                if following then
                    setlink(kern,following)
                end
                head = kern
            else
                local preceding
                preceding, following = getboth(current)
                setlink(preceding,kern,following)
            end
            flushnode(current)
            current = following
        end
    end
    return head
end
1987 1988
-- Variant of setdisc that first runs each given list (pre, post, replace)
-- through 'checked'; fields that are nil or false are passed on untouched.

local function setdiscchecked(d,pre,post,replace)
    setdisc(
        d,
        pre     and checked(pre),
        post    and checked(post),
        replace and checked(replace)
    )
end
1994 1995
-- A table with four false entries. NOTE(review): presumably the fallback for
-- sequences that carry no flags; its users are outside this part of the file.
local
noflags
=
{
false
,
false
,
false
,
false
}
1996 1997
-- Applies the lookups of a matched contextual rule 'ck' to the matched run.
--
-- head, start : list head and the first node of the current match
-- last        : passed on to the chain procs (and to reversesub)
-- dataset,
-- sequence    : passed on and used for log references
-- rlmode,
-- skiphash    : passed on; skiphash is also used to step over skippable chars
-- ck          : the rule: ck[4] and ck[5] are the first and last index of the
--               current part, ck[6] holds the lookups per slot and ck[7] the
--               replacements that are handed to reversesub
--
-- Returns head, start and a boolean that tells if something has been done.
-- When there are neither lookups nor replacements the match counts as done.

local function chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck)

    local size         = ck[5] - ck[4] + 1
    local chainlookups = ck[6]
    local done         = false

    -- current match
    if chainlookups then
        -- Lookups can be like { 1, false, 3 } or { false, 2 } or basically anything and
        -- #lookups can be less than #current

        if size == 1 then

         -- if nofchainlookups > size then
         --     -- bad rules
         -- end

            local chainlookup = chainlookups[1]
            -- the slot can be empty (false) so we test, just as in the multi slot branch
            if chainlookup then
                for j=1,#chainlookup do
                    local chainstep = chainlookup[j]
                    local chainkind = chainstep.type
                    local chainproc = chainprocs[chainkind]
                    if chainproc then
                        local ok
                        -- HH: chainindex 1 added here (for KAI to check too), there are weird ligatures e.g.
                        -- char + mark -> char where mark has to disappear
                        head, start, ok = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash,1)
                        if ok then
                            done = true
                        end
                    else
                        logprocess("%s: %s is not yet supported (1)",cref(dataset,sequence),chainkind)
                    end
                end
            end

        else

            -- See LookupType 5: Contextual Substitution Subtable. Now it becomes messy. The
            -- easiest case is where #current maps on #lookups i.e. one-to-one. But what if
            -- we have a ligature. Cf the spec we then need to advance one character but we
            -- really need to test it as there are fonts out there that are fuzzy and have
            -- too many lookups:
            --
            -- U+1105 U+119E U+1105 U+119E : sourcehansansklight: script=hang ccmp=yes
            --
            -- Even worse are these family emoji shapes as they can have multiple lookups
            -- per slot (probably only for gpos).

            -- It's very unlikely that we will have skip classes here but still ... we seldom
            -- enter this branch anyway.

            local i               = 1
            local laststart       = start
            local nofchainlookups = #chainlookups -- useful?
            while start do
                if skiphash then -- hm, so we know we skip some
                    while start do
                        local char = ischar(start,currentfont)
                        if char then
                            if skiphash and skiphash[char] then
                                start = getnext(start)
                            else
                                break
                            end
                        else
                            break
                        end
                    end
                    if not start then
                        -- we skipped till the end of the list so there is nothing left to
                        -- apply a lookup to; start is restored from laststart below
                        break
                    end
                end
                local chainlookup = chainlookups[i]
                if chainlookup then
                    for j=1,#chainlookup do
                        local chainstep = chainlookup[j]
                        local chainkind = chainstep.type
                        local chainproc = chainprocs[chainkind]
                        if chainproc then
                            local ok, n
                            head, start, ok, n = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash,i)
                            -- messy since last can be changed !
                            if ok then
                                done = true
                                if n and n > 1 and i + n > nofchainlookups then
                                    -- this is a safeguard, we just ignore the rest of the lookups
                                    i = size -- prevents an advance
                                    break
                                end
                            end
                        else
                            -- actually an error
                            logprocess("%s: %s is not yet supported (2)",cref(dataset,sequence),chainkind)
                        end
                    end
                else
                    -- we skip but we could also delete as option .. what does an empty lookup actually mean
                    -- in opentype ... anyway, we could map it onto gsub_remove if needed
                end
                i = i + 1
                if i > size or not start then
                    break
                elseif start then
                    laststart = start
                    start     = getnext(start)
                end
            end
            if not start then
                start = laststart
            end

        end
    else
        -- todo: needs checking for holes in the replacements
        local replacements = ck[7]
        if replacements then
            head, start, done = reversesub(head,start,last,dataset,sequence,replacements,rlmode,skiphash)
        else
            done = true
            if trace_contexts then
                logprocess("%s: skipping match",cref(dataset,sequence))
            end
        end
    end
    return head, start, done
end
2120 2121
-- Runs a matched contextual rule 'ck' when discretionaries are involved in the
-- match. The function works in four steps:
--
-- (1) scan the current part of the match (slots ck[4] .. ck[5]) starting at 'start'
-- (2) scan the lookahead part (when ck[5] < #ck[3])
-- (3) scan the backtrack part (when ck[4] > 1)
-- (4) apply the lookups with chainrun, either directly on the main list or, when
--     a lookahead or backtrack disc has been registered, on the lists that get
--     stored in that disc
--
-- At most one disc is kept (the first one seen while 'keepdisc' holds, which
-- starts out true only when we are not sweeping); other discs can get flattened
-- with flattendisk. When glue has been counted as part of the match the disc
-- fields are set with setdiscchecked instead of setdisc.
--
-- Returns head, start and a boolean that tells if chainrun did something.

local function chaindisk(head,start,dataset,sequence,rlmode,skiphash,ck)

    if not start then
        return head, start, false
    end

    local startishead   = start == head
    local seq           = ck[3]
    local f             = ck[4]
    local l             = ck[5]
    local s             = #seq
    local done          = false
    local sweepnode     = sweepnode -- local copies as we change them here
    local sweeptype     = sweeptype
    local sweepoverflow = false
    local checkdisc     = getprev(head)
    local keepdisc      = not sweepnode
    local lookaheaddisc = nil
    local backtrackdisc = nil
    local current       = start
    local last          = start
    local prev          = getprev(start)
    local hasglue       = false

    -- fishy: so we can overflow and then go on in the sweep?
    -- todo : id can also be glue_code as we checked spaces

    -- (1) the current part of the match

    local i = f
    while i <= l do
        local id = getid(current)
        if id == glyph_code or id == glue_code then
            i       = i + 1
            last    = current
            current = getnext(current)
            if id == glue_code then
                hasglue = true
            end
        elseif id == disc_code then
            if keepdisc then
                keepdisc      = false
                lookaheaddisc = current
                local replace = getreplace(current)
                if not replace then
                    sweepoverflow = true
                    sweepnode     = current
                    current       = getnext(current)
                else
                    -- we can use an iterator
                    while replace and i <= l do
                        if getid(replace) == glyph_code then
                            i = i + 1
                        end
                        replace = getnext(replace)
                    end
                    -- NOTE(review): this continues from the node in the replace list
                    -- (or from nil) and not from the disc itself, while the lookahead
                    -- scan below uses getnext(current); kept as is, to be confirmed
                    current = getnext(replace)
                end
                last = current
            else
                head, current = flattendisk(head,current)
            end
        else
            last    = current
            current = getnext(current)
        end
        if not current then
            if sweepoverflow then
                -- we already are following up on sweepnode
                break
            end
            if sweeptype ~= "post" and sweeptype ~= "replace" then
                break -- added
            end
            current = getnext(sweepnode)
            if not current then
                break
            end
            sweeptype     = nil
            sweepoverflow = true
        end
    end

    -- when we ran beyond the sweepnode, the nodes between the sweepnode and
    -- current are cut out of the list and handed to appenddisc

    if sweepoverflow then
        local before = current and getprev(current)
        if not current or before ~= sweepnode then
            local overflowhead = getnext(sweepnode)
            local overflowtail = nil
            if before then
                overflowtail = before
                setprev(current,sweepnode)
            else
                overflowtail = find_node_tail(overflowhead)
            end
            setnext(sweepnode,current)
            setprev(overflowhead)
            setnext(overflowtail)
            appenddisc(sweepnode,overflowhead)
        end
    end

    -- (2) the lookahead part

    if l < s then
        local n        = l
        local cansweep = sweeptype == "post" or sweeptype == "replace"
        while current and n < s do
            local id = getid(current)
            if id == glyph_code or id == glue_code then
                n       = n + 1
                current = getnext(current)
                if id == glue_code then
                    hasglue = true
                end
            elseif id == disc_code then
                if keepdisc then
                    keepdisc = false
                    if notmatchpre[current] ~= notmatchreplace[current] then
                        lookaheaddisc = current
                    end
                    -- we assume a simple text only replace (we could use nuts.count)
                    local replace = getreplace(current)
                    while replace and n < s do
                        if getid(replace) == glyph_code then
                            n = n + 1
                        end
                        replace = getnext(replace)
                    end
                    current = getnext(current)
                elseif notmatchpre[current] ~= notmatchreplace[current] then
                    head, current = flattendisk(head,current)
                else
                    current = getnext(current) -- HH
                end
            else
                current = getnext(current)
            end
            if not current and cansweep then
                current = getnext(sweepnode)
                if current then
                    sweeptype = nil
                end
            end
        end
    end

    -- (3) the backtrack part

    if f > 1 then
        local back     = prev
        local n        = f
        local cansweep = sweeptype == "pre" or sweeptype == "replace"
        if not back and cansweep and back == checkdisc then
            back = getprev(sweepnode)
        end
        while back and n > 1 do -- missing getprev added / moved outside
            local id = getid(back)
            if id == glyph_code then
                n = n - 1
            elseif id == glue_code then
                n       = n - 1
                hasglue = true
            elseif id == disc_code then
                if keepdisc then
                    keepdisc = false
                    if notmatchpost[back] ~= notmatchreplace[back] then
                        backtrackdisc = back
                    end
                    -- we assume a simple text only replace (we could use nuts.count)
                    local replace = getreplace(back)
                    while replace and n > 1 do
                        if getid(replace) == glyph_code then
                            n = n - 1
                        end
                        replace = getnext(replace)
                    end
                elseif notmatchpost[back] ~= notmatchreplace[back] then
                    head, back = flattendisk(head,back)
                end
            end
            back = getprev(back)
            if cansweep and back == checkdisc then
                back = getprev(sweepnode)
            end
        end
    end

    -- (4) apply the lookups

    if lookaheaddisc then

        -- The run from cf up to the node before the disc is taken out of the main
        -- list. The original run is put in front of pre and a copy of it in front
        -- of replace, and each variant gets its own chainrun.

        local cf            = start
        local cl            = getprev(lookaheaddisc)
        local cprev         = getprev(start)
        local insertedmarks = 0

        while cprev do
            local char = ischar(cf,currentfont)
            if char and marks[char] then
                insertedmarks = insertedmarks + 1
                cf            = cprev
                startishead   = cf == head
                cprev         = getprev(cprev)
            else
                break
            end
        end
        setlink(cprev,lookaheaddisc)
        setprev(cf)
        setnext(cl)
        if startishead then
            head = lookaheaddisc
        end
        local pre, post, replace = getdisc(lookaheaddisc)
        local new  = copy_node_list(cf) -- br, how often does that happen
        local cnew = new
        if pre then
            setlink(find_node_tail(cf),pre)
        end
        if replace then
            local tail = find_node_tail(new)
            setlink(tail,replace)
        end
        for i=1,insertedmarks do
            cnew = getnext(cnew)
        end
        cl = start
        local clast = cnew
        for i=f,l do
            cl    = getnext(cl)
            clast = getnext(clast)
        end
        if not notmatchpre[lookaheaddisc] then
            local ok = false
            cf, start, ok = chainrun(cf,start,cl,dataset,sequence,rlmode,skiphash,ck)
            if ok then
                done = true
            end
        end
        if not notmatchreplace[lookaheaddisc] then
            local ok = false
            new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck)
            if ok then
                done = true
            end
        end
        if hasglue then
            setdiscchecked(lookaheaddisc,cf,post,new)
        else
            setdisc(lookaheaddisc,cf,post,new)
        end
        start          = getprev(lookaheaddisc)
        sweephead[cf]  = getnext(clast) or false
        sweephead[new] = getnext(cl) or false

    elseif backtrackdisc then

        -- The run from the node after the disc up to cl is taken out of the main
        -- list. The original run is appended to post and a copy of it to replace,
        -- and each variant gets its own chainrun.

        local cf            = getnext(backtrackdisc)
        local cl            = start
        local cnext         = getnext(start)
        local insertedmarks = 0

        while cnext do
            local char = ischar(cnext,currentfont)
            if char and marks[char] then
                insertedmarks = insertedmarks + 1
                cl            = cnext
                cnext         = getnext(cnext)
            else
                break
            end
        end
        setlink(backtrackdisc,cnext)
        setprev(cf)
        setnext(cl)
        local pre, post, replace, pretail, posttail, replacetail = getdisc(backtrackdisc,true)
        local new  = copy_node_list(cf)
        local cnew = find_node_tail(new)
        for i=1,insertedmarks do
            cnew = getprev(cnew)
        end
        local clast = cnew
        for i=f,l do
            clast = getnext(clast)
        end
        if not notmatchpost[backtrackdisc] then
            local ok = false
            cf, start, ok = chainrun(cf,start,last,dataset,sequence,rlmode,skiphash,ck)
            if ok then
                done = true
            end
        end
        if not notmatchreplace[backtrackdisc] then
            local ok = false
            new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck)
            if ok then
                done = true
            end
        end
        if post then
            setlink(posttail,cf)
        else
            post = cf
        end
        if replace then
            setlink(replacetail,new)
        else
            replace = new
        end
        if hasglue then
            setdiscchecked(backtrackdisc,pre,post,replace)
        else
            setdisc(backtrackdisc,pre,post,replace)
        end
        start              = getprev(backtrackdisc)
        sweephead[post]    = getnext(clast) or false
        sweephead[replace] = getnext(last) or false

    else

        -- no disc has been registered so we can work on the main list

        local ok = false
        head, start, ok = chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck)
        if ok then
            done = true
        end

    end

    return head, start, done
end
2448 2449
-- Tracing helper for contextual rules: logs the rule id (ck[1]), whether it
-- matched, the character at 'start', the three counts derived from ck[4], ck[5]
-- and #ck[3], the lookup type (ck[8] or else ck[2]) and whether a disc has been
-- seen and whether we are sweeping. The other arguments are not used.

local function chaintrac(head,start,dataset,sequence,rlmode,skiphash,ck,match,discseen,sweepnode)
    local rule       = ck[1]
    local lookuptype = ck[8] or ck[2]
    local nofseq     = #ck[3]
    local first      = ck[4]
    local last       = ck[5]
    local char       = getchar(start)
    local verdict    = match and "matches" or "nomatch"
    local nofbefore  = first - 1
    local nofcurrent = last - first + 1
    local nofafter   = nofseq - last
    logwarning("%s: rule %s %s at char %s for (%s,%s,%s) chars, lookuptype %a, %sdisc seen, %ssweeping",
        cref(dataset,sequence),rule,verdict,
        gref(char),nofbefore,nofcurrent,nofafter,lookuptype,
        discseen and "" or "no ",sweepnode and "" or "not ")
end
2461 2462
-- The next one is quite optimized but still somewhat slow, fonts like ebgaramond
2463
-- are real torture tests because they have many steps with one context (having
2464
-- multiple contexts makes more sense) also because we (can) reduce them. Instead of
2465
-- a match boolean variable and check for that I decided to use a goto with labels
2466
-- instead. This is one of the cases where it makes the code more readable and we
2467
-- might even gain a bit performance.
2468 2469
-- when we have fewer replacements (lookups) than current matches we can push too much into
2470
-- the previous disc .. so be it (<before><disc><current=fl><after> with only f done)
2471 2472
local
function
handle_contextchain
(
head
,
start
,
dataset
,
sequence
,
contexts
,
rlmode
,
skiphash
)
2473
-- optimizing for rlmode gains nothing
2474
local
sweepnode
=
sweepnode
2475
local
sweeptype
=
sweeptype
2476
local
postreplace
2477
local
prereplace
2478
local
checkdisc
2479
local
discseen
-- = false
2480
if
sweeptype
then
2481
if
sweeptype
=
=
"
replace
"
then
2482
postreplace
=
true
2483
prereplace
=
true
2484
else
2485
postreplace
=
sweeptype
=
=
"
post
"
2486
prereplace
=
sweeptype
=
=
"
pre
"
2487
end
2488
checkdisc
=
getprev
(
head
)
2489
end
2490
local
currentfont
=
currentfont
2491 2492
local
skipped
-- = false
2493 2494
local
startprev
,
2495
startnext
=
getboth
(
start
)
2496
local
done
-- = false
2497 2498
-- we can have multiple hits and as we scan (currently) all we need to check
2499
-- if we have a match ... contextchains have no real coverage table (with
2500
-- unique entries)
2501 2502
-- fonts can have many steps (each doing one check) or many contexts
2503 2504
-- todo: make a per-char cache so that we have small contexts (when we have a context
2505
-- n == 1 and otherwise it can be more so we can even distingish n == 1 or more)
2506 2507
local
nofcontexts
=
contexts
.
n
-- #contexts
2508 2509
local
startchar
=
nofcontext
=
=
1
or
ischar
(
start
,
currentfont
)
-- only needed in a chain
2510 2511
for
k
=
1
,
nofcontexts
do
-- does this disc mess work well with n > 1
2512 2513
local
ck
=
contexts
[
k
]
2514
local
seq
=
ck
[
3
]
2515
local
f
=
ck
[
4
]
-- first current
2516
local
last
=
start
2517
if
not
startchar
or
not
seq
[
f
]
[
startchar
]
then
2518
-- report("no hit in %a at %i of %i contexts",sequence.type,k,nofcontexts)
2519
goto
next
2520
end
2521
local
s
=
seq
.
n
-- or #seq
2522
if
s
=
=
1
then
2523
-- bit weird case: why use a chain, but it is a hit
2524
else
2525
local
l
=
ck
[
5
]
-- last current
2526
local
current
=
start
2527
-- local last = start
2528 2529
-- current match
2530 2531
if
l
>
f
then
2532
-- before/current/after | before/current | current/after
2533
local
discfound
-- = nil
2534
local
n
=
f
+
1
2535
last
=
startnext
-- the second in current (first already matched)
2536
while
n
<
=
l
do
2537
if
postreplace
and
not
last
then
2538
last
=
getnext
(
sweepnode
)
2539
sweeptype
=
nil
2540
end
2541
if
last
then
2542
local
char
,
id
=
ischar
(
last
,
currentfont
)
2543
if
char
then
2544
if
skiphash
and
skiphash
[
char
]
then
2545
skipped
=
true
2546
if
trace_skips
then
2547
show_skip
(
dataset
,
sequence
,
char
,
ck
,
classes
[
char
]
)
2548
end
2549
last
=
getnext
(
last
)
2550
elseif
seq
[
n
]
[
char
]
then
2551
if
n
<
l
then
2552
last
=
getnext
(
last
)
2553
end
2554
n
=
n
+
1
2555
elseif
discfound
then
2556
notmatchreplace
[
discfound
]
=
true
2557
if
notmatchpre
[
discfound
]
then
2558
goto
next
2559
else
2560
break
2561
end
2562
else
2563
goto
next
2564
end
2565
elseif
char
=
=
false
then
2566
if
discfound
then
2567
notmatchreplace
[
discfound
]
=
true
2568
if
notmatchpre
[
discfound
]
then
2569
goto
next
2570
else
2571
break
2572
end
2573
else
2574
goto
next
2575
end
2576
elseif
id
=
=
disc_code
then
2577
-- elseif id == disc_code and (not discs or discs[last]) then
2578
discseen
=
true
2579
discfound
=
last
2580
notmatchpre
[
last
]
=
nil
2581
notmatchpost
[
last
]
=
true
2582
notmatchreplace
[
last
]
=
nil
2583
local
pre
,
post
,
replace
=
getdisc
(
last
)
2584
if
pre
then
2585
local
n
=
n
2586
while
pre
do
2587
if
seq
[
n
]
[
getchar
(
pre
)
]
then
2588
n
=
n
+
1
2589
if
n
>
l
then
2590
break
2591
end
2592
pre
=
getnext
(
pre
)
2593
else
2594
notmatchpre
[
last
]
=
true
2595
break
2596
end
2597
end
2598
if
n
<
=
l
then
2599
notmatchpre
[
last
]
=
true
2600
end
2601
else
2602
notmatchpre
[
last
]
=
true
2603
end
2604
if
replace
then
2605
-- so far we never entered this branch
2606
while
replace
do
2607
if
seq
[
n
]
[
getchar
(
replace
)
]
then
2608
n
=
n
+
1
2609
if
n
>
l
then
2610
break
2611
end
2612
replace
=
getnext
(
replace
)
2613
else
2614
notmatchreplace
[
last
]
=
true
2615
if
notmatchpre
[
last
]
then
2616
goto
next
2617
else
2618
break
2619
end
2620
end
2621
end
2622
-- why here again
2623
if
notmatchpre
[
last
]
then
2624
goto
next
2625
end
2626
end
2627
-- maybe only if match
2628
last
=
getnext
(
last
)
2629
else
2630
goto
next
2631
end
2632
else
2633
goto
next
2634
end
2635
end
2636
end
2637 2638
-- before
2639 2640
if
f
>
1
then
2641
if
startprev
then
2642
local
prev
=
startprev
2643
if
prereplace
and
prev
=
=
checkdisc
then
2644
prev
=
getprev
(
sweepnode
)
2645
end
2646
if
prev
then
2647
local
discfound
-- = nil
2648
local
n
=
f
-
1
2649
while
n
>
=
1
do
2650
if
prev
then
2651
local
char
,
id
=
ischar
(
prev
,
currentfont
)
2652
if
char
then
2653
if
skiphash
and
skiphash
[
char
]
then
2654
skipped
=
true
2655
if
trace_skips
then
2656
show_skip
(
dataset
,
sequence
,
char
,
ck
,
classes
[
char
]
)
2657
end
2658
prev
=
getprev
(
prev
)
2659
elseif
seq
[
n
]
[
char
]
then
2660
if
n
>
1
then
2661
prev
=
getprev
(
prev
)
2662
end
2663
n
=
n
-
1
2664
elseif
discfound
then
2665
notmatchreplace
[
discfound
]
=
true
2666
if
notmatchpost
[
discfound
]
then
2667
goto
next
2668
else
2669
break
2670
end
2671
else
2672
goto
next
2673
end
2674
elseif
char
=
=
false
then
2675
if
discfound
then
2676
notmatchreplace
[
discfound
]
=
true
2677
if
notmatchpost
[
discfound
]
then
2678
goto
next
2679
end
2680
else
2681
goto
next
2682
end
2683
break
2684
elseif
id
=
=
disc_code
then
2685
-- elseif id == disc_code and (not discs or discs[prev]) then
2686
-- the special case: f i where i becomes dottless i ..
2687
discseen
=
true
2688
discfound
=
prev
2689
notmatchpre
[
prev
]
=
true
2690
notmatchpost
[
prev
]
=
nil
2691
notmatchreplace
[
prev
]
=
nil
2692
local
pre
,
post
,
replace
,
pretail
,
posttail
,
replacetail
=
getdisc
(
prev
,
true
)
2693
-- weird test: needs checking
2694
if
pre
~
=
start
and
post
~
=
start
and
replace
~
=
start
then
2695
if
post
then
2696
local
n
=
n
2697
while
posttail
do
2698
if
seq
[
n
]
[
getchar
(
posttail
)
]
then
2699
n
=
n
-
1
2700
if
posttail
=
=
post
or
n
<
1
then
2701
break
2702
else
2703
posttail
=
getprev
(
posttail
)
2704
end
2705
else
2706
notmatchpost
[
prev
]
=
true
2707
break
2708
end
2709
end
2710
if
n
>
=
1
then
2711
notmatchpost
[
prev
]
=
true
2712
end
2713
else
2714
notmatchpost
[
prev
]
=
true
2715
end
2716
if
replace
then
2717
-- we seldom enter this branch (e.g. on brill efficient)
2718
while
replacetail
do
2719
if
seq
[
n
]
[
getchar
(
replacetail
)
]
then
2720
n
=
n
-
1
2721
if
replacetail
=
=
replace
or
n
<
1
then
2722
break
2723
else
2724
replacetail
=
getprev
(
replacetail
)
2725
end
2726
else
2727
notmatchreplace
[
prev
]
=
true
2728
if
notmatchpost
[
prev
]
then
2729
goto
next
2730
else
2731
break
2732
end
2733
end
2734
end
2735
else
2736
-- notmatchreplace[prev] = true -- not according to Kai
2737
end
2738
end
2739
prev
=
getprev
(
prev
)
2740
-- elseif id == glue_code and seq[n][32] and isspace(prev,threshold,id) then
2741
-- elseif seq[n][32] and spaces[prev] then
2742
-- n = n - 1
2743
-- prev = getprev(prev)
2744
elseif
id
=
=
glue_code
then
2745
local
sn
=
seq
[
n
]
2746
if
(
sn
[
32
]
and
spaces
[
prev
]
)
or
sn
[
0xFFFC
]
then
2747
n
=
n
-
1
2748
prev
=
getprev
(
prev
)
2749
else
2750
goto
next
2751
end
2752
elseif
seq
[
n
]
[
0xFFFC
]
then
2753
n
=
n
-
1
2754
prev
=
getprev
(
prev
)
2755
else
2756
goto
next
2757
end
2758
else
2759
goto
next
2760
end
2761
end
2762
else
2763
goto
next
2764
end
2765
else
2766
goto
next
2767
end
2768
end
2769 2770
-- after
2771 2772
if
s
>
l
then
2773
local
current
=
last
and
getnext
(
last
)
2774
if
not
current
and
postreplace
then
2775
current
=
getnext
(
sweepnode
)
2776
end
2777
if
current
then
2778
local
discfound
-- = nil
2779
local
n
=
l
+
1
2780
while
n
<
=
s
do
2781
if
current
then
2782
local
char
,
id
=
ischar
(
current
,
currentfont
)
2783
if
char
then
2784
if
skiphash
and
skiphash
[
char
]
then
2785
skipped
=
true
2786
if
trace_skips
then
2787
show_skip
(
dataset
,
sequence
,
char
,
ck
,
classes
[
char
]
)
2788
end
2789
current
=
getnext
(
current
)
-- was absent
2790
elseif
seq
[
n
]
[
char
]
then
2791
if
n
<
s
then
-- new test
2792
current
=
getnext
(
current
)
-- was absent
2793
end
2794
n
=
n
+
1
2795
elseif
discfound
then
2796
notmatchreplace
[
discfound
]
=
true
2797
if
notmatchpre
[
discfound
]
then
2798
goto
next
2799
else
2800
break
2801
end
2802
else
2803
goto
next
2804
end
2805
elseif
char
=
=
false
then
2806
if
discfound
then
2807
notmatchreplace
[
discfound
]
=
true
2808
if
notmatchpre
[
discfound
]
then
2809
goto
next
2810
else
2811
break
2812
end
2813
else
2814
goto
next
2815
end
2816
elseif
id
=
=
disc_code
then
2817
-- elseif id == disc_code and (not discs or discs[current]) then
2818
discseen
=
true
2819
discfound
=
current
2820
notmatchpre
[
current
]
=
nil
2821
notmatchpost
[
current
]
=
true
2822
notmatchreplace
[
current
]
=
nil
2823
local
pre
,
post
,
replace
=
getdisc
(
current
)
2824
if
pre
then
2825
local
n
=
n
2826
while
pre
do
2827
if
seq
[
n
]
[
getchar
(
pre
)
]
then
2828
n
=
n
+
1
2829
if
n
>
s
then
2830
break
2831
else
2832
pre
=
getnext
(
pre
)
2833
end
2834
else
2835
notmatchpre
[
current
]
=
true
2836
break
2837
end
2838
end
2839
if
n
<
=
s
then
2840
notmatchpre
[
current
]
=
true
2841
end
2842
else
2843
notmatchpre
[
current
]
=
true
2844
end
2845
if
replace
then
2846
-- so far we never entered this branch
2847
while
replace
do
2848
if
seq
[
n
]
[
getchar
(
replace
)
]
then
2849
n
=
n
+
1
2850
if
n
>
s
then
2851
break
2852
else
2853
replace
=
getnext
(
replace
)
2854
end
2855
else
2856
notmatchreplace
[
current
]
=
true
2857
if
notmatchpre
[
current
]
then
2858
goto
next
2859
else
2860
break
2861
end
2862
end
2863
end
2864
else
2865
-- notmatchreplace[current] = true -- not according to Kai
2866
end
2867
current
=
getnext
(
current
)
2868
elseif
id
=
=
glue_code
then
2869
local
sn
=
seq
[
n
]
2870
if
(
sn
[
32
]
and
spaces
[
current
]
)
or
sn
[
0xFFFC
]
then
2871
n
=
n
+
1
2872
current
=
getnext
(
current
)
2873
else
2874
goto
next
2875
end
2876
elseif
seq
[
n
]
[
0xFFFC
]
then
2877
n
=
n
+
1
2878
current
=
getnext
(
current
)
2879
else
2880
goto
next
2881
end
2882
else
2883
goto
next
2884
end
2885
end
2886
else
2887
goto
next
2888
end
2889
end
2890
end
2891 2892
if
trace_contexts
then
2893
chaintrac
(
head
,
start
,
dataset
,
sequence
,
rlmode
,
skipped
and
skiphash
,
ck
,
true
,
discseen
,
sweepnode
)
2894
end
2895
if
discseen
or
sweepnode
then
2896
head
,
start
,
done
=
chaindisk
(
head
,
start
,
dataset
,
sequence
,
rlmode
,
skipped
and
skiphash
,
ck
)
2897
else
2898
head
,
start
,
done
=
chainrun
(
head
,
start
,
last
,
dataset
,
sequence
,
rlmode
,
skipped
and
skiphash
,
ck
)
2899
end
2900
if
done
then
2901
break
2902
-- else
2903
-- next context
2904
end
2905
::
next
::
2906
-- if trace_chains then
2907
-- chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,false,discseen,sweepnode)
2908
-- end
2909
end
2910
if
discseen
then
2911
notmatchpre
=
{
}
2912
notmatchpost
=
{
}
2913
notmatchreplace
=
{
}
2914
-- notmatchpre = { a = 1, b = 1 } notmatchpre .a = nil notmatchpre .b = nil
2915
-- notmatchpost = { a = 1, b = 1 } notmatchpost .a = nil notmatchpost .b = nil
2916
-- notmatchreplace = { a = 1, b = 1 } notmatchreplace.a = nil notmatchreplace.b = nil
2917
end
2918
return
head
,
start
,
done
2919
end
2920 2921
-- Every contextual lookup type (plain and chained, substitution and positioning,
-- including the reverse chain variant) is served by the same context chain handler.

do
    local contextual = {
        "gsub_context",
        "gsub_contextchain",
        "gsub_reversecontextchain",
        "gpos_contextchain",
        "gpos_context",
    }
    for i=1,#contextual do
        handlers[contextual[i]] = handle_contextchain
    end
end
2926 2927
-- this needs testing
2928 2929
-- Chain procedure for contextual lookups that are themselves called from a chain.
--
-- Only the first step of currentlookup is consulted: its coverage is indexed with
-- the character of start. When the lookup has more than one step this is reported
-- (reportmoresteps) and processing continues with step one anyway. The stop
-- argument is part of the chainproc signature but is not used here.
--
-- Returns whatever handle_contextchain returns when the character is covered,
-- otherwise head, start, false.
local function chained_contextchain(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
    if currentlookup.nofsteps > 1 then
        reportmoresteps(dataset,sequence)
    end
    -- the original author marked this lookup as "probably wrong"
    local contexts = currentlookup.steps[1].coverage[getchar(start)]
    if not contexts then
        return head, start, false
    end
    return handle_contextchain(head,start,dataset,sequence,contexts,rlmode,skiphash)
end
2943 2944
-- The chained variants of all contextual lookup types share one chain procedure.

do
    local contextual = {
        "gsub_context",
        "gsub_contextchain",
        "gsub_reversecontextchain",
        "gpos_contextchain",
        "gpos_context",
    }
    for i=1,#contextual do
        chainprocs[contextual[i]] = chained_contextchain
    end
end
2949 2950
------------------------------
2951 2952
-- experiment (needs no handler in font-otc so not now):
2953
--
2954
-- function otf.registerchainproc(name,f)
2955
-- -- chainprocs[name] = f
2956
-- chainprocs[name] = function(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
2957
-- local done = currentlookup.nofsteps > 0
2958
-- if not done then
2959
-- reportzerosteps(dataset,sequence)
2960
-- else
2961
-- head, start, done = f(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
2962
-- if not head or not start then
2963
-- reportbadsteps(dataset,sequence)
2964
-- end
2965
-- end
2966
-- return head, start, done
2967
-- end
2968
-- end
2969 2970
-- NOTE(review): presumably setmetatableindex("table") yields a table that creates
-- missing subtables on access; confirm against the helper's definition.
local missing    = setmetatableindex("table")

-- warnings go through the same reporter as regular process messages
local logwarning = report_process

-- cache used by otf.dataset: resolved[font][script][language] holds the list of
-- enabled sequences, so each font, script, language combination is resolved once
local resolved   = { }
2973 2974
-- Logs a processing message. When step tracing is active the message is also
-- registered (registermessage); with trace_steps set to "silent" it is registered
-- only and not reported.
local function logprocess(...)
    if trace_steps then
        registermessage(...)
    end
    if trace_steps ~= "silent" then
        report_process(...)
    end
end
2983 2984
-- todo: pass all these 'locals' in a table
2985 2986
-- Lazily filled cache: sequencelists[font] is the font's resources.sequences table,
-- or false when the font has no sequences table or an empty one. The outcome is
-- stored so that the lookup in fontdata happens only once per font.
local sequencelists = setmetatableindex(function(cache,id)
    local list = fontdata[id].resources.sequences
    if not (list and next(list)) then
        list = false
    end
    cache[id] = list
    return list
end)
2994 2995
-- fonts.hashes.sequences = sequencelists
2996 2997
do -- a nested scope keeps these helpers out of the main chunk's local count

    local autofeatures    = fonts.analyzers.features
    local featuretypes    = otf.tables.featuretypes
    local defaultscript   = otf.features.checkeddefaultscript
    local defaultlanguage = otf.features.checkeddefaultlanguage

    local wildcard        = "*"
    local default         = "dflt"

    -- Decides if a sequence takes part in processing for the given script and
    -- language. The features named in sequence.order are tried in that order; the
    -- first one that is enabled (enabled[kind]) and that has an entry for the
    -- script and for the language wins. For both script and language the exact
    -- name is tried first, then the wildcard, then (when autoscript/autolanguage
    -- is set) the checked default.
    --
    -- Returns { value of the feature, autofeatures[kind] or false, sequence, kind }
    -- or false when no feature qualifies.
    local function initialize(sequence,script,language,enabled,autoscript,autolanguage)
        local features = sequence.features
        local order    = features and sequence.order
        if not order then
            -- no features, or no order (the latter is not supposed to happen)
            return false
        end
        local featuretype = featuretypes[sequence.type or "unknown"]
        for i=1,#order do
            local kind    = order[i]
            local valid   = enabled[kind]
            local scripts = valid and features[kind]
            if scripts then
                local languages =
                    scripts[script] or
                    scripts[wildcard] or
                    (autoscript and defaultscript(featuretype,autoscript,scripts))
                if languages and (
                    languages[language] or
                    languages[wildcard] or
                    (autolanguage and defaultlanguage(featuretype,autolanguage,languages))
                ) then
                    return { valid, autofeatures[kind] or false, sequence, kind }
                end
            end
        end
        return false
    end

    -- Generic variant (overloaded in context). Returns the list of datasets, one
    -- entry per active sequence as built by initialize, for the script and language
    -- found in tfmdata.properties (both default to "dflt"). The list is built once
    -- and then kept in resolved[font][script][language].
    function otf.dataset(tfmdata,font)
        local properties   = tfmdata.properties
        local language     = properties.language or default
        local script       = properties.script   or default
        local enabled      = tfmdata.shared.features
        local autoscript   = enabled and enabled.autoscript
        local autolanguage = enabled and enabled.autolanguage
        --
        local perfont = resolved[font]
        if not perfont then
            perfont = { }
            resolved[font] = perfont
        end
        local perscript = perfont[script]
        if not perscript then
            perscript = { }
            perfont[script] = perscript
        end
        local list = perscript[language]
        if list then
            return list
        end
        -- indexed, but specific data can also be added by key
        list = { }
        perscript[language] = list
        local sequences = tfmdata.resources.sequences
        if sequences then
            local n = 0
            for s=1,#sequences do
                local entry = enabled and initialize(sequences[s],script,language,enabled,autoscript,autolanguage)
                if entry then
                    n = n + 1
                    list[n] = entry
                end
            end
        end
        return list
    end

end
3078 3079
-- Functions like kernrun, comprun etc evolved over time and in the end look rather
3080
-- complex. It's a bit of a compromise between extensive copying and creating subruns.
3081
-- The logic has been improved a lot by Kai and Ivo who use complex fonts which
3082
-- really helped to identify border cases on the one hand and get insight in the diverse
3083
-- ways fonts implement features (not always that consistent and efficient). At the same
3084
-- time I tried to keep the code relatively efficient so that the overhead in runtime
3085
-- stays acceptable.
3086 3087
-- Tracing helper: reports a discretionary node n together with its serialized
-- form, prefixed by a short tag (what) that tells which run produced the message.
local function report_disc(what,n)
    local serialize = languages.serializediscretionary
    report_run("%s: %s > %s",what,n,serialize(n))
end
3090 3091
-- Runs a positioning pass over the branches of a discretionary.
--
-- Each branch (pre, post, replace) is first handed to k_run on its own. After
-- that the branch is temporarily linked to the glyph before and/or after the disc
-- node so that k_run also sees the pairs that cross the disc boundary; the
-- original links are restored right after each such call. When there is no replace
-- branch, the glyphs before and after are linked directly for an "empty" run.
--
--   disc      : the discretionary node
--   k_run     : called as k_run(first,injection,last,font,attr,...); k_run_single
--               and k_run_multiple in this file have that signature
--   font,attr : passed on to k_run; font is also used to test the neighbours
--   ...       : passed on to k_run untouched
--
-- Returns the node that followed disc on entry and a boolean that is true when at
-- least one k_run call returned a true value.
local function kernrun(disc,k_run,font,attr,...)
    --
    -- we catch <font 1><disc font 2>
    --
    if trace_kernruns then
        report_disc("kern",disc)
    end
    --
    local before, after = getboth(disc)
    local nextstart     = after
    local done          = false
    --
    local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
    --
    -- Walk back over marks that precede the disc. This could be optional: a disc
    -- right after a mark is unexpected (maybe after a ccmp, but then the mark
    -- should have ended up inside the disc), so basically it indicates an error.
    --
    local anchor = before
    while anchor do
        local char = ischar(anchor,font)
        if not (char and marks[char]) then
            break
        end
        anchor = getprev(anchor)
    end
    --
    -- neighbours for which ischar gives no character for this font take no part in
    -- the boundary runs
    --
    if before and not ischar(before,font) then -- and (pre or replace)
        before = false
    end
    if after and not ischar(after,font) then -- and (post or replace)
        after = false
    end
    --
    if pre then
        if k_run(pre,"injections",nil,font,attr,...) then
            done = true
        end
        if before then
            setlink(before,pre)
            if k_run(anchor,"preinjections",pre,font,attr,...) then -- or before?
                done = true
            end
            setprev(pre)
            setlink(before,disc)
        end
    end
    --
    if post then
        if k_run(post,"injections",nil,font,attr,...) then
            done = true
        end
        if after then
            setlink(posttail,after)
            if k_run(posttail,"postinjections",after,font,attr,...) then
                done = true
            end
            setnext(posttail)
            setlink(disc,after)
        end
    end
    --
    if replace then
        if k_run(replace,"injections",nil,font,attr,...) then
            done = true
        end
        if before then
            setlink(before,replace)
            if k_run(anchor,"replaceinjections",replace,font,attr,...) then -- getnext(replace))
                done = true
            end
            setprev(replace)
            setlink(before,disc)
        end
        if after then
            setlink(replacetail,after)
            if k_run(replacetail,"replaceinjections",after,font,attr,...) then
                done = true
            end
            setnext(replacetail)
            setlink(disc,after)
        end
    elseif before and after then
        setlink(before,after)
        if k_run(anchor,"emptyinjections",after,font,attr,...) then
            done = true
        end
        setlink(before,disc,after)
    end
    -- NOTE(review): the entry message is gated by trace_kernruns but this one by
    -- trace_testruns; confirm whether that is intended.
    if done and trace_testruns then
        report_disc("done",disc)
    end
    return nextstart, done
end
3189 3190
-- fonts like ebgaramond do ligatures this way (less efficient than e.g. dejavu which
3191
-- will do the testrun variant)
3192 3193
-- Fonts like ebgaramond do ligatures this way (less efficient than e.g. dejavu,
-- which ends up in the testrun variant).
--
-- Runs c_run over each branch of a discretionary. While a branch is processed the
-- shared sweepnode is set to disc and sweeptype to the branch name; both are
-- reset to nil afterwards. A branch whose run reports success is replaced by the
-- head that c_run returned and the disc node is updated with setdisc.
--
--   disc  : the discretionary node
--   c_run : called as c_run(branchhead,...) and expected to return head, done
--   ...   : passed on to c_run (a vararg is faster than the whole list)
--
-- Returns the node after disc and whether any branch was renewed.
local function comprun(disc,c_run,...)
    if trace_compruns then
        report_disc("comp",disc)
    end
    --
    local pre, post, replace = getdisc(disc)
    local renewed = false
    --
    if pre then
        sweepnode = disc
        sweeptype = "pre" -- in alternative code preinjections is used (also used then for properties, saves a variable)
        local head, changed = c_run(pre,...)
        if changed then
            pre, renewed = head, true
        end
    end
    --
    if post then
        sweepnode = disc
        sweeptype = "post"
        local head, changed = c_run(post,...)
        if changed then
            post, renewed = head, true
        end
    end
    --
    if replace then
        sweepnode = disc
        sweeptype = "replace"
        local head, changed = c_run(replace,...)
        if changed then
            replace, renewed = head, true
        end
    end
    --
    sweepnode = nil
    sweeptype = nil
    if renewed then
        if trace_testruns then
            report_disc("done",disc)
        end
        setdisc(disc,pre,post,replace)
    end
    --
    return getnext(disc), renewed
end
3242 3243
-- if we can hyphenate in a lig then unlikely a lig so we
3244
-- could have a option here to ignore lig
3245 3246
-- If we can hyphenate inside a ligature then it is unlikely to be a ligature, so
-- an option to ignore ligatures here would be conceivable.
--
-- Tests if a lookup would match across the end of a discretionary and, if so,
-- pulls the glyphs that take part into the disc before running the lookup on
-- the branches.
--
-- First the post and replace branches (or, when a branch is empty, the node after
-- the disc) are temporarily linked to the following node and t_run is asked how
-- many following nodes take part in a match. When that count is positive, that
-- many nodes (discs among them are flattened with flattendisk) are cut out of the
-- main list; the cut-out list is appended to post and a copy of it to replace.
-- Otherwise the temporary links are undone. After that the function behaves like
-- comprun: c_run is applied to pre, post and replace.
--
--   disc  : the discretionary node
--   t_run : called as t_run(first,stop,...) and expected to return a number
--   c_run : called as c_run(branchhead,...) and expected to return head, done
--   ...   : passed on to both
--
-- Returns the node after disc and whether a branch was renewed by c_run; returns
-- nothing at all when disc has no next node.
local function testrun(disc,t_run,c_run,...)
    if trace_testruns then
        report_disc("test",disc)
    end
    local _, follow = getboth(disc)
    if not follow then
        -- weird discretionary
        return
    end
    local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
    local renewed = false
    if post or replace then -- and a previous node then -- hm, we can start with a disc
        -- let both branches run on into the main list
        if post then
            setlink(posttail,follow)
        else
            post = follow
        end
        if replace then
            setlink(replacetail,follow)
        else
            replace = follow
        end
        local d_post    = t_run(post,follow,...)
        local d_replace = t_run(replace,follow,...)
        if d_post > 0 or d_replace > 0 then
            -- take the larger of the two counts
            local depth = d_post
            if d_replace > d_post then
                depth = d_replace
            end
            local head = getnext(disc) -- is: follow
            local tail = head
            for i=2,depth do -- must start at 2 according to Kai
                local candidate = getnext(tail)
                local id        = getid(candidate)
                if id == disc_code then
                    head, tail = flattendisk(head,candidate)
                elseif id == glyph_code then
                    tail = candidate
                else
                    -- we can have overrun into a glue
                    break
                end
            end
            -- detach head .. tail from the main list
            follow = getnext(tail)
            setnext(tail)
            setprev(head)
            local copy = copy_node_list(head)
            if posttail then
                setlink(posttail,head)
            else
                post = head
            end
            if replacetail then
                setlink(replacetail,copy)
            else
                replace = copy
            end
        else
            -- we stay inside the disc: undo the temporary links
            if posttail then
                setnext(posttail)
            else
                post = nil
            end
            if replacetail then
                setnext(replacetail)
            else
                replace = nil
            end
        end
        setlink(disc,follow)
     -- pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
    end
    --
    -- from here on like comprun
    --
    if trace_testruns then
        report_disc("more",disc)
    end
    --
    if pre then
        sweepnode = disc
        sweeptype = "pre"
        local head, changed = c_run(pre,...)
        if changed then
            pre, renewed = head, true
        end
    end
    --
    if post then
        sweepnode = disc
        sweeptype = "post"
        local head, changed = c_run(post,...)
        if changed then
            post, renewed = head, true
        end
    end
    --
    if replace then
        sweepnode = disc
        sweeptype = "replace"
        local head, changed = c_run(replace,...)
        if changed then
            replace, renewed = head, true
        end
    end
    --
    sweepnode = nil
    sweeptype = nil
    if renewed then
        setdisc(disc,pre,post,replace)
        if trace_testruns then
            report_disc("done",disc)
        end
    end
    -- the next node can have changed (copied list)
    return getnext(disc), renewed
end
3364 3365
-- 1{2{\oldstyle\discretionary{3}{4}{5}}6}7\par
3366
-- 1{2\discretionary{3{\oldstyle3}}{{\oldstyle4}4}{5{\oldstyle5}5}6}7\par
3367 3368
-- call depth of otf.featuresprocessor, which increments it on entry and only sets
-- up the per-font state when the value is 1
local nesting = 0
3369 3370
-- Applies a single step lookup to a node list (a branch of a discretionary).
--
-- When sweephead[head] is set, processing starts at that node instead of at head
-- and the entry is reset to false. Each glyph of the font whose glyph data
-- matches attr (or any glyph when attr is not set) and whose character is in
-- lookupcache is passed to handler, which may change head and the current node.
--
-- The run stops at the end of the list, at a glyph for which ischar returns
-- false, or (only when started from a sweep node) at the first non glyph node.
--
-- Returns the possibly changed head and a boolean that tells if handler reported
-- success at least once.
local function c_run_single(head,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
    local done  = false
    local sweep = sweephead[head]
    local start = sweep or head
    if sweep then
     -- sweephead[head] = nil
        sweephead[head] = false
    end
    while start do
        local char = ischar(start,font)
        if char then
            -- without attr there is nothing to compare with
            local a = attr and getglyphdata(start)
            if not a or a == attr then
                local lookupmatch = lookupcache[char]
                if lookupmatch then
                    local ok
                    head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                    if ok then
                        done = true
                    end
                end
            end
            -- when skipped because of the attribute: go on, can be a mixed one
            if start then
                start = getnext(start)
            end
        elseif char == false or sweep then
            -- char == false ends the run; in a sweep we would otherwise lose the rest
            return head, done
        else
            -- in disc component
            start = getnext(start)
        end
    end
    return head, done
end
3416 3417
-- only replace?
3418 3419
-- Test run for a single step ligature lookup.
--
-- Walks the glyphs from start up to (not including) stop. For the first glyph
-- that has an entry in lookupcache, the nodes that follow are matched against
-- that entry's tree: lookupmatch[char] gives the next level and a level that is a
-- number or has a ligature field counts as a ligature. Discretionaries met on the
-- way are entered through their replace list (or skipped when that is empty).
--
-- The count starts at 1 when the match consumes the stop node itself and is
-- incremented for every further matched glyph, so it is the number of nodes from
-- stop onwards that take part in the match. It stays 0 when the match ends before
-- stop.
--
-- Returns that count when a ligature was matched, otherwise 0.
local function t_run_single(start,stop,font,attr,lookupcache)
    while start ~= stop do
        local char = ischar(start,font)
        if not char then
            break
        end
        local a         = attr and getglyphdata(start)
        local startnext = getnext(start)
        if not a or a == attr then
            local lookupmatch = lookupcache[char]
            if lookupmatch then -- hm, hyphens can match (tlig) so we need to really check
                -- if we need more than ligatures we can outline the code and use functions
                local current = startnext
                local resume  = nil -- where to go on after a replace list
                local atstop  = current == stop
                -- a bit weird: why multiple ... anyway we can't have a disc in a disc
                while getid(current) == disc_code do
                    resume  = getnext(current)
                    current = getreplace(current)
                    if not current then
                        current = resume
                        resume  = nil
                    end
                end
                local last  = nil
                local depth = 0
                while current do
                    local nextchar = ischar(current,font)
                    local entry    = nextchar and not tonumber(lookupmatch) and lookupmatch[nextchar]
                    if not entry then
                        break
                    end
                    if atstop then
                        depth = 1
                    elseif depth > 0 then
                        depth = depth + 1
                    end
                    last    = entry
                    current = getnext(current)
                    atstop  = current == stop
                    if not current then
                        current = resume
                        resume  = nil
                    end
                    while getid(current) == disc_code do
                        resume  = getnext(current)
                        current = getreplace(current)
                        if not current then
                            current = resume
                            resume  = nil
                        end
                    end
                    lookupmatch = entry
                end
                if last and (tonumber(last) or last.ligature) then -- so we test for ligature
                    return depth
                end
            end
        end
        -- no match (yet), or a glyph with another attribute: go on, can be a mixed one
        start = startnext
    end
    return 0
end
3507 3508
-- Positioning run for a single step lookup over a (temporarily linked) sublist.
--
-- Nothing happens when attr is set and the glyph data of sub differs from it.
-- Otherwise the nodes from sub onwards are traversed (nextnode) until last is
-- met; each glyph of the font with an entry in lookupcache is handed to handler
-- together with the injection tag.
--
-- Returns true as soon as handler reports success, otherwise nothing.
local function k_run_single(sub,injection,last,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
    local a = attr and getglyphdata(sub)
    if a and a ~= attr then
        return
    end
    for n in nextnode, sub do -- only gpos
        if n == last then
            break
        end
        local char        = ischar(n,font)
        local lookupmatch = char and lookupcache[char]
        if lookupmatch then
            local _, _, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection)
            if ok then
                return true
            end
        end
    end
end
3531 3532
-- Applies a multi step lookup to a node list (a branch of a discretionary).
--
-- Works like c_run_single, but for every qualifying glyph the coverage of each
-- step is tried in order; the first step whose handler reports success ends the
-- step loop for that glyph. The step loop also ends when handler leaves no
-- current node.
--
-- Returns the possibly changed head and a boolean that tells if handler reported
-- success at least once.
local function c_run_multiple(head,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
    local done  = false
    local sweep = sweephead[head]
    local start = sweep or head
    if sweep then
     -- sweephead[head] = nil
        sweephead[head] = false
    end
    while start do
        local char = ischar(start,font)
        if char then
            local a = attr and getglyphdata(start)
            if not a or a == attr then
                for i=1,nofsteps do
                    local step        = steps[i]
                    local lookupmatch = step.coverage[char]
                    if lookupmatch then
                        -- we could move all code inline but that makes things even more unreadable
                        local ok
                        head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                        if ok then
                            done = true
                            break
                        end
                        if not start then
                            -- don't ask why ... shouldn't happen
                            break
                        end
                    end
                end
            end
            -- when skipped because of the attribute: go on, can be a mixed one
            if start then
                start = getnext(start)
            end
        elseif char == false or sweep then
            -- char == false: whatever glyph; in a sweep we would otherwise lose the rest
            return head, done
        else
            -- in disc component
            start = getnext(start)
        end
    end
    return head, done
end
3588 3589
-- Test run for a multi step ligature lookup.
--
-- Walks the glyphs from start up to (not including) stop. For the first glyph
-- that qualifies, the coverage of every step is consulted; for each step that has
-- an entry, the nodes that follow are matched against that entry's tree:
-- lookupmatch[char] gives the next level and a level that is a number or has a
-- ligature field counts as a ligature. Discretionaries met on the way are entered
-- through their replace list (or skipped when that is empty).
--
-- The count starts at 1 when the match consumes the stop node itself and is
-- incremented for every further matched glyph, so it is the number of nodes from
-- stop onwards that take part in the match. When several steps match a ligature,
-- the count of the last such step is the one that is returned.
--
-- The follow up glyphs are tested with the font, just like in t_run_single, so
-- that glyphs from another font are never matched against this font's tree.
--
-- Returns the count when a ligature was matched, otherwise 0.
local function t_run_multiple(start,stop,font,attr,steps,nofsteps)
    local lastd = nil
    while start ~= stop do
        local char = ischar(start,font)
        if not char then
            break
        end
        local a         = attr and getglyphdata(start)
        local startnext = getnext(start)
        if not a or a == attr then
            for i=1,nofsteps do
                local step        = steps[i]
                local lookupmatch = step.coverage[char]
                if lookupmatch then
                    -- if we need more than ligatures we can outline the code and use functions
                    local current = startnext
                    local resume  = nil -- where to go on after a replace list
                    local atstop  = current == stop
                    while getid(current) == disc_code do
                        resume  = getnext(current)
                        current = getreplace(current)
                        if not current then
                            current = resume
                            resume  = nil
                        end
                    end
                    local last  = nil
                    local depth = 0
                    while current do
                        -- the font has to be passed here: without it any glyph qualifies
                        local nextchar = ischar(current,font)
                        local entry    = nextchar and not tonumber(lookupmatch) and lookupmatch[nextchar]
                        if not entry then
                            break
                        end
                        if atstop then
                            depth = 1
                        elseif depth > 0 then
                            depth = depth + 1
                        end
                        last    = entry
                        current = getnext(current)
                        atstop  = current == stop
                        if not current then
                            current = resume
                            resume  = nil
                        end
                        while getid(current) == disc_code do
                            resume  = getnext(current)
                            current = getreplace(current)
                            if not current then
                                current = resume
                                resume  = nil
                            end
                        end
                        lookupmatch = entry
                    end
                    if last and (tonumber(last) or last.ligature) then
                        lastd = depth
                    end
                end
            end
        end
        -- a glyph with another attribute: go on, can be a mixed one
        if lastd then
            return lastd
        end
        start = startnext
    end
    return 0
end
3674 3675
-- Positioning run for a multi step lookup over a (temporarily linked) sublist.
--
-- Nothing happens when attr is set and the glyph data of sub differs from it.
-- Otherwise the nodes from sub onwards are traversed (nextnode) until last is
-- met; for each glyph of the font the coverage of every step is consulted and
-- each entry found is handed to handler together with the injection tag.
--
-- The glyph test passes the font, just like in k_run_single, so that glyphs from
-- another font are never offered to this font's lookups.
--
-- Returns true as soon as handler reports success, otherwise nothing.
local function k_run_multiple(sub,injection,last,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
    local a = attr and getglyphdata(sub)
    if a and a ~= attr then
        return
    end
    for n in nextnode, sub do -- only gpos
        if n == last then
            break
        end
        local char = ischar(n,font)
        if char then
            for i=1,nofsteps do
                local step        = steps[i]
                local lookupmatch = step.coverage[char]
                if lookupmatch then
                    -- sub is passed as head
                    local _, _, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection)
                    if ok then
                        return true
                    end
                end
            end
        end
    end
end
3702 3703
local txtdirstate, pardirstate

do -- this might change (no need for nxt in pardirstate)

    local getdirection = nuts.getdirection

    -- Updates the direction stack for a text direction node.
    --
    --   start     : the direction node
    --   stack     : table with the directions that are currently open
    --   top       : index of the last used stack slot
    --   rlparmode : the paragraph mode, returned when the stack is left as is
    --
    -- Returns the new top and the mode: 1 for left to right, -1 for right to left.
    -- When the node closes the only open level, the result is 0, rlparmode.
    function txtdirstate(start,stack,top,rlparmode)
        local dir, pop = getdirection(start)
        if pop then
            if top == 1 then
                return 0, rlparmode
            end
            local level = top - 1
            if stack[level] == righttoleft_code then
                return level, -1
            end
            return level, 1
        end
        if dir == lefttoright_code then
            local level = top + 1
            stack[level] = lefttoright_code
            return level, 1
        end
        if dir == righttoleft_code then
            local level = top + 1
            stack[level] = righttoleft_code
            return level, -1
        end
        return top, rlparmode
    end

    -- Maps the direction of a paragraph node onto a pair of equal values: 1, 1 for
    -- left to right, -1, -1 for right to left and 0, 0 for anything else.
    function pardirstate(start)
        local dir = getdirection(start)
        if dir == lefttoright_code then
            return 1, 1
        end
        if dir == righttoleft_code then
            return -1, -1
        end
        return 0, 0
    end

end
3745 3746
-- These are non public helpers that can change without notice!
3747 3748
-- export the direction helpers; the helpers table is created when it's not there yet
if not otf.helpers then
    otf.helpers = { }
end

otf.helpers.txtdirstate = txtdirstate
otf.helpers.pardirstate = pardirstate
3751 3752
-- This is the main loop. We run over the node list dealing with a specific font. The
3753
-- attribute is a context specific thing. We could work on sub start-stop ranges instead
3754
-- but I wonder if there is that much speed gain (experiments showed that it made not
3755
-- much sense) and we need to keep track of directions anyway. Also at some point I
3756
-- want to play with font interactions and then we do need the full sweeps. Apart from
3757
-- optimizations the principles of processing the features haven't changed much since
3758
-- the beginning.
3759 3760
do
3761 3762
-- This is a measurable experimental speedup (only with hyphenated text and multiple
3763
-- fonts per processor call), especially for fonts with lots of contextual lookups.
3764 3765
local
fastdisc
=
true
3766
local
testdics
=
false
3767 3768
directives
.
register
(
"
otf.fastdisc
"
,
function
(
v
)
fastdisc
=
v
end
)
-- normally enabled
3769 3770
-- using a merged combined hash as first test saves some 30% on ebgaramond and
3771
-- about 15% on arabtype .. then moving the a test also saves a bit (even when
3772
-- often a is not set at all so that one is a bit debatable
3773 3774
local
otfdataset
=
nil
-- todo: make an installer
3775 3776
local
getfastdisc
=
{
__index
=
function
(
t
,
k
)
3777
local
v
=
usesfont
(
k
,
currentfont
)
3778
t
[
k
]
=
v
3779
return
v
3780
end
}
3781 3782
local
getfastspace
=
{
__index
=
function
(
t
,
k
)
3783
-- we don't pass the id so that one can overload isspace
3784
local
v
=
isspace
(
k
,
threshold
)
or
false
3785
t
[
k
]
=
v
3786
return
v
3787
end
}
3788 3789
-- Main node mode feature processor: applies all datasets (sequences) that
-- otfdataset(tfmdata,font,attr) returns to the node list starting at head.
--
-- head      : first node of the list to process
-- font      : font id, used to index fontdata and passed to ischar
-- attr      : value compared with getglyphdata(node); when false/nil no such
--             comparison is made
-- direction : only compared with righttoleft_code to choose the initial
--             direction when head is not a start-of-par node
-- n         : when fastdisc is set and n > 1 the disc cache (discs) is enabled
--
-- Returns the (possibly changed) head. On a nested call with a different font
-- it returns head, false without processing anything.
--
-- The function keeps a nesting counter; the shared state (currentfont, tfmdata,
-- descriptions, characters, marks, classes, threshold, factor, checkmarks,
-- discs, spaces) is only set up on the outermost call.
function otf.featuresprocessor(head,font,attr,direction,n)

    local sequences = sequencelists[font] -- temp hack

    nesting = nesting + 1

    if nesting == 1 then
        -- outermost call: set up the shared state for this font
        currentfont  = font
        tfmdata      = fontdata[font]
        descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead
        characters   = tfmdata.characters   -- but this branch is not entered that often anyway
        local resources = tfmdata.resources
        marks        = resources.marks
        classes      = resources.classes
        threshold,
        factor       = getthreshold(font)
        checkmarks   = tfmdata.properties.checkmarks

        -- otf.dataset is looked up lazily, on the first run
        if not otfdataset then
            otfdataset = otf.dataset
        end

        discs  = fastdisc and n and n > 1 and setmetatable({},getfastdisc) -- maybe inline
        spaces = setmetatable({},getfastspace)

    elseif currentfont ~= font then

        -- a nested run is only accepted for the font of the outer run
        report_warning("nested call with a different font, level %s, quitting",nesting)
        nesting = nesting - 1
        return head, false

    end

    -- some 10% faster when no dynamics but hardly measurable on real runs .. but: it only
    -- works when we have no other dynamics as otherwise the zero run will be applied to the
    -- whole stream for which we then need to pass another variable which we won't

    -- if attr == 0 then
    --     attr = false
    -- end

    if trace_steps then
        checkstep(head)
    end

    local initialrl = 0

    -- the initial direction comes from the par node when there is one, otherwise
    -- from the direction argument
    if getid(head) == par_code and startofpar(head) then
        initialrl = pardirstate(head)
    elseif direction == righttoleft_code then
        initialrl = -1
    end

 -- local done      = false
    local datasets  = otfdataset(tfmdata,font,attr)
    local dirstack  = { nil } -- could move outside function but we can have local runs
    sweephead       = { }
 -- sweephead  = { a = 1, b = 1 } sweephead.a = nil sweephead.b = nil

    -- Keeping track of the headnode is needed for devanagari. (I generalized it a bit
    -- so that multiple cases are also covered.) We could prepend a temp node.

    -- We don't goto the next node when a disc node is created so that we can then treat
    -- the pre, post and replace. It's a bit of a hack but works out ok for most cases.

    for s=1,#datasets do
        local dataset      = datasets[s]
        local attribute    = dataset[2]
        local sequence     = dataset[3] -- sequences[s] -- also dataset[5]
        local rlparmode    = initialrl
        local topstack     = 0
        local typ          = sequence.type
        local gpossing     = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset
        local forcetestrun = typ == "gsub_ligature" -- testrun is only for ligatures
        local handler      = handlers[typ] -- store in dataset
        local steps        = sequence.steps
        local nofsteps     = sequence.nofsteps
        local skiphash     = sequence.skiphash

        if not steps then
            -- This permits injection, watch the different arguments. Watch out, the arguments passed
            -- are not frozen as we might extend or change this. Is this used at all apart from some
            -- experiments?
            local h, ok = handler(head,dataset,sequence,initialrl,font,attr) -- less arguments now
         -- if ok then
         --     done = true
         -- end
            if h and h ~= head then
                head = h
            end
        elseif typ == "gsub_reversecontextchain" then
            --
            -- This might need a check: if we have #before or #after > 0 then we might need to reverse
            -- the before and after lists in the loader. But first I need to see a font that uses multiple
            -- matches.
            --
            -- this type is processed from the tail of the list towards the head
            local start  = find_node_tail(head)
            local rlmode = 0 -- how important is this .. do we need to check for dir?
            local merged = steps.merged
            while start do
                local char = ischar(start,font)
                if char then
                    local m = merged[char]
                    if m then
                        local a -- happens often so no assignment is faster
                        if attr then
                            a = getglyphdata(start)
                        end
                        if not a or (a == attr) then
                            -- m[1] .. m[2] is the range of step indices tried for this character
                            for i=m[1],m[2] do
                                local step = steps[i]
                             -- for i=1,#m do
                             --     local step = m[i]
                                local lookupcache = step.coverage
                                local lookupmatch = lookupcache[char]
                                if lookupmatch then
                                    local ok
                                    head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                                    if ok then
                                     -- done = true
                                        break
                                    end
                                end
                            end
                            if start then
                                start = getprev(start)
                            end
                        else
                            start = getprev(start)
                        end
                    else
                        start = getprev(start)
                    end
                else
                    start = getprev(start)
                end
            end
        else
            local start  = head
            local rlmode = initialrl
            if nofsteps == 1 then -- happens often
                local step        = steps[1]
                local lookupcache = step.coverage
                while start do
                    local char, id = ischar(start,font)
                    if char then
                        if skiphash and skiphash[char] then -- we never needed it here but let's try
                            start = getnext(start)
                        else
                            local lookupmatch = lookupcache[char]
                            if lookupmatch then
                                local a -- happens often so no assignment is faster
                                if attr then
                                    if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then
                                        a = true
                                    end
                                elseif not attribute or getstate(start,attribute) then
                                    a = true
                                end
                                if a then
                                    local ok, df
                                    head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                                 -- if ok then
                                 --     done = true
                                 -- end
                                    -- when df is set we don't advance, so the node in start is looked at again
                                    if df then
                                     -- print("restart 1",typ)
                                    elseif start then
                                        start = getnext(start)
                                    end
                                else
                                    start = getnext(start)
                                end
                            else
                                start = getnext(start)
                            end
                        end
                    elseif char == false or id == glue_code then
                        -- a different font|state or glue (happens often)
                        start = getnext(start)
                    elseif id == disc_code then
                        -- without a disc cache every disc is processed, otherwise only those
                        -- for which the cache holds true
                        if not discs or discs[start] == true then
                            local ok
                            if gpossing then
                                start, ok = kernrun(start,k_run_single,             font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
                            elseif forcetestrun then
                                start, ok = testrun(start,t_run_single,c_run_single,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
                            else
                                start, ok = comprun(start,c_run_single,             font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
                            end
                         -- if ok then
                         --     done = true
                         -- end
                        else
                            start = getnext(start)
                        end
                    elseif id == math_code then
                        -- continue after the node that endofmath returns
                        start = getnext(endofmath(start))
                    elseif id == dir_code then
                        topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
                        start = getnext(start)
                 -- elseif id == par_code and startofpar(start) then
                 --     rlparmode, rlmode = pardirstate(start)
                 --     start = getnext(start)
                    else
                        start = getnext(start)
                    end
                end
            else
                local merged = steps.merged
                while start do
                    local char, id = ischar(start,font)
                    if char then
                        if skiphash and skiphash[char] then -- we never needed it here but let's try
                            start = getnext(start)
                        else
                            local m = merged[char]
                            if m then
                                local a -- happens often so no assignment is faster
                                if attr then
                                    if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then
                                        a = true
                                    end
                                elseif not attribute or getstate(start,attribute) then
                                    a = true
                                end
                                if a then
                                    local ok, df
                                    -- m[1] .. m[2] is the range of step indices tried for this character
                                    for i=m[1],m[2] do
                                        local step = steps[i]
                                     -- for i=1,#m do
                                     --     local step = m[i]
                                        local lookupcache = step.coverage
                                        local lookupmatch = lookupcache[char]
                                        if lookupmatch then
                                            -- we could move all code inline but that makes things even more unreadable
                                         -- local ok, df
                                            head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                                            if df then
                                                break
                                            elseif ok then
                                             -- done = true
                                                break
                                            elseif not start then
                                                -- don't ask why ... shouldn't happen
                                                break
                                            end
                                        end
                                    end
                                    -- when df is set we don't advance, so the node in start is looked at again
                                    if df then
                                     -- print("restart 2",typ)
                                    elseif start then
                                        start = getnext(start)
                                    end
                                else
                                    start = getnext(start)
                                end
                            else
                                start = getnext(start)
                            end
                        end
                    elseif char == false or id == glue_code then
                        -- a different font|state or glue (happens often)
                        start = getnext(start)
                    elseif id == disc_code then
                        -- without a disc cache every disc is processed, otherwise only those
                        -- for which the cache holds true
                        if not discs or discs[start] == true then
                            local ok
                            if gpossing then
                                start, ok = kernrun(start,k_run_multiple,               font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
                            elseif forcetestrun then
                                start, ok = testrun(start,t_run_multiple,c_run_multiple,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
                            else
                                start, ok = comprun(start,c_run_multiple,               font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
                            end
                         -- if ok then
                         --     done = true
                         -- end
                        else
                            start = getnext(start)
                        end
                    elseif id == math_code then
                        -- continue after the node that endofmath returns
                        start = getnext(endofmath(start))
                    elseif id == dir_code then
                        topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
                        start = getnext(start)
                 -- elseif id == par_code and startofpar(start) then
                 --     rlparmode, rlmode = pardirstate(start)
                 --     start = getnext(start)
                    else
                        start = getnext(start)
                    end
                end
            end
        end

        if trace_steps then -- ?
            registerstep(head)
        end

    end

    nesting = nesting - 1

 -- return head, done
    return head
end
4095 4096
-- This is not an official helper and used for tracing experiments. It can be changed as I like
4097
-- at any moment. At some point it might be used in a module that can help font development.
4098 4099
-- Applies a single dataset to the node list starting at head (tracing helper,
-- not an official interface).
--
-- head      : first node of the list to process
-- font      : font id, used to index fontdata and passed to ischar
-- direction : compared with righttoleft_code to choose the initial direction
-- dataset   : either a dataset table or a number; a number is taken as an
--             index into the list returned by otfdataset(tfmdata,font,0)
--
-- Returns the (possibly changed) head.
--
-- Unlike otf.featuresprocessor this function sets the shared state (currentfont,
-- tfmdata, descriptions, ...) unconditionally and does no attribute checking.
function otf.datasetpositionprocessor(head,font,direction,dataset)

    currentfont     = font
    tfmdata         = fontdata[font]
    descriptions    = tfmdata.descriptions -- only needed in gref so we could pass node there instead
    characters      = tfmdata.characters   -- but this branch is not entered that often anyway
    local resources = tfmdata.resources
    marks           = resources.marks
    classes         = resources.classes
    threshold,
    factor          = getthreshold(font)
    checkmarks      = tfmdata.properties.checkmarks

    if type(dataset) == "number" then
        -- otfdataset is normally set on the first run of the features processor, so
        -- we need the same lazy lookup here in case this helper is called first
        if not otfdataset then
            otfdataset = otf.dataset
        end
        dataset = otfdataset(tfmdata,font,0)[dataset]
    end

    local sequence  = dataset[3] -- sequences[s] -- also dataset[5]
    local typ       = sequence.type
 -- local gpossing  = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset

 -- gpos_contextchain gpos_context

 -- if not gpossing then
 --     return head, false
 -- end

    local handler   = handlers[typ] -- store in dataset
    local steps     = sequence.steps
    -- taken from the sequence, as in the features processor; it is tested below
    -- and passed on to the handler
    local skiphash  = sequence.skiphash

    local dirstack  = { nil } -- could move outside function but we can have local runs (maybe a few more nils)
    local start     = head
    local initialrl = (direction == righttoleft_code) and -1 or 0
    local rlmode    = initialrl
    local rlparmode = initialrl
    local topstack  = 0
    local merged    = steps.merged

 -- local matches   = false
    local position  = 0 -- only counted, for the (disabled) match tracing below

    while start do
        local char, id = ischar(start,font)
        if char then
            position = position + 1
            local m = merged[char]
            if m then
                if skiphash and skiphash[char] then -- we never needed it here but let's try
                    start = getnext(start)
                else
                    -- m[1] .. m[2] is the range of step indices tried for this character
                    for i=m[1],m[2] do
                        local step = steps[i]
                        local lookupcache = step.coverage
                        local lookupmatch = lookupcache[char]
                        if lookupmatch then
                            local ok
                            head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                            if ok then
                             -- if matches then
                             --     matches[position] = i
                             -- else
                             --     matches = { [position] = i }
                             -- end
                                break
                            elseif not start then
                                break
                            end
                        end
                    end
                    if start then
                        start = getnext(start)
                    end
                end
            else
                start = getnext(start)
            end
        elseif char == false or id == glue_code then
            -- a different font|state or glue (happens often)
            start = getnext(start)
        elseif id == math_code then
            -- continue after the node that endofmath returns
            start = getnext(endofmath(start))
        elseif id == dir_code then
            topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
            start = getnext(start)
     -- elseif id == par_code and startofpar(start) then
     --     rlparmode, rlmode = pardirstate(start)
     --     start = getnext(start)
        else
            start = getnext(start)
        end
    end

    return head
end
4195 4196
-- end of experiment
4197 4198
end
4199 4200
-- so far
4201 4202
do

    -- Registry of plugins: each entry is { name, function }.

    local plugins = { }
    otf.plugins   = plugins

    local report  = logs.reporter("fonts")
    local warned  = false
    local okay    = { text = true }

    -- Stores f under name. Plugins not listed in okay trigger a message, and the
    -- first of them also triggers a once-only explanation sent to the "log"
    -- target (when the logger supports targets). Wrong argument types are ignored.

    function otf.registerplugin(name,f)
        if type(name) ~= "string" or type(f) ~= "function" then
            return
        end
        plugins[name] = { name, f }
        if okay[name] then
            -- no warning (e.g. the diagnostic text plugin)
            return
        end
        report("plugin %a has been loaded, please be aware of possible side effects",name)
        if warned then
            return
        end
        local pushtarget = logs.pushtarget
        if pushtarget then
            logs.pushtarget("log")
        end
        report("Plugins are not officially supported unless stated otherwise. This is because")
        report("they bypass the regular font handling and therefore some features in ConTeXt")
        report("(especially those related to fonts) might not work as expected or might not work")
        report("at all. Some plugins are for testing and development only and might change")
        report("whenever we feel the need for it.")
        report()
        local poptarget = logs.poptarget
        if poptarget then
            logs.poptarget()
        end
        warned = true
    end

    -- Binds the plugin registered as value (a string) to the font; other value
    -- types are ignored.

    function otf.plugininitializer(tfmdata,value)
        if type(value) ~= "string" then
            return
        end
        tfmdata.shared.plugin = plugins[value]
    end

    -- Hands the node list over to the plugin bound to the font and returns what
    -- the plugin returns; without a plugin we return head, false.

    function otf.pluginprocessor(head,font,dynamic,direction) -- n
        local shared = fontdata[font].shared
        local plugin = shared and shared.plugin
        if not plugin then
            return head, false
        end
        if trace_plugins then
            report_process("applying plugin %a",plugin[1])
        end
        return plugin[2](head,font,dynamic,direction)
    end

end
4257 4258
-- Node mode initializer of the "features" feature. It is registered below but
-- it has no work left to do, so both arguments are ignored.

function otf.featuresinitializer(tfmdata,value)
    -- nothing done here any more
end
4261 4262
-- Registers the "features" feature (enabled by default) with an initializer and
-- a processor for the "node" as well as the "plug" mode.

registerotffeature {
    name         = "features",
    description  = "features",
    default      = true,
    initializers = {
        position = 1,
        node     = otf.featuresinitializer,
        plug     = otf.plugininitializer,
    },
    processors   = {
        node     = otf.featuresprocessor,
        plug     = otf.pluginprocessor,
    }
}
4276 4277
-- Moved here (up) a bit. This doesn't really belong in generic so it will
4278
-- move to a context module some day.
4279 4280
-- Stores the value of the "checkmarks" feature in the properties table of the
-- font, where the features processor picks it up.

local function markinitializer(tfmdata,value)
    tfmdata.properties.checkmarks = value
end
4284 4285
-- Registers the "checkmarks" feature (enabled by default); it only has a node
-- mode initializer.

registerotffeature {
    name         = "checkmarks",
    description  = "check mark widths",
    default      = true,
    initializers = {
        node     = markinitializer,
    },
}
4293 4294
-- This can be used for extra handlers, but should be used with care! We implement one
4295
-- here but some more can be found in the osd (script devanagari) file. Now watch out:
4296
-- when a handler has steps, it is called as the other ones, but when we have no steps,
4297
-- we use a different call:
4298
--
4299
-- function(head,dataset,sequence,initialrl,font,attr)
4300
-- return head, done
4301
-- end
4302
--
4303
-- Also see (!!).
4304 4305
-- Make the handlers accessible from outside this file.

otf.handlers = handlers

-- When context is set the file ends here; what follows is only loaded otherwise.
-- todo: move the following code someplace else

if context then
    return
end
4312 4313
-- The space kern code below needs the injector's setspacekerns; we quit the
-- program when it is not there.

local setspacekerns = nodes.injections.setspacekerns

if not setspacekerns then
    os.exit()
end

-- The feature tag that the space kern code looks at.

local tag = "kern"
4316 4317
-- if fontfeatures then
4318 4319
-- function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
4320
-- local features = fontfeatures[font]
4321
-- local enabled = features and features.spacekern and features[tag]
4322
-- if enabled then
4323
-- setspacekerns(font,sequence)
4324
-- end
4325
-- return head, enabled
4326
-- end
4327 4328
-- else -- generic (no hashes)
4329 4330
-- A handler without steps (so it gets the alternative argument list): when the
-- font has both the spacekern feature and the kern feature set, it calls
-- setspacekerns(font,sequence). It returns the unchanged head plus the outcome
-- of that check.

function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
    local fontshared = fontdata[font].shared
    local active     = fontshared and fontshared.features
    if active then
        active = active.spacekern and active[tag]
    end
    if active then
        setspacekerns(font,sequence)
    end
    return head, active
end
4339 4340
-- end
4341 4342
-- There are fonts out there that change the space but we don't do that kind of
4343
-- things in TeX.
4344 4345
-- Reports whether the font data has pair positioning in a kern sequence that
-- involves the space (slot 32), either as first or as second glyph of a pair.
-- Only gpos_pair steps with a coverage and without rules are inspected; for
-- the "pair" format the first entry of a pair has to be something other than
-- true in order to count.

local function hasspacekerns(data)
    local resources = data.resources
    local sequences = resources.sequences
    local validgpos = resources.features.gpos
    if not (validgpos and sequences) then
        return false
    end
    for s=1,#sequences do
        local sequence = sequences[s]
        local steps    = sequence.steps
        -- kind is only set when the sequence has steps and belongs to the kern feature
        local kind     = steps and sequence.features[tag] and sequence.type
        if kind == "gpos_pair" or kind == "gpos_single" then
            for i=1,#steps do
                local step     = steps[i]
                local coverage = step.coverage
                -- rules are not analyzed (yet) and single positioning is ignored
                if kind == "gpos_pair" and coverage and not step.rules then
                    local format    = step.format
                    local fromspace = coverage[32]
                    if format == "move" or format == "kern" then
                        if fromspace then
                            return true
                        end
                        for _, seconds in next, coverage do
                            if seconds[32] then
                                return true
                            end
                        end
                    elseif format == "pair" then
                        if fromspace then
                            for _, pair in next, fromspace do
                                local one = pair[1]
                                if one and one ~= true then
                                    return true
                                end
                            end
                        end
                        for _, seconds in next, coverage do
                            local pair = seconds[32]
                            local one  = pair and pair[1]
                            if one and one ~= true then
                                return true
                            end
                        end
                    end
                end
            end
        end
    end
    return false
end
4406 4407
-- Registers a reader extender that stores the outcome of hasspacekerns(data)
-- in data.properties.hasspacekerns.

otf.readers.registerextender {
    name   = "spacekerns",
    action = function(data)
        data.properties.hasspacekerns = hasspacekerns(data)
    end
}
4413 4414
-- Node mode initializer of the "spacekern" feature.
--
-- When value is set and resources.spacekerns is still nil, the kern sequences of
-- type gpos_pair are scanned for pairs with the space (slot 32):
--
--   right[k] : data for the pair (space,k)
--   left[k]  : data for the pair (k,space)
--
-- When something is found, { left = ..., right = ... } is stored in
-- resources.spacekerns and a stepless "trigger_space_kerns" sequence is inserted
-- into resources.sequences at the position of the last kern sequence seen. The
-- features of that sequence are the merged script/language sets of the kern
-- sequences.
--
-- Returns resources.spacekerns (nil when there are no resources or no space
-- kerns).

local function spaceinitializer(tfmdata,value) -- attr
    local resources  = tfmdata.resources
    local spacekerns = resources and resources.spacekerns
    if value and resources and spacekerns == nil then
        local rawdata    = tfmdata.shared and tfmdata.shared.rawdata
        local properties = rawdata and rawdata.properties -- there might be no rawdata
        if properties and properties.hasspacekerns then
            local sequences = resources.sequences
            local features  = resources.features
            local validgpos = features and features.gpos
            if validgpos and sequences then
                local left  = { }
                local right = { }
                local last  = 0
                local feat  = nil
                for i=1,#sequences do
                    local sequence = sequences[i]
                    local steps    = sequence.steps
                    if steps then
                        -- we don't support space kerns in other features
                        local kern = sequence.features[tag]
                        if kern then
                            local kind = sequence.type
                            if kind == "gpos_pair" or kind == "gpos_single" then
                                -- We merge into a table of our own: using the feature table of
                                -- the first sequence (or its language subtables) as target would
                                -- change the scripts and languages that the original sequences
                                -- apply to.
                                if not feat then
                                    feat = { }
                                end
                                for script, languages in next, kern do
                                    local f = feat[script]
                                    if f then
                                        for l in next, languages do
                                            f[l] = true
                                        end
                                    else
                                        f = { }
                                        for l, v in next, languages do
                                            f[l] = v
                                        end
                                        feat[script] = f
                                    end
                                end
                                for j=1,#steps do
                                    local step     = steps[j]
                                    local coverage = step.coverage
                                    local rules    = step.rules
                                    if rules then
                                        -- not now: analyze (simple) rules
                                    elseif not coverage then
                                        -- nothing to do
                                    elseif kind == "gpos_single" then
                                        -- makes no sense in TeX
                                    elseif kind == "gpos_pair" then
                                        local format = step.format
                                        if format == "move" or format == "kern" then
                                            local kerns = coverage[32]
                                            if kerns then
                                                for k, v in next, kerns do
                                                    right[k] = v
                                                end
                                            end
                                            for k, v in next, coverage do
                                                local kern = v[32]
                                                if kern then
                                                    left[k] = kern
                                                end
                                            end
                                        elseif format == "pair" then
                                            -- only the third entry of the first pair element is used
                                            local kerns = coverage[32]
                                            if kerns then
                                                for k, v in next, kerns do
                                                    local one = v[1]
                                                    if one and one ~= true then
                                                        right[k] = one[3]
                                                    end
                                                end
                                            end
                                            for k, v in next, coverage do
                                                local kern = v[32]
                                                if kern then
                                                    local one = kern[1]
                                                    if one and one ~= true then
                                                        left[k] = one[3]
                                                    end
                                                end
                                            end
                                        end
                                    end
                                end
                                last = i
                            end
                        else
                            -- no steps ... needed for old one ... we could use the basekerns
                            -- instead
                        end
                    end
                end
                left  = next(left)  and left  or false
                right = next(right) and right or false
                if left or right then
                    spacekerns = {
                        left  = left,
                        right = right,
                    }
                    if last > 0 then
                        local triggersequence = {
                            -- no steps, see (!!)
                            features = { [tag] = feat or { dflt = { dflt = true, } } },
                            flags    = noflags,
                            name     = "trigger_space_kerns",
                            order    = { tag },
                            type     = "trigger_space_kerns",
                            left     = left,
                            right    = right,
                        }
                        insert(sequences,last,triggersequence)
                    end
                end
            end
        end
        resources.spacekerns = spacekerns
    end
    return spacekerns
end
4533 4534
-- Registers the "spacekern" feature (enabled by default); it only has a node
-- mode initializer.

registerotffeature {
    name         = "spacekern",
    description  = "space kern injection",
    default      = true,
    initializers = {
        node     = spaceinitializer,
    },
}
4542