font-txt.lua /size: 21 Kb    last modification: 2021-10-28 13:50
1
-- Register this file in the global module table (created on demand) so that
-- version and origin information can be looked up under the key 'font-txt'.
modules = modules or { }

modules['font-txt'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    original  = "derived from a prototype by Kai Eigner",
    author    = "Hans Hagen", -- so don't blame KE
    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
9 10
-- The next code is derived from a snippet handler prototype by Kai Eigner and
11
-- resembles the main loop of the Lua font handler but I decided to use a more generic
12
-- (and pluggable) approach and not hook it into the already present opentype
13
-- handler. This is cleaner as it cannot interfere with the Lua font processor
14
-- (which does some more things) and is also better performance wise. It also makes
15
-- it possible to support other handlers as history has proven that there are no
16
-- universal solutions in computer land. Most of the disc logic is kept but done
17
-- slightly different.
18
--
19
-- The code is heavily optimized and generalized so there can be errors. As
20
-- mentioned, the plug mode can be used for alternative font handlers. A font is
21
-- still loaded but the node and base mode handlers are ignored. Plugins are
22
-- unlikely to work well in context as they can mess up attribute driven subsystems,
23
-- so they are not officially supported. The language and script options are
24
-- available in the usual way.
25
--
26
-- The code collects snippets, either or not with spacing around them and partially
27
-- running over disc nodes. The r2l "don't assume disc and collect larger chunks" is
28
-- not robust so I got rid of that branch. This is somewhat similar to the Lua font
29
-- handler.
30
--
31
-- An alternative is to run over longer strings with dummy chars (unicode objects) as
32
-- markers for whatever is in the list but that becomes tricky with mixed fonts and
33
-- reconstruction becomes a bit of a mess then, especially because disc nodes force
34
-- us to backtrack and look at several solutions. It also has a larger memory
35
-- footprint. Some tests demonstrated that it has no gain and only adds complexity.
36
--
37
-- This (derived) variant is better suited for context and seems to work ok in the
38
-- generic variant. I also added some context specific tracing to the code. This
39
-- variant uses the plug model provided in the context font system. So, in context,
40
-- using the built in Lua handler is the better alternative, also because it has
41
-- extensive tracing features. Context users would lose additional functionality
42
-- that has been provided for a decade and therefore plugins are not officially
43
-- supported (at least not by me, unless I use them myself).
44
--
45
-- There is no checking here for already processed characters so best not mix this
46
-- variant with code that does similar things. Whether this code evolves depends on the
47
-- usability. Kai's code can now be found on github where it is used with a harfbuzz
48
-- library. We add this kind of stuff because occasionally we compare engines and
49
-- Kai sends me examples and I then need to check context.
50
--
51
-- One important difference between Kai's approach and the one used in ConTeXt is
52
-- that we use utf-32 instead of utf-8. Once I figured out that clusters were just
53
-- indices into the original text that made more sense. The first implementation
54
-- used the command line tool (binary), then I went for ffi (library).
55
--
56
-- Beware: this file only implements the framework for plugins. Plugins themselves
57
-- are in other files (e.g. font-phb*.lua). On the todo list is a uniscribe plugin
58
-- because that is after all the reference for opentype support, but that interface
59
-- needs a bit more work (so it might never happen).
60
--
61
-- Usage: see m-fonts-plugins.mkiv. As it's a nice test for ffi support that file
62
-- might be added to the distribution somewhere in the middle of 2017 when the ffi
63
-- interface has been tested a bit more. Okay, it's 2021 now and we're way past that
64
-- date but we never had a reason for adding it to the ConTeXt distribution. It
65
-- should still work okay because I occasionally checked it against progress made in
66
-- the engines and used newer helpers.
67
--
68
-- Here is an example of usage:
69
--
70
-- \starttext
71
-- \definefontfeature[test][mode=plug,features=text]
72
-- \start
73
-- \showfontkerns
74
-- \definedfont[Serif*test]
75
-- \input tufte \par
76
-- \stop
77
-- \stoptext
78 79
local fonts            = fonts
local otf              = fonts.handlers.otf
local nodes            = nodes

local utfchar          = utf.char

local nuts             = nodes.nuts

local getnext          = nuts.getnext
local setnext          = nuts.setnext
local getprev          = nuts.getprev
local setprev          = nuts.setprev
local getid            = nuts.getid
local getsubtype       = nuts.getsubtype
local getfont          = nuts.getfont
local getchar          = nuts.getchar
local getdisc          = nuts.getdisc
local setdisc          = nuts.setdisc
local getboth          = nuts.getboth
local setlink          = nuts.setlink
local getkern          = nuts.getkern
local getwidth         = nuts.getwidth

local ischar           = nuts.ischar
local isglyph          = nuts.isglyph
local usesfont         = nuts.usesfont

local copy_node_list   = nuts.copylist
local find_node_tail   = nuts.tail
local flushlist        = nuts.flushlist
local freenode         = nuts.free
local endofmath        = nuts.endofmath

local startofpar       = nuts.startofpar

local nodecodes        = nodes.nodecodes

local glyph_code       = nodecodes.glyph
local glue_code        = nodecodes.glue
local disc_code        = nodecodes.disc
local kern_code        = nodecodes.kern
local math_code        = nodecodes.math
local dir_code         = nodecodes.dir
local par_code         = nodecodes.par
local whatsit_code     = nodecodes.whatsit

local righttoleft_code = nodes.dirvalues.righttoleft

local txtdirstate      = otf.helpers.txtdirstate
local pardirstate      = otf.helpers.pardirstate

local fonthashes       = fonts.hashes
local fontdata         = fonthashes.identifiers
131 132
-- Removes every disc node from the list starting at head. The pre and post
-- lists of a disc are flushed, its replace list (when present) is spliced into
-- the main list at the position of the disc, and the emptied disc node is freed.
-- Returns the (possibly changed) head of the list.
local function deletedisc(head)
    local current = head
    while current do
        -- fetch the successor first because current may be freed below
        local following = getnext(current)
        if getid(current) == disc_code then
            -- the second argument makes getdisc return the list tails as well
            local pre, post, replace, _, _, replace_tail = getdisc(current,true)
            -- detach the three lists from the disc before freeing it
            setdisc(current)
            if pre then
                flushlist(pre)
            end
            if post then
                flushlist(post)
            end
            local before, after = getboth(current)
            local athead = current == head
            if replace then
                if athead then
                    head = replace
                    setprev(replace) -- already nil
                else
                    setlink(before,replace)
                end
                setlink(replace_tail,after) -- was: setlink(n,replace_tail)
            elseif athead then
                head = after
                setprev(after)
            else
                setlink(before,after)
            end
            freenode(current)
        end
        current = following
    end
    return head
end
167 168
-- As we know that we have the same font we can probably optimize this a bit more.
169
-- Although we can have more in disc nodes than characters and kerns we only support
170
-- those two types.
171 172
-- Compares two nodes that live inside disc sublists. Only glyphs and kerns are
-- supported:
--
--   glyph : equal when m is a character in the same font as n with the same code
--   kern  : equal when both kern amounts are the same
--
-- Anything else is reported as different. Returns a boolean.
local function eqnode(n,m) -- no real improvement in speed
    -- isglyph returns the character for a glyph; otherwise its second result
    -- is the node id. The id must be captured here: without it the kern test
    -- below compares an undefined (nil) global and can never succeed.
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,getfont(n))
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    end
    return false
end
180 181
-- Walks two node lists in parallel and reports whether they have the same
-- length and every pair of nodes is equal according to eqnode.
local function eqnodelist(n,m)
    while n and m do
        if not eqnode(n,m) then
            return false
        end
        n = getnext(n)
        m = getnext(m)
    end
    -- equal only when both lists ran out at the same time
    return not (n or m)
end

-- Decides whether node n can be considered equal to node m. It is used to
-- find the part that a disc's pre/post list shares with its replace list.
--
--   n, m    : nodes (either may be nil; two nils are equal, one nil is not)
--   returns : boolean
--
-- Rules, keyed on the type of n:
--
--   glyph   : m must be a character in the same font with the same code
--   whatsit : never equal
--   glue    : always equal (m is not inspected)
--   kern    : equal when the kern amounts match
--   disc    : equal when pre, post and replace lists match node by node
--   other   : never equal
local function equalnode(n,m)
    if not n then
        return not m
    elseif not m then
        return false
    end
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,n_id) -- n_id == n_font
    elseif n_id == whatsit_code then
        return false
    elseif n_id == glue_code then
        -- NOTE(review): m's type is not checked here; confirm this is intended
        return true
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    elseif n_id == disc_code then
        local n_pre, n_post, n_replace = getdisc(n)
        local m_pre, m_post, m_replace = getdisc(m)
        return eqnodelist(n_pre,m_pre)
           and eqnodelist(n_post,m_post)
           and eqnodelist(n_replace,m_replace)
    end
    return false
end
233 234
-- The spacing hackery is not nice. The text can get leading and trailing spaces
235
-- and even mid spaces while the start and stop nodes not always are glues then
236
-- so the plugin really needs to do some testing there. We could pass more context
237
-- but it doesn't become much better.
238
--
239
-- The attribute gets passed for tracing purposes. We could support it (not that
240
-- hard to do) but as we don't test strictly for fonts (in disc nodes) we are not
241
-- compatible anyway. It would also mean more testing. So, don't use this mixed
242
-- with node and base mode in context.
243
--
244
-- We don't distinguish between modes in treatment (so no r2l assumptions) and
245
-- no cheats for scripts that might not use discretionaries. Such hacks can work
246
-- in predictable cases but in context one can use a mix of all kinds of things and
247
-- users do that. On the other hand, we do support longer glyph runs in both modes
248
-- so there we gain a bit.
249 250
do
251 252
-- Runs over a node list, collects runs of characters that belong to the given
-- font and feeds each run to a plugin handler.
--
--   head         : first node of the list (nil gives an immediate empty return)
--   font         : font id; only characters in this font are collected
--   dynamic      : passed on to the handler untouched
--   rlmode       : for outer calls a direction value that gets normalized to
--                  -1 (right to left) or 0; for nested calls the already
--                  normalized state of the caller
--   handler      : function(head,font,dynamic,rlmode,start,stop,text,leading,trailing)
--                  that returns the new head; text is a table with the
--                  collected character codes
--   startspacing : true when the first run is preceded by a space (default false)
--   stopspacing  : true when the last run is followed by a space (default false)
--   nesting      : "pre", "post" or "replace" when called recursively for the
--                  sublists of a disc node, nil otherwise
--
-- Returns the new head and true, or nothing when there is no head.
local function texthandler(head,font,dynamic,rlmode,handler,startspacing,stopspacing,nesting)
    if not head then
        return
    end
    if startspacing == nil then
        startspacing = false
    end
    if stopspacing == nil then
        stopspacing = false
    end

    if getid(head) == par_code and startofpar(head) then
        rlmode = pardirstate(head)
    elseif nesting then
        -- A recursive call for a disc sublist: rlmode is the state of the
        -- caller and has been normalized already. Comparing it once more
        -- with the direction code would corrupt it.
    elseif rlmode == righttoleft_code then
        rlmode = -1
    else
        rlmode = 0
    end

    local dirstack  = { }
    local rlparmode = 0
    local topstack  = 0
    local text      = { }
    local size      = 0
    local current   = head
    local start     = nil
    local stop      = nil

    -- Passes the collected run to the handler and resets the collector. It
    -- updates head itself and returns nothing.
    local function handle(leading,trailing) -- what gets passed can become configurable: e.g. utf 8
        local last = current or start -- hm, what with outer stop
        if getid(last) ~= glyph_code then
            last = getprev(last)
        end
        head  = handler(head,font,dynamic,rlmode,start,last,text,leading,trailing) -- handler can adapt text
        size  = 0
        text  = { }
        start = nil
    end

    while current do
        local char, id = ischar(current,font)
        if char then
            if not start then
                start = current
            end
            size       = size + 1
            text[size] = getchar(current)
            current    = getnext(current)
        elseif char == false then
            -- so a mixed font
            if start and size > 0 then
                handle(startspacing,false)
            end
            startspacing = false
            current      = getnext(current)
        elseif id == glue_code then
            -- making this branch optional i.e. always use the else doesn't really
            -- make a difference in performance (in hb) .. tricky anyway as we can
            local width = getwidth(current)
            if width > 0 then
                if start and size > 0 then
                    handle(startspacing,true)
                end
                startspacing = true
                stopspacing  = false
            else
                if start and size > 0 then
                    -- handle sets head itself and returns nothing, so its
                    -- result must not be assigned to head
                    handle(startspacing)
                end
                startspacing = false
                stopspacing  = false
            end
            current = getnext(current)
        elseif id == disc_code and usesfont(current,font) then -- foo|-|bar : has hbox
            -- This looks much like the original code but I don't see a need to optimize
            -- for e.g. deva or r2l fonts. If there are no disc nodes then we won't see
            -- this branch anyway and if there are, we should just deal with them.
            --
            -- There is still some weird code here ... start/stop and such. When I'm in
            -- the mood (or see a need) I'll rewrite this bit.

            -- bug: disc in last word moves to end (in practice not an issue as one
            -- doesn't want a break there)

            local pre         = nil
            local post        = nil
            local currentnext = getnext(current)
            local current_pre, current_post, current_replace = getdisc(current)
            setdisc(current) -- why, we set it later
            if start then
                -- the pending run moves into the pre list, a copy of it goes
                -- in front of the replace list
                pre  = copy_node_list(start,current)
                stop = getprev(current)
                -- why also current and not:
                -- pre = copy_node_list(start,stop)
                if start == head then
                    head = current
                end
                setlink(getprev(start),current)
                setlink(stop,current_pre)
                current_pre = start
                setprev(current_pre)
                start = nil
                stop  = nil
            end
            -- look ahead for the characters and discs that follow this disc
            while currentnext do
                local nextchar, nextid = ischar(currentnext,font)
                if nextchar or nextid == disc_code then
                    stop        = currentnext
                    currentnext = getnext(currentnext)
                elseif nextid == glue_code then
                    local width = getwidth(currentnext)
                    if width and width > 0 then
                        stopspacing = true
                    else
                        stopspacing = false
                    end
                    break
                else
                    break
                end
            end
            if stop then
                -- the nodes that follow move into the post list, a copy of
                -- them goes after the replace list
                local first    = getnext(current)
                local stopnext = getnext(stop)
                post = copy_node_list(first,stopnext)
                if current_post then
                    setlink(find_node_tail(current_post),first)
                else
                    setprev(first)
                    current_post = first
                end
                setlink(current,stopnext)
                setnext(stop)
                stop = nil
            end
            if pre then
                setlink(find_node_tail(pre),current_replace)
                current_replace = pre
                pre = nil
            end
            if post then
                if current_replace then
                    setlink(find_node_tail(current_replace),post)
                else
                    current_replace = post
                end
                post = nil
            end
            size = 0 -- hm, ok, start is also nil now
            text = { }
            if current_pre then
                current_pre = texthandler(current_pre,font,dynamic,rlmode,handler,startspacing,false,"pre")
            end
            if current_post then
                current_post = texthandler(current_post,font,dynamic,rlmode,handler,false,stopspacing,"post")
            end
            if current_replace then
                current_replace = texthandler(current_replace,font,dynamic,rlmode,handler,startspacing,stopspacing,"replace")
            end
            startspacing = false
            stopspacing  = false
            -- the tail that post and replace have in common goes back to the
            -- main list, right after the disc
            local cpost       = current_post and find_node_tail(current_post)
            local creplace    = current_replace and find_node_tail(current_replace)
            local cpostnew    = nil
            local creplacenew = nil
            while cpost and equalnode(cpost,creplace) do
                cpostnew    = cpost
                creplacenew = creplace
                if creplace then
                    creplace = getprev(creplace)
                end
                cpost = getprev(cpost)
            end
            if cpostnew then
                if cpostnew == current_post then
                    current_post = nil
                else
                    setnext(getprev(cpostnew))
                end
                flushlist(cpostnew)
                if creplacenew == current_replace then
                    current_replace = nil
                else
                    setnext(getprev(creplacenew))
                end
                local c = getnext(current)
                setlink(current,creplacenew)
                local creplacenewtail = find_node_tail(creplacenew)
                setlink(creplacenewtail,c)
            end
            current_post    = current_post and deletedisc(current_post)
            current_replace = current_replace and deletedisc(current_replace)
            -- the start that pre and replace have in common goes back to the
            -- main list, right before the disc
            local cpre        = current_pre
            local creplace    = current_replace
            local cprenew     = nil
            local creplacenew = nil
            while cpre and equalnode(cpre,creplace) do
                cprenew     = cpre
                creplacenew = creplace
                if creplace then
                    creplace = getnext(creplace)
                end
                cpre = getnext(cpre)
            end
            if cprenew then
                cpre        = current_pre
                current_pre = getnext(cprenew)
                if current_pre then
                    setprev(current_pre)
                end
                setnext(cprenew)
                flushlist(cpre)
                creplace        = current_replace
                current_replace = getnext(creplacenew)
                if current_replace then
                    setprev(current_replace)
                end
                setlink(getprev(current),creplace)
                if current == head then
                    head = creplace
                end
                setlink(creplacenew,current)
            end
            setdisc(current,current_pre,current_post,current_replace)
            current = currentnext
        else
            if start and size > 0 then
                handle(startspacing,stopspacing)
            end
            startspacing = false
            stopspacing  = false
            if id == math_code then
                current = getnext(endofmath(current))
            elseif id == dir_code then
                startspacing = false
                topstack, rlmode = txtdirstate(current,dirstack,topstack,rlparmode)
                current = getnext(current)
         -- elseif id == par_code and startofpar(current) then
         --     startspacing = false
         --     rlparmode, rlmode = pardirstate(current)
         --     current = getnext(current)
            else
                current = getnext(current)
            end
        end
    end
    if start and size > 0 then
        handle(startspacing,stopspacing)
    end
    return head, true
end
509 510
-- Public entry point for plugins: runs the text handler over the list and
-- calls action for each collected snippet.
--
--   head      : first node of the list
--   font      : font id
--   dynamic   : passed on to action untouched
--   direction : direction value as used for dir nodes (compared with the
--               right to left code)
--   action    : snippet handler; when absent nothing is done
--
-- Returns the new head and true, or the unchanged head and false when there
-- is no action.
function fonts.handlers.otf.texthandler(head,font,dynamic,direction,action)
    if not action then
        return head, false
    end
    -- Pass the direction as is. The local texthandler compares it with the
    -- right to left code itself, so converting it to -1 or 0 here first made
    -- that second comparison fail and every run was treated as left to right.
    return texthandler(head,font,dynamic,direction,action)
end
517 518
-- Next comes a tracer plug into context.
519 520
----- texthandler = fonts.handlers.otf.texthandler
521
-- State shared by the tracer: a reporter, a run counter, a snippet counter
-- (reset for every run) and a formatter for unicode values.
local report_text = logs.reporter("otf plugin","text")
local nofruns     = 0
local nofsnippets = 0
local f_unicode   = string.formatters["%U"]

-- Snippet handler that only reports what it gets. The character codes in list
-- are replaced in place by their utf strings and the snippet is logged together
-- with its unicode values; a plus or minus sign tells if there is spacing before
-- and after. Returns the unchanged head and false.
local function showtext(head,font,dynamic,rlmode,start,stop,list,before,after)
    if not list then
        report_text()
        report_text("invalid list")
        report_text()
        return head, false
    end
    nofsnippets = nofsnippets + 1
    local unicodes = { }
    for index=1,#list do
        local code = list[index]
        list[index]     = utfchar(code)
        unicodes[index] = f_unicode(code)
    end
    local leading  = before and "+" or "-"
    local trailing = after and "+" or "-"
    report_text("%03i : [%s] %t [%s]-> % t",nofsnippets,leading,list,trailing,unicodes)
    return head, false
end
543 544
-- The "text" plugin: counts the runs, resets the snippet counter, and lets the
-- text handler report every snippet via showtext between a start and stop
-- message. Returns whatever the text handler returns (head and a boolean).
local function tracetext(head,font,dynamic,direction)
    nofruns     = nofruns + 1
    nofsnippets = 0
    report_text("start run %i",nofruns)
    local newhead, done = texthandler(head,font,dynamic,direction,showtext)
    report_text("stop run %i",nofruns)
    return newhead, done
end

fonts.handlers.otf.registerplugin("text",tracetext)
552 553
end
554