typo-duc.lua /size: 37 Kb    last modification: 2020-07-01 14:35
1
-- Make sure the global module registry exists, then describe this module in it.
modules = modules or { }

modules['typo-duc'] = {
    version   = 1.001,
    -- NOTE(review): the key `comment` is given twice in this constructor. Both
    -- entries are kept, in their original order, so that the resulting table is
    -- the same as before; only one of the two strings can survive.
    comment   = "companion to typo-dir.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
    comment   = "Unicode bidi (sort of) variant c",
}
9 10
-- This is a follow up on typo-uda which itself is a follow up on t-bidi by Khaled Hosny which
11
-- in turn is based on minibidi.c from Arabeyes. This is a further optimization, as well as
12
-- an update on some recent unicode bidi developments. There is (and will) also be more control
13
-- added. As a consequence this module is somewhat slower than its precursor which itself is
14
-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin
15
-- support. However, in the meantime performance got a bit better and this third variant is again
16
-- some 10% faster than the second variant.
17 18
-- todo (cf html):
19
--
20
-- normal The element does not offer an additional level of embedding with respect to the bidirectional algorithm. For inline elements implicit reordering works across element boundaries.
21
-- embed If the element is inline, this value opens an additional level of embedding with respect to the bidirectional algorithm. The direction of this embedding level is given by the direction property.
22
-- bidi-override For inline elements this creates an override. For block container elements this creates an override for inline-level descendants not within another block container element. This means that inside the element, reordering is strictly in sequence according to the direction property; the implicit part of the bidirectional algorithm is ignored.
23
-- isolate This keyword indicates that the element's container directionality should be calculated without considering the content of this element. The element is therefore isolated from its siblings. When applying its bidirectional-resolution algorithm, its container element treats it as one or several U+FFFC Object Replacement Character, i.e. like an image.
24
-- isolate-override This keyword applies the isolation behavior of the isolate keyword to the surrounding content and the override behavior of the bidi-override keyword to the inner content.
25
-- plaintext This keyword makes the elements directionality calculated without considering its parent bidirectional state or the value of the direction property. The directionality is calculated using the P2 and P3 rules of the Unicode Bidirectional Algorithm.
26
-- This value allows displaying data which has already been formatted using a tool following the Unicode Bidirectional Algorithm.
27
--
28
-- todo: check for introduced errors
29
-- todo: reuse list, we have size, so we can just change values (and auto allocate when not there)
30
-- todo: reuse the stack
31
-- todo: no need for a max check
32
-- todo: collapse bound similar ranges (not ok yet)
33
-- todo: combine some sweeps
34
-- todo: removing is not needed when we inject at the same spot (only change the dir property)
35
-- todo: isolated runs (isolating runs are similar to bidi=local in the basic analyzer)
36 37
-- todo: check unicode addenda (from the draft):
38
--
39
-- Added support for canonical equivalents in BD16.
40
-- Changed logic in N0 to not check forwards for context in the case of enclosed text opposite the embedding direction.
41
-- Major extension of the algorithm to allow for the implementation of directional isolates and the introduction of new isolate-related values to the Bidi_Class property.
42
-- Adds BD8, BD9, BD10, BD11, BD12, BD13, BD14, BD15, and BD16, Sections 2.4 and 2.5, and Rules X5a, X5b, X5c and X6a.
43
-- Extensively revises Section 3.3.2, Explicit Levels and Directions and its existing X rules to formalize the algorithm for matching a PDF with the embedding or override initiator whose scope it terminates.
44
-- Moves Rules X9 and X10 into a separate new Section 3.3.3, Preparations for Implicit Processing.
45
-- Modifies Rule X10 to make the isolating run sequence the unit to which subsequent rules are applied.
46
-- Modifies Rule W1 to change an NSM preceded by an isolate initiator or PDI into ON.
47
-- Adds Rule N0 and makes other changes to Section 3.3.5, Resolving Neutral and Isolate Formatting Types to resolve bracket pairs to the same level.
48 49
local
insert
,
remove
,
unpack
,
concat
=
table
.
insert
,
table
.
remove
,
table
.
unpack
,
table
.
concat
50
local
utfchar
=
utf
.
char
51
local
setmetatable
=
setmetatable
52
local
formatters
=
string
.
formatters
53 54
local
directiondata
=
characters
.
directions
55
local
mirrordata
=
characters
.
mirrors
56
local
textclassdata
=
characters
.
textclasses
57 58
local
nuts
=
nodes
.
nuts
59 60
local
getnext
=
nuts
.
getnext
61
local
getid
=
nuts
.
getid
62
local
getsubtype
=
nuts
.
getsubtype
63
local
getlist
=
nuts
.
getlist
64
local
getchar
=
nuts
.
getchar
65
local
getattr
=
nuts
.
getattr
66
local
getprop
=
nuts
.
getprop
67
local
getdirection
=
nuts
.
getdirection
68
local
isglyph
=
nuts
.
isglyph
69 70
local
setprop
=
nuts
.
setprop
71
local
setchar
=
nuts
.
setchar
72
local
setdirection
=
nuts
.
setdirection
73
local
setattrlist
=
nuts
.
setattrlist
74 75
local
properties
=
nodes
.
properties
.
data
76 77
local
remove_node
=
nuts
.
remove
78
local
insert_node_after
=
nuts
.
insert_after
79
local
insert_node_before
=
nuts
.
insert_before
80
local
start_of_par
=
nuts
.
start_of_par
81 82
local
nodepool
=
nuts
.
pool
83
local
new_direction
=
nodepool
.
direction
84 85
local
nodecodes
=
nodes
.
nodecodes
86
local
gluecodes
=
nodes
.
gluecodes
87 88
local
glyph_code
=
nodecodes
.
glyph
89
local
glue_code
=
nodecodes
.
glue
90
local
hlist_code
=
nodecodes
.
hlist
91
local
vlist_code
=
nodecodes
.
vlist
92
local
math_code
=
nodecodes
.
math
93
local
dir_code
=
nodecodes
.
dir
94
local
localpar_code
=
nodecodes
.
localpar
95 96
local
parfillskip_code
=
gluecodes
.
parfillskip
97 98
local
dirvalues
=
nodes
.
dirvalues
99
local
lefttoright_code
=
dirvalues
.
lefttoright
100
local
righttoleft_code
=
dirvalues
.
righttoleft
101 102
local
maximum_stack
=
0xFF
103 104
local
a_directions
=
attributes
.
private
(
'
directions
'
)
105 106
local
directions
=
typesetters
.
directions
107
local
setcolor
=
directions
.
setcolor
108
local
getfences
=
directions
.
getfences
109 110
-- Runtime switches. Each one is a file-level upvalue that is updated by the
-- callback registered with the directive / tracker mechanism.

-- When true (the default) entries flagged with `remove` are taken out of the
-- node list by apply_to_list.
local remove_controls = true

directives.register("typesetters.directions.removecontrols", function(value)
    remove_controls = value
end)

----- analyze_fences = true directives.register("typesetters.directions.analyzefences", function(v) analyze_fences = v end)

local report_directions = logs.reporter("typesetting","directions three")

-- general tracing (also colors glyphs in apply_to_list)
local trace_directions = false

trackers.register("typesetters.directions", function(value)
    trace_directions = value
end)

-- before / after / result overviews in process
local trace_details = false

trackers.register("typesetters.directions.details", function(value)
    trace_details = value
end)

-- per node reporting in apply_to_list
local trace_list = false

trackers.register("typesetters.directions.list", function(value)
    trace_list = value
end)
118 119
-- strong (old):
120
--
121
-- l : left to right
122
-- r : right to left
123
-- lro : left to right override
124
-- rlo : right to left override
125
-- lre : left to right embedding
126
-- rle : right to left embedding
127
-- al : right to left arabic (esp punctuation issues)
128
--
129
-- weak:
130
--
131
-- en : european number
132
-- es : european number separator
133
-- et : european number terminator
134
-- an : arabic number
135
-- cs : common number separator
136
-- nsm : nonspacing mark
137
-- bn : boundary neutral
138
--
139
-- neutral:
140
--
141
-- b : paragraph separator
142
-- s : segment separator
143
-- ws : whitespace
144
-- on : other neutrals
145
--
146
-- interesting: this is indeed better (and more what we expect i.e. we already use this split
147
-- in the old original (also these isolates)
148
--
149
-- strong (new):
150
--
151
-- l : left to right
152
-- r : right to left
153
-- al : right to left arabic (esp punctuation issues)
154
--
155
-- explicit: (new)
156
--
157
-- lro : left to right override
158
-- rlo : right to left override
159
-- lre : left to right embedding
160
-- rle : right to left embedding
161
-- pdf : pop dir format
162
-- lri : left to right isolate
163
-- rli : right to left isolate
164
-- fsi : first strong isolate
165
-- pdi : pop directional isolate
166 167
-- Set of original classes that resolve_levels resets to the base level when
-- they precede a separator or end the line (the L1 sweeps).
local whitespace = {
    ws  = true,
    bn  = true,
    pdf = true,
    lre = true,
    rle = true,
    lro = true,
    rlo = true,
}

-- Set of classes that resolve_neutral treats as neutral.
local b_s_ws_on = { b = true, s = true, ws = true, on = true }
183 184
-- tracing
185 186
-- Tracing helper: renders entries 1..size of `list` as one string. For each
-- entry the field named by `what` (default "direction") is shown together with
-- either the node id range (object entries, char 0xFFFC) or the character.
local function show_list(list,size,what)
    what = what or "direction"
    local zwnj      = utfchar(0x200C)
    local separator = zwnj .. "|" .. zwnj
    local lines     = { }
    for index=1,size do
        local item  = list[index]
        local code  = item.char
        local class = item[what]
        local line
        if code == 0xFFFC then
            -- an object: one or more non-character nodes
            local firstid = item.id
            local lastid  = item.last
            local skipped = item.skip or 0
            if lastid then
                line = formatters["%-3s:%s %s..%s (%i)"](class,zwnj,nodecodes[firstid],nodecodes[lastid],skipped)
            else
                line = formatters["%-3s:%s %s (%i)"](class,zwnj,nodecodes[firstid],skipped)
            end
        elseif code < 0x202A or code > 0x202C then
            -- anything but the lre / rle / pdf controls: show the character too
            line = formatters["%-3s:%s %c %U"](class,zwnj,code,code)
        else
            line = formatters["%-3s:%s %U"](class,zwnj,code)
        end
        lines[index] = line
    end
    return concat(lines,separator)
end
211 212
-- preparation
213 214
-- Tracing helper: renders the outcome for entries 1..size of `list`. Begin and
-- end direction markers are shown as <..>, objects as <?>, spaces as < >, the
-- explicit controls by their original class and everything else as itself.
-- Entries flagged with `remove` only contribute their markers.
local function show_done(list,size)
    local zwnj   = utfchar(0x200C)
    local tagged = formatters["<%s>"]
    local pieces = { }
    local count  = 0
    local function add(piece)
        count = count + 1
        pieces[count] = piece
    end
    for index=1,size do
        local item = list[index]
        local code = item.char
        local bdir = item.begindir
        local edir = item.enddir
        if bdir then
            add(tagged(bdir))
        end
        if not item.remove then
            if code == 0xFFFC then
                add(tagged("?"))
            elseif code == 0x0020 then
                add(tagged(" "))
            elseif code >= 0x202A and code <= 0x202C then
                add(tagged(item.original))
            else
                add(utfchar(code))
            end
        end
        if edir then
            add(tagged(edir))
        end
    end
    return concat(pieces,zwnj)
end
243 244
-- keeping the list and overwriting doesn't save much runtime, only a few percent
245
-- char is only used for mirror, so in fact we can as well only store it for
246
-- glyphs only
247
--
248
-- tracking what direction is used and skipping tests is not faster (extra kind of
249
-- compensates gain)
250 251
-- Metatables that supply the default fields of the non-glyph list entries, so
-- that build_list only has to store what differs per entry.
local function entry_defaults(char,class)
    return {
        __index = {
            char      = char,
            direction = class,
            original  = class,
            level     = 0,
            skip      = 0,
        }
    }
end

local mt_space  = entry_defaults(0x0020,"ws")  -- glue
local mt_lre    = entry_defaults(0x202A,"lre") -- begin of a left to right dir node
local mt_rle    = entry_defaults(0x202B,"rle") -- begin of a right to left dir node
local mt_pdf    = entry_defaults(0x202C,"pdf") -- end of a dir node
local mt_object = entry_defaults(0xFFFC,"on")  -- anything else
256 257
-- Both tables are shared between runs and never cleared; callers pass the
-- number of valid entries around separately.

-- level / override stack of resolve_explicit, which expects stack[n] to be a
-- table that it can fill right away
local stack = table.setmetatableindex("table")

-- the entries produced by build_list
local list = {}
259 260
-- Converts the node list starting at `head` into the shared `list` of entries
-- that the resolvers work on and returns that list plus the number of entries
-- that are valid for this run (the list itself is reused and not cleared).
--
-- One entry is made per glyph, glue and dir node. A run of nodes that were
-- flagged in an earlier pass (properties[n].directions) and a math formula
-- (math node up to and including the next math node) each collapse into a
-- single object entry whose `skip` field tells how many extra nodes it covers.
-- Every other node becomes an object entry of its own.
--
-- The `where` argument is accepted but not used.
local function build_list(head,where)
    -- P1
    local current = head
    local size    = 0
    while current do
        size = size + 1
        local id = getid(current)
        local p  = properties[current]
        if p and p.directions then
            -- tricky as dirs can be injected in between
            local skip = 0
            local last = id
            current    = getnext(current)
            while current do
                local id = getid(current)
                local p  = properties[current]
                if p and p.directions then
                    skip    = skip + 1
                    last    = id
                    current = getnext(current)
                else
                    break
                end
            end
            if id == last then -- the start id
                list[size] = setmetatable({ skip = skip, id = id },mt_object)
            else
                list[size] = setmetatable({ skip = skip, id = id, last = last },mt_object)
            end
        elseif id == glyph_code then
            local chr = getchar(current)
            local dir = directiondata[chr]
            -- could also be a metatable
            list[size] = { char = chr, direction = dir, original = dir, level = 0 }
            current = getnext(current)
         -- if not list[dir] then list[dir] = true end -- not faster when we check for usage
        elseif id == glue_code then -- and how about kern
            list[size] = setmetatable({ },mt_space)
            current = getnext(current)
        elseif id == dir_code then
            local dir, pop = getdirection(current)
            if dir == lefttoright_code then
                list[size] = setmetatable({ },pop and mt_pdf or mt_lre)
            elseif dir == righttoleft_code then
                list[size] = setmetatable({ },pop and mt_pdf or mt_rle)
            else
                list[size] = setmetatable({ id = id },mt_object)
            end
            current = getnext(current)
        elseif id == math_code then
            local skip = 0
            current    = getnext(current)
            -- The nil check keeps us from calling getid on nothing when the
            -- closing math node is missing.
            while current and getid(current) ~= math_code do
                skip    = skip + 1
                current = getnext(current)
            end
            if current then
                -- the closing math node is part of the object too
                skip    = skip + 1
                current = getnext(current)
            end
            list[size] = setmetatable({ id = id, skip = skip },mt_object)
        else
            -- Any other node (disc and such) becomes one object entry. There
            -- used to be a loop here that was meant to also swallow the nodes
            -- that follow, but its condition tested the undefined global `n`
            -- so it never ran. Enabling it is not safe as is: apply_to_list
            -- counts `skip` nodes from the current node, which is the freshly
            -- injected dir node when an entry starts with a localpar, so the
            -- list and the nodes would get out of sync. So we make explicit
            -- what has always happened: one node, nothing skipped.
            list[size] = setmetatable({ id = id, skip = 0 },mt_object)
            current = getnext(current)
        end
    end
    return list, size
end
342 343
-- new
344 345
-- we could support ( ] and [ ) and such ...
346 347
-- ש ) ל ( א 0-0
348
-- ש ( ל ] א 0-0
349
-- ש ( ל ) א 2-4
350
-- ש ( ל [ א ) כ ] 2-6
351
-- ש ( ל ] א ) כ 2-6
352
-- ש ( ל ) א ) כ 2-4
353
-- ש ( ל ( א ) כ 4-6
354
-- ש ( ל ( א ) כ ) 2-8,4-6
355
-- ש ( ל [ א ] כ ) 2-8,4-6
356 357
-- shared stack of open fences: each slot is { mirrored character, list index }
local fencestack = table.setmetatableindex("table")

-- N0 (optional): pairs up mirrorable neutrals in list[start..limit].
--
-- Every entry with direction "on" that has a mirror gets its `mirror` and
-- `class` fields set. An "open" entry is pushed on the stack. A "close" entry
-- is matched against the stack from the top down: when an opener is found
-- whose mirror is this character, both entries get a `paired` field holding
-- the index of the other one and the stack is cut back to just below that
-- opener. When nothing matches the stack is left alone.
--
-- This differs from the previous implementation in two ways, both needed to
-- get the pairs listed in the examples above this function:
--
-- * a matched opener is now popped, so a later closer can no longer pair with
--   it again:    ( a ( b ) c )   gives 2-8,4-6   and   ( a ) b )   gives 2-4
-- * openers are no longer thrown away by a closer that matches none of them:
--   ( a ] b )   gives 2-6
--
-- The `size` argument is accepted but not used.
local function resolve_fences(list,size,start,limit)
    -- N0: funny effects, not always better, so it's an option
    local nofstack = 0
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "on" then
            local char   = entry.char
            local mirror = mirrordata[char]
            if mirror then
                local class = textclassdata[char]
                entry.mirror = mirror
                entry.class  = class
                if class == "open" then
                    nofstack = nofstack + 1
                    local stacktop = fencestack[nofstack]
                    stacktop[1] = mirror
                    stacktop[2] = i
                elseif nofstack == 0 then
                    -- nothing open, so nothing to match
                elseif class == "close" then
                    for n=nofstack,1,-1 do
                        local candidate = fencestack[n]
                        if candidate[1] == char then
                            local open = candidate[2]
                            list[open].paired = i
                            entry.paired      = open
                            -- drop the opener and all that was opened after it
                            nofstack = n - 1
                            break
                        end
                    end
                end
            end
        end
    end
end
397 398
-- local function test_fences(str)
399
-- local list = { }
400
-- for s in string.gmatch(str,".") do
401
-- local b = utf.byte(s)
402
-- list[#list+1] = { c = s, char = b, direction = directiondata[b] }
403
-- end
404
-- resolve_fences(list,#list,1,#size)
405
-- inspect(list)
406
-- end
407
--
408
-- test_fences("a(b)c(d)e(f(g)h)i")
409
-- test_fences("a(b[c)d]")
410 411
-- the action
412 413
-- Determines the base direction of a run. Returns the direction code plus a
-- boolean that is false only when nothing decided it and left to right was
-- taken as fallback. In order of preference:
--
-- * the given direction when it is already a direction code
-- * the direction of the head node when that is a localpar at the start of a
--   paragraph
-- * the old "TLT" / "TRT" strings
-- * the first strong entry in list[1..size] (P2, P3)
local function get_baselevel(head,list,size,direction)
    local known = direction == lefttoright_code or direction == righttoleft_code
    if not known and getid(head) == localpar_code and start_of_par(head) then
        -- from here on the paragraph direction replaces what was passed
        direction = getdirection(head)
        known     = direction == lefttoright_code or direction == righttoleft_code
    end
    if known then
        return direction, true
    end
    -- for old times sake we handle strings too
    if direction == "TLT" then
        return lefttoright_code, true
    end
    if direction == "TRT" then
        return righttoleft_code, true
    end
    -- P2, P3
    for index=1,size do
        local class = list[index].direction
        if class == "l" then
            return lefttoright_code, true
        elseif class == "r" or class == "al" then -- and an ?
            return righttoleft_code, true
        end
    end
    return lefttoright_code, false
end
440 441
-- What each explicit embedding / override does: whether the new level is the
-- next odd (right to left) or next even (left to right) one, and which
-- override becomes active ("on" means none).
local explicit_actions = {
    rle = { odd = true,  override = "on" }, -- X2
    lre = { odd = false, override = "on" }, -- X3
    rlo = { odd = true,  override = "r"  }, -- X4
    lro = { odd = false, override = "l"  }, -- X5
}

-- X1 .. X7: assigns embedding levels to list[1..size], starting at baselevel.
--
-- An embedding or override pushes the current level and override on the shared
-- stack and becomes a removable "bn" entry at the new level; "pdf" pops the
-- stack again and also becomes a removable "bn". All other entries get the
-- current level and, when an override is active, the overriding direction.
-- Entries that would overflow or underflow the stack are left as they are.
local function resolve_explicit(list,size,baselevel)
    -- X1
    local level    = baselevel
    local override = "on"
    local nofstack = 0
    for i=1,size do
        local entry     = list[i]
        local direction = entry.direction
        local action    = explicit_actions[direction]
        if action then
            if nofstack < maximum_stack then
                nofstack = nofstack + 1
                local saved = stack[nofstack]
                saved[1] = level
                saved[2] = override
                local isodd = level % 2 == 1
                if action.odd then
                    level = level + (isodd and 2 or 1) -- least_greater_odd(level)
                else
                    level = level + (isodd and 1 or 2) -- least_greater_even(level)
                end
                override        = action.override
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            elseif trace_directions then
                report_directions("stack overflow at position %a with direction %a",i,direction)
            end
        elseif direction == "pdf" then
            -- X7
            if nofstack > 0 then
                local saved = stack[nofstack]
                level    = saved[1]
                override = saved[2]
                nofstack = nofstack - 1
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            elseif trace_directions then
                report_directions("stack underflow at position %a with direction %a",
                    i,direction)
            else
                report_directions("stack underflow at position %a with direction %a: %s",
                    i,direction,show_list(list,size))
            end
        else
            -- X6
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
    -- X8 (reset states and overrides after paragraph)
end
537 538
-- W1 .. W7: resolves the weak classes of list[start..limit] in place.
--
-- list        : entries with at least a `direction` field
-- size        : accepted but not used
-- start,limit : first and last index of the run (inclusive)
-- orderbefore : "l" or "r", taken as what precedes the run
-- orderafter  : "l" or "r", taken as what follows the run
--
-- Two things were repaired compared to the previous implementation:
--
-- * W4 walked the run with a sliding window that read and wrote an undefined
--   global `current`, so it never moved beyond the second entry, could index
--   nil, and was only stopped by the end of the shared list instead of by
--   `limit`. It now simply visits start+1 .. limit-1.
-- * W5 used the undefined name `sor` for a run of terminators at the very
--   start, which made it look at list[start-1]. It now uses `orderbefore`.
local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1: non spacing marks get the direction of the previous character
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2: a european number becomes an arabic number when the nearest strong
    -- entry before it (inside the run) is arabic
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3: arabic letters become right to left
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: a single separator between two numbers of the same kind becomes
    -- that kind of number ("es" only between "en", "cs" between "en" or "an");
    -- the first and last entry of the run have no neighbour on one side
    for i=start+1,limit-1 do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "cs" then
            local before = list[i-1].direction
            if before == list[i+1].direction then
                if before == "en" or (before == "an" and direction == "cs") then
                    entry.direction = before
                end
            end
        end
    end
    -- W5: a sequence of terminators next to a european number becomes part
    -- of that number
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = runstart
            for j=runstart,limit do
                if list[j].direction == "et" then
                    runlimit = j
                else
                    break
                end
            end
            local rundirection
            if runstart == start then
                rundirection = orderbefore
            else
                rundirection = list[runstart-1].direction
            end
            if rundirection ~= "en" then
                if runlimit == limit then
                    rundirection = orderafter
                else
                    rundirection = list[runlimit+1].direction
                end
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6: what is left of the separators and terminators becomes neutral
    for i=start,limit do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7: a european number after a left to right strong entry (or at the
    -- start of a run that follows left to right) becomes left to right
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end
687 688
-- N1, N2: gives every sequence of neutrals in list[start..limit] a direction.
--
-- A sequence takes the direction found on both sides when these agree (numbers
-- count as right to left; outside the run orderbefore / orderafter are used),
-- and otherwise the direction that goes with the level of its first entry.
--
-- Each index is visited, but once a sequence has been resolved its remaining
-- entries normally no longer test as neutral and are passed over. The `size`
-- argument is accepted but not used.
local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    for first=start,limit do
        local entry = list[first]
        if b_s_ws_on[entry.direction] then
            -- this needs checking
            local last = first
            while last < limit and b_s_ws_on[list[last+1].direction] do
                last = last + 1
            end
            -- what comes before
            local before
            if first == start then
                before = orderbefore
            else
                before = list[first-1].direction
                if before == "en" or before == "an" then
                    before = "r"
                end
            end
            -- what comes after
            local after
            if last == limit then
                after = orderafter
            else
                after = list[last+1].direction
                if after == "en" or after == "an" then
                    after = "r"
                end
            end
            local resolved
            if before == after then
                -- N1
                resolved = before
            elseif entry.level % 2 == 1 then
                -- N2 / does the weird period
                resolved = "r"
            else
                resolved = "l"
            end
            for index=first,last do
                list[index].direction = resolved
            end
        end
    end
end
737 738
-- I1, I2: raises the level of the entries in list[start..limit] according to
-- their resolved direction.
--
-- even level : "r" goes up by one, "an" and "en" go up by two
-- odd level  : "l", "en" and "an" go up by one
--
-- Only list, start and limit are used; the other arguments are accepted for
-- symmetry with the other resolvers.
local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
    for index=start,limit do
        local item     = list[index]
        local current  = item.level
        local class    = item.direction
        local isnumber = class == "an" or class == "en"
        local raise    = 0
        if current % 2 == 1 then
            -- I2
            if class == "l" or isnumber then
                raise = 1
            end
        elseif class == "r" then
            -- I1
            raise = 1
        elseif isnumber then
            -- I1
            raise = 2
        end
        if raise > 0 then
            item.level = current + raise
        end
    end
end
758 759
-- Runs the weak, (optional) fence, neutral and implicit resolvers over the
-- level runs of list[1..size] and then finishes the levels and mirroring.
--
-- X10 : the list is cut in runs; a run goes from `first` up to the index where
--       the level changes (or the end of the list) and the next run starts at
--       that same index. The direction assumed before and after a run follows
--       from the higher of the run level and the neighbouring level, with
--       baselevel standing in at both ends of the list.
-- L1  : separators, the whitespace before them and the whitespace at the end
--       fall back to baselevel.
-- L4  : with fence analysis a mirror set by resolve_fences is kept only for
--       paired entries at an odd level; without it every entry at an odd
--       level that has a mirror gets one.
local function resolve_levels(list,size,baselevel,analyze_fences)
    -- X10
    local first = 1
    while first < size do
        local runlevel = list[first].level
        local last     = first + 1
        while last < size and list[last].level == runlevel do
            last = last + 1
        end
        local before, after
        if first == 1 then
            before = baselevel
        else
            before = list[first-1].level
        end
        if last == size then
            after = baselevel
        else
            after = list[last+1].level
        end
        if before < runlevel then
            before = runlevel
        end
        if after < runlevel then
            after = runlevel
        end
        local orderbefore = before % 2 == 1 and "r" or "l"
        local orderafter  = after  % 2 == 1 and "r" or "l"
        -- W1 .. W7
        resolve_weak(list,size,first,last,orderbefore,orderafter)
        -- N0
        if analyze_fences then
            resolve_fences(list,size,first,last)
        end
        -- N1 .. N2
        resolve_neutral(list,size,first,last,orderbefore,orderafter)
        -- I1 .. I2
        resolve_implicit(list,size,first,last,orderbefore,orderafter,baselevel)
        first = last
    end
    -- L1
    for index=1,size do
        local item  = list[index]
        local class = item.original
        -- (1)
        if class == "s" or class == "b" then
            item.level = baselevel
            -- (2)
            local back = index - 1
            while back >= 1 do
                local previous = list[back]
                if not whitespace[previous.original] then
                    break
                end
                previous.level = baselevel
                back = back - 1
            end
        end
    end
    -- (3)
    for index=size,1,-1 do
        local item = list[index]
        if not whitespace[item.original] then
            break
        end
        item.level = baselevel
    end
    -- L4
    for index=1,size do
        local item  = list[index]
        local isodd = item.level % 2 == 1 -- odd(entry.level)
        if analyze_fences then
            if item.mirror and not (isodd and item.paired) then
                item.mirror = false
            end
        elseif isodd then
            local mirror = mirrordata[item.char]
            if mirror then
                item.mirror = mirror
            end
        end
    end
end
836 837
-- scratch stack used when closing the last run (shared, never cleared)
local stack = { }

-- L2, but without actually reversing anything: entries in list[1..size] only
-- get a `begindir` / `enddir` field at the spots where a direction node has to
-- be injected later on.
--
-- For each level from 0 up to the highest one in use, every stretch of entries
-- at that level or above is bracketed: left to right for the even levels and
-- right to left for the odd ones. A marker set for a higher level replaces one
-- set for a lower level on the same entry. When the last entry did not get an
-- end marker, the markers are counted and the last still open direction is
-- closed there.
local function insert_dir_points(list,size)
    local maxlevel = 0
    for index=1,size do
        local level = list[index].level
        if maxlevel < level then
            maxlevel = level
        end
    end
    local lefttoright = true
    for level=0,maxlevel do
        local dircode
        if lefttoright then
            dircode = lefttoright_code
        else
            dircode = righttoleft_code
        end
        lefttoright = not lefttoright
        local inside   = false
        local previous = nil
        for index=1,size do
            local item = list[index]
            if item.level >= level then
                if not inside then
                    item.begindir = dircode
                    inside = true
                end
            elseif inside then
                previous.enddir = dircode
                inside = false
            end
            previous = item
        end
    end
    -- make sure to close the run at end of line
    local final = list[size]
    if final.enddir then
        return
    end
    local open = 0
    for index=1,size do
        local item = list[index]
        if item.enddir then
            open = open - 1
        end
        local bdir = item.begindir
        if bdir then
            open = open + 1
            stack[open] = bdir
        end
    end
    if open > 0 then
        if trace_list and open > 1 then
            report_directions("unbalanced list")
        end
        final.enddir = stack[open]
    end
end
904 905
-- We flag nodes that can be skipped when we see them again but because whatever
906
-- mechanism can inject dir nodes that then are not flagged, we don't flag dir
907
-- nodes that we inject here.
908 909
-- Marks a node as seen by this pass so that build_list can recognize it later.
local function flag_processed(node)
    local p = properties[node]
    if p then
        p.directions = true
    else
        properties[node] = { directions = true }
    end
end

-- Walks the node list and the entries of list[1..size] in parallel and applies
-- what was resolved: glyphs get mirrored (and colored when tracing), boxes get
-- `pardir` as direction, direction nodes are injected where an entry has a
-- `begindir` / `enddir`, and entries flagged with `remove` are taken out when
-- control removal is enabled. Returns the (possibly new) head.
--
-- Injected direction nodes are deliberately not flagged as processed.
local function apply_to_list(list,size,head,pardir)
    local index   = 1
    local current = head
    if trace_list then
        report_directions("start run")
    end
    while current do
        if index > size then
            report_directions("fatal error, size mismatch")
            break
        end
        local id       = getid(current)
        local entry    = list[index]
        local begindir = entry.begindir
        local enddir   = entry.enddir
        flag_processed(current)
        if id == glyph_code then
            local mirror = entry.mirror
            if mirror then
                setchar(current,mirror)
            end
            if trace_directions then
                local direction = entry.direction
                if trace_list then
                    local original = entry.original
                    local char     = entry.char
                    local level    = entry.level
                    if direction == original then
                        report_directions("%2i : %C : %s",level,char,direction)
                    else
                        report_directions("%2i : %C : %s -> %s",level,char,original,direction)
                    end
                end
                setcolor(current,direction,false,mirror)
            end
        elseif id == hlist_code or id == vlist_code then
            setdirection(current,pardir) -- is this really needed?
        elseif id == glue_code then
            if enddir and getsubtype(current) == parfillskip_code then
                -- insert the last enddir before \parfillskip glue
                head   = insert_node_before(head,current,new_direction(enddir,true))
                enddir = false
            end
        elseif begindir and id == localpar_code and start_of_par(current) then
            -- localpar should always be the 1st node, so here the begin goes
            -- after instead of before
            head, current = insert_node_after(head,current,new_direction(begindir))
            begindir = nil
        end
        if begindir then
            head = insert_node_before(head,current,new_direction(begindir))
        end
        -- step over (and flag) the extra nodes covered by this entry
        local skip = entry.skip
        if skip and skip > 0 then
            for _=1,skip do
                current = getnext(current)
                flag_processed(current)
            end
        end
        if enddir then
            head, current = insert_node_after(head,current,new_direction(enddir,true))
        end
        if entry.remove and remove_controls then
            -- X9
            head, current = remove_node(head,current,true)
        else
            current = getnext(current)
        end
        index = index + 1
    end
    if trace_list then
        report_directions("stop run")
    end
    return head
end
997 998
-- If needed we can optimize for only_one. There is no need to do anything
999
-- when it's not a glyph. Otherwise we only need to check mirror and apply
1000
-- directions when it's different from the surrounding. Paragraphs always
1001
-- have more than one node. Actually, we only enter this function when we
1002
-- do have a glyph!
1003 1004
-- The handler: analyzes the node list starting at `head` and returns the head
-- of the list with directions applied.
--
-- head      : first node of the list
-- direction : preferred base direction, passed on to get_baselevel
-- only_one  : accepted but not used
-- where     : passed on to build_list
--
-- Whether fences are analyzed is looked up from the directions attribute of
-- the head node, so for the moment it is a property of the whole list.
local function process(head,direction,only_one,where)
    -- for the moment a whole paragraph property
    local analyze_fences = getfences(getattr(head,a_directions))
    --
    local entries, count = build_list(head,where)
    local baselevel      = get_baselevel(head,entries,count,direction)
    if trace_details then
        local name = baselevel == righttoleft_code and "r2l" or "l2r"
        report_directions("analyze: baselevel %a",name)
        report_directions("before : %s",show_list(entries,count,"original"))
    end
    resolve_explicit(entries,count,baselevel)
    resolve_levels(entries,count,baselevel,analyze_fences)
    insert_dir_points(entries,count)
    if trace_details then
        report_directions("after : %s",show_list(entries,count,"direction"))
        report_directions("result : %s",show_done(entries,count))
    end
    return apply_to_list(entries,count,head,baselevel)
end
1024 1025
local variables = interfaces.variables

-- All method names end up at the same handler; the first three are only kept
-- for old times sake.
for _, method in ipairs { "one", "two", "three", "unicode" } do
    directions.installhandler(variables[method],process)
end
1031