scite-ctx-bidi.lua /size: 17 Kb    last modification: 2020-07-01 14:35
1
-- Register this file in the global module table, creating that table on first use.
if not modules then
    modules = { }
end

modules['scite-ctx-bidi'] = {
    version   = 1.001,
    -- The constructor used to list "comment" twice. The outcome of repeated keys
    -- in a table constructor is not specified by the reference manual, so only
    -- the value that appeared last is kept as a field. The earlier text was:
    -- "companion to scite-ctx.lua"
    comment   = "Unicode bidi (sort of) variant c",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
9 10
-- Partial comment from typo-duc.lua:
--
-- This is a follow up on typo-dua which itself is a follow up on t-bidi by Khaled Hosny which
-- in turn is based on minibidi.c from Arabeyes. This is a further optimization, as well as
-- an update on some recent unicode bidi developments. There is (and will) also be more control
-- added. As a consequence this module is somewhat slower than its precursor which itself is
-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin
-- support. However, in the meantime performance got a bit better and this third variant is again
-- some 10% faster than the second variant.
--
-- ... some patches and updates applied
-- ... some code can be removed
-- ... has to be kept in sync with context
-- ... mtxrun --script interface
--
-- ... this feature is more fun than useful
-- ... this way we can use it to check what scite / uniscribe sees (as scintilla is weak on
--     bidi selection)
28 29
-- Local handle on the metatable setter used further down in this file.
local setmetatable = setmetatable

-- Lookup tables that the rest of this file indexes per character.
local data = require("context.lexers.data.scite-context-data-bidi")

local directiondata, mirrordata, textclassdata = data.directions, data.mirrors, data.textclasses
36 37
-- setmetatable(directiondata,{ __index = function(t,k) local v = "l" t[k] = v return v end })
38 39
-- Upper bound on the number of nested explicit embeddings and overrides that
-- resolve_explicit pushes on the shared stack.
local maximum_stack  = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much
-- Default for the optional fence analysis; process() hands this value to
-- resolve_levels.
local analyze_fences = false
41 42
-- Original classes whose level is reset to the base level by the L1 pass in
-- resolve_levels.
local whitespace = {
    ["lre"] = true,
    ["rle"] = true,
    ["lro"] = true,
    ["rlo"] = true,
    ["pdf"] = true,
    ["bn"]  = true,
    ["ws"]  = true,
}

-- Directions that resolve_neutral collects into runs and resolves together.
local b_s_ws_on = {
    ["b"]  = true,
    ["s"]  = true,
    ["ws"] = true,
    ["on"] = true,
}
58 59
-- Metatable for space entries: build_list creates an empty table per space and
-- lets it inherit the fields below until one of them is assigned.
local mt_space = { }

mt_space.__index = {
    char      = 0x0020,
    direction = "ws",
    original  = "ws",
    level     = 0,
}
60
----- mt_lre = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
61
----- mt_rle = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
62
----- mt_pdf = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
63
----- mt_object = { __index = { char = 0xFFFC, direction = "on", original = "on", level = 0 } }
64 65
-- Shared scratch stack: reading a slot that does not exist yet creates an empty
-- table, stores it in that slot and returns it.
local stack = setmetatable({ }, {
    __index = function(self,slot)
        local fresh = { }
        self[slot] = fresh
        return fresh
    end
}) -- shared
68 69
-- Converts the array `list` in place into an array of entry tables and returns
-- the list together with its length.
--
-- Every entry exposes the fields char, direction, original and level. The
-- direction is looked up in directiondata and falls back to "l" when the
-- character has no entry there. Space entries are empty tables that inherit
-- their fields from mt_space.
local function build_list(list)
    -- P1
    local size = #list
    for i=1,size do
        local chr = list[i]
        -- NOTE(review): the literal compared against is assumed to be a single
        -- space; confirm against the pristine file.
        if chr == " " then
            list[i] = setmetatable({ },mt_space)
        else
            local dir = directiondata[chr] or "l"
            list[i] = { char = chr, direction = dir, original = dir, level = 0 }
        end
    end
    return list, size
end
83 84
-- Optional fence pass (N0) over the entries list[start] .. list[limit].
--
-- Every entry with direction "on" whose character has an entry in mirrordata
-- gets its mirror and class fields set. Opening fences are pushed on the shared
-- stack; a closing fence walks down the stack until it finds an opener whose
-- mirror equals the closing character and then links both entries through
-- their paired fields.
local function resolve_fences(list,size,start,limit)
    -- N0: funny effects, not always better, so it's an option
    local depth = 0
    for index=start,limit do
        local item = list[index]
        if item.direction == "on" then
            local char    = item.char
            local partner = mirrordata[char]
            if partner then
                local class = textclassdata[char]
                item.mirror = partner
                item.class  = class
                if class == "open" then
                    depth = depth + 1
                    local top = stack[depth]
                    top[1] = partner
                    top[2] = index
                    top[3] = false -- not used
                elseif class == "close" and depth > 0 then
                    -- with an empty stack there is nothing to pair with
                    repeat
                        local top = stack[depth]
                        if top[1] == char then
                            -- the matching opener stays on the stack
                            local opener = top[2]
                            list[opener].paired = index
                            list[index].paired  = opener
                            break
                        end
                        -- do we mirror or not
                        depth = depth - 1
                    until depth == 0
                end
            end
        end
    end
end
123 124
-- Determines the base level of a paragraph.
--
-- Returns three values: the level (0 or 1), the matching direction name ("TLT"
-- or "TRT") and a boolean that is false only when neither an explicit
-- direction nor a strong character decided the outcome.
local function get_baselevel(list,size,direction)
    -- an explicit direction wins
    if direction == "TRT" then
        return 1, "TRT", true
    end
    if direction == "TLT" then
        return 0, "TLT", true
    end
    -- P2, P3: the first strong character decides
    local index = 1
    while index <= size do
        local found = list[index].direction
        if found == "l" then
            return 0, "TLT", true
        elseif found == "r" or found == "al" then -- and an ?
            return 1, "TRT", true
        end
        index = index + 1
    end
    -- nothing strong found: left to right, flagged as a fallback
    return 0, "TLT", false
end
142 143
-- Explicit embedding and override initiators (rules X2 .. X5). For each control
-- the table records whether the new level has to be odd and which override
-- becomes active afterwards ("on" means no override).
local explicit_initiators = {
    rle = { odd = true,  override = "on" }, -- X2
    lre = { odd = false, override = "on" }, -- X3
    rlo = { odd = true,  override = "r"  }, -- X4
    lro = { odd = false, override = "l"  }, -- X5
}

-- Assigns embedding levels to all entries of list (rules X1 .. X7).
--
-- list      : array of entries as produced by build_list
-- size      : number of entries in list
-- baselevel : level that is in effect at the start of the paragraph
--
-- Explicit controls (rle, lre, rlo, lro, pdf) get the level that is in effect,
-- have their direction changed to "bn" and are flagged with remove = true. All
-- other entries get the current level and, when an override is active, the
-- overriding direction. The shared stack keeps the level and override that a
-- matching pdf restores.
local function resolve_explicit(list,size,baselevel)
 -- if list.rle or list.lre or list.rlo or list.lro then
        -- X1
        local level    = baselevel
        local override = "on"
        local nofstack = 0
        local overflow = 0 -- initiators that did not fit on the stack
        for i=1,size do
            local entry     = list[i]
            local direction = entry.direction
            local initiator = explicit_initiators[direction]
            if initiator then
                -- X2 .. X5
                if nofstack < maximum_stack then
                    nofstack = nofstack + 1
                    local stacktop = stack[nofstack]
                    stacktop[1] = level
                    stacktop[2] = override
                    if initiator.odd then
                        level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                    else
                        level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                    end
                    override = initiator.override
                else
                    -- the state is left alone; the counter makes sure that the
                    -- matching pdf does not pop an unrelated stack entry
                    overflow = overflow + 1
                end
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            elseif direction == "pdf" then
                -- X7
                if overflow > 0 then
                    overflow = overflow - 1
                elseif nofstack > 0 then
                    local stacktop = stack[nofstack]
                    level    = stacktop[1]
                    override = stacktop[2]
                    nofstack = nofstack - 1
                end
                -- a pdf without a matching initiator changes nothing but is
                -- still neutralized so that level stays a number
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            else
                -- X6
                entry.level = level
                if override ~= "on" then
                    entry.direction = override
                end
            end
        end
 -- else
 --     for i=1,size do
 --         list[i].level = baselevel
 --     end
 -- end
    -- X8 (reset states and overrides after paragraph)
end
230 231
-- Resolves the weak types of one level run (rules W1 .. W7) by rewriting the
-- direction field of the entries list[start] .. list[limit] in place.
--
-- list        : array of entries
-- size        : number of entries in list (not used here)
-- start,limit : first and last index of the run, both inclusive
-- orderbefore : direction assumed in front of the run
-- orderafter  : direction assumed behind the run
local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1: non spacing marks get the direction of the previous character
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2: mess with numbers and arabic
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            -- look back in the run for the nearest strong character
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: make separators number
    -- only entries with a neighbour on both sides inside the run qualify
    for i=start+1,limit-1 do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "cs" then
            local before = list[i-1].direction
            local after  = list[i+1].direction
            if before == after then
                if before == "en" then
                    entry.direction = "en"
                elseif before == "an" and direction == "cs" then
                    entry.direction = "an"
                end
            end
        end
    end
    -- W5
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = runstart
            for j=runstart,limit do
                if list[j].direction == "et" then
                    runlimit = j
                else
                    break
                end
            end
            -- what sits in front of the run of terminators
            local rundirection
            if runstart == start then
                rundirection = orderbefore
            else
                rundirection = list[runstart-1].direction
            end
            -- no number in front, so look behind the run
            if rundirection ~= "en" then
                if runlimit == limit then
                    rundirection = orderafter
                else
                    rundirection = list[runlimit+1].direction
                end
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6
    for i=start,limit do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end
379 380
-- Resolves runs of neutral entries (rules N1 and N2) inside one level run by
-- rewriting the direction field of list[start] .. list[limit] in place.
--
-- list        : array of entries
-- size        : number of entries in list (not used here)
-- start,limit : first and last index of the level run, both inclusive
-- orderbefore : direction assumed in front of the level run
-- orderafter  : direction assumed behind the level run
--
-- A neutral run is a maximal sequence of entries whose direction is a key of
-- b_s_ws_on. Numbers ("en", "an") next to such a run count as "r".
local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    -- N1, N2
    -- a while loop, so that the index can really jump past a resolved run
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            -- this needs checking
            local leading_direction, trailing_direction, resolved_direction
            local runstart = i
            local runlimit = runstart
            for j=runstart+1,limit do
                if b_s_ws_on[list[j].direction] then
                    runlimit = j
                else
                    break
                end
            end
            if runstart == start then
                leading_direction = orderbefore
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            if runlimit == limit then
                trailing_direction = orderafter
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            if leading_direction == trailing_direction then
                -- N1
                resolved_direction = leading_direction
            else
                -- N2 / does the weird period
                resolved_direction = entry.level % 2 == 1 and "r" or "l"
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            i = runlimit
        end
        i = i + 1
    end
end
427 428
-- Raises the level of the entries list[start] .. list[limit] according to
-- their resolved direction (rules I1 and I2).
--
-- On an even level "r" adds one and numbers ("an", "en") add two; on an odd
-- level "l" and numbers add one. The remaining parameters are accepted but not
-- used here.
local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
    for index=start,limit do
        local item    = list[index]
        local current = item.level
        local kind    = item.direction
        local numeric = kind == "an" or kind == "en"
        local bump    = 0
        if current % 2 == 1 then
            -- I2
            if numeric or kind == "l" then
                bump = 1
            end
        elseif kind == "r" then
            -- I1
            bump = 1
        elseif numeric then
            -- I1
            bump = 2
        end
        if bump > 0 then
            item.level = current + bump
        end
    end
end
448 449
-- Splits the list into level runs, runs the weak, fence (optional), neutral
-- and implicit passes over each run and finally applies the L1 and L4 steps
-- to the whole list.
--
-- list           : array of entries with levels assigned by resolve_explicit
-- size           : number of entries in list
-- baselevel      : paragraph level
-- analyze_fences : when true, fences are paired per run and only paired fences
--                  on odd levels keep their mirror; this parameter shadows the
--                  file level variable of the same name
local function resolve_levels(list,size,baselevel,analyze_fences)
    -- X10
    local start = 1
    -- the body never runs for a list with fewer than two entries
    while start < size do
        local level = list[start].level
        local limit = start + 1
        -- limit stops at the first entry with another level or at size
        while limit < size and list[limit].level == level do
            limit = limit + 1
        end
        local prev_level  = start == 1    and baselevel or list[start-1].level
        local next_level  = limit == size and baselevel or list[limit+1].level
        -- the higher of the two adjacent levels decides: odd is "r", even is "l"
        local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
        local orderafter  = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
        -- W1 .. W7
        resolve_weak(list,size,start,limit,orderbefore,orderafter)
        -- N0
        if analyze_fences then
            resolve_fences(list,size,start,limit)
        end
        -- N1 .. N2
        resolve_neutral(list,size,start,limit,orderbefore,orderafter)
        -- I1 .. I2
        resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
        -- the next run starts at the entry where this one ended
        start = limit
    end
    -- L1
    for i=1,size do
        local entry     = list[i]
        local direction = entry.original
        -- (1)
        if direction == "s" or direction == "b" then
            entry.level = baselevel
            -- (2)
            -- walk back over the whitespace in front of the separator
            for j=i-1,1,-1 do
                local entry = list[j]
                if whitespace[entry.original] then
                    entry.level = baselevel
                else
                    break
                end
            end
        end
    end
    -- (3)
    -- trailing whitespace falls back to the base level
    for i=size,1,-1 do
        local entry = list[i]
        if whitespace[entry.original] then
            entry.level = baselevel
        else
            break
        end
    end
    -- L4
    if analyze_fences then
        -- mirrors were set by resolve_fences: drop those that are on an even
        -- level or that are on an odd level without a partner
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then
                if entry.mirror and not entry.paired then
                    entry.mirror = false
                end
            elseif entry.mirror then
                entry.mirror = false
            end
        end
    else
        -- every character on an odd level that has a mirror gets it
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then
                local mirror = mirrordata[entry.char]
                if mirror then
                    entry.mirror = mirror
                end
            end
        end
    end
end
525 526
-- Entry point: converts head in place into a list of entries, determines the
-- base level (forced by direction when that is "TRT" or "TLT"), resolves the
-- explicit levels and then the remaining levels.
--
-- Returns the list of entries and the number of entries.
local function process(head,direction)
    local entries, count = build_list(head)
    -- only the first result, the numeric level, is needed here
    local base = (get_baselevel(entries,count,direction))
    resolve_explicit(entries,count,base)
    resolve_levels(entries,count,base,analyze_fences)
    return entries, count
end
533 534
-- Module interface: process is the only function that is exported.
return {
    process = process,
}
537