-- scite-ctx-bidi.lua / size: 17 Kb / last modification: 2020-07-01 14:35
-- Register this file's metadata under 'scite-ctx-bidi' in the global `modules` table,
-- creating that table first when it does not exist yet.
if not modules then modules = { } end modules ['scite-ctx-bidi'] = {
    version   = 1.001,
    comment   = "companion to scite-ctx.lua", -- NOTE(review): the key `comment` is given twice in this constructor
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
    comment   = "Unicode bidi (sort of) variant c", -- second `comment`; presumably this is the value that is kept
}
9
10-- Partial comment from typo-duc.lua:
11--
-- This is a follow up on typo-dua which itself is a follow up on t-bidi by Khaled Hosny which
-- in turn is based on minibidi.c from Arabeyes. This is a further optimization, as well as
-- an update on some recent unicode bidi developments. There is (and will be) more control
-- added. As a consequence this module is somewhat slower than its precursor which itself is
-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin
-- support. However, in the meantime performance got a bit better and this third variant is again
-- some 10% faster than the second variant.
19--
20-- ... some patches and updates applied
21-- ... some code can be removed
22-- ... has to be kept in sync with context
23-- ... mtxrun --script interface
24--
25-- ... this feature is more fun than useful
-- ... this way we can use it to check what scite / uniscribe sees (as scintilla is weak on
-- bidi selection)
28
29local setmetatable = setmetatable
30
31local data = require("context.lexers.data.scite-context-data-bidi")
32
-- Lookup tables taken from the required data module; all three are indexed by a character
-- value in the functions below.
local directiondata  = data.directions  -- character -> direction tag (build_list falls back to "l")
local mirrordata     = data.mirrors     -- character -> mirrored character
local textclassdata  = data.textclasses -- character -> class, compared with "open" / "close" in resolve_fences

-- setmetatable(directiondata,{ __index = function(t,k) local v = "l" t[k] = v return v end })

-- Upper bound on the number of nested embeddings/overrides tracked by resolve_explicit.
local maximum_stack  = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much
-- When true, resolve_levels also runs resolve_fences (N0); process passes this value along.
local analyze_fences = false
41
-- Set of original direction tags that resolve_levels (L1) resets to the base level when they
-- directly precede a "s"/"b" entry or sit at the end of the list.
local whitespace = {
    lre = true,
    rle = true,
    lro = true,
    rlo = true,
    pdf = true,
    bn  = true,
    ws  = true,
}

-- Set of direction tags that resolve_neutral (N1, N2) treats as part of a neutral run.
local b_s_ws_on = {
    b   = true,
    s   = true,
    ws  = true,
    on  = true
}
58
-- Metatable used by build_list for space entries: an empty table with this metatable reads
-- its char/direction/original/level from the shared defaults until a field is assigned.
local mt_space  = { __index = { char = 0x0020, direction = "ws",  original = "ws",  level = 0 } }
----- mt_lre    = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
----- mt_rle    = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
----- mt_pdf    = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
----- mt_object = { __index = { char = 0xFFFC, direction = "on",  original = "on",  level = 0 } }

-- Scratch stack used by both resolve_fences and resolve_explicit.
local stack = { } -- shared

-- Reading a missing slot creates, stores and returns a fresh empty table, so callers can
-- write stack[n][1], stack[n][2], ... without allocating the slot themselves.
setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end })
68
-- P1: turn the array of characters in `list` into an array of entry tables, in place.
--
-- A " " element becomes an empty table that inherits its fields from mt_space; any other
-- element becomes { char, direction, original, level = 0 } where the direction comes from
-- directiondata and defaults to "l".
--
-- Returns the (same) list and the number of entries.
local function build_list(list)
    local size  = #list
    local index = 0
    while index < size do
        index = index + 1
        local chr = list[index]
        local entry
        if chr ~= " " then
            local dir = directiondata[chr] or "l"
            entry = { char = chr, direction = dir, original = dir, level = 0 }
        else
            entry = setmetatable({ },mt_space)
        end
        list[index] = entry
    end
    return list, size
end
83
-- N0: pair opening and closing fences inside list[start] .. list[limit].
--
-- Only entries with direction "on" that have a mirror in mirrordata are considered. Each
-- such entry gets its `mirror` and `class` fields set. An entry of class "open" is pushed
-- on the shared stack (expected closer, index). An entry of class "close" pops the stack
-- until an opener expecting this character is found; both entries then get a `paired`
-- field holding the index of their partner.
--
-- list        : array of entries, updated in place
-- size        : number of entries (not used here)
-- start,limit : first and last index to inspect (inclusive)
local function resolve_fences(list,size,start,limit)
    -- N0: funny effects, not always better, so it's an option
    local nofstack = 0
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "on" then
            local char   = entry.char
            local mirror = mirrordata[char]
            if mirror then
                local class = textclassdata[char]
                entry.mirror = mirror
                entry.class  = class
                if class == "open" then
                    nofstack       = nofstack + 1
                    local stacktop = stack[nofstack]
                    stacktop[1]    = mirror
                    stacktop[2]    = i
                    stacktop[3]    = false -- not used
                elseif nofstack == 0 then
                    -- skip
                elseif class == "close" then
                    while nofstack > 0 do
                        local stacktop = stack[nofstack]
                        -- every inspected opener is popped, the matching one included,
                        -- so that a later closer cannot pair with the same opener again
                        nofstack = nofstack - 1
                        if stacktop[1] == char then
                            local open  = stacktop[2]
                            local close = i
                            list[open ].paired = close
                            list[close].paired = open
                            break
                        end
                    end
                end
            end
        end
    end
end
123
-- Determine the paragraph base level.
--
-- When `direction` is "TRT" or "TLT" that choice is returned as is. Otherwise (P2, P3) the
-- first entry within list[1] .. list[size] whose direction is "r"/"al" or "l" decides.
--
-- Returns three values: the level (1 for right-to-left, 0 for left-to-right), the matching
-- direction string ("TRT" or "TLT"), and a boolean that is false only when nothing decided
-- the direction and the left-to-right default was used.
local function get_baselevel(list,size,direction)
    if direction == "TRT" or direction == "TLT" then
        return direction == "TRT" and 1 or 0, direction, true
    end
    -- P2, P3:
    local index = 1
    while index <= size do
        local found = list[index].direction
        if found == "l" then
            return 0, "TLT", true
        end
        if found == "r" or found == "al" then -- and an ?
            return 1, "TRT", true
        end
        index = index + 1
    end
    return 0, "TLT", false
end
142
-- X1 .. X8: assign explicit embedding levels to list[1] .. list[size].
--
-- "rle"/"lre"/"rlo"/"lro" entries push the current (level, override) pair on the shared
-- stack and raise the level to the next odd (rle, rlo) or even (lre, lro) value; the two
-- override variants also set the override direction. A "pdf" entry restores the pair that
-- was pushed last. Each of these entries that is acted upon gets the level in effect after
-- it, direction "bn" and remove = true. Every other entry gets the current level and, when
-- an override is active, the override direction.
--
-- Embeddings/overrides beyond maximum_stack and a "pdf" without a matching push are left
-- untouched.
--
-- list      : array of entries, updated in place
-- size      : number of entries
-- baselevel : level to start from
local function resolve_explicit(list,size,baselevel)
-- if list.rle or list.lre or list.rlo or list.lro then
    -- X1
    local level    = baselevel
    local override = "on"
    local nofstack = 0
    for i=1,size do
        local entry     = list[i]
        local direction = entry.direction
        -- X2
        if direction == "rle" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                override        = "on"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X3
        elseif direction == "lre" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                override        = "on"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X4
        elseif direction == "rlo" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                override        = "r"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X5
        elseif direction == "lro" then
            if nofstack < maximum_stack then
                nofstack        = nofstack + 1
                local stacktop  = stack[nofstack]
                stacktop[1]     = level
                stacktop[2]     = override
                level           = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                override        = "l"
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X7
        elseif direction == "pdf" then
            -- Only pop when something was pushed: reading stack[0] would hand back an
            -- empty table (see the stack metatable) and turn level/override into nil.
            if nofstack > 0 then
                local stacktop  = stack[nofstack]
                level           = stacktop[1]
                override        = stacktop[2]
                nofstack        = nofstack - 1
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        -- X6
        else
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
-- else
--     for i=1,size do
--         list[i].level = baselevel
--     end
-- end
    -- X8 (reset states and overrides after paragraph)
end
230
-- W1 .. W7: resolve the weak direction types in list[start] .. list[limit].
--
-- list        : array of entries; only their `direction` field is read and updated
-- size        : number of entries in list (not used here)
-- start,limit : first and last index of the run (inclusive)
-- orderbefore : direction assumed in front of the run
-- orderafter  : direction assumed behind the run
--
-- Nothing outside start .. limit is read or written; at the edges of the run orderbefore
-- and orderafter stand in for the neighbours.
local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1: non spacing marks get the direction of the previous character
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2: mess with numbers and arabic (an "en" becomes "an" when the nearest preceding
    -- "al"/"r"/"l" entry in the run is "al")
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: make separators number (a single "es" between two "en", or a single "cs"
    -- between two "en" or two "an"); the first and last entry of the run have no two
    -- neighbours inside the run and are therefore skipped
    for i=start+1,limit-1 do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" then
            if list[i-1].direction == "en" and list[i+1].direction == "en" then
                entry.direction = "en"
            end
        elseif direction == "cs" then
            local prevdirection = list[i-1].direction
            if prevdirection == "en" then
                if list[i+1].direction == "en" then
                    entry.direction = "en"
                end
            elseif prevdirection == "an" and list[i+1].direction == "an" then
                entry.direction = "an"
            end
        end
    end
    -- W5: a sequence of "et" adjacent to an "en" becomes "en"
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = runstart
            for j=runstart+1,limit do
                if list[j].direction == "et" then
                    runlimit = j
                else
                    break
                end
            end
            -- look in front of the sequence first, then behind it
            local rundirection
            if runstart == start then
                rundirection = orderbefore
            else
                rundirection = list[runstart-1].direction
            end
            if rundirection ~= "en" then
                if runlimit == limit then
                    rundirection = orderafter
                else
                    rundirection = list[runlimit+1].direction
                end
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6: what is left of the separators and terminators becomes neutral
    for i=start,limit do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7: an "en" becomes "l" when the nearest preceding "l"/"r" (or orderbefore when
    -- there is none in the run) is "l"
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end
379
-- N1, N2: resolve runs of neutral entries (direction in b_s_ws_on) in list[start] .. list[limit].
--
-- For each maximal run of neutrals the direction in front of it (orderbefore at the start
-- of the range) and behind it (orderafter at the end of the range) is looked up, where
-- "en" and "an" count as "r". When both agree the run takes that direction (N1); otherwise
-- it takes "r" or "l" depending on whether the level of the first entry of the run is odd
-- or even (N2).
--
-- list        : array of entries, `direction` is updated in place
-- size        : number of entries (not used here)
-- start,limit : first and last index to process (inclusive)
local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    -- N1, N2
    -- a while loop is used because the index has to jump over a run once it is resolved;
    -- assigning to the control variable of a numeric for loop does not do that
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            -- this needs checking
            local leading_direction, trailing_direction, resolved_direction
            local runstart = i
            local runlimit = runstart
            for j=runstart+1,limit do
                if b_s_ws_on[list[j].direction] then
                    runlimit = j
                else
                    break
                end
            end
            if runstart == start then
                leading_direction = orderbefore
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            if runlimit == limit then
                trailing_direction = orderafter
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            if leading_direction == trailing_direction then
                -- N1
                resolved_direction = leading_direction
            else
                -- N2 / does the weird period
                resolved_direction = entry.level % 2 == 1 and "r" or "l"
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            i = runlimit
        end
        i = i + 1
    end
end
427
-- I1, I2: raise the level of list[start] .. list[limit] according to the resolved direction.
--
-- On an even level "r" adds one and "an"/"en" add two (I1); on an odd level "l", "en" and
-- "an" add one (I2). All other combinations leave the level as it is.
--
-- Only list, start and limit are used; the remaining parameters are accepted but ignored.
local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
    local index = start
    while index <= limit do
        local item    = list[index]
        local current = item.level
        local kind    = item.direction
        local odd     = current % 2 == 1
        local bump    = 0
        if kind == "en" or kind == "an" then
            bump = odd and 1 or 2
        elseif kind == "r" then
            bump = odd and 0 or 1
        elseif kind == "l" then
            bump = odd and 1 or 0
        end
        if bump > 0 then
            item.level = current + bump
        end
        index = index + 1
    end
end
448
-- X10, L1, L4: split the list into level runs, resolve each run, then fix up whitespace
-- levels and mirrors.
--
-- A level run is a maximal stretch of consecutive entries that share the level set by
-- resolve_explicit. For each run the weak (W1 .. W7), optionally fence (N0), neutral
-- (N1, N2) and implicit (I1, I2) steps are applied with an orderbefore/orderafter of "r"
-- when the higher of the run's level and the neighbouring level is odd and "l" otherwise.
-- The neighbouring level is the base level at either end of the list.
--
-- list           : array of entries, updated in place
-- size           : number of entries
-- baselevel      : paragraph base level
-- analyze_fences : when true N0 is applied and only paired fences on odd levels keep
--                  their mirror; otherwise every entry on an odd level that has a mirror
--                  in mirrordata gets it
local function resolve_levels(list,size,baselevel,analyze_fences)
    -- X10
    local start      = 1
    -- level of the run in front of the current one, as it was before the implicit step
    -- changed the entries of that run
    local prev_level = baselevel
    while start <= size do
        local level = list[start].level
        local limit = start
        -- extend the run as long as the *next* entry still has the same level, so that
        -- list[limit] always belongs to this run
        while limit < size and list[limit+1].level == level do
            limit = limit + 1
        end
        local next_level  = limit == size and baselevel or list[limit+1].level
        local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
        local orderafter  = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
        -- W1 .. W7
        resolve_weak(list,size,start,limit,orderbefore,orderafter)
        -- N0
        if analyze_fences then
            resolve_fences(list,size,start,limit)
        end
        -- N1 .. N2
        resolve_neutral(list,size,start,limit,orderbefore,orderafter)
        -- I1 .. I2
        resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
        prev_level = level
        start      = limit + 1
    end
    -- L1
    for i=1,size do
        local entry     = list[i]
        local direction = entry.original
        -- (1) segment and paragraph separators go back to the base level
        if direction == "s" or direction == "b" then
            entry.level = baselevel
            -- (2) and so does the whitespace directly in front of them
            for j=i-1,1,-1 do
                local entry = list[j]
                if whitespace[entry.original] then
                    entry.level = baselevel
                else
                    break
                end
            end
        end
    end
    -- (3) trailing whitespace goes back to the base level
    for i=size,1,-1 do
        local entry = list[i]
        if whitespace[entry.original] then
            entry.level = baselevel
        else
            break
        end
    end
    -- L4
    if analyze_fences then
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then
                if entry.mirror and not entry.paired then
                    entry.mirror = false
                end
            elseif entry.mirror then
                entry.mirror = false
            end
        end
    else
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then
                local mirror = mirrordata[entry.char]
                if mirror then
                    entry.mirror = mirror
                end
            end
        end
    end
end
525
-- Entry point: run the whole analysis on an array of characters.
--
-- head      : array of characters; it is converted in place into an array of entries
-- direction : optional "TRT" or "TLT" to force the base direction; otherwise the base
--             level is derived from the content
--
-- Returns the list of entries and its size.
local function process(head,direction)
    local entries, count = build_list(head)
    -- get_baselevel returns three values, only the level is needed here
    local base = get_baselevel(entries,count,direction)
    resolve_explicit(entries,count,base)
    resolve_levels(entries,count,base,analyze_fences)
    return entries, count
end
533
-- Module interface: only `process` is exposed.
return {
    process = process,
}
537