typo-duc.lua /size: 37 Kb    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['typo-duc'] = {
2    version   = 1.001,
3    comment   = "companion to typo-dir.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files",
7    comment   = "Unicode bidi (sort of) variant c",
8}
9
-- This is a follow up on typo-uda which itself is a follow up on t-bidi by Khaled Hosny which
-- in turn is based on minibidi.c from Arabeyes. This variant has further optimizations as well
-- as an update for some recent unicode bidi developments. There is (and will be) more control
-- added too. As a consequence this module is somewhat slower than its precursor which itself is
-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin
-- support. However, in the meantime performance got a bit better and this third variant is again
-- some 10% faster than the second variant.
17
18-- todo (cf html):
19--
20-- normal            The element does not offer a additional level of embedding with respect to the bidirectional algorithm. For inline elements implicit reordering works across element boundaries.
21-- embed             If the element is inline, this value opens an additional level of embedding with respect to the bidirectional algorithm. The direction of this embedding level is given by the direction property.
22-- bidi-override     For inline elements this creates an override. For block container elements this creates an override for inline-level descendants not within another block container element. This means that inside the element, reordering is strictly in sequence according to the direction property; the implicit part of the bidirectional algorithm is ignored.
23-- isolate           This keyword indicates that the element's container directionality should be calculated without considering the content of this element. The element is therefore isolated from its siblings. When applying its bidirectional-resolution algorithm, its container element treats it as one or several U+FFFC Object Replacement Character, i.e. like an image.
-- isolate-override  This keyword applies the isolation behavior of the isolate keyword to the surrounding content and the override behavior of the bidi-override keyword to the inner content.
25-- plaintext         This keyword makes the elements directionality calculated without considering its parent bidirectional state or the value of the direction property. The directionality is calculated using the P2 and P3 rules of the Unicode Bidirectional Algorithm.
26--                   This value allows to display data which has already formatted using a tool following the Unicode Bidirectional Algorithm.
27--
28-- todo: check for introduced errors
29-- todo: reuse list, we have size, so we can just change values (and auto allocate when not there)
30-- todo: reuse the stack
31-- todo: no need for a max check
32-- todo: collapse bound similar ranges (not ok yet)
33-- todo: combine some sweeps
-- todo: removing is not needed when we inject at the same spot (only change the dir property)
35-- todo: isolated runs (isolating runs are similar to bidi=local in the basic analyzer)
36
37-- todo: check unicode addenda (from the draft):
38--
39-- Added support for canonical equivalents in BD16.
40-- Changed logic in N0 to not check forwards for context in the case of enclosed text opposite the embedding direction.
41-- Major extension of the algorithm to allow for the implementation of directional isolates and the introduction of new isolate-related values to the Bidi_Class property.
42-- Adds BD8, BD9, BD10, BD11, BD12, BD13, BD14, BD15, and BD16, Sections 2.4 and 2.5, and Rules X5a, X5b, X5c and X6a.
43-- Extensively revises Section 3.3.2, Explicit Levels and Directions and its existing X rules to formalize the algorithm for matching a PDF with the embedding or override initiator whose scope it terminates.
44-- Moves Rules X9 and X10 into a separate new Section 3.3.3, Preparations for Implicit Processing.
45-- Modifies Rule X10 to make the isolating run sequence the unit to which subsequent rules are applied.
46-- Modifies Rule W1 to change an NSM preceded by an isolate initiator or PDI into ON.
47-- Adds Rule N0 and makes other changes to Section 3.3.5, Resolving Neutral and Isolate Formatting Types to resolve bracket pairs to the same level.
48
49local insert, remove, unpack, concat = table.insert, table.remove, table.unpack, table.concat
50local utfchar = utf.char
51local setmetatable = setmetatable
52local formatters = string.formatters
53
54local directiondata        = characters.directions
55local mirrordata           = characters.mirrors
56local textclassdata        = characters.textclasses
57
58local nuts                 = nodes.nuts
59
60local getnext              = nuts.getnext
61local getprev              = nuts.getprev
62local getid                = nuts.getid
63local getsubtype           = nuts.getsubtype
64local getlist              = nuts.getlist
65local getchar              = nuts.getchar
66local getattr              = nuts.getattr
67local getprop              = nuts.getprop
68local getdirection         = nuts.getdirection
69local isglyph              = nuts.isglyph
70
71local setprop              = nuts.setprop
72local setchar              = nuts.setchar
73local setdirection         = nuts.setdirection
74local setattrlist          = nuts.setattrlist
75
76local properties           = nodes.properties.data
77
78local remove_node          = nuts.remove
79local insertnodeafter      = nuts.insertafter
80local insertnodebefore     = nuts.insertbefore
81
82local startofpar           = nuts.startofpar
83
84local nodepool             = nuts.pool
85local new_direction        = nodepool.direction
86
87local nodecodes            = nodes.nodecodes
88local gluecodes            = nodes.gluecodes
89
90local glyph_code           = nodecodes.glyph
91local glue_code            = nodecodes.glue
92local hlist_code           = nodecodes.hlist
93local vlist_code           = nodecodes.vlist
94local math_code            = nodecodes.math
95local dir_code             = nodecodes.dir
96local par_code             = nodecodes.par
97local penalty_code         = nodecodes.penalty
98
99local parfillskip_code     = gluecodes.parfillskip
100local parfillleftskip_code = gluecodes.parfillleftskip
101
102local dirvalues            = nodes.dirvalues
103local lefttoright_code     = dirvalues.lefttoright
104local righttoleft_code     = dirvalues.righttoleft
105
106local maximum_stack        = 0xFF
107
108local a_directions         = attributes.private('directions')
109
110local directions           = typesetters.directions
111local setcolor             = directions.setcolor
112local getfences            = directions.getfences
113
-- Flag that can be toggled with the directive registered right below; it starts
-- out enabled.

local remove_controls = true

directives.register("typesetters.directions.removecontrols",function(v)
    remove_controls = v
end)

----- analyze_fences = true  directives.register("typesetters.directions.analyzefences", function(v) analyze_fences = v end)

-- The reporter used for all messages of this module.

local report_directions = logs.reporter("typesetting","directions three")

-- Tracing flags, all off by default; each one follows the tracker with the
-- name given in the registration.

local trace_directions = false
local trace_details    = false
local trace_list       = false

trackers.register("typesetters.directions",function(v)
    trace_directions = v
end)

trackers.register("typesetters.directions.details",function(v)
    trace_details = v
end)

trackers.register("typesetters.directions.list",function(v)
    trace_list = v
end)
122
123-- strong (old):
124--
125-- l   : left to right
126-- r   : right to left
127-- lro : left to right override
-- rlo : right to left override
-- lre : left to right embedding
-- rle : right to left embedding
-- al  : right to left arabic (esp punctuation issues)
132--
133-- weak:
134--
-- en  : european number
-- es  : european number separator
-- et  : european number terminator
138-- an  : arabic number
139-- cs  : common number separator
140-- nsm : nonspacing mark
141-- bn  : boundary neutral
142--
143-- neutral:
144--
145-- b  : paragraph separator
146-- s  : segment separator
147-- ws : whitespace
148-- on : other neutrals
149--
150-- interesting: this is indeed better (and more what we expect i.e. we already use this split
151-- in the old original (also these isolates)
152--
153-- strong (new):
154--
155-- l   : left to right
156-- r   : right to left
157-- al  : right to left arabic (esp punctuation issues)
158--
159-- explicit: (new)
160--
161-- lro : left to right override
-- rlo : right to left override
-- lre : left to right embedding
-- rle : right to left embedding
-- pdf : pop dir format
-- lri : left to right isolate
-- rli : right to left isolate
168-- fsi : first string isolate
169-- pdi : pop directional isolate
170
-- Set of original classes that rule L1 treats as whitespace: the explicit
-- embedding and override controls, pdf, boundary neutrals and real whitespace.

local whitespace = { lre = true, rle = true, lro = true, rlo = true, pdf = true, bn = true, ws = true }

-- Set of classes that count as neutral in rules N1 and N2: paragraph separator,
-- segment separator, whitespace and other neutrals.

local b_s_ws_on = { b = true, s = true, ws = true, on = true }
187
188-- tracing
189
-- Serializes the first size entries of list for tracing. The field that gets
-- shown is selected by what and defaults to "direction". Entries are joined by
-- a bar that is wrapped in zero width non joiners.

local function show_list(list,size,what)
    local field     = what or "direction"
    local zwnj      = utfchar(0x200C)
    local separator = zwnj .. " | " .. zwnj
    local lines     = { }
    for index=1,size do
        local item  = list[index]
        local char  = item.char
        local value = item[field]
        local text
        if char == 0xFFFC then
            -- an object: show the node type(s) it stands for and the skip count
            local firstid = item.id
            local lastid  = item.last
            local count   = item.skip or 0
            if lastid then
                text = formatters["%-3s:%s %s..%s (%i)"](value,zwnj,nodecodes[firstid],nodecodes[lastid],count)
            else
                text = formatters["%-3s:%s %s (%i)"](value,zwnj,nodecodes[firstid],count)
            end
        elseif char < 0x202A or char > 0x202C then
            -- anything else is shown as character plus code point
            text = formatters["%-3s:%s %c %U"](value,zwnj,char,char)
        else
            -- U+202A .. U+202C are shown by code point only
            text = formatters["%-3s:%s %U"](value,zwnj,char)
        end
        lines[index] = text
    end
    return concat(lines,separator)
end
215
216-- preparation
217
-- Serializes the first size entries of list for tracing: the begindir and enddir
-- values of an entry are shown between angle brackets around its character and
-- entries flagged with remove contribute no character at all.

local function show_done(list,size)
    local zwnj   = utfchar(0x200C)
    local wrap   = formatters["<%s>"]
    local pieces = { }
    local count  = 0
    for index=1,size do
        local item  = list[index]
        local char  = item.char
        local first = item.begindir
        local last  = item.enddir
        if first then
            count         = count + 1
            pieces[count] = wrap(first)
        end
        if not item.remove then
            local text
            if char == 0xFFFC then
                text = wrap("?")
            elseif char == 0x0020 then
                text = wrap(" ")
            elseif char >= 0x202A and char <= 0x202C then
                text = wrap(item.original)
            else
                text = utfchar(char)
            end
            count         = count + 1
            pieces[count] = text
        end
        if last then
            count         = count + 1
            pieces[count] = wrap(last)
        end
    end
    return concat(pieces,zwnj)
end
247
248-- keeping the list and overwriting doesn't save much runtime, only a few percent
249-- char is only used for mirror, so in fact we can as well only store it for
250-- glyphs only
251--
252-- tracking what direction is used and skipping tests is not faster (extra kind of
253-- compensates gain)
254
-- Prototypes for the entries that build_list creates for non glyph nodes. An
-- entry only stores what differs, the rest is looked up here via __index.

local mt_space = {
    __index = { char = 0x0020, direction = "ws", original = "ws", level = 0, skip = 0 },
}

local mt_lre = {
    __index = { char = 0x202A, direction = "lre", original = "lre", level = 0, skip = 0 },
}

local mt_rle = {
    __index = { char = 0x202B, direction = "rle", original = "rle", level = 0, skip = 0 },
}

local mt_pdf = {
    __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0, skip = 0 },
}

local mt_object = {
    __index = { char = 0xFFFC, direction = "on", original = "on", level = 0, skip = 0 },
}

-- Both tables are shared between runs: the stack is used when the explicit
-- embeddings and overrides are resolved, the list holds the entries.

local stack = table.setmetatableindex("table")
local list  = { }
263
-- Converts the node list that starts at head into entries in the shared list
-- table (P1) and returns that table plus the number of entries that were filled
-- in. One entry can stand for several nodes: its skip field tells how many nodes
-- after the first one belong to it, and id and last hold the node type of the
-- first and (when different) the last node. The where argument is not used.
--
-- The run scan in the fallback branch used to test an undefined variable n, so
-- it never ran and every such node got an entry of its own. It now tests the
-- current node, which collapses a run into one neutral object.

local function build_list(head,where)
    -- P1
    local current = head
    local size    = 0
    while current do
        size = size + 1
        local id = getid(current)
        local p  = properties[current]
        if p and p.directions then
            -- nodes flagged in their properties: a whole run becomes one object
            -- tricky as dirs can be injected in between
            local skip = 0
            local last = id
            current    = getnext(current)
            while current do
                local id = getid(current)
                local p  = properties[current]
                if p and p.directions then
                    skip    = skip + 1
                    last    = id
                    current = getnext(current)
                else
                    break
                end
            end
            if id == last then -- the start id
                list[size] = setmetatable({ skip = skip, id = id },mt_object)
            else
                list[size] = setmetatable({ skip = skip, id = id, last = last },mt_object)
            end
        elseif id == glyph_code then
            local chr  = getchar(current)
            local dir  = directiondata[chr]
            -- could also be a metatable
            list[size] = { char = chr, direction = dir, original = dir, level = 0 }
            current    = getnext(current)
         -- if not list[dir] then list[dir] = true end -- not faster when we check for usage
        elseif id == glue_code then -- and how about kern
            list[size] = setmetatable({ },mt_space)
            current    = getnext(current)
        elseif id == dir_code then
            local dir, pop = getdirection(current)
            if dir == lefttoright_code then
                list[size] = setmetatable({ },pop and mt_pdf or mt_lre)
            elseif dir == righttoleft_code then
                list[size] = setmetatable({ },pop and mt_pdf or mt_rle)
            else
                list[size] = setmetatable({ id = id },mt_object)
            end
            current = getnext(current)
        elseif id == math_code then
            -- everything up to and including the next math node is one object
            local skip = 0
            current    = getnext(current)
            while current and getid(current) ~= math_code do
                skip    = skip + 1
                current = getnext(current)
            end
            if current then
                -- the closing math node itself
                skip    = skip + 1
                current = getnext(current)
            end
            list[size] = setmetatable({ id = id, skip = skip },mt_object)
        else -- disc_code: we assume that these are the same as the surrounding
            local skip = 0
            local last = id
            current    = getnext(current)
            while current do
                local id = getid(current)
                -- a math node ends the run so that the math branch above sees it
                if id ~= glyph_code and id ~= glue_code and id ~= dir_code and id ~= math_code then
                    skip    = skip + 1
                    last    = id
                    current = getnext(current)
                else
                    break
                end
            end
            if id == last then -- the start id
                list[size] = setmetatable({ id = id, skip = skip },mt_object)
            else
                list[size] = setmetatable({ id = id, skip = skip, last = last },mt_object)
            end
        end
    end
    return list, size
end
346
347-- new
348
349-- we could support ( ] and [ ) and such ...
350
351-- ש ) ל ( א       0-0
352-- ש ( ל ] א       0-0
353-- ש ( ל ) א       2-4
354-- ש ( ל [ א ) כ ] 2-6
355-- ש ( ל ] א ) כ   2-6
356-- ש ( ל ) א ) כ   2-4
357-- ש ( ל ( א ) כ   4-6
358-- ש ( ל ( א ) כ ) 2-8,4-6
359-- ש ( ל [ א ] כ ) 2-8,4-6
360
local fencestack = table.setmetatableindex("table")

-- Pairs opening and closing fences among the "on" entries in list[start..limit]
-- (N0). Every entry that has a mirror gets its mirror and class fields set; both
-- members of a matched pair get a paired field that holds the index of the other
-- member. The size argument is not used.
--
-- Matching follows BD16: the stack of pending openers is searched from the top
-- down without changing it. When a match is found the stack is popped through
-- that opener, so an opener pairs at most once, and a closer that matches
-- nothing leaves the pending openers untouched.

local function resolve_fences(list,size,start,limit)
    -- N0: funny effects, not always better, so it's an option
    local nofstack = 0
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "on" then
            local char   = entry.char
            local mirror = mirrordata[char]
            if mirror then
                local class = textclassdata[char]
                entry.mirror = mirror
                entry.class  = class
                if class == "open" then
                    -- remember what closer we expect and where the opener sits
                    nofstack       = nofstack + 1
                    local stacktop = fencestack[nofstack]
                    stacktop[1]    = mirror
                    stacktop[2]    = i
                elseif class == "close" then
                    -- with an empty stack this loop does nothing
                    for depth=nofstack,1,-1 do
                        local candidate = fencestack[depth]
                        if candidate[1] == char then
                            local open = candidate[2]
                            list[open].paired = i
                            entry.paired      = open
                            -- drop the opener and all that was pushed after it
                            nofstack = depth - 1
                            break
                        end
                    end
                end
            end
        end
    end
end
401
402-- local function test_fences(str)
403--     local list  = { }
404--     for s in string.gmatch(str,".") do
405--         local b = utf.byte(s)
406--         list[#list+1] = { c = s, char = b, direction = directiondata[b] }
407--     end
408--     resolve_fences(list,#list,1,#size)
409--     inspect(list)
410-- end
411--
412-- test_fences("a(b)c(d)e(f(g)h)i")
413-- test_fences("a(b[c)d]")
414
415-- the action
416
-- Determines the base direction. The outcome is the first that applies of: the
-- given direction when it is one of the two known codes, the direction of head
-- when that is a par node at the start of a paragraph, or the first strong
-- entry in the list (P2, P3). The second return value is false only when none
-- of these gave an answer, in which case left to right is returned.

local function get_baselevel(head,list,size,direction)
    local known = direction == lefttoright_code or direction == righttoleft_code
    if not known and getid(head) == par_code and startofpar(head) then
        direction = getdirection(head)
        known     = direction == lefttoright_code or direction == righttoleft_code
    end
    if known then
        return direction, true
    end
    -- P2, P3
    for index=1,size do
        local class = list[index].direction
        if class == "l" then
            return lefttoright_code, true
        elseif class == "r" or class == "al" then -- and an ?
            return righttoleft_code, true
        end
    end
    return lefttoright_code, false
end
438
-- Assigns embedding levels to the first size entries of list, starting at
-- baselevel (X1 .. X7). An embedding or override control pushes the current
-- level and override on the shared stack, a pdf pops them again. Controls that
-- are handled this way become "bn" and get flagged with remove. All other
-- entries get the current level and, when an override is active, its direction.

local function resolve_explicit(list,size,baselevel)
    -- X1
    local level    = baselevel
    local override = "on"
    local depth    = 0
    for index=1,size do
        local entry = list[index]
        local class = entry.direction
        local right = class == "rle" or class == "rlo"
        if right or class == "lre" or class == "lro" then
            -- X2, X3, X4, X5
            if depth < maximum_stack then
                depth       = depth + 1
                local saved = stack[depth]
                saved[1]    = level
                saved[2]    = override
                local odd   = level % 2 == 1
                if right then
                    level = level + (odd and 2 or 1) -- least_greater_odd(level)
                else
                    level = level + (odd and 1 or 2) -- least_greater_even(level)
                end
                if class == "rlo" then
                    override = "r"
                elseif class == "lro" then
                    override = "l"
                else
                    override = "on"
                end
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            elseif trace_directions then
                report_directions("stack overflow at position %a with direction %a",index,class)
            end
        elseif class == "pdf" then
            -- X7
            if depth > 0 then
                local saved     = stack[depth]
                level           = saved[1]
                override        = saved[2]
                depth           = depth - 1
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            elseif trace_directions then
                report_directions("stack underflow at position %a with direction %a",
                    index, class)
            else
                report_directions("stack underflow at position %a with direction %a: %s",
                    index, class, show_list(list,size))
            end
        else
            -- X6
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
    -- X8 (reset states and overrides after paragraph)
end
535
-- Resolves the weak types in list[start..limit] (W1 .. W7) by changing the
-- direction field of the entries in place. The orderbefore and orderafter
-- arguments are the directions that are assumed at the start and end of the
-- run. The size argument is not used.
--
-- Compared to earlier versions: W4 now looks at the real successor of each
-- separator and stays within the run, and W5 uses orderbefore for a terminator
-- run at the very start instead of an undefined variable.

local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1: non spacing marks get the direction of the previous character
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2: mess with numbers and arabic
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            -- look back for the nearest strong type
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: make separators number (a window of three entries slides over the run,
    -- so only the second up to the one but last entry can change)
    if limit - start >= 2 then
        local before  = list[start]
        local current = list[start+1]
        for i=start+2,limit do
            local after     = list[i]
            local direction = current.direction
            if direction == "es" then
                if before.direction == "en" and after.direction == "en" then
                    current.direction = "en"
                end
            elseif direction == "cs" then
                local prevdirection = before.direction
                if prevdirection == "en" then
                    if after.direction == "en" then
                        current.direction = "en"
                    end
                elseif prevdirection == "an" and after.direction == "an" then
                    current.direction = "an"
                end
            end
            before  = current
            current = after
        end
    end
    -- W5: a run of terminators next to a number becomes part of that number
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = runstart
            for j=runstart,limit do
                if list[j].direction == "et" then
                    runlimit = j
                else
                    break
                end
            end
            local rundirection
            if runstart == start then
                rundirection = orderbefore
            else
                rundirection = list[runstart-1].direction
            end
            if rundirection ~= "en" then
                if runlimit == limit then
                    rundirection = orderafter
                else
                    rundirection = list[runlimit+1].direction
                end
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6: what is left of the separators and terminators becomes neutral
    for i=start,limit do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7: numbers after a strong l become l
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end
685
-- Resolves runs of neutrals (b, s, ws, on) in list[start..limit] by changing
-- the direction field in place (N1, N2). A run that has the same direction on
-- both sides takes that direction, where numbers count as "r" and the edges of
-- the range count as orderbefore and orderafter. Any other run takes the
-- direction that follows from the level of its first entry. The size argument
-- is not used.
--
-- A while loop is used because the index has to jump over a resolved run;
-- assigning to the control variable of a numeric for loop has no effect.

local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    -- N1, N2
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            -- this needs checking
            local leading_direction, trailing_direction, resolved_direction
            local runstart = i
            local runlimit = runstart
            for j=runstart+1,limit do
                if b_s_ws_on[list[j].direction] then
                    runlimit = j
                else
                    break
                end
            end
            if runstart == start then
                leading_direction = orderbefore
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            if runlimit == limit then
                trailing_direction = orderafter
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            if leading_direction == trailing_direction then
                -- N1
                resolved_direction = leading_direction
            else
                -- N2 / does the weird period
                resolved_direction = entry.level % 2 == 1 and "r" or "l"
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            -- continue after the run
            i = runlimit
        end
        i = i + 1
    end
end
735
-- Raises the level of the entries in list[start..limit] according to their
-- resolved direction (I1, I2). At an even level an "r" goes up by one and a
-- number by two, at an odd level an "l" or a number goes up by one. Only the
-- list, start and limit arguments are used.

local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
    for index=start,limit do
        local item  = list[index]
        local class = item.direction
        local level = item.level
        local bump  = 0
        if level % 2 == 1 then
            -- I2
            if class == "l" or class == "en" or class == "an" then
                bump = 1
            end
        elseif class == "r" then
            -- I1
            bump = 1
        elseif class == "an" or class == "en" then
            -- I1
            bump = 2
        end
        if bump > 0 then
            item.level = level + bump
        end
    end
end
756
-- Splits the first size entries of list into runs of equal level (X10) and
-- applies the weak, fence (only when analyze_fences is set), neutral and
-- implicit rules to each run. After that the levels of separators and of
-- trailing whitespace are reset to baselevel (L1) and the mirror fields are
-- finalized (L4).
--
-- The index that ends a run is passed on as an inclusive limit and is also
-- where the next run starts. A list with less than two entries skips the run
-- loop altogether.

local function resolve_levels(list,size,baselevel,analyze_fences)
    -- X10
    local first = 1
    while first < size do
        local runlevel = list[first].level
        local last     = first + 1
        while last < size and list[last].level == runlevel do
            last = last + 1
        end
        local levelbefore = first == 1    and baselevel or list[first-1].level
        local levelafter  = last  == size and baselevel or list[last+1].level
        -- the higher level of both sides of a boundary decides
        local highbefore = levelbefore
        if runlevel > levelbefore then
            highbefore = runlevel
        end
        local highafter = levelafter
        if runlevel > levelafter then
            highafter = runlevel
        end
        local orderbefore = highbefore % 2 == 1 and "r" or "l"
        local orderafter  = highafter  % 2 == 1 and "r" or "l"
        -- W1 .. W7
        resolve_weak(list,size,first,last,orderbefore,orderafter)
        -- N0
        if analyze_fences then
            resolve_fences(list,size,first,last)
        end
        -- N1 .. N2
        resolve_neutral(list,size,first,last,orderbefore,orderafter)
        -- I1 .. I2
        resolve_implicit(list,size,first,last,orderbefore,orderafter,baselevel)
        first = last
    end
    -- L1 (1) and (2): separators and the whitespace right before them
    for index=1,size do
        local item     = list[index]
        local original = item.original
        if original == "s" or original == "b" then
            item.level = baselevel
            local back = index - 1
            while back >= 1 and whitespace[list[back].original] do
                list[back].level = baselevel
                back = back - 1
            end
        end
    end
    -- L1 (3): whitespace at the end
    local tail = size
    while tail >= 1 and whitespace[list[tail].original] do
        list[tail].level = baselevel
        tail = tail - 1
    end
    -- L4
    for index=1,size do
        local item = list[index]
        if analyze_fences then
            -- only paired fences at an odd level keep the mirror that was set
            if item.mirror and (item.level % 2 ~= 1 or not item.paired) then
                item.mirror = false
            end
        elseif item.level % 2 == 1 then -- odd(entry.level)
            local mirror = mirrordata[item.char]
            if mirror then
                item.mirror = mirror
            end
        end
    end
end
834
-- Scratch table used by insert_dir_points to remember the direction codes that
-- are still open. It lives at the module level so that it is reused between
-- calls; only slots written during the current call are read back.

local stack = { }

-- L2, but no actual reversion is done, we simply annotate where begindir and
-- enddir nodes will be inserted.
--
--   list : array of entries with a resolved level; begindir and enddir fields
--          are set on the entries where a direction starts or stops
--   size : number of entries in list
--
-- For every level from 0 up to the highest level found, each stretch of entries
-- at that level or higher is marked: begindir on its first entry and enddir on
-- its last one. Even levels use lefttoright_code and odd levels righttoleft_code.
-- An entry has only one begindir and one enddir field, so a higher level
-- overwrites what a lower level stored on the same entry. Nothing is returned.

local function insert_dir_points(list,size)
    if size < 1 then
        -- nothing to annotate; without this check list[size] below is nil and
        -- indexing it raises an error
        return
    end
    local maxlevel = 0
    for i=1,size do
        local level = list[i].level
        if level > maxlevel then
            maxlevel = level
        end
    end
    for level=0,maxlevel do
        local dircode = righttoleft_code
        if level % 2 == 0 then
            dircode = lefttoright_code
        end
        local started = false
        local prev    = nil
        for i=1,size do
            local entry = list[i]
            if entry.level >= level then
                if not started then
                    entry.begindir = dircode
                    started        = true
                end
            elseif started then
                -- the previous entry was the last one of the stretch
                prev.enddir = dircode
                started     = false
            end
            prev = entry
        end
    end
    -- make sure to close the run at end of line
    local last = list[size]
    if not last.enddir then
        -- count what is still open, stack[n] is the most recently opened code
        local n = 0
        for i=1,size do
            local entry = list[i]
            if entry.enddir then
                n = n - 1
            end
            local b = entry.begindir
            if b then
                n = n + 1
                stack[n] = b
            end
        end
        if n > 0 then
            -- only one enddir fits on the last entry, more than one open
            -- direction is reported when tracing
            if trace_list and n > 1 then
                report_directions("unbalanced list")
            end
            last.enddir = stack[n]
        end
    end
end
902
-- We flag nodes that can be skipped when we see them again, but because any
-- other mechanism can inject dir nodes that are then not flagged, we don't flag
-- the dir nodes that we inject here.
906
-- Walks the node list that starts at head in parallel with the analyzed list
-- and applies the result: glyphs get their mirrored character, hlists and
-- vlists get pardir as direction, and direction nodes are inserted where an
-- entry carries begindir or enddir. Every visited node is flagged with
-- properties[node].directions = true. Entries flagged with remove have their
-- node removed when remove_controls is set.
--
--   list   : array of analyzed entries, one per visited node (an entry with a
--            positive skip field covers that many additional nodes)
--   size   : number of entries in list
--   head   : first node of the list
--   pardir : direction that is set on hlist and vlist nodes
--
-- Returns the (possibly changed) head. When there are more nodes than entries
-- a "fatal error, size mismatch" is reported and the walk stops.

local function apply_to_list(list,size,head,pardir)
    local slot = 1
    local node = head
    if trace_list then
        report_directions("start run")
    end
    while node do
        if slot > size then
            report_directions("fatal error, size mismatch")
            break
        end
        local id      = getid(node)
        local item    = list[slot]
        local opening = item.begindir
        local closing = item.enddir
        -- flag the node as being processed
        local props = properties[node]
        if not props then
            properties[node] = { directions = true }
        else
            props.directions = true
        end
        if id == glyph_code then
            local mirrored = item.mirror
            if mirrored then
                setchar(node,mirrored)
            end
            if trace_directions then
                local resolved = item.direction
                if trace_list then
                    local original = item.original
                    if resolved ~= original then
                        report_directions("%2i : %C : %s -> %s",item.level,item.char,original,resolved)
                    else
                        report_directions("%2i : %C : %s",item.level,item.char,resolved)
                    end
                end
                setcolor(node,resolved,false,mirrored)
            end
        elseif id == hlist_code or id == vlist_code then
            setdirection(node,pardir) -- is this really needed?
        elseif id == glue_code then
            -- Maybe I should also fix dua and dub but on the other hand ... why?
            if closing and getsubtype(node) == parfillskip_code then
                -- the last enddir goes before the \parfillskip glue, and also
                -- before a preceding parfillleftskip glue and penalty
                local anchor = node
                local before = getprev(anchor)
                if before and getid(before) == glue_code and getsubtype(before) == parfillleftskip_code then
                    anchor = before
                    before = getprev(anchor)
                end
                if before and getid(before) == penalty_code then -- linepenalty
                    anchor = before
                end
                -- there is always a par node so head will stay
                head = insertnodebefore(head,anchor,new_direction(closing,true))
                closing = false
            end
        elseif opening and id == par_code and startofpar(node) then
            -- par should always be the 1st node, so the begindir goes after it
            head, node = insertnodeafter(head,node,new_direction(opening))
            opening = nil
        end
        if opening then
            head = insertnodebefore(head,node,new_direction(opening))
        end
        -- step over (and flag) the nodes that are covered by this entry
        local skip = item.skip
        if skip and skip > 0 then
            for i=1,skip do
                node = getnext(node)
                local props = properties[node]
                if not props then
                    properties[node] = { directions = true }
                else
                    props.directions = true
                end
            end
        end
        if closing then
            head, node = insertnodeafter(head,node,new_direction(closing,true))
        end
        if item.remove and remove_controls then
            -- X9
            head, node = remove_node(head,node,true)
        else
            node = getnext(node)
        end
        slot = slot + 1
    end
    if trace_list then
        report_directions("stop run")
    end
    return head
end
1006
1007-- If needed we can optimize for only_one. There is no need to do anything
1008-- when it's not a glyph. Otherwise we only need to check mirror and apply
1009-- directions when it's different from the surrounding. Paragraphs always
1010-- have more than one node. Actually, we only enter this function when we
1011-- do have a glyph!
1012
-- The handler that gets installed: analyzes the node list starting at head and
-- returns the head of the list after the directions have been applied.
--
--   head      : first node of the list; its a_directions attribute decides (via
--               getfences) if fences are analyzed
--   direction : passed on to get_baselevel
--   only_one  : not used here
--   where     : passed on to build_list
--
-- The steps are: build the list, determine the base level, resolve the explicit
-- levels, resolve the other levels, annotate the direction changes and finally
-- apply all that to the node list, with the base level as paragraph direction.

local function process(head,direction,only_one,where)
    -- for the moment a whole paragraph property
    local analyze_fences = getfences(getattr(head,a_directions))
    --
    local list, size = build_list(head,where)
    local baselevel  = get_baselevel(head,list,size,direction)
    if trace_details then
        local name = "l2r"
        if baselevel == righttoleft_code then
            name = "r2l"
        end
        report_directions("analyze: baselevel %a",name)
        report_directions("before : %s",show_list(list,size,"original"))
    end
    resolve_explicit(list,size,baselevel)
    resolve_levels(list,size,baselevel,analyze_fences)
    insert_dir_points(list,size)
    if trace_details then
        report_directions("after  : %s",show_list(list,size,"direction"))
        report_directions("result : %s",show_done(list,size))
    end
    return apply_to_list(list,size,head,baselevel)
end
1033
local variables = interfaces.variables

-- The same handler is installed under four interface keys; "one", "two" and
-- "three" are only there for old times sake.

do
    local names = { "one", "two", "three", "unicode" }
    for i=1,#names do
        directions.installhandler(variables[names[i]],process)
    end
end
1040