font-imp-combining.lua /size: 8169 b    last modification: 2020-07-01 14:35
1if not modules then modules = { } end modules ['font-imp-combining'] = {
2    version   = 1.001,
3    comment   = "companion to font-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA ADE",
5    copyright = "ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9if not context then return end
10
11local next, unpack = next, unpack
12local sort, copy, insert = table.sort, table.copy, table.insert
13local setmetatableindex = table.setmetatableindex
14
15local fontdata  = fonts.hashes.identifiers
16local otf       = fonts.handlers.otf
17
18local nuts      = nodes.nuts
19
20local nextnode  = nuts.traversers.node
21local ischar    = nuts.ischar
22local getprev   = nuts.getprev
23local getnext   = nuts.getnext
24local setprev   = nuts.setprev
25local setnext   = nuts.setnext
26local setboth   = nuts.setboth
27local setlink   = nuts.setlink
28local exchange  = nuts.exchange
29
-- Module level state shared by the sorters, reorder and reorderprocessor. The
-- tables are reused between calls instead of being recreated for every run.

local class, point     = { }, { } -- per node: the assigned class and the character code (point is only written in this file)
local classes, sorters = { }, { } -- per script tag: character -> class mapping and the matching compare function
local slide, count     = { }, 0   -- the run of consecutive marks being collected and the number of nodes in it
36
37-- List provided by Joey McCollum (Hebrew Layout Intelligence):
38--
39-- 1. The consonants (Unicode points 05D0-05EA) have no combining class and are never reordered; this is typographically correct.
40-- 2. Shin dot and sin dot (05C1-05C2) should be next, but Unicode places them in combining classes 24 and 25, after the characters in recommended classes 3-5 and many of the characters in recommended class 6.
41-- 3. Dagesh / mapiq (05BC) should be next, but Unicode assigns it a combining class of 21. This means that it will be incorrectly ordered before characters in recommended class 2 and after characters in recommended classes 4-6 after Unicode normalization.
42-- 4. Rafe (05BF) should be next, but Unicode assigns it a combining class of 23. Thus, it will be correctly placed after characters in recommended class 3, but incorrectly placed before characters in recommended class 2 after Unicode normalization.
43-- 5. The holam and holam haser vowel points (05B9-05BA) should be next, but Unicode places them in combining class 19. This means that it will be placed incorrectly before characters in recommended classes 2-4 and after all characters in recommended class 6 except 05BB after Unicode normalization.
44-- 6. The characters in 0591, 0596, 059B, 05A2-05A7, 05AA, 05B0-05B8, 05BB, 05BD, 05C5, 05C7 should be treated as being in the same class, but Unicode places them in combining classes 10-18, 20, 22, and 220.
45-- 7. The prepositive marks yetiv and dehi (059A, 05AD) should be next; Unicode places them in combining class 222, so they should correctly come after all characters in recommended classes 1-6.
46-- 8. The characters 0307, 0593-0595, 0597-0598, 059C-05A1, 05A8, 05AB-05AC, 05AF, 05C4 should be treated as being in the same class; Unicode places them in combining class 230, so they should correctly come after all characters in recommended classes 1-7.
47-- 9. The postpositive marks segolta, pashta, telisha qetana, and zinor (0592, 0599, 05A9, 05AE) should be next; Unicode places them in combining class 230, so they will need to be reordered after the characters in recommended class 8.
48--
49-- Some tests by Joey:
50--
51-- Arial, Calibri, and Times New Roman will correctly typeset most combinations of points even in Unicode's canonical order, but they typeset the normalized sequences (hiriq, shin dot, tipeha) and (qamatz, dagesh, shin dot) incorrectly and their typographically recommended reorderings correctly.
52-- Cardo will correctly typeset most combinations of points even in Unicode's canonical order, but it typesets the normalized sequences (hiriq, shin dot, tipeha) incorrectly and its typographically recommended reorderings correctly.
-- Frank Ruehl CLM typesets most combinations of points even in Unicode's canonical order, but it consistently does a poor job positioning cantillation marks even when they are placed in the typographically recommended position. Taamey Frank CLM is another version of the same font that handles this correctly, so it is possible that Frank Ruehl CLM is just an obsolete font that did not have well-implemented Hebrew font features for cantillation marks to begin with.
54-- For Linux Libertine, the text samples with both the normalized mark ordering and the typographically recommended mark ordering were typeset poorly. I think that this is just because that font does not have full support for the Hebrew glyph set (it lacks cantillation marks) or Hebrew font features (it does not place Hebrew diacritical marks intelligently), so no mark reordering would fix its problems.
-- Taamey David CLM and Taamey Frank CLM exhibit the same typographical mistakes as SBL Hebrew when the input is in Unicode canonical order, and these mistakes go away if the marks are ordered in the typographically recommended way.
56--
57-- SBL Hebrew is used as reference font.
58
-- The Hebrew classes: marks are listed per group, in the order in which they
-- should end up after a base character. The position of a group in the list is
-- the class number that gets assigned to its members (1 sorts first). Class n
-- here corresponds to recommended class n+1 in the list above, because the
-- consonants (recommended class 1) are never reordered and get no class.

do

    local groups = {
        -- 1: shin dot and sin dot
        { 0x05C1, 0x05C2 },
        -- 2: dagesh / mapiq
        { 0x05BC },
        -- 3: rafe
        { 0x05BF },
        -- 4: holam and holam haser
        { 0x05B9, 0x05BA },
        -- 5: the marks that are to be treated as one class
        {
            0x0591, 0x0596, 0x059B, 0x05A2, 0x05A3, 0x05A4,
            0x05A5, 0x05A6, 0x05A7, 0x05AA, 0x05B0, 0x05B1,
            0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
            0x05B8, 0x05BB, 0x05BD, 0x05C5, 0x05C7,
        },
        -- 6: the prepositive marks yetiv and dehi
        { 0x059A, 0x05AD },
        -- 7: the marks that Unicode puts in combining class 230
        {
            0x0307, 0x0593, 0x0594, 0x0595, 0x0597, 0x0598,
            0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1,
            0x05A8, 0x05AB, 0x05AC, 0x05AF, 0x05C4,
        },
        -- 8: the postpositive marks segolta, pashta, telisha qetana and zinor
        { 0x0592, 0x0599, 0x05A9, 0x05AE },
    }

    local hebr = { }

    for rank=1,#groups do
        local members = groups[rank]
        for i=1,#members do
            hebr[members[i]] = rank
        end
    end

    classes.hebr = hebr

end
74
-- Compare function for Hebrew: a node comes before another one when the class
-- that was stored for it in the shared class table is the lower one. The
-- comparison is strict, so nodes of equal class are never reported as ordered.

function sorters.hebr(first,second)
    local firstclass  = class[first]
    local secondclass = class[second]
    return firstclass < secondclass
end
78
79-- local dflt = setmetatableindex(function(t,k,v)
80--     for k, v in next, characters.data do
81--         local c = v.combining
82--         if c then
83--             t[k] = c
84--         end
85--     end
86--     setmetatableindex(t,nil)
87--     return t[k]
88-- end)
89--
90-- classes.dflt = dflt
91-- sorters.dflt = function(a,b) return class[b] < class[a] end
92
93-- see analyzeprocessor in case we want scripts
94
-- The compare function of the script that is being processed. It is assigned by
-- reorderprocessor and read by reorder; it is declared local here so that it
-- does not end up as an (accidental) global.

local sorter

-- Puts the run of nodes collected in slide[1..count] in the order defined by
-- sorter, relinks the run in place and resets count to zero.
--
-- head    : the first node of the list that contains the run
-- returns : the head of the list, which changes when the run started the list
--           and another node ended up in front

local function reorder(head)
    if count == 2 then
        local first = slide[1]
        local last  = slide[2]
        -- the comparison is strict so two nodes of equal class are left alone;
        -- exchange is expected to swap both nodes and return the valid head
        if sorter(last,first) then
            head = exchange(head,first,last)
        end
    elseif count > 2 then
        local first   = slide[1]
        local last    = slide[count]
        local before  = getprev(first)
        local after   = getnext(last)
        -- remember this now, as first gets reassigned after sorting
        local washead = first == head
        -- detach the run from its neighbours
        setprev(first)
        setnext(last)
        -- We use an insertion sort because table.sort is not stable and nodes of
        -- equal class have to keep their relative order. Runs of marks are short
        -- so this is cheap.
        for i=2,count do
            local current = slide[i]
            local j = i - 1
            while j > 0 and sorter(current,slide[j]) do
                slide[j+1] = slide[j]
                j = j - 1
            end
            slide[j+1] = current
        end
        setlink(unpack(slide))
        first = slide[1]
        last  = slide[count]
        -- We don't rely on setlink for the outer links: without a neighbour the
        -- new end nodes could still point into the run (a stale link, or even a
        -- cycle), so we terminate them explicitly.
        if before then
            setlink(before,first)
        else
            setprev(first)
        end
        if after then
            setlink(last,after)
        else
            setnext(last)
        end
        if washead then
            head = first
        end
    end
    count = 0
    return head
end
124
-- The node mode processor of the "reordercombining" feature. The script of the
-- font (as reported by otf.scriptandlanguage) selects a compare function and a
-- character -> class mapping. Each run of consecutive characters of this font
-- that have a class is collected in slide and passed to reorder as soon as a
-- node shows up that is not part of the run, or when the list ends.
--
-- head    : the first node of the list
-- font    : the font identifier, used for the font data and the character check
-- attr    : passed on to otf.scriptandlanguage
-- returns : the (possibly changed) head of the list

local function reorderprocessor(head,font,attr)
    local script = otf.scriptandlanguage(fontdata[font],attr)
    -- stored in the shared variable because reorder needs it too
    sorter = sorters[script]
    if not sorter then
        -- nothing is defined for this script so the list is left untouched
        return head
    end
    local marks = classes[script]
    for current in nextnode, head do
        local char = ischar(current,font)
        local rank = char and marks[char]
        if rank then
            if count == 0 then
                -- a new run starts with a fresh table
                slide = { }
            end
            count = count + 1
            slide[count] = current
            class[current] = rank
            point[current] = char
        elseif count > 0 then
            -- this node ends the run that we collected so far
            head = reorder(head)
        end
    end
    if count > 0 then
        -- a run at the very end of the list
        head = reorder(head)
    end
    return head
end
158
-- Register the reordering as the otf font feature "reordercombining". Only a
-- node mode processor is registered: there is no initializer and the feature is
-- not flagged as default. The position entry is handed over as-is; presumably
-- it puts this processor in front of the others (to be confirmed in the feature
-- registration code).

local registerotffeature = fonts.constructors.features.otf.register

registerotffeature {
    name        = "reordercombining",
    description = "reorder combining characters",
 -- default     = true,
    processors  = {
        position = 1,
        node     = reorderprocessor,
    },
}
171