if not modules then modules = { } end modules ['font-imp-combining'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA ADE",
    copyright = "ConTeXt Development Team",
    license   = "see context related readme files"
}

-- This module only makes sense inside ConTeXt, so we quit when the global
-- 'context' is not around.

if not context then
    return
end

-- Local aliases for the library functions that are used further on.

local next              = next
local unpack            = unpack

local sort              = table.sort
local copy              = table.copy
local insert            = table.insert
local setmetatableindex = table.setmetatableindex

local fontdata          = fonts.hashes.identifiers
local otf               = fonts.handlers.otf

local nuts              = nodes.nuts

local nextnode          = nuts.traversers.node
local ischar            = nuts.ischar
local getprev           = nuts.getprev
local getnext           = nuts.getnext
local setprev           = nuts.setprev
local setnext           = nuts.setnext
local setboth           = nuts.setboth
local setlink           = nuts.setlink
local exchange          = nuts.exchange

-- Shared state of the reorder pass:
--
-- class   : node -> reorder class, filled while scanning a list (reused)
-- point   : node -> character code, filled while scanning a list (reused)
-- classes : script tag -> table that maps a character code onto its class
-- sorters : script tag -> comparison function used when sorting a run
-- slide   : the run of adjacent marks that is currently being collected
-- count   : the number of nodes in that run

local class   = { } -- reused
local point   = { } -- reused
local classes = { }
local sorters = { }
local slide   = { }
local count   = 0

-- List provided by Joey McCollum (Hebrew Layout Intelligence):
--
-- 1. The consonants (Unicode points 05D0-05EA) have no combining class and are never reordered; this is typographically correct.
-- 2. Shin dot and sin dot (05C1-05C2) should be next, but Unicode places them in combining classes 24 and 25, after the characters in recommended classes 3-5 and many of the characters in recommended class 6.
-- 3. Dagesh / mapiq (05BC) should be next, but Unicode assigns it a combining class of 21. This means that it will be incorrectly ordered before characters in recommended class 2 and after characters in recommended classes 4-6 after Unicode normalization.
-- 4. Rafe (05BF) should be next, but Unicode assigns it a combining class of 23. Thus, it will be correctly placed after characters in recommended class 3, but incorrectly placed before characters in recommended class 2 after Unicode normalization.
-- 5.
--    The holam and holam haser vowel points (05B9-05BA) should be next, but Unicode places them in combining class 19. This means that they will be placed incorrectly before characters in recommended classes 2-4 and after all characters in recommended class 6 except 05BB after Unicode normalization.
-- 6. The characters in 0591, 0596, 059B, 05A2-05A7, 05AA, 05B0-05B8, 05BB, 05BD, 05C5, 05C7 should be treated as being in the same class, but Unicode places them in combining classes 10-18, 20, 22, and 220.
-- 7. The prepositive marks yetiv and dehi (059A, 05AD) should be next; Unicode places them in combining class 222, so they should correctly come after all characters in recommended classes 1-6.
-- 8. The characters 0307, 0593-0595, 0597-0598, 059C-05A1, 05A8, 05AB-05AC, 05AF, 05C4 should be treated as being in the same class; Unicode places them in combining class 230, so they should correctly come after all characters in recommended classes 1-7.
-- 9. The postpositive marks segolta, pashta, telisha qetana, and zinor (0592, 0599, 05A9, 05AE) should be next; Unicode places them in combining class 230, so they will need to be reordered after the characters in recommended class 8.
--
-- Some tests by Joey:
--
-- Arial, Calibri, and Times New Roman will correctly typeset most combinations of points even in Unicode's canonical order, but they typeset the normalized sequences (hiriq, shin dot, tipeha) and (qamatz, dagesh, shin dot) incorrectly and their typographically recommended reorderings correctly.
-- Cardo will correctly typeset most combinations of points even in Unicode's canonical order, but it typesets the normalized sequences (hiriq, shin dot, tipeha) incorrectly and its typographically recommended reorderings correctly.
-- Frank Ruehl CLM typesets most combinations of points even in Unicode's canonical order, but it consistently does a poor job positioning cantillation marks even when they are placed in the typographically recommended position.
-- Taamey Frank CLM is another version of the same font that handles this correctly, so it is possible that Frank Ruehl CLM is just an obsolete font that did not have well-implemented Hebrew font features for cantillation marks to begin with.
-- For Linux Libertine, the text samples with both the normalized mark ordering and the typographically recommended mark ordering were typeset poorly. I think that this is just because that font does not have full support for the Hebrew glyph set (it lacks cantillation marks) or Hebrew font features (it does not place Hebrew diacritical marks intelligently), so no mark reordering would fix its problems.
-- Taamey David CLM and Taamey Frank CLM exhibit the same typographical mistakes as SBL Hebrew when the input is in Unicode canonical order, and these mistakes go away if the marks are ordered in the typographically recommended way.
--
-- SBL Hebrew is used as reference font.

-- The reorder classes for Hebrew. The numbers are the recommended classes
-- from the list above minus one: the consonants (item 1) are not in this
-- table so they never become part of a run that gets reordered.

classes.hebr = {

    -- shin dot and sin dot

    [0x05C1] = 1,
    [0x05C2] = 1,

    -- dagesh / mapiq

    [0x05BC] = 2,

    -- rafe

    [0x05BF] = 3,

    -- holam and holam haser

    [0x05B9] = 4,
    [0x05BA] = 4,

    -- the marks that are treated as one class (item 6 in the list)

    [0x0591] = 5,
    [0x0596] = 5,
    [0x059B] = 5,
    [0x05A2] = 5,
    [0x05A3] = 5,
    [0x05A4] = 5,
    [0x05A5] = 5,
    [0x05A6] = 5,
    [0x05A7] = 5,
    [0x05AA] = 5,
    [0x05B0] = 5,
    [0x05B1] = 5,
    [0x05B2] = 5,
    [0x05B3] = 5,
    [0x05B4] = 5,
    [0x05B5] = 5,
    [0x05B6] = 5,
    [0x05B7] = 5,
    [0x05B8] = 5,
    [0x05BB] = 5,
    [0x05BD] = 5,
    [0x05C5] = 5,
    [0x05C7] = 5,

    -- the prepositive marks yetiv and dehi

    [0x059A] = 6,
    [0x05AD] = 6,

    -- the marks that are treated as one class (item 8 in the list)

    [0x0307] = 7,
    [0x0593] = 7,
    [0x0594] = 7,
    [0x0595] = 7,
    [0x0597] = 7,
    [0x0598] = 7,
    [0x059C] = 7,
    [0x059D] = 7,
    [0x059E] = 7,
    [0x059F] = 7,
    [0x05A0] = 7,
    [0x05A1] = 7,
    [0x05A8] = 7,
    [0x05AB] = 7,
    [0x05AC] = 7,
    [0x05AF] = 7,
    [0x05C4] = 7,

    -- the postpositive marks segolta, pashta, telisha qetana and zinor

    [0x0592] = 8,
    [0x0599] = 8,
    [0x05A9] = 8,
    [0x05AE] = 8,

}

-- The comparator for Hebrew: a node with a lower class goes first. The class
-- of a node is looked up in the shared 'class' table.

function sorters.hebr(first,second)
    return class[first] < class[second]
end

-- local dflt = setmetatableindex(function(t,k,v)
--     for k, v in next, characters.data do
--         local c = v.combining
--         if c then
--             t[k] = c
--         end
--     end
--     setmetatableindex(t,nil)
--     return t[k]
-- end)
--
-- classes.dflt = dflt
-- sorters.dflt = function(a,b) return class[b] < class[a] end

-- see analyzeprocessor in case we want scripts

-- The comparator of the script that is being processed. It is set by
-- reorderprocessor and used by reorder. It is kept local to this file so
-- that we don't leak (or depend on) a global with this name.

local sorter

-- Reorder the run of marks that has been collected in 'slide' (with 'count'
-- entries) and reset the run.
--
-- head    : the head of the node list that contains the run
-- returns : the head of the list, which is a different node when the run
--           started at the head and another mark ended up in front
--
-- The order is determined by 'sorter'. Nodes that compare equal keep their
-- relative input order.

local function reorder(head)
    if count == 2 then
        -- Only swap when the second one really has to come first, so equal
        -- classes are left alone. We assume that exchange returns the
        -- (possibly changed) head as that is what we assign here.
        local first = slide[1]
        local last  = slide[2]
        if sorter(last,first) then
            head = exchange(head,first,last)
        end
    elseif count > 2 then
        local oldfirst = slide[1]
        local oldlast  = slide[count]
        local before   = getprev(oldfirst)
        local after    = getnext(oldlast)
        -- We need to remember this before the order changes.
        local athead   = oldfirst == head
        -- Detach the run from its surroundings.
        setprev(oldfirst)
        setnext(oldlast)
        -- A stable insertion sort: table.sort is not stable and could swap
        -- marks of the same class. Runs are short so this is cheap enough.
        for i=2,count do
            local current = slide[i]
            local j       = i - 1
            while j > 0 and sorter(current,slide[j]) do
                slide[j+1] = slide[j]
                j = j - 1
            end
            slide[j+1] = current
        end
        -- Link the run in its new order and hook it into the list again.
        setlink(unpack(slide))
        local first = slide[1]
        local last  = slide[count]
        if before then
            setlink(before,first)
        else
            -- The new first node can still point back into the run.
            setprev(first)
        end
        if after then
            setlink(last,after)
        else
            -- The new last node can still point forward into the run.
            setnext(last)
        end
        if athead then
            head = first
        end
    end
    count = 0
    return head
end

-- The node processor of the "reordercombining" feature. It collects runs of
-- adjacent characters of the given font that have a class in the table of
-- the current script and reorders each run.
--
-- head    : the head of the node list
-- font    : the font id; only characters in this font are looked at
-- attr    : passed to otf.scriptandlanguage in order to determine the script
-- returns : the (possibly changed) head of the list
--
-- Nothing happens when there is no sorter (or no class table) registered for
-- the script.

local function reorderprocessor(head,font,attr)
    local tfmdata = fontdata[font]
    local script  = otf.scriptandlanguage(tfmdata,attr)
    sorter = sorters[script]
    local ranks = sorter and classes[script]
    if ranks then
        for n in nextnode, head do
            local char = ischar(n,font)
            local c    = char and ranks[char]
            if c then
                -- A mark that we know: start or extend the current run.
                if count == 0 then
                    count = 1
                    slide = { n }
                else
                    count = count + 1
                    slide[count] = n
                end
                class[n] = c
                point[n] = char
            elseif count > 0 then
                -- Anything else ends the run.
                head = reorder(head)
            end
        end
        -- There can be a run at the end of the list.
        if count > 0 then
            head = reorder(head)
        end
    end
    return head
end

fonts.constructors.features.otf.register {
    name        = "reordercombining",
    description = "reorder combining characters",
 -- default     = true,
 -- initializers = {
 --     node     = reorderinitializer,
 -- },
    processors  = {
        position = 1,
        node     = reorderprocessor,
    }
}