font-imp-combining.lua /size: 8169 b    last modification: 2020-07-01 14:35
1
-- Make sure the global module registry exists, then record the metadata of
-- this file under its own name.
if not modules then
    modules = { }
end

modules["font-imp-combining"] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA ADE",
    copyright = "ConTeXt Development Team",
    license   = "see context related readme files",
}
8 9
if
not
context
then
return
end
10 11
local
next
,
unpack
=
next
,
unpack
12
local
sort
,
copy
,
insert
=
table
.
sort
,
table
.
copy
,
table
.
insert
13
local
setmetatableindex
=
table
.
setmetatableindex
14 15
local
fontdata
=
fonts
.
hashes
.
identifiers
16
local
otf
=
fonts
.
handlers
.
otf
17 18
local
nuts
=
nodes
.
nuts
19 20
local
nextnode
=
nuts
.
traversers
.
node
21
local
ischar
=
nuts
.
ischar
22
local
getprev
=
nuts
.
getprev
23
local
getnext
=
nuts
.
getnext
24
local
setprev
=
nuts
.
setprev
25
local
setnext
=
nuts
.
setnext
26
local
setboth
=
nuts
.
setboth
27
local
setlink
=
nuts
.
setlink
28
local
exchange
=
nuts
.
exchange
29 30
-- Scratch tables keyed by node; they are kept between runs (reused).
local class   = { } -- node -> ordering class, compared by the sorters
local point   = { } -- node -> character code (only written in this file)

-- Lookup tables keyed by script tag.
local classes = { } -- script -> table mapping character code to ordering class
local sorters = { } -- script -> comparison function handed to table.sort

-- The run of marks that is currently being collected by the processor.
local slide   = { } -- array part holds the nodes of the run
local count   = 0   -- number of nodes stored in slide
36 37
-- List provided by Joey McCollum (Hebrew Layout Intelligence):
38
--
39
-- 1. The consonants (Unicode points 05D0-05EA) have no combining class and are never reordered; this is typographically correct.
40
-- 2. Shin dot and sin dot (05C1-05C2) should be next, but Unicode places them in combining classes 24 and 25, after the characters in recommended classes 3-5 and many of the characters in recommended class 6.
41
-- 3. Dagesh / mapiq (05BC) should be next, but Unicode assigns it a combining class of 21. This means that it will be incorrectly ordered before characters in recommended class 2 and after characters in recommended classes 4-6 after Unicode normalization.
42
-- 4. Rafe (05BF) should be next, but Unicode assigns it a combining class of 23. Thus, it will be correctly placed after characters in recommended class 3, but incorrectly placed before characters in recommended class 2 after Unicode normalization.
43
-- 5. The holam and holam haser vowel points (05B9-05BA) should be next, but Unicode places them in combining class 19. This means that they will be placed incorrectly before characters in recommended classes 2-4 and after all characters in recommended class 6 except 05BB after Unicode normalization.
44
-- 6. The characters in 0591, 0596, 059B, 05A2-05A7, 05AA, 05B0-05B8, 05BB, 05BD, 05C5, 05C7 should be treated as being in the same class, but Unicode places them in combining classes 10-18, 20, 22, and 220.
45
-- 7. The prepositive marks yetiv and dehi (059A, 05AD) should be next; Unicode places them in combining class 222, so they should correctly come after all characters in recommended classes 1-6.
46
-- 8. The characters 0307, 0593-0595, 0597-0598, 059C-05A1, 05A8, 05AB-05AC, 05AF, 05C4 should be treated as being in the same class; Unicode places them in combining class 230, so they should correctly come after all characters in recommended classes 1-7.
47
-- 9. The postpositive marks segolta, pashta, telisha qetana, and zinor (0592, 0599, 05A9, 05AE) should be next; Unicode places them in combining class 230, so they will need to be reordered after the characters in recommended class 8.
48
--
49
-- Some tests by Joey:
50
--
51
-- Arial, Calibri, and Times New Roman will correctly typeset most combinations of points even in Unicode's canonical order, but they typeset the normalized sequences (hiriq, shin dot, tipeha) and (qamatz, dagesh, shin dot) incorrectly and their typographically recommended reorderings correctly.
52
-- Cardo will correctly typeset most combinations of points even in Unicode's canonical order, but it typesets the normalized sequences (hiriq, shin dot, tipeha) incorrectly and its typographically recommended reorderings correctly.
53
-- Frank Ruehl CLM typesets most combinations of points even in Unicode's canonical order, but it consistently does a poor job positioning cantillation marks even when they are placed in the typographically recommended position. Taamey Frank CLM is another version of the same font that handles this correctly, so it is possible that Frank Ruehl CLM is just an obsolete font that did not have well-implemented Hebrew font features for cantillation marks to begin with.
54
-- For Linux Libertine, the text samples with both the normalized mark ordering and the typographically recommended mark ordering were typeset poorly. I think that this is just because that font does not have full support for the Hebrew glyph set (it lacks cantillation marks) or Hebrew font features (it does not place Hebrew diacritical marks intelligently), so no mark reordering would fix its problems.
55
-- Taamey David CLM and Taamey Frank CLM exhibit the same typographical mistakes as SBL Hebrew when the input is in Unicode canonical order, and these mistakes go away if the marks are ordered in the typographically recommended way.
56
--
57
-- SBL Hebrew is used as reference font.
58 59
-- Ordering classes for Hebrew marks, keyed by Unicode code point. A lower
-- value sorts earlier. The values follow the recommended order in the list
-- above, shifted by one: item 1 (the consonants) has no entry because
-- consonants are never reordered, so item 2 gets class 1, and so on.
classes.hebr = {
    -- class 1 (item 2): shin dot and sin dot
    [0x05C1] = 1, [0x05C2] = 1,

    -- class 2 (item 3): dagesh / mapiq
    [0x05BC] = 2,

    -- class 3 (item 4): rafe
    [0x05BF] = 3,

    -- class 4 (item 5): holam and holam haser
    [0x05B9] = 4, [0x05BA] = 4,

    -- class 5 (item 6)
    [0x0591] = 5, [0x0596] = 5, [0x059B] = 5, [0x05A2] = 5,
    [0x05A3] = 5, [0x05A4] = 5, [0x05A5] = 5, [0x05A6] = 5,
    [0x05A7] = 5, [0x05AA] = 5, [0x05B0] = 5, [0x05B1] = 5,
    [0x05B2] = 5, [0x05B3] = 5, [0x05B4] = 5, [0x05B5] = 5,
    [0x05B6] = 5, [0x05B7] = 5, [0x05B8] = 5, [0x05BB] = 5,
    [0x05BD] = 5, [0x05C5] = 5, [0x05C7] = 5,

    -- class 6 (item 7): prepositive marks yetiv and dehi
    [0x059A] = 6, [0x05AD] = 6,

    -- class 7 (item 8)
    [0x0307] = 7, [0x0593] = 7, [0x0594] = 7, [0x0595] = 7,
    [0x0597] = 7, [0x0598] = 7, [0x059C] = 7, [0x059D] = 7,
    [0x059E] = 7, [0x059F] = 7, [0x05A0] = 7, [0x05A1] = 7,
    [0x05A8] = 7, [0x05AB] = 7, [0x05AC] = 7, [0x05AF] = 7,
    [0x05C4] = 7,

    -- class 8 (item 9): postpositive marks segolta, pashta, telisha qetana, zinor
    [0x0592] = 8, [0x0599] = 8, [0x05A9] = 8, [0x05AE] = 8,
}
74 75
-- Comparison function for Hebrew runs: node a goes before node b when the
-- ordering class stored for a is strictly lower than the one stored for b.
-- Both nodes must have an entry in class.
function sorters.hebr(a,b)
    local ca = class[a]
    local cb = class[b]
    return ca < cb
end
78 79
-- local dflt = setmetatableindex(function(t,k,v)
80
-- for k, v in next, characters.data do
81
-- local c = v.combining
82
-- if c then
83
-- t[k] = c
84
-- end
85
-- end
86
-- setmetatableindex(t,nil)
87
-- return t[k]
88
-- end)
89
--
90
-- classes.dflt = dflt
91
-- sorters.dflt = function(a,b) return class[b] < class[a] end
92 93
-- see analyzeprocessor in case we want scripts
94 95
-- Comparison function of the script that is being processed. It is assigned
-- by reorderprocessor and read by reorder; declaring it here makes it a
-- file-level local instead of an accidental global.
local sorter

-- Sort the run of marks collected in slide[1..count] and relink it in place.
--
-- head : first node of the list that contains the run
--
-- Returns the head of the list, which changes when the run started at the
-- head and sorting moved another node to the front. Always resets count to 0,
-- so a run of one node is simply dropped.
local function reorder(head)
    if count == 2 then
        -- two marks: swapping them is the only possible reordering
        local first = slide[1]
        local last  = slide[2]
        if sorter(last,first) then
            head = exchange(head,first,last)
        end
    elseif count > 1 then
        local first   = slide[1]
        local last    = slide[count]
        local before  = getprev(first)
        local after   = getnext(last)
        -- must be tested before sorting: afterwards slide[1] can be another node
        local washead = first == head
        -- cut the run loose from its neighbours (setters called without a
        -- value; presumably that clears the link -- confirm against nuts)
        setprev(first)
        setnext(last)
        -- NOTE(review): table.sort is not stable, so marks that share a class
        -- may change their relative order here.
        sort(slide,sorter)
        setlink(unpack(slide))
        first = slide[1]
        last  = slide[count]
        if before then
            setlink(before,first)
        end
        setlink(last,after)
        if washead then
            -- the run started the list, so its new first node is the new head
            head = first
        end
    end
    count = 0
    return head
end
124 125
-- Node processor: walks the list, collects runs of consecutive characters
-- that have an ordering class for the script of the font, and lets reorder
-- sort each run.
--
-- head : first node of the list
-- font : font identifier; indexes fontdata and is passed to ischar
-- attr : passed on to otf.scriptandlanguage
--
-- Returns the head of the list, which reorder may have replaced. The list is
-- returned untouched when the script has no sorter or no class table.
local function reorderprocessor(head,font,attr)
    local tfmdata = fontdata[font]
    local script  = otf.scriptandlanguage(tfmdata,attr)
    -- reorder picks up the comparison function through this variable
    sorter = sorters[script]
    local scriptclasses = classes[script]
    if not sorter or not scriptclasses then
        return head
    end
    for n in nextnode, head do
        local char = ischar(n,font)
        local c    = char and scriptclasses[char]
        if c then
            -- a mark with an ordering class: start or extend the current run
            if count == 0 then
                count = 1
                slide = { n }
            else
                count = count + 1
                slide[count] = n
            end
            class[n] = c
            point[n] = char
        elseif count > 0 then
            -- anything else ends the run that was being collected
            head = reorder(head)
        end
    end
    if count > 0 then
        -- the list ended while a run was still open
        head = reorder(head)
    end
    return head
end
158 159
-- Register the "reordercombining" feature with the otf feature registry; its
-- node processor is reorderprocessor, registered with position 1.
local registerotffeature = fonts.constructors.features.otf.register

registerotffeature {
    name         = "reordercombining",
    description  = "reorder combining characters",
 -- default      = true,
 -- initializers = {
 --     node = reorderinitializer,
 -- },
    processors   = {
        position = 1,
        node     = reorderprocessor,
    }
}
171