-- font-map.lua (size: 19 Kb, last modification: 2020-07-01 14:35)
-- Record this file's metadata in the 'modules' table, creating that table
-- first when it does not exist yet.

modules = modules or { }

modules['font-map'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- Local aliases for the library functions used in this file. Note that
-- string.formatters, table.sortedhash, table.sortedkeys and number.idiv are
-- not part of stock Lua; they are expected to be provided by the host
-- environment before this file is loaded.

local tonumber   = tonumber
local next       = next
local type       = type

local match      = string.match
local format     = string.format
local find       = string.find
local concat     = table.concat
local gsub       = string.gsub
local lower      = string.lower

local P          = lpeg.P
local R          = lpeg.R
local S          = lpeg.S
local C          = lpeg.C
local Ct         = lpeg.Ct
local Cc         = lpeg.Cc
local lpegmatch  = lpeg.match

local formatters = string.formatters
local sortedhash = table.sortedhash
local sortedkeys = table.sortedkeys
local idiv       = number.idiv

-- Tracing switches. Each callback handed to trackers.register stores the
-- value it receives in the matching local.

local trace_loading = false
local trace_mapping = false

trackers.register("fonts.loading", function(state) trace_loading = state end)
trackers.register("fonts.mapping", function(state) trace_mapping = state end)

local report_fonts = logs.reporter("fonts","loading") -- not otf only

-- force_ligatures was true for a while so that emoji with bad names work too

local force_ligatures = false

directives.register("fonts.mapping.forceligatures", function(state) force_ligatures = state end)

-- Reuse the existing fonts.mappings table when there is one, otherwise start
-- with an empty one and store it back.

local fonts    = fonts or { }

fonts.mappings = fonts.mappings or { }

local mappings = fonts.mappings

local allocate = utilities.storage.allocate

-- LPeg patterns that recognize glyph names which encode code points.
--
-- Each top-level alternative produces two values: the decoded result and a
-- boolean that is true when the result is a table of several numbers and
-- false when it is a single number.

local function fromhex(s)
    return tonumber(s,16)
end

local hex      = R("AF","af","09")
local hexfour  = (hex * hex * hex^-2) / fromhex   -- two up to four hex digits
local hexsix   = (hex * hex * hex^-4) / fromhex   -- two up to six hex digits
local dec      = R("09")^1 / tonumber
local period   = P(".")

local finish   = period + P(-1)                   -- a period or the end of the name
local single   = Cc(false)
local multiple = Cc(true)

local unicode  = (P("uni") + P("UNI")) * (hexfour * finish * single + Ct(hexfour^1) * multiple) -- base planes
local ucode    = (P("u")   + P("U")  ) * (hexsix  * finish * single + Ct(hexsix ^1) * multiple) -- extended
local index    = P("index") * dec * single

local parser   = unicode + ucode + index

-- cache of parsers made by makenameparser, keyed by prefix string

local parsers  = { }

-- Returns an LPeg pattern for parsing glyph names.
--
-- Without an argument (or with an empty string) the generic parser is
-- returned. Otherwise the result matches "<str>.<decimal number>" and
-- produces that number followed by false; such patterns are created once per
-- prefix and kept in the parsers cache.

local function makenameparser(str)
    if str and str ~= "" then
        local cached = parsers[str]
        if not cached then
            cached = P(str) * period * dec * Cc(false)
            parsers[str] = cached
        end
        return cached
    end
    return parser
end

-- value returned for code points that get no usable mapping

local s_unknown = "FFFD"

-- formatters for one and for two four-digit uppercase hexadecimal numbers

local f_single  = formatters["%04X"]
local f_double  = formatters["%04X%04X"]

-- Converts one code point into its UTF-16 representation as an uppercase
-- hexadecimal string.
--
-- unicode : number
-- returns : a string of four hex digits for values handled by the first test,
--           eight hex digits (high and low surrogate) for supplementary
--           values, or s_unknown for the surrogate range (including 0xD7FF,
--           as in tounicode16sequence and tounicode) and for 0xF0000 up to
--           0x10FFFF.

local function tounicode16(unicode)
    if unicode < 0xD7FF or (unicode > 0xDFFF and unicode <= 0xFFFF) then
        return f_single(unicode)
    elseif unicode >= 0x00E000 and unicode <= 0x00F8FF then
        -- NOTE(review): this range is already accepted by the first test, so
        -- this branch is never taken; it is kept as it was
        return s_unknown
    elseif unicode >= 0x0F0000 and unicode <= 0x0FFFFF then
        return s_unknown
    elseif unicode >= 0x100000 and unicode <= 0x10FFFF then
        return s_unknown
    elseif unicode >= 0x00D7FF and unicode <= 0x00DFFF then
        -- starts at 0xD7FF because the first test excludes that value; without
        -- this it would reach the surrogate pair code with a negative offset
        return s_unknown
    else
        -- surrogate pair: the upper ten bits go into the high surrogate, the
        -- lower ten bits into the low surrogate
        unicode = unicode - 0x10000
        return f_double(idiv(unicode,0x400)+0xD800,unicode%0x400+0xDC00)
    end
end

-- Converts a list of code points into one string: the concatenation of the
-- UTF-16 hexadecimal representation of each entry.
--
-- unicodes : array of numbers
-- returns  : string; per entry four hex digits, eight hex digits (surrogate
--            pair) or s_unknown, following the same ranges as tounicode16
--
-- All range tests use the current entry 'u'. They used to test 'unicode',
-- which in this scope is the file-level name pattern, and the surrogate
-- computation referred to an undefined 'k'.

local function tounicode16sequence(unicodes)
    local t = { }
    for l=1,#unicodes do
        local u = unicodes[l]
        if u < 0xD7FF or (u > 0xDFFF and u <= 0xFFFF) then
            t[l] = f_single(u)
        elseif u >= 0x00E000 and u <= 0x00F8FF then
            -- NOTE(review): already accepted by the first test, never taken
            t[l] = s_unknown
        elseif u >= 0x0F0000 and u <= 0x0FFFFF then
            t[l] = s_unknown
        elseif u >= 0x100000 and u <= 0x10FFFF then
            t[l] = s_unknown
     -- elseif u >= 0x00D800 and u <= 0x00DFFF then
        elseif u >= 0x00D7FF and u <= 0x00DFFF then
            t[l] = s_unknown
        else
            u = u - 0x10000
            t[l] = f_double(idiv(u,0x400)+0xD800,u%0x400+0xDC00)
        end
    end
    return concat(t)
end


-- hash : code point -> UTF-16 hex string, filled on demand by the index
--        function below and stored so that each value is computed only once
-- conc : table reused by tounicode to collect the strings of a sequence

local hash = { }
local conc = { }

table.setmetatableindex(hash,function(t,k)
    local v -- local on purpose: assigning a bare 'v' would create a global
    if k < 0xD7FF or (k > 0xDFFF and k <= 0xFFFF) then
        v = f_single(k)
    else
        -- no range checks here: every other key is turned into a surrogate
        -- pair (tounicode filters the unwanted ranges for single numbers)
        local offset = k - 0x10000
        v = f_double(idiv(offset,0x400)+0xD800,offset%0x400+0xDC00)
    end
    t[k] = v
    return v
end)

-- Returns the UTF-16 hexadecimal string for a code point or for a list of
-- code points.
--
-- k       : a number, or an array of numbers
-- returns : for an array the concatenated hash entries of all elements; for
--           a number s_unknown when it lies in one of the ranges tested
--           below, otherwise its hash entry

local function tounicode(k)
    if type(k) ~= "table" then
        local unknown =
               (k >= 0x00E000 and k <= 0x00F8FF)
            or (k >= 0x0F0000 and k <= 0x0FFFFF)
            or (k >= 0x100000 and k <= 0x10FFFF)
         -- or (k >= 0x00D800 and k <= 0x00DFFF)
            or (k >= 0x00D7FF and k <= 0x00DFFF)
        if unknown then
            return s_unknown
        end
        return hash[k]
    end
    -- conc is shared between calls, so only the first 'count' slots are used
    local count = #k
    for i=1,count do
        conc[i] = hash[k[i]]
    end
    return concat(conc,"",1,count)
end

-- Converts a UTF-16 hexadecimal string back into a code point.
--
-- str     : a string of four hex digits, or a surrogate pair of which the
--           first eight characters are used
-- returns : number

local function fromunicode16(str)
    if #str ~= 4 then
        local high, low = match(str,"(....)(....)")
        local lead  = tonumber(high,16) - 0xD800
        local trail = tonumber(low,16) - 0xDC00
        return 0x10000 + lead * 0x400 + trail
    end
    return tonumber(str,16)
end
145 146
-- Slightly slower:
147
--
148
-- local p = C(4) * (C(4)^-1) / function(l,r)
149
-- if r then
150
-- return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00
151
-- else
152
-- return tonumber(l,16)
153
-- end
154
-- end
155
--
156
-- local function fromunicode16(str)
157
-- return lpegmatch(p,str)
158
-- end

-- make the helpers available through the mappings table

mappings.fromunicode16       = fromunicode16
mappings.tounicode16sequence = tounicode16sequence
mappings.tounicode16         = tounicode16
mappings.tounicode           = tounicode
mappings.makenameparser      = makenameparser

-- mozilla emoji has bad lig names: name = gsub(name,"(u[a-f0-9_]+)%-([a-f0-9_]+)","%1_%2")

-- namesplitter captures the underscore separated parts of a glyph name in a
-- table; a part ends at an underscore or a period, and matching stops at the
-- first period (or at an empty part).

local ligseparator = P("_")
local varseparator = P(".")
local namepart     = C((1 - ligseparator - varseparator)^1)
local namesplitter = Ct(namepart * (ligseparator * namepart)^0)
171 172
-- maybe: ff fi fl ffi ffl => f_f f_i f_l f_f_i f_f_l
173 174
-- local function test(name)
175
-- local split = lpegmatch(namesplitter,name)
176
-- print(string.formatters["%s: [% t]"](name,split))
177
-- end
178 179
-- test("i.f_")
180
-- test("this")
181
-- test("this.that")
182
-- test("japan1.123")
183
-- test("such_so_more")
184
-- test("such_so_more.that")
185 186
-- to be completed .. for fonts that use unicodes for ligatures which
-- is actually a bad thing and should be avoided in the first place

do

    -- Known ligature glyphs: the component based name, the code points of
    -- the components and, when present, the ligature's own code point (mess).

    local definitions = {
        IJ  = { name = "I_J",   unicode = { 0x49, 0x4A },       mess = 0x0132 },
        ij  = { name = "i_j",   unicode = { 0x69, 0x6A },       mess = 0x0133 },
        ff  = { name = "f_f",   unicode = { 0x66, 0x66 },       mess = 0xFB00 },
        fi  = { name = "f_i",   unicode = { 0x66, 0x69 },       mess = 0xFB01 },
        fl  = { name = "f_l",   unicode = { 0x66, 0x6C },       mess = 0xFB02 },
        ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 },
        ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 },
        fj  = { name = "f_j",   unicode = { 0x66, 0x6A } },
        fk  = { name = "f_k",   unicode = { 0x66, 0x6B } },

     -- endash = { name = "endash", unicode = 0x2013, mess = 0x2013 },
     -- emdash = { name = "emdash", unicode = 0x2014, mess = 0x2014 },
    }

    -- every entry can be found by its component name, by its ligature code
    -- point and by its key in the table above

    local registry = allocate { }

    for key, entry in next, definitions do
        if entry.name then
            registry[entry.name] = entry
        end
        if entry.mess then
            registry[entry.mess] = entry
        end
        registry[key] = entry
    end

    mappings.overloads = registry

end

-- Sets the 'unicode' field of the glyph descriptions in 'data'.
--
-- data           : table with 'resources' (its 'unicodes' field maps names to
--                  code points), 'properties' and 'descriptions' (glyph
--                  tables keyed by slot)
-- filename       : only used in the trace message
-- checklookups   : optional function, called as checklookups(data,missing,
--                  nofmissing) after the name based pass
-- forceligatures : when true (or when the force_ligatures directive is set)
--                  collected components are applied to every private slot
--                  that has them, otherwise only to glyphs of class
--                  "ligature" that have no unicode yet
--
-- Returns nothing; quits early when data.resources.unicodes is missing.
--
-- For a named glyph the sources are tried in this order: the overloads, the
-- name vectors, the cid map (when properties.cidinfo resolves to one), the
-- name split on underscores, and finally the uniXXXX / uXXXXXX / indexNNN
-- parser. Slots without a result are recorded in 'missing'.

function mappings.addtounicode(data,filename,checklookups,forceligatures)
    local resources = data.resources
    local unicodes  = resources.unicodes
    if not unicodes then
        if trace_mapping then
            report_fonts("no unicode list, quitting tounicode for %a",filename)
        end
        return
    end
    local properties   = data.properties
    local descriptions = data.descriptions
    local overloads    = mappings.overloads
    -- we need to move this code
    unicodes['space']  = unicodes['space']  or 32
    unicodes['hyphen'] = unicodes['hyphen'] or 45
    unicodes['zwj']    = unicodes['zwj']    or 0x200D
    unicodes['zwnj']   = unicodes['zwnj']   or 0x200C
    --
    local private       = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF
    local unicodevector = fonts.encodings.agl.unicodes or { } -- loaded runtime in context
    local contextvector = fonts.encodings.agl.ctxcodes or { } -- loaded runtime in context
    local missing       = { }
    local nofmissing    = 0
    local oparser       = nil
    local cidnames      = nil
    local cidcodes      = nil
    local cidinfo       = properties.cidinfo
    local usedmap       = cidinfo and fonts.cid.getmap(cidinfo)
    local uparser       = makenameparser() -- hm, every time?
    if usedmap then
        oparser  = makenameparser(cidinfo.ordering)
        cidnames = usedmap.names
        cidcodes = usedmap.unicodes
    end
    local ns = 0 -- incremented when a single code point is assigned
    local nl = 0 -- incremented for multiple code points and for each split attempt
    --
    -- in order to avoid differences between runs due to hash randomization we
    -- run over a sorted list
    --
    local dlist = sortedkeys(descriptions)
    --
 -- for du, glyph in next, descriptions do
    for i=1,#dlist do
        local du    = dlist[i]
        local glyph = descriptions[du]
        local name  = glyph.name
        if name then
            local overload = overloads[name] or overloads[du]
            if overload then
                -- get rid of weird ligatures
             -- glyph.name    = overload.name
                glyph.unicode = overload.unicode
            else
                local gu = glyph.unicode -- can already be set (number or table)
                if not gu or gu == -1 or du >= private or (du >= 0xE000 and du <= 0xF8FF) or du == 0xFFFE or du == 0xFFFF then
                    local unicode = unicodevector[name] or contextvector[name]
                    if unicode then
                        glyph.unicode = unicode
                        ns            = ns + 1
                    end
                    -- cidmap heuristics, beware, there is no guarantee for a match unless
                    -- the chain resolves
                    if (not unicode) and usedmap then
                        local foundindex = lpegmatch(oparser,name)
                        if foundindex then
                            unicode = cidcodes[foundindex] -- name to number
                            if unicode then
                                glyph.unicode = unicode
                                ns            = ns + 1
                            else
                                local reference = cidnames[foundindex] -- number to name
                                if reference then
                                    local foundindex = lpegmatch(oparser,reference)
                                    if foundindex then
                                        unicode = cidcodes[foundindex]
                                        if unicode then
                                            glyph.unicode = unicode
                                            ns            = ns + 1
                                        end
                                    end
                                    if not unicode or unicode == "" then
                                        local foundcodes, multiple = lpegmatch(uparser,reference)
                                        if foundcodes then
                                            glyph.unicode = foundcodes
                                            if multiple then
                                                nl      = nl + 1
                                                unicode = true
                                            else
                                                ns      = ns + 1
                                                unicode = foundcodes
                                            end
                                        end
                                    end
                                end
                            end
                        end
                    end
                    -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
                    --
                    -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
                    -- and this one seems to work reasonable also with fonts that use less standardized naming
                    -- schemes. The extra private test is tested by KE and seems to work okay with non-typical
                    -- fonts as well.
                    --
                    if not unicode or unicode == "" then
                        local split  = lpegmatch(namesplitter,name)
                        local nsplit = split and #split or 0 -- add if
                        if nsplit == 0 then
                            -- skip
                        elseif nsplit == 1 then
                            local base = split[1]
                            -- the context vector is consulted with the base name: a lookup
                            -- with the full name has already failed when we end up here
                            local u = unicodes[base] or unicodevector[base] or contextvector[base]
                            if not u then
                                -- skip
                            elseif type(u) == "table" then
                                -- unlikely
                                if u[1] < private then
                                    unicode = u
                                    glyph.unicode = unicode
                                end
                            elseif u < private then
                                unicode = u
                                glyph.unicode = unicode
                            end
                        else
                            local t = { }
                            local n = 0
                            for l=1,nsplit do
                                local base = split[l]
                                -- same as above: look up the component, not the full name
                                local u = unicodes[base] or unicodevector[base] or contextvector[base]
                                if not u then
                                    break
                                elseif type(u) == "table" then
                                    if u[1] >= private then
                                        break
                                    end
                                    n = n + 1
                                    t[n] = u[1]
                                else
                                    if u >= private then
                                        break
                                    end
                                    n = n + 1
                                    t[n] = u
                                end
                            end
                            if n > 0 then
                                if n == 1 then
                                    unicode = t[1]
                                else
                                    unicode = t
                                end
                                glyph.unicode = unicode
                            end
                        end
                        nl = nl + 1
                    end
                    -- last resort (we might need to catch private here as well)
                    if not unicode or unicode == "" then
                        local foundcodes, multiple = lpegmatch(uparser,name)
                        if foundcodes then
                            glyph.unicode = foundcodes
                            if multiple then
                                nl      = nl + 1
                                unicode = true
                            else
                                ns      = ns + 1
                                unicode = foundcodes
                            end
                        end
                    end
                    -- check using substitutes and alternates
                    local r = overloads[unicode]
                    if r then
                        unicode = r.unicode
                        glyph.unicode = unicode
                    end
                    --
                    if not unicode then
                        missing[du] = true
                        nofmissing  = nofmissing + 1
                    end
                else
                    -- maybe a message or so
                end
            end
        else
            local overload = overloads[du]
            if overload then
                glyph.unicode = overload.unicode
            elseif not glyph.unicode then
                missing[du] = true
                nofmissing  = nofmissing + 1
            end
        end
    end
    if type(checklookups) == "function" then
        checklookups(data,missing,nofmissing)
    end

    local unicoded  = 0
    local collected = fonts.handlers.otf.readers.getcomponents(data) -- neglectable overhead

    -- Uses the component list u as the unicode of glyph, unless one of the
    -- components lies above the private offset.
    -- NOTE(review): this tests '> private' while the rest of the function
    -- uses '>= private'; confirm which one is meant.
    local function resolve(glyph,u)
        local n = #u
        for i=1,n do
            if u[i] > private then
                n = 0
                break
            end
        end
        if n > 0 then
            if n > 1 then
                glyph.unicode = u
            else
                glyph.unicode = u[1]
            end
            unicoded = unicoded + 1
        end
    end

    if not collected then
        -- move on
    elseif forceligatures or force_ligatures then
        for i=1,#dlist do
            local du = dlist[i]
            if du >= private or (du >= 0xE000 and du <= 0xF8FF) then
                local u = collected[du] -- always tables
                if u then
                    resolve(descriptions[du],u)
                end
            end
        end
    else
        for i=1,#dlist do
            local du = dlist[i]
            if du >= private or (du >= 0xE000 and du <= 0xF8FF) then
                local glyph = descriptions[du]
                if glyph.class == "ligature" and not glyph.unicode then
                    local u = collected[du] -- always tables
                    if u then
                        resolve(glyph,u)
                    end
                end
            end
        end
    end

    if trace_mapping and unicoded > 0 then
        report_fonts("%n ligature tounicode mappings deduced from gsub ligature features",unicoded)
    end
    if trace_mapping then
     -- for unic, glyph in sortedhash(descriptions) do
        for i=1,#dlist do
            local du      = dlist[i]
            local glyph   = descriptions[du]
            local name    = glyph.name or "-"
            local index   = glyph.index or 0
            local unicode = glyph.unicode
            if unicode then
                if type(unicode) == "table" then
                    local unicodes = { }
                    for i=1,#unicode do
                        unicodes[i] = formatters("%U",unicode[i])
                    end
                    report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,du,unicodes)
                else
                    report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,du,unicode)
                end
            else
                report_fonts("internal slot %U, name %a, unicode %U",index,name,du)
            end
        end
    end
    if trace_loading and (ns > 0 or nl > 0) then
        -- NOTE(review): the value reported as "ligatures" is ns, the counter
        -- for single code points; possibly nl was meant - confirm
        report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns)
    end
end
503 504
-- local parser = makenameparser("Japan1")
505
-- local parser = makenameparser()
506
-- local function test(str)
507
-- local b, a = lpegmatch(parser,str)
508
-- print((a and table.serialize(b)) or b)
509
-- end
510
-- test("a.sc")
511
-- test("a")
512
-- test("uni1234")
513
-- test("uni1234.xx")
514
-- test("uni12349876")
515
-- test("u123400987600")
516
-- test("index1234")
517
-- test("Japan1.123")
518