-- Make sure the global module registry exists and record the metadata of
-- this file in it.

if not modules then
    modules = { }
end

modules ['font-map'] = {
    version   = 1.001,
    optimize  = true,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
9
10local next, type = next, type
11
12local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
13local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
14local formatters = string.formatters
15local utfbyte = utf.byte
16local sortedhash, sortedkeys = table.sortedhash, table.sortedkeys
17local hextointeger, dectointeger = string.hextointeger, string.dectointeger
18
-- Tracing switches. Each setter below is handed to trackers.register under
-- the given tracker name and simply stores the value it receives.

local trace_loading = false
local trace_mapping = false

trackers.register("fonts.loading", function(v)
    trace_loading = v
end)

trackers.register("fonts.mapping", function(v)
    trace_mapping = v
end)

local report_fonts = logs.reporter("fonts","loading")

-- When set (through the directive or the argument of addtounicode) ligature
-- components are resolved for every private slot, not only for glyphs of
-- class "ligature" that lack a unicode.

local force_ligatures = false

directives.register("fonts.mapping.forceligatures", function(v)
    force_ligatures = v
end)

-- The namespace this module adds its functions to.

local fonts    = fonts or { }
local mappings = fonts.mappings or { }

fonts.mappings = mappings

local allocate = utilities.storage.allocate
33
-- Building blocks for turning glyph names into code points. A hex number
-- takes at least two digits: at most four for 'hexfour' and at most six for
-- 'hexsix'. Both are converted with hextointeger, decimals with dectointeger.

local hex     = R("AF","af","09")
local hexfour = (hex * hex * hex^-2) / function(s) return hextointeger(s) end
local hexsix  = (hex * hex * hex^-4) / function(s) return hextointeger(s) end
local dec     = R("09")^1 / dectointeger
local period  = P(".")

-- Each pattern below produces two values. Either a single number followed by
-- false (one hex number that is followed by a period or the end of the name,
-- or a decimal index), or a table of numbers followed by true.

local unicode = (P("uni") + P("UNI")) * (
    hexfour * (period + P(-1)) * Cc(false) +
    Ct(hexfour^1) * Cc(true)
)

local ucode = (P("u") + P("U")) * (
    hexsix * (period + P(-1)) * Cc(false) +
    Ct(hexsix^1) * Cc(true)
)

local index = P("index") * dec * Cc(false)

-- The default parser tries the three forms in this order; 'parsers' caches
-- the prefix specific parsers made by makenameparser.

local parser  = unicode + ucode + index
local parsers = { }
45
-- Return a glyph name parser. Without a (non empty) prefix the generic
-- parser is returned. Otherwise the result matches "<str>.<decimal>" and
-- yields the decimal followed by false; such parsers are made once per
-- prefix and kept in 'parsers'.

local function makenameparser(str)
    if str and str ~= "" then
        local cached = parsers[str]
        if cached then
            return cached
        end
        local created = P(str) * period * dec * Cc(false)
        parsers[str] = created
        return created
    end
    return parser
end
58
-- The replacement character as hex string plus formatters for one and two
-- 16 bit units written as four upper case hex digits each.

local s_unknown = "FFFD"
local f_double  = formatters["%04X%04X"]
local f_single  = formatters["%04X"]
62
-- Convert one code point into its UTF-16 representation written as upper
-- case hex digits: four digits for a basic plane slot, eight digits (a
-- surrogate pair) for the higher planes.
--
-- unicode : number, the code point
-- returns : string; s_unknown ("FFFD") for surrogates (0xD800-0xDFFF) and for
--           the private planes 15 and 16 (0xF0000-0x10FFFF)
--
-- The first test uses <= 0xD7FF: that slot is the last one before the
-- surrogate block and fits in a single unit. With a plain < it fell through
-- to the pair arithmetic with a negative value and produced garbage.
--
-- NOTE(review): basic plane private slots (0xE000-0xF8FF) are emitted as-is
-- by the first branch, so the separate test for that range that used to
-- follow it could never be reached and has been dropped. 'tounicode' maps
-- the same range to s_unknown; confirm which of the two is intended.

local function tounicode16(unicode)
    if unicode <= 0xD7FF or (unicode > 0xDFFF and unicode <= 0xFFFF) then
        return f_single(unicode)
    elseif unicode >= 0x0F0000 and unicode <= 0x0FFFFF then
        return s_unknown
    elseif unicode >= 0x100000 and unicode <= 0x10FFFF then
        return s_unknown
    elseif unicode >= 0x00D800 and unicode <= 0x00DFFF then
        return s_unknown
    else
        -- high surrogate from the upper ten bits, low one from the lower ten
        unicode = unicode - 0x10000
        return f_double((unicode//0x400)+0xD800,unicode%0x400+0xDC00)
    end
end
79
-- Convert a list of code points into one string of UTF-16 units written as
-- upper case hex digits (four per basic plane slot, eight per surrogate pair).
--
-- unicodes : table, a sequence of numbers
-- returns  : string, the concatenated result; surrogates (0xD800-0xDFFF) and
--            the private planes 15 and 16 contribute s_unknown ("FFFD")
--
-- All range tests look at the loop value 'u'. They used to refer to
-- 'unicode', which in this file is the lpeg pattern for "uni" names, so every
-- entry that was not a plain basic plane slot raised a comparison error.
-- The first test uses <= 0xD7FF because that slot is not a surrogate.
--
-- NOTE(review): basic plane private slots (0xE000-0xF8FF) are emitted as-is
-- by the first branch, which made the separate test for that range
-- unreachable; it has been dropped. 'tounicode' maps that range to s_unknown,
-- confirm which of the two is intended.

local function tounicode16sequence(unicodes)
    local t = { }
    for l=1,#unicodes do
        local u = unicodes[l]
        if u <= 0xD7FF or (u > 0xDFFF and u <= 0xFFFF) then
            t[l] = f_single(u)
        elseif u >= 0x0F0000 and u <= 0x0FFFFF then
            t[l] = s_unknown
        elseif u >= 0x100000 and u <= 0x10FFFF then
            t[l] = s_unknown
        elseif u >= 0x00D800 and u <= 0x00DFFF then
            t[l] = s_unknown
        else
            -- high surrogate from the upper ten bits, low one from the lower ten
            u = u - 0x10000
            t[l] = f_double((u//0x400)+0xD800,u%0x400+0xDC00)
        end
    end
    return concat(t)
end
102
103
-- 'hash' maps a code point onto its UTF-16 hex string; the function passed
-- to table.setmetatableindex computes an entry and stores it in the table
-- (presumably it is installed as the index handler, so it runs only for keys
-- that are not stored yet). 'conc' is a scratch table reused by tounicode.

local hash = { }
local conc = { }

table.setmetatableindex(hash,function(t,k)
    local v
    if k <= 0xD7FF or (k > 0xDFFF and k <= 0xFFFF) then
        -- one 16 bit unit; 0xD7FF is the last slot before the surrogates and
        -- must not end up in the pair arithmetic below
        v = f_single(k)
    else
        local s = k - 0x10000
        v = f_double((s//0x400)+0xD800,s%0x400+0xDC00)
    end
    t[k] = v
    return v
end)

-- Convert a code point, a list of code points or a string into UTF-16 units
-- written as upper case hex digits.
--
-- k       : number, table (sequence of numbers) or string
-- returns : string
--
-- A table gives the concatenated entries, a string gives the entry for the
-- value returned by utfbyte(k); both go straight to the cache without the
-- range filtering below. Any other non number gives s_unknown ("FFFD"). A
-- number gives s_unknown when it is a private slot (0xE000-0xF8FF, planes 15
-- and 16) or a surrogate (0xD800-0xDFFF). The surrogate test starts at 0xD800:
-- 0xD7FF is not a surrogate and is now converted like its neighbours.

local function tounicode(k)
    local t = type(k)
    if t == "table" then
        local n = #k
        for l=1,n do
            conc[l] = hash[k[l]]
        end
        -- explicit bounds because conc can still hold entries of a longer list
        return concat(conc,"",1,n)
    elseif t == "string" then
        return hash[utfbyte(k)]
    elseif t ~= "number" then
        return s_unknown
    elseif k >= 0x00E000 and k <= 0x00F8FF then
        return s_unknown
    elseif k >= 0x0F0000 and k <= 0x0FFFFF then
        return s_unknown
    elseif k >= 0x100000 and k <= 0x10FFFF then
        return s_unknown
    elseif k >= 0x00D800 and k <= 0x00DFFF then
        return s_unknown
    else
        return hash[k]
    end
end
144
-- Turn a hex string made by the tounicode converters back into a number.
-- Four characters are read as one hex number, anything else is taken as two
-- groups of four hex digits forming a surrogate pair.

local function fromunicode16(str)
    if #str ~= 4 then
        local high, low = match(str,"(....)(....)")
        return 0x10000 + (hextointeger(high)-0xD800)*0x400 + hextointeger(low) - 0xDC00
    end
    return hextointeger(str)
end
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
-- Make the helpers defined above available through the mappings namespace.

mappings.fromunicode16       = fromunicode16
mappings.makenameparser      = makenameparser
mappings.tounicode           = tounicode
mappings.tounicode16         = tounicode16
mappings.tounicode16sequence = tounicode16sequence
173
174
175
-- Split a glyph name into its underscore separated parts. Matching stops at
-- the first period, so what follows a period is not part of the result. The
-- outcome is a table with one string per part.

local ligseparator = P("_")
local varseparator = P(".")
local namepart     = C((1 - ligseparator - varseparator)^1)
local namesplitter = Ct(namepart * (ligseparator * namepart)^0)
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
do

    -- Glyphs that always get the listed sequence of code points. Every entry
    -- is registered under its short name, under its 'name' field and, when
    -- present, under the number in its 'mess' field.

    local ligatures = {
        IJ  = { name = "I_J",   unicode = { 0x49, 0x4A },       mess = 0x0132 },
        ij  = { name = "i_j",   unicode = { 0x69, 0x6A },       mess = 0x0133 },
        ff  = { name = "f_f",   unicode = { 0x66, 0x66 },       mess = 0xFB00 },
        fi  = { name = "f_i",   unicode = { 0x66, 0x69 },       mess = 0xFB01 },
        fl  = { name = "f_l",   unicode = { 0x66, 0x6C },       mess = 0xFB02 },
        ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 },
        ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 },
        fj  = { name = "f_j",   unicode = { 0x66, 0x6A } },
        fk  = { name = "f_k",   unicode = { 0x66, 0x6B } },
    }

    local registry = allocate { }

    for shortname, entry in next, ligatures do
        local fullname = entry.name
        local slot     = entry.mess
        if fullname then
            registry[fullname] = entry
        end
        if slot then
            registry[slot] = entry
        end
        registry[shortname] = entry
    end

    mappings.overloads = registry

end
231
-- Give the glyphs in data.descriptions a 'unicode' field (a number or a table
-- of numbers) where that can be derived from the glyph name, the cid map of
-- the font or the ligature components.
--
-- data           : table with 'resources' (holding 'unicodes'), 'properties'
--                  and 'descriptions'
-- filename       : only used in the trace message
-- checklookups   : optional function, called as checklookups(data,missing,nofmissing)
--                  after the name based pass
-- forceligatures : when true (or when the forceligatures directive is set)
--                  components are resolved for every private slot and not
--                  only for unresolved glyphs of class "ligature"
--
-- Nothing is returned; the function quits early when resources.unicodes is
-- missing. Counters: 'ns' counts single code point assignments, 'nl' counts
-- multiple code point ones (it is also bumped once for every glyph that
-- reaches the name splitting step, whatever the outcome).
--
-- Changes compared to the previous version: resolve() rejects a component
-- that equals 'private' (all other tests in this function treat >= private as
-- private), the final trace line reports 'nl' as the number of ligatures
-- instead of 'ns', and a redundant 'usedmap and' test has been removed.

function mappings.addtounicode(data,filename,checklookups,forceligatures)
    local resources = data.resources
    local unicodes  = resources.unicodes
    if not unicodes then
        if trace_mapping then
            report_fonts("no unicode list, quitting tounicode for %a",filename)
        end
        return
    end
    local properties   = data.properties
    local descriptions = data.descriptions
    local overloads    = mappings.overloads

    -- make sure that a few names used in composed glyph names are known
    unicodes['space']  = unicodes['space']  or 32
    unicodes['hyphen'] = unicodes['hyphen'] or 45
    unicodes['zwj']    = unicodes['zwj']    or 0x200D
    unicodes['zwnj']   = unicodes['zwnj']   or 0x200C

    local private       = fonts.constructors and fonts.constructors.privateoffset or 0xF0000
    local unicodevector = fonts.encodings.agl.unicodes or { }
    local contextvector = fonts.encodings.agl.ctxcodes or { }
    local missing       = { }
    local nofmissing    = 0
    local oparser       = nil
    local cidnames      = nil
    local cidcodes      = nil
    local cidinfo       = properties.cidinfo
    local usedmap       = cidinfo and fonts.cid.getmap(cidinfo)
    local uparser       = makenameparser()
    if usedmap then
        oparser  = makenameparser(cidinfo.ordering)
        cidnames = usedmap.names
        cidcodes = usedmap.unicodes
    end
    local ns = 0
    local nl = 0

    -- a sorted list gives the same order (and trace output) on every run
    local dlist = sortedkeys(descriptions)

    for i=1,#dlist do
        local du    = dlist[i]
        local glyph = descriptions[du]
        local name  = glyph.name
        if name then
            local overload = overloads[name] or overloads[du]
            if overload then
                -- a known ligature: always use the predefined sequence
                glyph.unicode = overload.unicode
            else
                local gu = glyph.unicode
                if not gu or gu == -1 or du >= private or (du >= 0xE000 and du <= 0xF8FF) or du == 0xFFFE or du == 0xFFFF then
                    -- step 1: the name as such in one of the name vectors
                    local unicode = unicodevector[name] or contextvector[name]
                    if unicode then
                        glyph.unicode = unicode
                        ns = ns + 1
                    end
                    -- step 2: a cid font, the name holds an index in the cid map
                    if (not unicode) and usedmap then
                        local foundindex = lpegmatch(oparser,name)
                        if foundindex then
                            unicode = cidcodes[foundindex]
                            if unicode then
                                glyph.unicode = unicode
                                ns = ns + 1
                            else
                                -- no code, but maybe a name that points to another
                                -- index or that is a uni/u/index name itself
                                local reference = cidnames[foundindex]
                                if reference then
                                    local foundindex = lpegmatch(oparser,reference)
                                    if foundindex then
                                        unicode = cidcodes[foundindex]
                                        if unicode then
                                            glyph.unicode = unicode
                                            ns = ns + 1
                                        end
                                    end
                                    if not unicode or unicode == "" then
                                        local foundcodes, multiple = lpegmatch(uparser,reference)
                                        if foundcodes then
                                            glyph.unicode = foundcodes
                                            if multiple then
                                                nl = nl + 1
                                                unicode = true
                                            else
                                                ns = ns + 1
                                                unicode = foundcodes
                                            end
                                        end
                                    end
                                end
                            end
                        end
                    end
                    -- step 3: split the name in its underscore separated parts
                    -- (anything after a period is dropped) and look up each part
                    if not unicode or unicode == "" then
                        local split  = lpegmatch(namesplitter,name)
                        local nsplit = split and #split or 0
                        if nsplit == 0 then
                            -- nothing to look up
                        elseif nsplit == 1 then
                            local base = split[1]
                            -- NOTE(review): contextvector is indexed with the full
                            -- name, which was already tried in step 1; 'base' may
                            -- have been intended here, confirm
                            local u = unicodes[base] or unicodevector[base] or contextvector[name]
                            if not u then
                                -- unknown part
                            elseif type(u) == "table" then
                                if u[1] < private then
                                    unicode = u
                                    glyph.unicode = unicode
                                end
                            elseif u < private then
                                unicode = u
                                glyph.unicode = unicode
                            end
                        else
                            local t = { }
                            local n = 0
                            for l=1,nsplit do
                                local base = split[l]
                                -- NOTE(review): same remark as above about the
                                -- contextvector[name] lookup
                                local u = unicodes[base] or unicodevector[base] or contextvector[name]
                                if not u then
                                    break
                                elseif type(u) == "table" then
                                    if u[1] >= private then
                                        break
                                    end
                                    n = n + 1
                                    t[n] = u[1]
                                else
                                    if u >= private then
                                        break
                                    end
                                    n = n + 1
                                    t[n] = u
                                end
                            end
                            -- what was collected before an unknown or private
                            -- part showed up is used
                            if n > 0 then
                                if n == 1 then
                                    unicode = t[1]
                                else
                                    unicode = t
                                end
                                glyph.unicode = unicode
                            end
                        end
                        nl = nl + 1
                    end
                    -- step 4: the name itself as uni/u/index name
                    if not unicode or unicode == "" then
                        local foundcodes, multiple = lpegmatch(uparser,name)
                        if foundcodes then
                            glyph.unicode = foundcodes
                            if multiple then
                                nl = nl + 1
                                unicode = true
                            else
                                ns = ns + 1
                                unicode = foundcodes
                            end
                        end
                    end
                    -- a result that is a key of the overloads (for instance one
                    -- of the 'mess' slots) is replaced by the overload sequence
                    local r = overloads[unicode]
                    if r then
                        unicode = r.unicode
                        glyph.unicode = unicode
                    end
                    if not unicode then
                        missing[du] = true
                        nofmissing  = nofmissing + 1
                    end
                else
                    -- the glyph already has a unicode and sits in a regular slot
                end
            end
        else
            local overload = overloads[du]
            if overload then
                glyph.unicode = overload.unicode
            elseif not glyph.unicode then
                missing[du] = true
                nofmissing  = nofmissing + 1
            end
        end
    end

    if type(checklookups) == "function" then
        checklookups(data,missing,nofmissing)
    end

    local unicoded  = 0
    local collected = fonts.handlers.otf.readers.getcomponents(data)

    -- Assign the component list u to the glyph unless one of the components
    -- is a private slot. The test is >= so that 'private' itself counts as
    -- private, like in the tests on du and u elsewhere in this function.
    local function resolve(glyph,u)
        local n = #u
        for i=1,n do
            if u[i] >= private then
                n = 0
                break
            end
        end
        if n > 0 then
            if n > 1 then
                glyph.unicode = u
            else
                glyph.unicode = u[1]
            end
            unicoded = unicoded + 1
        end
    end

    if not collected then
        -- no component information available
    elseif forceligatures or force_ligatures then
        for i=1,#dlist do
            local du = dlist[i]
            if du >= private or (du >= 0xE000 and du <= 0xF8FF) then
                local u = collected[du]
                if u then
                    resolve(descriptions[du],u)
                end
            end
        end
    else
        for i=1,#dlist do
            local du = dlist[i]
            if du >= private or (du >= 0xE000 and du <= 0xF8FF) then
                local glyph = descriptions[du]
                if glyph.class == "ligature" and not glyph.unicode then
                    local u = collected[du]
                    if u then
                        resolve(glyph,u)
                    end
                end
            end
        end
    end

    if trace_mapping and unicoded > 0 then
        report_fonts("%n ligature tounicode mappings deduced from gsub ligature features",unicoded)
    end
    if trace_mapping then
        for i=1,#dlist do
            local du      = dlist[i]
            local glyph   = descriptions[du]
            local name    = glyph.name or "-"
            local index   = glyph.index or 0
            local unicode = glyph.unicode
            if unicode then
                if type(unicode) == "table" then
                    local unicodes = { }
                    for i=1,#unicode do
                        unicodes[i] = formatters("%U",unicode[i])
                    end
                    report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,du,unicodes)
                else
                    report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,du,unicode)
                end
            else
                report_fonts("internal slot %U, name %a, unicode %U",index,name,du)
            end
        end
    end
    if trace_loading and (ns > 0 or nl > 0) then
        -- nl is the counter for multiple code point (ligature) entries
        report_fonts("%s tounicode entries added, ligatures %s",nl+ns,nl)
    end
end
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526 |