-- font-otr.lua /size: 93 Kb    last modification: 2025-02-21 11:03
-- Make sure the global module registry exists and then store the metadata of this file
-- in it under its own name.

modules = modules or { }

modules['font-otr'] = {
    version   = 1.001,
    optimize  = true,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
9
-- When looking into a cid font related issue in the ff library I wondered if
-- it made sense to use Lua to filter the information from the otf and ttf
-- files. Quite some ff code relates to special fonts and in practice we only
-- use rather normal opentype fonts.
14--
15-- The code here is based on the documentation (and examples) at the microsoft
16-- website. The code will be extended and improved stepwise. After some experiments
17-- I decided to convert to a format more suitable for the context font handler
18-- because it makes no sense to rehash all those lookups again.
19--
-- Currently we can use this code for getting basic info about the font, loading
-- shapes and loading the extensive table. I'm not sure if I will provide a ff
-- compatible output as well (we're not that far from it as currently I can load
-- all data reasonably fast).
24
25-- We can omit redundant glyphs names i.e. ones that match the agl or
26-- are just a unicode string but it doesn't save that much. It will be an option
27-- some day.
28
-- Optimizing the widths will be done anyway as it saves quite a bit on a cjk font
-- and the existing (old) code is okay.
31
32-- todo: more messages (only if really needed)
33--
34-- considered, in math:
35--
36-- start -> first (so we can skip the first same-size one)
37-- end   -> last
38--
-- Widths and weights are kind of messy: for instance lmmonolt has a pfmweight of
-- 400 while it should be 300. So, for now we mostly stick to the old compromise.
41
42-- We don't really need all those language tables so they might be dropped some
43-- day.
44
-- The new reader is faster in some respects and slower in others. The memory footprint
-- is lower. The string reader is a bit faster than the file reader. The new reader
-- gives more efficient tables and has a bit more analysis. In practice these times are
-- not that relevant because we cache. The otf files take a bit more time because we
-- need to calculate the boundingboxes. In theory the processing of text should be
-- somewhat faster especially for complex fonts with many lookups.
51--
52--                        old    new    str reader
53-- lmroman12-regular.otf  0.103  0.203  0.195
54-- latinmodern-math.otf   0.454  0.768  0.712
55-- husayni.ttf            1.142  1.526  1.259
56--
57-- If there is demand I will consider making a ff compatible table dumper but it's
58-- probably more fun to provide a way to show features applied.
59
-- I experimented a bit with f:readbyte(n) and f:readshort() and so on, and it is indeed
-- faster but it might not be the real bottleneck as we still need to juggle data. It
-- is probably more memory efficient as no intermediate strings are involved.
63
64-- if not characters then
65--     require("char-def")
66--     require("char-ini")
67-- end
68
69local number = number
70local next, type, tonumber, rawget = next, type, tonumber, rawget
71local byte, lower, char, gsub = string.byte, string.lower, string.char, string.gsub
72local fullstrip = string.fullstrip
73local floor, round = math.floor, math.round
74local P, R, S, C, Cs, Cc, Ct, Carg, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Carg, lpeg.Cmt
75local lpegmatch = lpeg.match
76local rshift = bit32.rshift
77
78local setmetatableindex  = table.setmetatableindex
79local sortedkeys         = table.sortedkeys
80local sortedhash         = table.sortedhash
81local stripstring        = string.nospaces
82local utf16_to_utf8_be   = utf.utf16_to_utf8_be
83
84local report             = logs.reporter("otf reader")
85local report_cmap        = logs.reporter("otf reader","cmap")
86
87local trace_cmap         = false  trackers.register("otf.cmap",         function(v) trace_cmap         = v end)
88local trace_cmap_details = false  trackers.register("otf.cmap.details", function(v) trace_cmap_details = v end)
89
90fonts                    = fonts or { }
91local handlers           = fonts.handlers or { }
92fonts.handlers           = handlers
93local otf                = handlers.otf or { }
94handlers.otf             = otf
95local readers            = otf.readers or { }
96otf.readers              = readers
97
-- All table readers in this file fetch their data through the primitives declared below.
-- They are file level locals (and thereby upvalues of the readers) so rebinding them
-- switches the backend for every reader in this file at once. The list of bindings lives
-- in one place (usestreamreader) so that the default setup and the directive below can
-- not get out of sync.
--
-- utilities.streams : faster on big files (not true any longer)
-- utilities.files   : faster on identify (also uses less memory), the default

local streamreader       -- the backend that the primitives below currently come from
local streamwriter       = utilities.files

local openfile
local closefile
----- skipbytes
local setposition
local skipshort
local readbytes
local readstring
local readbyte           --  8-bit unsigned integer (readcardinal1)
local readushort         -- 16-bit unsigned integer (readcardinal2)
local readuint           -- 24-bit unsigned integer (readcardinal3)
local readulong          -- 32-bit unsigned integer (readcardinal4)
----- readchar           --  8-bit   signed integer (readinteger1)
local readshort          -- 16-bit   signed integer (readinteger2)
local readlong           -- 32-bit   signed integer (readinteger4)
local readfixed          -- readfixed4
local read2dot14         -- 16-bit signed fixed number with the low 14 bits of fraction (2.14) (F2DOT14)
local readfword          -- 16-bit   signed integer that describes a quantity in FUnits
local readufword         -- 16-bit unsigned integer that describes a quantity in FUnits
local readoffset
local readcardinaltable
local readintegertable

-- Binds all primitives to the given backend (utilities.files or utilities.streams) and
-- installs a readtag function on that backend. The readtag function reads four bytes as
-- string, removes the spaces and lowercases the result; it goes through the shared
-- readstring upvalue so it always uses the most recently selected backend.

local function usestreamreader(reader)

    streamreader      = reader

    openfile          = reader.open
    closefile         = reader.close
    setposition       = reader.setposition
    skipshort         = reader.skipshort
    readbytes         = reader.readbytes
    readstring        = reader.readstring
    readbyte          = reader.readcardinal1
    readushort        = reader.readcardinal2
    readuint          = reader.readcardinal3
    readulong         = reader.readcardinal4
    readshort         = reader.readinteger2
    readlong          = reader.readinteger4
    readfixed         = reader.readfixed4
    read2dot14        = reader.read2dot14
    readfword         = readshort
    readufword        = readushort
    readoffset        = readushort
    readcardinaltable = reader.readcardinaltable
    readintegertable  = reader.readintegertable

    function reader.readtag(f)
        return lower(stripstring(readstring(f,4)))
    end

end

usestreamreader(utilities.files)

readers.streamreader     = streamreader
readers.streamwriter     = streamwriter

local short  = 2
local ushort = 2
local ulong  = 4

-- The directive only rebinds the locals of this file: readers.streamreader and
-- readers.streamwriter keep pointing to utilities.files, as before.

directives.register("fonts.streamreader",function()
    usestreamreader(utilities.streams)
end)
164
-- A date is represented in number of seconds since 12:00 midnight, January 1, 1904. The
-- value is stored as a signed 64-bit integer. We fetch all eight bytes but only combine
-- the last five (most significant one first); the first three are ignored.

local function readlongdatetime(f)
    local _, _, _, b5, b4, b3, b2, b1 = readbytes(f,8)
    return 0x100000000 * b5 + 0x1000000 * b4 + 0x10000 * b3 + 0x100 * b2 + b1
end
172
173local tableversion    = 0.004
174readers.tableversion  = tableversion
175local privateoffset   = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF
176
177-- We have quite some data tables. We are somewhat ff compatible with names but as I used
178-- the information from the microsoft site there can be differences. Eventually I might end
179-- up with a different ordering and naming.
180
-- Maps the numeric name identifier of a name record onto the key that we use for it. The
-- identifiers are zero based and listed explicitly so that an entry can be looked up
-- without counting.

local reservednames = {
    [ 0] = "copyright",
    [ 1] = "family",
    [ 2] = "subfamily",
    [ 3] = "uniqueid",
    [ 4] = "fullname",
    [ 5] = "version",
    [ 6] = "postscriptname",
    [ 7] = "trademark",
    [ 8] = "manufacturer",
    [ 9] = "designer",
    [10] = "description", -- descriptor in ff
    [11] = "vendorurl",
    [12] = "designerurl",
    [13] = "license",
    [14] = "licenseurl",
    [15] = "reserved",
    [16] = "typographicfamily",    -- preffamilyname
    [17] = "typographicsubfamily", -- prefmodifiers
    [18] = "compatiblefullname",   -- for mac
    [19] = "sampletext",
    [20] = "cidfindfontname",
    [21] = "wwsfamily",
    [22] = "wwssubfamily",
    [23] = "lightbackgroundpalette",
    [24] = "darkbackgroundpalette",
    [25] = "variationspostscriptnameprefix",
}
209
210-- more at: https://www.microsoft.com/typography/otspec/name.htm
211
212-- setmetatableindex(reservednames,function(t,k)
213--     local v = "name_" .. k
214--     t[k] =  v
215--     return v
216-- end)
217
-- Maps the (zero based) platform identifier of a record onto the key that is used in the
-- encodings, decoders and languages tables.

local platforms = {
    [0] = "unicode",
    [1] = "macintosh",
    [2] = "iso",
    [3] = "windows",
    [4] = "custom",
}
225
-- Per platform a mapping from the (zero based) encoding identifier onto a descriptive
-- name. The names are also used as keys in the decoders table.

local encodings = {
    -- these stay:
    unicode = {
        [0] = "unicode 1.0 semantics",
        [1] = "unicode 1.1 semantics",
        [2] = "iso/iec 10646",
        [3] = "unicode 2.0 bmp",             -- cmap subtable formats 0, 4, 6
        [4] = "unicode 2.0 full",            -- cmap subtable formats 0, 4, 6, 10, 12
        [5] = "unicode variation sequences", -- cmap subtable format 14
        [6] = "unicode full repertoire",     -- cmap subtable formats 0, 4, 6, 10, 12, 13
    },
    -- these can go:
    macintosh = {
        [ 0] = "roman",
        [ 1] = "japanese",
        [ 2] = "chinese (traditional)",
        [ 3] = "korean",
        [ 4] = "arabic",
        [ 5] = "hebrew",
        [ 6] = "greek",
        [ 7] = "russian",
        [ 8] = "rsymbol",
        [ 9] = "devanagari",
        [10] = "gurmukhi",
        [11] = "gujarati",
        [12] = "oriya",
        [13] = "bengali",
        [14] = "tamil",
        [15] = "telugu",
        [16] = "kannada",
        [17] = "malayalam",
        [18] = "sinhalese",
        [19] = "burmese",
        [20] = "khmer",
        [21] = "thai",
        [22] = "laotian",
        [23] = "georgian",
        [24] = "armenian",
        [25] = "chinese (simplified)",
        [26] = "tibetan",
        [27] = "mongolian",
        [28] = "geez",
        [29] = "slavic",
        [30] = "vietnamese",
        [31] = "sindhi",
        [32] = "uninterpreted",
    },
    -- these stay:
    iso = {
        [0] = "7-bit ascii",
        [1] = "iso 10646",
        [2] = "iso 8859-1",
    },
    -- these stay:
    windows = {
        [ 0] = "symbol",
        [ 1] = "unicode bmp", -- this is utf16
        [ 2] = "shiftjis",
        [ 3] = "prc",
        [ 4] = "big5",
        [ 5] = "wansung",
        [ 6] = "johab",
        [ 7] = "reserved 7",
        [ 8] = "reserved 8",
        [ 9] = "reserved 9",
        [10] = "unicode ucs-4",
    },
    custom = {
        -- custom: 0-255 : otf windows nt compatibility mapping
    }
}
269
-- Per platform a mapping from encoding name onto a function that converts a string in
-- that encoding. Only the windows platform has entries and they all share the same utf16
-- (big endian) to utf8 converter.
--
-- NOTE(review): the windows encoding "unicode ucs-4" has no entry here while most of the
-- keys below are unicode platform names; maybe always utf16 -- to be confirmed.

local decoders = {
    unicode   = { },
    macintosh = { },
    iso       = { },
    windows   = { },
    custom    = { },
}

do
    local windows = decoders.windows
    for _, encoding in next, {
        "unicode semantics",
        "unicode bmp",
        "unicode full",
        "unicode 1.0 semantics",
        "unicode 1.1 semantics",
        "unicode 2.0 bmp",
        "unicode 2.0 full",
        "unicode variation sequences",
        "unicode full repertoire",
    } do
        windows[encoding] = utf16_to_utf8_be
    end
end
288
-- This is a bit over the top as we can just look for either windows, unicode or macintosh
-- names (in that order). A font with no english name is probably a weird one anyway.
291
-- Per platform a mapping from language identifier onto a language name. Only the english
-- entries are active; the other identifiers are kept as (commented) reference and can be
-- enabled when needed.

local languages = {

    -- these stay:

    unicode = {
        [  0] = "english",
    },

    -- english can stay:

    macintosh = {
        [  0] = "english",
     -- [  1] = "french",
     -- [  2] = "german",
     -- [  3] = "italian",
     -- [  4] = "dutch",
     -- [  5] = "swedish",
     -- [  6] = "spanish",
     -- [  7] = "danish",
     -- [  8] = "portuguese",
     -- [  9] = "norwegian",
     -- [ 10] = "hebrew",
     -- [ 11] = "japanese",
     -- [ 12] = "arabic",
     -- [ 13] = "finnish",
     -- [ 14] = "greek",
     -- [ 15] = "icelandic",
     -- [ 16] = "maltese",
     -- [ 17] = "turkish",
     -- [ 18] = "croatian",
     -- [ 19] = "chinese (traditional)",
     -- [ 20] = "urdu",
     -- [ 21] = "hindi",
     -- [ 22] = "thai",
     -- [ 23] = "korean",
     -- [ 24] = "lithuanian",
     -- [ 25] = "polish",
     -- [ 26] = "hungarian",
     -- [ 27] = "estonian",
     -- [ 28] = "latvian",
     -- [ 29] = "sami",
     -- [ 30] = "faroese",
     -- [ 31] = "farsi/persian",
     -- [ 32] = "russian",
     -- [ 33] = "chinese (simplified)",
     -- [ 34] = "flemish",
     -- [ 35] = "irish gaelic",
     -- [ 36] = "albanian",
     -- [ 37] = "romanian",
     -- [ 38] = "czech",
     -- [ 39] = "slovak",
     -- [ 40] = "slovenian",
     -- [ 41] = "yiddish",
     -- [ 42] = "serbian",
     -- [ 43] = "macedonian",
     -- [ 44] = "bulgarian",
     -- [ 45] = "ukrainian",
     -- [ 46] = "byelorussian",
     -- [ 47] = "uzbek",
     -- [ 48] = "kazakh",
     -- [ 49] = "azerbaijani (cyrillic script)",
     -- [ 50] = "azerbaijani (arabic script)",
     -- [ 51] = "armenian",
     -- [ 52] = "georgian",
     -- [ 53] = "moldavian",
     -- [ 54] = "kirghiz",
     -- [ 55] = "tajiki",
     -- [ 56] = "turkmen",
     -- [ 57] = "mongolian (mongolian script)",
     -- [ 58] = "mongolian (cyrillic script)",
     -- [ 59] = "pashto",
     -- [ 60] = "kurdish",
     -- [ 61] = "kashmiri",
     -- [ 62] = "sindhi",
     -- [ 63] = "tibetan",
     -- [ 64] = "nepali",
     -- [ 65] = "sanskrit",
     -- [ 66] = "marathi",
     -- [ 67] = "bengali",
     -- [ 68] = "assamese",
     -- [ 69] = "gujarati",
     -- [ 70] = "punjabi",
     -- [ 71] = "oriya",
     -- [ 72] = "malayalam",
     -- [ 73] = "kannada",
     -- [ 74] = "tamil",
     -- [ 75] = "telugu",
     -- [ 76] = "sinhalese",
     -- [ 77] = "burmese",
     -- [ 78] = "khmer",
     -- [ 79] = "lao",
     -- [ 80] = "vietnamese",
     -- [ 81] = "indonesian",
     -- [ 82] = "tagalog",
     -- [ 83] = "malay (roman script)",
     -- [ 84] = "malay (arabic script)",
     -- [ 85] = "amharic",
     -- [ 86] = "tigrinya",
     -- [ 87] = "galla",
     -- [ 88] = "somali",
     -- [ 89] = "swahili",
     -- [ 90] = "kinyarwanda/ruanda",
     -- [ 91] = "rundi",
     -- [ 92] = "nyanja/chewa",
     -- [ 93] = "malagasy",
     -- [ 94] = "esperanto",
     -- [128] = "welsh",
     -- [129] = "basque",
     -- [130] = "catalan",
     -- [131] = "latin",
     -- [132] = "quechua",
     -- [133] = "guarani",
     -- [134] = "aymara",
     -- [135] = "tatar",
     -- [136] = "uighur",
     -- [137] = "dzongkha",
     -- [138] = "javanese (roman script)",
     -- [139] = "sundanese (roman script)",
     -- [140] = "galician",
     -- [141] = "afrikaans",
     -- [142] = "breton",
     -- [143] = "inuktitut",
     -- [144] = "scottish gaelic",
     -- [145] = "manx gaelic",
     -- [146] = "irish gaelic (with dot above)",
     -- [147] = "tongan",
     -- [148] = "greek (polytonic)",
     -- [149] = "greenlandic",
     -- [150] = "azerbaijani (roman script)",
    },

    -- these can stay:

    iso = {
    },

    -- english can stay:

    windows = {
     -- [0x0436] = "afrikaans - south africa",
     -- [0x041c] = "albanian - albania",
     -- [0x0484] = "alsatian - france",
     -- [0x045e] = "amharic - ethiopia",
     -- [0x1401] = "arabic - algeria",
     -- [0x3c01] = "arabic - bahrain",
     -- [0x0c01] = "arabic - egypt",
     -- [0x0801] = "arabic - iraq",
     -- [0x2c01] = "arabic - jordan",
     -- [0x3401] = "arabic - kuwait",
     -- [0x3001] = "arabic - lebanon",
     -- [0x1001] = "arabic - libya",
     -- [0x1801] = "arabic - morocco",
     -- [0x2001] = "arabic - oman",
     -- [0x4001] = "arabic - qatar",
     -- [0x0401] = "arabic - saudi arabia",
     -- [0x2801] = "arabic - syria",
     -- [0x1c01] = "arabic - tunisia",
     -- [0x3801] = "arabic - u.a.e.",
     -- [0x2401] = "arabic - yemen",
     -- [0x042b] = "armenian - armenia",
     -- [0x044d] = "assamese - india",
     -- [0x082c] = "azeri (cyrillic) - azerbaijan",
     -- [0x042c] = "azeri (latin) - azerbaijan",
     -- [0x046d] = "bashkir - russia",
     -- [0x042d] = "basque - basque",
     -- [0x0423] = "belarusian - belarus",
     -- [0x0845] = "bengali - bangladesh",
     -- [0x0445] = "bengali - india",
     -- [0x201a] = "bosnian (cyrillic) - bosnia and herzegovina",
     -- [0x141a] = "bosnian (latin) - bosnia and herzegovina",
     -- [0x047e] = "breton - france",
     -- [0x0402] = "bulgarian - bulgaria",
     -- [0x0403] = "catalan - catalan",
     -- [0x0c04] = "chinese - hong kong s.a.r.",
     -- [0x1404] = "chinese - macao s.a.r.",
     -- [0x0804] = "chinese - people's republic of china",
     -- [0x1004] = "chinese - singapore",
     -- [0x0404] = "chinese - taiwan",
     -- [0x0483] = "corsican - france",
     -- [0x041a] = "croatian - croatia",
     -- [0x101a] = "croatian (latin) - bosnia and herzegovina",
     -- [0x0405] = "czech - czech republic",
     -- [0x0406] = "danish - denmark",
     -- [0x048c] = "dari - afghanistan",
     -- [0x0465] = "divehi - maldives",
     -- [0x0813] = "dutch - belgium",
     -- [0x0413] = "dutch - netherlands",
     -- [0x0c09] = "english - australia",
     -- [0x2809] = "english - belize",
     -- [0x1009] = "english - canada",
     -- [0x2409] = "english - caribbean",
     -- [0x4009] = "english - india",
     -- [0x1809] = "english - ireland",
     -- [0x2009] = "english - jamaica",
     -- [0x4409] = "english - malaysia",
     -- [0x1409] = "english - new zealand",
     -- [0x3409] = "english - republic of the philippines",
     -- [0x4809] = "english - singapore",
     -- [0x1c09] = "english - south africa",
     -- [0x2c09] = "english - trinidad and tobago",
     -- [0x0809] = "english - united kingdom",
        [0x0409] = "english - united states",
     -- [0x3009] = "english - zimbabwe",
     -- [0x0425] = "estonian - estonia",
     -- [0x0438] = "faroese - faroe islands",
     -- [0x0464] = "filipino - philippines",
     -- [0x040b] = "finnish - finland",
     -- [0x080c] = "french - belgium",
     -- [0x0c0c] = "french - canada",
     -- [0x040c] = "french - france",
     -- [0x140c] = "french - luxembourg",
     -- [0x180c] = "french - principality of monaco",
     -- [0x100c] = "french - switzerland",
     -- [0x0462] = "frisian - netherlands",
     -- [0x0456] = "galician - galician",
     -- [0x0437] = "georgian - georgia",
     -- [0x0c07] = "german - austria",
     -- [0x0407] = "german - germany",
     -- [0x1407] = "german - liechtenstein",
     -- [0x1007] = "german - luxembourg",
     -- [0x0807] = "german - switzerland",
     -- [0x0408] = "greek - greece",
     -- [0x046f] = "greenlandic - greenland",
     -- [0x0447] = "gujarati - india",
     -- [0x0468] = "hausa (latin) - nigeria",
     -- [0x040d] = "hebrew - israel",
     -- [0x0439] = "hindi - india",
     -- [0x040e] = "hungarian - hungary",
     -- [0x040f] = "icelandic - iceland",
     -- [0x0470] = "igbo - nigeria",
     -- [0x0421] = "indonesian - indonesia",
     -- [0x045d] = "inuktitut - canada",
     -- [0x085d] = "inuktitut (latin) - canada",
     -- [0x083c] = "irish - ireland",
     -- [0x0434] = "isixhosa - south africa",
     -- [0x0435] = "isizulu - south africa",
     -- [0x0410] = "italian - italy",
     -- [0x0810] = "italian - switzerland",
     -- [0x0411] = "japanese - japan",
     -- [0x044b] = "kannada - india",
     -- [0x043f] = "kazakh - kazakhstan",
     -- [0x0453] = "khmer - cambodia",
     -- [0x0486] = "k'iche - guatemala",
     -- [0x0487] = "kinyarwanda - rwanda",
     -- [0x0441] = "kiswahili - kenya",
     -- [0x0457] = "konkani - india",
     -- [0x0412] = "korean - korea",
     -- [0x0440] = "kyrgyz - kyrgyzstan",
     -- [0x0454] = "lao - lao p.d.r.",
     -- [0x0426] = "latvian - latvia",
     -- [0x0427] = "lithuanian - lithuania",
     -- [0x082e] = "lower sorbian - germany",
     -- [0x046e] = "luxembourgish - luxembourg",
     -- [0x042f] = "macedonian (fyrom) - former yugoslav republic of macedonia",
     -- [0x083e] = "malay - brunei darussalam",
     -- [0x043e] = "malay - malaysia",
     -- [0x044c] = "malayalam - india",
     -- [0x043a] = "maltese - malta",
     -- [0x0481] = "maori - new zealand",
     -- [0x047a] = "mapudungun - chile",
     -- [0x044e] = "marathi - india",
     -- [0x047c] = "mohawk - mohawk",
     -- [0x0450] = "mongolian (cyrillic) - mongolia",
     -- [0x0850] = "mongolian (traditional) - people's republic of china",
     -- [0x0461] = "nepali - nepal",
     -- [0x0414] = "norwegian (bokmal) - norway",
     -- [0x0814] = "norwegian (nynorsk) - norway",
     -- [0x0482] = "occitan - france",
     -- [0x0448] = "odia (formerly oriya) - india",
     -- [0x0463] = "pashto - afghanistan",
     -- [0x0415] = "polish - poland",
     -- [0x0416] = "portuguese - brazil",
     -- [0x0816] = "portuguese - portugal",
     -- [0x0446] = "punjabi - india",
     -- [0x046b] = "quechua - bolivia",
     -- [0x086b] = "quechua - ecuador",
     -- [0x0c6b] = "quechua - peru",
     -- [0x0418] = "romanian - romania",
     -- [0x0417] = "romansh - switzerland",
     -- [0x0419] = "russian - russia",
     -- [0x243b] = "sami (inari) - finland",
     -- [0x103b] = "sami (lule) - norway",
     -- [0x143b] = "sami (lule) - sweden",
     -- [0x0c3b] = "sami (northern) - finland",
     -- [0x043b] = "sami (northern) - norway",
     -- [0x083b] = "sami (northern) - sweden",
     -- [0x203b] = "sami (skolt) - finland",
     -- [0x183b] = "sami (southern) - norway",
     -- [0x1c3b] = "sami (southern) - sweden",
     -- [0x044f] = "sanskrit - india",
     -- [0x1c1a] = "serbian (cyrillic) - bosnia and herzegovina",
     -- [0x0c1a] = "serbian (cyrillic) - serbia",
     -- [0x181a] = "serbian (latin) - bosnia and herzegovina",
     -- [0x081a] = "serbian (latin) - serbia",
     -- [0x046c] = "sesotho sa leboa - south africa",
     -- [0x0432] = "setswana - south africa",
     -- [0x045b] = "sinhala - sri lanka",
     -- [0x041b] = "slovak - slovakia",
     -- [0x0424] = "slovenian - slovenia",
     -- [0x2c0a] = "spanish - argentina",
     -- [0x400a] = "spanish - bolivia",
     -- [0x340a] = "spanish - chile",
     -- [0x240a] = "spanish - colombia",
     -- [0x140a] = "spanish - costa rica",
     -- [0x1c0a] = "spanish - dominican republic",
     -- [0x300a] = "spanish - ecuador",
     -- [0x440a] = "spanish - el salvador",
     -- [0x100a] = "spanish - guatemala",
     -- [0x480a] = "spanish - honduras",
     -- [0x080a] = "spanish - mexico",
     -- [0x4c0a] = "spanish - nicaragua",
     -- [0x180a] = "spanish - panama",
     -- [0x3c0a] = "spanish - paraguay",
     -- [0x280a] = "spanish - peru",
     -- [0x500a] = "spanish - puerto rico",
     -- [0x0c0a] = "spanish (modern sort) - spain",
     -- [0x040a] = "spanish (traditional sort) - spain",
     -- [0x540a] = "spanish - united states",
     -- [0x380a] = "spanish - uruguay",
     -- [0x200a] = "spanish - venezuela",
     -- [0x081d] = "swedish - finland",
     -- [0x041d] = "swedish - sweden",
     -- [0x045a] = "syriac - syria",
     -- [0x0428] = "tajik (cyrillic) - tajikistan",
     -- [0x085f] = "tamazight (latin) - algeria",
     -- [0x0449] = "tamil - india",
     -- [0x0444] = "tatar - russia",
     -- [0x044a] = "telugu - india",
     -- [0x041e] = "thai - thailand",
     -- [0x0451] = "tibetan - prc",
     -- [0x041f] = "turkish - turkey",
     -- [0x0442] = "turkmen - turkmenistan",
     -- [0x0480] = "uighur - prc",
     -- [0x0422] = "ukrainian - ukraine",
     -- [0x042e] = "upper sorbian - germany",
     -- [0x0420] = "urdu - islamic republic of pakistan",
     -- [0x0843] = "uzbek (cyrillic) - uzbekistan",
     -- [0x0443] = "uzbek (latin) - uzbekistan",
     -- [0x042a] = "vietnamese - vietnam",
     -- [0x0452] = "welsh - united kingdom",
     -- [0x0488] = "wolof - senegal",
     -- [0x0485] = "yakut - russia",
     -- [0x0478] = "yi - prc",
     -- [0x046a] = "yoruba - nigeria",
    },

    custom = {
    },

}
632
-- The standard macintosh glyph names, indexed (zero based) by their position in the
-- standard ordering; there are 258 of them (0 upto 257). The first glyph is ".notdef",
-- with a leading period just like ".null". The number at the end of each row is the
-- index of the first name in that row.

local standardromanencoding = { [0] = -- taken from wikipedia
    ".notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl",             --   0
    "numbersign", "dollar", "percent", "ampersand", "quotesingle", "parenleft",        --   6
    "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",            --  12
    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",            --  19
    "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at",        --  28
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",         --  36
    "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft",              --  51
    "backslash", "bracketright", "asciicircum", "underscore", "grave", "a", "b",       --  63
    "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q",         --  70
    "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar",                   --  85
    "braceright", "asciitilde", "Adieresis", "Aring", "Ccedilla", "Eacute",            --  96
    "Ntilde", "Odieresis", "Udieresis", "aacute", "agrave", "acircumflex",             -- 102
    "adieresis", "atilde", "aring", "ccedilla", "eacute", "egrave",                    -- 108
    "ecircumflex", "edieresis", "iacute", "igrave", "icircumflex", "idieresis",        -- 114
    "ntilde", "oacute", "ograve", "ocircumflex", "odieresis", "otilde", "uacute",      -- 120
    "ugrave", "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling",      -- 127
    "section", "bullet", "paragraph", "germandbls", "registered", "copyright",         -- 134
    "trademark", "acute", "dieresis", "notequal", "AE", "Oslash", "infinity",          -- 140
    "plusminus", "lessequal", "greaterequal", "yen", "mu", "partialdiff",              -- 147
    "summation", "product", "pi", "integral", "ordfeminine", "ordmasculine",           -- 153
    "Omega", "ae", "oslash", "questiondown", "exclamdown", "logicalnot",               -- 159
    "radical", "florin", "approxequal", "Delta", "guillemotleft",                      -- 165
    "guillemotright", "ellipsis", "nonbreakingspace", "Agrave", "Atilde",              -- 170
    "Otilde", "OE", "oe", "endash", "emdash", "quotedblleft", "quotedblright",         -- 175
    "quoteleft", "quoteright", "divide", "lozenge", "ydieresis", "Ydieresis",          -- 182
    "fraction", "currency", "guilsinglleft", "guilsinglright", "fi", "fl",             -- 188
    "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase",                   -- 194
    "perthousand", "Acircumflex", "Ecircumflex", "Aacute", "Edieresis", "Egrave",      -- 198
    "Iacute", "Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex",           -- 204
    "apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave", "dotlessi",                  -- 210
    "circumflex", "tilde", "macron", "breve", "dotaccent", "ring", "cedilla",          -- 216
    "hungarumlaut", "ogonek", "caron", "Lslash", "lslash", "Scaron", "scaron",         -- 223
    "Zcaron", "zcaron", "brokenbar", "Eth", "eth", "Yacute", "yacute", "Thorn",        -- 230
    "thorn", "minus", "multiply", "onesuperior", "twosuperior", "threesuperior",       -- 238
    "onehalf", "onequarter", "threequarters", "franc", "Gbreve", "gbreve",             -- 244
    "Idotaccent", "Scedilla", "scedilla", "Cacute", "cacute", "Ccaron", "ccaron",      -- 250
    "dcroat",                                                                          -- 257
}
672
-- Names for numeric weights in steps of 100. A weight that is not in the table is rounded
-- to the nearest multiple of 100 (halfway cases go up): anything that ends up above 900
-- is "black" and anything else that has no entry is "normal". The result of such a lookup
-- is not stored in the table.

local weights = {
    [100] = "thin",
    [200] = "extralight",
    [300] = "light",
    [400] = "normal",
    [500] = "medium",
    [600] = "semibold", -- demi demibold
    [700] = "bold",
    [800] = "extrabold",
    [900] = "black",
}

setmetatableindex(weights, function(t,k)
    local rounded = floor((k + 50) / 100) * 100
    if rounded > 900 then
        return "black"
    end
    return rawget(t,rounded) or "normal"
end)

-- Names for the (one based) width classes. Any other key gives "normal".

local widths = {
    [1] = "ultracondensed",
    [2] = "extracondensed",
    [3] = "condensed",
    [4] = "semicondensed",
    [5] = "normal",
    [6] = "semiexpanded",
    [7] = "expanded",
    [8] = "extraexpanded",
    [9] = "ultraexpanded",
}

setmetatableindex(widths,function(t,k)
    return "normal"
end)
706
-- Maps a (zero based) panose weight value onto a weight name; the first two values both
-- map onto "normal".

local panoseweights = {
    [ 0] = "normal",
    [ 1] = "normal",
    [ 2] = "verylight",
    [ 3] = "light",
    [ 4] = "thin",
    [ 5] = "book",
    [ 6] = "medium",
    [ 7] = "demi",
    [ 8] = "bold",
    [ 9] = "heavy",
    [10] = "black",
}
720
-- Maps a (zero based) panose width value onto a width name; the first five values all map
-- onto "normal".

local panosewidths = {
    [0] = "normal",
    [1] = "normal",
    [2] = "normal",
    [3] = "normal",
    [4] = "normal",
    [5] = "expanded",
    [6] = "condensed",
    [7] = "veryexpanded",
    [8] = "verycondensed",
    [9] = "monospaced",
}
733
734-- We implement a reader per table.
735
736-- helper
737
738local helpers   = { }
739readers.helpers = helpers
740
-- Moves the file position to the start of the table with the given tag and returns that
-- offset. Nothing is returned (and the position is left alone) when the criterium is not
-- met, when there is no file handle, or when the font has no such table. A font without
-- table directory is reported.

local function gotodatatable(f,fontdata,tag,criterium)
    if not (criterium and f) then
        return
    end
    local tables = fontdata.tables
    if not tables then
        report("no tables")
        return
    end
    local datatable = tables[tag]
    if datatable then
        local tableoffset = datatable.offset
        setposition(f,tableoffset)
        return tableoffset
    end
end
756
-- Reports that the table with the given tag is present in the font but not loaded. There
-- is no message when the criterium is not met, when there is no file handle, or when the
-- font has no such table. A font without table directory is reported as such.

local function reportskippedtable(f,fontdata,tag,criterium)
    if not (criterium and f) then
        return
    end
    local tables = fontdata.tables
    if not tables then
        report("no tables")
    elseif tables[tag] then
        report("loading of table %a skipped",tag)
    end
end
770
-- Stores data under the given tag in the variabledata subtable of fontdata; the subtable
-- is created when it is not there yet.

local function setvariabledata(fontdata,tag,data)
    local variabledata = fontdata.variabledata
    if not variabledata then
        fontdata.variabledata = { [tag] = data }
        return
    end
    variabledata[tag] = data
end
779
780helpers.gotodatatable      = gotodatatable
781helpers.setvariabledata    = setvariabledata
782helpers.reportskippedtable = reportskippedtable
783
784-- The name table is probably the first one to load. After all this one provides
785-- useful information about what we deal with. The complication is that we need
786-- to filter the best one available.
787
-- A set of name keys (see reservednames): the ones that identify the font itself.

local platformnames = {
    family               = true,
    subfamily            = true,
    fullname             = true,
    postscriptname       = true,
    typographicfamily    = true,
    typographicsubfamily = true,
    compatiblefullname   = true,
}
797
-- A set of name keys (see reservednames): the ones that carry additional information
-- like version, legal and vendor details.

local platformextras = {
    copyright    = true,
    uniqueid     = true,
    version      = true,
    manufacturer = true,
    vendorurl    = true,
    license      = true,
    licenseurl   = true,
}
807
-- This one loads the "name" table. The name records of the unicode, windows and
-- macintosh platforms are collected first. After that we pick one string per name,
-- trying the english variants before anything else. The result ends up in:
--
--   fontdata.names         : indexed by reserved name tag, with content, platform,
--                            encoding and language
--   fontdata.extras        : indexed by name id, just the (decoded) string
--   fontdata.platformnames : only when specification.platformnames is set, the
--                            names from platformnames (and optionally the ones
--                            from platformextras) per platform
--
-- When there is no name table we only set fontdata.names (to an empty table).

function readers.name(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"name",true)
    if tableoffset then
        local format   = readushort(f)
        local nofnames = readushort(f)
        local offset   = readushort(f)
        -- we can also provide a raw list as extra, todo as option
        local start    = tableoffset + offset -- the strings are relative to this position
        local namelists = {
            unicode   = { },
            windows   = { },
            macintosh = { },
         -- iso       = { },
         -- windows   = { },
        }
        --
        -- A name record has six shorts: platform, encoding, language, name id, length
        -- and offset. We always consume all six, also when we cannot use the record,
        -- because otherwise we get out of sync with the next record.
        --
        for i=1,nofnames do
            local platform = platforms[readushort(f)]
            if platform then
                local namelist = namelists[platform]
                if namelist then
                    local encoding  = readushort(f)
                    local language  = readushort(f)
                    local encodings = encodings[platform]
                    local languages = languages[platform]
                    if encodings and languages then
                        local encoding = encodings[encoding]
                        local language = languages[language]
                        if encoding and language then
                            local index = readushort(f)
                            local name  = reservednames[index] -- nil for font specific names
                            namelist[#namelist+1] = {
                                platform = platform,
                                encoding = encoding,
                                language = language,
                                name     = name,
                                index    = index,
                                length   = readushort(f),
                                offset   = start + readushort(f),
                            }
                        else
                            skipshort(f,3)
                        end
                    else
                        skipshort(f,3)
                    end
                else
                    skipshort(f,5)
                end
            else
                skipshort(f,5)
            end
        end
     -- if format == 1 then
     --     local noftags = readushort(f)
     --     for i=1,noftags do
     --        local length = readushort(f)
     --        local offset = readushort(f)
     --     end
     -- end
        --
        -- we need to choose one we like, for instance an unicode one
        --
        local names  = { }
        local done   = { }
        local extras = { }
        --
        -- there is quite some logic in ff ... hard to follow so we start simple
        -- and extend when we run into it (todo: proper reverse hash) .. we're only
        -- interested in english anyway
        --
        -- When there is no decoder for the platform and encoding we return the
        -- string as it is.
        --
        local function decoded(platform,encoding,content)
            local decoder = decoders[platform]
            if decoder then
                decoder = decoder[encoding]
            end
            if decoder then
                return decoder(content)
            else
                return content
            end
        end
        --
        -- This takes the not yet handled names from a platform list, optionally
        -- limited to encoding e and language l. A name is identified by its tag or,
        -- when it has none, by its name id. We cannot use the position in the list
        -- for that because positions are unrelated across platforms and languages: a
        -- later (less preferred) pass would then overwrite a string that we already
        -- have in extras, or skip one that we don't have yet.
        --
        local function filter(platform,e,l)
            local namelist = namelists[platform]
            for i=1,#namelist do
                local name    = namelist[i]
                local nametag = name.name
                local index   = name.index
                local key     = nametag or index
                if not done[key] then
                    local encoding = name.encoding
                    local language = name.language
                    if (not e or encoding == e) and (not l or language == l) then
                        setposition(f,name.offset)
                        local content = decoded(platform,encoding,readstring(f,name.length))
                        if nametag then
                            names[nametag] = {
                                content  = content,
                                platform = platform,
                                encoding = encoding,
                                language = language,
                            }
                        end
                        extras[index] = content
                        done[key] = true
                    end
                end
            end
        end
        --
        -- the order determines the preference: the first match wins
        --
        filter("windows","unicode bmp","english - united states")
     -- filter("unicode") -- which one ?
        filter("macintosh","roman","english")
        filter("windows")
        filter("macintosh")
        filter("unicode")
        --
        fontdata.names  = names
        fontdata.extras = extras
        --
        if specification.platformnames then
            local collected      = { }
            -- false (or nil) when the extras are not requested
            local platformextras = specification.platformextras and platformextras
            for platform, namelist in next, namelists do
                local filtered = false -- we only create a table when there is something
                for i=1,#namelist do
                    local entry = namelist[i]
                    local name  = entry.name
                    if platformnames[name] or (platformextras and platformextras[name]) then
                        setposition(f,entry.offset)
                        local content = decoded(platform,entry.encoding,readstring(f,entry.length))
                        if filtered then
                            filtered[name] = content
                        else
                            filtered = { [name] = content }
                        end
                    end
                end
                if filtered then
                    collected[platform] = filtered
                end
            end
            fontdata.platformnames = collected
        end
    else
        fontdata.names = { }
    end
end
955
956----- validutf = lpeg.patterns.utf8character^0 * P(-1)
957local validutf = lpeg.patterns.validutf8
958
-- Gives the content of a name from fontdata.names, but only when it is valid
-- utf; in all other cases we get nil.

local function getname(fontdata,key)
    local names = fontdata.names
    local value = names and names[key]
    if value then
        local content = value.content
        if lpegmatch(validutf,content) then
            return content or nil
        end
        return nil
    end
end
969
970-- This table is an original windows (with its precursor os/2) table. In ff this one is
971-- part of the pfminfo table but here we keep it separate (for now). We will create a
972-- properties table afterwards.
973
-- The result goes into fontdata.windowsmetrics, which is an empty table when the
-- font has no "os/2" table. The fields are read in the order in which they are
-- stored so the sequence of assignments below matters.

readers["os/2"] = function(f,fontdata)
    if not gotodatatable(f,fontdata,"os/2",true) then
        fontdata.windowsmetrics = { }
        return
    end
    local version = readushort(f)
    local metrics = { version = version }
    --
    -- the part that all versions have
    --
    metrics.averagewidth       = readshort(f) -- ushort?
    metrics.weightclass        = readushort(f)
    metrics.widthclass         = readushort(f)
    metrics.fstype             = readushort(f)
    metrics.subscriptxsize     = readshort(f)
    metrics.subscriptysize     = readshort(f)
    metrics.subscriptxoffset   = readshort(f)
    metrics.subscriptyoffset   = readshort(f)
    metrics.superscriptxsize   = readshort(f)
    metrics.superscriptysize   = readshort(f)
    metrics.superscriptxoffset = readshort(f)
    metrics.superscriptyoffset = readshort(f)
    metrics.strikeoutsize      = readshort(f)
    metrics.strikeoutpos       = readshort(f)
    metrics.familyclass        = readshort(f)
    metrics.panose             = { readbytes(f,10) }
    metrics.unicoderanges      = { readulong(f), readulong(f), readulong(f), readulong(f) }
    metrics.vendor             = readstring(f,4)
    metrics.fsselection        = readushort(f)
    metrics.firstcharindex     = readushort(f)
    metrics.lastcharindex      = readushort(f)
    metrics.typoascender       = readshort(f)
    metrics.typodescender      = readshort(f)
    metrics.typolinegap        = readshort(f)
    metrics.winascent          = readushort(f)
    metrics.windescent         = readushort(f)
    --
    -- later versions append fields
    --
    if version >= 1 then
        metrics.codepageranges = { readulong(f), readulong(f) }
        if version >= 2 then
            metrics.xheight     = readshort(f)
            metrics.capheight   = readshort(f)
            metrics.defaultchar = readushort(f)
            metrics.breakchar   = readushort(f)
         -- metrics.maxcontexts           = readushort(f)
         -- metrics.loweropticalpointsize = readushort(f)
         -- metrics.upperopticalpointsize = readushort(f)
        end
    end
    --
    -- todo: unicoderanges
    --
    -- we also provide the symbolic variants of the classes and panose values
    --
    local weightclass = metrics.weightclass
    local widthclass  = metrics.widthclass
    local panose      = metrics.panose
    metrics.weight       = weightclass and weights[weightclass]
    metrics.width        = widthclass and widths[widthclass]
    metrics.panoseweight = panoseweights[panose[3]]
    metrics.panosewidth  = panosewidths[panose[4]]
    --
    fontdata.windowsmetrics = metrics
end
1033
-- The font header ends up in fontdata.fontheader (an empty table when there is
-- no "head" table). We also reset fontdata.nofglyphs here.

readers.head = function(f,fontdata)
    local header = { }
    if gotodatatable(f,fontdata,"head",true) then
        local version     = readulong(f)
        local fontversion = readulong(f)
        header.version           = version
        header.fontversion       = number.to16dot16(fontversion)
        header.fontversionnumber = fontversion
        -- the checksum is assembled from two shorts instead of one readulong
        local high = readushort(f)
        local low  = readushort(f)
        header.checksum          = high * 0x10000 + low
        header.magic             = readulong(f)
        header.flags             = readushort(f)
        header.units             = readushort(f)
        header.created           = readlongdatetime(f)
        header.modified          = readlongdatetime(f)
        header.xmin              = readshort(f)
        header.ymin              = readshort(f)
        header.xmax              = readshort(f)
        header.ymax              = readshort(f)
        header.macstyle          = readushort(f)
        header.smallpixels       = readushort(f)
        header.directionhint     = readshort(f)
        header.indextolocformat  = readshort(f)
        header.glyphformat       = readshort(f)
    end
    fontdata.fontheader = header
    fontdata.nofglyphs  = 0
end
1066
1067-- This table is a rather simple one. No treatment of values is needed here. Most
1068-- variables are not used but nofmetrics is quite important.
1069
-- The horizontal header goes into fontdata.horizontalheader. When the table is
-- absent (or when no details are requested) we only have nofmetrics, set to zero.

readers.hhea = function(f,fontdata,specification)
    local header = { nofmetrics = 0 }
    if gotodatatable(f,fontdata,"hhea",specification.details) then
        header.version             = readulong(f)
        header.ascender            = readfword(f)
        header.descender           = readfword(f)
        header.linegap             = readfword(f)
        header.maxadvancewidth     = readufword(f)
        header.minleftsidebearing  = readfword(f)
        header.minrightsidebearing = readfword(f)
        header.maxextent           = readfword(f)
        header.caretsloperise      = readshort(f)
        header.caretsloperun       = readshort(f)
        header.caretoffset         = readshort(f)
        header.reserved_1          = readshort(f)
        header.reserved_2          = readshort(f)
        header.reserved_3          = readshort(f)
        header.reserved_4          = readshort(f)
        header.metricdataformat    = readshort(f)
        header.nofmetrics          = readushort(f)
    end
    fontdata.horizontalheader = header
end
1098
-- The vertical header goes into fontdata.verticalheader. When the table is absent
-- (or when no details are requested) we only have nofmetrics, set to zero.

readers.vhea = function(f,fontdata,specification)
    local header = { nofmetrics = 0 }
    if gotodatatable(f,fontdata,"vhea",specification.details) then
        header.version              = readulong(f)
        header.ascender             = readfword(f)
        header.descender            = readfword(f)
        header.linegap              = readfword(f)
        header.maxadvanceheight     = readufword(f)
        header.mintopsidebearing    = readfword(f)
        header.minbottomsidebearing = readfword(f)
        header.maxextent            = readfword(f)
        header.caretsloperise       = readshort(f)
        header.caretsloperun        = readshort(f)
        header.caretoffset          = readshort(f)
        header.reserved_1           = readshort(f)
        header.reserved_2           = readshort(f)
        header.reserved_3           = readshort(f)
        header.reserved_4           = readshort(f)
        header.metricdataformat     = readshort(f)
        header.nofmetrics           = readushort(f)
    end
    fontdata.verticalheader = header
end
1127
1128-- We probably never need all these variables, but we do need the nofglyphs when loading other
1129-- tables. Again we use the microsoft names but see no reason to have "max" in each name.
1130
1131-- fontdata.maximumprofile can be bad
1132
-- We always set fontdata.nofglyphs from what the table says. The profile itself
-- depends on the version: 0x00005000 only has the number of glyphs, 0x00010000
-- has the full set of maxima and in case of an unknown version we register zero
-- glyphs in the profile.

readers.maxp = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"maxp",specification.details) then
        return
    end
    local version   = readulong(f)
    local nofglyphs = readushort(f)
    local profile   = {
        version   = version,
        nofglyphs = nofglyphs,
    }
    fontdata.nofglyphs = nofglyphs
    if version == 0x00010000 then
        profile.points             = readushort(f)
        profile.contours           = readushort(f)
        profile.compositepoints    = readushort(f)
        profile.compositecontours  = readushort(f)
        profile.zones              = readushort(f)
        profile.twilightpoints     = readushort(f)
        profile.storage            = readushort(f)
        profile.functiondefs       = readushort(f)
        profile.instructiondefs    = readushort(f)
        profile.stackelements      = readushort(f)
        profile.sizeofinstructions = readushort(f)
        profile.componentelements  = readushort(f)
        profile.componentdepth     = readushort(f)
    elseif version ~= 0x00005000 then
        profile.nofglyphs = 0
    end
    fontdata.maximumprofile = profile
end
1170
1171-- Here we filter the (advance) widths (that can be different from the boundingbox width of
1172-- course).
1173
-- The first nofmetrics glyphs have their own advance width. Glyphs that have no
-- width after that get the last width that we read, which can happen in for
-- instance a monospace font or in a cjk font with fixed widths.

readers.hmtx = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"hmtx",specification.glyphs) then
        return
    end
    local horizontalheader = fontdata.horizontalheader
    local nofmetrics       = horizontalheader.nofmetrics
    local glyphs           = fontdata.glyphs
    local nofglyphs        = fontdata.nofglyphs
    local advance          = 0
    for index=0,nofmetrics-1 do
        local glyph = glyphs[index]
        advance     = readshort(f) -- readushort
        readshort(f)               -- the left side bearing, not stored
        glyph.width = advance      -- zero is okay
    end
    for index=0,nofglyphs-1 do
        local glyph = glyphs[index]
        if not glyph.width then
            glyph.width = advance
        end
    end
end
1212
-- We only store a vheight when it is nonzero and differs from the default (the
-- ascender minus the descender of the vertical header) and a tsb only when it is
-- nonzero.

readers.vmtx = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"vmtx",specification.glyphs) then
        return
    end
    local verticalheader = fontdata.verticalheader
    local nofmetrics     = verticalheader.nofmetrics
    local glyphs         = fontdata.glyphs
    local nofglyphs      = fontdata.nofglyphs
    local height         = 0
    local default        = verticalheader.ascender - verticalheader.descender
    for index=0,nofmetrics-1 do
        local glyph = glyphs[index]
        height      = readushort(f)
        local tsb   = readshort(f)
        if height ~= 0 and height ~= default then
            glyph.vheight = height
        end
        if tsb ~= 0 then
            glyph.tsb = tsb
        end
    end
    -- The remaining glyphs share the last height, which can happen in for instance
    -- a monospace font or in a cjk font with fixed heights.
    if height ~= 0 and height ~= default then
        for index=nofmetrics,nofglyphs-1 do
            glyphs[index].vheight = height
        end
    end
end
1244
-- We don't load this table, we only report that we skip it.

function readers.vorg(f,fontdata,specification)
    reportskippedtable(f,fontdata,"vorg",specification.glyphs)
end
1248
1249-- The post table relates to postscript (printing) but has some relevant properties for other
1250-- usage as well. We just use the names from the microsoft specification. The version 2.0
1251-- description is somewhat fuzzy but it is a hybrid with overloads.
1252
-- The general properties go into fontdata.postscript (an empty table when there
-- is no "post" table). The glyph names are only set when specification.glyphs is
-- set:
--
--   version 1.0 : the glyphs get the names from the standard roman encoding
--   version 2.0 : per glyph an index, either into the standard roman encoding
--                 (below 258) or into the list of pascal strings that follows
--
-- Other versions don't provide names here.

readers.post = function(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"post",true)
    if tableoffset then
        local version = readulong(f)
        fontdata.postscript = {
            version            = version,
            italicangle        = readfixed(f),
            underlineposition  = readfword(f),
            underlinethickness = readfword(f),
            monospaced         = readulong(f),
            minmemtype42       = readulong(f),
            maxmemtype42       = readulong(f),
            minmemtype1        = readulong(f),
            maxmemtype1        = readulong(f),
        }
        if not specification.glyphs then
            -- enough done
        elseif version == 0x00010000 then
            -- mac encoding (258 glyphs)
            --
            -- The glyphs table has to be fetched here too, otherwise we would index an
            -- undefined global. We also don't assume that the font really has all the
            -- standard glyphs.
            local glyphs = fontdata.glyphs
            for index=0,#standardromanencoding do
                local glyph = glyphs[index]
                if glyph then
                    glyph.name = standardromanencoding[index]
                end
            end
        elseif version == 0x00020000 then
            local glyphs    = fontdata.glyphs
            local nofglyphs = readushort(f)
            local indices   = { } -- maps the n-th custom name onto a glyph index
            local names     = { }
            local maxnames  = 0
            for i=0,nofglyphs-1 do
                local nameindex = readushort(f)
                if nameindex >= 258 then
                    maxnames  = maxnames + 1
                    nameindex = nameindex - 257
                    indices[nameindex] = i
                else
                    glyphs[i].name = standardromanencoding[nameindex]
                end
            end
            -- the custom names follow as pascal strings: a length byte and the name
            for i=1,maxnames do
                local mapping = indices[i]
                if not mapping then
                    report("quit post name fetching at %a of %a: %s",i,maxnames,"no index")
                    break
                else
                    local length = readbyte(f)
                    if length > 0 then
                        glyphs[mapping].name = readstring(f,length)
                    else
                     -- report("quit post name fetching at %a of %a: %s",i,maxnames,"overflow")
                     -- break
                    end
                end
            end
        end
    else
        fontdata.postscript = { }
    end
end
1311
-- We don't load this table here, we only report that we skip it.

function readers.cff(f,fontdata,specification)
    reportskippedtable(f,fontdata,"cff",specification.glyphs)
end
1315
1316-- Not all cmaps make sense .. e.g. dfont is obsolete and probably more are not relevant. Let's see
1317-- what we run into. There is some weird calculation going on here because we offset in a table
1318-- being a blob of memory or file. Anyway, I can't stand lunatic formats like this esp when there
1319-- is no real gain.
1320
local formatreaders = { } -- indexed by cmap subtable format
local duplicatestoo = true

-- Each entry is a { platform, encoding, format } triplet.

local sequence = {
    -- there is some provision against redundant loading
    { 3,  1,  4 },
    { 3, 10, 12 },
    { 0,  3,  4 },
    { 0,  3, 12 },
    { 0,  1,  4 },
    { 0,  1, 12 }, -- for some old mac fonts
    { 0,  0,  6 },
    { 3,  0,  6 },
    { 3,  0,  4 }, -- for (likely) old crap
    -- variants
    { 0,  5, 14 },
    -- last resort ranges
    { 0,  4, 12 },
    { 3, 10, 13 },
}

-- From the sequence we derive a lookup: supported[platform][encoding][format].

local supported = {  }

for index=1,#sequence do
    local entry      = sequence[index]
    local platform   = entry[1]
    local encoding   = entry[2]
    local byplatform = supported[platform]
    if not byplatform then
        byplatform = { }
        supported[platform] = byplatform
    end
    local byencoding = byplatform[encoding]
    if not byencoding then
        byencoding = { }
        byplatform[encoding] = byencoding
    end
    byencoding[entry[3]] = true
end
1359
-- Format 4 comes as segments. Each segment has a start and end character, a delta
-- and an offset. When the offset is zero the glyph index is the unicode plus the
-- delta (modulo 65536), otherwise we look up the index in the index array that
-- follows the segment tables and add the delta to that. Index zero is ignored.
--
-- A glyph gets the first unicode that points to it; other unicodes that point to
-- the same glyph are registered in fontdata.duplicates. The return value is the
-- number of glyphs that got a unicode assigned.

formatreaders[4] = function(f,fontdata,offset)
    setposition(f,offset+2) -- skip format
    --
    local length      = readushort(f) -- in bytes of subtable
    local language    = readushort(f)
    local nofsegments = readushort(f) / 2
    --
    skipshort(f,3) -- searchrange entryselector rangeshift
    --
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local nofdone    = 0
    local endchars   = readcardinaltable(f,nofsegments,ushort)
    local reserved   = readushort(f) -- 0
    local startchars = readcardinaltable(f,nofsegments,ushort)
    local deltas     = readcardinaltable(f,nofsegments,ushort)
    local offsets    = readcardinaltable(f,nofsegments,ushort)
    -- format length language nofsegments searchrange entryselector rangeshift 4-tables
    local size       = (length - 2 * 2 - 5 * 2 - 4 * 2 * nofsegments) / 2
    local indices    = readcardinaltable(f,size-1,ushort)
    --
    for segment=1,nofsegments do
        local startchar = startchars[segment]
        local endchar   = endchars[segment]
        local offset    = offsets[segment]
        local delta     = deltas[segment]
        if startchar == 0xFFFF and endchar == 0xFFFF then
            -- break
        elseif startchar == 0xFFFF and offset == 0 then
            -- break
        elseif offset == 0xFFFF then
            -- bad encoding
        elseif offset == 0 then
            -- the direct case: the index follows from the unicode and the delta
            if trace_cmap_details then
                -- we use the cmap reporter here, just as in the other branch
                report_cmap("format 4.%i segment %2i from %C upto %C at index %H",1,segment,startchar,endchar,(startchar + delta) % 65536)
            end
            for unicode=startchar,endchar do
                local index = (unicode + delta) % 65536
                if index and index > 0 then
                    local glyph = glyphs[index]
                    if glyph then
                        local gu = glyph.unicode
                        if not gu then
                            glyph.unicode = unicode
                            nofdone = nofdone + 1
                        elseif gu ~= unicode then
                            if duplicatestoo then
                                local d = duplicates[gu]
                                if d then
                                    d[unicode] = true
                                else
                                    duplicates[gu] = { [unicode] = true }
                                end
                            else
                                -- no duplicates ... weird side effects in lm
                                report("duplicate case 1: %C %04i %s",unicode,index,glyphs[index].name)
                            end
                        end
                        if not mapping[index] then
                            mapping[index] = unicode
                        end
                    end
                end
            end
        else
            -- the indirect case: the offset is in bytes and relative to the position
            -- of this segment's entry in the offsets table, here we turn that into a
            -- slot in the indices table
            local shift = (segment-nofsegments+offset/2) - startchar
            if trace_cmap_details then
                report_cmap("format 4.%i segment %2i from %C upto %C at index %H",0,segment,startchar,endchar,(startchar + delta) % 65536)
            end
            for unicode=startchar,endchar do
                local slot  = shift + unicode
                local index = indices[slot]
                if index and index > 0 then
                    index = (index + delta) % 65536
                    local glyph = glyphs[index]
                    if glyph then
                        local gu = glyph.unicode
                        if not gu then
                            glyph.unicode = unicode
                            nofdone = nofdone + 1
                        elseif gu ~= unicode then
                            if duplicatestoo then
                                local d = duplicates[gu]
                                if d then
                                    d[unicode] = true
                                else
                                    duplicates[gu] = { [unicode] = true }
                                end
                            else
                                -- no duplicates ... weird side effects in lm
                                report("duplicate case 2: %C %04i %s",unicode,index,glyphs[index].name)
                            end
                        end
                        if not mapping[index] then
                            mapping[index] = unicode
                        end
                    end
                end
            end
        end
    end
    return nofdone
end
1464
-- Format 6 is a trimmed array: count glyph indices for the unicodes start upto
-- start+count-1. Index zero is ignored. A glyph keeps the first unicode that it
-- got and we don't register duplicates here. The return value is the number of
-- glyphs that got a unicode assigned.

formatreaders[6] = function(f,fontdata,offset)
    setposition(f,offset) -- + 2 + 2 + 2 -- skip format length language
    local format     = readushort(f)
    local length     = readushort(f)
    local language   = readushort(f)
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local start      = readushort(f)
    local count      = readushort(f)
    local stop       = start+count-1
    local nofdone    = 0
    if trace_cmap_details then
        -- two directives, two arguments: a stray leading argument would make us
        -- report the wrong range
        report_cmap("format 6 from %C to %C",start,stop)
    end
    for unicode=start,stop do
        local index = readushort(f)
        if index > 0 then
            local glyph = glyphs[index]
            if glyph then
                local gu = glyph.unicode
                if not gu then
                    glyph.unicode = unicode
                    nofdone = nofdone + 1
                elseif gu ~= unicode then
                    -- report("format 6 overloading %C to %C",gu,unicode)
                    -- glyph.unicode = unicode
                    -- no duplicates ... weird side effects in lm
                end
                if not mapping[index] then
                    mapping[index] = unicode
                end
            end
        end
    end
    return nofdone
end
1502
-- Format 12 has groups of consecutive unicodes that map onto consecutive glyph
-- indices. A glyph keeps the first unicode that it got, later ones end up in
-- fontdata.duplicates. The return value is the number of glyphs that got a
-- unicode assigned.

formatreaders[12] = function(f,fontdata,offset)
    setposition(f,offset+2+2+4+4) -- skip format reserved length language
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local nofgroups  = readulong(f)
    local nofdone    = 0
    for group=1,nofgroups do
        local first = readulong(f)
        local last  = readulong(f)
        local start = readulong(f)
        if trace_cmap_details then
            report_cmap("format 12 from %C to %C starts at index %i",first,last,start)
        end
        for unicode=first,last do
            -- the index runs in parallel with the unicode
            local index = start + unicode - first
            local glyph = glyphs[index]
            if glyph then
                local known = glyph.unicode
                if not known then
                    glyph.unicode = unicode
                    nofdone = nofdone + 1
                elseif known ~= unicode then
                    -- e.g. sourcehan fonts need this
                    local list = duplicates[known]
                    if not list then
                        list = { }
                        duplicates[known] = list
                    end
                    list[unicode] = true
                end
                if not mapping[index] then
                    mapping[index] = unicode
                end
            end
        end
    end
    return nofdone
end
1542
-- Format 13 maps a whole range of unicodes onto one and the same glyph. The glyph
-- gets the first unicode of the range (unless it already has one) and the rest of
-- the range is registered in fontdata.duplicates. Ranges that start at or beyond
-- privateoffset are ignored and ranges that run into it are pruned. The return
-- value is the number of unicodes that we handled.

formatreaders[13] = function(f,fontdata,offset)
    --
    -- this vector is only used for simple fallback fonts
    --
    setposition(f,offset+2+2+4+4) -- skip format reserved length language
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local nofgroups  = readulong(f)
    local nofdone    = 0
    for i=1,nofgroups do
        local first = readulong(f)
        local last  = readulong(f)
        local index = readulong(f)
        if first < privateoffset then
            if trace_cmap_details then
                report_cmap("format 13 from %C to %C get index %i",first,last,index)
            end
            local glyph = glyphs[index]
            -- A group can point to a glyph that we don't have. Just like the other
            -- format readers we skip these instead of crashing on a nil glyph.
            if glyph then
                local unicode = glyph.unicode
                if not unicode then
                    unicode = first
                    glyph.unicode = unicode
                    first = first + 1 -- the remainder of the range becomes duplicates
                end
                local list     = duplicates[unicode]
                mapping[index] = unicode
                if not list then
                    list = { }
                    duplicates[unicode] = list
                end
                if last >= privateoffset then
                    local limit = privateoffset - 1
                    report("format 13 from %C to %C pruned to %C",first,last,limit)
                    last = limit
                end
                for unicode=first,last do
                    list[unicode] = true
                end
                nofdone = nofdone + last - first + 1
            end
        else
            report("format 13 from %C to %C ignored",first,last)
        end
    end
    return nofdone
end
1589
-- Reader for cmap subtable format 14 (variation selectors). For each selector
-- record that has a non-default table, a hash from code point to glyph index is
-- stored in fontdata.variants[selector]. The default tables are not processed
-- because there is no need to map the defaults to themselves. The return value
-- is the number of non-default entries that were read (zero when there is no
-- usable offset).
formatreaders[14] = function(f,fontdata,offset)
    if not offset or offset == 0 then
        return 0
    end
    setposition(f,offset)
    readushort(f) -- format, not used
    readulong(f)  -- length, not used
    local nofrecords = readulong(f)
    local selectors  = { }
    local others     = { }
    local variants   = { }
    local nofdone    = 0
    fontdata.variants = variants
    -- first collect all records because the next pass jumps around in the file
    for r=1,nofrecords do
        selectors[r] = readuint(f)
        readulong(f) -- default offset, not used
        others[r] = readulong(f) -- non-default offset
    end
    for r=1,nofrecords do
        local other = others[r]
        if other ~= 0 then
            setposition(f,offset+other)
            local mapping = { }
            local count   = readulong(f)
            for m=1,count do
                local unicode = readuint(f)
                local glyph   = readushort(f)
                mapping[unicode] = glyph
            end
            nofdone = nofdone + count
            variants[selectors[r]] = mapping
        end
    end
    return nofdone
end
1643
-- Runs the format reader for one platform/encoding/format combination. The
-- records table is the one built by readers.cmap, so records[platform][encoding]
-- maps a format number onto the offset of its subtable. Once a format has been
-- picked up, its slot is set to true so that it is not processed twice. The
-- return value is what the reader reports (or zero when nothing was done).
local function checkcmap(f,fontdata,records,platform,encoding,format)
    local pdata  = records[platform]
    local edata  = pdata and pdata[encoding]
    local fdata  = edata and edata[format]
    local reader = nil
    local reason = nil
    if not pdata then
        reason = "no platform"
    elseif not edata then
        reason = "no encoding"
    elseif not fdata then
        reason = "no format"
    elseif type(fdata) ~= "number" then
        reason = "already done"
    else
        edata[format] = true -- done
        reader = formatreaders[format]
        if not reader then
            reason = "unsupported format"
        end
    end
    if reason then
        if trace_cmap_details then
            report_cmap("skipped, %s, p=%i e=%i f=%i",reason,platform,encoding,format)
        end
        return 0
    end
    local n = reader(f,fontdata,fdata) or 0
    if trace_cmap or trace_cmap_details then
        local p = platforms[platform]
        local e = encodings[p]
        report_cmap("checked, platform %i (%s), encoding %i (%s), format %i, new unicodes %i",
            platform,p,encoding,e and e[encoding] or "?",format,n)
    end
    return n
end
1688
-- Reads the cmap table. First the encoding records are collected per platform
-- and encoding, then the format of each subtable is determined, and finally the
-- entries of the (file level) sequence list are offered to checkcmap in order.
-- The collected records end up in fontdata.cidmaps; when the table is not
-- available an empty table is stored there instead.
function readers.cmap(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"cmap",specification.glyphs)
    if not tableoffset then
        fontdata.cidmaps = { }
        return
    end
    local version   = readushort(f) -- check later versions
    local noftables = readushort(f)
    local records   = { }
    fontdata.duplicates = fontdata.duplicates or { }
    -- pass one: gather the subtable offsets
    for i=1,noftables do
        local platform = readushort(f)
        local encoding = readushort(f)
        local offset   = readulong(f)
        local record   = records[platform]
        if not record then
            record = { }
            records[platform] = record
        end
        local subtables = record[encoding]
        if subtables then
            local offsets = subtables.offsets
            offsets[#offsets+1] = offset
        else
            record[encoding] = {
                offsets = { offset },
                formats = { },
            }
        end
    end
    if trace_cmap then
        report("found cmaps:")
    end
    -- pass two: peek at each subtable in order to find out its format
    for platform, record in sortedhash(records) do
        local pname     = platforms[platform]
        local enames    = encodings[pname]
        local psupports = supported[platform]
        if trace_cmap then
            report(psupports and "  platform %i: %s" or "  platform %i: %s (unsupported)",platform,pname or "?")
        end
        for encoding, subtables in sortedhash(record) do
            local esupports = psupports and psupports[encoding]
            if trace_cmap then
                local ename = enames and enames[encoding] or "?"
                report(esupports and "    encoding %i: %s" or "    encoding %i: %s (unsupported)",encoding,ename)
            end
            local offsets = subtables.offsets
            local formats = subtables.formats
            for i=1,#offsets do
                local offset = tableoffset + offsets[i]
                setposition(f,offset)
                formats[readushort(f)] = offset
            end
            -- from now on the encoding entry is a hash from format to offset
            record[encoding] = formats
            if trace_cmap then
                local list = sortedkeys(formats)
                for i=1,#list do
                    local format = list[i]
                    if not (esupports and esupports[format]) then
                        list[i] = format .. " (unsupported)"
                    end
                end
                report("      formats: % t",list)
            end
        end
    end
    -- every entry gets checked, also when an earlier one already delivered
    local found = false
    for i=1,#sequence do
        local entry = sequence[i]
        if checkcmap(f,fontdata,records,entry[1],entry[2],entry[3]) > 0 then
            found = true
        end
    end
    if not found then
        report("no useable unicode cmap found")
    end
    --
    fontdata.cidmaps = {
        version   = version,
        noftables = noftables,
        records   = records,
    }
end
1791
-- The glyf table depends on the loca table. We have one entry too many in the locations table (the
-- last one is a dummy) because we need to calculate the size of a glyph blob from the delta,
-- although we do not need it in our usage (yet). We can remove the locations table when we're done.
1795
-- Stub for the "loca" table: it only hands the tag and the glyphs flag of the
-- specification over to reportskippedtable.
readers.loca = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"loca",wanted)
end
1799
-- Stub for the "glyf" table (part goes to cff module): it only hands the tag and
-- the glyphs flag of the specification over to reportskippedtable.
readers.glyf = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"glyf",wanted)
end
1803
-- The Microsoft variant is pretty clean and is supported (implemented elsewhere)
-- just because I wanted to see what such a font looks like.
1806
-- Stubs for the color tables "colr" and "cpal": both only pass their tag and the
-- glyphs flag of the specification to reportskippedtable.
readers.colr = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"colr",wanted)
end
readers.cpal = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"cpal",wanted)
end
1813
1814-- This one is also supported, if only because I could locate a proper font for
1815-- testing.
1816
-- Stub for the "svg" table: it only hands the tag and the glyphs flag of the
-- specification over to reportskippedtable.
readers.svg = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"svg",wanted)
end
1820
1821-- There is a font from apple to test the next one. Will there be more? Anyhow,
1822-- it's relatively easy to support, so I did it.
1823
-- Stub for the "sbix" table: it only hands the tag and the glyphs flag of the
-- specification over to reportskippedtable.
readers.sbix = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"sbix",wanted)
end
1827
1828-- I'm only willing to look into the next variant if I see a decent and complete (!)
1829-- font and more can show up. It makes no sense to waste time on ideas. Okay, the
1830-- apple font also has these tables.
1831
-- Stubs for the bitmap related tables "cbdt", "cblc", "ebdt", "ebsc" and "eblc":
-- each one only passes its tag and the glyphs flag of the specification to
-- reportskippedtable.
readers.cbdt = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"cbdt",wanted)
end
readers.cblc = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"cblc",wanted)
end
readers.ebdt = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"ebdt",wanted)
end
readers.ebsc = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"ebsc",wanted)
end
readers.eblc = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"eblc",wanted)
end
1847
1848-- Here we have a table that we really need for later processing although a more advanced gpos table
1849-- can also be available. Todo: we need a 'fake' lookup for this (analogue to ff).
1850
-- Reads the (old school) kern table. Only format 0 subtables (plain pairs) are
-- processed: each pair ends up in the kerns hash of the left glyph, keyed by the
-- index of the right glyph. Other formats are reported as todo.
--
-- A subtable that we do not process is skipped by means of its length field so
-- that a following subtable is read from the right position (before, the next
-- header was read from inside the skipped subtable). A format 0 subtable is
-- still read sequentially and its size is derived from the number of pairs, so
-- its length field is not relied upon.
--
-- This assumes that gotodatatable leaves the stream at the returned offset, which
-- is also what readers.cmap relies on.
function readers.kern(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"kern",specification.kerns)
    if tableoffset then
        local version        = readushort(f)
        local noftables      = readushort(f)
        local subtableoffset = tableoffset + 4 -- right after version and noftables
        for i=1,noftables do
            local version  = readushort(f)
            local length   = readushort(f) -- counted from the start of this subtable
            local coverage = readushort(f)
            -- bit 8-15 of coverage: format 0 or 2
            local format   = rshift(coverage,8) -- is this ok?
            if format == 0 then
                local nofpairs      = readushort(f)
                local searchrange   = readushort(f)
                local entryselector = readushort(f)
                local rangeshift    = readushort(f)
                local glyphs        = fontdata.glyphs
                for j=1,nofpairs do
                    local left  = readushort(f)
                    local right = readushort(f)
                    local kern  = readfword(f)
                    local glyph = glyphs[left]
                    local kerns = glyph.kerns
                    if kerns then
                        kerns[right] = kern
                    else
                        glyph.kerns = { [right] = kern }
                    end
                end
                -- header (6 bytes), search fields (8 bytes) and 6 bytes per pair
                subtableoffset = subtableoffset + 14 + 6 * nofpairs
            else
                if format == 2 then
                    report("todo: kern classes")
                else
                    report("todo: kerns")
                end
                if length < 6 then
                    -- the length can't even cover the header, so there is no way
                    -- to locate the next subtable
                    report("kern subtable %i has a bogus length %i, skipping the rest",i,length)
                    break
                end
                subtableoffset = subtableoffset + length
                setposition(f,subtableoffset)
            end
        end
    end
end
1889
-- Stub for the "gdef" table: it only hands the tag and the details flag of the
-- specification over to reportskippedtable.
readers.gdef = function(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"gdef",wanted)
end
1893
-- Stub for the "gsub" table: it only hands the tag and the details flag of the
-- specification over to reportskippedtable.
readers.gsub = function(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"gsub",wanted)
end
1897
-- Stub for the "gpos" table: it only hands the tag and the details flag of the
-- specification over to reportskippedtable.
readers.gpos = function(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"gpos",wanted)
end
1901
-- Stub for the "math" table: it only hands the tag and the details flag of the
-- specification over to reportskippedtable.
readers.math = function(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"math",wanted)
end
1905
1906-- Now comes the loader. The order of reading these matters as we need to know
1907-- some properties in order to read following tables. When details is true we also
1908-- initialize the glyphs data.
1909
-- Collects the basic information of a font in a flat table.
--
-- maindata       : the loaded font data (possibly a container with subfonts)
-- sub            : optional subfont index; when maindata.subfonts[sub] exists
--                  that subfont is used, otherwise maindata itself
-- platformnames  : when set, fontdata.platformnames is included
-- rawfamilynames : when set, the typographic (sub)family names do not fall back
--                  on the regular (sub)family names
-- metricstoo     : when set, horizontalmetrics and verticalmetrics are added
-- instancenames  : when set, the lowercased subfamily names of the variable
--                  font instances are included (if there are any)
--
-- When the font has no names table, only the filename and a comment are
-- returned. The comment mentions the subfont when one was asked for. That
-- branch used to test the undefined name 'n' instead of 'sub', so it was never
-- taken.
local function getinfo(maindata,sub,platformnames,rawfamilynames,metricstoo,instancenames)
    local fontdata = sub and maindata.subfonts and maindata.subfonts[sub] or maindata
    local names    = fontdata.names
    local info     = nil
    if names then
        local metrics        = fontdata.windowsmetrics or { }
        local postscript     = fontdata.postscript     or { }
        local fontheader     = fontdata.fontheader     or { }
        local cffinfo        = fontdata.cffinfo        or { }
        local verticalheader = fontdata.verticalheader or { }
        local weight         = getname(fontdata,"weight") or cffinfo.weight or metrics.weight
        local width          = getname(fontdata,"width")  or cffinfo.width  or metrics.width
        local fontname       = getname(fontdata,"postscriptname")
        local fullname       = getname(fontdata,"fullname")
        local family         = getname(fontdata,"family")
        local subfamily      = getname(fontdata,"subfamily")
        local familyname     = getname(fontdata,"typographicfamily")
        local subfamilyname  = getname(fontdata,"typographicsubfamily")
        local compatiblename = getname(fontdata,"compatiblefullname") -- kind of useless
        if rawfamilynames then
            -- for PG (for now, as i need to check / adapt context to catch a no-fallback case)
        else
            if not    familyname then    familyname =    family end
            if not subfamilyname then subfamilyname = subfamily end
        end
        if platformnames then
            -- the flag is replaced by the actual data
            platformnames = fontdata.platformnames
        end
        if instancenames then
            -- the flag is replaced by a list of names, or reset when there are none
            local variabledata = fontdata.variabledata
            local instances    = variabledata and variabledata.instances
            if instances then
                instancenames = { }
                for i=1,#instances do
                    instancenames[i] = lower(stripstring(instances[i].subfamily))
                end
            else
                instancenames = nil
            end
        end
        info = { -- we inherit some inconsistencies/choices from ff
            subfontindex   = fontdata.subfontindex or sub or 0,
         -- filename       = fontdata.filename,
            version        = getname(fontdata,"version"),
         -- format         = fontdata.format,
            fontname       = fontname,
            fullname       = fullname,
         -- cfffullname    = cff.fullname,
            family         = family,
            subfamily      = subfamily,
            familyname     = familyname,
            subfamilyname  = subfamilyname,
            compatiblename = compatiblename,
            weight         = weight and lower(weight),
            width          = width and lower(width),
            pfmweight      = metrics.weightclass or 400, -- will become weightclass
            pfmwidth       = metrics.widthclass or 5, -- will become widthclass
            panosewidth    = metrics.panosewidth,
            panoseweight   = metrics.panoseweight,
            fstype         = metrics.fstype or 0, -- embedding, subsetting and editing
            italicangle    = postscript.italicangle or 0,
            units          = fontheader.units or 0,
            designsize     = fontdata.designsize,
            minsize        = fontdata.minsize,
            maxsize        = fontdata.maxsize,
            boundingbox    = { fontheader.xmin or 0, fontheader.ymin or 0, fontheader.xmax or 0, fontheader.ymax or 0 },
            monospaced     = (tonumber(postscript.monospaced or 0) > 0) or metrics.panosewidth == "monospaced",
            averagewidth   = metrics.averagewidth,
            xheight        = metrics.xheight, -- can be missing
            capheight      = metrics.capheight or fontdata.maxy, -- can be missing
            ascender       = metrics.typoascender,
            descender      = metrics.typodescender,
            ascent         = metrics.winascent,  -- these might be more reliable
            descent        = metrics.windescent, -- these might be more reliable
            platformnames  = platformnames or nil,
            instancenames  = instancenames or nil,
            tableoffsets   = fontdata.tableoffsets,
            defaultvheight = (verticalheader.ascender or 0) - (verticalheader.descender or 0)
        }
      -- print(fontname,fontheader.macstyle) : maybe for italic
        if metricstoo then
            local keys = {
                "version",
                "ascender", "descender", "linegap",
             -- "caretoffset", "caretsloperise", "caretsloperun",
                "maxadvancewidth", "maxadvanceheight", "maxextent",
             -- "metricdataformat",
                "minbottomsidebearing", "mintopsidebearing",
            }
            local h  = fontdata.horizontalheader or { }
            local v  = fontdata.verticalheader   or { }
            local th = { }
            local tv = { }
            for i=1,#keys do
                local key = keys[i]
                -- missing values become zero
                th[key] = h[key] or 0
                tv[key] = v[key] or 0
            end
            info.horizontalmetrics = th
            info.verticalmetrics   = tv
        end
    elseif sub then
        info = {
            filename = fontdata.filename,
            comment  = "there is no info for subfont " .. tostring(sub),
        }
    else
        info = {
            filename = fontdata.filename,
            comment  = "there is no info",
        }
    end
 -- inspect(info)
    return info
end
2032
-- Reads the header of a font (or of a subfont when an offset is given) plus the
-- table directory that follows it. Two values are returned: the fresh fontdata
-- table and the hash of table records (checksum, offset, length) keyed by the
-- lowercased and stripped tag. A record that runs beyond the file size is
-- reported but kept. The format becomes "opentype" when a cff or cff2 table is
-- present and "truetype" otherwise.
local function loadtables(f,specification,offset)
    if offset then
        setposition(f,offset)
    end
    local tables   = { }
    local basename = file.basename(specification.filename)
    local filesize = specification.filesize
    local filetime = specification.filetime
    -- the header fields, in the order in which they are stored
    local version       = readstring(f,4)
    local noftables     = readushort(f)
    local searchrange   = readushort(f) -- not needed
    local entryselector = readushort(f) -- not needed
    local rangeshift    = readushort(f) -- not needed
    local fontdata = { -- some can/will go
        filename      = basename,
        filesize      = filesize,
        filetime      = filetime,
        version       = version,
        noftables     = noftables,
        searchrange   = searchrange,
        entryselector = entryselector,
        rangeshift    = rangeshift,
        tables        = tables,
        foundtables   = false,
    }
    for i=1,noftables do
        local tag      = lower(stripstring(readstring(f,4)))
        local high     = readushort(f) -- the checksum is assembled from two shorts
        local low      = readushort(f)
        local position = readulong(f)
        local length   = readulong(f)
        if position + length > filesize then
            report("bad %a table in file %a",tag,basename)
        end
        tables[tag] = {
            checksum = high * 0x10000 + low,
            offset   = position,
            length   = length,
        }
    end
    fontdata.foundtables = sortedkeys(tables)
    fontdata.format      = (tables.cff or tables.cff2) and "opentype" or "truetype"
    return fontdata, tables
end
2077
-- Installs the glyphs and mapping tables in fontdata. The glyphs table creates
-- entries on demand: accessing a missing index stores and returns a new glyph
-- table that has only its index field set.
local function prepareglyps(fontdata)
    local function newglyph(glyphs,index)
        local glyph = {
            -- maybe more defaults
            index = index,
        }
        glyphs[index] = glyph
        return glyph
    end
    fontdata.glyphs  = setmetatableindex(newglyph)
    fontdata.mapping = { }
end
2090
-- Calls the reader that is registered for the given tag (if there is one) and
-- passes on the file handle, fontdata, specification and any extra arguments.
-- Nothing is returned.
local function readtable(tag,f,fontdata,specification,...)
    local reader = readers[tag]
    if not reader then
        return
    end
    reader(f,fontdata,specification,...)
end
2097
-- Loads one font (or one subfont of a collection) that starts at the given offset.
-- The order in which the tables are read matters, so the tags are kept in lists
-- that are processed front to back. When specification.askedname is set and the
-- full name of the font doesn't match it, nothing is returned so that the caller
-- can try the next subfont. Otherwise the filled fontdata table is returned.
local function readdata(f,offset,specification)

    local fontdata, tables = loadtables(f,specification,offset)

    local function readtables(list)
        for i=1,#list do
            readtable(list[i],f,fontdata,specification)
        end
    end

    -- names are compared case insensitive, with only letters and digits kept
    local function cleaned(str)
        return lower((gsub(str,"[^a-zA-Z0-9]","")))
    end

    if specification.glyphs then
        prepareglyps(fontdata)
    end

    fontdata.temporary = { }

    readtable("name",f,fontdata,specification)

    local askedname = specification.askedname
    if askedname then
        local fullname = getname(fontdata,"fullname") or ""
        if cleaned(askedname) ~= cleaned(fullname) then
            return -- keep searching
        end
    end

    readtables { "stat", "avar", "fvar" }

    local variabledata = fontdata.variabledata

    if variabledata then
        local instances = variabledata.instances
        local axis      = variabledata.axis
        if axis and (not instances or #instances == 0) then
            -- no instances defined: fake three per axis (default, minimum and
            -- maximum) with all other axis kept at their default
            instances = { }
            variabledata.instances = instances
            local kinds = { "default", "minimum", "maximum" }
            for i=1,#axis do
                local current = axis[i]
                local tag     = current.tag
                for k=1,#kinds do
                    local kind   = kinds[k]
                    local name   = kind .. tag
                    local value  = current[kind]
                    local values = { }
                    for j=1,#axis do
                        local a = axis[j]
                        values[j] = {
                            axis  = a.tag,
                            value = j == i and value or a.default,
                        }
                    end
                    instances[#instances+1] = {
                        subfamily = name,
                        values    = values,
                    }
                end
            end
         -- report("%i fake instances added",#instances)
        end
    end

    -- an instance asked for by name
    if not specification.factors then
        local instance = specification.instance
        if type(instance) == "string" then
            local factors = helpers.getfactors(fontdata,instance)
            if factors then
                specification.factors = factors
                fontdata.factors  = factors
                fontdata.instance = instance
                report("user instance: %s, factors: % t",instance,factors)
            else
                report("user instance: %s, bad factors",instance)
            end
        end
    end

    -- no factors yet: ask for them with true instead of an instance name
    if not fontdata.factors and fontdata.variabledata then
        local factors = helpers.getfactors(fontdata,true)
        if factors then
            specification.factors = factors
            fontdata.factors = factors
        end
    end

    readtables {
        "os/2", "head", "maxp", "hhea", "vhea", "hmtx", "vmtx", "vorg", "post",
        "mvar", "hvar", "vvar",
        "gdef",
        "cff", "cff2",
        "cmap",
        "loca", -- maybe load it in glyf
        "glyf", -- loads gvar
        "colr", "cpal",
        "svg",
        "sbix",
        "cbdt", "cblc", "ebdt", "eblc",
        "kern", "gsub", "gpos",
        "math",
    }

    -- these are only needed while loading
    fontdata.locations    = nil
    fontdata.cidmaps      = nil
    fontdata.dictionaries = nil
 -- fontdata.cff          = nil

    if specification.tableoffsets then
        -- the header fields are made accessible via the table records
        fontdata.tableoffsets = tables
        setmetatableindex(tables, {
            version       = fontdata.version,
            noftables     = fontdata.noftables,
            searchrange   = fontdata.searchrange,
            entryselector = fontdata.entryselector,
            rangeshift    = fontdata.rangeshift,
        })
    end

    return fontdata
end
2245
-- Opens the font file and loads what the specification asks for. A single font
-- is read directly. For a collection ("ttcf") the subfont entry of the
-- specification decides: a number selects by index, a non empty string selects
-- by (full) name, and anything else loads all subfonts into a container table.
-- Nothing is returned when the file can't be opened or is empty; in all other
-- cases a table is returned (an empty one when nothing could be loaded).
local function loadfontdata(specification)
    local filename = specification.filename
    local fileattr = lfs.attributes(filename)
    local filesize = fileattr and fileattr.size or 0
    local filetime = fileattr and fileattr.modification or 0
    local f = openfile(filename,true) -- zero based
    if not f then
        report("unable to open %a",filename)
        return
    end
    if filesize == 0 then
        report("empty file %a",filename)
        closefile(f)
        return
    end
    specification.filesize = filesize
    specification.filetime = filetime
    local version  = readstring(f,4)
    local fontdata = nil
    if version == "ttcf" then
        local wanted      = specification.subfont
        local index       = tonumber(wanted)
        local ttcversion  = readulong(f)
        local nofsubfonts = readulong(f)
        local offsets     = readcardinaltable(f,nofsubfonts,ulong)
        if index then
            -- selection by number
            if index >= 1 and index <= nofsubfonts then
                fontdata = readdata(f,offsets[index],specification)
            else
                report("no subfont %a in file %a",index,filename)
            end
        elseif type(wanted) == "string" and wanted ~= "" then
            -- selection by name: readdata returns nothing on a mismatch
            specification.askedname = wanted
            for i=1,nofsubfonts do
                fontdata = readdata(f,offsets[i],specification)
                if fontdata then
                    fontdata.subfontindex = i
                    report("subfont named %a has index %a",wanted,i)
                    break
                end
            end
            if not fontdata then
                report("no subfont named %a",wanted)
            end
        else
            -- no selection: load them all
            local subfonts = { }
            fontdata = {
                filename    = filename,
                filesize    = filesize,
                filetime    = filetime,
                version     = version,
                subfonts    = subfonts,
                ttcversion  = ttcversion,
                nofsubfonts = nofsubfonts,
            }
            for i=1,nofsubfonts do
                subfonts[i] = readdata(f,offsets[i],specification)
            end
        end
    elseif version == "OTTO" or version == "true" or version == "\0\1\0\0" then
        fontdata = readdata(f,0,specification)
    else
        report("unknown version %a in file %a",version,filename)
    end
    closefile(f)
    return fontdata or { }
end
2313
-- Loads a font in protected mode. The specification is either a filename (in
-- which case everything is loaded, with n as subfont and instance as instance)
-- or a table with options. Options that depend on each other are enabled in a
-- cascade: shapes, lookups and kerns need glyphs, glyphs need details and
-- details need info. When loading fails a fatal error is reported and nothing is
-- returned.
local function loadfont(specification,n,instance)
    if type(specification) == "string" then
        local filename = specification
        specification = {
            filename    = filename,
            info        = true, -- always true (for now)
            details     = true,
            glyphs      = true,
            shapes      = true,
            kerns       = true,
            variable    = true,
            globalkerns = true,
            lookups     = true,
            -- true or number:
            subfont     = n or true,
            tounicode   = false,
            instance    = instance
        }
    end
    -- the order of these checks matters
    if specification.shapes or specification.lookups or specification.kerns then
        specification.glyphs = true
    end
    if specification.glyphs then
        specification.details = true
    end
    if specification.details then
        specification.info = true -- not really used any more
    end
    if specification.platformnames then
        specification.platformnames = true -- not really used any more
    end
    local wanted = specification.instance or instance
    if wanted then
        specification.variable = true
        specification.instance = wanted
    end
    local ok, result = xpcall(
        loadfontdata,
        function(str)
            report("fatal error in file %a: %s\n%s",specification.filename,str,debug and debug.traceback())
        end,
        specification
    )
    if not ok then
        return
    end
    return result
end
2358
2359-- we need even less, but we can have a 'detail' variant
2360
-- Loads the glyph shapes of a font and returns a trimmed down table.
--
-- filename : the font file
-- n        : optional subfont selector, passed on as 'subfont'
-- instance : optional instance, passed on to the loader
-- streams  : passed on to the loader as 'streams'
--
-- The result always has the fields filename, format, glyphs and units. When
-- the font could not be loaded, or when the loaded data has no glyphs table
-- (the loader can hand back a table without glyphs), a placeholder with format
-- "unknown", no glyphs and zero units is returned instead of raising an error
-- on the missing tables.
function readers.loadshapes(filename,n,instance,streams)
    local fontdata = loadfont {
        filename = filename,
        shapes   = true,
        streams  = streams,
        variable = true,
        subfont  = n,
        instance = instance,
    }
    local glyphs = fontdata and fontdata.glyphs
    if not glyphs then
        -- nothing usable: same result as for a file that failed to load
        return {
            filename = filename,
            format   = "unknown",
            glyphs   = { },
            units    = 0,
        }
    end
    -- easier on luajit but still we can hit the 64 K stack constants issue
    for k, v in next, glyphs do
        v.class = nil
        v.index = nil
        v.math  = nil
     -- v.name  = nil
    end
    -- replace each name record by its stripped content
    local names = fontdata.names
    if names then
        for k, v in next, names do
            names[k] = fullstrip(v.content)
        end
    end
    -- guard the header lookup: it used to be indexed unconditionally
    local fontheader = fontdata.fontheader
    local units      = 0
    if fontheader then
        units = fontheader.units
    end
    return {
     -- version          = 0.123 -- todo
        filename         = filename,
        format           = fontdata.format,
        glyphs           = glyphs,
        units            = units,
        cffinfo          = fontdata.cffinfo,
        fontheader       = fontheader,
        horizontalheader = fontdata.horizontalheader,
        verticalheader   = fontdata.verticalheader,
        maximumprofile   = fontdata.maximumprofile,
        names            = names,
        postscript       = fontdata.postscript,
    }
end
2405
-- Loads a font (glyphs and lookups, no shapes) and repackages the loader data
-- into a table with metadata, properties and resources subtables.
--
-- filename : the font file
-- n        : optional subfont selector
-- instance : optional instance, passed on to the loader
--
-- Nothing is returned when the font could not be loaded.
function readers.loadfont(filename,n,instance)
    local fontdata = loadfont {
        filename    = filename,
        glyphs      = true,
        shapes      = false,
        lookups     = true,
        variable    = true,
     -- kerns       = true,
     -- globalkerns = true, -- only for testing, e.g. cambria has different gpos and kern
        subfont     = n,
        instance    = instance,
    }
    if not fontdata then
        return
    end
    local result = {
        tableversion = tableversion,
        creator      = "context mkiv",
        size         = fontdata.filesize,
        time         = fontdata.filetime,
        glyphs       = fontdata.glyphs,
        descriptions = fontdata.descriptions,
        format       = fontdata.format,
        goodies      = { },
    }
    -- no platformnames here !
    result.metadata = getinfo(fontdata,n,false,false,true,true)
    -- the subfont count is only set when the data carries subfonts
    local subfonts    = fontdata.subfonts
    local nofsubfonts = nil
    if subfonts then
        nofsubfonts = #subfonts
    end
    result.properties = {
        hasitalics    = fontdata.hasitalics or false,
        maxcolorclass = fontdata.maxcolorclass,
        hascolor      = fontdata.hascolor or false,
        instance      = fontdata.instance,
        factors       = fontdata.factors,
        nofsubfonts   = nofsubfonts,
    }
    -- entries marked (*) need to be added in the loader; absent ones become
    -- empty tables
    result.resources = {
     -- filename        = fontdata.filename,
        filename        = filename,
        private         = privateoffset,
        duplicates      = fontdata.duplicates  or { },
        features        = fontdata.features    or { }, -- (*)
        sublookups      = fontdata.sublookups  or { }, -- (*)
        marks           = fontdata.marks       or { }, -- (*)
        markclasses     = fontdata.markclasses or { }, -- (*)
        marksets        = fontdata.marksets    or { }, -- (*)
        sequences       = fontdata.sequences   or { }, -- (*)
        variants        = fontdata.variants, -- variant -> unicode -> glyph
        version         = getname(fontdata,"version"),
        cidinfo         = fontdata.cidinfo,
        mathconstants   = fontdata.mathconstants,
        colorpalettes   = fontdata.colorpalettes,
        colorpaintdata  = fontdata.colorpaintdata,
        colorpaintlist  = fontdata.colorpaintlist,
        colorlinesdata  = fontdata.colorlinesdata,
        coloraffinedata = fontdata.coloraffinedata,
        svgshapes       = fontdata.svgshapes,
        pngshapes       = fontdata.pngshapes,
        variabledata    = fontdata.variabledata,
        foundtables     = fontdata.foundtables,
    }
    return result
end
2465
-- Returns descriptive information about a font file.
--
-- filename      : the font file
-- specification : nil, a subfont number, or a table with the optional fields
--                 subfont, platformnames, rawfamilynames and tableoffsets
--
-- Result:
--   * data without subfonts     : one info table
--   * subfonts, none selected   : a list with one info table per subfont
--   * subfonts, valid selection : the info table of that subfont
--   * otherwise                 : a table with the filename and a comment that
--                                 tells what went wrong
--
-- platformnames is optional and not used by context (a too unpredictable mess
-- that only adds to the confusion) .. so it's only for checking things
function readers.getinfo(filename,specification) -- string, nil|number|table
    local instancenames  = true
    local platformnames  = false
    local rawfamilynames = false
    local tableoffsets   = false
    local subfont
    if type(specification) ~= "table" then
        subfont = tonumber(specification)
    else
        subfont        = tonumber(specification.subfont)
        platformnames  = specification.platformnames
        rawfamilynames = specification.rawfamilynames
        tableoffsets   = specification.tableoffsets
    end
    local fontdata = loadfont {
        filename       = filename,
        details        = true,
        platformnames  = platformnames,
        instancenames  = true,
        tableoffsets   = tableoffsets,
     -- rawfamilynames = rawfamilynames,
    }
    if not fontdata then
        return {
            filename = filename,
            comment  = "the file cannot be opened for reading",
        }
    end
    -- all lookups share the same flags, only the subfont index differs
    local function collect(index)
        return getinfo(fontdata,index,platformnames,rawfamilynames,false,instancenames)
    end
    local subfonts = fontdata.subfonts
    if not subfonts then
        return collect(nil)
    end
    if not subfont then
        local info = { }
        for i=1,#subfonts do
            info[i] = collect(i)
        end
        return info
    end
    if not (subfont >= 1 and subfont <= #subfonts) then
        return {
            filename = filename,
            comment  = "there is no subfont " .. subfont .. " in this file"
        }
    end
    return collect(subfont)
end
2515
-- Placeholder: only reports that this helper is not implemented yet.
readers.rehash = function() -- fontdata,hashmethod
    local helper = "rehash"
    report("the %a helper is not yet implemented",helper)
end
2519
-- Placeholder: only reports that this helper is not implemented yet.
readers.checkhash = function() -- fontdata
    local helper = "checkhash"
    report("the %a helper is not yet implemented",helper)
end
2523
-- Placeholder: only reports that this helper is not implemented yet.
readers.pack = function() -- fontdata,hashmethod
    local helper = "pack"
    report("the %a helper is not yet implemented",helper)
end
2527
-- Placeholder: only reports that this helper is not implemented yet; the
-- fontdata argument is ignored.
readers.unpack = function(fontdata)
    local helper = "unpack"
    report("the %a helper is not yet implemented",helper)
end
2531
-- Placeholder: only reports that this helper is not implemented yet; the
-- fontdata argument is ignored. The message names this helper ("expand"), it
-- used to report "unpack" due to a copy and paste slip.
function readers.expand(fontdata)
    report("the %a helper is not yet implemented","expand")
end
2535
-- Placeholder: only reports that this helper is not implemented yet; the
-- fontdata argument is ignored.
readers.compact = function(fontdata)
    local helper = "compact"
    report("the %a helper is not yet implemented",helper)
end
2539
-- Placeholder: only reports that this helper is not implemented yet; the
-- fontdata argument is ignored.
readers.condense = function(fontdata)
    local helper = "condense"
    report("the %a helper is not yet implemented",helper)
end
2543
2544-- plug in
2545
-- registered extenders, kept in registration order
local extenders = { }

-- Adds an extender to the end of the list. The 'action' field of an extender
-- is what gets called when the extenders are applied.
function readers.registerextender(extender)
    local slot = #extenders + 1
    extenders[slot] = extender
end
2551
-- Applies all registered extenders to fontdata, in registration order. An
-- extender without an 'action' field is skipped. Nothing is returned, whatever
-- an action returns is ignored.
function readers.extend(fontdata)
    for i=1,#extenders do
        -- the extender name was fetched here but never used, so it is gone
        local action = extenders[i].action
        if action then
            action(fontdata)
        end
    end
end
2562