font-otr.lua /size: 92 Kb    last modification: 2024-01-16 09:02
1if not modules then modules = { } end modules ['font-otr'] = {
2    version   = 1.001,
3    optimize  = true,
4    comment   = "companion to font-ini.mkiv",
5    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
6    copyright = "PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files"
8}
9
10-- When looking into a cid font related issue in the ff library I wondered if
11-- it made sense to use Lua to filter the information from the otf and ttf
12-- files. Quite some ff code relates to special fonts and in practice we only
13-- use rather normal opentype fonts.
14--
15-- The code here is based on the documentation (and examples) at the microsoft
16-- website. The code will be extended and improved stepwise. After some experiments
17-- I decided to convert to a format more suitable for the context font handler
18-- because it makes no sense to rehash all those lookups again.
19--
20-- Currently we can use this code for getting basic info about the font, loading
21-- shapes and loading the extensive table. I'm not sure if I will provide a ff
22-- compatible output as well (We're not that far from it as currently I can load
23-- all data reasonably fast.)
24
25-- We can omit redundant glyph names i.e. ones that match the agl or
26-- are just a unicode string but it doesn't save that much. It will be an option
27-- some day.
28
29-- Optimizing the widths will be done anyway as it saves quite some on a cjk font
30-- and the existing (old) code is okay.
31
32-- todo: more messages (only if really needed)
33--
34-- considered, in math:
35--
36-- start -> first (so we can skip the first same-size one)
37-- end   -> last
38--
39-- Widths and weights are kind of messy: for instance lmmonolt has a pfmweight of
40-- 400 while it should be 300. So, for now we mostly stick to the old compromise.
41
42-- We don't really need all those language tables so they might be dropped some
43-- day.
44
45-- The new reader is faster on some aspects and slower on others. The memory footprint
46-- is lower. The string reader is a bit faster than the file reader. The new reader
47-- gives more efficient tables and has a bit more analysis. In practice these times are
48-- not that relevant because we cache. The otf files take a bit more time because we
49-- need to calculate the boundingboxes. In theory the processing of text should be
50-- somewhat faster especially for complex fonts with many lookups.
51--
52--                        old    new    str reader
53-- lmroman12-regular.otf  0.103  0.203  0.195
54-- latinmodern-math.otf   0.454  0.768  0.712
55-- husayni.ttf            1.142  1.526  1.259
56--
57-- If there is demand I will consider making a ff compatible table dumper but it's
58-- probably more fun to provide a way to show features applied.
59
60-- I experimented a bit with f:readbyte(n) and f:readshort() and so on and it is indeed
61-- faster but it might not be the real bottleneck as we still need to juggle data. It
62-- is probably more memory efficient as no intermediate strings are involved.
63
64-- if not characters then
65--     require("char-def")
66--     require("char-ini")
67-- end
68
69local number = number
70local next, type, tonumber, rawget = next, type, tonumber, rawget
71local byte, lower, char, gsub = string.byte, string.lower, string.char, string.gsub
72local fullstrip = string.fullstrip
73local floor, round = math.floor, math.round
74local P, R, S, C, Cs, Cc, Ct, Carg, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Carg, lpeg.Cmt
75local lpegmatch = lpeg.match
76local rshift = bit32.rshift
77
78local setmetatableindex  = table.setmetatableindex
79local sortedkeys         = table.sortedkeys
80local sortedhash         = table.sortedhash
81local stripstring        = string.nospaces
82local utf16_to_utf8_be   = utf.utf16_to_utf8_be
83
84local report             = logs.reporter("otf reader")
85local report_cmap        = logs.reporter("otf reader","cmap")
86
87local trace_cmap         = false  trackers.register("otf.cmap",         function(v) trace_cmap         = v end)
88local trace_cmap_details = false  trackers.register("otf.cmap.details", function(v) trace_cmap_details = v end)
89
90fonts                    = fonts or { }
91local handlers           = fonts.handlers or { }
92fonts.handlers           = handlers
93local otf                = handlers.otf or { }
94handlers.otf             = otf
95local readers            = otf.readers or { }
96otf.readers              = readers
97
98----- streamreader       = utilities.streams -- faster on big files (not true any longer)
99local streamreader       = utilities.files   -- faster on identify (also uses less memory)
100local streamwriter       = utilities.files
101
102readers.streamreader     = streamreader
103readers.streamwriter     = streamwriter
104
-- Local shortcuts to the accessors of the active stream reader. The
-- readcardinal<n> readers are the unsigned variants and the readinteger<n>
-- readers the signed ones; <n> is the number of bytes. These locals are
-- rebound by the "fonts.streamreader" directive further down.
105local openfile           = streamreader.open
106local closefile          = streamreader.close
107----- skipbytes          = streamreader.skip
108local setposition        = streamreader.setposition
109local skipshort          = streamreader.skipshort
110local readbytes          = streamreader.readbytes
111local readstring         = streamreader.readstring
112local readbyte           = streamreader.readcardinal1  --  8-bit unsigned integer
113local readushort         = streamreader.readcardinal2  -- 16-bit unsigned integer
114local readuint           = streamreader.readcardinal3  -- 24-bit unsigned integer
115local readulong          = streamreader.readcardinal4  -- 32-bit unsigned integer
116----- readchar           = streamreader.readinteger1   --  8-bit   signed integer
117local readshort          = streamreader.readinteger2   -- 16-bit   signed integer
118local readlong           = streamreader.readinteger4   -- 32-bit   signed integer
119local readfixed          = streamreader.readfixed4
120local read2dot14         = streamreader.read2dot14     -- 16-bit signed fixed number with the low 14 bits of fraction (2.14) (F2DOT14)
121local readfword          = readshort                   -- 16-bit   signed integer that describes a quantity in FUnits
122local readufword         = readushort                  -- 16-bit unsigned integer that describes a quantity in FUnits
123local readoffset         = readushort                  -- offsets are read as 16-bit unsigned integers
124local readcardinaltable  = streamreader.readcardinaltable
125local readintegertable   = streamreader.readintegertable
126
-- Read a four byte tag from stream f. The raw string is first passed through
-- stripstring (an alias of string.nospaces) and the result is lowercased.
function streamreader.readtag(f)
    local raw = readstring(f,4)
    local tag = stripstring(raw)
    return lower(tag)
end
130
131local short  = 2
132local ushort = 2
133local ulong  = 4
134
-- Callback registered under the directive name "fonts.streamreader". When it
-- runs it rebinds this file's stream accessors (the upvalues declared above)
-- from utilities.files to utilities.streams and installs readtag on the new
-- reader table as well.
--
-- The order of the assignments matters: readfword, readufword and readoffset
-- are plain aliases of readshort/readushort, so they are set after those.
--
-- NOTE(review): readers.streamreader, readers.streamwriter and the local
-- streamwriter are not reassigned here -- confirm that this is intended.
135directives.register("fonts.streamreader",function()
136
137    streamreader      = utilities.streams
138
139    openfile          = streamreader.open
140    closefile         = streamreader.close
141    setposition       = streamreader.setposition
142    skipshort         = streamreader.skipshort
143    readbytes         = streamreader.readbytes
144    readstring        = streamreader.readstring
145    readbyte          = streamreader.readcardinal1
146    readushort        = streamreader.readcardinal2
147    readuint          = streamreader.readcardinal3
148    readulong         = streamreader.readcardinal4
149    readshort         = streamreader.readinteger2
150    readlong          = streamreader.readinteger4
151    readfixed         = streamreader.readfixed4
152    read2dot14        = streamreader.read2dot14
    -- derived aliases, these must follow readshort and readushort
153    readfword         = readshort
154    readufword        = readushort
155    readoffset        = readushort
156    readcardinaltable = streamreader.readcardinaltable
157    readintegertable  = streamreader.readintegertable
158
    -- same definition as the one installed on the default reader above
159    function streamreader.readtag(f)
160        return lower(stripstring(readstring(f,4)))
161    end
162
163end)
164
165-- date represented in number of seconds since 12:00 midnight, January 1, 1904. The value is represented as a
166-- signed 64-bit integer
167
-- Read an eight byte date/time field from stream f and return it as a number.
-- The bytes are combined with the earlier byte being the more significant one.
-- Only the last five bytes contribute; the first three are read (so that the
-- stream advances over the whole field) but ignored.
local function readlongdatetime(f)
    local _, _, _, b4, b3, b2, b1, b0 = readbytes(f,8)
    return (((b4 * 0x100 + b3) * 0x100 + b2) * 0x100 + b1) * 0x100 + b0
end
172
-- Version number of the tables produced by this reader; it is also exposed
-- as readers.tableversion.
173local tableversion    = 0.004
174readers.tableversion  = tableversion
-- Taken from fonts.constructors.privateoffset when that is available at load
-- time, otherwise 0xF0000 is used.
175local privateoffset   = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF
176
177-- We have quite some data tables. We are somewhat ff compatible with names but as I used
178-- the information from the microsoft site there can be differences. Eventually I might end
179-- up with a different ordering and naming.
180
-- Symbolic names for the entries of the name table, indexed from 0. The name
-- reader looks up the name index it reads from the file in this list.
181local reservednames = { [0] =
182    "copyright",
183    "family",
184    "subfamily",
185    "uniqueid",
186    "fullname",
187    "version",
188    "postscriptname",
189    "trademark",
190    "manufacturer",
191    "designer",
192    "description", -- descriptor in ff
193    "vendorurl",
194    "designerurl",
195    "license",
196    "licenseurl",
197    "reserved",
198    "typographicfamily",    -- preffamilyname
199    "typographicsubfamily", -- prefmodifiers
200    "compatiblefullname",   -- for mac
201    "sampletext",
202    "cidfindfontname",
203    "wwsfamily",
204    "wwssubfamily",
205    "lightbackgroundpalette",
206    "darkbackgroundpalette",
207    "variationspostscriptnameprefix",
208}
209
210-- more at: https://www.microsoft.com/typography/otspec/name.htm
211
212-- setmetatableindex(reservednames,function(t,k)
213--     local v = "name_" .. k
214--     t[k] =  v
215--     return v
216-- end)
217
-- Platform names indexed from 0 by the platform id that is read from the
-- file. The resulting string is the key used in the encodings, decoders and
-- languages tables below.
217local platforms = { [0] =
218    "unicode",
219    "macintosh",
220    "iso",
221    "windows",
222    "custom",
223}
225
-- Encoding names per platform: encodings[platform][id] with id counting from
-- 0. The name reader skips records whose encoding id has no entry here.
225local encodings = {
226    -- these stay:
227    unicode = { [0] =
228        "unicode 1.0 semantics",
229        "unicode 1.1 semantics",
230        "iso/iec 10646",
231        "unicode 2.0 bmp",             -- cmap subtable formats 0, 4, 6
232        "unicode 2.0 full",            -- cmap subtable formats 0, 4, 6, 10, 12
233        "unicode variation sequences", -- cmap subtable format 14
234        "unicode full repertoire",     -- cmap subtable formats 0, 4, 6, 10, 12, 13
235    },
236    -- these can go:
237    macintosh = { [0] =
238        "roman", "japanese", "chinese (traditional)", "korean", "arabic", "hebrew", "greek", "russian",
239        "rsymbol", "devanagari", "gurmukhi", "gujarati", "oriya", "bengali", "tamil", "telugu", "kannada",
240        "malayalam", "sinhalese", "burmese", "khmer", "thai", "laotian", "georgian", "armenian",
241        "chinese (simplified)", "tibetan", "mongolian", "geez", "slavic", "vietnamese", "sindhi",
242        "uninterpreted",
243    },
244    -- these stay:
245    iso = { [0] =
246        "7-bit ascii",
247        "iso 10646",
248        "iso 8859-1",
249    },
250    -- these stay:
251    windows = { [0] =
252        "symbol",
253        "unicode bmp", -- this is utf16
254        "shiftjis",
255        "prc",
256        "big5",
257        "wansung",
258        "johab",
259        "reserved 7",
260        "reserved 8",
261        "reserved 9",
262        "unicode ucs-4",
263    },
264    custom = {
265        --custom: 0-255 : otf windows nt compatibility mapping
266    }
267}
269
-- Per platform a map from encoding name to a conversion function. Only the
-- windows platform has entries and all of them use utf16_to_utf8_be.
-- NOTE(review): several keys below ("unicode semantics", "unicode full" and
-- the "unicode 1.x/2.0 ..." names) do not occur in encodings.windows, while
-- "unicode ucs-4" does occur there but has no decoder -- confirm against the
-- code that consults this table (not visible in this part of the file).
269local decoders = {
270    unicode   = { },
271    macintosh = { },
272    iso       = { },
273    windows   = {
274        -- maybe always utf16
275        ["unicode semantics"]           = utf16_to_utf8_be,
276        ["unicode bmp"]                 = utf16_to_utf8_be,
277        ["unicode full"]                = utf16_to_utf8_be,
278        ["unicode 1.0 semantics"]       = utf16_to_utf8_be,
279        ["unicode 1.1 semantics"]       = utf16_to_utf8_be,
280        ["unicode 2.0 bmp"]             = utf16_to_utf8_be,
281        ["unicode 2.0 full"]            = utf16_to_utf8_be,
282        ["unicode variation sequences"] = utf16_to_utf8_be,
283        ["unicode full repertoire"]     = utf16_to_utf8_be,
284    },
285    custom    = { },
286}
288
289-- This is a bit over the top as we can just look for either windows, unicode or macintosh
290-- names (in that order). A font with no english name is probably a weird one anyway.
291
292local languages = {
293    -- these stay:
294    unicode = {
295        [  0] = "english",
296    },
297    -- english can stay:
298    macintosh = {
299        [  0] = "english",
300     -- [  1] = "french",
301     -- [  2] = "german",
302     -- [  3] = "italian",
303     -- [  4] = "dutch",
304     -- [  5] = "swedish",
305     -- [  6] = "spanish",
306     -- [  7] = "danish",
307     -- [  8] = "portuguese",
308     -- [  9] = "norwegian",
309     -- [ 10] = "hebrew",
310     -- [ 11] = "japanese",
311     -- [ 12] = "arabic",
312     -- [ 13] = "finnish",
313     -- [ 14] = "greek",
314     -- [ 15] = "icelandic",
315     -- [ 16] = "maltese",
316     -- [ 17] = "turkish",
317     -- [ 18] = "croatian",
318     -- [ 19] = "chinese (traditional)",
319     -- [ 20] = "urdu",
320     -- [ 21] = "hindi",
321     -- [ 22] = "thai",
322     -- [ 23] = "korean",
323     -- [ 24] = "lithuanian",
324     -- [ 25] = "polish",
325     -- [ 26] = "hungarian",
326     -- [ 27] = "estonian",
327     -- [ 28] = "latvian",
328     -- [ 29] = "sami",
329     -- [ 30] = "faroese",
330     -- [ 31] = "farsi/persian",
331     -- [ 32] = "russian",
332     -- [ 33] = "chinese (simplified)",
333     -- [ 34] = "flemish",
334     -- [ 35] = "irish gaelic",
335     -- [ 36] = "albanian",
336     -- [ 37] = "romanian",
337     -- [ 38] = "czech",
338     -- [ 39] = "slovak",
339     -- [ 40] = "slovenian",
340     -- [ 41] = "yiddish",
341     -- [ 42] = "serbian",
342     -- [ 43] = "macedonian",
343     -- [ 44] = "bulgarian",
344     -- [ 45] = "ukrainian",
345     -- [ 46] = "byelorussian",
346     -- [ 47] = "uzbek",
347     -- [ 48] = "kazakh",
348     -- [ 49] = "azerbaijani (cyrillic script)",
349     -- [ 50] = "azerbaijani (arabic script)",
350     -- [ 51] = "armenian",
351     -- [ 52] = "georgian",
352     -- [ 53] = "moldavian",
353     -- [ 54] = "kirghiz",
354     -- [ 55] = "tajiki",
355     -- [ 56] = "turkmen",
356     -- [ 57] = "mongolian (mongolian script)",
357     -- [ 58] = "mongolian (cyrillic script)",
358     -- [ 59] = "pashto",
359     -- [ 60] = "kurdish",
360     -- [ 61] = "kashmiri",
361     -- [ 62] = "sindhi",
362     -- [ 63] = "tibetan",
363     -- [ 64] = "nepali",
364     -- [ 65] = "sanskrit",
365     -- [ 66] = "marathi",
366     -- [ 67] = "bengali",
367     -- [ 68] = "assamese",
368     -- [ 69] = "gujarati",
369     -- [ 70] = "punjabi",
370     -- [ 71] = "oriya",
371     -- [ 72] = "malayalam",
372     -- [ 73] = "kannada",
373     -- [ 74] = "tamil",
374     -- [ 75] = "telugu",
375     -- [ 76] = "sinhalese",
376     -- [ 77] = "burmese",
377     -- [ 78] = "khmer",
378     -- [ 79] = "lao",
379     -- [ 80] = "vietnamese",
380     -- [ 81] = "indonesian",
381     -- [ 82] = "tagalog",
382     -- [ 83] = "malay (roman script)",
383     -- [ 84] = "malay (arabic script)",
384     -- [ 85] = "amharic",
385     -- [ 86] = "tigrinya",
386     -- [ 87] = "galla",
387     -- [ 88] = "somali",
388     -- [ 89] = "swahili",
389     -- [ 90] = "kinyarwanda/ruanda",
390     -- [ 91] = "rundi",
391     -- [ 92] = "nyanja/chewa",
392     -- [ 93] = "malagasy",
393     -- [ 94] = "esperanto",
394     -- [128] = "welsh",
395     -- [129] = "basque",
396     -- [130] = "catalan",
397     -- [131] = "latin",
398     -- [132] = "quechua",
399     -- [133] = "guarani",
400     -- [134] = "aymara",
401     -- [135] = "tatar",
402     -- [136] = "uighur",
403     -- [137] = "dzongkha",
404     -- [138] = "javanese (roman script)",
405     -- [139] = "sundanese (roman script)",
406     -- [140] = "galician",
407     -- [141] = "afrikaans",
408     -- [142] = "breton",
409     -- [143] = "inuktitut",
410     -- [144] = "scottish gaelic",
411     -- [145] = "manx gaelic",
412     -- [146] = "irish gaelic (with dot above)",
413     -- [147] = "tongan",
414     -- [148] = "greek (polytonic)",
415     -- [149] = "greenlandic",
416     -- [150] = "azerbaijani (roman script)",
417    },
418    -- these can stay:
419    iso = {
420    },
421    -- english can stay:
422    windows = {
423     -- [0x0436] = "afrikaans - south africa",
424     -- [0x041c] = "albanian - albania",
425     -- [0x0484] = "alsatian - france",
426     -- [0x045e] = "amharic - ethiopia",
427     -- [0x1401] = "arabic - algeria",
428     -- [0x3c01] = "arabic - bahrain",
429     -- [0x0c01] = "arabic - egypt",
430     -- [0x0801] = "arabic - iraq",
431     -- [0x2c01] = "arabic - jordan",
432     -- [0x3401] = "arabic - kuwait",
433     -- [0x3001] = "arabic - lebanon",
434     -- [0x1001] = "arabic - libya",
435     -- [0x1801] = "arabic - morocco",
436     -- [0x2001] = "arabic - oman",
437     -- [0x4001] = "arabic - qatar",
438     -- [0x0401] = "arabic - saudi arabia",
439     -- [0x2801] = "arabic - syria",
440     -- [0x1c01] = "arabic - tunisia",
441     -- [0x3801] = "arabic - u.a.e.",
442     -- [0x2401] = "arabic - yemen",
443     -- [0x042b] = "armenian - armenia",
444     -- [0x044d] = "assamese - india",
445     -- [0x082c] = "azeri (cyrillic) - azerbaijan",
446     -- [0x042c] = "azeri (latin) - azerbaijan",
447     -- [0x046d] = "bashkir - russia",
448     -- [0x042d] = "basque - basque",
449     -- [0x0423] = "belarusian - belarus",
450     -- [0x0845] = "bengali - bangladesh",
451     -- [0x0445] = "bengali - india",
452     -- [0x201a] = "bosnian (cyrillic) - bosnia and herzegovina",
453     -- [0x141a] = "bosnian (latin) - bosnia and herzegovina",
454     -- [0x047e] = "breton - france",
455     -- [0x0402] = "bulgarian - bulgaria",
456     -- [0x0403] = "catalan - catalan",
457     -- [0x0c04] = "chinese - hong kong s.a.r.",
458     -- [0x1404] = "chinese - macao s.a.r.",
459     -- [0x0804] = "chinese - people's republic of china",
460     -- [0x1004] = "chinese - singapore",
461     -- [0x0404] = "chinese - taiwan",
462     -- [0x0483] = "corsican - france",
463     -- [0x041a] = "croatian - croatia",
464     -- [0x101a] = "croatian (latin) - bosnia and herzegovina",
465     -- [0x0405] = "czech - czech republic",
466     -- [0x0406] = "danish - denmark",
467     -- [0x048c] = "dari - afghanistan",
468     -- [0x0465] = "divehi - maldives",
469     -- [0x0813] = "dutch - belgium",
470     -- [0x0413] = "dutch - netherlands",
471     -- [0x0c09] = "english - australia",
472     -- [0x2809] = "english - belize",
473     -- [0x1009] = "english - canada",
474     -- [0x2409] = "english - caribbean",
475     -- [0x4009] = "english - india",
476     -- [0x1809] = "english - ireland",
477     -- [0x2009] = "english - jamaica",
478     -- [0x4409] = "english - malaysia",
479     -- [0x1409] = "english - new zealand",
480     -- [0x3409] = "english - republic of the philippines",
481     -- [0x4809] = "english - singapore",
482     -- [0x1c09] = "english - south africa",
483     -- [0x2c09] = "english - trinidad and tobago",
484     -- [0x0809] = "english - united kingdom",
485        [0x0409] = "english - united states",
486     -- [0x3009] = "english - zimbabwe",
487     -- [0x0425] = "estonian - estonia",
488     -- [0x0438] = "faroese - faroe islands",
489     -- [0x0464] = "filipino - philippines",
490     -- [0x040b] = "finnish - finland",
491     -- [0x080c] = "french - belgium",
492     -- [0x0c0c] = "french - canada",
493     -- [0x040c] = "french - france",
494     -- [0x140c] = "french - luxembourg",
495     -- [0x180c] = "french - principality of monaco",
496     -- [0x100c] = "french - switzerland",
497     -- [0x0462] = "frisian - netherlands",
498     -- [0x0456] = "galician - galician",
499     -- [0x0437] = "georgian - georgia",
500     -- [0x0c07] = "german - austria",
501     -- [0x0407] = "german - germany",
502     -- [0x1407] = "german - liechtenstein",
503     -- [0x1007] = "german - luxembourg",
504     -- [0x0807] = "german - switzerland",
505     -- [0x0408] = "greek - greece",
506     -- [0x046f] = "greenlandic - greenland",
507     -- [0x0447] = "gujarati - india",
508     -- [0x0468] = "hausa (latin) - nigeria",
509     -- [0x040d] = "hebrew - israel",
510     -- [0x0439] = "hindi - india",
511     -- [0x040e] = "hungarian - hungary",
512     -- [0x040f] = "icelandic - iceland",
513     -- [0x0470] = "igbo - nigeria",
514     -- [0x0421] = "indonesian - indonesia",
515     -- [0x045d] = "inuktitut - canada",
516     -- [0x085d] = "inuktitut (latin) - canada",
517     -- [0x083c] = "irish - ireland",
518     -- [0x0434] = "isixhosa - south africa",
519     -- [0x0435] = "isizulu - south africa",
520     -- [0x0410] = "italian - italy",
521     -- [0x0810] = "italian - switzerland",
522     -- [0x0411] = "japanese - japan",
523     -- [0x044b] = "kannada - india",
524     -- [0x043f] = "kazakh - kazakhstan",
525     -- [0x0453] = "khmer - cambodia",
526     -- [0x0486] = "k'iche - guatemala",
527     -- [0x0487] = "kinyarwanda - rwanda",
528     -- [0x0441] = "kiswahili - kenya",
529     -- [0x0457] = "konkani - india",
530     -- [0x0412] = "korean - korea",
531     -- [0x0440] = "kyrgyz - kyrgyzstan",
532     -- [0x0454] = "lao - lao p.d.r.",
533     -- [0x0426] = "latvian - latvia",
534     -- [0x0427] = "lithuanian - lithuania",
535     -- [0x082e] = "lower sorbian - germany",
536     -- [0x046e] = "luxembourgish - luxembourg",
537     -- [0x042f] = "macedonian (fyrom) - former yugoslav republic of macedonia",
538     -- [0x083e] = "malay - brunei darussalam",
539     -- [0x043e] = "malay - malaysia",
540     -- [0x044c] = "malayalam - india",
541     -- [0x043a] = "maltese - malta",
542     -- [0x0481] = "maori - new zealand",
543     -- [0x047a] = "mapudungun - chile",
544     -- [0x044e] = "marathi - india",
545     -- [0x047c] = "mohawk - mohawk",
546     -- [0x0450] = "mongolian (cyrillic) - mongolia",
547     -- [0x0850] = "mongolian (traditional) - people's republic of china",
548     -- [0x0461] = "nepali - nepal",
549     -- [0x0414] = "norwegian (bokmal) - norway",
550     -- [0x0814] = "norwegian (nynorsk) - norway",
551     -- [0x0482] = "occitan - france",
552     -- [0x0448] = "odia (formerly oriya) - india",
553     -- [0x0463] = "pashto - afghanistan",
554     -- [0x0415] = "polish - poland",
555     -- [0x0416] = "portuguese - brazil",
556     -- [0x0816] = "portuguese - portugal",
557     -- [0x0446] = "punjabi - india",
558     -- [0x046b] = "quechua - bolivia",
559     -- [0x086b] = "quechua - ecuador",
560     -- [0x0c6b] = "quechua - peru",
561     -- [0x0418] = "romanian - romania",
562     -- [0x0417] = "romansh - switzerland",
563     -- [0x0419] = "russian - russia",
564     -- [0x243b] = "sami (inari) - finland",
565     -- [0x103b] = "sami (lule) - norway",
566     -- [0x143b] = "sami (lule) - sweden",
567     -- [0x0c3b] = "sami (northern) - finland",
568     -- [0x043b] = "sami (northern) - norway",
569     -- [0x083b] = "sami (northern) - sweden",
570     -- [0x203b] = "sami (skolt) - finland",
571     -- [0x183b] = "sami (southern) - norway",
572     -- [0x1c3b] = "sami (southern) - sweden",
573     -- [0x044f] = "sanskrit - india",
574     -- [0x1c1a] = "serbian (cyrillic) - bosnia and herzegovina",
575     -- [0x0c1a] = "serbian (cyrillic) - serbia",
576     -- [0x181a] = "serbian (latin) - bosnia and herzegovina",
577     -- [0x081a] = "serbian (latin) - serbia",
578     -- [0x046c] = "sesotho sa leboa - south africa",
579     -- [0x0432] = "setswana - south africa",
580     -- [0x045b] = "sinhala - sri lanka",
581     -- [0x041b] = "slovak - slovakia",
582     -- [0x0424] = "slovenian - slovenia",
583     -- [0x2c0a] = "spanish - argentina",
584     -- [0x400a] = "spanish - bolivia",
585     -- [0x340a] = "spanish - chile",
586     -- [0x240a] = "spanish - colombia",
587     -- [0x140a] = "spanish - costa rica",
588     -- [0x1c0a] = "spanish - dominican republic",
589     -- [0x300a] = "spanish - ecuador",
590     -- [0x440a] = "spanish - el salvador",
591     -- [0x100a] = "spanish - guatemala",
592     -- [0x480a] = "spanish - honduras",
593     -- [0x080a] = "spanish - mexico",
594     -- [0x4c0a] = "spanish - nicaragua",
595     -- [0x180a] = "spanish - panama",
596     -- [0x3c0a] = "spanish - paraguay",
597     -- [0x280a] = "spanish - peru",
598     -- [0x500a] = "spanish - puerto rico",
599     -- [0x0c0a] = "spanish (modern sort) - spain",
600     -- [0x040a] = "spanish (traditional sort) - spain",
601     -- [0x540a] = "spanish - united states",
602     -- [0x380a] = "spanish - uruguay",
603     -- [0x200a] = "spanish - venezuela",
604     -- [0x081d] = "swedish - finland",
605     -- [0x041d] = "swedish - sweden",
606     -- [0x045a] = "syriac - syria",
607     -- [0x0428] = "tajik (cyrillic) - tajikistan",
608     -- [0x085f] = "tamazight (latin) - algeria",
609     -- [0x0449] = "tamil - india",
610     -- [0x0444] = "tatar - russia",
611     -- [0x044a] = "telugu - india",
612     -- [0x041e] = "thai - thailand",
613     -- [0x0451] = "tibetan - prc",
614     -- [0x041f] = "turkish - turkey",
615     -- [0x0442] = "turkmen - turkmenistan",
616     -- [0x0480] = "uighur - prc",
617     -- [0x0422] = "ukrainian - ukraine",
618     -- [0x042e] = "upper sorbian - germany",
619     -- [0x0420] = "urdu - islamic republic of pakistan",
620     -- [0x0843] = "uzbek (cyrillic) - uzbekistan",
621     -- [0x0443] = "uzbek (latin) - uzbekistan",
622     -- [0x042a] = "vietnamese - vietnam",
623     -- [0x0452] = "welsh - united kingdom",
624     -- [0x0488] = "wolof - senegal",
625     -- [0x0485] = "yakut - russia",
626     -- [0x0478] = "yi - prc",
627     -- [0x046a] = "yoruba - nigeria",
628    },
629    custom = {
630    },
631}
632
-- Glyph names of the standard roman encoding, indexed from 0 (258 entries,
-- the last one is "dcroat"). The code that uses this list is not visible in
-- this part of the file.
-- NOTE(review): entry 0 is "notdef" without a leading period while entry 1
-- is ".null" -- confirm that this difference is intended.
633local standardromanencoding = { [0] = -- taken from wikipedia
634    "notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl",
635    "numbersign", "dollar", "percent", "ampersand", "quotesingle", "parenleft",
636    "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
637    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
638    "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at",
639    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
640    "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft",
641    "backslash", "bracketright", "asciicircum", "underscore", "grave", "a", "b",
642    "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q",
643    "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar",
644    "braceright", "asciitilde", "Adieresis", "Aring", "Ccedilla", "Eacute",
645    "Ntilde", "Odieresis", "Udieresis", "aacute", "agrave", "acircumflex",
646    "adieresis", "atilde", "aring", "ccedilla", "eacute", "egrave",
647    "ecircumflex", "edieresis", "iacute", "igrave", "icircumflex", "idieresis",
648    "ntilde", "oacute", "ograve", "ocircumflex", "odieresis", "otilde", "uacute",
649    "ugrave", "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling",
650    "section", "bullet", "paragraph", "germandbls", "registered", "copyright",
651    "trademark", "acute", "dieresis", "notequal", "AE", "Oslash", "infinity",
652    "plusminus", "lessequal", "greaterequal", "yen", "mu", "partialdiff",
653    "summation", "product", "pi", "integral", "ordfeminine", "ordmasculine",
654    "Omega", "ae", "oslash", "questiondown", "exclamdown", "logicalnot",
655    "radical", "florin", "approxequal", "Delta", "guillemotleft",
656    "guillemotright", "ellipsis", "nonbreakingspace", "Agrave", "Atilde",
657    "Otilde", "OE", "oe", "endash", "emdash", "quotedblleft", "quotedblright",
658    "quoteleft", "quoteright", "divide", "lozenge", "ydieresis", "Ydieresis",
659    "fraction", "currency", "guilsinglleft", "guilsinglright", "fi", "fl",
660    "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase",
661    "perthousand", "Acircumflex", "Ecircumflex", "Aacute", "Edieresis", "Egrave",
662    "Iacute", "Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex",
663    "apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave", "dotlessi",
664    "circumflex", "tilde", "macron", "breve", "dotaccent", "ring", "cedilla",
665    "hungarumlaut", "ogonek", "caron", "Lslash", "lslash", "Scaron", "scaron",
666    "Zcaron", "zcaron", "brokenbar", "Eth", "eth", "Yacute", "yacute", "Thorn",
667    "thorn", "minus", "multiply", "onesuperior", "twosuperior", "threesuperior",
668    "onehalf", "onequarter", "threequarters", "franc", "Gbreve", "gbreve",
669    "Idotaccent", "Scedilla", "scedilla", "Cacute", "cacute", "Ccaron", "ccaron",
670    "dcroat",
671}
672
-- Weight names keyed by weight class in steps of 100. Classes that have no
-- entry of their own are handled by the fallback that is installed on this
-- table with setmetatableindex further down.
673local weights = {
674    [100] = "thin",
675    [200] = "extralight",
676    [300] = "light",
677    [400] = "normal",
678    [500] = "medium",
679    [600] = "semibold", -- demi demibold
680    [700] = "bold",
681    [800] = "extrabold",
682    [900] = "black",
683}
684
-- Width names as a plain list, so the first entry has index 1 and "normal"
-- has index 5. Any other key gets "normal" from the fallback that is
-- installed on this table with setmetatableindex further down.
685local widths = {
686    "ultracondensed",
687    "extracondensed",
688    "condensed",
689    "semicondensed",
690    "normal",
691    "semiexpanded",
692    "expanded",
693    "extraexpanded",
694    "ultraexpanded",
695}
696
-- Fallback for weight classes without an entry of their own: the class is
-- rounded to the nearest hundred (halves go up), anything that ends up above
-- 900 becomes "black", and when the rounded class has no entry either the
-- result is "normal". The result is not stored in the table.
setmetatableindex(weights, function(t,k)
    local rounded = floor((k + 50) / 100) * 100
    if rounded > 900 then
        return "black"
    end
    return rawget(t,rounded) or "normal"
end)
702
-- Fallback for width classes that are not in the list: always "normal".
setmetatableindex(widths,function()
    return "normal"
end)
706
-- Weight names indexed from 0; the first two entries both map to "normal".
-- Presumably indexed by the weight value of the panose data -- the code that
-- uses this list is not visible in this part of the file.
707local panoseweights = { [0] =
708    "normal",
709    "normal",
710    "verylight",
711    "light",
712    "thin",
713    "book",
714    "medium",
715    "demi",
716    "bold",
717    "heavy",
718    "black",
719}
720
-- Width names indexed from 0; the first five entries all map to "normal".
-- Presumably indexed by the proportion value of the panose data -- the code
-- that uses this list is not visible in this part of the file.
721local panosewidths = { [0] =
722    "normal",
723    "normal",
724    "normal",
725    "normal",
726    "normal",
727    "expanded",
728    "condensed",
729    "veryexpanded",
730    "verycondensed",
731    "monospaced",
732}
733
734-- We implement a reader per table.
735
736-- helper
737
-- Namespace for helper functions; it is exposed as readers.helpers and gets
-- filled after the helpers below have been defined.
738local helpers   = { }
739readers.helpers = helpers
740
-- Position stream f at the start of the table registered under tag in
-- fontdata.tables and return that offset. Nothing is returned (and the
-- stream is left untouched) when criterium or f is false/nil, when
-- fontdata has no tables (this is reported) or when the tag is unknown.
local function gotodatatable(f,fontdata,tag,criterium)
    if not (criterium and f) then
        return
    end
    local tables = fontdata.tables
    if not tables then
        report("no tables")
        return
    end
    local entry = tables[tag]
    if entry then
        local position = entry.offset
        setposition(f,position)
        return position
    end
end
756
-- Report that loading of the table registered under tag is skipped. This
-- only happens when criterium and f are both set and the table is present
-- in fontdata.tables; a fontdata without tables is reported as such.
local function reportskippedtable(f,fontdata,tag,criterium)
    if not (criterium and f) then
        return
    end
    local tables = fontdata.tables
    if not tables then
        report("no tables")
    elseif tables[tag] then
        report("loading of table %a skipped",tag)
    end
end
770
-- Store data under tag in fontdata.variabledata; that subtable is created
-- when it does not exist yet.
local function setvariabledata(fontdata,tag,data)
    local store = fontdata.variabledata
    if not store then
        store = { }
        fontdata.variabledata = store
    end
    store[tag] = data
end
779
-- Make the three local helpers available through readers.helpers.
780helpers.gotodatatable      = gotodatatable
781helpers.setvariabledata    = setvariabledata
782helpers.reportskippedtable = reportskippedtable
783
784-- The name table is probably the first one to load. After all this one provides
785-- useful information about what we deal with. The complication is that we need
786-- to filter the best one available.
787
-- Set of name identifiers (all of them occur in reservednames) that deal
-- with how the font is named. Presumably consulted by the name reader below
-- -- the code that uses it is not visible in this part of the file.
788local platformnames = {
789    postscriptname       = true,
790    fullname             = true,
791    family               = true,
792    subfamily            = true,
793    typographicfamily    = true,
794    typographicsubfamily = true,
795    compatiblefullname   = true,
796}
797
-- Set of additional name identifiers (all of them occur in reservednames)
-- that carry descriptive and legal information. Presumably consulted by the
-- name reader below -- the code that uses it is not visible in this part of
-- the file.
798local platformextras = {
799    uniqueid     = true,
800    version      = true,
801    copyright    = true,
802    license      = true,
803    licenseurl   = true,
804    manufacturer = true,
805    vendorurl    = true,
806}
807
-- Load the name table. All records of the platforms that we know are collected
-- first (without fetching the strings). After that we pick, per name tag, the first
-- entry in order of preference and store the decoded string in fontdata.names (keyed
-- by name tag) and fontdata.extras (keyed by name index). When the specification asks
-- for platformnames the selected tags are also stored per platform. Without a name
-- table fontdata.names becomes an empty table.

function readers.name(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"name",true)
    if not tableoffset then
        fontdata.names = { }
        return
    end
    --
    -- header: format, number of records and the offset to the string storage
    --
    local format    = readushort(f)
    local nofnames  = readushort(f)
    local start     = tableoffset + readushort(f)
    local namelists = {
        unicode   = { },
        windows   = { },
        macintosh = { },
    }
    --
    -- each record has six shorts: platform, encoding, language, name index,
    -- length and offset, so we skip what is left when we bail out
    --
    for i=1,nofnames do
        local platform = platforms[readushort(f)]
        local namelist = platform and namelists[platform]
        if not namelist then
            skipshort(f,5)
        else
            local encodingid     = readushort(f)
            local languageid     = readushort(f)
            local knownencodings = encodings[platform]
            local knownlanguages = languages[platform]
            local encoding       = nil
            local language       = nil
            if knownencodings and knownlanguages then
                encoding = knownencodings[encodingid]
                language = knownlanguages[languageid]
            end
            if encoding and language then
                local index  = readushort(f)
                local name   = reservednames[index]
                local length = readushort(f)
                local offset = readushort(f)
                namelist[#namelist+1] = {
                    platform = platform,
                    encoding = encoding,
                    language = language,
                    name     = name,
                    index    = index,
                    length   = length,
                    offset   = start + offset,
                }
            else
                skipshort(f,3)
            end
        end
    end
    --
    -- the language tag records of a format 1 table are not read
    --
    local names  = { }
    local done   = { }
    local extras = { }
    --
    -- use the decoder for this platform and encoding, if there is one
    --
    local function decoded(platform,encoding,content)
        local platformdecoders = decoders[platform]
        local decoder          = platformdecoders and platformdecoders[encoding]
        if decoder then
            return decoder(content)
        end
        return content
    end
    --
    -- fetch the not yet done entries of a platform, optionally limited to an
    -- encoding and a language
    --
    local function filter(platform,e,l)
        local namelist = namelists[platform]
        for i=1,#namelist do
            local entry   = namelist[i]
            local nametag = entry.name
            local key     = nametag or i
            if not done[key] then
                local encoding = entry.encoding
                local language = entry.language
                if (not e or encoding == e) and (not l or language == l) then
                    setposition(f,entry.offset)
                    local content = decoded(platform,encoding,readstring(f,entry.length))
                    if nametag then
                        names[nametag] = {
                            content  = content,
                            platform = platform,
                            encoding = encoding,
                            language = language,
                        }
                    end
                    extras[entry.index] = content
                    done[key] = true
                end
            end
        end
    end
    --
    -- the order of preference: english first, then whatever is there
    --
    local preferred = {
        { "windows",   "unicode bmp", "english - united states" },
        { "macintosh", "roman",       "english" },
        { "windows" },
        { "macintosh" },
        { "unicode" },
    }
    for i=1,#preferred do
        local wanted = preferred[i]
        filter(wanted[1],wanted[2],wanted[3])
    end
    --
    fontdata.names  = names
    fontdata.extras = extras
    --
    if specification.platformnames then
        local collected    = { }
        local wantedextras = specification.platformextras and platformextras
        for platform, namelist in next, namelists do
            local filtered = false
            for i=1,#namelist do
                local entry = namelist[i]
                local name  = entry.name
                if platformnames[name] or (wantedextras and wantedextras[name]) then
                    setposition(f,entry.offset)
                    local content = decoded(platform,entry.encoding,readstring(f,entry.length))
                    if not filtered then
                        filtered = { }
                    end
                    filtered[name] = content
                end
            end
            if filtered then
                collected[platform] = filtered
            end
        end
        fontdata.platformnames = collected
    end
end
955
956----- validutf = lpeg.patterns.utf8character^0 * P(-1)
-- The pattern that getname uses to check the content of a name entry.
local validutf = lpeg.patterns.validutf8
958
-- Return the content of the name entry with the given key but only when that
-- content matches the validutf pattern. In all other cases we return nothing.

local function getname(fontdata,key)
    local names = fontdata.names
    local value = names and names[key]
    if not value then
        return
    end
    local content = value.content
    if lpegmatch(validutf,content) then
        return content
    end
    return nil
end
969
970-- This table is an original windows (with its precursor os/2) table. In ff this one is
971-- part of the pfminfo table but here we keep it separate (for now). We will create a
972-- properties table afterwards.
973
-- Load the os/2 table into fontdata.windowsmetrics. The fields are read in the order
-- in which they are stored. The code page ranges are only read for version 1 and
-- later, the x-height and friends only for version 2 and later. Afterwards the weight
-- and width classes and the panose weight and width are resolved via the lookup
-- tables. Without an os/2 table the metrics table is empty.

readers["os/2"] = function(f,fontdata)
    if not gotodatatable(f,fontdata,"os/2",true) then
        fontdata.windowsmetrics = { }
        return
    end
    local version = readushort(f)
    local metrics = { version = version }
    --
    metrics.averagewidth       = readshort(f) -- ushort?
    metrics.weightclass        = readushort(f)
    metrics.widthclass         = readushort(f)
    metrics.fstype             = readushort(f)
    metrics.subscriptxsize     = readshort(f)
    metrics.subscriptysize     = readshort(f)
    metrics.subscriptxoffset   = readshort(f)
    metrics.subscriptyoffset   = readshort(f)
    metrics.superscriptxsize   = readshort(f)
    metrics.superscriptysize   = readshort(f)
    metrics.superscriptxoffset = readshort(f)
    metrics.superscriptyoffset = readshort(f)
    metrics.strikeoutsize      = readshort(f)
    metrics.strikeoutpos       = readshort(f)
    metrics.familyclass        = readshort(f)
    metrics.panose             = { readbytes(f,10) }
    metrics.unicoderanges      = { readulong(f), readulong(f), readulong(f), readulong(f) }
    metrics.vendor             = readstring(f,4)
    metrics.fsselection        = readushort(f)
    metrics.firstcharindex     = readushort(f)
    metrics.lastcharindex      = readushort(f)
    metrics.typoascender       = readshort(f)
    metrics.typodescender      = readshort(f)
    metrics.typolinegap        = readshort(f)
    metrics.winascent          = readushort(f)
    metrics.windescent         = readushort(f)
    --
    if version >= 1 then
        metrics.codepageranges = { readulong(f), readulong(f) }
    end
    if version >= 2 then
        metrics.xheight     = readshort(f)
        metrics.capheight   = readshort(f)
        metrics.defaultchar = readushort(f)
        metrics.breakchar   = readushort(f)
        -- maxcontexts and the optical point sizes are not read
    end
    --
    -- todo: unicoderanges
    --
    local weightclass = metrics.weightclass
    local widthclass  = metrics.widthclass
    local panose      = metrics.panose
    --
    metrics.weight       = weightclass and weights[weightclass]
    metrics.width        = widthclass and widths[widthclass]
    metrics.panoseweight = panoseweights[panose[3]]
    metrics.panosewidth  = panosewidths [panose[4]]
    --
    fontdata.windowsmetrics = metrics
end
1033
-- Load the head table into fontdata.fontheader (an empty table when the font has no
-- head table). The number of glyphs is always reset to zero here.

readers.head = function(f,fontdata)
    local fontheader = { }
    if gotodatatable(f,fontdata,"head",true) then
        local version     = readulong(f)
        local fontversion = readulong(f)
        fontheader.version           = version
        fontheader.fontversion       = number.to16dot16(fontversion)
        fontheader.fontversionnumber = fontversion
        -- the checksum is assembled from two shorts instead of reading one ulong
        local checksumhigh = readushort(f)
        local checksumlow  = readushort(f)
        fontheader.checksum          = checksumhigh * 0x10000 + checksumlow
        fontheader.magic             = readulong(f)
        fontheader.flags             = readushort(f)
        fontheader.units             = readushort(f)
        fontheader.created           = readlongdatetime(f)
        fontheader.modified          = readlongdatetime(f)
        fontheader.xmin              = readshort(f)
        fontheader.ymin              = readshort(f)
        fontheader.xmax              = readshort(f)
        fontheader.ymax              = readshort(f)
        fontheader.macstyle          = readushort(f)
        fontheader.smallpixels       = readushort(f)
        fontheader.directionhint     = readshort(f)
        fontheader.indextolocformat  = readshort(f)
        fontheader.glyphformat       = readshort(f)
    end
    fontdata.fontheader = fontheader
    fontdata.nofglyphs  = 0
end
1066
1067-- This table is a rather simple one. No treatment of values is needed here. Most
1068-- variables are not used but nofmetrics is quite important.
1069
-- Load the hhea table into fontdata.horizontalheader. This only happens when the
-- specification asks for details; otherwise (or when the table is missing) we end
-- up with a header that only has nofmetrics set to zero.

readers.hhea = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"hhea",specification.details) then
        fontdata.horizontalheader = { nofmetrics = 0 }
        return
    end
    local header = { }
    header.version             = readulong(f)
    header.ascender            = readfword(f)
    header.descender           = readfword(f)
    header.linegap             = readfword(f)
    header.maxadvancewidth     = readufword(f)
    header.minleftsidebearing  = readfword(f)
    header.minrightsidebearing = readfword(f)
    header.maxextent           = readfword(f)
    header.caretsloperise      = readshort(f)
    header.caretsloperun       = readshort(f)
    header.caretoffset         = readshort(f)
    header.reserved_1          = readshort(f)
    header.reserved_2          = readshort(f)
    header.reserved_3          = readshort(f)
    header.reserved_4          = readshort(f)
    header.metricdataformat    = readshort(f)
    header.nofmetrics          = readushort(f)
    fontdata.horizontalheader  = header
end
1098
-- Load the vhea table into fontdata.verticalheader. This only happens when the
-- specification asks for details; otherwise (or when the table is missing) we end
-- up with a header that only has nofmetrics set to zero.

readers.vhea = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"vhea",specification.details) then
        fontdata.verticalheader = { nofmetrics = 0 }
        return
    end
    local header = { }
    header.version              = readulong(f)
    header.ascender             = readfword(f)
    header.descender            = readfword(f)
    header.linegap              = readfword(f)
    header.maxadvanceheight     = readufword(f)
    header.mintopsidebearing    = readfword(f)
    header.minbottomsidebearing = readfword(f)
    header.maxextent            = readfword(f)
    header.caretsloperise       = readshort(f)
    header.caretsloperun        = readshort(f)
    header.caretoffset          = readshort(f)
    header.reserved_1           = readshort(f)
    header.reserved_2           = readshort(f)
    header.reserved_3           = readshort(f)
    header.reserved_4           = readshort(f)
    header.metricdataformat     = readshort(f)
    header.nofmetrics           = readushort(f)
    fontdata.verticalheader     = header
end
1127
1128-- We probably never need all these variables, but we do need the nofglyphs when loading other
1129-- tables. Again we use the microsoft names but see no reason to have "max" in each name.
1130
1131-- fontdata.maximumprofile can be bad
1132
-- Load the maxp table (only when the specification asks for details). The number of
-- glyphs always ends up in fontdata.nofglyphs. The profile in fontdata.maximumprofile
-- depends on the version: 0x00005000 only has the glyph count, 0x00010000 has all the
-- fields and for any other version the glyph count in the profile is set to zero.

readers.maxp = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"maxp",specification.details) then
        return
    end
    local version   = readulong(f)
    local nofglyphs = readushort(f)
    local profile   = {
        version   = version,
        nofglyphs = nofglyphs,
    }
    fontdata.nofglyphs = nofglyphs
    if version == 0x00005000 then
        -- nothing more to read
    elseif version == 0x00010000 then
        profile.points             = readushort(f)
        profile.contours           = readushort(f)
        profile.compositepoints    = readushort(f)
        profile.compositecontours  = readushort(f)
        profile.zones              = readushort(f)
        profile.twilightpoints     = readushort(f)
        profile.storage            = readushort(f)
        profile.functiondefs       = readushort(f)
        profile.instructiondefs    = readushort(f)
        profile.stackelements      = readushort(f)
        profile.sizeofinstructions = readushort(f)
        profile.componentelements  = readushort(f)
        profile.componentdepth     = readushort(f)
    else
        -- an unknown version: the profile gets no glyph count
        profile.nofglyphs = 0
    end
    fontdata.maximumprofile = profile
end
1170
1171-- Here we filter the (advance) widths (that can be different from the boundingbox width of
1172-- course).
1173
-- Load the hmtx table (only when the specification asks for glyphs). Only non zero
-- advance widths are stored in the glyphs. The left side bearing is read but not
-- kept. Glyphs beyond the number of metrics get the last width that was read.

readers.hmtx = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"hmtx",specification.glyphs) then
        return
    end
    local nofmetrics = fontdata.horizontalheader.nofmetrics
    local glyphs     = fontdata.glyphs
    local nofglyphs  = fontdata.nofglyphs
    local advance    = 0
    for index=0,nofmetrics-1 do
        local glyph = glyphs[index]
        advance = readshort(f) -- readushort
        readshort(f) -- the left side bearing, not stored (would become glyph.lsb)
        if advance ~= 0 then
            glyph.width = advance
        end
    end
    -- The next can happen in for instance a monospace font or in a cjk font
    -- with fixed widths.
    for index=nofmetrics,nofglyphs-1 do
        local glyph = glyphs[index]
        if advance ~= 0 then
            glyph.width = advance
        end
    end
end
1210
-- Load the vmtx table (only when the specification asks for glyphs). A vertical
-- height is only stored when it is non zero and differs from the default (ascender
-- minus descender of the vertical header). A top side bearing is stored when it is
-- non zero. Glyphs beyond the number of metrics get the last height that was read.

readers.vmtx = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"vmtx",specification.glyphs) then
        return
    end
    local header     = fontdata.verticalheader
    local nofmetrics = header.nofmetrics
    local glyphs     = fontdata.glyphs
    local nofglyphs  = fontdata.nofglyphs
    local height     = 0
    local default    = header.ascender - header.descender
    for index=0,nofmetrics-1 do
        local glyph = glyphs[index]
        height = readushort(f)
        local topsidebearing = readshort(f)
        if not (height == 0 or height == default) then
            glyph.vheight = height
        end
        if topsidebearing ~= 0 then
            glyph.tsb = topsidebearing
        end
    end
    -- The next can happen in for instance a monospace font or in a cjk font
    -- with fixed heights.
    for index=nofmetrics,nofglyphs-1 do
        local glyph = glyphs[index]
        if not (height == 0 or height == default) then
            glyph.vheight = height
        end
    end
end
1242
-- The vorg table is not loaded here. When glyphs are requested and the font has
-- this table we only report that loading it is skipped.
readers.vorg = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"vorg",specification.glyphs)
end
1246
1247-- The post table relates to postscript (printing) but has some relevant properties for other
1248-- usage as well. We just use the names from the microsoft specification. The version 2.0
1249-- description is somewhat fuzzy but it is a hybrid with overloads.
1250
-- Load the post table. The header always ends up in fontdata.postscript (an empty
-- table when the font has no post table). Glyph names are only set when the
-- specification asks for glyphs:
--
-- version 1.0 : the glyphs get the names from the standard roman (mac) encoding
-- version 2.0 : each glyph has a name index; below 258 it points into the standard
--               roman encoding, otherwise the name is taken from the pascal strings
--               that follow the index array
--
-- Other versions don't provide names.

readers.post = function(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"post",true)
    if not tableoffset then
        fontdata.postscript = { }
        return
    end
    local version = readulong(f)
    fontdata.postscript = {
        version            = version,
        italicangle        = readfixed(f),
        underlineposition  = readfword(f),
        underlinethickness = readfword(f),
        monospaced         = readulong(f),
        minmemtype42       = readulong(f),
        maxmemtype42       = readulong(f),
        minmemtype1        = readulong(f),
        maxmemtype1        = readulong(f),
    }
    if not specification.glyphs then
        -- enough done
        return
    end
    -- both name branches need the glyphs (this used to be defined in the version
    -- 2.0 branch only, so the version 1.0 branch accessed an undefined variable)
    local glyphs = fontdata.glyphs
    if version == 0x00010000 then
        -- mac encoding (258 glyphs), but the font can have less glyphs than that
        for index=0,#standardromanencoding do
            local glyph = glyphs[index]
            if glyph then
                glyph.name = standardromanencoding[index]
            end
        end
    elseif version == 0x00020000 then
        local nofglyphs = readushort(f)
        local indices   = { }
        local maxnames  = 0
        for i=0,nofglyphs-1 do
            local nameindex = readushort(f)
            if nameindex >= 258 then
                -- a custom name: remember which glyph gets the n-th string
                maxnames  = maxnames + 1
                nameindex = nameindex - 257
                indices[nameindex] = i
            else
                glyphs[i].name = standardromanencoding[nameindex]
            end
        end
        for i=1,maxnames do
            local mapping = indices[i]
            if not mapping then
                report("quit post name fetching at %a of %a: %s",i,maxnames,"no index")
                break
            else
                -- pascal string: a length byte followed by the name
                local length = readbyte(f)
                if length > 0 then
                    glyphs[mapping].name = readstring(f,length)
                else
                 -- report("quit post name fetching at %a of %a: %s",i,maxnames,"overflow")
                 -- break
                end
            end
        end
    end
end
1309
-- The cff table is not loaded here. When glyphs are requested and the font has
-- this table we only report that loading it is skipped.
readers.cff = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"cff",specification.glyphs)
end
1313
1314-- Not all cmaps make sense .. e.g. dfont is obsolete and probably more are not relevant. Let's see
1315-- what we run into. There is some weird calculation going on here because we offset in a table
1316-- being a blob of memory or file. Anyway, I can't stand lunatic formats like this esp when there
1317-- is no real gain.
1318
-- The readers for the cmap subtables, indexed by format number.
local formatreaders = { }
-- When set, format 4 registers additional unicodes for a glyph as duplicates
-- instead of reporting them.
local duplicatestoo = true

-- Each entry is a triplet; presumably { platform id, encoding id, subtable format }
-- in order of preference (the code that uses this list is not in this part of the
-- file, so verify there).
local sequence = {
    -- there is some provision against redundant loading
    { 3,  1,  4 },
    { 3, 10, 12 },
    { 0,  3,  4 },
    { 0,  3, 12 },
    { 0,  1,  4 },
    { 0,  1, 12 }, -- for some old mac fonts
    { 0,  0,  6 },
    { 3,  0,  6 },
    { 3,  0,  4 }, -- for (likely) old crap
    -- variants
    { 0,  5, 14 },
    -- last resort ranges
    { 0,  4, 12 },
    { 3, 10, 13 },
}

-- A nested lookup table built from the sequence: supported[a][b][c] is true for
-- each triplet { a, b, c } in the list above.
local supported = {  }

for i=1,#sequence do
    local si = sequence[i]
    local sp, se, sf = si[1], si[2], si[3]
    local p = supported[sp]
    if not p then
        p = { }
        supported[sp] = p
    end
    local e = p[se]
    if not e then
        e = { }
        p[se] = e
    end
    e[sf] = true
end
1357
-- Read a format 4 cmap subtable (segment mapping to delta values) that starts at the
-- given offset. For each segment the unicodes from startchar upto endchar are mapped
-- onto glyph indices, either directly (unicode plus delta, when the segment has no
-- range offset) or via the trailing glyph index array. The first unicode that we
-- see for a glyph becomes glyph.unicode and mapping[index]; other unicodes for the
-- same glyph are registered in duplicates (or reported when duplicatestoo is off).
--
-- The function returns the number of glyphs that got a unicode assigned.

formatreaders[4] = function(f,fontdata,offset)
    setposition(f,offset+2) -- skip format
    --
    local length      = readushort(f) -- in bytes of subtable
    local language    = readushort(f)
    local nofsegments = readushort(f) / 2
    --
    skipshort(f,3) -- searchrange entryselector rangeshift
    --
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local nofdone    = 0
    local endchars   = readcardinaltable(f,nofsegments,ushort)
    local reserved   = readushort(f) -- 0
    local startchars = readcardinaltable(f,nofsegments,ushort)
    local deltas     = readcardinaltable(f,nofsegments,ushort)
    local offsets    = readcardinaltable(f,nofsegments,ushort)
    -- format length language nofsegments searchrange entryselector rangeshift 4-tables
    local size       = (length - 2 * 2 - 5 * 2 - 4 * 2 * nofsegments) / 2
    local indices    = readcardinaltable(f,size-1,ushort)
    --
    -- Both kinds of segments register a glyph in the same way; only the message
    -- that is used when duplicates are not collected differs.
    --
    local function register(index,unicode,message)
        local glyph = glyphs[index]
        if glyph then
            local gu = glyph.unicode
            if not gu then
                glyph.unicode = unicode
                nofdone = nofdone + 1
            elseif gu ~= unicode then
                if duplicatestoo then
                    local d = duplicates[gu]
                    if d then
                        d[unicode] = true
                    else
                        duplicates[gu] = { [unicode] = true }
                    end
                else
                    -- no duplicates ... weird side effects in lm
                    report(message,unicode,index,glyph.name)
                end
            end
            if not mapping[index] then
                mapping[index] = unicode
            end
        end
    end
    --
    for segment=1,nofsegments do
        local startchar = startchars[segment]
        local endchar   = endchars[segment]
        local offset    = offsets[segment]
        local delta     = deltas[segment]
        if startchar == 0xFFFF and endchar == 0xFFFF then
            -- the final segment, nothing to map
        elseif startchar == 0xFFFF and offset == 0 then
            -- idem
        elseif offset == 0xFFFF then
            -- bad encoding
        elseif offset == 0 then
            -- the index is the unicode plus the delta (modulo 65536)
            if trace_cmap_details then
                report_cmap("format 4.%i segment %2i from %C upto %C at index %H",1,segment,startchar,endchar,(startchar + delta) % 65536)
            end
            for unicode=startchar,endchar do
                local index = (unicode + delta) % 65536
                if index > 0 then
                    register(index,unicode,"duplicate case 1: %C %04i %s")
                end
            end
        else
            -- the index comes from the glyph index array; the range offset is
            -- relative to its own slot in the offsets array, hence the shift
            local shift = (segment-nofsegments+offset/2) - startchar
            if trace_cmap_details then
                report_cmap("format 4.%i segment %2i from %C upto %C at index %H",0,segment,startchar,endchar,(startchar + delta) % 65536)
            end
            for unicode=startchar,endchar do
                local slot  = shift + unicode
                local index = indices[slot]
                if index and index > 0 then
                    index = (index + delta) % 65536
                    register(index,unicode,"duplicate case 2: %C %04i %s")
                end
            end
        end
    end
    return nofdone
end
1462
-- Read a format 6 cmap subtable (trimmed table mapping) that starts at the given
-- offset: a first unicode, a count and then one glyph index per unicode. The first
-- unicode that we see for a glyph becomes glyph.unicode and mapping[index]; a
-- different unicode for an already set glyph is ignored (no duplicates here).
--
-- The function returns the number of glyphs that got a unicode assigned.

formatreaders[6] = function(f,fontdata,offset)
    setposition(f,offset+2+2+2) -- skip format length language
    local mapping = fontdata.mapping
    local glyphs  = fontdata.glyphs
    local start   = readushort(f)
    local count   = readushort(f)
    local stop    = start+count-1
    local nofdone = 0
    if trace_cmap_details then
        -- there was a stray first argument here that made the range come out wrong
        report_cmap("format 6 from %C to %C",start,stop)
    end
    for unicode=start,stop do
        local index = readushort(f)
        if index > 0 then
            local glyph = glyphs[index]
            if glyph then
                local gu = glyph.unicode
                if not gu then
                    glyph.unicode = unicode
                    nofdone = nofdone + 1
                elseif gu ~= unicode then
                    -- report("format 6 overloading %C to %C",gu,unicode)
                    -- glyph.unicode = unicode
                    -- no duplicates ... weird side effects in lm
                end
                if not mapping[index] then
                    mapping[index] = unicode
                end
            end
        end
    end
    return nofdone
end
1500
-- Read a format 12 cmap subtable (segmented coverage) that starts at the given
-- offset. Each group maps the unicodes from first upto last onto consecutive glyph
-- indices. The first unicode that we see for a glyph becomes glyph.unicode and
-- mapping[index]; other unicodes for the same glyph end up in duplicates.
--
-- The function returns the number of glyphs that got a unicode assigned.

formatreaders[12] = function(f,fontdata,offset)
    setposition(f,offset+2+2+4+4) -- skip format reserved length language
    local mapping, glyphs, duplicates = fontdata.mapping, fontdata.glyphs, fontdata.duplicates
    local nofgroups = readulong(f)
    local nofdone   = 0
    for group=1,nofgroups do
        local first      = readulong(f)
        local last       = readulong(f)
        local startindex = readulong(f)
        if trace_cmap_details then
            report_cmap("format 12 from %C to %C starts at index %i",first,last,startindex)
        end
        for unicode=first,last do
            -- the indices run parallel to the unicodes
            local index = startindex + (unicode - first)
            local glyph = glyphs[index]
            if glyph then
                local known = glyph.unicode
                if not known then
                    glyph.unicode = unicode
                    nofdone = nofdone + 1
                elseif known ~= unicode then
                    -- e.g. sourcehan fonts need this
                    local list = duplicates[known]
                    if not list then
                        list = { }
                        duplicates[known] = list
                    end
                    list[unicode] = true
                end
                if not mapping[index] then
                    mapping[index] = unicode
                end
            end
        end
    end
    return nofdone
end
1540
-- Read a format 13 cmap subtable (many to one range mappings) that starts at the
-- given offset. Each group maps all unicodes from first upto last onto one and the
-- same glyph. When the glyph has no unicode yet it gets the first one of the range;
-- the (remaining) unicodes of the range are registered as duplicates of the unicode
-- of the glyph. Groups that start at or beyond privateoffset are ignored and groups
-- that run into it are pruned. A group that refers to a glyph that we don't have is
-- ignored too (that used to trigger an error).
--
-- The function returns the number of unicodes that were handled this way.

formatreaders[13] = function(f,fontdata,offset)
    --
    -- this vector is only used for simple fallback fonts
    --
    setposition(f,offset+2+2+4+4) -- skip format reserved length language
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local nofgroups  = readulong(f)
    local nofdone    = 0
    for i=1,nofgroups do
        local first = readulong(f)
        local last  = readulong(f)
        local index = readulong(f)
        if first < privateoffset then
            if trace_cmap_details then
                report_cmap("format 13 from %C to %C get index %i",first,last,index)
            end
            local glyph = glyphs[index]
            if glyph then
                local unicode = glyph.unicode
                if not unicode then
                    -- the glyph takes the first unicode, the rest become duplicates
                    unicode = first
                    glyph.unicode = unicode
                    first = first + 1
                end
                local list     = duplicates[unicode]
                mapping[index] = unicode
                if not list then
                    list = { }
                    duplicates[unicode] = list
                end
                if last >= privateoffset then
                    local limit = privateoffset - 1
                    report("format 13 from %C to %C pruned to %C",first,last,limit)
                    last = limit
                end
                for unicode=first,last do
                    list[unicode] = true
                end
                nofdone = nofdone + last - first + 1
            else
                -- the other format readers also skip indices without a glyph
                report("format 13 from %C to %C ignored, no glyph with index %i",first,last,index)
            end
        else
            report("format 13 from %C to %C ignored",first,last)
        end
    end
    return nofdone
end
1587
-- Reader for cmap subtable format 14 (unicode variation sequences). For each
-- variation selector that has a non-default table we collect a mapping from
-- code point to glyph index and store it in fontdata.variants[selector]. The
-- return value is the number of collected mappings; without a usable offset we
-- return 0 and leave fontdata untouched.
--
-- All reads are done in separate statements: the stream position advances with
-- every read, and Lua does not define the order in which the operands of an
-- assignment or the fields of a table constructor are evaluated.

formatreaders[14] = function(f,fontdata,offset)
    if not offset or offset == 0 then
        return 0
    end
    setposition(f,offset)
    readushort(f) -- format, not used
    readulong(f)  -- length, not used
    local nofrecords  = readulong(f)
    local records     = { }
    local variants    = { }
    local nofdone     = 0
    fontdata.variants = variants
    for i=1,nofrecords do
        local selector = readuint(f)
        local default  = readulong(f) -- default offset
        local other    = readulong(f) -- non-default offset
        records[i] = {
            selector = selector,
            default  = default,
            other    = other,
        }
    end
    for i=1,nofrecords do
        local record = records[i]
        local other  = record.other
        --
        -- There is no need to map the defaults to themselves, so the table
        -- at record.default is not read at all.
        --
        if other ~= 0 then
            setposition(f,offset+other)
            local mapping = { }
            local count   = readulong(f)
            for j=1,count do
                -- first the code point, then the glyph index
                local unicode = readuint(f)
                local index   = readushort(f)
                mapping[unicode] = index
            end
            nofdone = nofdone + count
            variants[record.selector] = mapping
        end
    end
    return nofdone
end
1641
-- Run the reader for one platform / encoding / format combination, if that
-- combination is present in records and has not been handled before. A pending
-- subtable is stored as a number (its offset); once it is picked up the entry is
-- set to true so that it is never read twice. The return value is what the
-- reader returns (or 0), and 0 whenever the combination gets skipped.

local function checkcmap(f,fontdata,records,platform,encoding,format)
    local function skipped(reason)
        if trace_cmap_details then
            report_cmap("skipped, %s, p=%i e=%i f=%i",reason,platform,encoding,format)
        end
        return 0
    end
    local pdata = records[platform]
    if not pdata then
        return skipped("no platform")
    end
    local edata = pdata[encoding]
    if not edata then
        return skipped("no encoding")
    end
    local fdata = edata[format]
    if not fdata then
        return skipped("no format")
    end
    if type(fdata) ~= "number" then
        return skipped("already done")
    end
    -- mark as done before we look for a reader
    edata[format] = true
    local reader = formatreaders[format]
    if not reader then
        return skipped("unsupported format")
    end
    local n = reader(f,fontdata,fdata) or 0
    if trace_cmap_details or trace_cmap then
        local p = platforms[platform]
        local e = encodings[p]
        report_cmap("checked, platform %i (%s), encoding %i (%s), format %i, new unicodes %i",
            platform,p,encoding,e and e[encoding] or "?",format,n)
    end
    return n
end
1686
-- Load the cmap table. This happens in three steps: first the encoding records
-- are collected per platform and encoding, then for each subtable its format is
-- peeked so that we end up with records[platform][encoding][format] = offset,
-- and finally the combinations listed in 'sequence' are fed to checkcmap in
-- that order. The collected records end up in fontdata.cidmaps; when there is
-- no cmap table (or it is skipped) that field becomes an empty table.

function readers.cmap(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"cmap",specification.glyphs)
    if not tableoffset then
        fontdata.cidmaps = { }
        return
    end
    local version   = readushort(f) -- check later versions
    local noftables = readushort(f)
    local records   = { }
    if not fontdata.duplicates then
        fontdata.duplicates = { }
    end
    -- step 1: collect the subtable offsets
    for i=1,noftables do
        local platform = readushort(f)
        local encoding = readushort(f)
        local offset   = readulong(f)
        local record   = records[platform]
        if not record then
            record = { }
            records[platform] = record
        end
        local subtables = record[encoding]
        if subtables then
            local offsets = subtables.offsets
            offsets[#offsets+1] = offset
        else
            record[encoding] = {
                offsets = { offset },
                formats = { },
            }
        end
    end
    if trace_cmap then
        report("found cmaps:")
    end
    -- step 2: determine the format of each subtable
    for platform, record in sortedhash(records) do
        local p  = platforms[platform]
        local e  = encodings[p]
        local sp = supported[platform]
        if trace_cmap then
            report(sp and "  platform %i: %s" or "  platform %i: %s (unsupported)",platform,p or "?")
        end
        for encoding, subtables in sortedhash(record) do
            local se = sp and sp[encoding]
            if trace_cmap then
                report(se and "    encoding %i: %s" or "    encoding %i: %s (unsupported)",encoding,e and e[encoding] or "?")
            end
            local offsets = subtables.offsets
            local formats = subtables.formats
            for i=1,#offsets do
                local offset = tableoffset + offsets[i]
                setposition(f,offset)
                formats[readushort(f)] = offset
            end
            -- from now on we only need the format to offset hash
            record[encoding] = formats
            if trace_cmap then
                local list = sortedkeys(formats)
                for i=1,#list do
                    local format = list[i]
                    if not (se and se[format]) then
                        list[i] = format .. " (unsupported)"
                    end
                end
                report("      formats: % t",list)
            end
        end
    end
    -- step 3: read the vectors in order of preference
    local ok = false
    for i=1,#sequence do
        local si = sequence[i]
        if checkcmap(f,fontdata,records,si[1],si[2],si[3]) > 0 then
            ok = true
        end
    end
    if not ok then
        report("no useable unicode cmap found")
    end
    --
    fontdata.cidmaps = {
        version   = version,
        noftables = noftables,
        records   = records,
    }
end
1789
-- The glyf table depends on the loca table. We have one entry too many in the locations table (the
-- last one is a dummy) because we need to calculate the size of a glyph blob from the delta,
-- although we do not need it in our usage (yet). We can remove the locations table when we're done.
1793
-- The loca table is not read in this module; we only pass it on to
-- reportskippedtable, controlled by specification.glyphs.

readers.loca = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"loca",specification.glyphs)
end
1797
-- The glyf table is not read in this module (part goes to the cff module); we
-- only pass it on to reportskippedtable, controlled by specification.glyphs.

readers.glyf = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"glyf",specification.glyphs)
end
1801
-- The Microsoft variant is pretty clean and is supported (implemented elsewhere)
-- just because I wanted to see what such a font looks like.
1804
-- The color tables (colr and cpal) are not read in this module; both are only
-- passed on to reportskippedtable, controlled by specification.glyphs.

readers.colr = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"colr",specification.glyphs)
end

readers.cpal = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"cpal",specification.glyphs)
end
1811
1812-- This one is also supported, if only because I could locate a proper font for
1813-- testing.
1814
-- The svg table is not read in this module; it is only passed on to
-- reportskippedtable, controlled by specification.glyphs.

readers.svg = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"svg",specification.glyphs)
end
1818
1819-- There is a font from apple to test the next one. Will there be more? Anyhow,
1820-- it's relatively easy to support, so I did it.
1821
-- The sbix table is not read in this module; it is only passed on to
-- reportskippedtable, controlled by specification.glyphs.

readers.sbix = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"sbix",specification.glyphs)
end
1825
1826-- I'm only willing to look into the next variant if I see a decent and complete (!)
1827-- font and more can show up. It makes no sense to waste time on ideas. Okay, the
1828-- apple font also has these tables.
1829
-- The bitmap related tables (cbdt, cblc, ebdt, ebsc and eblc) are not read in
-- this module; each of them is only passed on to reportskippedtable, controlled
-- by specification.glyphs.

readers.cbdt = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"cbdt",specification.glyphs)
end

readers.cblc = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"cblc",specification.glyphs)
end

readers.ebdt = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"ebdt",specification.glyphs)
end

readers.ebsc = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"ebsc",specification.glyphs)
end

readers.eblc = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"eblc",specification.glyphs)
end
1845
1846-- Here we have a table that we really need for later processing although a more advanced gpos table
1847-- can also be available. Todo: we need a 'fake' lookup for this (analogue to ff).
1848
-- Load the (old style) kern table. Only format 0 subtables (plain pair lists)
-- are supported: each pair is stored as glyphs[left].kerns[right] = kern, and
-- later pairs overwrite earlier ones for the same left/right combination.
--
-- Subtables in another format are reported and then skipped by means of their
-- length field. Without that skip the header of the next subtable would be read
-- from somewhere inside the unsupported one. For format 0 we keep reading
-- sequentially and derive the size from the number of pairs, so we don't depend
-- on the 16 bit length field there.

function readers.kern(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"kern",specification.kerns)
    if not tableoffset then
        return
    end
    readushort(f) -- version, not used
    local noftables = readushort(f)
    local glyphs    = fontdata.glyphs
    -- start of the current subtable: after version and noftables
    local position  = tableoffset + 4
    for i=1,noftables do
        readushort(f) -- subtable version, not used
        local length   = readushort(f)
        local coverage = readushort(f)
        -- bit 8-15 of coverage: format 0 or 2
        local format   = rshift(coverage,8)
        if format == 0 then
            local nofpairs = readushort(f)
            readushort(f) -- searchrange, not used
            readushort(f) -- entryselector, not used
            readushort(f) -- rangeshift, not used
            for j=1,nofpairs do
                local left  = readushort(f)
                local right = readushort(f)
                local kern  = readfword(f)
                local glyph = glyphs[left]
                local kerns = glyph.kerns
                if kerns then
                    kerns[right] = kern
                else
                    glyph.kerns = { [right] = kern }
                end
            end
            -- 7 header fields of 2 bytes each plus 3 fields of 2 bytes per
            -- pair (assuming readfword reads a 16 bit value, as the format
            -- prescribes)
            position = position + 14 + nofpairs * 6
        else
            if format == 2 then
                report("todo: kern classes")
            else
                report("todo: kerns")
            end
            -- jump over the part that we didn't read
            position = position + length
            setposition(f,position)
        end
    end
end
1887
-- The layout related tables (gdef, gsub, gpos and math) are not read in this
-- module; each of them is only passed on to reportskippedtable, this time
-- controlled by specification.details.

readers.gdef = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"gdef",specification.details)
end

readers.gsub = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"gsub",specification.details)
end

readers.gpos = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"gpos",specification.details)
end

readers.math = function(f,fontdata,specification)
    reportskippedtable(f,fontdata,"math",specification.details)
end
1903
1904-- Now comes the loader. The order of reading these matters as we need to know
1905-- some properties in order to read following tables. When details is true we also
1906-- initialize the glyphs data.
1907
-- Collect the basic properties of a font (or of one subfont) in a flat table.
--
-- maindata       : the loaded font data, possibly with a subfonts list
-- sub            : optional subfont index; when maindata.subfonts[sub] exists
--                  that entry is used, otherwise maindata itself
-- platformnames  : when set, fontdata.platformnames is included
-- rawfamilynames : when set, the typographic (sub)family names don't fall back
--                  on the regular (sub)family names
-- metricstoo     : when set, horizontalmetrics and verticalmetrics are added
-- instancenames  : when set, the lowercased and stripped subfamily names of
--                  the variable instances are included (if there are any)
--
-- When the font has no names table we return a small table with only the
-- filename and a comment. The comment mentions the subfont when one was asked
-- for (this used to test an undefined variable 'n' instead of 'sub').

local function getinfo(maindata,sub,platformnames,rawfamilynames,metricstoo,instancenames)
    local fontdata = sub and maindata.subfonts and maindata.subfonts[sub] or maindata
    local names    = fontdata.names
    if not names then
        local comment = "there is no info"
        if sub then
            comment = comment .. " for subfont " .. tostring(sub)
        end
        return {
            filename = fontdata.filename,
            comment  = comment,
        }
    end
    -- missing tables are replaced by empty ones so that we can index freely
    local metrics        = fontdata.windowsmetrics or { }
    local postscript     = fontdata.postscript     or { }
    local fontheader     = fontdata.fontheader     or { }
    local cffinfo        = fontdata.cffinfo        or { }
    local verticalheader = fontdata.verticalheader or { }
    local weight         = getname(fontdata,"weight") or cffinfo.weight or metrics.weight
    local width          = getname(fontdata,"width")  or cffinfo.width  or metrics.width
    local fontname       = getname(fontdata,"postscriptname")
    local fullname       = getname(fontdata,"fullname")
    local family         = getname(fontdata,"family")
    local subfamily      = getname(fontdata,"subfamily")
    local familyname     = getname(fontdata,"typographicfamily")
    local subfamilyname  = getname(fontdata,"typographicsubfamily")
    local compatiblename = getname(fontdata,"compatiblefullname") -- kind of useless
    if rawfamilynames then
        -- for PG (for now, as i need to check / adapt context to catch a no-fallback case)
    else
        if not    familyname then    familyname =    family end
        if not subfamilyname then subfamilyname = subfamily end
    end
    if platformnames then
        platformnames = fontdata.platformnames
    end
    if instancenames then
        local variabledata = fontdata.variabledata
        local instances    = variabledata and variabledata.instances
        if instances then
            instancenames = { }
            for i=1,#instances do
                instancenames[i] = lower(stripstring(instances[i].subfamily))
            end
        else
            instancenames = nil
        end
    end
    local info = { -- we inherit some inconsistencies/choices from ff
        subfontindex   = fontdata.subfontindex or sub or 0,
     -- filename       = fontdata.filename,
        version        = getname(fontdata,"version"),
     -- format         = fontdata.format,
        fontname       = fontname,
        fullname       = fullname,
     -- cfffullname    = cff.fullname,
        family         = family,
        subfamily      = subfamily,
        familyname     = familyname,
        subfamilyname  = subfamilyname,
        compatiblename = compatiblename,
        weight         = weight and lower(weight),
        width          = width and lower(width),
        pfmweight      = metrics.weightclass or 400, -- will become weightclass
        pfmwidth       = metrics.widthclass or 5, -- will become widthclass
        panosewidth    = metrics.panosewidth,
        panoseweight   = metrics.panoseweight,
        fstype         = metrics.fstype or 0, -- embedding, subsetting and editing
        italicangle    = postscript.italicangle or 0,
        units          = fontheader.units or 0,
        designsize     = fontdata.designsize,
        minsize        = fontdata.minsize,
        maxsize        = fontdata.maxsize,
        boundingbox    = { fontheader.xmin or 0, fontheader.ymin or 0, fontheader.xmax or 0, fontheader.ymax or 0 },
        -- a value that is not a number counts as zero instead of triggering
        -- a comparison with nil
        monospaced     = ((tonumber(postscript.monospaced) or 0) > 0) or metrics.panosewidth == "monospaced",
        averagewidth   = metrics.averagewidth,
        xheight        = metrics.xheight, -- can be missing
        capheight      = metrics.capheight or fontdata.maxy, -- can be missing
        ascender       = metrics.typoascender,
        descender      = metrics.typodescender,
        ascent         = metrics.winascent,  -- these might be more reliable
        descent        = metrics.windescent, -- these might be more reliable
        platformnames  = platformnames or nil,
        instancenames  = instancenames or nil,
        tableoffsets   = fontdata.tableoffsets,
        defaultvheight = (verticalheader.ascender or 0) - (verticalheader.descender or 0)
    }
 -- print(fontname,fontheader.macstyle) : maybe for italic
    if metricstoo then
        local keys = {
            "version",
            "ascender", "descender", "linegap",
         -- "caretoffset", "caretsloperise", "caretsloperun",
            "maxadvancewidth", "maxadvanceheight", "maxextent",
         -- "metricdataformat",
            "minbottomsidebearing", "mintopsidebearing",
        }
        local h  = fontdata.horizontalheader or { }
        local v  = fontdata.verticalheader   or { }
        local th = { }
        local tv = { }
        for i=1,#keys do
            local key = keys[i]
            th[key] = h[key] or 0
            tv[key] = v[key] or 0
        end
        info.horizontalmetrics = th
        info.verticalmetrics   = tv
    end
 -- inspect(info)
    return info
end
2030
-- Read the font header and the table directory, optionally after moving to
-- offset first. We return the fresh fontdata table plus the hash that maps a
-- (lowercased, stripped) table tag onto its checksum, offset and length. A
-- table that runs beyond the file size is reported but still registered. The
-- format is "opentype" when there is a cff or cff2 table and "truetype"
-- otherwise.

local function loadtables(f,specification,offset)
    if offset then
        setposition(f,offset)
    end
    local tables   = { }
    local basename = file.basename(specification.filename)
    local filesize = specification.filesize
    local filetime = specification.filetime
    -- the header, in the order in which the fields come in the file
    local version       = readstring(f,4)
    local noftables     = readushort(f)
    local searchrange   = readushort(f) -- not needed
    local entryselector = readushort(f) -- not needed
    local rangeshift    = readushort(f) -- not needed
    -- the directory
    for i=1,noftables do
        local tag      = lower(stripstring(readstring(f,4)))
        -- the checksum is composed from two halves (it is not used)
        local high     = readushort(f)
        local low      = readushort(f)
        local offset   = readulong(f)
        local length   = readulong(f)
        if offset + length > filesize then
            report("bad %a table in file %a",tag,basename)
        end
        tables[tag] = {
            checksum = high * 0x10000 + low,
            offset   = offset,
            length   = length,
        }
    end
 -- inspect(tables)
    local fontdata = { -- some can/will go
        filename      = basename,
        filesize      = filesize,
        filetime      = filetime,
        version       = version,
        noftables     = noftables,
        searchrange   = searchrange,
        entryselector = entryselector,
        rangeshift    = rangeshift,
        tables        = tables,
        foundtables   = sortedkeys(tables),
        format        = (tables.cff or tables.cff2) and "opentype" or "truetype",
    }
    return fontdata, tables
end
2075
-- Give fontdata a glyphs table that creates (and remembers) an entry on first
-- access, with the accessed key stored as its index, plus an empty mapping
-- table.

local function prepareglyps(fontdata)
    local function newglyph(glyphs,index)
        local glyph = {
            -- maybe more defaults
            index = index,
        }
        glyphs[index] = glyph
        return glyph
    end
    fontdata.glyphs  = setmetatableindex(newglyph)
    fontdata.mapping = { }
end
2088
-- Call the reader that is registered for tag, passing on all arguments. Tags
-- without a reader are silently ignored and nothing is returned in either case.

local function readtable(tag,f,fontdata,specification,...)
    local reader = readers[tag]
    if not reader then
        return
    end
    reader(f,fontdata,specification,...)
end
2095
-- Read one font (or one subfont of a collection) that starts at offset. The
-- order in which the tables are read matters because later readers need
-- properties set by earlier ones. When specification.askedname is set and
-- doesn't match the full name of the font (compared case insensitive, with only
-- letters and digits taken into account) we return nothing so that the caller
-- can keep searching. Otherwise the filled fontdata table is returned.

local function readdata(f,offset,specification)

    local fontdata, tables = loadtables(f,specification,offset)

    local function readall(tags)
        for i=1,#tags do
            readtable(tags[i],f,fontdata,specification)
        end
    end

    if specification.glyphs then
        prepareglyps(fontdata)
    end

    fontdata.temporary = { }

    readtable("name",f,fontdata,specification)

    local askedname = specification.askedname
    if askedname then
        local fullname = getname(fontdata,"fullname") or ""
        local wanted   = gsub(askedname,"[^a-zA-Z0-9]","")
        local found    = gsub(fullname,"[^a-zA-Z0-9]","")
        if lower(wanted) ~= lower(found) then
            return -- keep searching
        end
    end

    readall { "stat", "avar", "fvar" }

    local variabledata = fontdata.variabledata

    if variabledata then
        local instances = variabledata.instances
        local axis      = variabledata.axis
        if axis and (not instances or #instances == 0) then
            -- there are no instances, so we fake three per axis: one where
            -- that axis is at its default, its minimum and its maximum, with
            -- the other axes at their defaults
            instances = { }
            variabledata.instances = instances
            local function add(n,subfamily,value)
                local values = { }
                for i=1,#axis do
                    local a = axis[i]
                    values[i] = {
                        axis  = a.tag,
                        value = i == n and value or a.default,
                    }
                end
                instances[#instances+1] = {
                    subfamily = subfamily,
                    values    = values,
                }
            end
            for i=1,#axis do
                local a   = axis[i]
                local tag = a.tag
                add(i,"default"..tag,a.default)
                add(i,"minimum"..tag,a.minimum)
                add(i,"maximum"..tag,a.maximum)
            end
         -- report("%i fake instances added",#instances)
        end
    end

    -- a user instance, given by name
    if not specification.factors then
        local instance = specification.instance
        if type(instance) == "string" then
            local factors = helpers.getfactors(fontdata,instance)
            if not factors then
                report("user instance: %s, bad factors",instance)
            else
                specification.factors = factors
                fontdata.factors  = factors
                fontdata.instance = instance
                report("user instance: %s, factors: % t",instance,factors)
            end
        end
    end

    -- otherwise, for a variable font, whatever getfactors gives for 'true'
    if not fontdata.factors and fontdata.variabledata then
        local factors = helpers.getfactors(fontdata,true)
        if factors then
            specification.factors = factors
            fontdata.factors = factors
     --     report("factors: % t",factors)
     -- else
     --     report("bad factors")
        end
    end

    readall {
        "os/2", "head", "maxp", "hhea", "vhea", "hmtx", "vmtx", "vorg", "post",
        "mvar", "hvar", "vvar",
        "gdef",
        "cff", "cff2",
        "cmap",
        "loca", -- maybe load it in glyf
        "glyf", -- loads gvar
        "colr", "cpal",
        "svg",
        "sbix",
        "cbdt", "cblc", "ebdt", "eblc",
        "kern", "gsub", "gpos",
        "math",
    }

    -- these are no longer needed
    fontdata.locations    = nil
    fontdata.cidmaps      = nil
    fontdata.dictionaries = nil
 -- fontdata.cff          = nil

    if specification.tableoffsets then
        fontdata.tableoffsets = tables
        setmetatableindex(tables, {
            version       = fontdata.version,
            noftables     = fontdata.noftables,
            searchrange   = fontdata.searchrange,
            entryselector = fontdata.entryselector,
            rangeshift    = fontdata.rangeshift,
        })
    end

    return fontdata
end
2242
-- Open the file given in specification.filename and load what was asked for.
-- A single font is read directly. For a collection (ttcf) there are three
-- cases, depending on specification.subfont: a number selects that subfont, a
-- non-empty string is used as name to search for, and anything else loads all
-- subfonts into a wrapper table. When the file can't be opened or is empty we
-- report that and return nothing; in all other cases a table is returned, which
-- is empty when nothing could be loaded.

local function loadfontdata(specification)
    local filename = specification.filename
    local fileattr = lfs.attributes(filename)
    local filesize = fileattr and fileattr.size or 0
    local filetime = fileattr and fileattr.modification or 0
    local f = openfile(filename,true) -- zero based
    if not f then
        report("unable to open %a",filename)
        return
    end
    if filesize == 0 then
        report("empty file %a",filename)
        closefile(f)
        return
    end
    specification.filesize = filesize
    specification.filetime = filetime
    local version  = readstring(f,4)
    local fontdata = nil
    if version == "OTTO" or version == "true" or version == "\0\1\0\0" then
        fontdata = readdata(f,0,specification)
    elseif version == "ttcf" then
        local asked       = specification.subfont
        local index       = tonumber(asked)
        local ttcversion  = readulong(f)
        local nofsubfonts = readulong(f)
        local offsets     = readcardinaltable(f,nofsubfonts,ulong)
        if index then
            -- by number
            if index >= 1 and index <= nofsubfonts then
                fontdata = readdata(f,offsets[index],specification)
            else
                report("no subfont %a in file %a",index,filename)
            end
        elseif type(asked) == "string" and asked ~= "" then
            -- by name: readdata returns nothing as long as the name differs
            specification.askedname = asked
            for i=1,nofsubfonts do
                fontdata = readdata(f,offsets[i],specification)
                if fontdata then
                    fontdata.subfontindex = i
                    report("subfont named %a has index %a",asked,i)
                    break
                end
            end
            if not fontdata then
                report("no subfont named %a",asked)
            end
        else
            -- all of them
            local subfonts = { }
            fontdata = {
                filename    = filename,
                filesize    = filesize,
                filetime    = filetime,
                version     = version,
                subfonts    = subfonts,
                ttcversion  = ttcversion,
                nofsubfonts = nofsubfonts,
            }
            for i=1,nofsubfonts do
                subfonts[i] = readdata(f,offsets[i],specification)
            end
        end
    else
        report("unknown version %a in file %a",version,filename)
    end
    closefile(f)
    return fontdata or { }
end
2310
-- Normalize the specification and load the font in protected mode. A string
-- is taken as filename and expanded into a specification that asks for about
-- everything. After that the flags are made consistent: shapes, lookups and
-- kerns imply glyphs, glyphs imply details and details imply info. An instance
-- (from the specification or the third argument) implies variable. When
-- loading fails an error with a traceback is reported and nothing is returned.

local function loadfont(specification,n,instance)
    if type(specification) == "string" then
        local filename = specification
        specification = {
            filename    = filename,
            info        = true, -- always true (for now)
            details     = true,
            glyphs      = true,
            shapes      = true,
            kerns       = true,
            variable    = true,
            globalkerns = true,
            lookups     = true,
            -- true or number:
            subfont     = n or true,
            tounicode   = false,
            instance    = instance
        }
    end
    -- the order of these checks matters as each one can trigger the next
    if specification.shapes or specification.lookups or specification.kerns then
        specification.glyphs = true
    end
    if specification.glyphs then
        specification.details = true
    end
    if specification.details then
        specification.info = true -- not really used any more
    end
    if specification.platformnames then
        specification.platformnames = true -- not really used any more
    end
    local wanted = specification.instance or instance
    if wanted then
        specification.variable = true
        specification.instance = wanted
    end
    local ok, result = xpcall(
        loadfontdata,
        function(str)
            report("fatal error in file %a: %s\n%s",specification.filename,str,debug and debug.traceback())
        end,
        specification
    )
    if ok then
        return result
    end
 -- return loadfontdata(specification)
end
2355
2356-- we need even less, but we can have a 'detail' variant
2357
-- Loads only what is needed to get at the glyph shapes of a font.
--
--   filename : the font file
--   n        : subfont selector (passed on as 'subfont')
--   instance : instance selector for variable fonts
--   streams  : passed on unchanged to the loader as 'streams'
--
-- Always returns a table. On success it carries the glyphs (with the 'class',
-- 'index' and 'math' fields removed), the units, a few header tables and the
-- names reduced to their stripped content. When no usable glyph data comes back
-- a stub with format "unknown", no glyphs and zero units is returned.
function readers.loadshapes(filename,n,instance,streams)
    local fontdata = loadfont {
        filename = filename,
        shapes   = true,
        streams  = streams,
        variable = true,
        subfont  = n,
        instance = instance,
    }
    -- A truthy result is not enough: the loader can presumably hand back a table
    -- without glyphs (the loading code above ends with 'return fontdata or { }'),
    -- and looping over a missing glyph table with 'next' raises an error. So we
    -- treat "no glyphs" the same way as "nothing loaded".
    local glyphs = fontdata and fontdata.glyphs
    if not glyphs then
        return {
            filename = filename,
            format   = "unknown",
            glyphs   = { },
            units    = 0,
        }
    end
    -- easier on luajit but still we can hit the 64 K stack constants issue
    for k, v in next, glyphs do
        v.class = nil
        v.index = nil
        v.math  = nil
     -- v.name  = nil
    end
    local names = fontdata.names
    if names then
        -- replacing the value of an existing key is safe during traversal
        for k, v in next, names do
            names[k] = fullstrip(v.content)
        end
    end
    -- be tolerant when the header table is absent; when it is present we pass
    -- its units on untouched
    local fontheader = fontdata.fontheader
    local units      = 0
    if fontheader then
        units = fontheader.units
    end
    return {
     -- version          = 0.123 -- todo
        filename         = filename,
        format           = fontdata.format,
        glyphs           = glyphs,
        units            = units,
        cffinfo          = fontdata.cffinfo,
        fontheader       = fontheader,
        horizontalheader = fontdata.horizontalheader,
        verticalheader   = fontdata.verticalheader,
        maximumprofile   = fontdata.maximumprofile,
        names            = fontdata.names,
        postscript       = fontdata.postscript,
    }
end
2402
-- Loads a font (glyphs and lookups, no shapes) and repackages the loader result
-- into a table with the subtables 'metadata', 'properties' and 'resources'.
--
--   filename : the font file
--   n        : subfont selector
--   instance : instance selector for variable fonts
--
-- Nothing is returned when the loader gives nothing back.
function readers.loadfont(filename,n,instance)
    local fontdata = loadfont {
        filename    = filename,
        glyphs      = true,
        shapes      = false,
        lookups     = true,
        variable    = true,
     -- kerns       = true,
     -- globalkerns = true, -- only for testing, e.g. cambria has different gpos and kern
        subfont     = n,
        instance    = instance,
    }
    if not fontdata then
        return
    end
    -- the top level entries come first, then the three subtables in the same
    -- order as they were filled before
    local result = {
        tableversion  = tableversion,
        creator       = "context mkiv",
        size          = fontdata.filesize,
        time          = fontdata.filetime,
        glyphs        = fontdata.glyphs,
        descriptions  = fontdata.descriptions,
        format        = fontdata.format,
        goodies       = { },
    }
    result.metadata = getinfo(fontdata,n,false,false,true,true) -- no platformnames here !
    local subfonts = fontdata.subfonts
    result.properties = {
        hasitalics    = fontdata.hasitalics or false,
        maxcolorclass = fontdata.maxcolorclass,
        hascolor      = fontdata.hascolor or false,
        instance      = fontdata.instance,
        factors       = fontdata.factors,
        nofsubfonts   = subfonts and #subfonts or nil,
    }
    result.resources = {
     -- filename        = fontdata.filename,
        filename        = filename,
        private         = privateoffset,
        -- the next six get an empty table when the loader did not provide them
        duplicates      = fontdata.duplicates  or { },
        features        = fontdata.features    or { }, -- we need to add these in the loader
        sublookups      = fontdata.sublookups  or { }, -- we need to add these in the loader
        marks           = fontdata.marks       or { }, -- we need to add these in the loader
        markclasses     = fontdata.markclasses or { }, -- we need to add these in the loader
        marksets        = fontdata.marksets    or { }, -- we need to add these in the loader
        sequences       = fontdata.sequences   or { }, -- we need to add these in the loader
        variants        = fontdata.variants, -- variant -> unicode -> glyph
        version         = getname(fontdata,"version"),
        cidinfo         = fontdata.cidinfo,
        mathconstants   = fontdata.mathconstants,
        colorpalettes   = fontdata.colorpalettes,
        colorpaintdata  = fontdata.colorpaintdata,
        colorpaintlist  = fontdata.colorpaintlist,
        colorlinesdata  = fontdata.colorlinesdata,
        coloraffinedata = fontdata.coloraffinedata,
        svgshapes       = fontdata.svgshapes,
        pngshapes       = fontdata.pngshapes,
        variabledata    = fontdata.variabledata,
        foundtables     = fontdata.foundtables,
    }
    return result
end
2462
-- Collects the descriptive information of a font file.
--
--   filename      : the font file
--   specification : nil, a subfont number, or a table with the optional fields
--                   'subfont', 'platformnames', 'rawfamilynames', 'tableoffsets'
--
-- The result is one info table for a file without subfonts (or when a valid
-- subfont is asked for), a list of info tables when there are subfonts and none
-- is selected, and otherwise a small table with the filename and a comment that
-- tells what went wrong.
function readers.getinfo(filename,specification) -- string, nil|number|table
    -- platformnames is optional and not used by context (a too unpredictable mess
    -- that only add to the confusion) .. so it's only for checking things
    local subfont, platformnames, rawfamilynames, tableoffsets
    local instancenames = true
    if type(specification) == "table" then
        subfont        = tonumber(specification.subfont)
        platformnames  = specification.platformnames
        rawfamilynames = specification.rawfamilynames
        tableoffsets   = specification.tableoffsets
    else
        subfont        = tonumber(specification)
        platformnames  = false
        rawfamilynames = false
        tableoffsets   = false
    end
    local fontdata = loadfont {
        filename       = filename,
        details        = true,
        platformnames  = platformnames,
        instancenames  = true,
        tableoffsets   = tableoffsets,
     -- rawfamilynames = rawfamilynames,
    }
    if not fontdata then
        return {
            filename = filename,
            comment  = "the file cannot be opened for reading",
        }
    end
    -- all three successful paths ask for the same kind of info, only the
    -- (sub)font index differs
    local function describe(index)
        return getinfo(fontdata,index,platformnames,rawfamilynames,false,instancenames)
    end
    local subfonts = fontdata.subfonts
    if not subfonts then
        return describe(nil)
    end
    if not subfont then
        -- no selection: report on every subfont
        local list = { }
        for i=1,#subfonts do
            list[i] = describe(i)
        end
        return list
    end
    local inrange = subfont >= 1 and subfont <= #subfonts
    if inrange then
        return describe(subfont)
    end
    return {
        filename = filename,
        comment  = "there is no subfont " .. subfont .. " in this file"
    }
end
2512
-- Placeholder: this reader has no rehash helper (yet), a call only reports that.
function readers.rehash() -- fontdata,hashmethod
    local helper = "rehash"
    report("the %a helper is not yet implemented",helper)
end
2516
-- Placeholder: this reader has no checkhash helper (yet), a call only reports that.
function readers.checkhash() --fontdata
    local helper = "checkhash"
    report("the %a helper is not yet implemented",helper)
end
2520
-- Placeholder: this reader has no pack helper (yet), a call only reports that.
function readers.pack() -- fontdata,hashmethod
    local helper = "pack"
    report("the %a helper is not yet implemented",helper)
end
2524
-- Placeholder: this reader has no unpack helper (yet); the fontdata argument is
-- ignored and a call only reports that the helper is missing.
function readers.unpack(fontdata)
    local helper = "unpack"
    report("the %a helper is not yet implemented",helper)
end
2528
-- Placeholder: this reader has no expand helper (yet); the fontdata argument is
-- ignored and a call only reports that the helper is missing. The message now
-- names this helper ("expand") instead of "unpack".
function readers.expand(fontdata)
    report("the %a helper is not yet implemented","expand")
end
2532
-- Placeholder: this reader has no compact helper (yet); the fontdata argument is
-- ignored and a call only reports that the helper is missing.
function readers.compact(fontdata)
    local helper = "compact"
    report("the %a helper is not yet implemented",helper)
end
2536
-- Placeholder: this reader has no condense helper (yet); the fontdata argument is
-- ignored and a call only reports that the helper is missing.
function readers.condense(fontdata)
    local helper = "condense"
    report("the %a helper is not yet implemented",helper)
end
2540
2541-- plug in
2542
-- The registered extenders, in order of registration.
local extenders = { }

-- Appends an extender to the list. The code that runs the extenders looks at the
-- 'action' field of each entry, so an extender is expected to be a table.
function readers.registerextender(extender)
    table.insert(extenders,extender)
end
2548
-- Runs the 'action' of every registered extender on the given fontdata, in the
-- order of registration. Extenders without an action are skipped. The number of
-- extenders is determined once, before the first action runs.
function readers.extend(fontdata)
    for i=1,#extenders do
        -- the name of an extender was fetched here but never used, so we only
        -- look at the action
        local action = extenders[i].action
        if action then
            action(fontdata)
        end
    end
end
2559