font-otr.lua /size: 92 Kb    last modification: 2021-10-28 13:50
-- Register the metadata of this module in the global 'modules' registry; the
-- registry itself is created first when it does not exist yet.
if not modules then
    modules = { }
end

modules['font-otr'] = {
    version   = 1.001,
    optimize  = true,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
9
-- When looking into a cid font related issue in the ff library I wondered if
-- it made sense to use Lua to filter the information from the otf and ttf
-- files. Quite some ff code relates to special fonts and in practice we only
-- use rather normal opentype fonts.
14--
15-- The code here is based on the documentation (and examples) at the microsoft
16-- website. The code will be extended and improved stepwise. After some experiments
17-- I decided to convert to a format more suitable for the context font handler
18-- because it makes no sense to rehash all those lookups again.
19--
-- Currently we can use this code for getting basic info about the font, loading
-- shapes and loading the extensive table. I'm not sure if I will provide a ff
-- compatible output as well (we're not that far from it as currently I can load
-- all data reasonably fast).
24
25-- We can omit redundant glyphs names i.e. ones that match the agl or
26-- are just a unicode string but it doesn't save that much. It will be an option
27-- some day.
28
-- Optimizing the widths will be done anyway as it saves quite some on a cjk font
-- and the existing (old) code is okay.
31
32-- todo: more messages (only if really needed)
33--
34-- considered, in math:
35--
36-- start -> first (so we can skip the first same-size one)
37-- end   -> last
38--
-- Widths and weights are kind of messy: for instance lmmonolt has a pfmweight of
-- 400 while it should be 300. So, for now we mostly stick to the old compromise.
41
42-- We don't really need all those language tables so they might be dropped some
43-- day.
44
-- The new reader is faster on some aspects and slower on others. The memory footprint
-- is lower. The string reader is a bit faster than the file reader. The new reader
-- gives more efficient tables and has a bit more analysis. In practice these times are
-- not that relevant because we cache. The otf files take a bit more time because we
-- need to calculate the boundingboxes. In theory the processing of text should be
-- somewhat faster especially for complex fonts with many lookups.
51--
52--                        old    new    str reader
53-- lmroman12-regular.otf  0.103  0.203  0.195
54-- latinmodern-math.otf   0.454  0.768  0.712
55-- husayni.ttf            1.142  1.526  1.259
56--
57-- If there is demand I will consider making a ff compatible table dumper but it's
58-- probably more fun to provide a way to show features applied.
59
-- I experimented a bit with f:readbyte(n) and f:readshort() and so on, and it is indeed
-- faster but it might not be the real bottleneck as we still need to juggle data. It
-- is probably more memory efficient as no intermediate strings are involved.
63
64-- if not characters then
65--     require("char-def")
66--     require("char-ini")
67-- end
68
69local next, type, tonumber, rawget = next, type, tonumber, rawget
70local byte, lower, char, gsub = string.byte, string.lower, string.char, string.gsub
71local fullstrip = string.fullstrip
72local floor, round = math.floor, math.round
73local P, R, S, C, Cs, Cc, Ct, Carg, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Carg, lpeg.Cmt
74local lpegmatch = lpeg.match
75local rshift = bit32.rshift
76
77local setmetatableindex  = table.setmetatableindex
78local sortedkeys         = table.sortedkeys
79local sortedhash         = table.sortedhash
80local stripstring        = string.nospaces
81local utf16_to_utf8_be   = utf.utf16_to_utf8_be
82
83local report             = logs.reporter("otf reader")
84local report_cmap        = logs.reporter("otf reader","cmap")
85
86local trace_cmap         = false  trackers.register("otf.cmap",         function(v) trace_cmap         = v end)
87local trace_cmap_details = false  trackers.register("otf.cmap.details", function(v) trace_cmap_details = v end)
88
89fonts                    = fonts or { }
90local handlers           = fonts.handlers or { }
91fonts.handlers           = handlers
92local otf                = handlers.otf or { }
93handlers.otf             = otf
94local readers            = otf.readers or { }
95otf.readers              = readers
96
97----- streamreader       = utilities.streams -- faster on big files (not true any longer)
98local streamreader       = utilities.files   -- faster on identify (also uses less memory)
99local streamwriter       = utilities.files
100
101readers.streamreader     = streamreader
102readers.streamwriter     = streamwriter
103
-- Local aliases for the reader primitives of the active stream backend.

-- opening, closing and positioning
local openfile           = streamreader.open
local closefile          = streamreader.close
----- skipbytes          = streamreader.skip
local setposition        = streamreader.setposition
local skipshort          = streamreader.skipshort

-- raw bytes and strings
local readbytes          = streamreader.readbytes
local readstring         = streamreader.readstring

-- unsigned integers
local readbyte           = streamreader.readcardinal1  --  8-bit unsigned integer
local readushort         = streamreader.readcardinal2  -- 16-bit unsigned integer
local readuint           = streamreader.readcardinal3  -- 24-bit unsigned integer
local readulong          = streamreader.readcardinal4  -- 32-bit unsigned integer

-- signed integers
----- readchar           = streamreader.readinteger1   --  8-bit   signed integer
local readshort          = streamreader.readinteger2   -- 16-bit   signed integer
local readlong           = streamreader.readinteger4   -- 32-bit   signed integer

-- fixed point numbers
local readfixed          = streamreader.readfixed4
local read2dot14         = streamreader.read2dot14     -- 16-bit signed fixed number with the low 14 bits of fraction (2.14) (F2DOT14)

-- font unit quantities and offsets reuse the 16-bit readers
local readfword          = readshort                   -- 16-bit   signed integer that describes a quantity in FUnits
local readufword         = readushort                  -- 16-bit unsigned integer that describes a quantity in FUnits
local readoffset         = readushort

-- bulk readers
local readcardinaltable  = streamreader.readcardinaltable
local readintegertable   = streamreader.readintegertable
125
-- Read a four byte tag from stream 'f'; the raw string is passed through
-- 'stripstring' and the result is lowercased.
function streamreader.readtag(f)
    local stripped = stripstring(readstring(f,4))
    return lower(stripped)
end
129
130local short  = 2
131local ushort = 2
132local ulong  = 4
133
-- Directive "fonts.streamreader": rebind every local reader alias of this
-- module to the utilities.streams backend. The callback takes no arguments, so
-- the value given to the directive plays no role here.
directives.register("fonts.streamreader",function()

    local streams     = utilities.streams

    streamreader      = streams

    -- opening, closing and positioning
    openfile          = streams.open
    closefile         = streams.close
    setposition       = streams.setposition
    skipshort         = streams.skipshort

    -- raw bytes and strings
    readbytes         = streams.readbytes
    readstring        = streams.readstring

    -- unsigned and signed integers
    readbyte          = streams.readcardinal1
    readushort        = streams.readcardinal2
    readuint          = streams.readcardinal3
    readulong         = streams.readcardinal4
    readshort         = streams.readinteger2
    readlong          = streams.readinteger4

    -- fixed point numbers
    readfixed         = streams.readfixed4
    read2dot14        = streams.read2dot14

    -- these follow the 16-bit readers that were just rebound above
    readfword         = readshort
    readufword        = readushort
    readoffset        = readushort

    -- bulk readers
    readcardinaltable = streams.readcardinaltable
    readintegertable  = streams.readintegertable

    -- the tag reader is installed on the new backend as well
    streams.readtag = function(f)
        return lower(stripstring(readstring(f,4)))
    end

end)
163
164-- date represented in number of seconds since 12:00 midnight, January 1, 1904. The value is represented as a
165-- signed 64-bit integer
166
-- Read an eight byte date/time value from stream 'f'. Only the last five bytes
-- contribute to the result (the fourth byte read carries the highest weight);
-- the first three bytes are read but ignored.
local function readlongdatetime(f)
    local _, _, _, b5, b4, b3, b2, b1 = readbytes(f,8)
    return 0x100000000 * b5 + 0x1000000 * b4 + 0x10000 * b3 + 0x100 * b2 + b1
end
171
172local tableversion    = 0.004
173readers.tableversion  = tableversion
174local privateoffset   = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF
175
176-- We have quite some data tables. We are somewhat ff compatible with names but as I used
177-- the information from the microsoft site there can be differences. Eventually I might end
178-- up with a different ordering and naming.
179
180local reservednames = { [0] =
181    "copyright",
182    "family",
183    "subfamily",
184    "uniqueid",
185    "fullname",
186    "version",
187    "postscriptname",
188    "trademark",
189    "manufacturer",
190    "designer",
191    "description", -- descriptor in ff
192    "vendorurl",
193    "designerurl",
194    "license",
195    "licenseurl",
196    "reserved",
197    "typographicfamily",    -- preffamilyname
198    "typographicsubfamily", -- prefmodifiers
199    "compatiblefullname",   -- for mac
200    "sampletext",
201    "cidfindfontname",
202    "wwsfamily",
203    "wwssubfamily",
204    "lightbackgroundpalette",
205    "darkbackgroundpalette",
206    "variationspostscriptnameprefix",
207}
208
209-- more at: https://www.microsoft.com/typography/otspec/name.htm
210
211-- setmetatableindex(reservednames,function(t,k)
212--     local v = "name_" .. k
213--     t[k] =  v
214--     return v
215-- end)
216
217local platforms = { [0] =
218    "unicode",
219    "macintosh",
220    "iso",
221    "windows",
222    "custom",
223}
224
225local encodings = {
226    -- these stay:
227    unicode = { [0] =
228        "unicode 1.0 semantics",
229        "unicode 1.1 semantics",
230        "iso/iec 10646",
231        "unicode 2.0 bmp",             -- cmap subtable formats 0, 4, 6
232        "unicode 2.0 full",            -- cmap subtable formats 0, 4, 6, 10, 12
233        "unicode variation sequences", -- cmap subtable format 14).
234        "unicode full repertoire",     -- cmap subtable formats 0, 4, 6, 10, 12, 13
235    },
236    -- these can go:
237    macintosh = { [0] =
238        "roman", "japanese", "chinese (traditional)", "korean", "arabic", "hebrew", "greek", "russian",
239        "rsymbol", "devanagari", "gurmukhi", "gujarati", "oriya", "bengali", "tamil", "telugu", "kannada",
240        "malayalam", "sinhalese", "burmese", "khmer", "thai", "laotian", "georgian", "armenian",
241        "chinese (simplified)", "tibetan", "mongolian", "geez", "slavic", "vietnamese", "sindhi",
242        "uninterpreted",
243    },
244    -- these stay:
245    iso = { [0] =
246        "7-bit ascii",
247        "iso 10646",
248        "iso 8859-1",
249    },
250    -- these stay:
251    windows = { [0] =
252        "symbol",
253        "unicode bmp", -- this is utf16
254        "shiftjis",
255        "prc",
256        "big5",
257        "wansung",
258        "johab",
259        "reserved 7",
260        "reserved 8",
261        "reserved 9",
262        "unicode ucs-4",
263    },
264    custom = {
265        --custom: 0-255 : otf windows nt compatibility mapping
266    }
267}
268
269local decoders = {
270    unicode   = { },
271    macintosh = { },
272    iso       = { },
273    windows   = {
274        -- maybe always utf16
275        ["unicode semantics"]           = utf16_to_utf8_be,
276        ["unicode bmp"]                 = utf16_to_utf8_be,
277        ["unicode full"]                = utf16_to_utf8_be,
278        ["unicode 1.0 semantics"]       = utf16_to_utf8_be,
279        ["unicode 1.1 semantics"]       = utf16_to_utf8_be,
280        ["unicode 2.0 bmp"]             = utf16_to_utf8_be,
281        ["unicode 2.0 full"]            = utf16_to_utf8_be,
282        ["unicode variation sequences"] = utf16_to_utf8_be,
283        ["unicode full repertoire"]     = utf16_to_utf8_be,
284    },
285    custom    = { },
286}
287
-- This is a bit over the top as we can just look for either windows, unicode or macintosh
-- names (in that order). A font with no english name is probably a weird one anyway.
290
291local languages = {
292    -- these stay:
293    unicode = {
294        [  0] = "english",
295    },
296    -- english can stay:
297    macintosh = {
298        [  0] = "english",
299     -- [  1] = "french",
300     -- [  2] = "german",
301     -- [  3] = "italian",
302     -- [  4] = "dutch",
303     -- [  5] = "swedish",
304     -- [  6] = "spanish",
305     -- [  7] = "danish",
306     -- [  8] = "portuguese",
307     -- [  9] = "norwegian",
308     -- [ 10] = "hebrew",
309     -- [ 11] = "japanese",
310     -- [ 12] = "arabic",
311     -- [ 13] = "finnish",
312     -- [ 14] = "greek",
313     -- [ 15] = "icelandic",
314     -- [ 16] = "maltese",
315     -- [ 17] = "turkish",
316     -- [ 18] = "croatian",
317     -- [ 19] = "chinese (traditional)",
318     -- [ 20] = "urdu",
319     -- [ 21] = "hindi",
320     -- [ 22] = "thai",
321     -- [ 23] = "korean",
322     -- [ 24] = "lithuanian",
323     -- [ 25] = "polish",
324     -- [ 26] = "hungarian",
325     -- [ 27] = "estonian",
326     -- [ 28] = "latvian",
327     -- [ 29] = "sami",
328     -- [ 30] = "faroese",
329     -- [ 31] = "farsi/persian",
330     -- [ 32] = "russian",
331     -- [ 33] = "chinese (simplified)",
332     -- [ 34] = "flemish",
333     -- [ 35] = "irish gaelic",
334     -- [ 36] = "albanian",
335     -- [ 37] = "romanian",
336     -- [ 38] = "czech",
337     -- [ 39] = "slovak",
338     -- [ 40] = "slovenian",
339     -- [ 41] = "yiddish",
340     -- [ 42] = "serbian",
341     -- [ 43] = "macedonian",
342     -- [ 44] = "bulgarian",
343     -- [ 45] = "ukrainian",
344     -- [ 46] = "byelorussian",
345     -- [ 47] = "uzbek",
346     -- [ 48] = "kazakh",
347     -- [ 49] = "azerbaijani (cyrillic script)",
348     -- [ 50] = "azerbaijani (arabic script)",
349     -- [ 51] = "armenian",
350     -- [ 52] = "georgian",
351     -- [ 53] = "moldavian",
352     -- [ 54] = "kirghiz",
353     -- [ 55] = "tajiki",
354     -- [ 56] = "turkmen",
355     -- [ 57] = "mongolian (mongolian script)",
356     -- [ 58] = "mongolian (cyrillic script)",
357     -- [ 59] = "pashto",
358     -- [ 60] = "kurdish",
359     -- [ 61] = "kashmiri",
360     -- [ 62] = "sindhi",
361     -- [ 63] = "tibetan",
362     -- [ 64] = "nepali",
363     -- [ 65] = "sanskrit",
364     -- [ 66] = "marathi",
365     -- [ 67] = "bengali",
366     -- [ 68] = "assamese",
367     -- [ 69] = "gujarati",
368     -- [ 70] = "punjabi",
369     -- [ 71] = "oriya",
370     -- [ 72] = "malayalam",
371     -- [ 73] = "kannada",
372     -- [ 74] = "tamil",
373     -- [ 75] = "telugu",
374     -- [ 76] = "sinhalese",
375     -- [ 77] = "burmese",
376     -- [ 78] = "khmer",
377     -- [ 79] = "lao",
378     -- [ 80] = "vietnamese",
379     -- [ 81] = "indonesian",
380     -- [ 82] = "tagalong",
381     -- [ 83] = "malay (roman script)",
382     -- [ 84] = "malay (arabic script)",
383     -- [ 85] = "amharic",
384     -- [ 86] = "tigrinya",
385     -- [ 87] = "galla",
386     -- [ 88] = "somali",
387     -- [ 89] = "swahili",
388     -- [ 90] = "kinyarwanda/ruanda",
389     -- [ 91] = "rundi",
390     -- [ 92] = "nyanja/chewa",
391     -- [ 93] = "malagasy",
392     -- [ 94] = "esperanto",
393     -- [128] = "welsh",
394     -- [129] = "basque",
395     -- [130] = "catalan",
396     -- [131] = "latin",
397     -- [132] = "quenchua",
398     -- [133] = "guarani",
399     -- [134] = "aymara",
400     -- [135] = "tatar",
401     -- [136] = "uighur",
402     -- [137] = "dzongkha",
403     -- [138] = "javanese (roman script)",
404     -- [139] = "sundanese (roman script)",
405     -- [140] = "galician",
406     -- [141] = "afrikaans",
407     -- [142] = "breton",
408     -- [143] = "inuktitut",
409     -- [144] = "scottish gaelic",
410     -- [145] = "manx gaelic",
411     -- [146] = "irish gaelic (with dot above)",
412     -- [147] = "tongan",
413     -- [148] = "greek (polytonic)",
414     -- [149] = "greenlandic",
415     -- [150] = "azerbaijani (roman script)",
416    },
417    -- these can stay:
418    iso = {
419    },
420    -- english can stay:
421    windows = {
422     -- [0x0436] = "afrikaans - south africa",
423     -- [0x041c] = "albanian - albania",
424     -- [0x0484] = "alsatian - france",
425     -- [0x045e] = "amharic - ethiopia",
426     -- [0x1401] = "arabic - algeria",
427     -- [0x3c01] = "arabic - bahrain",
428     -- [0x0c01] = "arabic - egypt",
429     -- [0x0801] = "arabic - iraq",
430     -- [0x2c01] = "arabic - jordan",
431     -- [0x3401] = "arabic - kuwait",
432     -- [0x3001] = "arabic - lebanon",
433     -- [0x1001] = "arabic - libya",
434     -- [0x1801] = "arabic - morocco",
435     -- [0x2001] = "arabic - oman",
436     -- [0x4001] = "arabic - qatar",
437     -- [0x0401] = "arabic - saudi arabia",
438     -- [0x2801] = "arabic - syria",
439     -- [0x1c01] = "arabic - tunisia",
440     -- [0x3801] = "arabic - u.a.e.",
441     -- [0x2401] = "arabic - yemen",
442     -- [0x042b] = "armenian - armenia",
443     -- [0x044d] = "assamese - india",
444     -- [0x082c] = "azeri (cyrillic) - azerbaijan",
445     -- [0x042c] = "azeri (latin) - azerbaijan",
446     -- [0x046d] = "bashkir - russia",
447     -- [0x042d] = "basque - basque",
448     -- [0x0423] = "belarusian - belarus",
449     -- [0x0845] = "bengali - bangladesh",
450     -- [0x0445] = "bengali - india",
451     -- [0x201a] = "bosnian (cyrillic) - bosnia and herzegovina",
452     -- [0x141a] = "bosnian (latin) - bosnia and herzegovina",
453     -- [0x047e] = "breton - france",
454     -- [0x0402] = "bulgarian - bulgaria",
455     -- [0x0403] = "catalan - catalan",
456     -- [0x0c04] = "chinese - hong kong s.a.r.",
457     -- [0x1404] = "chinese - macao s.a.r.",
458     -- [0x0804] = "chinese - people's republic of china",
459     -- [0x1004] = "chinese - singapore",
460     -- [0x0404] = "chinese - taiwan",
461     -- [0x0483] = "corsican - france",
462     -- [0x041a] = "croatian - croatia",
463     -- [0x101a] = "croatian (latin) - bosnia and herzegovina",
464     -- [0x0405] = "czech - czech republic",
465     -- [0x0406] = "danish - denmark",
466     -- [0x048c] = "dari - afghanistan",
467     -- [0x0465] = "divehi - maldives",
468     -- [0x0813] = "dutch - belgium",
469     -- [0x0413] = "dutch - netherlands",
470     -- [0x0c09] = "english - australia",
471     -- [0x2809] = "english - belize",
472     -- [0x1009] = "english - canada",
473     -- [0x2409] = "english - caribbean",
474     -- [0x4009] = "english - india",
475     -- [0x1809] = "english - ireland",
476     -- [0x2009] = "english - jamaica",
477     -- [0x4409] = "english - malaysia",
478     -- [0x1409] = "english - new zealand",
479     -- [0x3409] = "english - republic of the philippines",
480     -- [0x4809] = "english - singapore",
481     -- [0x1c09] = "english - south africa",
482     -- [0x2c09] = "english - trinidad and tobago",
483     -- [0x0809] = "english - united kingdom",
484        [0x0409] = "english - united states",
485     -- [0x3009] = "english - zimbabwe",
486     -- [0x0425] = "estonian - estonia",
487     -- [0x0438] = "faroese - faroe islands",
488     -- [0x0464] = "filipino - philippines",
489     -- [0x040b] = "finnish - finland",
490     -- [0x080c] = "french - belgium",
491     -- [0x0c0c] = "french - canada",
492     -- [0x040c] = "french - france",
493     -- [0x140c] = "french - luxembourg",
494     -- [0x180c] = "french - principality of monoco",
495     -- [0x100c] = "french - switzerland",
496     -- [0x0462] = "frisian - netherlands",
497     -- [0x0456] = "galician - galician",
498     -- [0x0437] = "georgian -georgia",
499     -- [0x0c07] = "german - austria",
500     -- [0x0407] = "german - germany",
501     -- [0x1407] = "german - liechtenstein",
502     -- [0x1007] = "german - luxembourg",
503     -- [0x0807] = "german - switzerland",
504     -- [0x0408] = "greek - greece",
505     -- [0x046f] = "greenlandic - greenland",
506     -- [0x0447] = "gujarati - india",
507     -- [0x0468] = "hausa (latin) - nigeria",
508     -- [0x040d] = "hebrew - israel",
509     -- [0x0439] = "hindi - india",
510     -- [0x040e] = "hungarian - hungary",
511     -- [0x040f] = "icelandic - iceland",
512     -- [0x0470] = "igbo - nigeria",
513     -- [0x0421] = "indonesian - indonesia",
514     -- [0x045d] = "inuktitut - canada",
515     -- [0x085d] = "inuktitut (latin) - canada",
516     -- [0x083c] = "irish - ireland",
517     -- [0x0434] = "isixhosa - south africa",
518     -- [0x0435] = "isizulu - south africa",
519     -- [0x0410] = "italian - italy",
520     -- [0x0810] = "italian - switzerland",
521     -- [0x0411] = "japanese - japan",
522     -- [0x044b] = "kannada - india",
523     -- [0x043f] = "kazakh - kazakhstan",
524     -- [0x0453] = "khmer - cambodia",
525     -- [0x0486] = "k'iche - guatemala",
526     -- [0x0487] = "kinyarwanda - rwanda",
527     -- [0x0441] = "kiswahili - kenya",
528     -- [0x0457] = "konkani - india",
529     -- [0x0412] = "korean - korea",
530     -- [0x0440] = "kyrgyz - kyrgyzstan",
531     -- [0x0454] = "lao - lao p.d.r.",
532     -- [0x0426] = "latvian - latvia",
533     -- [0x0427] = "lithuanian - lithuania",
534     -- [0x082e] = "lower sorbian - germany",
535     -- [0x046e] = "luxembourgish - luxembourg",
536     -- [0x042f] = "macedonian (fyrom) - former yugoslav republic of macedonia",
537     -- [0x083e] = "malay - brunei darussalam",
538     -- [0x043e] = "malay - malaysia",
539     -- [0x044c] = "malayalam - india",
540     -- [0x043a] = "maltese - malta",
541     -- [0x0481] = "maori - new zealand",
542     -- [0x047a] = "mapudungun - chile",
543     -- [0x044e] = "marathi - india",
544     -- [0x047c] = "mohawk - mohawk",
545     -- [0x0450] = "mongolian (cyrillic) - mongolia",
546     -- [0x0850] = "mongolian (traditional) - people's republic of china",
547     -- [0x0461] = "nepali - nepal",
548     -- [0x0414] = "norwegian (bokmal) - norway",
549     -- [0x0814] = "norwegian (nynorsk) - norway",
550     -- [0x0482] = "occitan - france",
551     -- [0x0448] = "odia (formerly oriya) - india",
552     -- [0x0463] = "pashto - afghanistan",
553     -- [0x0415] = "polish - poland",
554     -- [0x0416] = "portuguese - brazil",
555     -- [0x0816] = "portuguese - portugal",
556     -- [0x0446] = "punjabi - india",
557     -- [0x046b] = "quechua - bolivia",
558     -- [0x086b] = "quechua - ecuador",
559     -- [0x0c6b] = "quechua - peru",
560     -- [0x0418] = "romanian - romania",
561     -- [0x0417] = "romansh - switzerland",
562     -- [0x0419] = "russian - russia",
563     -- [0x243b] = "sami (inari) - finland",
564     -- [0x103b] = "sami (lule) - norway",
565     -- [0x143b] = "sami (lule) - sweden",
566     -- [0x0c3b] = "sami (northern) - finland",
567     -- [0x043b] = "sami (northern) - norway",
568     -- [0x083b] = "sami (northern) - sweden",
569     -- [0x203b] = "sami (skolt) - finland",
570     -- [0x183b] = "sami (southern) - norway",
571     -- [0x1c3b] = "sami (southern) - sweden",
572     -- [0x044f] = "sanskrit - india",
573     -- [0x1c1a] = "serbian (cyrillic) - bosnia and herzegovina",
574     -- [0x0c1a] = "serbian (cyrillic) - serbia",
575     -- [0x181a] = "serbian (latin) - bosnia and herzegovina",
576     -- [0x081a] = "serbian (latin) - serbia",
577     -- [0x046c] = "sesotho sa leboa - south africa",
578     -- [0x0432] = "setswana - south africa",
579     -- [0x045b] = "sinhala - sri lanka",
580     -- [0x041b] = "slovak - slovakia",
581     -- [0x0424] = "slovenian - slovenia",
582     -- [0x2c0a] = "spanish - argentina",
583     -- [0x400a] = "spanish - bolivia",
584     -- [0x340a] = "spanish - chile",
585     -- [0x240a] = "spanish - colombia",
586     -- [0x140a] = "spanish - costa rica",
587     -- [0x1c0a] = "spanish - dominican republic",
588     -- [0x300a] = "spanish - ecuador",
589     -- [0x440a] = "spanish - el salvador",
590     -- [0x100a] = "spanish - guatemala",
591     -- [0x480a] = "spanish - honduras",
592     -- [0x080a] = "spanish - mexico",
593     -- [0x4c0a] = "spanish - nicaragua",
594     -- [0x180a] = "spanish - panama",
595     -- [0x3c0a] = "spanish - paraguay",
596     -- [0x280a] = "spanish - peru",
597     -- [0x500a] = "spanish - puerto rico",
598     -- [0x0c0a] = "spanish (modern sort) - spain",
599     -- [0x040a] = "spanish (traditional sort) - spain",
600     -- [0x540a] = "spanish - united states",
601     -- [0x380a] = "spanish - uruguay",
602     -- [0x200a] = "spanish - venezuela",
603     -- [0x081d] = "sweden - finland",
604     -- [0x041d] = "swedish - sweden",
605     -- [0x045a] = "syriac - syria",
606     -- [0x0428] = "tajik (cyrillic) - tajikistan",
607     -- [0x085f] = "tamazight (latin) - algeria",
608     -- [0x0449] = "tamil - india",
609     -- [0x0444] = "tatar - russia",
610     -- [0x044a] = "telugu - india",
611     -- [0x041e] = "thai - thailand",
612     -- [0x0451] = "tibetan - prc",
613     -- [0x041f] = "turkish - turkey",
614     -- [0x0442] = "turkmen - turkmenistan",
615     -- [0x0480] = "uighur - prc",
616     -- [0x0422] = "ukrainian - ukraine",
617     -- [0x042e] = "upper sorbian - germany",
618     -- [0x0420] = "urdu - islamic republic of pakistan",
619     -- [0x0843] = "uzbek (cyrillic) - uzbekistan",
620     -- [0x0443] = "uzbek (latin) - uzbekistan",
621     -- [0x042a] = "vietnamese - vietnam",
622     -- [0x0452] = "welsh - united kingdom",
623     -- [0x0488] = "wolof - senegal",
624     -- [0x0485] = "yakut - russia",
625     -- [0x0478] = "yi - prc",
626     -- [0x046a] = "yoruba - nigeria",
627    },
628    custom = {
629    },
630}
631
632local standardromanencoding = { [0] = -- taken from wikipedia
633    "notdef", ".null", "nonmarkingreturn", "space", "exclam", "quotedbl",
634    "numbersign", "dollar", "percent", "ampersand", "quotesingle", "parenleft",
635    "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
636    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
637    "nine", "colon", "semicolon", "less", "equal", "greater", "question", "at",
638    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O",
639    "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "bracketleft",
640    "backslash", "bracketright", "asciicircum", "underscore", "grave", "a", "b",
641    "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q",
642    "r", "s", "t", "u", "v", "w", "x", "y", "z", "braceleft", "bar",
643    "braceright", "asciitilde", "Adieresis", "Aring", "Ccedilla", "Eacute",
644    "Ntilde", "Odieresis", "Udieresis", "aacute", "agrave", "acircumflex",
645    "adieresis", "atilde", "aring", "ccedilla", "eacute", "egrave",
646    "ecircumflex", "edieresis", "iacute", "igrave", "icircumflex", "idieresis",
647    "ntilde", "oacute", "ograve", "ocircumflex", "odieresis", "otilde", "uacute",
648    "ugrave", "ucircumflex", "udieresis", "dagger", "degree", "cent", "sterling",
649    "section", "bullet", "paragraph", "germandbls", "registered", "copyright",
650    "trademark", "acute", "dieresis", "notequal", "AE", "Oslash", "infinity",
651    "plusminus", "lessequal", "greaterequal", "yen", "mu", "partialdiff",
652    "summation", "product", "pi", "integral", "ordfeminine", "ordmasculine",
653    "Omega", "ae", "oslash", "questiondown", "exclamdown", "logicalnot",
654    "radical", "florin", "approxequal", "Delta", "guillemotleft",
655    "guillemotright", "ellipsis", "nonbreakingspace", "Agrave", "Atilde",
656    "Otilde", "OE", "oe", "endash", "emdash", "quotedblleft", "quotedblright",
657    "quoteleft", "quoteright", "divide", "lozenge", "ydieresis", "Ydieresis",
658    "fraction", "currency", "guilsinglleft", "guilsinglright", "fi", "fl",
659    "daggerdbl", "periodcentered", "quotesinglbase", "quotedblbase",
660    "perthousand", "Acircumflex", "Ecircumflex", "Aacute", "Edieresis", "Egrave",
661    "Iacute", "Icircumflex", "Idieresis", "Igrave", "Oacute", "Ocircumflex",
662    "apple", "Ograve", "Uacute", "Ucircumflex", "Ugrave", "dotlessi",
663    "circumflex", "tilde", "macron", "breve", "dotaccent", "ring", "cedilla",
664    "hungarumlaut", "ogonek", "caron", "Lslash", "lslash", "Scaron", "scaron",
665    "Zcaron", "zcaron", "brokenbar", "Eth", "eth", "Yacute", "yacute", "Thorn",
666    "thorn", "minus", "multiply", "onesuperior", "twosuperior", "threesuperior",
667    "onehalf", "onequarter", "threequarters", "franc", "Gbreve", "gbreve",
668    "Idotaccent", "Scedilla", "scedilla", "Cacute", "cacute", "Ccaron", "ccaron",
669    "dcroat",
670}
671
672local weights = {
673    [100] = "thin",
674    [200] = "extralight",
675    [300] = "light",
676    [400] = "normal",
677    [500] = "medium",
678    [600] = "semibold", -- demi demibold
679    [700] = "bold",
680    [800] = "extrabold",
681    [900] = "black",
682}
683
684local widths = {
685    "ultracondensed",
686    "extracondensed",
687    "condensed",
688    "semicondensed",
689    "normal",
690    "semiexpanded",
691    "expanded",
692    "extraexpanded",
693    "ultraexpanded",
694}
695
-- Handler installed on 'weights' through setmetatableindex (presumably used for
-- keys that have no entry of their own -- confirm against that helper). The key
-- is rounded to a multiple of 100 (halves go up): anything above 900 gives
-- "black", a rounded value without an entry gives "normal". Nothing is stored
-- back into the table.
setmetatableindex(weights, function(t,k)
    local rounded = floor((k + 50) / 100) * 100
    if rounded > 900 then
        return "black"
    end
    return rawget(t,rounded) or "normal"
end)
701
-- Handler installed on 'widths' through setmetatableindex: whatever key it is
-- called for, the answer is "normal".
setmetatableindex(widths,function()
    return "normal"
end)
705
706local panoseweights = { [0] =
707    "normal",
708    "normal",
709    "verylight",
710    "light",
711    "thin",
712    "book",
713    "medium",
714    "demi",
715    "bold",
716    "heavy",
717    "black",
718}
719
720local panosewidths = { [0] =
721    "normal",
722    "normal",
723    "normal",
724    "normal",
725    "normal",
726    "expanded",
727    "condensed",
728    "veryexpanded",
729    "verycondensed",
730    "monospaced",
731}
732
733-- We implement a reader per table.
734
735-- helper
736
737local helpers   = { }
738readers.helpers = helpers
739
-- Position stream 'f' at the start of the table registered under 'tag' in
-- fontdata.tables and return that offset. Nothing is returned when 'criterium'
-- or 'f' is false/nil, when the font has no table list (this gets reported) or
-- when the requested table is not present.
local function gotodatatable(f,fontdata,tag,criterium)
    if not (criterium and f) then
        return
    end
    local tables = fontdata.tables
    if not tables then
        report("no tables")
        return
    end
    local datatable = tables[tag]
    if datatable then
        local tableoffset = datatable.offset
        setposition(f,tableoffset)
        return tableoffset
    end
end
755
-- Report that loading of table 'tag' is skipped, but only when 'criterium' and
-- 'f' are set and the font actually has that table. A font without a table
-- list is reported as "no tables".
local function reportskippedtable(f,fontdata,tag,criterium)
    if not (criterium and f) then
        return
    end
    local tables = fontdata.tables
    if not tables then
        report("no tables")
        return
    end
    if tables[tag] then
        report("loading of table %a skipped",tag)
    end
end
769
-- Store 'data' under 'tag' in fontdata.variabledata; that subtable is created
-- on first use.
local function setvariabledata(fontdata,tag,data)
    local existing = fontdata.variabledata
    if not existing then
        fontdata.variabledata = { [tag] = data }
        return
    end
    existing[tag] = data
end
778
779helpers.gotodatatable      = gotodatatable
780helpers.setvariabledata    = setvariabledata
781helpers.reportskippedtable = reportskippedtable
782
783-- The name table is probably the first one to load. After all this one provides
784-- useful information about what we deal with. The complication is that we need
785-- to filter the best one available.
786
787local platformnames = {
788    postscriptname       = true,
789    fullname             = true,
790    family               = true,
791    subfamily            = true,
792    typographicfamily    = true,
793    typographicsubfamily = true,
794    compatiblefullname   = true,
795}
796
797local platformextras = {
798    uniqueid     = true,
799    version      = true,
800    copyright    = true,
801    license      = true,
802    licenseurl   = true,
803    manufacturer = true,
804    vendorurl    = true,
805}
806
-- Load the "name" table. First the records of the platforms that we handle
-- (unicode, windows, macintosh) are collected, then the best candidates are
-- picked in a fixed order of preference. The results are stored as:
--
--   fontdata.names         : per reserved name a table with the decoded content
--                            and the platform, encoding and language it came from
--   fontdata.extras        : the decoded content indexed by name id
--   fontdata.platformnames : per platform overview, only when the specification
--                            has platformnames set (platformextras adds more)
--
-- When there is no name table only an empty fontdata.names is set.

function readers.name(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"name",true)
    if tableoffset then
        local format   = readushort(f) -- not used (yet) but we need to move on
        local nofnames = readushort(f)
        local offset   = readushort(f)
        -- we can also provide a raw list as extra, todo as option
        local start    = tableoffset + offset
        local namelists = {
            unicode   = { },
            windows   = { },
            macintosh = { },
         -- iso       = { },
         -- windows   = { },
        }
        -- Each record is read as six shorts (platform, encoding, language, name
        -- id, length, offset) so whatever we decide, all six have to be consumed.
        for i=1,nofnames do
            local platform = platforms[readushort(f)]
            if platform then
                local namelist = namelists[platform]
                if namelist then
                    local encoding  = readushort(f)
                    local language  = readushort(f)
                    local encodings = encodings[platform]
                    local languages = languages[platform]
                    if encodings and languages then
                        local encoding = encodings[encoding]
                        local language = languages[language]
                        if encoding and language then
                            local index  = readushort(f)
                            local length = readushort(f)
                            local where  = readushort(f)
                            namelist[#namelist+1] = {
                                platform = platform,
                                encoding = encoding,
                                language = language,
                                name     = reservednames[index],
                                index    = index,
                                length   = length,
                                offset   = start + where,
                            }
                        else
                            skipshort(f,3)
                        end
                    else
                        skipshort(f,3)
                    end
                else
                    skipshort(f,5)
                end
            else
                skipshort(f,5)
            end
        end
     -- if format == 1 then
     --     local noftags = readushort(f)
     --     for i=1,noftags do
     --        local length = readushort(f)
     --        local offset = readushort(f)
     --     end
     -- end
        --
        -- we need to choose one we like, for instance an unicode one
        --
        local names  = { }
        local done   = { }
        local extras = { }
        --
        -- there is quite some logic in ff ... hard to follow so we start simple
        -- and extend when we run into it (todo: proper reverse hash) .. we're only
        -- interested in english anyway
        --
        -- Decode a raw string with the decoder registered for the platform and
        -- encoding; without decoder the content is passed as-is.
        local function decoded(platform,encoding,content)
            local decoder = decoders[platform]
            if decoder then
                decoder = decoder[encoding]
            end
            if decoder then
                return decoder(content)
            else
                return content
            end
        end
        --
        -- Take the not yet seen names from the list of a platform, optionally
        -- restricted to encoding e and language l. The first hit wins.
        local function filter(platform,e,l)
            local namelist = namelists[platform]
            for i=1,#namelist do
                local name    = namelist[i]
                local nametag = name.name
                local index   = name.index
                -- Records without a reserved name are tracked by their name id
                -- and not by their position in this list: a position has no
                -- meaning across lists and would permit a later, less specific
                -- pass to overwrite an entry that a preferred pass already set.
                local key     = nametag or index
                if not done[key] then
                    local encoding = name.encoding
                    local language = name.language
                    if (not e or encoding == e) and (not l or language == l) then
                        setposition(f,name.offset)
                        local content = decoded(platform,encoding,readstring(f,name.length))
                        if nametag then
                            names[nametag] = {
                                content  = content,
                                platform = platform,
                                encoding = encoding,
                                language = language,
                            }
                        end
                        extras[index] = content
                        done[key] = true
                    end
                end
            end
        end
        --
        filter("windows","unicode bmp","english - united states")
     -- filter("unicode") -- which one ?
        filter("macintosh","roman","english")
        filter("windows")
        filter("macintosh")
        filter("unicode")
        --
        fontdata.names  = names
        fontdata.extras = extras
        --
        if specification.platformnames then
            local collected      = { }
            local platformextras = specification.platformextras and platformextras
            for platform, namelist in next, namelists do
                local filtered = false
                for i=1,#namelist do
                    local entry = namelist[i]
                    local name  = entry.name
                    if platformnames[name] or (platformextras and platformextras[name]) then
                        setposition(f,entry.offset)
                        local content = decoded(platform,entry.encoding,readstring(f,entry.length))
                        if filtered then
                            filtered[name] = content
                        else
                            filtered = { [name] = content }
                        end
                    end
                end
                if filtered then
                    collected[platform] = filtered
                end
            end
            fontdata.platformnames = collected
        end
    else
        fontdata.names = { }
    end
end
954
955----- validutf = lpeg.patterns.utf8character^0 * P(-1)
956local validutf = lpeg.patterns.validutf8
957
-- Return the content of a loaded name, but only when it is valid utf-8; in all
-- other cases the caller gets nothing useful (nil).

local function getname(fontdata,key)
    local names = fontdata.names
    local entry = names and names[key]
    if entry then
        local content = entry.content
        if lpegmatch(validutf,content) then
            return content
        end
        return nil
    end
end
968
969-- This table is an original windows (with its precursor os/2) table. In ff this one is
970-- part of the pfminfo table but here we keep it separate (for now). We will create a
971-- properties table afterwards.
972
-- Load the "os/2" table into fontdata.windowsmetrics. The fields are read in
-- the order in which they are stored; what is available depends on the version.
-- Afterwards the weight and width classes and two panose entries are resolved
-- via lookup tables. Without table we end up with an empty windowsmetrics.

readers["os/2"] = function(f,fontdata)
    if not gotodatatable(f,fontdata,"os/2",true) then
        fontdata.windowsmetrics = { }
        return
    end
    local version = readushort(f)
    local metrics = { version = version }
    --
    metrics.averagewidth       = readshort(f) -- ushort?
    metrics.weightclass        = readushort(f)
    metrics.widthclass         = readushort(f)
    metrics.fstype             = readushort(f)
    metrics.subscriptxsize     = readshort(f)
    metrics.subscriptysize     = readshort(f)
    metrics.subscriptxoffset   = readshort(f)
    metrics.subscriptyoffset   = readshort(f)
    metrics.superscriptxsize   = readshort(f)
    metrics.superscriptysize   = readshort(f)
    metrics.superscriptxoffset = readshort(f)
    metrics.superscriptyoffset = readshort(f)
    metrics.strikeoutsize      = readshort(f)
    metrics.strikeoutpos       = readshort(f)
    metrics.familyclass        = readshort(f)
    metrics.panose             = { readbytes(f,10) }
    metrics.unicoderanges      = { readulong(f), readulong(f), readulong(f), readulong(f) }
    metrics.vendor             = readstring(f,4)
    metrics.fsselection        = readushort(f)
    metrics.firstcharindex     = readushort(f)
    metrics.lastcharindex      = readushort(f)
    metrics.typoascender       = readshort(f)
    metrics.typodescender      = readshort(f)
    metrics.typolinegap        = readshort(f)
    metrics.winascent          = readushort(f)
    metrics.windescent         = readushort(f)
    --
    if version >= 1 then
        metrics.codepageranges = { readulong(f), readulong(f) }
    end
    if version >= 2 then
        metrics.xheight     = readshort(f)
        metrics.capheight   = readshort(f)
        metrics.defaultchar = readushort(f)
        metrics.breakchar   = readushort(f)
     -- metrics.maxcontexts           = readushort(f)
     -- metrics.loweropticalpointsize = readushort(f)
     -- metrics.upperopticalpointsize = readushort(f)
    end
    --
    -- todo: unicoderanges
    --
    local weightclass = metrics.weightclass
    local widthclass  = metrics.widthclass
    local panose      = metrics.panose
    --
    metrics.weight       = weightclass and weights[weightclass]
    metrics.width        = widthclass and widths[widthclass]
    metrics.panoseweight = panoseweights[panose[3]]
    metrics.panosewidth  = panosewidths[panose[4]]
    --
    fontdata.windowsmetrics = metrics
end
1032
-- Load the "head" table into fontdata.fontheader (an empty table when the font
-- has none). In both cases fontdata.nofglyphs is (re)set to zero.

readers.head = function(f,fontdata)
    local header = { }
    if gotodatatable(f,fontdata,"head",true) then
        local version  = readulong(f)
        local revision = readulong(f)
        header.version           = version
        header.fontversion       = number.to16dot16(revision)
        header.fontversionnumber = revision
        -- the checksum is assembled from two shorts (instead of a readulong)
        local high = readushort(f)
        local low  = readushort(f)
        header.checksum          = high * 0x10000 + low
        header.magic             = readulong(f)
        header.flags             = readushort(f)
        header.units             = readushort(f)
        header.created           = readlongdatetime(f)
        header.modified          = readlongdatetime(f)
        header.xmin              = readshort(f)
        header.ymin              = readshort(f)
        header.xmax              = readshort(f)
        header.ymax              = readshort(f)
        header.macstyle          = readushort(f)
        header.smallpixels       = readushort(f)
        header.directionhint     = readshort(f)
        header.indextolocformat  = readshort(f)
        header.glyphformat       = readshort(f)
    end
    fontdata.fontheader = header
    fontdata.nofglyphs  = 0
end
1065
1066-- This table is a rather simple one. No treatment of values is needed here. Most
1067-- variables are not used but nofmetrics is quite important.
1068
-- Load the "hhea" table into fontdata.horizontalheader. This only happens when
-- the specification asks for details; otherwise (or when the table is absent)
-- the header only tells that there are no metrics.

readers.hhea = function(f,fontdata,specification)
    local header = { }
    if gotodatatable(f,fontdata,"hhea",specification.details) then
        header.version             = readulong(f)
        header.ascender            = readfword(f)
        header.descender           = readfword(f)
        header.linegap             = readfword(f)
        header.maxadvancewidth     = readufword(f)
        header.minleftsidebearing  = readfword(f)
        header.minrightsidebearing = readfword(f)
        header.maxextent           = readfword(f)
        header.caretsloperise      = readshort(f)
        header.caretsloperun       = readshort(f)
        header.caretoffset         = readshort(f)
        header.reserved_1          = readshort(f)
        header.reserved_2          = readshort(f)
        header.reserved_3          = readshort(f)
        header.reserved_4          = readshort(f)
        header.metricdataformat    = readshort(f)
        header.nofmetrics          = readushort(f)
    else
        header.nofmetrics = 0
    end
    fontdata.horizontalheader = header
end
1097
-- Load the "vhea" table into fontdata.verticalheader. This only happens when
-- the specification asks for details; otherwise (or when the table is absent)
-- the header only tells that there are no metrics.

readers.vhea = function(f,fontdata,specification)
    local header = { }
    if gotodatatable(f,fontdata,"vhea",specification.details) then
        header.version              = readulong(f)
        header.ascender             = readfword(f)
        header.descender            = readfword(f)
        header.linegap              = readfword(f)
        header.maxadvanceheight     = readufword(f)
        header.mintopsidebearing    = readfword(f)
        header.minbottomsidebearing = readfword(f)
        header.maxextent            = readfword(f)
        header.caretsloperise       = readshort(f)
        header.caretsloperun        = readshort(f)
        header.caretoffset          = readshort(f)
        header.reserved_1           = readshort(f)
        header.reserved_2           = readshort(f)
        header.reserved_3           = readshort(f)
        header.reserved_4           = readshort(f)
        header.metricdataformat     = readshort(f)
        header.nofmetrics           = readushort(f)
    else
        header.nofmetrics = 0
    end
    fontdata.verticalheader = header
end
1126
1127-- We probably never need all these variables, but we do need the nofglyphs when loading other
1128-- tables. Again we use the microsoft names but see no reason to have "max" in each name.
1129
1130-- fontdata.maximumprofile can be bad
1131
-- Load the "maxp" table (when the specification asks for details). The number
-- of glyphs always goes to fontdata.nofglyphs; fontdata.maximumprofile gets the
-- short variant (0x00005000), the long variant (0x00010000) or, for any other
-- version, a profile that claims zero glyphs.

readers.maxp = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"maxp",specification.details) then
        return
    end
    local version   = readulong(f)
    local nofglyphs = readushort(f)
    local profile   = {
        version   = version,
        nofglyphs = nofglyphs,
    }
    fontdata.nofglyphs = nofglyphs
    if version == 0x00010000 then
        profile.points             = readushort(f)
        profile.contours           = readushort(f)
        profile.compositepoints    = readushort(f)
        profile.compositecontours  = readushort(f)
        profile.zones              = readushort(f)
        profile.twilightpoints     = readushort(f)
        profile.storage            = readushort(f)
        profile.functiondefs       = readushort(f)
        profile.instructiondefs    = readushort(f)
        profile.stackelements      = readushort(f)
        profile.sizeofinstructions = readushort(f)
        profile.componentelements  = readushort(f)
        profile.componentdepth     = readushort(f)
    elseif version ~= 0x00005000 then
        -- unknown version, don't trust the count in the profile
        profile.nofglyphs = 0
    end
    fontdata.maximumprofile = profile
end
1169
1170-- Here we filter the (advance) widths (that can be different from the boundingbox width of
1171-- course).
1172
-- Set the (advance) width of the glyphs from the "hmtx" table, when the
-- specification asks for glyphs. There are nofmetrics (from the horizontal
-- header) explicit entries; the remaining glyphs inherit the last width read.
-- Zero widths are not stored.

readers.hmtx = function(f,fontdata,specification)
    if not gotodatatable(f,fontdata,"hmtx",specification.glyphs) then
        return
    end
    local nofmetrics = fontdata.horizontalheader.nofmetrics
    local glyphs     = fontdata.glyphs
    local nofglyphs  = fontdata.nofglyphs
    local advance    = 0
    for index=0,nofmetrics-1 do
        local glyph = glyphs[index]
        advance = readshort(f) -- readushort
        readshort(f) -- the left side bearing, not stored
        if advance ~= 0 then
            glyph.width = advance
        end
    end
    -- The next can happen in for instance a monospace font or in a cjk font
    -- with fixed widths.
    if advance ~= 0 then
        for index=nofmetrics,nofglyphs-1 do
            glyphs[index].width = advance
        end
    end
end
1206
-- Set the vertical metrics of the glyphs from the "vmtx" table, when the
-- specification asks for glyphs. There are nofmetrics (from the vertical header)
-- explicit entries; the remaining glyphs inherit the last height read. A height
-- is only stored when it is non zero and differs from the default (ascender
-- minus descender of the vertical header); a top side bearing only when it is
-- non zero.

readers.vmtx = function(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"vmtx",specification.glyphs)
    if tableoffset then
        local verticalheader = fontdata.verticalheader
        local nofmetrics     = verticalheader.nofmetrics
        local glyphs         = fontdata.glyphs
        local nofglyphs      = fontdata.nofglyphs
        local vheight        = 0
        -- The vertical header can be the minimal fallback that only has
        -- nofmetrics (set when the vhea table is missing or not loaded), so we
        -- cannot assume that ascender and descender are there.
        local vdefault       = (verticalheader.ascender or 0) - (verticalheader.descender or 0)
        local topsidebearing = 0
        for i=0,nofmetrics-1 do
            local glyph     = glyphs[i]
            vheight         = readushort(f)
            topsidebearing  = readshort(f)
            if vheight ~= 0 and vheight ~= vdefault then
                glyph.vheight = vheight
            end
            if topsidebearing ~= 0 then
                glyph.tsb = topsidebearing
            end
        end
        -- The next can happen in for instance a monospace font or in a cjk font
        -- with fixed heights.
        if vheight ~= 0 and vheight ~= vdefault then
            for i=nofmetrics,nofglyphs-1 do
                glyphs[i].vheight = vheight
            end
        end
    end
end
1238
-- The "vorg" table is not loaded; when glyphs are asked for and the font has
-- one, we only report that it gets skipped.

readers.vorg = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"vorg",wanted)
end
1242
1243-- The post table relates to postscript (printing) but has some relevant properties for other
1244-- usage as well. We just use the names from the microsoft specification. The version 2.0
1245-- description is somewhat fuzzy but it is a hybrid with overloads.
1246
-- Load the "post" table. The header always goes to fontdata.postscript (an empty
-- table when the font has no post table). Only when the specification asks for
-- glyphs the glyph names are set:
--
--   version 0x00010000 : the names of the standard (mac) roman encoding
--   version 0x00020000 : per glyph an index that is either a standard name
--                        (below 258) or a reference to a string that follows
--
-- Glyph slots that don't exist are skipped instead of triggering an error.

readers.post = function(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"post",true)
    if not tableoffset then
        fontdata.postscript = { }
        return
    end
    local version = readulong(f)
    fontdata.postscript = {
        version            = version,
        italicangle        = readfixed(f),
        underlineposition  = readfword(f),
        underlinethickness = readfword(f),
        monospaced         = readulong(f),
        minmemtype42       = readulong(f),
        maxmemtype42       = readulong(f),
        minmemtype1        = readulong(f),
        maxmemtype1        = readulong(f),
    }
    if not specification.glyphs then
        -- enough done
        return
    end
    -- this one is needed in both branches (it used to be defined in the second
    -- one only, which made the first one fail)
    local glyphs = fontdata.glyphs
    if version == 0x00010000 then
        -- mac encoding (258 glyphs)
        for index=0,#standardromanencoding do
            local glyph = glyphs[index]
            if glyph then
                glyph.name = standardromanencoding[index]
            end
        end
    elseif version == 0x00020000 then
        local nofglyphs = readushort(f)
        local indices   = { }
        local maxnames  = 0
        for i=0,nofglyphs-1 do
            local nameindex = readushort(f)
            if nameindex >= 258 then
                -- a reference to one of the strings that come after the indices
                maxnames  = maxnames + 1
                nameindex = nameindex - 257
                indices[nameindex] = i
            else
                local glyph = glyphs[i]
                if glyph then
                    glyph.name = standardromanencoding[nameindex]
                end
            end
        end
        for i=1,maxnames do
            local mapping = indices[i]
            if not mapping then
                report("quit post name fetching at %a of %a: %s",i,maxnames,"no index")
                break
            else
                local length = readbyte(f)
                if length > 0 then
                    -- always read the string, otherwise we get out of sync
                    local name  = readstring(f,length)
                    local glyph = glyphs[mapping]
                    if glyph then
                        glyph.name = name
                    end
                else
                 -- report("quit post name fetching at %a of %a: %s",i,maxnames,"overflow")
                 -- break
                end
            end
        end
    end
end
1305
-- The "cff" table is not loaded here; when glyphs are asked for and the font has
-- one, we only report that it gets skipped.

readers.cff = function(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"cff",wanted)
end
1309
1310-- Not all cmaps make sense .. e.g. dfont is obsolete and probably more are not relevant. Let's see
1311-- what we run into. There is some weird calculation going on here because we offset in a table
1312-- being a blob of memory or file. Anyway, I can't stand lunatic formats like this esp when there
1313-- is no real gain.
1314
local formatreaders = { } -- cmap subtable readers, indexed by format number
local duplicatestoo = true -- also register duplicates (see format 4)

-- The { platform, encoding, format } combinations that we handle, in order of
-- preference.

local sequence = {
    -- there is some provision against redundant loading
    { 3,  1,  4 },
    { 3, 10, 12 },
    { 0,  3,  4 },
    { 0,  3, 12 },
    { 0,  1,  4 },
    { 0,  1, 12 }, -- for some old mac fonts
    { 0,  0,  6 },
    { 3,  0,  6 },
    { 3,  0,  4 }, -- for (likely) old crap
    -- variants
    { 0,  5, 14 },
    -- last resort ranges
    { 0,  4, 12 },
    { 3, 10, 13 },
}

-- The same information as a nested hash: supported[platform][encoding][format]
-- is true for each entry in the sequence.

local supported = { }

for i=1,#sequence do
    local entry      = sequence[i]
    local byplatform = supported[entry[1]]
    if not byplatform then
        byplatform = { }
        supported[entry[1]] = byplatform
    end
    local byencoding = byplatform[entry[2]]
    if not byencoding then
        byencoding = { }
        byplatform[entry[2]] = byencoding
    end
    byencoding[entry[3]] = true
end
1353
-- Read a format 4 cmap subtable (segments with deltas and, optionally, an index
-- array). For each glyph that gets its first unicode the counter is incremented;
-- additional unicodes for an already mapped glyph are registered as duplicates
-- (or reported, when duplicatestoo is disabled). The reverse mapping (index to
-- unicode) keeps the first unicode seen. Returns the number of glyphs that got
-- a unicode.

formatreaders[4] = function(f,fontdata,offset)
    setposition(f,offset+2) -- skip format
    --
    local length      = readushort(f) -- in bytes of subtable
    readushort(f)                     -- language, not used
    local nofsegments = readushort(f) / 2
    --
    skipshort(f,3) -- searchrange entryselector rangeshift
    --
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local nofdone    = 0
    local endchars   = readcardinaltable(f,nofsegments,ushort)
    readushort(f)    -- reserved, 0
    local startchars = readcardinaltable(f,nofsegments,ushort)
    local deltas     = readcardinaltable(f,nofsegments,ushort)
    local offsets    = readcardinaltable(f,nofsegments,ushort)
    -- format length language nofsegments searchrange entryselector rangeshift 4-tables
    local size       = (length - 2 * 2 - 5 * 2 - 4 * 2 * nofsegments) / 2
    local indices    = readcardinaltable(f,size-1,ushort)
    --
    -- Couple a unicode to the glyph at the given index (when there is such a
    -- glyph); the message is only used when duplicates are not registered.
    local function register(unicode,index,message)
        local glyph = glyphs[index]
        if glyph then
            local gu = glyph.unicode
            if not gu then
                glyph.unicode = unicode
                nofdone = nofdone + 1
            elseif gu ~= unicode then
                if duplicatestoo then
                    local d = duplicates[gu]
                    if d then
                        d[unicode] = true
                    else
                        duplicates[gu] = { [unicode] = true }
                    end
                else
                    -- no duplicates ... weird side effects in lm
                    report(message,unicode,index,glyph.name)
                end
            end
            if not mapping[index] then
                mapping[index] = unicode
            end
        end
    end
    --
    for segment=1,nofsegments do
        local startchar = startchars[segment]
        local endchar   = endchars[segment]
        local offset    = offsets[segment]
        local delta     = deltas[segment]
        if startchar == 0xFFFF and endchar == 0xFFFF then
            -- break
        elseif startchar == 0xFFFF and offset == 0 then
            -- break
        elseif offset == 0xFFFF then
            -- bad encoding
        elseif offset == 0 then
            -- the index is calculated from the unicode
            if trace_cmap_details then
                -- this trace now uses the cmap reporter, like the other cmap traces
                report_cmap("format 4.%i segment %2i from %C upto %C at index %H",1,segment,startchar,endchar,(startchar + delta) % 65536)
            end
            for unicode=startchar,endchar do
                local index = (unicode + delta) % 65536
                if index > 0 then
                    register(unicode,index,"duplicate case 1: %C %04i %s")
                end
            end
        else
            -- the index comes from the index array
            local shift = (segment-nofsegments+offset/2) - startchar
            if trace_cmap_details then
                report_cmap("format 4.%i segment %2i from %C upto %C at index %H",0,segment,startchar,endchar,(startchar + delta) % 65536)
            end
            for unicode=startchar,endchar do
                local slot  = shift + unicode
                local index = indices[slot]
                if index and index > 0 then
                    index = (index + delta) % 65536
                    register(unicode,index,"duplicate case 2: %C %04i %s")
                end
            end
        end
    end
    return nofdone
end
1458
-- Read a format 6 cmap subtable: a start code, a count and then one glyph index
-- per code. A glyph that has no unicode yet gets one (and is counted); a glyph
-- that already has a different unicode is left alone. The reverse mapping keeps
-- the first unicode seen. Returns the number of glyphs that got a unicode.

formatreaders[6] = function(f,fontdata,offset)
    setposition(f,offset)
    readushort(f) -- format
    readushort(f) -- length
    readushort(f) -- language
    local mapping = fontdata.mapping
    local glyphs  = fontdata.glyphs
    local start   = readushort(f)
    local count   = readushort(f)
    local stop    = start+count-1
    local nofdone = 0
    if trace_cmap_details then
        -- there used to be a stray first argument here that shifted the range
        report_cmap("format 6 from %C to %C",start,stop)
    end
    for unicode=start,stop do
        local index = readushort(f)
        if index > 0 then
            local glyph = glyphs[index]
            if glyph then
                local gu = glyph.unicode
                if not gu then
                    glyph.unicode = unicode
                    nofdone = nofdone + 1
                elseif gu ~= unicode then
                    -- report("format 6 overloading %C to %C",gu,unicode)
                    -- glyph.unicode = unicode
                    -- no duplicates ... weird side effects in lm
                end
                if not mapping[index] then
                    mapping[index] = unicode
                end
            end
        end
    end
    return nofdone
end
1496
-- Read a format 12 cmap subtable: groups of consecutive codes that map onto
-- consecutive glyph indices. A glyph without unicode gets one (and is counted),
-- other unicodes for the same glyph are registered as duplicates. The reverse
-- mapping keeps the first unicode seen. Returns the number of glyphs that got
-- a unicode.

formatreaders[12] = function(f,fontdata,offset)
    setposition(f,offset+2+2+4+4) -- skip format reserved length language
    local mapping, glyphs, duplicates = fontdata.mapping, fontdata.glyphs, fontdata.duplicates
    local nofgroups = readulong(f)
    local nofdone   = 0
    for group=1,nofgroups do
        local first = readulong(f)
        local last  = readulong(f)
        local start = readulong(f)
        if trace_cmap_details then
            report_cmap("format 12 from %C to %C starts at index %i",first,last,start)
        end
        local shift = start - first
        for unicode=first,last do
            local index = unicode + shift
            local glyph = glyphs[index]
            if glyph then
                local known = glyph.unicode
                if not known then
                    glyph.unicode = unicode
                    nofdone = nofdone + 1
                elseif known ~= unicode then
                    -- e.g. sourcehan fonts need this
                    local list = duplicates[known]
                    if not list then
                        list = { }
                        duplicates[known] = list
                    end
                    list[unicode] = true
                end
                if not mapping[index] then
                    mapping[index] = unicode
                end
            end
        end
    end
    return nofdone
end
1536
-- Read a format 13 cmap subtable: groups where a whole range of codes maps onto
-- one and the same glyph. The glyph gets the first code as unicode (unless it
-- already has one) and the (rest of the) range is registered as duplicates of
-- that unicode. Ranges that start at or beyond privateoffset are ignored and
-- ranges that run into it are pruned. Groups that refer to a glyph that doesn't
-- exist are reported and skipped. Returns the number of codes handled.

formatreaders[13] = function(f,fontdata,offset)
    --
    -- this vector is only used for simple fallback fonts
    --
    setposition(f,offset+2+2+4+4) -- skip format reserved length language
    local mapping    = fontdata.mapping
    local glyphs     = fontdata.glyphs
    local duplicates = fontdata.duplicates
    local nofgroups  = readulong(f)
    local nofdone    = 0
    for i=1,nofgroups do
        local first = readulong(f)
        local last  = readulong(f)
        local index = readulong(f)
        if first < privateoffset then
            if trace_cmap_details then
                report_cmap("format 13 from %C to %C get index %i",first,last,index)
            end
            local glyph = glyphs[index]
            if glyph then
                local unicode = glyph.unicode
                if not unicode then
                    -- the first one in the range becomes the unicode
                    unicode = first
                    glyph.unicode = unicode
                    first = first + 1
                end
                local list     = duplicates[unicode]
                mapping[index] = unicode
                if not list then
                    list = { }
                    duplicates[unicode] = list
                end
                if last >= privateoffset then
                    local limit = privateoffset - 1
                    report("format 13 from %C to %C pruned to %C",first,last,limit)
                    last = limit
                end
                for unicode=first,last do
                    list[unicode] = true
                end
                nofdone = nofdone + last - first + 1
            else
                -- the other formats also check the glyph, so do the same here
                report("format 13 from %C to %C ignored, no glyph at index %i",first,last,index)
            end
        else
            report("format 13 from %C to %C ignored",first,last)
        end
    end
    return nofdone
end
1583
--
-- Format 14 (variation sequences): collects, per variation selector, the
-- non-default mapping from unicode to glyph index and stores the result in
-- fontdata.variants. The default ranges are not read because there is no need
-- to map the defaults to themselves. Returns the number of collected mappings,
-- or 0 when no (non zero) offset is given.
--
formatreaders[14] = function(f,fontdata,offset)
    if not offset or offset == 0 then
        return 0
    end
    setposition(f,offset)
    readushort(f) -- format (not used)
    readulong(f)  -- length (not used)
    local nofrecords = readulong(f)
    local selectors  = { }
    local others     = { }
    local variants   = { }
    fontdata.variants = variants
    -- first pass: the record headers, read in file order
    for r=1,nofrecords do
        selectors[r] = readuint(f)
        readulong(f)               -- default offset (skipped)
        others[r]    = readulong(f) -- non-default offset
    end
    -- second pass: the non-default tables
    local nofdone = 0
    for r=1,nofrecords do
        local other = others[r]
        if other ~= 0 then
            setposition(f,offset+other)
            local count   = readulong(f)
            local mapping = { }
            for c=1,count do
                -- the unicode comes before the glyph index in the file
                local unicode = readuint(f)
                mapping[unicode] = readushort(f)
            end
            nofdone = nofdone + count
            variants[selectors[r]] = mapping
        end
    end
    return nofdone
end
1637
-- Runs the format reader for one platform/encoding/format combination, but only
-- when that subtable is present and has not been processed yet. A pending subtable
-- is represented by its (numeric) offset in records[platform][encoding][format];
-- once picked up that slot is set to true. The return value is the number that
-- the reader reports (0 when the subtable is skipped).

local function checkcmap(f,fontdata,records,platform,encoding,format)
    local function skipped(reason)
        if trace_cmap_details then
            report_cmap("skipped, %s, p=%i e=%i f=%i",reason,platform,encoding,format)
        end
        return 0
    end
    local pdata = records[platform]
    if not pdata then
        return skipped("no platform")
    end
    local edata = pdata[encoding]
    if not edata then
        return skipped("no encoding")
    end
    local fdata = edata[format]
    if not fdata then
        return skipped("no format")
    end
    if type(fdata) ~= "number" then
        return skipped("already done")
    end
    edata[format] = true -- done, also when there is no reader
    local reader = formatreaders[format]
    if not reader then
        return skipped("unsupported format")
    end
    local n = reader(f,fontdata,fdata) or 0
    if trace_cmap_details or trace_cmap then
        local p = platforms[platform]
        local e = encodings[p]
        report_cmap("checked, platform %i (%s), encoding %i (%s), format %i, new unicodes %i",
            platform,p,encoding,e and e[encoding] or "?",format,n)
    end
    return n
end
1682
-- Reads the cmap table. First the subtable offsets are collected per platform and
-- encoding, then the format of each subtable is looked up (so that we end up with
-- records[platform][encoding][format] = offset) and finally the combinations listed
-- in 'sequence' are fed to checkcmap. The collected records end up in
-- fontdata.cidmaps (an empty table when there is no cmap table to go to).

function readers.cmap(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"cmap",specification.glyphs)
    if not tableoffset then
        fontdata.cidmaps = { }
        return
    end
    local version   = readushort(f)
    local noftables = readushort(f)
    local records   = { }
    fontdata.duplicates = fontdata.duplicates or { }
    -- pass 1: register the offsets
    for i=1,noftables do
        local platform = readushort(f)
        local encoding = readushort(f)
        local offset   = readulong(f)
        local record   = records[platform]
        if not record then
            record = { }
            records[platform] = record
        end
        local subtables = record[encoding]
        if subtables then
            local offsets = subtables.offsets
            offsets[#offsets+1] = offset
        else
            record[encoding] = {
                offsets = { offset },
                formats = { },
            }
        end
    end
    -- pass 2: resolve the formats (and report what we found)
    if trace_cmap then
        report("found cmaps:")
    end
    for platform, record in sortedhash(records) do
        local pname = platforms[platform]
        local enames = encodings[pname]
        local psupported = supported[platform]
        if trace_cmap then
            report(psupported and "  platform %i: %s" or "  platform %i: %s (unsupported)",platform,pname or "?")
        end
        for encoding, subtables in sortedhash(record) do
            local esupported = psupported and psupported[encoding]
            if trace_cmap then
                local ename = enames and enames[encoding] or "?"
                report(esupported and "    encoding %i: %s" or "    encoding %i: %s (unsupported)",encoding,ename)
            end
            local offsets = subtables.offsets
            local formats = subtables.formats
            for i=1,#offsets do
                local position = tableoffset + offsets[i]
                setposition(f,position)
                formats[readushort(f)] = position
            end
            -- from now on the encoding entry is the format to offset hash
            record[encoding] = formats
            if trace_cmap then
                local list = sortedkeys(formats)
                for i=1,#list do
                    local format = list[i]
                    if not (esupported and esupported[format]) then
                        list[i] = format .. " (unsupported)"
                    end
                end
                report("      formats: % t",list)
            end
        end
    end
    -- pass 3: process what we support, in the preferred order
    local ok = false
    for i=1,#sequence do
        local entry = sequence[i]
        if checkcmap(f,fontdata,records,entry[1],entry[2],entry[3]) > 0 then
            ok = true
        end
    end
    if not ok then
        report("no useable unicode cmap found")
    end
    --
    fontdata.cidmaps = {
        version   = version,
        noftables = noftables,
        records   = records,
    }
end
1785
1786-- The glyf table depends on the loca table. We have one entry too many in the locations table (the
1787-- last one is a dummy) because we need to calculate the size of a glyph blob from the delta,
1788-- although we do not need it in our usage (yet). We can remove the locations table when we're done.
1789
-- Nothing is parsed here: the "loca" tag and the glyphs flag of the specification
-- are just handed over to reportskippedtable.

function readers.loca(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"loca",wanted)
end
1793
-- Nothing is parsed here (part goes to the cff module): the "glyf" tag and the
-- glyphs flag of the specification are just handed over to reportskippedtable.

function readers.glyf(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"glyf",wanted)
end
1797
1798-- The Microsoft variant is pretty clean and is supported (implemented elsewhere)
1799-- just because I wanted to see what such a font looks like.
1800
-- Placeholder: only calls reportskippedtable for the "colr" table, driven by the
-- glyphs flag of the specification.
function readers.colr(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"colr",wanted)
end
-- Placeholder: only calls reportskippedtable for the "cpal" table, driven by the
-- glyphs flag of the specification.
function readers.cpal(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"cpal",wanted)
end
1807
1808-- This one is also supported, if only because I could locate a proper font for
1809-- testing.
1810
-- Placeholder: only calls reportskippedtable for the "svg" table, driven by the
-- glyphs flag of the specification.

function readers.svg(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"svg",wanted)
end
1814
1815-- There is a font from apple to test the next one. Will there be more? Anyhow,
1816-- it's relatively easy to support, so I did it.
1817
-- Placeholder: only calls reportskippedtable for the "sbix" table, driven by the
-- glyphs flag of the specification.

function readers.sbix(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"sbix",wanted)
end
1821
1822-- I'm only willing to look into the next variant if I see a decent and complete (!)
1823-- font and more can show up. It makes no sense to waste time on ideas. Okay, the
1824-- apple font also has these tables.
1825
-- Placeholder: only calls reportskippedtable for the "cbdt" table, driven by the
-- glyphs flag of the specification.
function readers.cbdt(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"cbdt",wanted)
end
-- Placeholder: only calls reportskippedtable for the "cblc" table, driven by the
-- glyphs flag of the specification.
function readers.cblc(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"cblc",wanted)
end
-- Placeholder: only calls reportskippedtable for the "ebdt" table, driven by the
-- glyphs flag of the specification.
function readers.ebdt(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"ebdt",wanted)
end
-- Placeholder: only calls reportskippedtable for the "ebsc" table, driven by the
-- glyphs flag of the specification.
function readers.ebsc(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"ebsc",wanted)
end
-- Placeholder: only calls reportskippedtable for the "eblc" table, driven by the
-- glyphs flag of the specification.
function readers.eblc(f,fontdata,specification)
    local wanted = specification.glyphs
    reportskippedtable(f,fontdata,"eblc",wanted)
end
1841
1842-- Here we have a table that we really need for later processing although a more advanced gpos table
1843-- can also be available. Todo: we need a 'fake' lookup for this (analogue to ff).
1844
-- Reads the (old style) kern table and stores the format 0 pairs as
-- glyphs[left].kerns[right] = kern. Subtables in another format are only reported.
--
-- Each subtable starts with: version (2), length (2), coverage (2), where bits
-- 8-15 of coverage hold the format. We keep track of the start of each subtable
-- ourselves so that a subtable that we don't read (format 2 and others) is
-- skipped by its length instead of having the next subtable header read from
-- the middle of unread data.
--
-- For format 0 we don't trust the 16 bit length field but step over what we
-- actually read: 14 bytes of header plus 6 bytes per pair.
--
-- NOTE(review): only the version 0 table header (two ushorts) is handled here,
-- the coverage flags (bits 0-7) are not looked at, just as before.

function readers.kern(f,fontdata,specification)
    local tableoffset = gotodatatable(f,fontdata,"kern",specification.kerns)
    if not tableoffset then
        return
    end
    readushort(f) -- table version (not used)
    local noftables = readushort(f)
    local glyphs    = fontdata.glyphs
    local position  = tableoffset + 4 -- start of the first subtable
    for i=1,noftables do
        setposition(f,position)
        readushort(f) -- subtable version (not used)
        local length   = readushort(f)
        local coverage = readushort(f)
        -- bit 8-15 of coverage: format 0 or 2
        local format   = rshift(coverage,8)
        if format == 0 then
            local nofpairs = readushort(f)
            readushort(f) -- searchrange   (not needed)
            readushort(f) -- entryselector (not needed)
            readushort(f) -- rangeshift    (not needed)
            for j=1,nofpairs do
                local left  = readushort(f)
                local right = readushort(f)
                local kern  = readfword(f)
                local glyph = glyphs[left]
                local kerns = glyph.kerns
                if kerns then
                    kerns[right] = kern
                else
                    glyph.kerns = { [right] = kern }
                end
            end
            position = position + 14 + 6 * nofpairs
        else
            if format == 2 then
                report("todo: kern classes")
            else
                report("todo: kerns")
            end
            if length < 6 then
                -- the length includes the 6 byte header so this can't be right
                -- and we cannot locate the next subtable
                report("bad kern subtable length %i",length)
                break
            end
            position = position + length
        end
    end
end
1883
-- Placeholder: only calls reportskippedtable for the "gdef" table, driven by the
-- details flag of the specification.

function readers.gdef(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"gdef",wanted)
end
1887
-- Placeholder: only calls reportskippedtable for the "gsub" table, driven by the
-- details flag of the specification.

function readers.gsub(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"gsub",wanted)
end
1891
-- Placeholder: only calls reportskippedtable for the "gpos" table, driven by the
-- details flag of the specification.

function readers.gpos(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"gpos",wanted)
end
1895
-- Placeholder: only calls reportskippedtable for the "math" table, driven by the
-- details flag of the specification.

function readers.math(f,fontdata,specification)
    local wanted = specification.details
    reportskippedtable(f,fontdata,"math",wanted)
end
1899
1900-- Now comes the loader. The order of reading these matters as we need to know
1901-- some properties in order to read following tables. When details is true we also
1902-- initialize the glyphs data.
1903
-- Builds the 'info' summary of a font (or of one subfont).
--
--   maindata       : the loaded font data, possibly with a 'subfonts' list
--   sub            : optional subfont index; when maindata.subfonts[sub] exists
--                    that entry is used, otherwise maindata itself
--   platformnames  : when set, fontdata.platformnames is included
--   rawfamilynames : when set, the typographic (sub)family names are not
--                    replaced by the regular ones when missing
--   metricstoo     : when set, horizontalmetrics and verticalmetrics are added
--   instancenames  : when set, the lowercased subfamily names of the variable
--                    instances are included (if there are any)
--
-- When the font has no names table a small table with the filename and a
-- comment is returned instead. That comment mentions the subfont when one was
-- asked for (this used to test an undefined variable 'n' so that message was
-- never used).

local function getinfo(maindata,sub,platformnames,rawfamilynames,metricstoo,instancenames)
    local fontdata = sub and maindata.subfonts and maindata.subfonts[sub] or maindata
    local names    = fontdata.names
    if not names then
        local comment = "there is no info"
        if sub then
            -- tostring because sub is not necessarily a number
            comment = "there is no info for subfont " .. tostring(sub)
        end
        return {
            filename = fontdata.filename,
            comment  = comment,
        }
    end
    -- these are always tables from here on, so no further checking is needed
    local metrics        = fontdata.windowsmetrics or { }
    local postscript     = fontdata.postscript     or { }
    local fontheader     = fontdata.fontheader     or { }
    local cffinfo        = fontdata.cffinfo        or { }
    local verticalheader = fontdata.verticalheader or { }
    local weight         = getname(fontdata,"weight") or cffinfo.weight or metrics.weight
    local width          = getname(fontdata,"width")  or cffinfo.width  or metrics.width
    local fontname       = getname(fontdata,"postscriptname")
    local fullname       = getname(fontdata,"fullname")
    local family         = getname(fontdata,"family")
    local subfamily      = getname(fontdata,"subfamily")
    local familyname     = getname(fontdata,"typographicfamily")
    local subfamilyname  = getname(fontdata,"typographicsubfamily")
    local compatiblename = getname(fontdata,"compatiblefullname") -- kind of useless
    if not rawfamilynames then
        -- the raw variant is for PG (for now, as i need to check / adapt context
        -- to catch a no-fallback case)
        if not    familyname then    familyname =    family end
        if not subfamilyname then subfamilyname = subfamily end
    end
    if platformnames then
        platformnames = fontdata.platformnames
    end
    if instancenames then
        local variabledata = fontdata.variabledata
        local instances    = variabledata and variabledata.instances
        if instances then
            instancenames = { }
            for i=1,#instances do
                instancenames[i] = lower(stripstring(instances[i].subfamily))
            end
        else
            instancenames = nil
        end
    end
    local info = { -- we inherit some inconsistencies/choices from ff
        subfontindex   = fontdata.subfontindex or sub or 0,
     -- filename       = fontdata.filename,
        version        = getname(fontdata,"version"),
     -- format         = fontdata.format,
        fontname       = fontname,
        fullname       = fullname,
     -- cfffullname    = cff.fullname,
        family         = family,
        subfamily      = subfamily,
        familyname     = familyname,
        subfamilyname  = subfamilyname,
        compatiblename = compatiblename,
        weight         = weight and lower(weight),
        width          = width and lower(width),
        pfmweight      = metrics.weightclass or 400, -- will become weightclass
        pfmwidth       = metrics.widthclass or 5, -- will become widthclass
        panosewidth    = metrics.panosewidth,
        panoseweight   = metrics.panoseweight,
        fstype         = metrics.fstype or 0, -- embedding, subsetting and editing
        italicangle    = postscript.italicangle or 0,
        units          = fontheader.units or 0,
        designsize     = fontdata.designsize,
        minsize        = fontdata.minsize,
        maxsize        = fontdata.maxsize,
        boundingbox    = { fontheader.xmin or 0, fontheader.ymin or 0, fontheader.xmax or 0, fontheader.ymax or 0 },
        -- the extra 'or 0' prevents a compare with nil when the value is not a number
        monospaced     = ((tonumber(postscript.monospaced or 0) or 0) > 0) or metrics.panosewidth == "monospaced",
        averagewidth   = metrics.averagewidth,
        xheight        = metrics.xheight, -- can be missing
        capheight      = metrics.capheight or fontdata.maxy, -- can be missing
        ascender       = metrics.typoascender,
        descender      = metrics.typodescender,
        platformnames  = platformnames or nil,
        instancenames  = instancenames or nil,
        tableoffsets   = fontdata.tableoffsets,
        defaultvheight = (verticalheader.ascender or 0) - (verticalheader.descender or 0)
    }
    if metricstoo then
        local keys = {
            "version",
            "ascender", "descender", "linegap",
         -- "caretoffset", "caretsloperise", "caretsloperun",
            "maxadvancewidth", "maxadvanceheight", "maxextent",
         -- "metricdataformat",
            "minbottomsidebearing", "mintopsidebearing",
        }
        local h  = fontdata.horizontalheader or { }
        local v  = verticalheader
        local th = { }
        local tv = { }
        for i=1,#keys do
            local key = keys[i]
            th[key] = h[key] or 0
            tv[key] = v[key] or 0
        end
        info.horizontalmetrics = th
        info.verticalmetrics   = tv
    end
 -- inspect(info)
    return info
end
2023
-- Reads the font header and the table directory, optionally after moving to
-- the given offset. Returns the fresh fontdata table plus the directory: a hash
-- from (lowercased, stripped) tag to { checksum, offset, length }. A table that
-- runs past the end of the file is reported but still registered.

local function loadtables(f,specification,offset)
    if offset then
        setposition(f,offset)
    end
    local basename = file.basename(specification.filename)
    local filesize = specification.filesize
    local filetime = specification.filetime
    -- the header, in the order in which it sits in the file
    local version       = readstring(f,4)
    local noftables     = readushort(f)
    local searchrange   = readushort(f) -- not needed
    local entryselector = readushort(f) -- not needed
    local rangeshift    = readushort(f) -- not needed
    -- the directory
    local tables = { }
    for i=1,noftables do
        local tag    = lower(stripstring(readstring(f,4)))
        -- the checksum is read as two shorts
        local high   = readushort(f)
        local low    = readushort(f)
        local offset = readulong(f)
        local length = readulong(f)
        if offset + length > filesize then
            report("bad %a table in file %a",tag,basename)
        end
        tables[tag] = {
            checksum = high * 0x10000 + low,
            offset   = offset,
            length   = length,
        }
    end
 -- inspect(tables)
    local fontdata = { -- some can/will go
        filename      = basename,
        filesize      = filesize,
        filetime      = filetime,
        version       = version,
        noftables     = noftables,
        searchrange   = searchrange,
        entryselector = entryselector,
        rangeshift    = rangeshift,
        tables        = tables,
        foundtables   = sortedkeys(tables),
        format        = (tables.cff or tables.cff2) and "opentype" or "truetype",
    }
    return fontdata, tables
end
2068
-- Gives fontdata a glyphs table whose entries come into existence on first
-- access (each starts out with just its index) and an empty mapping table.

local function prepareglyps(fontdata)
    local function newglyph(glyphs,index)
        local glyph = {
            -- maybe more defaults
            index = index,
        }
        glyphs[index] = glyph
        return glyph
    end
    fontdata.glyphs  = setmetatableindex(newglyph)
    fontdata.mapping = { }
end
2081
-- Calls the reader registered for the given tag, if there is one, passing on any
-- extra arguments. Nothing is returned.

local function readtable(tag,f,fontdata,specification,...)
    local reader = readers[tag]
    if not reader then
        return
    end
    reader(f,fontdata,specification,...)
end
2088
-- Loads one font (at the given offset in the file). The order of reading the
-- tables matters because some readers need properties set by earlier ones.
--
-- When specification.askedname is set, the font is only accepted when its
-- fullname matches (only letters and digits are compared, case insensitive);
-- otherwise nothing is returned so that the caller can keep searching.

local function readdata(f,offset,specification)

    local fontdata, tables = loadtables(f,specification,offset)

    local function readall(tags)
        for i=1,#tags do
            readtable(tags[i],f,fontdata,specification)
        end
    end

    if specification.glyphs then
        prepareglyps(fontdata)
    end

    fontdata.temporary = { }

    readall { "name" }

    local askedname = specification.askedname
    if askedname then
        local fullname = getname(fontdata,"fullname") or ""
        local wanted   = lower((gsub(askedname,"[^a-zA-Z0-9]","")))
        local found    = lower((gsub(fullname, "[^a-zA-Z0-9]","")))
        if wanted ~= found then
            return -- keep searching
        end
    end

    readall { "stat", "avar", "fvar" }

    -- a variable font without instances gets fake ones: a default, minimum and
    -- maximum instance per axis (the other axis stay at their default)

    local variabledata = fontdata.variabledata

    if variabledata then
        local instances = variabledata.instances
        local axis      = variabledata.axis
        if axis and (not instances or #instances == 0) then
            instances = { }
            variabledata.instances = instances
            local kinds = { "default", "minimum", "maximum" }
            for n=1,#axis do
                local current = axis[n]
                local tag     = current.tag
                for k=1,#kinds do
                    local kind      = kinds[k]
                    local subfamily = kind .. tag
                    local value     = current[kind]
                    local values    = { }
                    for i=1,#axis do
                        local a = axis[i]
                        values[i] = {
                            axis  = a.tag,
                            value = i == n and value or a.default,
                        }
                    end
                    instances[#instances+1] = {
                        subfamily = subfamily,
                        values    = values,
                    }
                end
            end
         -- report("%i fake instances added",#instances)
        end
    end

    -- factors: either given, or derived from a named instance ...

    if not specification.factors then
        local instance = specification.instance
        if type(instance) == "string" then
            local factors = helpers.getfactors(fontdata,instance)
            if factors then
                specification.factors = factors
                fontdata.factors  = factors
                fontdata.instance = instance
                report("user instance: %s, factors: % t",instance,factors)
            else
                report("user instance: %s, bad factors",instance)
            end
        end
    end

    -- ... or else whatever getfactors gives us for 'true'

    if not fontdata.factors and fontdata.variabledata then
        local factors = helpers.getfactors(fontdata,true)
        if factors then
            specification.factors = factors
            fontdata.factors = factors
        end
    end

    readall {
        "os/2", "head", "maxp", "hhea", "vhea", "hmtx", "vmtx", "vorg", "post",
        "mvar", "hvar", "vvar",
        "gdef",
        "cff", "cff2",
        "cmap",
        "loca", -- maybe load it in glyf
        "glyf", -- loads gvar
        "colr", "cpal",
        "svg",
        "sbix",
        "cbdt", "cblc", "ebdt", "eblc",
        "kern", "gsub", "gpos",
        "math",
    }

    fontdata.locations    = nil
    fontdata.cidmaps      = nil
    fontdata.dictionaries = nil
 -- fontdata.cff          = nil

    if specification.tableoffsets then
        local header = {
            version       = fontdata.version,
            noftables     = fontdata.noftables,
            searchrange   = fontdata.searchrange,
            entryselector = fontdata.entryselector,
            rangeshift    = fontdata.rangeshift,
        }
        fontdata.tableoffsets = tables
        setmetatableindex(tables,header)
    end
    return fontdata
end
2235
-- Opens the file and loads the font data, depending on the first four bytes:
--
--   OTTO, true, \0\1\0\0 : a single font
--   ttcf                 : a collection, where specification.subfont selects
--                          a subfont by number or by name; without a usable
--                          selection all subfonts are loaded into a container
--
-- Nothing is returned when the file can't be opened or is empty, otherwise we
-- return the font data, or an empty table when nothing was loaded.

local function loadfontdata(specification)
    local filename = specification.filename
    local fileattr = lfs.attributes(filename)
    local filesize = fileattr and fileattr.size or 0
    local filetime = fileattr and fileattr.modification or 0
    local f = openfile(filename,true) -- zero based
    if not f then
        report("unable to open %a",filename)
        return
    end
    if filesize == 0 then
        report("empty file %a",filename)
        closefile(f)
        return
    end
    specification.filesize = filesize
    specification.filetime = filetime
    local version  = readstring(f,4)
    local fontdata = nil
    if version == "ttcf" then
        local subfont     = tonumber(specification.subfont)
        local ttcversion  = readulong(f)
        local nofsubfonts = readulong(f)
        local offsets     = readcardinaltable(f,nofsubfonts,ulong)
        if subfont then
            -- by number
            if subfont >= 1 and subfont <= nofsubfonts then
                fontdata = readdata(f,offsets[subfont],specification)
            else
                report("no subfont %a in file %a",subfont,filename)
            end
        else
            local name = specification.subfont
            if type(name) == "string" and name ~= "" then
                -- by name: readdata only returns something on a match
                specification.askedname = name
                for i=1,nofsubfonts do
                    fontdata = readdata(f,offsets[i],specification)
                    if fontdata then
                        fontdata.subfontindex = i
                        report("subfont named %a has index %a",name,i)
                        break
                    end
                end
                if not fontdata then
                    report("no subfont named %a",name)
                end
            else
                -- all of them
                local subfonts = { }
                for i=1,nofsubfonts do
                    subfonts[i] = readdata(f,offsets[i],specification)
                end
                fontdata = {
                    filename    = filename,
                    filesize    = filesize,
                    filetime    = filetime,
                    version     = version,
                    subfonts    = subfonts,
                    ttcversion  = ttcversion,
                    nofsubfonts = nofsubfonts,
                }
            end
        end
    elseif version == "OTTO" or version == "true" or version == "\0\1\0\0" then
        fontdata = readdata(f,0,specification)
    else
        report("unknown version %a in file %a",version,filename)
    end
    closefile(f)
    return fontdata or { }
end
2303
-- Normalizes the specification and loads the font in protected mode. A string
-- is taken as filename and expanded into a specification that asks for about
-- everything (n is then the subfont and instance the instance). Flags imply
-- each other: shapes, lookups and kerns need glyphs, glyphs need details and
-- details need info. On an error a message with a traceback is reported and
-- nothing is returned.

local function loadfont(specification,n,instance)
    if type(specification) == "string" then
        specification = {
            filename    = specification,
            info        = true, -- always true (for now)
            details     = true,
            glyphs      = true,
            shapes      = true,
            kerns       = true,
            variable    = true,
            globalkerns = true,
            lookups     = true,
            -- true or number:
            subfont     = n or true,
            tounicode   = false,
            instance    = instance
        }
    end
    if specification.shapes or specification.lookups or specification.kerns then
        specification.glyphs = true
    end
    if specification.glyphs then
        specification.details = true
    end
    if specification.details then
        specification.info = true -- not really used any more
    end
    if specification.platformnames then
        specification.platformnames = true -- not really used any more
    end
    local wanted = specification.instance or instance
    if wanted then
        specification.variable = true
        specification.instance = wanted
    end
    local ok, result = xpcall(loadfontdata,function(str)
        report("fatal error in file %a: %s\n%s",specification.filename,str,debug and debug.traceback())
    end,specification)
    if ok then
        return result
    end
end
2348
2349-- we need even less, but we can have a 'detail' variant
2350
-- Loads a font for the sake of its shapes and returns a trimmed down table:
-- the glyphs (without class, index and math), the stripped names and a few
-- headers.
--
-- The loader can come back with nothing (fatal error, file can't be opened)
-- but also with a table that has no glyphs, for instance an empty table for an
-- unknown file version or a container with subfonts when a collection is
-- loaded without subfont. Both cases result in the 'unknown' table with an
-- empty glyphs table instead of a crash on the missing glyphs or fontheader.

function readers.loadshapes(filename,n,instance,streams)
    local fontdata = loadfont {
        filename = filename,
        shapes   = true,
        streams  = streams,
        variable = true,
        subfont  = n,
        instance = instance,
    }
    local glyphs = fontdata and fontdata.glyphs
    if not glyphs then
        return {
            filename = filename,
            format   = "unknown",
            glyphs   = { },
            units    = 0,
        }
    end
    -- easier on luajit but still we can hit the 64 K stack constants issue
    for k, v in next, glyphs do
        v.class = nil
        v.index = nil
        v.math  = nil
     -- v.name  = nil
    end
    local names = fontdata.names
    if names then
        -- only existing keys get a new value so this is safe while traversing
        for k, v in next, names do
            names[k] = fullstrip(v.content)
        end
    end
    local fontheader = fontdata.fontheader
    local units      = 0
    if fontheader then
        units = fontheader.units
    end
    return {
     -- version          = 0.123 -- todo
        filename         = filename,
        format           = fontdata.format,
        glyphs           = glyphs,
        units            = units,
        cffinfo          = fontdata.cffinfo,
        fontheader       = fontheader,
        horizontalheader = fontdata.horizontalheader,
        verticalheader   = fontdata.verticalheader,
        maximumprofile   = fontdata.maximumprofile,
        names            = fontdata.names,
        postscript       = fontdata.postscript,
    }
end
2395
-- Load (sub)font "n" from "filename" with glyphs and lookups but without
-- shapes, and repackage the result as a table with metadata, properties and
-- resources subtables. Nothing is returned when the font could not be loaded.
function readers.loadfont(filename,n,instance)
    local fontdata = loadfont {
        filename    = filename,
        glyphs      = true,
        shapes      = false,
        lookups     = true,
        variable    = true,
     -- kerns       = true,
     -- globalkerns = true, -- only for testing, e.g. cambria has different gpos and kern
        subfont     = n,
        instance    = instance,
    }
    if not fontdata then
        return
    end
    -- one constructor so that the fields are filled in the order given here
    return {
        tableversion  = tableversion,
        creator       = "context mkiv",
        size          = fontdata.filesize,
        time          = fontdata.filetime,
        glyphs        = fontdata.glyphs,
        descriptions  = fontdata.descriptions,
        format        = fontdata.format,
        goodies       = { },
        -- no platformnames here !
        metadata      = getinfo(fontdata,n,false,false,true,true),
        properties    = {
            hasitalics    = fontdata.hasitalics or false,
            maxcolorclass = fontdata.maxcolorclass,
            hascolor      = fontdata.hascolor or false,
            instance      = fontdata.instance,
            factors       = fontdata.factors,
            nofsubfonts   = fontdata.subfonts and #fontdata.subfonts or nil,
        },
        resources     = {
         -- filename      = fontdata.filename,
            filename      = filename,
            private       = privateoffset,
            -- the next six default to empty tables: we need to add these in the loader
            duplicates    = fontdata.duplicates  or { },
            features      = fontdata.features    or { },
            sublookups    = fontdata.sublookups  or { },
            marks         = fontdata.marks       or { },
            markclasses   = fontdata.markclasses or { },
            marksets      = fontdata.marksets    or { },
            sequences     = fontdata.sequences   or { },
            variants      = fontdata.variants, -- variant -> unicode -> glyph
            version       = getname(fontdata,"version"),
            cidinfo       = fontdata.cidinfo,
            mathconstants = fontdata.mathconstants,
            colorpalettes = fontdata.colorpalettes,
            svgshapes     = fontdata.svgshapes,
            pngshapes     = fontdata.pngshapes,
            variabledata  = fontdata.variabledata,
            foundtables   = fontdata.foundtables,
        },
    }
end
2451
-- Return the info table(s) of a font file.
--
-- filename      : string
-- specification : nil, a subfont number, or a table with the optional fields
--                 subfont, platformnames, rawfamilynames and tableoffsets
--
-- Without subfonts in the file one info table is returned. With subfonts and
-- no subfont requested a list with one info table per subfont is returned.
-- When the file can't be loaded or the requested subfont is out of range, a
-- small table with the filename and an explanatory comment comes back.
--
-- The platformnames are optional and not used by context (a too unpredictable
-- mess that only adds to the confusion) so they are only for checking things.
function readers.getinfo(filename,specification) -- string, nil|number|table
    local instancenames = true
    local subfont, platformnames, rawfamilynames, tableoffsets
    if type(specification) == "table" then
        subfont        = tonumber(specification.subfont)
        platformnames  = specification.platformnames
        rawfamilynames = specification.rawfamilynames
        tableoffsets   = specification.tableoffsets
    else
        subfont        = tonumber(specification)
        platformnames  = false
        rawfamilynames = false
        tableoffsets   = false
    end
    local fontdata = loadfont {
        filename       = filename,
        details        = true,
        platformnames  = platformnames,
        instancenames  = true,
        tableoffsets   = tableoffsets,
     -- rawfamilynames = rawfamilynames,
    }
    if not fontdata then
        return {
            filename = filename,
            comment  = "the file cannot be opened for reading",
        }
    end
    local subfonts = fontdata.subfonts
    if not subfonts then
        -- a single font
        return getinfo(fontdata,nil,platformnames,rawfamilynames,false,instancenames)
    end
    if not subfont then
        -- a collection and no specific subfont asked for: report them all
        local list = { }
        for index=1,#subfonts do
            list[index] = getinfo(fontdata,index,platformnames,rawfamilynames,false,instancenames)
        end
        return list
    end
    if subfont >= 1 and subfont <= #subfonts then
        return getinfo(fontdata,subfont,platformnames,rawfamilynames,false,instancenames)
    end
    return {
        filename = filename,
        comment  = "there is no subfont " .. subfont .. " in this file"
    }
end
2501
-- Placeholder: both arguments are ignored, we only report that this helper
-- is not available yet.
function readers.rehash(fontdata,hashmethod)
    local helper = "rehash"
    report("the %a helper is not yet implemented",helper)
end
2505
-- Placeholder: the argument is ignored, we only report that this helper is
-- not available yet.
function readers.checkhash(fontdata)
    local helper = "checkhash"
    report("the %a helper is not yet implemented",helper)
end
2509
-- Placeholder: both arguments are ignored, we only report that this helper
-- is not available yet.
function readers.pack(fontdata,hashmethod)
    local helper = "pack"
    report("the %a helper is not yet implemented",helper)
end
2513
-- Placeholder: the argument is ignored, we only report that this helper is
-- not available yet.
function readers.unpack(fontdata)
    local helper = "unpack"
    report("the %a helper is not yet implemented",helper)
end
2517
-- Placeholder: the argument is ignored, we only report that this helper is
-- not available yet. The message names "expand" (it used to say "unpack",
-- a copy and paste leftover that pointed at the wrong helper).
function readers.expand(fontdata)
    report("the %a helper is not yet implemented","expand")
end
2521
-- Placeholder: the argument is ignored, we only report that this helper is
-- not available yet.
function readers.compact(fontdata)
    local helper = "compact"
    report("the %a helper is not yet implemented",helper)
end
2525
-- Placeholder: the argument is ignored, we only report that this helper is
-- not available yet.
function readers.condense(fontdata)
    local helper = "condense"
    report("the %a helper is not yet implemented",helper)
end
2529
2530-- plug in
2531
-- The registered extenders, kept in the order in which they were registered.
local extenders = { }

-- Append an extender to the list; readers.extend looks at its "action" field.
function readers.registerextender(extender)
    local slot = #extenders + 1
    extenders[slot] = extender
end
2537
-- Run the action of every registered extender on the given fontdata, in the
-- order of registration. Extenders without an "action" field are skipped and
-- whatever an action returns is ignored.
function readers.extend(fontdata)
    for i=1,#extenders do
        -- the (optional) name of an extender is not needed here
        local action = extenders[i].action
        if action then
            action(fontdata)
        end
    end
end
2548