font-phb-imp-library.lua /size: 15 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['font-phb-imp-library'] = {
2    version   = 1.000, -- 2020.01.08,
3    comment   = "companion to font-txt.mkiv",
4    original  = "derived from a prototype by Kai Eigner",
5    author    = "Hans Hagen", -- so don't blame KE
6    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files",
8}
9
-- The hb library comes in versions and the one I tested in 2016 was part of the inkscape
-- suite. In principle one can have incompatibilities due to updates but that is the nature
-- of a library. When a library is expected one had better use the system version, if only
-- to make sure that different programs behave the same.
14--
15-- The main reason for testing this approach was that when Idris was working on his fonts,
16-- we wanted to know how different shapers deal with it and the hb command line program
17-- could provide uniscribe output. For the context shaper uniscribe is the reference, also
18-- because Idris started out with Volt a decade ago.
19--
20-- We treat the lib as a black box as it should be. At some point Kai Eigner made an ffi
21-- binding and that one was adapted to the plugin approach of context. It saved me the
22-- trouble of looking at source files to figure it all out. Below is the adapted code.
23--
24-- Keep in mind that this file is for mkiv only. It won't work in lmtx where instead of
25-- ffi we use simple optional libraries with delayed bindings. In principle this mechanism
26-- is generic but because other macropackages follow another route we don't spend time
27-- on that code path here.
28
29local next, tonumber, pcall = next, tonumber, pcall
30local reverse = table.reverse
31local loaddata = io.loaddata
32
33local report      = utilities.hb.report or print
34local packtoutf32 = utilities.hb.helpers.packtoutf32
35
-- This binding is only usable in mkiv with ffi available. We quit (with a message)
-- when there is no ffi or when we run in lmtx mode, and we quit silently when we
-- are not running inside context at all.

if not FFISUPPORTED or not ffi or (CONTEXTLMTXMODE and CONTEXTLMTXMODE > 0) then
    report("no ffi support")
    return
end

if not context then
    return
end

-- On windows the library file has a different name than elsewhere.

local libraryname = "libharfbuzz"

if os.name == "windows" then
    libraryname = "libharfbuzz-0"
end

local harfbuzz = ffilib(libraryname)

if not harfbuzz then
    report("no hb library found")
    return
end
52
-- jit.on() : on very long runs (hundreds of pages) it looks faster but
-- the normal font processor slows down ... this is consistent with
-- earlier observations that turning it on is often slower on these
-- one-shot tex runs, also because we don't use many math and/or
-- string helpers, and therefore the faster vm of luajit gives most
-- benefits (given the patched hasher).
59
60-- Here is Kai's ffi mapping, a bit reorganized. We only define what we
61-- need. I'm happy that Kai did the deciphering of the api that I could
62-- then build upon.
63
64ffi.cdef [[
65
66typedef struct hb_blob_t hb_blob_t ;
67
68typedef enum {
69    HB_MEMORY_MODE_DUPLICATE,
70    HB_MEMORY_MODE_READONLY,
71    HB_MEMORY_MODE_WRITABLE,
72    HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE
73} hb_memory_mode_t ;
74
75typedef void (*hb_destroy_func_t) (
76    void *user_data
77) ;
78
79typedef struct hb_face_t hb_face_t ;
80
81typedef const struct hb_language_impl_t *hb_language_t ;
82
83typedef struct hb_buffer_t hb_buffer_t ;
84
85typedef enum {
86    HB_SCRIPT_COMMON, HB_SCRIPT_INHERITED, HB_SCRIPT_UNKNOWN,
87
88    HB_SCRIPT_ARABIC, HB_SCRIPT_ARMENIAN, HB_SCRIPT_BENGALI, HB_SCRIPT_CYRILLIC,
89    HB_SCRIPT_DEVANAGARI, HB_SCRIPT_GEORGIAN, HB_SCRIPT_GREEK,
90    HB_SCRIPT_GUJARATI, HB_SCRIPT_GURMUKHI, HB_SCRIPT_HANGUL, HB_SCRIPT_HAN,
91    HB_SCRIPT_HEBREW, HB_SCRIPT_HIRAGANA, HB_SCRIPT_KANNADA, HB_SCRIPT_KATAKANA,
92    HB_SCRIPT_LAO, HB_SCRIPT_LATIN, HB_SCRIPT_MALAYALAM, HB_SCRIPT_ORIYA,
93    HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU, HB_SCRIPT_THAI, HB_SCRIPT_TIBETAN,
94    HB_SCRIPT_BOPOMOFO, HB_SCRIPT_BRAILLE, HB_SCRIPT_CANADIAN_SYLLABICS,
95    HB_SCRIPT_CHEROKEE, HB_SCRIPT_ETHIOPIC, HB_SCRIPT_KHMER, HB_SCRIPT_MONGOLIAN,
96    HB_SCRIPT_MYANMAR, HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC, HB_SCRIPT_SINHALA,
97    HB_SCRIPT_SYRIAC, HB_SCRIPT_THAANA, HB_SCRIPT_YI, HB_SCRIPT_DESERET,
98    HB_SCRIPT_GOTHIC, HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_BUHID, HB_SCRIPT_HANUNOO,
99    HB_SCRIPT_TAGALOG, HB_SCRIPT_TAGBANWA, HB_SCRIPT_CYPRIOT, HB_SCRIPT_LIMBU,
100    HB_SCRIPT_LINEAR_B, HB_SCRIPT_OSMANYA, HB_SCRIPT_SHAVIAN, HB_SCRIPT_TAI_LE,
101    HB_SCRIPT_UGARITIC, HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC,
102    HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_KHAROSHTHI, HB_SCRIPT_NEW_TAI_LUE,
103    HB_SCRIPT_OLD_PERSIAN, HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_TIFINAGH,
104    HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM, HB_SCRIPT_NKO, HB_SCRIPT_PHAGS_PA,
105    HB_SCRIPT_PHOENICIAN, HB_SCRIPT_CARIAN, HB_SCRIPT_CHAM, HB_SCRIPT_KAYAH_LI,
106    HB_SCRIPT_LEPCHA, HB_SCRIPT_LYCIAN, HB_SCRIPT_LYDIAN, HB_SCRIPT_OL_CHIKI,
107    HB_SCRIPT_REJANG, HB_SCRIPT_SAURASHTRA, HB_SCRIPT_SUNDANESE, HB_SCRIPT_VAI,
108    HB_SCRIPT_AVESTAN, HB_SCRIPT_BAMUM, HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
109    HB_SCRIPT_IMPERIAL_ARAMAIC, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
110    HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, HB_SCRIPT_JAVANESE, HB_SCRIPT_KAITHI,
111    HB_SCRIPT_LISU, HB_SCRIPT_MEETEI_MAYEK, HB_SCRIPT_OLD_SOUTH_ARABIAN,
112    HB_SCRIPT_OLD_TURKIC, HB_SCRIPT_SAMARITAN, HB_SCRIPT_TAI_THAM,
113    HB_SCRIPT_TAI_VIET, HB_SCRIPT_BATAK, HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC,
114    HB_SCRIPT_CHAKMA, HB_SCRIPT_MEROITIC_CURSIVE, HB_SCRIPT_MEROITIC_HIEROGLYPHS,
115    HB_SCRIPT_MIAO, HB_SCRIPT_SHARADA, HB_SCRIPT_SORA_SOMPENG, HB_SCRIPT_TAKRI,
116    HB_SCRIPT_BASSA_VAH, HB_SCRIPT_CAUCASIAN_ALBANIAN, HB_SCRIPT_DUPLOYAN,
117    HB_SCRIPT_ELBASAN, HB_SCRIPT_GRANTHA, HB_SCRIPT_KHOJKI, HB_SCRIPT_KHUDAWADI,
118    HB_SCRIPT_LINEAR_A, HB_SCRIPT_MAHAJANI, HB_SCRIPT_MANICHAEAN,
119    HB_SCRIPT_MENDE_KIKAKUI, HB_SCRIPT_MODI, HB_SCRIPT_MRO, HB_SCRIPT_NABATAEAN,
120    HB_SCRIPT_OLD_NORTH_ARABIAN, HB_SCRIPT_OLD_PERMIC, HB_SCRIPT_PAHAWH_HMONG,
121    HB_SCRIPT_PALMYRENE, HB_SCRIPT_PAU_CIN_HAU, HB_SCRIPT_PSALTER_PAHLAVI,
122    HB_SCRIPT_SIDDHAM, HB_SCRIPT_TIRHUTA, HB_SCRIPT_WARANG_CITI, HB_SCRIPT_AHOM,
123    HB_SCRIPT_ANATOLIAN_HIEROGLYPHS, HB_SCRIPT_HATRAN, HB_SCRIPT_MULTANI,
124    HB_SCRIPT_OLD_HUNGARIAN, HB_SCRIPT_SIGNWRITING, HB_SCRIPT_ADLAM,
125    HB_SCRIPT_BHAIKSUKI, HB_SCRIPT_MARCHEN, HB_SCRIPT_OSAGE, HB_SCRIPT_TANGUT,
126    HB_SCRIPT_NEWA, HB_SCRIPT_MASARAM_GONDI, HB_SCRIPT_NUSHU, HB_SCRIPT_SOYOMBO,
127    HB_SCRIPT_ZANABAZAR_SQUARE, HB_SCRIPT_DOGRA, HB_SCRIPT_GUNJALA_GONDI,
128    HB_SCRIPT_HANIFI_ROHINGYA, HB_SCRIPT_MAKASAR, HB_SCRIPT_MEDEFAIDRIN,
129    HB_SCRIPT_OLD_SOGDIAN, HB_SCRIPT_SOGDIAN, HB_SCRIPT_ELYMAIC,
130    HB_SCRIPT_NANDINAGARI, HB_SCRIPT_NYIAKENG_PUACHUE_HMONG, HB_SCRIPT_WANCHO,
131
132    HB_SCRIPT_INVALID, _HB_SCRIPT_MAX_VALUE, _HB_SCRIPT_MAX_VALUE_SIGNED,
133} hb_script_t ;
134
135typedef enum {
136    HB_DIRECTION_INVALID,
137    HB_DIRECTION_LTR,
138    HB_DIRECTION_RTL,
139    HB_DIRECTION_TTB,
140    HB_DIRECTION_BTT
141} hb_direction_t ;
142
143typedef int hb_bool_t ;
144
145typedef uint32_t hb_tag_t ;
146
147typedef struct hb_feature_t {
148    hb_tag_t      tag;
149    uint32_t      value;
150    unsigned int  start;
151    unsigned int  end;
152} hb_feature_t ;
153
154typedef struct hb_font_t hb_font_t ;
155
156typedef uint32_t hb_codepoint_t ;
157typedef int32_t  hb_position_t ;
158typedef uint32_t hb_mask_t ;
159
160typedef union _hb_var_int_t {
161    uint32_t u32;
162    int32_t  i32;
163    uint16_t u16[2];
164    int16_t  i16[2];
165    uint8_t  u8[4];
166    int8_t   i8[4];
167} hb_var_int_t ;
168
169typedef struct hb_glyph_info_t {
170    hb_codepoint_t codepoint ;
171    hb_mask_t      mask ;
172    uint32_t       cluster ;
173    /*< private >*/
174    hb_var_int_t   var1 ;
175    hb_var_int_t   var2 ;
176} hb_glyph_info_t ;
177
178typedef struct hb_glyph_position_t {
179    hb_position_t  x_advance ;
180    hb_position_t  y_advance ;
181    hb_position_t  x_offset ;
182    hb_position_t  y_offset ;
183    /*< private >*/
184    hb_var_int_t   var ;
185} hb_glyph_position_t ;
186
187const char * hb_version_string (
188    void
189) ;
190
191hb_blob_t * hb_blob_create (
192    const char        *data,
193    unsigned int       length,
194    hb_memory_mode_t   mode,
195    void              *user_data,
196    hb_destroy_func_t  destroy
197) ;
198
199void hb_blob_destroy (
200    hb_blob_t *blob
201) ;
202
203hb_face_t * hb_face_create (
204    hb_blob_t    *blob,
205    unsigned int  index
206) ;
207
208void hb_face_destroy (
209    hb_face_t *face
210) ;
211
212hb_language_t hb_language_from_string (
213    const char *str,
214    int        len
215) ;
216
217void hb_buffer_set_language (
218    hb_buffer_t   *buffer,
219    hb_language_t  language
220) ;
221
222hb_script_t hb_script_from_string (
223    const char *s,
224    int         len
225) ;
226
227void hb_buffer_set_script (
228    hb_buffer_t *buffer,
229    hb_script_t  script
230) ;
231
232hb_direction_t hb_direction_from_string (
233    const char *str,
234    int         len
235) ;
236
237void hb_buffer_set_direction (
238    hb_buffer_t     *buffer,
239    hb_direction_t   direction
240) ;
241
242hb_bool_t hb_feature_from_string (
243    const char   *str,
244    int           len,
245    hb_feature_t *feature
246) ;
247
248hb_bool_t hb_shape_full (
249    hb_font_t          *font,
250    hb_buffer_t        *buffer,
251    const hb_feature_t *features,
252    unsigned int        num_features,
253    const char * const *shaper_list
254) ;
255
256
257hb_buffer_t * hb_buffer_create (
258    void
259) ;
260
261void hb_buffer_destroy (
262    hb_buffer_t *buffer
263) ;
264
265void hb_buffer_add_utf8 (
266    hb_buffer_t  *buffer,
267    const char   *text,
268    int           text_length,
269    unsigned int  item_offset,
270    int           item_length
271) ;
272
273void hb_buffer_add_utf32 (
274    hb_buffer_t  *buffer,
275    const char   *text,
276    int           text_length,
277    unsigned int  item_offset,
278    int           item_length
279) ;
280
281void hb_buffer_add (
282    hb_buffer_t    *buffer,
283    hb_codepoint_t  codepoint,
284    unsigned int    cluster
285) ;
286
287unsigned int hb_buffer_get_length (
288    hb_buffer_t *buffer
289) ;
290
291hb_glyph_info_t * hb_buffer_get_glyph_infos (
292    hb_buffer_t  *buffer,
293    unsigned int *length
294) ;
295
296hb_glyph_position_t *hb_buffer_get_glyph_positions (
297    hb_buffer_t  *buffer,
298    unsigned int *length
299) ;
300
301void hb_buffer_reverse (
302    hb_buffer_t *buffer
303) ;
304
305void hb_buffer_reset (
306    hb_buffer_t *buffer
307) ;
308
309void hb_buffer_guess_segment_properties (
310    hb_buffer_t *buffer
311) ;
312
313hb_font_t * hb_font_create (
314    hb_face_t *face
315) ;
316
317void hb_font_destroy (
318    hb_font_t *font
319) ;
320
321void hb_font_set_scale (
322    hb_font_t *font,
323    int        x_scale,
324    int        y_scale
325) ;
326
327void hb_ot_font_set_funcs (
328    hb_font_t *font
329) ;
330
331unsigned int hb_face_get_upem (
332    hb_face_t *face
333) ;
334
335const char ** hb_shape_list_shapers (
336    void
337);
338]]
339
-- The library must be somewhere accessible. The calls to the library are similar to
-- the ones in the prototype but we organize things a bit differently. I tried to alias
-- the functions in the harfbuzz namespace (luajittex will optimize this anyway but
-- normal luatex not) but it crashes luajittex so I reverted that.
344
do

    -- Report the library version and the shapers that it offers. The library gives
    -- us a list of names that ends with a NULL pointer; we look at the first ten
    -- slots at most.

    local list  = harfbuzz.hb_shape_list_shapers()
    local names = { }
    local count = 0
    local index = 0

    while index <= 9 do
        local entry = list[index]
        if entry == ffi.NULL then
            break
        end
        count        = count + 1
        names[count] = ffi.string(entry)
        index        = index + 1
    end

    local version = ffi.string(harfbuzz.hb_version_string())

    report("using hb library version %a, supported shapers: %,t",version,names)

end
362
363-- we don't want to store userdata in the public data blob
364
365local fontdata = fonts.hashes.identifiers
366
367local loaded   = { }
368local shared   = { }
369local featured = { }
370
-- Fetch (or create and cache) the harfbuzz font object for a font.
--
-- font    : font identifier, used as index in fonts.hashes.identifiers
-- returns : an hb_font_t cdata object; instances are cached per font file name so
--           fonts that come from the same file share one object
-- raises  : an error when the font file cannot be loaded
--
-- The whole file is handed over with memory mode 0, which is the first entry
-- (HB_MEMORY_MODE_DUPLICATE) of hb_memory_mode_t in the cdef of this file. The blob,
-- face and font each get their destroy function attached as finalizer.

local function loadfont(font)
    local tfmdata   = fontdata[font]
    local resources = tfmdata.resources
    local filename  = resources.filename
    local instance  = shared[filename]
    if not instance then
        local wholefont = loaddata(filename)
        if not wholefont then
            -- be explicit instead of failing on taking the length of nil below
            error("hb library: unable to load font file '" .. tostring(filename) .. "'")
        end
        local wholeblob = ffi.gc(harfbuzz.hb_blob_create(wholefont,#wholefont,0,nil,nil),harfbuzz.hb_blob_destroy)
        -- NOTE(review): the second argument of hb_face_create is declared as the face
        -- index in the file, but we pass the font identifier here; that looks wrong
        -- for font collections -- confirm where the subfont index should come from.
        local wholeface = ffi.gc(harfbuzz.hb_face_create(wholeblob,font),harfbuzz.hb_face_destroy)
        -- we scale to the units per em of the face, so we get results in font units
        local scale     = harfbuzz.hb_face_get_upem(wholeface)
              instance  = ffi.gc(harfbuzz.hb_font_create(wholeface),harfbuzz.hb_font_destroy)
        harfbuzz.hb_font_set_scale(instance,scale,scale)
        harfbuzz.hb_ot_font_set_funcs(instance)
        shared[filename] = instance
    end
    return instance
end
388
-- Convert the feature strings in data.featureset into an array of hb_feature_t.
--
-- data    : table; only data.featureset (a list of strings) is used, a missing
--           list is treated as an empty one
-- returns : a table with
--             noffeatures : the number of features that the library could parse
--             featureblob : the hb_feature_t array itself (kept so that the memory
--                           stays referenced)
--             featurespec : reference to the first entry, the value that gets
--                           passed to hb_shape_full
--
-- Only features that hb_feature_from_string accepts are counted; a rejected string
-- does not advance the slot, so the next feature reuses it.

local function loadfeatures(data)
    local featureset  = data.featureset or { }
    local nofentries  = #featureset
    local feature     = ffi.new("hb_feature_t[?]",nofentries)
    local noffeatures = 0
    for i=1,nofentries do
        local f  = featureset[i]
        local ok = harfbuzz.hb_feature_from_string(f,#f,feature[noffeatures])
        -- hb_bool_t is an int in our cdef, so we compare with zero
        if ok and ok ~= 0 then
            noffeatures = noffeatures + 1
        end
    end
    return {
        noffeatures = noffeatures,
        featureblob = feature,
        featurespec = feature[0],
    }
end
405
-- The shaper lists are passed to hb_shape_full as "const char * const *" and the
-- library expects such a list to end with a NULL pointer. A variable length array
-- only gets the entries from the table initialized, so we allocate one slot more
-- and set the terminator ourselves. The pointers refer to the Lua strings, so we
-- also keep the tables with these strings around to prevent them from being
-- collected.

local anchoredlists = { }

-- t       : a list of shaper names (strings)
-- returns : a NULL terminated "const char *" array with these names

local function crap(t)
    local n    = #t
    local list = ffi.new("const char *[?]", n + 1, t)
    list[n] = nil -- the terminating NULL pointer
    anchoredlists[#anchoredlists+1] = t
    return list
end

local shapers = {
    native    = crap { "ot", "uniscribe", "fallback" },
    uniscribe = crap { "uniscribe", "ot", "fallback" },
 -- uniscribe = crap { "uniscribe", "fallback" }, -- stalls without fallback when no uniscribe present
    fallback  = crap { "fallback" },
}
416
417-- Reusing a buffer doesn't make a difference in performance so we forget
418-- about it and keep things simple. Todo: check if using locals makes sense.
419
-- Shape a piece of text with the harfbuzz library.
--
-- font     : font identifier (the harfbuzz font object is cached per identifier)
-- data     : table; the fields language, script, shaper, features and (via
--            loadfeatures) featureset are used; language and script default to
--            "dflt", features is the key under which the converted featureset
--            is cached
-- rlmode   : number; a negative value selects right to left, anything else left
--            to right
-- text, leading, trailing :
--            passed as-is to packtoutf32, which has to return a string with four
--            bytes per character (presumably leading and trailing add spaces, see
--            that helper)
--
-- returns  : a list with one entry per item in the shaped buffer:
--            { codepoint, cluster, x_offset, y_offset, x_advance, y_advance }

function utilities.hb.methods.library(font,data,rlmode,text,leading,trailing)
    local instance = loaded[font]
    if not instance then
        instance     = loadfont(font)
        loaded[font] = instance
    end
    -- todo: dflt -> DFLT ?
    -- todo: whatever -> Whatever ?
    local language    = data.language or "dflt"
    local script      = data.script or "dflt"
    local righttoleft = rlmode < 0
    local direction   = righttoleft and "rtl" or "ltr"
    local shaper      = shapers[data.shaper] -- nil (no explicit list) when unknown
    local featurehash = data.features
    local featuredata = featured[featurehash]
    if not featuredata then
        featuredata = loadfeatures(data)
        if featurehash ~= nil then
            -- without a hash we cannot cache (nil is not a valid table index)
            featured[featurehash] = featuredata
        end
    end

    local buffer = ffi.gc(harfbuzz.hb_buffer_create(),harfbuzz.hb_buffer_destroy)

    -- An alternative would be to feed the buffer per character with hb_buffer_add
    -- (or to use utf 8 clusters like on the command line) but that has not been
    -- looked into.
    --
    -- Both lengths of hb_buffer_add_utf32 are counted in utf 32 units, not in
    -- bytes. Passing the byte length as text length makes the library look for
    -- context characters beyond the end of the string, which is why padding was
    -- once needed here to avoid a crash.

    text = packtoutf32(text,leading,trailing)
    local nofcodes = #text/4
    harfbuzz.hb_buffer_add_utf32(buffer,text,nofcodes,0,nofcodes)

    -- maybe: hb_buffer_set_segment_properties(buffer,...)

    harfbuzz.hb_buffer_set_language(buffer,harfbuzz.hb_language_from_string(language,#language))
    harfbuzz.hb_buffer_set_script(buffer,harfbuzz.hb_script_from_string(script,#script))
    harfbuzz.hb_buffer_set_direction(buffer,harfbuzz.hb_direction_from_string(direction,#direction))

    harfbuzz.hb_buffer_guess_segment_properties(buffer) -- why is this needed (we already set them)
    harfbuzz.hb_shape_full(instance,buffer,featuredata.featurespec,featuredata.noffeatures,shaper)

    if righttoleft then
        harfbuzz.hb_buffer_reverse(buffer)
    end

    local nofglyphs = harfbuzz.hb_buffer_get_length(buffer)
    local infos     = harfbuzz.hb_buffer_get_glyph_infos(buffer, nil)
    local positions = harfbuzz.hb_buffer_get_glyph_positions(buffer, nil)

    -- the arrays that we get from the library are zero based
    local result = { }
    for i=1,nofglyphs do
        local info     = infos[i-1]
        local position = positions[i-1]
        result[i] = {
            info.codepoint,
            info.cluster,
            position.x_offset,
            position.y_offset,
            position.x_advance,
            position.y_advance,
        }
    end
 -- inspect(result)
    return result

end
499