font-phb-imp-library.lua /size: 15 Kb    last modification: 2021-10-28 13:50
1
-- Register this file in the global modules table. The table is created here
-- when no other file has done so yet.

do

    local moduleinfo = {
        version   = 1.000, -- 2020.01.08,
        comment   = "companion to font-txt.mkiv",
        original  = "derived from a prototype by Kai Eigner",
        author    = "Hans Hagen", -- so don't blame KE
        copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
        license   = "see context related readme files",
    }

    if not modules then
        modules = { }
    end

    modules["font-phb-imp-library"] = moduleinfo

end
9 10
-- The hb library comes in versions and the one I tested in 2016 was part of the inkscape
11
-- suite. In principle one can have incompatibilities due to updates but that is the nature
12
-- of a library. When a library is expected one had better use the system version, if only
13
-- to make sure that different programs behave the same.
14
--
15
-- The main reason for testing this approach was that when Idris was working on his fonts,
16
-- we wanted to know how different shapers deal with it and the hb command line program
17
-- could provide uniscribe output. For the context shaper uniscribe is the reference, also
18
-- because Idris started out with Volt a decade ago.
19
--
20
-- We treat the lib as a black box as it should be. At some point Kai Eigner made an ffi
21
-- binding and that one was adapted to the plugin approach of context. It saved me the
22
-- trouble of looking at source files to figure it all out. Below is the adapted code.
23
--
24
-- Keep in mind that this file is for mkiv only. It won't work in lmtx where instead of
25
-- ffi we use simple optional libraries with delayed bindings. In principle this mechanism
26
-- is generic but because other macropackages follow another route we don't spend time
27
-- on that code path here.
28 29
-- Local aliases for the globals and helpers that this file refers to.

local next     = next
local tonumber = tonumber
local pcall    = pcall

local reverse  = table.reverse
local loaddata = io.loaddata

-- When the hb utilities provide no reporter we fall back on print.

local report      = utilities.hb.report or print
local packtoutf32 = utilities.hb.helpers.packtoutf32
35 36
-- Stop loading this file when ffi is not available or when we run in lmtx
-- mode (both cases are reported) and silently when there is no context
-- namespace.

if not (FFISUPPORTED and ffi) or (CONTEXTLMTXMODE and CONTEXTLMTXMODE > 0) then
    report("no ffi support")
    return
end

if not context then
    return
end
45 46
-- Ask ffilib for the harfbuzz library; on windows the library goes by
-- another name. Without library there is nothing left to do in this file.

local harfbuzz  do

    local libraryname = "libharfbuzz"

    if os.name == "windows" then
        libraryname = "libharfbuzz-0"
    end

    harfbuzz = ffilib(libraryname)

end

if not harfbuzz then
    report("no hb library found")
    return
end
52 53
-- jit.on() : on very long (hundreds of pages) it looks faster but
54
-- the normal font processor slows down ... this is consistent with
55
-- earlier observations that turning it on is often slower on these
56
-- one-shot tex runs (also because we don't use many math and/or
57
-- string helpers and therefore the faster vm of luajit gives most
58
-- benefits (given the patched hasher)
59 60
-- Here is Kai's ffi mapping, a bit reorganized. We only define what we
61
-- need. I'm happy that Kai did the deciphering of the api that I could
62
-- then build upon.
63 64
-- The C declarations for the part of the hb api that is used below. Only the
-- declarations are given here, the symbols themselves come from the loaded
-- library.
--
-- Notes:
--
-- * The enumerators of hb_script_t are listed without values, so they are
--   numbered sequentially here. NOTE(review): in the library header these are
--   four character tags, so only use script values that the library returns
--   (hb_script_from_string) and never these names -- confirm against hb-common.h.
--
-- * The enumerators of hb_direction_t carry the values of the library header
--   (invalid is 0, ltr starts at 4).
--
-- * hb_buffer_add_utf32 takes the text as 'const char *' so that a packed Lua
--   string can be passed directly.

ffi.cdef [[

typedef struct hb_blob_t hb_blob_t ;

typedef enum {
    HB_MEMORY_MODE_DUPLICATE,
    HB_MEMORY_MODE_READONLY,
    HB_MEMORY_MODE_WRITABLE,
    HB_MEMORY_MODE_READONLY_MAY_MAKE_WRITABLE
} hb_memory_mode_t ;

typedef void (*hb_destroy_func_t) (
    void *user_data
) ;

typedef struct hb_face_t hb_face_t ;

typedef const struct hb_language_impl_t *hb_language_t ;

typedef struct hb_buffer_t hb_buffer_t ;

typedef enum {
    HB_SCRIPT_COMMON, HB_SCRIPT_INHERITED, HB_SCRIPT_UNKNOWN,

    HB_SCRIPT_ARABIC, HB_SCRIPT_ARMENIAN, HB_SCRIPT_BENGALI, HB_SCRIPT_CYRILLIC,
    HB_SCRIPT_DEVANAGARI, HB_SCRIPT_GEORGIAN, HB_SCRIPT_GREEK,
    HB_SCRIPT_GUJARATI, HB_SCRIPT_GURMUKHI, HB_SCRIPT_HANGUL, HB_SCRIPT_HAN,
    HB_SCRIPT_HEBREW, HB_SCRIPT_HIRAGANA, HB_SCRIPT_KANNADA, HB_SCRIPT_KATAKANA,
    HB_SCRIPT_LAO, HB_SCRIPT_LATIN, HB_SCRIPT_MALAYALAM, HB_SCRIPT_ORIYA,
    HB_SCRIPT_TAMIL, HB_SCRIPT_TELUGU, HB_SCRIPT_THAI, HB_SCRIPT_TIBETAN,
    HB_SCRIPT_BOPOMOFO, HB_SCRIPT_BRAILLE, HB_SCRIPT_CANADIAN_SYLLABICS,
    HB_SCRIPT_CHEROKEE, HB_SCRIPT_ETHIOPIC, HB_SCRIPT_KHMER, HB_SCRIPT_MONGOLIAN,
    HB_SCRIPT_MYANMAR, HB_SCRIPT_OGHAM, HB_SCRIPT_RUNIC, HB_SCRIPT_SINHALA,
    HB_SCRIPT_SYRIAC, HB_SCRIPT_THAANA, HB_SCRIPT_YI, HB_SCRIPT_DESERET,
    HB_SCRIPT_GOTHIC, HB_SCRIPT_OLD_ITALIC, HB_SCRIPT_BUHID, HB_SCRIPT_HANUNOO,
    HB_SCRIPT_TAGALOG, HB_SCRIPT_TAGBANWA, HB_SCRIPT_CYPRIOT, HB_SCRIPT_LIMBU,
    HB_SCRIPT_LINEAR_B, HB_SCRIPT_OSMANYA, HB_SCRIPT_SHAVIAN, HB_SCRIPT_TAI_LE,
    HB_SCRIPT_UGARITIC, HB_SCRIPT_BUGINESE, HB_SCRIPT_COPTIC,
    HB_SCRIPT_GLAGOLITIC, HB_SCRIPT_KHAROSHTHI, HB_SCRIPT_NEW_TAI_LUE,
    HB_SCRIPT_OLD_PERSIAN, HB_SCRIPT_SYLOTI_NAGRI, HB_SCRIPT_TIFINAGH,
    HB_SCRIPT_BALINESE, HB_SCRIPT_CUNEIFORM, HB_SCRIPT_NKO, HB_SCRIPT_PHAGS_PA,
    HB_SCRIPT_PHOENICIAN, HB_SCRIPT_CARIAN, HB_SCRIPT_CHAM, HB_SCRIPT_KAYAH_LI,
    HB_SCRIPT_LEPCHA, HB_SCRIPT_LYCIAN, HB_SCRIPT_LYDIAN, HB_SCRIPT_OL_CHIKI,
    HB_SCRIPT_REJANG, HB_SCRIPT_SAURASHTRA, HB_SCRIPT_SUNDANESE, HB_SCRIPT_VAI,
    HB_SCRIPT_AVESTAN, HB_SCRIPT_BAMUM, HB_SCRIPT_EGYPTIAN_HIEROGLYPHS,
    HB_SCRIPT_IMPERIAL_ARAMAIC, HB_SCRIPT_INSCRIPTIONAL_PAHLAVI,
    HB_SCRIPT_INSCRIPTIONAL_PARTHIAN, HB_SCRIPT_JAVANESE, HB_SCRIPT_KAITHI,
    HB_SCRIPT_LISU, HB_SCRIPT_MEETEI_MAYEK, HB_SCRIPT_OLD_SOUTH_ARABIAN,
    HB_SCRIPT_OLD_TURKIC, HB_SCRIPT_SAMARITAN, HB_SCRIPT_TAI_THAM,
    HB_SCRIPT_TAI_VIET, HB_SCRIPT_BATAK, HB_SCRIPT_BRAHMI, HB_SCRIPT_MANDAIC,
    HB_SCRIPT_CHAKMA, HB_SCRIPT_MEROITIC_CURSIVE, HB_SCRIPT_MEROITIC_HIEROGLYPHS,
    HB_SCRIPT_MIAO, HB_SCRIPT_SHARADA, HB_SCRIPT_SORA_SOMPENG, HB_SCRIPT_TAKRI,
    HB_SCRIPT_BASSA_VAH, HB_SCRIPT_CAUCASIAN_ALBANIAN, HB_SCRIPT_DUPLOYAN,
    HB_SCRIPT_ELBASAN, HB_SCRIPT_GRANTHA, HB_SCRIPT_KHOJKI, HB_SCRIPT_KHUDAWADI,
    HB_SCRIPT_LINEAR_A, HB_SCRIPT_MAHAJANI, HB_SCRIPT_MANICHAEAN,
    HB_SCRIPT_MENDE_KIKAKUI, HB_SCRIPT_MODI, HB_SCRIPT_MRO, HB_SCRIPT_NABATAEAN,
    HB_SCRIPT_OLD_NORTH_ARABIAN, HB_SCRIPT_OLD_PERMIC, HB_SCRIPT_PAHAWH_HMONG,
    HB_SCRIPT_PALMYRENE, HB_SCRIPT_PAU_CIN_HAU, HB_SCRIPT_PSALTER_PAHLAVI,
    HB_SCRIPT_SIDDHAM, HB_SCRIPT_TIRHUTA, HB_SCRIPT_WARANG_CITI, HB_SCRIPT_AHOM,
    HB_SCRIPT_ANATOLIAN_HIEROGLYPHS, HB_SCRIPT_HATRAN, HB_SCRIPT_MULTANI,
    HB_SCRIPT_OLD_HUNGARIAN, HB_SCRIPT_SIGNWRITING, HB_SCRIPT_ADLAM,
    HB_SCRIPT_BHAIKSUKI, HB_SCRIPT_MARCHEN, HB_SCRIPT_OSAGE, HB_SCRIPT_TANGUT,
    HB_SCRIPT_NEWA, HB_SCRIPT_MASARAM_GONDI, HB_SCRIPT_NUSHU, HB_SCRIPT_SOYOMBO,
    HB_SCRIPT_ZANABAZAR_SQUARE, HB_SCRIPT_DOGRA, HB_SCRIPT_GUNJALA_GONDI,
    HB_SCRIPT_HANIFI_ROHINGYA, HB_SCRIPT_MAKASAR, HB_SCRIPT_MEDEFAIDRIN,
    HB_SCRIPT_OLD_SOGDIAN, HB_SCRIPT_SOGDIAN, HB_SCRIPT_ELYMAIC,
    HB_SCRIPT_NANDINAGARI, HB_SCRIPT_NYIAKENG_PUACHUE_HMONG, HB_SCRIPT_WANCHO,

    HB_SCRIPT_INVALID, _HB_SCRIPT_MAX_VALUE, _HB_SCRIPT_MAX_VALUE_SIGNED,
} hb_script_t ;

typedef enum {
    HB_DIRECTION_INVALID = 0,
    HB_DIRECTION_LTR     = 4,
    HB_DIRECTION_RTL,
    HB_DIRECTION_TTB,
    HB_DIRECTION_BTT
} hb_direction_t ;

typedef int hb_bool_t ;

typedef uint32_t hb_tag_t ;

typedef struct hb_feature_t {
    hb_tag_t     tag;
    uint32_t     value;
    unsigned int start;
    unsigned int end;
} hb_feature_t ;

typedef struct hb_font_t hb_font_t ;

typedef uint32_t hb_codepoint_t ;
typedef int32_t  hb_position_t ;
typedef uint32_t hb_mask_t ;

typedef union _hb_var_int_t {
    uint32_t u32;
    int32_t  i32;
    uint16_t u16[2];
    int16_t  i16[2];
    uint8_t  u8[4];
    int8_t   i8[4];
} hb_var_int_t ;

typedef struct hb_glyph_info_t {
    hb_codepoint_t codepoint ;
    hb_mask_t      mask ;
    uint32_t       cluster ;
    /*< private >*/
    hb_var_int_t   var1 ;
    hb_var_int_t   var2 ;
} hb_glyph_info_t ;

typedef struct hb_glyph_position_t {
    hb_position_t  x_advance ;
    hb_position_t  y_advance ;
    hb_position_t  x_offset ;
    hb_position_t  y_offset ;
    /*< private >*/
    hb_var_int_t   var ;
} hb_glyph_position_t ;

const char * hb_version_string (
    void
) ;

hb_blob_t * hb_blob_create (
    const char        *data,
    unsigned int       length,
    hb_memory_mode_t   mode,
    void              *user_data,
    hb_destroy_func_t  destroy
) ;

void hb_blob_destroy (
    hb_blob_t *blob
) ;

hb_face_t * hb_face_create (
    hb_blob_t    *blob,
    unsigned int  index
) ;

void hb_face_destroy (
    hb_face_t *face
) ;

hb_language_t hb_language_from_string (
    const char *str,
    int         len
) ;

void hb_buffer_set_language (
    hb_buffer_t   *buffer,
    hb_language_t  language
) ;

hb_script_t hb_script_from_string (
    const char *s,
    int         len
) ;

void hb_buffer_set_script (
    hb_buffer_t *buffer,
    hb_script_t  script
) ;

hb_direction_t hb_direction_from_string (
    const char *str,
    int         len
) ;

void hb_buffer_set_direction (
    hb_buffer_t    *buffer,
    hb_direction_t  direction
) ;

hb_bool_t hb_feature_from_string (
    const char   *str,
    int           len,
    hb_feature_t *feature
) ;

hb_bool_t hb_shape_full (
    hb_font_t          *font,
    hb_buffer_t        *buffer,
    const hb_feature_t *features,
    unsigned int        num_features,
    const char * const *shaper_list
) ;

hb_buffer_t * hb_buffer_create (
    void
) ;

void hb_buffer_destroy (
    hb_buffer_t *buffer
) ;

void hb_buffer_add_utf8 (
    hb_buffer_t  *buffer,
    const char   *text,
    int           text_length,
    unsigned int  item_offset,
    int           item_length
) ;

void hb_buffer_add_utf32 (
    hb_buffer_t  *buffer,
    const char   *text,
    int           text_length,
    unsigned int  item_offset,
    int           item_length
) ;

void hb_buffer_add (
    hb_buffer_t    *buffer,
    hb_codepoint_t  codepoint,
    unsigned int    cluster
) ;

unsigned int hb_buffer_get_length (
    hb_buffer_t *buffer
) ;

hb_glyph_info_t * hb_buffer_get_glyph_infos (
    hb_buffer_t  *buffer,
    unsigned int *length
) ;

hb_glyph_position_t *hb_buffer_get_glyph_positions (
    hb_buffer_t  *buffer,
    unsigned int *length
) ;

void hb_buffer_reverse (
    hb_buffer_t *buffer
) ;

void hb_buffer_reset (
    hb_buffer_t *buffer
) ;

void hb_buffer_guess_segment_properties (
    hb_buffer_t *buffer
) ;

hb_font_t * hb_font_create (
    hb_face_t *face
) ;

void hb_font_destroy (
    hb_font_t *font
) ;

void hb_font_set_scale (
    hb_font_t *font,
    int        x_scale,
    int        y_scale
) ;

void hb_ot_font_set_funcs (
    hb_font_t *font
) ;

unsigned int hb_face_get_upem (
    hb_face_t *face
) ;

const char ** hb_shape_list_shapers (
    void
);

]]
339 340
-- The library must be somewhere accessible. The calls to the library are similar to
341
-- the ones in the prototype but we organize things a bit differently. I tried to alias
342
-- the functions in the harfbuzz namespace (luajittex will optimize this anyway but
343
-- normal luatex not) but it crashes luajittex so I reverted that.
344 345
-- Report the version of the library and the shapers that it offers. The
-- list of shapers ends at the first null entry and we look at no more than
-- ten entries.

do

    local shaperlist = harfbuzz.hb_shape_list_shapers()
    local names      = { }
    local index      = 0

    while index <= 9 do
        local entry = shaperlist[index]
        if entry == ffi.NULL then
            break
        end
        names[#names+1] = ffi.string(entry)
        index = index + 1
    end

    local version = ffi.string(harfbuzz.hb_version_string())

    report("using hb library version %a, supported shapers: %,t",version,names)

end
362 363
-- we don't want to store userdata in the public data blob
364 365
-- The tfm data of a font is found by font id. The three tables hold what we
-- create ourselves: hb font instances by font id (loaded) and by filename
-- (shared), and prepared feature data by feature hash (featured).

local fontdata = fonts.hashes.identifiers

local loaded, shared, featured = { }, { }, { }
370 371
-- Returns the hb font instance that belongs to the given font id. Instances
-- are shared by filename: the font file is loaded and turned into an hb font
-- only the first time that a filename is seen. An error is raised when the
-- font file cannot be loaded.
--
-- The blob is created with memory mode 0, the first entry of hb_memory_mode_t
-- (HB_MEMORY_MODE_DUPLICATE). The font scale is set to the units per em of
-- the face so shaping results are in font units.
--
-- NOTE(review): the font id is passed as the face index to hb_face_create;
-- for a font collection one would expect the index of the subfont here --
-- confirm. Also keep in mind that the cache is keyed by filename only.

local function loadfont(font)
    local filename = fontdata[font].resources.filename
    local instance = shared[filename]
    if instance then
        return instance
    end
    local wholefont = loaddata(filename)
    if not wholefont then
        -- without this check we would end up with a rather obscure message
        error("hb library: unable to load font file '" .. tostring(filename) .. "'")
    end
    local create    = harfbuzz.hb_blob_create(wholefont,#wholefont,0,nil,nil)
    local wholeblob = ffi.gc(create,harfbuzz.hb_blob_destroy)
    local wholeface = ffi.gc(harfbuzz.hb_face_create(wholeblob,font),harfbuzz.hb_face_destroy)
    local scale     = harfbuzz.hb_face_get_upem(wholeface)
    instance = ffi.gc(harfbuzz.hb_font_create(wholeface),harfbuzz.hb_font_destroy)
    harfbuzz.hb_font_set_scale(instance,scale,scale)
    harfbuzz.hb_ot_font_set_funcs(instance)
    shared[filename] = instance
    return instance
end
388 389
-- Converts the feature strings in data.featureset into an array of
-- hb_feature_t entries. The result is a table with:
--
--   noffeatures : the number of entries that were filled
--   featureblob : the array itself (kept here so that it stays referenced)
--   featurespec : the first entry of the array, which is what gets passed
--                 to the shaper
--
-- Strings that the library cannot parse are skipped: they are not counted
-- and their slot is used for the next feature.

local function loadfeatures(data)
    local featureset  = data.featureset or { }
    local nofentries  = #featureset
    local feature     = ffi.new("hb_feature_t[?]",nofentries)
    local featurespec = feature[0]
    local noffeatures = 0
    for i=1,nofentries do
        local f = featureset[i]
        -- hb_bool_t is an int, so we have to compare with zero explicitly
        if harfbuzz.hb_feature_from_string(f,#f,feature[noffeatures]) ~= 0 then
            noffeatures = noffeatures + 1
        end
    end
    return {
        noffeatures = noffeatures,
        featureblob = feature,
        featurespec = featurespec,
    }
end
405 406
-- Turns a Lua list of shaper names into a C array of strings that can be
-- passed as shaper list to hb_shape_full. The library expects that list to
-- end with a null pointer, so we allocate one slot more than there are names
-- and set the entries one by one.
--
-- The name tables are remembered because the array only holds pointers to
-- the string data. NOTE(review): this assumes that the ffi does not keep the
-- Lua strings alive by itself -- confirm for the ffi variant in use.

local crap  do

    local anchored = { }

    crap = function(t)
        local n    = #t
        local list = ffi.new("const char *[?]",n+1)
        for i=1,n do
            list[i-1] = t[i]
        end
        list[n] = nil -- the terminator
        anchored[#anchored+1] = t
        return list
    end

end
409 410
-- The shaper lists by name; data.shaper selects one of them. Each list gives
-- the order in which the library should try its shapers.

local shapers = { }

shapers.native    = crap { "ot", "uniscribe", "fallback" }
shapers.uniscribe = crap { "uniscribe", "ot", "fallback" }
-- shapers.uniscribe = crap { "uniscribe", "fallback" } -- stalls without fallback when no uniscribe present
shapers.fallback  = crap { "fallback" }
416 417
-- Reusing a buffer doesn't make a difference in performance so we forget
418
-- about it and keep things simple. Todo: check if using locals makes sense.
419 420
-- Shapes a run of text with the hb library.
--
--   font     : the font id; the hb font instance is created on first use
--   data     : table with optional fields language and script (both default
--              to "dflt"), shaper (a key in the shapers table), features
--              (the key under which prepared features are cached) and
--              featureset (the list of feature strings)
--   rlmode   : a number, negative means right to left
--   text     : the text, handed over to packtoutf32 together with the next two
--   leading  : passed to packtoutf32
--   trailing : passed to packtoutf32
--
-- The result is a list with one entry per shaped glyph:
--
--   { codepoint, cluster, x_offset, y_offset, x_advance, y_advance }
--
-- For right to left runs the buffer is reversed after shaping.

function utilities.hb.methods.library(font,data,rlmode,text,leading,trailing)
    local instance = loaded[font]
    if not instance then
        instance     = loadfont(font)
        loaded[font] = instance
    end
    -- todo: dflt -> DFLT ?
    -- todo: whatever -> Whatever ?
    local language    = data.language or "dflt"
    local script      = data.script   or "dflt"
    local direction   = rlmode < 0 and "rtl" or "ltr"
    local shaper      = shapers[data.shaper]
    local featurehash = data.features
    local featuredata = featured[featurehash]
    if not featuredata then
        featuredata = loadfeatures(data)
        if featurehash ~= nil then
            -- without hash there is nothing to cache under
            featured[featurehash] = featuredata
        end
    end

    local buffer = ffi.gc(harfbuzz.hb_buffer_create(),harfbuzz.hb_buffer_destroy)

 -- if false then
 --     -- i have no time to look into this now but something like this should
 --     -- be possible .. it probably doesn't make a difference in performance
 --     local n = 0 -- here we also start at 0
 --     if leading then
 --         harfbuzz.hb_buffer_add(buffer,[todo: 0x20],n)
 --     end
 --     for i=1,#text do
 --         n = n + 1
 --         harfbuzz.hb_buffer_add(buffer,[todo: text[i] ],n)
 --     end
 --     if trailing then
 --         n = n + 1
 --         harfbuzz.hb_buffer_add(buffer,[todo: 0x20 ],n)
 --     end
 -- else
        -- maybe also utf 8 clusters here like on the command line but i have no time
        -- to figure that out
        text = packtoutf32(text,leading,trailing)
        local nofcharacters = #text/4
        -- The library wants the text length in utf32 units, not in bytes. When
        -- we pass the byte count it assumes that more text follows the item and
        -- it can read beyond the string when it looks for context. The padding
        -- dates from before this was fixed and is kept as a safety margin.
        text = text .. "\000\000\000\000\000\000\000\000" -- trial and error: avoid crash
        harfbuzz.hb_buffer_add_utf32(buffer,text,nofcharacters,0,nofcharacters)
 -- end

    -- maybe: hb_buffer_set_segment_properties(buffer,...)

    harfbuzz.hb_buffer_set_language (buffer,harfbuzz.hb_language_from_string (language, #language))
    harfbuzz.hb_buffer_set_script   (buffer,harfbuzz.hb_script_from_string   (script,   #script))
    harfbuzz.hb_buffer_set_direction(buffer,harfbuzz.hb_direction_from_string(direction,#direction))

    harfbuzz.hb_buffer_guess_segment_properties(buffer) -- why is this needed (we already set them)
    harfbuzz.hb_shape_full(instance,buffer,featuredata.featurespec,featuredata.noffeatures,shaper)

    if rlmode < 0 then
        harfbuzz.hb_buffer_reverse(buffer)
    end

    local nofglyphs = harfbuzz.hb_buffer_get_length(buffer)
    local infos     = harfbuzz.hb_buffer_get_glyph_infos(buffer,nil)
    local positions = harfbuzz.hb_buffer_get_glyph_positions(buffer,nil)

    local result = { }
    for i=1,nofglyphs do
        -- the c arrays start at zero
        local info     = infos[i-1]
        local position = positions[i-1]
        result[i] = {
            info.codepoint,
            info.cluster,
            position.x_offset,
            position.y_offset,
            position.x_advance,
            position.y_advance,
        }
    end
 -- inspect(result)
    return result

end
499