scrp-cjk.lua /size: 37 Kb    last modification: 2020-07-01 14:35
1
-- Module registration: make sure the global registry exists and then store
-- the descriptive record for this file under its own name.
if not modules then
    modules = { }
end

modules['scrp-cjk'] = {
    version   = 1.001,
    comment   = "companion to scrp-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
-- We can speed this up by preallocating nodes and copying them but the gain is not
10
-- that large.
11
--
12
-- If needed we can speed this up (traversers and prev next and such) but cjk
13
-- documents don't have that many glyphs and certainly not much font processing so
14
-- there is not much gain in it.
15
--
16
-- The input line endings: there is no way to distinguish between inline spaces and
17
-- endofline turned into spaces (would not make sense either because otherwise a
18
-- wanted space at the end of a line would have to be a hard coded one).
19 20
local
nuts
=
nodes
.
nuts
21 22
local
insert_node_after
=
nuts
.
insert_after
23
local
insert_node_before
=
nuts
.
insert_before
24
local
copy_node
=
nuts
.
copy
25
local
remove_node
=
nuts
.
remove
26
local
nextglyph
=
nuts
.
traversers
.
glyph
27 28
local
getnext
=
nuts
.
getnext
29
local
getprev
=
nuts
.
getprev
30
local
getfont
=
nuts
.
getfont
31
local
getchar
=
nuts
.
getchar
32
local
getid
=
nuts
.
getid
33
local
getsubtype
=
nuts
.
getsubtype
34
local
getwidth
=
nuts
.
getwidth
35 36
local
setchar
=
nuts
.
setchar
37 38
local
nodepool
=
nuts
.
pool
39
local
new_glue
=
nodepool
.
glue
40
local
new_kern
=
nodepool
.
kern
41
local
new_penalty
=
nodepool
.
penalty
42 43
local
nodecodes
=
nodes
.
nodecodes
44
local
gluecodes
=
nodes
.
gluecodes
45 46
local
glyph_code
=
nodecodes
.
glyph
47
local
glue_code
=
nodecodes
.
glue
48 49
local
userskip_code
=
gluecodes
.
userskip
50
local
spaceskip_code
=
gluecodes
.
spaceskip
51
local
xspaceskip_code
=
gluecodes
.
xspaceskip
52 53
local
hash
=
scripts
.
hash
54 55
local
getscriptstatus
=
scripts
.
getstatus
56
local
getscriptdata
=
scripts
.
getdata
57
local
scriptcolors
=
scripts
.
colors
58 59
local
fonthashes
=
fonts
.
hashes
60
local
quaddata
=
fonthashes
.
quads
61
local
spacedata
=
fonthashes
.
spaces
62 63
local
decomposed
=
characters
.
hangul
.
decomposed
64 65
-- Detail tracing starts disabled; the "scripts.details" tracker stores
-- whatever value it is called with in trace_details.
local trace_details = false

trackers.register("scripts.details", function(v)
    trace_details = v
end)

-- Reporter used by the trace helpers further down.
local report_details = logs.reporter("scripts","detail")
68 69
-- raggedleft is controlled by leftskip and we might end up with a situation where
70
-- the intercharacter spacing interferes with this; the solution is to patch the
71
-- nodelist but better is to use veryraggedleft
72 73
-- Shared state for the injection helpers. All values start at zero and are
-- overwritten by set_parameters.

-- stretch and shrink amounts (dataset factor times the quad)
local inter_char_shrink          = 0
local inter_char_stretch         = 0
local inter_char_half_shrink     = 0
local inter_char_half_stretch    = 0
local inter_char_quarter_shrink  = 0
local inter_char_quarter_stretch = 0

-- quad, quad/2 and quad/4
local full_char_width            = 0
local half_char_width            = 0
local quarter_char_width         = 0

-- penalty used by korean_break
local inter_char_hangul_penalty  = 0
85 86
-- Refreshes the shared spacing upvalues for a font.
--
-- font : key into quaddata (the quad of that font is looked up there)
-- data : table with the inter_char_*_factor fields and
--        inter_char_hangul_penalty
--
-- beware: parameters can be nil in e.g. punk variants
local function set_parameters(font,data)
    local em = quaddata[font]
    -- widths
    full_char_width    = em
    half_char_width    = em / 2
    quarter_char_width = em / 4
    -- factors are expressed as a fraction of the quad
    inter_char_shrink          = data.inter_char_shrink_factor          * em
    inter_char_stretch         = data.inter_char_stretch_factor         * em
    inter_char_half_shrink     = data.inter_char_half_shrink_factor     * em
    inter_char_half_stretch    = data.inter_char_half_stretch_factor    * em
    inter_char_quarter_shrink  = data.inter_char_quarter_shrink_factor  * em
    inter_char_quarter_stretch = data.inter_char_quarter_stretch_factor * em
    -- the penalty is copied unscaled
    inter_char_hangul_penalty  = data.inter_char_hangul_penalty
end
100 101
-- a test version did compensate for crappy halfwidth but we can best do that
102
-- at font definition time and/or just assume a correct font
103 104
-- Reports an injection step. Depending on whether the current node and its
-- predecessor are glyphs, the character and its hash entry are shown before
-- and/or after the description.
--
-- current : node the action is applied to
-- what    : description shown in the log line
local function trace_detail(current,what)
    local before  = getprev(current)
    local c_id    = getid(current)
    local p_id    = before and getid(before)
    local c_glyph = c_id == glyph_code
    local p_glyph = p_id == glyph_code
    if c_glyph and p_glyph then
        local c_ch = getchar(current)
        local p_ch = p_id and getchar(before)
        report_details("[%C %a] [%s] [%C %a]",p_ch,hash[p_ch],what,c_ch,hash[c_ch])
    elseif c_glyph then
        local c_ch = getchar(current)
        report_details("[%s] [%C %a]",what,c_ch,hash[c_ch])
    elseif p_glyph then
        local p_ch = p_id and getchar(before)
        report_details("[%C %a] [%s]",p_ch,hash[p_ch],what)
    else
        report_details("[%s]",what)
    end
end
125 126
-- Reports an action that concerns the node between two glyphs: the
-- characters of p and n are shown left and right of the description.
local function trace_detail_between(p,n,what)
    local left  = getchar(p)
    local right = getchar(n)
    report_details(
        "[%C %a] [%s] [%C %a]",
        left, hash[left],
        what,
        right, hash[right]
    )
end
131 132
-- Inserts a penalty of 10000 in front of current.
--
-- head    : list head handed to insert_node_before (its result is not used)
-- current : node that gets the penalty in front of it
local function nobreak(head,current)
    if trace_details then
        -- the label used to be "break", which contradicted the penalty
        -- inserted below; it now follows the "no break stretch" wording
        trace_detail(current,"no break")
    end
    insert_node_before(head,current,new_penalty(10000))
end
138 139
-- Inserts a glue node built from (0, inter_char_stretch, 0) in front of
-- current; no penalty is added.
local function stretch_break(head,current)
    if trace_details then
        trace_detail(current,"stretch break")
    end
    local glue = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,glue)
end
145 146
-- Inserts a glue node built from (0, 0, inter_char_half_shrink) in front of
-- current; no penalty is added.
local function shrink_break(head,current)
    if trace_details then
        trace_detail(current,"shrink break")
    end
    local glue = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,glue)
end
152 153
-- Inserts, in front of current, a 10000 penalty followed by a glue node
-- built from (0, inter_char_stretch, 0).
local function nobreak_stretch(head,current)
    if trace_details then
        trace_detail(current,"no break stretch")
    end
    local penalty = new_penalty(10000)
    insert_node_before(head,current,penalty)
    local glue = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,glue)
end
160 161
-- Inserts a penalty with the value of inter_char_hangul_penalty (as last set
-- by set_parameters) in front of current.
local function korean_break(head,current)
    if trace_details then
        trace_detail(current,"korean break")
    end
    local penalty = new_penalty(inter_char_hangul_penalty)
    insert_node_before(head,current,penalty)
end
167 168
-- Inserts, in front of current, a 10000 penalty followed by a glue node
-- built from (0, 0, inter_char_half_shrink).
local function nobreak_shrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak shrink")
    end
    local penalty = new_penalty(10000)
    insert_node_before(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,glue)
end
175 176
-- Inserts, in front of current, a 10000 penalty followed by a glue node
-- built from (0, 0, inter_char_half_shrink). Apart from the trace label
-- this inserts the same nodes as nobreak_shrink.
local function nobreak_autoshrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak autoshrink")
    end
    local penalty = new_penalty(10000)
    insert_node_before(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,glue)
end
183 184
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, inter_char_stretch, 0),
--   penalty 10000, glue (0, 0, inter_char_half_shrink)
local function nobreak_stretch_nobreak_shrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak stretch nobreak shrink")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
    node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
end
193 194
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, inter_char_stretch, 0),
--   penalty 10000, glue (0, 0, inter_char_half_shrink)
local function nobreak_stretch_nobreak_autoshrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak stretch nobreak autoshrink")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
    node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
end
203 204
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   penalty 10000, glue (0, inter_char_stretch, 0)
local function nobreak_shrink_nobreak_stretch(head,current)
    if trace_details then
        trace_detail(current,"nobreak shrink nobreak stretch")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
    node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
end
213 214
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   penalty 10000, glue (0, inter_char_stretch, 0)
local function nobreak_autoshrink_nobreak_stretch(head,current)
    if trace_details then
        trace_detail(current,"nobreak autoshrink nobreak stretch")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
    node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
end
223 224
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   glue (0, inter_char_stretch, 0)
local function nobreak_shrink_break_stretch(head,current)
    if trace_details then
        trace_detail(current,"nobreak shrink break stretch")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
end
232 233
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   glue (0, inter_char_stretch, 0)
local function nobreak_autoshrink_break_stretch(head,current)
    if trace_details then
        trace_detail(current,"nobreak autoshrink break stretch")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
end
241 242
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   glue (0, inter_char_stretch, 0),
--   penalty 10000, glue (0, 0, inter_char_half_shrink)
--
-- The last glue used to be a second (0, inter_char_stretch, 0) glue. That
-- contradicted the "nobreak shrink" tail of the name and differed from
-- nobreak_autoshrink_break_stretch_nobreak_shrink, which ends in the shrink
-- glue used here.
local function nobreak_shrink_break_stretch_nobreak_shrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak shrink break stretch nobreak shrink")
    end
    -- nobreak shrink
    insert_node_before(head,current,new_penalty(10000))
    insert_node_before(head,current,new_glue(0,0,inter_char_half_shrink))
    -- break stretch
    insert_node_before(head,current,new_glue(0,inter_char_stretch,0))
    -- nobreak shrink
    insert_node_before(head,current,new_penalty(10000))
    insert_node_before(head,current,new_glue(0,0,inter_char_half_shrink))
end
252 253
-- Inserts in front of current, in this order:
--   kern -half_char_width,
--   glue (half_char_width, 0, inter_char_half_shrink),
--   kern -half_char_width
--
-- todo: check width
local function japanese_between_full_close_open(head,current)
    if trace_details then
        trace_detail(current,"japanese between full close open")
    end
    local node = new_kern(-half_char_width)
    insert_node_before(head,current,node)
    node = new_glue(half_char_width,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
    node = new_kern(-half_char_width)
    insert_node_before(head,current,node)
end
261 262
-- Inserts a single kern of -half_char_width in front of current.
--
-- todo: check width
local function japanese_between_full_close_full_close(head,current)
    if trace_details then
        trace_detail(current,"japanese between full close full close")
    end
    local kern = new_kern(-half_char_width)
    insert_node_before(head,current,kern)
    -- disabled:
    -- insert_node_before(head,current,new_glue(half_char_width,0,inter_char_half_shrink))
end
269 270
-- Inserts in front of current, in this order:
--   penalty 10000,
--   glue (quarter_char_width, 0, inter_char_quarter_shrink),
--   kern -quarter_char_width
--
-- todo: check width
local function japanese_before_full_width_punct(head,current)
    if trace_details then
        trace_detail(current,"japanese before full width punct")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(quarter_char_width,0,inter_char_quarter_shrink)
    insert_node_before(head,current,node)
    node = new_kern(-quarter_char_width)
    insert_node_before(head,current,node)
end
278 279
-- Inserts in front of current, in this order:
--   kern -quarter_char_width,
--   glue (quarter_char_width, 0, inter_char_quarter_shrink)
--
-- todo: check width
local function japanese_after_full_width_punct(head,current)
    if trace_details then
        trace_detail(current,"japanese after full width punct")
    end
    local node = new_kern(-quarter_char_width)
    insert_node_before(head,current,node)
    node = new_glue(quarter_char_width,0,inter_char_quarter_shrink)
    insert_node_before(head,current,node)
end
286 287
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   glue (0, inter_char_stretch, 0),
--   penalty 10000, glue (0, 0, inter_char_half_shrink)
local function nobreak_autoshrink_break_stretch_nobreak_autoshrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak autoshrink break stretch nobreak autoshrink")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
    node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
end
297 298
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   glue (0, inter_char_stretch, 0),
--   penalty 10000, glue (0, 0, inter_char_half_shrink)
local function nobreak_autoshrink_break_stretch_nobreak_shrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak autoshrink break stretch nobreak shrink")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
    node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
end
308 309
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, 0, inter_char_half_shrink),
--   glue (0, inter_char_stretch, 0),
--   penalty 10000, glue (0, 0, inter_char_half_shrink)
--
-- The last glue used to be a second (0, inter_char_stretch, 0) glue. That
-- contradicted the "nobreak autoshrink" tail of the name and differed from
-- nobreak_autoshrink_break_stretch_nobreak_autoshrink, which ends in the
-- shrink glue used here.
local function nobreak_shrink_break_stretch_nobreak_autoshrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak shrink break stretch nobreak autoshrink")
    end
    -- nobreak shrink
    insert_node_before(head,current,new_penalty(10000))
    insert_node_before(head,current,new_glue(0,0,inter_char_half_shrink))
    -- break stretch
    insert_node_before(head,current,new_glue(0,inter_char_stretch,0))
    -- nobreak autoshrink
    insert_node_before(head,current,new_penalty(10000))
    insert_node_before(head,current,new_glue(0,0,inter_char_half_shrink))
end
319 320
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, inter_char_stretch, 0),
--   glue (0, 0, inter_char_half_shrink)
local function nobreak_stretch_break_shrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak stretch break shrink")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
end
328 329
-- Inserts in front of current, in this order:
--   penalty 10000, glue (0, inter_char_stretch, 0),
--   glue (0, 0, inter_char_half_shrink)
local function nobreak_stretch_break_autoshrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak stretch break autoshrink")
    end
    local node = new_penalty(10000)
    insert_node_before(head,current,node)
    node = new_glue(0,inter_char_stretch,0)
    insert_node_before(head,current,node)
    node = new_glue(0,0,inter_char_half_shrink)
    insert_node_before(head,current,node)
end
337 338
-- Korean: hangul
339 340
-- Action tables for the hangul method. Each korean_N table maps the script
-- status of the current glyph to the helper that is called for it; the
-- injectors table below selects one of them by the status of the previous
-- glyph. A missing entry means that nothing gets inserted.

local korean_0 = {
}

local korean_1 = {
    jamo_initial     = korean_break,
    korean           = korean_break,
    chinese          = korean_break,
    hiragana         = korean_break,
    katakana         = korean_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak,
    full_width_open  = stretch_break,
    full_width_close = nobreak,
    full_width_punct = nobreak,
 -- hyphen           = nil,
    non_starter      = korean_break,
    other            = korean_break,
}

local korean_2 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak,
    full_width_open  = stretch_break,
    full_width_close = nobreak,
    full_width_punct = nobreak,
 -- hyphen           = nil,
    non_starter      = stretch_break,
    other            = stretch_break,
}

local korean_3 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak,
    full_width_open  = stretch_break,
    full_width_close = nobreak,
    full_width_punct = nobreak,
 -- hyphen           = nil,
    non_starter      = nobreak,
    other            = nobreak,
}

local korean_4 = {
    jamo_initial     = nobreak,
    korean           = nobreak,
    chinese          = nobreak,
    hiragana         = nobreak,
    katakana         = nobreak,
    half_width_open  = nobreak,
    half_width_close = nobreak,
    full_width_open  = nobreak,
    full_width_close = nobreak,
    full_width_punct = nobreak,
    hyphen           = nobreak,
    non_starter      = nobreak,
    other            = nobreak,
}

local korean_5 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak_stretch,
    full_width_open  = stretch_break,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local injectors = { -- [previous] [current]
    jamo_final       = korean_1,
    korean           = korean_1,
    chinese          = korean_1,
    hiragana         = korean_1,
    katakana         = korean_1,
    hyphen           = korean_2,
    start            = korean_0,
    other            = korean_2,
    non_starter      = korean_3,
    full_width_open  = korean_4,
    half_width_open  = korean_4,
    full_width_close = korean_5,
    full_width_punct = korean_5,
    half_width_close = korean_5,
}
439 440
-- Trace color names per script status, stored in the shared
-- scripts.colors table.
scriptcolors.korean           = "trace:0"
scriptcolors.chinese          = "trace:0"
scriptcolors.katakana         = "trace:0"
scriptcolors.hiragana         = "trace:0"
scriptcolors.full_width_open  = "trace:1"
scriptcolors.full_width_close = "trace:2"
scriptcolors.half_width_open  = "trace:3"
scriptcolors.half_width_close = "trace:4"
scriptcolors.full_width_punct = "trace:5"
------------.hyphen           = "trace:5"
scriptcolors.non_starter      = "trace:6"
scriptcolors.jamo_initial     = "trace:7"
scriptcolors.jamo_medial      = "trace:8"
scriptcolors.jamo_final       = "trace:9"
454 455
-- Injector for the hangul method. Walks the node list starting at first.
-- For glyphs it looks up injectors[previous][current status] and calls the
-- helper found there; a non-glyph node that sits between two glyphs with
-- suitable script status is removed.
--
-- head  : list head, handed to the helpers and to remove_node
-- first : node where the walk starts
-- last  : only used for the first ~= last test (see the note below)
--
-- Nothing is returned.
--
-- NOTE(review): the results of insert_node_before and remove_node are not
-- used, so a changed list head is not reported back; confirm that callers
-- do not depend on that.
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    -- NOTE(review): this local hides the last parameter, so the loop below
    -- only stops when getnext returns nil; confirm that this is intended.
    local last = nil
    while true do
        local upcoming = getnext(first)
        local id       = getid(first)
        if id == glyph_code then
            local current = getscriptstatus(first)
            local actions = injectors[previous]
            local action  = actions and actions[current]
            if action then
                -- the spacing parameters are refreshed on a font switch only
                local font = getfont(first)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(first))
                end
                action(head,first)
            end
            previous = current
        else -- glue
            -- NOTE(review): no check on the node id is made here, so any
            -- non-glyph node between two matching glyphs gets removed.
            local state = "start"
            local p     = getprev(first)
            local n     = upcoming
            if p and n then
                local pid = getid(p)
                local nid = getid(n)
                if pid == glyph_code and nid == glyph_code then
                    local pcjk = getscriptstatus(p)
                    local ncjk = getscriptstatus(n)
                    local keep = not pcjk                or not ncjk
                              or pcjk == "korean"        or ncjk == "korean"
                              or pcjk == "other"         or ncjk == "other"
                              or pcjk == "jamo_final"    or ncjk == "jamo_initial"
                    if not keep then -- if head ~= first then
                        remove_node(head,first,true)
                        state = pcjk
                    end
                end
            end
            previous = state
        end
        if upcoming == last then -- was stop
            break
        end
        first = upcoming
    end
end
513 514
-- Registers the hangul method with its injector and default dataset.
scripts.installmethod {
    name     = "hangul",
    injector = process,
    datasets = { -- todo: metatables
        default = {
            -- all factors are fractions of the quad
            inter_char_shrink_factor          = 0.50,
            inter_char_stretch_factor         = 0.50,
            inter_char_half_shrink_factor     = 0.50,
            inter_char_half_stretch_factor    = 0.50,
            inter_char_quarter_shrink_factor  = 0.50,
            inter_char_quarter_stretch_factor = 0.50,
            inter_char_hangul_penalty         =   50,
        },
    },
}
529 530
-- Replaces every glyph for which decomposed(char) returns values by two or
-- three glyphs: the glyph itself gets the first value, and copies carrying
-- the second and (when present) third value are inserted right after it.
--
-- head : list to process
--
-- Returns the (possibly updated) head and a boolean that is true when at
-- least one glyph was decomposed.
function scripts.decomposehangul(head)
    local done = false
    for glyph, char in nextglyph, head do
        local lead, medial, tail = decomposed(char)
        if lead then
            setchar(glyph,lead)
            local m = copy_node(glyph)
            setchar(m,medial)
            -- anchor is the node the next copy is made from and inserted after
            local anchor
            head, anchor = insert_node_after(head,glyph,m)
            if tail then
                local t = copy_node(anchor)
                setchar(t,tail)
                head = insert_node_after(head,anchor,t)
            end
            done = true
        end
    end
    return head, done
end
549 550
-- nodes.tasks.prependaction("processors","normalizers","scripts.decomposehangul")
551 552
-- Makes the decomposition available as the font feature "decomposehangul",
-- registered as a node processor at position 1.
local otffeatures        = fonts.constructors.features.otf
local registerotffeature = otffeatures.register

registerotffeature {
    name        = "decomposehangul",
    description = "decompose hangul",
    processors  = {
        position = 1,
        node     = scripts.decomposehangul,
    }
}
563 564
-- Chinese: hanzi
565 566
-- Action tables for the hanzi method (first part). Each chinese_N table maps
-- the script status of the current glyph to the helper that is called for
-- it. A missing (or nil) entry means that nothing gets inserted.

local chinese_0 = {
}

local chinese_1 = {
    jamo_initial     = korean_break,
    korean           = korean_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
 -- hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local chinese_2 = {
    jamo_initial     = korean_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local chinese_3 = {
    jamo_initial     = korean_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
 -- hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local chinese_4 = {
 -- jamo_initial     = nil,
 -- korean           = nil,
 -- chinese          = nil,
 -- hiragana         = nil,
 -- katakana         = nil,
    half_width_open  = nobreak_autoshrink,
    half_width_close = nil,
    full_width_open  = nobreak_shrink,
    full_width_close = nobreak,
    full_width_punct = nobreak,
 -- hyphen           = nil,
    non_starter      = nobreak,
 -- other            = nil,
}

local chinese_5 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
 -- hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local chinese_6 = {
    jamo_initial     = nobreak_stretch,
    korean           = nobreak_stretch,
    chinese          = nobreak_stretch,
    hiragana         = nobreak_stretch,
    katakana         = nobreak_stretch,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = nobreak_stretch,
}
664 665
-- Actions by status of the current glyph; this table is selected by the
-- hanzi injectors when the previous glyph is full_width_close or
-- full_width_punct.
local chinese_7 = {
    -- the key was spelled "jami_initial", a status that is used nowhere
    -- else in this file, so the entry could never be found
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_shrink_break_stretch,
    chinese          = stretch_break, -- nobreak_shrink_break_stretch,
    hiragana         = stretch_break, -- nobreak_shrink_break_stretch,
    katakana         = stretch_break, -- nobreak_shrink_break_stretch,
    half_width_open  = nobreak_shrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_shrink_nobreak_stretch,
    full_width_open  = nobreak_shrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_shrink_nobreak_stretch,
    full_width_punct = nobreak_shrink_nobreak_stretch,
    hyphen           = nobreak_shrink_break_stretch,
    non_starter      = nobreak_shrink_break_stretch,
    other            = nobreak_shrink_break_stretch,
}
680 681
-- Actions by status of the current glyph; this table is selected by the
-- hanzi injectors when the previous glyph is half_width_close.
local chinese_8 = {
    -- the key was spelled "jami_initial", a status that is used nowhere
    -- else in this file, so the entry could never be found
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_autoshrink_break_stretch,
    chinese          = stretch_break, -- nobreak_autoshrink_break_stretch,
    hiragana         = stretch_break, -- nobreak_autoshrink_break_stretch,
    katakana         = stretch_break, -- nobreak_autoshrink_break_stretch,
    half_width_open  = nobreak_autoshrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_open  = nobreak_autoshrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_punct = nobreak_autoshrink_nobreak_stretch,
    hyphen           = nobreak_autoshrink_break_stretch,
    non_starter      = nobreak_autoshrink_break_stretch,
    other            = nobreak_autoshrink_break_stretch,
}
696 697
-- Selects the hanzi action table by the script status of the previous
-- glyph; "start" is the status used at the beginning of a run.
local injectors = { -- [previous] [current]
    jamo_final       = chinese_1,
    korean           = chinese_1,
    chinese          = chinese_2,
    hiragana         = chinese_2,
    katakana         = chinese_2,
    hyphen           = chinese_3,
    start            = chinese_4,
    other            = chinese_5,
    non_starter      = chinese_5,
    full_width_open  = chinese_6,
    half_width_open  = chinese_6,
    full_width_close = chinese_7,
    full_width_punct = chinese_7,
    half_width_close = chinese_8,
}
713 714
-- Injector for the hanzi method. Walks the node list starting at first.
-- For glyphs it looks up injectors[previous][current status] and calls the
-- helper found there; a non-glyph node that sits between two glyphs with
-- suitable script status is removed.
--
-- head  : list head, handed to the helpers and to remove_node
-- first : node where the walk starts
-- last  : only used for the first ~= last test (see the note below)
--
-- Nothing is returned.
--
-- NOTE(review): the results of insert_node_before and remove_node are not
-- used, so a changed list head is not reported back; confirm that callers
-- do not depend on that.
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    -- NOTE(review): this local hides the last parameter, so the loop below
    -- only stops when getnext returns nil; confirm that this is intended.
    local last = nil
    while true do
        local upcoming = getnext(first)
        local id       = getid(first)
        if id == glyph_code then
            local current = getscriptstatus(first)
            local actions = injectors[previous]
            local action  = actions and actions[current]
            if action then
                -- the spacing parameters are refreshed on a font switch only
                local font = getfont(first)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(first))
                end
                action(head,first)
            end
            previous = current
        else -- glue
            -- NOTE(review): no check on the node id is made here, so any
            -- non-glyph node between two matching glyphs gets removed.
            local state = "start"
            local p     = getprev(first)
            local n     = upcoming
            if p and n then
                local pid = getid(p)
                local nid = getid(n)
                if pid == glyph_code and nid == glyph_code then
                    local pcjk = getscriptstatus(p)
                    local ncjk = getscriptstatus(n)
                    local keep = not pcjk                   or not ncjk
                              or pcjk == "korean"           or ncjk == "korean"
                              or pcjk == "other"            or ncjk == "other"
                              or pcjk == "jamo_final"       or ncjk == "jamo_initial"
                              or pcjk == "half_width_close" or ncjk == "half_width_open" -- extra compared to korean
                    if not keep then -- if head ~= first then
                        remove_node(head,first,true)
                        state = pcjk
                    end
                end
            end
            previous = state
        end
        if upcoming == last then -- was stop
            break
        end
        first = upcoming
    end
end
773 774
-- Registers the hanzi method with its injector and default dataset.
scripts.installmethod {
    name     = "hanzi",
    injector = process,
    datasets = {
        default = {
            -- all factors are fractions of the quad
            inter_char_shrink_factor          = 0.50,
            inter_char_stretch_factor         = 0.50,
            inter_char_half_shrink_factor     = 0.50,
            inter_char_half_stretch_factor    = 0.50,
            inter_char_quarter_shrink_factor  = 0.50,
            inter_char_quarter_stretch_factor = 0.50,
            inter_char_hangul_penalty         =   50,
        },
    },
}
789 790
-- Japanese: ideographic, hiragana, katakana, romaji / jis
791 792
-- Action tables for the nihongo method (first part). Each japanese_N table
-- maps the script status of the current glyph to the helper that is called
-- for it. A missing (or nil) entry means that nothing gets inserted.

local japanese_0 = {
}

local japanese_1 = {
    jamo_initial     = korean_break,
    korean           = korean_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
 -- hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local japanese_2 = {
    jamo_initial     = korean_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = japanese_before_full_width_punct, -- nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local japanese_3 = {
    jamo_initial     = korean_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
 -- hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local japanese_4 = {
 -- jamo_initial     = nil,
 -- korean           = nil,
 -- chinese          = nil,
 -- hiragana         = nil,
 -- katakana         = nil,
    half_width_open  = nobreak_autoshrink,
    half_width_close = nil,
    full_width_open  = nobreak_shrink,
    full_width_close = nobreak,
    full_width_punct = nobreak,
 -- hyphen           = nil,
    non_starter      = nobreak,
 -- other            = nil,
}

local japanese_5 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
 -- hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}

local japanese_6 = {
    jamo_initial     = nobreak_stretch,
    korean           = nobreak_stretch,
    chinese          = nobreak_stretch,
    hiragana         = nobreak_stretch,
    katakana         = nobreak_stretch,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = nobreak_stretch,
}
890 891
-- Actions by status of the current glyph; this table is selected by the
-- nihongo injectors when the previous glyph is full_width_close or
-- full_width_punct.
local japanese_7 = {
    -- the key was spelled "jami_initial", a status that is used nowhere
    -- else in this file, so the entry could never be found
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_shrink_break_stretch,
    chinese          = japanese_after_full_width_punct, -- stretch_break
    hiragana         = japanese_after_full_width_punct, -- stretch_break
    katakana         = japanese_after_full_width_punct, -- stretch_break
    half_width_open  = nobreak_shrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_shrink_nobreak_stretch,
    full_width_open  = japanese_between_full_close_open, -- !!
    full_width_close = japanese_between_full_close_full_close, -- nobreak_shrink_nobreak_stretch,
    full_width_punct = nobreak_shrink_nobreak_stretch,
    hyphen           = nobreak_shrink_break_stretch,
    non_starter      = nobreak_shrink_break_stretch,
    other            = nobreak_shrink_break_stretch,
}
906 907
-- Actions by status of the current glyph; this table is selected by the
-- nihongo injectors when the previous glyph is half_width_close.
local japanese_8 = {
    -- the key was spelled "jami_initial", a status that is used nowhere
    -- else in this file, so the entry could never be found
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_autoshrink_break_stretch,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_autoshrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_open  = nobreak_autoshrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_punct = nobreak_autoshrink_nobreak_stretch,
    hyphen           = nobreak_autoshrink_break_stretch,
    non_starter      = nobreak_autoshrink_break_stretch,
    other            = nobreak_autoshrink_break_stretch,
}
922 923
-- Selects the nihongo action table by the script status of the previous
-- glyph; "start" is the status used at the beginning of a run.
local injectors = { -- [previous] [current]
    jamo_final       = japanese_1,
    korean           = japanese_1,
    chinese          = japanese_2,
    hiragana         = japanese_2,
    katakana         = japanese_2,
    hyphen           = japanese_3,
    start            = japanese_4,
    other            = japanese_5,
    non_starter      = japanese_5,
    full_width_open  = japanese_6,
    half_width_open  = japanese_6,
    full_width_close = japanese_7,
    full_width_punct = japanese_7,
    half_width_close = japanese_8,
}
939 940
-- Injector for the nihongo method. Walks the node list starting at first.
-- For glyphs it looks up injectors[previous][current status] and calls the
-- helper found there. A glue node (userskip, spaceskip or xspaceskip) that
-- sits between two glyphs with suitable script status is removed when its
-- width equals the spacedata entry of the preceding glyph's font.
--
-- head  : list head, handed to the helpers and to remove_node
-- first : node where the walk starts
-- last  : only used for the first ~= last test (see the note below)
--
-- Nothing is returned.
--
-- NOTE(review): the results of insert_node_before and remove_node are not
-- used, so a changed list head is not reported back; confirm that callers
-- do not depend on that.
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    -- NOTE(review): this local hides the last parameter, so the loop below
    -- only stops when getnext returns nil; confirm that this is intended.
    local last = nil
    while true do
        local upcoming = getnext(first)
        local id       = getid(first)
        if id == glyph_code then
            local current = getscriptstatus(first)
            local actions = injectors[previous]
            local action  = actions and actions[current]
            if action then
                -- the spacing parameters are refreshed on a font switch only
                local font = getfont(first)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(first))
                end
                action(head,first)
            end
            previous = current
     -- elseif id == math_code then
     --     upcoming = getnext(end_of_math(current))
     --     previous = "start"
        else -- glue
            local state = "start"
            local p     = getprev(first)
            local n     = upcoming
            if p and n then
                local pid = getid(p)
                local nid = getid(n)
                if pid == glyph_code and nid == glyph_code then
                    local pcjk = getscriptstatus(p)
                    local ncjk = getscriptstatus(n)
                    local keep = not pcjk                   or not ncjk
                              or pcjk == "korean"           or ncjk == "korean"
                              or pcjk == "other"            or ncjk == "other"
                              or pcjk == "jamo_final"       or ncjk == "jamo_initial"
                              or pcjk == "half_width_close" or ncjk == "half_width_open" -- extra compared to korean
                    if not keep then -- if head ~= first then
                        if id == glue_code then
                            -- also scriptstatus check?
                            local subtype = getsubtype(first)
                            if subtype == userskip_code or subtype == spaceskip_code or subtype == xspaceskip_code then
                                -- for the moment no distinction possible between space and userskip
                                local w = getwidth(first)
                                local s = spacedata[getfont(p)]
                                if w == s then -- could be option
                                    if trace_details then
                                        trace_detail_between(p,n,"space removed")
                                    end
                                    remove_node(head,first,true)
                                end
                            end
                        end
                        -- the status carries over whether or not a node was removed
                        state = pcjk
                    end
                end
            end
            previous = state
        end
        if upcoming == last then -- was stop
            break
        end
        first = upcoming
    end
end
1016 1017
-- Registers the nihongo method with its injector and default dataset; the
-- quarter factors differ from those of the hangul and hanzi datasets.
scripts.installmethod {
    name     = "nihongo", -- what name to use?
    injector = process,
    datasets = {
        default = {
            -- all factors are fractions of the quad
            inter_char_shrink_factor          = 0.50,
            inter_char_stretch_factor         = 0.50,
            inter_char_half_shrink_factor     = 0.50,
            inter_char_half_stretch_factor    = 0.50,
            inter_char_quarter_shrink_factor  = 0.25,
            inter_char_quarter_stretch_factor = 0.25,
            inter_char_hangul_penalty         =   50,
        },
    },
}
1032