scrp-cjk.lua /size: 37 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['scrp-cjk'] = {
2    version   = 1.001,
3    comment   = "companion to scrp-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- We can speed this up by preallocating nodes and copying them but the gain is not
10-- that large.
11--
-- If needed we can speed this up (traversers and prev next and such) but cjk
-- documents don't have that many glyphs and certainly not much font processing so
-- there is not much gain in it.
15--
-- The input line endings: there is no way to distinguish between inline spaces and
-- endofline turned into spaces (it would not make sense either because otherwise a
-- wanted space at the end of a line would have to be a hard coded one).
19
20local nuts              = nodes.nuts
21
22local copy_node        = nuts.copy
23local remove_node      = nuts.remove
24local nextglyph        = nuts.traversers.glyph
25
26local getnext          = nuts.getnext
27local getprev          = nuts.getprev
28local getfont          = nuts.getfont
29local getchar          = nuts.getchar
30local getid            = nuts.getid
31local getsubtype       = nuts.getsubtype
32local getwidth         = nuts.getwidth
33
34local setchar          = nuts.setchar
35
36local nodepool         = nuts.pool
37local new_glue         = nodepool.glue
38local new_kern         = nodepool.kern
39local new_penalty      = nodepool.penalty
40
41local nodecodes        = nodes.nodecodes
42local gluecodes        = nodes.gluecodes
43
44local glyph_code       = nodecodes.glyph
45local glue_code        = nodecodes.glue
46
47local userskip_code    = gluecodes.userskip
48local spaceskip_code   = gluecodes.spaceskip
49local xspaceskip_code  = gluecodes.xspaceskip
50
51local hash             = characters.scripthash
52
53local getscriptstatus  = scripts.getstatus
54local getscriptdata    = scripts.getdata
55local scriptcolors     = scripts.colors
56
57local fonthashes       = fonts.hashes
58local quaddata         = fonthashes.quads
59local spacedata        = fonthashes.spaces
60
61local decomposed       = characters.hangul.decomposed
62
63local trace_details    = false  trackers.register("scripts.details", function(v) trace_details = v end)
64
65local report_details   = logs.reporter("scripts","detail")
66
67-- raggedleft is controlled by leftskip and we might end up with a situation where
68-- the intercharacter spacing interferes with this; the solution is to patch the
69-- nodelist but better is to use veryraggedleft
70
71local insertnodeafter  = scripts.helpers.insertnodeafter
72local insertnodebefore = scripts.helpers.insertnodebefore
73
-- Spacing values for the font that is currently being processed. They are shared
-- upvalues: set_parameters refreshes all of them and the injector functions below
-- read some of them when they create glue, kerns and penalties.

local inter_char_shrink          = 0
local inter_char_stretch         = 0
local inter_char_half_shrink     = 0
local inter_char_half_stretch    = 0
local inter_char_quarter_shrink  = 0
local inter_char_quarter_stretch = 0

-- the quad of the current font and fractions of it

local full_char_width            = 0
local half_char_width            = 0
local quarter_char_width         = 0

-- copied unscaled from the dataset; used by korean_break

local inter_char_hangul_penalty  = 0
86
-- Refreshes the shared spacing values for `font`. The widths are the quad that
-- quaddata gives for the font and fractions of it; the shrink/stretch amounts are
-- the factors found in the dataset `data` multiplied by that quad. The hangul
-- penalty is taken from the dataset as is.
local function set_parameters(font,data)
    -- beware: parameters can be nil in e.g. punk variants
    local quad = quaddata[font]
    local function scaled(factor)
        return data[factor] * quad
    end
    full_char_width            = quad
    half_char_width            = quad/2
    quarter_char_width         = quad/4
    inter_char_shrink          = scaled("inter_char_shrink_factor")
    inter_char_stretch         = scaled("inter_char_stretch_factor")
    inter_char_half_shrink     = scaled("inter_char_half_shrink_factor")
    inter_char_half_stretch    = scaled("inter_char_half_stretch_factor")
    inter_char_quarter_shrink  = scaled("inter_char_quarter_shrink_factor")
    inter_char_quarter_stretch = scaled("inter_char_quarter_stretch_factor")
    inter_char_hangul_penalty  = data.inter_char_hangul_penalty
end
101
102-- a test version did compensate for crappy halfwidth but we can best do that
103-- at font definition time and/or just assume a correct font
104
-- Reports one trace line for the action `what` that is applied at `current`. The
-- line shows the preceding glyph (when the previous node is a glyph), the action
-- label, and `current` itself (when it is a glyph), each with its script hash entry.
local function trace_detail(current,what)
    local prev         = getprev(current)
    local prev_is_char = prev and getid(prev) == glyph_code
    local curr_is_char = getid(current) == glyph_code
    if curr_is_char and prev_is_char then
        local c_ch = getchar(current)
        local p_ch = getchar(prev)
        report_details("[%C %a] [%s] [%C %a]",p_ch,hash[p_ch],what,c_ch,hash[c_ch])
    elseif curr_is_char then
        local c_ch = getchar(current)
        report_details("[%s] [%C %a]",what,c_ch,hash[c_ch])
    elseif prev_is_char then
        local p_ch = getchar(prev)
        report_details("[%C %a] [%s]",p_ch,hash[p_ch],what)
    else
        report_details("[%s]",what)
    end
end
126
-- Reports the action `what` together with the characters (and script hash
-- entries) of the two nodes `p` and `n`; both are passed to getchar unchecked.
local function trace_detail_between(p,n,what)
    local left, right = getchar(p), getchar(n)
    report_details("[%C %a] [%s] [%C %a]",left,hash[left],what,right,hash[right])
end
132
-- Forbids a line break in front of `current` by inserting a penalty of 10000
-- before it. The trace label is "nobreak" (matching the function name) so that a
-- trace does not present this action as a break.
local function nobreak(head,current)
    if trace_details then
        trace_detail(current,"nobreak")
    end
    insertnodebefore(head,current,new_penalty(10000))
end
139
-- Inserts glue(0,inter_char_stretch,0) before `current`: no penalty is added, only
-- glue that carries the inter character stretch.
local function stretch_break(head,current)
    if trace_details then trace_detail(current,"stretch break") end
    local glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
end
146
-- Inserts glue(0,0,inter_char_half_shrink) before `current`: no penalty is added,
-- only glue that carries the half shrink amount.
local function shrink_break(head,current)
    if trace_details then trace_detail(current,"shrink break") end
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
153
-- Inserts before `current`, in this order: a penalty of 10000 and
-- glue(0,inter_char_stretch,0).
local function nobreak_stretch(head,current)
    if trace_details then trace_detail(current,"no break stretch") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
end
161
-- Inserts a penalty with the dataset's inter_char_hangul_penalty value before
-- `current`; no glue is added.
local function korean_break(head,current)
    if trace_details then trace_detail(current,"korean break") end
    local penalty = new_penalty(inter_char_hangul_penalty)
    insertnodebefore(head,current,penalty)
end
168
-- Inserts before `current`, in this order: a penalty of 10000 and
-- glue(0,0,inter_char_half_shrink).
local function nobreak_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak shrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
176
-- Inserts before `current`, in this order: a penalty of 10000 and
-- glue(0,0,inter_char_half_shrink). Apart from the trace label this creates the
-- same nodes as the plain "shrink" variant.
local function nobreak_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
184
-- Inserts before `current`, in this order: penalty 10000, stretch glue,
-- penalty 10000, half shrink glue.
local function nobreak_stretch_nobreak_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch nobreak shrink") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
end
194
-- Inserts before `current`, in this order: penalty 10000, stretch glue,
-- penalty 10000, half shrink glue. Only the trace label distinguishes it from the
-- "shrink" variant.
local function nobreak_stretch_nobreak_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch nobreak autoshrink") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
end
204
-- Inserts before `current`, in this order: penalty 10000, half shrink glue,
-- penalty 10000, stretch glue.
local function nobreak_shrink_nobreak_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak shrink nobreak stretch") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
end
214
-- Inserts before `current`, in this order: penalty 10000, half shrink glue,
-- penalty 10000, stretch glue. Only the trace label distinguishes it from the
-- "shrink" variant.
local function nobreak_autoshrink_nobreak_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink nobreak stretch") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
end
224
-- Inserts before `current`, in this order: penalty 10000, half shrink glue and
-- then (without a penalty in between) stretch glue.
local function nobreak_shrink_break_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak shrink break stretch") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
end
233
-- Inserts before `current`, in this order: penalty 10000, half shrink glue and
-- then (without a penalty in between) stretch glue. Only the trace label
-- distinguishes it from the "shrink" variant.
local function nobreak_autoshrink_break_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink break stretch") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
end
242
-- Inserts before `current`, in this order: penalty 10000, half shrink glue,
-- stretch glue, penalty 10000 and a final glue.
--
-- NOTE(review): the final glue is glue(0,inter_char_stretch,0) although the name
-- ends in "nobreak shrink"; the "autoshrink ... nobreak shrink" sibling ends with
-- glue(0,0,inter_char_half_shrink). Confirm which one is intended before changing
-- it, as a change alters the typeset result.
local function nobreak_shrink_break_stretch_nobreak_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak shrink break stretch nobreak shrink") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local final = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,final)
end
253
-- Inserts before `current`, in this order: a kern of minus half the character
-- width, glue(half_char_width,0,inter_char_half_shrink) and a second kern of minus
-- half the character width.
local function japanese_between_full_close_open(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese between full close open") end
    local left_kern = new_kern(-half_char_width)
    insertnodebefore(head,current,left_kern)
    local glue = new_glue(half_char_width,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
    local right_kern = new_kern(-half_char_width)
    insertnodebefore(head,current,right_kern)
end
262
-- Inserts a kern of minus half the character width before `current`; the glue
-- that could follow it is disabled.
local function japanese_between_full_close_full_close(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese between full close full close") end
    local kern = new_kern(-half_char_width)
    insertnodebefore(head,current,kern)
 -- insertnodebefore(head,current,new_glue(half_char_width,0,inter_char_half_shrink))
end
270
-- Inserts before `current`, in this order: penalty 10000,
-- glue(quarter_char_width,0,inter_char_quarter_shrink) and a kern of minus a
-- quarter of the character width.
local function japanese_before_full_width_punct(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese before full width punct") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(quarter_char_width,0,inter_char_quarter_shrink)
    insertnodebefore(head,current,glue)
    local kern = new_kern(-quarter_char_width)
    insertnodebefore(head,current,kern)
end
279
-- Inserts before `current`, in this order: a kern of minus a quarter of the
-- character width and glue(quarter_char_width,0,inter_char_quarter_shrink).
local function japanese_after_full_width_punct(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese after full width punct") end
    local kern = new_kern(-quarter_char_width)
    insertnodebefore(head,current,kern)
    local glue = new_glue(quarter_char_width,0,inter_char_quarter_shrink)
    insertnodebefore(head,current,glue)
end
287
-- Inserts before `current`, in this order: penalty 10000, half shrink glue,
-- stretch glue, penalty 10000, half shrink glue.
local function nobreak_autoshrink_break_stretch_nobreak_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink break stretch nobreak autoshrink") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local first_shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,first_shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local second_shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,second_shrink)
end
298
-- Inserts before `current`, in this order: penalty 10000, half shrink glue,
-- stretch glue, penalty 10000, half shrink glue. Only the trace label
-- distinguishes it from the "... nobreak autoshrink" variant.
local function nobreak_autoshrink_break_stretch_nobreak_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink break stretch nobreak shrink") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local first_shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,first_shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local second_shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,second_shrink)
end
309
-- Inserts before `current`, in this order: penalty 10000, half shrink glue,
-- stretch glue, penalty 10000 and a final glue.
--
-- NOTE(review): the final glue is glue(0,inter_char_stretch,0) although the name
-- ends in "nobreak autoshrink"; the variants whose name starts with
-- "nobreak autoshrink" end with glue(0,0,inter_char_half_shrink). Confirm which
-- one is intended before changing it, as a change alters the typeset result.
local function nobreak_shrink_break_stretch_nobreak_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak shrink break stretch nobreak autoshrink") end
    local first_penalty = new_penalty(10000)
    insertnodebefore(head,current,first_penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local second_penalty = new_penalty(10000)
    insertnodebefore(head,current,second_penalty)
    local final = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,final)
end
320
-- Inserts before `current`, in this order: penalty 10000, stretch glue and then
-- (without a penalty in between) half shrink glue.
local function nobreak_stretch_break_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch break shrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
end
329
-- Inserts before `current`, in this order: penalty 10000, stretch glue and then
-- (without a penalty in between) half shrink glue. Only the trace label
-- distinguishes it from the "shrink" variant.
local function nobreak_stretch_break_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch break autoshrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
end
338
339-- Korean: hangul
340
-- Hangul row for previous == "start": it is empty, so nothing is injected in
-- front of a glyph that starts a run.
local korean_0 = {
}
343
-- Hangul row used when the previous glyph was jamo_final, korean, chinese,
-- hiragana or katakana. The key is the script status of the current glyph, the
-- value the action applied in front of it; a status without an entry gets no
-- action.
local korean_1 = {
    jamo_initial     = korean_break,
    korean           = korean_break,
    chinese          = korean_break,
    hiragana         = korean_break,
    katakana         = korean_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak,
    full_width_open  = stretch_break,
    full_width_close = nobreak,
    full_width_punct = nobreak,
--  hyphen           = nil,
    non_starter      = korean_break,
    other            = korean_break,
}
359
-- Hangul row used when the previous glyph was a hyphen or "other". The key is the
-- script status of the current glyph, the value the action applied in front of
-- it; a status without an entry gets no action.
local korean_2 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak,
    full_width_open  = stretch_break,
    full_width_close = nobreak,
    full_width_punct = nobreak,
--  hyphen           = nil,
    non_starter      = stretch_break,
    other            = stretch_break,
}
375
-- Hangul row used when the previous glyph was a non_starter. The key is the
-- script status of the current glyph, the value the action applied in front of
-- it; a status without an entry gets no action.
local korean_3 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak,
    full_width_open  = stretch_break,
    full_width_close = nobreak,
    full_width_punct = nobreak,
--  hyphen           = nil,
    non_starter      = nobreak,
    other            = nobreak,
}
391
-- Hangul row used when the previous glyph was an opening symbol (full_width_open
-- or half_width_open): whatever follows, a break in front of it is forbidden.
local korean_4 = {
    jamo_initial     = nobreak,
    korean           = nobreak,
    chinese          = nobreak,
    hiragana         = nobreak,
    katakana         = nobreak,
    half_width_open  = nobreak,
    half_width_close = nobreak,
    full_width_open  = nobreak,
    full_width_close = nobreak,
    full_width_punct = nobreak,
    hyphen           = nobreak,
    non_starter      = nobreak,
    other            = nobreak,
}
407
-- Hangul row used when the previous glyph was full_width_close, full_width_punct
-- or half_width_close. The key is the script status of the current glyph, the
-- value the action applied in front of it.
local korean_5 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    half_width_close = nobreak_stretch,
    full_width_open  = stretch_break,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}
423
-- Hangul dispatch table: the first lookup is by the script status of the previous
-- glyph (or "start"), the row that results is then indexed by the status of the
-- current glyph. A previous status that is not listed here (jamo_initial and
-- jamo_medial for instance) results in no action.
local injectors = { -- [previous] [current]
    jamo_final       = korean_1,
    korean           = korean_1,
    chinese          = korean_1,
    hiragana         = korean_1,
    katakana         = korean_1,
    hyphen           = korean_2,
    start            = korean_0,
    other            = korean_2,
    non_starter      = korean_3,
    full_width_open  = korean_4,
    half_width_open  = korean_4,
    full_width_close = korean_5,
    full_width_punct = korean_5,
    half_width_close = korean_5,
}
440
-- Color names per script status, stored in scripts.colors. The four letter/ideograph
-- classes share "trace:0", the other classes each get their own entry; hyphen is
-- deliberately left out. (Presumably these are used for visual tracing; the
-- consumer lives outside this file.)
scriptcolors.korean            = "trace:0"
scriptcolors.chinese           = "trace:0"
scriptcolors.katakana          = "trace:0"
scriptcolors.hiragana          = "trace:0"
scriptcolors.full_width_open   = "trace:1"
scriptcolors.full_width_close  = "trace:2"
scriptcolors.half_width_open   = "trace:3"
scriptcolors.half_width_close  = "trace:4"
scriptcolors.full_width_punct  = "trace:5"
------------.hyphen            = "trace:5"
scriptcolors.non_starter       = "trace:6"
scriptcolors.jamo_initial      = "trace:7"
scriptcolors.jamo_medial       = "trace:8"
scriptcolors.jamo_final        = "trace:9"
455
-- Injector for the hangul method. Walks the node list from `first` onwards. In
-- front of every glyph the action injectors[previous][current] is applied (both
-- keys are script statuses, `previous` starts out as "start"), after the spacing
-- parameters have been refreshed when the font changed. A node that is not a
-- glyph but sits between two glyphs is removed, unless one of the neighbours has
-- no status or a status that is excluded below; in that case (and whenever the
-- neighbours are not both glyphs) the state is reset to "start".
--
-- NOTE(review): the walk only ends when there is no next node; `last` is used
-- solely by the `first == last` guard. Confirm that callers rely on this before
-- turning `last` into a real stop node (a plain `upcoming == last` test would
-- also skip the last node itself).
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    local node     = first
    while true do
        local upcoming = getnext(node)
        if getid(node) == glyph_code then
            local current = getscriptstatus(node)
            local row     = injectors[previous]
            local action  = row and row[current]
            if action then
                local font = getfont(node)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(node))
                end
                action(head,node)
            end
            previous = current
        else -- glue
            previous = "start"
            local before = getprev(node)
            if before and upcoming and getid(before) == glyph_code and getid(upcoming) == glyph_code then
                local pcjk = getscriptstatus(before)
                local ncjk = getscriptstatus(upcoming)
                if      pcjk                 and ncjk
                    and pcjk ~= "korean"     and ncjk ~= "korean"
                    and pcjk ~= "other"      and ncjk ~= "other"
                    and pcjk ~= "jamo_final" and ncjk ~= "jamo_initial" then
                    -- the node between two cjk glyphs goes away
                    remove_node(head,node,true)
                    previous = pcjk
                end
            end
        end
        if upcoming == nil then -- end of list
            break
        end
        node = upcoming
    end
end
514
-- Registers the "hangul" method: the injector defined above plus two named
-- datasets. The *_factor entries are multiplied by the font's quad in
-- set_parameters; inter_char_hangul_penalty is used as is by korean_break.
scripts.installmethod {
    name     = "hangul",
    injector = process,
    datasets = { -- todo: metatables and maybe some stretch and shrink factor
        default = {
            inter_char_shrink_factor          = 0.50, -- of quad
            inter_char_stretch_factor         = 0.50, -- of quad
            inter_char_half_shrink_factor     = 0.50, -- of quad
            inter_char_half_stretch_factor    = 0.50, -- of quad
            inter_char_quarter_shrink_factor  = 0.50, -- of quad
            inter_char_quarter_stretch_factor = 0.50, -- of quad
            inter_char_hangul_penalty         =   50,
        },
        -- same keys as default but with much smaller factors
        tight = {
            inter_char_shrink_factor          = 0.10, -- of quad
            inter_char_stretch_factor         = 0.10, -- of quad
            inter_char_half_shrink_factor     = 0.10, -- of quad
            inter_char_half_stretch_factor    = 0.10, -- of quad
            inter_char_quarter_shrink_factor  = 0.10, -- of quad
            inter_char_quarter_stretch_factor = 0.10, -- of quad
            inter_char_hangul_penalty         =   50,
        },
    },
}
539
-- Decomposes hangul glyphs in the list that starts at `head`. When
-- characters.hangul.decomposed returns components for a glyph's character, the
-- glyph itself gets the lead consonant and copies of it that carry the medial
-- vowel and (when there is one) the tail consonant are inserted after it.
-- Returns the possibly updated head and a boolean that tells if anything was
-- decomposed.
function scripts.decomposehangul(head)
    local done = false
    for current, char in nextglyph, head do
        local lead, medial, tail = decomposed(char)
        if lead then
            local node = current -- 5.5 constant
            setchar(node,lead)
            local vowel = copy_node(node)
            setchar(vowel,medial)
            head, node = insertnodeafter(head,node,vowel)
            if tail then
                -- copy whatever insertnodeafter handed back as the current node
                local consonant = copy_node(node)
                setchar(consonant,tail)
                head, node = insertnodeafter(head,node,consonant)
            end
            done = true
        end
    end
    return head, done
end
560
561-- nodes.tasks.prependaction("processors","normalizers","scripts.decomposehangul")
562
-- Makes the decomposition available as the otf font feature "decomposehangul":
-- scripts.decomposehangul is registered as a node processor at position 1.
local otffeatures         = fonts.constructors.features.otf
local registerotffeature  = otffeatures.register

registerotffeature {
    name         = "decomposehangul",
    description  = "decompose hangul",
    processors = {
        position = 1,
        node     = scripts.decomposehangul,
    }
}
574
575-- Chinese: hanzi
576
-- Empty hanzi row; the hanzi dispatch table below does not refer to it (its
-- "start" entry uses chinese_4).
local chinese_0 = {
}
579
-- Hanzi row used when the previous glyph was jamo_final or korean. The key is the
-- script status of the current glyph, the value the action applied in front of
-- it; a status without an entry gets no action.
local chinese_1 = {
    jamo_initial     = korean_break,
    korean           = korean_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
--  hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}
595
-- Hanzi row used when the previous glyph was chinese, hiragana or katakana. The
-- key is the script status of the current glyph, the value the action applied in
-- front of it.
local chinese_2 = {
    jamo_initial     = korean_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}
611
-- Hanzi row used when the previous glyph was a hyphen. The key is the script
-- status of the current glyph, the value the action applied in front of it; a
-- status without an entry gets no action.
local chinese_3 = {
    jamo_initial     = korean_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
--  hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}
627
-- Hanzi row used when previous == "start" (the beginning of a run, or after a
-- reset by the injector). Only opening/closing symbols, punctuation and
-- non-starters get an action; the explicit nil for half_width_close is the same
-- as leaving the entry out.
local chinese_4 = {
--  jamo_initial     = nil,
--  korean           = nil,
--  chinese          = nil,
--  hiragana         = nil,
--  katakana         = nil,
    half_width_open  = nobreak_autoshrink,
    half_width_close = nil,
    full_width_open  = nobreak_shrink,
    full_width_close = nobreak,
    full_width_punct = nobreak,
--  hyphen           = nil,
    non_starter      = nobreak,
--  other            = nil,
}
643
-- Hanzi row used when the previous glyph was "other" or a non_starter. The key is
-- the script status of the current glyph, the value the action applied in front
-- of it; a status without an entry gets no action.
local chinese_5 = {
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
--  hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}
659
-- Hanzi row used when the previous glyph was an opening symbol (full_width_open
-- or half_width_open): every action starts with a penalty of 10000.
local chinese_6 = {
    jamo_initial     = nobreak_stretch,
    korean           = nobreak_stretch,
    chinese          = nobreak_stretch,
    hiragana         = nobreak_stretch,
    katakana         = nobreak_stretch,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = nobreak_stretch,
}
675
676local chinese_7 = {
677    jami_initial     = nobreak_shrink_break_stretch,
678    korean           = nobreak_shrink_break_stretch,
679    chinese          = stretch_break, -- nobreak_shrink_break_stretch,
680    hiragana         = stretch_break, -- nobreak_shrink_break_stretch,
681    katakana         = stretch_break, -- nobreak_shrink_break_stretch,
682    half_width_open  = nobreak_shrink_break_stretch_nobreak_autoshrink,
683    half_width_close = nobreak_shrink_nobreak_stretch,
684    full_width_open  = nobreak_shrink_break_stretch_nobreak_shrink,
685    full_width_close = nobreak_shrink_nobreak_stretch,
686    full_width_punct = nobreak_shrink_nobreak_stretch,
687    hyphen           = nobreak_shrink_break_stretch,
688    non_starter      = nobreak_shrink_break_stretch,
689    other            = nobreak_shrink_break_stretch,
690}
691
-- Hanzi row used when the previous glyph was half_width_close. The key is the
-- script status of the current glyph, the value the action applied in front of
-- it. The first key is spelled jamo_initial, like in every other row; a
-- misspelled key would never match a script status. half_width_open is given
-- only once: with a repeated key the constructor's assignment order decides
-- which value survives, so the overriding value (stretch_break) is kept and the
-- overridden one is preserved as a comment.
local chinese_8 = {
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_autoshrink_break_stretch,
    chinese          = stretch_break, -- nobreak_autoshrink_break_stretch,
    hiragana         = stretch_break, -- nobreak_autoshrink_break_stretch,
    katakana         = stretch_break, -- nobreak_autoshrink_break_stretch,
    half_width_open  = stretch_break, -- nobreak_autoshrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_open  = nobreak_autoshrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_punct = nobreak_autoshrink_nobreak_stretch,
    hyphen           = nobreak_autoshrink_break_stretch,
    non_starter      = nobreak_autoshrink_break_stretch,
    other            = nobreak_autoshrink_break_stretch,
}
708
-- Hanzi dispatch table; this local shadows the hangul table of the same name for
-- the code that follows. The first lookup is by the script status of the previous
-- glyph (or "start"), the row that results is then indexed by the status of the
-- current glyph. A previous status that is not listed results in no action.
local injectors = { -- [previous] [current]
    jamo_final       = chinese_1,
    korean           = chinese_1,
    chinese          = chinese_2,
    hiragana         = chinese_2,
    katakana         = chinese_2,
    hyphen           = chinese_3,
    start            = chinese_4,
    other            = chinese_5,
    non_starter      = chinese_5,
    full_width_open  = chinese_6,
    half_width_open  = chinese_6,
    full_width_close = chinese_7,
    full_width_punct = chinese_7,
    half_width_close = chinese_8,
}
725
-- Injector for the hanzi method. Walks the node list from `first` onwards. In
-- front of every glyph the action injectors[previous][current] is applied (both
-- keys are script statuses, `previous` starts out as "start"), after the spacing
-- parameters have been refreshed when the font changed. A node that is not a
-- glyph but sits between two glyphs is removed, unless one of the neighbours has
-- no status or a status that is excluded below; in that case (and whenever the
-- neighbours are not both glyphs) the state is reset to "start".
--
-- NOTE(review): the walk only ends when there is no next node; `last` is used
-- solely by the `first == last` guard. Confirm that callers rely on this before
-- turning `last` into a real stop node (a plain `upcoming == last` test would
-- also skip the last node itself).
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    local node     = first
    while true do
        local upcoming = getnext(node)
        if getid(node) == glyph_code then
            local current = getscriptstatus(node)
            local row     = injectors[previous]
            local action  = row and row[current]
            if action then
                local font = getfont(node)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(node))
                end
                action(head,node)
            end
            previous = current
        else -- glue
            previous = "start"
            local before = getprev(node)
            if before and upcoming and getid(before) == glyph_code and getid(upcoming) == glyph_code then
                local pcjk = getscriptstatus(before)
                local ncjk = getscriptstatus(upcoming)
                if      pcjk                       and ncjk
                    and pcjk ~= "korean"           and ncjk ~= "korean"
                    and pcjk ~= "other"            and ncjk ~= "other"
                    and pcjk ~= "jamo_final"       and ncjk ~= "jamo_initial"
                    and pcjk ~= "half_width_close" and ncjk ~= "half_width_open" then -- extra compared to korean
                    -- the node between two cjk glyphs goes away
                    remove_node(head,node,true)
                    previous = pcjk
                end
            end
        end
        if upcoming == nil then -- end of list
            break
        end
        node = upcoming
    end
end
785
-- Registers the "hanzi" method: the injector defined above plus one dataset. The
-- *_factor entries are multiplied by the font's quad in set_parameters;
-- inter_char_hangul_penalty is used as is by korean_break.
scripts.installmethod {
    name     = "hanzi",
    injector = process,
    datasets = {
        default = {
            inter_char_shrink_factor          = 0.50, -- of quad
            inter_char_stretch_factor         = 0.50, -- of quad
            inter_char_half_shrink_factor     = 0.50, -- of quad
            inter_char_half_stretch_factor    = 0.50, -- of quad
            inter_char_quarter_shrink_factor  = 0.50, -- of quad
            inter_char_quarter_stretch_factor = 0.50, -- of quad
            inter_char_hangul_penalty         =   50,
        },
    },
}
801
-- Japanese: ideographic, hiragana, katakana, romaji / jis
803
-- Empty row for the Japanese method: a row without entries never yields an
-- action. (Presumably kept for symmetry with the other scripts.)
local japanese_0 = {
}
806
-- Japanese row of actions. The key is the script status of the current glyph, the
-- value the action applied in front of it; a status without an entry gets no
-- action. (Presumably selected by the status of the previous glyph through the
-- Japanese dispatch table, like the rows of the other scripts.)
local japanese_1 = {
    jamo_initial     = korean_break,
    korean           = korean_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_stretch_break_autoshrink,
    half_width_close = nobreak_stretch,
    full_width_open  = nobreak_stretch_break_shrink,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
--  hyphen           = nil,
    non_starter      = nobreak_stretch,
    other            = stretch_break,
}
822
-- Actions keyed by the script status of the current glyph, used when the
-- previous status is 'chinese', 'hiragana' or 'katakana' (see 'injectors').
local japanese_2 = {
    -- hangul
    korean           = stretch_break,
    jamo_initial     = korean_break,
    -- ideographs, kana and anything else
    chinese          = stretch_break,
    katakana         = stretch_break,
    hiragana         = stretch_break,
    other            = stretch_break,
    -- opening punctuation
    full_width_open  = stretch_break, -- WS, was: nobreak_stretch_break_shrink
    half_width_open  = nobreak_stretch_break_autoshrink,
    -- closing punctuation, hyphens and non starters
    full_width_close = nobreak_stretch,
    half_width_close = nobreak_stretch,
    full_width_punct = japanese_before_full_width_punct, -- was: nobreak_stretch
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
}
838
-- Actions keyed by the script status of the current glyph, used when the
-- previous status is 'hyphen' (see 'injectors').
local japanese_3 = {
    -- hangul
    korean           = stretch_break,
    jamo_initial     = korean_break,
    -- ideographs, kana and anything else
    chinese          = stretch_break,
    katakana         = stretch_break,
    hiragana         = stretch_break,
    other            = stretch_break,
    -- opening punctuation
    full_width_open  = nobreak_stretch_break_shrink,
    half_width_open  = nobreak_stretch_break_autoshrink,
    -- closing punctuation and non starters
    full_width_close = nobreak_stretch,
    half_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
    -- there is deliberately no entry for 'hyphen'
}
854
-- Actions keyed by the script status of the current glyph, used when the
-- previous status is 'start' (see 'injectors'). Only punctuation and non
-- starters have an entry here.
local japanese_4 = {
    -- opening punctuation
    full_width_open  = nobreak_shrink,
    half_width_open  = nobreak_autoshrink,
    -- closing punctuation and non starters
    full_width_close = nobreak,
    full_width_punct = nobreak,
    non_starter      = nobreak,
    -- no entries for: jamo_initial, korean, chinese, hiragana, katakana,
    -- half_width_close, hyphen, other
}
870
-- Actions keyed by the script status of the current glyph, used when the
-- previous status is 'other' or 'non_starter' (see 'injectors').
local japanese_5 = {
    -- hangul
    korean           = stretch_break,
    jamo_initial     = stretch_break,
    -- ideographs, kana and anything else
    chinese          = stretch_break,
    katakana         = stretch_break,
    hiragana         = stretch_break,
    other            = stretch_break,
    -- opening punctuation
    full_width_open  = nobreak_stretch_break_shrink,
    half_width_open  = nobreak_stretch_break_autoshrink,
    -- closing punctuation and non starters
    full_width_close = nobreak_stretch,
    half_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
    -- there is deliberately no entry for 'hyphen'
}
886
-- Actions keyed by the script status of the current glyph, used when the
-- previous status is 'full_width_open' or 'half_width_open' (see 'injectors').
-- Apart from the two opening categories every status maps to the same action.
local japanese_6 = {
    -- opening punctuation
    full_width_open  = nobreak_stretch_break_shrink,
    half_width_open  = nobreak_stretch_break_autoshrink,
    -- everything else
    korean           = nobreak_stretch,
    jamo_initial     = nobreak_stretch,
    chinese          = nobreak_stretch,
    katakana         = nobreak_stretch,
    hiragana         = nobreak_stretch,
    other            = nobreak_stretch,
    full_width_close = nobreak_stretch,
    half_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
}
902
-- Actions keyed by the script status of the current glyph, used when the
-- previous status is 'full_width_close' or 'full_width_punct' (see 'injectors').
local japanese_7 = {
    -- This key used to be spelled 'jami_initial'. The status is 'jamo_initial'
    -- everywhere else in this file, so the entry could never be looked up.
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_shrink_break_stretch,
    chinese          = japanese_after_full_width_punct, -- stretch_break
    hiragana         = japanese_after_full_width_punct, -- stretch_break
    katakana         = japanese_after_full_width_punct, -- stretch_break
    half_width_open  = nobreak_shrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_shrink_nobreak_stretch,
    full_width_open  = japanese_between_full_close_open, -- !!
    full_width_close = japanese_between_full_close_full_close, -- nobreak_shrink_nobreak_stretch,
    full_width_punct = nobreak_shrink_nobreak_stretch,
    hyphen           = nobreak_shrink_break_stretch,
    non_starter      = nobreak_shrink_break_stretch,
    other            = nobreak_shrink_break_stretch,
}
918
-- Actions keyed by the script status of the current glyph, used when the
-- previous status is 'half_width_close' (see 'injectors').
local japanese_8 = {
    -- This key used to be spelled 'jami_initial'. The status is 'jamo_initial'
    -- everywhere else in this file, so the entry could never be looked up.
    -- NOTE(review): the other entries use the autoshrink variants; the action
    -- is kept as it was, confirm that the plain shrink is intended here.
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_autoshrink_break_stretch,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_autoshrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_open  = nobreak_autoshrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_punct = nobreak_autoshrink_nobreak_stretch,
    hyphen           = nobreak_autoshrink_break_stretch,
    non_starter      = nobreak_autoshrink_break_stretch,
    other            = nobreak_autoshrink_break_stretch,
}
934
-- Two level lookup: injectors[previous][current] gives the action to apply,
-- where both indices are script status names. A status that has no row here
-- results in no action at all.
local injectors = {
    -- nothing relevant precedes
    start            = japanese_4,
    -- hangul precedes
    korean           = japanese_1,
    jamo_final       = japanese_1,
    -- an ideograph or kana precedes
    chinese          = japanese_2,
    katakana         = japanese_2,
    hiragana         = japanese_2,
    -- a hyphen precedes
    hyphen           = japanese_3,
    -- a non starter or something else precedes
    non_starter      = japanese_5,
    other            = japanese_5,
    -- opening punctuation precedes
    half_width_open  = japanese_6,
    full_width_open  = japanese_6,
    -- full width closing punctuation precedes
    full_width_punct = japanese_7,
    full_width_close = japanese_7,
    -- half width closing punctuation precedes
    half_width_close = japanese_8,
}
951
-- Walk the node list starting at 'first'. For each glyph the action found in
-- injectors[previous][status] is applied (when there is one). A non glyph node
-- that sits between two glyphs can be removed when it is a glue with the width
-- registered in 'spacedata' for the font of the preceding glyph.
--
-- head  : head of the list, handed to the actions and to remove_node
-- first : node where the walk starts
-- last  : only used for the first/last guard; the walk itself always continues
--         until the end of the list
--
-- Nothing is returned.
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    local current  = first
    while true do
        -- fetch the successor first because 'current' might get removed
        local upcoming = getnext(current)
        local id       = getid(current)
        if id == glyph_code then
            local status   = getscriptstatus(current)
            local handlers = injectors[previous]
            local handler  = handlers and handlers[status]
            if handler then
                -- parameters are only set again when the font changes
                local font = getfont(current)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(current))
                end
                handler(head,current)
            end
            previous = status
        else
            -- (math is not treated specially here, it just resets the state)
            local state = "start"
            local p     = getprev(current)
            local n     = upcoming
            if p and n and getid(p) == glyph_code and getid(n) == glyph_code then
                local pcjk = getscriptstatus(p)
                local ncjk = getscriptstatus(n)
                -- the half width checks are extra compared to korean
                if pcjk and ncjk
                    and pcjk ~= "korean"           and ncjk ~= "korean"
                    and pcjk ~= "other"            and ncjk ~= "other"
                    and pcjk ~= "jamo_final"       and ncjk ~= "jamo_initial"
                    and pcjk ~= "half_width_close" and ncjk ~= "half_width_open" then
                    if id == glue_code then
                        -- for the moment no distinction is possible between
                        -- a space and a userskip
                        local subtype = getsubtype(current)
                        if subtype == userskip_code or subtype == spaceskip_code or subtype == xspaceskip_code then
                            local width = getwidth(current)
                            local space = spacedata[getfont(p)]
                            if width == space then -- could be an option
                                if trace_details then
                                    trace_detail_between(p,n,"space removed")
                                end
                                remove_node(head,current,true)
                            end
                        end
                    end
                    state = pcjk
                end
            end
            previous = state
        end
        if upcoming == nil then
            -- end of the list reached
            break
        end
        current = upcoming
    end
end
1028
-- Register the "nihongo" method (what name to use?): the injector is the
-- process function defined directly above, the default dataset holds the
-- spacing parameters.
scripts.installmethod {
    name     = "nihongo",
    injector = process,
    datasets = {
        default = {
            -- all factors are fractions of the quad
            inter_char_stretch_factor         = 0.5,
            inter_char_shrink_factor          = 0.5,
            inter_char_half_stretch_factor    = 0.5,
            inter_char_half_shrink_factor     = 0.5,
            inter_char_quarter_stretch_factor = 0.25,
            inter_char_quarter_shrink_factor  = 0.25,
            -- a penalty, not a factor
            inter_char_hangul_penalty         = 50,
        },
    },
}
1044