-- scrp-cjk.lua / size: 37 Kb / last modification: 2023-12-21 09:44
-- Make sure the global module registry exists, then record this file's metadata.
if not modules then
    modules = { }
end

modules["scrp-cjk"] = {
    version   = 1.001,
    comment   = "companion to scrp-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9-- We can speed this up by preallocating nodes and copying them but the gain is not
10-- that large.
11--
-- If needed we can speed this up (traversers and prev next and such) but cjk
-- documents don't have that many glyphs and certainly not much font processing so
-- there is not much gain in it.
15--
-- The input line endings: there is no way to distinguish between inline spaces and
-- endofline turned into spaces (it would not make sense either, because otherwise a
-- wanted space at the end of a line would have to be a hard coded one).
19
20local nuts              = nodes.nuts
21
22local copy_node        = nuts.copy
23local remove_node      = nuts.remove
24local nextglyph        = nuts.traversers.glyph
25
26local getnext          = nuts.getnext
27local getprev          = nuts.getprev
28local getfont          = nuts.getfont
29local getchar          = nuts.getchar
30local getid            = nuts.getid
31local getsubtype       = nuts.getsubtype
32local getwidth         = nuts.getwidth
33
34local setchar          = nuts.setchar
35
36local nodepool         = nuts.pool
37local new_glue         = nodepool.glue
38local new_kern         = nodepool.kern
39local new_penalty      = nodepool.penalty
40
41local nodecodes        = nodes.nodecodes
42local gluecodes        = nodes.gluecodes
43
44local glyph_code       = nodecodes.glyph
45local glue_code        = nodecodes.glue
46
47local userskip_code    = gluecodes.userskip
48local spaceskip_code   = gluecodes.spaceskip
49local xspaceskip_code  = gluecodes.xspaceskip
50
51local hash             = characters.scripthash
52
53local getscriptstatus  = scripts.getstatus
54local getscriptdata    = scripts.getdata
55local scriptcolors     = scripts.colors
56
57local fonthashes       = fonts.hashes
58local quaddata         = fonthashes.quads
59local spacedata        = fonthashes.spaces
60
61local decomposed       = characters.hangul.decomposed
62
-- Detail tracing is off by default; the "scripts.details" tracker toggles it.
local trace_details = false

trackers.register("scripts.details", function(v)
    trace_details = v
end)

-- Reporter used for all detail trace messages in this file.
local report_details = logs.reporter("scripts","detail")
66
67-- raggedleft is controlled by leftskip and we might end up with a situation where
68-- the intercharacter spacing interferes with this; the solution is to patch the
69-- nodelist but better is to use veryraggedleft
70
71local insertnodeafter  = scripts.helpers.insertnodeafter
72local insertnodebefore = scripts.helpers.insertnodebefore
73
-- Spacing state shared by the injectors below. Everything starts at zero and is
-- overwritten by set_parameters.

local inter_char_shrink,         inter_char_stretch         = 0, 0
local inter_char_half_shrink,    inter_char_half_stretch    = 0, 0
local inter_char_quarter_shrink, inter_char_quarter_stretch = 0, 0

local full_char_width, half_char_width, quarter_char_width = 0, 0, 0

local inter_char_hangul_penalty = 0
86
-- Refresh the shared spacing state for the given font.
--
-- font : font id, used as key into quaddata
-- data : dataset table providing the inter_char_*_factor fields and
--        inter_char_hangul_penalty
local function set_parameters(font,data)
    -- beware: parameters can be nil in e.g. punk variants
    local em = quaddata[font]

    -- character widths derived from the quad
    full_char_width    = em
    half_char_width    = em/2
    quarter_char_width = em/4

    -- every factor in the dataset is expressed as a fraction of the quad
    local function scaled(name)
        return data[name] * em
    end

    inter_char_shrink          = scaled("inter_char_shrink_factor")
    inter_char_stretch         = scaled("inter_char_stretch_factor")
    inter_char_half_shrink     = scaled("inter_char_half_shrink_factor")
    inter_char_half_stretch    = scaled("inter_char_half_stretch_factor")
    inter_char_quarter_shrink  = scaled("inter_char_quarter_shrink_factor")
    inter_char_quarter_stretch = scaled("inter_char_quarter_stretch_factor")

    -- the penalty is taken over unscaled
    inter_char_hangul_penalty  = data.inter_char_hangul_penalty
end
101
102-- a test version did compensate for crappy halfwidth but we can best do that
103-- at font definition time and/or just assume a correct font
104
-- Report what is injected before `current`. Glyph details (character and its
-- script hash entry) are included for `current` and for its predecessor, but
-- only for those of the two that actually are glyph nodes.
local function trace_detail(current,what)
    local before        = getprev(current)
    local current_glyph = getid(current) == glyph_code
    local before_glyph  = before and getid(before) == glyph_code
    if current_glyph then
        local c_ch = getchar(current)
        if before_glyph then
            local p_ch = getchar(before)
            report_details("[%C %a] [%s] [%C %a]",p_ch,hash[p_ch],what,c_ch,hash[c_ch])
        else
            report_details("[%s] [%C %a]",what,c_ch,hash[c_ch])
        end
    elseif before_glyph then
        local p_ch = getchar(before)
        report_details("[%C %a] [%s]",p_ch,hash[p_ch],what)
    else
        report_details("[%s]",what)
    end
end
126
-- Report `what` together with the characters (and their script hash entries)
-- of the two given nodes; getchar is applied to both p and n.
local function trace_detail_between(p,n,what)
    local left  = getchar(p)
    local right = getchar(n)
    report_details("[%C %a] [%s] [%C %a]",left,hash[left],what,right,hash[right])
end
132
-- Insert a penalty of 10000 before `current`.
--
-- The trace label used to read "break", which contradicts what this function
-- injects; it now reads "no break" (compare "no break stretch" below).
local function nobreak(head,current)
    if trace_details then
        trace_detail(current,"no break")
    end
    insertnodebefore(head,current,new_penalty(10000))
end
139
-- Insert a zero width glue carrying inter_char_stretch before `current`.
local function stretch_break(head,current)
    if trace_details then trace_detail(current,"stretch break") end
    local glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
end
146
-- Insert a zero width glue carrying inter_char_half_shrink before `current`.
local function shrink_break(head,current)
    if trace_details then trace_detail(current,"shrink break") end
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
153
-- Insert, in this order before `current`: a 10000 penalty and a zero width
-- glue carrying inter_char_stretch.
local function nobreak_stretch(head,current)
    if trace_details then trace_detail(current,"no break stretch") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
end
161
-- Insert a penalty with the value inter_char_hangul_penalty before `current`.
local function korean_break(head,current)
    if trace_details then trace_detail(current,"korean break") end
    local penalty = new_penalty(inter_char_hangul_penalty)
    insertnodebefore(head,current,penalty)
end
168
-- Insert, in this order before `current`: a 10000 penalty and a zero width
-- glue carrying inter_char_half_shrink.
local function nobreak_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak shrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
176
-- Insert, in this order before `current`: a 10000 penalty and a zero width
-- glue carrying inter_char_half_shrink. Apart from the trace label this injects
-- the same nodes as nobreak_shrink.
local function nobreak_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
184
-- Insert before `current`: penalty 10000, stretch glue, penalty 10000, shrink glue.
local function nobreak_stretch_nobreak_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch nobreak shrink") end
    -- nobreak stretch
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
    -- nobreak shrink
    penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
194
-- Insert before `current`: penalty 10000, stretch glue, penalty 10000, shrink glue.
-- Apart from the trace label this matches nobreak_stretch_nobreak_shrink.
local function nobreak_stretch_nobreak_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch nobreak autoshrink") end
    -- nobreak stretch
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
    -- nobreak autoshrink
    penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
204
-- Insert before `current`: penalty 10000, shrink glue, penalty 10000, stretch glue.
local function nobreak_shrink_nobreak_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak shrink nobreak stretch") end
    -- nobreak shrink
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
    -- nobreak stretch
    penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
end
214
-- Insert before `current`: penalty 10000, shrink glue, penalty 10000, stretch glue.
-- Apart from the trace label this matches nobreak_shrink_nobreak_stretch.
local function nobreak_autoshrink_nobreak_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink nobreak stretch") end
    -- nobreak autoshrink
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
    -- nobreak stretch
    penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
end
224
-- Insert before `current`: penalty 10000, shrink glue, then a stretch glue
-- that is not preceded by a penalty.
local function nobreak_shrink_break_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak shrink break stretch") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
end
233
-- Insert before `current`: penalty 10000, shrink glue, then a stretch glue
-- that is not preceded by a penalty. Apart from the trace label this matches
-- nobreak_shrink_break_stretch.
local function nobreak_autoshrink_break_stretch(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink break stretch") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
end
242
-- Insert before `current`: penalty 10000, shrink glue, stretch glue (no penalty
-- in front of it), penalty 10000, shrink glue.
--
-- The closing glue used to be a second stretch glue, which contradicts the
-- function name, its trace label and the otherwise identical sibling
-- nobreak_autoshrink_break_stretch_nobreak_shrink; it is a shrink glue now.
local function nobreak_shrink_break_stretch_nobreak_shrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak shrink break stretch nobreak shrink")
    end
    -- nobreak shrink
    insertnodebefore(head,current,new_penalty(10000))
    insertnodebefore(head,current,new_glue(0,0,inter_char_half_shrink))
    -- break stretch
    insertnodebefore(head,current,new_glue(0,inter_char_stretch,0))
    -- nobreak shrink
    insertnodebefore(head,current,new_penalty(10000))
    insertnodebefore(head,current,new_glue(0,0,inter_char_half_shrink))
end
253
-- Insert before `current`: a kern of minus half the character width, a glue of
-- half the character width carrying inter_char_half_shrink, and another kern
-- of minus half the character width.
local function japanese_between_full_close_open(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese between full close open") end
    local left = new_kern(-half_char_width)
    insertnodebefore(head,current,left)
    local middle = new_glue(half_char_width,0,inter_char_half_shrink)
    insertnodebefore(head,current,middle)
    local right = new_kern(-half_char_width)
    insertnodebefore(head,current,right)
end
262
-- Insert a kern of minus half the character width before `current`.
local function japanese_between_full_close_full_close(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese between full close full close") end
    local kern = new_kern(-half_char_width)
    insertnodebefore(head,current,kern)
 -- insertnodebefore(head,current,new_glue(half_char_width,0,inter_char_half_shrink))
end
270
-- Insert before `current`: penalty 10000, a glue of a quarter character width
-- carrying inter_char_quarter_shrink, and a kern of minus a quarter character
-- width.
local function japanese_before_full_width_punct(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese before full width punct") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(quarter_char_width,0,inter_char_quarter_shrink)
    insertnodebefore(head,current,glue)
    local kern = new_kern(-quarter_char_width)
    insertnodebefore(head,current,kern)
end
279
-- Insert before `current`: a kern of minus a quarter character width and a glue
-- of a quarter character width carrying inter_char_quarter_shrink.
local function japanese_after_full_width_punct(head,current) -- todo: check width
    if trace_details then trace_detail(current,"japanese after full width punct") end
    local kern = new_kern(-quarter_char_width)
    insertnodebefore(head,current,kern)
    local glue = new_glue(quarter_char_width,0,inter_char_quarter_shrink)
    insertnodebefore(head,current,glue)
end
287
-- Insert before `current`: penalty 10000, shrink glue, stretch glue (no penalty
-- in front of it), penalty 10000, shrink glue.
local function nobreak_autoshrink_break_stretch_nobreak_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink break stretch nobreak autoshrink") end
    -- nobreak autoshrink
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
    -- break stretch
    glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
    -- nobreak autoshrink
    penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
298
-- Insert before `current`: penalty 10000, shrink glue, stretch glue (no penalty
-- in front of it), penalty 10000, shrink glue.
local function nobreak_autoshrink_break_stretch_nobreak_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak autoshrink break stretch nobreak shrink") end
    -- nobreak autoshrink
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
    -- break stretch
    glue = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,glue)
    -- nobreak shrink
    penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    glue = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,glue)
end
309
-- Insert before `current`: penalty 10000, shrink glue, stretch glue (no penalty
-- in front of it), penalty 10000, shrink glue.
--
-- The closing glue used to be a second stretch glue, which contradicts the
-- function name, its trace label and the otherwise identical sibling
-- nobreak_autoshrink_break_stretch_nobreak_autoshrink; it is a shrink glue now.
local function nobreak_shrink_break_stretch_nobreak_autoshrink(head,current)
    if trace_details then
        trace_detail(current,"nobreak shrink break stretch nobreak autoshrink")
    end
    -- nobreak shrink
    insertnodebefore(head,current,new_penalty(10000))
    insertnodebefore(head,current,new_glue(0,0,inter_char_half_shrink))
    -- break stretch
    insertnodebefore(head,current,new_glue(0,inter_char_stretch,0))
    -- nobreak autoshrink
    insertnodebefore(head,current,new_penalty(10000))
    insertnodebefore(head,current,new_glue(0,0,inter_char_half_shrink))
end
320
-- Insert before `current`: penalty 10000, stretch glue, then a shrink glue
-- that is not preceded by a penalty.
local function nobreak_stretch_break_shrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch break shrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
end
329
-- Insert before `current`: penalty 10000, stretch glue, then a shrink glue
-- that is not preceded by a penalty. Apart from the trace label this matches
-- nobreak_stretch_break_shrink.
local function nobreak_stretch_break_autoshrink(head,current)
    if trace_details then trace_detail(current,"nobreak stretch break autoshrink") end
    local penalty = new_penalty(10000)
    insertnodebefore(head,current,penalty)
    local stretch = new_glue(0,inter_char_stretch,0)
    insertnodebefore(head,current,stretch)
    local shrink = new_glue(0,0,inter_char_half_shrink)
    insertnodebefore(head,current,shrink)
end
338
339-- Korean: hangul
340
-- No actions at all (used for the "start" state in the hangul injectors).
local korean_0 = { }
343
-- Actions keyed by the current character class, grouped per action.
local korean_1 = {
    -- hangul penalty
    jamo_initial     = korean_break,
    korean           = korean_break,
    chinese          = korean_break,
    hiragana         = korean_break,
    katakana         = korean_break,
    non_starter      = korean_break,
    other            = korean_break,
    -- stretch
    half_width_open  = stretch_break,
    full_width_open  = stretch_break,
    -- no break
    half_width_close = nobreak,
    full_width_close = nobreak,
    full_width_punct = nobreak,
    -- hyphen: no action
}
359
-- Actions keyed by the current character class, grouped per action.
local korean_2 = {
    -- stretch
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    full_width_open  = stretch_break,
    non_starter      = stretch_break,
    other            = stretch_break,
    -- no break
    half_width_close = nobreak,
    full_width_close = nobreak,
    full_width_punct = nobreak,
    -- hyphen: no action
}
375
-- Actions keyed by the current character class, grouped per action.
local korean_3 = {
    -- stretch
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    full_width_open  = stretch_break,
    -- no break
    half_width_close = nobreak,
    full_width_close = nobreak,
    full_width_punct = nobreak,
    non_starter      = nobreak,
    other            = nobreak,
    -- hyphen: no action
}
391
-- Every character class, hyphen included, maps to nobreak.
local korean_4 = {
    -- letters
    jamo_initial     = nobreak,
    korean           = nobreak,
    chinese          = nobreak,
    hiragana         = nobreak,
    katakana         = nobreak,
    -- opening and closing symbols, punctuation
    half_width_open  = nobreak,
    full_width_open  = nobreak,
    half_width_close = nobreak,
    full_width_close = nobreak,
    full_width_punct = nobreak,
    -- the rest
    hyphen           = nobreak,
    non_starter      = nobreak,
    other            = nobreak,
}
407
-- Actions keyed by the current character class, grouped per action.
local korean_5 = {
    -- stretch
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = stretch_break,
    full_width_open  = stretch_break,
    other            = stretch_break,
    -- no break, stretch
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
}
423
-- Hangul dispatch: injectors[previous][current] gives the action to apply.
-- Rows are grouped by the action table they select.
local injectors = { -- [previous] [current]
    start            = korean_0,

    jamo_final       = korean_1,
    korean           = korean_1,
    chinese          = korean_1,
    hiragana         = korean_1,
    katakana         = korean_1,

    hyphen           = korean_2,
    other            = korean_2,

    non_starter      = korean_3,

    full_width_open  = korean_4,
    half_width_open  = korean_4,

    full_width_close = korean_5,
    full_width_punct = korean_5,
    half_width_close = korean_5,
}
440
-- Trace colors per character class, assigned in the listed order.
do
    local assignments = {
        { "korean",           "trace:0" },
        { "chinese",          "trace:0" },
        { "katakana",         "trace:0" },
        { "hiragana",         "trace:0" },
        { "full_width_open",  "trace:1" },
        { "full_width_close", "trace:2" },
        { "half_width_open",  "trace:3" },
        { "half_width_close", "trace:4" },
        { "full_width_punct", "trace:5" },
     -- { "hyphen",           "trace:5" },
        { "non_starter",      "trace:6" },
        { "jamo_initial",     "trace:7" },
        { "jamo_medial",      "trace:8" },
        { "jamo_final",       "trace:9" },
    }
    for i=1,#assignments do
        local entry = assignments[i]
        scriptcolors[entry[1]] = entry[2]
    end
end
455
-- Hangul injector: walk the node list starting at `first`. For glyphs, look up
-- injectors[previous][current] and apply the action that is found. For any
-- other node that sits between two glyphs, either keep it and reset the state
-- to "start", or remove it and carry the script status of the preceding glyph
-- over.
--
-- Nothing is returned; nothing happens when first == last.
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    -- NOTE(review): the original declares a local `last = nil` here, which
    -- shadows the parameter; the loop therefore only ends when getnext returns
    -- nil. That behaviour is kept as is -- confirm whether it is intended.
    local stop     = nil
    local current  = first
    while true do
        local upcoming = getnext(current)
        if getid(current) == glyph_code then
            local status = getscriptstatus(current)
            local row    = injectors[previous]
            local action = row and row[status]
            if action then
                -- the spacing parameters are only refreshed on a font switch
                local font = getfont(current)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(current))
                end
                action(head,current)
            end
            previous = status
        else -- glue
            local state = "start"
            local p = getprev(current)
            local n = upcoming
            if p and n then
                local pid = getid(p)
                local nid = getid(n)
                if pid == glyph_code and nid == glyph_code then
                    local pcjk = getscriptstatus(p)
                    local ncjk = getscriptstatus(n)
                    local keep = not pcjk                 or not ncjk
                        or pcjk == "korean"     or ncjk == "korean"
                        or pcjk == "other"      or ncjk == "other"
                        or pcjk == "jamo_final" or ncjk == "jamo_initial"
                    if not keep then -- if head ~= first then
                        remove_node(head,current,true)
                        state = pcjk
                    end
                end
            end
            previous = state
        end
        if upcoming == stop then -- was stop
            break
        end
        current = upcoming
    end
end
514
-- Install the "hangul" method with its two datasets. Within a dataset all six
-- factors (fractions of the quad) share one value; the penalty is 50 in both.
do
    local function dataset(factor)
        return {
            inter_char_shrink_factor          = factor, -- of quad
            inter_char_stretch_factor         = factor, -- of quad
            inter_char_half_shrink_factor     = factor, -- of quad
            inter_char_half_stretch_factor    = factor, -- of quad
            inter_char_quarter_shrink_factor  = factor, -- of quad
            inter_char_quarter_stretch_factor = factor, -- of quad
            inter_char_hangul_penalty         = 50,
        }
    end

    scripts.installmethod {
        name     = "hangul",
        injector = process,
        datasets = { -- todo: metatables and maybe some stretch and shrink factor
            default = dataset(0.50),
            tight   = dataset(0.10),
        },
    }
end
539
-- Replace every glyph for which `decomposed` returns components by a run of
-- glyphs: the glyph itself gets the lead consonant, a copy with the medial
-- vowel is inserted after it and, when present, a copy with the tail consonant
-- after that.
--
-- Returns the (possibly updated) head and a boolean telling whether anything
-- was decomposed.
function scripts.decomposehangul(head)
    local done = false
    for glyph, char in nextglyph, head do
        local lead, medial, tail = decomposed(char)
        if lead then
            setchar(glyph,lead)
            -- the insertion point is tracked in a separate local so that the
            -- loop variable itself is left alone
            local anchor = glyph
            local vowel  = copy_node(anchor)
            setchar(vowel,medial)
            head, anchor = insertnodeafter(head,anchor,vowel)
            if tail then
                local consonant = copy_node(anchor)
                setchar(consonant,tail)
                head, anchor = insertnodeafter(head,anchor,consonant)
            end
            done = true
        end
    end
    return head, done
end
559
560-- nodes.tasks.prependaction("processors","normalizers","scripts.decomposehangul")
561
local otffeatures         = fonts.constructors.features.otf
local registerotffeature  = otffeatures.register

-- Register scripts.decomposehangul as node processor (position 1) of the
-- "decomposehangul" otf feature.
registerotffeature({
    name        = "decomposehangul",
    description = "decompose hangul",
    processors  = {
        position = 1,
        node     = scripts.decomposehangul,
    },
})
573
574-- Chinese: hanzi
575
-- No actions at all.
local chinese_0 = { }
578
-- Actions keyed by the current character class, grouped per action.
local chinese_1 = {
    -- hangul penalty
    jamo_initial     = korean_break,
    korean           = korean_break,
    -- stretch
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening symbols
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- no break, stretch
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
    -- hyphen: no action
}
594
-- Actions keyed by the current character class, grouped per action.
local chinese_2 = {
    -- hangul penalty
    jamo_initial     = korean_break,
    -- stretch
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening symbols
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- no break, stretch
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
}
610
-- Actions keyed by the current character class, grouped per action.
local chinese_3 = {
    -- hangul penalty
    jamo_initial     = korean_break,
    -- stretch
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening symbols
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- no break, stretch
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
    -- hyphen: no action
}
626
-- Only symbols, punctuation and non starters get an action here; the classes
-- jamo_initial, korean, chinese, hiragana, katakana, half_width_close, hyphen
-- and other have none.
local chinese_4 = {
    -- opening symbols
    half_width_open  = nobreak_autoshrink,
    full_width_open  = nobreak_shrink,
    -- no break
    full_width_close = nobreak,
    full_width_punct = nobreak,
    non_starter      = nobreak,
}
642
-- Actions keyed by the current character class, grouped per action.
local chinese_5 = {
    -- stretch
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening symbols
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- no break, stretch
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
    -- hyphen: no action
}
658
-- Everything maps to nobreak_stretch, except the two opening classes.
local chinese_6 = {
    -- opening symbols
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- no break, stretch
    jamo_initial     = nobreak_stretch,
    korean           = nobreak_stretch,
    chinese          = nobreak_stretch,
    hiragana         = nobreak_stretch,
    katakana         = nobreak_stretch,
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = nobreak_stretch,
}
674
-- Actions keyed by the current character class.
--
-- The first key used to be spelled "jami_initial". Every other table in this
-- file uses "jamo_initial", so the entry could never be found; the spelling is
-- corrected here.
local chinese_7 = {
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_shrink_break_stretch,
    chinese          = stretch_break, -- nobreak_shrink_break_stretch,
    hiragana         = stretch_break, -- nobreak_shrink_break_stretch,
    katakana         = stretch_break, -- nobreak_shrink_break_stretch,
    half_width_open  = nobreak_shrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_shrink_nobreak_stretch,
    full_width_open  = nobreak_shrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_shrink_nobreak_stretch,
    full_width_punct = nobreak_shrink_nobreak_stretch,
    hyphen           = nobreak_shrink_break_stretch,
    non_starter      = nobreak_shrink_break_stretch,
    other            = nobreak_shrink_break_stretch,
}
690
-- Actions keyed by the current character class.
--
-- Two repairs compared to the previous version:
--
-- * the first key was spelled "jami_initial" while every other table uses
--   "jamo_initial", so the entry could never be found; the spelling is fixed
-- * half_width_open was listed twice; the later value (stretch_break) is the
--   one kept, the earlier one is left behind as a comment
local chinese_8 = {
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_autoshrink_break_stretch,
    chinese          = stretch_break, -- nobreak_autoshrink_break_stretch,
    hiragana         = stretch_break, -- nobreak_autoshrink_break_stretch,
    katakana         = stretch_break, -- nobreak_autoshrink_break_stretch,
    half_width_open  = stretch_break, -- nobreak_autoshrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_open  = nobreak_autoshrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_punct = nobreak_autoshrink_nobreak_stretch,
    hyphen           = nobreak_autoshrink_break_stretch,
    non_starter      = nobreak_autoshrink_break_stretch,
    other            = nobreak_autoshrink_break_stretch,
}
707
-- Hanzi dispatch: injectors[previous][current] gives the action to apply.
-- Rows are grouped by the action table they select.
local injectors = { -- [previous] [current]
    start            = chinese_4,

    jamo_final       = chinese_1,
    korean           = chinese_1,

    chinese          = chinese_2,
    hiragana         = chinese_2,
    katakana         = chinese_2,

    hyphen           = chinese_3,

    other            = chinese_5,
    non_starter      = chinese_5,

    full_width_open  = chinese_6,
    half_width_open  = chinese_6,

    full_width_close = chinese_7,
    full_width_punct = chinese_7,

    half_width_close = chinese_8,
}
724
-- Hanzi injector: walk the node list starting at `first`. For glyphs, look up
-- injectors[previous][current] and apply the action that is found. For any
-- other node that sits between two glyphs, either keep it and reset the state
-- to "start", or remove it and carry the script status of the preceding glyph
-- over. Compared to the hangul variant a node is also kept next to a
-- half_width_close on the left or a half_width_open on the right.
--
-- Nothing is returned; nothing happens when first == last.
local function process(head,first,last)
    if first == last then
        return
    end
    local lastfont = nil
    local previous = "start"
    -- NOTE(review): the original declares a local `last = nil` here, which
    -- shadows the parameter; the loop therefore only ends when getnext returns
    -- nil. That behaviour is kept as is -- confirm whether it is intended.
    local stop     = nil
    local current  = first
    while true do
        local upcoming = getnext(current)
        if getid(current) == glyph_code then
            local status = getscriptstatus(current)
            local row    = injectors[previous]
            local action = row and row[status]
            if action then
                -- the spacing parameters are only refreshed on a font switch
                local font = getfont(current)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(current))
                end
                action(head,current)
            end
            previous = status
        else -- glue
            local state = "start"
            local p = getprev(current)
            local n = upcoming
            if p and n then
                local pid = getid(p)
                local nid = getid(n)
                if pid == glyph_code and nid == glyph_code then
                    local pcjk = getscriptstatus(p)
                    local ncjk = getscriptstatus(n)
                    local keep = not pcjk                       or not ncjk
                        or pcjk == "korean"           or ncjk == "korean"
                        or pcjk == "other"            or ncjk == "other"
                        or pcjk == "jamo_final"       or ncjk == "jamo_initial"
                        or pcjk == "half_width_close" or ncjk == "half_width_open" -- extra compared to korean
                    if not keep then -- if head ~= first then
                        remove_node(head,current,true)
                        state = pcjk
                    end
                end
            end
            previous = state
        end
        if upcoming == stop then -- was stop
            break
        end
        current = upcoming
    end
end
784
-- Install the "hanzi" method; it only comes with a default dataset.
do
    local default = {
        -- factors, all of quad
        inter_char_shrink_factor          = 0.50,
        inter_char_stretch_factor         = 0.50,
        inter_char_half_shrink_factor     = 0.50,
        inter_char_half_stretch_factor    = 0.50,
        inter_char_quarter_shrink_factor  = 0.50,
        inter_char_quarter_stretch_factor = 0.50,
        -- penalty
        inter_char_hangul_penalty         = 50,
    }

    scripts.installmethod {
        name     = "hanzi",
        injector = process,
        datasets = { default = default },
    }
end
800
801-- Japanese: idiographic, hiragana, katakana, romanji / jis
802
-- No actions at all.
local japanese_0 = { }
805
-- Actions keyed by the current character class, grouped per action.
local japanese_1 = {
    -- hangul penalty
    jamo_initial     = korean_break,
    korean           = korean_break,
    -- stretch
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening symbols
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- no break, stretch
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
    -- hyphen: no action
}
821
-- Actions for the current glyph when the previous status is "chinese",
-- "hiragana" or "katakana" (see the injectors table). The keys are the script
-- status of the current glyph.
local japanese_2 = {
    -- hangul
    jamo_initial     = korean_break,
    korean           = stretch_break,
    -- ideographs, kana and other
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening punctuation
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- closing punctuation, hyphen and non starters
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    -- full width punctuation has its own handler (this was nobreak_stretch)
    full_width_punct = japanese_before_full_width_punct,
}
837
-- Actions for the current glyph when the previous status is "hyphen" (see the
-- injectors table). The keys are the script status of the current glyph. There
-- is no "hyphen" entry, so nothing is injected between two hyphens.
local japanese_3 = {
    -- hangul
    jamo_initial     = korean_break,
    korean           = stretch_break,
    -- ideographs, kana and other
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening punctuation
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- closing punctuation, punctuation and non starters
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
}
853
-- Actions for the current glyph when the previous status is "start" (see the
-- injectors table). Only the statuses listed here trigger an injection; the
-- remaining ones (jamo_initial, korean, chinese, hiragana, katakana,
-- half_width_close, hyphen and other) have no entry and are left alone.
local japanese_4 = {
    -- opening punctuation
    half_width_open  = nobreak_autoshrink,
    full_width_open  = nobreak_shrink,
    -- closing punctuation, punctuation and non starters
    full_width_close = nobreak,
    full_width_punct = nobreak,
    non_starter      = nobreak,
}
869
-- Actions for the current glyph when the previous status is "other" or
-- "non_starter" (see the injectors table). The keys are the script status of
-- the current glyph. There is no "hyphen" entry, so nothing is injected for it.
local japanese_5 = {
    -- hangul, ideographs, kana and other
    jamo_initial     = stretch_break,
    korean           = stretch_break,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    other            = stretch_break,
    -- opening punctuation
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- closing punctuation, punctuation and non starters
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    non_starter      = nobreak_stretch,
}
885
-- Actions for the current glyph when the previous status is "full_width_open"
-- or "half_width_open" (see the injectors table). Every status except the two
-- opening ones maps onto nobreak_stretch.
local japanese_6 = {
    -- opening punctuation
    half_width_open  = nobreak_stretch_break_autoshrink,
    full_width_open  = nobreak_stretch_break_shrink,
    -- everything else
    jamo_initial     = nobreak_stretch,
    korean           = nobreak_stretch,
    chinese          = nobreak_stretch,
    hiragana         = nobreak_stretch,
    katakana         = nobreak_stretch,
    half_width_close = nobreak_stretch,
    full_width_close = nobreak_stretch,
    full_width_punct = nobreak_stretch,
    hyphen           = nobreak_stretch,
    non_starter      = nobreak_stretch,
    other            = nobreak_stretch,
}
901
-- Actions for the current glyph when the previous status is "full_width_close"
-- or "full_width_punct" (see the injectors table). The keys are the script
-- status of the current glyph.
--
-- The first key used to be spelled "jami_initial", which never matches the
-- "jamo_initial" status used everywhere else, so that entry was dead.
local japanese_7 = {
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_shrink_break_stretch,
    chinese          = japanese_after_full_width_punct, -- stretch_break
    hiragana         = japanese_after_full_width_punct, -- stretch_break
    katakana         = japanese_after_full_width_punct, -- stretch_break
    half_width_open  = nobreak_shrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_shrink_nobreak_stretch,
    full_width_open  = japanese_between_full_close_open, -- !!
    full_width_close = japanese_between_full_close_full_close, -- nobreak_shrink_nobreak_stretch,
    full_width_punct = nobreak_shrink_nobreak_stretch,
    hyphen           = nobreak_shrink_break_stretch,
    non_starter      = nobreak_shrink_break_stretch,
    other            = nobreak_shrink_break_stretch,
}
917
-- Actions for the current glyph when the previous status is "half_width_close"
-- (see the injectors table). The keys are the script status of the current
-- glyph.
--
-- The first key used to be spelled "jami_initial", which never matches the
-- "jamo_initial" status used everywhere else, so that entry was dead.
local japanese_8 = {
    jamo_initial     = nobreak_shrink_break_stretch,
    korean           = nobreak_autoshrink_break_stretch,
    chinese          = stretch_break,
    hiragana         = stretch_break,
    katakana         = stretch_break,
    half_width_open  = nobreak_autoshrink_break_stretch_nobreak_autoshrink,
    half_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_open  = nobreak_autoshrink_break_stretch_nobreak_shrink,
    full_width_close = nobreak_autoshrink_nobreak_stretch,
    full_width_punct = nobreak_autoshrink_nobreak_stretch,
    hyphen           = nobreak_autoshrink_break_stretch,
    non_starter      = nobreak_autoshrink_break_stretch,
    other            = nobreak_autoshrink_break_stretch,
}
933
-- Two level lookup: injectors[previous][current] gives the action to run for
-- the current glyph, where both indices are script status names. The first
-- level (this table) is keyed by the status of the previous glyph.
local injectors = {
    -- nothing seen yet, or the state was reset
    start            = japanese_4,
    -- hangul
    jamo_final       = japanese_1,
    korean           = japanese_1,
    -- ideographs and kana
    chinese          = japanese_2,
    hiragana         = japanese_2,
    katakana         = japanese_2,
    -- hyphen
    hyphen           = japanese_3,
    -- other and non starters
    other            = japanese_5,
    non_starter      = japanese_5,
    -- opening punctuation
    full_width_open  = japanese_6,
    half_width_open  = japanese_6,
    -- closing punctuation and punctuation
    full_width_close = japanese_7,
    full_width_punct = japanese_7,
    half_width_close = japanese_8,
}
950
-- Walk the node list that starts at first. For a glyph the action found in
-- injectors[previous][current] is run (after the font parameters have been
-- set when the font differs from the one last seen). For any other node that
-- sits between two glyphs we decide if the script state carries over; a glue
-- (userskip, spaceskip or xspaceskip) whose width equals the spacedata entry
-- of the preceding glyph's font is removed in that case.
--
-- head  : handed to the actions and to remove_node
-- first : the node to start at
-- last  : only used to skip the run when it equals first
--
-- NOTE(review): the walk does not stop at last but continues until getnext
-- returns nil; this is the existing behaviour, confirm that it is intended.
local function process(head,first,last)
    if first == last then
        return
    end
    local previous = "start"
    local lastfont = nil
    repeat
        -- fetch the successor first because the current node can get removed
        local upcoming = getnext(first)
        local id       = getid(first)
        if id == glyph_code then
            local current = getscriptstatus(first)
            local actions = injectors[previous]
            local action  = actions and actions[current]
            if action then
                local font = getfont(first)
                if font ~= lastfont then
                    lastfont = font
                    set_parameters(font,getscriptdata(first))
                end
                action(head,first)
            end
            previous = current
        else
            -- unless both neighbours are glyphs with a compatible status we
            -- start afresh
            local state  = "start"
            local before = getprev(first)
            if before and upcoming then
                local beforeid = getid(before)
                local afterid  = getid(upcoming)
                if beforeid == glyph_code and afterid == glyph_code then
                    local pcjk  = getscriptstatus(before)
                    local ncjk  = getscriptstatus(upcoming)
                    local reset = not pcjk                    or not ncjk
                        or pcjk == "korean"           or ncjk == "korean"
                        or pcjk == "other"            or ncjk == "other"
                        or pcjk == "jamo_final"       or ncjk == "jamo_initial"
                        or pcjk == "half_width_close" or ncjk == "half_width_open" -- extra compared to korean
                    if not reset then
                        if id == glue_code then
                            -- also scriptstatus check?
                            local subtype = getsubtype(first)
                            if subtype == userskip_code or subtype == spaceskip_code or subtype == xspaceskip_code then
                                -- for the moment no distinction possible between space and userskip
                                local width = getwidth(first)
                                local space = spacedata[getfont(before)]
                                if width == space then -- could be option
                                    if trace_details then
                                        trace_detail_between(before,upcoming,"space removed")
                                    end
                                    remove_node(head,first,true)
                                end
                            end
                        end
                        state = pcjk
                    end
                end
            end
            previous = state
        end
        first = upcoming
    until first == nil -- was stop
end
1027
-- Register the "nihongo" method (what name to use?). The injector is the
-- process function defined right above; the default dataset carries the inter
-- character factors (all expressed as a fraction of the quad) and the hangul
-- penalty.
do
    local defaults = {
        inter_char_hangul_penalty         =   50,
        -- of quad
        inter_char_shrink_factor          = 0.50,
        inter_char_stretch_factor         = 0.50,
        inter_char_half_shrink_factor     = 0.50,
        inter_char_half_stretch_factor    = 0.50,
        inter_char_quarter_shrink_factor  = 0.25,
        inter_char_quarter_stretch_factor = 0.25,
    }

    scripts.installmethod({
        name     = "nihongo",
        injector = process,
        datasets = { default = defaults },
    })
end
1043