font-osd.lua /size: 115 Kb    last modification: 2023-12-21 09:44
-- Register this file (script devanagari) in the global module registry; the
-- registry itself is created here when no earlier file has done so.

modules = modules or { }

modules["font-osd"] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Kai Eigner, TAT Zetwerk / Hans Hagen, PRAGMA ADE",
    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- we need to check nbsphash (context only)
10
11-- A few remarks:
12--
13-- This code is a partial rewrite of the code that deals with devanagari. The data
14-- and logic is by Kai Eigner and based on Microsoft's OpenType specifications
15-- for specific scripts, but with a few improvements. More information can be found
16-- at:
17--
18-- deva: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/introO.mspx
19-- dev2: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/intro.mspx
20--
21-- Rajeesh Nambiar provided patches for the malayalam variant. Thanks to feedback
22-- from the mailing list some aspects could be improved.
23--
24-- As I touched nearly all code, reshuffled it, optimized a lot, etc. etc. (imagine
25-- how much can get messed up in over a week work) it could be that I introduced
26-- bugs. There is more to gain (esp in the functions applied to a range) but I'll do
27-- that when everything works as expected. Kai's original code is kept in
28-- font-odk.lua as a reference so blame me (HH) for bugs. (We no longer ship that
29-- file as the code below has diverted too much and in the meantime has more than
30-- doubled in size.)
31--
32-- Interesting is that Kai managed to write this on top of the existing otf handler.
33-- Only a few extensions were needed, like a few more analyzing states and dealing
34-- with changed head nodes in the core scanner as that only happens here. There's a
35-- lot going on here and it's only because I touched nearly all code that I got a
36-- bit of a picture of what happens. For in-depth knowledge one needs to consult
37-- Kai.
38--
39-- The rewrite mostly deals with efficiency, both in terms of speed and code. We
40-- also made sure that it suits generic use as well as use in ConTeXt. I removed
41-- some buglets but can as well have messed up the logic by doing this. For this we
42-- keep the original around as that serves as reference. Due to the lots of
43-- reshuffling glyphs quite some leaks occur(red) but once I'm satisfied with the
44-- rewrite I'll weed them. I also integrated initialization etc into the regular
45-- mechanisms.
46--
47-- In the meantime, we're down from 25.5-3.5=22 seconds to 17.7-3.5=14.2 seconds for
48-- a 100 page sample (mid 2012) with both variants so it's worth the effort. Some
49-- more speedup is to be expected. Due to the method chosen it will never be real
50-- fast. If I ever become a power user I'll have a go at some further speed up. I
51-- will rename some functions (and features) once we don't need to check the
52-- original code. We now use a special subset sequence for use inside the analyzer
53-- (after all we could store this in the dataset and save redundant analysis).
54--
55-- By now we have yet another incremental improved version. In the end I might
56-- rewrite the code.
57--
58-- At the start of 2022 Kauśika spent a lot of time testing combinations of fonts
59-- and scripts and in the process some more tracing was added as well as a mixed
60-- conjuncts options that can deal with fuzzy fonts. The machinery does what it has
61-- to do but some fonts expect more magic to be applied.
62--
63-- Some changes have been reverted because they interfere with proper fonts. We just
64-- don't support bad fonts with heuristics any longer. If needed one can use the
65-- transliteration filters that come with ConTeXt.
66--
67-- Hans Hagen, PRAGMA-ADE, Hasselt NL
68
69-- Todo:
70--
71-- Matras: according to Microsoft typography specifications "up to one of each type:
72-- pre-, above-, below- or post- base", but that does not seem to be right. It could
73-- become an option.
74--
75-- Resources:
76--
77-- The tables that we had here are now generated from char-def.lua or in the case of
78-- generic usage loaded from luatex-basics-chr.lua. Still a couple of entries need
79-- to be added to char-def.lua but finally I moved the indic specific tables there.
80-- For generic usage one can create the relevant resources by running:
81--
82--     context luatex-basics-prepare.tex
83--
84-- and an overview with:
85--
86--     context --global s-fonts-basics.mkiv
87--
88-- For now we have defined: bengali, devanagari, gujarati, gurmukhi, kannada,
89-- malayalam, oriya, tamil and telugu but not all are checked. Also, some of the
90-- code below might need to be adapted to the extra scripts.
91
92local insert, remove, imerge, copy, tohash = table.insert, table.remove, table.imerge, table.copy, table.tohash
93local next, type, rawget = next, type, rawget
94local formatters = string.formatters
95local settings_to_hash = utilities.parsers.settings_to_hash
96
97local report             = logs.reporter("otf","devanagari")
98
99fonts                    = fonts                   or { }
100fonts.analyzers          = fonts.analyzers         or { }
101fonts.analyzers.methods  = fonts.analyzers.methods or { node = { otf = { } } }
102
103local otf                = fonts.handlers.otf
104
105local handlers           = otf.handlers
106local methods            = fonts.analyzers.methods
107
108local otffeatures        = fonts.constructors.features.otf
109local registerotffeature = otffeatures.register
110
111local trace_steps        = false
112
113local nuts               = nodes.nuts
114
115local getnext            = nuts.getnext
116local getprev            = nuts.getprev
117local getboth            = nuts.getboth
118local getid              = nuts.getid
119local getchar            = nuts.getchar
120local getfont            = nuts.getfont
121local getsubtype         = nuts.getsubtype
122local setlink            = nuts.setlink
123local setnext            = nuts.setnext
124local setprev            = nuts.setprev
125local setchar            = nuts.setchar
126local getprop            = nuts.getprop
127local setprop            = nuts.setprop
128local getstate           = nuts.getstate
129local setstate           = nuts.setstate
130
131local ischar             = nuts.ischar
132
133local insertnodeafter    = nuts.insertafter
134local copy_node          = nuts.copy
135local remove_node        = nuts.remove
136local flushlist          = nuts.flushlist
137local flushnode          = nuts.flushnode
138
139local copyinjection      = nodes.injections.copy -- KE: is this necessary? HH: probably not as positioning comes later and we rawget/set
140
141local unsetvalue         = attributes.unsetvalue
142
143local fontdata           = fonts.hashes.identifiers
144
145local a_syllabe          = "syllable"  -- attributes.private('syllabe')   -- can be just a property key
146local a_reordered        = "reordered" -- attributes.private('reordered') -- can be just a property key
147
148local dotted_circle      = 0x25CC
149local c_nbsp             = 0x00A0
150local c_zwnj             = 0x200C
151local c_zwj              = 0x200D
152
153local states             = fonts.analyzers.states -- not features
154
155local s_rphf             = states.rphf
156local s_half             = states.half
157local s_pref             = states.pref
158local s_blwf             = states.blwf
159local s_pstf             = states.pstf
160local s_init             = states.init
161
-- Delayed definition: on the first call we look for the nbsp replacer in the
-- typesetters namespace and rebind this local to it. When that function is not
-- there we rebind to a function that hands back the head untouched. Every
-- later call then goes directly to whatever got bound.

local replace_all_nbsp

function replace_all_nbsp(head)
    local typechars = typesetters and typesetters.characters
    local resolved  = typechars and typechars.replacenbspaces
    if not resolved then
        resolved = function(head)
            return head
        end
    end
    replace_all_nbsp = resolved
    return resolved(head)
end
170
-- Two helpers that differ between ConTeXt and generic usage:
--
-- processcharacters(head,font) : feeds the head through every processor that is
--                                registered for the font (each one is called as
--                                processor(head,font,0) and returns the new
--                                head) and returns the final head
-- logprocess(str)              : reports a step; a no-op in generic mode

local processcharacters
local logprocess

if not context then

    -- generic: the processors are stored with the font itself

    processcharacters = function(head,font)
        local list = fontdata[font].shared.processes
        for index=1,#list do
            local process = list[index]
            head = process(head,font,0)
        end
        return head
    end

    logprocess = function(str)
        -- do nothing
    end

else

    local fontprocesses   = fonts.hashes.processes
    local tracers         = nodes.tracers
    local registermessage = tracers and tracers.steppers.message

    if not registermessage then
        registermessage = function() end
    end

    processcharacters = function(head,font)
        local list = fontprocesses[font]
        for index=1,#list do
            local process = list[index]
            head = process(head,font,0)
        end
        return head
    end

    -- When we're trying to fix something it can be handy to have some more
    -- details available.

    trackers.register("otf.steps", function(v) trace_steps = v end)

    -- With the tracker enabled each message is also passed on to the stepper;
    -- the value "silent" suppresses the regular report.

    logprocess = function(str)
        if trace_steps then
            registermessage("devanagari %s",str)
        end
        if trace_steps ~= "silent" then
            report(str)
        end
    end

end
218
219-- We can assume that scripts are not mixed in the source but if that is the case
220-- we might need to have consonants etc per script and initialize a local table
221-- pointing to the right one. But not now.
222
223-- We have additional data in char-def that provides information not present (at
224-- least when this was written) in Unicode.
225
-- The indic groups are sets (hashes) of characters. When the characters
-- namespace is present but does not provide them yet, we derive them from the
-- indic, indicmark, indicclass and indicorder fields in characters.data and
-- store the result in characters.indicgroups for later use.

local indicgroups = characters and characters.indicgroups

if characters and not indicgroups then

    local bycategory = { -- selected by the 'indic' field
        c = { }, -- consonant
        i = { }, -- independent vowel
        d = { }, -- dependent vowel
        m = { }, -- vowel modifier
        s = { }, -- stress tone mark
        o = { }, -- other (collected but not exported)
    }

    local bymark = { -- selected by the 'indicmark' field
        l = { }, -- left   | pre_mark
        t = { }, -- top    | above_mark
        b = { }, -- bottom | below_mark
        r = { }, -- right  | post_mark
        s = { }, -- split  | twopart_mark
    }

    local byclass = { -- selected by the 'indicclass' field
        nukta    = { },
        halant   = { },
        ra       = { },
        anudatta = { },
    }

    local byorder = { -- selected by the 'indicorder' field
        bp = { }, -- before_postscript
        ap = { }, -- after_postscript
        bs = { }, -- before_subscript
        as = { }, -- after_subscript
        bh = { }, -- before_half
        ah = { }, -- after_half
        bm = { }, -- before_main
        am = { }, -- after_main
    }

    -- Only entries that have an 'indic' field are looked at; the other three
    -- fields are optional refinements of such an entry.

    for code, entry in next, characters.data do
        local category = entry.indic
        if category then
            bycategory[category][code] = true
            local mark = entry.indicmark
            if mark == "s" then
                -- a split mark keeps the second and third entry of its specials
                local specials = entry.specials
                bymark.s[code] = { specials[2], specials[3] }
            elseif mark then
                bymark[mark][code] = true
            end
            local class = entry.indicclass
            if class then
                byclass[class][code] = true
            end
            local order = entry.indicorder
            if order then
                byorder[order][code] = true
            end
        end
    end

    indicgroups = {
        -- categories
        consonant         = bycategory.c,
        independent_vowel = bycategory.i,
        dependent_vowel   = bycategory.d,
        vowel_modifier    = bycategory.m,
        stress_tone_mark  = bycategory.s,
     -- other             = bycategory.o,
        -- marks
        pre_mark          = bymark.l,
        above_mark        = bymark.t,
        below_mark        = bymark.b,
        post_mark         = bymark.r,
        twopart_mark      = bymark.s,
        -- classes
        nukta             = byclass.nukta,
        halant            = byclass.halant,
        ra                = byclass.ra,
        anudatta          = byclass.anudatta,
        -- orders
        before_postscript = byorder.bp,
        after_postscript  = byorder.ap,
        before_half       = byorder.bh,
        after_half        = byorder.ah,
        before_subscript  = byorder.bs,
        after_subscript   = byorder.as,
        before_main       = byorder.bm,
        after_main        = byorder.am,
    }

    characters.indicgroups = indicgroups

end
323
324local consonant         = indicgroups.consonant
325local independent_vowel = indicgroups.independent_vowel
326local dependent_vowel   = indicgroups.dependent_vowel
327local vowel_modifier    = indicgroups.vowel_modifier
328local stress_tone_mark  = indicgroups.stress_tone_mark
329local pre_mark          = indicgroups.pre_mark
330local above_mark        = indicgroups.above_mark
331local below_mark        = indicgroups.below_mark
332local post_mark         = indicgroups.post_mark
333local twopart_mark      = indicgroups.twopart_mark
334local nukta             = indicgroups.nukta
335local halant            = indicgroups.halant
336local ra                = indicgroups.ra
337local anudatta          = indicgroups.anudatta
338
339local before_postscript = indicgroups.before_postscript
340local after_postscript  = indicgroups.after_postscript
341local before_half       = indicgroups.before_half
342local after_half        = indicgroups.after_half
343local before_subscript  = indicgroups.before_subscript
344local after_subscript   = indicgroups.after_subscript
345local before_main       = indicgroups.before_main
346local after_main        = indicgroups.after_main
347
-- Combined mark sets: all four positions, and the same without the pre marks.

local mark_pre_above_below_post = table.merged(pre_mark,above_mark,below_mark,post_mark)
local mark_above_below_post     = table.merged(above_mark,below_mark,post_mark)
360
361-- Handy
362
-- A self filling hash indexed by font id: on first access the devanagari entry
-- of the font resources is looked up and remembered; when the font has no such
-- entry we remember false.

local devanagarihash = table.setmetatableindex(function(cache,font)
    local resources = fontdata[font].resources
    local state     = resources.devanagari
    if not state then
        state = false
    end
    cache[font] = state
    return state
end)
368
369-- We use some pseudo features as we need to manipulate the nodelist based
370-- on information in the font as well as already applied features. We can
371-- probably replace some of the code below by injecting 'real' features
372-- using the extension mechanism.
373
-- Shared building blocks for the pseudo feature sequences defined below.

local zw_char = { [c_zwnj] = true, [c_zwj] = true } -- both_joiners_true

local dflt_true = { dflt = true }

-- Script tag -> dflt_true mappings; both start out empty and get their entries
-- in the loop over the supported script tags further down.

local two_defaults, one_defaults = { }, { }

local false_flags = { false, false, false, false }
387
-- The four pseudo feature sequences (dv01 .. dv04). They all have the same
-- shape: one feature, no flags set, and a single step with a coverage table,
-- so we build them with a small constructor that stays local to this block.

local sequence_reorder_matras
local sequence_reorder_reph
local sequence_reorder_pre_base_reordering_consonants
local sequence_remove_joiners

do

    local function pseudosequence(feature,name,kind,defaults,coverage)
        return {
            features = { [feature] = defaults },
            flags    = false_flags,
            name     = name,
            order    = { feature },
            type     = kind,
            nofsteps = 1,
            steps    = {
                {
                    coverage = coverage,
                }
            }
        }
    end

    -- the matras sequence starts out with the pre marks as coverage

    sequence_reorder_matras = pseudosequence(
        "dv01",
        "dv01_reorder_matras",
        "devanagari_reorder_matras",
        two_defaults,
        pre_mark
    )

    -- these two each start out with an empty coverage table of their own

    sequence_reorder_reph = pseudosequence(
        "dv02",
        "dv02_reorder_reph",
        "devanagari_reorder_reph",
        two_defaults,
        { }
    )

    sequence_reorder_pre_base_reordering_consonants = pseudosequence(
        "dv03",
        "dv03_reorder_pre_base_reordering_consonants",
        "devanagari_reorder_pre_base_reordering_consonants",
        one_defaults,
        { }
    )

    -- the joiner removal is triggered by zwj and zwnj

    sequence_remove_joiners = pseudosequence(
        "dv04",
        "dv04_remove_joiners",
        "devanagari_remove_joiners",
        one_defaults,
        zw_char -- both_joiners_true
    )

end
443
444-- Looping over features is twice as efficient as looping over basic forms (some
445-- 350 checks instead of 750 for one font). This is something to keep an eye on
446-- as it might depend on the font. Not that it's a bottleneck.
447
-- Feature tags that count as basic shaping forms; sequences that carry one of
-- these are moved to the front of the sequence list when a font is set up.

local basic_shaping_forms = {
    akhn = true, blwf = true, cjct = true, half = true,
    nukt = true, pref = true, pstf = true, rkrf = true,
    rphf = true, vatu = true,
    locl = true,
}
461
-- Feature tags that are taken into account when the sequences of a font are
-- scanned. The original constructor listed pres, abvs, psts, haln and calt
-- twice; a repeated key in a table constructor just overwrites the earlier
-- one, so dropping the repeats leaves the set as it was.

local valid = {
    abvs = true,
    akhn = true,
    blwf = true,
    calt = true,
    cjct = true,
    half = true,
    haln = true,
    nukt = true,
    pref = true,
    pres = true,
    pstf = true,
    psts = true,
    rkrf = true,
    rphf = true,
    vatu = true,
    blws = true,
    locl = true,
}
486
-- The supported script tags come in pairs: the original tag and its '2'
-- variant sit at the same index in the two lists. All of them end up in the
-- scripts set; one_defaults gets both tags of a pair while two_defaults only
-- gets the '2' variant.

local scripts = { }

local scripts_one = { "deva", "mlym", "beng", "gujr", "guru", "knda", "orya", "taml", "telu" }
local scripts_two = { "dev2", "mlm2", "bng2", "gjr2", "gur2", "knd2", "ory2", "tml2", "tel2" }

local nofscripts = #scripts_one

for index=1,nofscripts do
    local old, new = scripts_one[index], scripts_two[index]
    scripts[old], scripts[new] = true, true
    one_defaults[old] = dflt_true
    one_defaults[new] = dflt_true
    two_defaults[new] = dflt_true
end
503
-- Returns true when the given table has a true value for at least one of the
-- tags in scripts_one; otherwise nothing is returned.
local function valid_one(s)
    for index=1,nofscripts do
        local tag = scripts_one[index]
        if s[tag] then
            return true
        end
    end
end
-- Returns true when the given table has a true value for at least one of the
-- tags in scripts_two; otherwise nothing is returned.
local function valid_two(s)
    for index=1,nofscripts do
        local tag = scripts_two[index]
        if s[tag] then
            return true
        end
    end
end
506
507local function initializedevanagi(tfmdata)
508    local script, language = otf.scriptandlanguage(tfmdata,attr) -- todo: take fast variant
509    if scripts[script] then
510        local resources  = tfmdata.resources
511        local devanagari = resources.devanagari
512        if not devanagari then
513            --
514            report("adding features to font")
515            --
516            local gsubfeatures   = resources.features.gsub
517            local sequences      = resources.sequences
518            local sharedfeatures = tfmdata.shared.features
519            --
520            gsubfeatures["dv01"] = two_defaults -- reorder matras
521            gsubfeatures["dv02"] = two_defaults -- reorder reph
522            gsubfeatures["dv03"] = one_defaults -- reorder pre base reordering consonants
523            gsubfeatures["dv04"] = one_defaults -- remove joiners
524            --
525            local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants)
526            local reorder_reph                           = copy(sequence_reorder_reph)
527            local reorder_matras                         = copy(sequence_reorder_matras)
528            local remove_joiners                         = copy(sequence_remove_joiners)
529
530            local lastmatch = 0
531            for s=1,#sequences do -- classify chars and make sure basic_shaping_forms come first
532                local features = sequences[s].features
533                if features then
534                    for k, v in next, features do
535                        if k == "locl" then
536                            local steps = sequences[s].steps
537                            local nofsteps = sequences[s].nofsteps
538                            for i=1,nofsteps do
539                                local step     = steps[i]
540                                local coverage = step.coverage
541                                if coverage then
542                                    for k, v in next, pre_mark do
543                                        local locl = coverage[k]
544                                        if locl then
545                                            -- if #locl > 0 then we have a list otherwise a hash; we actually should
546                                            -- test properly for gsub_...
547                                            if #locl > 0 then
548                                                for j=1,#locl do
549                                                    local ck      = locl[j]
550                                                    local f       = ck[4]
551                                                    local chainlookups = ck[6]
552                                                    if chainlookups then
553                                                        local chainlookup = chainlookups[f]
554                                                        for j=1,#chainlookup do
555                                                            local chainstep = chainlookup[j]
556                                                            local steps    = chainstep.steps
557                                                            local nofsteps = chainstep.nofsteps
558                                                            for i=1,nofsteps do
559                                                                local step     = steps[i]
560                                                                local coverage = step.coverage
561                                                                if coverage then
562                                                                    locl = coverage[k]
563                                                                end
564                                                            end
565                                                        end
566                                                    end
567                                                end
568                                            else
569                                                -- useless next if, because locl is a table
570                                            end
571                                            if locl then
572                                                reorder_matras.steps[1].coverage[locl] = true
573                                            end
574                                        end
575                                    end
576                                end
577                            end
578                        end
579                        if basic_shaping_forms[k] then
580                            lastmatch = lastmatch + 1
581                            if s ~= lastmatch then
582                                insert(sequences,lastmatch,remove(sequences,s))
583                            end
584                        end
585                    end
586                end
587            end
588            local insertindex = lastmatch + 1
589            --
590            if tfmdata.properties.language then
591                dflt_true[tfmdata.properties.language] = true
592            end
593            --
594            insert(sequences,insertindex,reorder_pre_base_reordering_consonants)
595            insert(sequences,insertindex,reorder_reph)
596            insert(sequences,insertindex,reorder_matras)
597            insert(sequences,insertindex,remove_joiners)
598            --
599            local blwfcache  = { }
600            local vatucache  = { }
601            local pstfcache  = { }
602            local seqsubset  = { }
603            local rephstep   = { coverage = { } } -- will be adapted each work
604            local devanagari = {
605                reph        = false,
606                vattu       = false,
607                blwfcache   = blwfcache,
608                vatucache   = vatucache,
609                pstfcache   = pstfcache,
610                seqsubset   = seqsubset,
611                reorderreph = rephstep,
612
613            }
614            --
615            reorder_reph.steps = { rephstep }
616            --
617            local pre_base_reordering_consonants = { }
618            reorder_pre_base_reordering_consonants.steps[1].coverage = pre_base_reordering_consonants
619            --
620            resources.devanagari = devanagari
621            --
622            for s=1,#sequences do
623                local sequence = sequences[s]
624                local steps    = sequence.steps
625                local nofsteps = sequence.nofsteps
626                local features = sequence.features
627                local has_rphf = features.rphf
628                local has_blwf = features.blwf
629                local has_vatu = features.vatu
630                local has_pstf = features.pstf
631                if has_rphf and has_rphf[script] then
632                    devanagari.reph = true
633                elseif (has_blwf and has_blwf[script]) or (has_vatu and has_vatu[script]) then
634                    devanagari.vattu = true
635                    for i=1,nofsteps do
636                        local step     = steps[i]
637                        local coverage = step.coverage
638                        if coverage then
639                            for k, v in next, coverage do
640                                for h, w in next, halant do
641                                    if v[h] and not blwfcache[k] then
642                                        blwfcache[k] = v
643                                    end
644                                    if has_vatu and has_vatu[script] and not vatucache[k] then
645                                        vatucache[k] = v
646                                    end
647                                end
648                            end
649                        end
650                    end
651                elseif has_pstf and has_pstf[script] then
652                    for i=1,nofsteps do
653                        local step     = steps[i]
654                        local coverage = step.coverage
655                        if coverage then
656                            for k, v in next, coverage do
657                                if not pstfcache[k] then
658                                    pstfcache[k] = v
659                                end
660                            end
661                            for k, v in next, ra do
662                                local r = coverage[k]
663                                if r then
664                                    -- if #r > 0 then we have a list otherwise a hash; we actually should
665                                    -- test properly for gsub_...
666                                    local found = false
667                                    if #r > 0 then
668                                        for j=1,#r do
669                                            local ck = r[j]
670                                            local f  = ck[4]
671                                            local chainlookups = ck[6]
672                                            if chainlookups then
673                                                local chainlookup = chainlookups[f]
674                                                if chainlookup then
675                                                    for j=1,#chainlookup do
676                                                        local chainstep = chainlookup[j]
677                                                        local steps     = chainstep.steps
678                                                        local nofsteps  = chainstep.nofsteps
679                                                        for i=1,nofsteps do
680                                                            local step     = steps[i]
681                                                            local coverage = step.coverage
682                                                            if coverage then
683                                                                local h = coverage[k]
684                                                                if h then
685                                                                    for k, v in next, h do
686                                                                        if v then
687                                                                            found = tonumber(v) or v.ligature
688                                                                            if found then
689                                                                                pre_base_reordering_consonants[found] = true
690                                                                                break
691                                                                            end
692                                                                        end
693                                                                    end
694                                                                    if found then
695                                                                        break
696                                                                    end
697                                                                end
698                                                            end
699                                                        end
700                                                    end
701                                                end
702                                            end
703                                        end
704                                    else
705                                        for k, v in next, r do
706                                            if v then
707                                                found = tonumber(v) or v.ligature
708                                                if found then
709                                                    pre_base_reordering_consonants[found] = true
710                                                    break
711                                                end
712                                            end
713                                        end
714                                    end
715                                    if found then
716                                        break
717                                    end
718                                end
719                            end
720                        end
721                    end
722                end
723                for kind, spec in next, features do
724                    if valid[kind] and valid_two(spec)then
725                        for i=1,nofsteps do
726                            local step     = steps[i]
727                            local coverage = step.coverage
728                            if coverage then
729                                local reph = false
730                                local base = false
731                                if kind == "rphf" then
732                                    -- rphf acts on consonant + halant
733                                    for k, v in next, ra do
734                                        local r = coverage[k]
735                                        if r then
736                                            -- if #r > 0 then we have a list otherwise a hash; we actually should
737                                            -- test properly for gsub_...
738                                            base = k
739                                            local h = false
740                                            if #r > 0 then
741                                                for j=1,#r do
742                                                    local ck = r[j]
743                                                    local f  = ck[4]
744                                                    local chainlookups = ck[6]
745                                                    if chainlookups then
746                                                        local chainlookup = chainlookups[f]
747                                                        for j=1,#chainlookup do
748                                                            local chainstep = chainlookup[j]
749                                                            local steps    = chainstep.steps
750                                                            local nofsteps = chainstep.nofsteps
751                                                            for i=1,nofsteps do
752                                                                local step     = steps[i]
753                                                                local coverage = step.coverage
754                                                                if coverage then
755                                                                    local r = coverage[k]
756                                                                    if r then
757                                                                        for k, v in next, halant do
758                                                                            local h = r[k]
759                                                                            if h then
760                                                                                reph = tonumber(h) or h.ligature or false
761                                                                                break
762                                                                            end
763                                                                        end
764                                                                        if h then
765                                                                            break
766                                                                        end
767                                                                    end
768                                                                end
769                                                            end
770                                                        end
771                                                    end
772                                                end
773                                            else
774                                                for k, v in next, halant do
775                                                    local h = r[k]
776                                                    if h then
777                                                        reph = tonumber(h) or h.ligature or false
778                                                        break
779                                                    end
780                                                end
781                                            end
782                                            if reph then
783                                                break
784                                            end
785                                        end
786                                    end
787                                end
788--                                 if reph then
789                                    seqsubset[#seqsubset+1] = { kind, coverage, reph, base }
790--                                 end
791                            end
792                        end
793                    end
794                    if kind == "pref" then
795                        local steps    = sequence.steps
796                        local nofsteps = sequence.nofsteps
797                        for i=1,nofsteps do
798                            local step     = steps[i]
799                            local coverage = step.coverage
800                            if coverage then
801                                for k, v in next, halant do
802                                    local h = coverage[k]
803                                    if h then
804                                        -- if #h > 0 then we have a list otherwise a hash; we actually should
805                                        -- test properly for gsub_...
806                                        local found = false
807                                        if #h > 0 then
808                                            for j=1,#h do
809                                                local ck = h[j]
810                                                local f  = ck[4]
811                                                local chainlookups = ck[6]
812                                                if chainlookups then
813                                                    local chainlookup = chainlookups[f]
814                                                    for j=1,#chainlookup do
815                                                        local chainstep = chainlookup[j]
816                                                        local steps     = chainstep.steps
817                                                        local nofsteps  = chainstep.nofsteps
818                                                        for i=1,nofsteps do
819                                                            local step     = steps[i]
820                                                            local coverage = step.coverage
821                                                            if coverage then
822                                                                local h = coverage[k]
823                                                                if h then
824                                                                    for k, v in next, h do
825                                                                        if v then
826                                                                            found = tonumber(v) or v.ligature
827                                                                            if found then
828                                                                                pre_base_reordering_consonants[found] = true
829                                                                                break
830                                                                            end
831                                                                        end
832                                                                    end
833                                                                    if found then
834                                                                        break
835                                                                    end
836                                                                end
837                                                            end
838                                                        end
839                                                    end
840                                                end
841                                            end
842                                        else
843                                            for k, v in next, h do
844                                                found = v and (tonumber(v) or v.ligature)
845                                                if found then
846                                                    pre_base_reordering_consonants[found] = true
847                                                    break
848                                                end
849                                            end
850                                        end
851                                        if found then
852                                            break
853                                        end
854                                    end
855                                end
856                            end
857                        end
858                    end
859                end
860            end
861            --
862            if two_defaults[script] then
863                sharedfeatures["dv01"] = true -- dv01_reorder_matras
864                sharedfeatures["dv02"] = true -- dv02_reorder_reph
865                sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants
866                sharedfeatures["dv04"] = true -- dv04_remove_joiners
867            elseif one_defaults[script] then
868                sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants
869                sharedfeatures["dv04"] = true -- dv04_remove_joiners
870            end
871         -- if script == "mlym" or script == "taml" then
872         --     devanagari.movematra = "leftbeforebase"
873         -- end
874        end
875    end
876end
877
-- Register the "devanagari" feature. It is on by default and, in node mode,
-- runs initializedevanagi as its initializer.
registerotffeature({
    name         = "devanagari",
    description  = "inject additional features",
    default      = true,
    initializers = { node = initializedevanagi },
})
886
-- Initializer of the "indic" feature: resolves the conjuncts and movematra
-- settings and stores them in tfmdata.resources.devanagari.
--
-- tfmdata : font data; nothing happens when resources.devanagari is absent
-- value   : feature value; a string other than "auto" is parsed with
--           settings_to_hash and may provide "conjuncts" and "movematra"
--
-- conjuncts : mixed|continue|quit|auto, where auto becomes mixed
-- movematra : default|leftbeforebase|auto, where auto becomes leftbeforebase
--             for the mlym and taml scripts and default otherwise
local function initializeconjuncts(tfmdata,value)
    if value then
        local resources  = tfmdata.resources
        local devanagari = resources.devanagari
        if devanagari then
            -- quit was the old situation
            local conjuncts = "auto" -- mixed|continue|quit|auto
            local movematra = "auto" -- default|leftbeforebase|auto
            if type(value) == "string" and value ~= "auto" then
                value     = settings_to_hash(value)
                conjuncts = rawget(value,"conjuncts") or conjuncts
                movematra = rawget(value,"movematra") or movematra
            end
            if conjuncts == "auto" then
                conjuncts = "mixed" -- for all scripts ?
            end
            if movematra == "auto" then
                -- Only an unspecified value is resolved per script; an explicit
                -- user choice must not be overwritten here.
                -- NOTE(review): assumes tfmdata.properties.script holds the script
                -- tag of this font instance -- confirm against otf.scriptandlanguage.
                local properties = tfmdata.properties
                local script     = properties and properties.script
                if script == "mlym" or script == "taml" then
                    movematra = "leftbeforebase"
                else
                    movematra = "default"
                end
            elseif movematra ~= "leftbeforebase" then
                -- unknown values keep falling back to the default behaviour
                movematra = "default"
            end
            devanagari.conjuncts = conjuncts
            devanagari.movematra = movematra
            --
            if trace_steps then
                report("conjuncts %a, movematra %a",conjuncts,movematra)
            end
            --
        end
    end
end
920
-- Register the "indic" control feature. Its default value is "auto" and, in
-- node mode, initializeconjuncts interprets the value.
registerotffeature({
    name         = "indic",
    description  = "control indic",
    default      = "auto",
    initializers = { node = initializeconjuncts },
})
929
-- When set, reorder_one calls inject_syntax_error for an nbsp cluster whose
-- below-base processing left the glyph unchanged.
local show_syntax_errors = false

-- Duplicates current (including its injection data) and turns one of the two
-- nodes into a dotted circle: the copy when char is a pre-base mark, otherwise
-- current itself. The copy is inserted right after current.
--
-- Returns whatever insertnodeafter returns for (head,current,copy).
local function inject_syntax_error(head,current,char)
    local signal = copy_node(current)
    copyinjection(signal,current)
    -- signal is a node and therefore never false, so and/or is safe here
    local target = pre_mark[char] and signal or current
    setchar(target,dotted_circle)
    return insertnodeafter(head,current,signal)
end
942
943-- hm, this is applied to one character:
944
-- Returns reph, vattu, blwfcache, vatucache, pstfcache for a font/attribute
-- combination. The record is built on first use and kept in the devanagari
-- field of the table returned by otf.dataset.
--
-- reph  : true when an enabled dataset is of kind "rphf"
-- vattu : true when an enabled dataset is of kind "blwf" or "vatu"; in that
--         case the three caches are taken from tfmdata.resources.devanagari,
--         otherwise they are empty tables
local function initialize_one(font,attr) -- we need a proper hook into the dataset initializer

    local tfmdata  = fontdata[font]
    local datasets = otf.dataset(tfmdata,font,attr) -- don't we know this one?
    local data     = datasets.devanagari

    if data then
        return data.reph, data.vattu, data.blwfcache, data.vatucache, data.pstfcache
    end

    data = {
        reph      = false,
        vattu     = false,
        blwfcache = { },
        vatucache = { },
        pstfcache = { },
    }
    datasets.devanagari = data

    local shared = tfmdata.resources.devanagari

    for index=1,#datasets do
        local dataset = datasets[index]
        -- entry 1 is the value (feature enabled), entry 4 the feature kind
        local kind = dataset and dataset[1] and dataset[4]
        if kind == "rphf" then
            -- deva
            data.reph = true
        elseif kind == "blwf" or kind == "vatu" then
            -- deva
            data.vattu = true
            -- dev2
            data.blwfcache = shared.blwfcache
            data.vatucache = shared.vatucache
            data.pstfcache = shared.pstfcache
        end
    end

    return data.reph, data.vattu, data.blwfcache, data.vatucache, data.pstfcache

end
987
988-- HH: somehow we can get a non context here so for now we check for .n
989
-- Checks whether node n matches one of the given contexts.
--
-- When contexts has no n field it is used as a plain table indexed by the
-- character of n and that entry is returned. Otherwise every rule is tried:
-- entry 3 is the sequence seq, entries 4 and 5 are the indices f and l. A rule
-- only qualifies when l - f == 1 and seq[f+1] covers the character of n; then
-- the nodes after n must match seq[l+1] .. seq[#seq] and the nodes before
-- getprev(n) must match seq[f-1] .. seq[1].
--
-- Returns true on the first matching rule and false when none matches.
local function contextchain(contexts,n)
    local char = getchar(n)
    if not contexts.n then
        return contexts[char]
    else
        for k=1,#contexts do
            local ck  = contexts[k]
            local seq = ck[3]
            local f   = ck[4]
            local l   = ck[5]
            if (l - f) == 1 and seq[f+1][char] then
                local ok = true
                local c = n
                for i=l+1,#seq do
                    c = getnext(c)
                    if not c or not seq[i][ischar(c)] then
                        ok = false
                        break
                    end
                end
                if ok then
                    c = getprev(n)
                    for i=1,f-1 do
                        -- never step back from a missing node
                        c = c and getprev(c)
                        if not c or not seq[f-i][ischar(c)] then
                            ok = false
                            -- a mismatch is final, so stop scanning (as in the forward loop)
                            break
                        end
                    end
                end
                if ok then
                    return true
                end
            end
        end
        return false
    end
end
1027
-- Sorts the run of dependent vowels that follows node c. Each matra is compared
-- with the nodes from c up to (not including) itself; it is moved in front of
-- the first such node that should come later:
--
--   an above mark goes before a below or post mark
--   a below mark goes before a post mark
local function order_matras(c)
    local matra = getnext(c)
    local mchar = getchar(matra)
    while dependent_vowel[mchar] do
        -- remember the successor now, as the matra may get relinked
        local following = getnext(matra)
        local probe     = c
        local pchar     = getchar(probe)
        while probe ~= matra do
            local misplaced
            if above_mark[mchar] then
                misplaced = below_mark[pchar] or post_mark[pchar]
            elseif below_mark[mchar] then
                misplaced = post_mark[pchar]
            end
            if misplaced then
                -- unlink the matra ...
                local before, after = getboth(matra)
                if after then
                    setprev(after,before)
                end
                -- todo: setlink
                setnext(before,after)
                -- ... and put it back right in front of probe
                local anchor = getprev(probe)
                setnext(anchor,matra)
                setprev(matra,anchor)
                setnext(matra,probe)
                setprev(probe,matra)
                break
            end
            probe = getnext(probe)
            pchar = getchar(probe)
        end
        matra = following
        mchar = getchar(matra)
    end
end
1056
-- presumably the reverse (value to name) mapping of states -- TODO confirm where it is used
local swapped = table.swapped(states)
1058
-- Reorders one syllable: the nodes from start up to and including stop.
--
-- The steps, in order:
--
--   1. skip an initial ra + halant (reph) when looking for the base
--   2. handle a leading nbsp (stand alone cluster)
--   3. find the base consonant and tag the others (half, blwf, pstf states)
--   4. move the halant of the base behind the last consonant
--   5. move an initial ra + halant behind the base or its matra
--   6. unset states around halant + zwnj and on a base followed by a halant
--   7. split two-part matras and move pre-base matras to the front
--   8. position reph and below-base ra (only when initialize_one reports
--      reph or vattu for this font)
--   9. remove an nbsp that served as base
--
-- head, start, stop : list head and the boundaries of the syllable
-- font, attr        : passed to initialize_one (font also to processcharacters)
-- nbspaces          : running counter, adjusted when an nbsp acts as consonant
--
-- Returns the (possibly changed) head and stop and the updated nbspaces.
local function reorder_one(head,start,stop,font,attr,nbspaces)

    local reph, vattu, blwfcache, vatucache, pstfcache = initialize_one(font,attr) -- todo: a hash[font]

 -- local devanagari = devanagarihash[font]
    local current    = start
    local n          = getnext(start)
    local base       = nil
    local firstcons  = nil
    local lastcons   = nil
    local basefound  = false

    if reph and ra[getchar(start)] and halant[getchar(n)] then
        -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph
        -- from candidates for base consonants
        if n == stop then
            return head, stop, nbspaces
        end
        if getchar(getnext(n)) == c_zwj then
            current = start
        else
            current = getnext(n)
            setstate(start,s_rphf)
        end
    end

    if getchar(current) == c_nbsp then
        -- Stand Alone cluster
        if current == stop then
            stop = getprev(stop)
            head = remove_node(head,current)
            flushnode(current)
            if trace_steps then
                logprocess("reorder one, remove nbsp")
            end
            return head, stop, nbspaces
        else
            -- the nbsp acts as base and as first and last consonant
            nbspaces  = nbspaces + 1
            base      = current
            firstcons = current
            lastcons  = current
            current   = getnext(current)
            if current ~= stop then
                local char = getchar(current)
                if nukta[char] then
                    current = getnext(current)
                    char = getchar(current)
                end
                if char == c_zwj and current ~= stop then
                    local next = getnext(current)
                    if next ~= stop and halant[getchar(next)] then
                        current = next
                        next = getnext(current)
                        local tmp = next and getnext(next) or nil -- needs checking
                        local changestop = next == stop
                        -- run the below-base feature on a temporary two node list
                        local tempcurrent = copy_node(next)
                        copyinjection(tempcurrent,next)
                        local nextcurrent = copy_node(current)
                        copyinjection(nextcurrent,current) -- KE: necessary? HH: probably not as positioning comes later and we rawget/set
                        setlink(tempcurrent,nextcurrent)
                        setstate(tempcurrent,s_blwf)
                        tempcurrent = processcharacters(tempcurrent,font)
                        setstate(tempcurrent,unsetvalue)
                        if getchar(next) == getchar(tempcurrent) then
                            -- nothing was substituted
                            flushlist(tempcurrent)
                            if show_syntax_errors then
                                head, current = inject_syntax_error(head,current,char)
                            end
                        else
                            setchar(current,getchar(tempcurrent)) -- we assume that the result of blwf consists of one node
                            local freenode = getnext(current)
                            setlink(current,tmp)
                            flushnode(freenode)
                            flushlist(tempcurrent)
                            if changestop then
                                stop = current
                            end
                        end
                        if trace_steps then
                            logprocess("reorder one, handle nbsp")
                        end
                    end
                end
            end
        end
    end

    while not basefound do
        -- find base consonant
        local char = getchar(current)
        if consonant[char] then
            setstate(current,s_half)
            if not firstcons then
                firstcons = current
            end
            lastcons = current
            if not base then
                base = current
            elseif blwfcache[char] then
                -- consonant has below-base form
                setstate(current,s_blwf)
            elseif pstfcache[char] then
                -- consonant has post-base form
                setstate(current,s_pstf)
            else
                base = current
            end
        end
        basefound = current == stop
        current = getnext(current)
    end

    if base ~= lastcons then
        -- if base consonant is not last one then move halant from base consonant to last one
        local np = base
        local n  = getnext(base)
        local ch = getchar(n)
        if nukta[ch] then
            np = n
            n  = getnext(n)
            ch = getchar(n)
        end
        if halant[ch] then
            if lastcons ~= stop then
                local ln = getnext(lastcons)
                if nukta[getchar(ln)] then
                    lastcons = ln
                end
            end
         -- local np = getprev(n)
            local nn = getnext(n)
            local ln = getnext(lastcons) -- what if lastcons is nn ?
            setlink(np,nn)
            setnext(lastcons,n)
            if ln then
                setprev(ln,n)
            end
            setnext(n,ln)
            setprev(n,lastcons)
            if lastcons == stop then
                stop = n
            end
            if trace_steps then
                logprocess("reorder one, handle halant")
            end
        end
    end

    n = getnext(start)
    if n ~= stop and ra[getchar(start)] and halant[getchar(n)] and not zw_char[getchar(getnext(n))] then
        -- if syllable starts with Ra + H then move this combination so that it follows either:
        -- the post-base 'matra' (if any) or the base consonant
        local matra = base
        if base ~= stop then
            local next = getnext(base)
            if dependent_vowel[getchar(next)] then
                matra = next
            end
        end
        -- [sp][start][n][nn] [matra|base][?]
        -- [matra|base][start]  [n][?] [sp][nn]
        local sp = getprev(start)
        local nn = getnext(n)
        local mn = getnext(matra)
        setlink(sp,nn)
        setlink(matra,start)
        setlink(n,mn)
        if head == start then
            head = nn
        end
        start = nn
        if matra == stop then
            stop = n
        end
        if trace_steps then
            logprocess("reorder one, handle matra")
        end
    end

    -- a node followed by halant + zwnj loses its state
    local current = start
    while current ~= stop do
        local next = getnext(current)
        if next ~= stop and halant[getchar(next)] and getchar(getnext(next)) == c_zwnj then
            setstate(current,unsetvalue)
        end
        current = next
    end

    -- so does a base that is followed by a halant, unless a zwj comes after it
    if base ~= stop and getstate(base) then -- state can also be init
        local next = getnext(base)
        if halant[getchar(next)] and not (next ~= stop and getchar(getnext(next)) == c_zwj) then
            setstate(base,unsetvalue)
        end
    end

    -- split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable.
    -- classify consonants and 'matra' parts as pre-base, above-base (Reph), below-base or post-base, and group elements of the syllable (consonants and 'matras') according to this classification

    local current, allreordered = start, false
    -- b is the node after which matras of non-base consonants get collected:
    -- the base, or its nukta when there is one
    local b, bn = base, getnext(base)
    if base ~= stop and nukta[getchar(bn)] then
        b = bn
    end
    while not allreordered do
        -- current is always consonant
        local c = current
        local n = getnext(current)
        -- advance c to the last node that belongs to this consonant
        if c ~= stop then
            local ch = getchar(n)
            if nukta[ch] then
                c  = n
                n  = getnext(n)
                ch = getchar(n)
            end
            if c ~= stop then
                if halant[ch] then
                    c  = n
                    n  = getnext(n)
                    ch = getchar(n)
                end

                local tpm = twopart_mark[ch]
                if tpm then
                    -- n keeps the first part, the other parts become new nodes after current
                    while tpm do
                        local extra = copy_node(n)
                        copyinjection(extra,n)
                        ch = tpm[1]
                        setchar(n,ch)
                        setchar(extra,tpm[2])
                        head = insertnodeafter(head,current,extra)
                        tpm = twopart_mark[ch]
                    end
                    if trace_steps then
                        logprocess("reorder one, handle mark")
                    end
                end
                while c ~= stop and dependent_vowel[ch] do
                    c  = n
                    n  = getnext(n)
                    ch = getchar(n)
                end
                if c ~= stop then
                    if vowel_modifier[ch] then
                        c  = n
                        n  = getnext(n)
                        ch = getchar(n)
                    end
                    if c ~= stop and stress_tone_mark[ch] then
                        c = n
                        n = getnext(n)
                    end
                end
            end
        end
        local bp   = getprev(firstcons)
        local cn   = getnext(current)
        local last = getnext(c)
        local done = false
        while cn ~= last do
            -- move pre-base matras...
            if pre_mark[getchar(cn)] then
                if devanagarihash[font].movematra == "leftbeforebase" then
                    -- ... right in front of the base
                    local prev, next = getboth(cn)
                    setlink(prev,next)
                    if cn == stop then
                        stop = getprev(cn)
                    end
                    if base == start then
                       if head == start then
                           head = cn
                       end
                       start = cn
                    end
                    setlink(getprev(base),cn)
                    setlink(cn,base)
                 -- setlink(getprev(base),cn,base) -- maybe
                    cn = next
                else
                    -- ... in front of the first consonant
                    if bp then
                        setnext(bp,cn)
                    end
                    local prev, next = getboth(cn)
                    if next then
                        setprev(next,prev)
                    end
                    setnext(prev,next)
                    if cn == stop then
                        stop = prev
                    end
                    setprev(cn,bp)
                    setlink(cn,firstcons)
                    if firstcons == start then
                        if head == start then
                            head = cn
                        end
                        start = cn
                    end
                    cn = next
                end
                done = true
            elseif current ~= base and dependent_vowel[getchar(cn)] then
                -- a matra on a non-base consonant moves behind b
                local prev, next = getboth(cn)
                if next then
                    setprev(next,prev)
                end
                setnext(prev,next)
                if cn == stop then
                    stop = prev
                end
                setlink(b,cn,getnext(b))
                order_matras(cn)
                cn = next
                done = true
            elseif current == base and dependent_vowel[getchar(cn)] then
                -- matras on the base stay where they are but get sorted
                local cnn = getnext(cn)
                order_matras(cn)
                cn = cnn
                while cn ~= last and dependent_vowel[getchar(cn)] do
                    cn = getnext(cn)
                end
            else
                cn = getnext(cn)
            end
        end
        allreordered = c == stop
        current = getnext(c)
        if done and trace_steps then
            logprocess("reorder one, matra")
        end
    end

    if reph or vattu then
        local current = start
        local cns     = nil
        local done    = false
        while current ~= stop do
            local c = current
            local n = getnext(current)
            if ra[getchar(current)] and halant[getchar(n)] then
                c = n
                n = getnext(n)
                -- b becomes the last dependent vowel after the base (or the base itself)
                local b, bn = base, base
                while bn ~= stop  do
                    local next = getnext(bn)
                    if dependent_vowel[getchar(next)] then
                        b = next
                    end
                    bn = next
                end
                if getstate(current,s_rphf) then
                    -- position Reph (Ra + H) after post-base 'matra' (if any) since these
                    -- become marks on the 'matra', not on the base glyph
                    if b ~= current then
                        if current == start then
                            if head == start then
                                head = n
                            end
                            start = n
                        end
                        if b == stop then
                            stop = c
                        end
                        local prev = getprev(current)
                        setlink(prev,n)
                        local next = getnext(b)
                        setlink(c,next)
                        setlink(b,current)
                        done = true
                    end
                elseif cns and getnext(cns) ~= current then -- todo: optimize next
                    -- position below-base Ra (vattu) following the consonants on which it is placed (either the base consonant or one of the pre-base consonants)
                    local cp   = getprev(current)
                    local cnsn = getnext(cns)
                    setlink(cp,n)
                    setlink(cns,current) -- cns ?
                    setlink(c,cnsn)
                    done = true
                    if c == stop then
                        stop = cp
                        break
                    end
                    current = getprev(n)
                end
            else
                -- remember where a following below-base ra has to go: after this
                -- consonant (or nbsp), its halant and, unless the character is in
                -- vatucache, its dependent vowels
                local char = getchar(current)
                if consonant[char] then
                    cns = current
                    local next = getnext(cns)
                    if halant[getchar(next)] then
                        cns = next
                    end
                    if not vatucache[char] then
                        next = getnext(cns)
                        while dependent_vowel[getchar(next)] do
                            cns  = next
                            next = getnext(cns)
                        end
                    end
                elseif char == c_nbsp then
                    nbspaces   = nbspaces + 1
                    cns        = current
                    local next = getnext(cns)
                    if halant[getchar(next)] then
                        cns = next
                    end
                    if not vatucache[char] then
                        next = getnext(cns)
                        while dependent_vowel[getchar(next)] do
                            cns  = next
                            next = getnext(cns)
                        end
                    end
                end
            end
            current = getnext(current)
        end
        if done and trace_steps then
            logprocess("reorder one, handle reph and vata") -- todo: boolean
        end
    end

    if getchar(base) == c_nbsp then
        -- the nbsp only served as base, so it goes away again
        nbspaces = nbspaces - 1
        if base == stop then
            stop = getprev(stop)
        end
        head = remove_node(head,base)
        flushnode(base)
    end

    return head, stop, nbspaces
end
1492
1493-- If a pre-base matra character had been reordered before applying basic features,
1494-- the glyph can be moved closer to the main consonant based on whether half-forms had been formed.
1495-- Actual position for the matra is defined as “after last standalone halant glyph,
1496-- after initial matra position and before the main consonant”.
1497-- If ZWJ or ZWNJ follow this halant, position is moved after it.
1498
-- So we break out ... this is only done for the first 'word' (if we feed words we can as
-- well test for non glyph).
1501
-- Handler that moves the node `start` behind the first halant found in its own
-- syllable; when a zero width (non) joiner of the same syllable directly follows
-- that halant, `start` ends up behind the joiner instead. The scan stops at the
-- first node that is not a character of the font of `start` or that carries a
-- different a_syllabe property.
--
-- Returns head, the node to continue with (the former successor of `start` when
-- it was moved, otherwise `start` itself) and true.
function handlers.devanagari_reorder_matras(head,start) -- no leak
    local font     = getfont(start)
    local syllable = getprop(start,a_syllabe)
    local node     = start -- we could cache attributes here
    while node do
        local code  = ischar(node,font)
        local after = getnext(node)
        if not code or getprop(node,a_syllabe) ~= syllable then
            -- we left the syllable (or hit a non glyph)
            break
        end
        if halant[code] then -- state can also be init
            if after then
                local joiner = ischar(after,font)
                if joiner and zw_char[joiner] and getprop(after,a_syllabe) == syllable then
                    -- the target position is after the joiner
                    node  = after
                    after = getnext(node)
                end
            end
            -- can be optimized: unlink start and hook it in between node and after
            local resume = getnext(start)
            head = remove_node(head,start)
            setlink(start,after)
            setlink(node,start)
            if trace_steps then
                logprocess("reorder matra")
            end
            return head, resume, true
        end
        -- a disabled alternative used to move start before a consonant that has
        -- no state or the init state
        node = after
    end
    return head, start, true
end
1549
-- Reph’s original position is always at the beginning of the syllable (i.e. it is
-- not reordered at the character reordering stage). However, it will be reordered
-- according to the basic-forms shaping results. Possible positions for reph,
-- depending on the script, are: after main, before post-base consonant forms, and
-- after post-base consonant forms.
1555
-- In Devanagari reph has reordering position 'before postscript' and dev2 only
-- follows steps 2, 4, and 6.
1558
-- Per font: maps a reph glyph to the base entry taken from an "rphf" subset. The
-- inner tables are reset and filled by reorder_two.
local rephbase = { }

-- Reorder the reph node `start` inside its syllable, following the six steps
-- quoted below. A node belongs to the syllable as long as ischar() accepts it for
-- the font of `start` and its a_syllabe property equals the one of `start`.
--
-- Returns the (possibly changed) head, the node to continue with (the node that
-- followed `start` before it was moved, or `start` itself when nothing moved)
-- and true.
function handlers.devanagari_reorder_reph(head,start)
    local current   = getnext(start)
    local startnext = nil -- set as soon as one of the steps has moved start
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)
    --
    ::step_1::
    --
    -- If reph should be positioned after post-base consonant forms, proceed to step 5.
    --
    local char      = ischar(start,startfont)
    -- a font without reph data gives no class, so we fall through to the last steps
    -- instead of indexing nil
    local fontbase  = rephbase[startfont]
    local rephclass = fontbase and fontbase[char]
    if char and after_subscript[rephclass] then
        goto step_5
    end
    --
    ::step_2::
    --
    -- If the reph repositioning class is not after post-base: target position is after
    -- the first explicit halant glyph between the first post-reph consonant and last
    -- main consonant. If ZWJ or ZWNJ are following this halant, position is moved after
    -- it. If such position is found, this is the target position. Otherwise, proceed to
    -- the next step. Note: in old-implementation fonts, where classifications were
    -- fixed in shaping engine, there was no case where reph position will be found on
    -- this step.
    --
    if char and not after_postscript[rephclass] then
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                if halant[char] then
                    if trace_steps then
                        logprocess("reorder reph, handling halant")
                    end
                    local next = getnext(current)
                    if next then
                        local nextchar = ischar(next,startfont)
                        if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then
                            current = next
                            next    = getnext(current)
                        end
                    end
                    -- hook start in between current and next
                    startnext = getnext(start)
                    head = remove_node(head,start)
                    setlink(start,next)
                    setlink(current,start)
                    start = startnext
                    break
                end
                current = getnext(current)
            else
                break
            end
        end
    end
    --
    ::step_3::
    --
    -- If reph should be repositioned after the main consonant: find the first consonant
    -- not ligated with main, or find the first consonant that is not a potential
    -- pre-base reordering Ra.
    --
    if not startnext then
        if char and after_main[rephclass] then
            current = getnext(start)
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if consonant[char] and not getstate(current,s_pref) then
                        if trace_steps then
                            logprocess("reorder reph, handling consonant")
                        end
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        -- the successor has to be fetched before current gets linked
                        -- to start, otherwise start ends up linked to itself and the
                        -- rest of the list is lost
                        local next = getnext(current)
                        setlink(start,next)
                        setlink(current,start)
                        start = startnext
                        break
                    end
                    current = getnext(current)
                else
                    break
                end
            end
        end
    end
    --
    ::step_4::
    --
    -- If reph should be positioned before post-base consonant, find first post-base
    -- classified consonant not ligated with main. If no consonant is found, the target
    -- position should be before the first matra, syllable modifier sign or vedic sign.
    --
    if not startnext then
        if char and before_postscript[rephclass] then
            current = getnext(start)
            local c = nil
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if getstate(current,s_pstf) then -- post-base
                        if trace_steps then
                            logprocess("reorder reph, before postscript, post base")
                        end
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(current),start)
                        setlink(start,current)
                        if head == current then
                            -- start was the head and went back in front of its old
                            -- successor, so it is the head again
                            head = start
                        end
                        start = startnext
                        break
                    elseif not c and (vowel_modifier[char] or stress_tone_mark[char]) then
                        -- remember the first modifier or tone mark as fallback target
                        c = current
                    end
                    current = getnext(current)
                else
                    if c then
                        if trace_steps then
                            logprocess("reorder reph, before postscript")
                        end
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(c),start)
                        setlink(start,c)
                        if head == c then
                            head = start -- see above
                        end
                        start = startnext
                    end
                    break
                end
            end
        end
    end
    --
    ::step_5::
    --
    -- If no consonant is found in steps 3 or 4, move reph to a position immediately
    -- before the first post-base matra, syllable modifier sign or vedic sign that has a
    -- reordering class after the intended reph position. For example, if the reordering
    -- position for reph is post-main, it will skip above-base matras that also have a
    -- post-main position.
    --
    if not startnext then
        current = getnext(start)
        local c = nil
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                local state = getstate(current)
                -- the last matching node in the syllable wins
                if before_subscript[rephclass] and (state == s_blwf or state == s_pstf) then
                    c = current
                    if trace_steps then
                        logprocess("reorder reph, before subscript")
                    end
                elseif after_subscript[rephclass] and (state == s_pstf) then
                    if trace_steps then
                        logprocess("reorder reph, after subscript")
                    end
                    c = current
                end
                current = getnext(current)
            else
                break
            end
        end
        if c then
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(getprev(c),start)
            setlink(start,c)
            if head == c then
                -- without this the old start node got lost when it was the head and
                -- c its direct successor
                head = start
            end
            start = startnext
        end
    end
    --
    ::step_6::
    --
    -- Otherwise, reorder reph to the end of the syllable.
    --
    if not startnext then
        current = start
        local next = getnext(current)
        while next do
            local nextchar = ischar(next,startfont)
            if nextchar and getprop(next,a_syllabe) == startattr then
                current = next
                next = getnext(current)
            else
                break
            end
        end
        if start ~= current then
            if trace_steps then
                logprocess("reorder reph, to end")
            end
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(start,getnext(current))
            setlink(current,start)
            start = startnext
        end
    end
    --
    return head, start, true
end
1776
1777-- If a pre-base reordering consonant is found, reorder it according to the following rules:
1778--
1779-- 1  Only reorder a glyph produced by substitution during application of the feature. (Note
1780--    that a font may shape a Ra consonant with the feature generally but block it in certain
1781--    contexts.)
1782-- 2  Try to find a target position the same way as for pre-base matra. If it is found, reorder
1783--    pre-base consonant glyph.
1784-- 3  If position is not found, reorder immediately before main consonant.
1785
1786-- Here we implement a few handlers:
1787--
1788--   function(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
1789--       return head, start, done
1790--   end
1791
1792-- todo: nodes -> table -> nodes
1793
-- Handler that reorders the node `start` (a pre-base reordering consonant, per the
-- rules quoted above) once; a node that already carries the reordered property is
-- left alone.
--
-- First the syllable is scanned forward for a halant: `start` is moved behind it,
-- or behind a zero width (non) joiner of the same syllable that directly follows
-- it. When no halant is found, the syllable is scanned backwards and `start` is
-- moved in front of the first node that is not a dependent vowel and that has
-- either no state or the init state.
--
-- Returns head, the node to continue with (the former successor of `start` when
-- it was moved, otherwise `start` itself) and true.
--
-- NOTE(review): the guard reads the property a_reordered while the mark is stored
-- under the literal "reordered"; presumably both are the same key -- confirm.
function handlers.devanagari_reorder_pre_base_reordering_consonants(head,start)
    if getprop(start,a_reordered) then
        return head, start, true
    end
    local current   = start -- we could cache attributes here
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)
    while current do
        local char = ischar(current,startfont)
        local next = getnext(current)
        if char and getprop(current,a_syllabe) == startattr then
            if halant[char] then -- state can also be init
                if trace_steps then
                    logprocess("reorder pre base consonants, handle halant")
                end
                if next then
                    local nextchar = ischar(next,startfont)
                    if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then
                        current = next
                        next    = getnext(current)
                    end
                end
                -- can be optimized: hook start in between current and next
                local startnext = getnext(start)
                head = remove_node(head,start)
                setlink(start,next)
                setlink(current,start)
                setprop(start,"reordered",true)
                return head, startnext, true
            end
        else
            break
        end
        current = next
    end

    -- no halant found: start is untouched, so startattr is still valid
    current = getprev(start)
    while current and getprop(current,a_syllabe) == startattr do
        local char = ischar(current)
        if (not dependent_vowel[char] and (not getstate(current) or getstate(current,s_init))) then
            if trace_steps then
                logprocess("reorder pre base consonants, handle vowel or initial")
            end
            -- this has to be a local, it used to leak into the global namespace
            local startnext = getnext(start)
            head = remove_node(head,start)
            if current == head then
                setlink(start,current)
                head = start
            else
                setlink(getprev(current),start)
                setlink(start,current)
            end
            setprop(start,"reordered",true)
            start = startnext
            break
        end
        current = getprev(current)
    end

    return head, start, true
end
1870
-- Handler that removes `start` together with the run of ZWJ / ZWNJ characters (in
-- the font of `start`) that directly follows it. The removed nodes are flushed.
-- The arguments kind, lookupname and replacement are not used.
--
-- Returns head (the first node after the run when `start` was the head), the
-- first node after the removed run (nil at the end of the list) and true.
function handlers.devanagari_remove_joiners(head,start,kind,lookupname,replacement)
    local font = getfont(start)
    local last = start
    local stop = getnext(last)
    -- extend the run over all joiners that follow
    while stop do
        local char = ischar(stop,font)
        if not (char and (char == c_zwnj or char == c_zwj)) then
            break
        end
        last, stop = stop, getnext(stop)
    end
    -- cut the run start .. last out of the list
    local prev = getprev(start)
    if stop then
        setnext(last)
        setlink(prev,stop)
    elseif prev then
        setnext(prev)
    end
    if start == head then
        head = stop
    end
    flushlist(start)
    if trace_steps then
        logprocess("remove joiners")
    end
    return head, stop, true
end
1900
-- Returns the seqsubset and reorderreph tables stored in the devanagari resources
-- of the given font; a fresh empty table replaces whatever is missing. The attr
-- argument is not used.
local function initialize_two(font,attr)
    local devanagari = fontdata[font].resources.devanagari
    if not devanagari then
        return { }, { }
    end
    return devanagari.seqsubset or { }, devanagari.reorderreph or { }
end
1912
1913-- this one will be merged into the caller: it saves a call, but we will then make function
1914-- of the actions
1915
1916local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pass over (determine stop in sweep)
1917    local seqsubset, reorderreph = initialize_two(font,attr)
1918
1919    local halfpos  = nil
1920    local basepos  = nil
1921    local subpos   = nil
1922    local postpos  = nil
1923
1924    reorderreph.coverage = { } -- use local
1925    rephbase[font]       = { } -- use local
1926
1927    for i=1,#seqsubset do
1928
1929        -- this can be done more efficient, the last test and less getnext
1930
1931        local subset      = seqsubset[i]
1932        local kind        = subset[1]
1933        local lookupcache = subset[2]
1934        if kind == "rphf" then
1935            --
1936            local reph = subset[3]
1937            local base = subset[4]
1938            reorderreph.coverage[reph] = true -- neat -- use local
1939            rephbase[font][reph] = base               -- use local
1940            --
1941            local current = start
1942            local last = getnext(stop)
1943            while current ~= last do
1944                if current ~= stop then
1945                    local c = getchar(current)
1946                    local found = lookupcache[c]
1947                    if found then
1948                        local next = getnext(current)
1949                        if contextchain(found, next) then -- above-base: rphf Consonant + Halant
1950                            local afternext = next ~= stop and getnext(next)
1951                            if afternext and zw_char[getchar(afternext)] then -- ZWJ and ZWNJ prevent creation of reph
1952                                current = afternext -- getnext(next)
1953                            elseif current == start then
1954                                setstate(current,s_rphf)
1955                                current = next -- later again next
1956                            else
1957                                current = next -- later again next
1958                            end
1959                        end
1960                    end
1961                end
1962                current = getnext(current)
1963            end
1964        elseif kind == "pref" then
1965            local current = start
1966            local last = getnext(stop)
1967            while current ~= last do
1968                if current ~= stop then
1969                    local c = getchar(current)
1970                    local found = lookupcache[c]
1971                    if found then -- pre-base: pref	Halant + Consonant
1972                        local next = getnext(current)
1973                        if contextchain(found, next) then
1974                            if not getstate(current) and not getstate(next) then --KE: state can also be init...
1975                                setstate(current,s_pref)
1976                                setstate(next,s_pref)
1977                                current = next
1978                            end
1979                        end
1980                    end
1981                end
1982                current = getnext(current)
1983            end
1984        elseif kind == "half" then -- half forms: half / Consonant + Halant
1985            local current = start
1986            local last = getnext(stop)
1987            while current ~= last do
1988                if current ~= stop then
1989                    local c = getchar(current)
1990                    local found = lookupcache[c]
1991                    if found then
1992                        local next = getnext(current)
1993                        if contextchain(found, next) then
1994                            if next ~= stop and getchar(getnext(next)) == c_zwnj then    -- zwnj prevent creation of half
1995                                current = next
1996                            elseif not getstate(current) then --KE: state can also be init...
1997                                setstate(current,s_half)
1998                                if not halfpos then
1999                                    halfpos = current
2000                                end
2001                            end
2002                            current = getnext(current)
2003                        end
2004                    end
2005                end
2006                current = getnext(current)
2007            end
2008        elseif kind == "blwf" or kind == "vatu" then -- below-base: blwf / Halant + Consonant
2009            local current = start
2010            local last = getnext(stop)
2011            while current ~= last do
2012                if current ~= stop then
2013                    local c = getchar(current)
2014                    local found = lookupcache[c]
2015                    if found then
2016                        local next = getnext(current)
2017                        if contextchain(found, next) then
2018                            if not getstate(current) and not getstate(next) then --KE: state can also be init...
2019                                setstate(current,s_blwf)
2020                                setstate(next,s_blwf)
2021                                current = next
2022                                subpos  = current
2023                            end
2024                        end
2025                    end
2026                end
2027                current = getnext(current)
2028            end
2029        elseif kind == "pstf" then -- post-base: pstf / Halant + Consonant
2030            local current = start
2031            local last = getnext(stop)
2032            while current ~= last do
2033                if current ~= stop then
2034                    local c = getchar(current)
2035                    local found = lookupcache[c]
2036                    if found then
2037                        local next = getnext(current)
2038                        if contextchain(found, next) then
2039                            if not getstate(current) and not getstate(next) then -- KE: state can also be init...
2040                                setstate(current,s_pstf)
2041                                setstate(next,s_pstf)
2042                                current = next
2043                                postpos = current
2044                            end
2045                        end
2046                    end
2047                end
2048                current = getnext(current)
2049            end
2050        end
2051    end
2052
2053    local current, base, firstcons, subnotafterbase, postnotafterbase = start, nil, nil, nil, nil
2054
2055    if getstate(start,s_rphf) then
2056        -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph from candidates for base consonants
2057        current = getnext(getnext(start))
2058    end
2059
2060    if current ~= getnext(stop) and getchar(current) == c_nbsp then
2061        -- Stand Alone cluster
2062        if current == stop then
2063            stop = getprev(stop)
2064            head = remove_node(head,current)
2065            flushnode(current)
2066            if trace_steps then
2067                logprocess("reorder two, remove nbsp")
2068            end
2069            return head, stop, nbspaces
2070        else
2071            nbspaces = nbspaces + 1
2072            base     = current
2073            current  = getnext(current)
2074            if current ~= stop then
2075                local char = getchar(current)
2076                if nukta[char] then
2077                    current = getnext(current)
2078                    char = getchar(current)
2079                end
2080                if char == c_zwj then
2081                    local next = getnext(current)
2082                    if current ~= stop and next ~= stop and halant[getchar(next)] then
2083                        current = next
2084                        next = getnext(current)
2085                        local tmp = getnext(next)
2086                        local changestop = next == stop
2087                        setnext(next)
2088                        setstate(current,s_pref)
2089                        current = processcharacters(current,font)
2090                        setstate(current,s_blwf)
2091                        current = processcharacters(current,font)
2092                        setstate(current,s_pstf)
2093                        current = processcharacters(current,font)
2094                        setstate(current,unsetvalue)
2095                        if halant[getchar(current)] then
2096                            setnext(getnext(current),tmp)
2097                            if show_syntax_errors then
2098                                head, current = inject_syntax_error(head,current,char)
2099                            end
2100                        else
2101                            setnext(current,tmp) -- assumes that result of pref, blwf, or pstf consists of one node
2102                            if changestop then
2103                                stop = current
2104                            end
2105                        end
2106                    end
2107                end
2108            end
2109            if trace_steps then
2110                logprocess("reorder two, handle nbsp")
2111            end
2112        end
2113    else -- not Stand Alone cluster
2114        local last = getnext(stop)
2115        while current ~= last do    -- find base consonant
2116            local next = getnext(current)
2117            if current == subpos then
2118                subnotafterbase = current
2119            end
2120            if current == postpos then
2121                postnotafterbase = current
2122            end
2123            if consonant[getchar(current)] then
2124                if not (current ~= stop and next ~= stop and halant[getchar(next)] and getchar(getnext(next)) == c_zwj) then
2125                    if not firstcons then
2126                        firstcons = current
2127                    end
2128                    -- check whether consonant has below-base or post-base form or is pre-base reordering Ra
2129                    local a = getstate(current)
2130                    if not (a == s_blwf or a == s_pstf or (a ~= s_rphf and a ~= s_blwf and ra[getchar(current)])) then
2131                        base = current
2132                        if subnotafterbase then
2133                            subpos = base
2134                        end
2135                        if postnotafterbase then
2136                            postpos = base
2137                        end
2138                    end
2139                end
2140            end
2141            current = next
2142        end
2143        if not base then
2144            base = firstcons
2145        end
2146    end
2147
2148    if not base then
2149        if getstate(start,s_rphf) then
2150            setstate(start,unsetvalue)
2151        end
2152        return head, stop, nbspaces
2153    else
2154        if getstate(base) then -- state can also be init
2155            setstate(base,unsetvalue)  -- THIS RESETS THE HALF STATE
2156        end
2157        basepos = base
2158    end
2159    if not halfpos then
2160        halfpos = base
2161    end
2162    if not subpos then
2163        subpos = base
2164    end
2165    if not postpos then
2166        postpos = subpos or base
2167    end
2168
2169    -- Matra characters are classified and reordered by which consonant in a conjunct they have affinity for
2170
2171    local moved   = { }
2172    local current = start
2173    local last    = getnext(stop)
2174    while current ~= last do
2175        local char   = getchar(current)
2176        local target = nil
2177        local cn     = getnext(current)
2178        -- not so efficient (needed for malayalam)
2179        local tpm = twopart_mark[char]
2180        if tpm then
2181            while tpm do
2182                local extra = copy_node(current)
2183                copyinjection(extra,current)
2184                char = tpm[1]
2185                setchar(current,char)
2186                setchar(extra,tpm[2])
2187                head = insertnodeafter(head,current,extra)
2188                tpm = twopart_mark[char]
2189            end
2190            if tpm and trace_steps then
2191                logprocess("reorder two, handle matra")
2192            end
2193        end
2194        --
2195         if not moved[current] and dependent_vowel[char] then
2196            if pre_mark[char] then -- or: if before_main or before_half
2197                moved[current] = true
2198                -- can be helper to remove one node
2199                local prev, next = getboth(current)
2200                setlink(prev,next)
2201                if current == stop then
2202                    stop = getprev(current)
2203                end
2204
2205                local pos
2206                if before_main[char] then
2207                    pos     = basepos
2208                 -- basepos = current -- is this correct?
2209                else
2210                    -- must be before_half
2211                    pos      = halfpos
2212                 -- halfpos = current -- is this correct?
2213                end
2214
2215                local ppos = getprev(pos) -- necessary?
2216                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) do
2217                    if getstate(ppos,s_pref) then
2218                        pos = ppos
2219                    end
2220                    ppos = getprev(ppos)
2221                end
2222
2223                local ppos = getprev(pos) -- necessary?
2224                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and halant[ischar(ppos)] do
2225                    ppos = getprev(ppos)
2226                    if ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and consonant[ischar(ppos)] then
2227                        pos  = ppos
2228                        ppos = getprev(ppos)
2229                    else
2230                        break
2231                    end
2232                end
2233
2234                if pos == start then
2235                    if head == start then
2236                        head = current
2237                    end
2238                    start = current
2239                end
2240                setlink(getprev(pos),current)
2241                setlink(current,pos)
2242             -- setlink(getprev(pos),current,pos) -- maybe
2243                if trace_steps then
2244                    logprocess("reorder two, handle pre mark")
2245                end
2246            elseif above_mark[char] then
2247                -- after main consonant
2248                target = subpos
2249                if postpos == subpos then
2250                    postpos = current
2251                end
2252                subpos = current
2253            elseif below_mark[char] then
2254                -- after subjoined consonants
2255                target = subpos
2256                if postpos == subpos then
2257                    postpos = current
2258                end
2259                subpos = current
2260            elseif post_mark[char] then
2261                -- after post-form consonant
2262                local n = getnext(postpos) -- nukta and vedic sign come first - is that right? and also halant+ra
2263                while n do
2264                    local v = ischar(n,font)
2265                    if nukta[v] or stress_tone_mark[v] or vowel_modifier[v] then
2266                        postpos = n
2267                    else
2268                        break
2269                    end
2270                    n = getnext(n)
2271                end
2272                target = postpos
2273                postpos = current
2274            end
2275            if mark_above_below_post[char] then
2276                local prev = getprev(current)
2277                if prev ~= target then
2278                    local next = getnext(current)
2279                    setlink(prev,next)
2280                    if current == stop then
2281                        stop = prev
2282                    end
2283                    setlink(current,getnext(target))
2284                    setlink(target,current)
2285                 -- setlink(target,current,getnext(target)) -- maybe
2286                    if trace_steps then
2287                        logprocess("reorder two, handle mark")
2288                    end
2289                end
2290            end
2291        end
2292        current = cn
2293    end
2294
2295    -- reorder halant + Ra
2296
2297    local current = getnext(start)
2298    local last    = getnext(stop)
2299    while current ~= last do
2300        local char = getchar(current)
2301        local cn   = getnext(current)
2302        if halant[char] and ra[ischar(cn)] and (not getstate(cn,s_rphf)) and (not getstate(cn,s_blwf)) then
2303            if after_main[ischar(cn)] then
2304                local prev = getprev(current)
2305                local next = getnext(cn)
2306                local bpn  = getnext(basepos)
2307                while bpn and dependent_vowel[ischar(bpn)] do
2308                    basepos = bpn
2309                    bpn     = getnext(bpn)
2310                end
2311                if basepos ~= prev then
2312                    setlink(prev,next)
2313                    setlink(cn, getnext(basepos))
2314                    setlink(basepos, current)
2315                    if cn == stop then
2316                        stop = prev
2317                    end
2318                    cn = next
2319                    if trace_steps then
2320                        logprocess("reorder two, handle halant and ra")
2321                    end
2322                end
2323            end
2324            -- after_postscript
2325            -- after_subscript
2326            -- before_postscript
2327            -- before_subscript
2328        end
2329        current = cn
2330    end
2331
2332    -- Reorder marks to canonical order: Adjacent nukta and halant or nukta and vedic sign are always repositioned if necessary, so that the nukta is first.
2333
2334    local current = start
2335    local c       = nil
2336    while current ~= stop do
2337        local char = getchar(current)
2338        if halant[char] or stress_tone_mark[char] then
2339            if not c then
2340                c = current
2341            end
2342        else
2343            c = nil
2344        end
2345        local next = getnext(current)
2346        if c and nukta[getchar(next)] then
2347            if head == c then
2348                head = next
2349            end
2350            if stop == next then
2351                stop = current
2352            end
2353            setlink(getprev(c),next)
2354            local nextnext = getnext(next)
2355            setnext(current,nextnext)
2356            local nextnextnext = getnext(nextnext)
2357            if nextnextnext then
2358                setprev(nextnextnext,current)
2359            end
2360            setlink(nextnext,c)
2361            if trace_steps then
2362                logprocess("reorder two, handle nukta")
2363            end
2364        end
2365        if stop == current then break end
2366        current = getnext(current)
2367    end
2368
2369    if getchar(base) == c_nbsp then
2370        if base == stop then
2371            stop = getprev(stop)
2372        end
2373        nbspaces = nbspaces - 1
2374        head = remove_node(head, base)
2375        flushnode(base)
2376        if trace_steps then
2377            logprocess("reorder two, handle nbsp")
2378        end
2379    end
2380
2381    return head, stop, nbspaces
2382end
2383
2384-- cleaned up and optimized ... needs checking (local, check order, fixes, extra hash, etc)
2385
-- Lookup table consulted by method_one and method_two for the character in front
-- of an nbsp when they decide whether that nbsp starts a stand alone cluster.
--
-- NOTE(review): nukta and halant are registered by key below while the other five
-- classes go through imerge; presumably imerge is table.imerge, which would only
-- append array parts -- confirm that those classes really end up as keys here.

local separator = { }

imerge(separator,consonant)
imerge(separator,independent_vowel)
imerge(separator,dependent_vowel)
imerge(separator,vowel_modifier)
imerge(separator,stress_tone_mark)

for unicode in next, nukta do
    separator[unicode] = true
end
for unicode in next, halant do
    separator[unicode] = true
end
2396
-- Scan forward from node c and return the last node that still belongs to the
-- cluster that starts there; c itself is returned when nothing can be added.
--
-- c       : node to start from
-- font    : font id handed to ischar; a node for which ischar fails ends the scan
-- variant : 1 or 2, selects the prefix rule that is tried before the common tail;
--           any other value skips the prefix step
--
-- The common tail accepts dependent vowels (at most one per pre, above, below and
-- post class unless the conjuncts setting of the font is "continue"), then an
-- optional nukta, halant, vowel modifier and up to two stress or tone marks.

local function analyze_next_chars_one(c,font,variant) -- skip one dependent vowel
    -- why two variants ... the comment suggests that it's the same ruleset
    local n = getnext(c)
    if not n then
        return c
    end
    local v = ischar(n,font)
    if variant == 1 then
        -- step over an optional nukta
        local first, firstchar = n, v
        if firstchar and nukta[firstchar] then
            first = getnext(first)
            if first then
                firstchar = ischar(first,font)
            end
        end
        if first and firstchar then
            local second     = getnext(first)
            local secondchar = second and ischar(second,font)
            local third      = secondchar and getnext(second)
            local thirdchar  = third and ischar(third,font)
            if thirdchar then
                if secondchar == c_zwj and consonant[thirdchar] then
                    -- ZWJ + C
                    c = third
                elseif (secondchar == c_zwnj or secondchar == c_zwj) and halant[thirdchar] then
                    -- <ZWJ|ZWNJ> + H + C
                    local fourth     = getnext(third)
                    local fourthchar = fourth and ischar(fourth,font)
                    if fourthchar and consonant[fourthchar] then
                        c = fourth
                    end
                end
            end
        end
    elseif variant == 2 then
        -- an optional nukta becomes part of the cluster right away
        if v and nukta[v] then
            c = n
        end
        local first     = getnext(c)
        local firstchar = first and ischar(first,font)
        if firstchar then
            local second = getnext(first)
            if second then
                local secondchar = ischar(second,font)
                if secondchar and zw_char[firstchar] then
                    -- skip the joiner and look at the pair that follows it
                    firstchar  = secondchar
                    second     = getnext(second)
                    secondchar = second and ischar(second,font)
                end
                if secondchar and halant[firstchar] and consonant[secondchar] then
                    c = second
                end
            end
        end
    end
    -- c = ms_matra(c)
    -- from here on n and v always describe the node right after c
    n = getnext(c)
    v = n and ischar(n,font)
    if not v then
        return c
    end
    local seen_pre, seen_above, seen_below, seen_post -- all start out as nil
    while dependent_vowel[v] do
        local parts = twopart_mark[v]
        if parts then
            -- a two part mark occupies the class of each of its parts
            for k=1,#parts do
                local part = parts[k]
                if pre_mark[part] and not seen_pre then
                    seen_pre = true
                elseif above_mark[part] and not seen_above then
                    seen_above = true
                elseif below_mark[part] and not seen_below then
                    seen_below = true
                elseif post_mark[part] and not seen_post then
                    seen_post = true
                elseif devanagarihash[font].conjuncts ~= "continue" then
                    -- "continue" is for testing, otherwise we stop here
                    return c
                end
            end
        elseif pre_mark[v] and not seen_pre then
            seen_pre = true
        elseif post_mark[v] and not seen_post then
            seen_post = true
        elseif below_mark[v] and not seen_below then
            seen_below = true
        elseif above_mark[v] and not seen_above then
            seen_above = true
        elseif devanagarihash[font].conjuncts ~= "continue" then
            -- "continue" is for testing, otherwise we stop here
            return c
        end
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if nukta[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if halant[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    -- a second stress or tone mark is still part of the cluster
    if stress_tone_mark[v] then
        return n
    end
    return c
end
2567
2568-- Consonant syllable:
2569--
2570--   { C + [ N ] + < H + [ < ZWNJ | ZWJ > ] | < ZWNJ | ZWJ > + H > }
2571-- + C
2572-- + [ N ]
2573-- + [ A ]
2574-- + [ < H + [ < ZWNJ|ZWJ > ] | { M } + [ N ] + [ H ] > ]
2575-- + [ SM ]
2576-- + [ ( VD ) ]
2577
-- Scan a consonant syllable that starts at node c and return its last node; c
-- itself comes back when nothing follows it.
--
-- c    : node to start from
-- font : font id handed to ischar; a node for which ischar fails ends the scan
--
-- First an optional nukta and then as many (halant / joiner) + consonant + [nukta]
-- groups as possible are collected. After that comes an optional anudatta,
-- followed by either halant + [joiner] or the same vowel / nukta / halant tail
-- that analyze_next_chars_one uses, and finally an optional vowel modifier and up
-- to two stress or tone marks.

local function analyze_next_chars_two(c,font)
    local n = getnext(c)
    if not n then
        return c
    end
    local v = ischar(n,font)
    if v and nukta[v] then
        c = n
    end
    -- last runs ahead of c: c is only moved once a complete group has been seen
    local last = c
    while true do
        local link     = getnext(last)
        local linkchar = link and ischar(link,font)
        if not linkchar then
            break
        end
        if halant[linkchar] then
            -- H + [<ZWNJ|ZWJ>]
            last = link
            local joiner     = getnext(link)
            local joinerchar = joiner and ischar(joiner,font)
            if joinerchar and zw_char[joinerchar] then
                last = joiner
            end
        elseif linkchar == c_zwnj or linkchar == c_zwj then
            -- <ZWNJ|ZWJ> + H, the joiner alone is not accepted (?)
            local virama     = getnext(link)
            local viramachar = virama and ischar(virama,font)
            if viramachar and halant[viramachar] then
                last = virama
            end
        else
            break
        end
        -- C + [N]
        local cons     = getnext(last)
        local conschar = cons and ischar(cons,font)
        if not (conschar and consonant[conschar]) then
            break
        end
        last = cons
        local dot     = getnext(cons)
        local dotchar = dot and ischar(dot,font)
        if dotchar and nukta[dotchar] then
            last = dot
        end
        c = last
    end
    --
    if not c then
        -- This shouldn't happen I guess.
        return
    end
    -- from here on n and v always describe the node right after c
    n = getnext(c)
    v = n and ischar(n,font)
    if not v then
        return c
    end
    if anudatta[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if halant[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
        if v == c_zwnj or v == c_zwj then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
    else
        -- c = ms_matra(c)
        -- same as one
        local seen_pre, seen_above, seen_below, seen_post -- all start out as nil
        -- inefficient : too many tests but seldom more than one
        while dependent_vowel[v] do
            local parts = twopart_mark[v]
            if parts then
                -- a two part mark occupies the class of each of its parts
                for k=1,#parts do
                    local part = parts[k]
                    if pre_mark[part] and not seen_pre then
                        seen_pre = true
                    elseif above_mark[part] and not seen_above then
                        seen_above = true
                    elseif below_mark[part] and not seen_below then
                        seen_below = true
                    elseif post_mark[part] and not seen_post then
                        seen_post = true
                    elseif devanagarihash[font].conjuncts ~= "continue" then
                        -- "continue" is for testing, otherwise we stop here
                        return c
                    end
                end
            elseif pre_mark[v] and not seen_pre then
                seen_pre = true
            elseif post_mark[v] and not seen_post then
                seen_post = true
            elseif below_mark[v] and not seen_below then
                seen_below = true
            elseif above_mark[v] and not seen_above then
                seen_above = true
            elseif devanagarihash[font].conjuncts ~= "continue" then
                -- "continue" is for testing, otherwise we stop here
                return c
            end
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
        if nukta[v] then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
        if halant[v] then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
    end
    -- same as one
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    -- a second stress or tone mark is still part of the syllable
    if stress_tone_mark[v] then
        return n
    end
    return c
end
2789
2790-- It looks like these two analyzers were written independently but they share
2791-- a lot. Common code has been synced.
2792
-- Walk the node list, split it into syllables and hand every syllable that spans
-- more than one node to reorder_one. Afterwards the first glyph of each run of
-- glyphs in this font that has no state yet gets the s_init state.
--
-- head : first node of the list
-- font : font id, only nodes for which ischar(node,font) succeeds are analyzed
-- attr : passed on unchanged to reorder_one
--
-- Returns the (possibly changed) head and a boolean that is true when at least
-- one glyph of this font has been seen.

local function method_one(head,font,attr)
    local current  = head
    local done     = false
    local nbspaces = 0
    local syllabe  = 0
    while current do
        local char = ischar(current,font)
        if char then
            done = true
            local syllablestart = current
            local c     = current
            local first = char
            local n     = getnext(c)
            if n and ra[first] then
                -- look through a leading Ra + H at whatever follows
                local second = ischar(n,font)
                if second and halant[second] then
                    local nn    = getnext(n)
                    local third = nn and ischar(nn,font)
                    if third then
                        c     = nn
                        first = third
                    end
                end
            end
            local standalone = first == c_nbsp
            if standalone then
                -- No previous node means begin of paragraph or box, a previous node
                -- that is no glyph in this font means a different font or language
                -- and a glyph that is not in separator is something that separates
                -- words. Only in the remaining case we're inside a word.
                local prev     = getprev(current)
                local prevchar = prev and ischar(prev,font)
                if prevchar and separator[prevchar] then
                    standalone = false
                end
            end
            if standalone then
                -- stand alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                local syllableend = analyze_next_chars_one(c,font,2)
                current = getnext(syllableend)
                if syllablestart ~= syllableend then
                    head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces)
                    current = getnext(current)
                end
            elseif consonant[char] then
                -- we can delay the getsubtype(n) and getfont(n) and test for say halant first
                -- as an table access is faster than two function calls (subtype and font are
                -- pseudo fields) but the code becomes messy (unless we make it a function)
                --
                -- syllable containing consonant: collect {C + [Nukta] + H + [ZW]} + C
                while true do
                    local n = getnext(current)
                    local v = n and ischar(n,font)
                    if not v then
                        break
                    end
                    if nukta[v] then
                        n = getnext(n)
                        v = n and ischar(n,font)
                        if not v then
                            break
                        end
                    end
                    if not halant[v] then
                        break
                    end
                    n = getnext(n)
                    v = n and ischar(n,font)
                    if not v then
                        break
                    end
                    if v == c_zwnj or v == c_zwj then
                        n = getnext(n)
                        v = n and ischar(n,font)
                        if not v then
                            break
                        end
                    end
                    if not consonant[v] then
                        break
                    end
                    current = n
                end
                local n = getnext(current)
                if n then
                    local v = ischar(n,font)
                    if v and nukta[v] then
                        -- nukta (not specified in Microsoft Devanagari OpenType specification)
                        current = n
                        n = getnext(current)
                    end
                end
                local syllableend = current
                current = n
                if current then
                    local v = ischar(current,font)
                    if not v then
                        -- skip
                    elseif halant[v] then
                        -- syllable containing consonant without vowels: {C + [Nukta] + H} + C + H
                        local n = getnext(current)
                        local v = n and ischar(n,font)
                        if v and zw_char[v] then
                            -- code collapsed, probably needs checking with intention
                            syllableend = n
                            current = getnext(n)
                        else
                            syllableend = current
                            current = n
                        end
                    else
                        -- syllable containing consonant with vowels: {C + [Nukta] + H} + C + [M] + [VM] + [SM]
                        -- (current can become nil at the end of the list, so guard ischar)
                        if dependent_vowel[v] then
                            syllableend = current
                            current = getnext(current)
                            v = current and ischar(current,font)
                        end
                        if v and vowel_modifier[v] then
                            syllableend = current
                            current = getnext(current)
                            v = current and ischar(current,font)
                        end
                        if v and stress_tone_mark[v] then
                            syllableend = current
                            current = getnext(current)
                        end
                    end
                end
                if syllablestart ~= syllableend then
                    -- tag all nodes of the syllable with the same number
                    syllabe = syllabe + 1
                    local node  = syllablestart
                    local limit = getnext(syllableend)
                    while node ~= limit do
                        setprop(node,a_syllabe,syllabe)
                        node = getnext(node)
                    end
                    head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces)
                    current = getnext(current)
                end
            elseif independent_vowel[char] then
                -- syllable without consonants: VO + [VM] + [SM]; nothing is reordered
                -- here so we only move on to the node after the syllable
                current = getnext(current)
                local v = current and ischar(current,font)
                if v then
                    if vowel_modifier[v] then
                        current = getnext(current)
                        v = current and ischar(current,font)
                    end
                    if v and stress_tone_mark[v] then
                        current = getnext(current)
                    end
                end
            else
                if show_syntax_errors then
                    local mark = mark_pre_above_below_post[char]
                    if mark then
                        head, current = inject_syntax_error(head,current,char)
                    end
                end
                current = getnext(current)
            end
        else
            current = getnext(current)
        end
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- mark the first glyph of each run of glyphs in this font
    current = head
    local atstart = true
    while current do
        if ischar(current,font) then
            if atstart and not getstate(current) then
                setstate(current,s_init)
            end
            atstart = false
        else
            atstart = true
        end
        current = getnext(current)
    end

    return head, done
end
3015
-- there is a good chance that when we run into one with subtype < 256 that the rest is also done
-- so maybe we can omit this check (it's pretty hard to get glyphs in the stream out of the blue)
3018
-- Walk the node list, determine the syllable that starts at each glyph of this
-- font, tag its nodes with a syllable number and hand syllables that span more
-- than one node to reorder_two. Afterwards the first glyph of each run of glyphs
-- in this font that has no state yet gets the s_init state.
--
-- head : first node of the list
-- font : font id, only nodes for which ischar(node,font) succeeds are analyzed
-- attr : passed on unchanged to reorder_two
--
-- Returns the (possibly changed) head and a boolean that is true when at least
-- one glyph of this font has been seen.

local function method_two(head,font,attr)
    local current  = head
    local done     = false
    local syllabe  = 0
    local nbspaces = 0
    while current do
        local syllablestart = nil
        local syllableend   = nil
        local char = ischar(current,font)
        if char then
            done = true
            syllablestart = current
            local c = current
            local n = getnext(current)
            if n and ra[char] then
                -- look through a leading Ra + H at whatever follows
                local nextchar = ischar(n,font)
                if nextchar and halant[nextchar] then
                    local nn           = getnext(n)
                    local nextnextchar = nn and ischar(nn,font)
                    if nextnextchar then
                        c    = nn
                        char = nextnextchar
                    end
                end
            end
            if independent_vowel[char] then
                -- vowel-based syllable: [Ra+H]+V+[N]+[<[<ZWJ|ZWNJ>]+H+C|ZWJ+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                current = analyze_next_chars_one(c,font,1)
                syllableend = current
            else
                local standalone = char == c_nbsp
                if standalone then
                    nbspaces = nbspaces + 1
                    -- No previous node means begin of paragraph or box, a previous
                    -- node that is no glyph in this font means a different font or
                    -- language and a glyph that is not in separator is something
                    -- that separates words. Only in the remaining case we're inside
                    -- a word. (The font test used to be inverted here, which made
                    -- every nbsp after a glyph of this font stand alone; this now
                    -- follows method_one.)
                    local p        = getprev(current)
                    local prevchar = p and ischar(p,font)
                    if prevchar and separator[prevchar] then
                        standalone = false
                    end
                end
                if standalone then
                    -- Stand Alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                    current = analyze_next_chars_one(c,font,2)
                    syllableend = current
                elseif consonant[getchar(current)] then
                    -- WHY current INSTEAD OF c ?
                    -- Consonant syllable: {C+[N]+<H+[<ZWNJ|ZWJ>]|<ZWNJ|ZWJ>+H>} + C+[N]+[A] + [< H+[<ZWNJ|ZWJ>] | {M}+[N]+[H]>]+[SM]+[(VD)]
                    current = analyze_next_chars_two(current,font) -- not c !
                    syllableend = current
                end
            end
        end
        if syllableend then
            -- tag all nodes of the syllable with the same number
            syllabe = syllabe + 1
            local node  = syllablestart
            local limit = getnext(syllableend)
            while node ~= limit do
                setprop(node,a_syllabe,syllabe)
                node = getnext(node)
            end
            if syllablestart ~= syllableend then
                head, current, nbspaces = reorder_two(head,syllablestart,syllableend,font,attr,nbspaces)
            end
        elseif show_syntax_errors then
            local char = ischar(current,font)
            if char and not getstate(current) then -- state can also be init
                local mark = mark_pre_above_below_post[char]
                if mark then
                    head, current = inject_syntax_error(head,current,char)
                end
            end
        end
        current = getnext(current)
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- mark the first glyph of each run of glyphs in this font
    current = head
    local atstart = true
    while current do
        if ischar(current,font) then
            if atstart and not getstate(current) then -- state can also be init
                setstate(current,s_init)
            end
            atstart = false
        else
            atstart = true
        end
        current = getnext(current)
    end

 -- if languages.indic then
 --     head = languages.indic.handler(head)
 -- end

    return head, done
end
3128
-- Register the analyzers: the i-th entry of scripts_one is handled by method_one
-- and the i-th entry of scripts_two by method_two.

for i=1,nofscripts do
    local one = scripts_one[i]
    local two = scripts_two[i]
    methods[one] = method_one
    methods[two] = method_two
end

3133