font-osd.lua /size: 106 Kb    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['font-osd'] = { -- script devanagari
2    version   = 1.001,
3    comment   = "companion to font-ini.mkiv",
4    author    = "Kai Eigner, TAT Zetwerk / Hans Hagen, PRAGMA ADE",
5    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9
10-- we need to check nbsphash (context only)
11
12-- A few remarks:
13--
14-- This code is a partial rewrite of the code that deals with devanagari. The data
15-- and logic is by Kai Eigner and based on Microsoft's OpenType specifications
16-- for specific scripts, but with a few improvements. More information can be found
17-- at:
18--
19-- deva: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/introO.mspx
20-- dev2: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/intro.mspx
21--
22-- Rajeesh Nambiar provided patches for the malayalam variant. Thanks to feedback
23-- from the mailing list some aspects could be improved.
24--
25-- As I touched nearly all code, reshuffled it, optimized a lot, etc. etc. (imagine
26-- how much can get messed up in over a week work) it could be that I introduced
27-- bugs. There is more to gain (esp in the functions applied to a range) but I'll do
28-- that when everything works as expected. Kai's original code is kept in
29-- font-odk.lua as a reference so blame me (HH) for bugs. (We no longer ship that
30-- file as the code below has diverted too much and in the meantime has more than
31-- doubled in size.)
32--
33-- Interesting is that Kai managed to write this on top of the existing otf handler.
34-- Only a few extensions were needed, like a few more analyzing states and dealing
35-- with changed head nodes in the core scanner as that only happens here. There's a
36-- lot going on here and it's only because I touched nearly all code that I got a
37-- bit of a picture of what happens. For in-depth knowledge one needs to consult
38-- Kai.
39--
40-- The rewrite mostly deals with efficiency, both in terms of speed and code. We
41-- also made sure that it suits generic use as well as use in ConTeXt. I removed
42-- some buglets but can as well have messed up the logic by doing this. For this we
43-- keep the original around as that serves as reference. Due to the lots of
44-- reshuffling glyphs quite some leaks occur(red) but once I'm satisfied with the
45-- rewrite I'll weed them. I also integrated initialization etc into the regular
46-- mechanisms.
47--
48-- In the meantime, we're down from 25.5-3.5=22 seconds to 17.7-3.5=14.2 seconds for
49-- a 100 page sample (mid 2012) with both variants so it's worth the effort. Some
50-- more speedup is to be expected. Due to the method chosen it will never be real
51-- fast. If I ever become a power user I'll have a go at some further speed up. I
52-- will rename some functions (and features) once we don't need to check the
53-- original code. We now use a special subset sequence for use inside the analyzer
54-- (after all we could store this in the dataset and save redundant analysis).
55--
56-- By now we have yet another incremental improved version. In the end I might
57-- rewrite the code.
58--
59-- Hans Hagen, PRAGMA-ADE, Hasselt NL
60
61-- Todo:
62--
63-- Matras: according to Microsoft typography specifications "up to one of each type:
64-- pre-, above-, below- or post- base", but that does not seem to be right. It could
65-- become an option.
66--
67-- Resources:
68--
69-- The tables that we had here are now generated from char-def.lua or in the case of
70-- generic usage loaded from luatex-basics-chr.lua. Still a couple of entries need
71-- to be added to char-def.lua but finally I moved the indic specific tables there.
72-- For generic usage one can create the relevant resources by running:
73--
74--     context luatex-basics-prepare.tex
75--
76-- and an overview with:
77--
78--     context --global s-fonts-basics.mkiv
79--
80-- For now we have defined: bengali, devanagari, gujarati, gurmukhi, kannada,
81-- malayalam, oriya, tamil and telugu but not all are checked. Also, some of the
82-- code below might need to be adapted to the extra scripts.
83
84local insert, imerge, copy, tohash = table.insert, table.imerge, table.copy, table.tohash
85local next, type = next, type
86
87local report             = logs.reporter("otf","devanagari")
88
89fonts                    = fonts                   or { }
90fonts.analyzers          = fonts.analyzers         or { }
91fonts.analyzers.methods  = fonts.analyzers.methods or { node = { otf = { } } }
92
93local otf                = fonts.handlers.otf
94
95local handlers           = otf.handlers
96local methods            = fonts.analyzers.methods
97
98local otffeatures        = fonts.constructors.features.otf
99local registerotffeature = otffeatures.register
100
101local nuts               = nodes.nuts
102
103local getnext            = nuts.getnext
104local getprev            = nuts.getprev
105local getboth            = nuts.getboth
106local getid              = nuts.getid
107local getchar            = nuts.getchar
108local getfont            = nuts.getfont
109local getsubtype         = nuts.getsubtype
110local setlink            = nuts.setlink
111local setnext            = nuts.setnext
112local setprev            = nuts.setprev
113local setchar            = nuts.setchar
114local getprop            = nuts.getprop
115local setprop            = nuts.setprop
116local getstate           = nuts.getstate
117local setstate           = nuts.setstate
118
119local ischar             = nuts.ischar
120
121local insertnodeafter    = nuts.insertafter
122local copy_node          = nuts.copy
123local remove_node        = nuts.remove
124local flushlist          = nuts.flushlist
125local flushnode          = nuts.flushnode
126
127local copyinjection      = nodes.injections.copy -- KE: is this necessary? HH: probably not as positioning comes later and we rawget/set
128
129local unsetvalue         = attributes.unsetvalue
130
131local fontdata           = fonts.hashes.identifiers
132
133local a_syllabe          = "syllable"  -- attributes.private('syllabe') -- can be just a property key
134local a_reordered        = "reordered" -- attributes.private('reordered') -- can be just a property key
135
136local dotted_circle      = 0x25CC
137local c_nbsp             = 0x00A0
138local c_zwnj             = 0x200C
139local c_zwj              = 0x200D
140
141local states             = fonts.analyzers.states -- not features
142
143local s_rphf             = states.rphf
144local s_half             = states.half
145local s_pref             = states.pref
146local s_blwf             = states.blwf
147local s_pstf             = states.pstf
148local s_init             = states.init
149
-- Delayed definition: the replacer is looked up on the first call only, as
-- typesetters.characters.replacenbspaces need not exist yet (or at all) when
-- this file is loaded. After that first call replace_all_nbsp is rebound to
-- whatever was found, or to a function that just hands back the head.

local function replace_all_nbsp(head)
    local tc       = typesetters and typesetters.characters
    local replacer = tc and tc.replacenbspaces
    if not replacer then
        replacer = function(head)
            return head
        end
    end
    replace_all_nbsp = replacer -- later calls go straight to the resolved function
    return replacer(head)
end
158
-- processcharacters(head,font) feeds the node list through all processors that
-- are registered for the given font id and returns the resulting head. Each
-- processor is called as processor(head,font,0) and its result becomes the
-- head passed to the next one. In ConTeXt the processors come from
-- fonts.hashes.processes, otherwise from the font's shared.processes table.

local processcharacters = nil

do

    local function runprocessors(head,font,processors)
        for index=1,#processors do
            head = processors[index](head,font,0)
        end
        return head
    end

    if context then
        local fontprocesses = fonts.hashes.processes
        processcharacters = function(head,font)
            return runprocessors(head,font,fontprocesses[font])
        end
    else
        processcharacters = function(head,font)
            return runprocessors(head,font,fontdata[font].shared.processes)
        end
    end

end
179
180-- We can assume that scripts are not mixed in the source but if that is the case
181-- we might need to have consonants etc per script and initialize a local table
182-- pointing to the right one. But not now.
183
-- The indic lookup tables (unicode -> true) are taken from characters.indicgroups
-- when present. Otherwise they are derived once from characters.data, using the
-- indic, indicmark, indicclass and indicorder fields of each entry, and stored
-- back in characters.indicgroups. Only entries that have an indic field are
-- looked at. When there is no characters table at all nothing is built.

local indicgroups = characters and characters.indicgroups

if characters and not indicgroups then

    -- one bucket per value that the corresponding field can have

    local bykind = {
        c = { }, -- consonant
        i = { }, -- independent vowel
        d = { }, -- dependent vowel
        m = { }, -- vowel modifier
        s = { }, -- stress tone mark
        o = { }, -- other (collected but not exported)
    }

    local bymark = {
        l = { }, -- left   | pre_mark
        t = { }, -- top    | above_mark
        b = { }, -- bottom | below_mark
        r = { }, -- right  | post_mark
        s = { }, -- split  | twopart_mark
    }

    local byclass = {
        nukta    = { },
        halant   = { },
        ra       = { },
        anudatta = { },
    }

    local byorder = {
        bp = { }, -- before_postscript
        ap = { }, -- after_postscript
        bs = { }, -- before_subscript
        as = { }, -- after_subscript
        bh = { }, -- before_half
        ah = { }, -- after_half
        bm = { }, -- before_main
        am = { }, -- after_main
    }

    for unicode, entry in next, characters.data do
        local kind = entry.indic
        if kind then
            bykind[kind][unicode] = true
            local mark = entry.indicmark
            if mark == "s" then
                -- a split mark keeps the second and third entry of its specials
                -- (presumably the two parts it decomposes into)
                local specials = entry.specials
                bymark.s[unicode] = { specials[2], specials[3] }
            elseif mark then
                bymark[mark][unicode] = true
            end
            local class = entry.indicclass
            if class then
                byclass[class][unicode] = true
            end
            local order = entry.indicorder
            if order then
                byorder[order][unicode] = true
            end
        end
    end

    indicgroups = {
        -- kinds
        consonant         = bykind.c,
        independent_vowel = bykind.i,
        dependent_vowel   = bykind.d,
        vowel_modifier    = bykind.m,
        stress_tone_mark  = bykind.s,
     -- other             = bykind.o,
        -- marks
        pre_mark          = bymark.l,
        above_mark        = bymark.t,
        below_mark        = bymark.b,
        post_mark         = bymark.r,
        twopart_mark      = bymark.s,
        -- classes
        nukta             = byclass.nukta,
        halant            = byclass.halant,
        ra                = byclass.ra,
        anudatta          = byclass.anudatta,
        -- orders
        before_postscript = byorder.bp,
        after_postscript  = byorder.ap,
        before_half       = byorder.bh,
        after_half        = byorder.ah,
        before_subscript  = byorder.bs,
        after_subscript   = byorder.as,
        before_main       = byorder.bm,
        after_main        = byorder.am,
    }

    characters.indicgroups = indicgroups

end
281
282local consonant         = indicgroups.consonant
283local independent_vowel = indicgroups.independent_vowel
284local dependent_vowel   = indicgroups.dependent_vowel
285local vowel_modifier    = indicgroups.vowel_modifier
286local stress_tone_mark  = indicgroups.stress_tone_mark
287local pre_mark          = indicgroups.pre_mark
288local above_mark        = indicgroups.above_mark
289local below_mark        = indicgroups.below_mark
290local post_mark         = indicgroups.post_mark
291local twopart_mark      = indicgroups.twopart_mark
292local nukta             = indicgroups.nukta
293local halant            = indicgroups.halant
294local ra                = indicgroups.ra
295local anudatta          = indicgroups.anudatta
296
297local before_postscript = indicgroups.before_postscript
298local after_postscript  = indicgroups.after_postscript
299local before_half       = indicgroups.before_half
300local after_half        = indicgroups.after_half
301local before_subscript  = indicgroups.before_subscript
302local after_subscript   = indicgroups.after_subscript
303local before_main       = indicgroups.before_main
304local after_main        = indicgroups.after_main
305
-- Combined mark sets, made with table.merged from the individual mark tables:
-- one with all four positions and one without the pre-base marks.

local mark_four             = table.merged(pre_mark,above_mark,below_mark,post_mark)
local mark_above_below_post = table.merged(above_mark,below_mark,post_mark)
318
319-- We use some pseudo features as we need to manipulate the nodelist based
320-- on information in the font as well as already applied features. We can
321-- probably replace some of the code below by injecting 'real' features
322-- using the extension mechanism.
323
-- Small shared tables used by the pseudo feature sequences.

-- both joiners (zwnj and zwj), used as coverage when joiners get removed
local zw_char      = { [c_zwnj] = true, [c_zwj] = true }

-- the language table that gets hooked in under each supported script tag
local dflt_true    = { dflt = true }

-- script tag -> dflt_true, both start empty and are filled by the loop over
-- scripts_one and scripts_two
local two_defaults = { }
local one_defaults = { }

-- the flags table shared by all pseudo sequences
local false_flags  = { false, false, false, false }
337
-- Templates for the four pseudo feature sequences (dv01 .. dv04). They all
-- have the same shape: one feature, the shared false_flags and a single step
-- with a coverage table. initializedevanagi works on copies of these.

local sequence_reorder_matras
local sequence_reorder_reph
local sequence_reorder_pre_base_reordering_consonants
local sequence_remove_joiners

do

    -- feature  : pseudo feature tag, e.g. "dv01"
    -- name     : name of the sequence
    -- kind     : sequence type
    -- defaults : script/language table stored under the feature tag
    -- coverage : coverage table of the one and only step

    local function pseudosequence(feature,name,kind,defaults,coverage)
        return {
            features = { [feature] = defaults },
            flags    = false_flags,
            name     = name,
            order    = { feature },
            type     = kind,
            nofsteps = 1,
            steps    = { { coverage = coverage } },
        }
    end

    sequence_reorder_matras = pseudosequence (
        "dv01",
        "dv01_reorder_matras",
        "devanagari_reorder_matras",
        two_defaults,
        pre_mark
    )

    sequence_reorder_reph = pseudosequence (
        "dv02",
        "dv02_reorder_reph",
        "devanagari_reorder_reph",
        two_defaults,
        { }
    )

    sequence_reorder_pre_base_reordering_consonants = pseudosequence (
        "dv03",
        "dv03_reorder_pre_base_reordering_consonants",
        "devanagari_reorder_pre_base_reordering_consonants",
        one_defaults,
        { }
    )

    sequence_remove_joiners = pseudosequence (
        "dv04",
        "dv04_remove_joiners",
        "devanagari_remove_joiners",
        one_defaults,
        zw_char -- both_joiners_true
    )

end
393
394-- Looping over features is twice as efficient as looping over basic forms (some
395-- 350 checks instead of 750 for one font). This is something to keep an eye on
396-- as it might depend on the font. Not that it's a bottleneck.
397
-- Features that count as basic shaping forms. When a font gets initialized
-- the sequences that carry one of these features are moved to the front of
-- the sequence list.

local basic_shaping_forms = {
    akhn = true,
    blwf = true,
    cjct = true,
    half = true,
    locl = true,
    nukt = true,
    pref = true,
    pstf = true,
    rkrf = true,
    rphf = true,
    vatu = true,
}

-- Features whose steps end up in the sequence subset that is used inside the
-- analyzer (for the second generation script tags). Each feature is listed
-- exactly once: the former list had abvs, calt, haln, pres and psts twice,
-- which did no harm but obscured what the set really is.

local valid = {
    abvs = true,
    akhn = true,
    blwf = true,
    blws = true,
    calt = true,
    cjct = true,
    half = true,
    haln = true,
    locl = true,
    nukt = true,
    pref = true,
    pres = true,
    pstf = true,
    psts = true,
    rkrf = true,
    rphf = true,
    vatu = true,
}
436
-- The supported script tags. scripts_one and scripts_two are paired by index:
-- each tag in scripts_one has its counterpart at the same position in
-- scripts_two (deva/dev2, mlym/mlm2, ...). The scripts table is a hash with
-- all tags of both lists.

local scripts     = { }

local scripts_one = { "deva", "mlym", "beng", "gujr", "guru", "knda", "orya", "taml", "telu" }
local scripts_two = { "dev2", "mlm2", "bng2", "gjr2", "gur2", "knd2", "ory2", "tml2", "tel2" }

local nofscripts  = #scripts_one

-- one_defaults gets an entry for the tags of both lists, two_defaults only
-- for the tags in scripts_two; all entries share the dflt_true table

for index=1,nofscripts do
    local first, second = scripts_one[index], scripts_two[index]
    scripts[first],      scripts[second]      = true, true
    one_defaults[first], one_defaults[second] = dflt_true, dflt_true
    two_defaults[second] = dflt_true
end
453
-- Returns true when s has a true-ish entry for at least one of the tags in
-- scripts_one. When there is no such entry nothing (nil) is returned.
local function valid_one(s)
    for index=1,nofscripts do
        local tag = scripts_one[index]
        if s[tag] then
            return true
        end
    end
end
-- Returns true when s has a true-ish entry for at least one of the tags in
-- scripts_two. When there is no such entry nothing (nil) is returned.
local function valid_two(s)
    for index=1,nofscripts do
        local tag = scripts_two[index]
        if s[tag] then
            return true
        end
    end
end
456
457local function initializedevanagi(tfmdata)
458    local script, language = otf.scriptandlanguage(tfmdata,attr) -- todo: take fast variant
459    if scripts[script] then
460        local resources  = tfmdata.resources
461        local devanagari = resources.devanagari
462        if not devanagari then
463            --
464            report("adding devanagari features to font")
465            --
466            local gsubfeatures   = resources.features.gsub
467            local sequences      = resources.sequences
468            local sharedfeatures = tfmdata.shared.features
469            --
470            gsubfeatures["dv01"] = two_defaults -- reorder matras
471            gsubfeatures["dv02"] = two_defaults -- reorder reph
472            gsubfeatures["dv03"] = one_defaults -- reorder pre base reordering consonants
473            gsubfeatures["dv04"] = one_defaults -- remove joiners
474            --
475            local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants)
476            local reorder_reph                           = copy(sequence_reorder_reph)
477            local reorder_matras                         = copy(sequence_reorder_matras)
478            local remove_joiners                         = copy(sequence_remove_joiners)
479
480            local lastmatch = 0
481            for s=1,#sequences do -- classify chars and make sure basic_shaping_forms come first
482                local features = sequences[s].features
483                if features then
484                    for k, v in next, features do
485                        if k == "locl" then
486                            local steps = sequences[s].steps
487                            local nofsteps = sequences[s].nofsteps
488                            for i=1,nofsteps do
489                                local step     = steps[i]
490                                local coverage = step.coverage
491                                if coverage then
492                                    for k, v in next, pre_mark do
493                                        local locl = coverage[k]
494                                        if locl then
495                                            if #locl > 0 then	--contextchain; KE: is this right?
496                                                for j=1,#locl do
497                                                    local ck      = locl[j]
498                                                    local f       = ck[4]
499                                                    local chainlookups = ck[6]
500                                                    if chainlookups then
501                                                        local chainlookup = chainlookups[f]
502                                                        for j=1,#chainlookup do
503                                                            local chainstep = chainlookup[j]
504                                                            local steps    = chainstep.steps
505                                                            local nofsteps = chainstep.nofsteps
506                                                            for i=1,nofsteps do
507                                                                local step     = steps[i]
508                                                                local coverage = step.coverage
509                                                                if coverage then
510                                                                    locl = coverage[k]
511                                                                end
512                                                            end
513                                                        end
514                                                    end
515                                                end
516                                            end
517                                            if locl then
518                                                reorder_matras.steps[1].coverage[locl] = true
519                                            end
520                                        end
521                                    end
522                                end
523                            end
524                        end
525                        if basic_shaping_forms[k] then
526                            lastmatch = lastmatch + 1
527                            if s ~= lastmatch then
528                                table.insert(sequences, lastmatch, table.remove(sequences, s))
529                            end
530                        end
531                    end
532                end
533            end
534            local insertindex = lastmatch + 1
535            --
536            if tfmdata.properties.language then
537                dflt_true[tfmdata.properties.language] = true
538            end
539            --
540            insert(sequences,insertindex,reorder_pre_base_reordering_consonants)
541            insert(sequences,insertindex,reorder_reph)
542            insert(sequences,insertindex,reorder_matras)
543            insert(sequences,insertindex,remove_joiners)
544            --
545            local blwfcache  = { }
546            local vatucache  = { }
547            local pstfcache  = { }
548            local seqsubset  = { }
549            local rephstep   = {
550                coverage = { } -- will be adapted each work
551            }
552            local devanagari = {
553                reph        = false,
554                vattu       = false,
555                blwfcache   = blwfcache,
556                vatucache   = vatucache,
557                pstfcache   = pstfcache,
558                seqsubset   = seqsubset,
559                reorderreph = rephstep,
560
561            }
562            --
563            reorder_reph.steps = { rephstep }
564            --
565            local pre_base_reordering_consonants = { }
566            reorder_pre_base_reordering_consonants.steps[1].coverage = pre_base_reordering_consonants
567            --
568            resources.devanagari = devanagari
569            --
570            for s=1,#sequences do
571                local sequence = sequences[s]
572                local steps    = sequence.steps
573                local nofsteps = sequence.nofsteps
574                local features = sequence.features
575                local has_rphf = features.rphf
576                local has_blwf = features.blwf
577                local has_vatu = features.vatu
578                local has_pstf = features.pstf
579                if has_rphf and has_rphf[script] then
580                    devanagari.reph = true
581                elseif (has_blwf and has_blwf[script] ) or (has_vatu and has_vatu[script] ) then
582                    devanagari.vattu = true
583                    for i=1,nofsteps do
584                        local step     = steps[i]
585                        local coverage = step.coverage
586                        if coverage then
587                            for k, v in next, coverage do
588                                for h, w in next, halant do
589                                    if v[h] then
590                                        if not blwfcache[k] then
591                                            blwfcache[k] = v
592                                        end
593                                    end
594                                    if has_vatu and has_vatu[script] and not vatucache[k] then
595                                        vatucache[k] = v
596                                    end
597                                end
598                            end
599                        end
600                    end
601                elseif has_pstf and has_pstf[script] then
602                    for i=1,nofsteps do
603                        local step     = steps[i]
604                        local coverage = step.coverage
605                        if coverage then
606                            for k, v in next, coverage do
607                                if not pstfcache[k] then
608                                    pstfcache[k] = v
609                                end
610                            end
611                            for k, v in next, ra do
612                                local r = coverage[k]
613                                if r then
614                                    local found = false
615                                    if #r > 0 then  -- contextchain; KE: is this right?
616                                        for j=1,#r do
617                                            local ck      = r[j]
618                                            local f       = ck[4]
619                                            local chainlookups = ck[6]
620                                            if chainlookups and chainlookups[f] then	--KE: why is check for chainlookups[f] necessacy???
621                                                local chainlookup = chainlookups[f]
622                                                for j=1,#chainlookup do
623                                                    local chainstep = chainlookup[j]
624                                                    local steps    = chainstep.steps
625                                                    local nofsteps = chainstep.nofsteps
626                                                    for i=1,nofsteps do
627                                                        local step     = steps[i]
628                                                        local coverage = step.coverage
629                                                        if coverage then
630                                                            local h = coverage[k]
631                                                            if h then
632                                                                for k, v in next, h do
633                                                                 -- found = v and v.ligature
634                                                                    found = v and (tonumber(v) or v.ligature)
635                                                                    if found then
636                                                                        pre_base_reordering_consonants[found] = true
637                                                                        break
638                                                                    end
639                                                                end
640                                                                if found then
641                                                                    break
642                                                                end
643                                                            end
644                                                        end
645                                                    end
646                                                end
647                                            end
648                                        end
649                                    else
650                                        for k, v in next, r do
651                                         -- found = v and v.ligature
652                                            found = v and (tonumber(v) or v.ligature)
653                                            if found then
654                                                pre_base_reordering_consonants[found] = true
655                                                break
656                                            end
657                                        end
658                                    end
659                                    if found then
660                                        break
661                                    end
662                                end
663                            end
664                        end
665                    end
666                end
667                for kind, spec in next, features do
668                    if valid[kind] and valid_two(spec)then
669                        for i=1,nofsteps do
670                            local step     = steps[i]
671                            local coverage = step.coverage
672                            if coverage then
673                                local reph, rephbase = false, false
674                                if kind == "rphf" then
675                                    -- rphf acts on consonant + halant
676                                    for k, v in next, ra do
677                                        local r = coverage[k]
678                                        if r then
679                                            rephbase = k
680                                            local h = false
681                                            if #r > 0 then	--contextchain; KE: is this right?
682                                                for j=1,#r do
683                                                    local ck      = r[j]
684                                                    local f       = ck[4]
685                                                    local chainlookups = ck[6]
686                                                    if chainlookups then
687                                                        local chainlookup = chainlookups[f]
688                                                        for j=1,#chainlookup do
689                                                            local chainstep = chainlookup[j]
690                                                            local steps    = chainstep.steps
691                                                            local nofsteps = chainstep.nofsteps
692                                                            for i=1,nofsteps do
693                                                                local step     = steps[i]
694                                                                local coverage = step.coverage
695                                                                if coverage then
696                                                                    local r = coverage[k]
697                                                                    if r then
698                                                                        for k, v in next, halant do
699                                                                            local h = r[k]
700                                                                            if h then
701                                                                             -- reph = h.ligature or false
702                                                                                reph = tonumber(h) or h.ligature or false
703                                                                                break
704                                                                            end
705                                                                        end
706                                                                        if h then
707                                                                            break
708                                                                        end
709                                                                    end
710                                                                end
711                                                            end
712                                                        end
713                                                    end
714                                                end
715                                            else
716                                                for k, v in next, halant do
717                                                    local h = r[k]
718                                                    if h then
719                                                     -- reph = h.ligature or false
720                                                        reph = tonumber(h) or h.ligature or false
721                                                        break
722                                                    end
723                                                end
724                                            end
725                                            if reph then
726                                                break
727                                            end
728                                        end
729                                    end
730                                end
731                                seqsubset[#seqsubset+1] = { kind, coverage, reph, rephbase }
732                            end
733                        end
734                    end
735                    if kind == "pref" then
736                        local steps    = sequence.steps
737                        local nofsteps = sequence.nofsteps
738                        for i=1,nofsteps do
739                            local step     = steps[i]
740                            local coverage = step.coverage
741                            if coverage then
742                                for k, v in next, halant do
743                                    local h = coverage[k]
744                                    if h then
745                                        local found = false
746                                        if #h > 0 then -- contextchain; KE: is this right?
747                                            for j=1,#h do
748                                                local ck      = h[j]
749                                                local f       = ck[4]
750                                                local chainlookups = ck[6]
751                                                if chainlookups then
752                                                    local chainlookup = chainlookups[f]
753                                                    for j=1,#chainlookup do
754                                                        local chainstep = chainlookup[j]
755                                                        local steps    = chainstep.steps
756                                                        local nofsteps = chainstep.nofsteps
757                                                        for i=1,nofsteps do
758                                                            local step     = steps[i]
759                                                            local coverage = step.coverage
760                                                            if coverage then
761                                                                local h = coverage[k]
762                                                                if h then
763                                                                    for k, v in next, h do
764                                                                     -- found = v and v.ligature
765                                                                        found = v and (tonumber(v) or v.ligature)
766                                                                        if found then
767                                                                            pre_base_reordering_consonants[found] = true
768                                                                            break
769                                                                        end
770                                                                    end
771                                                                    if found then
772                                                                        break
773                                                                    end
774                                                                end
775                                                            end
776                                                        end
777                                                    end
778                                                end
779                                            end
780                                        else
781                                            for k, v in next, h do
782                                             -- found = v and v.ligature
783                                                found = v and (tonumber(v) or v.ligature)
784                                                if found then
785                                                    pre_base_reordering_consonants[found] = true
786                                                    break
787                                                end
788                                            end
789                                        end
790                                        if found then
791                                            break
792                                        end
793                                    end
794                                end
795                            end
796                        end
797                    end
798                end
799            end
800            --
801            if two_defaults[script] then
802                sharedfeatures["dv01"] = true -- dv01_reorder_matras
803                sharedfeatures["dv02"] = true -- dv02_reorder_reph
804                sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants
805                sharedfeatures["dv04"] = true -- dv04_remove_joiners
806            elseif one_defaults[script] then
807                sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants
808                sharedfeatures["dv04"] = true -- dv04_remove_joiners
809            end
810            if script == "mlym" or script == "taml" then
811                devanagari.left_matra_before_base = true
812            end
813        end
814    end
815end
816
-- Register the "devanagari" pseudo feature. It is enabled by default and only
-- provides a node mode initializer (initializedevanagi).
registerotffeature({
    name         = "devanagari",
    description  = "inject additional features",
    default      = true,
    initializers = { node = initializedevanagi },
})
825
-- Guards the calls to inject_syntax_error; off by default.
local show_syntax_errors = false

-- Flags a syntax error at 'current': the node is duplicated (including its
-- injection data) and the duplicate is inserted right after it. One of the two
-- nodes is turned into a dotted circle: the duplicate when 'char' is a pre-base
-- mark, the original otherwise. Returns whatever insertnodeafter returns.
local function inject_syntax_error(head,current,char)
    local marker = copy_node(current)
    copyinjection(marker,current)
    local target = current
    if pre_mark[char] then
        target = marker
    end
    setchar(target,dotted_circle)
    return insertnodeafter(head,current,marker)
end
838
839-- hm, this is applied to one character:
840
-- Returns (reph, vattu, blwfcache, vatucache, pstfcache) for the given font and
-- attribute. The result is computed once per dataset collection and stored in
-- datasets.devanagari; later calls return the stored values.
local function initialize_one(font,attr) -- we need a proper hook into the dataset initializer

    local tfmdata  = fontdata[font]
    local datasets = otf.dataset(tfmdata,font,attr) -- don't we know this one?
    local cached   = datasets.devanagari

    if cached then
        return cached.reph, cached.vattu, cached.blwfcache, cached.vatucache, cached.pstfcache
    end

    cached = {
        reph      = false,
        vattu     = false,
        blwfcache = { },
        vatucache = { },
        pstfcache = { },
    }
    -- store before scanning, so the record is in place even when the scan below fails
    datasets.devanagari = cached

    local shared = tfmdata.resources.devanagari

    for index=1,#datasets do
        local dataset = datasets[index]
        -- only datasets with a truthy value in slot 1 count; slot 4 holds the feature kind
        local kind = dataset and dataset[1] and dataset[4]
        if kind == "rphf" then
            -- deva
            cached.reph = true
        elseif kind == "blwf" or kind == "vatu" then
            -- deva
            cached.vattu = true
            -- dev2: from now on use the caches kept in the font resources
            cached.blwfcache = shared.blwfcache
            cached.vatucache = shared.vatucache
            cached.pstfcache = shared.pstfcache
        end
    end

    return cached.reph, cached.vattu, cached.blwfcache, cached.vatucache, cached.pstfcache

end
883
884-- HH: somehow we can get a non context here so for now we check for .n
885
-- Checks whether node 'n' is covered by 'contexts'.
--
-- When 'contexts' has no field 'n' it is treated as a plain table indexed by
-- character and the entry for the character of 'n' is returned as-is. Otherwise
-- 'contexts' is scanned as a list of rules where slot 3 is the sequence of
-- coverage tables and slots 4 and 5 are indices into that sequence (presumably
-- the first and last position of the current match; verify against the
-- contextual handler). Only rules with exactly two current positions (l - f == 1)
-- whose second position covers the character of 'n' are considered; the
-- remaining sequence entries are matched against the following nodes and the
-- entries before 'f' against the preceding nodes.
--
-- Returns true on the first matching rule and false when no rule matches.
local function contextchain(contexts, n)
    local char = getchar(n)
    if not contexts.n then
        return contexts[char]
    end
    for k=1,#contexts do
        local ck  = contexts[k]
        local seq = ck[3]
        local f   = ck[4]
        local l   = ck[5]
        if (l - f) == 1 and seq[f+1][char] then
            local ok = true
            -- lookahead: entries after 'l' must match the nodes following n
            local c = n
            for i=l+1,#seq do
                c = getnext(c)
                if not c or not seq[i][ischar(c)] then
                    ok = false
                    break
                end
            end
            if ok then
                -- backtrack: n sits at position f+1, so its predecessor is at f
                -- and the entries f-1 .. 1 are matched walking further back
                c = getprev(n)
                for i=1,f-1 do
                    c = c and getprev(c)
                    if not c or not seq[f-i][ischar(c)] then
                        ok = false
                        -- stop right away: the outcome is known and c can be
                        -- nil, so there is nothing left to walk over
                        break
                    end
                end
            end
            if ok then
                return true
            end
        end
    end
    return false
end
923
-- Sorts the run of dependent vowels that follows node 'c'. Each dependent vowel
-- is compared with the nodes from 'c' up to (but excluding) itself; it is moved
-- in front of the first node found for which either
--   * the vowel is an above mark and that node is a below or post mark, or
--   * the vowel is a below mark and that node is a post mark.
local function order_matras(c)
    local matra = getnext(c)
    local mchar = getchar(matra)
    while dependent_vowel[mchar] do
        -- remember the successor now as the matra might get relinked below
        local following = getnext(matra)
        local probe     = c
        local pchar     = getchar(probe)
        while probe ~= matra do
            local misplaced = (above_mark[mchar] and (below_mark[pchar] or post_mark[pchar])) or (below_mark[mchar] and (post_mark[pchar]))
            if misplaced then
                -- unlink the matra ...
                local before, after = getboth(matra)
                if after then
                    setprev(after,before)
                end
                -- todo: setlink
                setnext(before,after)
                -- ... and relink it just before the probe
                local anchor = getprev(probe)
                setnext(anchor,matra)
                setprev(matra,anchor)
                setnext(matra,probe)
                setprev(probe,matra)
                break
            end
            probe = getnext(probe)
            pchar = getchar(probe)
        end
        matra = following
        mchar = getchar(matra)
    end
end
952
-- Reorders the nodes of one syllable, running from 'start' up to and including
-- 'stop', using the flags and caches provided by initialize_one.
--
-- Parameters:
--   head     : head of the node list (may change when nodes are moved or removed)
--   start    : first node of the syllable
--   stop     : last node of the syllable
--   font     : font id, used for the font data lookup and for processcharacters
--   attr     : attribute value passed on to initialize_one
--   nbspaces : running counter of nbsp based (stand alone) clusters
--
-- Returns: head, stop, nbspaces (all possibly updated).
--
-- The stages are, in order: detect a leading Ra + halant (reph), handle a stand
-- alone cluster on a nbsp, find the base consonant, move the halant of the base
-- to the last consonant, move a leading Ra + halant behind the base (or its
-- matra), reset states around zwnj/zwj, split and reorder matras, and finally
-- position reph and vattu forms.
local function reorder_one(head,start,stop,font,attr,nbspaces)

    local reph, vattu, blwfcache, vatucache, pstfcache = initialize_one(font,attr) -- todo: a hash[font]

    local devanagari = fontdata[font].resources.devanagari
    local current    = start
    local n          = getnext(start)
    local base       = nil
    local firstcons  = nil
    local lastcons   = nil
    local basefound  = false

    if reph and ra[getchar(start)] and halant[getchar(n)] then
        -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph
        -- from candidates for base consonants
        if n == stop then
            return head, stop, nbspaces
        end
        if getchar(getnext(n)) == c_zwj then
            current = start
        else
            current = getnext(n)
            setstate(start,s_rphf)
        end
    end

    if getchar(current) == c_nbsp then
        -- Stand Alone cluster
        if current == stop then
            -- a lone nbsp: drop it
            stop = getprev(stop)
            head = remove_node(head,current)
            flushnode(current)
            return head, stop, nbspaces
        else
            -- the nbsp acts as base and as first and last consonant
            nbspaces  = nbspaces + 1
            base      = current
            firstcons = current
            lastcons  = current
            current   = getnext(current)
            if current ~= stop then
                local char = getchar(current)
                if nukta[char] then
                    current = getnext(current)
                    char = getchar(current)
                end
                if char == c_zwj and current ~= stop then
                    local next = getnext(current)
                    if next ~= stop and halant[getchar(next)] then
                        current = next
                        next = getnext(current)
                        local tmp = next and getnext(next) or nil -- needs checking
                        local changestop = next == stop
                        -- run the features on a two node copy (next followed by
                        -- current) flagged as blwf to see if a below-base form results
                        local tempcurrent = copy_node(next)
                        copyinjection(tempcurrent,next)
                        local nextcurrent = copy_node(current)
                        copyinjection(nextcurrent,current) -- KE: necessary? HH: probably not as positioning comes later and we rawget/set
                        setlink(tempcurrent,nextcurrent)
                        setstate(tempcurrent,s_blwf)
                        tempcurrent = processcharacters(tempcurrent,font)
                        setstate(tempcurrent,unsetvalue)
                        if getchar(next) == getchar(tempcurrent) then
                            -- nothing changed: no below-base form
                            flushlist(tempcurrent)
                            if show_syntax_errors then
                                head, current = inject_syntax_error(head,current,char)
                            end
                        else
                            -- replace the pair in the list by the processed glyph
                            setchar(current,getchar(tempcurrent)) -- we assumes that the result of blwf consists of one node
                            local freenode = getnext(current)
                            setlink(current,tmp)
                            flushnode(freenode)
                            flushlist(tempcurrent)
                            if changestop then
                                stop = current
                            end
                        end
                    end
                end
            end
        end
    end

    while not basefound do
        -- find base consonant
        local char = getchar(current)
        if consonant[char] then
            setstate(current,s_half)
            if not firstcons then
                firstcons = current
            end
            lastcons = current
            if not base then
                base = current
            elseif blwfcache[char] then
                -- consonant has below-base form
                setstate(current,s_blwf)
            elseif pstfcache[char] then
                -- consonant has post-base form
                setstate(current,s_pstf)
            else
                base = current
            end
        end
        basefound = current == stop
        current = getnext(current)
    end

    if base ~= lastcons then
        -- if base consonant is not last one then move halant from base consonant to last one
        local np = base
        local n  = getnext(base)
        local ch = getchar(n)
        if nukta[ch] then
            np = n
            n  = getnext(n)
            ch = getchar(n)
        end
        if halant[ch] then
            if lastcons ~= stop then
                local ln = getnext(lastcons)
                if nukta[getchar(ln)] then
                    lastcons = ln
                end
            end
         -- local np = getprev(n)
            local nn = getnext(n)
            local ln = getnext(lastcons) -- what if lastcons is nn ?
            setlink(np,nn)
            setnext(lastcons,n)
            if ln then
                setprev(ln,n)
            end
            setnext(n,ln)
            setprev(n,lastcons)
            if lastcons == stop then
                stop = n
            end
        end
    end

    n = getnext(start)
    if n ~= stop and ra[getchar(start)] and halant[getchar(n)] and not zw_char[getchar(getnext(n))] then
        -- if syllable starts with Ra + H then move this combination so that it follows either:
        -- the post-base 'matra' (if any) or the base consonant
        local matra = base
        if base ~= stop then
            local next = getnext(base)
            if dependent_vowel[getchar(next)] then
                matra = next
            end
        end
        -- [sp][start][n][nn] [matra|base][?]
        -- [matra|base][start]  [n][?] [sp][nn]
        local sp = getprev(start)
        local nn = getnext(n)
        local mn = getnext(matra)
        setlink(sp,nn)
        setlink(matra,start)
        setlink(n,mn)
        if head == start then
            head = nn
        end
        start = nn
        if matra == stop then
            stop = n
        end
    end

    -- a halant followed by zwnj resets the state of the node before the halant
    local current = start
    while current ~= stop do
        local next = getnext(current)
        if next ~= stop and halant[getchar(next)] and getchar(getnext(next)) == c_zwnj then
            setstate(current,unsetvalue)
        end
        current = next
    end

    -- the base loses its state when followed by a halant, unless a zwj follows that halant
    if base ~= stop and getstate(base) then -- state can also be init
        local next = getnext(base)
        if halant[getchar(next)] and not (next ~= stop and getchar(getnext(next)) == c_zwj) then
            setstate(base,unsetvalue)
        end
    end

    -- split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable.
    -- classify consonants and 'matra' parts as pre-base, above-base (Reph), below-base or post-base, and group elements of the syllable (consonants and 'matras') according to this classification

    local current, allreordered = start, false
    -- b is the node after which dependent vowels of non-base consonants get
    -- collected: the base, or its nukta when there is one
    local b, bn = base, getnext(base)
    if base ~= stop and nukta[getchar(bn)] then
        b = bn
    end
    while not allreordered do
        -- current is always consonant
        -- c ends up being the last node of the group that starts at current
        local c = current
        local n = getnext(current)
        if c ~= stop then
            local ch = getchar(n)
            if nukta[ch] then
                c  = n
                n  = getnext(n)
                ch = getchar(n)
            end
            if c ~= stop then
                if halant[ch] then
                    c  = n
                    n  = getnext(n)
                    ch = getchar(n)
                end

                -- split multi-part matras: n keeps the first part, a copy gets the second
                local tpm = twopart_mark[ch]
                while tpm do
                    local extra = copy_node(n)
                    copyinjection(extra,n)
                    ch = tpm[1]
                    setchar(n,ch)
                    setchar(extra,tpm[2])
                    head = insertnodeafter(head,current,extra)
                    tpm = twopart_mark[ch]
                end
                while c ~= stop and dependent_vowel[ch] do
                    c  = n
                    n  = getnext(n)
                    ch = getchar(n)
                end
                if c ~= stop then
                    if vowel_modifier[ch] then
                        c  = n
                        n  = getnext(n)
                        ch = getchar(n)
                    end
                    if c ~= stop and stress_tone_mark[ch] then
                        c = n
                        n = getnext(n)
                    end
                end
            end
        end
        local bp   = getprev(firstcons)
        local cn   = getnext(current)
        local last = getnext(c)
        while cn ~= last do
            -- move pre-base matras...
            if pre_mark[getchar(cn)] then
                if devanagari.left_matra_before_base then
                    -- ... right in front of the base
                    local prev, next = getboth(cn)
                    setlink(prev,next)
                    if cn == stop then
                        stop = getprev(cn)
                    end
                    if base == start then
                        if head == start then
                            head = cn
                        end
                        start = cn
                    end
                    setlink(getprev(base),cn)
                    setlink(cn,base)
                 -- setlink(getprev(base),cn,base) -- maybe
                    cn = next
                else
                    -- ... in front of the first consonant
                    if bp then
                        setnext(bp,cn)
                    end
                    local prev, next = getboth(cn)
                    if next then
                        setprev(next,prev)
                    end
                    setnext(prev,next)
                    if cn == stop then
                        stop = prev
                    end
                    setprev(cn,bp)
                    setlink(cn,firstcons)
                    if firstcons == start then
                        if head == start then
                            head = cn
                        end
                        start = cn
                    end
                    cn = next
                end
            elseif current ~= base and dependent_vowel[getchar(cn)] then
                -- a dependent vowel on a non-base consonant moves behind b
                local prev, next = getboth(cn)
                if next then
                    setprev(next,prev)
                end
                setnext(prev,next)
                if cn == stop then
                    stop = prev
                end
                setlink(b,cn,getnext(b))
                order_matras(cn)
                cn = next
            elseif current == base and dependent_vowel[getchar(cn)] then
                -- dependent vowels on the base stay in place but get sorted
                local cnn = getnext(cn)
                order_matras(cn)
                cn = cnn
                while cn ~= last and dependent_vowel[getchar(cn)] do
                    cn = getnext(cn)
                end
            else
                cn = getnext(cn)
            end
        end
        allreordered = c == stop
        current = getnext(c)
    end

    if reph or vattu then
        -- cns tracks the last node of the most recent consonant (or nbsp) group
        local current, cns = start, nil
        while current ~= stop do
            local c = current
            local n = getnext(current)
            if ra[getchar(current)] and halant[getchar(n)] then
                c = n
                n = getnext(n)
                -- b becomes the last dependent vowel found after the base (or the base itself)
                local b, bn = base, base
                while bn ~= stop  do
                    local next = getnext(bn)
                    if dependent_vowel[getchar(next)] then
                        b = next
                    end
                    bn = next
                end
                if getstate(current,s_rphf) then
                    -- position Reph (Ra + H) after post-base 'matra' (if any) since these
                    -- become marks on the 'matra', not on the base glyph
                    if b ~= current then
                        if current == start then
                            if head == start then
                                head = n
                            end
                            start = n
                        end
                        if b == stop then
                            stop = c
                        end
                        local prev = getprev(current)
                        setlink(prev,n)
                        local next = getnext(b)
                        setlink(c,next)
                        setlink(b,current)
                    end
                elseif cns and getnext(cns) ~= current then -- todo: optimize next
                    -- position below-base Ra (vattu) following the consonants on which it is placed (either the base consonant or one of the pre-base consonants)
                    local cp   = getprev(current)
                    local cnsn = getnext(cns)
                    setlink(cp,n)
                    setlink(cns,current) -- cns ?
                    setlink(c,cnsn)
                    if c == stop then
                        stop = cp
                        break
                    end
                    current = getprev(n)
                end
            else
                local char    = getchar(current)
                local is_cons = consonant[char]
                if is_cons or char == c_nbsp then
                    -- a nbsp is treated like a consonant here but is counted as well
                    if not is_cons then
                        nbspaces = nbspaces + 1
                    end
                    cns = current
                    local next = getnext(cns)
                    if halant[getchar(next)] then
                        cns = next
                    end
                    if not vatucache[char] then
                        next = getnext(cns)
                        while dependent_vowel[getchar(next)] do
                            cns  = next
                            next = getnext(cns)
                        end
                    end
                end
            end
            current = getnext(current)
        end
    end

    if getchar(base) == c_nbsp then
        -- the nbsp that served as base of a stand alone cluster is removed again
        nbspaces = nbspaces - 1
        if base == stop then
            stop = getprev(stop)
        end
        head = remove_node(head,base)
        flushnode(base)
    end

    return head, stop, nbspaces
end
1356
1357-- If a pre-base matra character had been reordered before applying basic features,
1358-- the glyph can be moved closer to the main consonant based on whether half-forms had been formed.
1359-- Actual position for the matra is defined as “after last standalone halant glyph,
1360-- after initial matra position and before the main consonant”.
1361-- If ZWJ or ZWNJ follow this halant, position is moved after it.
1362
-- So we break out ... this is only done for the first 'word' (if we feed words we can as
-- well test for non glyph).
1365
-- Moves the node 'start' (a pre-base matra that was reordered earlier) behind the
-- first halant found in the same syllable, or behind the zero width character
-- that directly follows that halant. Scanning stops at the first node that is
-- not a character of the start font or that belongs to another syllable.
--
-- Returns head, the node to continue with (the old successor of 'start' when a
-- move took place, otherwise 'start' itself) and true.
function handlers.devanagari_reorder_matras(head,start) -- no leak
    local font     = getfont(start) -- we could cache attributes here
    local syllable = getprop(start,a_syllabe)
    local node     = start
    while node do
        local code  = ischar(node,font)
        local after = getnext(node)
        if not code or getprop(node,a_syllabe) ~= syllable then
            break
        end
        if halant[code] then -- state can also be init
            if after then
                local zw = ischar(after,font)
                if zw and zw_char[zw] and getprop(after,a_syllabe) == syllable then
                    -- skip the joiner too
                    node  = after
                    after = getnext(node)
                end
            end
            -- can be optimized
            local resume = getnext(start)
            head = remove_node(head,start)
            setlink(start,after)
            setlink(node,start)
         -- setlink(node,start,after) -- maybe
            start = resume
            break
        end
        node = after
    end
    return head, start, true
end
1410
-- Reph’s original position is always at the beginning of the syllable (i.e. it is
-- not reordered at the character reordering stage). However, it will be reordered
-- according to the basic-forms shaping results. Possible positions for reph,
-- depending on the script, are: after main, before post-base consonant forms, and
-- after post-base consonant forms.
1416
1417-- In Devanagari reph has reordering position 'before postscript' and dev2 only
1418-- follows step 2, 4, and 6.
1419
1420local rephbase = { }
1421
-- Handler that moves the reph node (start) to its target position inside its
-- syllable. Nodes belong to the same syllable as long as ischar(node,startfont)
-- succeeds and their a_syllabe property equals the one of start. The reordering
-- class is the value found in rephbase[startfont][char] and is tested against the
-- after_subscript, after_postscript, after_main, before_postscript and
-- before_subscript tables.
--
-- The steps below are tried in order; as soon as one of them has moved the node,
-- startnext is set and the remaining steps are skipped.
--
-- Returns head, start, true. When the node was moved, the returned start is the
-- node that originally followed it, otherwise it is the start that was passed in.
function handlers.devanagari_reorder_reph(head,start)
    local current   = getnext(start)
    local startnext = nil -- becomes non nil once a step has moved the reph
    local startprev = nil -- not used in this function
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)
    --
    ::step_1::
    --
    -- If reph should be positioned after post-base consonant forms, proceed to step 5.
    --
    local char = ischar(start,startfont)
    -- from here on the local rephbase (a class value) shadows the module level table
    local rephbase = rephbase[startfont][char]
    if char and after_subscript[rephbase] then
        goto step_5
    end
    --
    ::step_2::
    --
    -- If the reph repositioning class is not after post-base: target position is after
    -- the first explicit halant glyph between the first post-reph consonant and last
    -- main consonant. If ZWJ or ZWNJ are following this halant, position is moved after
    -- it. If such position is found, this is the target position. Otherwise, proceed to
    -- the next step. Note: in old-implementation fonts, where classifications were
    -- fixed in shaping engine, there was no case where reph position will be found on
    -- this step.
    --
    if char and not after_postscript[rephbase] then
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                if halant[char] then
                    local next = getnext(current)
                    if next then
                        local nextchar = ischar(next,startfont)
                        -- a joiner of the same syllable right after the halant is skipped too
                        if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then
                            current = next
                            next    = getnext(current)
                        end
                    end
                    -- unlink start and put it between current and next
                    startnext = getnext(start)
                    head = remove_node(head,start)
                    setlink(start,next)
                    setlink(current,start)
                 -- setlink(current,start,next) -- maybe
                    start = startnext
                    startattr = getprop(start,a_syllabe)
                    break
                end
                current = getnext(current)
            else
                break
            end
        end
    end
    --
    ::step_3::
    --
    -- If reph should be repositioned after the main consonant: find the first consonant
    -- not ligated with main, or find the first consonant that is not a potential
    -- pre-base reordering Ra.
    --
    if not startnext then
        if char and after_main[rephbase] then
            current = getnext(start)
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if consonant[char] and not getstate(current,s_pref) then
                        -- unlink start and put it right after current
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(current,start)
                        setlink(start,getnext(current))
                     -- setlink(current,start,getnext(current)) -- maybe
                        start = startnext
                        startattr = getprop(start,a_syllabe)
                        break
                    end
                    current = getnext(current)
                else
                    break
                end
            end
        end
    end
    --
    ::step_4::
    --
    -- If reph should be positioned before post-base consonant, find first post-base
    -- classified consonant not ligated with main. If no consonant is found, the target
    -- position should be before the first matra, syllable modifier sign or vedic sign.
    --
    if not startnext then
        if char and before_postscript[rephbase] then
            current = getnext(start)
            local c = nil -- first vowel modifier or stress/tone mark seen, used as fallback
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if getstate(current,s_pstf) then -- post-base
                        -- unlink start and put it right before current
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(current),start)
                        setlink(start,current)
                     -- setlink(getprev(current),start,current) -- maybe
                        start = startnext
                        startattr = getprop(start,a_syllabe)
                        break
                    elseif not c and (vowel_modifier[char] or stress_tone_mark[char]) then
                        c = current
                    end
                    current = getnext(current)
                else
                    -- left the syllable without a post-base node: fall back on c when set
                    if c then
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(c),start)
                        setlink(start,c)
                     -- setlink(getprev(c),start,c) -- maybe
                        start = startnext
                        startattr = getprop(start,a_syllabe)
                    end
                    break
                end
            end
        end
    end
    --
    ::step_5::
    --
    -- If no consonant is found in steps 3 or 4, move reph to a position immediately
    -- before the first post-base matra, syllable modifier sign or vedic sign that has a
    -- reordering class after the intended reph position. For example, if the reordering
    -- position for reph is post-main, it will skip above-base matras that also have a
    -- post-main position.
    --
    if not startnext then
        current = getnext(start)
        local c = nil -- last node in the syllable whose state qualifies for the class
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                local state = getstate(current)
                if before_subscript[rephbase] and (state == s_blwf or state == s_pstf) then
                    c = current
                elseif after_subscript[rephbase] and (state == s_pstf) then
                    c = current
                end
                current = getnext(current)
            else
                break
            end
        end
        -- here we can lose the old start node: maybe best split cases
        if c then
            -- unlink start and put it right before c
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(getprev(c),start)
            setlink(start,c)
         -- setlink(getprev(c),start,c) -- maybe
            -- end
            start = startnext
            startattr = getprop(start,a_syllabe)
        end
    end
    --
    ::step_6::
    --
    -- Otherwise, reorder reph to the end of the syllable.
    --
    if not startnext then
        current = start
        local next = getnext(current)
        -- advance current to the last node that still belongs to the syllable
        while next do
            local nextchar = ischar(next,startfont)
            if nextchar and getprop(next,a_syllabe) == startattr then
                current = next
                next = getnext(current)
            else
                break
            end
        end
        -- nothing to do when start is the only node of the syllable
        if start ~= current then
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(start,getnext(current))
            setlink(current,start)
         -- setlink(current,start,getnext(current)) -- maybe
            start = startnext
        end
    end
    --
    return head, start, true
end
1616
1617-- If a pre-base reordering consonant is found, reorder it according to the following rules:
1618--
1619-- 1  Only reorder a glyph produced by substitution during application of the feature. (Note
1620--    that a font may shape a Ra consonant with the feature generally but block it in certain
1621--    contexts.)
1622-- 2  Try to find a target position the same way as for pre-base matra. If it is found, reorder
1623--    pre-base consonant glyph.
1624-- 3  If position is not found, reorder immediately before main consonant.
1625
1626-- Here we implement a few handlers:
1627--
1628--   function(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
1629--       return head, start, done
1630--   end
1631
-- Handler for pre-base reordering consonants. A node that already carries the
-- a_reordered property is left alone. Otherwise two attempts are made:
--
-- 1  Scan forward from start over nodes of the same syllable (ischar succeeds for
--    the font of start and the a_syllabe property matches). At the first halant,
--    optionally followed by a ZWJ/ZWNJ of the same syllable, start is unlinked and
--    put right after that halant (or joiner).
-- 2  When no halant was found, scan backward from start over nodes with the same
--    a_syllabe property and put start before the first node that is not a dependent
--    vowel and that has either no state or the init state.
--
-- A moved node gets the a_reordered property so that it is not moved again.
--
-- Returns head, start, true. When the node was moved, the returned start is the
-- node that originally followed it, otherwise it is the start that was passed in.
function handlers.devanagari_reorder_pre_base_reordering_consonants(head,start)
    if getprop(start,a_reordered) then
        return head, start, true
    end
    local current   = start -- we could cache attributes here
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)
    while current do
        local char = ischar(current,startfont)
        local next = getnext(current)
        if char and getprop(current,a_syllabe) == startattr then
            if halant[char] then -- state can also be init
                if next then
                    local char = ischar(next,startfont)
                    if char and zw_char[char] and getprop(next,a_syllabe) == startattr then
                        current = next
                        next    = getnext(current)
                    end
                end
                -- can be optimized
                local startnext = getnext(start)
                head = remove_node(head,start)
                setlink(start,next)
                setlink(current,start)
             -- setlink(current,start,next) -- maybe
                -- same key as the one tested at the top of this function
                setprop(start,a_reordered,true)
                start = startnext
                return head, start, true
         -- elseif consonant[char] and (not getstate(current) or getstate(current,s_init)) then
         --     startnext = getnext(start)
         --     head = remove_node(head,start)
         --     if current == head then
         --         setlink(start,current)
         --         head = start
         --     else
         --         setlink(getprev(current),start)
         --         setlink(start,current)
         --     end
         --     start = startnext
         --     break
            end
        else
            break
        end
        current = next
    end

    -- No halant was found. Here start is still the node that was passed in (the
    -- forward scan returns as soon as it moves something), so startattr is still
    -- valid and does not need to be fetched again.
    current = getprev(start)
    while current and getprop(current,a_syllabe) == startattr do
        local char = ischar(current)
        if (not dependent_vowel[char] and (not getstate(current) or getstate(current,s_init))) then
            -- this has to be a local: an undeclared startnext would end up as a global
            local startnext = getnext(start)
            head = remove_node(head,start)
            if current == head then
                setlink(start,current)
                head = start
            else
                setlink(getprev(current),start)
                setlink(start,current)
            end
            setprop(start,a_reordered,true)
            start = startnext
            break
        end
        current = getprev(current)
    end

    return head, start, true
end
1702
-- Handler that removes start together with the run of ZWJ/ZWNJ glyphs (in the font
-- of start) that directly follows it. The removed nodes are flushed. Returns the
-- (possibly changed) head, the first node after the removed run (nil when the run
-- reaches the end of the list) and true. The kind, lookupname and replacement
-- arguments are not used.
function handlers.devanagari_remove_joiners(head,start,kind,lookupname,replacement)
    local after = getnext(start)
    local font  = getfont(start)
    local tail  = start
    -- extend the run as long as we see joiners
    while after do
        local char = ischar(after,font)
        if not char or (char ~= c_zwnj and char ~= c_zwj) then
            break
        end
        tail  = after
        after = getnext(after)
    end
    local before = getprev(start)
    if after then
        -- cut the run loose and bridge the gap
        setnext(tail)
        setlink(before,after)
    elseif before then
        -- the run ends the list, so the predecessor becomes the tail
        setnext(before)
    end
    if head == start then
        head = after
    end
    flushlist(start)
    return head, after, true
end
1729
-- Returns the seqsubset and reorderreph tables found in the devanagari resources of
-- the given font. A fresh empty table is returned for each one that is missing
-- (or for both when the font has no devanagari resources). The attr argument is
-- not used.
local function initialize_two(font,attr)
    local devanagari = fontdata[font].resources.devanagari
    if not devanagari then
        return { }, { }
    end
    return devanagari.seqsubset or { }, devanagari.reorderreph or { }
end
1741
1742-- this one will be merged into the caller: it saves a call, but we will then make function
1743-- of the actions
1744
-- Reorders the nodes of one syllable, given as the run start .. stop in the node
-- list head. The work is done in stages:
--
-- 1  For each entry of the font's seqsubset (kind, lookupcache, ...) nodes are
--    tagged with a state: rphf, pref, half, blwf (also for vatu) or pstf. A node
--    qualifies when the lookupcache has an entry for its character and
--    contextchain accepts the next node. Along the way halfpos, subpos and
--    postpos are recorded and rephbase[font] plus reorderreph.coverage are
--    refilled from the rphf entries.
-- 2  The base is determined: either an nbsp (a stand alone cluster) or the last
--    qualifying consonant, with the first consonant as fallback. Without a base
--    the function returns early.
-- 3  Dependent vowels are moved: pre marks before the base or half position, the
--    above, below and post marks after basepos, subpos and postpos.
-- 4  A halant followed by a ra (not in rphf or blwf state) that is listed in
--    after_main is moved after the base and the dependent vowels that follow it.
-- 5  A nukta that follows a run of halants or stress/tone marks is moved in front
--    of that run.
-- 6  A base that is an nbsp is removed again.
--
-- Arguments: head, start and stop are nodes, font is the font id used to index
-- fontdata and rephbase, attr is only passed on to initialize_two, and nbspaces
-- is a counter that is incremented and decremented here.
--
-- Returns head, stop, nbspaces; head and stop can differ from the ones passed in
-- because nodes get moved, inserted and removed.
local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pass over (determine stop in sweep)
    local seqsubset, reorderreph = initialize_two(font,attr)

    local halfpos  = nil
    local basepos  = nil
    local subpos   = nil
    local postpos  = nil

    -- both are rebuilt on every call from the rphf entries below
    reorderreph.coverage = { }
    rephbase[font]       = { }

    for i=1,#seqsubset do

        -- this can be done more efficient, the last test and less getnext

        local subset      = seqsubset[i]
        local kind        = subset[1]
        local lookupcache = subset[2]
        if kind == "rphf" then
            reorderreph.coverage[subset[3]] = true -- neat
            rephbase[font][subset[3]] = subset[4]
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                     -- if found[getchar(next)] or contextchain(found, next) then    --above-base: rphf    Consonant + Halant
                        if contextchain(found, next) then    --above-base: rphf    Consonant + Halant
                            local afternext = next ~= stop and getnext(next)
                            if afternext and zw_char[getchar(afternext)] then -- ZWJ and ZWNJ prevent creation of reph
                                current = afternext -- getnext(next)
                            elseif current == start then
                                -- only a match at the very start of the syllable gets the rphf state
                                setstate(current,s_rphf)
                                current = next
                            else
                                current = next
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "pref" then
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then -- pre-base: pref	Halant + Consonant
                        local next = getnext(current)
                     -- if found[getchar(next)] or contextchain(found, next) then
                        if contextchain(found, next) then
                            if (not getstate(current) and not getstate(next)) then	--KE: state can also be init...
                                setstate(current,s_pref)
                                setstate(next,s_pref)
                                current = next
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "half" then -- half forms: half / Consonant + Halant
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                     -- if found[getchar(next)] or contextchain(found, next) then
                        if contextchain(found, next) then
                            if next ~= stop and getchar(getnext(next)) == c_zwnj then    -- zwnj prevent creation of half
                                current = next
                            elseif (not getstate(current)) then	--KE: state can also be init...
                                setstate(current,s_half)
                                -- remember the first half form only
                                if not halfpos then
                                    halfpos = current
                                end
                            end
                            -- extra step on top of the one at the end of the loop body
                            current = getnext(current)
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "blwf" or kind == "vatu" then -- below-base: blwf / Halant + Consonant
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                     -- if found[getchar(next)] or contextchain(found, next) then
                        if contextchain(found, next) then
                            if (not getstate(current) and not getstate(next)) then --KE: state can also be init...
                                setstate(current,s_blwf)
                                setstate(next,s_blwf)
                                current = next
                                subpos  = current
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "pstf" then -- post-base: pstf / Halant + Consonant
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                     -- if found[getchar(next)] or contextchain(found, next) then
                        if contextchain(found, next) then
                            if (not getstate(current) and not getstate(next)) then -- KE: state can also be init...
                                setstate(current,s_pstf)
                                setstate(next,s_pstf)
                                current = next
                                postpos = current
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        end
    end

    -- stage 2: determine the base

    local current, base, firstcons = start, nil, nil

    if getstate(start,s_rphf) then
        -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph from candidates for base consonants
        current = getnext(getnext(start))
    end

    if current ~= getnext(stop) and getchar(current) == c_nbsp then
        -- Stand Alone cluster
        if current == stop then
            -- the nbsp is the last node of the syllable: drop it and quit
            stop = getprev(stop)
            head = remove_node(head,current)
            flushnode(current)
            return head, stop, nbspaces
        else
            nbspaces = nbspaces + 1
            base     = current
            current  = getnext(current)
            if current ~= stop then
                local char = getchar(current)
                if nukta[char] then
                    current = getnext(current)
                    char = getchar(current)
                end
                if char == c_zwj then
                    local next = getnext(current)
                    if current ~= stop and next ~= stop and halant[getchar(next)] then
                        current = next
                        next = getnext(current)
                        local tmp = getnext(next)
                        local changestop = next == stop
                        -- NOTE(review): the list is cut after next (tmp keeps the remainder) and
                        -- processcharacters is then run with the pref, blwf and pstf state in turn;
                        -- presumably this limits processing to these nodes -- confirm
                        setnext(next)
                        setstate(current,s_pref)
                        current = processcharacters(current,font)
                        setstate(current,s_blwf)
                        current = processcharacters(current,font)
                        setstate(current,s_pstf)
                        current = processcharacters(current,font)
                        setstate(current,unsetvalue)
                        if halant[getchar(current)] then
                            -- still a halant: reattach the remainder after the node that follows it
                            setnext(getnext(current),tmp)
                            if show_syntax_errors then
                                head, current = inject_syntax_error(head,current,char)
                            end
                        else
                            setnext(current,tmp) -- assumes that result of pref, blwf, or pstf consists of one node
                            if changestop then
                                stop = current
                            end
                        end
                    end
                end
            end
        end
    else -- not Stand Alone cluster
        local last = getnext(stop)
        while current ~= last do    -- find base consonant
            local next = getnext(current)
            if consonant[getchar(current)] then
                -- a consonant followed by halant + zwj is no candidate
                if not (current ~= stop and next ~= stop and halant[getchar(next)] and getchar(getnext(next)) == c_zwj) then
                    if not firstcons then
                        firstcons = current
                    end
                    -- check whether consonant has below-base or post-base form or is pre-base reordering Ra
                    local a = getstate(current)
                    if not (a == s_blwf or a == s_pstf or (a ~= s_rphf and a ~= s_blwf and ra[getchar(current)])) then
                        -- no break here, so the last qualifying consonant wins
                        base = current
                    end
                end
            end
            current = next
        end
        if not base then
            base = firstcons
        end
    end

    if not base then
        -- no base at all: undo a reph state on start and leave the nodes as they are
        if getstate(start,s_rphf) then
            setstate(start,unsetvalue)
        end
        return head, stop, nbspaces
    else
        if getstate(base) then -- state can also be init
            setstate(base,unsetvalue)  -- THIS RESETS THE HALF STATE
        end
        basepos = base
    end
    -- positions that were not set by the tagging stage default to the base
    if not halfpos then
        halfpos = base
    end
    if not subpos then
        subpos = base
    end
    if not postpos then
        postpos = subpos or base
    end

    -- Matra characters are classified and reordered by which consonant in a conjunct they have affinity for

    local moved   = { } -- pre marks that were already repositioned
    local current = start
    local last    = getnext(stop)
    while current ~= last do
        local char   = getchar(current)
        local target = nil
        local cn     = getnext(current)
        -- not so efficient (needed for malayalam)
        -- a character listed in twopart_mark is split: current gets the first part and a
        -- copy inserted after current gets the second part (repeated while the first part
        -- is itself listed in twopart_mark)
        local tpm = twopart_mark[char]
        while tpm do
            local extra = copy_node(current)
            copyinjection(extra,current)
            char = tpm[1]
            setchar(current,char)
            setchar(extra,tpm[2])
            head = insertnodeafter(head,current,extra)
            tpm = twopart_mark[char]
        end
        --
        if not moved[current] and dependent_vowel[char] then
            if pre_mark[char] then -- or: if before_main or before_half
                moved[current] = true
                -- can be helper to remove one node
                local prev, next = getboth(current)
                setlink(prev,next)
                if current == stop then
                    stop = getprev(current)
                end

                local pos
                if before_main[char] then
                    pos     = basepos
                 -- basepos = current -- is this correct?
                else
                    -- must be before_half
                    pos      = halfpos
                 -- halfpos = current -- is this correct?
                end

                -- move pos back to the earliest preceding node of the syllable that has the pref state
                local ppos = getprev(pos) -- necessary?
                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) do
                    if getstate(ppos,s_pref) then
                        pos = ppos
                    end
                    ppos = getprev(ppos)
                end

                -- move pos back over preceding consonant + halant pairs of the same syllable
                local ppos = getprev(pos) -- necessary?
                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and halant[ischar(ppos)] do
                    ppos = getprev(ppos)
                    if ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and consonant[ischar(ppos)] then
                        pos  = ppos
                        ppos = getprev(ppos)
                    else
                        break
                    end
                end

                -- the mark becomes the new first node of the syllable (and maybe of the list)
                if pos == start then
                    if head == start then
                        head = current
                    end
                    start = current
                end
                setlink(getprev(pos),current)
                setlink(current,pos)
             -- setlink(getprev(pos),current,pos) -- maybe
            elseif above_mark[char] then
                -- after main consonant
                target = basepos
                if subpos == basepos then
                    subpos = current
                end
                if postpos == basepos then
                    postpos = current
                end
                basepos = current
            elseif below_mark[char] then
                -- after subjoined consonants
                target = subpos
                if postpos == subpos then
                    postpos = current
                end
                subpos = current
            elseif post_mark[char] then
                -- after post-form consonant
                local n = getnext(postpos) -- nukta and vedic sign come first - is that right? and also halant+ra
                while n do
                    local v = ischar(n,font)
                    if nukta[v] or stress_tone_mark[v] or vowel_modifier[v] then
                        postpos = n
                    else
                        break
                    end
                    n = getnext(n)
                end
                target = postpos
                postpos = current
            end
            if mark_above_below_post[char] then
                -- relink current right after target, unless it already sits there
                local prev = getprev(current)
                if prev ~= target then
                    local next = getnext(current)
                    setlink(prev,next)
                    if current == stop then
                        stop = prev
                    end
                    setlink(current,getnext(target))
                    setlink(target,current)
                 -- setlink(target,current,getnext(target)) -- maybe
                end
            end
        end
        current = cn
    end

    -- reorder halant+Ra

    local current = getnext(start)
    local last    = getnext(stop)
    while current ~= last do
        local char = getchar(current)
        local cn   = getnext(current)
        if halant[char] and ra[ischar(cn)] and (not getstate(cn,s_rphf)) and (not getstate(cn,s_blwf)) then
            if after_main[ischar(cn)] then
                local prev = getprev(current)
                local next = getnext(cn)
                -- let basepos run over the dependent vowels that follow it
                local bpn  = getnext(basepos)
                while bpn and dependent_vowel[ischar(bpn)] do
                    basepos = bpn
                    bpn     = getnext(bpn)
                end
                if basepos ~= prev then
                    -- move the pair current (halant) + cn (ra) right after basepos
                    setlink(prev,next)
                    setlink(cn, getnext(basepos))
                    setlink(basepos, current)
                    if cn == stop then
                        stop = prev
                    end
                    cn = next
                end
            end
            -- after_postscript
            -- after_subscript
            -- before_postscript
            -- before_subscript
        end
        current = cn
    end

    -- Reorder marks to canonical order: Adjacent nukta and halant or nukta and vedic sign are always repositioned if necessary, so that the nukta is first.

    local current = start
    local c       = nil -- first node of the current run of halants and stress/tone marks
    while current ~= stop do
        local char = getchar(current)
        if halant[char] or stress_tone_mark[char] then
            if not c then
                c = current
            end
        else
            c = nil
        end
        local next = getnext(current)
        if c and nukta[getchar(next)] then
            -- the nukta (next) is relinked in front of the run c .. current
            if head == c then
                head = next
            end
            if stop == next then
                stop = current
            end
            setlink(getprev(c),next)
            local nextnext = getnext(next)
            setnext(current,nextnext)
            local nextnextnext = getnext(nextnext)
            if nextnextnext then
                setprev(nextnextnext,current)
            end
            setlink(nextnext,c)
        end
        if stop == current then break end
        current = getnext(current)
    end

    -- a stand alone cluster: the nbsp that served as base is removed again
    if getchar(base) == c_nbsp then
        if base == stop then
            stop = getprev(stop)
        end
        nbspaces = nbspaces - 1
        head = remove_node(head, base)
        flushnode(base)
    end

    return head, stop, nbspaces
end
2178
2179-- cleaned up and optimized ... needs checking (local, check order, fixes, extra hash, etc)
2180
-- The separator table: the five character classes below are merged in with imerge
-- (in this order) and every key of the nukta and halant tables is flagged as true.

local separator = { }

do
    local classes = {
        consonant,
        independent_vowel,
        dependent_vowel,
        vowel_modifier,
        stress_tone_mark,
    }
    for i=1,#classes do
        imerge(separator,classes[i])
    end
    -- the order of these two does not matter as we only set flags
    for _, marks in next, { nukta, halant } do
        for char in next, marks do
            separator[char] = true
        end
    end
end
2191
-- Determine the last node of the cluster that starts at node 'c'.
--
-- c       : node where the scan starts (the caller has already skipped an
--           optional leading Ra+H)
-- font    : font id handed to ischar, only glyphs for which ischar succeeds
--           take part in the scan
-- variant : 1 is used by the caller for the vowel based syllable
--             [Ra+H]+V+[N]+[<[<ZWJ|ZWNJ>]+H+C|ZWJ+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
--           2 is used by the caller for the stand alone cluster
--             #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
--
-- The node that ends the cluster is returned; this is 'c' itself when nothing
-- that follows matches. It is not clear why there are two variants as the
-- patterns suggest nearly the same ruleset.

local function analyze_next_chars_one(c,font,variant) -- skip one dependent vowel
    local n = getnext(c)
    if not n then
        return c
    end
    if variant == 1 then
        local first = ischar(n,font)
        if first and nukta[first] then
            -- look beyond the nukta but keep the nukta's char when nothing follows
            n = getnext(n)
            if n then
                first = ischar(n,font)
            end
        end
        -- each step of the look ahead is only taken when the previous one succeeded
        local second     = n and first and getnext(n)
        local secondchar = second and ischar(second,font)
        local third      = secondchar and getnext(second)
        local thirdchar  = third and ischar(third,font)
        if thirdchar then
            if secondchar == c_zwj and consonant[thirdchar] then
                c = third
            elseif (secondchar == c_zwnj or secondchar == c_zwj) and halant[thirdchar] then
                local fourth     = getnext(third)
                local fourthchar = fourth and ischar(fourth,font)
                if fourthchar and consonant[fourthchar] then
                    c = fourth
                end
            end
        end
    elseif variant == 2 then
        local first = ischar(n,font)
        if first and nukta[first] then
            c = n
        end
        local node = getnext(c)
        local char = node and ischar(node,font)
        if char then
            local follow = getnext(node)
            if follow then
                local followchar = ischar(follow,font)
                if followchar and zw_char[char] then
                    -- step over the zero width character
                    char       = followchar
                    follow     = getnext(follow)
                    followchar = follow and ischar(follow,font)
                end
                if followchar and halant[char] and consonant[followchar] then
                    c = follow
                end
            end
        end
    end
    -- c = ms_matra(c)
    n = getnext(c)
    local v = n and ischar(n,font)
    if not v then
        return c
    end
    -- at most one mark per position is accepted
    local seen_pre, seen_above, seen_below, seen_post
    while dependent_vowel[v] do
        local parts = twopart_mark[v] or { v }
        for _, part in next, parts do
            if pre_mark[part] and not seen_pre then
                seen_pre = true
            elseif above_mark[part] and not seen_above then
                seen_above = true
            elseif below_mark[part] and not seen_below then
                seen_below = true
            elseif post_mark[part] and not seen_post then
                seen_post = true
            else
                return c
            end
        end
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if nukta[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if halant[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    -- a (second) stress or tone mark still belongs to the cluster
    if stress_tone_mark[v] then
        return n
    end
    return c
end
2344
-- Determine the last node of a consonant syllable that starts at node 'c'. The
-- caller describes the pattern as:
--
--   {C+[N]+<H+[<ZWNJ|ZWJ>]|<ZWNJ|ZWJ>+H>} + C+[N]+[A] + [< H+[<ZWNJ|ZWJ>] | {M}+[N]+[H]>]+[SM]+[(VD)]
--
-- c    : node of the first consonant
-- font : font id handed to ischar, only glyphs for which ischar succeeds take
--        part in the scan
--
-- The node that ends the syllable is returned, which is 'c' itself when nothing
-- that follows matches.

local function analyze_next_chars_two(c,font)
    local n = getnext(c)
    if not n then
        return c
    end
    local v = ischar(n,font)
    if v and nukta[v] then
        c = n
    end
    -- Collect the leading consonants: each round has to find a halant (with a zero
    -- width character before or after it) followed by a consonant and an optional
    -- nukta. Only after a complete round 'c' is moved forward.
    local tail = c
    while true do
        local link     = getnext(tail)
        local linkchar = link and ischar(link,font)
        if not linkchar then
            break
        end
        if halant[linkchar] then
            tail = link
            local after     = getnext(link)
            local afterchar = after and ischar(after,font)
            if afterchar and zw_char[afterchar] then
                tail = after
            end
        elseif linkchar == c_zwnj or linkchar == c_zwj then
            -- the zero width character is only accepted when a halant follows
            local after     = getnext(link)
            local afterchar = after and ischar(after,font)
            if afterchar and halant[afterchar] then
                tail = after
            end
        else
            break
        end
        local cons     = getnext(tail)
        local conschar = cons and ischar(cons,font)
        if not (conschar and consonant[conschar]) then
            break
        end
        tail = cons
        local mark     = getnext(cons)
        local markchar = mark and ischar(mark,font)
        if markchar and nukta[markchar] then
            tail = mark
        end
        c = tail
    end
    --
    if not c then
        -- This shouldn't happen I guess.
        return
    end
    n = getnext(c)
    v = n and ischar(n,font)
    if not v then
        return c
    end
    if anudatta[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if halant[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
        if v == c_zwnj or v == c_zwj then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
    else
        -- c = ms_matra(c)
        -- same as one: at most one mark per position is accepted
        local seen_pre, seen_above, seen_below, seen_post
        while dependent_vowel[v] do
            local parts = twopart_mark[v] or { v }
            for _, part in next, parts do
                if pre_mark[part] and not seen_pre then
                    seen_pre = true
                elseif above_mark[part] and not seen_above then
                    seen_above = true
                elseif below_mark[part] and not seen_below then
                    seen_below = true
                elseif post_mark[part] and not seen_post then
                    seen_post = true
                else
                    return c
                end
            end
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
        if nukta[v] then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
        if halant[v] then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
    end
    -- same as one
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    -- a (second) stress or tone mark still belongs to the syllable
    if stress_tone_mark[v] then
        return n
    end
    return c
end
2536
2537-- It looks like these two analyzers were written independently but they share
2538-- a lot. Common code has been synced.
2539
-- Analyzer for the scripts in scripts_one. The node list is scanned for glyphs in
-- the given font; each recognized syllable is tagged (a_syllabe property) and, when
-- it spans more than one node, handed over to reorder_one.
--
-- head : first node of the list
-- font : font id, only glyphs for which ischar(node,font) succeeds are analyzed
-- attr : passed on untouched to reorder_one
--
-- Returned are the (possibly changed) head and a boolean that is true when at
-- least one glyph in the font has been seen.
--
-- After the syllable pass all nbsp's get replaced (when reorder_one reported any)
-- and the first glyph of each run of glyphs in this font that has no state yet
-- gets the s_init state.

local function method_one(head,font,attr)
    local current  = head
    local done     = false
    local nbspaces = 0
    local syllabe  = 0
    while current do
        local char = ischar(current,font)
        if char then
            done = true
            local syllablestart = current
            local syllableend   = nil
            local c = current
            local n = getnext(c)
            local first = char
            if n and ra[first] then
                -- skip a leading Ra+H so that we test what comes after it
                local second = ischar(n,font)
                if second and halant[second] then
                    local nn = getnext(n)
                    if nn then
                        local third = ischar(nn,font)
                        if third then
                            c = nn
                            first = third
                        end
                    end
                end
            end
            local standalone = first == c_nbsp
            if standalone then
                local prev = getprev(current)
                if prev then
                    local prevchar = ischar(prev,font)
                    if not prevchar then
                        -- different font or language so quite certainly a different word
                    elseif not separator[prevchar] then
                        -- something that separates words
                    else
                        standalone = false
                    end
                else
                    -- begin of paragraph or box
                end
            end
            if standalone then
                -- stand alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                local clusterend = analyze_next_chars_one(c,font,2)
                current = getnext(clusterend)
                if syllablestart ~= clusterend then
                    head, current, nbspaces = reorder_one(head,syllablestart,clusterend,font,attr,nbspaces)
                    current = getnext(current)
                end
            else
                -- we can delay the getsubtype(n) and getfont(n) and test for say halant first
                -- as an table access is faster than two function calls (subtype and font are
                -- pseudo fields) but the code becomes messy (unless we make it a function)
                if consonant[char] then
                    -- syllable containing consonant
                    local prevc = true
                    while prevc do
                        -- each round tries to match [N] + H + [ZWNJ|ZWJ] + C after current
                        prevc = false
                        local n = getnext(current)
                        if not n then
                            break
                        end
                        local v = ischar(n,font)
                        if not v then
                            break
                        end
                        if nukta[v] then
                            n = getnext(n)
                            if not n then
                                break
                            end
                            v = ischar(n,font)
                            if not v then
                                break
                            end
                        end
                        if halant[v] then
                            n = getnext(n)
                            if not n then
                                break
                            end
                            v = ischar(n,font)
                            if not v then
                                break
                            end
                            if v == c_zwnj or v == c_zwj then
                                n = getnext(n)
                                if not n then
                                    break
                                end
                                v = ischar(n,font)
                                if not v then
                                    break
                                end
                            end
                            if consonant[v] then
                                prevc = true
                                current = n
                            end
                        end
                    end
                    local n = getnext(current)
                    if n then
                        local v = ischar(n,font)
                        if v and nukta[v] then
                            -- nukta (not specified in Microsoft Devanagari OpenType specification)
                            current = n
                            n = getnext(current)
                        end
                    end
                    syllableend = current
                    current = n
                    if current then
                        local v = ischar(current,font)
                        if not v then
                            -- skip
                        elseif halant[v] then
                            -- syllable containing consonant without vowels: {C + [Nukta] + H} + C + H
                            local n = getnext(current)
                            if n then
                                local v = ischar(n,font)
                                if v and zw_char[v] then
                                    -- code collapsed, probably needs checking with intention
                                    syllableend = n
                                    current = getnext(n)
                                else
                                    syllableend = current
                                    current = n
                                end
                            else
                                syllableend = current
                                current = n
                            end
                        else
                            -- syllable containing consonant with vowels: {C + [Nukta] + H} + C + [M] + [VM] + [SM]
                            --
                            -- the syllable can end the list, so we never feed a nil node to ischar
                            if dependent_vowel[v] then
                                syllableend = current
                                current = getnext(current)
                                v = current and ischar(current,font)
                            end
                            if v and vowel_modifier[v] then
                                syllableend = current
                                current = getnext(current)
                                v = current and ischar(current,font)
                            end
                            if v and stress_tone_mark[v] then
                                syllableend = current
                                current = getnext(current)
                            end
                        end
                    end
                    if syllablestart ~= syllableend then
                        if syllableend then
                            -- tag all nodes of this syllable with the same number
                            syllabe = syllabe + 1
                            local c = syllablestart
                            local n = getnext(syllableend)
                            while c ~= n do
                                setprop(c,a_syllabe,syllabe)
                                c = getnext(c)
                            end
                        end
                        head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces)
                        current = getnext(current)
                    end
                elseif independent_vowel[char] then
                    -- syllable without consonants: VO + [VM] + [SM]
                    syllableend = current
                    current = getnext(current)
                    if current then
                        local v = ischar(current,font)
                        if v then
                            if vowel_modifier[v] then
                                syllableend = current
                                current = getnext(current)
                                -- again: the vowel modifier can be the last node
                                v = current and ischar(current,font)
                            end
                            if v and stress_tone_mark[v] then
                                syllableend = current
                                current = getnext(current)
                            end
                        end
                    end
                else
                    if show_syntax_errors then
                        local mark = mark_four[char]
                        if mark then
                            head, current = inject_syntax_error(head,current,char)
                        end
                    end
                    current = getnext(current)
                end
            end
        else
            current = getnext(current)
        end
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- flag the first glyph of each run of glyphs in this font (unless it has a state)
    current = head
    local n = 0
    while current do
        local char = ischar(current,font)
        if char then
            if n == 0 and not getstate(current) then
                setstate(current,s_init)
            end
            n = n + 1
        else
            n = 0
        end
        current = getnext(current)
    end

    return head, done
end
2762
-- there is a good chance that when we run into one with subtype < 256 that the rest is also done
-- so maybe we can omit this check (it's pretty hard to get glyphs in the stream out of the blue)
2765
-- Analyzer for the scripts in scripts_two. The node list is scanned for glyphs in
-- the given font; each recognized syllable is tagged (a_syllabe property) and, when
-- it spans more than one node, handed over to reorder_two.
--
-- head : first node of the list
-- font : font id, only glyphs for which ischar(node,font) succeeds are analyzed
-- attr : passed on untouched to reorder_two
--
-- Returned are the (possibly changed) head and a boolean that is true when at
-- least one glyph in the font has been seen.
--
-- After the syllable pass all nbsp's get replaced (when any were counted) and the
-- first glyph of each run of glyphs in this font that has no state yet gets the
-- s_init state.

local function method_two(head,font,attr)
    local current  = head
    local done     = false
    local syllabe  = 0
    local nbspaces = 0
    while current do
        local syllablestart = nil
        local syllableend   = nil
        local char = ischar(current,font)
        if char then
            done = true
            syllablestart = current
            local c = current
            local n = getnext(current)
            if n and ra[char] then
                -- skip a leading Ra+H so that we test what comes after it
                local nextchar = ischar(n,font)
                if nextchar and halant[nextchar] then
                    local nn = getnext(n)
                    if nn then
                        local nextnextchar = ischar(nn,font)
                        if nextnextchar then
                            c = nn
                            char = nextnextchar
                        end
                    end
                end
            end
            if independent_vowel[char] then
                -- vowel-based syllable: [Ra+H]+V+[N]+[<[<ZWJ|ZWNJ>]+H+C|ZWJ+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                current = analyze_next_chars_one(c,font,1)
                syllableend = current
            else
                local standalone = char == c_nbsp
                if standalone then
                    nbspaces = nbspaces + 1
                    local p = getprev(current)
                    if not p then
                        -- begin of paragraph or box
                    else
                        -- Same test as in method_one: the nbsp only loses its stand alone
                        -- status when it follows a glyph in the same font that can be part
                        -- of a word. The char is taken from ischar so that we never look at
                        -- the char of a node that is not a glyph in this font.
                        local prevchar = ischar(p,font)
                        if not prevchar then
                            -- different font or language so quite certainly a different word
                        elseif not separator[prevchar] then
                            -- something that separates words
                        else
                            standalone = false
                        end
                    end
                end
                if standalone then
                    -- Stand Alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                    current = analyze_next_chars_one(c,font,2)
                    syllableend = current
                elseif consonant[getchar(current)] then
                    -- WHY current INSTEAD OF c ?

                    -- Consonant syllable: {C+[N]+<H+[<ZWNJ|ZWJ>]|<ZWNJ|ZWJ>+H>} + C+[N]+[A] + [< H+[<ZWNJ|ZWJ>] | {M}+[N]+[H]>]+[SM]+[(VD)]
                    current = analyze_next_chars_two(current,font) -- not c !
                    syllableend = current
                end
            end
        end
        if syllableend then
            -- tag all nodes of this syllable with the same number
            syllabe = syllabe + 1
            local c = syllablestart
            local n = getnext(syllableend)
            while c ~= n do
                setprop(c,a_syllabe,syllabe)
                c = getnext(c)
            end
        end
        if syllableend and syllablestart ~= syllableend then
            head, current, nbspaces = reorder_two(head,syllablestart,syllableend,font,attr,nbspaces)
        end
        if not syllableend and show_syntax_errors then
            local char = ischar(current,font)
            if char and not getstate(current) then -- state can also be init
                local mark = mark_four[char]
                if mark then
                    head, current = inject_syntax_error(head,current,char)
                end
            end
        end
        current = getnext(current)
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- flag the first glyph of each run of glyphs in this font (unless it has a state)
    current = head
    local n = 0
    while current do
        local char = ischar(current,font)
        if char then
            if n == 0 and not getstate(current) then -- state can also be init
                setstate(current,s_init)
            end
            n = n + 1
        else
            n = 0
        end
        current = getnext(current)
    end

    return head, done
end
2872
-- Hook the analyzers into the methods table: the i-th script of each list gets
-- the analyzer that belongs to that list.

for index=1,nofscripts do
    local script_one = scripts_one[index]
    local script_two = scripts_two[index]
    methods[script_one] = method_one
    methods[script_two] = method_two
end
2877