1if not modules then modules = { } end modules ['font-osd'] = {
2 version = 1.001,
3 comment = "companion to font-ini.mkiv",
4 author = "Kai Eigner, TAT Zetwerk / Hans Hagen, PRAGMA ADE",
5 copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84local insert, imerge, copy, tohash = table.insert, table.imerge, table.copy, table.tohash
85local next, type = next, type
86
87local report = logs.reporter("otf","devanagari")
88
89fonts = fonts or { }
90fonts.analyzers = fonts.analyzers or { }
91fonts.analyzers.methods = fonts.analyzers.methods or { node = { otf = { } } }
92
93local otf = fonts.handlers.otf
94
95local handlers = otf.handlers
96local methods = fonts.analyzers.methods
97
98local otffeatures = fonts.constructors.features.otf
99local registerotffeature = otffeatures.register
100
101local nuts = nodes.nuts
102
103local getnext = nuts.getnext
104local getprev = nuts.getprev
105local getboth = nuts.getboth
106local getid = nuts.getid
107local getchar = nuts.getchar
108local getfont = nuts.getfont
109local getsubtype = nuts.getsubtype
110local setlink = nuts.setlink
111local setnext = nuts.setnext
112local setprev = nuts.setprev
113local setchar = nuts.setchar
114local getprop = nuts.getprop
115local setprop = nuts.setprop
116local getstate = nuts.getstate
117local setstate = nuts.setstate
118
119local ischar = nuts.ischar
120
121local insertnodeafter = nuts.insertafter
122local copy_node = nuts.copy
123local remove_node = nuts.remove
124local flushlist = nuts.flushlist
125local flushnode = nuts.flushnode
126
127local copyinjection = nodes.injections.copy
128
129local unsetvalue = attributes.unsetvalue
130
131local fontdata = fonts.hashes.identifiers
132
-- Node property keys. a_syllabe is read with getprop() by the reorder handlers
-- further down to decide whether two nodes belong to the same syllable.
local a_syllabe = "syllable"
local a_reordered = "reordered"

-- Code points that get special treatment.
local dotted_circle = 0x25CC -- U+25CC DOTTED CIRCLE, used by inject_syntax_error
local c_nbsp = 0x00A0        -- U+00A0 NO-BREAK SPACE
local c_zwnj = 0x200C        -- U+200C ZERO WIDTH NON-JOINER
local c_zwj = 0x200D         -- U+200D ZERO WIDTH JOINER

-- Glyph state values shared with the font analyzers; the reordering code
-- stores them on nodes with setstate() and queries them with getstate().
local states = fonts.analyzers.states

local s_rphf = states.rphf
local s_half = states.half
local s_pref = states.pref
local s_blwf = states.blwf
local s_pstf = states.pstf
local s_init = states.init
-- Lazy, self-replacing resolver. The first call looks for
-- typesetters.characters.replacenbspaces; when that is not available a
-- function that returns its argument unchanged is used instead. Whatever was
-- picked replaces this stub, so the lookup happens only once.
--
-- head : value handed to the selected replacer
-- returns whatever the selected replacer returns for head
local replace_all_nbsp

function replace_all_nbsp(head)
    local tc       = typesetters and typesetters.characters
    local replacer = tc and tc.replacenbspaces
    if not replacer then
        replacer = function(head)
            return head
        end
    end
    replace_all_nbsp = replacer
    return replacer(head)
end
158
-- Run every processor registered for a font over a node list.
--
-- Where the processor list comes from is decided once, when this file is
-- loaded: with a truthy global `context` it is fonts.hashes.processes[font],
-- otherwise fontdata[font].shared.processes.
--
-- head : list to process
-- font : font id used to look up the processors
-- returns the head produced by the last processor
local processcharacters = nil

do
    local processorsof

    if context then
        local fontprocesses = fonts.hashes.processes
        processorsof = function(font)
            return fontprocesses[font]
        end
    else
        processorsof = function(font)
            return fontdata[font].shared.processes
        end
    end

    processcharacters = function(head,font)
        local chain = processorsof(font)
        for index=1,#chain do
            -- each processor gets (head, font, 0) and hands back the new head
            head = chain[index](head,font,0)
        end
        return head
    end
end
179
180
181
182
183
-- Character sets used by the Indic reordering code. When the character
-- database does not provide characters.indicgroups yet, the sets are derived
-- from the indic, indicmark, indicclass and indicorder fields found in
-- characters.data and the result is stored back in characters.indicgroups.
local indicgroups = characters and characters.indicgroups

if characters and not indicgroups then

    -- one set per value of the "indic" field
    local bykind = {
        c = { },
        i = { },
        d = { },
        m = { },
        s = { },
        o = { },
    }

    -- one set per value of the "indicmark" field
    local bymark = {
        l = { },
        t = { },
        b = { },
        r = { },
        s = { },
    }

    -- one set per value of the "indicclass" field
    local byclass = {
        nukta    = { },
        halant   = { },
        ra       = { },
        anudatta = { },
    }

    -- one set per value of the "indicorder" field
    local byorder = {
        bp = { },
        ap = { },
        bs = { },
        as = { },
        bh = { },
        ah = { },
        bm = { },
        am = { },
    }

    for unicode, chardata in next, characters.data do
        local kind = chardata.indic
        if kind then
            bykind[kind][unicode] = true
            local mark = chardata.indicmark
            if mark == "s" then
                -- "s" marks keep the second and third entry of their specials
                local specials = chardata.specials
                bymark.s[unicode] = { specials[2], specials[3] }
            elseif mark then
                bymark[mark][unicode] = true
            end
            local class = chardata.indicclass
            if class then
                byclass[class][unicode] = true
            end
            local order = chardata.indicorder
            if order then
                byorder[order][unicode] = true
            end
        end
    end

    indicgroups = {
        consonant         = bykind.c,
        independent_vowel = bykind.i,
        dependent_vowel   = bykind.d,
        vowel_modifier    = bykind.m,
        stress_tone_mark  = bykind.s,

        pre_mark          = bymark.l,
        above_mark        = bymark.t,
        below_mark        = bymark.b,
        post_mark         = bymark.r,
        twopart_mark      = bymark.s,

        nukta             = byclass.nukta,
        halant            = byclass.halant,
        ra                = byclass.ra,
        anudatta          = byclass.anudatta,

        before_postscript = byorder.bp,
        after_postscript  = byorder.ap,
        before_half       = byorder.bh,
        after_half        = byorder.ah,
        before_subscript  = byorder.bs,
        after_subscript   = byorder.as,
        before_main       = byorder.bm,
        after_main        = byorder.am,
    }

    characters.indicgroups = indicgroups

end
281
-- Local shortcuts for the sets in indicgroups. The code below uses them as
-- lookup tables indexed by character (e.g. consonant[getchar(n)]).
local consonant = indicgroups.consonant
local independent_vowel = indicgroups.independent_vowel
local dependent_vowel = indicgroups.dependent_vowel
local vowel_modifier = indicgroups.vowel_modifier
local stress_tone_mark = indicgroups.stress_tone_mark
local pre_mark = indicgroups.pre_mark
local above_mark = indicgroups.above_mark
local below_mark = indicgroups.below_mark
local post_mark = indicgroups.post_mark
-- entries of twopart_mark are read as a pair: [1] and [2] are the two
-- characters a two-part mark is split into (see reorder_one)
local twopart_mark = indicgroups.twopart_mark
local nukta = indicgroups.nukta
local halant = indicgroups.halant
local ra = indicgroups.ra
local anudatta = indicgroups.anudatta

-- sets built from the "indicorder" field
local before_postscript = indicgroups.before_postscript
local after_postscript = indicgroups.after_postscript
local before_half = indicgroups.before_half
local after_half = indicgroups.after_half
local before_subscript = indicgroups.before_subscript
local after_subscript = indicgroups.after_subscript
local before_main = indicgroups.before_main
local after_main = indicgroups.after_main

-- NOTE(review): table.merged is not part of stock Lua; presumably the ConTeXt
-- table helper that returns a new table holding the keys of all its
-- arguments -- confirm against the l-table library.
local mark_four = table.merged (
    pre_mark,
    above_mark,
    below_mark,
    post_mark
)

local mark_above_below_post = table.merged (
    above_mark,
    below_mark,
    post_mark
)
318
319
320
321
322
323
-- The two zero-width (non-)joiner characters as a lookup set.
local zw_char = {
    [c_zwnj] = true,
    [c_zwj ] = true,
}

-- Language table shared by every script entry of one_defaults / two_defaults.
-- initializedevanagi also adds the font's own language tag to it.
local dflt_true = {
    dflt = true,
}

-- script tag -> dflt_true; both are filled by the loop over scripts_one and
-- scripts_two further down (two_defaults only gets the scripts_two tags).
local two_defaults = { }
local one_defaults = { }

local false_flags = { false, false, false, false }

-- Templates for four pseudo sequences. initializedevanagi copies each of them
-- and inserts the copies into a font's sequence list. The `type` strings match
-- the names of the handlers.devanagari_* functions defined later in the file.

-- dv01: covers the pre-base marks (plus locl substitutes added per font).
local sequence_reorder_matras = {
    features = { dv01 = two_defaults },
    flags = false_flags,
    name = "dv01_reorder_matras",
    order = { "dv01" },
    type = "devanagari_reorder_matras",
    nofsteps = 1,
    steps = {
        {
            coverage = pre_mark,
        }
    }
}

-- dv02: the steps table is replaced per font by initializedevanagi.
local sequence_reorder_reph = {
    features = { dv02 = two_defaults },
    flags = false_flags,
    name = "dv02_reorder_reph",
    order = { "dv02" },
    type = "devanagari_reorder_reph",
    nofsteps = 1,
    steps = {
        {
            coverage = { },
        }
    }
}

-- dv03: the coverage is replaced per font by initializedevanagi.
local sequence_reorder_pre_base_reordering_consonants = {
    features = { dv03 = one_defaults },
    flags = false_flags,
    name = "dv03_reorder_pre_base_reordering_consonants",
    order = { "dv03" },
    type = "devanagari_reorder_pre_base_reordering_consonants",
    nofsteps = 1,
    steps = {
        {
            coverage = { },
        }
    }
}

-- dv04: covers the zero-width joiners.
local sequence_remove_joiners = {
    features = { dv04 = one_defaults },
    flags = false_flags,
    name = "dv04_remove_joiners",
    order = { "dv04" },
    type = "devanagari_remove_joiners",
    nofsteps = 1,
    steps = {
        {
            coverage = zw_char,
        },
    }
}
393
394
395
396
397
-- Feature tags of the basic shaping forms. initializedevanagi moves every
-- sequence that carries one of these features to the front of the font's
-- sequence list.
local basic_shaping_forms = {
    akhn = true,
    blwf = true,
    cjct = true,
    half = true,
    nukt = true,
    pref = true,
    pstf = true,
    rkrf = true,
    rphf = true,
    vatu = true,
    locl = true,
}

-- Feature tags whose sequences initializedevanagi records in the seqsubset
-- list. Each tag is listed exactly once; the earlier version repeated pres,
-- abvs, psts, haln and calt, which was harmless at run time but hid the real
-- size of the set.
local valid = {
    abvs = true,
    akhn = true,
    blwf = true,
    blws = true,
    calt = true,
    cjct = true,
    half = true,
    haln = true,
    locl = true,
    nukt = true,
    pref = true,
    pres = true,
    pstf = true,
    psts = true,
    rkrf = true,
    rphf = true,
    vatu = true,
}
436
-- Script tags handled by this module. scripts_one and scripts_two are
-- parallel lists: entry i of one list belongs with entry i of the other.
local scripts     = { }

local scripts_one = { "deva", "mlym", "beng", "gujr", "guru", "knda", "orya", "taml", "telu" }
local scripts_two = { "dev2", "mlm2", "bng2", "gjr2", "gur2", "knd2", "ory2", "tml2", "tel2" }

local nofscripts  = #scripts_one

-- Register every tag in `scripts`; the "two" tags go into both default
-- tables, the "one" tags only into one_defaults.
for index=1,nofscripts do
    local first  = scripts_one[index]
    local second = scripts_two[index]
    scripts[first]       = true
    scripts[second]      = true
    one_defaults[first]  = dflt_true
    one_defaults[second] = dflt_true
    two_defaults[second] = dflt_true
end

-- Returns true when table s has a truthy entry for one of the scripts_one
-- tags; returns nothing otherwise.
local function valid_one(s)
    for index=1,nofscripts do
        if s[scripts_one[index]] then
            return true
        end
    end
end

-- Same test as valid_one, but for the scripts_two tags.
local function valid_two(s)
    for index=1,nofscripts do
        if s[scripts_two[index]] then
            return true
        end
    end
end
456
-- Font initializer, registered at the bottom of this block for the
-- "devanagari" feature in node mode.
--
-- For a font whose script tag is one of the entries in `scripts` and whose
-- resources do not have a `devanagari` table yet, it
--   * registers the pseudo features dv01..dv04 in resources.features.gsub,
--   * moves sequences that carry a basic shaping feature to the front of
--     resources.sequences and inserts copies of the four pseudo sequences
--     right behind them,
--   * scans all sequences to fill resources.devanagari (reph/vattu flags,
--     blwf/vatu/pstf caches, seqsubset) and the coverage of the pre-base
--     reordering pseudo sequence,
--   * switches the dv features on in tfmdata.shared.features.
--
-- tfmdata : font data table (reads .resources, .shared.features, .properties)
-- Returns nothing.
local function initializedevanagi(tfmdata)
    -- NOTE(review): `attr` is neither a parameter nor a local visible in this
    -- chunk, so this reads a global (normally nil) -- confirm that is intended.
    -- `language` is not used afterwards.
    local script, language = otf.scriptandlanguage(tfmdata,attr)
    if scripts[script] then
        local resources = tfmdata.resources
        local devanagari = resources.devanagari
        if not devanagari then

            report("adding devanagari features to font")

            local gsubfeatures = resources.features.gsub
            local sequences = resources.sequences
            local sharedfeatures = tfmdata.shared.features

            gsubfeatures["dv01"] = two_defaults
            gsubfeatures["dv02"] = two_defaults
            gsubfeatures["dv03"] = one_defaults
            gsubfeatures["dv04"] = one_defaults

            -- per-font copies of the templates, so the coverage tables can be
            -- changed without touching the shared originals
            local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants)
            local reorder_reph = copy(sequence_reorder_reph)
            local reorder_matras = copy(sequence_reorder_matras)
            local remove_joiners = copy(sequence_remove_joiners)

            -- Pass 1: extend the matra coverage with locl substitutes and move
            -- the basic shaping sequences to the front.
            local lastmatch = 0
            for s=1,#sequences do
                local features = sequences[s].features
                if features then
                    for k, v in next, features do
                        if k == "locl" then
                            local steps = sequences[s].steps
                            local nofsteps = sequences[s].nofsteps
                            for i=1,nofsteps do
                                local step = steps[i]
                                local coverage = step.coverage
                                if coverage then
                                    -- this k shadows the feature key of the enclosing loop
                                    for k, v in next, pre_mark do
                                        local locl = coverage[k]
                                        if locl then
                                            -- a coverage entry with an array part is treated as a
                                            -- list of contextual rules: ck[4] indexes into ck[6]
                                            if #locl > 0 then
                                                for j=1,#locl do
                                                    local ck = locl[j]
                                                    local f = ck[4]
                                                    local chainlookups = ck[6]
                                                    if chainlookups then
                                                        local chainlookup = chainlookups[f]
                                                        for j=1,#chainlookup do
                                                            local chainstep = chainlookup[j]
                                                            local steps = chainstep.steps
                                                            local nofsteps = chainstep.nofsteps
                                                            for i=1,nofsteps do
                                                                local step = steps[i]
                                                                local coverage = step.coverage
                                                                if coverage then
                                                                    -- NOTE(review): this replaces the table that the
                                                                    -- enclosing `for j=1,#locl` loop is still indexing
                                                                    locl = coverage[k]
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            end
                                            if locl then
                                                reorder_matras.steps[1].coverage[locl] = true
                                            end
                                        end
                                    end
                                end
                            end
                        end
                        if basic_shaping_forms[k] then
                            lastmatch = lastmatch + 1
                            if s ~= lastmatch then
                                -- NOTE(review): this runs once per matching feature key; on a
                                -- second hit for the same s, sequences[s] is no longer the
                                -- sequence whose features are being walked -- verify.
                                table.insert(sequences, lastmatch, table.remove(sequences, s))
                            end
                        end
                    end
                end
            end
            local insertindex = lastmatch + 1

            -- the font's own language tag is added to the shared dflt_true table
            if tfmdata.properties.language then
                dflt_true[tfmdata.properties.language] = true
            end

            -- all four go in at the same index, so they end up in reverse
            -- order: remove_joiners, reorder_matras, reorder_reph, pre-base
            insert(sequences,insertindex,reorder_pre_base_reordering_consonants)
            insert(sequences,insertindex,reorder_reph)
            insert(sequences,insertindex,reorder_matras)
            insert(sequences,insertindex,remove_joiners)

            local blwfcache = { }
            local vatucache = { }
            local pstfcache = { }
            local seqsubset = { }
            local rephstep = {
                coverage = { }
            }
            -- shadows the (nil) `devanagari` fetched from resources above
            local devanagari = {
                reph = false,
                vattu = false,
                blwfcache = blwfcache,
                vatucache = vatucache,
                pstfcache = pstfcache,
                seqsubset = seqsubset,
                reorderreph = rephstep,

            }

            reorder_reph.steps = { rephstep }

            local pre_base_reordering_consonants = { }
            reorder_pre_base_reordering_consonants.steps[1].coverage = pre_base_reordering_consonants

            resources.devanagari = devanagari

            -- Pass 2: inspect every sequence (including the four just inserted).
            for s=1,#sequences do
                local sequence = sequences[s]
                local steps = sequence.steps
                local nofsteps = sequence.nofsteps
                -- NOTE(review): pass 1 guards against a missing features table,
                -- this pass indexes it unconditionally.
                local features = sequence.features
                local has_rphf = features.rphf
                local has_blwf = features.blwf
                local has_vatu = features.vatu
                local has_pstf = features.pstf
                if has_rphf and has_rphf[script] then
                    devanagari.reph = true
                elseif (has_blwf and has_blwf[script] ) or (has_vatu and has_vatu[script] ) then
                    devanagari.vattu = true
                    for i=1,nofsteps do
                        local step = steps[i]
                        local coverage = step.coverage
                        if coverage then
                            for k, v in next, coverage do
                                for h, w in next, halant do
                                    -- first coverage entry per character that has a halant key
                                    if v[h] then
                                        if not blwfcache[k] then
                                            blwfcache[k] = v
                                        end
                                    end
                                    if has_vatu and has_vatu[script] and not vatucache[k] then
                                        vatucache[k] = v
                                    end
                                end
                            end
                        end
                    end
                elseif has_pstf and has_pstf[script] then
                    for i=1,nofsteps do
                        local step = steps[i]
                        local coverage = step.coverage
                        if coverage then
                            for k, v in next, coverage do
                                if not pstfcache[k] then
                                    pstfcache[k] = v
                                end
                            end
                            -- look for the glyph a ra character is turned into, either
                            -- directly or via the chained lookups of contextual rules
                            for k, v in next, ra do
                                local r = coverage[k]
                                if r then
                                    local found = false
                                    if #r > 0 then
                                        for j=1,#r do
                                            local ck = r[j]
                                            local f = ck[4]
                                            local chainlookups = ck[6]
                                            if chainlookups and chainlookups[f] then
                                                local chainlookup = chainlookups[f]
                                                for j=1,#chainlookup do
                                                    local chainstep = chainlookup[j]
                                                    local steps = chainstep.steps
                                                    local nofsteps = chainstep.nofsteps
                                                    for i=1,nofsteps do
                                                        local step = steps[i]
                                                        local coverage = step.coverage
                                                        if coverage then
                                                            local h = coverage[k]
                                                            if h then
                                                                for k, v in next, h do

                                                                    -- a numeric entry is taken as is, otherwise its
                                                                    -- .ligature field is used
                                                                    found = v and (tonumber(v) or v.ligature)
                                                                    if found then
                                                                        pre_base_reordering_consonants[found] = true
                                                                        break
                                                                    end
                                                                end
                                                                if found then
                                                                    break
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            end
                                        end
                                    else
                                        for k, v in next, r do

                                            found = v and (tonumber(v) or v.ligature)
                                            if found then
                                                pre_base_reordering_consonants[found] = true
                                                break
                                            end
                                        end
                                    end
                                    if found then
                                        break
                                    end
                                end
                            end
                        end
                    end
                end
                for kind, spec in next, features do
                    -- record every step coverage of a valid feature that is enabled
                    -- for one of the scripts_two tags
                    if valid[kind] and valid_two(spec)then
                        for i=1,nofsteps do
                            local step = steps[i]
                            local coverage = step.coverage
                            if coverage then
                                local reph, rephbase = false, false
                                if kind == "rphf" then

                                    -- find the ra character covered here (rephbase) and the
                                    -- glyph that ra + halant is turned into (reph)
                                    for k, v in next, ra do
                                        local r = coverage[k]
                                        if r then
                                            rephbase = k
                                            local h = false
                                            if #r > 0 then
                                                for j=1,#r do
                                                    local ck = r[j]
                                                    local f = ck[4]
                                                    local chainlookups = ck[6]
                                                    if chainlookups then
                                                        local chainlookup = chainlookups[f]
                                                        for j=1,#chainlookup do
                                                            local chainstep = chainlookup[j]
                                                            local steps = chainstep.steps
                                                            local nofsteps = chainstep.nofsteps
                                                            for i=1,nofsteps do
                                                                local step = steps[i]
                                                                local coverage = step.coverage
                                                                if coverage then
                                                                    local r = coverage[k]
                                                                    if r then
                                                                        for k, v in next, halant do
                                                                            local h = r[k]
                                                                            if h then

                                                                                reph = tonumber(h) or h.ligature or false
                                                                                break
                                                                            end
                                                                        end
                                                                        -- NOTE(review): this `h` is the outer local that is
                                                                        -- initialised to false and never assigned (the loop
                                                                        -- above declares its own `h`), so this break can
                                                                        -- never fire.
                                                                        if h then
                                                                            break
                                                                        end
                                                                    end
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            else
                                                for k, v in next, halant do
                                                    local h = r[k]
                                                    if h then

                                                        reph = tonumber(h) or h.ligature or false
                                                        break
                                                    end
                                                end
                                            end
                                            if reph then
                                                break
                                            end
                                        end
                                    end
                                end
                                seqsubset[#seqsubset+1] = { kind, coverage, reph, rephbase }
                            end
                        end
                    end
                    if kind == "pref" then
                        -- same search as in the pstf branch above, but starting from
                        -- the halant characters
                        local steps = sequence.steps
                        local nofsteps = sequence.nofsteps
                        for i=1,nofsteps do
                            local step = steps[i]
                            local coverage = step.coverage
                            if coverage then
                                for k, v in next, halant do
                                    local h = coverage[k]
                                    if h then
                                        local found = false
                                        if #h > 0 then
                                            for j=1,#h do
                                                local ck = h[j]
                                                local f = ck[4]
                                                local chainlookups = ck[6]
                                                if chainlookups then
                                                    local chainlookup = chainlookups[f]
                                                    for j=1,#chainlookup do
                                                        local chainstep = chainlookup[j]
                                                        local steps = chainstep.steps
                                                        local nofsteps = chainstep.nofsteps
                                                        for i=1,nofsteps do
                                                            local step = steps[i]
                                                            local coverage = step.coverage
                                                            if coverage then
                                                                local h = coverage[k]
                                                                if h then
                                                                    for k, v in next, h do

                                                                        found = v and (tonumber(v) or v.ligature)
                                                                        if found then
                                                                            pre_base_reordering_consonants[found] = true
                                                                            break
                                                                        end
                                                                    end
                                                                    if found then
                                                                        break
                                                                    end
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            end
                                        else
                                            for k, v in next, h do

                                                found = v and (tonumber(v) or v.ligature)
                                                if found then
                                                    pre_base_reordering_consonants[found] = true
                                                    break
                                                end
                                            end
                                        end
                                        if found then
                                            break
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
            end

            -- scripts_two tags get all four pseudo features, scripts_one tags
            -- only dv03 and dv04
            if two_defaults[script] then
                sharedfeatures["dv01"] = true
                sharedfeatures["dv02"] = true
                sharedfeatures["dv03"] = true
                sharedfeatures["dv04"] = true
            elseif one_defaults[script] then
                sharedfeatures["dv03"] = true
                sharedfeatures["dv04"] = true
            end
            -- read by reorder_one when it repositions pre-base marks
            if script == "mlym" or script == "taml" then
                devanagari.left_matra_before_base = true
            end
        end
    end
end

-- Enabled by default; the initializer only runs in node mode.
registerotffeature {
    name = "devanagari",
    description = "inject additional features",
    default = true,
    initializers = {
        node = initializedevanagi,
    },
}
825
-- When true, reorder_one marks a sequence it could not shape with a dotted circle.
local show_syntax_errors = false

-- Put a dotted circle next to `current` to flag a syntax error.
--
-- A copy of `current` (injection data included) is inserted right after it.
-- When `char` is a pre-base mark the copy becomes the dotted circle, otherwise
-- `current` itself is turned into one and the copy keeps the character.
--
-- head    : head of the list
-- current : node to flag
-- char    : character used for the pre_mark test
-- returns whatever insertnodeafter returns for (head, current, copy)
local function inject_syntax_error(head,current,char)
    local signal = copy_node(current)
    copyinjection(signal,current)
    local target = current
    if pre_mark[char] then
        target = signal
    end
    setchar(target,dotted_circle)
    return insertnodeafter(head,current,signal)
end
838
839
840
-- Fetch (and on first use build) the per-dataset devanagari summary that
-- reorder_one works with. The summary is stored in the table returned by
-- otf.dataset under the key "devanagari".
--
-- font : font id, index into fontdata
-- attr : passed on to otf.dataset
-- returns reph, vattu, blwfcache, vatucache, pstfcache
local function initialize_one(font,attr)

    local tfmdata  = fontdata[font]
    local datasets = otf.dataset(tfmdata,font,attr)
    local cached   = datasets.devanagari

    if cached then
        return cached.reph, cached.vattu, cached.blwfcache, cached.vatucache, cached.pstfcache
    end

    local summary = {
        reph      = false,
        vattu     = false,
        blwfcache = { },
        vatucache = { },
        pstfcache = { },
    }
    datasets.devanagari = summary

    -- font wide data collected by initializedevanagi
    local shared = tfmdata.resources.devanagari

    for s=1,#datasets do
        local dataset = datasets[s]
        -- only entries with a truthy first field count; field 4 holds the feature tag
        local kind = dataset and dataset[1] and dataset[4]
        if kind == "rphf" then
            summary.reph = true
        elseif kind == "blwf" or kind == "vatu" then
            summary.vattu = true
            -- from here on the font wide caches are used instead of the empty ones
            summary.blwfcache = shared.blwfcache
            summary.vatucache = shared.vatucache
            summary.pstfcache = shared.pstfcache
        end
    end

    return summary.reph, summary.vattu, summary.blwfcache, summary.vatucache, summary.pstfcache

end
883
884
885
-- Test whether node `n` matches a lookup context.
--
-- contexts : either a plain coverage table indexed by character (no `n`
--            field), or a list of contextual rules. Of each rule `ck` three
--            fields are read: ck[3] is a list of coverage tables (one per
--            position), ck[4] the index of the first "current" position and
--            ck[5] the index of the last one.
-- n        : node to test
--
-- Returns contexts[char] for a plain coverage table. For a rule list it
-- returns true when a rule with exactly two current positions has the
-- character of `n` in its second position, all following positions match the
-- nodes after `n`, and all preceding positions match the nodes before the
-- predecessor of `n`; false when no rule matches.
--
-- The backward scan stops at the first mismatch or as soon as the list runs
-- out. The previous version only cleared its flag and kept walking, which
-- meant calling getprev on nil once the start of the list had been passed.
local function contextchain(contexts, n)
    local char = getchar(n)
    if not contexts.n then
        return contexts[char]
    end
    for k=1,#contexts do
        local ck  = contexts[k]
        local seq = ck[3]
        local f   = ck[4]
        local l   = ck[5]
        if (l - f) == 1 and seq[f+1][char] then
            local ok = true
            local c  = n
            -- positions after the current ones
            for i=l+1,#seq do
                c = getnext(c)
                if not c or not seq[i][ischar(c)] then
                    ok = false
                    break
                end
            end
            if ok then
                -- positions before the current ones; the predecessor of n is
                -- the first current position and is not tested here
                c = getprev(n)
                for i=1,f-1 do
                    c = c and getprev(c)
                    if not c or not seq[f-i][ischar(c)] then
                        ok = false
                        break
                    end
                end
            end
            if ok then
                return true
            end
        end
    end
    return false
end
923
-- Sort the run of dependent vowels that follows node `c`.
--
-- Every dependent vowel after `c` is compared with the nodes from `c` up to
-- itself. An above mark is moved in front of the first below or post mark
-- found, a below mark in front of the first post mark found. The list is
-- relinked in place; nothing is returned.
local function order_matras(c)
    local matra     = getnext(c)
    local matrachar = getchar(matra)
    while dependent_vowel[matrachar] do
        -- remember the successor first: the matra may get moved below
        local following = getnext(matra)
        local probe     = c
        local probechar = getchar(probe)
        while probe ~= matra do
            local misplaced = false
            if above_mark[matrachar] then
                misplaced = below_mark[probechar] or post_mark[probechar]
            end
            if not misplaced and below_mark[matrachar] then
                misplaced = post_mark[probechar]
            end
            if misplaced then
                -- take the matra out of the list ...
                local before, after = getboth(matra)
                if after then
                    setprev(after,before)
                end
                setnext(before,after)
                -- ... and put it back right in front of the probe
                local anchor = getprev(probe)
                setnext(anchor,matra)
                setprev(matra,anchor)
                setnext(matra,probe)
                setprev(probe,matra)
                break
            end
            probe     = getnext(probe)
            probechar = getchar(probe)
        end
        matra     = following
        matrachar = getchar(matra)
    end
end
952
-- Reorder the nodes of one run (start .. stop) in place.
--
-- head     : head of the list the run lives in
-- start    : first node of the run
-- stop     : last node of the run
-- font     : font id (index into fontdata)
-- attr     : passed on to initialize_one
-- nbspaces : running counter of no-break spaces, adjusted and handed back
--
-- Returns head, stop, nbspaces; head and stop are updated whenever the node
-- they refer to is moved or removed.
local function reorder_one(head,start,stop,font,attr,nbspaces)

    local reph, vattu, blwfcache, vatucache, pstfcache = initialize_one(font,attr)

    local devanagari = fontdata[font].resources.devanagari
    local current = start
    local n = getnext(start)
    local base = nil
    local firstcons = nil
    local lastcons = nil
    local basefound = false

    -- The run starts with ra + halant and the font has a reph feature.
    if reph and ra[getchar(start)] and halant[getchar(n)] then


        -- nothing follows the halant: leave the run as it is
        if n == stop then
            return head, stop, nbspaces
        end
        if getchar(getnext(n)) == c_zwj then
            current = start
        else
            -- mark the ra and start the base search behind the halant
            current = getnext(n)
            setstate(start,s_rphf)
        end
    end

    -- A no-break space at this point acts as the base.
    if getchar(current) == c_nbsp then

        if current == stop then
            -- the nbsp is the last node of the run: remove it and stop
            stop = getprev(stop)
            head = remove_node(head,current)
            flushnode(current)
            return head, stop, nbspaces
        else
            nbspaces = nbspaces + 1
            base = current
            firstcons = current
            lastcons = current
            current = getnext(current)
            if current ~= stop then
                local char = getchar(current)
                if nukta[char] then
                    current = getnext(current)
                    char = getchar(current)
                end
                if char == c_zwj and current ~= stop then
                    local next = getnext(current)
                    if next ~= stop and halant[getchar(next)] then
                        current = next
                        next = getnext(current)
                        local tmp = next and getnext(next) or nil
                        local changestop = next == stop
                        -- run the font processors over a two node scratch list
                        -- (copy of the node after the halant, then copy of the
                        -- halant) with state s_blwf set on its first node
                        local tempcurrent = copy_node(next)
                        copyinjection(tempcurrent,next)
                        local nextcurrent = copy_node(current)
                        copyinjection(nextcurrent,current)
                        setlink(tempcurrent,nextcurrent)
                        setstate(tempcurrent,s_blwf)
                        tempcurrent = processcharacters(tempcurrent,font)
                        setstate(tempcurrent,unsetvalue)
                        if getchar(next) == getchar(tempcurrent) then
                            -- the processors left the character alone
                            flushlist(tempcurrent)
                            if show_syntax_errors then
                                head, current = inject_syntax_error(head,current,char)
                            end
                        else
                            -- take over the resulting character and drop the
                            -- node that followed the halant
                            setchar(current,getchar(tempcurrent))
                            local freenode = getnext(current)
                            setlink(current,tmp)
                            flushnode(freenode)
                            flushlist(tempcurrent)
                            if changestop then
                                stop = current
                            end
                        end
                    end
                end
            end
        end
    end

    -- Base search: walk up to and including stop. Every consonant gets state
    -- s_half; the first one becomes the base; a later consonant found in
    -- blwfcache or pstfcache gets s_blwf or s_pstf instead, any other later
    -- consonant becomes the new base.
    while not basefound do

        local char = getchar(current)
        if consonant[char] then
            setstate(current,s_half)
            if not firstcons then
                firstcons = current
            end
            lastcons = current
            if not base then
                base = current
            elseif blwfcache[char] then

                setstate(current,s_blwf)
            elseif pstfcache[char] then

                setstate(current,s_pstf)
            else
                base = current
            end
        end
        basefound = current == stop
        current = getnext(current)
    end

    -- The base is not the last consonant and is followed by (nukta +) halant:
    -- unlink that halant and relink it behind the last consonant (or behind
    -- the nukta that follows the last consonant).
    if base ~= lastcons then

        local np = base
        local n = getnext(base)
        local ch = getchar(n)
        if nukta[ch] then
            np = n
            n = getnext(n)
            ch = getchar(n)
        end
        if halant[ch] then
            if lastcons ~= stop then
                local ln = getnext(lastcons)
                if nukta[getchar(ln)] then
                    lastcons = ln
                end
            end

            local nn = getnext(n)
            local ln = getnext(lastcons)
            setlink(np,nn)
            setnext(lastcons,n)
            if ln then
                setprev(ln,n)
            end
            setnext(n,ln)
            setprev(n,lastcons)
            if lastcons == stop then
                stop = n
            end
        end
    end

    -- Initial ra + halant that is not followed by a zero-width joiner: move
    -- the pair behind the base, or behind the dependent vowel that directly
    -- follows the base.
    n = getnext(start)
    if n ~= stop and ra[getchar(start)] and halant[getchar(n)] and not zw_char[getchar(getnext(n))] then


        local matra = base
        if base ~= stop then
            local next = getnext(base)
            if dependent_vowel[getchar(next)] then
                matra = next
            end
        end


        local sp = getprev(start)
        local nn = getnext(n)
        local mn = getnext(matra)
        setlink(sp,nn)
        setlink(matra,start)
        setlink(n,mn)
        if head == start then
            head = nn
        end
        start = nn
        if matra == stop then
            stop = n
        end
    end

    -- A node followed by halant + ZWNJ loses its state.
    local current = start
    while current ~= stop do
        local next = getnext(current)
        if next ~= stop and halant[getchar(next)] and getchar(getnext(next)) == c_zwnj then
            setstate(current,unsetvalue)
        end
        current = next
    end

    -- The base loses its state when a halant follows, unless that halant is
    -- in turn followed by a ZWJ.
    if base ~= stop and getstate(base) then
        local next = getnext(base)
        if halant[getchar(next)] and not (next ~= stop and getchar(getnext(next)) == c_zwj) then
            setstate(base,unsetvalue)
        end
    end




    -- Main pass. `b` is the node dependent vowels are moved behind: the base,
    -- or the nukta that follows it. `moved`, `a` and `p` are not read again in
    -- this function.
    local current, allreordered, moved = start, false, { [base] = true }
    local a, b, p, bn = base, base, base, getnext(base)
    if base ~= stop and nukta[getchar(bn)] then
        a, b, p = bn, bn, bn
    end
    while not allreordered do

        -- Find the end `c` of the unit that starts at `current`: an optional
        -- nukta, an optional halant, dependent vowels, an optional vowel
        -- modifier and an optional stress/tone mark. (`l` is never used.)
        local c = current
        local n = getnext(current)
        local l = nil
        if c ~= stop then
            local ch = getchar(n)
            if nukta[ch] then
                c = n
                n = getnext(n)
                ch = getchar(n)
            end
            if c ~= stop then
                if halant[ch] then
                    c = n
                    n = getnext(n)
                    ch = getchar(n)
                end

                -- split two-part marks: n keeps the first part, a copy with
                -- the second part is inserted after `current`
                local tpm = twopart_mark[ch]
                while tpm do
                    local extra = copy_node(n)
                    copyinjection(extra,n)
                    ch = tpm[1]
                    setchar(n,ch)
                    setchar(extra,tpm[2])
                    head = insertnodeafter(head,current,extra)
                    tpm = twopart_mark[ch]
                end
                while c ~= stop and dependent_vowel[ch] do
                    c = n
                    n = getnext(n)
                    ch = getchar(n)
                end
                if c ~= stop then
                    if vowel_modifier[ch] then
                        c = n
                        n = getnext(n)
                        ch = getchar(n)
                    end
                    if c ~= stop and stress_tone_mark[ch] then
                        c = n
                        n = getnext(n)
                    end
                end
            end
        end
        -- Reposition the marks found between `current` and the end of the unit.
        local bp = getprev(firstcons)
        local cn = getnext(current)
        local last = getnext(c)
        while cn ~= last do

            if pre_mark[getchar(cn)] then
                if devanagari.left_matra_before_base then
                    -- flag set by initializedevanagi for mlym and taml:
                    -- the mark goes directly in front of the base
                    local prev, next = getboth(cn)
                    setlink(prev,next)
                    if cn == stop then
                        stop = getprev(cn)
                    end
                    if base == start then
                        if head == start then
                            head = cn
                        end
                        start = cn
                    end
                    setlink(getprev(base),cn)
                    setlink(cn,base)

                    cn = next
                else
                    -- otherwise it goes in front of the first consonant
                    if bp then
                        setnext(bp,cn)
                    end
                    local prev, next = getboth(cn)
                    if next then
                        setprev(next,prev)
                    end
                    setnext(prev,next)
                    if cn == stop then
                        stop = prev
                    end
                    setprev(cn,bp)
                    setlink(cn,firstcons)
                    if firstcons == start then
                        if head == start then
                            head = cn
                        end
                        start = cn
                    end
                    cn = next
                end
            elseif current ~= base and dependent_vowel[getchar(cn)] then
                -- a dependent vowel outside the base unit moves right behind b
                local prev, next = getboth(cn)
                if next then
                    setprev(next,prev)
                end
                setnext(prev,next)
                if cn == stop then
                    stop = prev
                end
                setlink(b,cn,getnext(b))
                order_matras(cn)
                cn = next
            elseif current == base and dependent_vowel[getchar(cn)] then
                -- dependent vowels of the base unit stay put and are only sorted
                local cnn = getnext(cn)
                order_matras(cn)
                cn = cnn
                while cn ~= last and dependent_vowel[getchar(cn)] do
                    cn = getnext(cn)
                end
            else
                cn = getnext(cn)
            end
        end
        allreordered = c == stop
        current = getnext(c)
    end

    -- Second pass over ra + halant pairs, only when the font has reph or
    -- vattu forms. `cns` tracks the end of the most recent consonant (or
    -- nbsp) cluster seen.
    if reph or vattu then
        local current, cns = start, nil
        while current ~= stop do
            local c = current
            local n = getnext(current)
            if ra[getchar(current)] and halant[getchar(n)] then
                c = n
                n = getnext(n)
                -- b: the last node from base up to stop that is a dependent
                -- vowel, or base itself when there is none
                local b, bn = base, base
                while bn ~= stop do
                    local next = getnext(bn)
                    if dependent_vowel[getchar(next)] then
                        b = next
                    end
                    bn = next
                end
                if getstate(current,s_rphf) then


                    -- a pair marked as reph moves behind b
                    if b ~= current then
                        if current == start then
                            if head == start then
                                head = n
                            end
                            start = n
                        end
                        if b == stop then
                            stop = c
                        end
                        local prev = getprev(current)
                        setlink(prev,n)
                        local next = getnext(b)
                        setlink(c,next)
                        setlink(b,current)
                    end
                elseif cns and getnext(cns) ~= current then

                    -- any other pair moves directly behind the tracked cluster
                    local cp = getprev(current)
                    local cnsn = getnext(cns)
                    setlink(cp,n)
                    setlink(cns,current)
                    setlink(c,cnsn)
                    if c == stop then
                        stop = cp
                        break
                    end
                    current = getprev(n)
                end
            else
                local char = getchar(current)
                if consonant[char] then
                    cns = current
                    local next = getnext(cns)
                    if halant[getchar(next)] then
                        cns = next
                    end
                    if not vatucache[char] then
                        next = getnext(cns)
                        while dependent_vowel[getchar(next)] do
                            cns = next
                            next = getnext(cns)
                        end
                    end
                elseif char == c_nbsp then
                    -- same bookkeeping as for a consonant, plus the nbsp count
                    nbspaces = nbspaces + 1
                    cns = current
                    local next = getnext(cns)
                    if halant[getchar(next)] then
                        cns = next
                    end
                    if not vatucache[char] then
                        next = getnext(cns)
                        while dependent_vowel[getchar(next)] do
                            cns = next
                            next = getnext(cns)
                        end
                    end
                end
            end
            current = getnext(current)
        end
    end

    -- A no-break space that served as the base is removed again.
    if getchar(base) == c_nbsp then
        nbspaces = nbspaces - 1
        if base == stop then
            stop = getprev(stop)
        end
        head = remove_node(head,base)
        flushnode(base)
    end

    return head, stop, nbspaces
end
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
-- Handler for the dv01 pseudo sequence: move the node `start` behind the
-- first halant found in its syllable.
--
-- The scan begins at `start` and only looks at characters of the same font
-- that carry the same syllable property. When a halant is found (taking a
-- directly following zero-width joiner of the same syllable along), `start`
-- is unlinked and relinked behind it. Without a halant nothing changes.
--
-- head  : head of the list
-- start : node the sequence matched on
-- returns head, the node to continue with, true
function handlers.devanagari_reorder_matras(head,start)
    local font     = getfont(start)
    local syllable = getprop(start,a_syllabe)
    local current  = start
    while current do
        local char  = ischar(current,font)
        local after = getnext(current)
        if not char or getprop(current,a_syllabe) ~= syllable then
            -- left the syllable (or hit something that is no character of this font)
            break
        end
        if halant[char] then
            if after then
                local joiner = ischar(after,font)
                if joiner and zw_char[joiner] and getprop(after,a_syllabe) == syllable then
                    current = after
                    after   = getnext(current)
                end
            end
            -- processing resumes at the node that used to follow start
            local resume = getnext(start)
            head = remove_node(head,start)
            setlink(start,after)
            setlink(current,start)
            start = resume
            break
        end
        current = after
    end
    return head, start, true
end
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420local rephbase = { }
1421
-- Moves the node 'start' (the reph) to another position inside its syllable.
--
-- The syllable is the run of glyphs in the font of 'start' that carry the same
-- a_syllabe property as 'start'. The target position depends on the value that
-- reorder_two stored in rephbase[font][char]; that value is looked up in the
-- tables after_subscript, after_postscript, after_main, before_postscript and
-- before_subscript (all defined elsewhere in this file).
--
-- As soon as one step has moved the node, 'startnext' is set (to the node that
-- originally followed 'start') and the remaining steps are skipped.
--
-- Returns: head (possibly changed), the node to continue with and true.
--
-- NOTE(review): the step labels look like they follow the reph reordering
-- steps of the OpenType Indic shaping description - confirm against the spec.
function handlers.devanagari_reorder_reph(head,start)
    local current = getnext(start)
    local startnext = nil
    local startprev = nil -- not used in this function
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)

    ::step_1::

    -- When the class of this reph is listed in after_subscript we immediately
    -- jump to step 5 and skip steps 2, 3 and 4.

    local char = ischar(start,startfont)
    -- from here on the local shadows the file level rephbase table
    local rephbase = rephbase[startfont][char]
    if char and after_subscript[rephbase] then
        goto step_5
    end

    ::step_2::

    -- Unless the class is listed in after_postscript: look for the first
    -- halant in the syllable and put the reph right after it. A zero width
    -- character (zw_char) of the same syllable that directly follows the
    -- halant is skipped too, so then the reph ends up after that character.

    if char and not after_postscript[rephbase] then
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                if halant[char] then
                    local next = getnext(current)
                    if next then
                        local nextchar = ischar(next,startfont)
                        if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then
                            current = next
                            next = getnext(current)
                        end
                    end
                    -- unlink start and relink it between current and next
                    startnext = getnext(start)
                    head = remove_node(head,start)
                    setlink(start,next)
                    setlink(current,start)

                    start = startnext
                    startattr = getprop(start,a_syllabe)
                    break
                end
                current = getnext(current)
            else
                -- we ran out of the syllable (or out of the font)
                break
            end
        end
    end

    ::step_3::

    -- Nothing moved yet and the class is listed in after_main: put the reph
    -- right after the first consonant of the syllable for which the
    -- getstate(current,s_pref) test fails.

    if not startnext then
        if char and after_main[rephbase] then
            current = getnext(start)
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if consonant[char] and not getstate(current,s_pref) then
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(current,start)
                        -- NOTE(review): at this point getnext(current) is start itself (it was
                        -- linked in by the previous line) - confirm that this is intended.
                        setlink(start,getnext(current))

                        start = startnext
                        startattr = getprop(start,a_syllabe)
                        break
                    end
                    current = getnext(current)
                else
                    break
                end
            end
        end
    end

    ::step_4::

    -- Nothing moved yet and the class is listed in before_postscript: put the
    -- reph before the first node for which getstate(current,s_pstf) holds.
    -- When the syllable ends before such a node is seen, the reph goes before
    -- the first vowel modifier or stress/tone mark that we met on the way
    -- (remembered in 'c'), if there was one.

    if not startnext then
        if char and before_postscript[rephbase] then
            current = getnext(start)
            local c = nil
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if getstate(current,s_pstf) then
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(current),start)
                        setlink(start,current)

                        start = startnext
                        startattr = getprop(start,a_syllabe)
                        break
                    elseif not c and (vowel_modifier[char] or stress_tone_mark[char]) then
                        c = current
                    end
                    current = getnext(current)
                else
                    if c then
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(c),start)
                        setlink(start,c)

                        start = startnext
                        startattr = getprop(start,a_syllabe)
                    end
                    break
                end
            end
        end
    end

    ::step_5::

    -- Nothing moved yet: scan the whole syllable and remember the last node
    -- whose state matches the class: s_blwf or s_pstf when the class is in
    -- before_subscript, s_pstf when it is in after_subscript. When such a node
    -- was found the reph is put right before it.

    if not startnext then
        current = getnext(start)
        local c = nil
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                local state = getstate(current)
                if before_subscript[rephbase] and (state == s_blwf or state == s_pstf) then
                    c = current
                elseif after_subscript[rephbase] and (state == s_pstf) then
                    c = current
                end
                current = getnext(current)
            else
                break
            end
        end

        if c then
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(getprev(c),start)
            setlink(start,c)

            start = startnext
            startattr = getprop(start,a_syllabe)
        end
    end

    ::step_6::

    -- Still nothing moved: put the reph at the end of the syllable.

    if not startnext then
        current = start
        local next = getnext(current)
        -- walk to the last node of the syllable
        while next do
            local nextchar = ischar(next,startfont)
            if nextchar and getprop(next,a_syllabe) == startattr then
                current = next
                next = getnext(current)
            else
                break
            end
        end
        if start ~= current then
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(start,getnext(current))
            setlink(current,start)

            start = startnext
        end
    end

    return head, start, true
end
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
-- Moves a pre-base reordering consonant ('start') to its final position.
--
-- A node that already carries the "reordered" property is left untouched.
-- Otherwise two strategies are tried:
--
-- (1) Scan forward through the syllable (same font, same a_syllabe property
--     as start). At the first halant the node is relinked right after that
--     halant, or after the zero width character (zw_char) that directly
--     follows the halant inside the same syllable.
--
-- (2) When no halant was found: scan backward through the syllable and put
--     the node in front of the first node that is not a dependent vowel and
--     that either has no state or passes the getstate(current,s_init) test.
--
-- A moved node gets the "reordered" property so that it is moved only once.
--
-- Returns: head (possibly changed), the node to continue with and true.
--
-- NOTE(review): the guard reads the property via a_reordered while the
-- setters use the literal "reordered"; this assumes that a_reordered (defined
-- elsewhere) equals "reordered" - confirm.
function handlers.devanagari_reorder_pre_base_reordering_consonants(head,start)
    if getprop(start,a_reordered) then
        return head, start, true
    end
    local current   = start
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)
    while current do
        local char = ischar(current,startfont)
        local next = getnext(current)
        if char and getprop(current,a_syllabe) == startattr then
            if halant[char] then
                if next then
                    local char = ischar(next,startfont)
                    if char and zw_char[char] and getprop(next,a_syllabe) == startattr then
                        -- keep the joiner attached to the halant
                        current = next
                        next    = getnext(current)
                    end
                end
                -- unlink start and relink it between current and next
                local startnext = getnext(start)
                head = remove_node(head,start)
                setlink(start,next)
                setlink(current,start)
                setprop(start,"reordered",true)
                start = startnext
                return head, start, true
            end
        else
            break
        end
        current = next
    end

    -- No halant was found. Neither start nor its syllable property changed
    -- above, so startattr is still valid and can be reused here.
    current = getprev(start)
    while current and getprop(current,a_syllabe) == startattr do
        local char = ischar(current)
        if (not dependent_vowel[char] and (not getstate(current) or getstate(current,s_init))) then
            -- This has to be a local: the original code assigned to an
            -- undeclared name here and thereby created a global 'startnext'.
            local startnext = getnext(start)
            head = remove_node(head,start)
            if current == head then
                setlink(start,current)
                head = start
            else
                setlink(getprev(current),start)
                setlink(start,current)
            end
            setprop(start,"reordered",true)
            start = startnext
            break
        end
        current = getprev(current)
    end

    return head, start, true
end
1702
-- Removes 'start' plus every zero width (non) joiner of the same font that
-- directly follows it. The removed run is cut out of the list and flushed.
-- The arguments kind, lookupname and replacement are accepted but not used.
--
-- Returns: head (possibly changed), the node that followed the removed run
-- (nil when the run ended the list) and true.
function handlers.devanagari_remove_joiners(head,start,kind,lookupname,replacement)
    local font  = getfont(start)
    local last  = start
    local stop  = getnext(start)
    -- extend the run as long as we see joiners
    while stop do
        local char = ischar(stop,font)
        if not char or (char ~= c_zwnj and char ~= c_zwj) then
            break
        end
        last = stop
        stop = getnext(stop)
    end
    local prev = getprev(start)
    if not stop then
        -- the run ends the list so we only need to terminate what comes before
        if prev then
            setnext(prev)
        end
    else
        -- isolate the run and close the gap
        setnext(last)
        setlink(prev,stop)
    end
    if start == head then
        head = stop
    end
    flushlist(start)
    return head, stop, true
end
1729
-- Fetches the devanagari specific resources of a font. The attr argument is
-- accepted but not used.
--
-- Returns: the seqsubset and reorderreph tables of the font, where each of
-- them falls back to a fresh empty table when it (or the devanagari resource
-- table as a whole) is missing.
local function initialize_two(font,attr)
    local resources = fontdata[font].resources.devanagari
    if not resources then
        return { }, { }
    end
    return resources.seqsubset or { }, resources.reorderreph or { }
end
1741
1742
1743
1744
-- Reorders the nodes of one syllable (start .. stop, both inclusive) for the
-- scripts that are handled by method_two.
--
-- head     : head of the node list
-- start    : first node of the syllable
-- stop     : last node of the syllable
-- font     : font id, used for the resource lookup and for rephbase[font]
-- attr     : only passed on to initialize_two
-- nbspaces : running counter, incremented when the syllable has a nbsp as base
--            and decremented again when that nbsp is removed at the end
--
-- Returns: head, the (possibly changed) last node of the syllable, nbspaces.
--
-- The helpers contextchain, processcharacters and inject_syntax_error as well
-- as the character class tables and the s_* states are defined elsewhere in
-- this file.
local function reorder_two(head,start,stop,font,attr,nbspaces)
    local seqsubset, reorderreph = initialize_two(font,attr)

    local halfpos = nil
    local basepos = nil
    local subpos = nil
    local postpos = nil

    -- both are rebuilt on every call
    reorderreph.coverage = { }
    rephbase[font] = { }

    for i=1,#seqsubset do

        -- A subset is { kind, lookupcache, ... }. Per kind we walk over the
        -- syllable (stop itself is never tested as first node of a match) and
        -- tag the nodes for which contextchain matches with a state.

        local subset = seqsubset[i]
        local kind = subset[1]
        local lookupcache = subset[2]
        if kind == "rphf" then
            -- remember the reph character and its class for devanagari_reorder_reph
            reorderreph.coverage[subset[3]] = true
            rephbase[font][subset[3]] = subset[4]
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)

                        if contextchain(found, next) then
                            local afternext = next ~= stop and getnext(next)
                            if afternext and zw_char[getchar(afternext)] then
                                -- a zero width character follows: no reph state
                                current = afternext
                            elseif current == start then
                                -- only the first node of the syllable gets the state
                                setstate(current,s_rphf)
                                current = next
                            else
                                current = next
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "pref" then
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)

                        if contextchain(found, next) then
                            -- only tag pairs that have no state yet
                            if (not getstate(current) and not getstate(next)) then
                                setstate(current,s_pref)
                                setstate(next,s_pref)
                                current = next
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "half" then
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)

                        if contextchain(found, next) then
                            if next ~= stop and getchar(getnext(next)) == c_zwnj then
                                -- a zwnj after the pair: no half state
                                current = next
                            elseif (not getstate(current)) then
                                setstate(current,s_half)
                                -- remember the first node that got the half state
                                if not halfpos then
                                    halfpos = current
                                end
                            end
                            current = getnext(current)
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "blwf" or kind == "vatu" then
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)

                        if contextchain(found, next) then
                            if (not getstate(current) and not getstate(next)) then
                                setstate(current,s_blwf)
                                setstate(next,s_blwf)
                                current = next
                                -- the last tagged node wins
                                subpos = current
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "pstf" then
            local current = start
            local last = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)

                        if contextchain(found, next) then
                            if (not getstate(current) and not getstate(next)) then
                                setstate(current,s_pstf)
                                setstate(next,s_pstf)
                                current = next
                                -- the last tagged node wins
                                postpos = current
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        end
    end

    -- Next we determine the base of the syllable.

    local current, base, firstcons = start, nil, nil

    if getstate(start,s_rphf) then
        -- skip the two nodes at the start that make up the reph
        current = getnext(getnext(start))
    end

    if current ~= getnext(stop) and getchar(current) == c_nbsp then
        -- the nbsp acts as base
        if current == stop then
            -- nothing follows the nbsp: remove it and quit
            stop = getprev(stop)
            head = remove_node(head,current)
            flushnode(current)
            return head, stop, nbspaces
        else
            nbspaces = nbspaces + 1
            base = current
            current = getnext(current)
            if current ~= stop then
                local char = getchar(current)
                if nukta[char] then
                    current = getnext(current)
                    char = getchar(current)
                end
                if char == c_zwj then
                    local next = getnext(current)
                    if current ~= stop and next ~= stop and halant[getchar(next)] then
                        current = next
                        next = getnext(current)
                        -- temporarily cut the list after 'next' and run
                        -- processcharacters three times, each time with another state set
                        local tmp = getnext(next)
                        local changestop = next == stop
                        setnext(next)
                        setstate(current,s_pref)
                        current = processcharacters(current,font)
                        setstate(current,s_blwf)
                        current = processcharacters(current,font)
                        setstate(current,s_pstf)
                        current = processcharacters(current,font)
                        setstate(current,unsetvalue)
                        -- reconnect the remainder of the list
                        if halant[getchar(current)] then
                            -- we still have a halant
                            setnext(getnext(current),tmp)
                            if show_syntax_errors then
                                head, current = inject_syntax_error(head,current,char)
                            end
                        else
                            setnext(current,tmp)
                            if changestop then
                                stop = current
                            end
                        end
                    end
                end
            end
        end
    else
        -- The base is the last consonant that passes the tests below; when
        -- there is none we fall back on the first candidate consonant.
        local last = getnext(stop)
        while current ~= last do
            local next = getnext(current)
            if consonant[getchar(current)] then
                -- a consonant followed by halant + zwj is no candidate
                if not (current ~= stop and next ~= stop and halant[getchar(next)] and getchar(getnext(next)) == c_zwj) then
                    if not firstcons then
                        firstcons = current
                    end

                    local a = getstate(current)
                    if not (a == s_blwf or a == s_pstf or (a ~= s_rphf and a ~= s_blwf and ra[getchar(current)])) then
                        base = current
                    end
                end
            end
            current = next
        end
        if not base then
            base = firstcons
        end
    end

    if not base then
        -- no base so no reph either
        if getstate(start,s_rphf) then
            setstate(start,unsetvalue)
        end
        return head, stop, nbspaces
    else
        -- the base itself carries no state
        if getstate(base) then
            setstate(base,unsetvalue)
        end
        basepos = base
    end
    -- positions that were not set by the feature scan default to the base
    if not halfpos then
        halfpos = base
    end
    if not subpos then
        subpos = base
    end
    if not postpos then
        postpos = subpos or base
    end

    -- Matra reordering: two part marks are split and dependent vowels are
    -- moved to the position that belongs to their class.

    local moved = { }
    local current = start
    local last = getnext(stop)
    while current ~= last do
        local char = getchar(current)
        local target = nil
        local cn = getnext(current)

        -- Split a two part mark: the current node gets the first part and a
        -- copy that gets the second part is inserted right after it. We repeat
        -- this as long as the first part is itself a two part mark.
        local tpm = twopart_mark[char]
        while tpm do
            local extra = copy_node(current)
            copyinjection(extra,current)
            char = tpm[1]
            setchar(current,char)
            setchar(extra,tpm[2])
            head = insertnodeafter(head,current,extra)
            tpm = twopart_mark[char]
        end

        if not moved[current] and dependent_vowel[char] then
            if pre_mark[char] then
                moved[current] = true

                -- take the node out of the list
                local prev, next = getboth(current)
                setlink(prev,next)
                if current == stop then
                    stop = getprev(current)
                end

                local pos
                if before_main[char] then
                    pos = basepos

                else

                    pos = halfpos

                end

                -- go back to the earliest node in this syllable that has the s_pref state
                local ppos = getprev(pos)
                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) do
                    if getstate(ppos,s_pref) then
                        pos = ppos
                    end
                    ppos = getprev(ppos)
                end

                -- also go back over preceding consonant + halant pairs
                local ppos = getprev(pos)
                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and halant[ischar(ppos)] do
                    ppos = getprev(ppos)
                    if ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and consonant[ischar(ppos)] then
                        pos = ppos
                        ppos = getprev(ppos)
                    else
                        break
                    end
                end

                -- the moved node can become the new start (and head)
                if pos == start then
                    if head == start then
                        head = current
                    end
                    start = current
                end
                setlink(getprev(pos),current)
                setlink(current,pos)

            elseif above_mark[char] then
                -- after the base; later marks go after this one
                target = basepos
                if subpos == basepos then
                    subpos = current
                end
                if postpos == basepos then
                    postpos = current
                end
                basepos = current
            elseif below_mark[char] then
                -- after the below base position
                target = subpos
                if postpos == subpos then
                    postpos = current
                end
                subpos = current
            elseif post_mark[char] then
                -- after the post base position, but first skip the nuktas,
                -- stress/tone marks and vowel modifiers that follow it
                local n = getnext(postpos)
                while n do
                    local v = ischar(n,font)
                    if nukta[v] or stress_tone_mark[v] or vowel_modifier[v] then
                        postpos = n
                    else
                        break
                    end
                    n = getnext(n)
                end
                target = postpos
                postpos = current
            end
            if mark_above_below_post[char] then
                local prev = getprev(current)
                if prev ~= target then
                    -- not yet in place: unlink and relink after the target
                    local next = getnext(current)
                    setlink(prev,next)
                    if current == stop then
                        stop = prev
                    end
                    setlink(current,getnext(target))
                    setlink(target,current)

                end
            end
        end
        current = cn
    end

    -- A halant followed by a ra that has neither the s_rphf nor the s_blwf
    -- state: when that ra is listed in after_main the pair is moved after the
    -- base and the dependent vowels that follow the base.

    local current = getnext(start)
    local last = getnext(stop)
    while current ~= last do
        local char = getchar(current)
        local cn = getnext(current)
        if halant[char] and ra[ischar(cn)] and (not getstate(cn,s_rphf)) and (not getstate(cn,s_blwf)) then
            if after_main[ischar(cn)] then
                local prev = getprev(current)
                local next = getnext(cn)
                local bpn = getnext(basepos)
                while bpn and dependent_vowel[ischar(bpn)] do
                    basepos = bpn
                    bpn = getnext(bpn)
                end
                if basepos ~= prev then
                    setlink(prev,next)
                    setlink(cn, getnext(basepos))
                    setlink(basepos, current)
                    if cn == stop then
                        stop = prev
                    end
                    cn = next
                end
            end

        end
        current = cn
    end

    -- A nukta that follows a run of halants and/or stress/tone marks is moved
    -- in front of that run; 'c' is the first node of the run seen so far.

    local current = start
    local c = nil
    while current ~= stop do
        local char = getchar(current)
        if halant[char] or stress_tone_mark[char] then
            if not c then
                c = current
            end
        else
            c = nil
        end
        local next = getnext(current)
        if c and nukta[getchar(next)] then
            if head == c then
                head = next
            end
            if stop == next then
                stop = current
            end
            setlink(getprev(c),next)
            local nextnext = getnext(next)
            setnext(current,nextnext)
            local nextnextnext = getnext(nextnext)
            if nextnextnext then
                setprev(nextnextnext,current)
            end
            -- NOTE(review): this links 'nextnext' (the node after the nukta) to c and not
            -- the nukta itself, and nextnext can be nil here - confirm that this is intended.
            setlink(nextnext,c)
        end
        if stop == current then break end
        current = getnext(current)
    end

    -- a nbsp that served as base is removed again
    if getchar(base) == c_nbsp then
        if base == stop then
            stop = getprev(stop)
        end
        nbspaces = nbspaces - 1
        head = remove_node(head, base)
        flushnode(base)
    end

    return head, stop, nbspaces
end
2178
2179
2180
-- The separator table is consulted by method_one and method_two when they
-- decide if a nbsp stands on its own (they look up the preceding character).
local separator = { }

-- NOTE(review): imerge is table.imerge, which presumably appends the indexed
-- entries of the second table to the first one - confirm that this gives the
-- character keyed lookup that the users of this table expect.
imerge(separator,consonant)
imerge(separator,independent_vowel)
imerge(separator,dependent_vowel)
imerge(separator,vowel_modifier)
imerge(separator,stress_tone_mark)

-- nuktas and halants are registered by key
for char in next, nukta do
    separator[char] = true
end
for char in next, halant do
    separator[char] = true
end
2191
-- Looks ahead from node c and returns the last node that belongs to the
-- syllable that starts at c. Nothing in the list is changed. Scanning stops
-- at the end of the list and at a node for which ischar(node,font) fails.
--
-- variant 1: after an optional nukta and one more character we accept
--            "zwj consonant" or "zwj/zwnj halant consonant"
-- variant 2: after an optional nukta we accept "[zwj/zwnj] halant consonant"
--
-- After that both variants accept, in this order: dependent vowels (at most
-- one of each class pre, above, below and post, where a two part mark counts
-- for both its parts), a nukta, a halant, a vowel modifier and at most two
-- stress/tone marks.
local function analyze_next_chars_one(c,font,variant)
    local n = getnext(c)
    if not n then
        return c
    end
    if variant == 1 then
        local v = ischar(n,font)
        if v and nukta[v] then
            n = getnext(n)
            if n then
                v = ischar(n,font)
            end
        end
        -- every step of the lookahead needs the previous one to succeed
        local nn  = n and v and getnext(n)
        local vv  = nn and ischar(nn,font)
        local nnn = vv and getnext(nn)
        local vvv = nnn and ischar(nnn,font)
        if vvv then
            if vv == c_zwj and consonant[vvv] then
                c = nnn
            elseif (vv == c_zwnj or vv == c_zwj) and halant[vvv] then
                local nnnn = getnext(nnn)
                local vvvv = nnnn and ischar(nnnn,font)
                if vvvv and consonant[vvvv] then
                    c = nnnn
                end
            end
        end
    elseif variant == 2 then
        local v = ischar(n,font)
        if v and nukta[v] then
            c = n
        end
        n = getnext(c)
        v = n and ischar(n,font)
        local nn = v and getnext(n)
        if nn then
            local vv = ischar(nn,font)
            if vv and zw_char[v] then
                -- step over the joiner
                n  = nn
                v  = vv
                nn = getnext(nn)
                vv = nn and ischar(nn,font)
            end
            if vv and halant[v] and consonant[vv] then
                c = nn
            end
        end
    end

    -- From here on n is always the node after c and v its character, so
    -- accepting a character boils down to: c = n, followed by a new peek.

    local n = getnext(c)
    if not n then
        return c
    end
    local v = ischar(n,font)
    if not v then
        return c
    end

    local seen_pre, seen_above, seen_below, seen_post
    while dependent_vowel[v] do
        local parts = twopart_mark[v] or { v }
        for _, part in next, parts do
            if pre_mark[part] and not seen_pre then
                seen_pre = true
            elseif above_mark[part] and not seen_above then
                seen_above = true
            elseif below_mark[part] and not seen_below then
                seen_below = true
            elseif post_mark[part] and not seen_post then
                seen_post = true
            else
                -- a second mark of the same class (or one without class) ends the syllable
                return c
            end
        end
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end

    -- optional nukta
    if nukta[v] then
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end

    -- optional halant
    if halant[v] then
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end

    -- optional vowel modifier
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end

    -- optional first stress/tone mark
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end

    -- optional second stress/tone mark (n is a node so the and/or is safe)
    return stress_tone_mark[v] and n or c
end
2344
-- Looks ahead from the consonant at node c and returns the last node that
-- belongs to the syllable that starts there. Nothing in the list is changed.
-- Scanning stops at the end of the list and at a node for which
-- ischar(node,font) fails.
--
-- First the consonant cluster is collected: an optional nukta, followed by
-- any number of "halant [zwj/zwnj] consonant [nukta]" or
-- "zwj/zwnj halant consonant [nukta]" groups. Then the tail is collected: an
-- optional anudatta, followed by either a halant with an optional zwj/zwnj,
-- or by dependent vowels (at most one of each class), a nukta and a halant;
-- finally a vowel modifier and at most two stress/tone marks are accepted.
local function analyze_next_chars_two(c,font)
    local n = getnext(c)
    if not n then
        return c
    end
    local v = ischar(n,font)
    if v and nukta[v] then
        c = n
    end
    -- 'n' is the lookahead cursor while 'c' is only moved forward when a
    -- complete group, so one that ends with a consonant, has been seen
    n = c
    while true do
        local nn = getnext(n)
        if nn then
            local vv = ischar(nn,font)
            if vv then
                if halant[vv] then
                    n = nn
                    -- an optional zero width character after the halant
                    local nnn = getnext(nn)
                    if nnn then
                        local vvv = ischar(nnn,font)
                        if vvv and zw_char[vvv] then
                            n = nnn
                        end
                    end
                elseif vv == c_zwnj or vv == c_zwj then
                    -- a joiner only counts when a halant follows; otherwise n stays where
                    -- it is and the consonant test below fails on the joiner
                    local nnn = getnext(nn)
                    if nnn then
                        local vvv = ischar(nnn,font)
                        if vvv and halant[vvv] then
                            n = nnn
                        end
                    end
                else
                    break
                end
                -- the group has to end with a consonant plus an optional nukta
                local nn = getnext(n)
                if nn then
                    local vv = ischar(nn,font)
                    if vv and consonant[vv] then
                        n = nn
                        local nnn = getnext(nn)
                        if nnn then
                            local vvv = ischar(nnn,font)
                            if vvv and nukta[vvv] then
                                n = nnn
                            end
                        end
                        c = n
                    else
                        break
                    end
                else
                    break
                end
            else
                break
            end
        else
            break
        end
    end

    if not c then
        -- a safeguard: in the code above c is only ever set to an existing node
        return
    end
    local n = getnext(c)
    if not n then
        return c
    end
    local v = ischar(n,font)
    if not v then
        return c
    end
    -- optional anudatta
    if anudatta[v] then
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end
    if halant[v] then
        -- a halant with an optional joiner
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
        if v == c_zwnj or v == c_zwj then
            c = n
            n = getnext(c)
            if not n then
                return c
            end
            v = ischar(n,font)
            if not v then
                return c
            end
        end
    else
        -- dependent vowels: at most one of each class, where a two part mark
        -- counts for both its parts
        local already_pre_mark
        local already_above_mark
        local already_below_mark
        local already_post_mark
        while dependent_vowel[v] do
            local vowels = twopart_mark[v] or { v }
            for k, v in next, vowels do
                if pre_mark[v] and not already_pre_mark then
                    already_pre_mark = true
                elseif above_mark[v] and not already_above_mark then
                    already_above_mark = true
                elseif below_mark[v] and not already_below_mark then
                    already_below_mark = true
                elseif post_mark[v] and not already_post_mark then
                    already_post_mark = true
                else
                    -- a second mark of the same class (or one without class) ends the syllable
                    return c
                end
            end
            c = n
            n = getnext(c)
            if not n then
                return c
            end
            v = ischar(n,font)
            if not v then
                return c
            end
        end
        -- optional nukta
        if nukta[v] then
            c = n
            n = getnext(c)
            if not n then
                return c
            end
            v = ischar(n,font)
            if not v then
                return c
            end
        end
        -- optional halant
        if halant[v] then
            c = n
            n = getnext(c)
            if not n then
                return c
            end
            v = ischar(n,font)
            if not v then
                return c
            end
        end
    end

    -- optional vowel modifier
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end
    -- optional first stress/tone mark
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        if not n then
            return c
        end
        v = ischar(n,font)
        if not v then
            return c
        end
    end
    -- optional second stress/tone mark
    if stress_tone_mark[v] then
        return n
    else
        return c
    end
end
2536
2537
2538
2539
-- Analyzer for the scripts listed in scripts_one (see the registration loop
-- at the end of the file).
--
-- The list is scanned for glyphs in the given font. Per syllable the start
-- and end node are determined and when these differ the range is handed over
-- to reorder_one (defined elsewhere in this file). Afterwards all nbsp's are
-- replaced when nbspaces is positive, and the first glyph of every run of
-- glyphs in this font gets the s_init state when it has no state yet.
--
-- head : head of the node list
-- font : font id that glyphs have to match
-- attr : only passed on to reorder_one
--
-- Returns: head (possibly changed) and a boolean that is true when at least
-- one glyph in the given font was seen.
local function method_one(head,font,attr)
    local current = head
    local start = true -- set to false after the first node but never tested
    local done = false
    local nbspaces = 0
    local syllabe = 0
    while current do
        local char = ischar(current,font)
        if char then
            done = true
            local syllablestart = current
            local syllableend = nil
            local c = current
            local n = getnext(c)
            local first = char
            -- when we start with ra + halant + character we look at the third character
            if n and ra[first] then
                local second = ischar(n,font)
                if second and halant[second] then
                    local n = getnext(n)
                    if n then
                        local third = ischar(n,font)
                        if third then
                            c = n
                            first = third
                        end
                    end
                end
            end
            -- A nbsp stands on its own unless the previous node is a glyph
            -- in this font whose character is found in the separator table.
            local standalone = first == c_nbsp
            if standalone then
                local prev = getprev(current)
                if prev then
                    local prevchar = ischar(prev,font)
                    if not prevchar then
                        -- different font or no glyph
                    elseif not separator[prevchar] then
                        -- something that is not registered as separator
                    else
                        standalone = false
                    end
                else
                    -- begin of list
                end
            end
            if standalone then
                -- a nbsp based syllable; this local shadows the outer syllableend
                local syllableend = analyze_next_chars_one(c,font,2)
                current = getnext(syllableend)
                if syllablestart ~= syllableend then
                    head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces)
                    current = getnext(current)
                end
            else

                -- a regular syllable: it starts with a consonant or an independent vowel

                if consonant[char] then
                    -- Collect the conjunct: as long as we see
                    -- "[nukta] halant [zwj/zwnj] consonant" we move on to that consonant.
                    local prevc = true
                    while prevc do
                        prevc = false
                        local n = getnext(current)
                        if not n then
                            break
                        end
                        local v = ischar(n,font)
                        if not v then
                            break
                        end
                        if nukta[v] then
                            n = getnext(n)
                            if not n then
                                break
                            end
                            v = ischar(n,font)
                            if not v then
                                break
                            end
                        end
                        if halant[v] then
                            n = getnext(n)
                            if not n then
                                break
                            end
                            v = ischar(n,font)
                            if not v then
                                break
                            end
                            if v == c_zwnj or v == c_zwj then
                                n = getnext(n)
                                if not n then
                                    break
                                end
                                v = ischar(n,font)
                                if not v then
                                    break
                                end
                            end
                            if consonant[v] then
                                prevc = true
                                current = n
                            end
                        end
                    end
                    local n = getnext(current)
                    if n then
                        local v = ischar(n,font)
                        if v and nukta[v] then
                            -- a nukta that follows the last consonant is part of the syllable
                            current = n
                            n = getnext(current)
                        end
                    end
                    syllableend = current
                    current = n
                    if current then
                        local v = ischar(current,font)
                        if not v then
                            -- different font or no glyph: the syllable ends here
                        elseif halant[v] then
                            -- a final halant, optionally followed by a zero width character
                            local n = getnext(current)
                            if n then
                                local v = ischar(n,font)
                                if v and zw_char[v] then
                                    -- the joiner is part of the syllable
                                    syllableend = n
                                    current = getnext(n)
                                else
                                    syllableend = current
                                    current = n
                                end
                            else
                                syllableend = current
                                current = n
                            end
                        else
                            -- an optional dependent vowel, vowel modifier and stress/tone mark
                            if dependent_vowel[v] then
                                syllableend = current
                                current = getnext(current)
                                v = ischar(current,font)
                            end
                            if v and vowel_modifier[v] then
                                syllableend = current
                                current = getnext(current)
                                v = ischar(current,font)
                            end
                            if v and stress_tone_mark[v] then
                                syllableend = current
                                current = getnext(current)
                            end
                        end
                    end
                    if syllablestart ~= syllableend then
                        if syllableend then
                            -- tag all nodes of the syllable with the same number
                            syllabe = syllabe + 1
                            local c = syllablestart
                            local n = getnext(syllableend)
                            while c ~= n do
                                setprop(c,a_syllabe,syllabe)
                                c = getnext(c)
                            end
                        end
                        head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces)
                        current = getnext(current)
                    end
                elseif independent_vowel[char] then
                    -- an independent vowel with an optional vowel modifier and
                    -- stress/tone mark; no reordering takes place here
                    syllableend = current
                    current = getnext(current)
                    if current then
                        local v = ischar(current,font)
                        if v then
                            if vowel_modifier[v] then
                                syllableend = current
                                current = getnext(current)
                                v = ischar(current,font)
                            end
                            if v and stress_tone_mark[v] then
                                syllableend = current
                                current = getnext(current)
                            end
                        end
                    end
                else
                    -- something that cannot start a syllable
                    if show_syntax_errors then
                        local mark = mark_four[char]
                        if mark then
                            head, current = inject_syntax_error(head,current,char)
                        end
                    end
                    current = getnext(current)
                end
            end
        else
            current = getnext(current)
        end
        start = false
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- the first glyph of every run of glyphs in this font gets the init
    -- state, unless it already has a state
    current = head
    local n = 0
    while current do
        local char = ischar(current,font)
        if char then
            if n == 0 and not getstate(current) then
                setstate(current,s_init)
            end
            n = n + 1
        else
            n = 0
        end
        current = getnext(current)
    end

    return head, done
end
2762
2763
2764
2765
-- Analyzer for the scripts listed in scripts_two (see the registration loop
-- at the end of the file).
--
-- Per glyph in the given font we determine where the syllable that starts
-- there ends. The nodes of a syllable are tagged with a shared a_syllabe
-- number and a syllable of more than one node is handed over to reorder_two.
-- Afterwards all nbsp's are replaced when nbspaces is positive, and the first
-- glyph of every run of glyphs in this font gets the s_init state when it has
-- no state yet.
--
-- head : head of the node list
-- font : font id that glyphs have to match
-- attr : only passed on to reorder_two
--
-- Returns: head (possibly changed) and a boolean that is true when at least
-- one glyph in the given font was seen.
local function method_two(head,font,attr)
    local done     = false
    local syllabe  = 0
    local nbspaces = 0
    local current  = head
    while current do
        local syllablestart = nil
        local syllableend   = nil
        local char = ischar(current,font)
        if char then
            done = true
            syllablestart = current
            local c = current
            -- when we start with ra + halant + character we look at the third character
            local second = getnext(current)
            if second and ra[char] then
                local secondchar = ischar(second,font)
                if secondchar and halant[secondchar] then
                    local third     = getnext(second)
                    local thirdchar = third and ischar(third,font)
                    if thirdchar then
                        c    = third
                        char = thirdchar
                    end
                end
            end
            if independent_vowel[char] then
                -- a syllable that starts with an independent vowel
                current = analyze_next_chars_one(c,font,1)
                syllableend = current
            else
                local standalone = char == c_nbsp
                if standalone then
                    nbspaces = nbspaces + 1
                    -- only a previous node that fails the ischar test but whose
                    -- character is a registered separator makes the nbsp not standalone
                    local p = getprev(current)
                    if p and not ischar(p,font) and separator[getchar(p)] then
                        standalone = false
                    end
                end
                if standalone then
                    -- a nbsp based syllable
                    current = analyze_next_chars_one(c,font,2)
                    syllableend = current
                elseif consonant[getchar(current)] then
                    -- a syllable that starts with a consonant
                    current = analyze_next_chars_two(current,font)
                    syllableend = current
                end
            end
        end
        if syllableend then
            -- tag all nodes of the syllable with the same number
            syllabe = syllabe + 1
            local node  = syllablestart
            local after = getnext(syllableend)
            while node ~= after do
                setprop(node,a_syllabe,syllabe)
                node = getnext(node)
            end
            if syllablestart ~= syllableend then
                head, current, nbspaces = reorder_two(head,syllablestart,syllableend,font,attr,nbspaces)
            end
        elseif show_syntax_errors then
            -- no syllable starts here: flag a stray mark
            local stray = ischar(current,font)
            if stray and not getstate(current) then
                if mark_four[stray] then
                    head, current = inject_syntax_error(head,current,stray)
                end
            end
        end
        current = getnext(current)
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- the first glyph of every run of glyphs in this font gets the init
    -- state, unless it already has a state
    local node = head
    local run  = 0
    while node do
        if ischar(node,font) then
            if run == 0 and not getstate(node) then
                setstate(node,s_init)
            end
            run = run + 1
        else
            run = 0
        end
        node = getnext(node)
    end

    return head, done
end
2872
-- Register the analyzers: the i-th entry of scripts_one is handled by
-- method_one and the i-th entry of scripts_two by method_two.
for index=1,nofscripts do
    local one = scripts_one[index]
    local two = scripts_two[index]
    methods[one] = method_one
    methods[two] = method_two
end
2877 |