font-osd.lua /size: 105 Kb    last modification: 2020-07-01 14:35
1
-- Make sure the global module registry exists, then record the metadata of
-- this file in it under its own name.

modules = modules or { }

modules['font-osd'] = { -- script devanagari
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Kai Eigner, TAT Zetwerk / Hans Hagen, PRAGMA ADE",
    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9 10
-- we need to check nbsphash (context only)
11 12
-- A few remarks:
13
--
14
-- This code is a partial rewrite of the code that deals with devanagari. The data
15
-- and logic is by Kai Eigner and based on Microsoft's OpenType specifications
16
-- for specific scripts, but with a few improvements. More information can be found
17
-- at:
18
--
19
-- deva: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/introO.mspx
20
-- dev2: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/intro.mspx
21
--
22
-- Rajeesh Nambiar provided patches for the malayalam variant. Thanks to feedback
23
-- from the mailing list some aspects could be improved.
24
--
25
-- As I touched nearly all code, reshuffled it, optimized a lot, etc. etc. (imagine
26
-- how much can get messed up in over a week work) it could be that I introduced
27
-- bugs. There is more to gain (esp in the functions applied to a range) but I'll do
28
-- that when everything works as expected. Kai's original code is kept in
29
-- font-odk.lua as a reference so blame me (HH) for bugs. (We no longer ship that
30
-- file as the code below has diverted too much and in the meantime has more than
31
-- doubled in size.)
32
--
33
-- Interesting is that Kai managed to write this on top of the existing otf handler.
34
-- Only a few extensions were needed, like a few more analyzing states and dealing
35
-- with changed head nodes in the core scanner as that only happens here. There's a
36
-- lot going on here and it's only because I touched nearly all code that I got a
37
-- bit of a picture of what happens. For in-depth knowledge one needs to consult
38
-- Kai.
39
--
40
-- The rewrite mostly deals with efficiency, both in terms of speed and code. We
41
-- also made sure that it suits generic use as well as use in ConTeXt. I removed
42
-- some buglets but can as well have messed up the logic by doing this. For this we
43
-- keep the original around as that serves as reference. Due to the lots of
44
-- reshuffling glyphs quite some leaks occur(red) but once I'm satisfied with the
45
-- rewrite I'll weed them. I also integrated initialization etc into the regular
46
-- mechanisms.
47
--
48
-- In the meantime, we're down from 25.5-3.5=22 seconds to 17.7-3.5=14.2 seconds for
49
-- a 100 page sample (mid 2012) with both variants so it's worth the effort. Some
50
-- more speedup is to be expected. Due to the method chosen it will never be real
51
-- fast. If I ever become a power user I'll have a go at some further speed up. I
52
-- will rename some functions (and features) once we don't need to check the
53
-- original code. We now use a special subset sequence for use inside the analyzer
54
-- (after all we could store this in the dataset and save redundant analysis).
55
--
56
-- By now we have yet another incremental improved version. In the end I might
57
-- rewrite the code.
58
--
59
-- Hans Hagen, PRAGMA-ADE, Hasselt NL
60 61
-- Todo:
62
--
63
-- Matras: according to Microsoft typography specifications "up to one of each type:
64
-- pre-, above-, below- or post- base", but that does not seem to be right. It could
65
-- become an option.
66
--
67
-- Resources:
68
--
69
-- The tables that we had here are now generated from char-def.lua or in the case of
70
-- generic usage loaded from luatex-basics-chr.lua. Still a couple of entries need
71
-- to be added to char-def.lua but finally I moved the indic specific tables there.
72
-- For generic usage one can create the relevant resources by running:
73
--
74
-- context luatex-basics-prepare.tex
75
--
76
-- and an overview with:
77
--
78
-- context --global s-fonts-basics.mkiv
79
--
80
-- For now we have defined: bengali, devanagari, gujarati, gurmukhi, kannada,
81
-- malayalam, oriya, tamil and telugu but not all are checked. Also, some of the
82
-- code below might need to be adapted to the extra scripts.
83 84
local
insert
,
imerge
,
copy
,
tohash
=
table
.
insert
,
table
.
imerge
,
table
.
copy
,
table
.
tohash
85
local
next
,
type
=
next
,
type
86 87
local
report
=
logs
.
reporter
(
"
otf
"
,
"
devanagari
"
)
88 89
fonts
=
fonts
or
{
}
90
fonts
.
analyzers
=
fonts
.
analyzers
or
{
}
91
fonts
.
analyzers
.
methods
=
fonts
.
analyzers
.
methods
or
{
node
=
{
otf
=
{
}
}
}
92 93
local
otf
=
fonts
.
handlers
.
otf
94 95
local
handlers
=
otf
.
handlers
96
local
methods
=
fonts
.
analyzers
.
methods
97 98
local
otffeatures
=
fonts
.
constructors
.
features
.
otf
99
local
registerotffeature
=
otffeatures
.
register
100 101
local
nuts
=
nodes
.
nuts
102 103
local
getnext
=
nuts
.
getnext
104
local
getprev
=
nuts
.
getprev
105
local
getboth
=
nuts
.
getboth
106
local
getid
=
nuts
.
getid
107
local
getchar
=
nuts
.
getchar
108
local
getfont
=
nuts
.
getfont
109
local
getsubtype
=
nuts
.
getsubtype
110
local
setlink
=
nuts
.
setlink
111
local
setnext
=
nuts
.
setnext
112
local
setprev
=
nuts
.
setprev
113
local
setchar
=
nuts
.
setchar
114
local
getprop
=
nuts
.
getprop
115
local
setprop
=
nuts
.
setprop
116
local
getstate
=
nuts
.
getstate
117
local
setstate
=
nuts
.
setstate
118 119
local
ischar
=
nuts
.
ischar
120 121
local
insert_node_after
=
nuts
.
insert_after
122
local
copy_node
=
nuts
.
copy
123
local
remove_node
=
nuts
.
remove
124
local
flush_list
=
nuts
.
flush_list
125
local
flush_node
=
nuts
.
flush_node
126 127
local
copyinjection
=
nodes
.
injections
.
copy
-- KE: is this necessary? HH: probably not as positioning comes later and we rawget/set
128 129
local
unsetvalue
=
attributes
.
unsetvalue
130 131
local
fontdata
=
fonts
.
hashes
.
identifiers
132 133
local
a_syllabe
=
attributes
.
private
(
'
syllabe
'
)
134 135
local
dotted_circle
=
0x25CC
136
local
c_nbsp
=
0x00A0
137
local
c_zwnj
=
0x200C
138
local
c_zwj
=
0x200D
139 140
local
states
=
fonts
.
analyzers
.
states
-- not features
141 142
local
s_rphf
=
states
.
rphf
143
local
s_half
=
states
.
half
144
local
s_pref
=
states
.
pref
145
local
s_blwf
=
states
.
blwf
146
local
s_pstf
=
states
.
pstf
147
local
s_init
=
states
.
init
148 149
-- Delayed definition: the real replacer is looked up at the first call, because
-- typesetters.characters.replacenbspaces may not exist yet when this file is
-- loaded. After that first call the local points directly at the function that
-- was found, or at a fallback that hands back the head untouched.

local replace_all_nbsp

function replace_all_nbsp(head)
    local tc       = typesetters and typesetters.characters
    local replacer = tc and tc.replacenbspaces
    if not replacer then
        replacer = function(head)
            return head
        end
    end
    replace_all_nbsp = replacer
    return replacer(head)
end
157 158
-- processcharacters(head,font) feeds a node list through all processors that
-- belong to a font and returns the resulting head. Only the place where the
-- processors are found differs: fonts.hashes.processes when running in context,
-- fontdata[font].shared.processes otherwise.

local processcharacters = nil

do

    -- each processor gets the current head, the font and 0, and its result
    -- becomes the head for the next one
    local function runprocessors(processors,head,font)
        for index=1,#processors do
            head = processors[index](head,font,0)
        end
        return head
    end

    if context then
        local fontprocesses = fonts.hashes.processes
        processcharacters = function(head,font)
            return runprocessors(fontprocesses[font],head,font)
        end
    else
        processcharacters = function(head,font)
            return runprocessors(fontdata[font].shared.processes,head,font)
        end
    end

end
178 179
-- We can assume that script are not mixed in the source but if that is the case
180
-- we might need to have consonants etc per script and initialize a local table
181
-- pointing to the right one. But not now.
182 183
-- The indic character classes normally come ready made as characters.indicgroups.
-- When the characters table exists but has no such entry, the groups are derived
-- here from the indic, indicmark, indicclass and indicorder fields found in
-- characters.data, and the result is stored in characters.indicgroups.

local indicgroups = characters and characters.indicgroups

if characters and not indicgroups then

    -- sets of codepoints, indexed by the short codes used in the character data

    local bytype = {
        c = { }, -- consonant
        i = { }, -- independent vowel
        d = { }, -- dependent vowel
        m = { }, -- vowel modifier
        s = { }, -- stress tone mark
        o = { }, -- other
    }

    local bymark = {
        l = { }, -- left   | pre_mark
        t = { }, -- top    | above_mark
        b = { }, -- bottom | below_mark
        r = { }, -- right  | post_mark
        s = { }, -- split  | twopart_mark
    }

    local byclass = {
        nukta    = { },
        halant   = { },
        ra       = { },
        anudatta = { },
    }

    local byorder = {
        bp = { }, -- before_postscript
        ap = { }, -- after_postscript
        bs = { }, -- before_subscript
        as = { }, -- after_subscript
        bh = { }, -- before_half
        ah = { }, -- after_half
        bm = { }, -- before_main
        am = { }, -- after_main
    }

    -- only characters that have an indic type are looked at any further

    for unicode, data in next, characters.data do
        local kind = data.indic
        if kind then
            bytype[kind][unicode] = true
            local mark = data.indicmark
            if mark == "s" then
                -- a split mark keeps the second and third entry of its specials
                local specials = data.specials
                bymark.s[unicode] = { specials[2], specials[3] }
            elseif mark then
                bymark[mark][unicode] = true
            end
            local class = data.indicclass
            if class then
                byclass[class][unicode] = true
            end
            local order = data.indicorder
            if order then
                byorder[order][unicode] = true
            end
        end
    end

    indicgroups = {
        consonant         = bytype.c,
        independent_vowel = bytype.i,
        dependent_vowel   = bytype.d,
        vowel_modifier    = bytype.m,
        stress_tone_mark  = bytype.s,
     -- other             = bytype.o,
        pre_mark          = bymark.l,
        above_mark        = bymark.t,
        below_mark        = bymark.b,
        post_mark         = bymark.r,
        twopart_mark      = bymark.s,
        nukta             = byclass.nukta,
        halant            = byclass.halant,
        ra                = byclass.ra,
        anudatta          = byclass.anudatta,
        before_postscript = byorder.bp,
        after_postscript  = byorder.ap,
        before_half       = byorder.bh,
        after_half        = byorder.ah,
        before_subscript  = byorder.bs,
        after_subscript   = byorder.as,
        before_main       = byorder.bm,
        after_main        = byorder.am,
    }

    characters.indicgroups = indicgroups

end
280 281
local
consonant
=
indicgroups
.
consonant
282
local
independent_vowel
=
indicgroups
.
independent_vowel
283
local
dependent_vowel
=
indicgroups
.
dependent_vowel
284
local
vowel_modifier
=
indicgroups
.
vowel_modifier
285
local
stress_tone_mark
=
indicgroups
.
stress_tone_mark
286
local
pre_mark
=
indicgroups
.
pre_mark
287
local
above_mark
=
indicgroups
.
above_mark
288
local
below_mark
=
indicgroups
.
below_mark
289
local
post_mark
=
indicgroups
.
post_mark
290
local
twopart_mark
=
indicgroups
.
twopart_mark
291
local
nukta
=
indicgroups
.
nukta
292
local
halant
=
indicgroups
.
halant
293
local
ra
=
indicgroups
.
ra
294
local
anudatta
=
indicgroups
.
anudatta
295 296
local
before_postscript
=
indicgroups
.
before_postscript
297
local
after_postscript
=
indicgroups
.
after_postscript
298
local
before_half
=
indicgroups
.
before_half
299
local
after_half
=
indicgroups
.
after_half
300
local
before_subscript
=
indicgroups
.
before_subscript
301
local
after_subscript
=
indicgroups
.
after_subscript
302
local
before_main
=
indicgroups
.
before_main
303
local
after_main
=
indicgroups
.
after_main
304 305
local
mark_four
=
table
.
merged
(
306
pre_mark
,
307
above_mark
,
308
below_mark
,
309
post_mark
310
)
311 312
local
mark_above_below_post
=
table
.
merged
(
313
above_mark
,
314
below_mark
,
315
post_mark
316
)
317 318
-- We use some pseudo features as we need to manipulate the nodelist based
319
-- on information in the font as well as already applied features. We can
320
-- probably replace some of the code below by injecting 'real' features
321
-- using the extension mechanism.
322 323
-- Coverage of the pseudo feature that removes joiners: the two zero width
-- (non) joiner codepoints.

local zw_char = { -- both_joiners_true
    [c_zwnj] = true,
    [c_zwj]  = true,
}

-- Language set that is shared by every script entry in the two tables below.

local dflt_true = {
    dflt = true,
}

-- Script to language-set mappings used as the feature specification of the
-- pseudo sequences. Both start out empty; the loop over scripts_one and
-- scripts_two further down fills them.

local two_defaults = { }
local one_defaults = { }

local false_flags = { false, false, false, false }

-- The four pseudo sequences. They are templates: initializedevanagi copies
-- them per font before they get inserted in the list of sequences.
-- NOTE(review): the 'type' strings presumably name handlers defined further
-- on in the file; that part is not visible from here.

local sequence_reorder_matras = {
    features = { dv01 = two_defaults },
    flags    = false_flags,
    name     = "dv01_reorder_matras",
    order    = { "dv01" },
    type     = "devanagari_reorder_matras",
    nofsteps = 1,
    steps    = {
        {
            coverage = pre_mark,
        }
    }
}

local sequence_reorder_reph = {
    features = { dv02 = two_defaults },
    flags    = false_flags,
    name     = "dv02_reorder_reph",
    order    = { "dv02" },
    type     = "devanagari_reorder_reph",
    nofsteps = 1,
    steps    = {
        {
            coverage = { }, -- the whole steps list is replaced per font in initializedevanagi
        }
    }
}

local sequence_reorder_pre_base_reordering_consonants = {
    features = { dv03 = one_defaults },
    flags    = false_flags,
    name     = "dv03_reorder_pre_base_reordering_consonants",
    order    = { "dv03" },
    type     = "devanagari_reorder_pre_base_reordering_consonants",
    nofsteps = 1,
    steps    = {
        {
            coverage = { }, -- replaced per font in initializedevanagi
        }
    }
}

local sequence_remove_joiners = {
    features = { dv04 = one_defaults },
    flags    = false_flags,
    name     = "dv04_remove_joiners",
    order    = { "dv04" },
    type     = "devanagari_remove_joiners",
    nofsteps = 1,
    steps    = {
        {
            coverage = zw_char, -- both_joiners_true
        },
    }
}

-- Looping over feature twice as efficient as looping over basic forms (some
-- 350 checks instead of 750 for one font). This is something to keep an eye on
-- as it might depends on the font. Not that it's a bottleneck.

-- Features whose sequences are moved to the front of the sequence list by
-- initializedevanagi.

local basic_shaping_forms = {
    akhn = true,
    blwf = true,
    cjct = true,
    half = true,
    nukt = true,
    pref = true,
    pstf = true,
    rkrf = true,
    rphf = true,
    vatu = true,
    locl = true,
}
410 411
-- Features that qualify a sequence for the subset that the analyzer uses (see
-- the seqsubset collected in initializedevanagi). Each feature is listed once;
-- the earlier version repeated pres, abvs, psts, haln and calt, which had no
-- effect on the resulting set.

local valid = {
    abvs = true,
    akhn = true,
    blwf = true,
    blws = true,
    calt = true,
    cjct = true,
    half = true,
    haln = true,
    locl = true,
    nukt = true,
    pref = true,
    pres = true,
    pstf = true,
    psts = true,
    rkrf = true,
    rphf = true,
    vatu = true,
}
435 436
-- The supported scripts: scripts_one has the old style tags and scripts_two
-- the matching new style tags at the same index. All of them end up as keys
-- in scripts. The new style tags get the default language set in both
-- two_defaults and one_defaults, the old style tags only in one_defaults.

local scripts     = { }

local scripts_one = { "deva", "mlym", "beng", "gujr", "guru", "knda", "orya", "taml", "telu" }
local scripts_two = { "dev2", "mlm2", "bng2", "gjr2", "gur2", "knd2", "ory2", "tml2", "tel2" }

local nofscripts  = #scripts_one

for index=1,nofscripts do
    local oldtag = scripts_one[index]
    local newtag = scripts_two[index]
    -- known scripts
    scripts[oldtag] = true
    scripts[newtag] = true
    -- features that apply to both generations
    one_defaults[oldtag] = dflt_true
    one_defaults[newtag] = dflt_true
    -- features that only apply to the new generation
    two_defaults[newtag] = dflt_true
end
452 453
-- Checks if the given set has one of the old style script tags (scripts_one)
-- as key with a true value. Gives true on the first hit and nothing otherwise.

local function valid_one(s)
    for index=1,nofscripts do
        local tag = scripts_one[index]
        if s[tag] then
            return true
        end
    end
end
454
-- Checks if the given set has one of the new style script tags (scripts_two)
-- as key with a true value. Gives true on the first hit and nothing otherwise.

local function valid_two(s)
    for index=1,nofscripts do
        local tag = scripts_two[index]
        if s[tag] then
            return true
        end
    end
end
455 456
-- Font initializer (hooked in for node mode by the registerotffeature call that
-- follows this function). It only acts when the script reported by
-- otf.scriptandlanguage is one of the keys in 'scripts' and when the font has
-- no resources.devanagari yet. In that case it:
--
-- * adds the pseudo features dv01 .. dv04 to the gsub feature list of the font
-- * moves sequences that carry a basic shaping form feature to the front of
--   resources.sequences and inserts copies of the four pseudo sequences right
--   after them
-- * stores the per font data (reph and vattu flags, the blwf, vatu and pstf
--   caches, the sequence subset and the reph step) in resources.devanagari
-- * enables the pseudo features in tfmdata.shared.features
--
-- NOTE(review): 'attr' is neither a parameter nor a local that is visible here,
-- so unless a global with that name exists the second argument below is nil.

local function initializedevanagi(tfmdata)
    local script, language = otf.scriptandlanguage(tfmdata,attr) -- todo: take fast variant
    if scripts[script] then
        local resources  = tfmdata.resources
        local devanagari = resources.devanagari
        if not devanagari then
            --
            report("adding devanagari features to font")
            --
            local gsubfeatures   = resources.features.gsub
            local sequences      = resources.sequences
            local sharedfeatures = tfmdata.shared.features
            --
            gsubfeatures["dv01"] = two_defaults -- reorder matras
            gsubfeatures["dv02"] = two_defaults -- reorder reph
            gsubfeatures["dv03"] = one_defaults -- reorder pre base reordering consonants
            gsubfeatures["dv04"] = one_defaults -- remove joiners
            --
            -- the templates are copied so that each font gets its own sequences
            local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants)
            local reorder_reph                           = copy(sequence_reorder_reph)
            local reorder_matras                         = copy(sequence_reorder_matras)
            local remove_joiners                         = copy(sequence_remove_joiners)

            -- lastmatch counts the sequences that have been moved to the front
            local lastmatch = 0
            for s=1,#sequences do -- classify chars and make sure basic_shaping_forms come first
                local features = sequences[s].features
                if features then
                    for k, v in next, features do
                        if k == "locl" then
                            -- whatever locl maps a pre base mark onto is added
                            -- to the coverage of the matra reordering step
                            local steps    = sequences[s].steps
                            local nofsteps = sequences[s].nofsteps
                            for i=1,nofsteps do
                                local step     = steps[i]
                                local coverage = step.coverage
                                if coverage then
                                    -- from here on k and v are the loop variables of this
                                    -- inner loop (a pre mark), not the feature name
                                    for k, v in next, pre_mark do
                                        local locl = coverage[k]
                                        if locl then
                                            if #locl > 0 then --contextchain; KE: is this right?
                                                for j=1,#locl do
                                                    local ck = locl[j]
                                                    local f  = ck[4]
                                                    local chainlookups = ck[6]
                                                    if chainlookups then
                                                        local chainlookup = chainlookups[f]
                                                        for j=1,#chainlookup do
                                                            local chainstep = chainlookup[j]
                                                            local steps     = chainstep.steps
                                                            local nofsteps  = chainstep.nofsteps
                                                            for i=1,nofsteps do
                                                                local step     = steps[i]
                                                                local coverage = step.coverage
                                                                if coverage then
                                                                    -- overwrites the list that the enclosing
                                                                    -- loop over j is walking
                                                                    locl = coverage[k]
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            end
                                            if locl then
                                                reorder_matras.steps[1].coverage[locl] = true
                                            end
                                        end
                                    end
                                end
                            end
                        end
                        -- here k is the feature name again
                        if basic_shaping_forms[k] then
                            lastmatch = lastmatch + 1
                            if s ~= lastmatch then
                                table.insert(sequences,lastmatch,table.remove(sequences,s))
                            end
                        end
                    end
                end
            end
            local insertindex = lastmatch + 1
            --
            -- NOTE(review): dflt_true is the one table that all script entries in
            -- one_defaults and two_defaults point to, so the language added here is
            -- seen by every script and every font that is initialized afterwards
            if tfmdata.properties.language then
                dflt_true[tfmdata.properties.language] = true
            end
            --
            -- all four are inserted at the same index so they end up in the reverse
            -- order: remove joiners, matras, reph, pre base reordering consonants
            insert(sequences,insertindex,reorder_pre_base_reordering_consonants)
            insert(sequences,insertindex,reorder_reph)
            insert(sequences,insertindex,reorder_matras)
            insert(sequences,insertindex,remove_joiners)
            --
            local blwfcache = { }
            local vatucache = { }
            local pstfcache = { }
            local seqsubset = { }
            local rephstep  = {
                coverage = { } -- will be adapted each work
            }
            local devanagari = {
                reph        = false,
                vattu       = false,
                blwfcache   = blwfcache,
                vatucache   = vatucache,
                pstfcache   = pstfcache,
                seqsubset   = seqsubset,
                reorderreph = rephstep,

            }
            --
            reorder_reph.steps = { rephstep }
            --
            local pre_base_reordering_consonants = { }
            reorder_pre_base_reordering_consonants.steps[1].coverage = pre_base_reordering_consonants
            --
            resources.devanagari = devanagari
            --
            -- second pass, now including the four sequences that were just inserted
            -- NOTE(review): unlike the first pass there is no check for a missing
            -- features table here, so features.rphf assumes every sequence has one
            for s=1,#sequences do
                local sequence = sequences[s]
                local steps    = sequence.steps
                local nofsteps = sequence.nofsteps
                local features = sequence.features
                local has_rphf = features.rphf
                local has_blwf = features.blwf
                local has_vatu = features.vatu
                local has_pstf = features.pstf
                if has_rphf and has_rphf[script] then
                    devanagari.reph = true
                elseif (has_blwf and has_blwf[script]) or (has_vatu and has_vatu[script]) then
                    devanagari.vattu = true
                    for i=1,nofsteps do
                        local step     = steps[i]
                        local coverage = step.coverage
                        if coverage then
                            for k, v in next, coverage do
                                for h, w in next, halant do
                                    -- first entry wins in both caches
                                    if v[h] then
                                        if not blwfcache[k] then
                                            blwfcache[k] = v
                                        end
                                    end
                                    if has_vatu and has_vatu[script] and not vatucache[k] then
                                        vatucache[k] = v
                                    end
                                end
                            end
                        end
                    end
                elseif has_pstf and has_pstf[script] then
                    for i=1,nofsteps do
                        local step     = steps[i]
                        local coverage = step.coverage
                        if coverage then
                            for k, v in next, coverage do
                                if not pstfcache[k] then
                                    pstfcache[k] = v
                                end
                            end
                            -- a ligature reachable from a ra is registered as pre base
                            -- reordering consonant, directly or via a chained lookup
                            for k, v in next, ra do
                                local r = coverage[k]
                                if r then
                                    local found = false
                                    if #r > 0 then -- contextchain; KE: is this right?
                                        for j=1,#r do
                                            local ck = r[j]
                                            local f  = ck[4]
                                            local chainlookups = ck[6]
                                            if chainlookups and chainlookups[f] then --KE: why is check for chainlookups[f] necessary???
                                                local chainlookup = chainlookups[f]
                                                for j=1,#chainlookup do
                                                    local chainstep = chainlookup[j]
                                                    local steps     = chainstep.steps
                                                    local nofsteps  = chainstep.nofsteps
                                                    for i=1,nofsteps do
                                                        local step     = steps[i]
                                                        local coverage = step.coverage
                                                        if coverage then
                                                            local h = coverage[k]
                                                            if h then
                                                                for k, v in next, h do
                                                                    found = v and v.ligature
                                                                    if found then
                                                                        pre_base_reordering_consonants[found] = true
                                                                        break
                                                                    end
                                                                end
                                                                if found then
                                                                    break
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            end
                                        end
                                    else
                                        for k, v in next, r do
                                            found = v and v.ligature
                                            if found then
                                                pre_base_reordering_consonants[found] = true
                                                break
                                            end
                                        end
                                    end
                                    if found then
                                        break
                                    end
                                end
                            end
                        end
                    end
                end
                -- collect the { kind, coverage, reph, rephbase } entries of the subset;
                -- reph and rephbase stay false unless the feature is rphf
                for kind, spec in next, features do
                    if valid[kind] and valid_two(spec) then
                        for i=1,nofsteps do
                            local step     = steps[i]
                            local coverage = step.coverage
                            if coverage then
                                local reph, rephbase = false, false
                                if kind == "rphf" then
                                    -- rphf acts on consonant + halant
                                    for k, v in next, ra do
                                        local r = coverage[k]
                                        if r then
                                            rephbase = k
                                            local h  = false
                                            if #r > 0 then --contextchain; KE: is this right?
                                                for j=1,#r do
                                                    local ck = r[j]
                                                    local f  = ck[4]
                                                    local chainlookups = ck[6]
                                                    if chainlookups then
                                                        local chainlookup = chainlookups[f]
                                                        for j=1,#chainlookup do
                                                            local chainstep = chainlookup[j]
                                                            local steps     = chainstep.steps
                                                            local nofsteps  = chainstep.nofsteps
                                                            for i=1,nofsteps do
                                                                local step     = steps[i]
                                                                local coverage = step.coverage
                                                                if coverage then
                                                                    local r = coverage[k]
                                                                    if r then
                                                                        for k, v in next, halant do
                                                                            local h = r[k]
                                                                            if h then
                                                                                reph = h.ligature or false
                                                                                break
                                                                            end
                                                                        end
                                                                        -- NOTE(review): this tests the outer h, which is
                                                                        -- set to false above and never assigned again (the
                                                                        -- h in the loop is a new local), so this break is
                                                                        -- never taken
                                                                        if h then
                                                                            break
                                                                        end
                                                                    end
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            else
                                                for k, v in next, halant do
                                                    local h = r[k]
                                                    if h then
                                                        reph = h.ligature or false
                                                        break
                                                    end
                                                end
                                            end
                                            if reph then
                                                break
                                            end
                                        end
                                    end
                                end
                                seqsubset[#seqsubset+1] = { kind, coverage, reph, rephbase }
                            end
                        end
                    end
                    if kind == "pref" then
                        -- same search as in the pstf branch above but starting from the
                        -- halants instead of from ra
                        local steps    = sequence.steps
                        local nofsteps = sequence.nofsteps
                        for i=1,nofsteps do
                            local step     = steps[i]
                            local coverage = step.coverage
                            if coverage then
                                for k, v in next, halant do
                                    local h = coverage[k]
                                    if h then
                                        local found = false
                                        if #h > 0 then -- contextchain; KE: is this right?
                                            for j=1,#h do
                                                local ck = h[j]
                                                local f  = ck[4]
                                                local chainlookups = ck[6]
                                                if chainlookups then
                                                    local chainlookup = chainlookups[f]
                                                    for j=1,#chainlookup do
                                                        local chainstep = chainlookup[j]
                                                        local steps     = chainstep.steps
                                                        local nofsteps  = chainstep.nofsteps
                                                        for i=1,nofsteps do
                                                            local step     = steps[i]
                                                            local coverage = step.coverage
                                                            if coverage then
                                                                local h = coverage[k]
                                                                if h then
                                                                    for k, v in next, h do
                                                                        found = v and v.ligature
                                                                        if found then
                                                                            pre_base_reordering_consonants[found] = true
                                                                            break
                                                                        end
                                                                    end
                                                                    if found then
                                                                        break
                                                                    end
                                                                end
                                                            end
                                                        end
                                                    end
                                                end
                                            end
                                        else
                                            for k, v in next, h do
                                                found = v and v.ligature
                                                if found then
                                                    pre_base_reordering_consonants[found] = true
                                                    break
                                                end
                                            end
                                        end
                                        if found then
                                            break
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
            end
            --
            -- new style scripts get all four pseudo features, old style ones only
            -- the last two
            if two_defaults[script] then
                sharedfeatures["dv01"] = true -- dv01_reorder_matras
                sharedfeatures["dv02"] = true -- dv02_reorder_reph
                sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants
                sharedfeatures["dv04"] = true -- dv04_remove_joiners
            elseif one_defaults[script] then
                sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants
                sharedfeatures["dv04"] = true -- dv04_remove_joiners
            end
            if script == "mlym" or script == "taml" then
                devanagari.left_matra_before_base = true
            end
        end
    end
end
809 810
-- Register the initializer as the otf feature "devanagari". It is enabled by
-- default and only has an initializer for node mode.

registerotffeature {
    name         = "devanagari",
    description  = "inject additional features",
    default      = true,
    initializers = {
        node = initializedevanagi,
    },
}
818 819
local
show_syntax_errors
=
false
820 821
-- Marks a syntax error with a dotted circle. A copy of the current node is
-- put right after it; when the character is a pre base mark the copy becomes
-- the dotted circle, otherwise the current node itself does. The values that
-- come back are those of insert_node_after.

local function inject_syntax_error(head,current,char)
    local duplicate = copy_node(current)
    copyinjection(duplicate,current)
    local target = current
    if pre_mark[char] then
        target = duplicate
    end
    setchar(target,dotted_circle)
    return insert_node_after(head,current,duplicate)
end
831 832
-- hm, this is applied to one character:
833 834
-- Gives the reph flag, the vattu flag and the blwf, vatu and pstf caches that
-- belong to a font and attribute combination. The values are kept in the
-- devanagari field of the dataset list that otf.dataset returns, so the list
-- is only scanned the first time. (We need a proper hook into the dataset
-- initializer.)

local function initialize_one(font,attr)

    local tfmdata  = fontdata[font]
    local datasets = otf.dataset(tfmdata,font,attr) -- don't we know this one?
    local cached   = datasets.devanagari

    if not cached then

        cached = {
            reph      = false,
            vattu     = false,
            blwfcache = { },
            vatucache = { },
            pstfcache = { },
        }
        datasets.devanagari = cached

        local fontwide = tfmdata.resources.devanagari

        for index=1,#datasets do
            local entry = datasets[index]
            -- the kind only counts when the entry has a value in its first slot
            local kind  = entry and entry[1] and entry[4]
            if kind == "rphf" then
                -- deva
                cached.reph = true
            elseif kind == "blwf" or kind == "vatu" then
                -- deva
                cached.vattu = true
                -- dev2: from now on use the caches collected for the font
                cached.blwfcache = fontwide.blwfcache
                cached.vatucache = fontwide.vatucache
                cached.pstfcache = fontwide.pstfcache
            end
        end

    end

    return cached.reph, cached.vattu, cached.blwfcache, cached.vatucache, cached.pstfcache

end
876 877
-- Checks if node n takes part in a match of one of the given contextual rules.
--
-- Each rule is an indexed table of which slot 3 is the sequence (a list of
-- character sets), slot 4 the first and slot 5 the last position of the
-- current part in that sequence. Only rules where last - first is 1 and where
-- the character of n is in the set at position first + 1 are considered. For
-- such a rule the nodes after n have to match the sets after the last position
-- and the nodes before the predecessor of n have to match the sets before the
-- first position. The predecessor itself is not checked here.
--
-- contexts : list of rules
-- n        : the node to test
--
-- returns true when a rule matches and false otherwise
--
-- The backward scan now stops at the first mismatch, just like the forward
-- scan already did. Before, it went on and asked for the predecessor of a node
-- that could already be nil. The outcome is the same.

local function contextchain(contexts, n)
    local char = getchar(n)
    for k=1,#contexts do
        local ck  = contexts[k]
        local seq = ck[3]
        local f   = ck[4]
        local l   = ck[5]
        if (l - f) == 1 and seq[f+1][char] then
            local ok = true
            local c  = n
            -- lookahead: what follows the current part
            for i=l+1,#seq do
                c = getnext(c)
                if not c or not seq[i][ischar(c)] then
                    ok = false
                    break
                end
            end
            if ok then
                -- lookbehind: start at the node before n (position f) and go back
                c = getprev(n)
                for i=1,f-1 do
                    c = c and getprev(c)
                    if not c or not seq[f-i][ischar(c)] then
                        ok = false
                        break
                    end
                end
            end
            if ok then
                return true
            end
        end
    end
    return false
end
910 911
-- Reorders the dependent vowels that follow node c. Each dependent vowel that
-- is found after c is compared with the nodes from c up to that vowel and is
-- moved in front of the first node for which one of these holds:
--
-- * the vowel is an above mark and the node is a below or post mark
-- * the vowel is a below mark and the node is a post mark
--
-- The function returns nothing; it only relinks nodes.
--
-- NOTE(review): the successor of c is used without a check, so this assumes
-- that c is followed by a node as long as dependent vowels are found. Also,
-- getprev(cc) is assumed to exist when a vowel is moved in front of cc, which
-- includes the case where cc is c itself.

local function order_matras(c)
    local cn = getnext(c)
    local char = getchar(cn)
    while dependent_vowel[char] do
        -- remember where to continue before cn is possibly moved
        local next = getnext(cn)
        local cc = c
        local cchar = getchar(cc)
        while cc ~= cn do
            if (above_mark[char] and (below_mark[cchar] or post_mark[cchar])) or (below_mark[char] and (post_mark[cchar])) then
                -- take cn out of the list ...
                local prev, next = getboth(cn)
                if next then
                    setprev(next,prev)
                end
                -- todo: setlink
                setnext(prev,next)
                -- ... and put it back right before cc
                setnext(getprev(cc),cn)
                setprev(cn,getprev(cc))
                setnext(cn,cc)
                setprev(cc,cn)
                break
            end
            cc = getnext(cc)
            cchar = getchar(cc)
        end
        cn = next
        char = getchar(cn)
    end
end
939 940
local
function
reorder_one
(
head
,
start
,
stop
,
font
,
attr
,
nbspaces
)
941 942
local
reph
,
vattu
,
blwfcache
,
vatucache
,
pstfcache
=
initialize_one
(
font
,
attr
)
-- todo: a hash[font]
943 944
local
devanagari
=
fontdata
[
font
]
.
resources
.
devanagari
945
local
current
=
start
946
local
n
=
getnext
(
start
)
947
local
base
=
nil
948
local
firstcons
=
nil
949
local
lastcons
=
nil
950
local
basefound
=
false
951 952
if
reph
and
ra
[
getchar
(
start
)
]
and
halant
[
getchar
(
n
)
]
then
953
-- if syllable starts with Ra + H and script has 'Reph' then exclude Reph
954
-- from candidates for base consonants
955
if
n
=
=
stop
then
956
return
head
,
stop
,
nbspaces
957
end
958
if
getchar
(
getnext
(
n
)
)
=
=
c_zwj
then
959
current
=
start
960
else
961
current
=
getnext
(
n
)
962
setstate
(
start
,
s_rphf
)
963
end
964
end
965 966
if
getchar
(
current
)
=
=
c_nbsp
then
967
-- Stand Alone cluster
968
if
current
=
=
stop
then
969
stop
=
getprev
(
stop
)
970
head
=
remove_node
(
head
,
current
)
971
flush_node
(
current
)
972
return
head
,
stop
,
nbspaces
973
else
974
nbspaces
=
nbspaces
+
1
975
base
=
current
976
firstcons
=
current
977
lastcons
=
current
978
current
=
getnext
(
current
)
979
if
current
~
=
stop
then
980
local
char
=
getchar
(
current
)
981
if
nukta
[
char
]
then
982
current
=
getnext
(
current
)
983
char
=
getchar
(
current
)
984
end
985
if
char
=
=
c_zwj
and
current
~
=
stop
then
986
local
next
=
getnext
(
current
)
987
if
next
~
=
stop
and
halant
[
getchar
(
next
)
]
then
988
current
=
next
989
next
=
getnext
(
current
)
990
local
tmp
=
next
and
getnext
(
next
)
or
nil
-- needs checking
991
local
changestop
=
next
=
=
stop
992
local
tempcurrent
=
copy_node
(
next
)
993
copyinjection
(
tempcurrent
,
next
)
994
local
nextcurrent
=
copy_node
(
current
)
995
copyinjection
(
nextcurrent
,
current
)
-- KE: necessary? HH: probably not as positioning comes later and we rawget/set
996
setlink
(
tempcurrent
,
nextcurrent
)
997
setstate
(
tempcurrent
,
s_blwf
)
998
tempcurrent
=
processcharacters
(
tempcurrent
,
font
)
999
setstate
(
tempcurrent
,
unsetvalue
)
1000
if
getchar
(
next
)
=
=
getchar
(
tempcurrent
)
then
1001
flush_list
(
tempcurrent
)
1002
if
show_syntax_errors
then
1003
head
,
current
=
inject_syntax_error
(
head
,
current
,
char
)
1004
end
1005
else
1006
setchar
(
current
,
getchar
(
tempcurrent
)
)
-- we assumes that the result of blwf consists of one node
1007
local
freenode
=
getnext
(
current
)
1008
setlink
(
current
,
tmp
)
1009
flush_node
(
freenode
)
1010
flush_list
(
tempcurrent
)
1011
if
changestop
then
1012
stop
=
current
1013
end
1014
end
1015
end
1016
end
1017
end
1018
end
1019
end
1020 1021
while
not
basefound
do
1022
-- find base consonant
1023
local
char
=
getchar
(
current
)
1024
if
consonant
[
char
]
then
1025
setstate
(
current
,
s_half
)
1026
if
not
firstcons
then
1027
firstcons
=
current
1028
end
1029
lastcons
=
current
1030
if
not
base
then
1031
base
=
current
1032
elseif
blwfcache
[
char
]
then
1033
-- consonant has below-base form
1034
setstate
(
current
,
s_blwf
)
1035
elseif
pstfcache
[
char
]
then
1036
-- consonant has post-base form
1037
setstate
(
current
,
s_pstf
)
1038
else
1039
base
=
current
1040
end
1041
end
1042
basefound
=
current
=
=
stop
1043
current
=
getnext
(
current
)
1044
end
1045 1046
if
base
~
=
lastcons
then
1047
-- if base consonant is not last one then move halant from base consonant to last one
1048
local
np
=
base
1049
local
n
=
getnext
(
base
)
1050
local
ch
=
getchar
(
n
)
1051
if
nukta
[
ch
]
then
1052
np
=
n
1053
n
=
getnext
(
n
)
1054
ch
=
getchar
(
n
)
1055
end
1056
if
halant
[
ch
]
then
1057
if
lastcons
~
=
stop
then
1058
local
ln
=
getnext
(
lastcons
)
1059
if
nukta
[
getchar
(
ln
)
]
then
1060
lastcons
=
ln
1061
end
1062
end
1063
-- local np = getprev(n)
1064
local
nn
=
getnext
(
n
)
1065
local
ln
=
getnext
(
lastcons
)
-- what if lastcons is nn ?
1066
setlink
(
np
,
nn
)
1067
setnext
(
lastcons
,
n
)
1068
if
ln
then
1069
setprev
(
ln
,
n
)
1070
end
1071
setnext
(
n
,
ln
)
1072
setprev
(
n
,
lastcons
)
1073
if
lastcons
=
=
stop
then
1074
stop
=
n
1075
end
1076
end
1077
end
1078 1079
n
=
getnext
(
start
)
1080
if
n
~
=
stop
and
ra
[
getchar
(
start
)
]
and
halant
[
getchar
(
n
)
]
and
not
zw_char
[
getchar
(
getnext
(
n
)
)
]
then
1081
-- if syllable starts with Ra + H then move this combination so that it follows either:
1082
-- the post-base 'matra' (if any) or the base consonant
1083
local
matra
=
base
1084
if
base
~
=
stop
then
1085
local
next
=
getnext
(
base
)
1086
if
dependent_vowel
[
getchar
(
next
)
]
then
1087
matra
=
next
1088
end
1089
end
1090
-- [sp][start][n][nn] [matra|base][?]
1091
-- [matra|base][start] [n][?] [sp][nn]
1092
local
sp
=
getprev
(
start
)
1093
local
nn
=
getnext
(
n
)
1094
local
mn
=
getnext
(
matra
)
1095
setlink
(
sp
,
nn
)
1096
setlink
(
matra
,
start
)
1097
setlink
(
n
,
mn
)
1098
if
head
=
=
start
then
1099
head
=
nn
1100
end
1101
start
=
nn
1102
if
matra
=
=
stop
then
1103
stop
=
n
1104
end
1105
end
1106 1107
local
current
=
start
1108
while
current
~
=
stop
do
1109
local
next
=
getnext
(
current
)
1110
if
next
~
=
stop
and
halant
[
getchar
(
next
)
]
and
getchar
(
getnext
(
next
)
)
=
=
c_zwnj
then
1111
setstate
(
current
,
unsetvalue
)
1112
end
1113
current
=
next
1114
end
1115 1116
if
base
~
=
stop
and
getstate
(
base
)
then
-- state can also be init
1117
local
next
=
getnext
(
base
)
1118
if
halant
[
getchar
(
next
)
]
and
not
(
next
~
=
stop
and
getchar
(
getnext
(
next
)
)
=
=
c_zwj
)
then
1119
setstate
(
base
,
unsetvalue
)
1120
end
1121
end
1122 1123
-- split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable.
1124
-- classify consonants and 'matra' parts as pre-base, above-base (Reph), below-base or post-base, and group elements of the syllable (consonants and 'matras') according to this classification
1125 1126
local
current
,
allreordered
,
moved
=
start
,
false
,
{
[
base
]
=
true
}
1127
local
a
,
b
,
p
,
bn
=
base
,
base
,
base
,
getnext
(
base
)
1128
if
base
~
=
stop
and
nukta
[
getchar
(
bn
)
]
then
1129
a
,
b
,
p
=
bn
,
bn
,
bn
1130
end
1131
while
not
allreordered
do
1132
-- current is always consonant
1133
local
c
=
current
1134
local
n
=
getnext
(
current
)
1135
local
l
=
nil
-- used ?
1136
if
c
~
=
stop
then
1137
local
ch
=
getchar
(
n
)
1138
if
nukta
[
ch
]
then
1139
c
=
n
1140
n
=
getnext
(
n
)
1141
ch
=
getchar
(
n
)
1142
end
1143
if
c
~
=
stop
then
1144
if
halant
[
ch
]
then
1145
c
=
n
1146
n
=
getnext
(
n
)
1147
ch
=
getchar
(
n
)
1148
end
1149 1150
local
tpm
=
twopart_mark
[
ch
]
1151
while
tpm
do
1152
local
extra
=
copy_node
(
n
)
1153
copyinjection
(
extra
,
n
)
1154
ch
=
tpm
[
1
]
1155
setchar
(
n
,
ch
)
1156
setchar
(
extra
,
tpm
[
2
]
)
1157
head
=
insert_node_after
(
head
,
current
,
extra
)
1158
tpm
=
twopart_mark
[
ch
]
1159
end
1160
while
c
~
=
stop
and
dependent_vowel
[
ch
]
do
1161
c
=
n
1162
n
=
getnext
(
n
)
1163
ch
=
getchar
(
n
)
1164
end
1165
if
c
~
=
stop
then
1166
if
vowel_modifier
[
ch
]
then
1167
c
=
n
1168
n
=
getnext
(
n
)
1169
ch
=
getchar
(
n
)
1170
end
1171
if
c
~
=
stop
and
stress_tone_mark
[
ch
]
then
1172
c
=
n
1173
n
=
getnext
(
n
)
1174
end
1175
end
1176
end
1177
end
1178
local
bp
=
getprev
(
firstcons
)
1179
local
cn
=
getnext
(
current
)
1180
local
last
=
getnext
(
c
)
1181
while
cn
~
=
last
do
1182
-- move pre-base matras...
1183
if
pre_mark
[
getchar
(
cn
)
]
then
1184
if
devanagari
.
left_matra_before_base
then
1185
local
prev
,
next
=
getboth
(
cn
)
1186
setlink
(
prev
,
next
)
1187
if
cn
=
=
stop
then
1188
stop
=
getprev
(
cn
)
1189
end
1190
if
base
=
=
start
then
1191
if
head
=
=
start
then
1192
head
=
cn
1193
end
1194
start
=
cn
1195
end
1196
setlink
(
getprev
(
base
)
,
cn
)
1197
setlink
(
cn
,
base
)
1198
-- setlink(getprev(base),cn,base) -- maybe
1199
cn
=
next
1200
else
1201
if
bp
then
1202
setnext
(
bp
,
cn
)
1203
end
1204
local
prev
,
next
=
getboth
(
cn
)
1205
if
next
then
1206
setprev
(
next
,
prev
)
1207
end
1208
setnext
(
prev
,
next
)
1209
if
cn
=
=
stop
then
1210
stop
=
prev
1211
end
1212
setprev
(
cn
,
bp
)
1213
setlink
(
cn
,
firstcons
)
1214
if
firstcons
=
=
start
then
1215
if
head
=
=
start
then
1216
head
=
cn
1217
end
1218
start
=
cn
1219
end
1220
cn
=
next
1221
end
1222
elseif
current
~
=
base
and
dependent_vowel
[
getchar
(
cn
)
]
then
1223
local
prev
,
next
=
getboth
(
cn
)
1224
if
next
then
1225
setprev
(
next
,
prev
)
1226
end
1227
setnext
(
prev
,
next
)
1228
if
cn
=
=
stop
then
1229
stop
=
prev
1230
end
1231
setlink
(
b
,
cn
,
getnext
(
b
)
)
1232
order_matras
(
cn
)
1233
cn
=
next
1234
elseif
current
=
=
base
and
dependent_vowel
[
getchar
(
cn
)
]
then
1235
local
cnn
=
getnext
(
cn
)
1236
order_matras
(
cn
)
1237
cn
=
cnn
1238
while
cn
~
=
last
and
dependent_vowel
[
getchar
(
cn
)
]
do
1239
cn
=
getnext
(
cn
)
1240
end
1241
else
1242
cn
=
getnext
(
cn
)
1243
end
1244
end
1245
allreordered
=
c
=
=
stop
1246
current
=
getnext
(
c
)
1247
end
1248 1249
if
reph
or
vattu
then
1250
local
current
,
cns
=
start
,
nil
1251
while
current
~
=
stop
do
1252
local
c
=
current
1253
local
n
=
getnext
(
current
)
1254
if
ra
[
getchar
(
current
)
]
and
halant
[
getchar
(
n
)
]
then
1255
c
=
n
1256
n
=
getnext
(
n
)
1257
local
b
,
bn
=
base
,
base
1258
while
bn
~
=
stop
do
1259
local
next
=
getnext
(
bn
)
1260
if
dependent_vowel
[
getchar
(
next
)
]
then
1261
b
=
next
1262
end
1263
bn
=
next
1264
end
1265
if
getstate
(
current
,
s_rphf
)
then
1266
-- position Reph (Ra + H) after post-base 'matra' (if any) since these
1267
-- become marks on the 'matra', not on the base glyph
1268
if
b
~
=
current
then
1269
if
current
=
=
start
then
1270
if
head
=
=
start
then
1271
head
=
n
1272
end
1273
start
=
n
1274
end
1275
if
b
=
=
stop
then
1276
stop
=
c
1277
end
1278
local
prev
=
getprev
(
current
)
1279
setlink
(
prev
,
n
)
1280
local
next
=
getnext
(
b
)
1281
setlink
(
c
,
next
)
1282
setlink
(
b
,
current
)
1283
end
1284
elseif
cns
and
getnext
(
cns
)
~
=
current
then
-- todo: optimize next
1285
-- position below-base Ra (vattu) following the consonants on which it is placed (either the base consonant or one of the pre-base consonants)
1286
local
cp
=
getprev
(
current
)
1287
local
cnsn
=
getnext
(
cns
)
1288
setlink
(
cp
,
n
)
1289
setlink
(
cns
,
current
)
-- cns ?
1290
setlink
(
c
,
cnsn
)
1291
if
c
=
=
stop
then
1292
stop
=
cp
1293
break
1294
end
1295
current
=
getprev
(
n
)
1296
end
1297
else
1298
local
char
=
getchar
(
current
)
1299
if
consonant
[
char
]
then
1300
cns
=
current
1301
local
next
=
getnext
(
cns
)
1302
if
halant
[
getchar
(
next
)
]
then
1303
cns
=
next
1304
end
1305
if
not
vatucache
[
char
]
then
1306
next
=
getnext
(
cns
)
1307
while
dependent_vowel
[
getchar
(
next
)
]
do
1308
cns
=
next
1309
next
=
getnext
(
cns
)
1310
end
1311
end
1312
elseif
char
=
=
c_nbsp
then
1313
nbspaces
=
nbspaces
+
1
1314
cns
=
current
1315
local
next
=
getnext
(
cns
)
1316
if
halant
[
getchar
(
next
)
]
then
1317
cns
=
next
1318
end
1319
if
not
vatucache
[
char
]
then
1320
next
=
getnext
(
cns
)
1321
while
dependent_vowel
[
getchar
(
next
)
]
do
1322
cns
=
next
1323
next
=
getnext
(
cns
)
1324
end
1325
end
1326
end
1327
end
1328
current
=
getnext
(
current
)
1329
end
1330
end
1331 1332
if
getchar
(
base
)
=
=
c_nbsp
then
1333
nbspaces
=
nbspaces
-
1
1334
if
base
=
=
stop
then
1335
stop
=
getprev
(
stop
)
1336
end
1337
head
=
remove_node
(
head
,
base
)
1338
flush_node
(
base
)
1339
end
1340 1341
return
head
,
stop
,
nbspaces
1342
end
1343 1344
-- If a pre-base matra character had been reordered before applying basic features,
1345
-- the glyph can be moved closer to the main consonant based on whether half-forms had been formed.
1346
-- Actual position for the matra is defined as “after last standalone halant glyph,
1347
-- after initial matra position and before the main consonant”.
1348
-- If ZWJ or ZWNJ follow this halant, position is moved after it.
1349 1350
-- So we break out ... this is only done for the first 'word' (if we feed words we can as
-- well test for a non-glyph).
1352 1353
-- Moves the pre-base matra at 'start' forward so that it ends up right after the first
-- halant found in the same syllable (and after a zero width joiner directly following that
-- halant, when that joiner belongs to the same syllable). When no halant is found before
-- the syllable (or the run of glyphs in the start font) ends, nothing is moved.
--
-- head  : head of the node list
-- start : the matra node to be repositioned
--
-- Returns the (possibly changed) head, the node at which the caller continues (the node
-- that followed 'start' when a move took place, otherwise 'start' itself) and true.

function handlers.devanagari_reorder_matras(head,start) -- no leak
    local font     = getfont(start)
    local syllable = getprop(start,a_syllabe)
    local current  = start
    while current do
        local char = ischar(current,font)
        if not char or getprop(current,a_syllabe) ~= syllable then
            -- we left the syllable or ran into something that is not a glyph in this font
            break
        end
        local after = getnext(current)
        if halant[char] then -- state can also be init
            if after then
                local joiner = ischar(after,font)
                if joiner and zw_char[joiner] and getprop(after,a_syllabe) == syllable then
                    -- a zwj/zwnj following the halant stays attached to it
                    current = after
                    after   = getnext(current)
                end
            end
            -- take the matra out and put it back between 'current' and 'after'
            local following = getnext(start)
            head = remove_node(head,start)
            setlink(start,after)
            setlink(current,start)
            start = following
            break
        end
        current = after
    end
    return head, start, true
end
1397 1398
-- Reph’s original position is always at the beginning of the syllable, (i.e. it is
1399
-- not reordered at the character reordering stage). However, it will be reordered
1400
-- according to the basic-forms shaping results. Possible positions for reph,
1401
-- depending on the script, are; after main, before post-base consonant forms, and
1402
-- after post-base consonant forms.
1403 1404
-- In Devanagari reph has reordering position 'before postscript' and dev2 only
1405
-- follows step 2, 4, and 6.
1406 1407
-- Per font lookup table: reorder_two (re)creates rephbase[font] and stores subset[4] under
-- the key subset[3] for every "rphf" subset; handlers.devanagari_reorder_reph reads the
-- entry back as rephbase[font][char] and uses it as key into the after_* / before_* tables.
local rephbase = { }
1408 1409
-- Repositions the reph at 'start' inside its syllable. The labelled steps follow the
-- numbering used in the comments below (taken from the OpenType script specification);
-- the first step that finds a target position moves the node, and later steps are
-- skipped because 'startnext' is then set.
--
-- head  : head of the node list
-- start : the reph node
--
-- Returns the (possibly changed) head, the node at which the caller continues (the node
-- that followed the reph before it was moved, or 'start' when nothing was moved) and true.
--
-- Changes compared to the previous version:
--
-- * step 3 fetched getnext(current) only after current had already been linked to start,
--   so the reph was linked to itself (or lost its tail) instead of to the node that
--   followed the consonant; the successor is now fetched before relinking, in the same
--   order as steps 2 and 6 do it;
-- * a font without an entry in 'rephbase' no longer raises an error;
-- * the local no longer shadows the 'rephbase' table and the unused 'startprev' is gone.

function handlers.devanagari_reorder_reph(head,start)
    local current   = getnext(start)
    local startnext = nil
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)
    --
    ::step_1::
    --
    -- If reph should be positioned after post-base consonant forms, proceed to step 5.
    --
    local char      = ischar(start,startfont)
    local fontbase  = rephbase[startfont]
    local rephclass = fontbase and fontbase[char]
    if char and after_subscript[rephclass] then
        goto step_5
    end
    --
    ::step_2::
    --
    -- If the reph repositioning class is not after post-base: target position is after
    -- the first explicit halant glyph between the first post-reph consonant and last
    -- main consonant. If ZWJ or ZWNJ are following this halant, position is moved after
    -- it. If such position is found, this is the target position. Otherwise, proceed to
    -- the next step. Note: in old-implementation fonts, where classifications were
    -- fixed in shaping engine, there was no case where reph position will be found on
    -- this step.
    --
    if char and not after_postscript[rephclass] then
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                if halant[char] then
                    local next = getnext(current)
                    if next then
                        local nextchar = ischar(next,startfont)
                        if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then
                            current = next
                            next    = getnext(current)
                        end
                    end
                    startnext = getnext(start)
                    head = remove_node(head,start)
                    setlink(start,next)
                    setlink(current,start)
                    start = startnext
                    startattr = getprop(start,a_syllabe)
                    break
                end
                current = getnext(current)
            else
                break
            end
        end
    end
    --
    ::step_3::
    --
    -- If reph should be repositioned after the main consonant: find the first consonant
    -- not ligated with main, or find the first consonant that is not a potential
    -- pre-base reordering Ra.
    --
    if not startnext then
        if char and after_main[rephclass] then
            current = getnext(start)
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if consonant[char] and not getstate(current,s_pref) then
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        -- the successor has to be fetched before current gets linked to start
                        local next = getnext(current)
                        setlink(start,next)
                        setlink(current,start)
                        start = startnext
                        startattr = getprop(start,a_syllabe)
                        break
                    end
                    current = getnext(current)
                else
                    break
                end
            end
        end
    end
    --
    ::step_4::
    --
    -- If reph should be positioned before post-base consonant, find first post-base
    -- classified consonant not ligated with main. If no consonant is found, the target
    -- position should be before the first matra, syllable modifier sign or vedic sign.
    --
    if not startnext then
        if char and before_postscript[rephclass] then
            current = getnext(start)
            local c = nil -- first vowel modifier or stress/tone mark seen (fallback target)
            while current do
                local char = ischar(current,startfont)
                if char and getprop(current,a_syllabe) == startattr then
                    if getstate(current,s_pstf) then -- post-base
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(current),start)
                        setlink(start,current)
                        start = startnext
                        startattr = getprop(start,a_syllabe)
                        break
                    elseif not c and (vowel_modifier[char] or stress_tone_mark[char]) then
                        c = current
                    end
                    current = getnext(current)
                else
                    -- end of the syllable: fall back on the remembered mark, if any
                    if c then
                        startnext = getnext(start)
                        head = remove_node(head,start)
                        setlink(getprev(c),start)
                        setlink(start,c)
                        start = startnext
                        startattr = getprop(start,a_syllabe)
                    end
                    break
                end
            end
        end
    end
    --
    ::step_5::
    --
    -- If no consonant is found in steps 3 or 4, move reph to a position immediately
    -- before the first post-base matra, syllable modifier sign or vedic sign that has a
    -- reordering class after the intended reph position. For example, if the reordering
    -- position for reph is post-main, it will skip above-base matras that also have a
    -- post-main position.
    --
    if not startnext then
        current = getnext(start)
        local c = nil -- last node in the syllable that qualifies as target
        while current do
            local char = ischar(current,startfont)
            if char and getprop(current,a_syllabe) == startattr then
                local state = getstate(current)
                if before_subscript[rephclass] and (state == s_blwf or state == s_pstf) then
                    c = current
                elseif after_subscript[rephclass] and (state == s_pstf) then
                    c = current
                end
                current = getnext(current)
            else
                break
            end
        end
        -- here we can loose the old start node: maybe best split cases
        if c then
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(getprev(c),start)
            setlink(start,c)
            start = startnext
            startattr = getprop(start,a_syllabe)
        end
    end
    --
    ::step_6::
    --
    -- Otherwise, reorder reph to the end of the syllable.
    --
    if not startnext then
        current = start
        local next = getnext(current)
        while next do
            local nextchar = ischar(next,startfont)
            if nextchar and getprop(next,a_syllabe) == startattr then
                current = next
                next = getnext(current)
            else
                break
            end
        end
        if start ~= current then
            startnext = getnext(start)
            head = remove_node(head,start)
            setlink(start,getnext(current))
            setlink(current,start)
            start = startnext
        end
    end
    --
    return head, start, true
end
1603 1604
-- If a pre-base reordering consonant is found, reorder it according to the following rules:
1605
--
1606
-- 1 Only reorder a glyph produced by substitution during application of the feature. (Note
1607
-- that a font may shape a Ra consonant with the feature generally but block it in certain
1608
-- contexts.)
1609
-- 2 Try to find a target position the same way as for pre-base matra. If it is found, reorder
1610
-- pre-base consonant glyph.
1611
-- 3 If position is not found, reorder immediately before main consonant.
1612 1613
-- Here we implement a few handlers:
1614
--
1615
-- function(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
1616
-- return head, start, done
1617
-- end
1618 1619
-- Set of nodes (node -> true) that handlers.devanagari_reorder_pre_base_reordering_consonants
-- has already moved; that handler returns immediately for a node found in here. Nothing in
-- the code visible here removes entries again.
local reordered_pre_base_reordering_consonants = { } -- shared ? not reset ?
1620 1621
-- Repositions a pre-base reordering consonant (see the rules in the comment above):
--
-- 1 a node that was already moved by this handler is left alone;
-- 2 the target is looked for in the same way as for a pre-base matra: right after the
--   first halant in the syllable (and after a zwj/zwnj that directly follows it);
-- 3 when no halant is found we walk backwards through the syllable and put the node in
--   front of the first node that is not a dependent vowel and has no state or state init.
--
-- head  : head of the node list
-- start : the consonant to be repositioned
--
-- Returns the (possibly changed) head, the node at which the caller continues and true.
--
-- Change compared to the previous version: 'startnext' in the backward scan was assigned
-- without being declared, so it leaked into the global namespace; it is a local now. The
-- redundant redeclarations of 'startattr' and 'current' are gone too.

function handlers.devanagari_reorder_pre_base_reordering_consonants(head,start)
    if reordered_pre_base_reordering_consonants[start] then
        return head, start, true
    end
    local current   = start -- we could cache attributes here
    local startfont = getfont(start)
    local startattr = getprop(start,a_syllabe)
    -- forward scan: look for a halant in the same syllable
    while current do
        local char = ischar(current,startfont)
        local next = getnext(current)
        if char and getprop(current,a_syllabe) == startattr then
            if halant[char] then -- state can also be init
                if next then
                    local char = ischar(next,startfont)
                    if char and zw_char[char] and getprop(next,a_syllabe) == startattr then
                        current = next
                        next    = getnext(current)
                    end
                end
                -- can be optimized
                local startnext = getnext(start)
                head = remove_node(head,start)
                setlink(start,next)
                setlink(current,start)
                reordered_pre_base_reordering_consonants[start] = true
                start = startnext
                return head, start, true
            end
        else
            break
        end
        current = next
    end
    -- backward scan: start has not been touched, so startattr is still valid
    current = getprev(start)
    while current and getprop(current,a_syllabe) == startattr do
        local char = ischar(current) -- no font passed here (as before)
        if (not dependent_vowel[char] and (not getstate(current) or getstate(current,s_init))) then
            local startnext = getnext(start)
            head = remove_node(head,start)
            if current == head then
                setlink(start,current)
                head = start
            else
                setlink(getprev(current),start)
                setlink(start,current)
            end
            reordered_pre_base_reordering_consonants[start] = true
            start = startnext
            break
        end
        current = getprev(current)
    end

    return head, start, true
end
1691 1692
-- Removes the node at 'start' together with the zwj/zwnj glyphs (in the same font) that
-- directly follow it. The removed run is cut out of the list and flushed.
--
-- head  : head of the node list
-- start : first node of the run to be removed
-- kind, lookupname, replacement : not used here
--
-- Returns the (possibly changed) head, the first node after the removed run (nil when the
-- run reached the end of the list) and true.

function handlers.devanagari_remove_joiners(head,start,kind,lookupname,replacement)
    local font = getfont(start)
    local last = start           -- last node of the run that will be removed
    local stop = getnext(start)  -- first node that survives
    while stop do
        local char = ischar(stop,font)
        if not (char and (char == c_zwnj or char == c_zwj)) then
            break
        end
        last = stop
        stop = getnext(stop)
    end
    local prev = getprev(start)
    if stop then
        -- terminate the run and close the gap
        setnext(last)
        setlink(prev,stop)
    elseif prev then
        -- the run ends the list
        setnext(prev)
    end
    if head == start then
        head = stop
    end
    flush_list(start)
    return head, stop, true
end
1718 1719
-- Fetches the devanagari specific data of a font.
--
-- font : font id, used as index in fontdata
-- attr : not used here
--
-- Returns two tables: resources.devanagari.seqsubset and resources.devanagari.reorderreph.
-- A fresh empty table is returned for each one that is missing (or for both when the font
-- has no devanagari resources at all).

local function initialize_two(font,attr)
    local devanagari = fontdata[font].resources.devanagari
    if not devanagari then
        return { }, { }
    end
    return devanagari.seqsubset or { }, devanagari.reorderreph or { }
end
1730 1731
-- this one will be merged into the caller: it saves a call, but we will then make function
1732
-- of the actions
1733 1734
-- Reorders one syllable (the nodes start .. stop) for the second generation (dev2 style)
-- scripts. The work is done in these stages:
--
-- 1 walk over the font's feature subsets (rphf, pref, half, blwf/vatu, pstf) and tag the
--   matching nodes with a state; remember the half, sub and post positions;
-- 2 find the base consonant (or handle a stand alone cluster that starts with a nbsp);
-- 3 move matras to the position that belongs to their class;
-- 4 reorder halant + ra;
-- 5 make sure that a nukta comes before an adjacent run of halants / stress tone marks;
-- 6 remove a nbsp that served as base.
--
-- head     : head of the node list
-- start    : first node of the syllable
-- stop     : last node of the syllable
-- font     : font id
-- attr     : only passed on to initialize_two
-- nbspaces : running counter, incremented when a nbsp is used as base and decremented
--            again when that nbsp is removed
--
-- Returns the (possibly changed) head, the (possibly changed) last node of the syllable
-- and the updated nbspaces counter.
--
-- Change compared to the previous version: stage 5 relinked the wrong nodes (it linked the
-- node after the nukta in front of the run and patched the prev pointer of the node after
-- that one), which left a cycle in the list. The nukta is now simply unhooked and linked
-- in front of the run.

local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pass over (determine stop in sweep)
    local seqsubset, reorderreph = initialize_two(font,attr)

    local halfpos = nil
    local basepos = nil
    local subpos  = nil
    local postpos = nil

    reorderreph.coverage = { }
    rephbase[font]       = { }

    for i=1,#seqsubset do

        -- this can be done more efficient, the last test and less getnext

        local subset      = seqsubset[i]
        local kind        = subset[1]
        local lookupcache = subset[2]
        if kind == "rphf" then
            reorderreph.coverage[subset[3]] = true -- neat
            rephbase[font][subset[3]] = subset[4]
            local current = start
            local last    = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c     = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                        if found[getchar(next)] or contextchain(found,next) then -- above-base: rphf Consonant + Halant
                            local afternext = next ~= stop and getnext(next)
                            if afternext and zw_char[getchar(afternext)] then -- ZWJ and ZWNJ prevent creation of reph
                                current = afternext -- getnext(next)
                            elseif current == start then
                                setstate(current,s_rphf)
                                current = next
                            else
                                current = next
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "pref" then
            local current = start
            local last    = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c     = getchar(current)
                    local found = lookupcache[c]
                    if found then -- pre-base: pref Halant + Consonant
                        local next = getnext(current)
                        if found[getchar(next)] or contextchain(found,next) then
                            if (not getstate(current) and not getstate(next)) then -- KE: state can also be init...
                                setstate(current,s_pref)
                                setstate(next,s_pref)
                                current = next
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "half" then -- half forms: half / Consonant + Halant
            local current = start
            local last    = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c     = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                        if found[getchar(next)] or contextchain(found,next) then
                            if next ~= stop and getchar(getnext(next)) == c_zwnj then -- zwnj prevent creation of half
                                current = next
                            elseif (not getstate(current)) then -- KE: state can also be init...
                                setstate(current,s_half)
                                if not halfpos then
                                    halfpos = current
                                end
                            end
                            current = getnext(current)
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "blwf" or kind == "vatu" then -- below-base: blwf / Halant + Consonant
            local current = start
            local last    = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c     = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                        if found[getchar(next)] or contextchain(found,next) then
                            if (not getstate(current) and not getstate(next)) then -- KE: state can also be init...
                                setstate(current,s_blwf)
                                setstate(next,s_blwf)
                                current = next
                                subpos  = current
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        elseif kind == "pstf" then -- post-base: pstf / Halant + Consonant
            local current = start
            local last    = getnext(stop)
            while current ~= last do
                if current ~= stop then
                    local c     = getchar(current)
                    local found = lookupcache[c]
                    if found then
                        local next = getnext(current)
                        if found[getchar(next)] or contextchain(found,next) then
                            if (not getstate(current) and not getstate(next)) then -- KE: state can also be init...
                                setstate(current,s_pstf)
                                setstate(next,s_pstf)
                                current = next
                                postpos = current
                            end
                        end
                    end
                end
                current = getnext(current)
            end
        end
    end

    local current, base, firstcons = start, nil, nil

    if getstate(start,s_rphf) then
        -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph from candidates for base consonants
        current = getnext(getnext(start))
    end

    if current ~= getnext(stop) and getchar(current) == c_nbsp then
        -- Stand Alone cluster
        if current == stop then
            stop = getprev(stop)
            head = remove_node(head,current)
            flush_node(current)
            return head, stop, nbspaces
        else
            nbspaces = nbspaces + 1
            base     = current
            current  = getnext(current)
            if current ~= stop then
                local char = getchar(current)
                if nukta[char] then
                    current = getnext(current)
                    char    = getchar(current)
                end
                if char == c_zwj then
                    local next = getnext(current)
                    if current ~= stop and next ~= stop and halant[getchar(next)] then
                        current = next
                        next    = getnext(current)
                        local tmp        = getnext(next)
                        local changestop = next == stop
                        -- isolate halant + consonant and let the font features work on them
                        setnext(next)
                        setstate(current,s_pref)
                        current = processcharacters(current,font)
                        setstate(current,s_blwf)
                        current = processcharacters(current,font)
                        setstate(current,s_pstf)
                        current = processcharacters(current,font)
                        setstate(current,unsetvalue)
                        if halant[getchar(current)] then
                            -- nothing got substituted: hook the rest of the list on again
                            setnext(getnext(current),tmp)
                            if show_syntax_errors then
                                head, current = inject_syntax_error(head,current,char)
                            end
                        else
                            setnext(current,tmp) -- assumes that result of pref, blwf, or pstf consists of one node
                            if changestop then
                                stop = current
                            end
                        end
                    end
                end
            end
        end
    else -- not Stand Alone cluster
        local last = getnext(stop)
        while current ~= last do -- find base consonant
            local next = getnext(current)
            if consonant[getchar(current)] then
                if not (current ~= stop and next ~= stop and halant[getchar(next)] and getchar(getnext(next)) == c_zwj) then
                    if not firstcons then
                        firstcons = current
                    end
                    -- check whether consonant has below-base or post-base form or is pre-base reordering Ra
                    local a = getstate(current)
                    if not (a == s_blwf or a == s_pstf or (a ~= s_rphf and a ~= s_blwf and ra[getchar(current)])) then
                        base = current
                    end
                end
            end
            current = next
        end
        if not base then
            base = firstcons
        end
    end

    if not base then
        if getstate(start,s_rphf) then
            setstate(start,unsetvalue)
        end
        return head, stop, nbspaces
    else
        if getstate(base) then -- state can also be init
            setstate(base,unsetvalue)
        end
        basepos = base
    end
    if not halfpos then
        halfpos = base
    end
    if not subpos then
        subpos = base
    end
    if not postpos then
        postpos = subpos or base
    end

    -- Matra characters are classified and reordered by which consonant in a conjunct they have affinity for

    local moved   = { }
    local current = start
    local last    = getnext(stop)
    while current ~= last do
        local char   = getchar(current)
        local target = nil
        local cn     = getnext(current)
        -- not so efficient (needed for malayalam)
        local tpm = twopart_mark[char]
        while tpm do
            local extra = copy_node(current)
            copyinjection(extra,current)
            char = tpm[1]
            setchar(current,char)
            setchar(extra,tpm[2])
            head = insert_node_after(head,current,extra)
            tpm = twopart_mark[char]
        end
        --
        if not moved[current] and dependent_vowel[char] then
            if pre_mark[char] then -- or: if before_main or before_half
                moved[current] = true
                -- can be helper to remove one node
                local prev, next = getboth(current)
                setlink(prev,next)
                if current == stop then
                    stop = getprev(current)
                end

                local pos
                if before_main[char] then
                    pos = basepos
                    -- basepos = current -- is this correct?
                else
                    -- must be before_half
                    pos = halfpos
                    -- halfpos = current -- is this correct?
                end

                local ppos = getprev(pos) -- necessary?
                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) do
                    if getstate(ppos,s_pref) then
                        pos = ppos
                    end
                    ppos = getprev(ppos)
                end

                local ppos = getprev(pos) -- necessary?
                while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and halant[ischar(ppos)] do
                    ppos = getprev(ppos)
                    if ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and consonant[ischar(ppos)] then
                        pos = ppos
                        ppos = getprev(ppos)
                    else
                        break
                    end
                end

                if pos == start then
                    if head == start then
                        head = current
                    end
                    start = current
                end
                setlink(getprev(pos),current)
                setlink(current,pos)
                -- setlink(getprev(pos),current,pos) -- maybe
            elseif above_mark[char] then
                -- after main consonant
                target = basepos
                if subpos == basepos then
                    subpos = current
                end
                if postpos == basepos then
                    postpos = current
                end
                basepos = current
            elseif below_mark[char] then
                -- after subjoined consonants
                target = subpos
                if postpos == subpos then
                    postpos = current
                end
                subpos = current
            elseif post_mark[char] then
                -- after post-form consonant
                local n = getnext(postpos) -- nukta and vedic sign come first - is that right? and also halant+ra
                while n do
                    local v = ischar(n,font)
                    if nukta[v] or stress_tone_mark[v] or vowel_modifier[v] then
                        postpos = n
                    else
                        break
                    end
                    n = getnext(n)
                end
                target  = postpos
                postpos = current
            end
            if mark_above_below_post[char] then
                local prev = getprev(current)
                if prev ~= target then
                    local next = getnext(current)
                    setlink(prev,next)
                    if current == stop then
                        stop = prev
                    end
                    setlink(current,getnext(target))
                    setlink(target,current)
                    -- setlink(target,current,getnext(target)) -- maybe
                end
            end
        end
        current = cn
    end

    -- reorder halant+Ra

    local current = getnext(start)
    local last    = getnext(stop)
    while current ~= last do
        local char = getchar(current)
        local cn   = getnext(current)
        if halant[char] and ra[ischar(cn)] and (not getstate(cn,s_rphf)) and (not getstate(cn,s_blwf)) then
            if after_main[ischar(cn)] then
                local prev = getprev(current)
                local next = getnext(cn)
                local bpn  = getnext(basepos)
                while bpn and dependent_vowel[ischar(bpn)] do
                    basepos = bpn
                    bpn = getnext(bpn)
                end
                if basepos ~= prev then
                    setlink(prev,next)
                    setlink(cn,getnext(basepos))
                    setlink(basepos,current)
                    if cn == stop then
                        stop = prev
                    end
                    cn = next
                end
            end
            -- after_postscript
            -- after_subscript
            -- before_postscript
            -- before_subscript
        end
        current = cn
    end

    -- Reorder marks to canonical order: Adjacent nukta and halant or nukta and vedic sign are always repositioned if necessary, so that the nukta is first.

    local current = start
    local c       = nil -- first node of the current run of halants / stress tone marks
    while current ~= stop do
        local char = getchar(current)
        if halant[char] or stress_tone_mark[char] then
            if not c then
                c = current
            end
        else
            c = nil
        end
        local next = getnext(current)
        if c and nukta[getchar(next)] then
            if head == c then
                head = next
            end
            if stop == next then
                stop = current
            end
            -- [prev] [c .. current] [next] [nextnext] => [prev] [next] [c .. current] [nextnext]
            local prev     = getprev(c)
            local nextnext = getnext(next)
            setlink(prev,next)
            setlink(next,c)
            setnext(current,nextnext)
            if nextnext then
                setprev(nextnext,current)
            end
        end
        if stop == current then break end
        current = getnext(current)
    end

    if getchar(base) == c_nbsp then
        if base == stop then
            stop = getprev(stop)
        end
        nbspaces = nbspaces - 1
        head = remove_node(head,base)
        flush_node(base)
    end

    return head, stop, nbspaces
end
2162 2163
-- cleaned up and optimized ... needs checking (local, check order, fixes, extra hash, etc)
2164 2165
-- The separator table collects the character classes that are consulted when
-- the analyzers below decide whether a nbsp starts a stand alone cluster: the
-- previous character is looked up in this table.

local separator = { }

do
    -- these classes are handed to imerge, in this order
    local merged = {
        consonant,
        independent_vowel,
        dependent_vowel,
        vowel_modifier,
        stress_tone_mark,
    }
    for i=1,#merged do
        imerge(separator,merged[i])
    end
    -- for these classes each key is flagged explicitly
    local flagged = {
        nukta,
        halant,
    }
    for i=1,#flagged do
        for unicode in next, flagged[i] do
            separator[unicode] = true
        end
    end
end
2175 2176
-- Determines where the cluster that begins at node c ends. Only nodes for which
-- ischar(node,font) returns a character take part in the scan. The callers use
-- variant 1 for a cluster that starts with an independent vowel and variant 2 for
-- a stand alone (nbsp based) cluster. The result is the last node that was
-- accepted, which is c itself when nothing usable follows.

local function analyze_next_chars_one(c,font,variant) -- skip one dependent vowel
    -- why two variants ... the comment suggests that it's the same ruleset
    local n = getnext(c)
    if not n then
        return c
    end
    if variant == 1 then
        local first     = n
        local firstchar = ischar(first,font)
        if firstchar and nukta[firstchar] then
            -- step over the nukta
            first = getnext(first)
            if first then
                firstchar = ischar(first,font)
            end
        end
        if first and firstchar then
            -- each lookup only happens when the previous one succeeded
            local second     = getnext(first)
            local secondchar = second and ischar(second,font)
            local third      = secondchar and getnext(second)
            local thirdchar  = third and ischar(third,font)
            if thirdchar then
                if secondchar == c_zwj and consonant[thirdchar] then
                    c = third
                elseif (secondchar == c_zwnj or secondchar == c_zwj) and halant[thirdchar] then
                    local fourth     = getnext(third)
                    local fourthchar = fourth and ischar(fourth,font)
                    if fourthchar and consonant[fourthchar] then
                        c = fourth
                    end
                end
            end
        end
    elseif variant == 2 then
        local nuktachar = ischar(n,font)
        if nuktachar and nukta[nuktachar] then
            c = n
        end
        local first     = getnext(c)
        local firstchar = first and ischar(first,font)
        if firstchar then
            local second = getnext(first)
            if second then
                local secondchar = ischar(second,font)
                if secondchar and zw_char[firstchar] then
                    -- skip the joiner and look one node further
                    firstchar  = secondchar
                    second     = getnext(second)
                    secondchar = second and ischar(second,font)
                end
                if secondchar and halant[firstchar] and consonant[secondchar] then
                    c = second
                end
            end
        end
    end
    -- c = ms_matra(c)
    -- from here on n is always the node after c and v its character
    local n = getnext(c)
    local v = n and ischar(n,font)
    if not v then
        return c
    end
    local seen_pre, seen_above, seen_below, seen_post -- = false
    while dependent_vowel[v] do
        -- each position (pre, above, below, post) is accepted only once
        local parts = twopart_mark[v] or { v }
        for _, part in next, parts do
            if pre_mark[part] and not seen_pre then
                seen_pre = true
            elseif above_mark[part] and not seen_above then
                seen_above = true
            elseif below_mark[part] and not seen_below then
                seen_below = true
            elseif post_mark[part] and not seen_post then
                seen_post = true
            else
                return c
            end
        end
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if nukta[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if halant[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    -- a second stress or tone mark is accepted as well
    if stress_tone_mark[v] then
        return n
    end
    return c
end
2328 2329
-- Determines where the consonant based cluster that begins at node c ends. Only
-- nodes for which ischar(node,font) returns a character take part in the scan.
-- The result is the last node that was accepted, which is c itself when nothing
-- usable follows.

local function analyze_next_chars_two(c,font)
    local follower = getnext(c)
    if not follower then
        return c
    end
    local followerchar = ischar(follower,font)
    if followerchar and nukta[followerchar] then
        c = follower
    end
    -- Repeatedly accept a halant (with an optional joiner before or after it)
    -- followed by a consonant and an optional nukta; c only moves when a
    -- complete group was seen.
    local last = c
    while true do
        local link     = getnext(last)
        local linkchar = link and ischar(link,font)
        if not linkchar then
            break
        end
        if halant[linkchar] then
            last = link
            local joiner     = getnext(link)
            local joinerchar = joiner and ischar(joiner,font)
            if joinerchar and zw_char[joinerchar] then
                last = joiner
            end
        elseif linkchar == c_zwnj or linkchar == c_zwj then
            -- last = link -- not here (?)
            local virama     = getnext(link)
            local viramachar = virama and ischar(virama,font)
            if viramachar and halant[viramachar] then
                last = virama
            end
        else
            break
        end
        local cons     = getnext(last)
        local conschar = cons and ischar(cons,font)
        if not (conschar and consonant[conschar]) then
            break
        end
        last = cons
        local dot     = getnext(cons)
        local dotchar = dot and ischar(dot,font)
        if dotchar and nukta[dotchar] then
            last = dot
        end
        c = last
    end
    --
    if not c then
        -- This shouldn't happen I guess.
        return
    end
    -- from here on n is always the node after c and v its character
    local n = getnext(c)
    local v = n and ischar(n,font)
    if not v then
        return c
    end
    if anudatta[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if halant[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
        if v == c_zwnj or v == c_zwj then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
    else
        -- c = ms_matra(c)
        -- same as one
        local seen_pre, seen_above, seen_below, seen_post -- = false
        while dependent_vowel[v] do
            -- each position (pre, above, below, post) is accepted only once
            local parts = twopart_mark[v] or { v }
            for _, part in next, parts do
                if pre_mark[part] and not seen_pre then
                    seen_pre = true
                elseif above_mark[part] and not seen_above then
                    seen_above = true
                elseif below_mark[part] and not seen_below then
                    seen_below = true
                elseif post_mark[part] and not seen_post then
                    seen_post = true
                else
                    return c
                end
            end
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
        if nukta[v] then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
        if halant[v] then
            c = n
            n = getnext(c)
            v = n and ischar(n,font)
            if not v then
                return c
            end
        end
    end
    -- same as one
    if vowel_modifier[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    if stress_tone_mark[v] then
        c = n
        n = getnext(c)
        v = n and ischar(n,font)
        if not v then
            return c
        end
    end
    -- a second stress or tone mark is accepted as well
    if stress_tone_mark[v] then
        return n
    end
    return c
end
2520 2521
-- It looks like these two analyzers were written independently but they share
2522
-- a lot. Common code has been synced.
2523 2524
-- Splits the node list into syllables and hands each multi node syllable to
-- reorder_one. This is the method that gets registered for the scripts listed in
-- scripts_one.
--
-- head : first node of the list
-- font : font id; only nodes for which ischar(node,font) returns a character
--        are analyzed, all other nodes are stepped over
-- attr : passed on untouched to reorder_one
--
-- Returns the (possibly changed) head and a boolean that is true when at least
-- one node was accepted by ischar.
--
-- Compared to the previous version the character lookups that follow a
-- getnext are guarded, so that ischar is never called with a nil node when a
-- syllable ends at the very end of the list. The never used start flag is gone.

local function method_one(head,font,attr)
    local current  = head
    local done     = false
    local nbspaces = 0
    local syllabe  = 0
    while current do
        local char = ischar(current,font)
        if char then
            done = true
            local syllablestart = current
            local syllableend   = nil
            local c     = current
            local first = char
            local n     = getnext(c)
            -- a leading ra + halant is looked through when testing for a nbsp
            if n and ra[first] then
                local second = ischar(n,font)
                if second and halant[second] then
                    local nn = getnext(n)
                    if nn then
                        local third = ischar(nn,font)
                        if third then
                            c     = nn
                            first = third
                        end
                    end
                end
            end
            local standalone = first == c_nbsp
            if standalone then
                -- The cluster stays stand alone at the begin of a paragraph or
                -- box (no previous node), after a node of a different font or
                -- language (quite certainly a different word) and after
                -- something that separates words.
                local prev = getprev(current)
                if prev then
                    local prevchar = ischar(prev,font)
                    if prevchar and separator[prevchar] then
                        standalone = false
                    end
                end
            end
            if standalone then
                -- stand alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                syllableend = analyze_next_chars_one(c,font,2)
                current = getnext(syllableend)
                if syllablestart ~= syllableend then
                    head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces)
                    current = getnext(current)
                end
            else
                -- we can delay the getsubtype(n) and getfont(n) and test for say halant first
                -- as an table access is faster than two function calls (subtype and font are
                -- pseudo fields) but the code becomes messy (unless we make it a function)
                if consonant[char] then
                    -- syllable containing consonant
                    --
                    -- move current to the last consonant of a chain of
                    -- C + [Nukta] + H + [ZWNJ|ZWJ] + C groups
                    while true do
                        local n = getnext(current)
                        local v = n and ischar(n,font)
                        if not v then
                            break
                        end
                        if nukta[v] then
                            n = getnext(n)
                            v = n and ischar(n,font)
                            if not v then
                                break
                            end
                        end
                        if not halant[v] then
                            break
                        end
                        n = getnext(n)
                        v = n and ischar(n,font)
                        if not v then
                            break
                        end
                        if v == c_zwnj or v == c_zwj then
                            n = getnext(n)
                            v = n and ischar(n,font)
                            if not v then
                                break
                            end
                        end
                        if not consonant[v] then
                            break
                        end
                        current = n
                    end
                    local n = getnext(current)
                    if n then
                        local v = ischar(n,font)
                        if v and nukta[v] then
                            -- nukta (not specified in Microsoft Devanagari OpenType specification)
                            current = n
                            n = getnext(current)
                        end
                    end
                    syllableend = current
                    current = n
                    if current then
                        local v = ischar(current,font)
                        if not v then
                            -- skip
                        elseif halant[v] then
                            -- syllable containing consonant without vowels: {C + [Nukta] + H} + C + H
                            local n = getnext(current)
                            local v = n and ischar(n,font)
                            if v and zw_char[v] then
                                -- code collapsed, probably needs checking with intention
                                syllableend = n
                                current = getnext(n)
                            else
                                syllableend = current
                                current = n
                            end
                        else
                            -- syllable containing consonant with vowels: {C + [Nukta] + H} + C + [M] + [VM] + [SM]
                            --
                            -- current can become nil at the end of the list,
                            -- therefore the lookups are guarded
                            if dependent_vowel[v] then
                                syllableend = current
                                current = getnext(current)
                                v = current and ischar(current,font)
                            end
                            if v and vowel_modifier[v] then
                                syllableend = current
                                current = getnext(current)
                                v = current and ischar(current,font)
                            end
                            if v and stress_tone_mark[v] then
                                syllableend = current
                                current = getnext(current)
                            end
                        end
                    end
                    if syllablestart ~= syllableend then
                        if syllableend then
                            -- tag all nodes of this syllable with its number
                            syllabe = syllabe + 1
                            local c = syllablestart
                            local n = getnext(syllableend)
                            while c ~= n do
                                setprop(c,a_syllabe,syllabe)
                                c = getnext(c)
                            end
                        end
                        head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces)
                        current = getnext(current)
                    end
                elseif independent_vowel[char] then
                    -- syllable without consonants: VO + [VM] + [SM]
                    syllableend = current
                    current = getnext(current)
                    if current then
                        local v = ischar(current,font)
                        if v then
                            if vowel_modifier[v] then
                                syllableend = current
                                current = getnext(current)
                                v = current and ischar(current,font)
                            end
                            if v and stress_tone_mark[v] then
                                syllableend = current
                                current = getnext(current)
                            end
                        end
                    end
                else
                    if show_syntax_errors and mark_four[char] then
                        head, current = inject_syntax_error(head,current,char)
                    end
                    current = getnext(current)
                end
            end
        else
            current = getnext(current)
        end
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- the first node of each run of font characters gets the init state,
    -- unless it already has a state
    current = head
    local n = 0
    while current do
        if ischar(current,font) then
            if n == 0 and not getstate(current) then
                setstate(current,s_init)
            end
            n = n + 1
        else
            n = 0
        end
        current = getnext(current)
    end

    return head, done
end
2746 2747
-- there is a good chance that when we run into one with subtype < 256 that the rest is also done
2748
-- so maybe we can omit this check (it's pretty hard to get glyphs in the stream out of the blue)
2749 2750
-- Splits the node list into syllables and hands each multi node syllable to
-- reorder_two. This is the method that gets registered for the scripts listed in
-- scripts_two.
--
-- head : first node of the list
-- font : font id; only nodes for which ischar(node,font) returns a character
--        are analyzed, all other nodes are stepped over
-- attr : passed on untouched to reorder_two
--
-- Returns the (possibly changed) head and a boolean that is true when at least
-- one node was accepted by ischar.
--
-- Compared to the previous version the stand alone test for a nbsp is fixed:
-- the branch that was meant for "different font or language" was taken when
-- the previous node did match the font, which made the separator lookup
-- useless. The test now is the same as the one in method_one. The never used
-- start flag is gone.

local function method_two(head,font,attr)
    local current  = head
    local done     = false
    local syllabe  = 0
    local nbspaces = 0
    while current do
        local syllablestart = nil
        local syllableend   = nil
        local char = ischar(current,font)
        if char then
            done = true
            syllablestart = current
            local c = current
            local n = getnext(current)
            -- a leading ra + halant is looked through when classifying the syllable
            if n and ra[char] then
                local nextchar = ischar(n,font)
                if nextchar and halant[nextchar] then
                    local nn = getnext(n)
                    if nn then
                        local nextnextchar = ischar(nn,font)
                        if nextnextchar then
                            c = nn
                            char = nextnextchar
                        end
                    end
                end
            end
            if independent_vowel[char] then
                -- vowel-based syllable: [Ra+H]+V+[N]+[<[<ZWJ|ZWNJ>]+H+C|ZWJ+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                current = analyze_next_chars_one(c,font,1)
                syllableend = current
            else
                local standalone = char == c_nbsp
                if standalone then
                    nbspaces = nbspaces + 1
                    -- The cluster stays stand alone at the begin of a paragraph
                    -- or box (no previous node), after a node of a different
                    -- font or language (quite certainly a different word) and
                    -- after something that separates words.
                    local p = getprev(current)
                    local prevchar = p and ischar(p,font)
                    if prevchar and separator[prevchar] then
                        standalone = false
                    end
                end
                if standalone then
                    -- Stand Alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                    current = analyze_next_chars_one(c,font,2)
                    syllableend = current
                elseif consonant[getchar(current)] then
                    -- WHY current INSTEAD OF c ?

                    -- Consonant syllable: {C+[N]+<H+[<ZWNJ|ZWJ>]|<ZWNJ|ZWJ>+H>} + C+[N]+[A] + [< H+[<ZWNJ|ZWJ>] | {M}+[N]+[H]>]+[SM]+[(VD)]
                    current = analyze_next_chars_two(current,font) -- not c !
                    syllableend = current
                end
            end
        end
        if syllableend then
            -- tag all nodes of this syllable with its number
            syllabe = syllabe + 1
            local c = syllablestart
            local n = getnext(syllableend)
            while c ~= n do
                setprop(c,a_syllabe,syllabe)
                c = getnext(c)
            end
            if syllablestart ~= syllableend then
                head, current, nbspaces = reorder_two(head,syllablestart,syllableend,font,attr,nbspaces)
            end
        elseif show_syntax_errors then
            local char = ischar(current,font)
            if char and not getstate(current) then -- state can also be init
                if mark_four[char] then
                    head, current = inject_syntax_error(head,current,char)
                end
            end
        end
        current = getnext(current)
    end

    if nbspaces > 0 then
        head = replace_all_nbsp(head)
    end

    -- the first node of each run of font characters gets the init state,
    -- unless it already has a state
    current = head
    local n = 0
    while current do
        if ischar(current,font) then
            if n == 0 and not getstate(current) then -- state can also be init
                setstate(current,s_init)
            end
            n = n + 1
        else
            n = 0
        end
        current = getnext(current)
    end

    return head, done
end
2856 2857
-- Register the analyzers: the script at position i in scripts_one is handled by
-- method_one and the script at the same position in scripts_two by method_two.

for index=1,nofscripts do
    local script_one = scripts_one[index]
    methods[script_one] = method_one
    local script_two = scripts_two[index]
    methods[script_two] = method_two
end
2861