font-ota.lmt /size: 18 Kb    last modification: 2025-02-21 11:03
-- Register this file in the global module registry, creating the registry
-- when nothing has done so yet.

if not modules then
    modules = { }
end

modules['font-ota'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9-- context only
10
11local type = type
12local setmetatableindex = table.setmetatableindex
13
14if not trackers then trackers = { register = function() end } end
15
16----- trace_analyzing = false  trackers.register("otf.analyzing",  function(v) trace_analyzing = v end)
17
18local fonts, nodes, node = fonts, nodes, node
19
20local allocate            = utilities.storage.allocate
21
22local otf                 = fonts.handlers.otf
23
24local analyzers           = fonts.analyzers
25local initializers        = allocate()
26local methods             = allocate()
27
28analyzers.initializers    = initializers
29analyzers.methods         = methods
30
-- Local aliases for the direct node ("nut") accessors used by the analyzers
-- in this file. Each accessor is declared exactly once.

local nuts                = nodes.nuts
local tonut               = nuts.tonut

local getnext             = nuts.getnext
local getprev             = nuts.getprev
local getprop             = nuts.getprop
local setprop             = nuts.setprop
local getchar             = nuts.getchar
local ischar              = nuts.ischar
local isnextchar          = nuts.isnextchar
----- isprevchar          = nuts.isprevchar

local endofmath           = nuts.endofmath
45
46local nodecodes           = nodes.nodecodes
47local disc_code           <const> = nodecodes.disc
48local math_code           <const> = nodecodes.math
49
50local fontdata            = fonts.hashes.identifiers
51local descriptiondata     = fonts.hashes.descriptions
52local categories          = characters and characters.categories or { } -- sorry, only in context
53local chardata            = characters and characters.data
54
55local otffeatures         = fonts.constructors.features.otf
56local registerotffeature  = otffeatures.register
57
58-- Analyzers run per script and/or language and are needed in order to process
59-- features right.
60
61local setstate = nuts.setstate
62local getstate = nuts.getstate
63
64local classifiers = characters.classifiers
65
66-- never use these numbers directly
67
-- Numeric codes for the analysis states; the first six are positional /
-- classification states, the remaining five carry the names of the features
-- they are mapped from below.

local s_init =  1
local s_medi =  2
local s_fina =  3
local s_isol =  4
local s_mark =  5
local s_rest =  6
local s_rphf =  7
local s_half =  8
local s_pref =  9
local s_blwf = 10
local s_pstf = 11

-- Name to state code, including the aliases (med2, fin2, fin3) that share the
-- code of their base form.

local states = allocate {
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
    mark = s_mark,
    rest = s_rest,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
}

-- Feature name to state code; the same as states but without mark and rest.

local features = allocate {
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
}

-- Value of the "arabic" field of a character's data entry to state code.

local mappers = allocate {
    l = s_init, -- left
    d = s_medi, -- double
    c = s_medi, -- joiner
    r = s_fina, -- right
    u = s_isol, -- nonjoiner
}

analyzers.states          = states
analyzers.features        = features
analyzers.useunicodemarks = false
120
if not classifiers then

    -- why not just always use categories[k] = "mn"

    -- Fallback used when characters.classifiers is not set yet: a table that
    -- classifies a code point on first access and remembers the answer.

    local blockrange = characters.blockrange

    local f_arabic,  l_arabic  = blockrange("arabic")
    local f_syriac,  l_syriac  = blockrange("syriac")
    -- NOTE(review): the Unicode block is spelled "Mandaic"; confirm that
    -- "mandiac" is the key that blockrange expects.
    local f_mandiac, l_mandiac = blockrange("mandiac")
    local f_nko,     l_nko     = blockrange("nko")
    local f_ext_a,   l_ext_a   = blockrange("arabicextendeda")

    -- True when the code point lies in one of the block ranges fetched above.
    local function inblocks(k)
        return (k >= f_arabic  and k <= l_arabic)
            or (k >= f_syriac  and k <= l_syriac)
            or (k >= f_mandiac and k <= l_mandiac)
            or (k >= f_nko     and k <= l_nko)
            or (k >= f_ext_a   and k <= l_ext_a)
    end

    classifiers = setmetatableindex(function(t,k)
        if type(k) ~= "number" then
            -- only code points are classified (and cached)
            return
        end
        local result = false
        local data   = chardata[k]
        if data then
            local arabic = data.arabic
            if arabic then
                -- an explicit joining property wins over the block test
                result = mappers[arabic]
                if not result then
                    log.report("analyze","error in mapping arabic %C",k)
                    result = false
                end
            elseif inblocks(k) then
                if categories[k] == "mn" then
                    result = s_mark
                else
                    result = s_rest
                end
            end
        end
        t[k] = result
        return result
    end)

    characters.classifiers = classifiers

end
164
-- todo: analyzers per script/lang, cross font, so we need a font id hash -> script
166-- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace
167
168local is_letter = characters.is_letter
169local is_mark   = characters.is_mark
170
-- Closes a pending run of letters: the last letter of the run becomes isol
-- when it is also the first one and fina otherwise. Does nothing when no run
-- is open (first is false).

local function finishlatin(first,last)
    if first then
        setstate(last,first == last and s_isol or s_fina)
    end
end

-- Assigns init / medi / fina / isol states to runs of letters and the mark
-- state to marks, for glyphs of the given font that have no state yet.
--
-- head : first node of the list to scan (passed to the nuts accessors as is)
-- font : font id; only glyphs for which isnextchar(current,font) reports a
--        character take part in a run
--
-- Returns head and a boolean that is true when at least one run was started.
--
-- Marks and letters are recognized with characters.is_mark and
-- characters.is_letter; analyzers.useunicodemarks and the font descriptions
-- are not consulted here.

function analyzers.setstate(head,font) -- latin
    local first   = false
    local last    = false
    local current = head
    local done    = false
    while current do
        local nxt, char, id = isnextchar(current,font)
        if char then
            local fresh = not getstate(current)
            if fresh and is_mark[char] then
                -- marks neither start nor end a run
                setstate(current,s_mark)
            elseif fresh and is_letter[char] then
                if first then
                    setstate(current,s_medi)
                else
                    setstate(current,s_init)
                    first = current
                    done  = true
                end
                last = current
            else
                -- already analyzed, or neither mark nor letter: ends the run
                finishlatin(first,last)
                first = false
            end
        elseif char ~= false and id == disc_code then
            -- always in the middle .. it doesn't make much sense to assign a property
            -- here ... we might at some point decide to flag the components when present
            -- but even then it's kind of bogus
            setstate(current,s_medi)
            last = current
        else
            -- other font (char == false) or any other node: ends the run
            finishlatin(first,last)
            first = false
            if id == math_code then
                -- Continue after the node that endofmath returns. This has to
                -- go through nxt because current is overwritten below; the
                -- non glyph case used to assign current and therefore never
                -- skipped anything.
                nxt = getnext(endofmath(current))
            end
        end
        current = nxt
    end
    finishlatin(first,last)
    return head, done
end
243
244-- in the future we will use language/script attributes instead of the
245-- font related value, but then we also need dynamic features which is
246-- somewhat slower; and .. we need a chain of them
247
-- Runs the initializer registered for the script (or for the script and
-- language) of the given font data, when there is one.
--
-- An entry in initializers is either a function or a table of functions that
-- is indexed by language. The result of the called function is returned;
-- nothing is returned when no initializer applies.

local function analyzeinitializer(tfmdata,value) -- attr
    local script, language = otf.scriptandlanguage(tfmdata) -- attr
    local handler = initializers[script]
    if handler and type(handler) ~= "function" then
        -- per language table
        handler = handler[language]
    end
    if handler then
        return handler(tfmdata,value)
    end
end
262
-- Runs the analyzer registered for the script (or for the script and
-- language) that belongs to the given font and attribute.
--
-- An entry in methods is either a function or a table of functions that is
-- indexed by language. The result of the called function is returned; when
-- no analyzer applies the list is returned untouched as head, false.

local function analyzeprocessor(head,font,attr)
    local script, language = otf.scriptandlanguage(fontdata[font],attr)
    local handler = methods[script]
    if handler and type(handler) ~= "function" then
        -- per language table
        handler = handler[language]
    end
    if handler then
        return handler(head,font,attr)
    end
    return head, false
end
279
-- Register the "analyze" feature: enabled by default, with a node mode
-- initializer and a node mode processor at position 1.

do

    local initializers = {
        node = analyzeinitializer,
    }

    local processors = {
        position = 1,
        node     = analyzeprocessor,
    }

    registerotffeature({
        name         = "analyze",
        description  = "analysis of character classes",
        default      = true,
        initializers = initializers,
        processors   = processors,
    })

end
292
293-- latin
294
295methods.latn = analyzers.setstate
296-------.dflt = analyzers.setstate % can be an option or just the default
297
-- Characters that have been reported already, so that each one is reported
-- only once.

local arab_warned = { }

-- Reports that the character of the given node lacks the class named by
-- what; later calls for the same character are silent.

local function warning(current,what)
    local char = getchar(current)
    if arab_warned[char] then
        return
    end
    log.report("analyze","arab: character %C has no %a class",char,what)
    arab_warned[char] = true
end
307
308-- we can also use this trick for devanagari
309
-- State passed to setstate for a character that lacks the class needed to
-- close a run. No error state is defined among the s_* constants in this
-- file; this used to be a read of an undefined global, so it is kept as an
-- explicit nil. TODO: decide on a real error state.

local s_error = nil

-- Closes the pending joining run.
--
-- When there is a last node it becomes fina, otherwise a lone first node
-- becomes isol. Both are only valid when the classifier that was recorded
-- for the node (c_last / c_first) is medi or fina; if not, a warning is
-- issued and the node gets s_error. The caller resets first and last.

local function finisharab(first,last,c_first,c_last)
    if last then
        if c_last == s_medi or c_last == s_fina then
            setstate(last,s_fina)
        else
            warning(last,"fina")
            setstate(last,s_error)
        end
    elseif first then
        if c_first == s_medi or c_first == s_fina then
            setstate(first,s_isol)
        else
            warning(first,"isol")
            setstate(first,s_error)
        end
    end
end

-- Assigns joining states (init, medi, fina, isol, mark, rest) to the glyphs
-- of the given font that have no state yet, driven by characters.classifiers.
--
-- head : first node of the list; converted with tonut before scanning
-- font : font id handed to isnextchar
-- attr : not used by this analyzer
--
-- Returns head and a boolean that is true when at least one glyph was
-- looked at.
--
-- first is the node that opened the current run (it already carries init)
-- and last the most recent node that provisionally carries medi; c_first and
-- c_last are the classifiers recorded for them. Both are only ever recorded
-- for s_medi classifiers.

function methods.arab(head,font,attr)
    local first, last, c_first, c_last
    local current = tonut(head)
    local done    = false
    while current do
        local nxt, char, id = isnextchar(current,font)
        if char and not getstate(current) then
            done = true
            local classifier = classifiers[char]
            if not classifier then
                -- unclassified character: ends the run, gets no state itself
                finisharab(first,last,c_first,c_last)
                first, last = nil, nil
            elseif classifier == s_mark then
                -- marks do not affect the run
                setstate(current,s_mark)
            elseif classifier == s_isol then
                finisharab(first,last,c_first,c_last)
                first, last = nil, nil
                setstate(current,s_isol)
            elseif classifier == s_medi then
                if first then
                    -- provisionally medi, turned into fina when the run ends here
                    last   = current
                    c_last = classifier
                    setstate(current,s_medi)
                else
                    setstate(current,s_init)
                    first   = current
                    c_first = classifier
                end
            elseif classifier == s_fina then
                if last then
                    if getstate(last) ~= s_init then
                        setstate(last,s_medi)
                    end
                    setstate(current,s_fina)
                    first, last = nil, nil
                elseif first then
                    setstate(current,s_fina)
                    first = nil
                else
                    setstate(current,s_isol)
                end
            else -- classifier == s_rest
                setstate(current,s_rest)
                finisharab(first,last,c_first,c_last)
                first, last = nil, nil
            end
        else
            -- no character of this font, or one that has a state already
            finisharab(first,last,c_first,c_last)
            first, last = nil, nil
            if id == math_code then -- a bit duplicate as we test for glyphs twice
                nxt = getnext(endofmath(current))
            end
        end
        current = nxt
    end
    finisharab(first,last,c_first,c_last)
    return head, done
end
454
455methods.syrc = methods.arab
456methods.mand = methods.arab
457methods.nko  = methods.arab
458
-- a quick first attempt .. more later
460
do

    -- https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-mongolian.md#joining-properties
    -- todo syrc

    -- Code point to the "mongolian" field of its character data, or false
    -- when there is no such field. The answer is cached, negative ones too.

    local joining = setmetatableindex(function(t,k)
        if type(k) == "number" then
            local c = chardata[k]
            local v = c and c.mongolian or false
            t[k] = v
            return v
        end
    end)

    -- Assigns init / medi / fina / isol states to the glyphs of the given
    -- font that have no state yet, driven by the joining value of each
    -- character and of the one before it.
    --
    -- head : first node of the list; converted with tonut before scanning
    -- font : font id handed to isnextchar
    -- attr : not used by this analyzer
    --
    -- Returns head and a boolean that is true when at least one glyph was
    -- looked at.
    --
    -- The joining values tested are "d", "jc", "l", "r", "nj" and "t"
    -- (transparent); presumably dual, join causing, left, right and non
    -- joining as in the document linked above -- confirm against the
    -- character data.

    function methods.mong(head,font,attr)
        local first, last
        local current   = tonut(head)
        local done      = false
        local prevjoin  = nil
        local prevstate = nil -- recorded but not consulted yet

        -- Ends the current run: a trailing medi becomes fina and a trailing
        -- init becomes isol, then all run bookkeeping is reset.
        local function wrapup()
            if last then
                if last ~= first then
                    local s = getstate(last)
                    if s == s_medi then
                        setstate(last,s_fina)
                    elseif s == s_init then
                        setstate(last,s_isol)
                    end
                end
                last      = nil
                first     = nil
                prevjoin  = nil
                prevstate = nil
            end
        end

        while current do
            local nxt, char, id = isnextchar(current,font)
            if char and not getstate(current) then
                local currjoin = joining[char]
                done = true
                if not last then
                    -- start of a run: isolated until a follower joins
                    setstate(current,s_isol)
                    prevjoin  = currjoin
                    first     = current
                    last      = current
                    prevstate = s_isol
                elseif currjoin == "t" then -- transparent
                    -- keep state
                    last = current
                elseif prevjoin == "d" or prevjoin == "jc" or prevjoin == "l" then
                    -- The previous character can join forward, so whether a
                    -- connection is made depends on the current character.
                    -- These tests used to look at prevjoin again, which made
                    -- the "r" and "nj" alternatives unreachable.
                    if currjoin == "d" or currjoin == "jc" or currjoin == "r" then
                        local s = getstate(last)
                        if s == s_isol then
                            setstate(last,s_init)
                        elseif s == s_fina then
                            setstate(last,s_medi)
                        end
                        setstate(current,s_fina)
                        prevstate = s_fina
                    elseif currjoin == "nj" or currjoin == "l" then
                        local s = getstate(last)
                        if s == s_medi then
                            setstate(last,s_fina)
                        elseif s == s_init then
                            setstate(last,s_isol)
                        end
                        setstate(current,s_isol)
                        prevstate = s_isol
                    end
                    prevjoin = currjoin
                    last     = current
                elseif prevjoin == "nj" or prevjoin == "r" then
                    -- the previous character does not join forward
                    local s = getstate(last)
                    if s == s_medi then
                        setstate(last,s_fina)
                    elseif s == s_init then
                        setstate(last,s_isol)
                    end
                    setstate(current,s_isol)
                    prevjoin  = currjoin
                    prevstate = s_isol
                    last      = current
                else
                    -- previous joining value not handled: end the run
                    wrapup()
                end
            else
                if last then
                    wrapup()
                end
                if id == math_code then -- a bit duplicate as we test for glyphs twice
                    nxt = getnext(endofmath(current))
                end
            end
            current = nxt
        end
        if last then
            wrapup()
        end
        return head, done
    end

end
574
-- The directive "otf.analyze.useunicodemarks" stores the value it is given
-- in analyzers.useunicodemarks.

do

    local function setuseunicodemarks(v)
        analyzers.useunicodemarks = v
    end

    directives.register("otf.analyze.useunicodemarks",setuseunicodemarks)

end
578