font-ota.lmt /size: 18 Kb    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['font-ota'] = {
2    version   = 1.001,
3    comment   = "companion to font-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- context only
10
11local type = type
12local setmetatableindex = table.setmetatableindex
13
14if not trackers then trackers = { register = function() end } end
15
16----- trace_analyzing = false  trackers.register("otf.analyzing",  function(v) trace_analyzing = v end)
17
18local fonts, nodes, node = fonts, nodes, node
19
20local allocate            = utilities.storage.allocate
21
22local otf                 = fonts.handlers.otf
23
24local analyzers           = fonts.analyzers
25local initializers        = allocate()
26local methods             = allocate()
27
28analyzers.initializers    = initializers
29analyzers.methods         = methods
30
-- Local aliases for the direct node ("nuts") accessors used by the analyzers
-- below. The duplicated 'getprev' alias has been dropped; all other aliases are
-- kept (even when currently unused) because other parts of the file may rely
-- on them.

local nuts                = nodes.nuts
local tonut               = nuts.tonut

local getnext             = nuts.getnext
local getprev             = nuts.getprev
local getprop             = nuts.getprop
local setprop             = nuts.setprop
local getsubtype          = nuts.getsubtype
local getchar             = nuts.getchar
local ischar              = nuts.ischar
local isnextchar          = nuts.isnextchar
----- isprevchar          = nuts.isprevchar

local endofmath           = nuts.endofmath
46
47local nodecodes           = nodes.nodecodes
48local disc_code           = nodecodes.disc
49local math_code           = nodecodes.math
50
51local fontdata            = fonts.hashes.identifiers
52local descriptiondata     = fonts.hashes.descriptions
53local categories          = characters and characters.categories or { } -- sorry, only in context
54local chardata            = characters and characters.data
55
56local otffeatures         = fonts.constructors.features.otf
57local registerotffeature  = otffeatures.register
58
59-- Analyzers run per script and/or language and are needed in order to process
60-- features right.
61
62local setstate = nuts.setstate
63local getstate = nuts.getstate
64
65local classifiers = characters.classifiers
66
67-- never use these numbers directly
68
-- Numeric codes for the analysis states that get stored on glyph nodes. Other
-- code should go through the 'states' / 'features' tables instead of using the
-- numbers themselves.

local s_init =  1
local s_medi =  2
local s_fina =  3
local s_isol =  4
local s_mark =  5
local s_rest =  6
local s_rphf =  7
local s_half =  8
local s_pref =  9
local s_blwf = 10
local s_pstf = 11

-- name -> state code; several names share one code (med2, fin2, fin3)

local states = allocate {
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
    mark = s_mark,
    rest = s_rest,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
}

-- same mapping as 'states' but without the 'mark' and 'rest' entries

local features = allocate {
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
}

-- maps the 'arabic' field of a character data entry onto a state code

local mappers = allocate {
    l = s_init, -- left
    d = s_medi, -- double
    c = s_medi, -- joiner
    r = s_fina, -- right
    u = s_isol, -- nonjoiner
}

analyzers.states          = states
analyzers.features        = features
analyzers.useunicodemarks = false
121
-- Fallback classifier table, only built when characters.classifiers has not
-- been set up elsewhere. Indexing it with a code point gives the state code
-- used by methods.arab, or false when the character takes no part in the
-- analysis. Results (including false) are cached in the table itself.

if not classifiers then

    -- why not just always use categories[k] = "mn"

    -- First and last code point of each block we care about. The Unicode block
    -- is called "Mandaic"; the former spelling "mandiac" is not a block name so
    -- that range presumably never matched the intended characters.

    local f_arabic,  l_arabic  = characters.blockrange("arabic")
    local f_syriac,  l_syriac  = characters.blockrange("syriac")
    local f_mandaic, l_mandaic = characters.blockrange("mandaic")
    local f_nko,     l_nko     = characters.blockrange("nko")
    local f_ext_a,   l_ext_a   = characters.blockrange("arabicextendeda")

    classifiers = setmetatableindex(function(t,k)
        -- non numeric keys are ignored: nothing is cached and nil is returned
        if type(k) == "number" then
            local c = chardata[k]
            local v = false
            if c then
                local arabic = c.arabic
                if arabic then
                    -- explicit joining class in the character data
                    v = mappers[arabic]
                    if not v then
                        -- NOTE(review): 'log' is not defined in this file; confirm
                        -- that this should not be 'logs.report'.
                        log.report("analyze","error in mapping arabic %C",k)
                        -- unknown class: store false (not nil) so that the
                        -- result is cached and the report is issued only once
                        v = false
                    end
                elseif (k >= f_arabic  and k <= l_arabic)  or
                       (k >= f_syriac  and k <= l_syriac)  or
                       (k >= f_mandaic and k <= l_mandaic) or
                       (k >= f_nko     and k <= l_nko)     or
                       (k >= f_ext_a   and k <= l_ext_a)   then
                    -- no joining class but inside one of the blocks: either a
                    -- (nonspacing) mark or "the rest"
                    if categories[k] == "mn" then
                        v = s_mark
                    else
                        v = s_rest
                    end
                end
            end
            t[k] = v
            return v
        end
    end)

    characters.classifiers = classifiers

end
165
166-- todo: analyzers per script/lang, cross font, so we need an font id hash -> script
167-- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace
168
169local is_letter = characters.is_letter
170local is_mark   = characters.is_mark
171
-- Assigns init/medi/fina/isol (and mark) states to the glyphs of the given font
-- in the list starting at 'head'. A run consists of consecutive letters (as
-- told by characters.is_letter), optionally mixed with marks and discretionary
-- nodes; anything else ends the run. Glyphs that already carry a state are
-- left alone and also end the run.
--
-- head : first node of the list (the same value is returned)
-- font : font id that is passed on to isnextchar
--
-- Returns head and a boolean that is true when at least one run was started.
--
-- Changes compared to the previous version:
--
-- * Inline math is now really skipped. The old "finish" branch assigned
--   'current = endofmath(current)' but that value was overwritten right away
--   by 'current = nxt', so the math content was traversed. We now adapt 'nxt',
--   just like methods.arab and methods.mong do.
-- * The unused locals 'useunicodemarks' and 'descriptions' are gone.
-- * The PICKUP/NEXT gotos are replaced by a flag.

function analyzers.setstate(head,font) -- latin
    local first   = false -- first letter of the current run (false: no run)
    local last    = false -- last letter or disc node seen in the current run
    local current = head
    local done    = false
    while current do
        -- char is truthy for a glyph that qualifies, false for what the
        -- original code labels "other font" and nil otherwise (then id tells
        -- what kind of node we have)
        local nxt, char, id = isnextchar(current,font)
        local inrun = false
        if char then
            -- only letters and marks, and only when not yet analyzed
            if not getstate(current) then
                if is_mark[char] then
                    -- marks are flagged but neither start nor end a run
                    setstate(current,s_mark)
                    inrun = true
                elseif is_letter[char] then
                    if first then
                        setstate(current,s_medi)
                    else
                        setstate(current,s_init)
                        first = current
                        done  = true
                    end
                    last  = current
                    inrun = true
                end
            end
        elseif char ~= false and id == disc_code then
            -- always in the middle .. it doesn't make much sense to assign a property
            -- here ... we might at some point decide to flag the components when present
            -- but even then it's kind of bogus
            setstate(current,s_medi)
            last  = current
            inrun = true
        end
        if not inrun then
            -- finish the pending run: its last member becomes final, or isolated
            -- when the run has just one member
            if first then
                setstate(last,first == last and s_isol or s_fina)
            end
            first = false
            if id == math_code then
                -- continue after the matching end-of-math node
                nxt = getnext(endofmath(current))
            end
        end
        current = nxt
    end
    if first then
        setstate(last,first == last and s_isol or s_fina)
    end
    return head, done
end
244
245-- in the future we will use language/script attributes instead of the
246-- font related value, but then we also need dynamic features which is
247-- somewhat slower; and .. we need a chain of them
248
-- Feature initializer: looks up the initializer registered for the script of
-- the font (either a function or a table of functions keyed by language) and
-- calls it with (tfmdata,value). Nothing is returned when no initializer is
-- found.

local function analyzeinitializer(tfmdata,value) -- attr
    local script, language = otf.scriptandlanguage(tfmdata) -- attr
    local handler = initializers[script]
    if not handler then
        -- skip
        return
    end
    if type(handler) ~= "function" then
        -- per language handlers
        handler = handler[language]
        if not handler then
            return
        end
    end
    return handler(tfmdata,value)
end
263
-- Feature processor: dispatches to the analyzer method registered for the
-- script (and optionally language) of the font. The method is called with
-- (head,font,attr) and its results are passed on; without a method we return
-- head and false.

local function analyzeprocessor(head,font,attr)
    local script, language = otf.scriptandlanguage(fontdata[font],attr)
    local method = methods[script]
    if method and type(method) ~= "function" then
        -- per language methods
        method = method[language]
    end
    if method then
        return method(head,font,attr)
    end
    return head, false
end
280
-- The "analyze" feature is enabled by default: the initializer runs in node
-- mode and the processor is registered at position 1.

registerotffeature({
    name         = "analyze",
    description  = "analysis of character classes",
    default      = true,
    initializers = { node = analyzeinitializer },
    processors   = { position = 1, node = analyzeprocessor },
})

-- latin

methods.latn = analyzers.setstate
-------.dflt = analyzers.setstate % can be an option or just the default
298
-- characters that we already complained about

local arab_warned = { }

-- Reports (once per character) that the character of the given glyph lacks the
-- class 'what'.

local function warning(current,what)
    local c = getchar(current)
    if arab_warned[c] then
        return
    end
    log.report("analyze","arab: character %C has no %a class",c,what)
    arab_warned[c] = true
end
308
309-- we can also use this trick for devanagari
310
-- Closes a pending joining run. When the run has a 'last' member that one
-- becomes final, otherwise a lonely 'first' becomes isolated. The c_* values
-- are the classifiers that were stored together with first/last. When a
-- classifier doesn't permit the form we issue a warning and call setstate with
-- s_error.
--
-- s_error was never defined in this file, so it used to be a read of an unset
-- global, i.e. nil. We make that explicit with a local so that the behaviour
-- is the same but no global is consulted.
--
-- The caller is responsible for resetting its first/last variables.

local function arab_finish(first,last,c_first,c_last)
    local s_error = nil
    if last then
        if c_last == s_medi or c_last == s_fina then
            setstate(last,s_fina)
        else
            warning(last,"fina")
            setstate(last,s_error)
        end
    elseif first then
        if c_first == s_medi or c_first == s_fina then
            setstate(first,s_isol)
        else
            warning(first,"isol")
            setstate(first,s_error)
        end
    end
end

-- Analyzer for scripts with arabic-like joining. Each glyph of the font that
-- has no state yet is classified via 'classifiers' and gets one of the init,
-- medi, fina, isol, mark or rest states depending on its neighbours.
--
-- head : first node of the list (converted with tonut for traversal)
-- font : font id that is passed on to isnextchar
-- attr : not used here
--
-- Returns head and a boolean that is true when at least one glyph was looked
-- at.
--
-- The six identical copies of the "finish the pending run" code have been
-- moved into arab_finish; the order of the setstate calls is unchanged.

-- we can also use this trick for devanagari

function methods.arab(head,font,attr)
    -- first/last are only assigned for characters classified as s_medi (double
    -- joining), c_first/c_last remember that classifier
    local first, last, c_first, c_last
    local current = tonut(head)
    local done    = false
    while current do
        local nxt, char, id = isnextchar(current,font)
        if char and not getstate(current) then
            done = true
            local classifier = classifiers[char]
            if not classifier then
                -- not a joining character: ends the run
                arab_finish(first,last,c_first,c_last)
                first, last = nil, nil
            elseif classifier == s_mark then
                -- marks don't influence the run
                setstate(current,s_mark)
            elseif classifier == s_isol then
                arab_finish(first,last,c_first,c_last)
                first, last = nil, nil
                setstate(current,s_isol)
            elseif classifier == s_medi then
                if first then
                    -- provisionally medial, arab_finish might turn it into final
                    last   = current
                    c_last = classifier
                    setstate(current,s_medi)
                else
                    -- starts a new run
                    setstate(current,s_init)
                    first   = current
                    c_first = classifier
                end
            elseif classifier == s_fina then
                -- joins only to what comes before, so it ends the run itself
                if last then
                    if getstate(last) ~= s_init then
                        setstate(last,s_medi)
                    end
                    setstate(current,s_fina)
                    first, last = nil, nil
                elseif first then
                 -- if getstate(first) ~= s_init then
                 --     -- needs checking
                 --     setstate(first,s_medi)
                 -- end
                    setstate(current,s_fina)
                    first = nil
                else
                    setstate(current,s_isol)
                end
            else -- classifier == s_rest
                setstate(current,s_rest)
                arab_finish(first,last,c_first,c_last)
                first, last = nil, nil
            end
        else
            -- no glyph of this font or already analyzed: ends the run
            arab_finish(first,last,c_first,c_last)
            first, last = nil, nil
            if id == math_code then -- a bit duplicate as we test for glyphs twice
                -- continue after the matching end-of-math node
                nxt = getnext(endofmath(current))
            end
        end
        current = nxt
    end
    arab_finish(first,last,c_first,c_last)
    return head, done
end

methods.syrc = methods.arab
methods.mand = methods.arab
methods.nko  = methods.arab
459
-- a quick first attempt .. more later
461
do

    -- https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-mongolian.md#joining-properties
    -- todo syrc

    -- Maps a code point onto the 'mongolian' field of its character data entry.
    -- A character without data entry gives (and caches) false; non numeric keys
    -- give nil.

    local joining = setmetatableindex(function(t,k)
        if type(k) == "number" then
            local c = chardata[k]
            local v = false
            if c then
                local mongolian = c.mongolian
                --
                v = mongolian
            end
            t[k] = v
            return v
        end
    end)

    -- The node turns out not to join with what follows: medial becomes final
    -- and initial becomes isolated.

    local function unjoin(n)
        local s = getstate(n)
        if s == s_medi then
            setstate(n,s_fina)
        elseif s == s_init then
            setstate(n,s_isol)
        end
    end

    -- Analyzer for mongolian. Glyphs of the font that have no state yet get an
    -- init, medi, fina or isol state, based on the joining type of the
    -- character and that of its predecessor. The joining types tested here
    -- are "t" (transparent), "d", "jc", "l", "r" and "nj"; presumably these
    -- are the dual, join causing, left, right and non joining types of the
    -- document mentioned above.
    --
    -- head : first node of the list (converted with tonut for traversal)
    -- font : font id that is passed on to isnextchar
    -- attr : not used here
    --
    -- Returns head and a boolean that is true when at least one glyph was
    -- looked at.
    --
    -- Fixed compared to the previous version:
    --
    -- * The local was declared as 'prestate' while 'prevstate' got assigned,
    --   so every call wrote to a global. It is now one local (so far it is
    --   only written, never read).
    -- * The "nj"/"r" branch tested an undefined 's'; it now looks at the state
    --   of 'last'.
    -- * After a predecessor of type d/jc/l the decision to join has to depend
    --   on the type of the current character; the old code tested 'prevjoin'
    --   again ('prevjoin == "r"' and 'prevjoin == "nj"' can't be true in that
    --   branch), which for instance left a "r" after a "d" without any state.

    function methods.mong(head,font,attr)
        local first, last
        local current   = tonut(head)
        local done      = false
        local prevjoin  = nil
        local prevstate = nil

        -- ends the current run and forgets all about it
        local function wrapup()
            if last then
                if last ~= first then
                    unjoin(last)
                end
                last      = nil
                first     = nil
                prevjoin  = nil
                prevstate = nil
            end
        end

        while current do
            local nxt, char, id = isnextchar(current,font)
            if char and not getstate(current) then
                local currjoin = joining[char]
                done = true
                if not last then
                    -- start of a run: isolated until a successor joins
                    setstate(current,s_isol)
                    prevjoin  = currjoin
                    first     = current
                    last      = current
                    prevstate = s_isol
                elseif currjoin == "t" then -- transparent
                    -- keep state
                    last = current
                elseif prevjoin == "d" or prevjoin == "jc" or prevjoin == "l" then
                    -- the predecessor is willing to join with what follows
                    if currjoin == "d" or currjoin == "jc" or currjoin == "r" then
                        -- and so are we: promote the predecessor
                        local s = getstate(last)
                        if s == s_isol then
                            setstate(last,s_init)
                        elseif s == s_fina then
                            setstate(last,s_medi)
                        end
                        -- final until our successor joins
                        setstate(current,s_fina)
                        prevstate = s_fina
                    elseif currjoin == "nj" or currjoin == "l" then
                        -- we don't join with the predecessor
                        unjoin(last)
                        setstate(current,s_isol)
                        prevstate = s_isol
                    end
                    prevjoin = currjoin
                    last = current
                elseif prevjoin == "nj" or prevjoin == "r" then
                    -- the predecessor doesn't join with what follows
                    unjoin(last)
                    setstate(current,s_isol)
                    prevjoin = currjoin
                    prevstate = s_isol
                    last = current
                elseif last then
                    wrapup()
                end
            else
                if last then
                    wrapup()
                end
                if id == math_code then -- a bit duplicate as we test for glyphs twice
                    -- continue after the matching end-of-math node
                    nxt = getnext(endofmath(current))
                end
            end
            current = nxt
        end
        if last then
            wrapup()
        end
        return head, done
    end

end
575
-- The directive just stores its value in analyzers.useunicodemarks.

local function setuseunicodemarks(v)
    analyzers.useunicodemarks = v
end

directives.register("otf.analyze.useunicodemarks",setuseunicodemarks)
579