-- font-ota.lua / size: 17 Kb / last modification: 2023-12-21 09:44
-- Register this file in the global module table (created here when absent).
if not modules then
    modules = { }
end

modules['font-ota'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- context only
10
-- Local aliases for the globals and helpers used throughout this file.

local type = type
local setmetatableindex = table.setmetatableindex

-- provide a dummy tracker interface when none is available
if not trackers then trackers = { register = function() end } end

----- trace_analyzing = false  trackers.register("otf.analyzing",  function(v) trace_analyzing = v end)

local fonts, nodes, node = fonts, nodes, node

local allocate            = utilities.storage.allocate

local otf                 = fonts.handlers.otf

-- the analyzer namespace gets two registries: per script initializers and methods
local analyzers           = fonts.analyzers
local initializers        = allocate()
local methods             = allocate()

analyzers.initializers    = initializers
analyzers.methods         = methods

local nuts                = nodes.nuts
local tonut               = nuts.tonut

local getnext             = nuts.getnext
local getprev             = nuts.getprev -- was declared twice; one declaration suffices
local getprop             = nuts.getprop
local setprop             = nuts.setprop
local getsubtype          = nuts.getsubtype
local getchar             = nuts.getchar
local ischar              = nuts.ischar

local endofmath           = nuts.endofmath

local nodecodes           = nodes.nodecodes
----- glyph_code          = nodecodes.glyph
local disc_code           = nodecodes.disc
local math_code           = nodecodes.math

local fontdata            = fonts.hashes.identifiers
local categories          = characters and characters.categories or { } -- sorry, only in context
local chardata            = characters and characters.data

local otffeatures         = fonts.constructors.features.otf
local registerotffeature  = otffeatures.register
56
57-- Analyzers run per script and/or language and are needed in order to process
58-- features right.
59
local setstate = nuts.setstate
local getstate = nuts.getstate

if not (setstate and getstate) then
    -- generic fallback (might move to the node lib): the state is kept in the
    -- "state" property of the node
    setstate = function(n,v)
        setprop(n,"state",v)
    end
    -- without a second argument the stored state is returned, otherwise we
    -- return whether the stored state equals that argument
    getstate = function(n,v)
        local state = getprop(n,"state")
        if not v then
            return state
        end
        return state == v
    end
    -- publish the fallbacks so that other code sees the same accessors
    nuts.setstate = setstate
    nuts.getstate = getstate
end
79
80-- never use these numbers directly
81
-- The numeric state values; code elsewhere should go through the tables below.

local s_init =  1
local s_medi =  2
local s_fina =  3
local s_isol =  4
local s_mark =  5
local s_rest =  6
local s_rphf =  7
local s_half =  8
local s_pref =  9
local s_blwf = 10
local s_pstf = 11

-- All known state names; the numbered variants (med2, fin2, fin3) share the
-- value of their base state.

local states = allocate {
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
    mark = s_mark,
    rest = s_rest,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
}

-- The same mapping, but without the mark and rest states.

local features = allocate {
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
 -- mark = s_mark,
 -- rest = s_rest,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
}

analyzers.states          = states
analyzers.features        = features
analyzers.useunicodemarks = false
126
127-- todo: analyzers per script/lang, cross font, so we need an font id hash -> script
128-- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace
129
130-- done can go away as can tonut
131
-- Closes a run of glyphs: when the run consists of exactly one node (first
-- equals last) that node becomes isolated, otherwise the last node, if any,
-- becomes final. The caller is responsible for resetting its own bookkeeping.

local function closerun(first,last)
    if first and first == last then
        setstate(last,s_isol)
    elseif last then
        setstate(last,s_fina)
    end
end

-- Assigns init/medi/fina/isol/mark states to the glyphs of the given font in
-- the node list starting at head. Glyphs that already carry a state are left
-- alone (they end the current run). The function returns the unchanged head
-- and a boolean that is true when at least one stateless glyph of this font
-- has been seen.

function analyzers.setstate(head,font)
    local useunicodemarks = analyzers.useunicodemarks
    local tfmdata         = fontdata[font]
    local descriptions    = tfmdata.descriptions
    local first, last     = nil, nil -- boundaries of the run being collected
    local done            = false
    local current         = tonut(head)
    while current do
        local char, id = ischar(current,font)
        if char and not getstate(current) then
            done = true
            local d = descriptions[char]
            if not d then
                -- no description for this character: finish the run
                closerun(first,last)
                first, last = nil, nil
            elseif d.class == "mark" or (useunicodemarks and categories[char] == "mn") then
                -- marks get their own state and don't influence the run
                setstate(current,s_mark)
            elseif not first then
                first, last = current, current
                setstate(current,s_init)
            else
                last = current
                setstate(current,s_medi)
            end
        elseif char ~= false and id == disc_code then
            -- always in the middle .. it doesn't make much sense to assign a property
            -- here ... we might at some point decide to flag the components when present
            -- but even then it's kind of bogus
            setstate(current,s_medi)
            last = current
        else
            -- a glyph from another font (char == false), a glyph that already has a
            -- state, or some other node: finish the run
            closerun(first,last)
            first, last = nil, nil
            if id == math_code then
                -- continue at the node that endofmath returns for this math node
                current = endofmath(current)
            end
        end
        current = getnext(current)
    end
    closerun(first,last)
    return head, done
end
202
203-- in the future we will use language/script attributes instead of the
204-- font related value, but then we also need dynamic features which is
205-- somewhat slower; and .. we need a chain of them
206
-- Looks up the initializer that belongs to the script (and, when the script
-- entry is not a function, the language) of the font and calls it. Nothing is
-- returned when no initializer is registered.

local function analyzeinitializer(tfmdata,value) -- attr
    local script, language = otf.scriptandlanguage(tfmdata) -- attr
    local handler = initializers[script]
    if handler then
        if type(handler) ~= "function" then
            -- a per script table indexed by language
            handler = handler[language]
        end
        if handler then
            return handler(tfmdata,value)
        end
    end
end
221
-- Dispatches the node list to the analyzer method that is registered for the
-- script (and, when the script entry is not a function, the language) of the
-- font. When there is no such method the head is returned with false.

local function analyzeprocessor(head,font,attr)
    local tfmdata          = fontdata[font]
    local script, language = otf.scriptandlanguage(tfmdata,attr)
    local handler          = methods[script]
    if handler and type(handler) ~= "function" then
        -- a per script table indexed by language
        handler = handler[language]
    end
    if handler then
        return handler(head,font,attr)
    end
    return head, false
end
238
-- The analyze feature is enabled by default; it hooks the initializer and the
-- processor defined above into the node mode handlers.

registerotffeature({
    name         = "analyze",
    description  = "analysis of character classes",
    default      = true,
    initializers = { node = analyzeinitializer },
    processors   = { position = 1, node = analyzeprocessor },
})

-- latin

methods.latn = analyzers.setstate
-------.dflt = analyzers.setstate % can be an option or just the default
256
-- characters that we already complained about
local arab_warned = { }

-- Reports, once per character, that the character of the given node lacks the
-- class named by what.

local function warning(current,what)
    local char = getchar(current)
    if arab_warned[char] then
        return
    end
    log.report("analyze","arab: character %C has no %a class",char,what)
    arab_warned[char] = true
end
266
-- Maps the one letter value of the arabic field in the character data onto a
-- state.

local mappers = allocate {
    -- joining on one side only
    l = s_init, -- left
    r = s_fina, -- right
    -- joining on both sides
    d = s_medi, -- double
    c = s_medi, -- joiner
    -- not joining at all
    u = s_isol, -- nonjoiner
}
274
275-- we can also use this trick for devanagari
276
local classifiers = characters.classifiers

if not classifiers then

    -- the first and last code point of the blocks that we deal with

    local f_arabic,  l_arabic  = characters.blockrange("arabic")
    local f_syriac,  l_syriac  = characters.blockrange("syriac")
    local f_mandiac, l_mandiac = characters.blockrange("mandiac")
    local f_nko,     l_nko     = characters.blockrange("nko")
    local f_ext_a,   l_ext_a   = characters.blockrange("arabicextendeda")

    -- The classifier of a character is resolved on first access and then
    -- stored in the table; false means that there is no classifier. Keys that
    -- are not numbers are ignored.

    classifiers = setmetatableindex(function(t,k)
        if type(k) ~= "number" then
            return
        end
        local v = false
        local c = chardata[k]
        if c then
            local arabic = c.arabic
            if arabic then
                -- an explicit joining type in the character data wins
                v = mappers[arabic]
                if not v then
                    log.report("analyze","error in mapping arabic %C",k)
                    --  error
                    v = false
                end
            elseif (k >= f_arabic  and k <= l_arabic)  or
                   (k >= f_syriac  and k <= l_syriac)  or
                   (k >= f_mandiac and k <= l_mandiac) or
                   (k >= f_nko     and k <= l_nko)     or
                   (k >= f_ext_a   and k <= l_ext_a)   then
                -- inside one of the blocks: a mark or just something else
                v = categories[k] == "mn" and s_mark or s_rest
            end
        end
        t[k] = v
        return v
    end)

    characters.classifiers = classifiers

end
320
-- Assigns init/medi/fina/isol/mark/rest states to the stateless glyphs of the
-- given font, driven by the classifier of each character. The unchanged head
-- is returned together with a boolean that is true when at least one such
-- glyph has been seen.
--
-- While scanning, first is the node that opened the current run (it got the
-- init state) and last is the most recent medial node of that run; c_first
-- and c_last are the classifiers of these nodes.

function methods.arab(head,font,attr)
    -- NOTE(review): s_error is not defined anywhere in this file, so the original
    -- code read an undefined global, i.e. nil. We make that explicit here; confirm
    -- whether a dedicated error state was intended.
    local s_error = nil
    local first, last, c_first, c_last
    local done    = false
    local current = tonut(head)

    -- Finishes the pending run, if any: a pending last node becomes final, a
    -- lone first node becomes isolated. When the classifier of that node does
    -- not permit this we warn and assign s_error instead.
    local function finish()
        if last then
            if c_last == s_medi or c_last == s_fina then
                setstate(last,s_fina)
            else
                warning(last,"fina")
                setstate(last,s_error)
            end
            first, last = nil, nil
        elseif first then
            if c_first == s_medi or c_first == s_fina then
                setstate(first,s_isol)
            else
                warning(first,"isol")
                setstate(first,s_error)
            end
            first = nil
        end
    end

    while current do
        local char, id = ischar(current,font)
        if char and not getstate(current) then
            done = true
            local classifier = classifiers[char]
            if not classifier then
                -- not something we know about: it only ends the run
                finish()
            elseif classifier == s_mark then
                -- marks don't interfere with the run
                setstate(current,s_mark)
            elseif classifier == s_isol then
                finish()
                setstate(current,s_isol)
            elseif classifier == s_medi then
                if first then
                    -- continues the run
                    last   = current
                    c_last = classifier
                    setstate(current,s_medi)
                else
                    -- opens a new run
                    setstate(current,s_init)
                    first   = current
                    c_first = classifier
                end
            elseif classifier == s_fina then
                if last then
                    if getstate(last) ~= s_init then
                        setstate(last,s_medi)
                    end
                    setstate(current,s_fina)
                    first, last = nil, nil
                elseif first then
                 -- if getstate(first) ~= s_init then
                 --     -- needs checking
                 --     setstate(first,s_medi)
                 -- end
                    setstate(current,s_fina)
                    first = nil
                else
                    -- nothing to connect to
                    setstate(current,s_isol)
                end
            else -- classifier == s_rest (or any value not handled above)
                setstate(current,s_rest)
                finish()
            end
        else
            -- not a stateless glyph of this font: it ends the run
            finish()
            if id == math_code then -- a bit duplicate as we test for glyphs twice
                current = endofmath(current)
            end
        end
        current = getnext(current)
    end
    finish()
    return head, done
end

-- these scripts share the arabic analyzer

methods.syrc = methods.arab
methods.mand = methods.arab
methods.nko  = methods.arab
462
-- a quick first attempt .. more later
464
465do
466
467    -- https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-mongolian.md#joining-properties
468    -- todo syrc
469
470    local joining = setmetatableindex(function(t,k)
471        if type(k) == "number" then
472            local c = chardata[k]
473            local v = false
474            if c then
475                local mongolian = c.mongolian
476                --
477                v = mongolian
478            end
479            t[k] = v
480            return v
481        end
482    end)
483
484    function methods.mong(head,font,attr)
485        local first, last
486        local current  = head
487        local done     = false
488        local prevjoin = nil
489        local prestate = nil
490        current = tonut(current)
491
492        local function wrapup()
493            if last then
494                if last ~= first then
495                    local s = getstate(last)
496                    if s == s_medi then
497                        setstate(last,s_fina)
498                    elseif s == s_init then
499                        setstate(last,s_isol)
500                    end
501                end
502                last = nil
503                first = nil
504                prevjoin = nil
505                prestate = nil
506            end
507        end
508
509        while current do
510            local char, id = ischar(current,font)
511            if char and not getstate(current) then
512                local currjoin = joining[char]
513                done = true
514                if not last then
515                    setstate(current,s_isol)
516                    prevjoin  = currjoin
517                    first     = current
518                    last      = current
519                    prevstate = s_isol
520                elseif currjoin == "t" then -- transparent
521                    -- keep state
522                    last = current
523                elseif prevjoin == "d" or prevjoin == "jc" or prevjoin == "l" then
524                    if currjoin == "d" or prevjoin == "jc" or prevjoin == "r" then
525                        local s = getstate(last)
526                        if s == s_isol then
527                            setstate(last,s_init)
528                        elseif s == s_fina then
529                            setstate(last,s_medi)
530                        end
531                        setstate(current,s_fina)
532                        prevstate = s_fina
533                    elseif prevjoin == "nj" or prevjoin == "l" then
534                        local s = getstate(last)
535                        if s == s_medi then
536                            setstate(last,s_fina)
537                        elseif s == s_init then
538                            setstate(last,s_isol)
539                        end
540                        setstate(current,s_isol)
541                        prevstate = s_isol
542                    end
543                    prevjoin = currjoin
544                    last = current
545                elseif prevjoin == "nj" or prevjoin == "r" then
546                    if s == s_medi then
547                        setstate(last,s_fina)
548                    elseif s == s_init then
549                        setstate(last,s_isol)
550                    end
551                    setstate(current,s_isol)
552                    prevjoin = currjoin
553                    prevstate = s_isol
554                    last = current
555                elseif last then
556                    wrapup()
557                end
558            else
559                if last then
560                    wrapup()
561                end
562                if id == math_code then -- a bit duplicate as we test for glyphs twice
563                    current = endofmath(current)
564                end
565            end
566            current = getnext(current)
567        end
568        if last then
569            wrapup()
570        end
571        return head, done
572    end
573
574end
575
-- This directive controls whether analyzers.setstate also consults the
-- character categories ("mn") when it identifies marks.

directives.register(
    "otf.analyze.useunicodemarks",
    function(enabled)
        analyzers.useunicodemarks = enabled
    end
)
579