font-ota.lua /size: 17 Kb    last modification: 2021-10-28 13:50
-- Make sure the global module registry exists and then record the metadata
-- of this file under its own name.

if not modules then
    modules = { }
end

modules["font-ota"] = {
    license   = "see context related readme files",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    comment   = "companion to font-ini.mkiv",
    version   = 1.001,
}
8
9-- context only
10
11local type = type
12local setmetatableindex = table.setmetatableindex
13
14if not trackers then trackers = { register = function() end } end
15
16----- trace_analyzing = false  trackers.register("otf.analyzing",  function(v) trace_analyzing = v end)
17
18local fonts, nodes, node = fonts, nodes, node
19
20local allocate            = utilities.storage.allocate
21
22local otf                 = fonts.handlers.otf
23
24local analyzers           = fonts.analyzers
25local initializers        = allocate()
26local methods             = allocate()
27
28analyzers.initializers    = initializers
29analyzers.methods         = methods
30
31local nuts                = nodes.nuts
32local tonut               = nuts.tonut
33
34local getnext             = nuts.getnext
35local getprev             = nuts.getprev
36local getprev             = nuts.getprev
37local getprop             = nuts.getprop
38local setprop             = nuts.setprop
39local getsubtype          = nuts.getsubtype
40local getchar             = nuts.getchar
41local ischar              = nuts.ischar
42
43local endofmath           = nuts.endofmath
44
45local nodecodes           = nodes.nodecodes
46----- glyph_code          = nodecodes.glyph
47local disc_code           = nodecodes.disc
48local math_code           = nodecodes.math
49
50local fontdata            = fonts.hashes.identifiers
51local categories          = characters and characters.categories or { } -- sorry, only in context
52local chardata            = characters and characters.data
53
54local otffeatures         = fonts.constructors.features.otf
55local registerotffeature  = otffeatures.register
56
57--[[ldx--
58<p>Analyzers run per script and/or language and are needed in order to
59process features right.</p>
60--ldx]]--
61
-- State accessors: use the ones that the nuts library provides and otherwise
-- install a pair that keeps the state in the "state" node property.

local setstate = nuts.setstate
local getstate = nuts.getstate

if not (setstate and getstate) then

    -- generic (might move to the node lib)

    -- Store state v on node n.
    local function setnodestate(n,v)
        setprop(n,"state",v)
    end

    -- Without v: return the stored state of node n. With v: return whether
    -- the stored state equals v.
    local function getnodestate(n,v)
        local state = getprop(n,"state")
        if not v then
            return state
        end
        return state == v
    end

    setstate, getstate = setnodestate, getnodestate

    nuts.setstate = setnodestate
    nuts.getstate = getnodestate

end
81
82-- never use these numbers directly
83
-- Numeric state codes: the first group is used by the joining analyzers in
-- this file, the second group is named after the features listed below.

local s_init, s_medi, s_fina, s_isol, s_mark, s_rest = 1, 2, 3, 4, 5, 6
local s_rphf, s_half, s_pref, s_blwf, s_pstf         = 7, 8, 9, 10, 11

-- Maps every state name (including the med2/fin2/fin3 aliases) to its code.

local states = allocate({
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
    mark = s_mark,
    rest = s_rest,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
})

-- Same mapping as states but without the mark and rest entries.

local features = allocate({
    init = s_init,
    medi = s_medi, med2 = s_medi,
    fina = s_fina, fin2 = s_fina, fin3 = s_fina,
    isol = s_isol,
 -- mark = s_mark,
 -- rest = s_rest,
    rphf = s_rphf,
    half = s_half,
    pref = s_pref,
    blwf = s_blwf,
    pstf = s_pstf,
})

analyzers.states          = states
analyzers.features        = features
analyzers.useunicodemarks = false
128
-- todo: analyzers per script/lang, cross font, so we need a font id hash -> script
-- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace
131
132-- done can go away as can tonut
133
-- Assign init/medi/fina/isol/mark states to the glyphs of the given font in
-- a node list.
--
-- A run consists of consecutive glyphs of this font that have no state yet
-- and that have a description in the font. The first glyph of a run becomes
-- init and the following ones medi; when the run ends its last node is
-- changed into fina, or into isol when the run has only one node. Marks get
-- the mark state and neither start nor end a run.
--
-- head : start of the node list (converted with tonut)
-- font : font id, used for the glyph test and the description lookup
--
-- Returns head and a boolean that is true when at least one stateless glyph
-- of this font was seen.

function analyzers.setstate(head,font)
    local usemarks     = analyzers.useunicodemarks
    local descriptions = fontdata[font].descriptions
    local first, last  = nil, nil
    local count        = 0 -- number of non mark glyphs in the current run
    local done         = false

    -- Close the current run (if any) and start with a clean slate.
    local function finish()
        if first and first == last then
            setstate(last,s_isol)
        elseif last then
            setstate(last,s_fina)
        end
        first, last, count = nil, nil, 0
    end

    local current = tonut(head)
    while current do
        local char, id = ischar(current,font)
        if char and not getstate(current) then
            done = true
            local description = descriptions[char]
            if not description then
                -- unknown in this font: ends the run
                finish()
            elseif description.class == "mark" or (usemarks and categories[char] == "mn") then
                setstate(current,s_mark)
            elseif count == 0 then
                first, last, count = current, current, 1
                setstate(current,s_init)
            else
                last, count = current, count + 1
                setstate(current,s_medi)
            end
        elseif char ~= false and id == disc_code then
            -- always in the middle .. it doesn't make much sense to assign a property
            -- here ... we might at some point decide to flag the components when present
            -- but even then it's kind of bogus
            setstate(current,s_medi)
            last = current
        else
            -- a glyph from another font (char is false) or any other node
            finish()
            if id == math_code then
                -- skip the whole math sequence
                current = endofmath(current)
            end
        end
        current = getnext(current)
    end
    finish()
    return head, done
end
204
205-- in the future we will use language/script attributes instead of the
206-- font related value, but then we also need dynamic features which is
207-- somewhat slower; and .. we need a chain of them
208
-- Feature initializer for "analyze": look up the initializer that belongs
-- to the script (and optionally language) of the font and run it.
--
-- An entry in initializers is either a function or a table of functions
-- indexed by language. When nothing matches nothing is returned, otherwise
-- whatever the selected initializer returns.

local function analyzeinitializer(tfmdata,value) -- attr
    local script, language = otf.scriptandlanguage(tfmdata) -- attr
    local handler = initializers[script]
    if handler and type(handler) ~= "function" then
        -- per language table
        handler = handler[language]
    end
    if handler then
        return handler(tfmdata,value)
    end
end
223
-- Node processor for "analyze": dispatch to the analyzer method that is
-- registered for the script (and optionally language) of the font.
--
-- An entry in methods is either a function or a table of functions indexed
-- by language. Returns what the selected method returns, or head and false
-- when there is no method.

local function analyzeprocessor(head,font,attr)
    local script, language = otf.scriptandlanguage(fontdata[font],attr)
    local handler = methods[script]
    if handler and type(handler) ~= "function" then
        -- per language table
        handler = handler[language]
    end
    if handler then
        return handler(head,font,attr)
    end
    return head, false
end
240
-- Register the "analyze" feature (enabled by default) with its node mode
-- initializer and processor.

registerotffeature({
    name         = "analyze",
    description  = "analysis of character classes",
    default      = true,
    processors   = {
        node     = analyzeprocessor,
        position = 1,
    },
    initializers = {
        node     = analyzeinitializer,
    },
})

-- latin

methods.latn = analyzers.setstate
-------.dflt = analyzers.setstate % can be an option or just the default
258
-- Characters that we already complained about (one message per character).

local arab_warned = { }

-- Report once per character that the glyph in node current lacks the class
-- named by what ("fina" or "isol" in this file).
--
-- The original called log.report; the reporter used here is resolved from
-- the logs namespace first, and log.report is only kept as a fallback. When
-- neither is available the message is dropped instead of raising an error.

local function warning(current,what)
    local char = getchar(current)
    if not arab_warned[char] then
        local report = (logs and logs.report) or (log and log.report)
        if report then
            report("analyze","arab: character %C has no %a class",char,what)
        end
        arab_warned[char] = true
    end
end
268
-- Maps the value of the arabic field in the character data to a state.

local mappers = allocate {
    l = s_init, -- left
    d = s_medi, -- double
    c = s_medi, -- joiner
    r = s_fina, -- right
    u = s_isol, -- nonjoiner
}

-- we can also use this trick for devanagari

-- classifiers[unicode] gives the joining class (one of the s_* states) of a
-- character or false when there is none. We use characters.classifiers when
-- it is already there and otherwise install a table that is filled on
-- demand from the character data.

local classifiers = characters.classifiers

if not classifiers then

    local blockrange = characters.blockrange

    local f_arabic,  l_arabic  = blockrange("arabic")
    local f_syriac,  l_syriac  = blockrange("syriac")
    -- The original asked for "mandiac"; the unicode block is called Mandaic.
    local f_mandaic, l_mandaic = blockrange("mandaic")
    local f_nko,     l_nko     = blockrange("nko")
    local f_ext_a,   l_ext_a   = blockrange("arabicextendeda")

    -- True when k lies in one of the blocks that we handle here.
    local function inblocks(k)
        return (k >= f_arabic  and k <= l_arabic)
            or (k >= f_syriac  and k <= l_syriac)
            or (k >= f_mandaic and k <= l_mandaic)
            or (k >= f_nko     and k <= l_nko)
            or (k >= f_ext_a   and k <= l_ext_a)
    end

    classifiers = setmetatableindex(function(t,k)
        -- only numeric keys are resolved (and cached), other keys give nil
        if type(k) == "number" then
            local c = chardata[k]
            local v = false
            if c then
                local arabic = c.arabic
                if arabic then
                    v = mappers[arabic]
                    if not v then
                        -- Unknown joining code. The original called
                        -- log.report here; we prefer logs.report and only
                        -- fall back on log.report.
                        local report = (logs and logs.report) or (log and log.report)
                        if report then
                            report("analyze","error in mapping arabic %C",k)
                        end
                        --  error
                        v = false
                    end
                elseif inblocks(k) then
                    -- no explicit joining info: a mark or just the rest
                    if categories[k] == "mn" then
                        v = s_mark
                    else
                        v = s_rest
                    end
                end
            end
            t[k] = v
            return v
        end
    end)

    characters.classifiers = classifiers

end
322
-- Joining analysis for arabic (also used for syriac, mandaic and n'ko).
--
-- Glyphs of the given font that have no state yet are classified with the
-- classifiers table and get one of the init/medi/fina/isol/mark/rest
-- states. A pending sequence is tracked with first (the node that opened
-- it) and last (the most recent joining node after first) and is closed as
-- soon as something shows up that cannot continue it.
--
-- head : start of the node list (converted with tonut)
-- font : font id that glyphs must have in order to be analyzed
-- attr : not used here
--
-- Returns head and a boolean that is true when at least one stateless glyph
-- of this font was seen.

function methods.arab(head,font,attr)
    -- No declaration of s_error is visible in this file, so the original
    -- code read an undefined global here, which is nil. We make that
    -- explicit with a local so that no global lookup is involved.
    local s_error = nil
    local first, last, c_first, c_last
    local done    = false
    local current = tonut(head)

    -- Close the pending sequence: last becomes fina, or when there is only
    -- an opening node, that one becomes isol. In this function c_first and
    -- c_last only ever get s_medi assigned, so the warning branches are a
    -- safeguard.
    local function finish()
        if last then
            if c_last == s_medi or c_last == s_fina then
                setstate(last,s_fina)
            else
                warning(last,"fina")
                setstate(last,s_error)
            end
            first, last = nil, nil
        elseif first then
            if c_first == s_medi or c_first == s_fina then
                setstate(first,s_isol)
            else
                warning(first,"isol")
                setstate(first,s_error)
            end
            first = nil
        end
    end

    while current do
        local char, id = ischar(current,font)
        if char and not getstate(current) then
            done = true
            local classifier = classifiers[char]
            if not classifier then
                -- no joining class at all: ends the sequence
                finish()
            elseif classifier == s_mark then
                -- marks don't influence the sequence
                setstate(current,s_mark)
            elseif classifier == s_isol then
                finish()
                setstate(current,s_isol)
            elseif classifier == s_medi then
                if first then
                    last = current
                    c_last = classifier
                    setstate(current,s_medi)
                else
                    setstate(current,s_init)
                    first = current
                    c_first = classifier
                end
            elseif classifier == s_fina then
                if last then
                    if getstate(last) ~= s_init then
                        setstate(last,s_medi)
                    end
                    setstate(current,s_fina)
                    first, last = nil, nil
                elseif first then
                 -- if getstate(first) ~= s_init then
                 --     -- needs checking
                 --     setstate(first,s_medi)
                 -- end
                    setstate(current,s_fina)
                    first = nil
                else
                    setstate(current,s_isol)
                end
            else -- classifier == s_rest
                setstate(current,s_rest)
                finish()
            end
        else
            -- no glyph of this font or a glyph that already has a state
            finish()
            if id == math_code then -- a bit duplicate as we test for glyphs twice
                current = endofmath(current)
            end
        end
        current = getnext(current)
    end
    finish()
    return head, done
end

methods.syrc = methods.arab
methods.mand = methods.arab
methods.nko  = methods.arab
464
-- a quick first attempt .. more later
466
do

    -- https://github.com/n8willis/opentype-shaping-documents/blob/master/opentype-shaping-mongolian.md#joining-properties
    -- todo syrc

    -- joining[unicode] gives the value of the mongolian field in the
    -- character data or false when there is none. Only numeric keys are
    -- resolved (and cached).

    local joining = setmetatableindex(function(t,k)
        if type(k) == "number" then
            local c = chardata[k]
            local v = false
            if c then
                local mongolian = c.mongolian
                --
                v = mongolian
            end
            t[k] = v
            return v
        end
    end)

    -- Joining analysis for mongolian.
    --
    -- Glyphs of the given font that have no state yet get an
    -- init/medi/fina/isol state based on the joining type of the previous
    -- and the current character. The joining types tested here are "d",
    -- "jc", "l", "r", "nj" and "t".
    --
    -- head : start of the node list (converted with tonut)
    -- font : font id that glyphs must have in order to be analyzed
    -- attr : not used here
    --
    -- Returns head and a boolean that is true when at least one stateless
    -- glyph of this font was seen.
    --
    -- Fixed compared to the original:
    --
    -- * the previous state was declared as prestate but assigned as
    --   prevstate, which created a global; it is one local now
    -- * the inner tests looked at prevjoin where currjoin was meant (within
    --   that branch prevjoin is always "d", "jc" or "l", so testing it for
    --   "r" or "nj" could never succeed)
    -- * the "nj"/"r" branch tested s without fetching the state of last

    function methods.mong(head,font,attr)
        local first, last
        local current   = tonut(head)
        local done      = false
        local prevjoin  = nil
        local prevstate = nil -- tracked but not consulted (yet)

        -- Nothing follows node: a medial form becomes final, an initial
        -- form becomes isolated.
        local function terminate(node)
            local s = getstate(node)
            if s == s_medi then
                setstate(node,s_fina)
            elseif s == s_init then
                setstate(node,s_isol)
            end
        end

        -- Close the pending sequence and forget about it.
        local function wrapup()
            if last then
                if last ~= first then
                    terminate(last)
                end
                last      = nil
                first     = nil
                prevjoin  = nil
                prevstate = nil
            end
        end

        while current do
            local char, id = ischar(current,font)
            if char and not getstate(current) then
                local currjoin = joining[char]
                done = true
                if not last then
                    -- start of a sequence: isolated until proven otherwise
                    setstate(current,s_isol)
                    prevjoin  = currjoin
                    first     = current
                    last      = current
                    prevstate = s_isol
                elseif currjoin == "t" then -- transparent
                    -- keep state
                    -- NOTE(review): last now points to a node without state
                    -- so a following joiner can't upgrade the letter before
                    -- the transparent one; to be confirmed if that is meant
                    last = current
                elseif prevjoin == "d" or prevjoin == "jc" or prevjoin == "l" then
                    -- the previous character joins with what follows
                    if currjoin == "d" or currjoin == "jc" or currjoin == "r" then
                        -- and the current one joins with what precedes
                        local s = getstate(last)
                        if s == s_isol then
                            setstate(last,s_init)
                        elseif s == s_fina then
                            setstate(last,s_medi)
                        end
                        setstate(current,s_fina)
                        prevstate = s_fina
                    elseif currjoin == "nj" or currjoin == "l" then
                        -- the current one doesn't join with what precedes
                        terminate(last)
                        setstate(current,s_isol)
                        prevstate = s_isol
                    end
                    prevjoin = currjoin
                    last = current
                elseif prevjoin == "nj" or prevjoin == "r" then
                    -- the previous character doesn't join with what follows
                    terminate(last)
                    setstate(current,s_isol)
                    prevjoin  = currjoin
                    prevstate = s_isol
                    last      = current
                elseif last then
                    wrapup()
                end
            else
                if last then
                    wrapup()
                end
                if id == math_code then -- a bit duplicate as we test for glyphs twice
                    current = endofmath(current)
                end
            end
            current = getnext(current)
        end
        if last then
            wrapup()
        end
        return head, done
    end

end
577
-- Directive "otf.analyze.useunicodemarks": stores the given value in
-- analyzers.useunicodemarks, which analyzers.setstate consults.

local function setuseunicodemarks(v)
    analyzers.useunicodemarks = v
end

directives.register("otf.analyze.useunicodemarks",setuseunicodemarks)
581