math-tag.lmt /size: 28 Kb    last modification: 2021-10-28 13:51
1if not modules then modules = { } end modules ['math-tag'] = {
2    version   = 1.001,
3    comment   = "companion to math-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- todo: have a local list with local tags that then get appended
10-- todo: use tex.getmathcodes (no table)
11-- todo: add more spacing details + check text stuff for latest additions
12-- todo: some more font related cleanup + adaption to new scaling
13-- todo: tracing
14-- todo: maybe use lpeg matchers
15
16local find, match = string.find, string.match
17local insert, remove, concat = table.insert, table.remove, table.concat
18
19local attributes        = attributes
20local nodes             = nodes
21
22local nuts              = nodes.nuts
23local tonut             = nuts.tonut
24
25local getchar           = nuts.getchar
26local getcharspec       = nuts.getcharspec
27local getdata           = nuts.getdata
28local getlist           = nuts.getlist
29local getfield          = nuts.getfield
30local getdisc           = nuts.getdisc
31local getattr           = nuts.getattr
32local getattrlist       = nuts.getattrlist
33local setattr           = nuts.setattr
34local getwidth          = nuts.getwidth
35
36local getnucleus        = nuts.getnucleus
37local getsub            = nuts.getsub
38local getsup            = nuts.getsup
39
40local setattributes     = nuts.setattributes
41
42local nextnode          = nuts.traversers.node
43
44local nodecodes         = nodes.nodecodes
45
46local noad_code         = nodecodes.noad           -- attr nucleus sub sup
47local accent_code       = nodecodes.accent         -- attr nucleus sub sup accent
48local radical_code      = nodecodes.radical        -- attr nucleus sub sup left degree
49local fraction_code     = nodecodes.fraction       -- attr nucleus sub sup left right
50local subbox_code       = nodecodes.subbox         -- attr list
51local submlist_code     = nodecodes.submlist       -- attr list
52local mathchar_code     = nodecodes.mathchar       -- attr fam char
53local mathtextchar_code = nodecodes.mathtextchar   -- attr fam char
54local delimiter_code    = nodecodes.delimiter      -- attr small_fam small_char large_fam large_char
55local style_code        = nodecodes.style          -- attr style
56local choice_code       = nodecodes.choice         -- attr display text script scriptscript
57local fence_code        = nodecodes.fence          -- attr subtype
58
59local accentcodes       = nodes.accentcodes
60local fencecodes        = nodes.fencecodes
61
62local fixedtopaccent_code    = accentcodes.fixedtop
63local fixedbottomaccent_code = accentcodes.fixedbottom
64local fixedbothaccent_code   = accentcodes.fixedboth
65
66local leftfence_code    = fencecodes.left
67local middlefence_code  = fencecodes.middle
68local rightfence_code   = fencecodes.right
69
70local kerncodes         = nodes.kerncodes
71
72local fontkern_code     = kerncodes.fontkern
73local italickern_code   = kerncodes.italickern
74
75local hlist_code        = nodecodes.hlist
76local vlist_code        = nodecodes.vlist
77local glyph_code        = nodecodes.glyph
78local disc_code         = nodecodes.disc
79local glue_code         = nodecodes.glue
80local kern_code         = nodecodes.kern
81local math_code         = nodecodes.math
82
83local processnoads      = noads.process
84
85local a_tagged          = attributes.private('tagged')
86local a_mathcategory    = attributes.private('mathcategory')
87local a_mathmode        = attributes.private('mathmode')
88
89local tags              = structures.tags
90
91local start_tagged      = tags.start
92local restart_tagged    = tags.restart
93local stop_tagged       = tags.stop
94local taglist           = tags.taglist
95
96local chardata          = characters.data
97
98local getmathcodes      = tex.getmathcodes
99local mathcodes         = mathematics.codes
100local ordinary_mathcode = mathcodes.ordinary
101local variable_mathcode = mathcodes.variable
102
103local fromunicode16     = fonts.mappings.fromunicode16
104local fontcharacters    = fonts.hashes.characters
105
106local report_tags       = logs.reporter("structure","tags")
107
108local process
109
-- Tags a noad together with its optional sub- and superscript. Depending on
-- which scripts are present the noad becomes an msubsup, msub or msup element;
-- without scripts only the nucleus is processed. Each script is wrapped in an
-- mrow so that adjacent elements (for instance mn's) stay separated. TeX and
-- MathML order the scripts differently, so at some point an attribute that
-- signals sub versus super might be needed.
local function processsubsup(start)
    local nucleus = getnucleus(start)
    local sup     = getsup(start)
    local sub     = getsub(start)
    if not sub and not sup then
        -- nothing attached: no wrapper element at all
        process(nucleus)
        return
    end
    if sub and sup then
        setattr(start,a_tagged,start_tagged("msubsup"))
        process(nucleus)
        start_tagged("mrow", { subscript = true })
        process(sub)
        stop_tagged()
        start_tagged("mrow", { superscript = true })
        process(sup)
        stop_tagged()
    else
        -- exactly one of the two scripts is present
        local script = sub or sup
        setattr(start,a_tagged,start_tagged(sub and "msub" or "msup"))
        process(nucleus)
        start_tagged("mrow")
        process(script)
        stop_tagged()
    end
    stop_tagged() -- closes msubsup / msub / msup
end
153
154-- todo: check function here and keep attribute the same
155
156-- todo: variants -> original
157
-- Stacks shared by the tagger: one for nested maction details and one for the
-- property tables of currently open fences.
local actionstack, fencesstack = { }, { }
160
161-- glyph nodes and such can happen in under and over stuff
162
-- Returns the unicode slot that belongs to the character of node n (used
-- instead of getchar). The character data of the node's font is consulted; when
-- the font has no entry for the character, or the entry carries no unicode, the
-- plain character code is returned instead.
local function getunicode(n)
    local char, font = getcharspec(n)
    local data = fontcharacters[font][char]
    -- data can be missing, so guard before indexing it
    return data and data.unicode or char -- can be a table but unlikely for math characters
end
168
169-------------------
170
-- Flag that is raised by the noad processor as soon as a math character is seen.
local found   = false

-- Action table handed to the noad processor: only math characters are of
-- interest here.
local content = {
    [mathchar_code] = function() found = true end,
}

-- Reports whether the given list was flagged as containing a math character by
-- the noad processor (used to decide whether a radical has a real degree).
local function hascontent(head)
    found = false
    processnoads(head,content,"content")
    return found
end
181
182--------------------
183
184-- todo: use properties
185
186-- local function showtag(n,id,old)
187--     local attr = getattr(n,a_tagged)
188--     local curr = tags.current()
189--     report_tags("%s, node %s, attr %s:%s (%s), top %s (%s)",
190--         old and "before" or "after ",
191--         nodecodes[id],
192--         getattrlist(n),
193--         attr or "?",attr and taglist[attr].tagname or "?",
194--         curr or "?",curr and taglist[curr].tagname or "?"
195--     )
196-- end
197
-- I need to bring this in sync with what has been added to or removed from mathml 3,
-- not that there have been many changes. It will happen in sync with other mathml
-- updates in context, where we also keep adapting to the cycling between support and
-- no support in browsers and the come-and-go of alternatives like ascii math and
-- mathjax. It's the web and browser support that drives this, not tex and its
-- community. So, maybe I'll add some more detail here, not that it matters much in
-- the long run, where we only focus on structure and let the engine deal with the
-- details. Another reason to update this is that we can add some tracing (lmtx only).
206
-- Walks a (math) node list and assigns structure tags to it. For each node a
-- structure element is started with start_tagged, the value it returns is
-- stored in the node's 'tagged' attribute, nested lists are processed
-- recursively and the element is closed again with stop_tagged. Every
-- start_tagged in here has to be matched by exactly one stop_tagged.
--
-- Runs of glyphs and discretionaries (plus the font and italic kerns between
-- them) share one "mtext" element that is closed as soon as something else shows
-- up, or at the end of the list.
--
-- start : first node of the list to tag
process = function(start) -- we cannot use the processor as we have no finalizers (yet)
    local mtexttag = nil
    for start, id, subtype in nextnode, start do -- current
        if id == glyph_code or id == disc_code then
            if not mtexttag then
                mtexttag = start_tagged("mtext")
            end
            setattr(start,a_tagged,mtexttag)
        elseif mtexttag and id == kern_code and (subtype == fontkern_code or subtype == italickern_code) then
            -- Only kerns inside an open mtext run belong to it. The parentheses
            -- matter: without them any node whose subtype happens to equal the
            -- italic kern code would end up here.
            setattr(start,a_tagged,mtexttag)
        else
            if mtexttag then
                stop_tagged()
                mtexttag = nil
            end
            if id == mathchar_code then
                local char = getchar(start)
                local code = getmathcodes(char)
                local tag
                if code == ordinary_mathcode or code == variable_mathcode then
                    local ch = chardata[char]
                    local mc = ch and ch.mathclass
                    if mc == "number" then
                        tag = "mn"
                    elseif mc == "variable" or not mc then -- variable is default
                        tag = "mi"
                    else
                        tag = "mo"
                    end
                else
                    tag = "mo"
                end
                local a = getattr(start,a_mathcategory)
                if a then
                    setattr(start,a_tagged,start_tagged(tag,{ mathcategory = a }))
                else
                    setattr(start,a_tagged,start_tagged(tag)) -- todo: a_mathcategory
                end
                stop_tagged()
             -- showtag(start,id,false)
                break -- okay?
            elseif id == mathtextchar_code then -- or id == glyph_code
                -- check for code
                local a = getattr(start,a_mathcategory)
                if a then
                    setattr(start,a_tagged,start_tagged("ms",{ mathcategory = a })) -- mtext
                else
                    setattr(start,a_tagged,start_tagged("ms")) -- mtext
                end
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == delimiter_code then
                -- check for code
                setattr(start,a_tagged,start_tagged("mo"))
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == style_code then
                -- has a next
            elseif id == noad_code then
             -- setattr(start,a_tagged,tags.current())
                processsubsup(start)
            elseif id == subbox_code or id == hlist_code or id == vlist_code then
                -- keep an eye on subbox_code and see what ends up in there
                local attr = getattr(start,a_tagged)
                if not attr then
                    -- just skip
                else
                    local specification = taglist[attr]
                    if specification then
                        local tag = specification.tagname
                        if tag == "formulacaption" then
                            -- skip
                        elseif tag == "mstacker" then
                            local list = getlist(start)
                            if list then
                                process(list)
                            end
                        else
                            if tag ~= "mstackertop" and tag ~= "mstackermid" and tag ~= "mstackerbot" then
                                tag = "mtext"
                            end
                            local text = start_tagged(tag)
                            setattr(start,a_tagged,text)
                            local list = getlist(start)
                            if list then
                                -- Beware, the first node in list is the actual list so we definitely
                                -- need to nest. This approach is a hack, maybe I'll make a proper
                                -- nesting feature to deal with this at another level. Here we just
                                -- fake structure by enforcing the inner one.
                                --
                                -- todo: have a local list with local tags that then get appended
                                --
                                local tagdata = specification.taglist
                                -- index of the first tag that is not shared with the box itself
                                local common = #tagdata + 1
                                local function runner(list,depth) -- quite inefficient
                                    local cache = { } -- we can have nested unboxed mess so best local to runner
                                    local keep = nil
                                 -- local keep = { } -- in case we might need to move keep outside
                                    for n, id, subtype in nextnode, list do
                                        local mth = id == math_code and subtype
                                        if mth == 0 then -- hm left_code
                                            -- math starts inside the text: wrap it in an mrow
                                         -- insert(keep,text)
                                            keep = text
                                            text = start_tagged("mrow")
                                            common = common + 1
                                        end
                                        local aa = getattr(n,a_tagged)
                                        if aa then
                                            local ac = cache[aa]
                                            if not ac then
                                                local tagdata = taglist[aa].taglist
                                                local extra = #tagdata
                                                if common <= extra then
                                                    -- reopen the tags that the node has on top of the
                                                    -- common ones, the last one becomes its new tag
                                                    for i=common,extra do
                                                        ac = restart_tagged(tagdata[i]) -- can be made faster
                                                    end
                                                    for i=common,extra do
                                                        stop_tagged() -- can be made faster
                                                    end
                                                else
                                                    ac = text
                                                end
                                                cache[aa] = ac
                                            end
                                            setattr(n,a_tagged,ac)
                                        else
                                            setattr(n,a_tagged,text)
                                        end
                                        if id == hlist_code or id == vlist_code then
                                            runner(getlist(n),depth+1)
                                        elseif id == glyph_code then
                                            -- this should not be needed
                                         -- local components = getcomponents(n) -- unlikely set
                                         -- if components then
                                         --     runner(getcomponent,depth+1)
                                         -- end
                                        elseif id == disc_code then
                                            -- this should not be needed
                                            local pre, post, replace = getdisc(n)
                                            if pre then
                                                runner(pre,depth+1)
                                            end
                                            if post then
                                                runner(post,depth+1)
                                            end
                                            if replace then
                                                runner(replace,depth+1)
                                            end
                                        end
                                        if mth == 1 then
                                            -- math ends: close the mrow and continue with the text tag
                                            stop_tagged()
                                         -- text = remove(keep)
                                            text = keep
                                            common = common - 1
                                        end
                                    end
                                end
                                runner(list,0)
                            end
                            stop_tagged()
                        end
                    end
                end
            elseif id == submlist_code then -- normally a hbox
                local list = getlist(start)
                if list then
                    local attr = getattr(start,a_tagged)
                    local last = attr and taglist[attr]
                    if last then
                        local tag    = last.tagname
                        local detail = last.detail
                        if tag == "maction" then
                            if not detail or detail == "" or actionstack[#actionstack] == detail then
                                -- no action, or the same action as the enclosing one: a
                                -- plain row will do
                                setattr(start,a_tagged,start_tagged("mrow"))
                                process(list)
                                stop_tagged()
                            else
                                -- a new action: remember it while the content is processed
                                insert(actionstack,detail)
                                setattr(start,a_tagged,start_tagged("mrow",{ detail = detail }))
                                process(list)
                                stop_tagged()
                                remove(actionstack)
                            end
                        elseif tag == "mstacker" then -- or tag == "mstackertop" or tag == "mstackermid" or tag == "mstackerbot" then
                            -- looks like it gets processed twice
                            -- do we still end up here ?
                            setattr(start,a_tagged,restart_tagged(attr)) -- so we just reuse the attribute
                            process(list)
                            stop_tagged()
                        else
                            setattr(start,a_tagged,start_tagged("mrow"))
                            process(list)
                            stop_tagged()
                        end
                    else -- never happens, we're always document
                        setattr(start,a_tagged,start_tagged("mrow"))
                        process(list)
                        stop_tagged()
                    end
                end
            elseif id == fraction_code then
                local num   = getfield(start,"num")
                local denom = getfield(start,"denom")
                local left  = getfield(start,"left")
                local right = getfield(start,"right")
                if left then
                   setattr(left,a_tagged,start_tagged("mo"))
                   process(left)
                   stop_tagged()
                end
                setattr(start,a_tagged,start_tagged("mfrac"))
                process(num)
                process(denom)
                stop_tagged()
                if right then
                    setattr(right,a_tagged,start_tagged("mo"))
                    process(right)
                    stop_tagged()
                end
            elseif id == choice_code then
                local display      = getfield(start,"display")
                local text         = getfield(start,"text")
                local script       = getfield(start,"script")
                local scriptscript = getfield(start,"scriptscript")
                if display then
                    process(display)
                end
                if text then
                    process(text)
                end
                if script then
                    process(script)
                end
                if scriptscript then
                    process(scriptscript)
                end
            elseif id == fence_code then
                local delim = getfield(start,"delimiter")
                if subtype == leftfence_code then
                    -- left: opens the mfenced element, the property table is
                    -- filled in while the middle and right fences are seen
                    local properties = { }
                    insert(fencesstack,properties)
                    setattr(start,a_tagged,start_tagged("mfenced",properties)) -- needs checking
                    if delim then
                        start_tagged("ignore")
                        local chr = getchar(delim)
                        if chr ~= 0 then
                            properties.left = chr
                        end
                        process(delim)
                        stop_tagged()
                    end
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == middlefence_code then
                    -- middle
                    if delim then
                        start_tagged("ignore")
                        local top = fencesstack[#fencesstack]
                        local chr = getchar(delim)
                        -- top is missing when there is no open left fence
                        if top and chr ~= 0 then
                            local mid = top.middle
                            if mid then
                                mid[#mid+1] = chr
                            else
                                top.middle = { chr }
                            end
                        end
                        process(delim)
                        stop_tagged()
                    end
                    stop_tagged()        -- end of subsequence
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == rightfence_code then
                    -- right: an empty stack means that no left fence was seen
                    local properties = remove(fencesstack)
                    if not properties then
                        report_tags("missing right fence")
                        properties = { }
                    end
                    if delim then
                        start_tagged("ignore")
                        local chr = getchar(delim)
                        if chr ~= 0 then
                            properties.right = chr
                        end
                        process(delim)
                        stop_tagged()
                    end
                    stop_tagged() -- end of subsequence
                    stop_tagged()
                else
                    -- can't happen
                end
            elseif id == radical_code then
                local left   = getfield(start,"left")
                local degree = getfield(start,"degree")
                if left then
                    start_tagged("ignore")
                    process(left) -- root symbol, ignored
                    stop_tagged()
                end
                if degree and hascontent(degree) then
                    setattr(start,a_tagged,start_tagged("mroot"))
                    processsubsup(start)
                    process(degree)
                    stop_tagged()
                else
                    setattr(start,a_tagged,start_tagged("msqrt"))
                    processsubsup(start)
                    stop_tagged()
                end
            elseif id == accent_code then
                local accent     = getfield(start,"accent")
                local bot_accent = getfield(start,"bot_accent")
                if bot_accent then
                    if accent then
                        setattr(start,a_tagged,start_tagged("munderover", {
                            accent      = true,
                            top         = getunicode(accent),
                            bottom      = getunicode(bot_accent),
                            topfixed    = subtype == fixedtopaccent_code or subtype == fixedbothaccent_code,
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bot_accent)
                        process(accent)
                        stop_tagged()
                    else
                        setattr(start,a_tagged,start_tagged("munder", {
                            accent      = true,
                            bottom      = getunicode(bot_accent),
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bot_accent)
                        stop_tagged()
                    end
                elseif accent then
                    setattr(start,a_tagged,start_tagged("mover", {
                        accent   = true,
                        top      = getunicode(accent),
                        topfixed = subtype == fixedtopaccent_code or subtype == fixedbothaccent_code,
                    }))
                    processsubsup(start)
                    process(accent)
                    stop_tagged()
                else
                    processsubsup(start)
                end
            elseif id == glue_code then
                -- before processing, so other intermathglue is not tagged
                local em = fonts.hashes.emwidths[nuts.getfont(start)]
                local wd = getwidth(start)
                if em and wd then
                    setattr(start,a_tagged,start_tagged("mspace",{ emfactor = wd/em }))
                    -- only close what we opened, otherwise the tag stack gets out of sync
                    stop_tagged()
                end
            else
                -- unknown node: flag it with the name of its node type
                setattr(start,a_tagged,start_tagged("merror", { detail = nodecodes[id] }))
                stop_tagged()
            end
        end
-- showtag(start,id,false)
    end
    if mtexttag then
        -- the list ended inside an mtext run
        stop_tagged()
    end
end
585
-- Noad handler that tags a complete formula: the list is wrapped in a "math"
-- element (mode "display" when the mathmode attribute of the head equals 1,
-- "inline" otherwise) with one "mrow" inside it that is attached to the head.
-- The style and penalties arguments are not used.
function noads.handlers.tags(head,style,penalties)
    local mode
    if getattr(head,a_mathmode) == 1 then
        mode = "display"
    else
        mode = "inline"
    end
    start_tagged("math", { mode = mode })
    local row = start_tagged("mrow")
    setattr(head,a_tagged,row)
 -- showtag(head,getid(head),true)
    process(head)
 -- showtag(head,getid(head),false)
    stop_tagged() -- mrow
    stop_tagged() -- math
end
595
do

    -- This one is meant for tracing (in m4all/m4mbo where it complements some other
    -- tracing) but it actually can also replace the embedding feature although that
    -- one might be better when we have more complex code with dependencies outside
    -- the blob. I'll deal with that when it's needed (trivial). The current
    -- interface is rather minimalistic.

    local enabled = false -- the export action has been enabled in the math task list
    local export  = false -- exporter function while collecting, false otherwise
    local allmath = false -- list with exported formulas, false before the first start
    local warned  = false -- the "tagging is not enabled" message has been issued

    -- Starts (or restarts) collecting: the export action is enabled once and the
    -- list of collected formulas is reset. This only works when tagging is enabled,
    -- otherwise a message is reported (once).
    function mathematics.startcollecting()
        if structures.tags.enabled() then
            if not enabled then
                nodes.tasks.enableaction("math", "noads.handlers.export")
            end
            enabled = true
            export  = structures.tags.localexport
            allmath = { }
        elseif not warned then
            report_tags("math collecting only works when tagging is enabled")
            warned = true
        end
    end

    -- Stops collecting; what has been collected so far is kept.
    function mathematics.stopcollecting()
        export = false
    end

    -- Returns what has been collected: the list itself (an empty table when
    -- nothing was collected) or, when asstring is set, its concatenation.
    local function collected(asstring)
        local a = allmath or { }
        return asstring and concat(a) or a
    end

    mathematics.collected = collected

    interfaces.implement {
        name      = "startcollectingmath",
     -- public    = true,
        protected = true,
        actions   = mathematics.startcollecting
    }

    interfaces.implement {
        name      = "stopcollectingmath",
     -- public    = true,
        protected = true,
        actions   = mathematics.stopcollecting
    }

    -- Saves the collected math in a file (when a filename is given) or assigns it
    -- to a buffer, where "yes" selects the buffer with the empty name.
    interfaces.implement {
        name      = "processcollectedmath",
     -- public    = true,
        protected = true,
        arguments = "2 strings",
        actions   = function(filename,buffername)
            if filename and filename ~= "" then
                io.savedata(filename,collected(true))
            elseif buffername then
                buffers.assign(buffername == interfaces.variables.yes and "" or buffername,collected(true))
            else
                -- NOTE(review): this returns the function itself, not its result;
                -- confirm that this is what callers expect
                return collected
            end
        end
    }

    -- As a value this reports the number of collected formulas, otherwise it
    -- scans an integer and pipes the formula with that index.
    interfaces.implement {
        name      = "collectedmath",
        usage     = "value",
        protected = true,
        public    = true,
        actions = function(what)
            if what == "value" then
                return tokens.values.integer, allmath and #allmath or 0
            else
                -- Always scan the index, also when nothing has been collected, so
                -- that the number is not left behind in the input.
                local index = tokens.scanners.integer()
                context(allmath and allmath[index] or nil)
            end
        end
    }

    -- The (initially disabled) math action that appends the export of each
    -- formula to the list while collecting is active.
    function noads.handlers.export(head)
        if export then
            allmath[#allmath+1] = export(head)
        end
        return head
    end

    nodes.tasks.appendaction("math", "finalizers", "noads.handlers.export", nil, "nonut", "disabled")

end
688