math-tag.lua /size: 24 Kb    last modification: 2023-12-21 09:44
1if not modules then modules = { } end modules ['math-tag'] = {
2    version   = 1.001,
3    comment   = "companion to math-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- todo: have a local list with local tags that then get appended
10-- todo: use tex.getmathcodes (no table)
11
12-- use lpeg matchers
13
14local find, match = string.find, string.match
15local insert, remove, concat = table.insert, table.remove, table.concat
16
17local attributes        = attributes
18local nodes             = nodes
19
20local nuts              = nodes.nuts
21local tonut             = nuts.tonut
22
23local getnext           = nuts.getnext
24local getid             = nuts.getid
25local getchar           = nuts.getchar
26local getfont           = nuts.getfont
27local getlist           = nuts.getlist
28local getfield          = nuts.getfield
29local getdisc           = nuts.getdisc
30local getsubtype        = nuts.getsubtype
31local getattr           = nuts.getattr
32local getattrlist       = nuts.getattrlist
33local setattr           = nuts.setattr
34----- getcomponents     = nuts.getcomponents -- not really needed
35local getwidth          = nuts.getwidth
36
37local getnucleus        = nuts.getnucleus
38local getsub            = nuts.getsub
39local getsup            = nuts.getsup
40
41local setattributes     = nuts.setattributes
42
43local nextnode          = nuts.traversers.node
44
45local nodecodes         = nodes.nodecodes
46
47local noad_code         = nodecodes.noad           -- attr nucleus sub sup
48local accent_code       = nodecodes.accent         -- attr nucleus sub sup accent
49local radical_code      = nodecodes.radical        -- attr nucleus sub sup left degree
50local fraction_code     = nodecodes.fraction       -- attr nucleus sub sup left right
51local subbox_code       = nodecodes.subbox         -- attr list
52local submlist_code     = nodecodes.submlist       -- attr list
53local mathchar_code     = nodecodes.mathchar       -- attr fam char
54local mathtextchar_code = nodecodes.mathtextchar   -- attr fam char
55local delimiter_code    = nodecodes.delimiter      -- attr small_fam small_char large_fam large_char
56local style_code        = nodecodes.style          -- attr style
57local choice_code       = nodecodes.choice         -- attr display text script scriptscript
58local fence_code        = nodecodes.fence          -- attr subtype
59
60local accentcodes       = nodes.accentcodes
61local fencecodes        = nodes.fencecodes
62
63local fixedtopaccent_code    = accentcodes.fixedtop
64local fixedbottomaccent_code = accentcodes.fixedbottom
65local fixedbothaccent_code   = accentcodes.fixedboth
66
67local leftfence_code    = fencecodes.left
68local middlefence_code  = fencecodes.middle
69local rightfence_code   = fencecodes.right
70
71local kerncodes         = nodes.kerncodes
72
73local fontkern_code     = kerncodes.fontkern
74local italickern_code   = kerncodes.italickern
75
76local hlist_code        = nodecodes.hlist
77local vlist_code        = nodecodes.vlist
78local glyph_code        = nodecodes.glyph
79local disc_code         = nodecodes.disc
80local glue_code         = nodecodes.glue
81local kern_code         = nodecodes.kern
82local math_code         = nodecodes.math
83
84local processnoads      = noads.process
85
86local a_tagged          = attributes.private('tagged')
87local a_mathcategory    = attributes.private('mathcategory')
88local a_mathmode        = attributes.private('mathmode')
89
90local tags              = structures.tags
91
92local start_tagged      = tags.start
93local restart_tagged    = tags.restart
94local stop_tagged       = tags.stop
95local taglist           = tags.taglist
96
97local chardata          = characters.data
98
99local getmathcodes      = tex.getmathcodes
100local mathcodes         = mathematics.codes
101local ordinary_mathcode = mathcodes.ordinary
102
103local fromunicode16     = fonts.mappings.fromunicode16
104local fontcharacters    = fonts.hashes.characters
105
106local report_tags       = logs.reporter("structure","tags")
107
108local process
109
-- Tags a noad together with its optional sub- and superscript.
--
-- Depending on which scripts are present the noad itself gets an "msubsup",
-- "msub" or "msup" tag; the nucleus is processed first and each script is
-- wrapped in its own "mrow" (which keeps adjacent mn's separated). When no
-- script is present only the nucleus is processed and no element is opened.
--
-- At some point we might need to add an attribute signaling the super- and
-- subscripts because TeX and MathML use a different order.
local function processsubsup(start)
    local nucleus = getnucleus(start)
    local sup     = getsup(start)
    local sub     = getsub(start)
    -- plain noad: nothing to wrap
    if not sub and not sup then
        process(nucleus)
        return
    end
    if sub and sup then
        setattr(start,a_tagged,start_tagged("msubsup"))
        process(nucleus)
        -- subscript first, then superscript
        start_tagged("mrow", { subscript = true })
        process(sub)
        stop_tagged()
        start_tagged("mrow", { superscript = true })
        process(sup)
        stop_tagged()
    else
        -- exactly one of both scripts is present
        local script = sub or sup
        setattr(start,a_tagged,start_tagged(sub and "msub" or "msup"))
        process(nucleus)
        start_tagged("mrow")
        process(script)
        stop_tagged()
    end
    -- closes the msubsup / msub / msup element
    stop_tagged()
end
153
154-- todo: check function here and keep attribute the same
155
156-- todo: variants -> original
157
158local actionstack = { }
159local fencesstack = { }
160
161-- glyph nodes and such can happen in under and over stuff
162
163-- local function getunicode(n) -- instead of getchar
164--     local char = getchar(n)
165--  -- local font = getfontoffamily(getfield(n,"fam"))
166--     local font = getfont(n)
167--     local data = fontcharacters[font][char]
168--     return data.unicode or char
169-- end
170
-- Returns the unicode value registered for the character of node `n` in its
-- font, falling back to the raw character slot when the font data has no
-- `unicode` entry or when there is no data for this font/character at all.
-- Used instead of getchar for accents.
local function getunicode(n) -- instead of getchar
 -- local char, font = isglyph(n) -- no, we have a mathchar
    local char, font = getchar(n), getfont(n)
    local characters = fontcharacters[font]
    local data = characters and characters[char]
    -- guard against a missing entry so that tagging never aborts the run
    return data and data.unicode or char -- can be a table but unlikely for math characters
end
177
178-------------------
179
-- Flag that is raised by the callback below as soon as a math character
-- is seen while a list is being scanned.
local found = false

-- Callback table handed to the noad processor: only math characters matter.
local content = {
    [mathchar_code] = function()
        found = true
    end,
}

-- Reports whether the noad list starting at `head` contains at least one
-- math character; the list is run through the noad processor with the
-- callback table above.
local function hascontent(head)
    found = false
    processnoads(head,content,"content")
    return found
end
190
191--------------------
192
193-- todo: use properties
194
195-- local function showtag(n,id,old)
196--     local attr = getattr(n,a_tagged)
197--     local curr = tags.current()
198--     report_tags("%s, node %s, attr %s:%s (%s), top %s (%s)",
199--         old and "before" or "after ",
200--         nodecodes[id],
201--         getattrlist(n),
202--         attr or "?",attr and taglist[attr].tagname or "?",
203--         curr or "?",curr and taglist[curr].tagname or "?"
204--     )
205-- end
206
-- Walks the node list that starts at `start` and sets the 'tagged' attribute
-- on the nodes, opening and closing structure elements (mi, mn, mo, mrow,
-- mfrac, msqrt, ...) as it goes. Sublists (nucleus, scripts, numerators, box
-- content and so on) are handled by recursion.
--
-- Consecutive glyph and disc nodes (plus font and italic kerns in between)
-- share one "mtext" element that is closed as soon as another kind of node
-- shows up or the list ends.
--
-- Changes compared to the previous version:
--
-- * the maction branch used an undefined (so always nil) global `action`; it
--   now uses the `detail` of the tag, which makes the branch that tags the
--   mrow with that detail reachable; a nil or empty detail never gets pushed
--   on the action stack
-- * the fallback merror reported nodecodes[i] with an undefined `i`; it now
--   reports the name that belongs to the node id
-- * a middle fence without a pending left fence no longer indexes nil
process = function(start) -- we cannot use the processor as we have no finalizers (yet)
    local mtexttag = nil
    while start do
        local id = getid(start)
-- showtag(start,id,true)
        if id == glyph_code or id == disc_code then
            -- text in math: open one mtext and reuse it for the whole run
            if not mtexttag then
                mtexttag = start_tagged("mtext")
            end
            setattr(start,a_tagged,mtexttag)
        elseif mtexttag and id == kern_code and (getsubtype(start) == fontkern_code or getsubtype(start) == italickern_code) then -- italickern
            -- kerns inside a text run belong to that run
            setattr(start,a_tagged,mtexttag)
        else
            if mtexttag then
                stop_tagged()
                mtexttag = nil
            end
            if id == mathchar_code then
                local char = getchar(start)
                local code = getmathcodes(char)
                local tag
                if code == ordinary_mathcode then
                    -- ordinary characters are split by their math class
                    local ch = chardata[char]
                    local mc = ch and ch.mathclass
                    if mc == "number" then
                        tag = "mn"
                    elseif mc == "variable" or not mc then -- variable is default
                        tag = "mi"
                    else
                        tag = "mo"
                    end
                else
                    tag = "mo"
                end
                local a = getattr(start,a_mathcategory)
                if a then
                    setattr(start,a_tagged,start_tagged(tag,{ mathcategory = a }))
                else
                    setattr(start,a_tagged,start_tagged(tag)) -- todo: a_mathcategory
                end
                stop_tagged()
             -- showtag(start,id,false)
                break -- okay?
            elseif id == mathtextchar_code then -- or id == glyph_code
                -- check for code
                local a = getattr(start,a_mathcategory)
                if a then
                    setattr(start,a_tagged,start_tagged("ms",{ mathcategory = a })) -- mtext
                else
                    setattr(start,a_tagged,start_tagged("ms")) -- mtext
                end
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == delimiter_code then
                -- check for code
                setattr(start,a_tagged,start_tagged("mo"))
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == style_code then
                -- has a next
            elseif id == noad_code then
             -- setattr(start,a_tagged,tags.current())
                processsubsup(start)
            elseif id == subbox_code or id == hlist_code or id == vlist_code then
                -- keep an eye on subbox_code and see what ends up in there
                local attr = getattr(start,a_tagged)
                if not attr then
                    -- just skip
                else
                    local specification = taglist[attr]
                    if specification then
                        local tag = specification.tagname
                        if tag == "formulacaption" then
                            -- skip
                        elseif tag == "mstacker" then
                            local list = getlist(start)
                            if list then
                                process(list)
                            end
                        else
                            -- anything that is not part of a stacker becomes text
                            if tag ~= "mstackertop" and tag ~= "mstackermid" and tag ~= "mstackerbot" then
                                tag = "mtext"
                            end
                            local text = start_tagged(tag)
                            setattr(start,a_tagged,text)
                            local list = getlist(start)
                            if not list then
                                -- empty list
                            elseif not attr then
                                -- box comes from strange place (cannot be reached at this
                                -- spot because attr has been checked above, kept as-is)
                                setattributes(list,a_tagged,text) -- only the first node ?
                            else
                                -- Beware, the first node in list is the actual list so we definitely
                                -- need to nest. This approach is a hack, maybe I'll make a proper
                                -- nesting feature to deal with this at another level. Here we just
                                -- fake structure by enforcing the inner one.
                                --
                                -- todo: have a local list with local tags that then get appended
                                --
                                local tagdata = specification.taglist
                                -- index of the first tag that is not shared with the box itself
                                local common = #tagdata + 1
                                local function runner(list,depth) -- quite inefficient
                                    local cache = { } -- we can have nested unboxed mess so best local to runner
                                    local keep = nil
                                 -- local keep = { } -- in case we might need to move keep outside
                                    for n, id, subtype in nextnode, list do
                                        -- mth is the subtype of a math node and false otherwise
                                        local mth = id == math_code and subtype
                                        if mth == 0 then -- hm left_code
                                            -- begin of inline math: nest an mrow
                                         -- insert(keep,text)
                                            keep = text
                                            text = start_tagged("mrow")
                                            common = common + 1
                                        end
                                        local aa = getattr(n,a_tagged)
                                        if aa then
                                            local ac = cache[aa]
                                            if not ac then
                                                local tagdata = taglist[aa].taglist
                                                local extra = #tagdata
                                                if common <= extra then
                                                    -- reopen the tags that go beyond the common part and
                                                    -- close them again, the innermost one is kept
                                                    for i=common,extra do
                                                        ac = restart_tagged(tagdata[i]) -- can be made faster
                                                    end
                                                    for i=common,extra do
                                                        stop_tagged() -- can be made faster
                                                    end
                                                else
                                                    ac = text
                                                end
                                                cache[aa] = ac
                                            end
                                            setattr(n,a_tagged,ac)
                                        else
                                            setattr(n,a_tagged,text)
                                        end
                                        if id == hlist_code or id == vlist_code then
                                            runner(getlist(n),depth+1)
                                        elseif id == glyph_code then
                                            -- this should not be needed
                                         -- local components = getcomponents(n) -- unlikely set
                                         -- if components then
                                         --     runner(getcomponent,depth+1)
                                         -- end
                                        elseif id == disc_code then
                                            -- this should not be needed
                                            local pre, post, replace = getdisc(n)
                                            if pre then
                                                runner(pre,depth+1)
                                            end
                                            if post then
                                                runner(post,depth+1)
                                            end
                                            if replace then
                                                runner(replace,depth+1)
                                            end
                                        end
                                        if mth == 1 then
                                            -- end of inline math: close the nested mrow
                                            stop_tagged()
                                         -- text = remove(keep)
                                            text = keep
                                            common = common - 1
                                        end
                                    end
                                end
                                runner(list,0)
                            end
                            stop_tagged()
                        end
                    end
                end
            elseif id == submlist_code then -- normally a hbox
                local list = getlist(start)
                if list then
                    local attr = getattr(start,a_tagged)
                    local last = attr and taglist[attr]
                    if last then
                        local tag    = last.tagname
                        local detail = last.detail
                        if tag == "maction" then
                            if not detail or detail == "" or actionstack[#actionstack] == detail then
                                -- no action or the same action as the enclosing one
                                setattr(start,a_tagged,start_tagged("mrow"))
                                process(list)
                                stop_tagged()
                            else
                                -- a new action: remember it while the content is processed
                                insert(actionstack,detail)
                                setattr(start,a_tagged,start_tagged("mrow",{ detail = detail }))
                                process(list)
                                stop_tagged()
                                remove(actionstack)
                            end
                        elseif tag == "mstacker" then -- or tag == "mstackertop" or tag == "mstackermid" or tag == "mstackerbot" then
                            -- looks like it gets processed twice
                            -- do we still end up here ?
                            setattr(start,a_tagged,restart_tagged(attr)) -- so we just reuse the attribute
                            process(list)
                            stop_tagged()
                        else
                            setattr(start,a_tagged,start_tagged("mrow"))
                            process(list)
                            stop_tagged()
                        end
                    else -- never happens, we're always document
                        setattr(start,a_tagged,start_tagged("mrow"))
                        process(list)
                        stop_tagged()
                    end
                end
            elseif id == fraction_code then
                local num   = getfield(start,"num")
                local denom = getfield(start,"denom")
                local left  = getfield(start,"left")
                local right = getfield(start,"right")
                -- optional delimiters end up as operators around the fraction
                if left then
                   setattr(left,a_tagged,start_tagged("mo"))
                   process(left)
                   stop_tagged()
                end
                setattr(start,a_tagged,start_tagged("mfrac"))
                process(num)
                process(denom)
                stop_tagged()
                if right then
                    setattr(right,a_tagged,start_tagged("mo"))
                    process(right)
                    stop_tagged()
                end
            elseif id == choice_code then
                -- all four variants get tagged
                local display      = getfield(start,"display")
                local text         = getfield(start,"text")
                local script       = getfield(start,"script")
                local scriptscript = getfield(start,"scriptscript")
                if display then
                    process(display)
                end
                if text then
                    process(text)
                end
                if script then
                    process(script)
                end
                if scriptscript then
                    process(scriptscript)
                end
            elseif id == fence_code then
                local subtype = getsubtype(start)
                local delim   = getfield(start,"delim")
                if subtype == leftfence_code then
                    -- left: opens the mfenced plus the first subsequence, the properties
                    -- table is shared with the tag and gets filled while we go
                    local properties = { }
                    insert(fencesstack,properties)
                    setattr(start,a_tagged,start_tagged("mfenced",properties)) -- needs checking
                    if delim then
                        start_tagged("ignore")
                        local chr = getchar(delim)
                        if chr ~= 0 then
                            properties.left = chr
                        end
                        process(delim)
                        stop_tagged()
                    end
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == middlefence_code then
                    -- middle: closes the current subsequence and opens the next one
                    if delim then
                        start_tagged("ignore")
                        local top = fencesstack[#fencesstack]
                        local chr = getchar(delim)
                        -- top is nil when there is no pending left fence
                        if top and chr ~= 0 then
                            local mid = top.middle
                            if mid then
                                mid[#mid+1] = chr
                            else
                                top.middle = { chr }
                            end
                        end
                        process(delim)
                        stop_tagged()
                    end
                    stop_tagged()        -- end of subsequence
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == rightfence_code then
                    -- right: closes the last subsequence and the mfenced
                    local properties = remove(fencesstack)
                    if not properties then
                        -- nothing was pushed by a left fence
                        report_tags("missing right fence")
                        properties = { }
                    end
                    if delim then
                        start_tagged("ignore")
                        local chr = getchar(delim)
                        if chr ~= 0 then
                            properties.right = chr
                        end
                        process(delim)
                        stop_tagged()
                    end
                    stop_tagged() -- end of subsequence
                    stop_tagged()
                else
                    -- can't happen
                end
            elseif id == radical_code then
                local left   = getfield(start,"left")
                local degree = getfield(start,"degree")
                if left then
                    start_tagged("ignore")
                    process(left) -- root symbol, ignored
                    stop_tagged()
                end
                -- a degree only counts when it contains at least one math character
                if degree and hascontent(degree) then
                    setattr(start,a_tagged,start_tagged("mroot"))
                    processsubsup(start)
                    process(degree)
                    stop_tagged()
                else
                    setattr(start,a_tagged,start_tagged("msqrt"))
                    processsubsup(start)
                    stop_tagged()
                end
            elseif id == accent_code then
                local subtype    = getsubtype(start)
                local accent     = getfield(start,"accent")
                local bot_accent = getfield(start,"bot_accent")
                if bot_accent then
                    if accent then
                        setattr(start,a_tagged,start_tagged("munderover", {
                            accent      = true,
                            top         = getunicode(accent),
                            bottom      = getunicode(bot_accent),
                            topfixed    = subtype == fixedtopaccent_code or subtype == fixedbothaccent_code,
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bot_accent)
                        process(accent)
                        stop_tagged()
                    else
                        setattr(start,a_tagged,start_tagged("munder", {
                            accent      = true,
                            bottom      = getunicode(bot_accent),
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bot_accent)
                        stop_tagged()
                    end
                elseif accent then
                    setattr(start,a_tagged,start_tagged("mover", {
                        accent   = true,
                        top      = getunicode(accent),
                        topfixed = subtype == fixedtopaccent_code or subtype == fixedbothaccent_code,
                    }))
                    processsubsup(start)
                    process(accent)
                    stop_tagged()
                else
                    processsubsup(start)
                end
            elseif id == glue_code then
             -- setattr(start,a_tagged,start_tagged("mspace",{ width = getwidth(start) }))
                setattr(start,a_tagged,start_tagged("mspace"))
                stop_tagged()
            else
                -- unknown kind of node: flag it and report what it is
                setattr(start,a_tagged,start_tagged("merror", { detail = nodecodes[id] }))
                stop_tagged()
            end
        end
-- showtag(start,id,false)
        start = getnext(start)
    end
    -- a text run that lasts till the end of the list still has to be closed
    if mtexttag then
        stop_tagged()
    end
end
586
-- Noad handler that tags a complete formula: the list starting at `head` is
-- wrapped in a "math" element (with mode "display" when the mathmode
-- attribute of head equals 1 and "inline" otherwise) that holds one "mrow"
-- with the processed content. The style and penalties arguments are not
-- used and nothing is returned.
function noads.handlers.tags(head,style,penalties)
    local mode = "inline"
    if getattr(head,a_mathmode) == 1 then
        mode = "display"
    end
    start_tagged("math", { mode = mode })
    local mrow = start_tagged("mrow")
    setattr(head,a_tagged,mrow)
-- showtag(head,getid(head),true)
    process(head)
-- showtag(head,getid(head),false)
    stop_tagged() -- mrow
    stop_tagged() -- math
end
596