-- math-tag.lua (size: 24 Kb, last modification: 2021-10-28 13:50)
-- Make sure the global module registry exists, then register the metadata
-- of this file under its module name.
if not modules then
    modules = { }
end

modules['math-tag'] = {
    version   = 1.001,
    comment   = "companion to math-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- todo: have a local list with local tags that then get appended
10-- todo: use tex.getmathcodes (no table)
11
12-- use lpeg matchers
13
14local find, match = string.find, string.match
15local insert, remove, concat = table.insert, table.remove, table.concat
16
17local attributes        = attributes
18local nodes             = nodes
19
20local nuts              = nodes.nuts
21local tonut             = nuts.tonut
22
23local getnext           = nuts.getnext
24local getid             = nuts.getid
25local getchar           = nuts.getchar
26local getfont           = nuts.getfont
27local getlist           = nuts.getlist
28local getfield          = nuts.getfield
29local getdisc           = nuts.getdisc
30local getsubtype        = nuts.getsubtype
31local getattr           = nuts.getattr
32local getattrlist       = nuts.getattrlist
33local setattr           = nuts.setattr
34----- getcomponents     = nuts.getcomponents -- not really needed
35local getwidth          = nuts.getwidth
36
37local getnucleus        = nuts.getnucleus
38local getsub            = nuts.getsub
39local getsup            = nuts.getsup
40
41local setattributes     = nuts.setattributes
42
43local nextnode          = nuts.traversers.node
44
45local nodecodes         = nodes.nodecodes
46
47local noad_code         = nodecodes.noad           -- attr nucleus sub sup
48local accent_code       = nodecodes.accent         -- attr nucleus sub sup accent
49local radical_code      = nodecodes.radical        -- attr nucleus sub sup left degree
50local fraction_code     = nodecodes.fraction       -- attr nucleus sub sup left right
51local subbox_code       = nodecodes.subbox         -- attr list
52local submlist_code     = nodecodes.submlist       -- attr list
53local mathchar_code     = nodecodes.mathchar       -- attr fam char
54local mathtextchar_code = nodecodes.mathtextchar   -- attr fam char
55local delimiter_code    = nodecodes.delimiter      -- attr small_fam small_char large_fam large_char
56local style_code        = nodecodes.style          -- attr style
57local choice_code       = nodecodes.choice         -- attr display text script scriptscript
58local fence_code        = nodecodes.fence          -- attr subtype
59
60local accentcodes       = nodes.accentcodes
61local fencecodes        = nodes.fencecodes
62
63local fixedtopaccent_code    = accentcodes.fixedtop
64local fixedbottomaccent_code = accentcodes.fixedbottom
65local fixedbothaccent_code   = accentcodes.fixedboth
66
67local leftfence_code    = fencecodes.left
68local middlefence_code  = fencecodes.middle
69local rightfence_code   = fencecodes.right
70
71local kerncodes         = nodes.kerncodes
72
73local fontkern_code     = kerncodes.fontkern
74local italickern_code   = kerncodes.italickern
75
76local hlist_code        = nodecodes.hlist
77local vlist_code        = nodecodes.vlist
78local glyph_code        = nodecodes.glyph
79local disc_code         = nodecodes.disc
80local glue_code         = nodecodes.glue
81local kern_code         = nodecodes.kern
82local math_code         = nodecodes.math
83
84local processnoads      = noads.process
85
86local a_tagged          = attributes.private('tagged')
87local a_mathcategory    = attributes.private('mathcategory')
88local a_mathmode        = attributes.private('mathmode')
89
90local tags              = structures.tags
91
92local start_tagged      = tags.start
93local restart_tagged    = tags.restart
94local stop_tagged       = tags.stop
95local taglist           = tags.taglist
96
97local chardata          = characters.data
98
99local getmathcodes      = tex.getmathcodes
100local mathcodes         = mathematics.codes
101local ordinary_mathcode = mathcodes.ordinary
102local variable_mathcode = mathcodes.variable
103
104local fromunicode16     = fonts.mappings.fromunicode16
105local fontcharacters    = fonts.hashes.characters
106
107local report_tags       = logs.reporter("structure","tags")
108
109local process
110
-- Tag the nucleus of a noad together with its optional sub- and superscript.
--
-- Depending on which scripts are present the noad itself gets an "msubsup",
-- "msub" or "msup" tag; each script is wrapped in its own "mrow" (the mrows
-- are needed to keep mn's separated). Without scripts only the nucleus is
-- processed and no extra element is opened.
--
-- At some point we might need to add an attribute signaling the super- and
-- subscripts because TeX and MathML use a different order.
local function processsubsup(start)
    local nucleus = getnucleus(start)
    local sup     = getsup(start)
    local sub     = getsub(start)
    if not sub and not sup then
        -- plain nucleus, nothing to wrap
        process(nucleus)
        return
    end
    if sub and sup then
        setattr(start,a_tagged,start_tagged("msubsup"))
        process(nucleus)
        -- subscript first, then superscript
        start_tagged("mrow", { subscript = true })
        process(sub)
        stop_tagged()
        start_tagged("mrow", { superscript = true })
        process(sup)
        stop_tagged()
    else
        -- exactly one script is present
        local script = sub or sup
        local tag    = sub and "msub" or "msup"
        setattr(start,a_tagged,start_tagged(tag))
        process(nucleus)
        start_tagged("mrow")
        process(script)
        stop_tagged()
    end
    -- closes the msubsup / msub / msup opened above
    stop_tagged()
end
154
155-- todo: check function here and keep attribute the same
156
157-- todo: variants -> original
158
-- Stacks shared by the (recursive) tagging pass:
--   actionstack : entries pushed while a nested maction sublist is processed
--   fencesstack : property tables of the currently open left fences
local actionstack, fencesstack = { }, { }
161
162-- glyph nodes and such can happen in under and over stuff
163
164-- local function getunicode(n) -- instead of getchar
165--     local char = getchar(n)
166--  -- local font = getfontoffamily(getfield(n,"fam"))
167--     local font = getfont(n)
168--     local data = fontcharacters[font][char]
169--     return data.unicode or char
170-- end
171
-- Return the unicode registered in the font data for the character of node
-- n, falling back to the character code itself (use this instead of getchar).
--
-- The fallback is also used when the font hash has no entry for the font or
-- for the character, so a character missing from the font no longer raises
-- an "attempt to index a nil value" error.
--
-- The returned unicode can be a table but that is unlikely for math
-- characters.
local function getunicode(n) -- instead of getchar
 -- local char, font = isglyph(n) -- no, we have a mathchar
    local char, font = getchar(n), getfont(n)
    local characters = fontcharacters[font]
    local data       = characters and characters[char]
    return data and data.unicode or char
end
178
179-------------------
180
-- Flag set by the handler below; reset on every hascontent call.
local found = false

-- Handler table passed to processnoads: the mathchar handler just records
-- that it was called.
local content = {
    [mathchar_code] = function() found = true end,
}

-- Run the noad processor over head with the content handlers and report
-- whether the mathchar handler was triggered.
local function hascontent(head)
    found = false
    processnoads(head,content,"content")
    return found
end
191
192--------------------
193
194-- todo: use properties
195
196-- local function showtag(n,id,old)
197--     local attr = getattr(n,a_tagged)
198--     local curr = tags.current()
199--     report_tags("%s, node %s, attr %s:%s (%s), top %s (%s)",
200--         old and "before" or "after ",
201--         nodecodes[id],
202--         getattrlist(n),
203--         attr or "?",attr and taglist[attr].tagname or "?",
204--         curr or "?",curr and taglist[curr].tagname or "?"
205--     )
206-- end
207
-- Walk the node list that begins at start and attach structure tags (the
-- a_tagged attribute) to its nodes, opening and closing elements through
-- start_tagged / restart_tagged / stop_tagged. Nested lists and noad fields
-- are handled by recursion (directly or via processsubsup).
--
-- Runs of glyph and disc nodes (plus font/italic kerns inside such a run)
-- share one "mtext" element. Mathchar, mathtextchar and delimiter nodes end
-- the walk with a break after they have been tagged. Node types without a
-- dedicated branch get a "merror" element whose detail is the node type name.
--
-- Returns nothing.
--
-- Fixes compared to the previous version:
--   * the maction branch used an undefined global 'action' (always nil)
--     where the tag's 'detail' was meant, so the detail never ended up on
--     the stack or in the exported mrow;
--   * the merror fallback indexed nodecodes with an undefined 'i' instead
--     of the node id;
--   * a middle fence without an open left fence no longer indexes nil;
--   * an unreachable 'not attr' branch in the box case has been removed.
process = function(start) -- we cannot use the processor as we have no finalizers (yet)
    local mtexttag = nil
    while start do
        local id = getid(start)
     -- showtag(start,id,true)
        if id == glyph_code or id == disc_code then
            if not mtexttag then
                mtexttag = start_tagged("mtext")
            end
            setattr(start,a_tagged,mtexttag)
        elseif mtexttag and id == kern_code and (getsubtype(start) == fontkern_code or getsubtype(start) == italickern_code) then -- italickern
            setattr(start,a_tagged,mtexttag)
        else
            -- anything else ends a pending run of text
            if mtexttag then
                stop_tagged()
                mtexttag = nil
            end
            if id == mathchar_code then
                local char = getchar(start)
                local code = getmathcodes(char)
                local tag
                if code == ordinary_mathcode or code == variable_mathcode then
                    local ch = chardata[char]
                    local mc = ch and ch.mathclass
                    if mc == "number" then
                        tag = "mn"
                    elseif mc == "variable" or not mc then -- variable is default
                        tag = "mi"
                    else
                        tag = "mo"
                    end
                else
                    tag = "mo"
                end
                local a = getattr(start,a_mathcategory)
                if a then
                    setattr(start,a_tagged,start_tagged(tag,{ mathcategory = a }))
                else
                    setattr(start,a_tagged,start_tagged(tag)) -- todo: a_mathcategory
                end
                stop_tagged()
             -- showtag(start,id,false)
                break -- okay?
            elseif id == mathtextchar_code then -- or id == glyph_code
                -- check for code
                local a = getattr(start,a_mathcategory)
                if a then
                    setattr(start,a_tagged,start_tagged("ms",{ mathcategory = a })) -- mtext
                else
                    setattr(start,a_tagged,start_tagged("ms")) -- mtext
                end
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == delimiter_code then
                -- check for code
                setattr(start,a_tagged,start_tagged("mo"))
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == style_code then
                -- has a next
            elseif id == noad_code then
             -- setattr(start,a_tagged,tags.current())
                processsubsup(start)
            elseif id == subbox_code or id == hlist_code or id == vlist_code then
                -- keep an eye on subbox_code and see what ends up in there
                local attr = getattr(start,a_tagged)
                if not attr then
                    -- just skip
                else
                    local specification = taglist[attr]
                    if specification then
                        local tag = specification.tagname
                        if tag == "formulacaption" then
                            -- skip
                        elseif tag == "mstacker" then
                            local list = getlist(start)
                            if list then
                                process(list)
                            end
                        else
                            if tag ~= "mstackertop" and tag ~= "mstackermid" and tag ~= "mstackerbot" then
                                tag = "mtext"
                            end
                            local text = start_tagged(tag)
                            setattr(start,a_tagged,text)
                            local list = getlist(start)
                            if list then
                                -- Beware, the first node in list is the actual list so we definitely
                                -- need to nest. This approach is a hack, maybe I'll make a proper
                                -- nesting feature to deal with this at another level. Here we just
                                -- fake structure by enforcing the inner one.
                                --
                                -- todo: have a local list with local tags that then get appended
                                --
                                local tagdata = specification.taglist
                                local common = #tagdata + 1
                                local function runner(list,depth) -- quite inefficient
                                    local cache = { } -- we can have nested unboxed mess so best local to runner
                                    local keep = nil
                                 -- local keep = { } -- in case we might need to move keep outside
                                    for n, id, subtype in nextnode, list do
                                        local mth = id == math_code and subtype
                                        if mth == 0 then -- hm left_code
                                         -- insert(keep,text)
                                            keep = text
                                            text = start_tagged("mrow")
                                            common = common + 1
                                        end
                                        local aa = getattr(n,a_tagged)
                                        if aa then
                                            local ac = cache[aa]
                                            if not ac then
                                                local tagdata = taglist[aa].taglist
                                                local extra = #tagdata
                                                if common <= extra then
                                                    for i=common,extra do
                                                        ac = restart_tagged(tagdata[i]) -- can be made faster
                                                    end
                                                    for i=common,extra do
                                                        stop_tagged() -- can be made faster
                                                    end
                                                else
                                                    ac = text
                                                end
                                                cache[aa] = ac
                                            end
                                            setattr(n,a_tagged,ac)
                                        else
                                            setattr(n,a_tagged,text)
                                        end
                                        if id == hlist_code or id == vlist_code then
                                            runner(getlist(n),depth+1)
                                        elseif id == glyph_code then
                                            -- this should not be needed
                                         -- local components = getcomponents(n) -- unlikely set
                                         -- if components then
                                         --     runner(getcomponent,depth+1)
                                         -- end
                                        elseif id == disc_code then
                                            -- this should not be needed
                                            local pre, post, replace = getdisc(n)
                                            if pre then
                                                runner(pre,depth+1)
                                            end
                                            if post then
                                                runner(post,depth+1)
                                            end
                                            if replace then
                                                runner(replace,depth+1)
                                            end
                                        end
                                        if mth == 1 then
                                            stop_tagged()
                                         -- text = remove(keep)
                                            text = keep
                                            common = common - 1
                                        end
                                    end
                                end
                                runner(list,0)
                            end
                            stop_tagged()
                        end
                    end
                end
            elseif id == submlist_code then -- normally a hbox
                local list = getlist(start)
                if list then
                    local attr = getattr(start,a_tagged)
                    local last = attr and taglist[attr]
                    if last then
                        local tag    = last.tagname
                        local detail = last.detail
                        if tag == "maction" then
                            if not detail or detail == "" then
                                -- no action specified: a plain row
                                setattr(start,a_tagged,start_tagged("mrow"))
                                process(list)
                                stop_tagged()
                            elseif actionstack[#actionstack] == detail then
                                -- same action as the enclosing one: don't repeat it
                                setattr(start,a_tagged,start_tagged("mrow"))
                                process(list)
                                stop_tagged()
                            else
                                insert(actionstack,detail)
                                setattr(start,a_tagged,start_tagged("mrow",{ detail = detail }))
                                process(list)
                                stop_tagged()
                                remove(actionstack)
                            end
                        elseif tag == "mstacker" then -- or tag == "mstackertop" or tag == "mstackermid" or tag == "mstackerbot" then
                            -- looks like it gets processed twice
                            -- do we still end up here ?
                            setattr(start,a_tagged,restart_tagged(attr)) -- so we just reuse the attribute
                            process(list)
                            stop_tagged()
                        else
                            setattr(start,a_tagged,start_tagged("mrow"))
                            process(list)
                            stop_tagged()
                        end
                    else -- never happens, we're always document
                        setattr(start,a_tagged,start_tagged("mrow"))
                        process(list)
                        stop_tagged()
                    end
                end
            elseif id == fraction_code then
                local num   = getfield(start,"num")
                local denom = getfield(start,"denom")
                local left  = getfield(start,"left")
                local right = getfield(start,"right")
                if left then
                   setattr(left,a_tagged,start_tagged("mo"))
                   process(left)
                   stop_tagged()
                end
                setattr(start,a_tagged,start_tagged("mfrac"))
                process(num)
                process(denom)
                stop_tagged()
                if right then
                    setattr(right,a_tagged,start_tagged("mo"))
                    process(right)
                    stop_tagged()
                end
            elseif id == choice_code then
                local display      = getfield(start,"display")
                local text         = getfield(start,"text")
                local script       = getfield(start,"script")
                local scriptscript = getfield(start,"scriptscript")
                if display then
                    process(display)
                end
                if text then
                    process(text)
                end
                if script then
                    process(script)
                end
                if scriptscript then
                    process(scriptscript)
                end
            elseif id == fence_code then
                local subtype = getsubtype(start)
                local delim   = getfield(start,"delim")
                if subtype == leftfence_code then
                    -- left: the properties table is shared with the mfenced tag
                    -- so that middle and right fences can add to it later
                    local properties = { }
                    insert(fencesstack,properties)
                    setattr(start,a_tagged,start_tagged("mfenced",properties)) -- needs checking
                    if delim then
                        start_tagged("ignore")
                        local chr = getchar(delim)
                        if chr ~= 0 then
                            properties.left = chr
                        end
                        process(delim)
                        stop_tagged()
                    end
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == middlefence_code then
                    -- middle
                    if delim then
                        start_tagged("ignore")
                        local top = fencesstack[#fencesstack]
                        local chr = getchar(delim)
                        if top and chr ~= 0 then -- no top when there is no open left fence
                            local mid = top.middle
                            if mid then
                                mid[#mid+1] = chr
                            else
                                top.middle = { chr }
                            end
                        end
                        process(delim)
                        stop_tagged()
                    end
                    stop_tagged()        -- end of subsequence
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == rightfence_code then
                    local properties = remove(fencesstack)
                    if not properties then
                        report_tags("missing right fence")
                        properties = { }
                    end
                    if delim then
                        start_tagged("ignore")
                        local chr = getchar(delim)
                        if chr ~= 0 then
                            properties.right = chr
                        end
                        process(delim)
                        stop_tagged()
                    end
                    stop_tagged() -- end of subsequence
                    stop_tagged()
                else
                    -- can't happen
                end
            elseif id == radical_code then
                local left   = getfield(start,"left")
                local degree = getfield(start,"degree")
                if left then
                    start_tagged("ignore")
                    process(left) -- root symbol, ignored
                    stop_tagged()
                end
                if degree and hascontent(degree) then
                    setattr(start,a_tagged,start_tagged("mroot"))
                    processsubsup(start)
                    process(degree)
                    stop_tagged()
                else
                    setattr(start,a_tagged,start_tagged("msqrt"))
                    processsubsup(start)
                    stop_tagged()
                end
            elseif id == accent_code then
                local subtype    = getsubtype(start)
                local accent     = getfield(start,"accent")
                local bot_accent = getfield(start,"bot_accent")
                if bot_accent then
                    if accent then
                        setattr(start,a_tagged,start_tagged("munderover", {
                            accent      = true,
                            top         = getunicode(accent),
                            bottom      = getunicode(bot_accent),
                            topfixed    = subtype == fixedtopaccent_code or subtype == fixedbothaccent_code,
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bot_accent)
                        process(accent)
                        stop_tagged()
                    else
                        setattr(start,a_tagged,start_tagged("munder", {
                            accent      = true,
                            bottom      = getunicode(bot_accent),
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bot_accent)
                        stop_tagged()
                    end
                elseif accent then
                    setattr(start,a_tagged,start_tagged("mover", {
                        accent   = true,
                        top      = getunicode(accent),
                        topfixed = subtype == fixedtopaccent_code or subtype == fixedbothaccent_code,
                    }))
                    processsubsup(start)
                    process(accent)
                    stop_tagged()
                else
                    processsubsup(start)
                end
            elseif id == glue_code then
             -- setattr(start,a_tagged,start_tagged("mspace",{ width = getwidth(start) }))
                setattr(start,a_tagged,start_tagged("mspace"))
                stop_tagged()
            else
                -- unhandled node type: export it as an error carrying the type name
                setattr(start,a_tagged,start_tagged("merror", { detail = nodecodes[id] }))
                stop_tagged()
            end
        end
     -- showtag(start,id,false)
        start = getnext(start)
    end
    -- close a text run that lasted till the end of the list
    if mtexttag then
        stop_tagged()
    end
end
587
-- Noad handler that tags a complete formula: the list is wrapped in a
-- "math" element (mode "display" when the a_mathmode attribute of head
-- equals 1, "inline" otherwise) holding one outer "mrow" that is also set
-- on head itself. The style and penalties arguments are not used here.
function noads.handlers.tags(head,style,penalties)
    local mode = "inline"
    if getattr(head,a_mathmode) == 1 then
        mode = "display"
    end
    start_tagged("math", { mode = mode })
    setattr(head,a_tagged,start_tagged("mrow"))
 -- showtag(head,getid(head),true)
    process(head)
 -- showtag(head,getid(head),false)
    stop_tagged() -- mrow
    stop_tagged() -- math
end
597