math-tag.lmt /size: 53 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['math-tag'] = {
2    version   = 1.001,
3    comment   = "companion to math-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- todo: have a local list with local tags that then get appended
10-- todo: use tex.getmathcodes (no table)
11-- todo: add more spacing details + check text stuff for latest additions
12-- todo: some more font related cleanup + adaption to new scaling
13-- todo: tracing
14-- todo: maybe use lpeg matchers
15
16-- todo: prime
17-- todo: middle in fraction
18
19local find, match, gsub = string.find, string.match, string.gsub
20local insert, remove, concat, setmetatableindex = table.insert, table.remove, table.concat, table.setmetatableindex
21
22local attributes         = attributes
23local nodes              = nodes
24
25local nuts               = nodes.nuts
26local tonut              = nuts.tonut
27
28local getchar            = nuts.getchar
29local getnext            = nuts.getnext
30local getprev            = nuts.getprev
31local getdata            = nuts.getdata
32local getlist            = nuts.getlist
33local getfield           = nuts.getfield
34local getdisc            = nuts.getdisc
35local getattr            = nuts.getattr
36local setattr            = nuts.setattr
37local getwidth           = nuts.getwidth
38local getoptions         = nuts.getoptions
39local getclass           = nuts.getclass
40local getprop            = nuts.getprop
41local getcharspec        = nuts.getcharspec
42local getchardict        = nuts.getchardict
43
44local getnumerator       = nuts.getnumerator
45local getdenominator     = nuts.getdenominator
46local getdelimiter       = nuts.getdelimiter
47local getleftdelimiter   = nuts.getleftdelimiter
48local getrightdelimiter  = nuts.getrightdelimiter
49local gettopdelimiter    = nuts.gettopdelimiter
50local getbottomdelimiter = nuts.getbottomdelimiter
51local getdegree          = nuts.getdegree
52local gettop             = nuts.gettop
53local getbottom          = nuts.getbottom
54local getchoice          = nuts.getchoice
55local getnucleus         = nuts.getnucleus
56
57local setattributes      = nuts.setattributes
58local nextnode           = nuts.traversers.node
59
60local nodecodes              = nodes.nodecodes
61
62local accentcodes            <const> = nodes.accentcodes
63local fencecodes             <const> = nodes.fencecodes
64local fractioncodes          <const> = nodes.fractioncodes
65local kerncodes              <const> = nodes.kerncodes
66
67local noad_code              <const> = nodecodes.noad
68local accent_code            <const> = nodecodes.accent
69local radical_code           <const> = nodecodes.radical
70local fraction_code          <const> = nodecodes.fraction
71local subbox_code            <const> = nodecodes.subbox
72local submlist_code          <const> = nodecodes.submlist
73local mathchar_code          <const> = nodecodes.mathchar
74local mathtextchar_code      <const> = nodecodes.mathtextchar
75local delimiter_code         <const> = nodecodes.delimiter
76local style_code             <const> = nodecodes.style
77local choice_code            <const> = nodecodes.choice
78local fence_code             <const> = nodecodes.fence
79
80local hlist_code             <const> = nodecodes.hlist
81local vlist_code             <const> = nodecodes.vlist
82local glyph_code             <const> = nodecodes.glyph
83local disc_code              <const> = nodecodes.disc
84local glue_code              <const> = nodecodes.glue
85local kern_code              <const> = nodecodes.kern
86local rule_code              <const> = nodecodes.rule
87local math_code              <const> = nodecodes.math
88
89local fixedtopaccent_code    <const> = accentcodes.fixedtop
90local fixedbottomaccent_code <const> = accentcodes.fixedbottom
91local fixedbothaccent_code   <const> = accentcodes.fixedboth
92
93local leftfence_code         <const> = fencecodes.left
94local middlefence_code       <const> = fencecodes.middle
95local rightfence_code        <const> = fencecodes.right
96local operatorfence_code     <const> = fencecodes.operator
97
98local atop_code              <const> = fractioncodes.atop
99local above_code             <const> = fractioncodes.above
100
101local fontkern_code          <const> = kerncodes.fontkern
102local italiccorrection_code  <const> = kerncodes.italiccorrection
103
104local hextensible_code       <const> = nodes.radicalcodes.hextensible
105local delimited_code         <const> = nodes.radicalcodes.delimited
106----- delimiterover_code     <const> = nodes.radicalcodes.delimiterover
107----- delimiterunder_code    <const> = nodes.radicalcodes.delimiterunder
108----- overdelimiter_code     <const> = nodes.radicalcodes.overdelimiter
109----- underdelimiter_code    <const> = nodes.radicalcodes.underdelimiter
110
111local lextensible_code       <const> = nodes.listcodes.hextensible
112local gextensible_code       <const> = nodes.glyphcodes.extensible
113
114local a_tagged               <const> = attributes.private('tagged')
115local a_mathcategory         <const> = attributes.private('mathcategory')
116local a_mathstack            <const> = attributes.private('mathstack')
117local a_mathmode             <const> = attributes.private('mathmode')
118local a_mathfamily           <const> = attributes.private('mathfamily')
119local a_mathdomain           <const> = attributes.private('mathdomain')
120
121local c_mathblobnesting      <const> = tex.iscount("currentmathblobnesting")
122local c_mathblob             <const> = tex.iscount("currentmathblob")
123
124local processnoads    = noads.process
125
126local getintegervalue = tex.getintegervalue
127local getmacro        = tokens.getters.macro
128
129local tags            = structures.tags
130
131local start_tagged    = tags.start
132local restart_tagged  = tags.restart
133local push_tagged     = tags.push
134local pop_tagged      = tags.pop
135local stop_tagged     = tags.stop
136local taglist         = tags.taglist
137
138----- chardata        = characters.data
139
140local getmathcodes    = tex.getmathcodes
141local classes         = mathematics.classes
142
143local classtotag = {
144    [classes.ordinary]     = "mi",
145    [classes.variable]     = "mi",
146    [classes.imaginary]    = "mi",
147    [classes.differential] = "mi",
148    [classes.exponential]  = "mi",
149    [classes.digit]        = "mn",
150    [classes.implication]  = "mo",
151    [classes.ghost]        = "mo",--always?
152--  [classes.relation]     = "mo",
153--  [classes.binary]       = "mo",
154--  [classes.punctuation]  = "mc",
155}
156
157local fromunicode16  = fonts.mappings.fromunicode16
158local fontcharacters = fonts.hashes.characters
159
160local report_tags = logs.reporter("structure","tags")
161
162local tagging = {
163 -- mfenced = true,
164    mfenced = false,
165}
166
167directives.register("structures.tags.math.mfenced", function(v) tagging.mfenced = v end)
168
169mathematics.tagging = tagging
170
171local process, processsubsup  do
172
173    local noadoptioncodes = tex.noadoptioncodes
174
175    local continuation_code          <const> = noadoptioncodes.continuation
176    local continuationhead_code      <const> = noadoptioncodes.continuationhead
177    local continuationkernel_code    <const> = noadoptioncodes.continuationkernel
178
179    local indexedsuperscript_code    <const> = noadoptioncodes.indexedsuperscript
180    local indexedsubscript_code      <const> = noadoptioncodes.indexedsubscript
181    local indexedsuperprescript_code <const> = noadoptioncodes.indexedsuperprescript
182    local indexedsubprescript_code   <const> = noadoptioncodes.indexedsubprescript
183
184    local limits_code                <const> = noadoptioncodes.limits
185
186 -- x[prime + sup]       : <msup>    <msup>    <mi>x</mi> <mi>sup</mi> </msup>              <mi>prime</mi> </msup>
187 -- x[prime + sub]       : <msubsup>           <mi>x</mi> <mi>sub</mi> </msubsup>           <mi>prime</mi> </msubsup>
188 -- x[prime + sup + sub] : <msup>    <msubsup> <mi>x</mi> <mi>sub</mi> <mi>sup</mi> </msup> <mi>prime</mi> </msup>
189 -- x[prime + sup + sub] : <msubsup>           <mi>x</mi> <mi>sub</mi> </msubsup>           <mi>prime</mi> </msubsup>
190 --
191 -- x[prime]             : <msup>              <mi>x</mi> <mi>prime</mi>            </msup>
192 -- x[sup]               : <msup>              <mi>x</mi> <mi>sup</mi>              </msup>
193 -- x[sub]               : <msub>              <mi>x</mi> <mi>sub</mi>              </msub>
194 -- x[sup + sub]         : <msubsup>           <mi>x</mi> <mi>sub</mi> <mi>sup</mi> </msubsup>
195
196    local t_prime = { script = "prime" } -- etc, todo: set on main table
197
198    local function simple(start,nucleus,prime,sup,sub,options,continuation,limits)
199        if prime then
200            setattr(start,a_tagged,start_tagged("msup", { prime = true }))
201            start_tagged("mrow")
202        end
203        if sub then
204            if sup then
205                local subindexed = ((options & indexedsubscript_code  ) > 0) or nil
206                local supindexed = ((options & indexedsuperscript_code) > 0) or nil
207                if continuation then
208                    continuation.subindexed = subindexed
209                    continuation.supindexed = supindexed
210                    continuation.limits     = limits
211                else
212                    continuation = {
213                        subindexed = subindexed,
214                        supindexed = supindexed,
215                        limits     = limits,
216                    }
217                end
218                setattr(start,a_tagged,start_tagged("msubsup",continuation))
219                process(nucleus)
220                start_tagged("mrow", { script = "sub" })
221                process(sub)
222                stop_tagged()
223                start_tagged("mrow", { script = "sup" })
224                process(sup)
225                stop_tagged()
226                stop_tagged()
227            else
228                local subindexed = ((options & indexedsubscript_code) > 0) or nil
229                if continuation then
230                    continuation.subindexed = subindexed
231                    continuation.limits     = limits
232                else
233                    continuation = {
234                        subindexed = subindexed,
235                        limits     = limits,
236                    }
237                end
238                setattr(start,a_tagged,start_tagged("msub",continuation))
239                process(nucleus)
240                start_tagged("mrow", { script = "sub" })
241                process(sub)
242                stop_tagged()
243                stop_tagged()
244            end
245        elseif sup then
246            local supindexed = ((options & indexedsuperscript_code) > 0) or nil
247            if continuation then
248                continuation.supindexed = supindexed
249                continuation.limits     = limits
250            else
251                continuation = {
252                    supindexed = supindexed,
253                    limits     = limits,
254                }
255            end
256            setattr(start,a_tagged,start_tagged("msup",continuation))
257            process(nucleus)
258            start_tagged("mrow", { script = "sup" })
259            process(sup)
260            stop_tagged()
261            stop_tagged()
262        else
263            process(nucleus)
264        end
265        if prime then
266            stop_tagged()
267            start_tagged("mrow", { script = "prime" })
268            process(prime)
269            stop_tagged()
270            stop_tagged()
271        end
272    end
273
274    local function complex(start,nucleus,prime,sup,sub,presup,presub,options,continuation,limits)
275        if prime then
276            setattr(start,a_tagged,start_tagged("msup", { prime = true }))
277            start_tagged("mrow")
278        end
279-- -- makes little sense
280-- if continuation then
281--     continuation.limits = limits
282-- else
283--     continuation = { limits = limits }
284-- end
285        start_tagged("mmultiscripts", continuation)
286        process(nucleus)
287     -- if prime then
288     --     start_tagged("mrow", { script = "prime" })
289     --     process(prime)
290     --     stop_tagged()
291     -- end
292        if sup then
293            start_tagged("mrow", { script = "sup" }) -- , indexed = ((options & indexedsuperscript_code) > 0) and "true" or nil  })
294            process(sup)
295            stop_tagged()
296        end
297        if sub then
298            start_tagged("mrow", { script = "sub" }) -- , indexed = ((options & indexedsubscript_code) > 0) and "true" or nil  })
299            process(sub)
300            stop_tagged()
301        end
302        if presup then
303            start_tagged("mrow", { script = "presup" }) -- , indexed = ((options & indexedsuperprescript_code) > 0) and "true" or nil  })
304            process(presup)
305            stop_tagged()
306        end
307        if presub then
308            start_tagged("mrow", { script = "presub" }) -- , indexed = ((options & indexedsubprescript_code) > 0) and "true" or nil  })
309            process(presub)
310            stop_tagged()
311        end
312        stop_tagged()
313        if prime then
314            stop_tagged()
315            start_tagged("mrow", { script = "prime" })
316            process(prime)
317            stop_tagged()
318            stop_tagged()
319        end
320    end
321
322    local multiprime = true
323
324    directives.register("structures.tags.math.multiprime", function(v) multiprime = v end)
325
326    processsubsup = function(start)
327        -- At some point we might need to add an attribute signaling the
328        -- super- and subscripts because TeX and MathML use a different
329        -- order. The mrows are needed to keep mn's separated.
330        local nucleus, prime, sup, sub, presup, presub = getnucleus(start,true)
331        local options = getoptions(start) or 0
332        local limits = (options & limits_code) > 0
333        local continuation = false
334        local c = (options & continuation_code) > 0
335        local h = (options & continuationhead_code) > 0
336        local k = (options & continuationkernel_code) > 0
337        local continuation = ((c or h or k) and {
338            continuation = { next = c, head = h, kernel = k}
339        }) or nil
340     -- if  presup or presub  or (               sup and prime) then
341        if (presup or presub) or (multiprime and sup and prime) then
342            complex(start,nucleus,prime,sup,sub,presup,presub,options,continuation,limits)
343        else
344            simple(start,nucleus,prime,sup,sub,options,continuation,limits)
345        end
346    end
347
348end
349
350-- todo: check function here and keep attribute the same
351
352-- todo: variants -> original
353
354local actionstack = { }
355local fencesstack = { }
356
357-- glyph nodes and such can happen in under and over stuff
358
-- Returns the unicode that belongs to the character carried by node n (to be used
-- instead of getchar). The character data of the node's font is consulted first;
-- when the font has no entry for the character, or the entry has no unicode field,
-- the character code itself is returned.
local function getunicode(n)
    local char, font = getcharspec(n)
    local data = fontcharacters[font][char]
    -- guard: a character that is absent from the font gives no data table, and
    -- indexing that would raise an error instead of falling back to the code
    return data and data.unicode or char -- can be a table but unlikely for math characters
end
364
365-------------------
366
-- Set by the handler below, reset at the start of every hascontent() call.
local found = false

-- Handlers for the "content" pass: a math character is the only thing that counts.
local content = {
    [mathchar_code] = function()
        found = true
    end,
}

-- Runs the noad processor over head with the content handlers and returns whether
-- the math character handler was triggered.
local function hascontent(head)
    found = false
    processnoads(head,content,"content")
    return found
end
377
378--------------------
379
380-- todo: use properties
381
382-- local function showtag(n,id,old)
383--     local attr = getattr(n,a_tagged)
384--     local curr = tags.current()
385--     report_tags("%s, node %s, attr %s:%s (%s), top %s (%s)",
386--         old and "before" or "after ",
387--         nodecodes[id],
388--         getattrlist(n),
389--         attr or "?",attr and taglist[attr].tagname or "?",
390--         curr or "?",curr and taglist[curr].tagname or "?"
391--     )
392-- end
393
-- I need to bring this in sync with what is new in or got removed from mathml 3,
-- not that there have been many changes. It will happen in sync with other mathml
-- updates in context where we also keep adapting to the cycle of browsers either
-- supporting it or not, and to the come-and-go of alternatives like ascii math and
-- mathjax. It's the web and browser support that drives this, not tex and its
-- community. So, maybe I'll add some more detail here, not that it matters much in
-- the long run where we only focus on structure and let the engine deal with the
-- details. Another reason to update this is that we can add some tracing (lmtx only).
402
-- This has been working ok for quite a while, but in 2023 it's time to have a look
-- at it again and see to what extent we need to adapt to new features. Around the
-- time PG's Panopticom was put on youtube.
406
407local chardata = characters.data
408
409local mathnames   = mathematics.dictionaries.names
410local mathgroups  = mathematics.dictionaries.groups
411local mathclasses = mathematics.classes
412
413local everygroup      <const> = mathematics.dictionaries.names.everygroup
414local variable_class  <const> = classes.variable
415
-- Collects the properties that get attached to the tag of a math character or
-- delimiter node.
--
-- n     : the node, its dictionary fields come from getchardict()
-- class : the class to report (the caller passes the node subtype); a
--         "swappedclass" node property takes precedence when present
--
-- Returns a table with mathclass, mathgroup, mathindex and mathcharacter, or
-- nothing at all when neither a group nor a class is known.
local function getproperties(n,class)
    -- the first and fourth value (properties and font) are not used here
    local _, group, index, _, char = getchardict(n)
    if group == everygroup then
        -- should be done at the lua end by defining a char bound group code
        local data = chardata[char]
        if data then
            -- Prefer the group bound to the character itself and fall back on the
            -- first math specification. Before, the group was only applied inside
            -- the fallback branch, so a character level mathgroup was looked up but
            -- never used; an empty mathspec list would also raise an error.
            local name = data.mathgroup
            if not name then
                local spec  = data.mathspec
                local first = spec and spec[1]
                name = first and first.group
            end
            if name then
                group = mathnames[name] or group
            end
        end
    end
    if group or class then
        local swapped = getprop(n,"swappedclass")
        if swapped then
            class = swapped
        end
        return {
            mathclass     = mathclasses[class] or class, -- unknown classes pass as-is
            mathgroup     = mathgroups[group],
            mathindex     = index,
         -- mathfont      = font, -- would need the fourth getchardict value
            mathcharacter = char,
        }
    end
end
448
449-- todo: why seen twice
450-- todo: get rid of detail
451
452do
453
454    local level = -1
455    local trace = false
456
457    trackers.register("export.trace.math", function(v) trace = v end)
458
459    local function show(n,id,where)
460-- inspect(getattr(n,a_tagged))
461-- inspect(taglist)
462        local a = getattr(n,a_tagged)
463        local s = a and taglist[a]
464        if s then
465            s = s.taglist
466        end
467        report_tags("%w %s : %S : %s : %s",
468            level,
469            where,
470            nodecodes[id],
471            a or "?",
472            s and concat(s," ", 3) or "untagged"
473        )
474    end
475
476    -- Beware, the first node in list is the actual list so we definitely need to nest.
477    -- This approach is a hack, maybe I'll make a proper nesting feature to deal with
478    -- this at another level. Here we just fake structure by enforcing the inner one.
479
480    -- todo: have a local list with local tags that then get appended
481
482    local function runner(td,nd,text,list) -- quite inefficient
483        local cache = { } -- we can have nested unboxed mess so best local to runner
484        local keep = nil
485     -- local keep = { } -- in case we might need to move keep outside
486        for n, id, subtype in nextnode, list do
487            local mth = id == math_code and subtype
488            if mth == 0 then -- begin in line / nested math box like stackers
489             -- insert(keep,text)
490                keep = text
491                text = start_tagged("mrow")
492             -- common = common + 1
493            end
494            local aa = getattr(n,a_tagged)
495            if aa then
496                -- here we could intercept formulacaption and formulanumber
497                local ac = cache[aa]
498                if not ac then
499                    local tagspec = taglist[aa]
500                    local tagdata = tagspec.taglist
501                    local common  = 0
502                    for i=1,nd do
503                        if td[i] == tagdata[i] then
504                            common = common + 1
505                        else
506                            break
507                        end
508                    end
509                    common = common + 1
510                    local extra = #tagdata
511                    if common <= extra then
512                        if trace then
513                            show(n,id," ")
514                        end
515                        for i=common,extra do
516                            -- don't we loose properties here?
517                            ac = restart_tagged(tagdata[i])
518                        end
519                        for i=common,extra do
520                            stop_tagged()
521                        end
522                    else
523                        ac = text
524                    end
525                    cache[aa] = ac
526                end
527                setattr(n,a_tagged,ac)
528            else
529                setattr(n,a_tagged,text)
530            end
531            if id == hlist_code or id == vlist_code then
532                runner(td,nd,text,getlist(n))
533         -- elseif id == glyph_code then
534            elseif id == disc_code then
535                -- this should not be needed
536                local pre, post, replace = getdisc(n)
537                if pre then
538                    runner(td,nd,text,pre)
539                end
540                if post then
541                    runner(td,nd,text,post)
542                end
543                if replace then
544                    runner(td,nd,text,replace)
545                end
546            end
547            if mth == 1 then -- end in line
548                stop_tagged()
549             -- text = remove(keep)
550                text = keep
551             -- common = common - 1
552            end
553        end
554    end
555
556    local function relocate(start,attr,tag,specification)
557        -- we can now end up with nexted "math"
558        local detail = specification.detail
559        local text   = start_tagged(tag, detail and { detail = detail } or nil)
560        setattr(start,a_tagged,text)
561        local list = getlist(start)
562        if list then
563            local tagdata = specification.taglist
564            runner(tagdata,#tagdata,text,list)
565        end
566        stop_tagged()
567    end
568
569    local function makeintotext(start)
570        setattr(start,a_tagged,start_tagged("mtext"))
571        stop_tagged()
572    end
573
574    process = function(start) -- we cannot use the processor as we have no finalizers (yet)
575        local mtexttag = nil
576        level = level + 1
577        for start, id, subtype in nextnode, start do -- current
578            if trace then
579                show(start,id,"+")
580            end
581            if id == glyph_code or id == disc_code then
582                if not mtexttag then
583                    mtexttag = start_tagged("mtext")
584                end
585                setattr(start,a_tagged,mtexttag)
586            elseif mtexttag and id == kern_code and (subtype == fontkern_code or subtype == italiccorrection_code) then
587                setattr(start,a_tagged,mtexttag)
588            elseif id == rule_code then
589             -- setattr(start,a_tagged,start_tagged("ignore"))
590             -- stop_tagged()
591            else
592                if mtexttag then
593                    stop_tagged()
594                    mtexttag = nil
595                end
596                if id == mathchar_code then
597                 -- local char = getchar(start) -- not used
598                    local properties = getproperties(start,subtype)
599                    local tag = classtotag[subtype] or "mo"
600                    properties.mathstack = getattr(start,a_mathstack)
601                    properties.mathcategory = getattr(start,a_mathcategory)
602                    setattr(start,a_tagged,start_tagged(tag,properties))
603                    stop_tagged()
604                    if trace then
605                        show(start,id,"C")
606                    end
607                    level = level - 1
608                    break -- okay?
609                elseif id == mathtextchar_code then -- or id == glyph_code
610                    -- check for code
611                    local a = getattr(start,a_mathcategory)
612                    if a then -- cache table
613                        setattr(start,a_tagged,start_tagged("ms",{ mathcategory = a })) -- mtext
614                    else
615                        setattr(start,a_tagged,start_tagged("ms")) -- mtext
616                    end
617                    stop_tagged()
618                    if trace then
619                        show(start,id,"T")
620                    end
621                    level = level - 1
622                    break
623                elseif id == delimiter_code then
624                    -- check for code
625                    local properties = getproperties(start,subtype)
626                    properties.delimiter = "true" -- inspect(properties)
627                    setattr(start,a_tagged,start_tagged("mo",properties))
628                    stop_tagged()
629                    if trace then
630                        show(start,id,"D")
631                    end
632                    level = level - 1
633                   break
634                elseif id == style_code then
635                    -- has a next
636                elseif id == noad_code then
637                    processsubsup(start)
638                elseif id == subbox_code or id == hlist_code or id == vlist_code then
639                    -- keep an eye on subbox_code and see what ends up in there
640                    -- a hlist can be a nested result (mlist_to_hlist)
641                    local attr = getattr(start,a_tagged)
642                    if attr then
643                        local specification = taglist[attr]
644                        if specification then
645                            local tag = specification.tagname
646--                             if tag == "formulacaption" then
647--                                 -- can this happen, skip
648--                             else
649                            if tag == "mstacker" then
650                                -- this still happens
651                                local list = getlist(start)
652                                if list then
653                                    process(list)
654                                end
655                            else
656                                if tag ~= "math" and
657                                   tag ~= "mtable" and tag ~= "mtd" and
658                                   tag ~= "mstackertop" and tag ~= "mstackermid" and tag ~= "mstackerbot"
659-- and tag ~= "formulacaption"
660-- and tag ~= "formulanumber"
661
662                                then
663                                   tag = "mtext"
664                                end
665                                relocate(start,attr,tag,specification)
666                            end
667                        else
668                            makeintotext(start) -- last resort gamble
669                        end
670                    else
671                        makeintotext(start) -- last resort gamble
672                    end
673                elseif id == submlist_code then -- normally a hbox
674                    local list = getlist(start)
675                    if list then
676                        local attr = getattr(start,a_tagged)
677                        local last = attr and taglist[attr]
678                        if last then
679                            local tag    = last.tagname
680                            local detail = last.detail
681                            if tag == "munit" then
682                                setattr(start,a_tagged,start_tagged("mrow", {
683                                    mathunit = detail
684                                }))
685                                process(list)
686                                stop_tagged()
687                            elseif tag == "mfunction" then
688                                setattr(start,a_tagged,start_tagged("mrow", {
689                                    mathfunction = detail,
690                                    mathcategory = getattr(start,a_mathcategory),
691                                    mathstack    = getattr(start,a_mathstack),
692                                }))
693                                process(list)
694                                stop_tagged()
695                            elseif tag == "mstacker" then
696                                setattr(start,a_tagged,restart_tagged(attr)) -- so we just reuse the attribute
697                                process(list)
698                                stop_tagged()
699                            elseif tag == "mfunctionstack" then
700                                setattr(start,a_tagged,start_tagged("mrow", {
701                                    mathfunctionstack = detail,
702                                    mathstack         = getattr(start,a_mathstack),
703                                }))
704                                process(list)
705                                stop_tagged()
706                            elseif tag == "mfractionstack" then
707                                setattr(start,a_tagged,start_tagged("mrow", {
708                                    mathfractionstack = detail,
709                                 -- mathstack         = getattr(start,a_mathstack),
710                                }))
711                                process(list)
712                                stop_tagged()
713                            elseif tag == "mdelimitedstack" then
714                                setattr(start,a_tagged,start_tagged("mrow", {
715                                    mathdelimitedstack = detail,
716                                 -- mathstack          = getattr(start,a_mathstack),
717                                }))
718                                process(list)
719                                stop_tagged()
720                            elseif tag == "mdigits" then
721                                setattr(start,a_tagged,start_tagged("mrow", {
722                                    mathdigits = detail or "unknown",
723                                }))
724                                process(list)
725                                stop_tagged()
726                            else
727                                setattr(start,a_tagged,start_tagged("mrow"))
728                                process(list)
729                                stop_tagged()
730                            end
731                        else -- never happens, we're always document
732                            setattr(start,a_tagged,start_tagged("mrow"))
733                            process(list)
734                            stop_tagged()
735                        end
736                    end
737                elseif id == fraction_code then
738                    --
739                    -- if middle then we have a stacker!
740                    --
741                    local num    = getnumerator(start)
742                    local denom  = getdenominator(start)
743                    local left   = getleftdelimiter(start)
744                    local right  = getrightdelimiter(start)
745                    local middle = getdelimiter(start) -- todo get them all in one go
746                    if left then
747                       setattr(left,a_tagged,start_tagged("mo"))
748                       process(left)
749                       stop_tagged()
750                    end
751                    setattr(start,a_tagged,start_tagged("mfrac", {
752                        mathfractionrule = (subtype == atop_code or subtype == above_code) and "no" or "yes",
753                    }))
754                    process(num)
755                    process(denom)
756                    stop_tagged()
757                    if middle then
758                        setattr(middle,a_tagged,start_tagged("ignore"))
759                        stop_tagged()
760                    end
761                    if right then
762                        setattr(right,a_tagged,start_tagged("mo"))
763                        process(right)
764                        stop_tagged()
765                    end
766                elseif id == choice_code then
767                    local display      = getchoice(start,1)
768                    local text         = getchoice(start,2)
769                    local script       = getchoice(start,3)
770                    local scriptscript = getchoice(start,4)
771                    if display then
772                        process(display)
773                    end
774                    if text then
775                        process(text)
776                    end
777                    if script then
778                        process(script)
779                    end
780                    if scriptscript then
781                        process(scriptscript)
782                    end
783                elseif id == fence_code then
784                    local delimiter    = getdelimiter(start)
785                    local mfenced      = tagging.mfenced
786                    local mathcategory = getattr(start,a_mathcategory)
787                    if subtype == leftfence_code then
788                        local properties = { mathcategory = mathcategory }
789                        insert(fencesstack,properties)
790                        setattr(start,a_tagged,start_tagged("mfenced",properties)) -- needs checking
791                        if delimiter then
792                            if mfenced then
793                                start_tagged("ignore")
794                            end
795                            local chr = getchar(delimiter)
796                            if chr ~= 0 then
797                                properties.left = chr
798                            end
799                            process(delimiter)
800                            if mfenced then
801                                stop_tagged()
802                            end
803                        end
804                        start_tagged("mrow") -- begin of subsequence
805                    elseif subtype == middlefence_code then
806                        stop_tagged()        -- end of subsequence
807                        if delimiter then
808                            if mfenced then
809                                start_tagged("ignore")
810                            end
811                            local top = fencesstack[#fencesstack]
812                            local chr = getchar(delimiter)
813                            if chr ~= 0 then
814                                local mid = top.middle
815                                if mid then
816                                    mid[#mid+1] = chr
817                                else
818                                    top.middle = { chr }
819                                end
820                            end
821                            process(delimiter)
822                            if mfenced then
823                                stop_tagged()
824                            end
825                        end
826                        start_tagged("mrow") -- begin of subsequence
827                    elseif subtype == rightfence_code then
828                        stop_tagged() -- end of subsequence
829                        local properties = remove(fencesstack)
830                        if not properties then
831                            report_tags("missing right fence")
832                            properties = { }
833                        end
834                        if delimiter then
835                            if mfenced then
836                                start_tagged("ignore")
837                            end
838                            local chr = getchar(delimiter)
839                            if chr ~= 0 then
840                                properties.right = chr
841                            end
842                            process(delimiter)
843                            if mfenced then
844                                stop_tagged()
845                            end
846                        end
847                        stop_tagged()
848                    elseif subtype == operatorfence_code then
849                        local properties = {
850                            operator     = true,
851                            mathcategory = mathcategory,
852                        }
853                        local top = gettopdelimiter(start)
854                        local bot = getbottomdelimiter(start)
855                        insert(fencesstack,properties)
856                        setattr(start,a_tagged,start_tagged("mrow",properties))
857                        if top then
858                            if bot then
859                                start_tagged("msubsup")
860                            else
861                                start_tagged("msup")
862                            end
863                        elseif bot then
864                            start_tagged("msub")
865                        else
866                            start_tagged("mrow",properties)
867                        end
868                        if delimiter then
869                            start_tagged("mrow")
870                            local chr = getchar(delimiter)
871                            if chr ~= 0 then
872                                properties.left = chr
873                            end
874                            process(delimiter)
875                            stop_tagged()
876                        else
877                            -- error
878                        end
879                        if top or bot then
880                            if top then
881                                start_tagged("mrow", { script = "sup" })
882                                process(top)
883                                stop_tagged()
884                            end
885                            if bot then
886                                start_tagged("mrow", { script = "sub" })
887                                process(bot)
888                                stop_tagged()
889                            end
890                        end
891                        stop_tagged()
892                        start_tagged("mrow") -- begin of subsequence
893                    else -- no fence
894                        local a = getattr(start,a_mathcategory)
895                        local properties = a and { mathcategory = a }
896                        start_tagged("mrow",properties) -- begin of subsequence
897                        if delimiter then
898                            if mfenced then
899                                start_tagged("ignore")
900                            end
901                            process(delimiter)
902                            if mfenced then
903                                stop_tagged()
904                            end
905                        end
906                        stop_tagged()
907                    end
908                elseif id == radical_code then
909                    local left  = getleftdelimiter(start)
910                    local right = getrightdelimiter(start)
911                    if subtype == hextensible_code then
912                        -- eventually we have no radical but just some box
913                        if left then
914                            start_tagged("mo")
915                            process(left)
916                            stop_tagged()
917                        end
918                    elseif subtype == delimited_code then
919                        start_tagged("mdelimited",properties)
920                        if left then
921                            local properties = getproperties(left,subtype) or { }
922                            properties.delimiterlocation = "left"
923                            setattr(left,a_tagged,start_tagged("mo", properties))
924                            stop_tagged()
925                        end
926                        if right then
927                            local properties = getproperties(right,subtype) or { }
928                            properties.delimiterlocation = "right"
929                            setattr(right,a_tagged,start_tagged("mo",properties))
930                            stop_tagged()
931                        end
932                        processsubsup(start)
933                        stop_tagged()
934                    else
935                        local degree = getdegree(start)
936                        if left then
937                            start_tagged("ignore")
938                            process(left) -- root symbol, ignored
939                            stop_tagged()
940                        end
941                        if right then
942                            start_tagged("ignore")
943                            process(right) -- actuarian symbol, ignored
944                            stop_tagged()
945                        end
946                        if degree and hascontent(degree) then
947                            setattr(start,a_tagged,start_tagged("mroot"))
948                            processsubsup(start)
949                            process(degree)
950                            stop_tagged()
951                        else
952                            setattr(start,a_tagged,start_tagged("msqrt"))
953                            processsubsup(start)
954                            stop_tagged()
955                        end
956                    end
957                elseif id == accent_code then
958                    -- maybe tag the 'mo' so that we can reorder but we only use
959                    -- under and over anyway
960                    local topaccent    = gettop(start)
961                    local bottomaccent = getbottom(start)
962                    local middleaccent = getdelimiter(start)
963                    local mathcategory = getattr(start,a_mathcategory)
964                    local topfixed     = subtype == fixedbothaccent_code or subtype == fixedtopaccent_code
965                    local bottomfixed  = subtype == fixedbothaccent_code or subtype == fixedbottomaccent_code
966                    if bottomaccent then
967                        if topaccent then
968                            setattr(start,a_tagged,start_tagged("munderover", {
969                                accent       = true,
970                                top          = getunicode(topaccent),
971                                bottom       = getunicode(bottomaccent),
972                                topfixed     = topfixed,
973                                bottomfixed  = bottomfixed,
974                                mathcategory = mathcategory,
975                            }))
976                            if topfixed    then topfixed    = nil else topfixed    = { delimiter = "true" } end
977                            if bottomfixed then bottomfixed = nil else bottomfixed = { delimiter = "true" } end
978                            processsubsup(start)
979                            setattr(bottomaccent,a_tagged,start_tagged("mo",bottomfixed))
980                            stop_tagged()
981                            setattr(topaccent,a_tagged,start_tagged("mo",topfixed))
982                            stop_tagged()
983                            stop_tagged()
984                        else
985                            setattr(start,a_tagged,start_tagged("munder", {
986                                accent       = true,
987                                bottom       = getunicode(bottomaccent),
988                                bottomfixed  = bottomfixed,
989                                mathcategory = mathcategory,
990                            }))
991                            if bottomfixed then bottomfixed = nil else bottomfixed = { delimiter = "true" } end
992                            processsubsup(start)
993                            setattr(bottomaccent,a_tagged,start_tagged("mo",bottomfixed))
994                            stop_tagged()
995                            stop_tagged()
996                        end
997                    elseif topaccent then
998                        setattr(start,a_tagged,start_tagged("mover", {
999                            accent       = true,
1000                            top          = getunicode(topaccent),
1001                            topfixed     = topfixed,
1002                            mathcategory = mathcategory,
1003                        }))
1004                        if topfixed then topfixed = nil else topfixed = { delimiter = "true" } end
1005                        processsubsup(start)
1006                        setattr(topaccent,a_tagged,start_tagged("mo",topfixed))
1007                        stop_tagged()
1008                        stop_tagged()
1009                    else
1010                        processsubsup(start)
1011                    end
1012                elseif id == glue_code then
1013                    -- before processing, so other intermathglue is not tagged
1014                    local em = fonts.hashes.emwidths[nuts.getfont(start)]
1015                    local wd = getwidth(start)
1016                    if em and wd then
1017                        setattr(start,a_tagged,start_tagged("mspace",{ emfactor = wd/em }))
1018                        stop_tagged()
1019                    end
1020                else
1021                    -- rule boundary
1022                end
1023            end
1024            if trace then
1025                show(start,id,"-")
1026            end
1027        end
1028        if mtexttag then
1029            stop_tagged()
1030        end
1031        level = level - 1
1032    end
1033
1034end
1035
-- Flag that ends up as the "standalone" property of each "math" element that
-- noads.handlers.tags starts; it is controlled by the directive registered here.

local standalone = false

directives.register("structures.tags.math.standalone", function(v)
    standalone = v
end)
1039
-- Math handler that wraps the list starting at head in a "math" element holding
-- one "mrow" and then lets process() tag whatever is inside. Nothing is tagged
-- while trial typesetting, but mathematics.lastinput is wiped on every call.
--
-- The parameter list names "style" twice. In Lua the later declaration shadows
-- the earlier one, so the "style" property below carries the seventh argument.

function noads.handlers.tags(head,style,penalties,beginclass,endclass,level,style)
    if not context.trialtypesetting() then
        local family = tex.getattribute(a_mathfamily) -- brrr
        local mode   = "inline"
        if getattr(head,a_mathmode) == 1 then
            mode = "display"
        end
        -- the fields are filled in the same order as before
        local properties = {
            mode       = mode,
            standalone = standalone,
            family     = mathematics.familyname(family),
            style      = style,
            input      = mathematics.lastinput,
            blob       = getintegervalue(c_mathblob),
            language   = getmacro("currentlanguage"),
            domain     = mathematics.getdomainname(tonumber(getattr(head,a_mathdomain))),
        }
        start_tagged("math",properties)
        -- the outer mrow is what gets attached to the head of the list
        setattr(head,a_tagged,start_tagged("mrow"))
        if trace then
            report_tags("start math sweep at level %i",level)
        end
        process(head)
        if trace then
            report_tags("stop math sweep at level %i",level)
        end
        stop_tagged() -- mrow
        stop_tagged() -- math
    end
    mathematics.lastinput = nil
end
1066
1067do
1068
1069    -- This one is meant for tracing (in m4all/m4mbo where it complements some other
1070    -- tracing) but it actually can also replace the embedding feature although that
1071    -- one might be better when we have more complex code with dependencies outside
1072    -- the blob. I'll deal with that when it's needed (trivial). The current
1073    -- interface is rather minimalistic.
1074
1075    local enabled  = false
1076    local export   = false
1077    local warned   = false
1078    local shared   = { }
1079    local orders   = { }
1080    local hashed   = { }
1081    local blobdone = setmetatableindex("table")
1082
1083    local trace_blobs = false
1084    local report_blob = logs.reporter("math blob")
1085
1086    directives.register("structures.tags.math.blobs", function(v) trace_blobs = v end)
1087
1088    local function register(order,data)
1089        if not data then
1090            data = ""
1091        end
1092        local hash  = sha2.HEX256(data) -- maybe direct
1093        local index = hashed[hash]
1094        if index then
1095            if trace_blobs then
1096                report_blob("known, order %i, index %i, data: %s",order,index,gsub(shared[index].strip,"%s+",""))
1097            end
1098        else
1099            index = #shared + 1
1100            hashed[hash]  = index
1101            shared[index] = {
1102                data  = data,
1103                strip = xml.mml.stripped(data), -- we could delay this as it often only happens once
1104            }
1105            if trace_blobs then
1106                report_blob("register, order %i, index %i, data: %s",order,index,gsub(shared[index].strip,"%s+",""))
1107            end
1108        end
1109        orders[order] = index
1110        return index
1111    end
1112
1113    function mathematics.enablecollecting()
1114        if structures.tags.enabled() then
1115            if not enabled then
1116                nodes.tasks.enableaction("math","noads.handlers.export")
1117            end
1118            enabled = true
1119            export  = structures.tags.localexport
1120        elseif not warned then
1121            report_tags("math collecting only works when tagging is enabled")
1122            warned = true
1123        end
1124    end
1125
1126    function mathematics.disablecollecting()
1127        enabled = false
1128        export  = false
1129    end
1130
1131    local function getmathblob(purpose,order)
1132        local index = orders[order]
1133        if index then
1134            local entry = shared[index]
1135            if trace_blobs then
1136                report_blob("get math data, purpose %a, order %i, index %i, data: %s",purpose,order,index,gsub(entry.strip,"%s+", ""))
1137            end
1138            return entry.strip
1139        end
1140    end
1141
1142    local function gettextblob(purpose,language,order)
1143        if not order then
1144            order    = language
1145            language = "en"
1146        end
1147        local index = orders[order]
1148        if index then
1149            local entry = shared[index]
1150            local data  = entry and entry[language]
1151            if not data then
1152                -- when no labels we actually now duplicate but so be it
1153                local d = entry.data
1154                if d and d ~= "" then
1155                    data = xml.mml.verbose(d,language)
1156                    entry[language] = data
1157                else
1158                    entry[language] = ""
1159                end
1160
1161            end
1162            if trace_blobs then
1163                report_blob("get text data, purpose %a, order %i, index %i, data: %s",purpose,order,index,gsub(entry.strip,"%s+", ""))
1164            end
1165            return data
1166        end
1167    end
1168
1169    local function getblobindex(purpose,order)
1170        local index = orders[order] or 0
1171        if trace_blobs then
1172            report_blob("get math order, purpose %a, order %i, index %i",purpose,order,index)
1173        end
1174        return index
1175    end
1176
1177    local function getblobmapping(purpose,order)
1178        local index   = orders[order] or 0
1179        local blobs   = job.variables.collected.mathblobs
1180        local mapping = blobs and blobs[index] or 0
1181        if trace_blobs then
1182            report_blob("get math mapping, purpose %a, order %i, index %i, mapping %04X",purpose,order,index,mapping)
1183        end
1184        return index
1185    end
1186
1187    local function markblobindexdone(language,n)
1188        blobdone[language][n] = true
1189    end
1190
1191    mathematics.getmathblob       = getmathblob
1192    mathematics.gettextblob       = gettextblob
1193    mathematics.getblobindex      = getblobindex
1194    mathematics.markblobindexdone = markblobindexdone
1195
1196    local integer_value <const> = tokens.values.integer
1197    local boolean_value <const> = tokens.values.boolean
1198
1199    local implement  = interfaces.implement
1200    local context    = context
1201    local ctxescaped = context.ctxescaped
1202
1203    implement {
1204        name      = "getmathmathblob",
1205        protected = true,
1206        public    = true,
1207        arguments = "integer",
1208        actions   = function(n)
1209            context(getmathblob("tex",n) or "")
1210        end
1211    }
1212
1213    implement {
1214        name      = "getmathtextblob",
1215        protected = true,
1216        public    = true,
1217        arguments = { "string", "integer" },
1218        actions   = function(language,n)
1219            ctxescaped(gettextblob("tex",language,n) or "")
1220        end
1221    }
1222
1223    implement {
1224        name      = "getmathblobindex",
1225        public    = true,
1226        usage     = "value",
1227        arguments = "integer",
1228        actions   = function(n)
1229            return integer_value, getblobindex("tex",n)
1230        end
1231    }
1232
1233    implement {
1234        name      = "getmathblobmapping",
1235        public    = true,
1236        usage     = "value",
1237        arguments = "integer",
1238        actions   = function(n)
1239            return integer_value, getblobmapping("tex",n)
1240        end
1241    }
1242
1243    implement {
1244        name    = "lastblobindex", -- kind of obsolete
1245        public  = true,
1246        usage   = "value",
1247        actions = function()
1248            return integer_value, #shared
1249        end
1250    }
1251
1252    implement {
1253        name      = "markblobindexdone",
1254        protected = true,
1255        public    = true,
1256        arguments = { "string", "integer" },
1257        actions   = markblobindexdone
1258    }
1259
1260    implement {
1261        name      = "ifblobindexdone",
1262        public    = true,
1263        usage     = "condition",
1264        arguments = { "string", "integer" },
1265        actions = function(language,n)
1266            return boolean_value, blobdone[language][n]
1267        end
1268    }
1269
1270    implement {
1271        name      = "enablecollectingmath",
1272     -- public    = true,
1273        protected = true,
1274        actions   = mathematics.enablecollecting
1275    }
1276
1277    implement {
1278        name      = "disablecollectingmath",
1279     -- public    = true,
1280        protected = true,
1281        actions   = mathematics.disablecollecting
1282    }
1283
1284    implement {
1285        name      = "startcollectingmath", -- obsolete
1286     -- public    = true,
1287        protected = true,
1288        actions   = mathematics.enablecollecting
1289    }
1290
1291    implement {
1292        name      = "stopcollectingmath", -- obsolete
1293     -- public    = true,
1294        protected = true,
1295        actions   = function() end
1296    }
1297
1298    -- for now here .. will become a proper lpeg
1299
1300    local toascii  do
1301
1302        local utfbyte = utf.byte
1303        local gsub, format, find = string.gsub, string.format, string.find
1304
1305        local clean = setmetatableindex(function(t,k)
1306            local n = utfbyte(k)
1307            if n > 127 then
1308                n = format("&#x%X;",n)
1309            else
1310                n = false
1311            end
1312            t[k] = n
1313            return n
1314        end)
1315
1316        local pattern = utf8.charpattern
1317
1318        toascii = function(data)
1319            return (gsub(data,pattern,clean))
1320        end
1321
1322    end
1323
1324    implement {
1325        name      = "processcollectedmath",
1326     -- public    = true,
1327        protected = true,
1328        arguments = "4 strings",
1329        actions   = function(filename,buffername,n,option)
1330            local blob = n and tonumber(n) or getintegervalue(c_mathblob)
1331            local data = getmathblob("collect",blob) or ""
1332            if option == "ascii" then
1333                data = toascii(data)
1334            end
1335            if filename and filename ~= "" then
1336                io.savedata(filename,data)
1337            elseif buffername then -- always something
1338                buffers.assign(buffername == interfaces.variables.yes and "" or buffername,data)
1339            else
1340                return data
1341            end
1342        end
1343    }
1344
1345    implement {
1346        name      = "collectedmath",
1347        usage     = "value",
1348        protected = true,
1349        public    = true,
1350        actions = function(what)
1351            if what == "value" then
1352                return integer_value, #shared
1353            else
1354                context(getmathblob("collect",tokens.scanners.integer()) or "")
1355            end
1356        end
1357    }
1358
1359    local a_mathblob <const> = attributes.private('mathblob')
1360
1361    function noads.handlers.export(head)
1362        if export then
1363-- nuts.show(head)
1364            local nesting = getintegervalue(c_mathblobnesting)
1365            local order   = getintegervalue(c_mathblob)
1366            if nesting == 1 then
1367                if trace_blobs then
1368                    report_blob("export blob, order %i, level %i",order,nesting)
1369                end
1370                local blob = export(head,"math")
1371                if blob then
1372                    -- something can be wrong at a page break: todo
1373                    blob = string.gsub(blob,"^(.-<math)","<math")
1374                    register(order,blob)
1375                else
1376                    if trace_blobs then
1377                        report_blob("empty blob, order %i, level %i",order,nesting)
1378                    end
1379                end
1380            else
1381                if trace_blobs then
1382                    report_blob("nested blob, order %i, level %i",order,nesting)
1383                end
1384            end
1385        end
1386        return head
1387    end
1388
1389    nodes.tasks.appendaction("math", "finalizers", "noads.handlers.export", nil, "nonut", "disabled")
1390
1391end
1392