if not modules then modules = { } end modules ['math-tag'] = {
    version   = 1.001,
    comment   = "companion to math-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- todo: have a local list with local tags that then get appended
-- todo: use tex.getmathcodes (no table)
-- todo: add more spacing details + check text stuff for latest additions
-- todo: some more font related cleanup + adaption to new scaling
-- todo: tracing
-- todo: maybe use lpeg matchers
-- todo: prime
-- todo: middle in fraction

local find, match = string.find, string.match
local insert, remove, concat = table.insert, table.remove, table.concat

local attributes             = attributes
local nodes                  = nodes

local nuts                   = nodes.nuts
local tonut                  = nuts.tonut

local getchar                = nuts.getchar
local getprev                = nuts.getprev
local getcharspec            = nuts.getcharspec
local getdata                = nuts.getdata
local getlist                = nuts.getlist
local getfield               = nuts.getfield
local getdisc                = nuts.getdisc
local getattr                = nuts.getattr
local getattrlist            = nuts.getattrlist
local setattr                = nuts.setattr
local getwidth               = nuts.getwidth

local getnumerator           = nuts.getnumerator
local getdenominator         = nuts.getdenominator
local getdelimiter           = nuts.getdelimiter
local getleftdelimiter       = nuts.getleftdelimiter
local getrightdelimiter      = nuts.getrightdelimiter
local getdegree              = nuts.getdegree
local gettop                 = nuts.gettop
local getbottom              = nuts.getbottom
local getchoice              = nuts.getchoice

local getnucleus             = nuts.getnucleus

local setattributes          = nuts.setattributes

local nextnode               = nuts.traversers.node

local nodecodes              = nodes.nodecodes

local noad_code              = nodecodes.noad
local accent_code            = nodecodes.accent
local radical_code           = nodecodes.radical
local fraction_code          = nodecodes.fraction
local subbox_code            = nodecodes.subbox
local submlist_code          = nodecodes.submlist
local mathchar_code          = nodecodes.mathchar
local mathtextchar_code      = nodecodes.mathtextchar
local delimiter_code         = nodecodes.delimiter
local style_code             = nodecodes.style
local choice_code            = nodecodes.choice
local fence_code             = nodecodes.fence

local accentcodes            = nodes.accentcodes
local fencecodes             = nodes.fencecodes

local fixedtopaccent_code    = accentcodes.fixedtop
local fixedbottomaccent_code = accentcodes.fixedbottom
local fixedbothaccent_code   = accentcodes.fixedboth

local hextensible_code       = nodes.radicalcodes.hextensible
local lextensible_code       = nodes.listcodes.hextensible
local gextensible_code       = nodes.glyphcodes.extensible

local leftfence_code         = fencecodes.left
local middlefence_code       = fencecodes.middle
local rightfence_code        = fencecodes.right
local operatorfence_code     = fencecodes.operator

local kerncodes              = nodes.kerncodes

local fontkern_code          = kerncodes.fontkern
local italickern_code        = kerncodes.italickern

local hlist_code             = nodecodes.hlist
local vlist_code             = nodecodes.vlist
local glyph_code             = nodecodes.glyph
local disc_code              = nodecodes.disc
local glue_code              = nodecodes.glue
local kern_code              = nodecodes.kern
local math_code              = nodecodes.math

local processnoads           = noads.process

local a_tagged               = attributes.private('tagged')
local a_mathcategory         = attributes.private('mathcategory')
local a_mathmode             = attributes.private('mathmode')

local tags                   = structures.tags

local start_tagged           = tags.start
local restart_tagged         = tags.restart
local stop_tagged            = tags.stop
local taglist                = tags.taglist

----- chardata               = characters.data

local getmathcodes           = tex.getmathcodes
----- mathcodes              = mathematics.codes
local mathcodes              = mathematics.classes
local ordinary_mathcode      = mathcodes.ordinary
local digit_mathcode         = mathcodes.digit
local punctuation_mathcode   = mathcodes.punctuation
local active_mathcode        = mathcodes.active

local fromunicode16          = fonts.mappings.fromunicode16
local fontcharacters         = fonts.hashes.characters

local report_tags            = logs.reporter("structure","tags")

-- Forward declaration: the helpers below recurse into the main walker, which is
-- assigned further down in the file.

local process

-- Tags a nucleus. When a prime is present, nucleus and prime are wrapped together
-- in an "msup" element that carries the property prime = true; otherwise the
-- nucleus is processed as is.

local function processnucleus(nucleus,prime)
    if prime then
        -- This should work. Wasn't it that browser handling of primes has an issue
        -- with "big semi raised text" vs "small supposedly superscripted"? So let's
        -- play safe and use a superscript. Even then we get somewhat different
        -- positioning for the same primed character in a sqrt and (e.g.) a
        -- sequential integral.
     -- start_tagged("mrow", { prime = true })
        start_tagged("msup", { prime = true })
        process(nucleus)
        process(prime)
        stop_tagged()
    else
        process(nucleus)
    end
end

-- Tags the nucleus of a noad together with its sub- and superscript. Depending on
-- which scripts are present the noad gets an "msubsup", "msub" or "msup" tag (stored
-- in its a_tagged attribute); without scripts only the nucleus is processed.

local function processsubsup(start)
    -- At some point we might need to add an attribute signaling the
    -- super- and subscripts because TeX and MathML use a different
    -- order. The mrows are needed to keep mn's separated.
    --
    -- The prescripts (presup, presub) are fetched but not tagged here.
    local nucleus, prime, sup, sub, presup, presub = getnucleus(start,true)
    if sub then
        if sup then
            setattr(start,a_tagged,start_tagged("msubsup"))
            processnucleus(nucleus,prime)
            start_tagged("mrow", { subscript = true })
            process(sub)
            stop_tagged()
            start_tagged("mrow", { superscript = true })
            process(sup)
            stop_tagged()
            stop_tagged()
        else
            setattr(start,a_tagged,start_tagged("msub"))
            processnucleus(nucleus,prime)
            start_tagged("mrow")
            process(sub)
            stop_tagged()
            stop_tagged()
        end
    elseif sup then
        setattr(start,a_tagged,start_tagged("msup"))
        processnucleus(nucleus,prime)
        start_tagged("mrow")
        process(sup)
        stop_tagged()
        stop_tagged()
    else
        processnucleus(nucleus,prime)
    end
end

-- todo: check function here and keep attribute the same

-- todo: variants -> original

local actionstack = { }
local fencesstack = { }

-- glyph nodes and such can happen in under and over stuff

-- Returns the unicode registered in the font for the character of node n, or the
-- character code itself when the font has no entry (or no unicode) for it.

local function getunicode(n) -- instead of getchar
    local char, font = getcharspec(n)
    local data = fontcharacters[font][char]
    -- guard: a missing character entry used to raise an "attempt to index nil" error
    return data and data.unicode or char -- can be a table but unlikely for math characters
end

-------------------

-- Handler table for processnoads: the only thing we want to know is whether a
-- math character is seen at all.

local content = { }
local found   = false

content[mathchar_code] = function()
    found = true
end

-- Reports whether the noad list starting at head triggers the mathchar handler.

local function hascontent(head)
    found = false
    processnoads(head,content,"content")
    return found
end

--------------------

-- todo: use properties

-- local function showtag(n,id,old)
--     local attr = getattr(n,a_tagged)
--     local curr = tags.current()
--     report_tags("%s, node %s, attr %s:%s (%s), top %s (%s)",
--         old and "before" or "after ",
--         nodecodes[id],
--         getattrlist(n),
--         attr or "?",attr and taglist[attr].tagname or "?",
--         curr or "?",curr and taglist[curr].tagname or "?"
--     )
-- end

-- I need to bring this in sync with new or removed mathml 3, not that there have
-- been many changes. It will happen in sync with other mathml updates in context
-- where we also keep adapting to a cycling between either or not support in
-- browsers, the come-and-go of alternatives like ascii math and mathjax. It's the
-- web and browser support that drives this, not tex and its community. So, maybe
-- I'll add some more detail here, not that it matters much in the long run where we
-- only focus on structure and let the engine deal with the details. Another reason
-- to update this is that we can add some tracing (lmtx only).

-- This has been working ok for quite a while but in 2023 it's time to have a look
-- at it again and see to what extent we need to adapt to new features. Around the
-- time PG's Panopticom was put on youtube.
-- Walks a math node list and attaches structure tags (MathML-like element names)
-- to the nodes via the a_tagged attribute. Runs of glyphs/discretionaries (plus the
-- font and italic kerns in between) share one "mtext" element; all other node
-- types are dispatched on their id below. Nested lists are handled by recursion.

process = function(start) -- we cannot use the processor as we have no finalizers (yet)
    local mtexttag = nil
    for start, id, subtype in nextnode, start do -- current
        if id == glyph_code or id == disc_code then
            if not mtexttag then
                mtexttag = start_tagged("mtext")
            end
            setattr(start,a_tagged,mtexttag)
        elseif mtexttag and id == kern_code and (subtype == fontkern_code or subtype == italickern_code) then
            -- italickern
            --
            -- The parentheses matter: "and" binds tighter than "or", so without
            -- them any node whose subtype equals the italic kern code would end up
            -- here, even when it is not a kern or when no mtext run is open.
            setattr(start,a_tagged,mtexttag)
        else
            if mtexttag then
                stop_tagged()
                mtexttag = nil
            end
            if id == mathchar_code then
                local char = getchar(start)
                local code = getmathcodes(char)
             -- local ch   = chardata[char]
             -- local mc   = ch and ch.mathclass
                local tag
                local properties = { class = code }
                -- we're a bit early so we can have active here and no rules get applied
                -- so maybe we have to correct some later on
                -- todo: we have way more now
             -- if code == ordinary_mathcode then
             --     if mc == "number" then
             --         tag = "mn"
             --     elseif mc == "variable" or not mc then -- variable is default
             --         tag = "mi"
             --     else
             --         tag = "mo"
             --     end
             -- else
             --     tag = "mo"
             -- end
             -- print(code,mathematics.classes[code])
             -- print(ordinary_mathcode,digit_mathcode,active_mathcode)
                if code == ordinary_mathcode then
                    tag = "mi"
                elseif code == digit_mathcode then
                    tag = "mn"
             -- elseif code == punctuation_mathcode or code == active_mathcode then
             --     tag = "mo"
                else
                    tag = "mo"
                end
             -- if mc == "open" or nc == "close" or mc == "middle" then
             --     properties = { maxsize = 1 }
             -- end
                local a = getattr(start,a_mathcategory)
                if a then
                    -- todo / redo
                    properties.mathcategory = a
                end
                setattr(start,a_tagged,start_tagged(tag,properties))
                stop_tagged()
             -- showtag(start,id,false)
                break -- okay?
            elseif id == mathtextchar_code then -- or id == glyph_code
                -- check for code
                local a = getattr(start,a_mathcategory)
                if a then
                    setattr(start,a_tagged,start_tagged("ms",{ mathcategory = a })) -- mtext
                else
                    setattr(start,a_tagged,start_tagged("ms")) -- mtext
                end
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == delimiter_code then
                -- check for code
                setattr(start,a_tagged,start_tagged("mo"))
                stop_tagged()
             -- showtag(start,id,false)
                break
            elseif id == style_code then
                -- has a next
            elseif id == noad_code then
             -- setattr(start,a_tagged,tags.current())
                processsubsup(start)
            elseif id == subbox_code or id == hlist_code or id == vlist_code then
                -- keep an eye on subbox_code and see what ends up in there
                -- a hlist can be a nested result (mlist_to_hlist)
                local attr = getattr(start,a_tagged)
                if not attr then
                    -- just skip
                else
                    local specification = taglist[attr]
                    if specification then
                        local tag = specification.tagname
                        if tag == "formulacaption" then
                            -- skip
                        elseif tag == "mstacker" then
                            local list = getlist(start)
                            if list then
                                process(list)
                            end
                        else
                            if tag ~= "mtable" and tag ~= "mstackertop" and tag ~= "mstackermid" and tag ~= "mstackerbot" then
                                tag = "mtext"
                            end
                            local detail = specification.detail
                            local text   = start_tagged(tag, detail and { detail = detail } or nil)
                            setattr(start,a_tagged,text)
                            local list = getlist(start)
                            if not list then
                                -- empty list
                            elseif not attr then
                                -- box comes from strange place (cannot be reached here
                                -- because attr is known to be set at this point)
                                setattributes(list,a_tagged,text) -- only the first node ?
                            else
                                -- Beware, the first node in list is the actual list so we definitely
                                -- need to nest. This approach is a hack, maybe I'll make a proper
                                -- nesting feature to deal with this at another level. Here we just
                                -- fake structure by enforcing the inner one.
                                --
                                -- todo: have a local list with local tags that then get appended
                                --
                                local tagdata = specification.taglist
                                local common  = #tagdata + 1
                                -- Retags every node in the boxed list: entries of a node's own
                                -- tag list from index "common" onwards are restarted below the
                                -- current element, other nodes get the enclosing tag. Inline math
                                -- (math nodes with subtype 0 .. 1) is wrapped in an extra mrow.
                                local function runner(list,depth) -- quite inefficient
                                    local cache = { } -- we can have nested unboxed mess so best local to runner
                                    local keep  = nil
                                 -- local keep  = { } -- in case we might need to move keep outside
                                    for n, id, subtype in nextnode, list do
                                        local mth = id == math_code and subtype
                                        if mth == 0 then -- begin in line / nested math box like stackers
                                         -- insert(keep,text)
                                            keep   = text
                                            text   = start_tagged("mrow")
                                            common = common + 1
                                        end
                                        local aa = getattr(n,a_tagged)
                                        if aa then
                                            local ac = cache[aa]
                                            if not ac then
                                                local tagdata = taglist[aa].taglist
                                                local extra   = #tagdata
                                                if common <= extra then
                                                    for i=common,extra do
                                                        ac = restart_tagged(tagdata[i]) -- can be made faster
                                                    end
                                                    for i=common,extra do
                                                        stop_tagged() -- can be made faster
                                                    end
                                                else
                                                    ac = text
                                                end
                                                cache[aa] = ac
                                            end
                                            setattr(n,a_tagged,ac)
                                        else
                                            setattr(n,a_tagged,text)
                                        end
                                        if id == hlist_code or id == vlist_code then
                                         -- if subtype == lextensible_code then
                                         --     print("L")
                                         -- end
                                            runner(getlist(n),depth+1)
                                        elseif id == glyph_code then
                                         -- if subtype == gextensible_code then
                                         --     print("G")
                                         -- end
                                        elseif id == disc_code then -- this should not be needed
                                            local pre, post, replace = getdisc(n)
                                            if pre then
                                                runner(pre,depth+1)
                                            end
                                            if post then
                                                runner(post,depth+1)
                                            end
                                            if replace then
                                                runner(replace,depth+1)
                                            end
                                        end
                                        if mth == 1 then -- end in line
                                            stop_tagged()
                                         -- text   = remove(keep)
                                            text   = keep
                                            common = common - 1
                                        end
                                    end
                                end
                                runner(list,0)
                            end
                            stop_tagged()
                        end
                    end
                end
            elseif id == submlist_code then
                -- normally a hbox
                local list = getlist(start)
                if list then
                    local attr = getattr(start,a_tagged)
                    local last = attr and taglist[attr]
                    if last then
                        local tag    = last.tagname
                        local detail = last.detail
                        if tag == "maction" then
                            -- The detail of the maction tag identifies the action. This
                            -- used to test and push an undefined variable ("action"), so
                            -- the stack never got filled and the detail was never exported.
                            -- A missing detail is treated like an empty one.
                            if not detail or detail == "" or actionstack[#actionstack] == detail then
                                setattr(start,a_tagged,start_tagged("mrow"))
                                process(list)
                                stop_tagged()
                            else
                                insert(actionstack,detail)
                                setattr(start,a_tagged,start_tagged("mrow",{ detail = detail }))
                                process(list)
                                stop_tagged()
                                remove(actionstack)
                            end
                        elseif tag == "mstacker" then -- or tag == "mstackertop" or tag == "mstackermid" or tag == "mstackerbot" then
                            -- looks like it gets processed twice
                            -- do we still end up here ?
                            setattr(start,a_tagged,restart_tagged(attr)) -- so we just reuse the attribute
                            process(list)
                            stop_tagged()
                        else
                            setattr(start,a_tagged,start_tagged("mrow"))
                            process(list)
                            stop_tagged()
                        end
                    else -- never happens, we're always document
                        setattr(start,a_tagged,start_tagged("mrow"))
                        process(list)
                        stop_tagged()
                    end
                end
            elseif id == fraction_code then
                --
                -- if middle then we have a stacker!
                --
                local num   = getnumerator(start)
                local denom = getdenominator(start)
                local left  = getleftdelimiter(start)
                local right = getrightdelimiter(start)
                if left then
                    setattr(left,a_tagged,start_tagged("mo"))
                    process(left)
                    stop_tagged()
                end
                setattr(start,a_tagged,start_tagged("mfrac"))
                process(num)
                process(denom)
                stop_tagged()
                if right then
                    setattr(right,a_tagged,start_tagged("mo"))
                    process(right)
                    stop_tagged()
                end
            elseif id == choice_code then
                local display      = getchoice(start,1)
                local text         = getchoice(start,2)
                local script       = getchoice(start,3)
                local scriptscript = getchoice(start,4)
                if display then
                    process(display)
                end
                if text then
                    process(text)
                end
                if script then
                    process(script)
                end
                if scriptscript then
                    process(scriptscript)
                end
            elseif id == fence_code then
                local delimiter = getdelimiter(start)
             -- print(subtype,fencecodes[subtype],delimiter)
                if subtype == leftfence_code then
                    local properties = { }
                    insert(fencesstack,properties)
                    setattr(start,a_tagged,start_tagged("mfenced",properties)) -- needs checking
                    if delimiter then
                        start_tagged("ignore")
                        local chr = getchar(delimiter)
                        if chr ~= 0 then
                            properties.left = chr
                        end
                        process(delimiter)
                        stop_tagged()
                    end
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == middlefence_code then
                    if delimiter then
                        start_tagged("ignore")
                        local top = fencesstack[#fencesstack]
                        local chr = getchar(delimiter)
                        -- top is nil when there is no open fence on the stack; in that
                        -- case there is nothing to record the middle character in
                        if top and chr ~= 0 then
                            local mid = top.middle
                            if mid then
                                mid[#mid+1] = chr
                            else
                                top.middle = { chr }
                            end
                        end
                        process(delimiter)
                        stop_tagged()
                    end
                    stop_tagged()        -- end of subsequence
                    start_tagged("mrow") -- begin of subsequence
                elseif subtype == rightfence_code then
                    local properties = remove(fencesstack)
                    if not properties then
                        report_tags("missing right fence")
                        properties = { }
                    end
                    if delimiter then
                        start_tagged("ignore")
                        local chr = getchar(delimiter)
                        if chr ~= 0 then
                            properties.right = chr
                        end
                        process(delimiter)
                        stop_tagged()
                    end
                    stop_tagged() -- end of subsequence
                    stop_tagged()
                elseif subtype == operatorfence_code then
                    -- the same as left but different key
                    local properties = { }
                    insert(fencesstack,properties)
                    setattr(start,a_tagged,start_tagged("mfenced",properties)) -- needs checking
                    if delimiter then
                        local chr = getchar(delimiter)
                        if chr ~= 0 then
                            properties.operator = chr
                        end
                        process(delimiter)
                    end
                    processsubsup(start)
                    start_tagged("mrow") -- begin of subsequence
                end
            elseif id == radical_code then
                if subtype == hextensible_code then
                    -- eventually we have no radical but just some box
                else
                    local left   = getleftdelimiter(start)
                    local right  = getrightdelimiter(start)
                    local degree = getdegree(start)
                    if left then
                        start_tagged("ignore")
                        process(left) -- root symbol, ignored
                        stop_tagged()
                    end
                    if right then
                        start_tagged("ignore")
                        process(right) -- actuarian symbol, ignored
                        stop_tagged()
                    end
                    if degree and hascontent(degree) then
                        setattr(start,a_tagged,start_tagged("mroot"))
                        processsubsup(start)
                        process(degree)
                        stop_tagged()
                    else
                        setattr(start,a_tagged,start_tagged("msqrt"))
                        processsubsup(start)
                        stop_tagged()
                    end
                end
            elseif id == accent_code then
                local topaccent    = gettop(start)
                local bottomaccent = getbottom(start)
                if bottomaccent then
                    if topaccent then
                        setattr(start,a_tagged,start_tagged("munderover", {
                            accent      = true,
                            top         = getunicode(topaccent),
                            bottom      = getunicode(bottomaccent),
                            topfixed    = subtype == fixedtopaccent_code    or subtype == fixedbothaccent_code,
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bottomaccent)
                        process(topaccent)
                        stop_tagged()
                    else
                        setattr(start,a_tagged,start_tagged("munder", {
                            accent      = true,
                            bottom      = getunicode(bottomaccent),
                            bottomfixed = subtype == fixedbottomaccent_code or subtype == fixedbothaccent_code,
                        }))
                        processsubsup(start)
                        process(bottomaccent)
                        stop_tagged()
                    end
                elseif topaccent then
                    setattr(start,a_tagged,start_tagged("mover", {
                        accent   = true,
                        top      = getunicode(topaccent),
                        topfixed = subtype == fixedtopaccent_code or subtype == fixedbothaccent_code,
                    }))
                    processsubsup(start)
                    process(topaccent)
                    stop_tagged()
                else
                    processsubsup(start)
                end
            elseif id == glue_code then
                -- before processing, so other intermathglue is not tagged
                local em = fonts.hashes.emwidths[nuts.getfont(start)]
                local wd = getwidth(start)
                if em and wd then
                    setattr(start,a_tagged,start_tagged("mspace",{ emfactor = wd/em }))
                    stop_tagged()
                end
            else -- rule boundary
             -- setattr(start,a_tagged,start_tagged("merror", { detail = nodecodes[id] }))
             -- stop_tagged()
            end
        end
     -- showtag(start,id,false)
    end
    if mtexttag then
        stop_tagged()
    end
end

-- Entry point: wraps the whole formula in a "math" element (mode "display" when
-- the a_mathmode attribute of head equals 1, "inline" otherwise) with an "mrow"
-- inside it and then tags the list. The style and penalties arguments are not
-- used here.

function noads.handlers.tags(head,style,penalties)
    start_tagged("math", { mode = (getattr(head,a_mathmode) == 1) and "display" or "inline" })
    setattr(head,a_tagged,start_tagged("mrow"))
 -- showtag(head,getid(head),true)
    process(head)
 -- showtag(head,getid(head),false)
    stop_tagged()
    stop_tagged()
end

do

    -- This one is meant for tracing (in m4all/m4mbo where it complements some other
    -- tracing) but it actually can also replace the embedding feature although that
    -- one might be better when we have more complex code with dependencies outside
    -- the blob. I'll deal with that when it's needed (trivial). The current
    -- interface is rather minimalistic.

    local enabled = false -- the export action has been enabled
    local export  = false -- the exporter function while collecting, false otherwise
    local allmath = false -- list with collected results, false before the first start
    local warned  = false -- the "tagging is not enabled" message has been issued

    -- Starts (or restarts) collecting; each start begins with an empty list. This
    -- only works when tagging is enabled, otherwise a message is reported once.

    function mathematics.startcollecting()
        if structures.tags.enabled() then
            if not enabled then
                nodes.tasks.enableaction("math", "noads.handlers.export")
            end
            enabled = true
            export  = structures.tags.localexport
            allmath = { }
        elseif not warned then
            report_tags("math collecting only works when tagging is enabled")
            warned = true
        end
    end

    -- Stops collecting; what has been collected so far is kept.

    function mathematics.stopcollecting()
        export = false
    end

    -- Returns the collected results: concatenated into one string when asstring
    -- is set, else as a list (an empty one when nothing has been collected).

    local function collected(asstring)
        local a = allmath or { }
        return asstring and concat(a) or a
    end

    mathematics.collected = collected

    interfaces.implement {
        name      = "startcollectingmath",
     -- public    = true,
        protected = true,
        actions   = mathematics.startcollecting
    }

    interfaces.implement {
        name      = "stopcollectingmath",
     -- public    = true,
        protected = true,
        actions   = mathematics.stopcollecting
    }

    -- Saves the collected math to a file when a filename is given, otherwise
    -- assigns it to a buffer (the value of interfaces.variables.yes selects the
    -- buffer with the empty name).

    interfaces.implement {
        name      = "processcollectedmath",
     -- public    = true,
        protected = true,
        arguments = "2 strings",
        actions   = function(filename,buffername)
            if filename and filename ~= "" then
                io.savedata(filename,collected(true))
            elseif buffername then
                buffers.assign(buffername == interfaces.variables.yes and "" or buffername,collected(true))
            else
                return collected
            end
        end
    }

    -- As a value this gives the number of collected entries, otherwise it scans
    -- an integer and passes the entry with that index to context.

    interfaces.implement {
        name      = "collectedmath",
        usage     = "value",
        protected = true,
        public    = true,
        actions   = function(what)
            if what == "value" then
                return tokens.values.integer, allmath and #allmath or 0
            else
                context(allmath and allmath[tokens.scanners.integer()] or nil)
            end
        end
    }

    -- The math task itself: while collecting, the export of each formula is
    -- appended to the list. The head is passed on untouched.

    function noads.handlers.export(head)
        if export then
            allmath[#allmath+1] = export(head)
        end
        return head
    end

    nodes.tasks.appendaction("math", "finalizers", "noads.handlers.export", nil, "nonut", "disabled")

end