if not modules then modules = { } end modules ['lpdf-tag'] = {
    version   = 1.001,
    comment   = "companion to lpdf-tag.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

local next, type = next, type
local format, match, gmatch = string.format, string.match, string.gmatch
local concat, sortedhash = table.concat, table.sortedhash
local lpegmatch, P, S, C = lpeg.match, lpeg.P, lpeg.S, lpeg.C
local settings_to_hash = utilities.parsers.settings_to_hash
local formatters = string.formatters

local trace_tags = false  trackers.register("structures.tags",      function(v) trace_tags = v end)
local trace_info = false  trackers.register("structures.tags.info", function(v) trace_info = v end)

local report_tags = logs.reporter("backend","tags")

local pdfbackend          = backends.registered.pdf
local nodeinjections      = pdfbackend.nodeinjections
local codeinjections      = pdfbackend.codeinjections

local enableaction        = nodes.tasks.enableaction
local disableaction       = nodes.tasks.disableaction

local lpdf                = lpdf
local pdfdictionary       = lpdf.dictionary
local pdfarray            = lpdf.array
local pdfboolean          = lpdf.boolean
local pdfconstant         = lpdf.constant
local pdfreference        = lpdf.reference
local pdfunicode          = lpdf.unicode
local pdfmakenametree     = lpdf.makenametree

local addtocatalog        = lpdf.addtocatalog
local addtopageattributes = lpdf.addtopageattributes

local pdfflushobject      = lpdf.flushobject
local pdfreserveobject    = lpdf.reserveobject
local pdfpagereference    = lpdf.pagereference

local texgetcount         = tex.getcount

local nodes               = nodes
local nodecodes           = nodes.nodecodes

local hlist_code          = nodecodes.hlist
local vlist_code          = nodecodes.vlist
local glyph_code          = nodecodes.glyph

local a_tagged            = attributes.private('tagged')
local a_image             = attributes.private('image')

local nuts                = nodes.nuts

local nodepool            = nuts.pool
local setstate            = nodepool.setstate
local register            = nodepool.register

local getid               = nuts.getid
local getattr             = nuts.getattr
local getprev             = nuts.getprev
local getnext             = nuts.getnext
local getlist             = nuts.getlist
local getchar             = nuts.getchar
local tailoflist          = nuts.tail

local setlink             = nuts.setlink
local setlist             = nuts.setlist

local copy_node           = nuts.copy
local tosequence          = nuts.tosequence

local nextnode            = nuts.traversers.node

-- The first four are created on the first call of nodeinjections.addtags.

local structure_kids -- delayed
local structure_ref  -- delayed
local parent_ref     -- delayed
local root           -- delayed
local names          = { }    -- element id -> reference, input for the IDTree
local tree           = { }    -- page number -> per page array of element references
local firstintree    = false  -- first page number seen by initializepage
local lastintree     = false  -- highest page number seen by initializepage
local elements       = { }    -- fulltag -> element record (see makeelement)

local structurestags = structures.tags
local taglist        = structurestags.taglist
local specifications = structurestags.specifications
local usedlabels     = structurestags.labels
local properties     = structurestags.properties
local usewithcare    = structurestags.usewithcare

local usedmapping    = { }    -- tagname -> true, drives the RoleMap in finishstructure

-- Explicit mappings registered with codeinjections.maptag. This table used to
-- be an undeclared global, so the first call of maptag raised an "attempt to
-- index a nil value" error.
-- NOTE(review): nothing in this file reads the table (yet); confirm whether
-- the role map is supposed to consult it.

local mapping        = { }

----- tagsplitter    = structurestags.patterns.splitter

local embeddedtags     = false -- true will id all, for tracing, otherwise table
local f_tagid          = formatters["%s-%04i"]
local embeddedfilelist = pdfarray() -- /AF crap

-- for testing, not that it was ever used:

-- A string value is split on commas and spaces and each tag name found is
-- flagged in the embeddedtags table; any other true value flags all tags, but
-- only when nothing was configured before.

directives.register("structures.tags.embed",function(v)
    if type(v) == "string" then
        if type(embeddedtags) ~= "table" then
            embeddedtags = { }
        end
        for s in gmatch(v,"([^, ]+)") do
            embeddedtags[s] = true
        end
    elseif v and not embeddedtags then
        embeddedtags = true
    end
end)

-- for old times sake, not that it was ever used:

directives.register("structures.tags.embedmath",function(v)
    if not v then
        -- only enable
    elseif embeddedtags == true then
        -- already all tagged
    elseif embeddedtags then
        embeddedtags.math = true
    else
        embeddedtags = { math = true }
    end
end)

-- Records that tag 'original' maps onto 'target'; 'kind' defaults to "inline".

function codeinjections.maptag(original,target,kind)
    mapping[original] = { target, kind or "inline" }
end

-- mostly the same as the annotations tree

local usenamespace = false

experiments.register("structures.tags.namespaces", function(v) usenamespace = v end)

local namespaceurls = {
    mathml = "http://www.w3.org/1998/Math/MathML",
}

-- Document finalizer: flushes the parent tree, id tree, role map(s), the
-- structure tree root and the kids array of every element. Nothing happens
-- when no page was tagged (no root) or when the root has no kids.

local function finishstructure()
    if root and #structure_kids > 0 then
        -- the Nums array holds pairs: zero based page index, reference to the page list
        local nums = pdfarray()
        local n    = 0
        for i=firstintree,lastintree do
            local ti = tree[i]
            if ti then
                n = n + 1 ; nums[n] = i - 1
                n = n + 1 ; nums[n] = pdfreference(pdfflushobject(ti))
            else
                report_tags("beware: missing page %i in tree", i)
            end
        end
        local parenttree = pdfdictionary {
            Nums = nums
        }
        local idtree = pdfmakenametree(names)
        --
        local rolemaps = usenamespace and { }
        local rolemap  = pdfdictionary() -- main one
        for k, v in next, usedmapping do
            k = usedlabels[k] or k
            local p = properties[k]
            if not p then
                -- was a bare print; use the reporter like the rest of this module
                report_tags("beware: undefined tag %s", k)
            end
            local namespace = p and p.namespace
            if rolemaps and namespace then
                local r = rolemaps[namespace]
                if not r then
                    r = pdfdictionary()
                    rolemaps[namespace] = r
                end
                r[k] = pdfconstant(k) -- maybe other tag
            else
                rolemap[k] = pdfconstant(p and p.pdf or "Span") -- or "Div"
            end
        end
        local namespaces = rolemaps and next(rolemaps) and pdfarray { } or nil
        if namespaces then
            for k, v in sortedhash(rolemaps) do
                namespaces[#namespaces+1] = pdfdictionary {
                    Type      = pdfconstant("Namespace"),
                    NS        = pdfunicode(namespaceurls[k] or k),
                    RoleMapNS = v,
                }
            end
        end
        local structuretree = pdfdictionary {
            Type       = pdfconstant("StructTreeRoot"),
            K          = pdfreference(pdfflushobject(structure_kids)),
            ParentTree = pdfreference(pdfflushobject(parent_ref,parenttree)),
            IDTree     = idtree,
            RoleMap    = rolemap, -- sorted ?
            Namespaces = namespaces,
        }
        pdfflushobject(structure_ref,structuretree)
        addtocatalog("StructTreeRoot",pdfreference(structure_ref))
        --
        if lpdf.majorversion() == 1 then
            local markinfo = pdfdictionary {
                Marked         = pdfboolean(true) or nil,
             -- UserProperties = pdfboolean(true), -- maybe some day
             -- Suspects       = pdfboolean(true) or nil,
             -- AF             = #embeddedfilelist > 0 and pdfreference(pdfflushobject(embeddedfilelist)) or nil,
            }
            addtocatalog("MarkInfo",pdfreference(pdfflushobject(markinfo)))
        end
        --
        for fulltag, element in sortedhash(elements) do -- sorting is easier on comparing pdf
            local kids = element.kids
         -- if element.tag == "link" then
         --     local d = kids[2]
         --     if type(d) == "table" then
         --         local refatt = element.refatt
         --         if refatt then
         --             local refobj = codeinjections.getrefobj(refatt)
         --             if refobj then
         --                 d.Obj = pdfreference(refobj)
         --             end
         --         end
         --     end
         -- end
            pdfflushobject(element.knum,kids)
        end
    end
end

lpdf.registerdocumentfinalizer(finishstructure,"document structure")

-- per page state: next marked content index, page reference, page number and
-- the array with the parents of the marked content on that page

local index, pageref, pagenum, list = 0, nil, 0, nil

local pdf_mcr            = pdfconstant("MCR")
local pdf_struct_element = pdfconstant("StructElem")
local pdf_s              = pdfconstant("S")
local pdf_objr           = pdfconstant("OBJR")

-- Resets the per page state for the page that is being shipped out and
-- registers a fresh parent list for it in the tree.

local function initializepage()
    index   = 0
    pagenum = texgetcount("realpageno")
    pageref = pdfreference(pdfpagereference(pagenum))
    list    = pdfarray()
    -- hm, can be later than 1
    if not firstintree then
        if pagenum > 1 then
            report_tags("beware: first page in tree is %i", pagenum)
        end
        firstintree = pagenum
        lastintree  = pagenum
    end
    if pagenum > lastintree then
        lastintree = pagenum
    else
     -- report_tags("beware: page order problem in tree at page %i", pagenum)
    end
    tree[pagenum] = list -- we can flush after done, todo
end

-- Adds the structure related entries to the attributes of the current page.

local function finishpage()
    -- flush what can be flushed
    addtopageattributes("StructParents",pagenum-1)
    -- there might be more
    -- this passed the undeclared global 's' (nil); the /S constant defined
    -- above is what is meant here (tab order follows the structure)
    addtopageattributes("Tabs",pdf_s)
end

-- here we can flush and free elements that are finished

local pdf_userproperties = pdfconstant("UserProperties")

-- /O /Table
-- /Headers [ ]

-- Turns a non empty key/value table into a user properties attribute
-- dictionary; returns nothing for nil or an empty table. Only referenced from
-- a commented entry in makeelement.

local function makeattribute(t)
    if t and next(t) then
        local properties = pdfarray()
        for k, v in sortedhash(t) do -- easier on comparing pdf
            properties[#properties+1] = pdfdictionary {
                N = pdfunicode(k),
                V = pdfunicode(v),
            }
        end
        return pdfdictionary {
            O = pdf_userproperties,
            P = properties,
        }
    end
end

-- Creates and flushes the structure element for 'fulltag', appends it to the
-- kids of 'parent' and registers it in 'elements'.
--
-- Returns false for an "ignore" tag (the caller turns the content into an
-- artifact), true for the mstacker tags (the caller skips the level) and the
-- element record otherwise: tag, pref (reference), kids, knum (reserved
-- object number of the kids array) and pnum (page of creation).

local function makeelement(fulltag,parent)
    local specification = specifications[fulltag]
    local tagname       = specification.tagname
    local tagnameused   = tagname
    local attributes    = nil
    if tagname == "ignore" then
        return false
    elseif tagname == "mstackertop" or tagname == "mstackerbot" or tagname == "mstackermid" then
        -- TODO
        return true
    elseif tagname == "tabulatecell" then
        local d = structurestags.gettabulatecell(fulltag)
        if d and d.kind == 1 then
            tagnameused = "tabulateheadcell"
        end
    elseif tagname == "tablecell" then
        -- will become a plugin model
        local d = structurestags.gettablecell(fulltag)
        if d then
            if d.kind == 1 then
                tagnameused = "tableheadcell"
            end
            local rows = d.rows    or 1
            local cols = d.columns or 1
            if rows > 1 or cols > 1 then
                attributes = pdfdictionary {
                    O       = pdfconstant("Table"),
                    RowSpan = rows > 1 and rows or nil,
                    ColSpan = cols > 1 and cols or nil,
                }
            end
        end
    end
    --
    local detail   = specification.detail
    local userdata = specification.userdata -- not used (yet), see the commented A entry below
    --
    usedmapping[tagname] = true
    --
    -- specification.attribute is unique
    --
    local id = nil
    local af = nil
    if embeddedtags then
        local tagindex = specification.tagindex
        if embeddedtags == true or embeddedtags[tagname] then
            id = f_tagid(tagname,tagindex)
            af = job.fileobjreferences.collected[id]
            if af then
                local r = pdfreference(af)
                af = pdfarray { r }
             -- embeddedfilelist[#embeddedfilelist+1] = r
            end
        end
    end
    --
    local k = pdfarray()
    local r = pdfreserveobject() -- the kids array is flushed into this object by finishstructure
    local t = usedlabels[tagnameused] or tagnameused
 -- local a = nil
    local d = pdfdictionary {
        Type       = pdf_struct_element,
        S          = pdfconstant(t),
        ID         = id,
        T          = detail or nil,
        P          = parent.pref,
        Pg         = pageref,
        K          = pdfreference(r),
     -- A          = a and makeattribute(a) or nil,
        A          = attributes,
     -- Alt        = " Who cares ",
     -- ActualText = " Hi Hans ",
        AF         = af,
    }
    local s = pdfreference(pdfflushobject(d))
    if id and names then
        names[id] = s
    end
    local kids = parent.kids
    kids[#kids+1] = s
    local e = {
        tag  = t,
        pref = s,
        kids = k,
        knum = r,
        pnum = pagenum
    }
    elements[fulltag] = e
    return e
end

-- The property list carries the marked content id. The collapsed source read
-- "/%s <> BDC", which dropped the second argument that makecontent passes.

local f_BDC = formatters["/%s <</MCID %i>> BDC"]

local a_destination = attributes.private('destination')
local a_reference   = attributes.private('reference')

-- Registers one piece of marked content with 'parent' and in the page list,
-- and returns the BDC operator string that opens it. Images always get a
-- marked content reference dictionary with an Alt entry; other content gets
-- the bare index when the parent was created on the current page and a
-- reference dictionary otherwise.

local function makecontent(start,parent,id,specification)
    local tag  = parent.tag
    local kids = parent.kids
    local last = index
    if id == "image" then
        local list  = specification.taglist -- shadows the page list inside this branch only
        local data  = usewithcare.images[list[#list]]
        local label = data and data.label
        local d = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = last,
            Alt  = pdfunicode(label ~= "" and label or "image"),
        }
        kids[#kids+1] = d
    elseif pagenum == parent.pnum then
        kids[#kids+1] = last
     -- if tag == "link" then
     --     local ra = getattr(start,a_reference)
     --     if ra then
     --         parent.refatt = ra
     --         kids[#kids+1] = pdfdictionary {
     --             Type = pdf_objr,
     --             Obj  = pdfreference(0),
     --         }
     --     end
     -- end
    else
        local d = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = last,
        }
     -- kids[#kids+1] = pdfreference(pdfflushobject(d))
        kids[#kids+1] = d
    end
    --
    index = index + 1
    list[index] = parent.pref -- page related list
    --
    return f_BDC(tag,last)
end

-- Returns the operator string that opens an artifact; the argument is not used.

local function makeignore(specification)
    return "/Artifact BMC"
end

-- no need to adapt head, as we always operate on lists

local EMCliteral = nil
local visualize  = nil

local enabled = true
local reduced = false

updaters.register("tagging.state.disable", function() enabled = false end)
updaters.register("tagging.state.enable",  function() enabled = true  end)

directives.register("tagging.state.reduced", function(v) reduced = v end)

function codeinjections.reducetags()
    report_tags("only outer level document tag used")
    reduced = true
end

-- Page handler: collects runs of nodes that share a tagging attribute and
-- wraps each run in a BDC (or artifact BMC) literal and an EMC literal,
-- creating the structure elements that are needed on the way.
--
-- The given head is returned in all cases. The disabled path used to return
-- nothing, while the regular path returned head.

function nodeinjections.addtags(head)

    if not enabled then
        return head
    end

    if not EMCliteral then
        EMCliteral = register(setstate("EMC"))
    end

    local last      = nil
    local ranges    = { }
    local range     = nil
    local nofranges = 0

    if not root then
        structure_kids = pdfarray()
        structure_ref  = pdfreserveobject()
        parent_ref     = pdfreserveobject()
        root           = { pref = pdfreference(structure_ref), kids = structure_kids }
        names          = pdfarray()
    end

    initializepage()

    if reduced then
        -- one range that covers the whole list, tagged as a single document element
        local list = getlist(head)
        if list then
            ranges = {
                { 1, "glyph", list, tailoflist(list), head }
            }
            nofranges = 1
            taglist = {
                {
                    attribute = 1,
                    metadata  = { },
                    tagindex  = 1,
                    taglist   = { "document>1" },
                    tagname   = "document",
                },
            }
        end
    else
        local function collectranges(head,list)
            for n, id in nextnode, head do
                if id == glyph_code then
                    -- maybe also disc
                    if getchar(n) ~= 0 then
                        local at = getattr(n,a_tagged) or false -- false: pagebody or so, so artifact
                        if last ~= at then
                            range = { at, "glyph", n, n, list } -- attr id start stop list
                            nofranges = nofranges + 1
                            ranges[nofranges] = range
                            last = at
                        elseif range then
                            range[4] = n -- stop
                        end
                    end
                elseif id == hlist_code or id == vlist_code then
                    local at = getattr(n,a_image)
                    if at then
                        local at = getattr(n,a_tagged) or false -- false: pagebody or so, so artifact
                        nofranges = nofranges + 1
                        ranges[nofranges] = { at, "image", n, n, list } -- attr id start stop list
                        last = nil
                    else
                        local list = getlist(n)
                        if list then
                            collectranges(list,n)
                        end
                    end
                end
            end
        end
        collectranges(head)
    end

    -- inspect(taglist)
    -- inspect(ranges)

    if trace_tags then
        for i=1,nofranges do
            local range = ranges[i]
            local attr  = range[1]
            local id    = range[2]
            local start = range[3]
            local stop  = range[4]
            local tags  = taglist[attr]
            if tags then -- not ok ... only first lines
                report_tags("%s => %s : %05i % t",tosequence(start,start),tosequence(stop,stop),attr,tags.taglist)
            end
        end
    end

    local top    = nil
    local noftop = 0

    -- Links 'literal' (and the optional 'left' node) in front of start and a
    -- copy of the EMC literal (preceded by the optional 'right' node) after
    -- stop; when start has no predecessor the enclosing list is repointed.

    local function inject(start,stop,list,literal,left,right)
        local prev = getprev(start)
        if prev then
            setlink(prev,literal)
        end
        if left then
            setlink(literal,left,start)
        else
            setlink(literal,start)
        end
        if list and not prev then
            setlist(list,literal)
        end
        local literal = copy_node(EMCliteral)
        -- use insert instead:
        local nxt = getnext(stop) -- was named 'next', which hid the builtin
        if nxt then
            setlink(literal,nxt)
        end
        if right then
            setlink(stop,right,literal)
        else
            setlink(stop,literal)
        end
    end

    -- local function inject(start,stop,list,literal,left,right)
    --     setlink(getprev(start) or list or true,literal,left or true,start)
    --     setlink(stop,right or true,copy_node(EMCliteral),getnext(stop))
    -- end

    for i=1,nofranges do
        local range = ranges[i]
        local attr  = range[1]
        local id    = range[2]
        local start = range[3]
        local stop  = range[4]
        local list  = range[5]
        if attr then
            local specification = taglist[attr]
            local taglist       = specification.taglist
            local noftags       = #taglist
            local common        = 0
            local literal       = nil
            local ignore        = false
            if top then
                -- length of the tag chain prefix shared with the previous range
                for i=1,noftags >= noftop and noftop or noftags do
                    if top[i] == taglist[i] then
                        common = i
                    else
                        break
                    end
                end
            end
            local prev = common > 0 and elements[taglist[common]] or root
            for j=common+1,noftags do
                local tag = taglist[j]
                local prv = elements[tag] or makeelement(tag,prev)
                if prv == false then
                    -- ignore this one
                    prev   = false
                    ignore = true
                    break
                elseif prv == true then
                    -- skip this one
                else
                    prev = prv
                end
            end
            if prev then
                literal = setstate(makecontent(start,prev,id,specification))
            elseif ignore then
                literal = setstate(makeignore(specification))
            else
                -- maybe also ignore or maybe better: comment or so
            end
            if literal then
                local left,right
                if trace_info then
                    local name = specification.tagname
                    if name then
                        if not visualize then
                            visualize = nodes.visualizers.register("tags")
                        end
                        left  = visualize(name)
                        right = visualize()
                    end
                end
                inject(start,stop,list,literal,left,right)
            end
            top    = taglist
            noftop = noftags
        else
            -- untagged content becomes an artifact; no specification is in
            -- scope here (the old code passed an undeclared global)
            local literal = setstate(makeignore())
            inject(start,stop,list,literal)
        end
    end

    finishpage()

    return head

end

-- variant: more structure but funny collapsing in viewer

-- function nodeinjections.addtags(head)
--
--     local last, ranges, range = nil, { }, nil
--
--     local function collectranges(head,list)
--         for n, id in nextnode, head do
--             if id == glyph_code then
--                 local at = getattr(n,a_tagged)
--                 if not at then
--                     range = nil
--                 elseif last ~= at then
--                     range = { at, "glyph", n, n, list } -- attr id start stop list
--                     ranges[#ranges+1] = range
--                     last = at
--                 elseif range then
--                     range[4] = n -- stop
--                 end
--             elseif id == hlist_code or id == vlist_code then
--                 local at = getattr(n,a_image)
--                 if at then
--                     local at = getattr(n,a_tagged)
--                     if not at then
--                         range = nil
--                     else
--                         ranges[#ranges+1] = { at, "image", n, n, list } -- attr id start stop list
--                     end
--                     last = nil
--                 else
--                     local nl = getlist(n)
--                     collectranges(nl,n)
--                 end
--             end
--         end
--     end
--
--     initializepage()
--
--     collectranges(head)
--
--     if trace_tags then
--         for i=1,#ranges do
--             local range = ranges[i]
--             local attr  = range[1]
--             local id    = range[2]
--             local start = range[3]
--             local stop  = range[4]
--             local tags  = taglist[attr]
--             if tags then -- not ok ...
--             -- only first lines
--                 report_tags("%s => %s : %05i % t",tosequence(start,start),tosequence(stop,stop),attr,tags.taglist)
--             end
--         end
--     end
--
--     local top    = nil
--     local noftop = 0
--     local last   = nil
--
--     for i=1,#ranges do
--         local range         = ranges[i]
--         local attr          = range[1]
--         local id            = range[2]
--         local start         = range[3]
--         local stop          = range[4]
--         local list          = range[5]
--         local specification = taglist[attr]
--         local taglist       = specification.taglist
--         local noftags       = #taglist
--         local tag           = nil
--         local common        = 0
--
--         local prev = root
--
--         if top then
--             for i=1,noftags >= noftop and noftop or noftags do
--                 if top[i] == taglist[i] then
--                     common = i
--                 else
--                     break
--                 end
--             end
--         end
--
--         local result = { }
--         local r = noftop - common
--         if r > 0 then
--             for i=1,r do
--                 result[i] = "EMC"
--             end
--         end
--
--         local prev = common > 0 and elements[taglist[common]] or root
--
--         for j=common+1,noftags do
--             local tag = taglist[j]
--             local prv = elements[tag] or makeelement(tag,prev)
--          -- if prv == false then
--          --     -- ignore this one
--          --     prev = false
--          --     break
--          -- elseif prv == true then
--          --     -- skip this one
--          -- else
--                 prev = prv
--                 r = r + 1
--                 result[r] = makecontent(start,prev,id)
--          -- end
--         end
--
--         if r > 0 then
--             local literal = setstate(concat(result,"\n"))
--             -- use insert instead:
--             local literal = setstate(result)
--             local prev = getprev(start)
--             if prev then
--                 setlink(prev,literal)
--             end
--             setlink(literal,start)
--             if list and getlist(list) == start then
--                 setlist(list,literal)
--             end
--         end
--
--         top    = taglist
--         noftop = noftags
--         last   = stop
--
--     end
--
--     if last and noftop > 0 then
--         local result = { }
--         for i=1,noftop do
--             result[i] = "EMC"
--         end
--         local literal = setstate(concat(result,"\n"))
--         -- use insert instead:
--         local next = getnext(last)
--         if next then
--             setlink(literal,next)
--         end
--         setlink(last,literal)
--     end
--
--     finishpage()
--
--     return head
--
-- end

-- this belongs elsewhere (export is
-- not pdf related)

local permitted = true  -- cleared for good once tagging support gets blocked
local enabled   = false -- true while the tag handlers are switched on

-- Blocks tagging when called with the boolean false: handlers that are
-- active get disabled and enabling later on is no longer permitted. Any
-- other value leaves the state untouched.

function codeinjections.settaggingsupport(option)
    if option ~= false then
        return
    end
    if enabled then
        disableaction("shipouts","structures.tags.handler")
     -- disableaction("shipouts","nodes.handlers.accessibility") -- maybe not this one
        disableaction("math","noads.handlers.tags")
        enabled = false
    end
    if not permitted then
        return
    end
    if trace_tags then
        report_tags("blocking structure tags")
    end
    permitted = false
end

-- Installs nodeinjections.addtags as the structure tag handler and switches
-- on the shipout and math actions, unless tagging got blocked or the
-- handlers are already active.

function codeinjections.enabletags()
    if enabled or not permitted then
        return
    end
    structures.tags.handler = nodeinjections.addtags
    enableaction("shipouts","structures.tags.handler")
 -- enableaction("shipouts","nodes.handlers.accessibility")
    enableaction("math","noads.handlers.tags")
    -- maybe also textblock
    if trace_tags then
        report_tags("enabling structure tags")
    end
    enabled = true
end