if not modules then modules = { } end modules ['lxml-tex'] = { version = 1.001, comment = "companion to lxml-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } -- Because we split and resolve entities we use the direct printing -- interface and not the context one. If we ever do that there will -- be an cldf-xml helper library. local concat, insert, remove, sortedkeys, reversed = table.concat, table.insert, table.remove, table.sortedkeys, table.reverse local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select local lpegmatch = lpeg.match local P, S, C = lpeg.P, lpeg.S, lpeg.C local patterns = lpeg.patterns local setmetatableindex = table.setmetatableindex local formatters, strip, collapse = string.formatters, string.strip, utilities.strings.collapse local tex, xml = tex, xml local lowerchars, upperchars, lettered = characters.lower, characters.upper, characters.lettered local basename, dirname, joinfile = file.basename, file.dirname, file.join lxml = lxml or { } local lxml = lxml local catcodenumbers = catcodes.numbers local ctxcatcodes = catcodenumbers.ctxcatcodes -- todo: use different method local notcatcodes = catcodenumbers.notcatcodes -- todo: use different method local commands = commands local context = context local contextsprint = context.sprint -- with catcodes (here we use fast variants, but with option for tracing) local ctx_doif = commands.doif local ctx_doifnot = commands.doifnot local ctx_doifelse = commands.doifelse local synctex = luatex.synctex local implement = interfaces.implement local xmlelements = xml.elements local xmlcollected = xml.collected local xmlsetproperty = xml.setproperty local xmlwithelements = xml.withelements local xmlserialize = xml.serialize local xmlcollect = 
xml.collect local xmltext = xml.text local xmltostring = xml.tostring local xmlapplylpath = xml.applylpath local xmlunspecialized = xml.unspecialized local xmldespecialized = xml.despecialized -- nicer in expanded xml local xmlprivatetoken = xml.privatetoken local xmlstripelement = xml.stripelement local xmlinclusion = xml.inclusion local xmlinclusions = xml.inclusions local xmlbadinclusions = xml.badinclusions local xmlcontent = xml.content local xmllastmatch = xml.lastmatch local xmlpushmatch = xml.pushmatch local xmlpopmatch = xml.popmatch local xmlstring = xml.string local xmlserializetotext = xml.serializetotext local xmlrename = xml.rename local variables = interfaces and interfaces.variables or { } local parsers = utilities.parsers local settings_to_hash = parsers.settings_to_hash local settings_to_set = parsers.settings_to_set local options_to_hash = parsers.options_to_hash local options_to_array = parsers.options_to_array local insertbeforevalue = utilities.tables.insertbeforevalue local insertaftervalue = utilities.tables.insertaftervalue local resolveprefix = resolvers.resolve local starttiming = statistics.starttiming local stoptiming = statistics.stoptiming local trace_setups = false trackers.register("lxml.setups", function(v) trace_setups = v end) local trace_loading = false trackers.register("lxml.loading", function(v) trace_loading = v end) local trace_access = false trackers.register("lxml.access", function(v) trace_access = v end) local trace_comments = false trackers.register("lxml.comments", function(v) trace_comments = v end) local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end) local trace_selectors = false trackers.register("lxml.selectors",function(v) trace_selectors = v end) local report_lxml = logs.reporter("lxml","tex") local report_xml = logs.reporter("xml","tex") local forceraw = false local p_texescape = patterns.texescape local tokenizedxmlw = context.tokenizedcs and 
context.tokenizedcs.xmlw

directives.enable("xml.path.keeplastmatch")

-- tex entities

lxml.entities = lxml.entities or { }

storage.register("lxml/entities",lxml.entities,"lxml.entities")

local xmlentities     = xml.entities           -- these are more or less standard entities
local texentities     = lxml.entities          -- these are specific for a tex run

local reparsedentity  = xml.reparsedentitylpeg -- \Ux{...}
local unescapedentity = xml.unescapedentitylpeg

-- The pattern that lxml.resolvedentity applies to names found in neither of
-- the two entity tables; the start/stop functions below swap it.

local parsedentity    = reparsedentity
local useelement      = false -- probably no longer needed / used

function lxml.startunescaped()
    parsedentity = unescapedentity
end

function lxml.stopunescaped()
    parsedentity = reparsedentity
end

directives.register("lxml.entities.useelement",function(v)
    useelement = v
end)

-- Stores value under key in the tex specific entity table (lxml.entities).

function lxml.registerentity(key,value)
    texentities[key] = value
    if trace_entities then
        report_xml("registering tex entity %a as %a",key,value)
    end
end

-- Flushes the entity with name str to tex. The lookup order is:
--
-- * raw mode   : the entity is passed on as &str;
-- * texentities: a function is called with str, anything else is printed
--                with context(...)
-- * xmlentities: a function is called and its result used; the value is
--                printed under notcatcodes
-- * parsedentity (hex and decimal forms); a second return value is treated
--                as a faulty entity and printed as-is
-- * useelement : the name is wrapped in \xmle{...}
-- * otherwise  : the bare name is printed under notcatcodes
--
-- Nothing is returned; all output goes through context / contextsprint.

function lxml.resolvedentity(str)
    if forceraw then
        -- should not happen as we then can as well bypass this function
        if trace_entities then
            report_xml("passing entity %a as &%s;",str,str)
        end
        context("&%s;",str)
    else
        local e = texentities[str]
        if e then
            local te = type(e)
            if te == "function" then
                if trace_entities then
                    report_xml("passing entity %a using function",str)
                end
                e(str)
            elseif e then
                if trace_entities then
                    report_xml("passing entity %a as %a using %a",str,e,"ctxcatcodes")
                end
                context(e)
            end
            return
        end
        local e = xmlentities[str]
        if e then
            local te = type(e)
            if te == "function" then
                e = e(str)
            end
            if e then
                if trace_entities then
                    report_xml("passing entity %a as %a using %a",str,e,"notcatcodes")
                end
                contextsprint(notcatcodes,e)
                return
            end
        end
        -- resolve hex and dec, todo: escape # & etc for ctxcatcodes
        -- normally this is already solved while loading the file
        local chr, err = lpegmatch(parsedentity,str)
        if chr then
            if parsedentity == reparsedentity then
                if trace_entities then
                    report_xml("passing entity %a as %a using %a",str,chr,"ctxcatcodes")
                end
                context(chr)
            else
                contextsprint(notcatcodes,chr)
                if trace_entities then
                    report_xml("passing entity %a as %a using %a",str,chr,"notcatcodes")
                end
            end
        elseif err then
            if trace_entities then
                report_xml("passing faulty entity %a as %a",str,err)
            end
            context(err)
        elseif useelement then
            local tag = upperchars(str)
            if trace_entities then
                report_xml("passing entity %a to \\xmle using tag %a",str,tag)
            end
            -- This branch used an undeclared 'texcatcodes' and printed 'e', which
            -- is always nil or false here because both lookups above failed. We
            -- fetch the catcode table the same way as ctxcatcodes / notcatcodes
            -- and pass the entity name. The lookup is kept local to this branch
            -- so no extra file level local is needed.
            -- NOTE(review): assumes catcodes.numbers has a texcatcodes entry and
            -- that \xmle takes the entity name; 'tag' is only reported -- confirm.
            local texcatcodes = catcodenumbers.texcatcodes
            contextsprint(texcatcodes,"\\xmle{")
            contextsprint(notcatcodes,str)
            contextsprint(texcatcodes,"}")
        else
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,str,"notcatcodes")
            end
            contextsprint(notcatcodes,str)
        end
    end
end

-- tex interface

local loaded = lxml.loaded or { }
lxml.loaded  = loaded

-- print(contextdirective("context-mathml-directive function reduction yes "))
-- print(contextdirective("context-mathml-directive function "))

xml.defaultprotocol = "tex"

local finalizers = xml.finalizers

finalizers.xml = finalizers.xml or { }
finalizers.tex = finalizers.tex or { }

local xmlfinalizers = finalizers.xml
local texfinalizers = finalizers.tex

-- serialization with entity handling

local ampersand = P("&")
local semicolon = P(";")

-- An &name; sequence; the captured name is handed to lxml.resolvedentity.

local entity = (ampersand * C((1-semicolon)^1) * semicolon) / lxml.resolvedentity -- context.bold

-- The *_yes handlers get the entity exception, the *_nop ones don't.

local _, xmltextcapture_yes = context.newtexthandler {
    catcodes  = notcatcodes,
    exception = entity,
}
local _, xmltextcapture_nop = context.newtexthandler {
    catcodes  = notcatcodes,
}

local _, xmlspacecapture_yes = context.newtexthandler {
    endofline  = context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    space      = context.xmlcdataobeyedspace,
    catcodes   = notcatcodes,
    exception  = entity,
}
local _, xmlspacecapture_nop = context.newtexthandler {
    endofline  = context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    space      = context.xmlcdataobeyedspace,
    catcodes   = notcatcodes,
}

local _, xmllinecapture_yes = context.newtexthandler {
    endofline  =
-- (value of the endofline field that was opened at the end of the preceding line)
    context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    catcodes   = notcatcodes,
    exception  = entity,
}
local _, xmllinecapture_nop = context.newtexthandler {
    endofline  = context.xmlcdataobeyedline,
    emptyline  = context.xmlcdataobeyedline,
    simpleline = context.xmlcdataobeyedline,
    catcodes   = notcatcodes,
}

local _, ctxtextcapture_yes = context.newtexthandler {
    catcodes  = ctxcatcodes,
    exception = entity,
}
local _, ctxtextcapture_nop = context.newtexthandler {
    catcodes  = ctxcatcodes,
}

-- The handlers that are actually used; by default the ones with the entity
-- exception. The directive below switches all four at once.

local xmltextcapture  = xmltextcapture_yes
local xmlspacecapture = xmlspacecapture_yes
local xmllinecapture  = xmllinecapture_yes
local ctxtextcapture  = ctxtextcapture_yes

directives.register("lxml.entities.escaped",function(v)
    if v then
        xmltextcapture  = xmltextcapture_yes
        xmlspacecapture = xmlspacecapture_yes
        xmllinecapture  = xmllinecapture_yes
        ctxtextcapture  = ctxtextcapture_yes
    else
        xmltextcapture  = xmltextcapture_nop
        xmlspacecapture = xmlspacecapture_nop
        xmllinecapture  = xmllinecapture_nop
        ctxtextcapture  = ctxtextcapture_nop
    end
end)

-- cdata

local toverbatim = context.newverbosehandler {
    line   = context.xmlcdataobeyedline,
    space  = context.xmlcdataobeyedspace,
    before = context.xmlcdatabefore,
    after  = context.xmlcdataafter,
}

-- Same settings as the local one above, plus strip = true.

lxml.toverbatim = context.newverbosehandler {
    line   = context.xmlcdataobeyedline,
    space  = context.xmlcdataobeyedspace,
    before = context.xmlcdatabefore,
    after  = context.xmlcdataafter,
    strip  = true,
}

-- raw flushing

function lxml.startraw()
    forceraw = true
end

function lxml.stopraw()
    forceraw = false
end

-- NOTE(review): rawroot is not declared local in the visible part of this
-- file, so this reads the global that the print functions assign in raw mode.

function lxml.rawroot()
    return rawroot
end

-- storage

do

    -- We wrap xml.errorhandler so that errors are counted per filename; the
    -- counts are reported by the final action registered below.

    local noferrors    = 0
    local errors       = setmetatableindex("number")
    local errorhandler = xml.errorhandler

    function xml.errorhandler(message,filename)
        if filename and filename ~= "" then
            noferrors = noferrors + 1
            errors[filename] = errors[filename] + 1
        end
        errorhandler(message) -- (filename)
    end

    logs.registerfinalactions(function()
        if noferrors > 0 then
            local report = logs.startfilelogging("lxml","problematic xml files")
            for k, v in table.sortedhash(errors) do
                report("%4i  %s",v,k)
            end
            logs.stopfilelogging()
            --
            if logs.loggingerrors() then
                logs.starterrorlogging(report,"problematic xml files")
                for k, v in table.sortedhash(errors) do
                    report("%4i  %s",v,k)
                end
                logs.stoperrorlogging()
            end
        end
    end)

end

-- Registers root under id in the loaded table and sets its "name" property
-- and, when given, its "filename" property.

function lxml.store(id,root,filename)
    loaded[id] = root
    xmlsetproperty(root,"name",id)
    if filename then
        xmlsetproperty(root,"filename",filename)
    end
end

local splitter = lpeg.splitat("::")

lxml.idsplitter = splitter

-- Returns the two results of matching id against the "::" splitter; when the
-- first one is missing an empty string and the id itself are returned.

function lxml.splitid(id)
    local d, i = lpegmatch(splitter,id)
    if d then
        return d, i
    else
        return "", id
    end
end

-- Resolves an id to a loaded tree or to an indexed node:
--
-- * a key of the loaded table gives the stored root
-- * a table is returned as-is
-- * anything else is split with the "::" splitter into a document name and
--   an index; the node is then taken from the index table of that document
--
-- With qualified set the document name is returned as second value in the
-- indexed case. Failures return nothing and are only reported when the
-- lxml.access tracker is enabled.

local function getid(id, qualified)
    if id then
        local lid = loaded[id]
        if lid then
            return lid
        elseif type(id) == "table" then
            return id
        else
            local d, i = lpegmatch(splitter,id)
            if d then
                local ld = loaded[d]
                if ld then
                    local ldi = ld.index
                    if ldi then
                        local root = ldi[tonumber(i)]
                        if root then
                            if qualified then -- we need this else two args that confuse others
                                return root, d
                            else
                                return root
                            end
                        elseif trace_access then
                            report_lxml("%a has no index entry %a",d,i)
                        end
                    elseif trace_access then
                        report_lxml("%a has no index",d)
                    end
                elseif trace_access then
                    report_lxml("%a is not loaded",d)
                end
            elseif trace_access then
                report_lxml("%a is not loaded",i)
            end
        end
    elseif trace_access then
        report_lxml("invalid id (nil)")
    end
end

lxml.id    = getid -- we provide two names as locals can already use such
lxml.getid = getid -- names and we don't want clashes

function lxml.root(id)
    return loaded[id]
end

-- index

local nofindices = 0

-- Numbers the nodes of the tree that getid finds for name. Every table node
-- that has no ix field yet gets the next number and is stored in the index
-- table under that number. This only happens when the root has no index yet
-- or when force is set. The check_sum argument is not used in the body, and
-- the local check table is filled but not written back to the root here.

local function addindex(name,check_sum,force)
    local root = getid(name)
    if root and (not root.index or force) then -- weird, only called once
        local n, index, maxindex, check = 0, root.index or { }, root.maxindex or 0, root.check or { }
        local function nest(root)
            local dt = root.dt
            if not root.ix then
                maxindex = maxindex + 1
                root.ix = maxindex
                check[maxindex] = root.tg -- still needed ?
                index[maxindex] = root
                n = n + 1
            end
            if dt then
                for k=1,#dt do
                    local dk = dt[k]
                    if type(dk) == "table" then
                        nest(dk)
                    end
                end
            end
        end
        nest(root)
        nofindices = nofindices + n
        --
        if type(name) ~= "string" then
            name = "unknown"
        end
        root.index = index
        root.maxindex = maxindex
        if trace_access then
            report_lxml("indexed entries %a, found nodes %a",tostring(name),maxindex)
        end
    end
end

lxml.addindex = addindex

implement {
    name      = "xmladdindex",
    arguments = "string",
    public    = true,
    actions   = addindex,
}

-- another cache

local function lxmlapplylpath(id,pattern) -- better inline, saves call
    return xmlapplylpath(getid(id),pattern)
end

lxml.filter = lxmlapplylpath

-- Applies pattern to each id in list; ids are separated by commas and/or
-- spaces. The results are not collected or returned.

function lxml.filterlist(list,pattern)
    for s in gmatch(list,"[^, ]+") do -- we could cache a table
        xmlapplylpath(getid(s),pattern)
    end
end

-- Calls xml.functions[name] on the resolved id when that function exists.

function lxml.applyfunction(id,name)
    local f = xml.functions[name]
    return f and f(getid(id))
end

-- rather new, indexed storage (backward refs), maybe i will merge this

-- Returns the index table of the resolved root, or 0 when there is none.

function lxml.checkindex(name)
    local root = getid(name)
    return root and root.index or 0
end

-- Both variants print an \xmlw call for command. When n already splits on
-- "::" it is used as the full reference, otherwise it is prefixed with name.

if tokenizedxmlw then

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local i, p = lpegmatch(splitter,n)
        if p then
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",n,"}")
        else
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",name,"::",n,"}")
        end
    end

else

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local i, p = lpegmatch(splitter,n)
        if p then
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",n,"}")
        else
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",name,"::",n,"}")
        end
    end

end

-- Prints the reference itself, qualified with name when n has no "::" part.

function lxml.getindex(name,n) -- will change as name is always there now
    local i, p = lpegmatch(splitter,n)
    if p then
        contextsprint(ctxcatcodes,n)
    else
        contextsprint(ctxcatcodes,name,"::",n)
    end
end

-- loading (to be redone, no overload) ..
-- best use different methods and
-- keep raw xml (at least as option)

xml.originalload = xml.originalload or xml.load

local noffiles     = 0
local nofconverted = 0
local linenumbers  = false

synctex.registerenabler (function() linenumbers = true  end)
synctex.registerdisabler(function() linenumbers = false end)

-- Replacement for the original xml.load: the file is read with
-- resolvers.loadbinfile and converted with xml.convert. The linenumbers and
-- currentresource fields of settings are set by this function (the latter is
-- reset afterwards); a file that can't be loaded is converted as "".

function xml.load(filename,settings)
    noffiles, nofconverted = noffiles + 1, nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
    settings = settings or { }
    settings.linenumbers = linenumbers
    settings.currentresource = filename
    local xmltable = xml.convert((ok and data) or "",settings)
    settings.currentresource = nil
    stoptiming(xml)
    return xmltable
end

-- Used as resolve_entities callback by lxmlconvert. The first hit wins: a
-- tex entity gives a private token, then the entities passed in ent, then
-- the predefined xml entities; unknown names also become a private token.
-- The id argument is not used in the body.

local function entityconverter(id,str,ent) -- todo: disable tex entities when raw
    -- tex driven entity
    local t = texentities[str]
    if t then
        local p = xmlprivatetoken(str)
     -- only once
     -- context.xmlprivate(p,t)
        return p
    end
    -- dtd determined entity
    local e = ent and ent[str]
    if e then
        return e
    end
    -- predefined entity (mathml and so)
    local x = xmlentities[str]
    if x then
        return x
    end
    -- keep original somehow
    return xmlprivatetoken(str)
end

lxml.preprocessor = nil

-- Converts data into an xml table with the settings used for tex runs. When
-- compress equals variables.yes the strip_cm_and_dt setting is enabled. The
-- resource name defaults to the id.

local function lxmlconvert(id,data,compress,currentresource)
    local settings = { -- we're now roundtrip anyway
        unify_predefined_entities   = false, -- is also default
        utfize_entities             = true,  -- is also default
        resolve_predefined_entities = true,  -- is also default
        resolve_entities            = function(str,ent) return entityconverter(id,str,ent) end,
        currentresource             = tostring(currentresource or id),
        preprocessor                = lxml.preprocessor,
        linenumbers                 = linenumbers,
    }
    if compress and compress == variables.yes then
        settings.strip_cm_and_dt = true
    end
    return xml.convert(data,settings)
end

lxml.convert = lxmlconvert

-- Loads filename (after ctxrunner.preparedfile), converts it and stores the
-- tree under id. Returns the tree and the (prepared) filename.

function lxml.load(id,filename,compress)
    filename = ctxrunner.preparedfile(filename)
    if trace_loading then
        report_lxml("loading file %a as %a",filename,id)
    end
    noffiles, nofconverted = noffiles + 1, nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
 -- local xmltable = lxmlconvert(id,(ok and data) or "",compress,formatters["id: %s, file: %s"](id,filename))
    local xmltable = lxmlconvert(id,(ok and data) or "",compress,filename)
    stoptiming(xml)
    lxml.store(id,xmltable,filename)
    return xmltable, filename
end

-- Stores an already converted tree under id and returns it.

function lxml.register(id,xmltable,filename)
    lxml.store(id,xmltable,filename)
    return xmltable
end

-- recurse prepare rootpath resolve basename

-- These defaults are read as hashes (options.recurse, options.prepare, ...)
-- in lxml.include, just like the result of settings_to_hash. They used to be
-- arrays of names, so none of the fields was ever set and the downward
-- compatible defaults had no effect.

local options_true = { recurse = true, prepare = true, rootpath = true }
local options_nil  = {                 prepare = true, rootpath = true }

-- Processes the inclusions that match pattern / attribute in the tree with
-- the given id. The options argument can be:
--
-- * true   : recurse, prepare and rootpath are enabled
-- * nil    : prepare and rootpath are enabled
-- * string : a settings list, converted with settings_to_hash
--
-- For each file: prepare runs ctxrunner.preparedfile, basename strips the
-- path, resolve applies resolvers.resolve and rootpath prepends the path of
-- the root file when the name itself has no path. The callback returns the
-- loaded data and the name that was found, or empty strings.

function lxml.include(id,pattern,attribute,options)
    starttiming(xml)
    local root = getid(id)
    if options == true then
        -- downward compatible
        options = options_true
    elseif not options then
        -- downward compatible
        options = options_nil
    else
        options = settings_to_hash(options) or { }
    end
    xml.include(root,pattern,attribute,options.recurse,function(filename)
        if filename then
            -- preprocessing
            if options.prepare then
                filename = ctxrunner.preparedfile(filename)
            end
            -- handy if we have a flattened structure
            if options.basename then
                filename = basename(filename)
            end
            if options.resolve then
                filename = resolveprefix(filename) or filename
            end
            -- some protection
            if options.rootpath and dirname(filename) == "" and root.filename then
                local dn = dirname(root.filename)
                if dn ~= "" then
                    filename = joinfile(dn,filename)
                end
            end
            if trace_loading then
                report_lxml("including file %a",filename)
            end
            noffiles, nofconverted = noffiles + 1, nofconverted + 1
            return
                resolvers.loadtexfile(filename) or "",
                resolvers.findtexfile(filename) or ""
        else
            return ""
        end
    end)
    stoptiming(xml)
end

-- Prints the cf field of the resolved element.

function lxml.filename(id)
    local e = getid(id)
    if e then
        context(e.cf)
    end
end

-- Prints the cl field of the resolved element.

function lxml.fileline(id)
    local e = getid(id)
    if e then
        context(e.cl)
    end
end

-- Prints the result of xml.inclusion for the resolved id, reduced to its
-- basename when base is set.

function lxml.inclusion(id,default,base)
    local inclusion = xmlinclusion(getid(id),default)
    if inclusion then
        context(base and basename(inclusion) or inclusion)
    end
end

-- Prints the inclusions as a comma separated list.

function lxml.inclusions(id,sorted)
    local inclusions =
-- (right hand side of the assignment that was opened at the end of the preceding line)
        xmlinclusions(getid(id),sorted)
    if inclusions then
        context(concat(inclusions,","))
    end
end

-- Prints the bad inclusions as a comma separated list.

function lxml.badinclusions(id,sorted)
    local badinclusions = xmlbadinclusions(getid(id),sorted)
    if badinclusions then
        context(concat(badinclusions,","))
    end
end

function lxml.save(id,name)
    xml.save(getid(id),name)
end

-- Converts the content of buffer name (default: tex.jobname) and serializes
-- the result to a string.
-- NOTE(review): the serialized string is neither returned nor stored, so
-- callers get nothing back -- confirm whether a return is missing here.

function xml.getbuffer(name,compress) -- we need to make sure that commands are processed
    if not name or name == "" then
        name = tex.jobname
    end
    nofconverted = nofconverted + 1
    local data = buffers.getcontent(name)
    xmltostring(lxmlconvert(name,data,compress,format("buffer: %s",tostring(name or "?")))) -- one buffer
end

-- Converts the collected content of the buffer(s) name (default: id) and
-- stores the tree under id. Returns the tree and the buffer name used.

function lxml.loadbuffer(id,name,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local data = buffers.collectcontent(name or id) -- name can be list
    local xmltable = lxmlconvert(id,data,compress,format("buffer: %s",tostring(name or id or "?")))
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, name or id
end

-- Converts the string str (default: "") and stores the tree under id.
-- Returns the tree and the id.

function lxml.loaddata(id,str,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local xmltable = lxmlconvert(id,str or "",compress,format("id: %s",id))
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, id
end

-- e.command:
--
-- string   : setup
-- true     : text (no )
-- false    : ignore
-- function : call

local function tex_doctype(e,handlers)
    -- ignore
end

-- Comments produce no output; they are only reported when the lxml.comments
-- tracker is enabled.

local function tex_comment(e,handlers)
    if trace_comments then
        report_lxml("comment %a",e.dt[1])
    end
end

local default_element_handler = xml.gethandlers("verbose").functions["@el@"]

-- setfilename is false unless synctex is enabled, in which case it holds
-- synctex.setfilename; callers test it before calling syncfilename.

local setfilename = false
local trace_name  = false
local report_name = logs.reporter("lxml")

synctex.registerenabler (function() setfilename = synctex.setfilename end)
synctex.registerdisabler(function() setfilename = false end)

-- Passes the cf field of e and its cl field (default: 1) to setfilename;
-- nothing happens when e has no cf field. The where argument only shows up
-- in the trace message.

local function syncfilename(e,where)
    local cf = e.cf
    if cf then
        local cl = e.cl or 1
        if trace_name then
            report_name("set filename, case %a, tag %a, file %a, line %a",where,e.tg,cf,cl)
        end
        setfilename(cf,cl);
    end
end
trackers.register("system.synctex.xml",function(v)
    trace_name = v
end)

-- The element handler dispatches on e.command:
--
-- nil      : the default (verbose) element handler is used
-- true     : only the content (e.dt) is serialized
-- false    : the element is ignored
-- string   : an \xmlw call with that setup name and a name::index reference
--            is printed; a missing index is added on the fly with addindex
-- function : the function is called with the element
--
-- There are two variants: one that uses the tokenized \xmlw when
-- context.tokenizedcs provides it, and one that prints the macro name.

local tex_element

if tokenizedxmlw then

 -- local expandmacro = token.expandmacro

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        if command == nil then
            default_element_handler(e,handlers)
        elseif command == true then
            -- text (no ) / so, no mkii fallback then
            handlers.serialize(e.dt,handlers)
        elseif command == false then
            -- ignore
        else
            local tc = type(command)
            if tc == "string" then
                local rootname, ix = e.name, e.ix
                if rootname then
                    if not ix then
                        addindex(rootname,false,true)
                        ix = e.ix
                    end
                 -- lmtx only, same performance, a bit more immediate:
                 --
                 -- expandmacro(tokenizedxmlw,ctxcatcodes,true,command,true,rootname.."::"..ix)
                 --
                    contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",rootname,"::",ix,"}")
                else
                    report_lxml("fatal error: no index for %a",command)
                    contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",ix or 0,"}")
                end
            elseif tc == "function" then
                command(e)
            end
        end
    end

else

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        if command == nil then
            default_element_handler(e,handlers)
        elseif command == true then
            -- text (no ) / so, no mkii fallback then
            handlers.serialize(e.dt,handlers)
        elseif command == false then
            -- ignore
        else
            local tc = type(command)
            if tc == "string" then
                local rootname, ix = e.name, e.ix
                if rootname then
                    if not ix then
                        addindex(rootname,false,true)
                        ix = e.ix
                    end
                    -- faster than context.xmlw
                    contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",rootname,"::",ix,"}")
                 -- contextsprint(ctxcatcodes,xmlw[command][rootname],ix,"}")
                else
                    report_lxml("fatal error: no index for %a",command)
                    contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",ix or 0,"}")
                 -- contextsprint(ctxcatcodes,xmlw[command][false],ix or 0,"}")
                end
            elseif tc == "function" then
                command(e)
            end
        end
    end

end

--
--

-- Handlers for processing instructions, keyed by category.

local pihandlers = { }  xml.pihandlers = pihandlers

local space    = S(" \n\r")
local spaces   = space^0
local class    = C((1-space)^0)
local key      = class
local rest     = C(P(1)^0)
local value    = C(P(1-(space * -1))^0)

-- Matches "context-<category>-directive" or "context-directive <category>"
-- and captures the category.

local category = P("context-") * (
                    C((1-P("-"))^1) * P("-directive")
                  + P("directive") * spaces * key
                 )

local c_parser = category * spaces * value -- rest
local k_parser = class * spaces * key * spaces * rest --value

-- Installs a directive handler for category name: the remainder of a
-- matching processing instruction is split by k_parser into three captures
-- that are passed to context[csname]. Nothing is installed without csname.

implement {
    name      = "xmlinstalldirective",
    arguments = "2 strings",
    actions   = function(name,csname)
        if csname then
            local keyvalueparser  = k_parser / context[csname]
            local keyvaluechecker = function(category,rest,e)
                lpegmatch(keyvalueparser,rest)
            end
            pihandlers[name] = keyvaluechecker
        end
    end
}

-- Splits the text of a processing instruction with c_parser and calls the
-- handler registered for its category, if any. Instructions that don't
-- match, or that have nothing after the category, are ignored.

local function tex_pi(e,handlers)
    local str = e.dt[1]
    if str and str ~= "" then
        local category, rest = lpegmatch(c_parser,str)
        if category and rest and #rest > 0 then
            local handler = pihandlers[category]
            if handler then
                handler(category,rest,e)
            end
        end
    end
end

local obeycdata = true

function lxml.setcdata()
    obeycdata = true
end

function lxml.resetcdata()
    obeycdata = false
end

-- Flushes a cdata section with the verbatim handler unless cdata handling
-- has been switched off with lxml.resetcdata.

local function tex_cdata(e,handlers)
    if obeycdata then
        toverbatim(e.dt[1])
    end
end

-- we could try to merge the conversion and flusher but we don't gain much and it makes tracing
-- harder: xunspecialized = utf.remapper(xml.specialcodes,"dynamic",lxml.resolvedentity)

local function tex_text(e)
    e = xmlunspecialized(e)
    lpegmatch(xmltextcapture,e)
end

--

local function ctx_text(e) -- can be just context(e) as we split there
    lpegmatch(ctxtextcapture,e)
end

local function tex_handle(...)
    contextsprint(ctxcatcodes,...)
-- notcatcodes is active anyway
end

-- The default handler set: text goes through tex_text.

local xmltexhandler = xml.newhandlers {
    name       = "tex",
    handle     = tex_handle,
    functions  = {
     -- ["@dc@"] = tex_document,
        ["@dt@"] = tex_doctype,
     -- ["@rt@"] = tex_root,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_text,
    }
}

lxml.xmltexhandler = xmltexhandler

-- begin of test

local function tex_space(e)
    e = xmlunspecialized(e)
    lpegmatch(xmlspacecapture,e)
end

-- Same as xmltexhandler but text goes through the space obeying capture.

local xmltexspacehandler = xml.newhandlers {
    name       = "texspace",
    handle     = tex_handle,
    functions  = {
        ["@dt@"] = tex_doctype,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_space,
    }
}

local function tex_line(e)
    e = xmlunspecialized(e)
    lpegmatch(xmllinecapture,e)
end

-- Same as xmltexhandler but text goes through the line obeying capture.

local xmltexlinehandler = xml.newhandlers {
    name       = "texline",
    handle     = tex_handle,
    functions  = {
        ["@dt@"] = tex_doctype,
        ["@el@"] = tex_element,
        ["@pi@"] = tex_pi,
        ["@cm@"] = tex_comment,
        ["@cd@"] = tex_cdata,
        ["@tx@"] = tex_line,
    }
}

function lxml.flushspacewise(id) -- keeps spaces and lines
    id = getid(id)
    local dt = id and id.dt
    if dt then
        xmlserialize(dt,xmltexspacehandler)
    end
end

function lxml.flushlinewise(id) -- keeps lines
    id = getid(id)
    local dt = id and id.dt
    if dt then
        xmlserialize(dt,xmltexlinehandler)
    end
end

-- end of test

function lxml.serialize(root)
    xmlserialize(root,xmltexhandler)
end

-- Sets the command field of every element that matches pattern to action.

function lxml.setaction(id,pattern,action)
    local collected = xmlapplylpath(getid(id),pattern)
    if collected then
        local nc = #collected
        if nc > 0 then
            for c=1,nc do
                collected[c].command = action
            end
        end
    end
end

-- Flushes root, which is either a string or an element (table). A string is
-- unspecialized and fed to the text capture. A table is serialized with the
-- tex handler, unless raw mode is active: then it is remembered in rawroot
-- (not declared local in the visible part of this file) and flushed as
-- despecialized text. The optional p is only used for synctex filenames.

local function sprint(root,p) -- check rawroot usage
    if root then
        local tr = type(root)
        if tr == "string" then -- can also be result of lpath
         -- rawroot = false -- ?
            if setfilename and p then
                syncfilename(p,"sprint s")
            end
            root = xmlunspecialized(root)
            lpegmatch(xmltextcapture,root)
        elseif tr == "table" then
            if forceraw then
                rawroot = root
             -- contextsprint(ctxcatcodes,xmltostring(root)) -- goes wrong with % etc
             -- root = xmlunspecialized(xmltostring(root))   -- we loose < > &
                root = xmldespecialized(xmltostring(root))
                lpegmatch(xmltextcapture,root) -- goes to toc
            else
                if setfilename and p then -- and not root.cl
                    syncfilename(p,"sprint t")
                end
                xmlserialize(root,xmltexhandler)
            end
        end
    end
end

-- local function tprint(root) -- we can move sprint inline
--     local tr = type(root)
--     if tr == "table" then
--         local n = #root
--         if n == 0 then
--             -- skip
--         else
--             for i=1,n do
--                 sprint(root[i])
--             end
--         end
--     elseif tr == "string" then
--         root = xmlunspecialized(root)
--         lpegmatch(xmltextcapture,root)
--     end
-- end

-- Flushes a list of strings and/or elements, or a single string, in the same
-- way as sprint does for one item.
--
-- The per item conversions are passed straight to the capture. The previous
-- code assigned them to root, the table that is being looped over, and then
-- matched the unconverted item. After the first string the remaining items
-- were therefore skipped, and in raw mode a table ended up as subject of
-- lpegmatch.

local function tprint(root) -- we can move sprint inline
    local tr = type(root)
    if tr == "table" then
        local n = #root
        if n == 0 then
            -- skip
        else
            for i=1,n do
             -- sprint(root[i]) -- inlined because of filename:
                local ri = root[i]
                local tr = type(ri)
                if tr == "string" then -- can also be result of lpath
                    if setfilename then
                        -- kept as it was: ri is a string here
                        syncfilename(ri,"tprint")
                    end
                    lpegmatch(xmltextcapture,xmlunspecialized(ri))
                elseif tr == "table" then
                    if forceraw then
                        rawroot = ri
                        lpegmatch(xmltextcapture,xmldespecialized(xmltostring(ri))) -- goes to toc
                    else
                        xmlserialize(ri,xmltexhandler)
                    end
                end
            end
        end
    elseif tr == "string" then
        root = xmlunspecialized(root)
        lpegmatch(xmltextcapture,root)
    end
end

-- Flushes the content of root: a string is handled as in sprint, for an
-- element its dt table (or the element itself when there is none) is
-- serialized. In raw mode the whole element is converted to a string and
-- unspecialized, which differs from sprint where it is despecialized.

local function cprint(root) -- content
    if not root then
     -- rawroot = false
        -- quit
    elseif type(root) == 'string' then
     -- rawroot = false
        root = xmlunspecialized(root)
        lpegmatch(xmltextcapture,root)
    else
        if setfilename then
            syncfilename(root,"cprint")
        end
        local rootdt = root.dt
        if forceraw then
            rawroot = root
         -- contextsprint(ctxcatcodes,xmltostring(rootdt or root))
            root = xmlunspecialized(xmltostring(root))
            lpegmatch(xmltextcapture,root) -- goes to toc
        else
            xmlserialize(rootdt or root,xmltexhandler)
        end
    end
end

xml.sprint = sprint local xmlsprint = sprint  -- calls ct  mathml   -> will be replaced
xml.tprint = tprint local xmltprint = tprint  -- only used here
xml.cprint = cprint local xmlcprint = cprint  -- calls ct  mathml   -> will be replaced

-- now we can flush

function lxml.main(id)
    local root = getid(id)
    xmlserialize(root,xmltexhandler) -- the real root (@rt@)
end

-- -- lines (untested)
--
-- local buffer = { }
--
-- local xmllinescapture = (
--     newline^2 / function()  buffer[#buffer+1] = "" end +
--     newline   / function()  buffer[#buffer] = buffer[#buffer] .. " " end +
--     content   / function(s) buffer[#buffer] = buffer[#buffer] ..  s  end
-- )^0
--
-- local xmllineshandler = table.copy(xmltexhandler)
--
-- xmllineshandler.handle = function(...) lpegmatch(xmllinescapture,concat{ ... }) end
--
-- function lines(root)
--     if not root then
--      -- rawroot = false
--      -- quit
--     elseif type(root) == 'string' then
--      -- rawroot = false
--         lpegmatch(xmllinescapture,root)
--     elseif next(root) then -- tr == 'table'
--         xmlserialize(root,xmllineshandler)
--     end
-- end
--
-- function xml.lines(root) -- used at all?
--     buffer = { "" }
--     lines(root)
--     return result
-- end

-- Default for elements without a command: special elements other than the
-- root are skipped, all others are flushed as text.

local function to_text(e)
    if e.command == nil then
        local etg = e.tg
        if etg and e.special and etg ~= "@rt@" then
            e.command = false -- i.e. skip
        else
            e.command = true  -- i.e. no
        end
    end
end

-- Default for elements without a command: skip them.

local function to_none(e)
    if e.command == nil then
        e.command = false -- i.e.
-- skip
    end
end

-- setups

-- Per document (or "*") an ordered list of setup names.

local setups = { }

function lxml.setcommandtotext(id)
    xmlwithelements(getid(id),to_text)
end

function lxml.setcommandtonone(id)
    xmlwithelements(getid(id),to_none)
end

-- Adds setup to the list of document (default: "*"). An existing entry with
-- the same name is removed first, so that a setup is listed only once. The
-- position depends on what:
--
-- 1 : prepend
-- 2 : append
-- 3 : insertbeforevalue(list,setup,where)
-- 4 : insertaftervalue(list,setup,where)
--
-- The old entry is taken out with remove so that the list stays a proper
-- sequence. Setting the slot to nil left a hole, after which the length and
-- the following insert were unreliable and flushsetups could run into a nil
-- entry.
--
-- NOTE(review): for 3 and 4 the trace message says that setup goes before or
-- after where; check the argument order against the helpers in
-- utilities.tables.

function lxml.installsetup(what,document,setup,where)
    document = document or "*"
    local sd = setups[document]
    if not sd then
        sd = { }
        setups[document] = sd
    end
    for k=1,#sd do
        if sd[k] == setup then
            remove(sd,k)
            break
        end
    end
    if what == 1 then
        if trace_loading then
            report_lxml("prepending setup %a for %a",setup,document)
        end
        insert(sd,1,setup)
    elseif what == 2 then
        if trace_loading then
            report_lxml("appending setup %a for %a",setup,document)
        end
        insert(sd,setup)
    elseif what == 3 then
        if trace_loading then
            report_lxml("inserting setup %a for %a before %a",setup,document,where)
        end
        insertbeforevalue(sd,setup,where)
    elseif what == 4 then
        if trace_loading then
            report_lxml("inserting setup %a for %a after %a",setup,document,where)
        end
        insertaftervalue(sd,setup,where)
    end
end

-- Prints an \xmlsetup{id}{setup} call for every setup registered for the
-- given documents, in the order given; a setup that shows up more than once
-- is only applied the first time.

function lxml.flushsetups(id,...)
    local done = { }
    for i=1,select("#",...) do
        local document = select(i,...)
        local sd = setups[document]
        if sd then
            for k=1,#sd do
                local v = sd[k]
                if not done[v] then
                    if trace_loading then
                        report_lxml("applying setup %02i : %a to %a",k,v,document)
                    end
                    contextsprint(ctxcatcodes,"\\xmlsetup{",id,"}{",v,"}")
                    done[v] = true
                end
            end
        elseif trace_loading then
            report_lxml("no setups for %a",document)
        end
    end
end

function lxml.resetsetups(document)
    if trace_loading then
        report_lxml("resetting all setups for %a",document)
    end
    setups[document] = { }
end

-- Removes the first entry equal to setup from the list of document. The
-- removal used an undefined variable t instead of the list s, so it raised
-- an error as soon as a matching entry was found.

function lxml.removesetup(document,setup)
    local s = setups[document]
    if s then
        for i=1,#s do
            if s[i] == setup then
                if trace_loading then
                    report_lxml("removing setup %a for %a",setup,document)
                end
                remove(s,i)
                break
            end
        end
    end
end

-- Sets the command of all elements that match pattern. The setup can be:
--
-- nil "" "*" : the tag of the element becomes the command
-- "-"        : the element is skipped (command = false)
-- "+"        : the element is flushed as text (command = true)
-- "xxx:*"    : the prefix "xxx:" followed by the tag becomes the command
-- "xxx:-"    : as "-"
-- "xxx:+"    : as "+"
-- otherwise  : the setup itself becomes the command
--
-- Each case has a traced variant (tracker lxml.setups) that reports per
-- element; the letters (a) .. (l) in the messages identify the branch.

function lxml.setsetup(id,pattern,setup)
    if not setup or setup == "" or setup == "*" or setup == "-" or setup == "+" then
        local collected = xmlapplylpath(getid(id),pattern)
        if collected then
            local nc = #collected
            if nc > 0 then
                if trace_setups then
                    for c=1,nc do
                        local e = collected[c]
                        local ix = e.ix or 0
                        if setup == "-" then
                            e.command = false
                            report_lxml("lpath matched (a) %5i: %s = %s -> skipped",c,ix,setup)
                        elseif setup == "+" then
                            e.command = true
                            report_lxml("lpath matched (b) %5i: %s = %s -> text",c,ix,setup)
                        else
                            local tg = e.tg
                            if tg then -- to be sure
                                e.command = tg
                                local ns = e.rn or e.ns
                                if ns == "" then
                                    report_lxml("lpath matched (c) %5i: %s = %s -> %s",c,ix,tg,tg)
                                else
                                    report_lxml("lpath matched (d) %5i: %s = %s:%s -> %s",c,ix,ns,tg,tg)
                                end
                            end
                        end
                    end
                elseif setup == "-" then
                    for c=1,nc do
                        collected[c].command = false
                    end
                elseif setup == "+" then
                    for c=1,nc do
                        collected[c].command = true
                    end
                else
                    for c=1,nc do
                        local e = collected[c]
                        e.command = e.tg
                    end
                end
            elseif trace_setups then
                report_lxml("%s lpath matches for pattern: %s","zero",pattern)
            end
        elseif trace_setups then
            report_lxml("%s lpath matches for pattern: %s","no",pattern)
        end
    else
        local a, b = match(setup,"^(.+:)([%*%-%+])$")
        if a and b then
            local collected = xmlapplylpath(getid(id),pattern)
            if collected then
                local nc = #collected
                if nc > 0 then
                    if trace_setups then
                        for c=1,nc do
                            local e = collected[c]
                            local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
                            if b == "-" then
                                e.command = false
                                if ns == "" then
                                    report_lxml("lpath matched (e) %5i: %s = %s -> skipped",c,ix,tg)
                                else
                                    report_lxml("lpath matched (f) %5i: %s = %s:%s -> skipped",c,ix,ns,tg)
                                end
                            elseif b == "+" then
                                e.command = true
                                if ns == "" then
                                    report_lxml("lpath matched (g) %5i: %s = %s -> text",c,ix,tg)
                                else
                                    report_lxml("lpath matched (h) %5i: %s = %s:%s -> text",c,ix,ns,tg)
                                end
                            else
                                e.command = a .. tg
                                if ns == "" then
                                    report_lxml("lpath matched (i) %5i: %s = %s -> %s",c,ix,tg,e.command)
                                else
                                    report_lxml("lpath matched (j) %5i: %s = %s:%s -> %s",c,ix,ns,tg,e.command)
                                end
                            end
                        end
                    elseif b == "-" then
                        for c=1,nc do
                            collected[c].command = false
                        end
                    elseif b == "+" then
                        for c=1,nc do
                            collected[c].command = true
                        end
                    else
                        for c=1,nc do
                            local e = collected[c]
                            e.command = a .. e.tg
                        end
                    end
                elseif trace_setups then
                    report_lxml("%s lpath matches for pattern: %s","zero",pattern)
                end
            elseif trace_setups then
                report_lxml("%s lpath matches for pattern: %s","no",pattern)
            end
        else
            local collected = xmlapplylpath(getid(id),pattern)
            if collected then
                local nc = #collected
                if nc > 0 then
                    if trace_setups then
                        for c=1,nc do
                            local e = collected[c]
                            e.command = setup
                            local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
                            if ns == "" then
                                report_lxml("lpath matched (k) %5i: %s = %s -> %s",c,ix,tg,setup)
                            else
                                report_lxml("lpath matched (l) %5i: %s = %s:%s -> %s",c,ix,ns,tg,setup)
                            end
                        end
                    else
                        for c=1,nc do
                            collected[c].command = setup
                        end
                    end
                elseif trace_setups then
                    report_lxml("%s lpath matches for pattern: %s","zero",pattern)
                end
            elseif trace_setups then
                report_lxml("%s lpath matches for pattern: %s","no",pattern)
            end
        end
    end
end

-- finalizers

-- Flushes the first collected item.

local function first(collected)
    if collected and #collected > 0 then
        xmlsprint(collected[1])
    end
end

-- Flushes the last collected item.

local function last(collected)
    if collected then
        local nc = #collected
        if nc > 0 then
xmlsprint(collected[nc]) end end end local function all(collected) if collected then local nc = #collected if nc > 0 then for c=1,nc do xmlsprint(collected[c]) end end end end texfinalizers.reverse = function(collected) if collected then local nc = #collected if nc >0 then for c=nc,1,-1 do xmlsprint(collected[c]) end end end end local function count(collected) contextsprint(ctxcatcodes,(collected and #collected) or 0) -- why ctxcatcodes end local function position(collected,n) -- todo: if not n then == match if collected then local nc = #collected if nc > 0 then n = tonumber(n) or 0 if n < 0 then n = nc + n + 1 end if n > 0 then local cn = collected[n] if cn then xmlsprint(cn) return end end end end end local function match(collected) -- is match in preceding collected, never change, see bibxml local m = collected and collected[1] contextsprint(ctxcatcodes,m and m.mi or 0) -- why ctxcatcodes end local function index(collected,n) if collected then local nc = #collected if nc > 0 then n = tonumber(n) or 0 if n < 0 then n = nc + n + 1 -- brrr end if n > 0 then local cn = collected[n] if cn then contextsprint(ctxcatcodes,cn.ni or 0) -- why ctxcatcodes return end end end end contextsprint(ctxcatcodes,0) -- why ctxcatcodes end -- the number of commands is often relative small but there can be many calls -- to this finalizer local command if tokenizedxmlw then command = function(collected,cmd,otherwise) local n = collected and #collected if n and n > 0 then local wildcard = find(cmd,"*",1,true) for c=1,n do -- maybe optimize for n=1 local e = collected[c] local ix = e.ix local name = e.name if name and not ix then addindex(name,false,true) ix = e.ix end if not ix or not name then report_lxml("no valid node index for element %a using command %s",name or "?",cmd) elseif wildcard then contextsprint(ctxcatcodes,tokenizedxmlw,"{",(gsub(cmd,"%*",e.tg)),"}{",name,"::",ix,"}") else contextsprint(ctxcatcodes,tokenizedxmlw,"{",cmd,"}{",name,"::",ix,"}") end end elseif otherwise 
then contextsprint(ctxcatcodes,tokenizedxmlw,"{",otherwise,"}{#1}") end end else command = function(collected,cmd,otherwise) local n = collected and #collected if n and n > 0 then local wildcard = find(cmd,"*",1,true) for c=1,n do -- maybe optimize for n=1 local e = collected[c] local ix = e.ix local name = e.name if name and not ix then addindex(name,false,true) ix = e.ix end if not ix or not name then report_lxml("no valid node index for element %a using command %s",name or "?",cmd) elseif wildcard then contextsprint(ctxcatcodes,"\\xmlw{",(gsub(cmd,"%*",e.tg)),"}{",name,"::",ix,"}") else contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}") end end elseif otherwise then contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}") end end end -- local wildcards = setmetatableindex(function(t,k) -- local v = false -- if find(k,"*",1,true) then -- v = setmetatableindex(function(t,kk) -- local v = gsub(k,"%*",kk) -- t[k] = v -- -- report_lxml("wildcard %a key %a value %a",kk,k,v) -- return v -- end) -- end -- t[k] = v -- return v -- end) -- -- local function command(collected,cmd,otherwise) -- local n = collected and #collected -- if n and n > 0 then -- local wildcard = wildcards[cmd] -- for c=1,n do -- maybe optimize for n=1 -- local e = collected[c] -- local ix = e.ix -- local name = e.name -- if name and not ix then -- addindex(name,false,true) -- ix = e.ix -- end -- if not ix or not name then -- report_lxml("no valid node index for element %a using command %s",name or "?",cmd) -- elseif wildcard then -- contextsprint(ctxcatcodes,"\\xmlw{",wildcard[e.tg],"}{",name,"::",ix,"}") -- else -- contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}") -- end -- end -- elseif otherwise then -- contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}") -- end -- end local function attribute(collected,a,default) if collected and #collected > 0 then local at = collected[1].at local str = (at and at[a]) or default if str and str ~= "" then contextsprint(notcatcodes,str) 
end elseif default then contextsprint(notcatcodes,default) end end local function parameter(collected,p,default) if collected and #collected > 0 then local pa = collected[1].pa local str = (pa and pa[p]) or default if str and str ~= "" then contextsprint(notcatcodes,str) end elseif default then contextsprint(notcatcodes,default) end end local function chainattribute(collected,arguments,default) -- todo: optional levels if collected and #collected > 0 then local e = collected[1] while e do local at = e.at if at then local a = at[arguments] if a then contextsprint(notcatcodes,a) return end else break -- error end e = e.__p__ end end if default then contextsprint(notcatcodes,default) end end local function chainpath(collected,nonamespace) if collected and #collected > 0 then local e = collected[1] local t = { } while e do local tg = e.tg local rt = e.__p__ local ns = e.ns if tg == "@rt@" then break elseif rt.tg == "@rt@" then if nonamespace or not ns or ns == "" then t[#t+1] = tg else t[#t+1] = ns .. ":" .. tg end else if nonamespace or not ns or ns == "" then t[#t+1] = tg .. "[" .. e.ei .. "]" else t[#t+1] = ns .. ":" .. tg .. "[" .. e.ei .. 
"]" end end e = rt end contextsprint(notcatcodes,concat(reversed(t),"/")) end end local function text(collected) if collected then local nc = #collected if nc == 0 then -- nothing elseif nc == 1 then -- hardly any gain so this will go cprint(collected[1]) else for c=1,nc do cprint(collected[c]) end end end end local function ctxtext(collected) if collected then local nc = #collected if nc > 0 then for c=1,nc do contextsprint(ctxcatcodes,collected[c].dt) end end end end texfinalizers.stripped = function(collected) -- tricky as we strip in place if collected then local nc = #collected if nc > 0 then for c=1,nc do cprint(xmlstripelement(collected[c])) end end end end texfinalizers.collapsed = function(collected) if collected and #collected > 0 then local s = xmltext(collected[1]) if s ~= "" then sprint(collapse(s)) end end end texfinalizers.lower = function(collected) if not collected then local nc = #collected if nc > 0 then for c=1,nc do contextsprint(ctxcatcodes,lowerchars(collected[c].dt[1])) end end end end texfinalizers.upper = function(collected) if collected then local nc = #collected if nc > 0 then for c=1,nc do contextsprint(ctxcatcodes,upperchars(collected[c].dt[1])) end end end end local function number(collected) local nc = collected and #collected or 0 local n = 0 if nc > 0 then for c=1,nc do n = n + tonumber(collected[c].dt[1] or 0) end end contextsprint(ctxcatcodes,n) end local function concatrange(collected,start,stop,separator,lastseparator,textonly) -- test this on mml if collected then local nofcollected = #collected if nofcollected > 0 then local separator = separator or "" local lastseparator = lastseparator or separator or "" start, stop = (start == "" and 1) or tonumber(start) or 1, (stop == "" and nofcollected) or tonumber(stop) or nofcollected if stop < 0 then stop = nofcollected + stop end -- -1 == last-1 for i=start,stop do if textonly then xmlcprint(collected[i]) else xmlsprint(collected[i]) end if i == nofcollected then -- nothing elseif 
i == nofcollected-1 and lastseparator ~= "" then contextsprint(ctxcatcodes,lastseparator) elseif separator ~= "" then contextsprint(ctxcatcodes,separator) end end end end end local function concatlist(collected,separator,lastseparator,textonly) -- test this on mml concatrange(collected,false,false,separator,lastseparator,textonly) end local function depth(collected) local d = 0 if collected then local c = collected and collected[1] if c.tg then while c do d = d + 1 c = c.__p__ if not c then break end end end end contextsprint(ctxcatcodes,d) end -- todo just move up as not used local texfinalizers.first = first texfinalizers.last = last texfinalizers.all = all texfinalizers.count = count texfinalizers.command = command texfinalizers.attribute = attribute texfinalizers.param = parameter -- obsolete texfinalizers.parameter = parameter texfinalizers.text = text texfinalizers.ctxtext = ctxtext texfinalizers.context = ctxtext texfinalizers.position = position texfinalizers.match = match texfinalizers.index = index texfinalizers.concat = concatlist texfinalizers.concatrange = concatrange -- used below texfinalizers.chainattribute = chainattribute texfinalizers.chainpath = chainpath texfinalizers.default = all -- !! 
texfinalizers.depth = depth -- used below

--

-- Print the tag of one collected element. n == 0 (or no/invalid n) selects
-- the first element, n > 1 selects collected[n], any other value selects
-- collected[nc-n+1].
function texfinalizers.tag(collected,n)
    if collected then
        local nc = #collected
        if nc > 0 then
            n = tonumber(n) or 0
            local c
            if n == 0 then
                c = collected[1]
            elseif n > 1 then
                c = collected[n]
            else
                c = collected[nc-n+1]
            end
            if c then
                contextsprint(ctxcatcodes,c.tg)
            end
        end
    end
end

-- Like tag, but prefixes the tag with "ns:" when the element has a non-empty
-- namespace. The index is converted with tonumber exactly as in tag, so a
-- string argument no longer fails in the "n > 1" comparison.
function texfinalizers.name(collected,n)
    if collected then
        local nc = #collected
        if nc > 0 then
            n = tonumber(n) or 0
            local c
            if n == 0 then
                c = collected[1]
            elseif n > 1 then
                c = collected[n]
            else
                c = collected[nc-n+1]
            end
            if c then
                local ns = c.ns
                if not ns or ns == "" then
                    contextsprint(ctxcatcodes,c.tg)
                else
                    contextsprint(ctxcatcodes,ns,":",c.tg)
                end
            end
        end
    end
end

-- Print the tag of every collected element, with "ns:" prefix unless
-- nonamespace is set or the namespace is empty.
function texfinalizers.tags(collected,nonamespace)
    if collected then
        local nc = #collected
        if nc > 0 then
            for c=1,nc do
                local e = collected[c]
                local ns = e.ns
                if nonamespace or (not ns or ns == "") then
                    contextsprint(ctxcatcodes,e.tg)
                else
                    contextsprint(ctxcatcodes,ns,":",e.tg)
                end
            end
        end
    end
end

--

-- Pass the serialized content of element id to lxml.toverbatim, optionally
-- preceded by "before[tag]" and followed by "after".
local function verbatim(id,before,after)
    local e = getid(id)
    if e then
        if before then
            contextsprint(ctxcatcodes,before,"[",e.tg or "?","]")
        end
        lxml.toverbatim(xmltostring(e.dt)) -- lxml.toverbatim(xml.totext(e.dt))
        if after then
            contextsprint(ctxcatcodes,after)
        end
    end
end

function lxml.inlineverbatim(id)
    verbatim(id,"\\startxmlinlineverbatim","\\stopxmlinlineverbatim")
end

function lxml.displayverbatim(id)
    verbatim(id,"\\startxmldisplayverbatim","\\stopxmldisplayverbatim")
end

lxml.verbatim = verbatim

-- helpers
--
-- These resolve id with getid, apply the lpath pattern and hand the result to
-- the finalizer of the same name.

function lxml.depth(id)
    depth { getid(id) }
end

function lxml.first(id,pattern)
    local collected = xmlapplylpath(getid(id),pattern)
    if collected then
        first(collected)
    end
end

function lxml.last(id,pattern)
    local collected = xmlapplylpath(getid(id),pattern)
    if collected then
        last(collected)
    end
end

function lxml.all(id,pattern)
    local collected = xmlapplylpath(getid(id),pattern)
    if collected then
        all(collected)
    end
end

function lxml.count(id,pattern)
    -- always needs to produce a result so no test here
    count(xmlapplylpath(getid(id),pattern))
end

function lxml.attribute(id,pattern,a,default)
    local collected = xmlapplylpath(getid(id),pattern)
    if collected then
        attribute(collected,a,default)
    end
end

function lxml.parameter(id,pattern,p,default)
    local collected = xmlapplylpath(getid(id),pattern)
    if collected then
        parameter(collected,p,default)
    end
end

lxml.param = lxml.parameter

-- Print the serialized content of the first match (or of id itself when no
-- pattern is given) under notcatcodes.
function lxml.raw(id,pattern) -- the content, untouched by commands
    local collected = (pattern and xmlapplylpath(getid(id),pattern)) or getid(id)
    if collected and #collected > 0 then
        local s = xmltostring(collected[1].dt)
        if s ~= "" then
            contextsprint(notcatcodes,s)
        end
    end
end

-- templates

-- As lxml.raw, but the string first goes through the p_texescape pattern.
function lxml.rawtex(id,pattern) -- the content, untouched by commands
    local collected = (pattern and xmlapplylpath(getid(id),pattern)) or getid(id)
    if collected and #collected > 0 then
        local s = xmltostring(collected[1].dt)
        if s ~= "" then
            contextsprint(notcatcodes,lpegmatch(p_texescape,s) or s)
        end
    end
end

-- Pass the first child of the first match (or of id itself) to ctx_text.
function lxml.context(id,pattern) -- the content, untouched by commands
    if pattern then
        local collected = xmlapplylpath(getid(id),pattern)
        if collected and #collected > 0 then
            ctx_text(collected[1].dt[1])
        end
    else
        local collected = getid(id)
        if collected then
            local dt = collected.dt
            if dt and #dt > 0 then
                ctx_text(dt[1])
            end
        end
    end
end

-- Flush the matches (or the children of id when no pattern is given) with the
-- text finalizer.
function lxml.text(id,pattern)
    if pattern then
        local collected = xmlapplylpath(getid(id),pattern)
        if collected and #collected > 0 then
            text(collected)
        end
    else
        local e = getid(id)
        if e then
            text(e.dt)
        end
    end
end

-- As lxml.text, but with parsedentity temporarily set to unescapedentity;
-- afterwards it is set to reparsedentity.
function lxml.pure(id,pattern)
    if pattern then
        local collected = xmlapplylpath(getid(id),pattern)
        if collected and #collected > 0 then
            parsedentity = unescapedentity
            text(collected)
            parsedentity = reparsedentity
        end
    else
        parsedentity = unescapedentity
        local e = getid(id)
        if e then
            text(e.dt)
        end
        parsedentity = reparsedentity
    end
end

lxml.content = text

function lxml.position(id,pattern,n)
    position(xmlapplylpath(getid(id),pattern),tonumber(n))
end
function lxml.chainattribute(id,pattern,a,default)
    chainattribute(xmlapplylpath(getid(id),pattern),a,default)
end

function lxml.path(id,pattern,nonamespace)
    chainpath(xmlapplylpath(getid(id),pattern),nonamespace)
end

function lxml.concatrange(id,pattern,start,stop,separator,lastseparator,textonly) -- test this on mml
    concatrange(xmlapplylpath(getid(id),pattern),start,stop,separator,lastseparator,textonly)
end

function lxml.concat(id,pattern,separator,lastseparator,textonly)
    concatrange(xmlapplylpath(getid(id),pattern),false,false,separator,lastseparator,textonly)
end

-- Flush the n-th child element (pattern "/*") of id.
function lxml.element(id,n)
    position(xmlapplylpath(getid(id),"/*"),tonumber(n)) -- tonumber handy
end

lxml.index = lxml.position

-- Print the "ni" field of element id, or 0.
function lxml.pos(id)
    local e = getid(id)
    contextsprint(ctxcatcodes,e and e.ni or 0)
end

-- Attribute access. The last value looked up is kept in "att" so that
-- lxml.lastatt can print it again.

do

    local att

    -- Print attribute a of element id, or default when the attribute (or the
    -- element) is missing. An explicitly empty attribute prints nothing.
    function lxml.att(id,a,default)
        local e = getid(id)
        if e then
            local at = e.at
            if at then
                -- normally always true
                att = at[a]
                if not att then
                    if default and default ~= "" then
                        att = default
                        contextsprint(notcatcodes,default)
                    end
                elseif att ~= "" then
                    contextsprint(notcatcodes,att)
                else
                    -- explicit empty is valid
                end
            elseif default and default ~= "" then
                att = default
                contextsprint(notcatcodes,default)
            end
        elseif default and default ~= "" then
            att = default
            contextsprint(notcatcodes,default)
        else
            att = ""
        end
    end

    -- Pass attribute a of element id to context(); the default argument is
    -- not used here.
    function lxml.texatt(id,a,default)
        local e = getid(id)
        if e then
            local at = e.at
            if at then
                att = at[a]
                if att ~= "" then
                 -- context(ctxcatcodes,att)
                    context(att)
                end
            else
                att = ""
            end
        else
            att = ""
        end
    end

    -- Return true when attribute a of element id equals value (a missing
    -- attribute counts as "").
    function lxml.ifatt(id,a,value)
        local e = getid(id)
        if e then
            local at = e.at
            att = at and at[a] or ""
        else
            att = ""
        end
        return att == value
    end

    -- Return true when attribute a of element id is missing or empty.
    function lxml.ifattempty(id,a)
        local e = getid(id)
        if e then
            local at = e.at
            att = at and at[a] or ""
        else
            att = ""
        end
        return att == ""
    end

    -- Print attribute a of element id with leading "#" characters removed.
    function lxml.refatt(id,a)
        local e = getid(id)
        if e then
            local at = e.at
            if at then
                att = at[a]
                if att and att ~= "" then
                    att = gsub(att,"^#+","")
                    if att ~= "" then
                        contextsprint(notcatcodes,att)
                        return
                    end
                end
            end
        end
        att = ""
    end

    -- Print the value remembered by the last attribute lookup above.
    function lxml.lastatt()
        contextsprint(notcatcodes,att)
    end

    implement {
        name      = "xmldoifatt",
        arguments = "3 strings",
        public    = true,
        actions   = function(id,k,v)
            local e = getid(id)
            ctx_doif(e and e.at[k] == v or false)
        end
    }

    implement {
        name      = "xmldoifnotatt",
        arguments = "3 strings",
        public    = true,
        actions   = function(id,k,v)
            local e = getid(id)
            ctx_doifnot(e and e.at[k] == v or false)
        end
    }

    implement {
        name      = "xmldoifelseatt",
        arguments = "3 strings",
        public    = true,
        actions   = function(id,k,v)
            local e = getid(id)
            ctx_doifelse(e and e.at[k] == v or false)
        end
    }

end

-- Parameter access: same logic as lxml.att, but on the "pa" table.

do

    local par

    function lxml.par(id,p,default)
        local e = getid(id)
        if e then
            local pa = e.pa
            if pa then
                -- normally always true
                par = pa[p]
                if not par then
                    if default and default ~= "" then
                        par = default
                        contextsprint(notcatcodes,default)
                    end
                elseif par ~= "" then
                    contextsprint(notcatcodes,par)
                else
                    -- explicit empty is valid
                end
            elseif default and default ~= "" then
                par = default
                contextsprint(notcatcodes,default)
            end
        elseif default and default ~= "" then
            par = default
            contextsprint(notcatcodes,default)
        else
            par = ""
        end
    end

    function lxml.lastpar()
        contextsprint(notcatcodes,par)
    end

end

-- Print "ns:tag" (or just the tag) of element id; e.rn is preferred over e.ns.
function lxml.name(id)
    local e = getid(id)
    if e then
        local ns = e.rn or e.ns
        if ns and ns ~= "" then
            contextsprint(ctxcatcodes,ns,":",e.tg)
        else
            contextsprint(ctxcatcodes,e.tg)
        end
    end
end

-- Print the "mi" field of element id, or 0.
function lxml.match(id)
    local e = getid(id)
    contextsprint(ctxcatcodes,e and e.mi or 0)
end

function lxml.tag(id) -- tag vs name -> also in l-xml tag->name
    local e = getid(id)
    if e then
        local tg = e.tg
        if tg and tg ~= "" then
            contextsprint(ctxcatcodes,tg)
        end
    end
end

function lxml.namespace(id)
    local e = getid(id)
    if e then
        local ns = e.rn or e.ns
        if ns and ns ~= "" then
            contextsprint(ctxcatcodes,ns)
        end
    end
end

-- Flush the children of element id.
function lxml.flush(id)
    local e = getid(id)
    if e then
        local dt = e.dt
        if dt then
            xmlsprint(dt,e)
        end
    end
end

function lxml.lastmatch()
    local collected = xmllastmatch()
    if collected then
        all(collected)
    end
end

lxml.pushmatch = xmlpushmatch
lxml.popmatch  = xmlpopmatch

-- Flush the i-th child of element id.
function lxml.snippet(id,i)
    local e = getid(id)
    if e then
        local dt = e.dt
        if dt then
            local dti = dt[tonumber(i)] -- string in lxml
            if dti then
                xmlsprint(dti,e)
            end
        end
    end
end

-- Flush element id itself.
function lxml.direct(id)
    local e = getid(id)
    if e then
        xmlsprint(e)
    end
end

-- lxml.command(id,pattern,cmd): emit an xmlw call {cmd}{rootname::ix} for each
-- match, indexing the tree via addindex when a match has no index yet. The two
-- variants only differ in how the xmlw command is passed.

if tokenizedxmlw then

    function lxml.command(id,pattern,cmd)
        local i, p = getid(id,true)
        local collected = xmlapplylpath(getid(i),pattern) -- again getid?
        if collected then
            local nc = #collected
            if nc > 0 then
                local rootname = p or i.name
                for c=1,nc do
                    local e = collected[c]
                    local ix = e.ix
                    if not ix then
                        addindex(rootname,false,true)
                        ix = e.ix
                    end
                    contextsprint(ctxcatcodes,tokenizedxmlw,"{",cmd,"}{",rootname,"::",ix,"}")
                end
            end
        end
    end

else

    function lxml.command(id,pattern,cmd)
        local i, p = getid(id,true)
        local collected = xmlapplylpath(getid(i),pattern) -- again getid?
        if collected then
            local nc = #collected
            if nc > 0 then
                local rootname = p or i.name
                for c=1,nc do
                    local e = collected[c]
                    local ix = e.ix
                    if not ix then
                        addindex(rootname,false,true)
                        ix = e.ix
                    end
                    contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",rootname,"::",ix,"}")
                end
            end
        end
    end

end

-- loops

function lxml.collected(id,pattern,reverse)
    return xmlcollected(getid(id),pattern,reverse)
end

function lxml.elements(id,pattern,reverse)
    return xmlelements(getid(id),pattern,reverse)
end

-- testers

do

    local found, empty = xml.found, xml.empty

    function lxml.doif        (id,pattern) ctx_doif    (found(getid(id),pattern)) end
    function lxml.doifnot     (id,pattern) ctx_doifnot (found(getid(id),pattern)) end
    function lxml.doifelse    (id,pattern) ctx_doifelse(found(getid(id),pattern)) end
    function lxml.doiftext    (id,pattern) ctx_doif    (not empty(getid(id),pattern)) end
    function lxml.doifnottext (id,pattern) ctx_doifnot (not empty(getid(id),pattern)) end
    function lxml.doifelsetext(id,pattern) ctx_doifelse(not empty(getid(id),pattern)) end

    -- special case: "*" and "" -> self else lpath lookup

    -- Without a pattern the element itself is tested: it is empty when it has
    -- no children or a single "" child. Otherwise xml.empty does the lookup.
    local function checkedempty(id,pattern)
        local e = getid(id)
        if not pattern or pattern == "" then
            local dt = e.dt
            local nt = #dt
            return (nt == 0) or (nt == 1 and dt[1] == "")
        else
            return empty(getid(id),pattern)
        end
    end

    xml.checkedempty = checkedempty

    function lxml.doifempty    (id,pattern) ctx_doif    (checkedempty(id,pattern)) end
    function lxml.doifnotempty (id,pattern) ctx_doifnot (checkedempty(id,pattern)) end
    function lxml.doifelseempty(id,pattern) ctx_doifelse(checkedempty(id,pattern)) end

end

-- status info

statistics.register("xml load time", function()
    if noffiles > 0 or nofconverted > 0 then
        return format("%s seconds, %s files, %s converted", statistics.elapsedtime(xml), noffiles, nofconverted)
    else
        return nil
    end
end)

statistics.register("lxml preparation time", function()
    if noffiles > 0 or nofconverted > 0 then
        local calls  = xml.lpathcalls()
        local cached = xml.lpathcached()
        if calls > 0 or cached > 0 then
            return format("%s seconds, %s nodes, %s lpath calls, %s cached calls", statistics.elapsedtime(lxml), nofindices, calls, cached)
        else
            return nil
        end
    else
        -- pretty close to zero so not worth mentioning
    end
end)

-- Writes one line per profiled pattern to the log file and returns the totals.
statistics.register("lxml lpath profile", function()
    local p = xml.profiled
    if p and next(p) then
        local s = table.sortedkeys(p)
        local tested, matched, finalized = 0, 0, 0
        logs.pushtarget("logfile")
        logs.writer("\nbegin of lxml profile\n")
        logs.writer("\n tested matched finalized pattern\n\n")
        for i=1,#s do
            local pattern = s[i]
            local pp = p[pattern]
            local t, m, f = pp.tested, pp.matched, pp.finalized
            tested, matched, finalized = tested + t, matched + m, finalized + f
            logs.writer(format("%9i %9i %9i %s",t,m,f,pattern))
        end
        logs.writer("\nend of lxml profile\n")
        logs.poptarget()
        return format("%s patterns, %s tested, %s matched, %s finalized (see log for details)",#s,tested,matched,finalized)
    else
        return nil
    end
end)

-- misc

function lxml.nonspace(id,pattern) -- slow, todo loop
    xmltprint(xmlcollect(getid(id),pattern,true))
end

function lxml.strip(id,pattern,nolines,anywhere,everywhere)
    xml.strip(getid(id),pattern,nolines,anywhere,everywhere)
end

-- Flush the text of the match with leading and trailing whitespace removed;
-- with nolines every whitespace run is collapsed into one space.
function lxml.stripped(id,pattern,nolines)
    local root = getid(id)
    local str = xmltext(root,pattern) or ""
    str = gsub(str,"^%s*(.-)%s*$","%1")
    if nolines then
        str = gsub(str,"%s+"," ")
    end
    xmlsprint(str,root)
end

function lxml.delete(id,pattern)
    xml.delete(getid(id),pattern)
end

lxml.obsolete = { }

lxml.get_id = getid
lxml.obsolete.get_id = getid

-- goodies:

-- Print dt[1] of every collected element passed through lettered.
function texfinalizers.lettered(collected)
    if collected then
        local nc = #collected
        if nc > 0 then
            for c=1,nc do
                contextsprint(ctxcatcodes,lettered(collected[c].dt[1]))
            end
        end
    end
end

-- function texfinalizers.apply(collected,what) -- to be tested
--     if collected then
--         for c=1,#collected do
--             contextsprint(ctxcatcodes,what(collected[c].dt[1]))
--         end
--     end
-- end

-- Pass the attributes of element id to context() as a "k=v,k=v" list (order
-- follows next(), so it is not defined).
function lxml.toparameters(id)
    local e = getid(id)
    if e then
        local a = e.at
        if a and next(a) then
            local setups, s = { }, 0
            for k, v in next, a do
                s = s + 1
                setups[s] = k .. "=" .. v
            end
            setups = concat(setups,",")
            -- tracing
            context(setups)
        end
    end
end

-- File layout used by lxml.tofile: xml declaration, the comment, then the
-- serialized fragment. The template needs two %s slots because format() gets
-- two values; with a single slot the fragment itself was silently dropped.
local template = '<?xml version="1.0" ?>\n\n<!-- %s -->\n\n%s'

-- Save the first match of pattern in filename, preceded by an xml declaration
-- and a comment (default "exported fragment"). Without a match the file is
-- removed so that no stale content is left behind.
function lxml.tofile(id,pattern,filename,comment)
    local collected = xmlapplylpath(getid(id),pattern)
    local root = collected and collected[1]
    if root then
        io.savedata(filename,format(template,comment or "exported fragment",tostring(root)))
    else
        os.remove(filename) -- get rid of old content
    end
end

texfinalizers.upperall = xmlfinalizers.upperall
texfinalizers.lowerall = xmlfinalizers.lowerall

-- Store the first match of pattern in buffer name, or erase that buffer when
-- nothing matches. unescaped == true stores xmlcontent (or, with contentonly,
-- the children serialized to text); unescaped == false concatenates what
-- xmlstring feeds to the callback; any other value stores tostring(match).
function lxml.tobuffer(id,pattern,name,unescaped,contentonly)
    local collected = xmlapplylpath(getid(id),pattern)
    if collected then
        local collected = collected[1]
        if unescaped == true then
            -- expanded entities !
            if contentonly then
                collected = xmlserializetotext(collected.dt)
            else
                collected = xmlcontent(collected)
            end
        elseif unescaped == false then
            local t = { }
            xmlstring(collected,function(s) t[#t+1] = s end)
            collected = concat(t)
        else
            collected = tostring(collected)
        end
        buffers.assign(name,collected)
    else
        buffers.erase(name)
    end
end

-- parameters

do

    -- Set attribute name of element id, creating the "at" table when needed.
    local function setatt(id,name,value)
        local e = getid(id)
        if e then
            local a = e.at
            if a then
                a[name] = value
            else
                e.at = { [name] = value }
            end
        end
    end

    -- Set parameter name of element id, creating the "pa" table when needed.
    local function setpar(id,name,value)
        local e = getid(id)
        if e then
            local p = e.pa
            if p then
                p[name] = value
            else
                e.pa = { [name] = value }
            end
        end
    end

    lxml.setatt = setatt
    lxml.setpar = setpar

    function lxml.setattribute(id,pattern,name,value)
        local collected = xmlapplylpath(getid(id),pattern)
        if collected then
            for i=1,#collected do
                setatt(collected[i],name,value)
            end
        end
    end

    function lxml.setparameter(id,pattern,name,value)
        local collected = xmlapplylpath(getid(id),pattern)
        if collected then
            for i=1,#collected do
                setpar(collected[i],name,value)
            end
        end
    end

    lxml.setparam = lxml.setparameter

end

-- relatively new:

do

    -- set of enabled categories (nil when none are set), shared by the
    -- injector handler, applyselectors and set/resetinjectors below
    local permitted       = nil
    local ctx_xmlinjector = context.xmlinjector

    -- Handler for "injector" processing instructions: the first option is the
    -- action, the remaining ones are categories. The action is injected when
    -- there are no categories, when a category is "*", or when a category is
    -- in the permitted set.
    xml.pihandlers["injector"] = function(category,rest,e)
        local options = options_to_array(rest)
        local action  = options[1]
        if not action then
            return
        end
        local n = #options
        if n > 1 then
            local category = options[2]
            if category == "*" then
                ctx_xmlinjector(action)
            elseif permitted then
                if n == 2 then
                    if permitted[category] then
                        ctx_xmlinjector(action)
                    end
                else
                    for i=2,n do
                        local category = options[i]
                        if category == "*" or permitted[category] then
                            ctx_xmlinjector(action)
                            return
                        end
                    end
                end
            end
        else
            ctx_xmlinjector(action)
        end
    end

    -- splits "context-<what> <rest>" into what and rest
    local pattern = P("context-") * C((1-patterns.whitespace)^1) * C(P(1)^1)

    -- Walk the tree of id and process "context-select" / "context-include"
    -- processing instructions that carry "begin" and "end" markers:
    --
    -- * select:  when none of the begin categories is permitted, the elements
    --            up to the matching end are retagged "@cm@"
    -- * include: when a begin category is permitted, "@cm@" nodes up to the
    --            matching end are replaced by elements built from their content
    function lxml.applyselectors(id)
        local root = getid(id)
        local function filter(e)
            local dt = e.dt
            if not dt then
                report_lxml("error in selector, no data in %a",e.tg or "?")
                return
            end
            local ndt  = #dt
            local done = false
            local i = 1
            while i <= ndt do
                local dti = dt[i]
                if type(dti) == "table" then
                    if dti.tg == "@pi@" then
                        local text = dti.dt[1]
                        local what, rest = lpegmatch(pattern,text)
                        if what == "select" then
                            local categories = options_to_hash(rest)
                            if categories["begin"] then
                                local okay = false
                                if permitted then
                                    for k, v in next, permitted do
                                        if categories[k] then
                                            okay = k
                                            break
                                        end
                                    end
                                end
                                if okay then
                                    if trace_selectors then
                                        report_lxml("accepting selector: %s",okay)
                                    end
                                else
                                    categories.begin = false
                                    if trace_selectors then
                                        report_lxml("rejecting selector: % t",sortedkeys(categories))
                                    end
                                end
                                -- scan forward to the matching end marker
                                for j=i,ndt do
                                    local dtj = dt[j]
                                    if type(dtj) == "table" then
                                        local tg = dtj.tg
                                        if tg == "@pi@" then
                                            local text = dtj.dt[1]
                                            local what, rest = lpegmatch(pattern,text)
                                            if what == "select" then
                                                local categories = options_to_hash(rest)
                                                if categories["end"] then
                                                    i = j
                                                    break
                                                else
                                                    -- error
                                                end
                                            end
                                        elseif not okay then
                                            dtj.tg = "@cm@"
                                        end
                                    else
                                     -- dt[j] = "" -- okay ?
                                    end
                                end
                            end
                        elseif what == "include" then
                            local categories = options_to_hash(rest)
                            if categories["begin"] then
                                local okay = false
                                if permitted then
                                    for k, v in next, permitted do
                                        if categories[k] then
                                            okay = k
                                            break
                                        end
                                    end
                                end
                                if okay then
                                    if trace_selectors then
                                        report_lxml("accepting include: %s",okay)
                                    end
                                else
                                    categories.begin = false
                                    if trace_selectors then
                                        report_lxml("rejecting include: % t",sortedkeys(categories))
                                    end
                                end
                                if okay then
                                    for j=i,ndt do
                                        local dtj = dt[j]
                                        if type(dtj) == "table" then
                                            local tg = dtj.tg
                                            if tg == "@cm@" then
                                                local content = dtj.dt[1]
                                                local element = root and xml.toelement(content,root)
                                                dt[j] = element
                                                element.__p__ = dt -- needs checking
                                                done = true
                                            elseif tg == "@pi@" then
                                                local text = dtj.dt[1]
                                                local what, rest = lpegmatch(pattern,text)
                                                if what == "include" then
                                                    local categories = options_to_hash(rest)
                                                    if categories["end"] then
                                                        i = j
                                                        break
                                                    else
                                                        -- error
                                                    end
                                                end
                                            end
                                        end
                                    end
                                end
                            end
                        elseif dti then
                            filter(dti)
                        end
                    end
                    if done then
                        -- probably not needed
                        xml.reindex(dt)
                    end
                end
                i = i + 1
            end
        end
        xmlwithelements(root,filter)
    end

    -- Add the categories in set (a settings string) to the permitted ones.
    function xml.setinjectors(set)
        local s = settings_to_set(set)
        if permitted then
            for k, v in next, s do
                permitted[k] = true
            end
        else
            permitted = s
        end
    end

    -- Remove the categories in set from the permitted ones; without a set (or
    -- with an empty one) all categories are dropped.
    function xml.resetinjectors(set)
        if permitted and set and set ~= "" then
            local s = settings_to_set(set)
            for k, v in next, s do
                if v then
                    permitted[k] = nil
                end
            end
        else
            permitted = nil
        end
    end

end

-- hm, maybe to ini too, these implements

implement {
    name      = "xmlsetinjectors",
    actions   = xml.setinjectors,
    arguments = "string"
}

implement {
    name      = "xmlresetinjectors",
    actions   = xml.resetinjectors,
    arguments = "string"
}

implement {
    name      = "xmlapplyselectors",
    actions   = lxml.applyselectors,
    arguments = "string"
}

-- bonus: see x-lmx-html.mkiv

-- Rename the first collected element to name, store its serialization in
-- buffer "lmx:<name>" and process that buffer with the given setup (default
-- "lmx:<name>:setup"). A nil collection is treated like an empty one.
function texfinalizers.xml(collected,name,setup)
    local root = collected and collected[1]
    if not root then
        return
    end
    if not name or name == "" then
        report_lxml("missing name in xml finalizer")
        return
    end
    xmlrename(root,name)
    name = "lmx:" .. name
    buffers.assign(name,strip(xmltostring(root)))
    context.xmlprocessbuffer(name,name,setup or (name..":setup"))
end

-- experiment

do

    local xmltoelement = xml.toelement
    local xmlreindex   = xml.reindex

    -- Replace every match of pattern in root (a tree or an id string) by the
    -- content that results from converting "whatever" with xml.toelement.
    -- whatever is either a string or a function that is called with the
    -- matched element and returns a string (or nil to skip that match).
    function lxml.replace(root,pattern,whatever)
        if type(root) == "string" then
            root = lxml.getid(root)
        end
        local collected = xmlapplylpath(root,pattern)
        if collected then
            local isstring = type(whatever) == "string"
            for c=1,#collected do
                local e = collected[c]
                local p = e.__p__
                if p then
                    local d = p.dt
                    local n = e.ni
                    local w = isstring and whatever or whatever(e)
                    if w then
                        local t = xmltoelement(w,root).dt
                        if t then
                            t.__p__ = p
                            if type(t) == "table" then
                                -- the first new node takes the slot of the
                                -- match, the others are inserted after it
                                local t1 = t[1]
                                d[n] = t1
                                t1.at.type = e.at.type or t1.at.type
                                for i=2,#t do
                                    n = n + 1
                                    insert(d,n,t[i])
                                end
                            else
                                d[n] = t
                            end
                            xmlreindex(d) -- probably not needed
                        end
                    end
                end
            end
        end
    end

    -- function document.mess_around(root)
    --     lxml.replace(
    --         root,
    --         "p[@variant='foo']",
    --         function(c)
    --             return (string.gsub(tostring(c),"foo","%1"))
    --         end
    --     )
    -- end

end

do

    local lpegmatch = lpegmatch
    local unescaper = lpeg.patterns.urlunescaper

    -- Return attribute a of the first element with the urlunescaper pattern
    -- applied (a false value when there is no element or attribute).
    function xmlfinalizers.url(e,a)
        local u = #e > 0 and e[1].at[a]
        return u and lpegmatch(unescaper,u)
    end

    if CONTEXTLMTXMODE > 0 then
        function texfinalizers.url(e,a)
            local u = #e > 0 and e[1].at[a]
            if u then
                contextsprint(tex.hshcatcodes,string.texhashed(lpegmatch(unescaper,u)))
            end
        end
    else
        function texfinalizers.url(e,a)
            local u = #e > 0 and e[1].at[a]
            if u then
             -- context.verbatim(lpegmatch(unescaper,u))
                -- no hash intercept here, verbatim is new per 23-09-06
                context(lpegmatch(unescaper,u))
            end
        end
    end

end

if CONTEXTLMTXMODE > 0 then

    local setmacro = tokens.setters.macro

    -- Store the collapsed text of one collected element in macro macroname;
    -- index is 'first', 'last' or a number (default 1).
    xmlfinalizers.tomacro = function(collected,macroname,index)
        if macroname and macroname ~= '' then
            if index == 'last' then
                index = #collected
            elseif index == 'first' then
                index = 1
            else
                index = tonumber(index) or 1
            end
            setmacro(tex.nilcatcodes,macroname,collapse(xmltext(collected[index])))
        end
    end

end