if not modules then modules = { } end modules ['lxml-tex'] = {
version = 1.001,
comment = "companion to lxml-ini.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files"
}
-- Because we split and resolve entities we use the direct printing
-- interface and not the context one. If we ever do that there will
-- be a cldf-xml helper library.
local concat, insert, remove, sortedkeys, reversed = table.concat, table.insert, table.remove, table.sortedkeys, table.reverse
local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match
local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select
local lpegmatch = lpeg.match
local P, S, C = lpeg.P, lpeg.S, lpeg.C
local patterns = lpeg.patterns
local setmetatableindex = table.setmetatableindex
local formatters, strip, collapse = string.formatters, string.strip, utilities.strings.collapse
local tex, xml = tex, xml
local lowerchars, upperchars, lettered = characters.lower, characters.upper, characters.lettered
local basename, dirname, joinfile = file.basename, file.dirname, file.join
lxml = lxml or { }
local lxml = lxml
local catcodenumbers = catcodes.numbers
local ctxcatcodes = catcodenumbers.ctxcatcodes -- todo: use different method
local notcatcodes = catcodenumbers.notcatcodes -- todo: use different method
local commands = commands
local context = context
local contextsprint = context.sprint -- with catcodes (here we use fast variants, but with option for tracing)
local ctx_doif = commands.doif
local ctx_doifnot = commands.doifnot
local ctx_doifelse = commands.doifelse
local synctex = luatex.synctex
local implement = interfaces.implement
local xmlelements = xml.elements
local xmlcollected = xml.collected
local xmlsetproperty = xml.setproperty
local xmlwithelements = xml.withelements
local xmlserialize = xml.serialize
local xmlcollect = xml.collect
local xmltext = xml.text
local xmltostring = xml.tostring
local xmlapplylpath = xml.applylpath
local xmlunspecialized = xml.unspecialized
local xmldespecialized = xml.despecialized -- nicer in expanded xml
local xmlprivatetoken = xml.privatetoken
local xmlstripelement = xml.stripelement
local xmlinclusion = xml.inclusion
local xmlinclusions = xml.inclusions
local xmlbadinclusions = xml.badinclusions
local xmlcontent = xml.content
local xmllastmatch = xml.lastmatch
local xmlpushmatch = xml.pushmatch
local xmlpopmatch = xml.popmatch
local xmlstring = xml.string
local xmlserializetotext = xml.serializetotext
local xmlrename = xml.rename
local variables = interfaces and interfaces.variables or { }
local parsers = utilities.parsers
local settings_to_hash = parsers.settings_to_hash
local settings_to_set = parsers.settings_to_set
local options_to_hash = parsers.options_to_hash
local options_to_array = parsers.options_to_array
local insertbeforevalue = utilities.tables.insertbeforevalue
local insertaftervalue = utilities.tables.insertaftervalue
local resolveprefix = resolvers.resolve
local starttiming = statistics.starttiming
local stoptiming = statistics.stoptiming
-- Trace flags: each flag is switched by the tracker that is registered on the
-- same line. Note that the entity tracker is named "xml.entities" and not
-- "lxml.entities".
local trace_setups = false trackers.register("lxml.setups", function(v) trace_setups = v end)
local trace_loading = false trackers.register("lxml.loading", function(v) trace_loading = v end)
local trace_access = false trackers.register("lxml.access", function(v) trace_access = v end)
local trace_comments = false trackers.register("lxml.comments", function(v) trace_comments = v end)
local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end)
local trace_selectors = false trackers.register("lxml.selectors",function(v) trace_selectors = v end)
-- Reporters used for the messages issued by this module.
local report_lxml = logs.reporter("lxml","tex")
local report_xml = logs.reporter("xml","tex")
-- When true, entities and element tables are flushed in raw form; toggled by
-- lxml.startraw and lxml.stopraw.
local forceraw = false
local p_texescape = patterns.texescape
-- Only set when context.tokenizedcs exists and has an xmlw entry; the code
-- below falls back to the string "\\xmlw" otherwise.
local tokenizedxmlw = context.tokenizedcs and context.tokenizedcs.xmlw
directives.enable("xml.path.keeplastmatch")
-- tex entities
-- The tex entity table is kept when it already exists and is registered with
-- the storage mechanism under "lxml/entities".
lxml.entities = lxml.entities or { }
storage.register("lxml/entities",lxml.entities,"lxml.entities")
local xmlentities = xml.entities -- these are more or less standard entities
local texentities = lxml.entities -- these are specific for a tex run
local reparsedentity = xml.reparsedentitylpeg -- \Ux{...}
local unescapedentity = xml.unescapedentitylpeg
-- The lpeg that lxml.resolvedentity uses for numeric entities; switched by
-- lxml.startunescaped and lxml.stopunescaped.
local parsedentity = reparsedentity
local useelement = false -- probably no longer needed / used
-- Make lxml.resolvedentity parse numeric entities with the unescaped lpeg.
function lxml.startunescaped()
parsedentity = unescapedentity
end
-- Restore the default (reparsed) lpeg for numeric entities.
function lxml.stopunescaped()
parsedentity = reparsedentity
end
-- The directive value is stored as is; when set, unresolved entities are
-- passed to \xmle by lxml.resolvedentity.
directives.register("lxml.entities.useelement",function(v)
useelement = v
end)
-- Register (or overwrite) a tex specific entity. When the entity is
-- resolved, a function value gets called with the entity name and any
-- other value is passed to context().
function lxml.registerentity(key,value)
    local tracing = trace_entities
    texentities[key] = value
    if tracing then
        report_xml("registering tex entity %a as %a",key,value)
    end
end
-- Flush the entity with name `str` (the text between "&" and ";") to TeX.
--
-- The lookup order is:
--
--   1. raw mode     : the entity is passed on as "&name;"
--   2. tex entities : a function is called with the name, other values are
--                     passed to context()
--   3. xml entities : flushed under notcatcodes (a function is called first
--                     and its result is used)
--   4. numeric ones : parsed with the currently active entity lpeg
--   5. useelement   : the uppercased name is wrapped in \xmle{...}
--   6. fallback     : the bare name is flushed under notcatcodes
--
-- Nothing is returned.
function lxml.resolvedentity(str)
    if forceraw then
        -- should not happen as we then can as well bypass this function
        if trace_entities then
            report_xml("passing entity %a as &%s;",str,str)
        end
        context("&%s;",str)
        return
    end
    -- tex driven entities
    local t = texentities[str]
    if t then
        if type(t) == "function" then
            if trace_entities then
                report_xml("passing entity %a using function",str)
            end
            t(str)
        else
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,t,"ctxcatcodes")
            end
            context(t)
        end
        return
    end
    -- predefined xml entities
    local x = xmlentities[str]
    if x then
        if type(x) == "function" then
            x = x(str)
        end
        if x then
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,x,"notcatcodes")
            end
            contextsprint(notcatcodes,x)
            return
        end
    end
    -- resolve hex and dec, todo: escape # & etc for ctxcatcodes
    -- normally this is already solved while loading the file
    local chr, err = lpegmatch(parsedentity,str)
    if chr then
        if parsedentity == reparsedentity then
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,chr,"ctxcatcodes")
            end
            context(chr)
        else
            contextsprint(notcatcodes,chr)
            if trace_entities then
                report_xml("passing entity %a as %a using %a",str,chr,"notcatcodes")
            end
        end
    elseif err then
        if trace_entities then
            report_xml("passing faulty entity %a as %a",str,err)
        end
        context(err)
    elseif useelement then
        local tag = upperchars(str)
        if trace_entities then
            report_xml("passing entity %a to \\xmle using tag %a",str,tag)
        end
        -- The catcode table is looked up here because no file level local
        -- exists for it; ctxcatcodes is the fallback when it is not defined.
        local texcatcodes = catcodenumbers.texcatcodes or ctxcatcodes
        -- We flush the tag that is reported above; at this point no entity
        -- value is known (both lookups failed).
        -- NOTE(review): confirm that \xmle expects the uppercased tag here.
        contextsprint(texcatcodes,"\\xmle{")
        contextsprint(notcatcodes,tag)
        contextsprint(texcatcodes,"}")
    else
        if trace_entities then
            report_xml("passing entity %a as %a using %a",str,str,"notcatcodes")
        end
        contextsprint(notcatcodes,str)
    end
end
-- tex interface
-- The table of loaded documents, keyed by id (filled by lxml.store); an
-- already existing table is reused.
local loaded = lxml.loaded or { }
lxml.loaded = loaded
-- print(contextdirective("context-mathml-directive function reduction yes "))
-- print(contextdirective("context-mathml-directive function "))
xml.defaultprotocol = "tex"
-- Make sure that the xml and tex finalizer tables exist.
local finalizers = xml.finalizers
finalizers.xml = finalizers.xml or { }
finalizers.tex = finalizers.tex or { }
local xmlfinalizers = finalizers.xml
local texfinalizers = finalizers.tex
-- serialization with entity handling
-- Matches "&name;" and hands the captured name to lxml.resolvedentity.
local ampersand = P("&")
local semicolon = P(";")
local entity = (ampersand * C((1-semicolon)^1) * semicolon) / lxml.resolvedentity -- context.bold
-- Text handlers: the "_yes" variants get the entity pattern as exception and
-- the "_nop" variants don't. Only the second return value of
-- context.newtexthandler is used here.
local _, xmltextcapture_yes = context.newtexthandler {
catcodes = notcatcodes,
exception = entity,
}
local _, xmltextcapture_nop = context.newtexthandler {
catcodes = notcatcodes,
}
-- The space variants pass lines and spaces to the xmlcdataobeyed* commands.
local _, xmlspacecapture_yes = context.newtexthandler {
endofline = context.xmlcdataobeyedline,
emptyline = context.xmlcdataobeyedline,
simpleline = context.xmlcdataobeyedline,
space = context.xmlcdataobeyedspace,
catcodes = notcatcodes,
exception = entity,
}
local _, xmlspacecapture_nop = context.newtexthandler {
endofline = context.xmlcdataobeyedline,
emptyline = context.xmlcdataobeyedline,
simpleline = context.xmlcdataobeyedline,
space = context.xmlcdataobeyedspace,
catcodes = notcatcodes,
}
-- The line variants only treat lines in a special way, not spaces.
local _, xmllinecapture_yes = context.newtexthandler {
endofline = context.xmlcdataobeyedline,
emptyline = context.xmlcdataobeyedline,
simpleline = context.xmlcdataobeyedline,
catcodes = notcatcodes,
exception = entity,
}
local _, xmllinecapture_nop = context.newtexthandler {
endofline = context.xmlcdataobeyedline,
emptyline = context.xmlcdataobeyedline,
simpleline = context.xmlcdataobeyedline,
catcodes = notcatcodes,
}
-- The ctx variants use ctxcatcodes instead of notcatcodes.
local _, ctxtextcapture_yes = context.newtexthandler {
catcodes = ctxcatcodes,
exception = entity,
}
local _, ctxtextcapture_nop = context.newtexthandler {
catcodes = ctxcatcodes,
}
-- The active handlers; by default entities are handled. They can be swapped
-- with the directive "lxml.entities.escaped".
local xmltextcapture = xmltextcapture_yes
local xmlspacecapture = xmlspacecapture_yes
local xmllinecapture = xmllinecapture_yes
local ctxtextcapture = ctxtextcapture_yes
-- Select the text handlers: a true value activates the variants that
-- intercept entities, any other value the ones that don't.
directives.register("lxml.entities.escaped",function(v)
    if not v then
        xmltextcapture  = xmltextcapture_nop
        xmlspacecapture = xmlspacecapture_nop
        xmllinecapture  = xmllinecapture_nop
        ctxtextcapture  = ctxtextcapture_nop
        return
    end
    xmltextcapture  = xmltextcapture_yes
    xmlspacecapture = xmlspacecapture_yes
    xmllinecapture  = xmllinecapture_yes
    ctxtextcapture  = ctxtextcapture_yes
end)
-- cdata
-- The verbatim flusher that is used for cdata (see tex_cdata).
local toverbatim = context.newverbosehandler {
line = context.xmlcdataobeyedline,
space = context.xmlcdataobeyedspace,
before = context.xmlcdatabefore,
after = context.xmlcdataafter,
}
-- The public variant gets the same callbacks and in addition strip = true.
lxml.toverbatim = context.newverbosehandler {
line = context.xmlcdataobeyedline,
space = context.xmlcdataobeyedspace,
before = context.xmlcdatabefore,
after = context.xmlcdataafter,
strip = true,
}
-- raw flushing
-- Enable raw flushing (checked by lxml.resolvedentity and the print helpers).
function lxml.startraw()
forceraw = true
end
-- Disable raw flushing.
function lxml.stopraw()
forceraw = false
end
-- Return the last element that was flushed in raw mode.
-- NOTE(review): rawroot is not declared local anywhere in the visible part of
-- this file, so it is a global that gets set by sprint, tprint and cprint.
function lxml.rawroot()
return rawroot
end
-- storage
do

    -- We wrap the xml error handler so that we can count the errors per
    -- file and report them when the run finishes.

    local noferrors    = 0
    local errors       = setmetatableindex("number") -- presumably unknown keys start at zero
    local errorhandler = xml.errorhandler            -- the handler that we wrap

    -- Only messages that come with a non empty filename are counted; the
    -- original handler always gets the message (but not the filename).
    function xml.errorhandler(message,filename)
        if filename and filename ~= "" then
            noferrors = noferrors + 1
            errors[filename] = errors[filename] + 1
        end
        errorhandler(message) -- (filename)
    end

    -- One line per file, sorted by filename: the count followed by the name.
    local function flusherrors(report)
        for filename, count in table.sortedhash(errors) do
            report("%4i %s",count,filename)
        end
    end

    logs.registerfinalactions(function()
        if noferrors <= 0 then
            return
        end
        local report = logs.startfilelogging("lxml","problematic xml files")
        flusherrors(report)
        logs.stopfilelogging()
        --
        if logs.loggingerrors() then
            logs.starterrorlogging(report,"problematic xml files")
            flusherrors(report)
            logs.stoperrorlogging()
        end
    end)

end
-- Remember the tree `root` under `id` and set its "name" property to the id.
-- The "filename" property is only set when a filename is given.
function lxml.store(id,root,filename)
    loaded[id] = root
    xmlsetproperty(root,"name",id)
    if not filename then
        return
    end
    xmlsetproperty(root,"filename",filename)
end
-- Splits an id at "::" (ids look like "document::index", see getid).
local splitter = lpeg.splitat("::")
lxml.idsplitter = splitter
-- Split an id at "::" and return both parts. When the splitter gives no
-- first part we return an empty string and the id itself.
function lxml.splitid(id)
    local document, index = lpegmatch(splitter,id)
    if not document then
        return "", id
    end
    return document, index
end
-- Resolve an id to an xml element. The id can be:
--
--   the name of a loaded document : its root is returned
--   a table                       : returned as is
--   a string "document::index"    : the indexed element of that document
--
-- When `qualified` is set and the indexed lookup succeeds, the document
-- name is returned as second value. Nothing is returned when the lookup
-- fails; the reason is reported when the tracker lxml.access is enabled.
local function getid(id, qualified)
    if not id then
        if trace_access then
            report_lxml("invalid id (nil)")
        end
        return
    end
    local known = loaded[id]
    if known then
        return known
    end
    if type(id) == "table" then
        return id
    end
    local d, i = lpegmatch(splitter,id)
    if not d then
        if trace_access then
            report_lxml("%a is not loaded",i)
        end
        return
    end
    local document = loaded[d]
    if not document then
        if trace_access then
            report_lxml("%a is not loaded",d)
        end
        return
    end
    local index = document.index
    if not index then
        if trace_access then
            report_lxml("%a has no index",d)
        end
        return
    end
    local root = index[tonumber(i)]
    if not root then
        if trace_access then
            report_lxml("%a has no index entry %a",d,i)
        end
        return
    end
    if qualified then -- we need this else two args that confuse others
        return root, d
    end
    return root
end
-- Both names refer to the same function.
lxml.id = getid -- we provide two names as locals can already use such
lxml.getid = getid -- names and we don't want clashes
-- Return the tree stored under `id` (nil when nothing is stored); unlike
-- getid this is a plain lookup without "::" resolution.
function lxml.root(id)
return loaded[id]
end
-- index
local nofindices = 0

-- Give every element in the tree `name` (an id or element, resolved by
-- getid) a sequence number in its `ix` field and store the elements in
-- root.index. Nothing happens when the root already has an index, unless
-- `force` is set; in that case only elements without `ix` get numbered,
-- continuing after root.maxindex. The `check_sum` argument is not used.
local function addindex(name,check_sum,force)
    local root = getid(name)
    if not root or (root.index and not force) then -- weird, only called once
        return
    end
    local index    = root.index or { }
    local maxindex = root.maxindex or 0
    local check    = root.check or { } -- filled but not stored in the root
    local added    = 0
    local function nest(node)
        if not node.ix then
            maxindex = maxindex + 1
            node.ix = maxindex
            check[maxindex] = node.tg -- still needed ?
            index[maxindex] = node
            added = added + 1
        end
        local children = node.dt
        if children then
            for k=1,#children do
                local child = children[k]
                if type(child) == "table" then
                    nest(child)
                end
            end
        end
    end
    nest(root)
    nofindices = nofindices + added
    --
    if type(name) ~= "string" then
        name = "unknown"
    end
    root.index    = index
    root.maxindex = maxindex
    if trace_access then
        report_lxml("indexed entries %a, found nodes %a",tostring(name),maxindex)
    end
end

lxml.addindex = addindex
-- Expose addindex as "xmladdindex"; it is registered as public and with one
-- string argument, so check_sum and force are not passed.
implement {
name = "xmladdindex",
arguments = "string",
public = true,
actions = addindex,
}
-- another cache
-- Apply an lpath pattern to the element that `id` resolves to (via getid) and
-- return whatever xml.applylpath returns.
local function lxmlapplylpath(id,pattern) -- better inline, saves call
return xmlapplylpath(getid(id),pattern)
end
lxml.filter = lxmlapplylpath
-- Apply the lpath pattern to every id in `list`; the ids are separated by
-- commas and/or spaces. The results are not collected.
function lxml.filterlist(list,pattern)
    for entry in gmatch(list,"[^, ]+") do -- we could cache a table
        local root = getid(entry)
        xmlapplylpath(root,pattern)
    end
end
-- Call the function registered as xml.functions[name] with the element
-- that `id` resolves to and return its first result. When there is no such
-- function the (false or nil) table entry is returned.
function lxml.applyfunction(id,name)
    local action = xml.functions[name]
    if not action then
        return action
    end
    return (action(getid(id))) -- the parentheses limit us to one result
end
-- rather new, indexed storage (backward refs), maybe i will merge this
-- Return the index table of the tree `name`, or 0 when the id can't be
-- resolved or when the tree has no index.
function lxml.checkindex(name)
    local root = getid(name)
    if root then
        return root.index or 0
    end
    return 0
end
-- Flush \xmlw{command}{id} where the id is `n` itself when that already
-- has a second part after "::" and otherwise name::n. Which of the two
-- variants gets defined depends on a tokenized \xmlw being available.
if tokenizedxmlw then

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local _, qualified = lpegmatch(splitter,n)
        if not qualified then
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",name,"::",n,"}")
        else
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",n,"}")
        end
    end

else

    function lxml.withindex(name,n,command) -- will change as name is always there now
        local _, qualified = lpegmatch(splitter,n)
        if not qualified then
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",name,"::",n,"}")
        else
            contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",n,"}")
        end
    end

end
-- Flush a qualified id: `n` itself when it already has a second part after
-- "::" and otherwise name::n.
function lxml.getindex(name,n) -- will change as name is always there now
    local _, qualified = lpegmatch(splitter,n)
    if not qualified then
        contextsprint(ctxcatcodes,name,"::",n)
    else
        contextsprint(ctxcatcodes,n)
    end
end
-- loading (to be redone, no overload) .. best use different methods and
-- keep raw xml (at least as option)
-- Keep a reference to the loader that we overload below (only saved once).
xml.originalload = xml.originalload or xml.load
-- Counters that are incremented by the loaders in this file.
local noffiles = 0
local nofconverted = 0
-- Passed to the converter as the linenumbers setting; follows the synctex state.
local linenumbers = false
synctex.registerenabler (function() linenumbers = true end)
synctex.registerdisabler(function() linenumbers = false end)
-- Load and convert a file. The file is located and read by the resolvers;
-- an empty string is converted when that fails. A given settings table is
-- modified: linenumbers gets set and currentresource is set during the
-- conversion and reset afterwards.
function xml.load(filename,settings)
    noffiles     = noffiles + 1
    nofconverted = nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
    if not settings then
        settings = { }
    end
    settings.linenumbers     = linenumbers
    settings.currentresource = filename
    local source = (ok and data) or ""
    local xmltable = xml.convert(source,settings)
    settings.currentresource = nil
    stoptiming(xml)
    return xmltable
end
-- Resolve an entity while converting: tex entities become a private token
-- (they get resolved when flushed), then the given `ent` table (when there
-- is one) is consulted, then the predefined xml entities. Unknown entities
-- become a private token too so that the original is kept. The `id`
-- argument is not used.
local function entityconverter(id,str,ent) -- todo: disable tex entities when raw
    -- tex driven entity
    if texentities[str] then
        -- only once
        -- context.xmlprivate(p,t)
        return xmlprivatetoken(str)
    end
    -- dtd determined entity, else a predefined one (mathml and so)
    local resolved = (ent and ent[str]) or xmlentities[str]
    if resolved then
        return resolved
    end
    -- keep original somehow
    return xmlprivatetoken(str)
end
lxml.preprocessor = nil

-- Convert a string into an xml tree. Entities are resolved by the
-- entityconverter and the resource name defaults to the id. Comments and
-- doctypes are stripped (strip_cm_and_dt) when `compress` equals
-- variables.yes.
local function lxmlconvert(id,data,compress,currentresource)
    local function resolver(str,ent)
        return entityconverter(id,str,ent)
    end
    local stripped = compress and compress == variables.yes
    local settings = { -- we're now roundtrip anyway
        unify_predefined_entities   = false, -- is also default
        utfize_entities             = true,  -- is also default
        resolve_predefined_entities = true,  -- is also default
        resolve_entities            = resolver,
        currentresource             = tostring(currentresource or id),
        preprocessor                = lxml.preprocessor,
        linenumbers                 = linenumbers,
        strip_cm_and_dt             = stripped and true or nil, -- absent unless asked for
    }
    return xml.convert(data,settings)
end

lxml.convert = lxmlconvert
-- Load a file, convert it and store the tree under `id`. The filename first
-- goes through ctxrunner.preparedfile. An empty string is converted when
-- loading fails. The tree and the (prepared) filename are returned.
function lxml.load(id,filename,compress)
    filename = ctxrunner.preparedfile(filename)
    if trace_loading then
        report_lxml("loading file %a as %a",filename,id)
    end
    noffiles     = noffiles + 1
    nofconverted = nofconverted + 1
    starttiming(xml)
    local ok, data = resolvers.loadbinfile(filename)
    local source = (ok and data) or ""
 -- local xmltable = lxmlconvert(id,source,compress,formatters["id: %s, file: %s"](id,filename))
    local xmltable = lxmlconvert(id,source,compress,filename)
    stoptiming(xml)
    lxml.store(id,xmltable,filename)
    return xmltable, filename
end
-- Store an already converted tree under `id` (see lxml.store) and return it.
function lxml.register(id,xmltable,filename)
lxml.store(id,xmltable,filename)
return xmltable
end
-- recurse prepare rootpath resolve basename
-- The option sets for the downward compatible calls. These have to be
-- hashes because the options are tested by key (options.recurse and so);
-- with arrays none of the options would ever be active.
local options_true = { recurse = true, prepare = true, rootpath = true }
local options_nil  = { prepare = true, rootpath = true }

-- Process the inclusions in the tree `id`: `pattern` selects the include
-- elements and `attribute` names the attribute that holds the filename
-- (both are passed to xml.include). The options are:
--
--   true        : recurse, prepare and rootpath (downward compatible)
--   nil / false : prepare and rootpath (downward compatible)
--   otherwise   : a specification that is handled by settings_to_hash
--
-- Supported keys:
--
--   recurse  : passed to xml.include
--   prepare  : the filename goes through ctxrunner.preparedfile
--   basename : the path part of the filename is dropped
--   resolve  : prefixes in the filename are resolved
--   rootpath : a filename without path gets the path of the root file
function lxml.include(id,pattern,attribute,options)
    starttiming(xml)
    local root = getid(id)
    if options == true then
        -- downward compatible
        options = options_true
    elseif not options then
        -- downward compatible
        options = options_nil
    else
        options = settings_to_hash(options) or { }
    end
    xml.include(root,pattern,attribute,options.recurse,function(filename)
        if not filename then
            return ""
        end
        -- preprocessing
        if options.prepare then
            filename = ctxrunner.preparedfile(filename)
        end
        -- handy if we have a flattened structure
        if options.basename then
            filename = basename(filename)
        end
        if options.resolve then
            filename = resolveprefix(filename) or filename
        end
        -- some protection
        if options.rootpath and dirname(filename) == "" and root.filename then
            local dn = dirname(root.filename)
            if dn ~= "" then
                filename = joinfile(dn,filename)
            end
        end
        if trace_loading then
            report_lxml("including file %a",filename)
        end
        noffiles, nofconverted = noffiles + 1, nofconverted + 1
        -- the data and the name of the file that was found
        return
            resolvers.loadtexfile(filename) or "",
            resolvers.findtexfile(filename) or ""
    end)
    stoptiming(xml)
end
-- Typeset the `cf` field of the element (the file it comes from, see
-- syncfilename); nothing happens when the id can't be resolved.
function lxml.filename(id)
    local element = getid(id)
    if not element then
        return
    end
    context(element.cf)
end
-- Typeset the `cl` field of the element (the line it comes from, see
-- syncfilename); nothing happens when the id can't be resolved.
function lxml.fileline(id)
    local element = getid(id)
    if not element then
        return
    end
    context(element.cl)
end
-- Typeset what xml.inclusion reports for the element (`default` is passed
-- along); with `base` set only the base name is typeset.
function lxml.inclusion(id,default,base)
    local found = xmlinclusion(getid(id),default)
    if not found then
        return
    end
    local name = base and basename(found) or found
    context(name)
end
-- Typeset the list that xml.inclusions returns as a comma separated list.
function lxml.inclusions(id,sorted)
    local list = xmlinclusions(getid(id),sorted)
    if not list then
        return
    end
    context(concat(list,","))
end
-- Typeset the list that xml.badinclusions returns as a comma separated list.
function lxml.badinclusions(id,sorted)
    local list = xmlbadinclusions(getid(id),sorted)
    if not list then
        return
    end
    context(concat(list,","))
end
-- Save the tree that `id` resolves to (via getid) under `name`, using xml.save.
function lxml.save(id,name)
xml.save(getid(id),name)
end
-- Convert the content of a buffer to xml and serialize it again. The buffer
-- name defaults to the jobname. The serialized string is returned; before,
-- the result was computed but thrown away, so callers that ignore the
-- return value are not affected.
function xml.getbuffer(name,compress) -- we need to make sure that commands are processed
    if not name or name == "" then
        name = tex.jobname
    end
    nofconverted = nofconverted + 1
    local data = buffers.getcontent(name)
    local resource = format("buffer: %s",tostring(name or "?"))
    return xmltostring(lxmlconvert(name,data,compress,resource)) -- one buffer
end
-- Convert the content of one or more buffers and store the tree under `id`.
-- The buffer name defaults to the id. The tree and the buffer name that was
-- used are returned.
function lxml.loadbuffer(id,name,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local buffername = name or id
    local data = buffers.collectcontent(buffername) -- name can be list
    local resource = format("buffer: %s",tostring(name or id or "?"))
    local xmltable = lxmlconvert(id,data,compress,resource)
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, buffername
end
-- Convert a string (an empty one when nothing is passed) and store the tree
-- under `id`. The tree and the id are returned.
function lxml.loaddata(id,str,compress)
    starttiming(xml)
    nofconverted = nofconverted + 1
    local resource = format("id: %s",id)
    local xmltable = lxmlconvert(id,str or "",compress,resource)
    lxml.store(id,xmltable)
    stoptiming(xml)
    return xmltable, id
end
-- e.command:
--
-- string : setup
-- true : text (no )
-- false : ignore
-- function : call
-- Handler for doctype nodes ("@dt@"): nothing is flushed.
local function tex_doctype(e,handlers)
-- ignore
end
-- Handler for comment nodes ("@cm@"): nothing is flushed, the comment is
-- only reported when the tracker lxml.comments is enabled.
local function tex_comment(e,handlers)
    if not trace_comments then
        return
    end
    report_lxml("comment %a",e.dt[1])
end
-- The element handler of the "verbose" handlers; used for elements that have
-- no command (see tex_element).
local default_element_handler = xml.gethandlers("verbose").functions["@el@"]
-- Either false or synctex.setfilename, depending on the synctex state.
local setfilename = false
-- Set by the tracker "system.synctex.xml".
local trace_name = false
local report_name = logs.reporter("lxml")
synctex.registerenabler (function() setfilename = synctex.setfilename end)
synctex.registerdisabler(function() setfilename = false end)
-- Pass the file (`cf`) and line (`cl`, default 1) of an element to synctex.
-- Nothing happens when the element has no file. The `where` argument is
-- only used for tracing. Callers have to make sure that setfilename is set.
local function syncfilename(e,where)
    local currentfile = e.cf
    if not currentfile then
        return
    end
    local currentline = e.cl or 1
    if trace_name then
        report_name("set filename, case %a, tag %a, file %a, line %a",where,e.tg,currentfile,currentline)
    end
    setfilename(currentfile,currentline)
end
-- Enables the reports in syncfilename.
trackers.register("system.synctex.xml",function(v)
trace_name = v
end)
-- The element handler ("@el@"). What happens depends on e.command:
--
--   nil      : the default (verbose) element handler is used
--   true     : only the content is serialized
--   false    : the element is ignored
--   string   : \xmlw{command}{rootname::index} is flushed
--   function : the function is called with the element
--
-- There are two variants: one that uses the tokenized \xmlw and one that
-- uses the string "\\xmlw{". Apart from that they are the same.

local tex_element

if tokenizedxmlw then

 -- local expandmacro = token.expandmacro

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        local kind    = type(command)
        if kind == "nil" then
            default_element_handler(e,handlers)
        elseif kind == "boolean" then
            if command then
                -- text (no <self></self>) / so, no mkii fallback then
                handlers.serialize(e.dt,handlers)
            end
            -- false: ignore
        elseif kind == "string" then
            local rootname, ix = e.name, e.ix
            if not rootname then
                report_lxml("fatal error: no index for %a",command)
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",ix or 0,"}")
            else
                if not ix then
                    -- index the tree on demand
                    addindex(rootname,false,true)
                    ix = e.ix
                end
                -- lmtx only, same performance, a bit more immediate:
                --
                -- expandmacro(tokenizedxmlw,ctxcatcodes,true,command,true,rootname.."::"..ix)
                --
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",command,"}{",rootname,"::",ix,"}")
            end
        elseif kind == "function" then
            command(e)
        end
    end

else

    tex_element = function(e,handlers)
        if setfilename then
            syncfilename(e,"element")
        end
        local command = e.command
        local kind    = type(command)
        if kind == "nil" then
            default_element_handler(e,handlers)
        elseif kind == "boolean" then
            if command then
                -- text (no <self></self>) / so, no mkii fallback then
                handlers.serialize(e.dt,handlers)
            end
            -- false: ignore
        elseif kind == "string" then
            local rootname, ix = e.name, e.ix
            if not rootname then
                report_lxml("fatal error: no index for %a",command)
                contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",ix or 0,"}")
             -- contextsprint(ctxcatcodes,xmlw[command][false],ix or 0,"}")
            else
                if not ix then
                    -- index the tree on demand
                    addindex(rootname,false,true)
                    ix = e.ix
                end
                -- faster than context.xmlw
                contextsprint(ctxcatcodes,"\\xmlw{",command,"}{",rootname,"::",ix,"}")
             -- contextsprint(ctxcatcodes,xmlw[command][rootname],ix,"}")
            end
        elseif kind == "function" then
            command(e)
        end
    end

end
--
--
-- Handlers for processing instructions, keyed by category (see tex_pi).
local pihandlers = { } xml.pihandlers = pihandlers
local space = S(" \n\r")
local spaces = space^0
-- A (possibly empty) run of non space characters.
local class = C((1-space)^0)
local key = class
-- Everything that is left.
local rest = C(P(1)^0)
-- Everything that is left, except a single space character at the very end.
local value = C(P(1-(space * -1))^0)
-- Captures the category from "context-<category>-directive" or from
-- "context-directive <category>".
local category = P("context-") * (
C((1-P("-"))^1) * P("-directive")
+ P("directive") * spaces * key
)
-- Captures: category, value.
local c_parser = category * spaces * value -- rest
-- Captures: class, key, rest.
local k_parser = class * spaces * key * spaces * rest --value
-- Install a handler for the directive category `name`: the part of the
-- instruction after the category is split by k_parser and the three captures
-- are passed to context[csname].
implement {
name = "xmlinstalldirective",
arguments = "2 strings",
actions = function(name,csname)
if csname then
local keyvalueparser = k_parser / context[csname]
-- only `rest` is used; the signature matches the call in tex_pi
local keyvaluechecker = function(category,rest,e)
lpegmatch(keyvalueparser,rest)
end
pihandlers[name] = keyvaluechecker
end
end
}
-- Handler for processing instructions ("@pi@"). The instruction is split
-- into a category and the rest; when there is a non empty rest and a
-- handler for the category, the handler is called with the category, the
-- rest and the element.
local function tex_pi(e,handlers)
    local str = e.dt[1]
    if not str or str == "" then
        return
    end
    local category, rest = lpegmatch(c_parser,str)
    if not (category and rest) or #rest == 0 then
        return
    end
    local handler = pihandlers[category]
    if handler then
        handler(category,rest,e)
    end
end
-- Controls if cdata gets flushed at all (see tex_cdata); enabled by default.
local obeycdata = true
function lxml.setcdata()
obeycdata = true
end
-- After this cdata sections are skipped by tex_cdata.
function lxml.resetcdata()
obeycdata = false
end
-- Handler for cdata ("@cd@"): the content is flushed verbatim unless cdata
-- is disabled with lxml.resetcdata.
local function tex_cdata(e,handlers)
    if not obeycdata then
        return
    end
    toverbatim(e.dt[1])
end
-- we could try to merge the conversion and flusher but we don't gain much and it makes tracing
-- harder: xunspecialized = utf.remapper(xml.specialcodes,"dynamic",lxml.resolvedentity)
-- Handler for text ("@tx@"): the string goes through xml.unspecialized and
-- is then flushed with the active text handler.
local function tex_text(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmltextcapture,str)
end
--
-- Flush a string with the handler that uses ctxcatcodes.
local function ctx_text(e) -- can be just context(e) as we split there
lpegmatch(ctxtextcapture,e)
end
-- The `handle` function of the handlers below: all arguments are flushed
-- under ctxcatcodes.
local function tex_handle(...)
contextsprint(ctxcatcodes,...) -- notcatcodes is active anyway
end
-- The main serializer: it maps the node types onto the handlers defined
-- above. The document and root handlers are not overloaded.
local xmltexhandler = xml.newhandlers {
name = "tex",
handle = tex_handle,
functions = {
-- ["@dc@"] = tex_document,
["@dt@"] = tex_doctype,
-- ["@rt@"] = tex_root,
["@el@"] = tex_element,
["@pi@"] = tex_pi,
["@cm@"] = tex_comment,
["@cd@"] = tex_cdata,
["@tx@"] = tex_text,
}
}
lxml.xmltexhandler = xmltexhandler
-- begin of test
-- Text handler that flushes with the space handler (lines and spaces are
-- passed to the xmlcdataobeyed* commands).
local function tex_space(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmlspacecapture,str)
end
-- Same as the "tex" handlers but text is flushed by tex_space.
local xmltexspacehandler = xml.newhandlers {
name = "texspace",
handle = tex_handle,
functions = {
["@dt@"] = tex_doctype,
["@el@"] = tex_element,
["@pi@"] = tex_pi,
["@cm@"] = tex_comment,
["@cd@"] = tex_cdata,
["@tx@"] = tex_space,
}
}
-- Text handler that flushes with the line handler (only lines are passed
-- to the xmlcdataobeyedline command).
local function tex_line(e)
    local str = xmlunspecialized(e)
    lpegmatch(xmllinecapture,str)
end
-- Same as the "tex" handlers but text is flushed by tex_line.
local xmltexlinehandler = xml.newhandlers {
name = "texline",
handle = tex_handle,
functions = {
["@dt@"] = tex_doctype,
["@el@"] = tex_element,
["@pi@"] = tex_pi,
["@cm@"] = tex_comment,
["@cd@"] = tex_cdata,
["@tx@"] = tex_line,
}
}
-- Flush the content of an element with the "texspace" handlers; nothing
-- happens when the id can't be resolved or when there is no content.
function lxml.flushspacewise(id) -- keeps spaces and lines
    local root    = getid(id)
    local content = root and root.dt
    if not content then
        return
    end
    xmlserialize(content,xmltexspacehandler)
end
-- Flush the content of an element with the "texline" handlers; nothing
-- happens when the id can't be resolved or when there is no content.
function lxml.flushlinewise(id) -- keeps lines
    local root    = getid(id)
    local content = root and root.dt
    if not content then
        return
    end
    xmlserialize(content,xmltexlinehandler)
end
-- end of test
-- Serialize `root` with the "tex" handlers; the argument is passed as is (it
-- is not resolved by getid).
function lxml.serialize(root)
xmlserialize(root,xmltexhandler)
end
-- Set the command field of all elements that match the pattern to `action`
-- (see tex_element for the values that are supported).
function lxml.setaction(id,pattern,action)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    for c=1,#collected do
        local element = collected[c]
        element.command = action
    end
end
-- Flush a string or an element. Strings are unspecialized and then flushed
-- with the active text handler. Elements are serialized with the "tex"
-- handlers unless raw mode is active: then the element is remembered in
-- rawroot and its despecialized string representation is flushed as text.
-- The optional `p` is only used for passing file information to synctex.
local function sprint(root,p) -- check rawroot usage
    if not root then
        return
    end
    local kind = type(root)
    if kind == "string" then -- can also be result of lpath
     -- rawroot = false -- ?
        if setfilename and p then
            syncfilename(p,"sprint s")
        end
        lpegmatch(xmltextcapture,xmlunspecialized(root))
    elseif kind == "table" then
        if not forceraw then
            if setfilename and p then -- and not root.cl
                syncfilename(p,"sprint t")
            end
            xmlserialize(root,xmltexhandler)
        else
            rawroot = root
         -- contextsprint(ctxcatcodes,xmltostring(root)) -- goes wrong with % etc
         -- root = xmlunspecialized(xmltostring(root))   -- we loose < > &
            local str = xmldespecialized(xmltostring(root))
            lpegmatch(xmltextcapture,str) -- goes to toc
        end
    end
end
-- local function tprint(root) -- we can move sprint inline
-- local tr = type(root)
-- if tr == "table" then
-- local n = #root
-- if n == 0 then
-- -- skip
-- else
-- for i=1,n do
-- sprint(root[i])
-- end
-- end
-- elseif tr == "string" then
-- root = xmlunspecialized(root)
-- lpegmatch(xmltextcapture,root)
-- end
-- end
-- Flush a list of strings and/or elements, or a single string. Each entry
-- is handled the same way as in sprint: strings are unspecialized and
-- flushed as text, elements are serialized, or in raw mode flushed as
-- despecialized text after being remembered in rawroot.
--
-- We use locals for the converted strings. Assigning them to `root` (as
-- was done before) replaced the list that we loop over, so that all entries
-- after the first string got lost. It was also the unconverted entry that
-- got flushed, which in raw mode is a table and not a string.
local function tprint(root) -- we can move sprint inline
    local tr = type(root)
    if tr == "table" then
        for i=1,#root do
            -- sprint(root[i]) -- inlined because of filename:
            local ri = root[i]
            local ti = type(ri)
            if ti == "string" then -- can also be result of lpath
                -- a string carries no file information so there is nothing
                -- to pass to synctex here
                local str = xmlunspecialized(ri)
                lpegmatch(xmltextcapture,str)
            elseif ti == "table" then
                if forceraw then
                    rawroot = ri
                    local str = xmldespecialized(xmltostring(ri))
                    lpegmatch(xmltextcapture,str) -- goes to toc
                else
                    xmlserialize(ri,xmltexhandler)
                end
            end
        end
    elseif tr == "string" then
        local str = xmlunspecialized(root)
        lpegmatch(xmltextcapture,str)
    end
end
-- Flush the content of an element (or the element itself when it has no
-- dt field). Strings are unspecialized and flushed as text. In raw mode the
-- element is remembered in rawroot and the unspecialized string
-- representation of the whole element is flushed as text.
local function cprint(root) -- content
    if not root then
     -- rawroot = false
        return
    end
    if type(root) == 'string' then
     -- rawroot = false
        lpegmatch(xmltextcapture,xmlunspecialized(root))
        return
    end
    if setfilename then
        syncfilename(root,"cprint")
    end
    local content = root.dt
    if not forceraw then
        xmlserialize(content or root,xmltexhandler)
    else
        rawroot = root
     -- contextsprint(ctxcatcodes,xmltostring(content or root))
        local str = xmlunspecialized(xmltostring(root))
        lpegmatch(xmltextcapture,str) -- goes to toc
    end
end
-- The print helpers are exported in the xml namespace and aliased locally.
xml.sprint = sprint local xmlsprint = sprint -- calls ct mathml -> will be replaced
xml.tprint = tprint local xmltprint = tprint -- only used here
xml.cprint = cprint local xmlcprint = cprint -- calls ct mathml -> will be replaced
-- now we can flush
-- Serialize the complete tree that `id` resolves to with the "tex" handlers.
function lxml.main(id)
local root = getid(id)
xmlserialize(root,xmltexhandler) -- the real root (@rt@)
end
-- -- lines (untested)
--
-- local buffer = { }
--
-- local xmllinescapture = (
-- newline^2 / function() buffer[#buffer+1] = "" end +
-- newline / function() buffer[#buffer] = buffer[#buffer] .. " " end +
-- content / function(s) buffer[#buffer] = buffer[#buffer] .. s end
-- )^0
--
-- local xmllineshandler = table.copy(xmltexhandler)
--
-- xmllineshandler.handle = function(...) lpegmatch(xmllinescapture,concat{ ... }) end
--
-- function lines(root)
-- if not root then
-- -- rawroot = false
-- -- quit
-- elseif type(root) == 'string' then
-- -- rawroot = false
-- lpegmatch(xmllinescapture,root)
-- elseif next(root) then -- tr == 'table'
-- xmlserialize(root,xmllineshandler)
-- end
-- end
--
-- function xml.lines(root) -- used at all?
-- buffer = { "" }
-- lines(root)
-- return result
-- end
-- Give an element that has no command yet a default: special elements
-- other than the root ("@rt@") are skipped (false), all others are flushed
-- as text (true). An already set command is left untouched.
local function to_text(e)
    if e.command ~= nil then
        return
    end
    local tag  = e.tg
    local skip = tag and e.special and tag ~= "@rt@"
    e.command = not skip -- false: skip, true: text
end
-- Mark an element that has no command yet as to be skipped (false). An
-- already set command is left untouched.
local function to_none(e)
    if e.command ~= nil then
        return
    end
    e.command = false -- i.e. skip
end
-- setups
-- Per document (or "*") an ordered list of setup names, see lxml.installsetup.
local setups = { }
-- Apply to_text to the elements of the tree (via xml.withelements): elements
-- without a command are flushed as text, except special non root ones.
function lxml.setcommandtotext(id)
xmlwithelements(getid(id),to_text)
end
-- Apply to_none to the elements of the tree (via xml.withelements): elements
-- without a command are skipped.
function lxml.setcommandtonone(id)
xmlwithelements(getid(id),to_none)
end
-- Register a setup for a document (default "*"). A setup that is already
-- in the list is first removed so that it ends up only once. The position
-- is determined by `what`:
--
--   1 : prepend
--   2 : append
--   3 : insert before the setup `where`
--   4 : insert after the setup `where`
--
-- Any other value only removes the setup from the list.
function lxml.installsetup(what,document,setup,where)
    document = document or "*"
    local sd = setups[document]
    if not sd then
        sd = { }
        setups[document] = sd
    end
    for k=1,#sd do
        if sd[k] == setup then
            -- We need a real remove here: setting the entry to nil leaves
            -- a hole in the list, after which the length and therefore the
            -- inserts below and the loops over this list are unreliable.
            remove(sd,k)
            break
        end
    end
    if what == 1 then
        if trace_loading then
            report_lxml("prepending setup %a for %a",setup,document)
        end
        insert(sd,1,setup)
    elseif what == 2 then
        if trace_loading then
            report_lxml("appending setup %a for %a",setup,document)
        end
        insert(sd,setup)
    elseif what == 3 then
        if trace_loading then
            report_lxml("inserting setup %a for %a before %a",setup,document,where)
        end
        insertbeforevalue(sd,setup,where)
    elseif what == 4 then
        if trace_loading then
            report_lxml("inserting setup %a for %a after %a",setup,document,where)
        end
        insertaftervalue(sd,setup,where)
    end
end
-- Flush \xmlsetup{id}{setup} for the setups of all given documents, in
-- the order of the arguments and of the lists. A setup that shows up more
-- than once is only flushed the first time.
function lxml.flushsetups(id,...)
    local done = { }
    local nofdocuments = select("#",...)
    for i=1,nofdocuments do
        local document = select(i,...)
        local list = setups[document]
        if not list then
            if trace_loading then
                report_lxml("no setups for %a",document)
            end
        else
            for k=1,#list do
                local setup = list[k]
                if not done[setup] then
                    if trace_loading then
                        report_lxml("applying setup %02i : %a to %a",k,setup,document)
                    end
                    contextsprint(ctxcatcodes,"\\xmlsetup{",id,"}{",setup,"}")
                    done[setup] = true
                end
            end
        end
    end
end
-- Forget all setups of a document; it gets a fresh empty list.
function lxml.resetsetups(document)
    local fresh = { }
    if trace_loading then
        report_lxml("resetting all setups for %a",document)
    end
    setups[document] = fresh
end
-- Remove a setup from the list of a document. Nothing happens when the
-- document has no setups or when the setup is not in the list.
function lxml.removesetup(document,setup)
    local s = setups[document]
    if not s then
        return
    end
    for i=1,#s do
        if s[i] == setup then
            if trace_loading then
                report_lxml("removing setup %a for %a",setup,document)
            end
            -- this has to be the list itself (before, an undefined
            -- variable was passed, which made this call fail)
            remove(s,i)
            break
        end
    end
end
-- Assigns the 'command' field of every element that matches 'pattern' in
-- the tree registered as 'id'. The value depends on 'setup':
--
--   nil, "" or "*"  : the element's own tag (e.tg)
--   "-"             : false (reported as "skipped" when tracing)
--   "+"             : true  (reported as "text" when tracing)
--   "prefix:*"      : prefix: followed by the element's tag
--   "prefix:-"      : false
--   "prefix:+"      : true
--   anything else   : the setup string itself
--
-- With trace_setups enabled each match is reported, as is a pattern that
-- gives no (or zero) matches. The traced and untraced loops are kept
-- separate so that the untraced ones stay tight.
function lxml.setsetup(id,pattern,setup)
-- case 1: no explicit setup name, only a mode (or nothing at all)
if not setup or setup == "" or setup == "*" or setup == "-" or setup == "+" then
local collected = xmlapplylpath(getid(id),pattern)
if collected then
local nc = #collected
if nc > 0 then
if trace_setups then
for c=1,nc do
local e = collected[c]
local ix = e.ix or 0
if setup == "-" then
e.command = false
report_lxml("lpath matched (a) %5i: %s = %s -> skipped",c,ix,setup)
elseif setup == "+" then
e.command = true
report_lxml("lpath matched (b) %5i: %s = %s -> text",c,ix,setup)
else
local tg = e.tg
if tg then -- to be sure
e.command = tg
local ns = e.rn or e.ns
if ns == "" then
report_lxml("lpath matched (c) %5i: %s = %s -> %s",c,ix,tg,tg)
else
report_lxml("lpath matched (d) %5i: %s = %s:%s -> %s",c,ix,ns,tg,tg)
end
end
end
end
elseif setup == "-" then
for c=1,nc do
collected[c].command = false
end
elseif setup == "+" then
for c=1,nc do
collected[c].command = true
end
else
for c=1,nc do
local e = collected[c]
e.command = e.tg
end
end
elseif trace_setups then
report_lxml("%s lpath matches for pattern: %s","zero",pattern)
end
elseif trace_setups then
report_lxml("%s lpath matches for pattern: %s","no",pattern)
end
else
-- case 2: "prefix:" followed by one of * - + ; a keeps the colon, b is the mode
local a, b = match(setup,"^(.+:)([%*%-%+])$")
if a and b then
local collected = xmlapplylpath(getid(id),pattern)
if collected then
local nc = #collected
if nc > 0 then
if trace_setups then
for c=1,nc do
local e = collected[c]
local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
if b == "-" then
e.command = false
if ns == "" then
report_lxml("lpath matched (e) %5i: %s = %s -> skipped",c,ix,tg)
else
report_lxml("lpath matched (f) %5i: %s = %s:%s -> skipped",c,ix,ns,tg)
end
elseif b == "+" then
e.command = true
if ns == "" then
report_lxml("lpath matched (g) %5i: %s = %s -> text",c,ix,tg)
else
report_lxml("lpath matched (h) %5i: %s = %s:%s -> text",c,ix,ns,tg)
end
else
e.command = a .. tg
if ns == "" then
report_lxml("lpath matched (i) %5i: %s = %s -> %s",c,ix,tg,e.command)
else
report_lxml("lpath matched (j) %5i: %s = %s:%s -> %s",c,ix,ns,tg,e.command)
end
end
end
elseif b == "-" then
for c=1,nc do
collected[c].command = false
end
elseif b == "+" then
for c=1,nc do
collected[c].command = true
end
else
for c=1,nc do
local e = collected[c]
e.command = a .. e.tg
end
end
elseif trace_setups then
report_lxml("%s lpath matches for pattern: %s","zero",pattern)
end
elseif trace_setups then
report_lxml("%s lpath matches for pattern: %s","no",pattern)
end
else
-- case 3: a plain setup name that is assigned as-is
local collected = xmlapplylpath(getid(id),pattern)
if collected then
local nc = #collected
if nc > 0 then
if trace_setups then
for c=1,nc do
local e = collected[c]
e.command = setup
local ns, tg, ix = e.rn or e.ns, e.tg, e.ix or 0
if ns == "" then
report_lxml("lpath matched (k) %5i: %s = %s -> %s",c,ix,tg,setup)
else
report_lxml("lpath matched (l) %5i: %s = %s:%s -> %s",c,ix,ns,tg,setup)
end
end
else
for c=1,nc do
collected[c].command = setup
end
end
elseif trace_setups then
report_lxml("%s lpath matches for pattern: %s","zero",pattern)
end
elseif trace_setups then
report_lxml("%s lpath matches for pattern: %s","no",pattern)
end
end
end
end
-- finalizers
-- Flushes the first match (if any).
local function first(collected)
    if not collected or #collected == 0 then
        return
    end
    xmlsprint(collected[1])
end
-- Flushes the last match (if any).
local function last(collected)
    local nc = collected and #collected or 0
    if nc > 0 then
        xmlsprint(collected[nc])
    end
end
-- Flushes all matches in document order.
local function all(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        xmlsprint(collected[c])
    end
end
-- Flushes all matches, last one first.
texfinalizers.reverse = function(collected)
    if not collected then
        return
    end
    local c = #collected
    while c >= 1 do
        xmlsprint(collected[c])
        c = c - 1
    end
end
-- Prints the number of matches; zero when there is no match list at all.
local function count(collected)
    local n = 0
    if collected then
        n = #collected
    end
    contextsprint(ctxcatcodes,n) -- why ctxcatcodes
end
-- Flushes the n-th match. A negative n counts from the end (-1 is the last
-- match); zero, a non-number or an out of range index flushes nothing.
local function position(collected,n)
    -- todo: if not n then == match
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    if n < 0 then
        n = nc + n + 1
    end
    if n <= 0 then
        return
    end
    local cn = collected[n]
    if cn then
        xmlsprint(cn)
    end
end
-- Prints the 'mi' field of the first match, or 0 when there is none.
local function match(collected) -- is match in preceding collected, never change, see bibxml
    local mi = 0
    if collected then
        local m = collected[1]
        if m then
            mi = m.mi or 0
        end
    end
    contextsprint(ctxcatcodes,mi) -- why ctxcatcodes
end
-- Prints the 'ni' field of the n-th match (negative n counts from the end).
-- Always prints something: 0 when the match cannot be resolved.
local function index(collected,n)
    local ni = 0
    if collected then
        local nc = #collected
        if nc > 0 then
            n = tonumber(n) or 0
            if n < 0 then
                n = nc + n + 1 -- brrr
            end
            if n > 0 then
                local cn = collected[n]
                if cn then
                    ni = cn.ni or 0
                end
            end
        end
    end
    contextsprint(ctxcatcodes,ni) -- why ctxcatcodes
end
-- the number of commands is often relatively small but there can be many calls
-- to this finalizer
-- The command finalizer: for each match an \xmlw call is printed with the
-- given command and a "name::index" reference to the element. A "*" in the
-- command is replaced by the tag of the element. Without matches the
-- 'otherwise' command (when given) is called with "#1" as reference. The
-- two variants only differ in how the \xmlw call itself is passed on.
local command

if tokenizedxmlw then

    command = function(collected,cmd,otherwise)
        local n = collected and #collected or 0
        if n == 0 then
            if otherwise then
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",otherwise,"}{#1}")
            end
            return
        end
        local wildcard = find(cmd,"*",1,true)
        for c=1,n do -- maybe optimize for n=1
            local e = collected[c]
            local name = e.name
            local ix = e.ix
            if name and not ix then
                addindex(name,false,true)
                ix = e.ix
            end
            if ix and name then
                local action = cmd
                if wildcard then
                    action = gsub(cmd,"%*",e.tg)
                end
                contextsprint(ctxcatcodes,tokenizedxmlw,"{",action,"}{",name,"::",ix,"}")
            else
                report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
            end
        end
    end

else

    command = function(collected,cmd,otherwise)
        local n = collected and #collected or 0
        if n == 0 then
            if otherwise then
                contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}")
            end
            return
        end
        local wildcard = find(cmd,"*",1,true)
        for c=1,n do -- maybe optimize for n=1
            local e = collected[c]
            local name = e.name
            local ix = e.ix
            if name and not ix then
                addindex(name,false,true)
                ix = e.ix
            end
            if ix and name then
                local action = cmd
                if wildcard then
                    action = gsub(cmd,"%*",e.tg)
                end
                contextsprint(ctxcatcodes,"\\xmlw{",action,"}{",name,"::",ix,"}")
            else
                report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
            end
        end
    end

end
-- local wildcards = setmetatableindex(function(t,k)
-- local v = false
-- if find(k,"*",1,true) then
-- v = setmetatableindex(function(t,kk)
-- local v = gsub(k,"%*",kk)
-- t[k] = v
-- -- report_lxml("wildcard %a key %a value %a",kk,k,v)
-- return v
-- end)
-- end
-- t[k] = v
-- return v
-- end)
--
-- local function command(collected,cmd,otherwise)
-- local n = collected and #collected
-- if n and n > 0 then
-- local wildcard = wildcards[cmd]
-- for c=1,n do -- maybe optimize for n=1
-- local e = collected[c]
-- local ix = e.ix
-- local name = e.name
-- if name and not ix then
-- addindex(name,false,true)
-- ix = e.ix
-- end
-- if not ix or not name then
-- report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
-- elseif wildcard then
-- contextsprint(ctxcatcodes,"\\xmlw{",wildcard[e.tg],"}{",name,"::",ix,"}")
-- else
-- contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}")
-- end
-- end
-- elseif otherwise then
-- contextsprint(ctxcatcodes,"\\xmlw{",otherwise,"}{#1}")
-- end
-- end
-- Prints attribute a of the first match, falling back on default when the
-- attribute (or the match) is missing. Empty values are not printed.
local function attribute(collected,a,default)
    if not collected or #collected == 0 then
        if default then
            contextsprint(notcatcodes,default)
        end
        return
    end
    local at = collected[1].at
    local str = (at and at[a]) or default
    if str and str ~= "" then
        contextsprint(notcatcodes,str)
    end
end
-- Prints parameter p (from the 'pa' table) of the first match, falling back
-- on default when the parameter (or the match) is missing.
local function parameter(collected,p,default)
    local found = collected and #collected > 0
    if found then
        local pa = collected[1].pa
        local str = (pa and pa[p]) or default
        if str and str ~= "" then
            contextsprint(notcatcodes,str)
        end
    elseif default then
        contextsprint(notcatcodes,default)
    end
end
-- Looks for an attribute on the first match and, when not found there, on
-- its ancestors (following __p__). The first value found is printed;
-- otherwise the default (when given) is printed.
local function chainattribute(collected,arguments,default) -- todo: optional levels
    if collected and #collected > 0 then
        local e = collected[1]
        while e do
            local at = e.at
            if not at then
                break -- error
            end
            local a = at[arguments]
            if a then
                contextsprint(notcatcodes,a)
                return
            end
            e = e.__p__
        end
    end
    if default then
        contextsprint(notcatcodes,default)
    end
end
-- Prints the path from the root to the first match as "a/b[2]/c[1]". Steps
-- are collected while walking up via __p__ and reversed afterwards. A step
-- whose parent is the "@rt@" node gets no [index]; the namespace prefix is
-- left out when nonamespace is set or when there is no namespace.
local function chainpath(collected,nonamespace)
    if not collected or #collected == 0 then
        return
    end
    local e = collected[1]
    local steps = { }
    while e do
        local tg = e.tg
        local rt = e.__p__
        local ns = e.ns
        if tg == "@rt@" then
            break
        end
        local toplevel = rt.tg == "@rt@"
        local step = tg
        if not (nonamespace or not ns or ns == "") then
            step = ns .. ":" .. tg
        end
        if not toplevel then
            step = step .. "[" .. e.ei .. "]"
        end
        steps[#steps+1] = step
        e = rt
    end
    contextsprint(notcatcodes,concat(reversed(steps),"/"))
end
-- Flushes every match through cprint.
local function text(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        cprint(collected[c])
    end
end
-- Prints the data (dt) of every match under the context catcode regime.
local function ctxtext(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        contextsprint(ctxcatcodes,e.dt)
    end
end
-- Flushes each match after passing it through xmlstripelement.
texfinalizers.stripped = function(collected) -- tricky as we strip in place
    if not collected then
        return
    end
    for c=1,#collected do
        local stripped = xmlstripelement(collected[c])
        cprint(stripped)
    end
end
-- Flushes the collapsed text of the first match; nothing when that text is
-- empty or when there is no match.
texfinalizers.collapsed = function(collected)
    if not collected or #collected == 0 then
        return
    end
    local s = xmltext(collected[1])
    if s == "" then
        return
    end
    sprint(collapse(s))
end
-- Prints the lowercased first data item of every match.
texfinalizers.lower = function(collected)
    -- the guard was inverted ("if not collected"), which made the finalizer
    -- a no-op for real matches and an error for a missing match list
    if collected then
        local nc = #collected
        if nc > 0 then
            for c=1,nc do
                contextsprint(ctxcatcodes,lowerchars(collected[c].dt[1]))
            end
        end
    end
end
-- Prints the uppercased first data item of every match.
texfinalizers.upper = function(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local first = collected[c].dt[1]
        contextsprint(ctxcatcodes,upperchars(first))
    end
end
-- Prints the sum of the numbers found as first data item of the matches;
-- prints 0 when there are no matches.
local function number(collected)
    local nc = collected and #collected or 0
    local n = 0
    if nc > 0 then
        for c=1,nc do
            -- content that is not a number counts as zero; without the
            -- fallback tonumber returns nil and the addition raises an error
            n = n + (tonumber(collected[c].dt[1] or 0) or 0)
        end
    end
    contextsprint(ctxcatcodes,n)
end
-- Flushes the matches start..stop with separators in between. An empty or
-- missing start/stop means first/last; a negative stop counts back from the
-- end. After the one-but-last match of the whole list lastseparator is used
-- (when not empty), after the last match nothing is printed. With textonly
-- the matches are flushed with xmlcprint instead of xmlsprint.
local function concatrange(collected,start,stop,separator,lastseparator,textonly) -- test this on mml
    if not collected then
        return
    end
    local total = #collected
    if total == 0 then
        return
    end
    separator = separator or ""
    lastseparator = lastseparator or separator or ""
    start = (start == "" and 1) or tonumber(start) or 1
    stop = (stop == "" and total) or tonumber(stop) or total
    if stop < 0 then
        stop = total + stop -- -1 == last-1
    end
    local flush = xmlsprint
    if textonly then
        flush = xmlcprint
    end
    for i=start,stop do
        flush(collected[i])
        if i ~= total then
            local between = separator
            if i == total-1 and lastseparator ~= "" then
                between = lastseparator
            end
            if between ~= "" then
                contextsprint(ctxcatcodes,between)
            end
        end
    end
end
-- Concatenates all matches: concatrange without an explicit range.
local function concatlist(collected,separator,lastseparator,textonly) -- test this on mml
    local start, stop = false, false
    concatrange(collected,start,stop,separator,lastseparator,textonly)
end
-- Prints the nesting depth of the first match: the number of nodes on the
-- __p__ chain, the match itself included. Prints 0 when there is no match
-- or when the match has no tag.
local function depth(collected)
    local d = 0
    local c = collected and collected[1]
    -- an empty list (e.g. lxml.depth on an unknown id) gives no first
    -- entry, so c has to be checked before its tag is looked at
    if c and c.tg then
        while c do
            d = d + 1
            c = c.__p__
        end
    end
    contextsprint(ctxcatcodes,d)
end
-- todo just move up as not used local
texfinalizers.first = first
texfinalizers.last = last
texfinalizers.all = all
texfinalizers.count = count
texfinalizers.command = command
texfinalizers.attribute = attribute
texfinalizers.param = parameter -- obsolete
texfinalizers.parameter = parameter
texfinalizers.text = text
texfinalizers.ctxtext = ctxtext
texfinalizers.context = ctxtext
texfinalizers.position = position
texfinalizers.match = match
texfinalizers.index = index
texfinalizers.concat = concatlist
texfinalizers.concatrange = concatrange -- used below
texfinalizers.chainattribute = chainattribute
texfinalizers.chainpath = chainpath
texfinalizers.default = all -- !!
texfinalizers.depth = depth -- used below
--
-- Prints the tag of one of the matches: the first one when n is 0 or not a
-- number, collected[n] when n > 1 and collected[nc-n+1] otherwise.
function texfinalizers.tag(collected,n)
    if not collected then
        return
    end
    local nc = #collected
    if nc == 0 then
        return
    end
    n = tonumber(n) or 0
    local i = 1
    if n > 1 then
        i = n
    elseif n ~= 0 then
        i = nc - n + 1
    end
    local c = collected[i]
    if c then
        contextsprint(ctxcatcodes,c.tg)
    end
end
-- Prints the (namespace prefixed) name of one of the matches. The match is
-- selected as in texfinalizers.tag: the first one when n is 0, missing or
-- not a number, collected[n] when n > 1 and collected[nc-n+1] otherwise.
function texfinalizers.name(collected,n)
    if collected then
        local nc = #collected
        if nc > 0 then
            -- as in texfinalizers.tag: a string n would otherwise end up in
            -- a number comparison, which raises an error
            n = tonumber(n) or 0
            local c
            if n == 0 then
                c = collected[1]
            elseif n > 1 then
                c = collected[n]
            else
                c = collected[nc-n+1]
            end
            if c then
                local ns = c.ns
                if not ns or ns == "" then
                    contextsprint(ctxcatcodes,c.tg)
                else
                    contextsprint(ctxcatcodes,ns,":",c.tg)
                end
            end
        end
    end
end
-- Prints the tag of every match, prefixed by "namespace:" unless
-- nonamespace is set or the element has no namespace.
function texfinalizers.tags(collected,nonamespace)
    if not collected then
        return
    end
    for c=1,#collected do
        local e = collected[c]
        local ns = e.ns
        local prefixed = not nonamespace and ns and ns ~= ""
        if prefixed then
            contextsprint(ctxcatcodes,ns,":",e.tg)
        else
            contextsprint(ctxcatcodes,e.tg)
        end
    end
end
--
-- Typesets the serialized data of element id verbatim. When given, before
-- is printed first (followed by the tag between brackets) and after last.
local function verbatim(id,before,after)
    local e = getid(id)
    if not e then
        return
    end
    if before then
        contextsprint(ctxcatcodes,before,"[",e.tg or "?","]")
    end
    lxml.toverbatim(xmltostring(e.dt)) -- lxml.toverbatim(xml.totext(e.dt))
    if after then
        contextsprint(ctxcatcodes,after)
    end
end
-- Verbatim wrapped in the inline start/stop commands.
function lxml.inlineverbatim(id)
    local before = "\\startxmlinlineverbatim"
    local after = "\\stopxmlinlineverbatim"
    verbatim(id,before,after)
end
-- Verbatim wrapped in the display start/stop commands.
function lxml.displayverbatim(id)
    local before = "\\startxmldisplayverbatim"
    local after = "\\stopxmldisplayverbatim"
    verbatim(id,before,after)
end
lxml.verbatim = verbatim
-- helpers
-- Prints the depth of element id by handing it to the depth finalizer as a
-- one entry match list.
function lxml.depth(id)
    depth({ getid(id) })
end
-- Flushes the first element matching pattern below id.
function lxml.first(id,pattern)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    first(collected)
end
-- Flushes the last element matching pattern below id.
function lxml.last(id,pattern)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    last(collected)
end
-- Flushes all elements matching pattern below id.
function lxml.all(id,pattern)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    all(collected)
end
-- Prints the number of elements matching pattern below id.
function lxml.count(id,pattern)
    -- always needs to produce a result so no test here
    local collected = xmlapplylpath(getid(id),pattern)
    count(collected)
end
-- Prints attribute a (or default) of the first element matching pattern.
-- Without a match list nothing is printed, not even the default.
function lxml.attribute(id,pattern,a,default)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    attribute(collected,a,default)
end
-- Prints parameter p (or default) of the first element matching pattern.
-- Without a match list nothing is printed, not even the default.
function lxml.parameter(id,pattern,p,default)
    local collected = xmlapplylpath(getid(id),pattern)
    if not collected then
        return
    end
    parameter(collected,p,default)
end
lxml.param = lxml.parameter
-- Prints the serialized data of the first entry of the lookup result (the
-- lpath matches when a pattern is given, else whatever getid returns).
function lxml.raw(id,pattern) -- the content, untouched by commands
    local collected = pattern and xmlapplylpath(getid(id),pattern)
    if not collected then
        collected = getid(id)
    end
    if not collected or #collected == 0 then
        return
    end
    local s = xmltostring(collected[1].dt)
    if s ~= "" then
        contextsprint(notcatcodes,s)
    end
end
-- templates
-- As lxml.raw, but the serialized data is passed through p_texescape
-- before it is printed (the unprocessed string is used when that pattern
-- gives no result).
function lxml.rawtex(id,pattern) -- the content, untouched by commands
    local collected = pattern and xmlapplylpath(getid(id),pattern)
    if not collected then
        collected = getid(id)
    end
    if not collected or #collected == 0 then
        return
    end
    local s = xmltostring(collected[1].dt)
    if s ~= "" then
        contextsprint(notcatcodes,lpegmatch(p_texescape,s) or s)
    end
end
-- Passes the first data item of the first match (or, without pattern, of
-- element id itself) to ctx_text.
function lxml.context(id,pattern) -- the content, untouched by commands
    if not pattern then
        local e = getid(id)
        if e then
            local dt = e.dt
            if dt and #dt > 0 then
                ctx_text(dt[1])
            end
        end
        return
    end
    local collected = xmlapplylpath(getid(id),pattern)
    if collected and #collected > 0 then
        ctx_text(collected[1].dt[1])
    end
end
-- Flushes the text of the matches of pattern or, without pattern, the data
-- of element id.
function lxml.text(id,pattern)
    if not pattern then
        local e = getid(id)
        if e then
            text(e.dt)
        end
        return
    end
    local collected = xmlapplylpath(getid(id),pattern)
    if collected and #collected > 0 then
        text(collected)
    end
end
-- As lxml.text, but while flushing, parsedentity is set to unescapedentity;
-- afterwards it is set to reparsedentity again.
function lxml.pure(id,pattern)
    if not pattern then
        parsedentity = unescapedentity
        local e = getid(id)
        if e then
            text(e.dt)
        end
        parsedentity = reparsedentity
        return
    end
    local collected = xmlapplylpath(getid(id),pattern)
    if collected and #collected > 0 then
        parsedentity = unescapedentity
        text(collected)
        parsedentity = reparsedentity
    end
end
lxml.content = text
-- Flushes the n-th element matching pattern below id.
function lxml.position(id,pattern,n)
    local collected = xmlapplylpath(getid(id),pattern)
    position(collected,tonumber(n))
end
-- Prints attribute a of the first match or of one of its ancestors.
function lxml.chainattribute(id,pattern,a,default)
    local collected = xmlapplylpath(getid(id),pattern)
    chainattribute(collected,a,default)
end
-- Prints the path that leads to the first element matching pattern.
function lxml.path(id,pattern,nonamespace)
    local collected = xmlapplylpath(getid(id),pattern)
    chainpath(collected,nonamespace)
end
-- Concatenates a range of the elements matching pattern below id.
function lxml.concatrange(id,pattern,start,stop,separator,lastseparator,textonly) -- test this on mml
    local collected = xmlapplylpath(getid(id),pattern)
    concatrange(collected,start,stop,separator,lastseparator,textonly)
end
-- Concatenates all elements matching pattern below id.
function lxml.concat(id,pattern,separator,lastseparator,textonly)
    local collected = xmlapplylpath(getid(id),pattern)
    concatrange(collected,false,false,separator,lastseparator,textonly)
end
-- Flushes the n-th match of the "/*" pattern below id.
function lxml.element(id,n)
    local children = xmlapplylpath(getid(id),"/*")
    position(children,tonumber(n)) -- tonumber handy
end
lxml.index = lxml.position
-- Prints the 'ni' field of element id, or 0 when it is not available.
function lxml.pos(id)
    local e = getid(id)
    local ni = 0
    if e then
        ni = e.ni or 0
    end
    contextsprint(ctxcatcodes,ni)
end
do
local att
-- Prints attribute a of element id, or default when the attribute (or the
-- element) is missing. An explicitly empty attribute is valid and prints
-- nothing. The printed value is remembered for lxml.lastatt.
function lxml.att(id,a,default)
    local e = getid(id)
    local at = e and e.at -- normally always there
    local value = at and at[a]
    if not value and default and default ~= "" then
        value = default
    end
    -- always store a string: before, a missing attribute without default
    -- left att nil (or, without attribute table, at its previous value) so
    -- that lxml.lastatt printed nil or a stale value
    att = value or ""
    if att ~= "" then
        contextsprint(notcatcodes,att)
    end
end
-- As lxml.att, but the value is passed to context() and is thereby
-- interpreted as TeX input. The value is remembered for lxml.lastatt.
function lxml.texatt(id,a,default)
    local e = getid(id)
    local at = e and e.at
    local value = at and at[a]
    -- the default was accepted but never used; it is now handled the same
    -- way as in lxml.att
    if not value and default and default ~= "" then
        value = default
    end
    -- never leave att nil, so that lxml.lastatt has a string to print
    att = value or ""
    if att ~= "" then
        -- context(ctxcatcodes,att)
        context(att)
    end
end
-- Returns true when attribute a of element id equals value. A missing
-- element or attribute counts as the empty string. The attribute value is
-- remembered for lxml.lastatt.
function lxml.ifatt(id,a,value)
    local e = getid(id)
    local at = e and e.at
    att = at and at[a] or ""
    return att == value
end
-- Returns true when attribute a of element id is empty or missing. The
-- attribute value is remembered for lxml.lastatt.
function lxml.ifattempty(id,a)
    local e = getid(id)
    local at = e and e.at
    att = at and at[a] or ""
    return att == ""
end
-- Prints attribute a of element id with leading "#" characters removed.
-- When nothing is left (or found) nothing is printed and the remembered
-- value becomes the empty string.
function lxml.refatt(id,a)
    local e = getid(id)
    local at = e and e.at
    if at then
        att = at[a]
        if att and att ~= "" then
            att = gsub(att,"^#+","")
            if att ~= "" then
                contextsprint(notcatcodes,att)
                return
            end
        end
    end
    att = ""
end
-- Prints the value remembered by the last attribute accessor of this block.
function lxml.lastatt()
    local last = att
    contextsprint(notcatcodes,last)
end
-- True when element id exists and its attribute k equals v. The attribute
-- table is checked too (as in lxml.ifatt), so an element without one gives
-- false instead of an error.
local function hasattvalue(id,k,v)
    local e = getid(id)
    local at = e and e.at
    return (at and at[k] == v) or false
end

-- \xmldoifatt{id}{k}{v}: conditional on attribute k of id being equal to v
implement {
    name = "xmldoifatt",
    arguments = "3 strings",
    public = true,
    actions = function(id,k,v)
        ctx_doif(hasattvalue(id,k,v))
    end
}

-- \xmldoifnotatt{id}{k}{v}: the negated variant
implement {
    name = "xmldoifnotatt",
    arguments = "3 strings",
    public = true,
    actions = function(id,k,v)
        ctx_doifnot(hasattvalue(id,k,v))
    end
}

-- \xmldoifelseatt{id}{k}{v}: the two branch variant
implement {
    name = "xmldoifelseatt",
    arguments = "3 strings",
    public = true,
    actions = function(id,k,v)
        ctx_doifelse(hasattvalue(id,k,v))
    end
}
end
do
local par
-- Prints parameter p (from the 'pa' table) of element id, or default when
-- the parameter (or the element) is missing. An explicitly empty parameter
-- is valid and prints nothing. The value is remembered for lxml.lastpar.
function lxml.par(id,p,default)
    local e = getid(id)
    local pa = e and e.pa
    local value = pa and pa[p]
    if not value and default and default ~= "" then
        value = default
    end
    -- always store a string: before, a missing parameter without default
    -- left par nil (or, without parameter table, at its previous value) so
    -- that lxml.lastpar printed nil or a stale value
    par = value or ""
    if par ~= "" then
        contextsprint(notcatcodes,par)
    end
end
-- Prints the value remembered by the last lxml.par call.
function lxml.lastpar()
    local last = par
    contextsprint(notcatcodes,last)
end
end
-- Prints the name of element id: "namespace:tag" when there is a (remapped)
-- namespace, else just the tag.
function lxml.name(id)
    local e = getid(id)
    if not e then
        return
    end
    local ns = e.rn or e.ns
    if not ns or ns == "" then
        contextsprint(ctxcatcodes,e.tg)
    else
        contextsprint(ctxcatcodes,ns,":",e.tg)
    end
end
-- Prints the 'mi' field of element id, or 0 when it is not available.
function lxml.match(id)
    local e = getid(id)
    local mi = 0
    if e then
        mi = e.mi or 0
    end
    contextsprint(ctxcatcodes,mi)
end
-- Prints the tag of element id (nothing when it is missing or empty).
function lxml.tag(id) -- tag vs name -> also in l-xml tag->name
    local e = getid(id)
    local tg = e and e.tg
    if tg and tg ~= "" then
        contextsprint(ctxcatcodes,tg)
    end
end
-- Prints the (remapped) namespace of element id, if it has one.
function lxml.namespace(id)
    local e = getid(id)
    if not e then
        return
    end
    local ns = e.rn or e.ns
    if ns and ns ~= "" then
        contextsprint(ctxcatcodes,ns)
    end
end
-- Flushes the data of element id (not the element itself).
function lxml.flush(id)
    local e = getid(id)
    local dt = e and e.dt
    if dt then
        xmlsprint(dt,e)
    end
end
-- Flushes everything in the list returned by xmllastmatch.
function lxml.lastmatch()
    local collected = xmllastmatch()
    if not collected then
        return
    end
    all(collected)
end
lxml.pushmatch = xmlpushmatch
lxml.popmatch = xmlpopmatch
-- Flushes the i-th entry of the data of element id.
function lxml.snippet(id,i)
    local e = getid(id)
    local dt = e and e.dt
    if not dt then
        return
    end
    local dti = dt[tonumber(i)] -- string in lxml
    if dti then
        xmlsprint(dti,e)
    end
end
-- Flushes element id itself.
function lxml.direct(id)
    local e = getid(id)
    if not e then
        return
    end
    xmlsprint(e)
end
-- Calls cmd (via \xmlw) for every element matching pattern below id. Each
-- element is referenced as "rootname::index"; the index is created on
-- demand by addindex. The two variants only differ in how the \xmlw call
-- itself is passed on.
if tokenizedxmlw then

    function lxml.command(id,pattern,cmd)
        local i, p = getid(id,true)
        local collected = xmlapplylpath(getid(i),pattern) -- again getid?
        local nc = collected and #collected or 0
        if nc == 0 then
            return
        end
        local rootname = p or i.name
        for c=1,nc do
            local e = collected[c]
            if not e.ix then
                addindex(rootname,false,true)
            end
            contextsprint(ctxcatcodes,tokenizedxmlw,"{",cmd,"}{",rootname,"::",e.ix,"}")
        end
    end

else

    function lxml.command(id,pattern,cmd)
        local i, p = getid(id,true)
        local collected = xmlapplylpath(getid(i),pattern) -- again getid?
        local nc = collected and #collected or 0
        if nc == 0 then
            return
        end
        local rootname = p or i.name
        for c=1,nc do
            local e = collected[c]
            if not e.ix then
                addindex(rootname,false,true)
            end
            contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",rootname,"::",e.ix,"}")
        end
    end

end
-- loops
-- Returns what xml.collected gives for the tree registered as id.
function lxml.collected(id,pattern,reverse)
    local root = getid(id)
    return xmlcollected(root,pattern,reverse)
end
-- Returns what xml.elements gives for the tree registered as id.
function lxml.elements(id,pattern,reverse)
    local root = getid(id)
    return xmlelements(root,pattern,reverse)
end
-- testers
do

    local found, empty = xml.found, xml.empty

    -- conditionals on the presence of a match

    function lxml.doif(id,pattern)
        ctx_doif(found(getid(id),pattern))
    end

    function lxml.doifnot(id,pattern)
        ctx_doifnot(found(getid(id),pattern))
    end

    function lxml.doifelse(id,pattern)
        ctx_doifelse(found(getid(id),pattern))
    end

    -- conditionals on the match not being empty

    function lxml.doiftext(id,pattern)
        ctx_doif(not empty(getid(id),pattern))
    end

    function lxml.doifnottext(id,pattern)
        ctx_doifnot(not empty(getid(id),pattern))
    end

    function lxml.doifelsetext(id,pattern)
        ctx_doifelse(not empty(getid(id),pattern))
    end

    -- Without pattern (nil or "") the element itself is checked: it is
    -- empty when it has no data or only one empty string; with a pattern
    -- the lpath based check is used.

    local function checkedempty(id,pattern)
        local e = getid(id)
        if pattern and pattern ~= "" then
            return empty(getid(id),pattern)
        end
        local dt = e.dt
        local nt = #dt
        return (nt == 0) or (nt == 1 and dt[1] == "")
    end

    xml.checkedempty = checkedempty

    function lxml.doifempty(id,pattern)
        ctx_doif(checkedempty(id,pattern))
    end

    function lxml.doifnotempty(id,pattern)
        ctx_doifnot(checkedempty(id,pattern))
    end

    function lxml.doifelseempty(id,pattern)
        ctx_doifelse(checkedempty(id,pattern))
    end

end
-- status info
-- Reports the load time, but only when files were loaded or converted.
statistics.register("xml load time", function()
    if noffiles <= 0 and nofconverted <= 0 then
        return nil
    end
    local elapsed = statistics.elapsedtime(xml)
    return format("%s seconds, %s files, %s converted", elapsed, noffiles, nofconverted)
end)
-- Reports the preparation time and the lpath call counters, but only when
-- something was loaded and at least one lpath call took place.
statistics.register("lxml preparation time", function()
    if not (noffiles > 0 or nofconverted > 0) then
        -- pretty close to zero so not worth mentioning
        return
    end
    local calls = xml.lpathcalls()
    local cached = xml.lpathcached()
    if calls > 0 or cached > 0 then
        return format("%s seconds, %s nodes, %s lpath calls, %s cached calls",
            statistics.elapsedtime(lxml), nofindices, calls, cached)
    end
    return nil
end)
-- When xml.profiled holds data, a table with the counters per pattern is
-- written to the log file and a one line summary is returned.
statistics.register("lxml lpath profile", function()
    local profiled = xml.profiled
    if not (profiled and next(profiled)) then
        return nil
    end
    local patterns = table.sortedkeys(profiled)
    local tested, matched, finalized = 0, 0, 0
    logs.pushtarget("logfile")
    logs.writer("\nbegin of lxml profile\n")
    logs.writer("\n tested matched finalized pattern\n\n")
    for i=1,#patterns do
        local pattern = patterns[i]
        local entry = profiled[pattern]
        local t, m, f = entry.tested, entry.matched, entry.finalized
        tested = tested + t
        matched = matched + m
        finalized = finalized + f
        logs.writer(format("%9i %9i %9i %s",t,m,f,pattern))
    end
    logs.writer("\nend of lxml profile\n")
    logs.poptarget()
    return format("%s patterns, %s tested, %s matched, %s finalized (see log for details)",#patterns,tested,matched,finalized)
end)
-- misc
-- Flushes what xml.collect returns for pattern (called with true as third
-- argument) with xmltprint.
function lxml.nonspace(id,pattern) -- slow, todo loop
    local collected = xmlcollect(getid(id),pattern,true)
    xmltprint(collected)
end
-- Applies xml.strip to the tree registered as id.
function lxml.strip(id,pattern,nolines,anywhere,everywhere)
    local root = getid(id)
    xml.strip(root,pattern,nolines,anywhere,everywhere)
end
-- Flushes the text of the match with leading and trailing whitespace
-- removed; with nolines each run of whitespace becomes a single space.
function lxml.stripped(id,pattern,nolines)
    local root = getid(id)
    local str = gsub(xmltext(root,pattern) or "","^%s*(.-)%s*$","%1")
    if nolines then
        str = gsub(str,"%s+"," ")
    end
    xmlsprint(str,root)
end
-- Applies xml.delete to the tree registered as id.
function lxml.delete(id,pattern)
    local root = getid(id)
    xml.delete(root,pattern)
end
lxml.obsolete = { }
lxml.get_id = getid lxml.obsolete.get_id = getid
-- goodies:
-- Prints the first data item of every match after passing it through
-- characters.lettered.
function texfinalizers.lettered(collected)
    if not collected then
        return
    end
    for c=1,#collected do
        local first = collected[c].dt[1]
        contextsprint(ctxcatcodes,lettered(first))
    end
end
-- function texfinalizers.apply(collected,what) -- to be tested
-- if collected then
-- for c=1,#collected do
-- contextsprint(ctxcatcodes,what(collected[c].dt[1]))
-- end
-- end
-- end
-- Passes the attributes of element id to context() as a comma separated
-- list of key=value pairs (in the order in which next delivers them).
function lxml.toparameters(id)
    local e = getid(id)
    local a = e and e.at
    if not (a and next(a)) then
        return
    end
    local pairs, n = { }, 0
    for k, v in next, a do
        n = n + 1
        pairs[n] = k .. "=" .. v
    end
    local list = concat(pairs,",")
    -- tracing
    context(list)
end
-- The template takes two values: a comment and the serialized fragment.
-- With only one %s the comment ended up in the file and the fragment was
-- silently dropped.
local template = '<?xml version="1.0" ?>\n\n<!-- %s -->\n\n%s'

-- Saves the first element matching pattern below id in a file, preceded by
-- an xml declaration and a comment. Without a match an existing file is
-- removed, so that no old content is left behind.
function lxml.tofile(id,pattern,filename,comment)
    local collected = xmlapplylpath(getid(id),pattern)
    -- an empty match list is treated as no match, otherwise the string
    -- "nil" would be written
    local root = collected and collected[1]
    if root then
        io.savedata(filename,format(template,comment or "exported fragment",tostring(root)))
    else
        os.remove(filename) -- get rid of old content
    end
end
texfinalizers.upperall = xmlfinalizers.upperall
texfinalizers.lowerall = xmlfinalizers.lowerall
-- Stores the first element matching pattern below id in buffer 'name':
--
--   unescaped == true  : xmlserializetotext of the data when contentonly is
--                        set, otherwise xmlcontent of the element
--   unescaped == false : the pieces that xmlstring feeds to its handler
--   otherwise          : tostring of the element
--
-- Without a match the buffer is erased.
function lxml.tobuffer(id,pattern,name,unescaped,contentonly)
    local collected = xmlapplylpath(getid(id),pattern)
    -- an empty match list is treated as no match: indexing a missing first
    -- entry raised an error (unescaped) or stored the string "nil"
    local root = collected and collected[1]
    if not root then
        buffers.erase(name)
        return
    end
    local data
    if unescaped == true then
        -- expanded entities !
        if contentonly then
            data = xmlserializetotext(root.dt)
        else
            data = xmlcontent(root)
        end
    elseif unescaped == false then
        local t = { }
        xmlstring(root,function(s) t[#t+1] = s end)
        data = concat(t)
    else
        data = tostring(root)
    end
    buffers.assign(name,data)
end
-- parameters
do

    -- Sets attribute 'name' of element id, creating the attribute table
    -- when the element has none.

    local function setatt(id,name,value)
        local e = getid(id)
        if not e then
            return
        end
        local a = e.at
        if not a then
            e.at = { [name] = value }
        else
            a[name] = value
        end
    end

    -- Sets parameter 'name' of element id, creating the parameter table
    -- when the element has none.

    local function setpar(id,name,value)
        local e = getid(id)
        if not e then
            return
        end
        local p = e.pa
        if not p then
            e.pa = { [name] = value }
        else
            p[name] = value
        end
    end

    lxml.setatt = setatt
    lxml.setpar = setpar

    -- Sets an attribute on every element matching pattern below id.

    function lxml.setattribute(id,pattern,name,value)
        local collected = xmlapplylpath(getid(id),pattern)
        if not collected then
            return
        end
        for i=1,#collected do
            setatt(collected[i],name,value)
        end
    end

    -- Sets a parameter on every element matching pattern below id.

    function lxml.setparameter(id,pattern,name,value)
        local collected = xmlapplylpath(getid(id),pattern)
        if not collected then
            return
        end
        for i=1,#collected do
            setpar(collected[i],name,value)
        end
    end

    lxml.setparam = lxml.setparameter

end
-- relatively new:
do
local permitted = nil
local ctx_xmlinjector = context.xmlinjector
-- Handler for the injector processing instruction. The first option is the
-- action, the following ones are categories. The action is injected when
-- no categories are given, when the first category is "*", or when one of
-- the categories is permitted (with more than one category a later "*"
-- also does the job). Without a permitted set only "*" and the category
-- free form trigger the action.
xml.pihandlers["injector"] = function(category,rest,e)
    local options = options_to_array(rest)
    local action = options[1]
    if not action then
        return
    end
    local n = #options
    if n <= 1 then
        ctx_xmlinjector(action)
        return
    end
    local first = options[2]
    if first == "*" then
        ctx_xmlinjector(action)
    elseif permitted then
        if n == 2 then
            if permitted[first] then
                ctx_xmlinjector(action)
            end
        else
            for i=2,n do
                local candidate = options[i]
                if candidate == "*" or permitted[candidate] then
                    ctx_xmlinjector(action)
                    return
                end
            end
        end
    end
end
local pattern = P("context-") * C((1-patterns.whitespace)^1) * C(P(1)^1)
-- Applies the "context-select" and "context-include" processing
-- instructions in the tree registered as id. The instructions are
-- recognized with the 'pattern' defined just before this function, which
-- splits "context-<what> <rest>" into what and rest; rest is turned into a
-- hash of categories.
--
--   select  : between a "begin" and an "end" instruction the element
--             entries get the tag "@cm@" unless one of the begin
--             categories is permitted
--   include : when one of the begin categories is permitted, the "@cm@"
--             entries between "begin" and "end" are converted with
--             xml.toelement and take the place of the original entry
--
-- NOTE(review): "@cm@" is presumably the comment tag, so select hides
-- content and include reveals commented content -- confirm against the
-- parser.
function lxml.applyselectors(id)
local root = getid(id)
-- called for each element; walks the data entries of that element
local function filter(e)
local dt = e.dt
if not dt then
report_lxml("error in selector, no data in %a",e.tg or "?")
return
end
local ndt = #dt
local done = false
local i = 1
while i <= ndt do
local dti = dt[i]
if type(dti) == "table" then
if dti.tg == "@pi@" then
local text = dti.dt[1]
local what, rest = lpegmatch(pattern,text)
if what == "select" then
local categories = options_to_hash(rest)
if categories["begin"] then
-- okay becomes the first permitted category that is also in the begin set
local okay = false
if permitted then
for k, v in next, permitted do
if categories[k] then
okay = k
break
end
end
end
if okay then
if trace_selectors then
report_lxml("accepting selector: %s",okay)
end
else
categories.begin = false
if trace_selectors then
report_lxml("rejecting selector: % t",sortedkeys(categories))
end
end
-- scan forward to the matching end; i is moved there so that the outer
-- loop continues after the selected range
for j=i,ndt do
local dtj = dt[j]
if type(dtj) == "table" then
local tg = dtj.tg
if tg == "@pi@" then
local text = dtj.dt[1]
local what, rest = lpegmatch(pattern,text)
if what == "select" then
local categories = options_to_hash(rest)
if categories["end"] then
i = j
break
else
-- error
end
end
elseif not okay then
dtj.tg = "@cm@"
end
else
-- dt[j] = "" -- okay ?
end
end
end
elseif what == "include" then
local categories = options_to_hash(rest)
if categories["begin"] then
local okay = false
if permitted then
for k, v in next, permitted do
if categories[k] then
okay = k
break
end
end
end
if okay then
if trace_selectors then
report_lxml("accepting include: %s",okay)
end
else
categories.begin = false
if trace_selectors then
report_lxml("rejecting include: % t",sortedkeys(categories))
end
end
if okay then
for j=i,ndt do
local dtj = dt[j]
if type(dtj) == "table" then
local tg = dtj.tg
if tg == "@cm@" then
local content = dtj.dt[1]
-- NOTE(review): element is false/nil when root is missing, in which case
-- the __p__ assignment below fails -- confirm root is always valid here
local element = root and xml.toelement(content,root)
dt[j] = element
element.__p__ = dt -- needs checking
done = true
elseif tg == "@pi@" then
local text = dtj.dt[1]
local what, rest = lpegmatch(pattern,text)
if what == "include" then
local categories = options_to_hash(rest)
if categories["end"] then
i = j
break
else
-- error
end
end
end
end
end
end
end
-- any other processing instruction is handed to filter itself
elseif dti then
filter(dti)
end
end
if done then
-- probably not needed
xml.reindex(dt)
end
end
i = i + 1
end
end
xmlwithelements(root,filter)
end
-- Adds the categories in 'set' to the permitted ones; the first call
-- simply adopts the converted set.
function xml.setinjectors(set)
    local s = settings_to_set(set)
    if not permitted then
        permitted = s
        return
    end
    for k in next, s do
        permitted[k] = true
    end
end
-- Removes the categories in 'set' from the permitted ones; without a
-- (non empty) set all permissions are dropped.
function xml.resetinjectors(set)
    if not permitted or not set or set == "" then
        permitted = nil
        return
    end
    local s = settings_to_set(set)
    for k, v in next, s do
        if v then
            permitted[k] = nil
        end
    end
end
end
-- hm, maybe to ini to, these implements
-- interfaces to the injector and selector functions, each taking one
-- string argument

implement {
    name = "xmlsetinjectors",
    arguments = "string",
    actions = xml.setinjectors,
}

implement {
    name = "xmlresetinjectors",
    arguments = "string",
    actions = xml.resetinjectors,
}

implement {
    name = "xmlapplyselectors",
    arguments = "string",
    actions = lxml.applyselectors,
}
-- bonus: see x-lmx-html.mkiv
-- Renames the first match to 'name', stores its stripped serialization in
-- the buffer "lmx:<name>" and has that buffer processed as xml, using the
-- given setup or "lmx:<name>:setup".
function texfinalizers.xml(collected,name,setup)
    -- a finalizer can be called without match list, so check before indexing
    local root = collected and collected[1]
    if not root then
        return
    end
    if not name or name == "" then
        report_lxml("missing name in xml finalizer")
        return
    end
    xmlrename(root,name)
    name = "lmx:" .. name
    buffers.assign(name,strip(xmltostring(root)))
    context.xmlprocessbuffer(name,name,setup or (name..":setup"))
end
-- experiment
do
local xmltoelement = xml.toelement
local xmlreindex = xml.reindex
-- Replaces every element matching pattern below root by new content.
--
--   root     : an element, or a string that is resolved with lxml.getid
--   pattern  : the lpath pattern selecting the elements to replace
--   whatever : a string with xml, or a function that gets the matched
--              element and returns such a string (a false/nil result
--              leaves the element untouched)
--
-- The new content is parsed with xmltoelement; its first entry takes the
-- place of the matched element and the remaining entries are inserted
-- after it, after which the parent data is reindexed.
function lxml.replace(root,pattern,whatever)
if type(root) == "string" then
root = lxml.getid(root)
end
local collected = xmlapplylpath(root,pattern)
if collected then
local isstring = type(whatever) == "string"
for c=1,#collected do
local e = collected[c]
local p = e.__p__
if p then
local d = p.dt
-- position of the matched element in the data of its parent
local n = e.ni
local w = isstring and whatever or whatever(e)
if w then
local t = xmltoelement(w,root).dt
if t then
-- NOTE(review): __p__ is set on the data table t itself, not on the
-- entries that end up in d -- confirm this is intended
t.__p__ = p
if type(t) == "table" then
local t1 = t[1]
d[n] = t1
-- the type attribute of the replaced element wins over that of the new one
t1.at.type = e.at.type or t1.at.type
for i=2,#t do
n = n + 1
insert(d,n,t[i])
end
else
d[n] = t
end
xmlreindex(d) -- probably not needed
end
end
end
end
end
end
-- function document.mess_around(root)
-- lxml.replace(
-- root,
-- "p[@variant='foo']",
-- function(c)
-- return (string.gsub(tostring(c),"foo","%1"))
-- end
-- )
-- end
end
do

    local lpegmatch = lpegmatch
    local unescaper = lpeg.patterns.urlunescaper

    -- Returns the url-unescaped attribute a of the first match; when there
    -- is no match or no such attribute the (false or nil) lookup result is
    -- returned.

    function xmlfinalizers.url(e,a)
        local u = #e > 0 and e[1].at[a]
        if u then
            return (lpegmatch(unescaper,u))
        end
        return u
    end

    -- The tex variants print the unescaped attribute instead of returning it.

    if CONTEXTLMTXMODE > 0 then

        function texfinalizers.url(e,a)
            local u = #e > 0 and e[1].at[a]
            if not u then
                return
            end
            local unescaped = lpegmatch(unescaper,u)
            contextsprint(tex.hshcatcodes,string.texhashed(unescaped))
        end

    else

        function texfinalizers.url(e,a)
            local u = #e > 0 and e[1].at[a]
            if not u then
                return
            end
            -- context.verbatim(lpegmatch(unescaper,u)) -- no hash intercept here, verbatim is new per 23-09-06
            context(lpegmatch(unescaper,u))
        end

    end

end
if CONTEXTLMTXMODE > 0 then

    local setmacro = tokens.setters.macro

    -- Stores the collapsed text of one of the matches in macro macroname.
    -- The index can be 'first', 'last' or a number (default 1). Nothing
    -- happens without a macro name.

    xmlfinalizers.tomacro = function(collected,macroname,index)
        if not macroname or macroname == '' then
            return
        end
        local position
        if index == 'last' then
            position = #collected
        elseif index == 'first' then
            position = 1
        else
            position = tonumber(index) or 1
        end
        setmacro(tex.nilcatcodes,macroname,collapse(xmltext(collected[position])))
    end

end