strc-tag.lmt /size: 36 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['strc-tag'] = {
2    version   = 1.001,
3    comment   = "companion to strc-tag.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- This is rather experimental code. Tagging happens on the fly and there are two analysers
10-- involved: the pdf backend tagger and the exporter. They share data but there are subtle
11-- differences. Each tag carries a specification and these can be accessed by attribute (the
12-- end of the chain tag) or by so called fullname which is a tagname combined with a number.
13
14local type, next = type, next
15local insert, remove, unpack, concat, merge, sortedhash = table.insert, table.remove, table.unpack, table.concat, table.merge, table.sortedhash
16local find, topattern, format = string.find, string.topattern, string.format
17local lpegmatch, P, S, C, Cc = lpeg.match, lpeg.P, lpeg.S, lpeg.C, lpeg.Cc
18local allocate = utilities.storage.allocate
19local settings_to_hash = utilities.parsers.settings_to_hash
20local setmetatableindex = table.setmetatableindex
21
22local trace_tags = false  trackers.register("structures.tags", function(v) trace_tags = v end)
23
24local report_tags = logs.reporter("structure","tags")
25
26local attributes      = attributes
27local structures      = structures
28local implement       = interfaces.implement
29
30local a_tagged        <const> = attributes.private('tagged')
31local unsetvalue      <const> = attributes.unsetvalue
32
33local codeinjections  = backends.codeinjections
34
35local texgetattribute = tex.getattribute
36local texsetattribute = tex.setattribute
37
-- Registries that are exported via structures.tags further down:

local taglist         = allocate() -- specifications, access by attribute
local specifications  = allocate() -- specifications, access by fulltag (tag>number)
local labels          = allocate()
local tagpatterns     = { }

-- Bookkeeping per tag name:

local ids             = { }        -- last index handed out for a tag
local lasttags        = { }        -- last index started for a tag
local tagcontext      = { }        -- last complete tag (tag>number) started for a tag

-- The current nesting; both tables are indexed by level (1 .. stacksize):

local stack           = { }        -- attribute per level
local chain           = { }        -- complete tag per level
local stacksize       = 0
local enabled         = false

-- Metadata and additional serializers:

local metadata        = nil        -- applied to the next element
local documentdata    = { }
local extradata       = false

local tags            = structures.tags

tags.taglist          = taglist -- can best be hidden
tags.specifications   = specifications
tags.labels           = labels
tags.patterns         = tagpatterns
58
-- Returns the attribute at the top of the tag stack, or nothing when the stack is empty.

function tags.current()
    if stacksize <= 0 then
        return
    end
    return stack[stacksize] -- maybe copy or proxy
end
64
65-- Tags are internally stored as:
66--
67-- tag>number tag>number tag>number
68
-- Splits "tag>number" at the first ">" into two captures: the part before it and
-- everything after it.

local p_splitter = C((1 - P(">"))^1) * P(">") * C(P(1)^1)

tagpatterns.splitter = p_splitter
71
-- Tagging is not really meant for structure and is very much driven by simple documents,
-- which is why we have this H (version 1) and H* (version 2) without granularity. The fact
-- that (2024) the standards are not really a showcase of how a pdf file should look and
-- the version 2 spec is at most a version 1 document means that we have to resort to trial
-- and error.
76--
77-- After some testing we decided to make all NonStruct because these can nest and have no
78-- demands on what's inside, nor is it sensitive for Span and Div. Using the sort of generic
79-- Sect is no option either because it needs some content item at the innermost level. So,
80-- instead we wait for UA-5 to eventually come up with proper generic tagging (instead of
81-- this curious mix of html, long and short tags, and assumptions wrt simple usage). Maybe
82-- we should also adapt UA-1 support to this.
83--
84-- MS/HH 2024
85--
-- For ua-1 we now also go the NonStruct route: better this than some half-way mapping and
-- better enforce the dumbest reflow possible if one runs into it. We used to distinguish
-- but because the meanings have changed we can as well ditch it for a user installable
-- mapping which delegates the responsibility.
90--
-- The standard mentions that the meaning of the built in tags differs per version so that is
-- a good reason to drop using them. In due time we will therefore simplify the following table.
93--
94-- Btw, we don't want this to cripple our normal structuring (including export) so we have
95-- some constraints as well as gatekeeping.
96--
97-- Musical timestamp (ua-2 upgrade): Mandoki Soulmates – The Big Quit (LYRIC VIDEO, 2024)
-- which between the chorus lines has this subtle "visionary garbage" line ... indeed.
99
-- Some NonStruct might become Part, Sect or Sub in which case titles and tags become Lbl
-- but only when validators don't bark at it and the main structure is kept.
102
-- The properties per tag name. Each entry has a namespace, a nature ("display", "mixed"
-- or "inline") and the pdf structure type that the tag maps onto; some entries carry
-- extra fields like "record" or "pua". Entries can be changed or added at runtime by
-- tags.setproperty, tags.setaspect and the "backend.usetags" directive.
--
-- Keep the keys unique: Lua leaves the order of assignments in a table constructor
-- undefined, so with a repeated key it is not specified which entry survives.

local properties     = allocate { -- todo: more "record = true" to improve formatting

    document           = { namespace = "context", nature = "display", pdf = "Document"  },
    documentpart       = { namespace = "context", nature = "display", pdf = "DocumentFragment" },

    division           = { namespace = "context", nature = "display", pdf = "NonStruct" },
    paragraph          = { namespace = "context", nature = "mixed",   pdf = "P"         },
    subparagraph       = { namespace = "context", nature = "mixed",   pdf = "P"         },
    p                  = { namespace = "context", nature = "mixed",   pdf = "P"         },
    highlight          = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    ornament           = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    textdisplay        = { namespace = "context", nature = "display", pdf = "NonStruct" },
    placeholder        = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    ["break"]          = { namespace = "context", nature = "display", pdf = "NonStruct" },

    construct          = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    constructleft      = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    constructright     = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    constructcontent   = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    sectionblock       = { namespace = "context", nature = "display", pdf = "NonStruct" },

    section            = { namespace = "context", nature = "display", pdf = "NonStruct" },
    sectioncaption     = { namespace = "context", nature = "display", pdf = "NonStruct" , record = true },
    sectiontitle       = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    sectionnumber      = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    sectioncontent     = { namespace = "context", nature = "display", pdf = "NonStruct" },

    itemgroup          = { namespace = "context", nature = "display", pdf = "NonStruct" },
    item               = { namespace = "context", nature = "display", pdf = "NonStruct" },
    itemtag            = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    itemcontent        = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    itemhead           = { namespace = "context", nature = "display", pdf = "NonStruct" },
    itembody           = { namespace = "context", nature = "display", pdf = "NonStruct" },

    items              = { namespace = "context", nature = "display", pdf = "Table"     },
    itemsymbols        = { namespace = "context", nature = "mixed",   pdf = "TR"        },
    itemsymbol         = { namespace = "context", nature = "inline",  pdf = "TD"        },
    itemtexts          = { namespace = "context", nature = "mixed",   pdf = "TR"        },
    itemtext           = { namespace = "context", nature = "inline",  pdf = "TD"        },

    description        = { namespace = "context", nature = "display", pdf = "NonStruct" },
    descriptiontag     = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    descriptioncontent = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    descriptionsymbol  = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    verbatimblock      = { namespace = "context", nature = "display", pdf = "NonStruct" },
    verbatimlines      = { namespace = "context", nature = "display", pdf = "NonStruct" },
    verbatimline       = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    verbatim           = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    lines              = { namespace = "context", nature = "display", pdf = "NonStruct" },
    line               = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    linenumber         = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    synonym            = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    sorting            = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    register           = { namespace = "context", nature = "display", pdf = "NonStruct" },
    registerlocation   = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    registersection    = { namespace = "context", nature = "display", pdf = "NonStruct" },
    registertag        = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    registerentries    = { namespace = "context", nature = "display", pdf = "NonStruct" },
    registerentry      = { namespace = "context", nature = "display", pdf = "NonStruct" },
    registercontent    = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    registersee        = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    registerpages      = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    registerpage       = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    registerseparator  = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    registerpagerange  = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },

    table              = { namespace = "context", nature = "display", pdf = "Table",    },
    tablerow           = { namespace = "context", nature = "display", pdf = "TR",       },
    tablecell          = { namespace = "context", nature = "mixed",   pdf = "TD",       },
    tableheadcell      = { namespace = "context", nature = "mixed",   pdf = "TH",       },
    tablehead          = { namespace = "context", nature = "display", pdf = "THEAD",    },
    tablebody          = { namespace = "context", nature = "display", pdf = "TBODY",    },
    tablefoot          = { namespace = "context", nature = "display", pdf = "TFOOT",    },

    tabulate           = { namespace = "context", nature = "display", pdf = "Table",    },
    tabulaterow        = { namespace = "context", nature = "display", pdf = "TR",       },
    tabulatecell       = { namespace = "context", nature = "mixed",   pdf = "TD",       },
    tabulateheadcell   = { namespace = "context", nature = "mixed",   pdf = "TH",       },
    tabulatehead       = { namespace = "context", nature = "display", pdf = "THEAD",    },
    tabulatebody       = { namespace = "context", nature = "display", pdf = "TBODY",    },
    tabulatefoot       = { namespace = "context", nature = "display", pdf = "TFOOT",    },

    list               = { namespace = "context", nature = "display", pdf = "NonStruct" },
    listitem           = { namespace = "context", nature = "display", pdf = "NonStruct" },
    listtag            = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    listcontent        = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    listdata           = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    listpage           = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    listtext           = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    delimitedblock     = { namespace = "context", nature = "display", pdf = "NonStruct" },
    delimited          = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    delimitedcontent   = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    delimitedsymbol    = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    subsentence        = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    subsentencecontent = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    subsentencesymbol  = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    label              = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    number             = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },

    float              = { namespace = "context", nature = "display", pdf = "NonStruct" },
    floatcaption       = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    floatlabel         = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    floatnumber        = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    floattext          = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    floatcontent       = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },

    image              = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    mpgraphic          = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },

    formulaset         = { namespace = "context", nature = "display", pdf = "NonStruct" },
    formula            = { namespace = "context", nature = "display", pdf = "NonStruct" },
    formulacaption     = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    formulalabel       = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    formulanumber      = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    formulacontent     = { namespace = "context", nature = "display", pdf = "NonStruct" },
    subformula         = { namespace = "context", nature = "display", pdf = "NonStruct" },

    link               = { namespace = "context", nature = "inline",  pdf = "Link"      },
    reference          = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    navigation         = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    navigationbutton   = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    navigationmenu     = { namespace = "context", nature = "display", pdf = "NonStruct" },
    navigationmenuitem = { namespace = "context", nature = "display", pdf = "NonStruct" },
    navigationaction   = { namespace = "context", nature = "display", pdf = "NonStruct" },
    navigationpage     = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    margintextblock    = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    margintext         = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    marginanchor       = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    linetext           = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

 -- math               = { namespace = "mathml",  nature = "inline",  pdf = "math"      },
    math               = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    inlinemath         = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    displaymath        = { namespace = "context", nature = "display", pdf = "NonStruct" },

    -- these are wrapped in math and only used in detailed math mode

    -- The mathml variants below are disabled in favour of the Span based ones that
    -- follow. That includes mrow: it used to be active here as well as in the next
    -- group, which made it a repeated key.

 -- mn                 = { namespace = "mathml",  nature = "mixed",   pua = "mathml", pdf = "mn"            },
 -- mi                 = { namespace = "mathml",  nature = "mixed",   pua = "mathml", pdf = "mi"            },
 -- mo                 = { namespace = "mathml",  nature = "mixed",   pua = "mathml", pdf = "mo"            },
 -- ms                 = { namespace = "mathml",  nature = "mixed",   pua = "mathml", pdf = "ms"            },
 -- mrow               = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "mrow"          },
 -- msubsup            = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "msubsup"       },
 -- msub               = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "msub"          },
 -- msup               = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "msup"          },
 -- merror             = { namespace = "mathml",  nature = "mixed",   pua = "mathml", pdf = "merror"        },
 -- munderover         = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "munderover"    },
 -- munder             = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "munder"        },
 -- mover              = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "mover"         },
 -- mtext              = { namespace = "mathml",  nature = "mixed",   pua = "mathml", pdf = "mtext"         },
 -- mfrac              = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "mfrac"         },
 -- mroot              = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "mroot"         },
 -- msqrt              = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "msqrt"         },
 -- mfenced            = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "mfenced"       },
 -- maction            = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "maction"       },
 -- mmultiscripts      = { namespace = "mathml",  nature = "display", pua = "mathml", pdf = "mmultiscripts" },
 -- mprescripts        = { namespace = "mathml",  nature = "mixed",   pua = "mathml", pdf = "mprescripts"   },

    mn                 = { namespace = "context",  nature = "mixed",   pdf = "Span"      },
    mi                 = { namespace = "context",  nature = "mixed",   pdf = "Span"      },
    mo                 = { namespace = "context",  nature = "mixed",   pdf = "Span"      },
    ms                 = { namespace = "context",  nature = "mixed",   pdf = "Span"      },
    mrow               = { namespace = "context",  nature = "display", pdf = "Span"      },
    msubsup            = { namespace = "context",  nature = "display", pdf = "Span"      },
    msub               = { namespace = "context",  nature = "display", pdf = "Span"      },
    msup               = { namespace = "context",  nature = "display", pdf = "Span"      },
    merror             = { namespace = "context",  nature = "mixed",   pdf = "Span"      },
    munderover         = { namespace = "context",  nature = "display", pdf = "Span"      },
    munder             = { namespace = "context",  nature = "display", pdf = "Span"      },
    mover              = { namespace = "context",  nature = "display", pdf = "Span"      },
    mtext              = { namespace = "context",  nature = "mixed",   pdf = "Span"      },
    mfrac              = { namespace = "context",  nature = "display", pdf = "Span"      },
    mroot              = { namespace = "context",  nature = "display", pdf = "Span"      },
    msqrt              = { namespace = "context",  nature = "display", pdf = "Span"      },
    mfenced            = { namespace = "context",  nature = "display", pdf = "Span"      },
    maction            = { namespace = "context",  nature = "display", pdf = "Span"      },
    mmultiscripts      = { namespace = "context",  nature = "display", pdf = "Span"      },
    mprescripts        = { namespace = "context",  nature = "mixed",   pdf = "Span"      },

    -- these are internal ones

    mstack             = { namespace = "context", nature = "display", pdf = "Span"      },
    mstacker           = { namespace = "context", nature = "display", pdf = "Span"      },
    mstackertop        = { namespace = "context", nature = "display", pdf = "Span"      },
    mstackerbot        = { namespace = "context", nature = "display", pdf = "Span"      },
    mstackermid        = { namespace = "context", nature = "display", pdf = "Span"      },
    mextensible        = { namespace = "context", nature = "display", pdf = "Span"      },
    mdelimited         = { namespace = "context", nature = "display", pdf = "Span"      },
    mdelimitedstack    = { namespace = "context", nature = "display", pdf = "Span"      },
    mfunction          = { namespace = "context", nature = "mixed",   pdf = "Span"      },
    mfunctionstack     = { namespace = "context", nature = "display", pdf = "Span"      },
    mfraction          = { namespace = "context", nature = "display", pdf = "Span"      },
    mfractionstack     = { namespace = "context", nature = "display", pdf = "Span"      },
    munit              = { namespace = "context", nature = "mixed",   pdf = "Span"      },
    mdigits            = { namespace = "context", nature = "mixed",   pdf = "Span"      },
    mc                 = { namespace = "context", nature = "mixed",   pdf = "Span"      },

    -- these are also wrapped

    mtable             = { namespace = "mathml",  nature = "display", pdf = "mtable",   },
    mtr                = { namespace = "mathml",  nature = "display", pdf = "mtr",      },
    mtd                = { namespace = "mathml",  nature = "display", pdf = "mtd",      },

    ignore             = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    private            = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    metadata           = { namespace = "context", nature = "display", pdf = "NonStruct" },
    metavariable       = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },

    mid                = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    sub                = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    sup                = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    subsup             = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    combination        = { namespace = "context", nature = "display", pdf = "NonStruct" },
    combinationpair    = { namespace = "context", nature = "display", pdf = "NonStruct" },
    combinationcontent = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    combinationcaption = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },

    publications       = { namespace = "context", nature = "display", pdf = "NonStruct" },
    publication        = { namespace = "context", nature = "mixed",   pdf = "NonStruct" },
    pubfld             = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    citation           = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    cite               = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    narrower           = { namespace = "context", nature = "display", pdf = "NonStruct" },

    block              = { namespace = "context", nature = "display", pdf = "NonStruct" },

    userdata           = { namespace = "context", nature = "display", pdf = "NonStruct" },

    quantity           = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    unit               = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

    verse              = { namespace = "context", nature = "display", pdf = "NonStruct" },
    versetag           = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    verseseparator     = { namespace = "context", nature = "inline",  pdf = "NonStruct" },
    versecontent       = { namespace = "context", nature = "inline",  pdf = "NonStruct" },

}
354
-- This one is needed for crappy pdf support. It starts out empty and gets filled per
-- main tag when a tag file is loaded with the "backend.usetags" directive.

local overloads = allocate { }

tags.overloads  = overloads
tags.properties = properties
361
-- The directive "backend.usetags" takes a name and loads "lpdf-tag-imp-<name>.lmt". The
-- loaded table can have two subtables:
--
-- mapping   : tag = { pdf = ..., pua = ... } entries that replace the pdf (and optionally
--             pua) field of an already known tag; unknown tags are skipped
-- overloads : maintag = { criterium = ..., mapping = { detail = spec } } entries where
--             each spec needs a tag, pdf and pua field; the spec is registered as a new
--             tag in the "user" namespace and existing tags can't be redefined
--
-- Nothing happens when the value is not a string or when the file can't be located. When
-- the file is found we report how many tags were remapped and how many were added.

directives.register("backend.usetags", function(v)
    if type(v) ~= "string" then
        return
    end
    local fullname = resolvers.findfile("lpdf-tag-imp-"..v..".lmt") or ""
    if fullname == "" then
        return
    end
    local n = 0 -- known tags that got a different pdf mapping
    local c = 0 -- user tags added via overloads
    local data = table.load(fullname)
    if data then
        local remapped = data.mapping
        if remapped then
            for name, spec in next, remapped do
                local p = properties[name]
                if p and p.pdf ~= spec.pdf then
                    p.pdf = spec.pdf
                    p.pua = spec.pua or p.pua
                    n = n + 1
                end
            end
        end
        local overloaded = data.overloads
        if overloaded then
            for maintag, details in sortedhash(overloaded) do
                local mapping = details.mapping
                if mapping then
                    local okay = { }
                    for detail, spec in sortedhash(mapping) do
                        local tag = spec.tag
                        local pdf = spec.pdf
                        local pua = spec.pua
                        if tag and pdf and pua then
                            if properties[tag] then
                                report_tags("tag %a can't be overloaded",tag)
                            else
                                okay[detail] = spec
                                properties[tag] = spec
                                spec.namespace = "user"
                                spec.original  = { maintag, detail }
                                c = c + 1
                            end
                        else
                            -- the tag field itself can be the missing one so we fall
                            -- back on the detail key in order to identify the entry
                            report_tags("tag %a needs pdf and pua field",tag or detail)
                        end
                    end
                    if next(okay) then
                        overloads[maintag] = {
                            criterium = details.criterium,
                            mapping   = okay,
                        }
                    end
                else
                    -- there is no per detail tag in scope here, it is the main tag
                    -- that lacks a mapping
                    report_tags("tag %a overload needs mapping",maintag)
                end
            end
        end
    end
    report_tags("%i pdf tags overloaded, %i crappy tags added, cross your fingers",n,c)
end)
421
422
-- A cache of search patterns: indexing with a tag name builds (once) the pattern that
-- matches "<name>>" at the start of a complete tag.

local patterns = setmetatableindex(function(cache,name)
    local pattern = topattern("^" .. name .. ">")
    cache[name] = pattern
    return pattern
end)
428
-- Looks up, from the innermost level outwards, the complete tag (tag>number) with the
-- given name in the nesting that belongs to the current tagged attribute. We return
-- false when there is no match, which is handy as bogus index.

function tags.locatedtag(tag)
    local attribute = texgetattribute(a_tagged)
    if attribute < 0 then
        -- enabled but not auto
        return false
    end
    local specification = taglist[attribute]
    if not specification then
        return false
    end
    local nesting = specification.taglist
    local pattern = patterns[tag]
    for i=#nesting,1,-1 do
        local fulltag = nesting[i]
        if find(fulltag,pattern) then
            return fulltag
        end
    end
    return false
end
448
-- Returns true when the nesting that belongs to the current tagged attribute has a tag
-- with the given name; otherwise nothing is returned.

function structures.atlocation(str) -- not used
    local specification = taglist[texgetattribute(a_tagged)]
    local list = specification and specification.taglist
    if not list then
        return
    end
    local pattern = patterns[str]
    for i=#list,1,-1 do
        if find(list[i],pattern) then
            return true
        end
    end
end
463
-- Sets a property of the given tag. An unknown tag gets a fresh entry in the "user"
-- namespace.

function tags.setproperty(tag,key,value)
    local known = properties[tag]
    if known then
        known[key] = value
        return
    end
    properties[tag] = { [key] = value, namespace = "user" }
end
472
-- Sets a property for the entry at the top of the chain. The chain entry itself is the
-- key into the properties table. Nothing happens when the chain has no entry at the
-- current level.

function tags.setaspect(key,value)
    local current = chain[stacksize]
    if not current then
        return
    end
    local known = properties[current]
    if known then
        known[key] = value
        return
    end
    properties[current] = { [key] = value, namespace = "user" }
end
484
-- Parses a settings string into a hash. With at most one entry in the chain the result
-- is merged into the document data; otherwise it is kept (merged with what is already
-- pending) as the metadata that is applied to the next element.

function tags.registermetadata(data)
    local d = settings_to_hash(data)
    if #chain <= 1 then
        merge(documentdata,d)
    elseif metadata then
        merge(metadata,d)
    else
        metadata = d
    end
end
497
-- Returns the document level metadata table, or an empty table when there is none.

function tags.getmetadata()
    if documentdata then
        return documentdata
    end
    return { }
end
501
-- Registers a serializer under the given name. Anything that is not a function is
-- silently ignored. The table is only created when the first serializer comes in.

function tags.registerextradata(name,serializer)
    if type(serializer) ~= "function" then
        return
    end
    if not extradata then
        extradata = { [name] = serializer }
        return
    end
    extradata[name] = serializer
end
511
-- Returns the table with registered serializers (indexed by name) or false when nothing
-- has been registered.

function tags.getextradata()
    return extradata
end
515
-- Tells if tagging has been enabled, which happens when the first tag is started.

function tags.enabled()
    return enabled
end
519
-- The attributes that belong to "ignore" tags (attribute -> true).

local ignored = { }

tags.ignored = ignored
521
-- Enables tagging once: in export mode we enable the math tag handler, otherwise we
-- leave it to the backend.

local function enabletags()
    if enabled then
        return
    end
    if tex.systemmodes.export then
        nodes.tasks.enableaction("math","noads.handlers.tags")
    else
        codeinjections.enabletags()
    end
    enabled = true
end
532
-- Starts a new element with the given tag name. We allocate the next attribute value,
-- bump the per tag counter, push the complete tag (tag>number) onto the chain and
-- register the specification by attribute as well as by complete tag. The optional
-- specification table is updated in place; without one we create it. The tagged
-- attribute is set to the new value, which is also what we return.

function tags.start(tag,specification)
    if not enabled then
        enabletags()
    end
    --
    labels[tag] = tag -- can go away
    --
    local attribute   = #taglist + 1
    local tagindex    = (ids[tag] or 0) + 1
    local completetag = tag .. ">" .. tagindex
    --
    if tag == "ignore" then
        ignored[attribute] = true
    end
    --
    ids[tag]      = tagindex
    lasttags[tag] = tagindex
    --
    stacksize        = stacksize + 1
    chain[stacksize] = completetag
    stack[stacksize] = attribute
    tagcontext[tag]  = completetag
    --
    -- a fresh table has none of the optional fields so the normalization below is
    -- harmless for it
    --
    specification = specification or { }
    --
    specification.attribute = attribute
    specification.tagindex  = tagindex
    specification.taglist   = { unpack(chain,1,stacksize) } -- a copy so we can add actualtext
    specification.tagname   = tag
    --
    if metadata then
        -- pending metadata is consumed by this element
        specification.metadata = metadata
        metadata = nil
    end
    --
    local userdata = specification.userdata
    if userdata == "" then
        specification.userdata = nil
    elseif type(userdata) == "string"  then
        specification.userdata = settings_to_hash(userdata)
    end
    if specification.detail == "" then
        specification.detail = nil
    end
    if specification.parents == "" then
        specification.parents = nil
    end
    --
    taglist[attribute]          = specification
    specifications[completetag] = specification
    --
    if completetag == "document>1" then
        -- the first document element always carries the document data
        specification.metadata = documentdata
    end
    --
    texsetattribute(a_tagged,attribute)
    return attribute
end
602
603-- kind of messy:
604
-- Pushes an element onto the chain again.
--
-- attribute : either a number (an existing attribute whose last chain entry is
--             reused) or a string (a full tag for which a new taglist entry,
--             holding a copy of the current chain, is created)
-- returns   : the attribute that is now active
function tags.restart(attribute)
    stacksize = stacksize + 1
    if type(attribute) ~= "number" then
        -- a string
        chain[stacksize] = attribute
        attribute = #taglist + 1
        local nesting = { }
        for i=1,stacksize do
            nesting[i] = chain[i]
        end
        taglist[attribute] = { taglist = nesting }
    else
        local nesting = taglist[attribute].taglist
        chain[stacksize] = nesting[#nesting]
    end
    stack[stacksize] = attribute
    texsetattribute(a_tagged,attribute)
    return attribute
end
619
do

    -- NOTE(review): these look like the attribute numbers of the first two
    -- elements that get started (ignore, document) -- confirm against the
    -- TeX end. Only the document level is used below.

    local tag_ignore_level   <const> = 1
    local tag_document_level <const> = 2

    -- Each entry: { tex attribute, stacksize, stack[stacksize], chain, stack snapshot }

    local tagstack = { }

    -- todo: less push if the attribute is unchanged

    -- Saves the current tagging state and continues with the chain that belongs
    -- to the given attribute (default: the document level).
    --
    -- The chain is replaced by a fresh copy, but 'stack' is a shared table that
    -- tags.start and tags.restart overwrite in place, so the used part of the
    -- stack is snapshotted here and restored by tags.pop. Without this, slots
    -- below the saved stacksize could hold stale attributes after the pop.
    --
    -- attribute : optional attribute number; an attribute without a taglist
    --             entry gives an empty chain instead of an error

    function tags.push(attribute)
        if not attribute then
            attribute = tag_document_level
        end
        local savedstack = { }
        for i=1,stacksize do
            savedstack[i] = stack[i]
        end
        insert(tagstack, { texgetattribute(a_tagged), stacksize, stack[stacksize], chain, savedstack })
        local specification = taglist[attribute]
        local nesting       = specification and specification.taglist
        if nesting then
            chain            = { unpack(nesting) }
            stacksize        = #chain
            stack[stacksize] = attribute
        else
            if trace_tags then
                report_tags("pushing unknown attribute %s, continuing with an empty chain",tostring(attribute))
            end
            chain     = { }
            stacksize = 0
        end
    end

    -- Restores the state saved by the matching tags.push: stack size, chain,
    -- the stack slots that were in use at push time and the tex attribute.
    -- A pop without a matching push is ignored.

    function tags.pop()
        local s = remove(tagstack)
        if not s then
            if trace_tags then
                report_tags("ignoring pop, there is no pushed state")
            end
            return
        end
        stacksize = s[2]
        chain     = s[4]
        local savedstack = s[5]
        for i=1,stacksize do
            stack[i] = savedstack[i]
        end
        stack[stacksize] = s[3] -- also covers the stacksize == 0 slot
        texsetattribute(a_tagged,s[1])
    end

end
648
-- Closes the current element: drops one level (never below zero) and makes the
-- attribute found at the new level the active one. When there is nothing at
-- that level the 'tagged' attribute is unset instead.
--
-- returns : the attribute that is now active, or the unset value
function tags.stop()
    if stacksize > 0 then
        stacksize = stacksize - 1
    end
    local attribute = stack[stacksize]
    if attribute then
        texsetattribute(a_tagged,attribute)
        return attribute
    end
    if trace_tags then
        local previous = "none"
        if stacksize > 0 then
            previous = concat(chain," ",1,stacksize)
        end
        report_tags("ignoring end tag, previous chain: %s",previous)
    end
    texsetattribute(a_tagged,unsetvalue)
    return unsetvalue
end
663
-- Gives the current counter value for a tag name, or "?" when that name has
-- not been counted yet. The 'detail' argument is accepted but not used.
function tags.getid(tag,detail)
    local id = ids[tag]
    if id then
        return id
    end
    return "?"
end
667
-- Gives the index that was last assigned to the given tag name by tags.start
-- (nil when the name has not been used).
function tags.last(tag)
    local index = lasttags[tag] -- or false
    return index
end
671
-- With a (non empty) tag name: the full tag most recently started under that
-- name. Without one: the full tag at the end of the current chain.
function tags.lastinchain(tag)
    if not tag or tag == "" then
        return chain[stacksize]
    end
    return tagcontext[tag]
end
679
-- Captures everything up to (not including) the first ">", so "name>index"
-- yields "name"; at least one character is required.
local strip = C((1-P(">"))^1)
681
-- Gives the name part (the text before ">") of the full tag at the end of the
-- current chain; nothing is returned when the chain is empty.
function tags.elementtag()
    local current = chain[stacksize]
    if not current then
        return
    end
    return lpegmatch(strip,current)
end
688
-- Gives the name part of a full tag: the text before the first ">".
function tags.strip(fulltag)
    local name = lpegmatch(strip,fulltag)
    return name
end
692
-- Adds user properties to an element.
--
-- Two call forms are supported:
--
--   tags.setuserproperties(tag,list) : targets the element most recently
--                                      started under the name 'tag'
--   tags.setuserproperties(list)     : targets the element at the end of the
--                                      current chain
--
-- The list is converted with settings_to_hash and merged into already present
-- userdata (new values win), or it becomes the userdata when there was none.
-- Nothing happens when no target element can be found.
function tags.setuserproperties(tag,list)
    if list and list ~= "" then
        tag = tagcontext[tag]
    else
        -- only one argument was given: it is the list
        tag, list = chain[stacksize], tag
    end
    if not tag then
        return
    end
    -- from here on 'tag' is a full tag ("name>index")
    local hash          = settings_to_hash(list)
    local specification = specifications[tag]
    if not specification then
        -- error
        return
    end
    local userdata = specification.userdata
    if not userdata then
        specification.userdata = hash
        return
    end
    for key, value in next, hash do
        userdata[key] = value
    end
end
716
-- A placeholder handler (we need a dummy): the list is passed through
-- untouched and 'false' is returned as second value.
function tags.handler(head)
    local done = false
    return head, done
end
720
-- Statistics line for the structure elements: the number of registered chains
-- and, when elements are still open, the open chain. Empty slots in the open
-- part of the chain are replaced (in place) by "ERROR". Nothing is reported
-- when tagging was never enabled.
statistics.register("structure elements", function()
    if not enabled then
        return
    end
    local total = #taglist
    if stacksize > 0 then
        for level=1,stacksize do
            chain[level] = chain[level] or "ERROR"
        end
        local open = concat(chain," => ",1,stacksize)
        return format("%s element chains identified, open chain: %s ",total,open)
    end
    return format("%s element chains identified",total)
end)
733
-- Using the "backend.addtags" directive enables tagging, whatever value is
-- passed; enabletags itself does nothing when tagging is already enabled.
directives.register("backend.addtags", function(v)
    enabletags()
end)
739
740-- interface
741
-- The start/stop commands. All variants end up in tags.start; they only differ
-- in what is put in the specification.

local starttag = tags.start

-- plain start: just the tag

implement {
    name      = "strc_tags_start",
    arguments = "argument",
    actions   = starttag,
    public    = true,
    protected = true,
}

implement {
    name      = "strc_tags_stop",
    actions   = tags.stop,
    public    = true,
    protected = true,
}

-- start with user data (both arguments are optional)

implement {
    name      = "strc_tags_start_userdata",
    arguments = { "optional", "optional" },
    actions   = function(tag,userdata)
        starttag(tag,{ userdata = userdata })
    end,
    public    = true,
    protected = true,
}

-- start with a detail

implement {
    name      = "strc_tags_start_detail",
    arguments = "2 arguments",
    actions   = function(tag,detail)
        starttag(tag,{ detail = detail })
    end,
    public    = true,
    protected = true,
}

-- start of an "ignore" element, only the detail is passed

implement {
    name      = "strc_tags_start_ignore",
    arguments = { "argument" },
    actions   = function(detail)
        starttag("ignore",{ detail = detail })
    end,
    public    = true,
    protected = true,
}

-- start with a detail and parents

implement {
    name      = "strc_tags_start_chained",
    arguments = "3 arguments",
    actions   = function(tag,detail,parents)
        starttag(tag,{ detail = detail, parents = parents })
    end,
    public    = true,
    protected = true,
}
790
-- Aspects and properties. The last two commands call tags.setproperty with a
-- fixed middle argument ('pdf' and 'nature').

implement {
    name      = "strc_tags_set_aspect",
    arguments = "2 arguments",
    actions   = tags.setaspect,
    public    = true,
    protected = true,
}

implement {
    name      = "settagproperty",
    arguments = "3 arguments",
    actions   = tags.setproperty,
}

implement {
    name      = "setelementbackendtag",
 -- arguments = { "optional", "'backend'", "optional" },
    arguments = { "optional", "'pdf'", "optional" },
    actions   = tags.setproperty,
    public    = true,
    protected = true,
}

implement {
    name      = "setelementnature",
    arguments = { "optional", "'nature'",  "optional" },
    actions   = tags.setproperty,
    public    = true,
    protected = true,
}
821
-- Queries and settings that relate to the current element.

-- the result of tags.elementtag is passed on to context

implement {
    name      = "strc_tags_get_element_tag",
    actions   = { tags.elementtag, context },
    public    = true,
    protected = true,
}

implement {
    name      = "strc_tags_set_element_user_properties",
    arguments = { "optional", "optional" },
    actions   = tags.setuserproperties,
    public    = true,
    protected = true,
}

-- the result of structures.atlocation is passed on to commands.doifelse

implement {
    name      = "doifelseinelement",
    arguments = "argument",
    actions   = { structures.atlocation, commands.doifelse },
    public    = true,
    protected = true,
}

implement {
    name      = "settaggedmetadata",
    arguments = "optional",
    actions   = tags.registermetadata,
    public    = true,
    protected = true,
}
852
-- Saving and restoring the tagging state. No arguments are passed, so
-- tags.push falls back on its default attribute.

implement {
    name      = "strc_tags_document_push",
    actions   = tags.push,
    public    = true,
    protected = true,
}

implement {
    name      = "strc_tags_document_pop",
    actions   = tags.pop,
    public    = true,
    protected = true,
}
866