lpdf-tag-imp-crap.lmt /size: 13 Kb    last modification: 2025-02-21 11:03
1if not modules then modules = { } end modules ['lpdf-tag-imp-crap'] = { -- create the global modules table when absent, then register this file's metadata under its own name
2    version   = 1.001,
3    comment   = "companion to lpdf-tag.mkiv",
4    author    = "Hans Hagen & Mikael Sundqvist",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9-- You can enable this at your own risk by saying:
10--
11-- \enabledirectives[backend.usetags=crap]
12--
13-- which will try to satisfy the sloppy mapping in pdf. You can also use this file to
14-- roll out your own variant. We also use this file for testing so there could be subtle
15-- changes over time.
16
17return { -- the file evaluates to one table: descriptive fields first, then the "mapping" and "overloads" tables
18    name      = "old school pdf tagging",
19    version   = "1.00", -- a string here, unlike the numeric version in the modules entry at the top of the file
20    comment   = "This is our crappy level two mapping, sort of an example.",
21    author    = "Hans Hagen",
22    copyright = "ConTeXt development team",
23    mapping   = { -- keys are ConTeXt structure tag names; each value names the PDF structure type in "pdf"
24        -- every entry also carries pua = "ua1" (presumably the PDF/UA level; confirm in lpdf-tag)
25        -- Part      : no structure
26        -- Sect      : structure
27        -- NonStruct : skip level
28        -- Sub       : line with lbl numbers
29
30        document           = { pua = "ua1", pdf = "Document"   },
31        documentpart       = { pua = "ua1", pdf = "NonStruct"  },
32
33        division           = { pua = "ua1", pdf = "Part"       },
34        paragraph          = { pua = "ua1", pdf = "P"          },
35        subparagraph       = { pua = "ua1", pdf = "P"          },
36        p                  = { pua = "ua1", pdf = "P"          },
37        highlight          = { pua = "ua1", pdf = "Span"       },
38        ornament           = { pua = "ua1", pdf = "Span"       },
39        textdisplay        = { pua = "ua1", pdf = "Div"        },
40        placeholder        = { pua = "ua1", pdf = "Span"       },
41
42        ["break"]          = { pua = "ua1", pdf = "Div"        }, -- "break" is a Lua keyword, hence the bracketed key
43
44        construct          = { pua = "ua1", pdf = "Span"       },
45        constructleft      = { pua = "ua1", pdf = "Span"       },
46        constructright     = { pua = "ua1", pdf = "Span"       },
47        constructcontent   = { pua = "ua1", pdf = "NonStruct"  },
48
49        sectionblock       = { pua = "ua1", pdf = "Part"       },
50
51        section            = { pua = "ua1", pdf = "Sect"       }, -- Part
52        sectioncaption     = { pua = "ua1", pdf = "NonStruct"  },
53        sectiontitle       = { pua = "ua1", pdf = "H"          },
54        sectionnumber      = { pua = "ua1", pdf = "Lbl"        },
55        sectioncontent     = { pua = "ua1", pdf = "NonStruct"  },
56
57        itemgroup          = { pua = "ua1", pdf = "L"          },
58        item               = { pua = "ua1", pdf = "LI"         },
59        itemtag            = { pua = "ua1", pdf = "Lbl"        },
60        itemcontent        = { pua = "ua1", pdf = "LBody"      },
61        itemhead           = { pua = "ua1", pdf = "NonStruct"  },
62        itembody           = { pua = "ua1", pdf = "NonStruct"  },
63
64        items              = { pua = "ua1", pdf = "Div"        },
65        itemsymbols        = { pua = "ua1", pdf = "Div"        },
66        itemsymbol         = { pua = "ua1", pdf = "Span"       },
67        itemtexts          = { pua = "ua1", pdf = "Div"        },
68        itemtext           = { pua = "ua1", pdf = "Span"       },
69
70        description        = { pua = "ua1", pdf = "Sect"       },
71        descriptiontag     = { pua = "ua1", pdf = "Lbl"        },
72        descriptioncontent = { pua = "ua1", pdf = "NonStruct"  },
73        descriptionsymbol  = { pua = "ua1", pdf = "Lbl"        },
74
75        verbatimblock      = { pua = "ua1", pdf = "Part"       },
76        verbatimlines      = { pua = "ua1", pdf = "Part"       },
77        verbatimline       = { pua = "ua1", pdf = "Code"       },
78        verbatim           = { pua = "ua1", pdf = "Code"       },
79
80        lines              = { pua = "ua1", pdf = "Part"       },
81        line               = { pua = "ua1", pdf = "Code"       },
82        linenumber         = { pua = "ua1", pdf = "Span"       },
83
84        synonym            = { pua = "ua1", pdf = "Span"       },
85        sorting            = { pua = "ua1", pdf = "Span"       },
86
87        register           = { pua = "ua1", pdf = "Part"       },
88        registerlocation   = { pua = "ua1", pdf = "Span"       },
89        registersection    = { pua = "ua1", pdf = "Part"       },
90        registertag        = { pua = "ua1", pdf = "Span"       },
91        registerentries    = { pua = "ua1", pdf = "Part"       },
92        registerentry      = { pua = "ua1", pdf = "Part"       },
93        registercontent    = { pua = "ua1", pdf = "Span"       },
94        registersee        = { pua = "ua1", pdf = "Span"       },
95        registerpages      = { pua = "ua1", pdf = "Span"       },
96        registerpage       = { pua = "ua1", pdf = "Span"       },
97        registerseparator  = { pua = "ua1", pdf = "Span"       },
98        registerpagerange  = { pua = "ua1", pdf = "Span"       },
99        -- row groups use the ISO 32000 spellings THead / TBody / TFoot (pdf names are case sensitive)
100        table              = { pua = "ua1", pdf = "Table"      },
101        tablerow           = { pua = "ua1", pdf = "TR"         },
102        tablecell          = { pua = "ua1", pdf = "TD"         },
103        tableheadcell      = { pua = "ua1", pdf = "TH"         },
104        tablehead          = { pua = "ua1", pdf = "THead"      },
105        tablebody          = { pua = "ua1", pdf = "TBody"      },
106        tablefoot          = { pua = "ua1", pdf = "TFoot"      },
107
108        tabulate           = { pua = "ua1", pdf = "Table"      },
109        tabulaterow        = { pua = "ua1", pdf = "TR"         },
110        tabulatecell       = { pua = "ua1", pdf = "TD"         },
111        tabulateheadcell   = { pua = "ua1", pdf = "TH"         },
112        tabulatehead       = { pua = "ua1", pdf = "THead"      },
113        tabulatebody       = { pua = "ua1", pdf = "TBody"      },
114        tabulatefoot       = { pua = "ua1", pdf = "TFoot"      },
115
116        list               = { pua = "ua1", pdf = "TOC"        },
117        listitem           = { pua = "ua1", pdf = "TOCI"       },
118        listtag            = { pua = "ua1", pdf = "Lbl"        },
119        listcontent        = { pua = "ua1", pdf = "NonStruct"  },
120        listdata           = { pua = "ua1", pdf = "NonStruct"  },
121        listpage           = { pua = "ua1", pdf = "Lbl"        },
122        listtext           = { pua = "ua1", pdf = "Span"       },
123
124        delimitedblock     = { pua = "ua1", pdf = "BlockQuote" },
125        delimited          = { pua = "ua1", pdf = "Quote"      },
126        delimitedcontent   = { pua = "ua1", pdf = "NonStruct"  },
127        delimitedsymbol    = { pua = "ua1", pdf = "Span"       },
128
129        subsentence        = { pua = "ua1", pdf = "Span"       },
130        subsentencecontent = { pua = "ua1", pdf = "Span"       },
131        subsentencesymbol  = { pua = "ua1", pdf = "Span"       },
132
133        label              = { pua = "ua1", pdf = "Span"       },
134        number             = { pua = "ua1", pdf = "Span"       },
135
136        float              = { pua = "ua1", pdf = "Part"       },
137        floatcaption       = { pua = "ua1", pdf = "Caption"    },
138        floatlabel         = { pua = "ua1", pdf = "Span"       },
139        floatnumber        = { pua = "ua1", pdf = "Span"       },
140        floattext          = { pua = "ua1", pdf = "Span"       },
141        floatcontent       = { pua = "ua1", pdf = "NonStruct"  },
142
143        image              = { pua = "ua1", pdf = "NonStruct"  },
144        mpgraphic          = { pua = "ua1", pdf = "NonStruct"  },
145
146        formulaset         = { pua = "ua1", pdf = "Part"       },
147        formula            = { pua = "ua1", pdf = "Part"       },
148        formulacaption     = { pua = "ua1", pdf = "Span"       },
149        formulalabel       = { pua = "ua1", pdf = "Span"       },
150        formulanumber      = { pua = "ua1", pdf = "Span"       },
151        formulacontent     = { pua = "ua1", pdf = "NonStruct"  },
152        subformula         = { pua = "ua1", pdf = "Part"       },
153
154        link               = { pua = "ua1", pdf = "Link"       },
155        reference          = { pua = "ua1", pdf = "NonStruct"  },
156
157        navigation         = { pua = "ua1", pdf = "NonStruct"  },
158        navigationbutton   = { pua = "ua1", pdf = "NonStruct"  },
159        navigationmenu     = { pua = "ua1", pdf = "NonStruct"  },
160        navigationmenuitem = { pua = "ua1", pdf = "NonStruct"  },
161        navigationaction   = { pua = "ua1", pdf = "NonStruct"  },
162        navigationpage     = { pua = "ua1", pdf = "NonStruct"  },
163
164        margintextblock    = { pua = "ua1", pdf = "Aside"      },
165        margintext         = { pua = "ua1", pdf = "NonStruct"  },
166        marginanchor       = { pua = "ua1", pdf = "Span"       },
167
168        linetext           = { pua = "ua1", pdf = "NonStruct"  },
169
170        -- no math here
171
172        ignore             = { pua = "ua1", pdf = "NonStruct"  },
173        private            = { pua = "ua1", pdf = "NonStruct"  },
174        metadata           = { pua = "ua1", pdf = "Part"       },
175        metavariable       = { pua = "ua1", pdf = "Span"       },
176
177        mid                = { pua = "ua1", pdf = "Span"       },
178        sub                = { pua = "ua1", pdf = "Span"       },
179        sup                = { pua = "ua1", pdf = "Span"       },
180        subsup             = { pua = "ua1", pdf = "Span"       },
181
182        combination        = { pua = "ua1", pdf = "Table"      },
183        combinationpair    = { pua = "ua1", pdf = "TR"         },
184        combinationcontent = { pua = "ua1", pdf = "TD"         },
185        combinationcaption = { pua = "ua1", pdf = "TD"         },
186
187        publications       = { pua = "ua1", pdf = "Part"       },
188        publication        = { pua = "ua1", pdf = "NonStruct"  },
189        pubfld             = { pua = "ua1", pdf = "Span"       },
190
191        citation           = { pua = "ua1", pdf = "Span"       },
192        cite               = { pua = "ua1", pdf = "Span"       },
193
194        narrower           = { pua = "ua1", pdf = "Part"       },
195
196        block              = { pua = "ua1", pdf = "Part"       },
197
198        userdata           = { pua = "ua1", pdf = "Part"       },
199
200        quantity           = { pua = "ua1", pdf = "Span"       },
201        unit               = { pua = "ua1", pdf = "Span"       },
202
203        verse              = { pua = "ua1", pdf = "Part"       },
204        versetag           = { pua = "ua1", pdf = "Lbl"        },
205        verseseparator     = { pua = "ua1", pdf = "Span"       },
206        versecontent       = { pua = "ua1", pdf = "NonStruct"  },
207
208    },
209
210    -- This is a hack to get around the specifications "We can't expect an
211    -- application to keep track of nested H's (but otherwise expect very complex
212    -- things to be properly dealt with)". A typical example of bugs
213    -- becoming features, standards being not really standards as they get
214    -- adapted, etc. That said: it is up to the user to decide what to do but
215    -- don't blame us for the resulting less optimal structure.
216
217    -- Because we don't want to spoil the otherwise rather clean structure in
218    -- ConTeXt, this kicks in very late in the backend. We might extend this
219    -- hackery but there are limits to what is desired. After all, in over a
220    -- decade of pdf tagging nothing significant happened (we're speaking 2024)
221    -- nor did proper viewer support show up, and we can expect LLMs to deal
222    -- with proper tags some day anyway.
223
224    overloads = { -- keyed by structure tag; each entry holds a "criterium" plus its own "mapping" table
225        -- NOTE(review): presumably the backend consults this instead of the plain mapping entry for the tag; confirm in lpdf-tag
226        -- criterium : parent    : use parent "detail"
227        --             parents   : use first in parent "parents" list
228        --             otherwise : look at self "detail"
229
230        -- We need to violate the proper structure by getting a Hn on the title so
231        -- we have to backtrack to what we're in, thereby also denying the proper
232        -- section instance. Don't ask. The plural "parents" will make sure we
233        -- consult the first in the chain and not the instance that is encoded in
234        -- "detail".
235
236        sectioncaption = {
237            criterium = "parents",
238            mapping   = { -- section level name -> entry with an extra "tag" field and a numbered heading type in "pdf"
239
240                part                         = { pua = "ua1", tag = "section_title_1",  pdf = "H1"  },
241
242                chapter                      = { pua = "ua1", tag = "section_title_2",  pdf = "H2"  },
243                title                        = { pua = "ua1", tag = "subject_title_2",  pdf = "H2"  },
244
245                section                      = { pua = "ua1", tag = "section_title_3",  pdf = "H3"  },
246                subject                      = { pua = "ua1", tag = "subject_title_3",  pdf = "H3"  },
247
248                subsection                   = { pua = "ua1", tag = "section_title_4",  pdf = "H4"  },
249                subsubject                   = { pua = "ua1", tag = "subject_title_4",  pdf = "H4"  },
250
251                subsubsection                = { pua = "ua1", tag = "section_title_5",  pdf = "H5"  },
252                subsubsubject                = { pua = "ua1", tag = "subject_title_5",  pdf = "H5"  },
253
254                subsubsubsection             = { pua = "ua1", tag = "section_title_6",  pdf = "H6"  },
255                subsubsubsubject             = { pua = "ua1", tag = "subject_title_6",  pdf = "H6"  },
256                -- NOTE(review): H7 and up fall outside the H1..H6 set listed in ISO 32000-1; confirm the target validator accepts them
257                subsubsubsubsection          = { pua = "ua1", tag = "section_title_7",  pdf = "H7"  },
258                subsubsubsubsubject          = { pua = "ua1", tag = "subject_title_7",  pdf = "H7"  },
259
260                subsubsubsubsubsection       = { pua = "ua1", tag = "section_title_8",  pdf = "H8"  },
261                subsubsubsubsubsubject       = { pua = "ua1", tag = "subject_title_8",  pdf = "H8"  },
262
263                subsubsubsubsubsubsection    = { pua = "ua1", tag = "section_title_9",  pdf = "H9"  },
264                subsubsubsubsubsubsubject    = { pua = "ua1", tag = "subject_title_9",  pdf = "H9"  },
265
266                subsubsubsubsubsubsubsection = { pua = "ua1", tag = "section_title_10", pdf = "H10" },
267                subsubsubsubsubsubsubsubject = { pua = "ua1", tag = "subject_title_10", pdf = "H10" },
268
269            },
270        },
271    },
272
273}
274