1if not modules then modules = { } end modules ['lpdf-tag'] = {
2 version = 1.001,
3 comment = "companion to lpdf-tag.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local next, type = next, type
10local format, match, gmatch = string.format, string.match, string.gmatch
11local concat, sortedhash = table.concat, table.sortedhash
12local lpegmatch, P, S, C = lpeg.match, lpeg.P, lpeg.S, lpeg.C
13local settings_to_hash = utilities.parsers.settings_to_hash
14local formatters = string.formatters
15
16local trace_tags = false trackers.register("structures.tags", function(v) trace_tags = v end)
17local trace_info = false trackers.register("structures.tags.info", function(v) trace_info = v end)
18
19local report_tags = logs.reporter("backend","tags")
20
21local pdfbackend = backends.registered.pdf
22local nodeinjections = pdfbackend.nodeinjections
23local codeinjections = pdfbackend.codeinjections
24
25local enableaction = nodes.tasks.enableaction
26local disableaction = nodes.tasks.disableaction
27
28local lpdf = lpdf
29local pdfdictionary = lpdf.dictionary
30local pdfarray = lpdf.array
31local pdfboolean = lpdf.boolean
32local pdfconstant = lpdf.constant
33local pdfreference = lpdf.reference
34local pdfunicode = lpdf.unicode
35local pdfmakenametree = lpdf.makenametree
36
37local addtocatalog = lpdf.addtocatalog
38local addtopageattributes = lpdf.addtopageattributes
39
40local pdfflushobject = lpdf.flushobject
41local pdfreserveobject = lpdf.reserveobject
42local pdfpagereference = lpdf.pagereference
43
44local texgetcount = tex.getcount
45
46local nodes = nodes
47local nodecodes = nodes.nodecodes
48
49local hlist_code = nodecodes.hlist
50local vlist_code = nodecodes.vlist
51local glyph_code = nodecodes.glyph
52
53local a_tagged = attributes.private('tagged')
54local a_image = attributes.private('image')
55
56local nuts = nodes.nuts
57
58local nodepool = nuts.pool
59local setstate = nodepool.setstate
60local register = nodepool.register
61
62local getid = nuts.getid
63local getattr = nuts.getattr
64local getprev = nuts.getprev
65local getnext = nuts.getnext
66local getlist = nuts.getlist
67local getchar = nuts.getchar
68
69local tailoflist = nuts.tail
70local setlink = nuts.setlink
71local setlist = nuts.setlist
72
73local copy_node = nuts.copy
74local tosequence = nuts.tosequence
75
76local nextnode = nuts.traversers.node
77
78local structure_kids
79local structure_ref
80local parent_ref
81local root
82local names = { }
83local tree = { }
84local firstintree = false
85local lastintree = false
86local elements = { }
87
88local structurestags = structures.tags
89local taglist = structurestags.taglist
90local specifications = structurestags.specifications
91local usedlabels = structurestags.labels
92local properties = structurestags.properties
93local usewithcare = structurestags.usewithcare
94
95local usedmapping = { }
96
97
98
99local embeddedtags = false
100local f_tagid = formatters["%s-%04i"]
101local embeddedfilelist = pdfarray()
102
103
104
-- Directive "structures.tags.embed".
--
-- A string value is split on commas and spaces and every resulting name is
-- switched on in the embeddedtags table (the table is created when embeddedtags
-- is not a table yet). Any other true value turns embeddedtags into the plain
-- boolean true, but only when nothing was enabled before.
directives.register("structures.tags.embed", function(value)
    if type(value) ~= "string" then
        if value and not embeddedtags then
            embeddedtags = true
        end
        return
    end
    if type(embeddedtags) ~= "table" then
        embeddedtags = { }
    end
    for name in gmatch(value, "([^, ]+)") do
        embeddedtags[name] = true
    end
end)
117
118
119
-- Directive "structures.tags.embedmath".
--
-- A false value is ignored, and so is the case where embeddedtags is already
-- the boolean true. Otherwise the "math" entry is switched on, creating the
-- embeddedtags table when there is none yet.
directives.register("structures.tags.embedmath", function(value)
    if not value or embeddedtags == true then
        return
    end
    if embeddedtags then
        embeddedtags.math = true
    else
        embeddedtags = { math = true }
    end
end)
131
-- Registry filled by codeinjections.maptag. The original code wrote into an
-- undeclared name, so every call failed on indexing a nil global; the table is
-- now declared here.
-- NOTE(review): nothing else in this file reads the registry - confirm whether
-- another module was expected to provide or consume it.
local mapping = { }

-- Records that tag 'original' maps onto 'target'.
--
-- original : key under which the mapping is stored
-- target   : first value of the stored pair
-- kind     : second value of the stored pair, "inline" when omitted (or false)
function codeinjections.maptag(original,target,kind)
    mapping[original] = { target, kind or "inline" }
end
135
136
137
-- Switched by the experiment "structures.tags.namespaces"; finishstructure
-- consults it when it decides whether per namespace role maps are collected.
local usenamespace = false

experiments.register("structures.tags.namespaces", function(v)
    usenamespace = v
end)

-- Namespace name -> URL; names without an entry are used as they are when
-- the namespace dictionaries get written.
local namespaceurls = {
    mathml = "http://www.w3.org/1998/Math/MathML",
}
143
-- Document finalizer that writes the structure tree.
--
-- Nothing happens unless addtags created the root and at least one top level
-- kid was added. Otherwise, in this order:
--
--   * the per page arrays stored in 'tree' are flushed and collected in the
--     Nums array of the parent tree, keyed by the zero based page number
--   * the id tree is built from 'names'
--   * the role map (and, when the namespaces experiment is on, the per
--     namespace role maps) is built from the tags seen by makeelement
--   * the StructTreeRoot dictionary is flushed and added to the catalog
--   * for major version 1 a MarkInfo dictionary is added to the catalog
--   * the kids array of every element is flushed into its reserved object
local function finishstructure()
    if not (root and #structure_kids > 0) then
        return
    end

    -- parent tree: (page index, reference) pairs
    local nums = pdfarray()
    local n    = 0
    for i=firstintree,lastintree do
        local ti = tree[i]
        if ti then
            n = n + 1 ; nums[n] = i - 1
            n = n + 1 ; nums[n] = pdfreference(pdfflushobject(ti))
        else
            report_tags("beware: missing page %i in tree", i)
        end
    end
    local parenttree = pdfdictionary {
        Nums = nums
    }
    local idtree = pdfmakenametree(names)

    -- role maps; the loop variable is left alone and the label is kept in a
    -- local of its own
    local rolemaps = usenamespace and { }
    local rolemap  = pdfdictionary()
    for tagname in next, usedmapping do
        local label = usedlabels[tagname] or tagname
        local p     = properties[label]
        if not p then
            -- was a bare print to the console; without properties the tag
            -- always ends up in the branch below that maps it onto Span
            report_tags("beware: undefined tag %s, mapped onto Span", label)
        end
        local namespace = p and p.namespace
        if rolemaps and namespace then
            local r = rolemaps[namespace]
            if not r then
                r = pdfdictionary()
                rolemaps[namespace] = r
            end
            r[label] = pdfconstant(label)
        else
            rolemap[label] = pdfconstant(p and p.pdf or "Span")
        end
    end

    -- one Namespace dictionary per collected namespace, in sorted order
    local namespaces = nil
    if rolemaps and next(rolemaps) then
        namespaces = pdfarray { }
        for name, map in sortedhash(rolemaps) do
            namespaces[#namespaces+1] = pdfdictionary {
                Type      = pdfconstant("Namespace"),
                NS        = pdfunicode(namespaceurls[name] or name),
                RoleMapNS = map,
            }
        end
    end

    local structuretree = pdfdictionary {
        Type       = pdfconstant("StructTreeRoot"),
        K          = pdfreference(pdfflushobject(structure_kids)),
        ParentTree = pdfreference(pdfflushobject(parent_ref,parenttree)),
        IDTree     = idtree,
        RoleMap    = rolemap,
        Namespaces = namespaces,
    }
    pdfflushobject(structure_ref,structuretree)
    addtocatalog("StructTreeRoot",pdfreference(structure_ref))

    if lpdf.majorversion() == 1 then
        local markinfo = pdfdictionary {
            Marked = pdfboolean(true),
        }
        addtocatalog("MarkInfo",pdfreference(pdfflushobject(markinfo)))
    end

    -- the kids arrays were reserved in makeelement and kept growing while
    -- pages were processed, so they can only be flushed now
    for _, element in sortedhash(elements) do
        pdfflushobject(element.knum,element.kids)
    end
end
231
232lpdf.registerdocumentfinalizer(finishstructure,"document structure")
233
234local index, pageref, pagenum, list = 0, nil, 0, nil
235
236local pdf_mcr = pdfconstant("MCR")
237local pdf_struct_element = pdfconstant("StructElem")
238local pdf_s = pdfconstant("S")
239local pdf_objr = pdfconstant("OBJR")
240
-- Resets the per page state: the content index restarts at zero, the page
-- number is taken from the "realpageno" counter, a page reference is made and
-- a fresh array is registered in 'tree' under the page number. The first and
-- last page numbers seen so far are tracked in firstintree / lastintree; a
-- message is reported when the first page seen is not page one.
local function initializepage()
    index   = 0
    pagenum = texgetcount("realpageno")
    pageref = pdfreference(pdfpagereference(pagenum))
    list    = pdfarray()

    if firstintree then
        if pagenum > lastintree then
            lastintree = pagenum
        end
    else
        if pagenum > 1 then
            report_tags("beware: first page in tree is %i", pagenum)
        end
        firstintree, lastintree = pagenum, pagenum
    end

    tree[pagenum] = list
end
261
-- Adds the page level entries that go with the structure tree.
--
-- StructParents is the zero based page number, which is the same key that
-- finishstructure uses in the Nums array of the parent tree.
--
-- Tabs gets the name S. The original code passed the undeclared name 's'
-- (a nil global) here, while the constant pdf_s, defined earlier in this file
-- as pdfconstant("S"), was never used anywhere.
local function finishpage()
    addtopageattributes("StructParents",pagenum-1)
    addtopageattributes("Tabs",pdf_s)
end
268
269
270
271local pdf_userproperties = pdfconstant("UserProperties")
272
273
274
275
-- Turns a hash into a UserProperties attribute dictionary: the sorted
-- key/value pairs become N/V dictionaries in the P array. Nothing is returned
-- when no table is given or when the table is empty.
local function makeattribute(t)
    if not t or not next(t) then
        return
    end
    local entries = pdfarray()
    for name, value in sortedhash(t) do
        local entry = pdfdictionary {
            N = pdfunicode(name),
            V = pdfunicode(value),
        }
        entries[#entries+1] = entry
    end
    return pdfdictionary {
        O = pdf_userproperties,
        P = entries,
    }
end
291
-- Creates the structure element for 'fulltag' as a kid of 'parent'.
--
-- Returns
--   false : the tag name is "ignore"
--   true  : the tag is one of the mstacker variants, no element is made
--   table : the new element record (tag, pref, kids, knum, pnum), which is
--           also stored in elements[fulltag]
--
-- The StructElem dictionary is flushed right away; its K entry points to a
-- reserved object, and the kids array that belongs there is kept in the record
-- (field kids, object number in knum).
local function makeelement(fulltag,parent)
    local specification = specifications[fulltag]
    local tagname       = specification.tagname

    if tagname == "ignore" then
        return false
    end
    local isstacker = tagname == "mstackertop" or tagname == "mstackerbot" or tagname == "mstackermid"
    if isstacker then
        return true
    end

    -- head cells get a name of their own, spanning table cells get attributes
    local tagnameused = tagname
    local attributes  = nil
    if tagname == "tabulatecell" then
        local cell = structurestags.gettabulatecell(fulltag)
        if cell and cell.kind == 1 then
            tagnameused = "tabulateheadcell"
        end
    elseif tagname == "tablecell" then
        local cell = structurestags.gettablecell(fulltag)
        if cell then
            if cell.kind == 1 then
                tagnameused = "tableheadcell"
            end
            local rowspan = cell.rows    or 1
            local colspan = cell.columns or 1
            if rowspan > 1 or colspan > 1 then
                attributes = pdfdictionary {
                    O       = pdfconstant("Table"),
                    RowSpan = rowspan > 1 and rowspan or nil,
                    ColSpan = colspan > 1 and colspan or nil,
                }
            end
        end
    end

    -- remembered for the role map that finishstructure builds
    usedmapping[tagname] = true

    -- optional id plus associated file reference
    local id, af = nil, nil
    if embeddedtags and (embeddedtags == true or embeddedtags[tagname]) then
        id = f_tagid(tagname,specification.tagindex)
        af = job.fileobjreferences.collected[id]
        if af then
            af = pdfarray { pdfreference(af) }
        end
    end

    local kidsarray = pdfarray()
    local kidsnum   = pdfreserveobject()
    local label     = usedlabels[tagnameused] or tagnameused
    local detail    = specification.detail

    local element = pdfdictionary {
        Type = pdf_struct_element,
        S    = pdfconstant(label),
        ID   = id,
        T    = detail or nil,
        P    = parent.pref,
        Pg   = pageref,
        K    = pdfreference(kidsnum),
        A    = attributes,
        AF   = af,
    }
    local reference = pdfreference(pdfflushobject(element))

    if id and names then
        names[id] = reference
    end

    local siblings = parent.kids
    siblings[#siblings+1] = reference

    local record = {
        tag  = label,
        pref = reference,
        kids = kidsarray,
        knum = kidsnum,
        pnum = pagenum
    }
    elements[fulltag] = record
    return record
end
383
384local f_BDC = formatters["/%s <</MCID %s>> BDC"]
385
386local a_destination = attributes.private('destination')
387local a_reference = attributes.private('reference')
388
-- Registers one piece of marked content under 'parent' and returns the BDC
-- operator string that opens it.
--
-- The current value of 'index' is used as MCID. What is appended to the kids
-- of the parent depends on the content:
--   image                    : an MCR dictionary with an Alt entry (the label
--                              of the image, or "image")
--   same page as the parent  : just the MCID
--   otherwise                : an MCR dictionary with the page reference
-- After that 'index' is incremented and the parent reference is stored at that
-- position in the array of the current page.
local function makecontent(start,parent,id,specification)
    local mcid = index
    local kid

    if id == "image" then
        local tags  = specification.taglist
        local data  = usewithcare.images[tags[#tags]]
        local label = data and data.label
        kid = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = mcid,
            Alt  = pdfunicode(label ~= "" and label or "image"),
        }
    elseif pagenum == parent.pnum then
        kid = mcid
    else
        kid = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = mcid,
        }
    end

    local kids = parent.kids
    kids[#kids+1] = kid

    index = mcid + 1
    list[index] = parent.pref

    return f_BDC(parent.tag,mcid)
end
431
-- Returns the operator string that opens an artifact; the argument is
-- accepted but not used.
local function makeignore(specification)
    local artifact = "/Artifact BMC"
    return artifact
end
435
436
437
-- Both are created on demand by nodeinjections.addtags.
local EMCliteral = nil
local visualize  = nil

-- State consulted by nodeinjections.addtags.
local enabled = true
local reduced = false

updaters.register("tagging.state.disable", function()
    enabled = false
end)

updaters.register("tagging.state.enable", function()
    enabled = true
end)

directives.register("tagging.state.reduced", function(v)
    reduced = v
end)
448
-- Switches to reduced mode (see nodeinjections.addtags) and reports it.
function codeinjections.reducetags()
    reduced = true
    report_tags("only outer level document tag used")
end
453
-- Wraps the content of a page in marked content operators and builds the
-- structure elements that go with it.
--
-- head : the list to process (handled through the nuts interface)
--
-- Returns head. This now also holds when tagging is disabled; the original
-- returned nothing in that case.
--
-- Steps:
--   1. on first use the EMC literal and the root of the structure are created
--   2. the page state is reset (initializepage)
--   3. ranges { attribute, kind, first, last, parentlist } are collected: in
--      reduced mode one range covering the list of head, otherwise one range
--      per run of glyphs with the same 'tagged' attribute and one per box
--      that carries the 'image' attribute
--   4. for each range the missing structure elements are made and a BDC (or
--      artifact) literal is injected before and an EMC literal after it
--   5. the page attributes are added (finishpage)
function nodeinjections.addtags(head)

    if not enabled then
        return head
    end

    if not EMCliteral then
        EMCliteral = register(setstate("EMC"))
    end

    local last      = nil
    local ranges    = { }
    local range     = nil
    local nofranges = 0

    if not root then
        structure_kids = pdfarray()
        structure_ref  = pdfreserveobject()
        parent_ref     = pdfreserveobject()
        root           = { pref = pdfreference(structure_ref), kids = structure_kids }
        names          = pdfarray()
    end

    initializepage()

    if reduced then

        local list = getlist(head)

        if list then

            ranges = {
                { 1, "glyph", list, tailoflist(list), head }
            }

            nofranges = 1

            -- beware: this replaces the file level taglist by a single
            -- synthetic document entry
            taglist = {
                {
                    attribute = 1,
                    metadata  = { },
                    tagindex  = 1,
                    taglist   = { "document>1" },
                    tagname   = "document",
                },
            }

        end

    else

        -- 'list' is the box that holds the nodes being traversed; it is nil
        -- for the outermost call
        local function collectranges(head,list)
            for n, id in nextnode, head do
                if id == glyph_code then
                    -- glyphs with character zero are skipped
                    if getchar(n) ~= 0 then
                        local at = getattr(n,a_tagged) or false
                        if last ~= at then
                            range = { at, "glyph", n, n, list }
                            nofranges = nofranges + 1
                            ranges[nofranges] = range
                            last = at
                        elseif range then
                            range[4] = n
                        end
                    end
                elseif id == hlist_code or id == vlist_code then
                    if getattr(n,a_image) then
                        local at = getattr(n,a_tagged) or false
                        nofranges = nofranges + 1
                        ranges[nofranges] = { at, "image", n, n, list }
                        -- glyph attributes are never nil, so the next glyph
                        -- always starts a new range
                        last = nil
                    else
                        local sublist = getlist(n)
                        if sublist then
                            collectranges(sublist,n)
                        end
                    end
                end
            end
        end

        collectranges(head)

    end

    if trace_tags then
        for i=1,nofranges do
            local range = ranges[i]
            local attr  = range[1]
            local start = range[3]
            local stop  = range[4]
            local tags  = taglist[attr]
            if tags then
                report_tags("%s => %s : %05i % t",tosequence(start,start),tosequence(stop,stop),attr,tags.taglist)
            end
        end
    end

    local top    = nil
    local noftop = 0

    -- Puts 'literal' (optionally followed by 'left') before start and a copy
    -- of the EMC literal (optionally preceded by 'right') after stop. When
    -- start has no predecessor the literal becomes the list of the enclosing
    -- box.
    local function inject(start,stop,list,literal,left,right)
        local before = getprev(start)
        if before then
            setlink(before,literal)
        end
        if left then
            setlink(literal,left,start)
        else
            setlink(literal,start)
        end
        if list and not before then
            setlist(list,literal)
        end
        local closer = copy_node(EMCliteral)
        -- the successor has to be fetched before stop gets relinked
        local after  = getnext(stop)
        if after then
            setlink(closer,after)
        end
        if right then
            setlink(stop,right,closer)
        else
            setlink(stop,closer)
        end
    end

    for i=1,nofranges do

        local range = ranges[i]
        local attr  = range[1]
        local id    = range[2]
        local start = range[3]
        local stop  = range[4]
        local list  = range[5]

        if attr then

            local specification = taglist[attr]
            local tags          = specification.taglist
            local noftags       = #tags
            local common        = 0
            local literal       = nil
            local ignore        = false

            -- number of leading tags shared with the previous tagged range
            if top then
                local limit = noftags < noftop and noftags or noftop
                for j=1,limit do
                    if top[j] == tags[j] then
                        common = j
                    else
                        break
                    end
                end
            end

            local prev = common > 0 and elements[tags[common]] or root

            for j=common+1,noftags do
                local tag = tags[j]
                local prv = elements[tag] or makeelement(tag,prev)
                if prv == false then
                    -- an ignored tag: the range becomes an artifact
                    prev   = false
                    ignore = true
                    break
                elseif prv ~= true then
                    -- true means that no element was made for this tag, in
                    -- which case the current parent is kept
                    prev = prv
                end
            end

            if prev then
                literal = setstate(makecontent(start,prev,id,specification))
            elseif ignore then
                literal = setstate(makeignore(specification))
            end

            if literal then
                local left, right
                if trace_info then
                    local name = specification.tagname
                    if name then
                        if not visualize then
                            visualize = nodes.visualizers.register("tags")
                        end
                        left  = visualize(name)
                        right = visualize()
                    end
                end
                inject(start,stop,list,literal,left,right)
            end

            top    = tags
            noftop = noftags

        else

            -- untagged content becomes an artifact; the original passed an
            -- out of scope (hence nil) 'specification' to makeignore, which
            -- does not use its argument
            inject(start,stop,list,setstate(makeignore()))

        end

    end

    finishpage()

    return head

end
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829local permitted = true
830local enabled = false
831
-- Blocks tagging when called with the boolean false; any other value is
-- ignored. Blocking disables the shipout and math handlers when they were
-- enabled and clears the 'permitted' flag (with a message when tracing), after
-- which codeinjections.enabletags no longer does anything.
function codeinjections.settaggingsupport(option)
    if option ~= false then
        return
    end
    if enabled then
        disableaction("shipouts","structures.tags.handler")
        disableaction("math","noads.handlers.tags")
        enabled = false
    end
    if not permitted then
        return
    end
    if trace_tags then
        report_tags("blocking structure tags")
    end
    permitted = false
end
848
-- Installs nodeinjections.addtags as the structure tag handler and enables
-- the shipout and math actions. Does nothing when tagging has been blocked or
-- when it is already enabled.
function codeinjections.enabletags()
    if enabled or not permitted then
        return
    end

    structures.tags.handler = nodeinjections.addtags

    enableaction("shipouts","structures.tags.handler")
    enableaction("math","noads.handlers.tags")

    if trace_tags then
        report_tags("enabling structure tags")
    end

    enabled = true
end
862 |