1if not modules then modules = { } end modules ['lpdf-tag'] = {
2 version = 1.001,
3 comment = "companion to lpdf-tag.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9local next, type = next, type
10local format, match, gmatch = string.format, string.match, string.gmatch
11local concat, sortedhash = table.concat, table.sortedhash
12local lpegmatch, P, S, C = lpeg.match, lpeg.P, lpeg.S, lpeg.C
13local settings_to_hash = utilities.parsers.settings_to_hash
14local formatters = string.formatters
15
16local trace_tags = false trackers.register("structures.tags", function(v) trace_tags = v end)
17local trace_info = false trackers.register("structures.tags.info", function(v) trace_info = v end)
18
19local report_tags = logs.reporter("backend","tags")
20
21local backends = backends
22local lpdf = lpdf
23local nodes = nodes
24
25local nodeinjections = backends.pdf.nodeinjections
26local codeinjections = backends.pdf.codeinjections
27
28local enableaction = nodes.tasks.enableaction
29local disableaction = nodes.tasks.disableaction
30
31local pdfdictionary = lpdf.dictionary
32local pdfarray = lpdf.array
33local pdfboolean = lpdf.boolean
34local pdfconstant = lpdf.constant
35local pdfreference = lpdf.reference
36local pdfunicode = lpdf.unicode
37local pdfflushobject = lpdf.flushobject
38local pdfreserveobject = lpdf.reserveobject
39local pdfpagereference = lpdf.pagereference
40local pdfmakenametree = lpdf.makenametree
41
42local addtocatalog = lpdf.addtocatalog
43local addtopageattributes = lpdf.addtopageattributes
44
45local texgetcount = tex.getcount
46
47local nodecodes = nodes.nodecodes
48
49local hlist_code = nodecodes.hlist
50local vlist_code = nodecodes.vlist
51local glyph_code = nodecodes.glyph
52
53local a_tagged = attributes.private('tagged')
54local a_image = attributes.private('image')
55
56local nuts = nodes.nuts
57
58local nodepool = nuts.pool
59local pageliteral = nodepool.pageliteral
60local register = nodepool.register
61
62local getid = nuts.getid
63local getattr = nuts.getattr
64local getprev = nuts.getprev
65local getnext = nuts.getnext
66local getlist = nuts.getlist
67local getchar = nuts.getchar
68
69local setlink = nuts.setlink
70local setlist = nuts.setlist
71
72local copy_node = nuts.copy
73local tosequence = nuts.tosequence
74
75local nextnode = nuts.traversers.node
76
-- Document wide state of the structure tree. The first four variables are
-- only assigned on the first call to nodeinjections.addtags, so root doubles
-- as the "tagging has started" flag that finishstructure tests.
local structure_kids -- pdf array holding the references to the top level elements
local structure_ref -- reserved object number, flushed as the StructTreeRoot
local parent_ref -- reserved object number, flushed as the ParentTree
local root -- pseudo element { pref = ..., kids = structure_kids } used as top level parent
local names = { } -- element id -> element reference, passed to pdfmakenametree
local tree = { } -- real page number -> array with the owning element reference per marked content id
local elements = { } -- fulltag -> element record as made by makeelement

-- Shortcuts into the tag administration kept in structures.tags.
local structurestags = structures.tags
local taglist = structurestags.taglist
local specifications = structurestags.specifications
local usedlabels = structurestags.labels
local properties = structurestags.properties
local usewithcare = structurestags.usewithcare

local usedmapping = { } -- tagname -> true for each tag an element was made for; drives the RoleMap



local embeddedtags = false -- false, true (all tags) or a table tagname -> true, set by the directives below
local f_tagid = formatters["%s-%04i"] -- makes the element id out of a tagname and a tagindex
local embeddedfilelist = pdfarray() -- not referenced in the visible part of this file
99
100
101
-- Directive "structures.tags.embed". A string value is split at commas and
-- spaces and every piece is flagged in the embeddedtags table (which replaces
-- an earlier false or true). Any other true value enables embedding for all
-- tags, but only when nothing was configured before.
directives.register("structures.tags.embed",function(v)
    if type(v) ~= "string" then
        if v and not embeddedtags then
            embeddedtags = true
        end
        return
    end
    local wanted = embeddedtags
    if type(wanted) ~= "table" then
        wanted = { }
        embeddedtags = wanted
    end
    for name in gmatch(v,"([^, ]+)") do
        wanted[name] = true
    end
end)
114
115
116
-- Directive "structures.tags.embedmath". When enabled, math is added to the
-- set of embedded tags. Nothing has to be done when the directive is off or
-- when all tags are embedded already (embeddedtags == true).
directives.register("structures.tags.embedmath",function(v)
    if not v or embeddedtags == true then
        return
    end
    if embeddedtags then
        embeddedtags.math = true
    else
        embeddedtags = { math = true }
    end
end)
128
-- Tag remappings registered with codeinjections.maptag, indexed by the
-- original tag name; each entry is { target, kind }. This table was never
-- declared, so a call to maptag failed on indexing a nil global. It is not
-- consulted by the other code visible in this file.
local mapping = { }

-- Registers that the tag 'original' maps onto 'target'.
--
-- original : tag name that gets mapped
-- target   : tag name it is mapped onto
-- kind     : optional kind of the target, "inline" when omitted
function codeinjections.maptag(original,target,kind)
    mapping[original] = { target, kind or "inline" }
end
132
133
134
-- Document finalizer. When tagging took place (root is set) and there is at
-- least one top level element, this flushes the parent tree, builds the id
-- tree and the role map, flushes the structure tree root and registers it
-- (plus, for pdf major version 1, the MarkInfo dictionary) in the catalog.
-- Finally the kids arrays of all elements are flushed into the object numbers
-- that were reserved for them in makeelement.
local function finishstructure()
    if not root or #structure_kids == 0 then
        return
    end

    -- number tree: pairs of a zero based index and a reference to the
    -- flushed per page list
    local nums = pdfarray()
    for page=1,#tree do
        nums[2*page-1] = page - 1
        nums[2*page]   = pdfreference(pdfflushobject(tree[page]))
    end
    local parenttree = pdfdictionary {
        Nums = nums
    }

    local idtree = pdfmakenametree(names)

    -- every used tag is mapped onto its pdf counterpart, with Span as fallback
    local rolemap = pdfdictionary()
    for tagname in next, usedmapping do
        local label    = usedlabels[tagname] or tagname
        local property = properties[label]
        rolemap[label] = pdfconstant(property and property.pdf or "Span")
    end

    -- keep this order: the kids are flushed before the parent tree
    local kidsreference   = pdfreference(pdfflushobject(structure_kids))
    local parentreference = pdfreference(pdfflushobject(parent_ref,parenttree))

    local structuretree = pdfdictionary {
        Type       = pdfconstant("StructTreeRoot"),
        K          = kidsreference,
        ParentTree = parentreference,
        IDTree     = idtree,
        RoleMap    = rolemap,
    }
    pdfflushobject(structure_ref,structuretree)
    addtocatalog("StructTreeRoot",pdfreference(structure_ref))

    if lpdf.majorversion() == 1 then
        local markinfo = pdfdictionary {
            Marked = pdfboolean(true) or nil,
        }
        addtocatalog("MarkInfo",pdfreference(pdfflushobject(markinfo)))
    end

    for _, element in sortedhash(elements) do
        pdfflushobject(element.knum,element.kids)
    end
end

lpdf.registerdocumentfinalizer(finishstructure,"document structure")
181
-- State of the page that is being tagged: index counts the marked content
-- ids handed out so far, pagenum and pageref identify the page and list is
-- the array that gets, per marked content id, the owning element reference.
local index   = 0
local pageref = nil
local pagenum = 0
local list    = nil

local pdf_mcr            = pdfconstant("MCR")
local pdf_struct_element = pdfconstant("StructElem")
local pdf_s              = pdfconstant("S")

-- Resets the page state for the page given by the realpageno counter and
-- stores a fresh list for that page in tree.
local function initializepage()
    local realpage  = texgetcount("realpageno")
    local reference = pdfreference(pdfpagereference(realpage))
    local pagelist  = pdfarray()
    index   = 0
    pagenum = realpage
    pageref = reference
    list    = pagelist
    tree[realpage] = pagelist
end
195
-- Adds the structure related entries to the attributes of the current page.
--
-- StructParents is the zero based key of this page in the number tree that
-- finishstructure writes (there the key is also the page number minus one).
local function finishpage()
    addtopageattributes("StructParents",pagenum-1)
    -- The value used to be the undefined variable 's', so nil was passed.
    -- The constant /S (tab order follows the structure) is defined above
    -- as pdf_s and was not used anywhere else.
    addtopageattributes("Tabs",pdf_s)
end
202
203
204
local pdf_userproperties = pdfconstant("UserProperties")

-- Converts a hash into an attribute dictionary owned by UserProperties, with
-- one { N = key, V = value } dictionary per entry, in the order delivered by
-- sortedhash. Nothing is returned for nil or for an empty table.
local function makeattribute(t)
    if not t or not next(t) then
        return
    end
    local entries = pdfarray()
    local n = 0
    for name, value in sortedhash(t) do
        n = n + 1
        entries[n] = pdfdictionary {
            N = pdfunicode(name),
            V = pdfunicode(value),
        }
    end
    return pdfdictionary {
        O = pdf_userproperties,
        P = entries,
    }
end
225
-- Creates the structure element for fulltag, appends its reference to the
-- kids of parent and registers the element in elements.
--
-- fulltag : key into specifications (and later into elements)
-- parent  : element record (or root) providing pref and kids
--
-- Returns false for the "ignore" tag, true for the mstacker sub tags (no
-- element is made in either case), and otherwise the new element record with
-- the fields tag, pref, kids, knum and pnum.
local function makeelement(fulltag,parent)
    local specification = specifications[fulltag]
    local tagname       = specification.tagname

    if tagname == "ignore" then
        return false
    end
    if tagname == "mstackertop" or tagname == "mstackerbot" or tagname == "mstackermid" then
        return true
    end

    local usedname   = tagname
    local attributes = nil

    if tagname == "tabulatecell" then
        local cell = structurestags.gettabulatecell(fulltag)
        if cell and cell.kind == 1 then
            usedname = "tabulateheadcell"
        end
    elseif tagname == "tablecell" then
        local cell = structurestags.gettablecell(fulltag)
        if cell then
            if cell.kind == 1 then
                usedname = "tableheadcell"
            end
            -- spans are only written when they differ from the default of 1
            local nofrows    = cell.rows or 1
            local nofcolumns = cell.columns or 1
            if nofrows > 1 or nofcolumns > 1 then
                attributes = pdfdictionary {
                    O       = pdfconstant("Table"),
                    RowSpan = nofrows > 1 and nofrows or nil,
                    ColSpan = nofcolumns > 1 and nofcolumns or nil,
                }
            end
        end
    end

    local detail   = specification.detail
    local userdata = specification.userdata -- fetched but not used here

    -- the role map is keyed by the original tagname, not by the used one
    usedmapping[tagname] = true

    local id = nil
    local af = nil
    if embeddedtags then
        local tagindex = specification.tagindex
        if embeddedtags == true or embeddedtags[tagname] then
            id = f_tagid(tagname,tagindex)
            af = job.fileobjreferences.collected[id]
            if af then
                af = pdfarray { pdfreference(af) }
            end
        end
    end

    local kids  = pdfarray()
    local knum  = pdfreserveobject() -- the kids are flushed into this object by finishstructure
    local label = usedlabels[usedname] or usedname

    local pref = pdfreference(pdfflushobject(pdfdictionary {
        Type = pdf_struct_element,
        S    = pdfconstant(label),
        ID   = id,
        T    = detail or nil,
        P    = parent.pref,
        Pg   = pageref,
        K    = pdfreference(knum),
        A    = attributes,
        AF   = af,
    }))

    if id and names then
        names[id] = pref
    end

    local siblings = parent.kids
    siblings[#siblings+1] = pref

    local element = {
        tag  = label,
        pref = pref,
        kids = kids,
        knum = knum,
        pnum = pagenum,
    }
    elements[fulltag] = element
    return element
end
317
local f_BDC = formatters["/%s <</MCID %s>> BDC"]

-- Registers a piece of marked content as kid of parent and returns the BDC
-- operator that opens it.
--
-- parent        : element record (tag, kids, pref, pnum)
-- id            : kind of range, "image" gets an MCR with an Alt entry
-- specification : tag specification, only its taglist is used (for images)
--
-- The marked content id is the current value of index. A plain number is
-- stored as kid when the content is on the page the element was made on,
-- otherwise an MCR dictionary that carries the page reference.
local function makecontent(parent,id,specification)
    local kids = parent.kids
    local mcid = index
    local entry
    if id == "image" then
        local tags  = specification.taglist
        local data  = usewithcare.images[tags[#tags]]
        local label = data and data.label
        entry = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = mcid,
            Alt  = pdfunicode(label ~= "" and label or "image"),
        }
    elseif pagenum == parent.pnum then
        entry = mcid
    else
        entry = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = mcid,
        }
    end
    kids[#kids+1] = entry

    -- slot n of the page list belongs to marked content id n-1
    index = mcid + 1
    list[index] = parent.pref

    return f_BDC(parent.tag,mcid)
end
352
-- Returns the operator that opens an artifact, used for content that does
-- not end up in the structure tree. The argument is accepted but not used.
local function makeignore(specification)
    local operator = "/Artifact BMC"
    return operator
end
356
357
358
local EMCliteral = nil -- registered "EMC" page literal, copied for every range
local visualize  = nil -- visualizer, only registered when trace_info is set

-- Wraps the content of a page in marked content operators and builds the
-- matching structure elements.
--
-- head : first node of the list to process (a nut, given the nuts helpers)
--
-- Returns head (the same node that was passed in).
--
-- The work is done in three steps:
--
-- 1. collectranges walks the list (descending into boxes) and groups glyphs
--    that follow each other with the same 'tagged' attribute into ranges;
--    a box that carries the 'image' attribute becomes a range of its own.
-- 2. For each range with a tag attribute the elements that are missing for
--    its taglist are made (only the part that differs from the previous
--    range) and the range is wrapped in BDC ... EMC.
-- 3. Ranges without a tag attribute are wrapped as artifact.
function nodeinjections.addtags(head)

    if not EMCliteral then
        EMCliteral = register(pageliteral("EMC"))
    end

    local last   = nil
    local ranges = { }
    local range  = nil

    -- first page that gets tagged: set up the root of the structure tree
    if not root then
        structure_kids = pdfarray()
        structure_ref  = pdfreserveobject()
        parent_ref     = pdfreserveobject()
        root           = { pref = pdfreference(structure_ref), kids = structure_kids }
        names          = pdfarray()
    end

    -- A range is { attribute or false, "glyph" or "image", first, last, box }
    -- where box is the list node that contains the range (nil at the outer
    -- level).
    local function collectranges(head,list)
        for n, id in nextnode, head do
            if id == glyph_code then
                -- glyphs with character 0 are skipped
                if getchar(n) ~= 0 then
                    local at = getattr(n,a_tagged) or false
                    if last ~= at then
                        range = { at, "glyph", n, n, list }
                        ranges[#ranges+1] = range
                        last = at
                    elseif range then
                        range[4] = n
                    end
                end
            elseif id == hlist_code or id == vlist_code then
                local image = getattr(n,a_image)
                if image then
                    local at = getattr(n,a_tagged) or false
                    ranges[#ranges+1] = { at, "image", n, n, list }
                    -- forces a new range for whatever follows the image
                    last = nil
                else
                    local content = getlist(n)
                    if content then
                        collectranges(content,n)
                    end
                end
            end
        end
    end

    initializepage()

    collectranges(head)

    if trace_tags then
        for i=1,#ranges do
            local range = ranges[i]
            local attr  = range[1]
            local id    = range[2]
            local start = range[3]
            local stop  = range[4]
            local tags  = taglist[attr]
            if tags then
                report_tags("%s => %s : %05i % t",tosequence(start,start),tosequence(stop,stop),attr,tags.taglist)
            end
        end
    end

    local top    = nil -- taglist of the previous tagged range
    local noftop = 0

    -- Puts literal (and optionally left) before start and a copy of the EMC
    -- literal (optionally preceded by right) after stop. When start is the
    -- first node of a box, the list of that box is made to start at literal.
    local function inject(start,stop,list,literal,left,right)
        local prev = getprev(start)
        if prev then
            setlink(prev,literal)
        end
        if left then
            setlink(literal,left,start)
        else
            setlink(literal,start)
        end
        if list and not prev then
            setlist(list,literal)
        end
        local emc   = copy_node(EMCliteral)
        local after = getnext(stop)
        if after then
            setlink(emc,after)
        end
        if right then
            setlink(stop,right,emc)
        else
            setlink(stop,emc)
        end
    end

    for i=1,#ranges do

        local range = ranges[i]
        local attr  = range[1]
        local id    = range[2]
        local start = range[3]
        local stop  = range[4]
        local list  = range[5]

        if attr then

            local specification = taglist[attr]
            local tags          = specification.taglist
            local noftags       = #tags
            local common        = 0
            local literal       = nil
            local ignore        = false

            -- length of the prefix shared with the previous taglist
            if top then
                for j=1,noftags >= noftop and noftop or noftags do
                    if top[j] == tags[j] then
                        common = j
                    else
                        break
                    end
                end
            end

            local prev = common > 0 and elements[tags[common]] or root

            -- make the elements below the shared prefix; makeelement returns
            -- false for an ignored tag and true for a tag without an element
            for j=common+1,noftags do
                local tag = tags[j]
                local prv = elements[tag] or makeelement(tag,prev)
                if prv == false then
                    prev   = false
                    ignore = true
                    break
                elseif prv == true then
                    -- skip, the content goes to the element one level up
                else
                    prev = prv
                end
            end

            if prev then
                literal = pageliteral(makecontent(prev,id,specification))
            elseif ignore then
                literal = pageliteral(makeignore(specification))
            end

            if literal then
                local left, right
                if trace_info then
                    local name = specification.tagname
                    if name then
                        if not visualize then
                            visualize = nodes.visualizers.register("tags")
                        end
                        left  = visualize(name)
                        right = visualize()
                    end
                end
                inject(start,stop,list,literal,left,right)
            end

            top    = tags
            noftop = noftags

        else

            -- No tag attribute, so there is no specification either. This
            -- branch used to pass 'specification', which is not in scope here
            -- and thus read an undefined global; makeignore does not use its
            -- argument.
            inject(start,stop,list,pageliteral(makeignore()))

        end

    end

    finishpage()

    return head

end
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
local permitted = true  -- false once tagging got blocked
local enabled   = false -- true while the tagging actions are enabled

-- Blocks tagging when option is false: the tagging actions are disabled when
-- they were enabled, and enabling them later on is no longer permitted. Any
-- other value of option leaves everything as it is.
function codeinjections.settaggingsupport(option)
    if option ~= false then
        return
    end
    if enabled then
        disableaction("shipouts","structures.tags.handler")
        disableaction("shipouts","nodes.handlers.accessibility")
        disableaction("math","noads.handlers.tags")
        enabled = false
    end
    if not permitted then
        return
    end
    if trace_tags then
        report_tags("blocking structure tags")
    end
    permitted = false
end
720
-- Enables the tagging actions and installs nodeinjections.addtags as the tag
-- handler, unless tagging got blocked or the actions are enabled already.
function codeinjections.enabletags()
    if enabled or not permitted then
        return
    end
    structures.tags.handler = nodeinjections.addtags
    enableaction("shipouts","structures.tags.handler")
    enableaction("shipouts","nodes.handlers.accessibility")
    enableaction("math","noads.handlers.tags")
    if trace_tags then
        report_tags("enabling structure tags")
    end
    enabled = true
end
734 |