1if not modules then modules = { } end modules ['lpdf-tag'] = {
2 version = 1.001,
3 comment = "companion to lpdf-tag.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14local next, type, tonumber = next, type, tonumber
15local format, match, gmatch, find, gsub = string.format, string.match, string.gmatch, string.find, string.gsub
16local concat, sortedhash = table.concat, table.sortedhash
17local lpegmatch, P, S, C = lpeg.match, lpeg.P, lpeg.S, lpeg.C
18local settings_to_hash = utilities.parsers.settings_to_hash
19local formatters = string.formatters
20
21local trace_tags = false
22local trace_info = false
23local trace_math = false
24local trace_blobs = false
25local trace_internals = false
26local trace_suspects = false
27local trace_tree = false
28
-- Tracker hooks: each callback copies the tracker value into the matching
-- file local flag that the rest of this module tests.

trackers.register("structures.tags", function(state)
    trace_tags = state
end)

trackers.register("structures.tags.info", function(state)
    trace_info = state
end)

trackers.register("structures.tags.math", function(state)
    trace_math = state
    -- the viewer layers are enabled on every call, whatever the value is
    attributes.viewerlayers.enable()
end)

trackers.register("structures.tags.blobs", function(state)
    trace_blobs = state
end)

trackers.register("structures.tags.internals", function(state)
    trace_internals = state
end)

trackers.register("structures.tags.suspects", function(state)
    trace_suspects = state
end)

trackers.register("structures.tags.showtree", function(state)
    trace_tree = state
end)
36
37local detailedmath = false
38local actualtexts = { }
39
40local checklinks = true
41
42directives.register("structures.tags.math.detail", function(v) detailedmath = v end)
43
44local report_tags = logs.reporter("backend","tags")
45
46local pdfbackend = backends.registered.pdf
47local nodeinjections = pdfbackend.nodeinjections
48local codeinjections = pdfbackend.codeinjections
49
50local enableaction = nodes.tasks.enableaction
51local disableaction = nodes.tasks.disableaction
52
53local lpdf = lpdf
54local pdfdictionary = lpdf.dictionary
55local pdfarray = lpdf.array
56local pdfboolean = lpdf.boolean
57local pdfconstant = lpdf.constant
58local pdfreference = lpdf.reference
59local pdfunicode = lpdf.unicode
60local pdfstring = lpdf.string
61local pdfmakenametree = lpdf.makenametree
62
63local addtocatalog = lpdf.addtocatalog
64local addtopageattributes = lpdf.addtopageattributes
65
66local pdfflushobject = lpdf.flushobject
67local pdfreserveobject = lpdf.reserveobject
68local pdfpagereference = lpdf.pagereference
69
70local version = 1
71
72local texgetcount = tex.getcount
73
74local nodes = nodes
75local nodecodes = nodes.nodecodes
76
77local par_code <const> = nodecodes.par
78local hlist_code <const> = nodecodes.hlist
79local vlist_code <const> = nodecodes.vlist
80local glyph_code <const> = nodecodes.glyph
81local rule_code <const> = nodecodes.rule
82local glue_code <const> = nodecodes.glue
83
84local leaders_code <const> = nodes.gluecodes.leaders
85
86local empty_rule_code <const> = nodes.rulecodes.empty
87
88local a_tagged <const> = attributes.private('tagged')
89local a_image <const> = attributes.private('image')
90local a_mathblob <const> = attributes.private('mathblob')
91local a_taggedpar <const> = attributes.private("taggedpar")
92
93local nuts = nodes.nuts
94
95local nodepool = nuts.pool
96local setstate = nodepool.setstate
97local register = nodepool.register
98
99local getid = nuts.getid
100local getattr = nuts.getattr
101local getattrs = nuts.getattrs
102local getprev = nuts.getprev
103local getnext = nuts.getnext
104local getlist = nuts.getlist
105local getchar = nuts.getchar
106local getwhd = nuts.getwhd
107local getleader = nuts.getleader
108local getruledimensions = nuts.getruledimensions
109
110local tailoflist = nuts.tail
111local setlink = nuts.setlink
112local setlist = nuts.setlist
113
114local copy_node = nuts.copy
115local tosequence = nuts.tosequence
116
117local nextnode = nuts.traversers.node
118local nextcontent = nuts.traversers.content
119
120local structure_kids
121local structure_ref
122local parent_ref
123local root
124local names = { }
125local tree = { }
126local firstintree = false
127local lastintree = false
128local elements = { }
129local elementsorder = { }
130local nofelements = 0
131
132local structurestags = structures.tags
133local taglist = structurestags.taglist
134local specifications = structurestags.specifications
135local usedlabels = structurestags.labels
136local properties = structurestags.properties
137local overloads = structurestags.overloads
138local usewithcare = structurestags.usewithcare
139
140local pushtag = structurestags.push
141local poptag = structurestags.pop
142local starttag = structurestags.start
143local stoptag = structurestags.stop
144
145local usedmapping = { }
146
147local destinations = { }
148local references = { }
149
150
151
152
153
154
155
156local embeddedtags = false
157local embeddedfilelist = pdfarray()
158local blobfunctions = { }
159local tagtracers = { }
160
161local indirectlocalkids = false
162local indirectglobalkids = false
163
164
165
166
167
168
169
170local pagenumindices = { }
171local usedpages = false
172
173local linknumoffset = false
174local linknumindex = 0
175local linknumindices = { }
-- Lazily maps a link reference attribute onto the key that the link gets in
-- the parent tree. Keys are handed out sequentially, starting at an offset:
-- the 'basetreesize' saved by the previous run when there is one, otherwise
-- 1000. The result is cached in the table itself so that a reference always
-- resolves to the same key.
local linknumentries = table.setmetatableindex(function(t,refatt)
    if not linknumoffset then
        local tagging = job.variables.collected.tagging
        -- also fall back when an earlier run saved no usable size, otherwise
        -- the addition below fails on a non number
        linknumoffset = tagging and tagging.basetreesize or 1000
    end
    local v = linknumindex + linknumoffset
    linknumindex = linknumindex + 1
    -- Reserve the slot under the key that is actually handed out (v), because
    -- that is the key setlinkstructureparent fills in later. Using the bare
    -- counter here could overwrite an already resolved slot when the offset
    -- is small.
    linknumindices[v] = false
    t[refatt] = v
    return v
end)
193
-- Stores the object reference of the structure element that owns the link
-- identified by 'refatt', under the parent tree key of that link.
local function setlinkstructureparent(refatt,objref)
    linknumindices[linknumentries[refatt]] = objref
end
199
-- Returns the parent tree key that belongs to the given link reference
-- attribute; the key gets allocated on first use.
function codeinjections.getlinkstructureparent(refatt)
    local parentkey = linknumentries[refatt]
    return parentkey
end
204
205local referencenumindices = { }
206local useddestinations = false
207
-- Looks up what the previous run saved for an internal destination or, when
-- that gives nothing, for a page. The saved tables are fetched once from the
-- collected job variables; empty tables are used when nothing was saved.
function codeinjections.getreferencestructureobject(internal,page)
    if not useddestinations then
        local tagging = job.variables.collected.tagging
        if tagging then
            useddestinations = tagging.destinations or { }
            usedpages        = tagging.pages or { }
        else
            useddestinations = { }
            usedpages        = { }
        end
    end
    local found = internal and useddestinations[internal]
    if found then
        return found
    end
    return page and usedpages[page]
end
216
217
218
-- Marks tags as candidates for embedding. With a string argument every entry
-- of the comma and/or space separated list is marked, without one all tags
-- that have a blob function are marked.
local function embedsupportedtags(str)
    embeddedtags = embeddedtags or { }
    if not str then
        for tag in next, blobfunctions do
            embeddedtags[tag] = true
        end
        return
    end
    for tag in gmatch(str,"([^, ]+)") do
        embeddedtags[tag] = true
    end
end
233
-- A string value selects specific tags, any other value selects all tags
-- that have a blob function.
directives.register("structures.tags.embed",function(v)
    if type(v) == "string" then
        embedsupportedtags(v)
    else
        embedsupportedtags(nil)
    end
end)
237
do

    -- The original code wrote into an undeclared 'mapping', so any call
    -- raised "attempt to index a nil value". The table is now private to
    -- this function.
    -- NOTE(review): nothing in the visible part of this file reads these
    -- entries, so confirm whether they should feed the role map.
    local mapping = { }

    -- Registers that tag 'original' maps onto 'target'; 'kind' defaults to
    -- "inline".
    function codeinjections.maptag(original,target,kind)
        mapping[original] = { target, kind or "inline" }
    end

end
241
-- Flag that initializepage sets once object numbers have been reserved for
-- the namespaces below. It used to be declared as a second 'namespaces'
-- local (immediately shadowed, so dead) which made 'namespaced' an
-- accidental global.
local namespaced = false

-- The namespaces that can end up in the structure tree root. The 'num' and
-- 'ref' fields are added by initializepage; 'force' is switched on in
-- finishstructure for namespaces that must be written out, and 'map'
-- collects role mappings.
local namespaces = {
    mathml  = {
        url   = "http://www.w3.org/1998/Math/MathML",
        force = false,
        known = true,
        map   = { },
    },
    context = {
        url = "http://www.contextgarden.net/pdf/context",
        map = { },
    },
    user    = {
        url = "http://www.contextgarden.net/pdf/user",
        map = { },
    },
    ua1     = {
        url   = "http://iso.org/pdf/ssn",
        force = false,
        known = true,
    },
    ua2     = {
        url   = "http://iso.org/pdf2/ssn",
        force = false,
        known = true,
    },
}
270
-- Every key that gets looked up here is remembered and resolves to
-- "NonStruct"; the remembered keys are reported in the statistics.
local missingua = table.setmetatableindex(function(t,k)
    t[k] = "NonStruct"
    return "NonStruct"
end)
277
-- Reports the tags that had no pdf equivalent. This goes through the context
-- formatters because "% t" (concatenate a table with spaces) is not a valid
-- string.format conversion; the suspects report in this file uses the same
-- directive via the formatters.
statistics.register("pdf tags", function()
    local k, v = next(missingua)
    if k then
        return formatters["unknown tags mapped to %s: % t"](v,table.sortedkeys(missingua))
    end
end)
284
-- Joins the entries of tags.taglist with spaces; entries that are false or
-- missing within the counted range show up as "ERROR".
local function concattags(tags)
    local source = tags.taglist
    local count  = #source
    local result = { }
    for i=1,count do
        local entry = source[i]
        if entry then
            result[i] = entry
        else
            result[i] = "ERROR"
        end
    end
    return concat(result," ",1,count)
end
294
-- Applies a registered overload to a tag. The lookup key is the detail of
-- the element itself or, depending on the criterium of the overload, the
-- detail (or first word of the parents field) of the element one level up
-- in the tag list. When the overload maps that key, the mapped tag replaces
-- both tag names and the detail is dropped.
--
-- Returns: tagname, tagnameused, detail
local function checkoverload(tagname,tagnameused,detail,specification)
    local overload = overloads[tagnameused]
    if not overload then
        return tagname, tagnameused, detail
    end
    local key       = detail
    local criterium = overload.criterium
    if criterium == "parent" or criterium == "parents" then
        local chain      = specification.taglist
        local parentspec = specifications[chain[#chain-1]]
        local parents    = nil
        if criterium == "parents" then
            parents = parentspec.parents
        end
        if parents then
            key = match(parents,"%S+")
        else
            key = parentspec.detail
        end
    end
    local mapped = key and overload.mapping[key]
    if mapped then
        local newtag = mapped.tag
        return newtag, newtag, nil
    end
    return tagname, tagnameused, detail
end
325
-- Document finalizer: writes the structure tree. When there is a root with
-- kids it
--
-- * builds the Nums array of the parent tree from the per page lists,
-- * builds the role map (version 1) or the namespace objects (otherwise),
-- * flushes the structure elements and resolves link and reference entries,
-- * saves the tagging data (tree size, destinations, pages) for the next run,
-- * adds StructTreeRoot (and for version 1 MarkInfo) to the catalog.
--
-- After that the optional tree and blob traces are reported.
local function finishstructure()
    if root and #structure_kids > 0 then
        local nums = pdfarray()
        local n    = 0
        if indirectglobalkids then
            for i=firstintree,lastintree do
                local ti = tree[i]
                if ti then
                    n = n + 1 ; nums[n] = i - 1 -- zero based, matches StructParents
                    n = n + 1 ; nums[n] = pdfreference(pdfflushobject(ti))
                else
                    report_tags("beware: missing page %i in tree", i)
                end
            end
        else
            for i=firstintree,lastintree do
                local ti = tree[i]
                if not ti then
                    report_tags("beware: missing page %i in tree", i)
                elseif #ti > 0 then
                    n = n + 1 ; nums[n] = i - 1 -- zero based, matches StructParents
                    n = n + 1 ; nums[n] = ti
                end
            end
        end
        local usedrolemap    = nil
        local usednamespaces = nil
        local originals      = { }
        if version == 1 then
            for k, v in next, usedmapping do
                local k = usedlabels[k] or k
                local p = properties[k]
                if p then
                    local pdf = p.pdf
                    if not pdf then
                        -- register the tag (not the absent pdf name) as
                        -- missing: a nil key cannot be stored and the
                        -- statistics list these keys
                        pdf = missingua[k]
                    end
                    local r = pdfconstant(pdf)
                    if usedrolemap then
                        usedrolemap[k] = r
                    else
                        usedrolemap = pdfdictionary { [k] = r }
                    end
                else
                    report_tags("beware: missing property %s", k)
                end
            end
        else
            for k, v in next, usedmapping do
                local k = usedlabels[k] or k
                local p = properties[k]
                if p then
                    local s = p.namespace
                    local n = namespaces[s]
                    if n then
                        local pdf = p.pdf
                        local pua = p.pua
                        local ua  = n
                        if not pdf then
                            -- see above: key the fallback by tag
                            pdf = missingua[k]
                        end
                        if pua == "ua1" then
                            ua = namespaces.ua1
                            ua.force = true
                        elseif pua == "mathml" then
                            ua = namespaces.mathml
                            ua.force = true
                        else
                            ua = namespaces.ua2
                            ua.force = true
                        end
                        if pdf ~= k then
                            ua = pdfarray { pdfconstant(pdf), ua.ref }
                            n.map[k] = ua
                        end
                        if not usednamespaces then
                            usednamespaces = pdfarray()
                        end
                        local original = p.original
                        if original then
                            local o = originals[s]
                            if not o then
                                o = pdfdictionary()
                                originals[s] = o
                            end
                            o[k] = pdfdictionary {
                                S = pdfconstant(original[1]),
                                T = pdfstring(original[2]),
                            }
                        end
                    else
                        report_tags("beware: missing namespace %s", s)
                    end
                else
                    report_tags("beware: missing property %s", k)
                end
            end
            if usednamespaces then
                for k, v in sortedhash(namespaces) do
                    local o = originals[k]
                    if o then
                        o = pdfreference(pdfflushobject(o))
                    end
                    if v.force then
                        local n = pdfdictionary {
                            Type           = pdfconstant("Namespace"),
                            NS             = pdfunicode(v.url),
                            LMTX_NameSpace = pdfconstant(k),
                            LMTX_Originals = o,
                        }
                        pdfflushobject(v.num,n)
                        usednamespaces[#usednamespaces+1] = v.ref
                    else
                        local map = v.map
                        if map and next(map) then
                            local m = pdfreference(pdfflushobject(pdfdictionary(map)))
                            local n = pdfdictionary {
                                Type           = pdfconstant("Namespace"),
                                NS             = pdfunicode(v.url),
                                RoleMapNS      = m,
                                LMTX_NameSpace = pdfconstant(k),
                                LMTX_Originals = o,
                            }
                            pdfflushobject(v.num,n)
                            usednamespaces[#usednamespaces+1] = v.ref
                        end
                    end
                end
            end
        end
        if indirectlocalkids then
            -- the element dictionaries were flushed already, only the kids
            -- arrays are pending
            for i=1,nofelements do
                local fulltag = elementsorder[i]
                local element = elements[fulltag]
                local kids    = element.kids
                local knum    = element.knum
                if checklinks then
                    local tag = element.tag
                    if tag == "link" then
                        local ref = element.ref
                        if ref then
                            setlinkstructureparent(ref,element.pref)
                        end
                    elseif tag == "reference" then
                        local des = element.des
                        if des then
                            referencenumindices[des] = element.dnum
                        end
                    end
                end
                pdfflushobject(knum,kids)
            end
        else
            for i=1,nofelements do
                local fulltag = elementsorder[i]
                local element = elements[fulltag]
                local dict    = element.dict
                if dict then
                    local dnum = element.dnum
                    local kids = element.kids
                    if checklinks then
                        local tag = element.tag
                        if tag == "link" then
                            local ref = element.ref
                            if ref then
                                setlinkstructureparent(ref,element.pref)
                            end
                        elseif tag == "reference" then
                            local des = element.des
                            if des then
                                referencenumindices[des] = element.dnum
                            end
                        elseif tag == "navigationpage" then
                            pagenumindices[element.pnum] = element.dnum
                        end
                    end
                    pdfflushobject(dnum,dict)
                end
            end
        end

        -- data for the next run, keyed by internal reference
        local getinternalreference = structures.references.getinternalreference
        local destinations = { }
        for k, v in next, referencenumindices do
            destinations[getinternalreference(k)] = v
        end
        job.variables.tobesaved.tagging = {
            basetreesize = lastintree,
            destinations = destinations,
            pages        = pagenumindices,
        }

        table.setmetatableindex(linknumindices)

        -- resolved links follow the pages in the parent tree; slots that are
        -- still false were reserved but never resolved
        for k, v in sortedhash(linknumindices) do
            if v then
                n = n + 1 ; nums[n] = k
                n = n + 1 ; nums[n] = v
            end
        end

        local parenttree = pdfdictionary {
            Nums = nums
        }
        local idtree = pdfmakenametree(names)

        local structuretree = pdfdictionary {
            Type       = pdfconstant("StructTreeRoot"),
            K          = pdfreference(pdfflushobject(structure_kids)),
            ParentTree = pdfreference(pdfflushobject(parent_ref,parenttree)),
            IDTree     = idtree,
            RoleMap    = usedrolemap,
            Namespaces = usednamespaces,
        }
        pdfflushobject(structure_ref,structuretree)
        addtocatalog("StructTreeRoot",pdfreference(structure_ref))

        if version == 1 then
            local markinfo = pdfdictionary {
                Marked = pdfboolean(true) or nil,
            }
            addtocatalog("MarkInfo",pdfreference(pdfflushobject(markinfo)))
        end

    end

    if trace_tree then
        -- a numeric tracker value limits the report to that page
        local p = false
        local n = tonumber(trace_tree) or true
        for i=1,nofelements do
            local fulltag       = elementsorder[i]
            local element       = elements[fulltag]
            local specification = specifications[fulltag]
            local pagenumber    = element.pnum
            if n == true and i > 1 and p ~= pagenumber then
                report_tags("")
            end
            p = pagenumber
            if n == true or n == p then
                report_tags("% 5i %3i %s (%s)",i,pagenumber,concattags(specification),(element.des and "destination") or (element.ref and "reference") or "content")
            end
        end
    end

    if trace_blobs then
        for k, v in sortedhash(actualtexts) do
            local shared = v[1] and "-" or "+"
            local index  = v[2]
            local text   = v[3]
            report_tags("blob %s : %5i : %04X : %s",shared,index,index,text)
        end
    end

end
630
631lpdf.registerdocumentfinalizer(finishstructure,"document structure")
632
633local index, pageref, pagenum, list = 0, nil, 0, nil
634
635local pdf_mcr = pdfconstant("MCR")
636local pdf_struct_element = pdfconstant("StructElem")
637local pdf_s = pdfconstant("S")
638local pdf_objr = pdfconstant("OBJR")
639
640local c_realpageno <const> = tex.iscount("realpageno")
641
-- Resets the per page state (content index, page number and reference, the
-- page list), reserves objects for the namespaces on the first call, keeps
-- track of the first and last page in the tree and registers the page list.
local function initializepage()
    index   = 0
    pagenum = texgetcount(c_realpageno)
    pageref = pdfreference(pdfpagereference(pagenum))
    list    = pdfarray()

    if not namespaced then
        for _, namespace in sortedhash(namespaces) do
            local num = pdfreserveobject()
            namespace.num = num
            namespace.ref = pdfreference(num)
        end
        namespaced = true
    end

    if firstintree then
        if pagenum > lastintree then
            lastintree = pagenum
        end
    else
        if pagenum > 1 then
            report_tags("beware: first page in tree is %i", pagenum)
        end
        firstintree = pagenum
        lastintree  = pagenum
    end

    tree[pagenum] = list
end
669
-- Adds the StructParents entry for the current page; the value is the page
-- number minus one.
local function finishpage()
    local structparents = pagenum - 1
    addtopageattributes("StructParents",structparents)
end
674
675
676
677local pdf_userproperties = pdfconstant("UserProperties")
678
679
680
681
-- Turns a non empty hash into a UserProperties attribute dictionary with one
-- N/V pair per key, in sorted key order. Nothing is returned for a missing
-- or empty table.
local function makeattribute(t)
    if not t or not next(t) then
        return
    end
    local entries = pdfarray()
    for name, value in sortedhash(t) do
        local entry = pdfdictionary {
            N = pdfunicode(name),
            V = pdfunicode(value),
        }
        entries[#entries+1] = entry
    end
    return pdfdictionary {
        O = pdf_userproperties,
        P = entries,
    }
end
697
-- Visualizer functions, registered on demand by checkvisualize. The second
-- one was declared as 'visualizespecial' while all code assigns and calls
-- 'visualizespecials', which made that one an accidental global.
local visualizetags      = nil
local visualizespecials  = nil
local visualizeblobs     = nil
local visualizesuspects  = nil
local visualizeinternals = nil
local collectedsuspects  = { }
704
-- Reports the collected suspects, when there are any.
statistics.register("pdf tags", function()
    if #collectedsuspects == 0 then
        return
    end
    return formatters["suspects: % t"](collectedsuspects)
end)
710
711
-- Registers the visualizers used by the tag tracers; this happens only once,
-- the tags visualizer serves as the "already done" marker.
local function checkvisualize()
    if visualizetags then
        return
    end
    local registervisualizer = nodes.visualizers.register
    visualizetags      = registervisualizer("tags")
    visualizespecials  = registervisualizer("specials",nil,nil,2.5,true)
    visualizeblobs     = registervisualizer("blobs",nil,nil,2.5,true)
    visualizesuspects  = registervisualizer("suspects")
    visualizeinternals = registervisualizer("internals")
end
721
-- Default tracer: the tags visualizer called with the name, followed by
-- whatever the tags visualizer returns when called without arguments.
local function tagtracer(name,blob)
    checkvisualize()
    local start = visualizetags(name)
    return start, visualizetags()
end
726
-- Tags without a dedicated tracer get (and remember) the default one.
table.setmetatableindex(tagtracers,function(t,k)
    local tracer = tagtracer
    t[k] = tracer
    return tracer
end)
731
732local makeelement do
733
734 do
735
736 local f_tagid = formatters["math-%04X"]
737 local f_tagfn = formatters["math-%04X.xml"]
738
739 local shared = { }
740 local bindex = 0
741 local btags = false
742
743 function blobfunctions.math(tagname,specification)
744 local tagindex = specification.tagindex
745 local id = f_tagid(tagindex)
746 local blob = specification.blob
747 if blob then
748 local blobindex = mathematics.getblobindex("pdf",blob)
749 if blobindex then
750 local index = shared[blobindex]
751 if not btags then
752 btags = { }
753 job.variables.tobesaved.mathblobs = btags
754 end
755 if index then
756 af = index[1]
757 btags[blob] = index[2]
758 else
759 bindex = bindex + 1
760 local blobname = f_tagid(blobindex)
761 local blobfile = f_tagfn(blobindex)
762 local blobdata = mathematics.getmathblob("pdf",blob)
763 af = codeinjections.embedfile {
764 force = true,
765 data = blobdata,
766 name = blobname,
767 file = blobfile,
768
769 hash = id,
770 forcereference = true,
771
772 mimetype = "application/mathml+xml",
773 relation = "Supplement",
774 }
775 af = pdfreference(pdfflushobject(pdfarray { af }))
776 shared[blobindex] = { af, blobindex }
777 btags[blob] = blobindex
778 end
779 actualtext = mathematics.gettextblob("pdf",specification.language or "en",blob)
780 if actualtext then
781 if trace_blobs then
782 actualtexts[blob] = { index and true or false, blobindex, actualtext }
783 end
784 actualtext = pdfunicode(actualtext)
785 end
786 return id, af, actualtext
787 end
788
789 else
790
791
792
793
794
795
796
797
798 end
799
800 end
801
802 local blobdone = { }
803
804 function tagtracers.math(name,specification,blob)
805 checkvisualize()
806 if blob and not blobdone[blob] then
807 local bname = btags[blob]
808 if bname then
809 bname = "M " .. blob .. " " .. f_tagid(bname)
810 else
811 bname = "M " .. blob
812 end
813 blobdone[blob] = true
814 if detailedmath then
815 return visualizeblobs(bname,name), visualizeblobs()
816 else
817 return visualizetags(bname), visualizetags()
818 end
819 else
820 return visualizetags(name), visualizetags()
821 end
822 end
823
824 function tagtracers.suspect(name)
825 checkvisualize()
826 return visualizesuspects("S " .. "mrow"), visualizesuspects()
827 end
828
829 function tagtracers.internallink(internal)
830 checkvisualize()
831 return visualizeinternals("L " .. internal), visualizeinternals()
832 end
833
834 function tagtracers.internalreference(internal)
835 checkvisualize()
836 return visualizeinternals("R " .. internal), visualizeinternals()
837 end
838
839 end
840
841 do
842
843 function tagtracers.link(name,specification,blob)
844 checkvisualize()
845 return visualizespecials(name), visualizespecials()
846 end
847
848 function tagtracers.reference(name,specification,blob)
849 checkvisualize()
850 return visualizespecials(name), visualizespecials()
851 end
852
853 end
854
855 do
856
857 local f_tagid = formatters["cite-%s"]
858 local f_tagfn = formatters["cite-%s.bib"]
859
860 local shared = { }
861 local bindex = 0
862 local btags = { }
863
864 function blobfunctions.cite(tagname,specification)
865 local detail = specification.detail
866 if detail then
867 local dataset, tag = match(detail,"^(.+)::(.+)$")
868 local index = shared[tag]
869 local id = f_tagid(tag)
870 if index then
871 af = index[1]
872 btags[tag] = index[2]
873 else
874 bindex = bindex + 1
875 local data = publications.datasets[dataset].luadata[tag] or "no data"
876 local blobname = f_tagid(tag)
877 local blobfile = f_tagfn(tag)
878 local blobdata = publications.savers.bib(false,false,{ [tag] = data })
879 af = codeinjections.embedfile {
880 force = true,
881 data = gsub(blobdata,"\n+$",""),
882 name = blobname,
883 file = blobfile,
884
885 hash = id,
886 forcereference = true,
887
888 mimetype = "application/x-bibtex",
889 relation = "Supplement",
890 }
891
892 af = pdfreference(pdfflushobject(pdfarray { af }))
893 shared[tag] = { af, blobname }
894 btags[tag] = blobname
895 end
896 actualtext = publications.meanings[tag]
897 if actualtext then
898 actualtext = pdfunicode(actualtext)
899 end
900 return id, af, actualtext
901 end
902 end
903
904 function tagtracers.cite(name,specification)
905 checkvisualize()
906 local detail = specification.detail
907 if detail then
908 local dataset, tag = match(detail,"^(.-)::(.-)$")
909 local bname = btags[tag]
910 if bname then
911 return visualizetags("C " .. bname), visualizetags()
912 end
913 end
914 return visualizetags(name), visualizetags()
915 end
916
917 end
918
919 local lastid = 0
920 local f_id = formatters["%X"]
921
922 local symbols = table.setmetatableindex (
923 {
924 ["1"] = "Disc",
925 ["2"] = "Circle",
926 ["3"] = "Square",
927 ["n"] = "Decimal",
928 ["I"] = "UpperRoman",
929 ["i"] = "LowerRoman",
930 ["A"] = "UpperAlpha",
931 ["a"] = "LowerAlpha",
932 },
933 function(t,k) return tonumber(k) and "Unordered" or "Ordered" end
934 )
935
936 makeelement = function(fulltag,parent)
937 local specification = specifications[fulltag]
938 local tagname = specification and specification.tagname or "ignore"
939 local tagnameused = tagname
940 local attributes = nil
941
942 if tagname == "ignore" then
943 return false
944 elseif tagname == "mstacker" or tagname == "mstackertop" or tagname == "mstackerbot" or tagname == "mstackermid" then
945
946 return true
947 elseif tagname == "mrow" then
948 return false
949 elseif tagname == "tabulatecell" then
950 local d = structurestags.gettabulatecell(fulltag)
951 if d and d.kind == 1 then
952 tagnameused = "tabulateheadcell"
953 end
954
955 elseif tagname == "tablecell" then
956
957 local d = structurestags.gettablecell(fulltag)
958 if d then
959 if d.kind == 1 then
960 tagnameused = "tableheadcell"
961 end
962 local rows = d.rows or 1
963 local cols = d.columns or 1
964 if rows > 1 or cols > 1 then
965 attributes = pdfdictionary {
966
967 O = pdfconstant("Table"),
968 RowSpan = rows > 1 and rows or nil,
969 ColSpan = cols > 1 and cols or nil,
970 }
971 end
972 end
973 elseif tagname == "itemgroup" then
974 local d = structurestags.getitemgroup(fulltag)
975 if d then
976 local symbol = d.symbol
977 if symbol then
978 attributes = pdfdictionary {
979 ListNumbering = pdfconstant(symbols[symbol] or "None"),
980 ContinuedList = d.continue and true or nil,
981 }
982 end
983 end
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999 end
1000
1001 local detail = specification.detail
1002 local userdata = specification.userdata
1003
1004 if version == 1 then
1005
1006 else
1007
1008 tagname, tagnameused, detail = checkoverload(tagname,tagnameused,detail,specification)
1009 end
1010
1011 usedmapping[tagname] = true
1012
1013
1014
1015 local af = nil
1016 local id = nil
1017 local actualtext = nil
1018
1019 if embeddedtags[tagname] then
1020 local action = blobfunctions[tagname]
1021 if action then
1022 id, af, actualtext = action(tagname,specification)
1023 end
1024 end
1025
1026
1027
1028
1029
1030
1031
1032 local namespace = nil
1033 if version > 1 then
1034 local p = properties[tagname]
1035 if p then
1036 namespace = namespaces[p.namespace].ref or nil
1037 else
1038 namespace = "user"
1039 properties[tagname] = { namespace = namespace, pdf = "Span", nature = "inline" }
1040 end
1041 end
1042
1043
1044 local kids = pdfarray()
1045 local tag = usedlabels[tagnameused] or tagnameused
1046 local subtype = pdfconstant(tag)
1047 local pref = parent.pref
1048 local pkids = parent.kids
1049 local element
1050 local dref, dnum
1051 if indirectlocalkids then
1052 local knum = pdfreserveobject()
1053 local dict = pdfdictionary {
1054
1055 S = subtype,
1056 ID = id,
1057 T = detail and detail or nil,
1058 P = pref,
1059 Pg = pageref,
1060 K = pdfreference(knum),
1061 A = attributes,
1062
1063 NS = namespace,
1064 ActualText = actualtext or nil,
1065 AF = af or nil,
1066 }
1067 dnum = pdfflushobject(dict)
1068 dref = pdfreference(dnum)
1069 element = {
1070 blob = af and true or false,
1071 tag = tag,
1072 pnum = pagenum,
1073 pref = dref,
1074 kids = kids,
1075 knum = knum,
1076 dnum = dnum,
1077 ref = tag == "link" and specification.reference or nil,
1078 des = tag == "reference" and specification.destination or nil,
1079 }
1080 else
1081 local dict = pdfdictionary {
1082
1083 S = subtype,
1084 ID = id,
1085 T = detail and detail or nil,
1086 P = pref,
1087 Pg = pageref,
1088 K = kids,
1089 A = attributes,
1090
1091 NS = namespace,
1092 ActualText = actualtext or nil,
1093 AF = af or nil,
1094 }
1095 dnum = pdfreserveobject()
1096 dref = pdfreference(dnum)
1097 element = {
1098 blob = af and true or false,
1099 tag = tag,
1100 pnum = pagenum,
1101 pref = dref,
1102 kids = kids,
1103 dict = dict,
1104 dnum = dnum,
1105 ref = tag == "link" and specification.reference or nil,
1106 des = tag == "reference" and specification.destination or nil,
1107 }
1108 end
1109 if id and names then
1110 names[id] = dref
1111 end
1112 pkids[#pkids+1] = dref
1113 elements[fulltag] = element
1114 nofelements = nofelements + 1
1115 elementsorder[nofelements] = fulltag
1116 return element
1117 end
1118
1119end
1120
1121local f_BDC = formatters["/%s <</MCID %s>> BDC"]
1122
1123local a_destination <const> = attributes.private('destination')
1124local a_reference <const> = attributes.private('reference')
1125
1126local references = { }
1127
-- Registers a piece of marked content as kid of 'parent' and returns the
-- BDC operator that opens it. The content id is the current page index
-- (which is then bumped). The kid is
--
-- * a content reference with an Alt text for images,
-- * the bare content id when the parent lives on the current page,
-- * a content reference that carries the page otherwise.
--
-- The parent reference is also stored in the page list, at the bumped
-- index. The 'start' and 'range' arguments are not used.
local function makecontent(start,parent,id,specification,range)
    local tag  = parent.tag
    local kids = parent.kids
    local mcid = index
    local kid
    index = mcid + 1
    if id == "image" then
        local chain = specification.taglist
        local info  = usewithcare.images[chain[#chain]]
        local alt   = info and info.label
        if not alt or alt == "" then
            alt = "image"
        end
        kid = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = mcid,
            Alt  = pdfunicode(alt),
        }
    elseif pagenum == parent.pnum then
        kid = mcid
    else
        kid = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = mcid,
        }
    end
    kids[#kids+1] = kid
    list[index] = parent.pref
    return f_BDC(tag,mcid)
end
1176
-- Returns the operator that opens an artifact; the arguments are not used.
local function makeignore(specification,range)
    local literal = "/Artifact BMC"
    return literal
end
1181
1182
1183
1184local EMCliteral = nil
1185
1186
1187
1188
1189
1190
1191local tag_ignore_level <const> = 1
1192local tag_document_level <const> = 2
1193
1194local tag_image_state <const> = -1
1195local tag_ignore_state <const> = -2
1196local tag_link_state <const> = -3
1197local tag_reference_state <const> = -4
1198local tag_rule_state <const> = -5
1199
1200function nodeinjections.addtags(head,ispage)
1201
1202 if tex.systemmodes.export then
1203 return head
1204 elseif not tex.conditionals.c_strc_tags_global then
1205 return head
1206
1207 end
1208
1209
1210
1211
1212
1213 if not EMCliteral then
1214 EMCliteral = register(setstate("EMC"))
1215 end
1216
1217 local last = nil
1218 local ranges = { }
1219 local range = nil
1220 local nofranges = 0
1221
1222 if not root then
1223 structure_kids = pdfarray()
1224 structure_ref = pdfreserveobject()
1225 parent_ref = pdfreserveobject()
1226 root = { pref = pdfreference(structure_ref), kids = structure_kids }
1227 names = pdfarray()
1228 end
1229
1230 initializepage()
1231
1232 local mblob = false
1233 local ablob = { }
1234
1235 if ispage then
1236 pushtag()
1237 local ac = starttag("navigationpage")
1238 stoptag()
1239 nofranges = nofranges + 1
1240 ranges[nofranges] = { ac, "navigationpage" }
1241 poptag()
1242 end
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256 local lastpar = 0
1257 local lastparat = 0
1258
1259 local function collectranges(head,parent)
1260 for n, id, subtype in nextnode, head do
1261
1262 if id == glyph_code then
1263
1264 local at, blob, ap = getattrs(n,a_tagged,a_mathblob,a_taggedpar)
1265 if at == 0 then
1266 at = false
1267 elseif at == 1 then
1268 at = false
1269 elseif at then
1270
1271 else
1272 at = false
1273 end
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296 if not blob then
1297 mblob = false
1298 if last ~= at then
1299 range = { at, "glyph", n, n, parent }
1300 nofranges = nofranges + 1
1301 ranges[nofranges] = range
1302 last = at
1303 lastparat = at
1304 lastpar = ap
1305 elseif range then
1306 if lastpar ~= ap and at and lastparat == at then
1307
1308 pushtag(at < tag_document_level and tag_document_level or at)
1309 local ac = starttag("break")
1310 stoptag()
1311 poptag()
1312 range = { ac, "break", n, false }
1313 nofranges = nofranges + 1
1314 ranges[nofranges] = range
1315
1316 lastpar = ap
1317 range = { at, "glyph", n, n, parent }
1318 nofranges = nofranges + 1
1319 ranges[nofranges] = range
1320 last = at
1321 lastparat = at
1322 else
1323 range[4] = n
1324 end
1325 end
1326
1327 elseif blob == mblob and (last and last > 0) then
1328 if range then
1329 range[4] = n
1330 end
1331 last = at
1332 else
1333 mblob = blob
1334
1335 local a = ablob[blob]
1336 if not a then
1337 a = tag_document_level
1338 if at then
1339 local t = taglist[at].taglist
1340
1341
1342
1343
1344
1345
1346
1347 for i=1,#t do
1348 local s = specifications[t[i]]
1349 if s.tagname == "math" then
1350 a = s.attribute
1351 break
1352 end
1353 end
1354 end
1355 ablob[blob] = a
1356 end
1357 range = { a, "math", n, n, parent, blob }
1358 nofranges = nofranges + 1
1359 ranges[nofranges] = range
1360 last = at
1361
1362
1363
1364 end
1365
1366
1367 elseif id == hlist_code or id == vlist_code then
1368 local at, img = getattrs(n,a_tagged,a_image)
1369
1370 if img then
1371 range = { at or false, "image", n, n, parent }
1372 nofranges = nofranges + 1
1373 ranges[nofranges] = range
1374 last = tag_image_state
1375 mblob = false
1376
1377
1378
1379 local specification = taglist[at]
1380 if specification and specification.tagname == "mpgraphic" then
1381 local list = getlist(n)
1382 if list then
1383 collectranges(list,n)
1384 end
1385 end
1386
1387 elseif at == 0 then
1388 range = { false, "ignore", n, n, parent }
1389 nofranges = nofranges + 1
1390 ranges[nofranges] = range
1391 last = tag_ignore_state
1392 mblob = false
1393 else
1394 if at then
1395 local r, d = getattrs(n,a_reference,a_destination)
1396 if r and not references[r] then
1397
1398 local b = getattr(n,a_mathblob)
1399 if b then
1400 at = ablob[b]
1401 end
1402
1403 pushtag(at < tag_document_level and tag_document_level or at)
1404 local ac = starttag("link", { reference = r })
1405 stoptag()
1406 poptag()
1407 range = { ac, "link", n, false, parent }
1408 nofranges = nofranges + 1
1409 ranges[nofranges] = range
1410 last = tag_link_state
1411 references[r] = true
1412 end
1413 if d and not destinations[d] then
1414
1415 local b = getattr(n,a_mathblob)
1416 if b then
1417 at = ablob[b]
1418 end
1419
1420 pushtag(at < tag_document_level and tag_document_level or at)
1421 local ac = starttag("reference", { destination = d })
1422 stoptag()
1423 poptag()
1424 range = { ac, "reference", n, false, parent }
1425 nofranges = nofranges + 1
1426 ranges[nofranges] = range
1427 last = tag_reference_state
1428 mblob = false
1429 destinations[d] = true
1430 end
1431 end
1432 local list = getlist(n)
1433 if list then
1434 collectranges(list,n)
1435 end
1436 end
1437
1438
1439 elseif id == glue_code then
1440 if subtype >= leaders_code then
1441 local leader = getleader(n)
1442 if leader then
1443 collectranges(leader,n)
1444 end
1445 end
1446 elseif id == rule_code then
1447 if subtype == empty_rule_code then
1448
1449 else
1450 local w, h, d = getruledimensions(n)
1451 if (w ~= 0) and (h + d ~= 0) then
1452 local at, blob = getattrs(n,a_tagged,a_mathblob)
1453 if blob then
1454 at = false
1455 end
1456
1457
1458 if not at then
1459 at = false
1460 elseif at == 0 then
1461 at = false
1462 elseif at == 1 then
1463 at = false
1464 end
1465 if last ~= tag_rule_state and last ~= at then
1466 range = { false, "rule", n, n, parent, blob }
1467 nofranges = nofranges + 1
1468 ranges[nofranges] = range
1469 last = at
1470 last = tag_rule_state
1471 mblob = false
1472 elseif range then
1473 range[4] = n
1474 end
1475
1476 else
1477
1478 end
1479 end
1480 end
1481 end
1482 end
1483
1484 collectranges(head)
1485
1486 if trace_tags then
1487 report_tags("")
1488 report_tags(ispage and "begin page" or "begin object")
1489 report_tags("")
1490 for i=1,nofranges do
1491 local range = ranges[i]
1492 local attr = range[1]
1493 local id = range[2]
1494 local start = range[3]
1495 local stop = range[4]
1496
1497
1498 local pdf = ""
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519 local tags = taglist[attr]
1520 if tags then
1521 local s = concattags(tags)
1522 if id == "reference" then
1523 report_tags("R %5i %s%s",attr,s,pdf)
1524 elseif id == "link" then
1525 report_tags("L %5i %s%s",attr,s,pdf)
1526 elseif id == "break" then
1527 report_tags("B %5i %s%s",attr,s,pdf)
1528 elseif id == "navigationpage" then
1529 report_tags("P %5i %s : %i%s",attr,s,pagenum or 0,pdf)
1530 else
1531 report_tags("T %5i %s : %s%s",attr,s,nodes.listtoutf(start,false,true,stop),pdf)
1532 end
1533 else
1534 report_tags("-------")
1535 end
1536 end
1537 report_tags("")
1538 report_tags(ispage and "end page" or "end object")
1539 report_tags("")
1540 end
1541
1542 local top = nil
1543 local noftop = 0
1544
1545 local blobdone = { }
1546
-- Splice an opening literal in front of `start` and a fresh copy of
-- EMCliteral (presumably the end-of-marked-content marker; its definition
-- is outside this view) behind `stop`.
--
--   start, stop : first and last node of the range; stop may be false
--   list        : optional parent list node; it gets `literal` as its new
--                 list head when `start` had no predecessor
--   literal     : the opening literal node
--   left, right : optional extra nodes (tracer output) that are linked
--                 right after the opening literal and right before the
--                 closing literal
local function inject(start,stop,list,literal,left,right)
    -- opening side: predecessor -> literal -> [left] -> start
    local before = getprev(start)
    if before then
        setlink(before,literal)
    end
    if left then
        setlink(literal,left,start)
    else
        setlink(literal,start)
    end
    if not before and list then
        -- start was the head of the parent list, so the literal takes over
        setlist(list,literal)
    end
    -- closing side: when there is no stop node the closing literal is
    -- linked directly behind the opening literal, in front of whatever
    -- followed that literal at this point
    local finish = copy_node(EMCliteral)
    local anchor = stop or literal
    local after = getnext(anchor)
    if after then
        setlink(finish,after)
    end
    if right then
        setlink(anchor,right,finish)
    else
        setlink(anchor,finish)
    end
end
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594 for i=1,nofranges do
1595
1596 local range = ranges[i]
1597 local mblob = false
1598
1599 local attr = range[1]
1600 local id = range[2]
1601 local start = range[3]
1602 local stop = range[4]
1603 local list = range[5]
1604
1605
1606
1607
1608
1609 if attr == 0 then
1610 local literal = setstate(makeignore(false,range))
1611 inject(start,stop,list,literal)
1612 elseif attr then
1613
1614 local blob = range[6]
1615
1616 local specification = taglist[attr]
1617 local currentlist = specification.taglist
1618 local noftags = #currentlist
1619 local common = 0
1620 local literal = nil
1621 local ignore = false
1622
1623 if top then
1624 for i=1,noftags >= noftop and noftop or noftags do
1625 if top[i] == currentlist[i] then
1626 common = i
1627 else
1628 break
1629 end
1630 end
1631 end
1632 local prev = common > 0 and elements[currentlist[common]] or root
1633 if blob and not detailedmath then
1634 for j=common+1,noftags do
1635 local tag = currentlist[j]
1636 local prv = elements[tag] or makeelement(tag,prev)
1637 if prv == false then
1638
1639 prev = false
1640 ignore = true
1641 break
1642 elseif prv == true then
1643
1644 else
1645 prev = prv
1646 end
1647
1648 if find(tag,"^math>") then
1649 break
1650 end
1651 end
1652 else
1653 for j=common+1,noftags do
1654 local tag = currentlist[j]
1655 local prv = elements[tag] or makeelement(tag,prev)
1656 if prv == false then
1657
1658 prev = false
1659 ignore = true
1660 break
1661 elseif prv == true then
1662
1663 else
1664 prev = prv
1665 end
1666 end
1667 end
1668
1669 if prev then
1670 literal = setstate(makecontent(start,prev,id,specification,range))
1671 elseif ignore then
1672 literal = setstate(makeignore(specification,range))
1673 else
1674
1675 end
1676
1677 if literal then
1678 local left, right
1679 if trace_info or trace_math then
1680 local name = specification.tagname
1681 if name then
1682 left, right = tagtracers[name](name,specification,trace_math and blob or nil)
1683 end
1684 end
1685 if not left and trace_suspects then
1686 local name = specification.tagname
1687 if name == "mrow" then
1688 collectedsuspects[#collectedsuspects+1] = formatters["%i:%s"](pagenum,name)
1689 left, right = tagtracers.suspect(name)
1690 end
1691 end
1692 if not left and trace_internals then
1693 if id == "link" then
1694 left, right = tagtracers.internallink(attr or 0)
1695 elseif id == "reference" then
1696 left, right = tagtracers.internalreference(attr or 0)
1697 end
1698 end
1699 inject(start,stop,list,literal,left,right)
1700 end
1701
1702 top = currentlist
1703 noftop = noftags
1704
1705 else
1706 local literal = setstate(makeignore(specification,range))
1707 inject(start,stop,list,literal)
1708 end
1709
1710 end
1711
1712 finishpage()
1713
1714 return head
1715
1716end
1717
1718
1719
1720
1721
1722
-- Module state shared by the functions below:
--
--   permitted : cleared by codeinjections.settaggingsupport(false); once
--               cleared, codeinjections.enabletags does nothing
--   enabled   : set by codeinjections.enabletags once the handlers are active
--   shipout   : when false, enabletags does not enable the shipout handler
local permitted, enabled, shipout = true, false, true

directives.register("structures.tags.shipout", function(v)
    shipout = v
end)
1728
-- Block structure tagging. Only an explicit `false` has an effect; any
-- other value (including nil) leaves the current state untouched.
--
-- When tagging was already enabled the shipout and math handlers are
-- disabled again; in all cases later calls to enabletags become no-ops
-- because `permitted` is cleared.
function codeinjections.settaggingsupport(option)
    if option ~= false then
        return
    end
    if enabled then
        disableaction("shipouts","structures.tags.handler")
        disableaction("math","noads.handlers.tags")
        enabled = false
    end
    if not permitted then
        -- already blocked, nothing to report or change
        return
    end
    if trace_tags then
        report_tags("blocking structure tags")
    end
    permitted = false
end
1744
-- Switch structure tagging on. This does nothing when tagging has been
-- blocked (see settaggingsupport) or when it is already enabled, so it can
-- safely be called more than once.
function codeinjections.enabletags()
    if enabled or not permitted then
        return
    end

    -- install the page handler; the shipout hook itself can be suppressed
    -- with the "structures.tags.shipout" directive
    structures.tags.handler = nodeinjections.addtags
    if shipout then
        enableaction("shipouts","structures.tags.handler")
    end
    enableaction("math","noads.handlers.tags")

    if not embeddedtags then
        embedsupportedtags()
    end

    if trace_tags then
        report_tags("enabling structure tags")
    end

    enabled = true
    -- NOTE(review): `version` is not declared in this block; presumably it
    -- is a file-level variable declared elsewhere in this module -- confirm
    version = lpdf.majorversion()

    updaters.apply("structures.tagging",version)
    structures.references.forceinnermode()
end
1772
-- Pass the inverse of the discard request on to lpdf.setpagestate: only a
-- literal `true` results in `false` being passed, every other value
-- (false, nil, anything else) results in `true`.
function codeinjections.discardpages(state)
    lpdf.setpagestate(state ~= true)
end
1781 |