lpdf-ini.lmt /size: 45 Kb    last modification: 2024-01-16 09:02
1if not modules then modules = { } end modules ['lpdf-ini'] = {
2    version   = 1.001,
3    optimize  = true,
4    comment   = "companion to lpdf-ini.mkiv",
5    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
6    copyright = "PRAGMA ADE / ConTeXt Development Team",
7    license   = "see context related readme files"
8}
9
-- This file is the starting point for PDF related features. Although quite a bit
-- evolved over time, most of what we do in MkIV and LMTX already was available in
-- MkII (with e.g. pdfTeX) anyway, but it was implemented in TeX. We're talking of
-- arbitrary annotations, media like audio and video, widgets (aka forms) with
-- chains and appearances, comments, attachments, javascript based manipulation of
-- layers, graphic trickery like shading, color spaces, transparency, flash stuff,
-- executing commands, accessing the interface, etc. In that respect there isn't
-- much really new here, after all MkII was there before the turn of the century,
-- but it's just more fun to maintain it in Lua than in low level TeX. Also, because
-- we no longer deal with other engines, there is no need to go low level TeX, which
-- makes for better code.
21--
-- However, over the decades PDF evolved and it shows. For instance audio and video
-- support changed and became worse. Some things were dropped (smil, flash, movies,
-- audio). Using appearances for widgets became a pain because it sort of assumes
-- that you construct these forms in acrobat which then leads to bugs becoming
-- features which means that certain things simply don't work (initializations,
-- chained widgets, funny dingbat defaults, etc), probably because they never were
-- tested when viewers evolved.
29--
-- Attachments are also a fragile bit. And comments that at some point became
-- dependent on rendering annotations ... it all deserves no beauty prize because
-- reliable simplicity was replaced by unreliable complexity. Something that might
-- work today often didn't in the past and might fail in the future, if only because
-- it more relates to the viewer user interface, maybe changing security demands or
-- whatever. We cannot predict this. A side effect is that we keep adapting and even
-- worse, have to remove features that originally were expected to stay (media
-- stuff). To some extent it's a waste of time to get it all supported, also because
-- the open source viewers lag behind. It makes no sense to keep tons of code
-- around that will never be used (again).
40--
41-- Also, I don't think that these PDF features were added with something else than
42-- Acrobat in mind: a flexible system like TeX that actually could inject these low
43-- level features right from the moment that they showed up (and before they were
44-- fully tested) is not mainstream enough to be taken into account. One cannot blame
45-- a commercial product for its own priorities. The evolution of the web might also
46-- have interfered with the agendas.
47--
48-- As a consequence, the code that we use is spread over files and it might change
49-- over time as we try to adapt. But it's easy for the mentioned features to fix one
50-- aspect and break another. Eventually we might see more of these fancy features to
51-- be removed because they make no sense on the long run, than such features being
52-- added. In retrospect maybe many such features were just experiments: anchored in
53-- time for throw away documents (like presentations), never meant to be used on the
54-- long term. In that respect PDF is a disappointment.
55
56-- Comment: beware of "too many locals" problem here.
57
58local setmetatable, getmetatable, type, next, tostring, tonumber, rawset = setmetatable, getmetatable, type, next, tostring, tonumber, rawset
59local concat = table.concat
60local char, byte, format, sub, tohex = string.char, string.byte, string.format, string.sub, string.tohex
61local utfchar, utfbyte, utfvalues = utf.char, utf.byte, utf.values
62local sind, cosd, max, min = math.sind, math.cosd, math.max, math.min
63local sort, sortedhash = table.sort, table.sortedhash
64local P, C, R, S, Cc, Cs, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cc, lpeg.Cs, lpeg.V
65local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
66local formatters = string.formatters
67local isboolean = string.is_boolean
68local hextointeger, octtointeger = string.hextointeger,string.octtointeger
69
70local report_objects    = logs.reporter("backend","objects")
71local report_finalizing = logs.reporter("backend","finalizing")
72local report_blocked    = logs.reporter("backend","blocked")
73
74local implement         = interfaces and interfaces.implement
75local context           = context
76
77-- In ConTeXt MkIV we use utf8 exclusively so all strings get mapped onto a hex
78-- encoded utf16 string type between <>. We could probably save some bytes by using
79-- strings between () but then we end up with escaped ()\ characters too.
80
-- Global namespaces: an already existing pdf or lpdf table is reused, otherwise a
-- fresh one is created. The pdf backend tables are optional at this point.

if type(pdf) ~= "table" then
    pdf = { }
end

local factor = number.dimenfactors.bp

local pdfbackend = backends and backends.registered.pdf
if not pdfbackend then
    pdfbackend = { }
end
local codeinjections = pdfbackend.codeinjections
local nodeinjections = pdfbackend.nodeinjections

if not lpdf then
    lpdf = { }
end
local lpdf = lpdf
if not lpdf.flags then
    lpdf.flags = { } -- will be filled later
end
91
-- Looking up a key that is not defined in lpdf is reported, after which the run
-- is terminated.

table.setmetatableindex(lpdf, function(namespace,name)
    report_blocked("function %a is not accessible",name)
    os.exit()
end)
96
-- Tracing flags, both off by default; the registered setters store whatever value
-- the tracker mechanism passes in.

local trace_finalizers = false
local trace_resources  = false

trackers.register("backend.finalizers", function(state)
    trace_finalizers = state
end)

trackers.register("backend.resources", function(state)
    trace_resources = state
end)
99
100    -- helpers
101
local f_hex_4 = formatters["%04X"]
local f_hex_2 = formatters["%02X"]

-- A self filling lookup table that maps a number onto its four digit uppercase
-- hex string. Negative numbers give "0000" (not stored). The first request below
-- 256 fills the whole 0..255 range in one go; larger numbers are stored one by
-- one.

local h_hex_4 = table.setmetatableindex(function(cache,code) -- we already have this somewhere
    if code < 0 then
     -- report("fatal h_hex_4 error: %i",code)
        return "0000"
    end
    if code < 256 then -- maybe 512
        -- not sparse in this range
        for i=0,255 do
            cache[i] = f_hex_4(i)
        end
        return cache[code]
    end
    local hex = f_hex_4(code)
    cache[code] = hex
    return hex
end)
121
-- A self filling lookup table that gives a two digit uppercase hex string. A
-- string key is converted via the byte value of its first character, a number
-- key must be in the range 0..255; numbers outside that range give "00" (which
-- is not stored).

local h_hex_2 = table.setmetatableindex(function(cache,key) -- we already have this somewhere
    local hex
    if type(key) == "string" then
        hex = f_hex_2(byte(key))
    elseif key < 0 or key > 255 then
     -- report("fatal h_hex_2 error: %i",key)
        return "00"
    else
        hex = f_hex_2(key)
    end
    cache[key] = hex
    return hex
end)

lpdf.h_hex_4 = h_hex_4
lpdf.h_hex_2 = h_hex_2
139
140
do

    -- This is for a future feature (still under investigation and consideration). So,
    -- it is work in progress (and brings a harmless overhead for now).

    local registered = { }

    -- Appends a function to the list of initializers.

    function lpdf.registerinitializer(initialize)
        registered[#registered+1] = initialize
    end

    -- Calls all registered initializers in the order of registration; the
    -- argument is currently not used.

    function lpdf.initialize(f)
        for i=1,#registered do
            local initialize = registered[i]
            initialize()
        end
    end

end
159
-- These two helpers are defined elsewhere, so they are bound when the
-- "backends.pdf.latebindings" updater runs.

local pdfreserveobject, pdfimmediateobject

updaters.register("backends.pdf.latebindings",function()
    pdfreserveobject, pdfimmediateobject = lpdf.reserveobject, lpdf.immediateobject
end)
167
168do
169
170    local pdfgetmatrix, pdfhasmatrix, pdfgetpos
171
172    updaters.register("backends.pdf.latebindings",function()
173        job.positions.registerhandlers {
174            getpos  = drivers.getpos,
175            getrpos = drivers.getrpos,
176            gethpos = drivers.gethpos,
177            getvpos = drivers.getvpos,
178        }
179        pdfgetmatrix = lpdf.getmatrix
180        pdfhasmatrix = lpdf.hasmatrix
181        pdfgetpos    = drivers.getpos
182    end)
183
184    function lpdf.getpos() return pdfgetpos() end
185
186    -- local function transform(llx,lly,urx,ury,rx,sx,sy,ry)
187    --     local x1 = llx * rx + lly * sy
188    --     local y1 = llx * sx + lly * ry
189    --     local x2 = llx * rx + ury * sy
190    --     local y2 = llx * sx + ury * ry
191    --     local x3 = urx * rx + lly * sy
192    --     local y3 = urx * sx + lly * ry
193    --     local x4 = urx * rx + ury * sy
194    --     local y4 = urx * sx + ury * ry
195    --     llx = min(x1,x2,x3,x4);
196    --     lly = min(y1,y2,y3,y4);
197    --     urx = max(x1,x2,x3,x4);
198    --     ury = max(y1,y2,y3,y4);
199    --     return llx, lly, urx, ury
200    -- end
201    --
202    -- function lpdf.transform(llx,lly,urx,ury) -- not yet used so unchecked
203    --     if pdfhasmatrix() then
204    --         local sx, rx, ry, sy = pdfgetmatrix()
205    --         local w, h = urx - llx, ury - lly
206    --         return llx, lly, llx + sy*w - ry*h, lly + sx*h - rx*w
207    --      -- return transform(llx,lly,urx,ury,sx,rx,ry,sy)
208    --     else
209    --         return llx, lly, urx, ury
210    --     end
211    -- end
212
213    -- funny values for tx and ty
214
215    function lpdf.rectangle(width,height,depth,offset)
216        local tx, ty = pdfgetpos()
217        if offset then
218            tx     = tx     -   offset
219            ty     = ty     +   offset
220            width  = width  + 2*offset
221            height = height +   offset
222            depth  = depth  +   offset
223        end
224        if pdfhasmatrix() then
225            local rx, sx, sy, ry = pdfgetmatrix()
226            return
227                factor *  tx,
228                factor * (ty - ry*depth  + sx*width),
229                factor * (tx + rx*width  - sy*height),
230                factor * (ty + ry*height - sx*width)
231        else
232            return
233                factor *  tx,
234                factor * (ty - depth),
235                factor * (tx + width),
236                factor * (ty + height)
237        end
238    end
239
240end
241
242local tosixteen, fromsixteen, topdfdoc, frompdfdoc, toeight, fromeight
243
244do
245
246    local cache = table.setmetatableindex(function(t,k) -- can be made weak
247        local v = utfbyte(k)
248        if v < 0x10000 then
249            v = format("%04x",v)
250        else
251            v = v - 0x10000
252            v = format("%04x%04x",(v>>10)+0xD800,v%1024+0xDC00)
253        end
254        t[k] = v
255        return v
256    end)
257
258    local unified = Cs(Cc("<feff") * (lpeg.patterns.utf8character/cache)^1 * Cc(">"))
259
260    tosixteen = function(str) -- an lpeg might be faster (no table)
261        if not str or str == "" then
262            return "<feff>" -- not () as we want an indication that it's unicode
263        else
264            return lpegmatch(unified,str)
265        end
266    end
267
268    -- we could make a helper for this
269
270    local more = 0
271
272    local pattern = C(4) / function(s) -- needs checking !
273        local now = hextointeger(s)
274        if more > 0 then
275            now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000
276            more = 0
277            return utfchar(now)
278        elseif now >= 0xD800 and now <= 0xDBFF then
279            more = now
280            return "" -- else the c's end up in the stream
281        else
282            return utfchar(now)
283        end
284    end
285
286    local pattern = P(true) / function() more = 0 end * Cs(pattern^0)
287
288    fromsixteen = function(str)
289        if not str or str == "" then
290            return ""
291        else
292            return lpegmatch(pattern,str)
293        end
294    end
295
296    local toregime   = regimes and regimes.toregime
297    local fromregime = regimes and regimes.fromregime
298    local escaped    = Cs(
299        Cc("(")
300      * (
301            S("()\n\r\t\b\f")/"\\%0"
302          + P("\\")/"\\\\"
303          + P(1)
304        )^0
305      * Cc(")")
306    )
307
308    topdfdoc = function(str,default)
309        if not str or str == "" then
310            return ""
311        else
312            return lpegmatch(escaped,toregime("pdfdoc",str,default)) -- could be combined if needed
313        end
314    end
315
316    frompdfdoc = function(str)
317        if not str or str == "" then
318            return ""
319        else
320            return fromregime("pdfdoc",str)
321        end
322    end
323
324    if not toregime   then topdfdoc   = function(s) return s end end
325    if not fromregime then frompdfdoc = function(s) return s end end
326
327    toeight = function(str)
328        if not str or str == "" then
329            return "()"
330        else
331            return lpegmatch(escaped,str)
332        end
333    end
334
335    -- use an oct hash
336
337    local unescape = Cs((
338       P("\\")/"" * (
339           S("()")
340         + S("\n\r")^1 / ""
341         + S("nrtbf") / { n = "\n", r = "\r", t = "\t", b = "\b", f = "\f" }
342         + (lpegpatterns.octdigit * lpegpatterns.octdigit^-2) / function(s) return char(octtointeger(s)) end
343       )
344     + P("\\\\") / "\\"
345     + P(1)
346--      - P(")") -- when inlined
347     )^0)
348
349    fromeight = function(str)
350        if not str or str == "" then
351            return ""
352        else
353            return lpegmatch(unescape,str)
354        end
355    end
356
357    lpegpatterns.pdffromeight = unescape
358
359    local u_pattern = lpegpatterns.utfbom_16_be * lpegpatterns.utf16_to_utf8_be -- official
360                    + lpegpatterns.utfbom_16_le * lpegpatterns.utf16_to_utf8_le -- we've seen these
361
362    local h_pattern = lpegpatterns.hextobytes
363
364    local zero = S(" \n\r\t") + P("\\ ")
365    local one  = C(4)
366    local two  = P("d") * R("89","af") * C(2) * C(4)
367
368    local x_pattern = P { "start",
369        start     = V("wrapped") + V("unwrapped") + V("original"),
370        original  = Cs(P(1)^0),
371        wrapped   = P("<") * V("unwrapped") * P(">") * P(-1),
372        unwrapped = P("feff")
373                  * Cs( (
374                        zero  / ""
375                      + two   / function(a,b)
376                                    a = (hextointeger(a) - 0xD800) * 1024
377                                    b = (hextointeger(b) - 0xDC00)
378                                    return utfchar(a+b)
379                                end
380                      + one   / function(a)
381                                    return utfchar(hextointeger(a))
382                                end
383                    )^1 ) * P(-1)
384    }
385
386    function lpdf.frombytes(s,hex)
387        if not s or s == "" then
388            return ""
389        end
390        if hex then
391            local x = lpegmatch(x_pattern,s)
392            if x then
393                return x
394            end
395            local h = lpegmatch(h_pattern,s)
396            if h then
397                return h
398            end
399        else
400            local u = lpegmatch(u_pattern,s)
401            if u then
402                return u
403            end
404        end
405        return lpegmatch(unescape,s)
406    end
407
408    lpdf.tosixteen   = tosixteen
409    lpdf.toeight     = toeight
410    lpdf.topdfdoc    = topdfdoc
411    lpdf.fromsixteen = fromsixteen
412    lpdf.fromeight   = fromeight
413    lpdf.frompdfdoc  = frompdfdoc
414
415end
416
local pdfescaped do

    -- Characters that get replaced by a number sign followed by two hex digits
    -- when they show up in a name: the null character, whitespace, the number
    -- sign itself, the backslash, and the delimiters ( ) < > [ ] { } / %. The
    -- angle brackets are delimiters too (they start and end hex strings and
    -- dictionaries) so they are escaped as well.

    local replacer = S("\0\t\n\r\f ()<>[]{}/%#\\") / {
        ["\00"]="#00",
        ["\09"]="#09",
        ["\10"]="#0a",
        ["\12"]="#0c",
        ["\13"]="#0d",
        [ " " ]="#20",
        [ "#" ]="#23",
        [ "%" ]="#25",
        [ "(" ]="#28",
        [ ")" ]="#29",
        [ "/" ]="#2f",
        [ "<" ]="#3c",
        [ ">" ]="#3e",
        [ "[" ]="#5b",
        [ "\\"]="#5c",
        [ "]" ]="#5d",
        [ "{" ]="#7b",
        [ "}" ]="#7d",
    } + P(1)

    local p_escaped_1 = Cs(Cc("/") * replacer^0)
    local p_escaped_2 = Cs(          replacer^0)

    -- Escapes a string for usage as a PDF name.
    --
    -- str   : the name to escape
    -- slash : when set, the result gets a leading slash
    --
    -- When the match gives no result the original string is returned.

    pdfescaped = function(str,slash)
        return lpegmatch(slash and p_escaped_1 or p_escaped_2,str) or str
    end

    lpdf.escaped = pdfescaped

end
448
449local tostring_a, tostring_d
450
451do
452
453    local f_key_null       = formatters["%s null"]
454    local f_key_value      = formatters["%s %s"]
455 -- local f_key_dictionary = formatters["%s << % t >>"]
456 -- local f_dictionary     = formatters["<< % t >>"]
457    local f_key_dictionary = formatters["%s << %s >>"]
458    local f_dictionary     = formatters["<< %s >>"]
459 -- local f_key_array      = formatters["%s [ % t ]"]
460 -- local f_array          = formatters["[ % t ]"]
461    local f_key_array      = formatters["%s [ %s ]"]
462    local f_array          = formatters["[ %s ]"]
463    local f_key_number     = formatters["%s %N"]  -- always with max 9 digits and integer is possible
464    local f_tonumber       = formatters["%N"]     -- always with max 9 digits and integer is possible
465
466    tostring_d = function(t,contentonly,key)
467        if next(t) then
468            local r = { }
469            local n = 0
470            local e
471            for k, v in next, t do
472                if k == "__extra__" then
473                    e = v
474                elseif k == "__stream__" then
475                    -- do nothing (yet)
476                else
477                    n = n + 1
478                    r[n] = k
479                end
480            end
481            if n > 1 then
482                sort(r)
483            end
484            for i=1,n do
485                local k  = r[i]
486                local v  = t[k]
487                local tv = type(v)
488                -- mostly tables
489                --
490                k = pdfescaped(k,true)
491                --
492                if tv == "table" then
493                 -- local mv = getmetatable(v)
494                 -- if mv and mv.__lpdftype then
495                    if v.__lpdftype__ then
496                     -- if v == t then
497                     --     report_objects("ignoring circular reference in dictionary")
498                     --     r[i] = f_key_null(k)
499                     -- else
500                            r[i] = f_key_value(k,tostring(v))
501                     -- end
502                    elseif v[1] then
503                        r[i] = f_key_value(k,tostring_a(v))
504                    else
505                        r[i] = f_key_value(k,tostring_d(v))
506                    end
507                elseif tv == "string" then
508                    r[i] = f_key_value(k,toeight(v))
509                elseif tv == "number" then
510                    r[i] = f_key_number(k,v)
511                else
512                    r[i] = f_key_value(k,tostring(v))
513                end
514            end
515            if e then
516                r[n+1] = e
517            end
518            r = concat(r," ")
519            if contentonly then
520                return r
521            elseif key then
522                return f_key_dictionary(pdfescaped(key,true),r)
523            else
524                return f_dictionary(r)
525            end
526        elseif contentonly then
527            return ""
528        else
529            return "<< >>"
530        end
531    end
532
533    tostring_a = function(t,contentonly,key)
534        local tn = #t
535        if tn ~= 0 then
536            local r = { }
537            for k=1,tn do
538                local v = t[k]
539                local tv = type(v)
540                -- mostly numbers and tables
541                if tv == "number" then
542                    r[k] = f_tonumber(v)
543                elseif tv == "table" then
544                 -- local mv = getmetatable(v)
545                 -- if mv and mv.__lpdftype then
546                    if v.__lpdftype__ then
547                     -- if v == t then
548                     --     report_objects("ignoring circular reference in array")
549                     --     r[k] = "null"
550                     -- else
551                            r[k] = tostring(v)
552                     -- end
553                    elseif v[1] then
554                        r[k] = tostring_a(v)
555                    else
556                        r[k] = tostring_d(v)
557                    end
558                elseif tv == "string" then
559                    r[k] = toeight(v)
560                else
561                    r[k] = tostring(v)
562                end
563            end
564            local e = t.__extra__
565            if e then
566                r[tn+1] = e
567            end
568            r = concat(r," ")
569            if contentonly then
570                return r
571            elseif key then
572                return f_key_array(pdfescaped(key,true),r)
573            else
574                return f_array(r)
575            end
576        elseif contentonly then
577            return ""
578        else
579            return "[ ]"
580        end
581    end
582
583end
584
local f_tonumber = formatters["%N"]

-- Serializers for the simple object types; each gets the object table and, except
-- for the stream one, works on the first entry.

local function tostring_x(t) -- stream: the entries separated by a space
    return concat(t," ")
end

local function tostring_s(t) -- string: an escaped literal string
    return toeight(t[1])
end

local function tostring_p(t) -- docstring: second entry is passed as default
    return topdfdoc(t[1],t[2])
end

local function tostring_u(t) -- unicode: hex encoded utf16
    return tosixteen(t[1])
end

local function tostring_n(t) -- number: formatted, tostring not needed
    return f_tonumber(t[1])
end

local function tostring_c(t) -- constant: already prefixed (hashed)
    return t[1]
end
-- Serializers that don't need helpers: the null object, both booleans and the
-- indirect reference. A reference without a positive object number becomes null.

local function tostring_z()
    return "null"
end

local function tostring_t()
    return "true"
end

local function tostring_f()
    return "false"
end

local function tostring_r(t)
    local n = t[1]
    if n and n > 0 then
        return n .. " 0 R"
    end
    return "null"
end
598
-- Verbose objects are passed as they are: a table in the first entry gets
-- concatenated (without separator), anything else is returned untouched.

local function tostring_v(t)
    local content = t[1]
    if type(content) ~= "table" then
        return content
    end
    return concat(content)
end
607
-- Literal objects: the first entry is wrapped (unescaped) in <> when the second
-- entry is set and in () otherwise; a missing or empty first entry gives "()".

local function tostring_l(t)
    local content = t[1]
    if content and content ~= "" then
        if t[2] then
            return "<" .. content .. ">"
        end
        return "(" .. content .. ")"
    end
    return "()"
end
618
-- The accessors that are used when an object is called; they give the Lua side
-- value of an object.

local value_x = function(t) -- stream: the table itself
    return t
end

local value_s = function(t) return t[1] end
local value_p = function(t) return t[1] end
local value_u = function(t) return t[1] end
local value_n = function(t) return t[1] end

local value_c = function(t) -- constant: without the first character
    return sub(t[1],2)
end

local value_d = function(t) -- dictionary: the serialized content only
    return tostring_d(t,true)
end

local value_a = function(t) -- array: the serialized content only
    return tostring_a(t,true)
end

local value_z = function()
    return nil
end

local value_t = function(t)
    local value = t.value
    if value then
        return value
    end
    return true
end

local value_f = function(t)
    local value = t.value
    if value then
        return value
    end
    return false
end

local value_r = function(t) -- reference: zero stands for null
    local n = t[1]
    if n then
        return n
    end
    return 0
end

local value_v = function(t) return t[1] end
local value_l = function(t) return t[1] end
633
-- Adds something to a dictionary: the fields of a table are copied (existing keys
-- get overwritten) while a string is appended, separated by a space, to the
-- "__extra__" field. Other types are ignored. The dictionary itself is returned.

local function add_to_d(t,v)
    local kind = type(v)
    if kind == "table" then
        for key, value in next, v do
            t[key] = value
        end
    elseif kind == "string" then
        local extra = t.__extra__
        if extra then
            t.__extra__ = extra .. " " .. v
        else
            t.__extra__ = v
        end
    end
    return t
end
649
-- Adds something to an array: the indexed entries of a table are appended while
-- a string is appended, separated by a space, to the "__extra__" field. Other
-- types are ignored. The array itself is returned.

local function add_to_a(t,v)
    local kind = type(v)
    if kind == "table" then
        local last = #t
        for i=1,#v do
            t[last+i] = v[i]
        end
    elseif kind == "string" then
        local extra = t.__extra__
        if extra then
            t.__extra__ = extra .. " " .. v
        else
            t.__extra__ = v
        end
    end
    return t
end
667
-- New fields of a stream are stored as strings.

local function add_x(t,k,v)
    rawset(t,k,tostring(v))
end

-- The metatables of the object types. Each one provides the type name (via
-- __lpdftype__), the serializer (tostring) and the value accessor (call).
-- Dictionaries and arrays can be extended with the + operator.

local mt_x = {
    __index    = { __lpdftype__ = "stream" },
    __tostring = tostring_x,
    __call     = value_x,
    __newindex = add_x,
}

local mt_d = {
    __index    = { __lpdftype__ = "dictionary" },
    __tostring = tostring_d,
    __call     = value_d,
    __add      = add_to_d,
}

local mt_a = {
    __index    = { __lpdftype__ = "array" },
    __tostring = tostring_a,
    __call     = value_a,
    __add      = add_to_a,
}

local mt_u = {
    __index    = { __lpdftype__ = "unicode" },
    __tostring = tostring_u,
    __call     = value_u,
}

local mt_s = {
    __index    = { __lpdftype__ = "string" },
    __tostring = tostring_s,
    __call     = value_s,
}

local mt_p = {
    __index    = { __lpdftype__ = "docstring" },
    __tostring = tostring_p,
    __call     = value_p,
}

local mt_n = {
    __index    = { __lpdftype__ = "number" },
    __tostring = tostring_n,
    __call     = value_n,
}

local mt_c = {
    __index    = { __lpdftype__ = "constant" },
    __tostring = tostring_c,
    __call     = value_c,
}

local mt_z = {
    __index    = { __lpdftype__ = "null" },
    __tostring = tostring_z,
    __call     = value_z,
}

local mt_t = {
    __index    = { __lpdftype__ = "true" },
    __tostring = tostring_t,
    __call     = value_t,
}

local mt_f = {
    __index    = { __lpdftype__ = "false" },
    __tostring = tostring_f,
    __call     = value_f,
}

local mt_r = {
    __index    = { __lpdftype__ = "reference" },
    __tostring = tostring_r,
    __call     = value_r,
}

local mt_v = {
    __index    = { __lpdftype__ = "verbose" },
    __tostring = tostring_v,
    __call     = value_v,
}

local mt_l = {
    __index    = { __lpdftype__ = "literal" },
    __tostring = tostring_l,
    __call     = value_l,
}
684
-- Creates a stream object. The entries of a given table are converted to strings
-- (in place), a string becomes the only entry, any other value is converted to a
-- string first, and without argument an empty stream is made.

local function pdfstream(t) -- we need to add attributes
    if not t then
        t = { }
    elseif type(t) == "table" then
        for i=1,#t do
            t[i] = tostring(t[i])
        end
    elseif type(t) == "string" then
        t = { t }
    else
        t = { tostring(t) }
    end
    return setmetatable(t,mt_x)
end
700
-- Turns the given table (or a new one) into a dictionary object.

local function pdfdictionary(t)
    if not t then
        t = { }
    end
    return setmetatable(t,mt_d)
end

-- Turns the given table (or a new one) into an array object; a string becomes
-- the only entry of a new array.

local function pdfarray(t)
    if type(t) == "string" then
        t = { t }
    elseif not t then
        t = { }
    end
    return setmetatable(t,mt_a)
end
712
-- Constructors for the string-like objects. The first three fall back on the
-- given default and then on the empty string.

local function pdfstring(str,default)
    local content = str or default or ""
    return setmetatable({ content },mt_s)
end

local function pdfdocstring(str,default,defaultchar) -- the default character is a space
    local content = str or default or ""
    return setmetatable({ content, defaultchar or " " },mt_p)
end

local function pdfunicode(str,default)
    local content = str or default or ""
    return setmetatable({ content },mt_u) -- could be a string
end

local function pdfliteral(str,hex) -- can also produce a hex <> instead of () literal
    local literal = { str, hex }
    return setmetatable(literal,mt_l)
end
728
729local pdfnumber, pdfconstant
730
731do
732
733    local cache = { } -- can be weak
734
735    pdfnumber = function(n,default) -- 0-10
736        if not n then
737            n = default
738        end
739        local c = cache[n]
740        if not c then
741            c = setmetatable({ n },mt_n)
742        --  cache[n] = c -- too many numbers
743        end
744        return c
745    end
746
747    for i=-1,9 do cache[i] = pdfnumber(i) end
748
749    local escaped = lpdf.escaped
750
751    local cache = table.setmetatableindex(function(t,k)
752        local v = setmetatable({ escaped(k,true) }, mt_c)
753        t[k] = v
754        return v
755    end)
756
757    pdfconstant = function(str,default)
758        if not str then
759            str = default or "none"
760        end
761        return cache[str]
762    end
763
764end
765
766local pdfnull, pdfboolean, pdfreference, pdfverbose
767
768do
769
770    local p_null  = { } setmetatable(p_null, mt_z)
771    local p_true  = { } setmetatable(p_true, mt_t)
772    local p_false = { } setmetatable(p_false,mt_f)
773
774    pdfnull = function()
775        return p_null
776    end
777
778    pdfboolean = function(b,default)
779        if type(b) == "boolean" then
780            return b and p_true or p_false
781        else
782            return default and p_true or p_false
783        end
784    end
785
786    -- print(pdfboolean(false),pdfboolean(false,false),pdfboolean(false,true))
787    -- print(pdfboolean(true),pdfboolean(true,false),pdfboolean(true,true))
788    -- print(pdfboolean(nil,true),pdfboolean(nil,false))
789
790    local r_zero = setmetatable({ 0 },mt_r)
791
792    pdfreference = function(r)  -- maybe make a weak table
793        if r and r ~= 0 then
794            return setmetatable({ r },mt_r)
795        else
796            return r_zero
797        end
798    end
799
800    local v_zero  = setmetatable({ 0  },mt_v)
801    local v_empty = setmetatable({ "" },mt_v)
802
803    pdfverbose = function(t) -- maybe check for type
804        if t == 0 then
805            return v_zero
806        elseif t == "" then
807            return v_empty
808        else
809            return setmetatable({ t },mt_v)
810        end
811    end
812
813end
814
-- The object constructors are made available in the lpdf namespace.

lpdf.array       = pdfarray
lpdf.boolean     = pdfboolean
lpdf.constant    = pdfconstant
lpdf.dictionary  = pdfdictionary
lpdf.docstring   = pdfdocstring
lpdf.literal     = pdfliteral
lpdf.null        = pdfnull
lpdf.number      = pdfnumber
lpdf.reference   = pdfreference
lpdf.stream      = pdfstream -- THIS WILL PROBABLY CHANGE
lpdf.string      = pdfstring
lpdf.unicode     = pdfunicode
lpdf.verbose     = pdfverbose
828
829if not callbacks then return lpdf end
830
831-- three priority levels, default=2
832
-- The finalizers are collected per priority level (1, 2 and 3).

local pagefinalizers     = { { }, { }, { } }
local documentfinalizers = { { }, { }, { } }

-- The three property dictionaries; they are replaced by fresh ones on each reset.

local pageresources, pageattributes, pagesattributes

local function resetpageproperties()
    pageresources, pageattributes, pagesattributes = pdfdictionary(), pdfdictionary(), pdfdictionary()
end
843
-- Returns a new table that refers to the current three property dictionaries.

function lpdf.getpageproperties()
    local properties = { }
    properties.pageresources   = pageresources
    properties.pageattributes  = pageattributes
    properties.pagesattributes = pagesattributes
    return properties
end
851
-- We start with empty dictionaries and reset them again at initialization time.

resetpageproperties()

lpdf.registerinitializer(resetpageproperties)

-- Setters for the current property dictionaries.

local function addtopageresources(k,v)
    pageresources[k] = v
end

local function addtopageattributes(k,v)
    pageattributes[k] = v
end

local function addtopagesattributes(k,v)
    pagesattributes[k] = v
end

lpdf.addtopageresources   = addtopageresources
lpdf.addtopageattributes  = addtopageattributes
lpdf.addtopagesattributes = addtopagesattributes
863
-- Registers a finalizer.
--
-- where   : the list of priority levels to add to
-- what    : the category, only used for tracing
-- f       : the function to register
-- when    : the priority level, 2 when not given; a string is taken as the
--           comment (and the level becomes 2)
-- comment : optional description, shown when tracing

local function set(where,what,f,when,comment)
    local level = when
    if type(level) == "string" then
        comment = level
        level   = 2
    elseif not level then
        level = 2
    end
    local list  = where[level]
    local index = #list + 1
    list[index] = { f, comment }
    if trace_finalizers then
        report_finalizing("%s set: [%s,%s]",what,level,index)
    end
end
876
-- Runs all registered finalizers: level by level and within a level in the order
-- of registration. The category is only used for tracing.

local function run(where,what)
    if trace_finalizers then
        report_finalizing("start backend, category %a, n %a",what,#where)
    end
    for level=1,#where do
        local list = where[level]
        for index=1,#list do
            local entry = list[index]
            if trace_finalizers then
                report_finalizing("%s finalizer: [%s,%s] %s",what,level,index,entry[2] or "")
            end
            local action = entry[1]
            action()
        end
    end
    if trace_finalizers then
        report_finalizing("stop finalizing")
    end
end
895
-- Both registration functions take a function, an optional priority level and an
-- optional comment.

local function registerpagefinalizer(f,when,comment)
    return set(pagefinalizers,"page",f,when,comment)
end

local function registerdocumentfinalizer(f,when,comment)
    return set(documentfinalizers,"document",f,when,comment)
end

lpdf.registerdocumentfinalizer = registerdocumentfinalizer
lpdf.registerpagefinalizer     = registerpagefinalizer
906
-- Runs the page finalizers and then starts with fresh page properties. Nothing
-- happens when we're not shipping out or when we're in initex mode.

function lpdf.finalizepage(shipout)
    if not shipout or environment.initex then
        return
    end
    run(pagefinalizers,"page")
    -- maybe the reset is better done before running the finalizers
    resetpageproperties()
end
914
-- The document finalizers are run at most once, which is what the flag guards;
-- it is set after the run. In initex mode nothing is done.

local finalized = false

function lpdf.finalizedocument()
    if environment.initex or finalized then
        return
    end
    run(documentfinalizers,"document")
    finalized = true
end

-- Hook both finalizers into the backend callbacks.

callbacks.register("finish_pdfpage", lpdf.finalizepage)
callbacks.register("finish_pdffile", lpdf.finalizedocument)
926
927do
928
929    -- some minimal tracing, handy for checking the order
930
931    local function trace_set(what,key)
932        if trace_resources then
933            report_finalizing("setting key %a in %a",key,what)
934        end
935    end
936
937    local function trace_flush(what)
938        if trace_resources then
939            report_finalizing("flushing %a",what)
940        end
941    end
942
943    lpdf.protectresources = true
944
945    local catalog = pdfdictionary { Type = pdfconstant("Catalog") } -- nicer, but when we assign we nil the Type
946    local info    = pdfdictionary { Type = pdfconstant("Info")    } -- nicer, but when we assign we nil the Type
947    ----- names   = pdfdictionary { Type = pdfconstant("Names")   } -- nicer, but when we assign we nil the Type
948
949    local function checkcatalog()
950        if not environment.initex then
951            trace_flush("catalog")
952            return true
953        end
954    end
955
956    local function checkinfo()
957        if not environment.initex then
958            trace_flush("info")
959            if lpdf.majorversion() > 1 then
960                for k, v in next, info do
961                    if k == "CreationDate" or k == "ModDate" then
962                        -- mandate >= 2.0
963                    else
964                        info[k] = nil
965                    end
966                end
967            end
968            return true
969        end
970    end
971
972    local function flushcatalog()
973        if checkcatalog() then
974            catalog.Type = nil
975        end
976    end
977
978    local function flushinfo()
979        if checkinfo() then
980            info.Type = nil
981        end
982    end
983
984    function lpdf.getcatalog()
985        if checkcatalog() then
986            catalog.Type = pdfconstant("Catalog")
987            return pdfreference(pdfimmediateobject(tostring(catalog)))
988        end
989    end
990
991    function lpdf.getinfo()
992        if checkinfo() then
993            return pdfreference(pdfimmediateobject(tostring(info)))
994        end
995    end
996
997    function lpdf.addtocatalog(k,v)
998        if not (lpdf.protectresources and catalog[k]) then
999            trace_set("catalog",k)
1000            catalog[k] = v
1001        end
1002    end
1003
1004    function lpdf.addtoinfo(k,v)
1005        if not (lpdf.protectresources and info[k]) then
1006            trace_set("info",k)
1007            info[k] = v
1008        end
1009    end
1010
1011    local names = pdfdictionary {
1012     -- Type = pdfconstant("Names")
1013    }
1014
1015    local function flushnames()
1016        if next(names) and not environment.initex then
1017            names.Type = pdfconstant("Names")
1018            trace_flush("names")
1019            lpdf.addtocatalog("Names",pdfreference(pdfimmediateobject(tostring(names))))
1020        end
1021    end
1022
1023    function lpdf.addtonames(k,v)
1024        if not (lpdf.protectresources and names[k]) then
1025            trace_set("names",  k)
1026            names  [k] = v
1027        end
1028    end
1029
1030    local r_extgstates, r_colorspaces, r_patterns, r_shades
1031    local d_extgstates, d_colorspaces, d_patterns, d_shades
1032    local p_extgstates, p_colorspaces, p_patterns, p_shades
1033
1034    lpdf.registerinitializer(function()
1035        r_extgstates = nil ; r_colorspaces = nil ; r_patterns = nil ; r_shades = nil ;
1036        d_extgstates = nil ; d_colorspaces = nil ; d_patterns = nil ; d_shades = nil ;
1037        p_extgstates = nil ; p_colorspaces = nil ; p_patterns = nil ; p_shades = nil ;
1038    end)
1039
1040    local function checkextgstates () if d_extgstates  then addtopageresources("ExtGState", p_extgstates ) end end
1041    local function checkcolorspaces() if d_colorspaces then addtopageresources("ColorSpace",p_colorspaces) end end
1042    local function checkpatterns   () if d_patterns    then addtopageresources("Pattern",   p_patterns   ) end end
1043    local function checkshades     () if d_shades      then addtopageresources("Shading",   p_shades     ) end end
1044
1045    local function flushextgstates () if d_extgstates  then trace_flush("extgstates")  pdfimmediateobject(r_extgstates, tostring(d_extgstates )) end end
1046    local function flushcolorspaces() if d_colorspaces then trace_flush("colorspaces") pdfimmediateobject(r_colorspaces,tostring(d_colorspaces)) end end
1047    local function flushpatterns   () if d_patterns    then trace_flush("patterns")    pdfimmediateobject(r_patterns,   tostring(d_patterns   )) end end
1048    local function flushshades     () if d_shades      then trace_flush("shades")      pdfimmediateobject(r_shades,     tostring(d_shades     )) end end
1049
    -- patterns are special as they need resources too, so we can get recursive references and in that case
    -- acrobat doesn't show anything (other viewers handle it well)
1052    --
1053    -- todo: share them
1054    -- todo: force when not yet set
1055
1056    local pdfgetfontobjectnumber
1057
1058    updaters.register("backends.pdf.latebindings",function()
1059        pdfgetfontobjectnumber = lpdf.getfontobjectnumber
1060    end)
1061
1062    local f_font = formatters["%s%d"]
1063
1064    function lpdf.collectedresources(options)
1065        local ExtGState  = d_extgstates  and next(d_extgstates ) and p_extgstates
1066        local ColorSpace = d_colorspaces and next(d_colorspaces) and p_colorspaces
1067        local Pattern    = d_patterns    and next(d_patterns   ) and p_patterns
1068        local Shading    = d_shades      and next(d_shades     ) and p_shades
1069        local Font
1070        if options and options.patterns == false then
1071            Pattern = nil
1072        end
1073        local fonts = options and options.fonts
1074        if fonts and next(fonts) then
1075            local prefix = options.fontprefix or "F"
1076            Font = pdfdictionary { }
1077            for k, v in sortedhash(fonts) do
1078                Font[f_font(prefix,v)] = pdfreference(pdfgetfontobjectnumber(k))
1079            end
1080        end
1081        if ExtGState or ColorSpace or Pattern or Shading or Font then
1082            local collected = pdfdictionary {
1083                ExtGState  = ExtGState,
1084                ColorSpace = ColorSpace,
1085                Pattern    = Pattern,
1086                Shading    = Shading,
1087                Font       = Font,
1088            }
1089            if options and options.serialize == false then
1090                return collected
1091            else
1092                return collected()
1093            end
1094        elseif options and options.notempty then
1095            return nil
1096        elseif options and options.serialize == false then
1097            return pdfdictionary { }
1098        else
1099            return ""
1100        end
1101    end
1102
1103    function lpdf.adddocumentextgstate (k,v)
1104        if not d_extgstates then
1105            r_extgstates = pdfreserveobject()
1106            d_extgstates = pdfdictionary()
1107            p_extgstates = pdfreference(r_extgstates)
1108        end
1109        d_extgstates[k] = v
1110    end
1111
1112    function lpdf.adddocumentcolorspace(k,v)
1113        if not d_colorspaces then
1114            r_colorspaces = pdfreserveobject()
1115            d_colorspaces = pdfdictionary()
1116            p_colorspaces = pdfreference(r_colorspaces)
1117        end
1118        d_colorspaces[k] = v
1119    end
1120
1121    function lpdf.adddocumentpattern(k,v)
1122        if not d_patterns then
1123            r_patterns = pdfreserveobject()
1124            d_patterns = pdfdictionary()
1125            p_patterns = pdfreference(r_patterns)
1126        end
1127        d_patterns[k] = v
1128    end
1129
1130    function lpdf.adddocumentshade(k,v)
1131        if not d_shades then
1132            r_shades = pdfreserveobject()
1133            d_shades = pdfdictionary()
1134            p_shades = pdfreference(r_shades)
1135        end
1136        d_shades[k] = v
1137    end
1138
1139    registerdocumentfinalizer(flushextgstates,3,"extended graphic states")
1140    registerdocumentfinalizer(flushcolorspaces,3,"color spaces")
1141    registerdocumentfinalizer(flushpatterns,3,"patterns")
1142    registerdocumentfinalizer(flushshades,3,"shades")
1143
1144    registerdocumentfinalizer(flushnames,3,"names") -- before catalog
1145    registerdocumentfinalizer(flushcatalog,3,"catalog")
1146    registerdocumentfinalizer(flushinfo,3,"info")
1147
1148    registerpagefinalizer(checkextgstates,3,"extended graphic states")
1149    registerpagefinalizer(checkcolorspaces,3,"color spaces")
1150    registerpagefinalizer(checkpatterns,3,"patterns")
1151    registerpagefinalizer(checkshades,3,"shades")
1152
1153end
1154
1155-- in strc-bkm: lpdf.registerdocumentfinalizer(function() structures.bookmarks.place() end,1)
1156
-- Returns a string with a "cm" operator that has c s -s c as its first four
-- values and no translation, where s and c come from sind(a) and cosd(a);
-- presumably the angle is in degrees (to be confirmed against sind/cosd).

function lpdf.rotationcm(a)
    local sine, cosine = sind(a), cosd(a)
    return format("%.6F %.6F %.6F %.6F 0 0 cm",cosine,sine,-sine,cosine)
end
1162
1163-- return nil is nicer in test prints
1164
-- Fetches t[key] and returns it when it is of type 'variant', where empty
-- strings and empty tables don't count. A string value is converted when a
-- number or boolean is asked for. In all other cases nothing is returned.

function lpdf.checkedkey(t,key,variant)
    local value = t and t[key]
    if value == nil then
        return
    end
    local kind = type(value)
    if kind ~= variant then
        -- only strings can be converted
        if kind == "string" then
            if variant == "number" then
                return tonumber(value)
            elseif variant == "boolean" then
                return isboolean(value,nil,true)
            end
        end
        return
    end
    if variant == "string" then
        if value ~= "" then
            return value
        end
    elseif variant == "table" then
        if next(value) then
            return value
        end
    else
        return value
    end
end
1191
-- Returns the value when it is of type 'variant', where empty strings and
-- empty tables don't count. A string value is converted when a number or
-- boolean is asked for. In all other cases nothing is returned. The code is
-- (on purpose) not shared with checkedkey.

function lpdf.checkedvalue(value,variant)
    if value == nil then
        return
    end
    local kind = type(value)
    if kind ~= variant then
        -- only strings can be converted
        if kind == "string" then
            if variant == "number" then
                return tonumber(value)
            elseif variant == "boolean" then
                return isboolean(value,nil,true)
            end
        end
        return
    end
    if variant == "string" then
        if value ~= "" then
            return value
        end
    elseif variant == "table" then
        if next(value) then
            return value
        end
    else
        return value
    end
end
1216
-- Converts n to a number and clips it to the range min .. max, where the upper
-- bound is checked first. When n is not set or can't be converted the default
-- is returned.

function lpdf.limited(n,min,max,default)
    local value = n and tonumber(n)
    if not value then
        return default
    end
    if value > max then
        return max
    end
    if value < min then
        return min
    end
    return value
end
1233
1234-- The next variant of ActualText is what Taco and I could come up with
1235-- eventually. As of September 2013 Acrobat copies okay, Sumatra copies a
1236-- question mark, pdftotext injects an extra space and Okular adds a
1237-- newline plus space.
1238
1239-- return formatters["BT /Span << /ActualText (CONTEXT) >> BDC [<feff>] TJ % t EMC ET"](code)
1240
1241if implement then
1242
1243    local f_actual_text_p     = formatters["BT /Span << /ActualText <feff%s> >> BDC %s EMC ET"]
1244    local f_actual_text_b     = formatters["BT /Span << /ActualText <feff%s> >> BDC"]
1245    local f_actual_text_b_not = formatters["/Span << /ActualText <feff%s> >> BDC"]
1246    local f_actual_text       = formatters["/Span <</ActualText %s >> BDC"]
1247
1248    local s_actual_text_e     <const> = "EMC ET"
1249    local s_actual_text_e_not <const> = "EMC"
1250
1251    local context   = context
1252    local pdfdirect = nodes.pool.directliteral -- we can use nuts.write deep down
1253    local tounicode = fonts.mappings.tounicode
1254
1255    function codeinjections.unicodetoactualtext(unicode,pdfcode)
1256        return f_actual_text_p(type(unicode) == "string" and unicode or tounicode(unicode),pdfcode)
1257    end
1258
1259    function codeinjections.startunicodetoactualtext(unicode)
1260        return f_actual_text_b(type(unicode) == "string" and unicode or tounicode(unicode))
1261    end
1262
1263    function codeinjections.stopunicodetoactualtext()
1264        return s_actual_text_e
1265    end
1266
1267    function codeinjections.startunicodetoactualtextdirect(unicode)
1268        return f_actual_text_b_not(type(unicode) == "string" and unicode or tounicode(unicode))
1269    end
1270
1271    function codeinjections.stopunicodetoactualtextdirect()
1272        return s_actual_text_e_not
1273    end
1274
1275    implement {
1276        name      = "startactualtext",
1277        arguments = "string",
1278        actions   = function(str)
1279            context(pdfdirect(f_actual_text(tosixteen(str))))
1280        end
1281    }
1282
1283    implement {
1284        name      = "stopactualtext",
1285        actions   = function()
1286            context(pdfdirect("EMC"))
1287        end
1288    }
1289
1290    local setstate  = nodes.nuts.pool.setstate
1291
1292    function nodeinjections.startalternate(str)
1293        return setstate(f_actual_text(tosixteen(str)))
1294    end
1295
1296    function nodeinjections.stopalternate()
1297        return setstate("EMC")
1298    end
1299
1300
1301end
1302
1303-- Bah, tikz uses \immediate for some reason which is probably a bug, so the usage
1304-- will deal with that. However, we will not provide the serialization.
1305
1306if implement then
1307
1308    implement { name = "pdfbackendcurrentresources",                   public = true, untraced  = true,                            actions = { lpdf.collectedresources, context } }
1309    implement { name = "pdfbackendsetcatalog",        usage = "value", public = true, protected = true, arguments = "2 arguments", actions = lpdf.addtocatalog }
1310    implement { name = "pdfbackendsetinfo",           usage = "value", public = true, protected = true, arguments = "2 arguments", actions = function(a,b,c) lpdf.addtoinfo(a,b,c) end } -- gets adapted
1311    implement { name = "pdfbackendsetname",           usage = "value", public = true, protected = true, arguments = "2 arguments", actions = lpdf.addtonames }
1312    implement { name = "pdfbackendsetpageattribute",  usage = "value", public = true, protected = true, arguments = "2 arguments", actions = lpdf.addtopageattributes }
1313    implement { name = "pdfbackendsetpagesattribute", usage = "value", public = true, protected = true, arguments = "2 arguments", actions = lpdf.addtopagesattributes }
1314    implement { name = "pdfbackendsetpageresource",   usage = "value", public = true, protected = true, arguments = "2 arguments", actions = lpdf.addtopageresources }
1315    implement { name = "pdfbackendsetextgstate",      usage = "value", public = true, protected = true, arguments = "2 arguments", actions = function(a,b) lpdf.adddocumentextgstate (a,pdfverbose(b)) end }
1316    implement { name = "pdfbackendsetcolorspace",     usage = "value", public = true, protected = true, arguments = "2 arguments", actions = function(a,b) lpdf.adddocumentcolorspace(a,pdfverbose(b)) end }
1317    implement { name = "pdfbackendsetpattern",        usage = "value", public = true, protected = true, arguments = "2 arguments", actions = function(a,b) lpdf.adddocumentpattern   (a,pdfverbose(b)) end }
1318    implement { name = "pdfbackendsetshade",          usage = "value", public = true, protected = true, arguments = "2 arguments", actions = function(a,b) lpdf.adddocumentshade     (a,pdfverbose(b)) end }
1319
1320end
1321
1322-- more helpers: copy from lepd to lpdf
1323
-- Wraps v as a pdf constant; nothing is returned when v is nil.

function lpdf.copyconstant(v)
    if v == nil then
        return
    end
    return pdfconstant(v)
end

-- Wraps v as a pdf boolean; nothing is returned when v is nil, so false is
-- handled too.

function lpdf.copyboolean(v)
    if v == nil then
        return
    end
    return pdfboolean(v)
end

-- Wraps v as a pdf unicode string; nothing is returned when v is nil or false.

function lpdf.copyunicode(v)
    if not v then
        return
    end
    return pdfunicode(v)
end
1341
-- Makes a new pdf array with the entries 1 .. #a of a, where each entry is
-- fetched by calling a with the index. Nothing is returned when a is not set.

function lpdf.copyarray(a)
    if not a then
        return
    end
    local copied = pdfarray()
    for index=1,#a do
        copied[index] = a(index)
    end
    return copied
end
1351
-- Makes a new pdf dictionary with the keys of d, where each value is fetched
-- by calling d with the key (the raw value is not used). Nothing is returned
-- when d is not set.

function lpdf.copydictionary(d)
    if not d then
        return
    end
    local copied = pdfdictionary()
    for key in next, d do
        copied[key] = d(key)
    end
    return copied
end
1361
-- The three number copiers pass their argument unchanged.

lpdf.copynumber = function(v)
    return v
end

lpdf.copyinteger = function(v)
    return v -- maybe checking or round ?
end

lpdf.copyfloat = function(v)
    return v
end

-- Wraps v as a pdf string; nothing is returned when v is nil or false.

function lpdf.copystring(v)
    if not v then
        return
    end
    return pdfstring(v)
end
1379
do

    -- This is obsolete but old viewers might still use it as directive for what to
    -- send to a postscript printer.
    --
    -- Both values are references to objects that are written once, on first
    -- use; the initializer forgets them.

    local a_procset, d_procset

    lpdf.registerinitializer(function()
        a_procset, d_procset = nil, nil
    end)

    -- Returns a reference to the array of procset names or, when dict is
    -- set, a reference to a dictionary with that array as ProcSet entry.

    function lpdf.procset(dict)
        if not a_procset then
            local procsets = pdfarray {
                pdfconstant("PDF"),
                pdfconstant("Text"),
                pdfconstant("ImageB"),
                pdfconstant("ImageC"),
                pdfconstant("ImageI"),
            }
            a_procset = pdfreference(pdfimmediateobject(tostring(procsets)))
        end
        if not dict then
            return a_procset
        end
        if not d_procset then
            local wrapper = pdfdictionary {
                ProcSet = a_procset
            }
            d_procset = pdfreference(pdfimmediateobject(tostring(wrapper)))
        end
        return d_procset
    end

end
1417