lpdf-aux.lua /size: 4497 b    last modification: 2021-10-28 13:50
1if not modules then modules = { } end modules ['lpdf-aux'] = {
2    version   = 1.001,
3    comment   = "companion to lpdf-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local tonumber = tonumber
10local format, concat = string.format, table.concat
11local utfchar, utfbyte, char = utf.char, utf.byte, string.char
12local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
13local P, C, R, S, Cc, Cs, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cc, lpeg.Cs, lpeg.V
14local rshift = bit32.rshift
15
16lpdf = lpdf or { }
17
18-- tosixteen --
19
-- Lookup table from one utf-8 character to its utf-16 (big endian) code units
-- written as lowercase hex. The function is handed to table.setmetatableindex
-- (presumably installed as __index handler -- confirm in l-table) and stores
-- each computed value in the table, so every character is converted only once.
local cache = table.setmetatableindex(function(t,k) -- can be made weak
    local code = utfbyte(k)
    local hex
    if code >= 0x10000 then
        -- outside the basic plane: split into a high and a low surrogate
        local offset = code - 0x10000
        hex = format("%04x%04x",0xD800+rshift(offset,10),0xDC00+offset%1024)
    else
        hex = format("%04x",code)
    end
    t[k] = hex
    return hex
end)
31
-- The substitution capture emits "<feff", then the cached hex form of every
-- utf-8 character, then ">". At least one character is required to match.
local opening = Cc("<feff")
local closing = Cc(">")
local encoded = lpegpatterns.utf8character / cache
local unified = Cs(opening * encoded^1 * closing)

-- Converts a utf-8 string into a pdf hex string in utf-16 with a leading bom.
--
-- str    : utf-8 string (nil and "" are accepted)
-- return : "<feff....>", or "<feff>" when there is nothing to convert; the
--          result of the lpeg match is returned as-is, so it is nil when the
--          pattern does not match at all
function lpdf.tosixteen(str) -- an lpeg might be faster (no table)
    if str and str ~= "" then
        return lpegmatch(unified,str)
    end
    return "<feff>" -- not () as we want an indication that it's unicode
end
41
42-- fromsixteen --
43
44-- local zero = S(" \n\r\t") + P("\\ ")
45-- local one  = C(4)
46-- local two  = P("d") * R("89","af") * C(2) * C(4)
47--
48-- local pattern = P { "start",
49--     start     = V("wrapped") + V("unwrapped") + V("original"),
50--     original  = Cs(P(1)^0),
51--     wrapped   = P("<") * V("unwrapped") * P(">") * P(-1),
52--     unwrapped = P("feff")
53--               * Cs( (
54--                     zero  / ""
55--                   + two   / function(a,b)
56--                                 a = (tonumber(a,16) - 0xD800) * 1024
57--                                 b = (tonumber(b,16) - 0xDC00)
58--                                 return utfchar(a+b)
59--                             end
60--                   + one   / function(a)
61--                                 return utfchar(tonumber(a,16))
62--                             end
63--                 )^1 ) * P(-1)
64-- }
65--
66-- function lpdf.fromsixteen(s)
67--     return lpegmatch(pattern,s) or s
68-- end
69
-- Decoder for bare utf-16 (big endian) hex: groups of four hex digits, either
-- case, without the <feff...> wrapper. The decoder keeps no state between
-- groups or between calls: a surrogate pair is recognized as one alternative.

local s_hexdigit = R("09","af","AF")
local s_unit     = s_hexdigit * s_hexdigit * s_hexdigit * s_hexdigit
local s_high     = S("dD") * S("89abAB")   * s_hexdigit * s_hexdigit -- D800..DBFF
local s_low      = S("dD") * S("cdefCDEF") * s_hexdigit * s_hexdigit -- DC00..DFFF

local pattern = Cs ( (
    -- a high surrogate is only combined when a real low surrogate follows
    (C(s_high) * C(s_low)) / function(h,l)
        return utfchar((tonumber(h,16)-0xD800)*0x400 + (tonumber(l,16)-0xDC00) + 0x10000)
    end
    -- an unpaired high surrogate is dropped (else the c's end up in the stream)
  + s_high / ""
    -- everything else is a code point on its own
  + C(s_unit) / function(u)
        return utfchar(tonumber(u,16))
    end
)^0 )

-- Converts bare utf-16 hex into utf-8.
--
-- str    : string of four digit hex groups (nil and "" are accepted)
-- return : the utf-8 string; decoding stops at the first group that is not
--          four hex digits and what was decoded up to there is returned
function lpdf.fromsixteen(str)
    if not str or str == "" then
        return ""
    else
        return lpegmatch(pattern,str)
    end
end
95
96-- frombytes --
97
-- Removes the escapes from the body of a pdf literal string:
--
--   \( \) \\      : the character itself
--   \n \r \t \b \f : the matching control character
--   \d \dd \ddd   : octal byte value (one to three digits, overflow is dropped)
--   \<newline>    : line continuation, both characters vanish
--   \<other>      : the backslash is ignored, the other character is kept
--
-- The octal branch needs at least one digit: matching zero digits would call
-- char(tonumber("",8)), which is char(nil), and raise an error for input
-- like "\\\\".

local b_octal   = lpegpatterns.octdigit * lpegpatterns.octdigit^-2
local b_newline = P("\r\n") + S("\r\n")

local b_pattern = Cs((P("\\")/"" * (
    S("()\\")
  + S("nrtbf") / { n = "\n", r = "\r", t = "\t", b = "\b", f = "\f" }
  + b_octal / function(s) return char(tonumber(s,8) % 256) end -- keep within a byte
  + b_newline / ""
  + P(true) -- unknown escape (or a trailing backslash): only the backslash goes
) + P(1))^0)
103
-- Byte strings that start with a utf-16 bom; judging by the names of the
-- predefined patterns, the remainder is converted to utf-8 (see l-lpeg for
-- the definitions).
local u_pattern_be = lpegpatterns.utfbom_16_be * lpegpatterns.utf16_to_utf8_be -- official
local u_pattern_le = lpegpatterns.utfbom_16_le * lpegpatterns.utf16_to_utf8_le -- we've seen these
local u_pattern    = u_pattern_be + u_pattern_le

-- Predefined pattern for plain hex strings (defined elsewhere, see l-lpeg).
local h_pattern = lpegpatterns.hextobytes
108
-- Decoder for utf-16 (big endian) hex strings that start with the feff bom,
-- either bare ("feff0041") or wrapped ("<feff0041>"). Hex digits are accepted
-- in both cases. Input that is not such a string is returned unchanged by the
-- "original" alternative.

local zero     = S(" \n\r\t") + P("\\ ")
local hexdigit = R("09","af","AF")
local unit     = hexdigit * hexdigit * hexdigit * hexdigit
local high     = S("dD") * S("89abAB")   * hexdigit * hexdigit -- D800..DBFF
local low      = S("dD") * S("cdefCDEF") * hexdigit * hexdigit -- DC00..DFFF
local bom      = S("fF") * S("eE") * S("fF") * S("fF")

-- both captures are complete four digit code units
local one = C(unit)
local two = C(high) * C(low)

local x_pattern = P { "start",
    start     = V("wrapped") + V("unwrapped") + V("original"),
    original  = Cs(P(1)^0),
    -- the end-of-input check is done by the callers of "decoded": when it is
    -- part of "decoded" itself the wrapped variant can never reach its ">"
    wrapped   = P("<") * V("decoded") * P(">") * P(-1),
    unwrapped = V("decoded") * P(-1),
    decoded   = bom
              * Cs( (
                    zero  / ""
                  + two   / function(a,b)
                                -- surrogate pair: code points start at 0x10000
                                a = (tonumber(a,16) - 0xD800) * 1024
                                b = (tonumber(b,16) - 0xDC00)
                                return utfchar(a + b + 0x10000)
                            end
                  + one   / function(a)
                                return utfchar(tonumber(a,16))
                            end
                )^1 ),
}
130
-- Converts the content of a pdf string object into a Lua string.
--
-- s      : the string as found in the pdf file (nil and "" are accepted)
-- hex    : when true, s is treated as a hex string and is tried against
--          x_pattern and then h_pattern; otherwise s is tried against
--          u_pattern (utf-16 with bom)
-- return : the first conversion that gives a result; if none does, s is run
--          through b_pattern (removal of backslash escapes); "" for empty input
--
-- NOTE(review): x_pattern has an "original" alternative that accepts any
-- input, so for a non-empty s the x_pattern match always seems to give a
-- result, which makes the h_pattern (and for hex also the b_pattern) fallback
-- look unreachable -- confirm whether that is intended.
function lpdf.frombytes(s,hex)
    if s and s ~= "" then
        local converted
        if hex then
            converted = lpegmatch(x_pattern,s) or lpegmatch(h_pattern,s)
        else
            converted = lpegmatch(u_pattern,s)
        end
        return converted or lpegmatch(b_pattern,s)
    end
    return ""
end
152
153-- done --
154