lpdf-aux.lmt /size: 4619 b    last modification: 2024-01-16 09:02
1if not modules then modules = { } end modules ['lpdf-aux'] = {
2    version   = 1.001,
3    comment   = "companion to lpdf-ini.mkiv",
4    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5    copyright = "PRAGMA ADE / ConTeXt Development Team",
6    license   = "see context related readme files"
7}
8
9local format, concat = string.format, table.concat
10local utfchar, utfbyte, char = utf.char, utf.byte, string.char
11local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
12local P, C, R, S, Cc, Cs, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cc, lpeg.Cs, lpeg.V
13----- rshift = bit32.rshift
14local hextointeger, octtointeger = string.hextointeger,string.octtointeger
15
16lpdf = lpdf or { }
17
18-- tosixteen --
19
-- Memoizing lookup from one character (as a string key) to its UTF-16 code
-- unit(s) written as lowercase hexadecimal: four digits for code points
-- below 0x10000, eight digits (a surrogate pair) otherwise. The function is
-- handed to table.setmetatableindex, which is presumably the ConTeXt helper
-- that installs it as the __index handler -- confirm against l-table.
local cache = table.setmetatableindex(function(t,k) -- can be made weak
    local code = utfbyte(k)
    local hex
    if code >= 0x10000 then
        -- split the 20 bit offset into a high and a low surrogate
        local offset = code - 0x10000
        hex = format("%04x%04x",(offset >> 10) + 0xD800,offset % 1024 + 0xDC00)
    else
        hex = format("%04x",code)
    end
    t[k] = hex -- store the result so the function runs once per character
    return hex
end)
32
-- Substitution capture: starts the result with "<feff", replaces every match
-- of lpegpatterns.utf8character by its entry in the cache table and appends
-- the closing ">".
local unified = Cs(
    Cc("<feff")
  * (lpegpatterns.utf8character / cache)^1
  * Cc(">")
)

-- Converts a string into a hexadecimal string of the form <feff....>.
--
-- str : the string to convert; nil, false and "" are treated alike
--
-- Returns "<feff>" for missing or empty input, otherwise whatever matching
-- the unified pattern against str yields.
function lpdf.tosixteen(str)
    if str and str ~= "" then
        return lpegmatch(unified,str)
    end
    return "<feff>" -- not () as we want an indication that it's unicode
end
42
43-- fromsixteen --
44
45-- local zero = S(" \n\r\t") + P("\\ ")
46-- local one  = C(4)
47-- local two  = P("d") * R("89","af") * C(2) * C(4)
48--
49-- local pattern = P { "start",
50--     start     = V("wrapped") + V("unwrapped") + V("original"),
51--     original  = Cs(P(1)^0),
52--     wrapped   = P("<") * V("unwrapped") * P(">") * P(-1),
53--     unwrapped = P("feff")
54--               * Cs( (
55--                     zero  / ""
56--                   + two   / function(a,b)
57--                                 a = (hextointeger(a) - 0xD800) * 1024
58--                                 b = (hextointeger(b) - 0xDC00)
59--                                 return utfchar(a+b)
60--                             end
61--                   + one   / function(a)
62--                                 return utfchar(hextointeger(a))
63--                             end
64--                 )^1 ) * P(-1)
65-- }
66--
67-- function lpdf.fromsixteen(s)
68--     return lpegmatch(pattern,s) or s
69-- end
70
-- State shared between successive code units of one conversion: the pending
-- high surrogate, or 0 when there is none. It lives outside the captures
-- because each four character group is handled by a separate function call.
local more = 0

-- U+FFFD (replacement character) in UTF-8, emitted for a high surrogate that
-- is not followed by a low surrogate.
local replacement = "\u{FFFD}"

-- Handles one group of four characters, interpreted as a hexadecimal UTF-16
-- code unit, and returns the text that replaces it in the result.
local unit = C(4) / function(s)
    local now = hextointeger(s)
    if more > 0 then
        local high = more
        more = 0
        if now >= 0xDC00 and now <= 0xDFFF then
            -- a proper pair: combine both halves into one code point
            return utfchar((high-0xD800)*0x400 + (now-0xDC00) + 0x10000)
        elseif now >= 0xD800 and now <= 0xDBFF then
            -- the pending surrogate is unpaired, the new one becomes pending
            more = now
            return replacement
        else
            -- the pending surrogate is unpaired, this unit stands on its own
            return replacement .. utfchar(now)
        end
    elseif now >= 0xD800 and now <= 0xDBFF then
        more = now
        return "" -- else the c's end up in the stream
    else
        return utfchar(now)
    end
end

-- Runs after the last unit: a high surrogate that is still pending has no
-- partner, so it is replaced instead of being dropped silently.
local flush = P(true) / function()
    if more > 0 then
        more = 0
        return replacement
    else
        return ""
    end
end

-- The leading function capture resets the state before the units of this
-- match are evaluated; it returns nothing, so only the substituted string
-- is produced.
local pattern = P(true) / function() more = 0 end * Cs(unit^0 * flush)

-- Converts a string of four character hexadecimal UTF-16 code units into
-- UTF-8, combining surrogate pairs.
--
-- str : the hexadecimal string; nil, false and "" give ""
--
-- Returns the converted string. Trailing characters that do not fill a
-- complete group of four are not converted and do not show up in the result.
-- A lone low surrogate is still passed to utf.char unchanged.
function lpdf.fromsixteen(str)
    if not str or str == "" then
        return ""
    else
        return lpegmatch(pattern,str)
    end
end
96
97-- frombytes --
98
-- Replacement texts for the single letter escapes.
local b_escapes = { n = "\n", r = "\r", t = "\t", b = "\b", f = "\f" }

-- One to three octal digits. At least one digit is required, otherwise the
-- conversion function would be called with an empty string for every
-- backslash that is not followed by a known escape.
local b_octal = lpegpatterns.octdigit * lpegpatterns.octdigit^-2

-- An end of line after a backslash marks a line continuation.
local b_newline = P("\r\n") + S("\r\n")

-- Removes the backslash escapes from a string. After a backslash (which is
-- always dropped) the following cases are tried in order:
--
--   ( ) \         kept as they are
--   n r t b f     replaced by the matching control character
--   1-3 octal     replaced by the byte with that value (modulo 256, so
--                 that values above 0377 don't make string.char fail)
--   end of line   dropped (line continuation)
--   anything else only the backslash is dropped, the next character is then
--                 handled as ordinary text
local b_pattern = Cs((P("\\")/"" * (
    S("()\\")
  + S("nrtbf") / b_escapes
  + b_octal / function(s) return char(octtointeger(s) % 256) end
  + b_newline / ""
  + P(true))
+ P(1))^0)
104
-- UTF-16 input that starts with a byte order mark: the two building blocks
-- come from lpegpatterns, the first alternative is for big endian and the
-- second for little endian data.
local utf16_be  = lpegpatterns.utfbom_16_be * lpegpatterns.utf16_to_utf8_be -- official
local utf16_le  = lpegpatterns.utfbom_16_le * lpegpatterns.utf16_to_utf8_le -- we've seen these
local u_pattern = utf16_be + utf16_le

-- Conversion of a hexadecimal string into bytes, taken from lpegpatterns.
local h_pattern = lpegpatterns.hextobytes
109
-- Ignorable material between code units: whitespace or an escaped space.
local zero = S(" \n\r\t") + P("\\ ")

-- One code unit: any four characters, handed to hextointeger later on.
local one  = C(4)

-- A surrogate pair: a high surrogate (d800-dbff) directly followed by a low
-- surrogate (dc00-dfff). Both units are captured with all four digits so
-- that the arithmetic below sees the complete values. Only lowercase digits
-- are recognized here, like the "feff" prefix.
local two  = C(P("d") * S("89ab") * P(2)) * C(P("d") * S("cdef") * P(2))

-- Combines a surrogate pair into one character.
local function frompair(high,low)
    high = (hextointeger(high) - 0xD800) * 0x400
    low  = (hextointeger(low)  - 0xDC00)
    return utfchar(high + low + 0x10000)
end

-- Converts a single code unit into a character.
local function fromunit(unit)
    return utfchar(hextointeger(unit))
end

-- Converts a hexadecimal UTF-16 string that starts with "feff", optionally
-- wrapped in < and >, into UTF-8. Any other input is returned as it is (the
-- "original" rule), so matching this pattern always gives a string.
--
-- The end-of-input check is part of "wrapped" and "unwrapped" and not of the
-- shared "content" rule: otherwise the closing ">" of the wrapped form could
-- never be reached.
local x_pattern = P { "start",
    start     = V("wrapped") + V("unwrapped") + V("original"),
    original  = Cs(P(1)^0),
    wrapped   = P("<") * V("content") * P(">") * P(-1),
    unwrapped = V("content") * P(-1),
    content   = P("feff")
              * Cs( (
                    zero  / ""
                  + two   / frompair
                  + one   / fromunit
                )^1 ),
}
131
-- Converts a string into its decoded form by trying a series of patterns
-- and returning the result of the first one that matches.
--
-- s   : the string to convert; nil, false and "" give ""
-- hex : when true x_pattern and then h_pattern are tried, otherwise
--       u_pattern; in both cases b_pattern is the last resort
--
-- NOTE(review): x_pattern has an "original" alternative that matches any
-- input, so with hex set the h_pattern and b_pattern fallbacks look
-- unreachable -- confirm whether that is intended.
function lpdf.frombytes(s,hex)
    if s and s ~= "" then
        local result
        if hex then
            result = lpegmatch(x_pattern,s) or lpegmatch(h_pattern,s)
        else
            result = lpegmatch(u_pattern,s)
        end
        return result or lpegmatch(b_pattern,s)
    end
    return ""
end
153
154-- done --
155