lpdf-aux.lua /size: 4494 b    last modification: 2020-07-01 14:35
1
-- Register this file in the global modules table, creating the table when
-- no earlier file has done so.
modules = modules or { }

modules['lpdf-aux'] = {
    version   = 1.001,
    comment   = "companion to lpdf-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8 9
local
tonumber
=
tonumber
10
local
format
,
concat
=
string
.
format
,
table
.
concat
11
local
utfchar
,
utfbyte
,
char
=
utf
.
char
,
utf
.
byte
,
string
.
char
12
local
lpegmatch
,
lpegpatterns
=
lpeg
.
match
,
lpeg
.
patterns
13
local
P
,
C
,
R
,
S
,
Cc
,
Cs
,
V
=
lpeg
.
P
,
lpeg
.
C
,
lpeg
.
R
,
lpeg
.
S
,
lpeg
.
Cc
,
lpeg
.
Cs
,
lpeg
.
V
14
local
rshift
=
bit32
.
rshift
15 16
lpdf
=
lpdf
or
{
}
17 18
-- tosixteen --
19 20
-- Lookup table from a single UTF-8 character to its UTF-16 (big endian)
-- representation written as lowercase hexadecimal digits: four digits for a
-- code point below 0x10000, eight digits (a surrogate pair) otherwise.
-- The function passed to table.setmetatableindex computes a missing entry and
-- stores it in the table, so each character is converted only once
-- (presumably the helper installs the function as __index handler; the table
-- can be made weak).
local cache = table.setmetatableindex(function(t,k)
    local v = utfbyte(k)
    if v < 0x10000 then
        -- fits in one 16 bit code unit
        v = format("%04x",v)
    else
        -- UTF-16 encodes (code point - 0x10000) as 20 bits: the upper ten bits
        -- are added to 0xD800 (high surrogate) and the lower ten bits to
        -- 0xDC00 (low surrogate). Skipping the subtraction and the 0xD800
        -- offset yields an invalid first code unit.
        v = v - 0x10000
        v = format("%04x%04x",rshift(v,10)+0xD800,v%1024+0xDC00)
    end
    t[k] = v
    return v
end)
30 31
-- Substitution pattern behind lpdf.tosixteen: the result starts with the
-- constant "<feff", every matched lpegpatterns.utf8character is replaced by
-- its entry in cache, and the constant ">" closes the string. At least one
-- character has to match.
local unified = Cs(
    Cc("<feff")
  * (lpegpatterns.utf8character / cache)^1
  * Cc(">")
)
32 33
-- Recodes str with the unified pattern: the hexadecimal code units from cache,
-- wrapped in "<feff" and ">".
--
-- str    : the string to convert; nil and "" are accepted
-- return : "<feff>" for nil or empty input (not "()" as we want an indication
--          that it's unicode), otherwise whatever matching unified against
--          str yields
function lpdf.tosixteen(str)
    if str and str ~= "" then
        return lpegmatch(unified,str)
    end
    return "<feff>"
end
40 41
-- fromsixteen --
42 43
-- local zero = S(" \n\r\t") + P("\\ ")
44
-- local one = C(4)
45
-- local two = P("d") * R("89","af") * C(2) * C(4)
46
--
47
-- local pattern = P { "start",
48
-- start = V("wrapped") + V("unwrapped") + V("original"),
49
-- original = Cs(P(1)^0),
50
-- wrapped = P("<") * V("unwrapped") * P(">") * P(-1),
51
-- unwrapped = P("feff")
52
-- * Cs( (
53
-- zero / ""
54
-- + two / function(a,b)
55
-- a = (tonumber(a,16) - 0xD800) * 1024
56
-- b = (tonumber(b,16) - 0xDC00)
57
-- return utfchar(a+b)
58
-- end
59
-- + one / function(a)
60
-- return utfchar(tonumber(a,16))
61
-- end
62
-- )^1 ) * P(-1)
63
-- }
64
--
65
-- function lpdf.fromsixteen(s)
66
-- return lpegmatch(pattern,s) or s
67
-- end
68 69
-- Pending high surrogate (0 when there is none). It is shared by the captures
-- of one match and reset at the start of every match.
local more = 0

-- Handles one group of four characters read as a hexadecimal number.
local function decode(s)
    local now = tonumber(s,16)
    if more > 0 then
        -- second half of a surrogate pair: both halves contribute ten bits and
        -- the result is offset by 0x10000, which is how UTF-16 defines the
        -- code points above 0xFFFF
        local code = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000
        more = 0
        return utfchar(code)
    end
    if now >= 0xD800 and now <= 0xDBFF then
        -- high surrogate: remember it and emit nothing, else the hex
        -- characters end up in the stream
        more = now
        return ""
    end
    return utfchar(now)
end

-- The leading empty match only serves to clear the surrogate state; after that
-- every group of four characters is replaced by what decode returns.
local pattern = P(true) / function() more = 0 end * Cs((C(4) / decode)^0)
86 87
-- Converts a string made of groups of four hexadecimal digits (UTF-16 code
-- units) by matching it against pattern.
--
-- str    : the hexadecimal string; nil and "" are accepted
-- return : "" for nil or empty input, otherwise the result of the match
function lpdf.fromsixteen(str)
    if str and str ~= "" then
        return lpegmatch(pattern,str)
    end
    return ""
end
94 95
-- frombytes --
96 97
-- Resolves the backslash escapes of a literal string. The backslash itself is
-- always removed; what follows it is handled as:
--
--   ( ) \        kept as the character itself
--   n r t b f    replaced by the matching control character
--   1-3 octals   replaced by the byte with that value (taken modulo 256, so an
--                overflowing value like \777 does not raise an error in char)
--   end of line  removed (line continuation)
--   other        left alone, only the backslash is dropped
--
-- At least one octal digit is required: an empty match would hand "" to
-- tonumber, which returns nil and makes char fail, so a lone or doubled
-- backslash must never reach that branch.
local b_pattern = Cs((
    P("\\") / "" * (
        S("()\\")
      + S("nrtbf") / { n = "\n", r = "\r", t = "\t", b = "\b", f = "\f" }
      + (lpegpatterns.octdigit * lpegpatterns.octdigit^-2) / function(s)
            return char(tonumber(s,8) % 256)
        end
      + (P("\r\n") + S("\r\n")) / ""
      + P(true)
    )
  + P(1)
)^0)
102 103
-- Input that starts with a UTF-16 byte order mark: the mark selects the
-- converter that handles the rest of the string (judging by the pattern names
-- these convert UTF-16 to UTF-8; the patterns themselves are defined outside
-- this file).
local u_pattern =
    (lpegpatterns.utfbom_16_be * lpegpatterns.utf16_to_utf8_be) -- official
  + (lpegpatterns.utfbom_16_le * lpegpatterns.utf16_to_utf8_le) -- we've seen these

-- Second attempt of lpdf.frombytes in hex mode.
local h_pattern = lpegpatterns.hextobytes
107 108
-- Building blocks for x_pattern.
--
-- zero : characters that carry no data inside a hexadecimal string: space,
--        newline, return, tab and a backslash. The disabled reference
--        implementation earlier in this file spells this S(" \n\r\t") +
--        P("\\ "); skipping the space and the backslash independently accepts
--        that form too.
-- one  : any four characters, read as one UTF-16 code unit.
-- two  : a surrogate pair; both the high (d800-dbff) and the low (dc00-dfff)
--        unit are captured in full, in either letter case.
local zero = S(" \n\r\t") + P("\\")
local one  = C(4)
local two  = C(S("dD") * R("89","ab","AB") * 2)
           * C(S("dD") * R("cf","CF") * 2)

-- Decodes a hexadecimal UTF-16 string that starts with the (lowercase) byte
-- order mark "feff", either bare or wrapped in "<" and ">". Anything else is
-- returned unchanged by the 'original' rule, so a match never fails.
--
-- The end-of-input test is part of 'start' and 'wrapped', not of 'unwrapped':
-- if 'unwrapped' itself insisted on the end of the input, the closing ">" of
-- 'wrapped' could never be reached.
local x_pattern = P { "start",
    start     = V("wrapped") + V("unwrapped") * P(-1) + V("original"),
    original  = Cs(P(1)^0),
    wrapped   = P("<") * V("unwrapped") * P(">") * P(-1),
    unwrapped = P("feff")
              * Cs( (
                    zero / ""
                  + two  / function(high,low)
                        -- each surrogate holds ten bits; the combined value is
                        -- offset by 0x10000 as defined for UTF-16
                        high = (tonumber(high,16) - 0xD800) * 0x400
                        low  = (tonumber(low, 16) - 0xDC00)
                        return utfchar(high + low + 0x10000)
                    end
                  + one  / function(a)
                        return utfchar(tonumber(a,16))
                    end
                )^1 ),
}
129 130
-- Converts a string by trying a series of patterns and returning the first
-- result that is not nil or false.
--
-- s      : the string to convert; nil and "" are accepted
-- hex    : when set, x_pattern and then h_pattern are tried, otherwise
--          u_pattern
-- return : "" for nil or empty input, else the first successful conversion,
--          with b_pattern as the last resort
--
-- NOTE(review): the 'original' rule of x_pattern matches any input, so in hex
-- mode the h_pattern and b_pattern fallbacks look unreachable - confirm
-- whether that is intended.
function lpdf.frombytes(s,hex)
    if not s or s == "" then
        return ""
    end
    local converted
    if hex then
        converted = lpegmatch(x_pattern,s) or lpegmatch(h_pattern,s)
    else
        converted = lpegmatch(u_pattern,s)
    end
    if converted then
        return converted
    end
    return lpegmatch(b_pattern,s)
end
151 152
-- done --
153