1
4
5# include "luametatex.h"
6
7
13
/*
    Decode the UTF-8 sequence that starts at |text| and return its code point.

    The lead byte selects a length of one to four bytes. Every trailing byte
    must lie in the range 0x80..0xBF; when one does not (this includes the
    string terminator), or when the lead byte is a stray continuation byte
    (0x80..0xBF) or lies in 0xF8..0xFF, the replacement character 0xFFFD is
    returned.

    Only the shape of the sequence is checked: overlong forms, surrogates and
    four byte values above 0x10FFFF are decoded as they come.

    The four byte case is assembled with unsigned shifts and masks. This gives
    the same value as a surrogate style computation but never shifts a negative
    intermediate, which would be undefined behaviour for lead byte 0xF0 followed
    by a byte in 0x80..0x8F.
*/
unsigned aux_str2uni(const unsigned char *text)
{
    const unsigned lead = text[0];
    if (lead < 0x80) {
        return lead;
    } else if (lead <= 0xBF) {
        /* a continuation byte cannot start a sequence */
        return 0xFFFD;
    } else if (lead <= 0xDF) {
        if (text[1] >= 0x80 && text[1] < 0xC0) {
            return ((lead & 0x1F) << 6) | (unsigned) (text[1] & 0x3F);
        }
    } else if (lead <= 0xEF) {
        if (text[1] >= 0x80 && text[1] < 0xC0 && text[2] >= 0x80 && text[2] < 0xC0) {
            return ((lead & 0x0F) << 12)
                 | ((unsigned) (text[1] & 0x3F) << 6)
                 |  (unsigned) (text[2] & 0x3F);
        }
    } else if (lead <= 0xF7) {
        /*
            The tests run left to right, so a byte is only looked at when the
            one before it was a valid (hence non zero) continuation byte.
        */
        if (text[1] >= 0x80 && text[1] < 0xC0 &&
            text[2] >= 0x80 && text[2] < 0xC0 &&
            text[3] >= 0x80 && text[3] < 0xC0) {
            return ((lead & 0x07) << 18)
                 | ((unsigned) (text[1] & 0x3F) << 12)
                 | ((unsigned) (text[2] & 0x3F) << 6)
                 |  (unsigned) (text[3] & 0x3F);
        }
    }
    return 0xFFFD;
}
41
/*
    Decode the UTF-8 sequence that starts at |text|, return its code point and
    store the number of bytes that were consumed in |*len|.

    A well formed sequence reports its own length (1..4). Every malformed case
    returns the replacement character 0xFFFD and reports a length of 1: a stray
    continuation byte, a lead byte in 0xF8..0xFF, or a trailing byte outside
    0x80..0xBF (which includes the string terminator). Reporting 1 for a broken
    four byte sequence as well means that a caller who advances by |*len| can
    never step over the terminator of a truncated string.

    Only the shape of the sequence is checked: overlong forms, surrogates and
    four byte values above 0x10FFFF are decoded as they come. The four byte
    case uses unsigned shifts and masks so that no negative intermediate is
    ever shifted.
*/
unsigned aux_str2uni_len(const unsigned char *text, int *len)
{
    const unsigned lead = text[0];
    /* the length for plain ASCII and for every malformed case */
    *len = 1;
    if (lead < 0x80) {
        return lead;
    } else if (lead <= 0xBF) {
        /* a continuation byte cannot start a sequence */
        return 0xFFFD;
    } else if (lead <= 0xDF) {
        if (text[1] >= 0x80 && text[1] < 0xC0) {
            *len = 2;
            return ((lead & 0x1F) << 6) | (unsigned) (text[1] & 0x3F);
        }
    } else if (lead <= 0xEF) {
        if (text[1] >= 0x80 && text[1] < 0xC0 && text[2] >= 0x80 && text[2] < 0xC0) {
            *len = 3;
            return ((lead & 0x0F) << 12)
                 | ((unsigned) (text[1] & 0x3F) << 6)
                 |  (unsigned) (text[2] & 0x3F);
        }
    } else if (lead <= 0xF7) {
        /*
            The tests run left to right, so a byte is only looked at when the
            one before it was a valid (hence non zero) continuation byte.
        */
        if (text[1] >= 0x80 && text[1] < 0xC0 &&
            text[2] >= 0x80 && text[2] < 0xC0 &&
            text[3] >= 0x80 && text[3] < 0xC0) {
            *len = 4;
            return ((lead & 0x07) << 18)
                 | ((unsigned) (text[1] & 0x3F) << 12)
                 | ((unsigned) (text[2] & 0x3F) << 6)
                 |  (unsigned) (text[3] & 0x3F);
        }
    }
    return 0xFFFD;
}
76
/*
    Encode the code point |unic| as a zero terminated UTF-8 string in a fresh
    five byte buffer obtained from |lmt_memory_malloc|.

    Returns the buffer, or whatever |lmt_memory_malloc| returned when that is a
    null pointer. Releasing the buffer is left to the caller (presumably with
    the matching memory routine; confirm against the allocator's interface).

    Values below 0x110000 are encoded in one to four bytes; no check is made
    for surrogates. A value of 0x110000 or more has no encoding here, so the
    result is an empty string instead of a buffer with undefined content.
*/
unsigned char *aux_uni2str(unsigned unic)
{
    unsigned char *buf = lmt_memory_malloc(5);
    if (buf) {
        if (unic < 0x80) {
            buf[0] = (unsigned char) unic;
            buf[1] = '\0';
        } else if (unic < 0x800) {
            buf[0] = (unsigned char) (0xC0 | (unic >> 6));
            buf[1] = (unsigned char) (0x80 | (unic & 0x3F));
            buf[2] = '\0';
        } else if (unic < 0x10000) {
            buf[0] = (unsigned char) (0xE0 | (unic >> 12));
            buf[1] = (unsigned char) (0x80 | ((unic >> 6) & 0x3F));
            buf[2] = (unsigned char) (0x80 | (unic & 0x3F));
            buf[3] = '\0';
        } else if (unic < 0x110000) {
            /* 21 bits spread as 3 + 6 + 6 + 6 over the four bytes */
            buf[0] = (unsigned char) (0xF0 | (unic >> 18));
            buf[1] = (unsigned char) (0x80 | ((unic >> 12) & 0x3F));
            buf[2] = (unsigned char) (0x80 | ((unic >> 6) & 0x3F));
            buf[3] = (unsigned char) (0x80 | (unic & 0x3F));
            buf[4] = '\0';
        } else {
            /* out of range: hand back a valid, empty string */
            buf[0] = '\0';
        }
    }
    return buf;
}
106
107
115
/*
    Write the UTF-8 encoding of |unic| at |utf8_text| and return a pointer just
    past the last byte written. No terminator is added and the caller has to
    provide room for up to four bytes.

    Values of 0x110000 and above write nothing, in which case the returned
    pointer equals |utf8_text|. Surrogates are not treated specially.
*/
char *aux_uni2string(char *utf8_text, unsigned unic)
{
    char *out = utf8_text;
    if (unic < 0x80) {
        out[0] = (char) unic;
        return out + 1;
    }
    if (unic < 0x800) {
        out[0] = (char) (0xC0 | (unic >> 6));
        out[1] = (char) (0x80 | (unic & 0x3F));
        return out + 2;
    }
    if (unic < 0x10000) {
        out[0] = (char) (0xE0 | (unic >> 12));
        out[1] = (char) (0x80 | ((unic >> 6) & 0x3F));
        out[2] = (char) (0x80 | (unic & 0x3F));
        return out + 3;
    }
    if (unic < 0x110000) {
        /* 21 bits spread as 3 + 6 + 6 + 6 over the four bytes */
        out[0] = (char) (0xF0 | (unic >> 18));
        out[1] = (char) (0x80 | ((unic >> 12) & 0x3F));
        out[2] = (char) (0x80 | ((unic >> 6) & 0x3F));
        out[3] = (char) (0x80 | (unic & 0x3F));
        return out + 4;
    }
    return out;
}
139
140
141
/*
    Convert the zero terminated UTF-8 string |utf8buf| into code points that
    are stored in |ubuf|, followed by a terminating zero entry. The number of
    code points (not counting that zero) is returned.

    The caller must provide room for |strlen(utf8buf) + 1| entries, which is
    the worst case of one code point per input byte.

    The length of a sequence is taken from its lead byte only (up to 0x7F: one
    byte, up to 0xDF: two, up to 0xEF: three, anything else: four) and trailing
    bytes are masked without validation, so malformed input gives unspecified
    but memory safe values. When the string ends in the middle of a sequence
    one replacement character 0xFFFD is stored and conversion stops, so that no
    byte beyond the terminator is ever read.

    The four byte case uses unsigned shifts and masks so that no negative
    intermediate is ever shifted.
*/
unsigned aux_splitutf2uni(unsigned int *ubuf, const char *utf8buf)
{
    const size_t len = strlen(utf8buf);
    const unsigned char *pt = (const unsigned char *) utf8buf;
    const unsigned char *end = pt + len;
    unsigned int *upt = ubuf;
    unsigned int *uend = ubuf + len;
    while (pt < end && upt < uend) {
        const unsigned lead = *pt;
        const size_t need = (lead <= 0x7F) ? 1 : (lead <= 0xDF) ? 2 : (lead <= 0xEF) ? 3 : 4;
        if ((size_t) (end - pt) < need) {
            /* truncated tail: flag it and stop instead of reading too far */
            *upt++ = 0xFFFD;
            break;
        }
        switch (need) {
            case 1:
                *upt = lead;
                break;
            case 2:
                *upt = ((lead & 0x1F) << 6) | (unsigned) (pt[1] & 0x3F);
                break;
            case 3:
                *upt = ((lead & 0x0F) << 12)
                     | ((unsigned) (pt[1] & 0x3F) << 6)
                     |  (unsigned) (pt[2] & 0x3F);
                break;
            default:
                *upt = ((lead & 0x07) << 18)
                     | ((unsigned) (pt[1] & 0x3F) << 12)
                     | ((unsigned) (pt[2] & 0x3F) << 6)
                     |  (unsigned) (pt[3] & 0x3F);
                break;
        }
        pt += need;
        ++upt;
    }
    *upt = 0;
    return (unsigned int) (upt - ubuf);
}
170
/*
    Count the UTF-8 sequences in the first |size| bytes of |text|.

    Only lead bytes are inspected: 0xF0 and up skips four bytes, 0xE0 and up
    three, 0xC0 and up two, and anything else (ASCII as well as a stray
    continuation byte) one. No validation takes place, and a sequence that
    runs past |size| still counts as one.
*/
size_t aux_utf8len(const char *text, size_t size)
{
    size_t count = 0;
    size_t pos = 0;
    while (pos < size) {
        const unsigned char lead = (unsigned char) text[pos];
        size_t step = 1;
        if (lead >= 0xF0) {
            step = 4;
        } else if (lead >= 0xE0) {
            step = 3;
        } else if (lead >= 0xC0) {
            step = 2;
        }
        pos += step;
        ++count;
    }
    return count;
}
192 |