1
4
5# include "luametatex.h"
6
7
13
/*
    Decode the single UTF-8 sequence that starts at |text| and return its code
    point. U+FFFD is returned when the first byte is a continuation byte
    (0x80..0xBF), when it is 0xF8 or higher, or when one of the expected
    continuation bytes is missing.

    The continuation bytes are tested left to right with short-circuit logic, so
    for NUL-terminated input nothing beyond the terminator is read.

    Only the byte structure is validated: overlong forms, surrogates and values
    above 0x10FFFF are decoded as-is, not rejected.

    The four-byte case is decoded with plain unsigned shifts. The former
    surrogate-style computation left-shifted a negative intermediate (undefined
    behaviour) whenever the sequence was an overlong form starting with 0xF0.
*/
unsigned aux_str2uni(const unsigned char *text)
{
    const unsigned lead = text[0];
    if (lead < 0x80) {
        return lead;
    } else if (lead < 0xC0) {
        /* A continuation byte cannot start a sequence. */
        return 0xFFFD;
    } else if (lead < 0xE0) {
        if ((text[1] & 0xC0) == 0x80) {
            return ((lead & 0x1F) << 6)
                 | (unsigned) (text[1] & 0x3F);
        }
    } else if (lead < 0xF0) {
        if ((text[1] & 0xC0) == 0x80 && (text[2] & 0xC0) == 0x80) {
            return ((lead & 0x0F) << 12)
                 | ((unsigned) (text[1] & 0x3F) << 6)
                 |  (unsigned) (text[2] & 0x3F);
        }
    } else if (lead < 0xF8) {
        if ((text[1] & 0xC0) == 0x80 && (text[2] & 0xC0) == 0x80 && (text[3] & 0xC0) == 0x80) {
            return ((lead & 0x07) << 18)
                 | ((unsigned) (text[1] & 0x3F) << 12)
                 | ((unsigned) (text[2] & 0x3F) << 6)
                 |  (unsigned) (text[3] & 0x3F);
        }
    }
    return 0xFFFD;
}
41
/*
    Decode the single UTF-8 sequence that starts at |text|, return its code point
    and store the number of bytes that were consumed in |*len|.

    A well-formed sequence reports its own length (1 to 4). Every malformed case
    (a stray continuation byte, a lead byte of 0xF8 or higher, or a missing
    continuation byte) returns U+FFFD and reports a length of 1, so that a caller
    advancing by |*len| resynchronizes on the next byte and never steps over the
    NUL terminator of a truncated sequence. Previously a broken four-byte
    sequence reported 4, unlike the two- and three-byte failure paths.

    Only the byte structure is validated: overlong forms, surrogates and values
    above 0x10FFFF are decoded as-is. The four-byte case uses plain unsigned
    shifts; the former computation left-shifted a negative intermediate
    (undefined behaviour) for overlong forms starting with 0xF0.
*/
unsigned aux_str2uni_len(const unsigned char *text, int *len)
{
    const unsigned lead = text[0];
    /* Length used by plain ASCII and by every failure path below. */
    *len = 1;
    if (lead < 0x80) {
        return lead;
    } else if (lead < 0xC0) {
        /* A continuation byte cannot start a sequence. */
        return 0xFFFD;
    } else if (lead < 0xE0) {
        if ((text[1] & 0xC0) == 0x80) {
            *len = 2;
            return ((lead & 0x1F) << 6)
                 | (unsigned) (text[1] & 0x3F);
        }
    } else if (lead < 0xF0) {
        if ((text[1] & 0xC0) == 0x80 && (text[2] & 0xC0) == 0x80) {
            *len = 3;
            return ((lead & 0x0F) << 12)
                 | ((unsigned) (text[1] & 0x3F) << 6)
                 |  (unsigned) (text[2] & 0x3F);
        }
    } else if (lead < 0xF8) {
        if ((text[1] & 0xC0) == 0x80 && (text[2] & 0xC0) == 0x80 && (text[3] & 0xC0) == 0x80) {
            *len = 4;
            return ((lead & 0x07) << 18)
                 | ((unsigned) (text[1] & 0x3F) << 12)
                 | ((unsigned) (text[2] & 0x3F) << 6)
                 |  (unsigned) (text[3] & 0x3F);
        }
    }
    return 0xFFFD;
}
76
77
/*
    Encode the code point |unic| as a NUL-terminated UTF-8 string in a freshly
    allocated five-byte buffer (four payload bytes at most, plus the terminator).

    The buffer comes from |lmt_memory_malloc|; when that call yields NULL, NULL is
    returned. Releasing the buffer is up to the caller (presumably with the
    matching project deallocator, to be confirmed against the memory module).

    Code points of 0x110000 and above cannot be encoded here. For those an empty
    string is returned; previously the buffer was handed back uninitialized and
    without a terminator. Surrogate values are encoded as-is.
*/
unsigned char *aux_uni2str(unsigned unic)
{
    unsigned char *buf = lmt_memory_malloc(5);
    if (buf) {
        if (unic < 0x80) {
            buf[0] = (unsigned char) unic;
            buf[1] = '\0';
        } else if (unic < 0x800) {
            buf[0] = (unsigned char) (0xC0 | (unic >> 6));
            buf[1] = (unsigned char) (0x80 | (unic & 0x3F));
            buf[2] = '\0';
        } else if (unic < 0x10000) {
            buf[0] = (unsigned char) (0xE0 | (unic >> 12));
            buf[1] = (unsigned char) (0x80 | ((unic >> 6) & 0x3F));
            buf[2] = (unsigned char) (0x80 | (unic & 0x3F));
            buf[3] = '\0';
        } else if (unic < 0x110000) {
            /* 3 + 6 + 6 + 6 payload bits, most significant first. */
            buf[0] = (unsigned char) (0xF0 | (unic >> 18));
            buf[1] = (unsigned char) (0x80 | ((unic >> 12) & 0x3F));
            buf[2] = (unsigned char) (0x80 | ((unic >> 6) & 0x3F));
            buf[3] = (unsigned char) (0x80 | (unic & 0x3F));
            buf[4] = '\0';
        } else {
            /* Out of range: return a valid (empty) string, not garbage. */
            buf[0] = '\0';
        }
    }
    return buf;
}
107
108
116
/*
    Write the UTF-8 encoding of |unic| at |utf8_text| and return the position
    just after the last byte written. One to four bytes are stored and no
    terminator is appended. For values of 0x110000 and above nothing is written
    and |utf8_text| is returned unchanged.
*/
char *aux_uni2string(char *utf8_text, unsigned unic)
{
    char *out = utf8_text;
    if (unic < 0x80) {
        out[0] = (char) unic;
        return out + 1;
    }
    if (unic < 0x800) {
        out[0] = (char) (0xC0 | (unic >> 6));
        out[1] = (char) (0x80 | (unic & 0x3F));
        return out + 2;
    }
    if (unic < 0x10000) {
        out[0] = (char) (0xE0 | (unic >> 12));
        out[1] = (char) (0x80 | ((unic >> 6) & 0x3F));
        out[2] = (char) (0x80 | (unic & 0x3F));
        return out + 3;
    }
    if (unic < 0x110000) {
        /* 3 + 6 + 6 + 6 payload bits, most significant first. */
        out[0] = (char) (0xF0 | (unic >> 18));
        out[1] = (char) (0x80 | ((unic >> 12) & 0x3F));
        out[2] = (char) (0x80 | ((unic >> 6) & 0x3F));
        out[3] = (char) (0x80 | (unic & 0x3F));
        return out + 4;
    }
    return out;
}
140
/*
    Split the NUL-terminated UTF-8 string |utf8buf| into code points, store them
    in |ubuf| followed by a terminating zero, and return the number of code
    points stored (the terminator is not counted).

    At most one code point is produced per input byte, so |ubuf| needs room for
    strlen(utf8buf) + 1 entries.

    The decoder is deliberately lenient, as before: the sequence length is taken
    from the first byte alone (0x80..0xDF: two bytes, 0xE0..0xEF: three bytes,
    0xF0 and up: four bytes) and continuation bytes are masked, not validated.

    What changed compared to the former version:

    - A multi-byte sequence that is cut off by the end of the string is stored
      as U+FFFD and ends the conversion. Before, the missing bytes were read
      from behind the terminator.
    - The four-byte case uses plain unsigned shifts; the former computation
      left-shifted a negative intermediate for some lead/second byte pairs,
      which is undefined behaviour.
    - The string length is kept in a size_t instead of being narrowed to int.
*/
unsigned aux_splitutf2uni(unsigned int *ubuf, const char *utf8buf)
{
    const size_t len = strlen(utf8buf);
    const unsigned char *pt = (const unsigned char *) utf8buf;
    const unsigned char *end = pt + len;
    unsigned int *upt = ubuf;
    while (pt < end) {
        const unsigned lead = *pt;
        size_t need;
        if (lead <= 0x7F) {
            need = 1;
        } else if (lead <= 0xDF) {
            need = 2;
        } else if (lead <= 0xEF) {
            need = 3;
        } else {
            need = 4;
        }
        if ((size_t) (end - pt) < need) {
            /* Truncated tail: flag it and stop, nothing usable is left. */
            *upt++ = 0xFFFD;
            break;
        }
        switch (need) {
            case 1:
                *upt = lead;
                break;
            case 2:
                *upt = ((lead & 0x1F) << 6)
                     | (unsigned) (pt[1] & 0x3F);
                break;
            case 3:
                *upt = ((lead & 0x0F) << 12)
                     | ((unsigned) (pt[1] & 0x3F) << 6)
                     |  (unsigned) (pt[2] & 0x3F);
                break;
            default:
                *upt = ((lead & 0x07) << 18)
                     | ((unsigned) (pt[1] & 0x3F) << 12)
                     | ((unsigned) (pt[2] & 0x3F) << 6)
                     |  (unsigned) (pt[3] & 0x3F);
                break;
        }
        pt += need;
        ++upt;
    }
    *upt = 0;
    return (unsigned int) (upt - ubuf);
}
169
/*
    Count the UTF-8 sequences in the first |size| bytes of |text|. Only the first
    byte of each sequence is inspected: 0xF0 and up skips four bytes, 0xE0..0xEF
    three, 0xC0..0xDF two, and anything else (ASCII as well as a stray
    continuation byte) one. A sequence that starts inside the range is counted
    even when its skip runs beyond |size|.
*/
size_t aux_utf8len(const char *text, size_t size)
{
    size_t count = 0;
    size_t pos = 0;
    while (pos < size) {
        const unsigned char lead = (unsigned char) text[pos];
        size_t step = 1;
        if (lead >= 0xF0) {
            step = 4;
        } else if (lead >= 0xE0) {
            step = 3;
        } else if (lead >= 0xC0) {
            step = 2;
        }
        pos += step;
        ++count;
    }
    return count;
}
191 |