lmtstrlibext.c /size: 37 Kb    last modification: 2025-02-21 11:03
1/*
2    See license.txt in the root of this project.
3*/
4
5/* todo: byteconcat and utf concat (no separator) */
6
7# include "luametatex.h"
8
9/*tex Helpers */
10
/*tex
    Decode one \UTF8 sequence from |s| (of length |l|) starting at byte offset |*p| and return
    its code point. The caller must make sure that |*p < l| because the first byte is read
    unconditionally. On success |*p| is moved past the whole sequence. When the sequence is
    truncated, when a follow byte is not of the form |10xxxxxx|, or when the first byte is not a
    valid lead byte (|0x80..0xBF| or |0xF8..0xFF|), only the first byte is consumed and |0xFFFD|
    is returned. Overlong forms and surrogates are not rejected here.
*/
static inline int strlib_aux_tounicode(const char *s, size_t l, size_t *p)
{
    const unsigned char *u = (const unsigned char *) s;
    unsigned char i = u[*p];
    *p += 1;
    if (i < 0x80) {
        return i;
    } else if (i >= 0xF8) {
        /*tex Not a lead byte at all, so we fall through to the replacement character. */
    } else if (i >= 0xF0) {
        if ((*p + 2) < l) {
            unsigned char j = u[*p];
            unsigned char k = u[*p + 1];
            unsigned char m = u[*p + 2];
            if ((j & 0xC0) == 0x80 && (k & 0xC0) == 0x80 && (m & 0xC0) == 0x80) {
                *p += 3;
                return (((((i - 0xF0) * 0x40) + (j - 0x80)) * 0x40) + (k - 0x80)) * 0x40 + (m - 0x80);
            }
        }
    } else if (i >= 0xE0) {
        if ((*p + 1) < l) {
            unsigned char j = u[*p];
            unsigned char k = u[*p + 1];
            if ((j & 0xC0) == 0x80 && (k & 0xC0) == 0x80) {
                *p += 2;
                return (((i - 0xE0) * 0x40) + (j - 0x80)) * 0x40 + (k - 0x80);
            }
        }
    } else if (i >= 0xC0) {
        if (*p < l) {
            unsigned char j = u[*p];
            if ((j & 0xC0) == 0x80) {
                *p += 1;
                return ((i - 0xC0) * 0x40) + (j - 0x80);
            }
        }
    }
    return 0xFFFD;
}
47
/*tex
    Return the length in bytes (1 upto 4) of the \UTF8 sequence that starts at offset |p| in
    |s| (of length |l|), or 0 when there is no well formed sequence at that position. The caller
    must make sure that |p < l|. A sequence is rejected when it is truncated, when a follow byte
    is not of the form |10xxxxxx|, or when the first byte is not a lead byte (|0x80..0xBF| or
    |0xF8..0xFF|).
*/
static inline int strlib_aux_tounichar(const char *s, size_t l, size_t p)
{
    const unsigned char *u = (const unsigned char *) s;
    unsigned char i = u[p++];
    if (i < 0x80) {
        return 1;
    } else if (i >= 0xF8) {
        /*tex Not a lead byte. */
    } else if (i >= 0xF0) {
        if ((p + 2) < l) {
            if ((u[p] & 0xC0) == 0x80 && (u[p + 1] & 0xC0) == 0x80 && (u[p + 2] & 0xC0) == 0x80) {
                return 4;
            }
        }
    } else if (i >= 0xE0) {
        if ((p + 1) < l) {
            if ((u[p] & 0xC0) == 0x80 && (u[p + 1] & 0xC0) == 0x80) {
                return 3;
            }
        }
    } else if (i >= 0xC0) {
        if (p < l) {
            if ((u[p] & 0xC0) == 0x80) {
                return 2;
            }
        }
    }
    return 0;
}
80
/*tex
    Scan |s| (of length |l|) from offset |p| for the end of the current line. The return value
    is the number of bytes in the line, without the line ending. The size of the line ending is
    stored in |*b|: 2 for cr lf, 1 for a lone cr or lf. When the string ends without a line
    ending |*b| is left untouched, so the caller should initialize it to zero. A cr that happens
    to be the last byte counts as a line ending of size 1; without that a caller that advances
    by line length plus |*b| would never get past it.
*/
static inline size_t strlib_aux_toline(const char *s, size_t l, size_t p, size_t *b)
{
    size_t i = p;
    while (i < l) {
        if (s[i] == 13) {
            /*tex cr lf or just cr (also when cr is the final byte) */
            *b = ((i + 1) < l && s[i + 1] == 10) ? 2 : 1;
            return i - p;
        } else if (s[i] == 10) {
            /*tex lf */
            *b = 1;
            return i - p;
        } else {
            i += 1;
        }
    }
    return i - p;
}
104
105/*tex End of helpers. */
106
107static int strlib_aux_bytepairs(lua_State *L)
108{
109    size_t ls = 0;
110    const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
111    size_t ind = lmt_tointeger(L, lua_upvalueindex(2));
112    if (ind < ls) {
113        unsigned char i;
114        /*tex iterator */
115        if (ind + 1 < ls) {
116            lua_pushinteger(L, ind + 2);
117        } else {
118            lua_pushinteger(L, ind + 1);
119        }
120        lua_replace(L, lua_upvalueindex(2));
121        i = (unsigned char)*(s + ind);
122        /*tex byte one */
123        lua_pushinteger(L, i);
124        if (ind + 1 < ls) {
125            /*tex byte two */
126            i = (unsigned char)*(s + ind + 1);
127            lua_pushinteger(L, i);
128        } else {
129            /*tex odd string length */
130            lua_pushnil(L);
131        }
132        return 2;
133    } else {
134        return 0;
135    }
136}
137
138static int strlib_bytepairs(lua_State *L)
139{
140    luaL_checkstring(L, 1);
141    lua_settop(L, 1);
142    lua_pushinteger(L, 0);
143    lua_pushcclosure(L, strlib_aux_bytepairs, 2);
144    return 1;
145}
146
147static int strlib_aux_bytes(lua_State *L)
148{
149    size_t ls = 0;
150    const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
151    size_t ind = lmt_tointeger(L, lua_upvalueindex(2));
152    if (ind < ls) {
153        /*tex iterator */
154        lua_pushinteger(L, ind + 1);
155        lua_replace(L, lua_upvalueindex(2));
156        /*tex byte */
157        lua_pushinteger(L, (unsigned char)*(s + ind));
158        return 1;
159    } else {
160        return 0;
161    }
162}
163
164static int strlib_bytes(lua_State *L)
165{
166    luaL_checkstring(L, 1);
167    lua_settop(L, 1);
168    lua_pushinteger(L, 0);
169    lua_pushcclosure(L, strlib_aux_bytes, 2);
170    return 1;
171}
172
173static int strlib_aux_utf_failed(lua_State *L, int new_ind)
174{
175    lua_pushinteger(L, new_ind);
176    lua_replace(L, lua_upvalueindex(2));
177    lua_pushliteral(L, utf_fffd_string);
178    return 1;
179}
180
181/* kind of complex ... these masks */
182
183static int strlib_aux_utfcharacters(lua_State *L)
184{
185    static const unsigned char mask[4] = { 0x80, 0xE0, 0xF0, 0xF8 };
186    static const unsigned char mequ[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
187    size_t ls = 0;
188    const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
189    size_t ind = lmt_tointeger(L, lua_upvalueindex(2));
190    size_t l = ls;
191    if (ind >= l) {
192        return 0;
193    } else {
194        unsigned char c = (unsigned char) s[ind];
195        for (size_t j = 0; j < 4; j++) {
196            if ((c & mask[j]) == mequ[j]) {
197                if (ind + 1 + j > l) {
198                    /*tex The result will not fit. */
199                    return strlib_aux_utf_failed(L, (int) l);
200                }
201                for (size_t k = 1; k <= j; k++) {
202                    c = (unsigned char) s[ind + k];
203                    if ((c & 0xC0) != 0x80) {
204                        /*tex We have a bad follow byte. */
205                        return strlib_aux_utf_failed(L, (int) (ind + k));
206                    }
207                }
208                /*tex The iterator. */
209                lua_pushinteger(L, ind + j + 1);
210                lua_replace(L, lua_upvalueindex(2));
211                lua_pushlstring(L, ind + s, j + 1);
212                return 1;
213            }
214        }
215        return strlib_aux_utf_failed(L, (int) (ind + 1)); /* we found a follow byte! */
216    }
217}
218
219static int strlib_utfcharacters(lua_State *L)
220{
221    luaL_checkstring(L, 1);
222    lua_settop(L, 1);
223    lua_pushinteger(L, 0);
224    lua_pushcclosure(L, strlib_aux_utfcharacters, 2);
225    return 1;
226}
227
228static int strlib_aux_utfvalues(lua_State *L)
229{
230    size_t l = 0;
231    const char *s = lua_tolstring(L, lua_upvalueindex(1), &l);
232    size_t ind = lmt_tointeger(L, lua_upvalueindex(2));
233    if (ind < l) {
234        int v = strlib_aux_tounicode(s, l, &ind);
235        lua_pushinteger(L, ind);
236        lua_replace(L, lua_upvalueindex(2));
237        lua_pushinteger(L, v);
238        return 1;
239    } else {
240        return 0;
241    }
242}
243
244static int strlib_utfvalues(lua_State *L)
245{
246    luaL_checkstring(L, 1);
247    lua_settop(L, 1);
248    lua_pushinteger(L, 0);
249    lua_pushcclosure(L, strlib_aux_utfvalues, 2);
250    return 1;
251}
252
253static int strlib_aux_characterpairs(lua_State *L)
254{
255    size_t ls = 0;
256    const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
257    size_t ind = lmt_tointeger(L, lua_upvalueindex(2));
258    if (ind < ls) {
259        char b[1];
260        lua_pushinteger(L, ind + 2); /*tex So we can overshoot ls here. */
261        lua_replace(L, lua_upvalueindex(2));
262        b[0] = s[ind];
263        lua_pushlstring(L, b, 1);
264        if ((ind + 1) < ls) {
265            b[0] = s[ind + 1];
266            lua_pushlstring(L, b, 1);
267        } else {
268            lua_pushliteral(L, "");
269        }
270        return 2;
271    } else {
272        return 0;  /* string ended */
273    }
274}
275
276static int strlib_characterpairs(lua_State *L)
277{
278    luaL_checkstring(L, 1);
279    lua_settop(L, 1);
280    lua_pushinteger(L, 0);
281    lua_pushcclosure(L, strlib_aux_characterpairs, 2);
282    return 1;
283}
284
285static int strlib_aux_characters(lua_State *L)
286{
287    size_t ls = 0;
288    const char *s = lua_tolstring(L, lua_upvalueindex(1), &ls);
289    size_t ind = lmt_tointeger(L, lua_upvalueindex(2));
290    if (ind < ls) {
291        char b[1];
292        lua_pushinteger(L, ind + 1); /* iterator */
293        lua_replace(L, lua_upvalueindex(2));
294        b[0] = *(s + ind);
295        lua_pushlstring(L, b, 1);
296        return 1;
297    } else {
298        return 0;  /* string ended */
299    }
300}
301
302static int strlib_characters(lua_State *L)
303{
304    luaL_checkstring(L, 1);
305    lua_settop(L, 1);
306    lua_pushinteger(L, 0);
307    lua_pushcclosure(L, strlib_aux_characters, 2);
308    return 1;
309}
310
311static int strlib_bytetable(lua_State *L)
312{
313    size_t l;
314    const char *s = luaL_checklstring(L, 1, &l);
315    lua_createtable(L, (int) l, 0);
316    for (size_t i = 0; i < l; i++) {
317        lua_pushinteger(L, (unsigned char)*(s + i));
318        lua_rawseti(L, -2, i + 1);
319    }
320    return 1;
321}
322
323static int strlib_utfvaluetable(lua_State *L)
324{
325    size_t n = 1;
326    size_t l = 0;
327    size_t p = 0;
328    const char *s = luaL_checklstring(L, 1, &l);
329    lua_createtable(L, (int) l, 0);
330    while (p < l) {
331        lua_pushinteger(L, strlib_aux_tounicode(s, l, &p));
332        lua_rawseti(L, -2, n++);
333    }
334    return 1;
335}
336
337static int strlib_utfcharactertable(lua_State *L)
338{
339    size_t n = 1;
340    size_t l = 0;
341    size_t p = 0;
342    const char *s = luaL_checklstring(L, 1, &l);
343    lua_createtable(L, (int) l, 0);
344    while (p < l) {
345        int b = strlib_aux_tounichar(s, l, p);
346        if (b) {
347            lua_pushlstring(L, s + p, b);
348            p += b;
349        } else {
350            lua_pushliteral(L, utf_fffd_string);
351            p += 1;
352        }
353        lua_rawseti(L, -2, n++);
354    }
355    return 1;
356}
357
358static int strlib_linetable(lua_State *L)
359{
360    size_t n = 1;
361    size_t l = 0;
362    size_t p = 0;
363    const char *s = luaL_checklstring(L, 1, &l);
364    lua_createtable(L, (int) l, 0);
365    while (p < l) {
366        size_t b = 0;
367        size_t m = strlib_aux_toline(s, l, p, &b);
368        if (m) {
369            lua_pushlstring(L, s + p, m);
370        } else {
371            lua_pushliteral(L, "");
372        }
373        p += m + b;
374        lua_rawseti(L, -2, n++);
375    }
376    return 1;
377}
378
379/*tex
380
381    We provide a few helpers that we derived from the lua utf8 module and slunicode. That way we're
382    sort of covering a decent mix.
383
384*/
385
386# define MAXUNICODE 0x10FFFF
387
388/*tex
389
390    This is a combination of slunicode and utf8 converters but without mode and a bit faster on the
391    average than the utf8 one. The one character branch is a bit more efficient, as is preallocating
392    the buffer size.
393
394*/
395
396static inline void strlib_aux_add_utfchar(luaL_Buffer *b, unsigned u)
397{
398    if (u <= MAXUNICODE) {
399        if (0x80 > u) {
400            luaL_addchar(b, (unsigned char) u);
401        } else {
402            if (0x800 > u)
403                luaL_addchar(b, (unsigned char) (0xC0 | (u >> 6)));
404            else {
405                if (0x10000 > u)
406                    luaL_addchar(b, (unsigned char) (0xE0 | (u >> 12)));
407                else {
408                    luaL_addchar(b, (unsigned char) (0xF0 | (u >> 18)));
409                    luaL_addchar(b, (unsigned char) (0x80 | (0x3F & (u >> 12))));
410                }
411                luaL_addchar(b, 0x80 | (0x3F & (u >> 6)));
412            }
413            luaL_addchar(b, 0x80 | (0x3F & u));
414        }
415    }
416}
417
418static inline void strlib_aux_add_utfnumber(lua_State *L, luaL_Buffer *b, int index)
419{
420    strlib_aux_add_utfchar(b, (unsigned) lmt_tounsigned(L, index));
421}
422
423static inline void strlib_aux_add_utfstring(lua_State *L, luaL_Buffer *b, int index)
424{
425    size_t ls = 0;
426    const char *s = lua_tolstring(L, index, &ls);
427    luaL_addlstring(b, s, ls);
428}
429
430static inline void strlib_aux_add_utftable(lua_State *L, luaL_Buffer *b, int index)
431{
432    lua_Unsigned n = lua_rawlen(L, index);
433    if (n > 0) { 
434        for (lua_Unsigned i = 1; i <= n; i++) {
435            lua_rawgeti(L, index, i);
436            switch (lua_type(L, -1)) { 
437                case LUA_TNUMBER: 
438                    strlib_aux_add_utfnumber(L, b, -1);
439                    break;
440                case LUA_TTABLE:
441                    strlib_aux_add_utftable(L, b, -1);
442                    break;
443                case LUA_TSTRING: 
444                    strlib_aux_add_utfstring(L, b, -1);
445                    break;
446            }
447            lua_pop(L, 1);
448        }
449    }
450}
451
452static int strlib_utfcharacter(lua_State *L)
453{
454    int n = lua_gettop(L);
455    if (n == 1 && lua_type(L, 1) == LUA_TNUMBER) {
456        char u[6];
457        char *c = aux_uni2string(&u[0], (unsigned) lua_tointeger(L, 1));
458        *c = '\0';
459        lua_pushstring(L, u);
460        return 1;
461    } else {
462        luaL_Buffer b;
463        luaL_buffinitsize(L, &b, (size_t) n * 4); 
464        for (int i = 1; i <= n; i++) {
465            switch (lua_type(L, i)) {
466                case LUA_TNUMBER: 
467                    strlib_aux_add_utfnumber(L, &b, i);
468                    break;
469                case LUA_TTABLE: 
470                    strlib_aux_add_utftable(L, &b, i);
471                    break;
472                case LUA_TSTRING: 
473                    strlib_aux_add_utfstring(L, &b, i);
474                    break;
475            }
476        }
477        luaL_pushresult(&b);
478        return 1;
479    }
480}
481
482/*tex
483
484    The \UTF8 codepoint function takes two arguments, being positions in the string, while slunicode
485    byte takes two arguments representing the number of utf characters. The variant below always
486    returns all codepoints.
487
488*/
489
490static int strlib_utfvalue(lua_State *L)
491{
492    size_t l = 0;
493    size_t p = 0;
494    int i = 0;
495    const char *s = luaL_checklstring(L, 1, &l);
496    while (p < l) {
497        lua_pushinteger(L, strlib_aux_tounicode(s, l, &p));
498        i++;
499    }
500    return i;
501}
502
503/*tex This is a simplified version of utf8.len but without range. */
504
505static int strlib_utflength(lua_State *L)
506{
507    size_t ls = 0;
508    size_t ind = 0;
509    size_t n = 0;
510    const char *s = lua_tolstring(L, 1, &ls);
511    while (ind < ls) {
512        unsigned char i = (unsigned char) *(s + ind);
513        if (i < 0x80) {
514            ind += 1;
515        } else if (i >= 0xF0) {
516            ind += 4;
517        } else if (i >= 0xE0) {
518            ind += 3;
519        } else if (i >= 0xC0) {
520            ind += 2;
521        } else {
522            /*tex bad news, stupid recovery */
523            ind += 1;
524        }
525        n++;
526    }
527    lua_pushinteger(L, n);
528    return 1;
529}
530
531/*tex A handy one that formats a float but also strips trailing zeros. */
532
533static int strlib_format_f6(lua_State *L)
534{
535    double n = luaL_optnumber(L, 1, 0.0);
536    if (n == 0.0) {
537        lua_pushliteral(L, "0");
538    } else if (n == 1.0) {
539        lua_pushliteral(L, "1");
540    } else {
541        char s[128];
542        int i, l;
543        /* we could use sprintf here */
544        if (fmod(n, 1) == 0) {
545            i = snprintf(s, 128, "%i", (int) n);
546        } else {
547            if (lua_type(L, 2) == LUA_TSTRING) {
548                const char *f = lua_tostring(L, 2);
549                i = snprintf(s, 128, f, n);
550            } else {
551                i = snprintf(s, 128, "%0.6f", n) ;
552            }
553            l = i - 1;
554            while (l > 1) {
555                if (s[l - 1] == '.') {
556                    break;
557                } else if (s[l] == '0') {
558                    s[l] = '\0'; /* redundant */
559                    --i;
560                } else {
561                    break;
562                }
563                l--;
564            }
565        }
566        lua_pushlstring(L, s, i);
567    }
568    return 1;
569}
570
571/*tex
572    The next one is mostly provided as check because doing it in pure \LUA\ is not slower and it's
    not a bottleneck anyway. There are some subtle side effects when we don't check for these ranges,
574    especially the trigger bytes (|0xD7FF| etc.) because we can get negative numbers which means
575    wrapping around and such.
576*/
577
/*tex
    Map a nibble (0 upto 15) onto its uppercase hexadecimal digit. Larger values are not
    checked and just continue after |F|.
*/
static inline unsigned char strlib_aux_hexdigit(unsigned char n)
{
    if (n < 10) {
        return (unsigned char) ('0' + n);
    } else {
        return (unsigned char) ('A' - 10 + n);
    }
}
581
/*tex
    This is true for code points that the converters below replace by |0xFFFD|: the range
    |0xD7FF| upto |0xDFFF|, the private range |0xE000| upto |0xF8FF| and the two private planes
    |0xF0000| upto |0x10FFFF|. The argument is wrapped in parentheses so that expressions can be
    passed safely; it is evaluated more than once, so it should have no side effects.

    NOTE(review): the first range starts at |0xD7FF| while the surrogates start at |0xD800|; an
    earlier variant used |0xD800|. The lower bound is kept as it was, but it deserves a check.
*/
# define invalid_unicode(u) ( \
    ((u) >= 0x00D7FF && (u) <= 0x00DFFF) || \
    ((u) >= 0x00E000 && (u) <= 0x00F8FF) || \
    ((u) >= 0x0F0000 && (u) <= 0x0FFFFF) || \
    ((u) >= 0x100000 && (u) <= 0x10FFFF) \
)
589
590static int strlib_format_tounicode16(lua_State *L)
591{
592    lua_Integer u = lua_tointeger(L, 1);
593    if (invalid_unicode(u)) {
594        lua_pushliteral(L, "FFFD");
595    } else if (u < 0xD7FF || (u > 0xDFFF && u <= 0xFFFF)) {
596        char s[4] ;
597        s[3] = strlib_aux_hexdigit((unsigned char) ((u & 0x000F) >>  0));
598        s[2] = strlib_aux_hexdigit((unsigned char) ((u & 0x00F0) >>  4));
599        s[1] = strlib_aux_hexdigit((unsigned char) ((u & 0x0F00) >>  8));
600        s[0] = strlib_aux_hexdigit((unsigned char) ((u & 0xF000) >> 12));
601        lua_pushlstring(L, s, 4);
602    } else {
603        unsigned u1, u2;
604        char     s[8] ;
605        u = u - 0x10000; /* negative when invalid range */
606        u1 = (unsigned) (u >> 10) + 0xD800;
607        u2 = (unsigned) (u % 0x400) + 0xDC00;
608        s[3] = strlib_aux_hexdigit((unsigned char) ((u1 & 0x000F) >>  0));
609        s[2] = strlib_aux_hexdigit((unsigned char) ((u1 & 0x00F0) >>  4));
610        s[1] = strlib_aux_hexdigit((unsigned char) ((u1 & 0x0F00) >>  8));
611        s[0] = strlib_aux_hexdigit((unsigned char) ((u1 & 0xF000) >> 12));
612        s[7] = strlib_aux_hexdigit((unsigned char) ((u2 & 0x000F) >>  0));
613        s[6] = strlib_aux_hexdigit((unsigned char) ((u2 & 0x00F0) >>  4));
614        s[5] = strlib_aux_hexdigit((unsigned char) ((u2 & 0x0F00) >>  8));
615        s[4] = strlib_aux_hexdigit((unsigned char) ((u2 & 0xF000) >> 12));
616        lua_pushlstring(L, s, 8);
617    }
618    return 1;
619}
620
621static int strlib_format_toutf8(lua_State *L) /* could be integrated into utfcharacter */
622{
623    if (lua_type(L, 1) == LUA_TTABLE) {
624        lua_Integer n = lua_rawlen(L, 1);
625        if (n > 0) {
626            luaL_Buffer b;
627            luaL_buffinitsize(L, &b, (n + 1) * 4);
628            for (lua_Integer i = 0; i <= n; i++) {
629                /* there should be one operation for getting a number from a table */
630                if (lua_rawgeti(L, 1, i) == LUA_TNUMBER) {
631                    unsigned u = (unsigned) lua_tointeger(L, -1);
632                    if (0x80 > u) {
633                        luaL_addchar(&b, (unsigned char) u);
634                    } else if (invalid_unicode(u)) {
635                        luaL_addchar(&b, 0xFF);
636                        luaL_addchar(&b, 0xFD);
637                    } else {
638                        if (0x800 > u)
639                            luaL_addchar(&b, (unsigned char) (0xC0 | (u >> 6)));
640                        else {
641                            if (0x10000 > u)
642                                luaL_addchar(&b, (unsigned char) (0xE0 | (u >> 12)));
643                            else {
644                                luaL_addchar(&b, (unsigned char) (0xF0 | (u >>18)));
645                                luaL_addchar(&b, (unsigned char) (0x80 | (0x3F & (u >> 12))));
646                            }
647                            luaL_addchar(&b, 0x80 | (0x3F & (u >> 6)));
648                        }
649                        luaL_addchar(&b, 0x80 | (0x3F & u));
650                    }
651                }
652                lua_pop(L, 1);
653            }
654            luaL_pushresult(&b);
655        } else {
656            lua_pushliteral(L, "");
657        }
658        return 1;
659    }
660    return 0;
661}
662
663/*
664static int strlib_format_toutf16(lua_State* L) {
665    if (lua_type(L, 1) == LUA_TTABLE) {
666        lua_Integer n = lua_rawlen(L, 1);
667        if (n > 0) {
668            luaL_Buffer b;
669            luaL_buffinitsize(L, &b, (n + 2) * 4);
670            for (lua_Integer i = 0; i <= n; i++) {
671                if (lua_rawgeti(L, 1, i) == LUA_TNUMBER) {
672                    unsigned u = (unsigned) lua_tointeger(L, -1);
673                    if (invalid_unicode(u)) {
674                        luaL_addchar(&b, 0xFF);
675                        luaL_addchar(&b, 0xFD);
676                    } else if (u < 0x10000) {
677                        luaL_addchar(&b, (unsigned char) ((u & 0x00FF)     ));
678                        luaL_addchar(&b, (unsigned char) ((u & 0xFF00) >> 8));
679                    } else {
680                        u = u - 0x10000;
681                        luaL_addchar(&b, (unsigned char) ((((u>>10)+0xD800) & 0x00FF)     ));
682                        luaL_addchar(&b, (unsigned char) ((((u>>10)+0xD800) & 0xFF00) >> 8));
683                        luaL_addchar(&b, (unsigned char) (( (u%1024+0xDC00) & 0x00FF)     ));
684                        luaL_addchar(&b, (unsigned char) (( (u%1024+0xDC00) & 0xFF00) >> 8));
685                    }
686                }
687                lua_pop(L, 1);
688            }
689            luaL_addchar(&b, 0);
690            luaL_addchar(&b, 0);
691            luaL_pushresult(&b);
692        } else {
693            lua_pushliteral(L, "");
694        }
695        return 1;
696    }
697    return 0;
698}
699*/
700
701static int strlib_format_toutf32(lua_State *L)
702{
703    if (lua_type(L, 1) == LUA_TTABLE) {
704        lua_Integer n = lua_rawlen(L, 1);
705        if (n > 0) {
706            luaL_Buffer b;
707            luaL_buffinitsize(L, &b, (n + 2) * 4);
708            for (lua_Integer i = 0; i <= n; i++) {
709                /* there should be one operation for getting a number from a table */
710                if (lua_rawgeti(L, 1, i) == LUA_TNUMBER) {
711                    unsigned u = (unsigned) lua_tointeger(L, -1);
712                    if (invalid_unicode(u)) {
713                        luaL_addchar(&b, 0x00);
714                        luaL_addchar(&b, 0x00);
715                        luaL_addchar(&b, 0xFF);
716                        luaL_addchar(&b, 0xFD);
717                    } else {
718                        luaL_addchar(&b, (unsigned char) ((u & 0x000000FF)      ));
719                        luaL_addchar(&b, (unsigned char) ((u & 0x0000FF00) >>  8));
720                        luaL_addchar(&b, (unsigned char) ((u & 0x00FF0000) >> 16));
721                        luaL_addchar(&b, (unsigned char) ((u & 0xFF000000) >> 24));
722                    }
723                }
724                lua_pop(L, 1);
725            }
726            for (int i = 0; i <= 3; i++) {
727                luaL_addchar(&b, 0);
728            }
729            luaL_pushresult(&b);
730        } else {
731            lua_pushliteral(L, "");
732        }
733        return 1;
734    }
735    return 0;
736}
737
738/* 
739    str, true       : big endian
740    str, false      : little endian
741    str, nil, true  : check bom, default to big endian 
742    str, nil, false : check bom, default to little endian 
743    str, nil, nil   : check bom, default to little endian 
744*/
745
746static int strlib_utf16toutf8(lua_State *L)
747{
748    size_t ls = 0;
749    const char *s = lua_tolstring(L, 1, &ls);
750    if (ls % 2) { 
751        --ls; 
752    }
753    if (ls) { 
754        luaL_Buffer b;
755        int more = 0;
756        int be = 1;
757        size_t i = 0;
758        luaL_buffinitsize(L, &b, ls); /* unlikely to be larger if we have latin */ 
759        if (lua_type(L, 2) == LUA_TBOOLEAN) {
760            be = lua_toboolean(L, 2);
761        } else if (s[0] == '\xFE' && s[1] == '\xFF') {
762            be = 1; 
763            i += 2;
764        } else if (s[0] == '\xFF' && s[1] == '\xEF') {  
765            be = 0; 
766            i += 2;
767        } else { 
768            be = lua_toboolean(L, 3);
769        }
770        while (i < ls) { 
771            unsigned char l = (unsigned char) s[i++];
772            unsigned char r = (unsigned char) s[i++];
773            unsigned now = be ? 256 * l + r : l + 256 * r;
774            if (more) { 
775                now = (more - 0xD800) * 0x400 + (now - 0xDC00) + 0x10000;
776                more = 0;
777                strlib_aux_add_utfchar(&b, now);
778            } else if (now >= 0xD800 && now <= 0xDBFF) { 
779                more = now;
780            } else { 
781                strlib_aux_add_utfchar(&b, now);
782            }
783        }
784        luaL_pushresult(&b);
785    } else {
786        lua_pushliteral(L, "");
787    } 
788    return 1;
789}
790
791// static char map[] = {
792//     '0', '1', '2', '3',
793//     '4', '5', '6', '7',
794//     '8', '9', 'A', 'B',
795//     'C', 'D', 'E', 'F',
796// };
797
798static int strlib_pack_rows_columns(lua_State* L)
799{
800    if (lua_type(L, 1) == LUA_TTABLE) {
801        lua_Integer rows = lua_rawlen(L, 1);
802        if (lua_rawgeti(L, 1, 1) == LUA_TTABLE) {
803            lua_Integer columns = lua_rawlen(L, -1);
804            switch (lua_rawgeti(L, -1, 1)) {
805                case LUA_TNUMBER:
806                    {
807                        lua_Integer size = rows * columns;
808                        unsigned char *result = lmt_memory_malloc(size);
809                        lua_pop(L, 2); /* row and cell */
810                        if (result) {
811                            unsigned char *first = result;
812                            for (lua_Integer r = 1; r <= rows; r++) {
813                                if (lua_rawgeti(L, -1, r) == LUA_TTABLE) {
814                                    for (lua_Integer c = 1; c <= columns; c++) {
815                                        if (lua_rawgeti(L, -1, c) == LUA_TNUMBER) {
816                                             lua_Integer v = lua_tointeger(L, -1);
817                                            *result++ = v < 0 ? 0 : v > 255 ? 255 : (unsigned char) v;
818                                        } else { 
819                                            *result++ = 0;
820                                        }
821                                        lua_pop(L, 1);
822                                    }
823                                }
824                                lua_pop(L, 1);
825                            }
826                            lua_pushlstring(L, (char *) first, result - first);
827                            return 1;
828                        }
829                    }
830                case LUA_TTABLE:
831                    {
832                        lua_Integer mode = lua_rawlen(L, -1);
833                        lua_Integer size = rows * columns * mode;
834                        unsigned char *result = lmt_memory_malloc(size);
835                        lua_pop(L, 2); /* row and cell */
836                        if (result) {
837                            unsigned char *first = result;
838                            for (lua_Integer r = 1; r <= rows; r++) {
839                                if (lua_rawgeti(L, -1, r) == LUA_TTABLE) {
840                                    for (lua_Integer c = 1; c <= columns; c++) {
841                                        if (lua_rawgeti(L, -1, c) == LUA_TTABLE) {
842                                            for (int i = 1; i <= mode; i++) {
843                                                if (lua_rawgeti(L, -1, i) == LUA_TNUMBER) {
844                                                    lua_Integer v = lua_tointeger(L, -1);
845                                                    *result++ = v < 0 ? 0 : v > 255 ? 255 : (unsigned char) v;
846                                                } else { 
847                                                    *result++ = 0;
848                                                }
849                                                lua_pop(L, 1);
850                                            }
851                                        }
852                                        lua_pop(L, 1);
853                                    }
854                                }
855                                lua_pop(L, 1);
856                            }
857                            lua_pushlstring(L, (char *) first, result - first);
858                            return 1;
859                        }
860                    }
861            }
862        }
863    }
864    lua_pushnil(L);
865    return 1;
866}
867
868/*tex 
869    This converts a hex string to characters. Spacing is ignored and invalid characters result in 
870    a false result. Empty strings are okay. 
871*/
872
873static int strlib_hextocharacters(lua_State *L)
874{
875    size_t ls = 0;
876    const char *s = lua_tolstring(L, 1, &ls);
877    if (ls > 0) {
878        luaL_Buffer b;
879        luaL_buffinitsize(L, &b, ls/2);
880        while (1) { 
881            unsigned char first = *s++;
882            switch (first) {
883                case ' ': case '\n': case '\r': case '\t':
884                    continue;
885                case '\0': 
886                    goto DONE;
887                default: 
888                    {
889                        unsigned char second = *s++;
890                        switch (second) {
891                            case ' ': case '\n': case '\r': case '\t':
892                                continue;
893                            case '\0': 
894                                goto BAD;
895                            default: 
896                                { 
897                                    unsigned char chr;
898                                    if (first >= '0' && first <= '9') {
899                                        chr = 16 * (first - '0');
900                                    } else if (first>= 'A' && first <= 'F') {
901                                        chr = 16 * (first - 'A' + 10);
902                                    } else if (first >= 'a' && first <= 'f') {
903                                        chr = 16 * (first - 'a' + 10);
904                                    } else { 
905                                        goto BAD;
906                                    }
907                                    if (second >= '0' && second <= '9') {
908                                        chr += second - '0';
909                                    } else if (second >= 'A' && second <= 'F') {
910                                        chr += second - 'A' + 10;
911                                    } else if (second >= 'a' && second <= 'f') {
912                                        chr += second - 'a' + 10;
913                                    } else { 
914                                        goto BAD;
915                                    }
916                                    luaL_addchar(&b, chr);
917                                    break;
918                                }
919                        }
920                        break;
921                    }
922            }
923        }
924      DONE:
925        luaL_pushresult(&b);
926        return 1;
927      BAD:
928        lua_pushboolean(L, 0);
929        return 1;
930    } else { 
931        lua_pushliteral(L, "");
932        return 1;
933    }
934}
935
936static int strlib_octtointeger(lua_State *L)
937{
938    const char *s = lua_tostring(L, 1);
939//  lua_Integer n = 0;
940//     int negate = *s == '-';
941//     if (negate) {
942//         s++;
943//     }
944//     while (*s && n < 0xFFFFFFFF) { /* large enough */
945//         if (*s >= '0' && *s <= '7') {
946//             n = n * 8 + *s - '0';
947//         } else { 
948//             break;
949//         }
950//         s++;
951//     }    
952//  lua_pushinteger(L, negate ? -n : n);
953    lua_pushinteger(L, strtoul(s, NULL, 8));
954    return 1; 
955}
956
957static int strlib_dectointeger(lua_State *L)
958{
959    const char *s = lua_tostring(L, 1);
960//  lua_Integer n = 0;
961//  int negate = *s == '-';
962//  if (negate) {
963//      s++;
964//  }
965//  while (*s && n < 0xFFFFFFFF) { /* large enough */
966//      if (*s >= '0' && *s <= '9') {
967//          n = n * 10 + *s - '0';
968//      } else { 
969//          break;
970//      }
971//      s++;
972//  }    
973//  lua_pushinteger(L, negate ? -n : n);
974//  lua_pushinteger(L, atol(s));
975    lua_pushinteger(L, strtoul(s, NULL, 10));
976    return 1; 
977}
978
979static int strlib_hextointeger(lua_State *L)
980{
981    const char *s = lua_tostring(L, 1);
982//  lua_Integer n = 0;
983//  int negate = *s == '-';
984//  if (negate) {
985//      s++;
986//  }
987//  while (*s && n < 0xFFFFFFFF) { /* large enough */
988//      if (*s >= '0' && *s <= '9') {
989//          n = n * 16 + *s - '0';
990//      } else if (*s >= 'A' && *s <= 'F') {
991//          n = n * 16 + *s - 'A' + 10;
992//      } else if (*s >= 'a' && *s <= 'f') {
993//          n = n * 16 + *s - 'a' + 10;
994//      } else { 
995//          break;
996//      }
997//      s++;
998//  }    
999//  lua_pushinteger(L, negate ? -n : n);
1000    lua_pushinteger(L, strtoul(s, NULL, 16));
1001    return 1; 
1002}
1003
1004static int strlib_chrtointeger(lua_State *L)
1005{
1006    lua_Integer n = 0;
1007    size_t l = 0;
1008    const char *s = lua_tolstring(L, 1, &l);
1009    if (l > 0) {
1010        size_t p = 0;
1011        while (p < l && n < 0xFFFFFFFF) { /* large enough */
1012            n = n * 255 + (unsigned char) s[p];
1013            p++;
1014        }    
1015        lua_pushinteger(L, n);
1016    }
1017    return 1; 
1018}
1019
1020static const luaL_Reg strlib_function_list[] = {
1021    { "characters",        strlib_characters         },
1022    { "characterpairs",    strlib_characterpairs     },
1023    { "bytes",             strlib_bytes              },
1024    { "bytepairs",         strlib_bytepairs          },
1025    { "bytetable",         strlib_bytetable          },
1026    { "linetable",         strlib_linetable          },
1027    { "utfvalues",         strlib_utfvalues          },
1028    { "utfcharacters",     strlib_utfcharacters      },
1029    { "utfcharacter",      strlib_utfcharacter       },
1030    { "utfvalue",          strlib_utfvalue           },
1031    { "utflength",         strlib_utflength          },
1032    { "utfvaluetable",     strlib_utfvaluetable      },
1033    { "utfcharactertable", strlib_utfcharactertable  },
1034    { "f6",                strlib_format_f6          },
1035    { "tounicode16",       strlib_format_tounicode16 },
1036    { "toutf8",            strlib_format_toutf8      },
1037 /* { "toutf16",           strlib_format_toutf16     }, */ /* untested */
1038    { "toutf32",           strlib_format_toutf32     },
1039    { "utf16toutf8",       strlib_utf16toutf8        },
1040    { "packrowscolumns",   strlib_pack_rows_columns  },
1041    { "hextocharacters",   strlib_hextocharacters    },
1042    { "octtointeger",      strlib_octtointeger       },
1043    { "dectointeger",      strlib_dectointeger       },
1044    { "hextointeger",      strlib_hextointeger       },
1045    { "chrtointeger",      strlib_chrtointeger       },
1046    { NULL,                NULL                      },
1047};
1048
/*
    The next (old, moved here) experiment was used to check if using some buffer is more efficient
    than using a table that we concat. It makes no difference. If we ever use this, the initializer
    |luaextend_string_buffer| will be merged into |luaextend_string|. We could gain a little by
    using a more efficient |luaL_checkudata|, as we do elsewhere, because in practice (surprise) its
    overhead makes buffers like this {\em 50 percent} slower than the concatenated variant and
    twice as slow when we reuse a temporary table. It's just better to stay at the \LUA\ end.

    Replacing the userdata test with a dedicated test gives a speed boost but we're still some
    {\em 10 percent} slower. So, for now we comment out this feature.
*/
1060
1061# if (0) 
1062
1063    typedef struct lmt_string_buffer {
1064        char   *buffer;
1065        size_t  length;
1066        size_t  size;
1067        size_t  step;
1068    } lmt_string_buffer;
1069
1070    static lmt_string_buffer *strlib_buffer_instance(lua_State *L)
1071    {
1072        lmt_string_buffer *b = (lmt_string_buffer *) lua_touserdata(L, 1);
1073        if (b && lua_getmetatable(L, 1)) {
1074            lua_get_metatablelua(string_buffer_instance);
1075            if (! lua_rawequal(L, -1, -2)) {
1076                b = NULL;
1077            } else if (! b->buffer) {
1078                b = NULL;
1079            }
1080            lua_pop(L, 2);
1081            return b;
1082        }
1083        return NULL;
1084    }
1085
1086    static int strlib_buffer_gc(lua_State *L)
1087    {
1088        lmt_string_buffer *b = strlib_buffer_instance(L);
1089        if (b) {
1090            lmt_memory_free(b->buffer);
1091        }
1092        return 0;
1093    }
1094
1095    static int strlib_buffer_new(lua_State *L)
1096    {
1097        size_t size = lmt_optsizet(L, 1, LUAL_BUFFERSIZE);
1098        size_t step = lmt_optsizet(L, 2, size);
1099        lmt_string_buffer *b = (lmt_string_buffer *) lua_newuserdatauv(L, sizeof(lmt_string_buffer), 0);
1100        b->buffer = lmt_memory_malloc(size);
1101        b->size   = size;
1102        b->step   = step;
1103        b->length = 0;
1104        lua_get_metatablelua(string_buffer_instance);
1105        lua_setmetatable(L, -2);
1106        return 1;
1107    }
1108
1109    static int strlib_buffer_add(lua_State *L)
1110    {
1111        lmt_string_buffer *b = strlib_buffer_instance(L);
1112        if (b) {
1113            switch (lua_type(L, 2)) {
1114                case LUA_TSTRING:
1115                case LUA_TNUMBER:
1116                    {
1117                        size_t l;
1118                        const char *s = lua_tolstring(L, 2, &l);
1119                        size_t length = b->length + l;
1120                        if (length >= b->size) {
1121                            while (length >= b->size) {
1122                                 b->size += b->step;
1123                            }
1124                            b->buffer = lmt_memory_realloc(b->buffer, b->size);
1125                        }
1126                        memcpy(&b->buffer[b->length], s, l);
1127                        b->length = length;
1128                    }
1129                    break;
1130                default:
1131                    break;
1132            }
1133        }
1134        return 0;
1135    }
1136
1137    static int strlib_buffer_get_data(lua_State *L)
1138    {
1139        lmt_string_buffer *b = strlib_buffer_instance(L);
1140        if (b) {
1141            lua_pushlstring(L, b->buffer, b->length);
1142            lua_pushinteger(L, (int) b->length);
1143            return 2;
1144        } else {
1145            lua_pushnil(L);
1146            return 1;
1147        }
1148    }
1149
1150    static int strlib_buffer_get_size(lua_State *L)
1151    {
1152        lmt_string_buffer *b = strlib_buffer_instance(L);
1153        lua_pushinteger(L, b ? b->length : 0);
1154        return 1;
1155    }
1156
1157    static const luaL_Reg strlib_function_list_buffer[] = {
1158        { "newbuffer",         strlib_buffer_new         },
1159        { "addtobuffer",       strlib_buffer_add         },
1160        { "getbufferdata",     strlib_buffer_get_data    },
1161        { "getbuffersize",     strlib_buffer_get_size    },
1162        { NULL,                NULL                      },
1163    };
1164
1165    static int luaextend_string_buffer(lua_State *L)
1166    {
1167        lua_getglobal(L, "string");
1168        for (const luaL_Reg *lib = strlib_function_list_buffer; lib->name; lib++) {
1169            lua_pushcfunction(L, lib->func);
1170            lua_setfield(L, -2, lib->name);
1171        }
1172        lua_pop(L, 1);
1173        luaL_newmetatable(L, STRING_BUFFER_INSTANCE);
1174        lua_pushcfunction(L, strlib_buffer_gc);
1175        lua_setfield(L, -2, "__gc");
1176        lua_pop(L, 1);
1177        return 1;
1178    }
1179
1180# else 
1181
1182    static int luaextend_string_buffer(lua_State *L)
1183    {
1184        (void) L;
1185        return 0;
1186    }
1187
1188# endif 
1189
1190int luaextend_string(lua_State * L)
1191{
1192    lua_getglobal(L, "string");
1193    for (const luaL_Reg *lib = strlib_function_list; lib->name; lib++) {
1194        lua_pushcfunction(L, lib->func);
1195        lua_setfield(L, -2, lib->name);
1196    }
1197    lua_pop(L, 1);
1198    luaextend_string_buffer(L);
1199    return 1;
1200}
1201