lmtlanguagelib.c /size: 13 Kb    last modification: 2024-01-16 10:22
1/*
2    See license.txt in the root of this project.
3*/
4
5/*tex
6
7    This is the interface to everything that relates to hyphenation in the frontend: defining
8    a new language, setting properties for hyphenation, loading patterns and exceptions.
9
10*/
11
12# include "luametatex.h"
13
/*tex The name under which the metatable for language userdata is registered and looked up. */
# define LANGUAGE_METATABLE "luatex.language"
/*tex The registry key under which the table with the per language word handlers is stored. */
# define LANGUAGE_FUNCTIONS "luatex.language.wordhandlers"
16
17/* todo: get rid of top */
18
/*tex The payload of a language userdata: only a pointer to the language record it refers to. */
typedef struct languagelib_language {
    tex_language *lang; /*tex filled in by languagelib_new, read back by languagelib_object */
} languagelib_language;
22
23static int languagelib_new(lua_State *L)
24{
25    languagelib_language *ulang = lua_newuserdatauv(L, sizeof(tex_language *), 0);
26    if (lua_type(L, 1) == LUA_TNUMBER) {
27        halfword lualang = lmt_tohalfword(L, 1);
28        ulang->lang = tex_get_language(lualang);
29        if (! ulang->lang) {
30            return luaL_error(L, "undefined language %d", lualang);
31        }
32    } else {
33        ulang->lang = tex_new_language(-1);
34        if (! ulang->lang) {
35            return luaL_error(L, "no room for a new language");
36        }
37    }
38    luaL_getmetatable(L, LANGUAGE_METATABLE);
39    lua_setmetatable(L, -2);
40    return 1;
41}
42
43static tex_language *languagelib_object(lua_State* L)
44{
45    tex_language *lang = NULL;
46    switch (lua_type(L, 1)) {
47        case LUA_TNUMBER:
48            lang = tex_get_language(lmt_tohalfword(L, 1));
49            break;
50        case LUA_TUSERDATA:
51            {
52                languagelib_language *ulang = lua_touserdata(L, 1);
53                if (ulang && lua_getmetatable(L, 1)) {
54                    luaL_getmetatable(L, LANGUAGE_METATABLE);
55                    if (lua_rawequal(L, -1, -2)) {
56                        lang = ulang->lang;
57                    }
58                    lua_pop(L, 2);
59                }
60                break;
61            }
62        case LUA_TBOOLEAN:
63            if (lua_toboolean(L, 1)) {
64                lang = tex_get_language(language_par);
65            }
66            break;
67    }
68    if (! lang) {
69        luaL_error(L, "argument should be a valid language id, language object, or true");
70    }
71    return lang;
72}
73
74static int languagelib_id(lua_State *L)
75{
76    tex_language *lang = languagelib_object(L);
77    lua_pushinteger(L, lang->id);
78    return 1;
79}
80
81static int languagelib_patterns(lua_State *L)
82{
83    tex_language *lang = languagelib_object(L);
84    if (lua_gettop(L) == 1) {
85        if (lang->patterns) {
86            lua_pushstring(L, (char *) hnj_dictionary_tostring(lang->patterns));
87        } else {
88            lua_pushnil(L);
89        }
90        return 1;
91    } else if (lua_type(L, 2) == LUA_TSTRING) {
92        tex_load_patterns(lang, (const unsigned char *) lua_tostring(L, 2));
93        return 0;
94    } else {
95        return luaL_error(L, "argument should be a string");
96    }
97}
98
99static int languagelib_clear_patterns(lua_State *L)
100{
101    tex_language *lang = languagelib_object(L);
102    tex_clear_patterns(lang);
103    return 0;
104}
105
106static int languagelib_hyphenation(lua_State *L)
107{
108    tex_language *lang = languagelib_object(L);
109    if (lua_gettop(L) == 1) {
110        if (lang->exceptions) {
111            luaL_Buffer b;
112            int done = 0;
113            luaL_buffinit(L, &b);
114            if (lua_rawgeti(L, LUA_REGISTRYINDEX, lang->exceptions) == LUA_TTABLE) {
115                lua_pushnil(L);
116                while (lua_next(L, -2)) {
117                    if (done) {
118                        luaL_addlstring(&b, " ", 1);
119                    } else {
120                        done = 1;
121                    }
122                    luaL_addvalue(&b);
123                }
124            }
125            luaL_pushresult(&b);
126        } else {
127            lua_pushnil(L);
128        }
129        return 1;
130    } else if (lua_type(L, 2) == LUA_TSTRING) {
131        tex_load_hyphenation(lang, (const unsigned char *) lua_tostring(L, 2));
132        return 0;
133    } else {
134        return luaL_error(L, "argument should be a string");
135    }
136}
137
138static int languagelib_pre_hyphen_char(lua_State *L)
139{
140    tex_language *lang = languagelib_object(L);
141    if (lua_gettop(L) == 1) {
142        lua_pushinteger(L, lang->pre_hyphen_char);
143        return 1;
144    } else if (lua_type(L, 2) == LUA_TNUMBER) {
145        lang->pre_hyphen_char = lmt_tohalfword(L, 2);
146    } else {
147        return luaL_error(L, "argument should be a character number");
148    }
149    return 0;
150}
151
152static int languagelib_post_hyphen_char(lua_State *L)
153{
154    tex_language *lang = languagelib_object(L);
155    if (lua_gettop(L) == 1) {
156        lua_pushinteger(L, lang->post_hyphen_char);
157        return 1;
158    } else if (lua_type(L, 2) == LUA_TNUMBER) {
159        lang->post_hyphen_char = lmt_tohalfword(L, 2);
160    } else {
161        return luaL_error(L, "argument should be a character number");
162    }
163    return 0;
164}
165
166static int languagelib_pre_exhyphen_char(lua_State *L)
167{
168    tex_language *lang = languagelib_object(L);
169    if (lua_gettop(L) == 1) {
170        lua_pushinteger(L, lang->pre_exhyphen_char);
171        return 1;
172    } else if (lua_type(L, 2) == LUA_TNUMBER) {
173        lang->pre_exhyphen_char = lmt_tohalfword(L, 2);
174        return 0;
175    } else {
176        return luaL_error(L, "argument should be a character number");
177    }
178}
179
180/* We push nuts! */
181
182int lmt_handle_word(tex_language *lang, const char *original, const char *word, int length, halfword first, halfword last, char **replacement)
183{
184    if (lang->wordhandler && word && first && last) {
185        lua_State *L = lmt_lua_state.lua_instance;
186        int stacktop = lua_gettop(L);
187        int result = 0;
188        int res;
189        *replacement = NULL;
190        lua_pushcfunction(L, lmt_traceback); /* goes before function */
191        lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_language_state.handler_table_id);
192        lua_rawgeti(L, -1, lang->id);
193        lua_pushinteger(L, lang->id);
194        lua_pushstring(L, original);
195        lua_pushstring(L, word);
196        lua_pushinteger(L, length);
197        lua_pushinteger(L, first);
198        lua_pushinteger(L, last);
199        res = lua_pcall(L, 6, 1, 0);
200        if (res) {
201            lua_remove(L, stacktop + 1);
202            lmt_error(L, "function call", -1, res == LUA_ERRRUN ? 0 : 1);
203        }
204        ++lmt_language_state.handler_count;
205        switch (lua_type(L, -1)) {
206            case LUA_TSTRING:
207                *replacement = (char *) lmt_memory_strdup(lua_tostring(L, -1));
208                break;
209            case LUA_TNUMBER:
210                result = lmt_tointeger(L, -1);
211                break;
212            default:
213                break;
214        }
215        lua_settop(L, stacktop);
216        return result;
217    }
218    return 0;
219}
220
221void lmt_initialize_languages(void)
222{
223     lua_State *L = lmt_lua_state.lua_instance;
224     lua_newtable(L);
225     lmt_language_state.handler_table_id = luaL_ref(L, LUA_REGISTRYINDEX);
226     lua_pushstring(L, LANGUAGE_FUNCTIONS);
227     lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_language_state.handler_table_id);
228     lua_settable(L, LUA_REGISTRYINDEX);
229}
230
231static int languagelib_setwordhandler(lua_State* L)
232{
233    tex_language *lang = languagelib_object(L);
234    switch (lua_type(L, 2)) {
235        case LUA_TBOOLEAN:
236            if (lua_toboolean(L, 2)) {
237                goto DEFAULT;
238            } else {
239                // fall-through
240            }
241        case LUA_TNIL:
242            {
243                if (lang->wordhandler) {
244                    lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_language_state.handler_table_id);
245                    lua_pushnil(L);
246                    lua_rawseti(L, -2, lang->id);
247                    lang->wordhandler = 0;
248                }
249                break;
250            }
251        case LUA_TFUNCTION:
252            {
253                lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_language_state.handler_table_id);
254                lua_pushvalue(L, 2);
255                lua_rawseti(L, -2, lang->id);
256                lang->wordhandler = 1;
257                break;
258            }
259        default:
260          DEFAULT:
261            return luaL_error(L, "argument should be a function, false or nil");
262    }
263    return 0;
264}
265
266static int languagelib_sethjcode(lua_State *L)
267{
268    tex_language *lang = languagelib_object(L);
269    if (lua_type(L, 2) == LUA_TNUMBER) {
270        halfword i = lmt_tohalfword(L, 2) ;
271        if (lua_type(L, 3) == LUA_TNUMBER) {
272            tex_set_hj_code(lang->id, i, lmt_tohalfword(L, 3), -1);
273        } else {
274            tex_set_hj_code(lang->id, i, i, -1);
275        }
276        return 0;
277    } else {
278        return luaL_error(L, "argument should be a character number");
279    }
280}
281
282static int languagelib_gethjcode(lua_State *L)
283{
284    tex_language *lang = languagelib_object(L);
285    if (lua_type(L, 2) == LUA_TNUMBER) {
286        lua_pushinteger(L, tex_get_hj_code(lang->id, lmt_tohalfword(L, 2)));
287        return 1;
288    } else {
289        return luaL_error(L, "argument should be a character number");
290    }
291}
292
293static int languagelib_post_exhyphen_char(lua_State *L)
294{
295    tex_language *lang = languagelib_object(L);
296    if (lua_gettop(L) == 1) {
297        lua_pushinteger(L, lang->post_exhyphen_char);
298        return 1;
299    } else if (lua_type(L, 2) == LUA_TNUMBER) {
300        lang->post_exhyphen_char = lmt_tohalfword(L, 2);
301        return 0;
302    } else {
303        return luaL_error(L, "argument should be a character number");
304    }
305}
306
307static int languagelib_hyphenation_min(lua_State *L)
308{
309    tex_language *lang = languagelib_object(L);
310    if (lua_gettop(L) == 1) {
311        lua_pushinteger(L, lang->hyphenation_min);
312        return 1;
313    } else if (lua_type(L, 2) == LUA_TNUMBER) {
314        lang->hyphenation_min = lmt_tohalfword(L, 2);
315        return 0;
316    } else {
317        return luaL_error(L, "argument should be a number");
318    }
319}
320
321static int languagelib_clear_hyphenation(lua_State *L)
322{
323    tex_language *lang = languagelib_object(L);
324    tex_clear_hyphenation(lang);
325    return 0;
326}
327
328static int languagelib_clean(lua_State *L)
329{
330    char *cleaned = NULL;
331    if (lua_type(L, 1) == LUA_TSTRING) {
332        tex_clean_hyphenation(cur_lang_par, lua_tostring(L, 1), &cleaned);
333    } else {
334        tex_language *lang = languagelib_object(L);
335        if (lang) {
336            if (lua_type(L, 2) == LUA_TSTRING) {
337                tex_clean_hyphenation(lang->id, lua_tostring(L, 2), &cleaned);
338            } else {
339                return luaL_error(L, "second argument should be a string");
340            }
341        } else {
342            return luaL_error(L, "first argument should be a string or language");
343        }
344    }
345    lua_pushstring(L, cleaned);
346    lmt_memory_free(cleaned);
347    return 1;
348}
349
350static int languagelib_hyphenate(lua_State *L)
351{
352    halfword h = lmt_check_isnode(L, 1);
353    halfword t = null;
354    if (lua_isuserdata(L, 2)) {
355        t = lmt_check_isnode(L, 2);
356    }
357    if (! t) {
358        t = h;
359        while (node_next(t)) {
360            t = node_next(t);
361        }
362    }
363    tex_hyphenate_list(h, t);
364    lmt_push_node_fast(L, h);
365    lmt_push_node_fast(L, t);
366    lua_pushboolean(L, 1);
367    return 3;
368}
369
370static int languagelib_current(lua_State *L)
371{
372    lua_pushinteger(L, language_par);
373    return 1;
374}
375
376static int languagelib_has_language(lua_State *L)
377{
378    halfword h = lmt_check_isnode(L, 1);
379    while (h) {
380        if (node_type(h) == glyph_node && get_glyph_language(h) > 0) {
381            lua_pushboolean(L, 1);
382            return 1;
383        } else {
384            h = node_next(h);
385        }
386    }
387    lua_pushboolean(L,0);
388    return 1;
389}
390
391static const struct luaL_Reg langlib_metatable[] = {
392    { "clearpatterns",     languagelib_clear_patterns     },
393    { "clearhyphenation",  languagelib_clear_hyphenation  },
394    { "patterns",          languagelib_patterns           },
395    { "hyphenation",       languagelib_hyphenation        },
396    { "prehyphenchar",     languagelib_pre_hyphen_char    },
397    { "posthyphenchar",    languagelib_post_hyphen_char   },
398    { "preexhyphenchar",   languagelib_pre_exhyphen_char  },
399    { "postexhyphenchar",  languagelib_post_exhyphen_char },
400    { "hyphenationmin",    languagelib_hyphenation_min    },
401    { "sethjcode",         languagelib_sethjcode          },
402    { "gethjcode",         languagelib_gethjcode          },
403    { "setwordhandler",    languagelib_setwordhandler     },
404    { "id",                languagelib_id                 },
405    { NULL,                NULL                           },
406};
407
408static const struct luaL_Reg langlib_function_list[] = {
409    { "clearpatterns",     languagelib_clear_patterns     },
410    { "clearhyphenation",  languagelib_clear_hyphenation  },
411    { "patterns",          languagelib_patterns           },
412    { "hyphenation",       languagelib_hyphenation        },
413    { "prehyphenchar",     languagelib_pre_hyphen_char    },
414    { "posthyphenchar",    languagelib_post_hyphen_char   },
415    { "preexhyphenchar",   languagelib_pre_exhyphen_char  },
416    { "postexhyphenchar",  languagelib_post_exhyphen_char },
417    { "hyphenationmin",    languagelib_hyphenation_min    },
418    { "sethjcode",         languagelib_sethjcode          },
419    { "gethjcode",         languagelib_gethjcode          },
420    { "setwordhandler",    languagelib_setwordhandler     },
421    { "id",                languagelib_id                 },
422    { "clean",             languagelib_clean              }, /* maybe obsolete */
423    { "has_language",      languagelib_has_language       },
424    { "hyphenate",         languagelib_hyphenate          },
425    { "current",           languagelib_current            },
426    { "new",               languagelib_new                },
427    { NULL,                NULL                           },
428};
429
430int luaopen_language(lua_State *L)
431{
432    luaL_newmetatable(L, LANGUAGE_METATABLE);
433    lua_pushvalue(L, -1);
434    lua_setfield(L, -2, "__index");
435    luaL_setfuncs(L, langlib_metatable, 0);
436    lua_newtable(L);
437    luaL_setfuncs(L, langlib_function_list, 0);
438    return 1;
439}
440