1
4
5# include "luametatex.h"
6
7
22
/*tex
    The global language state. The |languages| array grows on demand; the
    |language_data| record holds its allocation bookkeeping (bounds, step,
    usage) and the remaining fields are statistics counters plus two scratch
    buffers shared by the hyphenation routines.
*/

language_state_info lmt_language_state = {
    .languages = NULL,
    .language_data = {
        .minimum = min_language_size,
        .maximum = max_language_size,
        .size = memory_data_unset,
        .step = stp_language_size,
        .allocated = 0,
        .itemsize = sizeof(tex_language *),
        .top = 0,
        .ptr = 0,
        .initial = memory_data_unset,
        .offset = 0,
        .extra = 0,
    },
    .handler_table_id = 0,
    .handler_count = 0,
    .list_count = 0,
    .checked_count = 0,
    .exceptions_count = 0,
    .hyphenated_count = 0,
    .nothing_count = 0,
    .shared_word_buffer = { 0 },
    .shared_uword_buffer = { 0 },
};
48
49
54
55static void tex_aux_reset_language(halfword id)
56{
57 tex_language *lang = lmt_language_state.languages[id];
58 lang->id = id;
59 lang->exceptions = 0;
60 lang->patterns = NULL;
61 lang->wordhandler = 0;
62 lang->pre_hyphen_char = '-';
63 lang->post_hyphen_char = 0;
64 lang->pre_exhyphen_char = 0;
65 lang->post_exhyphen_char = 0;
66 lang->hyphenation_min = -1;
67 lang->hjcode_head = NULL;
68}
69
70
74
/*tex
    Hand out a language id. A non negative |id| claims that specific slot: an
    error is issued when the slot is already occupied, and the array is grown
    when the id lies beyond the current top (but still within |maximum|). A
    negative |id| means: give me the next free one, growing the array by
    |step| entries when needed. On overflow an error is reported and zero is
    returned.
*/

static halfword tex_aux_new_language_id(halfword id)
{
    int top;
    if (id >= 0) {
        if (id <= lmt_language_state.language_data.top) {
            if (lmt_language_state.languages[id]) {
                return tex_formatted_error("languages", "the language with id %d is already created", id);
            } else {
                /*tex The slot exists but is unused, so we can hand it out as-is. */
                return id;
            }
        } else if (id > lmt_language_state.language_data.maximum) {
            goto OVERFLOWERROR;
        } else {
            /*tex Grow just enough to cover the requested id. */
            top = id;
        }
    } else if (lmt_language_state.language_data.ptr < lmt_language_state.language_data.top) {
        /*tex There still is room; no reallocation needed. */
        ++lmt_language_state.language_data.ptr;
        return lmt_language_state.language_data.ptr;
    } else if (lmt_language_state.language_data.top >= lmt_language_state.language_data.maximum) {
        goto OVERFLOWERROR;
    } else if (lmt_language_state.language_data.top + lmt_language_state.language_data.step > lmt_language_state.language_data.maximum) {
        /*tex A full step would overshoot, so clip to the maximum. */
        top = lmt_language_state.language_data.maximum;
    } else {
        top = lmt_language_state.language_data.top + lmt_language_state.language_data.step;
    }
    /*tex Reallocate the array and null the freshly exposed slots. */
    {
        tex_language **tmp = aux_reallocate_array(lmt_language_state.languages, sizeof(tex_language *), top, 0);
        if (tmp) {
            for (int i = lmt_language_state.language_data.top + 1; i <= top; i++) {
                tmp[i] = NULL;
            }
            lmt_language_state.languages = tmp;
            lmt_language_state.language_data.allocated = top;
            lmt_language_state.language_data.top = top;
            lmt_language_state.language_data.ptr += 1;
            return lmt_language_state.language_data.ptr;
        }
    }
  OVERFLOWERROR:
    tex_overflow_error("languages", lmt_language_state.language_data.maximum);
    return 0;
}
118
/*tex Allocate the initial language array; every slot starts out empty. */

void tex_initialize_languages(void)
{
    tex_language **tmp = aux_allocate_clear_array(sizeof(tex_language *), lmt_language_state.language_data.minimum, 0);
    if (tmp) {
        /*tex The allocator already clears, but we make the intent explicit. */
        for (int i = 0; i < lmt_language_state.language_data.minimum; i++) {
            tmp[i] = NULL;
        }
        lmt_language_state.languages = tmp;
        lmt_language_state.language_data.allocated = lmt_language_state.language_data.minimum;
        lmt_language_state.language_data.top = lmt_language_state.language_data.minimum;
    } else {
        tex_overflow_error("languages", lmt_language_state.language_data.minimum);
    }
}
133
134
140
141int tex_is_valid_language(halfword n)
142{
143 if (n == 0) {
144 return 1;
145 } else if (n > 0 && n <= lmt_language_state.language_data.top) {
146 return lmt_language_state.languages[n] ? 1 : 0;
147 } else {
148 return 0;
149 }
150}
151
/*tex
    Create and register a new language record. A non negative |n| claims that
    specific id, a negative one gets the next free id. Returns |NULL| when no
    id or no memory is available. When |saving_hyph_codes_par| is set the
    current lc codes seed the language's hj code table.
*/

tex_language *tex_new_language(halfword n)
{
    halfword id = tex_aux_new_language_id(n);
    if (id >= 0) {
        tex_language *lang = lmt_memory_malloc(sizeof(struct tex_language));
        if (lang) {
            lmt_language_state.languages[id] = lang;
            lmt_language_state.language_data.extra += sizeof(struct tex_language);
            tex_aux_reset_language(id);
            if (saving_hyph_codes_par) {
                /*tex Seed the hj codes from the current lc codes. */
                tex_hj_codes_from_lc_codes(id);
            }
        } else {
            tex_overflow_error("language", sizeof(struct tex_language));
        }
        /*tex Note: |lang| is |NULL| here when the allocation failed. */
        return lang;
    } else {
        return NULL;
    }
}
176
177tex_language *tex_get_language(halfword n)
178{
179 if (n >= 0) {
180 if (n <= lmt_language_state.language_data.top && lmt_language_state.languages[n]) {
181 return lmt_language_state.languages[n];
182 }
183 if (n <= lmt_language_state.language_data.maximum) {
184 return tex_new_language(n);
185 }
186 }
187 return NULL;
188}
189
190
193
194
205
/*tex
    Dump the language records to the format file. Each slot is preceded by a
    one byte marker (1 = present, 0 = empty); present slots dump their scalar
    fields followed by the hj codes. Patterns and exceptions are not written
    here.
*/

void tex_dump_language_data(dumpstream f)
{
    dump_int(f, lmt_language_state.language_data.top);
    dump_int(f, lmt_language_state.language_data.ptr);
    if (lmt_language_state.language_data.top > 0) {
        for (int i = 0; i < lmt_language_state.language_data.top; i++) {
            tex_language *lang = lmt_language_state.languages[i];
            if (lang) {
                /*tex Marker: this slot is occupied. */
                dump_via_uchar(f, 1);
                dump_int(f, lang->id);
                dump_int(f, lang->pre_hyphen_char);
                dump_int(f, lang->post_hyphen_char);
                dump_int(f, lang->pre_exhyphen_char);
                dump_int(f, lang->post_exhyphen_char);
                dump_int(f, lang->hyphenation_min);
                tex_dump_language_hj_codes(f, i);
            } else {
                dump_via_uchar(f, 0);
            }
        }
    }
}
228
/*tex
    The inverse of |tex_dump_language_data|: rebuild the language array from
    the format file. The per-slot marker byte tells whether a record follows.
    A mismatch between the stored id and the slot index is repaired with a
    warning. When nothing was dumped we fall back to a fresh initialization.
*/

void tex_undump_language_data(dumpstream f)
{
    int top, ptr;
    undump_int(f, top);
    undump_int(f, ptr);
    if (top > 0) {
        tex_language **tmp = aux_allocate_clear_array(sizeof(tex_language *), top, 0);
        if (tmp) {
            lmt_language_state.language_data.top = top;
            lmt_language_state.language_data.ptr = ptr;
            lmt_language_state.languages = tmp;
            lmt_language_state.language_data.allocated = top;
            for (int i = 0; i < top; i++) {
                unsigned char marker;
                undump_uchar(f, marker);
                if (marker == 1) {
                    tex_language *lang = lmt_memory_malloc(sizeof(struct tex_language));
                    if (lang) {
                        lmt_language_state.languages[i] = lang;
                        lmt_language_state.language_data.extra += sizeof(struct tex_language);
                        /*tex Patterns and exceptions are not in the format, so start empty. */
                        lang->exceptions = 0;
                        lang->patterns = NULL;
                        lang->wordhandler = 0;
                        lang->hjcode_head = NULL;
                        undump_int(f, lang->id);
                        undump_int(f, lang->pre_hyphen_char);
                        undump_int(f, lang->post_hyphen_char);
                        undump_int(f, lang->pre_exhyphen_char);
                        undump_int(f, lang->post_exhyphen_char);
                        undump_int(f, lang->hyphenation_min);
                        tex_undump_language_hj_codes(f, i);
                        if (lang->id != i) {
                            tex_formatted_warning("languages", "undumped language id mismatch: %d <> %d", lang->id, i);
                            lang->id = i;
                        }
                    } else {
                        tex_overflow_error("languages", i);
                    }
                    /*tex |lang| can be |NULL| here when the allocation failed. */
                    tmp[i] = lang;
                } else {
                    tmp[i] = NULL;
                }
            }
            lmt_language_state.language_data.initial = lmt_language_state.language_data.ptr;
        } else {
            tex_overflow_error("languages", top);
            lmt_language_state.language_data.initial = 0;
        }
    } else {
        /*tex Nothing was dumped, so set up the default array. */
        tex_initialize_languages();
    }
}
282
283
284
285void tex_set_pre_hyphen_char(halfword n, halfword v)
286{
287 struct tex_language *l = tex_get_language(n);
288 if (l) {
289 l->pre_hyphen_char = v;
290 }
291}
292
293void tex_set_post_hyphen_char(halfword n, halfword v)
294{
295 struct tex_language *l = tex_get_language(n);
296 if (l) {
297 l->post_hyphen_char = v;
298 }
299}
300
301void tex_set_pre_exhyphen_char(halfword n, halfword v)
302{
303 struct tex_language *l = tex_get_language(n);
304 if (l) {
305 l->pre_exhyphen_char = v;
306 }
307}
308
309void tex_set_post_exhyphen_char(halfword n, halfword v)
310{
311 struct tex_language *l = tex_get_language(n);
312 if (l) {
313 l->post_exhyphen_char = v;
314 }
315}
316
317halfword tex_get_pre_hyphen_char(halfword n)
318{
319 struct tex_language *l = tex_get_language(n);
320 return l ? l->pre_hyphen_char : -1;
321}
322
323halfword tex_get_post_hyphen_char(halfword n)
324{
325 struct tex_language *l = tex_get_language(n);
326 return l ? l->post_hyphen_char : -1;
327}
328
329halfword tex_get_pre_exhyphen_char(halfword n)
330{
331 struct tex_language *l = tex_get_language(n);
332 return l ? l->pre_exhyphen_char : -1;
333}
334
335halfword tex_get_post_exhyphen_char(halfword n)
336{
337 struct tex_language *l = tex_get_language(n);
338 return (l) ? (int) l->post_exhyphen_char : -1;
339}
340
341void tex_set_hyphenation_min(halfword n, halfword v)
342{
343 struct tex_language *l = tex_get_language(n);
344 if (l) {
345 l->hyphenation_min = v;
346 }
347}
348
349halfword tex_get_hyphenation_min(halfword n)
350{
351 struct tex_language *l = tex_get_language((int) n);
352 return l ? l->hyphenation_min : -1;
353}
354
355void tex_load_patterns(struct tex_language *lang, const unsigned char *buff)
356{
357 if ((! lang) || (! buff) || strlen((const char *) buff) == 0) {
358 return;
359 } else {
360 if (! lang->patterns) {
361 lang->patterns = hnj_dictionary_new();
362 }
363 hnj_dictionary_load(lang->patterns, buff, tracing_hyphenation_par > 0);
364 }
365}
366
367void tex_clear_patterns(struct tex_language *lang)
368{
369 if (lang && lang->patterns) {
370 hnj_dictionary_clear(lang->patterns);
371 }
372}
373
374void tex_load_tex_patterns(halfword curlang, halfword head)
375{
376
377 char *s = tex_tokenlist_to_tstring(head, 1, NULL, 0, 0, 0, 0, 1);
378 if (s) {
379 tex_load_patterns(tex_get_language(curlang), (unsigned char *) s);
380 }
381}
382
383
386
387
388# define tex_isspace(c) (c == ' ')
389
390# define word_buffer lmt_language_state.shared_word_buffer
391# define uword_buffer lmt_language_state.shared_uword_buffer
392
/*tex
    Scan one space delimited exception word from |buff| and produce, in
    |*cleaned| (malloced, caller frees), the word with all markup removed and
    characters mapped through the language's hj codes. Recognized markup:
    |-| (optional break, dropped here), |=| (forced hyphen), |{pre}{post}{replace}|
    discretionaries optionally followed by |(alternative)| and |[digit]|
    penalty. Returns a pointer just past the scanned word; on a syntax error
    |*cleaned| is |NULL| and an error is reported.
*/

const char *tex_clean_hyphenation(halfword id, const char *buff, char **cleaned)
{
    int items = 0;
    /*tex Copy the word bytes into the shared buffer, watching the bound. */
    int i = 0;
    char *uindex = (char *) word_buffer;
    const char *s = buff;
    while (*s && ! tex_isspace((unsigned char)*s)) {
        word_buffer[i++] = (unsigned char) *s;
        s++;
        if ((s - buff) > max_size_of_word) {
            /*tex The word does not fit in the shared buffer: give up. */
            *cleaned = NULL;
            tex_handle_error(
                normal_error_type,
                "Exception too long",
                NULL
            );
            return s;
        }
    }
    word_buffer[i] = '\0';
    /*tex Expand the \UTF\ bytes into code points for easier scanning. */
    aux_splitutf2uni(uword_buffer, (const char *) word_buffer);
    /*tex
        Now walk the code points; cleaned output is written back into
        |word_buffer| via |uindex|.
    */
    i = 0;
    while (uword_buffer[i] > 0) {
        unsigned u = uword_buffer[i++];
        if (u == '-') {
            /*tex An optional break point: not part of the cleaned word. */
        } else if (u == '=') {
            /*tex A forced hyphen: emit the (hj mapped) hyphen character. */
            unsigned c = tex_get_hj_code(id, '-');
            uindex = aux_uni2string(uindex, (! c || c <= 32) ? '-' : c);
        } else if (u == '{') {
            /*tex A |{pre}{post}{replace}| discretionary; count the braced parts. */
            u = uword_buffer[i++];
            items = 0;
            while (u && u != '}') {
                u = uword_buffer[i++];
            }
            if (u == '}') {
                items++;
                u = uword_buffer[i++];
            }
            while (u && u != '}') {
                u = uword_buffer[i++];
            }
            if (u == '}') {
                items++;
                u = uword_buffer[i++];
            }
            if (u == '{') {
                u = uword_buffer[i++];
            }
            /*tex Only the replace text ends up in the cleaned word. */
            while (u && u != '}') {
                unsigned c = tex_get_hj_code(id, u);
                uindex = aux_uni2string(uindex, (! c || c <= 32) ? u : c);
                u = uword_buffer[i++];
            }
            if (u == '}') {
                items++;
            }
            if (items != 3) {
                /*tex Not exactly three braced components: syntax error. */
                *cleaned = NULL;
                tex_handle_error(
                    normal_error_type,
                    "Exception syntax error, a discretionary has three components: {}{}{}.",
                    NULL
                );
                return s;
            } else {
                /*tex An optional |(alternative)| replacement follows. */
                if (uword_buffer[i] == '(') {
                    while (uword_buffer[++i] && uword_buffer[i] != ')') { };
                    if (uword_buffer[i] != ')') {
                        tex_handle_error(
                            normal_error_type,
                            "Exception syntax error, an alternative replacement is defined as (text).",
                            NULL
                        );
                        return s;
                    } else if (uword_buffer[i]) {
                        i++;
                    }
                }
                /*tex An optional |[digit]| penalty follows. */
                if (uword_buffer[i] == '[') {
                    if (uword_buffer[i+1] && uword_buffer[i+1] >= '0' && uword_buffer[i+1] <= '9' && uword_buffer[i+2] && uword_buffer[i+2] == ']') {
                        i += 3;
                    } else {
                        tex_handle_error(
                            normal_error_type,
                            "Exception syntax error, a penalty is defined as [digit].",
                            NULL
                        );
                        return s;
                    }
                }
            }
        } else {
            /*tex A regular character: map it through the hj codes. */
            unsigned c = tex_get_hj_code(id, u);
            uindex = aux_uni2string(uindex, (! c || c <= 32) ? u : c);
        }
    }
    *uindex = '\0';
    *cleaned = lmt_memory_strdup((char *) word_buffer);
    return s;
}
507
/*tex
    Load hyphenation exceptions for |lang| from |buff| (a space separated
    word list). Each word is cleaned via |tex_clean_hyphenation| and stored
    in a \LUA\ table in the registry: the cleaned word is the key, the
    original word (with markup) the value. The table is created (and its
    registry reference stored in |lang->exceptions|) on first use.
*/

void tex_load_hyphenation(struct tex_language *lang, const unsigned char *buff)
{
    if (lang) {
        lua_State *L = lmt_lua_state.lua_instance;
        const char *s = (const char *) buff;
        char *cleaned = NULL;
        int id = lang->id;
        if (lang->exceptions == 0) {
            /*tex First exception for this language: create the table. */
            lua_newtable(L);
            lang->exceptions = luaL_ref(L, LUA_REGISTRYINDEX);
        }
        lua_rawgeti(L, LUA_REGISTRYINDEX, lang->exceptions);
        while (*s) {
            while (tex_isspace((unsigned char) *s)) {
                s++;
            }
            if (*s) {
                const char *value = s;
                s = tex_clean_hyphenation(id, s, &cleaned);
                if (cleaned) {
                    size_t len = s - value;
                    if (len > 0) {
                        /*tex exceptions[cleaned] = original word with markup */
                        lua_pushstring(L, cleaned);
                        lua_pushlstring(L, value, len);
                        lua_rawset(L, -3);
                    }
                    lmt_memory_free(cleaned);
                } else {
                    /*tex The cleaner already reported the problem; skip this word. */
                }
            }
        }
        lua_pop(L, 1);
    }
}
543
544void tex_clear_hyphenation(struct tex_language *lang)
545{
546 if (lang && lang->exceptions != 0) {
547 lua_State *L = lmt_lua_state.lua_instance;
548 luaL_unref(L, LUA_REGISTRYINDEX, lang->exceptions);
549 lang->exceptions = 0;
550 }
551}
552
553void tex_load_tex_hyphenation(halfword curlang, halfword head)
554{
555 char *s = tex_tokenlist_to_tstring(head, 1, NULL, 0, 0, 0, 0, 1);
556 if (s) {
557 tex_load_hyphenation(tex_get_language(curlang), (unsigned char *) s);
558 }
559}
560
/*tex
    Insert a discretionary node after |t| with the given |pre|, |post| and
    |replace| lists, subtype and penalty. When |t| itself is the replacement
    (|t == replace|) the glyph is taken out of the main list and becomes the
    no-break text; otherwise the disc is simply linked in after |t|. Returns
    the new disc node.
*/

static halfword tex_aux_insert_discretionary(halfword t, halfword pre, halfword post, halfword replace, quarterword subtype, int penalty)
{
    halfword d = tex_new_disc_node(subtype);
    halfword a = node_attr(t) ;
    disc_penalty(d) = penalty;
    if (t == replace) {
        /*tex Replace |t| by the disc and move |t| into the no-break field. */
        tex_try_couple_nodes(d, node_next(t));
        tex_try_couple_nodes(node_prev(t), d);
        node_prev(t) = null;
        node_next(t) = null;
        replace = t;
    } else {
        /*tex Just link the disc in after |t|. */
        tex_try_couple_nodes(d, node_next(t));
        tex_couple_nodes(t, d);
    }
    if (a) {
        /*tex The disc inherits the attributes of |t|. */
        tex_attach_attribute_list_attribute(d, a);
    }
    tex_set_disc_field(d, pre_break_code, pre);
    tex_set_disc_field(d, post_break_code, post);
    tex_set_disc_field(d, no_break_code, replace);
    return d;
}
587
/*tex
    Insert a syllable discretionary after glyph |t|, using the language's
    pre/post hyphen characters (when positive) as the pre and post break
    texts. The disc gets the |\hyphenpenalty| and inherits |t|'s attributes.
*/

static halfword tex_aux_insert_syllable_discretionary(halfword t, language_variables *lan)
{
    halfword n = tex_new_disc_node(syllable_discretionary_code);
    disc_penalty(n) = hyphen_penalty_par;
    tex_couple_nodes(n, node_next(t));
    tex_couple_nodes(t, n);
    tex_attach_attribute_list_attribute(n, get_attribute_list(t));
    if (lan->pre_hyphen_char > 0) {
        /*tex Typically the hyphen glyph that shows up before the break. */
        halfword g = tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), lan->pre_hyphen_char, t);
        tex_set_disc_field(n, pre_break_code, g);
        set_glyph_disccode(g, glyph_disc_syllable);
    }
    if (lan->post_hyphen_char > 0) {
        /*tex And, rarely, one that shows up after the break. */
        halfword g = tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), lan->post_hyphen_char, t);
        tex_set_disc_field(n, post_break_code, g);
        set_glyph_disccode(g, glyph_disc_syllable);
    }
    return n;
}
607
/*tex
    Turn an explicit hyphen glyph |t| (character |chr|) into an automatic
    discretionary. For the standard |\exhyphenchar| the language's explicit
    pre/post hyphen characters are used (falling back to the hyphen char
    itself); any other hyphen-like character becomes its own pre text. The
    glyph |t| itself becomes the no-break text.
*/

static halfword tex_aux_compound_word_break(halfword t, halfword clang, halfword chr)
{
    halfword prechar, postchar, pre, post, disc;
    if (chr == ex_hyphen_char_par) {
        halfword pre_exhyphen_char = tex_get_pre_exhyphen_char(clang);
        halfword post_exhyphen_char = tex_get_post_exhyphen_char(clang);
        prechar = pre_exhyphen_char > 0 ? pre_exhyphen_char : ex_hyphen_char_par;
        postchar = post_exhyphen_char > 0 ? post_exhyphen_char : null;
    } else {
        /*tex A character with a nonzero hc code: it breaks as itself. */
        prechar = chr;
        postchar = null;
    }
    pre = prechar > 0 ? tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), prechar, t) : null;
    post = postchar > 0 ? tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), postchar, t) : null;
    if (pre) {
        set_glyph_disccode(pre, glyph_disc_automatic);
    }
    if (post) {
        set_glyph_disccode(post, glyph_disc_automatic);
    }
    disc = tex_aux_insert_discretionary(t, pre, post, t, automatic_discretionary_code, tex_automatic_disc_penalty(glyph_hyphenate(t)));
    return disc;
}
632
/*tex
    Look up word |w| in the exception table referenced by |exceptions| (a
    \LUA\ registry reference). Returns a malloced copy of the stored
    (marked up) replacement, or |NULL| when absent. The caller frees.
*/

static char *tex_aux_hyphenation_exception(int exceptions, char *w)
{
    lua_State *L = lmt_lua_state.lua_instance;
    char *ret = NULL;
    if (lua_rawgeti(L, LUA_REGISTRYINDEX, exceptions) == LUA_TTABLE) {
        /*tex table[w] */
        lua_pushstring(L, w);
        lua_rawget(L, -2);
        if (lua_type(L, -1) == LUA_TSTRING) {
            ret = lmt_memory_strdup(lua_tostring(L, -1));
        }
        lua_pop(L, 2);
    } else {
        /*tex Not a table (no exceptions registered): just clean the stack. */
        lua_pop(L, 1);
    }
    return ret;
}
650
651
657
658# define zws 0x200B
659# define zwnj 0x200C
660# define zwj 0x200D
661
662static halfword tex_aux_find_exception_part(unsigned int *j, unsigned int *uword, int len, halfword parent, char final)
663{
664 halfword head = null;
665 halfword tail = null;
666 unsigned i = *j;
667 int noligature = 0;
668 int nokerning = 0;
669
670 i++;
671 while (i < (unsigned) len && uword[i + 1] != (unsigned int) final) {
672 if (tail) {
673 switch (uword[i + 1]) {
674 case zwj:
675 noligature = 1;
676 nokerning = 0;
677 break;
678 case zwnj:
679 noligature = 1;
680 nokerning = 1;
681 break;
682 default:
683 {
684 halfword s = tex_new_glyph_node(glyph_unset_subtype, glyph_font(parent), (int) uword[i + 1], parent);
685 tex_couple_nodes(tail, s);
686 if (noligature) {
687 tex_add_glyph_option(tail, glyph_option_no_right_ligature);
688 tex_add_glyph_option(s, glyph_option_no_left_ligature);
689 noligature = 0;
690 }
691 if (nokerning) {
692 tex_add_glyph_option(tail, glyph_option_no_right_kern);
693 tex_add_glyph_option(s, glyph_option_no_left_kern);
694 nokerning = 0;
695 }
696 set_glyph_disccode(head, glyph_disc_syllable);
697 tail = node_next(tail);
698 break;
699 }
700 }
701 } else {
702 head = tex_new_glyph_node(glyph_unset_subtype, glyph_font(parent), (int) uword[i + 1], parent);
703 set_glyph_disccode(head, glyph_disc_syllable);
704 tail = head;
705 }
706 i++;
707 }
708 *j = ++i;
709 return head;
710}
711
/*tex
    Count the code points of a braced exception part without consuming them
    into nodes; |*j| ends up past the closing brace, mirroring the scanning
    behavior of |tex_aux_find_exception_part|.
*/

static int tex_aux_count_exception_part(unsigned int *j, unsigned int *uword, int len)
{
    int count = 0;
    /*tex Step over the opening brace. */
    unsigned k = *j + 1;
    while (k < (unsigned) len && uword[k + 1] != '}') {
        count++;
        k++;
    }
    *j = k + 1;
    return count;
}
725
/*tex Report a malformed discretionary in an exception; |part| names the bad component. */

static void tex_aux_show_exception_error(const char *part)
{
    tex_handle_error(
        normal_error_type,
        "Invalid %s part in exception",
        part,
        "Exception discretionaries should contain three pairs of braced items.\n"
        "No intervening spaces are allowed."
    );
}
736
737
743
/*tex
    Apply the exception |replacement| (the marked up word from the exception
    table) to the glyph run starting at |wordstart| and ending before |r|.
    The replacement is walked in parallel with the node list: |-| inserts a
    syllable discretionary, |=| just steps over a glyph (it stands for an
    existing hyphen), and |{pre}{post}{replace}| (optionally followed by an
    |(alternative)| list and a |[digit]| penalty) builds a full discretionary
    that swallows |count| nodes from the list.

    Note: |uword[i + 1]| is inspected throughout because |aux_splitutf2uni|
    fills the buffer starting at index 1 — TODO confirm against its
    definition, which is not visible here.
*/

static void tex_aux_do_exception(halfword wordstart, halfword r, char *replacement)
{
    halfword t = wordstart;
    language_variables langdata;
    unsigned uword[max_size_of_word_buffer];
    unsigned len = aux_splitutf2uni(uword, replacement);
    int clang = get_glyph_language(wordstart);
    langdata.pre_hyphen_char = tex_get_pre_hyphen_char(clang);
    langdata.post_hyphen_char = tex_get_post_hyphen_char(clang);
    for (unsigned i = 0; i < len; i++) {
        if (uword[i + 1] == 0 ) {
            /*tex We are done. */
            break;
        } else if (uword[i + 1] == '-') {
            /*tex A breakpoint: insert a syllable discretionary. */
            if (node_next(t) == r) {
                break;
            } else {
                tex_aux_insert_syllable_discretionary(t, &langdata);
                /*tex We skip the new disc node. */
                t = node_next(t);
            }
        } else if (uword[i + 1] == '=') {
            /*tex A forced hyphen: just step over the glyph. */
            t = node_next(t);
        } else if (uword[i + 1] == '{') {
            /*tex A |{pre}{post}{replace}| discretionary specification. */
            halfword pre = null;
            halfword post = null;
            halfword replace = null;
            int count = 0;
            int alternative = null;
            halfword penalty;
            /*tex The pre text: */
            pre = tex_aux_find_exception_part(&i, uword, (int) len, wordstart, '}');
            if (i == len || uword[i + 1] != '{') {
                tex_aux_show_exception_error("pre");
            }
            /*tex The post text: */
            post = tex_aux_find_exception_part(&i, uword, (int) len, wordstart, '}');
            if (i == len || uword[i + 1] != '{') {
                tex_aux_show_exception_error("post");
            }
            /*tex The replace text is only counted, not built: */
            count = tex_aux_count_exception_part(&i, uword, (int) len);
            if (i == len) {
                tex_aux_show_exception_error("replace");
            } else if (uword[i] && uword[i + 1] == '(') {
                /*tex An optional alternative replacement list. */
                alternative = tex_aux_find_exception_part(&i, uword, (int) len, wordstart, ')');;
            }
            if (node_next(t) == r) {
                break;
            } else {
                if (count > 0) {
                    /*tex Detach |count| nodes from the list; they become the replace text. */
                    halfword q = t;
                    replace = node_next(q);
                    while (count > 0 && q) {
                        halfword t = node_type(q);
                        q = node_next(q);
                        if (t == glyph_node || t == disc_node) {
                            count--;
                        } else {
                            break ;
                        }
                    }
                    /*tex Snip the replace run out of the main list. */
                    tex_try_couple_nodes(t, node_next(q));
                    node_next(q) = null;
                    if (alternative) {
                        /*tex The detached text is discarded in favor of the alternative. */
                        tex_flush_node_list(replace);
                        replace = alternative;
                    } else {
                        /*tex Flatten embedded discs: keep their no-break text only. */
                        q = replace ;
                        while (q) {
                            halfword n = node_next(q);
                            if (node_type(q) == disc_node) {
                                halfword nb = disc_no_break_head(q);
                                disc_no_break_head(q) = null;
                                node_prev(nb) = null ;
                                if (q == replace) {
                                    replace = nb;
                                } else {
                                    tex_try_couple_nodes(node_prev(q), nb);
                                }
                                tex_try_couple_nodes(nb, n);
                                tex_flush_node(q);
                            }
                            q = n ;
                        }
                    }
                }
                /*tex An optional |[digit]| penalty specification. */
                if (uword[i] && uword[i + 1] == '[') {
                    i += 2;
                    if (uword[i] && uword[i] >= '0' && uword[i] <= '9') {
                        if (exception_penalty_par > 0) {
                            if (exception_penalty_par > infinite_penalty) {
                                penalty = exception_penalty_par;
                            } else {
                                penalty = (uword[i] - '0') * exception_penalty_par ;
                            }
                        } else if (exception_penalty_par < 0) {
                            penalty = hyphen_penalty_par;
                        } else {
                            penalty = (uword[i] - '0') * hyphen_penalty_par ;
                        }
                        ++i;
                        while (uword[i] && uword[i] != ']') {
                            ++i;
                        }
                    } else {
                        penalty = hyphen_penalty_par;
                    }
                } else {
                    penalty = hyphen_penalty_par;
                }
                t = tex_aux_insert_discretionary(t, pre, post, replace, normal_discretionary_code, penalty);
                /*tex We skip the new disc node. */
                t = node_next(t);
                /*tex When another spec follows immediately we back up one step. */
                if (uword[i] && uword[i + 1] == '{') {
                    i--;
                    t = node_prev(t);
                }
            }
        } else {
            /*tex A regular character: step to the next glyph. */
            t = node_next(t);
        }
        if (! t || node_next(t) == r) {
            break;
        }
    }
}
891
892
986
987static inline halfword tex_aux_is_hyphen_char(halfword chr)
988{
989 if (tex_get_hc_code(chr)) {
990 return tex_get_hc_code(chr);
991 } else if (chr == ex_hyphen_char_par) {
992 return chr;
993 } else {
994 return 0;
995 }
996}
997
/*tex
    Advance |r| to the first glyph that can start a hyphenatable word, or
    return |null| when there is none. Glue and word boundaries (re)enable a
    word start; discs, boxes, rules etc.\ may disable it depending on the
    hyphenation mode bits; inline math is skipped as a whole. Explicit
    hyphens encountered on the way may be turned into compound word breaks.
*/

static halfword tex_aux_find_next_wordstart(halfword r, halfword first_language)
{
    int start_ok = 1;
    halfword lastglyph = r;
    while (r) {
        switch (node_type(r)) {
            case boundary_node:
                if (node_subtype(r) == word_boundary) {
                    start_ok = 1;
                }
                break;
            case disc_node:
                start_ok = has_disc_option(r, disc_option_post_word);
                break;
            case hlist_node:
            case vlist_node:
            case rule_node:
            case dir_node:
            case whatsit_node:
                /*tex In strict mode such nodes block a following word start. */
                if (hyphenation_permitted(glyph_hyphenate(lastglyph), strict_start_hyphenation_mode)) {
                    start_ok = 0;
                }
                break;
            case glue_node:
                start_ok = 1;
                break;
            case math_node:
                /*tex Skip the whole inline math formula, balancing nesting. */
                if (node_subtype(r) == begin_inline_math) {
                    int mathlevel = 1;
                    while (mathlevel > 0) {
                        r = node_next(r);
                        if (! r) {
                            return r;
                        } else if (node_type(r) == math_node) {
                            if (node_subtype(r) == begin_inline_math) {
                                mathlevel++;
                            } else {
                                mathlevel--;
                            }
                        }
                    }
                }
                break;
            case glyph_node:
                {
                    int chr = glyph_character(r);
                    int hyp = tex_aux_is_hyphen_char(chr);
                    lastglyph = r;
                    if (hyp) {
                        if (hyphenation_permitted(glyph_hyphenate(r), ignore_bounds_hyphenation_mode)) {
                            /*tex Treat the hyphen as an ordinary character. */
                        } else {
                            halfword t = node_next(r) ;
                            /*tex A single hyphen followed by a normal glyph: compound word break. */
                            if (t && (node_type(t) == glyph_node) && (! tex_aux_is_hyphen_char(glyph_character(t))) && ! hyphenation_permitted(glyph_hyphenate(r), automatic_hyphenation_mode)) {
                                r = tex_aux_compound_word_break(r, get_glyph_language(r), hyp);
                                start_ok = 1;
                            } else {
                                /*tex Skip a run of hyphen-like glyphs. */
                                while (t && (node_type(t) == glyph_node) && tex_aux_is_hyphen_char(glyph_character(t))) {
                                    r = t ;
                                    t = node_next(r) ;
                                }
                                if (t) {
                                    /*tex The run blocks the next word start. */
                                    start_ok = 0;
                                } else {
                                    /*tex The list ends in hyphens: nothing left to do. */
                                    return null;
                                }
                            }
                        }
                    } else if (start_ok && (get_glyph_language(r) >= first_language) && get_glyph_dohyph(r)) {
                        int l = tex_get_hj_code(get_glyph_language(r), chr);
                        if (l > 0) {
                            if (l == chr || l <= 32 || get_glyph_uchyph(r)) {
                                /*tex A valid word start. */
                                return r;
                            } else {
                                /*tex Only an uppercase variant: not a start here. */
                                start_ok = 0;
                            }
                        } else {
                            /*tex No hj code: keep looking. */
                        }
                    } else {
                        /*tex Not eligible; keep scanning. */
                    }
                }
                break;
            default:
                start_ok = 0;
                break;
        }
        r = node_next(r);
    }
    return r;
}
1101
1102
1147
/*tex
    Decide whether the node |r| that follows a word permits hyphenating that
    word. A disc consults its |pre_word| option; boxes, rules and similar
    nodes block the word end in strict mode. Everything else (including end
    of list) is fine.
*/

static int tex_aux_valid_wordend(halfword end_word, halfword r)
{
    if (r) {
        switch (node_type(r)) {
            case disc_node:
                return has_disc_option(r, disc_option_pre_word);
            case hlist_node:
            case vlist_node:
            case rule_node:
            case dir_node:
            case whatsit_node:
            case insert_node:
            case adjust_node:
                return ! hyphenation_permitted(glyph_hyphenate(end_word), strict_end_hyphenation_mode);
        }
    }
    return 1;
}
1171
/*tex
    Entry point for hyphenating the list |head|..|tail|. When the |hyphenate|
    callback is set it takes over completely; when it is unset (id zero) the
    built-in |tex_hyphenate_list| runs; a negative id disables hyphenation.
*/

void tex_handle_hyphenation(halfword head, halfword tail)
{
    if (head && node_next(head)) {
        int callback_id = lmt_callback_defined(hyphenate_callback);
        if (callback_id > 0) {
            lua_State *L = lmt_lua_state.lua_instance;
            int top = 0;
            if (lmt_callback_okay(L, callback_id, &top)) {
                int i;
                lmt_node_list_to_lua(L, head);
                lmt_node_list_to_lua(L, tail);
                i = lmt_callback_call(L, 2, 0, top);
                if (i) {
                    lmt_callback_error(L, top, i);
                } else {
                    lmt_callback_wrapup(L, top);
                }
            }
        } else if (callback_id == 0) {
            tex_hyphenate_list(head, tail);
        } else {
            /*tex A negative id means: no hyphenation at all. */
        }
    }
}
1197
/*tex
    Run the pattern automaton |dict| over the word |first|..|last| (length
    |length| glyphs) and insert syllable discretionaries between |left| and
    |right| wherever an odd pattern value lands. The word is wrapped in the
    |begin_period| and |end_period| sentinels so the |.x| and |x.| patterns
    apply. Returns the number of discretionaries inserted.
*/

static int tex_aux_hnj_hyphen_hyphenate(
    hjn_dictionary *dict,
    halfword first,
    halfword last,
    int length,
    halfword left,
    halfword right,
    language_variables *lan
)
{
    /*tex The word plus the two sentinel periods, plus a terminator slot. */
    int ext_word_len = length + 2;
    int hyphen_len = ext_word_len + 1;
    /*tex One priority byte per inter-character position, zero initialized. */
    char *hyphens = lmt_memory_calloc(hyphen_len, sizeof(unsigned char));
    if (hyphens) {
        halfword here;
        int state = 0;
        int char_num = 0;
        int done = 0;
        ++lmt_language_state.word_count;
        /*tex Temporarily splice the sentinels around the word. */
        node_next(begin_period) = first;
        node_next(end_period) = node_next(last);
        node_next(last) = end_period;
        for (here = begin_period, char_num = 0; here != node_next(end_period); here = node_next(here)) {
            int ch;
            if (here == begin_period || here == end_period) {
                ch = '.';
            } else {
                ch = tex_get_hj_code(get_glyph_language(here), glyph_character(here));
                if (ch <= 32) {
                    /*tex A control-ish hj code means: use the raw character. */
                    ch = glyph_character(here);
                }
            }
            /*tex Feed |ch| to the automaton, following fallback states on mismatch. */
            while (state != -1) {
                hjn_state *hstate = &dict->states[state];
                for (int k = 0; k < hstate->num_trans; k++) {
                    if (hstate->trans[k].uni_ch == ch) {
                        char *match;
                        state = hstate->trans[k].new_state;
                        match = dict->states[state].match;
                        if (match) {
                            /*tex Merge the pattern values, keeping the maximum per position. */
                            int offset = (int) (char_num + 2 - (int) strlen(match));
                            for (int m = 0; match[m]; m++) {
                                if (hyphens[offset + m] < match[m]) {
                                    hyphens[offset + m] = match[m];
                                }
                            }
                        }
                        goto NEXTLETTER;
                    }
                }
                state = hstate->fallback_state;
            }
            /*tex No transition found anywhere: restart from the root state. */
            state = 0;
          NEXTLETTER:;
            char_num++;
        }
        /*tex Unsplice the sentinels again. */
        node_next(last) = node_next(end_period);
        /*tex Skip to |left| (positions before it honor |\lefthyphenmin|). */
        for (here = first, char_num = 2; here != left; here = node_next(here)) {
            char_num++;
        }
        /*tex Odd values mark valid break positions. */
        for (; here != right; here = node_next(here)) {
            if (hyphens[char_num] & 1) {
                here = tex_aux_insert_syllable_discretionary(here, lan);
                done += 1;
            }
            char_num++;
        }
        lmt_memory_free(hyphens);
        return done;
    } else {
        tex_overflow_error("patterns", hyphen_len);
        return 0;
    }
}
1293
1294
1295
/*tex
    Sanity check after a (possibly user driven) exception lookup: verify that
    the node range |f|..|l| (with |r| following |l|) still consists of |n|
    glyphs whose characters match the saved \UTF\ string |utf8original|.
    Returns 1 when everything matches, 0 (with a warning) otherwise.
*/

static int tex_aux_still_okay(halfword f, halfword l, halfword r, int n, const char *utf8original) {
    if (_valid_node_(f) && _valid_node_(l) && node_next(l) == r) {
        int i = 0;
        while (f) {
            ++i;
            if (node_type(f) != glyph_node) {
                tex_normal_warning("language", "the hyphenated word contains non-glyphs, skipping");
                return 0;
            } else {
                int cl;
                halfword c = (halfword) aux_str2uni_len((const unsigned char *) utf8original, &cl);
                utf8original += cl;
                if (! (c && c == glyph_character(f))) {
                    tex_normal_warning("language", "the hyphenated word contains different characters, skipping");
                    return 0;
                } else if (f != l) {
                    f = node_next(f);
                } else if (i == n) {
                    /*tex We reached |l| with exactly |n| matching glyphs. */
                    return 1;
                } else {
                    tex_normal_warning("language", "the hyphenated word changed length, skipping");
                    return 0;
                }
            }
        }
    }
    tex_normal_warning("language", "the hyphenation list is messed up, skipping");
    return 0;
}
1325
1326static void tex_aux_hyphenate_show(halfword beg, halfword end)
1327{
1328 if (_valid_node_(beg) && _valid_node_(end)) {
1329 halfword nxt = node_next(end);
1330 node_next(end) = null;
1331 tex_show_node_list(beg, 100, 10000);
1332 node_next(end) = nxt;
1333 }
1334}
1335
1336
1337
1338static inline int is_traditional_hyphen(halfword n)
1339{
1340 return (
1341 (glyph_character(n) == ex_hyphen_char_par)
1342 && (has_font_text_control(glyph_font(n),text_control_collapse_hyphens))
1343 && (hyphenation_permitted(glyph_hyphenate(n),collapse_hyphenation_mode))
1344 );
1345}
1346
1347static inline int is_apostrophe(halfword n)
1348{
1349 return (
1350 (glyph_character(n) == ex_apostrophe_char_par)
1351 && (has_font_text_control(glyph_font(n),text_control_replace_apostrophe))
1352 && (hyphenation_permitted(glyph_hyphenate(n),replace_apostrophe_hyphenation_mode))
1353 );
1354}
/*tex
    Collapse runs of explicit hyphens in |head|: a single hyphen becomes
    |c1|, two in a row become |c2| (endash-like) and three become |c3|
    (emdash-like); apostrophes become |c4|. Disc nodes are processed
    recursively in all three fields and revalidated afterwards. Returns
    nonzero when anything was replaced.
*/

int tex_collapse_list(halfword head, halfword c1, halfword c2, halfword c3, halfword c4)
{
    halfword found = 0;
    if (head && c1 && c2 && c3) {
        halfword n1 = head;
        while (n1) {
            halfword n2 = node_next(n1);
            switch (node_type(n1)) {
                case glyph_node:
                    if (is_traditional_hyphen(n1)) {
                        set_glyph_discpart(n1, glyph_discpart_always);
                        if (n2 && node_type(n2) == glyph_node && is_traditional_hyphen(n2) && glyph_font(n1) == glyph_font(n2)) {
                            halfword n3 = node_next(n2);
                            if (n3 && node_type(n3) == glyph_node && is_traditional_hyphen(n3) && glyph_font(n1) == glyph_font(n3)) {
                                /*tex Three hyphens: collapse to |c3| and drop two glyphs. */
                                halfword n4 = node_next(n3);
                                glyph_character(n1) = c3;
                                tex_try_couple_nodes(n1, n4);
                                tex_flush_node(n2);
                                tex_flush_node(n3);
                                n1 = n4;
                            } else {
                                /*tex Two hyphens: collapse to |c2| and drop one glyph. */
                                glyph_character(n1) = c2;
                                tex_try_couple_nodes(n1, n3);
                                tex_flush_node(n2);
                                n1 = n3;
                            }
                            found = 1;
                            /*tex |n1| already points at the continuation node. */
                            goto AGAIN;
                        } else {
                            /*tex A single hyphen just maps to |c1|. */
                            glyph_character(n1) = c1;
                        }
                    } else if (is_apostrophe(n1)) {
                        glyph_character(n1) = c4;
                        found = 1;
                    }
                    break;
                case disc_node:
                    {
                        /*tex Recurse into all three disc fields. */
                        halfword done = 0;
                        if (disc_pre_break_head(n1) && tex_collapse_list(disc_pre_break_head(n1), c1, c2, c3, c4)) {
                            ++done;
                        }
                        if (disc_post_break_head(n1) && tex_collapse_list(disc_post_break_head(n1), c1, c2, c3, c4)) {
                            ++done;
                        }
                        if (disc_no_break_head(n1) && tex_collapse_list(disc_no_break_head(n1), c1, c2, c3, c4)) {
                            ++done;
                        }
                        if (done) {
                            /*tex The fields changed, so revalidate the disc. */
                            tex_check_disc_field(n1);
                        }
                        break;
                    }
                default:
                    break;
            }
            n1 = n2;
          AGAIN:;
        }
    }
    return found;
}
1418
1419void tex_hyphenate_list(halfword head, halfword tail)
1420{
1421
1422 if (tail) {
1423 halfword first_language = first_valid_language_par;
1424 halfword trace = tracing_hyphenation_par;
1425 halfword r = head;
1426 halfword nothing = 1;
1427 ++lmt_language_state.list_count;
1428
1443 while (r && node_type(r) != glyph_node) {
1444 r = node_next(r);
1445 }
1446 if (r) {
1447
1448 halfword saved_hyphen_penalty_par = hyphen_penalty_par;
1449 halfword saved_ex_hyphen_penalty_par = ex_hyphen_penalty_par;
1450 halfword p = tex_find_par_par(head);
1451 int penalties_pushed = node_type(p) == par_node;
1452 ++lmt_language_state.checked_count;
1453 if (penalties_pushed) {
1454 hyphen_penalty_par = tex_get_par_par(p, par_hyphen_penalty_code);
1455 ex_hyphen_penalty_par = tex_get_par_par(p, par_ex_hyphen_penalty_code);
1456 }
1457
1458 r = tex_aux_find_next_wordstart(r, first_language);
1459 if (r) {
1460 language_variables langdata;
1461 char utf8word[max_size_of_word_buffer];
1462 char utf8original[max_size_of_word_buffer];
1463 char *utf8ptr = utf8word;
1464 char *utf8ori = utf8original;
1465 int word_length = 0;
1466 int explicit_hyphen = 0;
1467 int last_char = 0;
1468 int valid = 0;
1469 halfword explicit_start = null;
1470 halfword saved_tail = node_next(tail);
1471 halfword penalty = tex_new_penalty_node(0, word_penalty_subtype);
1472
1473 tex_attach_attribute_list_copy(penalty, r);
1474 tex_couple_nodes(tail, penalty);
1475 while (r) {
1476 halfword word_start = r;
1477 int word_language = get_glyph_language(word_start);
1478 if (tex_is_valid_language(word_language)) {
1479 halfword word_end = r;
1480 int lhmin = get_glyph_lhmin(word_start);
1481 int rhmin = get_glyph_rhmin(word_start);
1482 int hmin = tex_get_hyphenation_min(word_language);
1483 halfword word_font = glyph_font(word_start);
1484 if (! tex_is_valid_font(word_font) || font_hyphen_char(word_font) < 0) {
1485
1486 word_font = 0;
1487 }
1488 langdata.pre_hyphen_char = tex_get_pre_hyphen_char(word_language);
1489 langdata.post_hyphen_char = tex_get_post_hyphen_char(word_language);
1490 while (r && node_type(r) == glyph_node && word_language == get_glyph_language(r)) {
1491 halfword chr = glyph_character(r);
1492 halfword hyp = tex_aux_is_hyphen_char(chr);
1493 if (word_language >= first_language) {
1494 last_char = tex_get_hj_code(word_language, chr);
1495 if (last_char > 0) {
1496 goto GOFORWARD;
1497 }
1498 }
1499 if (hyp) {
1500 last_char = hyp;
1501
1502
1503
1504 } else {
1505 break;
1506 }
1507 GOFORWARD:
1508
1509 explicit_hyphen = hyp;
1510 if (explicit_hyphen && node_next(r) && node_type(node_next(r)) != glyph_node && hyphenation_permitted(glyph_hyphenate(r), ignore_bounds_hyphenation_mode)) {
1511
1512 explicit_hyphen = 0;
1513 }
1514 if (explicit_hyphen) {
1515 break;
1516 } else {
1517 word_length++;
1518 if (word_length >= max_size_of_word) {
1519
1520 while (r && node_type(r) == glyph_node) {
1521 r = node_next(r);
1522 }
1523 goto PICKUP;
1524 } else {
1525 if (last_char <= 32) {
1526 if (last_char == 32) {
1527 last_char = 0 ;
1528 }
1529 if (word_length <= lhmin) {
1530 lhmin = lhmin - last_char + 1 ;
1531 if (lhmin < 0) {
1532 lhmin = 1;
1533 }
1534 }
1535 if (word_length >= rhmin) {
1536 rhmin = rhmin - last_char + 1 ;
1537 if (rhmin < 0) {
1538 rhmin = 1;
1539 }
1540 }
1541 hmin = hmin - last_char + 1 ;
1542 if (hmin < 0) {
1543 rhmin = 1;
1544 }
1545 last_char = chr ;
1546 }
1547 utf8ori = aux_uni2string(utf8ori, (unsigned) chr);
1548 utf8ptr = aux_uni2string(utf8ptr, (unsigned) last_char);
1549 word_end = r;
1550 r = node_next(r);
1551 }
1552 }
1553 }
1554 if (explicit_hyphen) {
1555
1556 if ((get_glyph_discpart(r) == glyph_discpart_replace && ! hyphenation_permitted(glyph_hyphenate(r), syllable_hyphenation_mode))) {
1557
1562 valid = 1;
1563 goto MESSYCODE;
1564 } else {
1565
1566 halfword t = node_next(r);
1567 if (t && node_type(t) == glyph_node && ! tex_aux_is_hyphen_char(glyph_character(t)) && hyphenation_permitted(glyph_hyphenate(t), automatic_hyphenation_mode)) {
1568
1569 halfword g = r;
1570 set_glyph_disccode(g, glyph_disc_automatic);
1571 r = tex_aux_compound_word_break(r, get_glyph_language(g), explicit_hyphen);
1572 if (trace > 1) {
1573 *utf8ori = 0;
1574 tex_begin_diagnostic();
1575 tex_print_format("[language: compound word break after %s]", utf8original);
1576 tex_end_diagnostic();
1577 }
1578 if (hyphenation_permitted(glyph_hyphenate(g), compound_hyphenation_mode)) {
1579 explicit_hyphen = 0;
1580 if (hyphenation_permitted(glyph_hyphenate(g), force_handler_hyphenation_mode) || hyphenation_permitted(glyph_hyphenate(g), feedback_compound_hyphenation_mode)) {
1581 set_disc_option(r, disc_option_pre_word | disc_option_post_word);
1582 explicit_start = null;
1583 valid = 1;
1584 goto MESSYCODE;
1585 } else {
1586 if (! explicit_start) {
1587 explicit_start = word_start;
1588 }
1589
1590 utf8ptr = aux_uni2string(utf8ptr, '-');
1591 r = t;
1592 continue;
1593 }
1594 }
1595 } else {
1596
1597 while (t && node_type(t) == glyph_node && tex_aux_is_hyphen_char(glyph_character(t))) {
1598 set_glyph_disccode(t, glyph_disc_automatic);
1599 r = t;
1600 t = node_next(r);
1601 }
1602 if (! t) {
1603
1604 r = null;
1605 }
1606 }
1607 }
1608 } else {
1609 valid = tex_aux_valid_wordend(word_end, r);
1610 MESSYCODE:
1611
1612 if (word_font && word_language >= first_language) {
1613
1614 struct tex_language *lang = lmt_language_state.languages[word_language];
1615 if (lang) {
1616 char *replacement = NULL;
1617 halfword start = explicit_start ? explicit_start : word_start;
1618 int okay = word_length >= lhmin + rhmin && (hmin <= 0 || word_length >= hmin) && hyphenation_permitted(glyph_hyphenate(start), syllable_hyphenation_mode);
1619 *utf8ptr = '\0';
1620 *utf8ori = '\0';
1621 if (lang->wordhandler && hyphenation_permitted(glyph_hyphenate(start), force_handler_hyphenation_mode)) {
1622 halfword restart = node_prev(start);
1623 int done = lmt_handle_word(lang, utf8original, utf8word, word_length, start, word_end, &replacement);
1624 if (replacement) {
1625 if (tex_aux_still_okay(start, word_end, r, word_length, utf8original)) {
1626 goto EXCEPTIONS2;
1627 } else {
1628 goto PICKUP;
1629 }
1630 } else {
1631
1632 switch (done) {
1633 case 1:
1634 if (_valid_node_(restart)) {
1635 r = restart;
1636 } else if (_valid_node_(start)) {
1637 r = node_prev(start);
1638 }
1639 if (! r) {
1640 if (_valid_node_(head)) {
1641 tex_normal_warning("language", "the hyphenation list is messed up, recovering");
1642 r = head;
1643 } else {
1644 tex_normal_error("language", "the hyphenated head is messed up, aborting");
1645 return;
1646 }
1647 }
1648 goto PICKUP;
1649 case 2:
1650 if (tex_aux_still_okay(start, word_end, r, word_length, utf8original)) {
1651 goto EXCEPTIONS1;
1652 } else {
1653 goto PICKUP;
1654 }
1655 case 3:
1656 if (tex_aux_still_okay(start, word_end, r, word_length, utf8original)) {
1657 goto PATTERNS;
1658 } else {
1659 goto PICKUP;
1660 }
1661 default:
1662 if (_valid_node_(r)) {
1663 goto PICKUP;
1664 } else if (_valid_node_(tail)) {
1665 tex_normal_warning("language", "the hyphenation list is messed up, quitting");
1666 goto ABORT;
1667 } else {
1668
1669 return;
1670 }
1671 }
1672 }
1673 }
1674 if (! okay || ! valid) {
1675 goto PICKUP;
1676 }
1677
1684 EXCEPTIONS1:
1685 if (lang->exceptions) {
1686 replacement = tex_aux_hyphenation_exception(lang->exceptions, utf8word);
1687 }
1688 EXCEPTIONS2:
1689 if (replacement) {
1690
1691 halfword start = explicit_start ? explicit_start : word_start;
1692 halfword beg = node_prev(start);
1693 ++lmt_language_state.exceptions_count;
1694 nothing = 0;
1695 tex_aux_do_exception(start, r, replacement);
1696 if (trace > 1) {
1697 tex_begin_diagnostic();
1698 tex_print_format("[language: exception %s to %s]", utf8original, replacement);
1699 if (trace > 2) {
1700 tex_aux_hyphenate_show(node_next(beg), node_prev(r));
1701 }
1702 tex_end_diagnostic();
1703 }
1704 lmt_memory_free(replacement);
1705 goto PICKUP;
1706 }
1707 PATTERNS:
1708 if (lang->patterns) {
1709 if (explicit_start) {
1710
1711 } else if (hyphenation_permitted(glyph_hyphenate(word_start), syllable_hyphenation_mode)) {
1712 halfword left = word_start;
1713 halfword right = r;
1714 for (int i = lhmin; i > 1; i--) {
1715 left = node_next(left);
1716 if (! left || left == right) {
1717 goto PICKUP;
1718 }
1719 }
1720 if (right != left) {
1721 int done = 0;
1722 for (int i = rhmin; i > 0; i--) {
1723 right = node_prev(right);
1724 if (! right || right == left) {
1725 goto PICKUP;
1726 }
1727 }
1728 done = tex_aux_hnj_hyphen_hyphenate(lang->patterns, word_start, word_end, word_length, left, right, &langdata);
1729 if (done) {
1730 ++lmt_language_state.hyphenated_count;
1731 nothing = 0;
1732 }
1733 if (trace > 1) {
1734 tex_begin_diagnostic();
1735 if (done) {
1736 tex_print_format("[language: hyphenated %s at %i positions]", utf8original, done);
1737 if (trace > 2) {
1738 tex_aux_hyphenate_show(node_next(left), node_prev(right));
1739 }
1740 } else {
1741 tex_print_format("[language: not hyphenated %s]", utf8original);
1742 }
1743 tex_end_diagnostic();
1744 }
1745 }
1746 }
1747 }
1748 }
1749 }
1750 }
1751 }
1752 PICKUP:
1753 explicit_start = null ;
1754 explicit_hyphen = 0;
1755 word_length = 0;
1756 utf8ptr = utf8word;
1757 utf8ori = utf8original;
1758 if (r) {
1759 r = tex_aux_find_next_wordstart(r, first_language);
1760 } else {
1761 break;
1762 }
1763 }
1764 ABORT:
1765 tex_flush_node(node_next(tail));
1766 node_next(tail) = saved_tail;
1767 }
1768
1769 if (penalties_pushed) {
1770 hyphen_penalty_par = saved_hyphen_penalty_par;
1771 ex_hyphen_penalty_par = saved_ex_hyphen_penalty_par;
1772 }
1773
1774 }
1775 if (nothing) {
1776 ++lmt_language_state.nothing_count;
1777 }
1778 }
1779}
1780
1781halfword tex_glyph_to_discretionary(halfword glyph, quarterword code, int keepkern)
1782{
1783 halfword prev = node_prev(glyph);
1784 halfword next = node_next(glyph);
1785 halfword disc = tex_new_disc_node(code);
1786 halfword kern = null;
1787 if (keepkern && next && node_type(next) == kern_node && node_subtype(next) == italic_kern_subtype) {
1788 kern = node_next(next);
1789 next = node_next(kern);
1790 node_next(kern) = null;
1791 } else {
1792 node_next(glyph) = null;
1793 }
1794 node_prev(glyph) = null;
1795 tex_attach_attribute_list_copy(disc, glyph);
1796 tex_set_disc_field(disc, pre_break_code, tex_copy_node_list(glyph, null));
1797 tex_set_disc_field(disc, post_break_code, tex_copy_node_list(glyph, null));
1798 tex_set_disc_field(disc, no_break_code, glyph);
1799 tex_try_couple_nodes(prev, disc);
1800 tex_try_couple_nodes(disc, next);
1801 return disc;
1802} |