1if not modules then modules = { } end modules ['lang-hyp'] = {
2 version = 1.001,
3 comment = "companion to lang-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77local type, rawget, rawset, tonumber, next = type, rawget, rawset, tonumber, next
78
79local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs
80local lpegmatch = lpeg.match
81
82local context = context
83
84local concat = table.concat
85local insert = table.insert
86local remove = table.remove
87local formatters = string.formatters
88local utfchar = utf.char
89local utfbyte = utf.byte
90
91if not characters then
92 require("char-ini")
93end
94
95local setmetatableindex = table.setmetatableindex
96
97
98
-- Trace flags; both start disabled and are switched by the tracker callbacks.
local trace_steps = false trackers.register("hyphenator.steps", function(v) trace_steps = v end)
local trace_visualize = false trackers.register("hyphenator.visualize",function(v) trace_visualize = v end)

-- Reporter used for every message emitted by this module.
local report = logs.reporter("hyphenator")

-- Falls back to a do-nothing function when no interfaces namespace is present.
local implement = interfaces and interfaces.implement or function() end

-- Create (or reuse) the languages.hyphenators.traditional namespace.
languages = languages or { }
local hyphenators = languages.hyphenators or { }
languages.hyphenators = hyphenators
local traditional = hyphenators.traditional or { }
hyphenators.traditional = traditional
111
-- Per-language dictionaries, created on first access. A fresh dictionary has
-- empty pattern, exception, special and result-cache tables and is flagged as
-- not yet loaded.
local dictionaries = setmetatableindex(function(store,language)
    local fresh = { loaded = false }
    fresh.patterns = { }
    fresh.hyphenated = { }
    fresh.specials = { }
    fresh.exceptions = { }
    store[language] = fresh
    return fresh
end)

hyphenators.dictionaries = dictionaries
125
-- Basic building blocks for parsing patterns and exceptions.
local character = lpeg.patterns.utf8character
local digit = R("09")
local weight = digit/tonumber + Cc(0)
local fence = P(".")
local hyphen = P("-")
local space = P(" ")
local char = character - space
local validcharacter = (character - S("./"))
local keycharacter = character - S("/")

-- Optional trailing specification of the form "/before=after" or
-- "/before=after,start,length". The captured key/value groups are folded
-- into one table with rawset; start and length are converted with tonumber.
local specpart = (P("/") * Cf ( Ct("") *
    Cg ( Cc("before") * C((1-P("="))^1) * P("=") ) *
    Cg ( Cc("after") * C((1-P(","))^1) ) *
    ( P(",") *
        Cg ( Cc("start") * ((1-P(","))^1/tonumber) * P(",") ) *
        Cg ( Cc("length") * ((1-P(-1) )^1/tonumber) )
    )^-1
    , rawset))^-1

-- Hash key of a pattern: the pattern text with all digits removed; stops at a "/".
local make_hashkey_p = Cs((digit/"" + keycharacter)^1)

-- Hash key of an exception: the word with all hyphens removed; stops at a "/".
local make_hashkey_e = Cs((hyphen/"" + keycharacter)^1)
-- Exception value: an array with one boolean per character after the first;
-- the boolean is true when that character is preceded by a hyphen.
local make_pattern_e = Ct(P(char) * (hyphen * Cc(true) * P(char) + P(char) * Cc(false))^1)
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
-- Turns a string into an array by applying tonumber to each single character.
local make_pattern_c = Ct((P(1)/tonumber)^1)

-- Memoizes the arrays produced by make_pattern_c, keyed by the input string,
-- so equal strings share one table.
local cache = setmetatableindex(function(t,k)
    local v = lpegmatch(make_pattern_c,k)
    t[k] = v
    return v
end)

-- Rewrite a pattern into a string of weights: an absent digit becomes "0",
-- a fence (".") becomes "0" and the characters themselves are dropped. The
-- resulting string is then looked up in (and so converted by) the cache.
local weight_n = digit + Cc("0")
local fence_n = fence / "0"
local char_n = validcharacter / ""
local basepart_n = Cs(fence_n^-1 * (weight_n * char_n)^1 * weight_n * fence_n^-1) / cache
-- Full pattern parser: the weight array followed by the optional specification table.
local make_pattern_n = basepart_n * specpart
179
-- Registers one hyphenation pattern.
--
--   patterns      : table that maps hash keys onto weight arrays
--   specials      : table that maps hash keys onto replacement specifications
--   str           : the pattern text (may carry a trailing "/..." specification)
--   specification : optional table that takes precedence over an inline specification
--
-- Nothing is registered when no hash key can be derived from str (for
-- instance when it starts with "/"): indexing a table with nil raises an
-- error, which would abort the load of all remaining patterns.
local function register_pattern(patterns,specials,str,specification)
    local k = lpegmatch(make_hashkey_p,str)
    if not k then
        return
    end
    local v1, v2 = lpegmatch(make_pattern_n,str)
    patterns[k] = v1
    if specification then
        specials[k] = specification
    elseif v2 then
        specials[k] = v2
    end
end
191
-- Removes a pattern and its specification (if any) from the given tables.
--
--   patterns : table that maps hash keys onto weight arrays
--   specials : table that maps hash keys onto replacement specifications
--   str      : the pattern text; digits are ignored when the key is derived
--
-- Does nothing when no hash key can be derived from str, because assigning
-- to a nil table index raises an error.
local function unregister_pattern(patterns,specials,str)
    local k = lpegmatch(make_hashkey_p,str)
    if not k then
        return
    end
    patterns[k] = nil
    specials[k] = nil
end
197
198local p_lower = lpeg.patterns.utf8lower
199
-- Registers one hyphenation exception such as "ta-ble".
--
--   exceptions    : table that maps the (lowercased, hyphen free) word onto
--                   the array of booleans produced by make_pattern_e
--   str           : the exception with hyphens at the permitted break points
--   specification : accepted for symmetry with register_pattern; not used
--
-- The entry is skipped when lowercasing fails or no key can be derived
-- (for instance when the word starts with "/"), because a nil table index
-- raises an error.
local function register_exception(exceptions,str,specification)
    local l = lpegmatch(p_lower,str)
    if not l then
        return
    end
    local k = lpegmatch(make_hashkey_e,l)
    if not k then
        return
    end
    exceptions[k] = lpegmatch(make_pattern_e,l)
end
206
-- Walks over a blob of patterns: every run of non-space characters is passed
-- to register_pattern together with the first two extra match arguments
-- (patterns and specials tables); any other character is skipped.
local p_pattern = ((Carg(1) * Carg(2) * C(char^1)) / register_pattern + 1)^1
-- Same for exceptions; only the first extra match argument (exceptions table) is used.
local p_exception = ((Carg(1) * C(char^1)) / register_exception + 1)^1
-- Splits a string into an array of utf8 characters.
local p_split = Ct(C(character)^1)
210
-- Loads the patterns and exceptions of a language from a Lua file (once).
--
--   language : key into the dictionaries table
--   filename : optional file name; defaults to "lang-<language>", the "lua"
--              suffix is added via file.addsuffix
--
-- Returns the dictionary. The dictionary is marked as loaded even when no
-- file was found or the file returned nothing.
function traditional.loadpatterns(language,filename)
    local dictionary = dictionaries[language]
    if dictionary.loaded then
        return dictionary
    end
    if not filename or filename == "" then
        filename = "lang-" .. language
    end
    filename = file.addsuffix(filename,"lua")
    local fullname = resolvers.findfile(filename)
    local specification = fullname and fullname ~= "" and dofile(fullname)
    if specification then
        local patterns = specification.patterns
        local patterndata = patterns and patterns.data
        if patterndata and patterndata ~= "" then
            lpegmatch(p_pattern,patterndata,1,dictionary.patterns,dictionary.specials)
        end
        local exceptions = specification.exceptions
        local exceptiondata = exceptions and exceptions.data
        if exceptiondata and exceptiondata ~= "" then
            lpegmatch(p_exception,exceptiondata,1,dictionary.exceptions)
        end
    end
    dictionary.loaded = true
    return dictionary
end
242
-- Character data tables used for lowercasing, uppercase lookups and category checks.
local lcchars = characters.lcchars
local uccodes = characters.uccodes
local categories = characters.categories
-- Counters: nofwords is incremented on every hyphenate call, nofhashed each
-- time a result is stored in a dictionary's result cache.
local nofwords = 0
local nofhashed = 0

-- Trace of the most recent traced hyphenation; filled by the show_* helpers.
local steps = nil
local f_show = formatters["%w%s"]
251
-- Reports the collected trace steps, framed by two empty report lines.
-- Only active when trace_steps is exactly true.
local function show_log()
    if trace_steps ~= true then
        return
    end
    report()
    local width = #steps[1][1]
    for index=1,#steps do
        local step = steps[index]
        local label = step[1]
        report("%s%w%S %S",label,width - #label + 3,step[2],step[3] or "")
    end
    report()
end
263
-- Starts a new trace: the first step shows the space separated word twice.
local function show_1(wsplit)
    local spaced = concat(wsplit," ")
    local left = f_show(0,spaced)
    local right = f_show(0,spaced)
    steps = { { left, right } }
end
268
-- Adds a trace step for one matching pattern.
--
--   c      : the matched substring
--   m      : the weights of the pattern
--   wsplit : the fenced word as array of characters
--   done   : the weights accumulated so far
--   i      : start position of the match in wsplit
--   spec   : optional replacement specification of the pattern
local function show_2(c,m,wsplit,done,i,spec)
    local pieces = lpegmatch(p_split,c)
    local mixed = { }
    -- interleave weights and characters
    for j=1,#m do
        mixed[#mixed+1] = m[j]
        mixed[#mixed+1] = pieces[j]
    end
    local indent = 2*i-2
    local details = spec and table.sequenced(spec) or ""
    local left
    if indent == 0 then
        left = f_show(indent, concat(mixed,"",2))
    elseif i+1 == #wsplit then
        left = f_show(indent-1,concat(mixed,"",1,#mixed-1))
    else
        left = f_show(indent-1,concat(mixed))
    end
    local right = f_show(1,concat(done," ",2,#done),details)
    steps[#steps+1] = { left, right }
end
289
-- Adds the final trace step (weights interleaved with the characters next to
-- the word with "-" at odd weights) and reports the whole trace.
local function show_3(wsplit,done)
    local marked = { }
    local weighted = { }
    for i=1,#wsplit do
        if i > 1 then
            local value = done[i]
            local breakable = i > 2 and value % 2 == 1
            marked[#marked+1] = breakable and "-" or " "
            weighted[#weighted+1] = value
        end
        local piece = wsplit[i]
        marked[#marked+1] = piece
        weighted[#weighted+1] = piece
    end
    local left = f_show(0,concat(weighted))
    local right = f_show(0,concat(marked))
    steps[#steps+1] = { left, right }
    show_log()
end
307
-- Trace for a word that was resolved by an exception: a single step that
-- shows the space separated word; the second argument is not used.
local function show_4(wsplit,done)
    local spaced = concat(wsplit," ")
    steps = { { spaced } }
    show_log()
end
312
-- Returns the steps table of the most recent trace (nil when there is none).
function traditional.lasttrace()
    local trace = steps
    return trace
end
316
317
318
319
320
321
322
323
324
325
-- Hyphenates one word with the patterns and exceptions of a dictionary.
--
--   dictionary : table with patterns, specials, exceptions and hyphenated (cache)
--   word       : a string or an array of characters
--   n          : optional number of characters of word to use
--
-- Returns false when no break point was found. When patterns matched, the
-- result is an array in which entry p describes the position after character
-- p: false (no break), true (break) or { specification, offset } for a break
-- with a replacement. When the word is a registered exception, the stored
-- exception array is returned as is. Results are cached in
-- dictionary.hyphenated under the original and the lowercased word.
local function hyphenate(dictionary,word,n)
    nofwords = nofwords + 1
    local hyphenated = dictionary.hyphenated
    local isstring = type(word) == "string"
    -- first try the cache with the word as given
    if isstring then
        local done = hyphenated[word]
        if done ~= nil then
            return done
        end
    elseif n then
        local done = hyphenated[concat(word,"",1,n)]
        if done ~= nil then
            return done
        end
    else
        local done = hyphenated[concat(word)]
        if done ~= nil then
            return done
        end
    end
    -- determine the cache key and make sure that word is an array of characters
    local key
    if isstring then
        key = word
        word = lpegmatch(p_split,word)
        if not n then
            n = #word
        end
    else
        if not n then
            n = #word
        end
        key = concat(word,"",1,n)
    end
    -- w becomes the lowercased word with a fence (".") at both ends, l its length
    local l = 1
    local w = { "." }

    for i=1,n do
        local c = word[i]

        l = l + 1
        w[l] = lcchars[c] or c
    end
    l = l + 1
    w[l] = "."
    -- c is the lowercased word without the fences
    local c = concat(w,"",2,l-1)

    -- second chance: the lowercased word might be cached already
    local done = hyphenated[c]
    if done ~= nil then
        hyphenated[key] = done
        nofhashed = nofhashed + 1
        return done
    end

    -- exceptions take precedence over patterns
    local exceptions = dictionary.exceptions
    local exception = exceptions[c]
    if exception then
        if trace_steps then
            show_4(w,exception)
        end
        hyphenated[key] = exception
        nofhashed = nofhashed + 1
        return exception
    end

    if trace_steps then
        show_1(w)
    end

    local specials = dictionary.specials
    local patterns = dictionary.patterns

    -- try every substring w[i..j] of the fenced word as pattern key
    local spec
    for i=1,l do
        for j=i,l do
            local c = concat(w,"",i,j)
            local m = patterns[c]
            if m then
                local s = specials[c]
                if not done then
                    -- first match: start with a weight of zero at every position
                    done = { }
                    spec = nil


                    for i=1,l do
                        done[i] = 0
                    end
                end


                -- merge the weights of this pattern: the highest weight per position wins
                for k=1,#m do
                    local new = m[k]
                    if not new then
                        break
                    elseif new == true then
                        report("fatal error")
                        break
                    elseif new > 0 then
                        local pos = i + k - 1
                        local old = done[pos]
                        if not old then

                        elseif new > old then
                            done[pos] = new
                            if s then
                                -- remember the specification when pos lies within its start/length range
                                local b = i + (s.start or 1) - 1
                                if b > 0 then
                                    local e = b + (s.length or 2) - 1
                                    if e > 0 then
                                        if pos >= b and pos <= e then
                                            if spec then
                                                spec[pos] = { s, k - 1 }
                                            else
                                                spec = { [pos] = { s, k - 1 } }
                                            end
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
                if trace_steps and done then
                    show_2(c,m,w,done,i,s)
                end
            end
        end
    end
    if trace_steps and done then
        show_3(w,done)
    end
    if done then
        -- convert the weights into the result: an odd weight permits a break;
        -- entries are shifted down by two so that index p refers to character p
        local okay = false
        for i=3,#done do
            if done[i] % 2 == 1 then
                done[i-2] = spec and spec[i] or true
                okay = true
            else
                done[i-2] = false
            end
        end
        if okay then
            -- drop the two trailing entries left over from the shift
            done[#done] = nil
            done[#done] = nil
        else
            done = false
        end
    else
        done = false
    end
    hyphenated[key] = done
    nofhashed = nofhashed + 1
    return done
end
479
-- Hyphenates word again (its cache entry is removed first) and returns the
-- steps table. Returns nothing for a missing or empty word.
function traditional.gettrace(language,word)
    if word and word ~= "" then
        local dictionary = dictionaries[language]
        if dictionary then
            dictionary.hyphenated[word] = nil
            hyphenate(dictionary,word)
            return steps
        end
    end
end
492
-- Table of hyphenation methods; an unknown name resolves to (and is stored
-- as) the built-in hyphenate function.
local methods = setmetatableindex(function(t,k)
    t[k] = hyphenate
    return hyphenate
end)

-- Installs hyphenation function f under name; an already installed name is
-- not replaced, a message is reported instead.
function traditional.installmethod(name,f)
    if not rawget(methods,name) then
        methods[name] = f
    else
        report("overloading %a is not permitted",name)
    end
end
502
-- Output fragments used by injecthyphens: a plain hyphen, "before-after",
-- and two three-group forms that carry the left and right characters.
local s_detail_1 = "-"
local f_detail_2 = formatters["%s-%s"]
local f_detail_3 = formatters["{%s}{%s}{}"]
local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"]

-- Returns word as a string with its break points made visible.
--
--   dictionary    : dictionary passed on to hyphenate
--   word          : the word (string)
--   specification : table with optional leftcharmin, rightcharmin, leftchar, rightchar
--
-- Returns false when word is missing and the unchanged word when there is no
-- specification or no break point was found.
function traditional.injecthyphens(dictionary,word,specification)
    if not word then
        return false
    end
    if not specification then
        return word
    end
    local hyphens = hyphenate(dictionary,word)
    if not hyphens then
        return word
    end




    -- from here on word is the array of characters
    local word = lpegmatch(p_split,word)
    local size = #word

    -- breaks are only considered at positions leftmin .. rightmin
    local leftmin = specification.leftcharmin or 2
    local rightmin = size - (specification.rightcharmin or leftmin)
    local leftchar = specification.leftchar
    local rightchar = specification.rightchar

    local result = { }
    local rsize = 0
    local position = 1

    while position <= size do
        if position >= leftmin and position <= rightmin then
            local hyphen = hyphens[position]
            if not hyphen then
                -- no break after this character
                rsize = rsize + 1
                result[rsize] = word[position]
                position = position + 1
            elseif hyphen == true then
                -- plain break after this character
                rsize = rsize + 1
                result[rsize] = word[position]
                rsize = rsize + 1
                if leftchar and rightchar then
                    result[rsize] = f_detail_3(rightchar,leftchar)
                else
                    result[rsize] = s_detail_1
                end
                position = position + 1
            else
                -- break with replacement: hyphen is either { specification, offset }
                -- or the specification itself (then the offset defaults to 1)
                local o, h = hyphen[2]
                if o then
                    h = hyphen[1]
                else
                    h = hyphen
                    o = 1
                end
                -- b .. e is the range of characters that gets replaced
                local b = position - o + (h.start or 1)
                local e = b + (h.length or 2) - 1
                if b > 0 and e >= b then
                    -- copy the characters that precede the replaced range
                    for i=1,b-position do
                        rsize = rsize + 1
                        result[rsize] = word[position]
                        position = position + 1
                    end
                    rsize = rsize + 1
                    if leftchar and rightchar then
                        result[rsize] = f_detail_4(h.before,rightchar,leftchar,h.after,concat(word,"",b,e))
                    else
                        result[rsize] = f_detail_2(h.before,h.after)
                    end
                    position = e + 1
                else

                    -- unusable range: keep the character as it is
                    rsize = rsize + 1
                    result[rsize] = word[position]
                    position = position + 1
                end
            end
        else
            rsize = rsize + 1
            result[rsize] = word[position]
            position = position + 1
        end
    end
    return concat(result)
end
590
do

    -- space separated lists of patterns or exceptions
    local spaces = space^1
    local word = C((1-space)^1)

    local with_tables = Carg(1) * Carg(2) * word

    local u_pattern = (with_tables / unregister_pattern + spaces)^1
    local r_pattern = (with_tables * Carg(3) / register_pattern + spaces)^1
    local e_pattern = (Carg(1) * word / register_exception + spaces)^1

    -- Registers the space separated patterns in str for language. When
    -- specification is exactly false the patterns are removed instead; a
    -- table specification is passed on to register_pattern, any other value
    -- is passed on as false.
    function traditional.registerpattern(language,str,specification)
        local dictionary = dictionaries[language]
        local patterns, specials = dictionary.patterns, dictionary.specials
        if specification ~= false then
            local spec = type(specification) == "table" and specification or false
            lpegmatch(r_pattern,str,1,patterns,specials,spec)
        else
            lpegmatch(u_pattern,str,1,patterns,specials)
        end
    end

    -- Registers the space separated exceptions in str for language.
    function traditional.registerexception(language,str)
        local exceptions = dictionaries[language].exceptions
        lpegmatch(e_pattern,str,1,exceptions)
    end

end
616
617
618
619if context then
620
621 local nodecodes = nodes.nodecodes
622 local disccodes = nodes.disccodes
623
624 local glyph_code = nodecodes.glyph
625 local disc_code = nodecodes.disc
626 local math_code = nodecodes.math
627 local hlist_code = nodecodes.hlist
628
629 local automaticdisc_code = disccodes.automatic
630 local regulardisc_code = disccodes.regular
631
632 local nuts = nodes.nuts
633 local tonode = nodes.tonode
634 local nodepool = nuts.pool
635
636 local new_disc = nodepool.disc
637 local new_penalty = nodepool.penalty
638
639 local getfield = nuts.getfield
640 local getfont = nuts.getfont
641 local getid = nuts.getid
642 local getattr = nuts.getattr
643 local getnext = nuts.getnext
644 local getprev = nuts.getprev
645 local getsubtype = nuts.getsubtype
646 local getlist = nuts.getlist
647 local getlanguage = nuts.getlanguage
648 local getattrlist = nuts.getattrlist
649 local setattrlist = nuts.setattrlist
650 local isglyph = nuts.isglyph
651 local ischar = nuts.ischar
652
653 local setchar = nuts.setchar
654 local setdisc = nuts.setdisc
655 local setlink = nuts.setlink
656 local setprev = nuts.setprev
657 local setnext = nuts.setnext
658
659 local insertbefore = nuts.insertbefore
660 local insertafter = nuts.insertafter
661 local copy_node = nuts.copy
662 local copylist = nuts.copylist
663 local remove_node = nuts.remove
664 local endofmath = nuts.endofmath
665 local node_tail = nuts.tail
666
667 local nexthlist = nuts.traversers.hlist
668 local nextdisc = nuts.traversers.disc
669
670 local setcolor = nodes.tracers.colors.set
671
672 local variables = interfaces.variables
673 local v_reset = variables.reset
674 local v_yes = variables.yes
675 local v_word = variables.word
676 local v_all = variables.all
677
678 local settings_to_array = utilities.parsers.settings_to_array
679
680 local unsetvalue = attributes.unsetvalue
681 local texsetattribute = tex.setattribute
682
683 local prehyphenchar = language.prehyphenchar
684 local posthyphenchar = language.posthyphenchar
685 local preexhyphenchar = language.preexhyphenchar
686 local postexhyphenchar = language.postexhyphenchar
687
688 local a_hyphenation = attributes.private("hyphenation")
689
690 local interwordpenalty = 5000
691
692 function traditional.loadpatterns(language)
693 return dictionaries[language]
694 end
695
696
697
698 setmetatableindex(dictionaries,function(t,k)
699 if type(k) == "string" then
700
701
702 languages.getnumber(k)
703 end
704 local specification = languages.getdata(k)
705 local dictionary = {
706 patterns = { },
707 exceptions = { },
708 hyphenated = { },
709 specials = { },
710 instance = false,
711 characters = { },
712 unicodes = { },
713 }
714 if specification then
715 local resources = specification.resources
716 if resources then
717 local characters = dictionary.characters or { }
718 local unicodes = dictionary.unicodes or { }
719 for i=1,#resources do
720 local r = resources[i]
721 if not r.in_dictionary then
722 r.in_dictionary = true
723 local patterns = r.patterns
724 if patterns then
725 local data = patterns.data
726 if data then
727
728 lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
729 end
730 local extra = patterns.extra
731 if extra then
732
733 lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials)
734 end
735 end
736 local exceptions = r.exceptions
737 if exceptions then
738 local data = exceptions.data
739 if data and data ~= "" then
740 lpegmatch(p_exception,data,1,dictionary.exceptions)
741 end
742 end
743 local usedchars = lpegmatch(p_split,patterns.characters)
744 for i=1,#usedchars do
745 local char = usedchars[i]
746 local code = utfbyte(char)
747 local upper = uccodes[code]
748 characters[char] = code
749 unicodes [code] = char
750 if type(upper) == "table" then
751 for i=1,#upper do
752 local u = upper[i]
753 unicodes[u] = utfchar(u)
754 end
755 else
756 unicodes[upper] = utfchar(upper)
757 end
758 end
759 end
760 end
761 dictionary.characters = characters
762 dictionary.unicodes = unicodes
763 setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end)
764 end
765 t[specification.number] = dictionary
766 dictionary.instance = specification.instance
767 end
768 t[k] = dictionary
769 return dictionary
770 end)
771
772
773
774
775
776
777
778
779
780
    -- Feature sets are kept in hyphenators.featuresets, indexed by number and by name.
    local featuresets = hyphenators.featuresets or { }
    hyphenators.featuresets = featuresets

    -- The number of defined feature sets lives in storage.shared; it starts at zero.
    storage.shared.noflanguagesfeaturesets = storage.shared.noflanguagesfeaturesets or 0

    local noffeaturesets = storage.shared.noflanguagesfeaturesets

    -- Register the featuresets table with the storage mechanism.
    storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets")
789
790
791
792 local function register(name,featureset)
793 noffeaturesets = noffeaturesets + 1
794 featureset.attribute = noffeaturesets
795 featuresets[noffeaturesets] = featureset
796 featuresets[name] = featureset
797 storage.shared.noflanguagesfeaturesets = noffeaturesets
798 return noffeaturesets
799 end
800
801 local function makeset(...)
802
803 local set = { }
804 for i=1,select("#",...) do
805 local list = select(i,...)
806 local kind = type(list)
807 local used = nil
808 if kind == "string" then
809 if list == v_all then
810
811 return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end)
812 elseif list ~= "" then
813 used = lpegmatch(p_split,list)
814 set = set or { }
815 for i=1,#used do
816 local char = used[i]
817 set[utfbyte(char)] = char
818 end
819 end
820 elseif kind == "table" then
821 if next(list) then
822 set = set or { }
823 for byte, char in next, list do
824 set[byte] = char == true and utfchar(byte) or char
825 end
826 elseif #list > 0 then
827 set = set or { }
828 for i=1,#list do
829 local l = list[i]
830 if type(l) == "number" then
831 set[l] = utfchar(l)
832 else
833 set[utfbyte(l)] = l
834 end
835 end
836 end
837 end
838 end
839 return set
840 end
841
842
843
    -- Characters used as hyphens when a feature set asks for the defaults.
    -- The value true stands for the character itself (see makeset).
    local defaulthyphens = {
        [0x002D] = true, -- hyphen-minus
        [0x00AD] = 0x002D, -- soft hyphen, stored with the code 0x002D as value



        [0x2010] = true, -- hyphen


        [0x2013] = true, -- en dash
        [0x2014] = true, -- em dash
















    }

    -- Characters used as joiners when a feature set asks for the defaults.
    local defaultjoiners = {
        [0x200C] = true, -- zero width non-joiner
        [0x200D] = true, -- zero width joiner
    }
876
877 local function somehyphenchar(c)
878 c = tonumber(c)
879 return c ~= 0 and c or nil
880 end
881
882 local function definefeatures(name,featureset)
883 local extrachars = featureset.characters
884 local hyphenchars = featureset.hyphens
885 local joinerchars = featureset.joiners
886 local alternative = featureset.alternative
887 local rightwordmin = tonumber(featureset.rightwordmin)
888 local charmin = tonumber(featureset.charmin)
889 local leftcharmin = tonumber(featureset.leftcharmin)
890 local rightcharmin = tonumber(featureset.rightcharmin)
891 local leftchar = somehyphenchar(featureset.leftchar)
892 local rightchar = somehyphenchar(featureset.rightchar)
893 local rightchars = featureset.rightchars
894local rightedge = featureset.rightedge
895local autohyphen = v_yes
896local hyphenonly = v_yes
897 rightchars = rightchars == v_word and true or tonumber(rightchars)
898 joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars
899 hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars
900
901 featureset.extrachars = makeset(joinerchars or "",extrachars or "")
902 featureset.hyphenchars = makeset(hyphenchars or "")
903 featureset.alternative = alternative or "hyphenate"
904 featureset.rightwordmin = rightwordmin and rightwordmin > 0 and rightwordmin or nil
905 featureset.charmin = charmin and charmin > 0 and charmin or nil
906 featureset.leftcharmin = leftcharmin and leftcharmin > 0 and leftcharmin or nil
907 featureset.rightcharmin = rightcharmin and rightcharmin > 0 and rightcharmin or nil
908 featureset.rightchars = rightchars
909 featureset.leftchar = leftchar
910 featureset.rightchar = rightchar
911
912featureset.autohyphen = autohyphen == v_yes
913featureset.hyphenonly = hyphenonly == v_yes
914 return register(name,featureset)
915 end
916
917 local function setfeatures(n)
918 if not n or n == v_reset then
919 n = false
920 else
921 local f = featuresets[n]
922 if not f and type(n) == "string" then
923 local t = settings_to_array(n)
924 local s = { }
925 for i=1,#t do
926 local ti = t[i]
927 local fs = featuresets[ti]
928 if fs then
929 for k, v in next, fs do
930 s[k] = v
931 end
932 end
933 end
934 n = register(n,s)
935 else
936 n = f and f.attribute
937 end
938 end
939 texsetattribute(a_hyphenation,n or unsetvalue)
940 end
941
    -- Make both functions available in the traditional namespace.
    traditional.definefeatures = definefeatures
    traditional.setfeatures = setfeatures

    -- Interface: definefeatures(name, settings); the settings table lists
    -- the accepted keys, those marked "integer" are passed as numbers.
    implement {
        name = "definehyphenationfeatures",
        actions = definefeatures,
        arguments = {
            "string",
            {
                { "characters" },
                { "hyphens" },
                { "joiners" },
                { "rightchars" },
                { "rightwordmin", "integer" },
                { "charmin", "integer" },
                { "leftcharmin", "integer" },
                { "rightcharmin", "integer" },
                { "leftchar", "integer" },
                { "rightchar", "integer" },
                { "alternative" },
                { "rightedge" },
            }
        }
    }

    -- Interface: setfeatures(name).
    implement {
        name = "sethyphenationfeatures",
        actions = setfeatures,
        arguments = "string"
    }

    -- Interface: registerpattern(language, patterns, flag); a flag of false
    -- removes the patterns (see traditional.registerpattern).
    implement {
        name = "registerhyphenationpattern",
        actions = traditional.registerpattern,
        arguments = { "string", "string", "boolean" }
    }

    -- Interface: registerexception(language, exceptions).
    implement {
        name = "registerhyphenationexception",
        actions = traditional.registerexception,
        arguments = "2 strings",
    }
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008 local starttiming = statistics.starttiming
1009 local stoptiming = statistics.stoptiming
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028 function traditional.hyphenate(head)
1029
1030 local first = head
1031 local tail = nil
1032 local last = nil
1033 local current = first
1034 local dictionary = nil
1035 local instance = nil
1036 local characters = nil
1037 local unicodes = nil
1038 local exhyphenchar = tex.exhyphenchar
1039 local extrachars = nil
1040 local hyphenchars = nil
1041 local language = nil
1042 local lastfont = nil
1043 local start = nil
1044 local stop = nil
1045 local word = { }
1046 local size = 0
1047
1048
1049 local leftexchar = false
1050 local rightexchar = false
1051 local leftmin = 0
1052 local rightmin = 0
1053 local charmin = 1
1054 local leftcharmin = nil
1055 local rightcharmin = nil
1056
1057 local rightwordmin = nil
1058 local rightchars = nil
1059 local leftchar = nil
1060 local rightchar = nil
1061 local attr = nil
1062 local lastwordlast = nil
1063 local hyphenated = hyphenate
1064
1065 local exhyphenpenalty = tex.exhyphenpenalty
1066 local hyphenpenalty = tex.hyphenpenalty
1067 local autohyphen = false
1068 local hyphenonly = false
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081 starttiming(traditional)
1082
1083 local function insertpenalty()
1084 local p = new_penalty(interwordpenalty)
1085 setattrlist(p,last)
1086 if trace_visualize then
1087 nuts.setvisual(p,"penalty")
1088 end
1089 last = getprev(last)
1090 first, last = insertafter(first,last,p)
1091 end
1092
1093 local function synchronizefeatureset(a)
1094 local f = a and featuresets[a]
1095 if f then
1096 hyphenated = methods[f.alternative or "hyphenate"]
1097 extrachars = f.extrachars
1098 hyphenchars = f.hyphenchars
1099 rightwordmin = f.rightwordmin
1100 charmin = f.charmin
1101 leftcharmin = f.leftcharmin
1102 rightcharmin = f.rightcharmin
1103 leftchar = f.leftchar
1104 rightchar = f.rightchar
1105
1106 rightchars = f.rightchars
1107 autohyphen = f.autohyphen
1108 hyphenonly = f.hyphenonly
1109 if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then
1110
1111 if not tail then
1112 tail = node_tail(first)
1113 end
1114 last = tail
1115 local inword = false
1116 local count = 0
1117 while last and rightwordmin > 0 do
1118 local id = getid(last)
1119 if id == glyph_code then
1120 count = count + 1
1121 inword = true
1122 if trace_visualize then
1123 setcolor(last,"darkgreen")
1124 end
1125 elseif inword then
1126 inword = false
1127 rightwordmin = rightwordmin - 1
1128 if rightchars == true then
1129 if rightwordmin > 0 then
1130 insertpenalty()
1131 end
1132 elseif rightchars and count <= rightchars then
1133 insertpenalty()
1134 end
1135 end
1136 last = getprev(last)
1137 end
1138 lastwordlast = rightwordmin
1139 end
1140 if not charmin or charmin == 0 then
1141 charmin = 1
1142 end
1143 else
1144 hyphenated = methods.hyphenate
1145 extrachars = false
1146 hyphenchars = false
1147 rightwordmin = false
1148 charmin = 1
1149 leftcharmin = false
1150 rightcharmin = false
1151 leftchar = false
1152 rightchar = false
1153
1154 autohyphen = false
1155 hyphenonly = false
1156 end
1157
1158 return a
1159 end
1160
1161 local function flush(hyphens)
1162
1163 local rightmin = size - rightmin
1164 local result = { }
1165 local rsize = 0
1166 local position = 1
1167
1168
1169
1170
1171
1172
1173
1174 while position <= size do
1175 if position >= leftmin and position <= rightmin then
1176 local hyphen = hyphens[position]
1177 if not hyphen then
1178 rsize = rsize + 1
1179 result[rsize] = word[position]
1180 position = position + 1
1181 elseif hyphen == true then
1182 rsize = rsize + 1
1183 result[rsize] = word[position]
1184 rsize = rsize + 1
1185 result[rsize] = true
1186 position = position + 1
1187 else
1188 local o, h = hyphen[2]
1189 if o then
1190
1191 h = hyphen[1]
1192 else
1193
1194 h = hyphen
1195 o = 1
1196 end
1197 local b = position - o + (h.start or 1)
1198 local e = b + (h.length or 2) - 1
1199 if b > 0 and e >= b then
1200 for i=1,b-position do
1201 rsize = rsize + 1
1202 result[rsize] = word[position]
1203 position = position + 1
1204 end
1205 rsize = rsize + 1
1206 result[rsize] = {
1207 h.before or "",
1208 h.after or "",
1209 concat(word,"",b,e),
1210 h.right,
1211 h.left,
1212 }
1213 position = e + 1
1214 else
1215
1216 rsize = rsize + 1
1217 result[rsize] = word[position]
1218 position = position + 1
1219 end
1220 end
1221 else
1222 rsize = rsize + 1
1223 result[rsize] = word[position]
1224 position = position + 1
1225 end
1226 end
1227
1228 local function serialize(replacement,leftchar,rightchar)
1229 if not replacement then
1230 return
1231 elseif replacement == true then
1232 local glyph = copy_node(stop)
1233 setchar(glyph,leftchar or rightchar)
1234 return glyph
1235 end
1236 local head = nil
1237 local current = nil
1238 if leftchar then
1239 head = copy_node(stop)
1240 current = head
1241 setchar(head,leftchar)
1242 end
1243 local rsize = #replacement
1244 if rsize == 1 then
1245 local glyph = copy_node(stop)
1246 setchar(glyph,characters[replacement])
1247 if head then
1248 insertafter(current,current,glyph)
1249 else
1250 head = glyph
1251 end
1252 current = glyph
1253 elseif rsize > 0 then
1254 local list = lpegmatch(p_split,replacement)
1255 for i=1,#list do
1256 local glyph = copy_node(stop)
1257 setchar(glyph,characters[list[i]])
1258 if head then
1259 insertafter(current,current,glyph)
1260 else
1261 head = glyph
1262 end
1263 current = glyph
1264 end
1265 end
1266 if rightchar then
1267 local glyph = copy_node(stop)
1268 insertafter(current,current,glyph)
1269 setchar(glyph,rightchar)
1270 end
1271 return head
1272 end
1273
1274 local current = start
1275 local attrnode = start
1276
1277 for i=1,rsize do
1278 local r = result[i]
1279 if r == true then
1280 local disc = new_disc()
1281 local pre = nil
1282 local post = nil
1283 if rightchar then
1284 pre = serialize(true,rightchar)
1285 end
1286 if leftchar then
1287 post = serialize(true,leftchar)
1288 end
1289 setdisc(disc,pre,post,nil,regulardisc_code,hyphenpenalty)
1290 if attrnode then
1291 setattrlist(disc,attrnode)
1292 end
1293
1294 insertbefore(first,current,disc)
1295 elseif type(r) == "table" then
1296 local disc = new_disc()
1297 local pre = r[1]
1298 local post = r[2]
1299 local replace = r[3]
1300 local right = r[4] ~= false and rightchar
1301 local left = r[5] ~= false and leftchar
1302 if pre then
1303 if pre ~= "" then
1304 pre = serialize(pre,false,right)
1305 else
1306 pre = nil
1307 end
1308 end
1309 if post then
1310 if post ~= "" then
1311 post = serialize(post,left,false)
1312 else
1313 post = nil
1314 end
1315 end
1316 if replace then
1317 if replace ~= "" then
1318 replace = serialize(replace)
1319 else
1320 replace = nil
1321 end
1322 end
1323
1324 setdisc(disc,pre,post,replace,regulardisc_code,hyphenpenalty)
1325 if attrnode then
1326 setattrlist(disc,attrnode)
1327 end
1328 insertbefore(first,current,disc)
1329 else
1330 setchar(current,characters[r])
1331 if i < rsize then
1332 current = getnext(current)
1333 end
1334 end
1335 end
1336 if current and current ~= stop then
1337 local current = getnext(current)
1338 local last = getnext(stop)
1339 while current ~= last do
1340 first, current = remove_node(first,current,true)
1341 end
1342 end
1343 end
1344
1345 local function inject(leftchar,rightchar,code,attrnode)
1346 if first ~= current then
1347 local disc = new_disc()
1348 first, current, glyph = remove_node(first,current)
1349 first, current = insertbefore(first,current,disc)
1350 if trace_visualize then
1351 setcolor(glyph,"darkred")
1352 setcolor(disc,"darkgreen")
1353 end
1354 local pre = nil
1355 local post = nil
1356 local replace = glyph
1357 if leftchar and leftchar > 0 then
1358 post = copy_node(glyph)
1359 setchar(post,leftchar)
1360 end
1361 pre = copy_node(glyph)
1362 setchar(pre,rightchar and rightchar > 0 and rightchar or code)
1363 setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty)
1364 if attrnode then
1365 setattrlist(disc,attrnode)
1366 end
1367 end
1368 return current
1369 end
1370
1371 local function injectseries(current,last,next,attrnode)
1372 local disc = new_disc()
1373 local start = current
1374 first, current = insertbefore(first,current,disc)
1375 setprev(start)
1376 setnext(last)
1377 if next then
1378 setlink(current,next)
1379 else
1380 setnext(current)
1381 end
1382 local pre = copylist(start)
1383 local post = nil
1384 local replace = start
1385 setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty)
1386 if attrnode then
1387 setattrlist(disc,attrnode)
1388 end
1389 return current
1390 end
1391
1392 local a = getattr(first,a_hyphenation)
1393 if a ~= attr then
1394 attr = synchronizefeatureset(a)
1395 end
1396
1397
1398
1399
1400
1401
1402 local skipping = false
1403
1404
1405
1406 while current and current ~= last do
1407 local code, id = isglyph(current)
1408 if code then
1409 if skipping then
1410 current = getnext(current)
1411 else
1412 local lang = getlanguage(current)
1413 local font = getfont(current)
1414 if lang ~= language or font ~= lastfont then
1415 if dictionary and size > charmin and leftmin + rightmin <= size then
1416
1417 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1418
1419 else
1420 local hyphens = hyphenated(dictionary,word,size)
1421 if hyphens then
1422 flush(hyphens)
1423 end
1424 end
1425 end
1426 lastfont = font
1427 if language ~= lang and lang > 0 then
1428
1429 dictionary = dictionaries[lang]
1430 instance = dictionary.instance
1431 characters = dictionary.characters
1432 unicodes = dictionary.unicodes
1433
1434 local a = getattr(current,a_hyphenation)
1435 attr = synchronizefeatureset(a)
1436 leftchar = leftchar or (instance and posthyphenchar (instance))
1437 rightchar = rightchar or (instance and prehyphenchar (instance))
1438 leftexchar = (instance and preexhyphenchar (instance))
1439 rightexchar = (instance and postexhyphenchar(instance))
1440 leftmin = leftcharmin or getfield(current,"lhmin")
1441 rightmin = rightcharmin or getfield(current,"rhmin")
1442 if not leftchar or leftchar < 0 then
1443 leftchar = false
1444 end
1445 if not rightchar or rightchar < 0 then
1446 rightchar = false
1447 end
1448
1449 local char = unicodes[code] or (extrachars and extrachars[code])
1450 if char then
1451 word[1] = char
1452 size = 1
1453 start = current
1454 else
1455 size = 0
1456 end
1457 else
1458 size = 0
1459 end
1460 language = lang
1461 elseif language <= 0 then
1462
1463 elseif size > 0 then
1464 local char = unicodes[code] or (extrachars and extrachars[code])
1465 if char then
1466 size = size + 1
1467 word[size] = char
1468 elseif dictionary then
1469 if not hyphenonly or code ~= exhyphenchar then
1470 if size > charmin and leftmin + rightmin <= size then
1471 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1472
1473 else
1474 local hyphens = hyphenated(dictionary,word,size)
1475 if hyphens then
1476 flush(hyphens)
1477 end
1478 end
1479 end
1480 end
1481 size = 0
1482 if code == exhyphenchar then
1483 local next = getnext(current)
1484 local last = current
1485 local font = getfont(current)
1486 while next and ischar(next,font) == code do
1487 last = next
1488 next = getnext(next)
1489 end
1490 if not autohyphen then
1491 current = last
1492 elseif current == last then
1493 current = inject(leftexchar,rightexchar,code,current)
1494 else
1495 current = injectseries(current,last,next,current)
1496 end
1497 if hyphenonly then
1498 skipping = true
1499 end
1500 elseif hyphenchars then
1501 local char = hyphenchars[code]
1502 if char == true then
1503 char = code
1504 end
1505 if char then
1506 current = inject(leftchar and char or nil,rightchar and char or nil,char,current)
1507 end
1508 end
1509 end
1510 else
1511 local a = getattr(current,a_hyphenation)
1512 if a ~= attr then
1513 attr = synchronizefeatureset(a)
1514 leftchar = leftchar or (instance and posthyphenchar (instance))
1515 rightchar = rightchar or (instance and prehyphenchar (instance))
1516 leftexchar = (instance and preexhyphenchar (instance))
1517 rightexchar = (instance and postexhyphenchar(instance))
1518 leftmin = leftcharmin or getfield(current,"lhmin")
1519 rightmin = rightcharmin or getfield(current,"rhmin")
1520 if not leftchar or leftchar < 0 then
1521 leftchar = false
1522 end
1523 if not rightchar or rightchar < 0 then
1524 rightchar = false
1525 end
1526 end
1527
1528 local char = unicodes[code] or (extrachars and extrachars[code])
1529 if char then
1530 word[1] = char
1531 size = 1
1532 start = current
1533 end
1534 end
1535 stop = current
1536 current = getnext(current)
1537 end
1538 else
1539 if skipping then
1540 skipping = false
1541 end
1542 if id == disc_code then
1543 size = 0
1544 current = getnext(current)
1545 if hyphenonly then
1546 skipping = true
1547 end
1548
1549
1550
1551 else
1552 current = id == math_code and getnext(endofmath(current)) or getnext(current)
1553 end
1554 if size > 0 then
1555 if dictionary and size > charmin and leftmin + rightmin <= size then
1556 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1557
1558 else
1559 local hyphens = hyphenated(dictionary,word,size)
1560 if hyphens then
1561 flush(hyphens)
1562 end
1563 end
1564 end
1565 size = 0
1566 end
1567 end
1568 end
1569
1570
1571 if dictionary and size > charmin and leftmin + rightmin <= size then
1572 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1573
1574 else
1575 local hyphens = hyphenated(dictionary,word,size)
1576 if hyphens then
1577 flush(hyphens)
1578 end
1579 end
1580 end
1581
1582 stoptiming(traditional)
1583
1584 return head
1585 end
1586
-- Registers the reporter for the "hyphenation" statistic. It only returns a
-- message when words were counted or when time was recorded for this
-- hyphenator; otherwise it returns nothing.
statistics.register("hyphenation",function()
    local active = nofwords > 0 or statistics.elapsed(traditional) > 0
    if not active then
        return
    end
    return string.format(
        "%s words hyphenated, %s unique, used time %s",
        nofwords,
        nofhashed,
        statistics.elapsedseconds(traditional) or 0
    )
end)
1593
-- The name of the tex method; the old and new method names start out with the
-- same value.
local texmethod            = "builders.kernel.hyphenation"
local oldmethod, newmethod = texmethod, texmethod
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
-- Bookkeeping for the selectable methods: the table with known methods, the
-- method that is currently used and the stack for pushing and popping.
local hyphenate   = language.hyphenate
local hyphenating = nuts.hyphenating
local methods     = { }
local usedmethod  = false
local stack       = { }

-- The built-in hyphenator, wrapped so that it takes a head and returns one.
-- When nuts.hyphenating is there it gets called and only its first result is
-- passed on; otherwise language.hyphenate is applied to tonode(head) and the
-- head that came in is returned.
local original

if hyphenating then
    original = function(head)
        return (hyphenating(head))
    end
else
    original = function(head)
        hyphenate(tonode(head))
        return head
    end
end
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
-- The method table is made public. The optimize flag is consulted by the
-- handler for hbox groups; it is off unless the hyphenator.optimize directive
-- sets it.
hyphenators.methods = methods

local getcount = tex.getcount
local optimize = false

directives.register("hyphenator.optimize", function(enabled)
    optimize = enabled
end)
1644
-- Applies the method that is currently in use to the given list.
--
--   head      : the list to process
--   groupcode : the group that the list belongs to
--
-- Without a method the head is returned untouched. When the optimize flag is
-- set and the group is an hbox or adjustedhbox the method is only applied when
-- the hyphenstate counter is positive. In all other cases the result of the
-- method is returned.
--
-- NOTE(review): forced is assigned without a declaration in the visible part of
-- the file and is not read here; confirm that it is not an accidental global.
function hyphenators.handler(head,groupcode)
    if not usedmethod then
        return head
    end
    local inhbox = groupcode == "hbox" or groupcode == "adjustedhbox"
    if not (optimize and inhbox) then
        return usedmethod(head)
    end
    if getcount("hyphenstate") > 0 then
        forced = false
        return usedmethod(head)
    end
    return head
end
1661
-- The names tex, original and expanded all select the built-in hyphenator,
-- traditional selects languages.hyphenators.traditional.hyphenate, and none is
-- false, which makes the handler return the list untouched. The built-in
-- hyphenator is the initial method.
methods.tex, methods.original, methods.expanded = original, original, original

methods.traditional = languages.hyphenators.traditional.hyphenate
methods.none        = false

usedmethod = original
1669
-- Selects the method with the given name. An unknown name falls back to the tex
-- method, while a name that maps onto false (like none) or a value that is not
-- a string results in false, so that the handler does nothing.
local function setmethod(method)
    local selected = false
    if type(method) == "string" then
        selected = methods[method]
    end
    if selected == nil then
        selected = methods.tex
    end
    usedmethod = selected
end
-- Saves the method that is in use on the stack and then selects the one with
-- the given name, using the same rules as setmethod: an unknown name gives the
-- tex method and anything that is not a string gives false.
local function pushmethod(method)
    insert(stack,usedmethod)
    local isname = type(method) == "string"
    local chosen = isname and methods[method]
    if chosen == nil then
        usedmethod = methods.tex
    else
        usedmethod = chosen
    end
end
-- Restores the method that was saved by the last pushmethod. The tex method is
-- used when the stack is empty.
--
-- A saved method can be false (that is what the none method maps onto), so the
-- popped value is compared with nil instead of being tested for truth; an "or"
-- fallback would turn a saved none into the tex method.
local function popmethod()
    local previous = nil
    if #stack > 0 then
        previous = remove(stack)
    end
    if previous == nil then
        previous = methods.tex
    end
    usedmethod = previous
end
1686
-- Make the method switching functions available at the Lua end and let the
-- hyphenators.method directive select a method too.
hyphenators.setmethod, hyphenators.pushmethod, hyphenators.popmethod = setmethod, pushmethod, popmethod

directives.register("hyphenators.method",setmethod)
1692
-- Applies a setup specification. Only the method field is looked at: when it is
-- set the method is selected with setmethod, otherwise nothing changes.
function hyphenators.setup(specification)
    local wanted = specification.method
    if not wanted then
        return
    end
    setmethod(wanted)
end
1699
-- Interfaces for selecting, pushing and popping the hyphenation method.
implement {
    name      = "sethyphenationmethod",
    actions   = setmethod,
    arguments = "string",
}

implement {
    name      = "pushhyphenation",
    actions   = pushmethod,
    arguments = "string",
}

implement {
    name    = "pophyphenation",
    actions = popmethod,
}
1703
1704
1705
local context = context

local ctx_NC, ctx_NR, ctx_verbatim = context.NC, context.NR, context.verbatim

-- Typesets the steps that traditional.gettrace reports for a word as a
-- tabulate with four columns: a number followed by the first three fields of
-- each step, shown verbatim. The first and the last row get no number and the
-- rows in between are numbered from one.
--
--   language : passed on to traditional.gettrace
--   word     : the word to trace; nothing happens when it is nil or empty
--
-- While the trace is collected trace_steps is set to "silent"; the old value is
-- put back afterwards. Nothing is typeset when there are no steps.
function hyphenators.showhyphenationtrace(language,word)
    if not word or word == "" then
        return
    end
    local saved = trace_steps
    trace_steps = "silent"
    local steps = traditional.gettrace(language,word)
    trace_steps = saved
    local count = steps and #steps or 0
    if count == 0 then
        return
    end
    context.starttabulate { "|r|l|l|l|" }
    for row=1,count do
        local step = steps[row]
        ctx_NC()
        if row > 1 and row < count then
            context(row-1)
        end
        for column=1,3 do
            ctx_NC()
            ctx_verbatim(step[column])
        end
        ctx_NC()
        ctx_NR()
    end
    context.stoptabulate()
end
1736
-- Interface for the trace table; it takes two string arguments that are passed
-- on to hyphenators.showhyphenationtrace.
local showhyphenationtrace = hyphenators.showhyphenationtrace

implement { name = "showhyphenationtrace", actions = showhyphenationtrace, arguments = "2 strings" }
1742
-- Removes the discretionaries found by nextdisc in the list of every hlist that
-- nexthlist finds in the given list. The head that was passed in is returned.
--
-- Before this fix the removal was called with an undefined head and without the
-- disc that was found, so nothing was ever removed. Now the discs are collected
-- first and removed afterwards, so that the traverser never has to continue
-- from a node that is already gone. The list is stored back in the hlist
-- because its first node changes when that one was a disc.
--
-- NOTE(review): the discs are removed with the free flag set, so presumably
-- whatever sits in their replace part goes with them; confirm that this is
-- what callers want.
function nodes.stripdiscretionaries(head)
    local setlist = nuts.setlist
    for l in nexthlist, head do
        local list = getlist(l)
        if list then
            local discs = { }
            local count = 0
            for d in nextdisc, list do
                count = count + 1
                discs[count] = d
            end
            if count > 0 then
                for i=1,count do
                    list = remove_node(list,discs[i],true)
                end
                setlist(l,list)
            end
        end
    end
    return head
end
1751
1752
1753else
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804end
1805
1806 |