-- Register this module's metadata under the key 'lang-hyp' in the global
-- `modules` table, creating that table first when it does not exist yet.
if not modules then modules = { } end modules ['lang-hyp'] = {
    version   = 1.001,
    comment   = "companion to lang-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77local type, rawget, rawset, tonumber, next = type, rawget, rawset, tonumber, next
78
79local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs
80local lpegmatch = lpeg.match
81
82local context = context
83
84local concat = table.concat
85local insert = table.insert
86local remove = table.remove
87local formatters = string.formatters
88local utfchar = utf.char
89local utfbyte = utf.byte
90
91if not characters then
92 require("char-ini")
93end
94
95local setmetatableindex = table.setmetatableindex
96
97
98
-- Tracing flags. Each one starts out false and is updated by the callback that
-- is registered with the trackers subsystem under the given name.
local trace_steps     = false  trackers.register("hyphenator.steps",    function(v) trace_steps     = v end)
local trace_visualize = false  trackers.register("hyphenator.visualize",function(v) trace_visualize = v end)

local report = logs.reporter("hyphenator")

-- When the interfaces layer is absent we fall back to a function that does nothing.
local implement = interfaces and interfaces.implement or function() end

-- Create (or reuse) the languages.hyphenators.traditional namespace; existing
-- tables are kept so that reloading this file does not discard them.
languages                = languages or { }
local hyphenators        = languages.hyphenators or { }
languages.hyphenators    = hyphenators
local traditional        = hyphenators.traditional or { }
hyphenators.traditional  = traditional
111
-- Store of dictionaries, indexed by language. Asking for a language that is not
-- present yet creates an empty record (no patterns, no exceptions, not loaded),
-- stores it under that key and hands it back.
local dictionaries = setmetatableindex(function(store,language)
    local entry = { }
    entry.patterns   = { }
    entry.hyphenated = { }
    entry.specials   = { }
    entry.exceptions = { }
    entry.loaded     = false
    store[language]  = entry
    return entry
end)

hyphenators.dictionaries = dictionaries
125
-- Basic LPeg building blocks used by the pattern and exception parsers below.
local character      = lpeg.patterns.utf8character -- presumably one complete utf-8 character (defined elsewhere)
local digit          = R("09")
local weight         = digit/tonumber + Cc(0)      -- a digit as number, or 0 without consuming input
local fence          = P(".")
local hyphen         = P("-")
local space          = P(" ")
local char           = character - space
local validcharacter = (character - S("./"))
local keycharacter   = character - S("/")

-- Optional suffix of a pattern: "/before=after" optionally followed by
-- ",start,length". The captures are folded (with rawset) into one table with
-- the keys "before", "after" and, when present, the numeric "start" and "length".
local specpart = (P("/") * Cf ( Ct("") *
        Cg ( Cc("before") * C((1-P("="))^1) * P("=") ) *
        Cg ( Cc("after")  * C((1-P(","))^1)  ) *
        (   P(",") *
            Cg ( Cc("start")  * ((1-P(","))^1/tonumber) * P(",") ) *
            Cg ( Cc("length") * ((1-P(-1) )^1/tonumber)          )
        )^-1
    , rawset))^-1

-- Hash key of a pattern: the text up to a "/" with all digits removed.
local make_hashkey_p = Cs((digit/"" + keycharacter)^1)

-- Hash key of an exception: the text up to a "/" with all hyphens removed.
local make_hashkey_e = Cs((hyphen/"" + keycharacter)^1)
-- Exception as a table with one boolean per character after the first one:
-- true when that character is preceded by a hyphen, false otherwise.
local make_pattern_e = Ct(P(char) * (hyphen * Cc(true) * P(char) + P(char) * Cc(false))^1)
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
-- Turns a string into a table in which every single byte has been passed
-- through tonumber (the input built below consists of digits only).
local make_pattern_c = Ct((P(1)/tonumber)^1)

-- Memoizes the conversion above: equal digit strings share one table.
local cache = setmetatableindex(function(t,k)
    local v = lpegmatch(make_pattern_c,k)
    t[k] = v
    return v
end)

-- Inside the substitution capture below a pattern is rewritten into a plain
-- string of digits: a missing weight becomes "0", a fence (".") becomes "0"
-- and the characters themselves are dropped. That string is then used as index
-- into the cache, so the final capture is the table of numeric weights.
local weight_n       = digit + Cc("0")
local fence_n        = fence / "0"
local char_n         = validcharacter / ""
local basepart_n     = Cs(fence_n^-1 * (weight_n * char_n)^1 * weight_n * fence_n^-1) / cache
-- Weights table followed by the optional specification table (see specpart).
local make_pattern_n = basepart_n * specpart
179
-- Stores one pattern. The key is the pattern without its digits, the value is
-- the table of weights. A special (replacement) specification is stored too:
-- an explicitly passed one wins over the one embedded in the pattern string.
local function register_pattern(patterns,specials,str,specification)
    local key = lpegmatch(make_hashkey_p,str)
    local weights, embedded = lpegmatch(make_pattern_n,str)
    patterns[key] = weights
    local special = specification or embedded
    if special then
        specials[key] = special
    end
end
191
-- Forgets a pattern: both the weights and a possible special specification
-- that were stored under the digit-free key are removed.
local function unregister_pattern(patterns,specials,str)
    local key = lpegmatch(make_hashkey_p,str)
    specials[key] = nil
    patterns[key] = nil
end
197
198local p_lower = lpeg.patterns.utf8lower
199
-- Stores one exception. The string is first passed through p_lower; the key is
-- that result without hyphens, the value the table of booleans produced by
-- make_pattern_e. The third argument is accepted but not used.
local function register_exception(exceptions,str,specification)
    local lowered = lpegmatch(p_lower,str)
    local key     = lpegmatch(make_hashkey_e,lowered)
    exceptions[key] = lpegmatch(make_pattern_e,lowered)
end
206
-- Scanners for blobs of patterns and exceptions: every run of non-space
-- characters is handed to the matching register function together with the
-- extra arguments passed to lpegmatch (the target tables); anything else is
-- skipped one position at a time.
local p_pattern   = ((Carg(1) * Carg(2) * C(char^1)) / register_pattern   + 1)^1
local p_exception = ((Carg(1)           * C(char^1)) / register_exception + 1)^1
-- Splits a string into a table of characters (fails, so returns nil, on an empty string).
local p_split     = Ct(C(character)^1)
210
-- Loads the patterns and exceptions of a language from a Lua specification
-- file and returns the dictionary of that language.
--
-- language : key into the dictionaries store
-- filename : optional file name; defaults to "lang-<language>", the suffix
--            "lua" is added by file.addsuffix
--
-- The file is only looked up once per language: the dictionary is marked as
-- loaded afterwards, also when no file was found. A specification without a
-- patterns table is accepted; in that case only exceptions are loaded and the
-- hyphen minima are left untouched (the original code indexed the missing
-- table and raised an error).
function traditional.loadpatterns(language,filename)
    local dictionary = dictionaries[language]
    if not dictionary.loaded then
        if not filename or filename == "" then
            filename = "lang-" .. language
        end
        filename = file.addsuffix(filename,"lua")
        local fullname = resolvers.findfile(filename)
        if fullname and fullname ~= "" then
            local specification = dofile(fullname)
            if specification then
                local patterns = specification.patterns
                if patterns then
                    local data = patterns.data
                    if data and data ~= "" then
                        lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
                    end
                    -- only available when the file provides a patterns table
                    dictionary.lefthyphenmin  = patterns.lefthyphenmin
                    dictionary.righthyphenmin = patterns.righthyphenmin
                end
                local exceptions = specification.exceptions
                if exceptions then
                    local data = exceptions.data
                    if data and data ~= "" then
                        lpegmatch(p_exception,data,1,dictionary.exceptions)
                    end
                end
            end
        end
        dictionary.loaded = true
    end
    return dictionary
end
244
-- Character property tables provided by the characters module.
local lcchars    = characters.lcchars
local uccodes    = characters.uccodes
local categories = characters.categories
-- Counters: words passed to hyphenate and results stored in a dictionary cache.
local nofwords   = 0
local nofhashed  = 0

-- Trace of the last traced hyphenation (filled by the show_* helpers).
local steps      = nil
local f_show     = formatters["%w%s"]
253
-- Reports the collected steps, but only when the steps tracker has been set to
-- exactly true. The first column is padded relative to the length of the first
-- entry of the first step.
local function show_log()
    if trace_steps ~= true then
        return
    end
    report()
    local width = #steps[1][1]
    for index=1,#steps do
        local step  = steps[index]
        local first = step[1]
        report("%s%w%S  %S",first,width - #first + 3,step[2] or first,step[3] or "")
    end
    report()
end
265
-- Starts a new trace: the first step shows the split word, separated by
-- spaces, in both columns.
local function show_1(wsplit)
    local joined = concat(wsplit," ")
    local entry  = { f_show(0,joined), f_show(0,joined) }
    steps = { entry }
end
270
-- Adds a step for one matching pattern: the left column shows the weights of
-- the pattern interleaved with its characters (indented according to the start
-- position i), the right column the accumulated weights so far.
local function show_2(c,m,wsplit,done,i,spec)
    local chars  = lpegmatch(p_split,c)
    local merged = { }
    for j=1,#m do
        merged[#merged+1] = m[j]
        merged[#merged+1] = chars[j]
    end
    local indent = 2*i - 2
    local extra  = spec and table.sequenced(spec) or ""
    local left
    if indent == 0 then
        -- pattern starts at the leading fence: drop the first weight
        left = f_show(indent,  concat(merged,"",2))
    elseif i + 1 == #wsplit then
        -- drop the last entry
        left = f_show(indent-1,concat(merged,"",1,#merged-1))
    else
        left = f_show(indent-1,concat(merged))
    end
    local right = f_show(1,concat(done," ",2,#done),extra)
    steps[#steps+1] = { left, right }
end
291
-- Adds the final step and flushes the log. The first column interleaves the
-- characters with the final weights, the second column shows a "-" before each
-- character (from the third on) whose weight is odd and a space otherwise.
local function show_3(wsplit,done)
    local marked = { }
    local raw    = { }
    for index=1,#wsplit do
        if index > 1 then
            local value = done[index]
            local mark  = " "
            if index > 2 and value % 2 == 1 then
                mark = "-"
            end
            marked[#marked+1] = mark
            raw[#raw+1] = value
        end
        local piece = wsplit[index]
        marked[#marked+1] = piece
        raw[#raw+1] = piece
    end
    steps[#steps+1] = { f_show(0,concat(raw)), f_show(0,concat(marked)) }
    show_log()
end
309
-- Trace for a word that was resolved by an exception: a single step with the
-- split word is recorded and logged. The second argument is not used.
local function show_4(wsplit,done)
    local line = concat(wsplit," ")
    steps = { { line } }
    show_log()
end
314
-- Gives access to the steps recorded by the most recent traced run; this is
-- nil as long as nothing has been traced.
function traditional.lasttrace()
    local recorded = steps
    return recorded
end
318
319
320
321
322
323
324
325
326
327
-- Hyphenates one word using the patterns and exceptions of a dictionary.
--
-- dictionary : record with the tables patterns, specials, exceptions and hyphenated
-- word       : a string, or a table with one character per entry
-- n          : optional number of entries of word to use (defaults to #word)
--
-- The result is false when no break point was found, otherwise a table with
-- one entry per character position: false (no break), true (a regular break)
-- or a table { special, offset } taken from a special specification. When an
-- exception applies, the table stored for that exception is returned as is.
-- Results are cached in dictionary.hyphenated, both under the word as given
-- and (implicitly, via the second lookup) under its lowercased form.
local function hyphenate(dictionary,word,n)
    nofwords = nofwords + 1
    local hyphenated = dictionary.hyphenated
    local isstring = type(word) == "string"
    -- first try the cache with the word as given
    if isstring then
        local done = hyphenated[word]
        if done ~= nil then
            return done
        end
    elseif n then
        local done = hyphenated[concat(word,"",1,n)]
        if done ~= nil then
            return done
        end
    else
        local done = hyphenated[concat(word)]
        if done ~= nil then
            return done
        end
    end
    -- normalize: key is the string form, word the table of characters
    local key
    if isstring then
        key = word
        word = lpegmatch(p_split,word)
        if not n then
            n = #word
        end
    else
        if not n then
            n = #word
        end
        key = concat(word,"",1,n)
    end
    -- w is the lowercased word between two fences ("."), l its length
    local l = 1
    local w = { "." }

    for i=1,n do
        local c = word[i]

        l = l + 1
        w[l] = lcchars[c] or c
    end
    l = l + 1
    w[l] = "."
    -- the lowercased word without the fences
    local c = concat(w,"",2,l-1)

    -- second cache lookup, now with the lowercased form
    local done = hyphenated[c]
    if done ~= nil then
        hyphenated[key] = done
        nofhashed = nofhashed + 1
        return done
    end

    -- exceptions overrule the patterns
    local exceptions = dictionary.exceptions
    local exception = exceptions[c]
    if exception then
        if trace_steps then
            show_4(w,exception)
        end
        hyphenated[key] = exception
        nofhashed = nofhashed + 1
        return exception
    end

    if trace_steps then
        show_1(w)
    end

    local specials = dictionary.specials
    local patterns = dictionary.patterns

    -- try every substring w[i..j] of the fenced word as pattern key
    local spec
    for i=1,l do
        for j=i,l do
            local c = concat(w,"",i,j)
            local m = patterns[c]
            if m then
                local s = specials[c]
                if not done then
                    -- first match: start with a weight of zero at every position
                    done = { }
                    spec = nil


                    for i=1,l do
                        done[i] = 0
                    end
                end


                -- merge the weights of this pattern: the highest weight wins
                for k=1,#m do
                    local new = m[k]
                    if not new then
                        break
                    elseif new == true then
                        report("fatal error")
                        break
                    elseif new > 0 then
                        local pos = i + k - 1
                        local old = done[pos]
                        if not old then
                            -- position outside the word: ignored
                        elseif new > old then
                            done[pos] = new
                            if s then
                                -- remember the special when this position falls
                                -- inside the range [b,e] derived from its
                                -- start and length (defaults 1 and 2)
                                local b = i + (s.start  or 1) - 1
                                if b > 0 then
                                    local e = b + (s.length or 2) - 1
                                    if e > 0 then
                                        if pos >= b and pos <= e then
                                            if spec then
                                                spec[pos] = { s, k - 1 }
                                            else
                                                spec = { [pos] = { s, k - 1 } }
                                            end
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
                if trace_steps and done then
                    show_2(c,m,w,done,i,s)
                end
            end
        end
    end
    if trace_steps and done then
        show_3(w,done)
    end
    if done then
        -- Convert weights into break information: an odd weight at index i
        -- marks a break and is stored at index i-2 (either the remembered
        -- special or true); all other slots become false.
        local okay = false
        for i=3,#done do
            if done[i] % 2 == 1 then
                done[i-2] = spec and spec[i] or true
                okay = true
            else
                done[i-2] = false
            end
        end
        if okay then
            -- drop the two trailing slots that still hold weights
            done[#done] = nil
            done[#done] = nil
        else
            done = false
        end
    else
        done = false
    end
    hyphenated[key] = done
    nofhashed = nofhashed + 1
    return done
end
481
-- Hyphenates a word again (its cached result is dropped first) and returns the
-- recorded steps. Nothing is returned for a missing or empty word.
function traditional.gettrace(language,word)
    if word and word ~= "" then
        local dictionary = dictionaries[language]
        if dictionary then
            dictionary.hyphenated[word] = nil
            hyphenate(dictionary,word)
            return steps
        end
    end
end
494
-- Registry of hyphenation methods. A name that has not been installed resolves
-- to the default hyphenate function, which is then stored under that name.
local methods = setmetatableindex(function(t,k) local v = hyphenate t[k] = v return v end)
496
-- Installs a hyphenation method under a name. A name that already has an entry
-- in the registry (installed, or resolved to the default before) cannot be
-- overloaded; that attempt is only reported.
function traditional.installmethod(name,f)
    if not rawget(methods,name) then
        methods[name] = f
    else
        report("overloading %a is not permitted",name)
    end
end
504
-- Textual representations of a break point used by injecthyphens.
local s_detail_1 =            "-"                 -- regular break, no explicit hyphen chars
local f_detail_2 = formatters["%s-%s"]            -- special break: before-after
local f_detail_3 = formatters["{%s}{%s}{}"]       -- regular break with explicit hyphen chars
local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"] -- special break with explicit hyphen chars

-- Returns the word as a string with its break points made visible.
--
-- dictionary    : dictionary record passed on to hyphenate
-- word          : string to hyphenate; false is returned when it is missing
-- specification : table with the optional fields leftcharmin, rightcharmin,
--                 leftchar and rightchar; without it the word is returned as is
--
-- The word itself is also returned when hyphenate finds no break points.
function traditional.injecthyphens(dictionary,word,specification)
    if not word then
        return false
    end
    if not specification then
        return word
    end
    local hyphens = hyphenate(dictionary,word)
    if not hyphens then
        return word
    end




    local word      = lpegmatch(p_split,word)
    local size      = #word

    -- break points are only honoured for positions in [leftmin,rightmin]
    local leftmin   = specification.leftcharmin or 2
    local rightmin  = size - (specification.rightcharmin or leftmin)
    local leftchar  = specification.leftchar
    local rightchar = specification.rightchar

    local result    = { }
    local rsize     = 0
    local position  = 1

    while position <= size do
        if position >= leftmin and position <= rightmin then
            local hyphen = hyphens[position]
            if not hyphen then
                rsize = rsize + 1
                result[rsize] = word[position]
                position = position + 1
            elseif hyphen == true then
                -- regular break after this character
                rsize = rsize + 1
                result[rsize] = word[position]
                rsize = rsize + 1
                if leftchar and rightchar then
                    result[rsize] = f_detail_3(rightchar,leftchar)
                else
                    result[rsize] = s_detail_1
                end
                position = position + 1
            else
                -- special break: either { special, offset } or the special itself
                local o, h = hyphen[2]
                if o then
                    h = hyphen[1]
                else
                    h = hyphen
                    o = 1
                end
                -- b..e is the range of characters covered by the special
                local b = position - o + (h.start  or 1)
                local e = b + (h.length or 2) - 1
                if b > 0 and e >= b then
                    -- copy the characters in front of the range
                    for i=1,b-position do
                        rsize = rsize + 1
                        result[rsize] = word[position]
                        position = position + 1
                    end
                    rsize = rsize + 1
                    if leftchar and rightchar then
                        result[rsize] = f_detail_4(h.before,rightchar,leftchar,h.after,concat(word,"",b,e))
                    else
                        result[rsize] = f_detail_2(h.before,h.after)
                    end
                    position = e + 1
                else
                    -- invalid range: treat as a normal character
                    rsize = rsize + 1
                    result[rsize] = word[position]
                    position = position + 1
                end
            end
        else
            rsize = rsize + 1
            result[rsize] = word[position]
            position = position + 1
        end
    end
    return concat(result)
end
592
do

    -- A list of entries is a string with space separated words.
    local spaces  = space^1
    local oneword = C((1-space)^1)

    local p_unregister = (Carg(1) * Carg(2) * oneword           / unregister_pattern + spaces)^1
    local p_register   = (Carg(1) * Carg(2) * oneword * Carg(3) / register_pattern   + spaces)^1
    local p_except     = (Carg(1)           * oneword           / register_exception + spaces)^1

    -- Registers the patterns in str for a language. Passing false as
    -- specification removes them instead; a table is stored as special
    -- specification with each pattern, any other value is ignored.
    function traditional.registerpattern(language,str,specification)
        local dictionary = dictionaries[language]
        local patterns   = dictionary.patterns
        local specials   = dictionary.specials
        if specification == false then
            lpegmatch(p_unregister,str,1,patterns,specials)
        else
            local special = false
            if type(specification) == "table" then
                special = specification
            end
            lpegmatch(p_register,str,1,patterns,specials,special)
        end
    end

    -- Registers the exceptions in str for a language.
    function traditional.registerexception(language,str)
        local exceptions = dictionaries[language].exceptions
        lpegmatch(p_except,str,1,exceptions)
    end

end
618
619
620
621if context then
622
623 local nodecodes = nodes.nodecodes
624 local disccodes = nodes.disccodes
625
626 local glyph_code = nodecodes.glyph
627 local disc_code = nodecodes.disc
628 local math_code = nodecodes.math
629 local hlist_code = nodecodes.hlist
630
631 local automaticdisc_code = disccodes.automatic
632 local regulardisc_code = disccodes.regular
633
634 local nuts = nodes.nuts
635 local tonode = nodes.tonode
636 local nodepool = nuts.pool
637
638 local new_disc = nodepool.disc
639 local new_penalty = nodepool.penalty
640
641 local getfield = nuts.getfield
642 local getfont = nuts.getfont
643 local getid = nuts.getid
644 local getattr = nuts.getattr
645 local getnext = nuts.getnext
646 local getprev = nuts.getprev
647 local getsubtype = nuts.getsubtype
648 local getlist = nuts.getlist
649 local getlanguage = nuts.getlanguage
650 local getattrlist = nuts.getattrlist
651 local setattrlist = nuts.setattrlist
652 local isglyph = nuts.isglyph
653 local ischar = nuts.ischar
654
655 local setchar = nuts.setchar
656 local setdisc = nuts.setdisc
657 local setlink = nuts.setlink
658 local setprev = nuts.setprev
659 local setnext = nuts.setnext
660
661 local insertbefore = nuts.insertbefore
662 local insertafter = nuts.insertafter
663 local copy_node = nuts.copy
664 local copylist = nuts.copylist
665 local remove_node = nuts.remove
666 local endofmath = nuts.endofmath
667 local node_tail = nuts.tail
668
669 local nexthlist = nuts.traversers.hlist
670 local nextdisc = nuts.traversers.disc
671
672 local setcolor = nodes.tracers.colors.set
673
674 local variables = interfaces.variables
675 local v_reset = variables.reset
676 local v_yes = variables.yes
677 local v_word = variables.word
678 local v_all = variables.all
679
680 local settings_to_array = utilities.parsers.settings_to_array
681
682 local unsetvalue = attributes.unsetvalue
683 local texsetattribute = tex.setattribute
684
685 local prehyphenchar = lang.prehyphenchar
686 local posthyphenchar = lang.posthyphenchar
687 local preexhyphenchar = lang.preexhyphenchar
688 local postexhyphenchar = lang.postexhyphenchar
689
-- Private attribute in which setfeatures stores the number of the active feature set.
local a_hyphenation     = attributes.private("hyphenation")

-- Value of the penalty node created by insertpenalty.
local interwordpenalty  = 5000
693
-- In context mode this replaces the file based loader defined earlier: the
-- dictionary comes straight from the store, which fills itself on first access.
function traditional.loadpatterns(language)
    local dictionary = dictionaries[language]
    return dictionary
end
697
698
699
-- In context mode a dictionary is built on first access from the data that
-- the languages module has for key k (a name or a number).
--
-- For each resource that has not been processed before, the patterns (data and
-- extra) and exceptions are registered, and the characters listed in
-- patterns.characters are collected:
--
--   dictionary.characters : character -> code (unknown characters resolve via utfbyte)
--   dictionary.unicodes   : code -> character, plus an entry for each uppercase code
--
-- The dictionary is stored under k and, when a specification was found, also
-- under specification.number.
--
-- Compared to the original, a resource without a patterns table (or without a
-- usable patterns.characters string) no longer raises an error: the character
-- collection is simply skipped for that resource.
setmetatableindex(dictionaries,function(t,k)
    if type(k) == "string" then
        -- the result is not used; NOTE(review): presumably called to force the
        -- language to be loaded - confirm against languages.getnumber
        languages.getnumber(k)
    end
    local specification = languages.getdata(k)
    local dictionary = {
        patterns   = { },
        exceptions = { },
        hyphenated = { },
        specials   = { },
        instance   = false,
        characters = { },
        unicodes   = { },
    }
    if specification then
        local resources = specification.resources
        if resources then
            local characters = dictionary.characters or { }
            local unicodes   = dictionary.unicodes   or { }
            for i=1,#resources do
                local r = resources[i]
                if not r.in_dictionary then
                    -- a resource is only processed once, also when shared
                    r.in_dictionary = true
                    local patterns = r.patterns
                    if patterns then
                        local data = patterns.data
                        if data then
                            lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
                        end
                        local extra = patterns.extra
                        if extra then
                            lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials)
                        end
                    end
                    local exceptions = r.exceptions
                    if exceptions then
                        local data = exceptions.data
                        if data and data ~= "" then
                            lpegmatch(p_exception,data,1,dictionary.exceptions)
                        end
                    end
                    -- p_split returns nil for an empty string, so both a missing
                    -- table and an empty list end up skipping the loop
                    local used      = patterns and patterns.characters
                    local usedchars = used and lpegmatch(p_split,used)
                    if usedchars then
                        for i=1,#usedchars do
                            local char  = usedchars[i]
                            local code  = utfbyte(char)
                            local upper = uccodes[code]
                            characters[char] = code
                            unicodes  [code] = char
                            if type(upper) == "table" then
                                for j=1,#upper do
                                    local u = upper[j]
                                    unicodes[u] = utfchar(u)
                                end
                            elseif upper then
                                unicodes[upper] = utfchar(upper)
                            end
                        end
                    end
                end
            end
            dictionary.characters = characters
            dictionary.unicodes   = unicodes
            setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end)
        end
        t[specification.number] = dictionary
        dictionary.instance = specification.instance
    end
    t[k] = dictionary
    return dictionary
end)
773
774
775
776
777
778
779
780
781
782
-- Feature sets are stored both by number and by name (see register). The table
-- and the number of sets defined so far are registered with / kept in the
-- storage subsystem.
local featuresets       = hyphenators.featuresets or { }
hyphenators.featuresets = featuresets

storage.shared.noflanguagesfeaturesets = storage.shared.noflanguagesfeaturesets or 0

local noffeaturesets = storage.shared.noflanguagesfeaturesets

storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets")
791
792
793
-- Gives a feature set the next free number, stores that number in its
-- attribute field, makes the set accessible by number and by name, updates the
-- shared counter and returns the number.
local function register(name,featureset)
    local number = noffeaturesets + 1
    noffeaturesets = number
    featureset.attribute = number
    featuresets[number]  = featureset
    featuresets[name]    = featureset
    storage.shared.noflanguagesfeaturesets = number
    return number
end
802
-- Combines any number of character lists into one table that maps a character
-- code to a replacement (a string or a number).
--
-- Each argument can be:
--
--   * the string v_all : the result is a table that maps every code to its
--     character on demand (lists handled before it are discarded)
--   * another string   : every character maps to itself
--   * an array         : entries are codes (numbers) or characters (strings)
--   * a hash           : code -> true (maps to its own character) or
--                        code -> replacement
--
-- The original tested next(list) before #list > 0, which made the array branch
-- unreachable: arrays were treated as hashes with the indices as codes. Arrays
-- are now recognized first; a table is seen as array when it has an entry at
-- index 1. A string that cannot be split is ignored instead of raising an error.
local function makeset(...)
    local set = { }
    for i=1,select("#",...) do
        local list = select(i,...)
        local kind = type(list)
        if kind == "string" then
            if list == v_all then
                return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end)
            elseif list ~= "" then
                local used = lpegmatch(p_split,list)
                if used then
                    for j=1,#used do
                        local char = used[j]
                        set[utfbyte(char)] = char
                    end
                end
            end
        elseif kind == "table" then
            local size = #list
            if size > 0 then
                for j=1,size do
                    local l = list[j]
                    if type(l) == "number" then
                        set[l] = utfchar(l)
                    else
                        set[utfbyte(l)] = l
                    end
                end
            elseif next(list) then
                for byte, char in next, list do
                    set[byte] = char == true and utfchar(byte) or char
                end
            end
        end
    end
    return set
end
843
844
845
-- Default hyphen characters, used when a feature set has hyphens set to
-- v_yes. A value of true means that the code maps to its own character (see
-- makeset), a number is the code of the replacement.
local defaulthyphens = {
    [0x002D] = true,   -- hyphen-minus
    [0x00AD] = 0x002D, -- soft hyphen, mapped onto the hyphen-minus



    [0x2010] = true,   -- hyphen


    [0x2013] = true,   -- en dash
    [0x2014] = true,   -- em dash
















}

-- Default joiners, used when a feature set has joiners set to v_yes.
local defaultjoiners = {
    [0x200C] = true, -- zero width non-joiner
    [0x200D] = true, -- zero width joiner
}
878
-- Converts a value to a number that can serve as hyphen character: zero and
-- anything that is not a number result in nil.
local function somehyphenchar(c)
    local number = tonumber(c)
    if number == 0 then
        return nil
    end
    return number
end
883
-- Normalizes the fields of a feature set in place and registers it under the
-- given name; the number assigned by register is returned.
--
-- Numeric fields that are missing or not positive become nil, the character
-- lists are converted into sets, and the alternative defaults to "hyphenate".
local function definefeatures(name,featureset)
    -- a positive number or nil
    local function positive(value)
        local number = tonumber(value)
        if number and number > 0 then
            return number
        end
        return nil
    end

    local extrachars   = featureset.characters
    local hyphenchars  = featureset.hyphens
    local joinerchars  = featureset.joiners
    local alternative  = featureset.alternative
    local rightwordmin = positive(featureset.rightwordmin)
    local charmin      = positive(featureset.charmin)
    local leftcharmin  = positive(featureset.leftcharmin)
    local rightcharmin = positive(featureset.rightcharmin)
    local leftchar     = somehyphenchar(featureset.leftchar)
    local rightchar    = somehyphenchar(featureset.rightchar)
    local rightchars   = featureset.rightchars
    local rightedge    = featureset.rightedge -- read but not used here
    -- both options are currently fixed
    local autohyphen   = v_yes
    local hyphenonly   = v_yes

    if rightchars == v_word then
        rightchars = true
    else
        rightchars = tonumber(rightchars)
    end
    if joinerchars == v_yes then
        joinerchars = defaultjoiners
    end
    if hyphenchars == v_yes then
        hyphenchars = defaulthyphens
    end

    featureset.extrachars   = makeset(joinerchars or "",extrachars or "")
    featureset.hyphenchars  = makeset(hyphenchars or "")
    featureset.alternative  = alternative or "hyphenate"
    featureset.rightwordmin = rightwordmin
    featureset.charmin      = charmin
    featureset.leftcharmin  = leftcharmin
    featureset.rightcharmin = rightcharmin
    featureset.rightchars   = rightchars
    featureset.leftchar     = leftchar
    featureset.rightchar    = rightchar

    featureset.autohyphen   = autohyphen == v_yes
    featureset.hyphenonly   = hyphenonly == v_yes

    return register(name,featureset)
end
918
-- Activates a feature set by storing its number in the hyphenation attribute.
-- No value, v_reset or an unknown set unsets the attribute. A string that is
-- not the name of a known set is taken as a list of set names: the fields of
-- the known ones are merged (later sets win) into a new set that is registered
-- under the complete string.
local function setfeatures(n)
    local attribute = false
    if n and n ~= v_reset then
        local known = featuresets[n]
        if known or type(n) ~= "string" then
            attribute = known and known.attribute
        else
            local names  = settings_to_array(n)
            local merged = { }
            for i=1,#names do
                local set = featuresets[names[i]]
                if set then
                    for key, value in next, set do
                        merged[key] = value
                    end
                end
            end
            attribute = register(n,merged)
        end
    end
    texsetattribute(a_hyphenation,attribute or unsetvalue)
end
943
-- Export the feature set functions and register the functions of this module
-- with the interface layer, under the names given in the name fields.
traditional.definefeatures = definefeatures
traditional.setfeatures    = setfeatures

-- Takes a name and a table with the listed fields; fields without an explicit
-- type are declared by name only.
implement {
    name      = "definehyphenationfeatures",
    actions   = definefeatures,
    arguments = {
        "string",
        {
            { "characters" },
            { "hyphens" },
            { "joiners" },
            { "rightchars" },
            { "rightwordmin", "integer" },
            { "charmin", "integer" },
            { "leftcharmin", "integer" },
            { "rightcharmin", "integer" },
            { "leftchar", "integer" },
            { "rightchar", "integer" },
            { "alternative" },
            { "rightedge" },
        }
    }
}

implement {
    name      = "sethyphenationfeatures",
    actions   = setfeatures,
    arguments = "string"
}

-- language, patterns, and a boolean (false makes registerpattern remove the patterns)
implement {
    name      = "registerhyphenationpattern",
    actions   = traditional.registerpattern,
    arguments = { "string", "string", "boolean" }
}

-- language and exceptions
implement {
    name      = "registerhyphenationexception",
    actions   = traditional.registerexception,
    arguments = "2 strings",
}
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010 local starttiming = statistics.starttiming
1011 local stoptiming = statistics.stoptiming
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030 function traditional.hyphenate(head)
1031
1032 local first = head
1033 local tail = nil
1034 local last = nil
1035 local current = first
1036 local dictionary = nil
1037 local instance = nil
1038 local characters = nil
1039 local unicodes = nil
1040 local exhyphenchar = tex.exhyphenchar
1041 local extrachars = nil
1042 local hyphenchars = nil
1043 local language = nil
1044 local lastfont = nil
1045 local start = nil
1046 local stop = nil
1047 local word = { }
1048 local size = 0
1049 local leftchar = false
1050 local rightchar = false
1051 local leftexchar = false
1052 local rightexchar = false
1053 local leftmin = 0
1054 local rightmin = 0
1055 local charmin = 1
1056 local leftcharmin = nil
1057 local rightcharmin = nil
1058
1059 local rightwordmin = nil
1060 local rightchars = nil
1061 local leftchar = nil
1062 local rightchar = nil
1063 local attr = nil
1064 local lastwordlast = nil
1065 local hyphenated = hyphenate
1066
1067 local exhyphenpenalty = tex.exhyphenpenalty
1068 local hyphenpenalty = tex.hyphenpenalty
1069 local autohyphen = false
1070 local hyphenonly = false
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083 starttiming(traditional)
1084
-- Creates a penalty node with the inter-word penalty, gives it the attribute
-- list of the current last node and inserts it after the node that precedes
-- last. Both first and last are updated from the result of the insertion.
local function insertpenalty()
    local penalty = new_penalty(interwordpenalty)
    setattrlist(penalty,last)
    if trace_visualize then
        nuts.setvisual(penalty,"penalty")
    end
    last = getprev(last)
    local newfirst, newlast = insertafter(first,last,penalty)
    first, last = newfirst, newlast
end
1094
-- Makes the settings of feature set number a the active ones (they live in
-- locals of the enclosing function) and returns a. Without a known feature set
-- all settings are reset to their defaults.
local function synchronizefeatureset(a)
    local f = a and featuresets[a]
    if f then
        hyphenated   = methods[f.alternative or "hyphenate"]
        extrachars   = f.extrachars
        hyphenchars  = f.hyphenchars
        rightwordmin = f.rightwordmin
        charmin      = f.charmin
        leftcharmin  = f.leftcharmin
        rightcharmin = f.rightcharmin
        leftchar     = f.leftchar
        rightchar    = f.rightchar

        rightchars   = f.rightchars
        autohyphen   = f.autohyphen
        hyphenonly   = f.hyphenonly
        if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then
            -- Walk backwards from the tail of the list over the last
            -- rightwordmin words (a word being a run of glyphs). Depending on
            -- rightchars a penalty is inserted at the start of a word: always
            -- except for the last counted one (true) or when the number of
            -- glyphs counted so far does not exceed rightchars (a number).
            if not tail then
                tail = node_tail(first)
            end
            last = tail
            local inword = false
            local count  = 0
            while last and rightwordmin > 0 do
                local id = getid(last)
                if id == glyph_code then
                    count = count + 1
                    inword = true
                    if trace_visualize then
                        setcolor(last,"darkgreen")
                    end
                elseif inword then
                    inword = false
                    rightwordmin = rightwordmin - 1
                    if rightchars == true then
                        if rightwordmin > 0 then
                            insertpenalty()
                        end
                    elseif rightchars and count <= rightchars then
                        insertpenalty()
                    end
                end
                last = getprev(last)
            end
            -- remembered so that the check above can skip a next walk
            lastwordlast = rightwordmin
        end
        if not charmin or charmin == 0 then
            charmin = 1
        end
    else
        hyphenated   = methods.hyphenate
        extrachars   = false
        hyphenchars  = false
        rightwordmin = false
        charmin      = 1
        leftcharmin  = false
        rightcharmin = false
        leftchar     = false
        rightchar    = false

        autohyphen   = false
        hyphenonly   = false
    end

    return a
end
1162
-- Applies the break points found for the collected word (the locals word,
-- size, start and stop of the enclosing function) to the node list.
--
-- hyphens : table as returned by the hyphenate method: per position false,
--           true or a special specification
--
-- First a flat result list is made with characters, true (a regular break) and
-- tables (a special break with replacement texts). After that the list is
-- walked in parallel with the glyph nodes from start on, inserting
-- discretionary nodes where needed.
local function flush(hyphens)

    -- break points are only honoured for positions in [leftmin,rightmin]
    local rightmin = size - rightmin
    local result   = { }
    local rsize    = 0
    local position = 1







    while position <= size do
        if position >= leftmin and position <= rightmin then
            local hyphen = hyphens[position]
            if not hyphen then
                rsize = rsize + 1
                result[rsize] = word[position]
                position = position + 1
            elseif hyphen == true then
                -- regular break after this character
                rsize = rsize + 1
                result[rsize] = word[position]
                rsize = rsize + 1
                result[rsize] = true
                position = position + 1
            else
                local o, h = hyphen[2]
                if o then
                    -- { special, offset }
                    h = hyphen[1]
                else
                    -- the special itself
                    h = hyphen
                    o = 1
                end
                -- b..e is the range of characters covered by the special
                local b = position - o + (h.start  or 1)
                local e = b + (h.length or 2) - 1
                if b > 0 and e >= b then
                    -- copy the characters in front of the range
                    for i=1,b-position do
                        rsize = rsize + 1
                        result[rsize] = word[position]
                        position = position + 1
                    end
                    rsize = rsize + 1
                    result[rsize] = {
                        h.before or "",
                        h.after or "",
                        concat(word,"",b,e),
                        h.right,
                        h.left,
                    }
                    position = e + 1
                else
                    -- invalid range: treat as a normal character
                    rsize = rsize + 1
                    result[rsize] = word[position]
                    position = position + 1
                end
            end
        else
            rsize = rsize + 1
            result[rsize] = word[position]
            position = position + 1
        end
    end

    -- Turns a replacement into a list of glyph nodes, each one a copy of the
    -- stop node with another character. With replacement being true a single
    -- glyph with leftchar (or else rightchar) is returned. Otherwise the
    -- result is: an optional leftchar glyph, the characters of the replacement
    -- string and an optional rightchar glyph.
    local function serialize(replacement,leftchar,rightchar)
        if not replacement then
            return
        elseif replacement == true then
            local glyph = copy_node(stop)
            setchar(glyph,leftchar or rightchar)
            return glyph
        end
        local head    = nil
        local current = nil
        if leftchar then
            head    = copy_node(stop)
            current = head
            setchar(head,leftchar)
        end
        local rsize = #replacement
        if rsize == 1 then
            -- a single byte: no need to split
            local glyph = copy_node(stop)
            setchar(glyph,characters[replacement])
            if head then
                insertafter(current,current,glyph)
            else
                head = glyph
            end
            current = glyph
        elseif rsize > 0 then
            local list = lpegmatch(p_split,replacement)
            for i=1,#list do
                local glyph = copy_node(stop)
                setchar(glyph,characters[list[i]])
                if head then
                    insertafter(current,current,glyph)
                else
                    head = glyph
                end
                current = glyph
            end
        end
        if rightchar then
            local glyph = copy_node(stop)
            insertafter(current,current,glyph)
            setchar(glyph,rightchar)
        end
        return head
    end

    local current  = start
    local attrnode = start -- discretionaries get the attribute list of this node

    for i=1,rsize do
        local r = result[i]
        if r == true then
            -- regular break: pre and post only hold the hyphen characters
            local disc = new_disc()
            local pre  = nil
            local post = nil
            if rightchar then
                pre = serialize(true,rightchar)
            end
            if leftchar then
                post = serialize(true,leftchar)
            end
            setdisc(disc,pre,post,nil,regulardisc_code,hyphenpenalty)
            if attrnode then
                setattrlist(disc,attrnode)
            end

            insertbefore(first,current,disc)
        elseif type(r) == "table" then
            -- special break: r is { before, after, replaced text, right, left }
            local disc    = new_disc()
            local pre     = r[1]
            local post    = r[2]
            local replace = r[3]
            -- the hyphen characters are left out when right / left is false
            local right   = r[4] ~= false and rightchar
            local left    = r[5] ~= false and leftchar
            if pre then
                if pre ~= "" then
                    pre = serialize(pre,false,right)
                else
                    pre = nil
                end
            end
            if post then
                if post ~= "" then
                    post = serialize(post,left,false)
                else
                    post = nil
                end
            end
            if replace then
                if replace ~= "" then
                    replace = serialize(replace)
                else
                    replace = nil
                end
            end

            setdisc(disc,pre,post,replace,regulardisc_code,hyphenpenalty)
            if attrnode then
                setattrlist(disc,attrnode)
            end
            insertbefore(first,current,disc)
        else
            -- a character: store it in the current glyph and move on
            setchar(current,characters[r])
            if i < rsize then
                current = getnext(current)
            end
        end
    end
    -- remove the nodes of the word that were not reused, up to and including stop
    if current and current ~= stop then
        local current = getnext(current)
        local last    = getnext(stop)
        while current ~= last do
            first, current = remove_node(first,current,true)
        end
    end

end
1347
-- Replaces the current glyph (unless it is the first node of the list) by an
-- automatic discretionary and returns the current node.
--
-- leftchar  : code of the character to put in the post part (only when > 0)
-- rightchar : code of the character to put in the pre part (only when > 0)
-- code      : fallback for the pre part when rightchar is not usable
-- attrnode  : optional node whose attribute list is given to the discretionary
--
-- The removed glyph ends up as replace part of the discretionary. The original
-- assigned the removed node to an undeclared variable and thereby created a
-- global named glyph; it is a local now.
local function inject(leftchar,rightchar,code,attrnode)
    if first ~= current then
        local disc = new_disc()
        local glyph
        first, current, glyph = remove_node(first,current)
        first, current = insertbefore(first,current,disc)
        if trace_visualize then
            setcolor(glyph,"darkred")
            setcolor(disc,"darkgreen")
        end
        local pre     = nil
        local post    = nil
        local replace = glyph
        if leftchar and leftchar > 0 then
            post = copy_node(glyph)
            setchar(post,leftchar)
        end
        pre = copy_node(glyph)
        setchar(pre,rightchar and rightchar > 0 and rightchar or code)
        setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty)
        if attrnode then
            setattrlist(disc,attrnode)
        end
    end
    return current
end
1373
-- Wraps the nodes from current up to last in an automatic discretionary: the
-- series is cut out of the list and becomes the replace part, a copy of it
-- becomes the pre part, and there is no post part. The discretionary takes the
-- place of the series (linked to next when given) and is returned.
local function injectseries(current,last,next,attrnode)
    local series = current
    local disc   = new_disc()
    first, current = insertbefore(first,current,disc)
    -- detach the series at both ends
    setprev(series)
    setnext(last)
    if not next then
        setnext(current)
    else
        setlink(current,next)
    end
    setdisc(disc,copylist(series),nil,series,automaticdisc_code,hyphenpenalty)
    if attrnode then
        setattrlist(disc,attrnode)
    end
    return current
end
1394
1395 local a = getattr(first,a_hyphenation)
1396 if a ~= attr then
1397 attr = synchronizefeatureset(a)
1398 end
1399
1400
1401
1402
1403
1404
1405 local skipping = false
1406
1407
1408
1409 while current and current ~= last do
1410 local code, id = isglyph(current)
1411 if code then
1412 if skipping then
1413 current = getnext(current)
1414 else
1415 local lang = getlanguage(current)
1416 local font = getfont(current)
1417 if lang ~= language or font ~= lastfont then
1418 if dictionary and size > charmin and leftmin + rightmin <= size then
1419
1420 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1421
1422 else
1423 local hyphens = hyphenated(dictionary,word,size)
1424 if hyphens then
1425 flush(hyphens)
1426 end
1427 end
1428 end
1429 lastfont = font
1430 if language ~= lang and lang > 0 then
1431
1432 dictionary = dictionaries[lang]
1433 instance = dictionary.instance
1434 characters = dictionary.characters
1435 unicodes = dictionary.unicodes
1436
1437 local a = getattr(current,a_hyphenation)
1438 attr = synchronizefeatureset(a)
1439 leftchar = leftchar or (instance and posthyphenchar (instance))
1440 rightchar = rightchar or (instance and prehyphenchar (instance))
1441 leftexchar = (instance and preexhyphenchar (instance))
1442 rightexchar = (instance and postexhyphenchar(instance))
1443 leftmin = leftcharmin or getfield(current,"left")
1444 rightmin = rightcharmin or getfield(current,"right")
1445 if not leftchar or leftchar < 0 then
1446 leftchar = false
1447 end
1448 if not rightchar or rightchar < 0 then
1449 rightchar = false
1450 end
1451
1452 local char = unicodes[code] or (extrachars and extrachars[code])
1453 if char then
1454 word[1] = char
1455 size = 1
1456 start = current
1457 else
1458 size = 0
1459 end
1460 else
1461 size = 0
1462 end
1463 language = lang
1464 elseif language <= 0 then
1465
1466 elseif size > 0 then
1467 local char = unicodes[code] or (extrachars and extrachars[code])
1468 if char then
1469 size = size + 1
1470 word[size] = char
1471 elseif dictionary then
1472 if not hyphenonly or code ~= exhyphenchar then
1473 if size > charmin and leftmin + rightmin <= size then
1474 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1475
1476 else
1477 local hyphens = hyphenated(dictionary,word,size)
1478 if hyphens then
1479 flush(hyphens)
1480 end
1481 end
1482 end
1483 end
1484 size = 0
1485 if code == exhyphenchar then
1486 local next = getnext(current)
1487 local last = current
1488 local font = getfont(current)
1489 while next and ischar(next,font) == code do
1490 last = next
1491 next = getnext(next)
1492 end
1493 if not autohyphen then
1494 current = last
1495 elseif current == last then
1496 current = inject(leftexchar,rightexchar,code,current)
1497 else
1498 current = injectseries(current,last,next,current)
1499 end
1500 if hyphenonly then
1501 skipping = true
1502 end
1503 elseif hyphenchars then
1504 local char = hyphenchars[code]
1505 if char == true then
1506 char = code
1507 end
1508 if char then
1509 current = inject(leftchar and char or nil,rightchar and char or nil,char,current)
1510 end
1511 end
1512 end
1513 else
1514 local a = getattr(current,a_hyphenation)
1515 if a ~= attr then
1516 attr = synchronizefeatureset(a)
1517 leftchar = leftchar or (instance and posthyphenchar (instance))
1518 rightchar = rightchar or (instance and prehyphenchar (instance))
1519 leftexchar = (instance and preexhyphenchar (instance))
1520 rightexchar = (instance and postexhyphenchar(instance))
1521 leftmin = leftcharmin or getfield(current,"left")
1522 rightmin = rightcharmin or getfield(current,"right")
1523 if not leftchar or leftchar < 0 then
1524 leftchar = false
1525 end
1526 if not rightchar or rightchar < 0 then
1527 rightchar = false
1528 end
1529 end
1530
1531 local char = unicodes[code] or (extrachars and extrachars[code])
1532 if char then
1533 word[1] = char
1534 size = 1
1535 start = current
1536 end
1537 end
1538 stop = current
1539 current = getnext(current)
1540 end
1541 else
1542 if skipping then
1543 skipping = false
1544 end
1545 if id == disc_code then
1546 size = 0
1547 current = getnext(current)
1548 if hyphenonly then
1549 skipping = true
1550 end
1551
1552
1553
1554 else
1555 current = id == math_code and getnext(endofmath(current)) or getnext(current)
1556 end
1557 if size > 0 then
1558 if dictionary and size > charmin and leftmin + rightmin <= size then
1559 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1560
1561 else
1562 local hyphens = hyphenated(dictionary,word,size)
1563 if hyphens then
1564 flush(hyphens)
1565 end
1566 end
1567 end
1568 size = 0
1569 end
1570 end
1571 end
1572
1573
1574 if dictionary and size > charmin and leftmin + rightmin <= size then
1575 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1576
1577 else
1578 local hyphens = hyphenated(dictionary,word,size)
1579 if hyphens then
1580 flush(hyphens)
1581 end
1582 end
1583 end
1584
1585 stoptiming(traditional)
1586
1587 return head
1588 end
1589
-- Reports hyphenation statistics; nothing is returned when no words were
-- counted and no time was recorded for the traditional hyphenator.
statistics.register("hyphenation",function()
    local active = nofwords > 0 or statistics.elapsed(traditional) > 0
    if not active then
        return
    end
    local seconds = statistics.elapsedseconds(traditional) or 0
    return string.format("%s words hyphenated, %s unique, used time %s",nofwords,nofhashed,seconds)
end)
1596
local texmethod = "builders.kernel.hyphenation"
local oldmethod, newmethod = texmethod, texmethod

local hyphenate   = lang.hyphenate
local hyphenating = nuts.hyphenating
local methods     = { }   -- name -> hyphenation function (or false)
local usedmethod  = false -- the currently active method
local stack       = { }   -- methods saved by pushmethod

-- The engine hyphenator wrapped so that it takes a head and returns a head.
-- When nuts.hyphenating is available it is called directly and only its
-- first result is passed on; otherwise lang.hyphenate is applied to the
-- converted head and the head that was passed in is returned.
local original

if hyphenating then
    original = function(head)
        return (hyphenating(head))
    end
else
    original = function(head)
        hyphenate(tonode(head))
        return head
    end
end
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
local getcount = tex.getcount

hyphenators.methods = methods

local optimize = false

directives.register("hyphenator.optimize", function(v)
    optimize = v
end)

-- Applies the active hyphenation method to 'head' and returns its result.
-- Without an active method the head is returned untouched. When the
-- "hyphenator.optimize" directive is enabled, hbox and adjusted_hbox groups
-- are only processed when the "hyphenstate" counter is positive.
function hyphenators.handler(head,groupcode)
    if not usedmethod then
        return head
    end
    local inhbox = groupcode == "hbox" or groupcode == "adjusted_hbox"
    if not (optimize and inhbox) then
        return usedmethod(head)
    end
    if getcount("hyphenstate") > 0 then
        -- NOTE(review): 'forced' is not declared in the visible part of this
        -- file; if no enclosing local exists this writes a global -- confirm
        forced = false
        return usedmethod(head)
    end
    return head
end
1664
-- the engine hyphenator is available under three names
methods.tex, methods.original, methods.expanded = original, original, original

methods.traditional = languages.hyphenators.traditional.hyphenate

-- false (not nil) so that selecting "none" does not fall back to methods.tex
methods.none = false

-- the engine hyphenator is the initial method
usedmethod = original
1672
-- Selects the active hyphenation method by name. An unregistered name selects
-- methods.tex, a name registered as false (like "none") disables hyphenation,
-- and so does anything that is not a string.
local function setmethod(method)
    local chosen = false
    if type(method) == "string" then
        chosen = methods[method]
        if chosen == nil then
            chosen = methods.tex
        end
    end
    usedmethod = chosen
end
-- Saves the active method on the stack and then selects 'method' following
-- the same rules as setmethod.
local function pushmethod(method)
    insert(stack,usedmethod)
    setmethod(method)
end
-- Restores the method saved by the matching pushmethod. Only an empty stack
-- (remove returns nil) falls back to methods.tex: a saved false means that
-- hyphenation was disabled before the push and has to stay disabled, which an
-- 'or' fallback would silently turn into the tex method.
local function popmethod()
    local previous = remove(stack)
    if previous == nil then
        previous = methods.tex
    end
    usedmethod = previous
end
1689
-- public access to the method switchers
hyphenators.setmethod  = setmethod
hyphenators.pushmethod = pushmethod
hyphenators.popmethod  = popmethod

directives.register("hyphenators.method",setmethod)

-- Selects the method named in specification.method, when that field is set.
function hyphenators.setup(specification)
    local method = specification.method
    if not method then
        return
    end
    setmethod(method)
end

implement {
    name      = "sethyphenationmethod",
    actions   = setmethod,
    arguments = "string"
}

implement {
    name      = "pushhyphenation",
    actions   = pushmethod,
    arguments = "string"
}

implement {
    name    = "pophyphenation",
    actions = popmethod
}
1706
1707
1708
local context      = context
local ctx_NC       = context.NC
local ctx_NR       = context.NR
local ctx_verbatim = context.verbatim

-- Typesets the steps returned by traditional.gettrace(language,word) as a
-- four column tabulate. Nothing is typeset for a missing or empty word or
-- when no steps come back. While the trace is collected trace_steps is set to
-- "silent"; its previous value is restored afterwards.
function hyphenators.showhyphenationtrace(language,word)
    if not word or word == "" then
        return
    end
    local saved = trace_steps
    trace_steps = "silent"
    local steps = traditional.gettrace(language,word)
    trace_steps = saved
    if not steps then
        return
    end
    local n = #steps
    if n > 0 then
        context.starttabulate { "|r|l|l|l|" }
        for i=1,n do
            local s = steps[i]
            -- first column: a number, but not on the first and last row
            ctx_NC()
            if i > 1 and i < n then
                context(i-1)
            end
            -- remaining columns: the three fields of the step, verbatim
            for c=1,3 do
                ctx_NC()
                ctx_verbatim(s[c])
            end
            ctx_NC()
            ctx_NR()
        end
        context.stoptabulate()
    end
end

implement {
    name      = "showhyphenationtrace",
    actions   = hyphenators.showhyphenationtrace,
    arguments = "2 strings",
}
1745
-- Removes (and frees) the discretionary nodes found directly in the list of
-- each hlist reachable from 'head' via the hlist traverser, then returns
-- 'head'.
--
-- The previous code called remove_node(h,false,true), where 'h' is not
-- defined anywhere in this function and 'false' stands where the node to
-- remove belongs, so the disc that was found was never the one passed on.
-- NOTE(review): presumably 'head' is a direct node ("nut") as the nuts
-- traversers are used -- confirm against callers.
function nodes.stripdiscretionaries(head)
    local setlist = nuts.setlist
    for l in nexthlist, head do
        local list  = getlist(l)
        local discs = { }
        local n     = 0
        -- collect first, so that no node is freed while it is being traversed
        for d in nextdisc, list do
            n = n + 1
            discs[n] = d
        end
        if n > 0 then
            for i=1,n do
                list = remove_node(list,discs[i],true)
            end
            -- the list may have started with a disc, so store the new head
            setlist(l,list)
        end
    end
    return head
end
1754
1755
1756else
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807end
1808
1809 |