-- Register this file's metadata in the global module registry; the registry
-- table is created here when no other module has done so yet.
modules = modules or { }

modules["lang-hyp"] = {
    version   = 1.001,
    comment   = "companion to lang-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77local type, rawget, rawset, tonumber, next = type, rawget, rawset, tonumber, next
78
79local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs
80local lpegmatch = lpeg.match
81
82local context = context
83
84local concat = table.concat
85local insert = table.insert
86local remove = table.remove
87local formatters = string.formatters
88local utfchar = utf.char
89local utfbyte = utf.byte
90
91if not characters then
92 require("char-ini")
93end
94
95local setmetatableindex = table.setmetatableindex
96
97
98
-- Trace flags; both are switched by the tracker callbacks registered below.
local trace_steps     = false
local trace_visualize = false

trackers.register("hyphenator.steps", function(v)
    trace_steps = v
end)

trackers.register("hyphenator.visualize", function(v)
    trace_visualize = v
end)

local report = logs.reporter("hyphenator")

-- Fall back to a do-nothing function when no interface layer is available.
local implement = interfaces and interfaces.implement
if not implement then
    implement = function() end
end

-- Namespaces: languages.hyphenators.traditional, reusing existing tables.
if not languages then
    languages = { }
end

local hyphenators = languages.hyphenators
if not hyphenators then
    hyphenators = { }
end
languages.hyphenators = hyphenators

local traditional = hyphenators.traditional
if not traditional then
    traditional = { }
end
hyphenators.traditional = traditional
111
-- Dictionaries are created on first access: indexing with an unknown language
-- stores and returns a fresh, not yet loaded, dictionary record.
local dictionaries = setmetatableindex(function(store,language)
    local fresh = {
        loaded     = false,
        exceptions = { },
        specials   = { },
        hyphenated = { },
        patterns   = { },
    }
    store[language] = fresh
    return fresh
end)

hyphenators.dictionaries = dictionaries
125
-- Basic building blocks for parsing patterns and exceptions.
local character      = lpeg.patterns.utf8character
local digit          = R("09")
local weight         = digit/tonumber + Cc(0)
local fence          = P(".")
local hyphen         = P("-")
local space          = P(" ")
local char           = character - space
local validcharacter = (character - S("./"))
local keycharacter   = character - S("/")

-- Optional trailer "/before=after[,start,length]" of a pattern. When present it
-- is folded (with rawset) into a table with the keys "before", "after" and,
-- optionally, the numeric "start" and "length".
local specpart

do
    local slash    = P("/")
    local equal    = P("=")
    local comma    = P(",")
    local notcomma = (1 - comma)^1

    local p_before = Cg(Cc("before") * C((1 - equal)^1) * equal)
    local p_after  = Cg(Cc("after")  * C(notcomma))
    local p_start  = Cg(Cc("start")  * (notcomma/tonumber) * comma)
    local p_length = Cg(Cc("length") * ((1 - P(-1))^1/tonumber))

    specpart = (slash * Cf(Ct("") * p_before * p_after * (comma * p_start * p_length)^-1, rawset))^-1
end

-- Hash key of a pattern: the pattern with all digits removed (stops at "/").
local make_hashkey_p = Cs((digit/"" + keycharacter)^1)

-- Hash key of an exception: the word with all hyphens removed.
local make_hashkey_e = Cs((hyphen/"" + keycharacter)^1)

-- Exception as a list of booleans: one entry per character after the first,
-- true when that character was preceded by a hyphen in the input.
local make_pattern_e = Ct(char * (hyphen * Cc(true) * char + char * Cc(false))^1)
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
-- Turns a string of single characters (digits here) into a list of numbers.
local make_pattern_c = Ct((P(1)/tonumber)^1)

-- Memoizes the number lists, keyed by the digit string, so that patterns with
-- the same weights share a single table.
local cache = setmetatableindex(function(known,digits)
    local weights = lpegmatch(make_pattern_c,digits)
    known[digits] = weights
    return weights
end)

-- A pattern is normalized to a digit string: a fence becomes "0", every
-- character is dropped and a missing weight is written as "0". That string is
-- then looked up in (and thereby converted by) the cache.
local weight_n       = digit + Cc("0")
local fence_n        = fence / "0"
local char_n         = validcharacter / ""
local basepart_n     = Cs(fence_n^-1 * (weight_n * char_n)^1 * weight_n * fence_n^-1) / cache
local make_pattern_n = basepart_n * specpart
179
-- Registers one pattern string.
--
-- patterns      : table mapping hash keys to weight lists
-- specials      : table mapping hash keys to replacement specifications
-- str           : the pattern, optionally followed by "/before=after[,start,length]"
-- specification : when given (truthy) it is stored as the special for this
--                 pattern and takes precedence over a trailer parsed from str
--
-- A string that yields no hash key (for instance one that starts with "/") is
-- ignored. Without this check the assignment below raises "table index is nil",
-- which aborts the lpeg match that drives the whole load.
local function register_pattern(patterns,specials,str,specification)
    local k = lpegmatch(make_hashkey_p,str)
    if not k then
        return
    end
    local v1, v2 = lpegmatch(make_pattern_n,str)
    patterns[k] = v1
    if specification then
        specials[k] = specification
    elseif v2 then
        specials[k] = v2
    end
end
191
-- Removes the pattern (and its special, if any) that shares the hash key of str.
--
-- A string that yields no hash key is ignored: assigning to a nil key raises
-- "table index is nil", even when the assigned value is nil.
local function unregister_pattern(patterns,specials,str)
    local k = lpegmatch(make_hashkey_p,str)
    if not k then
        return
    end
    patterns[k] = nil
    specials[k] = nil
end
197
local p_lower = lpeg.patterns.utf8lower

-- Registers one exception (a word with explicit hyphens).
--
-- exceptions    : table mapping the hyphen-free word to a list of booleans
-- str           : the word; it is first passed through p_lower
-- specification : accepted but not used here
--
-- A word that yields no hash key (for instance one that starts with "/") is
-- ignored rather than raising "table index is nil". A word for which
-- make_pattern_e does not match stores nil, i.e. clears that key.
local function register_exception(exceptions,str,specification)
    local l = lpegmatch(p_lower,str)
    local k = lpegmatch(make_hashkey_e,l)
    if not k then
        return
    end
    exceptions[k] = lpegmatch(make_pattern_e,l)
end
206
-- Loaders for blobs of patterns / exceptions: every run of non-space characters
-- is handed to the register function together with the extra match arguments
-- (the target tables); any other character is skipped.
local p_pattern   = ((Carg(1) * Carg(2) * C(char^1)) / register_pattern   + P(1))^1
local p_exception = ((Carg(1)            * C(char^1)) / register_exception + P(1))^1

-- Splits a string into a list of its utf8 characters.
local p_split     = Ct(C(character)^1)
210
-- Returns the dictionary for language, loading its patterns and exceptions on
-- first use.
--
-- language : key into the dictionaries table
-- filename : optional; defaults to "lang-" .. language, the "lua" suffix is
--            added by file.addsuffix
--
-- The file is located with resolvers.findfile and run with dofile; its result
-- is expected to have (optional) patterns.data and exceptions.data strings. The
-- dictionary is flagged as loaded even when nothing was found.
function traditional.loadpatterns(language,filename)
    local dictionary = dictionaries[language]
    if dictionary.loaded then
        return dictionary
    end
    if not filename or filename == "" then
        filename = "lang-" .. language
    end
    filename = file.addsuffix(filename,"lua")
    local fullname      = resolvers.findfile(filename)
    local specification = fullname and fullname ~= "" and dofile(fullname)
    if specification then
        local patterns = specification.patterns
        local pdata    = patterns and patterns.data
        if pdata and pdata ~= "" then
            lpegmatch(p_pattern,pdata,1,dictionary.patterns,dictionary.specials)
        end
        local exceptions = specification.exceptions
        local edata      = exceptions and exceptions.data
        if edata and edata ~= "" then
            lpegmatch(p_exception,edata,1,dictionary.exceptions)
        end
    end
    dictionary.loaded = true
    return dictionary
end
242
-- Character property tables used while hyphenating.
local lcchars    = characters.lcchars
local uccodes    = characters.uccodes
local categories = characters.categories

-- Counters: words seen and results stored in the per-dictionary cache.
local nofwords, nofhashed = 0, 0

-- Trace of the most recent traced run (filled by the show_* helpers) and the
-- formatter used for its columns.
local steps  = nil
local f_show = formatters["%w%s"]
251
-- Reports the collected steps, but only when the steps tracker is exactly true.
-- The second column is aligned using the width of the first step's first entry.
local function show_log()
    if trace_steps ~= true then
        return
    end
    report()
    local width = #steps[1][1]
    for index=1,#steps do
        local step  = steps[index]
        local first = step[1]
        report("%s%w%S %S",first,width - #first + 3,step[2],step[3] or "")
    end
    report()
end
263
-- Starts a new trace: the first step shows the space separated word twice.
local function show_1(wsplit)
    local spaced = f_show(0,concat(wsplit," "))
    steps = { { spaced, spaced } }
end
268
-- Adds a trace step for one matching pattern.
--
-- c      : the matched fragment
-- m      : its weights
-- wsplit : the (fenced) word as a list of characters
-- done   : the levels collected so far
-- i      : start position of the fragment in wsplit
-- spec   : optional special; it is serialized and passed to the formatter
local function show_2(c,m,wsplit,done,i,spec)
    local chars       = lpegmatch(p_split,c)
    local interleaved = { }
    local wordsize    = #wsplit
    for j=1,#m do
        -- weights and characters alternate; a missing character adds nothing
        interleaved[#interleaved+1] = m[j]
        interleaved[#interleaved+1] = chars[j]
    end
    local offset = 2*i-2
    local detail = spec and table.sequenced(spec) or ""
    local left
    if offset == 0 then
        left = f_show(offset,concat(interleaved,"",2))
    elseif i+1 == wordsize then
        left = f_show(offset-1,concat(interleaved,"",1,#interleaved-1))
    else
        left = f_show(offset-1,concat(interleaved))
    end
    local right = f_show(1,concat(done," ",2,#done),detail)
    steps[#steps+1] = { left, right }
end
289
-- Adds the final trace step and reports the trace. The first column interleaves
-- levels and characters; the second shows "-" before a character (from the
-- third one on) whose level is odd and a space otherwise.
local function show_3(wsplit,done)
    local marked = { wsplit[1] }
    local levels = { wsplit[1] }
    for i=2,#wsplit do
        local level = done[i]
        local char  = wsplit[i]
        marked[#marked+1] = (i > 2 and level % 2 == 1) and "-" or " "
        levels[#levels+1] = level
        marked[#marked+1] = char
        levels[#levels+1] = char
    end
    steps[#steps+1] = { f_show(0,concat(levels)), f_show(0,concat(marked)) }
    show_log()
end
307
-- Trace for a word that was resolved by an exception: a single step with the
-- space separated word, reported right away. The second argument is not used.
local function show_4(wsplit,done)
    local spaced = concat(wsplit," ")
    steps = { { spaced } }
    show_log()
end
312
-- Returns the trace steps collected by the most recent traced run (nil when
-- nothing has been traced yet).
function traditional.lasttrace()
    local trace = steps
    return trace
end
316
317
318
319
320
321
322
323
324
325
-- Hyphenates a word using the patterns, specials and exceptions of dictionary.
--
-- dictionary : record with hyphenated (cache), exceptions, patterns, specials
-- word       : a string, or a list of characters
-- n          : optional number of characters to consider (defaults to #word)
--
-- Returns false when no break was found, otherwise a list with one entry per
-- character: false (no break), true (break after this character) or the entry
-- { special, offset } recorded for a pattern with a replacement specification.
-- For a word found in the exceptions the stored exception list is returned.
-- Results are cached in dictionary.hyphenated under the word as given and are
-- also looked up under the word as mapped through lcchars.
local function hyphenate(dictionary,word,n)
    nofwords = nofwords + 1
    local hyphenated = dictionary.hyphenated
    local isstring   = type(word) == "string"

    -- the cache key is the word itself (string) or its first n characters
    local key
    if isstring then
        key = word
    else
        if not n then
            n = #word
        end
        key = concat(word,"",1,n)
    end
    local known = hyphenated[key]
    if known ~= nil then
        return known
    end
    if isstring then
        word = lpegmatch(p_split,word)
        if not n then
            n = #word
        end
    end

    -- w is the word mapped through lcchars and fenced with "." on both sides
    local w = { "." }
    for i=1,n do
        local c = word[i]
        w[i+1] = lcchars[c] or c
    end
    local l = n + 2
    w[l] = "."
    local lowered = concat(w,"",2,l-1)

    local cached = hyphenated[lowered]
    if cached ~= nil then
        hyphenated[key] = cached
        nofhashed = nofhashed + 1
        return cached
    end

    local exception = dictionary.exceptions[lowered]
    if exception then
        if trace_steps then
            show_4(w,exception)
        end
        hyphenated[key] = exception
        nofhashed = nofhashed + 1
        return exception
    end

    if trace_steps then
        show_1(w)
    end

    local specials = dictionary.specials
    local patterns = dictionary.patterns

    local levels -- created at the first matching pattern
    local spec   -- position -> { special, offset }, created on demand

    -- try every fragment w[first..last] of the fenced word
    for first=1,l do
        for last=first,l do
            local fragment = concat(w,"",first,last)
            local weights  = patterns[fragment]
            if weights then
                local special = specials[fragment]
                if not levels then
                    levels = { }
                    spec   = nil
                    for i=1,l do
                        levels[i] = 0
                    end
                end
                for k=1,#weights do
                    local new = weights[k]
                    if not new then
                        break
                    elseif new == true then
                        report("fatal error")
                        break
                    elseif new > 0 then
                        local pos = first + k - 1
                        local old = levels[pos]
                        -- positions beyond the word have no level and are skipped
                        if old and new > old then
                            levels[pos] = new
                            if special then
                                local b = first + (special.start or 1) - 1
                                if b > 0 then
                                    local e = b + (special.length or 2) - 1
                                    if e > 0 and pos >= b and pos <= e then
                                        if not spec then
                                            spec = { }
                                        end
                                        spec[pos] = { special, k - 1 }
                                    end
                                end
                            end
                        end
                    end
                end
                if trace_steps and levels then
                    show_2(fragment,weights,w,levels,first,special)
                end
            end
        end
    end

    if trace_steps and levels then
        show_3(w,levels)
    end

    local result = false
    if levels then
        -- an odd level at position i marks a break; entries shift down by two
        local okay = false
        for i=3,#levels do
            if levels[i] % 2 == 1 then
                levels[i-2] = spec and spec[i] or true
                okay = true
            else
                levels[i-2] = false
            end
        end
        if okay then
            -- drop the two trailing entries left over from the shift
            levels[#levels] = nil
            levels[#levels] = nil
            result = levels
        end
    end

    hyphenated[key] = result
    nofhashed = nofhashed + 1
    return result
end
479
-- Hyphenates word again (its cache entry is cleared first) and returns the
-- trace steps. Returns nothing for a missing or empty word.
function traditional.gettrace(language,word)
    if not word or word == "" then
        return
    end
    local dictionary = dictionaries[language]
    if not dictionary then
        return
    end
    dictionary.hyphenated[word] = nil
    hyphenate(dictionary,word)
    return steps
end
492
-- Hyphenation methods by name; an unknown name resolves to (and is stored as)
-- the built-in hyphenate function.
local methods = setmetatableindex(function(known,name)
    known[name] = hyphenate
    return hyphenate
end)

-- Installs f as the method called name; a name that has already been set
-- cannot be overloaded.
function traditional.installmethod(name,f)
    if not rawget(methods,name) then
        methods[name] = f
    else
        report("overloading %a is not permitted",name)
    end
end
502
local s_detail_1 = "-"
local f_detail_2 = formatters["%s-%s"]
local f_detail_3 = formatters["{%s}{%s}{}"]
local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"]

-- Returns word with its hyphenation points made visible.
--
-- dictionary    : dictionary record as used by hyphenate
-- word          : the string to hyphenate; false is returned when it is missing
-- specification : table with optional leftcharmin (default 2), rightcharmin
--                 (default leftcharmin), leftchar and rightchar; the word is
--                 returned unchanged when it is missing
--
-- A plain break is shown as "-" or, when both leftchar and rightchar are given,
-- as "{rightchar}{leftchar}{}". A break with a replacement specification is
-- shown as "before-after" or as "{before rightchar}{leftchar after}{replaced}".
function traditional.injecthyphens(dictionary,word,specification)
    if not word then
        return false
    elseif not specification then
        return word
    end
    local hyphens = hyphenate(dictionary,word)
    if not hyphens then
        return word
    end

    local chars     = lpegmatch(p_split,word)
    local size      = #chars
    local leftmin   = specification.leftcharmin or 2
    local rightmin  = size - (specification.rightcharmin or leftmin)
    local leftchar  = specification.leftchar
    local rightchar = specification.rightchar
    local explicit  = leftchar and rightchar

    local result   = { }
    local rsize    = 0
    local position = 1

    -- copies the character at the current position and moves on
    local function copychar()
        rsize = rsize + 1
        result[rsize] = chars[position]
        position = position + 1
    end

    while position <= size do
        -- breaks outside the leftmin .. rightmin window are ignored
        local hyphen = position >= leftmin and position <= rightmin and hyphens[position]
        if not hyphen then
            copychar()
        elseif hyphen == true then
            copychar()
            rsize = rsize + 1
            if explicit then
                result[rsize] = f_detail_3(rightchar,leftchar)
            else
                result[rsize] = s_detail_1
            end
        else
            -- either { special, offset } or the special itself (offset 1)
            local h = hyphen
            local o = hyphen[2]
            if o then
                h = hyphen[1]
            else
                o = 1
            end
            local b = position - o + (h.start or 1)
            local e = b + (h.length or 2) - 1
            if b > 0 and e >= b then
                for i=1,b-position do
                    copychar()
                end
                rsize = rsize + 1
                if explicit then
                    result[rsize] = f_detail_4(h.before,rightchar,leftchar,h.after,concat(chars,"",b,e))
                else
                    result[rsize] = f_detail_2(h.before,h.after)
                end
                position = e + 1
            else
                copychar()
            end
        end
    end
    return concat(result)
end
590
do

    -- space separated lists: each word goes to the (un)register function
    -- together with the extra match arguments
    local word   = C((1-space)^1)
    local spaces = space^1

    local u_pattern = ((Carg(1) * Carg(2) * word)           / unregister_pattern + spaces)^1
    local r_pattern = ((Carg(1) * Carg(2) * word * Carg(3)) / register_pattern   + spaces)^1
    local e_pattern = ((Carg(1) * word)                     / register_exception + spaces)^1

    -- Registers the space separated patterns in str for language, or removes
    -- them when specification is false. A table specification is stored as the
    -- special of each pattern; any other value is passed on as false.
    function traditional.registerpattern(language,str,specification)
        local dictionary = dictionaries[language]
        local patterns   = dictionary.patterns
        local specials   = dictionary.specials
        if specification == false then
            lpegmatch(u_pattern,str,1,patterns,specials)
        else
            local special = type(specification) == "table" and specification or false
            lpegmatch(r_pattern,str,1,patterns,specials,special)
        end
    end

    -- Registers the space separated exceptions in str for language.
    function traditional.registerexception(language,str)
        local dictionary = dictionaries[language]
        lpegmatch(e_pattern,str,1,dictionary.exceptions)
    end

end
616
617
618
619if context then
620
621 local nodecodes = nodes.nodecodes
622 local disccodes = nodes.disccodes
623
624 local glyph_code = nodecodes.glyph
625 local disc_code = nodecodes.disc
626 local math_code = nodecodes.math
627 local hlist_code = nodecodes.hlist
628
629 local automaticdisc_code = disccodes.automatic
630 local regulardisc_code = disccodes.regular
631
632 local nuts = nodes.nuts
633 local tonode = nodes.tonode
634 local nodepool = nuts.pool
635
636 local new_disc = nodepool.disc
637 local new_penalty = nodepool.penalty
638
639 local getfield = nuts.getfield
640 local getfont = nuts.getfont
641 local getid = nuts.getid
642 local getattr = nuts.getattr
643 local getnext = nuts.getnext
644 local getprev = nuts.getprev
645 local getsubtype = nuts.getsubtype
646 local getlist = nuts.getlist
647 local getlanguage = nuts.getlanguage
648 local getattrlist = nuts.getattrlist
649 local setattrlist = nuts.setattrlist
650 local isglyph = nuts.isglyph
651 local ischar = nuts.ischar
652
653 local setchar = nuts.setchar
654 local setdisc = nuts.setdisc
655 local setlink = nuts.setlink
656 local setprev = nuts.setprev
657 local setnext = nuts.setnext
658
659 local insertbefore = nuts.insertbefore
660 local insertafter = nuts.insertafter
661 local copy_node = nuts.copy
662 local copylist = nuts.copylist
663 local remove_node = nuts.remove
664 local endofmath = nuts.endofmath
665 local node_tail = nuts.tail
666
667 local nexthlist = nuts.traversers.hlist
668 local nextdisc = nuts.traversers.disc
669
670 local setcolor = nodes.tracers.colors.set
671
672 local variables = interfaces.variables
673 local v_reset = variables.reset
674 local v_yes = variables.yes
675 local v_word = variables.word
676 local v_all = variables.all
677
678 local settings_to_array = utilities.parsers.settings_to_array
679
680 local unsetvalue = attributes.unsetvalue
681 local texsetattribute = tex.setattribute
682
683 local prehyphenchar = lang.prehyphenchar
684 local posthyphenchar = lang.posthyphenchar
685 local preexhyphenchar = lang.preexhyphenchar
686 local postexhyphenchar = lang.postexhyphenchar
687
688 local a_hyphenation = attributes.private("hyphenation")
689
690 local interwordpenalty = 5000
691
692 function traditional.loadpatterns(language)
693 return dictionaries[language]
694 end
695
696
697
698 setmetatableindex(dictionaries,function(t,k)
699 if type(k) == "string" then
700
701
702 languages.getnumber(k)
703 end
704 local specification = languages.getdata(k)
705 local dictionary = {
706 patterns = { },
707 exceptions = { },
708 hyphenated = { },
709 specials = { },
710 instance = false,
711 characters = { },
712 unicodes = { },
713 }
714 if specification then
715 local resources = specification.resources
716 if resources then
717 local characters = dictionary.characters or { }
718 local unicodes = dictionary.unicodes or { }
719 for i=1,#resources do
720 local r = resources[i]
721 if not r.in_dictionary then
722 r.in_dictionary = true
723 local patterns = r.patterns
724 if patterns then
725 local data = patterns.data
726 if data then
727
728 lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
729 end
730 local extra = patterns.extra
731 if extra then
732
733 lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials)
734 end
735 end
736 local exceptions = r.exceptions
737 if exceptions then
738 local data = exceptions.data
739 if data and data ~= "" then
740 lpegmatch(p_exception,data,1,dictionary.exceptions)
741 end
742 end
743 local usedchars = lpegmatch(p_split,patterns.characters)
744 for i=1,#usedchars do
745 local char = usedchars[i]
746 local code = utfbyte(char)
747 local upper = uccodes[code]
748 characters[char] = code
749 unicodes [code] = char
750 if type(upper) == "table" then
751 for i=1,#upper do
752 local u = upper[i]
753 unicodes[u] = utfchar(u)
754 end
755 else
756 unicodes[upper] = utfchar(upper)
757 end
758 end
759 end
760 end
761 dictionary.characters = characters
762 dictionary.unicodes = unicodes
763 setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end)
764 end
765 t[specification.number] = dictionary
766 dictionary.instance = specification.instance
767 end
768 t[k] = dictionary
769 return dictionary
770 end)
771
772
773
774
775
776
777
778
779
780
781 local featuresets = hyphenators.featuresets or { }
782 hyphenators.featuresets = featuresets
783
784 storage.shared.noflanguagesfeaturesets = storage.shared.noflanguagesfeaturesets or 0
785
786 local noffeaturesets = storage.shared.noflanguagesfeaturesets
787
788 storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets")
789
790
791
792 local function register(name,featureset)
793 noffeaturesets = noffeaturesets + 1
794 featureset.attribute = noffeaturesets
795 featuresets[noffeaturesets] = featureset
796 featuresets[name] = featureset
797 storage.shared.noflanguagesfeaturesets = noffeaturesets
798 return noffeaturesets
799 end
800
801 local function makeset(...)
802
803 local set = { }
804 for i=1,select("#",...) do
805 local list = select(i,...)
806 local kind = type(list)
807 local used = nil
808 if kind == "string" then
809 if list == v_all then
810
811 return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end)
812 elseif list ~= "" then
813 used = lpegmatch(p_split,list)
814 set = set or { }
815 for i=1,#used do
816 local char = used[i]
817 set[utfbyte(char)] = char
818 end
819 end
820 elseif kind == "table" then
821 if next(list) then
822 set = set or { }
823 for byte, char in next, list do
824 set[byte] = char == true and utfchar(byte) or char
825 end
826 elseif #list > 0 then
827 set = set or { }
828 for i=1,#list do
829 local l = list[i]
830 if type(l) == "number" then
831 set[l] = utfchar(l)
832 else
833 set[utfbyte(l)] = l
834 end
835 end
836 end
837 end
838 end
839 return set
840 end
841
842
843
844 local defaulthyphens = {
845 [0x002D] = true,
846 [0x00AD] = 0x002D,
847
848
849
850 [0x2010] = true,
851
852
853 [0x2013] = true,
854 [0x2014] = true,
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870 }
871
872 local defaultjoiners = {
873 [0x200C] = true,
874 [0x200D] = true,
875 }
876
877 local function somehyphenchar(c)
878 c = tonumber(c)
879 return c ~= 0 and c or nil
880 end
881
882 local function definefeatures(name,featureset)
883 local extrachars = featureset.characters
884 local hyphenchars = featureset.hyphens
885 local joinerchars = featureset.joiners
886 local alternative = featureset.alternative
887 local rightwordmin = tonumber(featureset.rightwordmin)
888 local charmin = tonumber(featureset.charmin)
889 local leftcharmin = tonumber(featureset.leftcharmin)
890 local rightcharmin = tonumber(featureset.rightcharmin)
891 local leftchar = somehyphenchar(featureset.leftchar)
892 local rightchar = somehyphenchar(featureset.rightchar)
893 local rightchars = featureset.rightchars
894local rightedge = featureset.rightedge
895local autohyphen = v_yes
896local hyphenonly = v_yes
897 rightchars = rightchars == v_word and true or tonumber(rightchars)
898 joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars
899 hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars
900
901 featureset.extrachars = makeset(joinerchars or "",extrachars or "")
902 featureset.hyphenchars = makeset(hyphenchars or "")
903 featureset.alternative = alternative or "hyphenate"
904 featureset.rightwordmin = rightwordmin and rightwordmin > 0 and rightwordmin or nil
905 featureset.charmin = charmin and charmin > 0 and charmin or nil
906 featureset.leftcharmin = leftcharmin and leftcharmin > 0 and leftcharmin or nil
907 featureset.rightcharmin = rightcharmin and rightcharmin > 0 and rightcharmin or nil
908 featureset.rightchars = rightchars
909 featureset.leftchar = leftchar
910 featureset.rightchar = rightchar
911
912featureset.autohyphen = autohyphen == v_yes
913featureset.hyphenonly = hyphenonly == v_yes
914 return register(name,featureset)
915 end
916
917 local function setfeatures(n)
918 if not n or n == v_reset then
919 n = false
920 else
921 local f = featuresets[n]
922 if not f and type(n) == "string" then
923 local t = settings_to_array(n)
924 local s = { }
925 for i=1,#t do
926 local ti = t[i]
927 local fs = featuresets[ti]
928 if fs then
929 for k, v in next, fs do
930 s[k] = v
931 end
932 end
933 end
934 n = register(n,s)
935 else
936 n = f and f.attribute
937 end
938 end
939 texsetattribute(a_hyphenation,n or unsetvalue)
940 end
941
942 traditional.definefeatures = definefeatures
943 traditional.setfeatures = setfeatures
944
945 implement {
946 name = "definehyphenationfeatures",
947 actions = definefeatures,
948 arguments = {
949 "string",
950 {
951 { "characters" },
952 { "hyphens" },
953 { "joiners" },
954 { "rightchars" },
955 { "rightwordmin", "integer" },
956 { "charmin", "integer" },
957 { "leftcharmin", "integer" },
958 { "rightcharmin", "integer" },
959 { "leftchar", "integer" },
960 { "rightchar", "integer" },
961 { "alternative" },
962 { "rightedge" },
963 }
964 }
965 }
966
967 implement {
968 name = "sethyphenationfeatures",
969 actions = setfeatures,
970 arguments = "string"
971 }
972
973 implement {
974 name = "registerhyphenationpattern",
975 actions = traditional.registerpattern,
976 arguments = { "string", "string", "boolean" }
977 }
978
979 implement {
980 name = "registerhyphenationexception",
981 actions = traditional.registerexception,
982 arguments = "2 strings",
983 }
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008 local starttiming = statistics.starttiming
1009 local stoptiming = statistics.stoptiming
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028 function traditional.hyphenate(head)
1029
1030 local first = head
1031 local tail = nil
1032 local last = nil
1033 local current = first
1034 local dictionary = nil
1035 local instance = nil
1036 local characters = nil
1037 local unicodes = nil
1038 local exhyphenchar = tex.exhyphenchar
1039 local extrachars = nil
1040 local hyphenchars = nil
1041 local language = nil
1042 local lastfont = nil
1043 local start = nil
1044 local stop = nil
1045 local word = { }
1046 local size = 0
1047 local leftchar = false
1048 local rightchar = false
1049 local leftexchar = false
1050 local rightexchar = false
1051 local leftmin = 0
1052 local rightmin = 0
1053 local charmin = 1
1054 local leftcharmin = nil
1055 local rightcharmin = nil
1056
1057 local rightwordmin = nil
1058 local rightchars = nil
1059 local leftchar = nil
1060 local rightchar = nil
1061 local attr = nil
1062 local lastwordlast = nil
1063 local hyphenated = hyphenate
1064
1065 local exhyphenpenalty = tex.exhyphenpenalty
1066 local hyphenpenalty = tex.hyphenpenalty
1067 local autohyphen = false
1068 local hyphenonly = false
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081 starttiming(traditional)
1082
1083 local function insertpenalty()
1084 local p = new_penalty(interwordpenalty)
1085 setattrlist(p,last)
1086 if trace_visualize then
1087 nuts.setvisual(p,"penalty")
1088 end
1089 last = getprev(last)
1090 first, last = insertafter(first,last,p)
1091 end
1092
1093 local function synchronizefeatureset(a)
1094 local f = a and featuresets[a]
1095 if f then
1096 hyphenated = methods[f.alternative or "hyphenate"]
1097 extrachars = f.extrachars
1098 hyphenchars = f.hyphenchars
1099 rightwordmin = f.rightwordmin
1100 charmin = f.charmin
1101 leftcharmin = f.leftcharmin
1102 rightcharmin = f.rightcharmin
1103 leftchar = f.leftchar
1104 rightchar = f.rightchar
1105
1106 rightchars = f.rightchars
1107 autohyphen = f.autohyphen
1108 hyphenonly = f.hyphenonly
1109 if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then
1110
1111 if not tail then
1112 tail = node_tail(first)
1113 end
1114 last = tail
1115 local inword = false
1116 local count = 0
1117 while last and rightwordmin > 0 do
1118 local id = getid(last)
1119 if id == glyph_code then
1120 count = count + 1
1121 inword = true
1122 if trace_visualize then
1123 setcolor(last,"darkgreen")
1124 end
1125 elseif inword then
1126 inword = false
1127 rightwordmin = rightwordmin - 1
1128 if rightchars == true then
1129 if rightwordmin > 0 then
1130 insertpenalty()
1131 end
1132 elseif rightchars and count <= rightchars then
1133 insertpenalty()
1134 end
1135 end
1136 last = getprev(last)
1137 end
1138 lastwordlast = rightwordmin
1139 end
1140 if not charmin or charmin == 0 then
1141 charmin = 1
1142 end
1143 else
1144 hyphenated = methods.hyphenate
1145 extrachars = false
1146 hyphenchars = false
1147 rightwordmin = false
1148 charmin = 1
1149 leftcharmin = false
1150 rightcharmin = false
1151 leftchar = false
1152 rightchar = false
1153
1154 autohyphen = false
1155 hyphenonly = false
1156 end
1157
1158 return a
1159 end
1160
1161 local function flush(hyphens)
1162
1163 local rightmin = size - rightmin
1164 local result = { }
1165 local rsize = 0
1166 local position = 1
1167
1168
1169
1170
1171
1172
1173
1174 while position <= size do
1175 if position >= leftmin and position <= rightmin then
1176 local hyphen = hyphens[position]
1177 if not hyphen then
1178 rsize = rsize + 1
1179 result[rsize] = word[position]
1180 position = position + 1
1181 elseif hyphen == true then
1182 rsize = rsize + 1
1183 result[rsize] = word[position]
1184 rsize = rsize + 1
1185 result[rsize] = true
1186 position = position + 1
1187 else
1188 local o, h = hyphen[2]
1189 if o then
1190
1191 h = hyphen[1]
1192 else
1193
1194 h = hyphen
1195 o = 1
1196 end
1197 local b = position - o + (h.start or 1)
1198 local e = b + (h.length or 2) - 1
1199 if b > 0 and e >= b then
1200 for i=1,b-position do
1201 rsize = rsize + 1
1202 result[rsize] = word[position]
1203 position = position + 1
1204 end
1205 rsize = rsize + 1
1206 result[rsize] = {
1207 h.before or "",
1208 h.after or "",
1209 concat(word,"",b,e),
1210 h.right,
1211 h.left,
1212 }
1213 position = e + 1
1214 else
1215
1216 rsize = rsize + 1
1217 result[rsize] = word[position]
1218 position = position + 1
1219 end
1220 end
1221 else
1222 rsize = rsize + 1
1223 result[rsize] = word[position]
1224 position = position + 1
1225 end
1226 end
1227
1228 local function serialize(replacement,leftchar,rightchar)
1229 if not replacement then
1230 return
1231 elseif replacement == true then
1232 local glyph = copy_node(stop)
1233 setchar(glyph,leftchar or rightchar)
1234 return glyph
1235 end
1236 local head = nil
1237 local current = nil
1238 if leftchar then
1239 head = copy_node(stop)
1240 current = head
1241 setchar(head,leftchar)
1242 end
1243 local rsize = #replacement
1244 if rsize == 1 then
1245 local glyph = copy_node(stop)
1246 setchar(glyph,characters[replacement])
1247 if head then
1248 insertafter(current,current,glyph)
1249 else
1250 head = glyph
1251 end
1252 current = glyph
1253 elseif rsize > 0 then
1254 local list = lpegmatch(p_split,replacement)
1255 for i=1,#list do
1256 local glyph = copy_node(stop)
1257 setchar(glyph,characters[list[i]])
1258 if head then
1259 insertafter(current,current,glyph)
1260 else
1261 head = glyph
1262 end
1263 current = glyph
1264 end
1265 end
1266 if rightchar then
1267 local glyph = copy_node(stop)
1268 insertafter(current,current,glyph)
1269 setchar(glyph,rightchar)
1270 end
1271 return head
1272 end
1273
1274 local current = start
1275 local attrnode = start
1276
1277 for i=1,rsize do
1278 local r = result[i]
1279 if r == true then
1280 local disc = new_disc()
1281 local pre = nil
1282 local post = nil
1283 if rightchar then
1284 pre = serialize(true,rightchar)
1285 end
1286 if leftchar then
1287 post = serialize(true,leftchar)
1288 end
1289 setdisc(disc,pre,post,nil,regulardisc_code,hyphenpenalty)
1290 if attrnode then
1291 setattrlist(disc,attrnode)
1292 end
1293
1294 insertbefore(first,current,disc)
1295 elseif type(r) == "table" then
1296 local disc = new_disc()
1297 local pre = r[1]
1298 local post = r[2]
1299 local replace = r[3]
1300 local right = r[4] ~= false and rightchar
1301 local left = r[5] ~= false and leftchar
1302 if pre then
1303 if pre ~= "" then
1304 pre = serialize(pre,false,right)
1305 else
1306 pre = nil
1307 end
1308 end
1309 if post then
1310 if post ~= "" then
1311 post = serialize(post,left,false)
1312 else
1313 post = nil
1314 end
1315 end
1316 if replace then
1317 if replace ~= "" then
1318 replace = serialize(replace)
1319 else
1320 replace = nil
1321 end
1322 end
1323
1324 setdisc(disc,pre,post,replace,regulardisc_code,hyphenpenalty)
1325 if attrnode then
1326 setattrlist(disc,attrnode)
1327 end
1328 insertbefore(first,current,disc)
1329 else
1330 setchar(current,characters[r])
1331 if i < rsize then
1332 current = getnext(current)
1333 end
1334 end
1335 end
1336 if current and current ~= stop then
1337 local current = getnext(current)
1338 local last = getnext(stop)
1339 while current ~= last do
1340 first, current = remove_node(first,current,true)
1341 end
1342 end
1343
1344 end
1345
1346 local function inject(leftchar,rightchar,code,attrnode)
1347 if first ~= current then
1348 local disc = new_disc()
1349 first, current, glyph = remove_node(first,current)
1350 first, current = insertbefore(first,current,disc)
1351 if trace_visualize then
1352 setcolor(glyph,"darkred")
1353 setcolor(disc,"darkgreen")
1354 end
1355 local pre = nil
1356 local post = nil
1357 local replace = glyph
1358 if leftchar and leftchar > 0 then
1359 post = copy_node(glyph)
1360 setchar(post,leftchar)
1361 end
1362 pre = copy_node(glyph)
1363 setchar(pre,rightchar and rightchar > 0 and rightchar or code)
1364 setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty)
1365 if attrnode then
1366 setattrlist(disc,attrnode)
1367 end
1368 end
1369 return current
1370 end
1371
1372 local function injectseries(current,last,next,attrnode)
1373 local disc = new_disc()
1374 local start = current
1375 first, current = insertbefore(first,current,disc)
1376 setprev(start)
1377 setnext(last)
1378 if next then
1379 setlink(current,next)
1380 else
1381 setnext(current)
1382 end
1383 local pre = copylist(start)
1384 local post = nil
1385 local replace = start
1386 setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty)
1387 if attrnode then
1388 setattrlist(disc,attrnode)
1389 end
1390 return current
1391 end
1392
1393 local a = getattr(first,a_hyphenation)
1394 if a ~= attr then
1395 attr = synchronizefeatureset(a)
1396 end
1397
1398
1399
1400
1401
1402
1403 local skipping = false
1404
1405
1406
1407 while current and current ~= last do
1408 local code, id = isglyph(current)
1409 if code then
1410 if skipping then
1411 current = getnext(current)
1412 else
1413 local lang = getlanguage(current)
1414 local font = getfont(current)
1415 if lang ~= language or font ~= lastfont then
1416 if dictionary and size > charmin and leftmin + rightmin <= size then
1417
1418 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1419
1420 else
1421 local hyphens = hyphenated(dictionary,word,size)
1422 if hyphens then
1423 flush(hyphens)
1424 end
1425 end
1426 end
1427 lastfont = font
1428 if language ~= lang and lang > 0 then
1429
1430 dictionary = dictionaries[lang]
1431 instance = dictionary.instance
1432 characters = dictionary.characters
1433 unicodes = dictionary.unicodes
1434
1435 local a = getattr(current,a_hyphenation)
1436 attr = synchronizefeatureset(a)
1437 leftchar = leftchar or (instance and posthyphenchar (instance))
1438 rightchar = rightchar or (instance and prehyphenchar (instance))
1439 leftexchar = (instance and preexhyphenchar (instance))
1440 rightexchar = (instance and postexhyphenchar(instance))
1441 leftmin = leftcharmin or getfield(current,"left")
1442 rightmin = rightcharmin or getfield(current,"right")
1443 if not leftchar or leftchar < 0 then
1444 leftchar = false
1445 end
1446 if not rightchar or rightchar < 0 then
1447 rightchar = false
1448 end
1449
1450 local char = unicodes[code] or (extrachars and extrachars[code])
1451 if char then
1452 word[1] = char
1453 size = 1
1454 start = current
1455 else
1456 size = 0
1457 end
1458 else
1459 size = 0
1460 end
1461 language = lang
1462 elseif language <= 0 then
1463
1464 elseif size > 0 then
1465 local char = unicodes[code] or (extrachars and extrachars[code])
1466 if char then
1467 size = size + 1
1468 word[size] = char
1469 elseif dictionary then
1470 if not hyphenonly or code ~= exhyphenchar then
1471 if size > charmin and leftmin + rightmin <= size then
1472 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1473
1474 else
1475 local hyphens = hyphenated(dictionary,word,size)
1476 if hyphens then
1477 flush(hyphens)
1478 end
1479 end
1480 end
1481 end
1482 size = 0
1483 if code == exhyphenchar then
1484 local next = getnext(current)
1485 local last = current
1486 local font = getfont(current)
1487 while next and ischar(next,font) == code do
1488 last = next
1489 next = getnext(next)
1490 end
1491 if not autohyphen then
1492 current = last
1493 elseif current == last then
1494 current = inject(leftexchar,rightexchar,code,current)
1495 else
1496 current = injectseries(current,last,next,current)
1497 end
1498 if hyphenonly then
1499 skipping = true
1500 end
1501 elseif hyphenchars then
1502 local char = hyphenchars[code]
1503 if char == true then
1504 char = code
1505 end
1506 if char then
1507 current = inject(leftchar and char or nil,rightchar and char or nil,char,current)
1508 end
1509 end
1510 end
1511 else
1512 local a = getattr(current,a_hyphenation)
1513 if a ~= attr then
1514 attr = synchronizefeatureset(a)
1515 leftchar = leftchar or (instance and posthyphenchar (instance))
1516 rightchar = rightchar or (instance and prehyphenchar (instance))
1517 leftexchar = (instance and preexhyphenchar (instance))
1518 rightexchar = (instance and postexhyphenchar(instance))
1519 leftmin = leftcharmin or getfield(current,"left")
1520 rightmin = rightcharmin or getfield(current,"right")
1521 if not leftchar or leftchar < 0 then
1522 leftchar = false
1523 end
1524 if not rightchar or rightchar < 0 then
1525 rightchar = false
1526 end
1527 end
1528
1529 local char = unicodes[code] or (extrachars and extrachars[code])
1530 if char then
1531 word[1] = char
1532 size = 1
1533 start = current
1534 end
1535 end
1536 stop = current
1537 current = getnext(current)
1538 end
1539 else
1540 if skipping then
1541 skipping = false
1542 end
1543 if id == disc_code then
1544 size = 0
1545 current = getnext(current)
1546 if hyphenonly then
1547 skipping = true
1548 end
1549
1550
1551
1552 else
1553 current = id == math_code and getnext(endofmath(current)) or getnext(current)
1554 end
1555 if size > 0 then
1556 if dictionary and size > charmin and leftmin + rightmin <= size then
1557 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1558
1559 else
1560 local hyphens = hyphenated(dictionary,word,size)
1561 if hyphens then
1562 flush(hyphens)
1563 end
1564 end
1565 end
1566 size = 0
1567 end
1568 end
1569 end
1570
1571
1572 if dictionary and size > charmin and leftmin + rightmin <= size then
1573 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1574
1575 else
1576 local hyphens = hyphenated(dictionary,word,size)
1577 if hyphens then
1578 flush(hyphens)
1579 end
1580 end
1581 end
1582
1583 stoptiming(traditional)
1584
1585 return head
1586 end
1587
-- Report the hyphenation statistics, but only when words were processed or
-- time was registered for the traditional hyphenator.
statistics.register("hyphenation",function()
    local active = nofwords > 0 or statistics.elapsed(traditional) > 0
    if not active then
        return
    end
    local template = "%s words hyphenated, %s unique, used time %s"
    local seconds  = statistics.elapsedseconds(traditional) or 0
    return string.format(template,nofwords,nofhashed,seconds)
end)
1594
-- Name of the builder action; the old and new method names start out equal.
-- NOTE(review): none of these three is referenced in the visible remainder
-- of the file -- possibly leftovers, confirm before removing.
local texmethod = "builders.kernel.hyphenation"
local oldmethod, newmethod = texmethod, texmethod
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
local hyphenate   = lang.hyphenate
local hyphenating = nuts.hyphenating
local methods     = { }   -- method name -> hyphenation function (or false)
local usedmethod  = false -- currently active method
local stack       = { }   -- methods saved by pushmethod

-- The engine's own hyphenator wrapped so that it takes and returns a head:
-- nuts.hyphenating is used when available, otherwise lang.hyphenate is
-- applied to the converted node and the head is passed back untouched.
local original
if hyphenating then
    original = function(head)
        -- the parentheses truncate the result to a single value
        return (hyphenating(head))
    end
else
    original = function(head)
        hyphenate(tonode(head))
        return head
    end
end
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
-- When enabled, the handler consults the "hyphenstate" counter before it
-- processes hbox groups.
local optimize = false
local getcount = tex.getcount

hyphenators.methods = methods

directives.register("hyphenator.optimize", function(enabled)
    optimize = enabled
end)
1645
-- Apply the active hyphenation method to a node list.
--
-- head      : head of the list
-- groupcode : group type the list comes from
--
-- Returns the result of the active method, or `head` unchanged when no
-- method is active or when the optimization decides to skip this list.
function hyphenators.handler(head,groupcode)
    if not usedmethod then
        return head
    end
    if optimize and (groupcode == "hbox" or groupcode == "adjusted_hbox") then
        -- in optimized mode boxes are only processed when flagged
        if not (getcount("hyphenstate") > 0) then
            return head
        end
        -- NOTE(review): `forced` has no visible declaration; unless it is a
        -- local defined earlier in the file this writes a global -- confirm.
        forced = false
    end
    return usedmethod(head)
end
1662
-- Register the available methods: three names map onto the engine's own
-- hyphenator, "traditional" onto the Lua one, and "none" is false.
do
    local native = { "tex", "original", "expanded" }
    for i=1,#native do
        methods[native[i]] = original
    end
end
methods.traditional = languages.hyphenators.traditional.hyphenate
methods.none        = false

-- start out with the engine's hyphenator
usedmethod = original
1670
-- Select the active hyphenation method by name. An unregistered name falls
-- back to methods.tex; a non-string argument results in `false`.
local function setmethod(method)
    local chosen = false
    if type(method) == "string" then
        chosen = methods[method]
    end
    if chosen == nil then
        chosen = methods.tex
    end
    usedmethod = chosen
end
-- Save the active method on the stack and then select a new one by name,
-- using the same fallback rules as setmethod.
local function pushmethod(method)
    stack[#stack+1] = usedmethod
    local chosen = false
    if type(method) == "string" then
        chosen = methods[method]
    end
    if chosen == nil then
        chosen = methods.tex
    end
    usedmethod = chosen
end
-- Restore the method saved by the matching pushmethod, falling back to
-- methods.tex when the stack is empty.
--
-- A saved method may legitimately be `false` (hyphenation disabled), so the
-- stack size is tested instead of using `remove(stack) or methods.tex`,
-- which silently turned a saved `false` into methods.tex.
local function popmethod()
    if #stack > 0 then
        usedmethod = remove(stack)
    else
        usedmethod = methods.tex
    end
end
1687
-- Expose the method selectors and let a directive pick the method as well.
hyphenators.setmethod, hyphenators.pushmethod, hyphenators.popmethod =
    setmethod, pushmethod, popmethod

directives.register("hyphenators.method",setmethod)
1693
-- Apply a setup specification; only its `method` field is looked at and it
-- is ignored when absent (or false).
function hyphenators.setup(specification)
    local method = specification.method
    if not method then
        return
    end
    setmethod(method)
end
1700
-- Interface bindings for the three method selectors.
implement {
    name      = "sethyphenationmethod",
    actions   = setmethod,
    arguments = "string",
}

implement {
    name      = "pushhyphenation",
    actions   = pushmethod,
    arguments = "string",
}

implement {
    name    = "pophyphenation",
    actions = popmethod,
}
1704
1705
1706
-- Shortcuts used when typesetting the trace table.
local context = context
local ctx_NC, ctx_NR, ctx_verbatim = context.NC, context.NR, context.verbatim
1711
-- Typeset a table with the steps that traditional.gettrace reports for a
-- word in the given language. Nothing is typeset for a missing or empty word
-- or when no steps come back.
function hyphenators.showhyphenationtrace(language,word)
    if word and word ~= "" then
        -- run the trace with trace_steps set to "silent" and restore the
        -- previous value afterwards
        local previous = trace_steps
        trace_steps = "silent"
        local steps = traditional.gettrace(language,word)
        trace_steps = previous
        local count = steps and #steps or 0
        if count > 0 then
            context.starttabulate { "|r|l|l|l|" }
            for row=1,count do
                local step = steps[row]
                ctx_NC()
                -- only the rows between the first and the last are numbered
                if row > 1 and row < count then
                    context(row-1)
                end
                ctx_NC()
                ctx_verbatim(step[1])
                ctx_NC()
                ctx_verbatim(step[2])
                ctx_NC()
                ctx_verbatim(step[3])
                ctx_NC()
                ctx_NR()
            end
            context.stoptabulate()
        end
    end
end
1737
-- Interface binding: takes the language and the word as two strings.
implement {
    arguments = "2 strings",
    actions   = hyphenators.showhyphenationtrace,
    name      = "showhyphenationtrace",
}
1743
-- Remove (and free) all discretionary nodes found directly in the lists of
-- the hlists that nexthlist visits from `head`. Returns `head`.
--
-- The original loop ignored its loop variable and called
-- remove_node(h,false,true) with the undefined `h`, so no disc was ever
-- removed. The discs are collected first and removed afterwards, so no node
-- is freed while the iterator may still need it, and the possibly changed
-- list head is stored back into the hlist.
function nodes.stripdiscretionaries(head)
    for l in nexthlist, head do
        local list = getlist(l)
        if list then
            local discs = { }
            local n     = 0
            for d in nextdisc, list do
                n = n + 1
                discs[n] = d
            end
            if n > 0 then
                for i=1,n do
                    -- first result is the (possibly new) head of the list
                    list = remove_node(list,discs[i],true)
                end
                nuts.setlist(l,list)
            end
        end
    end
    return head
end
1752
1753
1754else
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805end
1806
1807 |