1if not modules then modules = { } end modules ['lang-hyp'] = {
2 version = 1.001,
3 comment = "companion to lang-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79local type, rawget, rawset, tonumber, next = type, rawget, rawset, tonumber, next
80
81local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs
82local lpegmatch = lpeg.match
83
84local context = context
85
86local concat = table.concat
87local insert = table.insert
88local remove = table.remove
89local formatters = string.formatters
90local utfchar = utf.char
91local utfbyte = utf.byte
92
93if not characters then
94 require("char-ini")
95end
96
97local setmetatableindex = table.setmetatableindex
98
99
100
101local trace_steps = false trackers.register("hyphenator.steps", function(v) trace_steps = v end)
102local trace_visualize = false trackers.register("hyphenator.visualize",function(v) trace_visualize = v end)
103
104local report = logs.reporter("hyphenator")
105
106local implement = interfaces and interfaces.implement or function() end
107
108languages = languages or { }
109local hyphenators = languages.hyphenators or { }
110languages.hyphenators = hyphenators
111local traditional = hyphenators.traditional or { }
112hyphenators.traditional = traditional
113
-- Per-language dictionaries, created on demand: indexing an unknown language
-- builds an empty, not yet loaded dictionary, stores it under that key and
-- returns it.
local dictionaries = setmetatableindex(function(self,language)
    local dictionary = { }
    dictionary.patterns   = { }
    dictionary.hyphenated = { }
    dictionary.specials   = { }
    dictionary.exceptions = { }
    dictionary.loaded     = false
    self[language] = dictionary
    return dictionary
end)
125
126hyphenators.dictionaries = dictionaries
127
-- Basic lpeg building blocks used when parsing pattern and exception strings.

local character = lpeg.patterns.utf8character
local digit = R("09")
-- a digit converted to a number, or the constant 0 when there is no digit
local weight = digit/tonumber + Cc(0)
local fence = P(".")
local hyphen = P("-")
local space = P(" ")
-- any character except a space
local char = character - space
-- any character except "." and "/"
local validcharacter = (character - S("./"))
-- any character except "/"
local keycharacter = character - S("/")

-- Optional "/before=after[,start,length]" suffix. The group captures are folded
-- with rawset into a fresh table, giving the keys "before" and "after" and, when
-- the second part is present, the numeric keys "start" and "length" (the length
-- runs up to the end of the subject).
local specpart = (P("/") * Cf ( Ct("") *
    Cg ( Cc("before") * C((1-P("="))^1) * P("=") ) *
    Cg ( Cc("after") * C((1-P(","))^1) ) *
    ( P(",") *
        Cg ( Cc("start") * ((1-P(","))^1/tonumber) * P(",") ) *
        Cg ( Cc("length") * ((1-P(-1) )^1/tonumber) )
    )^-1
 , rawset))^-1

-- Hash key of a pattern: digits are dropped, every other character is kept;
-- matching stops at a "/" because keycharacter excludes it.
local make_hashkey_p = Cs((digit/"" + keycharacter)^1)

-- Hash key of an exception: hyphens are dropped, every other character is kept.
local make_hashkey_e = Cs((hyphen/"" + keycharacter)^1)
-- Table with one boolean for each character after the first one: true when that
-- character is preceded by a hyphen, false otherwise.
local make_pattern_e = Ct(P(char) * (hyphen * Cc(true) * P(char) + P(char) * Cc(false))^1)

-- Converts every single byte of the subject with tonumber and collects the
-- results in a table.
local make_pattern_c = Ct((P(1)/tonumber)^1)

-- Memoizes the make_pattern_c result per input string, so equal strings share
-- one table.
local cache = setmetatableindex(function(t,k)
    local v = lpegmatch(make_pattern_c,k)
    t[k] = v
    return v
end)

-- Inside the substitution capture below: a digit is kept as is and a "0" is
-- inserted where a weight is missing, a fence becomes "0", and characters are
-- removed. What is left is a string of digits that is looked up in cache, which
-- yields the table of numbers for that string.
local weight_n = digit + Cc("0")
local fence_n = fence / "0"
local char_n = validcharacter / ""
local basepart_n = Cs(fence_n^-1 * (weight_n * char_n)^1 * weight_n * fence_n^-1) / cache
-- a complete pattern: the weights table followed by the optional spec table
local make_pattern_n = basepart_n * specpart
181
-- Stores one pattern string. The weights table goes into patterns under the
-- digit-free key; an explicitly passed specification wins over one parsed from
-- the string itself, and whichever is found goes into specials.
local function register_pattern(patterns,specials,str,specification)
    local key = lpegmatch(make_hashkey_p,str)
    local weights, parsed = lpegmatch(make_pattern_n,str)
    patterns[key] = weights
    local special = specification or parsed
    if special then
        specials[key] = special
    end
end
193
-- Removes the pattern (and any special attached to it) that str hashes to.
local function unregister_pattern(patterns,specials,str)
    local key = lpegmatch(make_hashkey_p,str)
    patterns[key], specials[key] = nil, nil
end
199
200local p_lower = lpeg.patterns.utf8lower
201
-- Stores one exception: the string is lowercased, the hyphen-free form is the
-- key and the table of hyphenation flags is the value. The specification
-- argument is accepted but not used.
local function register_exception(exceptions,str,specification)
    local lowered = lpegmatch(p_lower,str)
    local key     = lpegmatch(make_hashkey_e,lowered)
    exceptions[key] = lpegmatch(make_pattern_e,lowered)
end
208
-- Scanners over a data string: every run of non-space characters is passed to
-- the register function, preceded by the extra arguments given to lpegmatch
-- (Carg); anything else is skipped one position at a time.
local p_pattern = ((Carg(1) * Carg(2) * C(char^1)) / register_pattern + 1)^1
local p_exception = ((Carg(1) * C(char^1)) / register_exception + 1)^1
-- splits a string into a table of utf8 characters
local p_split = Ct(C(character)^1)
212
-- Loads the patterns and exceptions of a language into its dictionary, once.
--
-- language : key into dictionaries
-- filename : optional file name; when nil or empty "lang-<language>" is used,
--            and the "lua" suffix is added in both cases
--
-- The file is located with resolvers.findfile and run with dofile; it is
-- expected to return a table that may have a "patterns" and an "exceptions"
-- subtable, each with a "data" string. The dictionary is flagged as loaded even
-- when nothing was found, so the lookup is not repeated. Returns the dictionary.
function traditional.loadpatterns(language,filename)
    local dictionary = dictionaries[language]
    if dictionary.loaded then
        return dictionary
    end
    if not filename or filename == "" then
        filename = "lang-" .. language
    end
    filename = file.addsuffix(filename,"lua")
    local fullname = resolvers.findfile(filename)
    if fullname and fullname ~= "" then
        local specification = dofile(fullname)
        if specification then
            local patterns = specification.patterns
            if patterns then
                local data = patterns.data
                if data and data ~= "" then
                    lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
                end
                -- the minima live in the patterns table, so they can only be
                -- picked up when that table is there (a file with exceptions
                -- only used to trigger an index-nil error here)
                dictionary.lefthyphenmin  = patterns.lefthyphenmin
                dictionary.righthyphenmin = patterns.righthyphenmin
            end
            local exceptions = specification.exceptions
            if exceptions then
                local data = exceptions.data
                if data and data ~= "" then
                    lpegmatch(p_exception,data,1,dictionary.exceptions)
                end
            end
        end
    end
    dictionary.loaded = true
    return dictionary
end
246
247local lcchars = characters.lcchars
248local uccodes = characters.uccodes
249local categories = characters.categories
250local nofwords = 0
251local nofhashed = 0
252
253local steps = nil
254local f_show = formatters["%w%s"]
255
-- Reports the collected steps, but only when the tracker value is exactly true.
-- The second column is aligned using the width of the first entry of the first
-- step.
local function show_log()
    if trace_steps ~= true then
        return
    end
    report()
    local width = #steps[1][1]
    for index=1,#steps do
        local step = steps[index]
        local left = step[1]
        report("%s%w%S %S",left,width - #left + 3,step[2] or left,step[3] or "")
    end
    report()
end
267
-- Starts a new trace: the first step shows the space separated characters in
-- both columns.
local function show_1(wsplit)
    local spaced = concat(wsplit," ")
    local first  = f_show(0,spaced)
    local second = f_show(0,spaced)
    steps = { { first, second } }
end
272
-- Adds a trace step for one matching pattern: the left column interleaves the
-- weights in m with the characters of c, shifted according to the start
-- position i; the right column shows the accumulated weights in done (from the
-- second entry on) and is passed the serialized spec, if any.
local function show_2(c,m,wsplit,done,i,spec)
    local chars = lpegmatch(p_split,c)
    local mixed = { }
    for j=1,#m do
        mixed[#mixed+1] = m[j]
        mixed[#mixed+1] = chars[j]
    end
    local offset = 2*i - 2
    local text   = spec and table.sequenced(spec) or ""
    local left
    if offset == 0 then
        -- a pattern starting at the fence: the leading weight is left out
        left = f_show(offset,concat(mixed,"",2))
    elseif i + 1 == #wsplit then
        -- a pattern starting at the last character: the trailing entry is left out
        left = f_show(offset-1,concat(mixed,"",1,#mixed-1))
    else
        left = f_show(offset-1,concat(mixed))
    end
    steps[#steps+1] = { left, f_show(1,concat(done," ",2,#done),text) }
end
293
-- Adds the final trace step and reports the log. One column shows characters
-- interleaved with their weights, the other shows a "-" where the weight is odd
-- (never directly after the second entry) and a space elsewhere.
local function show_3(wsplit,done)
    local visual  = { }
    local weights = { }
    for index=1,#wsplit do
        if index > 1 then
            local value     = done[index]
            local separator = " "
            if index > 2 and value % 2 == 1 then
                separator = "-"
            end
            visual [#visual +1] = separator
            weights[#weights+1] = value
        end
        local character = wsplit[index]
        visual [#visual +1] = character
        weights[#weights+1] = character
    end
    steps[#steps+1] = { f_show(0,concat(weights)), f_show(0,concat(visual)) }
    show_log()
end
311
-- Trace for a word found in the exceptions: one step with the space separated
-- characters, reported right away. The done argument is not used.
local function show_4(wsplit,done)
    local line = concat(wsplit," ")
    steps = { { line } }
    show_log()
end
316
-- Gives access to the steps of the most recent trace (nil when there is none).
traditional.lasttrace = function()
    local last = steps
    return last
end
320
321
322
323
324
325
326
327
328
329
-- Determines the hyphenation points of a word.
--
-- dictionary : table with the fields hyphenated (result cache), exceptions,
--              patterns and specials
-- word       : a string, or a table of characters
-- n          : optional number of entries of word to use (defaults to #word)
--
-- Returns false when no hyphenation point was found, otherwise a table indexed
-- by character position whose entries are false (no break), true (a break) or
-- a table { special, offset } for a break that has a special attached. For a
-- word found in the exceptions the stored exception table is returned as is.
-- Results are cached in dictionary.hyphenated under the word as given.
local function hyphenate(dictionary,word,n)
    nofwords = nofwords + 1
    local hyphenated = dictionary.hyphenated
    local isstring = type(word) == "string"
    -- first try the cache with the word as given; false is a valid cached
    -- result, hence the explicit comparison with nil
    if isstring then
        local done = hyphenated[word]
        if done ~= nil then
            return done
        end
    elseif n then
        local done = hyphenated[concat(word,"",1,n)]
        if done ~= nil then
            return done
        end
    else
        local done = hyphenated[concat(word)]
        if done ~= nil then
            return done
        end
    end
    -- from here on word is a table of characters and key is the cache key
    local key
    if isstring then
        key = word
        word = lpegmatch(p_split,word)
        if not n then
            n = #word
        end
    else
        if not n then
            n = #word
        end
        key = concat(word,"",1,n)
    end
    -- w holds the lowercased characters between two "." fences, l is its size
    local l = 1
    local w = { "." }

    for i=1,n do
        local c = word[i]

        l = l + 1
        w[l] = lcchars[c] or c
    end
    l = l + 1
    w[l] = "."
    -- the lowercased word without the fences
    local c = concat(w,"",2,l-1)

    -- second cache lookup, now with the lowercased word
    local done = hyphenated[c]
    if done ~= nil then
        hyphenated[key] = done
        nofhashed = nofhashed + 1
        return done
    end

    -- exceptions take precedence over patterns
    local exceptions = dictionary.exceptions
    local exception = exceptions[c]
    if exception then
        if trace_steps then
            show_4(w,exception)
        end
        hyphenated[key] = exception
        nofhashed = nofhashed + 1
        return exception
    end

    if trace_steps then
        show_1(w)
    end

    local specials = dictionary.specials
    local patterns = dictionary.patterns

    -- try every substring w[i..j] as a pattern; done is nil at this point and
    -- only becomes a table (one weight per entry of w) at the first match
    local spec
    for i=1,l do
        for j=i,l do
            local c = concat(w,"",i,j)
            local m = patterns[c]
            if m then
                local s = specials[c]
                if not done then
                    done = { }
                    spec = nil


                    for i=1,l do
                        done[i] = 0
                    end
                end


                -- merge the weights of this pattern: the highest weight wins
                for k=1,#m do
                    local new = m[k]
                    if not new then
                        break
                    elseif new == true then
                        report("fatal error")
                        break
                    elseif new > 0 then
                        local pos = i + k - 1
                        local old = done[pos]
                        if not old then
                            -- position outside done: ignored
                        elseif new > old then
                            done[pos] = new
                            if s then
                                -- remember the special when pos falls inside the
                                -- range b..e given by its start and length
                                -- (defaults 1 and 2), relative to the pattern start
                                local b = i + (s.start or 1) - 1
                                if b > 0 then
                                    local e = b + (s.length or 2) - 1
                                    if e > 0 then
                                        if pos >= b and pos <= e then
                                            if spec then
                                                spec[pos] = { s, k - 1 }
                                            else
                                                spec = { [pos] = { s, k - 1 } }
                                            end
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
                if trace_steps and done then
                    show_2(c,m,w,done,i,s)
                end
            end
        end
    end
    if trace_steps and done then
        show_3(w,done)
    end
    if done then
        -- turn the weights into the result: entry i of the fenced word ends up
        -- at i-2, an odd weight becomes a break (the special when there is one)
        local okay = false
        for i=3,#done do
            if done[i] % 2 == 1 then
                done[i-2] = spec and spec[i] or true
                okay = true
            else
                done[i-2] = false
            end
        end
        if okay then
            -- everything moved two positions down, so the last two entries go
            done[#done] = nil
            done[#done] = nil
        else
            done = false
        end
    else
        done = false
    end
    hyphenated[key] = done
    nofhashed = nofhashed + 1
    return done
end
483
-- Hyphenates word for the given language with the cached result for that exact
-- word removed first, and returns the trace steps. Nothing is returned for a
-- nil or empty word.
function traditional.gettrace(language,word)
    if word and word ~= "" then
        local dictionary = dictionaries[language]
        if dictionary then
            dictionary.hyphenated[word] = nil
            hyphenate(dictionary,word)
            return steps
        end
    end
end
496
-- Hyphenation methods by name; a name that is not installed resolves to (and is
-- then stored as) the built-in hyphenate function.
local methods = setmetatableindex(function(registry,name)
    registry[name] = hyphenate
    return hyphenate
end)
498
-- Installs f as the hyphenation method called name. A name that already has an
-- entry is left alone and reported instead.
function traditional.installmethod(name,f)
    if not rawget(methods,name) then
        methods[name] = f
        return
    end
    report("overloading %a is not permitted",name)
end
506
-- The textual forms that injecthyphens produces at a hyphenation point.
local s_detail_1 = "-"
local f_detail_2 = formatters["%s-%s"]
local f_detail_3 = formatters["{%s}{%s}{}"]
local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"]

-- Returns word with its hyphenation points made visible.
--
-- dictionary    : passed on to hyphenate
-- word          : the string to process; false is returned when it is nil/false
-- specification : table with the optional fields leftcharmin (default 2),
--                 rightcharmin (default: the left minimum), leftchar and
--                 rightchar; without a specification word is returned as is
--
-- When the word has no hyphenation points it is returned unchanged. With both
-- leftchar and rightchar set the braced forms are used, otherwise a plain "-".
function traditional.injecthyphens(dictionary,word,specification)
    if not word then
        return false
    end
    if not specification then
        return word
    end
    local hyphens = hyphenate(dictionary,word)
    if not hyphens then
        return word
    end




    local word = lpegmatch(p_split,word)
    local size = #word

    -- only positions leftmin..rightmin are candidates for a hyphen
    local leftmin = specification.leftcharmin or 2
    local rightmin = size - (specification.rightcharmin or leftmin)
    local leftchar = specification.leftchar
    local rightchar = specification.rightchar

    local result = { }
    local rsize = 0
    local position = 1

    while position <= size do
        if position >= leftmin and position <= rightmin then
            local hyphen = hyphens[position]
            if not hyphen then
                -- no break after this character
                rsize = rsize + 1
                result[rsize] = word[position]
                position = position + 1
            elseif hyphen == true then
                -- a regular break: the character followed by the hyphen form
                rsize = rsize + 1
                result[rsize] = word[position]
                rsize = rsize + 1
                if leftchar and rightchar then
                    result[rsize] = f_detail_3(rightchar,leftchar)
                else
                    result[rsize] = s_detail_1
                end
                position = position + 1
            else
                -- a special: either { special, offset } or the special itself,
                -- in which case the offset is 1
                local o, h = hyphen[2]
                if o then
                    h = hyphen[1]
                else
                    h = hyphen
                    o = 1
                end
                -- b..e is the range of characters that the special replaces
                local b = position - o + (h.start or 1)
                local e = b + (h.length or 2) - 1
                if b > 0 and e >= b then
                    -- copy what comes before the replaced range
                    for i=1,b-position do
                        rsize = rsize + 1
                        result[rsize] = word[position]
                        position = position + 1
                    end
                    rsize = rsize + 1
                    if leftchar and rightchar then
                        result[rsize] = f_detail_4(h.before,rightchar,leftchar,h.after,concat(word,"",b,e))
                    else
                        result[rsize] = f_detail_2(h.before,h.after)
                    end
                    -- continue after the replaced range
                    position = e + 1
                else
                    -- unusable range: treat the character as a normal one
                    rsize = rsize + 1
                    result[rsize] = word[position]
                    position = position + 1
                end
            end
        else
            rsize = rsize + 1
            result[rsize] = word[position]
            position = position + 1
        end
    end
    return concat(result)
end
594
do

    -- a space separated list is processed item by item; the target tables (and
    -- for patterns the specification) travel along as extra lpeg arguments

    local blanks = space^1
    local item   = C((1-space)^1)
    local tables = Carg(1) * Carg(2)

    local p_remove = (tables * item / unregister_pattern + blanks)^1
    local p_add    = (tables * item * Carg(3) / register_pattern + blanks)^1
    local p_except = (Carg(1) * item / register_exception + blanks)^1

    -- Registers the patterns in str for a language. A specification that is
    -- exactly false removes them instead; a table is attached to each pattern
    -- as its special; any other value counts as no specification.
    function traditional.registerpattern(language,str,specification)
        local dictionary = dictionaries[language]
        local patterns   = dictionary.patterns
        local specials   = dictionary.specials
        if specification == false then
            lpegmatch(p_remove,str,1,patterns,specials)
            return
        end
        local special = type(specification) == "table" and specification or false
        lpegmatch(p_add,str,1,patterns,specials,special)
    end

    -- Registers the exceptions in str for a language.
    function traditional.registerexception(language,str)
        local exceptions = dictionaries[language].exceptions
        lpegmatch(p_except,str,1,exceptions)
    end

end
620
621
622
623if context then
624
625 local nodecodes = nodes.nodecodes
626 local disccodes = nodes.disccodes
627
628 local glyph_code <const> = nodecodes.glyph
629 local disc_code <const> = nodecodes.disc
630 local math_code <const> = nodecodes.math
631 local hlist_code <const> = nodecodes.hlist
632
633 local automaticdisc_code <const> = disccodes.automatic
634 local regulardisc_code <const> = disccodes.regular
635
636 local nuts = nodes.nuts
637 local tonode = nodes.tonode
638 local nodepool = nuts.pool
639
640 local new_disc = nodepool.disc
641 local new_penalty = nodepool.penalty
642
643 local getfield = nuts.getfield
644 local getfont = nuts.getfont
645 local getid = nuts.getid
646 local getattr = nuts.getattr
647 local getnext = nuts.getnext
648 local getprev = nuts.getprev
649 local getsubtype = nuts.getsubtype
650 local getlist = nuts.getlist
651 local getlanguage = nuts.getlanguage
652 local setattrlist = nuts.setattrlist
653 local isglyph = nuts.isglyph
654 local ischar = nuts.ischar
655
656 local setchar = nuts.setchar
657 local setdisc = nuts.setdisc
658 local setlink = nuts.setlink
659 local setprev = nuts.setprev
660 local setnext = nuts.setnext
661
662 local insertbefore = nuts.insertbefore
663 local insertafter = nuts.insertafter
664 local copy_node = nuts.copy
665 local copylist = nuts.copylist
666 local remove_node = nuts.remove
667 local endofmath = nuts.endofmath
668 local node_tail = nuts.tail
669
670 local nexthlist = nuts.traversers.hlist
671 local nextdisc = nuts.traversers.disc
672
673 local setcolor = nodes.tracers.colors.set
674
675 local variables = interfaces.variables
676 local v_reset <const> = variables.reset
677 local v_yes <const> = variables.yes
678 local v_word <const> = variables.word
679 local v_all <const> = variables.all
680
681 local settings_to_array = utilities.parsers.settings_to_array
682
683 local texsetattribute = tex.setattribute
684
685 local prehyphenchar = language.prehyphenchar
686 local posthyphenchar = language.posthyphenchar
687 local preexhyphenchar = language.preexhyphenchar
688 local postexhyphenchar = language.postexhyphenchar
689
690 local a_hyphenation <const> = attributes.private("hyphenation")
691 local unsetvalue <const> = attributes.unsetvalue
692
693 local interwordpenalty = 5000
694
695 function traditional.loadpatterns(language)
696 return dictionaries[language]
697 end
698
699
700
701 setmetatableindex(dictionaries,function(t,k)
702 if type(k) == "string" then
703
704
705 languages.getnumber(k)
706 end
707 local specification = languages.getdata(k)
708 local dictionary = {
709 patterns = { },
710 exceptions = { },
711 hyphenated = { },
712 specials = { },
713 instance = false,
714 characters = { },
715 unicodes = { },
716 }
717 if specification then
718 local resources = specification.resources
719 if resources then
720 local characters = dictionary.characters or { }
721 local unicodes = dictionary.unicodes or { }
722 for i=1,#resources do
723 local r = resources[i]
724 if not r.in_dictionary then
725 r.in_dictionary = true
726 local patterns = r.patterns
727 if patterns then
728 local data = patterns.data
729 if data then
730
731 lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
732 end
733 local extra = patterns.extra
734 if extra then
735
736 lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials)
737 end
738 end
739 local exceptions = r.exceptions
740 if exceptions then
741 local data = exceptions.data
742 if data and data ~= "" then
743 lpegmatch(p_exception,data,1,dictionary.exceptions)
744 end
745 end
746 local usedchars = lpegmatch(p_split,patterns.characters)
747 for i=1,#usedchars do
748 local char = usedchars[i]
749 local code = utfbyte(char)
750 local upper = uccodes[code]
751 characters[char] = code
752 unicodes [code] = char
753 if type(upper) == "table" then
754 for i=1,#upper do
755 local u = upper[i]
756 unicodes[u] = utfchar(u)
757 end
758 else
759 unicodes[upper] = utfchar(upper)
760 end
761 end
762 end
763 end
764 dictionary.characters = characters
765 dictionary.unicodes = unicodes
766 setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end)
767 end
768 t[specification.number] = dictionary
769 dictionary.instance = specification.instance
770 end
771 t[k] = dictionary
772 return dictionary
773 end)
774
775
776
777
778
779
780
781
782
783
    -- The feature sets live in hyphenators.featuresets; an already existing
    -- table is reused.
    local featuresets = hyphenators.featuresets or { }
    hyphenators.featuresets = featuresets

    -- the number of registered feature sets is kept in storage.shared and
    -- starts at 0 when it is not there yet
    storage.shared.noflanguagesfeaturesets = storage.shared.noflanguagesfeaturesets or 0

    local noffeaturesets = storage.shared.noflanguagesfeaturesets

    -- NOTE(review): presumably this makes the table part of the stored format; confirm
    storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets")
792
793
794
795 local function register(name,featureset)
796 noffeaturesets = noffeaturesets + 1
797 featureset.attribute = noffeaturesets
798 featuresets[noffeaturesets] = featureset
799 featuresets[name] = featureset
800 storage.shared.noflanguagesfeaturesets = noffeaturesets
801 return noffeaturesets
802 end
803
804 local function makeset(...)
805
806 local set = { }
807 for i=1,select("#",...) do
808 local list = select(i,...)
809 local kind = type(list)
810 local used = nil
811 if kind == "string" then
812 if list == v_all then
813
814 return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end)
815 elseif list ~= "" then
816 used = lpegmatch(p_split,list)
817 set = set or { }
818 for i=1,#used do
819 local char = used[i]
820 set[utfbyte(char)] = char
821 end
822 end
823 elseif kind == "table" then
824 if next(list) then
825 set = set or { }
826 for byte, char in next, list do
827 set[byte] = char == true and utfchar(byte) or char
828 end
829 elseif #list > 0 then
830 set = set or { }
831 for i=1,#list do
832 local l = list[i]
833 if type(l) == "number" then
834 set[l] = utfchar(l)
835 else
836 set[utfbyte(l)] = l
837 end
838 end
839 end
840 end
841 end
842 return set
843 end
844
845
846
    -- Hyphen-like characters that are used when a feature set asks for the
    -- default hyphens. When such a table goes through makeset, a value of true
    -- is turned into the character itself while another value is kept as given
    -- (so the soft hyphen is paired with the code of the hyphen-minus).
    local defaulthyphens = {
        [0x002D] = true, -- HYPHEN-MINUS
        [0x00AD] = 0x002D, -- SOFT HYPHEN
        [0x2010] = true, -- HYPHEN
        [0x2013] = true, -- EN DASH
        [0x2014] = true, -- EM DASH
    }

    -- Joiners that are used when a feature set asks for the default joiners.
    local defaultjoiners = {
        [0x200C] = true, -- ZERO WIDTH NON-JOINER
        [0x200D] = true, -- ZERO WIDTH JOINER
    }
879
880 local function somehyphenchar(c)
881 c = tonumber(c)
882 return c ~= 0 and c or nil
883 end
884
885 local function definefeatures(name,featureset)
886 local extrachars = featureset.characters
887 local hyphenchars = featureset.hyphens
888 local joinerchars = featureset.joiners
889 local alternative = featureset.alternative
890 local rightwordmin = tonumber(featureset.rightwordmin)
891 local charmin = tonumber(featureset.charmin)
892 local leftcharmin = tonumber(featureset.leftcharmin)
893 local rightcharmin = tonumber(featureset.rightcharmin)
894 local leftchar = somehyphenchar(featureset.leftchar)
895 local rightchar = somehyphenchar(featureset.rightchar)
896 local rightchars = featureset.rightchars
897local rightedge = featureset.rightedge
898local autohyphen = v_yes
899local hyphenonly = v_yes
900 rightchars = rightchars == v_word and true or tonumber(rightchars)
901 joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars
902 hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars
903
904 featureset.extrachars = makeset(joinerchars or "",extrachars or "")
905 featureset.hyphenchars = makeset(hyphenchars or "")
906 featureset.alternative = alternative or "hyphenate"
907 featureset.rightwordmin = rightwordmin and rightwordmin > 0 and rightwordmin or nil
908 featureset.charmin = charmin and charmin > 0 and charmin or nil
909 featureset.leftcharmin = leftcharmin and leftcharmin > 0 and leftcharmin or nil
910 featureset.rightcharmin = rightcharmin and rightcharmin > 0 and rightcharmin or nil
911 featureset.rightchars = rightchars
912 featureset.leftchar = leftchar
913 featureset.rightchar = rightchar
914
915featureset.autohyphen = autohyphen == v_yes
916featureset.hyphenonly = hyphenonly == v_yes
917 return register(name,featureset)
918 end
919
920 local function setfeatures(n)
921 if not n or n == v_reset then
922 n = false
923 else
924 local f = featuresets[n]
925 if not f and type(n) == "string" then
926 local t = settings_to_array(n)
927 local s = { }
928 for i=1,#t do
929 local ti = t[i]
930 local fs = featuresets[ti]
931 if fs then
932 for k, v in next, fs do
933 s[k] = v
934 end
935 end
936 end
937 n = register(n,s)
938 else
939 n = f and f.attribute
940 end
941 end
942 texsetattribute(a_hyphenation,n or unsetvalue)
943 end
944
    -- make both functions available to other Lua code
    traditional.definefeatures = definefeatures
    traditional.setfeatures = setfeatures

    -- The calls below hand the functions to implement under a name, with a
    -- description of their arguments.
    -- NOTE(review): presumably this exposes them to the TeX end; implement is a
    -- no-op function when interfaces.implement is not available.

    -- a name plus a table of named values; those tagged "integer" are integers
    implement {
        name = "definehyphenationfeatures",
        actions = definefeatures,
        arguments = {
            "string",
            {
                { "characters" },
                { "hyphens" },
                { "joiners" },
                { "rightchars" },
                { "rightwordmin", "integer" },
                { "charmin", "integer" },
                { "leftcharmin", "integer" },
                { "rightcharmin", "integer" },
                { "leftchar", "integer" },
                { "rightchar", "integer" },
                { "alternative" },
                { "rightedge" },
            }
        }
    }

    implement {
        name = "sethyphenationfeatures",
        actions = setfeatures,
        arguments = "string"
    }

    -- language, pattern(s) and a boolean; false makes registerpattern remove
    -- the patterns
    implement {
        name = "registerhyphenationpattern",
        actions = traditional.registerpattern,
        arguments = { "string", "string", "boolean" }
    }

    -- language and exception(s)
    implement {
        name = "registerhyphenationexception",
        actions = traditional.registerexception,
        arguments = "2 strings",
    }
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011 local starttiming = statistics.starttiming
1012 local stoptiming = statistics.stoptiming
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031 function traditional.hyphenate(head)
1032
1033 local first = head
1034 local tail = nil
1035 local last = nil
1036 local current = first
1037 local dictionary = nil
1038 local instance = nil
1039 local characters = nil
1040 local unicodes = nil
1041 local exhyphenchar = tex.exhyphenchar
1042 local extrachars = nil
1043 local hyphenchars = nil
1044 local language = nil
1045 local lastfont = nil
1046 local start = nil
1047 local stop = nil
1048 local word = { }
1049 local size = 0
1050
1051
1052 local leftexchar = false
1053 local rightexchar = false
1054 local leftmin = 0
1055 local rightmin = 0
1056 local charmin = 1
1057 local leftcharmin = nil
1058 local rightcharmin = nil
1059
1060 local rightwordmin = nil
1061 local rightchars = nil
1062 local leftchar = nil
1063 local rightchar = nil
1064 local attr = nil
1065 local lastwordlast = nil
1066 local hyphenated = hyphenate
1067
1068 local exhyphenpenalty = tex.exhyphenpenalty
1069 local hyphenpenalty = tex.hyphenpenalty
1070 local autohyphen = false
1071 local hyphenonly = false
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084 starttiming(traditional)
1085
1086 local function insertpenalty()
1087 local p = new_penalty(interwordpenalty)
1088 setattrlist(p,last)
1089 if trace_visualize then
1090 nuts.setvisual(p,"penalty")
1091 end
1092 last = getprev(last)
1093 first, last = insertafter(first,last,p)
1094 end
1095
1096 local function synchronizefeatureset(a)
1097 local f = a and featuresets[a]
1098 if f then
1099 hyphenated = methods[f.alternative or "hyphenate"]
1100 extrachars = f.extrachars
1101 hyphenchars = f.hyphenchars
1102 rightwordmin = f.rightwordmin
1103 charmin = f.charmin
1104 leftcharmin = f.leftcharmin
1105 rightcharmin = f.rightcharmin
1106 leftchar = f.leftchar
1107 rightchar = f.rightchar
1108
1109 rightchars = f.rightchars
1110 autohyphen = f.autohyphen
1111 hyphenonly = f.hyphenonly
1112 if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then
1113
1114 if not tail then
1115 tail = node_tail(first)
1116 end
1117 last = tail
1118 local inword = false
1119 local count = 0
1120 while last and rightwordmin > 0 do
1121 local id = getid(last)
1122 if id == glyph_code then
1123 count = count + 1
1124 inword = true
1125 if trace_visualize then
1126 setcolor(last,"darkgreen")
1127 end
1128 elseif inword then
1129 inword = false
1130 rightwordmin = rightwordmin - 1
1131 if rightchars == true then
1132 if rightwordmin > 0 then
1133 insertpenalty()
1134 end
1135 elseif rightchars and count <= rightchars then
1136 insertpenalty()
1137 end
1138 end
1139 last = getprev(last)
1140 end
1141 lastwordlast = rightwordmin
1142 end
1143 if not charmin or charmin == 0 then
1144 charmin = 1
1145 end
1146 else
1147 hyphenated = methods.hyphenate
1148 extrachars = false
1149 hyphenchars = false
1150 rightwordmin = false
1151 charmin = 1
1152 leftcharmin = false
1153 rightcharmin = false
1154 leftchar = false
1155 rightchar = false
1156
1157 autohyphen = false
1158 hyphenonly = false
1159 end
1160
1161 return a
1162 end
1163
1164 local function flush(hyphens)
1165
1166 local rightmin = size - rightmin
1167 local result = { }
1168 local rsize = 0
1169 local position = 1
1170
1171
1172
1173
1174
1175
1176
1177 while position <= size do
1178 if position >= leftmin and position <= rightmin then
1179 local hyphen = hyphens[position]
1180 if not hyphen then
1181 rsize = rsize + 1
1182 result[rsize] = word[position]
1183 position = position + 1
1184 elseif hyphen == true then
1185 rsize = rsize + 1
1186 result[rsize] = word[position]
1187 rsize = rsize + 1
1188 result[rsize] = true
1189 position = position + 1
1190 else
1191 local o, h = hyphen[2]
1192 if o then
1193
1194 h = hyphen[1]
1195 else
1196
1197 h = hyphen
1198 o = 1
1199 end
1200 local b = position - o + (h.start or 1)
1201 local e = b + (h.length or 2) - 1
1202 if b > 0 and e >= b then
1203 for i=1,b-position do
1204 rsize = rsize + 1
1205 result[rsize] = word[position]
1206 position = position + 1
1207 end
1208 rsize = rsize + 1
1209 result[rsize] = {
1210 h.before or "",
1211 h.after or "",
1212 concat(word,"",b,e),
1213 h.right,
1214 h.left,
1215 }
1216 position = e + 1
1217 else
1218
1219 rsize = rsize + 1
1220 result[rsize] = word[position]
1221 position = position + 1
1222 end
1223 end
1224 else
1225 rsize = rsize + 1
1226 result[rsize] = word[position]
1227 position = position + 1
1228 end
1229 end
1230
1231 local function serialize(replacement,leftchar,rightchar)
1232 if not replacement then
1233 return
1234 elseif replacement == true then
1235 local glyph = copy_node(stop)
1236 setchar(glyph,leftchar or rightchar)
1237 return glyph
1238 end
1239 local head = nil
1240 local current = nil
1241 if leftchar then
1242 head = copy_node(stop)
1243 current = head
1244 setchar(head,leftchar)
1245 end
1246 local rsize = #replacement
1247 if rsize == 1 then
1248 local glyph = copy_node(stop)
1249 setchar(glyph,characters[replacement])
1250 if head then
1251 insertafter(current,current,glyph)
1252 else
1253 head = glyph
1254 end
1255 current = glyph
1256 elseif rsize > 0 then
1257 local list = lpegmatch(p_split,replacement)
1258 for i=1,#list do
1259 local glyph = copy_node(stop)
1260 setchar(glyph,characters[list[i]])
1261 if head then
1262 insertafter(current,current,glyph)
1263 else
1264 head = glyph
1265 end
1266 current = glyph
1267 end
1268 end
1269 if rightchar then
1270 local glyph = copy_node(stop)
1271 insertafter(current,current,glyph)
1272 setchar(glyph,rightchar)
1273 end
1274 return head
1275 end
1276
1277 local current = start
1278 local attrnode = start
1279
1280 for i=1,rsize do
1281 local r = result[i]
1282 if r == true then
1283 local disc = new_disc()
1284 local pre = nil
1285 local post = nil
1286 if rightchar then
1287 pre = serialize(true,rightchar)
1288 end
1289 if leftchar then
1290 post = serialize(true,leftchar)
1291 end
1292 setdisc(disc,pre,post,nil,regulardisc_code,hyphenpenalty)
1293 if attrnode then
1294 setattrlist(disc,attrnode)
1295 end
1296
1297 insertbefore(first,current,disc)
1298 elseif type(r) == "table" then
1299 local disc = new_disc()
1300 local pre = r[1]
1301 local post = r[2]
1302 local replace = r[3]
1303 local right = r[4] ~= false and rightchar
1304 local left = r[5] ~= false and leftchar
1305 if pre then
1306 if pre ~= "" then
1307 pre = serialize(pre,false,right)
1308 else
1309 pre = nil
1310 end
1311 end
1312 if post then
1313 if post ~= "" then
1314 post = serialize(post,left,false)
1315 else
1316 post = nil
1317 end
1318 end
1319 if replace then
1320 if replace ~= "" then
1321 replace = serialize(replace)
1322 else
1323 replace = nil
1324 end
1325 end
1326
1327 setdisc(disc,pre,post,replace,regulardisc_code,hyphenpenalty)
1328 if attrnode then
1329 setattrlist(disc,attrnode)
1330 end
1331 insertbefore(first,current,disc)
1332 else
1333 setchar(current,characters[r])
1334 if i < rsize then
1335 current = getnext(current)
1336 end
1337 end
1338 end
1339 if current and current ~= stop then
1340 local current = getnext(current)
1341 local last = getnext(stop)
1342 while current ~= last do
1343 first, current = remove_node(first,current,true)
1344 end
1345 end
1346 end
1347
1348 local function inject(leftchar,rightchar,code,attrnode)
1349 if first ~= current then
1350 local disc = new_disc()
1351 first, current, glyph = remove_node(first,current)
1352 first, current = insertbefore(first,current,disc)
1353 if trace_visualize then
1354 setcolor(glyph,"darkred")
1355 setcolor(disc,"darkgreen")
1356 end
1357 local pre = nil
1358 local post = nil
1359 local replace = glyph
1360 if leftchar and leftchar > 0 then
1361 post = copy_node(glyph)
1362 setchar(post,leftchar)
1363 end
1364 pre = copy_node(glyph)
1365 setchar(pre,rightchar and rightchar > 0 and rightchar or code)
1366 setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty)
1367 if attrnode then
1368 setattrlist(disc,attrnode)
1369 end
1370 end
1371 return current
1372 end
1373
1374 local function injectseries(current,last,next,attrnode)
1375 local disc = new_disc()
1376 local start = current
1377 first, current = insertbefore(first,current,disc)
1378 setprev(start)
1379 setnext(last)
1380 if next then
1381 setlink(current,next)
1382 else
1383 setnext(current)
1384 end
1385 local pre = copylist(start)
1386 local post = nil
1387 local replace = start
1388 setdisc(disc,pre,post,replace,automaticdisc_code,hyphenpenalty)
1389 if attrnode then
1390 setattrlist(disc,attrnode)
1391 end
1392 return current
1393 end
1394
1395 local a = getattr(first,a_hyphenation)
1396 if a ~= attr then
1397 attr = synchronizefeatureset(a)
1398 end
1399
1400
1401
1402
1403
1404
1405 local skipping = false
1406
1407
1408
1409 while current and current ~= last do
1410 local code, id = isglyph(current)
1411 if code then
1412 if skipping then
1413 current = getnext(current)
1414 else
1415 local lang = getlanguage(current)
1416 local font = getfont(current)
1417 if lang ~= language or font ~= lastfont then
1418 if dictionary and size > charmin and leftmin + rightmin <= size then
1419
1420 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1421
1422 else
1423 local hyphens = hyphenated(dictionary,word,size)
1424 if hyphens then
1425 flush(hyphens)
1426 end
1427 end
1428 end
1429 lastfont = font
1430 if language ~= lang and lang > 0 then
1431
1432 dictionary = dictionaries[lang]
1433 instance = dictionary.instance
1434 characters = dictionary.characters
1435 unicodes = dictionary.unicodes
1436
1437 local a = getattr(current,a_hyphenation)
1438 attr = synchronizefeatureset(a)
1439 leftchar = leftchar or (instance and posthyphenchar (instance))
1440 rightchar = rightchar or (instance and prehyphenchar (instance))
1441 leftexchar = (instance and preexhyphenchar (instance))
1442 rightexchar = (instance and postexhyphenchar(instance))
1443 leftmin = leftcharmin or getfield(current,"lhmin")
1444 rightmin = rightcharmin or getfield(current,"rhmin")
1445 if not leftchar or leftchar < 0 then
1446 leftchar = false
1447 end
1448 if not rightchar or rightchar < 0 then
1449 rightchar = false
1450 end
1451
1452 local char = unicodes[code] or (extrachars and extrachars[code])
1453 if char then
1454 word[1] = char
1455 size = 1
1456 start = current
1457 else
1458 size = 0
1459 end
1460 else
1461 size = 0
1462 end
1463 language = lang
1464 elseif language <= 0 then
1465
1466 elseif size > 0 then
1467 local char = unicodes[code] or (extrachars and extrachars[code])
1468 if char then
1469 size = size + 1
1470 word[size] = char
1471 elseif dictionary then
1472 if not hyphenonly or code ~= exhyphenchar then
1473 if size > charmin and leftmin + rightmin <= size then
1474 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1475
1476 else
1477 local hyphens = hyphenated(dictionary,word,size)
1478 if hyphens then
1479 flush(hyphens)
1480 end
1481 end
1482 end
1483 end
1484 size = 0
1485 if code == exhyphenchar then
1486 local next = getnext(current)
1487 local last = current
1488 local font = getfont(current)
1489 while next and ischar(next,font) == code do
1490 last = next
1491 next = getnext(next)
1492 end
1493 if not autohyphen then
1494 current = last
1495 elseif current == last then
1496 current = inject(leftexchar,rightexchar,code,current)
1497 else
1498 current = injectseries(current,last,next,current)
1499 end
1500 if hyphenonly then
1501 skipping = true
1502 end
1503 elseif hyphenchars then
1504 local char = hyphenchars[code]
1505 if char == true then
1506 char = code
1507 end
1508 if char then
1509 current = inject(leftchar and char or nil,rightchar and char or nil,char,current)
1510 end
1511 end
1512 end
1513 else
1514 local a = getattr(current,a_hyphenation)
1515 if a ~= attr then
1516 attr = synchronizefeatureset(a)
1517 leftchar = leftchar or (instance and posthyphenchar (instance))
1518 rightchar = rightchar or (instance and prehyphenchar (instance))
1519 leftexchar = (instance and preexhyphenchar (instance))
1520 rightexchar = (instance and postexhyphenchar(instance))
1521 leftmin = leftcharmin or getfield(current,"lhmin")
1522 rightmin = rightcharmin or getfield(current,"rhmin")
1523 if not leftchar or leftchar < 0 then
1524 leftchar = false
1525 end
1526 if not rightchar or rightchar < 0 then
1527 rightchar = false
1528 end
1529 end
1530
1531 local char = unicodes[code] or (extrachars and extrachars[code])
1532 if char then
1533 word[1] = char
1534 size = 1
1535 start = current
1536 end
1537 end
1538 stop = current
1539 current = getnext(current)
1540 end
1541 else
1542 if skipping then
1543 skipping = false
1544 end
1545 if id == disc_code then
1546 size = 0
1547 current = getnext(current)
1548 if hyphenonly then
1549 skipping = true
1550 end
1551
1552
1553
1554 else
1555 current = id == math_code and getnext(endofmath(current)) or getnext(current)
1556 end
1557 if size > 0 then
1558 if dictionary and size > charmin and leftmin + rightmin <= size then
1559 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1560
1561 else
1562 local hyphens = hyphenated(dictionary,word,size)
1563 if hyphens then
1564 flush(hyphens)
1565 end
1566 end
1567 end
1568 size = 0
1569 end
1570 end
1571 end
1572
1573
1574 if dictionary and size > charmin and leftmin + rightmin <= size then
1575 if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then
1576
1577 else
1578 local hyphens = hyphenated(dictionary,word,size)
1579 if hyphens then
1580 flush(hyphens)
1581 end
1582 end
1583 end
1584
1585 stoptiming(traditional)
1586
1587 return head
1588 end
1589
-- Registers the "hyphenation" statistics entry. The callback yields a summary
-- line only when words were hyphenated or when time was spent in the
-- traditional hyphenator; otherwise it returns nothing.
statistics.register("hyphenation",function()
    if not (nofwords > 0 or statistics.elapsed(traditional) > 0) then
        return
    end
    return string.format(
        "%s words hyphenated, %s unique, used time %s",
        nofwords,
        nofhashed,
        statistics.elapsedseconds(traditional) or 0
    )
end)
1596
1597 local texmethod = "builders.kernel.hyphenation"
1598 local oldmethod = texmethod
1599 local newmethod = texmethod
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615 local hyphenate = language.hyphenate
1616 local hyphenating = nuts.hyphenating
1617 local methods = { }
1618 local usedmethod = false
1619 local stack = { }
1620
-- The engine's own hyphenator wrapped so that it takes and returns a head.
-- When "hyphenating" is available it is called directly and only its first
-- result is passed on; otherwise "hyphenate" is applied to the tonode
-- conversion of the head and the head itself is returned.
local original
if hyphenating then
    original = function(head)
        return (hyphenating(head))
    end
else
    original = function(head)
        hyphenate(tonode(head))
        return head
    end
end
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641 local getcount = tex.getcount
1642
1643 hyphenators.methods = methods
1644 local optimize = false
1645
1646 directives.register("hyphenator.optimize", function(v) optimize = v end)
1647
-- Applies the currently selected hyphenation method to a node list.
--
-- head      : the list to process
-- groupcode : group identifier; only "hbox" and "adjustedhbox" are looked at
--
-- Without an active method the head is returned untouched. When the optimize
-- directive is set, (adjusted) hboxes are only processed when the
-- "hyphenstate" count is positive. In all other cases the result of the
-- method is returned.
function hyphenators.handler(head,groupcode)
    local method = usedmethod
    if not method then
        return head
    end
    if optimize and (groupcode == "hbox" or groupcode == "adjustedhbox") then
        if getcount("hyphenstate") > 0 then
            -- NOTE(review): no local "forced" is declared in the visible part
            -- of this file, so this possibly writes a global; kept as it was
            forced = false
        else
            return head
        end
    end
    return method(head)
end
1664
-- The named methods: "tex", "original" and "expanded" all map onto the
-- wrapped engine hyphenator, "traditional" onto the hyphenator from this
-- module, and "none" (false) disables hyphenation in the handler.
methods.tex, methods.original, methods.expanded = original, original, original
methods.traditional = languages.hyphenators.traditional.hyphenate
methods.none        = false

-- the engine hyphenator is the default
usedmethod = original
1672
-- Selects the active method by name. A name that is not registered falls back
-- on methods.tex, a registered false value (like "none") is kept as false, and
-- anything that is not a string also results in false.
local function setmethod(method)
    local chosen = false
    if type(method) == "string" then
        chosen = methods[method]
    end
    if chosen == nil then
        chosen = methods.tex
    end
    usedmethod = chosen
end
-- Saves the active method on the stack and then selects a new one by name,
-- using the same rules as setmethod: unknown names give methods.tex, a
-- registered false value stays false, and non strings give false.
local function pushmethod(method)
    stack[#stack+1] = usedmethod
    local chosen = false
    if type(method) == "string" then
        chosen = methods[method]
    end
    if chosen == nil then
        chosen = methods.tex
    end
    usedmethod = chosen
end
-- Restores the method that was saved by the matching pushmethod. Only an
-- empty stack falls back on methods.tex: a saved false (hyphenation disabled,
-- as selected by "none") is a valid state and has to be restored as false,
-- which a plain "remove(stack) or methods.tex" would turn into methods.tex.
local function popmethod()
    local saved = remove(stack)
    if saved == nil then
        saved = methods.tex
    end
    usedmethod = saved
end
1689
1690 hyphenators.setmethod = setmethod
1691 hyphenators.pushmethod = pushmethod
1692 hyphenators.popmethod = popmethod
1693
1694 directives.register("hyphenators.method",setmethod)
1695
-- Applies a setup specification; only its "method" field is used here and it
-- is passed to setmethod when it is set.
function hyphenators.setup(specification)
    local method = specification.method
    if not method then
        return
    end
    setmethod(method)
end
1702
1703 implement { name = "sethyphenationmethod", actions = setmethod, arguments = "string" }
1704 implement { name = "pushhyphenation", actions = pushmethod, arguments = "string" }
1705 implement { name = "pophyphenation", actions = popmethod }
1706
1707
1708
1709 local context = context
1710 local ctx_NC = context.NC
1711 local ctx_NR = context.NR
1712 local ctx_verbatim = context.verbatim
1713
-- Typesets the hyphenation trace of a word as a four column tabulate.
--
-- language : passed on to traditional.gettrace
-- word     : the word to trace; nothing happens when it is missing or empty
--
-- While the trace is collected trace_steps is set to "silent" and afterwards
-- it gets its previous value back. Each step contributes one row: a number
-- (left out in the first and last row) followed by the first three entries
-- of the step in verbatim.
function hyphenators.showhyphenationtrace(language,word)
    if not (word and word ~= "") then
        return
    end
    local previous = trace_steps
    trace_steps = "silent"
    local steps = traditional.gettrace(language,word)
    trace_steps = previous
    if not steps then
        return
    end
    local count = #steps
    if not (count > 0) then
        return
    end
    context.starttabulate { "|r|l|l|l|" }
    for row=1,count do
        local step = steps[row]
        ctx_NC()
        if row > 1 and row < count then
            context(row-1)
        end
        for column=1,3 do
            ctx_NC()
            ctx_verbatim(step[column])
        end
        ctx_NC()
        ctx_NR()
    end
    context.stoptabulate()
end
1739
1740 implement {
1741 name = "showhyphenationtrace",
1742 actions = hyphenators.showhyphenationtrace,
1743 arguments = "2 strings",
1744 }
1745
-- Removes and frees the discretionary nodes that sit directly in the hlists
-- found in the given list and returns the (unchanged) head.
--
-- Before, the inner loop called remove_node(h,false,true) with an undefined
-- "h" and without using the loop variable, so no disc was ever taken out and
-- a possibly changed list head was never stored. Now each disc is removed
-- from the list it belongs to and the resulting head is written back.
--
-- NOTE(review): assumes nuts.setlist is available next to the getlist that is
-- already used here, and that dropping the disc including its pre, post and
-- replace parts is what is intended; confirm against the callers.
function nodes.stripdiscretionaries(head)
    for l in nexthlist, head do
        local list  = getlist(l)
        local discs = { }
        local n     = 0
        -- collect first so that the traverser never continues from a node
        -- that has been freed
        for d in nextdisc, list do
            n = n + 1
            discs[n] = d
        end
        if n > 0 then
            for i=1,n do
                -- the first return value of remove_node is the new list head
                list = remove_node(list,discs[i],true)
            end
            nuts.setlist(l,list)
        end
    end
    return head
end
1754
1755
1756else
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807end
1808
1809 |