1if not modules then modules = { } end modules ['lang-ini'] = {
2 version = 1.001,
3 comment = "companion to lang-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22local type, tonumber, next = type, tonumber, next
23local utfbyte, utflength = utf.byte, utf.length
24local format, gsub, gmatch, find = string.format, string.gsub, string.gmatch, string.find
25local concat, sortedkeys, sortedhash, keys, insert, tohash = table.concat, table.sortedkeys, table.sortedhash, table.keys, table.insert, table.tohash
26local setmetatableindex = table.setmetatableindex
27local utfvalues, strip, utfcharacters = string.utfvalues, string.strip, utf.characters
28
29local context = context
30local commands = commands
31local implement = interfaces.implement
32
33local settings_to_array = utilities.parsers.settings_to_array
34local settings_to_set = utilities.parsers.settings_to_set
35
36local trace_patterns = false trackers.register("languages.patterns", function(v) trace_patterns = v end)
37local trace_goodies = false trackers.register("languages.goodies", function(v) trace_goodies = v end)
38local trace_applied = false trackers.register("languages.applied", function(v) trace_applied = v end)
39
40local report_initialization = logs.reporter("languages","initialization")
41local report_goodies = logs.reporter("languages","goodies")
42
43local prehyphenchar = language.prehyphenchar
44local posthyphenchar = language.posthyphenchar
45local preexhyphenchar = language.preexhyphenchar
46local postexhyphenchar = language.postexhyphenchar
47
48
49local sethjcode = language.sethjcode
50local currentlanguage = language.current
51
52local uccodes = characters.uccodes
53local lccodes = characters.lccodes
54
55local new_language = language.new
56
-- The global languages namespace; an already existing table is reused and
-- aliased to a local of the same name.
57languages = languages or {}
58local languages = languages
59
60languages.version = 1.010
61
-- Registry tables. Each one is kept when it already exists and is aliased
-- to a local.
-- registered: records indexed by language tag (filled by languages.define).
62languages.registered = languages.registered or { }
63local registered = languages.registered
64
-- associated: tag -> { script, language } (filled by languages.associate).
65languages.associated = languages.associated or { }
66local associated = languages.associated
67
-- numbers: two way map, number -> tag and tag -> number (see languages.define).
68languages.numbers = languages.numbers or { }
69local numbers = languages.numbers
70
-- data: not referenced further in the part of the file reviewed here.
71languages.data = languages.data or { }
72local data = languages.data
73
-- NOTE(review): presumably this makes the tables part of the stored format
-- state; confirm against the storage module.
74storage.register("languages/registered",registered,"languages.registered")
75storage.register("languages/associated",associated,"languages.associated")
76storage.register("languages/numbers", numbers, "languages.numbers")
77storage.register("languages/data", data, "languages.data")
78
-- Interface keywords that are compared against user input further on.
79local v_reset <const> = interfaces.variables.reset
80local v_yes <const> = interfaces.variables.yes
81
-- Counts the definition files that loaddefinitions loaded successfully.
82local nofloaded = 0
83
-- Find the record registered for a tag and make sure that it has a language
-- instance; the instance is created from the record's number on first use.
-- Returns the record and the instance, or two nils for an unknown tag.
local function resolve(tag)
    local specification = registered[tag]
    if not specification then
        return nil, nil
    end
    local instance = specification.instance
    if not instance then
        instance = new_language(specification.number)
        specification.instance = instance
    end
    return specification, instance
end
95
-- Turn a language reference into a language instance. The reference can be
-- absent (the current language is used), an instance (returned as is), a
-- number or a tag. Nothing is returned when no record can be found.
local function tolang(what)
    local target = what or currentlanguage()
    if type(target) == "userdata" then
        return target
    end
    -- numbers maps both ways, so try the mapped tag first and the reference
    -- itself as fallback
    local tag = numbers[target]
    local specification = tag and registered[tag]
    if not specification then
        specification = registered[target]
    end
    if not specification then
        return
    end
    local instance = specification.instance
    if not instance then
        instance = new_language(specification.number)
        specification.instance = instance
    end
    return instance
end
114
-- Return the registered record for a tag or number; without argument the
-- record of the current language is returned.
function languages.getdata(tag)
    if not tag then
        return registered[numbers[currentlanguage()]]
    end
    return registered[tag] or registered[numbers[tag]]
end

-- make the resolver available to other modules
languages.tolang = tolang
124
125
126
127
-- Fetch the data string of one section (what, for instance "patterns" or
-- "exceptions") from a loaded definition table.
--
-- loaded : table as read from a definition file
-- what   : name of the section to fetch
-- tag    : language tag, only used for reporting
--
-- Returns the (when needed decompressed) data string, or nothing when the
-- section is missing or empty. A length mismatch after decompression is
-- reported but the data is still returned.
local function validdata(loaded,what,tag)
    local dataset = loaded[what]
    if not dataset then
        return
    end
    local data = dataset.data
    if not data or data == "" then
        return
    end
    if dataset.compression == "zlib" then
        data = zlib.decompress(data)
        if not data then
            -- a failed decompression would otherwise crash on the length check
            report_initialization("compression error in %a for language %a",what,tag)
            return
        end
        if dataset.length and dataset.length ~= #data then
            -- the format has two slots: the section and the language tag
            report_initialization("compression error in %a for language %a",what,tag)
        end
    end
    return data
end
145
146
147
148
149
150
151
152
-- Set the hj codes of a language instance for the characters listed in
-- loaded[what].characters.
--
-- instance : language instance passed on to sethjcode
-- loaded   : definition table; gets a codehash subtable when it has none
-- what     : section name, for instance "patterns", "exceptions" or "extras"
-- factor   : when set, languages.hjcounts is consulted for replacement values
--
-- The character set can be a string, a list with characters and/or code
-- points, or a hash keyed by character or code point with a true value. A
-- code is mapped onto its lowercase code when characters.lccodes has a
-- number for it. A differing uppercase code gets the same value and is
-- remembered in loaded.codehash. While the codes are set tex.savinghyphcodes
-- is forced to zero; afterwards the old value is restored.
local function sethjcodes(instance,loaded,what,factor)
    local l = loaded[what]
    local c = l and l.characters
    if not c then
        return
    end
    local hjcounts = factor and languages.hjcounts or false
    --
    local h = loaded.codehash
    if not h then
        h = { }
        loaded.codehash = h
    end
    --
    local function setcode(code)
        local l = lccodes[code]
        local u = uccodes[code]
        local s = l
        if type(s) ~= "number" then
            -- no usable lowercase mapping, so the code maps onto itself
            l = code
            s = code
        end
        if hjcounts then
            local c = hjcounts[s]
            if c then
                c = c.count
                if not c then
                    -- no count available, keep the code
                elseif c <= 0 then
                    s = 32
                elseif c >= 31 then
                    s = 31
                else
                    s = c
                end
            end
        end
        sethjcode(instance,l,s)
        if u ~= l and type(u) == "number" then
            sethjcode(instance,u,s)
            h[u] = s
        end
    end
    --
    local s = tex.savinghyphcodes
    tex.savinghyphcodes = 0
    if type(c) == "table" then
        if #c > 0 then
            -- list: entries are characters or code points
            for i=1,#c do
                local v = c[i]
                setcode(type(v) == "string" and utfbyte(v) or v)
            end
        else
            -- hash: keys are characters or code points; numeric keys must not
            -- be fed to utfbyte (same convention as the list branch)
            for k, v in sortedhash(c) do
                if v then
                    setcode(type(k) == "string" and utfbyte(k) or k)
                end
            end
        end
    elseif type(c) == "string" then
        for l in utfvalues(c) do
            setcode(l)
        end
    end
    tex.savinghyphcodes = s
end
223
-- Give every character of a set an hj code equal to its own code point. The
-- set can be a string, a list of characters and/or code points, or a hash
-- keyed by character or code point (only entries with a true value count).
local function addhjcodestoinstance(instance,characters)
    local kind = type(characters)
    if kind == "string" then
        for code in utfvalues(characters) do
            sethjcode(instance,code,code)
        end
    elseif kind == "table" then
        local size = #characters
        if size == 0 then
            -- hash variant
            for key, enabled in next, characters do
                if enabled then
                    local code = type(key) == "string" and utfbyte(key) or key
                    sethjcode(instance,code,code)
                end
            end
        else
            -- list variant
            for index=1,size do
                local entry = characters[index]
                local code  = type(entry) == "string" and utfbyte(entry) or entry
                sethjcode(instance,code,code)
            end
        end
    end
end
249
250
251
-- Lpeg helpers used by unique() to find patterns that occur more than once.
252local P, S, R, C, Cs, Ct, lpegmatch, lpegpatterns = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.match, lpeg.patterns
253
254local utfsplit = utf.split
255
256local space = lpegpatterns.space
257local whitespace = lpegpatterns.whitespace^1
258local nospace = lpegpatterns.utf8char - whitespace
259local digit = lpegpatterns.digit
260
-- Despite its name this is a lookahead: it succeeds when whitespace follows
-- and consumes nothing.
261local endofstring = #whitespace
262
-- word: leading digits are dropped, a digit right before whitespace is dropped,
-- any other digit becomes a space and all other non-whitespace is kept, so a
-- pattern is reduced to its letters separated by spaces where digits were.
263local word = (digit/"")^0 * (digit/"" * endofstring + digit/" " + nospace)^1
-- anyword: a run of non-whitespace, taken as is.
264local anyword = (1-whitespace)^1
-- analyze: collects the reduced form of every word in a table.
265local analyze = Ct((whitespace + Cs(word))^1)
266
-- Merge a list of pattern strings into one string and discard patterns that
-- occur more than once when their digits are ignored.
--
-- tag, requested : only used in the report about discarded conflicts
-- loaded         : list of pattern strings; a leading " " entry is inserted
--                  into it when it has more than one entry
--
-- Returns "" for an empty list, the only entry for a list of one and the
-- merged (and possibly cleaned up) string otherwise.
local function unique(tag,requested,loaded)
    local count = #loaded
    if count == 0 then
        return ""
    end
    if count == 1 then
        return loaded[1]
    end
    -- the leading space makes the first word look like all others
    insert(loaded,1," ")
    local merged    = concat(loaded," ")
    local words     = lpegmatch(analyze,merged) or { }
    local seen      = { }
    local conflicts = { }
    for index=1,#words do
        local reduced = words[index]
        local state   = seen[reduced]
        if not state then
            seen[reduced] = 1
        elseif state == 1 then
            -- second occurrence: remember the pieces once
            seen[reduced] = 2
            conflicts[#conflicts+1] = utfsplit(reduced," ")
        end
    end
    local nofconflicts = #conflicts
    if nofconflicts == 0 then
        return merged
    end
    local badword
    for index=1,nofconflicts do
        local pieces  = conflicts[index]
        local pattern = P(pieces[1])
        for piece=2,#pieces do
            pattern = pattern * digit * P(pieces[piece])
        end
        if badword then
            badword = badword + pattern
        else
            badword = pattern
        end
        report_initialization("language %a, patterns %a, discarding conflict (0-9)%{[0-9]}t(0-9)",tag,requested,pieces)
    end
    -- the intermediate tables are no longer needed
    words, seen, conflicts = nil, nil, nil
    local someword = digit^0 * badword * digit^0 * endofstring / ""
    local cleaner  = Cs((someword + anyword + whitespace)^1)
    return lpegmatch(cleaner,merged) or merged
end
317
-- Holds the shared exceptions: false as long as nothing was looked up, true
-- when there is nothing to share and a string when lang-exc.lua gave a list.
local shared = false

-- Load the definition files requested in specification.patterns (split by
-- settings_to_array) into the language instance that belongs to tag.
--
-- Per definition the file lang-<definition>.lua (or its .gz variant) is
-- located and loaded, hj codes are set for its patterns and exceptions, and
-- the data is collected. The keyword stored in v_reset clears what was
-- collected so far. Afterwards the patterns (made unique) and exceptions are
-- fed to the instance, followed by the shared exceptions when present.
--
-- Returns true when at least one definition file was loaded, false when none
-- was, and nothing when no definitions were requested. The timer that is
-- started on entry is stopped on every exit.
local function loaddefinitions(tag,specification)
    statistics.starttiming(languages)
    local data, instance = resolve(tag)
    local requested   = specification.patterns or ""
    local definitions = settings_to_array(requested)
    if #definitions > 0 then
        if trace_patterns then
            report_initialization("pattern specification for language %a: %s",tag,specification.patterns)
        end
        local ploaded = instance:patterns()
        local eloaded = instance:hyphenation()
        if not ploaded or ploaded == "" then
            ploaded = { }
        else
            ploaded = { ploaded }
        end
        if not eloaded or eloaded == "" then
            eloaded = { }
        else
            eloaded = { eloaded }
        end
        local dataused  = data.used
        local ok        = false
        local resources = data.resources or { }
        data.resources  = resources
        if not shared then
            local found = resolvers.findfile("lang-exc.lua")
            -- an empty string means "not found" (as in the lookup below), so
            -- it must not be handed to dofile
            if found and found ~= "" then
                shared = dofile(found)
                if type(shared) == "table" then
                    shared = concat(shared," ")
                else
                    shared = true
                end
            else
                shared = true
            end
        end
        for i=1,#definitions do
            local definition = definitions[i]
            if definition == "" then
                -- error
            elseif definition == v_reset then
                if trace_patterns then
                    report_initialization("clearing patterns for language %a",tag)
                end
                instance:clearpatterns()
                instance:clearhyphenation()
                ploaded = { }
                eloaded = { }
            elseif not dataused[definition] then
                dataused[definition] = definition
                local filename = "lang-" .. definition .. ".lua"
                local fullname = resolvers.findfile(filename) or ""
                if fullname == "" then
                    fullname = resolvers.findfile(filename .. ".gz") or ""
                end
                if fullname ~= "" then
                    if trace_patterns then
                        report_initialization("loading definition %a for language %a from %a",definition,tag,fullname)
                    end
                    local suffix, gzipped = gzip.suffix(fullname)
                    local loaded = table.load(fullname,gzipped and gzip.load)
                    if loaded then
                        ok, nofloaded = true, nofloaded + 1
                        sethjcodes(instance,loaded,"patterns",specification.factor)
                        sethjcodes(instance,loaded,"exceptions",specification.factor)
                        local p = validdata(loaded,"patterns",tag)
                        local e = validdata(loaded,"exceptions",tag)
                        if p and p ~= "" then
                            ploaded[#ploaded+1] = p
                        end
                        if e and e ~= "" then
                            eloaded[#eloaded+1] = e
                        end
                        resources[#resources+1] = loaded
                    else
                        report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
                    end
                elseif trace_patterns then
                    report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
                end
            elseif trace_patterns then
                report_initialization("definition %a for language %a already loaded",definition,tag)
            end
        end
        if #ploaded > 0 then
            -- why not always clear
            instance:clearpatterns()
            instance:patterns(unique(tag,requested,ploaded))
        end
        if #eloaded > 0 then
            -- why not always clear
            instance:clearhyphenation()
            instance:hyphenation(concat(eloaded," "))
        end
        if type(shared) == "string" then
            instance:hyphenation(shared)
        end
        -- balance the starttiming above on this exit too
        statistics.stoptiming(languages)
        return ok
    elseif trace_patterns then
        report_initialization("no definitions for language %a",tag)
    end
    statistics.stoptiming(languages)
end
425
-- The number of defined languages is kept in shared storage and mirrored in
-- a local counter.
storage.shared.noflanguages = storage.shared.noflanguages or 0

local noflanguages = storage.shared.noflanguages

-- Register a new language: the next free number is assigned to the tag and
-- a fresh record is stored in the registry.
function languages.define(tag,parent)
    local number = noflanguages + 1
    noflanguages = number
    if trace_patterns then
        report_initialization("assigning number %a to %a",number,tag)
    end
    -- the numbers table maps in both directions
    numbers[number] = tag
    numbers[tag]    = number
    local specification = {
        tag      = tag,
        parent   = parent or "",
        patterns = "",
        loaded   = false,
        used     = { },
        dirty    = true,
        number   = number,
        instance = nil, -- created on demand
        synonyms = { },
    }
    registered[tag] = specification
    storage.shared.noflanguages = number
end
450
-- Flag a synonym in the record of a registered language; unknown tags are
-- silently ignored.
function languages.setsynonym(synonym,tag)
    local specification = registered[tag]
    if not specification then
        return
    end
    specification.synonyms[synonym] = true
end
457
-- Return the sorted keys of the registry as one string, joined by the given
-- separator (a comma by default).
function languages.installed(separator)
    local tags = sortedkeys(registered)
    return concat(tags,separator or ",")
end
461
-- Return what the numbers table holds for the given number; the current
-- language number is used when no usable number is passed.
function languages.current(n)
    local number = n and tonumber(n)
    if not number then
        number = currentlanguage()
    end
    return numbers[number]
end
465
-- Remember the script and language that go with a tag.
function languages.associate(tag,script,language)
    local pair = { script, language }
    associated[tag] = pair
end
469
-- Return the script and language associated with a tag. The tag can also be
-- a number, and when it is absent the current language is used. Nothing is
-- returned when no association is known.
function languages.association(tag)
    if type(tag) == "number" then
        tag = numbers[tag]
    elseif not tag then
        tag = numbers[currentlanguage()]
    end
    local pair = tag and associated[tag]
    if pair then
        return pair[1], pair[2]
    end
end
481
-- Tell if the pattern file of a registered language can be located.
--
-- tag             : language tag (synonyms are not resolved)
-- defaultlanguage : not used
--
-- Returns true when lang-<patterns>.lua is found and false otherwise. The
-- file lookup signals a missing file with an empty string elsewhere in this
-- file, and an empty string is true in Lua, so it is tested explicitly.
function languages.loadable(tag,defaultlanguage)
    local l = registered[tag]
    if not l then
        return false
    end
    local found = resolvers.findfile("lang-"..l.patterns..".lua")
    if found and found ~= "" then
        return true
    else
        return false
    end
end
490
491
492
493
-- Mark the record of a registered language as dirty; unknown tags are
-- ignored.
function languages.unload(tag)
    local specification = registered[tag]
    if not specification then
        return
    end
    specification.dirty = true
end
500
501
502
-- Wrappers around the engine's hyphen character accessors; the argument is
-- resolved by tolang first.

function languages.prehyphenchar(what)
    return prehyphenchar(tolang(what))
end

function languages.posthyphenchar(what)
    return posthyphenchar(tolang(what))
end

function languages.preexhyphenchar(what)
    return preexhyphenchar(tolang(what))
end

function languages.postexhyphenchar(what)
    return postexhyphenchar(tolang(what))
end
507
508
509
510
511
512
513
514
515
-- Characters that are never registered as extra hj codes.
local invalid = { "{", "}", "(", ")", "-", " " }

-- Add the characters found in str (a string or a list of characters) to the
-- extras of a language record and set the hj codes for that set. The set
-- that is already present in data.extras.characters is extended.
local function collecthjcodes(data,str)
    local extras = data.extras
    local found  = extras and extras.characters or { }
    local kind   = type(str)
    if kind == "string" then
        for character in utfcharacters(str) do
            if not found[character] then
                found[character] = true
            end
        end
    elseif kind == "table" then
        for index=1,#str do
            local character = str[index]
            if not found[character] then
                found[character] = true
            end
        end
    end
    -- wipe what does not belong in the set
    for index=1,#invalid do
        local character = invalid[index]
        if found[character] then
            found[character] = nil
        end
    end
    data.extras = { characters = found }
    sethjcodes(data.instance,data,"extras",data.factor)
end
543
-- Read a file and feed its content to the language instance as hyphenation
-- exceptions, after registering the characters that it uses. Nothing happens
-- for an unknown tag; the work is timed.
function languages.loadwords(tag,filename)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    statistics.starttiming(languages)
    local content = io.loaddata(filename) or ""
    collecthjcodes(data,content)
    instance:hyphenation(content)
    statistics.stoptiming(languages)
end
554
555
-- Feed a (stripped) string with hyphenation exceptions to the instance of a
-- registered language, after registering the characters that it uses.
function languages.setexceptions(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    local exceptions = strip(str)
    collecthjcodes(data,exceptions)
    instance:hyphenation(exceptions)
end
564
-- Feed a (stripped) string with patterns to the instance of a registered
-- language, after registering the characters that it uses.
function languages.setpatterns(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    local patterns = strip(str)
    collecthjcodes(data,patterns)
    instance:patterns(patterns)
end
573
-- Pass a word handler on to the instance of a registered language; unknown
-- tags are ignored.
local function setwordhandler(tag,action)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    instance:setwordhandler(action)
end

languages.setwordhandler = setwordhandler
582
-- Register a string of words as goodies data under the language tag and
-- (re)install the goodies handler for that tag.
function languages.setoptions(tag,str)
    local list = {
        { words = str },
    }
    languages.addgoodiesdata(tag,list)
    languages.setgoodieshandler {
        tag     = tag,
        goodies = tag,
    }
end
588
-- Let the instance of a registered language hyphenate a string; for an
-- unknown tag the string is returned untouched.
function languages.hyphenate(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return str
    end
    return instance:hyphenate(str)
end
598
599
600
601
602
603
604
605
606
607local expand ; do
608
609 local nuts = nodes.nuts
610 local nextglyph = nuts.traversers.glyph
611 local setoptions = nuts.setoptions
612
613 local getnext = nuts.getnext
614 local getprev = nuts.getprev
615 local setchar = nuts.setchar
616 local setnext = nuts.setnext
617 local setlink = nuts.setlink
618 local setfield = nuts.setfield
619 local setdisc = nuts.setdisc
620 local getprop = nuts.getprop
621 local setprop = nuts.setprop
622 local setattrlist = nuts.setattrlist
623
624 local new_disc = nuts.pool.disc
625 local new_glyph = nuts.pool.glyph
626 local copy_node = nuts.copy
627 local flushlist = nuts.flushlist
628
629 local glyphoptioncodes = tex.glyphoptioncodes
630
631 local lower = characters.lower
632 local replacer = utf.replacer
633 local utfchartabletopattern = lpeg.utfchartabletopattern
634
635 local report = logs.reporter("languages","goodies")
636
637
638
639 local goodiesdata = setmetatableindex(function(t,k)
640 local v = {
641 properties = { },
642 replacements = { },
643 characters = { },
644 exceptions = { },
645 substitutions = { },
646 experiments = { },
647 }
648 t[k] = v
649 return v
650 end)
651
652
653
654 local compound_disc_code <const> = tex.discoptioncodes.preword | tex.discoptioncodes.postword
655
656 local function setcompound(current,id,first,last,lh,rh,hyphen)
657 local prev = getprev(current)
658
659
660
661 local prechar = prehyphenchar(id)
662 local postchar = posthyphenchar(id)
663 local pre = prechar and copy_node(current)
664 local post = postchar and copy_node(current)
665 local replace = hyphen and prechar and copy_node(current)
666 local disc = new_disc()
667 if pre then
668 setchar(pre,prechar)
669 end
670 if post then
671 setchar(post,postchar)
672 end
673 if replace then
674 setchar(replace,prechar)
675 end
676 setattrlist(disc,current)
677 setoptions(disc,compound_disc_code)
678 setdisc(disc,pre,post,replace)
679 setlink(prev,disc,current)
680 if lh then
681 setfield(first,"rhmin",rh)
682 end
683
684 if rh then
685 setfield(current,"lhmin",lh)
686 end
687
688 end
689
690 local setcompounds = setmetatableindex(function(t,l)
691 local v = setmetatableindex(function(t,r)
692 local v = function(current,id,first,last) return setcompound(current,id,first,last,l,r) end
693 t[r] = v
694 return v
695 end)
696 t[l] = v
697 return v
698 end)
699
700 local sethyphens = setmetatableindex(function(t,l)
701 local v = setmetatableindex(function(t,r)
702 local v = function(current,id,first,last) return setcompound(current,id,first,last,l,r,true) end
703 t[r] = v
704 return v
705 end)
706 t[l] = v
707 return v
708 end)
709
710 local function replaceword(first,last,old,new,oldlen)
711 local oldlen = utflength(old)
712 local newlen = utflength(new)
713 if newlen == 0 then
714
715 elseif newlen <= oldlen then
716 for s in utfvalues(new) do
717 setchar(first,s)
718 first = getnext(first)
719 end
720 if newlen < oldlen then
721
722 local after = getnext(last)
723 local before = getprev(first)
724 setnext(last)
725 setlink(before,after)
726 flushlist(first)
727 end
728 else
729 local i = 0
730 local l = getnext(last)
731 for s in utfvalues(new) do
732 i = i + 1
733 if i > oldlen then
734 local g = copy_node(first)
735 setlink(first,g,l)
736 setchar(g,s)
737 first = g
738 elseif i == oldlen then
739 setchar(first,s)
740 else
741 setchar(first,s)
742 first = getnext(first)
743 end
744 end
745 end
746 end
747
748
749
750
751
752
753 local lh, rh = false, false
754
755 local cache = setmetatableindex(function(t,k)
756 local v = 0
757 if k == "compound" then
758 v = setcompounds[lh][rh]
759 elseif k == "hyphen" then
760 v = sethyphens[lh][rh]
761 else
762 v = 0
763 for s in gmatch(k,"%w+") do
764 local o = glyphoptioncodes[s]
765
766 if o then
767 v = v | o
768 end
769 end
770 end
771 t[k] = v
772 return v
773 end)
774
775 local function checkglyphproperties(options)
776
777 for word, list in sortedhash(options) do
778 if type(list) == "string" then
779 options[word] = options[list]
780 else
781 for index, option in sortedhash(list) do
782 if type(option) == "string" then
783 list[index] = cache[option]
784 end
785 end
786 end
787 end
788 end
789
790
791
792
793
794
795 local sequencers = utilities.sequencers
796 local newsequencer = sequencers.new
797 local appendgroup = sequencers.appendgroup
798 local prependaction = sequencers.prependaction
799 local appendaction = sequencers.appendaction
800 local enableaction = sequencers.enableaction
801 local disableaction = sequencers.disableaction
802
803 local template = {
804 arguments = "s",
805 returnvalues = "r,i",
806 results = "r,i",
807 }
808
809 local registeredactions = setmetatableindex ( function(t,tag)
810 local actions = newsequencer(template)
811 appendgroup(actions,"user")
812 t[tag] = actions
813 return actions
814 end )
815
816 languages.registeredactions = registeredactions
817
818 function languages.installhandler(tag,func)
819 local todo = not rawget(registeredactions,tag)
820 local actions = registeredactions[tag]
821 appendaction(actions,"user",func)
822 enableaction(actions,func)
823 report("installing handler %a for language %a",func,tag)
824 if todo then
825 setwordhandler(tag,function(n,original,remapped,length,first,last)
826 local runner = actions.runner
827 if runner then
828 if getprop(first,"replaced") then
829
830 else
831 local r, result = runner(original)
832 if not r or original == r then
833 return result or 0
834 else
835 setprop(first,"replaced",true)
836 replaceword(first,last,original,r,length)
837 return 1
838 end
839 end
840 end
841 return 2
842 end)
843 end
844 end
845
    -- Counters per language tag and word, only updated when trace_applied is
    -- set (see the handler below).
    -- NOTE(review): presumably setmetatableindex("table") creates the
    -- subtables on demand; confirm.
846 local appliedoptions = setmetatableindex("table")
847 languages.appliedoptions = appliedoptions
848
    -- Install a word handler for a language, driven by collected goodies data.
    --
    -- specification is a table with the fields:
    --   tag     : language tag
    --   goodies : name of the goodies data set, defaults to tag
    --   result  : read here but not used in this function
    --
    -- A handler is only installed when the data set has characters,
    -- properties, replacements, substitutions, exceptions or experiments.
    -- The handler tries, in this order: the registered runner, properties,
    -- the replacer, the substituter, exceptions and experiments. The first
    -- one that applies determines the return value.
849 languages.setgoodieshandler = function(specification)
850 if type(specification) == "table" then
851 local tag = specification.tag
852 local goodies = specification.goodies or tag
853 local result = specification.result or 2
854 local data = goodiesdata[goodies]
855 local properties = data.properties
856 local replacements = data.replacements
857 local substitutions = data.substitutions
858 local characters = data.characters
859 local exceptions = data.exceptions
860 local experiments = data.experiments
        -- these two stay nil unless the data set has matching entries; the
        -- local replacer hides the utf.replacer alias of the enclosing block
861 local replacer = nil
862 local substituter = nil
863 local d, instance = resolve(tag)
864 local done = false
865
        -- extra characters get an hj code equal to their own code point
866 if type(characters) == "table" and characters and next(characters) then
867 addhjcodestoinstance(instance,characters)
868 if trace_goodies then
869 report_goodies("registering %a characters for %a",goodies,tag)
870 end
871 done = true
872 end
        -- option strings in the properties are resolved once, up front
873 if type(properties) == "table" and next(properties) then
874 checkglyphproperties(properties)
875 if trace_goodies then
876 report_goodies("registering %a properties for %a",goodies,tag)
877 end
878 done = true
879 end
        -- one pass substitution pattern: each key is replaced by its value
880 if type(replacements) == "table" and next(replacements) then
881 replacer = Cs((utfchartabletopattern(replacements) / replacements + 1)^0)
882 if trace_goodies then
883 report_goodies("registering %a replacer for %a",goodies,tag)
884 end
885 done = true
886 end
887 if type(substitutions) == "table" and next(substitutions) then
888 substituter = Cs((utfchartabletopattern(substitutions) / substitutions + 1)^0)
889 if trace_goodies then
890 report_goodies("registering %a substitutor for %a",goodies,tag)
891 end
892 done = true
893 end
        -- empty sets are turned into false so that the handler can test them
        -- cheaply
894 if type(exceptions) == "table" and next(exceptions) then
895 done = true
896 else
897 exceptions = false
898 end
899 if type(experiments) == "table" and next(experiments) then
900 done = true
901 if trace_goodies then
902 report_goodies("registering %a experiments for %a",goodies,tag)
903 end
904 else
905 experiments = false
906 end
907 if done then
            -- note: this local hides the file level registry of languages
908 local registered = registeredactions[tag]
909 local applied = appliedoptions[tag]
910 setwordhandler(tag,function(n,original,remapped,length,first,last)
                -- step 1: user actions installed with languages.installhandler
911 local runner = registered.runner
912 if runner then
913 if getprop(first,"replaced") then
914
915 else
916 local r, result = runner(original)
917 if not r then
918 if trace_goodies then
919 report_goodies("kept by runner: %s => %s, result %i",original,remapped, result or 0)
920 end
921 return result or 0
922 elseif original == r then
923 if result then
924 if trace_goodies then
925 report_goodies("kept by runner: %s => %s, result %i",original,remapped, result)
926 end
927 return result
928 else
                                -- same word and no result: fall through to
                                -- the other mechanisms
929 if trace_goodies then
930 report_goodies("kept by runner: %s => %s, continue",original,remapped)
931 end
932 end
933 else
934 if trace_goodies then
935 report_goodies("replaced by runner: %s => %s => %s, restart",original,remapped,r)
936 end
937 setprop(first,"replaced",true)
938 replaceword(first,last,original,r,length)
939 return 1
940 end
941 end
942 end
                -- step 2: per glyph properties, looked up by the remapped word
943 local result = 2
944 local o = properties[remapped]
                -- the substituter below jumps back here with a fresh o
945 ::again::
946 if o then
947 if trace_goodies then
948 report("properties: %s %s",original,remapped)
949 end
950 if trace_applied then
951 applied[original] = (applied[original] or 0) + 1
952 end
                    -- walk the glyphs of the word; entry i of o belongs to
                    -- glyph i and is either a function to call (which makes
                    -- the result 1) or an options value to set
953 local index = 0
954 for g, c in nextglyph, first do
955 index = index + 1
956 local oi = o[index]
957 if oi then
958 if type(oi) == "function" then
959 oi(g,n,first,last)
960 result = 1
961 else
962 setoptions(g,oi)
963 end
964 end
965 if g == last then
966 break
967 end
968 end
969 return result
970 end
                -- step 3: replacements; a changed word is replaced in the
                -- node list and flagged so that it is not replaced again
971 if replacer then
972
973 if getprop(first,"replaced") then
974
975 else
976 local r = lpegmatch(replacer,original)
977 if original == r then
978 if trace_goodies then
979 report_goodies("kept: %s => %s",original,remapped)
980 end
981 else
982 if trace_goodies then
983 report_goodies("replaced: %s => %s => %s",original,remapped,r)
984 end
985 setprop(first,"replaced",true)
986 replaceword(first,last,original,r,length)
987 result = 1
988 end
989 end
990 return result
991 end
                -- step 4: substitutions; the substituted string is expanded
                -- into properties that are stored under the original word
                -- and then applied via the jump back to again
992 if substituter then
993 if getprop(first,"replaced") then
994
995 else
996 local r = lpegmatch(substituter,original)
997 if original == r then
998 if trace_goodies then
999 report_goodies("kept: %s => %s",original,remapped)
1000 end
1001 else
1002 if trace_goodies then
1003 report_goodies("substituted: %s => %s => %s",original,remapped,r)
1004 end
1005 setprop(first,"replaced",true)
1006 if not properties[r] then
1007 o = expand(r)
1008 properties[original] = o
1009 goto again
1010 end
1011 end
1012 end
1013 end
                -- step 5: exceptions; a known word gives its stored value,
                -- any other word gives 3
1014 if exceptions then
1015 local exception = exceptions[original]
1016 if exception then
1017 if trace_goodies then
1018 report_goodies("exception: %s => %s",original,exception)
1019 end
1020 result = exception
1021 else
1022 result = 3
1023 end
1024 return result
1025 end
1026
                -- step 6: experiments; the first function that returns
                -- something wins, otherwise 3 is returned
1027 if experiments then
1028 for i=1,#experiments do
1029 local result = experiments[i](original)
1030 if result then
1031 if trace_goodies then
1032 report_goodies("experiment: %s => %s",original,result)
1033 end
1034 return result
1035 end
1036 end
1037 return 3
1038 end
1039 if trace_goodies then
1040 report_goodies("ignored: %s => %s",original,remapped)
1041 end
1042 return result
1043 end)
1044 elseif trace_goodies then
1045 report_goodies("nothing useable in %a for %a",goodies,tag)
1046 end
1047 else
        -- NOTE(review): tag is only declared as a local inside the branch
        -- above, so here it refers to a global; when that global is unset the
        -- call does nothing. Presumably a leftover of an older signature;
        -- confirm the intent.
1048 setwordhandler(tag)
1049 end
1050 end
1051
1052 local norightligature_option <const> = glyphoptioncodes.norightligature
1053 local noleftligature_option <const> = glyphoptioncodes.noleftligature
1054 local norightkern_option <const> = glyphoptioncodes.norightkern
1055 local noleftkern_option <const> = glyphoptioncodes.noleftkern
1056
1057 local function applyaction(oc,v,n)
1058 if oc == "noligature" then
1059 if n > 0 then
1060 local vv = v[n-1]
1061 if vv then
1062 v[n-1] = vv | norightligature_option
1063 else
1064 v[n-1] = norightligature_option
1065 end
1066 end
1067 v[n] = noleftligature_option
1068 elseif oc == "compound" then
1069 if n > 1 then
1070
1071 v[n] = setcompounds[lh][rh]
1072 return true
1073 end
1074 elseif oc == "hyphen" then
1075 if n > 1 then
1076 v[n] = sethyphens[lh][rh]
1077 return true
1078 end
1079 elseif oc == "nokern" then
1080 if n > 0 then
1081 local vv = v[n-1]
1082 if vv then
1083 v[n-1] = vv | norightkern_option
1084 else
1085 v[n-1] = norightkern_option
1086 end
1087 end
1088 v[n] = noleftkern_option
1089 elseif oc == "noleftkern" then
1090 v[n] = noleftkern_option
1091 elseif oc == "norightkern" then
1092 if n > 0 then
1093 local vv = v[n-1]
1094 if vv then
1095 v[n-1] = vv | norightkern_option
1096 else
1097 v[n-1] = norightkern_option
1098 end
1099 end
1100 else
1101 for s in gmatch(oc,"%w+") do
1102 if applyaction(s,v,n) then
1103 return
1104 end
1105 end
1106 end
1107 end
1108
1109
1110
1111
1112
1113
1114
1115
1116 local actions = {
1117 ["|"] = "noligature",
1118 ["="] = "nokern",
1119 ["<"] = "noleftkern",
1120 [">"] = "norightkern",
1121 ["+"] = "compound",
1122 ["-"] = "hyphen",
1123 }
1124
1125 local function analyzed(m,a,t,k)
1126 local v = { }
1127 local n = 1
1128 if m == true then
1129 for c in gmatch(k,".") do
1130 local ac = a[c]
1131 if not ac then
1132 n = n + 1
1133 else
1134 applyaction(ac,v,n)
1135 end
1136 end
1137 elseif type(m) == "number" then
1138 local i = 0
1139 for c in gmatch(k,".") do
1140 local ac = a[c]
1141 if not ac then
1142 n = n + 1
1143 else
1144 i = i + 1
1145 if i == m then
1146 applyaction(ac,v,n)
1147 break
1148 end
1149 end
1150 end
1151 elseif type(m) == "table" then
1152
1153 m = tohash(m)
1154 local i = 0
1155 for c in gmatch(k,".") do
1156 local ac = a[c]
1157 if not ac then
1158 n = n + 1
1159 else
1160 i = i + 1
1161 if m[i] then
1162 applyaction(ac,v,n)
1163 end
1164 end
1165 end
1166 end
1167 t[k] = v
1168 return v
1169 end
1170
1171 local cache = setmetatableindex(function(t,m)
1172 local v = setmetatableindex(function(t,a)
1173 local v = setmetatableindex(function(t,k)
1174 return analyzed(m,a,t,k)
1175 end)
1176 t[m] = v
1177 return v
1178 end)
1179 t[m] = v
1180 return v
1181 end)
1182
1183 expand = function(str)
1184 return analyzed(true,actions,{},str)
1185 end
1186
1187
1188
    -- replace1 removes the marker characters | = < > + - . and the digits
    -- from a word and keeps all other characters.
1189 local replace1 = Cs ( ( S("|=<>+-.0123456789")/"" + lpegpatterns.utf8character )^0 )
    -- replace2 does the opposite: markers and digits are kept and every
    -- other character becomes a period.
1190 local replace2 = Cs ( ( S("|=<>+-.0123456789") + lpegpatterns.utf8character/".")^0 )
1191
1192 local function stripped(str)
1193
1194 str = gsub(str,"%-%-[^\n]*\n","")
1195 str = gsub(str,"%%[^\n]*\n","")
1196 str = gsub(str,"%s+"," ")
1197 str = gsub(str,"^%s+","")
1198 str = gsub(str,"%s+$","")
1199 return str
1200 end
1201
1202 local registerexceptions do
1203
1204 local lbrace = P("{")
1205 local rbrace = P("}")
1206 local lbracket = P("[")
1207 local rbracket = P("]")
1208 local lparent = P("(")
1209 local rparent = P(")")
1210 local hyphen = P("-")
1211
1212 local p = Cs ( (
1213 lbrace * ((1-rbrace)^0) * rbrace
1214 * lbrace * ((1-rbrace)^0) * rbrace
1215 * lbrace * C((1-rbrace)^0) * rbrace * (lparent * C((1-rparent)^0) * rparent)^0 / function(a,b) return b or a end
1216 + (lbracket * (1-rbracket)^0 * rbracket) / ""
1217 + hyphen / ""
1218 + lpegpatterns.utf8character
1219 )^0 )
1220
1221 registerexceptions = function(target,str)
1222 local kind = type(str)
1223 if kind == "string" then
1224 for v in gmatch(stripped(str),"%S+") do
1225 local k = lpegmatch(p,v)
1226 if k ~= v then
1227 target[k] = v
1228 end
1229 end
1230 elseif kind == "table" then
1231 local n = #str
1232 if n > 0 then
1233 for i=1,n do
1234 local v = str[i]
1235 local k = lpegmatch(p,v)
1236 if k ~= v then
1237 target[k] = v
1238 end
1239 end
1240 else
1241
1242 for k, v in next, str do
1243 target[k] = v
1244 end
1245 end
1246 end
1247 end
1248
1249 end
1250
1251 local registerexperiments do
1252
1253 registerexperiments = function(target,str)
1254 local kind = type(str)
1255 if kind == "function" then
1256 target[#target+1] = str
1257 end
1258 end
1259
1260 end
1261
1262 function languages.strippedgoodiewords(str)
1263 return lpegmatch(replace1,stripped(str))
1264 end
1265
local splitter = lpeg.tsplitat(" ")

-- Register a list of goodie specifications for the language 'tag'.
--
-- tag      : key into 'goodiesdata'
-- list     : indexed table of specifications; entries that are not tables
--            are skipped
-- filename : optional, when given it is appended to the 'goodies' list of
--            the language data
--
-- Per specification the first applicable field of 'words', 'substitutions',
-- 'patterns', 'exceptions' and 'experiments' is handled (in that order);
-- 'characters' is always handled when present.
--
-- Returns a table with counters: nl (entries in the list), np (word lists
-- that went through 'patterns'), nd (word lists taken directly) and nw
-- (number of registered words, including prefixed and suffixed variants).
local function addgoodies(tag,list,filename)
    local nofpatterned  = 0
    local nofdirect     = 0
    local nofwords      = 0
    local noflists      = #list

    local data          = goodiesdata[tag]
    local properties    = data.properties
    local replacements  = data.replacements
    local substitutions = data.substitutions
    local usedchars     = data.characters
    local exceptions    = data.exceptions
    local experiments   = data.experiments

    if filename then
        local files = data.goodies
        if not files then
            files = { }
            data.goodies = files
        end
        insert(files,filename)
    end

    -- NOTE(review): lh and rh are not declared in this function; presumably
    -- they are shared with the code behind 'cache' -- verify.
    lh = false
    rh = false

    for index=1,noflists do
        local entry = list[index]
        if type(entry) == "table" then
            local words    = entry.words
            local patterns = entry.patterns
            local subst    = entry.substitutions
            local chars    = entry.characters
            local except   = entry.exceptions
            local experi   = entry.experiments
            lh = entry.left  or false
            rh = entry.right or false
            if chars then
                for v in utfvalues(chars) do
                    usedchars[v] = true
                end
            end
            if words then
                local prefixes    = entry.prefixes
                local suffixes    = entry.suffixes
                local nofprefixes = 0
                local nofsuffixes = 0
                if prefixes then
                    prefixes    = lpegmatch(splitter,lower(stripped(prefixes)))
                    nofprefixes = #prefixes
                end
                if suffixes then
                    suffixes    = lpegmatch(splitter,lower(stripped(suffixes)))
                    nofsuffixes = #suffixes
                end
                words = lower(stripped(words))
                if patterns then
                    -- apply the character replacements to the word list
                    words = lpegmatch(Cs((utfchartabletopattern(patterns) / patterns + 1)^0),words)
                    nofpatterned = nofpatterned + 1
                else
                    nofdirect = nofdirect + 1
                end
                local matches  = entry.matches or true
                local handlers = entry.actions
                if handlers then
                    -- missing actions fall back on the shared ones
                    setmetatableindex(handlers,actions)
                else
                    handlers = actions
                end
                local cached = cache[matches][handlers]
                -- store one word and keep track of the total
                local function register(str)
                    properties[lpegmatch(replace1,str)] = cached[lpegmatch(replace2,str)]
                    nofwords = nofwords + 1
                end
                for word in gmatch(words,"%S+") do
                    -- the bare word always comes first
                    register(word)
                    if nofprefixes > 0 then
                        for p=1,nofprefixes do
                            local prefixed = prefixes[p] .. word
                            if nofsuffixes > 0 then
                                -- with both given only the combinations
                                -- are added, not the prefixed word alone
                                for s=1,nofsuffixes do
                                    register(prefixed .. suffixes[s])
                                end
                            else
                                register(prefixed)
                            end
                        end
                    else
                        -- runs zero times when there are no suffixes
                        for s=1,nofsuffixes do
                            register(word .. suffixes[s])
                        end
                    end
                end
            elseif subst then
                for k, v in next, subst do
                    substitutions[k] = v
                end
            elseif patterns then
                for k, v in next, patterns do
                    replacements[k] = v
                end
            elseif except then
                registerexceptions(exceptions,except)
            elseif experi then
                registerexperiments(experiments,experi)
            end
        end
    end

    lh = false
    rh = false

    return {
        np = nofpatterned,
        nd = nofdirect,
        nw = nofwords,
        nl = noflists,
    }
end
1401
-- Return the 'goodies' field of the goodies data for 'tag'. When there is
-- no data for that tag the (false or nil) lookup result is returned instead.
function languages.goodiefiles(tag)
    local data = goodiesdata[tag]
    if data then
        return data.goodies
    end
    return data
end
1406
-- Load a goodies file and register its 'options' list for language 'tag'.
--
-- The filename gets the suffix "llg" (via file.addsuffix) and is located
-- with the resolver. Every failure (file not found, file not loadable, no
-- 'options' entry) is reported and ends the call; on success the counters
-- returned by 'addgoodies' are reported.
function languages.addgoodiesfile(tag,filename)
    local fullname = resolvers.findfile(file.addsuffix(filename,"llg")) or ""
    if fullname == "" then
        report_goodies("file %a is not found",filename)
        return
    end
    local loaded = table.load(fullname)
    if not loaded then
        report_goodies("file %a is invalid",fullname)
        return
    end
    local options = loaded.options
    if not options then
        report_goodies("file %a has no options",fullname)
        return
    end
    local counts = addgoodies(tag,options,filename)
    report_goodies(
        "tag %a, file %a loaded, %i lists, %i via patterns, %i direct, %i words",
        tag,fullname,counts.nl,counts.np,counts.nd,counts.nw
    )
end
1427
-- Register an in-memory list of goodie specifications for language 'tag'
-- (no filename is recorded) and report the resulting counters.
function languages.addgoodiesdata(tag,list)
    local counts = addgoodies(tag,list)
    report_goodies(
        "tag %a, data loaded, %i lists, %i via patterns, %i direct, %i words",
        tag,counts.nl,counts.np,counts.nd,counts.nw
    )
end
1433
1434end
1435
1436if environment.initex then
1437
-- Variant used when environment.initex is set: no lookup takes place and
-- the number 0 is returned regardless of the arguments.
function languages.getnumber()
    local nullnumber = 0
    return nullnumber
end
1441
1442else
1443
-- Return the number of the registered language 'tag', or 0 when the tag is
-- not registered.
--
-- tag      : language tag, key into 'registered'
-- default  : fallback patterns specification
-- patterns : explicit patterns specification (may be nil or empty)
-- goodies  : list of goodie files, split with settings_to_array (may be
--            nil or empty)
-- factor   : stored as boolean: true when equal to v_yes
--
-- Work is only done when the language is flagged as dirty:
--
-- * an explicit specification is loaded when it differs from the current
--   one
-- * otherwise, when the current specification is empty, the tag itself is
--   tried and, if that fails and the tag differs from the default, the
--   default
-- * the goodie files are added and the goodies handler is set
--
-- Afterwards the language is flagged as loaded and no longer dirty.
function languages.getnumber(tag,default,patterns,goodies,factor)
    local data = registered[tag]
    if not data then
        return 0
    end
    if data.dirty then
        data.factor = factor == v_yes
        if trace_patterns then
            report_initialization("checking patterns for %a with default %a",tag,default)
        end
        if patterns and patterns ~= "" then
            -- nothing to do when the specification did not change
            if data.patterns ~= patterns then
                data.patterns = patterns
                if trace_patterns then
                    report_initialization("loading patterns for %a using specification %a",tag,patterns)
                end
                loaddefinitions(tag,data)
            end
        elseif data.patterns == "" then
            data.patterns = tag
            if trace_patterns then
                report_initialization("loading patterns for %a using tag",tag)
            end
            local ok = loaddefinitions(tag,data)
            if not ok and tag ~= default then
                data.patterns = default
                if trace_patterns then
                    report_initialization("loading patterns for %a using default",tag)
                end
                loaddefinitions(tag,data)
            end
        end
        if goodies and goodies ~= "" then
            local files = settings_to_array(goodies)
            for i=1,#files do
                languages.addgoodiesfile(tag,files[i])
            end
            languages.setgoodieshandler {
                tag     = tag,
                goodies = tag,
            }
        end
        data.loaded = true
        data.dirty  = false
    end
    return data.number
end
1497
-- Language number 0 is registered under the tag "null", with its own
-- language instance.
numbers[0] = "null"

registered["null"] = { number = 0, instance = new_language(0) }
1504
1505end
1506
1507
1508
1509
1510
languages.logger = languages.logger or { }

-- Return a space separated list of "tag:parent:number" entries, one for
-- each registered language that is flagged as loaded (sorted by tag), or
-- the string "none" when there is no such language.
function languages.logger.report()
    local entries = { }
    for tag, data in sortedhash(registered) do
        if data.loaded then
            entries[#entries+1] = format("%s:%s:%s",tag,data.parent,data.number)
        end
    end
    if #entries == 0 then
        return "none"
    end
    return concat(entries," ")
end
1523
1524
1525
-- Default associations, passed to languages.associate in this order.
do
    local defaults = {
        { 'en', 'latn', 'eng' },
        { 'uk', 'latn', 'eng' },
        { 'nl', 'latn', 'nld' },
        { 'de', 'latn', 'deu' },
        { 'fr', 'latn', 'fra' },
    }
    for i=1,#defaults do
        local d = defaults[i]
        languages.associate(d[1],d[2],d[3])
    end
end
1531
-- Statistics entry "loaded patterns": the logger report followed by the
-- elapsed time registered for 'languages'. Nothing is returned when no
-- language has been loaded.
statistics.register("loaded patterns", function()
    local report = languages.logger.report()
    if report == "none" then
        return
    end
    return format("%s, load time: %s",report,statistics.elapsedtime(languages))
end)
1539
1540
1541
1542
1543
1544
1545
1546
-- Interface bindings. The first two pipe the result of the action into
-- 'context'; the others only call the action with string arguments.

implement { name = "languagenumber",        actions = { languages.getnumber, context }, arguments = "5 strings" }
implement { name = "installedlanguages",    actions = { languages.installed, context } }

implement { name = "definelanguage",        actions = languages.define,        arguments = "2 strings" }
implement { name = "setlanguagesynonym",    actions = languages.setsynonym,    arguments = "2 strings" }
implement { name = "unloadlanguage",        actions = languages.unload,        arguments = "string"    }
implement { name = "setlanguageexceptions", actions = languages.setexceptions, arguments = "2 strings" }
implement { name = "setlanguagepatterns",   actions = languages.setpatterns,   arguments = "2 strings" }
implement { name = "setlanguageoptions",    actions = languages.setoptions,    arguments = "2 strings" }
1593
-- Typeset the pre hyphen character of the language returned by 'tolang',
-- but only when one is set and its value is larger than zero.
implement {
    name    = "currentprehyphenchar",
    actions = function()
        local char = prehyphenchar(tolang())
        if not (char and char > 0) then
            return
        end
        context.char(char)
    end
}
1603
-- Typeset the post hyphen character of the language returned by 'tolang',
-- but only when one is set and its value is larger than zero.
implement {
    name    = "currentposthyphenchar",
    actions = function()
        local char = posthyphenchar(tolang())
        if not (char and char > 0) then
            return
        end
        context.char(char)
    end
}
1613 |