1if not modules then modules = { } end modules ['lang-ini'] = {
2 version = 1.001,
3 comment = "companion to lang-ini.mkiv",
4 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
5 copyright = "PRAGMA ADE / ConTeXt Development Team",
6 license = "see context related readme files"
7}
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22local type, tonumber, next = type, tonumber, next
23local utfbyte, utflength = utf.byte, utf.length
24local format, gsub, gmatch, find = string.format, string.gsub, string.gmatch, string.find
25local concat, sortedkeys, sortedhash, keys, insert, tohash = table.concat, table.sortedkeys, table.sortedhash, table.keys, table.insert, table.tohash
26local setmetatableindex = table.setmetatableindex
27local utfvalues, strip, utfcharacters = string.utfvalues, string.strip, utf.characters
28
29local context = context
30local commands = commands
31local implement = interfaces.implement
32
33local settings_to_array = utilities.parsers.settings_to_array
34local settings_to_set = utilities.parsers.settings_to_set
35
36local trace_patterns = false trackers.register("languages.patterns", function(v) trace_patterns = v end)
37local trace_goodies = false trackers.register("languages.goodies", function(v) trace_goodies = v end)
38local trace_applied = false trackers.register("languages.applied", function(v) trace_applied = v end)
39
40local report_initialization = logs.reporter("languages","initialization")
41local report_goodies = logs.reporter("languages","goodies")
42
43local prehyphenchar = language.prehyphenchar
44local posthyphenchar = language.posthyphenchar
45local preexhyphenchar = language.preexhyphenchar
46local postexhyphenchar = language.postexhyphenchar
47
48
49local sethjcode = language.sethjcode
50local currentlanguage = language.current
51
52local uccodes = characters.uccodes
53local lccodes = characters.lccodes
54
55local new_language = language.new
56
57languages = languages or {}
58local languages = languages
59
60languages.version = 1.010
61
62languages.registered = languages.registered or { }
63local registered = languages.registered
64
65languages.associated = languages.associated or { }
66local associated = languages.associated
67
68languages.numbers = languages.numbers or { }
69local numbers = languages.numbers
70
71languages.data = languages.data or { }
72local data = languages.data
73
74storage.register("languages/registered",registered,"languages.registered")
75storage.register("languages/associated",associated,"languages.associated")
76storage.register("languages/numbers", numbers, "languages.numbers")
77storage.register("languages/data", data, "languages.data")
78
79local variables = interfaces.variables
80
81local v_reset = variables.reset
82local v_yes = variables.yes
83
84local nofloaded = 0
85
-- Look up the registration record for `tag` and make sure it has a language
-- instance, creating one from the record's number on first use.
-- Returns the record and its instance; for an unknown tag the instance is nil.
local function resolve(tag)
    local entry = registered[tag]
    if not entry then
        return entry, nil
    end
    local handle = entry.instance
    if not handle then
        handle = new_language(entry.number)
        entry.instance = handle
    end
    return entry, handle
end
97
-- Turn `what` into a language instance.
--   what : an instance (userdata, returned as is), a language number, a tag,
--          or nothing, in which case the current language number is used
-- Returns the instance, or nothing when no registration matches.
local function tolang(what)
    local target = what or currentlanguage()
    if type(target) == "userdata" then
        return target
    end
    -- numbers maps both ways, so try the mapped value before the raw key
    local mapped = numbers[target]
    local entry = mapped and registered[mapped] or registered[target]
    if not entry then
        return
    end
    local handle = entry.instance
    if not handle then
        handle = new_language(entry.number)
        entry.instance = handle
    end
    return handle
end
116
-- Return the registration record for `tag` (a tag, or a key of the numbers
-- table); without a tag the record of the current language is returned.
function languages.getdata(tag)
    if not tag then
        return registered[numbers[currentlanguage()]]
    end
    return registered[tag] or registered[numbers[tag]]
end
124
125languages.tolang = tolang
126
127
128
129
-- Fetch the data string of dataset `what` from a loaded definition table.
--   loaded : table as returned by table.load for a lang-*.lua file
--   what   : dataset name (the callers in this file use "patterns" and
--            "exceptions")
--   tag    : language tag, only used in the error report
-- Returns the (decompressed) data string, or nil when the dataset is absent,
-- empty, or cannot be decompressed.
local function validdata(loaded,what,tag)
    local dataset = loaded[what]
    if not dataset then
        return
    end
    local data = dataset.data
    if not data or data == "" then
        return
    end
    if dataset.compression ~= "zlib" then
        return data
    end
    local unpacked = zlib.decompress(data)
    -- the format has two placeholders: the dataset name and the language tag
    if not unpacked then
        report_initialization("compression error in %a for language %a",what,tag)
    elseif dataset.length and dataset.length ~= #unpacked then
        report_initialization("compression error in %a for language %a",what,tag)
    end
    return unpacked
end
147
148
149
150
151
152
153
154
-- Register hyphenation (hj) codes on `instance` for every character listed in
-- loaded[what].characters. Does nothing when that entry has no characters.
--   instance : language instance handed to sethjcode
--   loaded   : table holding the dataset; a `codehash` field is added to it
--   what     : key of the dataset inside `loaded`
--   factor   : when truthy, languages.hjcounts (if set) can override the code
local function sethjcodes(instance,loaded,what,factor)
    local l = loaded[what]
    local c = l and l.characters
    if c then
        local hjcounts = factor and languages.hjcounts or false
        -- one code hash per loaded table, created on first use
        local h = loaded.codehash
        if not h then
            h = { }
            loaded.codehash = h
        end
        -- sets the code for one character and for its uppercase variant
        local function setcode(code)
            local l = lccodes[code]
            local u = uccodes[code]
            local s = l
            if type(s) ~= "number" then
                -- no numeric lowercase mapping: use the character itself
                l = code
                s = code
            end
            if hjcounts then
                local c = hjcounts[s]
                if c then
                    c = c.count
                    if not c then
                        -- no count given: s stays as it is
                    elseif c <= 0 then
                        -- non-positive counts are mapped to 32
                        s = 32
                    elseif c >= 31 then
                        -- counts are capped at 31
                        s = 31
                    else
                        -- the count itself becomes the code
                        s = c
                    end
                end
            end
            sethjcode(instance,l,s)
            if u ~= l and type(u) == "number" then
                sethjcode(instance,u,s)
                -- NOTE(review): only the uppercase variant is recorded in the
                -- code hash; confirm that leaving out `l` is intended
                h[u] = s
            end
        end
        -- tex.savinghyphcodes is set to 0 while the codes are set and restored
        -- afterwards
        local s = tex.savinghyphcodes
        tex.savinghyphcodes = 0
        if type(c) == "table" then
            if #c > 0 then
                -- indexed list: entries are characters (strings) or codes
                for i=1,#c do
                    local v = c[i]
                    setcode(type(v) == "string" and utfbyte(v) or v)
                end
            else
                -- hash: keys are characters, only truthy values count
                for k, v in sortedhash(c) do
                    if v then
                        setcode(utfbyte(k))
                    end
                end
            end
        elseif type(c) == "string" then
            -- plain string: every utf value in it gets a code
            for l in utfvalues(c) do
                setcode(l)
            end
        end
        tex.savinghyphcodes = s
    end
end
225
-- Give every character in `characters` an hj code equal to its own code.
--   characters : a string, an indexed list of characters or codes, or a hash
--                whose keys are characters or codes with truthy values
local function addhjcodestoinstance(instance,characters)
    local kind = type(characters)
    if kind == "string" then
        for code in utfvalues(characters) do
            sethjcode(instance,code,code)
        end
    elseif kind == "table" then
        local count = #characters
        if count == 0 then
            -- no array part: treat the table as a set
            for key, enabled in next, characters do
                if enabled then
                    local code = type(key) == "string" and utfbyte(key) or key
                    sethjcode(instance,code,code)
                end
            end
        else
            for index=1,count do
                local entry = characters[index]
                local code = type(entry) == "string" and utfbyte(entry) or entry
                sethjcode(instance,code,code)
            end
        end
    end
end
251
252
253
-- lpeg helpers used by `unique` below to find patterns that occur twice
local P, S, R, C, Cs, Ct, lpegmatch, lpegpatterns = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cs, lpeg.Ct, lpeg.match, lpeg.patterns

local utfsplit = utf.split

local space = lpegpatterns.space
local whitespace = lpegpatterns.whitespace^1
local nospace = lpegpatterns.utf8char - whitespace
local digit = lpegpatterns.digit

-- lookahead: succeeds when whitespace follows, without consuming it
local endofstring = #whitespace

-- inside a Cs capture: leading digits are dropped, a digit right before
-- whitespace is dropped, any other digit becomes a space, the rest is kept
local word = (digit/"")^0 * (digit/"" * endofstring + digit/" " + nospace)^1
-- any run of non-whitespace
local anyword = (1-whitespace)^1
-- table of all substituted words, whitespace in between is skipped
local analyze = Ct((whitespace + Cs(word))^1)
268
-- Merge the loaded pattern strings into one string and drop patterns whose
-- digit-less form occurs more than once.
--   tag, requested : only used in the report
--   loaded         : list of pattern strings (a blank entry is inserted at
--                    index 1 when there is more than one)
-- Returns "" for an empty list, the single entry for a list of one, otherwise
-- the concatenated string with conflicting patterns removed.
local function unique(tag,requested,loaded)
    local nofloaded = #loaded
    if nofloaded == 0 then
        return ""
    elseif nofloaded == 1 then
        return loaded[1]
    else
        -- leading blank, so the first word is preceded by whitespace too
        insert(loaded,1," ")

        loaded = concat(loaded," ")
        local t = lpegmatch(analyze,loaded) or { }
        local h = { }
        local b = { }
        -- h counts occurrences; a word is added to b once, on its second hit
        for i=1,#t do
            local ti = t[i]
            local hi = h[ti]
            if not hi then
                h[ti] = 1
            elseif hi == 1 then
                h[ti] = 2
                b[#b+1] = utfsplit(ti," ")
            end
        end

        local nofbad = #b
        if nofbad > 0 then
            local word
            -- build one pattern per conflict: its pieces with a digit between
            for i=1,nofbad do
                local bi = b[i]
                local p = P(bi[1])
                for i=2,#bi do
                    p = p * digit * P(bi[i])
                end
                if word then
                    word = word + p
                else
                    word = p
                end
                report_initialization("language %a, patterns %a, discarding conflict (0-9)%{[0-9]}t(0-9)",tag,requested,bi)
            end
            -- drop the references to the intermediate tables
            t, h, b = nil, nil, nil
            local someword = digit^0 * word * digit^0 * endofstring / ""

            -- conflicting words are replaced by nothing, everything else is kept
            local strip = Cs((someword + anyword + whitespace)^1)
            return lpegmatch(strip,loaded) or loaded
        else
            return loaded
        end
    end
end
319
320local shared = false
321
-- Load the pattern/exception definitions requested for a language.
--   tag           : language tag, resolved to its record and instance
--   specification : table; `patterns` is a comma separated list of definition
--                   names (or the reset keyword), `factor` is passed on to
--                   sethjcodes
-- Returns true when at least one definition file was loaded, false when
-- definitions were requested but none was loaded, and nil when nothing was
-- requested. Timing is stopped on every return path.
local function loaddefinitions(tag,specification)
    statistics.starttiming(languages)
    local data, instance = resolve(tag)
    local requested = specification.patterns or ""
    local definitions = settings_to_array(requested)
    local ok = nil
    if #definitions > 0 then
        ok = false
        if trace_patterns then
            report_initialization("pattern specification for language %a: %s",tag,specification.patterns)
        end
        -- start from what the instance already has
        local ploaded = instance:patterns()
        local eloaded = instance:hyphenation()
        if not ploaded or ploaded == "" then
            ploaded = { }
        else
            ploaded = { ploaded }
        end
        if not eloaded or eloaded == "" then
            eloaded = { }
        else
            eloaded = { eloaded }
        end
        local dataused = data.used
        local resources = data.resources or { }
        data.resources = resources
        -- the shared exceptions are looked up once; `shared` ends up as a
        -- string (usable) or true (looked up, nothing usable)
        if not shared then
            local found = resolvers.findfile("lang-exc.lua")
            -- an empty string means "not found" elsewhere in this file
            if found and found ~= "" then
                shared = dofile(found)
                if type(shared) == "table" then
                    shared = concat(shared," ")
                else
                    shared = true
                end
            else
                shared = true
            end
        end
        for i=1,#definitions do
            local definition = definitions[i]
            if definition == "" then
                -- skip empty entries
            elseif definition == v_reset then
                if trace_patterns then
                    report_initialization("clearing patterns for language %a",tag)
                end
                instance:clearpatterns()
                instance:clearhyphenation()
                ploaded = { }
                eloaded = { }
            elseif not dataused[definition] then
                dataused[definition] = definition
                local filename = "lang-" .. definition .. ".lua"
                local fullname = resolvers.findfile(filename) or ""
                if fullname == "" then
                    fullname = resolvers.findfile(filename .. ".gz") or ""
                end
                if fullname ~= "" then
                    if trace_patterns then
                        report_initialization("loading definition %a for language %a from %a",definition,tag,fullname)
                    end
                    local _, gzipped = gzip.suffix(fullname)
                    local loaded = table.load(fullname,gzipped and gzip.load)
                    if loaded then
                        ok, nofloaded = true, nofloaded + 1
                        sethjcodes(instance,loaded,"patterns",specification.factor)
                        sethjcodes(instance,loaded,"exceptions",specification.factor)
                        local p = validdata(loaded,"patterns",tag)
                        local e = validdata(loaded,"exceptions",tag)
                        if p and p ~= "" then
                            ploaded[#ploaded+1] = p
                        end
                        if e and e ~= "" then
                            eloaded[#eloaded+1] = e
                        end
                        resources[#resources+1] = loaded
                    else
                        report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
                    end
                elseif trace_patterns then
                    report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
                end
            elseif trace_patterns then
                report_initialization("definition %a for language %a already loaded",definition,tag)
            end
        end
        if #ploaded > 0 then
            -- replace what the instance has by the merged set
            instance:clearpatterns()
            instance:patterns(unique(tag,requested,ploaded))
        end
        if #eloaded > 0 then
            instance:clearhyphenation()
            instance:hyphenation(concat(eloaded," "))
        end
        if type(shared) == "string" then
            instance:hyphenation(shared)
        end
    elseif trace_patterns then
        report_initialization("no definitions for language %a",tag)
    end
    -- previously skipped whenever definitions were present (early return)
    statistics.stoptiming(languages)
    return ok
end
427
428storage.shared.noflanguages = storage.shared.noflanguages or 0
429
430local noflanguages = storage.shared.noflanguages
431
-- Register a new language under `tag` with the next free number and store
-- the updated counter in storage.shared.
function languages.define(tag,parent)
    local number = noflanguages + 1
    noflanguages = number
    if trace_patterns then
        report_initialization("assigning number %a to %a",number,tag)
    end
    -- the numbers table maps in both directions
    numbers[number] = tag
    numbers[tag] = number
    local entry = {
        tag = tag,
        parent = parent or "",
        patterns = "",
        loaded = false,
        used = { },
        dirty = true,
        number = number,
        synonyms = { },
        -- instance is created later, on demand
    }
    registered[tag] = entry
    storage.shared.noflanguages = number
end
452
-- Mark `synonym` as a synonym of the registered language `tag`; unknown tags
-- are ignored.
function languages.setsynonym(synonym,tag)
    local entry = registered[tag]
    if not entry then
        return
    end
    entry.synonyms[synonym] = true
end
459
-- Return the sorted keys of the registered table joined by `separator`
-- (a comma when none is given).
function languages.installed(separator)
    local tags = sortedkeys(registered)
    return concat(tags,separator or ",")
end
463
-- Return the numbers entry for `n`; when `n` is absent or not convertible to
-- a number the current language number is used instead.
function languages.current(n)
    local number = n and tonumber(n)
    if not number then
        number = currentlanguage()
    end
    return numbers[number]
end
467
-- Remember the script/language pair that belongs to `tag`.
function languages.associate(tag,script,language)
    local pair = { script, language }
    associated[tag] = pair
end
471
-- Return the script and language associated with `tag`. A number is mapped
-- through the numbers table first; without an argument the current language
-- is used. Returns nothing when there is no association.
function languages.association(tag)
    if type(tag) == "number" then
        tag = numbers[tag]
    elseif not tag then
        tag = numbers[currentlanguage()]
    end
    local pair = tag and associated[tag]
    if pair then
        return pair[1], pair[2]
    end
end
483
-- Tell whether a pattern file can be found for the registered language `tag`.
--   defaultlanguage : accepted for compatibility, not used
-- Returns true or false. An empty string from the file lookup counts as
-- "not found", which is how the other lookups in this file treat it.
function languages.loadable(tag,defaultlanguage)
    local l = registered[tag]
    if not l then
        return false
    end
    local found = resolvers.findfile("lang-"..l.patterns..".lua")
    if found and found ~= "" then
        return true
    else
        return false
    end
end
492
493
494
495
-- Flag the registered language `tag` as dirty; unknown tags are ignored.
function languages.unload(tag)
    local entry = registered[tag]
    if not entry then
        return
    end
    entry.dirty = true
end
502
503
504
-- Wrappers around the four hyphen character functions of the language
-- library; `what` is first turned into an instance by tolang (an instance, a
-- number, a tag, or nothing for the current language).
function languages.prehyphenchar (what) return prehyphenchar (tolang(what)) end
function languages.posthyphenchar (what) return posthyphenchar (tolang(what)) end
function languages.preexhyphenchar (what) return preexhyphenchar (tolang(what)) end
function languages.postexhyphenchar(what) return postexhyphenchar(tolang(what)) end
509
510
511
512
513
514
515
516
517
-- characters that never get an hj code from user supplied strings
local invalid = { "{", "}", "(", ")", "-", " " }

-- Collect the characters used in `str` (a string, or an indexed list of
-- characters) into data.extras.characters and register hj codes for them.
local function collecthjcodes(data,str)
    local extras = data.extras
    local found = extras and extras.characters or { }
    local kind = type(str)
    if kind == "table" then
        for index=1,#str do
            local character = str[index]
            if not found[character] then
                found[character] = true
            end
        end
    elseif kind == "string" then
        for character in utfcharacters(str) do
            if not found[character] then
                found[character] = true
            end
        end
    end
    -- remove the characters that only serve as syntax
    for index=1,#invalid do
        local character = invalid[index]
        if found[character] then
            found[character] = nil
        end
    end
    data.extras = { characters = found }
    sethjcodes(data.instance,data,"extras",data.factor)
end
545
-- Load the content of `filename` as hyphenation exceptions for the language
-- `tag`; a file that cannot be loaded counts as empty. Unknown tags are
-- ignored.
function languages.loadwords(tag,filename)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    statistics.starttiming(languages)
    local words = io.loaddata(filename) or ""
    collecthjcodes(data,words)
    instance:hyphenation(words)
    statistics.stoptiming(languages)
end
556
557
-- Add the stripped string `str` as hyphenation exceptions for the language
-- `tag`, after registering hj codes for its characters.
function languages.setexceptions(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    local exceptions = strip(str)
    collecthjcodes(data,exceptions)
    instance:hyphenation(exceptions)
end
566
-- Add the stripped string `str` as patterns for the language `tag`, after
-- registering hj codes for its characters.
function languages.setpatterns(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    local patterns = strip(str)
    collecthjcodes(data,patterns)
    instance:patterns(patterns)
end
575
-- Pass `action` on to the setwordhandler method of the instance that belongs
-- to `tag`; nothing happens for an unknown tag.
local function setwordhandler(tag,action)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    instance:setwordhandler(action)
end

languages.setwordhandler = setwordhandler
584
-- Register `str` as a goodies word list for `tag` and (re)install the goodies
-- handler for that language, using the tag itself as the goodies name.
function languages.setoptions(tag,str)
    local list = { { words = str } }
    languages.addgoodiesdata(tag,list)
    languages.setgoodieshandler {
        tag = tag,
        goodies = tag,
    }
end
590
-- Return what the instance's hyphenate method gives for `str`; for an unknown
-- tag the string is returned unchanged.
function languages.hyphenate(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return str
    end
    return instance:hyphenate(str)
end
600
601
602
603
604
605
606
607
608
609local expand ; do
610
611 local nuts = nodes.nuts
612 local nextglyph = nuts.traversers.glyph
613 local setoptions = nuts.setoptions
614
615 local getnext = nuts.getnext
616 local getprev = nuts.getprev
617 local setchar = nuts.setchar
618 local setnext = nuts.setnext
619 local setlink = nuts.setlink
620 local setfield = nuts.setfield
621 local setdisc = nuts.setdisc
622 local getprop = nuts.getprop
623 local setprop = nuts.setprop
624 local setattrlist = nuts.setattrlist
625
626 local new_disc = nuts.pool.disc
627 local new_glyph = nuts.pool.glyph
628 local copy_node = nuts.copy
629 local flushlist = nuts.flushlist
630
631 local glyphoptioncodes = tex.glyphoptioncodes
632
633 local lower = characters.lower
634 local replacer = utf.replacer
635 local utfchartabletopattern = lpeg.utfchartabletopattern
636
637 local report = logs.reporter("languages","goodies")
638
639
640
-- goodies per name; a missing entry is created with empty sub tables on
-- first access
local goodiesdata = setmetatableindex(function(store,name)
    local fresh = {
        properties = { },
        replacements = { },
        characters = { },
        exceptions = { },
        substitutions = { },
    }
    store[name] = fresh
    return fresh
end)
652
653
654
655 local compound_disc_code = tex.discoptioncodes.preword | tex.discoptioncodes.postword
656
-- Insert a discretionary in front of `current`, marking a compound boundary.
--   current : glyph at which the second part starts; it is also the node
--             that gets copied for the pre/post/replace material
--   id      : passed to prehyphenchar/posthyphenchar to get the characters
--   first   : first glyph of the word (receives rhmin)
--   last    : not used
--   lh, rh  : minima for the part after / before the boundary, or false
--   hyphen  : when truthy a replace glyph with the pre character is added too
local function setcompound(current,id,first,last,lh,rh,hyphen)
    local prev = getprev(current)
    local prechar = prehyphenchar(id)
    local postchar = posthyphenchar(id)
    local pre = prechar and copy_node(current)
    local post = postchar and copy_node(current)
    local replace = hyphen and prechar and copy_node(current)
    local disc = new_disc()
    if pre then
        setchar(pre,prechar)
    end
    if post then
        setchar(post,postchar)
    end
    if replace then
        setchar(replace,prechar)
    end
    setattrlist(disc,current)
    setoptions(disc,0x3)
    setdisc(disc,pre,post,replace)
    setlink(prev,disc,current)
    -- each field is guarded by the value that is written to it; before, the
    -- guards were crossed so false could be written when only one was set
    if rh then
        setfield(first,"rhmin",rh)
    end
    if lh then
        setfield(current,"lhmin",lh)
    end
end
690
-- Two level caches indexed by [lh][rh] that hold handlers calling
-- setcompound with those minima; sethyphens also passes the hyphen flag.
local setcompounds, sethyphens do

    local function twolevel(hyphen)
        return setmetatableindex(function(outer,l)
            local inner = setmetatableindex(function(store,r)
                local handler = function(current,id,first,last)
                    return setcompound(current,id,first,last,l,r,hyphen)
                end
                store[r] = handler
                return handler
            end)
            outer[l] = inner
            return inner
        end)
    end

    setcompounds = twolevel()
    sethyphens = twolevel(true)

end
710
-- Overwrite the glyphs from `first` up to `last` with the characters of `new`.
--   old, new : utf strings; their lengths decide which branch is taken
--   oldlen   : ignored, the length is recomputed from `old` below
-- NOTE(review): assumes the run first..last has as many glyphs as `old` has
-- characters; confirm with the callers.
local function replaceword(first,last,old,new,oldlen)
    local oldlen = utflength(old)
    local newlen = utflength(new)
    if newlen == 0 then
        -- an empty replacement leaves the list untouched
    elseif newlen <= oldlen then
        for s in utfvalues(new) do
            setchar(first,s)
            first = getnext(first)
        end
        if newlen < oldlen then
            -- first now points at the first leftover node: cut first..last
            -- out of the list and free it
            local after = getnext(last)
            local before = getprev(first)
            setnext(last)
            setlink(before,after)
            flushlist(first)
        end
    else
        local i = 0
        local l = getnext(last)
        for s in utfvalues(new) do
            i = i + 1
            if i > oldlen then
                -- out of old nodes: append a copy after the current one
                local g = copy_node(first)
                setlink(first,g,l)
                setchar(g,s)
                first = g
            elseif i == oldlen then
                -- last old node: do not advance, copies are linked after it
                setchar(first,s)
            else
                setchar(first,s)
                first = getnext(first)
            end
        end
    end
end
748
749
750
751
752
753
-- minima in effect while goodies are being added (see setcompound)
local lh, rh = false, false

-- Option string to value cache: "compound" and "hyphen" give the handler for
-- the lh/rh values at the time of first access, anything else is the bitwise
-- or of the glyph option codes named in the string (unknown names are
-- skipped).
local cache = setmetatableindex(function(store,key)
    local resolved
    if key == "compound" then
        resolved = setcompounds[lh][rh]
    elseif key == "hyphen" then
        resolved = sethyphens[lh][rh]
    else
        resolved = 0
        for name in gmatch(key,"%w+") do
            local code = glyphoptioncodes[name]
            if code then
                resolved = resolved | code
            end
        end
    end
    store[key] = resolved
    return resolved
end)
775
-- Resolve a properties table in place: a string value is an alias for
-- another entry of the same table, and string entries inside a list are
-- replaced by their cached option value.
local function checkglyphproperties(options)
    for word, list in sortedhash(options) do
        if type(list) ~= "string" then
            for index, option in sortedhash(list) do
                if type(option) == "string" then
                    list[index] = cache[option]
                end
            end
        else
            options[word] = options[list]
        end
    end
end
790
791
792
793
794
795
796 local sequencers = utilities.sequencers
797 local newsequencer = sequencers.new
798 local appendgroup = sequencers.appendgroup
799 local prependaction = sequencers.prependaction
800 local appendaction = sequencers.appendaction
801 local enableaction = sequencers.enableaction
802 local disableaction = sequencers.disableaction
803
-- specification handed to the sequencer constructor for every language
local template = {
    arguments = "s",
    returnvalues = "r,i",
    results = "r,i",
}

-- one sequencer per language tag, created on first access with a "user" group
local registeredactions = setmetatableindex ( function(t,tag)
    local actions = newsequencer(template)
    appendgroup(actions,"user")
    t[tag] = actions
    return actions
end )

languages.registeredactions = registeredactions
818
-- Append `func` to the "user" group of the sequencer for `tag` and enable it.
-- The first time a tag is seen a word handler is installed that runs the
-- sequencer on every word:
--   * a word whose first glyph already has the "replaced" property, or a
--     missing runner, gives 2
--   * a runner result that differs from the word replaces the word and gives 1
--   * otherwise the runner's second result (or 0) is returned
function languages.installhandler(tag,func)
    local firsttime = not rawget(registeredactions,tag)
    local actions = registeredactions[tag]
    appendaction(actions,"user",func)
    enableaction(actions,func)
    report("installing handler %a for language %a",func,tag)
    if not firsttime then
        return
    end
    setwordhandler(tag,function(n,original,remapped,length,first,last)
        local runner = actions.runner
        if runner and not getprop(first,"replaced") then
            local replacement, result = runner(original)
            if replacement and original ~= replacement then
                setprop(first,"replaced",true)
                replaceword(first,last,original,replacement,length)
                return 1
            end
            return result or 0
        end
        return 2
    end)
end
846
847 local appliedoptions = setmetatableindex("table")
848 languages.appliedoptions = appliedoptions
849
-- Install a word handler that applies the goodies collected under
-- specification.goodies (default: the tag) to the language specification.tag.
-- The handler tries, in this order: the registered user actions, glyph
-- properties, replacements, substitutions and exceptions. It returns a number:
-- 2 is the default, 1 is returned after a word was replaced or a property
-- function was run, 3 when exceptions exist but none matches, and an
-- exception value or a runner result otherwise.
languages.setgoodieshandler = function(specification)
    if type(specification) == "table" then
        local tag = specification.tag
        local goodies = specification.goodies or tag
        -- NOTE(review): this value is shadowed by the handler's own `result`
        -- and not used anywhere else in this function
        local result = specification.result or 2
        local data = goodiesdata[goodies]
        local properties = data.properties
        local replacements = data.replacements
        local substitutions = data.substitutions
        local characters = data.characters
        local exceptions = data.exceptions
        local replacer = nil
        local substituter = nil
        local d, instance = resolve(tag)
        local done = false
        -- `done` becomes true as soon as one kind of goodie has content
        if type(characters) == "table" and characters and next(characters) then
            addhjcodestoinstance(instance,characters)
            if trace_goodies then
                report_goodies("registering %a characters for %a",goodies,tag)
            end
            done = true
        end
        if type(properties) == "table" and next(properties) then
            checkglyphproperties(properties)
            if trace_goodies then
                report_goodies("registering %a properties for %a",goodies,tag)
            end
            done = true
        end
        if type(replacements) == "table" and next(replacements) then
            -- substitutes every key of the table by its value, keeps the rest
            replacer = Cs((utfchartabletopattern(replacements) / replacements + 1)^0)
            if trace_goodies then
                report_goodies("registering %a replacer for %a",goodies,tag)
            end
            done = true
        end
        if type(substitutions) == "table" and next(substitutions) then
            substituter = Cs((utfchartabletopattern(substitutions) / substitutions + 1)^0)
            if trace_goodies then
                report_goodies("registering %a substitutor for %a",goodies,tag)
            end
            done = true
        end
        if type(exceptions) == "table" and next(exceptions) then
            done = true
        else
            exceptions = false
        end
        if done then
            local registered = registeredactions[tag]
            local applied = appliedoptions[tag]
            setwordhandler(tag,function(n,original,remapped,length,first,last)
                -- step 1: user actions; skipped for already replaced words
                local runner = registered.runner
                if runner then
                    if getprop(first,"replaced") then
                        -- nothing to do
                    else
                        local r, result = runner(original)
                        if not r then
                            if trace_goodies then
                                report_goodies("kept by runner: %s => %s, result %i",original,remapped, result or 0)
                            end
                            return result or 0
                        elseif original == r then
                            if result then
                                if trace_goodies then
                                    report_goodies("kept by runner: %s => %s, result %i",original,remapped, result)
                                end
                                return result
                            else
                                -- unchanged and no result: go on with the goodies
                                if trace_goodies then
                                    report_goodies("kept by runner: %s => %s, continue",original,remapped)
                                end
                            end
                        else
                            if trace_goodies then
                                report_goodies("replaced by runner: %s => %s => %s, restart",original,remapped,r)
                            end
                            setprop(first,"replaced",true)
                            replaceword(first,last,original,r,length)
                            return 1
                        end
                    end
                end
                local result = 2
                -- step 2: glyph properties, looked up by the remapped word
                local o = properties[remapped]
                ::again::
                if o then
                    if trace_goodies then
                        report("properties: %s %s",original,remapped)
                    end
                    if trace_applied then
                        applied[original] = (applied[original] or 0) + 1
                    end
                    -- entry i of the list belongs to glyph i of the word: a
                    -- function is called, anything else is set as glyph options
                    local index = 0
                    for g, c in nextglyph, first do
                        index = index + 1
                        local oi = o[index]
                        if oi then
                            if type(oi) == "function" then
                                oi(g,n,first,last)
                                result = 1
                            else
                                setoptions(g,oi)
                            end
                        end
                        if g == last then
                            break
                        end
                    end
                    return result
                end
                -- step 3: replacements on the original word
                if replacer then
                    if getprop(first,"replaced") then
                        -- nothing to do
                    else
                        local r = lpegmatch(replacer,original)
                        if original == r then
                            if trace_goodies then
                                report_goodies("kept: %s => %s",original,remapped)
                            end
                        else
                            if trace_goodies then
                                report_goodies("replaced: %s => %s => %s",original,remapped,r)
                            end
                            setprop(first,"replaced",true)
                            replaceword(first,last,original,r,length)
                            result = 1
                        end
                    end
                    return result
                end
                -- step 4: substitutions; the substituted string is expanded
                -- into a property list, stored for this word, and applied by
                -- jumping back to the properties step
                if substituter then
                    if getprop(first,"replaced") then
                        -- nothing to do
                    else
                        local r = lpegmatch(substituter,original)
                        if original == r then
                            if trace_goodies then
                                report_goodies("kept: %s => %s",original,remapped)
                            end
                        else
                            if trace_goodies then
                                report_goodies("substituted: %s => %s => %s",original,remapped,r)
                            end
                            setprop(first,"replaced",true)
                            if not properties[r] then
                                o = expand(r)
                                properties[original] = o
                                goto again
                            end
                        end
                    end
                end
                -- step 5: exceptions, looked up by the original word
                if exceptions then
                    local exception = exceptions[original]
                    if exception then
                        if trace_goodies then
                            report_goodies("exception: %s => %s",original,exception)
                        end
                        result = exception
                    else
                        result = 3
                    end
                    return result
                end
                if trace_goodies then
                    report_goodies("ignored: %s => %s",original,remapped)
                end
                return result
            end)
        elseif trace_goodies then
            report_goodies("nothing useable in %a for %a",goodies,tag)
        end
    else
        -- NOTE(review): `tag` is not a local in this branch (it is declared
        -- inside the branch above), so this refers to a global; confirm what
        -- should be passed here
        setwordhandler(tag)
    end
end
1030
1031 local norightligature_option = glyphoptioncodes.norightligature
1032 local noleftligature_option = glyphoptioncodes.noleftligature
1033 local norightkern_option = glyphoptioncodes.norightkern
1034 local noleftkern_option = glyphoptioncodes.noleftkern
1035
-- Apply the action named `oc` at position `n` of the option list `v`.
--   oc : one of "noligature", "compound", "hyphen", "nokern", "noleftkern",
--        "norightkern", or a string that contains several of these words
--   v  : option list being built; v[n] is the glyph after the action
--        character and v[n-1] the glyph before it
-- Returns true when a compound or hyphen handler was stored (which ends the
-- processing of a combined action string), otherwise nothing.
local function applyaction(oc,v,n)
    if oc == "noligature" then
        if n > 0 then
            local vv = v[n-1]
            if vv then
                v[n-1] = vv | norightligature_option
            else
                v[n-1] = norightligature_option
            end
        end
        v[n] = noleftligature_option
    elseif oc == "compound" then
        if n > 1 then
            -- uses the lh/rh values that are current while goodies are added
            v[n] = setcompounds[lh][rh]
            return true
        end
    elseif oc == "hyphen" then
        if n > 1 then
            v[n] = sethyphens[lh][rh]
            return true
        end
    elseif oc == "nokern" then
        if n > 0 then
            local vv = v[n-1]
            if vv then
                v[n-1] = vv | norightkern_option
            else
                v[n-1] = norightkern_option
            end
        end
        v[n] = noleftkern_option
    elseif oc == "noleftkern" then
        v[n] = noleftkern_option
    elseif oc == "norightkern" then
        if n > 0 then
            local vv = v[n-1]
            if vv then
                v[n-1] = vv | norightkern_option
            else
                v[n-1] = norightkern_option
            end
        end
    else
        -- combined actions: apply the words one by one
        for s in gmatch(oc,"%w+") do
            -- a single unknown word equals the whole string; recursing on it
            -- would never end, so it is skipped
            if s ~= oc and applyaction(s,v,n) then
                return
            end
        end
    end
end
1087
1088
1089
1090
1091
1092
1093
1094
-- default mapping from the marker characters used in goodie words to the
-- action names understood by applyaction
local actions = {
    ["|"] = "noligature",
    ["="] = "nokern",
    ["<"] = "noleftkern",
    [">"] = "norightkern",
    ["+"] = "compound",
    ["-"] = "hyphen",
}
1103
-- Build the option list for the marked word `k` and store it in t[k].
--   m : which markers to honour: true for all, a number for only the m-th,
--       a table (turned into a hash) for the listed ones; any other value
--       gives an empty list
--   a : mapping from marker character to action name
--   t : table that receives the result under key k
-- Characters without an action advance the position, markers do not.
local function analyzed(m,a,t,k)
    local result = { }
    local kind = type(m)
    if m == true or kind == "number" or kind == "table" then
        local wanted = kind == "table" and tohash(m) or nil
        local position = 1
        local seen = 0
        for character in gmatch(k,".") do
            local action = a[character]
            if not action then
                position = position + 1
            elseif m == true then
                applyaction(action,result,position)
            else
                seen = seen + 1
                if wanted then
                    if wanted[seen] then
                        applyaction(action,result,position)
                    end
                elseif seen == m then
                    -- only one marker is wanted, so we are done
                    applyaction(action,result,position)
                    break
                end
            end
        end
    end
    t[k] = result
    return result
end
1151
-- Three level cache of analyzed words: cache[m][a][k], with m the matches
-- specification, a the actions table and k the marked word. The last level is
-- filled by analyzed, which stores its result under k.
local cache = setmetatableindex(function(t,m)
    local v = setmetatableindex(function(t,a)
        local v = setmetatableindex(function(t,k)
            return analyzed(m,a,t,k)
        end)
        -- keyed by the actions table; storing under m (as before) meant that
        -- a new word cache was created on every cache[m][a] access
        t[a] = v
        return v
    end)
    t[m] = v
    return v
end)
1163
-- Analyze `str` with all default markers honoured; the result is not cached
-- because a throwaway table receives it.
expand = function(str)
    local scratch = { }
    return analyzed(true,actions,scratch,str)
end
1167
1168
1169
-- replace1 removes the marker characters, dots and digits and keeps the rest
local replace1 = Cs ( ( S("|=<>+-.0123456789")/"" + lpegpatterns.utf8character )^0 )
-- replace2 keeps those characters and turns every other character into a dot
local replace2 = Cs ( ( S("|=<>+-.0123456789") + lpegpatterns.utf8character/".")^0 )
1172
-- Clean up a word list: remove "--" and "%" comments that are terminated by
-- a newline, collapse runs of whitespace into one space and trim both ends.
local function stripped(str)
    local text = gsub(str,"%-%-[^\n]*\n","")
    text = gsub(text,"%%[^\n]*\n","")
    text = gsub(text,"%s+"," ")
    text = gsub(text,"^%s+","")
    text = gsub(text,"%s+$","")
    return text
end
1182
-- registerexceptions(target,str) adds exceptions to `target`, keyed by the
-- plain word that an exception specification stands for.
local registerexceptions do

    local lbrace = P("{")
    local rbrace = P("}")
    local lbracket = P("[")
    local rbracket = P("]")
    local lparent = P("(")
    local rparent = P(")")
    local hyphen = P("-")

    -- Turns a specification into the plain word:
    --   {..}{..}{x} optionally followed by (y) becomes y when given, else x
    --   [..] and hyphens are removed, all other characters are kept
    local p = Cs ( (
        lbrace * ((1-rbrace)^0) * rbrace
        * lbrace * ((1-rbrace)^0) * rbrace
        * lbrace * C((1-rbrace)^0) * rbrace * (lparent * C((1-rparent)^0) * rparent)^0 / function(a,b) return b or a end
        + (lbracket * (1-rbracket)^0 * rbracket) / ""
        + hyphen / ""
        + lpegpatterns.utf8character
    )^0 )

    registerexceptions = function(target,str)
        local kind = type(str)
        if kind == "string" then
            -- whitespace separated specifications; an entry whose plain word
            -- equals the specification itself carries no information
            for v in gmatch(stripped(str),"%S+") do
                local k = lpegmatch(p,v)
                if k ~= v then
                    target[k] = v
                end
            end
        elseif kind == "table" then
            local n = #str
            if n > 0 then
                -- indexed list of specifications, handled like the string case
                for i=1,n do
                    local v = str[i]
                    local k = lpegmatch(p,v)
                    if k ~= v then
                        target[k] = v
                    end
                end
            else
                -- hash: keys and values are copied as they are
                for k, v in next, str do
                    target[k] = v
                end
            end
        end
    end

end
1231
-- Return the cleaned up word list `str` with all marker characters, dots and
-- digits removed.
function languages.strippedgoodiewords(str)
    local words = stripped(str)
    return lpegmatch(replace1,words)
end
1235
1236 local splitter = lpeg.tsplitat(" ")
1237
-- Merge a list of goodie specifications into the goodies data of `tag`.
--   list     : indexed list; entries that are not tables are skipped
--   filename : when given it is appended to the `goodies` list of the data
-- Per entry: `characters` are always collected; after that the first of
-- `words`, `substitutions`, `patterns` or `exceptions` that is present is
-- handled. Words are registered as such and combined with the optional
-- prefixes and suffixes; `patterns` next to `words` is applied to the words.
-- Returns a table with counters: np (lists via patterns), nd (direct lists),
-- nw (words registered) and nl (entries in the list).
local function addgoodies(tag,list,filename)
    local np, nd, nw = 0, 0, 0
    local nl = #list

    local data = goodiesdata[tag]
    local properties = data.properties
    local replacements = data.replacements
    local substitutions = data.substitutions
    local characters = data.characters
    local exceptions = data.exceptions
    if filename then
        if not data.goodies then
            data.goodies = { }
        end
        insert(data.goodies,filename)
    end

    lh, rh = false, false

    for i=1,nl do
        local l = list[i]
        if type(l) == "table" then
            local w = l.words
            local p = l.patterns
            local s = l.substitutions
            local c = l.characters
            local e = l.exceptions
            -- the minima are picked up by the compound and hyphen actions
            lh = l.left or false
            rh = l.right or false
            if c then
                for v in utfvalues(c) do
                    characters[v] = true
                end
            end
            if w then
                local prefixes, nofprefixes = l.prefixes, 0
                local suffixes, nofsuffixes = l.suffixes, 0
                if prefixes then
                    prefixes = lpegmatch(splitter,lower(stripped(prefixes)))
                    nofprefixes = #prefixes
                end
                if suffixes then
                    suffixes = lpegmatch(splitter,lower(stripped(suffixes)))
                    nofsuffixes = #suffixes
                end
                w = lower(stripped(w))
                if p then
                    local pattern = Cs((utfchartabletopattern(p) / p + 1)^0)
                    w = lpegmatch(pattern,w)
                    np = np + 1
                else
                    nd = nd + 1
                end
                local m = l.matches or true
                local a = l.actions
                if a then
                    -- user actions fall back on the default ones
                    setmetatableindex(a,actions)
                else
                    a = actions
                end
                local cach = cache[m][a]
                -- the plain word is the key, the analyzed marked word the value
                local function register(str)
                    properties[lpegmatch(replace1,str)] = cach[lpegmatch(replace2,str)]
                    nw = nw + 1
                end
                for wrd in gmatch(w,"%S+") do
                    register(wrd)
                    if nofprefixes > 0 then
                        for pi=1,nofprefixes do
                            local prefixed = prefixes[pi] .. wrd
                            if nofsuffixes > 0 then
                                for si=1,nofsuffixes do
                                    register(prefixed .. suffixes[si])
                                end
                            else
                                register(prefixed)
                            end
                        end
                    elseif nofsuffixes > 0 then
                        for si=1,nofsuffixes do
                            register(wrd .. suffixes[si])
                        end
                    end
                end
            elseif s then
                for k, v in next, s do
                    substitutions[k] = v
                end
            elseif p then
                for k, v in next, p do
                    replacements[k] = v
                end
            elseif e then
                registerexceptions(exceptions,e)
            end
        end
    end

    lh, rh = false, false

    return { np = np, nd = nd, nw = nw, nl = nl }
end
1367
-- Returns the goodies field of the goodies data stored under tag. When no
-- data is found for tag, that (false or nil) lookup result is returned.
function languages.goodiefiles(tag)
    local data = goodiesdata[tag]
    if not data then
        return data
    end
    return data.goodies
end
1372
-- Looks up filename (passed through file.addsuffix with "llg"), loads it
-- with table.load and hands its options field to addgoodies for tag. Each
-- way this can fail is reported; the function itself returns nothing.
function languages.addgoodiesfile(tag,filename)
    local fullname = resolvers.findfile(file.addsuffix(filename,"llg")) or ""
    if fullname == "" then
        report_goodies("file %a is not found",filename)
        return
    end
    local loaded = table.load(fullname)
    if not loaded then
        report_goodies("file %a is invalid",fullname)
        return
    end
    local options = loaded.options
    if not options then
        report_goodies("file %a has no options",fullname)
        return
    end
    local counters = addgoodies(tag,options,filename)
    report_goodies("tag %a, file %a loaded, %i lists, %i via patterns, %i direct, %i words",
        tag,fullname,counters.nl,counters.np,counters.nd,counters.nw)
end
1393
-- Feeds an in-memory list to addgoodies for tag (no filename is passed) and
-- reports the counters that come back. Returns nothing.
function languages.addgoodiesdata(tag,list)
    local counters = addgoodies(tag,list)
    report_goodies(
        "tag %a, data loaded, %i lists, %i via patterns, %i direct, %i words",
        tag,
        counters.nl,
        counters.np,
        counters.nd,
        counters.nw
    )
end
1399
1400end
1401
1402if environment.initex then
1403
-- Variant used when environment.initex is set: always answers 0, whatever
-- arguments are passed.
languages.getnumber = function()
    return 0
end
1407
1408else
1409
-- Returns the number of the registered language tag, or 0 when tag is not
-- registered. As long as the entry is flagged dirty, it is (re)initialized
-- first:
--
--   * factor is stored as a boolean (true only when it equals v_yes);
--   * patterns, when a non-empty string that differs from what is stored,
--     is stored and loaded; otherwise, when the stored patterns value is
--     the empty string, the tag itself is tried and, if that fails and tag
--     differs from default, default is tried;
--   * goodies, when a non-empty string, is split with settings_to_array,
--     each item is passed to languages.addgoodiesfile, and a goodies
--     handler is set up for tag.
--
-- Afterwards the entry is marked as loaded and no longer dirty.
function languages.getnumber(tag,default,patterns,goodies,factor)
    local l = registered[tag]
    if not l then
        return 0
    end
    if not l.dirty then
        return l.number
    end

    l.factor = factor == v_yes
    if trace_patterns then
        report_initialization("checking patterns for %a with default %a",tag,default)
    end

    if patterns and patterns ~= "" then
        -- an explicit specification: only act when it is a different one
        if l.patterns ~= patterns then
            l.patterns = patterns
            if trace_patterns then
                report_initialization("loading patterns for %a using specification %a",tag,patterns)
            end
            loaddefinitions(tag,l)
        end
    elseif l.patterns == "" then
        l.patterns = tag
        if trace_patterns then
            report_initialization("loading patterns for %a using tag",tag)
        end
        if not loaddefinitions(tag,l) and tag ~= default then
            l.patterns = default
            if trace_patterns then
                report_initialization("loading patterns for %a using default",tag)
            end
            loaddefinitions(tag,l)
        end
    end

    if goodies and goodies ~= "" then
        local files = settings_to_array(goodies)
        for index=1,#files do
            languages.addgoodiesfile(tag,files[index])
        end
        languages.setgoodieshandler {
            tag     = tag,
            goodies = tag,
        }
    end

    l.loaded = true
    l.dirty  = false

    return l.number
end
1463
-- Number 0 is mapped onto the name "null" and a matching entry, holding a
-- freshly created language object for number 0, is put in the registry.
numbers[0] = "null"

registered["null"] = {
    number   = 0,
    instance = new_language(0),
}
1470
1471end
1472
1473
1474
1475
1476
languages.logger = languages.logger or { }

-- Builds a one line summary of all registered languages that are flagged as
-- loaded, visiting them via sortedhash. Each language shows up as
-- "tag:parent:number" and the items are separated by a space. When no
-- language is loaded the string "none" is returned.
function languages.logger.report()
    local items = { }
    for tag, l in sortedhash(registered) do
        if l.loaded then
            items[#items+1] = format("%s:%s:%s",tag,l.parent,l.number)
        end
    end
    if #items == 0 then
        return "none"
    end
    return concat(items," ")
end
1489
1490
1491
-- Default associations; every row holds the three arguments that are passed
-- to languages.associate, in this order.
do
    local defaults = {
        { 'en', 'latn', 'eng' },
        { 'uk', 'latn', 'eng' },
        { 'nl', 'latn', 'nld' },
        { 'de', 'latn', 'deu' },
        { 'fr', 'latn', 'fra' },
    }
    for index=1,#defaults do
        local entry = defaults[index]
        languages.associate(entry[1],entry[2],entry[3])
    end
end
1497
-- Statistics entry "loaded patterns": the logger report followed by the
-- elapsed time recorded for languages. Nothing is returned when the report
-- equals "none".
statistics.register("loaded patterns", function()
    local report = languages.logger.report()
    if report == "none" then
        return
    end
    local elapsed = statistics.elapsedtime(languages)
    return format("%s, load time: %s",report,elapsed)
end)
1505
1506
1507
1508
1509
1510
1511
1512
-- Interface registrations. Each line results in one implement call that
-- gets a fresh table with the given name, actions and (optional) arguments
-- specification. The helper is kept inside a do block so that it adds no
-- local to the enclosing scope.
do

    local function register(name,actions,arguments)
        implement {
            name      = name,
            actions   = actions,
            arguments = arguments,
        }
    end

    register("languagenumber",        { languages.getnumber, context }, "5 strings")
    register("installedlanguages",    { languages.installed, context })
    register("definelanguage",        languages.define,                 "2 strings")
    register("setlanguagesynonym",    languages.setsynonym,             "2 strings")
    register("unloadlanguage",        languages.unload,                 "string")
    register("setlanguageexceptions", languages.setexceptions,          "2 strings")
    register("setlanguagepatterns",   languages.setpatterns,            "2 strings")
    register("setlanguageoptions",    languages.setoptions,             "2 strings")

end
1559
-- Passes the pre hyphen character of the language object returned by
-- tolang() to context.char, but only when that value is set and positive.
implement {
    name    = "currentprehyphenchar",
    actions = function()
        local char = prehyphenchar(tolang())
        if not char or not (char > 0) then
            return
        end
        context.char(char)
    end
}
1569
-- Passes the post hyphen character of the language object returned by
-- tolang() to context.char, but only when that value is set and positive.
implement {
    name    = "currentposthyphenchar",
    actions = function()
        local char = posthyphenchar(tolang())
        if not char or not (char > 0) then
            return
        end
        context.char(char)
    end
}
1579 |