-- Register this module's metadata in the global `modules` table, creating
-- that table first when it does not exist yet.
if not modules then
    modules = { }
end

modules['lang-ini'] = {
    version   = 1.001,
    comment   = "companion to lang-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9
10
11
12
13
14
15
16
17
18
19
20
21local type, tonumber, next = type, tonumber, next
22local utfbyte = utf.byte
23local format, gsub, gmatch, find = string.format, string.gsub, string.gmatch, string.find
24local concat, sortedkeys, sortedhash, keys, insert = table.concat, table.sortedkeys, table.sortedhash, table.keys, table.insert
25local utfvalues, strip, utfcharacters = string.utfvalues, string.strip, utf.characters
26
27local context = context
28local commands = commands
29local implement = interfaces.implement
30
31local settings_to_array = utilities.parsers.settings_to_array
32local settings_to_set = utilities.parsers.settings_to_set
33
-- Pattern tracing stays disabled until the "languages.patterns" tracker
-- toggles it.
local trace_patterns = false

trackers.register("languages.patterns", function(v)
    trace_patterns = v
end)

local report_initialization = logs.reporter("languages","initialization")

-- The `lang` library is also made reachable through the global `language`.
local lang = lang
language   = lang

-- Shortcuts to the library functions that are used further down.
local prehyphenchar    = language.prehyphenchar
local posthyphenchar   = language.posthyphenchar
local preexhyphenchar  = language.preexhyphenchar
local postexhyphenchar = language.postexhyphenchar
local sethjcode        = language.sethjcode
local new_language     = language.new

local uccodes = characters.uccodes
local lccodes = characters.lccodes

-- The public namespace; an already existing table (and its subtables) is
-- reused instead of being replaced.
languages = languages or {}
local languages = languages

languages.version = 1.010

local registered = languages.registered or { } -- tag    -> language record
local associated = languages.associated or { } -- tag    -> { script, language }
local numbers    = languages.numbers    or { } -- number -> tag
local data       = languages.data       or { }

languages.registered = registered
languages.associated = associated
languages.numbers    = numbers
languages.data       = data

storage.register("languages/registered",registered,"languages.registered")
storage.register("languages/associated",associated,"languages.associated")
storage.register("languages/numbers", numbers, "languages.numbers")
storage.register("languages/data", data, "languages.data")

local variables = interfaces.variables
local v_reset   = variables.reset
local v_yes     = variables.yes

-- Incremented by loaddefinitions for every definition file that loads.
local nofloaded = 0
82
-- Looks up the record registered for `tag` and makes sure that it carries a
-- language object, creating one from the record's number when needed.
-- Returns the record and the object; the object is nil when there is no
-- record.
local function resolve(tag)
    local entry = registered[tag]
    if not entry then
        return entry, nil
    end
    local object = entry.instance
    if not object then
        object = new_language(entry.number)
        entry.instance = object
    end
    return entry, object
end
94
-- Returns the language object that belongs to `what`, which can be a language
-- number or a tag; without argument the current tex.language is used. Nothing
-- is returned when no matching language has been registered.
local function tolang(what)
    if not what then
        what = tex.language
    end
    -- a number is first mapped onto its tag, a tag is used directly
    local tag  = numbers[what]
    local data = tag and registered[tag] or registered[what]
    if data then
        -- The object is cached in the `instance` field, the same field that
        -- resolve() uses. Reading a different field here would never hit the
        -- cache and would create a new object on every call.
        local instance = data.instance
        if not instance then
            instance = new_language(data.number)
            data.instance = instance
        end
        return instance
    end
end
110
-- Returns the record registered for `tag`, which may be a tag or a language
-- number. Without a tag the record of the current tex.language is returned.
function languages.getdata(tag)
    if not tag then
        return registered[numbers[tex.language]]
    end
    return registered[tag] or registered[numbers[tag]]
end
118
119
120
121
122
123
-- Returns the data string of section `what` (for instance "patterns" or
-- "exceptions") of a loaded definition table, decompressing it when the
-- section is flagged as zlib compressed. Nothing is returned when the section
-- is absent, has no data, or cannot be decompressed.
--
-- loaded : table as read from a language definition file
-- what   : name of the section to fetch
-- tag    : language tag, only used in messages
local function validdata(loaded,what,tag)
    local dataset = loaded[what]
    if not dataset then
        return
    end
    local data = dataset.data
    if not data or data == "" then
        return
    end
    if dataset.compression == "zlib" then
        data = zlib.decompress(data)
        if not data then
            -- NOTE(review): assumes a failed decompression can yield nil;
            -- without this guard the length check below would raise an error.
            report_initialization("compression error in %a for language %a",what,tag)
            return
        end
        if dataset.length and dataset.length ~= #data then
            -- the two placeholders take the section name and the language tag
            report_initialization("compression error in %a for language %a",what,tag)
        end
    end
    return data
end
141
142
143
144
145
146
147
148
-- Assigns hj codes on `instance` for all characters listed in
-- loaded[what].characters, which is either a table keyed by characters or a
-- utf string. Every assignment is also recorded in loaded.codehash. When
-- `factor` is set and languages.hjcounts has an entry with a count for a
-- character, that count determines the assigned value.
local function sethjcodes(instance,loaded,what,factor)
    local section = loaded[what]
    local chars   = section and section.characters
    if not chars then
        return
    end

    local hjcounts = factor and languages.hjcounts or false

    local codehash = loaded.codehash
    if not codehash then
        codehash = { }
        loaded.codehash = codehash
    end

    local function setcode(code)
        local lower = lccodes[code]
        local upper = uccodes[code]
        local value = lower
        if type(value) ~= "number" then
            -- no numeric lowercase mapping: the code stands for itself
            lower = code
            value = code
        end
        local entry = hjcounts and hjcounts[value]
        if entry then
            local count = entry.count
            if count then
                if count <= 0 then
                    -- NOTE(review): 32 presumably marks "counts as zero" for
                    -- the engine; confirm against the engine documentation
                    value = 32
                elseif count >= 31 then
                    value = 31
                else
                    value = count
                end
            end
            -- an entry without count leaves the value as it is
        end
        sethjcode(instance,lower,value)
        codehash[lower] = value
        if upper ~= lower and type(upper) == "number" then
            sethjcode(instance,upper,value)
            codehash[upper] = value
        end
    end

    -- tex.savinghyphcodes is zeroed while the codes are set and restored after
    local saved = tex.savinghyphcodes
    tex.savinghyphcodes = 0
    if type(chars) == "table" then
        for character in sortedhash(chars) do
            setcode(utfbyte(character))
        end
    else
        for code in utfvalues(chars) do
            setcode(code)
        end
    end
    tex.savinghyphcodes = saved
end
209
210
211
-- Shortcuts into lpeg plus the patterns used by `unique` below.
local P            = lpeg.P
local R            = lpeg.R
local Cs           = lpeg.Cs
local Ct           = lpeg.Ct
local lpegmatch    = lpeg.match
local lpegpatterns = lpeg.patterns

local utfsplit = utf.split

local space      = lpegpatterns.space
local whitespace = lpegpatterns.whitespace^1            -- a run of whitespace
local nospace    = lpegpatterns.utf8char - whitespace   -- one non-whitespace character
local digit      = lpegpatterns.digit

-- Despite its name this is a lookahead for whitespace, not for the real end
-- of the string.
local endofstring = #whitespace

-- A word with its digits neutralized: leading digits and a digit that is
-- directly followed by whitespace are dropped, any other digit becomes a
-- space, and all other characters are kept.
local word = ((digit/"")^0) * (((digit/"") * endofstring) + (digit/" ") + nospace)^1

-- Any run of non-whitespace, taken as it is.
local anyword = (1-whitespace)^1

-- Collects the neutralized form of every word in a string into a table.
local analyze = Ct((whitespace + Cs(word))^1)
226
-- Merges the pattern strings in the array `loaded` into one string. When
-- there is more than one string, words whose digit-neutralized forms (see
-- `analyze`) occur more than once are reported as conflicts and every word
-- matching such a conflict is removed from the result.
--
-- Note that `loaded` gets a " " inserted at index 1 as a side effect when it
-- has more than one entry. `tag` and `requested` are only used in messages.
local function unique(tag,requested,loaded)
    local count = #loaded
    if count == 0 then
        return ""
    end
    if count == 1 then
        return loaded[1]
    end

    -- the leading space makes the first word behave like all the others
    insert(loaded,1," ")
    loaded = concat(loaded," ")

    -- collect each neutralized word that shows up a second time, once
    local words   = lpegmatch(analyze,loaded) or { }
    local seen    = { }
    local clashes = { }
    for i=1,#words do
        local w = words[i]
        local n = seen[w]
        if not n then
            seen[w] = 1
        elseif n == 1 then
            seen[w] = 2
            clashes[#clashes+1] = utfsplit(w," ")
        end
    end

    local nofclashes = #clashes
    if nofclashes == 0 then
        return loaded
    end

    -- one alternative per conflict: its parts with a single digit in between
    local conflict
    for i=1,nofclashes do
        local parts = clashes[i]
        local p = P(parts[1])
        for j=2,#parts do
            p = p * digit * P(parts[j])
        end
        if conflict then
            conflict = conflict + p
        else
            conflict = p
        end
        report_initialization("language %a, patterns %a, discarding conflict (0-9)%{[0-9]}t(0-9)",tag,requested,parts)
    end
    words, seen, clashes = nil, nil, nil -- no longer needed

    -- a conflict with optional digits around it, followed by whitespace, is
    -- replaced by nothing; everything else is copied
    local someword = digit^0 * conflict * digit^0 * endofstring / ""
    local cleaner  = Cs((someword + anyword + whitespace)^1)
    return lpegmatch(cleaner,loaded) or loaded
end
277
-- Cache for the shared exceptions of "lang-exc.lua": false means that the file
-- has not been looked for yet, true that nothing usable was found, and a
-- string holds the space separated exceptions.
local shared = false

-- Loads the definition files ("lang-<name>.lua", optionally with a ".gz"
-- suffix) listed in specification.patterns into the language registered as
-- `tag`: hj codes are set, and patterns and exceptions are merged with what
-- the language object already has. The entry `reset` clears what was loaded so
-- far; definitions that were used before are skipped.
--
-- Returns true when at least one definition file was loaded, false when none
-- was, and nil when the specification lists no definitions at all. The timer
-- is stopped on every path, so each starttiming has its matching stoptiming.
local function loaddefinitions(tag,specification)
    statistics.starttiming(languages)
    local data, instance = resolve(tag)
    local requested   = specification.patterns or ""
    local definitions = settings_to_array(requested)
    local ok          = nil
    if #definitions > 0 then
        ok = false
        if trace_patterns then
            report_initialization("pattern specification for language %a: %s",tag,specification.patterns)
        end
        -- start from what the language object already holds
        local ploaded = instance:patterns()
        local eloaded = instance:hyphenation()
        if not ploaded or ploaded == "" then
            ploaded = { }
        else
            ploaded = { ploaded }
        end
        if not eloaded or eloaded == "" then
            eloaded = { }
        else
            eloaded = { eloaded }
        end
        local dataused  = data.used
        local resources = data.resources or { }
        data.resources  = resources
        if not shared then
            local found = resolvers.findfile("lang-exc.lua")
            -- an empty string also means "not found", as in the lookup of the
            -- definition files below
            if found and found ~= "" then
                shared = dofile(found)
                if type(shared) == "table" then
                    shared = concat(shared," ")
                else
                    shared = true
                end
            else
                shared = true
            end
        end
        for i=1,#definitions do
            local definition = definitions[i]
            if definition == "" then
                -- nothing to do
            elseif definition == v_reset then
                if trace_patterns then
                    report_initialization("clearing patterns for language %a",tag)
                end
                instance:clear_patterns()
                instance:clear_hyphenation()
                ploaded = { }
                eloaded = { }
            elseif not dataused[definition] then
                dataused[definition] = definition
                local filename = "lang-" .. definition .. ".lua"
                local fullname = resolvers.findfile(filename) or ""
                if fullname == "" then
                    fullname = resolvers.findfile(filename .. ".gz") or ""
                end
                if fullname ~= "" then
                    if trace_patterns then
                        report_initialization("loading definition %a for language %a from %a",definition,tag,fullname)
                    end
                    local _, gzipped = gzip.suffix(fullname)
                    local loaded = table.load(fullname,gzipped and gzip.load)
                    if loaded then
                        ok, nofloaded = true, nofloaded + 1
                        sethjcodes(instance,loaded,"patterns",specification.factor)
                        sethjcodes(instance,loaded,"exceptions",specification.factor)
                        local p = validdata(loaded,"patterns",tag)
                        local e = validdata(loaded,"exceptions",tag)
                        if p and p ~= "" then
                            ploaded[#ploaded+1] = p
                        end
                        if e and e ~= "" then
                            eloaded[#eloaded+1] = e
                        end
                        resources[#resources+1] = loaded
                    else
                        report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
                    end
                elseif trace_patterns then
                    report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
                end
            elseif trace_patterns then
                report_initialization("definition %a for language %a already loaded",definition,tag)
            end
        end
        if #ploaded > 0 then
            -- replace the patterns by the merged, conflict free set
            instance:clear_patterns()
            instance:patterns(unique(tag,requested,ploaded))
        end
        if #eloaded > 0 then
            instance:clear_hyphenation()
            instance:hyphenation(concat(eloaded," "))
        end
        if type(shared) == "string" then
            instance:hyphenation(shared)
        end
    elseif trace_patterns then
        report_initialization("no definitions for language %a",tag)
    end
    statistics.stoptiming(languages)
    return ok
end
385
-- The number of defined languages is kept in storage.shared; counting
-- continues from whatever value is already stored there.
storage.shared.noflanguages = storage.shared.noflanguages or 0

local noflanguages = storage.shared.noflanguages

-- Registers `tag` as a new language: it gets the next free number and a fresh
-- record without patterns, flagged dirty so that they are loaded on demand.
function languages.define(tag,parent)
    local number = noflanguages + 1
    noflanguages = number
    if trace_patterns then
        report_initialization("assigning number %a to %a",number,tag)
    end
    numbers[number] = tag
    local entry = {
        tag      = tag,
        parent   = parent or "",
        patterns = "",
        loaded   = false,
        used     = { },
        dirty    = true,
        number   = number,
        synonyms = { },
     -- instance is added once a language object gets created
    }
    registered[tag] = entry
    storage.shared.noflanguages = number
end
409
-- Marks `synonym` as a synonym in the record of the registered language
-- `tag`; an unregistered tag is silently ignored.
function languages.setsynonym(synonym,tag)
    local entry = registered[tag]
    if not entry then
        return
    end
    entry.synonyms[synonym] = true
end
416
-- Returns the sorted tags of all registered languages as one string, joined
-- by `separator` (a comma when none is given).
function languages.installed(separator)
    local tags = sortedkeys(registered)
    return concat(tags,separator or ",")
end
420
-- Returns the tag that belongs to language number `n`. When `n` is absent or
-- cannot be converted to a number, the current tex.language is used.
function languages.current(n)
    local number = n and tonumber(n)
    if not number then
        number = tex.language
    end
    return numbers[number]
end
424
-- Stores the script and language that go with `tag` as a two element array.
function languages.associate(tag,script,language)
    local pair = { script, language }
    associated[tag] = pair
end
428
-- Returns the script and language associated with `tag`, which may also be a
-- language number; without argument the current tex.language is used.
-- Nothing is returned when there is no association.
function languages.association(tag)
    if type(tag) == "number" then
        tag = numbers[tag]
    elseif not tag then
        tag = numbers[tex.language]
    end
    local pair = tag and associated[tag]
    if pair then
        return pair[1], pair[2]
    end
end
440
-- Tells whether the file "lang-<patterns>.lua" of the registered language
-- `tag` can be found, where <patterns> is the `patterns` field of its record.
-- Always returns a boolean. The second parameter is accepted but not used.
function languages.loadable(tag,defaultlanguage)
    local l = registered[tag]
    if not l then
        return false
    end
    local found = resolvers.findfile("lang-"..l.patterns..".lua")
    -- An empty string means "not found" too (loaddefinitions treats the
    -- result of resolvers.findfile the same way); as it is truthy in Lua a
    -- plain truth test would report every registered language as loadable.
    return (found and found ~= "") and true or false
end
449
450
451
452
-- Flags the registered language `tag` as dirty, so that languages.getnumber
-- checks its patterns again; an unregistered tag is ignored.
function languages.unload(tag)
    local entry = registered[tag]
    if not entry then
        return
    end
    entry.dirty = true
end
459
if environment.initex then

    -- In an initex run no patterns are loaded and the number is always zero.
    function languages.getnumber()
        return 0
    end

else

    -- Returns the number of the registered language `tag` (zero when it is
    -- unknown) and, as long as its record is flagged dirty, loads its
    -- patterns first:
    --
    -- * a non-empty `patterns` specification is loaded when it differs from
    --   the one stored in the record
    -- * otherwise, when the record has no specification yet, the tag itself
    --   is tried and, when that loads nothing, `default`
    --
    -- `factor` is compared with the interface variable `yes` and stored as a
    -- boolean in the record.
    function languages.getnumber(tag,default,patterns,factor)
        local l = registered[tag]
        if not l then
            return 0
        end
        if l.dirty then
            l.factor = (factor == v_yes)
            if trace_patterns then
                report_initialization("checking patterns for %a with default %a",tag,default)
            end
            local explicit = patterns and patterns ~= ""
            if explicit then
                -- an unchanged specification needs no reload
                if l.patterns ~= patterns then
                    l.patterns = patterns
                    if trace_patterns then
                        report_initialization("loading patterns for %a using specification %a",tag,patterns)
                    end
                    loaddefinitions(tag,l)
                end
            elseif l.patterns == "" then
                l.patterns = tag
                if trace_patterns then
                    report_initialization("loading patterns for %a using tag",tag)
                end
                local ok = loaddefinitions(tag,l)
                if not ok and tag ~= default then
                    l.patterns = default
                    if trace_patterns then
                        report_initialization("loading patterns for %a using default",tag)
                    end
                    loaddefinitions(tag,l)
                end
            end
            l.loaded = true
            l.dirty  = false
        end
        return l.number
    end

end
510
511
512
-- Accessors for the four hyphen characters. Each one passes the language
-- object found by tolang for `what` (a tag, a number, or nothing for the
-- current language) to the library function of the same name.

function languages.prehyphenchar(what)
    return prehyphenchar(tolang(what))
end

function languages.posthyphenchar(what)
    return posthyphenchar(tolang(what))
end

function languages.preexhyphenchar(what)
    return preexhyphenchar(tolang(what))
end

function languages.postexhyphenchar(what)
    return postexhyphenchar(tolang(what))
end
517
518
519
520
521
522
523
524
525
-- Characters that never end up in the set of collected characters.
local invalid = { "{", "}", "-" }

-- Adds every utf character of `str` to the character set kept in
-- data.extras.characters (created when missing), removes the characters
-- listed in `invalid`, and sets the hj codes for the resulting set on the
-- language object of `data`.
local function collecthjcodes(data,str)
    local extras = data.extras
    local found  = extras and extras.characters or { }
    for character in utfcharacters(str) do
        if not found[character] then
            found[character] = true
        end
    end
    for i=1,#invalid do
        local unwanted = invalid[i]
        if found[unwanted] then
            found[unwanted] = nil
        end
    end
    data.extras = { characters = found }
    sethjcodes(data.instance,data,"extras",data.factor)
end
544
-- Reads `filename` and feeds its content as exceptions to the registered
-- language `tag`, after setting hj codes for the characters it contains.
-- A file that cannot be read counts as empty; an unknown tag is ignored.
function languages.loadwords(tag,filename)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    statistics.starttiming(languages)
    local words = io.loaddata(filename) or ""
    collecthjcodes(data,words)
    instance:hyphenation(words)
    statistics.stoptiming(languages)
end
555
556
-- Adds the exceptions in `str` (stripped first) to the registered language
-- `tag`, after setting hj codes for the characters they contain. An unknown
-- tag is ignored.
function languages.setexceptions(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return
    end
    local exceptions = strip(str)
    collecthjcodes(data,exceptions)
    instance:hyphenation(exceptions)
end
565
-- Returns what the hyphenate method of the language object of `tag` gives for
-- `str`; for an unregistered tag `str` is returned unchanged.
function languages.hyphenate(tag,str)
    local data, instance = resolve(tag)
    if not data then
        return str
    end
    return instance:hyphenate(str)
end
575
576
577
578
579
languages.logger = languages.logger or { }

-- Returns a space separated list of "tag:parent:number" entries, sorted by
-- tag, for all languages flagged as loaded, or "none" when there are none.
function languages.logger.report()
    local entries = { }
    for tag, l in sortedhash(registered) do
        if l.loaded then
            entries[#entries+1] = format("%s:%s:%s",tag,l.parent,l.number)
        end
    end
    if #entries > 0 then
        return concat(entries," ")
    end
    return "none"
end
592
593
594
-- Predefined associations: tag, script, language.
do
    local predefined = {
        { 'en', 'latn', 'eng' },
        { 'uk', 'latn', 'eng' },
        { 'nl', 'latn', 'nld' },
        { 'de', 'latn', 'deu' },
        { 'fr', 'latn', 'fra' },
    }
    for i=1,#predefined do
        local p = predefined[i]
        languages.associate(p[1],p[2],p[3])
    end
end
600
-- Reports the loaded languages together with the time spent on loading them;
-- nothing is returned when no language was loaded.
statistics.register("loaded patterns", function()
    local result = languages.logger.report()
    if result == "none" then
        return
    end
    local elapsed = statistics.elapsedtime(languages)
    return format("%s, load time: %s",result,elapsed)
end)
608
609
610
611
612
613
614
615
-- Interface definitions: each entry binds a name to one of the functions
-- above. Where `context` follows the function in the actions list, it is the
-- second action of that entry.

implement {
    name      = "languagenumber",
    arguments = "4 strings",
    actions   = { languages.getnumber, context },
}

implement {
    name    = "installedlanguages",
    actions = { languages.installed, context },
}

implement {
    name      = "definelanguage",
    arguments = "2 strings",
    actions   = languages.define,
}

implement {
    name      = "setlanguagesynonym",
    arguments = "2 strings",
    actions   = languages.setsynonym,
}

implement {
    name      = "unloadlanguage",
    arguments = "string",
    actions   = languages.unload,
}

implement {
    name      = "setlanguageexceptions",
    arguments = "2 strings",
    actions   = languages.setexceptions,
}
650
-- The current pre and post hyphen characters: the character of the current
-- language is passed to context.char, but only when it is a positive number.
do

    local function flushchar(c)
        if c and c > 0 then
            context.char(c)
        end
    end

    implement {
        name    = "currentprehyphenchar",
        actions = function()
            local c = prehyphenchar(tolang())
            flushchar(c)
        end
    }

    implement {
        name    = "currentposthyphenchar",
        actions = function()
            local c = posthyphenchar(tolang())
            flushchar(c)
        end
    }

end
670 |