lang-def.lua /size: 13 Kb    last modification: 2020-07-01 14:35
-- Make sure the global module registry exists, then record this file's
-- metadata in it under the key 'lang-def'.
if not modules then
    modules = { }
end

modules["lang-def"] = {
    version   = 1.001,
    comment   = "companion to lang-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
 -- dataonly  = true, -- saves 10K
}
9
-- Local shortcuts for the library functions used below.
local rawget, lower     = rawget, string.lower

-- Helpers that live outside this file.
local allocate          = utilities.storage.allocate
local setmetatableindex = table.setmetatableindex

-- Shared namespace: reuse the global 'languages' table (and its 'data'
-- subtable) when they already exist, otherwise create them here.
if not languages then
    languages = { }
end
local languages = languages

if not languages.data then
    languages.data = { }
end
local data = languages.data
20
21-- The specifications are based on an analysis done by Arthur. The
22-- names of tags were changed by Hans. The data is not yet used but
23-- will be some day.
24--
25-- description
26--
27-- The description is only meant as an indication; for example 'no' is
28-- "Norwegian, undetermined" because that's really what it is.
29--
30-- script
31--
32-- This is the 4-letter script tag according to ISO 15924, the
33-- official standard.
34--
35-- bibliographical and terminological
36--
37-- Then we have *two* ISO-639 3-letter tags: one is supposed to be used
38-- for "bibliographical" purposes, the other for "terminological".  The
39-- first one is quite special (and mostly used in American libraries),
40-- and the more interesting one is the other (apparently it's that one
41-- we find everywhere).
42--
43-- context
44--
-- These are the ones used in ConTeXt. Kind of numberplate ones.
46--
47-- opentype
48--
49-- This is the 3-letter OpenType language tag, obviously.
50--
51-- variant
52--
-- This is actually the RFC 4646 tag: an extension of ISO-639 that also defines
-- codes for variants like de-1901 for "German, 1901 orthography" or zh-Hans for
-- "Chinese, simplified characters" ('Hans' is the ISO-15924 tag for
-- "HAN ideographs, Simplified" :-)  As I said yesterday, I think this
-- should be the reference since it's exactly what we want: it's really
-- standard (it's an RFC) and it's more than simply languages.  To my
-- knowledge this is the only system that addresses this issue.
60--
61-- Warning: it's not unique!  Because we have two "German" languages
62-- (and could, potentially, have two Chinese, etc.)
63--
64-- Beware: the abbreviations are lowercased, which makes it more
65-- convenient to use them.
66--
67-- todo: add default features
68
-- One record per language. The order of the records matters: the lookup
-- tables built from this list are filled front to back, so when several
-- records share a tag (for instance opentype "eng" or script "latn") the
-- last record carrying that tag is the one that ends up registered.
local specifications = allocate {
    {
        description     = "Dutch",
        script          = "latn",
     -- bibliographical = "nld",
     -- terminological  = "nld",
        context         = "nl",
        opentype        = "nld",
        variant         = "nl",
    },
    {
        description     = "Basque",
        script          = "latn",
        bibliographical = "baq",
        terminological  = "eus",
        context         = "ba",
        opentype        = "euq",
        variant         = "eu",
    },
    {
        description     = "Welsh",
        script          = "latn",
        bibliographical = "wel",
        terminological  = "cym",
        context         = "cy",
        opentype        = "wel",
        variant         = "cy",
    },
    {
        description     = "Icelandic",
        script          = "latn",
        bibliographical = "ice",
        terminological  = "isl",
        context         = "is",
        opentype        = "isl",
        variant         = "is",
    },
    {
        description     = "Norwegian, undetermined",
        script          = "latn",
        bibliographical = "nor",
        terminological  = "nor",
        context         = "no",
        variant         = "no",
    },
    {
        description     = "Norwegian bokmal",
        script          = "latn",
        bibliographical = "nob",
        terminological  = "nob",
        opentype        = "nor", -- not sure!
        variant         = "nb",
    },
    {
        description     = "Norwegian nynorsk",
        script          = "latn",
        bibliographical = "nno",
        terminological  = "nno",
        opentype        = "nny",
        variant         = "nn",
    },
    {
        description     = "Ancient Greek",
        script          = "grek",
        bibliographical = "grc",
        terminological  = "grc",
        context         = "agr",
        variant         = "grc",
    },
    {
        description     = "German, 1901 orthography",
        script          = "latn",
        terminological  = "deu",
        context         = "deo",
        opentype        = "deu",
        variant         = "de-1901",
    },
    {
        description     = "German, 1996 orthography",
        script          = "latn",
        bibliographical = "ger",
        terminological  = "deu",
        context         = "de",
        opentype        = "deu",
        variant         = "de-1996",
    },
    {
        description     = "Afrikaans",
        script          = "latn",
        bibliographical = "afr",
        terminological  = "afr",
        context         = "af",
        opentype        = "afk",
        variant         = "af",
    },
    {
        description     = "Catalan",
        script          = "latn",
        bibliographical = "cat",
        terminological  = "cat",
        context         = "ca",
        opentype        = "cat",
        variant         = "ca",
    },
    {
        description     = "Czech",
        script          = "latn",
        bibliographical = "cze",
        terminological  = "ces",
        context         = "cz",
        opentype        = "csy",
        variant         = "cs",
    },
    {
        description     = "Greek",
        script          = "grek",
        bibliographical = "gre",
        terminological  = "ell",
        context         = "gr",
        opentype        = "ell",
        variant         = "el",
    },
    {
        description     = "American English",
        script          = "latn",
        bibliographical = "eng",
        terminological  = "eng",
        context         = "us",
        opentype        = "eng",
        variant         = "en-US",
    },
    {
        description     = "British English",
        script          = "latn",
        bibliographical = "eng",
        terminological  = "eng",
        context         = "uk",
        opentype        = "eng",
        variant         = "en-UK", -- Could be en-GB as well ...
    },
    {
        description     = "Spanish",
        script          = "latn",
        bibliographical = "spa",
        terminological  = "spa",
        context         = "es",
        opentype        = "esp",
        variant         = "es",
    },
    {
        description     = "Finnish",
        script          = "latn",
        bibliographical = "fin",
        terminological  = "fin",
        context         = "fi",
        opentype        = "fin",
        variant         = "fi",
    },
    {
        description     = "Estonian",
        script          = "latn",
        bibliographical = "est",
        terminological  = "est",
        context         = "et",
        opentype        = "est",
        variant         = "et",
    },
    {
        description     = "French",
        script          = "latn",
        bibliographical = "fre",
        terminological  = "fra",
        context         = "fr",
        opentype        = "fra",
        variant         = "fr",
    },
    {
        description     = "Croatian",
        script          = "latn",
        bibliographical = "scr",
        terminological  = "hrv",
        context         = "hr",
        opentype        = "hrv",
        variant         = "hr",
    },
    {
        description     = "Hungarian",
        script          = "latn",
        bibliographical = "hun",
        terminological  = "hun",
        context         = "hu",
        opentype        = "hun",
        variant         = "hu",
    },
    {
        description     = "Italian",
        script          = "latn",
        bibliographical = "ita",
        terminological  = "ita",
        context         = "it",
        opentype        = "ita",
        variant         = "it",
    },
    {
        description     = "Japanese",
        script          = "jpan",
        bibliographical = "jpn",
        terminological  = "jpn",
        context         = "ja",
        opentype        = "jan",
        variant         = "ja",
    },
    {
        description     = "Latin",
        script          = "latn",
        bibliographical = "lat",
        terminological  = "lat",
        context         = "la",
        opentype        = "lat",
        variant         = "la",
    },
    {
        description     = "Portuguese",
        script          = "latn",
        bibliographical = "por",
        terminological  = "por",
        context         = "pt",
        opentype        = "ptg",
        variant         = "pt",
    },
    {
        description     = "Polish",
        script          = "latn",
        bibliographical = "pol",
        terminological  = "pol",
        context         = "pl",
        opentype        = "plk",
        variant         = "pl",
    },
    {
        description     = "Romanian",
        script          = "latn",
        bibliographical = "rum",
        terminological  = "ron",
        context         = "ro",
        opentype        = "rom",
        variant         = "ro",
    },
    {
        description     = "Russian",
        script          = "cyrl",
        bibliographical = "rus",
        terminological  = "rus",
        context         = "ru",
        opentype        = "rus",
        variant         = "ru",
    },
    {
        description     = "Slovak",
        script          = "latn",
        bibliographical = "slo",
        terminological  = "slk",
        context         = "sk",
        opentype        = "sky",
        variant         = "sk",
    },
    {
        description     = "Slovenian",
        script          = "latn",
        bibliographical = "slv",
        terminological  = "slv",
        context         = "sl",
        opentype        = "slv",
        variant         = "sl",
    },
    {
        description     = "Swedish",
        script          = "latn",
        bibliographical = "swe",
        terminological  = "swe",
        context         = "sv",
        opentype        = "sve",
        variant         = "sv",
    },
    {
        description     = "Thai",
        script          = "thai",
     -- bibliographical = "",
     -- terminological  = "",
        context         = "th",
        opentype        = "tha",
     -- variant         = "",
    },
    {
        description     = "Turkish",
        script          = "latn",
        bibliographical = "tur",
        terminological  = "tur",
        context         = "tr",
        opentype        = "trk",
        variant         = "tr",
    },
    {
        description     = "Vietnamese",
        script          = "latn",
        bibliographical = "vie",
        terminological  = "vie",
        context         = "vn",
        opentype        = "vit",
        variant         = "vi",
    },
    {
        description     = "Chinese, simplified",
        script          = "hans",
        opentypescript  = "hani",
        bibliographical = "chi",
        terminological  = "zho",
        context         = "cn",
        opentype        = "zhs",
        variant         = "zh-hans",
    },
}
391
-- Lookup tables, one per kind of tag. They start out empty and are
-- published on languages.data next to the raw specification list.
local variants        = { }
local contexts        = { }
local records         = { }
local scripts         = { }
local opentypes       = { }
local opentypescripts = { }

data.specifications  = specifications
data.variants        = variants
data.contexts        = contexts
data.records         = records
data.scripts         = scripts
data.opentypes       = opentypes
data.opentypescripts = opentypescripts
400
-- Index every specification under each of the tags it carries. All keys
-- are stored lowercased. The list is walked front to back, so when two
-- records share a tag the later record replaces the earlier one.
for i=1,#specifications do
    local specification = specifications[i]
    local variant = specification.variant
    if variant then
        variants[lower(variant)] = specification
    end
    local opentype = specification.opentype
    if opentype then
        opentypes[lower(opentype)] = specification
    end
    local script = specification.script
    if script then
        scripts[lower(script)] = specification
    end
    local opentypescript = specification.opentypescript
    if opentypescript then
        opentypescripts[lower(opentypescript)] = specification
    end
    -- The context tag has to be read from the record itself; a bare
    -- 'context' here would resolve to whatever outer or global variable of
    -- that name exists and the records would never get registered.
    local context = specification.context
    if context then
        if type(context) == "table" then
            -- A record may list several context tags.
            for j=1,#context do
                contexts[lower(context[j])] = specification
            end
        else
            contexts[lower(context)] = specification
        end
    end
end
430
-- Record used as the fallback by the lookup handlers below: the one
-- registered under the (lowercased) variant tag "en-us".
local defaultvariant = variants["en-us"]

-- Find the entry stored under tag 'k', trying the variant, opentype and
-- context tables in that order without triggering their metatables, and
-- return its field 'key'. When none of the tables holds 'k' the (false)
-- lookup result itself is returned, i.e. nil.
local function get(k,key)
    local entry = rawget(variants,k)
    if not entry then
        entry = rawget(opentypes,k)
    end
    if not entry then
        entry = rawget(contexts,k)
    end
    if entry then
        return entry[key]
    end
    return entry
end
437
-- Handler for tags that are not (yet) present in 'variants': the tag is
-- lowercased, resolved via 'get', and the outcome is stored under the
-- lowercased tag before it is returned.
-- NOTE(review): no record visible in this file defines a 'language' field,
-- so both the lookup and the default appear to yield nil here; possibly
-- 'variant' was meant -- confirm before relying on this fallback.
setmetatableindex(variants, function(cache,name)
    local tag = lower(name)
    local language = get(tag,"language")
    if not language then
        language = defaultvariant.language
    end
    cache[tag] = language
    return language
end)
444
-- Handler for tags that are not (yet) present in 'opentypes': the tag is
-- lowercased and resolved to the 'opentype' field of whatever entry 'get'
-- finds for it, with "dflt" as the last resort. The result is stored under
-- the lowercased tag so the next lookup is a plain table access.
setmetatableindex(opentypes, function(cache,name)
    local tag = lower(name)
    local opentype = get(tag,"opentype")
    if not opentype then
        opentype = "dflt"
    end
    cache[tag] = opentype
    return opentype
end)
451
-- Handler for tags that are not (yet) present in 'opentypescripts'. The
-- lowercased tag is resolved by trying, in this order: the entry's
-- 'opentypescript' field, the entry's 'script' field, and then the same two
-- fields of the default record. The result is stored under the lowercased
-- tag before it is returned.
setmetatableindex(opentypescripts, function(cache,name)
    local tag = lower(name)
    local script = get(tag,"opentypescript")
    if not script then
        script = get(tag,"script")
    end
    if not script then
        script = defaultvariant.opentypescript
    end
    if not script then
        script = defaultvariant.script
    end
    cache[tag] = script
    return script
end)
458
-- Handler for tags that are not (yet) present in 'contexts': the tag is
-- lowercased and resolved to the 'context' field of whatever entry 'get'
-- finds for it, falling back to the context tag of the default record.
-- The result is stored under the lowercased tag before it is returned.
setmetatableindex(contexts, function(t,k)
    -- Lowercase the key that was actually asked for.
    k = lower(k)
    local v = get(k,"context") or defaultvariant.context
    -- A record may list several context tags; the first one is used.
    v = type(v) == "table" and v[1] or v
    t[k] = v
    return v
end)
466
-- Handler for tags that are not (yet) present in 'records': the tag is
-- lowercased and resolved to the complete specification registered for it
-- as a variant, opentype or context tag (tried in that order), falling back
-- to the default record. The result is stored under the lowercased tag.
setmetatableindex(records, function(t,k) -- how useful is this one?
    k = lower(k)
    -- Only table values count as records: the fallback handlers of the
    -- other lookup tables store plain strings in those same tables.
    local v = rawget(variants,k)
    if type(v) ~= "table" then
        v = rawget(opentypes,k)
        if type(v) ~= "table" then
            v = rawget(contexts,k)
            if type(v) ~= "table" then
                v = defaultvariant
            end
        end
    end
    t[k] = v
    return v
end)
473
474-- print(opentypes.nl,opentypescripts.nl)
475-- print(opentypes.de,opentypescripts.de)
476