-- mtx-babel.lua / size: 10 Kb / last modification: 2020-07-01 14:35
-- Make sure the global 'modules' table exists, then record the metadata of
-- this script under its own name.
modules = modules or { }

modules["mtx-babel"] = {
    license   = "see context related readme files",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    comment   = "companion to mtxrun.lua",
    version   = 1.002,
}
8
9-- data tables by Thomas A. Schmitz
10
-- XML description of the command line interface; it is handed to
-- logs.application and drives the help output of this script.
--
-- The text of the 'structure' flag names 'document' as the default because
-- that is the value scripts.babel.convert falls back to when --structure is
-- not given.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-babel</entry>
  <entry name="detail">Babel Input To UTF Conversion</entry>
  <entry name="version">1.20</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="language" value="string"><short>conversion language (e.g. greek)</short></flag>
    <flag name="structure" value="string"><short>obey given structure (e.g. 'context', default: 'document')</short></flag>
    <flag name="convert"><short>convert babel codes into utf</short></flag>
   </subcategory>
  </category>
 </flags>
</application>
]]
30
-- Create the application object for this script through logs.application.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "Babel Input To UTF Conversion 1.20",
    name     = "mtx-babel",
})

-- Shorthand for the application's message function.
local report = application.report

-- Make sure the global 'scripts' table and its 'babel' entry exist, without
-- replacing anything that is already there.
if not scripts then
    scripts = { }
end
if not scripts.babel then
    scripts.babel = { }
end
41
do

    -- Converters are stored per language and, within a language, per
    -- structure name: converters[language][structure] is an lpeg pattern.

    local converters = { }

    -- greek
    --
    -- Each table below maps the base letter of a Babel transliteration
    -- sequence onto the corresponding Unicode character. The comment after
    -- the table name shows the shape of the sequence, with '*' standing for
    -- the base letter:
    --
    --   <  rough breathing      >  smooth breathing
    --   '  acute                `  grave
    --   ~  circumflex           "  diaeresis
    --   |  iota subscript (written after the letter)
    --
    -- Base letters: a e h i o u w r stand for alpha, epsilon, eta, iota,
    -- omicron, upsilon, omega and rho. Unaccented letters are mapped by
    -- replace_26.
    --
    -- A capital upsilon with smooth breathing is written as a spacing accent
    -- followed by a plain capital upsilon (see the U entries of replace_11,
    -- replace_13, replace_15 and replace_23).

    local replace_01 = { -- <' * |
        a = "ᾅ",
        h = "ᾕ",
        w = "ᾥ",
    }

    local replace_02 = { -- >' * |
        a = "ᾄ",
        h = "ᾔ",
        w = "ᾤ",
    }

    local replace_03 = { -- <` * |
        a = "ᾃ",
        h = "ᾓ",
        w = "ᾣ",
    }

    local replace_04 = { -- >` * |
        a = "ᾂ",
        h = "ᾒ",
        w = "ᾢ",
    }

    local replace_05 = { -- <~ * |
        a = "ᾇ",
        h = "ᾗ",
        w = "ᾧ",
    }

    local replace_06 = { -- >~ * |
        a = "ᾆ",
        h = "ᾖ",
        w = "ᾦ",
    }

    local replace_07 = { -- "' *
        i = "ΐ",
        u = "ΰ",
    }

    local replace_08 = { -- "` *
        i = "ῒ",
        u = "ῢ",
    }

    local replace_09 = { -- "~ *
        i = "ῗ",
        u = "ῧ",
    }

    local replace_10 = { -- <' *
        a = "ἅ",
        e = "ἕ",
        h = "ἥ",
        i = "ἵ",
        o = "ὅ",
        u = "ὕ",
        w = "ὥ",
        A = "Ἅ",
        E = "Ἕ",
        H = "Ἥ",
        I = "Ἵ",
        O = "Ὅ",
        U = "Ὕ",
        W = "Ὥ",
    }

    local replace_11 = { -- >' *
        a = "ἄ",
        e = "ἔ",
        h = "ἤ",
        i = "ἴ",
        o = "ὄ",
        u = "ὔ",
        w = "ὤ",
        A = "Ἄ",
        E = "Ἔ",
        H = "Ἤ",
        I = "Ἴ",
        O = "Ὄ",
        U = "῎Υ",
        W = "Ὤ",
    }

    local replace_12 = { -- <` *
        a = "ἃ",
        e = "ἓ",
        h = "ἣ",
        i = "ἳ",
        o = "ὃ",
        u = "ὓ",
        w = "ὣ",
        A = "Ἃ",
        E = "Ἓ",
        H = "Ἣ",
        I = "Ἳ",
        O = "Ὃ",
        U = "Ὓ",
        W = "Ὣ",
    }

    local replace_13 = { -- >` *
        a = "ἂ",
        e = "ἒ",
        h = "ἢ",
        i = "ἲ",
        o = "ὂ",
        u = "ὒ",
        w = "ὢ",
        A = "Ἂ",
        E = "Ἒ",
        H = "Ἢ",
        I = "Ἲ",
        O = "Ὂ",
        U = "῍Υ",
        W = "Ὢ",
    }

    local replace_14 = { -- <~ *
        a = "ἇ",
        h = "ἧ",
        i = "ἷ",
        u = "ὗ",
        w = "ὧ",
        A = "Ἇ",
        H = "Ἧ",
        I = "Ἷ",
        U = "Ὗ",
        W = "Ὧ",
    }

    local replace_15 = { -- >~ *
        a = "ἆ",
        h = "ἦ",
        i = "ἶ",
        u = "ὖ",
        w = "ὦ",
        A = "Ἆ",
        H = "Ἦ",
        I = "Ἶ",
        U = "῏Υ",
        W = "Ὦ",
    }

    local replace_16 = { -- ' * |
        a = "ᾴ",
        h = "ῄ",
        w = "ῴ",
    }

    local replace_17 = { -- ` * |
        a = "ᾲ",
        h = "ῂ",
        w = "ῲ",
    }

    local replace_18 = { -- ~ * |
        a = "ᾷ",
        h = "ῇ",
        w = "ῷ",
    }

    local replace_19 = { -- ' *
        a = "ά",
        e = "έ",
        h = "ή",
        i = "ί",
        o = "ό",
        u = "ύ",
        w = "ώ",
        -- NOTE(review): a doubled ' is taken to be the Babel way of typing an
        -- apostrophe, hence the right single quote; confirm against the
        -- original data tables.
        ["'"] = "’",
    }

    local replace_20 = { -- ` *
        a = "ὰ",
        e = "ὲ",
        h = "ὴ",
        i = "ὶ",
        o = "ὸ",
        u = "ὺ",
        w = "ὼ",
    }

    local replace_21 = { -- ~ *
        a = "ᾶ",
        h = "ῆ",
        i = "ῖ",
        u = "ῦ",
        w = "ῶ",
    }

    local replace_22 = { -- < *
        a = "ἁ",
        e = "ἑ",
        h = "ἡ",
        i = "ἱ",
        o = "ὁ",
        u = "ὑ",
        w = "ὡ",
        r = "ῥ",
        A = "Ἁ",
        E = "Ἑ",
        H = "Ἡ",
        I = "Ἱ",
        O = "Ὁ",
        U = "Ὑ",
        W = "Ὡ",
        R = "Ῥ",
    }

    local replace_23 = { -- > *
        a = "ἀ",
        e = "ἐ",
        h = "ἠ",
        i = "ἰ",
        o = "ὀ",
        u = "ὐ",
        w = "ὠ",
        A = "Ἀ",
        E = "Ἐ",
        H = "Ἠ",
        I = "Ἰ",
        O = "Ὀ",
        U = "᾿Υ",
        W = "Ὠ",
    }

    local replace_24 = { -- * |
        a = "ᾳ",
        h = "ῃ",
        w = "ῳ",
    }

    local replace_25 = { -- " *
        i = "ϊ",
        u = "ϋ",
    }

    local replace_26 = { -- *
        a = "α",
        b = "β",
        g = "γ",
        d = "δ",
        e = "ε",
        z = "ζ",
        h = "η",
        j = "θ",
        i = "ι",
        k = "κ",
        l = "λ",
        m = "μ",
        n = "ν",
        x = "ξ",
        o = "ο",
        p = "π",
        r = "ρ",
        s = "σ",
        c = "ς",
        t = "τ",
        u = "υ",
        f = "φ",
        q = "χ",
        y = "ψ",
        w = "ω",
        A = "Α",
        B = "Β",
        G = "Γ",
        D = "Δ",
        E = "Ε",
        Z = "Ζ",
        H = "Η",
        J = "Θ",
        I = "Ι",
        K = "Κ",
        L = "Λ",
        M = "Μ",
        N = "Ν",
        X = "Ξ",
        O = "Ο",
        P = "Π",
        R = "Ρ",
        S = "Σ",
        T = "Τ",
        U = "Υ",
        F = "Φ",
        Q = "Χ",
        Y = "Ψ",
        W = "Ω",
        [";"] = "·",
        ["?"] = ";",
    }

    local P, R, S, V, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Cs

    -- Input that is copied unchanged: control sequences (a backslash followed
    -- by letters) and bracketed arguments that contain no further brackets.

    local skips_01 = P("\\")   * R("az", "AZ")^1
    local skips_02 = P("[")    * (1- S("[]"))^1  * P("]")

    -- One pattern per replacement table. The single character captured by
    -- Cs(1) is the lookup key; when the table has no entry for it the pattern
    -- still matches but yields no replacement, so the enclosing substitution
    -- capture keeps the matched text as it is.

    local greek_01 = (P("<'")  * Cs(1) * P('|')) / replace_01
    local greek_02 = (P(">'")  * Cs(1) * P('|')) / replace_02
    local greek_03 = (P("<`")  * Cs(1) * P('|')) / replace_03
    local greek_04 = (P(">`")  * Cs(1) * P('|')) / replace_04
    local greek_05 = (P("<~")  * Cs(1) * P('|')) / replace_05
    local greek_06 = (P(">~")  * Cs(1) * P('|')) / replace_06
    local greek_07 = (P('"\'') * Cs(1)         ) / replace_07
    local greek_08 = (P('"`')  * Cs(1)         ) / replace_08
    local greek_09 = (P('"~')  * Cs(1)         ) / replace_09
    local greek_10 = (P("<'")  * Cs(1)         ) / replace_10
    local greek_11 = (P(">'")  * Cs(1)         ) / replace_11
    local greek_12 = (P("<`")  * Cs(1)         ) / replace_12
    local greek_13 = (P(">`")  * Cs(1)         ) / replace_13
    local greek_14 = (P("<~")  * Cs(1)         ) / replace_14
    local greek_15 = (P(">~")  * Cs(1)         ) / replace_15
    local greek_16 = (P("'")   * Cs(1) * P('|')) / replace_16
    local greek_17 = (P("`")   * Cs(1) * P('|')) / replace_17
    local greek_18 = (P("~")   * Cs(1) * P('|')) / replace_18
    local greek_19 = (P("'")   * Cs(1)         ) / replace_19
    local greek_20 = (P("`")   * Cs(1)         ) / replace_20
    local greek_21 = (P("~")   * Cs(1)         ) / replace_21
    local greek_22 = (P("<")   * Cs(1)         ) / replace_22
    local greek_23 = (P(">")   * Cs(1)         ) / replace_23
    local greek_24 = (Cs(1)    * P('|')        ) / replace_24
    local greek_25 = (P('"')   * Cs(1)         ) / replace_25
    local greek_26 = (Cs(1)                    ) / replace_26

    local skips =
        skips_01 + skips_02

    -- The order matters: this is an ordered choice, so the longer sequences
    -- (two accents plus iota subscript) have to be tried before the shorter
    -- ones that share the same prefix.

    local greek =
        greek_01 + greek_02 + greek_03 + greek_04 + greek_05 +
        greek_06 + greek_07 + greek_08 + greek_09 + greek_10 +
        greek_11 + greek_12 + greek_13 + greek_14 + greek_15 +
        greek_16 + greek_17 + greek_18 + greek_19 + greek_20 +
        greek_21 + greek_22 + greek_23 + greek_24 + greek_25 +
        greek_26

    local spacing      = S(" \n\r\t")
    local startgreek   = P("\\startgreek")
    local stopgreek    = P("\\stopgreek")
    local localgreek   = P("\\localgreek")
    local lbrace       = P("{")
    local rbrace       = P("}")

    -- 'document' structure: the whole input is converted, apart from the
    -- skipped control sequences and bracketed arguments.

    local documentparser = Cs((skips + greek + 1)^0)

    -- 'context' structure: only text between \startgreek and \stopgreek and
    -- the (possibly nested) braced argument of \localgreek is converted;
    -- everything else is copied unchanged.
    --
    --   scan    : walks over the complete input
    --   global  : \startgreek followed by converted text up to, but not
    --             including, \stopgreek
    --   local   : \localgreek followed by a braced group
    --   grouped : optional spacing and a braced group whose content is
    --             converted; nested groups are handled recursively

    local contextgrammar = Cs ( P { "scan",
        ["scan"]     = (V("global") + V("local") + skips + 1)^0,
        ["global"]   = startgreek * ((skips + greek + 1)-stopgreek )^0 ,
        ["local"]    = localgreek * V("grouped"),
        ["grouped"]  = spacing^0 * lbrace * (V("grouped") + skips + (greek - rbrace))^0 * rbrace,
    } )

    converters['greek'] = {
        document = documentparser,
        context  = contextgrammar,
    }

    -- lpeg.print(parser): 254 lines

    -- Convert the Babel input codes in a file to UTF and save the result in
    -- a new file named after the input file with ".utf" appended.
    --
    -- filename : name of the file to convert
    --
    -- The language is read from the 'language' command line argument and the
    -- structure from the 'structure' argument, which falls back to
    -- "document". Every reason for not converting (no filename, no data, no
    -- language, unknown language or structure) is reported and ends the
    -- function without writing anything.

    function scripts.babel.convert(filename)
        -- compare with the empty string: the call at the end of this script
        -- passes "" when no file was given on the command line
        if not filename or filename == "" then
            report("provide filename")
            return
        end
        local data = io.loaddata(filename) or ""
        if data == "" then
            report("no data in '%s'",filename)
            return
        end
        local language = environment.argument("language") or ""
        if language == "" then
            report("provide language")
            return
        end
        local converter = converters[language]
        if not converter then
            report("no converter for language '%s'", language)
            return
        end
        local structure = environment.argument("structure") or "document"
        local parser = converter[structure]
        if not parser then
            report("unknown structure '%s' language '%s'", structure, language)
            return
        end
        report("converting '%s' using language '%s' with structure '%s'", filename, language, structure)
        local newfilename = filename .. ".utf"
        io.savedata(newfilename, parser:match(data))
        report("converted data saved in '%s'", newfilename)
    end

    --~ print(contextgrammar:match [[
    --~ oeps abg \localgreek{a}
    --~ \startgreek abg \stopgreek \oeps
    --~ oeps abg \localgreek{a{b}\oeps g}
    --~ ]])

end
445
-- Command line dispatch: --convert converts the first file given,
-- --exporthelp exports the help information, anything else shows the help.
local argument = environment.argument

if argument("convert") then
    local firstfile = environment.files[1]
    if not firstfile then
        firstfile = ""
    end
    scripts.babel.convert(firstfile)
else
    local exporthelp = argument("exporthelp")
    if exporthelp then
        application.export(exporthelp, environment.files[1])
    else
        application.help()
    end
end
452end
453