mtx-patterns.lua /size: 30 Kb    last modification: 2021-10-28 13:50
-- Register the metadata of this script in the global 'modules' table, which is
-- created here when it does not exist yet.
if not modules then modules = { } end modules ['mtx-patterns'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9local format, find, concat, gsub, match, gmatch = string.format, string.find, table.concat, string.gsub, string.match, string.gmatch
10local byte, char = utf.byte, utf.char
11local addsuffix = file.addsuffix
12local lpegmatch, lpegsplit, lpegpatterns, validutf8 = lpeg.match, lpeg.split, lpeg.patterns, lpeg.patterns.validutf8
13local P, V, Cs = lpeg.P, lpeg.V, lpeg.Cs
14
-- The help information in xml format; it is handed to logs.application below,
-- together with the banner.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-patterns</entry>
  <entry name="detail">ConTeXt Pattern File Management</entry>
  <entry name="version">0.20</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="convert"><short>generate context language files (mnemonic driven, if not given then all)</short></flag>
    <flag name="check"><short>check pattern file (or those used by context when no file given)</short></flag>
    <flag name="path"><short>source path where hyph-foo.tex files are stored</short></flag>
    <flag name="destination"><short>destination path</short></flag>
    <flag name="specification"><short>additional patterns: e.g.: =cy,hyph-cy,welsh</short></flag>
    <flag name="compress"><short>compress data</short></flag>
    <flag name="words"><short>update words in given file</short></flag>
    <flag name="hyphenate"><short>show hyphenated words</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script pattern --check hyph-*.tex</command></example>
    <example><command>mtxrun --script pattern --check   --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns</command></example>
    <example><command>mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/tex --destination=e:/tmp/patterns</command></example>
    <example><command>mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns</command></example>
    <example><command>mtxrun --script pattern --hyphenate --language=nl --left=3 nogalwiedes inderdaad</command></example>
   </subcategory>
  </category>
 </examples>
</application>
]]
51
-- The application object gets the name, banner and help information of this
-- script; its report function is used for all messages in this file.
local application = logs.application {
    name     = "mtx-patterns",
    banner   = "ConTeXt Pattern File Management 0.20",
    helpinfo = helpinfo,
}

local report = application.report

-- Create the (global) namespace of this script, keeping what is already there.
scripts          = scripts          or { }
scripts.patterns = scripts.patterns or { }
62
-- Code points that the loader always accepts in patterns and exceptions,
-- without looking at their category in the character database.
local permitted_characters = table.tohash {
    0x0009, -- tab
    0x0027, -- apostrophe
    0x02BC, -- modifier apostrophe (used in greek)
    0x002D, -- hyphen
    0x200C, -- zwnj
    0x2019, -- quote right
    0x1FBD, -- greek, but no letter: symbol modifier
    0x1FBF, -- greek, but no letter: symbol modifier
}

-- Per language sets of code points (the fourth field of an entry in the list
-- below): the loader leaves entries that contain one of them out of the "old"
-- pattern and exception lists.
local ignored_ancient_greek = table.tohash {
    0x1FD3, -- greekiotadialytikatonos (also 0x0390)
    0x1FE3, -- greekupsilondialytikatonos (also 0x03B0)
    0x1FBD, -- greek, but no letter: symbol modifier
    0x1FBF, -- greek, but no letter: symbol modifier
    0x03F2, -- greeksigmalunate
    0x02BC, -- modifier apostrophe
}

local ignored_french = table.tohash {
    0x02BC, -- modifier apostrophe
}

-- Replacements (character to character) that the loader applies to the "old"
-- lists of all languages.
local replaced_whatever =  {
    [char(0x2019)] = char(0x0027)
}
90
-- The languages that are processed. The check and convert functions read the
-- fields of an entry as follows:
--
--   [1] mnemonic, used for selecting languages and in the names of saved files
--   [2] base name of the source file(s)
--   [3] description (not read by check or convert)
--   [4] optional hash with ignored code points (or false)
--   [5] optional list with base names of txt files that are merged
--
-- Entries that are commented out are not processed.
scripts.patterns.list = {
    { "af",  "hyph-af",            "afrikaans" },
 -- { "ar",  "hyph-ar",            "arabic" },
 -- { "as",  "hyph-as",            "assamese" },
    { "bg",  "hyph-bg",            "bulgarian" },
 -- { "bn",  "hyph-bn",            "bengali" },
    { "ca",  "hyph-ca",            "catalan" },
 -- { "??",  "hyph-cop",           "coptic" },
    { "cs",  "hyph-cs",            "czech" },
    { "cy",  "hyph-cy",            "welsh" },
    { "da",  "hyph-da",            "danish" },
    { "deo", "hyph-de-1901",       "german, old spelling" },
    { "de",  "hyph-de-1996",       "german, new spelling" },
 -- { "??",  "hyph-de-ch-1901",    "swiss german" },
 -- { "??",  "hyph-el-monoton",    "greek" },
 -- { "gr",  "hyph-el-polyton",    "greek" },
    { "agr", "hyph-grc",           "ancient greek", ignored_ancient_greek },
    { "gb",  "hyph-en-gb",         "british english" },
    { "us",  "hyph-en-us",         "american english" },
    { "eo",  "hyph-eo",            "esperanto" },
    { "es",  "hyph-es",            "spanish" },
    { "et",  "hyph-et",            "estonian" },
    { "eu",  "hyph-eu",            "basque" },
 -- { "fa",  "hyph-fa",            "farsi" },
    { "fi",  "hyph-fi",            "finnish" },
    { "fr",  "hyph-fr",            "french", ignored_french },
 -- { "??",  "hyph-ga",            "irish" },
 -- { "??",  "hyph-gl",            "galician" },
 -- { "gu",  "hyph-gu",            "gujarati" },
 -- { "hi",  "hyph-hi",            "hindi" },
    { "hr",  "hyph-hr",            "croatian" },
 -- { "??",  "hyph-hsb",           "upper sorbian" },
    { "hu",  "hyph-hu",            "hungarian" },
 -- { "hy",  "hyph-hy",            "armenian" },
 -- { "??",  "hyph-ia",            "interlingua" },
 -- { "??",  "hyph-id",            "indonesian" },
    { "is",  "hyph-is",            "icelandic" },
    { "it",  "hyph-it",            "italian" },
 -- { "??",  "hyph-kmr",           "kurmanji" },
 -- { "kn",  "hyph-kn",            "kannada" },
    { "la",  "hyph-la",            "latin" },
    { "ala", "hyph-la-x-classic",  "ancient latin" },
 -- { "lo",  "hyph-lo",            "lao" },
    { "lt",  "hyph-lt",            "lithuanian" },
    { "lv",  "hyph-lv",            "latvian" },
    { "mk",  "hyph-mk",            "macedonian" },
    { "ml",  "hyph-ml",            "malayalam" },
    { "mn",  "hyph-mn-cyrl",       "mongolian, cyrillic script" },
 -- { "mr",  "hyph-mr",            "..." },
    { "nb",  "hyph-nb",            "norwegian bokmål" },
    { "nl",  "hyph-nl",            "dutch" },
    { "nn",  "hyph-nn",            "norwegian nynorsk" },
 -- { "or",  "hyph-or",            "oriya" },
 -- { "pa",  "hyph-pa",            "panjabi" },
 -- { "",    "hyph-",              "" },
    { "pl",  "hyph-pl",            "polish" },
    { "pt",  "hyph-pt",            "portuguese" },
    { "ro",  "hyph-ro",            "romanian" },
    { "ru",  "hyph-ru",            "russian" },
 -- { "sa",  "hyph-sa",            "sanskrit" },
    { "sk",  "hyph-sk",            "slovak" },
    { "sl",  "hyph-sl",            "slovenian" },
    { "sq",  "hyph-sq",            "albanian" },
    { "sr",  "hyph-sr",            "serbian", false, { "hyph-sr-cyrl", "hyph-sr-latn" }, },
 -- { "sr",  "hyph-sr-cyrl",       "serbian", false },
 -- { "sr",  "hyph-sr-latn",       "serbian" },
    { "sv",  "hyph-sv",            "swedish" },
 -- { "ta",  "hyph-ta",            "tamil" },
 -- { "te",  "hyph-te",            "telugu" },
    { "th",  "hyph-th",            "thai" },
    { "tk",  "hyph-tk",            "turkmen" },
    { "tr",  "hyph-tr",            "turkish" },
    { "uk",  "hyph-uk",            "ukrainian" },
    { "zh",  "hyph-zh-latn-pinyin","zh-latn, chinese pinyin" },
}
166
167-- stripped down from lpeg example:
168
-- Match the given string against the validutf8 lpeg pattern and pass on
-- whatever that match returns.
function utf.check(str)
    local validator = lpegpatterns.validutf8 -- fetched at call time
    return lpegmatch(validator,str)
end
172
173-- *.tex
174-- *.hyp.txt *.pat.txt *.lic.txt *.chr.txt
175
-- Load the patterns and exceptions of one language and clean them up.
--
--   path     : directory where the source files are looked up
--   name     : base name of the source file(s), also used in messages
--   mnemonic : language tag (not used in this function)
--   ignored  : optional hash of code points; entries with one of these are
--              left out of the "old" lists and "old" used characters
--   merged   : optional list of base names; when given their lic/pat/hyp txt
--              files are concatenated instead of loading 'name'
--
-- The sources are tried in this order: merged txt files, name.pat.txt (plus
-- the hyp and lic files), name.tex. The data is split at whitespace, after
-- which comments, entries with commands, invalid utf, unexpected characters
-- and duplicates are removed, each with a message.
--
-- Returns: okay, new patterns, new exceptions, old patterns, old exceptions,
-- the comment, the strip set (always an empty string) and four hashes (keyed
-- by character) with the characters used in the new patterns, new exceptions,
-- old patterns and old exceptions.

function scripts.patterns.load(path,name,mnemonic,ignored, merged)
    local fullname = file.join(path,name)
    local basename = name
    local texfile  = addsuffix(fullname,"tex")
    local hypfile  = addsuffix(fullname,"hyp.txt")
    local patfile  = addsuffix(fullname,"pat.txt")
    local licfile  = addsuffix(fullname,"lic.txt")
 -- local chrfile  = addsuffix(fullname,"chr.txt")
    local okay = true
    local hyphenations, patterns, comment, stripset = "", "", "", ""
    local splitpatternsnew, splithyphenationsnew = { }, { }
    local splitpatternsold, splithyphenationsold = { }, { }
    local usedpatterncharactersnew, usedhyphenationcharactersnew = { }, { }

    -- get the raw data

    if merged then
        -- no version info
        report("using merged txt files %s.[hyp|pat|lic].txt",name)
        for i=1,#merged do
            local fullname = file.join(path,merged[i])
            comment      = comment       .. (io.loaddata(addsuffix(fullname,"lic.txt")) or "") .. "\n\n"
            patterns     = patterns      .. (io.loaddata(addsuffix(fullname,"pat.txt")) or "") .. "\n\n"
            hyphenations = hyphenations  .. (io.loaddata(addsuffix(fullname,"hyp.txt")) or "") .. "\n\n"
        end
    elseif lfs.isfile(patfile) then
        -- no version info
        report("using txt files %s.[hyp|pat|lic].txt",name)
        comment      = io.loaddata(licfile) or ""
        patterns     = io.loaddata(patfile) or ""
        hyphenations = io.loaddata(hypfile) or ""
    elseif lfs.isfile(texfile) then
        -- version info in comment blob
        report("using tex file %s.tex",name)
        local data = io.loaddata(texfile) or ""
        if data ~= "" then
            -- files that are \input at the start of a line are inserted
            data = gsub(data,"([\n\r])\\input ([^ \n\r]+)", function(previous,subname)
                local subname = addsuffix(subname,"tex")
                local subfull = file.join(file.dirname(texfile),subname)
                local subdata = io.loaddata(subfull) or ""
                if subdata == "" then
                    report("%s: no subfile %s",basename,subname)
                end
                return previous .. subdata
            end)
            data = gsub(data,"%%.-[\n\r]","")  -- comments go
            data = gsub(data," *[\n\r]+","\n") -- so do trailing spaces and empty lines
            patterns     = match(data,"\\patterns[%s]*{[%s]*(.-)[%s]*}") or ""
            hyphenations = match(data,"\\hyphenation[%s]*{[%s]*(.-)[%s]*}") or ""
            comment      = match(data,"^(.-)[\n\r]\\patterns") or ""
        else
            okay = false
        end
    else
        okay = false
    end

    if okay then

        -- split at whitespace, so each entry is one pattern or exception

        local separator = lpegpatterns.whitespace^1
        splitpatternsnew     = lpegsplit(separator,patterns)
        splithyphenationsnew = lpegsplit(separator,hyphenations)

        -- remove comments (the blob is tested first so that we can skip the loop)

        local function removecomments(data,splitdata)
            if find(data,"%%") then
                for i=1,#splitdata do
                    local line = splitdata[i]
                    if find(line,"%%") then
                        splitdata[i] = gsub(line,"%%.*$","")
                        report("%s: removing comment: %s",basename,line)
                    end
                end
            end
        end

        removecomments(patterns,splitpatternsnew)
        removecomments(hyphenations,splithyphenationsnew)

        -- remove entries with commands

        local function removecommands(data,splitdata)
            if find(data,"\\") then
                for i=1,#splitdata do
                    local line = splitdata[i]
                    if find(line,"\\") then
                        splitdata[i] = ""
                        report("%s: removing line with command: %s",basename,line)
                    end
                end
            end
        end

        removecommands(patterns,splitpatternsnew)
        removecommands(hyphenations,splithyphenationsnew)

        -- remove entries that are not valid utf

        local function removeinvalid(splitdata)
            for i=1,#splitdata do
                local line = splitdata[i]
                if not lpegmatch(validutf8,line) then
                    splitdata[i] = ""
                    report("%s: removing line with invalid utf: %s",basename,line)
                end
            end
        end

        removeinvalid(splitpatternsnew)
        removeinvalid(splithyphenationsnew)

        -- remove funny entries and register the characters that are used; the
        -- special character (period or hyphen) is accepted but not registered

        local cd = characters.data

        local function collectused(splitdata,special)
            local used = { }
            for i=1,#splitdata do
                local line = splitdata[i]
                for b in line:utfvalues() do -- could be an lpeg
                    if b == special then
                        -- not registered
                    elseif permitted_characters[b] then
                        used[char(b)] = true
                    else
                        local cdb = cd[b]
                        if not cdb then
                            report("%s: no entry in chardata for character %C",basename,b)
                        else
                            local ct = cdb.category
                            if ct == "lu" or ct == "ll" or ct == "lo" or ct == "mn" or ct == "mc" then -- hm, really mn and mc ?
                                used[char(b)] = true
                            elseif ct == "nd" then
                                -- number
                            elseif ct == "cf" then
                                report("%s: %s line with suspected utf character %C, category %s: %s",basename,"keeping",b,ct,line)
                                used[char(b)] = true
                            else -- maybe accent cf  (200D)
                                report("%s: %s line with suspected utf character %C, category %s: %s",basename,"removing",b,ct,line)
                                splitdata[i] = ""
                                break
                            end
                        end
                    end
                end
            end
            return used
        end

        usedpatterncharactersnew     = collectused(splitpatternsnew,byte("."))
        usedhyphenationcharactersnew = collectused(splithyphenationsnew,byte("-"))

        -- make the old lists: entries with ignored characters are dropped and
        -- the replacements are applied

        local function sanitized(what)
            -- ignored (per language)
            local p = nil
            if ignored then
                for k, v in next, ignored do
                    if p then
                        p = p + P(char(k))
                    else
                        p = P(char(k))
                    end
                end
                if p then -- an empty hash gives no pattern
                    p = P{ p + 1 * V(1) } -- anywhere
                end
            end
            -- replaced (all languages)
            local r = nil
            for k, v in next, replaced_whatever do
                if r then
                    r = r + P(k)/v
                else
                    r = P(k)/v
                end
            end
            r = Cs((r + 1)^0)
            local result = { }
            for i=1,#what do
                local line = what[i]
                if p and lpegmatch(p,line) then
                    report("%s: discarding conflicting pattern: %s",basename,line)
                else -- we can speed this up by testing for replacements in the string
                    local l = lpegmatch(r,line)
                    if l ~= line then
                        report("%s: sanitizing pattern: %s -> %s (for old patterns)",basename,line,l)
                    end
                    result[#result+1] = l
                end
            end
            return result
        end

        splitpatternsold     = sanitized(splitpatternsnew)
        splithyphenationsold = sanitized(splithyphenationsnew)

        -- discard empty entries and duplicates

        local function unique(splitdata)
            local used, collected = { }, { }
            for i=1,#splitdata do
                local line = splitdata[i]
                if line == "" then
                    -- discard
                elseif used[line] then
                    -- discard
                    report("%s: discarding duplicate pattern: %s",basename,line)
                else
                    used[line] = true
                    collected[#collected+1] = line
                end
            end
            return collected
        end

        splitpatternsnew     = unique(splitpatternsnew)
        splithyphenationsnew = unique(splithyphenationsnew)
        splitpatternsold     = unique(splitpatternsold)
        splithyphenationsold = unique(splithyphenationsold)

    else
        report("no valid file %s.*",name)
    end

    -- The used sets are keyed by character while the ignored hash is keyed by
    -- code point, so we need to convert before we look up.

    local function getused(t)
        local u = { }
        for k, v in next, t do
            if ignored and ignored[byte(k)] then
                -- not in the old set
            elseif replaced_whatever[k] then
                -- not in the old set
            else
                u[k] = v
            end
        end
        return u
    end

    local usedpatterncharactersold     = getused(usedpatterncharactersnew)
    local usedhyphenationcharactersold = getused(usedhyphenationcharactersnew)

    return okay,
        splitpatternsnew, splithyphenationsnew, splitpatternsold, splithyphenationsold, comment, stripset,
        usedpatterncharactersnew, usedhyphenationcharactersnew, usedpatterncharactersold, usedhyphenationcharactersold
end
415
-- Save the data of one language as lang-<mnemonic>.rme (comment), .pat (old
-- patterns), .hyp (old exceptions) and .lua (new patterns and exceptions plus
-- metadata) in the destination path. Existing files are removed first. Nothing
-- is saved when the mnemonic is "??".
--
--   destination          : target path; anything but a string becomes "."
--   mnemonic             : language tag used in the file names
--   name                 : name of the source, stored in the metadata
--   patternsnew/old      : lists with patterns
--   hyphenationsnew/old  : lists with exceptions
--   comment              : comment blob; "% no comment" when empty
--   pusednew, husednew   : hashes with the characters used in the new lists
--   pusedold, husedold   : hashes with the characters used in the old lists
--   stripped, ignored    : not used in this function
--
-- The data is compressed with zlib when --compress is given.

function scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,
        pusednew,husednew,pusedold,husedold,ignored)
    -- this has to happen before the destination is used in a filename
    if type(destination) ~= "string" then destination = "." end

    local nofpatternsnew, nofhyphenationsnew = #patternsnew, #hyphenationsnew
    local nofpatternsold, nofhyphenationsold = #patternsold, #hyphenationsold
    report("language %s has %s old and %s new patterns and %s old and %s new exceptions",mnemonic,nofpatternsold,nofpatternsnew,nofhyphenationsold,nofhyphenationsnew)
    if mnemonic ~= "??" then
        local puold = concat(table.sortedkeys(pusedold), " ")
        local huold = concat(table.sortedkeys(husedold), " ")

        local rmefile = file.join(destination,"lang-"..mnemonic..".rme")
        local patfile = file.join(destination,"lang-"..mnemonic..".pat")
        local hypfile = file.join(destination,"lang-"..mnemonic..".hyp")
        local luafile = file.join(destination,"lang-"..mnemonic..".lua") -- suffix might change to llg

        local topline = "% generated by mtxrun --script pattern --convert"
        local banner = "% for comment and copyright, see " .. file.basename(rmefile)
        report("saving language data for %s",mnemonic)
        if not comment or comment == "" then comment = "% no comment" end

        local compression = environment.arguments.compress and "zlib" or nil

        -- in the lua file each line of the comment has to start with a percent sign
        local lines = string.splitlines(comment)
        for i=1,#lines do
            if not find(lines[i],"^%%") then
                lines[i] = "% " .. lines[i]
            end
        end

        local metadata = {
         -- texcomment = comment,
            texcomment = concat(lines,"\n"),
            source     = name,
            mnemonic   = mnemonic,
        }

        local patterndata, hyphenationdata
        if nofpatternsnew > 0 then
            local data = concat(patternsnew," ")
            patterndata = {
                n              = nofpatternsnew,
                compression    = compression,
                length         = #data, -- of the uncompressed data
                data           = compression and zlib.compress(data,9) or data,
                characters     = concat(table.sortedkeys(pusednew),""),
                lefthyphenmin  = 1, -- determined by pattern author
                -- NOTE(review): the key below probably should be righthyphenmin,
                -- it is kept because it ends up in the saved files
                righthyphenmax = 1, -- determined by pattern author
            }
        else
            patterndata = {
                n = 0,
            }
        end
        if nofhyphenationsnew > 0 then
            local data = concat(hyphenationsnew," ")
            hyphenationdata = {
                n           = nofhyphenationsnew,
                compression = compression,
                length      = #data, -- of the uncompressed data
                data        = compression and zlib.compress(data,9) or data,
                characters  = concat(table.sortedkeys(husednew),""),
            }
        else
            hyphenationdata = {
                n = 0,
            }
        end
        local data = {
            -- a prelude to language goodies, like we have font goodies and in
            -- mkiv we can use this file directly
            version    = "1.001",
            comment    = topline,
            metadata   = metadata,
            patterns   = patterndata,
            exceptions = hyphenationdata,
        }

        os.remove(rmefile)
        os.remove(patfile)
        os.remove(hypfile)
        os.remove(luafile)

        io.savedata(rmefile,format("%s\n\n%s",topline,comment))
        io.savedata(patfile,format("%s\n\n%s\n\n%% used: %s\n\n\\patterns{\n%s}",topline,banner,puold,concat(patternsold,"\n")))
        io.savedata(hypfile,format("%s\n\n%s\n\n%% used: %s\n\n\\hyphenation{\n%s}",topline,banner,huold,concat(hyphenationsold,"\n")))
        io.savedata(luafile,table.serialize(data,true))
    end
end
506
-- Prepare a check or conversion run: char-def.lua is loaded (presumably that
-- file provides the characters.data table that the loader consults) and a
-- language given with --specification is put at the top of the list. Such a
-- specification has three comma separated components: the mnemonic, the base
-- name of the source file and a description, for instance: cy,hyph-cy,welsh

function scripts.patterns.prepare()
    --
    dofile(resolvers.findfile("char-def.lua"))
    --
    local specification = environment.argument("specification")
    if not specification then
        return
    end
    local components = utilities.parsers.settings_to_array(specification)
    if #components == 3 then
        table.insert(scripts.patterns.list,1,components)
        report("specification added: %s %s %s",table.unpack(components))
    else
        -- the second component is the source name, so hyph-yy and not lang-yy
        report('invalid specification: %q, "xx,hyph-yy,zzzz" expected',specification)
    end
end
522
-- Load (and thereby check) the languages in the list. When arguments are given
-- on the command line only the languages with these mnemonics are checked. The
-- sources are looked up in the path given with --path (default: current path).

function scripts.patterns.check()
    local sourcepath = environment.argument("path") or "."
    local requested  = environment.files
    local selected   = #requested > 0 and table.tohash(requested) or false
    for _, entry in next, scripts.patterns.list do
        local mnemonic = entry[1]
        if not selected or selected[mnemonic] then
            local name = entry[2]
            report("checking language %s, file %s", mnemonic, name)
            -- only the first return value matters here
            if not scripts.patterns.load(sourcepath,name,mnemonic,entry[4],entry[5]) then
                report("there are errors that need to be fixed")
            end
            report()
        end
    end
end
542
-- Convert the languages in the list: each one is loaded from the path given
-- with --path and saved in the path given with --destination (both default to
-- the current path, but they have to differ). When arguments are given on the
-- command line only the languages with these mnemonics are converted.

function scripts.patterns.convert()
    local path = environment.argument("path") or "."
    if path == "" then
        report("provide sourcepath using --path ")
        return
    end
    local destination = environment.argument("destination") or "."
    if path == destination then
        report("source path and destination path should differ (use --path and/or --destination)")
        return
    end
    local files = environment.files
    local only  = false
    if #files > 0 then
        only = table.tohash(files)
    end
    for k, v in next, scripts.patterns.list do
        local mnemonic, name, ignored, merged = v[1], v[2], v[4], v[5]
        if not only or only[mnemonic] then
            report("converting language %s, file %s", mnemonic, name)
            local okay, patternsnew, hyphenationsnew, patternsold, hyphenationsold, comment, stripped,
                pusednew, husednew, pusedold, husedold = scripts.patterns.load(path,name,mnemonic,ignored,merged)
            if okay then
                scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,
                    pusednew,husednew,pusedold,husedold,ignored)
            else
                report("conversion aborted due to error(s)")
            end
            report()
        end
    end
end
575
-- Load a specification with table.load. Returns the specification and its
-- 'lists' field, or false when the file gives no table or has no lists.

local function valid(filename)
    local specification = table.load(filename)
    local lists = specification and specification.lists
    if lists then
        return specification, lists
    end
    return false
end
587
-- Update the word lists in the given specification files (this needs --update).
-- For each entry in the 'lists' of a specification the file named in its
-- 'filename' field is loaded, stripped, and stored in the entry with whitespace
-- collapsed to single spaces (zlib compressed when --compress is given). An
-- entry without a (findable) file gets empty data. The updated specification
-- gets a version and timestamp and is saved under the name as given on the
-- command line (so not necessarily where it was found).

function scripts.patterns.words()
    if not environment.arguments.update then
        report("provide --update")
        return
    end
    local compress = environment.arguments.compress
    local files    = environment.files
    for i=1,#files do
        local filename = files[i]
        local fullname = resolvers.findfile(filename)
        if fullname and fullname ~= "" then
            report("checking file %a",fullname)
            local specification, lists = valid(fullname)
            if specification and #lists> 0 then
                report("updating %a of language %a",filename,specification.language)
                for j=1,#lists do
                    local entry    = lists[j]
                    local wordname = entry.filename
                    local wordfile = wordname and resolvers.findfile(wordname)
                    -- same test as for the specification file itself: an empty
                    -- string also means that nothing was found
                    if wordfile and wordfile ~= "" then
                        report("adding words from %a",wordfile)
                        local data = io.loaddata(wordfile) or ""
                        data = string.strip(data)
                        data = string.gsub(data,"%s+"," ")
                        if compress then
                            entry.data        = zlib.compress(data,9)
                            entry.compression = "zlib"
                        else
                            entry.data        = data
                            entry.compression = nil
                        end
                        entry.length = #data -- of the uncompressed data
                    else
                        entry.data        = ""
                        entry.compression = nil
                        entry.length      = 0
                    end
                end
                specification.version   = "1.00"
                specification.timestamp =  os.localtime()
                report("updated file %a is saved",filename)
                table.save(filename,specification)
            else
                report("no file %a",filename)
            end
        else
            report("nothing done")
        end
    end
end
644
645-- mtxrun --script patterns --hyphenate --language=nl nogalwiedes --left=3
646--
647-- hyphenator      |
648-- hyphenator      | . n o g a l w i e d e s .         . n o g a l w i e d e s .
649-- hyphenator      | .0n4                               0 4 0 0 0 0 0 0 0 0 0 0
650-- hyphenator      |    0o0g0a4l0                       0 4 0 0 4 0 0 0 0 0 0 0
651-- hyphenator      |      1g0a0                         0 4 1 0 4 0 0 0 0 0 0 0
652-- hyphenator      |          0l1w0                     0 4 1 0 4 1 0 0 0 0 0 0
653-- hyphenator      |              4i0e0                 0 4 1 0 4 1 4 0 0 0 0 0
654-- hyphenator      |              0i0e3d0e0             0 4 1 0 4 1 4 0 3 0 0 0
655-- hyphenator      |                0e1d0               0 4 1 0 4 1 4 0 3 0 0 0
656-- hyphenator      |                  1d0e0             0 4 1 0 4 1 4 0 3 0 0 0
657-- hyphenator      |                  0d0e2s0           0 4 1 0 4 1 4 0 3 0 2 0
658-- hyphenator      |                      4s0.          0 4 1 0 4 1 4 0 3 0 4 0
659-- hyphenator      | .0n4o1g0a4l1w4i0e3d0e4s0.         . n o-g a l-w i e-d e s .
660-- hyphenator      |
661-- mtx-patterns    | nl 3 3 : nogalwiedes : nogal-wie-des
662
-- Hyphenate the words given on the command line with the traditional
-- hyphenator from lang-hyp and report the results, with step tracing enabled.
-- The language comes from --language (default "us"), the minima from --left
-- and --right (default 3).

function scripts.patterns.hyphenate()
    require("lang-hyp")
    local arguments   = environment.arguments
    local traditional = languages.hyphenators.traditional
    local leftmin     = tonumber(arguments.left)  or 3
    local rightmin    = tonumber(arguments.right) or 3
    local language    = arguments.language or "us"
    local dictionary  = traditional.loadpatterns(language)
    local wordlist    = environment.files
    local specification = {
        leftcharmin  = leftmin,
        rightcharmin = rightmin,
        leftchar     = false,
        rightchar    = false,
    }
    trackers.enable("hyphenator.steps")
    for index=1,#wordlist do
        local word = wordlist[index]
        report("%s %s %s : %s : %s",language,leftmin,rightmin,word,traditional.injecthyphens(dictionary,word,specification))
    end
end
687
-- Run the action that belongs to the first of these flags that is given on the
-- command line; without any of them the help information is shown.
do
    local argument = environment.argument
    local patterns = scripts.patterns
    if argument("check") then
        patterns.prepare()
        patterns.check()
    elseif argument("convert") then
        patterns.prepare()
        patterns.convert()
    elseif argument("words") then
        patterns.words() -- for the moment here
    elseif argument("hyphenate") then
        patterns.hyphenate() -- for the moment here
    elseif argument("exporthelp") then
        application.export(argument("exporthelp"),environment.files[1])
    else
        application.help()
    end
end
703
704-- mtxrun --script pattern --check   hyph-*.tex
705-- mtxrun --script pattern --check   --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns
706-- mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns
707-- mtxrun --script pattern --convert --path=c:/data/repositories/tex-hyphen/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns
708--
709-- use this call:
710--
711-- setlocal
712--
713-- rem tugsvn checkout:
714--
715-- set patternsroot=c:/data/develop/svn-hyphen/trunk
716--
717-- rem github checkout:
718--
719-- set patternsroot=c:/data/repositories/tex-hyphen
720--
721-- del /q c:\data\develop\tex-context\tex\texmf-local\tex\context\patterns\*
722-- del /q c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns\*
723-- del /q c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns\*
724--
725-- mtxrun --script pattern --convert --path=%patternsroot%/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=c:/data/develop/tmp/patterns
726--
727-- copy /Y lang*.hyp c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
728-- copy /Y lang*.pat c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
729-- copy /Y lang*.rme c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
730-- copy /Y lang*.lua c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
731--
732-- move /Y lang*.hyp c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
733-- move /Y lang*.pat c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
734-- move /Y lang*.rme c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
735-- move /Y lang*.lua c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
736--
737-- mtxrun --script pattern --words --update word-th.lua --compress
738--
739-- copy /Y word*.lua c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
740-- move /Y word*.lua c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
741--
742-- mtxrun --generate
743--
744-- endlocal
745