mtx-patterns.lua /size: 33 Kb    last modification: 2025-02-21 11:03
-- Register this script in the global module table (created when absent).
modules = modules or { }

modules['mtx-patterns'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9local format, find, concat, gsub, match, gmatch = string.format, string.find, table.concat, string.gsub, string.match, string.gmatch
10local byte, char = utf.byte, utf.char
11local addsuffix = file.addsuffix
12local lpegmatch, lpegsplit, lpegpatterns, validutf8 = lpeg.match, lpeg.split, lpeg.patterns, lpeg.patterns.validutf8
13local P, V, Cs = lpeg.P, lpeg.V, lpeg.Cs
14
-- XML description of the flags and examples; it is handed to logs.application
-- below as the help information of this script.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-patterns</entry>
  <entry name="detail">ConTeXt Pattern File Management</entry>
  <entry name="version">0.20</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="convert"><short>generate context language files (mnemonic driven, if not given then all)</short></flag>
    <flag name="check"><short>check pattern file (or those used by context when no file given)</short></flag>
    <flag name="path"><short>source path where hyph-foo.tex files are stored</short></flag>
    <flag name="destination"><short>destination path</short></flag>
    <flag name="specification"><short>additional patterns: e.g.: =cy,hyph-cy,welsh</short></flag>
    <flag name="compress"><short>compress data</short></flag>
    <flag name="words"><short>update words in given file</short></flag>
    <flag name="hyphenate"><short>show hyphenated words</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script pattern --check hyph-*.tex</command></example>
    <example><command>mtxrun --script pattern --check   --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns</command></example>
    <example><command>mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/tex --destination=e:/tmp/patterns</command></example>
    <example><command>mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns</command></example>
    <example><command>mtxrun --script pattern --hyphenate --language=nl --left=3 nogalwiedes inderdaad</command></example>
   </subcategory>
  </category>
 </examples>
</application>
]]
51
-- Application record for this script; its report function is used for all
-- messages below.
local application = logs.application({
    name     = "mtx-patterns",
    banner   = "ConTeXt Pattern File Management 0.20",
    helpinfo = helpinfo,
})

local report = application.report

-- Make sure the (global) namespace of this script exists.
if not scripts then
    scripts = { }
end
if not scripts.patterns then
    scripts.patterns = { }
end
62
-- Code points that are accepted in an entry without consulting the character
-- database.
local permitted_characters = table.tohash({
    0x0009, -- tab
    0x0027, -- apostrophe
    0x02BC, -- modifier apostrophe (used in greek)
    0x002D, -- hyphen
    0x200C, -- zwnj
    0x2019, -- quote right
    0x1FBD, -- greek, but no letter: symbol modifier
    0x1FBF, -- greek, but no letter: symbol modifier
})

-- Code points that make an ancient greek entry unusable for the old style
-- pattern files.
local ignored_ancient_greek = table.tohash({
    0x1FD3, -- greekiotadialytikatonos (also 0x0390)
    0x1FE3, -- greekupsilondialytikatonos (also 0x03B0)
    0x1FBD, -- greek, but no letter: symbol modifier
    0x1FBF, -- greek, but no letter: symbol modifier
    0x03F2, -- greeksigmalunate
    0x02BC, -- modifier apostrophe
})

-- Idem for french.
local ignored_french = table.tohash({
    0x02BC, -- modifier apostrophe
})

-- Replacements (character to character) applied to all languages when the
-- old style lists are made: the right quote becomes a plain apostrophe.
local replaced_whatever = {
    [char(0x2019)] = char(0x0027),
}
90
-- The languages that get checked and converted. Each entry has a mnemonic (the
-- tag used in the lang-<mnemonic>.* result files), the base name of the source
-- file(s) and a descriptive comment. Optional fields: ignored (hash of code
-- points), merged (list of base names that get combined) and lefthyphenmin
-- plus righthyphenmin. Entries that are commented out are not processed.
scripts.patterns.list = {
    { mnemonic = "af", name = "hyph-af", comment = "afrikaans" },
 -- { mnemonic = "ar", name = "hyph-ar", comment = "arabic" },
 -- { mnemonic = "as", name = "hyph-as", comment = "assamese" },
    { mnemonic = "bg", name = "hyph-bg", comment = "bulgarian" },
    { mnemonic = "bn", name = "hyph-bn", comment = "bengali" },
    { mnemonic = "ca", name = "hyph-ca", comment = "catalan" },
 -- { mnemonic = "??", name = "hyph-cop", comment = "coptic" },
    { mnemonic = "cs", name = "hyph-cs", comment = "czech" },
    { mnemonic = "cy", name = "hyph-cy", comment = "welsh" },
    { mnemonic = "da", name = "hyph-da", comment = "danish" },
    { mnemonic = "deo", name = "hyph-de-1901", comment = "german, old spelling" },
    { mnemonic = "de", name = "hyph-de-1996", comment = "german, new spelling" },
 -- { mnemonic = "??", name = "hyph-de-ch-1901", comment = "swiss german" },
 -- { mnemonic = "??", name = "hyph-el-polyton", comment = "greek" },
    { mnemonic = "gr", name = "hyph-el-monoton", comment = "greek" },
    {
        mnemonic = "agr",
        name     = "hyph-grc",
        comment  = "ancient greek",
        ignored  = ignored_ancient_greek,
    },
    { mnemonic = "gb", name = "hyph-en-gb", comment = "british english" },
    {
        mnemonic       = "us",
        name           = "hyph-en-us",
        comment        = "american english",
        lefthyphenmin  = 3,
        righthyphenmin = 2,
    },
    { mnemonic = "eo", name = "hyph-eo", comment = "esperanto" },
    { mnemonic = "es", name = "hyph-es", comment = "spanish" },
    { mnemonic = "et", name = "hyph-et", comment = "estonian" },
    { mnemonic = "eu", name = "hyph-eu", comment = "basque" },
 -- { mnemonic = "fa", name = "hyph-fa", comment = "farsi" },
    { mnemonic = "fi", name = "hyph-fi", comment = "finnish" },
    {
        mnemonic = "fr",
        name     = "hyph-fr",
        comment  = "french",
        ignored  = ignored_french,
    },
 -- { mnemonic = "??", name = "hyph-ga", comment = "irish" },
 -- { mnemonic = "??", name = "hyph-gl", comment = "galician" },
    { mnemonic = "gu", name = "hyph-gu", comment = "gujarati" },
    { mnemonic = "hi", name = "hyph-hi", comment = "hindi" },
    { mnemonic = "hr", name = "hyph-hr", comment = "croatian" },
 -- { mnemonic = "??", name = "hyph-hsb", comment = "upper sorbian" },
    { mnemonic = "hu", name = "hyph-hu", comment = "hungarian" },
    { mnemonic = "hy", name = "hyph-hy", comment = "armenian" },
 -- { mnemonic = "??", name = "hyph-ia", comment = "interlingua" },
    { mnemonic = "id", name = "hyph-id", comment = "indonesian" },
    { mnemonic = "is", name = "hyph-is", comment = "icelandic" },
    { mnemonic = "it", name = "hyph-it", comment = "italian" },
 -- { mnemonic = "??", name = "hyph-kmr", comment = "kurmanji" },
    { mnemonic = "kn", name = "hyph-kn", comment = "kannada" },
    { mnemonic = "la", name = "hyph-la", comment = "latin" },
    { mnemonic = "ala", name = "hyph-la-x-classic", comment = "ancient latin" },
 -- { mnemonic = "lo", name = "hyph-lo", comment = "lao" },
    { mnemonic = "lt", name = "hyph-lt", comment = "lithuanian" },
    { mnemonic = "lv", name = "hyph-lv", comment = "latvian" },
    { mnemonic = "mk", name = "hyph-mk", comment = "macedonian" },
    { mnemonic = "ml", name = "hyph-ml", comment = "malayalam" },
    { mnemonic = "mn", name = "hyph-mn-cyrl", comment = "mongolian, cyrillic script" },
 -- { mnemonic = "mr", name = "hyph-mr", comment = "..." },
    { mnemonic = "nb", name = "hyph-nb", comment = "norwegian bokmål" },
    { mnemonic = "nl", name = "hyph-nl", comment = "dutch" },
    { mnemonic = "nn", name = "hyph-nn", comment = "norwegian nynorsk" },
 -- { mnemonic = "or", name = "hyph-or", comment = "oriya" },
 -- { mnemonic = "pa", name = "hyph-pa", comment = "panjabi" },
 -- { mnemonic = "", name = "hyph-", comment = "" },
    { mnemonic = "pl", name = "hyph-pl", comment = "polish" },
    { mnemonic = "pt", name = "hyph-pt", comment = "portuguese" },
    { mnemonic = "ro", name = "hyph-ro", comment = "romanian" },
    { mnemonic = "ru", name = "hyph-ru", comment = "russian" },
    { mnemonic = "sa", name = "hyph-sa", comment = "sanskrit" },
    { mnemonic = "sk", name = "hyph-sk", comment = "slovak" },
    { mnemonic = "sl", name = "hyph-sl", comment = "slovenian" },
    { mnemonic = "sq", name = "hyph-sq", comment = "albanian" },
    {
        mnemonic = "sr",
        name     = "hyph-sr",
        comment  = "serbian",
        merged   = { "hyph-sh-cyrl", "hyph-sh-latn" },
    },
 -- { mnemonic = "sr", name = "hyph-sr-cyrl", comment = "serbian", },
 -- { mnemonic = "sr", name = "hyph-sr-latn", comment = "serbian" },
    { mnemonic = "sv", name = "hyph-sv", comment = "swedish" },
    { mnemonic = "ta", name = "hyph-ta", comment = "tamil" },
    { mnemonic = "te", name = "hyph-te", comment = "telugu" },
    { mnemonic = "th", name = "hyph-th", comment = "thai" },
    { mnemonic = "tk", name = "hyph-tk", comment = "turkmen" },
    { mnemonic = "tr", name = "hyph-tr", comment = "turkish" },
    { mnemonic = "uk", name = "hyph-uk", comment = "ukrainian" },
    { mnemonic = "zh", name = "hyph-zh-latn-pinyin", comment = "zh-latn, chinese pinyin" },
}
166
167-- stripped down from lpeg example:
168
-- Match the given string against the validutf8 lpeg pattern and pass on
-- whatever that match returns.
function utf.check(str)
    local checker = lpegpatterns.validutf8
    return lpegmatch(checker,str)
end
172
173-- *.tex
174-- *.hyp.txt *.pat.txt *.lic.txt *.chr.txt
175
-- Load and sanitize the patterns and exceptions of one language.
--
-- path     : directory that holds the source files
-- name     : base name of the source (like "hyph-nl"), without suffix
-- mnemonic : language tag (not used here, kept for the callers)
-- ignored  : optional hash of code points; entries that contain one of them
--            are left out of the "old" lists
-- merged   : optional list of base names whose txt files get concatenated
--
-- The data comes from (in this order) the merged txt files, the files
-- <name>.[pat|hyp|lic].txt, or <name>.tex.
--
-- Returned are: okay, the new patterns, the new exceptions, the old patterns,
-- the old exceptions, the comment blob, stripset (always an empty string
-- here) and the sets of characters used in the new patterns, new exceptions,
-- old patterns and old exceptions.
function scripts.patterns.load(path,name,mnemonic,ignored,merged)
    local fullname = file.join(path,name)
    local basename = name
    local texfile  = addsuffix(fullname,"tex")
    local hypfile  = addsuffix(fullname,"hyp.txt")
    local patfile  = addsuffix(fullname,"pat.txt")
    local licfile  = addsuffix(fullname,"lic.txt")
 -- local chrfile  = addsuffix(fullname,"chr.txt")
    local okay = true
    local hyphenations, patterns, comment, stripset = "", "", "", ""
    local splitpatternsnew, splithyphenationsnew = { }, { }
    local splitpatternsold, splithyphenationsold = { }, { }
    local usedpatterncharactersnew, usedhyphenationcharactersnew = { }, { }

    if merged then
        -- no version info
        report("using merged txt files %s.[hyp|pat|lic].txt",name)
        for i=1,#merged do
            local fullname = file.join(path,merged[i])
            comment      = comment       .. (io.loaddata(addsuffix(fullname,"lic.txt")) or "") .. "\n\n"
            patterns     = patterns      .. (io.loaddata(addsuffix(fullname,"pat.txt")) or "") .. "\n\n"
            hyphenations = hyphenations  .. (io.loaddata(addsuffix(fullname,"hyp.txt")) or "") .. "\n\n"
        end
    elseif lfs.isfile(patfile) then
        -- no version info
        report("using txt files %s.[hyp|pat|lic].txt",name)
        comment      = io.loaddata(licfile) or ""
        patterns     = io.loaddata(patfile) or ""
        hyphenations = io.loaddata(hypfile) or ""
    elseif lfs.isfile(texfile) then
        -- version info in comment blob
        report("using tex file %s.tex",name)
        local data = io.loaddata(texfile) or ""
        if data ~= "" then
            -- pull in files that are \input at the start of a line
            data = gsub(data,"([\n\r])\\input ([^ \n\r]+)", function(previous,subname)
                local subname = addsuffix(subname,"tex")
                local subfull = file.join(file.dirname(texfile),subname)
                local subdata = io.loaddata(subfull) or ""
                if subdata == "" then
                    report("%s: no subfile %s",basename,subname)
                end
                return previous .. subdata
            end)
            data = gsub(data,"%%.-[\n\r]","")
            data = gsub(data," *[\n\r]+","\n")
            patterns     = match(data,"\\patterns[%s]*{[%s]*(.-)[%s]*}") or ""
            hyphenations = match(data,"\\hyphenation[%s]*{[%s]*(.-)[%s]*}") or ""
            comment      = match(data,"^(.-)[\n\r]\\patterns") or ""
        else
            okay = false
        end
    else
        okay = false
    end

    -- cut off comments; the cheap test on the whole blob avoids the loop
    local function removecomments(data,splitdata)
        if find(data,"%%") then
            for i=1,#splitdata do
                local line = splitdata[i]
                if find(line,"%%") then
                    splitdata[i] = gsub(line,"%%.*$","")
                    report("%s: removing comment: %s",basename,line)
                end
            end
        end
    end

    -- blank entries that contain a backslash
    local function removecommands(data,splitdata)
        if find(data,"\\") then
            for i=1,#splitdata do
                local line = splitdata[i]
                if find(line,"\\") then
                    splitdata[i] = ""
                    report("%s: removing line with command: %s",basename,line)
                end
            end
        end
    end

    -- blank entries that are not valid utf
    local function removeinvalid(splitdata)
        for i=1,#splitdata do
            local line = splitdata[i]
            if not lpegmatch(validutf8,line) then
                splitdata[i] = ""
                report("%s: removing line with invalid utf: %s",basename,line)
            end
        end
    end

    -- blank entries with unexpected characters and return the set of
    -- characters (keyed by utf string) that is used in the other ones; the
    -- special code point (period or hyphen) is never registered
    local function collectused(splitdata,special)
        local cd   = characters.data
        local used = { }
        for i=1,#splitdata do
            local line = splitdata[i]
            for b in line:utfvalues() do -- could be an lpeg
                if b == special then
                    -- not registered
                elseif permitted_characters[b] then
                    used[char(b)] = true
                else
                    local cdb = cd[b]
                    if not cdb then
                        report("%s: no entry in chardata for character %C",basename,b)
                    else
                        local ct = cdb.category
                        if ct == "lu" or ct == "ll" or ct == "lo" or ct == "mn" or ct == "mc" then -- hm, really mn and mc ?
                            used[char(b)] = true
                        elseif ct == "nd" then
                            -- number
                        elseif ct == "cf" then
                            report("%s: %s line with suspected utf character %C, category %s: %s",basename,"keeping",b,ct,line)
                            used[char(b)] = true
                        else -- maybe accent cf  (200D)
                            report("%s: %s line with suspected utf character %C, category %s: %s",basename,"removing",b,ct,line)
                            splitdata[i] = ""
                            break
                        end
                    end
                end
            end
        end
        return used
    end

    -- make the "old" list: entries with an ignored character are dropped and
    -- the replacements of replaced_whatever are applied to the other ones
    local function sanitized(what)
        -- ignored (per language)
        local p = nil
        if ignored then
            for k, v in next, ignored do
                if p then
                    p = p + P(char(k))
                else
                    p = P(char(k))
                end
            end
            if p then -- an empty hash gives no pattern
                p = P{ p + 1 * V(1) } -- anywhere
            end
        end
        -- replaced (all languages)
        local r = nil
        for k, v in next, replaced_whatever do
            if r then
                r = r + P(k)/v
            else
                r = P(k)/v
            end
        end
        r = Cs((r + 1)^0)
        local result = { }
        for i=1,#what do
            local line = what[i]
            if p and lpegmatch(p,line) then
                report("%s: discarding conflicting pattern: %s",basename,line)
            else -- we can speed this up by testing for replacements in the string
                local l = lpegmatch(r,line)
                if l ~= line then
                    report("%s: sanitizing pattern: %s -> %s (for old patterns)",basename,line,l)
                end
                result[#result+1] = l
            end
        end
        return result
    end

    -- drop blanked entries and duplicates, keeping the order
    local function unique(splitdata)
        local used, collected = { }, { }
        for i=1,#splitdata do
            local line = splitdata[i]
            if line == "" then
                -- discard
            elseif used[line] then
                -- discard
                report("%s: discarding duplicate pattern: %s",basename,line)
            else
                used[line] = true
                collected[#collected+1] = line
            end
        end
        return collected
    end

    -- The used sets are keyed by character (utf string) while ignored is keyed
    -- by code point, so the key has to be converted before the lookup.
    local function getused(t)
        local u = { }
        for k, v in next, t do
            if ignored and ignored[byte(k)] then
                -- not in the old files
            elseif replaced_whatever[k] then
                -- replaced in the old files
            else
                u[k] = v
            end
        end
        return u
    end

    if okay then
        -- split into entries
        local how = lpegpatterns.whitespace^1
        splitpatternsnew     = lpegsplit(how,patterns)
        splithyphenationsnew = lpegsplit(how,hyphenations)
        -- remove comments
        removecomments(patterns,splitpatternsnew)
        removecomments(hyphenations,splithyphenationsnew)
        -- remove lines with commands
        removecommands(patterns,splitpatternsnew)
        removecommands(hyphenations,splithyphenationsnew)
        -- check for valid utf
        removeinvalid(splitpatternsnew)
        removeinvalid(splithyphenationsnew)
        -- remove funny lines
        usedpatterncharactersnew     = collectused(splitpatternsnew,byte("."))
        usedhyphenationcharactersnew = collectused(splithyphenationsnew,byte("-"))
        -- old style variants
        splitpatternsold     = sanitized(splitpatternsnew)
        splithyphenationsold = sanitized(splithyphenationsnew)
        -- discarding duplicates
        splitpatternsnew     = unique(splitpatternsnew)
        splithyphenationsnew = unique(splithyphenationsnew)
        splitpatternsold     = unique(splitpatternsold)
        splithyphenationsold = unique(splithyphenationsold)
    else
        report("no valid file %s.*",name)
    end

    local usedpatterncharactersold     = getused(usedpatterncharactersnew)
    local usedhyphenationcharactersold = getused(usedhyphenationcharactersnew)

    return okay,
        splitpatternsnew, splithyphenationsnew, splitpatternsold, splithyphenationsold, comment, stripset,
        usedpatterncharactersnew, usedhyphenationcharactersnew, usedpatterncharactersold, usedhyphenationcharactersold
end
414
-- Save the data of one language as lang-<mnemonic>.rme (comment), .pat (old
-- style patterns), .hyp (old style exceptions) and .lua (new style data).
--
-- destination               : target directory ("." when no string is given)
-- mnemonic                  : language tag; nothing is saved for "??"
-- name                      : name of the source, stored in the metadata
-- patternsnew, patternsold  : lists of patterns (new and old style)
-- hyphenationsnew, ...old   : lists of exceptions (new and old style)
-- comment                   : comment blob, "% no comment" when empty
-- pusednew, husednew        : hashes of characters used in the new lists
-- pusedold, husedold        : hashes of characters used in the old lists
-- lefthyphenmin, righthyphenmin : optional, both default to 1
--
-- The stripped and ignored arguments are accepted but not used here.
function scripts.patterns.save(destination,mnemonic,name,patternsnew,hyphenationsnew,patternsold,hyphenationsold,comment,stripped,
        pusednew,husednew,pusedold,husedold,ignored,lefthyphenmin,righthyphenmin)
    local nofpatternsnew, nofhyphenationsnew = #patternsnew, #hyphenationsnew
    local nofpatternsold, nofhyphenationsold = #patternsold, #hyphenationsold
    report("language %s has %s old and %s new patterns and %s old and %s new exceptions",mnemonic,nofpatternsold,nofpatternsnew,nofhyphenationsold,nofhyphenationsnew)
    if mnemonic ~= "??" then
        -- This has to come before the filenames are made. Mind the operator:
        -- "not type(destination) == 'string'" compares a boolean with a
        -- string and is therefore never true.
        if type(destination) ~= "string" then destination = "." end

        local pcharsnew = table.sortedkeys(pusednew)
        local hcharsnew = table.sortedkeys(husednew)
        local puold     = concat(table.sortedkeys(pusedold), " ")
        local huold     = concat(table.sortedkeys(husedold), " ")

        local rmefile = file.join(destination,"lang-"..mnemonic..".rme")
        local patfile = file.join(destination,"lang-"..mnemonic..".pat")
        local hypfile = file.join(destination,"lang-"..mnemonic..".hyp")
        local luafile = file.join(destination,"lang-"..mnemonic..".lua") -- suffix might change to llg

        local topline = "% generated by mtxrun --script pattern --convert"
        local banner = "% for comment and copyright, see " .. file.basename(rmefile)
        report("saving language data for %s",mnemonic)
        if not comment or comment == "" then comment = "% no comment" end

        local compression = environment.arguments.compress and "zlib" or nil

        -- in the lua file each line of the comment starts with a percent sign
        local lines = string.splitlines(comment)
        for i=1,#lines do
            if not find(lines[i],"^%%") then
                lines[i] = "% " .. lines[i]
            end
        end

        local metadata = {
         -- texcomment = comment,
            texcomment = concat(lines,"\n"),
            source     = name,
            mnemonic   = mnemonic,
        }

        local patterndata, hyphenationdata
        if nofpatternsnew > 0 then
            local data = concat(patternsnew," ")
            patterndata = {
                n              = nofpatternsnew,
                compression    = compression,
                length         = #data, -- size before compression
                data           = compression and zlib.compress(data,9) or data,
                characters     = concat(pcharsnew,""),
                lefthyphenmin  = lefthyphenmin  or 1, -- determined by pattern author, sometimes set
                righthyphenmin = righthyphenmin or 1,
            }
        else
            patterndata = {
                n = 0,
            }
        end
        if nofhyphenationsnew > 0 then
            local data = concat(hyphenationsnew," ")
            hyphenationdata = {
                n           = nofhyphenationsnew,
                compression = compression,
                length      = #data, -- size before compression
                data        = compression and zlib.compress(data,9) or data,
                characters  = concat(hcharsnew,""),
            }
        else
            hyphenationdata = {
                n = 0,
            }
        end
        local data = {
            -- a prelude to language goodies, like we have font goodies and in
            -- mkiv we can use this file directly
            version    = "1.001",
            comment    = topline,
            metadata   = metadata,
            patterns   = patterndata,
            exceptions = hyphenationdata,
        }

        -- get rid of old files first
        os.remove(rmefile)
        os.remove(patfile)
        os.remove(hypfile)
        os.remove(luafile)

        io.savedata(rmefile,format("%s\n\n%s",topline,comment))
        io.savedata(patfile,format("%s\n\n%s\n\n%% used: %s\n\n\\patterns{\n%s}",topline,banner,puold,concat(patternsold,"\n")))
        io.savedata(hypfile,format("%s\n\n%s\n\n%% used: %s\n\n\\hyphenation{\n%s}",topline,banner,huold,concat(hyphenationsold,"\n")))
        io.savedata(luafile,table.serialize(data,true))
    end
end
505
-- Prepare a check or convert run: load char-def.lua and put the language
-- given with --specification (mnemonic,name,comment) at the start of the list.
function scripts.patterns.prepare()
    -- presumably this provides the characters.data table that the loader
    -- consults (TODO confirm)
    dofile(resolvers.findfile("char-def.lua"))
    --
    local specification = environment.argument("specification")
    if specification then
        local components = utilities.parsers.settings_to_array(specification)
        if #components == 3 then
            -- The entries of the list are accessed by field name, so the
            -- positional components have to be turned into such a record.
            local mnemonic, name, comment = components[1], components[2], components[3]
            table.insert(scripts.patterns.list,1,{
                mnemonic = mnemonic,
                name     = name,
                comment  = comment,
            })
            report("specification added: %s %s %s",mnemonic,name,comment)
        else
            report('invalid specification: %q, "xx,lang-yy,zzzz" expected',specification)
        end
    end
end
521
-- Load (and thereby check) each language in the list, or only those whose
-- mnemonic is given on the command line. The path defaults to the current
-- directory.
function scripts.patterns.check()
    local path     = environment.argument("path") or "."
    local files    = environment.files
    local selected = #files > 0 and table.tohash(files) or false
    for _, entry in next, scripts.patterns.list do
        local mnemonic = entry.mnemonic
        if not selected or selected[mnemonic] then
            local name = entry.name
            report("checking language %s, file %s", mnemonic, name)
            -- only the first return value (okay) matters here
            if not scripts.patterns.load(path,name,mnemonic,entry.ignored,entry.merged) then
                report("there are errors that need to be fixed")
            end
            report()
        end
    end
end
545
-- Load each language in the list (or only those whose mnemonic is given on the
-- command line) from the source path and save the results in the destination
-- path. Both paths default to the current directory and have to differ.
function scripts.patterns.convert()
    local source = environment.argument("path") or "."
    if source == "" then
        report("provide sourcepath using --path ")
        return
    end
    local destination = environment.argument("destination") or "."
    if source == destination then
        report("source path and destination path should differ (use --path and/or --destination)")
        return
    end
    local files    = environment.files
    local selected = #files > 0 and table.tohash(files) or false
    for _, entry in next, scripts.patterns.list do
        local mnemonic = entry.mnemonic
        if not selected or selected[mnemonic] then
            local name    = entry.name
            local ignored = entry.ignored
            report("converting language %s, file %s", mnemonic, name)
            local okay, patternsnew, hyphenationsnew, patternsold, hyphenationsold, comment, stripped,
                pusednew, husednew, pusedold, husedold = scripts.patterns.load(path == nil and source or source,name,mnemonic,ignored,entry.merged)
            if not okay then
                report("convertion aborted due to error(s)")
            else
                scripts.patterns.save(
                    destination,mnemonic,name,
                    patternsnew,hyphenationsnew,patternsold,hyphenationsold,
                    comment,stripped,
                    pusednew,husednew,pusedold,husedold,ignored,
                    entry.lefthyphenmin,entry.righthyphenmin
                )
            end
            report()
        end
    end
end
587
-- Load a specification with table.load and return it together with its lists
-- field; false is returned when the load fails or when there are no lists.
local function valid(filename)
    local specification = table.load(filename)
    local lists = specification and specification.lists
    if lists then
        return specification, lists
    end
    return false
end
599
-- Update the word lists in the given specification files (only with --update).
-- For each entry in the lists of a specification the file named by its
-- filename field is loaded, stripped, its whitespace runs are collapsed to a
-- single space, and the result is stored in the entry (zlib compressed with
-- --compress). The specification is then saved under the given filename.
function scripts.patterns.words()
    if not environment.arguments.update then
        report("provide --update")
        return
    end
    local compress = environment.arguments.compress
    for f=1,#environment.files do
        local filename = environment.files[f]
        local fullname = resolvers.findfile(filename)
        if not fullname or fullname == "" then
            report("nothing done")
        else
            report("checking file %a",fullname)
            local specification, lists = valid(fullname)
            if not specification or #lists <= 0 then
                report("no file %a",filename)
            else
                report("updating %a of language %a",filename,specification.language)
                for l=1,#lists do
                    local entry  = lists[l]
                    local source = entry.filename
                    local found  = source and resolvers.findfile(source)
                    if found then
                        report("adding words from %a",found)
                        local words = io.loaddata(found) or ""
                        words = string.strip(words)
                        words = string.gsub(words,"%s+"," ")
                        -- the length is always the uncompressed one
                        entry.length = #words
                        if compress then
                            entry.compression = "zlib"
                            entry.data        = zlib.compress(words,9)
                        else
                            entry.compression = nil
                            entry.data        = words
                        end
                    else
                        -- no filename or no file: an empty entry
                        entry.data        = ""
                        entry.compression = nil
                        entry.length      = 0
                    end
                end
                specification.version   = "1.00"
                specification.timestamp =  os.localtime()
                report("updated file %a is saved",filename)
                table.save(filename,specification)
            end
        end
    end
end
656
657-- mtxrun --script patterns --hyphenate --language=nl nogalwiedes --left=3
658--
659-- hyphenator      |
660-- hyphenator      | . n o g a l w i e d e s .         . n o g a l w i e d e s .
661-- hyphenator      | .0n4                               0 4 0 0 0 0 0 0 0 0 0 0
662-- hyphenator      |    0o0g0a4l0                       0 4 0 0 4 0 0 0 0 0 0 0
663-- hyphenator      |      1g0a0                         0 4 1 0 4 0 0 0 0 0 0 0
664-- hyphenator      |          0l1w0                     0 4 1 0 4 1 0 0 0 0 0 0
665-- hyphenator      |              4i0e0                 0 4 1 0 4 1 4 0 0 0 0 0
666-- hyphenator      |              0i0e3d0e0             0 4 1 0 4 1 4 0 3 0 0 0
667-- hyphenator      |                0e1d0               0 4 1 0 4 1 4 0 3 0 0 0
668-- hyphenator      |                  1d0e0             0 4 1 0 4 1 4 0 3 0 0 0
669-- hyphenator      |                  0d0e2s0           0 4 1 0 4 1 4 0 3 0 2 0
670-- hyphenator      |                      4s0.          0 4 1 0 4 1 4 0 3 0 4 0
671-- hyphenator      | .0n4o1g0a4l1w4i0e3d0e4s0.         . n o-g a l-w i e-d e s .
672-- hyphenator      |
673-- mtx-patterns    | nl 3 3 : nogalwiedes : nogal-wie-des
674
-- Hyphenate the words given on the command line and report one line per word.
--
-- Input comes from the command line environment:
--
--   environment.arguments.language : language tag handed to the pattern loader ("us" when absent)
--   environment.arguments.left     : passed on as leftcharmin
--   environment.arguments.right    : passed on as rightcharmin
--   environment.files              : the words to hyphenate
--
-- When left/right are absent or not numeric, the lefthyphenmin/righthyphenmin fields
-- of the loaded dictionary are used, and 3 is the last resort. Nothing is returned.

function scripts.patterns.hyphenate()
    require("lang-hyp")
    local traditional = languages.hyphenators.traditional
    local arguments   = environment.arguments
    local language    = arguments.language
    if type(language) ~= "string" or language == "" then
        -- also catches a valueless --language, which presumably does not arrive as a string
        language = "us"
    end
    local dictionary = traditional.loadpatterns(language)
    if not dictionary then
        -- without this guard the field lookups below fail with an obscure index error
        report("no patterns for language %a",language)
        return
    end
    local left          = tonumber(arguments.left)  or dictionary.lefthyphenmin  or 3
    local right         = tonumber(arguments.right) or dictionary.righthyphenmin or 3
    local words         = environment.files or { }
    local specification = {
        leftcharmin     = left,
        rightcharmin    = right,
        leftchar        = false,
        rightchar       = false,
    }
    -- presumably this is what produces the step by step "hyphenator" trace
    trackers.enable("hyphenator.steps")
    for i=1,#words do
        local word = words[i]
        report("%s %s %s : %s : %s",
            language, left, right,
            word,
            traditional.injecthyphens(dictionary,word,specification)
        )
    end
end
699
-- Command dispatch: the first flag in this list that is set on the command line
-- decides what runs; when none of them is set the help information is shown.

do

    local argument = environment.argument
    local patterns = scripts.patterns

    local actions = {
        { "check", function()
            patterns.prepare()
            patterns.check()
        end },
        { "convert", function()
            patterns.prepare()
            patterns.convert()
        end },
        { "words", function()
            patterns.words() -- for the moment here
        end },
        { "hyphenate", function()
            patterns.hyphenate() -- for the moment here
        end },
        { "exporthelp", function()
            application.export(argument("exporthelp"),environment.files[1])
        end },
    }

    local handled = false

    for i=1,#actions do
        local action = actions[i]
        if argument(action[1]) then
            action[2]()
            handled = true
            break -- only one action per run, in the order given above
        end
    end

    if not handled then
        application.help()
    end

end
715
716-- mtxrun --script pattern --check   hyph-*.tex
717-- mtxrun --script pattern --check   --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns
718-- mtxrun --script pattern --convert --path=c:/data/develop/svn-hyphen/trunk/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns
719-- mtxrun --script pattern --convert --path=c:/data/repositories/tex-hyphen/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=e:/tmp/patterns
720--
721-- use this call:
722--
723-- setlocal
724--
725-- rem tugsvn checkout:
726--
727-- set patternsroot=c:/data/develop/svn-hyphen/trunk
728--
729-- rem github checkout:
730--
731-- set patternsroot=c:/data/repositories/tex-hyphen
732--
733-- del /q c:\data\develop\tex-context\tex\texmf-local\tex\context\patterns\*
734-- del /q c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns\*
735-- del /q c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns\*
736--
737-- mtxrun --script pattern --convert --path=%patternsroot%/hyph-utf8/tex/generic/hyph-utf8/patterns/txt --destination=c:/data/develop/tmp/patterns
738--
739-- copy /Y lang*.hyp c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
740-- copy /Y lang*.pat c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
741-- copy /Y lang*.rme c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
742-- copy /Y lang*.lua c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
743--
744-- move /Y lang*.hyp c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
745-- move /Y lang*.pat c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
746-- move /Y lang*.rme c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
747-- move /Y lang*.lua c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
748--
749-- mtxrun --script pattern --words --update word-th.lua --compress
750--
751-- copy /Y word*.lua c:\data\develop\tex-context\tex\texmf-context\tex\context\patterns
752-- move /Y word*.lua c:\data\develop\tex-context\tex\texmf-mine\tex\context\patterns
753--
754-- mtxrun --generate
755--
756-- endlocal
757