mtx-chars.lua /size: 12 Kb    last modification: 2020-07-01 14:35
-- Registration record for this script: creates the global modules table when
-- it does not exist yet and stores version, author and license information
-- under the key 'mtx-chars'.
if not modules then modules = { } end modules ['mtx-chars'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}
8
9-- obsolete: --stix                convert stix table to math table
10
-- Help description in xml form. It is handed to logs.application below. The
-- file names mentioned in the flags have to match what the generators in this
-- script really write: --xtx produces xetx-utf.mkii, xetx-chr.mkii and
-- xetx-cls.mkii, and --pdf produces pdfr-def.tex.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-chars</entry>
  <entry name="detail">MkII Character Table Generators</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="xtx"><short>generate xetx-*.mkii (used by xetex)</short></flag>
    <flag name="pdf"><short>generate pdfr-def.tex (used by pdftex)</short></flag>
    <flag name="entities"><short>generate entities table</short></flag>
   </subcategory>
  </category>
 </flags>
</application>
]]

-- The application object provides the reporter as well as the help and export
-- functions that are used at the end of this file.
local application = logs.application {
    name     = "mtx-chars",
    banner   = "MkII Character Table Generators 0.10",
    helpinfo = helpinfo,
}

-- all messages of this script go through this reporter
local report = application.report
38
39local format, gmatch, upper, lower, find = string.format, string.gmatch, string.upper, string.lower, string.find
40local formatters = string.formatters
41local tonumber, type = tonumber, type
42local concat = table.concat
43local utfchar = utf.char
44
-- make sure that the global scripts namespace and our own slot in it exist,
-- without overwriting tables that are already there
if not scripts       then scripts       = { } end
if not scripts.chars then scripts.chars = { } end
47
-- Handler for the obsolete --stix flag (stix table to math table conversion).
-- Both arguments are ignored: the only thing that happens is that a message
-- is reported. Nothing is returned.
function scripts.chars.stixtomkiv(inname,outname)
    local message = "we no longer use this options but use our own tables instead"
    report(message)
end
51
-- Header that scripts.chars.makepdfr writes at the top of pdfr-def.tex; every
-- line is a tex comment.
local banner_pdf_1 = [[
% filename : pdfr-def.tex
% comment  : generated by mtxrun --script chars --pdf
% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
% copyright: PRAGMA ADE / ConTeXt Development Team
% license  : see context related readme files
%
]]

-- Trailer that scripts.chars.makepdfr writes after the last glyph mapping.
local banner_pdf_2 = [[
%
\endinput
]]
65
-- Line templates for the generated files, all compiled once with the
-- formatters helper. A doubled percent sign in a template stands for a literal
-- percent sign in the result.

-- pdfr-def.tex: glyph name and its code point
local f_tounicode   = formatters['\\pdfglyphtounicode{%s}{%04X}%%\n']

-- xetx-utf.mkii: code, lowercase code, uppercase code and a description, and
-- a loop that maps a whole range of code points onto themselves
local f_case        = formatters['\\setXTXcharcodes "%05X "%05X "%05X %% %s\n']
local f_range       = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharcodes\\recurselevel\\recurselevel\\recurselevel}\n']

-- xetx-cls.mkii: class definition (a), single code point (b) and a range of
-- code points (c); f_classes is the same template as f_charclass_c and is kept
-- because both names are in use
local f_classes     = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n']
local f_charclass_a = formatters['\\defineXTXcharinjectionclass[lb:%s]\n']
local f_charclass_b = formatters['\\dosetXTXcharacterclass{"%05X}{lb:%s}\n']
local f_charclass_c = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n']

-- entity table: f_unicode formats the first code point of an entity and f_hex
-- appends a following one to what we already have; both use the same U+XXXXX
-- notation (f_hex used to end with a stray double quote and had no U+ prefix)
local f_hex         = formatters['%s U+%05X']
local f_unicode     = formatters['U+%05X']
local f_entity      = formatters['    ["%s"] = %q, -- %s']
76
-- Generate pdfr-def.tex in the current directory. The character table is
-- loaded from char-def.lua and for each entry that has an adobename one
-- \pdfglyphtounicode line is written, in the order given by table.sortedkeys.
--
-- The function takes no arguments and returns nothing. When the table cannot
-- be located, has no data, or the output file cannot be opened, the reason is
-- reported and nothing (more) is written.
function scripts.chars.makepdfr()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable == "" then
        report("unable to locate '%s'","char-def.lua")
        return
    end
    -- running the file is expected to set up the global characters.data
    dofile(chartable)
    local data = characters and characters.data
    if not data then
        report("no character data found in '%s'",chartable)
        return
    end
    local name = "pdfr-def.tex"
    local f = io.open(name,'w')
    if not f then
        report("unable to open '%s' for writing",name)
        return
    end
    report("writing '%s'",name)
    f:write(banner_pdf_1)
    local list = table.sortedkeys(data)
    for i=1,#list do
        local chr       = data[list[i]]
        local adobename = chr.adobename
        if adobename then
            f:write(f_tounicode(adobename,chr.unicodeslot))
        end
    end
    f:write(banner_pdf_2)
    f:close()
end
99
-- Common header of the xetx-*.mkii files. This one is a formatter: it gets
-- the filename as argument, which is why the percent signs that have to end
-- up in the file are doubled.
local banner_utf_module = formatters [ [[
%% filename : %s
%% comment  : generated by mtxrun --script chars --xtx
%% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
%% copyright: PRAGMA ADE / ConTeXt Development Team
%% license  : see context related readme files
]] ]

-- Section title written to xetx-utf.mkii right after the common header.
local banner_utf_mappings = [[

% lc/uc/catcode mappings

]]

-- Fixed set of assignments written at the end of xetx-utf.mkii, after the
-- generated mappings.
local banner_utf_patch = [[

% patch needed for turkish

\setXTXcharcodes "201C "201C "201C
\setXTXcharcodes "201D "201D "201D

% patch needed for french

\setXTXcharcodes "2019 "2019 "2019

]]

-- Section title written to xetx-chr.mkii right after the common header.
-- NOTE(review): this is a long string, so the two backslashes before char end
-- up in the generated comment as they are - probably one was meant.
local banner_utf_names = [[

% named characters mapped onto utf (\\char is needed for accents)

]]

-- Section title written to xetx-cls.mkii right after the common header.
local banner_utf_classes = [[

% some character classes for xetex; seems to be rather hard coded, these numbers
% and also a mix of several classes; here we do linebreaks

]]

-- Trailer written to each xetx-*.mkii file just before it gets closed.
local banner_utf_finish = [[

\endinput
]]
144
-- Linebreak classes that are exported to xetx-cls.mkii. The keys are compared
-- with the linebreak field of the character entries. In this file only the
-- keys are used: they are traversed (with next, so in no guaranteed order) to
-- define the classes and they act as a membership test; the numbers themselves
-- are not read anywhere here.
-- NOTE(review): the numbers look like xetex character class codes - confirm.
local xtxclasses = {
    id =   1,
    ex =   3,
    is =   3,
    cm = 256,
    op =   2,
    ns =   3,
    cl =   3,
}
154
-- Open one of the xetx-*.mkii files for writing and put the common header plus
-- the given section banner in it. Returns the file handle, or nothing (after a
-- report) when the file cannot be opened.
local function xtx_open(name,banner)
    local f = io.open(name,'w')
    if f then
        report("writing '%s'",name)
        f:write(banner_utf_module(name))
        f:write(banner)
        return f
    else
        report("unable to open '%s' for writing",name)
    end
end

-- Write the common trailer and close the file.
local function xtx_close(f)
    f:write(banner_utf_finish)
    f:close()
end

-- Body of xetx-utf.mkii: case mappings for letters up to 0xFFFF, then the
-- ranges of category 'lo', then the fixed patches.
local function xtx_write_cases(f,data,list)
    for i=1,#list do
        local code = list[i]
        if code <= 0xFFFF then
            local chr = data[code]
            local cc  = chr.category
            if cc == 'll' or cc == 'lu' or cc == 'lt' then
                local lccode = chr.lccode or code
                local uccode = chr.uccode or code
                -- a table means that there is more than one code, in which
                -- case we map the character onto itself
                if type(lccode) == "table" then
                    lccode = code
                end
                if type(uccode) == "table" then
                    uccode = code
                end
                f:write(f_case(code,lccode,uccode,chr.description))
            end
        end
    end
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr  = data[code]
        if chr and chr.range and chr.category == 'lo' then
            f:write(f_range(code,chr.range))
        end
    end
    f:write(banner_utf_patch)
end

-- Body of xetx-chr.mkii: a \def for each character above 0x5B and up to
-- 0xFFFF that has a contextname not ending in "space".
local function xtx_write_names(f,data,list)
    -- first pass: the longest name determines the width of the name column
    -- (names that are skipped later on are taken into account here too)
    local length = 0
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr then
                local l = #(chr.contextname or "")
                if l > length then
                    length = l
                end
            end
        end
    end
    local f_def = formatters["\\def\\%-".. length .. "s{\\char\"%05X } %% %s: %s\n"]
    -- second pass: the definitions themselves
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr then
                local contextname = chr.contextname
                if contextname and not find(contextname,"space$") then
                    f:write(f_def(contextname,code,chr.description,utfchar(code)))
                end
            end
        end
    end
end

-- Body of xetx-cls.mkii: the class definitions, then the class assignments
-- per code point (or run of code points), then the assignments for entries
-- that have a range field.
local function xtx_write_classes(f,data,list)
    for k in next, xtxclasses do
        f:write(f_charclass_a(k))
    end
    f:write("\n")
    -- the pending run; the three are always set and reset together
    local i_first, i_last, i_clb = nil, nil, nil
    local function flush()
        if i_first then
            if i_first == i_last then
                f:write(f_charclass_b(i_first,i_clb))
            else
                f:write(f_charclass_c(i_first,i_last,i_clb))
            end
        end
        i_first, i_last, i_clb = nil, nil, nil
    end
    -- NOTE(review): successive entries of the sorted list that share a class
    -- are merged into one run without checking that their code points are
    -- adjacent, so a run can span code points that are not in the table.
    for i=1,#list do
        local code = list[i]
        local chr  = data[code]
        local clb  = chr and chr.linebreak
        if not xtxclasses[clb] then
            -- not a class that we export: this ends the pending run
            flush()
        elseif clb == i_clb then
            -- same class as the pending run (which therefore exists)
            i_last = code
        else
            flush()
            i_first, i_last, i_clb = code, code, clb
        end
    end
    flush()
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr  = data[code]
        if chr and chr.range then
            local lbc = chr.linebreak
            if xtxclasses[lbc] then
                f:write(f_classes(code,chr.range,lbc))
            end
        end
    end
end

-- Generate xetx-utf.mkii, xetx-chr.mkii and xetx-cls.mkii in the current
-- directory from the character table in char-def.lua.
--
-- The function takes no arguments and returns nothing. When the table cannot
-- be located or has no data this is reported and nothing is written; a file
-- that cannot be opened is reported and skipped.
function scripts.chars.makeencoutf()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable == "" then
        report("unable to locate '%s'","char-def.lua")
        return
    end
    -- running the file is expected to set up the global characters.data
    dofile(chartable)
    local data = characters and characters.data
    if not data then
        report("no character data found in '%s'",chartable)
        return
    end
    local list = table.sortedkeys(data)
    local f = xtx_open("xetx-utf.mkii",banner_utf_mappings)
    if f then
        xtx_write_cases(f,data,list)
        xtx_close(f)
    end
    f = xtx_open("xetx-chr.mkii",banner_utf_names)
    if f then
        xtx_write_names(f,data,list)
        xtx_close(f)
    end
    f = xtx_open("xetx-cls.mkii",banner_utf_classes)
    if f then
        xtx_write_classes(f,data,list)
        xtx_close(f)
    end
end
295
-- Entity definition files that scripts.chars.xmlentities processes, in this
-- order (the first definition of a name wins there). Only the basename of the
-- path part of each url is used: it is passed to resolvers.findfile and the
-- file that is found gets loaded.
local entityfiles = {
    "http://www.w3.org/2003/entities/2007/w3centities-f.ent",
    "http://www.w3.org/2003/entities/2007/htmlmathml-f.ent",
}
300
-- Generate xmlentities.tmp in the current directory: the source of a lua table
-- that maps entity names onto utf strings, with the code points as comment.
--
-- The entity files listed in entityfiles are scanned for <!ENTITY name "value">
-- declarations; the first definition of a name wins. The function takes no
-- arguments and returns nothing. A file that cannot be found or loaded is
-- reported and skipped.
function scripts.chars.xmlentities()
    local done = { }
    local entities = { "local entities = utilities.storage.allocate {" }
    for i=1,#entityfiles do
        local f = entityfiles[i]
        local s = url.hashed(f)
        local b = file.basename(s.path)
        local n = resolvers.findfile(b) or ""
        local data = n ~= "" and io.loaddata(n) or ""
        if data == "" then
            report("unable to load entities from '%s'",b)
        else
            for name, value in gmatch(data,'<!ENTITY +(%S+) +"(.-)" *>') do
                if not done[name] then
                    done[name] = true
                    local str, hex
                    if name == "newline" then
                        -- let's forget about that one
                    elseif name == "lt" then
                        -- these three are set by hand instead of taken from the
                        -- value in the file, so we have to provide the code
                        -- point ourselves
                        str, hex = "<", f_unicode(0x3C)
                    elseif name == "gt" then
                        str, hex = ">", f_unicode(0x3E)
                    elseif name == "amp" then
                        str, hex = "&", f_unicode(0x26)
                    else
                        -- a value can have several character references, each
                        -- either hexadecimal (&#x...;) or decimal (&#...;)
                        for t, c in gmatch(value,"&#([x]*)([^;]+);") do
                            if t == "x" then
                                c = tonumber(c,16)
                            else
                                c = tonumber(c)
                            end
                            if not c then
                                -- not a number, so nothing that we can convert
                            elseif str then
                                str, hex = str .. utfchar(c), f_hex(hex,c)
                            else
                                str, hex = utfchar(c), f_unicode(c)
                            end
                        end
                    end
                    if str and hex then
                        entities[#entities+1] = f_entity(name,str,hex)
                    end
                end
            end
        end
    end
    entities[#entities+1] = "}"
    io.savedata("xmlentities.tmp",concat(entities,"\n"))
end
346
-- Command line dispatch: the first flag that is set (tested in the order
-- stix, entities, xtx, pdf, exporthelp) determines what gets done, and
-- without any of them the help information is shown.
do
    local argument = environment.argument
    local files    = environment.files
    if argument("stix") then
        -- missing filenames are passed on as empty strings
        scripts.chars.stixtomkiv(files[1] or "",files[2] or "")
    elseif argument("entities") then
        scripts.chars.xmlentities()
    elseif argument("xtx") then
        scripts.chars.makeencoutf()
    elseif argument("pdf") then
        scripts.chars.makepdfr()
    elseif argument("exporthelp") then
        application.export(argument("exporthelp"),files[1])
    else
        application.help()
    end
end
362