if not modules then modules = { } end modules ['mtx-chars'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- obsolete: --stix convert stix table to math table

-- NOTE(review): this help text reads like markup whose tags were stripped;
-- it is kept verbatim here, confirm against what logs.application expects.
local helpinfo = [[ mtx-chars MkII Character Table Generators 0.10 generate xetx-*.tex (used by xetex) generate pdfr-def.tex (used by pdftex) generate entities table ]]

local application = logs.application {
    name     = "mtx-chars",
    banner   = "MkII Character Table Generators 0.10",
    helpinfo = helpinfo,
}

local report = application.report

local format, gmatch, upper, lower, find = string.format, string.gmatch, string.upper, string.lower, string.find
local formatters = string.formatters
local tonumber, type = tonumber, type
local concat = table.concat
local utfchar = utf.char

scripts       = scripts       or { }
scripts.chars = scripts.chars or { }

-- Kept only so that the --stix option still resolves; it just reports that
-- the conversion is gone. Both arguments are ignored.
function scripts.chars.stixtomkiv(inname,outname)
    report("we no longer use this options but use our own tables instead")
end

-- The banners below are written verbatim into generated TeX files. Each
-- directive sits on its own line because '%' starts a TeX comment that runs
-- to the end of the line.

local banner_pdf_1 = [[
% filename : pdfr-def.tex
% comment : generated by mtxrun --script chars --pdf
% author : Hans Hagen, PRAGMA-ADE, Hasselt NL
% copyright: PRAGMA ADE / ConTeXt Development Team
% license : see context related readme files
%
]]

local banner_pdf_2 = [[
%
\endinput
]]

local f_tounicode   = formatters['\\pdfglyphtounicode{%s}{%04X}%%\n']
local f_case        = formatters['\\setXTXcharcodes "%05X "%05X "%05X %% %s\n']
local f_range       = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharcodes\\recurselevel\\recurselevel\\recurselevel}\n']
local f_classes     = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n']
local f_charclass_a = formatters['\\defineXTXcharinjectionclass[lb:%s]\n']
local f_charclass_b = formatters['\\dosetXTXcharacterclass{"%05X}{lb:%s}\n']
local f_charclass_c = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n']
local f_hex         = formatters['%s %05X"']
local f_unicode     = formatters['U+%05X']
local f_entity      = formatters[' ["%s"] = %q, -- %s']

-- Writes "pdfr-def.tex" in the current directory: one \pdfglyphtounicode
-- line for every entry of characters.data that has an adobename, in sorted
-- key order. Does nothing when char-def.lua cannot be found, does not define
-- characters.data, or when the output file cannot be opened.
function scripts.chars.makepdfr()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable == "" then
        return
    end
    dofile(chartable) -- expected to define the global 'characters'
    if not (characters and characters.data) then
        return
    end
    local f = io.open("pdfr-def.tex", 'w')
    if not f then
        return
    end
    f:write(banner_pdf_1)
    local cd = characters.data
    local sd = table.sortedkeys(cd)
    for i=1,#sd do
        local char = cd[sd[i]]
        if char.adobename then
            f:write(f_tounicode(char.adobename,char.unicodeslot))
        end
    end
    f:write(banner_pdf_2)
    f:close()
end

local banner_utf_module = formatters [ [[
%% filename : %s
%% comment : generated by mtxrun --script chars --xtx
%% author : Hans Hagen, PRAGMA-ADE, Hasselt NL
%% copyright: PRAGMA ADE / ConTeXt Development Team
%% license : see context related readme files
]] ]

local banner_utf_mappings = [[
% lc/uc/catcode mappings
]]

local banner_utf_patch = [[
% patch needed for turkish
\setXTXcharcodes "201C "201C "201C
\setXTXcharcodes "201D "201D "201D
% patch needed for french
\setXTXcharcodes "2019 "2019 "2019
]]

local banner_utf_names = [[
% named characters mapped onto utf (\\char is needed for accents)
]]

local banner_utf_classes = [[
% some character classes for xetex; seems to be rather hard coded, these numbers
% and also a mix of several classes; here we do linebreaks
]]

local banner_utf_finish = [[
\endinput
]]

-- Linebreak classes that are exported; only the keys are used by this
-- script (as "lb:<key>" class names and as a membership test).
local xtxclasses = {
    id = 1,
    ex = 3,
    is = 3,
    cm = 256,
    op = 2,
    ns = 3,
    cl = 3,
}

-- Opens 'name' for writing and emits the module banner followed by 'banner'.
-- Returns the file handle, or nil when the file cannot be opened.
local function open_output(name,banner)
    local f = io.open(name,'w')
    if f then
        report("writing '%s'",name)
        f:write(banner_utf_module(name))
        f:write(banner)
        return f
    end
end

-- Terminates a generated file with \endinput and closes it.
local function close_output(f)
    f:write(banner_utf_finish)
    f:close()
end

-- xetx-utf.mkii: lc/uc mappings for cased letters up to 0xFFFF, followed by
-- range entries of category 'lo' and the fixed quote patches.
local function write_mappings(list,data)
    local f = open_output("xetx-utf.mkii",banner_utf_mappings)
    if not f then
        return
    end
    for i=1,#list do
        local code = list[i]
        if code <= 0xFFFF then
            local chr = data[code]
            local cc = chr.category
            if cc == 'll' or cc == 'lu' or cc == 'lt' then
                local lccode = chr.lccode or code
                local uccode = chr.uccode or code
                -- a table valued mapping cannot be expressed as one code,
                -- so the character then maps onto itself
                if type(lccode) == "table" then
                    lccode = code
                end
                if type(uccode) == "table" then
                    uccode = code
                end
                f:write(f_case(code,lccode,uccode,chr.description))
            end
        end
    end
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        if chr and chr.range and chr.category == 'lo' then
            f:write(f_range(code,chr.range))
        end
    end
    f:write(banner_utf_patch)
    close_output(f)
end

-- xetx-chr.mkii: one \def per named character in the range 0x5C..0xFFFF,
-- skipping names that end in "space". Names are padded to the longest
-- contextname found in that range.
local function write_names(list,data)
    local f = open_output("xetx-chr.mkii",banner_utf_names)
    if not f then
        return
    end
    local length = 0
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr then
                local l = #(chr.contextname or "")
                if l > length then
                    length = l
                end
            end
        end
    end
    local f_def = formatters["\\def\\%-".. length .. "s{\\char\"%05X } %% %s: %s\n"]
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr then
                local contextname = chr.contextname
                if contextname and not find(contextname,"space$") then
                    local ch = utfchar(code)
                    f:write(f_def(contextname, code, chr.description, ch))
                end
            end
        end
    end
    close_output(f)
end

-- xetx-cls.mkii: class definitions, then runs of successive list entries
-- that share an exported linebreak class, then the explicit range entries.
local function write_classes(list,data)
    local f = open_output("xetx-cls.mkii",banner_utf_classes)
    if not f then
        return
    end
    for k, v in next, xtxclasses do
        f:write(f_charclass_a(k))
    end
    f:write("\n")
    local i_first, i_last, i_clb = nil, nil, nil
    -- writes the pending run (if any) and starts afresh
    local function flush()
        if i_first then
            if i_first == i_last then
                f:write(f_charclass_b(i_first,i_clb))
            else
                f:write(f_charclass_c(i_first,i_last,i_clb))
            end
        end
        i_first, i_last, i_clb = nil, nil, nil
    end
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        local clb = chr and chr.linebreak
        if not xtxclasses[clb] then
            flush()
        elseif clb == i_clb and i_first then
            -- NOTE(review): a run is extended by list order, so codes that
            -- are absent from the table but lie inside the run get the
            -- class too; confirm this is intended
            i_last = code
        else
            flush()
            i_first, i_last, i_clb = code, code, clb
        end
    end
    flush()
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        if chr and chr.range then
            local lbc = chr.linebreak
            if xtxclasses[lbc] then
                f:write(f_classes(code,chr.range,lbc))
            end
        end
    end
    close_output(f)
end

-- Generates xetx-utf.mkii, xetx-chr.mkii and xetx-cls.mkii in the current
-- directory from characters.data. Does nothing when char-def.lua cannot be
-- found or does not define characters.data; a file that cannot be opened is
-- skipped.
function scripts.chars.makeencoutf()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable == "" then
        return
    end
    dofile(chartable) -- expected to define the global 'characters'
    local data = characters and characters.data
    if not data then
        return
    end
    local list = table.sortedkeys(data)
    write_mappings(list,data)
    write_names(list,data)
    write_classes(list,data)
end

local entityfiles = {
    "http://www.w3.org/2003/entities/2007/w3centities-f.ent",
    "http://www.w3.org/2003/entities/2007/htmlmathml-f.ent",
}

-- Matches one declaration of the form <!ENTITY name "value"> and captures
-- the name and the value.
local p_entity = '<!ENTITY%s+(%S+)%s+"(.-)"%s*>'

-- Entities that are emitted with a fixed character and code point instead
-- of being decoded from their declared value.
local fixedentities = {
    lt  = { "<", 0x3C },
    gt  = { ">", 0x3E },
    amp = { "&", 0x26 },
}

-- Collects entity declarations from local copies of the files listed in
-- entityfiles (looked up by basename through resolvers.findfile) and saves
-- them as a Lua table definition in "xmlentities.tmp". The first
-- declaration of a name wins; "newline" is skipped on purpose. Files that
-- cannot be found or read are reported and skipped.
function scripts.chars.xmlentities()
    local done = { }
    local entities = {
        "local entities = utilities.storage.allocate {"
    }
    for i=1,#entityfiles do
        local f = entityfiles[i]
        local s = url.hashed(f)
        local b = file.basename(s.path)
        local n = resolvers.findfile(b) or ""
        local data = n ~= "" and io.loaddata(n)
        if not data then
            report("unable to load entity file '%s'",b)
        else
            for name, value in gmatch(data,p_entity) do
                if not done[name] then
                    done[name] = true
                    local str, hex
                    local fixed = fixedentities[name]
                    if name == "newline" then
                        -- let's forget about that one
                    elseif fixed then
                        str, hex = fixed[1], f_unicode(fixed[2])
                    else
                        -- a value can hold several numeric references; they
                        -- are concatenated into one string
                        for t, c in gmatch(value,"&#([x]*)([^;]+);") do
                            if t == "x" then
                                c = tonumber(c,16)
                            else
                                c = tonumber(c)
                            end
                            if not c then
                                -- not a number, nothing we can convert
                            elseif str then
                                str, hex = str .. utfchar(c), f_hex(hex,c)
                            else
                                str, hex = utfchar(c), f_unicode(c)
                            end
                        end
                    end
                    if str and hex then
                        entities[#entities+1] = f_entity(name,str,hex)
                    end
                end
            end
        end
    end
    entities[#entities+1] = "}"
    io.savedata("xmlentities.tmp",concat(entities,"\n"))
end

if environment.argument("stix") then
    local inname  = environment.files[1] or ""
    local outname = environment.files[2] or ""
    scripts.chars.stixtomkiv(inname,outname)
elseif environment.argument("entities") then
    scripts.chars.xmlentities()
elseif environment.argument("xtx") then
    scripts.chars.makeencoutf()
elseif environment.argument("pdf") then
    scripts.chars.makepdfr()
elseif environment.argument("exporthelp") then
    application.export(environment.argument("exporthelp"),environment.files[1])
else
    application.help()
end