-- mtx-chars.lua / last modification: 2020-01-30 14:16
-- Record this script's metadata under its own name in the global `modules`
-- table, creating that table first when nothing has set it up yet.
modules = modules or { }

modules["mtx-chars"] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- obsolete: --stix                convert stix table to math table

-- XML description of this script (name, version and the supported flags). It
-- is passed as `helpinfo` to logs.application when the application object is
-- created. The --xtx entry names the xetx-*.mkii files because that is what
-- scripts.chars.makeencoutf writes (xetx-utf.mkii, xetx-chr.mkii and
-- xetx-cls.mkii).
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-chars</entry>
  <entry name="detail">MkII Character Table Generators</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="xtx"><short>generate xetx-*.mkii (used by xetex)</short></flag>
    <flag name="pdf"><short>generate pdfr-def.tex (used by pdftex)</short></flag>
    <flag name="entities"><short>generate entities table</short></flag>
   </subcategory>
  </category>
 </flags>
</application>
]]

-- Build the application object for this script from its name, banner and
-- XML help text, and keep its report function at hand for later messages.
local application = logs.application({
    helpinfo = helpinfo,
    banner   = "MkII Character Table Generators 0.10",
    name     = "mtx-chars",
})

local report = application.report

local format, gmatch, upper, lower, find = string.format, string.gmatch, string.upper, string.lower, string.find
local formatters = string.formatters
local tonumber, type = tonumber, type
local concat = table.concat
local utfchar = utf.char

-- Make sure the global `scripts` table and its `chars` sub-table exist
-- before the functions of this script are attached to them.
if not scripts then
    scripts = { }
end
if not scripts.chars then
    scripts.chars = { }
end

-- Handler for the obsolete --stix flag. No conversion takes place any more:
-- the function only reports that the option is no longer used. Both
-- parameters are kept so that existing callers keep working, but neither is
-- looked at.
--
-- inname  : name of the input file (ignored)
-- outname : name of the output file (ignored)
function scripts.chars.stixtomkiv(inname,outname)
    report("we no longer use this option but use our own tables instead")
end

-- Opening lines of the generated pdfr-def.tex; scripts.chars.makepdfr writes
-- them before the first glyph mapping. This is a long string, so every line
-- ends up in the file exactly as written here.
local banner_pdf_1 = [[
% filename : pdfr-def.tex
% comment  : generated by mtxrun --script chars --pdf
% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
% copyright: PRAGMA ADE / ConTeXt Development Team
% license  : see context related readme files
%
]]

-- Closing lines of pdfr-def.tex, written after the last glyph mapping.
local banner_pdf_2 = [[
%
\endinput
]]

-- Line templates for the generated files; each one is compiled once here and
-- called with its values where a line is written.

-- pdfr-def.tex: glyph name and unicode slot of one character
local f_tounicode   = formatters['\\pdfglyphtounicode{%s}{%04X}%%\n']
-- xetx-utf.mkii: code, lowercase code, uppercase code and description
local f_case        = formatters['\\setXTXcharcodes "%05X "%05X "%05X %% %s\n']
-- xetx-utf.mkii: first and last code of a range
local f_range       = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharcodes\\recurselevel\\recurselevel\\recurselevel}\n']
-- xetx-cls.mkii: first code, last code and linebreak class of a range entry
-- (same template text as f_charclass_c)
local f_classes     = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n']
-- xetx-cls.mkii: definition of one linebreak class
local f_charclass_a = formatters['\\defineXTXcharinjectionclass[lb:%s]\n']
-- xetx-cls.mkii: a single code with its linebreak class
local f_charclass_b = formatters['\\dosetXTXcharacterclass{"%05X}{lb:%s}\n']
-- xetx-cls.mkii: a run of codes (first, last) sharing one linebreak class
local f_charclass_c = formatters['\\dofastrecurse{"%05X}{"%05X}{1}{\\dosetXTXcharacterclass\\fastrecursecounter{lb:%s}}\n']
-- entities table: appends one more code point to an already started list;
-- the result reads like "U+02242 U+00338", in line with f_unicode (the
-- template used to end in a stray double quote and had no U+ prefix)
local f_hex         = formatters['%s U+%05X']
-- entities table: first (or only) code point of an entity
local f_unicode     = formatters['U+%05X']
-- entities table: one entry with name, quoted string and code point comment
local f_entity      = formatters['    ["%s"] = %q, -- %s']

-- Generates pdfr-def.tex in the current directory: one \pdfglyphtounicode
-- line for every character in char-def.lua that has an adobename, in sorted
-- key order. Nothing is written (and nothing is reported) when char-def.lua
-- cannot be located, when loading it leaves no characters.data behind, or
-- when the output file cannot be opened.
function scripts.chars.makepdfr()
    local deffile = resolvers.findfile("char-def.lua")
    if not deffile or deffile == "" then
        return
    end
    -- running the file is expected to leave its table in the global
    -- characters.data, which is what gets checked next
    dofile(deffile)
    local chardata = characters and characters.data
    if not chardata then
        return
    end
    local handle = io.open("pdfr-def.tex","w")
    if not handle then
        return
    end
    handle:write(banner_pdf_1)
    local slots = table.sortedkeys(chardata)
    for index=1,#slots do
        local entry     = chardata[slots[index]]
        local glyphname = entry.adobename
        if glyphname then
            handle:write(f_tounicode(glyphname,entry.unicodeslot))
        end
    end
    handle:write(banner_pdf_2)
    handle:close()
end

-- Header shared by the generated xetx-*.mkii files. Unlike the other banners
-- this one is a formatter: %s receives the file name, and the percent signs
-- are doubled because the text is used as a format template.
local banner_utf_module = formatters [ [[
%% filename : %s
%% comment  : generated by mtxrun --script chars --xtx
%% author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
%% copyright: PRAGMA ADE / ConTeXt Development Team
%% license  : see context related readme files
]] ]

-- Section title written right after the header of xetx-utf.mkii.
local banner_utf_mappings = [[

% lc/uc/catcode mappings

]]

-- Fixed \setXTXcharcodes lines for U+201C, U+201D and U+2019, written at the
-- end of xetx-utf.mkii after the generated mappings.
local banner_utf_patch = [[

% patch needed for turkish

\setXTXcharcodes "201C "201C "201C
\setXTXcharcodes "201D "201D "201D

% patch needed for french

\setXTXcharcodes "2019 "2019 "2019

]]

-- Section title written right after the header of xetx-chr.mkii. This is a
-- long string, in which a backslash is an ordinary character, so \char is
-- written with a single backslash (a doubled one would show up as \\char in
-- the generated file).
local banner_utf_names = [[

% named characters mapped onto utf (\char is needed for accents)

]]

-- Section title written right after the header of xetx-cls.mkii.
local banner_utf_classes = [[

% some character classes for xetex; seems to be rather hard coded, these numbers
% and also a mix of several classes; here we do linebreaks

]]

-- Last lines of every generated xetx-*.mkii file; written just before the
-- file is closed.
local banner_utf_finish = [[

\endinput
]]

-- Linebreak classes that end up in xetx-cls.mkii. The keys are compared with
-- the `linebreak` field of a character; a character whose class is not
-- listed here is left out. The code in this file only uses the keys and
-- tests whether an entry exists, the numbers themselves are never written.
-- NOTE(review): the numbers look like xetex class numbers -- confirm before
-- relying on them.
local xtxclasses = {
    id =   1,
    ex =   3,
    is =   3,
    cm = 256,
    op =   2,
    ns =   3,
    cl =   3,
}

-- Opens one of the generated xetx-*.mkii files for writing, reports its name
-- and starts it with the shared header followed by the file specific banner.
-- Returns the file handle, or nothing when the file cannot be opened.
local function openxtxfile(name,banner)
    local f = io.open(name,'w')
    if f then
        report("writing '%s'",name)
        f:write(banner_utf_module(name))
        f:write(banner)
        return f
    end
end

-- Ends a generated file with the closing banner and closes it.
local function closextxfile(f)
    f:write(banner_utf_finish)
    f:close()
end

-- Writes xetx-utf.mkii: a \setXTXcharcodes line for every letter (category
-- ll, lu or lt) up to 0xFFFF, then a recurse line for every range entry of
-- category lo, then the fixed patch lines.
local function writextxmappings(list,data)
    local f = openxtxfile("xetx-utf.mkii",banner_utf_mappings)
    if not f then
        return
    end
    for i=1,#list do
        local code = list[i]
        if code <= 0xFFFF then
            local chr = data[code]
            local cc = chr.category
            if cc == 'll' or cc == 'lu' or cc == 'lt' then
                local lccode = chr.lccode or code
                local uccode = chr.uccode or code
                -- a table valued mapping cannot be written as one code, so
                -- the character then maps onto itself
                if type(lccode) == "table" then
                    lccode = code
                end
                if type(uccode) == "table" then
                    uccode = code
                end
                f:write(f_case(code,lccode,uccode,chr.description))
            end
        end
    end
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        -- NOTE(review): chr.range is formatted as a number (the last code of
        -- the range) -- confirm that char-def.lua stores it that way
        if chr and chr.range and chr.category == 'lo' then
            f:write(f_range(code,chr.range))
        end
    end
    f:write(banner_utf_patch)
    closextxfile(f)
end

-- Writes xetx-chr.mkii: a \def for every character between 0x5C and 0xFFFF
-- that has a contextname, except names that end in "space".
-- NOTE(review): why codes up to 0x5B are skipped is not visible here.
local function writextxnames(list,data)
    local f = openxtxfile("xetx-chr.mkii",banner_utf_names)
    if not f then
        return
    end
    -- the longest name determines the column width of the definitions
    local length = 0
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr then
                local l = #(chr.contextname or "")
                if l > length then
                    length = l
                end
            end
        end
    end
    local f_def = formatters["\\def\\%-".. length .. "s{\\char\"%05X } %% %s: %s\n"]
    for i=1,#list do
        local code = list[i]
        if code > 0x5B and code <= 0xFFFF then
            local chr = data[code]
            if chr then
                local contextname = chr.contextname
                if contextname and not find(contextname,"space$") then
                    f:write(f_def(contextname,code,chr.description,utfchar(code)))
                end
            end
        end
    end
    closextxfile(f)
end

-- Writes xetx-cls.mkii: the class definitions, then the class assignments
-- for runs of characters that share a linebreak class listed in xtxclasses,
-- then the assignments for range entries.
local function writextxclasses(list,data)
    local f = openxtxfile("xetx-cls.mkii",banner_utf_classes)
    if not f then
        return
    end
    -- sorted, so that the generated file comes out the same on every run
    -- (the traversal order of next over a table is not specified)
    local classes = table.sortedkeys(xtxclasses)
    for i=1,#classes do
        f:write(f_charclass_a(classes[i]))
    end
    f:write("\n")
    -- the run that is being collected: first code, last code and its class
    local i_first, i_last, i_clb = nil, nil, nil
    local function flush()
        if i_first then
            if i_first == i_last then
                f:write(f_charclass_b(i_first,i_clb))
            else
                f:write(f_charclass_c(i_first,i_last,i_clb))
            end
        end
        i_first, i_last, i_clb = nil, nil, nil
    end
    -- NOTE(review): a run reaches from its first to its last code without a
    -- check that the codes in between are present in the table, so absent
    -- codes inside a run get the class as well -- confirm this is intended
    for i=1,#list do
        local code = list[i]
        local chr  = data[code]
        local clb  = chr and chr.linebreak
        if not xtxclasses[clb] then
            -- not a class we handle: this ends the current run
            flush()
        elseif clb == i_clb then
            if i_first then
                i_last = code
            else
                i_first, i_last, i_clb = code, code, clb
            end
        else
            flush()
            i_first, i_last, i_clb = code, code, clb
        end
    end
    flush()
    f:write("\n")
    for i=1,#list do
        local code = list[i]
        local chr = data[code]
        if chr and chr.range then
            local lbc = chr.linebreak
            if xtxclasses[lbc] then
                f:write(f_classes(code,chr.range,lbc))
            end
        end
    end
    closextxfile(f)
end

-- Generates the three xetex support files xetx-utf.mkii, xetx-chr.mkii and
-- xetx-cls.mkii in the current directory from the character data that
-- char-def.lua leaves in the global characters.data. Nothing is written when
-- char-def.lua cannot be located or provides no data; a file that cannot be
-- opened is skipped and the remaining ones are still written.
function scripts.chars.makeencoutf()
    local chartable = resolvers.findfile("char-def.lua") or ""
    if chartable ~= "" then
        dofile(chartable)
        local data = characters and characters.data
        if data then
            local list = table.sortedkeys(data)
            writextxmappings(list,data)
            writextxnames(list,data)
            writextxclasses(list,data)
        end
    end
end

-- Entity definition files that scripts.chars.xmlentities reads, in this
-- order. Nothing is downloaded by the code in this file: only the base name
-- of the path of each url is used, and that name is looked up with
-- resolvers.findfile.
local entityfiles = {
    "http://www.w3.org/2003/entities/2007/w3centities-f.ent",
    "http://www.w3.org/2003/entities/2007/htmlmathml-f.ent",
}

-- Generates xmlentities.tmp in the current directory: the source of a Lua
-- table that maps entity names onto utf strings, with the code points as a
-- comment after each entry. The names and values are taken from the <!ENTITY
-- declarations in the files listed in entityfiles; when a name occurs more
-- than once the first occurrence wins. A file that cannot be found or loaded
-- is reported and skipped.
function scripts.chars.xmlentities()
    local done = { }
    local entities = { "local entities = utilities.storage.allocate {" }
    for i=1,#entityfiles do
        local f = entityfiles[i]
        local s = url.hashed(f)
        local b = file.basename(s.path)
        local n = resolvers.findfile(b) or ""
        local data = n ~= "" and io.loaddata(n)
        if not data then
            report("unable to load entity file '%s'",b)
        else
            for name, value in gmatch(data,'<!ENTITY +(%S+) +"(.-)" *>') do
                if not done[name] then
                    done[name] = true
                    local str, hex
                    if name == "newline" then
                        -- let's forget about that one
                    elseif name == "lt" then
                        -- these three get a fixed value instead of a parsed
                        -- one (presumably because their definitions are not
                        -- plain character references -- confirm)
                        str, hex = "<", f_unicode(0x3C)
                    elseif name == "gt" then
                        str, hex = ">", f_unicode(0x3E)
                    elseif name == "amp" then
                        str, hex = "&", f_unicode(0x26)
                    else
                        -- a value can hold several references; t is "x" for
                        -- a hexadecimal one and empty for a decimal one
                        for t, c in gmatch(value,"&#([x]*)([^;]+);") do
                            if t == "x" then
                                c = tonumber(c,16)
                            else
                                c = tonumber(c)
                            end
                            if not c then
                                -- not a number: skip this reference
                            elseif str then
                                str, hex = str .. utfchar(c), f_hex(hex,c)
                            else
                                str, hex = utfchar(c), f_unicode(c)
                            end
                        end
                    end
                    if str and hex then
                        entities[#entities+1] = f_entity(name,str,hex)
                    end
                end
            end
        end
    end
    entities[#entities+1] = "}"
    io.savedata("xmlentities.tmp",concat(entities,"\n"))
end

-- Command line dispatch. The flags are tested in a fixed order and only the
-- first one that is set is acted upon; without any known flag the help
-- information is shown.
do
    local argument = environment.argument
    local files    = environment.files

    if argument("stix") then
        scripts.chars.stixtomkiv(files[1] or "",files[2] or "")
    elseif argument("entities") then
        scripts.chars.xmlentities()
    elseif argument("xtx") then
        scripts.chars.makeencoutf()
    elseif argument("pdf") then
        scripts.chars.makepdfr()
    elseif argument("exporthelp") then
        application.export(argument("exporthelp"),files[1])
    else
        application.help()
    end
end