font-uni.mkii / last modification: 2020-01-30 14:15
%D \module
%D   [       file=font-uni,
%D        version=1999.10.10,
%D          title=\CONTEXT\ Font Macros,
%D       subtitle=\UNICODE,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

% Announce that this module is being loaded (status category `loading').
\writestatus{loading}{ConTeXt Font Macros / Unicode}

%D In \XETEX, unicode support is straightforward, so we
%D simply output a \type {\char} with a 16||bit number.

% XeTeX shortcut: \uchar{#1}{#2} directly emits \char with the number
% #2 + #1*\pluscclvi, \uc becomes an alias for \uchar, and the rest of
% this file is skipped. The \expandafter closes the conditional (\fi)
% before \endinput is executed.
\ifnum\texengine=\xetexengine
    \unexpanded\def\uchar#1#2{\char\numexpr#2+#1*\pluscclvi\relax}
    \let\uc\uchar
    \expandafter \endinput
\fi

%D Now comes the more traditional 8 bit \TEX\ hackery.

%D I wrote this module when Wang Lei asked me how to use
%D Chinese in \CONTEXT. From the samples he sent me, I deduced
%D that some mixture of one and two byte encoding was used,
%D which he confirmed. \TEX\ normally does not use the
%D characters $>127$, so as long as the two byte characters
%D have a first character with code $>127$, we can use active
%D characters to handle them. In an optimistic mood, I called
%D this module the \UNICODE\ font module. In the module that
%D handles Chinese, we will see that some more interpretation
%D is involved, which is why the macros handling those
%D characters look ahead.

% Start of the protected part; it is closed by the \protect at the end
% of this file. The macros below use internal names containing @, !
% and ? (for instance \@EA, \s!Unicode and \??uc).
\unprotect

%D \macros
%D   {handleunicodeflowglyph, uchar,
%D    handleunicodeglyph, insertunicodeglyph,
%D    unicodeposition, unicodeone, unicodetwo}
%D
%D For the moment \UNICODE\ support is rather primitive but
%D nevertheless effective. The reference to \UNICODE\ is not
%D entirely correct, since in many cases one will use \quote
%D {older} mappings, but in principle, \UNICODE\ can be
%D supported.
%D
%D We expect each character to come as two eight bit
%D characters. Those doubles are handled by making all
%D characters in the range $>127$ active, so that they can
%D pick up the next one, and act upon both their values.
%D Internally only numbers are used. A first implementation
%D simply internally prefixed the second part of the \UNICODE\
%D pair with \type {\string} or \type {\char}, but this was
%D not that handy when it came to testing those values.
%D Because in principle we are dealing with an encoding, the
%D making active is handled in \type {enco-uni}.
%D
%D There are two commands to handle unicode characters:
%D
%D \starttyping
%D \handleunicodeflowglyph{number}{character}
%D \uchar{number}{number}
%D \stoptyping
%D
%D The first one can be assigned to an active character, the
%D second one can be used to directly access a glyph. Both
%D commands call \type {\handleunicodeglyph} that in turn
%D calls \type {\insertunicodeglyph}. Both can be overruled
%D in specialized modules. The low level command \type
%D {\unicodeglyph} can best be left untouched, which is not
%D so much a problem because there is a hook into this macro:
%D \type {\unicodecharcommand}.
%D
%D In most cases one will redefine \type {\handleunicodeglyph}
%D in such a way that it identifies special situations first,
%D takes some actions next, calls \type {\insertunicodeglyph},
%D if needed with \type {\unicodecharcommand} changed, and
%D finally does some finishing:
%D
%D \starttyping
%D \def\handleunicodeglyph
%D   {take actions based on \unicodeone-two-position cq. \nextutoken
%D    redefine \unicodecharcommand if needed
%D    expand \insertunicodeglyph
%D    take some final actions}
%D \stoptyping

% Count register that is set to 256*\unicodeone+\unicodetwo right
% before \handleunicodeglyph is called.
\newcount\unicodeposition

%D The multistep approach is needed to pick up the second
%D token, since this token can have any value and any
%D catcode.

% the \relax trick prevents eating up the space (needed for
% korean)

% \handleunicodeflowglyph{#1}#2: #1 is the number of the first byte,
% #2 is the token that represents the second byte. A group is opened
% and #1 is stored (expanded) in \unicodeone. The \@EA chain
% (presumably \expandafter) expands \string#2 first, so that \chardef
% can store the character code of #2 in \nexttoken; right after that
% assignment \dohandleunicodeflowglyph is called (\afterassignment).
% The group is closed later, in \dodohandleunicodeflowglyph.
\def\handleunicodeflowglyph#1#2%
  {\begingroup
   \edef\unicodeone{#1}%
   \@EA\afterassignment\@EA\dohandleunicodeflowglyph % two redundant ea's
     \@EA\chardef\@EA\nexttoken\@EA`\string#2\relax}

% Called after the \chardef assignment. Its parameter text is the
% \relax that \handleunicodeflowglyph put behind the character, so
% that token is removed here. The token that follows is then stored
% in \nextutoken (\futurelet) before the next step is executed.
\def\dohandleunicodeflowglyph\relax
  {\futurelet\nextutoken\dodohandleunicodeflowglyph}

% Last step of the flow variant: the character code kept in \nexttoken
% becomes \unicodetwo, \unicodeposition is set to
% 256*\unicodeone+\unicodetwo, the glyph handler is called and the
% group that \handleunicodeflowglyph opened is closed.
\def\dodohandleunicodeflowglyph % todo tex (or maybe no longer)
  {\edef\unicodetwo{\the\nexttoken}%
   \unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
   \handleunicodeglyph
   \endgroup}

% \uchar{<first>}{<second>}: typeset one glyph from two numbers.
% Within a group both numbers are stored (expanded) in \unicodeone
% and \unicodetwo, \unicodeposition gets 256*first+second, and the
% current \handleunicodeglyph does the actual work.
\unexpanded\def\uchar#1#2%
  {\begingroup
     \edef\unicodeone{#1}\edef\unicodetwo{#2}%
     \unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
     \handleunicodeglyph
   \endgroup}

% Default meaning of \nextutoken when no lookahead has taken place.
\let\nextutoken\relax

% \lookaheaduchar{#1}{#2}: as \uchar, but the token that follows the
% two arguments is first stored in \nextutoken (\futurelet), so that a
% handler can test it. After \uchar has finished, \nextutoken is reset
% to \relax.
\unexpanded\def\lookaheaduchar#1#2%
  {\def\dolookaheaduchar{\uchar{#1}{#2}\let\nextutoken\relax}%
   \futurelet\nextutoken\dolookaheaduchar}

% Continuation of \uc: computes \unicodeposition from \unicodeone and
% \unicodetwo, calls the glyph handler and closes the group that \uc
% opened.
\def\dohandleucflowglyph
  {\unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
   \handleunicodeglyph
   \endgroup}

% \uc{<first>}{<second>}: variant of \uchar that looks ahead. Inside
% the group the token following the arguments is stored in \nextutoken
% (\futurelet, so the assignment is local to that group) before
% \dohandleucflowglyph places the glyph and closes the group.
\unexpanded\def\uc#1#2% used in tricky situations
  {\begingroup
   \edef\unicodeone{#1}%
   \edef\unicodetwo{#2}%
   \futurelet\nextutoken\dohandleucflowglyph}

% Place the glyph identified by the current \unicodeone/\unicodetwo
% pair by handing both to the low level \unicodeglyph.
\def\insertunicodeglyph{\unicodeglyph\unicodeone\unicodetwo}

% By default the handler just inserts the glyph; specialized modules
% (or \enableunicodefont) can install another handler.
\let\handleunicodeglyph\insertunicodeglyph

%D One can use the \type {\unicodeposition} in the macros
%D that handle pre and post material.

%D \macros
%D   {unicodestyle, unicodecharcommand}
%D
%D Each character pair will become one glyph. Because \TEX\
%D cannot handle fonts with more than 256 characters, we use
%D \TFM\ files for each range. The first character of the pair
%D is appended to the name of a font, and the second is used to
%D access the glyph in that font. This means that a particular
%D font is split up in subfonts with names in the range:
%D
%D \starttyping
%D <filename>80 ... <filename>ff
%D \stoptyping
%D
%D The \type {<filename>} as well as the composed name are
%D mapped ones. The next macros take care of this mapping.
%D Let us assume that the next mapping has taken place,
%D
%D \starttyping
%D \definefontsynonym [UnicodeRegular] [gbsong]
%D \stoptyping
%D
%D Let us also assume that we are dealing with the range \type
%D {b1}. Given that a font name results from:
%D
%D \starttyping
%D \truefontname{\truefontname{UnicodeRegular}b1}
%D \stoptyping
%D
%D we get \type {gbsongb1}. The outer \type {\truefontname}
%D takes care of additional mapping, so when we say:
%D
%D \starttyping
%D \definefontsynonym [gbsongb1] [gbsong-b1]
%D \stoptyping
%D
%D the filename used will be \type {gbsong-b1}. From the next
%D definition it will be clear that other fontshapes are also
%D supported. The prefix \type {Unicode} is mapped!
%D
%D The command \type {\unicodecharcommand} can be used to
%D handle special cases. At that moment \type {1em} is known.

% Font name prefix for the current style: the (mapped) name that
% belongs to \s!Unicode followed by \fontstylesuffix.
\def\unicodestyle{\truefontname\s!Unicode\fontstylesuffix}

% Hook applied to the \char command in \unicodeglyph. The default is
% \firstofoneargument (presumably: just pass the argument on).
\let\unicodecharcommand\firstofoneargument

% \unicodeglyph{<first>}{<second>} typesets one glyph, inside a group:
%
% - the mapping selected by \currentucharmapping turns #1 and #2 into
%   \unicodeone (suffix of the font name) and \unicodetwo (character
%   number within that font);
% - \bodyfontsize is multiplied by \unicodescale;
% - \unicodefont is loaded from the name composed of \unicodestyle and
%   \unicodeone, at \currentfontscale\bodyfontsize;
% - \unicodestrut is inserted and \char\unicodetwo is handed to
%   \unicodecharcommand.
\unexpanded\def\unicodeglyph#1#2% watch the double mapping
  {\begingroup
   \getvalue{@@\currentucharmapping\strippedcsname\uchar}{#1}{#2}% map to a hex font range
   \bodyfontsize\unicodescale\bodyfontsize
   % readable:
   % \doifelsefontsynonym{\unicodestyle\unicodeone}
   %   {\font\unicodefont=\truefontname{\unicodestyle\unicodeone}
   %       at \currentfontscale\bodyfontsize}
   %   {\font\unicodefont=\truefontname{\truefontname\unicodestyle\unicodeone}
   %       at \currentfontscale\bodyfontsize}%
   % unreadable but more efficient:
   \font\unicodefont=\truefontname{\doifelsefontsynonym{\unicodestyle
     \unicodeone}\empty\truefontname\unicodestyle\unicodeone}
      at \currentfontscale\bodyfontsize
   \unicodestrut % off by default
   \unicodefont\unicodecharcommand{\char\unicodetwo\relax}%
   \endgroup}

%D This handler is used by default, for instance in:
%D
%D \starttyping
%D \defineunicodefont [MySwitch] [MyFont] % [strut=no,command=\insertunicodeglyph]
%D
%D \definefontsynonym [MyFontRegular40] [Sans]
%D \definefontsynonym [MyFontBold40]    [SansBold]
%D
%D {\MySwitch     \uchar{"40}{`a}}
%D {\MySwitch \bf \uchar{"40}{`a}}
%D \stoptyping
%D
%D \starttyping
%D \definefontsynonym [MyFontRegular] [Sans]
%D \definefontsynonym [MyFontBold]    [SansBold]
%D \stoptyping
%D
%D This is also possible, but in that case the number is appended to the raw
%D font name!

%D \macros
%D   {currentucharmapping,defineucharmapping}
%D
%D A (plane,char) pair can be remapped using a uchar mapping
%D function. The default mapping is to convert the plane to a
%D lowercase hexadecimal number, and leave the number
%D untouched. The current remapping is kept in a macro.

% The mapping that \unicodeglyph applies; initially the default one
% that is defined below with \defineucharmapping{\s!default}.
\let\currentucharmapping\s!default

% \defineucharmapping{<name>}<parameter text>{<body>}: defines the
% mapping macro with the name @@<name>uchar (built with
% \strippedcsname\uchar). The parameter text and body that follow the
% name are picked up by \setvalue; \unicodeglyph calls the mapping
% with the two numbers as arguments.
\def\defineucharmapping#1%
  {\setvalue{@@#1\strippedcsname\uchar}}

% Default mapping: \unicodeone becomes #1 passed through
% \lchexnumbers (the lowercase hexadecimal form mentioned above) and
% \unicodetwo becomes #2 unchanged.
\defineucharmapping{\s!default}#1#2%
  {\edef\unicodeone{\lchexnumbers{#1}}\edef\unicodetwo{#2}}

%D An example of a remapping is the following:
%D
%D \starttyping
%D \defineucharmapping{GBK}#1#2%
%D   {\unicodeposition=#1
%D    \advance\unicodeposition -129
%D    \multiply\unicodeposition 190
%D    \advance\unicodeposition #2
%D    \advance\unicodeposition-\ifnum#2>127 65\else64\fi
%D    \dorepositionunicode}
%D \stoptyping
%D
%D This maps the GBK vector onto a compact GBK one. The
%D auxiliary macro is defined here as a goody.

% Split \unicodeposition into a font number and a character number.
% \scratchcounter is used twice, so the order of the steps matters:
% first it receives the result of \dosetdivision by 256 (presumably
% the integer quotient), which is incremented by one and stored in
% \unicodeone with a leading zero when below 10; then it receives the
% result of \dosetmodulo by 256, which is stored in \unicodetwo.
\def\dorepositionunicode
  {\dosetdivision\unicodeposition{256}\scratchcounter
   \advance\scratchcounter \plusone
   \edef\unicodeone{\ifnum\scratchcounter<10 0\fi\the\scratchcounter}%
   \dosetmodulo\unicodeposition{256}\scratchcounter
   \edef\unicodetwo{\the\scratchcounter}}

%D \macros
%D   {setunicodestrut, setunicodescale, nextutoken,
%D    handleunicodeglyph, insertunicodeglyph}
%D
%D A careful analysis of the previous macros shows that the
%D process of mapping comes down to:
%D
%D \startitemize[packed,n]
%D \item  taking care of preceding material (and spacing)
%D \item  defining the font at \type {\currentfontscale} $\times$
%D       \type {\unicodescale} $\times$ \type {\bodyfontsize}
%D \item  inserting a \type {\unicodestrut}
%D \item  inserting the character (glyph)
%D \item  executing some actions afterwards
%D \stopitemize
%D
%D The actions before and after placing the glyph are up to
%D the user supplied handler. This handler (\type
%D {\handleunicodeglyph}) must, at a certain moment, insert
%D the glyph using \type {\insertunicodeglyph}.

% \setunicodescale{<factor>}: store the factor in \unicodescale, the
% multiplier that \unicodeglyph applies to \bodyfontsize.
\def\setunicodescale#1{\def\unicodescale{#1}}

% \dosetunicodestrut{<height factor>}{<depth factor>}: define
% \unicodestrut as an invisible (zero width) rule that is #1 times
% \strutht high and #2 times \strutdp deep.
\def\dosetunicodestrut#1#2%
  {\def\unicodestrut
     {\vrule \!!width\zeropoint \!!height#1\strutht \!!depth#2\strutdp \relax}}

% \setunicodestrut{<height factor>}{<depth factor>}
%
% Installs \unicodestrut (by means of \dosetunicodestrut) when the
% resulting height #1\strutht or the resulting depth #2\strutdp is
% positive; when both are not, \unicodestrut is made empty. The depth
% test has to use the depth factor #2: testing #1 a second time drops
% a strut that has depth but no height.
\def\setunicodestrut#1#2% height depth
  {\ifdim#1\strutht>\zeropoint
     \dosetunicodestrut{#1}{#2}%
   \else\ifdim#2\strutdp>\zeropoint
     \dosetunicodestrut{#1}{#2}%
   \else
     \let\unicodestrut\empty
   \fi\fi}

% Disable the strut: \unicodestrut expands to nothing afterwards.
\def\resetunicodestrut{\let\unicodestrut\empty}

%D The additional scaling and strut default to:

% Scale 1 and a strut of exactly \strutht by \strutdp ...
\setunicodescale{1}
\setunicodestrut{1}{1}

%D But better is not to have a strut added by default:

% ... so the strut that was just installed is removed again.
\resetunicodestrut

%D The actual code for the additional actions as well as
%D specific spacing is handled outside these routines. The
%D character after the two that are under treatment is
%D available in \type {\nextutoken}.

%D \macros
%D   {defineunicodefont, setupunicodefont}
%D
%D Apart from this rather low level implementation, we also
%D provide a more user friendly alternative. Given that one
%D has defined:
%D
%D \starttyping
%D \defineunicodefont
%D   [SimChi] [SimplifiedChinese]
%D   [\c!scale=0.85,
%D    \c!height=1.25,
%D    \c!depth=1.00,
%D    \c!interlinespace=yes,
%D    \c!conversion=\chinesenumber,
%D    \c!command=\handlechineseunicodeglyph]
%D \stoptyping
%D
%D Together with:
%D
%D \starttyping
%D \definefontsynonym [SimplifiedChineseRegular] [gbsong]
%D \definefontsynonym [SimplifiedChineseSlanted] [gbsongsl]
%D \stoptyping
%D
%D we can now switch to Simplified Chinese by saying \type
%D {\SimChi}. Some values can be changed afterwards with
%D
%D \starttyping
%D \setupunicodefont[SimChi][...=...]
%D \stoptyping
%D
%D Specific initializations can be assigned to \type
%D {commands}.

% User command \defineunicodefont[name][font][settings or parent];
% up to three bracketed arguments are collected by \dotripleempty and
% handed to \dodefineunicodefont.
\def\defineunicodefont{\dotripleempty\dodefineunicodefont}

% \dodefineunicodefont[#1][#2][#3]
%
% #1: name of the switch command that gets defined
% #2: font name prefix, stored as the file parameter of #1
% #3: settings (key=value), nothing, or the name of another unicode
%     font definition whose parameters are copied
%
% With an empty #2 the command \#1 just expands to the text
% [uc font #1 undefined]. Otherwise, unless \??ff#2\s!Bold is already
% defined, the Bold, Slanted, Italic, BoldSlanted and BoldItalic
% synonyms of #2 are mapped onto the Regular, Slanted and Italic ones,
% and \#1 is defined as \enableunicodefont{#1}.
\def\dodefineunicodefont[#1][#2][#3]%
  {\doifassignmentelse{#3}
     {\setupunicodefont[#1][#3]}
     {\doifelsenothing{#3}
        {\setupunicodefont[#1][#3]}
        {\copyparameters
           [\??uc#1][\??uc#3]
           [\c!height,\c!depth,\c!scale,\c!commands,\c!strut,
            \c!interlinespace,\c!command,\c!conversion]}}%
   \doifelsenothing{#2}
     {\setvalue{#1}{[uc font #1 undefined]}}
     {\setvalue{\??uc#1\c!file}{#2}%
      \doifundefined{\??ff#2\s!Bold}
        {\definefontsynonym[#2\s!Bold]       [#2\s!Regular]%
         \definefontsynonym[#2\s!Slanted]    [#2\s!Regular]%
         \definefontsynonym[#2\s!Italic]     [#2\s!Regular]%
         \definefontsynonym[#2\s!BoldSlanted][#2\s!Slanted]%
         \definefontsynonym[#2\s!BoldItalic] [#2\s!Italic]}%
      \unexpanded\setvalue{#1}{\enableunicodefont{#1}}}}

% User command \setupunicodefont[name][settings]; the (optional)
% bracketed arguments are collected by \dodoubleempty and handed to
% \dosetupunicodefont.
\def\setupunicodefont{\dodoubleempty\dosetupunicodefont}

% \dosetupunicodefont[#1][#2]
%
% #1: name of the unicode font definition
% #2: settings (key=value list, may be empty)
%
% When #1 has no command parameter yet, all its parameters are first
% copied from the \s!default definition; after that the settings in
% #2 are applied. The list of copied keys is the same as the one in
% \dodefineunicodefont: keys are in the \c! namespace, so the strut
% key is \c!strut (the key tested in \enableunicodefont), not the
% value name \v!strut.
\def\dosetupunicodefont[#1][#2]% also predefines
  {\doifundefined{\??uc#1\c!command}
     {\copyparameters
        [\??uc#1][\??uc\s!default]
        [\c!height,\c!depth,\c!scale,\c!commands,\c!strut,
         \c!interlinespace,\c!command,\c!conversion]}%
   \getparameters[\??uc#1][#2]}

% \enableunicodefont{#1}: switch to the unicode font definition #1.
%
% The \s!Unicode synonym is pointed to the file parameter of #1, and
% \unicodescale, \unicodeheight, \unicodedepth, \unicodedigits and
% \handleunicodeglyph are redefined so that they fetch the matching
% parameter of #1 when used. Unless \currentregime is utf, the
% unicode regime is enabled. Depending on the interlinespace and strut
% parameters, \setupinterlinespace is called and the strut is set or
% reset. Finally the commands parameter of #1 is executed.
\def\enableunicodefont#1%
  {\definefontsynonym[\s!Unicode][\getvalue{\??uc#1\c!file}]%
   \def\unicodescale             {\getvalue{\??uc#1\c!scale}}%
   \def\unicodeheight            {\getvalue{\??uc#1\c!height}}%
   \def\unicodedepth             {\getvalue{\??uc#1\c!depth}}%
   \def\unicodedigits            {\getvalue{\??uc#1\c!conversion}}%
   \def\handleunicodeglyph       {\getvalue{\??uc#1\c!command}}%
   \doifnot\currentregime{utf}{\enableregime[unicode]}%
   % the following \relax's are really needed
   \doifvalue{\??uc#1\c!interlinespace}\v!yes
      \setupinterlinespace\relax
   \doifelsevalue{\??uc#1\c!strut}\v!yes
      {\setunicodestrut\unicodeheight\unicodedepth}
      {\resetunicodestrut}%
   \getvalue{\??uc#1\c!commands}\relax}

%D \macros
%D   {unicodedigits}
%D
%D For convenience we also predefine a number conversion
%D macro:

% Default number conversion; \enableunicodefont redefines
% \unicodedigits to use the conversion parameter of the enabled font.
\let\unicodedigits\number

%D Because we cannot be sure of the presence of all font
%D styles, we remap some by default.

% Fallbacks for styles that may be missing: Bold, Slanted and Italic
% map onto Regular, BoldSlanted onto Slanted, BoldItalic onto Italic.
\definefontsynonym[\s!Unicode\s!Bold][\s!Unicode\s!Regular]
\definefontsynonym[\s!Unicode\s!Slanted][\s!Unicode\s!Regular]
\definefontsynonym[\s!Unicode\s!Italic][\s!Unicode\s!Regular]
\definefontsynonym[\s!Unicode\s!BoldSlanted][\s!Unicode\s!Slanted]
\definefontsynonym[\s!Unicode\s!BoldItalic][\s!Unicode\s!Italic]

% Settings of the \s!default definition, from which
% \dosetupunicodefont copies the parameters of a new definition:
% scale and strut factors of 1, no strut, no interline space
% adaption, plain glyph insertion and \number as conversion.
\setupunicodefont
  [\s!default]
  [\c!scale=1,
   \c!height=1,
   \c!depth=1,
   \c!strut=\v!no,
   \c!interlinespace=\v!no,
   \c!conversion=\number,
   \c!command=\insertunicodeglyph]

% Close the part opened by \unprotect and stop reading this file.
\protect \endinput