font-uni.mkii / last modification: 2020-01-30 14:15
%D \module
%D   [       file=font-uni,
%D        version=1999.10.10,
%D          title=\CONTEXT\ Font Macros,
%D       subtitle=\UNICODE,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

\writestatus{loading}{ConTeXt Font Macros / Unicode}

%D In \XETEX, unicode support is straightforward, so we
%D simply output a \type {\char} with a 16||bit number.

    \expandafter \endinput

%D Now comes the more traditional 8 bit \TEX\ hackery.

%D I wrote this module when Wang Lei asked me how to use
%D Chinese in \CONTEXT. From the samples he sent me, I deduced
%D that some mixture of one and two byte encoding was used,
%D which he confirmed. Since \TEX\ normally does not use the
%D characters $>127$, as long as the two byte characters
%D have a first character with code $>127$, we can use active
%D characters to handle them. In an optimistic mood, I called
%D this module the \UNICODE\ font module. In the module that
%D handles Chinese, we will see that some more interpretation
%D is involved, which is why the macros handling those
%D characters look ahead.


%D \macros
%D   {handleunicodeflowglyph, uchar,
%D    handleunicodeglyph, insertunicodeglyph,
%D    unicodeposition, unicodeone, unicodetwo}
%D For the moment \UNICODE\ support is rather primitive but
%D nevertheless effective. The reference to \UNICODE\ is not
%D entirely correct, since in many cases one will use \quote
%D {older} mappings, but in principle, \UNICODE\ can be
%D supported.
%D We expect each character to come as two eight bit
%D characters. Those doubles are handled by making all
%D characters in the range $>127$ active, so that they can
%D pick up the next one, and act upon both their values.
%D Internally only numbers are used. A first implementation
%D simply internally prefixed the second part of the \UNICODE\
%D pair with \type {\string} or \type {\char}, but this was
%D not that handy when it came to testing those values.
%D Because in principle we are dealing with an encoding, the
%D making active is handled in \type {enco-uni}.
%D There are two commands to handle unicode characters:
%D \starttyping
%D \handleunicodeflowglyph{number}{character}
%D \uchar{number}{number}
%D \stoptyping
%D The first one can be assigned to an active character, the
%D second one can be used to directly access a glyph. Both
%D commands call \type {\handleunicodeglyph} that in turn
%D calls \type {\insertunicodeglyph}. Both can be overruled
%D in specialized modules. The low level command \type
%D {\unicodeglyph} can best be left untouched, which is not
%D so much a problem because there is a hook into this macro:
%D \type {\unicodecharcommand}.
%D In most cases one will redefine \type {\handleunicodeglyph}
%D in such a way that it identifies special situations first,
%D takes some actions next, calls \type {\insertunicodeglyph},
%D if needed with \type {\unicodecharcommand} changed, and
%D finally does some finishing:
%D \starttyping
%D \def\handleunicodeglyph
%D   {take actions based on \unicodeone-two-position cq. \nextutoken
%D    redefine \unicodecharcommand if needed
%D    expand \insertunicodeglyph
%D    take some final actions}
%D \stoptyping


%D The multistep approach is needed to pick up the second
%D token, since this token can have any value and any
%D catcode.

% the \relax trick prevents eating up the space (needed for
% Korean)

   \@EA\afterassignment\@EA\dohandleunicodeflowglyph % two redundant ea's


\def\dodohandleunicodeflowglyph % todo tex (or maybe no longer)

\unexpanded\def\uchar#1#2% use as standalone glyph




\unexpanded\def\uc#1#2% used in tricky situations


%D One can use the \type {\unicodeposition} in the macros
%D that handle pre and post material.

%D \macros
%D   {unicodestyle, unicodecharcommand}
%D Each character pair will become one glyph. Because \TEX\
%D cannot handle fonts with more than 256 characters, we use
%D \TFM\ files for each range. The first character of the pair
%D is appended to the name of a font, and the second is used to
%D access the glyph in that font. This means that a particular
%D font is split up in subfonts with names in the range:
%D \starttyping
%D <filename>80 ... <filename>ff
%D \stoptyping
%D The \type {<filename>} as well as the composed name are
%D mapped ones. The next macros take care of this mapping.
%D Let us assume that the next mapping has taken place,
%D \starttyping
%D \definefontsynonym [UnicodeRegular] [gbsong]
%D \stoptyping
%D Let us also assume that we are dealing with the range \type
%D {b1}. Given that a font name results from:
%D \starttyping
%D \truefontname{\truefontname{UnicodeRegular}b1}
%D \stoptyping
%D we get \type {gbsongb1}. The outer \type {\truefontname}
%D takes care of additional mapping, so when we say:
%D \starttyping
%D \definefontsynonym [gbsongb1] [gbsong-b1]
%D \stoptyping
%D the filename used will be \type {gbsong-b1}. From the next
%D definition it will be clear that other fontshapes are also
%D supported. The prefix \type {Unicode} is mapped!
%D The command \type {\unicodecharcommand} can be used to
%D handle special cases. At that moment \type {1em} is known.



\unexpanded\def\unicodeglyph#1#2% watch the double mapping
   \getvalue{@@\currentucharmapping\strippedcsname\uchar}{#1}{#2}% map to a to hex font range
   % readable:
   % \doifelsefontsynonym{\unicodestyle\unicodeone}
   %   {\font\unicodefont=\truefontname{\unicodestyle\unicodeone}
   %       at \currentfontscale\bodyfontsize}
   %   {\font\unicodefont=\truefontname{\truefontname\unicodestyle\unicodeone}
   %       at \currentfontscale\bodyfontsize}%
   % unreadable but more efficient:
      at \currentfontscale\bodyfontsize
   \unicodestrut % off by default

%D This handler is used by default, for instance in:
%D \starttyping
%D \defineunicodefont [MySwitch] [MyFont] % [strut=no,command=\insertunicodeglyph]
%D \definefontsynonym [MyFontRegular40] [Sans]
%D \definefontsynonym [MyFontBold40]    [SansBold]
%D {\MySwitch     \uchar{"40}{`a}}
%D {\MySwitch \bf \uchar{"40}{`a}}
%D \stoptyping
%D \starttyping
%D \definefontsynonym [MyFontRegular] [Sans]
%D \definefontsynonym [MyFontBold]    [SansBold]
%D \stoptyping
%D This is also possible, but in that case the number is appended to the raw font
%D name!

%D \macros
%D   {currentucharmapping,defineucharmapping}
%D A (plane,char) pair can be remapped using a uchar mapping
%D function. The default mapping is to convert the plane to a
%D lowercase hexadecimal number, and leave the number
%D untouched. The current remapping is kept in a macro.




%D An example of a remapping is the following:
%D \starttyping
%D \defineucharmapping{GBK}#1#2%
%D   {\unicodeposition=#1
%D    \advance\unicodeposition -129
%D    \multiply\unicodeposition 190
%D    \advance\unicodeposition #2
%D    \advance\unicodeposition-\ifnum#2>127 65\else64\fi
%D    \dorepositionunicode}
%D \stoptyping
%D This maps the GBK vector onto a compact GBK one. The
%D auxiliary macro is defined here as a goody.

   \advance\scratchcounter \plusone
   \edef\unicodeone{\ifnum\scratchcounter<10 0\fi\the\scratchcounter}%

%D \macros
%D   {setunicodestrut, setunicodescale, nextutoken,
%D    handleunicodeglyph, insertunicodeglyph}
%D A careful analysis of the previous macros shows that the
%D process of mapping comes down to:
%D \startitemize[packed,n]
%D \item  taking care of preceding material (and spacing)
%D \item  defining the font at \type {\currentfontscale} $\times$
%D       \type {\unicodescale} $\times$ \type {\bodyfontsize}
%D \item  inserting a \type {\unicodestrut}
%D \item  inserting the character (glyph)
%D \item  executing some actions afterwards
%D \stopitemize
%D The actions before and after placing the glyph are up to
%D the user supplied handler. This handler (\type
%D {\handleunicodeglyph}) must, at a certain moment, insert
%D the glyph using \type {\insertunicodeglyph}.


\def\dosetunicodestrut#1#2% height depth
        \!!width \zeropoint
        \!!depth #2\strutdp

\def\setunicodestrut#1#2% height depth


%D The additional scaling and strut default to:


%D But it is better not to have a strut added by default:


%D The actual code for the additional actions as well as
%D specific spacing is handled outside these routines. The
%D character after the two that are under treatment is
%D available in \type {\nextutoken}.

%D \macros
%D   {defineunicodefont, setupunicodefont}
%D Apart from this rather low level implementation, we also
%D provide a more user friendly alternative. Given that one
%D has defined:
%D \starttyping
%D \defineunicodefont
%D   [SimChi] [SimplifiedChinese]
%D   [\c!scale=0.85,
%D    \c!height=1.25,
%D    \c!depth=1.00,
%D    \c!interlinespace=yes,
%D    \c!conversion=\chinesenumber,
%D    \c!command=\handlechineseunicodeglyph]
%D \stoptyping
%D Together with:
%D \starttyping
%D \definefontsynonym [SimplifiedChineseRegular] [gbsong]
%D \definefontsynonym [SimplifiedChineseSlanted] [gbsongsl]
%D \stoptyping
%D we can now switch to Simplified Chinese by saying \type
%D {\SimChi}. Some values can be changed afterwards with
%D \starttyping
%D \setupunicodefont[SimChi][...=...]
%D \stoptyping
%D Specific initializations can be assigned to \type
%D {commands}.


     {\setvalue{#1}{[uc font #1 undefined]}}
        {\definefontsynonym[#2\s!Bold]       [#2\s!Regular]%
         \definefontsynonym[#2\s!Slanted]    [#2\s!Regular]%
         \definefontsynonym[#2\s!Italic]     [#2\s!Regular]%
         \definefontsynonym[#2\s!BoldItalic] [#2\s!Italic]}%


\def\dosetupunicodefont[#1][#2]% also predefines

   \def\unicodescale             {\getvalue{\??uc#1\c!scale}}%
   \def\unicodeheight            {\getvalue{\??uc#1\c!height}}%
   \def\unicodedepth             {\getvalue{\??uc#1\c!depth}}%
   \def\unicodedigits            {\getvalue{\??uc#1\c!conversion}}%
   \def\handleunicodeglyph       {\getvalue{\??uc#1\c!command}}%
   % the following \relax's are really needed

%D \macros
%D   {unicodedigits}
%D For convenience we also predefine a number conversion
%D macro:


%D Because we cannot be sure of the presence of all font
%D styles, we remap some by default.

% Default fallbacks for the generic Unicode font styles. Each line declares
% the style named in the first bracket as a synonym of the style named in
% the second bracket. The names are composed from \s! constants
% (presumably expanding to UnicodeBold, UnicodeRegular, ... -- the constants
% are defined outside this file).
%
% The three basic variants fall back directly on the regular style:
\definefontsynonym [\s!Unicode\s!Bold]        [\s!Unicode\s!Regular]
\definefontsynonym [\s!Unicode\s!Slanted]     [\s!Unicode\s!Regular]
\definefontsynonym [\s!Unicode\s!Italic]      [\s!Unicode\s!Regular]
% The combined bold variants fall back on their non-bold counterpart, which
% the lines above in turn map onto the regular style:
\definefontsynonym [\s!Unicode\s!BoldSlanted] [\s!Unicode\s!Slanted]
\definefontsynonym [\s!Unicode\s!BoldItalic]  [\s!Unicode\s!Italic]


\protect \endinput