% enco-ini.mkii / last modification: 2020-01-30 14:15
%D \module
%D   [       file=enco-ini,
%D        version=2007.02.19, % 2000.12.27, % 1998.12.03,
%D          title=\CONTEXT\ Encoding Macros,
%D       subtitle=Initialization,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

%D Quite some code will be moved to the mk files once we're ready
%D for it.

%D This module is a reimplementation of the module that handled
%D composed characters and non \ASCII\ characters. The changes
%D are not that fundamental, and mainly concern moving
%D definitions of specific glyphs and accents to other files as
%D well as moving plain handling of accents to this module
%D instead of overloading plain \TEX\ commands.

%D Patterns are kind of mixed with font encodings and
%D mappings. Alas.

% Fallback: when nothing has defined \synchronizepatterns before this
% module is read, make it a no-op (\relax) so that the calls further on
% in this file are harmless.
\ifx\synchronizepatterns\undefined
  \let\synchronizepatterns\relax
\fi

%D While dealing with input (the text source) and output (the
%D glyphs), encoding comes into view. To summarize a few:
%D
%D \startitemize
%D \item  Bytes in the input file are mapped to an internal
%D       representation. An~\type {a} often stays an~\type {a},
%D       but~\type {\"e} can become either one code or become
%D       two codes (ending in overlapping glyphs).
%D \item  Characters can be made active and mapped onto another
%D       character.
%D \item  When changing case, characters are mapped onto
%D       themselves, their case||counterpart or a reasonable
%D       alternative, like~\"e onto~e.
%D \item  Single character representations in a \DVI\ file can
%D       be mapped onto one or more characters, either or not
%D       in more than one font file (virtual fonts).
%D \item  In the final format, font collections can be
%D       partially embedded, thereby losing the one||to||one
%D       relation between several instances of one font.
%D \item  For special purposes, individual characters should be
%D       mapped onto a dedicated encoding vector, for instance
%D       \PDF\ document encoding.
%D \stopitemize
%D
%D These and other kinds of mappings are to be dealt with, and
%D the exact way of dealing often depends on the language to be
%D typeset.

% Announce the module; \writestatus is defined elsewhere (presumably it
% reports to the terminal and log).
\writestatus{loading}{ConTeXt Encoding Macros / Initialization}

% NOTE(review): \unprotect presumably makes characters like @, ! and ?
% usable in control sequence names, which the definitions below rely on
% (\@map@, \s!default, \??ec). No matching \protect is visible in this
% part of the file.
\unprotect

%D First we define a few local or not yet initialized constants.

% Prefixes for internally constructed control sequence names. \@map@ and
% \@cas@ are combined with \charactermapping to name the two token
% registers of a mapping (see \setmappingtoks and \checkmappingtoks);
% \@fha@ is not used in the part of the file visible here.
\def\@map@{@m@ap@} % mapping prefix
\def\@fha@{@f@ha@} % font prefix
\def\@cas@{@c@as@} % casecom prefix

% Fallback: when no language has been set up yet, \currentlanguage
% becomes \s!en.
\ifx\currentlanguage\undefined
  \let\currentlanguage\s!en
\fi

%D \macros
%D   {setupencoding}
%D
%D The following setup command is used to tune encoding
%D handling.

% \setupencoding [..,..=..,..]
%
% User command; the bracketed argument is collected by means of
% \dosingleargument and handed to \dosetupencoding.
\def\setupencoding
  {\dosingleargument\dosetupencoding}

% Stores the assignments under the \??ec prefix and then fixes
% \defaultencoding: it becomes \s!default when \@@ecdefault is empty and
% the (expanded) value of \@@ecdefault otherwise. \@@ecdefault is
% presumably the value of the "default" key; it is not set in this file.
\def\dosetupencoding[#1]%
  {\getparameters[\??ec][#1]%
   \edef\defaultencoding
     {\ifx\@@ecdefault\empty\s!default\else\@@ecdefault\fi}}

%D \macros
%D   {useencoding}
%D
%D Encoding related things are defined in separate files and are
%D loaded only once, using:
%D
%D \showsetup{useencoding}

% Loads the definition file of encoding #1, but only once: the control
% sequence named \c!file\f!encodingprefix#1 acts as "already seen" flag
% and is set to \empty before the file is read. The file name is built
% from \f!encodingprefix#1 (passed through \truefilename and
% \makeshortfilename) with the suffix .mkii. Depending on the branch that
% \readsysfile takes (presumably found / not found) message 2 or 3 of the
% \m!encodings group is shown.
\def\douseencoding#1%
  {\doifundefined{\c!file\f!encodingprefix#1}%
     {\letvalue{\c!file\f!encodingprefix#1}\empty
      \makeshortfilename[\truefilename{\f!encodingprefix#1}]%
      \startreadingfile
        \readsysfile{\shortfilename.mkii}
          {\showmessage\m!encodings2{#1}}
          {\showmessage\m!encodings3{#1}}%
      \stopreadingfile}}

% \useencoding[a,b,...]: applies \douseencoding to each entry of the
% comma separated list.
\def\useencoding[#1]%
  {\processcommalist[#1]\douseencoding}

%D \macros
%D   {startmapping,enablemapping}
%D
%D In order to process patterns, convert from lower to
%D uppercase and vice versa and some more, we provide a
%D mechanism to define mappings. The first real application
%D of this command was:
%D
%D \starttyping
%D \startmapping [something]
%D   \definecasemap 165 181 165
%D   \definecasemap 171 187 171
%D   ...
%D   \defineuppercasecom \i  {I}
%D   \defineuppercasecom \l  \L
%D   \definelowercasecom \AE \ae
%D   ...
%D \stopmapping
%D \stoptyping
%D
%D So, character 165 becomes 181 in lowercase and 165 in
%D uppercase. A mapping is activated with \type {\enablemapping}.

% Starts the definition of mapping #1: a group is opened (it is closed
% again in \stopsavingmappingtoks), \charactermapping is set to @#1@, the
% two token registers of this mapping are allocated when needed and
% aliased to \mappingtoks and \casecomtoks, and finally the current
% content of \mappingtoks is executed, so that the definitions that
% follow are applied on top of what was stored before.
\def\startsavingmappingtoks#1%
  {\bgroup
   \edef\charactermapping{@#1@}%
   \checkmappingtoks
   \setmappingtoks
   \the\mappingtoks}

% Finishes the definition of a mapping by storing the code tables as they
% are at this moment. The register \mappingtoks is emptied (globally) and
% then, for each code from 0 up to 255, the following is appended:
%
% - \settoletterunlessactive{code}, for codes above 127 only
% - an \lccode and an \uccode assignment (a zero value is stored as
%   \zerocount, other values as literal numbers)
% - an \sfcode assignment, unless the value equals \plusthousand
%
% The \edef expands \recurselevel and the \the\..code lookups, so the
% register ends up with numbers instead of references. After the group
% (opened in \startsavingmappingtoks) is closed, \enabledmapping is reset
% to \empty and the mapping given by \currentmapping is enabled again.
\def\stopsavingmappingtoks
  {\global\mappingtoks\emptytoks
   \dostepwiserecurse{0}{255}\plusone
     {\edef\@@expanded
        {\the\mappingtoks
         \ifnum\recurselevel>127
           \noexpand\settoletterunlessactive{\recurselevel}%
         \fi
         \lccode\recurselevel\ifnum\lccode\recurselevel=\zerocount\zerocount\else\space\the\lccode\recurselevel\space\fi
         \uccode\recurselevel\ifnum\uccode\recurselevel=\zerocount\zerocount\else\space\the\uccode\recurselevel\space\fi
         \ifnum\sfcode\recurselevel=\plusthousand\else\sfcode\recurselevel=\the\sfcode\recurselevel\space\fi
        }%
      \global\mappingtoks\expandafter{\@@expanded}}%
   \egroup
   \let\enabledmapping\empty
   \enablemapping[\currentmapping]}

% \startmapping[name] ... \stopmapping: the user level wrappers around
% \startsavingmappingtoks and \stopsavingmappingtoks.
\def\startmapping[#1]%
  {\startsavingmappingtoks{#1}}

\def\stopmapping
  {\stopsavingmappingtoks}

% \optimizemapping[name]: an empty start/stop pair, so the stored mapping
% is executed and written back in the form that \stopsavingmappingtoks
% produces.
\def\optimizemapping[#1]%
  {\startsavingmappingtoks{#1}%
   % nothing, just an automatic cleanup
   \stopsavingmappingtoks
   % we need to resync
   %\let\enabledmapping\relax
  }%\enablemapping[\currentmapping]}

% Makes \mappingtoks and \casecomtoks aliases (by means of \let) for the
% two token registers that belong to the mapping in \charactermapping:
% the one with the \@map@ prefix and the one with the \@cas@ prefix.
\def\setmappingtoks
  {\@EA\let\@EA\mappingtoks\csname\@map@\charactermapping\endcsname
   \@EA\let\@EA\casecomtoks\csname\@cas@\charactermapping\endcsname}

% Allocates (with \newtoks) each of these two registers when the
% corresponding name is not defined yet.
\def\checkmappingtoks
  {\ifundefined{\@map@\charactermapping}%
     \expandafter\newtoks\csname\@map@\charactermapping\endcsname
   \fi
   \ifundefined{\@cas@\charactermapping}%
     \expandafter\newtoks\csname\@cas@\charactermapping\endcsname
   \fi}

% \definecasemap <code> <lower> <upper>
% \definecasemap <from> to <upto>
%
% In the first form the \lccode of <code> is set to <lower> and its
% \uccode to <upper>. When the second argument is the word "to", the
% first and third argument are passed to \presetcaserange instead. Each
% of the three arguments is delimited by a space; spaces that follow are
% ignored.
\def\definecasemap #1 #2 #3 % code lower upper
  {\doifelse{#2}{to}
     {\presetcaserange{#1}{#3}}
     {\lccode#1=#2\relax
      \uccode#1=#3\relax}%
   \ignorespaces}

%D Saves a few tokens

% \definecaseswap <lower> <upper>
%
% Registers two codes as each others case counterpart: both get <lower>
% as \lccode and <upper> as \uccode. The four assignments are independent
% of each other; both arguments are delimited by a space.
\def\definecaseswap #1 #2 % lower upper
  {\lccode#1=#1\relax
   \lccode#2=#1\relax
   \uccode#1=#2\relax
   \uccode#2=#2\relax
   \ignorespaces}

% \definecaseself <code>
%
% The code is its own upper- and lowercase variant. The argument is
% delimited by a space.
\def\definecaseself #1 % lower=upper=self
  {\uccode#1=#1\relax
   \lccode#1=#1\relax
   \ignorespaces}

%D Watch the \type {\definecasemap 127 to 255} option!
%D Dedicated to Taco there is also:

% \definecasemaps <from> to <upto> lc <delta> uc <delta>
%
% For each code in the range the \lccode becomes code + #3 and the
% \uccode becomes code + #4. The sums are calculated in \scratchcounter.
\def\definecasemaps #1 to #2 lc #3 uc #4 % from to lc+ uc+
  {\dostepwiserecurse{#1}{#2}\plusone
     {\scratchcounter\recurselevel\advance\scratchcounter#3\lccode\recurselevel=\scratchcounter
      \scratchcounter\recurselevel\advance\scratchcounter#4\uccode\recurselevel=\scratchcounter}%
   \ignorespaces}

%D This can be used like:
%D
%D \starttyping
%D \definecasemaps 128 to 156 lc  32 uc 0
%D \definecasemaps 160 to 188 lc -32 uc 0
%D \definecasemaps 160 to 188 lc -32 uc 0
%D \definecasemaps 192 to 255 lc  32 uc 0
%D \stoptyping
%D
%D and saves a lot of typing (copying).

% \resetcaserange <from> to <upto>
%
% Clears the case mapping of a range: the \uccode and \lccode of each
% code in the range become \zerocount.
\def\resetcaserange #1 to #2
  {\dostepwiserecurse{#1}{#2}\plusone
     {\uccode\recurselevel\zerocount
      \lccode\recurselevel\zerocount}%
   \ignorespaces}

% \presetcaserange{from}{upto}
%
% Maps each code in the range onto itself, for uppercase as well as for
% lowercase. This is what \definecasemap calls for its "to" variant.
\def\presetcaserange#1#2% could be pre-expanded
  {\dostepwiserecurse{#1}{#2}\plusone
     {\uccode\recurselevel=\recurselevel
      \lccode\recurselevel=\recurselevel}%
   \ignorespaces}

% The \set... counterparts of \definecasemap, \definecaseswap and
% \definecaseself: the same \lccode and \uccode assignments, preceded by
% a call to \settoletterunlessactive for the codes involved, and without
% a trailing \ignorespaces. The numbers in the bodies are terminated by
% the space that comes from the end of the line, so these line breaks are
% significant.
\def\setcasemap #1 #2 #3 %
  {\settoletterunlessactive{#1}%
   \lccode #1=#2
   \uccode #1=#3 }

\def\setcaseswap #1 #2 %
  {\settoletterunlessactive{#1}%
   \settoletterunlessactive{#2}%
   \lccode #1=#1
   \uccode #2=#2
   \lccode #2=#1
   \uccode #1=#2 }

\def\setcaseself #1 %
  {\settoletterunlessactive{#1}%
   \lccode #1=#1
   \uccode #1=#1 }

% \definespacemap <code> <sfcode>
%
% Sets the space factor code of <code>; both arguments are delimited by
% a space.
\def\definespacemap #1 #2 % code sfcode
  {\sfcode#1=#2%
   \ignorespaces}

% \setspacemap <code> <sfcode>
%
% Same assignment, but preceded by \settootherunlessactive{<code>}. The
% case codes are left untouched (the assignments are commented out).
\def\setspacemap #1 #2 %
  {\settootherunlessactive{#1}%
   %\lccode #1=\zerocount
   %\uccode #1=\zerocount
   \sfcode #1=#2 }

% \defineuppercasecom \command {replacement}
% \definelowercasecom \command {replacement}
%
% These append (globally) an entry \setuppercasecom#1{#2} or
% \setlowercasecom#1{#2} to the register that \casecomtoks refers to at
% that moment, i.e. the one selected by \setmappingtoks. Nothing is
% defined yet at this point; the entries are only stored.
\def\defineuppercasecom#1#2%
  {\global\casecomtoks\expandafter{\the\casecomtoks\setuppercasecom#1{#2}}%
   \ignorespaces}

\def\definelowercasecom#1#2%
  {\global\casecomtoks\expandafter{\the\casecomtoks\setlowercasecom#1{#2}}%
   \ignorespaces}

% By default the stored entries do nothing: both arguments are gobbled.
% Further on in this file \everyuppercase and \everylowercase let the
% relevant one to \setcasecom.
\let\setuppercasecom\gobbletwoarguments
\let\setlowercasecom\gobbletwoarguments

% Defines #1 as #2. This definition is replaced further on in this file
% by one that also tests \casecommode.
\def\setcasecom#1#2{\def#1{#2}}

% \enabledmapping holds the mapping that was enabled last (in the @name@
% form), so that enabling the same mapping twice in a row can be skipped.
\let\enabledmapping\empty % indirect, needed to handle default too

% \enablemapping[name]
%
% Sets \charactermapping to @name@. Unless this equals \enabledmapping,
% the token register of the mapping is executed (when defined), the
% mapping is recorded in \enabledmapping and \enablelanguagespecifics is
% called for \currentlanguage. In all cases \synchronizepatterns is
% called at the end.
\def\enablemapping[#1]%
  {\edef\charactermapping{@#1@}%
   \ifx\enabledmapping\charactermapping \else
     \doifdefined{\@map@\charactermapping}
       {%\expandafter\showthe\csname\@map@\charactermapping\endcsname\endcsname
        \the\csname\@map@\charactermapping\endcsname}%
     % == \the\executeifdefined{\@map@\charactermapping}\emptytoks
     \edef\enabledmapping{\charactermapping}%
     \enablelanguagespecifics[\currentlanguage]% new
   % \edef\enabledmapping{\charactermapping\currentlanguage}% can be comma list
   \fi
   \synchronizepatterns}

% on behalf of font switching:

% \fastenablemapping{name}
%
% Variant of \enablemapping that takes a braced argument, tests the
% existence of the register with a plain \ifx...\relax and does not call
% \synchronizepatterns.
\def\fastenablemapping#1%
  {\edef\charactermapping{@#1@}%
   \ifx\enabledmapping\charactermapping \else
     \@EA\ifx\csname\@map@\charactermapping\endcsname\relax\else
       \the\csname\@map@\charactermapping\endcsname
     \fi
     % == \the\executeifdefined{\@map@\charactermapping}\emptytoks
     \let\enabledmapping\charactermapping
     \enablelanguagespecifics[\currentlanguage]% to faster
   \fi}

%D This macro will be implemented in \type {lang-ini.tex}.

% Fallback: as long as the language module has not defined it,
% \enablelanguagespecifics[...] takes its bracketed argument and does
% nothing.
\ifx\enablelanguagespecifics\undefined
  \def\enablelanguagespecifics[#1]{}
\fi

%D Further on we have to take some precautions when dealing
%D with special characters like~\type{~}, \type{_}
%D and~\type{^}, so let us define ourselves some handy macros
%D first.

% Gives the characters ~, _ and ^ the category code \@@letter. Before
% doing so, \unprotectfontcharacters is defined (expanded, so with the
% numbers filled in) such that it restores the category codes that these
% three characters have at the moment \protectfontcharacters is called.
% The \noexpand keeps an active character from being expanded in the
% \edef.
\def\protectfontcharacters
  {\edef\unprotectfontcharacters
     {\catcode`\noexpand ~=\the\catcode`~\relax
      \catcode`\noexpand _=\the\catcode`_\relax
      \catcode`\noexpand ^=\the\catcode`^\relax}%
   \catcode`~=\@@letter
   \catcode`_=\@@letter
   \catcode`^=\@@letter\relax}

%D The completeness of the Computer Modern Roman typefaces
%D makes clear how incomplete other faces are. To honour 7~bit
%D \ASCII, these fonts were designed using only the first 127
%D values of the 256 ones that can be presented by one byte.
%D Nowadays 8~bit character codings are more common, mainly
%D because they permit us to predefine some composed
%D characters, which are needed in most european languages.
%D
%D Supporting more than the standard \TEX\ encoding vector
%D |<|which in itself is far from standard and differs per
%D font|>| puts a burden on the fonts mechanism. The \CONTEXT\
%D mechanism is far from complete, but can handle several
%D schemes at once. The main problem lays in the accented
%D characters and ligatures like~ff, although handling
%D ligatures is not the responsibility of this module.
%D
%D By default, we use \PLAIN\ \TEX's approach of placing
%D accents. All other schemes sooner or later give problems
%D when \DVI||files are distributed across
%D machines and platforms. Nevertheless, we have to take care
%D of different encoding vectors, which tell us where to find
%D the characters we need. This means that all kind of
%D character placement macro's like \type{\"} and \type{\ae}
%D have to be implemented  and adapted in a way that suits
%D these vectors.
%D
%D The main difference between different vectors is the way
%D accents are ordered and/or the availability of prebuilt
%D accented characters. Accented characters can for instance be
%D called for by sequences like \type{\"e}. Here the \type{\"}
%D is defined as:
%D
%D \starttyping
%D \def\"#1{{\accent"7F #1}}
%D \stoptyping
%D
%D This macro places the accent \accent"7F {} on top of an~e
%D and gives \"e. Some fonts however can have prebuilt accents and
%D use a more direct approach like
%D
%D \starttyping
%D \def\"#1{\if#1e\char 235\else ... \fi}
%D \stoptyping
%D
%D The latter approach is not used in \CONTEXT, because we
%D store relevant combinations of accents and characters in
%D individual macros.

%D We define character substitutes and commands with definition
%D commands like:
%D
%D \starttyping
%D \startcoding[texnansi]
%D
%D \defineaccent " a           228
%D \defineaccent ^ e           234
%D \defineaccent ' {\dotlessi} 237
%D
%D \definecharacter ae 230
%D \definecharacter oe 156
%D
%D \definecommand b \texnansiencodedb
%D \definecommand c \texnansiencodedc
%D
%D \stopcoding
%D \stoptyping
%D
%D The last argument of \type{\defineaccent} and
%D \type{\definecharacter} tells \TEX\ the position of the
%D accented character in the encoding vector. In order to
%D accomplish this, we tag each implementation with the character
%D coding identifier. We therefore need two auxiliary variables
%D \type{\characterencoding} and \type{\nocharacterencoding}. These
%D contain the current and default encoding vectors and both
%D default to the \PLAIN\ one.

% Initial values. All three names are stored in the form @name@ and
% start out as the default (\s!default): the current encoding, the
% fallback encoding and the current mapping.
\edef\characterencoding   {@\s!default @}
\edef\nocharacterencoding {@\s!default @}
\edef\charactermapping    {@\s!default @}

% todo, else \d j == \dj, print file and check

% Prefixes that are put in front of the names under which accents,
% commands and characters are stored. All three are empty; the commented
% values (*, = and -) would keep the three name spaces apart.
\def\accentprefix   {}%{*}
\def\commandprefix  {}%{=}
\def\characterprefix{}%{-}

%D \macros
%D   {startcoding, reducetocoding}
%D
%D Before we can redefine accents and special characters, we
%D have to tell \CONTEXT\ what encoding is in force. The next
%D command is responsible for doing this and also takes care of
%D the definition of the recoding commands. We use the \type
%D {\start}||\type {\stop}||commands for definitions and the
%D \type {\reduceto}||command for local switching to
%D simplified commands.

% etex : \ifcsname

% Simplified handlers (installed by \reducetocoding). Only the current
% encoding is consulted, there is no fallback to \nocharacterencoding:
% when the current encoding has no entry, \justhandleaccent returns the
% bare character #2 and \justhandlecommand returns the bare name #1.
\def\justhandleaccent#1#2% \empty makes #2={} save % no \unexpanded
  {\ifundefined{\accentprefix\characterencoding#1\string#2\empty}%
     #2%
   \else
     \csname\accentprefix\characterencoding#1\string#2\empty\endcsname
   \fi}

\def\justhandlecommand#1% % no \unexpanded, otherwise pdfdoc will fail
  {\ifundefined{\commandprefix\characterencoding#1}% as well as hyph patterns
     #1%
   \else
     \csname\commandprefix\characterencoding#1\endcsname
   \fi}

% \enableencoding[main][fallback]
%
% When a first argument is given, \characterencoding becomes @#1@. The
% fallback \nocharacterencoding is always set: to @#2@ when a second
% argument is given, else to the default. Afterwards
% \synchronizepatterns is called.
\def\enableencoding
  {\dodoubleempty\doenableencoding}

\def\doenableencoding[#1][#2]% main fallback
  {\iffirstargument\edef\characterencoding{@#1@}\fi
   \edef\nocharacterencoding{@\ifsecondargument#2\else\s!default\fi @}%
   \synchronizepatterns}

% Pre-expanded default fallback name, so that \fastenableencoding can use
% a \let instead of an \edef.
\edef\xnocharacterencoding{@\s!default @}

% \fastenableencoding{name}: sets the current encoding, resets the
% fallback to the default and does not call \synchronizepatterns.
\def\fastenableencoding#1%
  {\edef\characterencoding{@#1@}%
   \let\nocharacterencoding\xnocharacterencoding}

% \startencoding[encoding][regime] ... \stopencoding
%
% Used around encoding definitions. Five macros are saved with
% \pushmacro, after which \dohandleaccent and \dohandlecommand are let to
% their \donthandle... variants and \characterencoding becomes @#1@.
% When no regime is given, \doautosetregime is made to gobble its two
% arguments; otherwise \currentregime is set to #2.
\def\startencoding
  {\dodoubleempty\dostartencoding}

\def\dostartencoding[#1][#2]% encoding regime
  {%\showmessage\m!encodings1{#1}%
   \pushmacro\characterencoding
   \pushmacro\currentregime
   \pushmacro\dohandleaccent              % still needed?
   \pushmacro\dohandlecommand             % still needed?
   \pushmacro\doautosetregime
   \let\dohandleaccent\donthandleaccent   % still needed?
   \let\dohandlecommand\donthandlecommand % still needed?
   %let\definesortkey\savesortkey
   \edef\characterencoding{@#1@}%
   \doifelsenothing{#2}%
     {\let\doautosetregime\gobbletwoarguments}
     {\def\currentregime{#2}}}

% Restores the five macros saved by \dostartencoding, in reverse order.
\def\stopencoding
  {\popmacro\doautosetregime
   \popmacro\dohandlecommand           % still needed?
   \popmacro\dohandleaccent            % still needed?
   \popmacro\currentregime
   \popmacro\characterencoding}

% probably obsolete (hm, not yet)

% \reducetocoding[encoding]
%
% Does nothing when the argument is empty. Otherwise the simplified
% handlers \justhandleaccent and \justhandlecommand are installed, the
% encoding is enabled and \enablelanguagespecifics is called for
% \currentlanguage. The handlers are installed with a plain \let, so the
% caller has to provide the group that limits the effect.
\def\reducetocoding[#1]% use grouped!
  {\doifsomething{#1}
     {\let\dohandleaccent \justhandleaccent
      \let\dohandlecommand\justhandlecommand
      \enableencoding[#1]%
      \enablelanguagespecifics[\currentlanguage]}}

% The "coding" names are synonyms for the "encoding" ones. \stopcoding is
% a macro instead of a \let, so it follows a later redefinition of
% \stopencoding.
\let\startcoding  \startencoding
\def\stopcoding  {\stopencoding}
\let\enablecoding \enableencoding

%D The use of these macros is not limited to font
%D definition files, but may also be used when loading
%D patterns.

%D \macros
%D   {definesortkey,flushsortkeys,flushsortkey}
%D
%D Yet another definition concerns sorting of indexes and
%D lists.
%D
%D \starttyping
%D \definesortkey {\'e} {e} {a} {\'e}
%D \stoptyping
%D
%D The first argument denotes the string to be treated. The
%D second argument is the raw replacement, while the third
%D argument determines the sort order given the replacement.
%D The last argument is used as entry in the index (a, b, etc).
%D
%D The keys can be flushed using \type {\flushsortkeys}
%D which in turn results in a sequence of calls to \type
%D {\flushsortkey}, a macro taking 4~arguments.
%D
%D This mechanism is currently being tested and subjected to
%D changes! Obsolete:

% The sort key interface is switched off here: the two definition macros
% gobble their four arguments and the two flush macros are \relax.
\let\definesortkey\gobblefourarguments
\let\savesortkey  \gobblefourarguments
\let\flushsortkeys\relax
\let\flushsortkey \relax

%D \macros
%D   {defineaccent, definecharacter, definecommand}
%D
%D The actual definition of accents, special characters and
%D commands is done with the next three commands.

% \defineaccent <accent> <character> <code or command>
%
% The definition takes three steps, because the accent or character can
% be one of ~, _ or ^:
%
% 1. these characters are made letters while the first two arguments
%    are read
% 2. the category codes are restored and the two arguments are passed on
% 3. \<accent> is defined as \dohandleaccent{<accent>} and the result is
%    stored under a name built from the accent prefix, the current
%    encoding, the accent and the (stringified) character: \char#3
%    followed by a space when \doifnumberelse regards #3 as a number,
%    else #3 itself
\def\defineaccent
  {\protectfontcharacters
   \dodefineaccent}

\def\dodefineaccent#1 #2 %
  {\unprotectfontcharacters
   \dododefineaccent#1 #2 }

\def\dododefineaccent#1 #2 #3 %
  {\setvalue{#1}{\dohandleaccent{#1}}%
   \doifnumberelse{\string#3}
     {\setvalue{\accentprefix\characterencoding#1\string#2}{\char#3 }} % space added
     {\setvalue{\accentprefix\characterencoding#1\string#2}{#3}}}

% \dohandleaccent{accent}{character}
%
% Resolves an accented character. The first match in this order wins:
%
% 1. the accent/character pair in the current encoding
% 2. the accent/character pair in the fallback encoding
% 3. the accent command of the current encoding, applied to {#2}
% 4. the accent command of the fallback encoding, applied to {#2}; this
%    last one is used without testing whether it exists
\def\dohandleaccent#1#2%
  {\ifcsname\accentprefix\characterencoding#1\string#2\empty\endcsname
     \csname\accentprefix\characterencoding#1\string#2\empty\endcsname
   \else\ifcsname\accentprefix\nocharacterencoding#1\string#2\empty\endcsname
     \csname\accentprefix\nocharacterencoding#1\string#2\empty\endcsname
   \else\ifcsname\accentprefix\characterencoding#1\endcsname
     \csname\accentprefix\characterencoding#1\endcsname{#2}%
   \else%\ifcsname\accentprefix\nocharacterencoding#1\endcsname
     \csname\accentprefix\nocharacterencoding#1\endcsname{#2}%
%   \else
%     \donormaltextaccent{#1}{#2}%
   \fi\fi\fi}%\fi}

% Replacement for \char while patterns are loaded (see
% \startpatternloading): the number, delimited by a space, is passed to
% \rawcharacter.
\def\patternchar#1 {\rawcharacter{#1}} % space is part of character definition !

% \ifx \enablepatterntokens\undefined
%   \def\handlepatterntoken#1]{\csname#1\endcsname}
% \fi

% we need to postpone catcode changes, e.g. hr patterns
% have \catcode" -> which fails when " is letter

% Settings shared by \patterns and \hyphenation.
%
% - When \enablepatterntokens is defined it is called; otherwise [ is
%   made active and bound to \handlepatterntoken. NOTE(review): in this
%   file \handlepatterntoken only occurs in commented code, so it has to
%   come from elsewhere.
% - \dochar becomes \thechr, so that defined characters end up as plain
%   \char<number><space>.
% - Characters 16 and 17, the hyphen, the apostrophe and the double quote
%   get themselves as \lccode. TeX only accepts characters with a nonzero
%   \lccode in patterns and hyphenation exceptions.
%
% The apostrophe and double quote are written as \' and \" here; an HTML
% escaped form (ampersand, hash, digits) would put a parameter character
% in the body of this definition.
\def\pathypsettings
  {\ifx \enablepatterntokens\undefined
     \defineactivecharacter [ {\handlepatterntoken}%
   \else
     \enablepatterntokens
   \fi
   \let\dochar\thechr
   \lccode16=16 % brrr, extra quote in ec (turkish)
   \lccode17=17 % brrr, extra quote in ec (turkish)
   \lccode`\-=`\-
   \lccode`\'=`\'
   \lccode`\"=`\"
   \relax}

% \patterns and \hyphenation first apply \pathypsettings and then call
% the saved primitives. The definition of \hyphenation given here is
% replaced right away by the one below.
\def\patterns   {\pathypsettings\normalpatterns   }
\def\hyphenation{\pathypsettings\normalhyphenation}

%D Because we don't want to use the second command grouped, we
%D (re)define it as follows:

% A group is opened in which \hyphenation is redefined as the finishing
% step. Then \pathypsettings is applied and the braced text that follows
% in the input is assigned to \scratchtoks. The \afterassignment triggers
% the finishing step, which feeds the collected tokens to
% \normalhyphenation and closes the group, so that the settings do not
% leak.
\def\hyphenation
  {\begingroup\def\hyphenation{\normalhyphenation{\the\scratchtoks}\endgroup}%
   \pathypsettings\afterassignment\hyphenation\scratchtoks=}

%D This is not needed for patterns because they are loaded grouped
%D anyway and it saves us an assignment. Can go ... no longer
%D shared patterns.

% \startpatternloading{filename}{encoding}{mapping}
%
% Prepares for reading a pattern file. Inside \startreadingfile and a
% group, \everyjob and \message are neutralised, \char is let to
% \patternchar, and the given encoding and mapping are enabled (ec is
% used for an empty argument). When \f!languageprefix is found in #1
% (assuming \doifinstring tests its first argument against the second)
% and \enablepatternxml is defined, that macro is called. Finally
% \dohandleaccent is let to \normaldohandleaccent, which is not defined
% in the part of the file visible here.
\def\startpatternloading#1#2#3% % we should use \everypatternloading
  {\startreadingfile
   \bgroup
   % let's get rid of interfering stuff
   \let\everyjob\scratchtoks
   \let\message \gobbleoneargument
   % we want direct characters
   \let\char\patternchar
   \doifelsenothing{#2}{\enableencoding[ec]}{\enableencoding[#2]}%
   \doifelsenothing{#3}{\enablemapping [ec]}{\enablemapping [#3]}%
   \expanded{\doifinstring{\f!languageprefix}{#1}}
     {\ifx \enablepatternxml\undefined \else
        \enablepatternxml
      \fi}%
   \let\dohandleaccent\normaldohandleaccent}

% Closes the group and the file reading state opened by
% \startpatternloading.
\def\stoppatternloading
  {\egroup
   \stopreadingfile}

           \def\thechr#1{\char#1 } % just in case \relax interferes
\unexpanded\def\numchr#1{\char#1\relax}
\unexpanded\def\strchr#1{\csname#1\endcsname}

% Three ways to turn a stored character into output: \thechr ends the
% number with a space, \numchr ends it with \relax and \strchr calls the
% control sequence with the name #1. \dochar is the hook that
% \definecharacter stores in its definitions; \numchr is the default.
\let\dochar\numchr

% Switch \dochar to \thechr for a while; the old meaning is saved with
% \pushmacro and restored with \popmacro.
\def\startdirectcharacters {\pushmacro\dochar \let\dochar\thechr}
\def\stopdirectcharacters  {\popmacro \dochar}

% \definecharacter <name> <code or command>
%
% Defines \<name> as \dohandlecharacter{<name>}, unless it is defined
% already, and stores the meaning for the current encoding: \dochar{#2}
% when \doifnumberelse regards #2 as a number (in that case
% \doautosetregime{#1}{#2} is called as well), else #2 itself.
\def\definecharacter#1 #2 %
  {\ifundefined{#1}\setvalue{#1}{\dohandlecharacter{#1}}\fi
   \doifnumberelse{\string#2}
     {\setvalue{\characterprefix\characterencoding\string#1}{\dochar{#2}}%
      \doautosetregime{#1}{#2}}
     {\setvalue{\characterprefix\characterencoding\string#1}{#2}}}

% Expands to the entry for #1 in the current encoding when there is one,
% and to the entry in the fallback encoding otherwise.
\def\dohandlecharacter#1%
  {\csname\characterprefix\ifcsname\characterprefix\characterencoding#1\endcsname
     \characterencoding\else\nocharacterencoding\fi#1\endcsname}

% \def\fallbackpatternchar{x} % makes no sense, duplicate patterns

% \defaultcharacter\name: the entry for the character in the fallback
% encoding. The argument is a control sequence; \strippedcsname
% presumably yields its name without the backslash.
\def\defaultcharacter#1%
  {\csname\characterprefix\nocharacterencoding\strippedcsname#1\endcsname}

%D Instead of numbers, a command may be entered.

% \definecommand <name> <command>
%
% Defines \<name> as \dohandlecommand{<name>} and stores #2 as its
% meaning in the current encoding. Both arguments are delimited by a
% space.
\def\definecommand#1 #2 %
  {\setvalue{\string#1}{\dohandlecommand{#1}}%
  %\redefinecommand #1 % just to be sure
   \setvalue{\commandprefix\characterencoding\string#1}{#2}}

%D Here we see that redefining accents and characters is more
%D or less the same as redefining commands. We also could have
%D said:
%D
%D \starttyping
%D \def\defineaccent#1 #2 {\definecommand#1\string#2 \char}
%D \def\definecharacter#1 {\definecommand#1 \char}
%D \stoptyping

%D \macros
%D   {defineaccentcommand}
%D
%D When needed, one can overload the default positions of the
%D accents. The \PLAIN\ \TEX\ defaults are:
%D
%D \starttyping
%D \defineaccentcommand `  18
%D \defineaccentcommand '  19
%D \defineaccentcommand v  20
%D \defineaccentcommand u  21
%D \defineaccentcommand =  22
%D \defineaccentcommand ^  94
%D \defineaccentcommand .  95
%D \defineaccentcommand H 125  % "7D
%D \defineaccentcommand ~ 126  % "7E
%D \defineaccentcommand " 127  % "7F
%D \stoptyping

% \defineaccentcommand <accent> <code or command>
%
% Defines, for the current encoding, the one argument macro that places
% the accent: {\accent#2 <argument>} when \doifnumberelse regards #2 as a
% number, else {#2<argument>}. The characters ~, _ and ^ are letters
% while the arguments are read; the category codes are restored at the
% end.
\def\defineaccentcommand
  {\protectfontcharacters
   \dodefineaccentcommand}

\def\dodefineaccentcommand#1 #2 % \string added
  {\doifnumberelse{\string#2}
     {\setvalue{\accentprefix\characterencoding\string#1}##1{{\accent#2 ##1}}}
     {\setvalue{\accentprefix\characterencoding\string#1}##1{{#2##1}}}%
   \unprotectfontcharacters}

%D We don't have to define them for the default \PLAIN\ case.
%D Commands may be used instead of character codes.

%D \macros
%D   {redefinecommand}
%D
%D Redefinition of encoding dependant commands like \type{\b}
%D and \type{\c} can be triggered by:
%D
%D \starttyping
%D \redefinecommand  b  % something math
%D \redefinecommand  c  % something math
%D \stoptyping
%D
%D Handling of characters is easier than handling accents
%D because here we don't have to take care of arguments. We
%D just call for the right glyph in the right place.
%D
%D The \type{\next} construction permits handling of commands
%D that take arguments. This means that we can use this
%D command to redefine accent handling commands too
%D (although today the next is not needed any longer in test
%D macros).

% \redefinecommand <name>
%
% (Re)defines \<name> such that it is resolved per encoding by
% \dohandlecommand; the argument is delimited by a space.
\def\redefinecommand#1 %
  {% no \unexpanded, else pdfdoc fails
   \setvalue{\string#1}{\dohandlecommand{#1}}}%

% Expands to the entry for command #1 in the current encoding when there
% is one, and to the entry in the fallback encoding otherwise.
\def\dohandlecommand#1%
   {\csname\commandprefix
      \ifcsname\commandprefix\characterencoding#1\endcsname
        \characterencoding
      \else
        \nocharacterencoding
      \fi
    #1\endcsname}

%D \macros
%D   {currentencoding, currentmapping}
%D
%D When we show 'm, we don't want to see the protection
%D measures.

% The names of the current encoding and mapping without the enclosing
% @ characters.
\def\currentencoding{\@EA\dopureencodingname\characterencoding}
\def\currentmapping {\@EA\dopureencodingname\charactermapping }

% Strips the delimiters: @name@ becomes name.
\def\dopureencodingname @#1@{#1}

% Same, for a macro #1 that holds a name in the @name@ form.
\def\pureencodingname#1{\@EA\dopureencodingname#1}

%D \macros
%D   {showaccents, showcharacters,
%D    showcharacterbounds, showhyphenations}
%D
%D Encoding is a tricky business. Therefore we provide
%D a few macros that show most of the characters involved. The
%D next two tables show the result of \type {\showaccents}.
%D
%D \placetable
%D   {The special glyphs in default encoding.}
%D   {\showaccents}
%D
%D \placetable
%D   {The special glyphs in texnansi encoding.}
%D   {\switchtobodyfont[lbr]\showaccents}
%D
%D The command
%D
%D \starttyping
%D \showhyphenations{doordefini\"eren}
%D \stoptyping
%D
%D can be used to check the correct loading of hyphenation
%D patterns.

% NOTE(review): \fetchruntimecommand is defined elsewhere; presumably it
% defines the command such that its real definition is loaded from the
% given file (\f!encodingprefix\s!run) on first use -- to be confirmed.
\fetchruntimecommand \showaccents         {\f!encodingprefix\s!run}
\fetchruntimecommand \showcharacters      {\f!encodingprefix\s!run}
\fetchruntimecommand \showcharacterbounds {\f!encodingprefix\s!run}
\fetchruntimecommand \showhyphenations    {\f!encodingprefix\s!run}
\fetchruntimecommand \showmapping         {\f!encodingprefix\s!run}

%D \macros
%D   {everyuppercase, EveryUppercase,
%D    everylowercase, EveryLowercase}
%D
%D When we want to uppercase strings of characters, we have to
%D take care of those characters that have a special meaning or
%D are only accessible by means of macros. The next hack was
%D introduced when Tobias Burnus started translating head and
%D label texts into spanish and italian. The first application
%D of this token register therefore can be found in the module
%D that deals with these texts.

% Token lists that are executed before a case change (\everyuppercase is
% flushed in \douppercase). NOTE(review): the second name of each pair is
% presumably the user level command to add to the list; \newevery is
% defined elsewhere.
\newevery \everyuppercase \EveryUppercase
\newevery \everylowercase \EveryLowercase

%D This magic trick takes care of mapping from lower to
%D upper case and reverse.

% Executes the case command register (\@cas@ prefix) of the current
% mapping; \emptytoks is used when there is no such register.
\def\reloadmapping{\the\executeifdefined{\@cas@\charactermapping}\emptytoks}

% When changing case, the matching \set...casecom becomes \setcasecom, so
% that the entries stored by \defineuppercasecom or \definelowercasecom
% take effect when the register is executed by \reloadmapping.
\appendtoks\let\setuppercasecom\setcasecom\to\everyuppercase
\appendtoks\let\setlowercasecom\setcasecom\to\everylowercase

\appendtoks\reloadmapping\to\everyuppercase % slow, will be sped up
\appendtoks\reloadmapping\to\everylowercase % slow, will be sped up

% \everyULmap collects \remapcase\a\b entries (see \defineULcharacter).
% The list is executed in both directions, with \remapcase set to the
% upper- or lowercase variant.
\newtoks\everyULmap

\appendtoks\let\remapcase\remapuppercase\the\everyULmap\to\everyuppercase
\appendtoks\let\remapcase\remaplowercase\the\everyULmap\to\everylowercase

\let\remapcase\gobbletwoarguments

% Uppercase: the second command gets the meaning of the first. Lowercase:
% the first gets the meaning of the second, which is just \let, hence the
% second definition of \remaplowercase.
\def\remapuppercase#1#2{\let#2#1}  % more efficient:
\def\remaplowercase#1#2{\let#1#2}  \let\remaplowercase\let

% \defineLCcharacter <name> <name>
% \defineUCcharacter <name> <name>
% \defineULcharacter <name> <name>
%
% The first two append \let\<#1>\<#2> to \everylowercase or to
% \everyuppercase, so during that case change \<#1> gets the meaning of
% \<#2>. The third one appends \remapcase\<#1>\<#2> to \everyULmap, which
% is used for both directions. The names are given without backslash and
% each is delimited by a space.
\def\defineLCcharacter #1 #2 %
  {\appendtoks\let\to\everylowercase
   \@EA\appendtoks\csname#1\endcsname\to\everylowercase
   \@EA\appendtoks\csname#2\endcsname\to\everylowercase}

\def\defineUCcharacter #1 #2 %
  {\appendtoks\let\to\everyuppercase
   \@EA\appendtoks\csname#1\endcsname\to\everyuppercase
   \@EA\appendtoks\csname#2\endcsname\to\everyuppercase}

\def\defineULcharacter #1 #2 %
  {\appendtoks\remapcase\to\everyULmap
   \@EA\appendtoks\csname#1\endcsname\to\everyULmap
   \@EA\appendtoks\csname#2\endcsname\to\everyULmap}

% slightly faster with \smallcapped's but far more hash and stringspace
%
% \newif\ifuppercase \appendtoks\uppercasetrue\to\everyuppercase
% \newif\iflowercase \appendtoks\lowercasetrue\to\everylowercase
%
% \def\defineULcharacter #1 #2 %
%   {\def\!!stringa{@#1}\@EA\letvalue\@EA\!!stringa\csname#1\endcsname
%    \def\!!stringa{@#2}\@EA\letvalue\@EA\!!stringa\csname#2\endcsname
%    \setvalue{#1}{\getvalue{@\ifuppercase#2\else#1\fi}}%
%    \setvalue{#2}{\getvalue{@\iflowercase#1\else#2\fi}}}

% 2 = tricky, since expanding \definedfont[lowcasename] ... goes wrong

% Mode switches that are tested in \douppercase and \setcasecom.
\chardef\uppercasemode\plusthree % 0=ignore 1=normal 2=expand 3=auto
\chardef\casecommode  \plusone   % 0=noexpand 1=expand

% Replaces the earlier definition of \setcasecom in this file: #1 now
% expands to itself (\noexpand#1) when \casecommode is zero and to #2
% otherwise.
\def\setcasecom #1#2{\def#1{\ifcase\casecommode\noexpand#1\else#2\fi}}

% \def\OEPS{whatever}
%
% \startmapping[ec]
%   \defineuppercasecom \oeps {\getvalue{OEPS}}
% \stopmapping
%
% \WORD{xx \oeps}

% \douppercase{text}
%
% Uppercases the text inside a group. Nested calls are neutralised
% (\douppercase becomes \firstofoneargument), \everyuppercase is
% executed and \dochar is let to \rawcharacter. What follows depends on
% \uppercasemode:
%
% 0: the text is passed as it is
% 1: \uppercase is applied to the unexpanded text
% 2: the text is first expanded with \casecommode set to zero, so that
%    case commands defined by \setcasecom survive as themselves; the
%    result is wrapped in \uppercase, \casecommode is set to one again
%    and the result is executed
% 3 (and up): handled like mode 1 here, but a hook in \everyuppercase may
%    have changed the mode to 2 before the test is reached
\def\douppercase#1%
  {\bgroup
   \let\douppercase\firstofoneargument
   \the\everyuppercase % currently also checks uppercasemode
   \let\dochar\rawcharacter
   \ifcase\uppercasemode
     #1%
   \or % No expansion here, otherwise \getvalue problems! Default!!!
    %\edef\next{#1}% keep this to prevent roll back
    %\uppercase\expandafter{\next}% keep this to prevent roll back
     \uppercase{#1}%
   \or
     \chardef\casecommode\zerocount
     \let\docasecom\firstoftwoarguments
     \edef\ascii{#1}%
     \edef\ascii{\expandafter\uppercase\expandafter{\ascii}}% needed when in regime
     \chardef\casecommode\plusone
     \ascii
   \else
     % mode three may trigger setting 2 elsewhere (e.g. regime test)
     \uppercase{#1}%
   \fi
   \egroup}

% Put in front of \everyuppercase: unless \currentregime equals
% \s!default, an "auto" \uppercasemode (3) is changed into "expand" (2).
% Because \everyuppercase is executed inside the group of \douppercase,
% the change is local to that call.
\prependtoksonce
  \doifnot\currentregime\s!default
    {\ifnum\uppercasemode=\plusthree \chardef\uppercasemode\plustwo \fi}%
\to \everyuppercase

%D \macros
%D   {everysanitize, EverySanitize}
%D
%D Whenever we are sanitizing strings, like we sometimes do
%D when we deal with specials, the next token register can be
%D called.

% Allocation only; this file does not add anything to the list and does
% not execute it.
\newevery \everysanitize \EverySanitize

%D \macros
%D   {defineuclass,defineudigit,udigit}
%D
%D The next few macros are experimental and needed for unicoded
%D chinese characters.

% \defineuclass <class> <n> <m>
%
% Stores <class> under the name uc<n*256+m>; the three arguments are
% delimited by spaces.
\def\defineuclass #1 #2 #3 %
  {\setvalue{uc\the\numexpr#2*256+#3\relax}{#1}}

% \defineudigit <digit> <n> <m>
%
% Stores \uchar{<n>}{<m>} for the digit, under a name that starts with
% the current encoding (in its @name@ form).
\def\defineudigit #1 #2 #3 {\setvalue{\characterencoding uc#1}{\uchar{#2}{#3}}}

%D It may look strange, but for the moment, we want the encoding
%D to be part of the digit specification. This may change!

% \udigit{encoding}{number}: fetches the digit stored by \defineudigit
% for the given encoding.
\unexpanded\def\udigit#1#2{\getvalue{@#1@uc\number#2}}

%D \macros
%D   {uchar, octuchar, hexuchar}

% Fallback for \uchar{n}{m}: as long as nothing else has defined it, the
% two numbers are shown as (n,m).
\ifx\uchar\undefined \def\uchar#1#2{(\number#1,\number#2)} \fi

% \octuchar and \hexuchar take the two numbers in octal and hexadecimal
% notation. TeX reads '<digits> as an octal and "<digits> as a
% hexadecimal number; a backquote instead of the quote would only give
% the character code of the first digit.
\def\octuchar#1#2{\uchar{'#1}{'#2}}
\def\hexuchar#1#2{\uchar{"#1}{"#2}}

%D Basics and fallbacks.

% \ifignoreaccent is tested in \buildtextaccent: when true, the accent is
% dropped and only the character is typeset.
\newif\ifignoreaccent

% Save the primitive under two names.
\let\textaccent      \accent
\let\normaltextaccent\textaccent

% ** we will explicitly embrace the two arguments, since in definitions
% this may not be the case, and we don't want faulty expansions like
% "\dobuildtextaccent \char 18 a" but "\dobuildtextaccent {\char 18}{a}"
% instead

% Places a math accent; the space ends the number #1.
\def\buildmathaccent#1%
  {\mathaccent#1 }

% \buildtextaccent{accent}{character}
%
% Picks the ignoring or the building variant, depending on
% \ifignoreaccent, and passes both arguments on in braces.
\def\buildtextaccent#1#2% **
  {\ifignoreaccent
     \expandafter\nobuildtextaccent
   \else
     \expandafter\dobuildtextaccent
   \fi{#1}{#2}}

% Accents ignored: only the character is left.
\unexpanded\def\nobuildtextaccent#1#2%
  {#2}

% Inside a group, \char means \normalaccent while #1 is processed, so a
% \char<number> in the accent argument acts as an accent; for #2 it is
% set to \normalchar again.
\unexpanded\def\dobuildtextaccent#1#2%
  {{\let\char\normalaccent#1\let\char\normalchar#2}}

% some fake ones, name will change into build

% \bottomaccent{down}{right}{slantcorrection}{accent}{character}
%
% Puts an accent below a character by means of a two row alignment in a
% \vtop: the first row holds the character (#5), the second one the
% accent (#4, set in box 0) between \hidewidth's, so that it does not
% add to the width.
%
% #1: the \lineskip between the rows
% #2: factor of the accent width used for a horizontal shift
% #3: factor of \slantperpoint that is subtracted from that shift
\unexpanded\def\bottomaccent#1#2#3#4#5% down right slantcorrection accent char
  {\dontleavehmode % why this align mess
   \vtop
     {\forgetall
      \baselineskip\zeropoint
      \lineskip#1%
      \everycr\emptytoks
      \tabskip\zeropoint
      \lineskiplimit\zeropoint
      \setbox0\hbox{#4}%
      \halign
        {##\crcr\hbox{#5}\crcr
         \hidewidth
         \hskip#2\wd0
         \hskip-#3\slantperpoint % in plain 1ex * dimenless value
         \vbox to .2ex{\box0\vss}\hidewidth
         \crcr}}}

% Builders on top of \bottomaccent. Each one fills in the first four
% arguments (down, right, slant correction, accent) and leaves the
% character to be picked up from the input.
\def\buildtextmacron   {\bottomaccent{.25ex}{0}{15}{\textmacron}}
\def\buildtextbottomdot{\bottomaccent{.25ex}{0}{5}{\textbottomdot}}
\def\buildtextcedilla  {\bottomaccent{0ex}{0}{5}{\textcedilla}}
\def\buildtextogonek   {\bottomaccent{-.1ex}{.5}{0}{\textogonek}}

%D A collectors item:

% The accent is a comma, set with \tx.
\def\buildtextbottomcomma{\bottomaccent{.15ex}{0}{5}{\tx,}}

%D Rarely needed but there:

% \topaccent{raise}{right}{slantcorrection}{accent}{character}
%
% Overlays an accent on a character. The result is a box with the width
% of the character (box 2): first the character is centered in it, then
% a skip of minus that width moves back and the accent is centered as
% well, raised by #1 and shifted by #2 times the accent width minus #3
% times \slantperpoint.
\unexpanded\def\topaccent#1#2#3#4#5% down right slantcorrection accent char
  {\dontleavehmode
   \bgroup
     \setbox0\hbox{#4}%
     \setbox2\hbox{#5}%
     \hbox to \wd2 \bgroup
        \hss\copy2\hss
        \hskip-\wd2
        \hss\hskip#2\wd0\hskip-#3\slantperpoint\raise#1\hbox{#4}\hss
     \egroup
   \egroup}

\def\buildtextgrave{\topaccent{0pt}{0}{15}{\textgrave}} % e.g.

% \definecharacter schwa      {\hbox{\rotate[rotation=180,location=high]{\hbox{e}}}}
% \definecharacter schwagrave {\buildtextgrave\schwa}

% math stuff, will change

% \definemathaccent <name> <definition>
%
% Stores the definition twice: as \<name> and under the name
% normalmathaccent<name>, so that the original stays available when
% \<name> is redefined.
\def\definemathaccent#1 #2%
  {\setvalue{\string#1}{#2}%
   \setvalue{normalmathaccent\string#1}{#2}}

% Fetches the copy stored by \definemathaccent.
\def\donormalmathaccent#1%
  {\getvalue{normalmathaccent\string#1}}

%D Some precautions:

% Fallback: gobble the space delimited argument as long as the real
% macro has not been defined.
\ifx\usepdffontresource\undefined
  \def\usepdffontresource #1 {} % this will be defined elsewhere
\fi

% Alternative handlers that can be assigned to \dohandleaccent,
% \dohandlecommand and \dohandlecharacter. All of them take the name only;
% for an accent this means that the character that follows stays in the
% input.
%
% \donthandle...     : the command name as a string, followed by a space
% \stringifyhandle...: the command called by name via \strchr (which is
%                      defined with \unexpanded)
% \keephandle...     : the command itself, protected by \noexpand
% \handle...         : the command itself
\def\donthandleaccent        #1{\expandafter\string\csname#1\endcsname\space}
\def\donthandlecommand       #1{\expandafter\string\csname#1\endcsname\space}
\def\donthandlecharacter     #1{\expandafter\string\csname#1\endcsname\space}

\def\stringifyhandleaccent   #1{\strchr{#1}}
\def\stringifyhandlecommand  #1{\strchr{#1}}
\def\stringifyhandlecharacter#1{\strchr{#1}}

\def\keephandleaccent        #1{\expandafter\noexpand\csname#1\endcsname}
\def\keephandlecommand       #1{\expandafter\noexpand\csname#1\endcsname}
\def\keephandlecharacter     #1{\expandafter\noexpand\csname#1\endcsname}

\def\handleaccent            #1{\csname#1\endcsname}
\def\handlecommand           #1{\csname#1\endcsname}
\def\handlecharacter         #1{\csname#1\endcsname}

%D Let the three \type {\dohandle...} hooks point to the \quote {dont}
%D handlers, so that names are written out instead of being executed.

\def\dontexpandencoding
  {\let\dohandlecharacter\donthandlecharacter
   \let\dohandlecommand  \donthandlecommand
   \let\dohandleaccent   \donthandleaccent}

%D Let the three \type {\dohandle...} hooks point to the \quote {keep}
%D handlers, so that the command tokens survive unexpanded.

\def\keepencodedtokens
  {\let\dohandlecharacter\keephandlecharacter
   \let\dohandlecommand  \keephandlecommand
   \let\dohandleaccent   \keephandleaccent}

%D Only the character hook is switched to its \quote {keep} handler; the
%D lines for the accent and command hooks are commented out, so these
%D hooks keep whatever meaning they have.

\def\literateencodedtokens
  {% \let\dohandleaccent   \keephandleaccent
   % \let\dohandlecommand  \keephandlecommand
   \let\dohandlecharacter\keephandlecharacter}

%D Only the character hook is switched to its \quote {stringify} handler;
%D the lines for the accent and command hooks are commented out, so these
%D hooks keep whatever meaning they have.

\def\stringifyencodedtokens
  {% \let\dohandleaccent   \stringifyhandleaccent
   % \let\dohandlecommand  \stringifyhandlecommand
   \let\dohandlecharacter\stringifyhandlecharacter}

%D Handlers with the same body as the plain \type {\handle...} ones, but
%D defined with the \type {\unexpanded} prefix.

\unexpanded\def\uhandleaccent   #1{\csname#1\endcsname}
\unexpanded\def\uhandlecommand  #1{\csname#1\endcsname}
\unexpanded\def\uhandlecharacter#1{\csname#1\endcsname}

%D Redefine the three \type {\dohandle...} hooks as macros without
%D arguments that expand to the matching \type {\uhandle...} handler. Note
%D that this is done with \type {\def} and not, as in the other switches,
%D with \type {\let}.

\def\dontexpandencodedtokens
  {\def\dohandleaccent   {\uhandleaccent}%
   \def\dohandlecommand  {\uhandlecommand}%
   \def\dohandlecharacter{\uhandlecharacter}}

% \convertencodedtokens used to be an alias for \dontexpandencoding, so
% no longer: \def\convertencodedtokens{\dontexpandencoding} but:

\def\convertencodedtokens{\stringifyencodedtokens}

% test case:
%
% \enableregime[cp1250]
% \mainlanguage[cz]
%
% \starttext
%
% \title{Žluťoučký kůň úpěl} % stand-in Czech text: the original bytes got mangled
% \placelist[chapter][criterium=all]
%
% \startbuffer
% <chapter>
%     <title>Žluťoučký kůň úpěl</title>
% </chapter>
% \stopbuffer
%
% \defineXMLenvironment
%   [chapter]
%   {\defineXMLsave[title]}
%   {\expanded{\chapter{\XMLflush{title}}}}
% \processXMLbuffer
%
% \setuphead[chapter][expansion=yes]
% \defineXMLenvironment
%   [chapter]
%   {\defineXMLsave[title]}
%   {\chapter{\XMLflush{title}}}
% \processXMLbuffer
%
% \stoptext

%D Still valid? To be checked:
%D
%D The \quote {ignore} handlers apply \type {\string} to what they get.
%D Unlike the other accent handlers in this file, the accent variant
%D takes two arguments and applies \type {\string} to each of them.

\def\doignoreaccent #1#2{\string#1\string#2}%
\def\doignorecommand  #1{\string#1}
\def\doignorecharacter#1{\string#1}

%D Let the three \type {\dohandle...} hooks point to the \quote {ignore}
%D handlers.

\def\ignoreencoding
  {\let\dohandlecharacter\doignorecharacter
   \let\dohandlecommand  \doignorecommand
   \let\dohandleaccent   \doignoreaccent}

% hook: \ignoreencoding is added to the \everycleanupfeatures token list
\appendtoks \ignoreencoding \to \everycleanupfeatures

% hook: \keepencodedtokens is added to the \everysafeexpanded token list
\appendtoks \keepencodedtokens \to \everysafeexpanded

%D Now we will not redefine any more, so we save the current meaning of
%D the accent and character hooks under a \type {\normal...} name (the
%D command hook is not saved here):

\let\normaldohandleaccent   \dohandleaccent
\let\normaldohandlecharacter\dohandlecharacter

%D The one character accent commands for accents that go on top. Each
%D line hands a name and a body to \type {\definecommand} (defined
%D elsewhere); the body is \type {\buildtextaccent} followed by the
%D accent glyph.

\definecommand ` {\buildtextaccent\textgrave}
\definecommand ' {\buildtextaccent\textacute}
\definecommand r {\buildtextaccent\textring}
\definecommand v {\buildtextaccent\textcaron}
\definecommand u {\buildtextaccent\textbreve}
\definecommand = {\buildtextaccent\textmacron}
\definecommand ^ {\buildtextaccent\textcircumflex}
\definecommand . {\buildtextaccent\textdotaccent}
\definecommand H {\buildtextaccent\texthungarumlaut}
\definecommand ~ {\buildtextaccent\texttilde}
\definecommand " {\buildtextaccent\textdiaeresis}

%D The one character accent commands for accents that go below; these
%D use the dedicated \type {\buildtext...} builders.

\definecommand c {\buildtextcedilla}
\definecommand b {\buildtextmacron}
\definecommand d {\buildtextbottomdot}
\definecommand k {\buildtextogonek}

%D The math accents. Each line gives a name and a body to
%D \type {\definemathaccent}; the body is \type {\buildmathaccent}
%D followed by the matching \type {\math...} accent.

\definemathaccent acute     {\buildmathaccent\mathacute}
\definemathaccent grave     {\buildmathaccent\mathgrave}
\definemathaccent ddot      {\buildmathaccent\mathddot}
\definemathaccent tilde     {\buildmathaccent\mathtilde}
\definemathaccent bar       {\buildmathaccent\mathbar}
\definemathaccent breve     {\buildmathaccent\mathbreve}
\definemathaccent check     {\buildmathaccent\mathcheck}
\definemathaccent hat       {\buildmathaccent\mathhat}
\definemathaccent vec       {\buildmathaccent\mathvec}
\definemathaccent dot       {\buildmathaccent\mathdot}
\definemathaccent widetilde {\buildmathaccent\mathwidetilde}
\definemathaccent widehat   {\buildmathaccent\mathwidehat}

%D The base definitions are loaded for every engine, in this order:

\useencoding[def] % defaults (partly simplified)
\useencoding[acc] % accent commands
\useencoding[raw] % simplified (incomplete)
\useencoding[com] % a few commands
\useencoding[cas] % case mapping, not needed in mkiv
\useencoding[mis] % a few commands

%D We preload several encodings. When the engine is \XETEX\ nothing is
%D loaded and the default encoding is set to the default one; in all
%D other cases the encodings listed below are loaded and \type {ec}
%D becomes the default encoding.

\ifnum\texengine=\xetexengine
    \setupencoding[\s!default=\s!default]
\else
    \useencoding[ans,il2,ec,tbo,pdf,pol,qx,t5,l7x,cyr,agr] % pol and il2 will go away, not needed in mkiv, uc removed
    \useencoding[032,033,037] % fallbacks for some unicode chars
    \setupencoding[\s!default=ec] % was: [\s!default=\s!default]
\fi

% end of the module; \protect presumably pairs with an \unprotect earlier in this file (to be confirmed)
\protect \endinput