% enco-ini.mkiv / last modification: 2020-01-30 14:16
%D \module
%D   [       file=enco-ini,
%D        version=2007.02.19, % 2000.12.27, % 1998.12.03,
%D          title=\CONTEXT\ Encoding Macros,
%D       subtitle=Initialization,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

%D This is a stripped down version of the original enco-ini.tex
%D file. For more details you might want to study the \MKII\ file
%D but since \LUATEX\ is unicode inside we need less code.

% When dealing with characters we have four cases to take into account
% when moving from mkii to mkiv:

% 1. <byte 200>   => ref to slot 200 in current font
% 2. \char 200    => ref to slot 200 in current font
% 3. <active 200> => can (e.g.) map to another slot in current font
% 4. \namedglyph  => can map to some slot in some font

% Using case 2 for special characters is doomed to fail because we are not going
% to intercept these on the fly as happens automatically with traditional font
% encoding handling. We could do that in a node pass but it's not worth the effort
% because we seldom use this case in a document source.

% We can consider using utf as internal format for mkii. The main reason for not
% doing this before was that it was slow. On the other hand, it would make dealing
% with utility files easier. However, we've now kind of frozen mkii.

\writestatus{loading}{ConTeXt Encoding Macros / Initialization}

\unprotect

%D Obsolete (but sometimes used in styles)

% Alias kept for old styles: \defaultencoding gets the meaning of \s!default.
\let\defaultencoding\s!default

%D \macros
%D   {defineaccent, definecharacter, definecommand}

% Namespace (\??accents) under which accent/character combinations are stored.
\installcorenamespace{accents}

% \defineaccent <accent> <character> <result>
%
% All three arguments are delimited by a space. The accent command is (re)defined
% by \enco_define_accent_command and the combination itself is stored by
% \enco_define_accent.
\unexpanded\def\defineaccent#1 #2 #3 %
  {\enco_define_accent_command{#1}%
   \enco_define_accent{#1}{#2}{#3}}

% Defines the control sequence whose name is the stringified #1 so that it
% calls \enco_handle_accent with that same stringified name as first argument.
% The \noexpand keeps \enco_handle_accent itself from being expanded while the
% definition is made with \setevalue.
\unexpanded\def\enco_define_accent_command#1%
  {\setevalue{\string#1}{\noexpand\enco_handle_accent{\string#1}}}

% Stores #3 under the name \??accents<stringified #1><stringified #2>\empty,
% which is the name that \enco_handle_accent looks up (#1 accent, #2 character,
% #3 result).
\unexpanded\def\enco_define_accent#1#2#3% no spaces, used low level
  {\setvalue{\??accents\string#1\string#2\empty}{#3}}

% the following dirty trick is needed to catch \asciimath{\^{1/5}log}:

% \enco_handle_accent{<accent name>}{<character>}
%
% Resolves an accent/character pair to whatever \enco_define_accent stored for
% it. Two names are tried in turn inside one open \csname:
%
%   1. \??accents<\string#1>#2\empty         (character not stringified)
%   2. \??accents<\string#1>\string#2\empty  (character stringified)
%
% When neither exists, the open \csname is closed right away, which yields the
% bare \??accents name (made equal to \empty right after this definition), then
% #2 is passed on as-is, and a fresh \csname\??accents is opened so that the
% final \endcsname has something to close (again the empty one).
\def\enco_handle_accent#1#2% expandable because we want them in the tuc file
  {\csname\??accents
     \ifcsname\??accents\string#1#2\empty\endcsname
       \string#1#2\empty
     \else\ifcsname\??accents\string#1\string#2\empty\endcsname
       \string#1\string#2\empty
     \else
       \endcsname#2\csname\??accents % very dirty trick: ignore accent but keep char
     \fi\fi
   \endcsname}

% The bare namespace name must expand to nothing: the fallback branch above
% produces it twice.
\letvalue{\??accents}\empty

% Defines, via \setuvalue, the command whose name is the stringified #1 with #2
% as its body.
\unexpanded\def\enco_define_command#1#2%
  {\setuvalue{\string#1}{#2}}

% Old style public names for the low level helpers. The three definers are
% used at the lua end.
\let\dodefineaccentcommand \enco_define_accent_command
\let\dodefineaccent \enco_define_accent
\let\dodefinecommand \enco_define_command
% Not used at the lua end, but maybe useful.
\let\dohandleaccent \enco_handle_accent

% \definecharacter <name> <number or definition>
%
% Both arguments are delimited by a space. When the stringified #2 is a number
% the command <name> is defined (expanded, via \setevalue) as \utfchar{#2};
% otherwise <name> is defined via \setuvalue with #2 as its body.
\unexpanded\def\definecharacter#1 #2 %
  {\doifelsenumber{\string#2}
     {\setevalue{\string#1}{\utfchar{#2}}} % or {\expandafter\chardef\csname#1\endcsname#2\relax}
     {\setuvalue{\string#1}{#2}}}

% \definecommand <name> <definition>
%
% Space delimited variant of \enco_define_command: defines the command named by
% the stringified #1 with #2 as its body.
\unexpanded\def\definecommand#1 #2 %
  {\setuvalue{\string#1}{#2}}

%D \macros
%D   {everyuppercase, everylowercase, everysanitize}

% Token registers allocated for hooks; they are only allocated here, nothing in
% the visible part of this file fills or flushes them.
\newtoks \everyuppercase
\newtoks \everylowercase
\newtoks \everysanitize

%D Accent handling (try to avoid this):

% \buildtextaccent\greekdasia\greekalphamacron
% \buildtextaccent\textacute q

% Box register; in the visible part of this file it is only referenced by the
% commented out \buildtextaccent variants below.
\newbox\b_enco_accent

% Passes #1 as the number to \mathaccent; the space after #1 ends that number.
\def\buildmathaccent#1%
  {\mathaccent#1 }

% \unexpanded\def\buildtextaccent#1#2% we could do all at the lua end
%   {\begingroup                     % but that's no fun (yet)
%    \setbox\b_enco_accent\hbox{#1}%
%    \scratchcounter\cldcontext{nodes.firstcharinbox(\number\b_enco_accent)}\relax
%    \ifcase\scratchcounter\else\accent\scratchcounter\fi
%    \relax#2%
%    \endgroup}

% \unexpanded\def\buildtextaccent#1#2% we could do all at the lua end
%   {\begingroup                     % but that's no fun (yet)
%    \setbox\b_enco_accent\hbox{#1}%
%    \clf_buildtextaccent\b_enco_accent#2%
%    \endgroup}
%
% This one can handle font collections too. The accent command is a scanner
% that expects the same font and otherwise discards the character (imo it
% could better drop the accent).

% \buildtextaccent{<accent>}{<character>}
%
% Overlays the accent (#1) on the character (#2) using boxes only:
%
% - both are set in their own box,
% - the result is an \hbox as wide as the wider of the two,
% - the character comes first, then a negative skip of the character width,
%   then the accent, all surrounded by \hss,
% - an accent higher than \exheight is treated as a top accent and raised by
%   (character height - accent height + \exheight/3); any other accent is
%   shifted with \lower by minus (character depth - accent depth + \exheight/3).
%
% The group keeps the scratch registers local.
\unexpanded\def\buildtextaccent#1#2% we could do all at the lua end
  {\dontleavehmode\begingroup      % but that's no fun (yet)
   \setbox\scratchboxone\hbox{#1}% accent
   \setbox\scratchboxtwo\hbox{#2}% character
   \scratchheight\dimexpr\ht\scratchboxtwo-\ht\scratchboxone\relax
   \scratchdepth \dimexpr\dp\scratchboxtwo-\dp\scratchboxone\relax
   \scratchwidth \wd\scratchboxtwo
   \hbox to \wd\ifdim\wd\scratchboxone>\wd\scratchboxtwo\scratchboxone\else\scratchboxtwo\fi\bgroup
     \hss\box\scratchboxtwo\hss
     \hskip-\scratchwidth
     \hss
     \ifdim\ht\scratchboxone>\exheight
       % top accent
       \raise\dimexpr\scratchheight+\exheight/3\relax
     \else
       \lower-\dimexpr\scratchdepth+\exheight/3\relax
     \fi
     \box\scratchboxone
     \hss
   \egroup
   \endgroup}

% \bottomaccent{<down>}{<right>}{<slantcorrection>}{<accent>}{<char>}
%
% Puts an accent below a character by means of a two row alignment in a \vtop:
%
%   #1  becomes the \lineskip, i.e. the distance between the two rows
%   #2  factor applied to the accent width (\wd0) for a skip to the right
%   #3  factor applied to \slantperpoint for a skip to the left
%   #4  the accent, set in box 0
%   #5  the character, set in an \hbox in the first row
%
% In the second row the accent is packed in a \vpack of .2\exheight and put
% between \hidewidth, so it does not contribute to the column width.
\unexpanded\def\bottomaccent#1#2#3#4#5% down right slantcorrection accent char
  {\dontleavehmode % why this align mess
   \vtop
     {\forgetall
      \baselineskip\zeropoint
      \lineskip#1%
      \everycr\emptytoks
      \tabskip\zeropoint
      \lineskiplimit\zeropoint
      \setbox0\hbox{#4}%
      \halign
        {##\crcr\hbox{#5}\crcr
         \hidewidth
         \hskip#2\wd0
         \hskip-#3\slantperpoint % in plain 1ex * dimenless value
         \vpack to .2\exheight{\box0\vss}\hidewidth
         \crcr}}}

% Ready made bottom accents. Each one preloads the first four arguments of
% \bottomaccent (down, right, slantcorrection, accent) and leaves the character
% to be picked up as the fifth one.
\unexpanded\def\buildtextmacron
  {\bottomaccent{.25ex}{0}{15}{\textmacron}}
\unexpanded\def\buildtextbottomdot
  {\bottomaccent{.25ex}{0}{5}{\textbottomdot}}
\unexpanded\def\buildtextcedilla
  {\bottomaccent{0ex}{0}{5}{\textcedilla}}
\unexpanded\def\buildtextogonek
  {\bottomaccent{-.1ex}{.5}{0}{\textogonek}}
\unexpanded\def\buildtextbottomcomma
  {\bottomaccent{.15ex}{0}{5}{\tx,}}

% \d is the short name for the dot below accent.
\let\d\buildtextbottomdot

% \topaccent{<down>}{<right>}{<slantcorrection>}{<accent>}{<char>}
%
% Overlays an accent on a character inside an \hbox of the character's width:
%
%   #1  amount passed to \raise for the accent
%   #2  factor applied to the accent width (\wd0) for a skip to the right
%   #3  factor applied to \slantperpoint for a skip to the left
%   #4  the accent; box 0 only serves to measure it, the accent is typeset
%       again in a fresh \hbox
%   #5  the character, set in box 2 and placed with \copy
\unexpanded\def\topaccent#1#2#3#4#5% down right slantcorrection accent char
  {\dontleavehmode
   \bgroup
     \setbox0\hbox{#4}%
     \setbox2\hbox{#5}%
     \hbox to \wd2 \bgroup
        \hss\copy2\hss
        \hskip-\wd2
        \hss\hskip#2\wd0\hskip-#3\slantperpoint\raise#1\hbox{#4}\hss
     \egroup
   \egroup}

% Grave accent built with \topaccent: no raise, no shift to the right, slant
% correction factor 15; the character is picked up as the fifth argument.
\unexpanded\def\buildtextgrave
  {\topaccent{0pt}{0}{15}{\textgrave}} % e.g.

% \definemathaccent <name> <accent>
%
% Defines the command <name> (#1, delimited by a space) as \mathaccent followed
% by #2 and a space.
\unexpanded\def\definemathaccent#1 #2%
  {\setvalue{#1}{\mathaccent#2 }}

%D Math (will move):

% Each line defines \<name> as \mathaccent applied to the given \math<name>.
\definemathaccent acute \mathacute
\definemathaccent grave \mathgrave
\definemathaccent ddot \mathddot
\definemathaccent tilde \mathtilde
\definemathaccent bar \mathbar
\definemathaccent breve \mathbreve
\definemathaccent check \mathcheck
\definemathaccent hat \mathhat
\definemathaccent vec \mathvec
\definemathaccent dot \mathdot
% \definemathaccent widetilde \mathwidetilde
% \definemathaccent widehat   \mathwidehat

% from enco-com:

% Traditional short names, defined directly as characters (or letter pairs).
% Uppercase variants:
\def\AA{Å}
\def\AE{Æ}
\def\CC{Ç}
\def\L{Ł}
\def\O{Ø}
\def\OE{Œ}
\def\SZ{SS}
\def\IJ{IJ}
% Lowercase variants:
\def\aa{å}
\def\ae{æ}
\def\cc{ç}
\def\l{ł}
\def\o{ø}
\def\oe{œ}
\def\sz{ß}
\def\ij{ij}
% Note that \SS gives the lowercase sharp s, not the SS pair.
\def\SS{ß}

% from enco-def:

% Dotless i and dotless j as direct characters.
\def\i{ı}
\def\j{ȷ}

% Section sign (U+00A7) and paragraph sign / pilcrow (U+00B6) under their old
% names. The \P and \textP bodies were empty, so these commands typeset nothing;
% they now hold the pilcrow, parallel to \S and \textS.
\def\S{§} \def\textS {§} % obsolete (surfaced in bibliographic files)
\def\P{¶} \def\textP {¶} % obsolete (surfaced in bibliographic files)

% Named characters, defined directly as characters (or as the SS pair).

% sharp s
\def\eszett{ß}
\def\Eszett{SS}
\def\Ssharp{SS}

% stroked letters
\def\lslash{ł}
\def\Lslash{Ł}
\def\dslash{đ}
\def\Dslash{Đ}
\def\oslash{ø}
\def\Oslash{Ø}
\def\dcroat{đ}
\def\Dcroat{Đ}

% letters with cedilla
\def\kcedilla{ķ}
\def\Kcedilla{Ķ}
\def\lcedilla{ļ}
\def\Lcedilla{Ļ}
\def\ncedilla{ņ}
\def\Ncedilla{Ņ}
\def\rcedilla{ŗ}
\def\Rcedilla{Ŗ}

% letters with umlaut
\def\aumlaut{ä}
\def\Aumlaut{Ä}
\def\eumlaut{ë}
\def\Eumlaut{Ë}
\def\iumlaut{ï}
\def\Iumlaut{Ï}
\def\oumlaut{ö}
\def\Oumlaut{Ö}
\def\uumlaut{ü}
\def\Uumlaut{Ü}

% for old times' sake (obsolete)

% Florin, pound and per mille signs as direct characters. The \promille and
% \permille bodies were empty, so they typeset nothing; they now hold the per
% mille sign (U+2030).
\def\textflorin{ƒ} \def\florin  {ƒ}
\def\pound     {£} \def\sterling{£}
\def\promille  {‰} \def\permille{‰}

% tex specific

% Short names for the characters that are special in TeX. When \textpercent is
% already defined at this point the short names are direct aliases (\let) of the
% current \text... meanings; otherwise they are macros that call the \text...
% commands when used, so a later definition is still picked up. The defined test
% is done on \textpercent only.
\ifdefined\textpercent
    \let\percent   \textpercent
    \let\procent   \textpercent
    \let\ampersand \textampersand
    \let\dollar    \textdollar
    \let\hash      \texthash
\else
    \def\percent   {\textpercent}
    \def\procent   {\textpercent}
    \def\ampersand {\textampersand}
    \def\dollar    {\textdollar}
    \def\hash      {\texthash}
\fi

% from enco-mis:

% Percent and per thousand signs faked in math mode: a superscripted 0, a slash
% and a subscripted 0 (or 00), all in \scriptscriptstyle and pulled together
% with negative kerns.
\unexpanded\def\fakepercent
  {\mathematics{\normalsuperscript{\scriptscriptstyle0}\kern-.25\emwidth/\kern-.2\emwidth\normalsubscript{\scriptscriptstyle0}}}

\unexpanded\def\fakeperthousand
  {\mathematics{\normalsuperscript{\scriptscriptstyle0}\kern-.25\emwidth/\kern-.2\emwidth\normalsubscript{\scriptscriptstyle00}}}

% A faked symbol made of a superscripted minus, a slash and a subscripted
% minus, centered in a box that has the width of a math plus sign.
\unexpanded\def\fakepermine
  {\dontleavehmode
   \bgroup
   \setbox\scratchbox\hbox
     {\mathematics{+}}%
   \hbox to \wd\scratchbox
     {\hss
      \mathematics{\normalsuperscript{\scriptscriptstyle-}\kern-.4\emwidth/\kern-.3\emwidth\normalsubscript{\scriptscriptstyle-}}%
      \hss}%
   \egroup}

% \permine always uses the faked symbol.
\def\permine{\fakepermine}

% some more

% When nothing has defined \softhyphen before this point it becomes the
% explicit discretionary.
\unless\ifdefined\softhyphen
    \let\softhyphen\explicitdiscretionary
\fi

% \hyphen defers to \softhyphen and the other names defer to \hyphen; only
% \breakinghyphen adds a \prewordbreak.
\def\hyphen{\softhyphen}
\def\cwm{\hyphen}
\def\compoundwordmark{\hyphen}
\def\nonbreakinghyphen{\hyphen}
\def\breakinghyphen{\hyphen\prewordbreak}

% quotes

% Descriptive quote names mapped onto the glyph commands. Within each group
% the left and right variants map onto the same command.

% lower nine quotes
\def\lowerleftsingleninequote{\quotesinglebase}
\def\lowerrightsingleninequote{\quotesinglebase}
\def\lowerleftdoubleninequote{\quotedblbase}
\def\lowerrightdoubleninequote{\quotedblbase}

% upper nine quotes
\def\upperleftsingleninequote{\quoteright}
\def\upperrightsingleninequote{\quoteright}
\def\upperleftdoubleninequote{\quotedblright}
\def\upperrightdoubleninequote{\quotedblright}

% upper six quotes
\def\upperleftsinglesixquote{\quoteleft}
\def\upperrightsinglesixquote{\quoteleft}
\def\upperleftdoublesixquote{\quotedblleft}
\def\upperrightdoublesixquote{\quotedblleft}

% single guillemots
\def\leftsubguillemot{\guilsingleleft}
\def\rightsubguillemot{\guilsingleright}

% left-overs (some day in private unicode space, so that we can roundtrip)

% \textblacksquare: a rule of width .3em that runs from .1em to .4em above the
% baseline (negative depth).
% \schwa: the letter e rotated by 180 degrees.
% \schwagrave: that rotated e with a built grave accent on top.
\unexpanded\def\textblacksquare   {\dontleavehmode\hbox{\vrule\s!width.3\s!em\s!height.4\s!em\s!depth-.1\s!em}}
\unexpanded\def\schwa             {\hbox{\rotate[\c!rotation=180,\c!location=\v!high]{\hbox{e}}}}
\unexpanded\def\schwagrave        {\buildtextgrave\schwa}

% The control space character is slot "2423.
\chardef\textcontrolspace"2423

% Namespace for the font commands that \fallbackcontrolspace caches.
\installcorenamespace{controlspace}

% \unexpanded\def\fallbackcontrolspace % beware: non-matching widths
%   {\hbox to \interwordspace{\hss\getglyph{LMTypewriter-Regular}\textcontrolspace\hss}%

% Typesets the control space character from a fallback font, centered in a box
% of the current \interwordspace. The font switch is looked up under a name
% made of \??controlspace and the numeric value of \interwordspace; when there
% is no such command yet, \enco_fast_control_space_define loads the font and
% stores the command.
\unexpanded\def\fallbackcontrolspace % beware, current font, we also need to honor color
  {\hbox to \interwordspace \bgroup
     \hss
     \ifcsname\??controlspace\number\interwordspace\endcsname
       \csname\??controlspace\number\interwordspace\endcsname
     \else
       \enco_fast_control_space_define % only regular
     \fi
     \textcontrolspace
     \hss
   \egroup}

% Loads LMTypewriter-Regular at the current body size and stores the resulting
% font command globally (\glet to \lastrawfontcall). The \interwordspace is
% saved in \scratchdimen before the font is loaded, so the name that is stored
% is the one \fallbackcontrolspace looked for.
\unexpanded\def\enco_fast_control_space_define
  {\scratchdimen\interwordspace
   \definedfont[LMTypewriter-Regular at \the\dimexpr\currentfontbodyscale\dimexpr\fontbody]% see font-sym.mkiv
   \expandafter\glet\csname\??controlspace\number\scratchdimen\endcsname\lastrawfontcall}

% Uses the control space character of the current font when the font has it and
% the fallback otherwise.
\unexpanded\def\normalcontrolspace
  {\iffontchar\font\textcontrolspace
     \textcontrolspace
   \else
     \fallbackcontrolspace
   \fi}

% \textvisiblespace is another name for the same thing.
\let\textvisiblespace\normalcontrolspace

% Like \normalcontrolspace, but a font without the control space character
% gets \asciispacechar instead of the fallback glyph.
\unexpanded\def\optionalcontrolspace
  {\iffontchar\font\textcontrolspace
     \textcontrolspace
   \else
     \asciispacechar % used for export !
   \fi}

% \unexpanded\def\fastcontrolspace % no glyph resolving after first (use grouped)
%   {\enco_fast_control_space}
%
% \def\enco_fast_control_space
%   {\iffontchar\font\textcontrolspace
%      \enco_fast_control_space_nop
%    \else
%      \enco_fast_control_space_yes
%    \fi
%    \enco_fast_control_space}
%
% \newbox\b_enco_control_space
%
% \def\enco_fast_control_space_nop
%   {\let\enco_fast_control_space\textcontrolspace}
%
% \def\enco_fast_control_space_yes
%   {\setbox\b_enco_control_space\fallbackcontrolspace
%    \let\enco_fast_control_space\flushcontrolspacebox}
%
% \def\flushcontrolspacebox
%   {\copy\b_enco_control_space}

% a few defaults (\<whatever>{}), we really need the verbose \empty as it will be
% stringified .. anyhow, we define this at the lua end now but keep it here as a
% reference
%
% \defineaccent ^ {\empty} {\textcircumflex}
% \defineaccent ` {\empty} {\textgrave}
% \defineaccent ~ {\empty} {\texttilde}
% \defineaccent " {\empty} {\textdiaeresis}
% \defineaccent ' {\empty} {\textacute}
% \defineaccent . {\empty} {\textdotaccent}
% \defineaccent = {\empty} {\textmacron}
% \defineaccent c {\empty} {\textcedilla}
% \defineaccent H {\empty} {\texthungarumlaut}
% \defineaccent k {\empty} {\textogonek}
% \defineaccent r {\empty} {\textring}
% \defineaccent u {\empty} {\textbreve}
% \defineaccent v {\empty} {\textcaron}

% The default accents are defined at the lua end; the commented list above is
% kept as a reference only.
\clf_defineaccents % one time

%D A smaller and bolder variant, more like the math and monospaced ones.

% An underscore made from a rule: depth .12 times the ex height, width equal to
% the interword space and no height. In math mode both dimensions are taken
% from \mathstylefont\normalmathstyle, in text mode from the current \font
% (there the rule is boxed).
\unexpanded\def\fakeunderscore
  {\relax\ifmmode
     \vrule\s!depth .12\fontexheight\mathstylefont\normalmathstyle\s!width \fontinterwordspace\mathstylefont\normalmathstyle\s!height\zeropoint\relax
   \else
     \dontleavehmode\hbox{\vrule\s!depth .12\fontexheight\font\s!width \fontinterwordspace\font\s!height\zeropoint}%
   \fi}

% Switches that make \_ either the rule based underscore or the text one.
\unexpanded\def\fakeunderscores{\let\_\fakeunderscore}
\unexpanded\def\textunderscores{\let\_\textunderscore}

% NOTE(review): this call comes before the \textunderscore fallback below is
% installed, and its effect on \_ is overridden by the \let\_\normalunderscore
% at the end of this block -- confirm whether it is still needed.
\textunderscores

% Fall back on the rule based underscore when no math or text underscore has
% been defined before this point.
\ifdefined\mathunderscore \else \let\mathunderscore\fakeunderscore \fi
\ifdefined\textunderscore \else \let\textunderscore\fakeunderscore \fi

% Picks the math or text underscore depending on the mode.
\unexpanded\def\normalunderscore{\ifmmode\mathunderscore\else\textunderscore\fi}

% The default meaning of \_.
\let\_\normalunderscore

%D To be sorted out:

% Typesets a minus sign in text with the first of these characters that the
% current font provides: figure dash, en dash, math minus; the plain hyphen is
% the last resort.
%
% The conditionals are expanded while \char scans its number and each number is
% ended by the space that follows it. Every \iffontchar needs its own \else:
% without the last one a font that has the math minus would get "2212 as the
% number and then the literal text "002D, while a font that has none of the
% three would leave \char without a number.
\unexpanded\def\textminus
  {\char  \iffontchar\font"2012 "2012 % figuredash
     \else\iffontchar\font"2013 "2013 % endash
     \else\iffontchar\font"2212 "2212 % math minus
     \else                      "002D % hyphen
     \fi\fi\fi}

% The plus sign taken from the current font by slot; the space ends the number.
\unexpanded\def\textplus
  {\char"002B } % plus

%D Moved from core-mis:

% \celsius{<number>}: the number, a math degree sign and the letter C.
\unexpanded\def\celsius#1%
  {#1\mathematics{^\circ}C}

% \inch: two primes set in math mode.
\unexpanded\def\inch % was: \hbox{\rm\char125\relax}
  {\mathematics{\prime\prime}}

% \fraction{<numerator>}{<denominator>}: a math fraction made with \over.
\unexpanded\def\fraction#1#2%
  {\mathematics{#1\over#2}}

% \def\periodswidth  {.5em}
% \def\periodsdefault{3}    % was 5, but now it's like \unknown
%
% \unexpanded\def\periods
%   {\dosingleempty\enco_periods}
%
% \def\doperiods[#1]% todo: also n=,width= or maybe just #1,#2
%   {\dontleavehmode
%    \begingroup
%    \scratchdimen\periodswidth
%    \hbox to \iffirstargument#1\else\periodsdefault\fi \scratchdimen
%      {\leaders\hbox to \scratchdimen{\hss\periodsymbol\hss}\hss}%
%    \endgroup}
%
% better for export:
%
% \unexpanded\def\enco_periods[#1]% todo: also n=,width= or maybe just #1,#2
%   {\dontleavehmode
%    \hbox\bgroup
%    \setbox\scratchbox\hbox to \periodswidth{\hss\periodsymbol\hss}%
%    \dorecurse{\iffirstargument#1\else\periodsdefault\fi}{\copy\scratchbox}%
%    \egroup}
%
% \unexpanded\def\unknown
%   {\periods\relax} % relax prevents lookahead for []
%
% per request:

%D \startbuffer
%D \startlines
%D     x\periods x
%D     x\periods[10]x
%D     x\periods[n=10,symbol={,}]x
%D     x\periods[n=4,symbol={!!},width=1em]x
%D     x\periods[n=4,symbol={!!},width=fit]x
%D     x\periods[n=4,symbol={!!},width=fit,distance=1em]x
%D     x\unknown x
%D \stoplines
%D \stopbuffer
%D
%D \typbuffer \getbuffer

% Defaults for \periods. The two macros are the old way of configuring it and
% feed the n and width keys below.
\def\periodswidth  {.5\emwidth}  % downward compatible
\def\periodsdefault{3}           % downward compatible

\installcorenamespace {periods}

\installsetuponlycommandhandler \??periods {periods}

% Keys: n (number of symbols), width (width per symbol, or \v!fit), distance
% (used with \v!fit only) and symbol.
\setupperiods
  [\c!n=\periodsdefault,
   \c!width=\periodswidth, % can also be \v!fit
   \c!distance=.25\emwidth,
   \c!symbol=.]

% \periods, \periods[<number>] or \periods[<key=value list>]
%
% Opens the \hbox that holds the result and branches on the presence of an
% optional argument; \enco_periods_finish closes the box again.
\unexpanded\def\periods
  {\dontleavehmode
   \hbox\bgroup
   \doifelsenextoptional\enco_periods_yes\enco_periods_nop}

% Handles the optional argument of \periods and leaves the number of symbols in
% \scratchcounter:
%
% - a key=value list is applied as settings, after which n is used,
% - an empty argument uses the configured n,
% - anything else is taken as the number itself.
\unexpanded\def\enco_periods_yes[#1]%
  {\doifelseassignment{#1}
     {\setupcurrentperiods[#1]%
      \scratchcounter\periodsparameter\c!n}
     {\doifelsenothing{#1}
        {\scratchcounter\periodsparameter\c!n}%
        {\scratchcounter#1}}%
   \enco_periods_finish}

% No optional argument: use the configured n as the number of symbols.
\unexpanded\def\enco_periods_nop
  {\scratchcounter\periodsparameter\c!n
   \enco_periods_finish}

% Typesets the symbols in the fit or the fixed width way, depending on the
% width setting, and closes the box that \periods or \unknown opened.
\unexpanded\def\enco_periods_finish
  {\edef\p_width{\periodsparameter\c!width}%
   \ifx\p_width\v!fit
     \enco_periods_finish_fit
   \else
     \enco_periods_finish_width
   \fi
   \egroup}

% Fixed width: the symbol is centered in a box of the requested width and that
% box is copied \scratchcounter times.
\unexpanded\def\enco_periods_finish_width
  {\setbox\scratchbox\hbox to \p_width
     {\hss\periodsparameter\c!symbol\hss}%
   \dorecurse\scratchcounter{\copy\scratchbox}}

% Fit: the symbols keep their natural width; the configured distance is put in
% front of the first symbol and after every symbol.
\unexpanded\def\enco_periods_finish_fit
  {\edef\p_symbol{\periodsparameter\c!symbol}%
   \scratchdistance\periodsparameter\c!distance
   \hskip\scratchdistance
   \dorecurse\scratchcounter{\p_symbol\hskip\scratchdistance}}

% \unknown: the same as \periods without an optional argument; it does not look
% ahead for one.
\unexpanded\def\unknown
  {\dontleavehmode
   \hbox\bgroup
   \enco_periods_nop}

%D Was missing:

% Mode aware ampersand: \mathampersand in math and \textampersand in text. This
% replaces the \ampersand that was set in the tex specific section earlier in
% this file.
\unexpanded\def\ampersand{\mathortext\mathampersand\textampersand}

%D Left-overs:

% When commands are simplified \buildtextaccent keeps only its second argument
% (the character) and drops the accent.
\appendtoks
    \let\buildtextaccent\secondoftwoarguments
\to \everysimplifycommands

%D A plain one:

% \unexpanded\def\t#1{%
%   \dontleavehmode
%   \begingroup
%     \setbox\scratchboxone\hpack{#1}%
%     \setbox\scratchboxtwo\hpack\bgroup
%       \iffontchar\font"0361\relax
%          \char"0361\relax
%       \else
%          \iffontchar\font"2040\relax\else
%            \the\textfont0
%          \fi
%          \char"2040
%       \fi
%     \egroup
%     \scratchdimenone\wd\ifdim\wd\scratchboxone>\wd\scratchboxtwo\scratchboxone\else\scratchboxtwo\fi
%     \scratchdimentwo\dimexpr-\ht\scratchboxtwo+\ht\scratchboxone+.45\exheight\relax
%     \hpack to \scratchdimenone\bgroup
%       \hpack to \scratchdimenone{\hss\box\scratchboxone\hss}%
%       \hskip   -\scratchdimenone
%       \hpack to \scratchdimenone{\hss\raise\scratchdimentwo\box\scratchboxtwo\hss}%
%     \egroup
%   \endgroup}

% Closes the \unprotect from the top of the file and stops reading.
\protect \endinput