lang-mis.mkxl /size: 16 Kb    last modification: 2025-02-21 11:03
1%D \module
2%D   [       file=lang-mis,
3%D        version=1997.03.20, % used to be supp-lan.tex
4%D          title=\CONTEXT\ Language Macros,
5%D       subtitle=Compounds,
6%D         author=Hans Hagen,
7%D           date=\currentdate,
8%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
9%C
10%C This module is part of the \CONTEXT\ macro||package and is
11%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
12%C details.
13
14%D This one will be updated stepwise to \LMTX. See lang-mis.mkiv for previous
15%D implementations and removed code.
16
17\writestatus{loading}{ConTeXt Language Macros / Compounds}
18
19%D More or less replaced.
20
21%D \gdef\starttest#1\stoptest{\starttabulate[|l|l|p|]#1\stoptabulate}
22%D \gdef\test     #1{\NC\detokenize{#1}\NC\hyphenatedword{#1}\NC#1\NC\NR}
23
24\unprotect
25
26%D One of \TEX's strong points in building paragraphs is the way hyphenations are
27%D handled. Although for real good hyphenation of non||english languages some
28%D extensions to the program are needed, fairly good results can be reached with the
29%D standard mechanisms and an additional macro, at least in Dutch.
30%D
31%D \CONTEXT\ originates in the wish to typeset educational materials, especially in
32%D a technical environment. In production oriented environments, a lot of compound
33%D words are used. Because the Dutch language poses no limits on combining words, we
34%D often favor putting dashes between those words, because it facilitates reading,
35%D at least for those who are not that accustomed to it.
36%D
37%D In \TEX\ compound words, separated by a hyphen, are not hyphenated at all. In
38%D spite of the multiple pass paragraph typesetting this can lead to parts of words
39%D sticking into the margin. The solution lies in saying \type
40%D {spoelwater||terugwinunit} instead of \type {spoelwater-terugwinunit}. By using a
41%D one character command like \type {|}, delimited by the same character \type {|},
42%D we get ourselves both a decent visualization (in \TEXEDIT\ and colored verbatim
43%D we color these commands yellow) and an efficient way of combining words.
44%D
45%D The sequence \type{||} simply leads to two words connected by a hyphen. Because
46%D we want to distinguish such a hyphen from the one inserted when \TEX\ hyphenates
47%D a word, we use a bit longer one.
48%D
49%D \hyphenation {spoel-wa-ter te-rug-win-unit}
50%D
51%D \starttest
52%D \test {spoelwater||terugwinunit}
53%D \stoptest
54%D
55%D As we already said, the \type{|} is a command. This command accepts an optional
56%D argument before its delimiter, which is also a \type{|}.
57%D
58%D \hyphenation {po-ly-meer che-mie}
59%D
60%D \starttest
61%D \test {polymeer|*|chemie}
62%D \stoptest
63%D
64%D Arguments like \type{*} are not interpreted and inserted directly, contrary to
65%D arguments like:
66%D
67%D \starttest
68%D \test {polymeer|~|chemie}
69%D \test {|(|polymeer|)|chemie}
70%D \test {polymeer|(|chemie|)| }
71%D \stoptest
72%D
73%D Although such situations seldom occur |<|we typeset thousands of pages before we
74%D encountered one that forced us to enhance this mechanism|>| we also have to take
75%D care of commas.
76%D
77%D  \hyphenation {uit-stel-len}
78%D
79%D  \starttest
80%D  \test {op||, in|| en uitstellen}
81%D  \stoptest
82%D
83%D The next special case (concerning quotes) was brought to my attention by Piet
84%D Tutelaers, one of the driving forces behind rebuilding hyphenation patterns for
85%D the Dutch language.\footnote{In 1996 the spelling of the Dutch language was
86%D slightly reformed, which made this topic relevant again.} We'll also take care of
87%D this case.
88%D
89%D \starttest
90%D \test {AOW|'|er}
91%D \test {cd|'|tje}
92%D \test {ex|-|PTT|'|er}
93%D \test {rock|-|'n|-|roller}
94%D \stoptest
95%D
96%D Tobias Burnus pointed out that I should also support something like
97%D
98%D \starttest
99%D \test {well|_|known}
100%D \stoptest
101%D
102%D to stress the compoundness of hyphenated words.
103%D
104%D Of course we also have to take care of the special case:
105%D
106%D \starttest
107%D \test {text||color and ||font}
108%D \stoptest
109
110%D \macros
111%D   {installdiscretionaries}
112%D
113%D The mechanism described here is one of the older inner parts of \CONTEXT. The
114%D most recent extensions concern some special cases as well as the possibility to
115%D install other characters as delimiters. The preferred way of specifying compound
116%D words is using \type{||}, which is installed by:
117%D
118%D \starttyping
119%D \installdiscretionary | -
120%D \stoptyping
121%D
122%D We used to have an installable mechanism but in the perspective of \MKIV\ and
123%D especially \LMTX\ it no longer makes sense to complicate the code, so from now on
124%D we only deal with the active bar. Older code can be seen in the archives. It also
125%D means that we now just hardcode the bar. We also deal with math differently.
126
127%D \macros
128%D   {compoundhyphen}
129%D
130%D Now let's go to the macros. First we define some variables. In the main \CONTEXT\
131%D modules these can be tuned by a setup command. Watch the (maybe) better looking
132%D compound hyphen.
133
% Fallback definition, only installed when \compoundhyphen does not exist yet:
% two hyphens inside an \hbox, with a negative kern of .10775\emwidth between
% them.
134\ifdefined\compoundhyphen \else
135
136    % This will be overloaded in typo-del.mkxl for a better variant that
137    % copies like a single hyphen when the right feature is enabled.
138
139    \permanent\protected\def\compoundhyphen{\hbox{-\kern-.10775\emwidth-}}
140
141\fi
142
143%D The last two variables are needed for subsentences |<|like this one|>| which we
144%D did not yet mention. We want to enable breaking but at the same time don't want
145%D compound characters like |-| or || to be separated from the words. \TEX\ hackers
146%D will recognise the next two macros:
147
% Each of the next three macros is only defined when it does not exist yet.
%
% \prewordbreak  : \penalty\plustenthousand followed by a zero \hskip
% \postwordbreak : \penalty\zerocount followed by a zero \hskip
% \hspaceamount  : takes two arguments, ignores both and yields .16667\emwidth
148\ifdefined\prewordbreak \else \permanent\protected\def\prewordbreak    {\penalty\plustenthousand\hskip\zeroskip\relax} \fi
149\ifdefined\postwordbreak\else \permanent\protected\def\postwordbreak   {\penalty\zerocount      \hskip\zeroskip\relax} \fi
150\ifdefined\hspaceamount \else                     \def\hspaceamount#1#2{.16667\emwidth}                                 \fi % will be overloaded
151
% \permithyphenation inserts a \wordboundary, but only in horizontal mode.
152\permanent\protected\def\permithyphenation{\ifhmode\wordboundary\fi} % doesn't remove spaces
153
154%D \macros
155%D   {beginofsubsentence,endofsubsentence,
156%D    beginofsubsentencespacing,endofsubsentencespacing}
157%D
158%D In the previous macros we provided two hooks which can be used to support nested
159%D sub||sentences. In \CONTEXT\ these hooks are used to insert a small space when
160%D needed.
161%D
162%D The following piece of code is a torture test for compound handling. The \type
163%D {\relax} before the \type {\ifmmode} is needed because of the alignment scanner
164%D (in \ETEX\ this problem is not present because there a protected macro is not
165%D expanded). Thanks to Tobias Burnus for providing this example.
166%D
167%D \startformula
168%D   \left|f(x_n)-{1\over2}\right| =
169%D      {\cases{|{1\over2}-x_n| &for $0\le x_n < {1\over2}$\cr
170%D              |x_n-{1\over2}| &for ${1\over2}<x_n\le1$   \cr}}
171%D \stopformula
172
% Two namespaces: one for the actions installed per compound argument (see
% \definetextmodediscretionary) and one for the mode dependent handlers that
% are named n, t, d and i in this file.
173\installcorenamespace{discretionaryaction}
174\installcorenamespace{discretionarymode}
175
% The old installers are kept as aliases of \gobbletwoarguments.
176\aliased\let\installdiscretionaries\gobbletwoarguments % this alias will go
177\aliased\let\installdiscretionary  \gobbletwoarguments % this alias will go
178
% \discretionarymode starts out as \plusone; \ignorediscretionaries sets it to
% \zerocount and \obeydiscretionaries sets it back to \plusone. The value is
% tested with \ifcase by the dispatchers in this file (zero selects handler n).
179\setnewconstant\discretionarymode\plusone
180
181\permanent\protected\def\ignorediscretionaries{\discretionarymode\zerocount}
182\permanent\protected\def\obeydiscretionaries  {\discretionarymode\plusone}
183
% Dispatcher for the bar: builds a control sequence name in the
% \??discretionarymode namespace, using suffix n when \discretionarymode is
% zero and suffix t for any other value.
184\def\lang_discretionaries_command
185  {\begincsname\??discretionarymode
186     \ifcase\discretionarymode
187       n%
188     \else
189       t%
190     \fi
191   \endcsname}
192
193% \catcode\barasciicode\activecatcode
194% \amcode \barasciicode\othercatcode
195
% Hook the dispatcher to \barasciicode for \ctxcatcodes (presumably this is what
% makes the bar behave as a command in that catcode regime).
196\letcatcodecommand\ctxcatcodes\barasciicode\lang_discretionaries_command
197
198%D The macro \type {\lang_discretionaries_check_before} takes care of loners like
199%D \type {||word}, while its counterpart \type {\lang_discretionaries_check_after} is
200%D responsible for handling the comma.
201
% Flags set by \lang_discretionaries_check_after: does a punctuation character
% or a space follow the compound?
202\newconditional\punctafterdiscretionary
203\newconditional\spaceafterdiscretionary
204
% In vertical mode this calls \dontleavehmode. The \ifhmode branch only holds
% disabled code (a \lastbox based check that redefined \postwordbreak), so at
% the moment nothing else happens here.
205\def\lang_discretionaries_check_before %is used grouped
206  {\ifvmode
207     \dontleavehmode
208   \fi
209   \ifhmode
210    %\begingroup
211    %\setbox\scratchbox\lastbox
212    %\ifzeropt\wd\scratchbox
213    %  \box\scratchbox\relax
214    %  \endgroup
215    %  \let\postwordbreak\prewordbreak
216    %\else
217    %  \box\scratchbox\relax
218    %  \endgroup
219    %\fi
220   \fi}
221
% Classifies \nexttoken (set by a preceding \futurelet, as done in the mode t
% bar macro). Both flags are reset first; then the first matching test wins:
%
%   \blankspace or \space : \spaceafterdiscretionary becomes true
%   . , : ;               : \punctafterdiscretionary becomes true
%
% When nothing matches both flags stay false.
222\def\lang_discretionaries_check_after
223  {\punctafterdiscretionary\conditionalfalse
224   \spaceafterdiscretionary\conditionalfalse
225   \ifx\blankspace\nexttoken \spaceafterdiscretionary\conditionaltrue \orelse
226   \ifx\space     \nexttoken \spaceafterdiscretionary\conditionaltrue \orelse
227   \ifx          .\nexttoken \punctafterdiscretionary\conditionaltrue \orelse
228   \ifx          ,\nexttoken \punctafterdiscretionary\conditionaltrue \orelse
229   \ifx          :\nexttoken \punctafterdiscretionary\conditionaltrue \orelse
230   \ifx          ;\nexttoken \punctafterdiscretionary\conditionaltrue \fi}
231
% Handler n (used when \discretionarymode is zero): expands to a detokenized
% bar character.
232\edefcsname\??discretionarymode n\endcsname
233  {\detokenize{|}}
234
% Handler t (any other mode): grabs everything up to the next bar as #1, opens
% a group, peeks at the token that follows into \nexttoken and then hands #1
% to \lang_discretionaries_handle. The group is closed by the handler.
235\defcsname\??discretionarymode t\endcsname#1|%
236  {\bgroup
237   \def\next{\lang_discretionaries_handle{#1}}%
238   \futurelet\nexttoken\next}%
239
% Main handler for |#1|, called inside the group opened by handler t with
% \nexttoken holding the token that follows the closing bar.
%
% The detokenized argument is stored in \lang_discretionaries_token and
% \lang_discretionaries_check_after sets the space/punctuation flags. Then:
%
% (1) empty argument, next token is a bar (the ||| case): run the action that
%     is registered under the name "|" when there is one, otherwise typeset a
%     boxed \letterbar preceded by \wordboundary (and followed by one unless a
%     space comes next). Here \next is set up to assign the following token
%     away with \let and to close the group after that assignment.
%     NOTE(review): the punctuation branch and the final \else branch are
%     identical in this case.
% (2) empty argument otherwise (plain ||): run the action registered under
%     the empty name when there is one, otherwise \defaultdiscretionaryhyphen
%     preceded by \wordboundary; a trailing \wordboundary is only added when
%     neither a space nor punctuation follows.
% (3) non empty argument with a registered action: run that action.
% (4) any other argument: a boxed #1 when a space or punctuation follows,
%     otherwise a \discretionary with the boxed #1 as pre-break and no-break
%     text, wrapped in \wordboundary.
%
% In cases (2) to (4) \next is \egroup, so the final \next closes the group.
240\permanent\protected\def\lang_discretionaries_handle#1%
241  {\edef\lang_discretionaries_token{\detokenize{#1}}%
242   \lang_discretionaries_check_after
243   \ifempty\lang_discretionaries_token
244     \ifx|\nexttoken % takes care of |||
245       \ifcsname\??discretionaryaction\string|\endcsname
246         \lastnamedcs
247       \orelse\ifconditional\spaceafterdiscretionary
248         \wordboundary\hbox{\letterbar}\relax
249       \orelse\ifconditional\punctafterdiscretionary
250         \wordboundary\hbox{\letterbar}\wordboundary
251       \else
252         \wordboundary\hbox{\letterbar}\wordboundary
253       \fi
254       \def\next{\afterassignment\egroup\let\next=}%
255     \else
256       \lang_discretionaries_check_before
257       \ifcsname\??discretionaryaction\endcsname
258         \lastnamedcs
259       \orelse\ifconditional\spaceafterdiscretionary
260         \wordboundary\defaultdiscretionaryhyphen\relax
261       \orelse\ifconditional\punctafterdiscretionary
262         \wordboundary\defaultdiscretionaryhyphen\relax
263       \else
264         \wordboundary\defaultdiscretionaryhyphen\wordboundary
265       \fi
266       \let\next\egroup
267     \fi
268   \orelse\ifcsname\??discretionaryaction\lang_discretionaries_token\endcsname
269     \lastnamedcs
270     \let\next\egroup
271   \else
272     \lang_discretionaries_check_before
273     \ifconditional\spaceafterdiscretionary
274       \wordboundary\hbox{#1}\relax
275     \orelse\ifconditional\punctafterdiscretionary
276       \wordboundary\hbox{#1}\relax
277     \else
278       \wordboundary\discretionary{\hbox{#1}}{}{\hbox{#1}}\wordboundary
279      %\discretionary options \plusthree{\hbox{#1}}{}{\hbox{#1}}%
280     \fi
281     \let\next\egroup
282   \fi
283   \next}
284
285%D \macros
286%D   {directdiscretionary}
287%D
288%D In those situations where the nature of characters is less predictable, we can
289%D use the more direct approach:
290
% Both commands dispatch on \discretionarymode in the same way as the bar
% does: handler n when the mode is zero, otherwise handler d (direct) or
% handler i (indirect). The argument is picked up by the selected handler.
291\permanent\protected\def\directdiscretionary
292  {\begincsname\??discretionarymode
293     \ifcase\discretionarymode
294       n%
295     \else
296       d%
297     \fi
298   \endcsname}
299
300\permanent\protected\def\indirectdiscretionary
301  {\begincsname\??discretionarymode
302     \ifcase\discretionarymode
303       n%
304     \else
305       i%
306     \fi
307   \endcsname}
308
% Handler d: when an action is registered for the detokenized #1 that action
% is used, otherwise \indirectdiscretionary. In both cases {#1} is put behind
% the chosen command.
% NOTE(review): the actions installed in this file are parameterless, so in
% the first case the trailing {#1} does not seem to be picked up by the action
% itself; confirm that this is intended.
309\protected\defcsname\??discretionarymode d\endcsname#1%
310  {\ifcsname\??discretionaryaction\detokenize{#1}\endcsname
311     \expandafter\lastnamedcs
312   \else
313     \expandafter\indirectdiscretionary
314   \fi{#1}}
315
% Handler i: a \discretionary with the boxed #1 as pre-break and as no-break
% text and an empty post-break text, wrapped in \wordboundary.
316\protected\defcsname\??discretionarymode i\endcsname#1%
317  {\wordboundary\discretionary{\hbox{#1}}{}{\hbox{#1}}\wordboundary}
318 %{\discretionary options \plusthree{\hbox{#1}}{}{\hbox{#1}}}
319
% Installs an action in the \??discretionaryaction namespace. The parameter is
% delimited by a space (the line end after #1), so the usage is:
%
%   \definetextmodediscretionary <characters> {<action>}
%
% The macro expands to \defcsname...\endcsname, which means that the braced
% group following the call becomes the body of a parameterless macro.
320\permanent\protected\def\definetextmodediscretionary #1
321  {\defcsname\??discretionaryaction\detokenize{#1}\endcsname}
322
323% \start \hsize 1mm
324% test |||test test|||, test\blank
325% test test|-|, test|-| and test|-|test\blank
326% test test|_|, test|_| and test|_|test\blank
327% test cd|'|tje\blank
328% test |(|test test|)|, test\blank
329% test test test|x|, test\blank
330% test|~|test
331% test|^|test
332% \stop
333
334% x\discretionary{1}{2}{3}xxxxxxx
335% xxxxxxx\discretionary{1}{2}{3}x
336%
337% xxx3xxx
338% xxx1<newline>2xxx
339
% Shared helper for hyphen like actions.
%
%   #1 : what gets boxed (after a \wordboundary) when a space or punctuation
%        follows, according to the flags set by
%        \lang_discretionaries_check_after
%   #2 : what is used, wrapped in \wordboundary, in all other cases
340\def\lang_discretionaries_hyphen_like#1#2%
341  {\ifconditional\spaceafterdiscretionary
342     \wordboundary\hbox{#1}\relax
343   \orelse\ifconditional\punctafterdiscretionary
344     \wordboundary\hbox{#1}\relax
345   \else
346     \wordboundary#2\wordboundary
347   \fi}
348
% Actions for || (empty argument), |-| and |_|. Each one passes a hyphen
% command and its discretionary variant to \lang_discretionaries_hyphen_like.
349\definetextmodediscretionary {}
350  {\lang_discretionaries_hyphen_like\textmodehyphen\textmodehyphendiscretionary}
351
352\definetextmodediscretionary -
353  {\lang_discretionaries_hyphen_like\normalhyphen\normalhyphendiscretionary}
354
355\definetextmodediscretionary _
356  {\lang_discretionaries_hyphen_like\composedhyphen\composedhyphendiscretionary}
357
% |)| : a boxed ")" when a space or punctuation follows, otherwise a
%       discretionary with "-)" as pre-break and ")" as no-break text.
358\definetextmodediscretionary )
359  {\lang_discretionaries_hyphen_like{)}{\discretionary{-)}{}{)}}}
360
% |(| : when the last skip is positive just "(" followed by \wordboundary,
%       otherwise a discretionary with "(-" as post-break and "(" as no-break
%       text.
361\definetextmodediscretionary (
362  {\ifdim\lastskip>\zeropoint
363     (\wordboundary
364   \else
365     \wordboundary\discretionary{}{(-}{(}\wordboundary
366    %\discretionary options \plusthree{}{(-}{(}%
367   \fi}
368
% |~| : hyphen when broken, \thinspace when not broken.
369\definetextmodediscretionary ~
370  {\wordboundary\discretionary{-}{}{\thinspace}\wordboundary}
371 %{\discretionary options \plusthree{-}{}{\thinspace}}
372
% |'| : hyphen when broken, a quote when not broken.
373\definetextmodediscretionary '
374  {\wordboundary\discretionary{-}{}{'}\wordboundary}
375 %{\discretionary options \plusthree{-}{}{'}}
376
% |^| : a boxed math mode bar, both as pre-break and as no-break text.
377\definetextmodediscretionary ^
378  {\wordboundary
379   \discretionary{\hbox{\normalstartimath|\normalstopimath}}{}{\hbox{\normalstartimath|\normalstopimath}}%
380   \wordboundary} % bugged
381 %{\discretionary options \plusthree{\hbox{\normalstartimath|\normalstopimath}}{}{\hbox{\normalstartimath|\normalstopimath}}}
382
% |<| : starts a subsentence. The \aftergroup queues \ignorespaces until the
%       current group is closed (the bar handler runs these actions grouped).
383\definetextmodediscretionary <
384  {\beginofsubsentence\wordboundary\beginofsubsentencespacing
385   \aftergroup\ignorespaces} % tricky, we need to go over the \nextnextnext
386
% |>| : ends a subsentence, after removing unwanted spaces in front of it.
387\definetextmodediscretionary >
388  {\removeunwantedspaces
389   \endofsubsentencespacing\wordboundary\endofsubsentence}
390
% |=| : \midsentence between two \wordboundary's; unwanted spaces before it are
%       removed and \ignorespaces is queued with \aftergroup.
391\definetextmodediscretionary =
392  {\removeunwantedspaces
393   \wordboundary\midsentence\wordboundary
394   \aftergroup\ignorespaces}
395
396% french
397
% |:| |;| |?| |!| : remove unwanted spaces, insert a \wordboundary and a kern
% of \hspaceamount\empty{<character>}, then typeset the character itself.
398\definetextmodediscretionary : {\removeunwantedspaces\wordboundary\kern\hspaceamount\empty{:}:}
399\definetextmodediscretionary ; {\removeunwantedspaces\wordboundary\kern\hspaceamount\empty{;};}
400\definetextmodediscretionary ? {\removeunwantedspaces\wordboundary\kern\hspaceamount\empty{?}?}
401\definetextmodediscretionary ! {\removeunwantedspaces\wordboundary\kern\hspaceamount\empty{!}!}
402
% |*| : hyphen when broken, a kern of .05\emwidth when not broken.
403\definetextmodediscretionary * {\wordboundary\discretionary{-}{}{\kern.05\emwidth}\wordboundary}
404
405% spanish
406
% |??| and |!!| : \wordboundary followed by \questiondown or \exclamdown.
407\definetextmodediscretionary ?? {\wordboundary\questiondown}
408\definetextmodediscretionary !! {\wordboundary\exclamdown}
409
% The hyphen used for a plain || when no action is registered under the empty
% name; it maps onto \compoundhyphen.
410\permanent\protected\def\defaultdiscretionaryhyphen{\compoundhyphen}
411
412%D \macros
413%D   {fakecompoundhyphen}
414%D
415%D In headers and footers as well as in active pieces of text we need a dirty hack.
416%D Try to imagine what is needed to safely break the next text across a line and at
417%D the same time make the words interactive.
418%D
419%D \starttyping
420%D \goto{Some||Long||Word}
421%D \stoptyping
422
% \fakecompoundhyphen redefines \| so that it selects, by means of
% \mathortext, between \vert and \lang_compounds_fake_hyphen.
423\permanent\protected\def\fakecompoundhyphen
424  {\enforced\permanent\protected\def\|{\mathortext\vert\lang_compounds_fake_hyphen}}
425
% The body below tests its argument for emptiness, typesets \compoundhyphen or
% the argument itself, and then adds \wordboundary and \allowbreak.
% NOTE(review): the inner \def is directly followed by ##1 instead of by a
% control sequence, and ## is used although the enclosing macro takes no
% parameters. Check whether \lang_compounds_fake_hyphen was meant to take the
% "#1|" parameter itself.
426\def\lang_compounds_fake_hyphen
427  {\enforced\permanent\protected\def##1|%
428     {\ifempty{##1}\compoundhyphen\else##1\fi
429      \wordboundary % was a signal
430      \allowbreak}}
431
432%D \macros
433%D   {midworddiscretionary}
434%D
435%D If needed, one can add a discretionary hyphen using \type
436%D {\midworddiscretionary}. This macro does the same as \PLAIN\ \TEX's \type {\-},
437%D but, like the ones implemented earlier, this one also looks ahead for spaces and
438%D grouping tokens.
439
% Peeks at the next token (without consuming it) and hands over to the helper.
440\permanent\protected\def\midworddiscretionary
441  {\futurelet\nexttoken\lang_discretionaries_mid_word}
442
% Compares \nexttoken with \blankspace, \bgroup and \egroup; nothing is
% inserted in the branches of these three tests.
% NOTE(review): the last \orelse is directly followed by \discretionary and
% not by a test. Presumably it then acts like \else, so that the hyphen
% discretionary is only inserted when none of the three tests match; confirm.
443\def\lang_discretionaries_mid_word
444  {\ifx\nexttoken\blankspace\orelse
445   \ifx\nexttoken\bgroup    \orelse
446   \ifx\nexttoken\egroup    \orelse
447     \discretionary{-}{}{}%
448   \fi}
449
450% \aliased\let\ignorecompoundcharacter\relax
451
452%D \macros
453%D   {disablediscretionaries,disablecompoundcharacter}
454%D
455%D Occasionally we need to disable this mechanism. For the moment we assume that
456%D \type {|} is used.
457
% \disablediscretionaries is an alias of \ignorediscretionaries, which sets
% \discretionarymode to \zerocount. The second alias is disabled.
458\aliased\let\disablediscretionaries   \ignorediscretionaries
459%aliased\let\disablecompoundcharacters\ignorecompoundcharacter
460
461%D \macros
462%D   {normalcompound}
463%D
464%D Handy in for instance XML. (Kind of obsolete)
465
% Unless it exists already, \normalcompound is \let to the bar token as it is
% tokenized at this point in the file.
466\ifdefined\normalcompound \else \aliased\let\normalcompound=| \fi
467
468%D \macros
469%D   {compound}
470%D
471%D We will overload the already active \type {|} so we have to save its meaning in
472%D order to be able to use this handy macro.
473%D
474%D \starttyping
475%D so test\compound{}test can be used instead of test||test
476%D \stoptyping
477
% \compound{#1} puts its argument between two bars, so \compound{} stands for
% || and \compound{-} for |-|. The definition is global (\gdef).
478\permanent\protected\gdef\compound#1{|#1|}
479
% Added to \everysimplifycommands: the bar becomes a plain delimited macro
% where || gives a hyphen and |x| gives x as is.
%
% The emptiness test is done on the detokenized argument. The former test
% "\ifx#1\empty" compared the first two tokens of #1, so an argument that
% starts with two equal tokens, like the ?? and !! that are installed as
% discretionaries in this file, or --, was taken for an empty one and came out
% as a single hyphen.
480\appendtoks
481    \enforced\permanent\protected\def|#1|{\if\relax\detokenize{#1}\relax-\else#1\fi}%
482\to \everysimplifycommands
483
484%D Here we hook some code into the clean up mechanism needed for verbatim data.
485
% Added to \everycleanupfeatures: \disablediscretionaries, which in this file
% is an alias of \ignorediscretionaries (it sets \discretionarymode to zero).
486\appendtoks
487    %disablecompoundcharacters
488    \disablediscretionaries
489\to \everycleanupfeatures
490
491%D Here:
492%D
493%D \startbuffer
494%D {\red somelongword}{\blue \compounddiscretionary}{\green somelongword}
495%D \stopbuffer
496%D
497%D \typebuffer \blank {\hsize3mm\getbuffer\par} \blank
498
% A \discretionary (with "options \plusthree") built from the current hyphen
% characters. A character is only typeset when its number is positive:
%
%   pre-break  : \prehyphenchar
%   post-break : \posthyphenchar
%   no-break   : \posthyphenchar
499\permanent\protected\def\compounddiscretionary
500   {\discretionary
501      options \plusthree
502      {\ifnum\prehyphenchar >\zerocount\char\prehyphenchar \fi}%
503      {\ifnum\posthyphenchar>\zerocount\char\posthyphenchar\fi}%
504      {\ifnum\posthyphenchar>\zerocount\char\posthyphenchar\fi}}
505
506% \setcatcodetable\prtcatcodes % because we activated the bar
507
508\protect \endinput
509