lang-frq.mkxl /size: 7047 b    last modification: 2023-12-21 09:44
1%D \module
2%D   [       file=lang-frq,
3%D        version=2004.01.15,
4%D          title=\CONTEXT\ Language Macros,
5%D       subtitle=Frequency Tables,
6%D         author=Hans Hagen,
7%D           date=\currentdate,
8%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
9%C
10%C This module is part of the \CONTEXT\ macro||package and is
11%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
12%C details.
13
% NOTE(review): this \endinput comes before every definition in the file, so TeX
% stops reading here and nothing below is loaded. Presumably the module has been
% switched off on purpose (see the "rather old code" remark below) -- confirm.
14\endinput
15
16\writestatus{loading}{ConTeXt Language Macros / Frequency Tables}
17
18%D Some day I might redo this in \LUA. But anyway, who uses it. It's rather old code.
19
% Paired with the \protect on the last line of this file.
20\unprotect
21
22%M \usemodule[layout]
23
24%D \macros
25%D   {charwidthmethod}
26%D
27%D This module implements a method for determining the width of an
28%D average character in a language. It uses the dimensions of the
29%D current fonts.
30%D
31%D \def\ShwChrWd#1#2#3%
32%D   {\chardef\charwidthmethod#1\relax
33%D    \mainlanguage[#2#3]\the\dimexpr(\averagecharwidth)}
34%D
35%D \starttabulate[|c|c|c|c|c|c|]
36%D \HL
37%D \NC      \NC\bf0=amount\NC\bf1=.5em\NC\bf2=ex\NC\bf3=frequency\NC\bf4=list\NC\NR
38%D \HL
39%D \NC\bf en\NC\ShwChrWd0en\NC\ShwChrWd1en\NC\ShwChrWd2en\NC\ShwChrWd3en\NC\ShwChrWd4en\NC\NR
40%D \NC\bf nl\NC\ShwChrWd0nl\NC\ShwChrWd1nl\NC\ShwChrWd2nl\NC\ShwChrWd3nl\NC\ShwChrWd4nl\NC\NR
41%D \NC\bf de\NC\ShwChrWd0de\NC\ShwChrWd1de\NC\ShwChrWd2de\NC\ShwChrWd3de\NC\ShwChrWd4de\NC\NR
42%D \HL
43%D \stoptabulate
44%D
45%D Method~1 ignores the widths and assumes that each character has a
46%D width of .5em, which is true for most monospaced fonts. Method~2
47%D takes the x as starting point, and assumes that its height kind of
48%D matches its width. Method~3 is the best one, and determines the
49%D average width based on the language specific character table.
50%D Method~4 is a mixture between the first two methods: character
51%D specific widths applied to an equal distribution. Method~0 reports
52%D the total count, which normally is~100.
53
% Selects which branch \charfreq takes in its \ifcase; the default is 3.
% The key at the end of the line lists the meaning of each value.
54\setnewconstant\charwidthmethod=3 % 0=amount 1=em 2=ex 3=frequency 4=flattened >4=ex
55
56%D \macros
57%D  {charwidthlanguage}
58%D
59%D The language used for the calculations is defined as:
60
% Language tag that \averagecharwidth and \charfreq pass to \charactertable and
% \charactertsize; by default it expands to \currentmainlanguage.
61\def\charwidthlanguage{\currentmainlanguage}
62
63%D \macros
64%D   {charfreq}
65%D
66%D This method comes into action in the following macro:
67
% \charfreq <character> <fraction>
%
% Both arguments are delimited by a space. The macro expands to a single "+(...)"
% term, so a sequence of entries can follow the \zeropoint inside the \dimexpr
% of \averagecharwidth. The term depends on \charwidthmethod:
%
%   0 : fraction times 100\onepoint
%   1 : fraction times half of \emwidth
%   2 : fraction times \exheight
%   3 : fraction times the \scaledfontcharwd of the character in the current \font
%   4 : 100 times that character width, divided by the number of entries in the
%       table of \charwidthlanguage (the fraction itself is not used)
%  >4 : same as 2
68\def\charfreq#1 #2 % character fraction
69  {+(\ifcase\charwidthmethod
70     #2\dimexpr100\onepoint\relax
71   \or
72     #2\dimexpr\emwidth/2\relax
73   \or
74     #2\dimexpr\exheight\relax
75   \or
76     #2\scaledfontcharwd\font`#1%
77   \or
78     \dimexpr100\scaledfontcharwd\font`#1/\charactertsize\charwidthlanguage\relax % ugly hack
79   \else
80     #2\dimexpr\exheight\relax
81   \fi)}
82
83%D \macros
84%D   {startcharactertable}
85%D
86%D A frequency table is defined with the following macro. The \type
87%D {charfreq} macro is used in this table.
88
% Two namespaces, both keyed by the language tag: the first holds the table body
% (the \charfreq entries), the second holds the number of entries.
89\installcorenamespace{frequencywidths}
90\installcorenamespace{frequencycounts}
91
% \stopcharactertable is only used as the delimiter of the table body below.
92\aliased\let\stopcharactertable\relax
93
% \startcharactertable[<tag>] <entries> \stopcharactertable
%
% Stores <entries> globally under <tag>. It then runs <entries> once with \charfreq
% redefined to add one to \scratchcounter per entry, and stores that count globally
% under <tag> as well. All of this happens between \startnointerference and
% \stopnointerference.
94\permanent\protected\def\startcharactertable[#1]#2\stopcharactertable % \dimexpr has fuzzy lookahead
95  {\startnointerference
96     \gdefcsname\??frequencywidths#1\endcsname{#2}% the width vector
97     \scratchcounter\zerocount \def\charfreq##1 ##2 {\advanceby\scratchcounter\plusone} #2%
98     \xdefcsname\??frequencycounts#1\endcsname{\the\scratchcounter}% the character count
99   \stopnointerference}
100
101%D \macros
102%D   {charactertable,charactertsize}
103%D
104%D The table content as well as the number of entries can be fetched with
105%D the following two macros. The architecture of the table and calling
106%D macro permits a fully expandable application.
107
% \charactertable{<tag>} expands to the table body stored under <tag>. When nothing
% is stored under <tag>, the \ifcsname test fails and the body stored under \s!en
% is used instead.
108\permanent\def\charactertable#1%
109  {\csname\??frequencywidths\ifcsname\??frequencywidths#1\endcsname#1\else\s!en\fi\endcsname}
110
% \charactertsize{<tag>} expands to the entry count stored under <tag>, with the
% same fallback to \s!en when no count is stored under <tag>.
111\permanent\def\charactertsize#1%
112  {\csname\??frequencycounts\ifcsname\??frequencycounts#1\endcsname#1\else\s!en\fi\endcsname}
113
114%D Although it is of hardly any use, you can inherit a character table:
115%D
116%D \starttyping
117%D \startcharactertable[cz] \charactertable{en} \stopcharactertable
118%D \stoptyping
119
% An empty table for en: it contains no \charfreq entries, so its stored count is 0.
% Assuming \s!en expands to en, this is the table that \charactertable and
% \charactertsize fall back to for a language without a table of its own.
120\startcharactertable[en]
121    % empty
122\stopcharactertable % kind of default
123
124%D \macros
125%D   {averagecharwidth}
126%D
127%D This macro reports the average width for the current main
128%D language (\the \dimexpr (\averagecharwidth)).
129
% Expands to a \dimexpr that starts at \zeropoint, adds the "+(...)" terms that the
% \charfreq entries in the table of \charwidthlanguage expand to, and divides the
% sum by 100. Use it where a dimension is expected, or print it with \the.
130\permanent\def\averagecharwidth{\dimexpr(\zeropoint\charactertable\charwidthlanguage)/100\relax}
131
% Typesets one \hbox holding the value of \charwidthlanguage, a colon, and then
% "<method>/<average width>" for each method from 0 up to 4. \charwidthmethod is
% set inside the \bgroup ... \egroup of the box.
132\permanent\protected\def\showcharfreq
133  {\hbox\bgroup
134     \charwidthlanguage:%
135     \dostepwiserecurse\zerocount\plusfour\plusone
136       {\setconstant\charwidthmethod\recurselevel\relax
137        \enspace\recurselevel/\the\dimexpr(\averagecharwidth)}%
138   \egroup}
139
140%D Just for fun, we show a few frequency tables as a graphic (\in {figure}
141%D [fig:charfreq]).
142%D
143%D \startbuffer
144%D \definepalet [charfreq] [en=darkred, nl=darkgreen, de=darkblue]
145%D
146%D \def\charfreq#1 #2 %
147%D   {\startMPdrawing
148%D      interim linejoin := butt ;
149%D      a := ASCII "#1" ;
150%D      if (a >= (ASCII "a")) and (a <= (ASCII "z")) :
151%D         draw ((0,#2*.25cm)--origin--(0,#2*.5cm))
152%D           shifted (a*4mm+o,0)
153%D           withpen pencircle scaled .5mm
154%D           withcolor c;
155%D      fi ;
156%D    \stopMPdrawing}
157%D
158%D \resetMPdrawing
159%D \startMPdrawing
160%D   numeric a, o ; a := o := 0 ;
161%D   color c ; c := .5white ;
162%D   string s ; s := "" ;
163%D \stopMPdrawing
164%D
165%D \startMPdrawing o := 0mm ; c := \MPcolor{charfreq:en} ; \stopMPdrawing
166%D \charactertable{en}
167%D
168%D \startMPdrawing o := 1mm ; c := \MPcolor{charfreq:nl} ; \stopMPdrawing
169%D \charactertable{nl}
170%D
171%D \startMPdrawing o := 2mm ; c := \MPcolor{charfreq:de} ; \stopMPdrawing
172%D \charactertable{de}
173%D
174%D \startMPdrawing
175%D   for a := ASCII "a" upto ASCII "z" :
176%D     draw textext.bot("\strut\tttf " & char a) shifted (a*4mm+1mm,-1mm) ;
177%D   endfor ;
178%D \stopMPdrawing
179%D
180%D \MPdrawingdonetrue \getMPdrawing \resetMPdrawing
181%D \stopbuffer
182%D
183%D \placefigure
184%D   [here]
185%D   [fig:charfreq]
186%D   {The character distributions for English, Dutch and German.}
187%D   {\getbuffer}
188%D
189%D A few samples of usage of this mechanism are shown below:
190%D
191%D \startbuffer
192%D {\mainlanguage[en]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
193%D {\mainlanguage[nl]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
194%D {\mainlanguage[de]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
195%D \stopbuffer
196%D
197%D \typebuffer \getbuffer
198%D
199%D Although the widths differ, the consequences for breaking the paragraph
200%D into lines are minimal.
201
202%D \macros
203%D   {freezeaveragecharacterwidth}
204%D
205%D This macro can be used to make sure that the width does not change during a
206%D page break when another font is used.
207
% The macro defined by this module is \averagecharwidth (not \averagecharacterwidth),
% so that is the one that has to be saved and frozen. Keep a copy of its dynamic
% definition; the frozen value is computed from this copy, which also means that
% freezing again later re-evaluates the width with the fonts current at that moment.
208\aliased\let\normalaveragecharacterwidth\averagecharwidth
209
% \freezeaveragecharacterwidth globally replaces \averagecharwidth by a \dimexpr
% holding the width as it evaluates right now (\the expands the saved dynamic
% definition inside the \xdef). \enforced is used because \averagecharwidth is
% defined with \permanent.
210\permanent\def\freezeaveragecharacterwidth % global
211  {\enforced\xdef\averagecharwidth{\dimexpr\the\normalaveragecharacterwidth\relax}}
212
213%D Example:
214%D
215%D \starttyping
216%D \input lang-frq.mkiv
217%D \input lang-frd.mkiv
218%D
219%D \setupbodyfont
220%D   [dejavu]
221%D
222%D \setemeasure{textwidth}{\the\dimexpr70\averagecharwidth}
223%D
224%D \setuplayout
225%D   [width=\measure{textwidth}]
226%D
227%D \showframe
228%D
229%D \starttext
230%D     \input ward
231%D \stoptext
232%D \stoptyping
233
% Closes the \unprotect issued near the top of the module and stops reading the file.
234\protect \endinput
235