lang-frq.mkiv /size: 6935 b    last modification: 2020-07-01 14:35
1%D \module
2%D   [       file=lang-frq,
3%D        version=2004.01.15,
4%D          title=\CONTEXT\ Language Macros,
5%D       subtitle=Frequency Tables,
6%D         author=Hans Hagen,
7%D           date=\currentdate,
8%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
9%C
10%C This module is part of the \CONTEXT\ macro||package and is
11%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
12%C details.
13
14\endinput
15
16\writestatus{loading}{ConTeXt Language Macros / Frequency Tables}
17
18%D Some day I might redo this in \LUA. But anyway, who uses it. It's rather
19%D old code.
20
21\unprotect
22
23%M \usemodule[layout]
24
25%D \macros
26%D   {charwidthmethod}
27%D
28%D This module implements a method for determining the width of an
29%D average character in a language. It uses the dimensions of the
30%D current fonts.
31%D
32%D \def\ShwChrWd#1#2#3%
33%D   {\chardef\charwidthmethod#1\relax
34%D    \mainlanguage[#2#3]\the\dimexpr(\averagecharwidth)}
35%D
36%D \starttabulate[|c|c|c|c|c|c|]
37%D \HL
38%D \NC      \NC\bf0=amount\NC\bf1=.5em\NC2=ex\NC\bf3=frequency\NC\bf4=list\NC\NR
39%D \HL
40%D \NC\bf en\NC\ShwChrWd0en\NC\ShwChrWd1en\NC\ShwChrWd2en\NC\ShwChrWd3en\NC\ShwChrWd4en\NC\NR
41%D \NC\bf nl\NC\ShwChrWd0nl\NC\ShwChrWd1nl\NC\ShwChrWd2nl\NC\ShwChrWd3nl\NC\ShwChrWd4nl\NC\NR
42%D \NC\bf de\NC\ShwChrWd0de\NC\ShwChrWd1de\NC\ShwChrWd2de\NC\ShwChrWd3de\NC\ShwChrWd4de\NC\NR
43%D \HL
44%D \stoptabulate
45%D
46%D Method~1 ignores the widths and assumes that each character has a
47%D width of .5em, which is true for most monospaced fonts. Method~2
48%D takes the x as starting point, and assumes that its height kind of
49%D matches its width. Method~3 is the best one, and determines the
50%D average width based on the language specific character table.
51%D Method~4 is a mixture between the first two methods: character
52%D specific widths applied to an equal distribution. Method~0 reports
53%D the total count, which normally is~100.
54
% Active width method; \charfreq dispatches on this value:
% 0=amount 1=em 2=ex 3=frequency 4=flattened >4=ex
\chardef\charwidthmethod\plusthree
56
57%D \macros
58%D  {charwidthlanguage}
59%D
60%D The language used for the calculations is defined as:
61
% Language tag used to look up the table. It is a macro, not a snapshot, so
% it yields whatever \currentmainlanguage expands to at the time of use.
\def\charwidthlanguage{\currentmainlanguage}
63
64%D \macros
65%D   {charfreq}
66%D
67%D This method comes into action in the following macro:
68
% \charfreq <character> <fraction>
%
% Both arguments are delimited by a space. The macro expands to a single
% "+(...)" term, meant to be summed inside an enclosing \dimexpr; which term
% is produced depends on \charwidthmethod.
\def\charfreq#1 #2 % character fraction
  {+(\ifcase\charwidthmethod
       #2\dimexpr100\onepoint\relax      % 0: fraction only (amount)
     \or
       #2\dimexpr\emwidth/2\relax        % 1: fraction times half an em
     \or
       #2\dimexpr\exheight\relax         % 2: fraction times the ex height
     \or
       #2\fontcharwd\font`#1%              3: fraction times the width of the character
     \or
       \dimexpr100\fontcharwd\font`#1/\charactertsize\charwidthlanguage\relax % 4: width spread evenly over the entries (ugly hack)
     \else
       #2\dimexpr\exheight\relax         % >4: same as 2
     \fi)}
83
84%D \macros
85%D   {startcharactertable}
86%D
87%D A frequency table is defined with the following macro. The \type
88%D {charfreq} macro is used in this table.
89
% Two namespaces: one holds the stored tables, the other their entry counts.
\installcorenamespace{frequencywidths}
\installcorenamespace{frequencycounts}

% Only a delimiter: \startcharactertable grabs everything up to this token.
\let\stopcharactertable\relax

% \startcharactertable[<language>] <entries> \stopcharactertable
%
% Stores <entries> (unexpanded) under the language tag and, in a second step,
% counts how many \charfreq items they contain. Both results are set globally.
\unexpanded\def\startcharactertable[#1]#2\stopcharactertable % \dimexpr has fuzzy lookahead
  {\startnointerference
     % the width vector
     \expandafter\gdef\csname\??frequencywidths#1\endcsname{#2}%
     % run the entries once with a \charfreq that only counts
     % (presumably local to the \startnointerference group -- confirm)
     \scratchcounter\zerocount
     \def\charfreq##1 ##2 {\advance\scratchcounter\plusone}
     #2%
     % the character count
     \expandafter\xdef\csname\??frequencycounts#1\endcsname{\the\scratchcounter}%
   \stopnointerference}
101
102%D \macros
103%D   {charactertable,charactertsize}
104%D
105%D The table content as well as the number of entries can be fetched with
106%D the following two macros. The architecture of the table and calling
107%D macro permits a fully expandable application.
108
% Expands to the table stored for language #1; when nothing is stored under
% #1 the table registered under \s!en is used instead. Only \csname and
% \ifcsname are involved, so this works in expansion-only contexts.
\def\charactertable#1%
  {\csname\??frequencywidths\ifcsname\??frequencywidths#1\endcsname#1\else\s!en\fi\endcsname}
111
% Expands to the entry count stored for language #1; when no count is stored
% under #1 the count registered under \s!en is used instead.
\def\charactertsize#1%
  {\csname\??frequencycounts\ifcsname\??frequencycounts#1\endcsname#1\else\s!en\fi\endcsname}
114
115%D Although it is of hardly any use, you can inherit a character table:
116%D
117%D \starttyping
118%D \startcharactertable[cz] \charactertable{en} \stopcharactertable
119%D \stoptyping
120
% An entry-less table for en. \charactertable and \charactertsize fall back
% to \s!en (presumably this tag -- confirm) when a language has no table of
% its own, so this makes sure the fallback exists.
\startcharactertable[en]
    % empty
\stopcharactertable % kind of default
124
125%D \macros
126%D   {averagecharwidth}
127%D
128%D This macro reports the average width for the current main
129%D language (\the \dimexpr (\averagecharwidth)).
130
% Starts from \zeropoint, adds the "+(...)" terms that the \charfreq entries
% of the table for \charwidthlanguage expand to, and divides the sum by 100.
% The result is a \dimexpr, so it can be used where a dimension is expected.
\def\averagecharwidth{\dimexpr(\zeropoint\charactertable\charwidthlanguage)/100\relax}
132
% Typesets, in one \hbox, the language tag followed by a "method/width" pair
% for each of the methods 0 up to 4.
\unexpanded\def\showcharfreq
  {\hbox
     {\charwidthlanguage:%
      \dostepwiserecurse\zerocount\plusfour\plusone
        {% select the method for this pass; the assignment is made inside the box
         \chardef\charwidthmethod\recurselevel\relax
         \enspace
         \recurselevel/\the\dimexpr(\averagecharwidth)}%
     }}
140
141%D Just for fun, we show a few frequency tables as graphic (\in {figure}
142%D [fig:charfreq]).
143%D
144%D \startbuffer
145%D \definepalet [charfreq] [en=darkred, nl=darkgreen, de=darkblue]
146%D
147%D \def\charfreq#1 #2 %
148%D   {\startMPdrawing
149%D      interim linejoin := butt ;
150%D      a := ASCII "#1" ;
151%D      if (a >= (ASCII "a")) and (a <= (ASCII "z")) :
152%D         draw ((0,#2*.25cm)--origin--(0,#2*.5cm))
153%D           shifted (a*4mm+o,0)
154%D           withpen pencircle scaled .5mm
155%D           withcolor c;
156%D      fi ;
157%D    \stopMPdrawing}
158%D
159%D \resetMPdrawing
160%D \startMPdrawing
161%D   numeric a, o ; a := o := 0 ;
162%D   color c ; c := .5white ;
163%D   string s ; s := "" ;
164%D \stopMPdrawing
165%D
166%D \startMPdrawing o := 0mm ; c := \MPcolor{charfreq:en} ; \stopMPdrawing
167%D \charactertable{en}
168%D
169%D \startMPdrawing o := 1mm ; c := \MPcolor{charfreq:nl} ; \stopMPdrawing
170%D \charactertable{nl}
171%D
172%D \startMPdrawing o := 2mm ; c := \MPcolor{charfreq:de} ; \stopMPdrawing
173%D \charactertable{de}
174%D
175%D \startMPdrawing
176%D   for a := ASCII "a" upto ASCII "z" :
177%D     draw textext.bot("\strut\tttf " & char a) shifted (a*4mm+1mm,-1mm) ;
178%D   endfor ;
179%D \stopMPdrawing
180%D
181%D \MPdrawingdonetrue \getMPdrawing \resetMPdrawing
182%D \stopbuffer
183%D
184%D \placefigure
185%D   [here]
186%D   [fig:charfreq]
187%D   {The character distributions for English, Dutch and German.}
188%D   {\getbuffer}
189%D
190%D A few samples of usage of this mechanism are shown below:
191%D
192%D \startbuffer
193%D {\mainlanguage[en]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
194%D {\mainlanguage[nl]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
195%D {\mainlanguage[de]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
196%D \stopbuffer
197%D
198%D \typebuffer \getbuffer
199%D
200%D Although the widths differ, the consequences for breaking the paragraph
201%D into lines are minimal.
202
203%D \macros
204%D   {freezeaveragecharacterwidth}
205%D
206%D This macro can be used to make sure that the width does not change during a
207%D page break when another font is used.
208
% \freezeaveragecharacterwidth replaces \averagecharwidth, globally, by the
% fixed value that it has at the moment of the call.
%
% The live (unfrozen) definition is kept under \normalaveragecharacterwidth.
% It is taken from \averagecharwidth, the macro that this file defines and
% documents; \averagecharacterwidth is not defined in this file and therefore
% cannot serve as the source or the target of the freeze. Because the frozen
% value is always computed from the saved live definition, calling the macro
% again re-freezes at the then current value.

\let\normalaveragecharacterwidth\averagecharwidth

\unexpanded\def\freezeaveragecharacterwidth % global
  {\xdef\averagecharwidth{\dimexpr\the\normalaveragecharacterwidth\relax}}
213
214%D Example:
215%D
216%D \starttyping
217%D \input lang-frq.mkiv
218%D \input lang-frd.mkiv
219%D
220%D \setupbodyfont
221%D   [dejavu]
222%D
223%D \setemeasure{textwidth}{\the\dimexpr70\averagecharwidth}
224%D
225%D \setuplayout
226%D   [width=\measure{textwidth}]
227%D
228%D \showframe
229%D
230%D \starttext
231%D     \input ward
232%D \stoptext
233%D \stoptyping
234
235\protect \endinput
236