lang-frq.mkiv /size: 6935 b    last modification: 2020-07-01 14:35
1
%D \module
2
%D [ file=lang-frq,
3
%D version=2004.01.15,
4
%D title=\CONTEXT\ Language Macros,
5
%D subtitle=Frequency Tables,
6
%D author=Hans Hagen,
7
%D date=\currentdate,
8
%D copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
9
%C
10
%C This module is part of the \CONTEXT\ macro||package and is
11
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
12
%C details.
13 14
% NOTE: \endinput makes TeX stop reading this file after the current line,
% so none of the definitions below are loaded while this line is in place.
\endinput
15 16
% Announce in the log that this module is being loaded.
\writestatus{loading}{ConTeXt Language Macros / Frequency Tables}
17 18
%D Some day I might redo this in \LUA. But anyway, who uses it? It's rather
19
%D old code.
20 21
% Paired with the \protect at the end of the file. The definitions in between
% use internal names such as \??frequencywidths and \s!en; presumably this
% switch is what allows ? and ! inside macro names.
\unprotect
22 23
%M \usemodule[layout]
24 25
%D \macros
26
%D {charwidthmethod}
27
%D
28
%D This module implements a method for determining the width of an
29
%D average character in a language. It uses the dimensions of the
30
%D current fonts.
31
%D
32
%D \def\ShwChrWd#1#2#3%
33
%D {\chardef\charwidthmethod#1\relax
34
%D \mainlanguage[#2#3]\the\dimexpr(\averagecharwidth)}
35
%D
36
%D \starttabulate[|c|c|c|c|c|c|]
37
%D \HL
38
%D \NC \NC\bf0=amount\NC\bf1=.5em\NC\bf2=ex\NC\bf3=frequency\NC\bf4=list\NC\NR
39
%D \HL
40
%D \NC\bf en\NC\ShwChrWd0en\NC\ShwChrWd1en\NC\ShwChrWd2en\NC\ShwChrWd3en\NC\ShwChrWd4en\NC\NR
41
%D \NC\bf nl\NC\ShwChrWd0nl\NC\ShwChrWd1nl\NC\ShwChrWd2nl\NC\ShwChrWd3nl\NC\ShwChrWd4nl\NC\NR
42
%D \NC\bf de\NC\ShwChrWd0de\NC\ShwChrWd1de\NC\ShwChrWd2de\NC\ShwChrWd3de\NC\ShwChrWd4de\NC\NR
43
%D \HL
44
%D \stoptabulate
45
%D
46
%D Method~1 ignores the widths and assumes that each character has a
47
%D width of .5em, which is true for most monospaced fonts. Method~2
48
%D takes the x as starting point, and assumes that its height kind of
49
%D matches its width. Method~3 is the best one, and determines the
50
%D average width based on the language specific character table.
51
%D Method~4 is a mixture between the first two methods: character
52
%D specific widths applied to an equal distribution. Method~0 reports
53
%D the total count, which normally is~100.
54 55
% Selects the branch that \charfreq takes:
%   0=amount  1=em  2=ex  3=frequency  4=flattened  >4=ex
\chardef\charwidthmethod=3
56 57
%D \macros
58
%D {charwidthlanguage}
59
%D
60
%D The language used for the calculations is defined as:
61 62
% The language tag whose character table is used. It is a macro, so
% \currentmainlanguage is looked up each time the tag is expanded.
\def\charwidthlanguage
  {\currentmainlanguage}
63 64
%D \macros
65
%D {charfreq}
66
%D
67
%D This method comes into action in the following macro:
68 69
% \charfreq <character> <fraction>
%
% Both arguments are delimited by a space. The expansion is a single term
% of the form +(...) that is meant to be summed inside a \dimexpr; which
% term is produced depends on \charwidthmethod.
\def\charfreq#1 #2 % character fraction
  {+(\ifcase\charwidthmethod
       % 0: the fraction times 100pt
       #2\dimexpr100\onepoint\relax
     \or
       % 1: the fraction times half of \emwidth
       #2\dimexpr\emwidth/2\relax
     \or
       % 2: the fraction times \exheight
       #2\dimexpr\exheight\relax
     \or
       % 3: the fraction times the width of the character in the current font
       #2\fontcharwd\font`#1%
     \or
       % 4: the fraction is not used; 100 times the character width is
       % divided by the number of entries in the table
       \dimexpr100\fontcharwd\font`#1/\charactertsize\charwidthlanguage\relax % ugly hack
     \else
       % >4: same term as method 2
       #2\dimexpr\exheight\relax
     \fi)}
83 84
%D \macros
85
%D {startcharactertable}
86
%D
87
%D A frequency table is defined with the following macro. The \type
88
%D {charfreq} macro is used in this table.
89 90
% Two name prefixes: \??frequencywidths for the stored table bodies and
% \??frequencycounts for the stored number of entries per table.
\installcorenamespace{frequencywidths}
\installcorenamespace{frequencycounts}
92 93
% \stopcharactertable only serves as the end delimiter of the argument
% grabbed by \startcharactertable; on its own it does nothing.
\let\stopcharactertable\relax
94 95
% \startcharactertable[<tag>] <entries> \stopcharactertable
%
% Stores <entries> globally under \??frequencywidths<tag> and stores the
% number of \charfreq calls found in <entries> under \??frequencycounts<tag>.
% The count is obtained by running <entries> once with \charfreq locally
% redefined to step \scratchcounter; this happens between
% \startnointerference and \stopnointerference (presumably so that the
% counting pass leaves nothing behind in the output).
\unexpanded\def\startcharactertable[#1]#2\stopcharactertable % \dimexpr has fuzzy lookahead
  {\startnointerference
     % the width vector
     \setgvalue{\??frequencywidths#1}{#2}%
     % counting pass over the same entries
     \scratchcounter\zerocount
     \def\charfreq##1 ##2 {\advance\scratchcounter\plusone}%
     #2%
     % the character count
     \setxvalue{\??frequencycounts#1}{\the\scratchcounter}%
   \stopnointerference}
101 102
%D \macros
103
%D {charactertable,charactertsize}
104
%D
105
%D The table content as well as the number of entries can be fetched with
106
%D the following two macros. The architecture of the table and calling
107
%D macro permits a fully expandable application.
108 109
% \charactertable{<tag>}
%
% Expands to the stored table body for <tag>. When no table was stored
% under that tag, the table stored under \s!en is used instead.
\def\charactertable#1%
  {\csname\??frequencywidths
     \ifcsname\??frequencywidths#1\endcsname
       #1%
     \else
       \s!en
     \fi
   \endcsname}
111 112
% \charactertsize{<tag>}
%
% Expands to the stored entry count for <tag>. When no count was stored
% under that tag, the count stored under \s!en is used instead.
\def\charactertsize#1%
  {\csname\??frequencycounts
     \ifcsname\??frequencycounts#1\endcsname
       #1%
     \else
       \s!en
     \fi
   \endcsname}
114 115
%D Although it is of hardly any use, you can inherit a character table:
116
%D
117
%D \starttyping
118
%D \startcharactertable[cz] \charactertable{en} \stopcharactertable
119
%D \stoptyping
120 121
% Kind of a default: register a table without entries under the tag en, so
% that the \s!en fallback of \charactertable and \charactertsize is defined.
\startcharactertable[en]
    % empty
\stopcharactertable
% kind of default
124 125
%D \macros
126
%D {averagecharwidth}
127
%D
128
%D This macro reports the average width for the current main
129
%D language (\the \dimexpr (\averagecharwidth)).
130 131
% Expands to a \dimexpr that starts at \zeropoint, adds every term that the
% table of \charwidthlanguage expands to, and divides the sum by 100.
\def\averagecharwidth
  {\dimexpr
     (\zeropoint\charactertable\charwidthlanguage)/100%
   \relax}
132 133
% Typesets, in one \hbox, the language tag followed by "<method>/<width>"
% for the methods 0 up to and including 4. The method is switched inside
% the box group, so the setting does not leak out.
\unexpanded\def\showcharfreq
  {\hbox\bgroup
     \charwidthlanguage:%
     \dostepwiserecurse\zerocount\plusfour\plusone
       {\chardef\charwidthmethod\recurselevel\relax
        \enspace
        \recurselevel/\the\dimexpr(\averagecharwidth)}%
   \egroup}
140 141
%D Just for fun, we show a few frequency tables as graphic (\in {figure}
142
%D [fig:charfreq]).
143
%D
144
%D \startbuffer
145
%D \definepalet [charfreq] [en=darkred, nl=darkgreen, de=darkblue]
146
%D
147
%D \def\charfreq#1 #2 %
148
%D {\startMPdrawing
149
%D interim linejoin := butt ;
150
%D a := ASCII "#1" ;
151
%D if (a >= (ASCII "a")) and (a <= (ASCII "z")) :
152
%D draw ((0,#2*.25cm)--origin--(0,#2*.5cm))
153
%D shifted (a*4mm+o,0)
154
%D withpen pencircle scaled .5mm
155
%D withcolor c;
156
%D fi ;
157
%D \stopMPdrawing}
158
%D
159
%D \resetMPdrawing
160
%D \startMPdrawing
161
%D numeric a, o ; a := o := 0 ;
162
%D color c ; c := .5white ;
163
%D string s ; s := "" ;
164
%D \stopMPdrawing
165
%D
166
%D \startMPdrawing o := 0mm ; c := \MPcolor{charfreq:en} ; \stopMPdrawing
167
%D \charactertable{en}
168
%D
169
%D \startMPdrawing o := 1mm ; c := \MPcolor{charfreq:nl} ; \stopMPdrawing
170
%D \charactertable{nl}
171
%D
172
%D \startMPdrawing o := 2mm ; c := \MPcolor{charfreq:de} ; \stopMPdrawing
173
%D \charactertable{de}
174
%D
175
%D \startMPdrawing
176
%D for a := ASCII "a" upto ASCII "z" :
177
%D draw textext.bot("\strut\tttf " & char a) shifted (a*4mm+1mm,-1mm) ;
178
%D endfor ;
179
%D \stopMPdrawing
180
%D
181
%D \MPdrawingdonetrue \getMPdrawing \resetMPdrawing
182
%D \stopbuffer
183
%D
184
%D \placefigure
185
%D [here]
186
%D [fig:charfreq]
187
%D {The character distributions for English, Dutch and German.}
188
%D {\getbuffer}
189
%D
190
%D A few samples of usage of this mechanism are shown below:
191
%D
192
%D \startbuffer
193
%D {\mainlanguage[en]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
194
%D {\mainlanguage[nl]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
195
%D {\mainlanguage[de]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
196
%D \stopbuffer
197
%D
198
%D \typebuffer \getbuffer
199
%D
200
%D Although the widths differ, the consequences for breaking the paragraph
201
%D into lines are minimal.
202 203
%D \macros
204
%D {freezeaveragecharacterwidth}
205
%D
206
%D This macro can be used to make sure that the width does not change during a
207
%D page break when another font is used.
208 209
% Keep the unfrozen (font dependent) definition around. The macro defined in
% this file is \averagecharwidth, so that is the one saved here and the one
% that gets frozen below.
\let\normalaveragecharacterwidth\averagecharwidth

% \freezeaveragecharacterwidth
%
% Evaluates the saved unfrozen definition once, with the font and language
% that are current at the time of the call, and globally redefines
% \averagecharwidth as a \dimexpr holding that fixed value. Calling it again
% recalculates from the saved definition. The long name
% \averagecharacterwidth is kept as a global alias of the frozen value.
\unexpanded\def\freezeaveragecharacterwidth % global
  {\xdef\averagecharwidth{\dimexpr\the\normalaveragecharacterwidth\relax}%
   \global\let\averagecharacterwidth\averagecharwidth}
213 214
%D Example:
215
%D
216
%D \starttyping
217
%D \input lang-frq.mkiv
218
%D \input lang-frd.mkiv
219
%D
220
%D \setupbodyfont
221
%D [dejavu]
222
%D
223
%D \setemeasure{textwidth}{\the\dimexpr70\averagecharwidth}
224
%D
225
%D \setuplayout
226
%D [width=\measure{textwidth}]
227
%D
228
%D \showframe
229
%D
230
%D \starttext
231
%D \input ward
232
%D \stoptext
233
%D \stoptyping
234 235
% Closes the \unprotect near the top of the file and stops reading the file.
\protect
\endinput
236