%D \module
%D   [       file=lang-frq,
%D        version=2004.01.15,
%D          title=\CONTEXT\ Language Macros,
%D       subtitle=Frequency Tables,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

% NOTE(review): the \endinput below makes TeX stop reading this file at the end
% of that line, so none of the definitions further down are loaded. Presumably
% the module has been disabled on purpose -- confirm before removing it.

\endinput

\writestatus{loading}{ConTeXt Language Macros / Frequency Tables}

%D Some day I might redo this in \LUA. But anyway, who uses it. It's rather old code.

\unprotect

%M \usemodule[layout]

%D \macros
%D   {charwidthmethod}
%D
%D This module implements a method for determining the width of an
%D average character in a language. It uses the dimensions of the
%D current fonts.
%D
%D \def\ShwChrWd#1#2#3%
%D   {\chardef\charwidthmethod#1\relax
%D    \mainlanguage[#2#3]\the\dimexpr(\averagecharwidth)}
%D
%D \starttabulate[|c|c|c|c|c|c|]
%D \HL
%D \NC \NC\bf0=amount\NC\bf1=.5em\NC\bf2=ex\NC\bf3=frequency\NC\bf4=list\NC\NR
%D \HL
%D \NC\bf en\NC\ShwChrWd0en\NC\ShwChrWd1en\NC\ShwChrWd2en\NC\ShwChrWd3en\NC\ShwChrWd4en\NC\NR
%D \NC\bf nl\NC\ShwChrWd0nl\NC\ShwChrWd1nl\NC\ShwChrWd2nl\NC\ShwChrWd3nl\NC\ShwChrWd4nl\NC\NR
%D \NC\bf de\NC\ShwChrWd0de\NC\ShwChrWd1de\NC\ShwChrWd2de\NC\ShwChrWd3de\NC\ShwChrWd4de\NC\NR
%D \HL
%D \stoptabulate
%D
%D Method~1 ignores the widths and assumes that each character has a
%D width of .5em, which is true for most monospaced fonts. Method~2
%D takes the x as starting point, and assumes that its height kind of
%D matches its width. Method~3 is the best one, and determines the
%D average width based on the language specific character table.
%D Method~4 is a mixture between the first two methods: character
%D specific widths applied to an equal distribution. Method~0 reports
%D the total count, which normally is~100.

% Selects which branch of \charfreq is used (see the \ifcase in \charfreq).

\setnewconstant\charwidthmethod=3 % 0=amount 1=em 2=ex 3=frequency 4=flattened >4=ex

%D \macros
%D   {charwidthlanguage}
%D
%D The language used for the calculations is defined as:

\def\charwidthlanguage{\currentmainlanguage}

%D \macros
%D   {charfreq}
%D
%D This method comes into action in the following macro:

% \charfreq <character> <fraction> : both arguments are delimited by a space.
% Each call expands to one "+( ... )" term, so a table of \charfreq entries can
% be summed inside a \dimexpr (this is what \averagecharwidth does). The term
% depends on \charwidthmethod:
%
%   0     : fraction * 100\onepoint
%   1     : fraction * \emwidth/2
%   2     : fraction * \exheight
%   3     : fraction * \scaledfontcharwd\font`<character>
%   4     : 100 * \scaledfontcharwd\font`<character> divided by the entry count
%           \charactertsize of \charwidthlanguage (the fraction is ignored)
%   other : same as 2
%
% The space before the "%" on the \def line is part of the parameter text and
% delimits #2; do not remove it.

\def\charfreq#1 #2 % character fraction
  {+(\ifcase\charwidthmethod
       #2\dimexpr100\onepoint\relax
     \or
       #2\dimexpr\emwidth/2\relax
     \or
       #2\dimexpr\exheight\relax
     \or
       #2\scaledfontcharwd\font`#1%
     \or
       \dimexpr100\scaledfontcharwd\font`#1/\charactertsize\charwidthlanguage\relax % ugly hack
     \else
       #2\dimexpr\exheight\relax
     \fi)}

%D \macros
%D   {startcharactertable}
%D
%D A frequency table is defined with the following macro. The \type
%D {charfreq} macro is used in this table.

\installcorenamespace{frequencywidths}
\installcorenamespace{frequencycounts}

\aliased\let\stopcharactertable\relax

% \startcharactertable[<tag>] <entries> \stopcharactertable
%
% Stores <entries> unexpanded and globally under the frequencywidths namespace
% for <tag>. It then runs <entries> once with \charfreq locally redefined to
% bump \scratchcounter, and stores the resulting number of entries globally
% under the frequencycounts namespace. Everything happens between
% \startnointerference and \stopnointerference, presumably so that the counting
% pass leaves no typeset material behind -- TODO confirm.

\permanent\protected\def\startcharactertable[#1]#2\stopcharactertable % \dimexpr has fuzzy lookahead
  {\startnointerference
   \gdefcsname\??frequencywidths#1\endcsname{#2}% the width vector
   \scratchcounter\zerocount \def\charfreq##1 ##2 {\advanceby\scratchcounter\plusone} #2%
   \xdefcsname\??frequencycounts#1\endcsname{\the\scratchcounter}% the character count
   \stopnointerference}

%D \macros
%D   {charactertable,charactertsize}
%D
%D The table content as well as the number of entries can be fetched with
%D the following two macros. The architecture of the table and calling
%D macro permits a fully expandable application.

% \charactertable{<tag>} expands to the entries stored for <tag> by
% \startcharactertable; when no table exists for <tag> (the \ifcsname test
% fails) the table stored under \s!en is used instead.

\permanent\def\charactertable#1%
  {\csname\??frequencywidths\ifcsname\??frequencywidths#1\endcsname#1\else\s!en\fi\endcsname}

% \charactertsize{<tag>} expands to the entry count stored for <tag>, with the
% same fallback to \s!en.

\permanent\def\charactertsize#1%
  {\csname\??frequencycounts\ifcsname\??frequencycounts#1\endcsname#1\else\s!en\fi\endcsname}

%D Although it is of hardly any use, you can inherit a character table:
%D
%D \starttyping
%D \startcharactertable[cz] \charactertable{en} \stopcharactertable
%D \stoptyping

% The fallback table: it has no \charfreq entries, so its stored count is 0.

\startcharactertable[en]
    % empty
\stopcharactertable % kind of default

%D \macros
%D   {averagecharwidth}
%D
%D This macro reports the average width for the current main
%D language (\the \dimexpr (\averagecharwidth)).

% Expands to a \dimexpr that starts at \zeropoint, adds the "+( ... )" terms
% produced by the \charfreq entries of the table for \charwidthlanguage and
% divides the sum by 100.

\permanent\def\averagecharwidth{\dimexpr(\zeropoint\charactertable\charwidthlanguage)/100\relax}

% Debugging aid: typesets, in one \hbox, the value of \charwidthlanguage
% followed by "<method>/<width>" for the loop levels \zerocount up to \plusfour
% (step \plusone). \charwidthmethod is set inside the box group opened by
% \bgroup, so the change is expected to stay local to the box.

\permanent\protected\def\showcharfreq
  {\hbox\bgroup
     \charwidthlanguage:%
     \dostepwiserecurse\zerocount\plusfour\plusone
       {\setconstant\charwidthmethod\recurselevel\relax
        \enspace\recurselevel/\todimension{\averagecharwidth}}%
   \egroup}

%D Just for fun, we show a few frequency tables as a graphic (\in {figure}
%D [fig:charfreq]).
%D
%D \startbuffer
%D \definepalet [charfreq] [en=darkred, nl=darkgreen, de=darkblue]
%D
%D \def\charfreq#1 #2 %
%D   {\startMPdrawing
%D      interim linejoin := butt ;
%D      a := ASCII "#1" ;
%D      if (a >= (ASCII "a")) and (a <= (ASCII "z")) :
%D        draw ((0,#2*.25cm)--origin--(0,#2*.5cm))
%D          shifted (a*4mm+o,0)
%D          withpen pencircle scaled .5mm
%D          withcolor c;
%D      fi ;
%D    \stopMPdrawing}
%D
%D \resetMPdrawing
%D \startMPdrawing
%D   numeric a, o ; a := o := 0 ;
%D   color c ; c := .5white ;
%D   string s ; s := "" ;
%D \stopMPdrawing
%D
%D \startMPdrawing o := 0mm ; c := \MPcolor{charfreq:en} ; \stopMPdrawing
%D \charactertable{en}
%D
%D \startMPdrawing o := 1mm ; c := \MPcolor{charfreq:nl} ; \stopMPdrawing
%D \charactertable{nl}
%D
%D \startMPdrawing o := 2mm ; c := \MPcolor{charfreq:de} ; \stopMPdrawing
%D \charactertable{de}
%D
%D \startMPdrawing
%D   for a := ASCII "a" upto ASCII "z" :
%D     draw textext.bot("\strut\tttf " & char a) shifted (a*4mm+1mm,-1mm) ;
%D   endfor ;
%D \stopMPdrawing
%D
%D \MPdrawingdonetrue \getMPdrawing \resetMPdrawing
%D \stopbuffer
%D
%D \placefigure
%D   [here]
%D   [fig:charfreq]
%D   {The character distributions for English, Dutch and German.}
%D   {\getbuffer}
%D
%D A few samples of usage of this mechanism are shown below:
%D
%D \startbuffer
%D {\mainlanguage[en]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
%D {\mainlanguage[nl]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
%D {\mainlanguage[de]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
%D \stopbuffer
%D
%D \typebuffer \getbuffer
%D
%D Although the widths differ, the consequences for breaking the paragraph
%D into lines are minimal.

%D \macros
%D   {freezeaveragecharacterwidth}
%D
%D This macro can be used to make sure that the width does not change during a
%D page break when another font is used.

% Keep a copy of the live (font dependent) definition. The macro that this
% module defines and that users call is \averagecharwidth, so that is the one
% that has to be saved here and frozen below; the names of the two helper
% macros are kept as they were.

\aliased\let\normalaveragecharacterwidth\averagecharwidth

% \freezeaveragecharacterwidth evaluates the live definition once (\the inside
% \xdef yields the current value as a plain dimension) and globally redefines
% \averagecharwidth to that fixed value, still wrapped in \dimexpr ... \relax so
% that a usage like 70\averagecharwidth keeps working. \enforced is needed
% because \averagecharwidth is defined as \permanent. Calling the macro again
% recomputes the value from the saved live definition.

\permanent\def\freezeaveragecharacterwidth % global
  {\enforced\xdef\averagecharwidth{\dimexpr\the\normalaveragecharacterwidth\relax}}

%D Example:
%D
%D \starttyping
%D \input lang-frq.mkiv
%D \input lang-frd.mkiv
%D
%D \setupbodyfont
%D   [dejavu]
%D
%D \setemeasure{textwidth}{\the\dimexpr70\averagecharwidth}
%D
%D \setuplayout
%D   [width=\measure{textwidth}]
%D
%D \showframe
%D
%D \starttext
%D     \input ward
%D \stoptext
%D \stoptyping

\protect \endinput