%D \module
%D   [       file=lang-frq,
%D        version=2004.01.15,
%D          title=\CONTEXT\ Language Macros,
%D       subtitle=Frequency Tables,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

% NOTE(review): \endinput makes TeX stop reading this file at the end of the
% line it is on, so as the file stands none of the definitions below are ever
% loaded. Presumably the module has been disabled on purpose (see the remark
% about old code below) -- confirm before removing it.

\endinput

\writestatus{loading}{ConTeXt Language Macros / Frequency Tables}

%D Some day I might redo this in \LUA. But anyway, who uses it. It's rather
%D old code.

\unprotect

%M \usemodule[layout]

%D \macros
%D   {charwidthmethod}
%D
%D This module implements a method for determining the width of an
%D average character in a language. It uses the dimensions of the
%D current fonts.
%D
%D \def\ShwChrWd#1#2#3%
%D   {\chardef\charwidthmethod#1\relax
%D    \mainlanguage[#2#3]\the\dimexpr(\averagecharwidth)}
%D
%D \starttabulate[|c|c|c|c|c|c|]
%D \HL
%D \NC   \NC\bf0=amount\NC\bf1=.5em\NC\bf2=ex\NC\bf3=frequency\NC\bf4=list\NC\NR
%D \HL
%D \NC\bf en\NC\ShwChrWd0en\NC\ShwChrWd1en\NC\ShwChrWd2en\NC\ShwChrWd3en\NC\ShwChrWd4en\NC\NR
%D \NC\bf nl\NC\ShwChrWd0nl\NC\ShwChrWd1nl\NC\ShwChrWd2nl\NC\ShwChrWd3nl\NC\ShwChrWd4nl\NC\NR
%D \NC\bf de\NC\ShwChrWd0de\NC\ShwChrWd1de\NC\ShwChrWd2de\NC\ShwChrWd3de\NC\ShwChrWd4de\NC\NR
%D \HL
%D \stoptabulate
%D
%D Method~1 ignores the widths and assumes that each character has a
%D width of .5em, which is true for most monospaced fonts. Method~2
%D takes the x as starting point, and assumes that its height kind of
%D matches its width. Method~3 is the best one, and determines the
%D average width based on the language specific character table.
%D Method~4 is a mixture between the first two methods: character
%D specific widths applied to an equal distribution. Method~0 reports
%D the total count, which normally is~100.

% Selects how \charfreq weighs a table entry. The supported values are listed
% in the trailing comment; any value above 4 takes the same branch as method 2.

\chardef\charwidthmethod=3 % 0=amount 1=em 2=ex 3=frequency 4=flattened >4=ex

%D \macros
%D   {charwidthlanguage}
%D
%D The language used for the calculations is defined as:

% Expands to \currentmainlanguage. The result is used as the table tag by
% \averagecharwidth and, for method 4, by \charfreq itself.

\def\charwidthlanguage{\currentmainlanguage}

%D \macros
%D   {charfreq}
%D
%D This method comes into action in the following macro:

% \charfreq <character> <fraction>
%
% Both arguments are delimited by a space, so every entry in a table has to end
% with a space as well. The macro performs no assignments; it expands to one
% "+(<dimension>)" term, so that a whole table can be summed inside the single
% \dimexpr that \averagecharwidth wraps around it. Which dimension is produced
% depends on \charwidthmethod:

\def\charfreq#1 #2 % character fraction
  {+(\ifcase\charwidthmethod
       #2\dimexpr100\onepoint\relax % 0: fraction times 100\onepoint, character ignored
     \or
       #2\dimexpr\emwidth/2\relax % 1: fraction times half of \emwidth
     \or
       #2\dimexpr\exheight\relax % 2: fraction times \exheight
     \or
       #2\fontcharwd\font`#1% 3: fraction times the width of the character in the current font
     \or
       \dimexpr100\fontcharwd\font`#1/\charactertsize\charwidthlanguage\relax % ugly hack
       % 4: the fraction is not used; each character counts as 100 divided by
       %    the number of entries in the table of \charwidthlanguage
     \else
       #2\dimexpr\exheight\relax % >4: same as method 2
     \fi)}

%D \macros
%D   {startcharactertable}
%D
%D A frequency table is defined with the following macro. The \type
%D {charfreq} macro is used in this table.

% Prefixes used to build the control sequence names under which the content
% (frequencywidths) and the number of entries (frequencycounts) of each table
% are stored.

\installcorenamespace{frequencywidths}
\installcorenamespace{frequencycounts}

% The stop tag only acts as the delimiter of \startcharactertable; it is given
% a harmless meaning here.

\let\stopcharactertable\relax

% \startcharactertable[<tag>] <entries> \stopcharactertable
%
% #1 is the bracketed table tag and #2 is everything up to \stopcharactertable.
% The entries are saved globally under the frequencywidths prefix. After that
% they are executed once with \charfreq redefined to step \scratchcounter, and
% the resulting count is saved globally under the frequencycounts prefix.
%
% NOTE(review): the counting pass runs between \startnointerference and
% \stopnointerference; presumably that keeps the temporary \charfreq and
% anything the entries would typeset from leaking out -- confirm against the
% core definition of these macros.

\unexpanded\def\startcharactertable[#1]#2\stopcharactertable % \dimexpr has fuzzy lookahead
  {\startnointerference
   \setgvalue{\??frequencywidths#1}{#2}% the width vector
   \scratchcounter\zerocount \def\charfreq##1 ##2 {\advance\scratchcounter\plusone} #2%
   \setxvalue{\??frequencycounts#1}{\the\scratchcounter}% the character count
   \stopnointerference}

%D \macros
%D   {charactertable,charactertsize}
%D
%D The table content as well as the number of entries can be fetched with
%D the following two macros. The architecture of the table and calling
%D macro permits a fully expandable application.

% \charactertable{<tag>} expands to the stored entries of the table with the
% given tag. When no table with that tag has been defined, the table tagged
% \s!en is used instead.

\def\charactertable#1%
  {\csname\??frequencywidths\ifcsname\??frequencywidths#1\endcsname#1\else\s!en\fi\endcsname}

% \charactertsize{<tag>} expands to the stored number of entries of the table
% with the given tag, with the same fallback to the \s!en table.

\def\charactertsize#1%
  {\csname\??frequencycounts\ifcsname\??frequencycounts#1\endcsname#1\else\s!en\fi\endcsname}

%D Although it is of hardly any use, you can inherit a character table:
%D
%D \starttyping
%D \startcharactertable[cz] \charactertable{en} \stopcharactertable
%D \stoptyping

% An empty table for the tag en. It makes sure that the fallback used by
% \charactertable and \charactertsize always exists; with no entries the
% stored count is zero and \averagecharwidth evaluates to \zeropoint.

\startcharactertable[en]
    % empty
\stopcharactertable % kind of default

%D \macros
%D   {averagecharwidth}
%D
%D This macro reports the average width for the current main
%D language (\the \dimexpr (\averagecharwidth)).

% Expands to a \dimexpr that starts at \zeropoint, adds the "+(...)" term that
% \charfreq produces for every entry in the table of \charwidthlanguage, and
% divides the sum by 100. Being a \dimexpr it can be prefixed by a factor, as
% in 65\averagecharwidth.

\def\averagecharwidth{\dimexpr(\zeropoint\charactertable\charwidthlanguage)/100\relax}

% Typesets, in one \hbox, the value of \charwidthlanguage followed by a colon
% and then, for each method from 0 up to 4, the method number, a slash and the
% resulting average width. The method is changed inside the box group only.
%
% NOTE(review): the \dimexpr( ... ) after \the has no closing \relax of its
% own, so it ends at the first unexpandable token that follows the loop body --
% confirm that this is what is wanted here.

\unexpanded\def\showcharfreq
  {\hbox\bgroup
     \charwidthlanguage:%
     \dostepwiserecurse\zerocount\plusfour\plusone
       {\chardef\charwidthmethod\recurselevel\relax
        \enspace\recurselevel/\the\dimexpr(\averagecharwidth)}%
   \egroup}

%D Just for fun, we show a few frequency tables as graphic (\in {figure}
%D [fig:charfreq]).
%D
%D \startbuffer
%D \definepalet [charfreq] [en=darkred, nl=darkgreen, de=darkblue]
%D
%D \def\charfreq#1 #2 %
%D   {\startMPdrawing
%D      interim linejoin := butt ;
%D      a := ASCII "#1" ;
%D      if (a >= (ASCII "a")) and (a <= (ASCII "z")) :
%D        draw ((0,#2*.25cm)--origin--(0,#2*.5cm))
%D          shifted (a*4mm+o,0)
%D          withpen pencircle scaled .5mm
%D          withcolor c;
%D      fi ;
%D    \stopMPdrawing}
%D
%D \resetMPdrawing
%D \startMPdrawing
%D   numeric a, o ; a := o := 0 ;
%D   color c ; c := .5white ;
%D   string s ; s := "" ;
%D \stopMPdrawing
%D
%D \startMPdrawing o := 0mm ; c := \MPcolor{charfreq:en} ; \stopMPdrawing
%D \charactertable{en}
%D
%D \startMPdrawing o := 1mm ; c := \MPcolor{charfreq:nl} ; \stopMPdrawing
%D \charactertable{nl}
%D
%D \startMPdrawing o := 2mm ; c := \MPcolor{charfreq:de} ; \stopMPdrawing
%D \charactertable{de}
%D
%D \startMPdrawing
%D   for a := ASCII "a" upto ASCII "z" :
%D     draw textext.bot("\strut\tttf " & char a) shifted (a*4mm+1mm,-1mm) ;
%D   endfor ;
%D \stopMPdrawing
%D
%D \MPdrawingdonetrue \getMPdrawing \resetMPdrawing
%D \stopbuffer
%D
%D \placefigure
%D   [here]
%D   [fig:charfreq]
%D   {The character distributions for English, Dutch and German.}
%D   {\getbuffer}
%D
%D A few samples of usage of this mechanism are shown below:
%D
%D \startbuffer
%D {\mainlanguage[en]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
%D {\mainlanguage[nl]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
%D {\mainlanguage[de]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank}
%D \stopbuffer
%D
%D \typebuffer \getbuffer
%D
%D Although the widths differ, the consequences for breaking the paragraph
%D into lines are minimal.

%D \macros
%D   {freezeaveragecharacterwidth}
%D
%D This macro can be used to make sure that the width does not change during a
%D page break when another font is used. It globally replaces \type
%D {\averagecharwidth} by a fixed dimension, namely the value that the font and
%D language dependent calculation yields at the moment of freezing.

% The macro that this module defines and documents is \averagecharwidth, so
% that is the meaning that has to be saved and frozen. The previous code saved
% and redefined \averagecharacterwidth instead, a name that is not defined in
% this file, which left \averagecharwidth unfrozen.

\let\normalaveragecharwidth\averagecharwidth

% The name under which the saved meaning was available before; kept as an
% alias so that existing references to it keep working.

\let\normalaveragecharacterwidth\normalaveragecharwidth

\unexpanded\def\freezeaveragecharacterwidth % global
  {% \the forces the calculation to be done now, with the current font, method
   % and language. The result is stored as \dimexpr<value>\relax so that the
   % frozen macro can still be prefixed by a factor, as in 65\averagecharwidth.
   % The saved unfrozen meaning is used, so freezing again recalculates.
   \xdef\averagecharwidth{\dimexpr\the\normalaveragecharwidth\relax}%
   % The frozen value is also provided under the name that was set here before.
   \global\let\averagecharacterwidth\averagecharwidth}

%D Example:
%D
%D \starttyping
%D \input lang-frq.mkiv
%D \input lang-frd.mkiv
%D
%D \setupbodyfont
%D   [dejavu]
%D
%D \setemeasure{textwidth}{\the\dimexpr70\averagecharwidth}
%D
%D \setuplayout
%D   [width=\measure{textwidth}]
%D
%D \showframe
%D
%D \starttext
%D     \input ward
%D \stoptext
%D \stoptyping

\protect \endinput