context-2020-tokens.tex /size: 9417 b    last modification: 2021-10-28 13:50
1% language=us
2
3\usemodule[present-boring,abbreviations-logos,system-tokens]
4
5\startdocument
6  [title={TOKENS},
7   banner={tokens as I see them},
8   location={context\enspace {\bf 2020}\enspace meeting}]
9
10\starttitle[title=About tokens]
11
12\startitemize
13
14\startitem Like nodes, it's a common term used in programming. \stopitem
15\startitem In \TEX\ The Program tokens and nodes are therefore omni|-|present. \stopitem
16\startitem For most users they are irrelevant concepts. \stopitem
17\startitem But we will explain them anyway. \stopitem
18\startitem Let's try to avoid the snobbish token|-|speak sometimes heard in the community. \stopitem
19\startitem So \unknown\ I won't correct you as long as you don't correct me. \stopitem
20\startitem Let's now enter the world of tokens in the na\"ive way. \stopitem
21
22\stopitemize
23
24\stoptitle
25
26\starttitle[title=What are tokens]
27
28\startitemize
29
30\startitem It is an internal data structure, effectively a (32 bit) integer. \stopitem
31\startitem This integer encodes a command (opcode) and a char code (operand). \stopitem
32\startitem But often it's not a character but more a sub command. \stopitem
33\startitem Input is converted into tokens. \stopitem
34\startitem Tokens are either expanded (interpreted) or stored. \stopitem
35\startitem When they are stored they are part of a larger data structure, a memory word. \stopitem
36\startitem Token memory is an array of such memory words. \stopitem
37\startitem The token memory \quote {word} has two integers: a token value and an index into token memory. \stopitem
38\startitem That way \TEX\ can have forward linked lists of tokens. \stopitem
39\startitem A hash table maps control sequences onto indices into token memory. \stopitem
40
41\stopitemize
42
43\stoptitle
44
45\starttitle[title=Some implementation details]
46
47\startitemize
48
49\startitem Sometimes there is a special head token at the start. \stopitem
50\startitem A head token makes for easier appending of extra tokens. \stopitem
51\startitem Shared lists use the head node for a reference count. \stopitem
52\startitem Original \TEX\ uses global temporary lists. \stopitem
53\startitem This is needed when we expand (nested) and need to report issues. \stopitem
54\startitem This is not needed when we just serialize (which we do a lot in \LUATEX). \stopitem
55\startitem So, this is all optimized for performance and memory consumption. \stopitem
56\startitem Freed tokens are collected in a cache so tokens can get scattered. \stopitem
57\startitem In \LUAMETATEX\ we stay as close to original \TEX\ as possible. \stopitem
58\startitem But the \LUA\ interfaces force us to occasionally divert. \stopitem
59
60\stopitemize
61
62\stoptitle
63
64\starttitle[title=A schematic view of tokens]
65
66A token value:
67
68\startlinecorrection[blank]
69    \setupTABLE[each][align=middle]
70    \setupTABLE[c][1][width=22mm]
71    \setupTABLE[c][2][width=42mm]
72    \bTABLE
73        \bTR \bTD cmd \eTD \bTD chr \eTD \eTR
74    \eTABLE
75\stoplinecorrection
76
77Token memory:
78
79\startlinecorrection[blank]
80    \setupTABLE[each][align=middle]
81    \setupTABLE[c][1][width=8mm]
82    \setupTABLE[c][2][width=64mm]
83    \setupTABLE[c][3][width=64mm]
84    \bTABLE
85        \bTR \bTD 1 \eTD \bTD info \eTD \bTD link \eTD \eTR
86        \bTR \bTD 2 \eTD \bTD info \eTD \bTD link \eTD \eTR
87        \bTR \bTD 3 \eTD \bTD info \eTD \bTD link \eTD \eTR
88        \bTR \bTD n \eTD \bTD info \eTD \bTD link \eTD \eTR
89    \eTABLE
90\stoplinecorrection
91
92\stoptitle
93
94\starttitle[title=Looking up control sequences]
95
96\startitemize
97
98\startitem A very visible to-be-token is a \type {\controlsequence}. \stopitem
99\startitem When read, the name will be looked up in the hash table. \stopitem
100\startitem When found, its value will point to the table of equivalents. \stopitem
101\startitem That table keeps track of:
102    \startitemize
103        \startitem the type (cmd) \stopitem
104        \startitem the current level (grouping) \stopitem
105        \startitem the current meaning (token list) \stopitem
106    \stopitemize
107\stopitem
108\stopitemize
109
110\stoptitle
111
112\starttitle[title=The (big) table of equivalents (simplified)]
113
114\startlinecorrection[blank]
115    \bTABLE
116        \bTR \bTD[ny=4] main hash \eTD \bTD null control sequence              \eTD \eTR
117        \bTR                           \bTD 128K hash entries                  \eTD \eTR
118        \bTR                           \bTD frozen control sequences           \eTD \eTR
119        \bTR                           \bTD special sequences (undefined)      \eTD \eTR
120        \bTR \bTD[ny=7] registers \eTD \bTD  17 internal & 64K user glues      \eTD \eTR
121        \bTR                           \bTD   4 internal & 64K user mu glues   \eTD \eTR
122        \bTR                           \bTD  12 internal & 64K user tokens     \eTD \eTR
123        \bTR                           \bTD   2 internal & 64K user boxes      \eTD \eTR
124        \bTR                           \bTD 116 internal & 64K user integers   \eTD \eTR
125        \bTR                           \bTD   0 internal & 64K user attributes \eTD \eTR
126        \bTR                           \bTD  22 internal & 64K user dimensions \eTD \eTR
127        \bTR \bTD specifications  \eTD \bTD   5 internal &   0 user            \eTD \eTR
128        \bTR \bTD extra hash      \eTD \bTD additional entries (grows dynamically) \eTD \eTR
129    \eTABLE
130\stoplinecorrection
131
132\stoptitle
133
134\starttitle[title=The hash table (simplified)]
135
136The hash table runs parallel to the main hash. On the todo list is to move the
137registers to their own tables and make them dynamic.
138
139\startlinecorrection[blank]
140    \setupTABLE[each][align=middle]
141    \setupTABLE[c][1][width=16mm]
142    \setupTABLE[c][2][width=64mm]
143    \setupTABLE[c][3][width=64mm]
144    \bTABLE
145        \bTR \bTD 1     \eTD \bTD string index \eTD \bTD equivalents or (next > n) index \eTD \eTR
146        \bTR \bTD 2     \eTD \bTD string index \eTD \bTD equivalents or (next > n) index \eTD \eTR
147        \bTR \bTD n     \eTD \bTD string index \eTD \bTD equivalents or (next > n) index \eTD \eTR
148        \bTR \bTD n + 1 \eTD \bTD string index \eTD \bTD equivalents or (next > n) index \eTD \eTR
149        \bTR \bTD n + 2 \eTD \bTD string index \eTD \bTD equivalents or (next > n) index \eTD \eTR
150        \bTR \bTD n + m \eTD \bTD string index \eTD \bTD equivalents or (next > n) index \eTD \eTR
151    \eTABLE
152\stoplinecorrection
153
154Equivalents (registers direct, macros indirect i.e.\ token lists):
155
156\startlinecorrection[blank]
157    \setupTABLE[each][align=middle]
158    \setupTABLE[c][1][width=8mm]
159    \setupTABLE[c][2][width=32mm]
160    \setupTABLE[c][3][width=32mm]
161    \setupTABLE[c][4][width=64mm]
162    \bTABLE
163        \bTR \bTD 1 \eTD \bTD level \eTD \bTD type \eTD \bTD value \eTD \eTR
164        \bTR \bTD 2 \eTD \bTD level \eTD \bTD type \eTD \bTD value \eTD \eTR
165        \bTR \bTD 3 \eTD \bTD level \eTD \bTD type \eTD \bTD value \eTD \eTR
166        \bTR \bTD n \eTD \bTD level \eTD \bTD type \eTD \bTD value \eTD \eTR
167    \eTABLE
168\stoplinecorrection
169
170\stoptitle
171
172\starttitle[title=Other data management]
173
174\startitemize
175\startitem Grouping is handled by a nesting stack. \stopitem
176\startitem Nested conditionals (\type {\if...}) have their own stack. \stopitem
177\startitem The values before assignments are saved on the save stack. \stopitem
178\startitem Also other local changes (housekeeping) end up on the save stack. \stopitem
179\startitem Token lists and macro aliases have reference counts (reuse). \stopitem
180\startitem Attributes, being linked node lists, have their own management. \stopitem
181\stopitemize
182
183\stoptitle
184
185\starttitle[title=Example 1: in the input]
186
187\startbuffer
188\luatokentable{1 \bf{2} 3\what {!}}
189\stopbuffer
190
191\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer}
192
193\stoptitle
194
195\starttitle[title=Example 2: in the input]
196
197\startbuffer
198\luatokentable{a \the\scratchcounter b \the\parindent \hbox to 10pt{x}}
199\stopbuffer
200
201\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer}
202
203\stoptitle
204
205\starttitle[title=Example 3: user registers]
206
207\startbuffer
208\scratchtoks{foo \framed{\red 123}456}
209
210\luatokentable\scratchtoks
211\stopbuffer
212
213\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer}
214
215\stoptitle
216
217\starttitle[title=Example 4: internal variables]
218
219\startbuffer
220\luatokentable\everypar
221\stopbuffer
222
223\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer}
224
225\stoptitle
226
227\starttitle[title=Example 5: macro definitions]
228
229\startbuffer
230\protected\def\whatever#1[#2](#3)\relax{oeps #1 and #2 & #3 done ## error}
231
232\luatokentable\whatever
233\stopbuffer
234
235\typebuffer \blank[line] {\switchtobodyfont[8pt] \startcolumns \getbuffer \stopcolumns}
236
237\stoptitle
238
239\starttitle[title=Example 6: commands]
240
241\startbuffer
242\luatokentable\startitemize
243\stopbuffer
244
245\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer}
246
247\stoptitle
248
249\starttitle[title=Example 7: commands]
250
251\startbuffer
252\luatokentable\doifelse
253\stopbuffer
254
255\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer }
256
257\stoptitle
258
259\starttitle[title=Example 8: nothing]
260
261\startbuffer
262\luatokentable\relax
263\stopbuffer
264
265\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer }
266
267\stoptitle
268
269\starttitle[title=Example 9: hashes]
270
271\startbuffer
272\edef\foo#1#2{(#1)(\letterhash)(#2)}  \luatokentable\foo
273\stopbuffer
274
275\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer }
276
277\stoptitle
278
279\starttitle[title=Example 10: nesting]
280
281\startbuffer
282\def\foo#1{\def\foo##1{(#1)(##1)}}  \luatokentable\foo
283\stopbuffer
284
285\typebuffer \blank[line] {\switchtobodyfont[8pt] \getbuffer }
286
287\stoptitle
288
289\stopdocument
290