% language=us runpath=texruns:manuals/luametatex

% \showenginevalues{alignmentcontextvalues}
% \showenginevalues{appendlinecontextvalues}
% \showenginevalues{automigrationvalues}
% \showenginevalues{autoparagraphvalues}
% \showenginevalues{getbalancestepoptionvalues}
% \showenginevalues{getbalancecallbackvalues}
% \showenginevalues{breakcontextvalues}
% \showenginevalues{buildcontextvalues}
% \showenginevalues{charactercontrolvalues}
% \showenginevalues{charactertagvalues}
% \showenginevalues{doublescriptoptionvalues}
% \showenginevalues{discpartvalues}
% \showenginevalues{glyphdiscvalues}
% \showenginevalues{fillvalues}
% \showenginevalues{flagvalues}
% \showenginevalues{groupvalues}
% \showenginevalues{hyphenationvalues}
% \showenginevalues{iovalues}
% \showenginevalues{kerneloptionvalues}
% \showenginevalues{linebreakstatevalues}
% \showenginevalues{mathclassoptionvalues}
% \showenginevalues{mathcontrolvalues}
% \showenginevalues{mathgluevalues}
% \showenginevalues{mathoptionvalues}
% \showenginevalues{mathparametervalues}
% \showenginevalues{mathscriptordervalues}
% \showenginevalues{mathscriptsmodevalues}
% \showenginevalues{mathsurroundvalues}
% \showenginevalues{mathvariantpresets}
% \showenginevalues{mathvariantvalues}
% \showenginevalues{modevalues}
% \showenginevalues{mvloptionvalues}
% \showenginevalues{normalizelinevalues}
% \showenginevalues{normalizeparvalues}
% \showenginevalues{packtypevalues}
% \showenginevalues{pagecontextvalues}
% \showenginevalues{parametermodevalues}
% \showenginevalues{parcontextvalues}
% \showenginevalues{parmodevalues}
% \showenginevalues{partriggervalues}
% \showenginevalues{prepoststatevalues}
% \showenginevalues{protrusionboundaryvalues}
% \showenginevalues{runstatevalues}
% \showenginevalues{shapingpenaltiesvalues}
% \showenginevalues{specialmathclassvalues}
% \showenginevalues{specificationoptionvalues}
% \showenginevalues{textcontrolvalues}
% \showenginevalues{uleaderlocationvalues}
% \showenginevalues{unitclassvalues}

%  function node.direct.addmargins
%  function node.direct.addxoffset
%  function node.direct.addxymargins
%  function node.direct.addyoffset

%  function node.direct.appendaftertail
%  function node.direct.appendtocurrentlist
%  function node.direct.prependbeforehead

%  function node.direct.collapsing

%  function node.direct.copyonly

%  function node.direct.effectiveglue

%  function node.direct.exchange

%  function node.direct.findattributerange
%  function node.direct.firstchar
%  function node.direct.firstglyph
%  function node.direct.firstitalicglyph

%  function node.direct.flushnode

%  function node.direct.getanchors

%  function node.direct.getattribute
%  function node.direct.getattributelist
%  function node.direct.getattributes
%
%  function node.direct.setattributelist
%  function node.direct.setattributes
%
%  function node.direct.unsetattributes
%  function node.direct.patchattributes
%  function node.direct.getusedattributes
%  function node.direct.currentattributes

%  function node.direct.getbox
%
%  function node.direct.setbox

%  function node.direct.getchar
%  function node.direct.getchardict
%  function node.direct.getcharspec
%
%  function node.direct.setchar
%  function node.direct.setchardict

%  function node.direct.getchoice
%
%  function node.direct.setchoice

%  function node.direct.getclass
%  function node.direct.setclass

%  function node.direct.getcontrol
%  function node.direct.setcontrol

%  function node.direct.getcornerkerns

%  function node.direct.getdata
%  function node.direct.setdata

%  function node.direct.getdegree
%  function node.direct.setdegree

%  function node.direct.getdelimiter
%  function node.direct.setdelimiter

%  function node.direct.setdirection
%  function node.direct.getdirection

%  function node.direct.getexcept
%  function node.direct.setexcept

%  ....
%
%  function node.direct.setfam

%  function node.direct.getfield
%  function node.direct.setfield

%  function node.direct.getfont
%  function node.direct.setfont

%  function node.direct.getgeometry
%  function node.direct.setgeometry

%  function node.direct.getglue
%  function node.direct.setglue

%  function node.direct.getglyphdata
%  function node.direct.setglyphdata

%  function node.direct.getid
%  function node.direct.getidsubtype

%  function node.direct.getindex
%  function node.direct.setindex

%  function node.direct.getinputfields
%  function node.direct.setinputfields

%  function node.direct.getkern
%  function node.direct.setkern

%  function node.direct.getglyphdimensions
%  function node.direct.getruledimensions
%  function node.direct.getlistdimensions
%  function node.direct.getkerndimension
%
%  function node.direct.setruledimensions

%  function node.direct.getlanguage
%  function node.direct.setlanguage

%  function node.direct.getnodes

%  function node.direct.getnormalizedline

%  function node.direct.getnumerator
%  function node.direct.setnumerator

%  function node.direct.getdenominator
%  function node.direct.setdenominator

%  function node.direct.getoffsets
%  function node.direct.setoffsets

%  function node.direct.getoptions
%  function node.direct.setoptions

%  function node.direct.getorientation
%  function node.direct.setorientation

%  function node.direct.getparstate

%  function node.direct.getpenalty
%  function node.direct.setpenalty

%  function node.direct.getpropertiestable

%  function node.direct.getfam

%  function node.direct.getrightdelimiter
%  function node.direct.getleftdelimiter
%  function node.direct.gettopdelimiter
%  function node.direct.getbottomdelimiter
%
%  function node.direct.setbottomdelimiter
%  function node.direct.setrightdelimiter
%  function node.direct.setleftdelimiter
%  function node.direct.settopdelimiter

%  function node.direct.getscript
%  function node.direct.setscript

%  function node.direct.getspeciallist
%  function node.direct.setspeciallist

%  function node.direct.getstate (n,[state])
%
%  function node.direct.setstate

%  function node.direct.getscale
%  function node.direct.getscales
%
%  function node.direct.setscales

%  function node.direct.getsubtype
%
%  function node.direct.setsubtype

%  function node.direct.getbottom
%  function node.direct.gettop
%
%  function node.direct.setbottom
%  function node.direct.settop

%  function node.direct.getwordrange

%  function node.direct.getxscale
%  function node.direct.getxyscales
%  function node.direct.getyscale

%  function node.direct.hasattribute
%  function node.direct.hasdimensions
%  function node.direct.hasdiscoption
%  function node.direct.hasgeometry
%  function node.direct.hasglyphoption

%  function node.direct.hyphenating

%  function node.direct.ignoremathskip

%  function node.direct.isboth
%  function node.direct.ischar
%  function node.direct.isdirect
%  function node.direct.isglyph
%  function node.direct.isitalicglyph
%  function node.direct.isnext
%  function node.direct.isnextchar
%  function node.direct.isnextglyph
%  function node.direct.isnode
%  function node.direct.isprev
%  function node.direct.isprevchar
%  function node.direct.isprevglyph
%  function node.direct.issimilarglyph
%  function node.direct.isspeciallist
%  function node.direct.isvalid

%  function node.direct.lastnode

%  function node.direct.makeextensible

%  function node.direct.migrate

%  function node.direct.naturalhsize

%  function node.direct.newcontinuationatom

%  function node.direct.newmathglyph
%  function node.direct.newtextglyph

%  function node.direct.patchparshape

%  function node.direct.protectglyphsnone

%  function node.direct.removefromlist

%  function node.direct.repack
%  function node.direct.reverse

%  function node.direct.serialized
%  function node.direct.setanchors

%  function node.direct.setlink

%  function node.direct.setsplit

%  function node.direct.settotal

%  function node.direct.show

%  function node.direct.softenhyphens

%  function node.direct.startofpar

%  function node.direct.tostring
%  function node.direct.tovaliddirect

%  function node.direct.traverseitalic
%  function node.direct.traverseleader

%  function node.direct.usedlist
%  function node.direct.usesfont

%  function node.direct.verticalbreak

%  function node.direct.xscaled
%  function node.direct.yscaled

%  function node.direct.isloop -- undocumented

%  node.shared.size
%  node.shared.id
%  node.shared.getcachestate

%  node.hybrid.gluetostring

\environment luametatex-style

\startdocument[title=Nodes]

\startsection[title={Introduction}]

The (to be) typeset content is collected in a double linked list of so called
nodes. A node is an array of values. When looked at from the \LUA\ end you can
either see them as \type [option=LUA] {<t:userdata>} or as \type [option=LUA]
{<t:integer>}. In the case of userdata you access fields like this:

\starttyping [option=LUA]
local width = foo.width -- foo is userdata
\stoptyping

while the indexed variant uses:

\starttyping [option=LUA]
local width = node.direct.getwidth(foo) -- foo is an integer
\stoptyping

In \CONTEXT\ we mostly use the second variant but it's a matter of taste so users
can use whatever they like most. When you print a userdata node you see something
like this:

\startlines
\startluacode
context.type(tostring(node.new("glyph"))) context.par()
context.type(tostring(node.new("hlist"))) context.par()
context.type(tostring(node.new("glue" ))) context.par()
\stopluacode
\stoplines

The number in the middle is the one you would also see if you use the indexed
approach and often these numbers are kind of large. A number \type {13295}
doesn't mean that we have that many nodes. The engine has a large array of memory
words (pairs of 32 bit integers) and a node is a slice of them with the index
pointing to where we start. So, if we have a node that has 5 value pairs, the
slice runs from \type {13295} up to \type {13299} and consumes 40 bytes.

In this chapter we introduce the nodes that are exposed to the user. We will
discuss the relevant fields as well as ways to access them. Because there are
similar fields in different nodes, we can share accessors.

It is important to notice that not all fields that can be accessed (set and get)
are under full user control. For instance, in math we have a \type {noad} type
that is actually shared between several constructs (like atoms, accents and
fences) and not all parameters make sense for each of them. Some properties are
set while the formula is assembled. It fits in the \LUAMETATEX\ concept to open
up everything but abusing this can lead to side effects. It makes no sense to add
all kinds of safeguards against wrong or unintended usage because in the end only
a few users will go that low level anyway.

Not all fields mentioned are accessible in the userdata variant. It is also good
to notice that some fields are fabricated, for instance \type {total} is the sum
of \type {height} and \type {depth}.

\stopsection

\startsection[title={\LUA\ node representation}]

As mentioned, nodes are represented in \LUA\ as user data objects with a variable
set of fields or by a numeric identifier when requested and we showed that when
you print a node user data object you will see these numbers.

\startfourrows
\ctxlua{moduledata.node.codes("types")}
\stopfourrows

You can ask for a list of fields with \type {node.fields} and for valid subtypes
with \type {node.subtypes}. There are plenty specific field values and you can
get some idea about them by calling \type {tex.get*values()} which returns a table
of numbers (exclusive numbers or bits). We use these to get the tables that are
shown with each node type.

There are a lot of helpers and below we show them per node type. In later
sections some will come back organized by type of usage. Trivial getters and
setters will not be discussed. It's good to know that some getters take more
arguments where the second one can for instance trigger more return values. The
number of arguments to a setter can also be more than a few. As with everything
\LUAMETATEX, the \CONTEXT\ sources can also be seen as a reference.

\stopsection

\startsection[title={Main text nodes}]

These are the nodes that comprise actual typesetting commands. A few fields are
present in all nodes regardless of their type, these are: \type {next}, \type
{id} and \type {subtype}. The \type {subtype} is sometimes just a dummy entry
because not all nodes actually use the \type {subtype}, but this way you can be
sure that all nodes accept it as a valid field name, and that is often handy in
node list traversal. In the following tables \type {next} and \type {id} are not
explicitly mentioned. Besides these three fields, almost all nodes also have an
\type {attr} field, and there is also a field called \type {prev}.

\startsubsection[title={hlist and vlist, aka boxes}]

These lists share fields and subtypes although some subtypes can only occur in
horizontal lists while others are unique for vertical lists.

\showenginefields   {hlist}
\showenginesubtypes {hlist}
\showenginevalues   {directionvalues}
\showenginevalues   {listgeometryvalues}
\showenginevalues   {listanchorvalues}
\showenginevalues   {listsignvalues}

The \type {shift} is a displacement perpendicular to the character (horizontal) or
line (vertical) progression direction.

The \type {orientation}, \type {woffset}, \type {hoffset}, \type {doffset}, \type
{xoffset} and \type {yoffset} fields are special. They can be used to make the
backend rotate and shift boxes which can be handy in for instance vertical
typesetting. Because they relate to (and depend on) the backend they are not
discussed here (yet). The \type {pre} and \type {post} fields refer to migrated
material in both list types, while the adjusted variants only make sense in
horizontal lists.

\showengineusagepernode{hlist}

\stopsubsection

\startsubsection[title={rule}]

Contrary to traditional \TEX, \LUATEX\ has more subtypes because we also
use rules to store reusable objects and images. However, in \LUAMETATEX\ these
are gone but we reserve these subtypes. Apart from the basic rules a lot is up to
the backend.

\showenginefields   {rule}
\showenginesubtypes {rule}
\showenginevalues   {ruleoptionvalues}

The width, height and depth of regular rules defaults to the special value of
$\cldcontext {tex . magicconstants . runningrule}$ which indicates a running rule
that adapts its dimensions to the box that it sits in.

The \type {left} and \type {right} keys are somewhat special (and experimental).
When rules are auto adapting to the surrounding box width you can enforce a shift
to the right by setting \type {left}. The value is also subtracted from the width
which can be a value set by the engine itself and is not entirely under user
control. The \type {right} is also subtracted from the width. It all happens in
the backend so these are not affecting the calculations in the frontend (actually
the auto settings also happen in the backend). For a vertical rule \type {left}
affects the height and \type {right} affects the depth. There is no matching
interface at the \TEX\ end (although we can have more keywords for rules it would
complicate matters and introduce a speed penalty). However, you can just
construct a rule node with \LUA\ and write it to the \TEX\ input. The \type
{outline} subtype is just a convenient variant and the \type {transform} field
specifies the width of the outline. The \type {xoffset} and \type {yoffset}
fields can be used to shift rules. Because they relate to (and depend on) the
backend they are not discussed here (yet). Of course all this assumes that the
backend deals with it. Internally fields with different names can use the same
variable, depending on the subtype; dedicated names just make more sense.

\showengineusagepernode{rule}

\stopsubsection

\startsubsection[title={insert}]

This node relates to the \type {\insert} primitive and supports the fields:

\showenginefields {insert}

Here the subtype indicates the class of the insert and that number is also used
to access the box, dimen and skip registers that relate to the insert, if we
use inserts in the traditional way.

\showengineusagepernode{insert}

\stopsubsection

\startsubsection[title={mark}]

This one relates to the \type {\marks} primitive and only has a few fields, one
being a token list as field which is kind of rare.

\showenginefields   {mark}
\showenginesubtypes {mark}

\showengineusagepernode{mark}

\stopsubsection

\startsubsection[title={adjust}]

This node results from \type {\vadjust} usage:

\showenginefields   {adjust}
\showenginesubtypes {adjust}
\showenginevalues   {adjustoptionvalues}

\showengineusagepernode{adjust}

\stopsubsection

\startsubsection[title={disc (discretionary)}]

The \typ {\discretionary}, \typ {\explicitdiscretionary} and \typ
{\automaticdiscretionary} primitives as well as the discretionary that comes from
hyphenation all have the pre, post and replace lists. Because these lists have
head and tail pointers the getters and setters handle this for you.

\showenginefields   {disc}
\showenginesubtypes {disc}
\showenginevalues   {discoptionvalues}

\showengineusagepernode{disc}

\stopsubsection

\startsubsection[title={math}]

Math nodes represent the boundaries of a math formula, normally wrapped between
\type {$} and \type {$}. The glue fields are only used when the \type {surround}
field is zero.

\showenginefields   {math}
\showenginesubtypes {math}

\showengineusagepernode{math}

\stopsubsection

\startsubsection[title={glue}]

Skips are about the only type of data objects in traditional \TEX\ that are not a
simple value. They are inserted when \TEX\ sees a space in the text flow but also
by \type {\hskip} and \type {\vskip}. The structure that represents the glue
components of a skip internally is called a \type {gluespec}. In \LUAMETATEX\ we
don't use the spec itself but just its values.

\showenginefields   {glue}
\showenginesubtypes {glue}
\showenginevalues   {glueoptionvalues}

Note that we use the key \type {width} in both horizontal and vertical glue. This
suited the \TEX\ internals well so we decided to stick to that naming.

The effective width of some glue subtypes depends on the stretch or shrink needed
to make the encapsulating box fit its dimensions. For instance, in a paragraph
lines normally have glue representing spaces and these stretch or shrink to make
the content fit in the available space. The \type {effectiveglue} function that
takes a glue node and a parent (hlist or vlist) returns the effective width of
that glue item. When you pass \type {true} as third argument the value will be
rounded.

\showengineusagepernode{glue}

\stopsubsection

\startsubsection[title={gluespec}]

Internally \LUAMETATEX\ (like its ancestors) also uses nodes to store data that
is not seen in node lists. For instance the state of expression scanning (\type
{\dimexpr} etc.) and conditionals (\type {\ifcase} etc.) is also kept in lists of
nodes. A glue, which has five components, is stored in a node as well, so, where
most registers store just a number, a skip register (or internal quantity) uses a
pointer to a glue spec node. It has similar fields as glue nodes, which is not
surprising because in the past (and other engines than \LUATEX) a glue node also
has its values stored in a glue spec. This has some advantages because often the
values are the same, so for instance spacing related skips were not resolved
immediately but pointed to the current value of a space related internal register
(like \type {\spaceskip}). But, in \LUATEX\ and therefore \LUAMETATEX\ we do
resolve these quantities immediately and we put the current values in the glue
nodes.

\showenginefields {gluespec}

You will only find these nodes in a few places, for instance when you query an
internal quantity. In principle we could do without them as we have interfaces
that use the five numbers instead. For compatibility reasons we keep glue spec
nodes exposed but this might change in the future. Of course there are no
subtypes here because it's just a data store.

\showengineusagepernode{gluespec}

\stopsubsection

\startsubsection[title={kern}]

The \type {\kern} command creates such nodes but for instance the font and math
machinery can also add them.

\showenginefields   {kern}
\showenginesubtypes {kern}

\showengineusagepernode{kern}

\stopsubsection

\startsubsection[title={penalty}]

The \type {\penalty} command is one that generates these nodes. There is not much
to tell about them, apart from that in \LUAMETATEX\ they have options and a
possible spread related \type {nepalty} field that is used internally.

\showenginefields   {penalty}
\showenginesubtypes {penalty}
\showenginevalues   {penaltyoptionvalues}

\showengineusagepernode{penalty}

\stopsubsection

\startsubsection[title={glyph}]

These are probably the most used nodes and although you can push them in the
current list with for instance \prm {char} \TEX\ will normally do it for you when
it considers some input to be text. Glyph nodes are relatively large and have many
fields.

\showenginefields   {glyph}
\showenginesubtypes {glyph}
\showenginevalues   {glyphoptionvalues}
\showenginevalues   {glyphdiscvalues}
\showenginevalues   {discpartvalues}
\showenginevalues   {glyphprotectionvalues}

The \type {width}, \type {height} and \type {depth} values are read|-|only.
In \LUATEX\ \type {expansion} has been introduced as part of the separation between
front- and backend. It is the result of extensive experiments with a more
efficient implementation of expansion. Early versions of \LUATEX\ already
replaced multiple instances of fonts in the backend by scaling but contrary to
\PDFTEX\ in \LUATEX\ we now also got rid of font copies in the frontend and
replaced them by expansion factors that travel with glyph nodes. Apart from a
cleaner approach this is also a step towards a better separation between front-
and backend.

% The \type {ischar} function checks if a node is a glyph node with a subtype still
% less than 256. This function can be used to determine if applying font logic to a
% glyph node makes sense. The value \type {nil} gets returned when the node is not
% a glyph, a character number is returned if the node is still tagged as character
% and \type {false} gets returned otherwise. When nil is returned, the id is also
% returned. The \type {isglyph} variant doesn't check for a subtype being less
% than 256, so it returns either the character value or nil plus the id. These
% helpers are not always faster than separate calls but they sometimes permit
% making more readable tests. The \type {usesfont} helpers takes a node
% and font id and returns true when a glyph or disc node references that font.

% The \type {isnextchar} and \type {isprevchar} return a next node, a character
% code (or false) and an node id or next character code. The four \type {is}
% checkers take a node and optionally a font, data, state, scale, xscale and yscale
% value that are then checked.

\showengineusagepernode{glyph}

\stopsubsection

\startsubsection[title={boundary}]

This node relates to the \type {\noboundary}, \type {\boundary}, \type
{\protrusionboundary}, \type {\wordboundary} etc. These are relatively small nodes
that determine what happens before and after them.

\showenginefields   {boundary}
\showenginesubtypes {boundary}
\showenginevalues   {protrusionboundaryvalues}

\showengineusagepernode{boundary}

\stopsubsection

\startsubsection[title={par}]

This node is inserted at the start of a paragraph. You should not mess too much
with this one. They are also inserted when \type {\local...} primitives are used
that relate boxes to positions in the line and overload certain parameters that
play a role in the line break routine. There are many fields!

\showenginefields[2]{par}
\showenginesubtypes {par}

\showengineusagepernode{par}

\stopsubsection

\startsubsection[title={dir}]

Direction nodes mark parts of the running text that need a change of direction
and the \type {\textdirection} command generates them. Contrary to \LUATEX\
we only have two directions.

\showenginefields   {dir}
\showenginesubtypes {dir}

\showengineusagepernode{dir}

\stopsubsection

\startsubsection[title={whatsit}]

A whatsit node is a real simple one and it only has a subtype. It is even less
than a user node (which it actually could be) and uses hardly any memory. What
you do with it is entirely up to you: it is really minimalistic. You can assign a
subtype and it has attributes. It is all up to the user (and the backend) how
they are handled.

\showenginefields {whatsit}

\showengineusagepernode{whatsit}

\stopsubsection

\startsubsection[title={attribute}]

This is a small node but used a lot. When an attribute is set and travels with a
node, we actually have a forward (only) linked list with a head node that keeps a
reference count. These lists are (to be) sorted by attribute index. Normally you
will {\em not} mess directly with these lists because you can get unwanted side
effects.

\showenginefields   {attribute}
\showenginesubtypes {attribute}

\showengineusagepernode{attribute}

\stopsubsection

\startsubsection[title={alignrecord}]

This node can be encountered in alignments and will eventually become a \type
{hlist} or \type {vlist} node. It therefore has the same size and fields as those
nodes. However, the following fields are overloaded by other parameters: \type
{woffset}, \type {hoffset}, \type {doffset}, \type {xoffset}, \type {yoffset},
\type {orientation}, \type {pre} and \type {post}. Be careful!

\showenginefields {alignrecord}

\showengineusagepernode{alignrecord}

\stopsubsection

\startsubsection[title={unset}]

This node can be encountered in alignments and will eventually become a \type
{hlist} or \type {vlist} node. It therefore has the same size and fields as those
nodes. However, the following fields are (at least temporarily) there and they
use the slots of \type {woffset}, \type {hoffset}, \type {doffset} and \type
{orientation}. Be careful!

\showenginefields {unset}

\showengineusagepernode{unset}

\stopsubsection

\stopsection

\startsection[title={Math nodes}]

\startsubsection[title=The concept]

Many object fields in math mode are either simple characters in a specific family
or math lists or node lists: \type {mathchar}, \type {mathtextchar}, \type {subbox},
\type {submlist} and \type {delimiter}. These are endpoints and therefore the
\type {next} and \type {prev} fields of these subnodes are unused.

There is a subset of nodes dedicated to math called noads. These are used for
simple atoms, fractions, fences, accents and radicals. When you enter a formula,
\TEX\ creates a node list with regular (math) nodes and noads. Then it hands over
the list to the math processing engine. The result of that is a nodelist without
noads. Most of the noads contain subnodes so that the list of possible fields is
actually quite small. Math formulas are both a linked list and a tree. For
instance in $e = mc^2$ there is a linked list \type {e = m c} but the \type {c}
has a superscript branch that itself can be a list with branches.

Eventually I might give a more detailed description of the differences between
the five noad variants but for now the following has to do. One will quite likely
not set that many fields at the \LUA\ end but running over the many sub lists can
make sense. One has to know what the engine is doing anyway.

\stopsubsection

\startsubsection[title=noad]

First, there are the objects (the \TEX book calls them \quote {atoms}) that are
associated with the simple math objects: ord, op, bin, rel, open, close, punct,
inner, over, under, vcenter. These all have the same fields, and they are
combined into a single node type with separate subtypes for differentiation.
However, before reading on you should realize that \LUAMETATEX\ has an extended
math engine. We have not only more classes, we also have many more keys in the
nodes. We won't cover these details here.

\showenginefields   {noad}
\showenginesubtypes {noad}
\showenginevalues   {noadoptionvalues}

In addition to the subtypes (related to classes) that the engine knows of, there
can be user defined subtypes. Not all fields make sense for every derived noad:
\type {accent}, \type {fence}, \type {fraction} or \type {radical} but there we
(currently) only mention the additional ones. These additional fields are taken
from a pool of extra fields. Not all fields are always accessible for these nodes.

\showengineusagepernode{noad}

\stopsubsection

\startsubsection[title=mathchar]

The \type {mathchar} is the simplest subnode field, it contains the character and
family for a single glyph object. The family eventually resolves to a reference
to a font. Internally this node is one of the math kernel nodes.

\showenginefields   {mathchar}
%showenginesubtypes {mathchar}
\showenginevalues   {kerneloptionvalues}

\showengineusagepernode{mathchar}

\stopsubsection

\startsubsection[title=mathtextchar]

The \type {mathtextchar} is a special case that you will not normally encounter,
it arises temporarily during math list conversion (its sole function is to
suppress a following italic correction). Internally this node is one of the math
kernel nodes.

\showenginefields   {mathtextchar}
%showenginesubtypes {mathtextchar}
\showenginevalues   {kerneloptionvalues}

\showengineusagepernode{mathtextchar}

\stopsubsection

\startsubsection[title=subbox]

The \type {subbox} subnode is used for subsidiary list items where
the \type {list} points to a \quote {normal} vbox or hbox.

\showenginefields   {subbox}
%showenginesubtypes {subbox}

\showengineusagepernode{subbox}

\stopsubsection

\startsubsection[title=submlist]

In a \type {submlist} subnode the \type {list} points to a math list that is yet to
be converted. Its fields are:

\showenginefields   {submlist}
%showenginesubtypes {submlist}

\showengineusagepernode{submlist}

\stopsubsection

\startsubsection[title={delimiter}]

There is a fifth subnode type that is used exclusively for delimiter fields. As
before, the \type {next} and \type {prev} fields are unused, but we do have:

\showenginefields   {delimiter}
%showenginesubtypes {delimiter}

The fields \type {largechar} and \type {largefamily} can be zero, in that case
the font that is set for the \type {smallfamily} is expected to provide the large
version as an extension to the \type {smallchar}.

\showengineusagepernode{delimiter}

\stopsubsection

\startsubsection[title={accent}]

Accent nodes deal with stuff on top or below a math construct.

\showenginefields   {accent}
\showenginesubtypes {accent}

{\em For more fields see \type {noad}. At some point we might move fields from
that list to here but only when the engine also gets that split.}

\showengineusagepernode{accent}

\stopsubsection

\startsubsection[title={style}]

These nodes are signals to switch to another math style. Currently the subtype is
actually used to store the style but don't rely on that for the future.

\showenginefields   {style}
%showenginesubtypes {style}
\showenginevalues   {mathstylenamevalues}
\showenginevalues   {mathstylevalues}

\showengineusagepernode{style}

\stopsubsection

\startsubsection[title={parameter}]

These nodes are used to (locally) set math parameters. The subtype reflects a math style.

\showenginefields {parameter}

\showengineusagepernode{parameter}

\stopsubsection

\startsubsection[title={choice}]

Most of the fields of this node are lists. Depending on the subtype different
field names are used.

\showenginefields   {choice}
\showenginesubtypes {choice}

\showengineusagepernode{choice}

\stopsubsection

\startsubsection[title={radical}]

Radical nodes are the most complex as they deal with scripts as well as
constructed large symbols. Warning: never assign a node list to the \type
{nucleus}, \type {sub}, \type {sup}, \type {left}, or \type {degree} field unless
you are sure its internal link structure is correct, otherwise an error can be
triggered.

\showenginefields   {radical}
\showenginesubtypes {radical}

{\em For more fields see \type {noad}. At some point we might move fields from
that list to here but only when the engine also gets that split.}

\showengineusagepernode{radical}

\stopsubsection

\startsubsection[title={fraction}]

Fraction nodes are also used for delimited cases, hence the \type {left} and
\type {right} fields among others.

\showenginefields   {fraction}
\showenginesubtypes {fraction}

{\em For more fields see \type {noad}. At some point we might move fields from
that list to here but only when the engine also gets that split.}

\showengineusagepernode{fraction}

\stopsubsection

\startsubsection[title={fence}]

Fence nodes come in pairs but either one can be a dummy (this period driven empty
fence). Some of these fields are used by the renderer and might get adapted in
the process.

\showenginefields   {fence}
\showenginesubtypes {fence}

{\em For more fields see \type {noad}. At some point we might move fields from
that list to here but only when the engine also gets that split.}

\showengineusagepernode{fence}

\stopsubsection

\stopsection

\startsection[title=Helpers]

\startsubsection[title={Introduction}]

The userdata node variant has accessors on that object but when we use the
indexed variant we use functions. As a consequence there are more helpers for
direct nodes than for userdata nodes and many of them accept more arguments or
have multiple return values. When you use \CONTEXT\ you will notice that instead
of the \typ {node.direct} namespace we use \type {nuts}. Among the reasons is
that we had an intermediate variant in \CONTEXT\ \MKIV\ before we had these
direct nodes. That variant was more efficient than the userdata accessors and
triggered the introduction of direct nodes after which we dropped the
intermediate variant. So, for \CONTEXT\ users direct nodes are nuts.

\stopsubsection

\startsubsection[title={Housekeeping}]

This function returns an array that maps node id numbers to node type strings,
providing an overview of the possible top|-|level \type {id} types.

\starttyping[option=LUA]
function node.types ( )
    return <t:table> -- identifiers
end
\stoptyping

This shows the names of the nodes and their internal numbers. Not all nodes are
visible unless one goes really deep down into lists. The next two convert a name
to its internal numeric representation and vice versa. The numbers don't relate
to importance or some ordering; they just appear in the order that is handy for
the engine. Commands like this are rather optimized so performance should be ok
but you can of course always store the id in a \LUA\ number.

\starttyping[option=LUA]
function node.id ( <t:string> name )
    return <t:integer> -- identifier
end

function node.type ( <t:integer> identifier )
    return <t:string> -- name
end
\stoptyping

This function returns an indexed table with valid field names for a particular
type of node. Some fields (like \type {total}) can be constructed from other
fields.

\starttyping[option=LUA]
function node.fields ( <t:integer> identifier | <t:string> name )
    return <t:table> -- fields
end
\stoptyping

The \type {hasfield} function returns a boolean that is only true if \type {n} is
actually a node, and it has the field. This function probably is not that useful
but some nodes don't have a \type {subtype}, \type {attr} or \type {prev} field
and this is a way to test for that.

\starttyping[option=LUA]
function node.direct.hasfield ( <t:direct> n, <t:string> name )
    return <t:boolean> -- okay
end
\stoptyping

The \type {new} function creates a new node. All its fields are initialized to
either zero or \type {nil} except for \type {id} and \type {subtype}. Instead of
numbers you can also use strings (names). If you pass a second argument the
subtype will be set too.

\starttyping[option=LUA]
function node.direct.new (
    <t:number> id | <t:string> name
)
    return <t:direct> -- node
end

function node.direct.new (
    <t:number> id | <t:string> name,
    <t:number> | <t:string> subtype
)
    return <t:direct> -- node
end
\stoptyping

As already has been mentioned, you are responsible for making sure that nodes
created this way are used only once, and are freed when you don't pass them
back somehow.

The next one frees node \type {n} from \TEX's memory. Be careful: no checks are
done on whether this node is still pointed to from a register or some \type
{next} field: it is up to you to make sure that the internal data structures
remain correct. Fields that point to nodes or lists are flushed too. So, when
you used their content for something else you need to set them to nil first.

\starttyping[option=LUA]
function node.direct.free ( <t:direct> n )
    return <t:direct> -- next
end
\stoptyping

The \type {free} function returns the next field of the freed node, while the
\type {flushnode} alternative returns nothing.

\starttyping[option=LUA]
function node.direct.flushnode ( <t:direct> n )
    -- no return values
end
\stoptyping

A list starting with node \type {n} can be flushed from \TEX's memory too. Be
careful: no checks are done on whether any of these nodes is still pointed to
from a register or some \type {next} field: it is up to you to make sure that the
internal data structures remain correct.

\starttyping[option=LUA]
function node.direct.flushlist ( <t:direct> n )
    -- no return values
end
\stoptyping

When you free for instance a discretionary node, \type {flushlist} is applied to
the \type {pre}, \type {post}, \type {replace} so you don't need to do that
yourself. Assigning them \type {nil} won't free those lists!

This creates a deep copy of node \type {n}, including all nested lists as in the case
of a hlist or vlist node. Only the \type {next} field is not copied.

\starttyping[option=LUA]
function node.direct.copy ( <t:direct> n )
    return <t:direct> -- copy
end
\stoptyping

A deep copy of the node list that starts at \type {n} can be created too. If
\type {m} is also given, the copy stops just before node \type {m}.

\starttyping[option=LUA]
function node.direct.copylist ( <t:direct> n )
    return <t:direct> -- copy
end

function node.direct.copylist ( <t:direct> n, <t:direct> m )
    return <t:direct> -- copy
end
\stoptyping

Note that you cannot copy attribute lists this way. However, there is normally no
need to copy attribute lists because when you do assignments to the \type {attr}
field or make changes to specific attributes, the needed copying and freeing
takes place automatically. When you change a value of an attribute {\em in} a
list, it will affect all the nodes that share that list.

\starttyping[option=LUA]
function node.direct.write ( <t:direct> n )
    -- no return values
end
\stoptyping

This function will append a node list to \TEX's \quote {current list}. The node
list is not deep|-|copied! There is no error checking either! You might need to
enforce horizontal mode in order for this to work as expected.

\stopsubsection

\startsubsection[title={Manipulating lists}]

Unless there is a bug or a callback messes up, a node list is double linked. In
original \TEX\ nodes had to be small so nodes only had a next pointer. If you run
into an issue you can use the next helper to make sure that the node list is
double linked.

\starttyping[option=LUA]
function node.direct.slide ( <t:direct> n)
    return <t:direct> -- tail
end
\stoptyping

In most cases \TEX\ itself only uses \type {next} pointers but your other
callbacks might expect proper \type {prev} pointers too. So, when you run into
issues or are in doubt, apply the slide function before you return the list. You
can also get the tail without sliding:

\starttyping[option=LUA]
function node.direct.tail ( <t:direct> n )
    return <t:direct> -- tail
end
\stoptyping

For tracing purposes we have a few counters. The first one returns the number of
nodes contained in the node list that starts at \type {n}. If \type {m} is also
supplied it stops at \type {m} instead of at the end of the list. The node \type
{m} is not counted.

\starttyping[option=LUA]
function node.direct.length (
    <t:direct> n
)
    return <t:integer>
end

function node.direct.length (
    <t:direct> n,
    <t:direct> m
)
    return <t:integer>
end
\stoptyping

The second one returns the number of nodes contained in the node list that starts
at \type {n} that have a matching \type {id} field. If \type {m} is also supplied,
counting stops at \type {m} instead of at the end of the list. The node \type {m}
is not counted. This function also accepts string \type {id}'s.

\starttyping[option=LUA]
function node.direct.count (
    <t:integer> id,
    <t:direct>  n
)
    return <t:integer>
end

function node.direct.count (
    <t:integer> id,
    <t:direct>  n,
    <t:direct>  m
)
    return <t:integer>
end
\stoptyping

This function removes the node \type {current} from the list following \type
{head}. It is your responsibility to make sure it is really part of that list.
The return values are the new \type {head} and \type {current} nodes. The
returned \type {current} is the node following the \type {current} in the calling
argument, and is only passed back as a convenience (or \type {nil}, if there is
no such node). The returned \type {head} is more important, because if the
function is called with \type {current} equal to \type {head}, it will be
changed. When the third argument is passed, the node is freed.

\starttyping[option=LUA]
function node.direct.remove ( <t:direct> head, <t:direct> current )
    return
        <t:direct> head,
        <t:direct> current,
        <t:direct> removed
end

function node.direct.remove ( <t:direct> head, <t:direct> current, <t:boolean> free)
    return
        <t:direct>, -- head
        <t:direct>  -- current
end
\stoptyping

This function inserts the node \type {new} before \type {current} into the list
following \type {head}. It is your responsibility to make sure that \type
{current} is really part of that list. The return values are the (potentially
mutated) \type {head} and the node \type {new}, set up to be part of the list
(with correct \type {next} field). If \type {head} is initially \type {nil}, it
will become \type {new}.

\starttyping[option=LUA]
function node.direct.insertbefore (
    <t:direct> head,
    <t:direct> current,
    <t:direct> new
)
    return
        <t:direct>, -- head
        <t:direct>  -- new
end
\stoptyping

This function inserts the node \type {new} after \type {current} into the list
following \type {head}. It is your responsibility to make sure that \type
{current} is really part of that list. The return values are the \type {head} and
the node \type {new}, set up to be part of the list (with correct \type {next}
field). If \type {head} is initially \type {nil}, it will become \type {new}.

\starttyping[option=LUA]
function node.direct.insertafter (
    <t:direct> head,
    <t:direct> current,
    <t:direct> new
)
    return
        <t:direct>, -- head
        <t:direct>  -- new
end
\stoptyping

You can also mess with the list by changing the \type {next} or \type {prev} fields, using:

\starttyping[option=LUA]
function node.direct.setprev ( <t:direct> n, <t:direct> prv                 ) end
function node.direct.setnext ( <t:direct> n, <t:direct> nxt                 ) end
function node.direct.setboth ( <t:direct> n, <t:direct> prv, <t:direct> nxt ) end
\stoptyping

The next function pops the last node from \TEX's \quote{current list}. It returns
that node, or \type {nil} if the current list is empty.

\starttyping[option=LUA]
function node.direct.lastnode ( )
    return <t:direct> n
end
\stoptyping

This helper returns the location of the first match at or after node \type {n}:

\starttyping[option=LUA]
function node.direct.findnode ( <t:direct> n, <t:integer> subtype )
    return <t:direct> -- n
end

function node.direct.findnode ( <t:direct> n )
    return
        <t:direct>, -- n
        <t:integer> -- subtype
end
\stoptyping

\stopsubsection

\startsubsection[title={Traversing}]

The easiest do-it-yourself approach to run over a list of nodes is to use one of
the following functions:

\starttyping[option=LUA]
function node.direct.getnext ( <t:direct> n )
    return <t:direct> | <t:nil>
end

function node.direct.getprev ( <t:direct> n )
    return <t:direct> | <t:nil>
end

function node.direct.getboth ( <t:direct> n )
    return
        <t:direct> | <t:nil>, -- prev
        <t:direct> | <t:nil>  -- next
end
\stoptyping

Instead of using these you can use one of the iterators that loops over the node
list that starts at \type {n}.

\starttyping[option=LUA]
function node.direct.traverse ( <t:direct> n )
    return
        <t:direct>  t,
        <t:integer> id,
        <t:integer> subtype
end
\stoptyping

Typically code looks like this:

\starttyping[option=LUA]
for n in node.traverse(head) do
    -- whatever
end
\stoptyping

which is functionally equivalent to:

\starttyping[option=LUA]
do
    local n
    local function f (head,var)
        local t
        if var == nil then
            t = head
        else
            t = var.next
        end
        return t
    end
    while true do
        n = f (head, n)
        if n == nil then
            break
        end
        -- whatever
    end
end
\stoptyping

It should be clear from the definition of the function \type {f} that even though
it is possible to add or remove nodes from the node list while traversing, you
have to take great care to make sure all the \type {next} (and \type {prev})
pointers remain valid.

If the above is unclear to you, see the section \quote {For Statement} in the
\LUA\ Reference Manual.

This is an iterator that loops over all the nodes in the list that starts at
\type {n} that have a matching \type {id} field. See the previous section for
details. The change is in the local function \type {f}, which now does an extra
while loop checking against the upvalue \type {id}, kind of like:

\starttyping
local function f(head,var)
    local t
    if var == nil then
        t = head
    else
        t = var.next
    end
    while t and t.id ~= id do
        t = t.next
    end
    return t
end
\stoptyping

This and the previously discussed \type {traverse} are the only traversers
provided for userdata nodes.

\starttyping[option=LUA]
function node.direct.traverseid ( <t:integer> id, <t:direct> n )
    return
        <t:direct>  t,
        <t:integer> subtype
end
\stoptyping

The \type {traversechar} iterator loops over the \type {glyph} nodes in a list.
Only nodes with a subtype less than 256 are seen.

{\em NEEDS CHECKING: protected check}

\starttyping[option=LUA]
function node.direct.traversechar ( <t:direct> n )
    return
        <t:direct>,  -- n
        <t:integer>, -- char
        <t:integer>  -- font
end
\stoptyping

The \type {traverseglyph} iterator loops over a list and filters out everything
but the glyph nodes:

\starttyping[option=LUA]
function node.direct.traverseglyph ( <t:direct> n )
    return
        <t:direct>,  -- n
        <t:integer>, -- char
        <t:integer>  -- font
end
\stoptyping

This iterator loops over the \type {hlist} and \type {vlist} nodes in a list. The
four return values can save some time compared to fetching these fields but in
practice you seldom need them all.

\starttyping[option=LUA]
function node.direct.traverselist ( <t:direct> n )
    return
        <t:direct>,  -- n
        <t:integer>, -- identifier
        <t:integer>, -- subtype
        <t:direct>   -- list
end
\stoptyping

This iterator loops over nodes that have content: \type {hlist}, \type {vlist},
\type {glue} with leaders, \type {glyph}, \type {disc} and \type {rule} nodes.

\starttyping[option=LUA]
function node.direct.traversecontent ( <t:direct> n )
    return
        <t:direct>,  -- n
        <t:integer>, -- identifier
        <t:integer>, -- subtype
        <t:direct>   -- listorleader
end
\stoptyping

The traversers also support backward traversal. An optional extra boolean triggers
this. Yet another optional boolean will automatically start at the end of the
given list. So, if we want both we use:

\starttyping[option=LUA]
function node.direct.traverse (
    <t:direct>  n,
    <t:boolean> reverse,
    <t:boolean> startatend
)
    return
        <t:direct>  t,
        <t:integer> id,
        <t:integer> subtype
end
\stoptyping

\stopsubsection

\startsubsection[title={Glyphs}]

Glyphs have a lot of parameters and there are many setters and getters that
can access them. Some generic ones, like \type {getwidth} are discussed
in other subsections, some are more specific to glyphs:

\starttyping[option=LUA]
function node.direct.getslant  ( <t:direct> g ) return <t:integer> end
function node.direct.getweight ( <t:direct> g ) return <t:integer> end
\stoptyping

and

\starttyping[option=LUA]
function node.direct.setslant  ( <t:direct> g, <t:integer> slant  ) end
function node.direct.setweight ( <t:direct> g, <t:integer> weight ) end
\stoptyping

\stopsubsection

\startsubsection[title={Glue}]

You can set the five properties of a glue in one go. If a non|-|numeric value is
passed the property becomes zero.

\starttyping[option=LUA]
function node.direct.setglue ( <t:direct> n )
    -- no return values
end

function node.direct.setglue (
    <t:direct>  n,
    <t:integer> width,
    <t:integer> stretch,
    <t:integer> shrink,
    <t:integer> stretchorder,
    <t:integer> shrinkorder
)
    -- no return values
end
\stoptyping

When you pass values, only arguments that are numbers are assigned so the next
call will only adapt the width and shrink.

\starttyping[option=LUA]
node.direct.setglue(n,655360,false,65536)
\stoptyping


When a list node is passed, you set the glue, order and sign instead. The next
call will return five values or nothing when no glue is passed.

\starttyping[option=LUA]
function node.direct.getglue ( <t:direct> n )
    return
        <t:integer>, -- width
        <t:integer>, -- stretch
        <t:integer>, -- shrink
        <t:integer>, -- stretchorder
        <t:integer>  -- shrinkorder
end
\stoptyping

When the second argument is false, only the width is returned (this is consistent
with \type {tex.get}). When a list node is passed, you get back the glue that is
set, the order of that glue and the sign.

This function returns \type {true} when the width, stretch and shrink properties
are all zero.

\starttyping[option=LUA]
function node.direct.iszeroglue ( <t:direct> n )
    return <t:boolean> -- allzero
end
\stoptyping

Glue is not only, well, glue. The to be filled space can also be occupied by a rule,
boxes, glyphs and what more. You can get the list that makes this with:

\starttyping[option=LUA]
function node.direct.getleader ( <t:direct> n )
    return <t:direct> -- list
end
\stoptyping

and set the list with

\starttyping[option=LUA]
function node.direct.setleader ( <t:direct> n, <t:direct> l | <t:nil> )
    -- no return values
end
\stoptyping

\stopsubsection

\startsubsection[title={Attributes}]

Assignments to attribute registers result in assigning lists with set attributes
to nodes and the implementation is non|-|trivial because the value that is
attached to a node is essentially a (sorted) sparse array of key|-|value pairs.
It is generally easiest to deal with attribute lists and attributes by using the
dedicated functions in the \type {node} library.

An \type {attribute} comes in two variants, indicated by subtype. Because
attributes are stored in a sorted linked list, and because they are shared, the
first node is a list reference node and the following ones are value nodes. So,
most attribute nodes are value nodes. These are forward linked lists. Because
there are assumptions about how these lists are built you should rely on the helpers,
also because details might change.

This returns the currently active list of attributes, if there is one.

\starttyping[option=LUA]
function node.direct.currentattr()
    return <t:direct> -- list
end
\stoptyping

The intended usage of \type {currentattr} is as follows (we use the userdata
interface here):

\starttyping[option=LUA]
local x1 = node.new("glyph")
x1.attr = node.currentattr()
local x2 = node.new("glyph")
x2.attr = node.currentattr()
\stoptyping

or:

\starttyping[option=LUA]
local x1 = node.new("glyph")
local x2 = node.new("glyph")
local ca = node.currentattr()
x1.attr = ca
x2.attr = ca
\stoptyping

The attribute lists are reference counted and the assignment takes care of
incrementing the count. You cannot expect the value \type {ca} to be valid any
more when you assign attributes (using \type {tex.setattribute}) or when control
has been passed back to \TEX.

\starttyping[option=LUA]
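function node.direct.hasattribute ( <t:direct> n, <t:integer> id )
    return <t:integer> | <t:nil> -- value
end

function node.direct.hasattribute ( <t:direct> n, <t:integer> id, <t:integer> val )
    return <t:integer> | <t:nil> -- value
end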
\stoptyping

Tests if a node has the attribute with number \type {id} set. If \type {val} is
also supplied, also tests if the value matches \type {val}. It returns the value,
or, if no match is found, \type {nil}.

\starttyping[option=LUA]
function node.direct.getattribute ( <t:direct> n, <t:integer> id )
    return <t:integer> -- value
end
\stoptyping

The previous function tests if a node has an attribute with number \type {id}
set. It returns the value, or, if no match is found, \type {nil}. If no \type
{id} is given then attribute zero is assumed.

%
\starttyping[option=LUA]
function node.direct.findattribute ( <t:direct> n, <t:integer> id )
    return
        <t:integer>, -- value
        <t:direct>   -- node
end
\stoptyping

Finds the first node that has attribute with number \type {id} set. It returns
the value and the node if there is a match and otherwise nothing.

\starttyping[option=LUA]
function node.direct.setattribute ( <t:direct> n, <t:integer> id, <t:integer> value )
    -- no return values
end
\stoptyping

Sets the attribute with number \type {id} to the value \type {value}. Duplicate
assignments are ignored.

\starttyping[option=LUA]
function node.direct.unsetattribute ( <t:direct> n, <t:integer> id )
    return <t:integer> -- value
end

function node.direct.unsetattribute ( <t:direct> n, <t:integer> id, <t:integer> value )
    return <t:integer> -- value
end
\stoptyping

Unsets the attribute with number \type {id}. If \type {value} is also supplied,
it will only perform this operation if the value matches \type {value}. Missing
attributes or attribute|-|value pairs are ignored. If the attribute was actually
deleted, the function returns its old value, otherwise it returns \type {nil}.

\stopsubsection

\startsubsection[title={Glyph handling}]

Processing a character stream into a visual representation using glyphs is one of
the important processes in the engine. In \TEX82 this happens in two places. When
the text is read ligaturing and kerning takes place and the list can, if needed,
be packed into a box because the dimensions are now known. When that list is to
become a paragraph it might be that lines get split and when a word can be
hyphenated the ligaturing and kerning is reverted, the word gets hyphenated,
ligatures and kerns get reapplied and the process goes on.

In \OPENTYPE\ processing characters is way more complex. Even if we delegate this
to a library, the fact that we have a mix of text and whatever, potential
hyphenation as well as spaces turned glue, means that we need to do some juggling
with nodes. For that reason hyphenation (of the whole list), ligaturing and
kerning have been split into clearly separated stages. One can still apply the
original \TEX\ variants but in practice it is \LUA\ that does the juggling of
nodes in more complex situations. And we're not only talking of font processing.
For instance, additional inter-character kerning can be done in \LUA\ too.

This all means that we have quite a repertoire of helpers that deal with glyph
processing efficiently.

We can locate the first node in the list starting at \type {n} that is a glyph node
with a subtype indicating it is a glyph, or \type {nil}. If \type {m} is given,
processing stops at (but not including) that node, otherwise processing stops at the
end of the list. The \type {char} and \type {glyph} variants check for the
protected field being (yet) unset or (already) set.

\starttyping[option=LUA]
function node.direct.firstglyphnode ( <t:direct> n )
    return <t:direct> -- n
end

function node.direct.firstglyphnode ( <t:direct> n, <t:direct> m )
    return <t:direct> -- n
end
\stoptyping

The next functions can be used to determine if processing is needed. We distinguish between
a character (unprocessed) and a glyph (processed or unprocessed). When we check for a glyph
there are three possible outcomes:

\starttyping[option=LUA]
function node.direct.isglyph ( <t:direct> n )
    return
        <t:nil>,
        <t:nil>
end

function node.direct.isglyph ( <t:direct> n )
    return
        <t:false>,
        <t:integer> -- identifier
end

function node.direct.isglyph ( <t:direct> n )
    return
        <t:integer>, -- character
        <t:integer>  -- font
end
\stoptyping

Checking for a processed character is more complicated. If the glyph has been processed and
the protected property has been set, we get this:

\starttyping[option=LUA]
function node.direct.ischar ( <t:direct> n )
    return <t:false>
end
\stoptyping

If that's not the case additional arguments are checked. If we don't pass a valid
integer, the character value is returned:

\starttyping[option=LUA]
function node.direct.ischar ( <t:direct> n, <t:integer> font )
    return <t:integer> -- character
end
\stoptyping

But when we do pass a font identifier, we check if that one matches the one in the
glyph, and if it doesn't we again get:

\starttyping[option=LUA]
function node.direct.ischar ( <t:direct> n, <t:integer> font )
    return <t:false> --
end
\stoptyping

From there on we check for more arguments to match the glyph fields:

\starttyping[option=LUA]
function node.direct.ischar (
    <t:direct>  n,
    <t:integer> font,
    <t:integer> data
)
    return <t:false> | <t:integer> -- character

end

function node.direct.ischar (
    <t:direct>  n,
    <t:integer> font,
    <t:integer> data,
    <t:integer> state
)
    return <t:false> | <t:integer> -- character

end

function node.direct.ischar (
    <t:direct>  n,
    <t:integer> font,
    <t:integer> scale,
    <t:integer> xscale,
    <t:integer> yscale
)
    return <t:false> | <t:integer> -- character

end

function node.direct.ischar (
    <t:direct>  n,
    <t:integer> font,
    <t:integer> data,
    <t:integer> scale,
    <t:integer> xscale,
    <t:integer> yscale
)
    return <t:false> | <t:integer> -- character

end
\stoptyping

There are reasons for these combined tests and they can be found in the \CONTEXT\
font handler. A related helper is one that compares the \type {font}, \type
{data}, \type {scale}, \type {xscale}, \type {yscale}, \type {slant} and \type
{weight}.

\starttyping[option=LUA]
function node.direct.issimilarglyph ( <t:direct>  one, <t:direct> two )
    return <t:boolean> -- similar
end
\stoptyping

This function returns the first glyph or disc node in the given list:

\starttyping[option=LUA]
function node.direct.hasglyph ( <t:direct> n )
    return <t:direct> -- n
end
\stoptyping

Traditional \TEX\ ligature processing can be achieved with the next helper. This
assumes that the ligature information is present in the font. In \CONTEXT\ we
call this base mode processing.

\starttyping[option=LUA]
function node.direct.ligaturing ( <t:direct> first )
    return
        <t:direct>, -- head
        <t:direct>, -- tail
        <t:boolean> -- success
end

function node.direct.ligaturing ( <t:direct> first, <t:direct> last )
    return
        <t:direct>, -- head
        <t:direct>, -- tail
        <t:boolean> -- success
end
\stoptyping

Traditional \TEX\ font kern processing can be achieved with the next helper. This
assumes that the kern information is present in the font. In \CONTEXT\ we
call this base mode processing.

\starttyping[option=LUA]
function node.direct.kerning ( <t:direct> first )
    return
        <t:direct>, -- head
        <t:direct>, -- tail
        <t:boolean> -- success
end

function node.direct.kerning ( <t:direct> first, <t:direct> last )
    return
        <t:direct>, -- head
        <t:direct>, -- tail
        <t:boolean> -- success
end
\stoptyping

When processing is done, you can mark the glyph nodes as protected in order to
prevent redundant processing, for instance because boxed material gets unboxed.
Where in \LUATEX\ the subtype gets changed by adding or subtracting 256, in
\LUAMETATEX\ we have a dedicated (small) protection field.

\starttyping[option=LUA]
function node.direct.protectglyph ( <t:direct> n )
    -- no return values
end

function node.direct.protectglyphs ( <t:direct> first, <t:direct> last )
    -- no return values
end
\stoptyping

The opposite action can also be done.

\starttyping[option=LUA]
function node.direct.unprotectglyph ( <t:direct> n )
    -- no return values
end

function node.direct.unprotectglyphs ( <t:direct> first, <t:direct> last )
    -- no return values
end
\stoptyping

The next function checks if protrusion is active at a line boundary, in which case the glyph
node can be skipped. It's not that useful in the end.

\starttyping[option=LUA]
function node.direct.protrusionskipable ( <t:direct> n )
    return <t:boolean> -- skippable
end
\stoptyping

Once we're done we can freeze leaders: apply the glue to the leader and freeze
the boxes or whatever is at hand.

\starttyping[option=LUA]
function node.direct.flattenleaders ( <t:direct> n )
    return
        <t:direct>, -- head
        <t:integer> -- count
end
\stoptyping

\stopsubsection

\startsubsection[title=Discretionaries]

Discretionaries and glyphs are the carriers of text. Where the core of \type
{glyph} nodes are the \type {font} and \type {char} fields, in \type {disc} nodes
we have to focus on the \type {pre}, \type {post} and \type{replace} fields.
These point to linked lists that are a mix of glyphs, kerns and (in \LUAMETATEX\
fixed width) glue. Here are the accessors: \footnote {These are a bit more
generic because they also return fields from \type {choice} nodes and possibly
\type {hlist} and \type {vlist} nodes.}

\starttyping[option=LUA]
function node.direct.getpost ( <t:direct> d, <t:boolean> tailtoo )
    return
        <t:direct>, -- head
        <t:direct>  -- tail
end

function node.direct.getpre ( <t:direct> d, <t:boolean> tailtoo )
    return
        <t:direct>, -- head
        <t:direct>  -- tail
end

function node.direct.getreplace ( <t:direct> d, <t:boolean> tailtoo )
    return
        <t:direct>, -- head
        <t:direct>  -- tail
end
\stoptyping

\starttyping[option=LUA]
function node.direct.getdisc ( <t:direct> d, <t:boolean> tailtoo )
    return
        <t:direct>, -- prehead
        <t:direct>, -- posthead
        <t:direct>, -- replacehead
        <t:direct>, -- pretail
        <t:direct>, -- posttail
        <t:direct>  -- replacetail
end
\stoptyping

We also have setters:

\starttyping[option=LUA]
function node.direct.setpost    ( <t:direct> d, <t:direct> | <t:nil> ) end
function node.direct.setpre     ( <t:direct> d, <t:direct> | <t:nil> ) end
function node.direct.setreplace ( <t:direct> d, <t:direct> | <t:nil> ) end
\stoptyping

A major update can be done with this one:

\starttyping[option=LUA]
function node.direct.setdisc (
    <t:direct>,            -- discretionary
    <t:direct>  | <t:nil>, -- pre
    <t:direct>  | <t:nil>, -- post
    <t:direct>  | <t:nil>, -- replace
    <t:integer> | <t:nil>, -- subtype
    <t:integer> | <t:nil>  -- penalty
)
    -- no return values
end
\stoptyping

From this you can deduce that we can also say:

\starttyping[option=LUA]
function node.direct.getpenalty ( <t:direct> d )
    return <t:integer> -- penalty
end

function node.direct.setpenalty ( <t:direct> d, <t:integer> penalty )
    -- no return value
end
\stoptyping

The next pair targets glyphs and normally you will not use the setter, because
the engine takes care of setting that state.

\starttyping[option=LUA]
function node.direct.getdiscpart ( <t:direct> g )
    return
        <t:integer>, -- part
        <t:integer>, -- after
        <t:integer>  -- code

end

function node.direct.setdiscpart (
    <t:direct>  g,
    <t:integer> part,
    <t:integer> after,
    <t:integer> code
)
    -- no return value
end
\stoptyping

The part and after properties relate to discretionary nodes that might have been
flattened. The complication in (tracing) here is that information is lost so we
store the states in the glyph node.

\showenginevalues{discpartvalues}

The code properties relate to where the (usually hyphen) character comes from:

\showenginevalues{glyphdiscvalues}

When you fool around with disc nodes you need to be aware of the fact that they
have a special internal data structure. As long as you reassign the fields when
you have extended the lists it's ok because then the tail pointers get updated,
but when you add to a list without reassigning you might end up in trouble when
the linebreak routine kicks in. You can call this function to check the list for
issues with disc nodes.

\starttyping[option=LUA]
function node.direct.checkdiscretionary ( <t:direct> n )
    -- no return values
end
\stoptyping

The plural variant runs over all disc nodes in a list, the singular variant
checks one node only (it also checks if the node is a disc node).

\starttyping[option=LUA]
function node.direct.checkdiscretionaries ( <t:direct> head )
    -- no return values
end
\stoptyping

This function will remove the discretionaries in the list and inject the replace
field when set.

\starttyping[option=LUA]
function node.direct.flattendiscretionaries ( <t:direct> n )
    return
        <t:direct>, -- head
        <t:integer> -- count
end
\stoptyping

\stopsubsection

\startsubsection[title=Packaging and dimensions]

At some point a node list has to be packed in either a horizontal or vertical
box. There are restrictions to what can get packed, for instance you cannot have
glyphs in a vertical list.


The \type {hpack} function creates a new hlist by packaging the list that begins
at node \type {list} into a horizontal box. With only a single argument, this box
is created using the natural width of its components. In the three argument form,
\type {info} must be either \type {additional} or \type {exactly}, and \type
{width} is the additional (\type {\hbox spread}) or exact (\type {\hbox to}) width
to be used. The second return value is the badness of the generated box.

\starttyping[option=LUA]
function node.direct.hpack (
    <t:direct> list
)
    return
        <t:direct>, -- box
        <t:integer> -- badness
end

function node.direct.hpack (
    <t:direct>  list,
    <t:integer> width,
    <t:string>  info -- "additional" | "exactly"
)
    return
        <t:direct>, -- box
        <t:integer> -- badness
end

function node.direct.hpack (
    <t:direct>  list,
    <t:integer> width,
    <t:string>  info, -- "additional" | "exactly"
    <t:integer> direction
)
    return
        <t:direct>, -- box
        <t:integer> -- badness
end
\stoptyping

% Caveat: there can be unexpected side|-|effects to this function, like migration
% of \type {mark} and \type {insert} nodes. Also note that the content of \type {h}
% is the original node list \type {n}: if you free \type {h} you will also free the
% node list itself, unless you explicitly set the \type {list} field to \type {nil}
% beforehand. And in a similar way, calling free on \type {n} will invalidate \type
% {h} as well!

The \type {vpack} function creates a new vlist by packaging the list that begins
at node \type {list} into a vertical box. With only a single argument, this box is
created using the natural height of its components. In the three argument form,
\type {info} must be either \type {additional} or \type {exactly}, and \type {height}
is the additional (\type {\vbox spread}) or exact (\type {\vbox to}) height to be
used. The second return value is the badness of the generated box.

\starttyping[option=LUA]
function node.direct.vpack (
    <t:direct> list
)
    return
        <t:direct>, -- box
        <t:integer> -- badness
end

function node.direct.vpack (
    <t:direct>  list,
    <t:integer> height,
    <t:string>  info -- "additional" | "exactly"
)
    return
        <t:direct>, -- box
        <t:integer> -- badness
end

function node.direct.vpack (
    <t:direct>  list,
    <t:integer> height,
    <t:string>  info, -- "additional" | "exactly"
    <t:integer> direction
)
    return
        <t:direct>, -- box
        <t:integer> -- badness
end
\stoptyping

This function calculates the natural in|-|line dimensions of the node list starting
at node \type {first} and terminating just before node \type {last} (or the end of the
list, if there is no second argument). The return values are scaled points.

\starttyping[option=LUA]
function node.direct.dimensions (
    <t:direct> first,
    <t:direct> last
)
    return
        <t:integer>, -- width
        <t:integer>, -- height
        <t:integer>  -- depth
end
\stoptyping

This alternative calling method takes glue settings into account and is especially useful for
finding the actual width of a sublist of nodes that are already boxed, for
example in code like this, which prints the width of the space in between the
\type {a} and \type {b} as it would be if \type {\box0} was used as-is:

\starttyping
\setbox0 = \hbox to 20pt {a b}

\directlua{print (node.dimensions(
    tex.box[0].glueset,
    tex.box[0].gluesign,
    tex.box[0].glueorder,
    tex.box[0].head.next,
    node.tail(tex.box[0].head)
)) }
\stoptyping

You need to keep in mind that this is one of the few places in \TEX\ where floats
are used, which means that you can get small differences in rounding when you
compare the width reported by \type {hpack} with \type {dimensions}.

\starttyping[option=LUA]
function node.direct.dimensions (
    <t:number>  glueset,
    <t:integer> gluesign,
    <t:integer> glueorder,
    <t:direct>  first,
    <t:direct>  last
)
    return
        <t:integer>, -- width
        <t:integer>, -- height
        <t:integer>  -- depth
end
\stoptyping

This alternative saves a few lookups and can be more convenient in some
cases:

\starttyping[option=LUA]
function node.direct.rangedimensions (
    <t:direct> parent,
    <t:direct> first,
    <t:direct> last
)
    return
        <t:integer>, -- width
        <t:integer>, -- height
        <t:integer>  -- depth
end
\stoptyping

If you only need the width, a simple and somewhat more efficient variant is this,
where again \type {last} is optional:

\starttyping[option=LUA]
function node.direct.naturalwidth (
    <t:direct> first,
    <t:direct> last
)
    return <t:integer> -- width
end
\stoptyping

More low level are the following helpers. They accept various kinds of nodes:
\type {hlist}, \type {vlist}, \type {unset}, \type {rule}, \type {glyph} or \type {glue}
(because these can have a leader).

\starttyping[option=LUA]
function node.direct.getwhd ( <t:direct> n )
    return
        <t:dimension>, -- width
        <t:dimension>, -- height
        <t:dimension>  -- depth
end
\stoptyping

In case of a glyph you can also get the expansion:

\starttyping[option=LUA]
function node.direct.getwhd ( <t:direct> n, <t:true> expansion )
    return
        <t:dimension>, -- width
        <t:dimension>, -- height
        <t:dimension>, -- depth
        <t:integer>    -- expansion
end
\stoptyping

The \type {getwidth} accepts even more node types: \type {hlist}, \type {vlist},
\type {unset}, \type {align}, \type {rule}, \type {glue}, \type {gluespec}, \type
{glyph}, \type {kern} and \type {math} (surround).

\starttyping[option=LUA]
function node.direct.getwidth ( <t:direct> n )
    return <t:dimension> -- width
end
\stoptyping

And for glyphs:

\starttyping[option=LUA]
function node.direct.getwidth ( <t:direct> n, <t:true> expansion )
    return
        <t:dimension>, -- width
        <t:dimension>  -- expansion
end
\stoptyping

The getter for height operates on \type {hlist}, \type {vlist}, \type {unset},
\type {rule}, \type {insert} and \type {fence}.

\starttyping[option=LUA]
function node.direct.getheight ( <t:direct> n )
    return <t:dimension> -- height
end
\stoptyping

For the depth we have a different repertoire: \type {hlist}, \type {vlist}, \type
{unset}, \type {rule}, \type {insert}, \type {glyph} and \type {fence}.

\starttyping[option=LUA]
function node.direct.getdepth ( <t:direct> n )
    return <t:dimension> -- depth
end
\stoptyping

For \type {hlist}, \type {vlist}, \type {unset}, \type {rule}, \type {insert},
\type {glyph} and \type {fence} we can get the total of height and depth:

\starttyping[option=LUA]
function node.direct.gettotal ( <t:direct> n )
    return <t:dimension> -- height + depth
end
\stoptyping

Only \type {hlist} and \type {vlist} have a (vertical or horizontal) shift:

\starttyping[option=LUA]
function node.direct.getshift ( <t:direct> n )
    return <t:dimension> -- shift
end
\stoptyping

This one is only valid for \type {glyph} and \type {kern} nodes:

\starttyping[option=LUA]
function node.direct.getexpansion ( <t:direct> n )
    return <t:dimension> -- expansion
end
\stoptyping

Before we move on we mention the setters:

\starttyping[option=LUA]
function node.direct.setwidth     ( <t:direct> n, <t:dimension> width     ) end
function node.direct.setheight    ( <t:direct> n, <t:dimension> height    ) end
function node.direct.setdepth     ( <t:direct> n, <t:dimension> depth     ) end
function node.direct.setshift     ( <t:direct> n, <t:dimension> shift     ) end
function node.direct.setexpansion ( <t:direct> n, <t:integer>   expansion ) end
\stoptyping

The combined one ignores values that are no number, so passing (e.g.) \type {nil}
or (nicer) \type {false} will retain the value.

\starttyping[option=LUA]
function node.direct.setwhd (
    <t:direct>    node,
    <t:dimension> width,
    <t:dimension> height,
    <t:dimension> depth
)
    -- no return values
end
\stoptyping

The \type {hlist} and \type {vlist} nodes (but others as well) have a field
called \type {list}:

\starttyping[option=LUA]
function node.direct.getlist ( <t:direct> b )
    return <t:direct> -- list
end

function node.direct.setlist ( <t:direct> b, <t:direct> list )
    -- nothing to return
end
\stoptyping

When a list is packaged, glue is resolved and the list node gets its glue properties
set so that the backend can apply the stretch and shrink to the glue amount. There might
be situations where you want to do this explicitly, which is why we provide:

\starttyping[option=LUA]
function node.direct.freeze ( <t:direct> b )
    -- nothing to return
end
\stoptyping

In \LUAMETATEX\ we can handle nested marks, inserts and adjusts, and
pre and post material can get bound to a box. We can use these to access them:

\starttyping[option=LUA]
function node.direct.getpost ( <t:direct> b, <t:boolean> tailtoo )
    return
        <t:direct>, -- head
        <t:direct>  -- tail
end

function node.direct.getpre ( <t:direct> b, <t:boolean> tailtoo )
    return
        <t:direct>, -- head
        <t:direct>  -- tail
end
\stoptyping

and these to set them, although they are unlikely candidates for that.

\starttyping[option=LUA]
function node.direct.setpost ( <t:direct> b, <t:direct> | <t:nil> ) end
function node.direct.setpre  ( <t:direct> b, <t:direct> | <t:nil> ) end
\stoptyping

\stopsubsection

\startsubsection[title={Math}]

We start with the function that runs the internal \quote {mlist to hlist}
conversion that turns the yet unprocessed math list into a horizontal list. The
interface is the same as for the callback \type {mlisttohlist}.

\starttyping[option=LUA]
function node.direct.mlisttohlist (
    <t:direct>  list,
    <t:string>  displaytype,
    <t:boolean> penalties
)
    return <t:direct> -- result
end
\stoptyping

When you have a horizontal list with math you can locate the relevant portion
with:

\starttyping[option=LUA]
function node.direct.beginofmath ( <t:direct> n ) return <t:direct> end
function node.direct.endofmath   ( <t:direct> n ) return <t:direct> end
\stoptyping

You can for instance use these helpers to skip over math in case you're
processing text.

The math noads have a nucleus and scripts. In \LUAMETATEX\ we have the usual
super- and subscript but also prescripts and a primescript, so five scripts in
total, and naturally we have getters for these:

\starttyping[option=LUA]
function node.direct.getnucleus ( <t:direct> n ) return <t:direct> | <t:nil> end
function node.direct.getprime   ( <t:direct> n ) return <t:direct> | <t:nil> end
function node.direct.getsup     ( <t:direct> n ) return <t:direct> | <t:nil> end
function node.direct.getsub     ( <t:direct> n ) return <t:direct> | <t:nil> end
function node.direct.getsuppre  ( <t:direct> n ) return <t:direct> | <t:nil> end
function node.direct.getsubpre  ( <t:direct> n ) return <t:direct> | <t:nil> end
\stoptyping

plus:

\starttyping[option=LUA]
function node.direct.getscripts ( <t:direct> n )
    return
        <t:direct>, -- primescript
        <t:direct>, -- superscript
        <t:direct>, -- subscript
        <t:direct>, -- superprescript
        <t:direct>  -- subprescript
end
\stoptyping

These are complemented by setters. When the second argument is not passed (or nil)
the field is reset.

\starttyping[option=LUA]
function node.direct.setnucleus ( <t:direct> n, <t:direct> nucleus        ) end
function node.direct.setprime   ( <t:direct> n, <t:direct> primescript    ) end
function node.direct.setsup     ( <t:direct> n, <t:direct> superscript    ) end
function node.direct.setsub     ( <t:direct> n, <t:direct> subscript      ) end
function node.direct.setsuppre  ( <t:direct> n, <t:direct> superprescript ) end
function node.direct.setsubpre  ( <t:direct> n, <t:direct> subprescript   ) end
\stoptyping

And of course:

\starttyping[option=LUA]
function node.direct.setscripts (
    <t:direct> n,
    <t:direct> primescript,
    <t:direct> superscript,
    <t:direct> subscript,
    <t:direct> superprescript,
    <t:direct> subprescript
)
    -- no return values
end
\stoptyping

In the discretionaries subsection we mention accessing pre, post and replace
fields. These functions can also be used for \type {choice} nodes. Discussing
this is currently beyond this manual.

\stopsubsection

\startsubsection[title={MVL}]

Some properties of the currently used main vertical list can be fetched with:

\starttyping[option=LUA]
function node.direct.getmvllist (
    -- currently no parameters
)
    return
        <t:direct>,  -- head
        <t:direct>,  -- tail
        <t:integer> -- mvl
end
\stoptyping

\stopsubsection

\startsubsection[title={Balancing}]

The \type {node.direct.vbalance} function will either disappear or get accompanied
by related helpers (mirroring primitives); it depends on what \CONTEXT\ needs.

Updating marks is done with the following set of helpers, that just call the code
that does the same before handing over content to the output routine:

\starttyping[option=LUA]
function node.direct.updatetopmarks ( )
    return <t:boolean> -- done
end

function node.direct.updatefirstmarks ( )
    return <t:boolean> -- done
end

function node.direct.updatefirstandbotmark ( <t:direct> box )
    -- no return value
end

function node.direct.updatemarks ( <t:direct> box )
    return <t:boolean> -- done
end
\stoptyping

\stopsubsection

\startsubsection[title={\SYNCTEX}]

You can set and query the \SYNCTEX\ fields, a file number aka tag and a line
number, for \type {glue}, \type {kern}, \type {hlist}, \type {vlist}, \type
{rule} and \type {math} nodes as well as \type {glyph} nodes (although this last
one is not used in native \SYNCTEX).

\starttyping[option=LUA]
function node.direct.setsynctexfields ( <t:direct> n, <t:integer> fileid, <t:integer> line )
    -- no return values
end

function node.direct.getsynctexfields ( <t:direct> n )
    return
        <t:integer>, -- fileid
        <t:integer>  -- line
end
\stoptyping

Of course you need to know what you're doing as no checking on sane values takes
place. Also, the \SYNCTEX\ interpreter used in editors is rather peculiar and has
some assumptions (heuristics) and there are different incompatible versions
floating around. Even more important to notice is that the engine doesn't do
anything with this so support is up to \LUA.

\stopsubsection

\startsubsection[title={Two access models}]

Deep down in \TEX\ a node has a number which is a numeric entry in a memory
table. In fact, this model, where \TEX\ manages memory, is really fast and one of
the reasons why plugging in callbacks that operate on nodes is quite fast too.
Each node gets a number that is in fact an index in the memory table and that
number often is reported when you print node related information. You go from
userdata nodes to their numeric references and back with:

\starttyping[option=LUA]
function node.todirect ( <t:node>   n) return <t:direct> end
function node.tonode   ( <t:direct> d) return <t:node>   end
\stoptyping

The userdata model is rather robust as it is a virtual interface with some
additional checking, while the more direct access uses the node numbers
directly. However, even with userdata you can get into trouble when you free
nodes that are no longer allocated or mess up lists. If you apply \type
{tostring} to a node you see its internal (direct) number and id.

The userdata model provides key based access while the direct model always
accesses fields via functions:

\starttyping[option=LUA]
local c = nodeobject.char
local c = getfield(nodenumber,"char")
\stoptyping

If you use the direct model, even if you know that you deal with numbers, you
should not depend on that property but treat it as an abstraction just like
traditional nodes. In fact, the fact that we use a simple basic datatype has the
penalty that less checking can be done, but less checking is also the reason why
it's somewhat faster. An important aspect is that one cannot mix both methods,
but you can cast both models. So, multiplying a node number makes no sense.

So our advice is: use the indexed (table) approach when possible and investigate
the direct one when speed might be a real issue. For that reason \LUATEX\ also
provides the \type {get*} and \type {set*} functions in the top level node
namespace. There is a limited set of getters. When implementing this direct
approach the regular index by key variant was also optimized, so direct access
only makes sense when nodes are accessed millions of times (which happens in some
font processing for instance).

We're talking mostly of getters because setters are less important. Documents
have not that many content related nodes and setting many thousands of properties
is hardly a burden contrary to millions of consultations.

Normally you will access nodes like this:

\starttyping
local next = current.next
if next then
    -- do something
end
\stoptyping

Here \type {next} is not a real field, but a virtual one. Accessing it results in
a metatable method being called. In practice it boils down to looking up the node
type and based on the node type checking for the field name. In a worst case you
have a node type that sits at the end of the lookup list and a field that is last
in the lookup chain. However, in successive versions of \LUATEX\ these lookups
have been optimized and the most frequently accessed nodes and fields have a
higher priority.

In the direct namespace there are more helpers and most of them are accompanied
by setters. The getters and setters are clever enough to see what node is meant.
We don't deal with whatsit nodes: their fields are always accessed by name. It
doesn't make sense to add getters for all fields, we just identify the most
likely candidates. In complex documents, many node and field types never get
seen, or seen only a few times, but for instance glyphs are candidates for such
optimization.

In previous sections we only show the functions in the \type {node.direct}
namespace. The following functions are available in both \type {node} and \type
{node.direct}:

\startthreerows
\startluacode
    -- Typeset a single column table listing every key in node.direct whose
    -- value is a function and that also exists as a key in the node table.
    context.starttabulate { "|T|" }
    -- table.sortedhash walks the keys in sorted order so the listing is stable
    for k, v in table.sortedhash(node.direct) do
        if type(v) == "function" and node[k] then
            -- one row per shared function name
            context.NC() context(k)
            context.NC() context.NR()
        end
    end
    context.stoptabulate()
\stopluacode
\stopthreerows

In \CONTEXT\ these are duplicated in \type {nodes.nuts} so that is the reference.
Quite some functions get mapped onto the \type {nodes} namespace. In addition we
emulate some userdata functions and add some of our own. We show them here because
this manual takes \CONTEXT\ as reference.

\starttworows
\startluacode
    -- Typeset a three column table: the name of each function in node.direct,
    -- followed by a star when the same key is present in node and in nodes.
    context.starttabulate { "|T|c|c|" }
        -- header row with the three namespace names
        context.BC() context("node.direct")
        context.BC() context("node")
        context.BC() context("nodes")
        context.NC() context.NR()
        -- a row with only empty cells, separating the header from the body
        context.NC()
        context.NC()
        context.NC()
        context.NC() context.NR()
        -- todo: add a little space here
        -- one row per function in node.direct, in sorted key order
        for k, v in table.sortedhash(node.direct) do
            if type(v) == "function" then
                context.NC() context(k)
                -- the star marks that a key with this name exists in that table
                context.NC() if node [k] then context("$⋆$") end
                context.NC() if nodes[k] then context("$⋆$") end
                context.NC() context.NR()
            end
        end
    context.stoptabulate()
\stopluacode
\stoptworows

The following functions are in the \CONTEXT\ \type {nodes} namespace but don't
come from the library. Again, we show them here because \CONTEXT\ is the
reference.

\starttworows
\startluacode
    -- Typeset a three column table of the functions in nodes that have no
    -- counterpart in node.direct, with a star when the same key is present
    -- in nodes.nuts and in node.
    context.starttabulate { "|T|c|c|" }
        -- header row with the three namespace names
        context.BC() context("nodes")
        context.BC() context("nodes.nuts")
        context.BC() context("node")
        context.NC() context.NR()
        -- a row with only empty cells, separating the header from the body
        context.NC()
        context.NC()
        context.NC()
        context.NC() context.NR()
        -- one row per qualifying function in nodes, in sorted key order
        for k, v in table.sortedhash(nodes) do
            -- skip non-functions and anything that also lives in node.direct
            if type(v) == "function" and not node.direct[k] then
                context.NC() context(k)
                context.NC() if nodes.nuts[k] then context("$⋆$") end
                context.NC() if node      [k] then context("$⋆$") end
                context.NC() context.NR()
            end
        end
    context.stoptabulate()
\stopluacode
\stoptworows

We have quite some helpers and some accept different node types. Here is the
repertoire:

\startluacode
    -- Typeset a two column table from moduledata.repertoire (filled elsewhere):
    -- the entry name in the first column, the sorted keys of its value in the
    -- second (paragraph) column.
    -- NOTE(review): presumably each key is a helper name and each value a table
    -- keyed by the node types that helper accepts -- confirm where it is built.
    context.starttabulate { "|lT|plT|" }
    for name, usage in table.sortedhash(moduledata.repertoire) do
        context.NC() context(name)
        -- NOTE(review): the "% t" format presumably joins the list with spaces
        context.NC() context("% t",table.sortedkeys(usage))
        context.NC() context.NR()
    end
    context.stoptabulate()
\stopluacode

\stopsubsection

\startsubsection[title={Properties}]

Attributes are a convenient way to relate extra information to a node. You can
assign them at the \TEX\ end as well as at the \LUA\ end and consult them at the
\LUA\ end. One big advantage is that they obey grouping. They are linked lists
and normally checking for them is pretty efficient, even if you use a lot of
them. A macro package has to provide some way to manage these attributes at the
\TEX\ end because otherwise clashes in their usage can occur.

Each node also can have a properties table and you can assign values to this
table using the \type {setproperty} function and get properties using the \type
{getproperty} function. Managing properties is way more demanding than managing
attributes.

Take the following example:

\starttyping
\directlua {
    local n = node.new("glyph")

    node.setproperty(n,"foo")
    print(node.getproperty(n))

    node.setproperty(n,"bar")
    print(node.getproperty(n))

    node.free(n)
}
\stoptyping

This will print \type {foo} and \type {bar} which in itself is not that useful
when multiple mechanisms want to use this feature. A variant is:

\starttyping
\directlua {
    local n = node.new("glyph")

    node.setproperty(n,{ one = "foo", two = "bar" })
    print(node.getproperty(n).one)
    print(node.getproperty(n).two)

    node.free(n)
}
\stoptyping

This time we store two properties with the node. It really makes sense to have a
table as property because that way we can store more. But in order for that to
work well you need to do it this way:

\starttyping
\directlua {
    local n = node.new("glyph")

    local t = node.getproperty(n)

    if not t then
        t = { }
        node.setproperty(n,t)
    end
    t.one = "foo"
    t.two = "bar"

    print(node.getproperty(n).one)
    print(node.getproperty(n).two)

    node.free(n)
}
\stoptyping

Here our own properties will not overwrite other users' properties unless of
course they use the same keys. So, eventually you will end up with something like this:

\starttyping
\directlua {
    local n = node.new("glyph")

    local t = node.getproperty(n)

    if not t then
        t = { }
        node.setproperty(n,t)
    end
    t.myself = { one = "foo", two = "bar" }

    print(node.getproperty(n).myself.one)
    print(node.getproperty(n).myself.two)

    node.free(n)
}
\stoptyping

This assumes that only you use \type {myself} as subtable. The possibilities are
endless but care is needed. For instance, the generic font handler that ships
with \CONTEXT\ uses the \type {injections} subtable and you should not mess with
that one!

There are a few helper functions that you normally should not touch as user: \typ
{getpropertiestable}, for instance, will give the table that stores properties (using
direct entries) and you can best not mess too much with that one either because
\LUAMETATEX\ itself will make sure that entries related to nodes will get wiped when
nodes get freed, so that the \LUA\ garbage collector can do its job. In fact, the
main reason why we have this mechanism is that it saves the user (or macro
package) some work. One can easily write a property mechanism in \LUA\ where
after a shipout properties get cleaned up but it's not entirely trivial to make
sure that with each freed node also its properties get freed, due to the fact
that there can be nodes left over for a next page. And having a callback bound to
the node deallocator would add way too much overhead.

When we copy a node list that has a table as property, there are several
possibilities: we do the same as a new node, we copy the entry to the table in
properties (a reference), we do a deep copy of a table in the properties, we
create a new table and give it the original one as a metatable. After some
experiments (that also included timing) with these scenarios we decided that a
deep copy made no sense, nor did nilling. In the end both the shallow copy and
the metatable variant were both ok, although the second one is slower. The most
important aspect to keep in mind is that references to other nodes in properties
no longer can be valid for that copy. We could use two tables (one unique and one
shared) or metatables but that only complicates matters.

When defining a new node, we could already allocate a table but it is rather easy
to do that at the \LUA\ end e.g.\ using a metatable \type {__index} method. That
way it is under macro package control. When deleting a node, we could keep the
slot (e.g.\ setting it to false) but it could make memory consumption rise
needlessly when we have temporary large node lists and after that only small lists.
Both are not done because in the end this is what happens now: when a node is
copied, and it has a table as property, the new node will share that table. The
copy gets its own table with the original table as metatable.

A few more experiments were done. For instance: copy attributes to the properties
so that we have fast access at the \LUA\ end. In the end the overhead is not
compensated by speed and convenience, in fact, attributes are not that slow when
it comes to accessing them. So this was rejected.

Another experiment concerned a bitset in the node but again the gain compared to
attributes could be neglected and given the small amount of available bits it also
demands a pretty strong agreement over what bit represents what, and this is
unlikely to succeed in the \TEX\ community. It doesn't pay off.

Just in case one wonders why properties make sense: it is not so much speed that
we gain, but more convenience: storing all kinds of (temporary) data in attributes
is no fun and this mechanism makes sure that properties are cleaned up when a
node is freed. Also, the advantage of a more or less global properties table is
that we stay at the \LUA\ end. An alternative is to store a reference in the node
itself but that is complicated by the fact that the register has some limitations
(no numeric keys) and we also don't want to mess with it too much.

\stopsubsection

\stopsection

\stopdocument