-- scite-context-lexer-pdf.lua /size: 8773 b    last modification: 2020-07-01 14:35
-- Module metadata (version, authorship, license) in the usual ConTeXt
-- lexer convention; purely informational, not read by the lexer itself.
local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for pdf",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9-- pdf is normally static .. i.e. not edited so we don't really
10-- need embedded lexers.
11
-- Shortcuts for the lpeg constructors used throughout this file.
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

local lexer             = require("scite-context-lexer")
local context           = lexer.context
local patterns          = context.patterns

local token             = lexer.token

-- The lexer object this module configures and returns at the bottom.
local pdflexer          = lexer.new("pdf","scite-context-lexer-pdf")
local whitespace        = pdflexer.whitespace

----- pdfobjectlexer    = lexer.load("scite-context-lexer-pdf-object")
----- pdfxreflexer      = lexer.load("scite-context-lexer-pdf-xref")

-- Generic character-class patterns from the shared pattern set.
-- (The original declared `anything` twice; the duplicate was removed.)
local anything          = patterns.anything
local space             = patterns.space
local spacing           = patterns.spacing
local nospacing         = patterns.nospacing
local restofline        = patterns.restofline

local t_whitespace      = token(whitespace, spacing)
local t_spacing         = token("default",  spacing)
-- NOTE: t_rest, p_comment and t_comment are redeclared further down in
-- this file; the later declarations are the ones the rules table sees.
----- t_rest            = token("default",  nospacing)
local t_rest            = token("default",  anything)

local p_comment         = P("%") * restofline
local t_comment         = token("comment", p_comment)
40
41-- whatever
42
-- Primitives for the pdf object sublanguage (this section originally
-- redeclared space/spacing/nospacing/anything with identical values,
-- and declared unused locals newline/alpha; those were removed — the
-- earlier declarations above remain in effect).
local real              = patterns.real
local cardinal          = patterns.cardinal

local lparent           = P("(")
local rparent           = P(")")
local langle            = P("<")
local rangle            = P(">")
local escape            = P("\\")
local unicodetrigger    = P("feff")

-- A pdf name: a slash followed by characters that are not delimiters.
local nametoken         = 1 - space - S("<>/[]()")
local name              = P("/") * nametoken^1

-- Body of a (...) string: escaped characters, nested balanced
-- parentheses (via the recursive V(1)), or anything but the closer.
local p_string          = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }

local t_spaces          = token("default", spacing)^0
-- Redeclares t_rest: from here on the nospacing variant is in effect.
local t_rest            = token("default", nospacing) -- anything

local p_stream          = P("stream")
local p_endstream       = P("endstream")
local p_obj             = P("obj")
local p_endobj          = P("endobj")
local p_reference       = P("R")

local p_objectnumber    = patterns.cardinal
-- Redeclares p_comment with an equivalent rest-of-line pattern.
local p_comment         = P("%") * (1-S("\n\r"))^0
76
-- Literal string: ( ... ) with quotes and content styled separately.
local t_string          = token("quote",    lparent)
                        * token("string",   p_string)
                        * token("quote",    rparent)
-- Hex string starting with the BOM marker "feff": <feff...>.
local t_unicode         = token("quote",    langle)
                        * token("plain",    unicodetrigger)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
-- Any other <...> hex string (must come after t_unicode in t_other).
local t_whatsit         = token("quote",    langle)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
-- A /Name is styled as a command when used as a dictionary key
-- (t_keyword) and as a constant when used as a value (t_constant).
local t_keyword         = token("command",  name)
local t_constant        = token("constant", name)
local t_number          = token("number",   real)
--    t_reference       = token("number",   cardinal)
--                      * t_spacing
--                      * token("number",   cardinal)
local t_reserved        = token("number",   P("true") + P("false") + P("null"))
--    t_reference       = token("warning",  cardinal * spacing * cardinal * spacing)
--                      * token("keyword",  p_reference)
-- Indirect reference: "<obj> <gen> R", the numbers highlighted as
-- warnings so references stand out from plain numbers.
local t_reference       = token("warning",  cardinal)
                        * t_spacing
                        * token("warning",  cardinal)
                        * t_spacing
                        * token("keyword",  p_reference)

local t_comment         = token("comment",  p_comment)
103
-- Object header "<num> <gen> obj"; numbers styled as warnings so the
-- object boundaries stand out.
local t_openobject      = token("warning",  p_objectnumber)
                        * t_spacing
                        * token("warning",  p_objectnumber)
                        * t_spacing
                        * token("keyword",  p_obj)
--    t_openobject      = token("warning",  p_objectnumber * spacing)
--                      * token("warning",  p_objectnumber * spacing)
--                      * token("keyword",  p_obj)
local t_closeobject     = token("keyword",  p_endobj)

local t_opendictionary  = token("grouping", P("<<"))
local t_closedictionary = token("grouping", P(">>"))

local t_openarray       = token("grouping", P("["))
local t_closearray      = token("grouping", P("]"))

-- stream ... endstream: the payload is everything up to "endstream".
local t_stream          = token("keyword", p_stream)
                        * token("text",    (1 - p_endstream)^1)
                        * token("keyword", p_endstream)

-- Any scalar value; order matters (e.g. t_unicode before t_whatsit,
-- t_reference before t_number so "1 0 R" is not eaten as numbers).
local t_other           = t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit

-- Recursive grammar (lpeg grammar table) for nested << >> dictionaries
-- and [ ] arrays; "whatever" is any value, possibly itself nested.
local t_dictionary      = { "dictionary",
                            dictionary = t_opendictionary
                                       * (t_spaces * t_keyword * t_spaces * V("whatever"))^0
                                       * t_spaces
                                       * t_closedictionary,
                            array      = t_openarray
                                       * (t_spaces * V("whatever"))^0
                                       * t_spaces
                                       * t_closearray,
                            whatever   = V("dictionary")
                                       + V("array")
                                       + t_other,
                        }

-- Full indirect object: header, body (dictionary optionally followed
-- by a stream, or an array, or a scalar), then "endobj".
local t_object          = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
                            dictionary = t_dictionary.dictionary,
                            array      = t_dictionary.array,
                            whatever   = t_dictionary.whatever,
                            object     = t_openobject
                                       * t_spaces
                                       * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + t_other)
                                       * t_spaces
                                       * t_closeobject,
                            number     = t_number,
                        }
151
152-- objects ... sometimes NUL characters play havoc ... and in xref we have
153-- issues with embedded lexers that have spaces in the start and stop
154-- conditions and this cannot be handled well either ... so, an imperfect
155-- solution ... but anyway, there is not that much that can end up in
-- the root of the tree, so we're sort of safe
157
-- "trailer" keyword followed by the trailer dictionary (the grammar
-- table is coerced to a pattern by lpeg's * operator).
local p_trailer         = P("trailer")
local t_trailer         = token("keyword", p_trailer)
                        * t_spacing
                        * t_dictionary
--    t_trailer         = token("keyword", p_trailer * spacing)
--                      * t_dictionary

-- "startxref" followed by the byte offset of the xref table.
local p_startxref       = P("startxref")
local t_startxref       = token("keyword", p_startxref)
                        * t_spacing
                        * token("number", cardinal)
--    t_startxref       = token("keyword", p_startxref * spacing)
--                      * token("number", cardinal)

-- "xref" subsection header: first object number and entry count.
local p_xref            = P("xref")
local t_xref            = token("keyword",p_xref)
                        * t_spacing
                        * token("number", cardinal)
                        * t_spacing
                        * token("number", cardinal)
                        * spacing
--    t_xref            = token("keyword",p_xref)
--                      * token("number", spacing * cardinal * spacing * cardinal * spacing)

-- Redefines t_number to match an xref entry "<offset> <gen> f|n".
-- The grammars above were built with the previous t_number value, so
-- only the rules table below sees this one.
local t_number          = token("number", cardinal)
                        * t_spacing
                        * token("number", cardinal)
                        * t_spacing
                        * token("keyword", S("fn"))
--    t_number          = token("number", cardinal * spacing * cardinal * spacing)
--                      * token("keyword", S("fn"))
189
-- Root rule set, tried in order: whole objects first, then comments,
-- the trailer, startxref, xref subsection headers, xref entries
-- (the redefined t_number), and finally a catch-all rest token.
pdflexer._rules = {
    { "whitespace", t_whitespace },
    { "object",     t_object     },
    { "comment",    t_comment    },
    { "trailer",    t_trailer    },
    { "startxref",  t_startxref  },
    { "xref",       t_xref       },
    { "number",     t_number     },
    { "rest",       t_rest       },
}

pdflexer._tokenstyles = context.styleset

-- lexer.inspect(pdflexer)

-- collapser: obj endobj stream endstream

-- Folding: obj/stream open a fold level (+1), endobj/endstream close
-- one (-1); only matches inside "keyword"-styled text are counted.
pdflexer._foldpattern = p_obj + p_endobj + p_stream + p_endstream

pdflexer._foldsymbols = {
    ["keyword"] = {
        ["obj"]       =  1,
        ["endobj"]    = -1,
        ["stream"]    =  1,
        ["endstream"] = -1,
    },
}

return pdflexer
219