local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for pdf",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- pdf is normally static, i.e. not edited, so we don't really need
-- embedded lexers.
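
-- for orientation, the file level structure that this lexer sees is
-- roughly this (a schematic, incomplete example):
--
--   %PDF-1.7
--   1 0 obj
--     << /Type /Catalog /Pages 2 0 R >>
--   endobj
--   ...
--   xref
--   0 6
--   0000000000 65535 f
--   0000000017 00000 n
--   ...
--   trailer
--     << /Size 6 /Root 1 0 R >>
--   startxref
--   116
--   %%EOF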

local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

local lexers            = require("scite-context-lexer")

local patterns          = lexers.patterns
local token             = lexers.token

local pdflexer          = lexers.new("pdf","scite-context-lexer-pdf")
local pdfwhitespace     = pdflexer.whitespace

----- pdfobjectlexer    = lexers.load("scite-context-lexer-pdf-object")
----- pdfxreflexer      = lexers.load("scite-context-lexer-pdf-xref")

local anything          = patterns.anything
local space             = patterns.space
local spacing           = patterns.spacing
local nospacing         = patterns.nospacing
local restofline        = patterns.restofline

local t_whitespace      = token(pdfwhitespace, spacing)
local t_spacing         = token("default", spacing)
local t_spaces          = token("default", spacing)^0
----- t_rest            = token("default", anything)
local t_rest            = token("default", nospacing)

local p_comment         = P("%") * restofline
local t_comment         = token("comment", p_comment)

-- whatever

local real              = patterns.real
local cardinal          = patterns.cardinal

local lparent           = P("(")
local rparent           = P(")")
local langle            = P("<")
local rangle            = P(">")
local escape            = P("\\")
local unicodetrigger    = P("feff") + P("FEFF") -- hex digits in pdf can be uppercase too

local nametoken         = 1 - space - S("<>/[]()")
local name              = P("/") * nametoken^1

local p_string          = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }
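
-- so a name is e.g. /Type or /MediaBox, and the string grammar above
-- handles balanced parentheses as well as escapes, as in:
--
--   (some (nested) text with an escaped \) parenthesis)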

local p_stream          = P("stream")
local p_endstream       = P("endstream")
local p_obj             = P("obj")
local p_endobj          = P("endobj")
local p_reference       = P("R")

local p_objectnumber    = patterns.cardinal

local t_string          = token("quote",    lparent)
                        * token("string",   p_string)
                        * token("quote",    rparent)
local t_unicode         = token("quote",    langle)
                        * token("plain",    unicodetrigger)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
local t_whatsit         = token("quote",    langle)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
local t_keyword         = token("command",  name)
local t_constant        = token("constant", name)
local t_number          = token("number",   real)
--    t_reference       = token("number",   cardinal)
--                      * t_spacing
--                      * token("number",   cardinal)
local t_reserved        = token("number",   P("true") + P("false") + P("null"))
--    t_reference       = token("warning",  cardinal * spacing * cardinal * spacing)
--                      * token("keyword",  p_reference)
local t_reference       = token("warning",  cardinal)
                        * t_spacing
                        * token("warning",  cardinal)
                        * t_spacing
                        * token("keyword",  p_reference)
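
-- an indirect reference like "12 0 R" thus shows up as two warnings plus
-- a keyword, which makes references stand out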

local t_openobject      = token("warning",  p_objectnumber)
                        * t_spacing
                        * token("warning",  p_objectnumber)
                        * t_spacing
                        * token("keyword",  p_obj)
--    t_openobject      = token("warning",  p_objectnumber * spacing)
--                      * token("warning",  p_objectnumber * spacing)
--                      * token("keyword",  p_obj)
local t_closeobject     = token("keyword",  p_endobj)

local t_opendictionary  = token("grouping", P("<<"))
local t_closedictionary = token("grouping", P(">>"))

local t_openarray       = token("grouping", P("["))
local t_closearray      = token("grouping", P("]"))

local t_stream          = token("keyword", p_stream)
                        * token("text",    (1 - p_endstream)^1)
                        * token("keyword", p_endstream)
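
-- the stream payload (often binary) ends up as one text blob between the
-- stream and endstream keywords, as in this simplified instance:
--
--   << /Length 6 >>
--   stream
--   BT ET
--   endstream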

local t_other           = t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit

local t_dictionary      = { "dictionary",
                            dictionary = t_opendictionary
                                       * (t_spaces * t_keyword * t_spaces * V("whatever"))^0
                                       * t_spaces
                                       * t_closedictionary,
                            array      = t_openarray
                                       * (t_spaces * V("whatever"))^0
                                       * t_spaces
                                       * t_closearray,
                            whatever   = V("dictionary")
                                       + V("array")
                                       + t_other,
                        }
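
-- this grammar handles arbitrary nesting of dictionaries and arrays, as in:
--
--   << /Type /Page
--      /MediaBox [ 0 0 595 842 ]
--      /Resources << /Font << /F1 7 0 R >> >>
--   >>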

local t_object          = { "object", -- weird that we need to catch the end here (otherwise we probably get an invalid lpeg)
                            dictionary = t_dictionary.dictionary,
                            array      = t_dictionary.array,
                            whatever   = t_dictionary.whatever,
                            object     = t_openobject
                                       * t_spaces
                                       * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + t_other)
                                       * t_spaces
                                       * t_closeobject,
                            number     = t_number,
                        }
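
-- so a complete object looks like:
--
--   1 0 obj
--     << /Type /Catalog /Pages 2 0 R >>
--   endobj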

-- objects ... sometimes NUL characters play havoc ... and in xref we have
-- issues with embedded lexers that have spaces in the start and stop
-- conditions, which cannot be handled well either ... so, an imperfect
-- solution ... but anyway, there is not that much that can end up in
-- the root of the tree, so we're sort of safe

local p_trailer         = P("trailer")
local t_trailer         = token("keyword", p_trailer)
                        * t_spacing
                        * t_dictionary
--    t_trailer         = token("keyword", p_trailer * spacing)
--                      * t_dictionary
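
-- for instance:
--
--   trailer
--     << /Size 6 /Root 1 0 R /Info 5 0 R >>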

local p_startxref       = P("startxref")
local t_startxref       = token("keyword", p_startxref)
                        * t_spacing
                        * token("number", cardinal)
--    t_startxref       = token("keyword", p_startxref * spacing)
--                      * token("number", cardinal)
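
-- the number is the byte offset of the (last) xref section:
--
--   startxref
--   116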

local p_xref            = P("xref")
local t_xref            = token("keyword",p_xref)
                        * t_spacing
                        * token("number", cardinal)
                        * t_spacing
                        * token("number", cardinal)
                        * spacing
--    t_xref            = token("keyword",p_xref)
--                      * token("number", spacing * cardinal * spacing * cardinal * spacing)

-- this rebinds t_number: t_other above already captured the real number
-- version, so here the name can be reused for xref entries

local t_number          = token("number", cardinal)
                        * t_spacing
                        * token("number", cardinal)
                        * t_spacing
                        * token("keyword", S("fn"))
--    t_number          = token("number", cardinal * spacing * cardinal * spacing)
--                      * token("keyword", S("fn"))
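
-- together these catch a classic xref section: after the xref keyword
-- comes a subsection header (first object number plus count) followed by
-- one twenty byte entry per object, e.g.:
--
--   xref
--   0 3
--   0000000000 65535 f
--   0000000017 00000 n
--   0000000081 00000 n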

pdflexer.rules = {
    { "whitespace", t_whitespace },
    { "object",     t_object     },
    { "comment",    t_comment    },
    { "trailer",    t_trailer    },
    { "startxref",  t_startxref  },
    { "xref",       t_xref       },
    { "number",     t_number     },
    { "rest",       t_rest       },
}

-- lexers.inspect(pdflexer)

-- collapser: obj endobj stream endstream

pdflexer.folding = {
    ["obj"]       = { ["keyword"] =  1 },
    ["endobj"]    = { ["keyword"] = -1 },
    ["stream"]    = { ["keyword"] =  1 },
    ["endstream"] = { ["keyword"] = -1 },
}
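
-- a positive value opens a fold level at the given keyword and a negative
-- one closes it, so obj ... endobj and stream ... endstream regions can
-- be collapsed in the editor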

return pdflexer