-- Metadata record for this lexer module (same convention as the other
-- scite-context lexers).
local info = {
    comment   = "scintilla lpeg lexer for pdf",
    version   = 1.002,
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9
10
11
-- Localize the lpeg constructors used below (R appears unused in the visible
-- part of the file — kept for consistency with the sibling lexers).
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V

-- The shared lexer framework for all scite-context lexers.
local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns -- ready-made lpeg building blocks

local token = lexer.token -- token(style-name, pattern) -> styled match

-- Create the lexer object; "pdf" is the lexer name, the second argument the
-- file name it is registered under.
local pdflexer = lexer.new("pdf","scite-context-lexer-pdf")
local whitespace = pdflexer.whitespace -- style name used for whitespace tokens
22
23
24
25
-- Generic patterns and fallback tokens. A second, more specific set
-- (t_spaces, a different t_rest, a different p_comment) is defined further
-- down and intentionally shadows part of this one.
-- Fix: the original declared `local anything` twice (identical value);
-- the duplicate is removed.
local anything = patterns.anything
local space = patterns.space
local spacing = patterns.spacing
local nospacing = patterns.nospacing
local restofline = patterns.restofline

local t_whitespace = token(whitespace, spacing)
local t_spacing = token("default", spacing)

local t_rest = token("default", anything)

-- A pdf comment runs from '%' to the end of the line.
local p_comment = P("%") * restofline
local t_comment = token("comment", p_comment)
40
41
42
-- Pattern primitives for the pdf object syntax.
-- Fix: the original re-declared space/spacing/nospacing/anything here with
-- exactly the same values as above; those duplicates are removed.
local newline = patterns.eol
local real = patterns.real
local cardinal = patterns.cardinal
local alpha = patterns.alpha

local lparent = P("(")
local rparent = P(")")
local langle = P("<")
local rangle = P(">")
local escape = P("\\")
local unicodetrigger = P("feff") -- leading byte-order mark of a utf-16 hex string

-- A name token is any run of characters that is not a pdf delimiter.
local nametoken = 1 - space - S("<>/[]()")
local name = P("/") * nametoken^1

-- Body of a ( ... ) string: escaped characters are taken verbatim and nested
-- balanced parentheses recurse via V(1); anything else but ')' is consumed.
local p_string = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }
63
-- Helper tokens for the object grammar. The new t_rest deliberately replaces
-- the earlier one: here it swallows any run of non-spacing characters.
-- Fix: the original re-declared t_spacing here byte-identically to the
-- declaration above; the duplicate is removed.
local t_spaces = token("default", spacing)^0
local t_rest = token("default", nospacing)

-- Keywords delimiting streams, objects and indirect references.
local p_stream = P("stream")
local p_endstream = P("endstream")
local p_obj = P("obj")
local p_endobj = P("endobj")
local p_reference = P("R")

local p_objectnumber = patterns.cardinal
-- Comment pattern stopping before cr/lf; this binding (not the restofline
-- based one above) is the one captured by the final t_comment below.
local p_comment = P("%") * (1-S("\n\r"))^0
76
-- A literal string ( ... ): delimiters styled as quote, body as string.
local t_string = token("quote", lparent)
               * token("string", p_string)
               * token("quote", rparent)
-- A hex string starting with the utf-16 marker feff: trigger styled "plain".
local t_unicode = token("quote", langle)
                * token("plain", unicodetrigger)
                * token("string", (1-rangle)^1)
                * token("quote", rangle)
-- Any other hex string < ... >.
local t_whatsit = token("quote", langle)
               * token("string", (1-rangle)^1)
               * token("quote", rangle)
local t_keyword = token("command", name) -- a /Name used as dictionary key
local t_constant = token("constant", name) -- a /Name used as a value
local t_number = token("number", real)

-- NOTE(review): booleans and null are styled as "number" — presumably a
-- deliberate reuse of that style; confirm against the style set.
local t_reserved = token("number", P("true") + P("false") + P("null"))

-- An indirect reference "<object> <generation> R"; the two cardinals are
-- styled "warning" to make references stand out.
local t_reference = token("warning", cardinal)
                  * t_spacing
                  * token("warning", cardinal)
                  * t_spacing
                  * token("keyword", p_reference)

-- Shadows the earlier t_comment; captures the cr/lf-bounded p_comment above.
local t_comment = token("comment", p_comment)
103
-- Object header "<number> <generation> obj".
local t_openobject = token("warning", p_objectnumber)
                   * t_spacing
                   * token("warning", p_objectnumber)
                   * t_spacing
                   * token("keyword", p_obj)

local t_closeobject = token("keyword", p_endobj)

-- Dictionary and array delimiters.
local t_opendictionary = token("grouping", P("<<"))
local t_closedictionary = token("grouping", P(">>"))

local t_openarray = token("grouping", P("["))
local t_closearray = token("grouping", P("]"))

-- A stream: everything between the keywords is lexed as one "text" run.
local t_stream = token("keyword", p_stream)
               * token("text", (1 - p_endstream)^1)
               * token("keyword", p_endstream)

-- Any simple value. Order is significant: a reference must be tried before a
-- bare number, and the feff-triggered unicode string before the generic
-- hex string (t_whatsit), which would otherwise also match it.
local t_other = t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit
125
-- Grammar for a dictionary << /key value ... >>; values recurse through the
-- "whatever" rule, so dictionaries and arrays can nest arbitrarily.
local t_dictionary = { "dictionary",
    dictionary = t_opendictionary
               * (t_spaces * t_keyword * t_spaces * V("whatever"))^0
               * t_spaces
               * t_closedictionary,
    array = t_openarray
          * (t_spaces * V("whatever"))^0
          * t_spaces
          * t_closearray,
    whatever = V("dictionary")
             + V("array")
             + t_other,
}
139
-- Grammar for a complete indirect object "<num> <gen> obj ... endobj": the
-- body is a dictionary (optionally followed by its stream), an array, or a
-- simple value. The dictionary/array/whatever rules are shared with
-- t_dictionary above.
local t_object = { "object",
    dictionary = t_dictionary.dictionary,
    array = t_dictionary.array,
    whatever = t_dictionary.whatever,
    object = t_openobject
           * t_spaces
           * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + t_other)
           * t_spaces
           * t_closeobject,
    number = t_number, -- NOTE(review): this rule is never referenced — dead?
}
151
152
153
154
155
156
157
-- "trailer" keyword followed by the trailer dictionary.
local p_trailer = P("trailer")
local t_trailer = token("keyword", p_trailer)
                * t_spacing
                * t_dictionary

-- "startxref" followed by the byte offset of the cross-reference table.
local p_startxref = P("startxref")
local t_startxref = token("keyword", p_startxref)
                  * t_spacing
                  * token("number", cardinal)

-- Cross-reference section header "xref <first-object> <count>".
local p_xref = P("xref")
local t_xref = token("keyword",p_xref)
             * t_spacing
             * token("number", cardinal)
             * t_spacing
             * token("number", cardinal)
             * spacing -- NOTE(review): raw pattern, not wrapped in token() unlike the rest — confirm intended

-- An xref table entry "<offset> <generation> f|n". This t_number shadows the
-- earlier real-number one, which t_other/t_object captured before this point,
-- so only the rules list below sees this definition.
local t_number = token("number", cardinal)
              * t_spacing
              * token("number", cardinal)
              * t_spacing
              * token("keyword", S("fn"))
187
188
189
-- Wire everything into the lexer object. Rule order is significant: the
-- first rule that matches wins, so whole objects are tried before bare
-- numbers and the catch-all "rest" comes last.
pdflexer._rules = {
    { "whitespace", t_whitespace },
    { "object",     t_object     },
    { "comment",    t_comment    },
    { "trailer",    t_trailer    },
    { "startxref",  t_startxref  },
    { "xref",       t_xref       },
    { "number",     t_number     },
    { "rest",       t_rest       },
}

-- All token names map onto the shared context style set.
pdflexer._tokenstyles = context.styleset

-- Folding: obj/endobj and stream/endstream open and close foldable regions.
pdflexer._foldpattern = p_obj + p_endobj + p_stream + p_endstream

pdflexer._foldsymbols = {
    keyword = {
        obj       =  1,
        endobj    = -1,
        stream    =  1,
        endstream = -1,
    },
}

return pdflexer
219 |