-- Module metadata in the format shared by the scite-context-lexer-* family;
-- consumed by the ConTeXt lexer framework (required below as
-- "scite-context-lexer").
local info = {
 version = 1.002,
 comment = "scintilla lpeg lexer for xml",
 author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
 copyright = "PRAGMA ADE / ConTeXt Development Team",
 license = "see context related readme files",
}
8
9
10
11
12
13
14
-- Localize globals and the lpeg constructors used throughout this file
-- (locals avoid repeated global table lookups in lpeg-heavy code).
local global, string, table, lpeg = _G, string, table, lpeg
local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
local type = type
local match, find = string.match, string.find

-- The shared framework for all scite-context lexers.
local lexers = require("scite-context-lexer")

local patterns = lexers.patterns -- common lpeg building blocks (space, any, utf8*, ...)
local token = lexers.token -- token(style,pattern): tags matched text with a style name

-- This lexer instance and its dedicated whitespace style name.
local xmllexer = lexers.new("xml","scite-context-lexer-xml")
local xmlwhitespace = xmllexer.whitespace

-- Child lexers embedded further down for comments, cdata, <script> bodies
-- and <?lua ... ?> islands.
local xmlcommentlexer = lexers.load("scite-context-lexer-xml-comment")
local xmlcdatalexer = lexers.load("scite-context-lexer-xml-cdata")
local xmlscriptlexer = lexers.load("scite-context-lexer-xml-script")
local lualexer = lexers.load("scite-context-lexer-lua")
32
33
-- Basic single-character patterns.
local space = patterns.space
local any = patterns.any

local dquote = P('"')
local squote = P("'")
local colon = P(":")
local semicolon = P(";")
local equal = P("=")
local ampersand = P("&")
43
44
45
46
47
48
49
50
51
52
53
54
55
-- An xml name: ascii letters/digits, a few punctuation characters, or any
-- multi-byte utf-8 sequence. NOTE(review): deliberately looser than the xml
-- spec (e.g. a leading digit is accepted) -- fine for highlighting purposes.
local name = (
 R("az","AZ","09")
 + S("_-.")
 + patterns.utf8two + patterns.utf8three + patterns.utf8four
 )^1
-- Markup delimiters.
local openbegin = P("<")
local openend = P("</")
local closebegin = P("/>") + P(">") -- end of an open tag: self-closing or not
local closeend = P(">")
local opencomment = P("<!--")
local closecomment = P("-->")
local openinstruction = P("<?")
local closeinstruction = P("?>")
local opencdata = P("<![CDATA[")
local closecdata = P("]]>")
local opendoctype = P("<!DOCTYPE")
local closedoctype = P("]>") + P(">")
-- A whole <script ...> open tag resp. </script> close tag; only all-lower or
-- all-upper case is recognized (mixed case like <Script> is not).
local openscript = openbegin * (P("script") + P("SCRIPT")) * (1-closeend)^0 * closeend
local closescript = openend * (P("script") + P("SCRIPT")) * closeend

-- Plain strings, not patterns: lexers.embed takes these for the lua island.
local openlua = "<?lua"
local closelua = "?>"
78
79
80
81
82
83
84
85
-- An entity reference: &...; -- the content between & and ; is not validated.
local entity = ampersand * (1-semicolon)^1 * semicolon

-- Helpers for spell checking of words in text content.
local utfchar = lexers.helpers.utfchar
local wordtoken = patterns.wordtoken
local iwordtoken = patterns.iwordtoken
local wordpattern = patterns.wordpattern
local iwordpattern = patterns.iwordpattern
local invisibles = patterns.invisibles
local styleofword = lexers.styleofword
local setwordlist = lexers.setwordlist
local validwords = false -- current word list; (re)set in the preamble handler
local validminimum = 3 -- minimum word length that gets checked
98
99
100
101
102
-- Runs at the start of a lex: when the document opens with an
-- <?xml ...?> declaration followed by a
-- <?context-directive editor language xx ?> instruction, activate the spell
-- check word list for that language; otherwise disable checking. The match-time
-- capture always returns false so the preamble consumes no input.
xmllexer.preamble = Cmt(P("<?xml ") + P(true), function(input,i)
    validwords = false
    validminimum = 3
    local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-directive%s+editor%s+language%s+(..)%s+%?>")
    if language then
        validwords, validminimum = setwordlist(language)
    end
    return false
end)
112
-- A text word: captured together with its end position and styled according
-- to the active spell-check word list (if any).
local t_word =
    C(iwordpattern) * Cp() / function(word,position)
        return styleofword(validwords,validminimum,word,position)
    end
115
-- Fallback: any single character, default styled.
local t_rest =
 token("default", any)

-- A run of plain text: everything up to markup, an entity or whitespace.
local t_text =
 token("default", (1-S("<>&")-space)^1)

-- Whitespace between tokens, styled with this lexer's own whitespace style.
local t_spacing =
 token(xmlwhitespace, space^1)

-- Optional whitespace inside markup (also matches the empty string).
local t_optionalwhitespace =
 token("default", space^1)^0

-- Mandatory whitespace inside markup.
local t_localspacing =
 token("default", space^1)
130
131
132
133
134
135
-- Quoted string values for attributes and doctype parts.
-- NOTE(review): names fixed to match content -- t_sstring now lexes a
-- single-quoted string and t_dstring a double-quoted one (they were swapped).
-- Behavior is unchanged: the two are only ever used together as an ordered
-- choice, and their first characters are disjoint.
local t_sstring =
    token("quote",squote)
  * token("string",(1-squote)^0)
  * token("quote",squote)

local t_dstring =
    token("quote",dquote)
  * token("string",(1-dquote)^0)
  * token("quote",dquote)
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
-- A doctype string value: either quote style.
local t_docstr = t_dstring + t_sstring
170
-- An entity declaration inside the internal dtd subset, in one of three
-- shapes: <!ENTITY n SYSTEM "..." NDATA n>, <!ENTITY n PUBLIC "...">,
-- or <!ENTITY n "...">.
local t_docent = token("command",P("<!ENTITY"))
    * t_optionalwhitespace
    * token("keyword",name)
    * t_optionalwhitespace
    * (
        (
            token("constant",P("SYSTEM"))
            * t_optionalwhitespace
            * t_docstr
            * t_optionalwhitespace
            * token("constant",P("NDATA"))
            * t_optionalwhitespace
            * token("keyword",name)
        ) + (
            token("constant",P("PUBLIC"))
            * t_optionalwhitespace
            * t_docstr
        ) + (
            t_docstr
        )
    )
    * t_optionalwhitespace
    * token("command",P(">"))
194
-- An element declaration: <!ELEMENT name ( ... )>. The content model between
-- the parentheses is lexed loosely: known constants and commas are styled,
-- anything else up to a comma or closing parenthesis is styled as comment.
local t_docele = token("command",P("<!ELEMENT"))
    * t_optionalwhitespace
    * token("keyword",name)
    * t_optionalwhitespace
    * token("command",P("("))
    * (
        t_localspacing
        + token("constant",P("#CDATA") + P("#PCDATA") + P("ANY"))
        + token("text",P(","))
        + token("comment",(1-S(",)"))^1)
    )^1
    * token("command",P(")"))
    * t_optionalwhitespace
    * token("command",P(">"))
209
-- The internal dtd subset of a doctype: [ ... ] containing entity and element
-- declarations; anything unrecognized up to the closing bracket is styled as
-- comment.
local t_docset = token("command",P("["))
    * t_optionalwhitespace
    * ((t_optionalwhitespace * (t_docent + t_docele))^1 + token("comment",(1-P("]"))^0))
    * t_optionalwhitespace
    * token("command",P("]"))
215
-- A full doctype declaration: <!DOCTYPE name [PUBLIC "..." "..." | SYSTEM
-- "..."] [ internal subset ] > -- the external id and subset are optional.
local t_doctype = token("command",P("<!DOCTYPE"))
    * t_optionalwhitespace
    * token("keyword",name)
    * t_optionalwhitespace
    * (
        (
            token("constant",P("PUBLIC"))
            * t_optionalwhitespace
            * t_docstr
            * t_optionalwhitespace
            * t_docstr
            * t_optionalwhitespace
        ) + (
            token("constant",P("SYSTEM"))
            * t_optionalwhitespace
            * t_docstr
            * t_optionalwhitespace
        )
    )^-1
    * t_docset^-1
    * t_optionalwhitespace
    * token("command",P(">"))
238
-- Hand everything between the given open/close tokens to a child lexer:
-- <?lua ... ?> islands, comments, cdata sections and script elements.
lexers.embed(xmllexer, lualexer, token("command", openlua), token("command", closelua))
lexers.embed(xmllexer, xmlcommentlexer, token("command", opencomment), token("command", closecomment))
lexers.embed(xmllexer, xmlcdatalexer, token("command", opencdata), token("command", closecdata))
lexers.embed(xmllexer, xmlscriptlexer, token("command", openscript), token("command", closescript))
243
244
245
246
247
248
249
250
251
-- An element name with an optional namespace prefix; the "prefix:" part is
-- styled plain, the local name as keyword.
local t_name =
 token("plain",name * colon)^-1
 * token("keyword",name)
255
256
257
258
259
260
261
262
263
-- An attribute key with an optional namespace prefix; the "prefix:" part is
-- styled plain, the local name as constant.
local t_key =
 token("plain",name * colon)^-1
 * token("constant",name)

-- Zero or more key="value" (or key='value') attribute pairs.
local t_attributes = (
    t_optionalwhitespace
    * t_key
    * t_optionalwhitespace
    * token("plain",equal)
    * t_optionalwhitespace
    * (t_dstring + t_sstring)
    * t_optionalwhitespace
)^0
277
-- An open tag: <name attr="..." ...> or <name ... />; if the tag body cannot
-- be parsed, everything up to the closing delimiter is styled as error.
local t_open =
    token("keyword",openbegin)
    * (
        t_name
        * t_optionalwhitespace
        * t_attributes
        * token("keyword",closebegin)
        +
        token("error",(1-closebegin)^1)
    )
288
-- A close tag: </name>; malformed content before > is styled as error.
local t_close =
    token("keyword",openend)
    * (
        t_name
        * t_optionalwhitespace
        * token("keyword",closeend)
        +
        token("error",(1-closeend)^1)
    )
298
-- An entity reference, e.g. &amp;.
local t_entity =
 token("constant",entity)

-- A processing instruction. The <?xml declaration gets its attributes lexed;
-- any other <?name ... ?> instruction is lexed as one default-styled run.
local t_instruction =
    token("command",openinstruction * P("xml"))
    * t_optionalwhitespace
    * t_attributes
    * t_optionalwhitespace
    * token("command",closeinstruction)
    + token("command",openinstruction * name)
    * token("default",(1-closeinstruction)^1)
    * token("command",closeinstruction)

-- Invisible (control-ish) characters, made visible via their own style.
local t_invisible =
 token("invisible",invisibles^1)
314
-- Rule list tried in order by the framework; whitespace and words first,
-- then markup constructs, with t_rest as the catch-all.
xmllexer.rules = {
    { "whitespace", t_spacing },
    { "word", t_word },
    { "doctype", t_doctype },
    { "instruction", t_instruction },
    { "close", t_close },
    { "open", t_open },
    { "entity", t_entity },
    { "invisible", t_invisible },
    { "rest", t_rest },
}
329
-- Fold points: delimiter text mapped to { style = level-delta }. Open tags,
-- instructions and comments increase the fold level; their closing
-- counterparts decrease it. NOTE(review): the plain ">" entry presumably
-- closes command-styled constructs like doctypes -- confirm against the
-- framework's folder.
xmllexer.folding = {
    ["</"] = { ["keyword"] = -1 },
    ["/>"] = { ["keyword"] = -1 },
    ["<"] = { ["keyword"] = 1 },
    ["<?"] = { ["command"] = 1 },
    ["<!--"] = { ["command"] = 1 },
    ["?>"] = { ["command"] = -1 },
    ["-->"] = { ["command"] = -1 },
    [">"] = { ["command"] = -1 },
}
340
-- NOTE(review): removed a stray trailing "|" character after this return;
-- it made the whole chunk a Lua syntax error.
return xmllexer