local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for xml",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
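-- An lpeg lexer for xml as used in the SciTE context setup. It colors tags,
-- attributes, entities, doctype declarations and processing instructions, and
-- hands comments, cdata sections, script elements and <?lua ... ?> blocks to
-- embedded child lexers.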
local global, string, table, lpeg = _G, string, table, lpeg
local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
local type = type
local match, find = string.match, string.find

local lexers = require("scite-context-lexer")

local patterns = lexers.patterns
local token    = lexers.token

local xmllexer      = lexers.new("xml","scite-context-lexer-xml")
local xmlwhitespace = xmllexer.whitespace

local xmlcommentlexer = lexers.load("scite-context-lexer-xml-comment")
local xmlcdatalexer   = lexers.load("scite-context-lexer-xml-cdata")
local xmlscriptlexer  = lexers.load("scite-context-lexer-xml-script")
local lualexer        = lexers.load("scite-context-lexer-lua")

local space = patterns.space
local any   = patterns.any

local dquote    = P('"')
local squote    = P("'")
local colon     = P(":")
local semicolon = P(";")
local equal     = P("=")
local ampersand = P("&")
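-- Names accept ascii letters, digits, underscore, hyphen, period and any
-- multibyte utf-8 sequence; no distinction is made between first and
-- following characters.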
local name = (
    R("az","AZ","09")
  + S("_-.")
  + patterns.utf8two + patterns.utf8three + patterns.utf8four
)^1
local openbegin        = P("<")
local openend          = P("</")
local closebegin       = P("/>") + P(">")
local closeend         = P(">")
local opencomment      = P("<!--")
local closecomment     = P("-->")
local openinstruction  = P("<?")
local closeinstruction = P("?>")
local opencdata        = P("<![CDATA[")
local closecdata       = P("]]>")
local opendoctype      = P("<!DOCTYPE")
local closedoctype     = P("]>") + P(">")
local openscript       = openbegin * (P("script") + P("SCRIPT")) * (1-closeend)^0 * closeend
local closescript      = openend   * (P("script") + P("SCRIPT")) * closeend
local charpattern      = lexers.helpers.charpattern

local openlua  = "<?lua"
local closelua = "?>"
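-- An entity reference runs from the ampersand up to and including the
-- terminating semicolon.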
local entity = ampersand * (1-semicolon)^1 * semicolon

local wordtoken    = patterns.wordtoken
local iwordtoken   = patterns.iwordtoken
local wordpattern  = patterns.wordpattern
local iwordpattern = patterns.iwordpattern
local invisibles   = patterns.invisibles
local styleofword  = lexers.styleofword
local setwordlist  = lexers.setwordlist
local validwords   = false
local validminimum = 3
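-- The preamble peeks at the start of the document: when a context directive
-- like <?context-directive editor language nl ?> follows the xml declaration,
-- the corresponding wordlist is loaded for spell checking. The P(true)
-- alternative makes the pattern always applicable, and the Cmt returns false
-- so no input is consumed.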
xmllexer.preamble = Cmt(P("<?xml " + P(true)), function(input,i)
    validwords   = false
    validminimum = 3
    local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-directive%s+editor%s+language%s+(..)%s+%?>")
    if language then
        validwords, validminimum = setwordlist(language)
    end
    return false
end)

local t_word =
    C(iwordpattern) * Cp() / function(s,p) return styleofword(validwords,validminimum,s,p) end

local t_rest =
    token("default", charpattern)

local t_text =
    token("default", (charpattern-S("<>&")-space)^1)

local t_spacing =
    token(xmlwhitespace, space^1)

local t_optionalwhitespace =
    token("default", space^1)^0

local t_localspacing =
    token("default", space^1)
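-- Quoted attribute values: one variant per quote kind. (Note that t_sstring
-- handles double quotes and t_dstring single quotes; since the two are always
-- used together the naming mixup is harmless.)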
local t_sstring =
    token("quote",dquote)
  * token("string",(1-dquote)^0)
  * token("quote",dquote)

local t_dstring =
    token("quote",squote)
  * token("string",(1-squote)^0)
  * token("quote",squote)
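-- Doctype declarations, including an optional internal subset, for instance:
--
--   <!DOCTYPE root SYSTEM "root.dtd">
--   <!DOCTYPE root PUBLIC "-//example//DTD root//EN" "root.dtd" [
--       <!ELEMENT root (#PCDATA)>
--       <!ENTITY  tex  "TeX">
--   ]>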
local t_docstr = t_dstring + t_sstring

local t_docent = token("command",P("<!ENTITY"))
    * t_optionalwhitespace
    * token("keyword",name)
    * t_optionalwhitespace
    * (
        (
            token("constant",P("SYSTEM"))
          * t_optionalwhitespace
          * t_docstr
          * t_optionalwhitespace
          * token("constant",P("NDATA"))
          * t_optionalwhitespace
          * token("keyword",name)
        ) + (
            token("constant",P("PUBLIC"))
          * t_optionalwhitespace
          * t_docstr
        ) + (
            t_docstr
        )
      )
    * t_optionalwhitespace
    * token("command",P(">"))

local t_docele = token("command",P("<!ELEMENT"))
    * t_optionalwhitespace
    * token("keyword",name)
    * t_optionalwhitespace
    * token("command",P("("))
    * (
        t_localspacing
      + token("constant",P("#CDATA") + P("#PCDATA") + P("ANY"))
      + token("text",P(","))
      + token("comment",(1-S(",)"))^1)
      )^1
    * token("command",P(")"))
    * t_optionalwhitespace
    * token("command",P(">"))

local t_docset = token("command",P("["))
    * t_optionalwhitespace
    * ((t_optionalwhitespace * (t_docent + t_docele))^1 + token("comment",(1-P("]"))^0))
    * t_optionalwhitespace
    * token("command",P("]"))

local t_doctype = token("command",P("<!DOCTYPE"))
    * t_optionalwhitespace
    * token("keyword",name)
    * t_optionalwhitespace
    * (
        (
            token("constant",P("PUBLIC"))
          * t_optionalwhitespace
          * t_docstr
          * t_optionalwhitespace
          * t_docstr
          * t_optionalwhitespace
        ) + (
            token("constant",P("SYSTEM"))
          * t_optionalwhitespace
          * t_docstr
          * t_optionalwhitespace
        )
      )^-1
    * t_docset^-1
    * t_optionalwhitespace
    * token("command",P(">"))
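-- Comments, cdata sections, script elements and <?lua ... ?> blocks are
-- delegated to embedded child lexers between the given start and stop tokens.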
lexers.embed(xmllexer, lualexer,        token("command", openlua),     token("command", closelua))
lexers.embed(xmllexer, xmlcommentlexer, token("command", opencomment), token("command", closecomment))
lexers.embed(xmllexer, xmlcdatalexer,   token("command", opencdata),   token("command", closecdata))
lexers.embed(xmllexer, xmlscriptlexer,  token("command", openscript),  token("command", closescript))
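-- Element and attribute names: an optional namespace prefix up to the colon
-- is rendered plain, the local part as keyword (tags) or constant (keys).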
local t_name =
    token("plain",name * colon)^-1
  * token("keyword",name)
local t_key =
    token("plain",name * colon)^-1
  * token("constant",name)

local t_attributes = (
    t_optionalwhitespace
  * t_key
  * t_optionalwhitespace
  * token("plain",equal)
  * t_optionalwhitespace
  * (t_dstring + t_sstring)
  * t_optionalwhitespace
)^0

local t_open =
    token("keyword",openbegin)
  * (
        t_name
      * t_optionalwhitespace
      * t_attributes
      * token("keyword",closebegin)
      +
        token("error",(1-closebegin)^1)
    )

local t_close =
    token("keyword",openend)
  * (
        t_name
      * t_optionalwhitespace
      * token("keyword",closeend)
      +
        token("error",(1-closeend)^1)
    )

local t_entity =
    token("constant",entity)

local t_instruction =
    token("command",openinstruction * P("xml"))
  * t_optionalwhitespace
  * t_attributes
  * t_optionalwhitespace
  * token("command",closeinstruction)
  + token("command",openinstruction * name)
  * token("default",(1-closeinstruction)^1)
  * token("command",closeinstruction)

local t_invisible =
    token("invisible",invisibles^1)
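-- The rules are tried in the order listed, so doctype, instruction and tag
-- rules get a chance before the generic rest rule catches any other character.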
xmllexer.rules = {
    { "whitespace",  t_spacing },
    { "word",        t_word },
    { "doctype",     t_doctype },
    { "instruction", t_instruction },
    { "close",       t_close },
    { "open",        t_open },
    { "entity",      t_entity },
    { "invisible",   t_invisible },
    { "rest",        t_rest },
}
xmllexer.folding = {
    ["</"]   = { ["keyword"] = -1 },
    ["/>"]   = { ["keyword"] = -1 },
    ["<"]    = { ["keyword"] =  1 },
    ["<?"]   = { ["command"] =  1 },
    ["<!--"] = { ["command"] =  1 },
    ["?>"]   = { ["command"] = -1 },
    ["-->"]  = { ["command"] = -1 },
    [">"]    = { ["command"] = -1 },
}

return xmllexer