-- Module information record in the conventional scite-context lexer shape
-- (presumably inspected by the lexer loader for version/credits — confirm).
local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for xml",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
8
9
10
11
12
13
14
-- Localize globals and library functions: locals are register accesses,
-- global lookups are table accesses, and these get hit on every token.
-- NOTE(review): global, type, find and C are localized but not referenced
-- later in this file — possibly kept for symmetry with sibling lexers.
local global, string, table, lpeg = _G, string, table, lpeg
local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
local type = type
local match, find = string.match, string.find

-- The shared context lexer framework and its helper/pattern tables.
local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns

local token = lexer.token
local exact_match = lexer.exact_match -- NOTE(review): unused in this file

-- This lexer plus the child lexers embedded below for comments, cdata,
-- script element bodies and <?lua ... ?> islands.
local xmllexer = lexer.new("xml","scite-context-lexer-xml")
local whitespace = xmllexer.whitespace

local xmlcommentlexer = lexer.load("scite-context-lexer-xml-comment")
local xmlcdatalexer = lexer.load("scite-context-lexer-xml-cdata")
local xmlscriptlexer = lexer.load("scite-context-lexer-xml-script")
local lualexer = lexer.load("scite-context-lexer-lua")
34
local space = patterns.space
local any = patterns.any

-- Single-character building blocks.
local dquote = P('"')
local squote = P("'")
local colon = P(":")
local semicolon = P(";")
local equal = P("=")
local ampersand = P("&")

-- A name: letters, digits, "_", "-" and ".". Deliberately looser than the
-- XML spec (a digit or "-" may start a name here); good enough for styling.
local name = (R("az","AZ","09") + S("_-."))^1

-- Structural markers. Where two alternatives share a prefix the longer one
-- comes first ("/>"" before ">") because lpeg's + is an ordered choice.
local openbegin = P("<")
local openend = P("</")
local closebegin = P("/>") + P(">")
local closeend = P(">")
local opencomment = P("<!--")
local closecomment = P("-->")
local openinstruction = P("<?")
local closeinstruction = P("?>")
local opencdata = P("<![CDATA[")
local closecdata = P("]]>")
-- NOTE(review): opendoctype/closedoctype are not referenced later; t_doctype
-- below builds its own P("<!DOCTYPE") / P(">") inline.
local opendoctype = P("<!DOCTYPE")
local closedoctype = P("]>") + P(">")
-- Script element delimiters; only all-lower "script" or all-upper "SCRIPT"
-- are recognized, not mixed case.
local openscript = openbegin * (P("script") + P("SCRIPT")) * (1-closeend)^0 * closeend
local closescript = openend * (P("script") + P("SCRIPT")) * closeend

-- Delimiters for embedded lua islands (plain strings; the embedder wraps
-- them in tokens below).
local openlua = "<?lua"
local closelua = "?>"
63
64
65
66
67
68
69
70
-- An entity reference: "&" up to and including the next ";". The text in
-- between is not validated against the known entity names.
local entity = ampersand * (1-semicolon)^1 * semicolon

-- Spell-checking machinery shared with the other context lexers.
-- NOTE(review): utfchar, wordtoken, iwordtoken, wordpattern and checkedword
-- are localized here but never referenced later in this file.
local utfchar = context.utfchar
local wordtoken = context.patterns.wordtoken
local iwordtoken = context.patterns.iwordtoken
local wordpattern = context.patterns.wordpattern
local iwordpattern = context.patterns.iwordpattern
local invisibles = context.patterns.invisibles
local checkedword = context.checkedword
local styleofword = context.styleofword
local setwordlist = context.setwordlist
-- Mutable state: t_preamble (below) rewrites these when the document carries
-- an editor-language directive; t_word consults them for every word.
local validwords = false
local validminimum = 3
84
85
86
87
88
-- Peeks at the xml preamble for a "<?context-directive editor language xx ?>"
-- hint and (re)loads the word list used for spell checking. The match-time
-- function always returns false, so nothing is consumed and normal lexing
-- continues at the same position.
local t_preamble = Cmt(P("<?xml "), function(body,position,_)
    if position >= 200 then
        -- too far into the document to be the real preamble; leave the
        -- current word-list state alone
        return false
    end
    -- reset, then look for the two-letter language code in the directive
    validwords, validminimum = false, 3
    local language = match(body,"^<%?xml[^>]*%?>%s*<%?context%-directive%s+editor%s+language%s+(..)%s+%?>")
    if language then
        validwords, validminimum = setwordlist(language)
    end
    return false
end)
102
-- A word to be spell-checked: iwordpattern captures it, styleofword returns
-- the style depending on the current validwords list, and Cp() supplies the
-- end position (presumably the shape the framework expects — confirm).
local t_word =
    iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp()

-- Fallback: any single character, styled "default".
local t_rest =
    token("default", any)

-- A run of plain text: no markup start, no entity start, no space.
-- NOTE(review): t_text is not listed in xmllexer._rules below — confirm
-- whether it is intentionally unused.
local t_text =
    token("default", (1-S("<>&")-space)^1)

-- Inter-token whitespace, styled with the lexer's own whitespace style.
local t_spacing =
    token(whitespace, space^1)

-- Optional whitespace inside markup, styled "default".
local t_optionalwhitespace =
    token("default", space^1)^0

-- Mandatory whitespace inside markup, styled "default".
local t_localspacing =
    token("default", space^1)
121
122
123
124
125
126
-- Quoted attribute values: delimiters get the "quote" style, the payload
-- the "string" style. Built by one helper for both quote flavours.
-- Note the historical naming: t_sstring matches DOUBLE-quoted text and
-- t_dstring SINGLE-quoted text.
local function t_quotedstring(delimiter)
    return token("quote",delimiter)
         * token("string",(1-delimiter)^0)
         * token("quote",delimiter)
end

local t_sstring = t_quotedstring(dquote)
local t_dstring = t_quotedstring(squote)
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
-- Either quote flavour for strings inside the doctype.
local t_docstr = t_dstring + t_sstring

-- <!ENTITY name SYSTEM "..." NDATA name>, <!ENTITY name PUBLIC "...">, or
-- <!ENTITY name "...">; the most specific alternative comes first because
-- lpeg's + is an ordered choice.
local t_docent = token("command",P("<!ENTITY"))
               * t_optionalwhitespace
               * token("keyword",name)
               * t_optionalwhitespace
               * (
                   (
                       token("constant",P("SYSTEM"))
                     * t_optionalwhitespace
                     * t_docstr
                     * t_optionalwhitespace
                     * token("constant",P("NDATA"))
                     * t_optionalwhitespace
                     * token("keyword",name)
                   ) + (
                       token("constant",P("PUBLIC"))
                     * t_optionalwhitespace
                     * t_docstr
                   ) + (
                       t_docstr
                   )
                 )
               * t_optionalwhitespace
               * token("command",P(">"))

-- <!ELEMENT name ( ... )>; the content model is styled loosely: known
-- keywords get "constant", commas "text", anything else up to "," or ")"
-- is lumped together as "comment".
-- NOTE(review): "#CDATA" is not valid in element content models per the XML
-- spec (only #PCDATA/ANY/EMPTY) — kept here, presumably for leniency.
local t_docele = token("command",P("<!ELEMENT"))
               * t_optionalwhitespace
               * token("keyword",name)
               * t_optionalwhitespace
               * token("command",P("("))
               * (
                   t_localspacing
                 + token("constant",P("#CDATA") + P("#PCDATA") + P("ANY"))
                 + token("text",P(","))
                 + token("comment",(1-S(",)"))^1)
                 )^1
               * token("command",P(")"))
               * t_optionalwhitespace
               * token("command",P(">"))

-- The internal subset: "[ ... ]" holding entity/element declarations, or a
-- blanket "comment" run when none parse.
local t_docset = token("command",P("["))
               * t_optionalwhitespace
               * ((t_optionalwhitespace * (t_docent + t_docele))^1 + token("comment",(1-P("]"))^0))
               * t_optionalwhitespace
               * token("command",P("]"))

-- <!DOCTYPE name [PUBLIC "..." "..." | SYSTEM "..."] [internal subset] >
-- (external-id part and internal subset are both optional via ^-1).
local t_doctype = token("command",P("<!DOCTYPE"))
                * t_optionalwhitespace
                * token("keyword",name)
                * t_optionalwhitespace
                * (
                    (
                        token("constant",P("PUBLIC"))
                      * t_optionalwhitespace
                      * t_docstr
                      * t_optionalwhitespace
                      * t_docstr
                      * t_optionalwhitespace
                    ) + (
                        token("constant",P("SYSTEM"))
                      * t_optionalwhitespace
                      * t_docstr
                      * t_optionalwhitespace
                    )
                  )^-1
                * t_docset^-1
                * t_optionalwhitespace
                * token("command",P(">"))
229
-- Hand regions between the given open/close tokens to child lexers:
-- lua code inside <?lua ... ?>, comment text inside <!-- -->, cdata inside
-- <![CDATA[ ]]>, and script element content between <script...> </script>.
lexer.embed_lexer(xmllexer, lualexer,        token("command", openlua),     token("command", closelua))
lexer.embed_lexer(xmllexer, xmlcommentlexer, token("command", opencomment), token("command", closecomment))
lexer.embed_lexer(xmllexer, xmlcdatalexer,   token("command", opencdata),   token("command", closecdata))
lexer.embed_lexer(xmllexer, xmlscriptlexer,  token("command", openscript),  token("command", closescript))
234
235
236
237
238
239
240
241
242
-- An element name: optional "prefix:" styled "plain", local name styled
-- "keyword".
local t_name =
    token("plain",name * colon)^-1
  * token("keyword",name)

-- An attribute name: optional "prefix:" styled "plain", local name styled
-- "constant".
local t_key =
    token("plain",name * colon)^-1
  * token("constant",name)

-- Zero or more key="value" / key='value' pairs, whitespace tolerated
-- around every part.
local t_attributes = (
    t_optionalwhitespace
  * t_key
  * t_optionalwhitespace
  * token("plain",equal)
  * t_optionalwhitespace
  * (t_dstring + t_sstring)
  * t_optionalwhitespace
)^0
268
-- An opening (or self-closing) tag: "<" name attributes ">"|"/>". Anything
-- after "<" that does not parse is swallowed up to the closing marker and
-- styled "error".
local t_open =
    token("keyword",openbegin)
  * (
        t_name
      * t_optionalwhitespace
      * t_attributes
      * token("keyword",closebegin)
      +
        token("error",(1-closebegin)^1)
    )

-- A closing tag "</name>", with the same error fallback.
local t_close =
    token("keyword",openend)
  * (
        t_name
      * t_optionalwhitespace
      * token("keyword",closeend)
      +
        token("error",(1-closeend)^1)
    )

-- An entity reference, styled "constant".
local t_entity =
    token("constant",entity)

-- Processing instructions: the first alternative is the xml declaration
-- ("<?xml ... ?>") with highlighted attributes; the second is any other PI,
-- whose payload is styled "default". (Lua's * binds tighter than +, so each
-- chain forms one alternative of the ordered choice.)
local t_instruction =
    token("command",openinstruction * P("xml"))
  * t_optionalwhitespace
  * t_attributes
  * t_optionalwhitespace
  * token("command",closeinstruction)
  + token("command",openinstruction * name)
  * token("default",(1-closeinstruction)^1)
  * token("command",closeinstruction)

-- Runs of invisible characters, surfaced with their own style.
local t_invisible =
    token("invisible",invisibles^1)
305
306
307
308
-- The rule list; presumably tried in order (earlier entries win), so the
-- specific constructs (doctype, instruction, tags) precede the generic
-- fallbacks (entity, invisible, rest) — confirm against the framework.
-- NOTE(review): t_text defined above is not listed here.
xmllexer._rules = {
    { "whitespace",  t_spacing },
    { "preamble",    t_preamble },
    { "word",        t_word },
    { "doctype",     t_doctype },
    { "instruction", t_instruction },
    { "close",       t_close },
    { "open",        t_open },
    { "entity",      t_entity },
    { "invisible",   t_invisible },
    { "rest",        t_rest },
}

-- Shared style set for all context lexers.
xmllexer._tokenstyles = context.styleset
326
-- Fold-candidate scanner. lpeg's + is an ordered choice, so longer markers
-- must precede their prefixes: with the original order
--   P("</") + P("<") + P("/>") + P("<!--") + P("-->")
-- the bare P("<") matched before P("<!--") ever got a chance, so a comment
-- opener was always reported as a plain "<" and the "<!--" entry in
-- _foldsymbols below could never be reached through this pattern. Listing
-- "<!--" (and "-->") before "<" fixes that; "</" and "<!--" cannot shadow
-- each other because they diverge at the second character.
xmllexer._foldpattern = P("</") + P("<!--") + P("-->") + P("/>") + P("<")
329
-- Fold symbol tables: for a marker found in the text, the table named after
-- the style at that position maps the marker to a fold-level delta
-- (+1 opens a fold, -1 closes one).
xmllexer._foldsymbols = {
    _patterns = {
        "</",
        "/>",
        "<",
        -- NOTE(review): "<!--" and "-->" are mapped under "command" below
        -- but are missing from this pattern list — confirm whether the
        -- folder scans via _patterns or via _foldpattern.
    },
    ["keyword"] = {
        ["</"] = -1,
        ["/>"] = -1,
        ["<"]  =  1,
    },
    ["command"] = {
        ["</"]   = -1,
        ["/>"]   = -1,
        ["<!--"] =  1,
        ["-->"]  = -1,
        ["<"]    =  1,
    },
}

return xmllexer