local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
local match, find = string.match, string.find
local setmetatable = setmetatable

local lexers        = require("scite-context-lexer")

local patterns      = lexers.patterns
local token         = lexers.token

local lualexer      = lexers.new("lua","scite-context-lexer-lua")

local luawhitespace = lualexer.whitespace

local stringlexer   = lexers.load("scite-context-lexer-lua-longstring")
----- labellexer    = lexers.load("scite-context-lexer-lua-labelstring")

local directives = { } -- communication channel

local keywords = {
    "and", "break", "do", "else", "elseif", "end", "false", "for", "function", -- "goto",
    "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true",
    "until", "while",
}

local functions = {
    "assert", "collectgarbage", "dofile", "error", "getmetatable",
    "ipairs", "load", "loadfile", "module", "next", "pairs",
    "pcall", "print", "rawequal", "rawget", "rawset", "require",
    "setmetatable", "tonumber", "tostring", "type", "unpack", "xpcall", "select",

    "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", "utf8",
}

local constants = {
    "_G", "_VERSION", "_M", "...", "_ENV",
    -- metamethods also live here (cf. internals below)
    "__add", "__call", "__concat", "__div", "__idiv", "__eq", "__gc", "__index",
    "__le", "__lt", "__metatable", "__mode", "__mul", "__newindex",
    "__pow", "__sub", "__tostring", "__unm", "__len",
    "__pairs", "__ipairs",
    "__close",
    "NaN",
    "<const>", "<toclose>",
}
local internals = { -- the __ prefix is added by p_internals below
    "add", "call", "concat", "div", "idiv", "eq", "gc", "index",
    "le", "lt", "metatable", "mode", "mul", "newindex",
    "pow", "sub", "tostring", "unm", "len",
    "pairs", "ipairs",
    "close",
}

local deprecated = {
    "arg", "arg.n",
    "loadstring", "setfenv", "getfenv",
    "pack",
}

local csnames = { -- todo: option
    "commands",
    "context",
 -- "ctxcmd",
 -- "ctx",
    "metafun",
    "metapost",
}

local level         = nil
local setlevel      = function(_,i,s) level = s return i end

local equals        = P("=")^0

local longonestart  = P("[[")
local longonestop   = P("]]")
local longonestring = (1-longonestop)^0

local longtwostart  = P("[") * Cmt(equals,setlevel) * P("[")
local longtwostop   = P("]") *     equals           * P("]")

local sentinels = { } setmetatable(sentinels, { __index = function(t,k) local v = "]" .. k .. "]" t[k] = v return v end })
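
-- The __index metamethod memoizes closing delimiters per level: the first
-- lookup of, say, sentinels["=="] builds "]==]" and caches it, so repeated
-- long strings and comments of the same level reuse the string:
--
--   sentinels[""]   -- "]]"
--   sentinels["=="] -- "]==]"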

local longtwostring = P(function(input,index)
    if level then
     -- local sentinel = "]" .. level .. "]"
        local sentinel = sentinels[level]
        local _, stop = find(input,sentinel,index,true)
        return stop and stop + 1 - #sentinel or #input + 1
    end
end)
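
-- Given the level that setlevel recorded, this scans ahead with a plain
-- (non-pattern) find for the closing sentinel and stops just before it, so
-- that longtwostring_end below can still color the closing bracket; an
-- unterminated string simply runs to the end of the input. So in
--
--   [==[ data ]] more ]==]
--
-- the body is " data ]] more " and "]==]" is left for longtwostring_end.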

local longtwostring_body = longtwostring

local longtwostring_end = P(function(input,index)
    if level then
     -- local sentinel = "]" .. level .. "]"
        local sentinel = sentinels[level]
        local _, stop = find(input,sentinel,index,true)
        return stop and stop + 1 or #input + 1
    end
end)

local longcomment = Cmt(#("[[" + ("[" * C(equals) * "[")), function(input,index,level)
 -- local sentinel = "]" .. level .. "]"
    local sentinel = sentinels[level]
    local _, stop = find(input,sentinel,index,true)
    return stop and stop + 1 or #input + 1
end)
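
-- The lookahead (#) inspects the opening bracket without consuming it and
-- hands the captured level to the match time function, which then jumps past
-- the whole comment body in one step. So after the two dashes of
--
--   --[=[ a comment with ]] inside ]=]
--
-- everything up to and including "]=]" is swallowed (or the rest of the input
-- when the comment is left unterminated); a plain --[[ ends up with the empty
-- level and therefore the sentinel "]]".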

local space         = patterns.space -- S(" \n\r\t\f\v")
local any           = patterns.any
local eol           = patterns.eol
local exactmatch    = patterns.exactmatch
local justmatch     = patterns.justmatch

local squote        = P("'")
local dquote        = P('"')
local escaped       = P("\\") * P(1)
local dashes        = P("--")

local spacing       = token(luawhitespace, space^1)
local rest          = token("default", any)

local shortcomment  = token("comment", dashes * (1-eol)^0)
local longcomment   = token("comment", dashes * longcomment)

-- fails on very long strings with \ at the end of lines (needs an embedded lexer)
-- and also on a newline before " but it makes no sense to waste time on that

local shortstring   = token("quote",  dquote)
                    * token("string", (escaped + (1-dquote))^0)
                    * token("quote",  dquote)
                    + token("quote",  squote)
                    * token("string", (escaped + (1-squote))^0)
                    * token("quote",  squote)
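
-- The escaped alternative is tried first, so a backslash plus any character
-- stays inside the string instead of terminating it:
--
--   local s = "a \"quoted\" word"   -- quote, string, quote
--   local t = 'single \' works too' -- same, with single quotes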

----- longstring    = token("quote",  longonestart)
-----               * token("string", longonestring)
-----               * token("quote",  longonestop)
-----               + token("quote",  longtwostart)
-----               * token("string", longtwostring)
-----               * token("quote",  longtwostop)

local string        = shortstring
-----               + longstring

lexers.embed(lualexer, stringlexer, token("quote",longtwostart), token("string",longtwostring_body) * token("quote",longtwostring_end))
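
-- Long strings are not matched by the string rule above but handed over to
-- the embedded longstring lexer: the first pattern recognizes the opening
-- bracket (recording its level via setlevel), the second one closes the
-- embedding at the matching bracket. That is why the longstring token above
-- stays commented.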

local integer       = P("-")^-1 * (patterns.hexadecimal + patterns.decimal)
local number        = token("number", patterns.float + integer)
                    * (token("error",R("AZ","az","__")^1))^0

-- officially characters 127-255 are ok in identifiers but they are not utf so that is useless here

----- validword     = R("AZ","az","__") * R("AZ","az","__","09")^0

local utf8character = P(1) * R("\128\191")^1
local validword     = (R("AZ","az","__") + utf8character) * (R("AZ","az","__","09") + utf8character)^0
local validsuffix   = (R("AZ","az")      + utf8character) * (R("AZ","az","__","09") + utf8character)^0
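
-- The utf8character pattern is deliberately loose: any byte followed by one
-- or more continuation bytes (128-191) passes, so this is no strict utf
-- validator. It is enough to let an identifier like
--
--   local naïve = true
--
-- lex as one default token instead of leaking bytes into the rest rule.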

local identifier    = token("default",validword) -- overridden below by the dotted variant

----- operator      = token("special", P('..') + P('~=') + S('+-*/%^#=<>;:,.{}[]()')) -- maybe split off {}[]()
----- operator      = token("special", S('+-*/%^#=<>;:,{}[]()') + P('..') + P('.') + P('~=') ) -- maybe split off {}[]()
----- operator      = token("special", S('+-*/%^#=<>;:,{}[]().') + P('~=') ) -- no ^1 because of nested lexers
local operator      = token("special", S('+-*/%^#=<>;:,{}[]().|~')) -- no ^1 because of nested lexers

local optionalspace = spacing^0
local hasargument   = #S("{([")

-- ideally this should be an embedded lexer ..

local gotokeyword   = token("keyword", P("goto"))
                    * spacing
                    * token("grouping",validword)
local gotolabel     = token("keyword", P("::"))
                    * (spacing + shortcomment)^0
                    * token("grouping",validword)
                    * (spacing + shortcomment)^0
                    * token("keyword", P("::"))
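
-- For example:
--
--   goto continue    -- keyword plus grouping
--   ::continue::     -- keyword, grouping, keyword
--
-- where spacing and even short comments may sit between the colons and the
-- label name.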

local p_keywords    = exactmatch(keywords)
local p_functions   = exactmatch(functions)
local p_constants   = exactmatch(constants)
local p_internals   = P("__")
                    * exactmatch(internals)

local p_finish      = #(1-R("az","AZ","__"))

local p_csnames     = justmatch(csnames)
local p_ctnames     = P("ctx") * R("AZ","az","__")^0
local keyword       = token("keyword", p_keywords)
local builtin       = token("plain",   p_functions)
local constant      = token("data",    p_constants)
local internal      = token("data",    p_internals)
local csname        = token("user",    p_csnames + p_ctnames)
                    * p_finish * optionalspace * (
                        hasargument
                      + ( token("special", S(".:")) * optionalspace * token("user", validword) )^1
                      )^-1
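
-- So in "context.bold { }" the name, the period and the field all get user or
-- special styles, after which normal lexing resumes at the argument; a bare
-- "context" or any ctx* name is recognized as well. The p_finish lookahead
-- keeps a longer identifier like "contexts" from matching partially.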

-- we could also check S(".:") * p_keywords etc, which could be faster

local identifier    = token("default", validword)
                    * ( optionalspace * token("special", S(".:")) * optionalspace * (
                            token("warning", p_keywords) +
                            token("data", p_internals) + -- needs checking
                            token("default", validword )
                    ) )^0
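
-- For example, "table.concat" becomes default, special, default, while a
-- keyword in field position, as in "t.end" (invalid lua anyway), is flagged
-- with the warning style, and "t.__index" picks up the data style of the
-- internals.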

lualexer.rules = {
    { "whitespace",   spacing      },
    { "keyword",      keyword      }, -- can be combined
    { "function",     builtin      }, -- can be combined
    { "constant",     constant     }, -- can be combined
    { "csname",       csname       },
    { "goto",         gotokeyword  },
    { "identifier",   identifier   },
    { "string",       string       },
    { "number",       number       },
    { "longcomment",  longcomment  },
    { "shortcomment", shortcomment },
    { "label",        gotolabel    },
    { "operator",     operator     },
    { "rest",         rest         },
}
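
-- The rules are tried in order, so keywords, builtins and csnames win over
-- the generic identifier rule, and longcomment has to come before
-- shortcomment because both start with two dashes; rest is the catch-all.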

lualexer.folding = {
    -- challenge:  if=0  then=1  else=-1  elseif=-1
    ["if"]       = { ["keyword"] =  1 }, -- if .. [then|else] .. end
    ["do"]       = { ["keyword"] =  1 }, -- [while] do .. end
    ["function"] = { ["keyword"] =  1 }, -- function .. end
    ["repeat"]   = { ["keyword"] =  1 }, -- repeat .. until
    ["until"]    = { ["keyword"] = -1 },
    ["end"]      = { ["keyword"] = -1 },
 -- ["else"]     = { ["keyword"] =  1 },
 -- ["elseif"]   = { ["keyword"] =  1 }, -- already caught by if
 -- ["elseif"]   = { ["keyword"] =  0 },
    ["["] = {
        ["comment"] =  1,
     -- ["quote"]   =  1, -- confusing
    },
    ["]"] = {
        ["comment"] = -1
     -- ["quote"]   = -1, -- confusing
    },
 -- ["("] = { ["special"] =  1 },
 -- [")"] = { ["special"] = -1 },
    ["{"] = { ["special"] =  1 },
    ["}"] = { ["special"] = -1 },
}
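
-- Each entry maps a word or symbol, given its token style, to a fold delta:
-- "function" styled as keyword opens a fold (+1) and the matching "end"
-- closes it (-1); brackets only fold when styled as comment, braces when
-- styled as special.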

-- embedded in tex:

local cstoken         = R("az","AZ","\127\255") + S("@!?_")
local texcsname       = P("\\") * cstoken^1
local commentline     = P("%") * (1-S("\n\r"))^0

local texcomment      = token("comment", Cmt(commentline, function() return directives.cld_inline end))

local longthreestart  = P("\\!!bs")
local longthreestop   = P("\\!!es")
local longthreestring = (1-longthreestop)^0

local texstring       = token("quote",  longthreestart)
                      * token("string", longthreestring)
                      * token("quote",  longthreestop)
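
-- In cld (context lua document) mode, material between the \!!bs and \!!es
-- markers is shown as a string with quote styled delimiters:
--
--   \!!bs some tex material \!!es
--
-- and a percent sign only starts a comment when the host sets
-- directives.cld_inline (see texcomment above).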

local texcommand      = token("warning", texcsname)

lualexer.directives = directives

lualexer.rules_cld = {
    { "whitespace",   spacing      },
    { "texstring",    texstring    },
    { "texcomment",   texcomment   },
    { "texcommand",   texcommand   },
    { "keyword",      keyword      },
    { "function",     builtin      },
    { "csname",       csname       },
    { "goto",         gotokeyword  },
    { "constant",     constant     },
    { "identifier",   identifier   },
    { "string",       string       },
    { "longcomment",  longcomment  },
    { "shortcomment", shortcomment }, -- should not be used inline, so best to signal it as a comment (otherwise complex state till the end of the inline)
    { "number",       number       },
    { "label",        gotolabel    },
    { "operator",     operator     },
    { "rest",         rest         },
}

return lualexer