local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
local match, find = string.match, string.find
local setmetatable = setmetatable

local lexers        = require("scite-context-lexer")

local patterns      = lexers.patterns
local token         = lexers.token

local lualexer      = lexers.new("lua","scite-context-lexer-lua")

local luawhitespace = lualexer.whitespace

local stringlexer   = lexers.load("scite-context-lexer-lua-longstring")
----- labellexer    = lexers.load("scite-context-lexer-lua-labelstring")

local directives = { } -- communication channel

local keywords = {
    "and", "break", "do", "else", "elseif", "end", "false", "for", "function", -- "goto",
    "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true",
    "until", "while",
}

local functions = {
    "assert", "collectgarbage", "dofile", "error", "getmetatable",
    "ipairs", "load", "loadfile", "module", "next", "pairs",
    "pcall", "print", "rawequal", "rawget", "rawset", "require",
    "setmetatable", "tonumber", "tostring", "type", "unpack", "xpcall", "select",

    "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", "utf8",
}

local constants = {
    "_G", "_VERSION", "_M", "...", "_ENV",
    -- metamethods (also in the internals list below)
    "__add", "__call", "__concat", "__div", "__idiv", "__eq", "__gc", "__index",
    "__le", "__lt", "__metatable", "__mode", "__mul", "__newindex",
    "__pow", "__sub", "__tostring", "__unm", "__len",
    "__pairs", "__ipairs",
    "__close",
    "NaN",
    "<const>", "<toclose>",
    -- special for context documentation
    "<t:number>", "<t:string>", "<t:boolean>", "<t:function>", "<t:integer>", "<t:table>", "<t:userdata>", "<t:nil>",
    "<t:direct>", "<t:node>", "<t:token>", "<t:instance>",
    "<t:false>", "<t:true>",
    "<t:file>",
    "<t:posit>", "<t:complex>", "<t:decimal>",
    "<t:mp>", "<t:mpobj>",
    "<t:pdf>", "<t:pdfobj>",
    "<t:hybrid>", "<t:whatever>"
}

local internals = { -- without the leading __ (prefixed in p_internals below)
    "add", "call", "concat", "div", "idiv", "eq", "gc", "index",
    "le", "lt", "metatable", "mode", "mul", "newindex",
    "pow", "sub", "tostring", "unm", "len",
    "pairs", "ipairs",
    "close",
}

local deprecated = {
    "arg", "arg.n",
    "loadstring", "setfenv", "getfenv",
    "pack",
}

local csnames = { -- todo: option
    "commands",
    "context",
 -- "ctxcmd",
 -- "ctx",
    "metafun",
    "metapost",
}

local level         = nil
local setlevel      = function(_,i,s) level = s return i end

local equals        = P("=")^0

local longonestart  = P("[[")
local longonestop   = P("]]")
local longonestring = (1-longonestop)^0

local longtwostart  = P("[") * Cmt(equals,setlevel) * P("[")
local longtwostop   = P("]") *     equals           * P("]")

local sentinels = { } setmetatable(sentinels, { __index = function(t,k) local v = "]" .. k .. "]" t[k] = v return v end })
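
-- The sentinels table memoizes the closing delimiter for each level of equal
-- signs, e.g.:
--
--   sentinels[""]   -- "]]"   (built on first access, then cached)
--   sentinels["=="] -- "]==]"
--
-- so scanning many long strings of the same level doesn't redo the
-- concatenation.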

local longtwostring = P(function(input,index)
    if level then
     -- local sentinel = "]" .. level .. "]"
        local sentinel = sentinels[level]
        local _, stop = find(input,sentinel,index,true)
        return stop and stop + 1 - #sentinel or #input + 1
    end
end)
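
-- A function passed to P acts as a match-time test: it gets the whole input
-- plus the current position and returns the position where matching should
-- continue. Here "level" holds the equal signs that setlevel grabbed when
-- longtwostart matched; find (in plain mode) locates the closing sentinel,
-- and stop + 1 - #sentinel stops right before it, leaving the sentinel for
-- the _end variant below. An unterminated long string just runs to the end
-- of the buffer.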

local longtwostring_body = longtwostring

local longtwostring_end = P(function(input,index)
    if level then
     -- local sentinel = "]" .. level .. "]"
        local sentinel = sentinels[level]
        local _, stop = find(input,sentinel,index,true)
        return stop and stop + 1 or #input + 1
    end
end)
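
-- The same lookup, but here stop + 1 continues past the sentinel, so this
-- pattern consumes the closing ]==] itself (tokenized as "quote" in the
-- embedding below).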

local longcomment = Cmt(#("[[" + ("[" * C(equals) * "[")), function(input,index,level)
 -- local sentinel = "]" .. level .. "]"
    local sentinel = sentinels[level]
    local _, stop = find(input,sentinel,index,true)
    return stop and stop + 1 or #input + 1
end)
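
-- Long comments get the same treatment: the and-predicate # only peeks at
-- the opening [[ or [=*[ bracket, after which the match-time function skips
-- to just past the matching closing sentinel, so a whole long comment body
-- becomes one single match.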

local space         = patterns.space -- S(" \n\r\t\f\v")
local any           = patterns.any
local eol           = patterns.eol
local exactmatch    = patterns.exactmatch
local justmatch     = patterns.justmatch

local squote        = P("'")
local dquote        = P('"')
local escaped       = P("\\") * P(1)
local dashes        = P("--")

local spacing       = token(luawhitespace, space^1)
local rest          = token("default", any)

local shortcomment  = token("comment", dashes * (1-eol)^0)
local longcomment   = token("comment", dashes * longcomment)

-- fails on a very long string with a \ at the end of a line (that needs an
-- embedded lexer) and also on a newline before a quote, but it makes no
-- sense to waste time on those cases

local shortstring   = token("quote",  dquote)
                    * token("string", (escaped + (1-dquote))^0)
                    * token("quote",  dquote)
                    + token("quote",  squote)
                    * token("string", (escaped + (1-squote))^0)
                    * token("quote",  squote)

----- longstring    = token("quote",  longonestart)
-----               * token("string", longonestring)
-----               * token("quote",  longonestop)
-----               + token("quote",  longtwostart)
-----               * token("string", longtwostring)
-----               * token("quote",  longtwostop)

local string        = shortstring
-----               + longstring

lexers.embed(lualexer, stringlexer, token("quote",longtwostart), token("string",longtwostring_body) * token("quote",longtwostring_end))
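
-- Long strings are handled by embedding the dedicated longstring lexer: the
-- third argument is the token pattern that enters the embedded lexer and the
-- fourth one ends it, so the body of a [==[ ... ]==] string gets lexed by
-- the child while the lua lexer resumes afterwards.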

local integer       = P("-")^-1 * (patterns.hexadecimal + patterns.decimal)
local number        = token("number", patterns.float + integer)
                    * (token("error",R("AZ","az","__")^1))^0
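
-- The optional error token catches letters glued to a number (as in 123abc)
-- and marks them as wrong instead of silently lexing them as an identifier.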

-- officially characters 127-255 are also ok in identifiers, but they are not
-- valid utf, so useless

----- validword     = R("AZ","az","__") * R("AZ","az","__","09")^0

local utf8character = P(1) * R("\128\191")^1
local validword     = (R("AZ","az","__") + utf8character) * (R("AZ","az","__","09") + utf8character)^0
local validsuffix   = (R("AZ","az")      + utf8character) * (R("AZ","az","__","09") + utf8character)^0
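
-- utf8character is a cheap approximation of a multibyte character: any lead
-- byte followed by one or more continuation bytes in the \128-\191 range; it
-- is not strict utf-8 validation, but for highlighting identifiers it is
-- good enough and fast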

local identifier    = token("default",validword)

----- operator      = token("special", P('..') + P('~=') + S('+-*/%^#=<>;:,.{}[]()')) -- maybe split off {}[]()
----- operator      = token("special", S('+-*/%^#=<>;:,{}[]()') + P('..') + P('.') + P('~=') ) -- maybe split off {}[]()
----- operator      = token("special", S('+-*/%^#=<>;:,{}[]().') + P('~=') ) -- no ^1 because of nested lexers
local operator      = token("special", S('+-*/%^#=<>;:,{}[]().|~')) -- no ^1 because of nested lexers

local optionalspace = spacing^0
local hasargument   = #S("{([")

-- ideally this should be an embedded lexer ..

local gotokeyword   = token("keyword", P("goto"))
                    * spacing
                    * token("grouping",validword)
local gotolabel     = token("keyword", P("::"))
                    * (spacing + shortcomment)^0
                    * token("grouping",validword)
                    * (spacing + shortcomment)^0
                    * token("keyword", P("::"))

local p_keywords    = exactmatch(keywords)
local p_functions   = exactmatch(functions)
local p_constants   = exactmatch(constants)
local p_internals   = P("__")
                    * exactmatch(internals)

local p_finish      = #(1-R("az","AZ","__"))

local p_csnames     = justmatch(csnames)
local p_ctnames     = P("ctx") * R("AZ","az","__")^0
local keyword       = token("keyword", p_keywords)
local builtin       = token("plain",   p_functions)
local constant      = token("data",    p_constants)
local internal      = token("data",    p_internals)
local csname        = token("user",    p_csnames + p_ctnames)
                    * p_finish * optionalspace * (
                        hasargument
                      + ( token("special", S(".:")) * optionalspace * token("user", validword) )^1
                      )^-1
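
-- so something like context.bold gets the whole dotted (or coloned) chain
-- shown as "user" tokens, while hasargument only peeks at an opening {, ( or
-- [ so that the argument itself still gets lexed normally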

-- we could also check S(".:") * p_keywords etc, which could be faster

local identifier    = token("default", validword)
                    * ( optionalspace * token("special", S(".:")) * optionalspace * (
                            token("warning", p_keywords) +
                            token("data", p_internals) + -- needs checking
                            token("default", validword )
                    ) )^0

lualexer.rules = {
    { "whitespace",   spacing      },
    { "keyword",      keyword      }, -- can be combined
    { "function",     builtin      }, -- can be combined
    { "constant",     constant     }, -- can be combined
    { "csname",       csname       },
    { "goto",         gotokeyword  },
    { "identifier",   identifier   },
    { "string",       string       },
    { "number",       number       },
    { "longcomment",  longcomment  },
    { "shortcomment", shortcomment },
    { "label",        gotolabel    },
    { "operator",     operator     },
    { "rest",         rest         },
}
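
-- the order matters: rules are tried one after another, so keyword, function
-- and constant have to come before the generic identifier rule, longcomment
-- has to come before shortcomment (both start with --), and rest catches
-- whatever is left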

lualexer.folding = {
    -- challenge:  if=0  then=1  else=-1  elseif=-1
    ["if"]       = { ["keyword"] =  1 }, -- if .. [then|else] .. end
    ["do"]       = { ["keyword"] =  1 }, -- [while] do .. end
    ["function"] = { ["keyword"] =  1 }, -- function .. end
    ["repeat"]   = { ["keyword"] =  1 }, -- repeat .. until
    ["until"]    = { ["keyword"] = -1 },
    ["end"]      = { ["keyword"] = -1 },
 -- ["else"]     = { ["keyword"] =  1 },
 -- ["elseif"]   = { ["keyword"] =  1 }, -- already caught by if
 -- ["elseif"]   = { ["keyword"] =  0 },
    ["["] = {
        ["comment"] =  1,
     -- ["quote"]   =  1, -- confusing
    },
    ["]"] = {
        ["comment"] = -1
     -- ["quote"]   = -1, -- confusing
    },
 -- ["("] = { ["special"] =  1 },
 -- [")"] = { ["special"] = -1 },
    ["{"] = { ["special"] =  1 },
    ["}"] = { ["special"] = -1 },
}
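
-- folding entries map a matched word (or symbol) plus its token type to a
-- fold delta: +1 opens a level, -1 closes one; keying on the token type is
-- what keeps an "end" inside a string or comment from folding, because there
-- it is not lexed as a keyword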

-- embedded in tex:

local cstoken         = R("az","AZ","\127\255") + S("@!?_")
local texcsname       = P("\\") * cstoken^1
local commentline     = P("%") * (1-S("\n\r"))^0

local texcomment      = token("comment", Cmt(commentline, function() return directives.cld_inline end))
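
-- whether a % starts a comment is decided at match time: the Cmt only
-- succeeds when directives.cld_inline is set, and that flag can be toggled
-- from outside through the directives table exported below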

local longthreestart  = P("\\!!bs")
local longthreestop   = P("\\!!es")
local longthreestring = (1-longthreestop)^0

local texstring       = token("quote",  longthreestart)
                      * token("string", longthreestring)
                      * token("quote",  longthreestop)

local texcommand      = token("warning", texcsname)

lualexer.directives = directives

lualexer.rules_cld = {
    { "whitespace",   spacing      },
    { "texstring",    texstring    },
    { "texcomment",   texcomment   },
    { "texcommand",   texcommand   },
    { "keyword",      keyword      },
    { "function",     builtin      },
    { "csname",       csname       },
    { "goto",         gotokeyword  },
    { "constant",     constant     },
    { "identifier",   identifier   },
    { "string",       string       },
    { "longcomment",  longcomment  },
    { "shortcomment", shortcomment }, -- should not be used inline, so best signal it as comment (otherwise we keep complex state till the end of the inline blob)
    { "number",       number       },
    { "label",        gotolabel    },
    { "operator",     operator     },
    { "rest",         rest         },
}

return lualexer