local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for context",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

local string, table, lpeg = string, table, lpeg
local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.Cp, lpeg.Cc, lpeg.Ct
local type, next = type, next
local concat = table.concat
local find, match, lower, upper, gsub = string.find, string.match, string.lower, string.upper, string.gsub

local lexers        = require("scite-context-lexer")

local patterns      = lexers.patterns
local token         = lexers.token
local report        = lexers.report

local contextlexer  = lexers.new("tex","scite-context-lexer-tex")
local texwhitespace = contextlexer.whitespace

local cldlexer      = lexers.load("scite-context-lexer-cld")
-- local cldlexer      = lexers.load("scite-context-lexer-lua")
local mpslexer      = lexers.load("scite-context-lexer-mps")

local commands      = { en = { } }
local primitives    = { }
local helpers       = { }
local constants     = { }

do -- todo: only once, store in global

    -- commands helpers primitives

    local collected   = { }

    local definitions = lexers.loaddefinitions("scite-context-data-interfaces")

    if definitions then
        local used = { }
        for interface, list in next, definitions do
            if interface ~= "common" then
                used[#used+1] = interface
                local c = { }
                -- these are shared
                local shared = definitions.common -- don't shadow the loop's interface list
                if shared then
                    for i=1,#shared do
                        c[shared[i]] = true
                    end
                end
                -- normally this one is empty
                shared = definitions.en
                if shared then
                    for i=1,#shared do
                        c[shared[i]] = true
                    end
                end
                -- these are interface specific
                if interface ~= "en" then
                    for i=1,#list do
                        c[list[i]] = true
                    end
                end
                commands[interface] = c
            end
        end
        table.sort(used)
        report("context user interfaces '%s' supported",concat(used," "))
    end

    local definitions = lexers.loaddefinitions("scite-context-data-context")
    local overloaded  = { }

    if definitions then
        helpers   = definitions.helpers   or { }
        constants = definitions.constants or { }
        for i=1,#helpers do
            overloaded[helpers[i]] = true
        end
        for i=1,#constants do
            overloaded[constants[i]] = true
        end
    end

    local definitions = lexers.loaddefinitions("scite-context-data-tex")

    if definitions then
        local function add(data,normal)
            for k, v in next, data do
                if v ~= "/" and v ~= "-" then
                    if not overloaded[v] then
                        primitives[#primitives+1] = v
                    end
                    if normal then
                        v = "normal" .. v
                        if not overloaded[v] then
                            primitives[#primitives+1] = v
                        end
                    end
                end
            end
        end
        add(definitions.tex,true)
        add(definitions.etex,true)
        add(definitions.pdftex,true)
     -- add(definitions.aleph,true)
     -- add(definitions.omega,true)
        add(definitions.luatex,true)
        add(definitions.xetex,true)
    end

end

local currentcommands = commands.en or { }

local cstoken = R("az","AZ","\127\255") + S("@!?_")

local knowncommand = Cmt(cstoken^1, function(_,i,s)
    return currentcommands[s] and i
end)
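-- a match time check: a run of cstokens only counts as a known command when the
-- name is present in the currently active interface's command table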

local utfchar      = lexers.helpers.utfchar
local wordtoken    = lexers.patterns.wordtoken
local iwordtoken   = lexers.patterns.iwordtoken
local wordpattern  = lexers.patterns.wordpattern
local iwordpattern = lexers.patterns.iwordpattern
local invisibles   = lexers.patterns.invisibles
local styleofword  = lexers.styleofword
local setwordlist  = lexers.setwordlist

local validwords   = false
local validminimum = 3

-- % language=uk (the space before the key is mandatory)

contextlexer.preamble = Cmt(P("% ") + P(true), function(input,i)
    currentcommands = false
    validwords      = false
    validminimum    = 3
    local s, e, line = find(input,"^(.-)[\n\r]",1) -- combine with match
    if line then
        local interface = match(line," interface=([a-z][a-z]+)")
        local language  = match(line," language=([a-z][a-z]+)")
        if interface and #interface == 2 then
         -- report("enabling context user interface '%s'",interface)
            currentcommands  = commands[interface]
        end
        if language then
            validwords, validminimum = setwordlist(language)
        end
    end
    if not currentcommands then
        currentcommands = commands.en or { }
    end
    return false -- fail on purpose, so we backtrack and the line is then handled as a comment
end)
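-- So a first line like "% interface=nl language=uk" switches to the command set
-- of that (two letter) user interface and loads the uk word list for spell
-- checking, assuming such a list is installed; without it we fall back on "en".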

local commentline            = P("%") * (1-S("\n\r"))^0
local endline                = S("\n\r")^1

local space                  = patterns.space -- S(" \n\r\t\f\v")
local any                    = patterns.any
local exactmatch             = patterns.exactmatch
local backslash              = P("\\")
local hspace                 = S(" \t")

local p_spacing              = space^1
local p_rest                 = any

----- p_preamble             = knownpreamble -- not defined here; the preamble is handled by contextlexer.preamble above
local p_comment              = commentline
local p_command              = backslash * knowncommand
----- p_constant             = backslash * exactmatch(constants)
----- p_helper               = backslash * exactmatch(helpers)
----- p_primitive            = backslash * exactmatch(primitives)

local p_csdone               = #(1-cstoken) + P(-1)
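-- p_csdone makes sure that a matched name is the complete control sequence:
-- the next character may not be a cstoken (or we are at the end of the input)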

----- p_command              = backslash * lexers.helpers.utfchartabletopattern(currentcommands) * p_csdone
local p_constant             = backslash * lexers.helpers.utfchartabletopattern(constants)       * p_csdone
local p_helper               = backslash * lexers.helpers.utfchartabletopattern(helpers)         * p_csdone
local p_primitive            = backslash * lexers.helpers.utfchartabletopattern(primitives)      * p_csdone

local p_ifprimitive          = P("\\if") * cstoken^1
local p_csname               = backslash * (cstoken^1 + P(1))
local p_grouping             = S("{$}")
local p_special              = S("#()[]<>=\"")
local p_extra                = S("`~%^&_-+/\'|")
local p_text                 = iwordtoken^1 -- maybe add punctuation and space

local p_reserved             = backslash * (
                                    P("??") + R("az") * P("!")
                               ) * cstoken^1

local p_number               = lexers.patterns.real
----- p_unit                 = P("pt") + P("bp") + P("sp") + P("mm") + P("cm") + P("cc") + P("dd") + P("dk")
local p_unit                 = lexers.helpers.utfchartabletopattern { "pt", "bp", "sp", "mm", "cm", "cc", "dd", "dk" }

-- no looking back           = #(1-S("[=")) * cstoken^3 * #(1-S("=]"))

local p_word                 = C(iwordpattern) * Cp() / function(s,p) return styleofword(validwords,validminimum,s,p) end -- a bit of a hack

----- p_text                 = (1 - p_grouping - p_special - p_extra - backslash - space + hspace)^1

-- timing test: keep a key pressed at the end of syst-aux.mkiv, per option below:
--
-- 0 : 15 sec
-- 1 : 13 sec
-- 2 : 10 sec
--
-- the problem is that quite a few style subtables get generated, so collapsing
-- ranges helps, although in the new scite approach this is less of an issue (no
-- lua tables)

local option = 0 -- otherwise we get e.g. \btx... and \xml... in the command colors ...

-- if option == 1 then
--
--     p_comment                = p_comment^1
--     p_grouping               = p_grouping^1
--     p_special                = p_special^1
--     p_extra                  = p_extra^1
--
--     p_command                = p_command^1
--     p_constant               = p_constant^1
--     p_helper                 = p_helper^1
--     p_primitive              = p_primitive^1
--     p_ifprimitive            = p_ifprimitive^1
--     p_reserved               = p_reserved^1
--
-- elseif option == 2 then
--
--     local included           = space^0
--
--     p_comment                = (p_comment     * included)^1
--     p_grouping               = (p_grouping    * included)^1
--     p_special                = (p_special     * included)^1
--     p_extra                  = (p_extra       * included)^1
--
--     p_command                = (p_command     * included)^1
--     p_constant               = (p_constant    * included)^1
--     p_helper                 = (p_helper      * included)^1
--     p_primitive              = (p_primitive   * included)^1
--     p_ifprimitive            = (p_ifprimitive * included)^1
--     p_reserved               = (p_reserved    * included)^1
--
-- end

local p_invisible = invisibles^1

local spacing                = token(texwhitespace, p_spacing    )

local rest                   = token("default",     p_rest       )
local comment                = token("comment",     p_comment    )
local command                = token("command",     p_command    )
local constant               = token("data",        p_constant   )
local helper                 = token("plain",       p_helper     )
local primitive              = token("primitive",   p_primitive  )
local ifprimitive            = token("primitive",   p_ifprimitive)
local reserved               = token("reserved",    p_reserved   ) -- reserved internal preproc
local csname                 = token("user",        p_csname     )
local grouping               = token("grouping",    p_grouping   )
local number                 = token("number",      p_number     )
                             * token("constant",    p_unit       )
local special                = token("special",     p_special    )
local extra                  = token("extra",       p_extra      )
local invisible              = token("invisible",   p_invisible  )
local text                   = token("default",     p_text       )
local word                   = p_word

----- startluacode           = token("grouping",    P("\\startluacode"))
----- stopluacode            = token("grouping",    P("\\stopluacode"))

local luastatus = false
local luatag    = nil
local lualevel  = 0

local function startdisplaylua(_,i,s)
    luatag = s
    luastatus = "display"
    cldlexer.directives.cld_inline = false
    return true
end

local function stopdisplaylua(_,i,s)
    local ok = luatag == s
    if ok then
        cldlexer.directives.cld_inline = false
        luastatus = false
    end
    return ok
end

local function startinlinelua(_,i,s)
    if luastatus == "display" then
        return false
    elseif not luastatus then
        luastatus = "inline"
        cldlexer.directives.cld_inline = true
        lualevel = 1
        return true
    else -- if luastatus == "inline" then
        lualevel = lualevel + 1
        return true
    end
end

local function stopinlinelua_b(_,i,s) -- {
    if luastatus == "display" then
        return false
    elseif luastatus == "inline" then
        lualevel = lualevel + 1 -- ?
        return false
    else
        return true
    end
end

local function stopinlinelua_e(_,i,s) -- }
    if luastatus == "display" then
        return false
    elseif luastatus == "inline" then
        lualevel = lualevel - 1
        local ok = lualevel <= 0 -- was 0
        if ok then
            cldlexer.directives.cld_inline = false
            luastatus = false
        end
        return ok
    else
        return true
    end
end

contextlexer.resetparser = function()
    luastatus = false
    luatag    = nil
    lualevel  = 0
end

local luaenvironment         = P("lua") * (P("setups") + P("code") + P("parameterset") + P(true))
                             + P("ctxfunction") * (P("definition") + P(true))

local inlinelua              = P("\\") * (
                                    P("ctx") * (P("lua") + P("command") + P("late") * (P("lua") + P("command")) + P("function"))
                                  + P("cld") * (P("command") + P("context"))
                                  + P("lua") * (P("expr") + P("script") + P("thread"))
                                  + (P("direct") + P("late")) * P("lua")
                               )

local startlua               = P("\\start") * Cmt(luaenvironment,startdisplaylua)
                             + P("<?lua") * Cmt(P(true),startdisplaylua)
                             + inlinelua * space^0 * ( Cmt(P("{"),startinlinelua) )

local stoplua                = P("\\stop") * Cmt(luaenvironment,stopdisplaylua)
                             + P("?>") * Cmt(P(true),stopdisplaylua)
                             + Cmt(P("{"),stopinlinelua_b)
                             + Cmt(P("}"),stopinlinelua_e)

local startluacode           = token("embedded", startlua)
local stopluacode            = #stoplua * token("embedded", stoplua)
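-- so "\startluacode ... \stopluacode" (and "<?lua ... ?>") embed a display lua
-- snippet, while for instance "\ctxlua{...}" embeds an inline one, with lualevel
-- keeping track of nested braces till the matching "}" ends the snippet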

local luacall                = P("clf_") * R("az","__","AZ")^1

local metafuncall            = ( P("reusable") + P("usable") + P("unique") + P("use") + P("reuse") + P("overlay") ) * ("MPgraphic")
                             + P("uniqueMPpagegraphic")
                             + P("MPpositiongraphic")

local metafunenvironment     = metafuncall -- ( P("use") + P("reusable") + P("unique") ) * ("MPgraphic")
                             + P("MP") * ( P("code") + P("page") + P("inclusions") + P("initializations") + P("definitions") + P("extensions") + P("graphic") + P("calculation") )

local startmetafun           = P("\\start") * metafunenvironment
local stopmetafun            = P("\\stop")  * metafunenvironment -- todo match start

----- subsystem              = token("embedded", P("\\xml") * R("az")^1 + (P("\\st") * (P("art") + P("op")) * P("xmlsetups")))
local subsystemtags          = P("xml") + P("btx") -- will be pluggable or maybe even a proper list of valid commands
local subsystemmacro         = P("\\") * (subsystemtags * R("az")^1 + (R("az")-subsystemtags)^1 * subsystemtags * R("az")^1)
local subsystem              = token("embedded", subsystemmacro)

local openargument           = token("special", P("{"))
local closeargument          = token("special", P("}"))
local argumentcontent        = token("default",(1-P("}"))^0) -- maybe space needs a treatment

local metafunarguments       = (spacing^0 * openargument * argumentcontent * closeargument)^-2
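-- this also colors (at most) the two brace delimited arguments that follow such
-- a metafun command, as in for instance \useMPgraphic{name}{settings}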

local startmetafuncode       = token("embedded", startmetafun) * metafunarguments
local stopmetafuncode        = token("embedded", stopmetafun)

local callers                = token("embedded", P("\\") * metafuncall) * metafunarguments
                             + token("embedded", P("\\") * luacall)

lexers.embed(contextlexer, mpslexer, startmetafuncode, stopmetafuncode)
lexers.embed(contextlexer, cldlexer, startluacode,     stopluacode)

contextlexer.rules = {
    { "whitespace",  spacing     },
    { "word",        word        },
    { "text",        text        }, -- non words
    { "comment",     comment     },
    { "constant",    constant    },
 -- { "subsystem",   subsystem   },
    { "callers",     callers     },
    { "subsystem",   subsystem   },
    { "ifprimitive", ifprimitive },
    { "helper",      helper      },
    { "command",     command     },
    { "primitive",   primitive   },
 -- { "subsystem",   subsystem   },
    { "reserved",    reserved    },
    { "csname",      csname      },
 -- { "whatever",    specialword }, -- not yet, crashes
    { "grouping",    grouping    },
 -- { "number",      number      },
    { "special",     special     },
    { "extra",       extra       },
    { "invisible",   invisible   },
    { "rest",        rest        },
}

-- Watch the text grabber, after all, we're talking mostly of text (beware,
-- no punctuation here as it can be special). We might go for utf here.

local web = lexers.loadluafile("scite-context-lexer-web-snippets")

if web then

    contextlexer.rules_web = {
        { "whitespace",  spacing     },
        { "text",        text        }, -- non words
        { "comment",     comment     },
        { "constant",    constant    },
        { "callers",     callers     },
        { "ifprimitive", ifprimitive },
        { "helper",      helper      },
        { "command",     command     },
        { "primitive",   primitive   },
        { "reserved",    reserved    },
        { "csname",      csname      },
        { "grouping",    grouping    },
        { "special",     special     },
        { "extra",       extra       },
        { "invisible",   invisible   },
        { "web",         web.pattern },
        { "rest",        rest        },
    }

else

    contextlexer.rules_web = {
        { "whitespace",  spacing     },
        { "text",        text        }, -- non words
        { "comment",     comment     },
        { "constant",    constant    },
        { "callers",     callers     },
        { "ifprimitive", ifprimitive },
        { "helper",      helper      },
        { "command",     command     },
        { "primitive",   primitive   },
        { "reserved",    reserved    },
        { "csname",      csname      },
        { "grouping",    grouping    },
        { "special",     special     },
        { "extra",       extra       },
        { "invisible",   invisible   },
        { "rest",        rest        },
    }

end

contextlexer.folding = {
    ["\\start"] = {
        ["command"]  = 1,
        ["constant"] = 1,
        ["data"]     = 1,
        ["user"]     = 1,
        ["embedded"] = 1,
     -- ["helper"]   = 1,
        ["plain"]    = 1,
    },
    ["\\stop"] = {
        ["command"]  = -1,
        ["constant"] = -1,
        ["data"]     = -1,
        ["user"]     = -1,
        ["embedded"] = -1,
     -- ["helper"]   = -1,
        ["plain"]    = -1,
    },
    ["{"] = {
        ["grouping"] = 1,
    },
    ["}"] = {
        ["grouping"] = -1,
    },
}
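-- so a \start... or \stop... that ends up in one of the listed styles opens or
-- closes a fold level, and the grouping braces do the same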

do

    local lastcurrent = nil
    local allcommands = { }

    function contextlexer.completion(str)
        if currentcommands then
            if lastcurrent ~= currentcommands then
                allcommands = { }
                for k, v in next, currentcommands do
                    allcommands[#allcommands+1] = "\\" .. k
                end
                -- helpers, primitives and constants are lists, not hashes
                for i=1,#helpers do
                    allcommands[#allcommands+1] = "\\" .. helpers[i]
                end
                for i=1,#primitives do
                    allcommands[#allcommands+1] = "\\" .. primitives[i]
                end
             -- for i=1,#constants do
             --     allcommands[#allcommands+1] = "\\" .. constants[i]
             -- end
                lastcurrent = currentcommands
                table.sort(allcommands)
            end
            if find(str,"^\\") then
                local str  = "^" .. str
                local list = { }
                local n    = 0
                for i=1,#allcommands do
                    local c = allcommands[i]
                    if find(c,str) then
                        n = n + 1 ; list[n] = c
                    end
                end
                if n > 0 then
                    return concat(list," ")
                end
            end
        end
    end

end
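-- calling contextlexer.completion("\\start") then returns a space separated
-- string of matching candidates (commands, helpers and primitives) that an
-- editor can turn into a completion list; what comes back of course depends on
-- the interface that is active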

return contextlexer