scite-context-lexer-tex.lua /size: 20 Kb    last modification: 2021-10-28 13:49
-- Module metadata, picked up by the context lexer framework.
local info = {
    version = 1.002,
    comment = "scintilla lpeg lexer for context",
    author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license = "see context related readme files",
}
8
9-- maybe: _LINEBYLINE variant for large files (no nesting)
10-- maybe: protected_macros
11
12--[[
13
14  experiment dd 2009/10/28 .. todo:
15
16  -- figure out if tabs instead of splits are possible
17  -- locate an option to enter name in file dialogue (like windows permits)
18  -- figure out why loading a file fails
19  -- we cannot print to the log pane
20  -- we cannot access props["keywordclass.macros.context.en"]
21  -- lexer.get_property only handles integers
22  -- we cannot run a command to get the location of mult-def.lua
23
24  -- local interface = props["keywordclass.macros.context.en"]
25  -- local interface = lexer.get_property("keywordclass.macros.context.en","")
26
27]]--
28
-- Localize globals and the lpeg constructors used throughout this file.
local global, string, table, lpeg = _G, string, table, lpeg
local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.Cp, lpeg.Cc, lpeg.Ct
local type, next = type, next
local find, match, lower, upper = string.find, string.match, string.lower, string.upper

-- The shared context lexer framework provides the helpers used below.
local lexer        = require("scite-context-lexer")
local context      = lexer.context
local patterns     = context.patterns
local inform       = context.inform

local token        = lexer.token
local exact_match  = lexer.exact_match

-- This lexer instance plus the child lexers that get embedded in it
-- (cld for lua code, mps for metapost/metafun code).
local contextlexer = lexer.new("tex","scite-context-lexer-tex")
local whitespace   = contextlexer.whitespace

local cldlexer     = lexer.load("scite-context-lexer-cld")
local mpslexer     = lexer.load("scite-context-lexer-mps")

-- Filled from the data files in the do-block below; "en" is the
-- fallback user interface.
local commands     = { en = { } }
local primitives   = { }
local helpers      = { }
local constants    = { }
52
do -- todo: only once, store in global

    -- Load the per-interface command lists. Each interface's command
    -- table is the union of the shared ("common") list, the "en" list
    -- and the interface specific list.

    local definitions = context.loaddefinitions("scite-context-data-interfaces")

    if definitions then
        local used = { }
        for interface, list in next, definitions do
            if interface ~= "common" then
                used[#used+1] = interface
                local c = { }
                -- these are shared by all interfaces
                local shared = definitions.common
                if shared then
                    for i=1,#shared do
                        c[shared[i]] = true
                    end
                end
                -- normally this one is empty
                local english = definitions.en
                if english then
                    for i=1,#english do
                        c[english[i]] = true
                    end
                end
                -- these are interface specific; "en" is already covered
                -- above (fixed: an inner "local list" used to shadow the
                -- loop variable here, so the interface specific commands
                -- never made it into the table and a missing "en" list
                -- made #list error on nil)
                if interface ~= "en" then
                    for i=1,#list do
                        c[list[i]] = true
                    end
                end
                commands[interface] = c
            end
        end
        table.sort(used)
        inform("context user interfaces '%s' supported",table.concat(used," "))
    end

    -- Helpers and constants provided by context itself; these take
    -- precedence over (overload) primitives with the same name.

    local definitions = context.loaddefinitions("scite-context-data-context")
    local overloaded  = { }

    if definitions then
        helpers   = definitions.helpers   or { }
        constants = definitions.constants or { }
        for i=1,#helpers do
            overloaded[helpers[i]] = true
        end
        for i=1,#constants do
            overloaded[constants[i]] = true
        end
    end

    -- Primitives of the various engines, also in their \normal... guise,
    -- except when overloaded by context.

    local definitions = context.loaddefinitions("scite-context-data-tex")

    if definitions then
        local function add(data,normal)
            if not data then
                return -- the data file can lack a specific engine's list
            end
            for k, v in next, data do
                if v ~= "/" and v ~= "-" then
                    if not overloaded[v] then
                        primitives[#primitives+1] = v
                    end
                    if normal then
                        v = "normal" .. v
                        if not overloaded[v] then
                            primitives[#primitives+1] = v
                        end
                    end
                end
            end
        end
        add(definitions.tex,true)
        add(definitions.etex,true)
        add(definitions.pdftex,true)
        add(definitions.aleph,true)
        add(definitions.omega,true)
        add(definitions.luatex,true)
        add(definitions.xetex,true)
    end

end
134
-- Commands of the currently active user interface; switched at runtime
-- by a "% interface=xx" preamble line (see knownpreamble below).
local currentcommands = commands.en or { }

-- Characters that may appear in a control sequence name (@!?_ occur in
-- unprotected/system macro names).
local cstoken = R("az","AZ","\127\255") + S("@!?_")

-- Match-time check: succeeds only when the scanned name is a known
-- command of the current interface.
local knowncommand = Cmt(cstoken^1, function(_,i,s)
    return currentcommands[s] and i
end)

-- Spell checking and word helpers from the shared context lexer.
local utfchar      = context.utfchar
local wordtoken    = context.patterns.wordtoken
local iwordtoken   = context.patterns.iwordtoken
local wordpattern  = context.patterns.wordpattern
local iwordpattern = context.patterns.iwordpattern
local invisibles   = context.patterns.invisibles
local checkedword  = context.checkedword
local styleofword  = context.styleofword
local setwordlist  = context.setwordlist
-- validwords/validminimum hold the active spell check word list (or
-- false when disabled) and the minimum word length to check.
local validwords   = false
local validminimum = 3
154
155-- % language=uk
156
157-- fails (empty loop message) ... latest lpeg issue?
158
-- todo: Make sure we only do this at the beginning .. a pity that we
-- can't store a state .. now is done too often.
161
-- Scan a "% ..." line near the start of the document for interface= and
-- language= directives. The Cmt always returns false, so the pattern
-- itself never consumes input: it runs purely for its side effects
-- (switching currentcommands and the spell check word list).
local knownpreamble = Cmt(P("% "), function(input,i,_) -- todo : utfbomb, was #P("% ")
    if i < 10 then
        validwords, validminimum = false, 3
        local s, e, word = find(input,"^(.-)[\n\r]",i) -- combine with match
        if word then
            local interface = match(word,"interface=([a-z][a-z]+)")
            if interface and #interface == 2 then
                inform("enabling context user interface '%s'",interface)
                currentcommands  = commands[interface] or commands.en or { }
            end
            local language = match(word,"language=([a-z][a-z]+)")
            validwords, validminimum = setwordlist(language)
        end
    end
    return false
end)
178
179-- -- the token list contains { "style", endpos } entries
180-- --
181-- -- in principle this is faster but it is also crash sensitive for large files
182
183-- local constants_hash  = { } for i=1,#constants  do constants_hash [constants [i]] = true end
184-- local helpers_hash    = { } for i=1,#helpers    do helpers_hash   [helpers   [i]] = true end
185-- local primitives_hash = { } for i=1,#primitives do primitives_hash[primitives[i]] = true end
186
187-- local specialword = Ct( P("\\") * Cmt( C(cstoken^1), function(input,i,s)
188--     if currentcommands[s] then
189--         return true, "command", i
190--     elseif constants_hash[s] then
191--         return true, "data", i
192--     elseif helpers_hash[s] then
193--         return true, "plain", i
194--     elseif primitives_hash[s] then
195--         return true, "primitive", i
196--     else -- if starts with if then primitive
197--         return true, "user", i
198--     end
199-- end) )
200
201-- local specialword = P("\\") * Cmt( C(cstoken^1), function(input,i,s)
202--     if currentcommands[s] then
203--         return true, { "command", i }
204--     elseif constants_hash[s] then
205--         return true, { "data", i }
206--     elseif helpers_hash[s] then
207--         return true, { "plain", i }
208--     elseif primitives_hash[s] then
209--         return true, { "primitive", i }
210--     else -- if starts with if then primitive
211--         return true, { "user", i }
212--     end
213-- end)
214
215-- experiment: keep space with whatever ... less tables
216
217-- 10pt
218
-- Basic building blocks for the token patterns.

local commentline            = P("%") * (1-S("\n\r"))^0
local endline                = S("\n\r")^1

local space                  = patterns.space -- S(" \n\r\t\f\v")
local any                    = patterns.any
local backslash              = P("\\")
local hspace                 = S(" \t")

local p_spacing              = space^1
local p_rest                 = any

local p_preamble             = knownpreamble
local p_comment              = commentline
----- p_command              = backslash * knowncommand
----- p_constant             = backslash * exact_match(constants)
----- p_helper               = backslash * exact_match(helpers)
----- p_primitive            = backslash * exact_match(primitives)

-- A control sequence name ends at the first non-name character or at
-- the end of the input.
local p_csdone               = #(1-cstoken) + P(-1)

-- NOTE(review): these name tables are compiled into patterns once, at
-- load time, so a later interface switch in the preamble changes
-- currentcommands but not p_command -- confirm this is intended.
local p_command              = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * p_csdone
local p_constant             = backslash * lexer.helpers.utfchartabletopattern(constants)       * p_csdone
local p_helper               = backslash * lexer.helpers.utfchartabletopattern(helpers)         * p_csdone
local p_primitive            = backslash * lexer.helpers.utfchartabletopattern(primitives)      * p_csdone

local p_ifprimitive          = P("\\if") * cstoken^1
local p_csname               = backslash * (cstoken^1 + P(1))
local p_grouping             = S("{$}")
local p_special              = S("#()[]<>=\"")
local p_extra                = S("`~%^&_-+/\'|")
local p_text                 = iwordtoken^1 --maybe add punctuation and space

-- Internal/reserved macros: \??xx namespaces and \x!... constants.
local p_reserved             = backslash * (
                                    P("??") + R("az") * P("!")
                               ) * cstoken^1

local p_number               = context.patterns.real
local p_unit                 = P("pt") + P("bp") + P("sp") + P("mm") + P("cm") + P("cc") + P("dd")
257
258-- no looking back           = #(1-S("[=")) * cstoken^3 * #(1-S("=]"))
259
260-- This one gives stack overflows:
261--
262-- local p_word = Cmt(iwordpattern, function(_,i,s)
263--     if validwords then
264--         return checkedword(validwords,validminimum,s,i)
265--     else
266--      -- return true, { "text", i }
267--         return true, "text", i
268--     end
269-- end)
270--
271-- So we use this one instead:
272
-- Style a word via the active spell check list; Cp() supplies the end
-- position the lexer needs.
----- p_word = Ct( iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() ) -- the function can be inlined
local p_word =  iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined
275
276----- p_text = (1 - p_grouping - p_special - p_extra - backslash - space + hspace)^1
277
-- keep key pressed at end of syst-aux.mkiv:
279--
280-- 0 : 15 sec
281-- 1 : 13 sec
282-- 2 : 10 sec
283--
284-- the problem is that quite some style subtables get generated so collapsing ranges helps
285
-- Repeating each token pattern collapses runs of equally styled
-- characters into a single token, which reduces the number of generated
-- style subtables (see the timings above). Option 2 additionally
-- swallows trailing whitespace into each token.
local option = 1

if option == 1 then

    p_comment                = p_comment^1
    p_grouping               = p_grouping^1
    p_special                = p_special^1
    p_extra                  = p_extra^1

    p_command                = p_command^1
    p_constant               = p_constant^1
    p_helper                 = p_helper^1
    p_primitive              = p_primitive^1
    p_ifprimitive            = p_ifprimitive^1
    p_reserved               = p_reserved^1

elseif option == 2 then

    local included           = space^0

    p_comment                = (p_comment     * included)^1
    p_grouping               = (p_grouping    * included)^1
    p_special                = (p_special     * included)^1
    p_extra                  = (p_extra       * included)^1

    p_command                = (p_command     * included)^1
    p_constant               = (p_constant    * included)^1
    p_helper                 = (p_helper      * included)^1
    p_primitive              = (p_primitive   * included)^1
    p_ifprimitive            = (p_ifprimitive * included)^1
    p_reserved               = (p_reserved    * included)^1

end
319
local p_invisible = invisibles^1

-- Wrap the raw patterns in styled tokens. (Fixed: "reserved" was
-- defined twice with identical bodies; the duplicate is removed.)
local spacing                = token(whitespace,  p_spacing    )

local rest                   = token("default",   p_rest       )
local preamble               = token("preamble",  p_preamble   )
local comment                = token("comment",   p_comment    )
local command                = token("command",   p_command    )
local constant               = token("data",      p_constant   )
local helper                 = token("plain",     p_helper     )
local primitive              = token("primitive", p_primitive  )
local ifprimitive            = token("primitive", p_ifprimitive)
local reserved               = token("reserved",  p_reserved   ) -- reserved internal preproc
local csname                 = token("user",      p_csname     )
local grouping               = token("grouping",  p_grouping   )
local number                 = token("number",    p_number     )
                             * token("constant",  p_unit       )
local special                = token("special",   p_special    )
local extra                  = token("extra",     p_extra      )
local invisible              = token("invisible", p_invisible  )
local text                   = token("default",   p_text       )
local word                   = p_word
343
344----- startluacode           = token("grouping", P("\\startluacode"))
345----- stopluacode            = token("grouping", P("\\stopluacode"))
346
-- State for tracking embedded lua: luastatus is false, "display"
-- (\startluacode ... \stopluacode) or "inline" (\ctxlua{...}); luatag
-- remembers which environment opened a display block and lualevel
-- counts brace nesting inside inline lua.
local luastatus = false
local luatag    = nil
local lualevel  = 0

-- Cmt callback: enter display lua mode, remembering the environment
-- name so only the matching \stop... ends it.
local function startdisplaylua(_,i,s)
    luatag = s
    luastatus = "display"
    cldlexer._directives.cld_inline = false
    return true
end
357
-- Cmt callback: leave display lua mode, but only when the \stop... tag
-- matches the environment that opened the block.
local function stopdisplaylua(_,i,s)
    if luatag ~= s then
        return false
    end
    cldlexer._directives.cld_inline = false
    luastatus = false
    return true
end
366
-- Cmt callback for the "{" that opens an inline lua argument: refused
-- inside display lua; otherwise either enters inline mode or, when
-- already inline, just nests one brace level deeper.
local function startinlinelua(_,i,s)
    if luastatus == "display" then
        return false
    end
    if not luastatus then
        luastatus = "inline"
        cldlexer._directives.cld_inline = true
        lualevel = 1
    else -- already inline
        lualevel = lualevel + 1
    end
    return true
end
380
-- Cmt callback for a "{" seen while checking for end-of-lua: inside
-- inline lua it only bumps the nesting level and never terminates.
local function stopinlinelua_b(_,i,s) -- {
    if luastatus == "display" then
        return false
    end
    if luastatus == "inline" then
        lualevel = lualevel + 1 -- ?
        return false
    end
    return true
end
391
-- Cmt callback for a "}" seen while checking for end-of-lua: inside
-- inline lua it pops one nesting level and terminates the embedded
-- stretch once the outermost brace closes.
local function stopinlinelua_e(_,i,s) -- }
    if luastatus == "display" then
        return false
    end
    if luastatus ~= "inline" then
        return true
    end
    lualevel = lualevel - 1
    if lualevel > 0 then -- was 0
        return false
    end
    cldlexer._directives.cld_inline = false
    luastatus = false
    return true
end
407
-- Called by the host lexer before (re)lexing so embedded-lua state from
-- a previous run cannot leak into the next one.
contextlexer._reset_parser = function()
    luastatus = false
    luatag    = nil
    lualevel  = 0
end
413
-- Environment names whose body is lua: lua, luasetups, luacode,
-- luaparameterset, ctxfunction and ctxfunctiondefinition.
local luaenvironment         = P("lua") * (P("setups") + P("code") + P("parameterset") + P(true))
                             + P("ctxfunction") * (P("definition") + P(true))

-- Macros that take an inline lua argument between braces.
local inlinelua              = P("\\") * (
                                    P("ctx") * (P("lua") + P("command") + P("late") * (P("lua") + P("command")) + P("function"))
                                  + P("cld") * (P("command") + P("context"))
                                  + P("lua") * (P("expr") + P("script") + P("thread"))
                                  + (P("direct") + P("late")) * P("lua")
                               )

-- Entry and exit points for embedded lua; the Cmt callbacks above keep
-- track of display versus inline mode and brace nesting.
local startlua               = P("\\start") * Cmt(luaenvironment,startdisplaylua)
                             + P("<?lua") * Cmt(P(true),startdisplaylua)
                             + inlinelua * space^0 * ( Cmt(P("{"),startinlinelua) )

local stoplua                = P("\\stop") * Cmt(luaenvironment,stopdisplaylua)
                             + P("?>") * Cmt(P(true),stopdisplaylua)
                             + Cmt(P("{"),stopinlinelua_b)
                             + Cmt(P("}"),stopinlinelua_e)

local startluacode           = token("embedded", startlua)
local stopluacode            = #stoplua * token("embedded", stoplua)

-- Generated runtime helpers (\clf_...).
local luacall                = P("clf_") * R("az","__","AZ")^1

-- Macros and environments whose body is metapost/metafun code.
local metafuncall            = ( P("reusable") + P("usable") + P("unique") + P("use") + P("reuse") + P("overlay") ) * ("MPgraphic")
                             + P("uniqueMPpagegraphic")
                             + P("MPpositiongraphic")

local metafunenvironment     = metafuncall -- ( P("use") + P("reusable") + P("unique") ) * ("MPgraphic")
                             + P("MP") * ( P("code")+ P("page") + P("inclusions") + P("initializations") + P("definitions") + P("extensions") + P("graphic") + P("calculation") )

local startmetafun           = P("\\start") * metafunenvironment
local stopmetafun            = P("\\stop")  * metafunenvironment -- todo match start

----- subsystem              = token("embedded", P("\\xml") * R("az")^1 + (P("\\st") * (P("art") + P("op")) * P("xmlsetups")))
local subsystemtags          = P("xml") + P("btx") -- will be pluggable or maybe even a proper list of valid commands
local subsystemmacro         = P("\\") * (subsystemtags * R("az")^1 + (R("az")-subsystemtags)^1 * subsystemtags * R("az")^1)
local subsystem              = token("embedded", subsystemmacro)

local openargument           = token("special", P("{"))
local closeargument          = token("special", P("}"))
local argumentcontent        = token("default",(1-P("}"))^0) -- maybe space needs a treatment

-- Up to two brace-delimited arguments following a metafun command.
local metafunarguments       = (spacing^0 * openargument * argumentcontent * closeargument)^-2

local startmetafuncode       = token("embedded", startmetafun) * metafunarguments
local stopmetafuncode        = token("embedded", stopmetafun)

local callers                = token("embedded", P("\\") * metafuncall) * metafunarguments
                             + token("embedded", P("\\") * luacall)

-- Switch to the metapost resp. lua child lexers between these patterns.
lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode)
lexer.embed_lexer(contextlexer, cldlexer, startluacode,     stopluacode)

-- preamble is inefficient as it probably gets called each time (so some day I really need to
-- patch the plugin)

contextlexer._preamble = preamble
472
-- The main rule set. Order matters: the first matching rule wins, so
-- more specific patterns (known commands, primitives) come before the
-- generic csname catch-all.
contextlexer._rules = {
    { "whitespace",  spacing     },
 -- { "preamble",    preamble    },
    { "word",        word        },
    { "text",        text        }, -- non words
    { "comment",     comment     },
    { "constant",    constant    },
 -- { "subsystem",   subsystem   },
    { "callers",     callers     },
    { "subsystem",   subsystem   },
    { "ifprimitive", ifprimitive },
    { "helper",      helper      },
    { "command",     command     },
    { "primitive",   primitive   },
 -- { "subsystem",   subsystem   },
    { "reserved",    reserved    },
    { "csname",      csname      },
 -- { "whatever",    specialword }, -- not yet, crashes
    { "grouping",    grouping    },
 -- { "number",      number      },
    { "special",     special     },
    { "extra",       extra       },
    { "invisible",   invisible   },
    { "rest",        rest        },
}
498
499-- Watch the text grabber, after all, we're talking mostly of text (beware,
500-- no punctuation here as it can be special). We might go for utf here.
501
-- The web variant shares all rules with the regular set except "word"
-- (no spell checking) and, when the snippet helper is available, gains
-- a "web" rule just before the catch-all "rest" rule. Building the list
-- once avoids keeping two nearly identical copies in sync.
local web = lexer.loadluafile("scite-context-lexer-web-snippets")

local webrules = {
    { "whitespace",  spacing     },
    { "text",        text        }, -- non words
    { "comment",     comment     },
    { "constant",    constant    },
    { "callers",     callers     },
    { "ifprimitive", ifprimitive },
    { "helper",      helper      },
    { "command",     command     },
    { "primitive",   primitive   },
    { "reserved",    reserved    },
    { "csname",      csname      },
    { "grouping",    grouping    },
    { "special",     special     },
    { "extra",       extra       },
    { "invisible",   invisible   },
    { "rest",        rest        },
}

if web then
    lexer.inform("supporting web snippets in tex lexer")
    -- insert before the final "rest" rule
    table.insert(webrules,#webrules,{ "web", web.pattern })
else
    lexer.report("not supporting web snippets in tex lexer")
end

contextlexer._rules_web = webrules
552
contextlexer._tokenstyles = context.styleset

-- Folding: \start... opens a fold, \stop... closes one.
local environment = {
    ["\\start"] = 1, ["\\stop"] = -1,
 -- ["\\begin"] = 1, ["\\end" ] = -1,
}

-- local block = {
--     ["\\begin"] = 1, ["\\end" ] = -1,
-- }

-- Braces fold as a group of their own.
local group = {
    ["{"] = 1, ["}"] = -1,
}

contextlexer._foldpattern = P("\\" ) * (P("start") + P("stop")) + S("{}") -- separate entry else interference

-- Map token styles to fold increments; the same environment table is
-- reused for every style a \start/\stop macro may get.
contextlexer._foldsymbols = { -- these need to be style references .. todo: multiple styles
    _patterns    = {
        "\\start", "\\stop", -- regular environments
     -- "\\begin", "\\end",  -- (moveable) blocks
        "[{}]",
    },
    ["command"]  = environment,
    ["constant"] = environment,
    ["data"]     = environment,
    ["user"]     = environment,
    ["embedded"] = environment,
    ["helper"]   = environment,
    ["plain"]    = environment,
    ["grouping"] = group,
}

-- context.inspect(contextlexer)

return contextlexer
589