-- scite-context-lexer-bibtex.lua / size: 6337 b / last modification: 2020-07-01 14:35
-- Descriptive metadata about this lexer file (version, purpose, authorship, licensing).
local info = {
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    comment   = "scintilla lpeg lexer for bibtex",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
    version   = 1.002,
}
8
9local global, string, table, lpeg = _G, string, table, lpeg
10local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
11local type = type
12
13local lexer       = require("scite-context-lexer")
14local context     = lexer.context
15local patterns    = context.patterns
16
17local token       = lexer.token
18local exact_match = lexer.exact_match
19
-- Create the lexer object for bibtex and fetch the value that is used further on
-- as the token name for runs of spacing.
local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
local whitespace  = bibtexlexer.whitespace

local escape = P("\\")
local left   = P('{')
local right  = P('}')

-- Grammar for brace balanced content, stored in the shared patterns table:
--
--   content : any run of backslash escaped braces, non brace characters and groups
--   group   : content wrapped in a pair of braces
--
-- The initial rule is "content", so the pattern itself also matches the empty string.
patterns.balanced = P {
    "content",
    content = (escape * (left + right) + (1 - (left + right)) + V("group"))^0,
    group   = left * V("content") * right,
}
29
30-- taken from bibl-bib.lua
31
-- Primitive patterns: single characters and character sets that the token
-- definitions below are built from.
local anything                           = patterns.anything
local percent, start, comma, hash, equal = P("%"), P("@"), P(","), P("#"), P("=")
local escape, single, double             = P("\\"), P("'"), P('"')
local left, right                        = P('{'), P('}')

-- Line terminators versus all spacing characters (the latter includes line terminators).
local lineending = S("\n\r")
local space      = S(" \t\n\r\f")
local spaces     = space^1
46
-- A keyword is a non empty run of ascii letters, digits and the characters @ _ : -
local keychar      = R("az","AZ","09") + S("@_:-")
local keyword      = keychar^1

-- Body of a quoted string: everything up to the closing delimiter, where a
-- backslash followed by the delimiter is taken as part of the body.
local function quoted(delimiter)
    return (escape*delimiter + (1-delimiter))^0
end

----- s_quoted     = ((escape*single) + spaces + (1-single))^0
----- d_quoted     = ((escape*double) + spaces + (1-double))^0
local s_quoted     = quoted(single)
local d_quoted     = quoted(double)

local balanced     = patterns.balanced
54
-- Spacing comes in two flavours: the mandatory variant is tagged with the lexer's
-- own whitespace name, the optional variant (zero or more runs) is tagged "default".
local t_spacing    = token(whitespace, spaces)
local t_optionalws = token("default", spaces)^0

-- Helpers for the two tags used by the structural characters below.
local function t_operator(pattern) return token("operator",pattern) end
local function t_grouping(pattern) return token("grouping",pattern) end

local t_equal      = t_operator(equal)
local t_comma      = t_operator(comma)
local t_hash       = t_operator(hash)

local t_left       = t_grouping(left)
local t_right      = t_grouping(right)
63
-- A delimited value is three tokens in a row: opening delimiter (operator),
-- body (text) and closing delimiter (operator).
local function delimited(opening, body, closing)
    return token("operator",opening) * token("text",body) * token("operator",closing)
end

local t_s_value    = delimited(single, s_quoted, single) -- '...'
local t_d_value    = delimited(double, d_quoted, double) -- "..."
local t_b_value    = delimited(left,   balanced, right ) -- {...}

-- A raw value is a bare keyword, tagged as text.
local t_r_value    = token("text",keyword)
74
-- The same keyword pattern gets a different tag depending on where it is used.
local t_keyword    = token("keyword",keyword)
local t_key        = token("command",keyword)
local t_label      = token("warning",keyword)

-- One value is any of the four value forms; a complete value is one or more of
-- these joined by a hash with optional spacing at both sides.
local t_somevalue  = t_s_value + t_d_value + t_b_value + t_r_value
local t_joiner     = t_optionalws * t_hash * t_optionalws
local t_value      = t_somevalue * (t_joiner * t_somevalue)^0
82
-- An assignment: key = value, with optional spacing in front of each of the
-- three parts.
local t_assignment = t_optionalws * t_key
                   * t_optionalws * t_equal
                   * t_optionalws * t_value
89
-- A shortcut: keyword { assignments }, where each assignment can be followed by
-- any number of commas and the list of assignments can be empty.
local t_shortcut_open   = t_keyword * t_optionalws * t_left * t_optionalws
local t_shortcut_fields = (t_assignment * t_comma^0)^0
local t_shortcut        = t_shortcut_open * t_shortcut_fields * t_optionalws * t_right
97
-- A definition: keyword { label , assignments }, where the comma after the label
-- is mandatory and the list of assignments can be empty.
local t_definition_open   = t_keyword * t_optionalws * t_left * t_optionalws
local t_definition_label  = t_label * t_optionalws * t_comma
local t_definition_fields = (t_assignment * t_comma^0)^0
local t_definition        = t_definition_open
                          * t_definition_label
                          * t_definition_fields
                          * t_optionalws
                          * t_right
108
-- A comment: keyword { text }, where the text is everything up to the first
-- right brace (so no nesting here).
local t_comment_open = t_keyword * t_optionalws * t_left
local t_comment_text = token("text",(1-t_right)^0)
local t_comment      = t_comment_open * t_comment_text * t_optionalws * t_right
115
-- One or more percent signs start a comment that runs up to (not including) the
-- end of the line.
local uptolineend  = (1-lineending)^0
local t_forget     = token("comment",percent^1 * uptolineend)

-- Fallback: whatever patterns.anything matches, tagged "default".
local t_rest       = token("default",anything)
119
120-- this kind of lexing seems impossible as the size of the buffer passed to the lexer is not
121-- large enough .. but we can cheat and use this:
122--
123-- function OnOpen(filename) editor:Colourise(1,editor.TextLength) end -- or is it 0?
124
-- somehow lexing fails on this more complex lexer when we insert something, there is no
-- backtracking to whitespace when we have no embedded lexer, so we fake one ... this works
-- to some extent but not in all cases (e.g. editing inside a line fails) .. maybe i need to
-- patch the dll ... (better not)
129
-- Embed a dummy child lexer (the workaround announced in the comment above); it is
-- entered and left by the control characters \001 and \002, which are unlikely to
-- be used in a real file.
local dummylexer = lexer.load("scite-context-lexer-dummy","bib-dum")

local dummystart, dummystop = token("embedded",P("\001")), token("embedded",P("\002"))

lexer.embed_lexer(bibtexlexer,dummylexer,dummystart,dummystop)
136
137-- maybe we need to define each functional block as lexer (some 4) so i'll do that when
138-- this issue is persistent ... maybe consider making a local lexer options (not load,
139-- just lexer.new or so) .. or maybe do the reverse, embed the main one in a dummy child
140
-- The rules, listed in this order: spacing and percent comments come first, then the
-- structured entries, and finally the fallback.
bibtexlexer._rules = {
    { "whitespace", t_spacing },    -- runs of spacing
    { "forget",     t_forget },     -- % ... up to the end of the line
    { "shortcut",   t_shortcut },   -- keyword { key = value ... }
    { "definition", t_definition }, -- keyword { label, key = value ... }
    { "comment",    t_comment },    -- keyword { text }
    { "rest",       t_rest },       -- fallback
}
149
150-- local t_assignment = t_key
151--                    * t_optionalws
152--                    * t_equal
153--                    * t_optionalws
154--                    * t_value
155--
156-- local t_shortcut   = t_keyword
157--                    * t_optionalws
158--                    * t_left
159--
160-- local t_definition = t_keyword
161--                    * t_optionalws
162--                    * t_left
163--                    * t_optionalws
164--                    * t_label
165--                    * t_optionalws
166--                    * t_comma
167--
168-- bibtexlexer._rules = {
169--     { "whitespace",  t_spacing    },
170--     { "assignment",  t_assignment },
171--     { "definition",  t_definition },
172--     { "shortcut",    t_shortcut   },
173--     { "right",       t_right      },
174--     { "comma",       t_comma      },
175--     { "forget",      t_forget     },
176--     { "comment",     t_comment    },
177--     { "rest",        t_rest       },
178-- }
179
-- Styles come from the shared context styleset.
bibtexlexer._tokenstyles = context.styleset

-- Folding is driven by braces: in a "grouping" token a left brace counts as +1
-- and a right brace as -1.
bibtexlexer._foldpattern = left + right

bibtexlexer._foldsymbols = {
    _patterns = { "{", "}" },
    grouping  = { ["{"] = 1, ["}"] = -1 },
}

return bibtexlexer
196