local info = {
    version   = 1.400,
    comment   = "basics for scintilla lpeg lexer for context/metafun, contains copyrighted code from mitchell.att.foicica.com",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
10
-- We need to copy this file to lexer.lua in the same path. This was not needed
-- before version 10 but I can't figure out what else to do. It looks like there
-- is some loading of lexer.lua but I can't see where.
14
-- For a while it looked like we were stuck with scite 3 because there would be no
-- update of scintillua for the newer versions (c++ changes) but now it looks like
-- there will be updates (2021). There is a dll for scite >= 5 but it doesn't
-- work (yet). In version 5.20+ the scintillua dll makes scite crash (also when I
-- use the recommended import). In an early 5.02 loading the (shipped) lpeg lexer
-- does nothing at all. There have been changes in the lua interface too but I need
-- to compare the old and new lib. For now I gave up and went back to version 3+. It
-- would be nice if error messages would go to the log pane so that we get an idea
-- of what happens. After all the code involved (below) is not that much and not that
-- complex either.
--
-- Actually, scite 5.22 also crashed when a program was launched so better wait
-- for a while. (In the worst case, when it all stops working, we need to migrate
-- to visual code, which is our backup/fallback plan.) I didn't test if the latest
-- textadept still works with our lexer variant. In the meantime that editor has
-- grown to some 30 MB so it is no longer a lightweight option (scite with scintilla
-- is still quite small).
32
33if lpeg.setmaxstack then lpeg.setmaxstack(1000) end
34
35local log      = false
36local trace    = false
37local detail   = false
38local show     = false -- nice for tracing (also for later)
39local collapse = false -- can save some 15% (maybe easier on scintilla)
40local inspect  = false -- can save some 15% (maybe easier on scintilla)
41
42-- local log      = true
43-- local trace    = true
44
45-- GET GOING
46--
47-- You need to copy this file over lexer.lua. In principle other lexers could work
48-- too but not now. Maybe some day. All patterns will move into the patterns name
49-- space. I might do the same with styles. If you run an older version of SciTE you
50-- can take one of the archives. Pre 3.41 versions can just be copied to the right
51-- path, as there we still use part of the normal lexer. Below we mention some
-- issues with different versions of SciTE. We try to keep up with changes but best
-- check carefully if the version that you install works as expected because SciTE
54-- and the scintillua dll need to be in sync.
55--
56-- REMARK
57--
-- We started using lpeg lexing as soon as it became available. Because we had rather
59-- demanding files and also wanted to use nested lexers, we ended up with our own
60-- variant. At least at that time this was more robust and also much faster (as we
61-- have some pretty large Lua data files and also work with large xml files). As a
62-- consequence successive versions had to be adapted to changes in the (at that time
63-- still unstable) api. In addition to lexing we also have spell checking and such.
64-- Around version 3.60 things became more stable so I don't expect to change much.
65--
66-- LEXING
67--
68-- When pc's showed up we wrote our own editor (texedit) in MODULA 2. It was fast,
-- had multiple overlapping (text) windows, could run in the at most 1M of memory
-- available at that time, etc. The realtime file browsing with lexing that we had at
-- that time is still on my current wish list. The color scheme and logic that we used
-- related to the logic behind the ConTeXt user interface as it evolved.
73--
74-- Later I rewrote the editor in perl/tk. I don't like the perl syntax but tk
75-- widgets are very powerful and hard to beat. In fact, TextAdept reminds me of
76-- that: wrap your own interface around a framework (tk had an edit control that one
77-- could control completely not that different from scintilla). Last time I checked
78-- it still ran fine so I might try to implement something like its file handling in
79-- TextAdept.
80--
81-- In the end I settled for SciTE for which I wrote TeX and MetaPost lexers that
82-- could handle keyword sets. With respect to lexing (syntax highlighting) ConTeXt
83-- has a long history, if only because we need it for manuals. Anyway, in the end we
84-- arrived at lpeg based lexing (which is quite natural as we have lots of lpeg
85-- usage in ConTeXt). The basic color schemes haven't changed much. The most
86-- prominent differences are the nested lexers.
87--
88-- In the meantime I made the lexer suitable for typesetting sources which was no
89-- big deal as we already had that in place (ConTeXt used lpeg from the day it
90-- showed up so we have several lexing options there too).
91--
92-- Keep in mind that in ConTeXt (typesetting) lexing can follow several approaches:
93-- line based (which is handy for verbatim mode), syntax mode (which is nice for
94-- tutorials), and tolerant mode (so that one can also show bad examples or errors).
95-- These demands can clash.
96--
97-- HISTORY
98--
-- The remarks below are more for myself so that I keep track of changes in the
-- way we adapt to the changes in scintillua and scite.
101--
102-- The fold and lex functions are copied and patched from original code by Mitchell
103-- (see lexer.lua) in the scintillua distribution. So whatever I say below, assume
104-- that all errors are mine. The ability to use lpeg in scintilla is a real nice
105-- addition and a brilliant move. The code is a byproduct of the (mainly Lua based)
106-- TextAdept which at the time I ran into it was a rapidly moving target so I
-- decided to stick to SciTE. When I played with it, it had no realtime output pane
108-- although that seems to be dealt with now (2017). I need to have a look at it in
109-- more detail but a first test again made the output hang and it was a bit slow too
110-- (and I also want the log pane as SciTE has it, on the right, in view). So, for
111-- now I stick to SciTE even when it's somewhat crippled by the fact that we cannot
112-- hook our own (language dependent) lexer into the output pane (somehow the
113-- errorlist lexer is hard coded into the editor). Hopefully that will change some
-- day. The ConTeXt distribution has a cmd runner for textadept that will plug in the
-- lexers discussed here as well as a dedicated runner. Consider it an experiment.
116--
117-- The basic code hasn't changed much but we had to adapt a few times to changes in
118-- the api and/or work around bugs. Starting with SciTE version 3.20 there was an
119-- issue with coloring. We still lacked a connection with SciTE itself (properties
120-- as well as printing to the log pane) and we could not trace this (on windows).
121-- However on unix we can see messages! As far as I can see, there are no
122-- fundamental changes in lexer.lua or LexLPeg.cxx so it must be/have been in
123-- Scintilla itself. So we went back to 3.10. Indicators of issues are: no lexing of
124-- 'next' and 'goto <label>' in the Lua lexer and no brace highlighting either.
125-- Interesting is that it does work ok in the cld lexer (so the Lua code is okay).
126-- All seems to be ok again in later versions, so, when you update best check first
127-- and just switch back to an older version as normally a SciTE update is not
-- critical. When char-def.lua lexes really fast this is a signal that the lexer quits
129-- somewhere halfway. Maybe there are some hard coded limitations on the amount of
130-- styles and/or length of names.
131--
132-- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay
133-- again. So, this version assumes 3.24 or higher. In 3.24 we have a different token
134-- result, i.e. no longer a { tag, pattern } but just two return values. I didn't
135-- check other changes but will do that when I run into issues. I had already
136-- optimized these small tables by hashing which was much more efficient (and maybe
137-- even more efficient than the current approach) but this is no longer needed. For
138-- the moment we keep some of that code around as I don't know what happens in
139-- future versions. I'm anyway still happy with this kind of lexing.
140--
141-- In 3.31 another major change took place: some helper constants (maybe they're no
-- longer constants) and functions were moved into the lexer module's namespace but
143-- the functions are assigned to the Lua module afterward so we cannot alias them
144-- beforehand. We're probably getting close to a stable interface now. At that time
145-- for the first time I considered making a whole copy and patch the other functions
146-- too as we need an extra nesting model. However, I don't want to maintain too
-- much. An unfortunate change in 3.03 is that a script can no longer be specified.
148-- This means that instead of loading the extensions via the properties file, we now
149-- need to load them in our own lexers, unless of course we replace lexer.lua
150-- completely (which adds another installation issue).
151--
152-- Another change has been that _LEXERHOME is no longer available. It looks like
153-- more and more functionality gets dropped so maybe at some point we need to ship
154-- our own dll/so files. For instance, I'd like to have access to the current
155-- filename and other SciTE properties. We could then cache some info with each
156-- file, if only we had knowledge of what file we're dealing with. This all makes a
157-- nice installation more complex and (worse) makes it hard to share files between
-- different editors using a similar directory structure.
159--
160-- For huge files folding can be pretty slow and I do have some large ones that I
-- keep open all the time. Loading is normally no issue, unless one has remembered
162-- the status and the cursor is at the last line of a 200K line file. Optimizing the
163-- fold function brought down loading of char-def.lua from 14 sec => 8 sec.
164-- Replacing the word_match function and optimizing the lex function gained another
165-- 2+ seconds. A 6 second load is quite ok for me. The changed lexer table structure
166-- (no subtables) brings loading down to a few seconds.
167--
168-- When the lexer path is copied to the TextAdept lexer path, and the theme
-- definition to the theme path (as lexer.lua), the lexer works there as well. Although
170-- ... when I decided to check the state of TextAdept I had to adapt some loader
171-- code. The solution is not pretty but works and also permits overloading. When I
172-- have time and motive I will make a proper setup file to tune the look and feel a
-- bit more than we do now. The TextAdept editor now has tabs and a console so it
-- has become more usable for me (it's still somewhat slower than SciTE).
175-- Interesting is that the jit version of TextAdept crashes on lexing large files
176-- (and does not feel faster either; maybe a side effect of known limitations as we
177-- know that Luajit is more limited than stock Lua).
178--
179-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name ..
180-- '_whitespace' which means that we need to have it frozen at the moment we load
181-- another lexer. Because spacing is used to revert to a parent lexer we need to
182-- make sure that we load children as late as possible in order not to get the wrong
183-- whitespace trigger. This took me quite a while to figure out (not being that
184-- familiar with the internals). The lex and fold functions have been optimized. It
-- is a pity that there is no proper print available. Another thing needed is a
186-- default style in our own theme style definition, as otherwise we get wrong nested
187-- lexers, especially if they are larger than a view. This is the hardest part of
188-- getting things right.
189--
-- It's a pity that there is no scintillua library for the OSX version of SciTE.
191-- Even better would be to have the scintillua library as integral part of SciTE as
192-- that way I could use OSX alongside windows and linux (depending on needs). Also
193-- nice would be to have a proper interface to SciTE then because currently the
194-- lexer is rather isolated and the Lua version does not provide all standard
195-- libraries. It would also be good to have lpeg support in the regular SciTE Lua
196-- extension (currently you need to pick it up from someplace else). I keep hoping.
197--
198-- With 3.41 the interface changed again so it became time to look into the C++ code
199-- and consider compiling and patching myself, something that I like to avoid.
200-- Loading is more complicated now as the lexer gets loaded automatically so we have
201-- little control over extending the code now. After a few days trying all kind of
202-- solutions I decided to follow a different approach: drop in a complete
203-- replacement. This of course means that I need to keep track of even more changes
204-- (which for sure will happen) but at least I get rid of interferences. Till 3.60
205-- the api (lexing and configuration) was simply too unstable across versions which
-- is a pity because we expect authors to install SciTE without hassle. Maybe in a
-- few years things will have stabilized. Maybe it's also not really expected that
208-- one writes lexers at all. A side effect is that I now no longer will use shipped
209-- lexers for languages that I made no lexer for, but just the built-in ones in
210-- addition to the ConTeXt lpeg lexers. Not that it matters much as the ConTeXt
211-- lexers cover what I need (and I can always write more). For editing TeX files one
212-- only needs a limited set of lexers (TeX, MetaPost, Lua, BibTeX, C/W, PDF, SQL,
213-- etc). I can add more when I want.
214--
-- In fact, the transition to 3.41 was triggered by an unfortunate update of Ubuntu
216-- which left me with an incompatible SciTE and lexer library and updating was not
217-- possible due to the lack of 64 bit libraries. We'll see what the future brings.
218-- For now I can use SciTE under wine on linux. The fact that scintillua ships
219-- independently is a showstopper.
220--
-- Promising is that the library now can use another Lua instance so maybe some day
-- it will get properly integrated in SciTE and we can use more clever scripting.
223--
-- In some lexers we use embedded ones even if we could do it directly. The reason
225-- is that when the end token is edited (e.g. -->), backtracking to the space before
226-- the begin token (e.g. <!--) results in applying the surrounding whitespace which
227-- in turn means that when the end token is edited right, backtracking doesn't go
228-- back. One solution (in the dll) would be to backtrack several space categories.
229-- After all, lexing is quite fast (applying the result is much slower).
230--
231-- For some reason the first blob of text tends to go wrong (pdf and web). It would
232-- be nice to have 'whole doc' initial lexing. Quite fishy as it makes it impossible
233-- to lex the first part well (for already opened documents) because only a partial
234-- text is passed.
235--
236-- So, maybe I should just write this from scratch (assuming more generic usage)
237-- because after all, the dll expects just tables, based on a string. I can then
238-- also do some more aggressive resource sharing (needed when used generic).
239--
240-- I think that nested lexers are still bugged (esp over longer ranges). It never
241-- was robust or maybe it's simply not meant for too complex cases (well, it
242-- probably *is* tricky material). The 3.24 version was probably the best so far.
243-- The fact that styles bleed between lexers even if their states are isolated is an
-- issue. Another issue is that zero characters in the text passed to the lexer can
245-- mess things up (pdf files have them in streams).
246--
247-- For more complex 'languages', like web or xml, we need to make sure that we use
248-- e.g. 'default' for spacing that makes up some construct. Ok, we then still have a
249-- backtracking issue but less.
250--
251-- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++
252-- and we ship an ini file for that editor with some installation instructions
253-- embedded. Also, TextAdept has a console so that we can run realtime. The spawner
254-- is still not perfect (sometimes hangs) but it was enough reason to spend time on
255-- making our lexer work with TextAdept and create a setup.
256--
257-- Some bad news. The interface changed (again) in textadept 10, some for the better
258-- (but a bit different from what happens here) and some for the worse, especially
259-- moving some code to the init file so we now need some bad hacks. I decided to
260-- stay with the old method of defining lexers and because the lexer cannot be run
261-- in parallel any more (some change in the binary?) I will probably also cleanup
262-- code below as we no longer need to be compatible. Unfortunately textadept is too
263-- much a moving target to simply kick in some (tex related) production flow (apart
264-- from the fact that it doesn't yet have the scite like realtime console). I'll
265-- keep an eye on it. Because we don't need many added features I might as well decide
266-- to make a lean and mean instance (after all the license permits forking).
267
268-- TRACING
269--
270-- The advantage is that we now can check more easily with regular Lua(TeX). We can
271-- also use wine and print to the console (somehow stdout is intercepted there.) So,
272-- I've added a bit of tracing. Interesting is to notice that each document gets its
273-- own instance which has advantages but also means that when we are spellchecking
274-- we reload the word lists each time. (In the past I assumed a shared instance and
275-- took some precautions. But I can fix this.)
276--
277-- TODO
278--
279-- It would be nice if we could load some ConTeXt Lua modules (the basic set) and
280-- then use resolvers and such. But it might not work well with scite.
281--
282-- The current lexer basics are still a mix between old and new. Maybe I should redo
283-- some more. This is probably easier in TextAdept than in SciTE.
284--
285-- We have to make sure we don't overload ConTeXt definitions when this code is used
286-- in ConTeXt. I still have to add some of the goodies that we have there in lexers
287-- into these.
288--
-- Maybe I should use a special version of the dll, on the one hand stripped down and
-- on the other hand extended (stable api), and at least add a bit more interfacing
-- to scintilla.
291--
292-- I need to investigate if we can use the already built in Lua instance so that we
293-- can combine the power of lexing with extensions.
294--
295-- I need to play with hotspot and other properties like indicators (whatever they
296-- are).
297--
298-- I want to get rid of these lexers.STYLE_XX and lexers.XX things. This is possible
299-- when we give up compatibility. Generalize the helpers that I wrote for SciTE so
-- that they can also be used in TextAdept.
301--
302-- I can make an export to ConTeXt, but first I'll redo the code that makes the
303-- grammar, as we only seem to need
304--
305--   lexer._TOKENSTYLES : table
306--   lexer._CHILDREN    : flag
307--   lexer._EXTRASTYLES : table
308--   lexer._GRAMMAR     : flag
309--
310--   lexers.load        : function
311--   lexers.lex         : function
312--
313-- So, if we drop compatibility with other lex definitions, we can make things
314-- simpler. However, in the meantime one can just do this:
315--
316--    context --extra=listing --scite [--compact --verycompact] somefile.tex
317--
318-- and get a printable document. So, this todo is a bit obsolete.
319--
-- Properties are an ugly mess ... due to changes in the interface we're now left
-- with some hybrid that sort of works ok.
322
323-- textadept: buffer:colourise(0,-1)
324
325local lpeg  = require("lpeg")
326
327local global = _G
328local find, gmatch, match, lower, upper, gsub, sub, format, byte = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format, string.byte
329local concat, sort = table.concat, table.sort
330local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
331local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
332local lpegmatch = lpeg.match
333
334local usage   = (textadept and "textadept") or (resolvers and "context") or "scite"
335local nesting = 0
336local output  = nil
337
338----- print   = textadept and ui and ui.print or print -- crashes when ui is not yet defined
339
340local function print(...)
341    if not output then
342        output = io.open("lexer.log","w")
343    end
344    output:write(...,"\n")
345    output:flush()
346end
347
348local function report(fmt,str,...)
349    if log then
350        if str then
351            fmt = format(fmt,str,...)
352        end
353        print(format("scite lpeg lexer > %s > %s",nesting == 0 and "-" or nesting,fmt))
354    end
355end
356
357local function inform(...)
358    if log and trace then
359        report(...)
360    end
361end
362
363inform("loading context lexer module (global table: %s)",tostring(global))
364
365do
366
367    local floor    = math and math.floor
368    local format   = format
369    local tonumber = tonumber
370
371    if not floor then
372
373        if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then
374            floor = function(n)
375                return tonumber(format("%d",n))
376            end
377        else
378            -- 5.3 has a mixed number system and format %d doesn't work with
379            -- floats any longer ... no fun
380            floor = function(n)
381                return (n - n % 1)
382            end
383        end
384
385        math = math or { }
386
387        math.floor = floor
388
389    end
390
391end
392
393local floor = math.floor
394
395if not package.searchpath then
396
397    -- Unfortunately the io library is only available when we end up
398    -- in this branch of code.
399
400    inform("using adapted function 'package.searchpath' (if used at all)")
401
402    function package.searchpath(name,path)
403        local tried = { }
404        for part in gmatch(path,"[^;]+") do
405            local filename = gsub(part,"%?",name)
406            local f = io.open(filename,"r")
407            if f then
408                inform("file found on path: %s",filename)
409                f:close()
410                return filename
411            end
412            tried[#tried + 1] = format("no file '%s'",filename)
413        end
        -- added: also check the plain name on the current path .. for testing
        local filename = name
        local f = io.open(filename,"r")
416        if f then
417            inform("file found on current path: %s",filename)
418            f:close()
419            return filename
420        end
421        --
422        tried[#tried + 1] = format("no file '%s'",filename)
423        return nil, concat(tried,"\n")
424    end
425
426end
427
428local lexers              = { }
429local context             = { }
430local helpers             = { }
431lexers.context            = context
432lexers.helpers            = helpers
433
434local patterns            = { }
435context.patterns          = patterns -- todo: lexers.patterns
436
437context.report            = report
438context.inform            = inform
439
440lexers.LEXERPATH          = package.path -- can be multiple paths separated by ;
441
442if resolvers then
443    -- todo: set LEXERPATH
444    -- todo: set report
445end
446
447local function sortedkeys(hash) -- simple version, good enough for here
448    local t, n = { }, 0
449    for k, v in next, hash do
450        t[#t+1] = k
451        local l = #tostring(k)
452        if l > n then
453            n = l
454        end
455    end
456    sort(t)
457    return t, n
458end
459
460helpers.sortedkeys = sortedkeys
461
462local usedlexers          = { }
463local parent_lexer        = nil
464
465-- The problem with styles is that there is some nasty interaction with scintilla
466-- and each version of lexer dll/so has a different issue. So, from now on we will
-- just add them here. There is also a limit of some 30 styles. Maybe I should
468-- hash them in order to reuse.
469
470-- todo: work with proper hashes and analyze what styles are really used by a
471-- lexer
472
473local default = {
474    "nothing", "whitespace", "comment", "string", "number", "keyword",
475    "identifier", "operator", "error", "preprocessor", "constant", "variable",
476    "function", "type", "label",  "embedded",
477    "quote", "special", "extra", "reserved", "okay", "warning",
478    "command", "internal", "preamble", "grouping", "primitive", "plain",
479    "user",
480    -- not used (yet) .. we cross the 32 boundary so had to patch the initializer, see (1)
481    "char", "class", "data", "definition", "invisible", "regex",
482    "standout", "tag",
483    "text",
484}
485
486local predefined = {
487    "default", "linenumber", "bracelight", "bracebad", "controlchar",
488    "indentguide", "calltip",
489    -- seems new
490    "folddisplaytext"
491}
492
493-- Bah ... ugly ... nicer would be a proper hash .. we now have properties
494-- as well as STYLE_* and some connection between them ... why .. ok, we
495-- could delay things but who cares. Anyway, at this moment the properties
496-- are still unknown.
497
498local function preparestyles(list)
499    local reverse = { }
500    for i=1,#list do
501        local k = list[i]
502        local K = upper(k)
503        local s = "style." .. k
504        lexers[K] = k -- is this used
505        lexers["STYLE_"..K] = "$(" .. k .. ")"
506        reverse[k] = true
507    end
508    return reverse
509end
510
511local defaultstyles    = preparestyles(default)
512local predefinedstyles = preparestyles(predefined)
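
-- So, a small illustration of what the above gives us for a style name like
-- "keyword" (derived from the code above, not from the dll documentation):
--
--   lexers.KEYWORD       = "keyword"
--   lexers.STYLE_KEYWORD = "$(keyword)"
--
-- and the returned reverse hash has defaultstyles.keyword == true.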
513
514-- These helpers are set afterwards so we delay their initialization ... there
515-- is no need to alias each time again and this way we can more easily adapt
516-- to updates.
517
-- These keep changing (values, functions, tables ...) so we need to check these
519-- with each update. Some of them are set in the loader (the require 'lexer' is
520-- in fact not a real one as the lexer code is loaded in the dll). It's also not
521-- getting more efficient.
522
523-- FOLD_BASE         = lexers.FOLD_BASE         or SC_FOLDLEVELBASE
524-- FOLD_HEADER       = lexers.FOLD_HEADER       or SC_FOLDLEVELHEADERFLAG
525-- FOLD_BLANK        = lexers.FOLD_BLANK        or SC_FOLDLEVELWHITEFLAG
526-- get_style_at      = lexers.get_style_at      or GetStyleAt
527-- get_indent_amount = lexers.get_indent_amount or GetIndentAmount
528-- get_property      = lexers.get_property      or GetProperty
529-- get_fold_level    = lexers.get_fold_level    or GetFoldLevel
530
531-- It needs checking: do we have access to all properties now? I'll clean
532-- this up anyway as I want a simple clean and stable model.
533
534-- This is somewhat messy. The lexer dll provides some virtual fields:
535--
536-- + property
537-- + property_int
538-- + style_at
539-- + fold_level
540-- + indent_amount
541--
542-- but for some reasons not:
543--
544-- + property_expanded
545--
546-- As a consequence we need to define it here because otherwise the
547-- lexer will crash. The fuzzy thing is that we don't have to define
548-- the property and property_int tables but we do have to define the
549-- expanded beforehand. The folding properties are no longer interfaced
550-- so the interface to scite is now rather weak (only a few hard coded
551-- properties).
552
553local FOLD_BASE     = 0
554local FOLD_HEADER   = 0
555local FOLD_BLANK    = 0
556
557local style_at      = { }
558local indent_amount = { }
559local fold_level    = { }
560
561local function check_main_properties()
562    if not lexers.property then
563        lexers.property = { }
564    end
565    if not lexers.property_int then
566        lexers.property_int = setmetatable({ }, {
567            __index    = function(t,k)
568                -- why the tostring .. it relies on lua casting to a number when
569                -- doing a comparison
570                return tonumber(lexers.property[k]) or 0 -- tostring removed
571            end,
572         -- __newindex = function(t,k,v)
573         --     report("properties are read-only, '%s' is not changed",k)
574         -- end,
575        })
576    end
577end
578
579lexers.property_expanded = setmetatable({ }, {
580    __index   = function(t,k)
581        -- better be safe for future changes .. what if at some point this is
582        -- made consistent in the dll ... we need to keep an eye on that
583        local property = lexers.property
584        if not property then
585            check_main_properties()
586        end
587        --
588--         return gsub(property[k],"[$%%]%b()", function(k)
589--             return t[sub(k,3,-2)]
590--         end)
591        local v = property[k]
592        if v then
593            v = gsub(v,"[$%%]%b()", function(k)
594                return t[sub(k,3,-2)]
595            end)
596        end
597        return v
598    end,
599    __newindex = function(t,k,v)
600        report("properties are read-only, '%s' is not changed",k)
601    end,
602})
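
-- A small, made up illustration of that expansion: assuming that we have
--
--   lexers.property["style.keyword"] = "fore:$(color.blue),bold"
--   lexers.property["color.blue"]    = "#0000FF"
--
-- then lexers.property_expanded["style.keyword"] gives "fore:#0000FF,bold"; nested
-- $(...) references are resolved recursively via the __index above.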
603
604-- A downward compatible feature but obsolete:
605
606-- local function get_property(tag,default)
607--     return lexers.property_int[tag] or lexers.property[tag] or default
608-- end
609
610-- We still want our own properties (as it keeps changing so better play
611-- safe from now on). At some point I can freeze them.
612
613local function check_properties(lexer)
614    if lexer.properties then
615        return lexer
616    end
617    check_main_properties()
618    -- we use a proxy
619    local mainproperties = lexers.property
620    local properties = { }
621    local expanded = setmetatable({ }, {
622        __index = function(t,k)
623            return gsub(properties[k] or mainproperties[k],"[$%%]%b()", function(k)
624                return t[sub(k,3,-2)]
625            end)
626        end,
627    })
628    lexer.properties = setmetatable(properties, {
629        __index = mainproperties,
630        __call = function(t,k,default) -- expands
631            local v = expanded[k]
632            local t = type(default)
633            if t == "number" then
634                return tonumber(v) or default
635            elseif t == "boolean" then
636                return v == nil and default or v
637            else
638                return v or default
639            end
640        end,
641    })
642    return lexer
643end
644
645-- do
646--     lexers.property = { foo = 123, red = "R" }
647--     local a = check_properties({})  print("a.foo",a.properties.foo)
648--     a.properties.foo = "bar"        print("a.foo",a.properties.foo)
649--     a.properties.foo = "bar:$(red)" print("a.foo",a.properties.foo) print("a.foo",a.properties("foo"))
650-- end
651
652local function set(value,default)
653    if value == 0 or value == false or value == "0" then
654        return false
655    elseif value == 1 or value == true or value == "1" then
656        return true
657    else
658        return default
659    end
660end
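
-- For example: set("1",false) and set(true,false) give true, set(0,true) gives
-- false, and anything else (including nil) falls back to the given default.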
661
662local function check_context_properties()
663    local property = lexers.property -- let's hope that this stays
664    log      = set(property["lexer.context.log"],     log)
665    trace    = set(property["lexer.context.trace"],   trace)
666    detail   = set(property["lexer.context.detail"],  detail)
667    show     = set(property["lexer.context.show"],    show)
668    collapse = set(property["lexer.context.collapse"],collapse)
669    inspect  = set(property["lexer.context.inspect"], inspect)
670end
671
672function context.registerproperties(p) -- global
673    check_main_properties()
674    local property = lexers.property -- let's hope that this stays
675    for k, v in next, p do
676        property[k] = v
677    end
678    check_context_properties()
679end
680
681context.properties = setmetatable({ }, {
682    __index    = lexers.property,
683    __newindex = function(t,k,v)
684        check_main_properties()
685        lexers.property[k] = v
686        check_context_properties()
687    end,
688})
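
-- So, from a setup or lexer file one can for instance switch on logging with
--
--   context.properties["lexer.context.log"]   = "1"
--   context.properties["lexer.context.trace"] = "1"
--
-- which stores the values in lexers.property and refreshes the local flags above.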
689
-- We want locals so we set them delayed. Once.
691
692local function initialize()
693    FOLD_BASE     = lexers.FOLD_BASE
694    FOLD_HEADER   = lexers.FOLD_HEADER
695    FOLD_BLANK    = lexers.FOLD_BLANK
696    --
697    style_at      = lexers.style_at      -- table
698    indent_amount = lexers.indent_amount -- table
699    fold_level    = lexers.fold_level    -- table
700    --
701    check_main_properties()
702    --
703    initialize = nil
704end
705
706-- Style handler.
707--
708-- The property table will be set later (after loading) by the library. The
709-- styleset is not needed any more as we predefine all styles as defaults
710-- anyway (too bug sensitive otherwise).
711
712local function tocolors(colors)
713    local colorset     = { }
714    local property_int = lexers.property_int or { }
715    for k, v in next, colors do
716        if type(v) == "table" then
717            local r, g, b = v[1], v[2], v[3]
718            if r and g and b then
719                v = tonumber(format("%02X%02X%02X",b,g,r),16) or 0 -- hm
720            elseif r then
721                v = tonumber(format("%02X%02X%02X",r,r,r),16) or 0
722            else
723                v = 0
724            end
725        end
726        colorset[k] = v
727        property_int["color."..k] = v
728    end
729    return colorset
730end
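
-- A quick, made up example: tocolors { blue = { 0x00, 0x00, 0xFF } } maps "blue"
-- to 0xFF0000, the BGR integer that scintilla expects, and it also sets
-- property_int["color.blue"] to that value.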
731
732local function toproperty(specification)
733    local serialized = { }
734    for key, value in next, specification do
735        if value == true then
736            serialized[#serialized+1] = key
737        elseif type(value) == "table" then
738            local r, g, b = value[1], value[2], value[3]
739            if r and g and b then
740                value = format("#%02X%02X%02X",r,g,b) or "#000000"
741            elseif r then
742                value = format("#%02X%02X%02X",r,r,r) or "#000000"
743            else
744                value = "#000000"
745            end
746            serialized[#serialized+1] = key .. ":" .. value
747        else
748            serialized[#serialized+1] = key .. ":" .. tostring(value)
749        end
750    end
751    return concat(serialized,",")
752end
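
-- For instance, a made up style specification like
--
--   toproperty { fore = { 0x99, 0x66, 0x33 }, bold = true, size = 10 }
--
-- serializes, in no particular order, to something like "fore:#996633,bold,size:10".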
753
754local function tostyles(styles)
755    local styleset = { }
756    local property = lexers.property or { }
757    for k, v in next, styles do
758        v = toproperty(v)
759        styleset[k] = v
760        property["style."..k] = v
761    end
762    return styleset
763end
764
765context.toproperty = toproperty
766context.tostyles   = tostyles
767context.tocolors   = tocolors
768
769-- If we had one instance/state of Lua as well as all regular libraries
-- preloaded we could use the context base libraries. So, let's go for the poor
-- man's solution now.
772
773function context.registerstyles(styles)
774    local styleset   = tostyles(styles)
775    context.styles   = styles
776    context.styleset = styleset
777    if detail then
778        local t, n = sortedkeys(styleset)
779        local template = "  %-" .. n .. "s : %s"
780        report("initializing styleset:")
781        for i=1,#t do
782            local k = t[i]
783            report(template,k,styleset[k])
784        end
785    elseif trace then
786        report("initializing styleset")
787    end
788end
789
790function context.registercolors(colors) -- needed for textadept
791    local colorset   = tocolors(colors)
792    context.colors   = colors
793    context.colorset = colorset
794    if detail then
795        local t, n = sortedkeys(colorset)
796        local template = "  %-" .. n .. "s : %i"
797        report("initializing colorset:")
798        for i=1,#t do
799            local k = t[i]
800            report(template,k,colorset[k])
801        end
802    elseif trace then
803        report("initializing colorset")
804    end
805end
806
807-- Some spell checking related stuff. Unfortunately we cannot use a path set
808-- by property. This will get a hook for resolvers.
809
810local locations = {
811   "context/lexers",      -- context lexers
812   "context/lexers/data", -- context lexers
813   "../lexers",           -- original lexers
814   "../lexers/data",      -- original lexers
815   ".",                   -- whatever
816   "./data",              -- whatever
817}
818
819-- local function collect(name)
820--     local root = gsub(lexers.LEXERPATH or ".","/.-lua$","") .. "/" -- this is a horrible hack
821--  -- report("module '%s' locating '%s'",tostring(lexers),name)
822--     for i=1,#locations do
823--         local fullname =  root .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
824--         if trace then
825--             report("attempt to locate '%s'",fullname)
826--         end
827--         local okay, result = pcall(function () return dofile(fullname) end)
828--         if okay then
829--             return result, fullname
830--         end
831--     end
832-- end
833
834local collect
835
836if usage == "context" then
837
838    collect = function(name)
839        return require(name), name
840    end
841
842else
843
844    collect = function(name)
845        local rootlist = lexers.LEXERPATH or "."
846        for root in gmatch(rootlist,"[^;]+") do
847            local root = gsub(root,"/[^/]-lua$","")
848            for i=1,#locations do
849                local fullname =  root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
850                if trace then
851                    report("attempt to locate '%s'",fullname)
852                end
853                local okay, result = pcall(function () return dofile(fullname) end)
854                if okay then
855                    return result, fullname
856                end
857            end
858        end
859    --     return require(name), name
860    end
861
862end
863
864function context.loadluafile(name)
865    local data, fullname = collect(name)
866    if data then
867        if trace then
868            report("lua file '%s' has been loaded",fullname)
869        end
870        return data, fullname
871    end
872    if not textadept then
873        report("unable to load lua file '%s'",name)
874    end
875end
876
877-- in fact we could share more as we probably process the data but then we need
878-- to have a more advanced helper
879
880local cache = { }
881
882function context.loaddefinitions(name)
883    local data = cache[name]
884    if data then
885        if trace then
886            report("reusing definitions '%s'",name)
887        end
888        return data
889    elseif trace and data == false then
890        report("definitions '%s' were not found",name)
891    end
892    local data, fullname = collect(name)
893    if not data then
894        if not textadept then
895            report("unable to load definition file '%s'",name)
896        end
897        data = false
898    elseif trace then
899        report("definition file '%s' has been loaded",fullname)
900        if detail then
901            local t, n = sortedkeys(data)
902            local template = "  %-" .. n .. "s : %s"
903            for i=1,#t do
904                local k = t[i]
905                local v = data[k]
906                if type(v) ~= "table" then
907                    report(template,k,tostring(v))
908                elseif #v > 0 then
909                    report(template,k,#v)
910                else
911                    -- no need to show hash
912                end
913            end
914        end
915    end
916    cache[name] = data
917    return type(data) == "table" and data
918end
919
-- A bit of regression in textadept > 10 so updated ... done a bit differently.
921-- We don't use this in the context lexers anyway.
922
923function context.word_match(words,word_chars,case_insensitive)
924    -- used to be proper tables ...
925    if type(words) == "string" then
926        local clean = gsub(words,"%-%-[^\n]+","")
927        local split = { }
928        for s in gmatch(clean,"%S+") do
929            split[#split+1] = s
930        end
931        words = split
932    end
933    local list = { }
934    for i=1,#words do
935        list[words[i]] = true
936    end
937    if case_insensitive then
938        for i=1,#words do
939            list[lower(words[i])] = true
940        end
941    end
942    local chars = S(word_chars or "")
943    for i=1,#words do
944        chars = chars + S(words[i])
945    end
946    local match = case_insensitive and
947            function(input,index,word)
                -- We can speed up mixed case handling if needed.
949                return (list[word] or list[lower(word)]) and index or nil
950            end
951        or
952            function(input,index,word)
953                return list[word] and index or nil
954            end
955    return Cmt(chars^1,match)
956end
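
-- A small usage sketch (the context lexers themselves don't use this helper):
--
--   local keyword = context.word_match("if then else")
--
--   lpegmatch(keyword,"then foo") -- 5 (a match)
--   lpegmatch(keyword,"when foo") -- nil (no keyword found)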
957
958-- Patterns are grouped in a separate namespace but the regular lexers expect
959-- shortcuts to be present in the lexers library. Maybe I'll incorporate some
960-- of l-lpeg later.
961
962do
963
964    local anything             = P(1)
965    local idtoken              = R("az","AZ","\127\255","__")
966    local digit                = R("09")
967    local sign                 = S("+-")
968    local period               = P(".")
969    local octdigit             = R("07")
970    local hexdigit             = R("09","AF","af")
971    local lower                = R("az")
972    local upper                = R("AZ")
973    local alpha                = upper + lower
974    local space                = S(" \n\r\t\f\v")
975    local eol                  = S("\r\n")
976    local backslash            = P("\\")
977    local decimal              = digit^1
978    local octal                = P("0")
979                               * octdigit^1
980    local hexadecimal          = P("0") * S("xX")
981                               * (hexdigit^0 * period * hexdigit^1 + hexdigit^1 * period * hexdigit^0 + hexdigit^1)
982                               * (S("pP") * sign^-1 * hexdigit^1)^-1 -- *
983    local integer              = sign^-1
984                               * (hexadecimal + octal + decimal)
985    local float                = sign^-1
986                               * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1)
987                               * S("eE") * sign^-1 * digit^1 -- *
988
989    patterns.idtoken           = idtoken
990    patterns.digit             = digit
991    patterns.sign              = sign
992    patterns.period            = period
993    patterns.octdigit          = octdigit
994    patterns.hexdigit          = hexdigit
995    patterns.ascii             = R("\000\127") -- useless
996    patterns.extend            = R("\000\255") -- useless
997    patterns.control           = R("\000\031")
998    patterns.lower             = lower
999    patterns.upper             = upper
1000    patterns.alpha             = alpha
1001    patterns.decimal           = decimal
1002    patterns.octal             = octal
1003    patterns.hexadecimal       = hexadecimal
1004    patterns.float             = float
1005    patterns.cardinal          = decimal
1006
1007    patterns.signeddecimal     = sign^-1 * decimal
1008    patterns.signedoctal       = sign^-1 * octal
1009    patterns.signedhexadecimal = sign^-1 * hexadecimal
1010    patterns.integer           = integer
1011    patterns.real              =
1012        sign^-1 * (                    -- at most one
1013            digit^1 * period * digit^0 -- 10.0 10.
1014          + digit^0 * period * digit^1 -- 0.10 .10
1015          + digit^1                    -- 10
1016       )
1017
1018    patterns.anything          = anything
1019    patterns.any               = anything
1020    patterns.restofline        = (1-eol)^1
1021    patterns.space             = space
1022    patterns.spacing           = space^1
1023    patterns.nospacing         = (1-space)^1
1024    patterns.eol               = eol
1025    patterns.newline           = P("\r\n") + eol
1026    patterns.backslash         = backslash
1027
1028    local endof                = S("\n\r\f")
1029
1030    patterns.startofline       = P(function(input,index)
1031        return (index == 1 or lpegmatch(endof,input,index-1)) and index
1032    end)
1033
    -- These are the expected ones for other lexers. Maybe put them all in their own
    -- namespace and provide a compatibility layer. Or should I just remove them?
1036
1037    lexers.any            = anything
    lexers.ascii          = patterns.ascii
    lexers.extend         = patterns.extend
1040    lexers.alpha          = alpha
1041    lexers.digit          = digit
1042    lexers.alnum          = alpha + digit
1043    lexers.lower          = lower
1044    lexers.upper          = upper
1045    lexers.xdigit         = hexdigit
    lexers.cntrl          = patterns.control
1047    lexers.graph          = R("!~")
1048    lexers.print          = R(" ~")
    lexers.punct          = R("!/", ":@", "[`", "{~")
1050    lexers.space          = space
1051    lexers.newline        = S("\r\n\f")^1
1052    lexers.nonnewline     = 1 - lexers.newline
1053    lexers.nonnewline_esc = 1 - (lexers.newline + '\\') + backslash * anything
1054    lexers.dec_num        = decimal
1055    lexers.oct_num        = octal
1056    lexers.hex_num        = hexadecimal
1057    lexers.integer        = integer
1058    lexers.float          = float
1059    lexers.word           = (alpha + "_") * (alpha + digit + "_")^0 -- weird, why digits
1060
1061end
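
-- A few quick checks of the number patterns defined above (note that 'float'
-- demands an exponent while 'real' does not):
--
--   lpegmatch(patterns.float,"1.25e-3") -- 8 (matched)
--   lpegmatch(patterns.float,"1.25")    -- nil
--   lpegmatch(patterns.real, "1.25")    -- 5 (matched)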
1062
1063-- end of patterns
1064
1065function context.exact_match(words,word_chars,case_insensitive)
1066    local characters = concat(words)
1067    local pattern -- the concat catches _ etc
1068    if word_chars == true or word_chars == false or word_chars == nil then
1069        word_chars = ""
1070    end
1071    if type(word_chars) == "string" then
1072        pattern = S(characters) + patterns.idtoken
1073        if case_insensitive then
1074            pattern = pattern + S(upper(characters)) + S(lower(characters))
1075        end
1076        if word_chars ~= "" then
1077            pattern = pattern + S(word_chars)
1078        end
1079    elseif word_chars then
1080        pattern = word_chars
1081    end
1082    if case_insensitive then
1083        local list = { }
1084        if #words == 0 then
1085            for k, v in next, words do
1086                list[lower(k)] = v
1087            end
1088        else
1089            for i=1,#words do
1090                list[lower(words[i])] = true
1091            end
1092        end
1093        return Cmt(pattern^1, function(_,i,s)
1094            return list[lower(s)] -- and i or nil
1095        end)
1096    else
1097        local list = { }
1098        if #words == 0 then
1099            for k, v in next, words do
1100                list[k] = v
1101            end
1102        else
1103            for i=1,#words do
1104                list[words[i]] = true
1105            end
1106        end
1107        return Cmt(pattern^1, function(_,i,s)
1108            return list[s] -- and i or nil
1109        end)
1110    end
1111end
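
-- Roughly how a lexer can use this one (the keyword list is made up):
--
--   local p_keyword = context.exact_match { "begin", "end", "if", "fi" }
--
-- The pattern grabs a run of identifier characters and only succeeds when that
-- run is one of the given words, so "beginx" is not mistaken for "begin".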
1112
1113function context.just_match(words)
1114    local p = P(words[1])
1115    for i=2,#words do
1116        p = p + P(words[i])
1117    end
1118    return p
1119end
1120
1121-- spell checking (we can only load lua files)
1122--
1123-- return {
1124--     min = 3,
1125--     max = 40,
1126--     n = 12345,
1127--     words = {
1128--         ["someword"]    = "someword",
1129--         ["anotherword"] = "Anotherword",
1130--     },
1131-- }
1132
1133local lists    = { }
1134local disabled = false
1135
1136function context.disablewordcheck()
1137    disabled = true
1138end
1139
1140function context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values)
1141    if not tag or tag == "" then
1142        return false, 3
1143    end
1144    local list = lists[tag]
1145    if not list then
1146        list = context.loaddefinitions("spell-" .. tag)
1147        if not list or type(list) ~= "table" then
1148            if not textadept then
1149                report("invalid spell checking list for '%s'",tag)
1150            end
1151            list = { words = false, min = 3 }
1152        else
1153            list.words = list.words or false
1154            list.min   = list.min or 3
1155        end
1156        lists[tag] = list
1157    end
1158    if trace then
1159        report("enabling spell checking for '%s' with minimum '%s'",tag,list.min)
1160    end
1161    return list.words, list.min
1162end
1163
1164patterns.wordtoken   = R("az","AZ","\127\255")
1165patterns.wordpattern = patterns.wordtoken^3 -- todo: if limit and #s < limit then
1166
1167function context.checkedword(validwords,validminimum,s,i) -- ,limit
1168    if not validwords then -- or #s < validminimum then
1169        return true, "text", i -- true, "default", i
1170    else
1171        -- keys are lower
1172        local word = validwords[s]
1173        if word == s then
1174            return true, "okay", i -- exact match
1175        elseif word then
1176            return true, "warning", i -- case issue
1177        else
1178            local word = validwords[lower(s)]
1179            if word == s then
1180                return true, "okay", i -- exact match
1181            elseif word then
1182                return true, "warning", i -- case issue
1183            elseif upper(s) == s then
1184                return true, "warning", i -- probably a logo or acronym
1185            else
1186                return true, "error", i
1187            end
1188        end
1189    end
1190end
1191
1192function context.styleofword(validwords,validminimum,s) -- ,limit
1193    if not validwords or #s < validminimum then
1194        return "text"
1195    else
1196        -- keys are lower
1197        local word = validwords[s]
1198        if word == s then
1199            return "okay" -- exact match
1200        elseif word then
1201            return "warning" -- case issue
1202        else
1203            local word = validwords[lower(s)]
1204            if word == s then
1205                return "okay" -- exact match
1206            elseif word then
1207                return "warning" -- case issue
1208            elseif upper(s) == s then
1209                return "warning" -- probably a logo or acronym
1210            else
1211                return "error"
1212            end
1213        end
1214    end
1215end
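
-- A rough sketch (not the actual tex or mps lexer code) of how a lexer can combine
-- these helpers in a match time capture; the wordlist tag "en" is just an example:
--
--   local validwords, validminimum = context.setwordlist("en")
--
--   local p_word = Cmt(patterns.wordpattern, function(_,i,s)
--       return context.checkedword(validwords,validminimum,s,i)
--   end)
--
-- The extra values returned by checkedword ("text", "okay", "warning" or "error"
-- plus a position) become the capture values, i.e. the style given to the word.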
1216
1217-- overloaded functions
1218
1219local h_table, b_table, n_table = { }, { }, { } -- from the time small tables were used (optimization)
1220
1221setmetatable(h_table, { __index = function(t,level) local v = { level, FOLD_HEADER } t[level] = v return v end })
1222setmetatable(b_table, { __index = function(t,level) local v = { level, FOLD_BLANK  } t[level] = v return v end })
1223setmetatable(n_table, { __index = function(t,level) local v = { level              } t[level] = v return v end })
1224
1225local newline = patterns.newline
1226local p_yes   = Cp() * Cs((1-newline)^1) * newline^-1
1227local p_nop   = newline
1228
1229local folders = { }
1230
-- Snippets from the > 10 code .. but we do things differently so ...
1232
1233local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
1234    local folder = folders[lexer]
1235    if not folder then
1236        --
1237        local pattern, folds, text, start_pos, line_num, prev_level, current_level
1238        --
1239        local fold_symbols = lexer._foldsymbols
1240        local fold_pattern = lexer._foldpattern -- use lpeg instead (context extension)
1241        --
1242        -- textadept >= 10
1243        --
1244     -- local zerosumlines = lexer.property_int["fold.on.zero.sum.lines"] > 0 -- not done
1245     -- local compact      = lexer.property_int['fold.compact'] > 0           -- not done
1246     -- local lowercase    = lexer._CASEINSENSITIVEFOLDPOINTS                 -- useless (utf will distort)
1247        --
1248        if fold_pattern then
1249            -- if no functions are found then we could have a faster one
1250            fold_pattern = Cp() * C(fold_pattern) / function(s,match)
1251                local symbols = fold_symbols[style_at[start_pos + s]]
1252                if symbols then
1253                    local l = symbols[match]
1254                    if l then
1255                        current_level = current_level + l
1256                    end
1257                end
1258            end
1259            local action_y = function()
1260                folds[line_num] = prev_level
1261                if current_level > prev_level then
1262                    folds[line_num] = prev_level + FOLD_HEADER
1263                end
1264                if current_level < FOLD_BASE then
1265                    current_level = FOLD_BASE
1266                end
1267                prev_level = current_level
1268                line_num = line_num + 1
1269            end
1270            local action_n = function()
1271                folds[line_num] = prev_level + FOLD_BLANK
1272                line_num = line_num + 1
1273            end
1274            pattern = ((fold_pattern + (1-newline))^1 * newline / action_y + newline/action_n)^0
1275
1276         else
1277            -- the traditional one but a bit optimized
1278            local fold_symbols_patterns = fold_symbols._patterns
1279            local action_y = function(pos,line)
1280                for j=1, #fold_symbols_patterns do
1281                    for s, match in gmatch(line,fold_symbols_patterns[j]) do -- "()(" .. patterns[i] .. ")"
1282                        local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
1283                        local l = symbols and symbols[match]
1284                        local t = type(l)
1285                        if t == "number" then
1286                            current_level = current_level + l
1287                        elseif t == "function" then
1288                            current_level = current_level + l(text, pos, line, s, match)
1289                        end
1290                    end
1291                end
1292                folds[line_num] = prev_level
1293                if current_level > prev_level then
1294                    folds[line_num] = prev_level + FOLD_HEADER
1295                end
1296                if current_level < FOLD_BASE then
1297                    current_level = FOLD_BASE
1298                end
1299                prev_level = current_level
1300                line_num = line_num + 1
1301            end
1302            local action_n = function()
1303                folds[line_num] = prev_level + FOLD_BLANK
1304                line_num = line_num + 1
1305            end
1306            pattern = (p_yes/action_y + p_nop/action_n)^0
1307        end
1308        --
1309        local reset_parser = lexer._reset_parser
1310        --
1311        folder = function(_text_,_start_pos_,_start_line_,_start_level_)
1312            if reset_parser then
1313                reset_parser()
1314            end
1315            folds         = { }
1316            text          = _text_
1317            start_pos     = _start_pos_
1318            line_num      = _start_line_
1319            prev_level    = _start_level_
1320            current_level = prev_level
1321            lpegmatch(pattern,text)
1322         -- make folds collectable
1323            local t = folds
1324            folds = nil
1325            return t
1326        end
1327        folders[lexer] = folder
1328    end
1329    return folder(text,start_pos,start_line,start_level,lexer)
1330end
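
-- For reference, a lexer that wants this kind of folding provides fold symbols in
-- the traditional scintillua way (the token names below are just an illustration):
--
--   lexer._foldsymbols = {
--       _patterns = { "[{}]" },
--       operator  = { ["{"] = 1, ["}"] = -1 },
--   }
--
-- or, as a context extension, a single lpeg in lexer._foldpattern combined with the
-- same symbol tables; the numbers raise or lower the fold level per line.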
1331
1332local folds, current_line, prev_level
1333
1334local function action_y()
1335    local current_level = FOLD_BASE + indent_amount[current_line]
1336    if current_level > prev_level then -- next level
1337        local i = current_line - 1
1338        local f
1339        while true do
1340            f = folds[i]
1341            if not f then
1342                break
1343            elseif f[2] == FOLD_BLANK then
1344                i = i - 1
1345            else
1346                f[2] = FOLD_HEADER -- low indent
1347                break
1348            end
1349        end
1350        folds[current_line] = { current_level } -- high indent
1351    elseif current_level < prev_level then -- prev level
1352        local f = folds[current_line - 1]
1353        if f then
1354            f[1] = prev_level -- high indent
1355        end
1356        folds[current_line] = { current_level } -- low indent
1357    else -- same level
1358        folds[current_line] = { prev_level }
1359    end
1360    prev_level = current_level
1361    current_line = current_line + 1
1362end
1363
1364local function action_n()
1365    folds[current_line] = { prev_level, FOLD_BLANK }
1366    current_line = current_line + 1
1367end
1368
1369local pattern = ( S("\t ")^0 * ( (1-patterns.eol)^1 / action_y + P(true) / action_n) * newline )^0
1370
1371local function fold_by_indentation(text,start_pos,start_line,start_level)
1372    -- initialize
1373    folds        = { }
1374    current_line = start_line
1375    prev_level   = start_level
1376    -- define
1377    -- -- not here .. pattern binds and local functions are not frozen
1378    -- analyze
1379    lpegmatch(pattern,text)
1380    -- flatten
1381    for line, level in next, folds do
1382        folds[line] = level[1] + (level[2] or 0)
1383    end
1384    -- done, make folds collectable
1385    local t = folds
1386    folds = nil
1387    return t
1388end
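
-- A small worked example: for the input
--
--   foo
--     bar
--   <empty line>
--   baz
--
-- and a start level of FOLD_BASE (assuming indent_amount reports 2 for the
-- indented line) the flattened result is roughly:
--
--   first line  : FOLD_BASE + FOLD_HEADER         -- indentation increases below it
--   second line : FOLD_BASE + 2
--   third line  : FOLD_BASE + 2 + FOLD_BLANK      -- blank lines inherit the previous level
--   fourth line : FOLD_BASE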
1389
1390local function fold_by_line(text,start_pos,start_line,start_level)
1391    local folds = { }
1392    -- can also be lpeg'd
1393    for _ in gmatch(text,".-\r?\n") do
        folds[start_line] = n_table[start_level] -- { start_level } -- still tables? needs checking
1395        start_line = start_line + 1
1396    end
1397    return folds
1398end
1399
1400local threshold_by_lexer       =  512 * 1024 -- we don't know the filesize yet
1401local threshold_by_parsing     =  512 * 1024 -- we don't know the filesize yet
1402local threshold_by_indentation =  512 * 1024 -- we don't know the filesize yet
1403local threshold_by_line        =  512 * 1024 -- we don't know the filesize yet
1404
1405function context.fold(lexer,text,start_pos,start_line,start_level) -- hm, we had size thresholds .. where did they go
1406    if text == "" then
1407        return { }
1408    end
1409    if initialize then
1410        initialize()
1411    end
1412    local fold_by_lexer   = lexer._fold
1413    local fold_by_symbols = lexer._foldsymbols
1414    local filesize        = 0 -- we don't know that
1415    if fold_by_lexer then
1416        if filesize <= threshold_by_lexer then
1417            return fold_by_lexer(text,start_pos,start_line,start_level,lexer)
1418        end
1419    elseif fold_by_symbols then -- and lexer.properties("fold.by.parsing",1) > 0 then
1420        if filesize <= threshold_by_parsing then
1421            return fold_by_parsing(text,start_pos,start_line,start_level,lexer)
1422        end
1423    elseif lexer._FOLDBYINDENTATION or lexer.properties("fold.by.indentation",1) > 0 then
1424        if filesize <= threshold_by_indentation then
1425            return fold_by_indentation(text,start_pos,start_line,start_level,lexer)
1426        end
1427    elseif lexer._FOLDBYLINE or lexer.properties("fold.by.line",1) > 0 then
1428        if filesize <= threshold_by_line then
1429            return fold_by_line(text,start_pos,start_line,start_level,lexer)
1430        end
1431    end
1432    return { }
1433end
1434
1435-- The following code is mostly unchanged:
1436
1437local function add_rule(lexer,id,rule) -- unchanged
1438    if not lexer._RULES then
1439        lexer._RULES     = { }
1440        lexer._RULEORDER = { }
1441    end
1442    lexer._RULES[id] = rule
1443    lexer._RULEORDER[#lexer._RULEORDER + 1] = id
1444end
1445
1446local function modify_rule(lexer,id,rule) -- needed for textadept > 10
1447    if lexer._lexer then
1448        lexer = lexer._lexer
1449    end
1450    lexer._RULES[id] = rule
1451end
1452
1453local function get_rule(lexer,id) -- needed for textadept > 10
1454    if lexer._lexer then
1455        lexer = lexer._lexer
1456    end
1457    return lexer._RULES[id]
1458end
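
-- For reference, a sketch of what these helpers maintain (the names are invented):
--
-- add_rule(lexer,"whitespace",p_spacing)
-- add_rule(lexer,"comment",   p_comment)
--
-- -- lexer._RULES     = { whitespace = p_spacing, comment = p_comment }
-- -- lexer._RULEORDER = { "whitespace", "comment" }
--
-- The order matters because join_tokens (below) folds the rules into one ordered
-- lpeg choice, so an earlier rule wins when several match at the same position.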
1459
-- I finally figured out that adding more styles was an issue for several
-- reasons:
--
-- + in old versions there was a limit on the number of styles, so we overran the
--   built-in hard coded scintilla range
-- + then, the add_style function didn't check for already known ones, so again
--   we had an overrun (with some magic that could have been avoided)
-- + then, when I messed with a new default set I realized that there is no check
--   when _TOKENSTYLES gets initialized (here the inspect function helps)
-- + of course it was mostly a side effect of passing all the used styles to
--   _tokenstyles instead of only the non-default ones, but such a thing should
--   not matter (read: it should be intercepted)
--
-- Fixing this finally removed a headache; it was revealed by lots of tracing,
-- which I should have built in much earlier.
1475
1476local function add_style(lexer,token_name,style) -- changed a bit around 3.41
1477    -- We don't add styles that are already defined as this can overflow the
1478    -- amount possible (in old versions of scintilla).
1479    if defaultstyles[token_name] then
1480        if trace and detail then
1481            report("default style '%s' is ignored as extra style",token_name)
1482        end
1483        if textadept then
1484            -- go on, stored per buffer
1485        else
1486            return
1487        end
1488    elseif predefinedstyles[token_name] then
1489        if trace and detail then
1490            report("predefined style '%s' is ignored as extra style",token_name)
1491        end
1492        if textadept then
1493            -- go on, stored per buffer
1494        else
1495            return
1496        end
1497    else
1498        if trace and detail then
1499            report("adding extra style '%s' as '%s'",token_name,style)
1500        end
1501    end
1502    -- This is unchanged. We skip the dangerous zone.
1503    local num_styles = lexer._numstyles
1504    if num_styles == 32 then
1505        num_styles = num_styles + 8
1506    end
1507    if num_styles >= 255 then
1508        report("there can't be more than %s styles",255)
1509    end
1510    lexer._TOKENSTYLES[token_name] = num_styles
1511    lexer._EXTRASTYLES[token_name] = style
1512    lexer._numstyles = num_styles + 1
1513    -- hm, the original (now) also copies to the parent ._lexer
1514end
1515
1516local function check_styles(lexer)
1517    -- Here we also use a check for the dangerous zone. That way we can have a
1518    -- larger default set. The original code just assumes that #default is less
1519    -- than the dangerous zone's start.
1520    local numstyles   = 0
1521    local tokenstyles = { }
1522    for i=1, #default do
1523        if numstyles == 32 then
1524            numstyles = numstyles + 8
1525        end
1526        tokenstyles[default[i]] = numstyles
1527        numstyles = numstyles + 1
1528    end
1529    -- Unchanged.
1530    for i=1, #predefined do
1531        tokenstyles[predefined[i]] = i + 31
1532    end
1533    lexer._TOKENSTYLES  = tokenstyles
1534    lexer._numstyles    = numstyles
1535    lexer._EXTRASTYLES  = { }
1536    return lexer
1537end
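
-- So the numbering ends up as: the default set occupies 0 up to 31 and then
-- continues at 40, because 32 up to 39 is the range that scintilla reserves for
-- the predefined styles mapped just above; extra styles follow after that.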
1538
-- At some point an 'any' append showed up in the original code ... but I see no
-- need to catch that case ... better fix the specification.
--
-- hm, why are many tokens joined twice
1543
1544local function join_tokens(lexer) -- slightly different from the original (no 'any' append)
1545    local patterns = lexer._RULES
1546    local order    = lexer._RULEORDER
1547 -- report("lexer: %s, tokens: %s",lexer._NAME,table.concat(order," + "))
1548    if patterns and order then
1549        local token_rule = patterns[order[1]] -- normally whitespace
1550        for i=2,#order do
1551            token_rule = token_rule + patterns[order[i]]
1552        end
1553        if lexer._TYPE ~= "context" then
1554           token_rule = token_rule + lexers.token(lexers.DEFAULT, patterns.any)
1555        end
1556        lexer._TOKENRULE = token_rule
1557        return token_rule
1558    else
1559        return P(1)
1560    end
1561end
1562
-- hm, maybe instead of a grammar just a flat one
1564
1565local function add_lexer(grammar, lexer) -- mostly the same as the original
1566    local token_rule = join_tokens(lexer)
1567    local lexer_name = lexer._NAME
1568    local children   = lexer._CHILDREN
1569    for i=1,#children do
1570        local child = children[i]
1571        if child._CHILDREN then
1572            add_lexer(grammar, child)
1573        end
1574        local child_name        = child._NAME
1575        local rules             = child._EMBEDDEDRULES[lexer_name]
1576        local rules_token_rule  = grammar["__" .. child_name] or rules.token_rule
1577        local pattern           = (-rules.end_rule * rules_token_rule)^0 * rules.end_rule^-1
1578        grammar[child_name]     = pattern * V(lexer_name)
1579        local embedded_child    = "_" .. child_name
1580        grammar[embedded_child] = rules.start_rule * pattern
1581        token_rule              = V(embedded_child) + token_rule
1582    end
1583    if trace then
1584        report("adding lexer '%s' with %s children",lexer_name,#children)
1585    end
1586    grammar["__" .. lexer_name] = token_rule
1587    grammar[lexer_name]         = token_rule^0
1588end
1589
1590local function build_grammar(lexer,initial_rule) -- same as the original
1591    local children   = lexer._CHILDREN
1592    local lexer_name = lexer._NAME
1593    local preamble   = lexer._preamble
1594    local grammar    = lexer._grammar
1595 -- if grammar then
1596 --     -- experiment
1597 -- elseif children then
1598    if children then
1599        if not initial_rule then
1600            initial_rule = lexer_name
1601        end
1602        grammar = { initial_rule }
1603        add_lexer(grammar, lexer)
1604        lexer._INITIALRULE = initial_rule
1605        grammar = Ct(P(grammar))
1606        if trace then
            report("building grammar for '%s' with whitespace '%s' and %s children",lexer_name,lexer.whitespace or "?",#children)
1608        end
1609    else
1610        grammar = Ct(join_tokens(lexer)^0)
1611        if trace then
1612            report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?")
1613        end
1614    end
1615    if preamble then
1616        grammar = preamble^-1 * grammar
1617    end
1618    lexer._GRAMMAR = grammar
1619end
1620
1621-- So far. We need these local functions in the next one.
1622
1623local lineparsers = { }
1624
1625local maxmatched  = 100
1626
1627local function collapsed(t)
1628    local lasttoken = nil
1629    local lastindex = nil
1630    for i=1,#t,2 do
1631        local token    = t[i]
1632        local position = t[i+1]
1633        if token == lasttoken then
1634            t[lastindex] = position
1635        elseif lastindex then
1636            lastindex = lastindex + 1
1637            t[lastindex] = token
1638            lastindex = lastindex + 1
1639            t[lastindex] = position
1640            lasttoken = token
1641        else
1642            lastindex = i+1
1643            lasttoken = token
1644        end
1645    end
1646    for i=#t,lastindex+1,-1 do
1647        t[i] = nil
1648    end
1649    return t
1650end
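
-- A worked example: the lexed result is a flat { token, endposition, token,
-- endposition, ... } array and collapsed() merges adjacent ranges that carry the
-- same token, so
--
--   { "text", 5, "text", 9, "space", 10, "text", 14 }
--
-- becomes
--
--   { "text", 9, "space", 10, "text", 14 }
--
-- which is the (small) win that the 'collapse' option buys us.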
1651
1652local function matched(lexer,grammar,text)
1653 -- text = string.gsub(text,"\z","!")
1654    local t = lpegmatch(grammar,text)
1655    if trace then
1656        if show then
1657            report("output of lexer: %s (max %s entries)",lexer._NAME,maxmatched)
1658            local s = lexer._TOKENSTYLES
1659            local p = 1
1660            for i=1,2*maxmatched,2 do
1661                local n = i + 1
1662                local ti = t[i]
1663                local tn = t[n]
1664                if ti then
1665                    local txt = sub(text,p,tn-1)
1666                    if txt then
1667                        txt = gsub(txt,"[%s]"," ")
1668                    else
1669                        txt = "!no text!"
1670                    end
1671                    report("%4i : %s > %s (%s) (%s)",floor(n/2),ti,tn,s[ti] or "!unset!",txt)
1672                    p = tn
1673                else
1674                    break
1675                end
1676            end
1677        end
1678        report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2))
1679        if collapse then
1680            t = collapsed(t)
1681            report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2))
1682        end
1683    elseif collapse then
1684        t = collapsed(t)
1685    end
1686    return t
1687end
1688
1689-- Todo: make nice generic lexer (extra argument with start/stop commands) for
1690-- context itself.
1691--
-- In textadept >= 10 grammar building seems to have changed a bit. So, in
-- retrospect I could better have just dropped compatibility and stuck to ctx
-- lexers only.
1694
1695function context.lex(lexer,text,init_style)
1696 -- local lexer = global._LEXER
1697    local grammar = lexer._GRAMMAR
1698    if initialize then
1699        initialize()
1700    end
1701    if not grammar then
1702        return { }
1703    elseif lexer._LEXBYLINE then -- we could keep token
1704        local tokens = { }
1705        local offset = 0
1706        local noftokens = 0
1707        local lineparser = lineparsers[lexer]
1708        if not lineparser then -- probably a cmt is more efficient
1709            lineparser = C((1-newline)^0 * newline) / function(line)
1710                local length = #line
1711                local line_tokens = length > 0 and lpegmatch(grammar,line)
1712                if line_tokens then
1713                    for i=1,#line_tokens,2 do
1714                        noftokens = noftokens + 1
1715                        tokens[noftokens] = line_tokens[i]
1716                        noftokens = noftokens + 1
1717                        tokens[noftokens] = line_tokens[i + 1] + offset
1718                    end
1719                end
1720                offset = offset + length
1721                if noftokens > 0 and tokens[noftokens] ~= offset then
1722                    noftokens = noftokens + 1
1723                    tokens[noftokens] = "default"
1724                    noftokens = noftokens + 1
1725                    tokens[noftokens] = offset + 1
1726                end
1727            end
1728            lineparser = lineparser^0
1729            lineparsers[lexer] = lineparser
1730        end
1731        lpegmatch(lineparser,text)
1732        return tokens
1733    elseif lexer._CHILDREN then
1734        local hash = lexer._HASH -- hm, was _hash
1735        if not hash then
1736            hash = { }
1737            lexer._HASH = hash
1738        end
1739        grammar = hash[init_style]
1740        if grammar then
1741            lexer._GRAMMAR = grammar
1742         -- lexer._GRAMMAR = lexer._GRAMMAR or grammar
1743        else
1744            for style, style_num in next, lexer._TOKENSTYLES do
1745                if style_num == init_style then
                    -- the name of the lexer is filtered from the whitespace
                    -- specification .. weird code, should be a reverse hash
1748                    local lexer_name = match(style,"^(.+)_whitespace") or lexer._NAME
1749                    if lexer._INITIALRULE ~= lexer_name then
1750                        grammar = hash[lexer_name]
1751                        if not grammar then
1752                            build_grammar(lexer,lexer_name)
1753                            grammar = lexer._GRAMMAR
1754                            hash[lexer_name] = grammar
1755                        end
1756                    end
1757                    break
1758                end
1759            end
1760            grammar = grammar or lexer._GRAMMAR
1761            hash[init_style] = grammar
1762        end
1763        if trace then
1764            report("lexing '%s' with initial style '%s' and %s children", lexer._NAME,init_style,#lexer._CHILDREN or 0)
1765        end
1766        return matched(lexer,grammar,text)
1767    else
1768        if trace then
1769            report("lexing '%s' with initial style '%s'",lexer._NAME,init_style)
1770        end
1771        return matched(lexer,grammar,text)
1772    end
1773end
1774
-- hm, changed in 3.24 .. no longer small tables but one table (so we could remove
-- our aggressive optimization which worked quite well)
1777
1778function context.token(name, patt)
1779    return patt * Cc(name) * Cp()
1780end
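
-- A quick sanity check of that format (a sketch, using the lpeg shortcuts used
-- elsewhere in this file):
--
-- local p = Ct((context.token("number",R("09")^1) + context.token("default",P(1)))^0)
-- -- lpegmatch(p,"12 x") --> { "number", 3, "default", 4, "default", 5 }
--
-- that is: the flat { token, endposition } pairs that matched() and collapsed()
-- operate on.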
1781
-- The next ones were mostly unchanged (till now); we moved them here when 3.41
-- became close to impossible to combine with (or rather overload) and a merge was
-- the only solution. It makes later updates more painful but the update to 3.41
-- was already a bit of a nightmare anyway.
1786
-- Loading lexers is rather interwoven with what the dll/so sets and that changes
-- over time, so we need to keep an eye on changes. One problem that we always
-- faced was the limitation on the length of lexer names (as they occasionally get
-- appended or prepended to strings with a hard coded length limit). So, we always
-- used alternative names and now need to make sure these don't clash. As I no
-- longer intend to use shipped lexers I could strip away some of the code in the
-- future, but keeping it as reference makes sense.
1795
-- I spent quite some time figuring out why 3.41 didn't work or crashed, which is
-- hard when no stdout is available and the io library is absent. In the end one
-- of the problems was the _NAME setting: because we set _NAME to e.g. 'tex' but
-- load from a file with a longer name (which we do so that we don't clash with
-- existing files), we ended up with lexers not being found.
1802
1803local whitespaces = { }
1804
1805local function push_whitespace(name)
1806    table.insert(whitespaces,lexers.WHITESPACE or "whitespace")
1807    lexers.WHITESPACE = name .. "_whitespace"
1808end
1809
1810local function pop_whitespace()
1811    lexers.WHITESPACE = table.remove(whitespaces) or "whitespace"
1812end
1813
1814local function check_whitespace(lexer,name)
1815    if lexer then
1816        lexer.whitespace = (name or lexer.name or lexer._NAME) .. "_whitespace"
1817    end
1818end
1819
1820function context.new(name,filename)
1821    local lexer = {
1822        _TYPE        = "context",
1823        --
1824        _NAME        = name,       -- used for token building
        _FILENAME    = filename,   -- for diagnostic purposes
1826        --
1827        name         = name,
1828        filename     = filename,
1829    }
1830    if trace then
1831        report("initializing lexer tagged '%s' from file '%s'",name,filename or name)
1832    end
1833    check_whitespace(lexer)
1834    check_styles(lexer)
1835    check_properties(lexer)
1836    lexer._tokenstyles = context.styleset
1837    return lexer
1838end
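
-- A minimal usage sketch (rule names and patterns invented; the real lexers, for
-- instance scite-context-lexer-tex.lua, do the same thing in more detail): a
-- lexer file creates a lexer table, fills _rules and optionally _tokenstyles and
-- _foldsymbols, and returns it so that loadlexer (below) can build the grammar.
-- Assume the usual local shortcuts for lpeg and this module are in scope.
--
-- local mylexer = context.new("mine","scite-context-lexer-mine")
-- local token   = lexers.token
--
-- mylexer._rules = {
--     { "whitespace", token(mylexer.whitespace, S(" \t\r\n")^1) },
--     { "comment",    token("comment", P("#") * (1 - S("\r\n"))^0) },
--     { "rest",       token("default", P(1)) },
-- }
--
-- return mylexer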
1839
1840local function nolexer(name)
1841    local lexer = {
1842        _TYPE  = "unset",
1843        _NAME  = name,
1844     -- _rules = { },
1845    }
1846    check_styles(lexer)
1847    check_whitespace(lexer)
1848    check_properties(lexer)
1849    return lexer
1850end
1851
1852local function load_lexer(name,namespace)
1853    if trace then
1854        report("loading lexer file '%s'",name)
1855    end
1856    push_whitespace(namespace or name) -- for traditional lexers .. no alt_name yet
1857    local lexer, fullname = context.loadluafile(name)
1858    pop_whitespace()
1859    if not lexer then
1860        report("invalid lexer file '%s'",name)
1861    elseif trace then
1862        report("lexer file '%s' has been loaded",fullname)
1863    end
1864    if type(lexer) ~= "table" then
1865        if trace then
1866            report("lexer file '%s' gets a dummy lexer",name)
1867        end
1868        return nolexer(name)
1869    end
1870    if lexer._TYPE ~= "context" then
1871        lexer._TYPE = "native"
1872        check_styles(lexer)
1873        check_whitespace(lexer,namespace or name)
1874        check_properties(lexer)
1875    end
1876    if not lexer._NAME then
1877        lexer._NAME = name -- so: filename
1878    end
1879    if name ~= namespace then
1880        lexer._NAME = namespace
1881    end
1882    return lexer
1883end
1884
1885-- tracing ...
1886
1887local function inspect_lexer(lexer,level)
1888    -- If we had the regular libs available I could use the usual
1889    -- helpers.
1890    local parent = lexer._lexer
1891    lexer._lexer = nil -- prevent endless recursion
1892    local name = lexer._NAME
1893    local function showstyles_1(tag,styles)
1894        local numbers = { }
1895        for k, v in next, styles do
1896            numbers[v] = k
1897        end
1898        -- sort by number and make number hash too
1899        local keys = sortedkeys(numbers)
1900        for i=1,#keys do
1901            local k = keys[i]
1902            local v = numbers[k]
1903            report("[%s %s] %s %s = %s",level,name,tag,k,v)
1904        end
1905    end
1906    local function showstyles_2(tag,styles)
1907        local keys = sortedkeys(styles)
1908        for i=1,#keys do
1909            local k = keys[i]
1910            local v = styles[k]
1911            report("[%s %s] %s %s = %s",level,name,tag,k,v)
1912        end
1913    end
1914    local keys = sortedkeys(lexer)
1915    for i=1,#keys do
1916        local k = keys[i]
1917        local v = lexer[k]
1918        report("[%s %s] root key : %s = %s",level,name,k,tostring(v))
1919    end
1920    showstyles_1("token style",lexer._TOKENSTYLES)
1921    showstyles_2("extra style",lexer._EXTRASTYLES)
1922    local children = lexer._CHILDREN
1923    if children then
1924        for i=1,#children do
1925            inspect_lexer(children[i],level+1)
1926        end
1927    end
1928    lexer._lexer = parent
1929end
1930
1931function context.inspect(lexer)
1932    inspect_lexer(lexer,0)
1933end
1934
-- An optional second argument has been introduced so that one can embed a lexer
-- more than once ... maybe something to look into (as now it's done by remembering
-- the start sequence ... quite okay but maybe suboptimal ... anyway, never change
-- a working solution).
1939
1940-- namespace can be automatic: if parent then use name of parent (chain)
1941
-- The original lexer framework had a rather messy user interface (e.g. moving
-- stuff from _rules to _RULES at some point) but I could live with that. Now it
-- uses add_ helpers, but the subsystem is still not clean and pretty. I could
-- move to the add_ helpers too but there is no gain in it, so we support a mix,
-- which gives somewhat ugly code. In fact, there should be proper subtables for
-- this. I might actually do that because we now always overload the normal lexer
-- (parallel usage seems no longer possible). For SciTE we can actually do a
-- conceptual upgrade (more the context way) because there is no further
-- development there. That way we could make even more advanced lexers.
1951
1952local savedrequire = require
1953
1954local escapes = {
1955    ["%"] = "%%",
1956    ["."] = "%.",
1957    ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
1958    ["["] = "%[", ["]"] = "%]",
1959    ["("] = "%(", [")"] = "%)",
1960 -- ["{"] = "%{", ["}"] = "%}"
1961 -- ["^"] = "%^", ["$"] = "%$",
1962}
1963
1964function context.loadlexer(filename,namespace)
1965
1966    if textadept then
1967        require = function(name)
1968            return savedrequire(name == "lexer" and "scite-context-lexer" or name)
1969        end
1970    end
1971
1972    nesting = nesting + 1
1973    if not namespace then
1974        namespace = filename
1975    end
1976    local lexer = usedlexers[namespace] -- we load by filename but the internal name can be short
1977    if lexer then
1978        if trace then
1979            report("reusing lexer '%s'",namespace)
1980        end
1981        nesting = nesting - 1
1982        return lexer
1983    elseif trace then
1984        report("loading lexer '%s'",namespace)
1985    end
1986    --
1987    if initialize then
1988        initialize()
1989    end
1990    --
1991    parent_lexer = nil
1992    --
1993    lexer = load_lexer(filename,namespace) or nolexer(filename,namespace)
1994    usedlexers[filename] = lexer
1995    --
    if not lexer._rules and not lexer._lexer and not lexer._grammar then
1997        lexer._lexer = parent_lexer
1998    end
1999    --
2000    if lexer._lexer then
2001        local _l = lexer._lexer
2002        local _r = lexer._rules
2003        local _s = lexer._tokenstyles
2004        if not _l._tokenstyles then
2005            _l._tokenstyles = { }
2006        end
2007        if _r then
2008            local rules = _l._rules
2009            local name  = lexer.name
2010            for i=1,#_r do
2011                local rule = _r[i]
2012                rules[#rules + 1] = {
2013                    name .. "_" .. rule[1],
2014                    rule[2],
2015                }
2016            end
2017        end
2018        if _s then
2019            local tokenstyles = _l._tokenstyles
2020            for token, style in next, _s do
2021                tokenstyles[token] = style
2022            end
2023        end
2024        lexer = _l
2025    end
2026    --
2027    local _r = lexer._rules
2028    local _g = lexer._grammar
2029 -- if _r or _g then
2030    if _r then
2031        local _s = lexer._tokenstyles
2032        if _s then
2033            for token, style in next, _s do
2034                add_style(lexer, token, style)
2035            end
2036        end
2037        if _r then
2038            for i=1,#_r do
2039                local rule = _r[i]
2040                add_rule(lexer, rule[1], rule[2])
2041            end
2042        end
2043        build_grammar(lexer)
2044    else
2045        -- other lexers
2046        build_grammar(lexer)
2047    end
2048    --
2049    add_style(lexer, lexer.whitespace, lexers.STYLE_WHITESPACE)
2050    --
2051    local foldsymbols = lexer._foldsymbols
2052    if foldsymbols then
2053        local patterns = foldsymbols._patterns
2054        if patterns then
2055            for i = 1, #patterns do
2056                patterns[i] = "()(" .. gsub(patterns[i],".",escapes) .. ")"
2057            end
2058        end
2059    end
2060    --
2061    lexer.lex  = lexers.lex
2062    lexer.fold = lexers.fold
2063    --
2064    nesting = nesting - 1
2065    --
2066    if inspect then
2067        context.inspect(lexer)
2068    end
2069    --
2070    if textadept then
2071        require = savedrequire
2072    end
2073    --
2074    return lexer
2075end
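
-- A usage sketch: we load by the (long) file name but register under a short
-- namespace so that derived style names stay within scintilla's length limits;
-- the names below are just an example.
--
-- local texlexer = context.loadlexer("scite-context-lexer-tex","tex")
-- -- or, via the alias installed further down:
-- -- local texlexer = lexers.load("scite-context-lexer-tex","tex")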
2076
-- I probably need to check this occasionally against the original as I've messed
-- around a bit in the past to get nesting working well: one can hit the maximum
-- number of styles, get clashes due to fuzzy inheritance, etc., so there is some
-- interplay with the other patched code.
2081
2082function context.embed_lexer(parent, child, start_rule, end_rule) -- mostly the same as the original
2083    local embeddedrules = child._EMBEDDEDRULES
2084    if not embeddedrules then
2085        embeddedrules = { }
2086        child._EMBEDDEDRULES = embeddedrules
2087    end
2088    if not child._RULES then
2089        local rules = child._rules
2090        if not rules then
2091            report("child lexer '%s' has no rules",child._NAME or "unknown")
2092            rules = { }
2093            child._rules = rules
2094        end
2095        for i=1,#rules do
2096            local rule = rules[i]
2097            add_rule(child, rule[1], rule[2])
2098        end
2099    end
2100    embeddedrules[parent._NAME] = {
2101        ["start_rule"] = start_rule,
2102        ["token_rule"] = join_tokens(child),
2103        ["end_rule"]   = end_rule
2104    }
2105    local children = parent._CHILDREN
2106    if not children then
2107        children = { }
2108        parent._CHILDREN = children
2109    end
2110    children[#children + 1] = child
2111    local tokenstyles = parent._tokenstyles
2112    if not tokenstyles then
2113        tokenstyles = { }
2114        parent._tokenstyles = tokenstyles
2115    end
2116    local childname = child._NAME
2117    local whitespace = childname .. "_whitespace"
2118    tokenstyles[whitespace] = lexers.STYLE_WHITESPACE -- all these STYLE_THINGS will go .. just a proper hash
2119    if trace then
2120        report("using whitespace '%s' as trigger for '%s' with property '%s'",whitespace,childname,lexers.STYLE_WHITESPACE)
2121    end
2122    local childstyles = child._tokenstyles
2123    if childstyles then
2124        for token, style in next, childstyles do
2125            tokenstyles[token] = style
2126        end
2127    end
2128    -- new, a bit redone, untested, no clue yet what it is for
2129    local parentsymbols = parent._foldsymbols
2130    local childsymbols  = child ._foldsymbols
2131    if not parentsymbols then
2132        parentsymbols = { }
2133        parent._foldsymbols = parentsymbols
2134    end
2135    if childsymbols then
2136        for token, symbols in next, childsymbols do
2137            local tokensymbols = parentsymbols[token]
2138            if not tokensymbols then
2139                tokensymbols = { }
2140                parentsymbols[token] = tokensymbols
2141            end
2142            for k, v in next, symbols do
2143                if type(k) == 'number' then
2144                    tokensymbols[#tokensymbols + 1] = v
2145                elseif not tokensymbols[k] then
2146                    tokensymbols[k] = v
2147                end
2148            end
2149        end
2150    end
2151    --
2152    child._lexer = parent
2153    parent_lexer = parent
2154end
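
-- A usage sketch (the start and stop patterns are made up): embedding a metafun
-- child into a tex parent by providing the tokens that switch lexers, which is
-- roughly how the context lexers set up nested lexing.
--
-- context.embed_lexer(
--     texlexer,                                  -- parent
--     metafunlexer,                              -- child
--     token("embedded",P("\\startMPcode")),      -- start rule
--     token("embedded",P("\\stopMPcode"))        -- stop rule
-- )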
2155
2156-- we now move the adapted code to the lexers namespace
2157
2158lexers.new         = context.new
2159lexers.load        = context.loadlexer
2160------.loadlexer   = context.loadlexer
2161lexers.loadluafile = context.loadluafile
2162lexers.embed_lexer = context.embed_lexer
2163lexers.fold        = context.fold
2164lexers.lex         = context.lex
2165lexers.token       = context.token
2166lexers.word_match  = context.word_match
2167lexers.exact_match = context.exact_match
2168lexers.just_match  = context.just_match
2169lexers.inspect     = context.inspect
2170lexers.report      = context.report
2171lexers.inform      = context.inform
2172
2173-- helper .. alas ... in scite the lexer's lua instance is rather crippled .. not
2174-- even math is part of it
2175
2176do
2177
2178    local floor    = math and math.floor
2179    local char     = string.char
2180    local format   = format
2181    local tonumber = tonumber
2182
2183    local function utfchar(n)
2184        if n < 0x80 then
2185            return char(n)
2186        elseif n < 0x800 then
2187            return char(
2188                0xC0 + floor(n/0x40),
2189                0x80 + (n % 0x40)
2190            )
2191        elseif n < 0x10000 then
2192            return char(
2193                0xE0 + floor(n/0x1000),
2194                0x80 + (floor(n/0x40) % 0x40),
2195                0x80 + (n % 0x40)
2196            )
2197        elseif n < 0x40000 then
2198            return char(
2199                0xF0 + floor(n/0x40000),
2200                0x80 + floor(n/0x1000),
2201                0x80 + (floor(n/0x40) % 0x40),
2202                0x80 + (n % 0x40)
2203            )
2204        else
2205         -- return char(
2206         --     0xF1 + floor(n/0x1000000),
2207         --     0x80 + floor(n/0x40000),
2208         --     0x80 + floor(n/0x1000),
2209         --     0x80 + (floor(n/0x40) % 0x40),
2210         --     0x80 + (n % 0x40)
2211         -- )
2212            return "?"
2213        end
2214    end
2215
2216    context.utfchar = utfchar
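
    -- A couple of quick checks (the expected results are just the utf-8 byte
    -- sequences of these codepoints):
    --
    --   utfchar(0x00A0) --> "\194\160"
    --   utfchar(0x2005) --> "\226\128\133"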
2217
2218 -- -- the next one is good enough for use here but not perfect (see context for a
2219 -- -- better one)
2220 --
2221 -- local function make(t)
2222 --     local p
2223 --     for k, v in next, t do
2224 --         if not p then
2225 --             if next(v) then
2226 --                 p = P(k) * make(v)
2227 --             else
2228 --                 p = P(k)
2229 --             end
2230 --         else
2231 --             if next(v) then
2232 --                 p = p + P(k) * make(v)
2233 --             else
2234 --                 p = p + P(k)
2235 --             end
2236 --         end
2237 --     end
2238 --     return p
2239 -- end
2240 --
2241 -- function lpeg.utfchartabletopattern(list)
2242 --     local tree = { }
2243 --     for i=1,#list do
2244 --         local t = tree
2245 --         for c in gmatch(list[i],".") do
2246 --             if not t[c] then
2247 --                 t[c] = { }
2248 --             end
2249 --             t = t[c]
2250 --         end
2251 --     end
2252 --     return make(tree)
2253 -- end
2254
2255    local utf8next         = R("\128\191")
2256    local utf8one          = R("\000\127")
2257    local utf8two          = R("\194\223") * utf8next
2258    local utf8three        = R("\224\239") * utf8next * utf8next
2259    local utf8four         = R("\240\244") * utf8next * utf8next * utf8next
2260
2261    local utfidentifier    = utf8two + utf8three + utf8four
2262    helpers.utfidentifier  = (R("AZ","az","__")      + utfidentifier)
2263                           * (R("AZ","az","__","09") + utfidentifier)^0
2264
2265    helpers.utfcharpattern = P(1) * utf8next^0 -- unchecked but fast
2266    helpers.utfbytepattern = utf8one   / byte
2267                           + utf8two   / function(s) local c1, c2         = byte(s,1,2) return   c1 * 64 + c2                       -    12416 end
2268                           + utf8three / function(s) local c1, c2, c3     = byte(s,1,3) return  (c1 * 64 + c2) * 64 + c3            -   925824 end
2269                           + utf8four  / function(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
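
    -- The three magic constants above are the accumulated lead-in bits of the
    -- utf-8 byte sequences, so one subtraction undoes them:
    --
    --      0xC0 * 64 + 0x80                            = 12416
    --     (0xE0 * 64 + 0x80) * 64 + 0x80               = 925824
    --    ((0xF0 * 64 + 0x80) * 64 + 0x80) * 64 + 0x80  = 63447168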
2270
2271    local p_false          = P(false)
2272    local p_true           = P(true)
2273
2274    local function make(t)
2275        local function making(t)
2276            local p    = p_false
2277            local keys = sortedkeys(t)
2278            for i=1,#keys do
2279                local k = keys[i]
2280                if k ~= "" then
2281                    local v = t[k]
2282                    if v == true then
2283                        p = p + P(k) * p_true
2284                    elseif v == false then
2285                        -- can't happen
2286                    else
2287                        p = p + P(k) * making(v)
2288                    end
2289                end
2290            end
2291            if t[""] then
2292                p = p + p_true
2293            end
2294            return p
2295        end
2296        local p    = p_false
2297        local keys = sortedkeys(t)
2298        for i=1,#keys do
2299            local k = keys[i]
2300            if k ~= "" then
2301                local v = t[k]
2302                if v == true then
2303                    p = p + P(k) * p_true
2304                elseif v == false then
2305                    -- can't happen
2306                else
2307                    p = p + P(k) * making(v)
2308                end
2309            end
2310        end
2311        return p
2312    end
2313
2314    local function collapse(t,x)
2315        if type(t) ~= "table" then
2316            return t, x
2317        else
2318            local n = next(t)
2319            if n == nil then
2320                return t, x
2321            elseif next(t,n) == nil then
2322                -- one entry
2323                local k = n
2324                local v = t[k]
2325                if type(v) == "table" then
2326                    return collapse(v,x..k)
2327                else
2328                    return v, x .. k
2329                end
2330            else
2331                local tt = { }
2332                for k, v in next, t do
2333                    local vv, kk = collapse(v,k)
2334                    tt[kk] = vv
2335                end
2336                return tt, x
2337            end
2338        end
2339    end
2340
2341    function helpers.utfchartabletopattern(list)
2342        local tree = { }
2343        local n = #list
2344        if n == 0 then
2345            for s in next, list do
2346                local t = tree
2347                local p, pk
2348                for c in gmatch(s,".") do
2349                    if t == true then
2350                        t = { [c] = true, [""] = true }
2351                        p[pk] = t
2352                        p = t
2353                        t = false
2354                    elseif t == false then
2355                        t = { [c] = false }
2356                        p[pk] = t
2357                        p = t
2358                        t = false
2359                    else
2360                        local tc = t[c]
2361                        if not tc then
2362                            tc = false
2363                            t[c] = false
2364                        end
2365                        p = t
2366                        t = tc
2367                    end
2368                    pk = c
2369                end
2370                if t == false then
2371                    p[pk] = true
2372                elseif t == true then
2373                    -- okay
2374                else
2375                    t[""] = true
2376                end
2377            end
2378        else
2379            for i=1,n do
2380                local s = list[i]
2381                local t = tree
2382                local p, pk
2383                for c in gmatch(s,".") do
2384                    if t == true then
2385                        t = { [c] = true, [""] = true }
2386                        p[pk] = t
2387                        p = t
2388                        t = false
2389                    elseif t == false then
2390                        t = { [c] = false }
2391                        p[pk] = t
2392                        p = t
2393                        t = false
2394                    else
2395                        local tc = t[c]
2396                        if not tc then
2397                            tc = false
2398                            t[c] = false
2399                        end
2400                        p = t
2401                        t = tc
2402                    end
2403                    pk = c
2404                end
2405                if t == false then
2406                    p[pk] = true
2407                elseif t == true then
2408                    -- okay
2409                else
2410                    t[""] = true
2411                end
2412            end
2413        end
2414        collapse(tree,"")
2415    --     inspect(tree)
2416        return make(tree)
2417    end
2418
2419    patterns.invisibles = helpers.utfchartabletopattern {
2420        utfchar(0x00A0), -- nbsp
2421        utfchar(0x2000), -- enquad
2422        utfchar(0x2001), -- emquad
2423        utfchar(0x2002), -- enspace
2424        utfchar(0x2003), -- emspace
2425        utfchar(0x2004), -- threeperemspace
2426        utfchar(0x2005), -- fourperemspace
2427        utfchar(0x2006), -- sixperemspace
2428        utfchar(0x2007), -- figurespace
2429        utfchar(0x2008), -- punctuationspace
2430        utfchar(0x2009), -- breakablethinspace
2431        utfchar(0x200A), -- hairspace
2432        utfchar(0x200B), -- zerowidthspace
2433        utfchar(0x202F), -- narrownobreakspace
2434        utfchar(0x205F), -- math thinspace
2435    }
2436
2437    -- now we can make:
2438
2439    patterns.iwordtoken   = patterns.wordtoken - patterns.invisibles
2440    patterns.iwordpattern = patterns.iwordtoken^3
2441
2442end
2443
-- The following helpers are not used; they are partially replaced by other
-- mechanisms, and when needed I'll first optimize them. I only made them somewhat
-- more readable.
2446
2447function lexers.delimited_range(chars, single_line, no_escape, balanced) -- unchanged
2448    local s = sub(chars,1,1)
2449    local e = #chars == 2 and sub(chars,2,2) or s
2450    local range
2451    local b = balanced and s or ""
2452    local n = single_line and "\n" or ""
2453    if no_escape then
2454        local invalid = S(e .. n .. b)
2455        range = patterns.any - invalid
2456    else
2457        local invalid = S(e .. n .. b) + patterns.backslash
2458        range = patterns.any - invalid + patterns.backslash * patterns.any
2459    end
2460    if balanced and s ~= e then
2461        return P {
2462            s * (range + V(1))^0 * e
2463        }
2464    else
2465        return s * range^0 * P(e)^-1
2466    end
2467end
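
-- Usage sketches (these helpers are unused here, so just for reference):
--
-- lexers.delimited_range('"',true)                -- a single line double quoted string,
--                                                 -- backslash escapes permitted
-- lexers.delimited_range("()",false,true,true)    -- a balanced (...) group, no escapes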
2468
2469function lexers.starts_line(patt) -- unchanged
2470    return P ( function(input, index)
2471        if index == 1 then
2472            return index
2473        end
2474        local char = sub(input,index - 1,index - 1)
2475        if char == "\n" or char == "\r" or char == "\f" then
2476            return index
2477        end
2478    end ) * patt
2479end
2480
2481function lexers.last_char_includes(s) -- unchanged
2482    s = "[" .. gsub(s,"[-%%%[]", "%%%1") .. "]"
2483    return P ( function(input, index)
2484        if index == 1 then
2485            return index
2486        end
2487        local i = index
2488        while match(sub(input,i - 1,i - 1),"[ \t\r\n\f]") do
2489            i = i - 1
2490        end
2491        if match(sub(input,i - 1,i - 1),s) then
2492            return index
2493        end
2494    end)
2495end
2496
2497function lexers.nested_pair(start_chars, end_chars) -- unchanged
2498    local s = start_chars
2499    local e = P(end_chars)^-1
2500    return P {
2501        s * (patterns.any - s - end_chars + V(1))^0 * e
2502    }
2503end
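
-- e.g. lexers.nested_pair("/*","*/") matches a (possibly nested) comment block;
-- also unused here, kept for reference.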
2504
2505local function prev_line_is_comment(prefix, text, pos, line, s) -- unchanged
2506    local start = find(line,"%S")
2507    if start < s and not find(line,prefix,start,true) then
2508        return false
2509    end
2510    local p = pos - 1
2511    if sub(text,p,p) == "\n" then
2512        p = p - 1
2513        if sub(text,p,p) == "\r" then
2514            p = p - 1
2515        end
2516        if sub(text,p,p) ~= "\n" then
2517            while p > 1 and sub(text,p - 1,p - 1) ~= "\n"
2518                do p = p - 1
2519            end
2520            while find(sub(text,p,p),"^[\t ]$") do
2521                p = p + 1
2522            end
2523            return sub(text,p,p + #prefix - 1) == prefix
2524        end
2525    end
2526    return false
2527end
2528
2529local function next_line_is_comment(prefix, text, pos, line, s)
2530    local p = find(text,"\n",pos + s)
2531    if p then
2532        p = p + 1
2533        while find(sub(text,p,p),"^[\t ]$") do
2534            p = p + 1
2535        end
2536        return sub(text,p,p + #prefix - 1) == prefix
2537    end
2538    return false
2539end
2540
2541function lexers.fold_line_comments(prefix)
2542    local property_int = lexers.property_int
2543    return function(text, pos, line, s)
2544        if property_int["fold.line.comments"] == 0 then
2545            return 0
2546        end
2547        if s > 1 and match(line,"^%s*()") < s then
2548            return 0
2549        end
2550        local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s)
2551        local next_line_comment = next_line_is_comment(prefix, text, pos, line, s)
2552        if not prev_line_comment and next_line_comment then
2553            return 1
2554        end
2555        if prev_line_comment and not next_line_comment then
2556            return -1
2557        end
2558        return 0
2559    end
2560end
2561
-- There are some fundamental changes in textadept version 10 and I don't want to
-- adapt again, so we go the reverse route: map new to old. This is needed because
-- we need to load other lexers, which is the result of not being able to load the
-- lexer framework in parallel. Something happened in 10 that makes the main lexer
-- always enforced, so now we need to really replace that one (and even then it
-- loads twice; I can probably sort that out). Maybe there's now some hard coded
-- magic in the binary.
2569
2570if textadept then
2571
    -- Folds are still somewhat weak because the end condition is not bound to a
    -- start .. probably too complex and it seems to work anyhow. As we have
    -- extended things we just remap.
2575
2576    local function add_fold_point(lexer,token_name,start_symbol,end_symbol)
2577        if type(start_symbol) == "string" then
2578            local foldsymbols = lexer._foldsymbols
2579            if not foldsymbols then
2580                foldsymbols        = { }
2581                lexer._foldsymbols = foldsymbols
2582            end
            local patterns = foldsymbols._patterns
            local usedpatt = foldsymbols._usedpatt
            if not patterns then
                patterns              = { }
                usedpatt              = { } -- > 10 uses a mixed index/hash (we don't use patterns)
                foldsymbols._patterns = patterns
                foldsymbols._usedpatt = usedpatt
            end
2590            local foldsymbol = foldsymbols[token_name]
2591            if not foldsymbol then
2592                foldsymbol = { }
2593                foldsymbols[token_name] = foldsymbol
2594            end
2595            if not usedpatt[start_symbol] then
2596                patterns[#patterns+1] = start_symbol
2597                usedpatt[start_symbol] = true
2598            end
2599            if type(end_symbol) == "string" then
2600                foldsymbol[start_symbol] =  1
2601                foldsymbol[end_symbol]  = -1
2602                if not usedpatt[end_symbol] then
2603                    patterns[#patterns+1] = end_symbol
2604                    usedpatt[end_symbol]  = true
2605                end
2606            else
2607                foldsymbol[start_symbol] = end_symbol
2608            end
2609        end
2610    end
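
    -- So a new style call like (the token name is just an example)
    --
    --   lexer:add_fold_point("operator","{","}")
    --
    -- ends up as the old style specification
    --
    --   lexer._foldsymbols = {
    --       _patterns    = { "{", "}" },
    --       ["operator"] = { ["{"] = 1, ["}"] = -1 },
    --   }
    --
    -- which is what the folding code above consumes.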
2611
2612    local function add_style(lexer,name,style)
2613        local tokenstyles = lexer._tokenstyles
2614        if not tokenstyles then
2615            tokenstyles        = { }
2616            lexer._tokenstyles = tokenstyles
2617        end
2618        tokenstyles[name] = style
2619    end
2620
2621    local function add_rule(lexer,id,rule)
2622        local rules = lexer._rules
2623        if not rules then
2624            rules        = { }
2625            lexer._rules = rules
2626        end
2627        rules[#rules+1] = { id, rule }
2628    end
2629
2630    local function modify_rule(lexer,id,rule) -- needed for textadept > 10
2631        if lexer._lexer then
2632            lexer = lexer._lexer
2633        end
2634        local RULES = lexer._RULES
2635        if RULES then
2636            RULES[id] = rule
2637        end
2638    end
2639
2640    local function get_rule(lexer,id) -- needed for textadept > 10
2641        if lexer._lexer then
2642            lexer = lexer._lexer
2643        end
2644        local RULES = lexer._RULES
2645        if RULES then
2646            return RULES[id]
2647        end
2648    end
2649
2650    local new = context.new
2651    local lmt = {
2652        __index = {
2653
2654            add_rule       = add_rule,
2655            modify_rule    = modify_rule,
2656            get_rule       = get_rule,
2657            add_style      = add_style,
2658            add_fold_point = add_fold_point,
2659
2660            join_tokens    = join_tokens,
2661            build_grammar  = build_grammar,
2662
2663            embed          = lexers.embed,
2664            lex            = lexers.lex,
2665            fold           = lexers.fold
2666
2667        }
2668    }
2669
2670    function lexers.new(name,options)
2671        local lexer = new(name)
2672        if options then
2673            lexer._LEXBYLINE                 = options['lex_by_line']
2674            lexer._FOLDBYINDENTATION         = options['fold_by_indentation']
2675            lexer._CASEINSENSITIVEFOLDPOINTS = options['case_insensitive_fold_points']
2676            lexer._lexer                     = options['inherit']
2677        end
2678        setmetatable(lexer,lmt)
2679        return lexer
2680    end
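
    -- A usage sketch in the new style (the option and rule names are illustrative;
    -- the methods map onto the old style helpers above):
    --
    --   local lex = lexers.new("example", { fold_by_indentation = true })
    --   lex:add_rule("whitespace", lexers.token(lex.whitespace, S(" \t\r\n")^1))
    --   lex:add_fold_point("operator", "{", "}")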
2681
2682end
2683
2684-- done
2685
2686return lexers
2687