scite-context-lexer-txt.lua /size: 2322 b    last modification: 2020-07-01 14:35
-- Descriptive metadata for this lexer (version, purpose, authorship, license).
-- Nothing in the visible part of this file reads the table.
local info = {
    version = 1.002,
    comment = "scintilla lpeg lexer for plain text (with spell checking)",
    author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license = "see context related readme files",
}
8
9local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
10local find, match = string.find, string.match
11
12local lexer        = require("scite-context-lexer")
13local context      = lexer.context
14local patterns     = context.patterns
15
16local token        = lexer.token
17
18local textlexer    = lexer.new("txt","scite-context-lexer-txt")
19local whitespace   = textlexer.whitespace
20
21local space        = patterns.space
22local any          = patterns.any
23local wordtoken    = patterns.wordtoken
24local wordpattern  = patterns.wordpattern
25
26local checkedword  = context.checkedword
27local styleofword  = context.styleofword
28local setwordlist  = context.setwordlist
-- Spell checking state shared by the preamble callback (which assigns both
-- values) and the word rule (which hands both to styleofword). Until a
-- preamble selects a language there is no word list and the minimum is 3.
local validwords, validminimum = false, 3
31
32-- local styleset    = context.newstyleset {
33--     "default",
34--     "text", "okay", "error", "warning",
35--     "preamble",
36-- }
37
-- preamble syntax: one of the characters # ! - % followed by a space, e.g. "% language=uk"
39
-- Preamble detector: a subject that starts with a marker character (# ! - or %)
-- followed by a space may name a language on that first line, for instance
-- "% language=uk". When it does, setwordlist(language) provides the word list
-- and minimum word length that the word rule later passes to styleofword.
--
-- lpeg.Cmt hands the callback the position *after* the matched pattern. The
-- pattern consumes two bytes (marker plus space), so the marker itself sits at
-- i - 2; comparing i itself with 1 can never succeed.
--
-- The callback always returns false, so this pattern never matches and never
-- yields a token: it is evaluated purely for its side effect on validwords
-- and validminimum.
--
-- NOTE(review): with _LEXBYLINE enabled the subject may be a single line rather
-- than the whole document, in which case every line that starts with a marker
-- is treated as a preamble (and resets the state) -- confirm against the lexer
-- core.
local p_preamble = Cmt((S("#!-%") * P(" ")), function(input,i,_) -- todo: utf bom
    local first = i - 2 -- position of the marker character
    if first == 1 then
        -- a preamble without a (known) language switches checking off again
        validwords, validminimum = false, 3
        -- only the first line counts; it may lack a trailing line ending
        local _, _, line = find(input,"^[#!%-%%]([^\n\r]+)",first)
        if line then
            local language = match(line,"language=([a-z]+)")
            if language then
                validwords, validminimum = setwordlist(language)
            end
        end
    end
    return false
end)
53
-- Rule wrapper around the preamble detector, tagged with the "preamble" style.
-- p_preamble's match-time callback always returns false, so this rule is
-- consulted only for the callback's side effects.
local t_preamble = token("preamble", p_preamble)
56
-- Looks up the style for a matched word, using the word list and minimum
-- length currently selected by the preamble (both are read at match time).
local function wordstyle(word)
    return styleofword(validwords, validminimum, word)
end

-- A word yields whatever styleofword returns for it, followed by the position
-- just after the word.
local t_word =
    (wordpattern / wordstyle) * Cp()
59
-- One or more word characters, styled as "default".
local t_text = token("default", wordtoken^1)

-- One or more characters that are neither word characters nor spacing, also
-- styled as "default".
local t_rest = token("default", (P(1) - wordtoken - space)^1)

-- One or more spacing characters, tagged with this lexer's whitespace style.
local t_spacing = token(whitespace, space^1)
68
-- Named rules of the plain text lexer. The order is kept exactly as it was;
-- presumably the lexer core tries the rules from top to bottom -- confirm
-- before reordering.
local rules = {
    { "whitespace", t_spacing  },
    { "preamble",   t_preamble },
    { "word",       t_word     }, -- words >= 3
    { "text",       t_text     }, -- non words
    { "rest",       t_rest     },
}

textlexer._rules = rules

textlexer._LEXBYLINE   = true -- new (needs testing, not yet as the system changed in 3.24)
textlexer._tokenstyles = context.styleset -- shared style set taken from the lexer context

return textlexer
81