-- scite-context-lexer-sql.lua / size: 13 Kb / last modification: 2020-07-01 14:35
-- Descriptive metadata for this lexer file: version, short description,
-- authorship and licensing. The table is not referenced again in this file.
local info = {
    version   = 1.001,
    comment   = "scintilla lpeg lexer for sql",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- Shorthands for the LPeg constructors used below. `lpeg` is referenced as a
-- global, so it has to be available before this file is loaded.
local P, R, S = lpeg.P, lpeg.R, lpeg.S

-- The shared lexer framework and the helper tables taken from it.
local lexer       = require("scite-context-lexer")
local context     = lexer.context
local patterns    = context.patterns

-- Helpers for building styled tokens and word-list matchers.
local token       = lexer.token
local exact_match = lexer.exact_match

-- The lexer object that the rest of this file configures and finally returns.
local sqllexer    = lexer.new("sql","scite-context-lexer-sql")
local whitespace  = sqllexer.whitespace

-- ANSI SQL 92 | 99 | 2003
--
-- Words from the SQL standard. Further down this list is handed to
-- exact_match and the resulting matches are tagged with the "keyword" token.

local keywords_standard = {
    "absolute", "action", "add", "after", "all", "allocate", "alter", "and", "any",
    "are", "array", "as", "asc", "asensitive", "assertion", "asymmetric", "at",
    "atomic", "authorization", "avg", "before", "begin", "between", "bigint",
    "binary", "bit", "bit_length", "blob", "boolean", "both", "breadth", "by",
    "call", "called", "cascade", "cascaded", "case", "cast", "catalog", "char",
    "char_length", "character", "character_length", "check", "clob", "close",
    "coalesce", "collate", "collation", "column", "commit", "condition", "connect",
    "connection", "constraint", "constraints", "constructor", "contains", "continue",
    "convert", "corresponding", "count", "create", "cross", "cube", "current",
    "current_date", "current_default_transform_group", "current_path",
    "current_role", "current_time", "current_timestamp",
    "current_transform_group_for_type", "current_user", "cursor", "cycle", "data",
    "date", "day", "deallocate", "dec", "decimal", "declare", "default",
    "deferrable", "deferred", "delete", "depth", "deref", "desc", "describe",
    "descriptor", "deterministic", "diagnostics", "disconnect", "distinct", "do",
    "domain", "double", "drop", "dynamic", "each", "element", "else", "elseif",
    "end", "equals", "escape", "except", "exception", "exec", "execute", "exists",
    "exit", "external", "extract", "false", "fetch", "filter", "first", "float",
    "for", "foreign", "found", "free", "from", "full", "function", "general", "get",
    "global", "go", "goto", "grant", "group", "grouping", "handler", "having",
    "hold", "hour", "identity", "if", "immediate", "in", "indicator", "initially",
    "inner", "inout", "input", "insensitive", "insert", "int", "integer",
    "intersect", "interval", "into", "is", "isolation", "iterate", "join", "key",
    "language", "large", "last", "lateral", "leading", "leave", "left", "level",
    "like", "local", "localtime", "localtimestamp", "locator", "loop", "lower",
    "map", "match", "max", "member", "merge", "method", "min", "minute", "modifies",
    "module", "month", "multiset", "names", "national", "natural", "nchar", "nclob",
    "new", "next", "no", "none", "not", "null", "nullif", "numeric", "object",
    "octet_length", "of", "old", "on", "only", "open", "option", "or", "order",
    "ordinality", "out", "outer", "output", "over", "overlaps", "pad", "parameter",
    "partial", "partition", "path", "position", "precision", "prepare", "preserve",
    "primary", "prior", "privileges", "procedure", "public", "range", "read",
    "reads", "real", "recursive", "ref", "references", "referencing", "relative",
    "release", "repeat", "resignal", "restrict", "result", "return", "returns",
    "revoke", "right", "role", "rollback", "rollup", "routine", "row", "rows",
    "savepoint", "schema", "scope", "scroll", "search", "second", "section",
    "select", "sensitive", "session", "session_user", "set", "sets", "signal",
    "similar", "size", "smallint", "some", "space", "specific", "specifictype",
    "sql", "sqlcode", "sqlerror", "sqlexception", "sqlstate", "sqlwarning", "start",
    "state", "static", "submultiset", "substring", "sum", "symmetric", "system",
    "system_user", "table", "tablesample", "temporary", "then", "time", "timestamp",
    "timezone_hour", "timezone_minute", "to", "trailing", "transaction", "translate",
    "translation", "treat", "trigger", "trim", "true", "under", "undo", "union",
    "unique", "unknown", "unnest", "until", "update", "upper", "usage", "user",
    "using", "value", "values", "varchar", "varying", "view", "when", "whenever",
    "where", "while", "window", "with", "within", "without", "work", "write", "year",
    "zone",
}

-- The dialects list is taken from drupal.org with standard subtracted.
--
-- MySQL 3.23.x | 4.x | 5.x
-- PostgreSQL 8.1
-- MS SQL Server 2000
-- MS ODBC
-- Oracle 10.2
--
-- Further down this list is handed to exact_match and the resulting matches
-- are tagged with the "command" token, so dialect words are styled apart from
-- the standard keywords.

local keywords_dialects = {
    "a", "abort", "abs", "access", "ada", "admin", "aggregate", "alias", "also",
    "always", "analyse", "analyze", "assignment", "attribute", "attributes", "audit",
    "auto_increment", "avg_row_length", "backup", "backward", "bernoulli", "bitvar",
    "bool", "break", "browse", "bulk", "c", "cache", "cardinality", "catalog_name",
    "ceil", "ceiling", "chain", "change", "character_set_catalog",
    "character_set_name", "character_set_schema", "characteristics", "characters",
    "checked", "checkpoint", "checksum", "class", "class_origin", "cluster",
    "clustered", "cobol", "collation_catalog", "collation_name", "collation_schema",
    "collect", "column_name", "columns", "command_function", "command_function_code",
    "comment", "committed", "completion", "compress", "compute", "condition_number",
    "connection_name", "constraint_catalog", "constraint_name", "constraint_schema",
    "containstable", "conversion", "copy", "corr", "covar_pop", "covar_samp",
    "createdb", "createrole", "createuser", "csv", "cume_dist", "cursor_name",
    "database", "databases", "datetime", "datetime_interval_code",
    "datetime_interval_precision", "day_hour", "day_microsecond", "day_minute",
    "day_second", "dayofmonth", "dayofweek", "dayofyear", "dbcc", "defaults",
    "defined", "definer", "degree", "delay_key_write", "delayed", "delimiter",
    "delimiters", "dense_rank", "deny", "derived", "destroy", "destructor",
    "dictionary", "disable", "disk", "dispatch", "distinctrow", "distributed", "div",
    "dual", "dummy", "dump", "dynamic_function", "dynamic_function_code", "enable",
    "enclosed", "encoding", "encrypted", "end-exec", "enum", "errlvl", "escaped",
    "every", "exclude", "excluding", "exclusive", "existing", "exp", "explain",
    "fields", "file", "fillfactor", "final", "float4", "float8", "floor", "flush",
    "following", "force", "fortran", "forward", "freetext", "freetexttable",
    "freeze", "fulltext", "fusion", "g", "generated", "granted", "grants",
    "greatest", "header", "heap", "hierarchy", "high_priority", "holdlock", "host",
    "hosts", "hour_microsecond", "hour_minute", "hour_second", "identified",
    "identity_insert", "identitycol", "ignore", "ilike", "immutable",
    "implementation", "implicit", "include", "including", "increment", "index",
    "infile", "infix", "inherit", "inherits", "initial", "initialize", "insert_id",
    "instance", "instantiable", "instead", "int1", "int2", "int3", "int4", "int8",
    "intersection", "invoker", "isam", "isnull", "k", "key_member", "key_type",
    "keys", "kill", "lancompiler", "last_insert_id", "least", "length", "less",
    "limit", "lineno", "lines", "listen", "ln", "load", "location", "lock", "login",
    "logs", "long", "longblob", "longtext", "low_priority", "m", "matched",
    "max_rows", "maxextents", "maxvalue", "mediumblob", "mediumint", "mediumtext",
    "message_length", "message_octet_length", "message_text", "middleint",
    "min_rows", "minus", "minute_microsecond", "minute_second", "minvalue",
    "mlslabel", "mod", "mode", "modify", "monthname", "more", "move", "mumps",
    "myisam", "name", "nesting", "no_write_to_binlog", "noaudit", "nocheck",
    "nocompress", "nocreatedb", "nocreaterole", "nocreateuser", "noinherit",
    "nologin", "nonclustered", "normalize", "normalized", "nosuperuser", "nothing",
    "notify", "notnull", "nowait", "nullable", "nulls", "number", "octets", "off",
    "offline", "offset", "offsets", "oids", "online", "opendatasource", "openquery",
    "openrowset", "openxml", "operation", "operator", "optimize", "optionally",
    "options", "ordering", "others", "outfile", "overlay", "overriding", "owner",
    "pack_keys", "parameter_mode", "parameter_name", "parameter_ordinal_position",
    "parameter_specific_catalog", "parameter_specific_name",
    "parameter_specific_schema", "parameters", "pascal", "password", "pctfree",
    "percent", "percent_rank", "percentile_cont", "percentile_disc", "placing",
    "plan", "pli", "postfix", "power", "preceding", "prefix", "preorder", "prepared",
    "print", "proc", "procedural", "process", "processlist", "purge", "quote",
    "raid0", "raiserror", "rank", "raw", "readtext", "recheck", "reconfigure",
    "regexp", "regr_avgx", "regr_avgy", "regr_count", "regr_intercept", "regr_r2",
    "regr_slope", "regr_sxx", "regr_sxy", "regr_syy", "reindex", "reload", "rename",
    "repeatable", "replace", "replication", "require", "reset", "resource",
    "restart", "restore", "returned_cardinality", "returned_length",
    "returned_octet_length", "returned_sqlstate", "rlike", "routine_catalog",
    "routine_name", "routine_schema", "row_count", "row_number", "rowcount",
    "rowguidcol", "rowid", "rownum", "rule", "save", "scale", "schema_name",
    "schemas", "scope_catalog", "scope_name", "scope_schema", "second_microsecond",
    "security", "self", "separator", "sequence", "serializable", "server_name",
    "setof", "setuser", "share", "show", "shutdown", "simple", "soname", "source",
    "spatial", "specific_name", "sql_big_result", "sql_big_selects",
    "sql_big_tables", "sql_calc_found_rows", "sql_log_off", "sql_log_update",
    "sql_low_priority_updates", "sql_select_limit", "sql_small_result",
    "sql_warnings", "sqlca", "sqrt", "ssl", "stable", "starting", "statement",
    "statistics", "status", "stddev_pop", "stddev_samp", "stdin", "stdout",
    "storage", "straight_join", "strict", "string", "structure", "style",
    "subclass_origin", "sublist", "successful", "superuser", "synonym", "sysdate",
    "sysid", "table_name", "tables", "tablespace", "temp", "template", "terminate",
    "terminated", "text", "textsize", "than", "ties", "tinyblob", "tinyint",
    "tinytext", "toast", "top", "top_level_count", "tran", "transaction_active",
    "transactions_committed", "transactions_rolled_back", "transform", "transforms",
    "trigger_catalog", "trigger_name", "trigger_schema", "truncate", "trusted",
    "tsequal", "type", "uescape", "uid", "unbounded", "uncommitted", "unencrypted",
    "unlisten", "unlock", "unnamed", "unsigned", "updatetext", "use",
    "user_defined_type_catalog", "user_defined_type_code", "user_defined_type_name",
    "user_defined_type_schema", "utc_date", "utc_time", "utc_timestamp", "vacuum",
    "valid", "validate", "validator", "var_pop", "var_samp", "varbinary", "varchar2",
    "varcharacter", "variable", "variables", "verbose", "volatile", "waitfor",
    "width_bucket", "writetext", "x509", "xor", "year_month", "zerofill",
}

-- Basic building blocks borrowed from the shared pattern table.
local space         = patterns.space -- S(" \n\r\t\f\v")
local any           = patterns.any
local restofline    = patterns.restofline
local startofline   = patterns.startofline

-- The three quote characters that can delimit a string, plus a backslash
-- escape: a backslash followed by exactly one arbitrary character.
local squote        = P("'")
local dquote        = P('"')
local bquote        = P('`')
local escaped       = P("\\") * P(1)

-- Delimiters of a block comment.
local begincomment  = P("/*")
local endcomment    = P("*/")

-- Numbers: an integer is a decimal with an optional leading minus sign.
local decimal       = patterns.decimal
local float         = patterns.float
local integer       = P("-")^-1 * decimal

-- Whitespace runs get the lexer's own whitespace style; `rest` is the catch-all
-- that tags whatever `patterns.any` matches as "default".
local spacing       = token(whitespace, space^1)
local rest          = token("default", any)

-- A line comment starts with "#" or "--". A block comment runs from "/*" to
-- "*/"; the closing delimiter is optional, so an unterminated block comment
-- extends to the end of the input.
local shortcomment  = token("comment", (P("#") + P("--")) * restofline^0)
local longcomment   = token("comment", begincomment * (1-endcomment)^0 * endcomment^-1)

-- A word starts with a letter or underscore and continues with letters,
-- underscores or digits. Words that are not keywords are styled as "default".
local p_validword   = R("AZ","az","__") * R("AZ","az","__","09")^0
local identifier    = token("default",p_validword)

-- Strings delimited by double quotes, single quotes or backticks. Each variant
-- produces three tokens (opening quote, content, closing quote), and a
-- backslash escape inside the content never ends the string.
local shortstring   = token("quote",  dquote) -- can be shared
                    * token("string", (escaped + (1-dquote))^0)
                    * token("quote",  dquote)
                    + token("quote",  squote)
                    * token("string", (escaped + (1-squote))^0)
                    * token("quote",  squote)
                    + token("quote",  bquote)
                    * token("string", (escaped + (1-bquote))^0)
                    * token("quote",  bquote)

-- Word-list matchers: standard words become "keyword" tokens and dialect words
-- become "command" tokens.
-- NOTE(review): the third argument (true) presumably asks exact_match for
-- case-insensitive matching -- confirm against scite-context-lexer.
local p_keywords_s  = exact_match(keywords_standard,nil,true)
local p_keywords_d  = exact_match(keywords_dialects,nil,true)
local keyword_s     = token("keyword", p_keywords_s)
local keyword_d     = token("command", p_keywords_d)

-- Floats are tried before integers; single operator and punctuation characters
-- are styled as "special".
local number        = token("number", float + integer)
local operator      = token("special", S("+-*/%^!=<>;:{}[]().&|?~"))

-- Use the shared style set of the lexer framework for all token names.
sqllexer._tokenstyles = context.styleset

-- Pattern for potential fold points.
-- NOTE(review): braces are part of this pattern, but _foldsymbols below only
-- lists the block comment delimiters -- confirm whether that is intended.
sqllexer._foldpattern = P("/*") + P("*/") + S("{}") -- separate entry else interference

-- Folding: inside "comment" tokens "/*" adds one fold level and "*/" removes
-- one. The _patterns entries are Lua string patterns, hence the escaped "*".
sqllexer._foldsymbols = {
    _patterns = {
        "/%*",
        "%*/",
    },
    ["comment"] = {
        ["/*"] =  1,
        ["*/"] = -1,
    }
}

-- Named rules of the lexer. Both keyword rules are listed before the generic
-- identifier rule and the catch-all "rest" rule comes last.
-- NOTE(review): this assumes the framework tries the rules in list order --
-- confirm against scite-context-lexer.
sqllexer._rules = {
    { "whitespace",   spacing      },
    { "keyword-s",    keyword_s    },
    { "keyword-d",    keyword_d    },
    { "identifier",   identifier   },
    { "string",       shortstring  },
    { "longcomment",  longcomment  },
    { "shortcomment", shortcomment },
    { "number",       number       },
    { "operator",     operator     },
    { "rest",         rest         },
}

return sqllexer