-- Module metadata. The original table defined the 'comment' key twice;
-- in a Lua table constructor the second value silently overwrites the
-- first, so the two remarks are merged into one string here.
local info = {
    version   = 1.400,
    comment   = "basics for scintilla lpeg lexer for context/metafun, contains copyrighted code from mitchell.att.foicica.com",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
-- Raise lpeg's backtrack stack when the binding supports it; deeply nested
-- grammars can exceed the default limit. NOTE(review): at this point 'lpeg'
-- is still the global (the local require comes later) -- presumably the host
-- preloads it; verify.
if lpeg.setmaxstack then lpeg.setmaxstack(1000) end

-- Diagnostic flags; they can be overridden at runtime through the
-- "lexer.context.*" properties (see check_context_properties below).
local log = false
local trace = false
local detail = false
local show = false
local collapse = false
local inspect = false
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
local lpeg = require("lpeg")

-- Localize globals and frequently used library functions (faster lookups,
-- and survives later monkey patching of the globals).
local global = _G
local find, gmatch, match, lower, upper, gsub, sub, format, byte = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format, string.byte
local concat, sort = table.concat, table.sort
local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
local lpegmatch = lpeg.match

-- Detect the host: textadept, context (which exposes 'resolvers') or scite.
local usage = (textadept and "textadept") or (resolvers and "context") or "scite"
local nesting = 0
local output = nil -- lazily opened log file handle (see the local print below)
337
338
339
-- Redirect this module's diagnostic output to a log file (host editors have
-- no usable stdout for lexer code). The original wrote 'output:write(...,"\n")'
-- which truncates the varargs to their first value -- everything after the
-- first argument was silently dropped. It also crashed when the log file
-- could not be opened; we now fall back to stdout in that case.
local function print(...)
    if not output then
        output = io.open("lexer.log","w") or io.stdout
    end
    output:write(...)   -- varargs last in the list: all values are written
    output:write("\n")
    output:flush()
end
347
-- Emit one diagnostic line, prefixed with the current nesting level,
-- but only when the 'log' flag is set.
local function report(fmt,str,...)
    if not log then
        return
    end
    if str then
        fmt = format(fmt,str,...)
    end
    local level = nesting == 0 and "-" or nesting
    print(format("scite lpeg lexer > %s > %s",level,fmt))
end
356
-- Verbose variant of report: only active when logging as well as tracing
-- are enabled.
local function inform(...)
    if not log then
        return
    end
    if trace then
        report(...)
    end
end

inform("loading context lexer module (global table: %s)",tostring(global))
364
do

    -- Provide math.floor when the host stripped the math library. On Lua
    -- versions before 5.3 a string format round-trip is used; from 5.3 on
    -- the modulo trick keeps the integer subtype intact.
    local floor = math and math.floor
    local format = format
    local tonumber = tonumber

    if not floor then

        if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then
            floor = function(n)
                return tonumber(format("%d",n))
            end
        else

            -- floors towards zero semantics match math.floor for the
            -- non-negative sizes this module feeds it
            floor = function(n)
                return (n - n % 1)
            end
        end

        math = math or { }

        math.floor = floor

    end

end

local floor = math.floor
394
if not package.searchpath then

    -- Lua 5.1 lacks package.searchpath; emulate it. Every "?" in each
    -- semicolon separated template of 'path' is replaced by 'name'.

    inform("using adapted function 'package.searchpath' (if used at all)")

    function package.searchpath(name,path)
        local tried = { }
        for part in gmatch(path,"[^;]+") do
            local filename = gsub(part,"%?",name)
            local f = io.open(filename,"r")
            if f then
                inform("file found on path: %s",filename)
                f:close()
                return filename
            end
            tried[#tried + 1] = format("no file '%s'",filename)
        end
        -- Last resort: try the name itself relative to the current
        -- directory. (The original referenced 'filename' here, which is
        -- local to the loop above and therefore always nil at this point,
        -- making io.open raise instead of failing gracefully.)
        local f = io.open(name,"r")
        if f then
            inform("file found on current path: %s",name)
            f:close()
            return name
        end
        tried[#tried + 1] = format("no file '%s'",name)
        return nil, concat(tried,"\n")
    end

end
427
-- The public namespaces: 'lexers' mimics the stock lexer module interface,
-- 'context' carries our extensions, 'helpers' small utilities.
local lexers = { }
local context = { }
local helpers = { }
lexers.context = context
lexers.helpers = helpers

-- shared low level lpeg patterns (filled in further on)
local patterns = { }
context.patterns = patterns

context.report = report
context.inform = inform

lexers.LEXERPATH = package.path

if resolvers then
    -- running inside context proper: nothing extra needed here

end
446
-- Return the keys of 'hash' as a sorted list together with the length of
-- the longest key (as a string), handy for aligned report output.
local function sortedkeys(hash)
    local keys = { }
    local width = 0
    for key in next, hash do
        keys[#keys+1] = key
        local length = #tostring(key)
        if length > width then
            width = length
        end
    end
    sort(keys)
    return keys, width
end
459
helpers.sortedkeys = sortedkeys

-- NOTE(review): neither of these is referenced in this chunk; presumably
-- they serve the lexer loading/embedding code further on -- verify.
local usedlexers = { }
local parent_lexer = nil
464
465
466
467
468
469
470
471
472
-- Token (style) names that context lexers may produce; these map onto the
-- style numbers 0..31 and, beyond that, continue after the predefined bank
-- (see check_styles).
local default = {
    "nothing", "whitespace", "comment", "string", "number", "keyword",
    "identifier", "operator", "error", "preprocessor", "constant", "variable",
    "function", "type", "label", "embedded",
    "quote", "special", "extra", "reserved", "okay", "warning",
    "command", "internal", "preamble", "grouping", "primitive", "plain",
    "user",
    -- extras:
    "char", "class", "data", "definition", "invisible", "regex",
    "standout", "tag",
    "text",
}

-- Style names with fixed slots (style numbers 32..39) predefined by the
-- editing component.
local predefined = {
    "default", "linenumber", "bracelight", "bracebad", "controlchar",
    "indentguide", "calltip",

    "folddisplaytext"
}
492
493
494
495
496
497
-- Expose each style name in the way the stock lexer interface expects:
-- lexers.KEYWORD = "keyword" and lexers.STYLE_KEYWORD = "$(keyword)".
-- Returns a set (name -> true) for quick membership tests. The original
-- also built an unused local ("style." .. k); that dead code is removed.
local function preparestyles(list)
    local reverse = { }
    for i=1,#list do
        local k = list[i]
        local K = upper(k)
        lexers[K] = k
        lexers["STYLE_"..K] = "$(" .. k .. ")"
        reverse[k] = true
    end
    return reverse
end

local defaultstyles = preparestyles(default)
local predefinedstyles = preparestyles(predefined)
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
-- Fold constants and per-document views; the real values are picked up from
-- the host in initialize().
local FOLD_BASE = 0
local FOLD_HEADER = 0
local FOLD_BLANK = 0

local style_at = { }
local indent_amount = { }
local fold_level = { }
560
-- Create the shared property tables on the lexers namespace when they are
-- still missing.
local function check_main_properties()
    if lexers.property == nil then
        lexers.property = { }
    end
    if lexers.property_int == nil then
        -- integer view on the properties: anything that does not convert
        -- to a number reads as 0
        lexers.property_int = setmetatable({ }, {
            __index = function(t,k)
                return tonumber(lexers.property[k]) or 0
            end,
        })
    end
end
578
-- Read-only view on the properties that expands $(name) / %(name)
-- references recursively. The original fetched lexers.property before
-- calling check_main_properties() and never re-read it, so when the table
-- had to be created the following index on the stale nil raised.
lexers.property_expanded = setmetatable({ }, {
    __index = function(t,k)
        local property = lexers.property
        if not property then
            check_main_properties()
            property = lexers.property -- re-fetch the table just created
        end
        local v = property[k]
        if v then
            -- %b() grabs a balanced (..) group; sub strips "$(" and ")"
            v = gsub(v,"[$%%]%b()", function(k)
                return t[sub(k,3,-2)]
            end)
        end
        return v
    end,
    __newindex = function(t,k,v)
        report("properties are read-only, '%s' is not changed",k)
    end,
})
603
604
605
606
607
608
609
610
611
612
-- Attach a per-lexer 'properties' table: it falls back on the shared main
-- properties, and calling it as properties(key,default) does a typed lookup
-- with $(name)/%(name) expansion.
local function check_properties(lexer)
    if lexer.properties then
        return lexer
    end
    check_main_properties()

    local mainproperties = lexers.property
    local properties = { }
    local expanded = setmetatable({ }, {
        __index = function(t,k)
            -- NOTE(review): when neither table holds the key this passes
            -- nil to gsub and raises; presumably keys are always present
            -- when expansion is requested -- verify.
            return gsub(properties[k] or mainproperties[k],"[$%%]%b()", function(k)
                return t[sub(k,3,-2)]
            end)
        end,
    })
    lexer.properties = setmetatable(properties, {
        __index = mainproperties,
        -- the type of the given default decides how the raw value is
        -- interpreted (number, boolean or plain string)
        __call = function(t,k,default)
            local v = expanded[k]
            local t = type(default) -- shadows the table argument on purpose
            if t == "number" then
                return tonumber(v) or default
            elseif t == "boolean" then
                return v == nil and default or v
            else
                return v or default
            end
        end,
    })
    return lexer
end
644
645
646
647
648
649
650
651
-- Normalize a property value to a boolean: 0, false and "0" switch off,
-- 1, true and "1" switch on, anything else keeps the supplied default.
local function set(value,default)
    if value == 1 or value == true or value == "1" then
        return true
    end
    if value == 0 or value == false or value == "0" then
        return false
    end
    return default
end
661
-- Refresh the module's diagnostic flags from the shared property table;
-- each flag keeps its current value unless the property holds an explicit
-- on/off value (see 'set' above).
local function check_context_properties()
    local property = lexers.property
    log = set(property["lexer.context.log"], log)
    trace = set(property["lexer.context.trace"], trace)
    detail = set(property["lexer.context.detail"], detail)
    show = set(property["lexer.context.show"], show)
    collapse = set(property["lexer.context.collapse"],collapse)
    inspect = set(property["lexer.context.inspect"], inspect)
end
671
-- Merge the given key/value pairs into the shared property table and
-- re-evaluate the context specific diagnostic flags.
function context.registerproperties(p)
    check_main_properties()
    local target = lexers.property
    for key, value in next, p do
        target[key] = value
    end
    check_context_properties()
end
680
-- Convenience proxy around the shared properties. Reads must go through a
-- function: the original used '__index = lexers.property', which captures
-- the value at definition time -- normally still nil here -- and thereby
-- silently disabled all lookups through this proxy.
context.properties = setmetatable({ }, {
    __index = function(t,k)
        check_main_properties()
        return lexers.property[k]
    end,
    __newindex = function(t,k,v)
        check_main_properties()
        lexers.property[k] = v
        check_context_properties()
    end,
})
689
690
691
-- One-shot initialization: pick up the fold constants and the document
-- views that the host bound onto 'lexers', then disable itself by nilling
-- the local function value (callers test 'if initialize then').
local function initialize()
    FOLD_BASE = lexers.FOLD_BASE
    FOLD_HEADER = lexers.FOLD_HEADER
    FOLD_BLANK = lexers.FOLD_BLANK

    style_at = lexers.style_at
    indent_amount = lexers.indent_amount
    fold_level = lexers.fold_level

    check_main_properties()

    initialize = nil -- run once only
end
705
706
707
708
709
710
711
-- Convert a color specification table to packed integers and register each
-- as a "color.<name>" integer property. Triplets { r, g, b } are packed in
-- reversed component order (note the b,g,r in the format call); a single
-- component { v } is treated as grayscale.
local function tocolors(colors)
    local colorset = { }
    local property_int = lexers.property_int or { }
    for name, value in next, colors do
        if type(value) == "table" then
            local r, g, b = value[1], value[2], value[3]
            if r and g and b then
                value = tonumber(format("%02X%02X%02X",b,g,r),16) or 0
            elseif r then
                value = tonumber(format("%02X%02X%02X",r,r,r),16) or 0
            else
                value = 0
            end
        end
        colorset[name] = value
        property_int["color."..name] = value
    end
    return colorset
end
731
-- Serialize a style specification table into a comma separated property
-- string, e.g. { bold = true, size = 10 } -> "bold,size:10". Color tables
-- become "#RRGGBB" (grayscale when only one component is given, black when
-- empty). The original had an unreachable 'or "#000000"' after format --
-- string.format never returns nil -- which is removed here.
local function toproperty(specification)
    local serialized = { }
    for key, value in next, specification do
        if value == true then
            -- boolean flags serialize as the bare key
            serialized[#serialized+1] = key
        elseif type(value) == "table" then
            local r, g, b = value[1], value[2], value[3]
            if r and g and b then
                value = format("#%02X%02X%02X",r,g,b)
            elseif r then
                value = format("#%02X%02X%02X",r,r,r)
            else
                value = "#000000"
            end
            serialized[#serialized+1] = key .. ":" .. value
        else
            serialized[#serialized+1] = key .. ":" .. tostring(value)
        end
    end
    return concat(serialized,",")
end
753
-- Serialize every style specification and register it as a "style.<name>"
-- property; returns the table of serialized styles.
local function tostyles(styles)
    local styleset = { }
    local property = lexers.property or { }
    for name, specification in next, styles do
        local serialized = toproperty(specification)
        styleset[name] = serialized
        property["style."..name] = serialized
    end
    return styleset
end

context.toproperty = toproperty
context.tostyles = tostyles
context.tocolors = tocolors
768
769
770
771
772
-- Register a style table on the context namespace; with the detail flag set
-- the full serialized styleset is reported (aligned on the longest key).
function context.registerstyles(styles)
    local styleset = tostyles(styles)
    context.styles = styles
    context.styleset = styleset
    if detail then
        local keys, width = sortedkeys(styleset)
        local template = " %-" .. width .. "s : %s"
        report("initializing styleset:")
        for i=1,#keys do
            local key = keys[i]
            report(template,key,styleset[key])
        end
    elseif trace then
        report("initializing styleset")
    end
end
789
-- Register a color table on the context namespace; with the detail flag set
-- the resolved (integer) colorset is reported.
function context.registercolors(colors)
    local colorset = tocolors(colors)
    context.colors = colors
    context.colorset = colorset
    if detail then
        local keys, width = sortedkeys(colorset)
        local template = " %-" .. width .. "s : %i"
        report("initializing colorset:")
        for i=1,#keys do
            local key = keys[i]
            report(template,key,colorset[key])
        end
    elseif trace then
        report("initializing colorset")
    end
end
806
807
808
809
-- Relative locations probed (per LEXERPATH root) when loading lexer data
-- files; see 'collect' below.
local locations = {
    "context/lexers",
    "context/lexers/data",
    "../lexers",
    "../lexers/data",
    ".",
    "./data",
}
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
local collect

if usage == "context" then

    -- inside context the normal module loader does the work
    collect = function(name)
        return require(name), name
    end

else

    -- elsewhere: probe every root from LEXERPATH combined with the known
    -- data locations; dofile is wrapped in pcall so missing files (and
    -- broken ones) are skipped; returns nothing when no candidate loads
    collect = function(name)
        local rootlist = lexers.LEXERPATH or "."
        for root in gmatch(rootlist,"[^;]+") do
            -- strip a trailing /...lua segment (path templates end in ?.lua)
            local root = gsub(root,"/[^/]-lua$","")
            for i=1,#locations do
                local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua"
                if trace then
                    report("attempt to locate '%s'",fullname)
                end
                local okay, result = pcall(function () return dofile(fullname) end)
                if okay then
                    return result, fullname
                end
            end
        end
        -- not found: implicitly returns nil
    end

end
863
-- Load a lua file via the collect helper; returns the loaded data plus the
-- resolved file name, or nothing when it could not be found.
function context.loadluafile(name)
    local data, fullname = collect(name)
    if not data then
        if not textadept then
            report("unable to load lua file '%s'",name)
        end
        return
    end
    if trace then
        report("lua file '%s' has been loaded",fullname)
    end
    return data, fullname
end
876
877
878
879
local cache = { }

-- Load a definition table once and cache it. A failed load is cached as
-- false, but note that the early return below only fires for truthy cache
-- entries, so failed loads are re-attempted on every call (only the trace
-- message changes).
function context.loaddefinitions(name)
    local data = cache[name]
    if data then
        if trace then
            report("reusing definitions '%s'",name)
        end
        return data
    elseif trace and data == false then
        report("definitions '%s' were not found",name)
    end
    local data, fullname = collect(name)
    if not data then
        if not textadept then
            report("unable to load definition file '%s'",name)
        end
        data = false
    elseif trace then
        report("definition file '%s' has been loaded",fullname)
        if detail then
            -- report a key/size overview of the loaded table
            local t, n = sortedkeys(data)
            local template = " %-" .. n .. "s : %s"
            for i=1,#t do
                local k = t[i]
                local v = data[k]
                if type(v) ~= "table" then
                    report(template,k,tostring(v))
                elseif #v > 0 then
                    report(template,k,#v)
                else
                    -- hash-like subtables are not reported
                end
            end
        end
    end
    cache[name] = data
    return type(data) == "table" and data -- false (not nil) when loading failed
end
919
920
921
922
-- Compatibility helper (stock lexer interface): build an lpeg pattern that
-- matches any of the given words. 'words' is a list or a whitespace
-- separated string (with '--' line comments stripped); 'word_chars' adds
-- extra characters a word run may contain.
function context.word_match(words,word_chars,case_insensitive)

    if type(words) == "string" then
        local clean = gsub(words,"%-%-[^\n]+","") -- drop line comments
        local split = { }
        for s in gmatch(clean,"%S+") do
            split[#split+1] = s
        end
        words = split
    end
    local list = { }
    for i=1,#words do
        list[words[i]] = true
    end
    if case_insensitive then
        for i=1,#words do
            list[lower(words[i])] = true
        end
    end
    -- the scanned character set is collected from the words themselves
    -- plus the optional extra word characters
    local chars = S(word_chars or "")
    for i=1,#words do
        chars = chars + S(words[i])
    end
    local match = case_insensitive and
        function(input,index,word)
            -- match-time check: succeed only when the scanned run is known
            return (list[word] or list[lower(word)]) and index or nil
        end
    or
        function(input,index,word)
            return list[word] and index or nil
        end
    return Cmt(chars^1,match)
end
957
958
959
960
961
do

    -- Shared low level patterns, exposed via context.patterns and (for
    -- compatibility with the stock lexer interface) directly on 'lexers'.
    -- Fixes relative to the original: ascii/extend/control are now proper
    -- locals (the lexers.* aliases referenced undeclared names and were
    -- nil), and the punct range "['" was reversed (hence empty) -- it is
    -- corrected to "[`" so [ \ ] ^ _ ` are covered.

    local anything = P(1)
    local idtoken = R("az","AZ","\127\255","__")
    local digit = R("09")
    local sign = S("+-")
    local period = P(".")
    local octdigit = R("07")
    local hexdigit = R("09","AF","af")
    local lower = R("az")
    local upper = R("AZ")
    local alpha = upper + lower
    local space = S(" \n\r\t\f\v")
    local eol = S("\r\n")
    local backslash = P("\\")
    local ascii = R("\000\127")
    local extend = R("\000\255")
    local control = R("\000\031")
    local decimal = digit^1
    local octal = P("0")
                * octdigit^1
    local hexadecimal = P("0") * S("xX")
                      * (hexdigit^0 * period * hexdigit^1 + hexdigit^1 * period * hexdigit^0 + hexdigit^1)
                      * (S("pP") * sign^-1 * hexdigit^1)^-1
    local integer = sign^-1
                  * (hexadecimal + octal + decimal)
    local float = sign^-1
                * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1)
                * S("eE") * sign^-1 * digit^1 -- exponent is mandatory here

    patterns.idtoken = idtoken
    patterns.digit = digit
    patterns.sign = sign
    patterns.period = period
    patterns.octdigit = octdigit
    patterns.hexdigit = hexdigit
    patterns.ascii = ascii
    patterns.extend = extend
    patterns.control = control
    patterns.lower = lower
    patterns.upper = upper
    patterns.alpha = alpha
    patterns.decimal = decimal
    patterns.octal = octal
    patterns.hexadecimal = hexadecimal
    patterns.float = float
    patterns.cardinal = decimal

    patterns.signeddecimal = sign^-1 * decimal
    patterns.signedoctal = sign^-1 * octal
    patterns.signedhexadecimal = sign^-1 * hexadecimal
    patterns.integer = integer
    patterns.real =
        sign^-1 * ( -- at most one
            digit^1 * period * digit^0 -- 10.0 10.
          + digit^0 * period * digit^1 -- 0.10 .10
          + digit^1 -- 10
        )

    patterns.anything = anything
    patterns.any = anything
    patterns.restofline = (1-eol)^1
    patterns.space = space
    patterns.spacing = space^1
    patterns.nospacing = (1-space)^1
    patterns.eol = eol
    patterns.newline = P("\r\n") + eol
    patterns.backslash = backslash

    local endof = S("\n\r\f")

    -- succeeds (without consuming) at the start of the subject or right
    -- after a line break
    patterns.startofline = P(function(input,index)
        return (index == 1 or lpegmatch(endof,input,index-1)) and index
    end)

    lexers.any = anything
    lexers.ascii = ascii
    lexers.extend = extend
    lexers.alpha = alpha
    lexers.digit = digit
    lexers.alnum = alpha + digit
    lexers.lower = lower
    lexers.upper = upper
    lexers.xdigit = hexdigit
    lexers.cntrl = control
    lexers.graph = R("!~")
    lexers.print = R(" ~")
    lexers.punct = R("!/", ":@", "[`", "{~")
    lexers.space = space
    lexers.newline = S("\r\n\f")^1
    lexers.nonnewline = 1 - lexers.newline
    lexers.nonnewline_esc = 1 - (lexers.newline + '\\') + backslash * anything
    lexers.dec_num = decimal
    lexers.oct_num = octal
    lexers.hex_num = hexadecimal
    lexers.integer = integer
    lexers.float = float
    lexers.word = (alpha + "_") * (alpha + digit + "_")^0

end
1062
1063
1064
-- Compatibility helper: match any of the given words exactly. 'words' is a
-- list, or (when used with the hash branch below) a word -> value table;
-- 'word_chars' extends the characters that a candidate run may contain, or
-- can itself be a pattern.
function context.exact_match(words,word_chars,case_insensitive)
    local characters = concat(words)
    local pattern
    if word_chars == true or word_chars == false or word_chars == nil then
        word_chars = ""
    end
    if type(word_chars) == "string" then
        pattern = S(characters) + patterns.idtoken
        if case_insensitive then
            pattern = pattern + S(upper(characters)) + S(lower(characters))
        end
        if word_chars ~= "" then
            pattern = pattern + S(word_chars)
        end
    elseif word_chars then
        pattern = word_chars -- already an lpeg pattern
    end
    if case_insensitive then
        local list = { }
        if #words == 0 then
            -- hash form: keep the associated values
            for k, v in next, words do
                list[lower(k)] = v
            end
        else
            for i=1,#words do
                list[lower(words[i])] = true
            end
        end
        return Cmt(pattern^1, function(_,i,s)
            return list[lower(s)]
        end)
    else
        local list = { }
        if #words == 0 then
            for k, v in next, words do
                list[k] = v
            end
        else
            for i=1,#words do
                list[words[i]] = true
            end
        end
        return Cmt(pattern^1, function(_,i,s)
            return list[s]
        end)
    end
end
1112
-- Build an ordered choice pattern that matches any of the given words
-- literally, in list order.
function context.just_match(words)
    local pattern = P(words[1])
    for i=2,#words do
        pattern = pattern + P(words[i])
    end
    return pattern
end
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
-- Spell checking support: word lists per tag, loaded on demand.
local lists = { }
-- NOTE(review): 'disabled' is set below but not consulted in this chunk;
-- presumably checked by code further on -- verify.
local disabled = false

-- Globally switch off word checking.
function context.disablewordcheck()
    disabled = true
end
1139
-- Load (and cache) the spell checking list for the given tag; returns the
-- word table (or false when unavailable) plus the minimum word length to
-- check. ('limit' is accepted for interface compatibility but unused.)
function context.setwordlist(tag,limit)
    if not tag or tag == "" then
        return false, 3
    end
    local found = lists[tag]
    if not found then
        found = context.loaddefinitions("spell-" .. tag)
        if type(found) ~= "table" then
            if not textadept then
                report("invalid spell checking list for '%s'",tag)
            end
            found = { words = false, min = 3 }
        else
            -- normalize in place so the cached definition carries defaults
            found.words = found.words or false
            found.min = found.min or 3
        end
        lists[tag] = found
    end
    if trace then
        report("enabling spell checking for '%s' with minimum '%s'",tag,found.min)
    end
    return found.words, found.min
end
1163
-- A spell-checkable word: three or more word characters (the upper byte
-- range keeps multi-byte utf8 sequences inside the run).
patterns.wordtoken = R("az","AZ","\127\255")
patterns.wordpattern = patterns.wordtoken^3
1166
-- Check a word against a spell checking list; returns ok, stylename, i.
-- NOTE(review): 'validminimum' is accepted but not applied here (unlike
-- styleofword below) -- confirm whether short words should be skipped too.
function context.checkedword(validwords,validminimum,s,i)
    if not validwords then
        return true, "text", i
    else

        local word = validwords[s]
        if word == s then
            return true, "okay", i -- exact case sensitive hit
        elseif word then
            return true, "warning", i -- known, but cased differently
        else
            local word = validwords[lower(s)]
            if word == s then
                return true, "okay", i -- lowercased variant is the listed form
            elseif word then
                return true, "warning", i
            elseif upper(s) == s then
                return true, "warning", i -- all-caps words are tolerated
            else
                return true, "error", i
            end
        end
    end
end
1191
-- Classify a word against a spell checking list: "text" when unchecked or
-- shorter than the minimum, "okay" for exact hits, "warning" for case
-- mismatches and all-caps words, "error" otherwise.
function context.styleofword(validwords,validminimum,s)
    if not validwords or #s < validminimum then
        return "text"
    end
    local word = validwords[s]
    if word == s then
        return "okay"
    end
    if word then
        return "warning"
    end
    word = validwords[lower(s)]
    if word == s then
        return "okay"
    end
    if word then
        return "warning"
    end
    if upper(s) == s then
        return "warning"
    end
    return "error"
end
1216
1217
1218
-- Memoized { level [, flag] } pairs, one table per fold flag, used by the
-- folders below (created on demand via __index).
local h_table, b_table, n_table = { }, { }, { }

setmetatable(h_table, { __index = function(t,level) local v = { level, FOLD_HEADER } t[level] = v return v end })
setmetatable(b_table, { __index = function(t,level) local v = { level, FOLD_BLANK } t[level] = v return v end })
setmetatable(n_table, { __index = function(t,level) local v = { level } t[level] = v return v end })

local newline = patterns.newline
-- line splitters: p_yes captures position and content of a non-empty line,
-- p_nop just consumes an empty one
local p_yes = Cp() * Cs((1-newline)^1) * newline^-1
local p_nop = newline

local folders = { } -- per-lexer compiled fold functions (see fold_by_parsing)
1230
1231
1232
-- Fold by parsing: compile (once per lexer) a scanner that walks the text
-- line by line, accumulating fold level deltas from the lexer's fold
-- symbols, and returns a table mapping line numbers onto fold levels.
local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
    local folder = folders[lexer]
    if not folder then
        -- shared upvalues of the compiled pattern's actions; reinitialized
        -- on every invocation of the folder closure below
        local pattern, folds, text, start_pos, line_num, prev_level, current_level

        local fold_symbols = lexer._foldsymbols
        local fold_pattern = lexer._foldpattern

        if fold_pattern then
            -- fast variant: one lpeg pattern locates the fold symbols and
            -- the style at the match position selects the level delta

            fold_pattern = Cp() * C(fold_pattern) / function(s,match)
                local symbols = fold_symbols[style_at[start_pos + s]]
                if symbols then
                    local l = symbols[match]
                    if l then
                        current_level = current_level + l
                    end
                end
            end
            local action_y = function()
                folds[line_num] = prev_level
                if current_level > prev_level then
                    folds[line_num] = prev_level + FOLD_HEADER
                end
                if current_level < FOLD_BASE then
                    current_level = FOLD_BASE
                end
                prev_level = current_level
                line_num = line_num + 1
            end
            local action_n = function()
                folds[line_num] = prev_level + FOLD_BLANK
                line_num = line_num + 1
            end
            pattern = ((fold_pattern + (1-newline))^1 * newline / action_y + newline/action_n)^0

        else
            -- traditional variant: per-line string.gmatch over the symbol
            -- patterns; deltas can be numbers or callback functions

            local fold_symbols_patterns = fold_symbols._patterns
            local action_y = function(pos,line)
                for j=1, #fold_symbols_patterns do
                    for s, match in gmatch(line,fold_symbols_patterns[j]) do
                        local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
                        local l = symbols and symbols[match]
                        local t = type(l)
                        if t == "number" then
                            current_level = current_level + l
                        elseif t == "function" then
                            current_level = current_level + l(text, pos, line, s, match)
                        end
                    end
                end
                folds[line_num] = prev_level
                if current_level > prev_level then
                    folds[line_num] = prev_level + FOLD_HEADER
                end
                if current_level < FOLD_BASE then
                    current_level = FOLD_BASE
                end
                prev_level = current_level
                line_num = line_num + 1
            end
            local action_n = function()
                folds[line_num] = prev_level + FOLD_BLANK
                line_num = line_num + 1
            end
            pattern = (p_yes/action_y + p_nop/action_n)^0
        end

        local reset_parser = lexer._reset_parser

        folder = function(_text_,_start_pos_,_start_line_,_start_level_)
            if reset_parser then
                reset_parser()
            end
            folds = { }
            text = _text_
            start_pos = _start_pos_
            line_num = _start_line_
            prev_level = _start_level_
            current_level = prev_level
            lpegmatch(pattern,text)
            -- hand the collected table over and release the upvalue
            local t = folds
            folds = nil
            return t
        end
        folders[lexer] = folder
    end
    return folder(text,start_pos,start_line,start_level,lexer)
end
1331
-- Shared state and per-line actions for the indentation based folder; the
-- pattern at the end drives them once per input line.
local folds, current_line, prev_level

local function action_y()
    local current_level = FOLD_BASE + indent_amount[current_line]
    if current_level > prev_level then
        -- deeper indentation: mark the nearest preceding non-blank line
        -- as the fold header
        local i = current_line - 1
        local f
        while true do
            f = folds[i]
            if not f then
                break
            elseif f[2] == FOLD_BLANK then
                i = i - 1
            else
                f[2] = FOLD_HEADER
                break
            end
        end
        folds[current_line] = { current_level }
    elseif current_level < prev_level then
        -- dedent: close the previous line at the old (higher) level
        local f = folds[current_line - 1]
        if f then
            f[1] = prev_level
        end
        folds[current_line] = { current_level }
    else
        folds[current_line] = { prev_level }
    end
    prev_level = current_level
    current_line = current_line + 1
end

local function action_n()
    -- blank line: inherit the level, flagged blank
    folds[current_line] = { prev_level, FOLD_BLANK }
    current_line = current_line + 1
end

local pattern = ( S("\t ")^0 * ( (1-patterns.eol)^1 / action_y + P(true) / action_n) * newline )^0
1370
-- Fold by indentation: levels are derived from the host provided
-- indent_amount view (see initialize).
local function fold_by_indentation(text,start_pos,start_line,start_level)
    -- initialize the shared state used by action_y/action_n
    folds = { }
    current_line = start_line
    prev_level = start_level

    -- scan the text line by line
    lpegmatch(pattern,text)

    -- flatten the collected { level [, flag] } pairs into plain numbers
    for line, level in next, folds do
        folds[line] = level[1] + (level[2] or 0)
    end

    -- hand over and release the module level table
    local t = folds
    folds = nil
    return t
end
1389
-- Fold by line: every line simply gets the start level. Note that this
-- gmatch only sees lines terminated by a newline, so a trailing
-- unterminated line is not assigned a level here.
local function fold_by_line(text,start_pos,start_line,start_level)
    local folds = { }

    for _ in gmatch(text,".-\r?\n") do
        folds[start_line] = n_table[start_level]
        start_line = start_line + 1
    end
    return folds
end
1399
local threshold_by_lexer = 512 * 1024
local threshold_by_parsing = 512 * 1024
local threshold_by_indentation = 512 * 1024
local threshold_by_line = 512 * 1024

-- Dispatch folding: the lexer's own folder, then symbol parsing, then
-- indentation, then per-line folding, in that order of preference.
-- NOTE(review): filesize stays 0 here so the threshold guards always pass;
-- presumably a buffer size query was removed -- verify.
function context.fold(lexer,text,start_pos,start_line,start_level)
    if text == "" then
        return { }
    end
    if initialize then
        initialize()
    end
    local fold_by_lexer = lexer._fold
    local fold_by_symbols = lexer._foldsymbols
    local filesize = 0
    if fold_by_lexer then
        if filesize <= threshold_by_lexer then
            return fold_by_lexer(text,start_pos,start_line,start_level,lexer)
        end
    elseif fold_by_symbols then
        if filesize <= threshold_by_parsing then
            return fold_by_parsing(text,start_pos,start_line,start_level,lexer)
        end
    elseif lexer._FOLDBYINDENTATION or lexer.properties("fold.by.indentation",1) > 0 then
        if filesize <= threshold_by_indentation then
            return fold_by_indentation(text,start_pos,start_line,start_level,lexer)
        end
    elseif lexer._FOLDBYLINE or lexer.properties("fold.by.line",1) > 0 then
        if filesize <= threshold_by_line then
            return fold_by_line(text,start_pos,start_line,start_level,lexer)
        end
    end
    return { }
end
1434
1435
1436
-- Register a rule under the given id on a lexer and remember the
-- registration order (the order determines matching priority).
local function add_rule(lexer,id,rule)
    local rules = lexer._RULES
    if not rules then
        rules = { }
        lexer._RULES = rules
        lexer._RULEORDER = { }
    end
    rules[id] = rule
    local order = lexer._RULEORDER
    order[#order + 1] = id
end
1445
-- Replace the rule with the given id; child lexers delegate to their
-- parent (the _lexer field).
local function modify_rule(lexer,id,rule)
    local target = lexer._lexer or lexer
    target._RULES[id] = rule
end
1452
-- Fetch the rule with the given id; child lexers delegate to their parent.
local function get_rule(lexer,id)
    local source = lexer._lexer or lexer
    return source._RULES[id]
end
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
-- Register an extra style on a lexer. Names that exist as default or
-- predefined styles are ignored, except in textadept where the code falls
-- through and registers them anyway.
local function add_style(lexer,token_name,style)

    if defaultstyles[token_name] then
        if trace and detail then
            report("default style '%s' is ignored as extra style",token_name)
        end
        if textadept then
            -- fall through: textadept gets the style registered anyway
        else
            return
        end
    elseif predefinedstyles[token_name] then
        if trace and detail then
            report("predefined style '%s' is ignored as extra style",token_name)
        end
        if textadept then
            -- fall through, as above
        else
            return
        end
    else
        if trace and detail then
            report("adding extra style '%s' as '%s'",token_name,style)
        end
    end

    local num_styles = lexer._numstyles
    if num_styles == 32 then
        num_styles = num_styles + 8 -- skip the predefined style slots 32..39
    end
    if num_styles >= 255 then
        -- NOTE(review): only reported; the style is still added below
        report("there can't be more than %s styles",255)
    end
    lexer._TOKENSTYLES[token_name] = num_styles
    lexer._EXTRASTYLES[token_name] = style
    lexer._numstyles = num_styles + 1

end
1515
-- (Re)initialize the token style mapping on a lexer: the default styles
-- get numbers 0..31 and then continue at 40 (32..39 is the predefined
-- bank); the predefined styles get exactly 32..39.
local function check_styles(lexer)
    local count = 0
    local mapping = { }
    for _, name in ipairs(default) do
        if count == 32 then
            count = count + 8 -- jump over the predefined slots
        end
        mapping[name] = count
        count = count + 1
    end
    for position, name in ipairs(predefined) do
        mapping[name] = position + 31
    end
    lexer._TOKENSTYLES = mapping
    lexer._numstyles = count
    lexer._EXTRASTYLES = { }
    return lexer
end
1538
1539
1540
1541
1542
1543
-- Combine the lexer's registered rules, in registration order, into one
-- ordered choice pattern that matches a single token. The original named
-- its local 'patterns', shadowing the module level pattern table, so the
-- catch-all 'patterns.any' actually fetched a *rule* called "any" (usually
-- nil); the local is renamed and the intended any-character pattern
-- (lexers.any) is used instead.
local function join_tokens(lexer)
    local rules = lexer._RULES
    local order = lexer._RULEORDER
    if rules and order then
        local token_rule = rules[order[1]]
        for i=2,#order do
            token_rule = token_rule + rules[order[i]]
        end
        if lexer._TYPE ~= "context" then
            -- non-context lexers get a catch-all default token
            token_rule = token_rule + lexers.token(lexers.DEFAULT, lexers.any)
        end
        lexer._TOKENRULE = token_rule
        return token_rule
    else
        return P(1)
    end
end
1562
1563
1564
-- Merge a parent lexer and its children (embedded lexers) into the given
-- grammar table (rule name -> pattern). Each child contributes a
-- "_child" rule (start rule plus body) and a "child" rule that continues
-- with the parent.
local function add_lexer(grammar, lexer)
    local token_rule = join_tokens(lexer)
    local lexer_name = lexer._NAME
    local children = lexer._CHILDREN
    for i=1,#children do
        local child = children[i]
        if child._CHILDREN then
            add_lexer(grammar, child) -- grandchildren first
        end
        local child_name = child._NAME
        local rules = child._EMBEDDEDRULES[lexer_name]
        local rules_token_rule = grammar["__" .. child_name] or rules.token_rule
        -- match child tokens until the end rule shows up; the end rule is
        -- optional so an unterminated embedding still lexes
        local pattern = (-rules.end_rule * rules_token_rule)^0 * rules.end_rule^-1
        grammar[child_name] = pattern * V(lexer_name)
        local embedded_child = "_" .. child_name
        grammar[embedded_child] = rules.start_rule * pattern
        token_rule = V(embedded_child) + token_rule -- children take precedence
    end
    if trace then
        report("adding lexer '%s' with %s children",lexer_name,#children)
    end
    grammar["__" .. lexer_name] = token_rule
    grammar[lexer_name] = token_rule^0
end
1589
-- Build (and cache on the lexer) the grammar that tokenizes a document:
-- a plain repetition for simple lexers, a V() grammar when child lexers
-- are embedded; an optional preamble pattern runs in front. Fixes: the
-- trace template was missing a space ("'%s'and"), and the dead initial
-- read of lexer._grammar (overwritten on every path) is removed.
local function build_grammar(lexer,initial_rule)
    local children = lexer._CHILDREN
    local lexer_name = lexer._NAME
    local preamble = lexer._preamble
    local grammar
    if children then
        if not initial_rule then
            initial_rule = lexer_name
        end
        grammar = { initial_rule }
        add_lexer(grammar, lexer)
        lexer._INITIALRULE = initial_rule
        grammar = Ct(P(grammar))
        if trace then
            report("building grammar for '%s' with whitespace '%s' and %s children",lexer_name,lexer.whitespace or "?",#children)
        end
    else
        grammar = Ct(join_tokens(lexer)^0)
        if trace then
            report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?")
        end
    end
    if preamble then
        grammar = preamble^-1 * grammar
    end
    lexer._GRAMMAR = grammar
end
1620
1621
1622
local lineparsers = { } -- per-lexer line parsers (see context.lex)

local maxmatched = 100 -- maximum number of entries shown by the trace report
1626
-- Merge adjacent runs of the same token in a flat { token, position,
-- token, position, ... } result list, in place, and return it. The
-- original raised on an empty list: 'lastindex' stayed nil and the
-- cleanup loop computed nil+1; we bail out early in that case.
local function collapsed(t)
    if #t == 0 then
        return t
    end
    local lasttoken = nil
    local lastindex = nil
    for i=1,#t,2 do
        local token = t[i]
        local position = t[i+1]
        if token == lasttoken then
            -- same token as before: just extend the previous run
            t[lastindex] = position
        elseif lastindex then
            lastindex = lastindex + 1
            t[lastindex] = token
            lastindex = lastindex + 1
            t[lastindex] = position
            lasttoken = token
        else
            -- first pair stays where it is
            lastindex = i+1
            lasttoken = token
        end
    end
    -- drop the now unused tail
    for i=#t,lastindex+1,-1 do
        t[i] = nil
    end
    return t
end
1651
-- Run the compiled grammar over the text and postprocess the result: a
-- flat array of alternating token names and end positions. When tracing
-- with 'show' the first maxmatched ranges are reported; with 'collapse'
-- adjacent equal-token ranges are merged via collapsed().
local function matched(lexer,grammar,text)
    -- the grammar yields { token_1, position_1, token_2, position_2, ... }
    local t = lpegmatch(grammar,text)
    if trace then
        if show then
            report("output of lexer: %s (max %s entries)",lexer._NAME,maxmatched)
            local s = lexer._TOKENSTYLES
            local p = 1
            for i=1,2*maxmatched,2 do
                local n = i + 1
                local ti = t[i] -- token name
                local tn = t[n] -- end position of the range
                if ti then
                    -- show the matched slice with collapsed whitespace
                    local txt = sub(text,p,tn-1)
                    if txt then
                        txt = gsub(txt,"[%s]"," ")
                    else
                        txt = "!no text!"
                    end
                    report("%4i : %s > %s (%s) (%s)",floor(n/2),ti,tn,s[ti] or "!unset!",txt)
                    p = tn
                else
                    break
                end
            end
        end
        report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2))
        if collapse then
            t = collapsed(t)
            report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2))
        end
    elseif collapse then
        -- same merging, but without the reporting
        t = collapsed(t)
    end
    return t
end
1688
1689
1690
1691
1692
1693
1694
-- Main lexing entry point: turn 'text' into a flat token/position list.
-- Three cases are distinguished:
--   1. _LEXBYLINE : each line is lexed on its own, positions shifted by
--      the line offset (some native lexers work this way)
--   2. _CHILDREN  : the grammar to use depends on init_style, because
--      lexing can resume inside an embedded child language
--   3. otherwise  : one grammar for the whole text
-- NOTE(review): 'initialize' is an upvalue set elsewhere in this file.
function context.lex(lexer,text,init_style)
    local grammar = lexer._GRAMMAR
    if initialize then
        initialize()
    end
    if not grammar then
        return { }
    elseif lexer._LEXBYLINE then
        local tokens = { }
        local offset = 0
        local noftokens = 0
        -- the per-line parser closes over tokens/offset, so cache it per lexer
        local lineparser = lineparsers[lexer]
        if not lineparser then
            lineparser = C((1-newline)^0 * newline) / function(line)
                local length = #line
                local line_tokens = length > 0 and lpegmatch(grammar,line)
                if line_tokens then
                    for i=1,#line_tokens,2 do
                        noftokens = noftokens + 1
                        tokens[noftokens] = line_tokens[i]
                        noftokens = noftokens + 1
                        -- shift line-local positions into text positions
                        tokens[noftokens] = line_tokens[i + 1] + offset
                    end
                end
                offset = offset + length
                -- pad with a default token up to the end of the line
                if noftokens > 0 and tokens[noftokens] ~= offset then
                    noftokens = noftokens + 1
                    tokens[noftokens] = "default"
                    noftokens = noftokens + 1
                    tokens[noftokens] = offset + 1
                end
            end
            lineparser = lineparser^0
            lineparsers[lexer] = lineparser
        end
        lpegmatch(lineparser,text)
        return tokens
    elseif lexer._CHILDREN then
        -- grammars are cached per initial style number
        local hash = lexer._HASH
        if not hash then
            hash = { }
            lexer._HASH = hash
        end
        grammar = hash[init_style]
        if grammar then
            lexer._GRAMMAR = grammar
        else
            for style, style_num in next, lexer._TOKENSTYLES do
                if style_num == init_style then
                    -- the style name encodes the (child) lexer we start in,
                    -- e.g. "metafun_whitespace" -> resume inside "metafun"
                    local lexer_name = match(style,"^(.+)_whitespace") or lexer._NAME
                    if lexer._INITIALRULE ~= lexer_name then
                        grammar = hash[lexer_name]
                        if not grammar then
                            build_grammar(lexer,lexer_name)
                            grammar = lexer._GRAMMAR
                            hash[lexer_name] = grammar
                        end
                    end
                    break
                end
            end
            grammar = grammar or lexer._GRAMMAR
            hash[init_style] = grammar
        end
        if trace then
            report("lexing '%s' with initial style '%s' and %s children", lexer._NAME,init_style,#lexer._CHILDREN or 0)
        end
        return matched(lexer,grammar,text)
    else
        if trace then
            report("lexing '%s' with initial style '%s'",lexer._NAME,init_style)
        end
        return matched(lexer,grammar,text)
    end
end
1774
1775
1776
1777
-- Wrap a pattern so that a successful match captures the token name
-- followed by the position right after the matched text.
function context.token(name, patt)
    local tagged = patt * Cc(name)
    return tagged * Cp()
end
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
-- Stack of whitespace style names: while a (child) lexer file is loaded,
-- the global lexers.WHITESPACE is temporarily switched to a name specific
-- to that lexer, and restored afterwards.
local whitespaces = { }

local function push_whitespace(name)
    -- remember the active name, then switch to "<name>_whitespace"
    table.insert(whitespaces,lexers.WHITESPACE or "whitespace")
    lexers.WHITESPACE = name .. "_whitespace"
end

local function pop_whitespace()
    -- restore the name active before the matching push
    lexers.WHITESPACE = table.remove(whitespaces) or "whitespace"
end
1813
-- Give the lexer its own whitespace style name, derived from the passed
-- name or from the lexer's own name. A nil lexer is silently ignored.
local function check_whitespace(lexer,name)
    if not lexer then
        return
    end
    local base = name or lexer.name or lexer._NAME
    lexer.whitespace = base .. "_whitespace"
end
1819
-- Create a fresh context-style lexer table for the given name/filename,
-- normalized via the usual whitespace/style/property checks and sharing
-- the global styleset.
function context.new(name,filename)
    if trace then
        report("initializing lexer tagged '%s' from file '%s'",name,filename or name)
    end
    local lexer = {
        _TYPE     = "context",
        _NAME     = name,
        _FILENAME = filename,
        name      = name,
        filename  = filename,
    }
    check_whitespace(lexer)
    check_styles(lexer)
    check_properties(lexer)
    lexer._tokenstyles = context.styleset
    return lexer
end
1839
-- Fallback: a do-nothing lexer used when a lexer file cannot be loaded
-- or does not return a table.
local function nolexer(name)
    local fallback = {
        _TYPE = "unset",
        _NAME = name,
    }
    check_styles(fallback)
    check_whitespace(fallback)
    check_properties(fallback)
    return fallback
end
1851
-- Load a lexer definition file and sanity-check the result. The global
-- whitespace name is pushed/popped around the load so that nested loads
-- each see their own name.
local function load_lexer(name,namespace)
    if trace then
        report("loading lexer file '%s'",name)
    end
    push_whitespace(namespace or name)
    local lexer, fullname = context.loadluafile(name)
    pop_whitespace()
    if not lexer then
        report("invalid lexer file '%s'",name)
    elseif trace then
        report("lexer file '%s' has been loaded",fullname)
    end
    if type(lexer) ~= "table" then
        -- not a lexer at all: fall back to a do-nothing dummy
        if trace then
            report("lexer file '%s' gets a dummy lexer",name)
        end
        return nolexer(name)
    end
    if lexer._TYPE ~= "context" then
        -- a native (scintillua style) lexer: normalize it for our use
        lexer._TYPE = "native"
        check_styles(lexer)
        check_whitespace(lexer,namespace or name)
        check_properties(lexer)
    end
    if not lexer._NAME then
        lexer._NAME = name
    end
    if name ~= namespace then
        -- loaded under an alias: the namespace wins as the lexer name
        lexer._NAME = namespace
    end
    return lexer
end
1884
1885
1886
-- Dump the internals of a lexer (and, recursively, of its children) for
-- debugging; 'level' tracks the nesting depth shown in the report lines.
local function inspect_lexer(lexer,level)
    -- the parent link would make the key traversal noisy/cyclic, so it is
    -- detached during inspection and restored at the end
    local parent = lexer._lexer
    lexer._lexer = nil
    local name = lexer._NAME
    local function showstyles_1(tag,styles)
        -- styles maps name -> number; invert so we can report sorted by number
        local numbers = { }
        for k, v in next, styles do
            numbers[v] = k
        end
        local keys = sortedkeys(numbers)
        for i=1,#keys do
            local k = keys[i]
            local v = numbers[k]
            report("[%s %s] %s %s = %s",level,name,tag,k,v)
        end
    end
    local function showstyles_2(tag,styles)
        -- here the keys are names already, so sort directly
        local keys = sortedkeys(styles)
        for i=1,#keys do
            local k = keys[i]
            local v = styles[k]
            report("[%s %s] %s %s = %s",level,name,tag,k,v)
        end
    end
    local keys = sortedkeys(lexer)
    for i=1,#keys do
        local k = keys[i]
        local v = lexer[k]
        report("[%s %s] root key : %s = %s",level,name,k,tostring(v))
    end
    showstyles_1("token style",lexer._TOKENSTYLES)
    showstyles_2("extra style",lexer._EXTRASTYLES)
    local children = lexer._CHILDREN
    if children then
        for i=1,#children do
            inspect_lexer(children[i],level+1)
        end
    end
    lexer._lexer = parent
end
1930
-- Public wrapper: start the recursive lexer dump at nesting level zero.
function context.inspect(lexer)
    local toplevel = 0
    inspect_lexer(lexer,toplevel)
end
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
local savedrequire = require

-- Characters that are magic in lua patterns, mapped to their escaped
-- form, so fold symbols can be matched verbatim. Added ?, ^ and $ which
-- the original table missed: ^ and $ are only magic at the pattern edges
-- but escaping them anywhere is harmless, while an unescaped ? silently
-- turns the preceding character into an optional match.
local escapes = {
    ["%"] = "%%",
    ["."] = "%.",
    ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
    ["["] = "%[", ["]"] = "%]",
    ["("] = "%(", [")"] = "%)",
    ["?"] = "%?", ["^"] = "%^", ["$"] = "%$",
}
1963
-- Load (or reuse) a lexer by filename. Handles inheritance (a proxy
-- child contributes its rules and styles to its parent), builds the
-- grammar, registers the whitespace style and precompiles fold patterns.
function context.loadlexer(filename,namespace)
    -- textadept lexer files require("lexer"); remap that temporarily so
    -- they get this implementation instead
    if textadept then
        require = function(name)
            return savedrequire(name == "lexer" and "scite-context-lexer" or name)
        end
    end
    nesting = nesting + 1
    if not namespace then
        namespace = filename
    end
    local lexer = usedlexers[namespace]
    if lexer then
        if trace then
            report("reusing lexer '%s'",namespace)
        end
        nesting = nesting - 1
        return lexer
    elseif trace then
        report("loading lexer '%s'",namespace)
    end
    -- make sure styleset and properties are set up
    if initialize then
        initialize()
    end
    parent_lexer = nil
    lexer = load_lexer(filename,namespace) or nolexer(filename,namespace)
    -- cache under both keys: the lookup above is by namespace, but the
    -- original stored only by filename, which defeated the cache whenever
    -- namespace and filename differed
    usedlexers[filename]  = lexer
    usedlexers[namespace] = lexer
    -- a lexer without rules, grammar or explicit parent is (probably) an
    -- implicit child of the lexer that just embedded it
    -- (was: 'not lexer_grammar', an accidental global read that is always
    -- true; clearly meant the '_grammar' field checked below)
    if not lexer._rules and not lexer._lexer and not lexer._grammar then
        lexer._lexer = parent_lexer
    end
    if lexer._lexer then
        -- proxy lexer: move rules and styles over to the parent
        local _l = lexer._lexer
        local _r = lexer._rules
        local _s = lexer._tokenstyles
        if not _l._tokenstyles then
            _l._tokenstyles = { }
        end
        if _r then
            local rules = _l._rules
            local name = lexer.name
            for i=1,#_r do
                local rule = _r[i]
                rules[#rules + 1] = {
                    name .. "_" .. rule[1], -- prefixed to avoid clashes
                    rule[2],
                }
            end
        end
        if _s then
            local tokenstyles = _l._tokenstyles
            for token, style in next, _s do
                tokenstyles[token] = style
            end
        end
        lexer = _l
    end
    -- register styles and rules (when present), then build the grammar
    local _r = lexer._rules
    if _r then
        local _s = lexer._tokenstyles
        if _s then
            for token, style in next, _s do
                add_style(lexer, token, style)
            end
        end
        for i=1,#_r do
            local rule = _r[i]
            add_rule(lexer, rule[1], rule[2])
        end
    end
    build_grammar(lexer) -- with or without rules
    add_style(lexer, lexer.whitespace, lexers.STYLE_WHITESPACE)
    -- turn fold symbol strings into anchored plain-text match patterns
    local foldsymbols = lexer._foldsymbols
    if foldsymbols then
        local patterns = foldsymbols._patterns
        if patterns then
            for i = 1, #patterns do
                patterns[i] = "()(" .. gsub(patterns[i],".",escapes) .. ")"
            end
        end
    end
    lexer.lex = lexers.lex
    lexer.fold = lexers.fold
    nesting = nesting - 1
    if inspect then
        context.inspect(lexer)
    end
    if textadept then
        require = savedrequire
    end
    return lexer
end
2076
2077
2078
2079
2080
2081
-- Embed 'child' into 'parent' so that the child language can occur
-- inside the parent; start_rule/end_rule delimit the embedded region.
-- The child's styles and fold symbols are merged into the parent, and
-- the child-specific whitespace style acts as the lexer switch.
function context.embed_lexer(parent, child, start_rule, end_rule)
    local embeddedrules = child._EMBEDDEDRULES
    if not embeddedrules then
        embeddedrules = { }
        child._EMBEDDEDRULES = embeddedrules
    end
    if not child._RULES then
        -- the child has not been initialized yet: register its rules now
        local rules = child._rules
        if not rules then
            report("child lexer '%s' has no rules",child._NAME or "unknown")
            rules = { }
            child._rules = rules
        end
        for i=1,#rules do
            local rule = rules[i]
            add_rule(child, rule[1], rule[2])
        end
    end
    -- per parent: how to enter, lex and leave the embedded language
    embeddedrules[parent._NAME] = {
        ["start_rule"] = start_rule,
        ["token_rule"] = join_tokens(child),
        ["end_rule"] = end_rule
    }
    local children = parent._CHILDREN
    if not children then
        children = { }
        parent._CHILDREN = children
    end
    children[#children + 1] = child
    -- merge the child styles into the parent styleset
    local tokenstyles = parent._tokenstyles
    if not tokenstyles then
        tokenstyles = { }
        parent._tokenstyles = tokenstyles
    end
    local childname = child._NAME
    local whitespace = childname .. "_whitespace"
    tokenstyles[whitespace] = lexers.STYLE_WHITESPACE
    if trace then
        report("using whitespace '%s' as trigger for '%s' with property '%s'",whitespace,childname,lexers.STYLE_WHITESPACE)
    end
    local childstyles = child._tokenstyles
    if childstyles then
        for token, style in next, childstyles do
            tokenstyles[token] = style
        end
    end
    -- merge the child fold symbols into the parent (existing entries win)
    local parentsymbols = parent._foldsymbols
    local childsymbols = child ._foldsymbols
    if not parentsymbols then
        parentsymbols = { }
        parent._foldsymbols = parentsymbols
    end
    if childsymbols then
        for token, symbols in next, childsymbols do
            local tokensymbols = parentsymbols[token]
            if not tokensymbols then
                tokensymbols = { }
                parentsymbols[token] = tokensymbols
            end
            for k, v in next, symbols do
                if type(k) == 'number' then
                    -- list entries are appended
                    tokensymbols[#tokensymbols + 1] = v
                elseif not tokensymbols[k] then
                    tokensymbols[k] = v
                end
            end
        end
    end
    child._lexer = parent
    parent_lexer = parent -- loadlexer picks this up for implicit children
end
2155
2156
2157
-- Public (scintillua compatible) interface: expose the context variants
-- under the names that lexer files expect to find on the lexers table.
lexers.new = context.new
lexers.load = context.loadlexer

lexers.loadluafile = context.loadluafile
lexers.embed_lexer = context.embed_lexer
lexers.fold = context.fold
lexers.lex = context.lex
lexers.token = context.token
lexers.word_match = context.word_match
lexers.exact_match = context.exact_match
lexers.just_match = context.just_match
lexers.inspect = context.inspect
lexers.report = context.report
lexers.inform = context.inform
2172
2173
2174
2175
do

    -- utf8 helpers: code point encoding, identifier/character patterns
    -- and a builder that turns character tables into lpeg patterns

    local floor = math and math.floor
    local char = string.char
    local format = format
    local tonumber = tonumber
2182
2183 local function utfchar(n)
2184 if n < 0x80 then
2185 return char(n)
2186 elseif n < 0x800 then
2187 return char(
2188 0xC0 + floor(n/0x40),
2189 0x80 + (n % 0x40)
2190 )
2191 elseif n < 0x10000 then
2192 return char(
2193 0xE0 + floor(n/0x1000),
2194 0x80 + (floor(n/0x40) % 0x40),
2195 0x80 + (n % 0x40)
2196 )
2197 elseif n < 0x40000 then
2198 return char(
2199 0xF0 + floor(n/0x40000),
2200 0x80 + floor(n/0x1000),
2201 0x80 + (floor(n/0x40) % 0x40),
2202 0x80 + (n % 0x40)
2203 )
2204 else
2205
2206
2207
2208
2209
2210
2211
2212 return "?"
2213 end
2214 end
2215
    context.utfchar = utfchar

    -- lpeg byte ranges for utf8: one continuation byte and the lead bytes
    -- of 1..4 byte sequences

    local utf8next = R("\128\191")
    local utf8one = R("\000\127")
    local utf8two = R("\194\223") * utf8next
    local utf8three = R("\224\239") * utf8next * utf8next
    local utf8four = R("\240\244") * utf8next * utf8next * utf8next

    -- identifiers: ascii letters/underscore plus any multibyte character
    local utfidentifier = utf8two + utf8three + utf8four
    helpers.utfidentifier = (R("AZ","az","__") + utfidentifier)
                          * (R("AZ","az","__","09") + utfidentifier)^0

    -- one character: any byte followed by its continuation bytes (also
    -- accepts malformed sequences, which keeps the lexer robust)
    helpers.utfcharpattern = P(1) * utf8next^0
    -- decode one utf8 sequence into its numeric code point
    helpers.utfbytepattern = utf8one / byte
                           + utf8two / function(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
                           + utf8three / function(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
                           + utf8four / function(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
2270
    local p_false = P(false)
    local p_true = P(true)

    -- Turn a character tree (as built by utfchartabletopattern) into an
    -- lpeg pattern. In the tree, true marks the end of a match, false a
    -- dead end, a table a deeper level, and the "" key means the path so
    -- far is itself a valid (shorter) match.
    local function make(t)
        local function making(t)
            local p = p_false
            local keys = sortedkeys(t)
            for i=1,#keys do
                local k = keys[i]
                if k ~= "" then
                    local v = t[k]
                    if v == true then
                        p = p + P(k) * p_true
                    elseif v == false then
                        -- dead end: nothing matches along this branch
                    else
                        p = p + P(k) * making(v)
                    end
                end
            end
            if t[""] then
                -- the path to this node is also a complete match
                p = p + p_true
            end
            return p
        end
        -- the toplevel repeats the same loop but skips the t[""] check
        local p = p_false
        local keys = sortedkeys(t)
        for i=1,#keys do
            local k = keys[i]
            if k ~= "" then
                local v = t[k]
                if v == true then
                    p = p + P(k) * p_true
                elseif v == false then
                    -- dead end
                else
                    p = p + P(k) * making(v)
                end
            end
        end
        return p
    end
2313
2314 local function collapse(t,x)
2315 if type(t) ~= "table" then
2316 return t, x
2317 else
2318 local n = next(t)
2319 if n == nil then
2320 return t, x
2321 elseif next(t,n) == nil then
2322
2323 local k = n
2324 local v = t[k]
2325 if type(v) == "table" then
2326 return collapse(v,x..k)
2327 else
2328 return v, x .. k
2329 end
2330 else
2331 local tt = { }
2332 for k, v in next, t do
2333 local vv, kk = collapse(v,k)
2334 tt[kk] = vv
2335 end
2336 return tt, x
2337 end
2338 end
2339 end
2340
    -- Build an lpeg pattern matching any of the given utf8 strings. The
    -- strings are first put in a prefix tree (key per byte; true = end of
    -- a match, false = dead end so far, "" = match may also stop here),
    -- which is then compiled via make(). The input can be an array of
    -- strings or, when the array part is empty, a set with string keys.
    function helpers.utfchartabletopattern(list)
        local tree = { }
        local n = #list
        if n == 0 then
            -- hash variant: the strings are the keys of the table
            for s in next, list do
                local t = tree
                local p, pk -- parent node and the key we came through
                for c in gmatch(s,".") do
                    if t == true then
                        -- an existing string ends here: record that fact
                        -- and grow a deeper level for the longer string
                        t = { [c] = true, [""] = true }
                        p[pk] = t
                        p = t
                        t = false
                    elseif t == false then
                        -- extend a dead end with a new level
                        t = { [c] = false }
                        p[pk] = t
                        p = t
                        t = false
                    else
                        local tc = t[c]
                        if not tc then
                            tc = false
                            t[c] = false
                        end
                        p = t
                        t = tc
                    end
                    pk = c
                end
                if t == false then
                    -- the string ends on a fresh branch
                    p[pk] = true
                elseif t == true then
                    -- already registered as a match
                else
                    -- longer strings continue: mark "may stop here"
                    t[""] = true
                end
            end
        else
            -- list variant: the same traversal over the array part
            for i=1,n do
                local s = list[i]
                local t = tree
                local p, pk
                for c in gmatch(s,".") do
                    if t == true then
                        t = { [c] = true, [""] = true }
                        p[pk] = t
                        p = t
                        t = false
                    elseif t == false then
                        t = { [c] = false }
                        p[pk] = t
                        p = t
                        t = false
                    else
                        local tc = t[c]
                        if not tc then
                            tc = false
                            t[c] = false
                        end
                        p = t
                        t = tc
                    end
                    pk = c
                end
                if t == false then
                    p[pk] = true
                elseif t == true then
                    -- already a match
                else
                    t[""] = true
                end
            end
        end
        -- NOTE(review): the collapsed result is discarded here; only the
        -- side effects (if any) of collapse matter for this call
        collapse(tree,"")
        return make(tree)
    end
2418
    -- Characters that are (near) invisible in the editor: the nobreak
    -- space, the unicode space variants and the zero width space.
    patterns.invisibles = helpers.utfchartabletopattern {
        utfchar(0x00A0), -- nobreakspace
        utfchar(0x2000), -- enquad
        utfchar(0x2001), -- emquad
        utfchar(0x2002), -- enspace
        utfchar(0x2003), -- emspace
        utfchar(0x2004), -- threeperemspace
        utfchar(0x2005), -- fourperemspace
        utfchar(0x2006), -- sixperemspace
        utfchar(0x2007), -- figurespace
        utfchar(0x2008), -- punctuationspace
        utfchar(0x2009), -- thinspace
        utfchar(0x200A), -- hairspace
        utfchar(0x200B), -- zerowidthspace
        utfchar(0x202F), -- narrownobreakspace
        utfchar(0x205F), -- mathspace
    }

    -- word tokens that exclude the invisibles defined above
    patterns.iwordtoken = patterns.wordtoken - patterns.invisibles
    patterns.iwordpattern = patterns.iwordtoken^3

end
2443
2444
2445
2446
-- Pattern for delimited text: 'chars' holds the open (and optionally the
-- close) delimiter. Options: stop at a newline (single_line), treat
-- backslash escapes as plain text (no_escape off) and allow nested pairs
-- (balanced). The closing delimiter is optional at the end of the input.
function lexers.delimited_range(chars, single_line, no_escape, balanced)
    local left  = sub(chars,1,1)
    local right = #chars == 2 and sub(chars,2,2) or left
    local stop  = single_line and "\n" or ""
    local inner = balanced and left or ""
    local range
    if no_escape then
        range = patterns.any - S(right .. stop .. inner)
    else
        -- a backslash escapes anything, including the delimiters
        local invalid = S(right .. stop .. inner) + patterns.backslash
        range = patterns.any - invalid + patterns.backslash * patterns.any
    end
    if balanced and left ~= right then
        -- recursive grammar to handle nesting
        return P {
            left * (range + V(1))^0 * right
        }
    else
        return left * range^0 * P(right)^-1
    end
end
2468
-- Restrict a pattern so it only matches at the very start of the input
-- or directly after a line break.
function lexers.starts_line(patt)
    local function at_line_start(input, index)
        if index == 1 then
            return index
        end
        local previous = sub(input,index - 1,index - 1)
        if previous == "\n" or previous == "\r" or previous == "\f" then
            return index
        end
        -- fall through with nil: the match fails here
    end
    return P(at_line_start) * patt
end
2480
-- Match-time guard: succeeds when the first non-spacing character before
-- the current position is one of the characters in 's' (or when we are
-- at the start of the input).
function lexers.last_char_includes(s)
    -- build a character class; escape everything that is magic inside
    -- [ ] (the original escaped only -, % and [; ] and ^ are magic too)
    s = "[" .. gsub(s,"[%^%]%-%%%[]", "%%%1") .. "]"
    return P ( function(input, index)
        if index == 1 then
            return index
        end
        local i = index
        -- skip spacing backwards
        while match(sub(input,i - 1,i - 1),"[ \t\r\n\f]") do
            i = i - 1
        end
        if match(sub(input,i - 1,i - 1),s) then
            return index
        end
        -- nil result: the pattern fails at this position
    end)
end
2496
-- Pattern for (possibly nested) balanced pairs; the closing delimiter is
-- optional so unfinished pairs at the end of the input still match.
function lexers.nested_pair(start_chars, end_chars)
    local open  = start_chars
    local close = P(end_chars)^-1
    return P {
        open * (patterns.any - open - end_chars + V(1))^0 * close
    }
end
2504
-- Check whether the line preceding position 'pos' in 'text' is a line
-- comment starting (after optional spacing) with 'prefix'. 'line' is the
-- current line and 's' the position of the fold symbol in that line.
local function prev_line_is_comment(prefix, text, pos, line, s)
    local start = find(line,"%S")
    -- guard added: on an all-blank line 'start' is nil and the original
    -- crashed comparing nil; also bail out when non-space text before the
    -- symbol is not the comment prefix
    if start == nil or (start < s and not find(line,prefix,start,true)) then
        return false
    end
    local p = pos - 1
    if sub(text,p,p) == "\n" then
        p = p - 1
        if sub(text,p,p) == "\r" then
            p = p - 1
        end
        if sub(text,p,p) ~= "\n" then
            -- back up to the start of the previous line
            while p > 1 and sub(text,p - 1,p - 1) ~= "\n"
            do p = p - 1
            end
            -- skip leading spacing on that line
            while find(sub(text,p,p),"^[\t ]$") do
                p = p + 1
            end
            return sub(text,p,p + #prefix - 1) == prefix
        end
    end
    return false
end
2528
-- Check whether the line following position 'pos' in 'text' starts
-- (after optional spacing) with the comment 'prefix'.
local function next_line_is_comment(prefix, text, pos, line, s)
    local nl = find(text,"\n",pos + s)
    if not nl then
        -- no next line at all
        return false
    end
    local p = nl + 1
    -- skip leading spacing on the next line
    while find(sub(text,p,p),"^[\t ]$") do
        p = p + 1
    end
    return sub(text,p,p + #prefix - 1) == prefix
end
2540
-- Factory for a fold handler for line comments: the returned function
-- yields +1 when a run of comment lines starts at the current line, -1
-- when it ends, and 0 otherwise. Folding can be switched off with the
-- 'fold.line.comments' property.
function lexers.fold_line_comments(prefix)
    local property_int = lexers.property_int
    return function(text, pos, line, s)
        if property_int["fold.line.comments"] == 0 then
            return 0
        end
        -- ignore a comment that does not start the line
        if s > 1 and match(line,"^%s*()") < s then
            return 0
        end
        local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s)
        local next_line_comment = next_line_is_comment(prefix, text, pos, line, s)
        if not prev_line_comment and next_line_comment then
            return 1
        end
        if prev_line_comment and not next_line_comment then
            return -1
        end
        return 0
    end
end
2561
2562
2563
2564
2565
2566
2567
2568
2569
if textadept then

    -- Compatibility layer for the newer textadept/scintillua interface:
    -- lexers created there call methods on the lexer object, so we attach
    -- wrappers around the helpers above via a metatable (see lexers.new).

2576 local function add_fold_point(lexer,token_name,start_symbol,end_symbol)
2577 if type(start_symbol) == "string" then
2578 local foldsymbols = lexer._foldsymbols
2579 if not foldsymbols then
2580 foldsymbols = { }
2581 lexer._foldsymbols = foldsymbols
2582 end
2583 local patterns = foldsymbols._patterns
2584 if not patterns then
2585 patterns = { }
2586 usedpatt = { }
2587 foldsymbols._patterns = patterns
2588 foldsymbols._usedpatt = usedpatt
2589 end
2590 local foldsymbol = foldsymbols[token_name]
2591 if not foldsymbol then
2592 foldsymbol = { }
2593 foldsymbols[token_name] = foldsymbol
2594 end
2595 if not usedpatt[start_symbol] then
2596 patterns[#patterns+1] = start_symbol
2597 usedpatt[start_symbol] = true
2598 end
2599 if type(end_symbol) == "string" then
2600 foldsymbol[start_symbol] = 1
2601 foldsymbol[end_symbol] = -1
2602 if not usedpatt[end_symbol] then
2603 patterns[#patterns+1] = end_symbol
2604 usedpatt[end_symbol] = true
2605 end
2606 else
2607 foldsymbol[start_symbol] = end_symbol
2608 end
2609 end
2610 end
2611
2612 local function add_style(lexer,name,style)
2613 local tokenstyles = lexer._tokenstyles
2614 if not tokenstyles then
2615 tokenstyles = { }
2616 lexer._tokenstyles = tokenstyles
2617 end
2618 tokenstyles[name] = style
2619 end
2620
2621 local function add_rule(lexer,id,rule)
2622 local rules = lexer._rules
2623 if not rules then
2624 rules = { }
2625 lexer._rules = rules
2626 end
2627 rules[#rules+1] = { id, rule }
2628 end
2629
2630 local function modify_rule(lexer,id,rule)
2631 if lexer._lexer then
2632 lexer = lexer._lexer
2633 end
2634 local RULES = lexer._RULES
2635 if RULES then
2636 RULES[id] = rule
2637 end
2638 end
2639
2640 local function get_rule(lexer,id)
2641 if lexer._lexer then
2642 lexer = lexer._lexer
2643 end
2644 local RULES = lexer._RULES
2645 if RULES then
2646 return RULES[id]
2647 end
2648 end
2649
    -- method table attached to lexers made with lexers.new: textadept
    -- style lexer files call these as lexer:add_rule(...), etc.
    local new = context.new
    local lmt = {
        __index = {

            add_rule = add_rule,
            modify_rule = modify_rule,
            get_rule = get_rule,
            add_style = add_style,
            add_fold_point = add_fold_point,

            join_tokens = join_tokens,
            build_grammar = build_grammar,

            embed = lexers.embed,
            lex = lexers.lex,
            fold = lexers.fold

        }
    }
2669
2670 function lexers.new(name,options)
2671 local lexer = new(name)
2672 if options then
2673 lexer._LEXBYLINE = options['lex_by_line']
2674 lexer._FOLDBYINDENTATION = options['fold_by_indentation']
2675 lexer._CASEINSENSITIVEFOLDPOINTS = options['case_insensitive_fold_points']
2676 lexer._lexer = options['inherit']
2677 end
2678 setmetatable(lexer,lmt)
2679 return lexer
2680 end
2681
end

-- the (possibly extended) lexers table doubles as the module value
return lexers
2687 |