-- Descriptive metadata for this module. The table is not referenced anywhere
-- else in the visible source.
-- NOTE(review): the comment string mentions plain text and spell checking
-- while the lexer created below is named "bidi"; it looks copied from another
-- lexer -- confirm before relying on it.
local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for plain text (with spell checking)",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- Local aliases for the lpeg constructors and string helpers (lpeg is taken
-- from the global environment).
local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
local find, match = string.find, string.match

-- The shared ConTeXt lexer framework and the pieces of it used in this file.
local lexer    = require("scite-context-lexer")
local context  = lexer.context
local patterns = context.patterns

local token    = lexer.token

-- The lexer object that this module configures and finally returns.
local bidilexer  = lexer.new("bidi","scite-context-lexer-bidi")
-- Note: a later file-level 'local whitespace' table shadows this value for all
-- code that follows that later declaration.
local whitespace = bidilexer.whitespace

local space = patterns.space
local any   = patterns.any

-- Load the character database.
-- NOTE(review): the lazy tables below need the per-character records. They
-- are assumed to be available as characters.data once "char-def" is loaded --
-- confirm against char-def. The global 'data' that the accessors used to index
-- (and that is not defined anywhere in this file) is kept as a fallback.
require("char-def")

local chardata = characters.data or data

-- Creates an empty table whose __index handler fetches field 'name' from the
-- record of character 'k'. The result is stored in the table, so the handler
-- runs once per key; a missing record or missing field is stored as false.
local function newpropertycache(name)
    return setmetatable({ }, { __index = function(t,k)
        local d = chardata[k]
        local v = d and d[name] or false
        t[k] = v
        return v
    end })
end

characters.directions  = newpropertycache("direction")
characters.mirrors     = newpropertycache("mirror")
characters.textclasses = newpropertycache("textclass")

local directiondata = characters.directions
local mirrordata    = characters.mirrors
local textclassdata = characters.textclasses

-- Upper bound for the embedding stack used by resolve_explicit.
local maximum_stack  = 0xFF
-- When true, resolve_levels also runs resolve_fences on every run.
local analyze_fences = false

-- Direction classes that resolve_levels resets to the base level when they
-- precede a separator or end the list. This table shadows the earlier
-- file-level 'whitespace' local.
local whitespace = {
    lre = true,
    rle = true,
    lro = true,
    rlo = true,
    pdf = true,
    bn  = true,
    ws  = true,
}

-- Direction classes that resolve_neutral treats as one neutral run.
local b_s_ws_on = {
    b  = true,
    s  = true,
    ws = true,
    on = true
}

-- Metatables that supply default fields for list entries. They are not used
-- in the visible source.
local mt_space  = { __index = { char = 0x0020, direction = "ws",  original = "ws",  level = 0 } }
local mt_lre    = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
local mt_rle    = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
local mt_pdf    = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
local mt_object = { __index = { char = 0xFFFC, direction = "on",  original = "on",  level = 0 } }

-- Shared working tables: 'list' receives the entries produced by build_list,
-- 'stack' is scratch space for resolve_explicit and resolve_fences.
local list  = { }
local stack = { }

-- Indexing a missing stack slot creates, stores and returns an empty table.
setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end })

-- Fills the shared 'list' table with one entry per character of the string
-- 'head' and returns that table together with the number of entries.
--
-- Each entry has the fields that the resolve_* passes read and write:
-- char (code point), direction and original (both taken from directiondata,
-- which yields false for characters without a direction) and level (0).
--
-- The previous body called 'lpegmatch' and 'pattern', neither of which is
-- defined in this file, and always reported a size of 0.
-- NOTE(review): this decoder is a reconstruction of the missing pattern;
-- confirm that one entry per code point is what the lexer is meant to see.
local function build_list(head)
    local byte     = string.byte
    local length   = #head
    local position = 1
    local size     = 0
    while position <= length do
        local first = byte(head,position)
        local char  = first
        local width = 1
        -- The lead byte tells how many bytes the sequence occupies.
        if first >= 0xF0 then
            width, char = 4, first - 0xF0
        elseif first >= 0xE0 then
            width, char = 3, first - 0xE0
        elseif first >= 0xC0 then
            width, char = 2, first - 0xC0
        end
        if position + width - 1 > length then
            -- Truncated sequence: fall back to the single byte value.
            width, char = 1, first
        else
            -- Continuation bytes contribute six bits each; apart from the
            -- truncation check above the input is not validated.
            for offset=1,width-1 do
                char = char * 0x40 + byte(head,position+offset) % 0x40
            end
        end
        local direction = directiondata[char]
        size = size + 1
        list[size] = { char = char, direction = direction, original = direction, level = 0 }
        position = position + width
    end
    -- Drop entries left over from an earlier, longer input.
    for i=#list,size+1,-1 do
        list[i] = nil
    end
    return list, size
end

-- Pairs opening and closing fences among the entries list[start..limit].
--
-- Only entries with direction "on" that have a mirror in mirrordata are
-- considered; each of them gets its 'mirror' and 'class' fields set. An entry
-- of class "open" is pushed on the shared stack together with the character
-- that closes it. An entry of class "close" is matched against the pushed
-- openers from the top down; on a match both entries receive a 'paired'
-- field holding the index of the other one.
--
-- Two corrections over the earlier loop:
-- * a matched opener (and everything pushed after it) is now popped, so one
--   opener can no longer be paired with several closers;
-- * a closer without a matching opener leaves the stack untouched instead
--   of discarding every pending opener.
-- Both follow the bracket pair definition (BD16) of UAX #9.
--
-- 'size' is accepted for symmetry with the other passes and is not used.
local function resolve_fences(list,size,start,limit)
    local nofstack = 0
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "on" then
            local char   = entry.char
            local mirror = mirrordata[char]
            if mirror then
                local class = textclassdata[char]
                entry.mirror = mirror
                entry.class  = class
                if class == "open" then
                    nofstack = nofstack + 1
                    local stacktop = stack[nofstack]
                    stacktop[1] = mirror -- the character that closes this opener
                    stacktop[2] = i
                    stacktop[3] = false
                elseif class == "close" then
                    -- The loop does not run when nothing is open.
                    for top=nofstack,1,-1 do
                        local candidate = stack[top]
                        if candidate[1] == char then
                            local open = candidate[2]
                            list[open].paired = i
                            list[i   ].paired = open
                            nofstack = top - 1 -- pop through the matched opener
                            break
                        end
                    end
                end
            end
        end
    end
end

-- Determines the base level of a list.
--
-- Returns three values: the level (0 or 1), the matching direction name
-- ("TLT" or "TRT") and a boolean that is true when the result came from the
-- 'direction' argument or from a strong character, and false when the
-- default was used.
--
-- An explicit 'direction' of "TRT" or "TLT" wins. Otherwise the first entry
-- in list[1..size] whose direction is "r", "al" or "l" decides.
local function get_baselevel(list,size,direction)
    if direction == "TRT" then
        return 1, "TRT", true
    elseif direction == "TLT" then
        return 0, "TLT", true
    end
    for i=1,size do
        local strong = list[i].direction
        if strong == "r" or strong == "al" then
            return 1, "TRT", true
        elseif strong == "l" then
            return 0, "TLT", true
        end
    end
    -- No explicit direction and no strong character found.
    return 0, "TLT", false
end

-- The override that becomes active after each of the four push controls;
-- "on" means that no override is in effect.
local explicit_override = { rle = "on", lre = "on", rlo = "r", lro = "l" }
-- Push controls that move to the next odd level; the others move to the
-- next even level.
local explicit_odd      = { rle = true, rlo = true }

-- Assigns an embedding level to every entry of list[1..size], starting from
-- 'baselevel', and applies directional overrides.
--
-- * rle/lre/rlo/lro save the current level and override on the shared
--   stack, raise the level and become "bn" entries flagged with
--   remove = true. They are left untouched once maximum_stack levels are in
--   use.
-- * pdf restores the saved level and override and is neutralized the same
--   way. A pdf that arrives while nothing is pushed no longer reads
--   stack[0] (which set the level and override to nil and drove the stack
--   counter negative); it keeps the current state and is only neutralized,
--   in line with rules X7 and X9 of UAX #9. A pdf seen while the stack is
--   completely full is still ignored, as before.
-- * every other entry gets the current level and, when an override is
--   active, the override as its direction.
local function resolve_explicit(list,size,baselevel)
    local level    = baselevel
    local override = "on"
    local nofstack = 0
    for i=1,size do
        local entry       = list[i]
        local direction   = entry.direction
        local newoverride = explicit_override[direction]
        if newoverride then
            if nofstack < maximum_stack then
                nofstack = nofstack + 1
                local stacktop = stack[nofstack]
                stacktop[1] = level
                stacktop[2] = override
                if explicit_odd[direction] then
                    level = level + (level % 2 == 1 and 2 or 1) -- next odd level
                else
                    level = level + (level % 2 == 1 and 1 or 2) -- next even level
                end
                override        = newoverride
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        elseif direction == "pdf" then
            if nofstack == 0 then
                -- Nothing to pop: keep level and override as they are.
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            elseif nofstack < maximum_stack then
                local stacktop = stack[nofstack]
                level    = stacktop[1]
                override = stacktop[2]
                nofstack = nofstack - 1
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        else
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
end

-- Resolves the weak direction classes of list[start..limit] in place.
-- 'orderbefore' and 'orderafter' are the directions assumed just before and
-- just after the run. The steps run in this order (rule names refer to
-- UAX #9):
--
--   W1  "nsm" takes the direction of the preceding entry (or orderbefore at
--       the start of the run)
--   W2  "en" becomes "an" when the nearest preceding "al", "r" or "l" is "al"
--   W3  "al" becomes "r"
--   W4  a single "es" between two "en", or a single "cs" between two "en" or
--       two "an", takes the direction of its neighbours
--   W5  a sequence of "et" next to an "en" becomes "en"
--   W6  remaining "es", "et" and "cs" become "on"
--   W7  "en" becomes "l" when the nearest preceding "l" or "r" (or
--       orderbefore) is "l"
--
-- Corrections over the earlier code: the W4 walk assigned to an undefined
-- global 'current', never advanced the entry it inspected and was not
-- limited to 'limit'; it is now a plain bounded index loop. The W5 step read
-- an undefined global 'sor' where 'orderbefore' was meant.
--
-- 'size' is accepted for symmetry with the other passes and is not used.
local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: the first and last entry of the run have no neighbour on one side
    -- and are therefore skipped.
    for i=start+1,limit-1 do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" then
            if list[i-1].direction == "en" and list[i+1].direction == "en" then
                entry.direction = "en"
            end
        elseif direction == "cs" then
            local prevdirection = list[i-1].direction
            if prevdirection == "en" then
                if list[i+1].direction == "en" then
                    entry.direction = "en"
                end
            elseif prevdirection == "an" and list[i+1].direction == "an" then
                entry.direction = "an"
            end
        end
    end
    -- W5
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = i
            while runlimit < limit and list[runlimit+1].direction == "et" do
                runlimit = runlimit + 1
            end
            -- Look at the neighbour in front of the sequence first and at
            -- the one behind it only when the first is not "en".
            local rundirection
            if runstart == start then
                rundirection = orderbefore
            else
                rundirection = list[runstart-1].direction
            end
            if rundirection ~= "en" then
                if runlimit == limit then
                    rundirection = orderafter
                else
                    rundirection = list[runlimit+1].direction
                end
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6
    for i=start,limit do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end

-- Resolves runs of neutral entries (direction "b", "s", "ws" or "on") within
-- list[start..limit] in place.
--
-- For every maximal run the direction in front of it (orderbefore at the
-- start of the range) and behind it (orderafter at the end of the range) is
-- looked up, with "en" and "an" counted as "r". When both sides agree the run
-- takes that direction; otherwise it takes "r" for an odd and "l" for an
-- even level of the first entry of the run.
--
-- The walk is a while loop so that a resolved run really is skipped. The
-- earlier numeric for loop assigned to its control variable, which does not
-- influence the iteration.
--
-- 'size' is accepted for symmetry with the other passes and is not used.
local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            local runstart = i
            local runlimit = i
            while runlimit < limit and b_s_ws_on[list[runlimit+1].direction] do
                runlimit = runlimit + 1
            end
            local leading_direction
            if runstart == start then
                leading_direction = orderbefore
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            local trailing_direction
            if runlimit == limit then
                trailing_direction = orderafter
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            local resolved_direction
            if leading_direction == trailing_direction then
                -- Both sides agree.
                resolved_direction = leading_direction
            else
                -- Fall back to the direction that belongs to the level.
                resolved_direction = entry.level % 2 == 1 and "r" or "l"
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            i = runlimit
        end
        i = i + 1
    end
end

-- Raises the level of the entries list[start..limit] according to their
-- resolved direction:
--
-- * at an even level "r" goes up by one and "an"/"en" go up by two;
-- * at an odd level "l", "en" and "an" go up by one.
--
-- Every other combination keeps its level. The parameters 'size',
-- 'orderbefore', 'orderafter' and 'baselevel' are not used by this function.
local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
    for i=start,limit do
        local entry     = list[i]
        local level     = entry.level
        local direction = entry.direction
        local numeric   = direction == "an" or direction == "en"
        if level % 2 == 1 then
            -- odd level
            if direction == "l" or numeric then
                entry.level = level + 1
            end
        elseif direction == "r" then
            -- even level
            entry.level = level + 1
        elseif numeric then
            entry.level = level + 2
        end
    end
end

-- Splits list[1..size] into runs and resolves each run, then applies the
-- final level resets and sets the mirror fields.
--
-- For every run the weak pass, optionally the fence pass, the neutral pass
-- and the implicit pass are called with the directions that surround the
-- run: "r" when the higher of the run level and the neighbouring level is
-- odd, "l" otherwise. The neighbouring level is 'baselevel' at either end of
-- the list.
--
-- Afterwards entries whose original direction is "s" or "b" get
-- 'baselevel', as do the whitespace-class entries directly in front of them
-- and those at the end of the list. Finally 'mirror' is cleared or set,
-- depending on 'analyze_fences' (this parameter shadows the file-level
-- local of the same name).
--
-- NOTE(review): 'limit' ends on the first entry whose level differs (or on
-- 'size') and the next run starts at that same index, so consecutive runs
-- share one entry that is resolved twice, and a list with a single entry
-- never enters the loop. This is kept as found because changing it alters
-- the resolved levels -- confirm the intended run boundaries.
local function resolve_levels(list,size,baselevel,analyze_fences)
    local start = 1
    while start < size do
        local level = list[start].level
        local limit = start + 1
        while limit < size and list[limit].level == level do
            limit = limit + 1
        end
        local prev_level  = start == 1    and baselevel or list[start-1].level
        local next_level  = limit == size and baselevel or list[limit+1].level
        local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
        local orderafter  = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
        -- weak classes
        resolve_weak(list,size,start,limit,orderbefore,orderafter)
        -- paired fences (optional)
        if analyze_fences then
            resolve_fences(list,size,start,limit)
        end
        -- neutrals
        resolve_neutral(list,size,start,limit,orderbefore,orderafter)
        -- implicit levels
        resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
        start = limit
    end
    -- Separators and the whitespace in front of them return to the base level.
    for i=1,size do
        local entry     = list[i]
        local direction = entry.original
        if direction == "s" or direction == "b" then
            entry.level = baselevel
            for j=i-1,1,-1 do
                local entry = list[j]
                if whitespace[entry.original] then
                    entry.level = baselevel
                else
                    break
                end
            end
        end
    end
    -- So does the whitespace at the very end of the list.
    for i=size,1,-1 do
        local entry = list[i]
        if whitespace[entry.original] then
            entry.level = baselevel
        else
            break
        end
    end
    if analyze_fences then
        -- The fence pass has set 'mirror'; keep it only on paired entries
        -- that ended up at an odd level.
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then
                if entry.mirror and not entry.paired then
                    entry.mirror = false
                end
            elseif entry.mirror then
                entry.mirror = false
            end
        end
    else
        -- Without fence analysis every mirrorable entry at an odd level gets
        -- its mirror.
        for i=1,size do
            local entry = list[i]
            if entry.level % 2 == 1 then
                local mirror = mirrordata[entry.char]
                if mirror then
                    entry.mirror = mirror
                end
            end
        end
    end
end

-- File-level position marker; process resets it to 1 after each run. It is
-- not read anywhere in the visible source.
local index = 1

-- Runs the complete analysis on 'head': builds the entry list, determines
-- the base level (an explicit 'direction' of "TRT" or "TLT" takes
-- precedence), resolves the explicit controls and then the levels of all
-- runs. Returns the entry list and its size as delivered by build_list.
local function process(head,direction)
    local list, size = build_list(head)
    -- Only the first of the three values of get_baselevel is needed here.
    local baselevel = get_baselevel(list,size,direction)
    resolve_explicit(list,size,baselevel)
    resolve_levels(list,size,baselevel,analyze_fences)
    index = 1
    return list, size
end

-- NOTE(review): presumably a pattern that matches one UTF-8 encoded
-- character; it is defined by the lexer framework -- confirm there.
local utf = lexer.helpers.utfbytepattern

-- The grammar checks with a lookahead that a character follows and then
-- calls the function with the subject string and the current position. The
-- function analyzes the whole subject and builds a flat table with, for
-- every entry, its index followed by the style name "error".
--
-- The results of process are now captured; before, 'size' was an undefined
-- global, so the loop bound was nil.
-- NOTE(review): the returned value is a table, while a function used as an
-- lpeg pattern is documented to return a position or a boolean -- confirm
-- what the lexer framework expects here.
bidilexer._grammar = #utf * function(s,i)
    local _, size = process(s)
    local t = { }
    local n = 0
    for i=1,size do
        n = n + 1 t[n] = i
        n = n + 1 t[n] = "error"
    end
    return t
end

bidilexer._tokenstyles = context.styleset

return bidilexer