local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for pdf objects",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- no longer used: nesting lexers with whitespace in start/stop is unreliable

local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V

local lexer             = require("scite-context-lexer")
local context           = lexer.context
local patterns          = context.patterns

local token             = lexer.token

local pdfobjectlexer    = lexer.new("pdfobj","scite-context-lexer-pdf-object")
local whitespace        = pdfobjectlexer.whitespace

local space             = patterns.space
local spacing           = patterns.spacing
local nospacing         = patterns.nospacing
local anything          = patterns.anything
local newline           = patterns.eol
local real              = patterns.real
local cardinal          = patterns.cardinal

local lparent           = P("(")
local rparent           = P(")")
local langle            = P("<")
local rangle            = P(">")
local escape            = P("\\")
local unicodetrigger    = P("feff")

local nametoken         = 1 - space - S("<>/[]()")
local name              = P("/") * nametoken^1

local p_string          = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }
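
-- The string grammar matches balanced parentheses recursively: V(1) refers
-- back to the whole pattern, and the escape branch is tried first so that
-- "\)" cannot close the string. A minimal sanity check (illustrative only,
-- not part of the lexer):
--
--   print(lpeg.match(lparent * p_string * rparent, "(a (nested) \\) string)"))
--   -- prints 23: the whole 22 character string, escaped paren included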

local t_spacing         = token(whitespace, spacing)
local t_spaces          = token(whitespace, spacing)^0
local t_rest            = token("default",  nospacing) -- anything

local p_stream          = P("stream")
local p_endstream       = P("endstream")
local p_obj             = P("obj")
local p_endobj          = P("endobj")
local p_reference       = P("R")

local p_objectnumber    = patterns.cardinal
local p_comment         = P("%") * (1-S("\n\r"))^0

local t_string          = token("quote",    lparent)
                        * token("string",   p_string)
                        * token("quote",    rparent)
local t_unicode         = token("quote",    langle)
                        * token("plain",    unicodetrigger)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
local t_whatsit         = token("quote",    langle)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
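
-- Hex strings come in two flavours: one that starts with the "feff" byte
-- order mark is highlighted as a unicode (utf-16) string, while anything
-- else in angle brackets falls through to the generic t_whatsit:
--
--   <feff0048>  ->  quote("<") plain("feff") string("0048") quote(">")
--   <48656c6c>  ->  quote("<") string("48656c6c") quote(">")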
local t_keyword         = token("command",  name)
local t_constant        = token("constant", name)
local t_number          = token("number",   real)
--    t_reference       = token("number",   cardinal)
--                      * t_spacing
--                      * token("number",   cardinal)
local t_reserved        = token("number",   P("true") + P("false") + P("null")) -- the pdf null object is lowercase
local t_reference       = token("warning",  cardinal)
                        * t_spacing
                        * token("warning",  cardinal)
                        * t_spacing
                        * token("keyword",  p_reference)
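
-- An indirect reference has the shape "<object> <generation> R"; the two
-- numbers get the warning style so references stand out (the commented
-- variant above styled them as plain numbers instead), for instance:
--
--   12 0 R  ->  warning("12") warning("0") keyword("R")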

local t_comment         = token("comment",  p_comment)

local t_openobject      = token("warning",  p_objectnumber * spacing)
--                         * t_spacing
                        * token("warning",  p_objectnumber * spacing)
--                         * t_spacing
                        * token("keyword",  p_obj)
local t_closeobject     = token("keyword",  p_endobj)
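
-- An object header uses the same "<object> <generation>" prefix followed
-- by the obj keyword; here the separating spaces are folded into the
-- warning tokens (hence the commented t_spacing lines), for instance:
--
--   12 0 obj  ->  warning("12 ") warning("0 ") keyword("obj")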

local t_opendictionary  = token("grouping", P("<<"))
local t_closedictionary = token("grouping", P(">>"))

local t_openarray       = token("grouping", P("["))
local t_closearray      = token("grouping", P("]"))

-- todo: comment

local t_stream          = token("keyword", p_stream)
--                      * token("default", newline * (1-newline*p_endstream*newline)^1 * newline)
--                         * token("text", (1 - p_endstream)^1)
                        * (token("text", (1 - p_endstream-spacing)^1) + t_spacing)^1
                        * token("keyword", p_endstream)
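
-- Stream content is lexed as opaque runs of text alternating with
-- whitespace up to the endstream keyword, so a large payload does not end
-- up as one huge token. Note that this is a display heuristic: binary
-- content that happens to contain the bytes "endstream" cuts the run
-- short. For instance:
--
--   stream
--   BT /F1 12 Tf (Hello) Tj ET
--   endstream
--
-- becomes keyword("stream"), text and whitespace runs, keyword("endstream").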

local t_dictionary      = { "dictionary",
                            dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
                            array      = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
                            whatever   = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
                        }
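
-- The grammar is kept in a table so that it can be reused below (and via
-- _shared): keys are lexed as commands, name values as constants, and
-- dictionaries and arrays nest through the V("...") variables. A
-- dictionary like
--
--   << /Type /Page /Parent 3 0 R /Kids [ 4 0 R 5 0 R ] >>
--
-- is thus lexed in one pass, nested array and references included.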

----- t_object          = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
-----                       object     = t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
-----                       dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
-----                       array      = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
-----                       whatever   = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
-----                       number     = t_number,
-----                   }

local t_object          = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
                            dictionary = t_dictionary.dictionary,
                            array      = t_dictionary.array,
                            whatever   = t_dictionary.whatever,
                            object     = t_openobject^-1 * t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
                            number     = t_number,
                        }
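
-- The object rule accepts an optional header, then a dictionary (possibly
-- followed by a stream), an array, or a bare number, closed by endobj. A
-- complete object therefore looks like:
--
--   1 0 obj
--   << /Length 5 0 R >>
--   stream
--   ...
--   endstream
--   endobj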

pdfobjectlexer._shared = {
    dictionary  = t_dictionary,
    object      = t_object,
    stream      = t_stream,
}
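
-- These grammars are exported so that the related pdf lexers (the
-- whole-file and xref variants in this lexer set) can reuse them instead
-- of duplicating the patterns.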

pdfobjectlexer._rules = {
    { "whitespace", t_spacing }, -- in fact we don't want whitespace here, as that is top-level lexer work
    { "object",     t_object  },
}

pdfobjectlexer._tokenstyles = context.styleset

return pdfobjectlexer