035f5e578af48492912c9c8a94017ebf3f44771f
[Packages/TYPO3.CMS.git] / typo3 / sysext / t3editor / jslib / parsetyposcript.js
1 /* TypoScript parser
2 *
3 * based on parsejavascript.js by Marijn Haverbeke
4 *
5 * A parser that can be plugged into the CodeMirror system has to
6 * implement the following interface: It is a function that, when
7 * called with a string stream (stringstream.js) as an argument,
8 * returns a MochiKit-style iterator (object with a 'next' method).
9 * This iterator, when called, consumes some input from the string
10 * stream, and returns a token object. Token objects must have a
11 * 'value' property (the text they represent), a 'style' property (the
12 * CSS style that should be used to colour them). Tokens for newline
13 * characters must also have a 'lexicalContext' property, which has an
14 * 'indentation' method that can be used to determine the proper
15 * indentation level for the next line. This method optionally takes
16 * the first character of the next line as an argument, which it can
17 * use to adjust the indentation level.
18 *
19 * So far this should be easy. The hard part is that the iterator
20 * produced by the parse function must also have a 'copy' method. This
21 * method, called without arguments, returns a function representing
22 * the current state of the parser. When this function is later called
23 * with a string stream as its argument, it returns a parser iterator
24 * object that resumes parsing using the old state and the new input
25 * stream. It may assume that only one parser is active at a time, and
26 * clobber the state of the old parser (the implementation below
27 * certainly does).
28 */
29
30 // Parse function for TypoScript. Makes use of the tokenizer from
31 // tokenizetyposcript.js. Note that your parsers do not have to be
32 // this complicated -- if you don't want to recognize local variables,
33 // in many languages it is enough to just look for braces, semicolons,
34 // parentheses, etc, and know when you are inside a string or comment.
35 var parseTypoScript = function() {
36 // Token types that can be considered to be atoms.
37 var atomicTypes = setObject("atom", "number", "variable", "string", "regexp");
38
// Constructor for the lexical context objects. One of these exists
// per open scope; together they form a chain through 'prev'.
//
//   indented - indentation of the line on which the scope was opened
//   column   - column at which the scope was opened
//   type     - kind of scope: "block" for the top level, otherwise
//              the bracket character handed to pushlex
//   align    - whether lines in this scope line up with the opening
//              character. Pass null to leave it undecided; the
//              property is then deliberately NOT created, because the
//              parser tests for it with '"align" in lexical'.
//   prev     - parent scope, if any
function TSLexical(indented, column, type, align, prev) {
  this.indented = indented;
  this.column = column;
  this.type = type;
  if (align != null)
    this.align = align;
  this.prev = prev;
}

// Computes the indentation for the line following this scope's
// newline. firstChar is optional: the first character of that next
// line, used to pull a closing bracket back out to the level of the
// scope's opener.
TSLexical.prototype.indentation = function(firstChar) {
  // A scope may be typed by its opening bracket (statement() pushes
  // "{") or by its closing bracket (")" and "]"). Normalise to the
  // closing character so that a line starting with "}" is recognised
  // as closing a "{" scope; comparing against the raw type never
  // matched in that case and the closing brace was not dedented.
  var closers = {"{": "}", "[": "]", "(": ")"};
  var closer = closers.hasOwnProperty(this.type) ? closers[this.type] : this.type;
  var closing = firstChar == closer;

  if (this.type == "}")
    return this.indented + 2;
  if (this.align)
    return this.column - (closing ? 1 : 0);
  return this.indented + (closing ? 0 : 2);
};
66
67 // The parser-iterator-producing function itself.
68 return function(input){
// Tokenizer wrapped around the raw input stream.
var tokens = tokenizeTypoScript(input);
// cc is the stack of pending actions still needed to finish the
// current statement (for instance "a closing parenthesis is still
// missing"). The action at the end of the array runs first. The
// stack starts out holding 'statements', an action that re-queues
// itself every time it runs and so consumes statement after
// statement.
var cc = [statements];
// Current local scope: the variables defined in it plus a link to
// the enclosing scopes. Null while no scope has been pushed.
var context = null;
// Innermost lexical scope; mostly used for indentation.
var lexical = new TSLexical(-2, 0, "block", false);
// Column reached on the current line, and the indentation that line
// started with. Both are fed into newly created lexical scopes.
var column = 0, indented = 0;
// Flags through which mark, cont and pass report back to the driver
// loop inside next().
var consume, marked;

// The iterator object handed back to the caller.
var parser = {next: next, copy: copy};
94
// Reads one token from the token stream, feeds it through the action
// stack and returns it (possibly restyled).
function next(){
  // Lexical actions only adjust the 'lexical' variable. Flush any
  // that sit on top of the stack first, otherwise the steps below
  // would work with the wrong lexical scope.
  var pending = cc[cc.length - 1];
  while (pending.lex){
    cc.pop();
    pending();
    pending = cc[cc.length - 1];
  }

  var token = tokens.next();
  var kind = token.type;
  var width = token.value.length;

  // Whitespace at the very start of a line is that line's indentation.
  if (kind == "whitespace" && column == 0)
    indented = width;
  column += width;

  if (kind == "newline"){
    column = 0;
    indented = 0;
    // A scope whose alignment is still undecided when its line ends
    // is an un-aligned scope.
    if (!("align" in lexical))
      lexical.align = false;
    // Newline tokens carry the lexical scope; it is what indentation
    // of the next line is computed from.
    token.lexicalContext = lexical;
    return token;
  }

  // Whitespace and comments need no further processing.
  if (kind == "whitespace" || kind == "comment")
    return token;

  // A meaningful token on the same line as a still-undecided scope
  // opener makes that scope an aligned one.
  if (!("align" in lexical))
    lexical.align = true;

  // Run actions from the top of the stack until one of them consumes
  // the token. 'marked' is reset before every action; if the
  // consuming action set it through mark(), it replaces the token's
  // style.
  do {
    consume = false;
    marked = false;
    cc.pop()(kind, token.name);
  } while (!consume);

  if (marked)
    token.style = marked;
  return token;
}
141
// Takes a snapshot of the parser state and returns a function that,
// given a new input stream, restores that snapshot and hands back
// the parser iterator. The cc array is constantly modified, so it is
// duplicated both when the snapshot is taken and every time it is
// restored. The lexical and context objects are shared by reference
// rather than cloned.
function copy(){
  var saved = {
    context: context,
    lexical: lexical,
    cc: cc.slice(0),
    regexp: tokens.regexp,
    inComment: tokens.inComment
  };

  return function(input){
    context = saved.context;
    lexical = saved.lexical;
    // Fresh copy, so the snapshot can be restored more than once.
    cc = saved.cc.slice(0);
    column = 0;
    indented = 0;
    tokens = tokenizeTypoScript(input);
    tokens.regexp = saved.regexp;
    tokens.inComment = saved.inComment;
    return parser;
  };
}
163
// Queues several actions on cc at once. They are pushed back to
// front, so the first action listed ends up on top of the stack and
// therefore runs first.
function push(fs){
  var i = fs.length;
  while (i-- > 0)
    cc.push(fs[i]);
}
// cont and pass let an action schedule follow-up actions on the
// stack. They differ only in what happens to the current token:
// cont flags it as consumed, pass leaves it for the next action.
function cont(){
  consume = true;
  push(arguments);
}
// Queues the given actions without consuming the current token.
function pass(){
  consume = false;
  push(arguments);
}
// Records the style that next() gives the current token once the
// token is consumed.
function mark(style){
  marked = style;
}
185
// Opens a new variable scope, linked to the scope that was current
// until now. Every new scope starts out knowing "this" and
// "arguments".
function pushcontext(){
  var builtins = {"this": true, "arguments": true};
  context = {prev: context, vars: builtins};
}
// Leaves the current variable scope and returns to its parent.
function popcontext(){
  var parent = context.prev;
  context = parent;
}
// Adds a variable name to the current scope and styles the current
// token as a variable definition. Does nothing while no scope is
// open.
function register(varname){
  if (!context)
    return;
  mark("variabledef");
  context.vars[varname] = true;
}
202
203
// Builds an action that opens a new lexical scope of the given type,
// nested inside the scope that is current when the action runs. The
// 'lex' flag tells next() to run the action before fetching a token.
function pushlex(type){
  function enter(){
    lexical = new TSLexical(indented, column, type, null, lexical);
  }
  enter.lex = true;
  return enter;
}
// Action that leaves the current lexical scope and returns to its
// parent. It carries the same 'lex' flag as the actions built by
// pushlex: next() uses that flag to know the action can (and has
// to) be run before moving on to the next token.
function poplex(){
  var outer = lexical.prev;
  lexical = outer;
}
poplex.lex = true;
220
// Creates an action that discards tokens until it meets one of the
// wanted type. Every token it sees is consumed; as long as the type
// does not match, the action queues itself again.
//
// The action refers to itself through its own name rather than the
// deprecated arguments.callee, which throws a TypeError in strict
// mode code.
function expect(wanted){
  return function skipUntilWanted(type){
    if (type == wanted) cont();
    else cont(skipUntilWanted);
  };
}
229
// Top-level loop: queues one statement followed by itself again, so
// the action stack never runs empty. The token is left untouched for
// the statement action.
function statements(type){
  pass(statement, statements);
}
// Handles a single statement, chosen by the type of the current
// token. An opening brace starts a block with its own lexical scope;
// any other token is simply consumed. "[" conditions get no special
// treatment here.
function statement(type){
  if (type != "{"){
    cont();
    return;
  }
  cont(pushlex("{"), block, poplex);
}
241
// Chooses how to continue parsing an expression from the type of its
// first token. Tokens that cannot start an expression are left
// alone: nothing is queued and the token is not consumed.
function expression(type){
  if (atomicTypes.hasOwnProperty(type)){
    cont(maybeoperator);
    return;
  }
  if (type == "function"){
    cont(functiondef);
    return;
  }
  // Prefix keywords and operators are both followed by an expression.
  if (type == "keyword c" || type == "operator"){
    cont(expression);
    return;
  }
  if (type == "("){
    cont(pushlex(")"), expression, expect(")"), poplex);
    return;
  }
  if (type == "["){
    cont(pushlex("]"), commasep(expression), expect("]"), poplex);
    return;
  }
  if (type == "{")
    cont(pushlex("}"), commasep(objprop), expect("}"), poplex);
}
// Used wherever an operator, a call, a property access or a
// subscript may follow. If the token is none of these, nothing is
// queued and the token is left for the next action.
function maybeoperator(type){
  if (type == "operator"){
    cont(expression);
    return;
  }
  if (type == "("){
    cont(pushlex(")"), expression, commasep(expression), expect(")"), poplex);
    return;
  }
  if (type == "."){
    cont(property, maybeoperator);
    return;
  }
  if (type == "[")
    cont(pushlex("]"), expression, expect("]"), poplex);
}
// A statement that starts with a variable name might be a label.
// A colon confirms it: the colon is consumed and a statement is
// expected next. Anything else is handed on, unconsumed, as the
// rest of a regular statement ending in ";".
function maybelabel(type){
  if (type != ":"){
    pass(maybeoperator, expect(";"), poplex);
    return;
  }
  cont(poplex, statement);
}
// The tokenizer reports property names as plain variables, so their
// style is corrected here. Tokens of any other type are left alone.
function property(type){
  if (type != "variable")
    return;
  mark("property");
  cont();
}
// Parses one "name: value" pair of an object literal. A variable
// token used as the name is restyled as a property; any atomic
// token is accepted as the name and must be followed by ":" and an
// expression.
function objprop(type){
  var isAtom = atomicTypes.hasOwnProperty(type);
  if (type == "variable")
    mark("property");
  if (isAtom)
    cont(expect(":"), expression);
}
// Builds an action that parses a comma-separated list whose items
// are recognised by the 'what' action.
function commasep(what){
  // After an item: a comma means another item follows; anything
  // else ends the list and is left unconsumed.
  function afterItem(type){
    if (type == ",")
      cont(what, afterItem);
  }
  return function list(){
    pass(what, afterItem);
  };
}
287
// Body of a "{ ... }" block: keeps parsing statements until the
// closing brace turns up, which is then consumed.
function block(type){
  if (type != "}"){
    pass(statement, block);
    return;
  }
  cont();
}
293
// Body of a "[ ... ]" condition: keeps parsing statements until the
// closing bracket turns up, which is then consumed.
function condition(type){
  if (type == "]") cont();
  // Re-queue condition itself (not block), so that parsing keeps
  // looking for the closing "]" rather than for a "}".
  else pass(statement, condition);
}
299
300
// First half of a variable definition: expects the variable's name.
// A name is registered in the current scope and vardef2 takes over;
// any other token is consumed and ends the definition.
function vardef1(type, value){
  if (type != "variable"){
    cont();
    return;
  }
  register(value);
  cont(vardef2);
}
// Second half of a variable definition: an operator introduces an
// initialiser expression, a comma introduces the next name. Any
// other token is left unconsumed.
function vardef2(type){
  if (type == "operator"){
    cont(expression, vardef2);
    return;
  }
  if (type == ",")
    cont(vardef1);
}
// For loops, first part of the header: either a "var" definition or
// an expression. The current token is consumed in both cases.
function forspec1(type, value){
  var first = (type == "var") ? vardef1 : expression;
  cont(first, forspec2);
}
// For loops, after the first part of the header: a comma adds another
// initialiser, a semicolon moves on to the test and update
// expressions. Any other token is left unconsumed.
function forspec2(type){
  if (type == ","){
    cont(forspec1);
    return;
  }
  if (type == ";")
    cont(expression, expect(";"), expression);
}
// Parses a function definition. An optional name is registered in
// the current scope. The opening parenthesis then opens a new
// scope, to which the names in the argument list are added; that
// scope is closed again after the function's body statement.
function functiondef(type, value){
  if (type == "variable"){
    register(value);
    cont(functiondef);
    return;
  }
  if (type == "(")
    cont(pushcontext, commasep(funarg), expect(")"), statement, popcontext);
}
// One argument in a function's argument list: a variable name is
// registered in the current scope and consumed. Other tokens are
// left alone.
function funarg(type, value){
  if (type != "variable")
    return;
  register(value);
  cont();
}
330
331 return parser;
332 }
333 }();