3 * @brief EJS Lexical Analyser
4 * @overview EJS lexical analyser. This implements a lexical analyser
5 * for a subset of the JavaScript language.
7 /********************************* Copyright **********************************/
11 * Copyright (c) Mbedthis Software LLC, 2003-2006. All Rights Reserved.
12 * Portions Copyright (c) GoAhead Software, 1995-2000. All Rights Reserved.
14 * This software is distributed under commercial and open source licenses.
15 * You may use the GPL open source license described below or you may acquire
16 * a commercial license from Mbedthis Software. You agree to be fully bound
17 * by the terms of either license. Consult the LICENSE.TXT distributed with
18 * this software for full details.
20 * This software is open source; you can redistribute it and/or modify it
21 * under the terms of the GNU General Public License as published by the
22 * Free Software Foundation; either version 2 of the License, or (at your
23 * option) any later version. See the GNU General Public License for more
24 * details at: http://www.mbedthis.com/downloads/gplLicense.html
26 * This program is distributed WITHOUT ANY WARRANTY; without even the
27 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
29 * This GPL license does NOT permit incorporating this software into
30 * proprietary programs. If you are unable to comply with the GPL, you must
31 * acquire a commercial license to use this software. Commercial licenses
32 * for this software and support services are available from Mbedthis
33 * Software at http://www.mbedthis.com
37 /********************************** Includes **********************************/
43 /****************************** Forward Declarations **************************/
/*
 *  Prototypes for the file-local (static) helpers that are defined further
 *  down in this file.
 */
45 static int getLexicalToken(Ejs *ep, int state);
46 static int tokenAddChar(Ejs *ep, int c);
47 static int inputGetc(Ejs *ep);
48 static void inputPutback(Ejs *ep, int c);
49 static int charConvert(Ejs *ep, int base, int maxDig);
50 static void parseNumber(Ejs *ep, EjsType type);
52 /************************************* Code ***********************************/
54 * Open a new input script
/*
 *  Allocate a zeroed EjsInput for a new script and prime its lexer cursors.
 *
 *  @param ep      Interpreter; ep->proc and ep->fileName supply the names
 *                 recorded in the new input.
 *  @param script  NUL-terminated script text; its strlen() becomes scriptSize.
 *  @return MPR_ERR_MEMORY when the EjsInput allocation fails.
 */
57 int ejsLexOpenScript(Ejs *ep, const char *script)
64 if ((ip = mprAllocTypeZeroed(ep, EjsInput)) == NULL) {
65 return MPR_ERR_MEMORY;
/* Take the procedure name from the active procedure, if there is one. */
69 ip->procName = ep->proc ? ep->proc->procName : NULL;
/* NOTE(review): both arms of this conditional evaluate to ep->fileName; a plain assignment is equivalent. */
70 ip->fileName = ep->fileName ? ep->fileName : NULL;
73 * Create the parse token buffer and script buffer
/* The token buffer starts out empty: both cursors sit at the start of tokbuf. */
75 ip->tokServp = ip->tokbuf;
76 ip->tokEndp = ip->tokbuf;
79 ip->scriptSize = strlen(script);
/* NOTE(review): the cast drops any const qualifier on ip->script, and inputPutback() later stores through this cursor -- confirm the buffer is writable. */
80 ip->scriptServp = (char*) ip->script;
/* -1 marks the putback stack as empty (getLexicalToken tests for >= 0). */
85 ip->putBackIndex = -1;
90 /******************************************************************************/
92 * Close the input script
/*
 *  Close the current input script by advancing ep->input to the next
 *  EjsInput in the chain (ip->next).
 *  NOTE(review): how ip is obtained and released is not visible in this
 *  block -- confirm before relying on ownership here.
 *
 *  @param ep  Interpreter whose input chain is advanced.
 */
95 void ejsLexCloseScript(Ejs *ep)
103 ep->input = ip->next;
108 /******************************************************************************/
110 * Initialize an input state structure
/*
 *  Reset an input state structure to its initial condition.
 *
 *  @param ip  Structure to initialise; every byte is cleared first.
 */
113 int ejsInitInputState(EjsInput *ip)
117 memset(ip, 0, sizeof(*ip));
/* Zero would denote one stacked token, so the empty putback stack is marked with -1. */
118 ip->putBackIndex = -1;
122 /******************************************************************************/
124 * Save the input state
/*
 *  Copy the lexer input state into a caller-supplied snapshot: the stacked
 *  putback tokens, the text of the current source line and the line/column
 *  position.
 *
 *  @param ep     Interpreter whose current input is being saved.
 *  @param state  Destination snapshot.
 */
127 void ejsLexSaveInputState(Ejs *ep, EjsInput *state)
/* Copy every live putback slot, indices 0 through putBackIndex inclusive. */
139 for (i = 0; i <= ip->putBackIndex; i++) {
/* NOTE(review): the size passed here is EJS_MAX_TOKEN while the restore path uses sizeof(tp->tokbuf) -- confirm the two are the same value. */
140 mprStrcpy(state->putBack[i].tokbuf, EJS_MAX_TOKEN,
141 ip->putBack[i].tokbuf);
142 state->putBack[i].tid = ip->putBack[i].tid;
/* Keep the partially accumulated source line so diagnostics stay accurate after a restore. */
145 mprStrcpy(state->line, sizeof(state->line), ip->line);
147 state->lineColumn = ip->lineColumn;
148 state->lineNumber = ip->lineNumber;
151 /******************************************************************************/
153 * Restore the input state
/*
 *  Restore the lexer input state from a snapshot: token buffer and cursors,
 *  script pointer/cursor/size, the putback stack, and the current line text
 *  with its line/column position.
 *
 *  @param ep     Interpreter whose current input is overwritten.
 *  @param state  Snapshot to copy from.
 */
156 void ejsLexRestoreInputState(Ejs *ep, EjsInput *state)
/* Token text is copied by value; the cursors below are copied as raw pointers taken from the snapshot. */
168 mprStrcpy(ip->tokbuf, sizeof(ip->tokbuf), state->tokbuf);
169 ip->tokServp = state->tokServp;
170 ip->tokEndp = state->tokEndp;
/* Rewind the script read position to where the snapshot recorded it. */
172 ip->script = state->script;
173 ip->scriptServp = state->scriptServp;
174 ip->scriptSize = state->scriptSize;
/* Rebuild the putback stack, slots 0 through putBackIndex inclusive. */
176 ip->putBackIndex = state->putBackIndex;
177 for (i = 0; i <= ip->putBackIndex; i++) {
178 tp = &ip->putBack[i];
179 tp->tid = state->putBack[i].tid;
180 mprStrcpy(tp->tokbuf, sizeof(tp->tokbuf), state->putBack[i].tokbuf);
183 mprStrcpy(ip->line, sizeof(ip->line), state->line);
185 ip->lineColumn = state->lineColumn;
186 ip->lineNumber = state->lineNumber;
189 /******************************************************************************/
191 * Free a saved input state
/*
 *  Reset a saved input state: the putback stack is marked empty and the
 *  line column is rewound to zero. No deallocation is visible in this block.
 *
 *  @param ep     Interpreter (not referenced by the statements shown).
 *  @param state  Snapshot to reset.
 */
194 void ejsLexFreeInputState(Ejs *ep, EjsInput *state)
199 state->putBackIndex = -1;
200 state->lineColumn = 0;
203 /******************************************************************************/
205 * Get the next EJS token
/*
 *  Public entry point for fetching a token: delegates to getLexicalToken()
 *  and records the resulting token id in ep->tid.
 *
 *  @param ep     Interpreter being scanned.
 *  @param state  Parser state, forwarded unchanged to getLexicalToken().
 */
208 int ejsLexGetToken(Ejs *ep, int state)
212 ep->tid = getLexicalToken(ep, state);
216 /******************************************************************************/
219 * Check for reserved words "if", "else", "var", "for", "delete", "function",
220 * "class", "extends", "public", "private", "protected", "try", "catch",
221 * "finally", "throw", "return", "get", "set", "this", "module", "each"
223 * The "new" and "in" reserved words are handled below. The "true", "false",
224 * "null", "typeof" and "undefined" reserved words are handled as global
227 * Other reserved words not supported:
228 * "break", "case", "continue", "default", "do",
229 * "instanceof", "switch", "while", "with"
231 * ECMA extensions reserved words (not supported):
232 * "abstract", "boolean", "byte", "char", "const",
233 * "debugger", "double", "enum", "export",
234 * "final", "float", "goto", "implements", "import", "int",
235 * "interface", "long", "native", "package",
236 * "short", "static", "super", "synchronized", "transient", "volatile"
238 * FUTURE -- use a hash lookup
/*
 *  Classify the text in ep->token as a reserved word for the given parser
 *  state.
 *
 *  @param ep     Interpreter; ep->token holds the candidate text.
 *  @param state  EJS_STATE_STMT, EJS_STATE_EXPR or EJS_STATE_DEC; each state
 *                recognises its own set of keywords.
 *  @param c      Look-ahead character; consulted by the return keyword test.
 *  @param tid    Token id assigned so far. EJS_TOK_LITERAL bypasses keyword
 *                matching.
 *  @return The EJS_TOK_* id of the keyword when the text matches one.
 */
241 static int checkReservedWord(Ejs *ep, int state, int c, int tid)
243 /* FUTURE -- probably should return for all tokens != EJS_TOK_ID */
244 /* FUTURE -- Should have a hash for this. MUCH faster. */
/*
 *  Only tokens that begin with a letter and are not string literals can be
 *  keywords.
 *  NOTE(review): ep->token is a char pointer; passing a negative char to
 *  isalpha() is undefined behaviour. Consider casting to unsigned char.
 */
246 if (!isalpha(ep->token[0]) || tid == EJS_TOK_LITERAL) {
/* Statement context: the full keyword list applies. */
249 if (state == EJS_STATE_STMT) {
250 /* FUTURE OPT -- convert to hash lookup */
251 if (strcmp(ep->token, "if") == 0) {
254 } else if (strcmp(ep->token, "else") == 0) {
257 } else if (strcmp(ep->token, "var") == 0) {
260 } else if (strcmp(ep->token, "new") == 0) {
263 } else if (strcmp(ep->token, "for") == 0) {
266 } else if (strcmp(ep->token, "delete") == 0) {
268 return EJS_TOK_DELETE;
269 } else if (strcmp(ep->token, "function") == 0) {
271 return EJS_TOK_FUNCTION;
272 } else if (strcmp(ep->token, "class") == 0) {
274 return EJS_TOK_CLASS;
275 } else if (strcmp(ep->token, "module") == 0) {
277 return EJS_TOK_MODULE;
278 } else if (strcmp(ep->token, "extends") == 0) {
280 return EJS_TOK_EXTENDS;
281 } else if (strcmp(ep->token, "try") == 0) {
284 } else if (strcmp(ep->token, "catch") == 0) {
286 return EJS_TOK_CATCH;
287 } else if (strcmp(ep->token, "finally") == 0) {
289 return EJS_TOK_FINALLY;
290 } else if (strcmp(ep->token, "throw") == 0) {
292 return EJS_TOK_THROW;
293 } else if (strcmp(ep->token, "public") == 0) {
295 return EJS_TOK_PUBLIC;
296 } else if (strcmp(ep->token, "protected") == 0) {
298 return EJS_TOK_PROTECTED;
299 } else if (strcmp(ep->token, "private") == 0) {
301 return EJS_TOK_PRIVATE;
302 } else if (strcmp(ep->token, "get") == 0) {
305 } else if (strcmp(ep->token, "set") == 0) {
/*
 *  NOTE(review): from here down to the second set comparison the chain
 *  repeats the tests made just above (extends, try, catch, finally, throw,
 *  public, protected, private, get, set). Those strings were already
 *  matched earlier in this same else-if chain, so these branches can never
 *  be taken and are candidates for removal.
 */
308 } else if (strcmp(ep->token, "extends") == 0) {
310 return EJS_TOK_EXTENDS;
311 } else if (strcmp(ep->token, "try") == 0) {
314 } else if (strcmp(ep->token, "catch") == 0) {
316 return EJS_TOK_CATCH;
317 } else if (strcmp(ep->token, "finally") == 0) {
319 return EJS_TOK_FINALLY;
320 } else if (strcmp(ep->token, "throw") == 0) {
322 return EJS_TOK_THROW;
323 } else if (strcmp(ep->token, "public") == 0) {
325 return EJS_TOK_PUBLIC;
326 } else if (strcmp(ep->token, "protected") == 0) {
328 return EJS_TOK_PROTECTED;
329 } else if (strcmp(ep->token, "private") == 0) {
331 return EJS_TOK_PRIVATE;
332 } else if (strcmp(ep->token, "get") == 0) {
335 } else if (strcmp(ep->token, "set") == 0) {
338 } else if (strcmp(ep->token, "each") == 0) {
341 } else if (strcmp(ep->token, "return") == 0) {
/* The look-ahead character is inspected: a semicolon or an opening parenthesis right after the keyword gets its own handling. */
342 if ((c == ';') || (c == '(')) {
345 return EJS_TOK_RETURN;
/* Expression context: only new, in and function are recognised. */
348 } else if (state == EJS_STATE_EXPR) {
349 if (strcmp(ep->token, "new") == 0) {
352 } else if (strcmp(ep->token, "in") == 0) {
355 } else if (strcmp(ep->token, "function") == 0) {
357 return EJS_TOK_FUNCTION;
/* Declaration context: only extends is recognised. */
360 } else if (state == EJS_STATE_DEC) {
361 if (strcmp(ep->token, "extends") == 0) {
363 return EJS_TOK_EXTENDS;
369 /******************************************************************************/
371 * Get the next EJS token
/*
 *  Core scanner: produce the next token id for the current input.
 *
 *  A pushed-back token, when present, is served first and re-classified by
 *  checkReservedWord() for the current state. Otherwise characters are read
 *  with inputGetc(), token text is accumulated with tokenAddChar() (ep->token
 *  points at ip->tokbuf), and an EJS_TOK_* id is returned. Numeric literals
 *  are converted through parseNumber().
 *
 *  @param ep     Interpreter whose current input is scanned.
 *  @param state  Parser state, forwarded to checkReservedWord().
 *  @return An EJS_TOK_* token id for the recognised token.
 */
374 static int getLexicalToken(Ejs *ep, int state)
378 int done, tid, c, quote, style, idx, isHex;
/* Numbers default to the build-configured numeric type; the float paths below may override it. */
386 type = BLD_FEATURE_NUM_TYPE_ID;
390 * Use a putback tokens first. Don't free strings as caller needs access.
/* A non-negative putBackIndex means at least one token is stacked; the top entry is reused. */
392 if (ip->putBackIndex >= 0) {
393 idx = ip->putBackIndex;
394 tid = ip->putBack[idx].tid;
395 ep->token = (char*) ip->putBack[idx].tokbuf;
/* The parser state may differ from when the token was first read, so classify it again. */
396 tid = checkReservedWord(ep, state, 0, tid);
/* Start a fresh token: every cursor points at the start of tokbuf, which is emptied. */
400 ep->token = ip->tokServp = ip->tokEndp = ip->tokbuf;
401 *ip->tokServp = '\0';
403 if ((c = inputGetc(ep)) < 0) {
408 * Main lexical analyser
410 for (done = 0; !done; ) {
/* Skip blanks, tabs and carriage returns; a newline is not skipped because it is a token of its own. */
419 if ((c = inputGetc(ep)) < 0)
421 } while (c == ' ' || c == '\t' || c == '\r');
425 return EJS_TOK_NEWLINE;
429 return EJS_TOK_LPAREN;
433 return EJS_TOK_RPAREN;
437 return EJS_TOK_LBRACKET;
441 return EJS_TOK_RBRACKET;
445 return EJS_TOK_PERIOD;
449 return EJS_TOK_LBRACE;
453 return EJS_TOK_RBRACE;
/* One character of look-ahead separates plus from increment. */
456 if ((c = inputGetc(ep)) < 0) {
457 ejsSyntaxError(ep, 0);
462 tokenAddChar(ep, EJS_EXPR_PLUS);
465 tokenAddChar(ep, EJS_EXPR_INC);
466 return EJS_TOK_INC_DEC;
/* Likewise for minus versus decrement. */
469 if ((c = inputGetc(ep)) < 0) {
470 ejsSyntaxError(ep, 0);
475 tokenAddChar(ep, EJS_EXPR_MINUS);
478 tokenAddChar(ep, EJS_EXPR_DEC);
479 return EJS_TOK_INC_DEC;
482 tokenAddChar(ep, EJS_EXPR_MUL);
486 tokenAddChar(ep, EJS_EXPR_MOD);
491 * Handle the division operator and comments
493 if ((c = inputGetc(ep)) < 0) {
494 ejsSyntaxError(ep, 0);
/* Neither a star nor a second slash follows, so this is a plain division operator. */
497 if (c != '*' && c != '/') {
499 tokenAddChar(ep, EJS_EXPR_DIV);
504 * Eat comments. Both C and C++ comment styles are supported.
507 if ((c = inputGetc(ep)) < 0) {
511 ejsSyntaxError(ep, 0);
/* A newline is tested together with style being the slash character, i.e. presumably the line-comment form. */
514 if (c == '\n' && style == '/') {
516 } else if (c == '*') {
530 * Continue looking for a token, so get the next character
532 if ((c = inputGetc(ep)) < 0) {
537 case '<': /* < and <= */
538 if ((c = inputGetc(ep)) < 0) {
539 ejsSyntaxError(ep, 0);
543 tokenAddChar(ep, EJS_EXPR_LSHIFT);
545 } else if (c == '=') {
546 tokenAddChar(ep, EJS_EXPR_LESSEQ);
549 tokenAddChar(ep, EJS_EXPR_LESS);
553 case '>': /* > and >= */
554 if ((c = inputGetc(ep)) < 0) {
555 ejsSyntaxError(ep, 0);
559 tokenAddChar(ep, EJS_EXPR_RSHIFT);
561 } else if (c == '=') {
562 tokenAddChar(ep, EJS_EXPR_GREATEREQ);
565 tokenAddChar(ep, EJS_EXPR_GREATER);
/* Look-ahead separates the equality comparison from plain assignment. */
570 if ((c = inputGetc(ep)) < 0) {
571 ejsSyntaxError(ep, 0);
575 tokenAddChar(ep, EJS_EXPR_EQ);
579 return EJS_TOK_ASSIGNMENT;
581 case '!': /* "!=" or "!"*/
582 if ((c = inputGetc(ep)) < 0) {
583 ejsSyntaxError(ep, 0);
587 tokenAddChar(ep, EJS_EXPR_NOTEQ);
591 tokenAddChar(ep, EJS_EXPR_BOOL_COMP);
600 return EJS_TOK_COMMA;
604 return EJS_TOK_COLON;
/* Only the doubled vertical bar (logical or) is accepted; a single one is a syntax error. */
607 if ((c = inputGetc(ep)) < 0 || c != '|') {
608 ejsSyntaxError(ep, 0);
611 tokenAddChar(ep, EJS_COND_OR);
612 return EJS_TOK_LOGICAL;
/* Only the doubled ampersand (logical and) is accepted; a single one is a syntax error. */
615 if ((c = inputGetc(ep)) < 0 || c != '&') {
616 ejsSyntaxError(ep, 0);
619 tokenAddChar(ep, EJS_COND_AND);
620 return EJS_TOK_LOGICAL;
622 case '\"': /* String quote */
625 if ((c = inputGetc(ep)) < 0) {
626 ejsSyntaxError(ep, 0);
632 * Check for escape sequence characters
639 * Octal support, \101 maps to 65 = 'A'. Put first
640 * char back so converter will work properly.
/* Up to three octal digits. */
643 c = charConvert(ep, 8, 3);
659 * Hex support, \x41 maps to 65 = 'A'
/* Up to two hex digits. */
661 c = charConvert(ep, 16, 2);
665 * Unicode support, \x0401 maps to 65 = 'A'
/*
 *  NOTE(review): two 2-digit hex groups are combined as first*16 + second.
 *  With that formula the digits 0401 give 4*16 + 1 = 65, as the comment
 *  above states, but the conventional value of a 4-digit hex escape would
 *  need the first group weighted by 256. Confirm the intended behaviour.
 */
667 c = charConvert(ep, 16, 2);
668 c = c*16 + charConvert(ep, 16, 2);
676 if (tokenAddChar(ep, '\\') < 0) {
681 if (tokenAddChar(ep, c) < 0) {
685 if (tokenAddChar(ep, c) < 0) {
/* Running out of input inside a string literal is reported as an unmatched quote. */
689 if ((c = inputGetc(ep)) < 0) {
690 ejsSyntaxError(ep, "Unmatched Quote");
694 return EJS_TOK_LITERAL;
697 if (tokenAddChar(ep, c) < 0) {
700 if ((c = inputGetc(ep)) < 0) {
/* An x or X at this point selects the hexadecimal literal path (presumably after a leading zero). */
703 if (tolower(c) == 'x') {
704 if (tokenAddChar(ep, c) < 0) {
707 if ((c = inputGetc(ep)) < 0) {
712 parseNumber(ep, type);
714 return EJS_TOK_NUMBER;
716 } else if (! isdigit(c)) {
717 #if BLD_FEATURE_FLOATING_POINT
718 if (c == '.' || tolower(c) == 'e' || c == '+' || c == '-') {
720 type = EJS_TYPE_FLOAT;
724 parseNumber(ep, type);
726 return EJS_TOK_NUMBER;
729 /* Fall through to get more digits */
731 case '1': case '2': case '3': case '4':
732 case '5': case '6': case '7': case '8': case '9':
735 if (tokenAddChar(ep, c) < 0) {
738 if ((c = inputGetc(ep)) < 0) {
741 } while (isxdigit(c));
744 #if BLD_FEATURE_FLOATING_POINT
746 if (tokenAddChar(ep, c) < 0) {
749 if ((c = inputGetc(ep)) < 0) {
/* A decimal point, an e or an f promotes the literal to floating point. */
753 if (c == '.' || c == 'e' || c == 'f') {
754 type = EJS_TYPE_FLOAT;
756 } while (isdigit(c) || c == '.' || c == 'e' ||
758 ((type == EJS_TYPE_FLOAT) && (c == '+' || c == '-')));
761 if (tokenAddChar(ep, c) < 0) {
764 if ((c = inputGetc(ep)) < 0) {
767 } while (isdigit(c));
771 parseNumber(ep, type);
773 return EJS_TOK_NUMBER;
777 * Identifiers or a function names
781 if ((c = inputGetc(ep)) < 0) {
784 if (c == '\n' || c == '\r') {
787 } else if (tokenAddChar(ep, c) < 0) {
790 if ((c = inputGetc(ep)) < 0) {
/* Identifier characters are alphanumerics plus dollar, underscore, backslash and at-sign. */
793 if (!isalnum(c) && c != '$' && c != '_' &&
794 c != '\\' && c != '@') {
798 if (*ep->token == '\0') {
/* The first character must be a letter, dollar, underscore or at-sign. */
803 if (! isalpha((int) *ep->token) && *ep->token != '$' &&
804 *ep->token != '_' && *ep->token != '@') {
805 ejsError(ep, EJS_SYNTAX_ERROR, "Invalid identifier %s",
/* c is the character that terminated the identifier; it is passed on as the look-ahead. */
810 tid = checkReservedWord(ep, state, c, EJS_TOK_ID);
811 if (tid != EJS_TOK_ID) {
816 * Skip white space after token to find out whether this is
819 while (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
820 if ((c = inputGetc(ep)) < 0)
/* Reject identifiers whose length plus one reaches EJS_MAX_ID. */
825 if ((strlen(ep->token) + 1) >= EJS_MAX_ID) {
826 ejsError(ep, EJS_SYNTAX_ERROR,
827 "Identifier too big. Max is %d letters.", EJS_MAX_ID);
835 * Putback the last extra character for next time
841 /******************************************************************************/
/*
 *  Convert the numeric text in ep->token and store it in ep->tokenNumber,
 *  tagging the result with the supplied type. The value goes into the
 *  integer, floating or integer64 member; the floating and 64-bit paths are
 *  compiled only when BLD_FEATURE_FLOATING_POINT and BLD_FEATURE_INT64 are
 *  enabled.
 *
 *  @param ep    Interpreter holding the token text and the result slot.
 *  @param type  Numeric type recorded in ep->tokenNumber.type.
 */
843 static void parseNumber(Ejs *ep, EjsType type)
847 ep->tokenNumber.integer = ejsParseInteger(ep->token);
848 ep->tokenNumber.type = type;
851 #if BLD_FEATURE_FLOATING_POINT
/* NOTE(review): atof() cannot report malformed or out-of-range input; strtod() with an end pointer would allow validation. */
853 ep->tokenNumber.floating = atof(ep->token);
854 ep->tokenNumber.type = type;
858 #if BLD_FEATURE_INT64
860 ep->tokenNumber.integer64 = ejsParseInteger64(ep->token);
861 ep->tokenNumber.type = type;
867 /******************************************************************************/
869 * Convert a hex or octal character back to binary, return original char if
/*
 *  Read up to maxDig digits of the given base from the input and accumulate
 *  their numeric value in lval.
 *
 *  @param ep      Interpreter supplying the input characters.
 *  @param base    Radix of the digits; callers in this file pass 8 or 16.
 *  @param maxDig  Maximum number of digits to consume.
 */
873 static int charConvert(Ejs *ep, int base, int maxDig)
875 int i, c, lval, convChar;
878 for (i = 0; i < maxDig; i++) {
879 if ((c = inputGetc(ep)) < 0) {
883 * Initialize to out of range value
/* Letters a-f and A-F map to the digit values 10 through 15. */
888 } else if (c >= 'a' && c <= 'f') {
889 convChar = c - 'a' + 10;
890 } else if (c >= 'A' && c <= 'F') {
891 convChar = c - 'A' + 10;
894 * If unexpected character then return it to buffer.
/* A digit value that is not below base is not a valid digit in this radix. */
896 if (convChar >= base) {
/* Shift the accumulated value one digit to the left and add the new digit. */
900 lval = (lval * base) + convChar;
905 /******************************************************************************/
907 * Putback the last token read. Tokens are stacked, up to EJS_TOKEN_STACK deep.
/*
 *  Push a token onto the putback stack so that the next getLexicalToken()
 *  call serves it again.
 *
 *  @param ep      Interpreter whose current input receives the token.
 *  @param tid     Token id being pushed back.
 *  @param string  Token text; copied into the stack slot, bounded by the
 *                 size of the slot buffer.
 */
910 void ejsLexPutbackToken(Ejs *ep, int tid, char *string)
920 ip->putBackIndex += 1;
/* NOTE(review): the stack bound is enforced only by this assertion; if mprAssert is compiled out in release builds, an overflowing push would index past putBack -- confirm. */
922 mprAssert(ip->putBackIndex < EJS_TOKEN_STACK);
923 idx = ip->putBackIndex;
925 tp = &ip->putBack[idx];
928 mprStrcpy(tp->tokbuf, sizeof(tp->tokbuf), string);
931 /******************************************************************************/
933 * Add a character to the token buffer
/*
 *  Append one character to the token buffer of the current input.
 *  Callers in this file treat a negative return value as failure.
 *
 *  @param ep  Interpreter whose token buffer is extended.
 *  @param c   Character to append.
 */
936 static int tokenAddChar(Ejs *ep, int c)
/* Refuse the character once the write cursor has reached the last byte of tokbuf. */
944 if (ip->tokEndp >= &ip->tokbuf[sizeof(ip->tokbuf) - 1]) {
945 ejsSyntaxError(ep, "Token too big");
954 /******************************************************************************/
956 * Get another input character
/*
 *  Fetch the next character of the script and advance the read cursor,
 *  mirroring the character into ip->line so the current source line is
 *  available for diagnostics. Callers in this file treat a negative return
 *  value as end of input.
 *
 *  @param ep  Interpreter whose current input is read.
 */
959 static int inputGetc(Ejs *ep)
/* Nothing left to read in the script. */
967 if (ip->scriptSize <= 0) {
/* The uchar cast presumably keeps bytes with the high bit set from being returned as negative values. */
971 c = (uchar) (*ip->scriptServp++);
975 * For debugging, accumulate the line number and the currenly parsed line
979 if (ip->lineColumn > 0) {
/* NOTE(review): writing to stdout from the lexer looks like leftover debug output -- confirm it is guarded by a debug-only conditional. */
980 printf("PARSED: %s\n", ip->line);
/* Append to the line buffer only while room remains for this character and the terminator. */
985 } else if ((ip->lineColumn + 2) < sizeof(ip->line)) {
986 ip->line[ip->lineColumn++] = c;
987 ip->line[ip->lineColumn] = '\0';
992 /******************************************************************************/
994 * Putback a character onto the input queue
/*
 *  Return one character to the input: the script cursor is stepped back and
 *  the character stored at that position, and the accumulated current-line
 *  text is shortened by one column.
 *
 *  @param ep  Interpreter whose current input is rewound.
 *  @param c   Character being returned.
 */
997 static void inputPutback(Ejs *ep, int c)
/* NOTE(review): this stores into the script buffer. ejsLexOpenScript() takes the script as const char and casts the cursor to char pointer, so confirm the buffer is writable. */
1005 *--ip->scriptServp = c;
1007 if (--(ip->lineColumn) < 0) {
/* NOTE(review): sizeof(ip->line) below suggests line is an array, in which case this first assertion is always true. */
1010 mprAssert(ip->line);
1011 mprAssert(ip->lineColumn >= 0);
1012 mprAssert(ip->lineColumn < sizeof(ip->line));
/* Truncate the recorded line text at the new column. */
1013 ip->line[ip->lineColumn] = '\0';
1017 /******************************************************************************/
/*
 *  Empty placeholder function.
 *  NOTE(review): presumably defined so that this file still provides a
 *  symbol when BLD_FEATURE_EJS is disabled (see the #endif that follows) --
 *  confirm against the matching conditional.
 */
1020 void ejsLexDummy() {}
1022 /******************************************************************************/
1023 #endif /* BLD_FEATURE_EJS */
1031 * vim600: sw=4 ts=4 fdm=marker
1032 * vim<600: sw=4 ts=4