5 JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3);
7 static void jsY_error(js_State *J, const char *fmt, ...)
14 vsnprintf(msgbuf, 256, fmt, ap);
17 snprintf(buf, 256, "%s:%d: ", J->filename, J->lexline);
20 js_newsyntaxerror(J, buf);
24 static const char *tokenstring[] = {
26 "'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
27 "'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
28 "'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
29 "'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
30 "' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
31 "'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
32 "'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
33 "'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
34 "'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
35 "'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
36 "'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
37 "'X'", "'Y'", "'Z'", "'['", "'\'", "']'", "'^'", "'_'",
38 "'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
39 "'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
40 "'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
41 "'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",
43 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
44 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
45 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
46 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
47 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
48 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
49 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
50 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
52 "(identifier)", "(number)", "(string)", "(regexp)",
54 "'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
55 "'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
56 "'+='", "'-='", "'*='", "'/='", "'%='",
57 "'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
60 "'break'", "'case'", "'catch'", "'continue'", "'debugger'",
61 "'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
62 "'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
63 "'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
64 "'void'", "'while'", "'with'",
/*
 * Map a token code to a printable name for diagnostics.
 * When token is a valid index into tokenstring and the entry stored there
 * is non-null, that entry is returned.
 * NOTE(review): the result for out-of-range tokens or null entries is not
 * visible in this excerpt -- confirm against the full file.
 */
67 const char *jsY_tokenstring(int token)
69 if (token >= 0 && token < (int)nelem(tokenstring))
70 if (tokenstring[token])
71 return tokenstring[token];
/*
 * Reserved words, listed in ascending strcmp order.
 * jsY_findkeyword turns an index i into this table into the token
 * TK_BREAK + i, so the order here has to match the order of the keyword
 * token codes.
 * NOTE(review): jsY_findword compares with strcmp against list[m], which
 * looks like a binary search -- if so, this table must stay sorted.
 */
75 static const char *keywords[] = {
76 "break", "case", "catch", "continue", "debugger", "default", "delete",
77 "do", "else", "false", "finally", "for", "function", "if", "in",
78 "instanceof", "new", "null", "return", "switch", "this", "throw",
79 "true", "try", "typeof", "var", "void", "while", "with",
82 int jsY_findword(const char *s, const char **list, int num)
88 int c = strcmp(s, list[m]);
/*
 * Classify the word s as a reserved word or an ordinary identifier.
 * s is looked up in the keywords table through jsY_findword. On the keyword
 * path J->text is pointed at the table entry and the token TK_BREAK + i is
 * returned; on the other path s is interned into J->text and TK_IDENTIFIER
 * is returned.
 * NOTE(review): the test on i that selects between the two paths is not
 * visible in this excerpt -- confirm against the full file.
 */
99 static int jsY_findkeyword(js_State *J, const char *s)
101 int i = jsY_findword(s, keywords, nelem(keywords));
103 J->text = keywords[i];
104 return TK_BREAK + i; /* first keyword + i */
106 J->text = js_intern(J, s);
107 return TK_IDENTIFIER;
/*
 * Return non-zero when c is a white space code point: TAB (0x9), VT (0xB),
 * FF (0xC), SPACE (0x20), NO-BREAK SPACE (0xA0) or U+FEFF.
 * Line terminators are not included; see jsY_isnewline.
 */
110 int jsY_iswhite(int c)
112 return c == 0x9 || c == 0xB || c == 0xC || c == 0x20 || c == 0xA0 || c == 0xFEFF;
/*
 * Return non-zero when c is a line terminator: LF (0xA), CR (0xD),
 * U+2028 or U+2029.
 */
115 int jsY_isnewline(int c)
117 return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
/*
 * ASCII-only character classification helpers.
 *   isalpha(c)  c is in 'a'..'z' or 'A'..'Z'
 *   isdigit(c)  c is in '0'..'9'
 *   ishex(c)    c is a hexadecimal LETTER ('a'..'f' or 'A'..'F'); decimal
 *               digits are not matched, callers combine it with isdigit
 * Every use of the argument is parenthesized so that expressions such as
 * `ch & 0x7F` classify correctly. The argument is still expanded more than
 * once, so it must not have side effects.
 */
#define isalpha(c) (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
#define isdigit(c) ((c) >= '0' && (c) <= '9')
#define ishex(c) (((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))
/*
 * Return non-zero when c may begin an identifier: an ASCII letter, '$', '_',
 * or any code point accepted by isalpharune.
 * NOTE(review): isalpharune is defined outside this excerpt; presumably it
 * is the Unicode alphabetic test -- confirm.
 */
124 static int jsY_isidentifierstart(int c)
126 return isalpha(c) || c == '$' || c == '_' || isalpharune(c);
/*
 * Return non-zero when c may appear inside an identifier after its first
 * character: an ASCII digit, an ASCII letter, '$', '_', or any code point
 * accepted by isalpharune.
 */
129 static int jsY_isidentifierpart(int c)
131 return isdigit(c) || isalpha(c) || c == '$' || c == '_' || isalpharune(c);
134 static int jsY_isdec(int c)
141 return isdigit(c) || ishex(c);
146 if (c >= '0' && c <= '9') return c - '0';
147 if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
148 if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
152 static void jsY_next(js_State *J)
155 J->source += chartorune(&c, J->source);
156 /* consume CR LF as one unit */
157 if (c == '\r' && *J->source == '\n')
159 if (jsY_isnewline(c)) {
/*
 * If the lookahead character J->lexchar equals x, call jsY_next(J) and
 * evaluate to 1; otherwise evaluate to 0 without calling jsY_next.
 * Both arguments are parenthesized where they are compared. J is expanded
 * twice, so it must be free of side effects.
 */
#define jsY_accept(J, x) ((J)->lexchar == (x) ? (jsY_next(J), 1) : 0)
/*
 * Require the lookahead character to be x: accept it via jsY_accept, or
 * report "expected '<x>'" through jsY_error.
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement and cannot capture an `else` that follows the call site.
 * x is expanded twice; pass a side-effect-free expression.
 */
#define jsY_expect(J, x) do { if (!jsY_accept(J, x)) jsY_error(J, "expected '%c'", (x)); } while (0)
170 static void jsY_unescape(js_State *J)
172 if (jsY_accept(J, '\\')) {
173 if (jsY_accept(J, 'u')) {
175 if (!jsY_ishex(J->lexchar)) goto error; x |= jsY_tohex(J->lexchar) << 12; jsY_next(J);
176 if (!jsY_ishex(J->lexchar)) goto error; x |= jsY_tohex(J->lexchar) << 8; jsY_next(J);
177 if (!jsY_ishex(J->lexchar)) goto error; x |= jsY_tohex(J->lexchar) << 4; jsY_next(J);
178 if (!jsY_ishex(J->lexchar)) goto error; x |= jsY_tohex(J->lexchar);
183 jsY_error(J, "unexpected escape sequence");
187 static void textinit(js_State *J)
189 if (!J->lexbuf.text) {
190 J->lexbuf.cap = 4096;
191 J->lexbuf.text = js_malloc(J, J->lexbuf.cap);
196 static void textpush(js_State *J, Rune c)
199 if (J->lexbuf.len + n > J->lexbuf.cap) {
200 J->lexbuf.cap = J->lexbuf.cap * 2;
201 J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap);
203 J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c);
206 static char *textend(js_State *J)
209 return J->lexbuf.text;
212 static void lexlinecomment(js_State *J)
214 while (J->lexchar && J->lexchar != '\n')
218 static int lexcomment(js_State *J)
220 /* already consumed initial '/' '*' sequence */
221 while (J->lexchar != 0) {
222 if (jsY_accept(J, '*')) {
223 while (J->lexchar == '*')
225 if (jsY_accept(J, '/'))
233 static double lexhex(js_State *J)
236 if (!jsY_ishex(J->lexchar))
237 jsY_error(J, "malformed hexadecimal number");
238 while (jsY_ishex(J->lexchar)) {
239 n = n * 16 + jsY_tohex(J->lexchar);
247 static double lexinteger(js_State *J)
250 if (!jsY_isdec(J->lexchar))
251 jsY_error(J, "malformed number");
252 while (jsY_isdec(J->lexchar)) {
253 n = n * 10 + (J->lexchar - '0');
259 static double lexfraction(js_State *J)
263 while (jsY_isdec(J->lexchar)) {
264 n = n * 10 + (J->lexchar - '0');
271 static double lexexponent(js_State *J)
274 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
275 if (jsY_accept(J, '-')) sign = -1;
276 else if (jsY_accept(J, '+')) sign = 1;
278 return sign * lexinteger(J);
283 static int lexnumber(js_State *J)
288 if (jsY_accept(J, '0')) {
289 if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
290 J->number = lexhex(J);
293 if (jsY_isdec(J->lexchar))
294 jsY_error(J, "number with leading zero");
296 if (jsY_accept(J, '.'))
298 } else if (jsY_accept(J, '.')) {
299 if (!jsY_isdec(J->lexchar))
304 if (jsY_accept(J, '.'))
314 if (jsY_isidentifierstart(J->lexchar))
315 jsY_error(J, "number with letter suffix");
323 static int lexnumber(js_State *J)
325 const char *s = J->source - 1;
327 if (jsY_accept(J, '0')) {
328 if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
329 J->number = lexhex(J);
332 if (jsY_isdec(J->lexchar))
333 jsY_error(J, "number with leading zero");
334 if (jsY_accept(J, '.')) {
335 while (jsY_isdec(J->lexchar))
338 } else if (jsY_accept(J, '.')) {
339 if (!jsY_isdec(J->lexchar))
341 while (jsY_isdec(J->lexchar))
344 while (jsY_isdec(J->lexchar))
346 if (jsY_accept(J, '.')) {
347 while (jsY_isdec(J->lexchar))
352 if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
353 if (J->lexchar == '-' || J->lexchar == '+')
355 while (jsY_isdec(J->lexchar))
359 if (jsY_isidentifierstart(J->lexchar))
360 jsY_error(J, "number with letter suffix");
362 J->number = js_strtod(s, NULL);
369 static int lexescape(js_State *J)
373 /* already consumed '\' */
375 if (jsY_accept(J, '\n'))
378 switch (J->lexchar) {
381 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); }
382 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); }
383 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
384 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
389 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
390 if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
393 case '0': textpush(J, 0); jsY_next(J); break;
394 case '\\': textpush(J, '\\'); jsY_next(J); break;
395 case '\'': textpush(J, '\''); jsY_next(J); break;
396 case '"': textpush(J, '"'); jsY_next(J); break;
397 case 'b': textpush(J, '\b'); jsY_next(J); break;
398 case 'f': textpush(J, '\f'); jsY_next(J); break;
399 case 'n': textpush(J, '\n'); jsY_next(J); break;
400 case 'r': textpush(J, '\r'); jsY_next(J); break;
401 case 't': textpush(J, '\t'); jsY_next(J); break;
402 case 'v': textpush(J, '\v'); jsY_next(J); break;
403 default: textpush(J, J->lexchar); jsY_next(J); break;
408 static int lexstring(js_State *J)
417 while (J->lexchar != q) {
418 if (J->lexchar == 0 || J->lexchar == '\n')
419 jsY_error(J, "string not terminated");
420 if (jsY_accept(J, '\\')) {
422 jsY_error(J, "malformed escape sequence");
424 textpush(J, J->lexchar);
432 J->text = js_intern(J, s);
436 /* the ugliest language wart ever... */
437 static int isregexpcontext(int last)
456 static int lexregexp(js_State *J)
462 /* already consumed initial '/' */
467 while (J->lexchar != '/' || inclass) {
468 if (J->lexchar == 0 || J->lexchar == '\n') {
469 jsY_error(J, "regular expression not terminated");
470 } else if (jsY_accept(J, '\\')) {
471 if (jsY_accept(J, '/')) {
475 if (J->lexchar == 0 || J->lexchar == '\n')
476 jsY_error(J, "regular expression not terminated");
477 textpush(J, J->lexchar);
481 if (J->lexchar == '[' && !inclass)
483 if (J->lexchar == ']' && inclass)
485 textpush(J, J->lexchar);
496 while (jsY_isidentifierpart(J->lexchar)) {
497 if (jsY_accept(J, 'g')) ++g;
498 else if (jsY_accept(J, 'i')) ++i;
499 else if (jsY_accept(J, 'm')) ++m;
500 else jsY_error(J, "illegal flag in regular expression: %c", J->lexchar);
503 if (g > 1 || i > 1 || m > 1)
504 jsY_error(J, "duplicated flag in regular expression");
506 J->text = js_intern(J, s);
508 if (g) J->number += JS_REGEXP_G;
509 if (i) J->number += JS_REGEXP_I;
510 if (m) J->number += JS_REGEXP_M;
514 /* simple "return [no Line Terminator here] ..." contexts */
515 static int isnlthcontext(int last)
528 static int jsY_lexx(js_State *J)
533 J->lexline = J->line; /* save location of beginning of token */
535 while (jsY_iswhite(J->lexchar))
538 if (jsY_accept(J, '\n')) {
540 if (isnlthcontext(J->lasttoken))
545 if (jsY_accept(J, '/')) {
546 if (jsY_accept(J, '/')) {
549 } else if (jsY_accept(J, '*')) {
551 jsY_error(J, "multi-line comment not terminated");
553 } else if (isregexpcontext(J->lasttoken)) {
555 } else if (jsY_accept(J, '=')) {
562 if (J->lexchar >= '0' && J->lexchar <= '9') {
566 switch (J->lexchar) {
567 case '(': jsY_next(J); return '(';
568 case ')': jsY_next(J); return ')';
569 case ',': jsY_next(J); return ',';
570 case ':': jsY_next(J); return ':';
571 case ';': jsY_next(J); return ';';
572 case '?': jsY_next(J); return '?';
573 case '[': jsY_next(J); return '[';
574 case ']': jsY_next(J); return ']';
575 case '{': jsY_next(J); return '{';
576 case '}': jsY_next(J); return '}';
577 case '~': jsY_next(J); return '~';
588 if (jsY_accept(J, '<')) {
589 if (jsY_accept(J, '='))
593 if (jsY_accept(J, '='))
599 if (jsY_accept(J, '>')) {
600 if (jsY_accept(J, '>')) {
601 if (jsY_accept(J, '='))
605 if (jsY_accept(J, '='))
609 if (jsY_accept(J, '='))
615 if (jsY_accept(J, '=')) {
616 if (jsY_accept(J, '='))
624 if (jsY_accept(J, '=')) {
625 if (jsY_accept(J, '='))
633 if (jsY_accept(J, '+'))
635 if (jsY_accept(J, '='))
641 if (jsY_accept(J, '-'))
643 if (jsY_accept(J, '='))
649 if (jsY_accept(J, '='))
655 if (jsY_accept(J, '='))
661 if (jsY_accept(J, '&'))
663 if (jsY_accept(J, '='))
669 if (jsY_accept(J, '|'))
671 if (jsY_accept(J, '='))
677 if (jsY_accept(J, '='))
685 /* Handle \uXXXX escapes in identifiers */
687 if (jsY_isidentifierstart(J->lexchar)) {
689 textpush(J, J->lexchar);
693 while (jsY_isidentifierpart(J->lexchar)) {
694 textpush(J, J->lexchar);
701 return jsY_findkeyword(J, J->lexbuf.text);
704 if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
705 jsY_error(J, "unexpected character: '%c'", J->lexchar);
706 jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
710 void jsY_initlex(js_State *J, const char *filename, const char *source)
712 J->filename = filename;
716 jsY_next(J); /* load first lookahead character */
/*
 * Scan the next token with jsY_lexx, record it in J->lasttoken and return it.
 * The lexer reads J->lasttoken back when it calls isnlthcontext and
 * isregexpcontext.
 */
719 int jsY_lex(js_State *J)
721 return J->lasttoken = jsY_lexx(J);
724 int jsY_lexjson(js_State *J)
727 J->lexline = J->line; /* save location of beginning of token */
729 while (jsY_iswhite(J->lexchar) || J->lexchar == '\n')
732 if (J->lexchar >= '0' && J->lexchar <= '9') {
736 switch (J->lexchar) {
737 case ',': jsY_next(J); return ',';
738 case ':': jsY_next(J); return ':';
739 case '[': jsY_next(J); return '[';
740 case ']': jsY_next(J); return ']';
741 case '{': jsY_next(J); return '{';
742 case '}': jsY_next(J); return '}';
751 jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e');
755 jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l');
759 jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e');
766 if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
767 jsY_error(J, "unexpected character: '%c'", J->lexchar);
768 jsY_error(J, "unexpected character: \\u%04X", J->lexchar);