File indexing completed on 2024-05-12 15:43:25
/*
 *  This file is part of the KDE libraries
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2006 Apple Computer, Inc.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#include "lexer.h"
#include <string.h>
#include <limits.h>

#include "dtoa.h"
#include "function.h"
#include "interpreter.h"
#include "nodes.h"
#include "commonunicode.h"
#include "wtf/ASCIICType.h"
#include "wtf/DisallowCType.h"
#include <wtf/unicode/libc/UnicodeLibC.h>

using namespace WTF;
using namespace Unicode;

// GCC cstring uses these automatically, but not all implementations do.
0041 using std::strlen; 0042 using std::strcpy; 0043 using std::strncpy; 0044 using std::memset; 0045 using std::memcpy; 0046 0047 // we can't specify the namespace in yacc's C output, so do it here 0048 using namespace KJS; 0049 0050 #include "grammar.h" 0051 0052 #include "lookup.h" 0053 #include "lexer.lut.h" 0054 0055 extern YYLTYPE kjsyylloc; // global bison variable holding token info 0056 0057 // a bridge for yacc from the C world to C++ 0058 int kjsyylex() 0059 { 0060 return lexer().lex(); 0061 } 0062 0063 namespace KJS 0064 { 0065 0066 static bool isDecimalDigit(int c); 0067 0068 static const size_t initialReadBufferCapacity = 32; 0069 static const size_t initialStringTableCapacity = 64; 0070 0071 Lexer &lexer() 0072 { 0073 // ASSERT(JSLock::currentThreadIsHoldingLock()); 0074 0075 // FIXME: We'd like to avoid calling new here, but we don't currently 0076 // support tearing down the Lexer at app quit time, since that would involve 0077 // tearing down its UString data members without holding the JSLock. 
0078 static Lexer *staticLexer = new Lexer; 0079 return *staticLexer; 0080 } 0081 0082 Lexer::Lexer() 0083 : yylineno(0) 0084 , restrKeyword(false) 0085 , eatNextIdentifier(false) 0086 , stackToken(-1) 0087 , lastToken(-1) 0088 , pos(0) 0089 , code(nullptr) 0090 , length(0) 0091 #ifndef KJS_PURE_ECMA 0092 , bol(true) 0093 #endif 0094 , current(0) 0095 , next1(0) 0096 , next2(0) 0097 , next3(0) 0098 { 0099 m_buffer8.reserveCapacity(initialReadBufferCapacity); 0100 m_buffer16.reserveCapacity(initialReadBufferCapacity); 0101 m_strings.reserveCapacity(initialStringTableCapacity); 0102 m_identifiers.reserveCapacity(initialStringTableCapacity); 0103 } 0104 0105 void Lexer::setCode(const UString &sourceURL, int startingLineNumber, const KJS::UChar *c, unsigned int len) 0106 { 0107 yylineno = startingLineNumber; 0108 m_sourceURL = sourceURL; 0109 restrKeyword = false; 0110 delimited = false; 0111 eatNextIdentifier = false; 0112 stackToken = -1; 0113 lastToken = -1; 0114 pos = 0; 0115 code = c; 0116 length = len; 0117 skipLF = false; 0118 skipCR = false; 0119 error = false; 0120 #ifndef KJS_PURE_ECMA 0121 bol = true; 0122 #endif 0123 0124 // read first characters 0125 current = (length > 0) ? code[0].uc : -1; 0126 next1 = (length > 1) ? code[1].uc : -1; 0127 next2 = (length > 2) ? code[2].uc : -1; 0128 next3 = (length > 3) ? code[3].uc : -1; 0129 } 0130 0131 void Lexer::shift(unsigned int p) 0132 { 0133 // Here would be a good place to strip Cf characters, but that has caused compatibility problems: 0134 // <http://bugs.webkit.org/show_bug.cgi?id=10183>. 0135 while (p--) { 0136 current = next1; 0137 next1 = next2; 0138 next2 = next3; 0139 pos++; 0140 next3 = (pos + 3 < length) ? 
code[pos + 3].uc : -1; 0141 } 0142 } 0143 0144 // called on each new line 0145 void Lexer::nextLine() 0146 { 0147 yylineno++; 0148 #ifndef KJS_PURE_ECMA 0149 bol = true; 0150 #endif 0151 } 0152 0153 void Lexer::setDone(State s) 0154 { 0155 state = s; 0156 done = true; 0157 } 0158 0159 int Lexer::lex() 0160 { 0161 int token = 0; 0162 state = Start; 0163 unsigned short stringType = 0; // either single or double quotes 0164 m_buffer8.clear(); 0165 m_buffer16.clear(); 0166 done = false; 0167 terminator = false; 0168 skipLF = false; 0169 skipCR = false; 0170 0171 // did we push a token on the stack previously ? 0172 // (after an automatic semicolon insertion) 0173 if (stackToken >= 0) { 0174 setDone(Other); 0175 token = stackToken; 0176 stackToken = 0; 0177 } 0178 0179 while (!done) { 0180 if (skipLF && current != '\n') { // found \r but not \n afterwards 0181 skipLF = false; 0182 } 0183 if (skipCR && current != '\r') { // found \n but not \r afterwards 0184 skipCR = false; 0185 } 0186 if (skipLF || skipCR) { // found \r\n or \n\r -> eat the second one 0187 skipLF = false; 0188 skipCR = false; 0189 shift(1); 0190 } 0191 switch (state) { 0192 case Start: 0193 if (isWhiteSpace()) { 0194 // do nothing 0195 } else if (current == '/' && next1 == '/') { 0196 shift(1); 0197 state = InSingleLineComment; 0198 } else if (current == '/' && next1 == '*') { 0199 shift(1); 0200 state = InMultiLineComment; 0201 } else if (current == -1) { 0202 if (!terminator && !delimited) { 0203 // automatic semicolon insertion if program incomplete 0204 token = ';'; 0205 stackToken = 0; 0206 setDone(Other); 0207 } else { 0208 setDone(Eof); 0209 } 0210 } else if (isLineTerminator()) { 0211 nextLine(); 0212 terminator = true; 0213 if (restrKeyword) { 0214 token = ';'; 0215 setDone(Other); 0216 } 0217 } else if (current == '"' || current == '\'') { 0218 state = InString; 0219 stringType = static_cast<unsigned short>(current); 0220 } else if (isIdentStart(current)) { 0221 record16(current); 0222 state 
= InIdentifierOrKeyword; 0223 } else if (current == '\\') { 0224 state = InIdentifierStartUnicodeEscapeStart; 0225 } else if (current == '0') { 0226 record8(current); 0227 state = InNum0; 0228 } else if (isDecimalDigit(current)) { 0229 record8(current); 0230 state = InNum; 0231 } else if (current == '.' && isDecimalDigit(next1)) { 0232 record8(current); 0233 state = InDecimal; 0234 #ifndef KJS_PURE_ECMA 0235 // <!-- marks the beginning of a line comment (for www usage) 0236 } else if (current == '<' && next1 == '!' && 0237 next2 == '-' && next3 == '-') { 0238 shift(3); 0239 state = InSingleLineComment; 0240 // same for --> 0241 } else if (bol && current == '-' && next1 == '-' && next2 == '>') { 0242 shift(2); 0243 state = InSingleLineComment; 0244 #endif 0245 } else { 0246 token = matchPunctuator(current, next1, next2, next3); 0247 if (token != -1) { 0248 setDone(Other); 0249 } else { 0250 // cerr << "encountered unknown character" << endl; 0251 setDone(Bad); 0252 } 0253 } 0254 break; 0255 case InString: 0256 switch (current) { 0257 case '\'': 0258 case '"': 0259 if (current == stringType) { 0260 shift(1); 0261 setDone(String); 0262 } else { 0263 record16(current); 0264 } 0265 break; 0266 case '\\': 0267 state = InEscapeSequence; 0268 break; 0269 case '\n': 0270 case '\r': 0271 case 0x2028: 0272 case 0x2029: 0273 case -1: 0274 // encountered newline or eof 0275 setDone(Bad); 0276 break; 0277 default: 0278 record16(current); 0279 break; 0280 } 0281 break; 0282 // Escape Sequences inside of strings 0283 case InEscapeSequence: 0284 if (isOctalDigit(current)) { 0285 if (current >= '0' && current <= '3' && 0286 isOctalDigit(next1) && isOctalDigit(next2)) { 0287 record16(convertOctal(current, next1, next2)); 0288 shift(2); 0289 state = InString; 0290 } else if (isOctalDigit(current) && isOctalDigit(next1)) { 0291 record16(convertOctal('0', current, next1)); 0292 shift(1); 0293 state = InString; 0294 } else if (isOctalDigit(current)) { 0295 record16(convertOctal('0', '0', 
current)); 0296 state = InString; 0297 } else { 0298 setDone(Bad); 0299 } 0300 } else if (current == 'x') { 0301 state = InHexEscape; 0302 } else if (current == 'u') { 0303 state = InUnicodeEscape; 0304 } else if (isLineTerminator()) { 0305 nextLine(); 0306 state = InString; 0307 } else { 0308 record16(singleEscape(static_cast<unsigned short>(current))); 0309 state = InString; 0310 } 0311 break; 0312 case InHexEscape: 0313 if (isHexDigit(current) && isHexDigit(next1)) { 0314 state = InString; 0315 record16(convertHex(current, next1)); 0316 shift(1); 0317 } else { 0318 setDone(Bad); 0319 } 0320 break; 0321 case InUnicodeEscape: 0322 if (isHexDigit(current) && isHexDigit(next1) && isHexDigit(next2) && isHexDigit(next3)) { 0323 record16(convertUnicode(current, next1, next2, next3)); 0324 shift(3); 0325 state = InString; 0326 } else if (current == stringType) { 0327 record16('u'); 0328 shift(1); 0329 setDone(String); 0330 } else { 0331 setDone(Bad); 0332 } 0333 break; 0334 case InSingleLineComment: 0335 if (isLineTerminator()) { 0336 nextLine(); 0337 terminator = true; 0338 if (restrKeyword) { 0339 token = ';'; 0340 setDone(Other); 0341 } else { 0342 state = Start; 0343 } 0344 } else if (current == -1) { 0345 setDone(Eof); 0346 } 0347 break; 0348 case InMultiLineComment: 0349 if (current == -1) { 0350 setDone(Bad); 0351 } else if (isLineTerminator()) { 0352 nextLine(); 0353 } else if (current == '*' && next1 == '/') { 0354 state = Start; 0355 shift(1); 0356 } 0357 break; 0358 case InIdentifierOrKeyword: 0359 case InIdentifier: 0360 if (isIdentPart(current)) { 0361 record16(current); 0362 } else if (current == '\\') { 0363 state = InIdentifierPartUnicodeEscapeStart; 0364 } else { 0365 setDone(state == InIdentifierOrKeyword ? 
IdentifierOrKeyword : Identifier); 0366 } 0367 break; 0368 case InNum0: 0369 if (current == 'x' || current == 'X') { 0370 m_buffer8.clear(); 0371 state = InHex; 0372 } else if (current == 'b' || current == 'B') { 0373 m_buffer8.clear(); 0374 state = InBinary; 0375 } else if (current == 'o' || current == 'O') { 0376 m_buffer8.clear(); 0377 state = InOctal; 0378 } else if (current == '.') { 0379 record8(current); 0380 state = InDecimal; 0381 } else if (current == 'e' || current == 'E') { 0382 record8(current); 0383 state = InExponentIndicator; 0384 } else if (isOctalDigit(current)) { 0385 record8(current); 0386 state = InLegacyOctal; 0387 } else if (isDecimalDigit(current)) { 0388 record8(current); 0389 state = InDecimal; 0390 } else { 0391 setDone(Number); 0392 } 0393 break; 0394 case InHex: 0395 if (isHexDigit(current)) { 0396 record8(current); 0397 } else { 0398 setDone(Hex); 0399 } 0400 break; 0401 case InOctal: 0402 if (isOctalDigit(current)) { 0403 record8(current); 0404 } else if (isDecimalDigit(current)) { 0405 setDone(Bad); 0406 } else { 0407 setDone(Octal); 0408 } 0409 break; 0410 case InLegacyOctal: 0411 if (isOctalDigit(current)) { 0412 record8(current); 0413 } else if (isDecimalDigit(current)) { 0414 record8(current); 0415 state = InDecimal; 0416 } else { 0417 setDone(Octal); 0418 } 0419 break; 0420 case InBinary: 0421 if (isBinaryDigit(current)) { 0422 record8(current); 0423 } else if (isDecimalDigit(current)) { 0424 setDone(Bad); 0425 } else { 0426 setDone(Binary); 0427 } 0428 break; 0429 case InNum: 0430 if (isDecimalDigit(current)) { 0431 record8(current); 0432 } else if (current == '.') { 0433 record8(current); 0434 state = InDecimal; 0435 } else if (current == 'e' || current == 'E') { 0436 record8(current); 0437 state = InExponentIndicator; 0438 } else { 0439 setDone(Number); 0440 } 0441 break; 0442 case InDecimal: 0443 if (isDecimalDigit(current)) { 0444 record8(current); 0445 } else if (current == 'e' || current == 'E') { 0446 record8(current); 
0447 state = InExponentIndicator; 0448 } else { 0449 setDone(Number); 0450 } 0451 break; 0452 case InExponentIndicator: 0453 if (current == '+' || current == '-') { 0454 record8(current); 0455 } else if (isDecimalDigit(current)) { 0456 record8(current); 0457 state = InExponent; 0458 } else { 0459 setDone(Bad); 0460 } 0461 break; 0462 case InExponent: 0463 if (isDecimalDigit(current)) { 0464 record8(current); 0465 } else { 0466 setDone(Number); 0467 } 0468 break; 0469 case InIdentifierStartUnicodeEscapeStart: 0470 if (current == 'u') { 0471 state = InIdentifierStartUnicodeEscape; 0472 } else { 0473 setDone(Bad); 0474 } 0475 break; 0476 case InIdentifierPartUnicodeEscapeStart: 0477 if (current == 'u') { 0478 state = InIdentifierPartUnicodeEscape; 0479 } else { 0480 setDone(Bad); 0481 } 0482 break; 0483 case InIdentifierStartUnicodeEscape: 0484 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { 0485 setDone(Bad); 0486 break; 0487 } 0488 token = convertUnicode(current, next1, next2, next3).uc; 0489 shift(3); 0490 if (!isIdentStart(token)) { 0491 setDone(Bad); 0492 break; 0493 } 0494 record16(token); 0495 state = InIdentifier; 0496 break; 0497 case InIdentifierPartUnicodeEscape: 0498 if (!isHexDigit(current) || !isHexDigit(next1) || !isHexDigit(next2) || !isHexDigit(next3)) { 0499 setDone(Bad); 0500 break; 0501 } 0502 token = convertUnicode(current, next1, next2, next3).uc; 0503 shift(3); 0504 if (!isIdentPart(token)) { 0505 setDone(Bad); 0506 break; 0507 } 0508 record16(token); 0509 state = InIdentifier; 0510 break; 0511 default: 0512 assert(!"Unhandled state in switch statement"); 0513 } 0514 0515 // move on to the next character 0516 if (!done) { 0517 shift(1); 0518 } 0519 #ifndef KJS_PURE_ECMA 0520 if (state != Start && state != InMultiLineComment) { 0521 bol = false; 0522 } 0523 #endif 0524 } 0525 0526 // no identifiers allowed directly after numeric literal, e.g. 
"3in" is bad 0527 if ((state == Number || state == Octal || state == Hex || state == Binary) && 0528 isIdentStart(current)) { 0529 state = Bad; 0530 } 0531 0532 // terminate string 0533 m_buffer8.append('\0'); 0534 0535 #ifdef KJS_DEBUG_LEX 0536 fprintf(stderr, "line: %d ", lineNo()); 0537 fprintf(stderr, "yytext (%x): ", m_buffer8[0]); 0538 fprintf(stderr, "%s ", m_buffer8.data()); 0539 #endif 0540 0541 double dval = 0; 0542 if (state == Number) { 0543 dval = kjs_strtod(m_buffer8.data(), nullptr); 0544 } else if (state == Hex) { // scan hex numbers 0545 // buffer contains "...\0" found after 0x 0546 if (m_buffer8.size() > 1) { 0547 const char *p = m_buffer8.data(); 0548 while (char c = *p++) { 0549 dval *= 16; 0550 dval += convertHex(c); 0551 } 0552 if (dval >= mantissaOverflowLowerBound) { 0553 dval = parseIntOverflow(m_buffer8.data(), m_buffer8.size() - 1, 16); 0554 } 0555 state = Number; 0556 } else { 0557 // no digits seen after 0x 0558 state = Bad; 0559 } 0560 } else if (state == Octal) { // scan octal number 0561 // buffer contains "...\0" found after 0o 0562 if (m_buffer8.size() > 1) { 0563 const char *p = m_buffer8.data(); 0564 while (char c = *p++) { 0565 dval *= 8; 0566 dval += c - '0'; 0567 } 0568 if (dval >= mantissaOverflowLowerBound) { 0569 dval = parseIntOverflow(m_buffer8.data(), m_buffer8.size() - 1, 8); 0570 } 0571 state = Number; 0572 } else { 0573 // no octal digits after 0o 0574 state = Bad; 0575 } 0576 } else if (state == Binary) { // scan binary numbers 0577 // buffer contains the binary digits after "0b". E.g. 
"1010\0" 0578 if (m_buffer8.size () > 1) { 0579 const char *p = m_buffer8.data(); 0580 while (char c = *p++) { 0581 dval *= 2; 0582 dval += convertHex(c); 0583 } 0584 if (dval >= mantissaOverflowLowerBound) { 0585 dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 2); 0586 } 0587 state = Number; 0588 } else { 0589 state = Bad; 0590 } 0591 } 0592 0593 #ifdef KJS_DEBUG_LEX 0594 switch (state) { 0595 case Eof: 0596 printf("(EOF)\n"); 0597 break; 0598 case Other: 0599 printf("(Other)\n"); 0600 break; 0601 case Identifier: 0602 printf("(Identifier)/(Keyword)\n"); 0603 break; 0604 case String: 0605 printf("(String)\n"); 0606 break; 0607 case Number: 0608 printf("(Number)\n"); 0609 break; 0610 default: 0611 printf("(unknown)"); 0612 } 0613 #endif 0614 0615 if (state != Identifier && eatNextIdentifier) { 0616 eatNextIdentifier = false; 0617 } 0618 0619 restrKeyword = false; 0620 delimited = false; 0621 kjsyylloc.first_line = yylineno; // ??? 0622 kjsyylloc.last_line = yylineno; 0623 0624 switch (state) { 0625 case Eof: 0626 token = 0; 0627 break; 0628 case Other: 0629 if (token == '}' || token == ';') { 0630 delimited = true; 0631 } 0632 break; 0633 case IdentifierOrKeyword: 0634 if ((token = Lookup::find(&mainTable, m_buffer16.data(), m_buffer16.size())) < 0) { 0635 case Identifier: 0636 // Lookup for keyword failed, means this is an identifier 0637 // Apply anonymous-function hack below (eat the identifier) 0638 if (eatNextIdentifier) { 0639 eatNextIdentifier = false; 0640 token = lex(); 0641 break; 0642 } 0643 kjsyylval.ident = makeIdentifier(m_buffer16); 0644 token = IDENT; 0645 break; 0646 } 0647 0648 eatNextIdentifier = false; 0649 // Hack for "f = function somename() { ... 
}", too hard to get into the grammar 0650 if (token == FUNCTION && lastToken == '=') { 0651 eatNextIdentifier = true; 0652 } 0653 0654 if (token == CONTINUE || token == BREAK || 0655 token == RETURN || token == THROW) { 0656 restrKeyword = true; 0657 } 0658 break; 0659 case String: 0660 kjsyylval.ustr = makeUString(m_buffer16); 0661 token = STRING; 0662 break; 0663 case Number: 0664 kjsyylval.dval = dval; 0665 token = NUMBER; 0666 break; 0667 case Bad: 0668 #ifdef KJS_DEBUG_LEX 0669 fprintf(stderr, "KJS: yylex: ERROR.\n"); 0670 #endif 0671 error = true; 0672 return -1; 0673 default: 0674 assert(!"unhandled numeration value in switch"); 0675 error = true; 0676 return -1; 0677 } 0678 lastToken = token; 0679 return token; 0680 } 0681 0682 bool Lexer::isWhiteSpace() const 0683 { 0684 return CommonUnicode::isWhiteSpace(current); 0685 } 0686 0687 bool Lexer::isLineTerminator() 0688 { 0689 bool cr = (current == '\r'); 0690 bool lf = (current == '\n'); 0691 if (cr) { 0692 skipLF = true; 0693 } else if (lf) { 0694 skipCR = true; 0695 } 0696 return cr || lf || current == 0x2028 || current == 0x2029; 0697 } 0698 0699 typedef bool (CharacterCheck)(int c); 0700 0701 static bool isIdentStartLibC(int c) 0702 { 0703 return (category(c) & (Letter_Uppercase | Letter_Lowercase | 0704 Letter_Titlecase | Letter_Modifier | Letter_Other)) 0705 || c == '$' || c == '_'; 0706 } 0707 0708 static bool isIdentPartLibC(int c) 0709 { 0710 return (category(c) & (Letter_Uppercase | Letter_Lowercase | 0711 Letter_Titlecase | Letter_Modifier | Letter_Other | 0712 Mark_NonSpacing | Mark_SpacingCombining | 0713 Number_DecimalDigit | Punctuation_Connector)) 0714 || c == '$' || c == '_'; 0715 } 0716 0717 static CharacterCheck *identStart = ::isIdentStartLibC; 0718 static CharacterCheck *identPart = ::isIdentPartLibC; 0719 0720 void Lexer::setIdentStartChecker(bool (*f)(int c)) 0721 { 0722 identStart = f; 0723 } 0724 0725 void Lexer::setIdentPartChecker(bool (*f)(int c)) 0726 { 0727 identPart = f; 0728 } 
0729 0730 bool Lexer::isIdentStart(int c) 0731 { 0732 return (*identStart)(c); 0733 } 0734 0735 bool Lexer::isIdentPart(int c) 0736 { 0737 return (*identPart)(c); 0738 } 0739 0740 static bool isDecimalDigit(int c) 0741 { 0742 return (c >= '0' && c <= '9'); 0743 } 0744 0745 bool Lexer::isHexDigit(int c) 0746 { 0747 return ((c >= '0' && c <= '9') || 0748 (c >= 'a' && c <= 'f') || 0749 (c >= 'A' && c <= 'F')); 0750 } 0751 0752 bool Lexer::isBinaryDigit(int c) 0753 { 0754 return c == '0' || c == '1'; 0755 } 0756 0757 bool Lexer::isOctalDigit(int c) 0758 { 0759 return (c >= '0' && c <= '7'); 0760 } 0761 0762 int Lexer::matchPunctuator(int c1, int c2, int c3, int c4) 0763 { 0764 if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') { 0765 shift(4); 0766 return URSHIFTEQUAL; 0767 } else if (c1 == '=' && c2 == '=' && c3 == '=') { 0768 shift(3); 0769 return STREQ; 0770 } else if (c1 == '!' && c2 == '=' && c3 == '=') { 0771 shift(3); 0772 return STRNEQ; 0773 } else if (c1 == '>' && c2 == '>' && c3 == '>') { 0774 shift(3); 0775 return URSHIFT; 0776 } else if (c1 == '<' && c2 == '<' && c3 == '=') { 0777 shift(3); 0778 return LSHIFTEQUAL; 0779 } else if (c1 == '>' && c2 == '>' && c3 == '=') { 0780 shift(3); 0781 return RSHIFTEQUAL; 0782 } else if (c1 == '<' && c2 == '=') { 0783 shift(2); 0784 return LE; 0785 } else if (c1 == '>' && c2 == '=') { 0786 shift(2); 0787 return GE; 0788 } else if (c1 == '!' 
&& c2 == '=') { 0789 shift(2); 0790 return NE; 0791 } else if (c1 == '+' && c2 == '+') { 0792 shift(2); 0793 if (terminator) { 0794 return AUTOPLUSPLUS; 0795 } else { 0796 return PLUSPLUS; 0797 } 0798 } else if (c1 == '-' && c2 == '-') { 0799 shift(2); 0800 if (terminator) { 0801 return AUTOMINUSMINUS; 0802 } else { 0803 return MINUSMINUS; 0804 } 0805 } else if (c1 == '=' && c2 == '=') { 0806 shift(2); 0807 return EQEQ; 0808 } else if (c1 == '+' && c2 == '=') { 0809 shift(2); 0810 return PLUSEQUAL; 0811 } else if (c1 == '-' && c2 == '=') { 0812 shift(2); 0813 return MINUSEQUAL; 0814 } else if (c1 == '*' && c2 == '=') { 0815 shift(2); 0816 return MULTEQUAL; 0817 } else if (c1 == '/' && c2 == '=') { 0818 shift(2); 0819 return DIVEQUAL; 0820 } else if (c1 == '*' && c2 == '*' && c3 == '=') { 0821 shift(3); 0822 return EXPEQUAL; 0823 } else if (c1 == '&' && c2 == '=') { 0824 shift(2); 0825 return ANDEQUAL; 0826 } else if (c1 == '^' && c2 == '=') { 0827 shift(2); 0828 return XOREQUAL; 0829 } else if (c1 == '%' && c2 == '=') { 0830 shift(2); 0831 return MODEQUAL; 0832 } else if (c1 == '|' && c2 == '=') { 0833 shift(2); 0834 return OREQUAL; 0835 } else if (c1 == '<' && c2 == '<') { 0836 shift(2); 0837 return LSHIFT; 0838 } else if (c1 == '>' && c2 == '>') { 0839 shift(2); 0840 return RSHIFT; 0841 } else if (c1 == '&' && c2 == '&') { 0842 shift(2); 0843 return AND; 0844 } else if (c1 == '|' && c2 == '|') { 0845 shift(2); 0846 return OR; 0847 } else if (c1 == '*' && c2 == '*') { 0848 shift(2); 0849 return T_EXP; 0850 } 0851 0852 switch (c1) { 0853 case '=': 0854 case '>': 0855 case '<': 0856 case ',': 0857 case '!': 0858 case '~': 0859 case '?': 0860 case ':': 0861 case '.': 0862 case '+': 0863 case '-': 0864 case '*': 0865 case '/': 0866 case '&': 0867 case '|': 0868 case '^': 0869 case '%': 0870 case '(': 0871 case ')': 0872 case '{': 0873 case '}': 0874 case '[': 0875 case ']': 0876 case ';': 0877 shift(1); 0878 return static_cast<int>(c1); 0879 default: 0880 return -1; 
0881 } 0882 } 0883 0884 unsigned short Lexer::singleEscape(unsigned short c) 0885 { 0886 switch (c) { 0887 case 'b': 0888 return 0x08; 0889 case 't': 0890 return 0x09; 0891 case 'n': 0892 return 0x0A; 0893 case 'v': 0894 return 0x0B; 0895 case 'f': 0896 return 0x0C; 0897 case 'r': 0898 return 0x0D; 0899 case '"': 0900 return 0x22; 0901 case '\'': 0902 return 0x27; 0903 case '\\': 0904 return 0x5C; 0905 default: 0906 return c; 0907 } 0908 } 0909 0910 unsigned short Lexer::convertOctal(int c1, int c2, int c3) 0911 { 0912 return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0'); 0913 } 0914 0915 unsigned char Lexer::convertHex(int c) 0916 { 0917 if (c >= '0' && c <= '9') { 0918 return static_cast<unsigned char>(c - '0'); 0919 } 0920 if (c >= 'a' && c <= 'f') { 0921 return static_cast<unsigned char>(c - 'a' + 10); 0922 } 0923 return static_cast<unsigned char>(c - 'A' + 10); 0924 } 0925 0926 unsigned char Lexer::convertHex(int c1, int c2) 0927 { 0928 return ((convertHex(c1) << 4) + convertHex(c2)); 0929 } 0930 0931 KJS::UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) 0932 { 0933 return KJS::UChar((convertHex(c1) << 4) + convertHex(c2), 0934 (convertHex(c3) << 4) + convertHex(c4)); 0935 } 0936 0937 void Lexer::record8(int c) 0938 { 0939 ASSERT(c >= 0); 0940 ASSERT(c <= 0xff); 0941 m_buffer8.append(c); 0942 } 0943 0944 void Lexer::record16(int c) 0945 { 0946 ASSERT(c >= 0); 0947 ASSERT(c <= USHRT_MAX); 0948 record16(UChar(static_cast<unsigned short>(c))); 0949 } 0950 0951 void Lexer::record16(KJS::UChar c) 0952 { 0953 m_buffer16.append(c); 0954 } 0955 0956 bool Lexer::scanRegExp() 0957 { 0958 m_buffer16.clear(); 0959 bool lastWasEscape = false; 0960 bool inBrackets = false; 0961 0962 while (1) { 0963 if (isLineTerminator() || current == -1) { 0964 return false; 0965 } else if (current != '/' || lastWasEscape == true || inBrackets == true) { 0966 // keep track of '[' and ']' 0967 if (!lastWasEscape) { 0968 if (current == '[' && 
!inBrackets) { 0969 inBrackets = true; 0970 } 0971 if (current == ']' && inBrackets) { 0972 inBrackets = false; 0973 } 0974 } 0975 record16(current); 0976 lastWasEscape = 0977 !lastWasEscape && (current == '\\'); 0978 } else { // end of regexp 0979 m_pattern = UString(m_buffer16); 0980 m_buffer16.clear(); 0981 shift(1); 0982 break; 0983 } 0984 shift(1); 0985 } 0986 0987 while (isIdentPart(current)) { 0988 record16(current); 0989 shift(1); 0990 } 0991 m_flags = UString(m_buffer16); 0992 0993 return true; 0994 } 0995 0996 void Lexer::clear() 0997 { 0998 deleteAllValues(m_strings); 0999 Vector<UString *> newStrings; 1000 newStrings.reserveCapacity(initialStringTableCapacity); 1001 m_strings.swap(newStrings); 1002 deleteAllValues(m_identifiers); 1003 Vector<KJS::Identifier *> newIdentifiers; 1004 newIdentifiers.reserveCapacity(initialStringTableCapacity); 1005 m_identifiers.swap(newIdentifiers); 1006 1007 Vector<char> newBuffer8; 1008 newBuffer8.reserveCapacity(initialReadBufferCapacity); 1009 m_buffer8.swap(newBuffer8); 1010 1011 Vector<UChar> newBuffer16; 1012 newBuffer16.reserveCapacity(initialReadBufferCapacity); 1013 m_buffer16.swap(newBuffer16); 1014 1015 m_pattern = nullptr; 1016 m_flags = nullptr; 1017 m_sourceURL = nullptr; 1018 } 1019 1020 Identifier *Lexer::makeIdentifier(const Vector<KJS::UChar> &buffer) 1021 { 1022 KJS::Identifier *identifier = new KJS::Identifier(buffer.data(), buffer.size()); 1023 m_identifiers.append(identifier); 1024 return identifier; 1025 } 1026 1027 UString *Lexer::makeUString(const Vector<KJS::UChar> &buffer) 1028 { 1029 UString *string = new UString(buffer); 1030 m_strings.append(string); 1031 return string; 1032 } 1033 1034 } // namespace KJS