File indexing completed on 2024-05-12 15:43:25
0001 /* 0002 * This file is part of the KDE libraries 0003 * Copyright (C) 1999-2019 Harri Porten (porten@kde.org) 0004 * Copyright (C) 2007 Apple Inc. 0005 * 0006 * This library is free software; you can redistribute it and/or 0007 * modify it under the terms of the GNU Library General Public 0008 * License as published by the Free Software Foundation; either 0009 * version 2 of the License, or (at your option) any later version. 0010 * 0011 * This library is distributed in the hope that it will be useful, 0012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0014 * Library General Public License for more details. 0015 * 0016 * You should have received a copy of the GNU Library General Public License 0017 * along with this library; see the file COPYING.LIB. If not, write to 0018 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 0019 * Boston, MA 02110-1301, USA. 0020 * 0021 */ 0022 0023 #ifndef Lexer_h 0024 #define Lexer_h 0025 0026 #include "ustring.h" 0027 #include <wtf/Vector.h> 0028 #include <wtf/Noncopyable.h> 0029 0030 namespace KJS 0031 { 0032 0033 class Identifier; 0034 class RegExp; 0035 0036 class Lexer : Noncopyable 0037 { 0038 public: 0039 void setCode(const UString &sourceURL, int startingLineNumber, const UChar *c, unsigned int len); 0040 int lex(); 0041 0042 int lineNo() const 0043 { 0044 return yylineno; 0045 } 0046 UString sourceURL() const 0047 { 0048 return m_sourceURL; 0049 } 0050 0051 bool prevTerminator() const 0052 { 0053 return terminator; 0054 } 0055 0056 enum State { Start, 0057 IdentifierOrKeyword, 0058 Identifier, 0059 InIdentifierOrKeyword, 0060 InIdentifier, 0061 InIdentifierStartUnicodeEscapeStart, 0062 InIdentifierStartUnicodeEscape, 0063 InIdentifierPartUnicodeEscapeStart, 0064 InIdentifierPartUnicodeEscape, 0065 InSingleLineComment, 0066 InMultiLineComment, 0067 InNum, 0068 InNum0, 0069 InHex, 0070 InOctal, 0071 InLegacyOctal, 0072 InBinary, 0073 InDecimal, 0074 InExponentIndicator, 0075 InExponent, 0076 Hex, 0077 Octal, 0078 Binary, 0079 Number, 0080 String, 0081 Eof, 0082 InString, 0083 InEscapeSequence, 0084 InHexEscape, 0085 InUnicodeEscape, 0086 Other, 0087 Bad 0088 }; 0089 0090 bool scanRegExp(); 0091 const UString &pattern() const 0092 { 0093 return m_pattern; 0094 } 0095 const UString &flags() const 0096 { 0097 return m_flags; 0098 } 0099 0100 static unsigned char convertHex(int); 0101 static unsigned char convertHex(int c1, int c2); 0102 static UChar convertUnicode(int c1, int c2, int c3, int c4); 0103 static bool isIdentStart(int); 0104 static bool isIdentPart(int); 0105 static bool isHexDigit(int); 0106 static bool isBinaryDigit(int); 0107 0108 bool sawError() const 0109 { 0110 return error; 0111 } 0112 0113 void clear(); 0114 0115 static void setIdentStartChecker(bool (*f)(int c)); 0116 static void setIdentPartChecker(bool (*f)(int c)); 0117 0118 private: 0119 friend Lexer &lexer(); 0120 Lexer(); 0121 0122 int yylineno; 0123 UString m_sourceURL; 0124 bool done; 0125 Vector<char> m_buffer8; 0126 Vector<UChar> m_buffer16; 0127 bool terminator; 0128 bool restrKeyword; 0129 // encountered delimiter like "'" and "}" on last run 0130 bool delimited; 0131 bool skipLF; 0132 bool skipCR; 0133 bool eatNextIdentifier; 0134 int stackToken; 0135 int lastToken; 0136 0137 State state; 0138 void setDone(State s); 0139 unsigned int pos; 0140 void shift(unsigned int p); 0141 void nextLine(); 0142 int lookupKeyword(const char *); 0143 0144 bool isWhiteSpace() const; 0145 bool isLineTerminator(); 0146 static bool isOctalDigit(int c); 0147 0148 int matchPunctuator(int c1, int c2, int c3, int c4); 0149 static unsigned short singleEscape(unsigned short c); 0150 static unsigned short convertOctal(int c1, int c2, int c3); 0151 0152 void record8(int c); 0153 void record16(int c); 0154 void record16(UChar c); 0155 0156 KJS::Identifier *makeIdentifier(const Vector<UChar> &buffer); 0157 UString *makeUString(const Vector<UChar> &buffer); 0158 0159 const UChar *code; 0160 unsigned int length; 0161 int yycolumn; 0162 #ifndef KJS_PURE_ECMA 0163 int bol; // begin of line 0164 #endif 0165 bool error; 0166 0167 // current and following unicode characters (int to allow for -1 for end-of-file marker) 0168 int current, next1, next2, next3; 0169 0170 Vector<UString *> m_strings; 0171 Vector<KJS::Identifier *> m_identifiers; 0172 0173 UString m_pattern; 0174 UString m_flags; 0175 }; 0176 0177 Lexer &lexer(); // Returns the singletone JavaScript lexer. 0178 0179 } // namespace KJS 0180 0181 #endif // Lexer_h