File indexing completed on 2024-05-12 15:43:25

0001 /*
0002  *  This file is part of the KDE libraries
0003  *  Copyright (C) 1999-2019 Harri Porten (porten@kde.org)
0004  *  Copyright (C) 2007 Apple Inc.
0005  *
0006  *  This library is free software; you can redistribute it and/or
0007  *  modify it under the terms of the GNU Library General Public
0008  *  License as published by the Free Software Foundation; either
0009  *  version 2 of the License, or (at your option) any later version.
0010  *
0011  *  This library is distributed in the hope that it will be useful,
0012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
0013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0014  *  Library General Public License for more details.
0015  *
0016  *  You should have received a copy of the GNU Library General Public License
0017  *  along with this library; see the file COPYING.LIB.  If not, write to
0018  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
0019  *  Boston, MA 02110-1301, USA.
0020  *
0021  */
0022 
0023 #ifndef Lexer_h
0024 #define Lexer_h
0025 
0026 #include "ustring.h"
0027 #include <wtf/Vector.h>
0028 #include <wtf/Noncopyable.h>
0029 
0030 namespace KJS
0031 {
0032 
0033 class Identifier; // forward declaration: this header only uses it through pointers (KJS::Identifier *)
0034 class RegExp;     // forward declaration: not referenced anywhere else in this header
0035 
0036 class Lexer : Noncopyable
0037 {
     // Scanner interface of the KJS JavaScript engine. This header only declares
     // it: apart from the small inline accessors below, every member function is
     // defined elsewhere.
     //
     // The class derives from Noncopyable (wtf/Noncopyable.h) and its constructor
     // is private; the friend function lexer() is the only non-member that is
     // granted access to it.
0038 public:
     // Supplies the input to be scanned.
     // NOTE(review): parameter meanings are inferred from their names, confirm
     // against the definition: sourceURL names the script, startingLineNumber is
     // the number of its first line, c points at len UChar code units.
0039     void setCode(const UString &sourceURL, int startingLineNumber, const UChar *c, unsigned int len);
     // Runs the scanner. NOTE(review): presumably returns the code of the next
     // token; the definition is not part of this header.
0040     int lex();
0041 
     // Returns the current value of the line counter (yylineno).
0042     int lineNo() const
0043     {
0044         return yylineno;
0045     }
     // Returns the stored source URL (m_sourceURL) by value, i.e. as a copy.
0046     UString sourceURL() const
0047     {
0048         return m_sourceURL;
0049     }
0050 
     // Returns the `terminator` flag.
     // NOTE(review): presumably set when a line terminator preceded the most
     // recent token; confirm where the flag is written.
0051     bool prevTerminator() const
0052     {
0053         return terminator;
0054     }
0055 
     // States of the scanner; the transitions between them are implemented
     // outside this header.
     // The enumerator `Identifier` hides the class name KJS::Identifier inside
     // Lexer's scope, and the member declarations further down spell the class
     // with its namespace qualifier (KJS::Identifier).
0056     enum State { Start,
0057                  IdentifierOrKeyword,
0058                  Identifier,
0059                  InIdentifierOrKeyword,
0060                  InIdentifier,
0061                  InIdentifierStartUnicodeEscapeStart,
0062                  InIdentifierStartUnicodeEscape,
0063                  InIdentifierPartUnicodeEscapeStart,
0064                  InIdentifierPartUnicodeEscape,
0065                  InSingleLineComment,
0066                  InMultiLineComment,
0067                  InNum,
0068                  InNum0,
0069                  InHex,
0070                  InOctal,
0071                  InLegacyOctal,
0072                  InBinary,
0073                  InDecimal,
0074                  InExponentIndicator,
0075                  InExponent,
0076                  Hex,
0077                  Octal,
0078                  Binary,
0079                  Number,
0080                  String,
0081                  Eof,
0082                  InString,
0083                  InEscapeSequence,
0084                  InHexEscape,
0085                  InUnicodeEscape,
0086                  Other,
0087                  Bad
0088                };
0089 
     // NOTE(review): presumably scans a regular-expression literal, storing its
     // parts in m_pattern / m_flags and reporting success through the return
     // value; confirm against the definition.
0090     bool scanRegExp();
     // Returns the stored pattern string (m_pattern) by const reference.
0091     const UString &pattern() const
0092     {
0093         return m_pattern;
0094     }
     // Returns the stored flags string (m_flags) by const reference.
0095     const UString &flags() const
0096     {
0097         return m_flags;
0098     }
0099 
     // Static character conversion / classification helpers; being static they
     // can be called without a Lexer instance. Their exact semantics are defined
     // outside this header.
     // NOTE(review): the names suggest hex-digit-to-value conversion, building a
     // UChar from four hex digits, and identifier/digit classification; verify.
0100     static unsigned char convertHex(int);
0101     static unsigned char convertHex(int c1, int c2);
0102     static UChar convertUnicode(int c1, int c2, int c3, int c4);
0103     static bool isIdentStart(int);
0104     static bool isIdentPart(int);
0105     static bool isHexDigit(int);
0106     static bool isBinaryDigit(int);
0107 
     // Returns the `error` flag.
0108     bool sawError() const
0109     {
0110         return error;
0111     }
0112 
     // NOTE(review): presumably resets or releases the scanner's buffered data;
     // the definition is not visible here.
0113     void clear();
0114 
     // Register a callback that takes an int character code and returns bool.
     // NOTE(review): presumably consulted by isIdentStart() / isIdentPart();
     // confirm in the implementation.
0115     static void setIdentStartChecker(bool (*f)(int c));
0116     static void setIdentPartChecker(bool (*f)(int c));
0117 
0118 private:
     // The constructor is private; lexer() is befriended so that it can use it.
0119     friend Lexer &lexer();
0120     Lexer();
0121 
0122     int yylineno;          // line counter, reported by lineNo()
0123     UString m_sourceURL;   // reported by sourceURL()
0124     bool done;
0125     Vector<char> m_buffer8;
0126     Vector<UChar> m_buffer16;
0127     bool terminator;       // reported by prevTerminator()
0128     bool restrKeyword;
0129     // encountered delimiter like "'" and "}" on last run
0130     bool delimited;
0131     bool skipLF;
0132     bool skipCR;
0133     bool eatNextIdentifier;
0134     int stackToken;
0135     int lastToken;
0136 
0137     State state;
0138     void setDone(State s);
0139     unsigned int pos;
0140     void shift(unsigned int p);
0141     void nextLine();
0142     int lookupKeyword(const char *);
0143 
     // isWhiteSpace() is const while isLineTerminator() is not.
     // NOTE(review): the latter presumably updates scanner state such as
     // skipLF / skipCR; confirm before adding const.
0144     bool isWhiteSpace() const;
0145     bool isLineTerminator();
0146     static bool isOctalDigit(int c);
0147 
0148     int matchPunctuator(int c1, int c2, int c3, int c4);
0149     static unsigned short singleEscape(unsigned short c);
0150     static unsigned short convertOctal(int c1, int c2, int c3);
0151 
0152     void record8(int c);
0153     void record16(int c);
0154     void record16(UChar c);
0155 
     // Both factories return raw pointers and use the qualified KJS::Identifier
     // because the State enumerator `Identifier` hides the class name here.
     // NOTE(review): ownership of the returned objects is not visible in this
     // header; presumably they are tracked in m_identifiers / m_strings below.
0156     KJS::Identifier *makeIdentifier(const Vector<UChar> &buffer);
0157     UString *makeUString(const Vector<UChar> &buffer);
0158 
0159     const UChar *code;
0160     unsigned int length;
0161     int yycolumn;
     // `bol` only exists when KJS_PURE_ECMA is not defined, so the object layout
     // depends on that macro.
0162 #ifndef KJS_PURE_ECMA
0163     int bol;     // begin of line
0164 #endif
0165     bool error;            // reported by sawError()
0166 
0167     // current and following unicode characters (int to allow for -1 for end-of-file marker)
0168     int current, next1, next2, next3;
0169 
     // Vectors of raw pointers. NOTE(review): who deletes the pointees cannot be
     // determined from this header; check clear() and the destructor path.
0170     Vector<UString *> m_strings;
0171     Vector<KJS::Identifier *> m_identifiers;
0172 
0173     UString m_pattern;     // returned by pattern()
0174     UString m_flags;       // returned by flags()
0175 };
0176 
0177 Lexer &lexer(); // Returns the singleton JavaScript lexer.
0178 
0179 } // namespace KJS
0180 
0181 #endif // Lexer_h