File indexing completed on 2024-05-19 15:46:17
0001 /* 0002 SPDX-FileCopyrightText: 2007 Andreas Pakulat <apaku@gmx.de> 0003 0004 SPDX-License-Identifier: GPL-2.0-or-later 0005 */ 0006 0007 #include "qmakelexer.h" 0008 0009 #include <QString> 0010 #include "qmakeparser.h" 0011 #include <kdev-pg-location-table.h> 0012 #include <kdev-pg-token-stream.h> 0013 0014 namespace QMake { 0015 0016 bool isIdentifierCharacter(QChar* c, bool canLookAhead) 0017 { 0018 return (c->isLetter() || c->isDigit() || c->unicode() == '_' || c->unicode() == '.' || c->unicode() == '-' 0019 || c->unicode() == '$' || c->unicode() == '*' 0020 || (canLookAhead && (c + 1)->unicode() != '=' && (c->unicode() == '+'))); 0021 } 0022 0023 bool isBeginIdentifierCharacter(QChar* c) 0024 { 0025 return (c->isLetter() || c->isDigit() || c->unicode() == '_' || c->unicode() == '.' || c->unicode() == '$'); 0026 } 0027 0028 bool isEndIdentifierCharacter(QChar* c) 0029 { 0030 return (c->isLetter() || c->isDigit() || c->unicode() == '_' || c->unicode() == '.' || c->unicode() == '$' 0031 || c->unicode() == '*'); 0032 } 0033 0034 bool isCont(QChar* c) 0035 { 0036 if (c->unicode() == '\\') { 0037 c++; 0038 while (c->isSpace() && c->unicode() != '\n') { 0039 c++; 0040 } 0041 if (c->unicode() == '\n' || c->unicode() == '#') { 0042 return true; 0043 } 0044 } 0045 return false; 0046 } 0047 0048 Lexer::Lexer(Parser* _parser, QString content) 0049 : m_content(std::move(content)) 0050 , m_parser(_parser) 0051 , m_curpos(0) 0052 , m_contentSize(m_content.size()) 0053 , m_tokenBegin(0) 0054 , m_tokenEnd(0) 0055 { 0056 pushState(ErrorState); 0057 pushState(DefaultState); 0058 } 0059 0060 int Lexer::state() const 0061 { 0062 return mState.top(); 0063 } 0064 0065 void Lexer::pushState(int state) 0066 { 0067 mState.push(state); 0068 } 0069 0070 void Lexer::popState() 0071 { 0072 mState.pop(); 0073 } 0074 0075 int Lexer::nextTokenKind() 0076 { 0077 int token = Parser::Token_INVALID; 0078 if (m_curpos >= m_contentSize) { 0079 return 0; 0080 } 0081 QChar* it = m_content.data(); 0082 it += m_curpos; 0083 switch (state()) { 0084 case VariableValueState: 0085 it = ignoreWhitespaceAndComment(it); 0086 m_tokenBegin = m_curpos; 0087 if (m_curpos < m_contentSize) { 0088 if (it->unicode() == '}') { 0089 popState(); 0090 token = Parser::Token_RBRACE; 0091 } else if (it->unicode() == '\n') { 0092 popState(); 0093 createNewline(m_curpos); 0094 token = Parser::Token_NEWLINE; 0095 } else if (it->unicode() == '\\' && isCont(it)) { 0096 pushState(ContState); 0097 token = Parser::Token_CONT; 0098 } else if (it->unicode() == '"') { 0099 it++; 0100 m_curpos++; 0101 QChar* lastit = it; 0102 while ((it->unicode() != '"' || (lastit->unicode() == '\\' && it->unicode() == '"')) 0103 && it->unicode() != '\n' && it->unicode() != '#' && !isCont(it) && m_curpos < m_contentSize) { 0104 lastit = it; 0105 it++; 0106 m_curpos++; 0107 } 0108 if (it->unicode() != '"' && it->unicode() != '#') { 0109 m_curpos--; 0110 } 0111 token = Parser::Token_VALUE; 0112 if (it->unicode() == '#') { 0113 m_tokenEnd = m_curpos - 1; 0114 do { 0115 it++; 0116 m_curpos++; 0117 } while (it->unicode() != '\n' && m_curpos < m_contentSize); 0118 if (it->unicode() == '\n') { 0119 m_curpos--; 0120 } 0121 return token; 0122 } 0123 } else if (it->unicode() == '(') { 0124 unsigned int bracecount = 0; 0125 while ((it->unicode() != ';' || bracecount > 0) && it->unicode() != '\n' && !isCont(it) 0126 && m_curpos < m_contentSize) { 0127 if (it->unicode() == '(') { 0128 bracecount++; 0129 } else if (it->unicode() == ')' && bracecount > 0) { 0130 bracecount--; 0131 } 0132 ++it; 0133 ++m_curpos; 0134 } 0135 if (it->unicode() != ';') { 0136 m_curpos--; 0137 } 0138 token = Parser::Token_VALUE; 0139 } else { 0140 while (!it->isSpace() && !isCont(it) && it->unicode() != '#' && m_curpos < m_contentSize) { 0141 it++; 0142 m_curpos++; 0143 } 0144 m_curpos--; 0145 token = Parser::Token_VALUE; 0146 } 0147 } 0148 break; 0149 case FunctionArgState: 0150 m_tokenBegin = m_curpos; 0151 if (it->unicode() == '\n') { 0152 createNewline(m_curpos); 0153 token = Parser::Token_NEWLINE; 0154 } else if (it->unicode() == '\\' && isCont(it)) { 0155 pushState(ContState); 0156 token = Parser::Token_CONT; 0157 } else if (it->unicode() == ',') { 0158 token = Parser::Token_COMMA; 0159 } else if (it->unicode() == ')') { 0160 popState(); 0161 token = Parser::Token_RPAREN; 0162 } else { 0163 unsigned int parentCount = 0; 0164 while (parentCount > 0 || (it->unicode() != ')' && it->unicode() != ',' && m_curpos < m_contentSize)) { 0165 if (it->unicode() == ')') { 0166 parentCount--; 0167 } else if (it->unicode() == '(') { 0168 parentCount++; 0169 } 0170 ++it; 0171 ++m_curpos; 0172 } 0173 m_curpos--; 0174 token = Parser::Token_VALUE; 0175 } 0176 break; 0177 case ContState: 0178 it = ignoreWhitespaceAndComment(it); 0179 m_tokenBegin = m_curpos; 0180 if (m_curpos < m_contentSize) { 0181 if (it->unicode() == '\n') { 0182 createNewline(m_curpos); 0183 token = Parser::Token_NEWLINE; 0184 m_tokenEnd = m_curpos; 0185 popState(); 0186 QChar* temp = it; 0187 int newpos = m_curpos; 0188 do { 0189 temp++; 0190 newpos++; 0191 if (temp->unicode() == '#') { 0192 while (temp->unicode() != '\n' && newpos < m_contentSize) { 0193 temp++; 0194 newpos++; 0195 } 0196 createNewline(m_curpos); 0197 temp++; 0198 m_curpos = newpos; 0199 newpos++; 0200 } 0201 } while (m_curpos < m_contentSize && temp->isSpace() && temp->unicode() != '\n'); 0202 m_curpos++; 0203 return token; 0204 } 0205 } 0206 break; 0207 case DefaultState: 0208 it = ignoreWhitespaceAndComment(it); 0209 m_tokenBegin = m_curpos; 0210 if (m_curpos < m_contentSize) { 0211 if (isBeginIdentifierCharacter(it)) { 0212 token = Parser::Token_IDENTIFIER; 0213 while (!it->isSpace() && isIdentifierCharacter(it, m_curpos + 1 < m_contentSize) 0214 && m_curpos < m_contentSize) { 0215 it++; 0216 m_curpos++; 0217 } 0218 if (!isEndIdentifierCharacter((it - 1))) { 0219 token = Parser::Token_INVALID; 0220 } else if (m_content.midRef(m_tokenBegin, m_curpos - m_tokenBegin) == QLatin1String("else")) { 0221 token = Parser::Token_ELSE; 0222 } 0223 m_curpos--; 0224 } else { 0225 // Now the stuff that will generate a proper token 0226 QChar* c2 = m_curpos < m_contentSize ? it + 1 : nullptr; 0227 switch (it->unicode()) { 0228 case '|': 0229 token = Parser::Token_OR; 0230 break; 0231 case '!': 0232 token = Parser::Token_EXCLAM; 0233 break; 0234 case '(': 0235 pushState(FunctionArgState); 0236 token = Parser::Token_LPAREN; 0237 break; 0238 case '{': 0239 token = Parser::Token_LBRACE; 0240 break; 0241 case '}': 0242 token = Parser::Token_RBRACE; 0243 break; 0244 case ':': 0245 token = Parser::Token_COLON; 0246 break; 0247 case '~': 0248 if (c2 && c2->unicode() == '=') { 0249 pushState(VariableValueState); 0250 m_curpos++; 0251 token = Parser::Token_TILDEEQ; 0252 } 0253 break; 0254 case '*': 0255 if (c2 && c2->unicode() == '=') { 0256 pushState(VariableValueState); 0257 m_curpos++; 0258 token = Parser::Token_STAREQ; 0259 } 0260 break; 0261 case '-': 0262 if (c2 && c2->unicode() == '=') { 0263 pushState(VariableValueState); 0264 m_curpos++; 0265 token = Parser::Token_MINUSEQ; 0266 } 0267 break; 0268 case '+': 0269 if (c2 && c2->unicode() == '=') { 0270 pushState(VariableValueState); 0271 m_curpos++; 0272 token = Parser::Token_PLUSEQ; 0273 } 0274 break; 0275 case '=': 0276 pushState(VariableValueState); 0277 token = Parser::Token_EQUAL; 0278 break; 0279 case '\n': 0280 createNewline(m_curpos); 0281 token = Parser::Token_NEWLINE; 0282 break; 0283 default: 0284 break; 0285 } 0286 } 0287 } 0288 break; 0289 default: 0290 token = Parser::Token_INVALID; 0291 break; 0292 } 0293 if (m_curpos >= m_contentSize) { 0294 return 0; 0295 } 0296 m_tokenEnd = m_curpos; 0297 m_curpos++; 0298 return token; 0299 } 0300 0301 qint64 Lexer::tokenBegin() const 0302 { 0303 return m_tokenBegin; 0304 } 0305 0306 qint64 Lexer::tokenEnd() const 0307 { 0308 return m_tokenEnd; 0309 } 0310 0311 QChar* Lexer::ignoreWhitespaceAndComment(QChar* it) 0312 { 0313 // Ignore whitespace, but preserve the newline 0314 bool comment = false; 0315 while (m_curpos < m_contentSize && (it->isSpace() || comment || it->unicode() == '#') && it->unicode() != '\n') { 0316 if (it->unicode() == '#') { 0317 comment = true; 0318 } 0319 ++it; 0320 ++m_curpos; 0321 } 0322 return it; 0323 } 0324 0325 void Lexer::createNewline(int pos) 0326 { 0327 if (m_parser) 0328 m_parser->tokenStream->locationTable()->newline(pos); 0329 } 0330 }