File indexing completed on 2024-03-24 16:04:29
0001 /* 0002 SPDX-FileCopyrightText: 2008 Niko Sams <niko.sams@gmail.com> 0003 0004 SPDX-License-Identifier: LGPL-2.0-or-later 0005 */ 0006 0007 #include "phplexer.h" 0008 0009 #include "phpparser.h" 0010 #include "tokenstream.h" 0011 0012 #include <QString> 0013 #include <QStringList> 0014 #include <QRegExp> 0015 #include <QDebug> 0016 0017 #include "parserdebug.h" 0018 0019 namespace Php 0020 { 0021 0022 Lexer::Lexer(TokenStream* tokenStream, const QString& content, int initialState): 0023 m_content(content), m_tokenStream(tokenStream), 0024 m_curpos(0), m_contentSize(m_content.size()), 0025 m_tokenBegin(0), m_tokenEnd(0), m_haltCompiler(0) 0026 { 0027 pushState(ErrorState); 0028 if (initialState == DefaultState) { 0029 pushState(HtmlState); 0030 } 0031 pushState(initialState); 0032 } 0033 0034 int Lexer::state(int deepness) const 0035 { 0036 return m_state.at(m_state.size() - deepness - 1); 0037 } 0038 void Lexer::printState() 0039 { 0040 int s = state(); 0041 if (s == ErrorState) 0042 qDebug() << "ErrorState"; 0043 else if (s == HtmlState) 0044 qDebug() << "HtmlState"; 0045 else if (s == DefaultState) 0046 qDebug() << "DefaultState"; 0047 else if (s == String) 0048 qDebug() << "String"; 0049 else if (s == StringVariable) 0050 qDebug() << "StringVariable"; 0051 else if (s == StringVariableBracket) 0052 qDebug() << "StringVariableBracket"; 0053 else if (s == StringVariableObjectOperator) 0054 qDebug() << "StringVariableObjectOperator"; 0055 else if (s == StringVariableCurly) 0056 qDebug() << "StringVariableCurly"; 0057 else if (s == StringVarname) 0058 qDebug() << "StringVarname"; 0059 else if (s == StringHeredoc) 0060 qDebug() << "StringHeredoc"; 0061 else if (s == StringBacktick) 0062 qDebug() << "StringBacktick"; 0063 } 0064 0065 void Lexer::pushState(int state) 0066 { 0067 m_state.push(state); 0068 } 0069 0070 void Lexer::popState() 0071 { 0072 m_state.pop(); 0073 } 0074 0075 int Lexer::nextTokenKind() 0076 { 0077 int token = Parser::Token_INVALID; 0078 if (m_curpos >= m_contentSize) { 0079 m_tokenBegin = -1; 0080 m_tokenEnd = -1; 0081 createNewline(m_curpos); 0082 return 0; 0083 } 0084 0085 const QChar* it = m_content.constData(); 0086 it += m_curpos; 0087 m_tokenBegin = m_curpos; 0088 switch (state()) { 0089 case HtmlState: 0090 if (it->unicode() == '<' && (it + 1)->unicode() == '?' 0091 ///TODO: per-project configuration to set whether we use shortags 0092 /// or not. In the former case we'd need to rise an error here 0093 && !( (it + 2)->toLower().unicode() == 'x' 0094 && (it + 3)->toLower().unicode() == 'm' 0095 && (it + 4)->toLower().unicode() == 'l' ) ) 0096 { 0097 token = Parser::Token_OPEN_TAG; 0098 if ((it + 2)->unicode() == '=') { 0099 token = Parser::Token_OPEN_TAG_WITH_ECHO; 0100 m_curpos++; 0101 it++; 0102 } else if ((it + 2)->toLower().unicode() == 'p' 0103 && (it + 3)->toLower().unicode() == 'h' 0104 && (it + 4)->toLower().unicode() == 'p' 0105 && (it + 5)->isSpace()) { 0106 m_curpos += 4; 0107 if ((it + 5)->unicode() == '\n') createNewline(m_curpos + 1); 0108 } 0109 m_curpos++; 0110 pushState(DefaultState); 0111 } else { 0112 token = Parser::Token_INLINE_HTML; 0113 while (m_curpos < m_contentSize) { 0114 if (it->unicode() == '\n') createNewline(m_curpos); 0115 if ((it + 1)->unicode() == '<' && (it + 2)->unicode() == '?') { 0116 break; 0117 } 0118 it++; 0119 m_curpos++; 0120 } 0121 } 0122 break; 0123 case DefaultState: 0124 case StringVariableCurly: { 0125 if (it->isSpace()) { 0126 token = Parser::Token_WHITESPACE; 0127 while (m_curpos < m_contentSize && it->isSpace()) { 0128 if (it->unicode() == '\n') createNewline(m_curpos); 0129 it++; 0130 m_curpos++; 0131 } 0132 m_curpos--; 0133 } else if (it->isDigit() || (it->unicode() == '.' && (it + 1)->isDigit())) { 0134 QString num;bool hasPoint = false; 0135 bool hex = false; 0136 bool bin = false; 0137 if (it->unicode() == '0' && (it + 1)->toLower() == 'x') { 0138 it += 2; 0139 m_curpos += 2; 0140 hex = true; 0141 } 0142 if (it->unicode() == '0' && (it + 1)->toLower() == 'b') { 0143 it += 2; 0144 m_curpos += 2; 0145 bin = true; 0146 } 0147 while (m_curpos < m_contentSize && ( 0148 it->isDigit() 0149 || (!hex && !hasPoint && it->unicode() == '.') 0150 || (bin && (it->unicode() == '0' || it->unicode() == '1')) 0151 || (hex && (it->toLower() == 'a' || it->toLower() == 'b' || 0152 it->toLower() == 'c' || it->toLower() == 'd' || 0153 it->toLower() == 'e' || it->toLower() == 'f')))) { 0154 if (it->unicode() == '.') hasPoint = true; 0155 num.append(*it); 0156 it++; 0157 m_curpos++; 0158 } 0159 if (!hex && !bin && it->toLower() == 'e' && 0160 ((it + 1)->isDigit() || 0161 (((it + 1)->unicode() == '-' || (it + 1)->unicode() == '+') && (it + 2)->isDigit()))) { 0162 //exponential number 0163 token = Parser::Token_DNUMBER; 0164 m_curpos++; 0165 it++; 0166 if (it->unicode() == '-' || it->unicode() == '+') { 0167 it++; 0168 m_curpos++; 0169 } 0170 while (m_curpos < m_contentSize && (it->isDigit())) { 0171 it++; 0172 m_curpos++; 0173 } 0174 m_curpos--; 0175 } else { 0176 m_curpos--; 0177 if (hasPoint) { 0178 token = Parser::Token_DNUMBER; 0179 } else { 0180 bool ok; 0181 //check if string can be converted to long 0182 //if we get an overflow use double 0183 num.toLong(&ok, hex ? 16 : 10); 0184 if (ok) { 0185 token = Parser::Token_LNUMBER; 0186 } else { 0187 token = Parser::Token_DNUMBER; 0188 } 0189 } 0190 } 0191 0192 } else if (processVariable(it)) { 0193 token = Parser::Token_VARIABLE; 0194 } else if (it->unicode() == '$') { 0195 //when it was not recognized as variable 0196 token = Parser::Token_DOLLAR; 0197 } else if (it->unicode() == '}') { 0198 token = Parser::Token_RBRACE; 0199 if (state() == StringVariableCurly) { 0200 popState(); 0201 } 0202 } else if (it->unicode() == '{') { 0203 token = Parser::Token_LBRACE; 0204 if (state() == StringVariableCurly) { 0205 pushState(StringVariableCurly); 0206 } 0207 } else if (it->unicode() == ')') { 0208 token = Parser::Token_RPAREN; 0209 } else if (it->unicode() == '(') { 0210 it++; 0211 int pos = m_curpos + 1; 0212 while (pos < m_contentSize && it->isSpace()) { 0213 it++; 0214 pos++; 0215 } 0216 const int nameStart = pos; 0217 while (pos < m_contentSize && it->isLetter()) { 0218 it++; 0219 pos++; 0220 } 0221 const auto name = m_content.midRef(nameStart, pos - nameStart); 0222 while (pos < m_contentSize && it->isSpace()) { 0223 it++; 0224 pos++; 0225 } 0226 if (it->unicode() == ')') { 0227 if (name.compare(QLatin1String("int"), Qt::CaseInsensitive) == 0 0228 || name.compare(QLatin1String("integer"), Qt::CaseInsensitive) == 0) 0229 { 0230 token = Parser::Token_INT_CAST; 0231 } else if (name.compare(QLatin1String("real"), Qt::CaseInsensitive) == 0 0232 || name.compare(QLatin1String("double"), Qt::CaseInsensitive) == 0 0233 || name.compare(QLatin1String("float"), Qt::CaseInsensitive) == 0) 0234 { 0235 token = Parser::Token_DOUBLE_CAST; 0236 } else if (name.compare(QLatin1String("string"), Qt::CaseInsensitive) == 0) { 0237 token = Parser::Token_STRING_CAST; 0238 } else if (name.compare(QLatin1String("binary"), Qt::CaseInsensitive) == 0) { 0239 //as in php 0240 token = Parser::Token_STRING_CAST; 0241 } else if (name.compare(QLatin1String("array"), Qt::CaseInsensitive) == 0) { 0242 token = Parser::Token_ARRAY_CAST; 0243 } else if (name.compare(QLatin1String("object"), Qt::CaseInsensitive) == 0) { 0244 token = Parser::Token_OBJECT_CAST; 0245 } else if (name.compare(QLatin1String("bool"), Qt::CaseInsensitive) == 0 0246 || name.compare(QLatin1String("boolean"), Qt::CaseInsensitive) == 0) 0247 { 0248 token = Parser::Token_BOOL_CAST; 0249 } else if (name.compare(QLatin1String("unset"), Qt::CaseInsensitive) == 0) { 0250 token = Parser::Token_UNSET_CAST; 0251 } else { 0252 token = Parser::Token_LPAREN; 0253 } 0254 0255 if (token != Parser::Token_LPAREN) { 0256 m_curpos = pos; 0257 } 0258 } else { 0259 token = Parser::Token_LPAREN; 0260 } 0261 } else if (it->unicode() == ']') { 0262 token = Parser::Token_RBRACKET; 0263 } else if (it->unicode() == '[') { 0264 token = Parser::Token_LBRACKET; 0265 } else if (it->unicode() == ',') { 0266 token = Parser::Token_COMMA; 0267 } else if (it->unicode() == '@') { 0268 token = Parser::Token_AT; 0269 } else if (it->unicode() == '!') { 0270 if ((it + 1)->unicode() == '=') { 0271 m_curpos++; 0272 if ((it + 2)->unicode() == '=') { 0273 m_curpos++; 0274 token = Parser::Token_IS_NOT_IDENTICAL; 0275 } else { 0276 token = Parser::Token_IS_NOT_EQUAL; 0277 } 0278 } else { 0279 token = Parser::Token_BANG; 0280 } 0281 } else if (it->unicode() == '<') { 0282 if ((it + 1)->unicode() == '<') { 0283 m_curpos++; 0284 if ((it + 2)->unicode() == '<' && state() != StringVariableCurly) { 0285 //HEREDOC string (<<< EOD\nfoo\nEOD;\n) 0286 int pos = 3; 0287 while (m_curpos + pos < m_contentSize && 0288 ((it + pos)->unicode() == ' ' || (it + pos)->unicode() == '\t')) { 0289 pos++; 0290 } 0291 bool isNowdoc = (it + pos)->unicode() == '\''; 0292 bool foundQuote = isNowdoc || (it + pos)->unicode() == '"'; 0293 if (foundQuote) { 0294 ++pos; 0295 } 0296 if ((it + pos)->isLetter() || (it + pos)->unicode() == '_') { //identifier must start with a letter 0297 m_hereNowDocIdentifier.clear(); 0298 while (m_curpos + pos < m_contentSize && 0299 ((it + pos)->isDigit() || (it + pos)->isLetter() || (it + pos)->unicode() == '_')) { 0300 m_hereNowDocIdentifier.append(*(it + pos)); 0301 pos++; 0302 } 0303 if (foundQuote && (m_curpos + pos) < m_contentSize) { 0304 if (isNowdoc && (it+pos)->unicode() == '\'') { 0305 ++pos; 0306 } else if ((it+pos)->unicode() == '"') { 0307 ++pos; 0308 } 0309 } 0310 if (m_curpos + pos < m_contentSize && (it + pos)->unicode() == '\n') { 0311 //identifier must be followed by newline, newline is part of HEREDOC token 0312 if (isNowdoc) { 0313 token = Parser::Token_START_NOWDOC; 0314 pushState(StringNowdoc); 0315 } else { 0316 token = Parser::Token_START_HEREDOC; 0317 pushState(StringHeredoc); 0318 } 0319 m_curpos += pos - 1; 0320 createNewline(m_curpos); 0321 } 0322 } 0323 } 0324 0325 if (token != Parser::Token_START_HEREDOC && token != Parser::Token_START_NOWDOC) { 0326 if ((it + 2)->unicode() == '=') { 0327 m_curpos++; 0328 token = Parser::Token_SL_ASSIGN; 0329 } else { 0330 token = Parser::Token_SL; 0331 } 0332 } 0333 } else if ((it + 1)->unicode() == '=') { 0334 if ((it + 2)->unicode() == '>') { 0335 m_curpos += 2; 0336 token = Parser::Token_SPACESHIP; 0337 } else { 0338 m_curpos++; 0339 token = Parser::Token_IS_SMALLER_OR_EQUAL; 0340 } 0341 } else if ((it + 1)->unicode() == '>') { 0342 m_curpos++; 0343 token = Parser::Token_IS_NOT_EQUAL; 0344 } else { 0345 token = Parser::Token_IS_SMALLER; 0346 } 0347 } else if (it->unicode() == '>') { 0348 if ((it + 1)->unicode() == '>') { 0349 m_curpos++; 0350 if ((it + 2)->unicode() == '=') { 0351 m_curpos++; 0352 token = Parser::Token_SR_ASSIGN; 0353 } else { 0354 token = Parser::Token_SR; 0355 } 0356 } else if ((it + 1)->unicode() == '=') { 0357 m_curpos++; 0358 token = Parser::Token_IS_GREATER_OR_EQUAL; 0359 } else { 0360 token = Parser::Token_IS_GREATER; 0361 } 0362 } else if (it->unicode() == '~') { 0363 token = Parser::Token_TILDE; 0364 } else if (it->unicode() == ':') { 0365 if ((it + 1)->unicode() == ':') { 0366 m_curpos++; 0367 token = Parser::Token_PAAMAYIM_NEKUDOTAYIM; 0368 } else { 0369 token = Parser::Token_COLON; 0370 } 0371 } else if (it->unicode() == '?') { 0372 if ((it + 1)->unicode() == '>') { 0373 //accept CLOSE_TAG inside StringVariableCurly too, as php does 0374 token = Parser::Token_CLOSE_TAG; 0375 m_curpos++; 0376 while (state() != HtmlState) popState(); 0377 } else if ((it + 1)->unicode() == '?') { 0378 token = Parser::Token_NULL_COALESCE; 0379 m_curpos++; 0380 } else { 0381 token = Parser::Token_QUESTION; 0382 } 0383 } else if (it->unicode() == '-' && (it + 1)->unicode() == '>') { 0384 m_curpos++; 0385 token = Parser::Token_OBJECT_OPERATOR; 0386 if (isValidVariableIdentifier(it + 2)) { 0387 pushState(StringVariableObjectOperator); 0388 } 0389 } else if (it->unicode() == '%') { 0390 if ((it + 1)->unicode() == '=') { 0391 m_curpos++; 0392 token = Parser::Token_MOD_ASSIGN; 0393 } else { 0394 token = Parser::Token_MOD; 0395 } 0396 } else if (it->unicode() == '/') { 0397 if ((it + 1)->unicode() == '=') { 0398 m_curpos++; 0399 token = Parser::Token_DIV_ASSIGN; 0400 } else if ((it + 1)->unicode() == '/') { 0401 //accept COMMENT inside StringVariableCurly too, as php does 0402 if ((it + 2)->unicode() == '/') { 0403 token = Parser::Token_DOC_COMMENT; 0404 } else { 0405 token = Parser::Token_COMMENT; 0406 } 0407 while (m_curpos < m_contentSize) { 0408 if (m_curpos + 1 < m_contentSize && it->unicode() == '?' && (it + 1)->unicode() == '>') { 0409 --it; 0410 --m_curpos; 0411 break; 0412 } 0413 if ( it->unicode() == '\n' ) { 0414 createNewline(m_curpos); 0415 if ( token == Parser::Token_COMMENT ) { 0416 break; 0417 } else { 0418 // lookahead to check whether this doc comment spans multiple lines 0419 const QChar* it2 = it + 1; 0420 int pos = m_curpos + 1; 0421 while ( pos < m_contentSize && (it2)->isSpace() && (it2)->unicode() != '\n' ) { 0422 ++it2; 0423 ++pos; 0424 } 0425 if ( it2->unicode() == '/' && (it2 + 1)->unicode() == '/' 0426 && (it2 + 2)->unicode() == '/' ) { 0427 // seems to be a multi-line doc-comment 0428 it = it2 + 2; 0429 m_curpos = pos + 2; 0430 continue; 0431 } else { 0432 // not a multi-line doc-comment 0433 break; 0434 } 0435 } 0436 } 0437 it++; 0438 m_curpos++; 0439 } 0440 } else if ((it + 1)->unicode() == '*') { 0441 //accept COMMENT inside StringVariableCurly too, as php does 0442 if ((it + 2)->unicode() == '*' && (it + 3)->isSpace()) { 0443 token = Parser::Token_DOC_COMMENT; 0444 } else { 0445 token = Parser::Token_COMMENT; 0446 } 0447 it += 2; 0448 m_curpos += 2; 0449 while (m_curpos < m_contentSize && !(it->unicode() == '*' && (it + 1)->unicode() == '/')) { 0450 if (it->unicode() == '\n') { 0451 createNewline(m_curpos); 0452 } 0453 it++; 0454 m_curpos++; 0455 } 0456 m_curpos++; 0457 } else { 0458 token = Parser::Token_DIV; 0459 } 0460 } else if (it->unicode() == '#') { 0461 //accept COMMENT inside StringVariableCurly too, as php does 0462 token = Parser::Token_COMMENT; 0463 while (m_curpos < m_contentSize) { 0464 if (m_curpos + 1 < m_contentSize && it->unicode() == '?' && (it + 1)->unicode() == '>') { 0465 --it; 0466 --m_curpos; 0467 break; 0468 } 0469 if (it->unicode() == '\n') { 0470 createNewline(m_curpos); 0471 break; 0472 } 0473 it++; 0474 m_curpos++; 0475 } 0476 } else if (it->unicode() == '^') { 0477 if ((it + 1)->unicode() == '=') { 0478 m_curpos++; 0479 token = Parser::Token_XOR_ASSIGN; 0480 } else { 0481 token = Parser::Token_BIT_XOR; 0482 } 0483 } else if (it->unicode() == '*') { 0484 if ((it + 1)->unicode() == '=') { 0485 m_curpos++; 0486 token = Parser::Token_MUL_ASSIGN; 0487 } else if ((it + 1)->unicode() == '*') { 0488 m_curpos++; 0489 if ((it + 2)->unicode() == '=') { 0490 m_curpos++; 0491 token = Parser::Token_EXP_ASSIGN; 0492 } else { 0493 token = Parser::Token_EXP; 0494 } 0495 } else { 0496 token = Parser::Token_MUL; 0497 } 0498 } else if (it->unicode() == '|') { 0499 if ((it + 1)->unicode() == '|') { 0500 m_curpos++; 0501 token = Parser::Token_BOOLEAN_OR; 0502 } else if ((it + 1)->unicode() == '=') { 0503 m_curpos++; 0504 token = Parser::Token_OR_ASSIGN; 0505 } else { 0506 token = Parser::Token_BIT_OR; 0507 } 0508 } else if (it->unicode() == '&') { 0509 if ((it + 1)->unicode() == '&') { 0510 m_curpos++; 0511 token = Parser::Token_BOOLEAN_AND; 0512 } else if ((it + 1)->unicode() == '=') { 0513 m_curpos++; 0514 token = Parser::Token_AND_ASSIGN; 0515 } else { 0516 token = Parser::Token_BIT_AND; 0517 } 0518 } else if (it->unicode() == '+') { 0519 if ((it + 1)->unicode() == '+') { 0520 m_curpos++; 0521 token = Parser::Token_INC; 0522 } else if ((it + 1)->unicode() == '=') { 0523 m_curpos++; 0524 token = Parser::Token_PLUS_ASSIGN; 0525 } else { 0526 token = Parser::Token_PLUS; 0527 } 0528 } else if (it->unicode() == '-') { 0529 if ((it + 1)->unicode() == '-') { 0530 m_curpos++; 0531 token = Parser::Token_DEC; 0532 } else if ((it + 1)->unicode() == '=') { 0533 m_curpos++; 0534 token = Parser::Token_MINUS_ASSIGN; 0535 } else { 0536 token = Parser::Token_MINUS; 0537 } 0538 } else if (it->unicode() == '.') { 0539 if ((it + 1)->unicode() == '=') { 0540 m_curpos++; 0541 token = Parser::Token_CONCAT_ASSIGN; 0542 } else if ((it + 1)->unicode() == '.' && (it + 2)->unicode() == '.') { 0543 m_curpos = m_curpos + 2; 0544 token = Parser::Token_ELLIPSIS; 0545 } else { 0546 token = Parser::Token_CONCAT; 0547 } 0548 } else if (it->unicode() == '\\') { 0549 token = Parser::Token_BACKSLASH; 0550 } else if (it->unicode() == ';') { 0551 token = Parser::Token_SEMICOLON; 0552 } else if (it->unicode() == '\'') { 0553 token = Parser::Token_CONSTANT_ENCAPSED_STRING; 0554 it++; 0555 m_curpos++; 0556 int startPos = m_curpos; 0557 while (m_curpos < m_contentSize 0558 && (it->unicode() != '\'' || isEscapedWithBackslash(it, m_curpos, startPos))) { 0559 if (it->unicode() == '\n') createNewline(m_curpos); 0560 it++; 0561 m_curpos++; 0562 } 0563 // if the string is never terminated, make sure we don't overflow the boundaries 0564 if ( m_curpos == m_contentSize ) { 0565 --m_curpos; 0566 } 0567 } else if (it->unicode() == '"') { 0568 it++; 0569 m_curpos++; 0570 int stringSize = 0; 0571 bool foundVar = false; 0572 while (m_curpos + stringSize < m_contentSize 0573 && (it->unicode() != '"' || isEscapedWithBackslash(it, m_curpos + stringSize, m_curpos))) 0574 { 0575 if (it->unicode() == '$' && !isEscapedWithBackslash(it, m_curpos + stringSize, m_curpos) 0576 && ((it + 1)->unicode() == '{' 0577 || (isValidVariableIdentifier(it + 1) && !(it + 1)->isDigit()))) { 0578 foundVar = true; 0579 break; 0580 } 0581 it++; 0582 stringSize++; 0583 } 0584 if (!foundVar) { 0585 // if the string is never terminated, make sure we don't overflow the boundaries 0586 if ( m_curpos + stringSize == m_contentSize ) { 0587 m_curpos--; 0588 } 0589 token = Parser::Token_CONSTANT_ENCAPSED_STRING; 0590 it -= stringSize; 0591 for (int j = 0; j < stringSize; j++) { 0592 if (it->unicode() == '\n') { 0593 createNewline(m_curpos + j); 0594 } 0595 it++; 0596 } 0597 m_curpos += stringSize; 0598 } else { 0599 // properly set the token pos to the starting double quote 0600 m_curpos--; 0601 token = Parser::Token_DOUBLE_QUOTE; 0602 pushState(String); 0603 } 0604 } else if (it->unicode() == '`') { 0605 token = Parser::Token_BACKTICK; 0606 pushState(StringBacktick); 0607 } else if (it->unicode() == '=') { 0608 if ((it + 1)->unicode() == '=') { 0609 m_curpos++; 0610 if ((it + 2)->unicode() == '=') { 0611 m_curpos++; 0612 token = Parser::Token_IS_IDENTICAL; 0613 } else { 0614 token = Parser::Token_IS_EQUAL; 0615 } 0616 } else if ((it + 1)->unicode() == '>') { 0617 m_curpos++; 0618 token = Parser::Token_DOUBLE_ARROW; 0619 } else { 0620 token = Parser::Token_ASSIGN; 0621 } 0622 } else if (isValidVariableIdentifier(it) && !it->isDigit()) { 0623 const int from = m_curpos; 0624 while (m_curpos < m_contentSize && (isValidVariableIdentifier(it))) { 0625 it++; 0626 m_curpos++; 0627 } 0628 const QStringRef name = m_content.midRef(from, m_curpos - from); 0629 m_curpos--; 0630 if (name.compare(QLatin1String("echo"), Qt::CaseInsensitive) == 0) { 0631 token = Parser::Token_ECHO; 0632 } else if (name.compare(QLatin1String("include"), Qt::CaseInsensitive) == 0) { 0633 token = Parser::Token_INCLUDE; 0634 } else if (name.compare(QLatin1String("include_once"), Qt::CaseInsensitive) == 0) { 0635 token = Parser::Token_INCLUDE_ONCE; 0636 } else if (name.compare(QLatin1String("require"), Qt::CaseInsensitive) == 0) { 0637 token = Parser::Token_REQUIRE; 0638 } else if (name.compare(QLatin1String("require_once"), Qt::CaseInsensitive) == 0) { 0639 token = Parser::Token_REQUIRE_ONCE; 0640 } else if (name.compare(QLatin1String("eval"), Qt::CaseInsensitive) == 0) { 0641 token = Parser::Token_EVAL; 0642 } else if (name.compare(QLatin1String("print"), Qt::CaseInsensitive) == 0) { 0643 token = Parser::Token_PRINT; 0644 } else if (name.compare(QLatin1String("abstract"), Qt::CaseInsensitive) == 0) { 0645 token = Parser::Token_ABSTRACT; 0646 } else if (name.compare(QLatin1String("break"), Qt::CaseInsensitive) == 0) { 0647 token = Parser::Token_BREAK; 0648 } else if (name.compare(QLatin1String("case"), Qt::CaseInsensitive) == 0) { 0649 token = Parser::Token_CASE; 0650 } else if (name.compare(QLatin1String("catch"), Qt::CaseInsensitive) == 0) { 0651 token = Parser::Token_CATCH; 0652 } else if (name.compare(QLatin1String("class"), Qt::CaseInsensitive) == 0) { 0653 token = Parser::Token_CLASS; 0654 } else if (name.compare(QLatin1String("const"), Qt::CaseInsensitive) == 0) { 0655 token = Parser::Token_CONST; 0656 } else if (name.compare(QLatin1String("continue"), Qt::CaseInsensitive) == 0) { 0657 token = Parser::Token_CONTINUE; 0658 } else if (name.compare(QLatin1String("default"), Qt::CaseInsensitive) == 0) { 0659 token = Parser::Token_DEFAULT; 0660 } else if (name.compare(QLatin1String("do"), Qt::CaseInsensitive) == 0) { 0661 token = Parser::Token_DO; 0662 } else if (name.compare(QLatin1String("else"), Qt::CaseInsensitive) == 0) { 0663 token = Parser::Token_ELSE; 0664 } else if (name.compare(QLatin1String("extends"), Qt::CaseInsensitive) == 0) { 0665 token = Parser::Token_EXTENDS; 0666 } else if (name.compare(QLatin1String("final"), Qt::CaseInsensitive) == 0) { 0667 token = Parser::Token_FINAL; 0668 } else if (name.compare(QLatin1String("for"), Qt::CaseInsensitive) == 0) { 0669 token = Parser::Token_FOR; 0670 } else if (name.compare(QLatin1String("if"), Qt::CaseInsensitive) == 0) { 0671 token = Parser::Token_IF; 0672 } else if (name.compare(QLatin1String("implements"), Qt::CaseInsensitive) == 0) { 0673 token = Parser::Token_IMPLEMENTS; 0674 } else if (name.compare(QLatin1String("instanceof"), Qt::CaseInsensitive) == 0) { 0675 token = Parser::Token_INSTANCEOF; 0676 } else if (name.compare(QLatin1String("insteadof"), Qt::CaseInsensitive) == 0) { 0677 token = Parser::Token_INSTEADOF; 0678 } else if (name.compare(QLatin1String("interface"), Qt::CaseInsensitive) == 0) { 0679 token = Parser::Token_INTERFACE; 0680 } else if (name.compare(QLatin1String("trait"), Qt::CaseInsensitive) == 0) { 0681 token = Parser::Token_TRAIT; 0682 } else if (name.compare(QLatin1String("new"), Qt::CaseInsensitive) == 0) { 0683 token = Parser::Token_NEW; 0684 } else if (name.compare(QLatin1String("private"), Qt::CaseInsensitive) == 0) { 0685 token = Parser::Token_PRIVATE; 0686 } else if (name.compare(QLatin1String("protected"), Qt::CaseInsensitive) == 0) { 0687 token = Parser::Token_PROTECTED; 0688 } else if (name.compare(QLatin1String("public"), Qt::CaseInsensitive) == 0) { 0689 token = Parser::Token_PUBLIC; 0690 } else if (name.compare(QLatin1String("return"), Qt::CaseInsensitive) == 0) { 0691 token = Parser::Token_RETURN; 0692 } else if (name.compare(QLatin1String("static"), Qt::CaseInsensitive) == 0) { 0693 const QChar* lookAhead = it; 0694 int pos = m_curpos; 0695 while (pos < m_contentSize && lookAhead->isSpace()) { 0696 ++lookAhead; 0697 ++pos; 0698 } 0699 if (pos + 1 < m_contentSize && lookAhead->unicode() == ':' && (++lookAhead)->unicode() == ':') { 0700 // PHP 5.3 - late static 0701 token = Parser::Token_STRING; 0702 } else { 0703 token = Parser::Token_STATIC; 0704 } 0705 } else if (name.compare(QLatin1String("switch"), Qt::CaseInsensitive) == 0) { 0706 token = Parser::Token_SWITCH; 0707 } else if (name.compare(QLatin1String("throw"), Qt::CaseInsensitive) == 0) { 0708 token = Parser::Token_THROW; 0709 } else if (name.compare(QLatin1String("try"), Qt::CaseInsensitive) == 0) { 0710 token = Parser::Token_TRY; 0711 } else if (name.compare(QLatin1String("finally"), Qt::CaseInsensitive) == 0) { 0712 token = Parser::Token_FINALLY; 0713 } else if (name.compare(QLatin1String("while"), Qt::CaseInsensitive) == 0) { 0714 token = Parser::Token_WHILE; 0715 } else if (name.compare(QLatin1String("clone"), Qt::CaseInsensitive) == 0) { 0716 token = Parser::Token_CLONE; 0717 } else if (name.compare(QLatin1String("exit"), Qt::CaseInsensitive) == 0 || name.compare(QLatin1String("die"), Qt::CaseInsensitive) == 0) { 0718 token = Parser::Token_EXIT; 0719 } else if (name.compare(QLatin1String("elseif"), Qt::CaseInsensitive) == 0) { 0720 token = Parser::Token_ELSEIF; 0721 } else if (name.compare(QLatin1String("endif"), Qt::CaseInsensitive) == 0) { 0722 token = Parser::Token_ENDIF; 0723 } else if (name.compare(QLatin1String("endwhile"), Qt::CaseInsensitive) == 0) { 0724 token = Parser::Token_ENDWHILE; 0725 } else if (name.compare(QLatin1String("endfor"), Qt::CaseInsensitive) == 0) { 0726 token = Parser::Token_ENDFOR; 0727 } else if (name.compare(QLatin1String("foreach"), Qt::CaseInsensitive) == 0) { 0728 token = Parser::Token_FOREACH; 0729 } else if (name.compare(QLatin1String("endforeach"), Qt::CaseInsensitive) == 0) { 0730 token = Parser::Token_ENDFOREACH; 0731 } else if (name.compare(QLatin1String("declare"), Qt::CaseInsensitive) == 0) { 0732 token = Parser::Token_DECLARE; 0733 } else if (name.compare(QLatin1String("enddeclare"), Qt::CaseInsensitive) == 0) { 0734 token = Parser::Token_ENDDECLARE; 0735 } else if (name.compare(QLatin1String("as"), Qt::CaseInsensitive) == 0) { 0736 token = Parser::Token_AS; 0737 } else if (name.compare(QLatin1String("endswitch"), Qt::CaseInsensitive) == 0) { 0738 token = Parser::Token_ENDSWITCH; 0739 } else if (name.compare(QLatin1String("function"), Qt::CaseInsensitive) == 0) { 0740 token = Parser::Token_FUNCTION; 0741 } else if (name.compare(QLatin1String("use"), Qt::CaseInsensitive) == 0) { 0742 token = Parser::Token_USE; 0743 } else if (name.compare(QLatin1String("goto"), Qt::CaseInsensitive) == 0) { 0744 token = Parser::Token_GOTO; 0745 } else if (name.compare(QLatin1String("global"), Qt::CaseInsensitive) == 0) { 0746 token = Parser::Token_GLOBAL; 0747 } else if (name.compare(QLatin1String("var"), Qt::CaseInsensitive) == 0) { 0748 token = Parser::Token_VAR; 0749 } else if (name.compare(QLatin1String("unset"), Qt::CaseInsensitive) == 0) { 0750 token = Parser::Token_UNSET; 0751 } else if (name.compare(QLatin1String("isset"), Qt::CaseInsensitive) == 0) { 0752 token = Parser::Token_ISSET; 0753 } else if (name.compare(QLatin1String("empty"), Qt::CaseInsensitive) == 0) { 0754 token = Parser::Token_EMPTY; 0755 } else if (name.compare(QLatin1String("__halt_compiler"), Qt::CaseInsensitive) == 0) { 0756 token = Parser::Token_HALT_COMPILER; 0757 } else if (name.compare(QLatin1String("list"), Qt::CaseInsensitive) == 0) { 0758 token = Parser::Token_LIST; 0759 } else if (name.compare(QLatin1String("array"), Qt::CaseInsensitive) == 0) { 0760 token = Parser::Token_ARRAY; 0761 } else if (name.compare(QLatin1String("__class__"), Qt::CaseInsensitive) == 0) { 0762 token = Parser::Token_CLASS_C; 0763 } else if (name.compare(QLatin1String("__trait__"), Qt::CaseInsensitive) == 0) { 0764 token = Parser::Token_TRAIT_C; 0765 } else if (name.compare(QLatin1String("__method__"), Qt::CaseInsensitive) == 0) { 0766 token = Parser::Token_METHOD_C; 0767 } else if (name.compare(QLatin1String("__function__"), Qt::CaseInsensitive) == 0) { 0768 token = Parser::Token_FUNC_C; 0769 } else if (name.compare(QLatin1String("__line__"), Qt::CaseInsensitive) == 0) { 0770 token = Parser::Token_LINE; 0771 } else if (name.compare(QLatin1String("__file__"), Qt::CaseInsensitive) == 0) { 0772 token = Parser::Token_FILE; 0773 } else if (name.compare(QLatin1String("__dir__"), Qt::CaseInsensitive) == 0) { 0774 token = Parser::Token_DIR; 0775 } else if (name.compare(QLatin1String("or"), Qt::CaseInsensitive) == 0) { 0776 token = Parser::Token_LOGICAL_OR; 0777 } else if (name.compare(QLatin1String("and"), Qt::CaseInsensitive) == 0) { 0778 token = Parser::Token_LOGICAL_AND; 0779 } else if (name.compare(QLatin1String("xor"), Qt::CaseInsensitive) == 0) { 0780 token = Parser::Token_LOGICAL_XOR; 0781 } else if (name.compare(QLatin1String("namespace"), Qt::CaseInsensitive) == 0) { 0782 token = Parser::Token_NAMESPACE; 0783 } else if (name.compare(QLatin1String("__namespace__"), Qt::CaseInsensitive) == 0) { 0784 token = Parser::Token_NAMESPACE_C; 0785 } else if (name.compare(QLatin1String("callable"), Qt::CaseInsensitive) == 0) { 0786 token = Parser::Token_CALLABLE; 0787 } else if (name.compare(QLatin1String("void"), Qt::CaseInsensitive) == 0) { 0788 token = Parser::Token_VOID; 0789 } else if (name.compare(QLatin1String("yield"), Qt::CaseInsensitive) == 0) { 0790 const QChar* lookAhead = it; 0791 int pos = m_curpos; 0792 while (pos < m_contentSize && lookAhead->isSpace()) { 0793 ++lookAhead; 0794 ++pos; 0795 } 0796 0797 auto nextToken = QString(); 0798 nextToken += * lookAhead; 0799 nextToken += * ++lookAhead; 0800 nextToken += * ++lookAhead; 0801 nextToken += * ++lookAhead; 0802 if (pos + 4 < m_contentSize && nextToken == QStringLiteral("from")) { 0803 m_curpos = pos + 4; 0804 token = Parser::Token_YIELD_FROM; 0805 } else { 0806 token = Parser::Token_YIELD; 0807 } 0808 } else { 0809 token = Parser::Token_STRING; 0810 } 0811 } 0812 break; 0813 } 0814 0815 case StringVariable: 0816 case String: 0817 case StringHeredoc: 0818 case StringBacktick: 0819 if ((state() == String || state(1) == String) && it->unicode() == '"') { 0820 token = Parser::Token_DOUBLE_QUOTE; 0821 if (state() == StringVariable) popState(); 0822 popState(); 0823 } else if ((state() == StringBacktick || state(1) == StringBacktick) && it->unicode() == '`') { 0824 token = Parser::Token_BACKTICK; 0825 if (state() == StringVariable) popState(); 0826 popState(); 0827 } else if ((state() == StringHeredoc || state(1) == StringHeredoc) && isHereNowDocEnd(it)) { 0828 token = Parser::Token_END_HEREDOC; 0829 m_curpos += m_hereNowDocIdentifier.length() - 1; 0830 if (state() == StringVariable) popState(); 0831 popState(); 0832 } else if (processVariable(it)) { 0833 token = Parser::Token_VARIABLE; 0834 if (state() != StringVariable) pushState(StringVariable); 0835 } else if (state() != StringVariable && it->unicode() == '$' && (it + 1)->unicode() == '{') { 0836 token = Parser::Token_DOLLAR_OPEN_CURLY_BRACES; 0837 m_curpos++; 0838 it += 2; 0839 //check if a valid variable follows 0840 if ((isValidVariableIdentifier(it) && !it->isDigit())) { 0841 pushState(StringVarname); 0842 } 0843 0844 } else if (state() == StringVariable && it->unicode() == '[') { 0845 token = Parser::Token_LBRACKET; 0846 pushState(StringVariableBracket); 0847 } else if (state() != StringVariable && it->unicode() == '{' && (it + 1)->unicode() == '$' 0848 && ((isValidVariableIdentifier(it + 2) && !(it + 2)->isDigit()) || (it + 2)->unicode() == '{')) { 0849 token = Parser::Token_CURLY_OPEN; 0850 pushState(StringVariableCurly); 0851 } else if (state() == StringVariable 0852 && it->unicode() == '-' && (it + 1)->unicode() == '>' 0853 && isValidVariableIdentifier(it + 2) && !(it + 2)->isDigit()) { 0854 token = Parser::Token_OBJECT_OPERATOR; 0855 m_curpos++; 0856 pushState(StringVariableObjectOperator); 0857 } else { 0858 if (state() == StringVariable) popState(); 0859 token = Parser::Token_ENCAPSED_AND_WHITESPACE; 0860 int startPos = m_curpos; 0861 while (m_curpos < m_contentSize) { 0862 if (!isEscapedWithBackslash(it, m_curpos, startPos) && 0863 ((it->unicode() == '$' && (it + 1)->unicode() == '{') || 0864 (it->unicode() == '{' && (it + 1)->unicode() == '$' && isValidVariableIdentifier(it + 2)) || 0865 (it->unicode() == '$' && isValidVariableIdentifier(it + 1) && !(it + 1)->isDigit()))) { 0866 //variable is next ${var} or {$var} 0867 break; 0868 } 0869 if (state() == String && it->unicode() == '"' 0870 && !isEscapedWithBackslash(it, m_curpos, startPos)) { 0871 //end of string 0872 break; 0873 } 0874 if (state() == StringBacktick && it->unicode() == '`' 0875 && !isEscapedWithBackslash(it, m_curpos, startPos)) { 0876 //end of string 0877 break; 0878 } 0879 0880 if (it->unicode() == '\n') createNewline(m_curpos); 0881 m_curpos++; 0882 it++; 0883 0884 if (state() == StringHeredoc && (it - 1)->unicode() == '\n') { 0885 //check for end of heredoc (\nEOD;\n) 0886 if (state() == StringHeredoc && isHereNowDocEnd(it)) { 0887 break; 0888 } 0889 } 0890 } 0891 m_curpos--; 0892 } 0893 break; 0894 case StringNowdoc: 0895 if (isHereNowDocEnd(it)) { 0896 token = Parser::Token_END_NOWDOC; 0897 m_curpos += m_hereNowDocIdentifier.length() - 1; 0898 popState(); 0899 } else { 0900 token = Parser::Token_STRING; 0901 while (m_curpos < m_contentSize) { 0902 if (it->unicode() == '\n') createNewline(m_curpos); 0903 m_curpos++; 0904 it++; 0905 0906 if ((it - 1)->unicode() == '\n' && isHereNowDocEnd(it)) { 0907 //check for end of nowdoc (\nEOD;\n) 0908 break; 0909 } 0910 } 0911 m_curpos--; 0912 } 0913 break; 0914 case StringVariableBracket: 0915 if (it->unicode() == ']') { 0916 token = Parser::Token_RBRACKET; 0917 popState(); 0918 popState(); 0919 } else if (it->isDigit()) { 0920 token = Parser::Token_NUM_STRING; 0921 while (m_curpos < m_contentSize && it->isDigit()) { 0922 it++; 0923 m_curpos++; 0924 } 0925 m_curpos--; 0926 } else { 0927 token = Parser::Token_STRING; 0928 while (m_curpos < m_contentSize && (it->unicode() != ']')) { 0929 if (it->unicode() == '\n') createNewline(m_curpos); 0930 it++; 0931 m_curpos++; 0932 } 0933 m_curpos--; 0934 } 0935 break; 0936 case StringVariableObjectOperator: 0937 token = Parser::Token_STRING; 0938 while (m_curpos < m_contentSize && isValidVariableIdentifier(it)) { 0939 it++; 0940 m_curpos++; 0941 } 0942 m_curpos--; 0943 popState(); 0944 if (state() == StringVariable) popState(); 0945 break; 0946 case StringVarname: 0947 popState(); 0948 pushState(StringVariableCurly); 0949 token = Parser::Token_STRING_VARNAME; 0950 while (m_curpos < m_contentSize && isValidVariableIdentifier(it)) { 0951 it++; 0952 m_curpos++; 0953 } 0954 m_curpos--; 0955 break; 0956 default: 0957 token = Parser::Token_INVALID; 0958 break; 0959 } 0960 if (m_curpos > m_contentSize) { 0961 m_tokenBegin = -1; 0962 m_tokenEnd = -1; 0963 return 0; 0964 } 0965 m_tokenEnd = m_curpos; 0966 m_curpos++; 0967 0968 if (m_haltCompiler) { 0969 //look for __halt_compiler(); and stop lexer there 0970 if (m_haltCompiler == 4) { 0971 token = 0; //EOF 0972 } else if (token == Parser::Token_WHITESPACE || token == Parser::Token_COMMENT || token == Parser::Token_DOC_COMMENT) { 0973 //ignore 0974 } else if (m_haltCompiler == 1 && token == Parser::Token_LPAREN) { 0975 m_haltCompiler++; 0976 } else if (m_haltCompiler == 2 && token == Parser::Token_RPAREN) { 0977 m_haltCompiler++; 0978 } else if (m_haltCompiler == 3 && token == Parser::Token_SEMICOLON) { 0979 m_haltCompiler++; 0980 } else { 0981 m_haltCompiler = 0; 0982 } 0983 } 0984 if (token == Parser::Token_HALT_COMPILER && !m_haltCompiler) { 0985 m_haltCompiler = 1; 0986 } 0987 return token; 0988 } 0989 0990 qint64 Lexer::tokenBegin() const 0991 { 0992 return m_tokenBegin; 0993 } 0994 0995 qint64 Lexer::tokenEnd() const 0996 { 0997 return m_tokenEnd; 0998 } 0999 1000 bool Lexer::isHereNowDocEnd(const QChar* it) 1001 { 1002 int identiferLen = m_hereNowDocIdentifier.length(); 1003 QString lineStart; 1004 for (int i = 0; i < identiferLen; i++) { 1005 if (m_curpos + i >= m_contentSize) break; 1006 lineStart.append(*(it + i)); 1007 } 1008 if (lineStart == m_hereNowDocIdentifier && 1009 ((it + identiferLen)->unicode() == '\n' 1010 || ((it + identiferLen)->unicode() == ';' && 1011 (it + identiferLen + 1)->unicode() == '\n'))) { 1012 return true; 1013 } 1014 return false; 1015 } 1016 1017 //used for strings, to check if " is escaped (\" is, \\" not) 1018 bool Lexer::isEscapedWithBackslash(const QChar* it, int curPos, int startPos) 1019 { 1020 int cnt = 0; 1021 it--; 1022 while (curPos > startPos && it->unicode() == '\\') { 1023 cnt++; 1024 it--; 1025 } 1026 return (cnt % 2) == 1; 1027 } 1028 1029 bool Lexer::processVariable(const QChar* it) 1030 { 1031 const QChar* c2 = it + 1; 1032 if (it->unicode() == '$' && (isValidVariableIdentifier(c2) && !c2->isDigit())) { 1033 it++; 1034 m_curpos++; 1035 while (m_curpos < m_contentSize 1036 && (isValidVariableIdentifier(it))) { 1037 it++; 1038 m_curpos++; 1039 } 1040 m_curpos--; 1041 return true; 1042 } else { 1043 return false; 1044 } 1045 } 1046 bool Lexer::isValidVariableIdentifier(const QChar* it) 1047 { 1048 return it->isLetter() || it->isDigit() || it->unicode() == '_' || it->unicode() > 0x7f; 1049 } 1050 1051 void Lexer::createNewline(int pos) 1052 { 1053 if (m_tokenStream) m_tokenStream->locationTable()->newline(pos); 1054 } 1055 1056 } 1057