qmakemanager/parser/qmakelexer.cpp

0001 /*
0002     SPDX-FileCopyrightText: 2007 Andreas Pakulat <apaku@gmx.de>
0003
0004     SPDX-License-Identifier: GPL-2.0-or-later
0005 */
0006
0007 #include "qmakelexer.h"
0008
0009 #include <QString>
0010 #include "qmakeparser.h"
0011 #include <kdev-pg-location-table.h>
0012 #include <kdev-pg-token-stream.h>
0013
0014 namespace QMake {
0015
0016 bool isIdentifierCharacter(QChar* c, bool canLookAhead)
0017 {
0018     return (c->isLetter() || c->isDigit() || c->unicode() == '_' || c->unicode() == '.' || c->unicode() == '-'
0019             || c->unicode() == '$' || c->unicode() == '*'
0020             || (canLookAhead && (c + 1)->unicode() != '=' && (c->unicode() == '+')));
0021 }
0022
0023 bool isBeginIdentifierCharacter(QChar* c)
0024 {
0025     return (c->isLetter() || c->isDigit() || c->unicode() == '_' || c->unicode() == '.' || c->unicode() == '$');
0026 }
0027
0028 bool isEndIdentifierCharacter(QChar* c)
0029 {
0030     return (c->isLetter() || c->isDigit() || c->unicode() == '_' || c->unicode() == '.' || c->unicode() == '$'
0031             || c->unicode() == '*');
0032 }
0033
0034 bool isCont(QChar* c)
0035 {
0036     if (c->unicode() == '\\') {
0037         c++;
0038         while (c->isSpace() && c->unicode() != '\n') {
0039             c++;
0040         }
0041         if (c->unicode() == '\n' || c->unicode() == '#') {
0042             return true;
0043         }
0044     }
0045     return false;
0046 }
0047
0048 Lexer::Lexer(Parser* _parser, QString content)
0049     : m_content(std::move(content))
0050     , m_parser(_parser)
0051     , m_curpos(0)
0052     , m_contentSize(m_content.size())
0053     , m_tokenBegin(0)
0054     , m_tokenEnd(0)
0055 {
0056     pushState(ErrorState);
0057     pushState(DefaultState);
0058 }
0059
0060 int Lexer::state() const
0061 {
0062     return mState.top();
0063 }
0064
0065 void Lexer::pushState(int state)
0066 {
0067     mState.push(state);
0068 }
0069
0070 void Lexer::popState()
0071 {
0072     mState.pop();
0073 }
0074
0075 int Lexer::nextTokenKind()
0076 {
0077     int token = Parser::Token_INVALID;
0078     if (m_curpos >= m_contentSize) {
0079         return 0;
0080     }
0081     QChar* it = m_content.data();
0082     it += m_curpos;
0083     switch (state()) {
0084     case VariableValueState:
0085         it = ignoreWhitespaceAndComment(it);
0086         m_tokenBegin = m_curpos;
0087         if (m_curpos < m_contentSize) {
0088             if (it->unicode() == '}') {
0089                 popState();
0090                 token = Parser::Token_RBRACE;
0091             } else if (it->unicode() == '\n') {
0092                 popState();
0093                 createNewline(m_curpos);
0094                 token = Parser::Token_NEWLINE;
0095             } else if (it->unicode() == '\\' && isCont(it)) {
0096                 pushState(ContState);
0097                 token = Parser::Token_CONT;
0098             } else if (it->unicode() == '"') {
0099                 it++;
0100                 m_curpos++;
0101                 QChar* lastit = it;
0102                 while ((it->unicode() != '"' || (lastit->unicode() == '\\' && it->unicode() == '"'))
0103                        && it->unicode() != '\n' && it->unicode() != '#' && !isCont(it) && m_curpos < m_contentSize) {
0104                     lastit = it;
0105                     it++;
0106                     m_curpos++;
0107                 }
0108                 if (it->unicode() != '"' && it->unicode() != '#') {
0109                     m_curpos--;
0110                 }
0111                 token = Parser::Token_VALUE;
0112                 if (it->unicode() == '#') {
0113                     m_tokenEnd = m_curpos - 1;
0114                     do {
0115                         it++;
0116                         m_curpos++;
0117                     } while (it->unicode() != '\n' && m_curpos < m_contentSize);
0118                     if (it->unicode() == '\n') {
0119                         m_curpos--;
0120                     }
0121                     return token;
0122                 }
0123             } else if (it->unicode() == '(') {
0124                 unsigned int bracecount = 0;
0125                 while ((it->unicode() != ';' || bracecount > 0) && it->unicode() != '\n' && !isCont(it)
0126                        && m_curpos < m_contentSize) {
0127                     if (it->unicode() == '(') {
0128                         bracecount++;
0129                     } else if (it->unicode() == ')' && bracecount > 0) {
0130                         bracecount--;
0131                     }
0132                     ++it;
0133                     ++m_curpos;
0134                 }
0135                 if (it->unicode() != ';') {
0136                     m_curpos--;
0137                 }
0138                 token = Parser::Token_VALUE;
0139             } else {
0140                 while (!it->isSpace() && !isCont(it) && it->unicode() != '#' && m_curpos < m_contentSize) {
0141                     it++;
0142                     m_curpos++;
0143                 }
0144                 m_curpos--;
0145                 token = Parser::Token_VALUE;
0146             }
0147         }
0148         break;
0149     case FunctionArgState:
0150         m_tokenBegin = m_curpos;
0151         if (it->unicode() == '\n') {
0152             createNewline(m_curpos);
0153             token = Parser::Token_NEWLINE;
0154         } else if (it->unicode() == '\\' && isCont(it)) {
0155             pushState(ContState);
0156             token = Parser::Token_CONT;
0157         } else if (it->unicode() == ',') {
0158             token = Parser::Token_COMMA;
0159         } else if (it->unicode() == ')') {
0160             popState();
0161             token = Parser::Token_RPAREN;
0162         } else {
0163             unsigned int parentCount = 0;
0164             while (parentCount > 0 || (it->unicode() != ')' && it->unicode() != ',' && m_curpos < m_contentSize)) {
0165                 if (it->unicode() == ')') {
0166                     parentCount--;
0167                 } else if (it->unicode() == '(') {
0168                     parentCount++;
0169                 }
0170                 ++it;
0171                 ++m_curpos;
0172             }
0173             m_curpos--;
0174             token = Parser::Token_VALUE;
0175         }
0176         break;
0177     case ContState:
0178         it = ignoreWhitespaceAndComment(it);
0179         m_tokenBegin = m_curpos;
0180         if (m_curpos < m_contentSize) {
0181             if (it->unicode() == '\n') {
0182                 createNewline(m_curpos);
0183                 token = Parser::Token_NEWLINE;
0184                 m_tokenEnd = m_curpos;
0185                 popState();
0186                 QChar* temp = it;
0187                 int newpos = m_curpos;
0188                 do {
0189                     temp++;
0190                     newpos++;
0191                     if (temp->unicode() == '#') {
0192                         while (temp->unicode() != '\n' && newpos < m_contentSize) {
0193                             temp++;
0194                             newpos++;
0195                         }
0196                         createNewline(m_curpos);
0197                         temp++;
0198                         m_curpos = newpos;
0199                         newpos++;
0200                     }
0201                 } while (m_curpos < m_contentSize && temp->isSpace() && temp->unicode() != '\n');
0202                 m_curpos++;
0203                 return token;
0204             }
0205         }
0206         break;
0207     case DefaultState:
0208         it = ignoreWhitespaceAndComment(it);
0209         m_tokenBegin = m_curpos;
0210         if (m_curpos < m_contentSize) {
0211             if (isBeginIdentifierCharacter(it)) {
0212                 token = Parser::Token_IDENTIFIER;
0213                 while (!it->isSpace() && isIdentifierCharacter(it, m_curpos + 1 < m_contentSize)
0214                        && m_curpos < m_contentSize) {
0215                     it++;
0216                     m_curpos++;
0217                 }
0218                 if (!isEndIdentifierCharacter((it - 1))) {
0219                     token = Parser::Token_INVALID;
0220                 } else if (m_content.midRef(m_tokenBegin, m_curpos - m_tokenBegin) == QLatin1String("else")) {
0221                     token = Parser::Token_ELSE;
0222                 }
0223                 m_curpos--;
0224             } else {
0225                 // Now the stuff that will generate a proper token
0226                 QChar* c2 = m_curpos < m_contentSize ? it + 1 : nullptr;
0227                 switch (it->unicode()) {
0228                 case '|':
0229                     token = Parser::Token_OR;
0230                     break;
0231                 case '!':
0232                     token = Parser::Token_EXCLAM;
0233                     break;
0234                 case '(':
0235                     pushState(FunctionArgState);
0236                     token = Parser::Token_LPAREN;
0237                     break;
0238                 case '{':
0239                     token = Parser::Token_LBRACE;
0240                     break;
0241                 case '}':
0242                     token = Parser::Token_RBRACE;
0243                     break;
0244                 case ':':
0245                     token = Parser::Token_COLON;
0246                     break;
0247                 case '~':
0248                     if (c2 && c2->unicode() == '=') {
0249                         pushState(VariableValueState);
0250                         m_curpos++;
0251                         token = Parser::Token_TILDEEQ;
0252                     }
0253                     break;
0254                 case '*':
0255                     if (c2 && c2->unicode() == '=') {
0256                         pushState(VariableValueState);
0257                         m_curpos++;
0258                         token = Parser::Token_STAREQ;
0259                     }
0260                     break;
0261                 case '-':
0262                     if (c2 && c2->unicode() == '=') {
0263                         pushState(VariableValueState);
0264                         m_curpos++;
0265                         token = Parser::Token_MINUSEQ;
0266                     }
0267                     break;
0268                 case '+':
0269                     if (c2 && c2->unicode() == '=') {
0270                         pushState(VariableValueState);
0271                         m_curpos++;
0272                         token = Parser::Token_PLUSEQ;
0273                     }
0274                     break;
0275                 case '=':
0276                     pushState(VariableValueState);
0277                     token = Parser::Token_EQUAL;
0278                     break;
0279                 case '\n':
0280                     createNewline(m_curpos);
0281                     token = Parser::Token_NEWLINE;
0282                     break;
0283                 default:
0284                     break;
0285                 }
0286             }
0287         }
0288         break;
0289     default:
0290         token = Parser::Token_INVALID;
0291         break;
0292     }
0293     if (m_curpos >= m_contentSize) {
0294         return 0;
0295     }
0296     m_tokenEnd = m_curpos;
0297     m_curpos++;
0298     return token;
0299 }
0300
0301 qint64 Lexer::tokenBegin() const
0302 {
0303     return m_tokenBegin;
0304 }
0305
0306 qint64 Lexer::tokenEnd() const
0307 {
0308     return m_tokenEnd;
0309 }
0310
0311 QChar* Lexer::ignoreWhitespaceAndComment(QChar* it)
0312 {
0313     // Ignore whitespace, but preserve the newline
0314     bool comment = false;
0315     while (m_curpos < m_contentSize && (it->isSpace() || comment || it->unicode() == '#') && it->unicode() != '\n') {
0316         if (it->unicode() == '#') {
0317             comment = true;
0318         }
0319         ++it;
0320         ++m_curpos;
0321     }
0322     return it;
0323 }
0324
0325 void Lexer::createNewline(int pos)
0326 {
0327     if (m_parser)
0328         m_parser->tokenStream->locationTable()->newline(pos);
0329 }
0330 }