File indexing completed on 2024-05-19 15:42:37

0001 /*
0002     SPDX-FileCopyrightText: 2013-2015 Sven Brauch <mail@svenbrauch.de>
0003     SPDX-FileCopyrightText: 2016-2017 Francis Herne <mail@flherne.uk>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "rangefixvisitor.h"
0009 
0010 namespace Python {
0011 
0012 class NextAstFindVisitor : public AstDefaultVisitor {
0013 public:
0014     KTextEditor::Cursor findNext(Python::Ast* node) {
0015         m_root = node;
0016         auto parent = node;
0017         while ( parent->parent && parent->parent->isExpression() ) {
0018             parent = parent->parent;
0019         }
0020         visitNode(parent);
0021 
0022         while ( ! m_next.isValid() && parent->parent ) {
0023             // no next expression found in that statement, advance to the next statement
0024             parent = parent->parent;
0025             visitNode(parent);
0026         }
0027 
0028         return m_next;
0029     };
0030     void visitNode(Python::Ast* node) override {
0031         if ( ! node ) {
0032             return;
0033         }
0034         AstDefaultVisitor::visitNode(node);
0035         if ( node->start() > m_root->start() && ! node->isChildOf(m_root) ) {
0036             m_next = (m_next < node->start() && m_next.isValid()) ? m_next : node->start();
0037         }
0038     }
0039 
0040 private:
0041     KTextEditor::Cursor m_next{-1, -1};
0042     Ast* m_root;
0043 };
0044 
0045 
0046 //BEGIN RangeFixVisitor
0047 void RangeFixVisitor::visitNode(Ast* node) {
0048     AstDefaultVisitor::visitNode(node);
0049     if ( node && node->parent && node->parent->astType != Ast::AttributeAstType ) {
0050         if ( ( node->parent->endLine <= node->endLine && node->parent->endCol <= node->endCol )
0051                 || node->parent->endLine < node->endLine )
0052         {
0053             node->parent->endLine = node->endLine;
0054             node->parent->endCol = node->endCol;
0055         }
0056     }
0057 };
0058 
0059 void RangeFixVisitor::visitCode(CodeAst* node) {
0060     node->startLine = node->startCol = 0;
0061     AstDefaultVisitor::visitCode(node);
0062 }
0063 
0064 void RangeFixVisitor::visitFunctionDefinition(FunctionDefinitionAst* node) {
0065     cutDefinitionPreamble(node->name, node->async ? "asyncdef" : "def");
0066     AstDefaultVisitor::visitFunctionDefinition(node);
0067 };
0068 
0069 void RangeFixVisitor::visitClassDefinition(ClassDefinitionAst* node) {
0070     cutDefinitionPreamble(node->name, "class");
0071     AstDefaultVisitor::visitClassDefinition(node);
0072 };
0073 
0074 void RangeFixVisitor::visitAttribute(AttributeAst* node) {
0075     // Work around the weird way to count columns in Python's AST module.
0076 
0077     // Find where the next expression (of any kind) behind this one starts
0078     NextAstFindVisitor v;
0079     auto next_start = v.findNext(node);
0080     if ( ! next_start.isValid() ) {
0081         // use end of document as reference
0082         next_start = {lines.size() - 1, lines.last().size() - 1};
0083     }
0084 
0085     // take only the portion of the line up to that next expression
0086     auto endLine = next_start.line();
0087     auto endCol = next_start.column();
0088     if ( ! (next_start > node->start()) ) {
0089         endLine = node->startLine;
0090         endCol = -1;
0091     }
0092 
0093     const QString& name(node->attribute->value);
0094 
0095     QString line;
0096     for ( int n = node->startLine,
0097                 pos = node->value->endCol + 1,
0098                 dotFound = false,
0099                 nameFound = false;
0100             n <= endLine; ++n, pos = 0 ) {
0101         line = lines.at(n);
0102         if ( n == endLine && endCol != -1 ) {
0103             // Never look at the next expression.
0104             line = line.left(endCol);
0105         }
0106         if ( !dotFound ) {
0107             // The real attr name can never be before a dot.
0108             // Nor can the start of a comment.
0109             // (Don't be misled by `foo["bar"].bar` or `foo["#"].bar`)
0110             pos = line.indexOf('.', pos);
0111             if ( pos == -1 ) continue;
0112             dotFound = true;
0113         }
0114         if ( !nameFound ) {
0115             // Track if the attr name has appeared at least once.
0116             // This helps avoid RangeFixVisitor::interpreting '#'s in strings as comments -
0117             //   there can never be a comment before the real attr name.
0118             pos = line.indexOf(name, pos + 1);
0119             if ( pos == -1 ) continue;
0120             nameFound = true;
0121         }
0122         if ( dotFound && nameFound &&
0123                 (pos = line.indexOf('#', pos + name.length())) != -1) {
0124             // Remove the comment after a '#' iff we're certain it can't
0125             //  be inside a string literal (e.g. `foo["#"].bar`).
0126             line = line.left(pos);
0127         }
0128         // Take the last occurrence, any others are in string literals.
0129         pos = line.lastIndexOf(name);
0130         if ( pos != -1 ) {
0131             node->startLine = n;
0132             node->startCol = pos;
0133         }
0134         // N.B. we do this for all lines, the last non-comment occurrence
0135         //  is the real one.
0136     }
0137     // This fails (only, AFAIK) in a very limited case:
0138     // If the value expression (`foo` in `foo.bar`) contains a dot, the
0139     //   attr name, _and_ a hash in that order (may not be consecutive),
0140     //   and the hash is on the same line as the real attr name,
0141     //   we wrongly interpret the hash as the start of a comment.
0142     // e.g `foo["...barrier#"].bar` will highlight part of the string.
0143 
0144     node->endLine = node->startLine;
0145     node->endCol = node->startCol + name.length() - 1;
0146     node->attribute->copyRange(node);
0147 
0148     AstDefaultVisitor::visitAttribute(node);
0149 };
0150 
0151 // alias for imports (import foo as bar, baz as bang)
0152 // no strings, brackets, or whatever are allowed here, so the "parser"
0153 // can be very straightforward.
0154 void RangeFixVisitor::visitImport(ImportAst* node) {
0155     AstDefaultVisitor::visitImport(node);
0156     int aliasIndex = 0;
0157     foreach ( AliasAst* alias, node->names ) {
0158         fixAlias(alias->name, alias->asName, node->startLine, aliasIndex);
0159         aliasIndex += 1;
0160     }
0161 };
0162 
0163 // alias for exceptions (except FooBarException as somethingterriblehappened: ...)
0164 void RangeFixVisitor::visitExceptionHandler(ExceptionHandlerAst* node) {
0165     AstDefaultVisitor::visitExceptionHandler(node);
0166     if ( ! node->name ) {
0167         return;
0168     }
0169     const QString& line = lines.at(node->startLine);
0170     const int end = line.count() - 1;
0171     int back = backtrackDottedName(line, end);
0172     node->name->startCol = end - back;
0173     node->name->endCol = end;
0174 }
0175 
0176 void RangeFixVisitor::visitString(Python::StringAst* node) {
0177     AstDefaultVisitor::visitString(node);
0178     auto match = findString.match(lines.at(node->startLine), node->startCol);
0179     if ( match.capturedLength() > 0 ) {
0180         node->endCol += match.capturedLength() - 1; // Ranges are inclusive.
0181     }
0182 }
0183 void RangeFixVisitor::visitBytes(Python::BytesAst* node) {
0184     AstDefaultVisitor::visitBytes(node);
0185     auto match = findString.match(lines.at(node->startLine), node->startCol + 1);
0186     if ( match.capturedLength() > 0 ) {
0187         node->endCol += match.capturedLength(); // -1 then +1, because of the 'b'.
0188     }
0189 }
0190 void RangeFixVisitor::visitFormattedValue(Python::FormattedValueAst * node) {
0191     AstDefaultVisitor::visitFormattedValue(node);
0192     auto match = findString.match(lines.at(node->startLine), node->startCol + 1);
0193     if ( match.capturedLength() > 0 ) {
0194         node->endCol += match.capturedLength();
0195     }
0196 }
0197 
0198 void RangeFixVisitor::visitNumber(Python::NumberAst* node) {
0199     AstDefaultVisitor::visitNumber(node);
0200     auto match = findNumber.match(lines.at(node->startLine), node->startCol);
0201     if ( match.capturedLength() > 0 ) {
0202         node->endCol += match.capturedLength() - 1; // Ranges are inclusive.
0203     }
0204 }
0205 
0206 // Add one column after the last child to cover the closing bracket: `[1,2,3]`
0207 // TODO This is still wrong if the last child is followed by parens or whitespace.
0208 // endCol matters most in single-line expressions, so this isn't a huge problem.
0209 void RangeFixVisitor::visitSubscript(Python::SubscriptAst* node) {
0210     AstDefaultVisitor::visitSubscript(node);
0211     node->endCol++;
0212 }
0213 void RangeFixVisitor::visitComprehension(Python::ComprehensionAst* node) {
0214     AstDefaultVisitor::visitComprehension(node);
0215     node->endCol++;
0216 }
0217 void RangeFixVisitor::visitList(Python::ListAst* node) {
0218     AstDefaultVisitor::visitList(node);
0219     node->endCol++;
0220 }
0221 void RangeFixVisitor::visitTuple(Python::TupleAst* node) {
0222     AstDefaultVisitor::visitTuple(node);
0223     node->endCol++;
0224 }
0225 
0226 // skip the decorators and the "def" at the beginning
0227 // of a class or function declaration and modify @arg node
0228 // example:
0229 //  @decorate(foo)
0230 //  @decorate(bar)
0231 //  class myclass(parent): pass
0232 // before: start of class->name is [0, 0]
0233 // after: start of class->name is [2, 5]
0234 // line continuation characters are not supported,
0235 // because code needing those in this case is not worth being supported.
0236 void RangeFixVisitor::cutDefinitionPreamble(Ast* fixNode, const QString& defKeyword) {
0237     if ( ! fixNode ) {
0238         return;
0239     }
0240     int currentLine = fixNode->startLine;
0241 
0242     // cut away decorators
0243     while ( currentLine < lines.size() ) {
0244         if ( lines.at(currentLine).trimmed().remove(' ').remove('\t').startsWith(defKeyword) ) {
0245             // it's not a decorator, so stop skipping lines.
0246             break;
0247         }
0248         currentLine += 1;
0249     }
0250 //         qDebug() << "FIX:" << fixNode->range();
0251     fixNode->startLine = currentLine;
0252     fixNode->endLine = currentLine;
0253 //         qDebug() << "FIXED:" << fixNode->range() << fixNode->astType;
0254 
0255     // cut away the "def" / "class"
0256     int currentColumn = -1;
0257     if ( currentLine > lines.size() ) {
0258         // whops?
0259         return;
0260     }
0261     const QString& lineData = lines.at(currentLine);
0262     bool keywordFound = false;
0263     while ( currentColumn < lineData.size() - 1 ) {
0264         currentColumn += 1;
0265         if ( lineData.at(currentColumn).isSpace() ) {
0266             // skip space at the beginning of the line
0267             continue;
0268         }
0269         else if ( keywordFound ) {
0270             // if the "def" / "class" was already found, and the current char is
0271             // non space, then this is indeed the start of the identifier we're looking for.
0272             break;
0273         }
0274         else {
0275             keywordFound = true;
0276             currentColumn += defKeyword.size();
0277         }
0278     }
0279     const int previousLength = fixNode->endCol - fixNode->startCol;
0280     fixNode->startCol = currentColumn;
0281     fixNode->endCol = currentColumn + previousLength;
0282 };
0283 
0284 int RangeFixVisitor::backtrackDottedName(const QString& data, const int start) {
0285     bool haveDot = true;
0286     bool previousWasSpace = true;
0287     for ( int i = start - 1; i >= 0; i-- ) {
0288         if ( data.at(i).isSpace() ) {
0289             previousWasSpace = true;
0290             continue;
0291         }
0292         if ( data.at(i) == ':' ) {
0293             // excepthandler
0294             continue;
0295         }
0296         if ( data.at(i) == '.' ) {
0297             haveDot = true;
0298         }
0299         else if ( haveDot ) {
0300             haveDot = false;
0301             previousWasSpace = false;
0302             continue;
0303         }
0304         if ( previousWasSpace && ! haveDot ) {
0305             return start-i-2;
0306         }
0307         previousWasSpace = false;
0308     }
0309     return 0;
0310 }
0311 
0312 void RangeFixVisitor::fixAlias(Ast* dotted, Ast* asname, const int startLine, int aliasIndex) {
0313     if ( ! asname && ! dotted ) {
0314         return;
0315     }
0316     QString line = lines.at(startLine);
0317     int lineno = startLine;
0318     for ( int i = 0; i < line.size(); i++ ) {
0319         const QChar& current = line.at(i);
0320         if ( current == '\\' ) {
0321             // line continuation character
0322             // splitting like "import foo as \ \n bar" is not supported.
0323             lineno += 1;
0324             line = lines.at(lineno);
0325             i = 0;
0326             continue;
0327         }
0328         if ( current == ',' ) {
0329             if ( aliasIndex == 0 ) {
0330                 // nothing found, continue below
0331                 line = line.left(i);
0332                 break;
0333             }
0334             // next alias expression
0335             aliasIndex -= 1;
0336         }
0337         if ( i > line.length() - 3 ) {
0338             continue;
0339         }
0340         if ( current.isSpace() && line.mid(i+1).startsWith("as") && ( line.at(i+3).isSpace() || line.at(i+3) == '\\' ) ) {
0341             // there's an "as"
0342             if ( aliasIndex == 0 ) {
0343                 // it's the one we're looking for
0344                 // find the expression
0345                 if ( dotted ) {
0346                     int dottedNameLength = backtrackDottedName(line, i);
0347                     dotted->startLine = lineno;
0348                     dotted->endLine = lineno;
0349                     dotted->startCol = i-dottedNameLength;
0350                     dotted->endCol = i;
0351                 }
0352                 // find the asname
0353                 if ( asname ) {
0354                     bool atStart = true;
0355                     int textStart = i+3;
0356                     for ( int j = i+3; j < line.size(); j++ ) {
0357                         if ( atStart && ! line.at(j).isSpace() ) {
0358                             atStart = false;
0359                             textStart = j;
0360                         }
0361                         if ( ! atStart && ( line.at(j).isSpace() || j == line.size() - 1 ) ) {
0362                             // found it
0363                             asname->startLine = lineno;
0364                             asname->endLine = lineno;
0365                             asname->startCol = textStart - 1;
0366                             asname->endCol = j;
0367                         }
0368                     }
0369                 }
0370                 return;
0371             }
0372         }
0373     }
0374     // no "as" found, use last dotted name in line
0375     const int end = line.count() - whitespaceAtEnd(line);
0376     int back = backtrackDottedName(line, end);
0377     dotted->startLine = lineno;
0378     dotted->endLine = lineno;
0379     dotted->startCol = end - back;
0380     dotted->endCol = end;
0381 };
0382 
0383 int RangeFixVisitor::whitespaceAtEnd(const QString& line) {
0384     for ( int i = 0; i < line.size(); i++ ) {
0385         if ( ! line.at(line.size() - i - 1).isSpace() ) {
0386             return i;
0387         }
0388     }
0389     return 0;
0390 };
0391 
0392 // FIXME This doesn't work for triple-quoted strings
0393 //  (it gives length 2, which is no worse than before).
0394 const QRegularExpression RangeFixVisitor::findString("\\G(['\"]).*?(?<!\\\\)\\g1");
0395 // Looser than the real spec, but since we know there *is* a valid number it finds the end ok.
0396 const QRegularExpression RangeFixVisitor::findNumber("\\G(?:[\\d_\\.bjoxBJOX]|[eE][+-]?)*");
0397 
0398 }