File indexing completed on 2024-05-19 15:42:37
0001 /* 0002 SPDX-FileCopyrightText: 2013-2015 Sven Brauch <mail@svenbrauch.de> 0003 SPDX-FileCopyrightText: 2016-2017 Francis Herne <mail@flherne.uk> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "rangefixvisitor.h" 0009 0010 namespace Python { 0011 0012 class NextAstFindVisitor : public AstDefaultVisitor { 0013 public: 0014 KTextEditor::Cursor findNext(Python::Ast* node) { 0015 m_root = node; 0016 auto parent = node; 0017 while ( parent->parent && parent->parent->isExpression() ) { 0018 parent = parent->parent; 0019 } 0020 visitNode(parent); 0021 0022 while ( ! m_next.isValid() && parent->parent ) { 0023 // no next expression found in that statement, advance to the next statement 0024 parent = parent->parent; 0025 visitNode(parent); 0026 } 0027 0028 return m_next; 0029 }; 0030 void visitNode(Python::Ast* node) override { 0031 if ( ! node ) { 0032 return; 0033 } 0034 AstDefaultVisitor::visitNode(node); 0035 if ( node->start() > m_root->start() && ! node->isChildOf(m_root) ) { 0036 m_next = (m_next < node->start() && m_next.isValid()) ? m_next : node->start(); 0037 } 0038 } 0039 0040 private: 0041 KTextEditor::Cursor m_next{-1, -1}; 0042 Ast* m_root; 0043 }; 0044 0045 0046 //BEGIN RangeFixVisitor 0047 void RangeFixVisitor::visitNode(Ast* node) { 0048 AstDefaultVisitor::visitNode(node); 0049 if ( node && node->parent && node->parent->astType != Ast::AttributeAstType ) { 0050 if ( ( node->parent->endLine <= node->endLine && node->parent->endCol <= node->endCol ) 0051 || node->parent->endLine < node->endLine ) 0052 { 0053 node->parent->endLine = node->endLine; 0054 node->parent->endCol = node->endCol; 0055 } 0056 } 0057 }; 0058 0059 void RangeFixVisitor::visitCode(CodeAst* node) { 0060 node->startLine = node->startCol = 0; 0061 AstDefaultVisitor::visitCode(node); 0062 } 0063 0064 void RangeFixVisitor::visitFunctionDefinition(FunctionDefinitionAst* node) { 0065 cutDefinitionPreamble(node->name, node->async ? "asyncdef" : "def"); 0066 AstDefaultVisitor::visitFunctionDefinition(node); 0067 }; 0068 0069 void RangeFixVisitor::visitClassDefinition(ClassDefinitionAst* node) { 0070 cutDefinitionPreamble(node->name, "class"); 0071 AstDefaultVisitor::visitClassDefinition(node); 0072 }; 0073 0074 void RangeFixVisitor::visitAttribute(AttributeAst* node) { 0075 // Work around the weird way to count columns in Python's AST module. 0076 0077 // Find where the next expression (of any kind) behind this one starts 0078 NextAstFindVisitor v; 0079 auto next_start = v.findNext(node); 0080 if ( ! next_start.isValid() ) { 0081 // use end of document as reference 0082 next_start = {lines.size() - 1, lines.last().size() - 1}; 0083 } 0084 0085 // take only the portion of the line up to that next expression 0086 auto endLine = next_start.line(); 0087 auto endCol = next_start.column(); 0088 if ( ! (next_start > node->start()) ) { 0089 endLine = node->startLine; 0090 endCol = -1; 0091 } 0092 0093 const QString& name(node->attribute->value); 0094 0095 QString line; 0096 for ( int n = node->startLine, 0097 pos = node->value->endCol + 1, 0098 dotFound = false, 0099 nameFound = false; 0100 n <= endLine; ++n, pos = 0 ) { 0101 line = lines.at(n); 0102 if ( n == endLine && endCol != -1 ) { 0103 // Never look at the next expression. 0104 line = line.left(endCol); 0105 } 0106 if ( !dotFound ) { 0107 // The real attr name can never be before a dot. 0108 // Nor can the start of a comment. 0109 // (Don't be misled by `foo["bar"].bar` or `foo["#"].bar`) 0110 pos = line.indexOf('.', pos); 0111 if ( pos == -1 ) continue; 0112 dotFound = true; 0113 } 0114 if ( !nameFound ) { 0115 // Track if the attr name has appeared at least once. 0116 // This helps avoid RangeFixVisitor::interpreting '#'s in strings as comments - 0117 // there can never be a comment before the real attr name. 0118 pos = line.indexOf(name, pos + 1); 0119 if ( pos == -1 ) continue; 0120 nameFound = true; 0121 } 0122 if ( dotFound && nameFound && 0123 (pos = line.indexOf('#', pos + name.length())) != -1) { 0124 // Remove the comment after a '#' iff we're certain it can't 0125 // be inside a string literal (e.g. `foo["#"].bar`). 0126 line = line.left(pos); 0127 } 0128 // Take the last occurrence, any others are in string literals. 0129 pos = line.lastIndexOf(name); 0130 if ( pos != -1 ) { 0131 node->startLine = n; 0132 node->startCol = pos; 0133 } 0134 // N.B. we do this for all lines, the last non-comment occurrence 0135 // is the real one. 0136 } 0137 // This fails (only, AFAIK) in a very limited case: 0138 // If the value expression (`foo` in `foo.bar`) contains a dot, the 0139 // attr name, _and_ a hash in that order (may not be consecutive), 0140 // and the hash is on the same line as the real attr name, 0141 // we wrongly interpret the hash as the start of a comment. 0142 // e.g `foo["...barrier#"].bar` will highlight part of the string. 0143 0144 node->endLine = node->startLine; 0145 node->endCol = node->startCol + name.length() - 1; 0146 node->attribute->copyRange(node); 0147 0148 AstDefaultVisitor::visitAttribute(node); 0149 }; 0150 0151 // alias for imports (import foo as bar, baz as bang) 0152 // no strings, brackets, or whatever are allowed here, so the "parser" 0153 // can be very straightforward. 0154 void RangeFixVisitor::visitImport(ImportAst* node) { 0155 AstDefaultVisitor::visitImport(node); 0156 int aliasIndex = 0; 0157 foreach ( AliasAst* alias, node->names ) { 0158 fixAlias(alias->name, alias->asName, node->startLine, aliasIndex); 0159 aliasIndex += 1; 0160 } 0161 }; 0162 0163 // alias for exceptions (except FooBarException as somethingterriblehappened: ...) 0164 void RangeFixVisitor::visitExceptionHandler(ExceptionHandlerAst* node) { 0165 AstDefaultVisitor::visitExceptionHandler(node); 0166 if ( ! node->name ) { 0167 return; 0168 } 0169 const QString& line = lines.at(node->startLine); 0170 const int end = line.count() - 1; 0171 int back = backtrackDottedName(line, end); 0172 node->name->startCol = end - back; 0173 node->name->endCol = end; 0174 } 0175 0176 void RangeFixVisitor::visitString(Python::StringAst* node) { 0177 AstDefaultVisitor::visitString(node); 0178 auto match = findString.match(lines.at(node->startLine), node->startCol); 0179 if ( match.capturedLength() > 0 ) { 0180 node->endCol += match.capturedLength() - 1; // Ranges are inclusive. 0181 } 0182 } 0183 void RangeFixVisitor::visitBytes(Python::BytesAst* node) { 0184 AstDefaultVisitor::visitBytes(node); 0185 auto match = findString.match(lines.at(node->startLine), node->startCol + 1); 0186 if ( match.capturedLength() > 0 ) { 0187 node->endCol += match.capturedLength(); // -1 then +1, because of the 'b'. 0188 } 0189 } 0190 void RangeFixVisitor::visitFormattedValue(Python::FormattedValueAst * node) { 0191 AstDefaultVisitor::visitFormattedValue(node); 0192 auto match = findString.match(lines.at(node->startLine), node->startCol + 1); 0193 if ( match.capturedLength() > 0 ) { 0194 node->endCol += match.capturedLength(); 0195 } 0196 } 0197 0198 void RangeFixVisitor::visitNumber(Python::NumberAst* node) { 0199 AstDefaultVisitor::visitNumber(node); 0200 auto match = findNumber.match(lines.at(node->startLine), node->startCol); 0201 if ( match.capturedLength() > 0 ) { 0202 node->endCol += match.capturedLength() - 1; // Ranges are inclusive. 0203 } 0204 } 0205 0206 // Add one column after the last child to cover the closing bracket: `[1,2,3]` 0207 // TODO This is still wrong if the last child is followed by parens or whitespace. 0208 // endCol matters most in single-line expressions, so this isn't a huge problem. 0209 void RangeFixVisitor::visitSubscript(Python::SubscriptAst* node) { 0210 AstDefaultVisitor::visitSubscript(node); 0211 node->endCol++; 0212 } 0213 void RangeFixVisitor::visitComprehension(Python::ComprehensionAst* node) { 0214 AstDefaultVisitor::visitComprehension(node); 0215 node->endCol++; 0216 } 0217 void RangeFixVisitor::visitList(Python::ListAst* node) { 0218 AstDefaultVisitor::visitList(node); 0219 node->endCol++; 0220 } 0221 void RangeFixVisitor::visitTuple(Python::TupleAst* node) { 0222 AstDefaultVisitor::visitTuple(node); 0223 node->endCol++; 0224 } 0225 0226 // skip the decorators and the "def" at the beginning 0227 // of a class or function declaration and modify @arg node 0228 // example: 0229 // @decorate(foo) 0230 // @decorate(bar) 0231 // class myclass(parent): pass 0232 // before: start of class->name is [0, 0] 0233 // after: start of class->name is [2, 5] 0234 // line continuation characters are not supported, 0235 // because code needing those in this case is not worth being supported. 0236 void RangeFixVisitor::cutDefinitionPreamble(Ast* fixNode, const QString& defKeyword) { 0237 if ( ! fixNode ) { 0238 return; 0239 } 0240 int currentLine = fixNode->startLine; 0241 0242 // cut away decorators 0243 while ( currentLine < lines.size() ) { 0244 if ( lines.at(currentLine).trimmed().remove(' ').remove('\t').startsWith(defKeyword) ) { 0245 // it's not a decorator, so stop skipping lines. 0246 break; 0247 } 0248 currentLine += 1; 0249 } 0250 // qDebug() << "FIX:" << fixNode->range(); 0251 fixNode->startLine = currentLine; 0252 fixNode->endLine = currentLine; 0253 // qDebug() << "FIXED:" << fixNode->range() << fixNode->astType; 0254 0255 // cut away the "def" / "class" 0256 int currentColumn = -1; 0257 if ( currentLine > lines.size() ) { 0258 // whops? 0259 return; 0260 } 0261 const QString& lineData = lines.at(currentLine); 0262 bool keywordFound = false; 0263 while ( currentColumn < lineData.size() - 1 ) { 0264 currentColumn += 1; 0265 if ( lineData.at(currentColumn).isSpace() ) { 0266 // skip space at the beginning of the line 0267 continue; 0268 } 0269 else if ( keywordFound ) { 0270 // if the "def" / "class" was already found, and the current char is 0271 // non space, then this is indeed the start of the identifier we're looking for. 0272 break; 0273 } 0274 else { 0275 keywordFound = true; 0276 currentColumn += defKeyword.size(); 0277 } 0278 } 0279 const int previousLength = fixNode->endCol - fixNode->startCol; 0280 fixNode->startCol = currentColumn; 0281 fixNode->endCol = currentColumn + previousLength; 0282 }; 0283 0284 int RangeFixVisitor::backtrackDottedName(const QString& data, const int start) { 0285 bool haveDot = true; 0286 bool previousWasSpace = true; 0287 for ( int i = start - 1; i >= 0; i-- ) { 0288 if ( data.at(i).isSpace() ) { 0289 previousWasSpace = true; 0290 continue; 0291 } 0292 if ( data.at(i) == ':' ) { 0293 // excepthandler 0294 continue; 0295 } 0296 if ( data.at(i) == '.' ) { 0297 haveDot = true; 0298 } 0299 else if ( haveDot ) { 0300 haveDot = false; 0301 previousWasSpace = false; 0302 continue; 0303 } 0304 if ( previousWasSpace && ! haveDot ) { 0305 return start-i-2; 0306 } 0307 previousWasSpace = false; 0308 } 0309 return 0; 0310 } 0311 0312 void RangeFixVisitor::fixAlias(Ast* dotted, Ast* asname, const int startLine, int aliasIndex) { 0313 if ( ! asname && ! dotted ) { 0314 return; 0315 } 0316 QString line = lines.at(startLine); 0317 int lineno = startLine; 0318 for ( int i = 0; i < line.size(); i++ ) { 0319 const QChar& current = line.at(i); 0320 if ( current == '\\' ) { 0321 // line continuation character 0322 // splitting like "import foo as \ \n bar" is not supported. 0323 lineno += 1; 0324 line = lines.at(lineno); 0325 i = 0; 0326 continue; 0327 } 0328 if ( current == ',' ) { 0329 if ( aliasIndex == 0 ) { 0330 // nothing found, continue below 0331 line = line.left(i); 0332 break; 0333 } 0334 // next alias expression 0335 aliasIndex -= 1; 0336 } 0337 if ( i > line.length() - 3 ) { 0338 continue; 0339 } 0340 if ( current.isSpace() && line.mid(i+1).startsWith("as") && ( line.at(i+3).isSpace() || line.at(i+3) == '\\' ) ) { 0341 // there's an "as" 0342 if ( aliasIndex == 0 ) { 0343 // it's the one we're looking for 0344 // find the expression 0345 if ( dotted ) { 0346 int dottedNameLength = backtrackDottedName(line, i); 0347 dotted->startLine = lineno; 0348 dotted->endLine = lineno; 0349 dotted->startCol = i-dottedNameLength; 0350 dotted->endCol = i; 0351 } 0352 // find the asname 0353 if ( asname ) { 0354 bool atStart = true; 0355 int textStart = i+3; 0356 for ( int j = i+3; j < line.size(); j++ ) { 0357 if ( atStart && ! line.at(j).isSpace() ) { 0358 atStart = false; 0359 textStart = j; 0360 } 0361 if ( ! atStart && ( line.at(j).isSpace() || j == line.size() - 1 ) ) { 0362 // found it 0363 asname->startLine = lineno; 0364 asname->endLine = lineno; 0365 asname->startCol = textStart - 1; 0366 asname->endCol = j; 0367 } 0368 } 0369 } 0370 return; 0371 } 0372 } 0373 } 0374 // no "as" found, use last dotted name in line 0375 const int end = line.count() - whitespaceAtEnd(line); 0376 int back = backtrackDottedName(line, end); 0377 dotted->startLine = lineno; 0378 dotted->endLine = lineno; 0379 dotted->startCol = end - back; 0380 dotted->endCol = end; 0381 }; 0382 0383 int RangeFixVisitor::whitespaceAtEnd(const QString& line) { 0384 for ( int i = 0; i < line.size(); i++ ) { 0385 if ( ! line.at(line.size() - i - 1).isSpace() ) { 0386 return i; 0387 } 0388 } 0389 return 0; 0390 }; 0391 0392 // FIXME This doesn't work for triple-quoted strings 0393 // (it gives length 2, which is no worse than before). 0394 const QRegularExpression RangeFixVisitor::findString("\\G(['\"]).*?(?<!\\\\)\\g1"); 0395 // Looser than the real spec, but since we know there *is* a valid number it finds the end ok. 0396 const QRegularExpression RangeFixVisitor::findNumber("\\G(?:[\\d_\\.bjoxBJOX]|[eE][+-]?)*"); 0397 0398 }