File indexing completed on 2024-05-19 15:42:34
0001 /* 0002 SPDX-FileCopyrightText: 2007 Andreas Pakulat <apaku@gmx.de> 0003 SPDX-FileCopyrightText: 2010-2011 Sven Brauch <svenbrauch@googlemail.com> 0004 0005 SPDX-License-Identifier: LGPL-2.0-or-later 0006 */ 0007 0008 #include "astbuilder.h" 0009 #include "ast.h" 0010 0011 #include <language/duchain/problem.h> 0012 #include <language/duchain/duchain.h> 0013 #include <language/editor/documentrange.h> 0014 0015 #include <memory> 0016 0017 #include "python_header.h" 0018 #include "asttransformer.h" 0019 #include "astdefaultvisitor.h" 0020 #include "rangefixvisitor.h" 0021 0022 #include <QDebug> 0023 #include "parserdebug.h" 0024 0025 using namespace KDevelop; 0026 0027 namespace Python 0028 { 0029 0030 QMutex AstBuilder::pyInitLock; 0031 0032 QString PyUnicodeObjectToQString(PyObject* obj) { 0033 auto pyObjectCleanup = [](PyObject* o) { if (o) Py_DECREF(o); }; 0034 const auto strOwner = std::unique_ptr<PyObject, decltype(pyObjectCleanup)>(PyObject_Str(obj), pyObjectCleanup); 0035 const auto str = strOwner.get(); 0036 if (PyUnicode_READY(str) < 0) { 0037 qWarning("PyUnicode_READY(%p) returned false!", (void*)str); 0038 return QString(); 0039 } 0040 const auto length = PyUnicode_GET_LENGTH(str); 0041 switch(PyUnicode_KIND(str)) { 0042 case PyUnicode_1BYTE_KIND: 0043 return QString::fromLatin1((const char*)PyUnicode_1BYTE_DATA(str), length); 0044 case PyUnicode_2BYTE_KIND: 0045 return QString::fromUtf16(PyUnicode_2BYTE_DATA(str), length); 0046 case PyUnicode_4BYTE_KIND: 0047 return QString::fromUcs4(PyUnicode_4BYTE_DATA(str), length); 0048 } 0049 qCritical("PyUnicode_KIND(%p) returned an unexpected value, this should not happen!", (void*)str); 0050 Q_UNREACHABLE(); 0051 } 0052 0053 struct PythonParser : private QMutexLocker 0054 { 0055 PyObject* m_parser_mod = nullptr; 0056 PyObject* m_parse_func = nullptr; 0057 0058 PythonParser(QMutex& lock): QMutexLocker(&lock) 0059 { 0060 Py_InitializeEx(0); 0061 Q_ASSERT(Py_IsInitialized()); 0062 m_parser_mod = PyImport_ImportModule("ast"); 0063 Q_ASSERT(m_parser_mod); // parser import error 0064 m_parse_func = PyObject_GetAttrString(m_parser_mod, "parse"); 0065 Q_ASSERT(m_parse_func); // parser function renamed? 0066 } 0067 0068 // Call parser function and return the python ast.Module. 0069 // NOTE: The caller must DECREF the result 0070 PyObject* parse(QString const &source, QString const &filename) const 0071 { 0072 PyObject* args = PyTuple_New(3); 0073 PyTuple_SET_ITEM(args, 0, PyUnicode_FromString(source.toUtf8().data())); 0074 PyTuple_SET_ITEM(args, 1, PyUnicode_FromString(filename.toUtf8().data())); 0075 PyTuple_SET_ITEM(args, 2, PyUnicode_FromString("exec")); 0076 PyObject *result = PyObject_CallObject(m_parse_func, args); 0077 Py_DECREF(args); 0078 return result; 0079 } 0080 0081 ~PythonParser() 0082 { 0083 if (Py_IsInitialized()) 0084 { 0085 Py_XDECREF(m_parse_func); 0086 Py_XDECREF(m_parser_mod); 0087 Py_Finalize(); 0088 } 0089 } 0090 }; 0091 0092 CodeAst::Ptr AstBuilder::parse(const QUrl& filename, QString &contents) 0093 { 0094 qCDebug(KDEV_PYTHON_PARSER) << " ====> AST ====> building abstract syntax tree for " << filename.path(); 0095 0096 Py_NoSiteFlag = 1; 0097 0098 contents.append('\n'); 0099 0100 PythonParser py_parser(pyInitLock); 0101 0102 PyObject* syntaxtree = py_parser.parse(contents, filename.fileName()); 0103 0104 if ( ! syntaxtree ) { 0105 qCDebug(KDEV_PYTHON_PARSER) << " ====< parse error, trying to fix"; 0106 0107 PyObject *exception, *value, *backtrace; 0108 PyErr_Fetch(&exception, &value, &backtrace); 0109 qCDebug(KDEV_PYTHON_PARSER) << "Error objects: " << exception << value << backtrace; 0110 0111 if ( ! value ) { 0112 qCWarning(KDEV_PYTHON_PARSER) << "Internal parser error: exception value is null, aborting"; 0113 return CodeAst::Ptr(); 0114 } 0115 PyErr_NormalizeException(&exception, &value, &backtrace); 0116 0117 if ( ! PyObject_IsInstance(value, PyExc_SyntaxError) ) { 0118 qCWarning(KDEV_PYTHON_PARSER) << "Exception was not a SyntaxError, aborting"; 0119 return CodeAst::Ptr(); 0120 } 0121 PyObject* errorMessage_str = PyObject_GetAttrString(value, "msg"); 0122 PyObject* linenoobj = PyObject_GetAttrString(value, "lineno"); 0123 PyObject* colnoobj = PyObject_GetAttrString(value, "offset"); 0124 0125 int lineno = PyLong_AsLong(linenoobj) - 1; 0126 int colno = PyLong_AsLong(colnoobj); 0127 0128 ProblemPointer p(new Problem()); 0129 KTextEditor::Cursor start(lineno, (colno-4 > 0 ? colno-4 : 0)); 0130 KTextEditor::Cursor end(lineno, (colno+4 > 4 ? colno+4 : 4)); 0131 KTextEditor::Range range(start, end); 0132 qCDebug(KDEV_PYTHON_PARSER) << "Problem range: " << range; 0133 DocumentRange location(IndexedString(filename.path()), range); 0134 p->setFinalLocation(location); 0135 p->setDescription(PyUnicodeObjectToQString(errorMessage_str)); 0136 p->setSource(IProblem::Parser); 0137 m_problems.append(p); 0138 0139 // try to recover. 0140 // Currently the following is tired: 0141 // * If the last non-space char before the error reported was ":", it's most likely an indent error. 0142 // The common easy-to-fix and annoying indent error is "for item in foo: <EOF>". In that case, just add "pass" after the ":" token. 0143 // * If it's not, we will just comment the line with the error, fixing problems like "foo = <EOF>". 0144 // * If both fails, everything including the first non-empty line before the one with the error will be deleted. 0145 int len = contents.length(); 0146 int currentLine = 0; 0147 QString currentLineContents; 0148 QChar c; 0149 QChar newline('\n'); 0150 int emptySince = 0; int emptySinceLine = 0; int emptyLinesSince = 0; int emptyLinesSinceLine = 0; 0151 unsigned short currentLineIndent = 0; 0152 bool atLineBeginning = true; 0153 QList<unsigned short> indents; 0154 int errline = qMax(0, lineno); 0155 int currentLineBeginning = 0; 0156 for ( int i = 0; i < len; i++ ) { 0157 c = contents.at(i); 0158 if ( ! c.isSpace() ) { 0159 emptySince = i; 0160 emptySinceLine = currentLine; 0161 atLineBeginning = false; 0162 if ( indents.length() <= currentLine ) indents.append(currentLineIndent); 0163 } 0164 else if ( c == newline ) { 0165 if ( currentLine == errline ) { 0166 atLineBeginning = false; 0167 } 0168 else { 0169 currentLine += 1; 0170 currentLineBeginning = i+1; 0171 // this line has had content, so reset the "empty lines since" counter 0172 if ( ! atLineBeginning ) { 0173 // lastNonemptyLineBeginning = emptyLinesSince; 0174 emptyLinesSince = i; 0175 emptyLinesSinceLine = currentLine; 0176 } 0177 atLineBeginning = true; 0178 if ( indents.length() <= currentLine ) indents.append(currentLineIndent); 0179 currentLineIndent = 0; 0180 } 0181 } 0182 else if ( atLineBeginning ) { 0183 currentLineIndent += 1; 0184 } 0185 0186 if ( currentLine == errline && ! atLineBeginning ) { 0187 // if the last non-empty char before the error opens a new block, it's likely an "empty block" problem 0188 // we can easily fix that by adding in a "pass" statement. However, we want to add that in the next line, if possible 0189 // so context ranges for autocompletion stay intact. 0190 if ( contents[emptySince] == QChar(':') ) { 0191 qCDebug(KDEV_PYTHON_PARSER) << indents.length() << emptySinceLine + 1 << indents; 0192 if ( indents.length() > emptySinceLine + 1 && indents.at(emptySinceLine) < indents.at(emptySinceLine + 1) ) { 0193 qCDebug(KDEV_PYTHON_PARSER) << indents.at(emptySinceLine) << indents.at(emptySinceLine + 1); 0194 contents.insert(emptyLinesSince + 1 + indents.at(emptyLinesSinceLine), "\tpass#"); 0195 } 0196 else { 0197 contents.insert(emptySince + 1, "\tpass#"); 0198 } 0199 } 0200 else if ( indents.length() >= currentLine && currentLine > 0 ) { 0201 qCDebug(KDEV_PYTHON_PARSER) << indents << currentLine; 0202 contents[i+1+indents.at(currentLine - 1)] = QChar('#'); 0203 contents.insert(i+1+indents.at(currentLine - 1), "pass"); 0204 } 0205 break; 0206 } 0207 } 0208 0209 syntaxtree = py_parser.parse(contents, filename.fileName()); 0210 // 3rd try: discard everything after the last non-empty line, but only until the next block start 0211 currentLineBeginning = qMin(contents.length() - 1, currentLineBeginning); 0212 errline = qMax(0, qMin(indents.length()-1, errline)); 0213 if ( ! syntaxtree ) { 0214 PyErr_Fetch(&exception, &value, &backtrace); 0215 qCDebug(KDEV_PYTHON_PARSER) << "Error objects: " << exception << value << backtrace; 0216 0217 qCWarning(KDEV_PYTHON_PARSER) << "Discarding parts of the code to be parsed because of previous errors"; 0218 qCDebug(KDEV_PYTHON_PARSER) << indents; 0219 int indentAtError = errline < indents.length() ? indents.at(errline): 0; 0220 QChar c; 0221 bool atLineBeginning = true; 0222 int currentIndent = -1; 0223 int currentLineBeginning_end = currentLineBeginning; 0224 int currentLineContentBeginning = currentLineBeginning; 0225 for ( int i = currentLineBeginning; i < len; i++ ) { 0226 c = contents.at(i); 0227 qCDebug(KDEV_PYTHON_PARSER) << c; 0228 if ( c == '\n' ) { 0229 if ( currentIndent <= indentAtError && currentIndent != -1 ) { 0230 qCDebug(KDEV_PYTHON_PARSER) << "Start of error code: " << currentLineBeginning; 0231 qCDebug(KDEV_PYTHON_PARSER) << "End of error block (current position): " << currentLineBeginning_end; 0232 qCDebug(KDEV_PYTHON_PARSER) << "Length: " << currentLineBeginning_end - currentLineBeginning; 0233 qCDebug(KDEV_PYTHON_PARSER) << "indent at error <> current indent:" << indentAtError << "<>" << currentIndent; 0234 // contents.remove(currentLineBeginning, currentLineBeginning_end-currentLineBeginning); 0235 break; 0236 } 0237 contents.insert(currentLineContentBeginning - 1, "pass#"); 0238 i += 5; 0239 i = qMin(i, contents.length()); 0240 len = contents.length(); 0241 atLineBeginning = true; 0242 currentIndent = 0; 0243 currentLineBeginning_end = i + 1; 0244 currentLineContentBeginning = i + 1; 0245 continue; 0246 } 0247 if ( ! c.isSpace() && atLineBeginning ) { 0248 currentLineContentBeginning = i; 0249 atLineBeginning = false; 0250 } 0251 if ( c.isSpace() && atLineBeginning ) currentIndent += 1; 0252 } 0253 qCDebug(KDEV_PYTHON_PARSER) << "This is what is left: " << contents; 0254 syntaxtree = py_parser.parse(contents, filename.fileName()); 0255 } 0256 if ( ! syntaxtree ) { 0257 return CodeAst::Ptr(); // everything fails, so we abort. 0258 } 0259 } 0260 QString kind = PyUnicodeObjectToQString(PyObject_Repr(syntaxtree)); 0261 qCDebug(KDEV_PYTHON_PARSER) << "Got syntax tree from python parser:" << kind; 0262 0263 AstTransformer t; 0264 t.run(syntaxtree, filename.fileName().replace(".py", "")); 0265 Py_DECREF(syntaxtree); 0266 0267 RangeFixVisitor fixVisitor(contents); 0268 fixVisitor.visitNode(t.ast); 0269 0270 return CodeAst::Ptr(t.ast); 0271 } 0272 0273 } 0274