File indexing completed on 2024-05-19 15:42:34

0001 /*
0002     SPDX-FileCopyrightText: 2007 Andreas Pakulat <apaku@gmx.de>
0003     SPDX-FileCopyrightText: 2010-2011 Sven Brauch <svenbrauch@googlemail.com>
0004 
0005     SPDX-License-Identifier: LGPL-2.0-or-later
0006 */
0007 
0008 #include "astbuilder.h"
0009 #include "ast.h"
0010 
0011 #include <language/duchain/problem.h>
0012 #include <language/duchain/duchain.h>
0013 #include <language/editor/documentrange.h>
0014 
0015 #include <memory>
0016 
0017 #include "python_header.h"
0018 #include "asttransformer.h"
0019 #include "astdefaultvisitor.h"
0020 #include "rangefixvisitor.h"
0021 
0022 #include <QDebug>
0023 #include "parserdebug.h"
0024 
0025 using namespace KDevelop;
0026 
0027 namespace Python
0028 {
0029 
// Mutex handed to PythonParser by AstBuilder::parse(). PythonParser locks it in its
// constructor (before Py_InitializeEx) and keeps it locked until it is destroyed
// (after Py_Finalize), so the interpreter set-up, parsing and tear-down of one
// parse() call do not overlap with those of another.
0030 QMutex AstBuilder::pyInitLock;
0031 
0032 QString PyUnicodeObjectToQString(PyObject* obj) {
0033     auto pyObjectCleanup = [](PyObject* o) { if (o) Py_DECREF(o); };
0034     const auto strOwner = std::unique_ptr<PyObject, decltype(pyObjectCleanup)>(PyObject_Str(obj), pyObjectCleanup);
0035     const auto str = strOwner.get();
0036     if (PyUnicode_READY(str) < 0) {
0037         qWarning("PyUnicode_READY(%p) returned false!", (void*)str);
0038         return QString();
0039     }
0040     const auto length = PyUnicode_GET_LENGTH(str);
0041     switch(PyUnicode_KIND(str)) {
0042         case PyUnicode_1BYTE_KIND:
0043             return QString::fromLatin1((const char*)PyUnicode_1BYTE_DATA(str), length);
0044         case PyUnicode_2BYTE_KIND:
0045             return QString::fromUtf16(PyUnicode_2BYTE_DATA(str), length);
0046         case PyUnicode_4BYTE_KIND:
0047             return QString::fromUcs4(PyUnicode_4BYTE_DATA(str), length);
0048     }
0049     qCritical("PyUnicode_KIND(%p) returned an unexpected value, this should not happen!", (void*)str);
0050     Q_UNREACHABLE();
0051 }
0052 
0053 struct PythonParser : private QMutexLocker
0054 {
0055     PyObject* m_parser_mod = nullptr;
0056     PyObject* m_parse_func = nullptr;
0057 
0058     PythonParser(QMutex& lock): QMutexLocker(&lock)
0059     {
0060         Py_InitializeEx(0);
0061         Q_ASSERT(Py_IsInitialized());
0062         m_parser_mod = PyImport_ImportModule("ast");
0063         Q_ASSERT(m_parser_mod); // parser import error
0064         m_parse_func = PyObject_GetAttrString(m_parser_mod, "parse");
0065         Q_ASSERT(m_parse_func); // parser function renamed?
0066     }
0067 
0068     // Call parser function and return the python ast.Module.
0069     // NOTE: The caller must DECREF the result
0070     PyObject* parse(QString const &source, QString const &filename) const
0071     {
0072         PyObject* args = PyTuple_New(3);
0073         PyTuple_SET_ITEM(args, 0, PyUnicode_FromString(source.toUtf8().data()));
0074         PyTuple_SET_ITEM(args, 1, PyUnicode_FromString(filename.toUtf8().data()));
0075         PyTuple_SET_ITEM(args, 2, PyUnicode_FromString("exec"));
0076         PyObject *result = PyObject_CallObject(m_parse_func, args);
0077         Py_DECREF(args);
0078         return result;
0079     }
0080 
0081     ~PythonParser()
0082     {
0083         if (Py_IsInitialized())
0084         {
0085             Py_XDECREF(m_parse_func);
0086             Py_XDECREF(m_parser_mod);
0087             Py_Finalize();
0088         }
0089     }
0090 };
0091 
0092 CodeAst::Ptr AstBuilder::parse(const QUrl& filename, QString &contents)
0093 {
0094     qCDebug(KDEV_PYTHON_PARSER) << " ====> AST     ====>     building abstract syntax tree for " << filename.path();
0095     
0096     Py_NoSiteFlag = 1;
0097     
0098     contents.append('\n');
0099     
0100     PythonParser py_parser(pyInitLock);
0101 
0102     PyObject* syntaxtree = py_parser.parse(contents, filename.fileName());
0103 
0104     if ( ! syntaxtree ) {
0105         qCDebug(KDEV_PYTHON_PARSER) << " ====< parse error, trying to fix";
0106 
0107         PyObject *exception, *value, *backtrace;
0108         PyErr_Fetch(&exception, &value, &backtrace);
0109         qCDebug(KDEV_PYTHON_PARSER) << "Error objects: " << exception << value << backtrace;
0110 
0111         if ( ! value ) {
0112             qCWarning(KDEV_PYTHON_PARSER) << "Internal parser error: exception value is null, aborting";
0113             return CodeAst::Ptr();
0114         }
0115         PyErr_NormalizeException(&exception, &value, &backtrace);
0116 
0117         if ( ! PyObject_IsInstance(value, PyExc_SyntaxError) ) {
0118             qCWarning(KDEV_PYTHON_PARSER) << "Exception was not a SyntaxError, aborting";
0119             return CodeAst::Ptr();
0120         }
0121         PyObject* errorMessage_str = PyObject_GetAttrString(value, "msg");
0122         PyObject* linenoobj = PyObject_GetAttrString(value, "lineno");
0123         PyObject* colnoobj = PyObject_GetAttrString(value, "offset");
0124 
0125         int lineno = PyLong_AsLong(linenoobj) - 1;
0126         int colno = PyLong_AsLong(colnoobj);
0127 
0128         ProblemPointer p(new Problem());
0129         KTextEditor::Cursor start(lineno, (colno-4 > 0 ? colno-4 : 0));
0130         KTextEditor::Cursor end(lineno, (colno+4 > 4 ? colno+4 : 4));
0131         KTextEditor::Range range(start, end);
0132         qCDebug(KDEV_PYTHON_PARSER) << "Problem range: " << range;
0133         DocumentRange location(IndexedString(filename.path()), range);
0134         p->setFinalLocation(location);
0135         p->setDescription(PyUnicodeObjectToQString(errorMessage_str));
0136         p->setSource(IProblem::Parser);
0137         m_problems.append(p);
0138         
0139         // try to recover.
0140         // Currently the following is tired:
0141         // * If the last non-space char before the error reported was ":", it's most likely an indent error.
0142         //   The common easy-to-fix and annoying indent error is "for item in foo: <EOF>". In that case, just add "pass" after the ":" token.
0143         // * If it's not, we will just comment the line with the error, fixing problems like "foo = <EOF>".
0144         // * If both fails, everything including the first non-empty line before the one with the error will be deleted.
0145         int len = contents.length();
0146         int currentLine = 0;
0147         QString currentLineContents;
0148         QChar c;
0149         QChar newline('\n');
0150         int emptySince = 0; int emptySinceLine = 0; int emptyLinesSince = 0; int emptyLinesSinceLine = 0;
0151         unsigned short currentLineIndent = 0;
0152         bool atLineBeginning = true;
0153         QList<unsigned short> indents;
0154         int errline = qMax(0, lineno);
0155         int currentLineBeginning = 0;
0156         for ( int i = 0; i < len; i++ ) {
0157             c = contents.at(i);
0158             if ( ! c.isSpace() ) {
0159                 emptySince = i;
0160                 emptySinceLine = currentLine;
0161                 atLineBeginning = false;
0162                 if ( indents.length() <= currentLine ) indents.append(currentLineIndent);
0163             }
0164             else if ( c == newline ) {
0165                 if ( currentLine == errline ) {
0166                     atLineBeginning = false;
0167                 }
0168                 else {
0169                     currentLine += 1;
0170                     currentLineBeginning = i+1;
0171                     // this line has had content, so reset the "empty lines since" counter
0172                     if ( ! atLineBeginning ) {
0173 //                         lastNonemptyLineBeginning = emptyLinesSince;
0174                         emptyLinesSince = i;
0175                         emptyLinesSinceLine = currentLine;
0176                     }
0177                     atLineBeginning = true;
0178                     if ( indents.length() <= currentLine ) indents.append(currentLineIndent);
0179                     currentLineIndent = 0;
0180                 }
0181             }
0182             else if ( atLineBeginning ) {
0183                 currentLineIndent += 1;
0184             }
0185             
0186             if ( currentLine == errline && ! atLineBeginning ) {
0187                 // if the last non-empty char before the error opens a new block, it's likely an "empty block" problem
0188                 // we can easily fix that by adding in a "pass" statement. However, we want to add that in the next line, if possible
0189                 // so context ranges for autocompletion stay intact.
0190                 if ( contents[emptySince] == QChar(':') ) {
0191                     qCDebug(KDEV_PYTHON_PARSER) << indents.length() << emptySinceLine + 1 << indents;
0192                     if ( indents.length() > emptySinceLine + 1 && indents.at(emptySinceLine) < indents.at(emptySinceLine + 1) ) {
0193                         qCDebug(KDEV_PYTHON_PARSER) << indents.at(emptySinceLine) << indents.at(emptySinceLine + 1);
0194                         contents.insert(emptyLinesSince + 1 + indents.at(emptyLinesSinceLine), "\tpass#");
0195                     }
0196                     else {
0197                         contents.insert(emptySince + 1, "\tpass#");
0198                     }
0199                 }
0200                 else if ( indents.length() >= currentLine && currentLine > 0 ) {
0201                     qCDebug(KDEV_PYTHON_PARSER) << indents << currentLine;
0202                     contents[i+1+indents.at(currentLine - 1)] = QChar('#');
0203                     contents.insert(i+1+indents.at(currentLine - 1), "pass");
0204                 }
0205                 break;
0206             }
0207         }
0208 
0209         syntaxtree = py_parser.parse(contents, filename.fileName());
0210         // 3rd try: discard everything after the last non-empty line, but only until the next block start
0211         currentLineBeginning = qMin(contents.length() - 1, currentLineBeginning);
0212         errline = qMax(0, qMin(indents.length()-1, errline));
0213         if ( ! syntaxtree ) {
0214             PyErr_Fetch(&exception, &value, &backtrace);
0215             qCDebug(KDEV_PYTHON_PARSER) << "Error objects: " << exception << value << backtrace;
0216 
0217             qCWarning(KDEV_PYTHON_PARSER) << "Discarding parts of the code to be parsed because of previous errors";
0218             qCDebug(KDEV_PYTHON_PARSER) << indents;
0219             int indentAtError = errline < indents.length() ? indents.at(errline): 0;
0220             QChar c;
0221             bool atLineBeginning = true;
0222             int currentIndent = -1;
0223             int currentLineBeginning_end = currentLineBeginning;
0224             int currentLineContentBeginning = currentLineBeginning;
0225             for ( int i = currentLineBeginning; i < len; i++ ) {
0226                 c = contents.at(i);
0227                 qCDebug(KDEV_PYTHON_PARSER) << c;
0228                 if ( c == '\n' ) {
0229                     if ( currentIndent <= indentAtError && currentIndent != -1 ) {
0230                         qCDebug(KDEV_PYTHON_PARSER) << "Start of error code: " << currentLineBeginning;
0231                         qCDebug(KDEV_PYTHON_PARSER) << "End of error block (current position): " << currentLineBeginning_end;
0232                         qCDebug(KDEV_PYTHON_PARSER) << "Length: " << currentLineBeginning_end - currentLineBeginning;
0233                         qCDebug(KDEV_PYTHON_PARSER) << "indent at error <> current indent:" << indentAtError << "<>" << currentIndent;
0234 //                         contents.remove(currentLineBeginning, currentLineBeginning_end-currentLineBeginning);
0235                         break;
0236                     }
0237                     contents.insert(currentLineContentBeginning - 1, "pass#");
0238                     i += 5;
0239                     i = qMin(i, contents.length());
0240                     len = contents.length();
0241                     atLineBeginning = true;
0242                     currentIndent = 0;
0243                     currentLineBeginning_end = i + 1;
0244                     currentLineContentBeginning = i + 1;
0245                     continue;
0246                 }
0247                 if ( ! c.isSpace() && atLineBeginning ) {
0248                     currentLineContentBeginning = i;
0249                     atLineBeginning = false;
0250                 }
0251                 if ( c.isSpace() && atLineBeginning ) currentIndent += 1;
0252             }
0253             qCDebug(KDEV_PYTHON_PARSER) << "This is what is left: " << contents;
0254             syntaxtree = py_parser.parse(contents, filename.fileName());
0255         }
0256         if ( ! syntaxtree ) {
0257             return CodeAst::Ptr(); // everything fails, so we abort.
0258         }
0259     }
0260     QString kind = PyUnicodeObjectToQString(PyObject_Repr(syntaxtree));
0261     qCDebug(KDEV_PYTHON_PARSER) << "Got syntax tree from python parser:" << kind;
0262 
0263     AstTransformer t;
0264     t.run(syntaxtree, filename.fileName().replace(".py", ""));
0265     Py_DECREF(syntaxtree);
0266 
0267     RangeFixVisitor fixVisitor(contents);
0268     fixVisitor.visitNode(t.ast);
0269 
0270     return CodeAst::Ptr(t.ast);
0271 }
0272 
0273 }
0274