File indexing completed on 2024-05-19 05:42:01

0001 // ct_lvtclp_codebase_db.m.cpp                                      -*-C++-*--
0002 
0003 /*
0004 // Copyright 2023 Codethink Ltd <codethink@codethink.co.uk>
0005 // SPDX-License-Identifier: Apache-2.0
0006 //
0007 // Licensed under the Apache License, Version 2.0 (the "License");
0008 // you may not use this file except in compliance with the License.
0009 // You may obtain a copy of the License at
0010 //
0011 //     http://www.apache.org/licenses/LICENSE-2.0
0012 //
0013 // Unless required by applicable law or agreed to in writing, software
0014 // distributed under the License is distributed on an "AS IS" BASIS,
0015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 // See the License for the specific language governing permissions and
0017 // limitations under the License.
0018 */
0019 
0020 #include <ct_lvtclp_compilerutil.h>
0021 #include <ct_lvtclp_cpp_tool.h>
0022 #ifdef CT_ENABLE_FORTRAN_SCANNER
0023 #include <fortran/ct_lvtclp_fortran_c_interop.h>
0024 #include <fortran/ct_lvtclp_fortran_tool.h>
0025 #endif
0026 
0027 #include <clang/Tooling/JSONCompilationDatabase.h>
0028 #include <ct_lvtmdb_functionobject.h>
0029 #include <ct_lvtmdb_soci_reader.h>
0030 
#include <algorithm>
#include <cstdlib>
#include <filesystem>
#include <iostream>
#include <iterator>
#include <string>
#include <system_error>
#include <utility>
#include <vector>
0036 
0037 #include <QtGlobal>
0038 
0039 #ifdef Q_OS_WINDOWS
0040 #include <stdio.h>
0041 #include <windows.h>
0042 #else
0043 #include <csignal>
0044 #endif
0045 
0046 #include <QCommandLineOption>
0047 #include <QCommandLineParser>
0048 #include <QCoreApplication>
0049 #include <QJsonDocument>
0050 #include <QJsonObject>
0051 #include <QString>
0052 #include <QStringList>
0053 
0054 #include <ct_lvtmdb_objectstore.h>
0055 #include <ct_lvtmdb_soci_writer.h>
0056 
0057 #include <memory>
0058 
0059 #pragma push_macro("slots")
0060 #undef slots
0061 #include <pybind11/embed.h>
0062 #include <pybind11/pybind11.h>
0063 #pragma pop_macro("slots")
0064 
0065 namespace py = pybind11;
0066 struct PyDefaultGilReleasedContext {
0067     py::scoped_interpreter pyInterp;
0068     py::gil_scoped_release pyGilDefaultReleased;
0069 };
0070 
0071 namespace {
0072 
0073 struct CommandLineArgs {
0074     std::filesystem::path sourcePath;
0075     std::filesystem::path dbPath;
0076     std::vector<std::filesystem::path> compilationDbPaths;
0077     QJsonDocument compilationCommand;
0078     unsigned numThreads = 1;
0079     std::vector<std::string> ignoreList;
0080     std::vector<std::pair<std::string, std::string>> packageMappings;
0081     std::vector<std::filesystem::path> nonLakosianDirs;
0082     bool update = false;
0083     bool physicalOnly = false;
0084     bool silent = false;
0085     Codethink::lvtclp::CppTool::UseSystemHeaders useSystemHeaders =
0086         Codethink::lvtclp::CppTool::UseSystemHeaders::e_Query;
0087 };
0088 
// Outcome of `parseCommandLine`; tells `main` what to do next.
enum class CommandLineParseResult {
    // Arguments were parsed and validated; proceed with the run.
    Ok,
    // Arguments were invalid; an explanation may be in the error message.
    Error,
    // The help option was given.
    Help,
    // --query-system-headers was given.
    Query,
};
0095 
0096 CommandLineParseResult parseCommandLine(QCommandLineParser& parser, CommandLineArgs& args, std::string& errorMessage)
0097 {
0098     parser.setApplicationDescription("Build code database");
0099 
0100     const QCommandLineOption outputFile({"output", "o"}, "Output database file", "OUTPUT_FILE", "");
0101     const QCommandLineOption compileCommandsJson(
0102         "compile-commands-json",
0103         "Path to the compile_commands.json file from cmake. incompatible with compile-command option.",
0104         "COMPILE_COMMANDS",
0105         "");
0106     const QCommandLineOption compileCommand(
0107         "compile-command",
0108         "A single json-object of the compile_commands.json file, generating a single "
0109         "object file. Incompatible with compile-commands-json option."
0110         "It must contain the keys directory, command, file and output.",
0111         "COMPILE_COMMANDS_OBJ",
0112         "");
0113 
0114     const QCommandLineOption helpOption = parser.addHelpOption();
0115     const QCommandLineOption sourcePath("source-path", "Path for source code", "SOURCE_PATH", "");
0116     const QCommandLineOption numThreads("j", "Number of threads to use", "NUM_THREADS", "1");
0117     const QCommandLineOption ignoreList(
0118         "ignore",
0119         "Ignore file paths matching this glob pattern. This may be specified more than once.",
0120         "IGNORES",
0121         {"*.t.cpp"});
0122     const QCommandLineOption pkgmap("pkgmap",
0123                                     "Maps regex of places to package names so that they have meaningful packages in "
0124                                     "the generated database. e.g.: \"/llvm*/\":\"LLVM\"",
0125                                     "PKGMAP",
0126                                     {});
0127     const QCommandLineOption nonlakosianDirs("non-lakosian",
0128                                              "Treat any package inside this directory as part of the \"non-lakosian\" "
0129                                              "group. This may be specified more than once.",
0130                                              "NON-LAKOSIANS",
0131                                              {});
0132     const QCommandLineOption update("update", "updates an existing database file");
0133     const QCommandLineOption replace("replace", "replaces an existing database file");
0134     const QCommandLineOption physicalOnly("physical-only", "Only look for physical entities and relationships");
0135     const QCommandLineOption silent("silent", "supress stdout");
0136     const QCommandLineOption queryHeaders(
0137         "query-system-headers",
0138         "Query if we need system headers. the return code will be 0 for no and 1 for yes.");
0139 
0140     const QCommandLineOption useSystemHeaders(
0141         {QStringLiteral("use-system-headers")},
0142         "Asks clang to look for system headers. Must be checked beforehand with the --query-system-headers call. "
0143         "defaults to `Query`, meaning that we don't know if we have the system headers and we will look for them in "
0144         "the system. Possible values are yes/no/query",
0145         "USE_SYSTEM_HEADERS",
0146         "query");
0147 
0148     parser.addOptions({outputFile,
0149                        compileCommandsJson,
0150                        compileCommand,
0151                        sourcePath,
0152                        numThreads,
0153                        ignoreList,
0154                        pkgmap,
0155                        nonlakosianDirs,
0156                        update,
0157                        replace,
0158                        physicalOnly,
0159                        silent,
0160                        queryHeaders,
0161                        useSystemHeaders});
0162 
0163     if (!parser.parse(QCoreApplication::arguments())) {
0164         errorMessage = qPrintable(parser.errorText());
0165         return CommandLineParseResult::Error;
0166     }
0167 
0168     if (parser.isSet(helpOption)) {
0169         return CommandLineParseResult::Help;
0170     }
0171 
0172     if (parser.isSet(queryHeaders)) {
0173         return CommandLineParseResult::Query;
0174     }
0175 
0176     args.dbPath = parser.value(outputFile).toStdString();
0177     const auto compileCommands = parser.values(compileCommandsJson);
0178 
0179     args.compilationDbPaths.reserve(compileCommands.size());
0180     std::transform(compileCommands.begin(),
0181                    compileCommands.end(),
0182                    std::back_inserter(args.compilationDbPaths),
0183                    [](const QString& str) {
0184                        return std::filesystem::path(str.toStdString());
0185                    });
0186 
0187     for (const std::filesystem::path& compDbPath : args.compilationDbPaths) {
0188         if (!std::filesystem::is_regular_file(compDbPath)) {
0189             errorMessage = compDbPath.string() + " is not a file";
0190             return CommandLineParseResult::Error;
0191         }
0192     }
0193 
0194     // number of threads
0195     QString numThreadsStr = parser.value(numThreads);
0196     bool convOk;
0197     args.numThreads = numThreadsStr.toUInt(&convOk);
0198     if (!convOk) {
0199         errorMessage = '\'' + numThreadsStr.toStdString() + "' is not an unsigned integer";
0200         return CommandLineParseResult::Error;
0201     }
0202     if (args.numThreads < 1) {
0203         args.numThreads = 1;
0204     }
0205 
0206     // ignore list
0207     QStringList ignores = parser.values(ignoreList);
0208     std::transform(ignores.begin(), ignores.end(), std::back_inserter(args.ignoreList), [](const QString& ignore) {
0209         return ignore.toStdString();
0210     });
0211 
0212     // Package mapping
0213     QStringList pkgmaps = parser.values(pkgmap);
0214     for (auto const& p : pkgmaps) {
0215         auto mapAsString = p.toStdString();
0216         auto separatorPos = mapAsString.find(':');
0217         if (separatorPos == std::string::npos) {
0218             errorMessage =
0219                 "Unexpected package mapping: '" + mapAsString + R"('. Expected format = "path_regex":"PkgName".)";
0220             return CommandLineParseResult::Error;
0221         }
0222 
0223         auto pathRegex = mapAsString.substr(0, separatorPos);
0224         auto pkgName = mapAsString.substr(separatorPos + 1, mapAsString.size());
0225         args.packageMappings.emplace_back(std::make_pair(pathRegex, pkgName));
0226     }
0227 
0228     // non-lakosian dirs
0229     QStringList nonlakosians = parser.values(nonlakosianDirs);
0230     std::transform(nonlakosians.begin(),
0231                    nonlakosians.end(),
0232                    std::back_inserter(args.nonLakosianDirs),
0233                    [](const QString& dir) {
0234                        return dir.toStdString();
0235                    });
0236 
0237     // incremental update
0238     args.update = parser.isSet(update);
0239     if (args.update && parser.isSet(replace)) {
0240         errorMessage = "--update and --replace cannot be set together";
0241         return CommandLineParseResult::Error;
0242     }
0243 
0244     if (std::filesystem::is_regular_file(args.dbPath)) {
0245         if (parser.isSet(replace)) {
0246             if (!std::filesystem::remove(args.dbPath)) {
0247                 errorMessage = "Error removing " + args.dbPath.string();
0248                 return CommandLineParseResult::Error;
0249             }
0250         } else if (!args.update) {
0251             // database exists but we aren't overwriting
0252             errorMessage = args.dbPath.string() + " already exists. Try --update or --replace";
0253             return CommandLineParseResult::Error;
0254         }
0255     }
0256 
0257     if (parser.isSet(compileCommand)) {
0258         QJsonParseError possibleError{};
0259         args.compilationCommand = QJsonDocument::fromJson(parser.value(compileCommand).toLocal8Bit(), &possibleError);
0260         if (possibleError.error != QJsonParseError::NoError) {
0261             errorMessage = possibleError.errorString().toStdString();
0262             return CommandLineParseResult::Error;
0263         }
0264     }
0265 
0266     if (!parser.isSet(useSystemHeaders)) {
0267         args.useSystemHeaders = Codethink::lvtclp::CppTool::UseSystemHeaders::e_Query;
0268     } else {
0269         const QString val = parser.value(useSystemHeaders).toLower();
0270         args.useSystemHeaders = val == "yes" ? Codethink::lvtclp::CppTool::UseSystemHeaders::e_Yes
0271             : val == "no"                    ? Codethink::lvtclp::CppTool::UseSystemHeaders::e_No
0272                                              : Codethink::lvtclp::CppTool::UseSystemHeaders::e_Query;
0273     }
0274 
0275     // physicalOnly
0276     args.physicalOnly = parser.isSet(physicalOnly);
0277     args.silent = parser.isSet(silent);
0278     args.sourcePath = parser.value(sourcePath).toStdString();
0279 
0280     return CommandLineParseResult::Ok;
0281 }
0282 
0283 } // namespace
0284 
0285 static void setupPath(char **argv)
0286 {
0287     const std::filesystem::path argv0(argv[0]);
0288     const std::filesystem::path appimagePath = argv0.parent_path();
0289 
0290     qputenv("CT_LVT_BINDIR", QByteArray::fromStdString(appimagePath.string()));
0291 }
0292 
0293 cpp::result<clang::tooling::CompileCommand, std::string> fromJson(const QJsonDocument& doc)
0294 {
0295     const auto obj = doc.object();
0296 
0297     // validate the keys;
0298     if (!obj.contains("directory")) {
0299         return cpp::fail("Missing directory entry on the json field");
0300     }
0301     if (!obj.contains("command")) {
0302         return cpp::fail("Missing command entry on the json field");
0303     }
0304     if (!obj.contains("file")) {
0305         return cpp::fail("Missing file entry on the json field");
0306     }
0307     if (!obj.contains("output")) {
0308         return cpp::fail("Missing output entry on the json field");
0309     }
0310 
0311     std::string dir = obj["directory"].toString().toStdString();
0312     std::string file = obj["file"].toString().toStdString();
0313     std::string output = obj["output"].toString().toStdString();
0314 
0315 #if (QT_VERSION >= QT_VERSION_CHECK(5, 14, 0))
0316     QStringList commands = obj["command"].toString().split(" ", Qt::SkipEmptyParts);
0317 #else
0318     QStringList commands = obj["command"].toString().split(" ", QString::SkipEmptyParts);
0319 #endif
0320 
0321     std::vector<std::string> commandLine;
0322     for (const auto& command : commands) {
0323         commandLine.push_back(command.toStdString());
0324     }
0325 
0326     clang::tooling::CompileCommand cmd;
0327     cmd.Directory = dir;
0328     cmd.CommandLine = commandLine;
0329     cmd.Filename = file;
0330     cmd.Output = output;
0331 
0332     return cmd;
0333 }
0334 
#ifdef Q_OS_WINDOWS
// Console control handler: terminates the process with exit code 1 on
// Ctrl+C and on console close; every other event is reported as not handled.
BOOL WINAPI CtrlHandler(DWORD fdwCtrlType)
{
    const bool terminate = fdwCtrlType == CTRL_C_EVENT || fdwCtrlType == CTRL_CLOSE_EVENT;
    if (terminate) {
        exit(1);
    }
    return false;
}
#else
// Signal handler: terminates the process, using the signal number as the
// exit code.
// NOTE(review): exit() is not on the POSIX list of async-signal-safe
// functions - confirm that calling it from a handler is intended.
void signal_callback_handler(int signum)
{
    exit(signum);
}
#endif
0355 
0356 int main(int argc, char **argv)
0357 {
0358     using namespace Codethink::lvtclp;
0359 
0360     QCoreApplication app(argc, argv);
0361 #ifdef Q_OS_WINDOWS
0362     SetConsoleCtrlHandler(CtrlHandler, TRUE);
0363 #else
0364     (void) signal(SIGINT, signal_callback_handler);
0365 #endif
0366 
0367     setupPath(argv);
0368     PyDefaultGilReleasedContext _pyDefaultGilReleasedContext;
0369 
0370     QCommandLineParser parser;
0371     CommandLineArgs args;
0372     std::string errorMessage;
0373 
0374     switch (parseCommandLine(parser, args, errorMessage)) {
0375     case CommandLineParseResult::Ok:
0376         break;
0377     case CommandLineParseResult::Error:
0378         if (!errorMessage.empty()) {
0379             std::cerr << errorMessage << "\n\n";
0380         }
0381         parser.showHelp();
0382         Q_UNREACHABLE();
0383     case CommandLineParseResult::Help:
0384         parser.showHelp();
0385         Q_UNREACHABLE();
0386     case CommandLineParseResult::Query:
0387         const bool useSystemHeaders = CompilerUtil::weNeedSystemHeaders();
0388         std::cout << "We need system headers? " << (useSystemHeaders ? "yes" : "no") << "\n";
0389         return useSystemHeaders ? 1 : 0;
0390     }
0391 
0392     // using QString for the `.endsWith`.
0393     // TODO: remove this when we move to c++20.
0394     const QString dbPath = QString::fromStdString(args.dbPath.string());
0395     if (!dbPath.endsWith(".db")) {
0396         args.dbPath += ".db";
0397     }
0398     if (!args.sourcePath.empty()) {
0399         if (!exists(args.sourcePath)) {
0400             std::cerr << "Given source path doesn't exist: '" << args.sourcePath.string() << "'\n";
0401             return EXIT_FAILURE;
0402         }
0403         args.sourcePath = std::filesystem::canonical(args.sourcePath).string();
0404     }
0405 
0406     if (!args.packageMappings.empty()) {
0407         std::cout << "Using the following mapping regexes:\n";
0408         for (auto const& [k, v] : args.packageMappings) {
0409             std::cout << "- " << k << " => " << v << "\n";
0410         }
0411     }
0412 
0413     if (args.compilationCommand.isObject() && !args.compilationDbPaths.empty()) {
0414         std::cerr << "Choose only a compile command or the compile-commands.json file.\n";
0415         return EXIT_FAILURE;
0416     }
0417 
0418     if (args.compilationCommand.isObject() && args.numThreads != 1) {
0419         std::cout << "Multiple threads are ignored in this run.\n";
0420         return EXIT_FAILURE;
0421     }
0422 
0423     auto compileCommand = fromJson(args.compilationCommand);
0424     if (compileCommand.has_error() && args.compilationDbPaths.empty()) {
0425         std::cerr << "Invalid compile commands passed.\n";
0426         return EXIT_FAILURE;
0427     }
0428 
0429     auto sharedObjectStore = std::make_shared<Codethink::lvtmdb::ObjectStore>();
0430     auto clang_tool = !args.compilationDbPaths.empty() ? std::make_unique<CppTool>(args.sourcePath,
0431                                                                                    args.compilationDbPaths,
0432                                                                                    args.dbPath,
0433                                                                                    args.numThreads,
0434                                                                                    args.ignoreList,
0435                                                                                    args.nonLakosianDirs,
0436                                                                                    args.packageMappings,
0437                                                                                    !args.silent)
0438                                                        : std::make_unique<CppTool>(args.sourcePath,
0439                                                                                    compileCommand.value(),
0440                                                                                    args.dbPath,
0441                                                                                    args.ignoreList,
0442                                                                                    args.nonLakosianDirs,
0443                                                                                    args.packageMappings,
0444                                                                                    !args.silent);
0445     clang_tool->setSharedMemDb(sharedObjectStore);
0446     clang_tool->setUseSystemHeaders(args.useSystemHeaders);
0447 
0448 #ifdef CT_ENABLE_FORTRAN_SCANNER
0449     auto flang_tool = fortran::Tool::fromCompileCommands(args.compilationDbPaths[0]);
0450     flang_tool->setSharedMemDb(sharedObjectStore);
0451     const bool success = [&]() {
0452         if (args.physicalOnly) {
0453             auto clang_result = clang_tool->runPhysical();
0454             auto flang_result = flang_tool->runPhysical();
0455             return clang_result && flang_result;
0456         }
0457         auto clang_result = clang_tool->runFull();
0458         auto flang_result = flang_tool->runFull();
0459         return clang_result && flang_result;
0460     }();
0461     fortran::solveFortranToCInteropDeps(*sharedObjectStore);
0462 #else
0463     const bool success = [&]() {
0464         if (args.physicalOnly) {
0465             auto clang_result = clang_tool->runPhysical();
0466             return clang_result;
0467         }
0468         auto clang_result = clang_tool->runFull();
0469         return clang_result;
0470     }();
0471 #endif
0472 
0473     if (!success) {
0474         std::cerr << "Error generating database\n";
0475         return EXIT_FAILURE;
0476     }
0477 
0478     // Currently the call to `tool->runPhysical` and `tool->runFull` are
0479     // already saving data to a DB, and because of that acessing the DataWiter
0480     // as we are doing right now is way more slow than it should.
0481     // The database stored by the tool is in memory, and we need to dump
0482     // to disk, so we can ignore it - and use the DataWriter to fetch
0483     // information from the tool and dump *that* info to disk.
0484     {
0485         Codethink::lvtmdb::SociWriter writer;
0486         if (!writer.createOrOpen(args.dbPath.string())) {
0487             std::cerr << "Error saving database file to disk\n";
0488             return EXIT_FAILURE;
0489         }
0490         sharedObjectStore->writeToDatabase(writer);
0491     }
0492 
0493     return EXIT_SUCCESS;
0494 }