File indexing completed on 2024-05-19 05:41:59

0001 // ct_lvtclp_clputil.cpp                                              -*-C++-*-
0002 
0003 /*
0004 // Copyright 2023 Codethink Ltd <codethink@codethink.co.uk>
0005 // SPDX-License-Identifier: Apache-2.0
0006 //
0007 // Licensed under the Apache License, Version 2.0 (the "License");
0008 // you may not use this file except in compliance with the License.
0009 // You may obtain a copy of the License at
0010 //
0011 //     http://www.apache.org/licenses/LICENSE-2.0
0012 //
0013 // Unless required by applicable law or agreed to in writing, software
0014 // distributed under the License is distributed on an "AS IS" BASIS,
0015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0016 // See the License for the specific language governing permissions and
0017 // limitations under the License.
0018 */
0019 
#include <ct_lvtclp_clputil.h>

#include <ct_lvtclp_componentutil.h>

#include <ct_lvtmdb_componentobject.h>
#include <ct_lvtmdb_functionobject.h>
#include <ct_lvtmdb_objectstore.h>
#include <ct_lvtmdb_packageobject.h>
#include <ct_lvtmdb_typeobject.h>

#include <ct_lvtclp_fileutil.h>
#include <ct_lvtshr_stringhelpers.h>

#include <clang/Tooling/JSONCompilationDatabase.h>
#include <llvm/Support/FileSystem.h>
#include <llvm/Support/GlobPattern.h>
#include <pybind11/embed.h>
#include <pybind11/functional.h>
#include <pybind11/stl.h>

#include <QCoreApplication>
#include <QDir>

#include <algorithm>
#include <cassert>
#include <memory>
#include <regex>
#include <set>
#include <system_error>
#include <thread>
0048 
0049 namespace {
0050 
0051 using namespace Codethink;
0052 
0053 lvtmdb::PackageObject *getSourcePackage(const std::string& qualifiedName,
0054                                         std::string name,
0055                                         std::string diskPath,
0056                                         lvtmdb::ObjectStore& memDb,
0057                                         lvtmdb::PackageObject *parent,
0058                                         lvtmdb::RepositoryObject *repository)
0059 // assumes memDb is already locked for writing
0060 {
0061     if (qualifiedName.empty()) {
0062         return nullptr;
0063     }
0064 
0065     return memDb.getOrAddPackage(qualifiedName, std::move(name), std::move(diskPath), parent, repository);
0066 }
0067 
0068 /**
0069  * Visual studio was giving compiler error with local lambda function. In order to solve the issue, the lambda has been
0070  * extracted to this struct - It is meant to be used in `getPackageForPath` only.
0071  */
0072 struct addPkgForSemPackRuleHelper {
0073     explicit addPkgForSemPackRuleHelper(lvtmdb::ObjectStore& memDb): memDb(memDb)
0074     {
0075     }
0076 
0077     void operator()(std::string const& qualifiedName,
0078                     std::optional<std::string> parentQualifiedName = std::nullopt,
0079                     std::optional<std::string> repositoryName = std::nullopt,
0080                     std::optional<std::string> path = std::nullopt)
0081     {
0082         auto *repository = repositoryName ? memDb.getOrAddRepository(*repositoryName, "") : nullptr;
0083         auto *parent = parentQualifiedName
0084             ? memDb.getOrAddPackage(*parentQualifiedName, *parentQualifiedName, "", nullptr, repository)
0085             : nullptr;
0086         memDb.getOrAddPackage(qualifiedName, qualifiedName, path ? *path : "", parent, repository);
0087     }
0088 
0089   private:
0090     lvtmdb::ObjectStore& memDb;
0091 };
0092 
0093 lvtmdb::PackageObject *getPackageForPath(const std::filesystem::path& path,
0094                                          lvtmdb::ObjectStore& memDb,
0095                                          const std::filesystem::path& prefix,
0096                                          const std::vector<std::filesystem::path>& nonLakosianDirs,
0097                                          const std::vector<std::pair<std::string, std::string>>& thirdPartyDirs)
0098 // assumes path is already normalised using ClpUtil::normalisePath
0099 // assumes memDb is already locked for writing
0100 {
0101     using namespace Codethink::lvtclp;
0102 
0103     // synthesise package from the directory containing the source file
0104     const std::filesystem::path pkgPath = path.parent_path();
0105 
0106     lvtmdb::PackageObject *pkg = memDb.getPackage(pkgPath.string());
0107     if (pkg) {
0108         return pkg;
0109     }
0110 
0111     auto addPkg = addPkgForSemPackRuleHelper{memDb};
0112     auto fullFilePathQString = QString::fromStdString((prefix / path).string());
0113     auto fullFilePath = QDir::fromNativeSeparators(fullFilePathQString).toStdString();
0114     for (auto&& semanticPackingRule : ClpUtil::getAllSemanticPackingRules()) {
0115         if (semanticPackingRule->accept(fullFilePath)) {
0116             auto pkgQName = semanticPackingRule->process(fullFilePath, addPkg);
0117             return memDb.getPackage(pkgQName);
0118         }
0119     }
0120 
0121     const std::filesystem::path fullPkgPath = prefix / pkgPath;
0122     bool nonLakosian = false;
0123     for (const std::filesystem::path& nonLakosianDir : nonLakosianDirs) {
0124         if (FileUtil::pathStartsWith(nonLakosianDir, fullPkgPath)) {
0125             nonLakosian = true;
0126             break;
0127         }
0128     }
0129 
0130     std::string topLevelPkgQualifiedName;
0131     std::string topLevelPkgName;
0132     auto filePath = QString::fromStdString(fullPkgPath.string());
0133 
0134     bool isMapped = false;
0135     for (auto const& [mappedPathRegex, mappedGroupName] : thirdPartyDirs) {
0136         if (std::regex_search(path.string(), std::regex{mappedPathRegex})) {
0137             topLevelPkgQualifiedName = mappedGroupName;
0138             topLevelPkgName = mappedGroupName;
0139             isMapped = true;
0140             break;
0141         }
0142     }
0143 
0144     auto isStandalonePkg = false;
0145     if (!isMapped) {
0146         if (nonLakosian) {
0147             topLevelPkgQualifiedName = ClpUtil::NON_LAKOSIAN_GROUP_NAME;
0148             topLevelPkgName = ClpUtil::NON_LAKOSIAN_GROUP_NAME;
0149         } else {
0150             auto projectSource = QString::fromStdString(prefix.string());
0151             isStandalonePkg = ClpUtil::isComponentOnStandalonePackage(path);
0152             if (isStandalonePkg) {
0153                 topLevelPkgQualifiedName = ("standalones" / pkgPath.filename()).string();
0154                 topLevelPkgName = pkgPath.filename().string();
0155 
0156                 if (filePath.startsWith(projectSource)) {
0157                     filePath.replace(projectSource, "${SOURCE_DIR}/");
0158                 }
0159             } else if (ClpUtil::isComponentOnPackageGroup(path)) {
0160                 topLevelPkgQualifiedName = ("groups" / pkgPath.parent_path().filename()).string();
0161                 topLevelPkgName = pkgPath.parent_path().filename().string();
0162 
0163                 if (filePath.startsWith(projectSource)) {
0164                     filePath.replace(projectSource, "${SOURCE_DIR}/");
0165                 }
0166             } else {
0167                 topLevelPkgQualifiedName = lvtclp::ClpUtil::NON_LAKOSIAN_GROUP_NAME;
0168                 topLevelPkgName = lvtclp::ClpUtil::NON_LAKOSIAN_GROUP_NAME;
0169             }
0170         }
0171     }
0172 
0173     if (isStandalonePkg) {
0174         return getSourcePackage(topLevelPkgQualifiedName,
0175                                 std::move(topLevelPkgName),
0176                                 filePath.toStdString(),
0177                                 memDb,
0178                                 nullptr,
0179                                 nullptr);
0180     }
0181 
0182     // Either package inside a group or non-lakosian package
0183     auto *grp = getSourcePackage(topLevelPkgQualifiedName,
0184                                  std::move(topLevelPkgName),
0185                                  filePath.toStdString(),
0186                                  memDb,
0187                                  nullptr,
0188                                  nullptr);
0189     if (pkgPath.filename().string().empty()) {
0190         return grp;
0191     }
0192 
0193     return getSourcePackage(topLevelPkgQualifiedName + "/" + pkgPath.filename().string(),
0194                             pkgPath.filename().string(),
0195                             filePath.toStdString(),
0196                             memDb,
0197                             grp,
0198                             nullptr);
0199 }
0200 
0201 } // namespace
0202 
0203 namespace Codethink::lvtclp {
0204 
0205 std::filesystem::path ClpUtil::normalisePath(std::filesystem::path path, const std::filesystem::path& prefix)
0206 {
0207     if (!path.empty()) {
0208         path = std::filesystem::weakly_canonical(path);
0209     }
0210 
0211 #ifdef NDEBUG
0212     // the call to std::filesystem::weakly_canonical is expensive for the
0213     // amount of times we call it, but we are always passing `d->prefix` to it
0214     // so we can call that once, and cache the results.
0215     // the assert here makes sure we did not forgot to do this somewhere,
0216     // and it's only active on debug mode.
0217     assert(prefix == std::filesystem::weakly_canonical(prefix));
0218 #endif
0219 
0220     if (FileUtil::pathStartsWith(prefix, path)) {
0221         path = FileUtil::nonPrefixPart(prefix, path);
0222     }
0223 
0224     return path;
0225 }
0226 
0227 lvtmdb::FileObject *ClpUtil::writeSourceFile(const std::string& inFilename,
0228                                              bool isHeader,
0229                                              lvtmdb::ObjectStore& memDb,
0230                                              const std::filesystem::path& prefix,
0231                                              const std::vector<std::filesystem::path>& nonLakosianDirs,
0232                                              const std::vector<std::pair<std::string, std::string>>& thirdPartyDirs)
0233 {
0234     if (inFilename.empty()) {
0235         return nullptr;
0236     }
0237 
0238     const std::filesystem::path path = normalisePath(inFilename, prefix);
0239     const std::string filename = path.string();
0240 
0241     lvtmdb::FileObject *ret = nullptr;
0242     memDb.withROLock([&] {
0243         ret = memDb.getFile(filename);
0244     });
0245     if (ret) {
0246         return ret;
0247     }
0248 
0249     const std::filesystem::path fullPath = prefix / path;
0250     auto hash = [&fullPath]() -> std::string {
0251         auto result = llvm::sys::fs::md5_contents(fullPath.string());
0252         if (result) {
0253             return result.get().digest().str().str();
0254         }
0255 
0256         // allow failure to hash file contents because we use memory mapped files in tests
0257         return "";
0258     }();
0259 
0260     auto lock = memDb.rwLock();
0261     (void) lock; // cppcheck
0262     lvtmdb::PackageObject *package = getPackageForPath(path, memDb, prefix, nonLakosianDirs, thirdPartyDirs);
0263     lvtmdb::ComponentObject *component = ComponentUtil::addComponent(path, package, memDb);
0264     lvtmdb::FileObject *file =
0265         memDb.getOrAddFile(filename, path.filename().string(), isHeader, hash, package, component);
0266 
0267     component->withRWLock([&] {
0268         component->addFile(file);
0269     });
0270     package->withRWLock([&] {
0271         package->addComponent(component);
0272     });
0273 
0274     return file;
0275 }
0276 
0277 std::string ClpUtil::getRealPath(const clang::SourceLocation& loc, const clang::SourceManager& mgr)
0278 {
0279     auto pathFromLocation = [](const clang::SourceLocation& loc, const clang::SourceManager& mgr) -> std::string {
0280         const clang::FileID id = mgr.getFileID(loc);
0281         const clang::FileEntry *entry = mgr.getFileEntryForID(id);
0282 
0283         std::string filePath;
0284         if (entry) {
0285             filePath = entry->tryGetRealPathName().str();
0286         }
0287 
0288         return filePath;
0289     };
0290 
0291     std::string res = pathFromLocation(mgr.getSpellingLoc(loc), mgr);
0292     if (res.empty()) {
0293         res = pathFromLocation(mgr.getExpansionLoc(loc), mgr);
0294     }
0295 
0296     return res;
0297 }
0298 
0299 void ClpUtil::writeDependencyRelations(lvtmdb::PackageObject *source, lvtmdb::PackageObject *target)
0300 {
0301     if (!source || !target || source == target) {
0302         return; // RETURN
0303     }
0304     lvtmdb::PackageObject::addDependency(source, target);
0305 }
0306 
0307 void ClpUtil::addUsesInInter(lvtmdb::TypeObject *source, lvtmdb::TypeObject *target)
0308 {
0309     if (!source || !target || source == target) {
0310         return;
0311     }
0312     lvtmdb::TypeObject::addUsesInTheInterface(source, target);
0313 }
0314 
0315 void ClpUtil::addUsesInImpl(lvtmdb::TypeObject *source, lvtmdb::TypeObject *target)
0316 {
0317     if (!source || !target || source == target) {
0318         return;
0319     }
0320     lvtmdb::TypeObject::addUsesInTheImplementation(source, target);
0321 }
0322 
0323 void ClpUtil::addFnDependency(lvtmdb::FunctionObject *source, lvtmdb::FunctionObject *target)
0324 {
0325     if (!source || !target || source == target) {
0326         return;
0327     }
0328     lvtmdb::FunctionObject::addDependency(source, target);
0329 }
0330 
0331 FileType ClpUtil::categorisePath(const std::string& file)
0332 {
0333     const std::filesystem::path path(file);
0334     const std::string ext = path.extension().string();
0335 
0336     static const std::set<std::string> headerExtensions({".h", ".hh", ".h++", ".hpp"});
0337     static const std::set<std::string> sourceExtensions({".cpp", ".c", ".C", ".c++", ".cc", ".cxx", ".t.cpp"});
0338     static const std::set<std::string> otherExtensions({".dep", ".mem", ".o", ".swp", ".md", ".txt", ""});
0339 
0340     if (headerExtensions.count(ext)) {
0341         return FileType::e_Header;
0342     }
0343     if (sourceExtensions.count(ext)) {
0344         return FileType::e_Source;
0345     }
0346     if (otherExtensions.count(ext)) {
0347         return FileType::e_UnknownUnknown;
0348     }
0349     return FileType::e_KnownUnknown;
0350 }
0351 
// Name (and qualified name) of the top-level package that getPackageForPath
// uses for files that are under a non-lakosian directory or that match
// neither the standalone-package nor the package-group naming rules.
const char *const ClpUtil::NON_LAKOSIAN_GROUP_NAME = "non-lakosian group";
0353 
// Private state of CombinedCompilationDatabase.
struct CombinedCompilationDatabase::Private {
    // Compile commands accepted by addCompilationDatabase, in insertion order.
    std::vector<clang::tooling::CompileCommand> compileCommands;
    // Filename of every accepted command; addCompilationDatabase appends to
    // this vector and to `compileCommands` together.
    std::vector<std::string> files;
};
0358 
// Creates an empty database: no compile commands and no files.
CombinedCompilationDatabase::CombinedCompilationDatabase(): d(std::make_unique<Private>())
{
}

// Defaulted here, in the translation unit where `Private` is a complete type.
CombinedCompilationDatabase::~CombinedCompilationDatabase() noexcept = default;
0364 
0365 cpp::result<bool, CompilationDatabaseError>
0366 CombinedCompilationDatabase::addCompilationDatabase(const std::filesystem::path& path)
0367 {
0368     std::string errorMessage;
0369     std::unique_ptr<clang::tooling::JSONCompilationDatabase> jsonDb =
0370         clang::tooling::JSONCompilationDatabase::loadFromFile(path.string(),
0371                                                               errorMessage,
0372                                                               clang::tooling::JSONCommandLineSyntax::AutoDetect);
0373     if (!jsonDb) {
0374         return cpp::fail(CompilationDatabaseError{CompilationDatabaseError::Kind::ErrorLoadingFromFile, errorMessage});
0375     }
0376 
0377     if (jsonDb->getAllCompileCommands().size() == 0) {
0378         return cpp::fail(CompilationDatabaseError{CompilationDatabaseError::Kind::CompileCommandsContainsNoCommands});
0379     }
0380 
0381     if (jsonDb->getAllFiles().size() == 0) {
0382         return cpp::fail(CompilationDatabaseError{CompilationDatabaseError::Kind::CompileCommandsContainsNoFiles});
0383     }
0384 
0385     const std::filesystem::path buildDir = path.parent_path();
0386     addCompilationDatabase(*jsonDb, buildDir);
0387     return {};
0388 }
0389 
0390 void CombinedCompilationDatabase::addCompilationDatabase(const clang::tooling::CompilationDatabase& db,
0391                                                          const std::filesystem::path& buildDir)
0392 {
0393     for (clang::tooling::CompileCommand cmd : db.getAllCompileCommands()) {
0394         // resolve any relative paths
0395         std::filesystem::path filename(cmd.Filename);
0396         if (filename.is_relative()) {
0397             filename = buildDir / filename;
0398         }
0399         cmd.Filename = filename.string();
0400 
0401         auto ext = filename.extension().string();
0402         if (ext != ".cc" && ext != ".cpp" && ext != ".c" && ext != ".h" && ext != ".hpp" && ext != ".hh") {
0403             continue;
0404         }
0405 
0406         d->files.push_back(cmd.Filename);
0407         d->compileCommands.push_back(std::move(cmd));
0408     }
0409 }
0410 
0411 std::vector<clang::tooling::CompileCommand>
0412 CombinedCompilationDatabase::getCompileCommands(llvm::StringRef FilePath) const
0413 {
0414     for (auto const& cmd : d->compileCommands) {
0415         if (cmd.Filename == FilePath) {
0416             return {cmd};
0417         }
0418     }
0419     return {};
0420 }
0421 
// Returns a copy of the file names of all stored compile commands.
std::vector<std::string> CombinedCompilationDatabase::getAllFiles() const
{
    return d->files;
}

// Returns a copy of all stored compile commands.
std::vector<clang::tooling::CompileCommand> CombinedCompilationDatabase::getAllCompileCommands() const
{
    return d->compileCommands;
}
0431 
0432 long ClpUtil::getThreadId()
0433 {
0434     return static_cast<long>(std::hash<std::thread::id>{}(std::this_thread::get_id()));
0435 }
0436 
0437 bool ClpUtil::isComponentOnPackageGroup(const std::filesystem::path& componentPath)
0438 {
0439     // Check if the component name starts with the package name, and if the package name starts with the package
0440     // group name. Those are the basic rules for a component be within a package that's inside a package group.
0441     auto componentName = QString::fromStdString(componentPath.filename().string());
0442     auto pkgName = QString::fromStdString(componentPath.parent_path().filename().string());
0443     auto pkgGroupName = QString::fromStdString(componentPath.parent_path().parent_path().filename().string());
0444     if (pkgGroupName.size() != 3) {
0445         return false;
0446     }
0447 
0448     if (componentName.startsWith(pkgName + "_") && pkgName.startsWith(pkgGroupName)) {
0449         return true;
0450     }
0451 
0452     // Except if the package name contains a '+' sign.
0453     auto trySpecialPkgName = pkgName.split("+");
0454     if (trySpecialPkgName.size() == 2) {
0455         auto specialPkgName = trySpecialPkgName[0];
0456         if (componentName.startsWith(specialPkgName + "_") && specialPkgName.startsWith(pkgGroupName)) {
0457             return true;
0458         }
0459     }
0460 
0461     return false;
0462 }
0463 
0464 bool ClpUtil::isComponentOnStandalonePackage(const std::filesystem::path& componentPath)
0465 {
0466     // Capture special component naming inside standalone package in the form
0467     // <prefix>_<pkgname>_<component_name>. e.g.: ct_lvtclp_filesystemscanner.cpp
0468     auto hasStandaloneNameWithPkgPrefix =
0469         std::regex_search(componentPath.string(), std::regex{"/([a-zA-Z]{1,2})_([a-zA-Z0-9_]+)\\."});
0470     if (hasStandaloneNameWithPkgPrefix) {
0471         // In order to be a valid standalone package, the component must be inside a package containing it's prefix
0472         auto componentName = QString::fromStdString(componentPath.filename().string());
0473         auto parentPkgName = QString::fromStdString(componentPath.parent_path().filename().string());
0474         if (componentName.startsWith(parentPkgName)) {
0475             return true;
0476         }
0477 
0478         // The containing package may be acceptable if it is named without the '<prefix>_' prefix.
0479         // In other words, a component named '<prefix>_<pkgname>_<component_name>' can be inside a package named
0480         // <pkgname>.
0481         auto splitComponentName = componentName.split("_");
0482         if (splitComponentName.size() >= 3) {
0483             if (splitComponentName[1] == parentPkgName) {
0484                 return true;
0485             }
0486         }
0487     }
0488 
0489     return false;
0490 }
0491 
0492 bool ClpUtil::isFileIgnored(const std::string& file, std::vector<llvm::GlobPattern> const& ignoreGlobs)
0493 {
0494     for (auto&& ignoreGlob : ignoreGlobs) {
0495         if (ignoreGlob.match(file)) {
0496             return true;
0497         }
0498     }
0499     return false;
0500 }
0501 
// Creates a rule backed by the Python file `pythonFile`. Only the path is
// stored here; the `accept` and `process` functions are looked up in that
// file when the corresponding member functions are called.
ClpUtil::PySemanticPackingRule::PySemanticPackingRule(std::filesystem::path pythonFile):
    d_pythonFile(std::move(pythonFile))
{
}
0506 
0507 namespace detail {
0508 auto getPyFunFrom(std::filesystem::path const& pythonFile, std::string const& functionName)
0509 {
0510     namespace py = pybind11;
0511 
0512     auto modulePath = pythonFile.parent_path().string();
0513     auto pySys = py::module_::import("sys");
0514     pySys.attr("path").attr("append")(modulePath);
0515 
0516     auto pyUserModule = py::module_::import(pythonFile.stem().string().c_str());
0517     return py::function(pyUserModule.attr(functionName.c_str()));
0518 }
0519 } // namespace detail
0520 
0521 bool ClpUtil::PySemanticPackingRule::accept(std::string const& filepath) const
0522 {
0523     namespace py = pybind11;
0524     py::gil_scoped_acquire gil{};
0525     return detail::getPyFunFrom(d_pythonFile, "accept")(filepath).cast<bool>();
0526 }
0527 
0528 std::string ClpUtil::PySemanticPackingRule::process(std::string const& filepath,
0529                                                     PkgMatcherAddPkgFunction const& addPkg) const
0530 {
0531     namespace py = pybind11;
0532     py::gil_scoped_acquire gil{};
0533     return detail::getPyFunFrom(d_pythonFile, "process")(filepath, addPkg).cast<std::string>();
0534 }
0535 
0536 std::vector<std::unique_ptr<ClpUtil::SemanticPackingRule>> ClpUtil::getAllSemanticPackingRules()
0537 {
0538     auto searchPaths = std::vector<std::string>{};
0539     if (const char *env_p = std::getenv("SEMRULES_PATH")) {
0540         searchPaths.emplace_back(env_p);
0541     }
0542     auto appPath = QDir(QCoreApplication::applicationDirPath() + "/semrules").path().toStdString();
0543     searchPaths.emplace_back(appPath);
0544     auto homePath = QDir(QDir::homePath() + "/semrules").path().toStdString();
0545     searchPaths.emplace_back(homePath);
0546 
0547     std::vector<std::filesystem::path> sortedPaths;
0548     for (auto&& path : searchPaths) {
0549         if (!std::filesystem::exists(path)) {
0550             continue;
0551         }
0552 
0553         for (auto&& entry : std::filesystem::directory_iterator(path)) {
0554             if (entry.path().extension().string() == ".py") {
0555                 sortedPaths.push_back(entry.path());
0556             }
0557         }
0558     }
0559     std::sort(sortedPaths.begin(), sortedPaths.end());
0560 
0561     auto rules = std::vector<std::unique_ptr<ClpUtil::SemanticPackingRule>>{};
0562     for (auto&& path : sortedPaths) {
0563         rules.emplace_back(std::make_unique<PySemanticPackingRule>(path));
0564     }
0565     return rules;
0566 }
0567 
0568 } // end namespace Codethink::lvtclp