File indexing completed on 2024-05-19 05:41:59
0001 // ct_lvtclp_clputil.cpp -*-C++-*- 0002 0003 /* 0004 // Copyright 2023 Codethink Ltd <codethink@codethink.co.uk> 0005 // SPDX-License-Identifier: Apache-2.0 0006 // 0007 // Licensed under the Apache License, Version 2.0 (the "License"); 0008 // you may not use this file except in compliance with the License. 0009 // You may obtain a copy of the License at 0010 // 0011 // http://www.apache.org/licenses/LICENSE-2.0 0012 // 0013 // Unless required by applicable law or agreed to in writing, software 0014 // distributed under the License is distributed on an "AS IS" BASIS, 0015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0016 // See the License for the specific language governing permissions and 0017 // limitations under the License. 0018 */ 0019 0020 #include <ct_lvtclp_clputil.h> 0021 0022 #include <ct_lvtclp_componentutil.h> 0023 0024 #include <ct_lvtmdb_componentobject.h> 0025 #include <ct_lvtmdb_functionobject.h> 0026 #include <ct_lvtmdb_objectstore.h> 0027 #include <ct_lvtmdb_packageobject.h> 0028 #include <ct_lvtmdb_typeobject.h> 0029 0030 #include <ct_lvtclp_fileutil.h> 0031 #include <ct_lvtshr_stringhelpers.h> 0032 0033 #include <clang/Tooling/JSONCompilationDatabase.h> 0034 #include <llvm/Support/FileSystem.h> 0035 #include <llvm/Support/GlobPattern.h> 0036 #include <pybind11/embed.h> 0037 #include <pybind11/functional.h> 0038 #include <pybind11/stl.h> 0039 0040 #include <QCoreApplication> 0041 #include <QDir> 0042 0043 #include <algorithm> 0044 #include <memory> 0045 #include <regex> 0046 #include <set> 0047 #include <thread> 0048 0049 namespace { 0050 0051 using namespace Codethink; 0052 0053 lvtmdb::PackageObject *getSourcePackage(const std::string& qualifiedName, 0054 std::string name, 0055 std::string diskPath, 0056 lvtmdb::ObjectStore& memDb, 0057 lvtmdb::PackageObject *parent, 0058 lvtmdb::RepositoryObject *repository) 0059 // assumes memDb is already locked for writing 0060 { 0061 if (qualifiedName.empty()) { 0062 return nullptr; 0063 } 0064 0065 return memDb.getOrAddPackage(qualifiedName, std::move(name), std::move(diskPath), parent, repository); 0066 } 0067 0068 /** 0069 * Visual studio was giving compiler error with local lambda function. In order to solve the issue, the lambda has been 0070 * extracted to this struct - It is meant to be used in `getPackageForPath` only. 0071 */ 0072 struct addPkgForSemPackRuleHelper { 0073 explicit addPkgForSemPackRuleHelper(lvtmdb::ObjectStore& memDb): memDb(memDb) 0074 { 0075 } 0076 0077 void operator()(std::string const& qualifiedName, 0078 std::optional<std::string> parentQualifiedName = std::nullopt, 0079 std::optional<std::string> repositoryName = std::nullopt, 0080 std::optional<std::string> path = std::nullopt) 0081 { 0082 auto *repository = repositoryName ? memDb.getOrAddRepository(*repositoryName, "") : nullptr; 0083 auto *parent = parentQualifiedName 0084 ? memDb.getOrAddPackage(*parentQualifiedName, *parentQualifiedName, "", nullptr, repository) 0085 : nullptr; 0086 memDb.getOrAddPackage(qualifiedName, qualifiedName, path ? *path : "", parent, repository); 0087 } 0088 0089 private: 0090 lvtmdb::ObjectStore& memDb; 0091 }; 0092 0093 lvtmdb::PackageObject *getPackageForPath(const std::filesystem::path& path, 0094 lvtmdb::ObjectStore& memDb, 0095 const std::filesystem::path& prefix, 0096 const std::vector<std::filesystem::path>& nonLakosianDirs, 0097 const std::vector<std::pair<std::string, std::string>>& thirdPartyDirs) 0098 // assumes path is already normalised using ClpUtil::normalisePath 0099 // assumes memDb is already locked for writing 0100 { 0101 using namespace Codethink::lvtclp; 0102 0103 // synthesise package from the directory containing the source file 0104 const std::filesystem::path pkgPath = path.parent_path(); 0105 0106 lvtmdb::PackageObject *pkg = memDb.getPackage(pkgPath.string()); 0107 if (pkg) { 0108 return pkg; 0109 } 0110 0111 auto addPkg = addPkgForSemPackRuleHelper{memDb}; 0112 auto fullFilePathQString = QString::fromStdString((prefix / path).string()); 0113 auto fullFilePath = QDir::fromNativeSeparators(fullFilePathQString).toStdString(); 0114 for (auto&& semanticPackingRule : ClpUtil::getAllSemanticPackingRules()) { 0115 if (semanticPackingRule->accept(fullFilePath)) { 0116 auto pkgQName = semanticPackingRule->process(fullFilePath, addPkg); 0117 return memDb.getPackage(pkgQName); 0118 } 0119 } 0120 0121 const std::filesystem::path fullPkgPath = prefix / pkgPath; 0122 bool nonLakosian = false; 0123 for (const std::filesystem::path& nonLakosianDir : nonLakosianDirs) { 0124 if (FileUtil::pathStartsWith(nonLakosianDir, fullPkgPath)) { 0125 nonLakosian = true; 0126 break; 0127 } 0128 } 0129 0130 std::string topLevelPkgQualifiedName; 0131 std::string topLevelPkgName; 0132 auto filePath = QString::fromStdString(fullPkgPath.string()); 0133 0134 bool isMapped = false; 0135 for (auto const& [mappedPathRegex, mappedGroupName] : thirdPartyDirs) { 0136 if (std::regex_search(path.string(), std::regex{mappedPathRegex})) { 0137 topLevelPkgQualifiedName = mappedGroupName; 0138 topLevelPkgName = mappedGroupName; 0139 isMapped = true; 0140 break; 0141 } 0142 } 0143 0144 auto isStandalonePkg = false; 0145 if (!isMapped) { 0146 if (nonLakosian) { 0147 topLevelPkgQualifiedName = ClpUtil::NON_LAKOSIAN_GROUP_NAME; 0148 topLevelPkgName = ClpUtil::NON_LAKOSIAN_GROUP_NAME; 0149 } else { 0150 auto projectSource = QString::fromStdString(prefix.string()); 0151 isStandalonePkg = ClpUtil::isComponentOnStandalonePackage(path); 0152 if (isStandalonePkg) { 0153 topLevelPkgQualifiedName = ("standalones" / pkgPath.filename()).string(); 0154 topLevelPkgName = pkgPath.filename().string(); 0155 0156 if (filePath.startsWith(projectSource)) { 0157 filePath.replace(projectSource, "${SOURCE_DIR}/"); 0158 } 0159 } else if (ClpUtil::isComponentOnPackageGroup(path)) { 0160 topLevelPkgQualifiedName = ("groups" / pkgPath.parent_path().filename()).string(); 0161 topLevelPkgName = pkgPath.parent_path().filename().string(); 0162 0163 if (filePath.startsWith(projectSource)) { 0164 filePath.replace(projectSource, "${SOURCE_DIR}/"); 0165 } 0166 } else { 0167 topLevelPkgQualifiedName = lvtclp::ClpUtil::NON_LAKOSIAN_GROUP_NAME; 0168 topLevelPkgName = lvtclp::ClpUtil::NON_LAKOSIAN_GROUP_NAME; 0169 } 0170 } 0171 } 0172 0173 if (isStandalonePkg) { 0174 return getSourcePackage(topLevelPkgQualifiedName, 0175 std::move(topLevelPkgName), 0176 filePath.toStdString(), 0177 memDb, 0178 nullptr, 0179 nullptr); 0180 } 0181 0182 // Either package inside a group or non-lakosian package 0183 auto *grp = getSourcePackage(topLevelPkgQualifiedName, 0184 std::move(topLevelPkgName), 0185 filePath.toStdString(), 0186 memDb, 0187 nullptr, 0188 nullptr); 0189 if (pkgPath.filename().string().empty()) { 0190 return grp; 0191 } 0192 0193 return getSourcePackage(topLevelPkgQualifiedName + "/" + pkgPath.filename().string(), 0194 pkgPath.filename().string(), 0195 filePath.toStdString(), 0196 memDb, 0197 grp, 0198 nullptr); 0199 } 0200 0201 } // namespace 0202 0203 namespace Codethink::lvtclp { 0204 0205 std::filesystem::path ClpUtil::normalisePath(std::filesystem::path path, const std::filesystem::path& prefix) 0206 { 0207 if (!path.empty()) { 0208 path = std::filesystem::weakly_canonical(path); 0209 } 0210 0211 #ifdef NDEBUG 0212 // the call to std::filesystem::weakly_canonical is expensive for the 0213 // amount of times we call it, but we are always passing `d->prefix` to it 0214 // so we can call that once, and cache the results. 0215 // the assert here makes sure we did not forgot to do this somewhere, 0216 // and it's only active on debug mode. 0217 assert(prefix == std::filesystem::weakly_canonical(prefix)); 0218 #endif 0219 0220 if (FileUtil::pathStartsWith(prefix, path)) { 0221 path = FileUtil::nonPrefixPart(prefix, path); 0222 } 0223 0224 return path; 0225 } 0226 0227 lvtmdb::FileObject *ClpUtil::writeSourceFile(const std::string& inFilename, 0228 bool isHeader, 0229 lvtmdb::ObjectStore& memDb, 0230 const std::filesystem::path& prefix, 0231 const std::vector<std::filesystem::path>& nonLakosianDirs, 0232 const std::vector<std::pair<std::string, std::string>>& thirdPartyDirs) 0233 { 0234 if (inFilename.empty()) { 0235 return nullptr; 0236 } 0237 0238 const std::filesystem::path path = normalisePath(inFilename, prefix); 0239 const std::string filename = path.string(); 0240 0241 lvtmdb::FileObject *ret = nullptr; 0242 memDb.withROLock([&] { 0243 ret = memDb.getFile(filename); 0244 }); 0245 if (ret) { 0246 return ret; 0247 } 0248 0249 const std::filesystem::path fullPath = prefix / path; 0250 auto hash = [&fullPath]() -> std::string { 0251 auto result = llvm::sys::fs::md5_contents(fullPath.string()); 0252 if (result) { 0253 return result.get().digest().str().str(); 0254 } 0255 0256 // allow failure to hash file contents because we use memory mapped files in tests 0257 return ""; 0258 }(); 0259 0260 auto lock = memDb.rwLock(); 0261 (void) lock; // cppcheck 0262 lvtmdb::PackageObject *package = getPackageForPath(path, memDb, prefix, nonLakosianDirs, thirdPartyDirs); 0263 lvtmdb::ComponentObject *component = ComponentUtil::addComponent(path, package, memDb); 0264 lvtmdb::FileObject *file = 0265 memDb.getOrAddFile(filename, path.filename().string(), isHeader, hash, package, component); 0266 0267 component->withRWLock([&] { 0268 component->addFile(file); 0269 }); 0270 package->withRWLock([&] { 0271 package->addComponent(component); 0272 }); 0273 0274 return file; 0275 } 0276 0277 std::string ClpUtil::getRealPath(const clang::SourceLocation& loc, const clang::SourceManager& mgr) 0278 { 0279 auto pathFromLocation = [](const clang::SourceLocation& loc, const clang::SourceManager& mgr) -> std::string { 0280 const clang::FileID id = mgr.getFileID(loc); 0281 const clang::FileEntry *entry = mgr.getFileEntryForID(id); 0282 0283 std::string filePath; 0284 if (entry) { 0285 filePath = entry->tryGetRealPathName().str(); 0286 } 0287 0288 return filePath; 0289 }; 0290 0291 std::string res = pathFromLocation(mgr.getSpellingLoc(loc), mgr); 0292 if (res.empty()) { 0293 res = pathFromLocation(mgr.getExpansionLoc(loc), mgr); 0294 } 0295 0296 return res; 0297 } 0298 0299 void ClpUtil::writeDependencyRelations(lvtmdb::PackageObject *source, lvtmdb::PackageObject *target) 0300 { 0301 if (!source || !target || source == target) { 0302 return; // RETURN 0303 } 0304 lvtmdb::PackageObject::addDependency(source, target); 0305 } 0306 0307 void ClpUtil::addUsesInInter(lvtmdb::TypeObject *source, lvtmdb::TypeObject *target) 0308 { 0309 if (!source || !target || source == target) { 0310 return; 0311 } 0312 lvtmdb::TypeObject::addUsesInTheInterface(source, target); 0313 } 0314 0315 void ClpUtil::addUsesInImpl(lvtmdb::TypeObject *source, lvtmdb::TypeObject *target) 0316 { 0317 if (!source || !target || source == target) { 0318 return; 0319 } 0320 lvtmdb::TypeObject::addUsesInTheImplementation(source, target); 0321 } 0322 0323 void ClpUtil::addFnDependency(lvtmdb::FunctionObject *source, lvtmdb::FunctionObject *target) 0324 { 0325 if (!source || !target || source == target) { 0326 return; 0327 } 0328 lvtmdb::FunctionObject::addDependency(source, target); 0329 } 0330 0331 FileType ClpUtil::categorisePath(const std::string& file) 0332 { 0333 const std::filesystem::path path(file); 0334 const std::string ext = path.extension().string(); 0335 0336 static const std::set<std::string> headerExtensions({".h", ".hh", ".h++", ".hpp"}); 0337 static const std::set<std::string> sourceExtensions({".cpp", ".c", ".C", ".c++", ".cc", ".cxx", ".t.cpp"}); 0338 static const std::set<std::string> otherExtensions({".dep", ".mem", ".o", ".swp", ".md", ".txt", ""}); 0339 0340 if (headerExtensions.count(ext)) { 0341 return FileType::e_Header; 0342 } 0343 if (sourceExtensions.count(ext)) { 0344 return FileType::e_Source; 0345 } 0346 if (otherExtensions.count(ext)) { 0347 return FileType::e_UnknownUnknown; 0348 } 0349 return FileType::e_KnownUnknown; 0350 } 0351 0352 const char *const ClpUtil::NON_LAKOSIAN_GROUP_NAME = "non-lakosian group"; 0353 0354 struct CombinedCompilationDatabase::Private { 0355 std::vector<clang::tooling::CompileCommand> compileCommands; 0356 std::vector<std::string> files; 0357 }; 0358 0359 CombinedCompilationDatabase::CombinedCompilationDatabase(): d(std::make_unique<Private>()) 0360 { 0361 } 0362 0363 CombinedCompilationDatabase::~CombinedCompilationDatabase() noexcept = default; 0364 0365 cpp::result<bool, CompilationDatabaseError> 0366 CombinedCompilationDatabase::addCompilationDatabase(const std::filesystem::path& path) 0367 { 0368 std::string errorMessage; 0369 std::unique_ptr<clang::tooling::JSONCompilationDatabase> jsonDb = 0370 clang::tooling::JSONCompilationDatabase::loadFromFile(path.string(), 0371 errorMessage, 0372 clang::tooling::JSONCommandLineSyntax::AutoDetect); 0373 if (!jsonDb) { 0374 return cpp::fail(CompilationDatabaseError{CompilationDatabaseError::Kind::ErrorLoadingFromFile, errorMessage}); 0375 } 0376 0377 if (jsonDb->getAllCompileCommands().size() == 0) { 0378 return cpp::fail(CompilationDatabaseError{CompilationDatabaseError::Kind::CompileCommandsContainsNoCommands}); 0379 } 0380 0381 if (jsonDb->getAllFiles().size() == 0) { 0382 return cpp::fail(CompilationDatabaseError{CompilationDatabaseError::Kind::CompileCommandsContainsNoFiles}); 0383 } 0384 0385 const std::filesystem::path buildDir = path.parent_path(); 0386 addCompilationDatabase(*jsonDb, buildDir); 0387 return {}; 0388 } 0389 0390 void CombinedCompilationDatabase::addCompilationDatabase(const clang::tooling::CompilationDatabase& db, 0391 const std::filesystem::path& buildDir) 0392 { 0393 for (clang::tooling::CompileCommand cmd : db.getAllCompileCommands()) { 0394 // resolve any relative paths 0395 std::filesystem::path filename(cmd.Filename); 0396 if (filename.is_relative()) { 0397 filename = buildDir / filename; 0398 } 0399 cmd.Filename = filename.string(); 0400 0401 auto ext = filename.extension().string(); 0402 if (ext != ".cc" && ext != ".cpp" && ext != ".c" && ext != ".h" && ext != ".hpp" && ext != ".hh") { 0403 continue; 0404 } 0405 0406 d->files.push_back(cmd.Filename); 0407 d->compileCommands.push_back(std::move(cmd)); 0408 } 0409 } 0410 0411 std::vector<clang::tooling::CompileCommand> 0412 CombinedCompilationDatabase::getCompileCommands(llvm::StringRef FilePath) const 0413 { 0414 for (auto const& cmd : d->compileCommands) { 0415 if (cmd.Filename == FilePath) { 0416 return {cmd}; 0417 } 0418 } 0419 return {}; 0420 } 0421 0422 std::vector<std::string> CombinedCompilationDatabase::getAllFiles() const 0423 { 0424 return d->files; 0425 } 0426 0427 std::vector<clang::tooling::CompileCommand> CombinedCompilationDatabase::getAllCompileCommands() const 0428 { 0429 return d->compileCommands; 0430 } 0431 0432 long ClpUtil::getThreadId() 0433 { 0434 return static_cast<long>(std::hash<std::thread::id>{}(std::this_thread::get_id())); 0435 } 0436 0437 bool ClpUtil::isComponentOnPackageGroup(const std::filesystem::path& componentPath) 0438 { 0439 // Check if the component name starts with the package name, and if the package name starts with the package 0440 // group name. Those are the basic rules for a component be within a package that's inside a package group. 0441 auto componentName = QString::fromStdString(componentPath.filename().string()); 0442 auto pkgName = QString::fromStdString(componentPath.parent_path().filename().string()); 0443 auto pkgGroupName = QString::fromStdString(componentPath.parent_path().parent_path().filename().string()); 0444 if (pkgGroupName.size() != 3) { 0445 return false; 0446 } 0447 0448 if (componentName.startsWith(pkgName + "_") && pkgName.startsWith(pkgGroupName)) { 0449 return true; 0450 } 0451 0452 // Except if the package name contains a '+' sign. 0453 auto trySpecialPkgName = pkgName.split("+"); 0454 if (trySpecialPkgName.size() == 2) { 0455 auto specialPkgName = trySpecialPkgName[0]; 0456 if (componentName.startsWith(specialPkgName + "_") && specialPkgName.startsWith(pkgGroupName)) { 0457 return true; 0458 } 0459 } 0460 0461 return false; 0462 } 0463 0464 bool ClpUtil::isComponentOnStandalonePackage(const std::filesystem::path& componentPath) 0465 { 0466 // Capture special component naming inside standalone package in the form 0467 // <prefix>_<pkgname>_<component_name>. e.g.: ct_lvtclp_filesystemscanner.cpp 0468 auto hasStandaloneNameWithPkgPrefix = 0469 std::regex_search(componentPath.string(), std::regex{"/([a-zA-Z]{1,2})_([a-zA-Z0-9_]+)\\."}); 0470 if (hasStandaloneNameWithPkgPrefix) { 0471 // In order to be a valid standalone package, the component must be inside a package containing it's prefix 0472 auto componentName = QString::fromStdString(componentPath.filename().string()); 0473 auto parentPkgName = QString::fromStdString(componentPath.parent_path().filename().string()); 0474 if (componentName.startsWith(parentPkgName)) { 0475 return true; 0476 } 0477 0478 // The containing package may be acceptable if it is named without the '<prefix>_' prefix. 0479 // In other words, a component named '<prefix>_<pkgname>_<component_name>' can be inside a package named 0480 // <pkgname>. 0481 auto splitComponentName = componentName.split("_"); 0482 if (splitComponentName.size() >= 3) { 0483 if (splitComponentName[1] == parentPkgName) { 0484 return true; 0485 } 0486 } 0487 } 0488 0489 return false; 0490 } 0491 0492 bool ClpUtil::isFileIgnored(const std::string& file, std::vector<llvm::GlobPattern> const& ignoreGlobs) 0493 { 0494 for (auto&& ignoreGlob : ignoreGlobs) { 0495 if (ignoreGlob.match(file)) { 0496 return true; 0497 } 0498 } 0499 return false; 0500 } 0501 0502 ClpUtil::PySemanticPackingRule::PySemanticPackingRule(std::filesystem::path pythonFile): 0503 d_pythonFile(std::move(pythonFile)) 0504 { 0505 } 0506 0507 namespace detail { 0508 auto getPyFunFrom(std::filesystem::path const& pythonFile, std::string const& functionName) 0509 { 0510 namespace py = pybind11; 0511 0512 auto modulePath = pythonFile.parent_path().string(); 0513 auto pySys = py::module_::import("sys"); 0514 pySys.attr("path").attr("append")(modulePath); 0515 0516 auto pyUserModule = py::module_::import(pythonFile.stem().string().c_str()); 0517 return py::function(pyUserModule.attr(functionName.c_str())); 0518 } 0519 } // namespace detail 0520 0521 bool ClpUtil::PySemanticPackingRule::accept(std::string const& filepath) const 0522 { 0523 namespace py = pybind11; 0524 py::gil_scoped_acquire gil{}; 0525 return detail::getPyFunFrom(d_pythonFile, "accept")(filepath).cast<bool>(); 0526 } 0527 0528 std::string ClpUtil::PySemanticPackingRule::process(std::string const& filepath, 0529 PkgMatcherAddPkgFunction const& addPkg) const 0530 { 0531 namespace py = pybind11; 0532 py::gil_scoped_acquire gil{}; 0533 return detail::getPyFunFrom(d_pythonFile, "process")(filepath, addPkg).cast<std::string>(); 0534 } 0535 0536 std::vector<std::unique_ptr<ClpUtil::SemanticPackingRule>> ClpUtil::getAllSemanticPackingRules() 0537 { 0538 auto searchPaths = std::vector<std::string>{}; 0539 if (const char *env_p = std::getenv("SEMRULES_PATH")) { 0540 searchPaths.emplace_back(env_p); 0541 } 0542 auto appPath = QDir(QCoreApplication::applicationDirPath() + "/semrules").path().toStdString(); 0543 searchPaths.emplace_back(appPath); 0544 auto homePath = QDir(QDir::homePath() + "/semrules").path().toStdString(); 0545 searchPaths.emplace_back(homePath); 0546 0547 std::vector<std::filesystem::path> sortedPaths; 0548 for (auto&& path : searchPaths) { 0549 if (!std::filesystem::exists(path)) { 0550 continue; 0551 } 0552 0553 for (auto&& entry : std::filesystem::directory_iterator(path)) { 0554 if (entry.path().extension().string() == ".py") { 0555 sortedPaths.push_back(entry.path()); 0556 } 0557 } 0558 } 0559 std::sort(sortedPaths.begin(), sortedPaths.end()); 0560 0561 auto rules = std::vector<std::unique_ptr<ClpUtil::SemanticPackingRule>>{}; 0562 for (auto&& path : sortedPaths) { 0563 rules.emplace_back(std::make_unique<PySemanticPackingRule>(path)); 0564 } 0565 return rules; 0566 } 0567 0568 } // end namespace Codethink::lvtclp