File indexing completed on 2024-04-28 04:38:53

0001 #include "grepfindthread.h"
0002 #include "debug.h"
0003 
0004 #include <QDir>
0005 #include <QDirIterator>
0006 #include <QFileInfo>
0007 #include <QSet>
0008 
0009 #include <project/projectmodel.h>
0010 #include <interfaces/iproject.h>
0011 #include <interfaces/iprojectcontroller.h>
0012 #include <interfaces/icore.h>
0013 
0014 #include <serialization/indexedstring.h>
0015 
0016 #include <util/wildcardhelpers.h>
0017 
0018 #include <algorithm>
0019 #include <atomic>
0020 #include <queue>
0021 #include <utility>
0022 
0023 using KDevelop::IndexedString;
0024 
0025 namespace {
0026 QString removeTrailingSlashes(QString dirPath)
0027 {
0028     while (!dirPath.isEmpty() && std::as_const(dirPath).back() == QLatin1Char{'/'}) {
0029         dirPath.chop(1);
0030     }
0031     return dirPath;
0032 }
0033 
0034 /**
0035  * Is the file at @p filePath in the directory at @p dirPath within a maximum depth of @p maxDepth?
0036  * @param filePath the path to a file.
0037  * @param dirPath the path to a file or a directory without a trailing slash
0038  *                (an empty string is the root directory path).
0039  * @param maxDepth maximum depth of recursion or -1 if unlimited.
0040  * @note When @p dirPath points to a file rather than a directory, this function always returns @c false.
0041  */
0042 bool isInDirectory(const QString& filePath, const QString& dirPath, int maxDepth)
0043 {
0044     constexpr QLatin1Char slash{'/'};
0045 
0046     Q_ASSERT(!filePath.endsWith(slash)); // the path to a file cannot end with a slash
0047     Q_ASSERT(!dirPath.endsWith(slash)); // precondition
0048 
0049     // First check whether dirPath is a parent directory of filePath.
0050     // The parent directory check below is a simplified (thanks to preconditions) version of QUrl::isParentOf().
0051 
0052     if (!filePath.startsWith(dirPath)) {
0053         return false; // dirPath is not a parent directory of filePath
0054     }
0055 
0056     const auto dirPathSize = dirPath.size();
0057     if (filePath.size() == dirPathSize) {
0058         Q_ASSERT(filePath == dirPath);
0059         return false; // dirPath points to the same file as filePath
0060     }
0061     Q_ASSERT(filePath.size() > dirPathSize);
0062     if (filePath.at(dirPathSize) != slash) {
0063         return false; // dirPath is not a parent directory of filePath
0064     }
0065 
0066     // dirPath *is* a parent directory of filePath. Check whether it is within the maxDepth limit.
0067 
0068     if (maxDepth < 0) {
0069         return true; // unlimited depth
0070     }
0071 
0072     int indexOfSlashInUrlPath = 0;
0073     do {
0074         indexOfSlashInUrlPath = filePath.lastIndexOf(slash, indexOfSlashInUrlPath - 1);
0075         Q_ASSERT(indexOfSlashInUrlPath >= dirPathSize); // because dirPath is a parent directory of filePath
0076         if (indexOfSlashInUrlPath == dirPathSize) {
0077             return true;
0078         }
0079     } while (--maxDepth >= 0);
0080     return false;
0081 }
0082 
0083 /**
0084  * Returns a slash plus the path to @p filePath relative to its parent directory @p parentDirectoryPath.
0085  * @param filePath the path to a file.
0086  * @param parentDirectoryPath the path to a parent directory of @p filePath without a trailing slash
0087  *                            (an empty string is the root directory path).
0088  */
0089 QString removeDirectoryPrefix(QString filePath, const QString& parentDirectoryPath)
0090 {
0091     Q_ASSERT(filePath.startsWith(parentDirectoryPath));
0092     Q_ASSERT(filePath.size() > parentDirectoryPath.size());
0093     Q_ASSERT(filePath.at(parentDirectoryPath.size()) == QLatin1Char{'/'});
0094     filePath.remove(0, parentDirectoryPath.size());
0095     return filePath;
0096 }
0097 
0098 class FileFinder
0099 {
0100 public:
0101     explicit FileFinder(const QStringList& include, const QStringList& exclude,
0102                         const std::atomic<bool>& abort)
0103         : m_include{include}
0104         , m_exclude{exclude}
0105         , m_abort{abort}
0106     {}
0107 
0108     // dir or dirPath argument can be a path to a file, not a directory. In this case neither
0109     // depth nor Files filter nor Exclude filter prevent searching within the file search location.
0110     // This allows the user to specify the list of files to search in
0111     // without bothering to adjust filters to match all file paths in the list.
0112 
0113     void getProjectFiles(const QSet<IndexedString>& projectFileSet,
0114                          const QUrl& dir, int depth, QList<QUrl>& results);
0115     void findFiles(const QString& dirPath, int depth, QList<QUrl>& results);
0116 
0117 private:
0118     void findFilesCanonical(const QString& canonicalDirPath, const QString& pathRelativeToSearchLocation, int depth,
0119                             QList<QUrl>& results);
0120 
0121     bool shouldAbort() const { return m_abort.load(std::memory_order_relaxed); }
0122 
0123     const QStringList& m_include;
0124     /**
0125      * @note Matching an absolute path against the Exclude filter excludes the entire current search
0126      * location if it happens to be within a higher-level directory, whose name matches the Exclude filter.
0127      * Therefore, paths relative to current search location are matched against the Exclude filter.
0128      * In order to match Exclude patterns such as /build/ and /.git/ all such relative paths must
0129      * start with a slash, and relative paths to directories must also end with a slash.
0130      */
0131     const QStringList& m_exclude;
0132     const std::atomic<bool>& m_abort;
0133 };
0134 
0135 void FileFinder::getProjectFiles(const QSet<IndexedString>& projectFileSet,
0136                                  const QUrl& dir, int depth, QList<QUrl>& results)
0137 {
0138     // Cannot use dir.adjusted(QUrl::StripTrailingSlash) here, because it does not
0139     // remove the single slash of the root directory. Both isInDirectory() and
0140     // removeDirectoryPrefix() require the empty-string representation of the root directory.
0141     const auto dirPath = removeTrailingSlashes(dir.path());
0142 
0143     for (const IndexedString& item : projectFileSet) {
0144         if (shouldAbort()) {
0145             break;
0146         }
0147         QUrl url = item.toUrl();
0148         // The scheme and authority of url match those of dir, because
0149         // both belong to a common project (see getProjectFileSets() below).
0150         auto urlPath = url.path();
0151 
0152         if (urlPath == dirPath) {
0153             // Do not match against filters, because this particular URL is a search location.
0154             results.push_back(std::move(url));
0155             continue;
0156         }
0157 
0158         if (!isInDirectory(urlPath, dirPath, depth)) {
0159             continue;
0160         }
0161 
0162         const auto urlFileName = urlPath.mid(urlPath.lastIndexOf(QLatin1Char{'/'}) + 1);
0163         if (!QDir::match(m_include, urlFileName)) {
0164             continue;
0165         }
0166 
0167         const auto relativeFilePath = removeDirectoryPrefix(std::move(urlPath), dirPath);
0168         if (!WildcardHelpers::match(m_exclude, relativeFilePath)) {
0169             results.push_back(std::move(url));
0170         }
0171     }
0172 }
0173 
0174 void FileFinder::findFiles(const QString& dirPath, int depth, QList<QUrl>& results)
0175 {
0176     const QFileInfo info(dirPath);
0177     const auto canonicalFilePath = info.canonicalFilePath();
0178     if (info.isDir()) {
0179         findFilesCanonical(canonicalFilePath, QStringLiteral("/"), depth, results);
0180         return;
0181     }
0182 
0183     // Search in the single file at canonicalFilePath.
0184     // canonicalFilePath is empty if the file does not exist.
0185     if (!canonicalFilePath.isEmpty()) {
0186         // Do not match against filters, because this particular file path is a search location.
0187         results.push_back(QUrl::fromLocalFile(canonicalFilePath));
0188     }
0189 }
0190 
0191 void FileFinder::findFilesCanonical(const QString& canonicalDirPath, const QString& pathRelativeToSearchLocation,
0192                                     int depth, QList<QUrl>& results)
0193 {
0194     constexpr QLatin1Char slash{'/'};
0195     constexpr QDir::Filters entryFilter = QDir::NoSymLinks | QDir::NoDotAndDotDot | QDir::Readable | QDir::Hidden;
0196 
0197     Q_ASSERT(pathRelativeToSearchLocation.startsWith(slash));
0198     Q_ASSERT(pathRelativeToSearchLocation.endsWith(slash));
0199 
0200     for (QDirIterator it(canonicalDirPath, m_include, QDir::Files | entryFilter); it.hasNext();) {
0201         const auto filePath = it.next();
0202         const QString relativeFilePath = pathRelativeToSearchLocation + it.fileName();
0203         if (!WildcardHelpers::match(m_exclude, relativeFilePath)) {
0204             results.push_back(QUrl::fromLocalFile(filePath));
0205         }
0206     }
0207 
0208     if(depth != 0)
0209     {
0210         if (depth > 0) {
0211             --depth;
0212         }
0213 
0214         for (QDirIterator it(canonicalDirPath, QStringList{}, QDir::AllDirs | entryFilter); it.hasNext();) {
0215             if (shouldAbort()) {
0216                 break;
0217             }
0218             const auto dirPath = it.next();
0219             const QString relativeDirPath = pathRelativeToSearchLocation + it.fileName() + slash;
0220             if (!WildcardHelpers::match(m_exclude, relativeDirPath)) {
0221                 findFilesCanonical(dirPath, relativeDirPath, depth, results);
0222             }
0223         }
0224     }
0225 }
0226 
0227 using FileSetCollection = std::queue<QSet<IndexedString>>;
0228 
0229 FileSetCollection getProjectFileSets(const QList<QUrl>& dirs)
0230 {
0231     FileSetCollection fileSets;
0232     for (const QUrl& dir : dirs) {
0233         const auto* const project = KDevelop::ICore::self()->projectController()->findProjectForUrl(dir);
0234         // Store an empty file set when project==nullptr because each element
0235         // of fileSets must correspond to an element of dirs at the same index.
0236         fileSets.push(project ? project->fileSet() : FileSetCollection::value_type{});
0237     }
0238     return fileSets;
0239 }
0240 
0241 } // namespace
0242 
/**
 * Private data of GrepFindFilesThread.
 *
 * @note The GrepFindFilesThread constructor aggregate-initializes this class,
 *       so the order of the members below must match the order of its initializers.
 */
class GrepFindFilesThreadPrivate
{
public:
    /// The search locations, processed in order by GrepFindFilesThread::run().
    const QList<QUrl> m_startDirs;
    /// One project file set per element of m_startDirs if the search is limited
    /// to project files; empty otherwise. Elements are popped as run() consumes them.
    FileSetCollection m_projectFileSets;
    /// The unparsed Files filter, split by GrepFindFilesThread::parseInclude() in run().
    const QString m_patString;
    /// The unparsed Exclude filter, split by GrepFindFilesThread::parseExclude() in run().
    const QString m_exclString;
    /// The depth limit passed on to FileFinder.
    const int m_depth;
    /// Set by GrepFindFilesThread::tryAbort() and polled by FileFinder during the search.
    std::atomic<bool> m_tryAbort;
    /// The files found so far; handed over to the caller by GrepFindFilesThread::takeFiles().
    QList<QUrl> m_files;
};
0254 
/**
 * Stores the search parameters; the search itself is performed in run().
 * @param parent the parent object passed on to QThread.
 * @param startDirs the search locations.
 * @param depth the depth limit passed on to FileFinder.
 * @param pats the unparsed Files filter (see parseInclude()).
 * @param excl the unparsed Exclude filter (see parseExclude()).
 * @param onlyProject if @c true, the project file set of each search location is
 *        collected here, in the constructing thread, and run() searches only within these sets.
 */
GrepFindFilesThread::GrepFindFilesThread(QObject* parent,
                                         const QList<QUrl>& startDirs,
                                         int depth, const QString& pats,
                                         const QString& excl,
                                         bool onlyProject)
    : QThread(parent)
    // The initializers below must follow the member order of GrepFindFilesThreadPrivate.
    , d_ptr(new GrepFindFilesThreadPrivate{
                startDirs,
                onlyProject ? getProjectFileSets(startDirs) : FileSetCollection{},
                pats, excl, depth, {false}, {}})
{
    // NOTE(review): QThread::setTerminationEnabled() is a static function documented to act on
    // the *current* thread. Called here, it affects the thread that constructs this object
    // rather than the worker thread that executes run() - confirm that this is intended.
    setTerminationEnabled(false);
}
0268 
// Defaulted here in the source file, where GrepFindFilesThreadPrivate is a complete type.
GrepFindFilesThread::~GrepFindFilesThread() = default;
0270 
0271 void GrepFindFilesThread::tryAbort()
0272 {
0273     Q_D(GrepFindFilesThread);
0274 
0275     d->m_tryAbort.store(true, std::memory_order_relaxed);
0276 }
0277 
0278 void GrepFindFilesThread::run()
0279 {
0280     Q_D(GrepFindFilesThread);
0281 
0282     const QStringList include = GrepFindFilesThread::parseInclude(d->m_patString);
0283     const QStringList exclude = GrepFindFilesThread::parseExclude(d->m_exclString);
0284 
0285     qCDebug(PLUGIN_GREPVIEW) << "running with start dir" << d->m_startDirs;
0286 
0287     FileFinder finder(include, exclude, d->m_tryAbort);
0288     // m_projectFileSets contains a project file set for each element of m_startDirs at a
0289     // corresponding index if this search is limited to project files; is empty otherwise.
0290     Q_ASSERT(d->m_projectFileSets.empty() ||
0291                 d->m_projectFileSets.size() == static_cast<std::size_t>(d->m_startDirs.size()));
0292     for (const QUrl& directory : d->m_startDirs) {
0293         if (d->m_projectFileSets.empty()) {
0294             finder.findFiles(directory.toLocalFile(), d->m_depth, d->m_files);
0295         } else {
0296             finder.getProjectFiles(d->m_projectFileSets.front(), directory, d->m_depth, d->m_files);
0297             // Removing the no longer needed file set from the collection as
0298             // soon as possible may save some memory or prevent a copy on write
0299             // if the project's file set is changed during the search.
0300             d->m_projectFileSets.pop();
0301         }
0302     }
0303 }
0304 
0305 QList<QUrl> GrepFindFilesThread::takeFiles()
0306 {
0307     Q_D(GrepFindFilesThread);
0308     Q_ASSERT(isFinished());
0309 
0310     QList<QUrl> tmpList;
0311     d->m_files.swap(tmpList);
0312 
0313     std::sort(tmpList.begin(), tmpList.end());
0314     tmpList.erase(std::unique(tmpList.begin(), tmpList.end()), tmpList.end());
0315     return tmpList;
0316 }
0317 
0318 QStringList GrepFindFilesThread::parseExclude(const QString& excl)
0319 {
0320     QStringList exclude;
0321     // Split around commas or spaces
0322     const auto excludesList = excl.splitRef(QRegExp(QStringLiteral(",|\\s")), Qt::SkipEmptyParts);
0323     exclude.reserve(excludesList.size());
0324     for (const auto& sub : excludesList) {
0325         exclude << QStringLiteral("*%1*").arg(sub);
0326     }
0327     return exclude;
0328 }
0329 
0330 QStringList GrepFindFilesThread::parseInclude(const QString& inc)
0331 {
0332     // Split around commas or spaces
0333     return inc.split(QRegExp(QStringLiteral(",|\\s")), Qt::SkipEmptyParts);
0334 }
0335 
0336 #include "moc_grepfindthread.cpp"