File indexing completed on 2024-04-14 03:49:45

0001 /*
0002     This file is part of the KDE Project
0003     SPDX-FileCopyrightText: 2008-2010 Sebastian Trueg <trueg@kde.org>
0004     SPDX-FileCopyrightText: 2013-2014 Vishesh Handa <me@vhanda.in>
0005     SPDX-FileCopyrightText: 2020 Benjamin Port <benjamin.port@enioka.com>
0006 
0007     SPDX-License-Identifier: LGPL-2.0-or-later
0008 */
0009 
0010 #include "fileindexerconfig.h"
0011 #include "fileexcludefilters.h"
0012 #include "storagedevices.h"
0013 #include "baloodebug.h"
0014 
0015 #include <QStringList>
0016 #include <QDir>
0017 
0018 #include <QStandardPaths>
0019 #include "baloosettings.h"
0020 
0021 namespace
0022 {
0023 QString normalizeTrailingSlashes(QString&& path)
0024 {
0025     while (path.endsWith(QLatin1Char('/'))) {
0026         path.chop(1);
0027     }
0028     path += QLatin1Char('/');
0029     return path;
0030 }
0031 
0032 }
0033 
0034 namespace Baloo
0035 {
0036 
0037 FileIndexerConfig::FileIndexerConfig(QObject* parent)
0038     : QObject(parent)
0039     , m_settings(new BalooSettings(this))
0040     , m_folderCacheDirty(true)
0041     , m_indexHidden(false)
0042     , m_devices(nullptr)
0043     , m_maxUncomittedFiles(40)
0044 {
0045     forceConfigUpdate();
0046 }
0047 
0048 FileIndexerConfig::~FileIndexerConfig()
0049 {
0050 }
0051 
0052 QDebug operator<<(QDebug dbg, const FileIndexerConfig::FolderConfig& entry)
0053 {
0054     QDebugStateSaver saver(dbg);
0055     dbg.nospace() << entry.path << ": "
0056                   << (entry.isIncluded ? "included" : "excluded");
0057     return dbg;
0058 }
0059 
0060 QStringList FileIndexerConfig::includeFolders() const
0061 {
0062     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0063 
0064     QStringList fl;
0065     for (const auto& entry : m_folderCache) {
0066         if (entry.isIncluded) {
0067             fl << entry.path;
0068         }
0069     }
0070     return fl;
0071 }
0072 
0073 QStringList FileIndexerConfig::excludeFolders() const
0074 {
0075     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0076 
0077     QStringList fl;
0078     for (const auto& entry : m_folderCache) {
0079         if (!entry.isIncluded) {
0080             fl << entry.path;
0081         }
0082     }
0083     return fl;
0084 }
0085 
0086 QStringList FileIndexerConfig::excludeFilters() const
0087 {
0088     // read configured exclude filters
0089     QStringList filters = m_settings->excludedFilters();
0090 
0091     // make sure we always keep the latest default exclude filters
0092     // TODO: there is one problem here. What if the user removed some of the default filters?
0093     if (m_settings->excludedFiltersVersion() < defaultExcludeFilterListVersion()) {
0094         filters += defaultExcludeFilterList();
0095         // in case the cfg entry was empty and filters == defaultExcludeFilterList()
0096         filters.removeDuplicates();
0097 
0098         // write the config directly since the KCM does not have support for the version yet
0099         m_settings->setExcludedFilters(filters);
0100         m_settings->setExcludedFiltersVersion(defaultExcludeFilterListVersion());
0101     }
0102 
0103     return filters;
0104 }
0105 
0106 QStringList FileIndexerConfig::excludeMimetypes() const
0107 {
0108     return QList<QString>(m_excludeMimetypes.begin(), m_excludeMimetypes.end());
0109 }
0110 
0111 bool FileIndexerConfig::indexHiddenFilesAndFolders() const
0112 {
0113     return m_indexHidden;
0114 }
0115 
0116 bool FileIndexerConfig::onlyBasicIndexing() const
0117 {
0118     return m_onlyBasicIndexing;
0119 }
0120 
0121 bool FileIndexerConfig::canBeSearched(const QString& folder) const
0122 {
0123     QFileInfo fi(folder);
0124     QString path = fi.absolutePath();
0125     if (!fi.isDir()) {
0126         return false;
0127     } else if (shouldFolderBeIndexed(path)) {
0128         return true;
0129     }
0130 
0131     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0132 
0133     // Look for included descendants
0134     for (const auto& entry : m_folderCache) {
0135         if (entry.isIncluded && entry.path.startsWith(path)) {
0136             return true;
0137         }
0138     }
0139 
0140     return false;
0141 }
0142 
0143 bool FileIndexerConfig::shouldBeIndexed(const QString& path) const
0144 {
0145     QFileInfo fi(path);
0146     if (fi.isDir()) {
0147         return shouldFolderBeIndexed(path);
0148     } else {
0149         return (shouldFolderBeIndexed(fi.absolutePath()) &&
0150                 (!fi.isHidden() || indexHiddenFilesAndFolders()) &&
0151                 shouldFileBeIndexed(fi.fileName()));
0152     }
0153 }
0154 
0155 bool FileIndexerConfig::shouldFolderBeIndexed(const QString& path) const
0156 {
0157     QString folder;
0158     auto normalizedPath = normalizeTrailingSlashes(QString(path));
0159 
0160     if (folderInFolderList(normalizedPath, folder)) {
0161         // we always index the folders in the list
0162         // ignoring the name filters
0163         if (folder == normalizedPath) {
0164             return true;
0165         }
0166 
0167         // check the exclude filters for all components of the path
0168         // after folder
0169 #ifndef __unix__
0170         QDir d(folder);
0171 #endif
0172 
0173         const QStringView trailingPath = QStringView(normalizedPath).mid(folder.size());
0174         const auto pathComponents = trailingPath.split(QLatin1Char('/'), Qt::SkipEmptyParts);
0175         for (const auto &c : pathComponents) {
0176             if (!shouldFileBeIndexed(c.toString())) {
0177                 return false;
0178             }
0179 #ifndef __unix__
0180             if (!indexHiddenFilesAndFolders() ||
0181                 !d.cd(c.toString()) || QFileInfo(d.path()).isHidden()) {
0182                 return false;
0183             }
0184 #endif
0185         }
0186         return true;
0187     }
0188 
0189     return false;
0190 }
0191 
0192 bool FileIndexerConfig::shouldFileBeIndexed(const QString& fileName) const
0193 {
0194     if (!indexHiddenFilesAndFolders() && fileName.startsWith(QLatin1Char('.'))) {
0195         return false;
0196     }
0197     return !m_excludeFilterRegExpCache.exactMatch(fileName);
0198 }
0199 
0200 bool FileIndexerConfig::shouldMimeTypeBeIndexed(const QString& mimeType) const
0201 {
0202     return !m_excludeMimetypes.contains(mimeType);
0203 }
0204 
0205 bool FileIndexerConfig::folderInFolderList(const QString& path, QString& folder) const
0206 {
0207     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0208 
0209     const QString p = normalizeTrailingSlashes(QString(path));
0210 
0211     for (const auto& entry : m_folderCache) {
0212         const QString& f = entry.path;
0213         if (p.startsWith(f)) {
0214             folder = f;
0215             return entry.isIncluded;
0216         }
0217     }
0218     // path is not in the list, thus it should not be included
0219     folder.clear();
0220     return false;
0221 }
0222 
0223 void FileIndexerConfig::FolderCache::cleanup()
0224 {
0225     // TODO There are two cases where "redundant" includes
0226     // should be kept:
0227     // 1. when the "tail" matches a path exclude filter
0228     //    (m_excludeFilterRegexpCache)
0229     // 2. when the explicitly adds a hidden directory, and
0230     //    we want to index hidden dirs (m_indexHidden)
0231     bool keepAllIncluded = true;
0232 
0233     auto entry = begin();
0234     while (entry != end()) {
0235         if ((*entry).isIncluded && keepAllIncluded) {
0236             ++entry;
0237             continue;
0238         }
0239 
0240         const QString entryPath = (*entry).path;
0241         auto start = entry; ++start;
0242         auto parent = std::find_if(start, end(),
0243             [&entryPath](const FolderConfig& _parent) {
0244                 return entryPath.startsWith(_parent.path);
0245             });
0246 
0247         if (parent != end()) {
0248             if ((*entry).isIncluded == (*parent).isIncluded) {
0249                 // remove identical config
0250                 entry = erase(entry);
0251             } else {
0252                 ++entry;
0253             }
0254         } else {
0255             if (!(*entry).isIncluded) {
0256                 // remove excluded a topmost level (default)
0257                 entry = erase(entry);
0258             } else {
0259                 ++entry;
0260             }
0261         }
0262     }
0263 }
0264 
0265 bool FileIndexerConfig::FolderConfig::operator<(const FolderConfig& other) const
0266 {
0267     return path.size() > other.path.size() ||
0268         (path.size() == other.path.size() && path < other.path);
0269 }
0270 
0271 bool FileIndexerConfig::FolderCache::addFolderConfig(const FolderConfig& config)
0272 {
0273     if (config.path.isEmpty()) {
0274         qCDebug(BALOO) << "Trying to add folder config entry with empty path";
0275         return false;
0276     }
0277     auto newConfig{config};
0278     newConfig.path = QDir::cleanPath(config.path) + QLatin1Char('/');
0279 
0280     auto it = std::lower_bound(cbegin(), cend(), newConfig);
0281     if (it != cend() && (*it).path == newConfig.path) {
0282         qCDebug(BALOO) << "Folder config entry for" << newConfig.path << "already exists";
0283         return false;
0284     }
0285 
0286     it = insert(it, newConfig);
0287     return true;
0288 }
0289 
0290 void FileIndexerConfig::buildFolderCache()
0291 {
0292     if (!m_folderCacheDirty) {
0293         return;
0294     }
0295 
0296     if (!m_devices) {
0297         m_devices = new StorageDevices(this);
0298     }
0299 
0300     FolderCache cache;
0301 
0302     const QStringList includeFolders = m_settings->folders();
0303     for (const auto& folder : includeFolders) {
0304         if (!cache.addFolderConfig({folder, true})) {
0305             qCWarning(BALOO) << "Failed to add include folder config entry for" << folder;
0306         }
0307     }
0308 
0309     const QStringList excludeFolders = m_settings->excludedFolders();
0310     for (const auto& folder : excludeFolders) {
0311         if (!cache.addFolderConfig({folder, false})) {
0312             qCWarning(BALOO) << "Failed to add exclude folder config entry for" << folder;
0313         }
0314     }
0315 
0316     // Add all removable media and network shares as ignored unless they have
0317     // been explicitly added in the include list
0318     const auto allMedia = m_devices->allMedia();
0319     for (const auto& device: allMedia) {
0320         const QString mountPath = device.mountPath();
0321         if (!device.isUsable() && !mountPath.isEmpty()) {
0322             if (!includeFolders.contains(mountPath)) {
0323                 cache.addFolderConfig({mountPath, false});
0324             }
0325         }
0326     }
0327 
0328     cache.cleanup();
0329     qCDebug(BALOO) << "Folder cache:" << cache;
0330     m_folderCache = cache;
0331 
0332     m_folderCacheDirty = false;
0333 }
0334 
0335 void FileIndexerConfig::buildExcludeFilterRegExpCache()
0336 {
0337     QStringList newFilters = excludeFilters();
0338     m_excludeFilterRegExpCache.rebuildCacheFromFilterList(newFilters);
0339 }
0340 
0341 void FileIndexerConfig::buildMimeTypeCache()
0342 {
0343     const QStringList excludedTypes = m_settings->excludedMimetypes();
0344     m_excludeMimetypes = QSet<QString>(excludedTypes.begin(), excludedTypes.end());
0345 }
0346 
0347 void FileIndexerConfig::forceConfigUpdate()
0348 {
0349     m_settings->load();
0350 
0351     m_folderCacheDirty = true;
0352     buildExcludeFilterRegExpCache();
0353     buildMimeTypeCache();
0354 
0355     m_indexHidden = m_settings->indexHiddenFolders();
0356     m_onlyBasicIndexing = m_settings->onlyBasicIndexing();
0357 }
0358 
0359 int FileIndexerConfig::databaseVersion() const
0360 {
0361     return m_settings->dbVersion();
0362 }
0363 
0364 void FileIndexerConfig::setDatabaseVersion(int version)
0365 {
0366     m_settings->setDbVersion(version);
0367     m_settings->save();
0368 }
0369 
0370 bool FileIndexerConfig::indexingEnabled() const
0371 {
0372     return m_settings->indexingEnabled();
0373 }
0374 
0375 uint FileIndexerConfig::maxUncomittedFiles() const
0376 {
0377     return m_maxUncomittedFiles;
0378 }
0379 
0380 } // namespace Baloo
0381 
0382 #include "moc_fileindexerconfig.cpp"