File indexing completed on 2024-04-28 15:17:36

0001 /*
0002     This file is part of the KDE Project
0003     SPDX-FileCopyrightText: 2008-2010 Sebastian Trueg <trueg@kde.org>
0004     SPDX-FileCopyrightText: 2013-2014 Vishesh Handa <me@vhanda.in>
0005     SPDX-FileCopyrightText: 2020 Benjamin Port <benjamin.port@enioka.com>
0006 
0007     SPDX-License-Identifier: LGPL-2.0-or-later
0008 */
0009 
0010 #include "fileindexerconfig.h"
0011 #include "fileexcludefilters.h"
0012 #include "storagedevices.h"
0013 #include "baloodebug.h"
0014 
0015 #include <QStringList>
0016 #include <QDir>
0017 
0018 #include <QStandardPaths>
0019 #include "baloosettings.h"
0020 
0021 namespace
0022 {
0023 QString normalizeTrailingSlashes(QString&& path)
0024 {
0025     while (path.endsWith(QLatin1Char('/'))) {
0026         path.chop(1);
0027     }
0028     path += QLatin1Char('/');
0029     return path;
0030 }
0031 
0032 }
0033 
0034 namespace Baloo
0035 {
0036 
0037 FileIndexerConfig::FileIndexerConfig(QObject* parent)
0038     : QObject(parent)
0039     , m_settings(new BalooSettings(this))
0040     , m_folderCacheDirty(true)
0041     , m_indexHidden(false)
0042     , m_devices(nullptr)
0043     , m_maxUncomittedFiles(40)
0044 {
0045     forceConfigUpdate();
0046 }
0047 
0048 FileIndexerConfig::~FileIndexerConfig()
0049 {
0050 }
0051 
0052 QDebug operator<<(QDebug dbg, const FileIndexerConfig::FolderConfig& entry)
0053 {
0054     QDebugStateSaver saver(dbg);
0055     dbg.nospace() << entry.path << ": "
0056                   << (entry.isIncluded ? "included" : "excluded");
0057     return dbg;
0058 }
0059 
0060 QStringList FileIndexerConfig::includeFolders() const
0061 {
0062     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0063 
0064     QStringList fl;
0065     for (const auto& entry : m_folderCache) {
0066         if (entry.isIncluded) {
0067             fl << entry.path;
0068         }
0069     }
0070     return fl;
0071 }
0072 
0073 QStringList FileIndexerConfig::excludeFolders() const
0074 {
0075     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0076 
0077     QStringList fl;
0078     for (const auto& entry : m_folderCache) {
0079         if (!entry.isIncluded) {
0080             fl << entry.path;
0081         }
0082     }
0083     return fl;
0084 }
0085 
0086 QStringList FileIndexerConfig::excludeFilters() const
0087 {
0088     // read configured exclude filters
0089     QStringList filters = m_settings->excludedFilters();
0090 
0091     // make sure we always keep the latest default exclude filters
0092     // TODO: there is one problem here. What if the user removed some of the default filters?
0093     if (m_settings->excludedFiltersVersion() < defaultExcludeFilterListVersion()) {
0094         filters += defaultExcludeFilterList();
0095         // in case the cfg entry was empty and filters == defaultExcludeFilterList()
0096         filters.removeDuplicates();
0097 
0098         // write the config directly since the KCM does not have support for the version yet
0099         m_settings->setExcludedFilters(filters);
0100         m_settings->setExcludedFiltersVersion(defaultExcludeFilterListVersion());
0101     }
0102 
0103     return filters;
0104 }
0105 
0106 QStringList FileIndexerConfig::excludeMimetypes() const
0107 {
0108     return QList<QString>(m_excludeMimetypes.begin(), m_excludeMimetypes.end());
0109 }
0110 
0111 bool FileIndexerConfig::indexHiddenFilesAndFolders() const
0112 {
0113     return m_indexHidden;
0114 }
0115 
0116 bool FileIndexerConfig::onlyBasicIndexing() const
0117 {
0118     return m_onlyBasicIndexing;
0119 }
0120 
0121 bool FileIndexerConfig::canBeSearched(const QString& folder) const
0122 {
0123     QFileInfo fi(folder);
0124     QString path = fi.absolutePath();
0125     if (!fi.isDir()) {
0126         return false;
0127     } else if (shouldFolderBeIndexed(path)) {
0128         return true;
0129     }
0130 
0131     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0132 
0133     // Look for included descendants
0134     for (const auto& entry : m_folderCache) {
0135         if (entry.isIncluded && entry.path.startsWith(path)) {
0136             return true;
0137         }
0138     }
0139 
0140     return false;
0141 }
0142 
0143 bool FileIndexerConfig::shouldBeIndexed(const QString& path) const
0144 {
0145     QFileInfo fi(path);
0146     if (fi.isDir()) {
0147         return shouldFolderBeIndexed(path);
0148     } else {
0149         return (shouldFolderBeIndexed(fi.absolutePath()) &&
0150                 (!fi.isHidden() || indexHiddenFilesAndFolders()) &&
0151                 shouldFileBeIndexed(fi.fileName()));
0152     }
0153 }
0154 
0155 bool FileIndexerConfig::shouldFolderBeIndexed(const QString& path) const
0156 {
0157     QString folder;
0158     auto normalizedPath = normalizeTrailingSlashes(QString(path));
0159 
0160     if (folderInFolderList(normalizedPath, folder)) {
0161         // we always index the folders in the list
0162         // ignoring the name filters
0163         if (folder == normalizedPath) {
0164             return true;
0165         }
0166 
0167         // check the exclude filters for all components of the path
0168         // after folder
0169 #ifndef __unix__
0170         QDir d(folder);
0171 #endif
0172 
0173 #if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
0174         const QStringView trailingPath = QStringView(normalizedPath).mid(folder.size());
0175 #else
0176         const auto trailingPath = normalizedPath.midRef(folder.size());
0177 #endif
0178         const auto pathComponents = trailingPath.split(QLatin1Char('/'), Qt::SkipEmptyParts);
0179         for (const auto &c : pathComponents) {
0180             if (!shouldFileBeIndexed(c.toString())) {
0181                 return false;
0182             }
0183 #ifndef __unix__
0184             if (!indexHiddenFilesAndFolders() ||
0185                 !d.cd(c.toString()) || QFileInfo(d.path()).isHidden()) {
0186                 return false;
0187             }
0188 #endif
0189         }
0190         return true;
0191     }
0192 
0193     return false;
0194 }
0195 
0196 bool FileIndexerConfig::shouldFileBeIndexed(const QString& fileName) const
0197 {
0198     if (!indexHiddenFilesAndFolders() && fileName.startsWith(QLatin1Char('.'))) {
0199         return false;
0200     }
0201     return !m_excludeFilterRegExpCache.exactMatch(fileName);
0202 }
0203 
0204 bool FileIndexerConfig::shouldMimeTypeBeIndexed(const QString& mimeType) const
0205 {
0206     return !m_excludeMimetypes.contains(mimeType);
0207 }
0208 
0209 bool FileIndexerConfig::folderInFolderList(const QString& path, QString& folder) const
0210 {
0211     const_cast<FileIndexerConfig*>(this)->buildFolderCache();
0212 
0213     const QString p = normalizeTrailingSlashes(QString(path));
0214 
0215     for (const auto& entry : m_folderCache) {
0216         const QString& f = entry.path;
0217         if (p.startsWith(f)) {
0218             folder = f;
0219             return entry.isIncluded;
0220         }
0221     }
0222     // path is not in the list, thus it should not be included
0223     folder.clear();
0224     return false;
0225 }
0226 
0227 void FileIndexerConfig::FolderCache::cleanup()
0228 {
0229     // TODO There are two cases where "redundant" includes
0230     // should be kept:
0231     // 1. when the "tail" matches a path exclude filter
0232     //    (m_excludeFilterRegexpCache)
0233     // 2. when the explicitly adds a hidden directory, and
0234     //    we want to index hidden dirs (m_indexHidden)
0235     bool keepAllIncluded = true;
0236 
0237     auto entry = begin();
0238     while (entry != end()) {
0239         if ((*entry).isIncluded && keepAllIncluded) {
0240             ++entry;
0241             continue;
0242         }
0243 
0244         const QString entryPath = (*entry).path;
0245         auto start = entry; ++start;
0246         auto parent = std::find_if(start, end(),
0247             [&entryPath](const FolderConfig& _parent) {
0248                 return entryPath.startsWith(_parent.path);
0249             });
0250 
0251         if (parent != end()) {
0252             if ((*entry).isIncluded == (*parent).isIncluded) {
0253                 // remove identical config
0254                 entry = erase(entry);
0255             } else {
0256                 ++entry;
0257             }
0258         } else {
0259             if (!(*entry).isIncluded) {
0260                 // remove excluded a topmost level (default)
0261                 entry = erase(entry);
0262             } else {
0263                 ++entry;
0264             }
0265         }
0266     }
0267 }
0268 
0269 bool FileIndexerConfig::FolderConfig::operator<(const FolderConfig& other) const
0270 {
0271     return path.size() > other.path.size() ||
0272         (path.size() == other.path.size() && path < other.path);
0273 }
0274 
0275 bool FileIndexerConfig::FolderCache::addFolderConfig(const FolderConfig& config)
0276 {
0277     if (config.path.isEmpty()) {
0278         qCDebug(BALOO) << "Trying to add folder config entry with empty path";
0279         return false;
0280     }
0281     auto newConfig{config};
0282     newConfig.path = QDir::cleanPath(config.path) + QLatin1Char('/');
0283 
0284     auto it = std::lower_bound(cbegin(), cend(), newConfig);
0285     if (it != cend() && (*it).path == newConfig.path) {
0286         qCDebug(BALOO) << "Folder config entry for" << newConfig.path << "already exists";
0287         return false;
0288     }
0289 
0290     it = insert(it, newConfig);
0291     return true;
0292 }
0293 
0294 void FileIndexerConfig::buildFolderCache()
0295 {
0296     if (!m_folderCacheDirty) {
0297         return;
0298     }
0299 
0300     if (!m_devices) {
0301         m_devices = new StorageDevices(this);
0302     }
0303 
0304     FolderCache cache;
0305 
0306     const QStringList includeFolders = m_settings->folders();
0307     for (const auto& folder : includeFolders) {
0308         if (!cache.addFolderConfig({folder, true})) {
0309             qCWarning(BALOO) << "Failed to add include folder config entry for" << folder;
0310         }
0311     }
0312 
0313     const QStringList excludeFolders = m_settings->excludedFolders();
0314     for (const auto& folder : excludeFolders) {
0315         if (!cache.addFolderConfig({folder, false})) {
0316             qCWarning(BALOO) << "Failed to add exclude folder config entry for" << folder;
0317         }
0318     }
0319 
0320     // Add all removable media and network shares as ignored unless they have
0321     // been explicitly added in the include list
0322     const auto allMedia = m_devices->allMedia();
0323     for (const auto& device: allMedia) {
0324         const QString mountPath = device.mountPath();
0325         if (!device.isUsable() && !mountPath.isEmpty()) {
0326             if (!includeFolders.contains(mountPath)) {
0327                 cache.addFolderConfig({mountPath, false});
0328             }
0329         }
0330     }
0331 
0332     cache.cleanup();
0333     qCDebug(BALOO) << "Folder cache:" << cache;
0334     m_folderCache = cache;
0335 
0336     m_folderCacheDirty = false;
0337 }
0338 
0339 void FileIndexerConfig::buildExcludeFilterRegExpCache()
0340 {
0341     QStringList newFilters = excludeFilters();
0342     m_excludeFilterRegExpCache.rebuildCacheFromFilterList(newFilters);
0343 }
0344 
0345 void FileIndexerConfig::buildMimeTypeCache()
0346 {
0347     const QStringList excludedTypes = m_settings->excludedMimetypes();
0348     m_excludeMimetypes = QSet<QString>(excludedTypes.begin(), excludedTypes.end());
0349 }
0350 
0351 void FileIndexerConfig::forceConfigUpdate()
0352 {
0353     m_settings->load();
0354 
0355     m_folderCacheDirty = true;
0356     buildExcludeFilterRegExpCache();
0357     buildMimeTypeCache();
0358 
0359     m_indexHidden = m_settings->indexHiddenFolders();
0360     m_onlyBasicIndexing = m_settings->onlyBasicIndexing();
0361 }
0362 
0363 int FileIndexerConfig::databaseVersion() const
0364 {
0365     return m_settings->dbVersion();
0366 }
0367 
0368 void FileIndexerConfig::setDatabaseVersion(int version)
0369 {
0370     m_settings->setDbVersion(version);
0371     m_settings->save();
0372 }
0373 
0374 bool FileIndexerConfig::indexingEnabled() const
0375 {
0376     return m_settings->indexingEnabled();
0377 }
0378 
0379 uint FileIndexerConfig::maxUncomittedFiles() const
0380 {
0381     return m_maxUncomittedFiles;
0382 }
0383 
0384 } // namespace Baloo
0385 
0386 #include "moc_fileindexerconfig.cpp"