File indexing completed on 2024-04-28 03:51:42
0001 /* 0002 This file is part of the KDE Baloo project. 0003 SPDX-FileCopyrightText: 2018 Michael Heidelbach <ottwolt@gmail.com> 0004 0005 SPDX-License-Identifier: GPL-2.0-only OR GPL-3.0-only OR LicenseRef-KDE-Accepted-GPL 0006 */ 0007 0008 #include "databasesanitizer.h" 0009 #include "documenturldb.h" 0010 #include "idutils.h" 0011 0012 #include <sys/sysmacros.h> 0013 0014 #include <KLocalizedString> 0015 #include <QFileInfo> 0016 #include <QStorageInfo> 0017 #include <QDebug> 0018 0019 namespace Baloo 0020 { 0021 0022 class DatabaseSanitizerImpl { 0023 public: 0024 DatabaseSanitizerImpl(const Database& db, Transaction::TransactionType type) 0025 : m_transaction(new Transaction(db, type)) 0026 { 0027 } 0028 0029 public: 0030 0031 /** 0032 * \brief Basic info about database items 0033 */ 0034 struct FileInfo { 0035 quint32 deviceId = 0; 0036 quint32 inode = 0; 0037 quint64 id = 0; 0038 bool isSymLink = false; 0039 bool accessible = true; 0040 QString url; 0041 }; 0042 0043 void printProgress(QTextStream& out, uint& cur, const uint max, const uint step) const 0044 { 0045 if (cur % step == 0) { 0046 out << QStringLiteral("%1%2\r").arg(100 * cur / max, 6).arg("%", -16); 0047 out.flush(); 0048 } 0049 cur++; 0050 } 0051 0052 /** 0053 * Summary of createList() actions 0054 */ 0055 struct Summary { 0056 quint64 total = 0; ///Count of all files 0057 quint64 ignored = 0; ///Count of filtered out files 0058 quint64 accessible = 0; ///Count of checked and accessible files 0059 }; 0060 /** 0061 * Create a list of \a FileInfo items. 0062 * 0063 * \p deviceIDs filter by device ids. If the vector is empty no filtering is done 0064 * and every item is collected. 0065 * Positive numbers are including filters collecting only the mentioned device ids. 0066 * Negative numbers are excluding filters collecting everything but the mentioned device ids. 0067 * 0068 * \p accessFilter Flags to filter items by accessibility. 0069 * 0070 * \p urlFilter Filter result urls. Default is null = Collect everything. 0071 */ 0072 QPair<QVector<FileInfo>, Summary> createList( 0073 const QVector<qint64>& deviceIds, 0074 const DatabaseSanitizer::ItemAccessFilters accessFilter, 0075 const QSharedPointer<QRegularExpression>& urlFilter 0076 ) const 0077 { 0078 Q_ASSERT(m_transaction); 0079 0080 const auto docUrlDb = DocumentUrlDB(m_transaction->m_dbis.idTreeDbi, 0081 m_transaction->m_dbis.idFilenameDbi, 0082 m_transaction->m_txn); 0083 const auto map = docUrlDb.toTestMap(); 0084 const auto keys = map.keys(); 0085 QVector<FileInfo> result; 0086 uint max = map.size(); 0087 uint i = 0; 0088 result.reserve(max); 0089 QVector<quint32> includeIds; 0090 QVector<quint32> excludeIds; 0091 for (qint64 deviceId : deviceIds) { 0092 if (deviceId > 0) { 0093 includeIds.append(deviceId); 0094 } else if (deviceId < 0) { 0095 excludeIds.append(-deviceId); 0096 } 0097 } 0098 Summary summary; 0099 summary.total = max; 0100 summary.ignored = max; 0101 QTextStream err(stderr); 0102 0103 for (auto it = map.constBegin(), end = map.constEnd(); it != end; it++) { 0104 printProgress(err, i, max, 100); 0105 const quint64 id = it.key(); 0106 const quint32 deviceId = idToDeviceId(id); 0107 if (!includeIds.isEmpty() && !includeIds.contains(deviceId)) { 0108 continue; 0109 } else if (excludeIds.contains(deviceId)) { 0110 continue; 0111 } else if (urlFilter && !urlFilter->match(it.value()).hasMatch()) { 0112 continue; 0113 } 0114 0115 FileInfo info; 0116 info.deviceId = deviceId; 0117 info.inode = idToInode(id); 0118 info.url = QFile::decodeName(it.value()); 0119 info.id = id; 0120 QFileInfo fileInfo(info.url); 0121 info.accessible = !info.url.isEmpty() && fileInfo.exists(); 0122 0123 if (info.accessible && (accessFilter & DatabaseSanitizer::IgnoreAvailable)) { 0124 continue; 0125 } else if (!info.accessible && (accessFilter & DatabaseSanitizer::IgnoreUnavailable)) { 0126 continue; 0127 } 0128 0129 info.isSymLink = fileInfo.isSymLink(); 0130 0131 result.append(info); 0132 summary.ignored--; 0133 if (info.accessible) { 0134 summary.accessible++; 0135 } 0136 } 0137 return {result, summary}; 0138 } 0139 0140 QStorageInfo getStorageInfo(const quint32 id) { 0141 static QMap<quint32, QStorageInfo> storageInfos = []() { 0142 QMap<quint32, QStorageInfo> result; 0143 const auto volumes = QStorageInfo::mountedVolumes(); 0144 for (const auto& vol : volumes) { 0145 const QByteArray rootPath = QFile::encodeName(vol.rootPath()); 0146 const auto id = filePathToId(rootPath); 0147 const quint32 deviceId = idToDeviceId(id); 0148 // qDebug() << vol; 0149 result[deviceId] = vol; 0150 } 0151 return result; 0152 }(); 0153 0154 QStorageInfo info = storageInfos.value(id); 0155 return info; 0156 } 0157 0158 QMap<quint32, bool> deviceFilters(QVector<FileInfo>& infos, const DatabaseSanitizer::ItemAccessFilters accessFilter) 0159 { 0160 QMap<quint32, bool> result; 0161 for (const auto& info : infos) { 0162 result[info.deviceId] = false; 0163 } 0164 0165 for (auto it = result.begin(), end = result.end(); it != end; it++) { 0166 const auto storageInfo = getStorageInfo(it.key()); 0167 it.value() = isIgnored(storageInfo, accessFilter); 0168 } 0169 return result; 0170 } 0171 0172 bool isIgnored(const QStorageInfo& storageInfo, const DatabaseSanitizer::ItemAccessFilters accessFilter) 0173 { 0174 const bool mounted = storageInfo.isValid(); 0175 if (mounted && (accessFilter & DatabaseSanitizer::IgnoreMounted)) { 0176 return true; 0177 } else if (!mounted && (accessFilter & DatabaseSanitizer::IgnoreUnmounted)) { 0178 return true; 0179 } 0180 0181 if (storageInfo.fileSystemType() == QLatin1String("tmpfs")) { 0182 // Due to the volatility of device ids, an id known by baloo may 0183 // appear as mounted, but is not what baloo expects. 0184 // For example at indexing time 43 was the id of a smb share, but 0185 // at runtime 43 is the id of /run/media/<uid> when other users are 0186 // logged in. The latter have a type of 'tmpfs' and should be ignored. 0187 return true; 0188 } 0189 0190 return false; 0191 } 0192 0193 void removeDocument(const quint64 id) { 0194 m_transaction->removeDocument(id); 0195 } 0196 0197 void commit() { 0198 m_transaction->commit(); 0199 } 0200 0201 void abort() { 0202 m_transaction->abort(); 0203 } 0204 0205 private: 0206 Transaction* m_transaction; 0207 }; 0208 } 0209 0210 using namespace Baloo; 0211 0212 DatabaseSanitizer::DatabaseSanitizer(const Database& db, Baloo::Transaction::TransactionType type) 0213 : m_pimpl(new DatabaseSanitizerImpl(db, type)) 0214 { 0215 } 0216 0217 DatabaseSanitizer::DatabaseSanitizer(Database* db, Transaction::TransactionType type) 0218 : DatabaseSanitizer(*db, type) 0219 { 0220 } 0221 0222 DatabaseSanitizer::~DatabaseSanitizer() 0223 { 0224 delete m_pimpl; 0225 m_pimpl = nullptr; 0226 } 0227 0228 /** 0229 * Create a list of \a FileInfo items and print it to stdout. 0230 * 0231 * \p deviceIDs filter by device ids. If the vector is empty no filtering is done 0232 * and everything is printed. 0233 * Positive numbers are including filters printing only the mentioned device ids. 0234 * Negative numbers are excluding filters printing everything but the mentioned device ids. 0235 * 0236 * \p missingOnly Simulate purging operation. Only inaccessible items are printed. 0237 * 0238 * \p urlFilter Filter result urls. Default is null = Print everything. 0239 */ 0240 void DatabaseSanitizer::printList( 0241 const QVector<qint64>& deviceIds, 0242 const ItemAccessFilters accessFilter, 0243 const QSharedPointer<QRegularExpression>& urlFilter) 0244 { 0245 auto listResult = m_pimpl->createList(deviceIds, accessFilter, urlFilter); 0246 const auto sep = QLatin1Char(' '); 0247 QTextStream out(stdout); 0248 QTextStream err(stderr); 0249 for (const auto& info: listResult.first) { 0250 out << QStringLiteral("%1").arg(info.accessible ? "+" : "!") 0251 << sep << QStringLiteral("device: %1").arg(info.deviceId) 0252 << sep << QStringLiteral("inode: %1").arg(info.inode) 0253 << sep << QStringLiteral("url: %1").arg(info.url) 0254 << endl; 0255 } 0256 0257 const auto& summary = listResult.second; 0258 if (accessFilter & IgnoreAvailable) { 0259 err << i18n("Total: %1, Inaccessible: %2", 0260 summary.total, 0261 summary.total - (summary.ignored + summary.accessible)) << endl; 0262 } else { 0263 err << i18n("Total: %1, Ignored: %2, Accessible: %3, Inaccessible: %4", 0264 summary.total, 0265 summary.ignored, 0266 summary.accessible, 0267 summary.total - (summary.ignored + summary.accessible)) << endl; 0268 } 0269 } 0270 0271 void DatabaseSanitizer::printDevices(const QVector<qint64>& deviceIds, const ItemAccessFilters accessFilter) 0272 { 0273 auto infos = m_pimpl->createList(deviceIds, accessFilter, nullptr); 0274 0275 QMap<quint32, quint64> useCount; 0276 for (const auto& info : infos.first) { 0277 useCount[info.deviceId]++; 0278 } 0279 0280 const auto sep = QLatin1Char(' '); 0281 QTextStream out(stdout); 0282 QTextStream err(stderr); 0283 int matchCount = 0; 0284 for (auto it = useCount.cbegin(); it != useCount.cend(); it++) { 0285 auto id = it.key(); 0286 auto info = m_pimpl->getStorageInfo(id); 0287 auto mounted = info.isValid(); 0288 if (info.fileSystemType() == QLatin1String("tmpfs")) { 0289 continue; 0290 } else if (mounted && (accessFilter & IgnoreMounted)) { 0291 continue; 0292 } else if (!mounted && (accessFilter & IgnoreUnmounted)) { 0293 continue; 0294 } 0295 matchCount++; 0296 // TODO coloring would be nice, but "...|grep '^!'" does not work with it. 0297 // out << QStringLiteral("%1").arg(dev.mounted ? "+" : "\033[1;31m!") 0298 // Can be done, see: https://code.qt.io/cgit/qt/qtbase.git/tree/src/corelib/global/qlogging.cpp#n263 0299 out << QStringLiteral("%1").arg(mounted ? "+" : "!") 0300 << sep << QStringLiteral("device:%1").arg(id) 0301 << sep << QStringLiteral("[%1:%2]") 0302 .arg(major(id), 4, 16, QLatin1Char('0')) 0303 .arg(minor(id), 4, 16, QLatin1Char('0')) 0304 << sep << QStringLiteral("indexed-items:%1").arg(it.value()); 0305 0306 if (mounted) { 0307 out 0308 << sep << QStringLiteral("fstype:%1").arg(info.fileSystemType().toPercentEncoding().constData()) 0309 << sep << QStringLiteral("device:%1").arg(info.device().constData()) 0310 << sep << QStringLiteral("path:%1").arg(info.rootPath()) 0311 ; 0312 } 0313 // TODO: see above 0314 // out << QStringLiteral("\033[0m") << endl; 0315 out << endl; 0316 } 0317 0318 err << i18n("Found %1 matching in %2 devices", matchCount, useCount.size()) << endl; 0319 } 0320 0321 void DatabaseSanitizer::removeStaleEntries(const QVector<qint64>& deviceIds, 0322 const DatabaseSanitizer::ItemAccessFilters accessFilter, 0323 const bool dryRun, 0324 const QSharedPointer<QRegularExpression>& urlFilter) 0325 { 0326 auto listResult = m_pimpl->createList(deviceIds, IgnoreAvailable, urlFilter); 0327 0328 const auto ignoredDevices = m_pimpl->deviceFilters(listResult.first, accessFilter); 0329 0330 const auto sep = QLatin1Char(' '); 0331 auto& summary = listResult.second; 0332 QTextStream out(stdout); 0333 QTextStream err(stderr); 0334 for (const auto& info: listResult.first) { 0335 if (ignoredDevices[info.deviceId] == true) { 0336 summary.ignored++; 0337 } else { 0338 if (info.isSymLink) { 0339 out << i18n("IgnoredSymbolicLink:"); 0340 summary.ignored++; 0341 } else { 0342 m_pimpl->removeDocument(info.id); 0343 out << i18n("Removing:"); 0344 } 0345 out << sep << QStringLiteral("device: %1").arg(info.deviceId) 0346 << sep << QStringLiteral("inode: %1").arg(info.inode) 0347 << sep << QStringLiteral("url: %1").arg(info.url) 0348 << endl; 0349 } 0350 } 0351 if (dryRun) { 0352 m_pimpl->abort(); 0353 } else { 0354 m_pimpl->commit(); 0355 } 0356 Q_ASSERT(summary.accessible == 0); 0357 err << i18nc("numbers", "Removed: %1, Total: %2, Ignored: %3", 0358 summary.total - summary.ignored, 0359 summary.total, 0360 summary.ignored) 0361 << endl; 0362 }