File indexing completed on 2025-01-05 04:47:04

0001 /*
0002     SPDX-FileCopyrightText: 2011 Volker Krause <vkrause@kde.org>
0003 
0004     SPDX-License-Identifier: LGPL-2.0-or-later
0005 */
0006 
0007 #include "storagejanitor.h"
0008 #include "agentmanagerinterface.h"
0009 #include "akonadi.h"
0010 #include "akonadiserver_debug.h"
0011 #include "akranges.h"
0012 #include "entities.h"
0013 #include "resourcemanager.h"
0014 #include "search/searchmanager.h"
0015 #include "search/searchrequest.h"
0016 #include "storage/collectionstatistics.h"
0017 #include "storage/datastore.h"
0018 #include "storage/dbconfig.h"
0019 #include "storage/dbtype.h"
0020 #include "storage/query.h"
0021 #include "storage/queryhelper.h"
0022 #include "storage/selectquerybuilder.h"
0023 #include "storage/transaction.h"
0024 
0025 #include "private/dbus_p.h"
0026 #include "private/externalpartstorage_p.h"
0027 #include "private/imapset_p.h"
0028 #include "private/standarddirs_p.h"
0029 
0030 #include <QDateTime>
0031 #include <QDir>
0032 #include <QDirIterator>
0033 #include <QSqlError>
0034 #include <QSqlQuery>
0035 #include <QStringBuilder>
0036 
0037 #include <algorithm>
0038 #include <functional>
0039 #include <qregularexpression.h>
0040 
0041 using namespace Akonadi;
0042 using namespace Akonadi::Server;
0043 using namespace AkRanges;
0044 
// DataStore subclass used by StorageJanitor::init() to create the janitor's
// own data store. It adds no members or behaviour; its constructor only
// forwards both arguments to the DataStore constructor.
// NOTE(review): presumably needed because DataStore's constructor is not
// publicly accessible - confirm against storage/datastore.h.
class StorageJanitorDataStore : public DataStore
{
public:
    StorageJanitorDataStore(AkonadiServer *server, DbConfig *config)
        : DataStore(server, config)
    {
    }
};
0053 
// Constructs a janitor bound to a server instance. The AkThread base is set up
// with the name "StorageJanitor" and QThread::IdlePriority. No lost+found
// collection is known yet (-1); it is created lazily by lostAndFoundCollection().
StorageJanitor::StorageJanitor(AkonadiServer *akonadi, DbConfig *dbConfig)
    : AkThread(QStringLiteral("StorageJanitor"), QThread::IdlePriority)
    , m_lostFoundCollectionId(-1)
    , m_akonadi(akonadi)
    , m_dbConfig(dbConfig)
{
}
0061 
// Constructs a janitor without a server instance (m_akonadi stays nullptr), using
// AkThread::NoThread. registerTasks() skips the tasks that need a server in this
// mode. init() is invoked directly from the constructor; the call is explicitly
// qualified so that exactly StorageJanitor::init() runs.
StorageJanitor::StorageJanitor(DbConfig *dbConfig)
    : AkThread(QStringLiteral("StorageJanitor"), AkThread::NoThread)
    , m_lostFoundCollectionId(-1)
    , m_akonadi(nullptr)
    , m_dbConfig(dbConfig)
{
    StorageJanitor::init();
}
0070 
// Shuts the janitor down through quitThread().
// NOTE(review): quitThread() is declared outside this file; presumably it ends up
// running quit() - confirm against AkThread.
StorageJanitor::~StorageJanitor()
{
    quitThread();
}
0075 
0076 void StorageJanitor::init()
0077 {
0078     AkThread::init();
0079 
0080     registerTasks();
0081 
0082     m_dataStore = std::make_unique<StorageJanitorDataStore>(m_akonadi, m_dbConfig);
0083     m_dataStore->open();
0084 
0085     QDBusConnection conn = QDBusConnection::sessionBus();
0086     conn.registerService(DBus::serviceName(DBus::StorageJanitor));
0087     conn.registerObject(QStringLiteral(AKONADI_DBUS_STORAGEJANITOR_PATH),
0088                         this,
0089                         QDBusConnection::ExportScriptableSlots | QDBusConnection::ExportScriptableSignals);
0090 }
0091 
0092 void StorageJanitor::quit()
0093 {
0094     QDBusConnection conn = QDBusConnection::sessionBus();
0095     conn.unregisterObject(QStringLiteral(AKONADI_DBUS_STORAGEJANITOR_PATH), QDBusConnection::UnregisterTree);
0096     conn.unregisterService(DBus::serviceName(DBus::StorageJanitor));
0097 
0098     // Make sure all children are deleted within context of this thread
0099     qDeleteAll(children());
0100 
0101     m_dataStore->close();
0102 
0103     AkThread::quit();
0104 }
0105 
// Fills m_tasks with the checks that check() runs, in the order listed here.
// Each entry pairs the progress message shown to the user with the member
// function implementing the check.
void StorageJanitor::registerTasks()
{
    // Tasks that only need database access.
    m_tasks = {{QStringLiteral("Looking for collections not belonging to a valid resource..."), &StorageJanitor::findOrphanedCollections},
               {QStringLiteral("Checking collection tree consistency..."), &StorageJanitor::checkCollectionTreeConsistency},
               {QStringLiteral("Looking for items not belonging to a valid collection..."), &StorageJanitor::findOrphanedItems},
               {QStringLiteral("Looking for item parts not belonging to a valid item..."), &StorageJanitor::findOrphanedParts},
               {QStringLiteral("Looking for item flags not belonging to a valid item..."), &StorageJanitor::findOrphanedPimItemFlags},
               {QStringLiteral("Looking for duplicate item flags..."), &StorageJanitor::findDuplicateFlags},
               {QStringLiteral("Looking for duplicate mime types..."), &StorageJanitor::findDuplicateMimeTypes},
               {QStringLiteral("Looking for duplicate part types..."), &StorageJanitor::findDuplicatePartTypes},
               {QStringLiteral("Looking for duplicate tag types..."), &StorageJanitor::findDuplicateTagTypes},
               {QStringLiteral("Looking for duplicate relation types..."), &StorageJanitor::findDuplicateRelationTypes},
               {QStringLiteral("Looking for overlapping external parts..."), &StorageJanitor::findOverlappingParts},
               {QStringLiteral("Verifying external parts..."), &StorageJanitor::verifyExternalParts},
               {QStringLiteral("Checking size threshold changes..."), &StorageJanitor::checkSizeTreshold},
               {QStringLiteral("Looking for dirty objects..."), &StorageJanitor::findDirtyObjects},
               {QStringLiteral("Looking for rid-duplicates not matching the content mime-type of the parent collection"), &StorageJanitor::findRIDDuplicates},
               {QStringLiteral("Migrating parts to new cache hierarchy..."), &StorageJanitor::migrateToLevelledCacheHierarchy},
               {QStringLiteral("Making sure virtual search resource and collections exist"), &StorageJanitor::ensureSearchCollection}};

    // Tasks that require a valid Akonadi instance; they are left out when the
    // janitor was constructed without a server (m_akonadi == nullptr).
    if (m_akonadi) {
        m_tasks += {{QStringLiteral("Looking for resources in the DB not matching a configured resource..."), &StorageJanitor::findOrphanedResources},
                    {QStringLiteral("Checking search index consistency..."), &StorageJanitor::findOrphanSearchIndexEntries},
                    {QStringLiteral("Flushing collection statistics memory cache..."), &StorageJanitor::expireCollectionStatisticsCache}};
    }

    /* TODO some ideas for further checks:
     * the collection tree is non-cyclic
     * content type constraints of collections are not violated
     * find unused flags
     * find unused mimetypes
     * check for dead entries in relation tables
     * check if part size matches file size
     */
}
0142 
0143 void StorageJanitor::check() // implementation of `akonadictl fsck`
0144 {
0145     m_lostFoundCollectionId = -1; // start with a fresh one each time
0146 
0147     for (const auto &[idx, task] : m_tasks | Views::enumerate(1)) {
0148         inform(QStringLiteral("%1/%2 %3").arg(idx, 2).arg(m_tasks.size()).arg(task.name));
0149         std::invoke(task.func, this);
0150     }
0151 
0152     inform("Consistency check done.");
0153 
0154     Q_EMIT done();
0155 }
0156 
0157 qint64 StorageJanitor::lostAndFoundCollection()
0158 {
0159     if (m_lostFoundCollectionId > 0) {
0160         return m_lostFoundCollectionId;
0161     }
0162 
0163     Transaction transaction(m_dataStore.get(), QStringLiteral("JANITOR LOST+FOUND"));
0164     Resource lfRes = Resource::retrieveByName(m_dataStore.get(), QStringLiteral("akonadi_lost+found_resource"));
0165     if (!lfRes.isValid()) {
0166         lfRes.setName(QStringLiteral("akonadi_lost+found_resource"));
0167         if (!lfRes.insert(m_dataStore.get())) {
0168             qCCritical(AKONADISERVER_LOG) << "Failed to create lost+found resource!";
0169         }
0170     }
0171 
0172     Collection lfRoot;
0173     SelectQueryBuilder<Collection> qb(m_dataStore.get());
0174     qb.addValueCondition(Collection::resourceIdFullColumnName(), Query::Equals, lfRes.id());
0175     qb.addValueCondition(Collection::parentIdFullColumnName(), Query::Is, QVariant());
0176     if (!qb.exec()) {
0177         qCCritical(AKONADISERVER_LOG) << "Failed to query top level collections";
0178         return -1;
0179     }
0180     const Collection::List cols = qb.result();
0181     if (cols.size() > 1) {
0182         qCCritical(AKONADISERVER_LOG) << "More than one top-level lost+found collection!?";
0183     } else if (cols.size() == 1) {
0184         lfRoot = cols.first();
0185     } else {
0186         lfRoot.setName(QStringLiteral("lost+found"));
0187         lfRoot.setResourceId(lfRes.id());
0188         lfRoot.setCachePolicyLocalParts(QStringLiteral("ALL"));
0189         lfRoot.setCachePolicyCacheTimeout(-1);
0190         lfRoot.setCachePolicyInherit(false);
0191         if (!lfRoot.insert(m_dataStore.get())) {
0192             qCCritical(AKONADISERVER_LOG) << "Failed to create lost+found root.";
0193         }
0194         if (m_akonadi) {
0195             m_dataStore->notificationCollector()->collectionAdded(lfRoot, lfRes.name().toUtf8());
0196         }
0197     }
0198 
0199     Collection lfCol;
0200     lfCol.setName(QDateTime::currentDateTime().toString(QStringLiteral("yyyy-MM-dd hh:mm:ss")));
0201     lfCol.setResourceId(lfRes.id());
0202     lfCol.setParentId(lfRoot.id());
0203     if (!lfCol.insert(m_dataStore.get())) {
0204         qCCritical(AKONADISERVER_LOG) << "Failed to create lost+found collection!";
0205     }
0206 
0207     const auto retrieveAll = MimeType::retrieveAll(m_dataStore.get());
0208     for (const MimeType &mt : retrieveAll) {
0209         lfCol.addMimeType(m_dataStore.get(), mt);
0210     }
0211 
0212     if (m_akonadi) {
0213         m_dataStore->notificationCollector()->collectionAdded(lfCol, lfRes.name().toUtf8());
0214     }
0215 
0216     transaction.commit();
0217     m_lostFoundCollectionId = lfCol.id();
0218     return m_lostFoundCollectionId;
0219 }
0220 
0221 void StorageJanitor::findOrphanedResources()
0222 {
0223     SelectQueryBuilder<Resource> qbres(m_dataStore.get());
0224     OrgFreedesktopAkonadiAgentManagerInterface iface(DBus::serviceName(DBus::Control), QStringLiteral("/AgentManager"), QDBusConnection::sessionBus(), this);
0225     if (!iface.isValid()) {
0226         inform(QStringLiteral("ERROR: Couldn't talk to %1").arg(DBus::Control));
0227         return;
0228     }
0229     const QStringList knownResources = iface.agentInstances();
0230     if (knownResources.isEmpty()) {
0231         inform(QStringLiteral("ERROR: no known resources. This must be a mistake?"));
0232         return;
0233     }
0234     qbres.addValueCondition(Resource::nameFullColumnName(), Query::NotIn, QVariant(knownResources));
0235     qbres.addValueCondition(Resource::idFullColumnName(), Query::NotEquals, 1); // skip akonadi_search_resource
0236     if (!qbres.exec()) {
0237         inform("Failed to query known resources, skipping test");
0238         return;
0239     }
0240     // qCDebug(AKONADISERVER_LOG) << "SQL:" << qbres.query().lastQuery();
0241     const Resource::List orphanResources = qbres.result();
0242     const int orphanResourcesSize(orphanResources.size());
0243     if (orphanResourcesSize > 0) {
0244         QStringList resourceNames;
0245         resourceNames.reserve(orphanResourcesSize);
0246         for (const Resource &resource : orphanResources) {
0247             resourceNames.append(resource.name());
0248         }
0249         inform(QStringLiteral("Found %1 orphan resources: %2").arg(orphanResourcesSize).arg(resourceNames.join(QLatin1Char(','))));
0250         for (const QString &resourceName : std::as_const(resourceNames)) {
0251             inform(QStringLiteral("Removing resource %1").arg(resourceName));
0252             m_akonadi->resourceManager().removeResourceInstance(resourceName);
0253         }
0254     }
0255 }
0256 
0257 void StorageJanitor::findOrphanedCollections()
0258 {
0259     SelectQueryBuilder<Collection> qb(m_dataStore.get());
0260     qb.addJoin(QueryBuilder::LeftJoin, Resource::tableName(), Collection::resourceIdFullColumnName(), Resource::idFullColumnName());
0261     qb.addValueCondition(Resource::idFullColumnName(), Query::Is, QVariant());
0262 
0263     if (!qb.exec()) {
0264         inform("Failed to query orphaned collections, skipping test");
0265         return;
0266     }
0267     const Collection::List orphans = qb.result();
0268     if (!orphans.isEmpty()) {
0269         inform(QLatin1StringView("Found ") + QString::number(orphans.size()) + QLatin1StringView(" orphan collections."));
0270         // TODO: attach to lost+found resource
0271     }
0272 }
0273 
0274 void StorageJanitor::checkCollectionTreeConsistency()
0275 {
0276     const Collection::List cols = Collection::retrieveAll(m_dataStore.get());
0277     std::for_each(cols.begin(), cols.end(), [this](const Collection &col) {
0278         checkPathToRoot(col);
0279     });
0280 }
0281 
0282 void StorageJanitor::checkPathToRoot(const Collection &col)
0283 {
0284     if (col.parentId() == 0) {
0285         return;
0286     }
0287     const Collection parent = col.parent(m_dataStore.get());
0288     if (!parent.isValid()) {
0289         inform(QLatin1StringView("Collection \"") + col.name() + QLatin1StringView("\" (id: ") + QString::number(col.id())
0290                + QLatin1StringView(") has no valid parent."));
0291         // TODO fix that by attaching to a top-level lost+found folder
0292         return;
0293     }
0294 
0295     if (col.resourceId() != parent.resourceId()) {
0296         inform(QLatin1StringView("Collection \"") + col.name() + QLatin1StringView("\" (id: ") + QString::number(col.id())
0297                + QLatin1StringView(") belongs to a different resource than its parent."));
0298         // can/should we actually fix that?
0299     }
0300 
0301     checkPathToRoot(parent);
0302 }
0303 
0304 void StorageJanitor::findOrphanedItems()
0305 {
0306     SelectQueryBuilder<PimItem> qb(m_dataStore.get());
0307     qb.addJoin(QueryBuilder::LeftJoin, Collection::tableName(), PimItem::collectionIdFullColumnName(), Collection::idFullColumnName());
0308     qb.addValueCondition(Collection::idFullColumnName(), Query::Is, QVariant());
0309     if (!qb.exec()) {
0310         inform("Failed to query orphaned items, skipping test");
0311         return;
0312     }
0313     const PimItem::List orphans = qb.result();
0314     if (!orphans.isEmpty()) {
0315         inform(QLatin1StringView("Found ") + QString::number(orphans.size()) + QLatin1StringView(" orphan items."));
0316         // Attach to lost+found collection
0317         Transaction transaction(m_dataStore.get(), QStringLiteral("JANITOR ORPHANS"));
0318         QueryBuilder qb(m_dataStore.get(), PimItem::tableName(), QueryBuilder::Update);
0319         qint64 col = lostAndFoundCollection();
0320         if (col == -1) {
0321             return;
0322         }
0323         qb.setColumnValue(PimItem::collectionIdColumn(), col);
0324         QList<ImapSet::Id> imapIds;
0325         imapIds.reserve(orphans.count());
0326         for (const PimItem &item : std::as_const(orphans)) {
0327             imapIds.append(item.id());
0328         }
0329         ImapSet set;
0330         set.add(imapIds);
0331         QueryHelper::setToQuery(set, PimItem::idFullColumnName(), qb);
0332         if (qb.exec() && transaction.commit()) {
0333             inform(QLatin1StringView("Moved orphan items to collection ") + QString::number(col));
0334         } else {
0335             inform(QLatin1StringView("Error moving orphan items to collection ") + QString::number(col) + QLatin1StringView(" : ")
0336                    + qb.query().lastError().text());
0337         }
0338     }
0339 }
0340 
0341 void StorageJanitor::findOrphanedParts()
0342 {
0343     SelectQueryBuilder<Part> qb(m_dataStore.get());
0344     qb.addJoin(QueryBuilder::LeftJoin, PimItem::tableName(), Part::pimItemIdFullColumnName(), PimItem::idFullColumnName());
0345     qb.addValueCondition(PimItem::idFullColumnName(), Query::Is, QVariant());
0346     if (!qb.exec()) {
0347         inform("Failed to query orphaned parts, skipping test");
0348         return;
0349     }
0350     const Part::List orphans = qb.result();
0351     if (!orphans.isEmpty()) {
0352         inform(QLatin1StringView("Found ") + QString::number(orphans.size()) + QLatin1StringView(" orphan parts."));
0353         // TODO: create lost+found items for those? delete?
0354     }
0355 }
0356 
0357 void StorageJanitor::findOrphanedPimItemFlags()
0358 {
0359     QueryBuilder sqb(m_dataStore.get(), PimItemFlagRelation::tableName(), QueryBuilder::Select);
0360     sqb.addColumn(PimItemFlagRelation::leftFullColumnName());
0361     sqb.addJoin(QueryBuilder::LeftJoin, PimItem::tableName(), PimItemFlagRelation::leftFullColumnName(), PimItem::idFullColumnName());
0362     sqb.addValueCondition(PimItem::idFullColumnName(), Query::Is, QVariant());
0363     if (!sqb.exec()) {
0364         inform("Failed to query orphaned item flags, skipping test");
0365         return;
0366     }
0367     QList<ImapSet::Id> imapIds;
0368     int count = 0;
0369     while (sqb.query().next()) {
0370         ++count;
0371         imapIds.append(sqb.query().value(0).toInt());
0372     }
0373     sqb.query().finish();
0374     if (count > 0) {
0375         ImapSet set;
0376         set.add(imapIds);
0377         QueryBuilder qb(m_dataStore.get(), PimItemFlagRelation::tableName(), QueryBuilder::Delete);
0378         QueryHelper::setToQuery(set, PimItemFlagRelation::leftFullColumnName(), qb);
0379         if (!qb.exec()) {
0380             qCCritical(AKONADISERVER_LOG) << "Error:" << qb.query().lastError().text();
0381             return;
0382         }
0383 
0384         inform(QLatin1StringView("Found and deleted ") + QString::number(count) + QLatin1StringView(" orphan pim item flags."));
0385     }
0386 }
0387 
// Describes the table that references a deduplicated entity; consumed by
// findDuplicatesImpl() to re-point references before removing duplicates.
struct RelationDesc {
    // Name of the referencing table that gets updated.
    QString tableName;
    // Column in that table holding the id of the deduplicated entity.
    QString deduplEntityIdColumnName;
};
0392 
0393 template<typename DeduplEntity>
0394 std::optional<int> findDuplicatesImpl(DataStore *dataStore, const QString &nameColumn, const RelationDesc &relation)
0395 {
0396     QueryBuilder sqb(dataStore, DeduplEntity::tableName(), QueryBuilder::Select);
0397     sqb.addColumns({DeduplEntity::idColumn(), nameColumn});
0398     sqb.addSortColumn(DeduplEntity::idColumn());
0399     if (!sqb.exec()) {
0400         return std::nullopt;
0401     }
0402 
0403     QMap<QString, QVariantList> duplicates;
0404     while (sqb.query().next()) {
0405         const auto id = sqb.query().value(0).toLongLong();
0406         const auto name = sqb.query().value(1).toString();
0407 
0408         auto it = duplicates.find(name.trimmed());
0409         if (it == duplicates.end()) {
0410             it = duplicates.insert(name.trimmed(), QVariantList{});
0411         }
0412         it->push_back(id);
0413     }
0414 
0415     int removed = 0;
0416     for (const auto &[duplicateName, duplicateIds] : duplicates.asKeyValueRange()) {
0417         if (duplicateIds.size() <= 1) {
0418             // Not duplicated
0419             continue;
0420         }
0421 
0422         Transaction transaction(dataStore, QStringLiteral("StorageJanitor deduplicate %1 %2").arg(DeduplEntity::tableName(), duplicateName));
0423 
0424         // Update all relations with duplicated entity to use the lowest entity ID, so we can remove the
0425         // duplicates afterwards
0426         const auto firstId = duplicateIds.takeFirst();
0427 
0428         QueryBuilder updateQb(dataStore, relation.tableName, QueryBuilder::Update);
0429         updateQb.setColumnValue(relation.deduplEntityIdColumnName, firstId);
0430         updateQb.addValueCondition(relation.deduplEntityIdColumnName, Query::In, duplicateIds);
0431         if (!updateQb.exec()) {
0432             continue;
0433         }
0434 
0435         // Remove the duplicated entities
0436         QueryBuilder removeQb(dataStore, DeduplEntity::tableName(), QueryBuilder::Delete);
0437         removeQb.addValueCondition(DeduplEntity::idColumn(), Query::In, duplicateIds);
0438         if (!removeQb.exec()) {
0439             continue;
0440         }
0441 
0442         ++removed;
0443 
0444         transaction.commit();
0445     }
0446 
0447     return removed;
0448 }
0449 
0450 void StorageJanitor::findDuplicateFlags()
0451 {
0452     const auto removed =
0453         findDuplicatesImpl<Flag>(m_dataStore.get(), Flag::nameFullColumnName(), {PimItemFlagRelation::tableName(), PimItemFlagRelation::rightFullColumnName()});
0454     if (removed) {
0455         inform(u"Removed " % QString::number(*removed) % u" duplicate item flags");
0456     } else {
0457         inform("Error while trying to remove duplicate Flags");
0458     }
0459 }
0460 
0461 void StorageJanitor::findDuplicateMimeTypes()
0462 {
0463     const auto removed =
0464         findDuplicatesImpl<MimeType>(m_dataStore.get(), MimeType::nameFullColumnName(), {PimItem::tableName(), PimItem::mimeTypeIdFullColumnName()});
0465     if (removed) {
0466         inform(u"Removed " % QString::number(*removed) % u" duplicate mime types");
0467     } else {
0468         inform("Error while trying to remove duplicate MimeTypes");
0469     }
0470 }
0471 
0472 void StorageJanitor::findDuplicatePartTypes()
0473 {
0474     // Good thing that SQL is ANSI/ISO standardized...
0475     QString nameColumn;
0476     if (DbType::type(m_dataStore->database()) == DbType::MySQL) {
0477         nameColumn = QStringLiteral("CONCAT_WS(':', %1, %2) AS name");
0478     } else {
0479         nameColumn = QStringLiteral("(%1 || ':' || %2) AS name");
0480     }
0481 
0482     const auto removed = findDuplicatesImpl<PartType>(m_dataStore.get(),
0483                                                       nameColumn.arg(PartType::nsFullColumnName(), PartType::nameFullColumnName()),
0484                                                       {Part::tableName(), Part::partTypeIdFullColumnName()});
0485     if (removed) {
0486         inform(u"Removed " % QString::number(*removed) % u" duplicate part types");
0487     } else {
0488         inform("Error while trying to remove duplicate PartTypes");
0489     }
0490 }
0491 
0492 void StorageJanitor::findDuplicateTagTypes()
0493 {
0494     const auto removed = findDuplicatesImpl<TagType>(m_dataStore.get(), TagType::nameFullColumnName(), {Tag::tableName(), Tag::typeIdFullColumnName()});
0495     if (removed) {
0496         inform(u"Removed " % QString::number(*removed) % u" duplicate tag types");
0497     } else {
0498         inform("Error while trying to remove duplicate TagTypes");
0499     }
0500 }
0501 
0502 void StorageJanitor::findDuplicateRelationTypes()
0503 {
0504     const auto removed =
0505         findDuplicatesImpl<RelationType>(m_dataStore.get(), RelationType::nameFullColumnName(), {Relation::tableName(), Relation::typeIdFullColumnName()});
0506     if (removed) {
0507         inform(u"Removed " % QString::number(*removed) % u" duplicate relation types");
0508     } else {
0509         inform("Error while trying to remove duplicate RelationTypes");
0510     }
0511 }
0512 
0513 void StorageJanitor::findOverlappingParts()
0514 {
0515     QueryBuilder qb(m_dataStore.get(), Part::tableName(), QueryBuilder::Select);
0516     qb.addColumn(Part::dataColumn());
0517     qb.addColumn(QLatin1StringView("count(") + Part::idColumn() + QLatin1StringView(") as cnt"));
0518     qb.addValueCondition(Part::storageColumn(), Query::Equals, Part::External);
0519     qb.addValueCondition(Part::dataColumn(), Query::IsNot, QVariant());
0520     qb.addGroupColumn(Part::dataColumn());
0521     qb.addValueCondition(QLatin1StringView("count(") + Part::idColumn() + QLatin1StringView(")"), Query::Greater, 1, QueryBuilder::HavingCondition);
0522     if (!qb.exec()) {
0523         inform("Failed to query overlapping parts, skipping test");
0524         return;
0525     }
0526 
0527     int count = 0;
0528     while (qb.query().next()) {
0529         ++count;
0530         inform(QLatin1StringView("Found overlapping part data: ") + qb.query().value(0).toString());
0531         // TODO: uh oh, this is bad, how do we recover from that?
0532     }
0533     qb.query().finish();
0534 
0535     if (count > 0) {
0536         inform(QLatin1StringView("Found ") + QString::number(count) + QLatin1StringView(" overlapping parts - bad."));
0537     }
0538 }
0539 
0540 void StorageJanitor::verifyExternalParts()
0541 {
0542     QSet<QString> existingFiles;
0543     QSet<QString> usedFiles;
0544 
0545     // list all files
0546     const QString dataDir = StandardDirs::saveDir("data", QStringLiteral("file_db_data"));
0547     QDirIterator it(dataDir, QDir::Files, QDirIterator::Subdirectories);
0548     while (it.hasNext()) {
0549         existingFiles.insert(it.next());
0550     }
0551     existingFiles.remove(dataDir + QDir::separator() + QLatin1Char('.'));
0552     existingFiles.remove(dataDir + QDir::separator() + QLatin1StringView(".."));
0553     inform(QLatin1StringView("Found ") + QString::number(existingFiles.size()) + QLatin1StringView(" external files."));
0554 
0555     // list all parts from the db which claim to have an associated file
0556     QueryBuilder qb(m_dataStore.get(), Part::tableName(), QueryBuilder::Select);
0557     qb.addColumn(Part::dataColumn());
0558     qb.addColumn(Part::pimItemIdColumn());
0559     qb.addColumn(Part::idColumn());
0560     qb.addValueCondition(Part::storageColumn(), Query::Equals, Part::External);
0561     qb.addValueCondition(Part::dataColumn(), Query::IsNot, QVariant());
0562     if (!qb.exec()) {
0563         inform("Failed to query existing parts, skipping test");
0564         return;
0565     }
0566     while (qb.query().next()) {
0567         const auto filename = qb.query().value(0).toByteArray();
0568         const auto pimItemId = qb.query().value(1).value<Entity::Id>();
0569         const auto partId = qb.query().value(2).value<Entity::Id>();
0570         QString partPath;
0571         if (!filename.isEmpty()) {
0572             partPath = ExternalPartStorage::resolveAbsolutePath(filename);
0573         } else {
0574             partPath = ExternalPartStorage::resolveAbsolutePath(ExternalPartStorage::nameForPartId(partId));
0575         }
0576         if (existingFiles.contains(partPath)) {
0577             usedFiles.insert(partPath);
0578         } else {
0579             inform(QLatin1StringView("Cleaning up missing external file: ") + partPath + QLatin1StringView(" for item: ") + QString::number(pimItemId)
0580                    + QLatin1StringView(" on part: ") + QString::number(partId));
0581 
0582             Part part;
0583             part.setId(partId);
0584             part.setPimItemId(pimItemId);
0585             part.setData(QByteArray());
0586             part.setDatasize(0);
0587             part.setStorage(Part::Internal);
0588             part.update(m_dataStore.get());
0589         }
0590     }
0591     qb.query().finish();
0592     inform(QLatin1StringView("Found ") + QString::number(usedFiles.size()) + QLatin1StringView(" external parts."));
0593 
0594     // see what's left and move it to lost+found
0595     const QSet<QString> unreferencedFiles = existingFiles - usedFiles;
0596     if (!unreferencedFiles.isEmpty()) {
0597         const QString lfDir = StandardDirs::saveDir("data", QStringLiteral("file_lost+found"));
0598         for (const QString &file : unreferencedFiles) {
0599             inform(QLatin1StringView("Found unreferenced external file: ") + file);
0600             const QFileInfo f(file);
0601             QFile::rename(file, lfDir + QDir::separator() + f.fileName());
0602         }
0603         inform(QStringLiteral("Moved %1 unreferenced files to lost+found.").arg(unreferencedFiles.size()));
0604     } else {
0605         inform("Found no unreferenced external files.");
0606     }
0607 }
0608 
0609 void StorageJanitor::findDirtyObjects()
0610 {
0611     SelectQueryBuilder<Collection> cqb(m_dataStore.get());
0612     cqb.setSubQueryMode(Query::Or);
0613     cqb.addValueCondition(Collection::remoteIdColumn(), Query::Is, QVariant());
0614     cqb.addValueCondition(Collection::remoteIdColumn(), Query::Equals, QString());
0615     if (!cqb.exec()) {
0616         inform("Failed to query collections without RID, skipping test");
0617         return;
0618     }
0619     const Collection::List ridLessCols = cqb.result();
0620     for (const Collection &col : ridLessCols) {
0621         inform(QLatin1StringView("Collection \"") + col.name() + QLatin1StringView("\" (id: ") + QString::number(col.id())
0622                + QLatin1StringView(") has no RID."));
0623     }
0624     inform(QLatin1StringView("Found ") + QString::number(ridLessCols.size()) + QLatin1StringView(" collections without RID."));
0625 
0626     SelectQueryBuilder<PimItem> iqb1(m_dataStore.get());
0627     iqb1.setSubQueryMode(Query::Or);
0628     iqb1.addValueCondition(PimItem::remoteIdColumn(), Query::Is, QVariant());
0629     iqb1.addValueCondition(PimItem::remoteIdColumn(), Query::Equals, QString());
0630     if (!iqb1.exec()) {
0631         inform("Failed to query items without RID, skipping test");
0632         return;
0633     }
0634     const PimItem::List ridLessItems = iqb1.result();
0635     for (const PimItem &item : ridLessItems) {
0636         inform(QLatin1StringView("Item \"") + QString::number(item.id()) + QLatin1StringView("\" in collection \"") + QString::number(item.collectionId())
0637                + QLatin1StringView("\" has no RID."));
0638     }
0639     inform(QLatin1StringView("Found ") + QString::number(ridLessItems.size()) + QLatin1StringView(" items without RID."));
0640 
0641     SelectQueryBuilder<PimItem> iqb2(m_dataStore.get());
0642     iqb2.addValueCondition(PimItem::dirtyColumn(), Query::Equals, true);
0643     iqb2.addValueCondition(PimItem::remoteIdColumn(), Query::IsNot, QVariant());
0644     iqb2.addSortColumn(PimItem::idFullColumnName());
0645     if (!iqb2.exec()) {
0646         inform("Failed to query dirty items, skipping test");
0647         return;
0648     }
0649     const PimItem::List dirtyItems = iqb2.result();
0650     for (const PimItem &item : dirtyItems) {
0651         inform(QLatin1StringView("Item \"") + QString::number(item.id()) + QLatin1StringView("\" has RID and is dirty."));
0652     }
0653     inform(QLatin1StringView("Found ") + QString::number(dirtyItems.size()) + QLatin1StringView(" dirty items."));
0654 }
0655 
0656 void StorageJanitor::findRIDDuplicates()
0657 {
0658     QueryBuilder qb(m_dataStore.get(), Collection::tableName(), QueryBuilder::Select);
0659     qb.addColumn(Collection::idColumn());
0660     qb.addColumn(Collection::nameColumn());
0661     qb.exec();
0662 
0663     while (qb.query().next()) {
0664         const auto colId = qb.query().value(0).value<Collection::Id>();
0665         const QString name = qb.query().value(1).toString();
0666         inform(QStringLiteral("Checking ") + name);
0667 
0668         QueryBuilder duplicates(m_dataStore.get(), PimItem::tableName(), QueryBuilder::Select);
0669         duplicates.addColumn(PimItem::remoteIdColumn());
0670         duplicates.addColumn(QStringLiteral("count(") + PimItem::idColumn() + QStringLiteral(") as cnt"));
0671         duplicates.addValueCondition(PimItem::remoteIdColumn(), Query::IsNot, QVariant());
0672         duplicates.addValueCondition(PimItem::collectionIdColumn(), Query::Equals, colId);
0673         duplicates.addGroupColumn(PimItem::remoteIdColumn());
0674         duplicates.addValueCondition(QStringLiteral("count(") + PimItem::idColumn() + QLatin1Char(')'), Query::Greater, 1, QueryBuilder::HavingCondition);
0675         duplicates.exec();
0676 
0677         Akonadi::Server::Collection col = Akonadi::Server::Collection::retrieveById(m_dataStore.get(), colId);
0678         const QList<Akonadi::Server::MimeType> contentMimeTypes = col.mimeTypes(m_dataStore.get());
0679         QVariantList contentMimeTypesVariantList;
0680         contentMimeTypesVariantList.reserve(contentMimeTypes.count());
0681         for (const Akonadi::Server::MimeType &mimeType : contentMimeTypes) {
0682             contentMimeTypesVariantList << mimeType.id();
0683         }
0684         while (duplicates.query().next()) {
0685             const QString rid = duplicates.query().value(0).toString();
0686 
0687             Query::Condition condition(Query::And);
0688             condition.addValueCondition(PimItem::remoteIdColumn(), Query::Equals, rid);
0689             condition.addValueCondition(PimItem::mimeTypeIdColumn(), Query::NotIn, contentMimeTypesVariantList);
0690             condition.addValueCondition(PimItem::collectionIdColumn(), Query::Equals, colId);
0691 
0692             QueryBuilder items(m_dataStore.get(), PimItem::tableName(), QueryBuilder::Select);
0693             items.addColumn(PimItem::idColumn());
0694             items.addCondition(condition);
0695             if (!items.exec()) {
0696                 inform(QStringLiteral("Error while deleting duplicates: ") + items.query().lastError().text());
0697                 continue;
0698             }
0699             QVariantList itemsIds;
0700             while (items.query().next()) {
0701                 itemsIds.push_back(items.query().value(0));
0702             }
0703             items.query().finish();
0704             if (itemsIds.isEmpty()) {
0705                 // the mimetype filter may have dropped some entries from the
0706                 // duplicates query
0707                 continue;
0708             }
0709 
0710             inform(QStringLiteral("Found duplicates ") + rid);
0711 
0712             SelectQueryBuilder<Part> parts(m_dataStore.get());
0713             parts.addValueCondition(Part::pimItemIdFullColumnName(), Query::In, QVariant::fromValue(itemsIds));
0714             parts.addValueCondition(Part::storageFullColumnName(), Query::Equals, static_cast<int>(Part::External));
0715             if (parts.exec()) {
0716                 const auto partsList = parts.result();
0717                 for (const auto &part : partsList) {
0718                     bool exists = false;
0719                     const auto filename = ExternalPartStorage::resolveAbsolutePath(part.data(), &exists);
0720                     if (exists) {
0721                         QFile::remove(filename);
0722                     }
0723                 }
0724             }
0725 
0726             items = QueryBuilder(m_dataStore.get(), PimItem::tableName(), QueryBuilder::Delete);
0727             items.addCondition(condition);
0728             if (!items.exec()) {
0729                 inform(QStringLiteral("Error while deleting duplicates ") + items.query().lastError().text());
0730             }
0731         }
0732         duplicates.query().finish();
0733     }
0734     qb.query().finish();
0735 }
0736 
0737 void StorageJanitor::vacuum()
0738 {
0739     const DbType::Type dbType = DbType::type(m_dataStore->database());
0740     if (dbType == DbType::MySQL || dbType == DbType::PostgreSQL) {
0741         inform("vacuuming database, that'll take some time and require a lot of temporary disk space...");
0742         const auto tables = allDatabaseTables();
0743         for (const QString &table : tables) {
0744             inform(QStringLiteral("optimizing table %1...").arg(table));
0745 
0746             QString queryStr;
0747             if (dbType == DbType::MySQL) {
0748                 queryStr = QLatin1StringView("OPTIMIZE TABLE ") + table;
0749             } else if (dbType == DbType::PostgreSQL) {
0750                 queryStr = QLatin1StringView("VACUUM FULL ANALYZE ") + table;
0751             } else {
0752                 continue;
0753             }
0754             QSqlQuery q(m_dataStore->database());
0755             if (!q.exec(queryStr)) {
0756                 qCCritical(AKONADISERVER_LOG) << "failed to optimize table" << table << ":" << q.lastError().text();
0757             }
0758         }
0759         inform("vacuum done");
0760     } else {
0761         inform("Vacuum not supported for this database backend. (Sqlite backend)");
0762     }
0763 
0764     Q_EMIT done();
0765 }
0766 
0767 void StorageJanitor::checkSizeTreshold()
0768 {
0769     {
0770         QueryBuilder qb(m_dataStore.get(), Part::tableName(), QueryBuilder::Select);
0771         qb.addColumn(Part::idFullColumnName());
0772         qb.addValueCondition(Part::storageFullColumnName(), Query::Equals, Part::Internal);
0773         qb.addValueCondition(Part::datasizeFullColumnName(), Query::Greater, m_dbConfig->sizeThreshold());
0774         if (!qb.exec()) {
0775             inform("Failed to query parts larger than threshold, skipping test");
0776             return;
0777         }
0778 
0779         QSqlQuery query = qb.query();
0780         inform(QStringLiteral("Found %1 parts to be moved to external files").arg(query.size()));
0781 
0782         while (query.next()) {
0783             Transaction transaction(m_dataStore.get(), QStringLiteral("JANITOR CHECK SIZE THRESHOLD"));
0784             Part part = Part::retrieveById(m_dataStore.get(), query.value(0).toLongLong());
0785             const QByteArray name = ExternalPartStorage::nameForPartId(part.id());
0786             const QString partPath = ExternalPartStorage::resolveAbsolutePath(name);
0787             QFile f(partPath);
0788             if (f.exists()) {
0789                 qCDebug(AKONADISERVER_LOG) << "External payload file" << name << "already exists";
0790                 // That however is not a critical issue, since the part is not external,
0791                 // so we can safely overwrite it
0792             }
0793             if (!f.open(QIODevice::WriteOnly | QIODevice::Truncate)) {
0794                 qCCritical(AKONADISERVER_LOG) << "Failed to open file" << name << "for writing";
0795                 continue;
0796             }
0797             if (f.write(part.data()) != part.datasize()) {
0798                 qCCritical(AKONADISERVER_LOG) << "Failed to write data to payload file" << name;
0799                 f.remove();
0800                 continue;
0801             }
0802 
0803             part.setData(name);
0804             part.setStorage(Part::External);
0805             if (!part.update(m_dataStore.get()) || !transaction.commit()) {
0806                 qCCritical(AKONADISERVER_LOG) << "Failed to update database entry of part" << part.id();
0807                 f.remove();
0808                 continue;
0809             }
0810 
0811             inform(QStringLiteral("Moved part %1 from database into external file %2").arg(part.id()).arg(QString::fromLatin1(name)));
0812         }
0813         query.finish();
0814     }
0815 
0816     {
0817         QueryBuilder qb(m_dataStore.get(), Part::tableName(), QueryBuilder::Select);
0818         qb.addColumn(Part::idFullColumnName());
0819         qb.addValueCondition(Part::storageFullColumnName(), Query::Equals, Part::External);
0820         qb.addValueCondition(Part::datasizeFullColumnName(), Query::Less, DbConfig::configuredDatabase()->sizeThreshold());
0821         if (!qb.exec()) {
0822             inform("Failed to query parts smaller than threshold, skipping test");
0823             return;
0824         }
0825 
0826         QSqlQuery query = qb.query();
0827         inform(QStringLiteral("Found %1 parts to be moved to database").arg(query.size()));
0828 
0829         while (query.next()) {
0830             Transaction transaction(m_dataStore.get(), QStringLiteral("JANITOR CHECK SIZE THRESHOLD 2"));
0831             Part part = Part::retrieveById(m_dataStore.get(), query.value(0).toLongLong());
0832             const QString partPath = ExternalPartStorage::resolveAbsolutePath(part.data());
0833             QFile f(partPath);
0834             if (!f.exists()) {
0835                 qCCritical(AKONADISERVER_LOG) << "Part file" << part.data() << "does not exist";
0836                 continue;
0837             }
0838             if (!f.open(QIODevice::ReadOnly)) {
0839                 qCCritical(AKONADISERVER_LOG) << "Failed to open part file" << part.data() << "for reading";
0840                 continue;
0841             }
0842 
0843             part.setStorage(Part::Internal);
0844             part.setData(f.readAll());
0845             if (part.data().size() != part.datasize()) {
0846                 qCCritical(AKONADISERVER_LOG) << "Sizes of" << part.id() << "data don't match";
0847                 continue;
0848             }
0849             if (!part.update(m_dataStore.get()) || !transaction.commit()) {
0850                 qCCritical(AKONADISERVER_LOG) << "Failed to update database entry of part" << part.id();
0851                 continue;
0852             }
0853 
0854             f.close();
0855             f.remove();
0856             inform(QStringLiteral("Moved part %1 from external file into database").arg(part.id()));
0857         }
0858         query.finish();
0859     }
0860 }
0861 
0862 void StorageJanitor::migrateToLevelledCacheHierarchy()
0863 {
0864     /// First, check whether that's still necessary
0865     const QString db_data = StandardDirs::saveDir("data", QStringLiteral("file_db_data"));
0866     const auto entries = QDir(db_data).entryInfoList(QDir::Files | QDir::NoDotAndDotDot);
0867     if (entries.isEmpty()) {
0868         inform("No external parts in legacy location, skipping migration");
0869         return;
0870     }
0871 
0872     QueryBuilder qb(m_dataStore.get(), Part::tableName(), QueryBuilder::Select);
0873     qb.addColumn(Part::idColumn());
0874     qb.addColumn(Part::dataColumn());
0875     qb.addValueCondition(Part::storageColumn(), Query::Equals, Part::External);
0876     if (!qb.exec()) {
0877         inform("Failed to query external payload parts, skipping test");
0878         return;
0879     }
0880 
0881     QSqlQuery query = qb.query();
0882     while (query.next()) {
0883         const qint64 id = query.value(0).toLongLong();
0884         const QByteArray data = query.value(1).toByteArray();
0885         const QString fileName = QString::fromUtf8(data);
0886         bool oldExists = false;
0887         bool newExists = false;
0888         // Resolve the current path
0889         const QString currentPath = ExternalPartStorage::resolveAbsolutePath(fileName, &oldExists);
0890         // Resolve the new path with legacy fallback disabled, so that it always
0891         // returns the new levelled-cache path, even when the old one exists
0892         const QString newPath = ExternalPartStorage::resolveAbsolutePath(fileName, &newExists, false);
0893         if (!oldExists) {
0894             qCCritical(AKONADISERVER_LOG) << "Old payload part does not exist, skipping part" << fileName;
0895             continue;
0896         }
0897         if (currentPath != newPath) {
0898             if (newExists) {
0899                 qCCritical(AKONADISERVER_LOG) << "Part is in legacy location, but the destination file already exists, skipping part" << fileName;
0900                 continue;
0901             }
0902 
0903             QFile f(currentPath);
0904             if (!f.rename(newPath)) {
0905                 qCCritical(AKONADISERVER_LOG) << "Failed to move part from" << currentPath << " to " << newPath << ":" << f.errorString();
0906                 continue;
0907             }
0908             inform(QStringLiteral("Migrated part %1 to new levelled cache").arg(id));
0909         }
0910     }
0911     query.finish();
0912 }
0913 
0914 void StorageJanitor::findOrphanSearchIndexEntries()
0915 {
0916     QueryBuilder qb(m_dataStore.get(), Collection::tableName(), QueryBuilder::Select);
0917     qb.addSortColumn(Collection::idColumn(), Query::Ascending);
0918     qb.addColumn(Collection::idColumn());
0919     qb.addColumn(Collection::isVirtualColumn());
0920     if (!qb.exec()) {
0921         inform("Failed to query collections, skipping test");
0922         return;
0923     }
0924 
0925     QDBusInterface iface(DBus::agentServiceName(QStringLiteral("akonadi_indexing_agent"), DBus::Agent),
0926                          QStringLiteral("/"),
0927                          QStringLiteral("org.freedesktop.Akonadi.Indexer"),
0928                          QDBusConnection::sessionBus());
0929     if (!iface.isValid()) {
0930         inform("Akonadi Indexing Agent is not running, skipping test");
0931         return;
0932     }
0933 
0934     QSqlQuery query = qb.query();
0935     while (query.next()) {
0936         const qint64 colId = query.value(0).toLongLong();
0937         // Skip virtual collections, they are not indexed
0938         if (query.value(1).toBool()) {
0939             inform(QStringLiteral("Skipping virtual Collection %1").arg(colId));
0940             continue;
0941         }
0942 
0943         inform(QStringLiteral("Checking Collection %1 search index...").arg(colId));
0944         SearchRequest req("StorageJanitor", m_akonadi->searchManager(), m_akonadi->agentSearchManager());
0945         req.setStoreResults(true);
0946         req.setCollections({colId});
0947         req.setRemoteSearch(false);
0948         req.setQuery(QStringLiteral("{ }")); // empty query to match all
0949         QStringList mts;
0950         Collection col;
0951         col.setId(colId);
0952         const auto colMts = col.mimeTypes();
0953         if (colMts.isEmpty()) {
0954             // No mimetypes means we don't know which search store to look into,
0955             // skip it.
0956             continue;
0957         }
0958         mts.reserve(colMts.count());
0959         for (const auto &mt : colMts) {
0960             mts << mt.name();
0961         }
0962         req.setMimeTypes(mts);
0963         req.exec();
0964         auto searchResults = req.results();
0965 
0966         QueryBuilder iqb(m_dataStore.get(), PimItem::tableName(), QueryBuilder::Select);
0967         iqb.addColumn(PimItem::idColumn());
0968         iqb.addValueCondition(PimItem::collectionIdColumn(), Query::Equals, colId);
0969         if (!iqb.exec()) {
0970             inform(QStringLiteral("Failed to query items in collection %1").arg(colId));
0971             continue;
0972         }
0973 
0974         QSqlQuery itemQuery = iqb.query();
0975         while (itemQuery.next()) {
0976             searchResults.remove(itemQuery.value(0).toLongLong());
0977         }
0978         itemQuery.finish();
0979 
0980         if (!searchResults.isEmpty()) {
0981             inform(QStringLiteral("Collection %1 search index contains %2 orphan items. Scheduling reindexing").arg(colId).arg(searchResults.count()));
0982             iface.call(QDBus::NoBlock, QStringLiteral("reindexCollection"), colId);
0983         }
0984     }
0985     query.finish();
0986 }
0987 
0988 void StorageJanitor::ensureSearchCollection()
0989 {
0990     static const auto searchResourceName = QStringLiteral("akonadi_search_resource");
0991 
0992     auto searchResource = Resource::retrieveByName(m_dataStore.get(), searchResourceName);
0993     if (!searchResource.isValid()) {
0994         searchResource.setName(searchResourceName);
0995         searchResource.setIsVirtual(true);
0996         if (!searchResource.insert(m_dataStore.get())) {
0997             inform(QStringLiteral("Failed to create Search resource."));
0998             return;
0999         }
1000     }
1001 
1002     auto searchCols = Collection::retrieveFiltered(m_dataStore.get(), Collection::resourceIdColumn(), searchResource.id());
1003     if (searchCols.isEmpty()) {
1004         Collection searchCol;
1005         searchCol.setId(1);
1006         searchCol.setName(QStringLiteral("Search"));
1007         searchCol.setResource(searchResource);
1008         searchCol.setIndexPref(Collection::False);
1009         searchCol.setIsVirtual(true);
1010         if (!searchCol.insert(m_dataStore.get())) {
1011             inform(QStringLiteral("Failed to create Search Collection"));
1012             return;
1013         }
1014     }
1015 }
1016 
1017 void StorageJanitor::expireCollectionStatisticsCache()
1018 {
1019     m_akonadi->collectionStatistics().expireCache();
1020 }
1021 
1022 void StorageJanitor::inform(const char *msg)
1023 {
1024     inform(QLatin1StringView(msg));
1025 }
1026 
/// Reports @p msg twice: once to the AKONADISERVER_LOG debug category and
/// once to listeners of the information() signal.
void StorageJanitor::inform(const QString &msg)
{
    // Log first, so the message is recorded before any connected slot runs.
    qCDebug(AKONADISERVER_LOG) << msg;
    Q_EMIT information(msg);
}
1032 
1033 #include "moc_storagejanitor.cpp"