Warning, file /pim/sink/tests/mailquerybenchmark.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /*
0002  * Copyright (C) 2016 Christian Mollekopf <chrigi_1@fastmail.fm>
0003  *
0004  * This library is free software; you can redistribute it and/or
0005  * modify it under the terms of the GNU Lesser General Public
0006  * License as published by the Free Software Foundation; either
0007  * version 2.1 of the License, or (at your option) version 3, or any
0008  * later version accepted by the membership of KDE e.V. (or its
0009  * successor approved by the membership of KDE e.V.), which shall
0010  * act as a proxy defined in Section 6 of version 3 of the license.
0011  *
0012  * This library is distributed in the hope that it will be useful,
0013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
0015  * Lesser General Public License for more details.
0016  *
0017  * You should have received a copy of the GNU Lesser General Public
0018  * License along with this library.  If not, see <http://www.gnu.org/licenses/>.
0019  */
#include <QTest>

#include <QElapsedTimer>
#include <QString>

#include "testimplementations.h"

#include <common/resultprovider.h>
#include <common/definitions.h>
#include <common/query.h>
#include <common/storage/entitystore.h>
#include <common/resourcecontrol.h>

#include "hawd/dataset.h"
#include "hawd/formatter.h"

#include <iostream>
#include <math.h>

#include "mail_generated.h"
#include "createentity_generated.h"
#include "getrssusage.h"
#include "test.h"
0042 
0043 using namespace Sink;
0044 using namespace Sink::ApplicationDomain;
0045 
0046 /**
0047  * Benchmark mail query performance.
0048  */
0049 class MailQueryBenchmark : public QObject
0050 {
0051     Q_OBJECT
0052 
0053     QByteArray resourceIdentifier;
0054     HAWD::State mHawdState;
0055 
0056     void populateDatabase(int count, int folderSpreadFactor = 0, bool clear = true, int offset = 0)
0057     {
0058         if (clear) {
0059             TestResource::removeFromDisk(resourceIdentifier);
0060         }
0061 
0062         Sink::ResourceContext resourceContext{resourceIdentifier, "test", {{"mail", QSharedPointer<TestMailAdaptorFactory>::create()}}};
0063         Sink::Storage::EntityStore entityStore{resourceContext, {}};
0064         entityStore.startTransaction(Sink::Storage::DataStore::ReadWrite);
0065 
0066         const auto date = QDateTime::currentDateTimeUtc();
0067         for (int i = offset; i < offset + count; i++) {
0068             auto domainObject = Mail::createEntity<Mail>(resourceIdentifier);
0069             domainObject.setExtractedMessageId("uid");
0070             domainObject.setExtractedParentMessageIds({"parentuid"});
0071             domainObject.setExtractedSubject(QString("subject%1").arg(i));
0072             domainObject.setExtractedDate(date.addSecs(count));
0073             if (folderSpreadFactor == 0) {
0074                 domainObject.setFolder("folder1");
0075             } else {
0076                 domainObject.setFolder(QByteArray("folder") + QByteArray::number(i - (i % folderSpreadFactor)));
0077             }
0078 
0079             entityStore.add("mail", domainObject, false);
0080         }
0081 
0082         entityStore.commitTransaction();
0083     }
0084 
0085     //Execute query and block until the initial query is complete
0086     int load(const Sink::Query &query)
0087     {
0088         auto domainTypeAdaptorFactory = QSharedPointer<TestMailAdaptorFactory>::create();
0089         Sink::ResourceContext context{resourceIdentifier, "test", {{"mail", domainTypeAdaptorFactory}}};
0090         context.mResourceAccess = QSharedPointer<TestResourceAccess>::create();
0091         TestMailResourceFacade facade(context);
0092 
0093         auto ret = facade.load(query, Sink::Log::Context{"benchmark"});
0094         ret.first.exec().waitForFinished();
0095         auto emitter = ret.second;
0096         int i = 0;
0097         emitter->onAdded([&](const Mail::Ptr &) { i++; });
0098         bool done = false;
0099         emitter->onInitialResultSetComplete([&done](bool) { done = true; });
0100         emitter->fetch();
0101         QUICK_TRY_VERIFY(done);
0102         return i;
0103     }
0104 
0105     qreal testLoad(const Sink::Query &query, int count, int expectedSize)
0106     {
0107         const auto startingRss = getCurrentRSS();
0108 
0109         // Benchmark
0110         QTime time;
0111         time.start();
0112 
0113         auto loadedResults = load(query);
0114         Q_ASSERT(loadedResults == expectedSize);
0115 
0116         const auto elapsed = time.elapsed();
0117 
0118         const auto finalRss = getCurrentRSS();
0119         const auto rssGrowth = finalRss - startingRss;
0120         // Since the database is memory mapped it is attributted to the resident set size.
0121         const auto rssWithoutDb = finalRss - Sink::Storage::DataStore(Sink::storageLocation(), resourceIdentifier, Sink::Storage::DataStore::ReadWrite).diskUsage();
0122         const auto peakRss = getPeakRSS();
0123         // How much peak deviates from final rss in percent (should be around 0)
0124         const auto percentageRssError = static_cast<double>(peakRss - finalRss) * 100.0 / static_cast<double>(finalRss);
0125         auto rssGrowthPerEntity = rssGrowth / count;
0126 
0127         std::cout << "Loaded " << expectedSize << " results." << std::endl;
0128         std::cout << "The query took [ms]: " << elapsed << std::endl;
0129         std::cout << "Current Rss usage [kb]: " << finalRss / 1024 << std::endl;
0130         std::cout << "Peak Rss usage [kb]: " << peakRss / 1024 << std::endl;
0131         std::cout << "Rss growth [kb]: " << rssGrowth / 1024 << std::endl;
0132         std::cout << "Rss growth per entity [byte]: " << rssGrowthPerEntity << std::endl;
0133         std::cout << "Rss without db [kb]: " << rssWithoutDb / 1024 << std::endl;
0134         std::cout << "Percentage error: " << percentageRssError << std::endl;
0135 
0136         Q_ASSERT(percentageRssError < 10);
0137         // TODO This is much more than it should it seems, although adding the attachment results in pretty exactly a 1k increase,
0138         // so it doesn't look like that memory is being duplicated.
0139         Q_ASSERT(rssGrowthPerEntity < 3300);
0140 
0141         // Print memory layout, RSS is what is in memory
0142         // std::system("exec pmap -x \"$PPID\"");
0143         // std::system("top -p \"$PPID\" -b -n 1");
0144         return (qreal)expectedSize / elapsed;
0145     }
0146 
0147 private slots:
0148 
0149     void init()
0150     {
0151         resourceIdentifier = "sink.test.instance1";
0152     }
0153 
0154     void testInitialQueryResult()
0155     {
0156         int count = 50000;
0157         int limit = 1;
0158         populateDatabase(count);
0159 
0160         //Run a warm-up query first
0161         Sink::Query query{};
0162         query.request<Mail::MessageId>()
0163             .request<Mail::Subject>()
0164             .request<Mail::Date>();
0165         query.sort<Mail::Date>();
0166         query.filter<Mail::Folder>("folder1");
0167         query.limit(limit);
0168 
0169         load(query);
0170 
0171         int liveQueryTime = 0;
0172         {
0173             auto q = query;
0174             q.setFlags(Sink::Query::LiveQuery);
0175 
0176             QTime time;
0177             time.start();
0178             load(q);
0179             liveQueryTime = time.elapsed();
0180         }
0181 
0182         int nonLiveQueryTime = 0;
0183         {
0184             auto q = query;
0185 
0186             QTime time;
0187             time.start();
0188             load(q);
0189             nonLiveQueryTime = time.elapsed();
0190         }
0191 
0192         HAWD::Dataset dataset("mail_query_initial", mHawdState);
0193         HAWD::Dataset::Row row = dataset.row();
0194         row.setValue("live", liveQueryTime);
0195         row.setValue("nonlive", nonLiveQueryTime);
0196         dataset.insertRow(row);
0197         HAWD::Formatter::print(dataset);
0198     }
0199 
0200     void test50k()
0201     {
0202         int count = 50000;
0203         int limit = 1000;
0204         qreal simpleResultRate = 0;
0205         qreal threadResultRate = 0;
0206         {
0207             //A query that just filters by a property and sorts (using an index)
0208             Sink::Query query;
0209             query.request<Mail::MessageId>()
0210                 .request<Mail::Subject>()
0211                 .request<Mail::Date>();
0212             query.sort<Mail::Date>();
0213             query.filter<Mail::Folder>("folder1");
0214             query.limit(limit);
0215 
0216             populateDatabase(count);
0217             simpleResultRate = testLoad(query, count, query.limit());
0218         }
0219         {
0220             //A query that reduces (like the maillist query)
0221             Sink::Query query;
0222             query.request<Mail::MessageId>()
0223                 .request<Mail::Subject>()
0224                 .request<Mail::Date>();
0225             query.reduce<ApplicationDomain::Mail::Folder>(Query::Reduce::Selector::max<ApplicationDomain::Mail::Date>());
0226             query.limit(limit);
0227 
0228             int mailsPerFolder = 10;
0229 
0230             populateDatabase(count, mailsPerFolder);
0231             threadResultRate = testLoad(query, count, query.limit());
0232         }
0233         HAWD::Dataset dataset("mail_query", mHawdState);
0234         HAWD::Dataset::Row row = dataset.row();
0235         row.setValue("rows", limit);
0236         row.setValue("simple", simpleResultRate);
0237         row.setValue("threadleader", threadResultRate);
0238         dataset.insertRow(row);
0239         HAWD::Formatter::print(dataset);
0240     }
0241 
0242     void testIncremental()
0243     {
0244         Sink::Query query{Sink::Query::LiveQuery};
0245         query.request<Mail::MessageId>()
0246              .request<Mail::Subject>()
0247              .request<Mail::Date>();
0248         query.sort<ApplicationDomain::Mail::Date>();
0249         query.reduce<ApplicationDomain::Mail::Folder>(Query::Reduce::Selector::max<ApplicationDomain::Mail::Date>());
0250         query.limit(1000);
0251 
0252         int count = 1000;
0253         populateDatabase(count, 10);
0254         auto expectedSize = 100;
0255         QTime time;
0256         time.start();
0257         auto domainTypeAdaptorFactory = QSharedPointer<TestMailAdaptorFactory>::create();
0258         Sink::ResourceContext context{resourceIdentifier, "test", {{"mail", domainTypeAdaptorFactory}}};
0259         context.mResourceAccess = QSharedPointer<TestResourceAccess>::create();
0260         TestMailResourceFacade facade(context);
0261 
0262         auto ret = facade.load(query, Sink::Log::Context{"benchmark"});
0263         ret.first.exec().waitForFinished();
0264         auto emitter = ret.second;
0265         QList<Mail::Ptr> added;
0266         QList<Mail::Ptr> removed;
0267         QList<Mail::Ptr> modified;
0268         emitter->onAdded([&](const Mail::Ptr &mail) { added << mail; /*qWarning() << "Added";*/ });
0269         emitter->onRemoved([&](const Mail::Ptr &mail) { removed << mail; /*qWarning() << "Removed";*/ });
0270         emitter->onModified([&](const Mail::Ptr &mail) { modified << mail; /*qWarning() << "Modified";*/ });
0271         bool done = false;
0272         emitter->onInitialResultSetComplete([&done](bool) { done = true; });
0273         emitter->fetch();
0274         QUICK_TRY_VERIFY(done);
0275         QCOMPARE(added.size(), expectedSize);
0276 
0277         auto initialQueryTime = time.elapsed();
0278         std::cout << "Initial query took: " << initialQueryTime << std::endl;
0279 
0280         populateDatabase(count, 10, false, count);
0281         time.restart();
0282         for (int i = 0; i <= 10; i++) {
0283             //Simulate revision updates in steps of 100
0284             context.mResourceAccess->revisionChanged(1000 + i * 100);
0285         }
0286         //We should have 200 items in total in the end. 2000 mails / 10 folders => 200 reduced mails
0287         QUICK_TRY_VERIFY(added.count() == 200);
0288         //We get one modification per thread from the first 100 (1000 mails / 10 folders), everything else is optimized away because we ignore repeated updates to the same thread.
0289         QUICK_TRY_VERIFY(modified.count() == 100);
0290         auto incrementalQueryTime = time.elapsed();
0291         std::cout << "Incremental query took " << incrementalQueryTime << std::endl;
0292         std::cout << "added " << added.count() << std::endl;
0293         std::cout << "modified " << modified.count() << std::endl;
0294         std::cout << "removed " << removed.count() << std::endl;
0295 
0296         HAWD::Dataset dataset("mail_query_incremental", mHawdState);
0297         HAWD::Dataset::Row row = dataset.row();
0298         row.setValue("nonincremental", initialQueryTime);
0299         row.setValue("incremental", incrementalQueryTime);
0300         dataset.insertRow(row);
0301         HAWD::Formatter::print(dataset);
0302     }
0303 };
0304 
0305 QTEST_MAIN(MailQueryBenchmark)
0306 #include "mailquerybenchmark.moc"