Warning, file /pim/sink/tests/mailquerybenchmark.cpp was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /* 0002 * Copyright (C) 2016 Christian Mollekopf <chrigi_1@fastmail.fm> 0003 * 0004 * This library is free software; you can redistribute it and/or 0005 * modify it under the terms of the GNU Lesser General Public 0006 * License as published by the Free Software Foundation; either 0007 * version 2.1 of the License, or (at your option) version 3, or any 0008 * later version accepted by the membership of KDE e.V. (or its 0009 * successor approved by the membership of KDE e.V.), which shall 0010 * act as a proxy defined in Section 6 of version 3 of the license. 0011 * 0012 * This library is distributed in the hope that it will be useful, 0013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 0015 * Lesser General Public License for more details. 0016 * 0017 * You should have received a copy of the GNU Lesser General Public 0018 * License along with this library. If not, see <http://www.gnu.org/licenses/>. 0019 */ 0020 #include <QTest> 0021 0022 #include <QString> 0023 0024 #include "testimplementations.h" 0025 0026 #include <common/resultprovider.h> 0027 #include <common/definitions.h> 0028 #include <common/query.h> 0029 #include <common/storage/entitystore.h> 0030 #include <common/resourcecontrol.h> 0031 0032 #include "hawd/dataset.h" 0033 #include "hawd/formatter.h" 0034 0035 #include <iostream> 0036 #include <math.h> 0037 0038 #include "mail_generated.h" 0039 #include "createentity_generated.h" 0040 #include "getrssusage.h" 0041 #include "test.h" 0042 0043 using namespace Sink; 0044 using namespace Sink::ApplicationDomain; 0045 0046 /** 0047 * Benchmark mail query performance. 0048 */ 0049 class MailQueryBenchmark : public QObject 0050 { 0051 Q_OBJECT 0052 0053 QByteArray resourceIdentifier; 0054 HAWD::State mHawdState; 0055 0056 void populateDatabase(int count, int folderSpreadFactor = 0, bool clear = true, int offset = 0) 0057 { 0058 if (clear) { 0059 TestResource::removeFromDisk(resourceIdentifier); 0060 } 0061 0062 Sink::ResourceContext resourceContext{resourceIdentifier, "test", {{"mail", QSharedPointer<TestMailAdaptorFactory>::create()}}}; 0063 Sink::Storage::EntityStore entityStore{resourceContext, {}}; 0064 entityStore.startTransaction(Sink::Storage::DataStore::ReadWrite); 0065 0066 const auto date = QDateTime::currentDateTimeUtc(); 0067 for (int i = offset; i < offset + count; i++) { 0068 auto domainObject = Mail::createEntity<Mail>(resourceIdentifier); 0069 domainObject.setExtractedMessageId("uid"); 0070 domainObject.setExtractedParentMessageIds({"parentuid"}); 0071 domainObject.setExtractedSubject(QString("subject%1").arg(i)); 0072 domainObject.setExtractedDate(date.addSecs(count)); 0073 if (folderSpreadFactor == 0) { 0074 domainObject.setFolder("folder1"); 0075 } else { 0076 domainObject.setFolder(QByteArray("folder") + QByteArray::number(i - (i % folderSpreadFactor))); 0077 } 0078 0079 entityStore.add("mail", domainObject, false); 0080 } 0081 0082 entityStore.commitTransaction(); 0083 } 0084 0085 //Execute query and block until the initial query is complete 0086 int load(const Sink::Query &query) 0087 { 0088 auto domainTypeAdaptorFactory = QSharedPointer<TestMailAdaptorFactory>::create(); 0089 Sink::ResourceContext context{resourceIdentifier, "test", {{"mail", domainTypeAdaptorFactory}}}; 0090 context.mResourceAccess = QSharedPointer<TestResourceAccess>::create(); 0091 TestMailResourceFacade facade(context); 0092 0093 auto ret = facade.load(query, Sink::Log::Context{"benchmark"}); 0094 ret.first.exec().waitForFinished(); 0095 auto emitter = ret.second; 0096 int i = 0; 0097 emitter->onAdded([&](const Mail::Ptr &) { i++; }); 0098 bool done = false; 0099 emitter->onInitialResultSetComplete([&done](bool) { done = true; }); 0100 emitter->fetch(); 0101 QUICK_TRY_VERIFY(done); 0102 return i; 0103 } 0104 0105 qreal testLoad(const Sink::Query &query, int count, int expectedSize) 0106 { 0107 const auto startingRss = getCurrentRSS(); 0108 0109 // Benchmark 0110 QTime time; 0111 time.start(); 0112 0113 auto loadedResults = load(query); 0114 Q_ASSERT(loadedResults == expectedSize); 0115 0116 const auto elapsed = time.elapsed(); 0117 0118 const auto finalRss = getCurrentRSS(); 0119 const auto rssGrowth = finalRss - startingRss; 0120 // Since the database is memory mapped it is attributted to the resident set size. 0121 const auto rssWithoutDb = finalRss - Sink::Storage::DataStore(Sink::storageLocation(), resourceIdentifier, Sink::Storage::DataStore::ReadWrite).diskUsage(); 0122 const auto peakRss = getPeakRSS(); 0123 // How much peak deviates from final rss in percent (should be around 0) 0124 const auto percentageRssError = static_cast<double>(peakRss - finalRss) * 100.0 / static_cast<double>(finalRss); 0125 auto rssGrowthPerEntity = rssGrowth / count; 0126 0127 std::cout << "Loaded " << expectedSize << " results." << std::endl; 0128 std::cout << "The query took [ms]: " << elapsed << std::endl; 0129 std::cout << "Current Rss usage [kb]: " << finalRss / 1024 << std::endl; 0130 std::cout << "Peak Rss usage [kb]: " << peakRss / 1024 << std::endl; 0131 std::cout << "Rss growth [kb]: " << rssGrowth / 1024 << std::endl; 0132 std::cout << "Rss growth per entity [byte]: " << rssGrowthPerEntity << std::endl; 0133 std::cout << "Rss without db [kb]: " << rssWithoutDb / 1024 << std::endl; 0134 std::cout << "Percentage error: " << percentageRssError << std::endl; 0135 0136 Q_ASSERT(percentageRssError < 10); 0137 // TODO This is much more than it should it seems, although adding the attachment results in pretty exactly a 1k increase, 0138 // so it doesn't look like that memory is being duplicated. 0139 Q_ASSERT(rssGrowthPerEntity < 3300); 0140 0141 // Print memory layout, RSS is what is in memory 0142 // std::system("exec pmap -x \"$PPID\""); 0143 // std::system("top -p \"$PPID\" -b -n 1"); 0144 return (qreal)expectedSize / elapsed; 0145 } 0146 0147 private slots: 0148 0149 void init() 0150 { 0151 resourceIdentifier = "sink.test.instance1"; 0152 } 0153 0154 void testInitialQueryResult() 0155 { 0156 int count = 50000; 0157 int limit = 1; 0158 populateDatabase(count); 0159 0160 //Run a warm-up query first 0161 Sink::Query query{}; 0162 query.request<Mail::MessageId>() 0163 .request<Mail::Subject>() 0164 .request<Mail::Date>(); 0165 query.sort<Mail::Date>(); 0166 query.filter<Mail::Folder>("folder1"); 0167 query.limit(limit); 0168 0169 load(query); 0170 0171 int liveQueryTime = 0; 0172 { 0173 auto q = query; 0174 q.setFlags(Sink::Query::LiveQuery); 0175 0176 QTime time; 0177 time.start(); 0178 load(q); 0179 liveQueryTime = time.elapsed(); 0180 } 0181 0182 int nonLiveQueryTime = 0; 0183 { 0184 auto q = query; 0185 0186 QTime time; 0187 time.start(); 0188 load(q); 0189 nonLiveQueryTime = time.elapsed(); 0190 } 0191 0192 HAWD::Dataset dataset("mail_query_initial", mHawdState); 0193 HAWD::Dataset::Row row = dataset.row(); 0194 row.setValue("live", liveQueryTime); 0195 row.setValue("nonlive", nonLiveQueryTime); 0196 dataset.insertRow(row); 0197 HAWD::Formatter::print(dataset); 0198 } 0199 0200 void test50k() 0201 { 0202 int count = 50000; 0203 int limit = 1000; 0204 qreal simpleResultRate = 0; 0205 qreal threadResultRate = 0; 0206 { 0207 //A query that just filters by a property and sorts (using an index) 0208 Sink::Query query; 0209 query.request<Mail::MessageId>() 0210 .request<Mail::Subject>() 0211 .request<Mail::Date>(); 0212 query.sort<Mail::Date>(); 0213 query.filter<Mail::Folder>("folder1"); 0214 query.limit(limit); 0215 0216 populateDatabase(count); 0217 simpleResultRate = testLoad(query, count, query.limit()); 0218 } 0219 { 0220 //A query that reduces (like the maillist query) 0221 Sink::Query query; 0222 query.request<Mail::MessageId>() 0223 .request<Mail::Subject>() 0224 .request<Mail::Date>(); 0225 query.reduce<ApplicationDomain::Mail::Folder>(Query::Reduce::Selector::max<ApplicationDomain::Mail::Date>()); 0226 query.limit(limit); 0227 0228 int mailsPerFolder = 10; 0229 0230 populateDatabase(count, mailsPerFolder); 0231 threadResultRate = testLoad(query, count, query.limit()); 0232 } 0233 HAWD::Dataset dataset("mail_query", mHawdState); 0234 HAWD::Dataset::Row row = dataset.row(); 0235 row.setValue("rows", limit); 0236 row.setValue("simple", simpleResultRate); 0237 row.setValue("threadleader", threadResultRate); 0238 dataset.insertRow(row); 0239 HAWD::Formatter::print(dataset); 0240 } 0241 0242 void testIncremental() 0243 { 0244 Sink::Query query{Sink::Query::LiveQuery}; 0245 query.request<Mail::MessageId>() 0246 .request<Mail::Subject>() 0247 .request<Mail::Date>(); 0248 query.sort<ApplicationDomain::Mail::Date>(); 0249 query.reduce<ApplicationDomain::Mail::Folder>(Query::Reduce::Selector::max<ApplicationDomain::Mail::Date>()); 0250 query.limit(1000); 0251 0252 int count = 1000; 0253 populateDatabase(count, 10); 0254 auto expectedSize = 100; 0255 QTime time; 0256 time.start(); 0257 auto domainTypeAdaptorFactory = QSharedPointer<TestMailAdaptorFactory>::create(); 0258 Sink::ResourceContext context{resourceIdentifier, "test", {{"mail", domainTypeAdaptorFactory}}}; 0259 context.mResourceAccess = QSharedPointer<TestResourceAccess>::create(); 0260 TestMailResourceFacade facade(context); 0261 0262 auto ret = facade.load(query, Sink::Log::Context{"benchmark"}); 0263 ret.first.exec().waitForFinished(); 0264 auto emitter = ret.second; 0265 QList<Mail::Ptr> added; 0266 QList<Mail::Ptr> removed; 0267 QList<Mail::Ptr> modified; 0268 emitter->onAdded([&](const Mail::Ptr &mail) { added << mail; /*qWarning() << "Added";*/ }); 0269 emitter->onRemoved([&](const Mail::Ptr &mail) { removed << mail; /*qWarning() << "Removed";*/ }); 0270 emitter->onModified([&](const Mail::Ptr &mail) { modified << mail; /*qWarning() << "Modified";*/ }); 0271 bool done = false; 0272 emitter->onInitialResultSetComplete([&done](bool) { done = true; }); 0273 emitter->fetch(); 0274 QUICK_TRY_VERIFY(done); 0275 QCOMPARE(added.size(), expectedSize); 0276 0277 auto initialQueryTime = time.elapsed(); 0278 std::cout << "Initial query took: " << initialQueryTime << std::endl; 0279 0280 populateDatabase(count, 10, false, count); 0281 time.restart(); 0282 for (int i = 0; i <= 10; i++) { 0283 //Simulate revision updates in steps of 100 0284 context.mResourceAccess->revisionChanged(1000 + i * 100); 0285 } 0286 //We should have 200 items in total in the end. 2000 mails / 10 folders => 200 reduced mails 0287 QUICK_TRY_VERIFY(added.count() == 200); 0288 //We get one modification per thread from the first 100 (1000 mails / 10 folders), everything else is optimized away because we ignore repeated updates to the same thread. 0289 QUICK_TRY_VERIFY(modified.count() == 100); 0290 auto incrementalQueryTime = time.elapsed(); 0291 std::cout << "Incremental query took " << incrementalQueryTime << std::endl; 0292 std::cout << "added " << added.count() << std::endl; 0293 std::cout << "modified " << modified.count() << std::endl; 0294 std::cout << "removed " << removed.count() << std::endl; 0295 0296 HAWD::Dataset dataset("mail_query_incremental", mHawdState); 0297 HAWD::Dataset::Row row = dataset.row(); 0298 row.setValue("nonincremental", initialQueryTime); 0299 row.setValue("incremental", incrementalQueryTime); 0300 dataset.insertRow(row); 0301 HAWD::Formatter::print(dataset); 0302 } 0303 }; 0304 0305 QTEST_MAIN(MailQueryBenchmark) 0306 #include "mailquerybenchmark.moc"