threadweaver/benchmarks/QueueBenchmarks.cpp

0001 /* -*- C++ -*-
0002     This file contains a benchmark for job processing in ThreadWeaver.
0003
0004     SPDX-FileCopyrightText: 2005-2013 Mirko Boehm <mirko@kde.org>
0005
0006     SPDX-License-Identifier: LGPL-2.0-or-later
0007 */
0008
0009 #include <numeric>
0010
0011 #include <QCoreApplication>
0012 #include <QList>
0013 #include <QString>
0014 #include <QTest>
0015 #include <QtDebug>
0016
0017 #include <ThreadWeaver/Collection>
0018 #include <ThreadWeaver/Job>
0019 #include <ThreadWeaver/Queueing>
0020 #include <ThreadWeaver/Sequence>
0021 #include <ThreadWeaver/ThreadWeaver>
0022
0023 class AccumulateJob : public ThreadWeaver::Job
0024 {
0025 public:
0026     explicit AccumulateJob()
0027         : m_count(0)
0028         , m_result(0)
0029     {
0030     }
0031
0032     AccumulateJob(const AccumulateJob &a)
0033         : ThreadWeaver::Job()
0034         , m_count(a.m_count)
0035         , m_result(a.m_result)
0036     {
0037     }
0038
0039     void setCount(quint64 count)
0040     {
0041         m_count = count;
0042     }
0043
0044     quint64 result() const
0045     {
0046         return m_result;
0047     }
0048
0049     void payload()
0050     {
0051         std::vector<quint64> numbers(m_count);
0052         std::generate(numbers.begin(), numbers.end(), []() -> quint64 {
0053             static quint64 i = 0;
0054             return i++;
0055         });
0056         m_result = std::accumulate(numbers.begin(), numbers.end(), 0);
0057     }
0058
0059 protected:
0060     void run(ThreadWeaver::JobPointer, ThreadWeaver::Thread *) override
0061     {
0062         payload();
0063     }
0064
0065 private:
0066     quint64 m_count;
0067     quint64 m_result;
0068 };
0069
0070 class QueueBenchmarksTest : public QObject
0071 {
0072     Q_OBJECT
0073
0074 public:
0075     QueueBenchmarksTest();
0076
0077 private Q_SLOTS:
0078     void initTestCase();
0079     void cleanupTestCase();
0080     void BaselineBenchmark();
0081     void BaselineBenchmark_data();
0082     void BaselineAsJobsBenchmark();
0083     void BaselineAsJobsBenchmark_data();
0084     void IndividualJobsBenchmark();
0085     void IndividualJobsBenchmark_data();
0086     void CollectionsBenchmark();
0087     void CollectionsBenchmark_data();
0088     void SequencesBenchmark();
0089     void SequencesBenchmark_data();
0090
0091 private:
0092     void defaultBenchmarkData(bool singleThreaded);
0093 };
0094
0095 QueueBenchmarksTest::QueueBenchmarksTest()
0096 {
0097 }
0098
0099 void QueueBenchmarksTest::initTestCase()
0100 {
0101 }
0102
0103 void QueueBenchmarksTest::cleanupTestCase()
0104 {
0105 }
0106
0107 /** @brief BaselineBenchmark simply performs the same operations in a loop.
0108  *
0109  * The result amounts to what time the jobs used in the benchmark need to execute without queueing or thread
0110  * synchronization overhead. */
0111 void QueueBenchmarksTest::BaselineBenchmark()
0112 {
0113     QFETCH(int, m);
0114     QFETCH(int, c);
0115     QFETCH(int, b);
0116     QFETCH(int, t);
0117     const int n = c * b;
0118     Q_UNUSED(t); // in this case
0119
0120     QVector<AccumulateJob> jobs(n);
0121     for (int i = 0; i < n; ++i) {
0122         jobs[i].setCount(m);
0123     }
0124
0125     // executeLocal needs to emit similar signals as execute(), to be comparable to the threaded variants.
0126     // BaselineAsJobsBenchmark does that. Compare BaselineAsJobsBenchmark and BaselineBenchmark to evaluate the overhead of executing
0127     // an operation in a job.
0128     QBENCHMARK {
0129         for (int i = 0; i < n; ++i) {
0130             jobs[i].payload();
0131         }
0132     }
0133 }
0134
0135 void QueueBenchmarksTest::BaselineBenchmark_data()
0136 {
0137     defaultBenchmarkData(true);
0138 }
0139
0140 void QueueBenchmarksTest::BaselineAsJobsBenchmark()
0141 {
0142     QFETCH(int, m);
0143     QFETCH(int, c);
0144     QFETCH(int, b);
0145     QFETCH(int, t);
0146     const int n = c * b;
0147     Q_UNUSED(t); // in this case
0148
0149     QVector<AccumulateJob> jobs(n);
0150     for (int i = 0; i < n; ++i) {
0151         jobs[i].setCount(m);
0152     }
0153
0154     QBENCHMARK {
0155         for (int i = 0; i < n; ++i) {
0156             jobs[i].blockingExecute();
0157         }
0158     }
0159 }
0160
0161 void QueueBenchmarksTest::BaselineAsJobsBenchmark_data()
0162 {
0163     defaultBenchmarkData(true);
0164 }
0165
0166 void QueueBenchmarksTest::IndividualJobsBenchmark()
0167 {
0168     QFETCH(int, m);
0169     QFETCH(int, c);
0170     QFETCH(int, b);
0171     QFETCH(int, t);
0172     const int n = c * b;
0173
0174     ThreadWeaver::Queue weaver;
0175     weaver.setMaximumNumberOfThreads(t);
0176     weaver.suspend();
0177     QVector<AccumulateJob> jobs(n);
0178     {
0179         ThreadWeaver::QueueStream stream(&weaver);
0180         for (int i = 0; i < n; ++i) {
0181             jobs[i].setCount(m);
0182             stream << jobs[i];
0183         }
0184     }
0185     QBENCHMARK_ONCE {
0186         weaver.resume();
0187         weaver.finish();
0188     }
0189 }
0190
0191 void QueueBenchmarksTest::IndividualJobsBenchmark_data()
0192 {
0193     defaultBenchmarkData(false);
0194 }
0195
0196 void QueueBenchmarksTest::CollectionsBenchmark()
0197 {
0198     QFETCH(int, m);
0199     QFETCH(int, c);
0200     QFETCH(int, b);
0201     QFETCH(int, t);
0202     const int n = c * b;
0203
0204     ThreadWeaver::Queue weaver;
0205     weaver.setMaximumNumberOfThreads(t);
0206     weaver.suspend();
0207     QVector<AccumulateJob> jobs(n);
0208
0209     // FIXME currently, memory management of the job sequences (they are deleted when they go out of scope)
0210     // is measured as part of the benchmark
0211     qDebug() << b << "blocks" << c << "operations, queueing...";
0212     // queue the jobs blockwise as collections
0213     for (int block = 0; block < b; ++block) {
0214         ThreadWeaver::Collection *collection = new ThreadWeaver::Collection();
0215         for (int operation = 0; operation < c; ++operation) {
0216             const int index = block * b + operation;
0217             jobs[index].setCount(m);
0218             *collection << jobs[index];
0219         }
0220         weaver.stream() << collection;
0221     }
0222
0223     qDebug() << b << "blocks" << c << "operations, executing...";
0224     QBENCHMARK_ONCE {
0225         weaver.resume();
0226         weaver.finish();
0227     }
0228 }
0229
0230 void QueueBenchmarksTest::CollectionsBenchmark_data()
0231 {
0232     defaultBenchmarkData(false);
0233 }
0234
0235 void QueueBenchmarksTest::SequencesBenchmark()
0236 {
0237     QFETCH(int, m);
0238     QFETCH(int, c);
0239     QFETCH(int, b);
0240     QFETCH(int, t);
0241     const int n = c * b;
0242
0243     ThreadWeaver::Queue weaver;
0244     weaver.setMaximumNumberOfThreads(t);
0245     weaver.suspend();
0246     QVector<AccumulateJob> jobs(n);
0247
0248     qDebug() << b << "blocks" << c << "operations, queueing...";
0249     // queue the jobs blockwise as collections
0250     for (int block = 0; block < b; ++block) {
0251         ThreadWeaver::Sequence *sequence = new ThreadWeaver::Sequence();
0252         for (int operation = 0; operation < c; ++operation) {
0253             const int index = block * b + operation;
0254             jobs[index].setCount(m);
0255             *sequence << jobs[index];
0256         }
0257         weaver.stream() << sequence;
0258     }
0259
0260     qDebug() << b << "blocks" << c << "operations, executing...";
0261     QBENCHMARK_ONCE {
0262         weaver.resume();
0263         weaver.finish();
0264     }
0265 }
0266
0267 void QueueBenchmarksTest::SequencesBenchmark_data()
0268 {
0269     defaultBenchmarkData(false);
0270 }
0271
0272 void QueueBenchmarksTest::defaultBenchmarkData(bool singleThreaded)
0273 {
0274     QTest::addColumn<int>("m"); // number of quint64's to accumulate
0275     QTest::addColumn<int>("c"); // operations per block
0276     QTest::addColumn<int>("b"); // number of blocks, number of jobs is b*c
0277     QTest::addColumn<int>("t"); // number of worker threads
0278
0279     const QList<int> threads = singleThreaded ? QList<int>() << 1 : QList<int>() << 1 << 2 << 4 << 8 << 16 << 32 << 64 << 128;
0280     const QList<int> ms = QList<int>() << 1 << 10 << 100 << 1000 << 10000 << 100000;
0281     for (int m : ms) {
0282         for (int t : threads) {
0283             const QString name = QString::fromLatin1("%1 threads, %2 values").arg(t).arg(m);
0284             // newRow expects const char*, but then qstrdup's it in the QTestData constructor. Eeeew.
0285             QTest::newRow(qPrintable(name)) << m << 256 << 256 << t;
0286         }
0287     }
0288 }
0289
0290 QTEST_MAIN(QueueBenchmarksTest)
0291
0292 #include "QueueBenchmarks.moc"