File indexing completed on 2024-04-21 15:07:49
0001 /* -*- C++ -*- 0002 This file contains a benchmark for job processing in ThreadWeaver. 0003 0004 SPDX-FileCopyrightText: 2005-2013 Mirko Boehm <mirko@kde.org> 0005 0006 SPDX-License-Identifier: LGPL-2.0-or-later 0007 */ 0008 0009 #include <numeric> 0010 0011 #include <QCoreApplication> 0012 #include <QList> 0013 #include <QString> 0014 #include <QTest> 0015 #include <QtDebug> 0016 0017 #include <ThreadWeaver/Collection> 0018 #include <ThreadWeaver/Job> 0019 #include <ThreadWeaver/Queueing> 0020 #include <ThreadWeaver/Sequence> 0021 #include <ThreadWeaver/ThreadWeaver> 0022 0023 class AccumulateJob : public ThreadWeaver::Job 0024 { 0025 public: 0026 explicit AccumulateJob() 0027 : m_count(0) 0028 , m_result(0) 0029 { 0030 } 0031 0032 AccumulateJob(const AccumulateJob &a) 0033 : ThreadWeaver::Job() 0034 , m_count(a.m_count) 0035 , m_result(a.m_result) 0036 { 0037 } 0038 0039 void setCount(quint64 count) 0040 { 0041 m_count = count; 0042 } 0043 0044 quint64 result() const 0045 { 0046 return m_result; 0047 } 0048 0049 void payload() 0050 { 0051 std::vector<quint64> numbers(m_count); 0052 std::generate(numbers.begin(), numbers.end(), []() -> quint64 { 0053 static quint64 i = 0; 0054 return i++; 0055 }); 0056 m_result = std::accumulate(numbers.begin(), numbers.end(), 0); 0057 } 0058 0059 protected: 0060 void run(ThreadWeaver::JobPointer, ThreadWeaver::Thread *) override 0061 { 0062 payload(); 0063 } 0064 0065 private: 0066 quint64 m_count; 0067 quint64 m_result; 0068 }; 0069 0070 class QueueBenchmarksTest : public QObject 0071 { 0072 Q_OBJECT 0073 0074 public: 0075 QueueBenchmarksTest(); 0076 0077 private Q_SLOTS: 0078 void initTestCase(); 0079 void cleanupTestCase(); 0080 void BaselineBenchmark(); 0081 void BaselineBenchmark_data(); 0082 void BaselineAsJobsBenchmark(); 0083 void BaselineAsJobsBenchmark_data(); 0084 void IndividualJobsBenchmark(); 0085 void IndividualJobsBenchmark_data(); 0086 void CollectionsBenchmark(); 0087 void CollectionsBenchmark_data(); 0088 void SequencesBenchmark(); 0089 void SequencesBenchmark_data(); 0090 0091 private: 0092 void defaultBenchmarkData(bool singleThreaded); 0093 }; 0094 0095 QueueBenchmarksTest::QueueBenchmarksTest() 0096 { 0097 } 0098 0099 void QueueBenchmarksTest::initTestCase() 0100 { 0101 } 0102 0103 void QueueBenchmarksTest::cleanupTestCase() 0104 { 0105 } 0106 0107 /** @brief BaselineBenchmark simply performs the same operations in a loop. 0108 * 0109 * The result amounts to what time the jobs used in the benchmark need to execute without queueing or thread 0110 * synchronization overhead. */ 0111 void QueueBenchmarksTest::BaselineBenchmark() 0112 { 0113 QFETCH(int, m); 0114 QFETCH(int, c); 0115 QFETCH(int, b); 0116 QFETCH(int, t); 0117 const int n = c * b; 0118 Q_UNUSED(t); // in this case 0119 0120 QVector<AccumulateJob> jobs(n); 0121 for (int i = 0; i < n; ++i) { 0122 jobs[i].setCount(m); 0123 } 0124 0125 // executeLocal needs to emit similar signals as execute(), to be comparable to the threaded variants. 0126 // BaselineAsJobsBenchmark does that. Compare BaselineAsJobsBenchmark and BaselineBenchmark to evaluate the overhead of executing 0127 // an operation in a job. 0128 QBENCHMARK { 0129 for (int i = 0; i < n; ++i) { 0130 jobs[i].payload(); 0131 } 0132 } 0133 } 0134 0135 void QueueBenchmarksTest::BaselineBenchmark_data() 0136 { 0137 defaultBenchmarkData(true); 0138 } 0139 0140 void QueueBenchmarksTest::BaselineAsJobsBenchmark() 0141 { 0142 QFETCH(int, m); 0143 QFETCH(int, c); 0144 QFETCH(int, b); 0145 QFETCH(int, t); 0146 const int n = c * b; 0147 Q_UNUSED(t); // in this case 0148 0149 QVector<AccumulateJob> jobs(n); 0150 for (int i = 0; i < n; ++i) { 0151 jobs[i].setCount(m); 0152 } 0153 0154 QBENCHMARK { 0155 for (int i = 0; i < n; ++i) { 0156 jobs[i].blockingExecute(); 0157 } 0158 } 0159 } 0160 0161 void QueueBenchmarksTest::BaselineAsJobsBenchmark_data() 0162 { 0163 defaultBenchmarkData(true); 0164 } 0165 0166 void QueueBenchmarksTest::IndividualJobsBenchmark() 0167 { 0168 QFETCH(int, m); 0169 QFETCH(int, c); 0170 QFETCH(int, b); 0171 QFETCH(int, t); 0172 const int n = c * b; 0173 0174 ThreadWeaver::Queue weaver; 0175 weaver.setMaximumNumberOfThreads(t); 0176 weaver.suspend(); 0177 QVector<AccumulateJob> jobs(n); 0178 { 0179 ThreadWeaver::QueueStream stream(&weaver); 0180 for (int i = 0; i < n; ++i) { 0181 jobs[i].setCount(m); 0182 stream << jobs[i]; 0183 } 0184 } 0185 QBENCHMARK_ONCE { 0186 weaver.resume(); 0187 weaver.finish(); 0188 } 0189 } 0190 0191 void QueueBenchmarksTest::IndividualJobsBenchmark_data() 0192 { 0193 defaultBenchmarkData(false); 0194 } 0195 0196 void QueueBenchmarksTest::CollectionsBenchmark() 0197 { 0198 QFETCH(int, m); 0199 QFETCH(int, c); 0200 QFETCH(int, b); 0201 QFETCH(int, t); 0202 const int n = c * b; 0203 0204 ThreadWeaver::Queue weaver; 0205 weaver.setMaximumNumberOfThreads(t); 0206 weaver.suspend(); 0207 QVector<AccumulateJob> jobs(n); 0208 0209 // FIXME currently, memory management of the job sequences (they are deleted when they go out of scope) 0210 // is measured as part of the benchmark 0211 qDebug() << b << "blocks" << c << "operations, queueing..."; 0212 // queue the jobs blockwise as collections 0213 for (int block = 0; block < b; ++block) { 0214 ThreadWeaver::Collection *collection = new ThreadWeaver::Collection(); 0215 for (int operation = 0; operation < c; ++operation) { 0216 const int index = block * b + operation; 0217 jobs[index].setCount(m); 0218 *collection << jobs[index]; 0219 } 0220 weaver.stream() << collection; 0221 } 0222 0223 qDebug() << b << "blocks" << c << "operations, executing..."; 0224 QBENCHMARK_ONCE { 0225 weaver.resume(); 0226 weaver.finish(); 0227 } 0228 } 0229 0230 void QueueBenchmarksTest::CollectionsBenchmark_data() 0231 { 0232 defaultBenchmarkData(false); 0233 } 0234 0235 void QueueBenchmarksTest::SequencesBenchmark() 0236 { 0237 QFETCH(int, m); 0238 QFETCH(int, c); 0239 QFETCH(int, b); 0240 QFETCH(int, t); 0241 const int n = c * b; 0242 0243 ThreadWeaver::Queue weaver; 0244 weaver.setMaximumNumberOfThreads(t); 0245 weaver.suspend(); 0246 QVector<AccumulateJob> jobs(n); 0247 0248 qDebug() << b << "blocks" << c << "operations, queueing..."; 0249 // queue the jobs blockwise as collections 0250 for (int block = 0; block < b; ++block) { 0251 ThreadWeaver::Sequence *sequence = new ThreadWeaver::Sequence(); 0252 for (int operation = 0; operation < c; ++operation) { 0253 const int index = block * b + operation; 0254 jobs[index].setCount(m); 0255 *sequence << jobs[index]; 0256 } 0257 weaver.stream() << sequence; 0258 } 0259 0260 qDebug() << b << "blocks" << c << "operations, executing..."; 0261 QBENCHMARK_ONCE { 0262 weaver.resume(); 0263 weaver.finish(); 0264 } 0265 } 0266 0267 void QueueBenchmarksTest::SequencesBenchmark_data() 0268 { 0269 defaultBenchmarkData(false); 0270 } 0271 0272 void QueueBenchmarksTest::defaultBenchmarkData(bool singleThreaded) 0273 { 0274 QTest::addColumn<int>("m"); // number of quint64's to accumulate 0275 QTest::addColumn<int>("c"); // operations per block 0276 QTest::addColumn<int>("b"); // number of blocks, number of jobs is b*c 0277 QTest::addColumn<int>("t"); // number of worker threads 0278 0279 const QList<int> threads = singleThreaded ? QList<int>() << 1 : QList<int>() << 1 << 2 << 4 << 8 << 16 << 32 << 64 << 128; 0280 const QList<int> ms = QList<int>() << 1 << 10 << 100 << 1000 << 10000 << 100000; 0281 for (int m : ms) { 0282 for (int t : threads) { 0283 const QString name = QString::fromLatin1("%1 threads, %2 values").arg(t).arg(m); 0284 // newRow expects const char*, but then qstrdup's it in the QTestData constructor. Eeeew. 0285 QTest::newRow(qPrintable(name)) << m << 256 << 256 << t; 0286 } 0287 } 0288 } 0289 0290 QTEST_MAIN(QueueBenchmarksTest) 0291 0292 #include "QueueBenchmarks.moc"