File indexing completed on 2024-11-10 04:00:28
0001 /* 0002 * SPDX-FileCopyrightText: 2012 Dmitry Kazakov <dimula73@gmail.com> 0003 * SPDX-FileCopyrightText: 2015 Thorsten Zachmann <zachmann@kde.org> 0004 * SPDX-FileCopyrightText: 2020 Mathias Wein <lynx.mw+kde@gmail.com> 0005 * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me> 0006 * 0007 * SPDX-License-Identifier: GPL-2.0-or-later 0008 */ 0009 0010 // for calculation of the needed alignment 0011 #include <xsimd_extensions/xsimd.hpp> 0012 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0013 #include <KoOptimizedCompositeOpOver32.h> 0014 #include <KoOptimizedCompositeOpOver128.h> 0015 #include <KoOptimizedCompositeOpCopy128.h> 0016 #include <KoOptimizedCompositeOpAlphaDarken32.h> 0017 #endif 0018 0019 #include "kis_composition_benchmark.h" 0020 #include <simpletest.h> 0021 #include <QElapsedTimer> 0022 0023 #include <KoColorSpace.h> 0024 #include <KoCompositeOp.h> 0025 #include <KoColorSpaceRegistry.h> 0026 0027 #include <KoColorSpaceTraits.h> 0028 #include <KoCompositeOpAlphaDarken.h> 0029 #include <KoCompositeOpOver.h> 0030 #include <KoCompositeOpCopy2.h> 0031 #include <KoOptimizedCompositeOpFactory.h> 0032 #include <KoAlphaDarkenParamsWrapper.h> 0033 0034 // for posix_memalign() 0035 #include <stdlib.h> 0036 0037 #include <kis_debug.h> 0038 0039 #if defined Q_OS_WIN 0040 #define MEMALIGN_ALLOC(p, a, s) ((*(p)) = _aligned_malloc((s), (a)), *(p) ? 0 : errno) 0041 #define MEMALIGN_FREE(p) _aligned_free((p)) 0042 #else 0043 #define MEMALIGN_ALLOC(p, a, s) posix_memalign((p), (a), (s)) 0044 #define MEMALIGN_FREE(p) free((p)) 0045 #endif 0046 0047 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0048 using float_v = xsimd::batch<float, xsimd::current_arch>; 0049 #endif 0050 0051 enum AlphaRange { 0052 ALPHA_ZERO, 0053 ALPHA_UNIT, 0054 ALPHA_RANDOM 0055 }; 0056 0057 0058 template <typename channel_type, class RandomGenerator> 0059 inline channel_type generateAlphaValue(AlphaRange range, RandomGenerator &rnd) { 0060 channel_type value = 0; 0061 0062 switch (range) { 0063 case ALPHA_ZERO: 0064 break; 0065 case ALPHA_UNIT: 0066 value = rnd.unit(); 0067 break; 0068 case ALPHA_RANDOM: 0069 value = rnd(); 0070 break; 0071 } 0072 0073 return value; 0074 } 0075 0076 #include <boost/random/mersenne_twister.hpp> 0077 #include <boost/random/uniform_smallint.hpp> 0078 #include <boost/random/uniform_real.hpp> 0079 0080 template <typename channel_type> 0081 struct RandomGenerator { 0082 channel_type operator() () { 0083 qFatal("Wrong template instantiation"); 0084 return channel_type(0); 0085 } 0086 0087 channel_type unit() { 0088 qFatal("Wrong template instantiation"); 0089 return channel_type(0); 0090 } 0091 }; 0092 0093 template <> 0094 struct RandomGenerator<quint8> 0095 { 0096 RandomGenerator(int seed) 0097 : m_smallint(0,255), 0098 m_rnd(seed) 0099 { 0100 } 0101 0102 quint8 operator() () { 0103 return m_smallint(m_rnd); 0104 } 0105 0106 quint8 unit() { 0107 return KoColorSpaceMathsTraits<quint8>::unitValue; 0108 } 0109 0110 boost::uniform_smallint<int> m_smallint; 0111 boost::mt11213b m_rnd; 0112 }; 0113 0114 template <> 0115 struct RandomGenerator<quint16> 0116 { 0117 RandomGenerator(int seed) 0118 : m_smallint(0,65535), 0119 m_rnd(seed) 0120 { 0121 } 0122 0123 quint16 operator() () { 0124 return m_smallint(m_rnd); 0125 } 0126 0127 quint16 unit() { 0128 return KoColorSpaceMathsTraits<quint16>::unitValue; 0129 } 0130 0131 boost::uniform_smallint<int> m_smallint; 0132 boost::mt11213b m_rnd; 0133 }; 0134 0135 template <> 0136 struct RandomGenerator<float> 0137 { 0138 RandomGenerator(int seed) 0139 : m_rnd(seed) 0140 { 0141 } 0142 0143 float operator() () { 0144 //return float(m_rnd()) / float(m_rnd.max()); 0145 return m_smallfloat(m_rnd); 0146 } 0147 0148 float unit() { 0149 return KoColorSpaceMathsTraits<float>::unitValue; 0150 } 0151 0152 boost::uniform_real<float> m_smallfloat; 0153 boost::mt11213b m_rnd; 0154 }; 0155 0156 template <> 0157 struct RandomGenerator<double> : RandomGenerator<float> 0158 { 0159 RandomGenerator(int seed) 0160 : RandomGenerator<float>(seed) 0161 { 0162 } 0163 }; 0164 0165 0166 template <typename channel_type> 0167 void generateDataLine(uint seed, int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask, AlphaRange srcAlphaRange, AlphaRange dstAlphaRange) 0168 { 0169 Q_ASSERT(numPixels >= 4); 0170 0171 RandomGenerator<channel_type> rnd(seed); 0172 RandomGenerator<quint8> maskRnd(seed + 1); 0173 0174 channel_type *srcArray = reinterpret_cast<channel_type*>(srcPixels); 0175 channel_type *dstArray = reinterpret_cast<channel_type*>(dstPixels); 0176 0177 for (int i = 0; i < numPixels; i++) { 0178 for (int j = 0; j < 3; j++) { 0179 channel_type s = rnd(); 0180 channel_type d = rnd(); 0181 *(srcArray++) = s; 0182 *(dstArray++) = d; 0183 } 0184 0185 channel_type sa = generateAlphaValue<channel_type>(srcAlphaRange, rnd); 0186 channel_type da = generateAlphaValue<channel_type>(dstAlphaRange, rnd); 0187 *(srcArray++) = sa; 0188 *(dstArray++) = da; 0189 0190 *(mask++) = maskRnd(); 0191 } 0192 } 0193 0194 void printData(int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask) 0195 { 0196 for (int i = 0; i < numPixels; i++) { 0197 qDebug() << "Src: " 0198 << srcPixels[i*4] << "\t" 0199 << srcPixels[i*4+1] << "\t" 0200 << srcPixels[i*4+2] << "\t" 0201 << srcPixels[i*4+3] << "\t" 0202 << "Msk:" << mask[i]; 0203 0204 qDebug() << "Dst: " 0205 << dstPixels[i*4] << "\t" 0206 << dstPixels[i*4+1] << "\t" 0207 << dstPixels[i*4+2] << "\t" 0208 << dstPixels[i*4+3]; 0209 } 0210 } 0211 0212 const int rowStride = 64; 0213 const int totalRows = 64; 0214 const QRect processRect(0,0,64,64); 0215 const int numPixels = rowStride * totalRows; 0216 const int numTiles = 1024; 0217 0218 0219 struct Tile { 0220 quint8 *src; 0221 quint8 *dst; 0222 quint8 *mask; 0223 }; 0224 #include <stdint.h> 0225 QVector<Tile> generateTiles(int size, 0226 const int srcAlignmentShift, 0227 const int dstAlignmentShift, 0228 AlphaRange srcAlphaRange, 0229 AlphaRange dstAlphaRange, 0230 const quint32 pixelSize) 0231 { 0232 QVector<Tile> tiles(size); 0233 0234 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0235 const int vecSize = float_v::size; 0236 #else 0237 const int vecSize = 1; 0238 #endif 0239 0240 // the 256 are used to make sure that we have a good alignment no matter what build options are used. 0241 const size_t pixelAlignment = qMax(size_t(vecSize * sizeof(float)), size_t(256)); 0242 const size_t maskAlignment = qMax(size_t(vecSize), size_t(256)); 0243 for (int i = 0; i < size; i++) { 0244 void *ptr = 0; 0245 int error = MEMALIGN_ALLOC(&ptr, pixelAlignment, numPixels * pixelSize + srcAlignmentShift); 0246 if (error) { 0247 qFatal("posix_memalign failed: %d", error); 0248 } 0249 tiles[i].src = (quint8*)ptr + srcAlignmentShift; 0250 error = MEMALIGN_ALLOC(&ptr, pixelAlignment, numPixels * pixelSize + dstAlignmentShift); 0251 if (error) { 0252 qFatal("posix_memalign failed: %d", error); 0253 } 0254 tiles[i].dst = (quint8*)ptr + dstAlignmentShift; 0255 error = MEMALIGN_ALLOC(&ptr, maskAlignment, numPixels); 0256 if (error) { 0257 qFatal("posix_memalign failed: %d", error); 0258 } 0259 tiles[i].mask = (quint8*)ptr; 0260 0261 if (pixelSize == 4) { 0262 generateDataLine<quint8>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange); 0263 } else if (pixelSize == 8) { 0264 generateDataLine<quint16>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange); 0265 } else if (pixelSize == 16) { 0266 generateDataLine<float>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange); 0267 } else { 0268 qFatal("Pixel size %i is not implemented", pixelSize); 0269 } 0270 } 0271 0272 return tiles; 0273 } 0274 0275 void freeTiles(QVector<Tile> tiles, 0276 const int srcAlignmentShift, 0277 const int dstAlignmentShift) 0278 { 0279 Q_FOREACH (const Tile &tile, tiles) { 0280 MEMALIGN_FREE(tile.src - srcAlignmentShift); 0281 MEMALIGN_FREE(tile.dst - dstAlignmentShift); 0282 MEMALIGN_FREE(tile.mask); 0283 } 0284 } 0285 0286 template <typename channel_type> 0287 inline bool fuzzyCompare(channel_type a, channel_type b, channel_type prec) { 0288 return qAbs(a - b) <= prec; 0289 } 0290 0291 template<typename channel_type> 0292 struct PixelEqualDirect 0293 { 0294 bool operator() (channel_type c1, channel_type a1, 0295 channel_type c2, channel_type a2, 0296 channel_type prec) { 0297 0298 Q_UNUSED(a1); 0299 Q_UNUSED(a2); 0300 0301 return fuzzyCompare(c1, c2, prec); 0302 } 0303 }; 0304 0305 template<typename channel_type> 0306 struct PixelEqualPremultiplied 0307 { 0308 bool operator() (channel_type c1, channel_type a1, 0309 channel_type c2, channel_type a2, 0310 channel_type prec) { 0311 0312 c1 = KoColorSpaceMaths<channel_type>::multiply(c1, a1); 0313 c2 = KoColorSpaceMaths<channel_type>::multiply(c2, a2); 0314 0315 return fuzzyCompare(c1, c2, prec); 0316 } 0317 }; 0318 0319 template <typename channel_type, template<typename> class Compare = PixelEqualDirect> 0320 inline bool comparePixels(channel_type *p1, channel_type *p2, channel_type prec) { 0321 Compare<channel_type> comp; 0322 0323 return (p1[3] == p2[3] && p1[3] == 0) || 0324 (comp(p1[0], p1[3], p2[0], p2[3], prec) && 0325 comp(p1[1], p1[3], p2[1], p2[3], prec) && 0326 comp(p1[2], p1[3], p2[2], p2[3], prec) && 0327 fuzzyCompare(p1[3], p2[3], prec)); 0328 } 0329 0330 template <typename channel_type, template<typename> class Compare> 0331 bool compareTwoOpsPixels(QVector<Tile> &tiles, channel_type prec) { 0332 channel_type *dst1 = reinterpret_cast<channel_type*>(tiles[0].dst); 0333 channel_type *dst2 = reinterpret_cast<channel_type*>(tiles[1].dst); 0334 0335 channel_type *src1 = reinterpret_cast<channel_type*>(tiles[0].src); 0336 channel_type *src2 = reinterpret_cast<channel_type*>(tiles[1].src); 0337 0338 for (int i = 0; i < numPixels; i++) { 0339 if (!comparePixels<channel_type, Compare>(dst1, dst2, prec)) { 0340 qDebug() << "Wrong result:" << i; 0341 qDebug() << "Act: " << dst1[0] << dst1[1] << dst1[2] << dst1[3]; 0342 qDebug() << "Exp: " << dst2[0] << dst2[1] << dst2[2] << dst2[3]; 0343 qDebug() << "Dif: " << dst1[0] - dst2[0] << dst1[1] - dst2[1] << dst1[2] - dst2[2] << dst1[3] - dst2[3]; 0344 0345 channel_type *s1 = src1 + 4 * i; 0346 channel_type *s2 = src2 + 4 * i; 0347 0348 qDebug() << "SrcA:" << s1[0] << s1[1] << s1[2] << s1[3]; 0349 qDebug() << "SrcE:" << s2[0] << s2[1] << s2[2] << s2[3]; 0350 0351 qDebug() << "MskA:" << tiles[0].mask[i]; 0352 qDebug() << "MskE:" << tiles[1].mask[i]; 0353 0354 return false; 0355 } 0356 dst1 += 4; 0357 dst2 += 4; 0358 } 0359 return true; 0360 } 0361 0362 template<template<typename> class Compare = PixelEqualDirect> 0363 bool compareTwoOps(bool haveMask, const KoCompositeOp *op1, const KoCompositeOp *op2) 0364 { 0365 Q_ASSERT(op1->colorSpace()->pixelSize() == op2->colorSpace()->pixelSize()); 0366 const quint32 pixelSize = op1->colorSpace()->pixelSize(); 0367 const int alignment = 16; 0368 QVector<Tile> tiles = generateTiles(2, alignment, alignment, ALPHA_RANDOM, ALPHA_RANDOM, op1->colorSpace()->pixelSize()); 0369 0370 KoCompositeOp::ParameterInfo params; 0371 params.dstRowStride = 4 * rowStride; 0372 params.srcRowStride = 4 * rowStride; 0373 params.maskRowStride = rowStride; 0374 params.rows = processRect.height(); 0375 params.cols = processRect.width(); 0376 // This is a hack as in the old version we get a rounding of opacity to this value 0377 params.opacity = float(Arithmetic::scale<quint8>(0.5*1.0f))/255.0; 0378 params.flow = 0.3*1.0f; 0379 params.channelFlags = QBitArray(); 0380 0381 params.dstRowStart = tiles[0].dst; 0382 params.srcRowStart = tiles[0].src; 0383 params.maskRowStart = haveMask ? tiles[0].mask : 0; 0384 op1->composite(params); 0385 0386 params.dstRowStart = tiles[1].dst; 0387 params.srcRowStart = tiles[1].src; 0388 params.maskRowStart = haveMask ? tiles[1].mask : 0; 0389 op2->composite(params); 0390 0391 bool compareResult = true; 0392 if (pixelSize == 4) { 0393 compareResult = compareTwoOpsPixels<quint8, Compare>(tiles, 10); 0394 } 0395 else if (pixelSize == 8) { 0396 compareResult = compareTwoOpsPixels<quint16, Compare>(tiles, 90); 0397 } 0398 else if (pixelSize == 16) { 0399 compareResult = compareTwoOpsPixels<float, Compare>(tiles, 2e-6); 0400 } 0401 else { 0402 qFatal("Pixel size %i is not implemented", pixelSize); 0403 } 0404 0405 freeTiles(tiles, alignment, alignment); 0406 0407 return compareResult; 0408 } 0409 0410 QString getTestName(bool haveMask, 0411 const int srcAlignmentShift, 0412 const int dstAlignmentShift, 0413 AlphaRange srcAlphaRange, 0414 AlphaRange dstAlphaRange) 0415 { 0416 0417 QString testName; 0418 testName += 0419 !srcAlignmentShift && !dstAlignmentShift ? "Aligned " : 0420 !srcAlignmentShift && dstAlignmentShift ? "SrcUnalign " : 0421 srcAlignmentShift && !dstAlignmentShift ? "DstUnalign " : 0422 srcAlignmentShift && dstAlignmentShift ? "Unaligned " : "###"; 0423 0424 testName += haveMask ? "Mask " : "NoMask "; 0425 0426 testName += 0427 srcAlphaRange == ALPHA_RANDOM ? "SrcRand " : 0428 srcAlphaRange == ALPHA_ZERO ? "SrcZero " : 0429 srcAlphaRange == ALPHA_UNIT ? "SrcUnit " : "###"; 0430 0431 testName += 0432 dstAlphaRange == ALPHA_RANDOM ? "DstRand" : 0433 dstAlphaRange == ALPHA_ZERO ? "DstZero" : 0434 dstAlphaRange == ALPHA_UNIT ? "DstUnit" : "###"; 0435 0436 return testName; 0437 } 0438 0439 void benchmarkCompositeOp(const KoCompositeOp *op, 0440 bool haveMask, 0441 qreal opacity, 0442 qreal flow, 0443 const int srcAlignmentShift, 0444 const int dstAlignmentShift, 0445 AlphaRange srcAlphaRange, 0446 AlphaRange dstAlphaRange) 0447 { 0448 QString testName = getTestName(haveMask, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange); 0449 0450 QVector<Tile> tiles = 0451 generateTiles(numTiles, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange, op->colorSpace()->pixelSize()); 0452 0453 const int tileOffset = 4 * (processRect.y() * rowStride + processRect.x()); 0454 0455 KoCompositeOp::ParameterInfo params; 0456 params.dstRowStride = 4 * rowStride; 0457 params.srcRowStride = 4 * rowStride; 0458 params.maskRowStride = rowStride; 0459 params.rows = processRect.height(); 0460 params.cols = processRect.width(); 0461 params.opacity = opacity; 0462 params.flow = flow; 0463 params.channelFlags = QBitArray(); 0464 0465 QElapsedTimer timer; 0466 timer.start(); 0467 0468 Q_FOREACH (const Tile &tile, tiles) { 0469 params.dstRowStart = tile.dst + tileOffset; 0470 params.srcRowStart = tile.src + tileOffset; 0471 params.maskRowStart = haveMask ? tile.mask : 0; 0472 op->composite(params); 0473 } 0474 0475 qDebug() << testName << "RESULT:" << timer.elapsed() << "msec"; 0476 0477 freeTiles(tiles, srcAlignmentShift, dstAlignmentShift); 0478 } 0479 0480 void benchmarkCompositeOp(const KoCompositeOp *op, const QString &postfix) 0481 { 0482 qDebug() << "Testing Composite Op:" << op->id() << "(" << postfix << ")"; 0483 0484 benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM); 0485 benchmarkCompositeOp(op, true, 0.5, 0.3, 8, 0, ALPHA_RANDOM, ALPHA_RANDOM); 0486 benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 8, ALPHA_RANDOM, ALPHA_RANDOM); 0487 benchmarkCompositeOp(op, true, 0.5, 0.3, 4, 8, ALPHA_RANDOM, ALPHA_RANDOM); 0488 0489 /// --- Vary the content of the source and destination 0490 0491 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM); 0492 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_RANDOM); 0493 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_RANDOM); 0494 0495 /// --- 0496 0497 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_ZERO); 0498 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_ZERO); 0499 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_ZERO); 0500 0501 /// --- 0502 0503 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_UNIT); 0504 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_UNIT); 0505 benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_UNIT); 0506 } 0507 0508 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0509 0510 template <typename channels_type> 0511 void printError(quint8 *s, quint8 *d1, quint8 *d2, quint8 *msk1, int pos) 0512 { 0513 const channels_type *src1 = reinterpret_cast<const channels_type*>(s); 0514 const channels_type *dst1 = reinterpret_cast<const channels_type*>(d1); 0515 const channels_type *dst2 = reinterpret_cast<const channels_type*>(d2); 0516 0517 qDebug() << "Wrong rounding in pixel:" << pos; 0518 qDebug() << "Vector version: " << dst1[0] << dst1[1] << dst1[2] << dst1[3]; 0519 qDebug() << "Scalar version: " << dst2[0] << dst2[1] << dst2[2] << dst2[3]; 0520 qDebug() << "Dif: " << dst1[0] - dst2[0] << dst1[1] - dst2[1] << dst1[2] - dst2[2] << dst1[3] - dst2[3]; 0521 0522 qDebug() << "src:" << src1[0] << src1[1] << src1[2] << src1[3]; 0523 qDebug() << "msk:" << msk1[0]; 0524 } 0525 0526 template<class Compositor> 0527 void checkRounding(qreal opacity, qreal flow, qreal averageOpacity = -1, quint32 pixelSize = 4) 0528 { 0529 QVector<Tile> tiles = 0530 generateTiles(2, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM, pixelSize); 0531 0532 const int vecSize = float_v::size; 0533 0534 const int numBlocks = numPixels / vecSize; 0535 0536 quint8 *src1 = tiles[0].src; 0537 quint8 *dst1 = tiles[0].dst; 0538 quint8 *msk1 = tiles[0].mask; 0539 0540 quint8 *src2 = tiles[1].src; 0541 quint8 *dst2 = tiles[1].dst; 0542 quint8 *msk2 = tiles[1].mask; 0543 0544 KoCompositeOp::ParameterInfo params; 0545 params.opacity = opacity; 0546 params.flow = flow; 0547 0548 if (averageOpacity >= 0.0) { 0549 params._lastOpacityData = averageOpacity; 0550 params.lastOpacity = ¶ms._lastOpacityData; 0551 } 0552 0553 params.channelFlags = QBitArray(); 0554 typename Compositor::ParamsWrapper paramsWrapper(params); 0555 0556 // The error count is needed as 38.5 gets rounded to 38 instead of 39 in the vc version. 0557 int errorcount = 0; 0558 for (int i = 0; i < numBlocks; i++) { 0559 Compositor::template compositeVector<true,true, xsimd::current_arch>(src1, dst1, msk1, params.opacity, paramsWrapper); 0560 for (int j = 0; j < vecSize; j++) { 0561 0562 //if (8 * i + j == 7080) { 0563 // qDebug() << "src: " << src2[0] << src2[1] << src2[2] << src2[3]; 0564 // qDebug() << "dst: " << dst2[0] << dst2[1] << dst2[2] << dst2[3]; 0565 // qDebug() << "msk:" << msk2[0]; 0566 //} 0567 0568 Compositor::template compositeOnePixelScalar<true, xsimd::current_arch>(src2, dst2, msk2, params.opacity, paramsWrapper); 0569 0570 bool compareResult = true; 0571 if (pixelSize == 4) { 0572 compareResult = comparePixels<quint8>(dst1, dst2, 0); 0573 if (!compareResult) { 0574 ++errorcount; 0575 compareResult = comparePixels<quint8>(dst1, dst2, 1); 0576 if (!compareResult) { 0577 ++errorcount; 0578 } 0579 } 0580 } 0581 else if (pixelSize == 8) { 0582 compareResult = comparePixels<quint16>(reinterpret_cast<quint16*>(dst1), reinterpret_cast<quint16*>(dst2), 0); 0583 } 0584 else if (pixelSize == 16) { 0585 compareResult = comparePixels<float>(reinterpret_cast<float*>(dst1), reinterpret_cast<float*>(dst2), 0); 0586 } 0587 else { 0588 qFatal("Pixel size %i is not implemented", pixelSize); 0589 } 0590 0591 if(!compareResult || errorcount > 1) { 0592 if (pixelSize == 4) { 0593 printError<quint8>(src1, dst1, dst2, msk1, 8 * i + j); 0594 } else if (pixelSize == 8) { 0595 printError<quint16>(src1, dst1, dst2, msk1, 8 * i + j); 0596 } else if (pixelSize == 16) { 0597 printError<float>(src1, dst1, dst2, msk1, 8 * i + j); 0598 } else { 0599 qFatal("Pixel size %i is not implemented", pixelSize); 0600 } 0601 0602 QFAIL("Wrong rounding"); 0603 } 0604 0605 src1 += pixelSize; 0606 dst1 += pixelSize; 0607 src2 += pixelSize; 0608 dst2 += pixelSize; 0609 msk1++; 0610 msk2++; 0611 } 0612 } 0613 0614 freeTiles(tiles, 0, 0); 0615 } 0616 0617 #endif 0618 0619 0620 void KisCompositionBenchmark::detectBuildArchitecture() 0621 { 0622 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0623 using namespace xsimd; 0624 0625 qDebug() << "built for" << ppVar(current_arch().name()); 0626 qDebug() << "built for" << ppVar(default_arch().name()); 0627 0628 qDebug() << ppVar(supported_architectures().contains<sse2>()); 0629 qDebug() << ppVar(supported_architectures().contains<sse3>()); 0630 qDebug() << ppVar(supported_architectures().contains<ssse3>()); 0631 qDebug() << ppVar(supported_architectures().contains<sse4_1>()); 0632 qDebug() << ppVar(supported_architectures().contains<sse4_2>()); 0633 qDebug() << ppVar(supported_architectures().contains<fma3<sse4_2>>()); 0634 0635 qDebug() << ppVar(supported_architectures().contains<avx>()); 0636 qDebug() << ppVar(supported_architectures().contains<avx2>()); 0637 qDebug() << ppVar(supported_architectures().contains<fma3<avx2>>()); 0638 qDebug() << ppVar(supported_architectures().contains<fma4>()); 0639 qDebug() << ppVar(supported_architectures().contains<avx512f>()); 0640 qDebug() << ppVar(supported_architectures().contains<avx512bw>()); 0641 qDebug() << ppVar(supported_architectures().contains<avx512dq>()); 0642 qDebug() << ppVar(supported_architectures().contains<avx512cd>()); 0643 qDebug().nospace() << "running on " << hex << "0x" << xsimd::available_architectures().best; 0644 #endif 0645 } 0646 0647 void KisCompositionBenchmark::checkRoundingAlphaDarken_05_03() 0648 { 0649 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0650 checkRounding<AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy> >(0.5,0.3); 0651 #endif 0652 } 0653 0654 void KisCompositionBenchmark::checkRoundingAlphaDarken_05_05() 0655 { 0656 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0657 checkRounding<AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy> >(0.5,0.5); 0658 #endif 0659 } 0660 0661 void KisCompositionBenchmark::checkRoundingAlphaDarken_05_07() 0662 { 0663 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0664 checkRounding<AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy> >(0.5,0.7); 0665 #endif 0666 } 0667 0668 void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10() 0669 { 0670 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0671 checkRounding<AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy> >(0.5,1.0); 0672 #endif 0673 } 0674 0675 void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10_08() 0676 { 0677 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0678 checkRounding<AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy> >(0.5,1.0,0.8); 0679 #endif 0680 } 0681 0682 void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_03() 0683 { 0684 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0685 checkRounding<OverCompositor128<float, false, true> >(0.5, 0.3, -1, 16); 0686 #endif 0687 } 0688 0689 void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_05() 0690 { 0691 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0692 checkRounding<OverCompositor128<float, false, true> >(0.5, 0.5, -1, 16); 0693 #endif 0694 } 0695 0696 void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_07() 0697 { 0698 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0699 checkRounding<OverCompositor128<float, false, true> >(0.5, 0.7, -1, 16); 0700 #endif 0701 } 0702 0703 void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_10() 0704 { 0705 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0706 checkRounding<OverCompositor128<float, false, true> >(0.5, 1.0, -1, 16); 0707 #endif 0708 } 0709 0710 void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_10_08() 0711 { 0712 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0713 checkRounding<OverCompositor128<float, false, true> >(0.5, 1.0, 0.8, 16); 0714 #endif 0715 } 0716 0717 void KisCompositionBenchmark::checkRoundingOver() 0718 { 0719 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0720 checkRounding<OverCompositor32<quint8, quint32, false, true> >(0.5, 0.3); 0721 #endif 0722 } 0723 0724 void KisCompositionBenchmark::checkRoundingOverRgbaU16() 0725 { 0726 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0727 checkRounding<OverCompositor128<quint16, false, true> >(0.5, 1.0, -1, 8); 0728 #endif 0729 } 0730 0731 void KisCompositionBenchmark::checkRoundingOverRgbaF32() 0732 { 0733 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0734 checkRounding<OverCompositor128<float, false, true> >(0.5, 1.0, -1, 16); 0735 #endif 0736 } 0737 #include <cfenv> 0738 void KisCompositionBenchmark::checkRoundingCopyRgbaU16() 0739 { 0740 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0741 checkRounding<CopyCompositor128<quint16, false, true> >(0.5, 1.0, -1, 8); 0742 #endif 0743 } 0744 0745 void KisCompositionBenchmark::checkRoundingCopyRgbaF32() 0746 { 0747 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0748 checkRounding<CopyCompositor128<float, false, true> >(0.5, 1.0, -1, 16); 0749 #endif 0750 } 0751 0752 void KisCompositionBenchmark::compareAlphaDarkenOps() 0753 { 0754 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0755 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy32(cs); 0756 KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits, KoAlphaDarkenParamsWrapperCreamy>(cs); 0757 0758 QVERIFY(compareTwoOps(true, opAct, opExp)); 0759 0760 delete opExp; 0761 delete opAct; 0762 } 0763 0764 void KisCompositionBenchmark::compareRgbF32AlphaDarkenOps() 0765 { 0766 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 0767 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy128(cs); 0768 KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoRgbF32Traits, KoAlphaDarkenParamsWrapperCreamy>(cs); 0769 0770 QVERIFY(compareTwoOps(true, opAct, opExp)); 0771 0772 delete opExp; 0773 delete opAct; 0774 } 0775 0776 void KisCompositionBenchmark::compareAlphaDarkenOpsNoMask() 0777 { 0778 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0779 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy32(cs); 0780 KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits, KoAlphaDarkenParamsWrapperCreamy>(cs); 0781 0782 QVERIFY(compareTwoOps(false, opAct, opExp)); 0783 0784 delete opExp; 0785 delete opAct; 0786 } 0787 0788 void KisCompositionBenchmark::compareRgbU16AlphaDarkenOps() 0789 { 0790 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0791 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamyU64(cs); 0792 KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoRgbU16Traits, KoAlphaDarkenParamsWrapperCreamy>(cs); 0793 0794 QVERIFY(compareTwoOps(true, opAct, opExp)); 0795 0796 delete opExp; 0797 delete opAct; 0798 } 0799 0800 void KisCompositionBenchmark::compareOverOps() 0801 { 0802 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0803 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs); 0804 KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs); 0805 0806 QVERIFY(compareTwoOps(true, opAct, opExp)); 0807 0808 delete opExp; 0809 delete opAct; 0810 } 0811 0812 void KisCompositionBenchmark::compareOverOpsNoMask() 0813 { 0814 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0815 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs); 0816 KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs); 0817 0818 QVERIFY(compareTwoOps(false, opAct, opExp)); 0819 0820 delete opExp; 0821 delete opAct; 0822 } 0823 0824 void KisCompositionBenchmark::compareRgbU16OverOps() 0825 { 0826 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0827 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOpU64(cs); 0828 KoCompositeOp *opExp = new KoCompositeOpOver<KoRgbU16Traits>(cs); 0829 0830 QVERIFY(compareTwoOps(false, opAct, opExp)); 0831 0832 delete opExp; 0833 delete opAct; 0834 } 0835 0836 void KisCompositionBenchmark::compareRgbF32OverOps() 0837 { 0838 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 0839 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp128(cs); 0840 KoCompositeOp *opExp = new KoCompositeOpOver<KoRgbF32Traits>(cs); 0841 0842 QVERIFY(compareTwoOps(false, opAct, opExp)); 0843 0844 delete opExp; 0845 delete opAct; 0846 } 0847 0848 void KisCompositionBenchmark::compareRgbU8CopyOps() 0849 { 0850 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0851 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createCopyOp32(cs); 0852 KoCompositeOp *opExp = new KoCompositeOpCopy2<KoRgbU8Traits>(cs); 0853 0854 // Since composite copy involves a channel division operation, 0855 // there might be significant rounding difference with purely 0856 // integer implementation. So we should compare in premultiplied 0857 // form. 0858 QVERIFY(compareTwoOps<PixelEqualPremultiplied>(false, opAct, opExp)); 0859 0860 delete opExp; 0861 delete opAct; 0862 } 0863 0864 void KisCompositionBenchmark::compareRgbU16CopyOps() 0865 { 0866 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0867 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createCopyOpU64(cs); 0868 KoCompositeOp *opExp = new KoCompositeOpCopy2<KoRgbU16Traits>(cs); 0869 0870 QVERIFY(compareTwoOps(false, opAct, opExp)); 0871 0872 delete opExp; 0873 delete opAct; 0874 } 0875 0876 void KisCompositionBenchmark::compareRgbF32CopyOps() 0877 { 0878 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 0879 KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createCopyOp128(cs); 0880 KoCompositeOp *opExp = new KoCompositeOpCopy2<KoRgbF32Traits>(cs); 0881 0882 QVERIFY(compareTwoOps(false, opAct, opExp)); 0883 0884 delete opExp; 0885 delete opAct; 0886 } 0887 0888 void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenLegacy() 0889 { 0890 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0891 KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoBgrU8Traits, KoAlphaDarkenParamsWrapperCreamy>(cs); 0892 benchmarkCompositeOp(op, "Legacy"); 0893 delete op; 0894 } 0895 0896 void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenOptimized() 0897 { 0898 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0899 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy32(cs); 0900 benchmarkCompositeOp(op, "Optimized"); 0901 delete op; 0902 } 0903 0904 void KisCompositionBenchmark::testRgb8CompositeOverLegacy() 0905 { 0906 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0907 KoCompositeOp *op = new KoCompositeOpOver<KoBgrU8Traits>(cs); 0908 benchmarkCompositeOp(op, "Legacy"); 0909 delete op; 0910 } 0911 0912 void KisCompositionBenchmark::testRgb8CompositeOverOptimized() 0913 { 0914 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 0915 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp32(cs); 0916 benchmarkCompositeOp(op, "Optimized"); 0917 delete op; 0918 } 0919 0920 void KisCompositionBenchmark::testRgb16CompositeAlphaDarkenLegacy() 0921 { 0922 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0923 KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoBgrU16Traits, KoAlphaDarkenParamsWrapperCreamy>(cs); 0924 benchmarkCompositeOp(op, "Legacy"); 0925 delete op; 0926 } 0927 0928 void KisCompositionBenchmark::testRgb16CompositeAlphaDarkenOptimized() 0929 { 0930 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0931 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamyU64(cs); 0932 benchmarkCompositeOp(op, "Optimized"); 0933 delete op; 0934 } 0935 0936 void KisCompositionBenchmark::testRgb16CompositeOverLegacy() 0937 { 0938 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0939 KoCompositeOp *op = new KoCompositeOpOver<KoBgrU16Traits>(cs); 0940 benchmarkCompositeOp(op, "Legacy"); 0941 delete op; 0942 } 0943 0944 void KisCompositionBenchmark::testRgb16CompositeOverOptimized() 0945 { 0946 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0947 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOpU64(cs); 0948 benchmarkCompositeOp(op, "Optimized"); 0949 delete op; 0950 } 0951 0952 0953 void KisCompositionBenchmark::testRgb16CompositeCopyLegacy() 0954 { 0955 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0956 KoCompositeOp *op = new KoCompositeOpCopy2<KoBgrU16Traits>(cs); 0957 benchmarkCompositeOp(op, "Legacy"); 0958 delete op; 0959 } 0960 0961 void KisCompositionBenchmark::testRgb16CompositeCopyOptimized() 0962 { 0963 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb16(); 0964 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createCopyOpU64(cs); 0965 benchmarkCompositeOp(op, "Optimized"); 0966 delete op; 0967 } 0968 void KisCompositionBenchmark::testRgbF32CompositeAlphaDarkenLegacy() 0969 { 0970 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 0971 KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoRgbF32Traits, KoAlphaDarkenParamsWrapperCreamy>(cs); 0972 benchmarkCompositeOp(op, "Legacy"); 0973 delete op; 0974 } 0975 0976 void KisCompositionBenchmark::testRgbF32CompositeAlphaDarkenOptimized() 0977 { 0978 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 0979 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy128(cs); 0980 benchmarkCompositeOp(op, "Optimized"); 0981 delete op; 0982 } 0983 0984 void KisCompositionBenchmark::testRgbF32CompositeOverLegacy() 0985 { 0986 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 0987 KoCompositeOp *op = new KoCompositeOpOver<KoRgbF32Traits>(cs); 0988 benchmarkCompositeOp(op, "RGBF32 Legacy"); 0989 delete op; 0990 } 0991 0992 void KisCompositionBenchmark::testRgbF32CompositeOverOptimized() 0993 { 0994 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 0995 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp128(cs); 0996 benchmarkCompositeOp(op, "RGBF32 Optimized"); 0997 delete op; 0998 } 0999 1000 void KisCompositionBenchmark::testRgbF32CompositeCopyLegacy() 1001 { 1002 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 1003 KoCompositeOp *op = new KoCompositeOpCopy2<KoRgbF32Traits>(cs); 1004 benchmarkCompositeOp(op, "RGBF32 Legacy"); 1005 delete op; 1006 } 1007 1008 void KisCompositionBenchmark::testRgbF32CompositeCopyOptimized() 1009 { 1010 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", ""); 1011 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createCopyOp128(cs); 1012 benchmarkCompositeOp(op, "RGBF32 Optimized"); 1013 delete op; 1014 } 1015 1016 void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenReal_Aligned() 1017 { 1018 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 1019 const KoCompositeOp *op = cs->compositeOp(COMPOSITE_ALPHA_DARKEN); 1020 benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM); 1021 } 1022 1023 void KisCompositionBenchmark::testRgb8CompositeOverReal_Aligned() 1024 { 1025 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 1026 const KoCompositeOp *op = cs->compositeOp(COMPOSITE_OVER); 1027 benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM); 1028 } 1029 1030 void KisCompositionBenchmark::testRgb8CompositeCopyLegacy() 1031 { 1032 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 1033 KoCompositeOp *op = new KoCompositeOpCopy2<KoBgrU8Traits>(cs); 1034 benchmarkCompositeOp(op, "Copy"); 1035 delete op; 1036 } 1037 1038 void KisCompositionBenchmark::testRgb8CompositeCopyOptimized() 1039 { 1040 const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8(); 1041 KoCompositeOp *op = KoOptimizedCompositeOpFactory::createCopyOp32(cs); 1042 benchmarkCompositeOp(op, "Optimized"); 1043 delete op; 1044 } 1045 1046 void KisCompositionBenchmark::benchmarkMemcpy() 1047 { 1048 QVector<Tile> tiles = 1049 generateTiles(numTiles, 0, 0, ALPHA_UNIT, ALPHA_UNIT, 4); 1050 1051 QBENCHMARK_ONCE { 1052 Q_FOREACH (const Tile &tile, tiles) { 1053 memcpy(tile.dst, tile.src, 4 * numPixels); 1054 } 1055 } 1056 1057 freeTiles(tiles, 0, 0); 1058 } 1059 1060 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 1061 const int vecSize = float_v::size; 1062 const size_t uint8VecAlignment = qMax(vecSize * sizeof(quint8), sizeof(void *)); 1063 const size_t uint32VecAlignment = qMax(vecSize * sizeof(quint32), sizeof(void *)); 1064 const size_t floatVecAlignment = qMax(vecSize * sizeof(float), sizeof(void *)); 1065 #endif 1066 1067 void KisCompositionBenchmark::benchmarkUintFloat() 1068 { 1069 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 1070 using uint_v = xsimd::batch<unsigned int, xsimd::current_arch>; 1071 1072 const int dataSize = 4096; 1073 void *ptr = 0; 1074 int error = MEMALIGN_ALLOC(&ptr, uint8VecAlignment, dataSize); 1075 if (error) { 1076 qFatal("posix_memalign failed: %d", error); 1077 } 1078 quint8 *iData = (quint8*)ptr; 1079 error = MEMALIGN_ALLOC(&ptr, floatVecAlignment, dataSize * sizeof(float)); 1080 if (error) { 1081 qFatal("posix_memalign failed: %d", error); 1082 } 1083 float *fData = (float*)ptr; 1084 1085 QBENCHMARK { 1086 for (int i = 0; i < dataSize; i += float_v::size) { 1087 // convert uint -> float directly, this causes 1088 // static_cast helper be called 1089 const auto b = xsimd::batch_cast<typename float_v::value_type>( 1090 xsimd::load_and_extend<uint_v>(iData + i) 1091 ); 1092 b.store_aligned(fData + i); 1093 } 1094 } 1095 1096 MEMALIGN_FREE(iData); 1097 MEMALIGN_FREE(fData); 1098 #endif 1099 } 1100 1101 void KisCompositionBenchmark::benchmarkUintIntFloat() 1102 { 1103 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 1104 using uint_v = xsimd::batch<unsigned int, xsimd::current_arch>; 1105 1106 const int dataSize = 4096; 1107 void *ptr = 0; 1108 int error = MEMALIGN_ALLOC(&ptr, uint8VecAlignment, dataSize); 1109 if (error) { 1110 qFatal("posix_memalign failed: %d", error); 1111 } 1112 quint8 *iData = (quint8*)ptr; 1113 error = MEMALIGN_ALLOC(&ptr, floatVecAlignment, dataSize * sizeof(float)); 1114 if (error) { 1115 qFatal("posix_memalign failed: %d", error); 1116 } 1117 float *fData = (float*)ptr; 1118 1119 QBENCHMARK { 1120 for (int i = 0; i < dataSize; i += float_v::size) { 1121 // convert uint->int->float, that avoids special sign 1122 // treating, and gives 2.6 times speedup 1123 const auto b = xsimd::batch_cast<typename float_v::value_type>(xsimd::load_and_extend<uint_v>(iData + i)); 1124 b.store_aligned(fData + i); 1125 } 1126 } 1127 1128 MEMALIGN_FREE(iData); 1129 MEMALIGN_FREE(fData); 1130 #endif 1131 } 1132 1133 void KisCompositionBenchmark::benchmarkFloatUint() 1134 { 1135 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 1136 using uint_v = xsimd::batch<unsigned int, xsimd::current_arch>; 1137 1138 const int dataSize = 4096; 1139 void *ptr = 0; 1140 int error = MEMALIGN_ALLOC(&ptr, uint32VecAlignment, dataSize * sizeof(quint32)); 1141 if (error) { 1142 qFatal("posix_memalign failed: %d", error); 1143 } 1144 quint32 *iData = (quint32*)ptr; 1145 error = MEMALIGN_ALLOC(&ptr, floatVecAlignment, dataSize * sizeof(float)); 1146 if (error) { 1147 qFatal("posix_memalign failed: %d", error); 1148 } 1149 float *fData = (float*)ptr; 1150 1151 QBENCHMARK { 1152 for (int i = 0; i < dataSize; i += float_v::size) { 1153 // conversion float -> uint 1154 // this being a direct conversion, load_and_extend does not apply 1155 const auto b = xsimd::batch_cast<typename uint_v::value_type>(float_v::load_aligned(fData + i)); 1156 1157 b.store_aligned(iData + i); 1158 } 1159 } 1160 1161 MEMALIGN_FREE(iData); 1162 MEMALIGN_FREE(fData); 1163 #endif 1164 } 1165 1166 void KisCompositionBenchmark::benchmarkFloatIntUint() 1167 { 1168 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 1169 using uint_v = xsimd::batch<unsigned int, xsimd::current_arch>; 1170 const int dataSize = 4096; 1171 void *ptr = 0; 1172 int error = MEMALIGN_ALLOC(&ptr, uint32VecAlignment, dataSize * sizeof(quint32)); 1173 if (error) { 1174 qFatal("posix_memalign failed: %d", error); 1175 } 1176 quint32 *iData = (quint32*)ptr; 1177 error = MEMALIGN_ALLOC(&ptr, floatVecAlignment, dataSize * sizeof(float)); 1178 if (error) { 1179 qFatal("posix_memalign failed: %d", error); 1180 } 1181 float *fData = (float*)ptr; 1182 1183 QBENCHMARK { 1184 for (int i = 0; i < dataSize; i += float_v::size) { 1185 // conversion float -> int -> uint 1186 const auto b = xsimd::batch_cast<typename uint_v::value_type>(float_v::load_aligned(fData + i)); 1187 1188 b.store_aligned(iData + i); 1189 } 1190 } 1191 1192 MEMALIGN_FREE(iData); 1193 MEMALIGN_FREE(fData); 1194 #endif 1195 } 1196 1197 SIMPLE_TEST_MAIN(KisCompositionBenchmark) 1198