File indexing completed on 2024-05-12 15:58:11
0001 /* 0002 * SPDX-FileCopyrightText: 2012 Dmitry Kazakov <dimula73@gmail.com> 0003 * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me> 0004 * 0005 * SPDX-License-Identifier: GPL-2.0-or-later 0006 */ 0007 0008 #include <xsimd_extensions/xsimd.hpp> 0009 0010 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS 0011 0012 #include "kis_circle_mask_generator.h" 0013 #include "kis_circle_mask_generator_p.h" 0014 #include "kis_curve_circle_mask_generator.h" 0015 #include "kis_curve_circle_mask_generator_p.h" 0016 #include "kis_curve_rect_mask_generator.h" 0017 #include "kis_curve_rect_mask_generator_p.h" 0018 #include "kis_gauss_circle_mask_generator.h" 0019 #include "kis_gauss_circle_mask_generator_p.h" 0020 #include "kis_gauss_rect_mask_generator.h" 0021 #include "kis_gauss_rect_mask_generator_p.h" 0022 #include "kis_rect_mask_generator.h" 0023 #include "kis_rect_mask_generator_p.h" 0024 0025 #include "kis_brush_mask_applicator_base.h" 0026 #include "kis_brush_mask_vector_applicator.h" 0027 0028 #include "vc_extra_math.h" 0029 0030 #define a(_s) #_s 0031 #define b(_s) a(_s) 0032 0033 template<> 0034 template<> 0035 void FastRowProcessor<KisCircleMaskGenerator>::process<xsimd::current_arch>(float *buffer, 0036 int width, 0037 float y, 0038 float cosa, 0039 float sina, 0040 float centerX, 0041 float centerY) 0042 { 0043 using float_v = xsimd::batch<float, xsimd::current_arch>; 0044 using float_m = typename float_v::batch_bool_type; 0045 0046 const bool useSmoothing = d->copyOfAntialiasEdges; 0047 0048 const float y_ = y - centerY; 0049 const float sinay_ = sina * y_; 0050 const float cosay_ = cosa * y_; 0051 0052 float *bufferPointer = buffer; 0053 0054 float_v currentIndices = 0055 xsimd::detail::make_sequence_as_batch<float_v>(); 0056 0057 const float_v increment((float)float_v::size); 0058 const float_v vCenterX(centerX); 0059 0060 const float_v vCosa(cosa); 0061 const float_v vSina(sina); 0062 const float_v vCosaY_(cosay_); 0063 const float_v vSinaY_(sinay_); 0064 0065 const float_v vXCoeff(static_cast<float>(d->xcoef)); 0066 const float_v vYCoeff(static_cast<float>(d->ycoef)); 0067 0068 const float_v vTransformedFadeX(static_cast<float>(d->transformedFadeX)); 0069 const float_v vTransformedFadeY(static_cast<float>(d->transformedFadeY)); 0070 0071 const float_v vOne(1); 0072 0073 for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) { 0074 const float_v x_ = currentIndices - vCenterX; 0075 0076 float_v xr = x_ * vCosa - vSinaY_; 0077 float_v yr = x_ * vSina + vCosaY_; 0078 0079 const float_v n = xsimd::pow2(xr * vXCoeff) + xsimd::pow2(yr * vYCoeff); 0080 const float_m outsideMask = n > vOne; 0081 0082 if (!xsimd::all(outsideMask)) { 0083 if (useSmoothing) { 0084 xr = xsimd::abs(xr) + vOne; 0085 yr = xsimd::abs(yr) + vOne; 0086 } 0087 float_v vNormFade = xsimd::pow2(xr * vTransformedFadeX) 0088 + xsimd::pow2(yr * vTransformedFadeY); 0089 const float_m vNormLowMask = vNormFade < vOne; 0090 vNormFade = xsimd::set_zero(vNormFade, vNormLowMask); 0091 0092 // 255 * n * (normeFade - 1) / (normeFade - n) 0093 float_v vFade = n * (vNormFade - vOne) / (vNormFade - n); 0094 0095 // Mask in the inner circle of the mask 0096 const float_m mask = vNormFade < vOne; 0097 vFade = xsimd::set_zero(vFade, mask); 0098 0099 // Mask out the outer circle of the mask 0100 vFade = xsimd::set_one(vFade, outsideMask); 0101 0102 vFade.store_aligned(bufferPointer); 0103 } else { 0104 // Mask out everything outside the circle 0105 vOne.store_aligned(bufferPointer); 0106 } 0107 0108 currentIndices = currentIndices + increment; 0109 0110 bufferPointer += float_v::size; 0111 } 0112 } 0113 0114 template<> 0115 template<> 0116 void FastRowProcessor<KisGaussCircleMaskGenerator>::process<xsimd::current_arch>(float *buffer, 0117 int width, 0118 float y, 0119 float cosa, 0120 float sina, 0121 float centerX, 0122 float centerY) 0123 { 0124 using float_v = xsimd::batch<float, xsimd::current_arch>; 0125 using float_m = float_v::batch_bool_type; 0126 0127 const float y_ = y - centerY; 0128 const float sinay_ = sina * y_; 0129 const float cosay_ = cosa * y_; 0130 0131 float *bufferPointer = buffer; 0132 0133 float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>(); 0134 0135 const float_v increment(static_cast<float>(float_v::size)); 0136 const float_v vCenterX(centerX); 0137 const float_v vCenter(static_cast<float>(d->center)); 0138 0139 const float_v vCosa(cosa); 0140 const float_v vSina(sina); 0141 const float_v vCosaY_(cosay_); 0142 const float_v vSinaY_(sinay_); 0143 0144 const float_v vYCoeff(static_cast<float>(d->ycoef)); 0145 const float_v vDistfactor(static_cast<float>(d->distfactor)); 0146 const float_v vAlphafactor(static_cast<float>(d->alphafactor)); 0147 0148 const float_v vZero(0); 0149 const float_v vValMax(255.f); 0150 0151 for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) { 0152 const float_v x_ = currentIndices - vCenterX; 0153 0154 const float_v xr = x_ * vCosa - vSinaY_; 0155 const float_v yr = x_ * vSina + vCosaY_; 0156 0157 float_v dist = 0158 xsimd::sqrt(xsimd::pow2(xr) + xsimd::pow2(yr * vYCoeff)); 0159 0160 // Apply FadeMaker mask and operations 0161 const float_m excludeMask = d->fadeMaker.needFade(dist); 0162 0163 if (!xsimd::all(excludeMask)) { 0164 const float_v valDist = dist * vDistfactor; 0165 float_v fullFade = vAlphafactor 0166 * (VcExtraMath::erf(valDist + vCenter) 0167 - VcExtraMath::erf(valDist - vCenter)); 0168 0169 // Mask in the inner circle of the mask 0170 const float_m mask = fullFade < vZero; 0171 fullFade = xsimd::set_zero(fullFade, mask); 0172 0173 // Mask the outer circle 0174 const float_m outerMask = fullFade > 254.974f; 0175 fullFade = xsimd::select(outerMask, vValMax, fullFade); 0176 0177 // Mask (value - value), precision errors. 0178 float_v vFade = (vValMax - fullFade) / vValMax; 0179 0180 // return original dist values before vFade transform 0181 vFade = xsimd::select(excludeMask, dist, vFade); 0182 vFade.store_aligned(bufferPointer); 0183 } else { 0184 dist.store_aligned(bufferPointer); 0185 } 0186 currentIndices = currentIndices + increment; 0187 0188 bufferPointer += float_v::size; 0189 } 0190 } 0191 0192 template<> 0193 template<> 0194 void FastRowProcessor<KisCurveCircleMaskGenerator>::process<xsimd::current_arch>(float *buffer, 0195 int width, 0196 float y, 0197 float cosa, 0198 float sina, 0199 float centerX, 0200 float centerY) 0201 { 0202 using int_v = xsimd::batch<int, xsimd::current_arch>; 0203 using float_v = xsimd::batch<float, xsimd::current_arch>; 0204 using float_m = float_v::batch_bool_type; 0205 0206 const float y_ = y - centerY; 0207 const float sinay_ = sina * y_; 0208 const float cosay_ = cosa * y_; 0209 0210 float *bufferPointer = buffer; 0211 0212 const qreal *curveDataPointer = d->curveData.data(); 0213 0214 float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>(); 0215 0216 const float_v increment((float)float_v::size); 0217 const float_v vCenterX(centerX); 0218 0219 const float_v vCosa(cosa); 0220 const float_v vSina(sina); 0221 const float_v vCosaY_(cosay_); 0222 const float_v vSinaY_(sinay_); 0223 0224 const float_v vYCoeff(static_cast<float>(d->ycoef)); 0225 const float_v vXCoeff(static_cast<float>(d->xcoef)); 0226 const float_v vCurveResolution(static_cast<float>(d->curveResolution)); 0227 0228 float_v vCurvedData(0); 0229 float_v vCurvedData1(0); 0230 0231 const float_v vOne(1); 0232 const float_v vZero(0); 0233 0234 for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) { 0235 const float_v x_ = currentIndices - vCenterX; 0236 0237 const float_v xr = x_ * vCosa - vSinaY_; 0238 const float_v yr = x_ * vSina + vCosaY_; 0239 0240 float_v dist = xsimd::pow2(xr * vXCoeff) + xsimd::pow2(yr * vYCoeff); 0241 0242 // Apply FadeMaker mask and operations 0243 const float_m excludeMask = d->fadeMaker.needFade(dist); 0244 0245 if (!xsimd::all(excludeMask)) { 0246 const float_v valDist = dist * vCurveResolution; 0247 // truncate 0248 int_v vAlphaValue = xsimd::to_int(valDist); 0249 const float_v vFloatAlphaValue = xsimd::to_float(vAlphaValue); 0250 0251 const float_v alphaValueF = valDist - vFloatAlphaValue; 0252 0253 const auto alphaMask = vAlphaValue < int_v(0); 0254 vAlphaValue = xsimd::set_zero(vAlphaValue, alphaMask); 0255 0256 vCurvedData = float_v::gather(curveDataPointer, vAlphaValue); 0257 vCurvedData1 = float_v::gather(curveDataPointer, vAlphaValue + 1); 0258 0259 // vAlpha 0260 float_v fullFade = ((vOne - alphaValueF) * vCurvedData 0261 + alphaValueF * vCurvedData1); 0262 0263 // Mask in the inner circle of the mask 0264 const float_m mask = fullFade < vZero; 0265 fullFade = xsimd::set_zero(fullFade, mask); 0266 0267 // Mask outer circle of mask 0268 const float_m outerMask = fullFade >= vOne; 0269 float_v vFade = (vOne - fullFade); 0270 vFade = xsimd::set_zero(vFade, outerMask); 0271 0272 // return original dist values before vFade transform 0273 vFade = xsimd::select(excludeMask, dist, vFade); 0274 vFade.store_aligned(bufferPointer); 0275 } else { 0276 dist.store_aligned(bufferPointer); 0277 } 0278 currentIndices = currentIndices + increment; 0279 0280 bufferPointer += float_v::size; 0281 } 0282 } 0283 0284 template<> 0285 template<> 0286 void FastRowProcessor<KisRectangleMaskGenerator>::process<xsimd::current_arch>(float *buffer, 0287 int width, 0288 float y, 0289 float cosa, 0290 float sina, 0291 float centerX, 0292 float centerY) 0293 { 0294 using float_v = xsimd::batch<float, xsimd::current_arch>; 0295 using float_m = float_v::batch_bool_type; 0296 0297 const bool useSmoothing = d->copyOfAntialiasEdges; 0298 0299 const float y_ = y - centerY; 0300 const float sinay_ = sina * y_; 0301 const float cosay_ = cosa * y_; 0302 0303 float *bufferPointer = buffer; 0304 0305 float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>(); 0306 0307 const float_v increment((float)float_v::size); 0308 const float_v vCenterX(centerX); 0309 0310 const float_v vCosa(cosa); 0311 const float_v vSina(sina); 0312 const float_v vCosaY_(cosay_); 0313 const float_v vSinaY_(sinay_); 0314 0315 const float_v vXCoeff(static_cast<float>(d->xcoeff)); 0316 const float_v vYCoeff(static_cast<float>(d->ycoeff)); 0317 0318 const float_v vTransformedFadeX(static_cast<float>(d->transformedFadeX)); 0319 const float_v vTransformedFadeY(static_cast<float>(d->transformedFadeY)); 0320 0321 const float_v vOne(1); 0322 const float_v vZero(0); 0323 const float_v vTolerance(10000.f); 0324 0325 for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) { 0326 const float_v x_ = currentIndices - vCenterX; 0327 0328 float_v xr = xsimd::abs(x_ * vCosa - vSinaY_); 0329 float_v yr = xsimd::abs(x_ * vSina + vCosaY_); 0330 0331 const float_v nxr = xr * vXCoeff; 0332 const float_v nyr = yr * vYCoeff; 0333 0334 float_m outsideMask = (nxr > vOne) || (nyr > vOne); 0335 0336 if (!xsimd::all(outsideMask)) { 0337 if (useSmoothing) { 0338 xr = xsimd::abs(xr) + vOne; 0339 yr = xsimd::abs(yr) + vOne; 0340 } 0341 0342 const float_v fxr = xr * vTransformedFadeX; 0343 const float_v fyr = yr * vTransformedFadeY; 0344 0345 const float_v fxrNorm = nxr * (fxr - vOne) / (fxr - nxr); 0346 const float_v fyrNorm = nyr * (fyr - vOne) / (fyr - nyr); 0347 0348 float_v vFade(vZero); 0349 0350 const float_m vFadeMask = fxrNorm < fyrNorm; 0351 float_v vMaxVal = vFade; 0352 vMaxVal = xsimd::select(fxr > vOne, fxrNorm, vMaxVal); 0353 vMaxVal = xsimd::select(vFadeMask && fyr > vOne, fyrNorm, vMaxVal); 0354 vFade = vMaxVal; 0355 0356 // Mask out the outer circle of the mask 0357 vFade = xsimd::select(outsideMask, vOne, vFade); 0358 vFade.store_aligned(bufferPointer); 0359 } else { 0360 // Mask out everything outside the circle 0361 vOne.store_aligned(bufferPointer); 0362 } 0363 0364 currentIndices = currentIndices + increment; 0365 0366 bufferPointer += float_v::size; 0367 } 0368 } 0369 0370 template<> 0371 template<> 0372 void FastRowProcessor<KisGaussRectangleMaskGenerator>::process<xsimd::current_arch>(float *buffer, 0373 int width, 0374 float y, 0375 float cosa, 0376 float sina, 0377 float centerX, 0378 float centerY) 0379 { 0380 using float_v = xsimd::batch<float, xsimd::current_arch>; 0381 using float_m = float_v::batch_bool_type; 0382 0383 const float y_ = y - centerY; 0384 const float sinay_ = sina * y_; 0385 const float cosay_ = cosa * y_; 0386 0387 float *bufferPointer = buffer; 0388 0389 float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>(); 0390 0391 const float_v increment((float)float_v::size); 0392 const float_v vCenterX(centerX); 0393 0394 const float_v vCosa(cosa); 0395 const float_v vSina(sina); 0396 const float_v vCosaY_(cosay_); 0397 const float_v vSinaY_(sinay_); 0398 0399 const float_v vhalfWidth(static_cast<float>(d->halfWidth)); 0400 const float_v vhalfHeight(static_cast<float>(d->halfHeight)); 0401 const float_v vXFade(static_cast<float>(d->xfade)); 0402 const float_v vYFade(static_cast<float>(d->yfade)); 0403 0404 const float_v vAlphafactor(static_cast<float>(d->alphafactor)); 0405 0406 const float_v vOne(1); 0407 const float_v vZero(0); 0408 const float_v vValMax(255.f); 0409 0410 for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) { 0411 const float_v x_ = currentIndices - vCenterX; 0412 0413 float_v xr = x_ * vCosa - vSinaY_; 0414 float_v yr = xsimd::abs(x_ * vSina + vCosaY_); 0415 0416 // check if we need to apply fader on values 0417 float_m excludeMask = d->fadeMaker.needFade(xr, yr); 0418 const float_v vValue = xsimd::select(excludeMask, vOne, vValue); 0419 0420 if (!xsimd::all(excludeMask)) { 0421 float_v fullFade = vValMax 0422 - (vAlphafactor * (VcExtraMath::erf((vhalfWidth + xr) * vXFade) + VcExtraMath::erf((vhalfWidth - xr) * vXFade)) 0423 * (VcExtraMath::erf((vhalfHeight + yr) * vYFade) + VcExtraMath::erf((vhalfHeight - yr) * vYFade))); 0424 0425 // apply antialias fader 0426 d->fadeMaker.apply2DFader(fullFade, excludeMask, xr, yr); 0427 0428 // Mask in the inner circle of the mask 0429 const float_m mask = fullFade < vZero; 0430 fullFade = xsimd::set_zero(fullFade, mask); 0431 0432 // Mask the outer circle 0433 const float_m outerMask = fullFade > 254.974f; 0434 fullFade = xsimd::select(outerMask, vValMax, fullFade); 0435 0436 // Mask (value - value), precision errors. 0437 float_v vFade = fullFade / vValMax; 0438 0439 // return original vValue values before vFade transform 0440 vFade = xsimd::select(excludeMask, vValue, vFade); 0441 vFade.store_aligned(bufferPointer); 0442 0443 } else { 0444 vValue.store_aligned(bufferPointer); 0445 } 0446 currentIndices = currentIndices + increment; 0447 0448 bufferPointer += float_v::size; 0449 } 0450 } 0451 0452 template<> 0453 template<> 0454 void FastRowProcessor<KisCurveRectangleMaskGenerator>::process<xsimd::current_arch>(float *buffer, 0455 int width, 0456 float y, 0457 float cosa, 0458 float sina, 0459 float centerX, 0460 float centerY) 0461 { 0462 using float_v = xsimd::batch<float, xsimd::current_arch>; 0463 using float_m = float_v::batch_bool_type; 0464 0465 const float y_ = y - centerY; 0466 const float sinay_ = sina * y_; 0467 const float cosay_ = cosa * y_; 0468 0469 float *bufferPointer = buffer; 0470 0471 const qreal *curveDataPointer = d->curveData.data(); 0472 0473 float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>(); 0474 0475 const float_v increment((float)float_v::size); 0476 const float_v vCenterX(centerX); 0477 0478 const float_v vCosa(cosa); 0479 const float_v vSina(sina); 0480 const float_v vCosaY_(cosay_); 0481 const float_v vSinaY_(sinay_); 0482 0483 const float_v vYCoeff(static_cast<float>(d->ycoeff)); 0484 const float_v vXCoeff(static_cast<float>(d->xcoeff)); 0485 const float_v vCurveResolution(static_cast<float>(d->curveResolution)); 0486 0487 const float_v vOne(1); 0488 const float_v vZero(0); 0489 const float_v vValMax(255.f); 0490 0491 for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) { 0492 const float_v x_ = currentIndices - vCenterX; 0493 0494 float_v xr = x_ * vCosa - vSinaY_; 0495 float_v yr = xsimd::abs(x_ * vSina + vCosaY_); 0496 0497 // check if we need to apply fader on values 0498 float_m excludeMask = d->fadeMaker.needFade(xr, yr); 0499 const float_v vValue = xsimd::set_one(float_v(0), excludeMask); 0500 0501 if (!xsimd::all(excludeMask)) { 0502 // We need to mask the extra area given for aliniation 0503 // the next operation should never give values above 1 0504 float_v preSIndex = xsimd::abs(xr) * vXCoeff; 0505 float_v preTIndex = xsimd::abs(yr) * vYCoeff; 0506 0507 preSIndex = xsimd::select(preSIndex > vOne, vOne, preSIndex); 0508 preTIndex = xsimd::select(preTIndex > vOne, vOne, preTIndex); 0509 0510 const auto sIndex = xsimd::nearbyint_as_int(preSIndex * vCurveResolution); 0511 const auto tIndex = xsimd::nearbyint_as_int(preTIndex * vCurveResolution); 0512 0513 const auto sIndexInverted = xsimd::to_int(vCurveResolution - xsimd::to_float(sIndex)); 0514 const auto tIndexInverted = xsimd::to_int(vCurveResolution - xsimd::to_float(tIndex)); 0515 0516 const auto vCurvedDataSIndex = float_v::gather(curveDataPointer, sIndex); 0517 const auto vCurvedDataTIndex = float_v::gather(curveDataPointer, tIndex); 0518 const auto vCurvedDataSIndexInv = float_v::gather(curveDataPointer, sIndexInverted); 0519 const auto vCurvedDataTIndexInv = float_v::gather(curveDataPointer, tIndexInverted); 0520 0521 float_v fullFade = vValMax 0522 * (vOne 0523 - (vCurvedDataSIndex * (vOne - vCurvedDataSIndexInv) * vCurvedDataTIndex 0524 * (vOne - vCurvedDataTIndexInv))); 0525 0526 // apply antialias fader 0527 d->fadeMaker.apply2DFader(fullFade, excludeMask, xr, yr); 0528 0529 // Mask in the inner circle of the mask 0530 const float_m mask = fullFade < vZero; 0531 fullFade = xsimd::set_zero(fullFade, mask); 0532 0533 // Mask the outer circle 0534 const float_m outerMask = fullFade > 254.974f; 0535 fullFade = xsimd::select(outerMask, vValMax, fullFade); 0536 0537 // Mask (value - value), precision errors. 0538 float_v vFade = fullFade / vValMax; 0539 0540 // return original vValue values before vFade transform 0541 vFade = xsimd::select(excludeMask, vValue, vFade); 0542 vFade.store_aligned(bufferPointer); 0543 } else { 0544 vValue.store_aligned(bufferPointer); 0545 } 0546 currentIndices = currentIndices + increment; 0547 0548 bufferPointer += float_v::size; 0549 } 0550 } 0551 0552 #endif /* defined HAVE_XSIMD && XSIMD_UNIVERSAL_BUILD_PASS */