File indexing completed on 2024-05-12 15:58:11

0001 /*
0002  *  SPDX-FileCopyrightText: 2012 Dmitry Kazakov <dimula73@gmail.com>
0003  *  SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me>
0004  *
0005  *  SPDX-License-Identifier: GPL-2.0-or-later
0006  */
0007 
0008 #include <xsimd_extensions/xsimd.hpp>
0009 
0010 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) && XSIMD_UNIVERSAL_BUILD_PASS
0011 
0012 #include "kis_circle_mask_generator.h"
0013 #include "kis_circle_mask_generator_p.h"
0014 #include "kis_curve_circle_mask_generator.h"
0015 #include "kis_curve_circle_mask_generator_p.h"
0016 #include "kis_curve_rect_mask_generator.h"
0017 #include "kis_curve_rect_mask_generator_p.h"
0018 #include "kis_gauss_circle_mask_generator.h"
0019 #include "kis_gauss_circle_mask_generator_p.h"
0020 #include "kis_gauss_rect_mask_generator.h"
0021 #include "kis_gauss_rect_mask_generator_p.h"
0022 #include "kis_rect_mask_generator.h"
0023 #include "kis_rect_mask_generator_p.h"
0024 
0025 #include "kis_brush_mask_applicator_base.h"
0026 #include "kis_brush_mask_vector_applicator.h"
0027 
0028 #include "vc_extra_math.h"
0029 
// Two-level stringification helpers: a() turns its argument into a string
// literal exactly as written, while b() lets the argument be macro-expanded
// first and then hands the result to a().
// NOTE(review): neither macro is referenced in the visible part of this file.
#define a(_s) #_s
#define b(_s) a(_s)
0032 
0033 template<>
0034 template<>
0035 void FastRowProcessor<KisCircleMaskGenerator>::process<xsimd::current_arch>(float *buffer,
0036                                                                             int width,
0037                                                                             float y,
0038                                                                             float cosa,
0039                                                                             float sina,
0040                                                                             float centerX,
0041                                                                             float centerY)
0042 {
0043     using float_v = xsimd::batch<float, xsimd::current_arch>;
0044     using float_m = typename float_v::batch_bool_type;
0045 
0046     const bool useSmoothing = d->copyOfAntialiasEdges;
0047 
0048     const float y_ = y - centerY;
0049     const float sinay_ = sina * y_;
0050     const float cosay_ = cosa * y_;
0051 
0052     float *bufferPointer = buffer;
0053 
0054     float_v currentIndices =
0055         xsimd::detail::make_sequence_as_batch<float_v>();
0056 
0057     const float_v increment((float)float_v::size);
0058     const float_v vCenterX(centerX);
0059 
0060     const float_v vCosa(cosa);
0061     const float_v vSina(sina);
0062     const float_v vCosaY_(cosay_);
0063     const float_v vSinaY_(sinay_);
0064 
0065     const float_v vXCoeff(static_cast<float>(d->xcoef));
0066     const float_v vYCoeff(static_cast<float>(d->ycoef));
0067 
0068     const float_v vTransformedFadeX(static_cast<float>(d->transformedFadeX));
0069     const float_v vTransformedFadeY(static_cast<float>(d->transformedFadeY));
0070 
0071     const float_v vOne(1);
0072 
0073     for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) {
0074         const float_v x_ = currentIndices - vCenterX;
0075 
0076         float_v xr = x_ * vCosa - vSinaY_;
0077         float_v yr = x_ * vSina + vCosaY_;
0078 
0079         const float_v n = xsimd::pow2(xr * vXCoeff) + xsimd::pow2(yr * vYCoeff);
0080         const float_m outsideMask = n > vOne;
0081 
0082         if (!xsimd::all(outsideMask)) {
0083             if (useSmoothing) {
0084                 xr = xsimd::abs(xr) + vOne;
0085                 yr = xsimd::abs(yr) + vOne;
0086             }
0087             float_v vNormFade = xsimd::pow2(xr * vTransformedFadeX)
0088                 + xsimd::pow2(yr * vTransformedFadeY);
0089             const float_m vNormLowMask = vNormFade < vOne;
0090             vNormFade = xsimd::set_zero(vNormFade, vNormLowMask);
0091 
0092             // 255 * n * (normeFade - 1) / (normeFade - n)
0093             float_v vFade = n * (vNormFade - vOne) / (vNormFade - n);
0094 
0095             // Mask in the inner circle of the mask
0096             const float_m mask = vNormFade < vOne;
0097             vFade = xsimd::set_zero(vFade, mask);
0098 
0099             // Mask out the outer circle of the mask
0100             vFade = xsimd::set_one(vFade, outsideMask);
0101 
0102             vFade.store_aligned(bufferPointer);
0103         } else {
0104             // Mask out everything outside the circle
0105             vOne.store_aligned(bufferPointer);
0106         }
0107 
0108         currentIndices = currentIndices + increment;
0109 
0110         bufferPointer += float_v::size;
0111     }
0112 }
0113 
0114 template<>
0115 template<>
0116 void FastRowProcessor<KisGaussCircleMaskGenerator>::process<xsimd::current_arch>(float *buffer,
0117                                                                                  int width,
0118                                                                                  float y,
0119                                                                                  float cosa,
0120                                                                                  float sina,
0121                                                                                  float centerX,
0122                                                                                  float centerY)
0123 {
0124     using float_v = xsimd::batch<float, xsimd::current_arch>;
0125     using float_m = float_v::batch_bool_type;
0126 
0127     const float y_ = y - centerY;
0128     const float sinay_ = sina * y_;
0129     const float cosay_ = cosa * y_;
0130 
0131     float *bufferPointer = buffer;
0132 
0133     float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>();
0134 
0135     const float_v increment(static_cast<float>(float_v::size));
0136     const float_v vCenterX(centerX);
0137     const float_v vCenter(static_cast<float>(d->center));
0138 
0139     const float_v vCosa(cosa);
0140     const float_v vSina(sina);
0141     const float_v vCosaY_(cosay_);
0142     const float_v vSinaY_(sinay_);
0143 
0144     const float_v vYCoeff(static_cast<float>(d->ycoef));
0145     const float_v vDistfactor(static_cast<float>(d->distfactor));
0146     const float_v vAlphafactor(static_cast<float>(d->alphafactor));
0147 
0148     const float_v vZero(0);
0149     const float_v vValMax(255.f);
0150 
0151     for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) {
0152         const float_v x_ = currentIndices - vCenterX;
0153 
0154         const float_v xr = x_ * vCosa - vSinaY_;
0155         const float_v yr = x_ * vSina + vCosaY_;
0156 
0157         float_v dist =
0158             xsimd::sqrt(xsimd::pow2(xr) + xsimd::pow2(yr * vYCoeff));
0159 
0160         // Apply FadeMaker mask and operations
0161         const float_m excludeMask = d->fadeMaker.needFade(dist);
0162 
0163         if (!xsimd::all(excludeMask)) {
0164             const float_v valDist = dist * vDistfactor;
0165             float_v fullFade = vAlphafactor
0166                 * (VcExtraMath::erf(valDist + vCenter)
0167                    - VcExtraMath::erf(valDist - vCenter));
0168 
0169             // Mask in the inner circle of the mask
0170             const float_m mask = fullFade < vZero;
0171             fullFade = xsimd::set_zero(fullFade, mask);
0172 
0173             // Mask the outer circle
0174             const float_m outerMask = fullFade > 254.974f;
0175             fullFade = xsimd::select(outerMask, vValMax, fullFade);
0176 
0177             // Mask (value - value), precision errors.
0178             float_v vFade = (vValMax - fullFade) / vValMax;
0179 
0180             // return original dist values before vFade transform
0181             vFade = xsimd::select(excludeMask, dist, vFade);
0182             vFade.store_aligned(bufferPointer);
0183         } else {
0184             dist.store_aligned(bufferPointer);
0185         }
0186         currentIndices = currentIndices + increment;
0187 
0188         bufferPointer += float_v::size;
0189     }
0190 }
0191 
0192 template<>
0193 template<>
0194 void FastRowProcessor<KisCurveCircleMaskGenerator>::process<xsimd::current_arch>(float *buffer,
0195                                                                                  int width,
0196                                                                                  float y,
0197                                                                                  float cosa,
0198                                                                                  float sina,
0199                                                                                  float centerX,
0200                                                                                  float centerY)
0201 {
0202     using int_v = xsimd::batch<int, xsimd::current_arch>;
0203     using float_v = xsimd::batch<float, xsimd::current_arch>;
0204     using float_m = float_v::batch_bool_type;
0205 
0206     const float y_ = y - centerY;
0207     const float sinay_ = sina * y_;
0208     const float cosay_ = cosa * y_;
0209 
0210     float *bufferPointer = buffer;
0211 
0212     const qreal *curveDataPointer = d->curveData.data();
0213 
0214     float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>();
0215 
0216     const float_v increment((float)float_v::size);
0217     const float_v vCenterX(centerX);
0218 
0219     const float_v vCosa(cosa);
0220     const float_v vSina(sina);
0221     const float_v vCosaY_(cosay_);
0222     const float_v vSinaY_(sinay_);
0223 
0224     const float_v vYCoeff(static_cast<float>(d->ycoef));
0225     const float_v vXCoeff(static_cast<float>(d->xcoef));
0226     const float_v vCurveResolution(static_cast<float>(d->curveResolution));
0227 
0228     float_v vCurvedData(0);
0229     float_v vCurvedData1(0);
0230 
0231     const float_v vOne(1);
0232     const float_v vZero(0);
0233 
0234     for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) {
0235         const float_v x_ = currentIndices - vCenterX;
0236 
0237         const float_v xr = x_ * vCosa - vSinaY_;
0238         const float_v yr = x_ * vSina + vCosaY_;
0239 
0240         float_v dist = xsimd::pow2(xr * vXCoeff) + xsimd::pow2(yr * vYCoeff);
0241 
0242         // Apply FadeMaker mask and operations
0243         const float_m excludeMask = d->fadeMaker.needFade(dist);
0244 
0245         if (!xsimd::all(excludeMask)) {
0246             const float_v valDist = dist * vCurveResolution;
0247             // truncate
0248             int_v vAlphaValue = xsimd::to_int(valDist);
0249             const float_v vFloatAlphaValue = xsimd::to_float(vAlphaValue);
0250 
0251             const float_v alphaValueF = valDist - vFloatAlphaValue;
0252 
0253             const auto alphaMask = vAlphaValue < int_v(0);
0254             vAlphaValue = xsimd::set_zero(vAlphaValue, alphaMask);
0255 
0256             vCurvedData = float_v::gather(curveDataPointer, vAlphaValue);
0257             vCurvedData1 = float_v::gather(curveDataPointer, vAlphaValue + 1);
0258 
0259             // vAlpha
0260             float_v fullFade = ((vOne - alphaValueF) * vCurvedData
0261                                       + alphaValueF * vCurvedData1);
0262 
0263             // Mask in the inner circle of the mask
0264             const float_m mask = fullFade < vZero;
0265             fullFade = xsimd::set_zero(fullFade, mask);
0266 
0267             // Mask outer circle of mask
0268             const float_m outerMask = fullFade >= vOne;
0269             float_v vFade = (vOne - fullFade);
0270             vFade = xsimd::set_zero(vFade, outerMask);
0271 
0272             // return original dist values before vFade transform
0273             vFade = xsimd::select(excludeMask, dist, vFade);
0274             vFade.store_aligned(bufferPointer);
0275         } else {
0276             dist.store_aligned(bufferPointer);
0277         }
0278         currentIndices = currentIndices + increment;
0279 
0280         bufferPointer += float_v::size;
0281     }
0282 }
0283 
0284 template<>
0285 template<>
0286 void FastRowProcessor<KisRectangleMaskGenerator>::process<xsimd::current_arch>(float *buffer,
0287                                                                                int width,
0288                                                                                float y,
0289                                                                                float cosa,
0290                                                                                float sina,
0291                                                                                float centerX,
0292                                                                                float centerY)
0293 {
0294     using float_v = xsimd::batch<float, xsimd::current_arch>;
0295     using float_m = float_v::batch_bool_type;
0296 
0297     const bool useSmoothing = d->copyOfAntialiasEdges;
0298 
0299     const float y_ = y - centerY;
0300     const float sinay_ = sina * y_;
0301     const float cosay_ = cosa * y_;
0302 
0303     float *bufferPointer = buffer;
0304 
0305     float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>();
0306 
0307     const float_v increment((float)float_v::size);
0308     const float_v vCenterX(centerX);
0309 
0310     const float_v vCosa(cosa);
0311     const float_v vSina(sina);
0312     const float_v vCosaY_(cosay_);
0313     const float_v vSinaY_(sinay_);
0314 
0315     const float_v vXCoeff(static_cast<float>(d->xcoeff));
0316     const float_v vYCoeff(static_cast<float>(d->ycoeff));
0317 
0318     const float_v vTransformedFadeX(static_cast<float>(d->transformedFadeX));
0319     const float_v vTransformedFadeY(static_cast<float>(d->transformedFadeY));
0320 
0321     const float_v vOne(1);
0322     const float_v vZero(0);
0323     const float_v vTolerance(10000.f);
0324 
0325     for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) {
0326         const float_v x_ = currentIndices - vCenterX;
0327 
0328         float_v xr = xsimd::abs(x_ * vCosa - vSinaY_);
0329         float_v yr = xsimd::abs(x_ * vSina + vCosaY_);
0330 
0331         const float_v nxr = xr * vXCoeff;
0332         const float_v nyr = yr * vYCoeff;
0333 
0334         float_m outsideMask = (nxr > vOne) || (nyr > vOne);
0335 
0336         if (!xsimd::all(outsideMask)) {
0337             if (useSmoothing) {
0338                 xr = xsimd::abs(xr) + vOne;
0339                 yr = xsimd::abs(yr) + vOne;
0340             }
0341 
0342             const float_v fxr = xr * vTransformedFadeX;
0343             const float_v fyr = yr * vTransformedFadeY;
0344 
0345             const float_v fxrNorm = nxr * (fxr - vOne) / (fxr - nxr);
0346             const float_v fyrNorm = nyr * (fyr - vOne) / (fyr - nyr);
0347 
0348             float_v vFade(vZero);
0349 
0350             const float_m vFadeMask = fxrNorm < fyrNorm;
0351             float_v vMaxVal = vFade;
0352             vMaxVal = xsimd::select(fxr > vOne, fxrNorm, vMaxVal);
0353             vMaxVal = xsimd::select(vFadeMask && fyr > vOne, fyrNorm, vMaxVal);
0354             vFade = vMaxVal;
0355 
0356             // Mask out the outer circle of the mask
0357             vFade = xsimd::select(outsideMask, vOne, vFade);
0358             vFade.store_aligned(bufferPointer);
0359         } else {
0360             // Mask out everything outside the circle
0361             vOne.store_aligned(bufferPointer);
0362         }
0363 
0364         currentIndices = currentIndices + increment;
0365 
0366         bufferPointer += float_v::size;
0367     }
0368 }
0369 
0370 template<>
0371 template<>
0372 void FastRowProcessor<KisGaussRectangleMaskGenerator>::process<xsimd::current_arch>(float *buffer,
0373                                                                                     int width,
0374                                                                                     float y,
0375                                                                                     float cosa,
0376                                                                                     float sina,
0377                                                                                     float centerX,
0378                                                                                     float centerY)
0379 {
0380     using float_v = xsimd::batch<float, xsimd::current_arch>;
0381     using float_m = float_v::batch_bool_type;
0382 
0383     const float y_ = y - centerY;
0384     const float sinay_ = sina * y_;
0385     const float cosay_ = cosa * y_;
0386 
0387     float *bufferPointer = buffer;
0388 
0389     float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>();
0390 
0391     const float_v increment((float)float_v::size);
0392     const float_v vCenterX(centerX);
0393 
0394     const float_v vCosa(cosa);
0395     const float_v vSina(sina);
0396     const float_v vCosaY_(cosay_);
0397     const float_v vSinaY_(sinay_);
0398 
0399     const float_v vhalfWidth(static_cast<float>(d->halfWidth));
0400     const float_v vhalfHeight(static_cast<float>(d->halfHeight));
0401     const float_v vXFade(static_cast<float>(d->xfade));
0402     const float_v vYFade(static_cast<float>(d->yfade));
0403 
0404     const float_v vAlphafactor(static_cast<float>(d->alphafactor));
0405 
0406     const float_v vOne(1);
0407     const float_v vZero(0);
0408     const float_v vValMax(255.f);
0409 
0410     for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) {
0411         const float_v x_ = currentIndices - vCenterX;
0412 
0413         float_v xr = x_ * vCosa - vSinaY_;
0414         float_v yr = xsimd::abs(x_ * vSina + vCosaY_);
0415 
0416         // check if we need to apply fader on values
0417         float_m excludeMask = d->fadeMaker.needFade(xr, yr);
0418         const float_v vValue = xsimd::select(excludeMask, vOne, vValue);
0419 
0420         if (!xsimd::all(excludeMask)) {
0421             float_v fullFade = vValMax
0422                 - (vAlphafactor * (VcExtraMath::erf((vhalfWidth + xr) * vXFade) + VcExtraMath::erf((vhalfWidth - xr) * vXFade))
0423                    * (VcExtraMath::erf((vhalfHeight + yr) * vYFade) + VcExtraMath::erf((vhalfHeight - yr) * vYFade)));
0424 
0425             // apply antialias fader
0426             d->fadeMaker.apply2DFader(fullFade, excludeMask, xr, yr);
0427 
0428             // Mask in the inner circle of the mask
0429             const float_m mask = fullFade < vZero;
0430             fullFade = xsimd::set_zero(fullFade, mask);
0431 
0432             // Mask the outer circle
0433             const float_m outerMask = fullFade > 254.974f;
0434             fullFade = xsimd::select(outerMask, vValMax, fullFade);
0435 
0436             // Mask (value - value), precision errors.
0437             float_v vFade = fullFade / vValMax;
0438 
0439             // return original vValue values before vFade transform
0440             vFade = xsimd::select(excludeMask, vValue, vFade);
0441             vFade.store_aligned(bufferPointer);
0442 
0443         } else {
0444             vValue.store_aligned(bufferPointer);
0445         }
0446         currentIndices = currentIndices + increment;
0447 
0448         bufferPointer += float_v::size;
0449     }
0450 }
0451 
0452 template<>
0453 template<>
0454 void FastRowProcessor<KisCurveRectangleMaskGenerator>::process<xsimd::current_arch>(float *buffer,
0455                                                                                     int width,
0456                                                                                     float y,
0457                                                                                     float cosa,
0458                                                                                     float sina,
0459                                                                                     float centerX,
0460                                                                                     float centerY)
0461 {
0462     using float_v = xsimd::batch<float, xsimd::current_arch>;
0463     using float_m = float_v::batch_bool_type;
0464 
0465     const float y_ = y - centerY;
0466     const float sinay_ = sina * y_;
0467     const float cosay_ = cosa * y_;
0468 
0469     float *bufferPointer = buffer;
0470 
0471     const qreal *curveDataPointer = d->curveData.data();
0472 
0473     float_v currentIndices = xsimd::detail::make_sequence_as_batch<float_v>();
0474 
0475     const float_v increment((float)float_v::size);
0476     const float_v vCenterX(centerX);
0477 
0478     const float_v vCosa(cosa);
0479     const float_v vSina(sina);
0480     const float_v vCosaY_(cosay_);
0481     const float_v vSinaY_(sinay_);
0482 
0483     const float_v vYCoeff(static_cast<float>(d->ycoeff));
0484     const float_v vXCoeff(static_cast<float>(d->xcoeff));
0485     const float_v vCurveResolution(static_cast<float>(d->curveResolution));
0486 
0487     const float_v vOne(1);
0488     const float_v vZero(0);
0489     const float_v vValMax(255.f);
0490 
0491     for (size_t i = 0; i < static_cast<size_t>(width); i += float_v::size) {
0492         const float_v x_ = currentIndices - vCenterX;
0493 
0494         float_v xr = x_ * vCosa - vSinaY_;
0495         float_v yr = xsimd::abs(x_ * vSina + vCosaY_);
0496 
0497         // check if we need to apply fader on values
0498         float_m excludeMask = d->fadeMaker.needFade(xr, yr);
0499         const float_v vValue = xsimd::set_one(float_v(0), excludeMask);
0500 
0501         if (!xsimd::all(excludeMask)) {
0502             // We need to mask the extra area given for aliniation
0503             // the next operation should never give values above 1
0504             float_v preSIndex = xsimd::abs(xr) * vXCoeff;
0505             float_v preTIndex = xsimd::abs(yr) * vYCoeff;
0506 
0507             preSIndex = xsimd::select(preSIndex > vOne, vOne, preSIndex);
0508             preTIndex = xsimd::select(preTIndex > vOne, vOne, preTIndex);
0509 
0510             const auto sIndex = xsimd::nearbyint_as_int(preSIndex * vCurveResolution);
0511             const auto tIndex = xsimd::nearbyint_as_int(preTIndex * vCurveResolution);
0512 
0513             const auto sIndexInverted = xsimd::to_int(vCurveResolution - xsimd::to_float(sIndex));
0514             const auto tIndexInverted = xsimd::to_int(vCurveResolution - xsimd::to_float(tIndex));
0515 
0516             const auto vCurvedDataSIndex = float_v::gather(curveDataPointer, sIndex);
0517             const auto vCurvedDataTIndex = float_v::gather(curveDataPointer, tIndex);
0518             const auto vCurvedDataSIndexInv = float_v::gather(curveDataPointer, sIndexInverted);
0519             const auto vCurvedDataTIndexInv = float_v::gather(curveDataPointer, tIndexInverted);
0520 
0521             float_v fullFade = vValMax
0522                 * (vOne
0523                    - (vCurvedDataSIndex * (vOne - vCurvedDataSIndexInv) * vCurvedDataTIndex
0524                       * (vOne - vCurvedDataTIndexInv)));
0525 
0526             // apply antialias fader
0527             d->fadeMaker.apply2DFader(fullFade, excludeMask, xr, yr);
0528 
0529             // Mask in the inner circle of the mask
0530             const float_m mask = fullFade < vZero;
0531             fullFade = xsimd::set_zero(fullFade, mask);
0532 
0533             // Mask the outer circle
0534             const float_m outerMask = fullFade > 254.974f;
0535             fullFade = xsimd::select(outerMask, vValMax, fullFade);
0536 
0537             // Mask (value - value), precision errors.
0538             float_v vFade = fullFade / vValMax;
0539 
0540             // return original vValue values before vFade transform
0541             vFade = xsimd::select(excludeMask, vValue, vFade);
0542             vFade.store_aligned(bufferPointer);
0543         } else {
0544             vValue.store_aligned(bufferPointer);
0545         }
0546         currentIndices = currentIndices + increment;
0547 
0548         bufferPointer += float_v::size;
0549     }
0550 }
0551 
0552 #endif /* defined HAVE_XSIMD && XSIMD_UNIVERSAL_BUILD_PASS */