File indexing completed on 2024-05-12 15:59:02

0001 /*
0002  *  SPDX-FileCopyrightText: 2018 Iván Santa María <ghevan@gmail.com>
0003  *  SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me>
0004  *
0005  *  SPDX-License-Identifier: GPL-2.0-or-later
0006  */
0007 
0008 #ifndef VC_ADDITIONAL_MATH_H
0009 #define VC_ADDITIONAL_MATH_H
0010 
0011 #include <xsimd_extensions/xsimd.hpp>
0012 
0013 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE)
0014 
0015 class VcExtraMath
0016 {
0017 public:
0018 
0019     // vectorized erf function, precision 1e-5
0020     template<typename A>
0021     static inline xsimd::batch<float, A> erf(const xsimd::batch<float, A> x)
0022     {
0023         using float_v = xsimd::batch<float, A>;
0024         using float_m = typename float_v::batch_bool_type;
0025         float_v xa = xsimd::abs(x);
0026         float_m precisionLimit = xa >= float_v(9.3f); // wrong result for any number beyond this
0027         xa = xsimd::set_zero(xa, precisionLimit);
0028         float_v sign(1.0f);
0029         float_m invertMask = x < float_v(0.f);
0030         sign = xsimd::select(invertMask, float_v(-1.f), sign);
0031 
0032         // CONSTANTS
0033         float a1 =  0.254829592f;
0034         float a2 = -0.284496736f;
0035         float a3 =  1.421413741f;
0036         float a4 = -1.453152027f;
0037         float a5 =  1.061405429f;
0038         float p  =  0.3275911f;
0039 
0040         float_v t = 1.0f / (1.0f + p * xa);
0041         float_v y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-xa * xa);
0042         y = xsimd::set_one(y, precisionLimit);
0043         return sign * y;
0044     }
0045 };
0046 #endif /* defined HAVE_XSIMD */
0047 
0048 
0049 #endif // VC_ADDITIONAL_MATH_H