File indexing completed on 2024-05-12 15:59:02
0001 /* 0002 * SPDX-FileCopyrightText: 2018 Iván Santa María <ghevan@gmail.com> 0003 * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me> 0004 * 0005 * SPDX-License-Identifier: GPL-2.0-or-later 0006 */ 0007 0008 #ifndef VC_ADDITIONAL_MATH_H 0009 #define VC_ADDITIONAL_MATH_H 0010 0011 #include <xsimd_extensions/xsimd.hpp> 0012 0013 #if defined(HAVE_XSIMD) && !defined(XSIMD_NO_SUPPORTED_ARCHITECTURE) 0014 0015 class VcExtraMath 0016 { 0017 public: 0018 0019 // vectorized erf function, precision 1e-5 0020 template<typename A> 0021 static inline xsimd::batch<float, A> erf(const xsimd::batch<float, A> x) 0022 { 0023 using float_v = xsimd::batch<float, A>; 0024 using float_m = typename float_v::batch_bool_type; 0025 float_v xa = xsimd::abs(x); 0026 float_m precisionLimit = xa >= float_v(9.3f); // wrong result for any number beyond this 0027 xa = xsimd::set_zero(xa, precisionLimit); 0028 float_v sign(1.0f); 0029 float_m invertMask = x < float_v(0.f); 0030 sign = xsimd::select(invertMask, float_v(-1.f), sign); 0031 0032 // CONSTANTS 0033 float a1 = 0.254829592f; 0034 float a2 = -0.284496736f; 0035 float a3 = 1.421413741f; 0036 float a4 = -1.453152027f; 0037 float a5 = 1.061405429f; 0038 float p = 0.3275911f; 0039 0040 float_v t = 1.0f / (1.0f + p * xa); 0041 float_v y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * exp(-xa * xa); 0042 y = xsimd::set_one(y, precisionLimit); 0043 return sign * y; 0044 } 0045 }; 0046 #endif /* defined HAVE_XSIMD */ 0047 0048 0049 #endif // VC_ADDITIONAL_MATH_H