File indexing completed on 2025-01-26 04:07:17
0001 /* 0002 * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me> 0003 * 0004 * SPDX-License-Identifier: BSD-3-Clause 0005 */ 0006 0007 #ifndef KIS_XSIMD_GENERIC_HPP 0008 #define KIS_XSIMD_GENERIC_HPP 0009 0010 #include "xsimd_generic_details.hpp" 0011 0012 #include <array> 0013 #include <type_traits> 0014 0015 namespace xsimd 0016 { 0017 /*********************** 0018 * Truncate-initialize * 0019 ***********************/ 0020 0021 template<typename V, typename T, typename A> 0022 inline batch<T, A> truncate_to_type(xsimd::batch<T, A> const &self) noexcept 0023 { 0024 return kernel::detail::apply_with_value( 0025 [](float i) -> float { 0026 if (std::numeric_limits<V>::min() > i) { 0027 return 0; 0028 } else if (std::numeric_limits<V>::max() < i) { 0029 return 0; 0030 } else { 0031 return static_cast<V>(i); 0032 } 0033 }, 0034 self); 0035 } 0036 0037 // Mask to 0 elements of a vector. 0038 template<typename T, typename A> 0039 inline auto set_zero(const batch<T, A> &src, const batch_bool<T, A> &mask) noexcept 0040 { 0041 return xsimd::select(mask, xsimd::batch<T, A>(0), src); 0042 } 0043 0044 // Mask to 1 elements of a vector. 0045 template<typename T, typename A> 0046 inline auto set_one(const batch<T, A> &src, const batch_bool<T, A> &mask) noexcept 0047 { 0048 return xsimd::select(mask, xsimd::batch<T, A>(1), src); 0049 } 0050 0051 /********************************** 0052 * Sign-extending unaligned loads * 0053 **********************************/ 0054 0055 // Load `T::size` values from the array of `T2` elements. 0056 template<typename T, typename T2> 0057 inline T load_and_extend(const T2 *src) noexcept 0058 { 0059 return kernel::detail::apply_with_index_and_value( 0060 [&](size_t i, typename T::value_type) { 0061 return static_cast<typename T::value_type>(src[i]); 0062 }, 0063 T{}); 0064 } 0065 0066 /************************************************* 0067 * Type-inferred, auto-aligned memory allocation * 0068 *************************************************/ 0069 0070 // Allocate size bytes of memory aligned to `batch<T, A>::alignment()`. 0071 template<typename T, typename A> 0072 inline T *aligned_malloc(size_t size) noexcept 0073 { 0074 using T_v = batch<T, A>; 0075 0076 return reinterpret_cast<T *>(xsimd::aligned_malloc(size, T_v::arch_type::alignment())); 0077 } 0078 0079 // Return the maximum of a list of templated values at compile time. 0080 template<size_t value, size_t... values> 0081 constexpr typename std::enable_if<sizeof...(values) == 0, size_t>::type max() 0082 { 0083 return value; 0084 } 0085 0086 // Return the maximum of a list of templated values at compile time. 0087 template<size_t value, size_t... values> 0088 constexpr typename std::enable_if<sizeof...(values) != 0, size_t>::type max() 0089 { 0090 return std::max(value, max<values...>()); 0091 } 0092 0093 // Allocate memory for `sz` T items, aligned to the selected architecture's 0094 // alignment. 0095 template<typename T, typename A> 0096 inline T *vector_aligned_malloc(size_t sz) noexcept 0097 { 0098 return static_cast<T *>(xsimd::aligned_malloc(sz * sizeof(T), A::alignment())); 0099 } 0100 0101 // Free allocated memory, hiding the `const_cast` if necessary. 0102 template<typename T> 0103 inline void vector_aligned_free(const T *ptr) noexcept 0104 { 0105 // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) 0106 return xsimd::aligned_free(const_cast<T *>(ptr)); 0107 } 0108 0109 /**************** 0110 * Interleaving * 0111 ****************/ 0112 0113 // Return the tuple of interleaved batches `a` and `b`. 0114 // First element is the low half, second is the upper half. 0115 template<typename V> 0116 inline std::pair<V, V> interleave(const V &a, const V &b) noexcept 0117 { 0118 return {xsimd::zip_lo(a, b), xsimd::zip_hi(a, b)}; 0119 } 0120 0121 /********************** 0122 * Quadratic function * 0123 **********************/ 0124 0125 template<typename T, typename A> 0126 inline batch<T, A> pow2 (batch<T, A> const& self) noexcept 0127 { 0128 return self * self; 0129 } 0130 0131 #if XSIMD_VERSION_MAJOR <= 10 0132 0133 template <class B, class T, class A> 0134 inline batch<B, A> bitwise_cast_compat(batch<T, A> const& x) noexcept 0135 { 0136 return bitwise_cast<batch<B, A>>(x); 0137 } 0138 0139 #else 0140 0141 template <class B, class T, class A> 0142 inline batch<B, A> bitwise_cast_compat(batch<T, A> const& x) noexcept 0143 { 0144 return bitwise_cast<B>(x); 0145 } 0146 0147 0148 #endif 0149 0150 }; // namespace xsimd 0151 0152 #endif