xsimd_extensions/arch/xsimd_generic.hpp

0001 /*
0002  * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me>
0003  *
0004  * SPDX-License-Identifier: BSD-3-Clause
0005  */
0006
0007 #ifndef KIS_XSIMD_GENERIC_HPP
0008 #define KIS_XSIMD_GENERIC_HPP
0009
0010 #include "xsimd_generic_details.hpp"
0011
0012 #include <array>
0013 #include <type_traits>
0014
0015 namespace xsimd
0016 {
0017 /***********************
0018  * Truncate-initialize *
0019  ***********************/
0020
0021 template<typename V, typename T, typename A>
0022 inline batch<T, A> truncate_to_type(xsimd::batch<T, A> const &self) noexcept
0023 {
0024     return kernel::detail::apply_with_value(
0025         [](float i) -> float {
0026             if (std::numeric_limits<V>::min() > i) {
0027                 return 0;
0028             } else if (std::numeric_limits<V>::max() < i) {
0029                 return 0;
0030             } else {
0031                 return static_cast<V>(i);
0032             }
0033         },
0034         self);
0035 }
0036
0037 // Mask to 0 elements of a vector.
0038 template<typename T, typename A>
0039 inline auto set_zero(const batch<T, A> &src, const batch_bool<T, A> &mask) noexcept
0040 {
0041     return xsimd::select(mask, xsimd::batch<T, A>(0), src);
0042 }
0043
0044 // Mask to 1 elements of a vector.
0045 template<typename T, typename A>
0046 inline auto set_one(const batch<T, A> &src, const batch_bool<T, A> &mask) noexcept
0047 {
0048     return xsimd::select(mask, xsimd::batch<T, A>(1), src);
0049 }
0050
0051 /**********************************
0052  * Sign-extending unaligned loads *
0053  **********************************/
0054
0055 // Load `T::size` values from the array of `T2` elements.
0056 template<typename T, typename T2>
0057 inline T load_and_extend(const T2 *src) noexcept
0058 {
0059     return kernel::detail::apply_with_index_and_value(
0060         [&](size_t i, typename T::value_type) {
0061             return static_cast<typename T::value_type>(src[i]);
0062         },
0063         T{});
0064 }
0065
0066 /*************************************************
0067  * Type-inferred, auto-aligned memory allocation *
0068  *************************************************/
0069
0070 // Allocate size bytes of memory aligned to `batch<T, A>::alignment()`.
0071 template<typename T, typename A>
0072 inline T *aligned_malloc(size_t size) noexcept
0073 {
0074     using T_v = batch<T, A>;
0075
0076     return reinterpret_cast<T *>(xsimd::aligned_malloc(size, T_v::arch_type::alignment()));
0077 }
0078
// Return the maximum of a non-empty list of template arguments at compile
// time. Base case: a single value is its own maximum.
template<size_t value, size_t... values>
constexpr typename std::enable_if<sizeof...(values) == 0, size_t>::type max()
{
    return value;
}

// Return the maximum of a non-empty list of template arguments at compile
// time. Recursive case: compare the first value with the maximum of the rest.
template<size_t value, size_t... values>
constexpr typename std::enable_if<sizeof...(values) != 0, size_t>::type max()
{
    // A plain comparison instead of std::max: no dependency on <algorithm>,
    // and the result is the same (the first value wins ties).
    return value < max<values...>() ? max<values...>() : value;
}
0092
0093 // Allocate memory for `sz` T items, aligned to the selected architecture's
0094 // alignment.
0095 template<typename T, typename A>
0096 inline T *vector_aligned_malloc(size_t sz) noexcept
0097 {
0098     return static_cast<T *>(xsimd::aligned_malloc(sz * sizeof(T), A::alignment()));
0099 }
0100
0101 // Free allocated memory, hiding the `const_cast` if necessary.
0102 template<typename T>
0103 inline void vector_aligned_free(const T *ptr) noexcept
0104 {
0105     // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
0106     return xsimd::aligned_free(const_cast<T *>(ptr));
0107 }
0108
0109 /****************
0110  * Interleaving *
0111  ****************/
0112
0113 // Return the tuple of interleaved batches `a` and `b`.
0114 // First element is the low half, second is the upper half.
0115 template<typename V>
0116 inline std::pair<V, V> interleave(const V &a, const V &b) noexcept
0117 {
0118     return {xsimd::zip_lo(a, b), xsimd::zip_hi(a, b)};
0119 }
0120
0121 /**********************
0122  * Quadratic function *
0123  **********************/
0124
0125 template<typename T, typename A>
0126 inline batch<T, A> pow2 (batch<T, A> const& self) noexcept
0127 {
0128     return self * self;
0129 }
0130
0131 #if XSIMD_VERSION_MAJOR <= 10
0132
0133 template <class B, class T, class A>
0134 inline batch<B, A> bitwise_cast_compat(batch<T, A> const& x) noexcept
0135 {
0136     return bitwise_cast<batch<B, A>>(x);
0137 }
0138
0139 #else
0140
0141 template <class B, class T, class A>
0142 inline batch<B, A> bitwise_cast_compat(batch<T, A> const& x) noexcept
0143 {
0144     return bitwise_cast<B>(x);
0145 }
0146
0147
0148 #endif
0149
0150 }; // namespace xsimd
0151
0152 #endif