File indexing completed on 2025-01-26 04:07:17

0001 /*
0002  * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me>
0003  *
0004  * SPDX-License-Identifier: BSD-3-Clause
0005  */
0006 
0007 #ifndef KIS_XSIMD_ARCH_HPP
0008 #define KIS_XSIMD_ARCH_HPP
0009 
0010 #include "./xsimd_config.hpp"
0011 
// Architecture initialization. Borrowed from Vc.
// Define the following names as unique integers, which are the only type the
// preprocessor can compare. This allows using -DXSIMD_IMPL=SSE3: the
// preprocessor will then consider XSIMD_IMPL and SSE3 to be equal.
// The additional define IMPL_MASK strips the FMA flag bit, so that the base
// instruction set and the FMA extension can be tested separately.
0017 
// Instruction-set identifiers. Each one is a distinct value in the bits
// selected by IMPL_MASK; the top nibble (PLATFORM_MASK) carries the platform.

// -- Intel family (platform nibble 0x0) --
#define Scalar   0x00100000
#define SSE2     0x00200000
#define SSE3     0x00300000
#define SSSE3    0x00400000
#define SSE4_1   0x00500000
#define SSE4_2   0x00600000
#define FMA4     0x00700000
#define AVX      0x00800000
#define AVX2     0x00900000
#define AVX512F  0x00A00000
#define AVX512BW 0x00B00000
#define AVX512CD 0x00C00000
#define AVX512DQ 0x00D00000

// -- Arm family (platform nibble 0x1) --
#define NEON     0x10100000
#define NEON64   0x10200000

// Flag bit kept outside IMPL_MASK; tested on its own with (XSIMD_IMPL & FMA).
#define FMA 0x00000001

// Expected results of (XSIMD_IMPL & PLATFORM_MASK).
#define Intel_Architecture 0x00000000
#define Arm_Architecture   0x10000000

// IMPL_MASK keeps platform + instruction set and drops the flag bits;
// PLATFORM_MASK keeps only the platform nibble.
#define IMPL_MASK     0xFFF00000
#define PLATFORM_MASK 0xF0000000
0041 
0042 namespace xsimd
0043 {
0044 #if !defined(HAVE_XSIMD) || defined(XSIMD_IMPL) && (XSIMD_IMPL & IMPL_MASK) == Scalar
0045 using current_arch = generic;
0046 #elif !defined(XSIMD_IMPL)
0047 using current_arch = default_arch;
0048 #elif (XSIMD_IMPL & IMPL_MASK) == SSE2
0049 using current_arch = sse2;
0050 #elif (XSIMD_IMPL & IMPL_MASK) == SSE3
0051 using current_arch = sse3;
0052 #elif (XSIMD_IMPL & IMPL_MASK) == SSSE3
0053 using current_arch = ssse3;
0054 #elif (XSIMD_IMPL & IMPL_MASK) == SSE4_1
0055 using current_arch = sse4_1;
0056 #elif (XSIMD_IMPL & IMPL_MASK) == SSE4_2
0057 #if (XSIMD_IMPL & FMA)
0058 using current_arch = fma3<sse4_2>;
0059 #else
0060 using current_arch = sse4_2;
0061 #endif
0062 #elif (XSIMD_IMPL & IMPL_MASK) == FMA4
0063 using current_arch = fma4;
0064 #elif (XSIMD_IMPL & IMPL_MASK) == AVX
0065 #if (XSIMD_IMPL & FMA)
0066 using current_arch = fma3<avx>;
0067 #else
0068 using current_arch = avx;
0069 #endif
0070 #elif (XSIMD_IMPL & IMPL_MASK) == AVX2
0071 #if (XSIMD_IMPL & FMA)
0072 using current_arch = fma3<avx2>;
0073 #else
0074 using current_arch = avx2;
0075 #endif
0076 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512F
0077 using current_arch = avx512f;
0078 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512CD
0079 using current_arch = avx512cd;
0080 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512DQ
0081 using current_arch = avx512dq;
0082 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512BW
0083 using current_arch = avx512bw;
0084 #elif (XSIMD_IMPL & IMPL_MASK) == NEON
0085 using current_arch = neon;
0086 #elif (XSIMD_IMPL & IMPL_MASK) == NEON64
0087 using current_arch = neon64;
0088 #endif
0089 }; // namespace xsimd
0090 
0091 // xsimd extension to block AppleClang's auto-lipoization of
0092 // compiled objects.
0093 // If the defined instruction sets don't match what's expected
0094 // from the build flags, zonk out the included file.
0095 
// XSIMD_UNIVERSAL_BUILD_PASS values:
//   3 - HAVE_XSIMD or XSIMD_IMPL is missing, or XSIMD_IMPL requests Scalar
//   2 - XSIMD_WITH_SSE2 is set and XSIMD_IMPL carries the Intel platform bits
//   1 - XSIMD_WITH_NEON / XSIMD_WITH_NEON64 is set and XSIMD_IMPL carries the
//       Arm platform bits
//   0 - none of the above matched
// NOTE(review): XSIMD_WITH_* are presumably provided through xsimd_config.hpp
// - confirm.
#if !defined(HAVE_XSIMD) || !defined(XSIMD_IMPL) \
    || (XSIMD_IMPL & IMPL_MASK) == Scalar
#    define XSIMD_UNIVERSAL_BUILD_PASS 3
#elif XSIMD_WITH_SSE2 && (XSIMD_IMPL & PLATFORM_MASK) == Intel_Architecture
#    define XSIMD_UNIVERSAL_BUILD_PASS 2
#elif (XSIMD_WITH_NEON || XSIMD_WITH_NEON64) \
    && (XSIMD_IMPL & PLATFORM_MASK) == Arm_Architecture
#    define XSIMD_UNIVERSAL_BUILD_PASS 1
#endif

// Fallback when no branch above defined the macro.
#if !defined(XSIMD_UNIVERSAL_BUILD_PASS)
#    define XSIMD_UNIVERSAL_BUILD_PASS 0
#endif
0107 
// Remove every helper macro this header defined so that short, generic names
// such as SSE2, AVX or FMA do not leak into the including translation unit.

// Instruction-set identifiers.
#undef Scalar
#undef SSE2
#undef SSE3
#undef SSSE3
#undef SSE4_1
#undef SSE4_2
#undef FMA4
#undef AVX
#undef AVX2
#undef AVX512F
#undef AVX512BW
#undef AVX512CD
#undef AVX512DQ
#undef NEON
#undef NEON64

// Flag bit. The macro defined by this header is FMA; FMA3 is not defined here
// and its #undef is kept only so the previous behaviour is preserved.
#undef FMA
#undef FMA3

// Platform identifiers and masks.
#undef Intel_Architecture
#undef Arm_Architecture
#undef IMPL_MASK
#undef PLATFORM_MASK
0126 
0127 #endif // KIS_XSIMD_ARCH_HPP