File indexing completed on 2025-01-26 04:07:17
0001 /* 0002 * SPDX-FileCopyrightText: 2022 L. E. Segovia <amy@amyspark.me> 0003 * 0004 * SPDX-License-Identifier: BSD-3-Clause 0005 */ 0006 0007 #ifndef KIS_XSIMD_ARCH_HPP 0008 #define KIS_XSIMD_ARCH_HPP 0009 0010 #include "./xsimd_config.hpp" 0011 0012 // Architecture initialization. Borrowed from Vc 0013 // Define the following strings to a unique integer, which is the only type the 0014 // preprocessor can compare. This allows to use -DXSIMD_IMPL=SSE3. The 0015 // preprocessor will then consider XSIMD_IMPL and SSE3 to be equal. 0016 // An additional define IMPL_MASK allows to detect the FMA extension. 0017 0018 #define Scalar 0x00100000 0019 #define SSE2 0x00200000 0020 #define SSE3 0x00300000 0021 #define SSSE3 0x00400000 0022 #define SSE4_1 0x00500000 0023 #define SSE4_2 0x00600000 0024 #define FMA4 0x00700000 0025 #define AVX 0x00800000 0026 #define AVX2 0x00900000 0027 #define AVX512F 0x00A00000 0028 #define AVX512BW 0x00B00000 0029 #define AVX512CD 0x00C00000 0030 #define AVX512DQ 0x00D00000 0031 #define NEON 0x10100000 0032 #define NEON64 0x10200000 0033 0034 #define FMA 0x00000001 0035 0036 #define Intel_Architecture 0x00000000 0037 #define Arm_Architecture 0x10000000 0038 0039 #define IMPL_MASK 0xFFF00000 0040 #define PLATFORM_MASK 0xF0000000 0041 0042 namespace xsimd 0043 { 0044 #if !defined(HAVE_XSIMD) || defined(XSIMD_IMPL) && (XSIMD_IMPL & IMPL_MASK) == Scalar 0045 using current_arch = generic; 0046 #elif !defined(XSIMD_IMPL) 0047 using current_arch = default_arch; 0048 #elif (XSIMD_IMPL & IMPL_MASK) == SSE2 0049 using current_arch = sse2; 0050 #elif (XSIMD_IMPL & IMPL_MASK) == SSE3 0051 using current_arch = sse3; 0052 #elif (XSIMD_IMPL & IMPL_MASK) == SSSE3 0053 using current_arch = ssse3; 0054 #elif (XSIMD_IMPL & IMPL_MASK) == SSE4_1 0055 using current_arch = sse4_1; 0056 #elif (XSIMD_IMPL & IMPL_MASK) == SSE4_2 0057 #if (XSIMD_IMPL & FMA) 0058 using current_arch = fma3<sse4_2>; 0059 #else 0060 using current_arch = sse4_2; 0061 #endif 0062 #elif (XSIMD_IMPL & IMPL_MASK) == FMA4 0063 using current_arch = fma4; 0064 #elif (XSIMD_IMPL & IMPL_MASK) == AVX 0065 #if (XSIMD_IMPL & FMA) 0066 using current_arch = fma3<avx>; 0067 #else 0068 using current_arch = avx; 0069 #endif 0070 #elif (XSIMD_IMPL & IMPL_MASK) == AVX2 0071 #if (XSIMD_IMPL & FMA) 0072 using current_arch = fma3<avx2>; 0073 #else 0074 using current_arch = avx2; 0075 #endif 0076 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512F 0077 using current_arch = avx512f; 0078 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512CD 0079 using current_arch = avx512cd; 0080 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512DQ 0081 using current_arch = avx512dq; 0082 #elif (XSIMD_IMPL & IMPL_MASK) == AVX512BW 0083 using current_arch = avx512bw; 0084 #elif (XSIMD_IMPL & IMPL_MASK) == NEON 0085 using current_arch = neon; 0086 #elif (XSIMD_IMPL & IMPL_MASK) == NEON64 0087 using current_arch = neon64; 0088 #endif 0089 }; // namespace xsimd 0090 0091 // xsimd extension to block AppleClang's auto-lipoization of 0092 // compiled objects. 0093 // If the defined instruction sets don't match what's expected 0094 // from the build flags, zonk out the included file. 0095 0096 #if !defined(HAVE_XSIMD) || !defined(XSIMD_IMPL) || defined(XSIMD_IMPL) && (XSIMD_IMPL & IMPL_MASK) == Scalar 0097 #define XSIMD_UNIVERSAL_BUILD_PASS 3 0098 #elif XSIMD_WITH_SSE2 && (XSIMD_IMPL & PLATFORM_MASK) == Intel_Architecture 0099 #define XSIMD_UNIVERSAL_BUILD_PASS 2 0100 #elif (XSIMD_WITH_NEON || XSIMD_WITH_NEON64) && (XSIMD_IMPL & PLATFORM_MASK) == Arm_Architecture 0101 #define XSIMD_UNIVERSAL_BUILD_PASS 1 0102 #endif 0103 0104 #ifndef XSIMD_UNIVERSAL_BUILD_PASS 0105 #define XSIMD_UNIVERSAL_BUILD_PASS 0 0106 #endif 0107 0108 #undef Scalar 0109 #undef SSE2 0110 #undef SSE3 0111 #undef SSSE3 0112 #undef SSE4_1 0113 #undef SSE4_2 0114 #undef AVX 0115 #undef AVX2 0116 #undef AVX512F 0117 #undef AVX512BW 0118 #undef AVX512CD 0119 #undef AVX512DQ 0120 #undef NEON 0121 #undef NEON64 0122 0123 #undef FMA3 0124 #undef FMA4 0125 #undef IMPL_MASK 0126 0127 #endif // KIS_XSIMD_ARCH_HPP