From 224051df8146de53b69cd51a96a8aaea8bc8d5fa Mon Sep 17 00:00:00 2001 From: Elena Demikhovsky <elena.demikhovsky@intel.com> Date: Thu, 30 Apr 2015 09:24:29 +0000 Subject: [PATCH] AVX-512: Added AVX-512 intrinsics and tests by Asaf Badouh (asaf.badouh@intel.com) git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@236218 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 27 +++ lib/Headers/CMakeLists.txt | 2 + lib/Headers/avx512dqintrin.h | 237 +++++++++++++++++++ lib/Headers/avx512vldqintrin.h | 349 ++++++++++++++++++++++++++++ lib/Headers/immintrin.h | 8 + test/CodeGen/avx512dq-builtins.c | 164 +++++++++++++ test/CodeGen/avx512vldq-builtins.c | 231 ++++++++++++++++++ 7 files changed, 1018 insertions(+) create mode 100644 lib/Headers/avx512dqintrin.h create mode 100644 lib/Headers/avx512vldqintrin.h create mode 100644 test/CodeGen/avx512dq-builtins.c create mode 100644 test/CodeGen/avx512vldq-builtins.c diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 0f0b724d30f..d23f638233d 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1018,5 +1018,32 @@ BUILTIN(__builtin_ia32_psubq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "") BUILTIN(__builtin_ia32_paddd512_mask, "V16iV16iV16iV16iUs", "") BUILTIN(__builtin_ia32_psubd512_mask, "V16iV16iV16iV16iUs", "") BUILTIN(__builtin_ia32_pmulld512_mask, "V16iV16iV16iV16iUs", "") +BUILTIN(__builtin_ia32_pmullq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "") +BUILTIN(__builtin_ia32_xorpd512_mask, "V8dV8dV8dV8dUc", "") +BUILTIN(__builtin_ia32_xorps512_mask, "V16fV16fV16fV16fUs", "") +BUILTIN(__builtin_ia32_orpd512_mask, "V8dV8dV8dV8dUc", "") +BUILTIN(__builtin_ia32_orps512_mask, "V16fV16fV16fV16fUs", "") +BUILTIN(__builtin_ia32_andpd512_mask, "V8dV8dV8dV8dUc", "") +BUILTIN(__builtin_ia32_andps512_mask, "V16fV16fV16fV16fUs", "") +BUILTIN(__builtin_ia32_andnpd512_mask, "V8dV8dV8dV8dUc", "") +BUILTIN(__builtin_ia32_andnps512_mask, "V16fV16fV16fV16fUs", "") +BUILTIN(__builtin_ia32_pmullq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "") +BUILTIN(__builtin_ia32_pmullq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "") +BUILTIN(__builtin_ia32_andnpd256_mask, "V4dV4dV4dV4dUc", "") +BUILTIN(__builtin_ia32_andnpd128_mask, "V2dV2dV2dV2dUc", "") +BUILTIN(__builtin_ia32_andnps256_mask, "V8fV8fV8fV8fUc", "") +BUILTIN(__builtin_ia32_andnps128_mask, "V4fV4fV4fV4fUc", "") +BUILTIN(__builtin_ia32_andpd256_mask, "V4dV4dV4dV4dUc", "") +BUILTIN(__builtin_ia32_andpd128_mask, "V2dV2dV2dV2dUc", "") +BUILTIN(__builtin_ia32_andps256_mask, "V8fV8fV8fV8fUc", "") +BUILTIN(__builtin_ia32_andps128_mask, "V4fV4fV4fV4fUc", "") +BUILTIN(__builtin_ia32_xorpd256_mask, "V4dV4dV4dV4dUc", "") +BUILTIN(__builtin_ia32_xorpd128_mask, "V2dV2dV2dV2dUc", "") +BUILTIN(__builtin_ia32_xorps256_mask, "V8fV8fV8fV8fUc", "") +BUILTIN(__builtin_ia32_xorps128_mask, "V4fV4fV4fV4fUc", "") +BUILTIN(__builtin_ia32_orpd256_mask, "V4dV4dV4dV4dUc", "") +BUILTIN(__builtin_ia32_orpd128_mask, "V2dV2dV2dV2dUc", "") +BUILTIN(__builtin_ia32_orps256_mask, "V8fV8fV8fV8fUc", "") +BUILTIN(__builtin_ia32_orps128_mask, "V4fV4fV4fV4fUc", "") #undef BUILTIN diff --git a/lib/Headers/CMakeLists.txt b/lib/Headers/CMakeLists.txt index 37278032218..29a738e7a81 100644 --- a/lib/Headers/CMakeLists.txt +++ b/lib/Headers/CMakeLists.txt @@ -9,6 +9,8 @@ set(files avx512fintrin.h avx512vlbwintrin.h avx512vlintrin.h + avx512dqintrin.h + avx512vldqintrin.h avxintrin.h bmi2intrin.h bmiintrin.h diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h new file mode 100644 index 00000000000..fd33be2f44a --- /dev/null +++ b/lib/Headers/avx512dqintrin.h @@ -0,0 +1,237 @@ +/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512DQINTRIN_H +#define __AVX512DQINTRIN_H + +static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mullo_epi64 (__m512i __A, __m512i __B) { + return (__m512i) ((__v8di) __A * (__v8di) __B); +} + +static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { + return (__m512i) __builtin_ia32_pmullq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_xor_pd (__m512d __A, __m512d __B) { + return (__m512d) ((__v8di) __A ^ (__v8di) __B); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_xor_ps (__m512 __A, __m512 __B) { + return (__m512) ((__v16si) __A ^ (__v16si) __B); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_or_pd (__m512d __A, __m512d __B) { + return (__m512d) ((__v8di) __A | (__v8di) __B); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_or_ps (__m512 __A, __m512 __B) { + return (__m512) ((__v16si) __A | (__v16si) __B); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_and_pd (__m512d __A, __m512d __B) { + return (__m512d) ((__v8di) __A & (__v8di) __B); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_and_ps (__m512 __A, __m512 __B) { + return (__m512) ((__v16si) __A & (__v16si) __B); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_andnot_pd (__m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { + return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A, + (__v8df) __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_andnot_ps (__m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __attribute__ ((__always_inline__, __nodebug__)) +_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { + return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A, + (__v16sf) __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +#endif diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h new file mode 100644 index 00000000000..4024446a3a6 --- /dev/null +++ b/lib/Headers/avx512vldqintrin.h @@ -0,0 +1,349 @@ +/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef __AVX512VLDQINTRIN_H +#define __AVX512VLDQINTRIN_H + + +static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mullo_epi64 (__m256i __A, __m256i __B) { + return (__m256i) ((__v4di) __A * (__v4di) __B); +} + +static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __W, + (__mmask8) __U); +} + +static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) +_mm_mullo_epi64 (__m128i __A, __m128i __B) { + return (__m128i) ((__v2di) __A * (__v2di) __B); +} + +static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __W, + (__mmask8) __U); +} + +static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, + __m256d __B) { + return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { + return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, + (__v4df) __B, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { + return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) +_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { + return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, + (__v8sf) __B, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) +_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +#endif diff --git a/lib/Headers/immintrin.h b/lib/Headers/immintrin.h index 2400fea499b..ac7d54a41db 100644 --- a/lib/Headers/immintrin.h +++ b/lib/Headers/immintrin.h @@ -88,10 +88,18 @@ #include <avx512bwintrin.h> #endif +#ifdef __AVX512DQ__ +#include <avx512dqintrin.h> +#endif + #if defined (__AVX512VL__) && defined (__AVX512BW__) #include <avx512vlbwintrin.h> #endif +#if defined (__AVX512VL__) && defined (__AVX512DQ__) +#include <avx512vldqintrin.h> +#endif + #ifdef __AVX512ER__ #include <avx512erintrin.h> #endif diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c new file mode 100644 index 00000000000..e35b243534a --- /dev/null +++ b/test/CodeGen/avx512dq-builtins.c @@ -0,0 +1,164 @@ +// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512dq -emit-llvm -o - -Werror | FileCheck %s + +#include <immintrin.h> +__m512i test_mm512_mullo_epi64 (__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mullo_epi64 + // CHECK: mul <8 x i64> + return (__m512i) ((__v8di) __A * (__v8di) __B); +} + +__m512i test_mm512_mask_mullo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_mullo_epi64 + // CHECK: @llvm.x86.avx512.mask.pmull.q.512 + return (__m512i) _mm512_mask_mullo_epi64(__W, __U, __A, __B); +} + +__m512i test_mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_mullo_epi64 + // CHECK: @llvm.x86.avx512.mask.pmull.q.512 + return (__m512i) _mm512_maskz_mullo_epi64(__U, __A, __B); +} + +__m512d test_mm512_xor_pd (__m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_xor_pd + // CHECK: xor <8 x i64> + return (__m512d) _mm512_xor_pd(__A, __B); +} + +__m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_mask_xor_pd + // CHECK: @llvm.x86.avx512.mask.xor.pd.512 + return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B); +} + +__m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_maskz_xor_pd + // CHECK: @llvm.x86.avx512.mask.xor.pd.512 + return (__m512d) _mm512_maskz_xor_pd(__U, __A, __B); +} + +__m512 test_mm512_xor_ps (__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_xor_ps + // CHECK: xor <16 x i32> + return (__m512) _mm512_xor_ps(__A, __B); +} + +__m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_xor_ps + // CHECK: @llvm.x86.avx512.mask.xor.ps.512 + return (__m512) _mm512_mask_xor_ps(__W, __U, __A, __B); +} + +__m512 test_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_xor_ps + // CHECK: @llvm.x86.avx512.mask.xor.ps.512 + return (__m512) _mm512_maskz_xor_ps(__U, __A, __B); +} + +__m512d test_mm512_or_pd (__m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_or_pd + // CHECK: or <8 x i64> + return (__m512d) _mm512_or_pd(__A, __B); +} + +__m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_mask_or_pd + // CHECK: @llvm.x86.avx512.mask.or.pd.512 + return (__m512d) _mm512_mask_or_pd(__W, __U, __A, __B); +} + +__m512d test_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_maskz_or_pd + // CHECK: @llvm.x86.avx512.mask.or.pd.512 + return (__m512d) _mm512_maskz_or_pd(__U, __A, __B); +} + +__m512 test_mm512_or_ps (__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_or_ps + // CHECK: or <16 x i32> + return (__m512) _mm512_or_ps(__A, __B); +} + +__m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_or_ps + // CHECK: @llvm.x86.avx512.mask.or.ps.512 + return (__m512) _mm512_mask_or_ps(__W, __U, __A, __B); +} + +__m512 test_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_or_ps + // CHECK: @llvm.x86.avx512.mask.or.ps.512 + return (__m512) _mm512_maskz_or_ps(__U, __A, __B); +} + +__m512d test_mm512_and_pd (__m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_and_pd + // CHECK: and <8 x i64> + return (__m512d) _mm512_and_pd(__A, __B); +} + +__m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_mask_and_pd + // CHECK: @llvm.x86.avx512.mask.and.pd.512 + return (__m512d) _mm512_mask_and_pd(__W, __U, __A, __B); +} + +__m512d test_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_maskz_and_pd + // CHECK: @llvm.x86.avx512.mask.and.pd.512 + return (__m512d) _mm512_maskz_and_pd(__U, __A, __B); +} + +__m512 test_mm512_and_ps (__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_and_ps + // CHECK: and <16 x i32> + return (__m512) _mm512_and_ps(__A, __B); +} + +__m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_and_ps + // CHECK: @llvm.x86.avx512.mask.and.ps.512 + return (__m512) _mm512_mask_and_ps(__W, __U, __A, __B); +} + +__m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_and_ps + // CHECK: @llvm.x86.avx512.mask.and.ps.512 + return (__m512) _mm512_maskz_and_ps(__U, __A, __B); +} + +__m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_andnot_pd + // CHECK: @llvm.x86.avx512.mask.andn.pd.512 + return (__m512d) _mm512_andnot_pd(__A, __B); +} + +__m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_mask_andnot_pd + // CHECK: @llvm.x86.avx512.mask.andn.pd.512 + return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B); +} + +__m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) { + // CHECK-LABEL: @test_mm512_maskz_andnot_pd + // CHECK: @llvm.x86.avx512.mask.andn.pd.512 + return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B); +} + +__m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_andnot_ps + // CHECK: @llvm.x86.avx512.mask.andn.ps.512 + return (__m512) _mm512_andnot_ps(__A, __B); +} + +__m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_mask_andnot_ps + // CHECK: @llvm.x86.avx512.mask.andn.ps.512 + return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B); +} + +__m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) { + // CHECK-LABEL: @test_mm512_maskz_andnot_ps + // CHECK: @llvm.x86.avx512.mask.andn.ps.512 + return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B); +} diff --git a/test/CodeGen/avx512vldq-builtins.c b/test/CodeGen/avx512vldq-builtins.c new file mode 100644 index 00000000000..a9b6dbfa6fc --- /dev/null +++ b/test/CodeGen/avx512vldq-builtins.c @@ -0,0 +1,231 @@ +// RUN: %clang_cc1 %s -O0 -triple=x86_64-apple-darwin -ffreestanding -target-feature +avx512dq -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s + +#include <immintrin.h> + +__m256i test_mm256_mullo_epi64 (__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mullo_epi64 + // CHECK: mul <4 x i64> + return _mm256_mullo_epi64(__A, __B); +} + +__m256i test_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_mullo_epi64 + // CHECK: @llvm.x86.avx512.mask.pmull.q.256 + return (__m256i) _mm256_mask_mullo_epi64 ( __W, __U, __A, __B); +} + +__m256i test_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_mullo_epi64 + // CHECK: @llvm.x86.avx512.mask.pmull.q.256 + return (__m256i) _mm256_maskz_mullo_epi64 (__U, __A, __B); +} + +__m128i test_mm_mullo_epi64 (__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_mullo_epi64 + // CHECK: mul <2 x i64> + return (__m128i) _mm_mullo_epi64(__A, __B); +} + +__m128i test_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_mask_mullo_epi64 + // CHECK: @llvm.x86.avx512.mask.pmull.q.128 + return (__m128i) _mm_mask_mullo_epi64 ( __W, __U, __A, __B); +} + +__m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_maskz_mullo_epi64 + // CHECK: @llvm.x86.avx512.mask.pmull.q.128 + return (__m128i) _mm_maskz_mullo_epi64 (__U, __A, __B); +} + +__m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_mask_andnot_pd + // CHECK: @llvm.x86.avx512.mask.andn.pd.256 + return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B); +} + +__m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_maskz_andnot_pd + // CHECK: @llvm.x86.avx512.mask.andn.pd.256 + return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B); +} + +__m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_andnot_pd + // CHECK: @llvm.x86.avx512.mask.andn.pd.128 + return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_andnot_pd + // CHECK: @llvm.x86.avx512.mask.andn.pd.128 + return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B); +} + +__m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_andnot_ps + // CHECK: @llvm.x86.avx512.mask.andn.ps.256 + return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B); +} + +__m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_andnot_ps + // CHECK: @llvm.x86.avx512.mask.andn.ps.256 + return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B); +} + +__m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_andnot_ps + // CHECK: @llvm.x86.avx512.mask.andn.ps.128 + return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_andnot_ps + // CHECK: @llvm.x86.avx512.mask.andn.ps.128 + return (__m128) _mm_maskz_andnot_ps (__U, __A, __B); +} + +__m256d test_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_mask_and_pd + // CHECK: @llvm.x86.avx512.mask.and.pd.256 + return (__m256d) _mm256_mask_and_pd ( __W, __U, __A, __B); +} + +__m256d test_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_maskz_and_pd + // CHECK: @llvm.x86.avx512.mask.and.pd.256 + return (__m256d) _mm256_maskz_and_pd (__U, __A, __B); +} + +__m128d test_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_and_pd + // CHECK: @llvm.x86.avx512.mask.and.pd.128 + return (__m128d) _mm_mask_and_pd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_and_pd + // CHECK: @llvm.x86.avx512.mask.and.pd.128 + return (__m128d) _mm_maskz_and_pd (__U, __A, __B); +} + +__m256 test_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_and_ps + // CHECK: @llvm.x86.avx512.mask.and.ps.256 + return (__m256) _mm256_mask_and_ps ( __W, __U, __A, __B); +} + +__m256 test_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_and_ps + // CHECK: @llvm.x86.avx512.mask.and.ps.256 + return (__m256) _mm256_maskz_and_ps (__U, __A, __B); +} + +__m128 test_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_and_ps + // CHECK: @llvm.x86.avx512.mask.and.ps.128 + return (__m128) _mm_mask_and_ps ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_and_ps + // CHECK: @llvm.x86.avx512.mask.and.ps.128 + return (__m128) _mm_maskz_and_ps (__U, __A, __B); +} + +__m256d test_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_mask_xor_pd + // CHECK: @llvm.x86.avx512.mask.xor.pd.256 + return (__m256d) _mm256_mask_xor_pd ( __W, __U, __A, __B); +} + +__m256d test_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_maskz_xor_pd + // CHECK: @llvm.x86.avx512.mask.xor.pd.256 + return (__m256d) _mm256_maskz_xor_pd (__U, __A, __B); +} + +__m128d test_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_xor_pd + // CHECK: @llvm.x86.avx512.mask.xor.pd.128 + return (__m128d) _mm_mask_xor_pd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_xor_pd + // CHECK: @llvm.x86.avx512.mask.xor.pd.128 + return (__m128d) _mm_maskz_xor_pd (__U, __A, __B); +} + +__m256 test_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_xor_ps + // CHECK: @llvm.x86.avx512.mask.xor.ps.256 + return (__m256) _mm256_mask_xor_ps ( __W, __U, __A, __B); +} + +__m256 test_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_xor_ps + // CHECK: @llvm.x86.avx512.mask.xor.ps.256 + return (__m256) _mm256_maskz_xor_ps (__U, __A, __B); +} + +__m128 test_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_xor_ps + // CHECK: @llvm.x86.avx512.mask.xor.ps.128 + return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_xor_ps + // CHECK: @llvm.x86.avx512.mask.xor.ps.128 + return (__m128) _mm_maskz_xor_ps (__U, __A, __B); +} + +__m256d test_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_mask_or_pd + // CHECK: @llvm.x86.avx512.mask.or.pd.256 + return (__m256d) _mm256_mask_or_pd ( __W, __U, __A, __B); +} + +__m256d test_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { + // CHECK-LABEL: @test_mm256_maskz_or_pd + // CHECK: @llvm.x86.avx512.mask.or.pd.256 + return (__m256d) _mm256_maskz_or_pd (__U, __A, __B); +} + +__m128d test_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_mask_or_pd + // CHECK: @llvm.x86.avx512.mask.or.pd.128 + return (__m128d) _mm_mask_or_pd ( __W, __U, __A, __B); +} + +__m128d test_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { + // CHECK-LABEL: @test_mm_maskz_or_pd + // CHECK: @llvm.x86.avx512.mask.or.pd.128 + return (__m128d) _mm_maskz_or_pd (__U, __A, __B); +} + +__m256 test_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_mask_or_ps + // CHECK: @llvm.x86.avx512.mask.or.ps.256 + return (__m256) _mm256_mask_or_ps ( __W, __U, __A, __B); +} + +__m256 test_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { + // CHECK-LABEL: @test_mm256_maskz_or_ps + // CHECK: @llvm.x86.avx512.mask.or.ps.256 + return (__m256) _mm256_maskz_or_ps (__U, __A, __B); +} + +__m128 test_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_mask_or_ps + // CHECK: @llvm.x86.avx512.mask.or.ps.128 + return (__m128) _mm_mask_or_ps ( __W, __U, __A, __B); +} + +__m128 test_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { + // CHECK-LABEL: @test_mm_maskz_or_ps + // CHECK: @llvm.x86.avx512.mask.or.ps.128 + return (__m128) _mm_maskz_or_ps(__U, __A, __B); +} -- GitLab