From a2ae4a45d35099e69da87db7b51c72e3b8f55f50 Mon Sep 17 00:00:00 2001
From: Michael Zuckerman <Michael.zuckerman@intel.com>
Date: Thu, 28 Apr 2016 21:21:08 +0000
Subject: [PATCH] [clang][AVX512][Builtin] Add intrinsics for the SAD
 instructions (psadbw, dbpsadbw).

Differential Revision: http://reviews.llvm.org/D19591


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@267942 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/BuiltinsX86.def |  4 +++
 lib/Headers/avx512bwintrin.h        | 33 ++++++++++++++++++++
 lib/Headers/avx512vlbwintrin.h      | 48 +++++++++++++++++++++++++++++
 test/CodeGen/avx512bw-builtins.c    | 23 ++++++++++++++
 test/CodeGen/avx512vlbw-builtins.c  | 35 +++++++++++++++++++++
 5 files changed, 143 insertions(+)

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 4a58c91dcd1..963a8c576ff 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -2214,6 +2214,10 @@ TARGET_BUILTIN(__builtin_ia32_movntps512, "vf*V16f","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64ciV64cULLi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr128_mask, "V16cV16cV16ciV16cUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_palignr256_mask, "V32cV32cV32ciV32cUi","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc","","avx512bw,avx512vl") // header passes: A, B, imm, W, U
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs","","avx512bw,avx512vl") // header passes: A, B, imm, W, U
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi","","avx512bw") // header passes: A, B, imm, W, U
+TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c","","avx512bw") // header passes: A, B
 
 
 #undef BUILTIN
diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h
index 4f451df3f86..59e64c12adf 100644
--- a/lib/Headers/avx512bwintrin.h
+++ b/lib/Headers/avx512bwintrin.h
@@ -2191,6 +2191,39 @@ __builtin_ia32_palignr512_mask ((__v8di) __A,\
                  (__mmask64) __U);\
 })
 
+#define _mm512_dbsad_epu8( __A,  __B, __imm) __extension__ ({ /* yields __m512i */ \
+(__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)( __A),\
+                                (__v64qi)( __B),\
+                                ( __imm),\
+                                (__v32hi) _mm512_undefined_epi32(), /* no caller-supplied 4th operand */ \
+                                (__mmask32) -1); /* all mask bits set */ \
+})
+
+#define _mm512_mask_dbsad_epu8( __W, __U, __A, __B, __imm) __extension__ ({ /* yields __m512i */ \
+(__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)( __A),\
+                                (__v64qi)( __B),\
+                                ( __imm),\
+                                (__v32hi)( __W), /* 4th operand: caller's __W */ \
+                                (__mmask32)( __U)); /* 5th operand: caller's mask */ \
+})
+
+#define _mm512_maskz_dbsad_epu8( __U, __A, __B, __imm) __extension__ ({ /* yields __m512i */ \
+(__m512i) __builtin_ia32_dbpsadbw512_mask ((__v64qi)( __A),\
+                                (__v64qi)( __B),\
+                                ( __imm),\
+                                (__v32hi) _mm512_setzero_hi(), /* 4th operand: zero vector (per helper name) */ \
+                                (__mmask32)( __U)); /* 5th operand: caller's mask */ \
+})
+
+/* Forwards to __builtin_ia32_psadbw512.  Both operands are passed as
+ * __v64qi and the builtin's result is handed back as __m512i; there is
+ * no immediate or mask operand. */
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_sad_epu8 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, (__v64qi) __B);
+}
+
 #undef __DEFAULT_FN_ATTRS
 
 #endif
diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h
index bee20aa183e..86a206b8ee6 100644
--- a/lib/Headers/avx512vlbwintrin.h
+++ b/lib/Headers/avx512vlbwintrin.h
@@ -3392,6 +3392,54 @@ __builtin_ia32_palignr256_mask ((__v4di)( __A),\
                (__mmask32)( __U));\
 })
 
+#define _mm_dbsad_epu8( __A, __B, __imm) __extension__ ({ /* yields __m128i */ \
+(__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)( __A),\
+                (__v16qi)( __B),\
+                ( __imm),\
+                (__v8hi) _mm_setzero_si128 (), /* same zero helper as the maskz form */ \
+                (__mmask8) -1); /* all mask bits set */ \
+})
+
+#define _mm_mask_dbsad_epu8( __W, __U, __A, __B, __imm) __extension__ ({ /* yields __m128i */ \
+(__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)( __A),\
+                (__v16qi)( __B),\
+                ( __imm),\
+                (__v8hi)( __W), /* 4th operand: caller's __W */ \
+                (__mmask8)( __U)); /* 5th operand: caller's mask */ \
+})
+
+#define _mm_maskz_dbsad_epu8( __U, __A, __B, __imm) __extension__ ({ /* yields __m128i */ \
+(__m128i) __builtin_ia32_dbpsadbw128_mask ((__v16qi)( __A),\
+                (__v16qi)( __B),\
+                ( __imm),\
+                (__v8hi) _mm_setzero_si128 (), /* 4th operand: zero vector (per helper name) */ \
+                (__mmask8)( __U)); /* 5th operand: caller's mask */ \
+})
+
+#define _mm256_dbsad_epu8( __A, __B, __imm) __extension__ ({ /* yields __m256i */ \
+(__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)( __A),\
+                (__v32qi)( __B),\
+                ( __imm),\
+                (__v16hi) _mm256_setzero_si256 (), /* 4th operand: zero vector (per helper name) */ \
+                (__mmask16) -1); /* all mask bits set */ \
+})
+
+#define _mm256_mask_dbsad_epu8( __W, __U, __A, __B, __imm) __extension__ ({ /* yields __m256i */ \
+(__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)( __A),\
+                (__v32qi)( __B),\
+                ( __imm),\
+                (__v16hi)( __W), /* 4th operand: caller's __W */ \
+                (__mmask16)( __U)); /* 5th operand: caller's mask */ \
+})
+
+#define _mm256_maskz_dbsad_epu8( __U, __A, __B, __imm) __extension__ ({ /* yields __m256i */ \
+(__m256i) __builtin_ia32_dbpsadbw256_mask ((__v32qi)( __A),\
+                (__v32qi)( __B),\
+                ( __imm),\
+                (__v16hi) _mm256_setzero_si256 (), /* 4th operand: zero vector (per helper name) */ \
+                (__mmask16)( __U)); /* 5th operand: caller's mask */ \
+})
+
 #undef __DEFAULT_FN_ATTRS
 
 #endif /* __AVX512VLBWINTRIN_H */
diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c
index 8925cbadc06..c701fd54bf0 100644
--- a/test/CodeGen/avx512bw-builtins.c
+++ b/test/CodeGen/avx512bw-builtins.c
@@ -1507,3 +1507,26 @@ __m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A,__m512i __B){
 
 
 
+__m512i test_mm512_mm_dbsad_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mm_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512
+  return _mm512_dbsad_epu8(__A, __B, 170); // unmasked form; immediate 170 == 0xAA
+}
+
+__m512i test_mm512_mm_mask_dbsad_epu8(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mm_mask_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512
+  return _mm512_mask_dbsad_epu8(__W, __U, __A, __B, 170); // __W and __U forwarded; immediate 170 == 0xAA
+}
+
+__m512i test_mm512_mm_maskz_dbsad_epu8(__mmask32 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mm_maskz_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512
+  return _mm512_maskz_dbsad_epu8(__U, __A, __B, 170); // mask __U forwarded; immediate 170 == 0xAA
+}
+
+__m512i test_mm512_sad_epu8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sad_epu8
+  // CHECK: @llvm.x86.avx512.psad.bw.512
+  return _mm512_sad_epu8(__A, __B); // two operands only: no immediate, no mask
+}
diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c
index f72363d8e9c..69906fd7da7 100644
--- a/test/CodeGen/avx512vlbw-builtins.c
+++ b/test/CodeGen/avx512vlbw-builtins.c
@@ -2340,3 +2340,38 @@ __m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return _mm256_maskz_alignr_epi8(__U, __A, __B, 2); 
 }
 
+__m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128
+  return _mm_dbsad_epu8(__A, __B, 170); // unmasked form; immediate 170 == 0xAA
+}
+
+__m128i test_mm_mask_dbsad_epu8(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128
+  return _mm_mask_dbsad_epu8(__W, __U, __A, __B, 170); // __W and __U forwarded; immediate 170 == 0xAA
+}
+
+__m128i test_mm_maskz_dbsad_epu8(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128
+  return _mm_maskz_dbsad_epu8(__U, __A, __B, 170); // mask __U forwarded; immediate 170 == 0xAA
+}
+
+__m256i test_mm256_dbsad_epu8(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256
+  return _mm256_dbsad_epu8(__A, __B, 170); // unmasked form; immediate 170 == 0xAA
+}
+
+__m256i test_mm256_mask_dbsad_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256
+  return _mm256_mask_dbsad_epu8(__W, __U, __A, __B, 170); // __W and __U forwarded; immediate 170 == 0xAA
+}
+
+__m256i test_mm256_maskz_dbsad_epu8(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256
+  return _mm256_maskz_dbsad_epu8(__U, __A, __B, 170); // mask __U forwarded; immediate 170 == 0xAA
+}
-- 
GitLab