From 26f4742738df55dc94df441f85c9e7c1e207f1ed Mon Sep 17 00:00:00 2001
From: Michael Zuckerman <Michael.zuckerman@intel.com>
Date: Wed, 1 Jun 2016 14:41:41 +0000
Subject: [PATCH] [Clang][Intrinsics][avx512] Continue Adding round cvt to
 clang

Also remove trailing whitespace in the AVX-512F intrinsics test (test/CodeGen/avx512f-builtins.c).
Differential Revision: http://reviews.llvm.org/D20810


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@271398 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Headers/avx512fintrin.h     |  66 ++++++++++++++++++-
 test/CodeGen/avx512f-builtins.c | 108 +++++++++++++++++++++++++-------
 2 files changed, 151 insertions(+), 23 deletions(-)

diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index 4c2d5c848c1..2f311f79f3b 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -3585,6 +3585,27 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
 
 /* Conversion */
 
+#define _mm512_cvtt_roundps_epu32( __A, __R) __extension__ ({ \
+  /* Mask is all-ones; operand 2 is an undefined placeholder. */ \
+  (__m512i)__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
+                 (__v16si)_mm512_undefined_epi32 (),\
+                 (__mmask16) -1,( __R));\
+})
+
+#define _mm512_mask_cvtt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \
+  /* __W is passed as operand 2 and __U as the 16-bit mask. */ \
+  (__m512i)__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
+                 (__v16si)( __W), (__mmask16)( __U),( __R));\
+})
+
+#define _mm512_maskz_cvtt_roundps_epu32( __U, __A, __R) __extension__ ({ \
+  /* Operand 2 is an all-zero vector; __U is the 16-bit mask. */ \
+  (__m512i)__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\
+                 (__v16si)_mm512_setzero_si512 (),\
+                 (__mmask16)( __U),( __R));\
+})
+
+
 static __inline __m512i __DEFAULT_FN_ATTRS
 _mm512_cvttps_epu32(__m512 __A)
 {
@@ -3795,6 +3816,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
                 _MM_FROUND_CUR_DIRECTION);
 }
 
+#define _mm512_cvt_roundps_ph( __A, __I) __extension__ ({ \
+  /* Mask is all-ones; operand 3 is an undefined placeholder. */ \
+  (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A), (__I),\
+                            (__v16hi)_mm256_undefined_si256 (),\
+                            (__mmask16) -1);\
+})
+
+#define _mm512_mask_cvt_roundps_ph( __W, __U, __A, __I) __extension__ ({ \
+  /* __W is passed as operand 3 and __U as the 16-bit mask. */ \
+  (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A), (__I),\
+                                             (__v16hi)( __W),\
+                                             (__mmask16)( __U));\
+})
+
+#define _mm512_maskz_cvt_roundps_ph( __U, __A, __I) __extension__ ({ \
+  /* Operand 3 is an all-zero vector; __U is the 16-bit mask. */ \
+  (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\
+                                             (__I),\
+                                             (__v16hi)_mm256_setzero_si256 (),\
+                                             (__mmask16)( __U));\
+})
+
 #define _mm512_cvtps_ph(A, I) __extension__ ({ \
   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
@@ -3809,7 +3852,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
   (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)); })
-                   
+
+#define _mm512_cvt_roundph_ps( __A, __R) __extension__ ({ \
+  /* Mask is all-ones; operand 2 is an undefined placeholder. */ \
+  (__m512)__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
+               (__v16sf)_mm512_undefined_ps (),\
+               (__mmask16) -1,( __R));\
+})
+
+#define _mm512_mask_cvt_roundph_ps( __W, __U, __A, __R) __extension__ ({ \
+  /* __W is passed as operand 2 and __U as the 16-bit mask. */ \
+  (__m512)__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
+               (__v16sf)( __W), (__mmask16)( __U),( __R));\
+})
+
+#define _mm512_maskz_cvt_roundph_ps( __U, __A, __R) __extension__ ({ \
+  /* Operand 2 is an all-zero vector; __U is the 16-bit mask. */ \
+  (__m512)__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\
+               (__v16sf)_mm512_setzero_ps (),\
+               (__mmask16)( __U),( __R));\
+})
+
+
 static  __inline __m512 __DEFAULT_FN_ATTRS
 _mm512_cvtph_ps(__m256i __A)
 {
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 6e49d262f98..0acd3c81a45 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -2948,19 +2948,19 @@ __m512 test_mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B) {
 int test_mm_cvt_roundsd_si32(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvt_roundsd_si32
   // CHECK: @llvm.x86.avx512.vcvtsd2si32
-  return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 int test_mm_cvt_roundsd_i32(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvt_roundsd_i32
   // CHECK: @llvm.x86.avx512.vcvtsd2si32
-  return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned test_mm_cvt_roundsd_u32(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvt_roundsd_u32
   // CHECK: @llvm.x86.avx512.vcvtsd2usi32
-  return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned test_mm_cvtsd_u32(__m128d __A) {
@@ -2972,7 +2972,7 @@ unsigned test_mm_cvtsd_u32(__m128d __A) {
 unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvt_roundsd_u64
   // CHECK: @llvm.x86.avx512.vcvtsd2usi64
-  return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned long long test_mm_cvtsd_u64(__m128d __A) {
@@ -2984,31 +2984,31 @@ unsigned long long test_mm_cvtsd_u64(__m128d __A) {
 int test_mm_cvt_roundss_si32(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvt_roundss_si32
   // CHECK: @llvm.x86.avx512.vcvtss2si32
-  return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 int test_mm_cvt_roundss_i32(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvt_roundss_i32
   // CHECK: @llvm.x86.avx512.vcvtss2si32
-  return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 int test_mm_cvt_roundss_si64(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvt_roundss_si64
   // CHECK: @llvm.x86.avx512.vcvtss2si64
-  return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 long long test_mm_cvt_roundss_i64(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvt_roundss_i64
   // CHECK: @llvm.x86.avx512.vcvtss2si64
-  return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned test_mm_cvt_roundss_u32(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvt_roundss_u32
   // CHECK: @llvm.x86.avx512.vcvtss2usi32
-  return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned test_mm_cvtss_u32(__m128 __A) {
@@ -3020,7 +3020,7 @@ unsigned test_mm_cvtss_u32(__m128 __A) {
 unsigned long long test_mm_cvt_roundss_u64(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvt_roundss_u64
   // CHECK: @llvm.x86.avx512.vcvtss2usi64
-  return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned long long test_mm_cvtss_u64(__m128 __A) {
@@ -3032,13 +3032,13 @@ unsigned long long test_mm_cvtss_u64(__m128 __A) {
 int test_mm_cvtt_roundsd_i32(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundsd_i32
   // CHECK: @llvm.x86.avx512.cvttsd2si
-  return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 int test_mm_cvtt_roundsd_si32(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundsd_si32
   // CHECK: @llvm.x86.avx512.cvttsd2si
-  return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 int test_mm_cvttsd_i32(__m128d __A) {
@@ -3050,13 +3050,13 @@ int test_mm_cvttsd_i32(__m128d __A) {
 unsigned long long test_mm_cvtt_roundsd_si64(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundsd_si64
   // CHECK: @llvm.x86.avx512.cvttsd2si64
-  return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 long long test_mm_cvtt_roundsd_i64(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundsd_i64
   // CHECK: @llvm.x86.avx512.cvttsd2si64
-  return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 long long test_mm_cvttsd_i64(__m128d __A) {
@@ -3068,7 +3068,7 @@ long long test_mm_cvttsd_i64(__m128d __A) {
 unsigned test_mm_cvtt_roundsd_u32(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundsd_u32
   // CHECK: @llvm.x86.avx512.cvttsd2usi
-  return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned test_mm_cvttsd_u32(__m128d __A) {
@@ -3080,7 +3080,7 @@ unsigned test_mm_cvttsd_u32(__m128d __A) {
 unsigned long long test_mm_cvtt_roundsd_u64(__m128d __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundsd_u64
   // CHECK: @llvm.x86.avx512.cvttsd2usi64
-  return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned long long test_mm_cvttsd_u64(__m128d __A) {
@@ -3092,13 +3092,13 @@ unsigned long long test_mm_cvttsd_u64(__m128d __A) {
 int test_mm_cvtt_roundss_i32(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundss_i32
   // CHECK: @llvm.x86.avx512.cvttss2si
-  return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 int test_mm_cvtt_roundss_si32(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundss_si32
   // CHECK: @llvm.x86.avx512.cvttss2si
-  return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 int test_mm_cvttss_i32(__m128 __A) {
@@ -3110,13 +3110,13 @@ int test_mm_cvttss_i32(__m128 __A) {
 float test_mm_cvtt_roundss_i64(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundss_i64
   // CHECK: @llvm.x86.avx512.cvttss2si64
-  return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 long long test_mm_cvtt_roundss_si64(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundss_si64
   // CHECK: @llvm.x86.avx512.cvttss2si64
-  return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 long long test_mm_cvttss_i64(__m128 __A) {
@@ -3128,7 +3128,7 @@ long long test_mm_cvttss_i64(__m128 __A) {
 unsigned test_mm_cvtt_roundss_u32(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundss_u32
   // CHECK: @llvm.x86.avx512.cvttss2usi
-  return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned test_mm_cvttss_u32(__m128 __A) {
@@ -3140,7 +3140,7 @@ unsigned test_mm_cvttss_u32(__m128 __A) {
 unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) {
   // CHECK-LABEL: @test_mm_cvtt_roundss_u64
   // CHECK: @llvm.x86.avx512.cvttss2usi64
-  return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); 
+  return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION);
 }
 
 unsigned long long test_mm_cvttss_u64(__m128 __A) {
@@ -3149,6 +3149,70 @@ unsigned long long test_mm_cvttss_u64(__m128 __A) {
   return _mm_cvttss_u64(__A); 
 }
 
+__m512i test_mm512_cvtt_roundps_epu32(__m512 __A)
+{
+    // CHECK-LABEL: @test_mm512_cvtt_roundps_epu32
+    // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512
+    return _mm512_cvtt_roundps_epu32(__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m512i test_mm512_mask_cvtt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
+{
+    // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu32
+    // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512
+    return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m512i test_mm512_maskz_cvtt_roundps_epu32(__mmask16 __U, __m512 __A)
+{
+    // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu32
+    // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512
+
+    return _mm512_maskz_cvtt_roundps_epu32(__U, __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m256i test_mm512_cvt_roundps_ph(__m512 __A)
+{
+    // CHECK-LABEL: @test_mm512_cvt_roundps_ph
+    // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512
+    return _mm512_cvt_roundps_ph(__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m256i test_mm512_mask_cvt_roundps_ph(__m256i __W, __mmask16 __U, __m512 __A)
+{
+    // CHECK-LABEL: @test_mm512_mask_cvt_roundps_ph
+    // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512
+    return _mm512_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m256i test_mm512_maskz_cvt_roundps_ph(__mmask16 __U, __m512 __A)
+{
+    // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_ph
+    // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512
+    return _mm512_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m512 test_mm512_cvt_roundph_ps(__m256i __A)
+{
+    // CHECK-LABEL: @test_mm512_cvt_roundph_ps
+    // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
+    return _mm512_cvt_roundph_ps(__A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A)
+{
+    // CHECK-LABEL: @test_mm512_mask_cvt_roundph_ps
+    // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
+    return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION);
+}
+
+__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A)
+{
+    // CHECK-LABEL: @test_mm512_maskz_cvt_roundph_ps
+    // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512
+    return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_CUR_DIRECTION);
+}
+
 __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
 {
   // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps
-- 
GitLab