From bee521a5c22741cf01852a9704cdf5e0f87e9ae3 Mon Sep 17 00:00:00 2001
From: Tony Jiang <jtony@ca.ibm.com>
Date: Wed, 24 May 2017 15:54:13 +0000
Subject: [PATCH] [PowerPC] Implement vec_xxsldwi builtin.

The vec_xxsldwi builtin is missing from altivec.h. This has been requested by
developers working on libvpx for VP9 support for Google.

The patch fixes PR: https://bugs.llvm.org/show_bug.cgi?id=32653
Differential Revision: https://reviews.llvm.org/D33236

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@303766 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/BuiltinsPPC.def |  1 +
 lib/CodeGen/CGBuiltin.cpp           | 41 +++++++++++++++++++++
 lib/Headers/altivec.h               |  1 +
 lib/Sema/SemaChecking.cpp           |  1 +
 test/CodeGen/builtins-ppc-error.c   |  8 +++++
 test/CodeGen/builtins-ppc-vsx.c     | 55 +++++++++++++++++++++++++++++
 6 files changed, 107 insertions(+)

diff --git a/include/clang/Basic/BuiltinsPPC.def b/include/clang/Basic/BuiltinsPPC.def
index 2d2689e1f4b..119490314b2 100644
--- a/include/clang/Basic/BuiltinsPPC.def
+++ b/include/clang/Basic/BuiltinsPPC.def
@@ -421,6 +421,7 @@ BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "")
 BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "")
 
 BUILTIN(__builtin_vsx_xxpermdi, "v.", "t")
+BUILTIN(__builtin_vsx_xxsldwi, "v.", "t")
 
 // HTM builtins
 BUILTIN(__builtin_tbegin, "UiUIi", "")
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index d1a3a9dfa12..f3a04a6a4e1 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -8475,6 +8475,47 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
     auto RetTy = ConvertType(BIRetType);
     return Builder.CreateBitCast(ShuffleCall, RetTy);
   }
+
+  case PPC::BI__builtin_vsx_xxsldwi: {
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI && "Third argument must be a compile time constant");
+    unsigned Index = ArgCI->getZExtValue() & 0x3;
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
+    Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
+
+    // Create a shuffle mask
+    unsigned ElemIdx0;
+    unsigned ElemIdx1;
+    unsigned ElemIdx2;
+    unsigned ElemIdx3;
+    if (getTarget().isLittleEndian()) {
+      // Little endian element N comes from element 8+N-Index of the
+      // concatenated wide vector (of course, using modulo arithmetic on
+      // the total number of elements).
+      ElemIdx0 = (8 - Index) % 8;
+      ElemIdx1 = (9 - Index) % 8;
+      ElemIdx2 = (10 - Index) % 8;
+      ElemIdx3 = (11 - Index) % 8;
+    } else {
+      // Big endian ElemIdx<N> = Index + N
+      ElemIdx0 = Index;
+      ElemIdx1 = Index + 1;
+      ElemIdx2 = Index + 2;
+      ElemIdx3 = Index + 3;
+    }
+
+    Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
+                                ConstantInt::get(Int32Ty, ElemIdx1),
+                                ConstantInt::get(Int32Ty, ElemIdx2),
+                                ConstantInt::get(Int32Ty, ElemIdx3)};
+
+    Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
+    Value *ShuffleCall =
+        Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
+    QualType BIRetType = E->getType();
+    auto RetTy = ConvertType(BIRetType);
+    return Builder.CreateBitCast(ShuffleCall, RetTy);
+  }
   }
 }
 
diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h
index e32e84c90f5..957fd5f65e2 100644
--- a/lib/Headers/altivec.h
+++ b/lib/Headers/altivec.h
@@ -12158,6 +12158,7 @@ static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a,
 
 #ifdef __VSX__
 #define vec_xxpermdi __builtin_vsx_xxpermdi
+#define vec_xxsldwi __builtin_vsx_xxsldwi
 #endif
 
 /* vec_xor */
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 5973a5ffde4..b794628db73 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -1697,6 +1697,7 @@ bool Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) ||
            SemaBuiltinConstantArgRange(TheCall, 2, 0, 31);
   case PPC::BI__builtin_vsx_xxpermdi:
+  case PPC::BI__builtin_vsx_xxsldwi:
     return SemaBuiltinVSX(TheCall);
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
diff --git a/test/CodeGen/builtins-ppc-error.c b/test/CodeGen/builtins-ppc-error.c
index 67730dca9f5..c3d6e639d82 100644
--- a/test/CodeGen/builtins-ppc-error.c
+++ b/test/CodeGen/builtins-ppc-error.c
@@ -26,3 +26,11 @@ void testXXPERMDI(int index) {
   vec_xxpermdi(1, 2, 3); //expected-error {{first two arguments to '__builtin_vsx_xxpermdi' must be vectors}}
   vec_xxpermdi(vsi, vuc, 2); //expected-error {{first two arguments to '__builtin_vsx_xxpermdi' must have the same type}}
 }
+
+void testXXSLDWI(int index) {
+  vec_xxsldwi(vsi); //expected-error {{too few arguments to function call, expected at least 3, have 1}}
+  vec_xxsldwi(vsi, vsi, 2, 4); //expected-error {{too many arguments to function call, expected at most 3, have 4}}
+  vec_xxsldwi(vsi, vsi, index); //expected-error {{argument 3 to '__builtin_vsx_xxsldwi' must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)}}
+  vec_xxsldwi(1, 2, 3); //expected-error {{first two arguments to '__builtin_vsx_xxsldwi' must be vectors}}
+  vec_xxsldwi(vsi, vuc, 2); //expected-error {{first two arguments to '__builtin_vsx_xxsldwi' must have the same type}}
+}
diff --git a/test/CodeGen/builtins-ppc-vsx.c b/test/CodeGen/builtins-ppc-vsx.c
index 0ae7306125b..520c001fbb6 100644
--- a/test/CodeGen/builtins-ppc-vsx.c
+++ b/test/CodeGen/builtins-ppc-vsx.c
@@ -1731,6 +1731,47 @@ res_vsc = vec_xxpermdi(vsc, vsc, 0);
 res_vuc = vec_xxpermdi(vuc, vuc, 1);
 // CHECK: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 0, i32 3>
 // CHECK-LE: shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 2, i32 1>
+
+res_vd = vec_xxsldwi(vd, vd, 0);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+res_vf = vec_xxsldwi(vf, vf, 1);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+
+res_vsll = vec_xxsldwi(vsll, vsll, 2);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+
+res_vull = vec_xxsldwi(vull, vull, 3);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+
+res_vsi = vec_xxsldwi(vsi, vsi, 0);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+res_vui = vec_xxsldwi(vui, vui, 1);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+
+res_vss = vec_xxsldwi(vss, vss, 2);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+
+
+res_vus = vec_xxsldwi(vus, vus, 3);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+
+res_vsc = vec_xxsldwi(vsc, vsc, 0);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+res_vuc = vec_xxsldwi(vuc, vuc, 1);
+// CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+// CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
 }
 
 // The return type of the call expression may be different from the return type of the shufflevector.
@@ -1748,3 +1789,17 @@ vector int xxpermdi_should_not_assert(vector int a, vector int b) {
 // CHECK-LE-NEXT:  shufflevector <2 x i64> %{{[0-9]+}}, <2 x i64> %{{[0-9]+}}, <2 x i32> <i32 3, i32 1>
 // CHECK-LE-NEXT:  bitcast <2 x i64> %{{[0-9]+}} to <4 x i32>
 }
+
+vector double xxsldwi_should_not_assert(vector double a, vector double b) {
+  return vec_xxsldwi(a, b, 0);
+// CHECK-LABEL: xxsldwi_should_not_assert
+// CHECK:  bitcast <2 x double> %0 to <4 x i32>
+// CHECK-NEXT:  bitcast <2 x double> %1 to <4 x i32>
+// CHECK-NEXT:  shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:  bitcast <4 x i32> %4 to <2 x double>
+
+// CHECK-LE:  bitcast <2 x double> %0 to <4 x i32>
+// CHECK-LE-NEXT:  bitcast <2 x double> %1 to <4 x i32>
+// CHECK-LE-NEXT:  shufflevector <4 x i32> %2, <4 x i32> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-LE-NEXT:  bitcast <4 x i32> %4 to <2 x double>
+}
-- 
GitLab