diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index ce8dced1bc00299b6039417f06f1877a3798b5c3..acf2eb6acbb6307867591e059a5fdfa47825d005 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -582,7 +582,6 @@ BUILTIN(__builtin_ia32_movntdqa256, "V4LLiV4LLi*", "") BUILTIN(__builtin_ia32_vbroadcastss_ps, "V4fV4f", "") BUILTIN(__builtin_ia32_vbroadcastss_ps256, "V8fV4f", "") BUILTIN(__builtin_ia32_vbroadcastsd_pd256, "V4dV2d", "") -BUILTIN(__builtin_ia32_vbroadcastsi256, "V4LLiV2LLi", "") BUILTIN(__builtin_ia32_pbroadcastb256, "V32cV16c", "") BUILTIN(__builtin_ia32_pbroadcastw256, "V16sV8s", "") BUILTIN(__builtin_ia32_pbroadcastd256, "V8iV4i", "") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index bf7d86fb12fa2f1421b2e66288891045e3948399..d3d1e22b4bb1a40f4717eb2e72ba5f23b184475a 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -6090,13 +6090,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]); return Builder.CreateExtractValue(Call, 1); } - // AVX2 broadcast - case X86::BI__builtin_ia32_vbroadcastsi256: { - Value *VecTmp = CreateMemTemp(E->getArg(0)->getType()); - Builder.CreateStore(Ops[0], VecTmp); - Value *F = CGM.getIntrinsic(Intrinsic::x86_avx2_vbroadcasti128); - return Builder.CreateCall(F, Builder.CreateBitCast(VecTmp, Int8PtrTy)); - } // SSE comparison intrisics case X86::BI__builtin_ia32_cmpeqps: case X86::BI__builtin_ia32_cmpltps: diff --git a/lib/Headers/avx2intrin.h b/lib/Headers/avx2intrin.h index 394fdfee96522ca4b4cd5cfe44f7a97f1b82f006..af43bec0efdd0496c544e1f13c5565502a727cd4 100644 --- a/lib/Headers/avx2intrin.h +++ b/lib/Headers/avx2intrin.h @@ -771,7 +771,7 @@ _mm256_broadcastsd_pd(__m128d __X) static __inline__ __m256i __attribute__((__always_inline__, __nodebug__)) _mm256_broadcastsi128_si256(__m128i __X) { - return (__m256i)__builtin_ia32_vbroadcastsi256(__X); + return (__m256i)__builtin_shufflevector(__X, __X, 0, 1, 0, 1); } #define _mm_blend_epi32(V1, V2, M) __extension__ ({ \ diff --git a/test/CodeGen/avx2-builtins.c b/test/CodeGen/avx2-builtins.c index 27ee91e4ada24451dd2a4929d1778be229049e66..371f9c6ee24c8a3aafad92221a0b98108018b185 100644 --- a/test/CodeGen/avx2-builtins.c +++ b/test/CodeGen/avx2-builtins.c @@ -612,7 +612,7 @@ __m256d test_mm256_broadcastsd_pd(__m128d a) { } __m256i test_mm256_broadcastsi128_si256(__m128i a) { - // CHECK: @llvm.x86.avx2.vbroadcasti128 + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 0, i32 1> return _mm256_broadcastsi128_si256(a); }