Skip to content
Snippets Groups Projects
Commit a2c687fa authored by Simon Pilgrim
Browse files

[X86][SSE4A] Use native IR for mask movntsd/movntss intrinsics.

Depends on llvm side commit r273002.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@273003 91177308-0d34-0410-b5e6-96231b3b80d8
parent 2927bf7a
No related branches found
No related tags found
No related merge requests found
......@@ -6848,6 +6848,26 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
SI->setAlignment(1);
return SI;
}
case X86::BI__builtin_ia32_movntsd:
case X86::BI__builtin_ia32_movntss: {
llvm::MDNode *Node = llvm::MDNode::get(
getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
// Extract the 0'th element of the source vector.
Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
// Convert the type of the pointer to a pointer to the stored type.
Value *BC = Builder.CreateBitCast(Ops[0],
llvm::PointerType::getUnqual(Scl->getType()),
"cast");
// Unaligned nontemporal store of the scalar value.
StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
SI->setAlignment(1);
return SI;
}
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
......
......@@ -33,12 +33,14 @@ __m128i test_mm_insert_si64(__m128i x, __m128i y) {
void test_mm_stream_sd(double *p, __m128d a) {
// CHECK-LABEL: test_mm_stream_sd
// CHECK: call void @llvm.x86.sse4a.movnt.sd(i8* %{{[^,]+}}, <2 x double> %{{[^,]+}})
_mm_stream_sd(p, a);
// CHECK: extractelement <2 x double> %{{.*}}, i64 0
// CHECK: store double %{{.*}}, double* %{{.*}}, align 1, !nontemporal
_mm_stream_sd(p, a);
}
void test_mm_stream_ss(float *p, __m128 a) {
// CHECK-LABEL: test_mm_stream_ss
// CHECK: call void @llvm.x86.sse4a.movnt.ss(i8* %{{[^,]+}}, <4 x float> %{{[^,]+}})
// CHECK: extractelement <4 x float> %{{.*}}, i64 0
// CHECK: store float %{{.*}}, float* %{{.*}}, align 1, !nontemporal
_mm_stream_ss(p, a);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment