From 154e808228a44269dae211eaf3ac2ead854cc25a Mon Sep 17 00:00:00 2001 From: Matt Arsenault <Matthew.Arsenault@amd.com> Date: Tue, 28 Jun 2016 00:13:17 +0000 Subject: [PATCH] AMDGPU: Add builtin to read exec mask git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@273965 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsAMDGPU.def | 8 ++++++++ lib/CodeGen/CGBuiltin.cpp | 18 ++++++++++++++---- test/CodeGenOpenCL/builtins-amdgcn.cl | 12 ++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/include/clang/Basic/BuiltinsAMDGPU.def b/include/clang/Basic/BuiltinsAMDGPU.def index d75142a0ca8..1ebd9fe7f18 100644 --- a/include/clang/Basic/BuiltinsAMDGPU.def +++ b/include/clang/Basic/BuiltinsAMDGPU.def @@ -18,6 +18,9 @@ # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif +//===----------------------------------------------------------------------===// +// Instruction builtins. +//===----------------------------------------------------------------------===// BUILTIN(__builtin_amdgcn_s_barrier, "v", "n") BUILTIN(__builtin_amdgcn_div_scale, "dddbb*", "n") BUILTIN(__builtin_amdgcn_div_scalef, "fffbb*", "n") @@ -59,6 +62,11 @@ BUILTIN(__builtin_amdgcn_s_sleep, "vIi", "n") TARGET_BUILTIN(__builtin_amdgcn_s_memrealtime, "LUi", "n", "s-memrealtime") +//===----------------------------------------------------------------------===// +// Special builtins. +//===----------------------------------------------------------------------===// +BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc") + //===----------------------------------------------------------------------===// // Legacy names with amdgpu prefix //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 7989edbdd41..e097457ea2b 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -3650,7 +3650,9 @@ Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, - llvm::Type *ValueType, bool IsRead) { + llvm::Type *ValueType, + bool IsRead, + StringRef SysReg = "") { // write and register intrinsics only support 32 and 64 bit operations. assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) && "Unsupported size for register."); @@ -3659,8 +3661,10 @@ static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, CodeGen::CodeGenModule &CGM = CGF.CGM; LLVMContext &Context = CGM.getLLVMContext(); - const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); - StringRef SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + if (SysReg.empty()) { + const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); + SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); + } llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); @@ -7413,7 +7417,13 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_classf: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); - // Legacy amdgpu prefix + case AMDGPU::BI__builtin_amdgcn_read_exec: { + CallInst *CI = cast<CallInst>( + EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); + CI->setConvergent(); + return CI; + } + // Legacy amdgpu prefix case AMDGPU::BI__builtin_amdgpu_rsq: case AMDGPU::BI__builtin_amdgpu_rsqf: { if (getTarget().getTriple().getArch() == Triple::amdgcn) diff --git a/test/CodeGenOpenCL/builtins-amdgcn.cl b/test/CodeGenOpenCL/builtins-amdgcn.cl index 36164f898bc..641bcb2d20b 100644 --- a/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -253,6 +253,14 @@ void test_cubema(global float* out, float a, float b, float c) { *out = __builtin_amdgcn_cubema(a, b, c); } +// CHECK-LABEL: @test_read_exec( +// CHECK: call i64 @llvm.read_register.i64(metadata ![[EXEC:[0-9]+]]) #[[READ_EXEC_ATTRS:[0-9]+]] +void test_read_exec(global ulong* out) { + *out = __builtin_amdgcn_read_exec(); +} + +// CHECK: declare i64 @llvm.read_register.i64(metadata) #[[NOUNWIND_READONLY:[0-9]+]] + // Legacy intrinsics with AMDGPU prefix // CHECK-LABEL: @test_legacy_rsq_f32 @@ -282,3 +290,7 @@ void test_legacy_ldexp_f64(global double* out, double a, int b) { *out = __builtin_amdgpu_ldexp(a, b); } + +// CHECK-DAG: attributes #[[NOUNWIND_READONLY:[0-9]+]] = { nounwind readonly } +// CHECK-DAG: attributes #[[READ_EXEC_ATTRS]] = { convergent } +// CHECK: ![[EXEC]] = !{!"exec"} -- GitLab