diff --git a/include/clang/Basic/BuiltinsARM.def b/include/clang/Basic/BuiltinsARM.def index 41fa0bf342dc44a79ab6a2d6b029f247462b5840..d219414f82a912fc7ebc3e1fec84f7de3b2f34bf 100644 --- a/include/clang/Basic/BuiltinsARM.def +++ b/include/clang/Basic/BuiltinsARM.def @@ -34,14 +34,14 @@ BUILTIN(__builtin_neon_vaddl_v, "V16cV8cV8ci", "n") BUILTIN(__builtin_neon_vaddw_v, "V16cV16cV8ci", "n") BUILTIN(__builtin_neon_vbsl_v, "V8cV8cV8cV8ci", "n") BUILTIN(__builtin_neon_vbslq_v, "V16cV16cV16cV16ci", "n") -BUILTIN(__builtin_neon_vcage_v, "V8cV8cV8ci", "n") -BUILTIN(__builtin_neon_vcageq_v, "V16cV16cV16ci", "n") -BUILTIN(__builtin_neon_vcagt_v, "V8cV8cV8ci", "n") -BUILTIN(__builtin_neon_vcagtq_v, "V16cV16cV16ci", "n") -BUILTIN(__builtin_neon_vcale_v, "V8cV8cV8ci", "n") -BUILTIN(__builtin_neon_vcaleq_v, "V16cV16cV16ci", "n") -BUILTIN(__builtin_neon_vcalt_v, "V8cV8cV8ci", "n") -BUILTIN(__builtin_neon_vcaltq_v, "V16cV16cV16ci", "n") +BUILTIN(__builtin_neon_vcage_v, "V2iV8cV8ci", "n") +BUILTIN(__builtin_neon_vcageq_v, "V4iV16cV16ci", "n") +BUILTIN(__builtin_neon_vcagt_v, "V2iV8cV8ci", "n") +BUILTIN(__builtin_neon_vcagtq_v, "V4iV16cV16ci", "n") +BUILTIN(__builtin_neon_vcale_v, "V2iV8cV8ci", "n") +BUILTIN(__builtin_neon_vcaleq_v, "V4iV16cV16ci", "n") +BUILTIN(__builtin_neon_vcalt_v, "V2iV8cV8ci", "n") +BUILTIN(__builtin_neon_vcaltq_v, "V4iV16cV16ci", "n") BUILTIN(__builtin_neon_vcls_v, "V8cV8ci", "n") BUILTIN(__builtin_neon_vclsq_v, "V16cV16ci", "n") BUILTIN(__builtin_neon_vclz_v, "V8cV8ci", "n") @@ -54,14 +54,14 @@ BUILTIN(__builtin_neon_vcvtq_f32_v, "V4fV16ci", "n") BUILTIN(__builtin_neon_vcvt_f32_f16, "V16cV8ci", "n") BUILTIN(__builtin_neon_vcvt_n_f32_v, "V2fV8cii", "n") BUILTIN(__builtin_neon_vcvtq_n_f32_v, "V4fV16cii", "n") -BUILTIN(__builtin_neon_vcvt_n_s32_v, "V8cV8cii", "n") -BUILTIN(__builtin_neon_vcvtq_n_s32_v, "V16cV16cii", "n") -BUILTIN(__builtin_neon_vcvt_n_u32_v, "V8cV8cii", "n") -BUILTIN(__builtin_neon_vcvtq_n_u32_v, "V16cV16cii", "n") -BUILTIN(__builtin_neon_vcvt_s32_v, "V8cV8ci", "n") -BUILTIN(__builtin_neon_vcvtq_s32_v, "V16cV16ci", "n") -BUILTIN(__builtin_neon_vcvt_u32_v, "V8cV8ci", "n") -BUILTIN(__builtin_neon_vcvtq_u32_v, "V16cV16ci", "n") +BUILTIN(__builtin_neon_vcvt_n_s32_v, "V2iV8cii", "n") +BUILTIN(__builtin_neon_vcvtq_n_s32_v, "V4iV16cii", "n") +BUILTIN(__builtin_neon_vcvt_n_u32_v, "V2iV8cii", "n") +BUILTIN(__builtin_neon_vcvtq_n_u32_v, "V4iV16cii", "n") +BUILTIN(__builtin_neon_vcvt_s32_v, "V2iV8ci", "n") +BUILTIN(__builtin_neon_vcvtq_s32_v, "V4iV16ci", "n") +BUILTIN(__builtin_neon_vcvt_u32_v, "V2iV8ci", "n") +BUILTIN(__builtin_neon_vcvtq_u32_v, "V4iV16ci", "n") BUILTIN(__builtin_neon_vext_v, "V8cV8cV8cii", "n") BUILTIN(__builtin_neon_vextq_v, "V16cV16cV16cii", "n") BUILTIN(__builtin_neon_vget_high_v, "V8cV16ci", "n") @@ -203,15 +203,15 @@ BUILTIN(__builtin_neon_vqrdmulhq_n_s32, "V16cV16ci", "n") BUILTIN(__builtin_neon_vqrshl_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vqrshlq_v, "V16cV16cV16ci", "n") BUILTIN(__builtin_neon_vqrshrn_n_v, "V8cV16cii", "n") -BUILTIN(__builtin_neon_vqrshrun_n_v, "V8cV16cii", "n") +BUILTIN(__builtin_neon_vqrshrun_n_v, "V2iV16cii", "n") BUILTIN(__builtin_neon_vqshl_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vqshlq_v, "V16cV16cV16ci", "n") -BUILTIN(__builtin_neon_vqshlu_n_v, "V8cV8cii", "n") -BUILTIN(__builtin_neon_vqshluq_n_v, "V16cV16cii", "n") +BUILTIN(__builtin_neon_vqshlu_n_v, "V2iV8cii", "n") +BUILTIN(__builtin_neon_vqshluq_n_v, "V4iV16cii", "n") BUILTIN(__builtin_neon_vqshl_n_v, "V8cV8cii", "n") BUILTIN(__builtin_neon_vqshlq_n_v, "V16cV16cii", "n") BUILTIN(__builtin_neon_vqshrn_n_v, "V8cV16cii", "n") -BUILTIN(__builtin_neon_vqshrun_n_v, "V8cV16cii", "n") +BUILTIN(__builtin_neon_vqshrun_n_v, "V2iV16cii", "n") BUILTIN(__builtin_neon_vqsub_v, "V8cV8cV8ci", "n") BUILTIN(__builtin_neon_vqsubq_v, "V16cV16cV16ci", "n") BUILTIN(__builtin_neon_vraddhn_v, "V8cV16cV16ci", "n") @@ -292,8 +292,8 @@ BUILTIN(__builtin_neon_vtbx3_v, "V8cV8cV8cV8cV8cV8ci", "n") BUILTIN(__builtin_neon_vtbx4_v, "V8cV8cV8cV8cV8cV8cV8ci", "n") BUILTIN(__builtin_neon_vtrn_v, "V16cV8cV8ci", "n") BUILTIN(__builtin_neon_vtrnq_v, "V32cV16cV16ci", "n") -BUILTIN(__builtin_neon_vtst_v, "V8cV8cV8ci", "n") -BUILTIN(__builtin_neon_vtstq_v, "V16cV16cV16ci", "n") +BUILTIN(__builtin_neon_vtst_v, "V2iV8cV8ci", "n") +BUILTIN(__builtin_neon_vtstq_v, "V4iV16cV16ci", "n") BUILTIN(__builtin_neon_vuzp_v, "V16cV8cV8ci", "n") BUILTIN(__builtin_neon_vuzpq_v, "V32cV16cV16ci", "n") BUILTIN(__builtin_neon_vzip_v, "V16cV8cV8ci", "n") diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index d9d9e075e1c3169e10755084dc8afb1b408e53e5..398e63c6e264823ce33737a3ed27adde34ee7486 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -894,15 +894,25 @@ const llvm::Type *GetNeonType(LLVMContext &Ctx, unsigned type, bool q) { return 0; } +Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, + const char *name) { + unsigned j = 0; + for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); + ai != ae; ++ai, ++j) + Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); + + return Builder.CreateCall(F, Ops.begin(), Ops.end(), name); +} + Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::SmallVector<Value*, 4> Ops; - bool usgn, poly, half; + bool usgn, quad, poly, half; const llvm::Type *Ty; unsigned Int; // Determine the type of this overloaded NEON intrinsic. - if (BuiltinID != ARM::BI__clear_cache) { + if (BuiltinID > ARM::BI__builtin_thread_pointer) { for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) Ops.push_back(EmitScalarExpr(E->getArg(i))); @@ -912,13 +922,14 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return 0; unsigned type = Result.getZExtValue(); - Ty = GetNeonType(VMContext, type & 0x7, type & 0x10); - if (!Ty) - return 0; - usgn = type & 0x08; + quad = type & 0x10; poly = type == 5 || type == 6; half = type == 7; + + Ty = GetNeonType(VMContext, type & 0x7, quad); + if (!Ty) + return 0; } switch (BuiltinID) { @@ -933,87 +944,80 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall2(CGM.CreateRuntimeFunction(FTy, Name), a, b); } - // FIXME: bitcast args, return. case ARM::BI__builtin_neon_vaba_v: - case ARM::BI__builtin_neon_vabaq_v: { + case ARM::BI__builtin_neon_vabaq_v: Int = usgn ? Intrinsic::arm_neon_vabau : Intrinsic::arm_neon_vabas; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 3, "vaba"); - } - case ARM::BI__builtin_neon_vabal_v: { + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaba"); + case ARM::BI__builtin_neon_vabal_v: Int = usgn ? Intrinsic::arm_neon_vabalu : Intrinsic::arm_neon_vabals; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 3, "vabal"); - } + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabal"); case ARM::BI__builtin_neon_vabd_v: - case ARM::BI__builtin_neon_vabdq_v: { + case ARM::BI__builtin_neon_vabdq_v: Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vabd"); - } - case ARM::BI__builtin_neon_vabdl_v: { + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd"); + case ARM::BI__builtin_neon_vabdl_v: Int = usgn ? Intrinsic::arm_neon_vabdlu : Intrinsic::arm_neon_vabdls; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vabdl"); - } + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabdl"); case ARM::BI__builtin_neon_vabs_v: case ARM::BI__builtin_neon_vabsq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vabs"); + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1); + return EmitNeonCall(F, Ops, "vabs"); } case ARM::BI__builtin_neon_vaddhn_v: { - Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vaddhn"); + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1); + return EmitNeonCall(F, Ops, "vaddhn"); } - case ARM::BI__builtin_neon_vaddl_v: { + case ARM::BI__builtin_neon_vaddl_v: Int = usgn ? Intrinsic::arm_neon_vaddlu : Intrinsic::arm_neon_vaddls; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vaddl"); - } - case ARM::BI__builtin_neon_vaddw_v: { + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddl"); + case ARM::BI__builtin_neon_vaddw_v: Int = usgn ? Intrinsic::arm_neon_vaddws : Intrinsic::arm_neon_vaddwu; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vaddw"); - } + return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddw"); // FIXME: vbsl -> or ((0 & 1), (0 & 2)) in arm_neon.h case ARM::BI__builtin_neon_vcale_v: std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcage_v: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacged), - &Ops[0], &Ops[0] + 2, "vcage"); + case ARM::BI__builtin_neon_vcage_v: { + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged, &Ty, 1); + return EmitNeonCall(F, Ops, "vcage"); + } case ARM::BI__builtin_neon_vcaleq_v: std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcageq_v: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq), - &Ops[0], &Ops[0] + 2, "vcage"); + case ARM::BI__builtin_neon_vcageq_v: { + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq, &Ty, 1); + return EmitNeonCall(F, Ops, "vcage"); + } case ARM::BI__builtin_neon_vcalt_v: std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcagt_v: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd), - &Ops[0], &Ops[0] + 2, "vcagt"); + case ARM::BI__builtin_neon_vcagt_v: { + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd, &Ty, 1); + return EmitNeonCall(F, Ops, "vcagt"); + } case ARM::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); - case ARM::BI__builtin_neon_vcagtq_v: - return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq), - &Ops[0], &Ops[0] + 2, "vcagt"); + case ARM::BI__builtin_neon_vcagtq_v: { + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq, &Ty, 1); + return EmitNeonCall(F, Ops, "vcagt"); + } case ARM::BI__builtin_neon_vcls_v: case ARM::BI__builtin_neon_vclsq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vcls"); + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1); + return EmitNeonCall(F, Ops, "vcls"); } case ARM::BI__builtin_neon_vclz_v: case ARM::BI__builtin_neon_vclzq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vclz"); + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1); + return EmitNeonCall(F, Ops, "vclz"); } case ARM::BI__builtin_neon_vcnt_v: case ARM::BI__builtin_neon_vcntq_v: { - Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vcnt"); + Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1); + return EmitNeonCall(F, Ops, "vcnt"); } // FIXME: intrinsics for f16<->f32 convert missing from ARM target. case ARM::BI__builtin_neon_vcvt_f32_v: case ARM::BI__builtin_neon_vcvtq_f32_v: { + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ty = GetNeonType(VMContext, 4, quad); return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); } @@ -1021,22 +1025,44 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, case ARM::BI__builtin_neon_vcvt_u32_v: case ARM::BI__builtin_neon_vcvtq_s32_v: case ARM::BI__builtin_neon_vcvtq_u32_v: { + Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(VMContext, 4, quad)); return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } + // FIXME: these intrinsics often do not work due to the fragility of bitcasts + // coming and going during codegen. case ARM::BI__builtin_neon_vcvt_n_f32_v: case ARM::BI__builtin_neon_vcvtq_n_f32_v: { + const llvm::Type *Tys[2] = { GetNeonType(VMContext, 4, quad), Ty }; Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vcvt_n"); + Function *F = CGM.getIntrinsic(Int, Tys, 2); + return EmitNeonCall(F, Ops, "vcvt_n"); } case ARM::BI__builtin_neon_vcvt_n_s32_v: case ARM::BI__builtin_neon_vcvt_n_u32_v: case ARM::BI__builtin_neon_vcvtq_n_s32_v: case ARM::BI__builtin_neon_vcvtq_n_u32_v: { + const llvm::Type *Tys[2] = { Ty, GetNeonType(VMContext, 4, quad) }; Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs; - Value *F = CGM.getIntrinsic(Int, &Ty, 1); - return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vcvt_n"); + Function *F = CGM.getIntrinsic(Int, Tys, 2); + return EmitNeonCall(F, Ops, "vcvt_n"); + } + case ARM::BI__builtin_neon_vext_v: + case ARM::BI__builtin_neon_vextq_v: { + ConstantInt *C = dyn_cast<ConstantInt>(Ops[2]); + int CV = C->getSExtValue(); + + SmallVector<Constant*, 8> Indices; + + const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext); + for (unsigned i = 0, e = cast<llvm::VectorType>(Ty)->getNumElements(); + i != e; ++i) + Indices.push_back(ConstantInt::get(I32Ty, i+CV)); + + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size()); + return Builder.CreateShuffleVector(Ops[0], Ops[1], SV); } } } diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index ece275e7629e894c68faa058ace763a01a6cf31e..a0e5da1a0f6ff4b7c8f3940b8d7b16df176bfff0 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -1146,6 +1146,10 @@ public: llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitNeonCall(llvm::Function *F, + llvm::SmallVectorImpl<llvm::Value*> &O, + const char *name); + llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);