diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 0247bb5dd0d75cc22cb386f79cf0cc0f2c7d846d..8208a2c962dc9e8a95188b6ce0a7af98fa5441dc 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -944,13 +944,6 @@ def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">; def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>; def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">; def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>; -def FRINTN : SInst<"vrndn", "dd", "fdQfQd">; -def FRINTA : SInst<"vrnda", "dd", "fdQfQd">; -def FRINTP : SInst<"vrndp", "dd", "fdQfQd">; -def FRINTM : SInst<"vrndm", "dd", "fdQfQd">; -def FRINTX : SInst<"vrndx", "dd", "fdQfQd">; -def FRINTZ : SInst<"vrnd", "dd", "fdQfQd">; -def FRINTI : SInst<"vrndi", "dd", "fdQfQd">; def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">; def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">; def FRECPE : SInst<"vrecpe", "dd", "dQd">; @@ -982,11 +975,6 @@ def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; def MAX : SInst<"vmax", "ddd", "dQd">; def MIN : SInst<"vmin", "ddd", "dQd">; -//////////////////////////////////////////////////////////////////////////////// -// MaxNum/MinNum Floating Point -def FMAXNM : SInst<"vmaxnm", "ddd", "fdQfQd">; -def FMINNM : SInst<"vminnm", "ddd", "fdQfQd">; - //////////////////////////////////////////////////////////////////////////////// // Pairwise Max/Min def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">; @@ -1222,6 +1210,41 @@ def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; } +//////////////////////////////////////////////////////////////////////////////// +// Round to Integral + +let ArchGuard = "__ARM_ARCH >= 8" in { +def FRINTN_S32 : SInst<"vrndn", "dd", "fQf">; +def FRINTA_S32 : SInst<"vrnda", "dd", "fQf">; +def FRINTP_S32 : SInst<"vrndp", "dd", "fQf">; +def FRINTM_S32 : SInst<"vrndm", "dd", "fQf">; +def FRINTX_S32 : SInst<"vrndx", "dd", "fQf">; +def FRINTZ_S32 : SInst<"vrnd", "dd", "fQf">; +} + +let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in { +def FRINTN_S64 : SInst<"vrndn", "dd", "dQd">; +def FRINTA_S64 : SInst<"vrnda", "dd", "dQd">; +def FRINTP_S64 : SInst<"vrndp", "dd", "dQd">; +def FRINTM_S64 : SInst<"vrndm", "dd", "dQd">; +def FRINTX_S64 : SInst<"vrndx", "dd", "dQd">; +def FRINTZ_S64 : SInst<"vrnd", "dd", "dQd">; +def FRINTI_S64 : SInst<"vrndi", "dd", "fdQfQd">; +} + +//////////////////////////////////////////////////////////////////////////////// +// MaxNum/MinNum Floating Point + +let ArchGuard = "__ARM_ARCH >= 8" in { +def FMAXNM_S32 : SInst<"vmaxnm", "ddd", "fQf">; +def FMINNM_S32 : SInst<"vminnm", "ddd", "fQf">; +} + +let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in { +def FMAXNM_S64 : SInst<"vmaxnm", "ddd", "dQd">; +def FMINNM_S64 : SInst<"vminnm", "ddd", "dQd">; +} + //////////////////////////////////////////////////////////////////////////////// // Permutation def VTRN1 : SOpInst<"vtrn1", "ddd", diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 6b14796160fe091d58bfa2e9035806c618f9ff1b..401764a8ddf43a0cf71c93a1fb882f506a7a9981 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1995,8 +1995,12 @@ static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4q_v, arm_neon_vld4, 0), NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), + NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), + NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), + NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), + NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), @@ -2043,6 +2047,18 @@ static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), + NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), + NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), + NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), + NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), + NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), + NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), + NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), + NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), + NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), + NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), + NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), + NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), diff --git a/test/CodeGen/arm-neon-directed-rounding.c b/test/CodeGen/arm-neon-directed-rounding.c new file mode 100644 index 0000000000000000000000000000000000000000..84029318865ceb6697c8bcc2aa6ff8edb91fb996 --- /dev/null +++ b/test/CodeGen/arm-neon-directed-rounding.c @@ -0,0 +1,75 @@ +// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -O1 -emit-llvm %s -o - | FileCheck %s + +#include <arm_neon.h> + +float32x2_t test_vrnda_f32(float32x2_t a) { + // CHECK-LABEL: test_vrnda_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> %a) + return vrnda_f32(a); +} + +float32x4_t test_vrndaq_f32(float32x4_t a) { + // CHECK-LABEL: test_vrndaq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> %a) + return vrndaq_f32(a); +} + +float32x2_t test_vrndm_f32(float32x2_t a) { + // CHECK-LABEL: test_vrndm_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> %a) + return vrndm_f32(a); +} + +float32x4_t test_vrndmq_f32(float32x4_t a) { + // CHECK-LABEL: test_vrndmq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> %a) + return vrndmq_f32(a); +} + +float32x2_t test_vrndn_f32(float32x2_t a) { + // CHECK-LABEL: test_vrndn_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a) + return vrndn_f32(a); +} + +float32x4_t test_vrndnq_f32(float32x4_t a) { + // CHECK-LABEL: test_vrndnq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a) + return vrndnq_f32(a); +} + +float32x2_t test_vrndp_f32(float32x2_t a) { + // CHECK-LABEL: test_vrndp_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> %a) + return vrndp_f32(a); +} + +float32x4_t test_vrndpq_f32(float32x4_t a) { + // CHECK-LABEL: test_vrndpq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> %a) + return vrndpq_f32(a); +} + +float32x2_t test_vrndx_f32(float32x2_t a) { + // CHECK-LABEL: test_vrndx_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> %a) + return vrndx_f32(a); +} + +float32x4_t test_vrndxq_f32(float32x4_t a) { + // CHECK-LABEL: test_vrndxq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> %a) + return vrndxq_f32(a); +} + +float32x2_t test_vrnd_f32(float32x2_t a) { + // CHECK-LABEL: test_vrnd_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> %a) + return vrnd_f32(a); +} + +float32x4_t test_vrndq_f32(float32x4_t a) { + // CHECK-LABEL: test_vrndq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> %a) + return vrndq_f32(a); +} diff --git a/test/CodeGen/arm-neon-numeric-maxmin.c b/test/CodeGen/arm-neon-numeric-maxmin.c new file mode 100644 index 0000000000000000000000000000000000000000..615a854b5e213a6795ff532a136699f793a82303 --- /dev/null +++ b/test/CodeGen/arm-neon-numeric-maxmin.c @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -O1 -emit-llvm %s -o - | FileCheck %s + +#include <arm_neon.h> + +float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { + // CHECK-LABEL: test_vmaxnm_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) + return vmaxnm_f32(a, b); +} + +float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { + // CHECK-LABEL: test_vmaxnmq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) + return vmaxnmq_f32(a, b); +} + +float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { + // CHECK-LABEL: test_vminnm_f32 + // CHECK: call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) + return vminnm_f32(a, b); +} + +float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { + // CHECK-LABEL: test_vminnmq_f32 + // CHECK: call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) + return vminnmq_f32(a, b); +}