diff --git a/test/CodeGen/aarch64-neon-2velem.c b/test/CodeGen/aarch64-neon-2velem.c index 03f7df715c536da126f4df3765deaf6b33342e3b..885aad794c035d43f70b13f7655d62983b6cd8d0 100644 --- a/test/CodeGen/aarch64-neon-2velem.c +++ b/test/CodeGen/aarch64-neon-2velem.c @@ -272,6 +272,30 @@ float64x2_t test_vfmsq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] } +float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) { + // CHECK-LABEL: test_vfmas_laneq_f32 + return vfmas_laneq_f32(a, b, v, 3); + // CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) { + // CHECK-LABEL: test_vfmsd_lane_f64 + return vfmsd_lane_f64(a, b, v, 0); + // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +} + +float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) { + // CHECK-LABEL: test_vfmss_laneq_f32 + return vfmss_laneq_f32(a, b, v, 3); + // CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +} + +float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) { + // CHECK-LABEL: test_vfmsd_laneq_f64 + return vfmsd_laneq_f64(a, b, v, 1); + // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +} + int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t v) { // CHECK: test_vmlal_lane_s16 return vmlal_lane_s16(a, b, v, 3); @@ -1696,3 +1720,736 @@ float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { return vfmsq_n_f32(a, b, n); // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] } + +int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) { + // CHECK-LABEL: test_vmul_n_s16 + return vmul_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) { + // CHECK-LABEL: test_vmulq_n_s16 + return vmulq_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) { + // CHECK-LABEL: test_vmul_n_s32 + return vmul_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) { + // CHECK-LABEL: test_vmulq_n_s32 + return vmulq_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.4s, w0 + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) { + // CHECK-LABEL: test_vmul_n_u16 + return vmul_n_u16(a, b); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) { + // CHECK-LABEL: test_vmulq_n_u16 + return vmulq_n_u16(a, b); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) { + // CHECK-LABEL: test_vmul_n_u32 + return vmul_n_u32(a, b); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) { + // CHECK-LABEL: test_vmulq_n_u32 + return vmulq_n_u32(a, b); + // CHECK: dup {{v[0-9]+}}.4s, w0 + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) { + // CHECK-LABEL: test_vmull_n_s16 + return vmull_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) { + // CHECK-LABEL: test_vmull_n_s32 + return vmull_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) { + // CHECK-LABEL: test_vmull_n_u16 + return vmull_n_u16(a, b); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) { + // CHECK-LABEL: test_vmull_n_u32 + return vmull_n_u32(a, b); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) { + // CHECK-LABEL: test_vqdmull_n_s16 + return vqdmull_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: sqdmull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) { + // CHECK-LABEL: test_vqdmull_n_s32 + return vqdmull_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: sqdmull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) { + // CHECK-LABEL: test_vqdmulh_n_s16 + return vqdmulh_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) { + // CHECK-LABEL: test_vqdmulhq_n_s16 + return vqdmulhq_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) { + // CHECK-LABEL: test_vqdmulh_n_s32 + return vqdmulh_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) { + // CHECK-LABEL: test_vqdmulhq_n_s32 + return vqdmulhq_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.4s, w0 + // CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) { + // CHECK-LABEL: test_vqrdmulh_n_s16 + return vqrdmulh_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) { + // CHECK-LABEL: test_vqrdmulhq_n_s16 + return vqrdmulhq_n_s16(a, b); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) { + // CHECK-LABEL: test_vqrdmulh_n_s32 + return vqrdmulh_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) { + // CHECK-LABEL: test_vqrdmulhq_n_s32 + return vqrdmulhq_n_s32(a, b); + // CHECK: dup {{v[0-9]+}}.4s, w0 + // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) { + // CHECK-LABEL: test_vmla_n_s16 + return vmla_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { + // CHECK-LABEL: test_vmlaq_n_s16 + return vmlaq_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) { + // CHECK-LABEL: test_vmla_n_s32 + return vmla_n_s32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { + // CHECK-LABEL: test_vmlaq_n_s32 + return vmlaq_n_s32(a, b, c); + // CHECK: dup {{v[0-9]+}}.4s, w0 + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { + // CHECK-LABEL: test_vmla_n_u16 + return vmla_n_u16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { + // CHECK-LABEL: test_vmlaq_n_u16 + return vmlaq_n_u16(a, b, c); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { + // CHECK-LABEL: test_vmla_n_u32 + return vmla_n_u32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { + // CHECK-LABEL: test_vmlaq_n_u32 + return vmlaq_n_u32(a, b, c); + // CHECK: dup {{v[0-9]+}}.4s, w0 + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + // CHECK-LABEL: test_vmlal_n_s16 + return vmlal_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + // CHECK-LABEL: test_vmlal_n_s32 + return vmlal_n_s32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { + // CHECK-LABEL: test_vmlal_n_u16 + return vmlal_n_u16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { + // CHECK-LABEL: test_vmlal_n_u32 + return vmlal_n_u32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + // CHECK-LABEL: test_vqdmlal_n_s16 + return vqdmlal_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + // CHECK-LABEL: test_vqdmlal_n_s32 + return vqdmlal_n_s32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) { + // CHECK-LABEL: test_vmls_n_s16 + return vmls_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { + // CHECK-LABEL: test_vmlsq_n_s16 + return vmlsq_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) { + // CHECK-LABEL: test_vmls_n_s32 + return vmls_n_s32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { + // CHECK-LABEL: test_vmlsq_n_s32 + return vmlsq_n_s32(a, b, c); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) { + // CHECK-LABEL: test_vmls_n_u16 + return vmls_n_u16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) { + // CHECK-LABEL: test_vmlsq_n_u16 + return vmlsq_n_u16(a, b, c); + // CHECK: dup {{v[0-9]+}}.8h, w0 + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) { + // CHECK-LABEL: test_vmls_n_u32 + return vmls_n_u32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) { + // CHECK-LABEL: test_vmlsq_n_u32 + return vmlsq_n_u32(a, b, c); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + // CHECK-LABEL: test_vmlsl_n_s16 + return vmlsl_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + // CHECK-LABEL: test_vmlsl_n_s32 + return vmlsl_n_s32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) { + // CHECK-LABEL: test_vmlsl_n_u16 + return vmlsl_n_u16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) { + // CHECK-LABEL: test_vmlsl_n_u32 + return vmlsl_n_u32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) { + // CHECK-LABEL: test_vqdmlsl_n_s16 + return vqdmlsl_n_s16(a, b, c); + // CHECK: dup {{v[0-9]+}}.4h, w0 + // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) { + // CHECK-LABEL: test_vqdmlsl_n_s32 + return vqdmlsl_n_s32(a, b, c); + // CHECK: dup {{v[0-9]+}}.2s, w0 + // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint16x4_t test_vmla_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmla_lane_u16_0 + return vmla_lane_u16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmlaq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmlaq_lane_u16_0 + return vmlaq_lane_u16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmla_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmla_lane_u32_0 + return vmla_lane_u32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmlaq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmlaq_lane_u32_0 + return vmlaq_lane_u32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmla_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmla_laneq_u16_0 + return vmla_laneq_u16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmlaq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmlaq_laneq_u16_0 + return vmlaq_laneq_u16(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmla_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmla_laneq_u32_0 + return vmla_laneq_u32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmlaq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmlaq_laneq_u32_0 + return vmlaq_laneq_u32(a, b, v, 0); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlal_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlal_laneq_s16_0 + return vqdmlal_laneq_s16(a, b, v, 0); + // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlal_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlal_laneq_s32_0 + return vqdmlal_laneq_s32(a, b, v, 0); + // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlal_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlal_high_laneq_s16_0 + return vqdmlal_high_laneq_s16(a, b, v, 0); + // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlal_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlal_high_laneq_s32_0 + return vqdmlal_high_laneq_s32(a, b, v, 0); + // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmls_lane_u16_0(uint16x4_t a, uint16x4_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmls_lane_u16_0 + return vmls_lane_u16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmlsq_lane_u16_0(uint16x8_t a, uint16x8_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmlsq_lane_u16_0 + return vmlsq_lane_u16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmls_lane_u32_0(uint32x2_t a, uint32x2_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmls_lane_u32_0 + return vmls_lane_u32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmlsq_lane_u32_0(uint32x4_t a, uint32x4_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmlsq_lane_u32_0 + return vmlsq_lane_u32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmls_laneq_u16_0(uint16x4_t a, uint16x4_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmls_laneq_u16_0 + return vmls_laneq_u16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +uint16x8_t test_vmlsq_laneq_u16_0(uint16x8_t a, uint16x8_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmlsq_laneq_u16_0 + return vmlsq_laneq_u16(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +uint32x2_t test_vmls_laneq_u32_0(uint32x2_t a, uint32x2_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmls_laneq_u32_0 + return vmls_laneq_u32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +uint32x4_t test_vmlsq_laneq_u32_0(uint32x4_t a, uint32x4_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmlsq_laneq_u32_0 + return vmlsq_laneq_u32(a, b, v, 0); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlsl_laneq_s16_0(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlsl_laneq_s16_0 + return vqdmlsl_laneq_s16(a, b, v, 0); + // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlsl_laneq_s32_0(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlsl_laneq_s32_0 + return vqdmlsl_laneq_s32(a, b, v, 0); + // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmlsl_high_laneq_s16_0(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlsl_high_laneq_s16_0 + return vqdmlsl_high_laneq_s16(a, b, v, 0); + // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int64x2_t test_vqdmlsl_high_laneq_s32_0(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlsl_high_laneq_s32_0 + return vqdmlsl_high_laneq_s32(a, b, v, 0); + // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vqdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK-LABEL: test_vqdmulh_laneq_s16_0 + return vqdmulh_laneq_s16(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vqdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK-LABEL: test_vqdmulhq_laneq_s16_0 + return vqdmulhq_laneq_s16(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vqdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK-LABEL: test_vqdmulh_laneq_s32_0 + return vqdmulh_laneq_s32(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK-LABEL: test_vqdmulhq_laneq_s32_0 + return vqdmulhq_laneq_s32(a, v, 0); + // CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +int16x4_t test_vqrdmulh_laneq_s16_0(int16x4_t a, int16x8_t v) { + // CHECK-LABEL: test_vqrdmulh_laneq_s16_0 + return vqrdmulh_laneq_s16(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[0] +} + +int16x8_t test_vqrdmulhq_laneq_s16_0(int16x8_t a, int16x8_t v) { + // CHECK-LABEL: test_vqrdmulhq_laneq_s16_0 + return vqrdmulhq_laneq_s16(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[0] +} + +int32x2_t test_vqrdmulh_laneq_s32_0(int32x2_t a, int32x4_t v) { + // CHECK-LABEL: test_vqrdmulh_laneq_s32_0 + return vqrdmulh_laneq_s32(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +int32x4_t test_vqrdmulhq_laneq_s32_0(int32x4_t a, int32x4_t v) { + // CHECK-LABEL: test_vqrdmulhq_laneq_s32_0 + return vqrdmulhq_laneq_s32(a, v, 0); + // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmla_lane_u16 + return vmla_lane_u16(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmlaq_lane_u16 + return vmlaq_lane_u16(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmla_lane_u32 + return vmla_lane_u32(a, b, v, 1); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmlaq_lane_u32 + return vmlaq_lane_u32(a, b, v, 1); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint16x4_t test_vmla_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmla_laneq_u16 + return vmla_laneq_u16(a, b, v, 7); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +uint16x8_t test_vmlaq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmlaq_laneq_u16 + return vmlaq_laneq_u16(a, b, v, 7); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +uint32x2_t test_vmla_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmla_laneq_u32 + return vmla_laneq_u32(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +uint32x4_t test_vmlaq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmlaq_laneq_u32 + return vmlaq_laneq_u32(a, b, v, 3); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqdmlal_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlal_laneq_s16 + return vqdmlal_laneq_s16(a, b, v, 7); + // CHECK: sqdmlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vqdmlal_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlal_laneq_s32 + return vqdmlal_laneq_s32(a, b, v, 3); + // CHECK: sqdmlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqdmlal_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlal_high_laneq_s16 + return vqdmlal_high_laneq_s16(a, b, v, 7); + // CHECK: sqdmlal2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vqdmlal_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlal_high_laneq_s32 + return vqdmlal_high_laneq_s32(a, b, v, 3); + // CHECK: sqdmlal2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmls_lane_u16 + return vmls_lane_u16(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] +} + +uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t v) { + // CHECK-LABEL: test_vmlsq_lane_u16 + return vmlsq_lane_u16(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3] +} + +uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmls_lane_u32 + return vmls_lane_u32(a, b, v, 1); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t v) { + // CHECK-LABEL: test_vmlsq_lane_u32 + return vmlsq_lane_u32(a, b, v, 1); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +uint16x4_t test_vmls_laneq_u16(uint16x4_t a, uint16x4_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmls_laneq_u16 + return vmls_laneq_u16(a, b, v, 7); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +uint16x8_t test_vmlsq_laneq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t v) { + // CHECK-LABEL: test_vmlsq_laneq_u16 + return vmlsq_laneq_u16(a, b, v, 7); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +uint32x2_t test_vmls_laneq_u32(uint32x2_t a, uint32x2_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmls_laneq_u32 + return vmls_laneq_u32(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +uint32x4_t test_vmlsq_laneq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t v) { + // CHECK-LABEL: test_vmlsq_laneq_u32 + return vmlsq_laneq_u32(a, b, v, 3); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqdmlsl_laneq_s16(int32x4_t a, int16x4_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlsl_laneq_s16 + return vqdmlsl_laneq_s16(a, b, v, 7); + // CHECK: sqdmlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vqdmlsl_laneq_s32(int64x2_t a, int32x2_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlsl_laneq_s32 + return vqdmlsl_laneq_s32(a, b, v, 3); + // CHECK: sqdmlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqdmlsl_high_laneq_s16(int32x4_t a, int16x8_t b, int16x8_t v) { + // CHECK-LABEL: test_vqdmlsl_high_laneq_s16 + return vqdmlsl_high_laneq_s16(a, b, v, 7); + // CHECK: sqdmlsl2 {{v[0-9]+}}.4s, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int64x2_t test_vqdmlsl_high_laneq_s32(int64x2_t a, int32x4_t b, int32x4_t v) { + // CHECK-LABEL: test_vqdmlsl_high_laneq_s32 + return vqdmlsl_high_laneq_s32(a, b, v, 3); + // CHECK: sqdmlsl2 {{v[0-9]+}}.2d, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int16x4_t test_vqdmulh_laneq_s16(int16x4_t a, int16x8_t v) { + // CHECK-LABEL: test_vqdmulh_laneq_s16 + return vqdmulh_laneq_s16(a, v, 7); + // CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int16x8_t test_vqdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { + // CHECK-LABEL: test_vqdmulhq_laneq_s16 + return vqdmulhq_laneq_s16(a, v, 7); + // CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int32x2_t test_vqdmulh_laneq_s32(int32x2_t a, int32x4_t v) { + // CHECK-LABEL: test_vqdmulh_laneq_s32 + return vqdmulh_laneq_s32(a, v, 3); + // CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { + // CHECK-LABEL: test_vqdmulhq_laneq_s32 + return vqdmulhq_laneq_s32(a, v, 3); + // CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +int16x4_t test_vqrdmulh_laneq_s16(int16x4_t a, int16x8_t v) { + // CHECK-LABEL: test_vqrdmulh_laneq_s16 + return vqrdmulh_laneq_s16(a, v, 7); + // CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7] +} + +int16x8_t test_vqrdmulhq_laneq_s16(int16x8_t a, int16x8_t v) { + // CHECK-LABEL: test_vqrdmulhq_laneq_s16 + return vqrdmulhq_laneq_s16(a, v, 7); + // CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7] +} + +int32x2_t test_vqrdmulh_laneq_s32(int32x2_t a, int32x4_t v) { + // CHECK-LABEL: test_vqrdmulh_laneq_s32 + return vqrdmulh_laneq_s32(a, v, 3); + // CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +int32x4_t test_vqrdmulhq_laneq_s32(int32x4_t a, int32x4_t v) { + // CHECK-LABEL: test_vqrdmulhq_laneq_s32 + return vqrdmulhq_laneq_s32(a, v, 3); + // CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + diff --git a/test/CodeGen/aarch64-neon-3v.c b/test/CodeGen/aarch64-neon-3v.c new file mode 100644 index 0000000000000000000000000000000000000000..fc18d0671ca863561f06fcc211f74ad956d88175 --- /dev/null +++ b/test/CodeGen/aarch64-neon-3v.c @@ -0,0 +1,487 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_vand_s8 + return vand_s8(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) { + // CHECK-LABEL: test_vandq_s8 + return vandq_s8(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) { + // CHECK-LABEL: test_vand_s16 + return vand_s16(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) { + // CHECK-LABEL: test_vandq_s16 + return vandq_s16(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) { + // CHECK-LABEL: test_vand_s32 + return vand_s32(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) { + // CHECK-LABEL: test_vandq_s32 + return vandq_s32(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) { + // CHECK-LABEL: test_vand_s64 + return vand_s64(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) { + // CHECK-LABEL: test_vandq_s64 + return vandq_s64(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_vand_u8 + return vand_u8(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK-LABEL: test_vandq_u8 + return vandq_u8(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) { + // CHECK-LABEL: test_vand_u16 + return vand_u16(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK-LABEL: test_vandq_u16 + return vandq_u16(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) { + // CHECK-LABEL: test_vand_u32 + return vand_u32(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK-LABEL: test_vandq_u32 + return vandq_u32(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) { + // CHECK-LABEL: test_vand_u64 + return vand_u64(a, b); + // CHECK: and {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK-LABEL: test_vandq_u64 + return vandq_u64(a, b); + // CHECK: and {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_vorr_s8 + return vorr_s8(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) { + // CHECK-LABEL: test_vorrq_s8 + return vorrq_s8(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) { + // CHECK-LABEL: test_vorr_s16 + return vorr_s16(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) { + // CHECK-LABEL: test_vorrq_s16 + return vorrq_s16(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) { + // CHECK-LABEL: test_vorr_s32 + return vorr_s32(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) { + // CHECK-LABEL: test_vorrq_s32 + return vorrq_s32(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) { + // CHECK-LABEL: test_vorr_s64 + return vorr_s64(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) { + // CHECK-LABEL: test_vorrq_s64 + return vorrq_s64(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_vorr_u8 + return vorr_u8(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK-LABEL: test_vorrq_u8 + return vorrq_u8(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) { + // CHECK-LABEL: test_vorr_u16 + return vorr_u16(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK-LABEL: test_vorrq_u16 + return vorrq_u16(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) { + // CHECK-LABEL: test_vorr_u32 + return vorr_u32(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK-LABEL: test_vorrq_u32 + return vorrq_u32(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) { + // CHECK-LABEL: test_vorr_u64 + return vorr_u64(a, b); + // CHECK: orr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK-LABEL: test_vorrq_u64 + return vorrq_u64(a, b); + // CHECK: orr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_veor_s8 + return veor_s8(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) { + // CHECK-LABEL: test_veorq_s8 + return veorq_s8(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) { + // CHECK-LABEL: test_veor_s16 + return veor_s16(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) { + // CHECK-LABEL: test_veorq_s16 + return veorq_s16(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) { + // CHECK-LABEL: test_veor_s32 + return veor_s32(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) { + // CHECK-LABEL: test_veorq_s32 + return veorq_s32(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) { + // CHECK-LABEL: test_veor_s64 + return veor_s64(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) { + // CHECK-LABEL: test_veorq_s64 + return veorq_s64(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_veor_u8 + return veor_u8(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK-LABEL: test_veorq_u8 + return veorq_u8(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) { + // CHECK-LABEL: test_veor_u16 + return veor_u16(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK-LABEL: test_veorq_u16 + return veorq_u16(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) { + // CHECK-LABEL: test_veor_u32 + return veor_u32(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK-LABEL: test_veorq_u32 + return veorq_u32(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) { + // CHECK-LABEL: test_veor_u64 + return veor_u64(a, b); + // CHECK: eor {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK-LABEL: test_veorq_u64 + return veorq_u64(a, b); + // CHECK: eor {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_vbic_s8 + return vbic_s8(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) { + // CHECK-LABEL: test_vbicq_s8 + return vbicq_s8(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) { + // CHECK-LABEL: test_vbic_s16 + return vbic_s16(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) { + // CHECK-LABEL: test_vbicq_s16 + return vbicq_s16(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) { + // CHECK-LABEL: test_vbic_s32 + return vbic_s32(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) { + // CHECK-LABEL: test_vbicq_s32 + return vbicq_s32(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) { + // CHECK-LABEL: test_vbic_s64 + return vbic_s64(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) { + // CHECK-LABEL: test_vbicq_s64 + return vbicq_s64(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_vbic_u8 + return vbic_u8(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK-LABEL: test_vbicq_u8 + return vbicq_u8(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) { + // CHECK-LABEL: test_vbic_u16 + return vbic_u16(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK-LABEL: test_vbicq_u16 + return vbicq_u16(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) { + // CHECK-LABEL: test_vbic_u32 + return vbic_u32(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK-LABEL: test_vbicq_u32 + return vbicq_u32(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) { + // CHECK-LABEL: test_vbic_u64 + return vbic_u64(a, b); + // CHECK: bic {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK-LABEL: test_vbicq_u64 + return vbicq_u64(a, b); + // CHECK: bic {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_vorn_s8 + return vorn_s8(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) { + // CHECK-LABEL: test_vornq_s8 + return vornq_s8(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) { + // CHECK-LABEL: test_vorn_s16 + return vorn_s16(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) { + // CHECK-LABEL: test_vornq_s16 + return vornq_s16(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) { + // CHECK-LABEL: test_vorn_s32 + return vorn_s32(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) { + // CHECK-LABEL: test_vornq_s32 + return vornq_s32(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) { + // CHECK-LABEL: test_vorn_s64 + return vorn_s64(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) { + // CHECK-LABEL: test_vornq_s64 + return vornq_s64(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) { + // CHECK-LABEL: test_vorn_u8 + return vorn_u8(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) { + // CHECK-LABEL: test_vornq_u8 + return vornq_u8(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) { + // CHECK-LABEL: test_vorn_u16 + return vorn_u16(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) { + // CHECK-LABEL: test_vornq_u16 + return vornq_u16(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) { + // CHECK-LABEL: test_vorn_u32 + return vorn_u32(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) { + // CHECK-LABEL: test_vornq_u32 + return vornq_u32(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) { + // CHECK-LABEL: test_vorn_u64 + return vorn_u64(a, b); + // CHECK: orn {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) { + // CHECK-LABEL: test_vornq_u64 + return vornq_u64(a, b); + // CHECK: orn {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} diff --git a/test/CodeGen/aarch64-neon-fma.c b/test/CodeGen/aarch64-neon-fma.c new file mode 100644 index 0000000000000000000000000000000000000000..b3a54be147b9749c5d49a75fff541a6f8dd7f34c --- /dev/null +++ b/test/CodeGen/aarch64-neon-fma.c @@ -0,0 +1,173 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck -check-prefix=CHECK-FMA %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) { + // CHECK-LABEL: test_vmla_n_f32 + return vmla_n_f32(a, b, c); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { + // CHECK-LABEL: test_vmlaq_n_f32 + return vmlaq_n_f32(a, b, c); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { + // CHECK-LABEL: test_vmlsq_n_f32 + return vmlsq_n_f32(a, b, c); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { + // CHECK-LABEL: test_vmls_n_f32 + return vmls_n_f32(a, b, c); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK-LABEL: test_vmla_lane_f32_0 + return vmla_lane_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK-LABEL: test_vmlaq_lane_f32_0 + return vmlaq_lane_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK-LABEL: test_vmla_laneq_f32_0 + return vmla_laneq_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmlaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK-LABEL: test_vmlaq_laneq_f32_0 + return vmlaq_laneq_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK-LABEL: test_vmls_lane_f32_0 + return vmls_lane_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK-LABEL: test_vmlsq_lane_f32_0 + return vmlsq_lane_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK-LABEL: test_vmls_laneq_f32_0 + return vmls_laneq_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] + // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] +} + +float32x4_t test_vmlsq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK-LABEL: test_vmlsq_laneq_f32_0 + return vmlsq_laneq_f32(a, b, v, 0); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] + // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] +} + +float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK-LABEL: test_vmla_lane_f32 + return vmla_lane_f32(a, b, v, 1); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK-LABEL: test_vmlaq_lane_f32 + return vmlaq_lane_f32(a, b, v, 1); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} + +float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK-LABEL: test_vmla_laneq_f32 + return vmla_laneq_f32(a, b, v, 3); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] + // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +float32x4_t test_vmlaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK-LABEL: test_vmlaq_laneq_f32 + return vmlaq_laneq_f32(a, b, v, 3); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] + // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + +float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { + // CHECK-LABEL: test_vmls_lane_f32 + return vmls_lane_f32(a, b, v, 1); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] + // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] +} + +float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { + // CHECK-LABEL: test_vmlsq_lane_f32 + return vmlsq_lane_f32(a, b, v, 1); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] + // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +} +float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { + // CHECK-LABEL: test_vmls_laneq_f32 + return vmls_laneq_f32(a, b, v, 3); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] + // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] +} + +float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { + // CHECK-LABEL: test_vmlsq_laneq_f32 + return vmlsq_laneq_f32(a, b, v, 3); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] + // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +} + diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c index c0bca67aaa4da552c34a4000c18ac24fa6019561..35dd06ecc76f93bff2098554e26bd5867ab3edd8 100644 --- a/test/CodeGen/aarch64-neon-intrinsics.c +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -1119,6 +1119,17 @@ uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) { // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h } +uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) { + // CHECK-LABEL: test_vtst_s64 + return vtst_s64(a, b); + // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) { + // CHECK-LABEL: test_vtst_u64 + return vtst_u64(a, b); + // CHECK: cmtst {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) { // CHECK: test_vceq_s8 @@ -2539,6 +2550,18 @@ uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { // CHECK: uqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d } +poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) { +// CHECK-LABEL: test_vsli_n_p64 + return vsli_n_p64(a, b, 0); +// CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #0 +} + +poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) { +// CHECK-LABEL: test_vsliq_n_p64 + return vsliq_n_p64(a, b, 0); +// CHECK: sli {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +} + int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { // CHECK: test_vmax_s8 return vmax_s8(a, b); @@ -5771,6 +5794,18 @@ float64_t test_vrecpxd_f64(float64_t a) { return vrecpxd_f64(a); } +uint32x2_t test_vrsqrte_u32(uint32x2_t a) { +// CHECK-LABEL: test_vrsqrte_u32 +// CHECK: ursqrte {{v[0-9]+}}.2s, {{v[0-9]+}}.2s + return vrsqrte_u32(a); +} + +uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { +// CHECK-LABEL: test_vrsqrteq_u32 +// CHECK: ursqrte {{v[0-9]+}}.4s, {{v[0-9]+}}.4s + return vrsqrteq_u32(a); +} + float32_t test_vrsqrtes_f32(float32_t a) { // CHECK: vrsqrtes_f32 // CHECK: frsqrte {{s[0-9]+}}, {{s[0-9]+}} @@ -8872,6 +8907,90 @@ int64_t test_vqshld_n_s64(int64_t a) { return (int64_t)vqshld_n_s64(a, 63); } +int8x8_t test_vqshl_n_s8(int8x8_t a) { + // CHECK-LABEL: test_vqshl_n_s8 + return vqshl_n_s8(a, 0); + // CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +} + +int8x16_t test_vqshlq_n_s8(int8x16_t a) { + // CHECK-LABEL: test_vqshlq_n_s8 + return vqshlq_n_s8(a, 0); + // CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 +} + +int16x4_t test_vqshl_n_s16(int16x4_t a) { + // CHECK-LABEL: test_vqshl_n_s16 + return vqshl_n_s16(a, 0); + // CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 +} + +int16x8_t test_vqshlq_n_s16(int16x8_t a) { + // CHECK-LABEL: test_vqshlq_n_s16 + return vqshlq_n_s16(a, 0); + // CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 +} + +int32x2_t test_vqshl_n_s32(int32x2_t a) { + // CHECK-LABEL: test_vqshl_n_s32 + return vqshl_n_s32(a, 0); + // CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +} + +int32x4_t test_vqshlq_n_s32(int32x4_t a) { + // CHECK-LABEL: test_vqshlq_n_s32 + return vqshlq_n_s32(a, 0); + // CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +} + +int64x2_t test_vqshlq_n_s64(int64x2_t a) { + // CHECK-LABEL: test_vqshlq_n_s64 + return vqshlq_n_s64(a, 0); + // CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +} + +uint8x8_t test_vqshl_n_u8(uint8x8_t a) { + // CHECK-LABEL: test_vqshl_n_u8 + return vqshl_n_u8(a, 0); + // CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0 +} + +uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { + // CHECK-LABEL: test_vqshlq_n_u8 + return vqshlq_n_u8(a, 0); + // CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0 +} + +uint16x4_t test_vqshl_n_u16(uint16x4_t a) { + // CHECK-LABEL: test_vqshl_n_u16 + return vqshl_n_u16(a, 0); + // CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #0 +} + +uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { + // CHECK-LABEL: test_vqshlq_n_u16 + return vqshlq_n_u16(a, 0); + // CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #0 +} + +uint32x2_t test_vqshl_n_u32(uint32x2_t a) { + // CHECK-LABEL: test_vqshl_n_u32 + return vqshl_n_u32(a, 0); + // CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #0 +} + +uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { + // CHECK-LABEL: test_vqshlq_n_u32 + return vqshlq_n_u32(a, 0); + // CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #0 +} + +uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { + // CHECK-LABEL: test_vqshlq_n_u64 + return vqshlq_n_u64(a, 0); + // CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #0 +} + int64x1_t test_vqshl_n_s64(int64x1_t a) { // CHECK: test_vqshl_n_s64 // CHECK: sqshl d{{[0-9]+}}, d{{[0-9]+}}, #1 @@ -11350,6 +11469,48 @@ uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) { // CHECK: usqadd d{{[0-9]+}}, d{{[0-9]+}} } +uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) { + // CHECK-LABEL: test_vsqadd_u8 + return vsqadd_u8(a, b); + // CHECK: usqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) { + // CHECK-LABEL: test_vsqaddq_u8 + return vsqaddq_u8(a, b); + // CHECK: usqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) { + // CHECK-LABEL: test_vsqadd_u16 + return vsqadd_u16(a, b); + // CHECK: usqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) { + // CHECK-LABEL: test_vsqaddq_u16 + return vsqaddq_u16(a, b); + // CHECK: usqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) { + // CHECK-LABEL: test_vsqadd_u32 + return vsqadd_u32(a, b); + // CHECK: usqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) { + // CHECK-LABEL: test_vsqaddq_u32 + return vsqaddq_u32(a, b); + // CHECK: usqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) { + // CHECK-LABEL: test_vsqaddq_u64 + return vsqaddq_u64(a, b); + // CHECK: usqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + int64x1_t test_vabs_s64(int64x1_t a) { // CHECK-LABEL: test_vabs_s64 return vabs_s64(a); diff --git a/test/CodeGen/aarch64-neon-ldst-one.c b/test/CodeGen/aarch64-neon-ldst-one.c index ec634e4f50e91ac211f5efa1567bfc3b5713781c..737810ee18d83d3312c710c79512e50cc9d786b6 100644 --- a/test/CodeGen/aarch64-neon-ldst-one.c +++ b/test/CodeGen/aarch64-neon-ldst-one.c @@ -900,6 +900,36 @@ poly64x1_t test_vld1_lane_p64(poly64_t *a, poly64x1_t b) { // CHECK: ld1r {v{{[0-9]+}}.1d}, [{{x[0-9]+|sp}}] } +int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) { + // CHECK-LABEL: test_vld2q_lane_s8 + return vld2q_lane_s8(ptr, src, 15); + // CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +} + +uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) { + // CHECK-LABEL: test_vld2q_lane_u8 + return vld2q_lane_u8(ptr, src, 15); + // CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +} + +poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) { + // CHECK-LABEL: test_vld2q_lane_p8 + return vld2q_lane_p8(ptr, src, 15); + // CHECK: ld2 {{{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +} + +int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) { + // CHECK-LABEL: test_vld3q_lane_s8 + return vld3q_lane_s8(ptr, src, 15); + // CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +} + +uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) { + // CHECK-LABEL: test_vld3q_lane_u8 + return vld3q_lane_u8(ptr, src, 15); + // CHECK: ld3 {{{v[0-9]+}}.b, {{v[0-9]+}}.b, {{v[0-9]+}}.b}[15], [x0] +} + uint16x8x2_t test_vld2q_lane_u16(uint16_t *a, uint16x8x2_t b) { // CHECK-LABEL: test_vld2q_lane_u16 return vld2q_lane_u16(a, b, 7);