diff --git a/include/clang/Basic/BuiltinsAArch64.def b/include/clang/Basic/BuiltinsAArch64.def index 768e4bb26c7dc413aa0e8a18e0edfcc415e8e66c..aafd202aae115452035a70f33c8eb9ff6a73c925 100644 --- a/include/clang/Basic/BuiltinsAArch64.def +++ b/include/clang/Basic/BuiltinsAArch64.def @@ -16,3 +16,10 @@ // In libgcc BUILTIN(__clear_cache, "vv*v*", "i") +// NEON +#define GET_NEON_AARCH64_BUILTINS +#include "clang/Basic/arm_neon.inc" +#undef GET_NEON_AARCH64_BUILTINS +#undef GET_NEON_BUILTINS + +#undef BUILTIN diff --git a/include/clang/Basic/TargetBuiltins.h b/include/clang/Basic/TargetBuiltins.h index 66e378fa9b4526c00b4ba3cf2e5bf07ecac14bb0..4202a4a5028ff6164f0771f20e574d0cea46ed28 100644 --- a/include/clang/Basic/TargetBuiltins.h +++ b/include/clang/Basic/TargetBuiltins.h @@ -91,7 +91,8 @@ namespace clang { Poly8, Poly16, Float16, - Float32 + Float32, + Float64 }; NeonTypeFlags(unsigned F) : Flags(F) {} diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index 77bc797c5056bd3324feb7bc6519272249f198c4..ea03e8fe8a60cebd048e262cf9fdb5be70d6a840 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -69,6 +69,7 @@ def OP_REINT : Op; def OP_ABDL : Op; def OP_ABA : Op; def OP_ABAL : Op; +def OP_DIV : Op; class Inst <string n, string p, string t, Op o> { string Name = n; @@ -77,6 +78,7 @@ class Inst <string n, string p, string t, Op o> { Op Operand = o; bit isShift = 0; bit isVCVT_N = 0; + bit isA64 = 0; // Certain intrinsics have different names than their representative // instructions. This field allows us to handle this correctly when we @@ -145,6 +147,7 @@ class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {} // l: long // f: float // h: half-float +// d: double // size modifiers: // U: unsigned @@ -452,3 +455,110 @@ def VREINTERPRET // Vector fused multiply-add operations def VFMA : SInst<"vfma", "dddd", "fQf">; + +//////////////////////////////////////////////////////////////////////////////// +// AArch64 Intrinsics + +let isA64 = 1 in { + +//////////////////////////////////////////////////////////////////////////////// +// Addition +// With additional Qd type. +def ADD : IOpInst<"vadd", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_ADD>; + +//////////////////////////////////////////////////////////////////////////////// +// Subtraction +// With additional Qd type. +def SUB : IOpInst<"vsub", "ddd", "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", OP_SUB>; + +//////////////////////////////////////////////////////////////////////////////// +// Multiplication +// With additional Qd type. +def MUL : IOpInst<"vmul", "ddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>; +def MLA : IOpInst<"vmla", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>; +def MLS : IOpInst<"vmls", "dddd", "csifUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>; + +//////////////////////////////////////////////////////////////////////////////// +// Multiplication Extended +def MULX : SInst<"vmulx", "ddd", "fQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Division +def FDIV : IOpInst<"vdiv", "ddd", "fQfQd", OP_DIV>; + +//////////////////////////////////////////////////////////////////////////////// +// Vector fused multiply-add operations +// With additional Qd type. +def FMLA : SInst<"vfma", "dddd", "fQfQd">; +def FMLS : SInst<"vfms", "dddd", "fQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Logical operations +// With additional Qd type. 
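+// In the prototype "dudd", the result and the two selected operands use the +// default vector type of each type-code ('d'), while the selection mask uses +// its unsigned counterpart ('u'); e.g. for 'f' this is expected to yield +// float32x2_t vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c).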
+def BSL : SInst<"vbsl", "dudd", "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Absolute Difference +// With additional Qd type. +def ABD : SInst<"vabd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Reciprocal/Sqrt +// With additional Qd type. +def FRECPS : IInst<"vrecps", "ddd", "fQfQd">; +def FRSQRTS : IInst<"vrsqrts", "ddd", "fQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Comparison +// With additional Qd type. +def FCAGE : IInst<"vcage", "udd", "fQfQd">; +def FCAGT : IInst<"vcagt", "udd", "fQfQd">; +def FCALE : IInst<"vcale", "udd", "fQfQd">; +def FCALT : IInst<"vcalt", "udd", "fQfQd">; +// With additional Ql, QUl, Qd types. +def CMTST : WInst<"vtst", "udd", "csiUcUsUiPcQcQsQiQlQUcQUsQUiQUlQPc">; +def CFMEQ : SOpInst<"vceq", "udd", + "csifUcUsUiPcQcQsQiQlQfQUcQUsQUiQUlQPcQd", OP_EQ>; +def CFMGE : SOpInst<"vcge", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GE>; +def CFMLE : SOpInst<"vcle", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LE>; +def CFMGT : SOpInst<"vcgt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_GT>; +def CFMLT : SOpInst<"vclt", "udd", "csifUcUsUiQcQsQiQlQfQUcQUsQUiQUlQd", OP_LT>; + +//////////////////////////////////////////////////////////////////////////////// +// Max/Min Integer +// With additional Qd type. +def MAX : SInst<"vmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; +def MIN : SInst<"vmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// MaxNum/MinNum Floating Point +def FMAXNM : SInst<"vmaxnm", "ddd", "fQfQd">; +def FMINNM : SInst<"vminnm", "ddd", "fQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Pairwise Max/Min +// With additional Qc Qs Qi QUc QUs QUi Qf Qd types. +def MAXP : SInst<"vpmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; +def MINP : SInst<"vpmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Pairwise MaxNum/MinNum Floating Point +def FMAXNMP : SInst<"vpmaxnm", "ddd", "fQfQd">; +def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Pairwise Addition +// With additional Qc Qs Qi QUc QUs QUi Qf Qd types. 
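+// vpadd adds adjacent lane pairs across the concatenation of its two inputs, +// e.g. vpadd_s32(a, b) yields { a0+a1, b0+b1 }.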
+def ADDP : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; + +//////////////////////////////////////////////////////////////////////////////// +// Scalar Arithmetic + +// Scalar Addition + +def SCALAR_ADD : Inst<"vaddd", "ddd", "lUl", OP_ADD>; + +// Scalar Subtraction +def SCALAR_SUB : Inst<"vsubd", "ddd", "lUl", OP_SUB>; + +} diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index 752dd985a9b8ab6ce3520e06203ce54f47d271c2..d14b38eb0c2d4c0a10f16bfc4582a2a23e36ba82 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -7557,7 +7557,7 @@ private: bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); - + bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool SemaBuiltinVAStart(CallExpr *TheCall); diff --git a/lib/AST/ItaniumMangle.cpp b/lib/AST/ItaniumMangle.cpp index 76a8bf4ecc4ed45bd5ead6f75fd6ad5f6a762a5d..7016ee1d65aa2d8b655589ba930482a9ba6eb6d8 100644 --- a/lib/AST/ItaniumMangle.cpp +++ b/lib/AST/ItaniumMangle.cpp @@ -360,6 +360,7 @@ private: void mangleBareFunctionType(const FunctionType *T, bool MangleReturnType); void mangleNeonVectorType(const VectorType *T); + void mangleAArch64NeonVectorType(const VectorType *T); void mangleIntegerLiteral(QualType T, const llvm::APSInt &Value); void mangleMemberExpr(const Expr *base, bool isArrow, @@ -2174,7 +2175,9 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) { case BuiltinType::LongLong: EltName = "int64_t"; break; case BuiltinType::ULongLong: EltName = "uint64_t"; break; case BuiltinType::Float: EltName = "float32_t"; break; - default: llvm_unreachable("unexpected Neon vector element type"); + case BuiltinType::Half: EltName = "float16_t";break; + default: + llvm_unreachable("unexpected Neon vector element type"); } } const char *BaseName = 0; @@ -2190,6 +2193,70 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) { Out << BaseName << EltName; } +static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) { + switch (EltType->getKind()) { + case BuiltinType::SChar: + return "Int8"; + case BuiltinType::Short: + return "Int16"; + case BuiltinType::Int: + return "Int32"; + case BuiltinType::LongLong: + return "Int64"; + case BuiltinType::UChar: + return "Uint8"; + case BuiltinType::UShort: + return "Uint16"; + case BuiltinType::UInt: + return "Uint32"; + case BuiltinType::ULongLong: + return "Uint64"; + case BuiltinType::Half: + return "Float16"; + case BuiltinType::Float: + return "Float32"; + case BuiltinType::Double: + return "Float64"; + default: + llvm_unreachable("Unexpected vector element base type"); + } +} + +// AArch64's ABI for Neon vector types specifies that they should be mangled as +// the equivalent internal name. The vector type must be one of the special +// types predefined by ARM. 
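+// For example, int32x4_t mangles as the length-prefixed internal name +// "11__Int32x4_t" (so "void f(int32x4_t)" becomes "_Z1f11__Int32x4_t"), +// rather than the generic GNU vector form "Dv4_i" emitted for other targets.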
+void CXXNameMangler::mangleAArch64NeonVectorType(const VectorType *T) { + QualType EltType = T->getElementType(); + assert(EltType->isBuiltinType() && "Neon vector element not a BuiltinType"); + unsigned BitSize = + (T->getNumElements() * getASTContext().getTypeSize(EltType)); + + assert((BitSize == 64 || BitSize == 128) && + "Neon vector type not 64 or 128 bits"); + + assert(getASTContext().getTypeSize(EltType) != BitSize && + "Vector of 1 element not permitted"); + + StringRef EltName; + if (T->getVectorKind() == VectorType::NeonPolyVector) { + switch (cast<BuiltinType>(EltType)->getKind()) { + case BuiltinType::UChar: + EltName = "Poly8"; + break; + case BuiltinType::UShort: + EltName = "Poly16"; + break; + default: + llvm_unreachable("unexpected Neon polynomial vector element type"); + } + } else + EltName = mangleAArch64VectorBase(cast<BuiltinType>(EltType)); + + std::string TypeName = + ("__" + EltName + "x" + llvm::utostr(T->getNumElements()) + "_t").str(); + Out << TypeName.length() << TypeName; +} + // GNU extension: vector types // <type> ::= <vector-type> // <vector-type> ::= Dv <positive dimension number> _ @@ -2201,7 +2268,11 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) { void CXXNameMangler::mangleType(const VectorType *T) { if ((T->getVectorKind() == VectorType::NeonVector || T->getVectorKind() == VectorType::NeonPolyVector)) { - mangleNeonVectorType(T); + if (getASTContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::aarch64) + mangleAArch64NeonVectorType(T); + else + mangleNeonVectorType(T); return; } Out << "Dv" << T->getNumElements() << '_'; diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 596eb8cb1685abe4dc0ac48748b0e4e52020c2cf..718f3bb223ada07ae85590b70e662f656ffee802 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -3177,7 +3177,14 @@ class AArch64TargetInfo : public TargetInfo { static const char * const GCCRegNames[]; static const TargetInfo::GCCRegAlias GCCRegAliases[]; + enum FPUModeEnum { + FPUMode, + NeonMode + }; + + unsigned FPU; static const Builtin::Info BuiltinInfo[]; + public: AArch64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) { BigEndian = false; @@ -3242,7 +3249,14 @@ public: Opts.ShortEnums ? "1" : "4"); if (BigEndian) - Builder.defineMacro("__ARM_BIG_ENDIAN"); + Builder.defineMacro("__AARCH_BIG_ENDIAN"); + + if (FPU == NeonMode) { + Builder.defineMacro("__AARCH_FEATURE_ADVSIMD"); + + // 64-bit NEON supports half, single and double precision operations. 
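+ // 0xe is presumably an ACLE-style bitmask (cf. __ARM_NEON_FP): bits 1, 2 + // and 3 (0b1110) for half, single and double precision respectively.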
+ Builder.defineMacro("__AARCH_ADVSIMD_FP", "0xe"); + } } virtual void getTargetBuiltins(const Builtin::Info *&Records, unsigned &NumRecords) const { @@ -3250,9 +3264,28 @@ public: NumRecords = clang::AArch64::LastTSBuiltin-Builtin::FirstTSBuiltin; } virtual bool hasFeature(StringRef Feature) const { - return Feature == "aarch64"; + return Feature == "aarch64" || (Feature == "neon" && FPU == NeonMode); } - virtual void getGCCRegNames(const char * const *&Names, + + virtual bool setFeatureEnabled(llvm::StringMap<bool> &Features, + StringRef Name, bool Enabled) const { + if (Name == "neon") { + Features[Name] = Enabled; + return true; + } + + return false; + } + + virtual void HandleTargetFeatures(std::vector<std::string> &Features) { + FPU = FPUMode; + for (unsigned i = 0, e = Features.size(); i != e; ++i) { + if (Features[i] == "+neon") + FPU = NeonMode; + } + } + + virtual void getGCCRegNames(const char *const *&Names, unsigned &NumNames) const; virtual void getGCCRegAliases(const GCCRegAlias *&Aliases, unsigned &NumAliases) const; diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp index 5b41237585c48f8e4a3278937d3ccfe772c1eeeb..d1dd7a0958d22e66466d36f06362cc01ff21e487 100644 --- a/lib/CodeGen/CGBuiltin.cpp +++ b/lib/CodeGen/CGBuiltin.cpp @@ -1614,6 +1614,8 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad); case NeonTypeFlags::Float32: return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad); + case NeonTypeFlags::Float64: + return llvm::VectorType::get(CGF->DoubleTy, 1 << IsQuad); } llvm_unreachable("Invalid NeonTypeFlags element type!"); } @@ -1718,7 +1720,200 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } - return 0; + SmallVector<Value *, 4> Ops; + for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { + Ops.push_back(EmitScalarExpr(E->getArg(i))); + } + + // Get the last argument, which specifies the vector type. + llvm::APSInt Result; + const Expr *Arg = E->getArg(E->getNumArgs() - 1); + if (!Arg->isIntegerConstantExpr(Result, getContext())) + return 0; + + // Determine the type of this overloaded NEON intrinsic. + NeonTypeFlags Type(Result.getZExtValue()); + bool usgn = Type.isUnsigned(); + + llvm::VectorType *VTy = GetNeonType(this, Type); + llvm::Type *Ty = VTy; + if (!Ty) + return 0; + + unsigned Int; + switch (BuiltinID) { + default: + return 0; + + // AArch64 builtins mapping to legacy ARM v7 builtins. + // FIXME: the mapped builtins listed correspond to what has been tested + // in aarch64-neon-intrinsics.c so far. 
+ case AArch64::BI__builtin_neon_vmul_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmul_v, E); + case AArch64::BI__builtin_neon_vmulq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmulq_v, E); + case AArch64::BI__builtin_neon_vabd_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabd_v, E); + case AArch64::BI__builtin_neon_vabdq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vabdq_v, E); + case AArch64::BI__builtin_neon_vfma_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfma_v, E); + case AArch64::BI__builtin_neon_vfmaq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vfmaq_v, E); + case AArch64::BI__builtin_neon_vbsl_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbsl_v, E); + case AArch64::BI__builtin_neon_vbslq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vbslq_v, E); + case AArch64::BI__builtin_neon_vrsqrts_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrts_v, E); + case AArch64::BI__builtin_neon_vrsqrtsq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrsqrtsq_v, E); + case AArch64::BI__builtin_neon_vrecps_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecps_v, E); + case AArch64::BI__builtin_neon_vrecpsq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrecpsq_v, E); + case AArch64::BI__builtin_neon_vcage_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcage_v, E); + case AArch64::BI__builtin_neon_vcale_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcale_v, E); + case AArch64::BI__builtin_neon_vcaleq_v: + std::swap(Ops[0], Ops[1]); + case AArch64::BI__builtin_neon_vcageq_v: { + Function *F; + if (VTy->getElementType()->isIntegerTy(64)) + F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgeq); + else + F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq); + return EmitNeonCall(F, Ops, "vcage"); + } + case AArch64::BI__builtin_neon_vcalt_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcalt_v, E); + case AArch64::BI__builtin_neon_vcagt_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vcagt_v, E); + case AArch64::BI__builtin_neon_vcaltq_v: + std::swap(Ops[0], Ops[1]); + case AArch64::BI__builtin_neon_vcagtq_v: { + Function *F; + if (VTy->getElementType()->isIntegerTy(64)) + F = CGM.getIntrinsic(Intrinsic::aarch64_neon_vacgtq); + else + F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq); + return EmitNeonCall(F, Ops, "vcagt"); + } + case AArch64::BI__builtin_neon_vtst_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtst_v, E); + case AArch64::BI__builtin_neon_vtstq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vtstq_v, E); + case AArch64::BI__builtin_neon_vhadd_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhadd_v, E); + case AArch64::BI__builtin_neon_vhaddq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhaddq_v, E); + case AArch64::BI__builtin_neon_vhsub_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsub_v, E); + case AArch64::BI__builtin_neon_vhsubq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vhsubq_v, E); + case AArch64::BI__builtin_neon_vrhadd_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhadd_v, E); + case AArch64::BI__builtin_neon_vrhaddq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrhaddq_v, E); + case AArch64::BI__builtin_neon_vqadd_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqadd_v, E); + case AArch64::BI__builtin_neon_vqaddq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqaddq_v, E); + case AArch64::BI__builtin_neon_vqsub_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsub_v, E); + case 
AArch64::BI__builtin_neon_vqsubq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqsubq_v, E); + case AArch64::BI__builtin_neon_vshl_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshl_v, E); + case AArch64::BI__builtin_neon_vshlq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vshlq_v, E); + case AArch64::BI__builtin_neon_vqshl_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshl_v, E); + case AArch64::BI__builtin_neon_vqshlq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqshlq_v, E); + case AArch64::BI__builtin_neon_vrshl_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshl_v, E); + case AArch64::BI__builtin_neon_vrshlq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vrshlq_v, E); + case AArch64::BI__builtin_neon_vqrshl_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshl_v, E); + case AArch64::BI__builtin_neon_vqrshlq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrshlq_v, E); + case AArch64::BI__builtin_neon_vmax_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmax_v, E); + case AArch64::BI__builtin_neon_vmaxq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmaxq_v, E); + case AArch64::BI__builtin_neon_vmin_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vmin_v, E); + case AArch64::BI__builtin_neon_vminq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vminq_v, E); + case AArch64::BI__builtin_neon_vpmax_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmax_v, E); + case AArch64::BI__builtin_neon_vpmin_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpmin_v, E); + case AArch64::BI__builtin_neon_vpadd_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vpadd_v, E); + case AArch64::BI__builtin_neon_vqdmulh_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulh_v, E); + case AArch64::BI__builtin_neon_vqdmulhq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqdmulhq_v, E); + case AArch64::BI__builtin_neon_vqrdmulh_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulh_v, E); + case AArch64::BI__builtin_neon_vqrdmulhq_v: + return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vqrdmulhq_v, E); + + // AArch64-only builtins + case AArch64::BI__builtin_neon_vfms_v: + case AArch64::BI__builtin_neon_vfmsq_v: { + Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); + Ops[0] = Builder.CreateBitCast(Ops[0], Ty); + Ops[1] = Builder.CreateBitCast(Ops[1], Ty); + Ops[1] = Builder.CreateFNeg(Ops[1]); + Ops[2] = Builder.CreateBitCast(Ops[2], Ty); + + // LLVM's fma intrinsic puts the accumulator in the last position, but the + // AArch64 intrinsic has it first. + return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); + } + case AArch64::BI__builtin_neon_vmaxnm_v: + case AArch64::BI__builtin_neon_vmaxnmq_v: { + Int = Intrinsic::aarch64_neon_vmaxnm; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); + } + case AArch64::BI__builtin_neon_vminnm_v: + case AArch64::BI__builtin_neon_vminnmq_v: { + Int = Intrinsic::aarch64_neon_vminnm; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); + } + case AArch64::BI__builtin_neon_vpmaxnm_v: + case AArch64::BI__builtin_neon_vpmaxnmq_v: { + Int = Intrinsic::aarch64_neon_vpmaxnm; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); + } + case AArch64::BI__builtin_neon_vpminnm_v: + case AArch64::BI__builtin_neon_vpminnmq_v: { + Int = Intrinsic::aarch64_neon_vpminnm; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); + } + case AArch64::BI__builtin_neon_vpmaxq_v: { + Int = usgn ? 
Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); + } + case AArch64::BI__builtin_neon_vpminq_v: { + Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); + } + case AArch64::BI__builtin_neon_vpaddq_v: { + Int = Intrinsic::arm_neon_vpadd; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpadd"); + } + case AArch64::BI__builtin_neon_vmulx_v: + case AArch64::BI__builtin_neon_vmulxq_v: { + Int = Intrinsic::aarch64_neon_vmulx; + return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); + } + } } Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, diff --git a/lib/Driver/Tools.cpp b/lib/Driver/Tools.cpp index 50630b698903152422070bd390763f4be22397c1..bc003facba02e0d497a31d9677a88ce41e01fba5 100644 --- a/lib/Driver/Tools.cpp +++ b/lib/Driver/Tools.cpp @@ -1407,6 +1407,14 @@ void Clang::AddHexagonTargetArgs(const ArgList &Args, CmdArgs.push_back ("-machine-sink-split=0"); } +void Clang::AddAArch64TargetArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + const Driver &D = getToolChain().getDriver(); + // Honor -mfpu=. + if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ)) + addFPUArgs(D, A, Args, CmdArgs); +} + static bool shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime, const llvm::Triple &Triple) { @@ -2498,9 +2506,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, case llvm::Triple::hexagon: AddHexagonTargetArgs(Args, CmdArgs); break; - } - + case llvm::Triple::aarch64: + AddAArch64TargetArgs(Args, CmdArgs); + break; + } // Pass the linker version in use. if (Arg *A = Args.getLastArg(options::OPT_mlinker_version_EQ)) { diff --git a/lib/Driver/Tools.h b/lib/Driver/Tools.h index 1dd4d5edfe324a98064b1134dc819589b7a96d25..9138ddf88ba5cdadb4afa2143f43a54202187908 100644 --- a/lib/Driver/Tools.h +++ b/lib/Driver/Tools.h @@ -65,6 +65,8 @@ using llvm::opt::ArgStringList; llvm::opt::ArgStringList &CmdArgs) const; void AddHexagonTargetArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; + void AddAArch64TargetArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; enum RewriteKind { RK_None, RK_Fragile, RK_NonFragile }; diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index e417949013f87754c7e82eaddc3c15bdec74b684..9f2dc3457522e012be7dadc99098aac4488d70ca 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -306,6 +306,10 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; + case llvm::Triple::aarch64: + if (CheckAArch64BuiltinFunctionCall(BuiltinID, TheCall)) + return ExprError(); + break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: @@ -342,6 +346,9 @@ static unsigned RFT(unsigned t, bool shift = false) { case NeonTypeFlags::Float32: assert(!shift && "cannot shift float types!"); return (2 << IsQuad) - 1; + case NeonTypeFlags::Float64: + assert(!shift && "cannot shift float types!"); + return (1 << IsQuad) - 1; } llvm_unreachable("Invalid NeonTypeFlag!"); } @@ -367,10 +374,90 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context) { return Context.UnsignedShortTy; case NeonTypeFlags::Float32: return Context.FloatTy; + case NeonTypeFlags::Float64: + return Context.DoubleTy; } llvm_unreachable("Invalid NeonTypeFlag!"); } +bool 
Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, + CallExpr *TheCall) { + + llvm::APSInt Result; + + uint64_t mask = 0; + unsigned TV = 0; + int PtrArgNum = -1; + bool HasConstPtr = false; + switch (BuiltinID) { +#define GET_NEON_AARCH64_OVERLOAD_CHECK +#include "clang/Basic/arm_neon.inc" +#undef GET_NEON_AARCH64_OVERLOAD_CHECK + } + + // For NEON intrinsics which are overloaded on vector element type, validate + // the immediate which specifies which variant to emit. + unsigned ImmArg = TheCall->getNumArgs() - 1; + if (mask) { + if (SemaBuiltinConstantArg(TheCall, ImmArg, Result)) + return true; + + TV = Result.getLimitedValue(64); + if ((TV > 63) || (mask & (1ULL << TV)) == 0) + return Diag(TheCall->getLocStart(), diag::err_invalid_neon_type_code) + << TheCall->getArg(ImmArg)->getSourceRange(); + } + + if (PtrArgNum >= 0) { + // Check that pointer arguments have the specified type. + Expr *Arg = TheCall->getArg(PtrArgNum); + if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) + Arg = ICE->getSubExpr(); + ExprResult RHS = DefaultFunctionArrayLvalueConversion(Arg); + QualType RHSTy = RHS.get()->getType(); + QualType EltTy = getNeonEltType(NeonTypeFlags(TV), Context); + if (HasConstPtr) + EltTy = EltTy.withConst(); + QualType LHSTy = Context.getPointerType(EltTy); + AssignConvertType ConvTy; + ConvTy = CheckSingleAssignmentConstraints(LHSTy, RHS); + if (RHS.isInvalid()) + return true; + if (DiagnoseAssignmentResult(ConvTy, Arg->getLocStart(), LHSTy, RHSTy, + RHS.get(), AA_Assigning)) + return true; + } + + // For NEON intrinsics which take an immediate value as part of the + // instruction, range check them here. + unsigned i = 0, l = 0, u = 0; + switch (BuiltinID) { + default: + return false; +#define GET_NEON_AARCH64_IMMEDIATE_CHECK +#include "clang/Basic/arm_neon.inc" +#undef GET_NEON_AARCH64_IMMEDIATE_CHECK + } + ; + + // We can't check the value of a dependent argument. + if (TheCall->getArg(i)->isTypeDependent() || + TheCall->getArg(i)->isValueDependent()) + return false; + + // Check that the immediate argument is actually a constant. + if (SemaBuiltinConstantArg(TheCall, i, Result)) + return true; + + // Range check against the upper/lower values for this instruction. + unsigned Val = Result.getZExtValue(); + if (Val < l || Val > (u + l)) + return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range) + << l << u + l << TheCall->getArg(i)->getSourceRange(); + + return false; +} + bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall) { assert((BuiltinID == ARM::BI__builtin_arm_ldrex || BuiltinID == ARM::BI__builtin_arm_strex) && diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp index 0106a679b9415c696e71ff2827f250b3bcaae33c..bc6b6a52fa3ce9a34d24881c3491ce3c8c8b0093 100644 --- a/lib/Sema/SemaType.cpp +++ b/lib/Sema/SemaType.cpp @@ -4603,6 +4603,42 @@ static void HandleExtVectorTypeAttr(QualType &CurType, CurType = T; } +static bool isPermittedNeonBaseType(QualType &Ty, + VectorType::VectorKind VecKind, + bool IsAArch64) { + const BuiltinType *BTy = Ty->getAs<BuiltinType>(); + if (!BTy) + return false; + + if (VecKind == VectorType::NeonPolyVector) { + if (IsAArch64) { + // AArch64 polynomial vectors are unsigned. + return BTy->getKind() == BuiltinType::UChar || + BTy->getKind() == BuiltinType::UShort; + } else { + // AArch32 polynomial vectors are signed.
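+ // (For example, poly8x8_t has SChar elements on AArch32 but UChar + // elements on AArch64, matching the mangling rules added in + // ItaniumMangle.cpp.)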
+ return BTy->getKind() == BuiltinType::SChar || + BTy->getKind() == BuiltinType::Short; + } + } + + // Non-polynomial vector types: the usual suspects are allowed, as well as + // float64_t on AArch64. + if (IsAArch64 && BTy->getKind() == BuiltinType::Double) + return true; + + return BTy->getKind() == BuiltinType::SChar || + BTy->getKind() == BuiltinType::UChar || + BTy->getKind() == BuiltinType::Short || + BTy->getKind() == BuiltinType::UShort || + BTy->getKind() == BuiltinType::Int || + BTy->getKind() == BuiltinType::UInt || + BTy->getKind() == BuiltinType::LongLong || + BTy->getKind() == BuiltinType::ULongLong || + BTy->getKind() == BuiltinType::Float || + BTy->getKind() == BuiltinType::Half; +} + /// HandleNeonVectorTypeAttr - The "neon_vector_type" and /// "neon_polyvector_type" attributes are used to create vector types that /// are mangled according to ARM's ABI. Otherwise, these types are identical @@ -4646,9 +4682,14 @@ static void HandleNeonVectorTypeAttr(QualType& CurType, BTy->getKind() != BuiltinType::LongLong && BTy->getKind() != BuiltinType::ULongLong && BTy->getKind() != BuiltinType::Float)) { - S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) <<CurType; - Attr.setInvalid(); - return; + llvm::Triple::ArchType Arch = + S.Context.getTargetInfo().getTriple().getArch(); + if (!isPermittedNeonBaseType(CurType, VecKind, + Arch == llvm::Triple::aarch64)) { + S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType; + Attr.setInvalid(); + return; + } } // The total size of the vector must be 64 or 128 bits. unsigned typeSize = static_cast<unsigned>(S.Context.getTypeSize(CurType)); diff --git a/test/CodeGen/aarch64-neon-intrinsics.c b/test/CodeGen/aarch64-neon-intrinsics.c new file mode 100644 index 0000000000000000000000000000000000000000..7ed8a39a023ecfcaaced93b44a4f2027ea62297f --- /dev/null +++ b/test/CodeGen/aarch64-neon-intrinsics.c @@ -0,0 +1,3023 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \ +// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s + +// Test new aarch64 intrinsics and types + +#include <arm_neon.h> + +int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vadd_s8 + return vadd_s8(v1, v2); + // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vadd_s16 + return vadd_s16(v1, v2); + // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vadd_s32 + return vadd_s32(v1, v2); + // CHECK: add {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) { + // CHECK: test_vadd_s64 + return vadd_s64(v1, v2); + // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vadd_f32 + return vadd_f32(v1, v2); + // CHECK: fadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vadd_u8 + return vadd_u8(v1, v2); + // CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vadd_u16 + return vadd_u16(v1, v2); + // CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vadd_u32 + return vadd_u32(v1, v2); + // CHECK: add {{v[0-9]+}}.2s, 
{{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) { + // CHECK: test_vadd_u64 + return vadd_u64(v1, v2); + // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vaddq_s8 + return vaddq_s8(v1, v2); + // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vaddq_s16 + return vaddq_s16(v1, v2); + // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vaddq_s32(int32x4_t v1,int32x4_t v2) { + // CHECK: test_vaddq_s32 + return vaddq_s32(v1, v2); + // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vaddq_s64 + return vaddq_s64(v1, v2); + // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vaddq_f32 + return vaddq_f32(v1, v2); + // CHECK: fadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vaddq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vaddq_f64 + return vaddq_f64(v1, v2); + // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vaddq_u8 + return vaddq_u8(v1, v2); + // CHECK: add {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vaddq_u16 + return vaddq_u16(v1, v2); + // CHECK: add {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: vaddq_u32 + return vaddq_u32(v1, v2); + // CHECK: add {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vaddq_u64 + return vaddq_u64(v1, v2); + // CHECK: add {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vsub_s8 + return vsub_s8(v1, v2); + // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vsub_s16 + return vsub_s16(v1, v2); + // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vsub_s32 + return vsub_s32(v1, v2); + // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) { + // CHECK: test_vsub_s64 + return vsub_s64(v1, v2); + // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vsub_f32 + return vsub_f32(v1, v2); + // CHECK: fsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vsub_u8 + return vsub_u8(v1, v2); + // CHECK: sub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vsub_u16 + return vsub_u16(v1, v2); + // CHECK: sub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vsub_u32 + return vsub_u32(v1, v2); + // CHECK: sub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) { + // CHECK: test_vsub_u64 + return vsub_u64(v1, v2); + // CHECK: sub {{d[0-9]+}}, 
{{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vsubq_s8 + return vsubq_s8(v1, v2); + // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vsubq_s16 + return vsubq_s16(v1, v2); + // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vsubq_s32(int32x4_t v1,int32x4_t v2) { + // CHECK: test_vsubq_s32 + return vsubq_s32(v1, v2); + // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vsubq_s64 + return vsubq_s64(v1, v2); + // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vsubq_f32 + return vsubq_f32(v1, v2); + // CHECK: fsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vsubq_f64 + return vsubq_f64(v1, v2); + // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vsubq_u8 + return vsubq_u8(v1, v2); + // CHECK: sub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vsubq_u16 + return vsubq_u16(v1, v2); + // CHECK: sub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: vsubq_u32 + return vsubq_u32(v1, v2); + // CHECK: sub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vsubq_u64 + return vsubq_u64(v1, v2); + // CHECK: sub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vmul_s8 + return vmul_s8(v1, v2); + // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vmul_s16 + return vmul_s16(v1, v2); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vmul_s32 + return vmul_s32(v1, v2); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vmul_f32 + return vmul_f32(v1, v2); + // CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + + +uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vmul_u8 + return vmul_u8(v1, v2); + // CHECK: mul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vmul_u16 + return vmul_u16(v1, v2); + // CHECK: mul {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vmul_u32 + return vmul_u32(v1, v2); + // CHECK: mul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vmulq_s8 + return vmulq_s8(v1, v2); + // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vmulq_s16 + return vmulq_s16(v1, v2); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vmulq_s32 + return vmulq_s32(v1, v2); + // 
CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vmulq_u8 + return vmulq_u8(v1, v2); + // CHECK: mul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vmulq_u16 + return vmulq_u16(v1, v2); + // CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vmulq_u32 + return vmulq_u32(v1, v2); + // CHECK: mul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vmulq_f32 + return vmulq_f32(v1, v2); + // CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vmulq_f64 + return vmulq_f64(v1, v2); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) { + // test_vmul_p8 + return vmul_p8(v1, v2); + // pmul {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) { + // test_vmulq_p8 + return vmulq_p8(v1, v2); + // pmul {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + + +int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vmla_s8 + return vmla_s8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vmla_s16 + return vmla_s16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vmla_s32 + return vmla_s32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vmla_f32 + return vmla_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vmla_u8 + return vmla_u8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vmla_u16 + return vmla_u16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { + // CHECK: test_vmla_u32 + return vmla_u32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vmlaq_s8 + return vmlaq_s8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vmlaq_s16 + return vmlaq_s16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vmlaq_s32 + return vmlaq_s32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vmlaq_f32 + return vmlaq_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vmlaq_u8
+ return vmlaq_u8(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vmlaq_u16 + return vmlaq_u16(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { + // CHECK: test_vmlaq_u32 + return vmlaq_u32(v1, v2, v3); + // CHECK: mla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: test_vmlaq_f64 + return vmlaq_f64(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vmls_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vmls_s8 + return vmls_s8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vmls_s16 + return vmls_s16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vmls_s32 + return vmls_s32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vmls_f32 + return vmls_f32(v1, v2, v3); + // CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vmls_u8 + return vmls_u8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vmls_u16 + return vmls_u16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { + // CHECK: test_vmls_u32 + return vmls_u32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vmlsq_s8 + return vmlsq_s8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vmlsq_s16 + return vmlsq_s16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vmlsq_s32 + return vmlsq_s32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vmlsq_f32 + return vmlsq_f32(v1, v2, v3); + // CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vmlsq_u8 + return vmlsq_u8(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vmlsq_u16 + return vmlsq_u16(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { + // CHECK: test_vmlsq_u32 + return vmlsq_u32(v1, v2, v3); + // CHECK: mls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, 
float64x2_t v3) { + // CHECK: test_vmlsq_f64 + return vmlsq_f64(v1, v2, v3); + // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vfma_f32 + return vfma_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vfmaq_f32 + return vfmaq_f32(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: test_vfmaq_f64 + return vfmaq_f64(v1, v2, v3); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vfms_f32 + return vfms_f32(v1, v2, v3); + // CHECK: fmls v0.2s, v1.2s, v2.2s +} + +float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vfmsq_f32 + return vfmsq_f32(v1, v2, v3); + // CHECK: fmls v0.4s, v1.4s, v2.4s +} + +float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: vfmsq_f64 + return vfmsq_f64(v1, v2, v3); + // CHECK: fmls v0.2d, v1.2d, v2.2d +} + +float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vdivq_f64 + return vdivq_f64(v1, v2); + // CHECK: fdiv {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vdivq_f32 + return vdivq_f32(v1, v2); + // CHECK: fdiv {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vdiv_f32 + return vdiv_f32(v1, v2); + // CHECK: fdiv {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vaddd_u64(uint64x1_t v1, uint64x1_t v2) { + // CHECK: test_vaddd_u64 + return vaddd_u64(v1, v2); + // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int64x1_t test_vaddd_s64(int64x1_t v1, int64x1_t v2) { + // CHECK: test_vaddd_s64 + return vaddd_s64(v1, v2); + // CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint64x1_t test_vsubd_u64(uint64x1_t v1, uint64x1_t v2) { + // CHECK: test_vsubd_u64 + return vsubd_u64(v1, v2); + // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int64x1_t test_vsubd_s64(int64x1_t v1, int64x1_t v2) { + // CHECK: test_vsubd_s64 + return vsubd_s64(v1, v2); + // CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vaba_s8 + return vaba_s8(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vaba_s16 + return vaba_s16(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vaba_s32 + return vaba_s32(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vaba_u8 + return vaba_u8(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vaba_u16 + return vaba_u16(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, 
uint32x2_t v3) { + // CHECK: test_vaba_u32 + return vaba_u32(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vabaq_s8 + return vabaq_s8(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vabaq_s16 + return vabaq_s16(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vabaq_s32 + return vabaq_s32(v1, v2, v3); + // CHECK: saba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vabaq_u8 + return vabaq_u8(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vabaq_u16 + return vabaq_u16(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { + // CHECK: test_vabaq_u32 + return vabaq_u32(v1, v2, v3); + // CHECK: uaba {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vabd_s8 + return vabd_s8(v1, v2); + // CHECK: sabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vabd_s16 + return vabd_s16(v1, v2); + // CHECK: sabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vabd_s32 + return vabd_s32(v1, v2); + // CHECK: sabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vabd_u8 + return vabd_u8(v1, v2); + // CHECK: uabd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vabd_u16 + return vabd_u16(v1, v2); + // CHECK: uabd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vabd_u32 + return vabd_u32(v1, v2); + // CHECK: uabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vabd_f32 + return vabd_f32(v1, v2); + // CHECK: fabd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vabdq_s8 + return vabdq_s8(v1, v2); + // CHECK: sabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vabdq_s16 + return vabdq_s16(v1, v2); + // CHECK: sabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vabdq_s32 + return vabdq_s32(v1, v2); + // CHECK: sabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vabdq_u8 + return vabdq_u8(v1, v2); + // CHECK: uabd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vabdq_u16 + return vabdq_u16(v1, v2); + // CHECK: uabd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { + // 
CHECK: test_vabdq_u32 + return vabdq_u32(v1, v2); + // CHECK: uabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vabdq_f32 + return vabdq_f32(v1, v2); + // CHECK: fabd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vabdq_f64 + return vabdq_f64(v1, v2); + // CHECK: fabd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { + // CHECK: test_vbsl_s8 + return vbsl_s8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { + // CHECK: test_vbsl_s16 + return vbsl_s16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { + // CHECK: test_vbsl_s32 + return vbsl_s32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) { + // CHECK: test_vbsl_s64 + return vbsl_s64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { + // CHECK: test_vbsl_u8 + return vbsl_u8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { + // CHECK: test_vbsl_u16 + return vbsl_u16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { + // CHECK: test_vbsl_u32 + return vbsl_u32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { + // CHECK: test_vbsl_u64 + return vbsl_u64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +float32x2_t test_vbsl_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { + // CHECK: test_vbsl_f32 + return vbsl_f32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { + // CHECK: test_vbsl_p8 + return vbsl_p8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { + // CHECK: test_vbsl_p16 + return vbsl_p16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { + // CHECK: test_vbslq_s8 + return vbslq_s8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { + // CHECK: test_vbslq_s16 + return vbslq_s16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { + // CHECK: test_vbslq_s32 + return vbslq_s32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { + // CHECK: test_vbslq_s64 + return vbslq_s64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { + // CHECK: test_vbslq_u8 + return vbslq_u8(v1, v2, v3);
+ // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { + // CHECK: test_vbslq_u16 + return vbslq_u16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint32x4_t test_vbslq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { + // CHECK: test_vbslq_u32 + return vbslq_u32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { + // CHECK: test_vbslq_u64 + return vbslq_u64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { + // CHECK: test_vbslq_f32 + return vbslq_f32(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { + // CHECK: test_vbslq_p8 + return vbslq_p8(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { + // CHECK: test_vbslq_p16 + return vbslq_p16(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { + // CHECK: test_vbslq_f64 + return vbslq_f64(v1, v2, v3); + // CHECK: bsl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vrecps_f32 + return vrecps_f32(v1, v2); + // CHECK: frecps {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vrecpsq_f32 + return vrecpsq_f32(v1, v2); + // CHECK: frecps {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vrecpsq_f64 + return vrecpsq_f64(v1, v2); + // CHECK: frecps {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vrsqrts_f32 + return vrsqrts_f32(v1, v2); + // CHECK: frsqrts {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vrsqrtsq_f32 + return vrsqrtsq_f32(v1, v2); + // CHECK: frsqrts {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vrsqrtsq_f64 + return vrsqrtsq_f64(v1, v2); + // CHECK: frsqrts {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcage_f32 + return vcage_f32(v1, v2); + // CHECK: facge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcageq_f32 + return vcageq_f32(v1, v2); + // CHECK: facge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcageq_f64 + return vcageq_f64(v1, v2); + // CHECK: facge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcagt_f32 + return vcagt_f32(v1, v2); + // CHECK: facgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcagtq_f32 + return vcagtq_f32(v1, 
v2); + // CHECK: facgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcagtq_f64 + return vcagtq_f64(v1, v2); + // CHECK: facgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcale_f32 + return vcale_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facge {{v[0-9]+}}.2s, v1.2s, v0.2s +} + +uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcaleq_f32 + return vcaleq_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facge {{v[0-9]+}}.4s, v1.4s, v0.4s +} + +uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcaleq_f64 + return vcaleq_f64(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facge {{v[0-9]+}}.2d, v1.2d, v0.2d +} + +uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcalt_f32 + return vcalt_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facgt {{v[0-9]+}}.2s, v1.2s, v0.2s +} + +uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcaltq_f32 + return vcaltq_f32(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facgt {{v[0-9]+}}.4s, v1.4s, v0.4s +} + +uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcaltq_f64 + return vcaltq_f64(v1, v2); + // Using registers other than v0, v1 are possible, but would be odd. + // CHECK: facgt {{v[0-9]+}}.2d, v1.2d, v0.2d +} + +uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vtst_s8 + return vtst_s8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vtst_s16 + return vtst_s16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vtst_s32 + return vtst_s32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vtst_u8 + return vtst_u8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vtst_u16 + return vtst_u16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vtst_u32 + return vtst_u32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vtstq_s8 + return vtstq_s8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vtstq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vtstq_s16 + return vtstq_s16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vtstq_s32 + return vtstq_s32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vtstq_u8 + return vtstq_u8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vtstq_u16(uint16x8_t v1, 
uint16x8_t v2) { + // CHECK: test_vtstq_u16 + return vtstq_u16(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vtstq_u32 + return vtstq_u32(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vtstq_s64 + return vtstq_s64(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vtstq_u64 + return vtstq_u64(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) { + // CHECK: test_vtst_p8 + return vtst_p8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) { + // CHECK: test_vtstq_p8 + return vtstq_p8(v1, v2); + // CHECK: cmtst {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + + +uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vceq_s8 + return vceq_s8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vceq_s16 + return vceq_s16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vceq_s32 + return vceq_s32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vceq_f32 + return vceq_f32(v1, v2); + // CHECK: fcmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vceq_u8 + return vceq_u8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vceq_u16 + return vceq_u16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vceq_u32 + return vceq_u32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) { + // CHECK: test_vceq_p8 + return vceq_p8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vceqq_s8 + return vceqq_s8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vceqq_s16 + return vceqq_s16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vceqq_s32 + return vceqq_s32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vceqq_f32 + return vceqq_f32(v1, v2); + // CHECK: fcmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vceqq_u8 + return vceqq_u8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vceqq_u16 + return vceqq_u16(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, 
{{v[0-9]+}}.8h +} + +uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vceqq_u32 + return vceqq_u32(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) { + // CHECK: test_vceqq_p8 + return vceqq_p8(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + + +uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vceqq_s64 + return vceqq_s64(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vceqq_u64 + return vceqq_u64(v1, v2); + // CHECK: cmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vceqq_f64 + return vceqq_f64(v1, v2); + // CHECK: fcmeq {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) { +// CHECK: test_vcge_s8 + return vcge_s8(v1, v2); +// CHECK: cmge {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vcge_s16 + return vcge_s16(v1, v2); +// CHECK: cmge {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vcge_s32 + return vcge_s32(v1, v2); +// CHECK: cmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { +// CHECK: test_vcge_f32 + return vcge_f32(v1, v2); +// CHECK: fcmge {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vcge_u8 + return vcge_u8(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: test_vcge_u16 + return vcge_u16(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vcge_u32 + return vcge_u32(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vcgeq_s8 + return vcgeq_s8(v1, v2); +// CHECK: cmge {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vcgeq_s16 + return vcgeq_s16(v1, v2); +// CHECK: cmge {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vcgeq_s32 + return vcgeq_s32(v1, v2); +// CHECK: cmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { +// CHECK: test_vcgeq_f32 + return vcgeq_f32(v1, v2); +// CHECK: fcmge {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vcgeq_u8 + return vcgeq_u8(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vcgeq_u16 + return vcgeq_u16(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vcgeq_u32 + return vcgeq_u32(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) { +// CHECK: test_vcgeq_s64 + return vcgeq_s64(v1, v2); +// CHECK: 
cmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) { +// CHECK: test_vcgeq_u64 + return vcgeq_u64(v1, v2); +// CHECK: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) { +// CHECK: test_vcgeq_f64 + return vcgeq_f64(v1, v2); +// CHECK: fcmge {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+// Notes about vcle: +// The LE condition predicate is implemented as GE, so the operands in the checks are reversed. +// Using registers other than v0 and v1 is possible, but would be odd.
+uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vcle_s8 + return vcle_s8(v1, v2); + // CHECK: cmge {{v[0-9]+}}.8b, v1.8b, v0.8b +} +
+uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vcle_s16 + return vcle_s16(v1, v2); + // CHECK: cmge {{v[0-9]+}}.4h, v1.4h, v0.4h +} +
+uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vcle_s32 + return vcle_s32(v1, v2); + // CHECK: cmge {{v[0-9]+}}.2s, v1.2s, v0.2s +} +
+uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcle_f32 + return vcle_f32(v1, v2); + // CHECK: fcmge {{v[0-9]+}}.2s, v1.2s, v0.2s +} +
+uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vcle_u8 + return vcle_u8(v1, v2); + // CHECK: cmhs {{v[0-9]+}}.8b, v1.8b, v0.8b +} +
+uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vcle_u16 + return vcle_u16(v1, v2); + // CHECK: cmhs {{v[0-9]+}}.4h, v1.4h, v0.4h +} +
+uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vcle_u32 + return vcle_u32(v1, v2); + // CHECK: cmhs {{v[0-9]+}}.2s, v1.2s, v0.2s +} +
+uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vcleq_s8 + return vcleq_s8(v1, v2); + // CHECK: cmge {{v[0-9]+}}.16b, v1.16b, v0.16b +} +
+uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vcleq_s16 + return vcleq_s16(v1, v2); + // CHECK: cmge {{v[0-9]+}}.8h, v1.8h, v0.8h +} +
+uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vcleq_s32 + return vcleq_s32(v1, v2); + // CHECK: cmge {{v[0-9]+}}.4s, v1.4s, v0.4s +} +
+uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcleq_f32 + return vcleq_f32(v1, v2); + // CHECK: fcmge {{v[0-9]+}}.4s, v1.4s, v0.4s +} +
+uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vcleq_u8 + return vcleq_u8(v1, v2); + // CHECK: cmhs {{v[0-9]+}}.16b, v1.16b, v0.16b +} +
+uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vcleq_u16 + return vcleq_u16(v1, v2); + // CHECK: cmhs {{v[0-9]+}}.8h, v1.8h, v0.8h +} +
+uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vcleq_u32 + return vcleq_u32(v1, v2); + // CHECK: cmhs {{v[0-9]+}}.4s, v1.4s, v0.4s +} +
+uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vcleq_s64 + return vcleq_s64(v1, v2); + // CHECK: cmge {{v[0-9]+}}.2d, v1.2d, v0.2d +} +
+uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vcleq_u64 + return vcleq_u64(v1, v2); + // CHECK: cmhs {{v[0-9]+}}.2d, v1.2d, v0.2d +} +
+uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcleq_f64 + return vcleq_f64(v1, v2); + // CHECK: fcmge {{v[0-9]+}}.2d, v1.2d, v0.2d +} + +
+uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vcgt_s8 + return vcgt_s8(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +
+uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vcgt_s16 + return vcgt_s16(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +
+uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vcgt_s32 + return vcgt_s32(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vcgt_f32 + return vcgt_f32(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vcgt_u8 + return vcgt_u8(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +
+uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vcgt_u16 + return vcgt_u16(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +
+uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vcgt_u32 + return vcgt_u32(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vcgtq_s8 + return vcgtq_s8(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vcgtq_s16 + return vcgtq_s16(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vcgtq_s32 + return vcgtq_s32(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcgtq_f32 + return vcgtq_f32(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vcgtq_u8 + return vcgtq_u8(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vcgtq_u16 + return vcgtq_u16(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vcgtq_u32 + return vcgtq_u32(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vcgtq_s64 + return vcgtq_s64(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vcgtq_u64 + return vcgtq_u64(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcgtq_f64 + return vcgtq_f64(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +
+// Notes about vclt: +// The LT condition predicate is implemented as GT, so the operands in the checks are reversed. +// Using registers other than v0 and v1 is possible, but would be odd.
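+// For example, under the AAPCS64 the first argument of test_vclt_s8 below
+// arrives in register v0 and the second in v1, so vclt_s8(v1, v2) is expected
+// to be selected as cmgt with swapped operands: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b.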
+ +uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) { + // CHECK: test_vclt_s8 + return vclt_s8(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.8b, v1.8b, v0.8b +} + +uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) { + // CHECK: test_vclt_s16 + return vclt_s16(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.4h, v1.4h, v0.4h +} + +uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) { + // CHECK: test_vclt_s32 + return vclt_s32(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.2s, v1.2s, v0.2s +} + +uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { + // CHECK: test_vclt_f32 + return vclt_f32(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.2s, v1.2s, v0.2s +} + +uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) { + // CHECK: test_vclt_u8 + return vclt_u8(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.8b, v1.8b, v0.8b +} + +uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) { + // CHECK: test_vclt_u16 + return vclt_u16(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h +} + +uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) { + // CHECK: test_vclt_u32 + return vclt_u32(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s +} + +uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) { + // CHECK: test_vcltq_s8 + return vcltq_s8(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.16b, v1.16b, v0.16b +} + +uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) { + // CHECK: test_vcltq_s16 + return vcltq_s16(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.8h, v1.8h, v0.8h +} + +uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) { + // CHECK: test_vcltq_s32 + return vcltq_s32(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.4s, v1.4s, v0.4s +} + +uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) { + // CHECK: test_vcltq_f32 + return vcltq_f32(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.4s, v1.4s, v0.4s +} + +uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) { + // CHECK: test_vcltq_u8 + return vcltq_u8(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b +} + +uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) { + // CHECK: test_vcltq_u16 + return vcltq_u16(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h +} + +uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) { + // CHECK: test_vcltq_u32 + return vcltq_u32(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s +} + +uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) { + // CHECK: test_vcltq_s64 + return vcltq_s64(v1, v2); + // CHECK: cmgt {{v[0-9]+}}.2d, v1.2d, v0.2d +} + +uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) { + // CHECK: test_vcltq_u64 + return vcltq_u64(v1, v2); + // CHECK: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d +} + +uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { + // CHECK: test_vcltq_f64 + return vcltq_f64(v1, v2); + // CHECK: fcmgt {{v[0-9]+}}.2d, v1.2d, v0.2d +} + + +int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { +// CHECK: test_vhadd_s8 + return vhadd_s8(v1, v2); + // CHECK: shadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vhadd_s16 + return vhadd_s16(v1, v2); + // CHECK: shadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vhadd_s32 + return vhadd_s32(v1, v2); + // CHECK: shadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vhadd_u8 + return vhadd_u8(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: 
test_vhadd_u16 + return vhadd_u16(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vhadd_u32 + return vhadd_u32(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vhaddq_s8 + return vhaddq_s8(v1, v2); + // CHECK: shadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vhaddq_s16 + return vhaddq_s16(v1, v2); + // CHECK: shadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vhaddq_s32 + return vhaddq_s32(v1, v2); + // CHECK: shadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vhaddq_u8 + return vhaddq_u8(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vhaddq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vhaddq_u16 + return vhaddq_u16(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vhaddq_u32 + return vhaddq_u32(v1, v2); + // CHECK: uhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + + +int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { +// CHECK: test_vhsub_s8 + return vhsub_s8(v1, v2); + // CHECK: shsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vhsub_s16 + return vhsub_s16(v1, v2); + // CHECK: shsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vhsub_s32 + return vhsub_s32(v1, v2); + // CHECK: shsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vhsub_u8 + return vhsub_u8(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: test_vhsub_u16 + return vhsub_u16(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vhsub_u32 + return vhsub_u32(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vhsubq_s8 + return vhsubq_s8(v1, v2); + // CHECK: shsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vhsubq_s16 + return vhsubq_s16(v1, v2); + // CHECK: shsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vhsubq_s32 + return vhsubq_s32(v1, v2); + // CHECK: shsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vhsubq_u8 + return vhsubq_u8(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vhsubq_u16 + return vhsubq_u16(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vhsubq_u32 + return vhsubq_u32(v1, v2); + // CHECK: uhsub {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + + +int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { +// CHECK: test_vrhadd_s8 + return vrhadd_s8(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { +// CHECK: test_vrhadd_s16 + return vrhadd_s16(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vrhadd_s32(int32x2_t v1, int32x2_t v2) { +// CHECK: test_vrhadd_s32 + return vrhadd_s32(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { +// CHECK: test_vrhadd_u8 + return vrhadd_u8(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { +// CHECK: test_vrhadd_u16 + return vrhadd_u16(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { +// CHECK: test_vrhadd_u32 + return vrhadd_u32(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { +// CHECK: test_vrhaddq_s8 + return vrhaddq_s8(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { +// CHECK: test_vrhaddq_s16 + return vrhaddq_s16(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { +// CHECK: test_vrhaddq_s32 + return vrhaddq_s32(v1, v2); +// CHECK: srhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { +// CHECK: test_vrhaddq_u8 + return vrhaddq_u8(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { +// CHECK: test_vrhaddq_u16 + return vrhaddq_u16(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { +// CHECK: test_vrhaddq_u32 + return vrhaddq_u32(v1, v2); +// CHECK: urhadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vqadd_s8 + return vqadd_s8(a, b); + // CHECK: sqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqadd_s16 + return vqadd_s16(a, b); + // CHECK: sqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqadd_s32 + return vqadd_s32(a, b); + // CHECK: sqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vqadd_s64 + return vqadd_s64(a, b); +// CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vqadd_u8 + return vqadd_u8(a, b); + // CHECK: uqadd {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vqadd_u16 + return vqadd_u16(a, b); + // CHECK: uqadd {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vqadd_u32 + return vqadd_u32(a, b); + // CHECK: uqadd {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: 
test_vqadd_u64 + return vqadd_u64(a, b); +// CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vqaddq_s8 + return vqaddq_s8(a, b); + // CHECK: sqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqaddq_s16 + return vqaddq_s16(a, b); + // CHECK: sqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqaddq_s32 + return vqaddq_s32(a, b); + // CHECK: sqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vqaddq_s64 + return vqaddq_s64(a, b); +// CHECK: sqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vqaddq_u8 + return vqaddq_u8(a, b); + // CHECK: uqadd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vqaddq_u16 + return vqaddq_u16(a, b); + // CHECK: uqadd {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vqaddq_u32 + return vqaddq_u32(a, b); + // CHECK: uqadd {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { +// CHECK: test_vqaddq_u64 + return vqaddq_u64(a, b); +// CHECK: uqadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vqsub_s8 + return vqsub_s8(a, b); + // CHECK: sqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqsub_s16 + return vqsub_s16(a, b); + // CHECK: sqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqsub_s32 + return vqsub_s32(a, b); + // CHECK: sqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vqsub_s64 + return vqsub_s64(a, b); +// CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vqsub_u8 + return vqsub_u8(a, b); + // CHECK: uqsub {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vqsub_u16 + return vqsub_u16(a, b); + // CHECK: uqsub {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vqsub_u32 + return vqsub_u32(a, b); + // CHECK: uqsub {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { +// CHECK: test_vqsub_u64 + return vqsub_u64(a, b); +// CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vqsubq_s8 + return vqsubq_s8(a, b); + // CHECK: sqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqsubq_s16 + return vqsubq_s16(a, b); + // CHECK: sqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqsubq_s32 + return vqsubq_s32(a, b); + // CHECK: sqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vqsubq_s64 + 
return vqsubq_s64(a, b); +// CHECK: sqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vqsubq_u8 + return vqsubq_u8(a, b); + // CHECK: uqsub {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vqsubq_u16 + return vqsubq_u16(a, b); + // CHECK: uqsub {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vqsubq_u32 + return vqsubq_u32(a, b); + // CHECK: uqsub {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { +// CHECK: test_vqsubq_u64 + return vqsubq_u64(a, b); + // CHECK: uqsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vshl_s8 + return vshl_s8(a, b); +// CHECK: sshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vshl_s16 + return vshl_s16(a, b); +// CHECK: sshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vshl_s32 + return vshl_s32(a, b); +// CHECK: sshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vshl_s64 + return vshl_s64(a, b); +// CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { +// CHECK: test_vshl_u8 + return vshl_u8(a, b); +// CHECK: ushl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { +// CHECK: test_vshl_u16 + return vshl_u16(a, b); +// CHECK: ushl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { +// CHECK: test_vshl_u32 + return vshl_u32(a, b); +// CHECK: ushl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { +// CHECK: test_vshl_u64 + return vshl_u64(a, b); +// CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vshlq_s8 + return vshlq_s8(a, b); +// CHECK: sshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vshlq_s16 + return vshlq_s16(a, b); +// CHECK: sshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vshlq_s32 + return vshlq_s32(a, b); +// CHECK: sshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vshlq_s64 + return vshlq_s64(a, b); +// CHECK: sshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { +// CHECK: test_vshlq_u8 + return vshlq_u8(a, b); +// CHECK: ushl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { +// CHECK: test_vshlq_u16 + return vshlq_u16(a, b); +// CHECK: ushl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { +// CHECK: test_vshlq_u32 + return vshlq_u32(a, b); +// CHECK: ushl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { +// CHECK: test_vshlq_u64 + return vshlq_u64(a, b); +// CHECK: ushl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, 
{{v[0-9]+}}.2d +} + +
+int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vqshl_s8 + return vqshl_s8(a, b); +// CHECK: sqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +
+int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqshl_s16 + return vqshl_s16(a, b); +// CHECK: sqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +
+int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqshl_s32 + return vqshl_s32(a, b); +// CHECK: sqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vqshl_s64 + return vqshl_s64(a, b); +// CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} +
+uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { +// CHECK: test_vqshl_u8 + return vqshl_u8(a, b); +// CHECK: uqshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +
+uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { +// CHECK: test_vqshl_u16 + return vqshl_u16(a, b); +// CHECK: uqshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +
+uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { +// CHECK: test_vqshl_u32 + return vqshl_u32(a, b); +// CHECK: uqshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { +// CHECK: test_vqshl_u64 + return vqshl_u64(a, b); +// CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} +
+int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vqshlq_s8 + return vqshlq_s8(a, b); +// CHECK: sqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqshlq_s16 + return vqshlq_s16(a, b); +// CHECK: sqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqshlq_s32 + return vqshlq_s32(a, b); +// CHECK: sqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vqshlq_s64 + return vqshlq_s64(a, b); +// CHECK: sqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { +// CHECK: test_vqshlq_u8 + return vqshlq_u8(a, b); +// CHECK: uqshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { +// CHECK: test_vqshlq_u16 + return vqshlq_u16(a, b); +// CHECK: uqshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { +// CHECK: test_vqshlq_u32 + return vqshlq_u32(a, b); +// CHECK: uqshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { +// CHECK: test_vqshlq_u64 + return vqshlq_u64(a, b); +// CHECK: uqshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vrshl_s8 + return vrshl_s8(a, b); +// CHECK: srshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +
+int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vrshl_s16 + return vrshl_s16(a, b); +// CHECK: srshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +
+int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vrshl_s32 + return vrshl_s32(a, b); +// CHECK: srshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vrshl_s64 + return vrshl_s64(a, b); +// CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} +
+uint8x8_t test_vrshl_u8(uint8x8_t a,
int8x8_t b) { +// CHECK: test_vrshl_u8 + return vrshl_u8(a, b); +// CHECK: urshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { +// CHECK: test_vrshl_u16 + return vrshl_u16(a, b); +// CHECK: urshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { +// CHECK: test_vrshl_u32 + return vrshl_u32(a, b); +// CHECK: urshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { +// CHECK: test_vrshl_u64 + return vrshl_u64(a, b); +// CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vrshlq_s8 + return vrshlq_s8(a, b); +// CHECK: srshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vrshlq_s16 + return vrshlq_s16(a, b); +// CHECK: srshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vrshlq_s32 + return vrshlq_s32(a, b); +// CHECK: srshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vrshlq_s64 + return vrshlq_s64(a, b); +// CHECK: srshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { +// CHECK: test_vrshlq_u8 + return vrshlq_u8(a, b); +// CHECK: urshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { +// CHECK: test_vrshlq_u16 + return vrshlq_u16(a, b); +// CHECK: urshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { +// CHECK: test_vrshlq_u32 + return vrshlq_u32(a, b); +// CHECK: urshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { +// CHECK: test_vrshlq_u64 + return vrshlq_u64(a, b); +// CHECK: urshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vqrshl_s8 + return vqrshl_s8(a, b); +// CHECK: sqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqrshl_s16 + return vqrshl_s16(a, b); +// CHECK: sqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqrshl_s32 + return vqrshl_s32(a, b); +// CHECK: sqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { +// CHECK: test_vqrshl_s64 + return vqrshl_s64(a, b); +// CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { +// CHECK: test_vqrshl_u8 + return vqrshl_u8(a, b); +// CHECK: uqrshl {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { +// CHECK: test_vqrshl_u16 + return vqrshl_u16(a, b); +// CHECK: uqrshl {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { +// CHECK: test_vqrshl_u32 + return vqrshl_u32(a, b); +// CHECK: uqrshl {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { +// CHECK: test_vqrshl_u64 + return vqrshl_u64(a, b); +// CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +} + +int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { 
+// CHECK: test_vqrshlq_s8 + return vqrshlq_s8(a, b); +// CHECK: sqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqrshlq_s16 + return vqrshlq_s16(a, b); +// CHECK: sqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqrshlq_s32 + return vqrshlq_s32(a, b); +// CHECK: sqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { +// CHECK: test_vqrshlq_s64 + return vqrshlq_s64(a, b); +// CHECK: sqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { +// CHECK: test_vqrshlq_u8 + return vqrshlq_u8(a, b); +// CHECK: uqrshl {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) { +// CHECK: test_vqrshlq_u16 + return vqrshlq_u16(a, b); +// CHECK: uqrshl {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { +// CHECK: test_vqrshlq_u32 + return vqrshlq_u32(a, b); +// CHECK: uqrshl {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { +// CHECK: test_vqrshlq_u64 + return vqrshlq_u64(a, b); +// CHECK: uqrshl {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} +
+int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vmax_s8 + return vmax_s8(a, b); +// CHECK: smax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +
+int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vmax_s16 + return vmax_s16(a, b); +// CHECK: smax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +
+int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vmax_s32 + return vmax_s32(a, b); +// CHECK: smax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vmax_u8 + return vmax_u8(a, b); +// CHECK: umax {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} +
+uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vmax_u16 + return vmax_u16(a, b); +// CHECK: umax {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} +
+uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vmax_u32 + return vmax_u32(a, b); +// CHECK: umax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmax_f32 + return vmax_f32(a, b); +// CHECK: fmax {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} +
+int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vmaxq_s8 + return vmaxq_s8(a, b); +// CHECK: smax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vmaxq_s16 + return vmaxq_s16(a, b); +// CHECK: smax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vmaxq_s32 + return vmaxq_s32(a, b); +// CHECK: smax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} +
+uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vmaxq_u8 + return vmaxq_u8(a, b); +// CHECK: umax {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} +
+uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vmaxq_u16 + return vmaxq_u16(a, b); +// CHECK: umax {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} +
+uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK:
test_vmaxq_u32 + return vmaxq_u32(a, b); +// CHECK: umax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vmaxq_f32 + return vmaxq_f32(a, b); +// CHECK: fmax {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vmaxq_f64 + return vmaxq_f64(a, b); +// CHECK: fmax {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + + +int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vmin_s8 + return vmin_s8(a, b); +// CHECK: smin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vmin_s16 + return vmin_s16(a, b); +// CHECK: smin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vmin_s32 + return vmin_s32(a, b); +// CHECK: smin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vmin_u8 + return vmin_u8(a, b); +// CHECK: umin {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vmin_u16 + return vmin_u16(a, b); +// CHECK: umin {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vmin_u32 + return vmin_u32(a, b); +// CHECK: umin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmin_f32 + return vmin_f32(a, b); +// CHECK: fmin {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vminq_s8 + return vminq_s8(a, b); +// CHECK: smin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vminq_s16 + return vminq_s16(a, b); +// CHECK: smin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vminq_s32 + return vminq_s32(a, b); +// CHECK: smin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vminq_u8 + return vminq_u8(a, b); +// CHECK: umin {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vminq_u16 + return vminq_u16(a, b); +// CHECK: umin {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vminq_u32 + return vminq_u32(a, b); +// CHECK: umin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vminq_f32 + return vminq_f32(a, b); +// CHECK: fmin {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vminq_f64 + return vminq_f64(a, b); +// CHECK: fmin {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmaxnm_f32 + return vmaxnm_f32(a, b); +// CHECK: fmaxnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vmaxnmq_f32 + return vmaxnmq_f32(a, b); +// CHECK: fmaxnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vmaxnmq_f64 + 
return vmaxnmq_f64(a, b); +// CHECK: fmaxnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vminnm_f32 + return vminnm_f32(a, b); +// CHECK: fminnm {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vminnmq_f32 + return vminnmq_f32(a, b); +// CHECK: fminnm {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vminnmq_f64 + return vminnmq_f64(a, b); +// CHECK: fminnm {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vpmax_s8 + return vpmax_s8(a, b); +// CHECK: smaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vpmax_s16 + return vpmax_s16(a, b); +// CHECK: smaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vpmax_s32 + return vpmax_s32(a, b); +// CHECK: smaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vpmax_u8 + return vpmax_u8(a, b); +// CHECK: umaxp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vpmax_u16 + return vpmax_u16(a, b); +// CHECK: umaxp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vpmax_u32 + return vpmax_u32(a, b); +// CHECK: umaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpmax_f32 + return vpmax_f32(a, b); +// CHECK: fmaxp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vpmaxq_s8 + return vpmaxq_s8(a, b); +// CHECK: smaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vpmaxq_s16 + return vpmaxq_s16(a, b); +// CHECK: smaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vpmaxq_s32 + return vpmaxq_s32(a, b); +// CHECK: smaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vpmaxq_u8 + return vpmaxq_u8(a, b); +// CHECK: umaxp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vpmaxq_u16 + return vpmaxq_u16(a, b); +// CHECK: umaxp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vpmaxq_u32 + return vpmaxq_u32(a, b); +// CHECK: umaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpmaxq_f32 + return vpmaxq_f32(a, b); +// CHECK: fmaxp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpmaxq_f64 + return vpmaxq_f64(a, b); +// CHECK: fmaxp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vpmin_s8 + return vpmin_s8(a, b); +// CHECK: sminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { 
+// CHECK: test_vpmin_s16 + return vpmin_s16(a, b); +// CHECK: sminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vpmin_s32 + return vpmin_s32(a, b); +// CHECK: sminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vpmin_u8 + return vpmin_u8(a, b); +// CHECK: uminp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vpmin_u16 + return vpmin_u16(a, b); +// CHECK: uminp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vpmin_u32 + return vpmin_u32(a, b); +// CHECK: uminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpmin_f32 + return vpmin_f32(a, b); +// CHECK: fminp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vpminq_s8 + return vpminq_s8(a, b); +// CHECK: sminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vpminq_s16 + return vpminq_s16(a, b); +// CHECK: sminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vpminq_s32 + return vpminq_s32(a, b); +// CHECK: sminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vpminq_u8 + return vpminq_u8(a, b); +// CHECK: uminp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vpminq_u16 + return vpminq_u16(a, b); +// CHECK: uminp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vpminq_u32 + return vpminq_u32(a, b); +// CHECK: uminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpminq_f32 + return vpminq_f32(a, b); +// CHECK: fminp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpminq_f64 + return vpminq_f64(a, b); +// CHECK: fminp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpmaxnm_f32 + return vpmaxnm_f32(a, b); +// CHECK: fmaxnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpmaxnmq_f32 + return vpmaxnmq_f32(a, b); +// CHECK: fmaxnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpmaxnmq_f64 + return vpmaxnmq_f64(a, b); +// CHECK: fmaxnmp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpminnm_f32 + return vpminnm_f32(a, b); +// CHECK: fminnmp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpminnmq_f32 + return vpminnmq_f32(a, b); +// CHECK: fminnmp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpminnmq_f64 + return vpminnmq_f64(a, b); +// CHECK: fminnmp 
{{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { +// CHECK: test_vpadd_s8 + return vpadd_s8(a, b); +// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vpadd_s16 + return vpadd_s16(a, b); +// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vpadd_s32 + return vpadd_s32(a, b); +// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { +// CHECK: test_vpadd_u8 + return vpadd_u8(a, b); +// CHECK: addp {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +} + +uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { +// CHECK: test_vpadd_u16 + return vpadd_u16(a, b); +// CHECK: addp {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { +// CHECK: test_vpadd_u32 + return vpadd_u32(a, b); +// CHECK: addp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vpadd_f32 + return vpadd_f32(a, b); +// CHECK: faddp {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { +// CHECK: test_vpaddq_s8 + return vpaddq_s8(a, b); +// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vpaddq_s16 + return vpaddq_s16(a, b); +// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vpaddq_s32 + return vpaddq_s32(a, b); +// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { +// CHECK: test_vpaddq_u8 + return vpaddq_u8(a, b); +// CHECK: addp {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b +} + +uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) { +// CHECK: test_vpaddq_u16 + return vpaddq_u16(a, b); +// CHECK: addp {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { +// CHECK: test_vpaddq_u32 + return vpaddq_u32(a, b); +// CHECK: addp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vpaddq_f32 + return vpaddq_f32(a, b); +// CHECK: faddp {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vpaddq_f64 + return vpaddq_f64(a, b); +// CHECK: faddp {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqdmulh_s16 + return vqdmulh_s16(a, b); +// CHECK: sqdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqdmulh_s32 + return vqdmulh_s32(a, b); +// CHECK: sqdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqdmulhq_s16 + return vqdmulhq_s16(a, b); +// CHECK: sqdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqdmulhq_s32 + return vqdmulhq_s32(a, b); +// CHECK: sqdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { +// CHECK: test_vqrdmulh_s16 + return 
vqrdmulh_s16(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h +} + +int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { +// CHECK: test_vqrdmulh_s32 + return vqrdmulh_s32(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { +// CHECK: test_vqrdmulhq_s16 + return vqrdmulhq_s16(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +} + +int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { +// CHECK: test_vqrdmulhq_s32 + return vqrdmulhq_s32(a, b); +// CHECK: sqrdmulh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + + +float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { +// CHECK: test_vmulx_f32 + return vmulx_f32(a, b); +// CHECK: fmulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s +} + +float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { +// CHECK: test_vmulxq_f32 + return vmulxq_f32(a, b); +// CHECK: fmulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +} + +float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { +// CHECK: test_vmulxq_f64 + return vmulxq_f64(a, b); +// CHECK: fmulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + diff --git a/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp b/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp new file mode 100644 index 0000000000000000000000000000000000000000..86509a0739d46955cd90d4e95319a0ab8b713e7b --- /dev/null +++ b/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp @@ -0,0 +1,85 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - | FileCheck %s + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed long long int64_t; +typedef unsigned long long uint64_t; +typedef unsigned char poly8_t; +typedef unsigned short poly16_t; +typedef __fp16 float16_t; +typedef float float32_t; +typedef double float64_t; + +typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; +typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; +typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; +typedef __attribute__((neon_vector_type(8))) int16_t int16x8_t; +typedef __attribute__((neon_vector_type(2))) int int32x2_t; +typedef __attribute__((neon_vector_type(4))) int int32x4_t; +typedef __attribute__((neon_vector_type(2))) int64_t int64x2_t; +typedef __attribute__((neon_vector_type(8))) uint8_t uint8x8_t; +typedef __attribute__((neon_vector_type(16))) uint8_t uint8x16_t; +typedef __attribute__((neon_vector_type(4))) uint16_t uint16x4_t; +typedef __attribute__((neon_vector_type(8))) uint16_t uint16x8_t; +typedef __attribute__((neon_vector_type(2))) unsigned int uint32x2_t; +typedef __attribute__((neon_vector_type(4))) unsigned int uint32x4_t; +typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; +typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; +typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; +typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; +typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; +typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t; +typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t; +typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; +typedef __attribute__((neon_polyvector_type(4))) poly16_t poly16x4_t; +typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; + +// CHECK: 
10__Int8x8_t +void f1(int8x8_t) {} +// CHECK: 11__Int16x4_t +void f2(int16x4_t) {} +// CHECK: 11__Int32x2_t +void f3(int32x2_t) {} +// CHECK: 11__Uint8x8_t +void f4(uint8x8_t) {} +// CHECK: 12__Uint16x4_t +void f5(uint16x4_t) {} +// CHECK: 13__Float16x4_t +void f6(float16x4_t) {} +// CHECK: 13__Float16x8_t +void f7(float16x8_t) {} +// CHECK: 12__Uint32x2_t +void f8(uint32x2_t) {} +// CHECK: 13__Float32x2_t +void f9(float32x2_t) {} +// CHECK: 13__Float32x4_t +void f10(float32x4_t) {} +// CHECK: 11__Poly8x8_t +void f11(poly8x8_t v) {} +// CHECK: 12__Poly16x4_t +void f12(poly16x4_t v) {} +// CHECK:12__Poly8x16_t +void f13(poly8x16_t v) {} +// CHECK:12__Poly16x8_t +void f14(poly16x8_t v) {} +// CHECK: 11__Int8x16_t +void f15(int8x16_t) {} +// CHECK: 11__Int16x8_t +void f16(int16x8_t) {} +// CHECK:11__Int32x4_t +void f17(int32x4_t) {} +// CHECK: 12__Uint8x16_t +void f18(uint8x16_t) {} +// CHECK: 12__Uint16x8_t +void f19(uint16x8_t) {} +// CHECK: 12__Uint32x4_t +void f20(uint32x4_t) {} +// CHECK: 11__Int64x2_t +void f21(int64x2_t) {} +// CHECK: 12__Uint64x2_t +void f22(uint64x2_t) {} +// CHECK: 13__Float64x2_t +void f23(float64x2_t) {} diff --git a/test/CodeGenCXX/mangle-neon-vectors.cpp b/test/CodeGenCXX/mangle-neon-vectors.cpp index 3723deb192199efd6169d80defcc53f453bcfeb5..793c89803fc4794b4468dfacd62481060bffb3d2 100644 --- a/test/CodeGenCXX/mangle-neon-vectors.cpp +++ b/test/CodeGenCXX/mangle-neon-vectors.cpp @@ -1,6 +1,7 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple arm-none-linux-gnueabi %s -emit-llvm -o - | FileCheck %s typedef float float32_t; +typedef __fp16 float16_t; typedef signed char poly8_t; typedef short poly16_t; typedef unsigned long long uint64_t; @@ -11,8 +12,10 @@ typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; -typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; -typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; +typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; +typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; +typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; +typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; // CHECK: 16__simd64_int32_t void f1(int32x2_t v) { } @@ -26,7 +29,11 @@ void f4(uint64x2_t v) { } void f5(float32x2_t v) { } // CHECK: 19__simd128_float32_t void f6(float32x4_t v) { } +// CHECK: 18__simd64_float16_t +void f7(float16x4_t v) {} +// CHECK: 19__simd128_float16_t +void f8(float16x8_t v) {} // CHECK: 17__simd128_poly8_t -void f7(poly8x16_t v) { } +void f9(poly8x16_t v) {} // CHECK: 18__simd128_poly16_t -void f8(poly16x8_t v) { } +void f10(poly16x8_t v) {} diff --git a/test/Preprocessor/aarch64-target-features.c b/test/Preprocessor/aarch64-target-features.c index 8bb8427c0d2da549b9360ad91846c83ab1edf933..25bdb71bc3c656e399a811d9094dd384c7452d95 100644 --- a/test/Preprocessor/aarch64-target-features.c +++ b/test/Preprocessor/aarch64-target-features.c @@ -30,3 +30,6 @@ // RUN: %clang -target aarch64-none-linux-gnu -fshort-enums -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SHORTENUMS %s // CHECK-SHORTENUMS: __ARM_SIZEOF_MINIMAL_ENUM 1 +// RUN: %clang -target aarch64-none-linux-gnu -mfpu=neon -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-NEON %s +// CHECK-NEON: 
__AARCH_ADVSIMD_FP +// CHECK-NEON: __AARCH_FEATURE_ADVSIMD diff --git a/test/Sema/aarch64-neon-vector-types.c b/test/Sema/aarch64-neon-vector-types.c new file mode 100644 index 0000000000000000000000000000000000000000..f4d58ffd09359090b4e081817cf7074528681853 --- /dev/null +++ b/test/Sema/aarch64-neon-vector-types.c @@ -0,0 +1,34 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 %s -triple aarch64-none-linux-gnu -fsyntax-only -verify + +typedef float float32_t; +typedef unsigned char poly8_t; +typedef unsigned short poly16_t; +typedef unsigned long long uint64_t; + +// Define some valid Neon types. +typedef __attribute__((neon_vector_type(2))) int int32x2_t; +typedef __attribute__((neon_vector_type(4))) int int32x4_t; +typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; +typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; +typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; +typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t; +typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; +typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; + +// The attributes must have a single argument. +typedef __attribute__((neon_vector_type(2, 4))) int only_one_arg; // expected-error{{attribute takes one argument}} + +// The number of elements must be an ICE. +typedef __attribute__((neon_vector_type(2.0))) int non_int_width; // expected-error{{attribute requires an integer constant}} + +// Only certain element types are allowed. +typedef __attribute__((neon_vector_type(2))) double double_elt; +typedef __attribute__((neon_vector_type(4))) void* ptr_elt; // expected-error{{invalid vector element type}} +typedef __attribute__((neon_polyvector_type(4))) float32_t bad_poly_elt; // expected-error{{invalid vector element type}} +struct aggr { signed char c; }; +typedef __attribute__((neon_vector_type(8))) struct aggr aggregate_elt; // expected-error{{invalid vector element type}} + +// The total vector size must be 64 or 128 bits. 
+typedef __attribute__((neon_vector_type(1))) int int32x1_t; // expected-error{{Neon vector size must be 64 or 128 bits}} +typedef __attribute__((neon_vector_type(3))) int int32x3_t; // expected-error{{Neon vector size must be 64 or 128 bits}} diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp index bb505de95daa6959e33b704868f63a710b510845..411aa7e4ab206a79bbef293acb54f99093aef50c 100644 --- a/utils/TableGen/NeonEmitter.cpp +++ b/utils/TableGen/NeonEmitter.cpp @@ -90,7 +90,8 @@ enum OpKind { OpReinterpret, OpAbdl, OpAba, - OpAbal + OpAbal, + OpDiv }; enum ClassKind { @@ -127,7 +128,8 @@ public: Poly8, Poly16, Float16, - Float32 + Float32, + Float64 }; NeonTypeFlags(unsigned F) : Flags(F) {} @@ -205,6 +207,7 @@ public: OpMap["OP_ABDL"] = OpAbdl; OpMap["OP_ABA"] = OpAba; OpMap["OP_ABAL"] = OpAbal; + OpMap["OP_DIV"] = OpDiv; Record *SI = R.getClass("SInst"); Record *II = R.getClass("IInst"); @@ -235,7 +238,18 @@ public: void runTests(raw_ostream &o); private: - void emitIntrinsic(raw_ostream &OS, Record *R); + void emitIntrinsic(raw_ostream &OS, Record *R, + StringMap<ClassKind> &EmittedMap); + void genBuiltinsDef(raw_ostream &OS, StringMap<ClassKind> &A64IntrinsicMap, + bool isA64GenBuiltinDef); + void genOverloadTypeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64TypeCheck); + void genIntrinsicRangeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64RangeCheck); + void genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap, + bool isA64TestGen); }; } // end anonymous namespace @@ -259,6 +273,7 @@ static void ParseTypes(Record *r, std::string &s, case 'l': case 'h': case 'f': + case 'd': break; default: PrintFatalError(r->getLoc(), @@ -347,6 +362,8 @@ static char ModType(const char mod, char type, bool &quad, bool &poly, poly = false; if (type == 'f') type = 'i'; + if (type == 'd') + type = 'l'; break; case 'x': usgn = false; @@ -470,6 +487,13 @@ static std::string TypeString(const char mod, StringRef typestr) { break; s += quad ? "x4" : "x2"; break; + case 'd': + s += "float64"; + if (scal) + break; + s += quad ? 
"x2" : "x1"; + break; + default: PrintFatalError("unhandled type!"); } @@ -647,6 +671,18 @@ static void InstructionTypeCode(const StringRef &typeStr, default: break; } break; + case 'd': + switch (ck) { + case ClassS: + case ClassI: + typeCode += "f64"; + break; + case ClassW: + PrintFatalError("unhandled type!"); + default: + break; + } + break; default: PrintFatalError("unhandled type!"); } @@ -1252,6 +1288,9 @@ static unsigned GetNumElements(StringRef typestr, bool &quad) { case 'l': nElts = 1; break; case 'h': nElts = 4; break; case 'f': nElts = 2; break; + case 'd': + nElts = 1; + break; default: PrintFatalError("unhandled type!"); } @@ -1488,6 +1527,9 @@ static std::string GenOpString(OpKind op, const std::string &proto, } break; } + case OpDiv: + s += "__a / __b;"; + break; default: PrintFatalError("unknown OpKind!"); } @@ -1533,6 +1575,9 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) { case 'f': ET = NeonTypeFlags::Float32; break; + case 'd': + ET = NeonTypeFlags::Float64; + break; default: PrintFatalError("unhandled type!"); } @@ -1776,7 +1821,7 @@ void NeonEmitter::run(raw_ostream &OS) { OS << "#ifndef __ARM_NEON_H\n"; OS << "#define __ARM_NEON_H\n\n"; - OS << "#ifndef __ARM_NEON__\n"; + OS << "#if !defined(__ARM_NEON__) && !defined(__AARCH_FEATURE_ADVSIMD)\n"; OS << "#error \"NEON support not enabled\"\n"; OS << "#endif\n\n"; @@ -1784,19 +1829,39 @@ void NeonEmitter::run(raw_ostream &OS) { // Emit NEON-specific scalar typedefs. OS << "typedef float float32_t;\n"; + OS << "typedef __fp16 float16_t;\n"; + + OS << "#ifdef __aarch64__\n"; + OS << "typedef double float64_t;\n"; + OS << "#endif\n\n"; + + // For now, signedness of polynomial types depends on target + OS << "#ifdef __aarch64__\n"; + OS << "typedef uint8_t poly8_t;\n"; + OS << "typedef uint16_t poly16_t;\n"; + OS << "#else\n"; OS << "typedef int8_t poly8_t;\n"; OS << "typedef int16_t poly16_t;\n"; - OS << "typedef uint16_t float16_t;\n"; + OS << "#endif\n"; // Emit Neon vector typedefs. - std::string TypedefTypes("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfPcQPcPsQPs"); + std::string TypedefTypes( + "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfQdPcQPcPsQPs"); SmallVector<StringRef, 24> TDTypeVec; ParseTypes(0, TypedefTypes, TDTypeVec); // Emit vector typedefs. for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) { bool dummy, quad = false, poly = false; - (void) ClassifyType(TDTypeVec[i], quad, poly, dummy); + char type = ClassifyType(TDTypeVec[i], quad, poly, dummy); + bool isA64 = false; + + if (type == 'd' && quad) + isA64 = true; + + if (isA64) + OS << "#ifdef __aarch64__\n"; + if (poly) OS << "typedef __attribute__((neon_polyvector_type("; else @@ -1809,19 +1874,37 @@ void NeonEmitter::run(raw_ostream &OS) { OS << TypeString('s', TDTypeVec[i]); OS << " " << TypeString('d', TDTypeVec[i]) << ";\n"; + + if (isA64) + OS << "#endif\n"; } OS << "\n"; // Emit struct typedefs. 
 for (unsigned vi = 2; vi != 5; ++vi) {
   for (unsigned i = 0, e = TDTypeVec.size(); i != e; ++i) {
+    bool dummy, quad = false, poly = false;
+    char type = ClassifyType(TDTypeVec[i], quad, poly, dummy);
+    bool isA64 = false;
+
+    if (type == 'd' && quad)
+      isA64 = true;
+
+    if (isA64)
+      OS << "#ifdef __aarch64__\n";
+
     std::string ts = TypeString('d', TDTypeVec[i]);
     std::string vs = TypeString('0' + vi, TDTypeVec[i]);
     OS << "typedef struct " << vs << " {\n";
     OS << " " << ts << " val";
     OS << "[" << utostr(vi) << "]";
     OS << ";\n} ";
-    OS << vs << ";\n\n";
+    OS << vs << ";\n";
+
+    if (isA64)
+      OS << "#endif\n";
+
+    OS << "\n";
   }
 }
@@ -1829,30 +1912,58 @@ void NeonEmitter::run(raw_ostream &OS) {
 
   std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
 
+  StringMap<ClassKind> EmittedMap;
+
   // Emit vmovl, vmull and vabd intrinsics first so they can be used by other
   // intrinsics. (Some of the saturating multiply instructions are also
   // used to implement the corresponding "_lane" variants, but tablegen
   // sorts the records into alphabetical order so that the "_lane" variants
   // come after the intrinsics they use.)
-  emitIntrinsic(OS, Records.getDef("VMOVL"));
-  emitIntrinsic(OS, Records.getDef("VMULL"));
-  emitIntrinsic(OS, Records.getDef("VABD"));
-
+  emitIntrinsic(OS, Records.getDef("VMOVL"), EmittedMap);
+  emitIntrinsic(OS, Records.getDef("VMULL"), EmittedMap);
+  emitIntrinsic(OS, Records.getDef("VABD"), EmittedMap);
+
+  // ARM intrinsics must be emitted before AArch64 intrinsics to ensure
+  // common intrinsics appear only once in the output stream.
+  // The check for uniqueness is done in emitIntrinsic.
+  // Emit ARM intrinsics.
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
-    if (R->getName() != "VMOVL" &&
-        R->getName() != "VMULL" &&
+
+    // Skip AArch64 intrinsics; they will be emitted at the end.
+    bool isA64 = R->getValueAsBit("isA64");
+    if (isA64)
+      continue;
+
+    if (R->getName() != "VMOVL" && R->getName() != "VMULL" &&
        R->getName() != "VABD")
-      emitIntrinsic(OS, R);
+      emitIntrinsic(OS, R, EmittedMap);
   }
 
+  // Emit AArch64-specific intrinsics.
+  OS << "#ifdef __aarch64__\n";
+
+  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+    Record *R = RV[i];
+
+    // Skip ARM intrinsics already included above.
+    bool isA64 = R->getValueAsBit("isA64");
+    if (!isA64)
+      continue;
+
+    emitIntrinsic(OS, R, EmittedMap);
+  }
+
+  OS << "#endif\n\n";
+
   OS << "#undef __ai\n\n";
   OS << "#endif /* __ARM_NEON_H */\n";
 }
 
 /// emitIntrinsic - Write out the arm_neon.h header file definitions for the
-/// intrinsics specified by record R.
-void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) {
+/// intrinsics specified by record R, checking for intrinsic uniqueness.
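+/// Uniqueness is keyed on the full text of the generated definition: an
+/// intrinsic such as vadd_s8, which both an ARM record and an AArch64 record
+/// declare, is emitted into arm_neon.h only the first time it is seen.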
+void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R, + StringMap<ClassKind> &EmittedMap) { std::string name = R->getValueAsString("Name"); std::string Proto = R->getValueAsString("Prototype"); std::string Types = R->getValueAsString("Types"); @@ -1879,12 +1990,20 @@ void NeonEmitter::emitIntrinsic(raw_ostream &OS, Record *R) { (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy); if (srcti == ti || inQuad != outQuad) continue; - OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], - OpCast, ClassS); + std::string s = GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[srcti], + OpCast, ClassS); + if (EmittedMap.count(s)) + continue; + EmittedMap[s] = ClassS; + OS << s; } } else { - OS << GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], - kind, classKind); + std::string s = + GenIntrinsic(name, Proto, TypeVec[ti], TypeVec[ti], kind, classKind); + if (EmittedMap.count(s)) + continue; + EmittedMap[s] = classKind; + OS << s; } } OS << "\n"; @@ -1912,56 +2031,151 @@ static unsigned RangeFromType(const char mod, StringRef typestr) { } } -/// runHeader - Emit a file with sections defining: -/// 1. the NEON section of BuiltinsARM.def. -/// 2. the SemaChecking code for the type overload checking. -/// 3. the SemaChecking code for validation of intrinsic immediate arguments. -void NeonEmitter::runHeader(raw_ostream &OS) { - std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst"); - +/// Generate the ARM and AArch64 intrinsic range checking code for +/// shift/lane immediates, checking for unique declarations. +void +NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64RangeCheck) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); StringMap<OpKind> EmittedMap; - // Generate BuiltinsARM.def for NEON - OS << "#ifdef GET_NEON_BUILTINS\n"; + // Generate the intrinsic range checking code for shift/lane immediates. + if (isA64RangeCheck) + OS << "#ifdef GET_NEON_AARCH64_IMMEDIATE_CHECK\n"; + else + OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; + for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; + OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; if (k != OpNone) continue; + std::string name = R->getValueAsString("Name"); std::string Proto = R->getValueAsString("Prototype"); + std::string Types = R->getValueAsString("Types"); // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. if (Proto.find('a') != std::string::npos) continue; - std::string Types = R->getValueAsString("Types"); + // Functions which do not have an immediate do not need to have range + // checking code emitted. + size_t immPos = Proto.find('i'); + if (immPos == std::string::npos) + continue; + SmallVector<StringRef, 16> TypeVec; ParseTypes(R, Types, TypeVec); if (R->getSuperClasses().size() < 2) PrintFatalError(R->getLoc(), "Builtin has no class kind"); - std::string name = R->getValueAsString("Name"); ClassKind ck = ClassMap[R->getSuperClasses()[1]]; + // Do not include AArch64 range checks if not generating code for AArch64. + bool isA64 = R->getValueAsBit("isA64"); + if (!isA64RangeCheck && isA64) + continue; + + // Include ARM range checks in AArch64 but only if ARM intrinsics are not + // redefined by AArch64 to handle new types. 
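+    // For example, an ARM lane/shift intrinsic redefined by an AArch64 record
+    // of the same class kind is skipped here, so the AArch64 record (which
+    // also covers the new types) supplies the AArch64 check instead.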
+ if (isA64RangeCheck && !isA64 && A64IntrinsicMap.count(name)) { + ClassKind &A64CK = A64IntrinsicMap[name]; + if (A64CK == ck && ck != ClassNone) + continue; + } + for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - // Generate the BuiltinsARM.def declaration for this builtin, ensuring - // that each unique BUILTIN() macro appears only once in the output - // stream. - std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); - if (EmittedMap.count(bd)) + std::string namestr, shiftstr, rangestr; + + if (R->getValueAsBit("isVCVT_N")) { + // VCVT between floating- and fixed-point values takes an immediate + // in the range 1 to 32. + ck = ClassB; + rangestr = "l = 1; u = 31"; // upper bound = l + u + } else if (Proto.find('s') == std::string::npos) { + // Builtins which are overloaded by type will need to have their upper + // bound computed at Sema time based on the type constant. + ck = ClassB; + if (R->getValueAsBit("isShift")) { + shiftstr = ", true"; + + // Right shifts have an 'r' in the name, left shifts do not. + if (name.find('r') != std::string::npos) + rangestr = "l = 1; "; + } + rangestr += "u = RFT(TV" + shiftstr + ")"; + } else { + // The immediate generally refers to a lane in the preceding argument. + assert(immPos > 0 && "unexpected immediate operand"); + rangestr = + "u = " + utostr(RangeFromType(Proto[immPos - 1], TypeVec[ti])); + } + // Make sure cases appear only once by uniquing them in a string map. + namestr = MangleName(name, TypeVec[ti], ck); + if (EmittedMap.count(namestr)) continue; + EmittedMap[namestr] = OpNone; - EmittedMap[bd] = OpNone; - OS << bd << "\n"; + // Calculate the index of the immediate that should be range checked. + unsigned immidx = 0; + + // Builtins that return a struct of multiple vectors have an extra + // leading arg for the struct return. + if (Proto[0] >= '2' && Proto[0] <= '4') + ++immidx; + + // Add one to the index for each argument until we reach the immediate + // to be checked. Structs of vectors are passed as multiple arguments. + for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { + switch (Proto[ii]) { + default: + immidx += 1; + break; + case '2': + immidx += 2; + break; + case '3': + immidx += 3; + break; + case '4': + immidx += 4; + break; + case 'i': + ie = ii + 1; + break; + } + } + if (isA64RangeCheck) + OS << "case AArch64::BI__builtin_neon_"; + else + OS << "case ARM::BI__builtin_neon_"; + OS << MangleName(name, TypeVec[ti], ck) << ": i = " << immidx << "; " + << rangestr << "; break;\n"; } } OS << "#endif\n\n"; +} + +/// Generate the ARM and AArch64 overloaded type checking code for +/// SemaChecking.cpp, checking for unique builtin declarations. +void +NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64TypeCheck) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + StringMap<OpKind> EmittedMap; // Generate the overloaded type checking code for SemaChecking.cpp - OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; + if (isA64TypeCheck) + OS << "#ifdef GET_NEON_AARCH64_OVERLOAD_CHECK\n"; + else + OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n"; + for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; @@ -1988,6 +2202,21 @@ void NeonEmitter::runHeader(raw_ostream &OS) { if (R->getSuperClasses().size() < 2) PrintFatalError(R->getLoc(), "Builtin has no class kind"); + // Do not include AArch64 type checks if not generating code for AArch64. 
+ bool isA64 = R->getValueAsBit("isA64"); + if (!isA64TypeCheck && isA64) + continue; + + // Include ARM type check in AArch64 but only if ARM intrinsics + // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr + // redefined in AArch64 to handle an additional 2 x f64 type. + ClassKind ck = ClassMap[R->getSuperClasses()[1]]; + if (isA64TypeCheck && !isA64 && A64IntrinsicMap.count(name)) { + ClassKind &A64CK = A64IntrinsicMap[name]; + if (A64CK == ck && ck != ClassNone) + continue; + } + int si = -1, qi = -1; uint64_t mask = 0, qmask = 0; for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { @@ -2035,9 +2264,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) { } if (mask) { - OS << "case ARM::BI__builtin_neon_" - << MangleName(name, TypeVec[si], ClassB) - << ": mask = " << "0x" << utohexstr(mask) << "ULL"; + if (isA64TypeCheck) + OS << "case AArch64::BI__builtin_neon_"; + else + OS << "case ARM::BI__builtin_neon_"; + OS << MangleName(name, TypeVec[si], ClassB) << ": mask = " + << "0x" << utohexstr(mask) << "ULL"; if (PtrArgNum >= 0) OS << "; PtrArgNum = " << PtrArgNum; if (HasConstPtr) @@ -2045,9 +2277,12 @@ void NeonEmitter::runHeader(raw_ostream &OS) { OS << "; break;\n"; } if (qmask) { - OS << "case ARM::BI__builtin_neon_" - << MangleName(name, TypeVec[qi], ClassB) - << ": mask = " << "0x" << utohexstr(qmask) << "ULL"; + if (isA64TypeCheck) + OS << "case AArch64::BI__builtin_neon_"; + else + OS << "case ARM::BI__builtin_neon_"; + OS << MangleName(name, TypeVec[qi], ClassB) << ": mask = " + << "0x" << utohexstr(qmask) << "ULL"; if (PtrArgNum >= 0) OS << "; PtrArgNum = " << PtrArgNum; if (HasConstPtr) @@ -2056,31 +2291,37 @@ void NeonEmitter::runHeader(raw_ostream &OS) { } } OS << "#endif\n\n"; +} + +/// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def +/// declaration of builtins, checking for unique builtin declarations. +void NeonEmitter::genBuiltinsDef(raw_ostream &OS, + StringMap<ClassKind> &A64IntrinsicMap, + bool isA64GenBuiltinDef) { + std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst"); + StringMap<OpKind> EmittedMap; + + // Generate BuiltinsARM.def and BuiltinsAArch64.def + if (isA64GenBuiltinDef) + OS << "#ifdef GET_NEON_AARCH64_BUILTINS\n"; + else + OS << "#ifdef GET_NEON_BUILTINS\n"; - // Generate the intrinsic range checking code for shift/lane immediates. - OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n"; for (unsigned i = 0, e = RV.size(); i != e; ++i) { Record *R = RV[i]; - OpKind k = OpMap[R->getValueAsDef("Operand")->getName()]; if (k != OpNone) continue; - std::string name = R->getValueAsString("Name"); std::string Proto = R->getValueAsString("Prototype"); - std::string Types = R->getValueAsString("Types"); + std::string name = R->getValueAsString("Name"); // Functions with 'a' (the splat code) in the type prototype should not get // their own builtin as they use the non-splat variant. if (Proto.find('a') != std::string::npos) continue; - // Functions which do not have an immediate do not need to have range - // checking code emitted. 
- size_t immPos = Proto.find('i'); - if (immPos == std::string::npos) - continue; - + std::string Types = R->getValueAsString("Types"); SmallVector<StringRef, 16> TypeVec; ParseTypes(R, Types, TypeVec); @@ -2089,70 +2330,90 @@ void NeonEmitter::runHeader(raw_ostream &OS) { ClassKind ck = ClassMap[R->getSuperClasses()[1]]; - for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { - std::string namestr, shiftstr, rangestr; - - if (R->getValueAsBit("isVCVT_N")) { - // VCVT between floating- and fixed-point values takes an immediate - // in the range 1 to 32. - ck = ClassB; - rangestr = "l = 1; u = 31"; // upper bound = l + u - } else if (Proto.find('s') == std::string::npos) { - // Builtins which are overloaded by type will need to have their upper - // bound computed at Sema time based on the type constant. - ck = ClassB; - if (R->getValueAsBit("isShift")) { - shiftstr = ", true"; + // Do not include AArch64 BUILTIN() macros if not generating + // code for AArch64 + bool isA64 = R->getValueAsBit("isA64"); + if (!isA64GenBuiltinDef && isA64) + continue; - // Right shifts have an 'r' in the name, left shifts do not. - if (name.find('r') != std::string::npos) - rangestr = "l = 1; "; - } - rangestr += "u = RFT(TV" + shiftstr + ")"; - } else { - // The immediate generally refers to a lane in the preceding argument. - assert(immPos > 0 && "unexpected immediate operand"); - rangestr = "u = " + utostr(RangeFromType(Proto[immPos-1], TypeVec[ti])); - } - // Make sure cases appear only once by uniquing them in a string map. - namestr = MangleName(name, TypeVec[ti], ck); - if (EmittedMap.count(namestr)) + // Include ARM BUILTIN() macros in AArch64 but only if ARM intrinsics + // are not redefined in AArch64 to handle new types, e.g. "vabd" is a SIntr + // redefined in AArch64 to handle an additional 2 x f64 type. + if (isA64GenBuiltinDef && !isA64 && A64IntrinsicMap.count(name)) { + ClassKind &A64CK = A64IntrinsicMap[name]; + if (A64CK == ck && ck != ClassNone) continue; - EmittedMap[namestr] = OpNone; - - // Calculate the index of the immediate that should be range checked. - unsigned immidx = 0; + } - // Builtins that return a struct of multiple vectors have an extra - // leading arg for the struct return. - if (Proto[0] >= '2' && Proto[0] <= '4') - ++immidx; + for (unsigned ti = 0, te = TypeVec.size(); ti != te; ++ti) { + // Generate the declaration for this builtin, ensuring + // that each unique BUILTIN() macro appears only once in the output + // stream. + std::string bd = GenBuiltinDef(name, Proto, TypeVec[ti], ck); + if (EmittedMap.count(bd)) + continue; - // Add one to the index for each argument until we reach the immediate - // to be checked. Structs of vectors are passed as multiple arguments. - for (unsigned ii = 1, ie = Proto.size(); ii != ie; ++ii) { - switch (Proto[ii]) { - default: immidx += 1; break; - case '2': immidx += 2; break; - case '3': immidx += 3; break; - case '4': immidx += 4; break; - case 'i': ie = ii + 1; break; - } - } - OS << "case ARM::BI__builtin_neon_" << MangleName(name, TypeVec[ti], ck) - << ": i = " << immidx << "; " << rangestr << "; break;\n"; + EmittedMap[bd] = OpNone; + OS << bd << "\n"; } } OS << "#endif\n\n"; } +/// runHeader - Emit a file with sections defining: +/// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def. +/// 2. the SemaChecking code for the type overload checking. +/// 3. the SemaChecking code for validation of intrinsic immediate arguments. 
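+///
+/// As an illustrative sketch (the exact prototype encoding comes from
+/// GenBuiltinDef), an entry in the emitted BuiltinsARM.def section looks like:
+///   #ifdef GET_NEON_BUILTINS
+///   BUILTIN(__builtin_neon_vabd_v, "V8ScV8ScV8Sci", "n")
+///   #endif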
+void NeonEmitter::runHeader(raw_ostream &OS) {
+  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+
+  // Build a map of AArch64 intrinsics to be used in uniqueness checks.
+  StringMap<ClassKind> A64IntrinsicMap;
+  for (unsigned i = 0, e = RV.size(); i != e; ++i) {
+    Record *R = RV[i];
+
+    bool isA64 = R->getValueAsBit("isA64");
+    if (!isA64)
+      continue;
+
+    ClassKind CK = ClassNone;
+    if (R->getSuperClasses().size() >= 2)
+      CK = ClassMap[R->getSuperClasses()[1]];
+
+    std::string Name = R->getValueAsString("Name");
+    if (A64IntrinsicMap.count(Name))
+      continue;
+    A64IntrinsicMap[Name] = CK;
+  }
+
+  // Generate BuiltinsARM.def for ARM
+  genBuiltinsDef(OS, A64IntrinsicMap, false);
+
+  // Generate BuiltinsAArch64.def for AArch64
+  genBuiltinsDef(OS, A64IntrinsicMap, true);
+
+  // Generate ARM overloaded type checking code for SemaChecking.cpp
+  genOverloadTypeCheckCode(OS, A64IntrinsicMap, false);
+
+  // Generate AArch64 overloaded type checking code for SemaChecking.cpp
+  genOverloadTypeCheckCode(OS, A64IntrinsicMap, true);
+
+  // Generate ARM range checking code for shift/lane immediates.
+  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, false);
+
+  // Generate the AArch64 range checking code for shift/lane immediates.
+  genIntrinsicRangeCheckCode(OS, A64IntrinsicMap, true);
+}
+
 /// GenTest - Write out a test for the intrinsic specified by the name and
 /// type strings, including the embedded patterns for FileCheck to match.
 static std::string GenTest(const std::string &name,
                            const std::string &proto,
                            StringRef outTypeStr, StringRef inTypeStr,
                            bool isShift, bool isHiddenLOp,
-                           ClassKind ck, const std::string &InstName) {
+                           ClassKind ck, const std::string &InstName,
+                           bool isA64,
+                           std::string &testFuncProto) {
   assert(!proto.empty() && "");
   std::string s;
@@ -2167,12 +2428,17 @@ static std::string GenTest(const std::string &name,
     mangledName = MangleName(mangledName, inTypeNoQuad, ClassS);
   }
 
+  // TODO: GenerateChecksForIntrinsic does not generate CHECK
+  // for AArch64 instructions yet.
   std::vector<std::string> FileCheckPatterns;
-  GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
-                             isHiddenLOp, FileCheckPatterns);
+  if (!isA64) {
+    GenerateChecksForIntrinsic(name, proto, outTypeStr, inTypeStr, ck, InstName,
+                               isHiddenLOp, FileCheckPatterns);
+    s += "// CHECK_ARM: test_" + mangledName + "\n";
+  }
+  s += "// CHECK_AARCH64: test_" + mangledName + "\n";
 
   // Emit the FileCheck patterns.
-  s += "// CHECK: test_" + mangledName + "\n";
   // If for any reason we do not want to emit a check, mangledInst
   // will be the empty string.
   if (FileCheckPatterns.size()) {
     for (std::vector<std::string>::const_iterator i = FileCheckPatterns.begin(),
                                                   e = FileCheckPatterns.end();
          i != e;
          ++i) {
-      s += "// CHECK: " + *i + "\n";
+      s += "// CHECK_ARM: " + *i + "\n";
     }
   }
 
   // Emit the start of the test function.
-  s += TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
+
+  testFuncProto = TypeString(proto[0], outTypeStr) + " test_" + mangledName + "(";
   char arg = 'a';
   std::string comma;
   for (unsigned i = 1, e = proto.size(); i != e; ++i, ++arg) {
     // Do not create arguments for values that must be immediate constants.
     if (proto[i] == 'i')
       continue;
-    s += comma + TypeString(proto[i], inTypeStr) + " ";
-    s.push_back(arg);
+    testFuncProto += comma + TypeString(proto[i], inTypeStr) + " ";
+    testFuncProto.push_back(arg);
     comma = ", ";
   }
-  s += ") {\n ";
+  testFuncProto += ")";
+
+  s += testFuncProto;
+  s += " {\n ";
 
   if (proto[0] != 'v')
     s += "return ";
 
@@ -2220,20 +2490,14 @@ static std::string GenTest(const std::string &name,
   return s;
 }
 
-/// runTests - Write out a complete set of tests for all of the Neon
-/// intrinsics.
-void NeonEmitter::runTests(raw_ostream &OS) {
-  OS <<
-    "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\\\n"
-    "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
-    "// RUN: | FileCheck %s\n"
-    "\n"
-    "// REQUIRES: long_tests\n"
-    "\n"
-    "#include <arm_neon.h>\n"
-    "\n";
+/// Write out all intrinsic tests for the specified target, checking
+/// for intrinsic test uniqueness.
+void NeonEmitter::genTargetTest(raw_ostream &OS, StringMap<OpKind> &EmittedMap,
+                                bool isA64GenTest) {
+  if (isA64GenTest)
+    OS << "#ifdef __aarch64__\n";
 
-  std::vector<Record*> RV = Records.getAllDerivedDefinitions("Inst");
+  std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   for (unsigned i = 0, e = RV.size(); i != e; ++i) {
     Record *R = RV[i];
     std::string name = R->getValueAsString("Name");
@@ -2242,6 +2506,12 @@ void NeonEmitter::runTests(raw_ostream &OS) {
     bool isShift = R->getValueAsBit("isShift");
     std::string InstName = R->getValueAsString("InstName");
     bool isHiddenLOp = R->getValueAsBit("isHiddenLInst");
+    bool isA64 = R->getValueAsBit("isA64");
+
+    // Do not include AArch64 intrinsic tests if not generating
+    // code for AArch64.
+    if (!isA64GenTest && isA64)
+      continue;
 
     SmallVector<StringRef, 16> TypeVec;
     ParseTypes(R, Types, TypeVec);
@@ -2261,16 +2531,56 @@ void NeonEmitter::runTests(raw_ostream &OS) {
         (void)ClassifyType(TypeVec[srcti], inQuad, dummy, dummy);
         if (srcti == ti || inQuad != outQuad)
           continue;
-        OS << GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
-                      isShift, isHiddenLOp, ck, InstName);
+        std::string testFuncProto;
+        std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[srcti],
+                                isShift, isHiddenLOp, ck, InstName, isA64,
+                                testFuncProto);
+        if (EmittedMap.count(testFuncProto))
+          continue;
+        EmittedMap[testFuncProto] = kind;
+        OS << s << "\n";
       }
     } else {
-      OS << GenTest(name, Proto, TypeVec[ti], TypeVec[ti],
-                    isShift, isHiddenLOp, ck, InstName);
+      std::string testFuncProto;
+      std::string s = GenTest(name, Proto, TypeVec[ti], TypeVec[ti], isShift,
+                              isHiddenLOp, ck, InstName, isA64, testFuncProto);
+      if (EmittedMap.count(testFuncProto))
+        continue;
+      EmittedMap[testFuncProto] = kind;
+      OS << s << "\n";
     }
   }
-  OS << "\n";
 }
+
+  if (isA64GenTest)
+    OS << "#endif\n";
+}
+
+/// runTests - Write out a complete set of tests for all of the Neon
+/// intrinsics.
+void NeonEmitter::runTests(raw_ostream &OS) {
+  OS << "// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi "
+        "apcs-gnu\\\n"
+        "// RUN: -target-cpu swift -ffreestanding -Os -S -o - %s\\\n"
+        "// RUN: | FileCheck %s -check-prefix=CHECK_ARM\n"
+        "\n"
+        "// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \\\n"
+        "// RUN: -target-feature +neon -ffreestanding -S -o - %s \\\n"
+        "// RUN: | FileCheck %s -check-prefix=CHECK_AARCH64\n"
+        "\n"
+        "// REQUIRES: long_tests\n"
+        "\n"
+        "#include <arm_neon.h>\n"
+        "\n";
+
+  // ARM tests must be emitted before AArch64 tests to ensure
+  // tests for intrinsics that are common to ARM and AArch64
+  // appear only once in the output stream.
+ // The check for uniqueness is done in genTargetTest. + StringMap<OpKind> EmittedMap; + + genTargetTest(OS, EmittedMap, false); + + genTargetTest(OS, EmittedMap, true); } namespace clang {