diff --git a/include/clang/Basic/TargetOptions.h b/include/clang/Basic/TargetOptions.h index 3718ee316a8d65bb72ed0f5f64d0fb19d1bbff03..81fb7b21170df938823fb9b9a4d4692485414f67 100644 --- a/include/clang/Basic/TargetOptions.h +++ b/include/clang/Basic/TargetOptions.h @@ -27,6 +27,10 @@ public: /// target will be selected to match the host. std::string Triple; + /// When compiling for the device side, contains the triple used to compile + /// for the host. + std::string HostTriple; + /// If given, the name of the target CPU to generate code for. std::string CPU; diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp index 0a87cdf061e8400d8fd09ba06748f4d87e57502c..1df35ab846594e3f08a9b44e4802896788ae6428 100644 --- a/lib/Basic/Targets.cpp +++ b/lib/Basic/Targets.cpp @@ -65,6 +65,9 @@ static void defineCPUMacros(MacroBuilder &Builder, StringRef CPUName, Builder.defineMacro("__tune_" + CPUName + "__"); } +static TargetInfo *AllocateTarget(const llvm::Triple &Triple, + const TargetOptions &Opts); + //===----------------------------------------------------------------------===// // Defines specific to certain operating systems. //===----------------------------------------------------------------------===// @@ -1627,7 +1630,7 @@ class NVPTXTargetInfo : public TargetInfo { } GPU; public: - NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : TargetInfo(Triple) { BigEndian = false; TLSSupported = false; @@ -1639,6 +1642,65 @@ public: NoAsmVariants = true; // Set the default GPU to sm20 GPU = GK_SM20; + + // If possible, get a TargetInfo for our host triple, so we can match its + // types. + llvm::Triple HostTriple(Opts.HostTriple); + if (HostTriple.isNVPTX()) + return; + std::unique_ptr<TargetInfo> HostTarget( + AllocateTarget(llvm::Triple(Opts.HostTriple), Opts)); + if (!HostTarget) { + return; + } + + PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0); + PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0); + BoolWidth = HostTarget->getBoolWidth(); + BoolAlign = HostTarget->getBoolAlign(); + IntWidth = HostTarget->getIntWidth(); + IntAlign = HostTarget->getIntAlign(); + HalfWidth = HostTarget->getHalfWidth(); + HalfAlign = HostTarget->getHalfAlign(); + FloatWidth = HostTarget->getFloatWidth(); + FloatAlign = HostTarget->getFloatAlign(); + DoubleWidth = HostTarget->getDoubleWidth(); + DoubleAlign = HostTarget->getDoubleAlign(); + LongWidth = HostTarget->getLongWidth(); + LongAlign = HostTarget->getLongAlign(); + LongLongWidth = HostTarget->getLongLongWidth(); + LongLongAlign = HostTarget->getLongLongAlign(); + MinGlobalAlign = HostTarget->getMinGlobalAlign(); + DefaultAlignForAttributeAligned = + HostTarget->getDefaultAlignForAttributeAligned(); + SizeType = HostTarget->getSizeType(); + IntMaxType = HostTarget->getIntMaxType(); + PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0); + IntPtrType = HostTarget->getIntPtrType(); + WCharType = HostTarget->getWCharType(); + WIntType = HostTarget->getWIntType(); + Char16Type = HostTarget->getChar16Type(); + Char32Type = HostTarget->getChar32Type(); + Int64Type = HostTarget->getInt64Type(); + SigAtomicType = HostTarget->getSigAtomicType(); + ProcessIDType = HostTarget->getProcessIDType(); + + UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment(); + UseZeroLengthBitfieldAlignment = + HostTarget->useZeroLengthBitfieldAlignment(); + UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment(); + ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary(); + + // Properties intentionally not copied from host: + // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the + // host/device boundary. + // - SuitableAlign: Not visible across the host/device boundary, and may + // correctly be different on host/device, e.g. if host has wider vector + // types than device. + // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same + // as its double type, but that's not necessarily true on the host. + // TODO: nvcc emits a warning when using long double on device; we should + // do the same. } void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override { diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp index cac73befff02f5f683b83c9af8e2062dd87541b5..d6f4749db24271f4887e4d4e9d0ce82729746dad 100644 --- a/lib/Frontend/CompilerInstance.cpp +++ b/lib/Frontend/CompilerInstance.cpp @@ -836,8 +836,9 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) { // Create TargetInfo for the other side of CUDA compilation. if (getLangOpts().CUDA && !getFrontendOpts().AuxTriple.empty()) { - std::shared_ptr<TargetOptions> TO(new TargetOptions); + auto TO = std::make_shared<TargetOptions>(); TO->Triple = getFrontendOpts().AuxTriple; + TO->HostTriple = getTarget().getTriple().str(); setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO)); } diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index 4bef159f3bf30f49c9e6646794407fe65e7e3edc..02a570076b9a11611a8f771716caec2bc849bd99 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -2155,6 +2155,12 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res, LangOpts.ObjCExceptions = 1; } + // During CUDA device-side compilation, the aux triple is the triple used for + // host compilation. + if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { + Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; + } + // FIXME: Override value name discarding when asan or msan is used because the // backend passes depend on the name of the alloca in order to print out // names. diff --git a/test/Preprocessor/cuda-types.cu b/test/Preprocessor/cuda-types.cu new file mode 100644 index 0000000000000000000000000000000000000000..dd8eef4aaef513c0dda88ecb0531aab1d87d3590 --- /dev/null +++ b/test/Preprocessor/cuda-types.cu @@ -0,0 +1,27 @@ +// Check that types, widths, etc. match on the host and device sides of CUDA +// compilations. Note that we filter out long double, as this is intentionally +// different on host and device. + +// RUN: %clang --cuda-host-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/i386-host-defines +// RUN: %clang --cuda-device-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/i386-device-defines +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)' %T/i386-host-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-host-defines-filtered +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)' %T/i386-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-device-defines-filtered +// RUN: diff %T/i386-host-defines-filtered %T/i386-device-defines-filtered + +// RUN: %clang --cuda-host-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/x86_64-host-defines +// RUN: %clang --cuda-device-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/x86_64-device-defines +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/x86_64-host-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-host-defines-filtered +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/x86_64-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-device-defines-filtered +// RUN: diff %T/x86_64-host-defines-filtered %T/x86_64-device-defines-filtered + +// RUN: %clang --cuda-host-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/powerpc64-host-defines +// RUN: %clang --cuda-device-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/powerpc64-device-defines +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/powerpc64-host-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/powerpc64-host-defines-filtered +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/powerpc64-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/powerpc64-device-defines-filtered +// RUN: diff %T/powerpc64-host-defines-filtered %T/powerpc64-device-defines-filtered + +// RUN: %clang --cuda-host-only -nocudainc -target nvptx-nvidia-cuda -x cuda -E -dM -o - /dev/null > %T/nvptx-host-defines +// RUN: %clang --cuda-device-only -nocudainc -target nvptx-nvidia-cuda -x cuda -E -dM -o - /dev/null > %T/nvptx-device-defines +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/nvptx-host-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/nvptx-host-defines-filtered +// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/nvptx-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/nvptx-device-defines-filtered +// RUN: diff %T/nvptx-host-defines-filtered %T/nvptx-device-defines-filtered