diff --git a/include/clang/Basic/TargetOptions.h b/include/clang/Basic/TargetOptions.h
index 3718ee316a8d65bb72ed0f5f64d0fb19d1bbff03..81fb7b21170df938823fb9b9a4d4692485414f67 100644
--- a/include/clang/Basic/TargetOptions.h
+++ b/include/clang/Basic/TargetOptions.h
@@ -27,6 +27,10 @@ public:
   /// target will be selected to match the host.
   std::string Triple;
 
+  /// When compiling for the device side, contains the triple used to compile
+  /// for the host.
+  std::string HostTriple;
+
   /// If given, the name of the target CPU to generate code for.
   std::string CPU;
 
diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 0a87cdf061e8400d8fd09ba06748f4d87e57502c..1df35ab846594e3f08a9b44e4802896788ae6428 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -65,6 +65,9 @@ static void defineCPUMacros(MacroBuilder &Builder, StringRef CPUName,
     Builder.defineMacro("__tune_" + CPUName + "__");
 }
 
+static TargetInfo *AllocateTarget(const llvm::Triple &Triple,
+                                  const TargetOptions &Opts);
+
 //===----------------------------------------------------------------------===//
 // Defines specific to certain operating systems.
 //===----------------------------------------------------------------------===//
@@ -1627,7 +1630,7 @@ class NVPTXTargetInfo : public TargetInfo {
   } GPU;
 
 public:
-  NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+  NVPTXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
       : TargetInfo(Triple) {
     BigEndian = false;
     TLSSupported = false;
@@ -1639,6 +1642,65 @@ public:
     NoAsmVariants = true;
     // Set the default GPU to sm20
     GPU = GK_SM20;
+
+    // If possible, get a TargetInfo for our host triple, so we can match its
+    // types.
+    llvm::Triple HostTriple(Opts.HostTriple);
+    if (HostTriple.isNVPTX())
+      return;
+    std::unique_ptr<TargetInfo> HostTarget(
+        AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));
+    if (!HostTarget) {
+      return;
+    }
+
+    PointerWidth = HostTarget->getPointerWidth(/* AddrSpace = */ 0);
+    PointerAlign = HostTarget->getPointerAlign(/* AddrSpace = */ 0);
+    BoolWidth = HostTarget->getBoolWidth();
+    BoolAlign = HostTarget->getBoolAlign();
+    IntWidth = HostTarget->getIntWidth();
+    IntAlign = HostTarget->getIntAlign();
+    HalfWidth = HostTarget->getHalfWidth();
+    HalfAlign = HostTarget->getHalfAlign();
+    FloatWidth = HostTarget->getFloatWidth();
+    FloatAlign = HostTarget->getFloatAlign();
+    DoubleWidth = HostTarget->getDoubleWidth();
+    DoubleAlign = HostTarget->getDoubleAlign();
+    LongWidth = HostTarget->getLongWidth();
+    LongAlign = HostTarget->getLongAlign();
+    LongLongWidth = HostTarget->getLongLongWidth();
+    LongLongAlign = HostTarget->getLongLongAlign();
+    MinGlobalAlign = HostTarget->getMinGlobalAlign();
+    DefaultAlignForAttributeAligned =
+        HostTarget->getDefaultAlignForAttributeAligned();
+    SizeType = HostTarget->getSizeType();
+    IntMaxType = HostTarget->getIntMaxType();
+    PtrDiffType = HostTarget->getPtrDiffType(/* AddrSpace = */ 0);
+    IntPtrType = HostTarget->getIntPtrType();
+    WCharType = HostTarget->getWCharType();
+    WIntType = HostTarget->getWIntType();
+    Char16Type = HostTarget->getChar16Type();
+    Char32Type = HostTarget->getChar32Type();
+    Int64Type = HostTarget->getInt64Type();
+    SigAtomicType = HostTarget->getSigAtomicType();
+    ProcessIDType = HostTarget->getProcessIDType();
+
+    UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
+    UseZeroLengthBitfieldAlignment =
+        HostTarget->useZeroLengthBitfieldAlignment();
+    UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
+    ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();
+
+    // Properties intentionally not copied from host:
+    // - LargeArrayMinWidth, LargeArrayAlign: Not visible across the
+    //   host/device boundary.
+    // - SuitableAlign: Not visible across the host/device boundary, and may
+    //   correctly be different on host/device, e.g. if host has wider vector
+    //   types than device.
+    // - LongDoubleWidth, LongDoubleAlign: nvptx's long double type is the same
+    //   as its double type, but that's not necessarily true on the host.
+    //   TODO: nvcc emits a warning when using long double on device; we should
+    //   do the same.
   }
   void getTargetDefines(const LangOptions &Opts,
                         MacroBuilder &Builder) const override {
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index cac73befff02f5f683b83c9af8e2062dd87541b5..d6f4749db24271f4887e4d4e9d0ce82729746dad 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -836,8 +836,9 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
 
   // Create TargetInfo for the other side of CUDA compilation.
   if (getLangOpts().CUDA && !getFrontendOpts().AuxTriple.empty()) {
-    std::shared_ptr<TargetOptions> TO(new TargetOptions);
+    auto TO = std::make_shared<TargetOptions>();
     TO->Triple = getFrontendOpts().AuxTriple;
+    TO->HostTriple = getTarget().getTriple().str();
     setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO));
   }
 
diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp
index 4bef159f3bf30f49c9e6646794407fe65e7e3edc..02a570076b9a11611a8f771716caec2bc849bd99 100644
--- a/lib/Frontend/CompilerInvocation.cpp
+++ b/lib/Frontend/CompilerInvocation.cpp
@@ -2155,6 +2155,12 @@ bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Res,
       LangOpts.ObjCExceptions = 1;
   }
 
+  // During CUDA device-side compilation, the aux triple is the triple used for
+  // host compilation.
+  if (LangOpts.CUDA && LangOpts.CUDAIsDevice) {
+    Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple;
+  }
+
   // FIXME: Override value name discarding when asan or msan is used because the
   // backend passes depend on the name of the alloca in order to print out
   // names.
diff --git a/test/Preprocessor/cuda-types.cu b/test/Preprocessor/cuda-types.cu
new file mode 100644
index 0000000000000000000000000000000000000000..dd8eef4aaef513c0dda88ecb0531aab1d87d3590
--- /dev/null
+++ b/test/Preprocessor/cuda-types.cu
@@ -0,0 +1,27 @@
+// Check that types, widths, etc. match on the host and device sides of CUDA
+// compilations.  Note that we filter out long double, as this is intentionally
+// different on host and device.
+
+// RUN: %clang --cuda-host-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/i386-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target i386-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/i386-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)' %T/i386-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF|WIDTH\)' %T/i386-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/i386-device-defines-filtered
+// RUN: diff %T/i386-host-defines-filtered %T/i386-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/x86_64-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target x86_64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/x86_64-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/x86_64-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/x86_64-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/x86_64-device-defines-filtered
+// RUN: diff %T/x86_64-host-defines-filtered %T/x86_64-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/powerpc64-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target powerpc64-unknown-linux-gnu -x cuda -E -dM -o - /dev/null > %T/powerpc64-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/powerpc64-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/powerpc64-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/powerpc64-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/powerpc64-device-defines-filtered
+// RUN: diff %T/powerpc64-host-defines-filtered %T/powerpc64-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -target nvptx-nvidia-cuda -x cuda -E -dM -o - /dev/null > %T/nvptx-host-defines
+// RUN: %clang --cuda-device-only -nocudainc -target nvptx-nvidia-cuda -x cuda -E -dM -o - /dev/null > %T/nvptx-device-defines
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/nvptx-host-defines   | grep -v '__LDBL\|_LONG_DOUBLE' > %T/nvptx-host-defines-filtered
+// RUN: grep 'define __[^ ]*\(TYPE\|MAX\|SIZEOF\|WIDTH\)' %T/nvptx-device-defines | grep -v '__LDBL\|_LONG_DOUBLE' > %T/nvptx-device-defines-filtered
+// RUN: diff %T/nvptx-host-defines-filtered %T/nvptx-device-defines-filtered