diff --git a/lib/Sema/SemaCUDA.cpp b/lib/Sema/SemaCUDA.cpp
index 55a34e3541395c91c95406d50ea16c46c82a8ab8..66715209bebbb981e066559f3695b5a6d9242250 100644
--- a/lib/Sema/SemaCUDA.cpp
+++ b/lib/Sema/SemaCUDA.cpp
@@ -48,6 +48,12 @@ Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
     if (D->hasAttr<CUDAHostAttr>())
       return CFT_HostDevice;
     return CFT_Device;
+  } else if (D->hasAttr<CUDAHostAttr>()) {
+    return CFT_Host;
+  } else if (D->isImplicit()) {
+    // Some implicit declarations (like intrinsic functions) are not marked
+    // with CUDA target attributes. Treat them as host+device, the most lenient.
+    return CFT_HostDevice;
   }
 
   return CFT_Host;
diff --git a/test/SemaCUDA/implicit-intrinsic.cu b/test/SemaCUDA/implicit-intrinsic.cu
new file mode 100644
index 0000000000000000000000000000000000000000..3d24aa719e57d902624925291e807e7ed686cd6e
--- /dev/null
+++ b/test/SemaCUDA/implicit-intrinsic.cu
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -std=gnu++11 -triple nvptx64-unknown-unknown -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+// expected-no-diagnostics
+__device__ void __threadfence_system() {
+  // No diagnostic expected: the unannotated intrinsic __nvvm_membar_sys is
+  // inferred to be __host__ __device__, so device code is allowed to call it.
+  __nvvm_membar_sys();
+}