diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 6be83d22a2566534a5c362416729aa97f5153318..605f73802afb9c6ed23f625e900134a19ef44f4b 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -3123,6 +3123,7 @@ public:
     case CC_Swift:
     case CC_X86Pascal:
     case CC_IntelOclBicc:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -4834,6 +4835,7 @@ public:
     case CC_PreserveMost:
     case CC_PreserveAll:
     case CC_X86RegCall:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -4907,6 +4909,7 @@ public:
     case CC_X86_64SysV:
     case CC_Swift:
     case CC_X86RegCall:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -5860,6 +5863,7 @@ public:
     case CC_AAPCS:
     case CC_AAPCS_VFP:
     case CC_Swift:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -6019,6 +6023,7 @@ public:
     case CC_X86VectorCall:
       return CCCR_Ignore;
     case CC_C:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -6329,6 +6334,7 @@ public:
     case CC_Swift:
     case CC_PreserveMost:
     case CC_PreserveAll:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -7380,6 +7386,7 @@ public:
     switch (CC) {
     case CC_C:
     case CC_Swift:
+    case CC_OpenCLKernel:
       return CCCR_OK;
     default:
       return CCCR_Warning;
@@ -7663,6 +7670,15 @@ public:
   ArrayRef<TargetInfo::GCCRegAlias> getGCCRegAliases() const override {
     return None;
   }
+  CallingConvCheckResult checkCallingConvention(CallingConv CC) const override {
+    switch (CC) {
+      default:
+        return CCCR_Warning;
+      case CC_C:
+      case CC_OpenCLKernel:
+        return CCCR_OK;
+    }
+  }
 };
 
 class MipsTargetInfo : public TargetInfo {
diff --git a/lib/CodeGen/ABIInfo.h b/lib/CodeGen/ABIInfo.h
index c0be60ef53bc5dc98aa8b17a97684cd7c5cee52c..e4dce2f2a004688b513eada6abdaa222954b4513 100644
--- a/lib/CodeGen/ABIInfo.h
+++ b/lib/CodeGen/ABIInfo.h
@@ -149,7 +149,6 @@ namespace swiftcall {
       return info->supportsSwift();
     }
   };
-
 }  // end namespace CodeGen
 }  // end namespace clang
 
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index c677d9887acccc465e5d5c1aec9f93830cf55275..8f405eee6e5231ecdcf48febfac00ad2f789d653 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -707,6 +707,12 @@ CodeGenTypes::arrangeCall(const CGFunctionInfo &signature,
                                  signature.getRequiredArgs());
 }
 
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI);
+}
+}
+
 /// Arrange the argument and result information for an abstract value
 /// of a given function type.  This is the method which all of the
 /// above functions ultimately defer to.
@@ -741,12 +747,16 @@ CodeGenTypes::arrangeLLVMFunctionInfo(CanQualType resultType,
   bool inserted = FunctionsBeingProcessed.insert(FI).second;
   (void)inserted;
   assert(inserted && "Recursively being processed?");
-  
+
   // Compute ABI information.
-  if (info.getCC() != CC_Swift) {
-    getABIInfo().computeInfo(*FI);
-  } else {
+  if (CC == llvm::CallingConv::SPIR_KERNEL) {
+    // Force target independent argument handling for the host visible
+    // kernel functions.
+    computeSPIRKernelABIInfo(CGM, *FI);
+  } else if (info.getCC() == CC_Swift) {
     swiftcall::computeABIInfo(CGM, *FI);
+  } else {
+    getABIInfo().computeInfo(*FI);
   }
 
   // Loop over all of the computed argument and return value info.  If any of
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index d0ba74119b7da499d5b386bf7da6e9c29726b180..427ec06a2fff462a389f578307e2a7b1eeb93aa5 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -398,7 +398,17 @@ TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
 }
 
 unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
-  return llvm::CallingConv::C;
+  // OpenCL kernels are called via an explicit runtime API with arguments
+  // set with clSetKernelArg(), not as normal sub-functions.
+  // Return SPIR_KERNEL by default as the kernel calling convention to
+  // ensure the fingerprint is fixed such way that each OpenCL argument
+  // gets one matching argument in the produced kernel function argument
+  // list to enable feasible implementation of clSetKernelArg() with
+  // aggregates etc. In case we would use the default C calling conv here,
+  // clSetKernelArg() might break depending on the target-specific
+  // conventions; different targets might split structs passed as values
+  // to multiple function arguments etc.
+  return llvm::CallingConv::SPIR_KERNEL;
 }
 
 llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
@@ -8068,8 +8078,18 @@ public:
                     CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
 };
+
 } // End anonymous namespace.
 
+namespace clang {
+namespace CodeGen {
+void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
+  DefaultABIInfo SPIRABI(CGM.getTypes());
+  SPIRABI.computeInfo(FI);
+}
+}
+}
+
 /// Emit SPIR specific metadata: OpenCL and SPIR version.
 void SPIRTargetCodeGenInfo::emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
                                          CodeGen::CodeGenModule &CGM) const {
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index f8970744389bdc49ed93ba45ad90a49a0383e3bb..e7315934b515745ac5e1381368ba81821d93ab65 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -3175,11 +3175,7 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
     for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
          Attr; Attr = Attr->getNext()) {
       if (Attr->getKind() == AttributeList::AT_OpenCLKernel) {
-        llvm::Triple::ArchType arch = S.Context.getTargetInfo().getTriple().getArch();
-        if (arch == llvm::Triple::spir || arch == llvm::Triple::spir64 ||
-            arch == llvm::Triple::amdgcn || arch == llvm::Triple::r600) {
-          CC = CC_OpenCLKernel;
-        }
+        CC = CC_OpenCLKernel;
         break;
       }
     }
diff --git a/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
new file mode 100644
index 0000000000000000000000000000000000000000..5bb52e9beb5147fead5e1098ce65aa8d85a3dcfe
--- /dev/null
+++ b/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple amdgcn-unknown-unknown -o - | FileCheck -check-prefixes=AMDGCN %s
+// Test that the kernels always use the SPIR calling convention
+// to have unambiguous mapping of arguments to feasibly implement
+// clSetKernelArg().
+
+typedef struct int_single {
+    int a;
+} int_single;
+
+typedef struct int_pair {
+    long a;
+    long b;
+} int_pair;
+
+typedef struct test_struct {
+    int elementA;
+    int elementB;
+    long elementC;
+    char elementD;
+    long elementE;
+    float elementF;
+    short elementG;
+    double elementH;
+} test_struct;
+
+kernel void test_single(int_single input, global int* output) {
+// CHECK: spir_kernel
+// AMDGCN: define amdgpu_kernel void @test_single
+// CHECK: struct.int_single* byval nocapture
+// CHECK: i32* nocapture %output
+ output[0] = input.a;
+}
+
+kernel void test_pair(int_pair input, global int* output) {
+// CHECK: spir_kernel
+// AMDGCN: define amdgpu_kernel void @test_pair
+// CHECK: struct.int_pair* byval nocapture
+// CHECK: i32* nocapture %output
+ output[0] = (int)input.a;
+ output[1] = (int)input.b;
+}
+
+kernel void test_kernel(test_struct input, global int* output) {
+// CHECK: spir_kernel
+// AMDGCN: define amdgpu_kernel void @test_kernel
+// CHECK: struct.test_struct* byval nocapture
+// CHECK: i32* nocapture %output
+ output[0] = input.elementA;
+ output[1] = input.elementB;
+ output[2] = (int)input.elementC;
+ output[3] = (int)input.elementD;
+ output[4] = (int)input.elementE;
+ output[5] = (int)input.elementF;
+ output[6] = (int)input.elementG;
+ output[7] = (int)input.elementH;
+};
+
+void test_function(int_pair input, global int* output) {
+// CHECK-NOT: spir_kernel
+// AMDGCN-NOT: define amdgpu_kernel void @test_function
+// CHECK: i64 %input.coerce0, i64 %input.coerce1, i32* nocapture %output
+ output[0] = (int)input.a;
+ output[1] = (int)input.b;
+}