diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index d2e57ac73be901f1b8438e0126457cb10da76eb4..1f98920ed4085ea490714cbfa8d5d7e1fac40d31 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -1013,24 +1013,22 @@ void
 X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                                    unsigned &StackOffset,
                                    ABIArgInfo &Info, QualType Type) const {
+  assert(StackOffset % 4U == 0 && "unaligned inalloca struct");
+  Info = ABIArgInfo::getInAlloca(FrameFields.size());
+  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
+  StackOffset += getContext().getTypeSizeInChars(Type).getQuantity();
+
   // Insert padding bytes to respect alignment.  For x86_32, each argument is 4
   // byte aligned.
-  unsigned Align = 4U;
-  if (Info.getKind() == ABIArgInfo::Indirect && Info.getIndirectByVal())
-    Align = std::max(Align, Info.getIndirectAlign());
-  if (StackOffset & (Align - 1)) {
+  if (StackOffset % 4U) {
     unsigned OldOffset = StackOffset;
-    StackOffset = llvm::RoundUpToAlignment(StackOffset, Align);
+    StackOffset = llvm::RoundUpToAlignment(StackOffset, 4U);
     unsigned NumBytes = StackOffset - OldOffset;
     assert(NumBytes);
     llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
     Ty = llvm::ArrayType::get(Ty, NumBytes);
     FrameFields.push_back(Ty);
   }
-
-  Info = ABIArgInfo::getInAlloca(FrameFields.size());
-  FrameFields.push_back(CGT.ConvertTypeForMem(Type));
-  StackOffset += getContext().getTypeSizeInChars(Type).getQuantity();
 }
 
 void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
diff --git a/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp b/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
index 3d5fe9c9a1739bd8b9ecfbe7fde0034425b4177e..bc937491a57635a077261538bd99422c777bdc06 100644
--- a/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
+++ b/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
@@ -49,7 +49,7 @@ struct Big {
 
 // WIN32: declare void @"{{.*take_bools_and_chars.*}}"
 // WIN32:       (<{ i8, [3 x i8], i8, [3 x i8], %struct.SmallWithDtor,
-// WIN32:           i8, [3 x i8], i8, [3 x i8], i32, i8 }>* inalloca)
+// WIN32:           i8, [3 x i8], i8, [3 x i8], i32, i8, [3 x i8] }>* inalloca)
 void take_bools_and_chars(char a, char b, SmallWithDtor c, char d, bool e, int f, bool g);
 void call_bools_and_chars() {
   take_bools_and_chars('A', 'B', SmallWithDtor(), 'D', true, 13, false);
@@ -223,7 +223,7 @@ struct X {
 };
 void g(X) {
 }
-// WIN32: define void @"\01?g@@YAXUX@@@Z"(<{ %struct.X }>* inalloca) {{.*}} {
+// WIN32: define void @"\01?g@@YAXUX@@@Z"(<{ %struct.X, [3 x i8] }>* inalloca) {{.*}} {
 // WIN32:   call x86_thiscallcc void @"\01??1X@@QAE@XZ"(%struct.X* {{.*}})
 // WIN32: }
 void f() {
@@ -264,6 +264,20 @@ void bar() {
 
 }
 
+namespace test3 {
+
+// Check that we padded the inalloca struct to a multiple of 4.
+struct NonTrivial {
+  NonTrivial();
+  NonTrivial(const NonTrivial &o);
+  ~NonTrivial();
+  int a;
+};
+void foo(NonTrivial a, bool b) { }
+// WIN32-LABEL: define void @"\01?foo@test3@@YAXUNonTrivial@1@_N@Z"(<{ %"struct.test3::NonTrivial", i8, [3 x i8] }>* inalloca)
+
+}
+
 // We would crash here because the later definition of ForwardDeclare1 results
 // in a different IR type for the value we want to store.  However, the alloca's
 // type will use the argument type selected by fn1.