diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index a4547a9982be08b86a56b8c41a289d0475f4ddd1..947b3d5b8586ea0c114ce2791fbe537ec5536e81 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -967,12 +967,9 @@ void AggExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
   Address ArgValue = Address::invalid();
   Address ArgPtr = CGF.EmitVAArg(VE, ArgValue);
 
+  // If EmitVAArg fails, emit an error.
   if (!ArgPtr.isValid()) {
-    // If EmitVAArg fails, we fall back to the LLVM instruction.
-    llvm::Value *Val = Builder.CreateVAArg(ArgValue.getPointer(),
-                                           CGF.ConvertType(VE->getType()));
-    if (!Dest.isIgnored())
-      Builder.CreateStore(Val, Dest.getAddress());
+    CGF.ErrorUnsupported(VE, "aggregate va_arg expression");
     return;
   }
 
diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp
index 5b39b5d43996ee0360fdc2cda315d0cb1e4554b0..540e01f90df2fecb6af40809f695b0140d948fa9 100644
--- a/lib/CodeGen/CGExprScalar.cpp
+++ b/lib/CodeGen/CGExprScalar.cpp
@@ -3366,9 +3366,11 @@ Value *ScalarExprEmitter::VisitVAArgExpr(VAArgExpr *VE) {
 
   llvm::Type *ArgTy = ConvertType(VE->getType());
 
-  // If EmitVAArg fails, we fall back to the LLVM instruction.
-  if (!ArgPtr.isValid())
-    return Builder.CreateVAArg(ArgValue.getPointer(), ArgTy);
+  // If EmitVAArg fails, emit an error.
+  if (!ArgPtr.isValid()) {
+    CGF.ErrorUnsupported(VE, "va_arg expression");
+    return llvm::UndefValue::get(ArgTy);
+  }
 
   // FIXME Volatility.
   llvm::Value *Val = Builder.CreateLoad(ArgPtr);
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index f19f31584f07673a5d4de1bc32167ccde2a2d44d..d9f4e7761aeb9f9f27312b9620b00942d3d16f87 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -525,6 +525,54 @@ static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) {
 }
 
 namespace {
+Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
+                       const ABIArgInfo &AI) {
+  // This default implementation defers to the LLVM backend's va_arg
+  // instruction. It can handle only passing arguments directly
+  // (typically only handled in the backend for primitive types), or
+  // aggregates passed indirectly by pointer (NOTE: if the "byval"
+  // flag has ABI impact in the callee, this implementation cannot
+  // work.)
+
+  // Only a few cases are covered here at the moment -- those needed
+  // by the default ABI.
+  if (AI.isIndirect()) {
+    // va_arg yields a pointer to the argument; return it as the address.
+    assert(!AI.getPaddingType() &&
+           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
+    assert(
+        !AI.getIndirectRealign() &&
+        "Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");
+
+    auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
+    CharUnits TyAlignForABI = TyInfo.second;
+
+    llvm::Type *BaseTy =
+        llvm::PointerType::getUnqual(CGF.ConvertTypeForMem(Ty));
+    llvm::Value *Addr =
+        CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
+    return Address(Addr, TyAlignForABI);
+  } else {
+    assert((AI.isDirect() || AI.isExtend()) &&
+           "Unexpected ArgInfo Kind in generic VAArg emitter!");
+    assert(!AI.getInReg() &&
+           "Unexpected InReg seen in arginfo in generic VAArg emitter!");
+    assert(!AI.getPaddingType() &&
+           "Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
+    assert(!AI.getDirectOffset() &&
+           "Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
+    assert(!AI.getCoerceToType() &&
+           "Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");
+
+    // Spill the va_arg result to a temporary so an address can be returned.
+    Address Temp = CGF.CreateMemTemp(Ty, "varet");
+    llvm::Value *Val =
+        CGF.Builder.CreateVAArg(VAListAddr.getPointer(), CGF.ConvertType(Ty));
+    CGF.Builder.CreateStore(Val, Temp);
+    return Temp;
+  }
+}
+
 /// DefaultABIInfo - The default implementation for ABI specific
 /// details. This implementation provides information which results in
 /// self-consistent and sensible LLVM IR generation, but does not
@@ -544,7 +592,9 @@ public:
   }
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
-                    QualType Ty) const override;
+                    QualType Ty) const override {
+    return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
+  }
 };
 
 class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
@@ -553,11 +603,6 @@ public:
     : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
 };
 
-Address DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
-                                  QualType Ty) const {
-  return Address::invalid();
-}
-
 ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
@@ -609,7 +654,8 @@ private:
   ABIArgInfo classifyArgumentType(QualType Ty) const;
 
   // DefaultABIInfo's classifyReturnType and classifyArgumentType are
-  // non-virtual, but computeInfo is virtual, so we overload that.
+  // non-virtual, but computeInfo and EmitVAArg are virtual, so we
+  // override them.
   void computeInfo(CGFunctionInfo &FI) const override {
     if (!getCXXABI().classifyReturnType(FI))
       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
@@ -713,7 +759,13 @@ void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
 
 Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                 QualType Ty) const {
-  return Address::invalid();
+  // The PNaCl ABI is a bit odd, in that varargs don't use normal
+  // function classification. Structs get passed directly for varargs
+  // functions, through a rewriting transform in
+  // pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
+  // this target to actually support a va_arg instruction with an
+  // aggregate type, unlike other targets.
+  return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
 }
 
 /// \brief Classify argument of given type \p Ty.
@@ -3516,13 +3568,15 @@ public:
 
 }
 
+// TODO: this implementation is now likely redundant with
+// DefaultABIInfo::EmitVAArg.
 Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
                                       QualType Ty) const {
   const unsigned OverflowLimit = 8;
   if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
     // TODO: Implement this. For now ignore.
     (void)CTy;
-    return Address::invalid();
+    return Address::invalid(); // FIXME?
   }
 
   // struct __va_list_tag {
@@ -3706,7 +3760,7 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
 
 namespace {
 /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
-class PPC64_SVR4_ABIInfo : public DefaultABIInfo {
+class PPC64_SVR4_ABIInfo : public ABIInfo {
 public:
   enum ABIKind {
     ELFv1 = 0,
@@ -3748,7 +3802,7 @@ private:
 
 public:
   PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind, bool HasQPX)
-    : DefaultABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {}
+      : ABIInfo(CGT), Kind(Kind), HasQPX(HasQPX) {}
 
   bool isPromotableTypeForABI(QualType Ty) const;
   CharUnits getParamTypeAlignment(QualType Ty) const;
@@ -4747,7 +4801,7 @@ Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
   // illegal vector types.  Lower VAArg here for these cases and use
   // the LLVM va_arg instruction for everything else.
   if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
-    return Address::invalid();
+    return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
 
   CharUnits SlotSize = CharUnits::fromQuantity(8);
 
@@ -6967,6 +7021,8 @@ public:
 
 } // End anonymous namespace.
 
+// TODO: this implementation is likely now redundant with the default
+// EmitVAArg.
 Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                                 QualType Ty) const {
   CGBuilderTy &Builder = CGF.Builder;
diff --git a/test/CodeGen/le32-vaarg.c b/test/CodeGen/le32-vaarg.c
index 51bbb0296846dab388bff7fed9edf70ba7470a48..c02af27691f25cc68ab51074fc1e88ce22de8688 100644
--- a/test/CodeGen/le32-vaarg.c
+++ b/test/CodeGen/le32-vaarg.c
@@ -6,7 +6,9 @@ int get_int(va_list *args) {
 }
 // CHECK: define i32 @get_int
 // CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, i32{{$}}
-// CHECK: ret i32 [[RESULT]]
+// CHECK: store i32 [[RESULT]], i32* [[LOC:%[a-z_0-9]+]]
+// CHECK: [[RESULT2:%[a-z_0-9]+]] = load i32, i32* [[LOC]]
+// CHECK: ret i32 [[RESULT2]]
 
 struct Foo {
   int x;
@@ -19,7 +21,9 @@ void get_struct(va_list *args) {
 }
 // CHECK: define void @get_struct
 // CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, %struct.Foo{{$}}
-// CHECK: store %struct.Foo [[RESULT]], %struct.Foo* @dest
+// CHECK: store %struct.Foo [[RESULT]], %struct.Foo* [[LOC:%[a-z_0-9]+]]
+// CHECK: [[LOC2:%[a-z_0-9]+]] = bitcast {{.*}} [[LOC]] to i8*
+// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* [[LOC2]]
 
 void skip_struct(va_list *args) {
   va_arg(*args, struct Foo);
diff --git a/test/CodeGen/sparc-vaarg.c b/test/CodeGen/sparc-vaarg.c
new file mode 100644
index 0000000000000000000000000000000000000000..3e4dd7c2c3f23c6a72c940d4ea5e415e75dd8572
--- /dev/null
+++ b/test/CodeGen/sparc-vaarg.c
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -triple sparc -emit-llvm -o - %s | FileCheck %s
+#include <stdarg.h>
+
+// CHECK-LABEL: define i32 @get_int
+// CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, i32{{$}}
+// CHECK: store i32 [[RESULT]], i32* [[LOC:%[a-z_0-9]+]]
+// CHECK: [[RESULT2:%[a-z_0-9]+]] = load i32, i32* [[LOC]]
+// CHECK: ret i32 [[RESULT2]]
+int get_int(va_list *args) {
+  return va_arg(*args, int);
+}
+
+struct Foo {
+  int x;
+};
+
+struct Foo dest;
+
+// CHECK-LABEL: define void @get_struct
+// CHECK: [[RESULT:%[a-z_0-9]+]] = va_arg {{.*}}, %struct.Foo*{{$}}
+// CHECK: [[RESULT2:%[a-z_0-9]+]] = bitcast {{.*}} [[RESULT]] to i8*
+// CHECK: call void @llvm.memcpy{{.*}}@dest{{.*}}, i8* [[RESULT2]]
+void get_struct(va_list *args) {
+ dest = va_arg(*args, struct Foo);
+}
+
+enum E { Foo_one = 1 };
+
+enum E enum_dest;
+
+// CHECK-LABEL: define void @get_enum
+// CHECK: va_arg i8** {{.*}}, i32
+void get_enum(va_list *args) {
+  enum_dest = va_arg(*args, enum E);
+}