diff --git a/lib/Basic/Targets.cpp b/lib/Basic/Targets.cpp
index 7f5ee91d496e7c05d104c4adbba2b475ed792936..b0dc3356386820ea618d98680bbef34e6fa76969 100644
--- a/lib/Basic/Targets.cpp
+++ b/lib/Basic/Targets.cpp
@@ -3292,10 +3292,8 @@ public:
     ComplexLongDoubleUsesFP2Ret = true;
 
     // x86-64 has atomics up to 16 bytes.
-    // FIXME: Once the backend is fixed, increase MaxAtomicInlineWidth to 128
-    // on CPUs with cmpxchg16b
     MaxAtomicPromoteWidth = 128;
-    MaxAtomicInlineWidth = 64;
+    MaxAtomicInlineWidth = 128;
   }
   BuiltinVaListKind getBuiltinVaListKind() const override {
     return TargetInfo::X86_64ABIBuiltinVaList;
diff --git a/test/CodeGen/x86_64-atomic-128.c b/test/CodeGen/x86_64-atomic-128.c
new file mode 100644
index 0000000000000000000000000000000000000000..2069e455828d7a6a815b99d58d8951c34f880b98
--- /dev/null
+++ b/test/CodeGen/x86_64-atomic-128.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s
+
+// All atomics up to 16 bytes should be emitted inline on x86_64. The
+// backend can turn them back into __sync_* library calls if necessary
+// (e.g. when the CPU doesn't have cmpxchg16b).
+
+__int128 test_sync_call(__int128 *addr, __int128 val) { // __sync_* builtin on a plain __int128
+  // CHECK-LABEL: @test_sync_call
+  // CHECK: atomicrmw add i128
+  return __sync_fetch_and_add(addr, val); // expected inline as a 128-bit atomicrmw add
+}
+
+__int128 test_c11_call(_Atomic __int128 *addr, __int128 val) { // __c11_atomic_* builtin
+  // CHECK-LABEL: @test_c11_call
+  // CHECK: atomicrmw sub i128
+  return __c11_atomic_fetch_sub(addr, val, __ATOMIC_RELAXED); // same value as the old literal 0
+}
+
+__int128 test_atomic_call(__int128 *addr, __int128 val) { // GNU __atomic_* builtin
+  // CHECK-LABEL: @test_atomic_call
+  // CHECK: atomicrmw or i128
+  return __atomic_fetch_or(addr, val, __ATOMIC_RELAXED); // same value as the old literal 0
+}
+
+void test_expression(_Atomic __int128 *addr) { // void: the body never returned a value
+  // CHECK-LABEL: @test_expression
+  // CHECK: atomicrmw and i128
+  *addr &= 1; // compound assignment on an _Atomic lvalue; expected inline as atomicrmw and
+}