diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index bef723f5aedd15f45ceeab2213edf7d47edce5a7..cc69d60924ccec4f57ed9367d8e00756d10c539e 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -13,6 +13,7 @@
 
 #include "CGOpenMPRuntime.h"
 #include "CodeGenFunction.h"
+#include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/Decl.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -24,6 +25,49 @@
 using namespace clang;
 using namespace CodeGen;
 
+/// \brief API for captured statement code generation in OpenMP constructs.
+class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
+public:
+  CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
+                     const VarDecl *ThreadIDVar)
+      : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
+        Directive(D) {
+    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
+  }
+
+  virtual ~CGOpenMPRegionInfo() override{};
+
+  /// \brief Gets a variable or parameter for storing global thread id
+  /// inside OpenMP construct.
+  const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
+
+  /// \brief Gets an LValue for the current ThreadID variable.
+  LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
+
+  static bool classof(const CGCapturedStmtInfo *Info) {
+    return Info->getKind() == CR_OpenMP;
+  }
+
+  /// \brief Emit the captured statement body.
+  virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
+
+  /// \brief Get the name of the capture helper.
+  virtual StringRef getHelperName() const override { return ".omp_outlined."; }
+
+private:
+  /// \brief A variable or parameter storing global thread id for OpenMP
+  /// constructs.
+  const VarDecl *ThreadIDVar;
+  /// \brief OpenMP executable directive associated with the region.
+  const OMPExecutableDirective &Directive;
+};
+
+LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
+  return CGF.MakeNaturalAlignAddrLValue(
+      CGF.GetAddrOfLocalVar(ThreadIDVar),
+      CGF.getContext().getPointerType(ThreadIDVar->getType()));
+}
+
 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
   CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
   CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
@@ -51,6 +95,16 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
   KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
 }
 
+llvm::Value *
+CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
+                                            const VarDecl *ThreadIDVar) {
+  const CapturedStmt *CS = cast<CapturedStmt>(D.getAssociatedStmt());
+  CodeGenFunction CGF(CGM, true);
+  CGOpenMPRegionInfo CGInfo(D, *CS, ThreadIDVar);
+  CGF.CapturedStmtInfo = &CGInfo;
+  return CGF.GenerateCapturedStmtFunction(*CS);
+}
+
 llvm::Value *
 CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) {
   llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
@@ -92,14 +146,15 @@ llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
 
   llvm::Value *LocValue = nullptr;
-  OpenMPLocMapTy::iterator I = OpenMPLocMap.find(CGF.CurFn);
-  if (I != OpenMPLocMap.end()) {
-    LocValue = I->second;
+  OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
+  if (I != OpenMPLocThreadIDMap.end()) {
+    LocValue = I->second.DebugLoc;
   } else {
     // Generate "ident_t .kmpc_loc.addr;"
     llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
     AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
-    OpenMPLocMap[CGF.CurFn] = AI;
+    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+    Elem.second.DebugLoc = AI;
     LocValue = AI;
 
     CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
@@ -139,46 +194,46 @@ llvm::Value *CGOpenMPRuntime::GetOpenMPThreadID(CodeGenFunction &CGF,
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
 
   llvm::Value *ThreadID = nullptr;
-  OpenMPThreadIDMapTy::iterator I = OpenMPThreadIDMap.find(CGF.CurFn);
-  if (I != OpenMPThreadIDMap.end()) {
-    ThreadID = I->second;
-  } else {
-    // Check if current function is a function which has first parameter
-    // with type int32 and name ".global_tid.".
-    if (!CGF.CurFn->arg_empty() &&
-        CGF.CurFn->arg_begin()->getType()->isPointerTy() &&
-        CGF.CurFn->arg_begin()
-            ->getType()
-            ->getPointerElementType()
-            ->isIntegerTy() &&
-        CGF.CurFn->arg_begin()
-                ->getType()
-                ->getPointerElementType()
-                ->getIntegerBitWidth() == 32 &&
-        CGF.CurFn->arg_begin()->hasName() &&
-        CGF.CurFn->arg_begin()->getName() == ".global_tid.") {
-      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
-      CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
-      ThreadID = CGF.Builder.CreateLoad(CGF.CurFn->arg_begin());
-    } else {
-      // Generate "int32 .kmpc_global_thread_num.addr;"
-      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
-      CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
-      llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
-      ThreadID = CGF.EmitRuntimeCall(
-          CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
+  // Check whether we've already cached a load of the thread id in this
+  // function.
+  OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
+  if (I != OpenMPLocThreadIDMap.end()) {
+    ThreadID = I->second.ThreadID;
+  } else if (auto OMPRegionInfo =
+                 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+    // Check if this an outlined function with thread id passed as argument.
+    auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
+    auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+    auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
+    LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
+                                          ThreadIDVar->getType());
+    ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+    // If value loaded in entry block, cache it and use it everywhere in
+    // function.
+    if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+      auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+      Elem.second.ThreadID = ThreadID;
     }
-    OpenMPThreadIDMap[CGF.CurFn] = ThreadID;
+  } else {
+    // This is not an outlined function region - need to call __kmpc_int32
+    // kmpc_global_thread_num(ident_t *loc).
+    // Generate thread id value and cache this value for use across the
+    // function.
+    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+    CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+    llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
+    ThreadID = CGF.EmitRuntimeCall(
+        CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
+    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+    Elem.second.ThreadID = ThreadID;
   }
   return ThreadID;
 }
 
 void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) {
   assert(CGF.CurFn && "No function in current CodeGenFunction.");
-  if (OpenMPThreadIDMap.count(CGF.CurFn))
-    OpenMPThreadIDMap.erase(CGF.CurFn);
-  if (OpenMPLocMap.count(CGF.CurFn))
-    OpenMPLocMap.erase(CGF.CurFn);
+  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
+    OpenMPLocThreadIDMap.erase(CGF.CurFn);
 }
 
 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index ec6115fb0948c97533fab98a868310953f3360b3..ce822ea7d1b8c25e53ae293b44be7ad28fd32f2f 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -14,49 +14,29 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
 
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-#include "clang/AST/Type.h"
+#include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
 
-namespace clang {
-
-namespace CodeGen {
-
-/// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
-public:
-  CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &S,
-                     const VarDecl *ThreadIDVar)
-      : CGCapturedStmtInfo(S, CR_OpenMP), ThreadIDVar(ThreadIDVar),
-        Directive(D) {}
-
-  virtual ~CGOpenMPRegionInfo() override{};
+namespace llvm {
+class ArrayType;
+class Constant;
+class Function;
+class FunctionType;
+class StructType;
+class Type;
+class Value;
+} // namespace llvm
 
-  /// \brief Gets a variable or parameter for storing global thread id
-  /// inside OpenMP construct.
-  const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
-
-  static bool classof(const CGCapturedStmtInfo *Info) {
-    return Info->getKind() == CR_OpenMP;
-  }
+namespace clang {
 
-  /// \brief Emit the captured statement body.
-  virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
+class OMPExecutableDirective;
+class VarDecl;
 
-  /// \brief Get the name of the capture helper.
-  virtual StringRef getHelperName() const override { return ".omp_outlined."; }
+namespace CodeGen {
 
-private:
-  /// \brief A variable or parameter storing global thread id for OpenMP
-  /// constructs.
-  const VarDecl *ThreadIDVar;
-  /// \brief OpenMP executable directive associated with the region.
-  const OMPExecutableDirective &Directive;
-};
+class CodeGenFunction;
+class CodeGenModule;
 
 class CGOpenMPRuntime {
 public:
@@ -102,7 +82,7 @@ private:
   /// \brief Default const ident_t object used for initialization of all other
   /// ident_t objects.
   llvm::Constant *DefaultOpenMPPSource;
-  /// \brief Map of flags and corrsponding default locations.
+  /// \brief Map of flags and corresponding default locations.
   typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
   OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
   llvm::Value *GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags);
@@ -147,19 +127,22 @@ private:
     IdentField_PSource
   };
   llvm::StructType *IdentTy;
-  /// \brief Map for Sourcelocation and OpenMP runtime library debug locations.
+  /// \brief Map for SourceLocation and OpenMP runtime library debug locations.
   typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
   OpenMPDebugLocMapTy OpenMPDebugLocMap;
   /// \brief The type for a microtask which gets passed to __kmpc_fork_call().
   /// Original representation is:
   /// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...);
   llvm::FunctionType *Kmpc_MicroTy;
-  /// \brief Map of local debug location and functions.
-  typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPLocMapTy;
-  OpenMPLocMapTy OpenMPLocMap;
-  /// \brief Map of local ThreadID and functions.
-  typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPThreadIDMapTy;
-  OpenMPThreadIDMapTy OpenMPThreadIDMap;
+  /// \brief Stores debug location and ThreadID for the function.
+  struct DebugLocThreadIdTy {
+    llvm::Value *DebugLoc;
+    llvm::Value *ThreadID;
+  };
+  /// \brief Map of local debug location, ThreadId and functions.
+  typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
+      OpenMPLocThreadIDMapTy;
+  OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap;
   /// \brief Type kmp_critical_name, originally defined as typedef kmp_int32
   /// kmp_critical_name[8];
   llvm::ArrayType *KmpCriticalNameTy;
@@ -167,8 +150,6 @@ private:
   llvm::StringMap<llvm::Value *, llvm::BumpPtrAllocator> CriticalRegionVarNames;
 
   /// \brief Emits object of ident_t type with info for source location.
-  /// \param CGF Reference to current CodeGenFunction.
-  /// \param Loc Clang source location.
   /// \param Flags Flags for OpenMP location.
   ///
   llvm::Value *
@@ -187,8 +168,6 @@ private:
   llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function);
 
   /// \brief Gets thread id value for the current thread.
-  /// \param CGF Reference to current CodeGenFunction.
-  /// \param Loc Clang source location.
   ///
   llvm::Value *GetOpenMPThreadID(CodeGenFunction &CGF, SourceLocation Loc);
 
@@ -196,16 +175,25 @@ public:
   explicit CGOpenMPRuntime(CodeGenModule &CGM);
   virtual ~CGOpenMPRuntime() {}
 
+  /// \brief Emits outlined function for the specified OpenMP directive \a D
+  /// (required for parallel and task directives). This outlined function has
+  /// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
+  /// context_vars*).
+  /// \param D OpenMP directive.
+  /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  ///
+  virtual llvm::Value *
+  EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
+                             const VarDecl *ThreadIDVar);
+
   /// \brief Cleans up references to the objects in finished function.
-  /// \param CGF Reference to finished CodeGenFunction.
   ///
   void FunctionFinished(CodeGenFunction &CGF);
 
   /// \brief Emits code for parallel call of the \a OutlinedFn with variables
   /// captured in a record which address is stored in \a CapturedStruct.
-  /// \param CGF Reference to current CodeGenFunction.
-  /// \param Loc Clang source location.
-  /// \param OutlinedFn Outlined function to be run in parallel threads.
+  /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+  /// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*).
   /// \param CapturedStruct A pointer to the record with the references to
   /// variables used in \a OutlinedFn function.
   ///
@@ -216,14 +204,14 @@ public:
   /// \brief Returns corresponding lock object for the specified critical region
   /// name. If the lock object does not exist it is created, otherwise the
   /// reference to the existing copy is returned.
+  /// \param CriticalName Name of the critical region.
+  ///
   llvm::Value *GetCriticalRegionLock(StringRef CriticalName);
 
   /// \brief Emits start of the critical region by calling void
   /// __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
   /// * \a RegionLock)
-  /// \param CGF Reference to current CodeGenFunction.
   /// \param RegionLock The lock object for critical region.
-  /// \param Loc Location of the construct.
   virtual void EmitOMPCriticalRegionStart(CodeGenFunction &CGF,
                                           llvm::Value *RegionLock,
                                           SourceLocation Loc);
@@ -231,16 +219,12 @@ public:
   /// \brief Emits end of the critical region by calling void
   /// __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
   /// * \a RegionLock)
-  /// \param CGF Reference to current CodeGenFunction.
   /// \param RegionLock The lock object for critical region.
-  /// \param Loc Location of the construct.
   virtual void EmitOMPCriticalRegionEnd(CodeGenFunction &CGF,
                                         llvm::Value *RegionLock,
                                         SourceLocation Loc);
 
   /// \brief Emits a barrier for OpenMP threads.
-  /// \param CGF Reference to current CodeGenFunction.
-  /// \param Loc Clang source location.
   /// \param Flags Flags for the barrier.
   ///
   virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 1b3e786b30bd3c795ee050126b4e6cdabacdb248..2aec28f3051c47d9c992863e6c0ba3cfb86f5963 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -138,17 +138,10 @@ void CodeGenFunction::EmitOMPFirstprivateClause(
 }
 
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
-  const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt());
-  llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS);
-
-  llvm::Value *OutlinedFn;
-  {
-    CodeGenFunction CGF(CGM, true);
-    CGOpenMPRegionInfo CGInfo(S, *CS, *CS->getCapturedDecl()->param_begin());
-    CGF.CapturedStmtInfo = &CGInfo;
-    OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS);
-  }
-
+  auto CS = cast<CapturedStmt>(S.getAssociatedStmt());
+  auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
+  auto OutlinedFn = CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(
+      S, *CS->getCapturedDecl()->param_begin());
   CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(), OutlinedFn,
                                              CapturedStruct);
 }
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index d8d715db8b8ea46856b6ca8e3c28e1b53e3ff467..0bef47810480b4c8fc4533cfe4cdf4872717a983 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -215,6 +215,10 @@ public:
     bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != nullptr; }
     FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
 
+    static bool classof(const CGCapturedStmtInfo *) {
+      return true;
+    }
+
     /// \brief Emit the captured statement body.
     virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) {
       RegionCounter Cnt = CGF.getPGORegionCounter(S);
diff --git a/test/OpenMP/parallel_firstprivate_codegen.cpp b/test/OpenMP/parallel_firstprivate_codegen.cpp
index 804b80d5d9a075b67a5b77ac75ff94a6264a5e7d..d15616d781ad5cce517d43d7fdedde589094011e 100644
--- a/test/OpenMP/parallel_firstprivate_codegen.cpp
+++ b/test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -74,7 +74,7 @@ int main() {
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_ADDR]]
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[T_VAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
 // CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}** [[T_VAR_PTR_REF]],
 // CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}* [[T_VAR_REF]],
@@ -110,6 +110,8 @@ int main() {
 // CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
 // CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
 // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_REF]]
 // CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
 // CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
@@ -127,7 +129,7 @@ int main() {
 // CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
 // CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
 // CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_ADDR]]
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
 // CHECK: [[T_VAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
 // CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}** [[T_VAR_PTR_REF]],
 // CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}* [[T_VAR_REF]],
@@ -163,6 +165,8 @@ int main() {
 // CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
 // CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
 // CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_REF]]
 // CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
 // CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*