diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index b280d53dd39c50c63d2697088e9be9044f9753b2..edb9ddad794f11ce3ebced467c824d9120f52a43 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3292,6 +3292,7 @@ static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> FirstprivateVars, + ArrayRef<const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef<PrivateDataTy> Privates) { auto &C = CGM.getContext(); @@ -3322,6 +3323,16 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, PrivateVarsPos[VD] = Counter; ++Counter; } + for (auto *E: LastprivateVars) { + Args.push_back(ImplicitParamDecl::Create( + C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.getPointerType(C.getPointerType(E->getType())) + .withConst() + .withRestrict())); + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + PrivateVarsPos[VD] = Counter; + ++Counter; + } auto &TaskPrivatesMapFnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); auto *TaskPrivatesMapTy = @@ -3361,6 +3372,179 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); } +/// Emit initialization for private variables in task-based directives. +/// \return true if cleanups are required, false otherwise. +static bool emitPrivatesInit(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + Address KmpTaskSharedsPtr, LValue TDBase, + const RecordDecl *KmpTaskTWithPrivatesQTyRD, + QualType SharedsTy, QualType SharedsPtrTy, + const OMPTaskDataTy &Data, + ArrayRef<PrivateDataTy> Privates, bool ForDup) { + auto &C = CGF.getContext(); + bool NeedsCleanup = false; + auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); + LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); + LValue SrcBase; + if (!Data.FirstprivateVars.empty()) { + SrcBase = CGF.MakeAddrLValue( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), + SharedsTy); + } + CodeGenFunction::CGCapturedStmtInfo CapturesInfo( + cast<CapturedStmt>(*D.getAssociatedStmt())); + FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); + for (auto &&Pair : Privates) { + auto *VD = Pair.second.PrivateCopy; + auto *Init = VD->getAnyInitializer(); + LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); + if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && + !CGF.isTrivialInitializer(Init)))) { + if (auto *Elem = Pair.second.PrivateElemInit) { + auto *OriginalVD = Pair.second.Original; + auto *SharedField = CapturesInfo.lookup(OriginalVD); + auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); + SharedRefLValue = CGF.MakeAddrLValue( + Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), + SharedRefLValue.getType(), AlignmentSource::Decl); + QualType Type = OriginalVD->getType(); + if (Type->isArrayType()) { + // Initialize firstprivate array. + if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { + // Perform simple memcpy. + CGF.EmitAggregateAssign(PrivateLValue.getAddress(), + SharedRefLValue.getAddress(), Type); + } else { + // Initialize firstprivate array using element-by-element + // intialization. + CGF.EmitOMPAggregateAssign( + PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, + [&CGF, Elem, Init, &CapturesInfo](Address DestElement, + Address SrcElement) { + // Clean up any temporaries needed by the initialization. + CodeGenFunction::OMPPrivateScope InitScope(CGF); + InitScope.addPrivate( + Elem, [SrcElement]() -> Address { return SrcElement; }); + (void)InitScope.Privatize(); + // Emit initialization for single element. + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( + CGF, &CapturesInfo); + CGF.EmitAnyExprToMem(Init, DestElement, + Init->getType().getQualifiers(), + /*IsInitializer=*/false); + }); + } + } else { + CodeGenFunction::OMPPrivateScope InitScope(CGF); + InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { + return SharedRefLValue.getAddress(); + }); + (void)InitScope.Privatize(); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); + CGF.EmitExprAsInit(Init, VD, PrivateLValue, + /*capturedByInit=*/false); + } + } else + CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); + } + NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); + ++FI; + } + return NeedsCleanup; +} + +/// Check if duplication function is required for taskloops. +static bool checkInitIsRequired(CodeGenFunction &CGF, + ArrayRef<PrivateDataTy> Privates) { + bool InitRequired = false; + for (auto &&Pair : Privates) { + auto *VD = Pair.second.PrivateCopy; + auto *Init = VD->getAnyInitializer(); + InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && + !CGF.isTrivialInitializer(Init)); + } + return InitRequired; +} + + +/// Emit task_dup function (for initialization of +/// private/firstprivate/lastprivate vars and last_iter flag) +/// \code +/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int +/// lastpriv) { +/// // setup lastprivate flag +/// task_dst->last = lastpriv; +/// // could be constructor calls here... +/// } +/// \endcode +static llvm::Value * +emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, + const OMPExecutableDirective &D, + QualType KmpTaskTWithPrivatesPtrQTy, + const RecordDecl *KmpTaskTWithPrivatesQTyRD, + const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, + QualType SharedsPtrTy, const OMPTaskDataTy &Data, + ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { + auto &C = CGM.getContext(); + FunctionArgList Args; + ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, + /*Id=*/nullptr, C.IntTy); + Args.push_back(&DstArg); + Args.push_back(&SrcArg); + Args.push_back(&LastprivArg); + auto &TaskDupFnInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); + auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); + auto *TaskDup = + llvm::Function::Create(TaskDupTy, llvm::GlobalValue::InternalLinkage, + ".omp_task_dup.", &CGM.getModule()); + CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo); + CodeGenFunction CGF(CGM); + CGF.disableDebugInfo(); + CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args); + + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&DstArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); + // task_dst->liter = lastpriv; + if (WithLastIter) { + auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); + LValue Base = CGF.EmitLValueForField( + TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); + LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); + llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); + CGF.EmitStoreOfScalar(Lastpriv, LILVal); + } + + // Emit initial values for private copies (if any). + assert(!Privates.empty()); + Address KmpTaskSharedsPtr = Address::invalid(); + if (!Data.FirstprivateVars.empty()) { + LValue TDBase = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(&SrcArg), + KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); + LValue Base = CGF.EmitLValueForField( + TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); + KmpTaskSharedsPtr = Address( + CGF.EmitLoadOfScalar(CGF.EmitLValueForField( + Base, *std::next(KmpTaskTQTyRD->field_begin(), + KmpTaskTShareds)), + Loc), + CGF.getNaturalTypeAlignment(SharedsTy)); + } + (void)emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, + KmpTaskTWithPrivatesQTyRD, SharedsTy, SharedsPtrTy, + Data, Privates, /*ForDup=*/true); + CGF.FinishFunction(); + return TaskDup; +} + CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, @@ -3390,6 +3574,15 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, ++I; ++IElemInitRef; } + I = Data.LastprivateCopies.begin(); + for (auto *E : Data.LastprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Privates.push_back(std::make_pair( + C.getDeclAlign(VD), + PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), + /*PrivateElemInit=*/nullptr))); + ++I; + } llvm::array_pod_sort(Privates.begin(), Privates.end(), array_pod_sort_comparator); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); @@ -3420,9 +3613,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, ->getType(); if (!Privates.empty()) { auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - TaskPrivatesMap = emitTaskPrivateMappingFunction(CGM, Loc, Data.PrivateVars, - Data.FirstprivateVars, - FI->getType(), Privates); + TaskPrivatesMap = emitTaskPrivateMappingFunction( + CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, + FI->getType(), Privates); TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskPrivatesMap, TaskPrivatesMapTy); } else { @@ -3480,78 +3673,19 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); } // Emit initial values for private copies (if any). + TaskResultTy Result; bool NeedsCleanup = false; if (!Privates.empty()) { - auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); - auto PrivatesBase = CGF.EmitLValueForField(Base, *FI); - FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); - LValue SharedsBase; - if (!Data.FirstprivateVars.empty()) { - SharedsBase = CGF.MakeAddrLValue( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), - SharedsTy); - } - CodeGenFunction::CGCapturedStmtInfo CapturesInfo( - cast<CapturedStmt>(*D.getAssociatedStmt())); - for (auto &&Pair : Privates) { - auto *VD = Pair.second.PrivateCopy; - auto *Init = VD->getAnyInitializer(); - LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); - if (Init) { - if (auto *Elem = Pair.second.PrivateElemInit) { - auto *OriginalVD = Pair.second.Original; - auto *SharedField = CapturesInfo.lookup(OriginalVD); - auto SharedRefLValue = - CGF.EmitLValueForField(SharedsBase, SharedField); - SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), - SharedRefLValue.getType(), AlignmentSource::Decl); - QualType Type = OriginalVD->getType(); - if (Type->isArrayType()) { - // Initialize firstprivate array. - if (!isa<CXXConstructExpr>(Init) || - CGF.isTrivialInitializer(Init)) { - // Perform simple memcpy. - CGF.EmitAggregateAssign(PrivateLValue.getAddress(), - SharedRefLValue.getAddress(), Type); - } else { - // Initialize firstprivate array using element-by-element - // intialization. - CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(), SharedRefLValue.getAddress(), - Type, [&CGF, Elem, Init, &CapturesInfo]( - Address DestElement, Address SrcElement) { - // Clean up any temporaries needed by the initialization. - CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SrcElement]() -> Address { - return SrcElement; - }); - (void)InitScope.Privatize(); - // Emit initialization for single element. - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( - CGF, &CapturesInfo); - CGF.EmitAnyExprToMem(Init, DestElement, - Init->getType().getQualifiers(), - /*IsInitializer=*/false); - }); - } - } else { - CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { - return SharedRefLValue.getAddress(); - }); - (void)InitScope.Privatize(); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); - CGF.EmitExprAsInit(Init, VD, PrivateLValue, - /*capturedByInit=*/false); - } - } else { - CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); - } - } - NeedsCleanup = NeedsCleanup || FI->getType().isDestructedType(); - ++FI; + NeedsCleanup = emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, + KmpTaskTWithPrivatesQTyRD, SharedsTy, + SharedsPtrTy, Data, Privates, + /*ForDup=*/false); + if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && + (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { + Result.TaskDupFn = emitTaskDupFunction( + CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, + KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, + /*WithLastIter=*/!Data.LastprivateVars.empty()); } } // Provide pointer to function with destructors for privates. @@ -3566,7 +3700,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( DestructorFn, KmpRoutineEntryPtrTy), Destructor); - TaskResultTy Result; Result.NewTask = NewTask; Result.TaskEntry = TaskEntry; Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; @@ -3823,7 +3956,10 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), - llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + Result.TaskDupFn + ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, + CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 25d6909dc8ef0f430ee99da3c0e213f29c528027..881bd6a9c3dfaaff5f83c679c46945e8258260d5 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -93,6 +93,8 @@ struct OMPTaskDataTy final { SmallVector<const Expr *, 4> FirstprivateVars; SmallVector<const Expr *, 4> FirstprivateCopies; SmallVector<const Expr *, 4> FirstprivateInits; + SmallVector<const Expr *, 4> LastprivateVars; + SmallVector<const Expr *, 4> LastprivateCopies; SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; llvm::PointerIntPair<llvm::Value *, 1, bool> Final; llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule; @@ -453,6 +455,7 @@ private: llvm::Value *NewTaskNewTaskTTy = nullptr; LValue TDBase; RecordDecl *KmpTaskTQTyRD = nullptr; + llvm::Value *TaskDupFn = nullptr; }; /// Emit task region for the task directive. The task region is emitted in /// several steps: diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index 878cf3ff2d170e73e44508c7c141a99aff317f57..3c117316ee651fc96eec843cb7b1e77f9a8ad873 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -740,12 +740,16 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { HasAtLeastOneLastprivate = true; + if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) + break; auto IRef = C->varlist_begin(); auto IDestRef = C->destination_exprs().begin(); for (auto *IInit : C->private_copies()) { // Keep the address of the original variable for future update at the end // of the loop. auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + // Taskloops do not require additional initialization, it is done in + // runtime support library. if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { auto *DestVD = cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl()); PrivateScope.addPrivate(DestVD, [this, OrigVD, IRef]() -> Address { @@ -761,12 +765,11 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // for 'firstprivate' clause. if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = - PrivateScope.addPrivate(OrigVD, [&]() -> Address { - // Emit private VarDecl with copy init. - EmitDecl(*VD); - return GetAddrOfLocalVar(VD); - }); + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { + // Emit private VarDecl with copy init. + EmitDecl(*VD); + return GetAddrOfLocalVar(VD); + }); assert(IsRegistered && "lastprivate var already registered as private"); (void)IsRegistered; @@ -2363,15 +2366,34 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, ++IElemInitRef; } } + // Get list of lastprivate variables (for taskloops). + llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + auto IRef = C->varlist_begin(); + auto ID = C->destination_exprs().begin(); + for (auto *IInit : C->private_copies()) { + auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + Data.LastprivateVars.push_back(*IRef); + Data.LastprivateCopies.push_back(IInit); + } + LastprivateDstsOrigs.insert( + {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), + cast<DeclRefExpr>(*IRef)}); + ++IRef; + ++ID; + } + } // Build list of dependences. for (const auto *C : S.getClausesOfKind<OMPDependClause>()) for (auto *IRef : C->varlists()) Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen, &LastprivateDstsOrigs]( + CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); - if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty()) { + if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || + !Data.LastprivateVars.empty()) { auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( @@ -2395,7 +2417,26 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } + for (auto *E : Data.LastprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), + ".lastpriv.ptr.addr"); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : LastprivateDstsOrigs) { + auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); + DeclRefExpr DRE( + const_cast<VarDecl *>(OrigVD), + /*RefersToEnclosingVariableOrCapture=*/CGF.CapturedStmtInfo->lookup( + OrigVD) != nullptr, + Pair.second->getType(), VK_LValue, Pair.second->getExprLoc()); + Scope.addPrivate(Pair.first, [&CGF, &DRE]() { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } for (auto &&Pair : PrivatePtrs) { Address Replacement(CGF.Builder.CreateLoad(Pair.second), CGF.getContext().getDeclAlign(Pair.first)); @@ -3402,6 +3443,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, LoopScope); CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); // Emit the loop iteration variable. const Expr *IVExpr = S.getIterationVariable(); @@ -3430,6 +3472,14 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.EmitBranch(ContBlock); CGF.EmitBlock(ContBlock, true); } + // Emit final copy of the lastprivate variables if IsLastIter != 0. + if (HasLastprivateClause) { + CGF.EmitOMPLastprivateClauseFinal( + S, isOpenMPSimdDirective(S.getDirectiveKind()), + CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( + CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, + (*LIP)->getType(), S.getLocStart()))); + } }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, diff --git a/test/OpenMP/taskloop_lastprivate_codegen.cpp b/test/OpenMP/taskloop_lastprivate_codegen.cpp new file mode 100644 index 0000000000000000000000000000000000000000..811229a744fee58ca8ea4d59cc1e638e5ec493b8 --- /dev/null +++ b/test/OpenMP/taskloop_lastprivate_codegen.cpp @@ -0,0 +1,527 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s +// expected-no-diagnostics +// REQUIRES: x86-registered-target +// It doesn't pass on win32. +// REQUIRES: shell +#ifndef ARRAY +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + S(const S &s, T t = T()) : f(s.f + t) {} + operator T() { return T(); } + ~S() {} +}; + +volatile double g; + +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)*, i64, i64, i64, i32 } +// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } +// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] +// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}* } +// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } +// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } +// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } +// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } +template <typename T> +T tmain() { + S<T> ttt; + S<T> test; + T t_var __attribute__((aligned(128))) = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> var(3); +#pragma omp taskloop lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global double + // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + // LAMBDA-LABEL: @main + // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 +// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 + +// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) +// LAMBDA: ret +#pragma omp taskloop lastprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: store double* %{{.+}}, double** %{{.+}}, + // LAMBDA: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}* noalias) + g = 1; + sivar = 11; + // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, + // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // LAMBDA: call void [[INNER_LAMBDA]](% + // LAMBDA: icmp ne i32 %{{.+}}, 0 + // LAMBDA: br i1 + // LAMBDA: load double, double* % + // LAMBDA: store volatile double % + // LAMBDA: load i32, i32* % + // LAMBDA: store i32 % + // LAMBDA: ret + [&]() { + g = 2; + sivar = 22; + }(); + } + }(); + return 0; +#elif defined(BLOCKS) + // BLOCKS: [[G:@.+]] = global double + // BLOCKS-LABEL: @main + // BLOCKS: call void {{%.+}}(i8 + ^{ + // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 + // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 + + // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 + // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) + // BLOCKS: ret +#pragma omp taskloop lastprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // BLOCKS: define {{.+}} void {{@.+}}(i8* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store double 2.0{{.+}}, double* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS-NOT: [[ISVAR]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: ret + + // BLOCKS: store double* %{{.+}}, double** %{{.+}}, + // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, + // BLOCKS: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}* noalias) + g = 1; + sivar = 11; + // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: call void {{%.+}}(i8 + // BLOCKS: icmp ne i32 %{{.+}}, 0 + // BLOCKS: br i1 + // BLOCKS: load double, double* % + // BLOCKS: store volatile double % + // BLOCKS: load i32, i32* % + // BLOCKS: store i32 % + ^{ + g = 2; + sivar = 22; + }(); + } + }(); + return 0; +#else + S<double> ttt; + S<double> test; + int t_var = 0; + int vec[] = {1, 2}; + S<double> s_arr[] = {1, 2}; + S<double> var(3); +#pragma omp taskloop lastprivate(var, t_var, s_arr, vec, s_arr, var, sivar) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + sivar = 33; + } + return tmain<int>(); +#endif +} + +// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, +// CHECK: define i{{[0-9]+}} @main() +// CHECK: alloca [[S_DOUBLE_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]]) + +// Store original variables in capture struct. +// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]], +// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// CHECK: store i{{[0-9]+}}* [[SIVAR]], i{{[0-9]+}}** [[SIVAR_REF]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T]] task_data; +// [[KMP_TASK_MAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 104, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// Also copy address of private copy to the corresponding shareds reference. +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + +// Constructors for s_arr and var. +// s_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]]) +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) + +// t_var; +// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// vec; +// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// sivar; +// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4 + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) + +// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() + +// No destructors must be called for private copies of s_arr and var. +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_DOUBLE_TY_DESTR:@.+]]([[S_DOUBLE_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias, [[S_DOUBLE_TY]]** noalias, i32** noalias, [2 x [[S_DOUBLE_TY]]]** noalias, [2 x i32]** noalias, i32** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, +// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], +// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 +// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, +// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_MAIN_TY]]* noalias) + +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], + +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) + +// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + +// Privates actually are used. +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] +// CHECK-DAG: [[PRIV_SIVAR]] + +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK-NEXT: br i1 +// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* +// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* % +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK-NEXT: br i1 +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label +// CHECK: ret + +// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32) +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_MAIN_TY]]* noalias) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// CHECK: alloca [[S_INT_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) + +// Store original variables in capture struct. +// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T_TY]] task_data; +// [[KMP_TASK_TMAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + +// t_var; +// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 + +// vec; +// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 + +// Constructors for s_arr and var. +// a_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]]) +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) + +// No destructors must be called for private copies of s_arr and var. +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias, i32** noalias, [2 x i32]** noalias, [2 x [[S_INT_TY]]]** noalias, [[S_INT_TY]]** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, +// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) +// CHECK: alloca i32*, +// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + +// Privates actually are used. +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] + +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK-NEXT: br i1 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* % +// CHECK: phi [[S_INT_TY]]* +// CHECK: phi [[S_INT_TY]]* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK-NEXT: br i1 +// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* +// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: br label +// CHECK: ret + +// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32) +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +#endif +#else +// ARRAY-LABEL: array_func +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &) {} + ~St() {} +}; + +void array_func(int n, float a[n], St s[2]) { +// ARRAY: call i8* @__kmpc_omp_task_alloc( +// ARRAY: call void @__kmpc_taskloop( +// ARRAY: store float** %{{.+}}, float*** %{{.+}}, +// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, +// ARRAY: icmp ne i32 %{{.+}}, 0 +// ARRAY: store float* %{{.+}}, float** %{{.+}}, +// ARRAY: store %struct.St* %{{.+}}, %struct.St** %{{.+}}, +#pragma omp taskloop lastprivate(a, s) + for (int i = 0; i < 10; ++i) + ; +} +#endif + diff --git a/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp b/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e27114126af90499e5b674d80e46ed43ff2defef --- /dev/null +++ b/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp @@ -0,0 +1,527 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s +// expected-no-diagnostics +// REQUIRES: x86-registered-target +// It doesn't pass on win32. +// REQUIRES: shell +#ifndef ARRAY +#ifndef HEADER +#define HEADER + +template <class T> +struct S { + T f; + S(T a) : f(a) {} + S() : f() {} + S(const S &s, T t = T()) : f(s.f + t) {} + operator T() { return T(); } + ~S() {} +}; + +volatile double g; + +// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, i32 (i32, i8*)*, i64, i64, i64, i32 } +// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } +// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] +// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}* } +// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } +// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } +// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } +// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } +// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } +template <typename T> +T tmain() { + S<T> ttt; + S<T> test; + T t_var __attribute__((aligned(128))) = T(); + T vec[] = {1, 2}; + S<T> s_arr[] = {1, 2}; + S<T> var(3); +#pragma omp taskloop simd lastprivate(t_var, vec, s_arr, s_arr, var, var) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + } + return T(); +} + +int main() { + static int sivar; +#ifdef LAMBDA + // LAMBDA: [[G:@.+]] = global double + // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, + // LAMBDA-LABEL: @main + // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( + [&]() { + // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( + // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 +// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 + +// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) +// LAMBDA: ret +#pragma omp taskloop simd lastprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]]) + // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], + // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] + // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 + // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] + // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] + + // LAMBDA: store double* %{{.+}}, double** %{{.+}}, + // LAMBDA: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}* noalias) + g = 1; + sivar = 11; + // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, + // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // LAMBDA: call void [[INNER_LAMBDA]](% + // LAMBDA: icmp ne i32 %{{.+}}, 0 + // LAMBDA: br i1 + // LAMBDA: load double, double* % + // LAMBDA: store volatile double % + // LAMBDA: load i32, i32* % + // LAMBDA: store i32 % + // LAMBDA: ret + [&]() { + g = 2; + sivar = 22; + }(); + } + }(); + return 0; +#elif defined(BLOCKS) + // BLOCKS: [[G:@.+]] = global double + // BLOCKS-LABEL: @main + // BLOCKS: call void {{%.+}}(i8 + ^{ + // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* + // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) + // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 + // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 + + // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 + // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) + // BLOCKS: ret +#pragma omp taskloop simd lastprivate(g, sivar) + for (int i = 0; i < 10; ++i) { + // BLOCKS: define {{.+}} void {{@.+}}(i8* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store double 2.0{{.+}}, double* + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS-NOT: [[ISVAR]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: ret + + // BLOCKS: store double* %{{.+}}, double** %{{.+}}, + // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, + // BLOCKS: define internal i32 [[TASK_ENTRY]](i32, %{{.+}}* noalias) + g = 1; + sivar = 11; + // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, + // BLOCKS-NOT: [[G]]{{[[^:word:]]}} + // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, + // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} + // BLOCKS: call void {{%.+}}(i8 + // BLOCKS: icmp ne i32 %{{.+}}, 0 + // BLOCKS: br i1 + // BLOCKS: load double, double* % + // BLOCKS: store volatile double % + // BLOCKS: load i32, i32* % + // BLOCKS: store i32 % + ^{ + g = 2; + sivar = 22; + }(); + } + }(); + return 0; +#else + S<double> ttt; + S<double> test; + int t_var = 0; + int vec[] = {1, 2}; + S<double> s_arr[] = {1, 2}; + S<double> var(3); +#pragma omp taskloop simd lastprivate(var, t_var, s_arr, vec, s_arr, var, sivar) + for (int i = 0; i < 10; ++i) { + vec[0] = t_var; + s_arr[0] = var; + sivar = 33; + } + return tmain<int>(); +#endif +} + +// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, +// CHECK: define i{{[0-9]+}} @main() +// CHECK: alloca [[S_DOUBLE_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]]) + +// Store original variables in capture struct. +// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR]], [[S_DOUBLE_TY]]** [[VAR_REF]], +// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4 +// CHECK: store i{{[0-9]+}}* [[SIVAR]], i{{[0-9]+}}** [[SIVAR_REF]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T]] task_data; +// [[KMP_TASK_MAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 104, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 40, i32 8, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// Also copy address of private copy to the corresponding shareds reference. +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 + +// Constructors for s_arr and var. +// s_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]]) +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) + +// t_var; +// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// vec; +// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// sivar; +// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4 + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) + +// CHECK: = call i{{.+}} [[TMAIN_INT:@.+]]() + +// No destructors must be called for private copies of s_arr and var. +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_DOUBLE_TY_DESTR:@.+]]([[S_DOUBLE_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias, [[S_DOUBLE_TY]]** noalias, i32** noalias, [2 x [[S_DOUBLE_TY]]]** noalias, [2 x i32]** noalias, i32** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, +// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], +// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 +// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, +// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_MAIN_TY]]* noalias) + +// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, +// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, +// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], + +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) + +// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + +// Privates actually are used. +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] +// CHECK-DAG: [[PRIV_SIVAR]] + +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK-NEXT: br i1 +// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* +// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* % +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK-NEXT: br i1 +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label +// CHECK: ret + +// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32) +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_DOUBLE_TY]]* +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_DOUBLE_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_MAIN_TY]]* noalias) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call {{.*}} [[S_DOUBLE_TY_DESTR]]([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() +// CHECK: alloca [[S_INT_TY]], +// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 +// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], +// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], +// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[LOC:%.+]]) + +// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) + +// Store original variables in capture struct. +// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: store [2 x i32]* [[VEC_ADDR]], [2 x i32]** [[VEC_REF]], +// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 +// CHECK: store i32* [[T_VAR_ADDR]], i32** [[T_VAR_REF]], +// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]], +// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3 +// CHECK: store [[S_INT_TY]]* [[VAR_ADDR]], [[S_INT_TY]]** [[VAR_REF]], + +// Allocate task. +// Returns struct kmp_task_t { +// [[KMP_TASK_T_TY]] task_data; +// [[KMP_TASK_TMAIN_TY]] privates; +// }; +// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID]], i32 1, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) +// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* + +// Fill kmp_task_t->shareds by copying from original capture argument. +// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], +// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[SHAREDS_REF]], i8* [[CAPTURES_ADDR]], i64 32, i32 8, i1 false) + +// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 + +// t_var; +// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 + +// vec; +// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 + +// Constructors for s_arr and var. +// a_arr; +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]]) +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 +// CHECK: icmp eq +// CHECK: br i1 + +// var; +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) + +// Provide pointer to destructor function, which will destroy private variables at the end of the task. +// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 +// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_REF]], + +// Start task. +// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 0, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) + +// No destructors must be called for private copies of s_arr and var. +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: ret +// + +// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias, i32** noalias, [2 x i32]** noalias, [2 x [[S_INT_TY]]]** noalias, [[S_INT_TY]]** noalias) +// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** +// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 +// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, +// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], +// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 +// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, +// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], +// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 +// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, +// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], +// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 +// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, +// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], +// CHECK: ret void + +// CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) +// CHECK: alloca i32*, +// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, +// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, +// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, +// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, +// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], +// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) +// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], +// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], +// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], +// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + +// Privates actually are used. +// CHECK-DAG: [[PRIV_VAR]] +// CHECK-DAG: [[PRIV_T_VAR]] +// CHECK-DAG: [[PRIV_S_ARR]] +// CHECK-DAG: [[PRIV_VEC]] + +// CHECK: icmp ne i32 %{{.+}}, 0 +// CHECK-NEXT: br i1 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: bitcast [2 x i32]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* % +// CHECK: phi [[S_INT_TY]]* +// CHECK: phi [[S_INT_TY]]* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK-NEXT: br i1 +// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* +// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* % +// CHECK: br label +// CHECK: ret + +// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32) +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: load i32, i32* % +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 +// CHECK: br label % + +// CHECK: phi [[S_INT_TY]]* +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 1 +// CHECK: icmp eq [[S_INT_TY]]* % +// CHECK: br i1 % + +// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 +// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* +// CHECK: ret void + +// CHECK: define internal i32 [[DESTRUCTORS]](i32, [[KMP_TASK_TMAIN_TY]]* noalias) +// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 +// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 +// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]]) +// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 +// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 +// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 +// CHECK: call void [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]]) +// CHECK: icmp eq +// CHECK: br i1 +// CHECK: ret i32 + +#endif +#else +// ARRAY-LABEL: array_func +struct St { + int a, b; + St() : a(0), b(0) {} + St(const St &) {} + ~St() {} +}; + +void array_func(int n, float a[n], St s[2]) { +// ARRAY: call i8* @__kmpc_omp_task_alloc( +// ARRAY: call void @__kmpc_taskloop( +// ARRAY: store float** %{{.+}}, float*** %{{.+}}, +// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, +// ARRAY: icmp ne i32 %{{.+}}, 0 +// ARRAY: store float* %{{.+}}, float** %{{.+}}, +// ARRAY: store %struct.St* %{{.+}}, %struct.St** %{{.+}}, +#pragma omp taskloop simd lastprivate(a, s) + for (int i = 0; i < 10; ++i) + ; +} +#endif +