From 5c40bec5ebd913c0f441dec18fa35e5eca0a9312 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 13 Jul 2017 13:36:14 +0000 Subject: [PATCH] [OPENMP] Generalization of codegen for reduction clauses. Reworked codegen for reduction clauses for future support of reductions in task-based directives. llvm-svn: 307910 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 394 ++++++++++++++ clang/lib/CodeGen/CGOpenMPRuntime.h | 80 +++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 479 ++++-------------- clang/test/OpenMP/for_reduction_codegen.cpp | 14 +- .../test/OpenMP/for_reduction_codegen_UDR.cpp | 15 +- 5 files changed, 580 insertions(+), 402 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a2ea0dec3e9d..76f446706127 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -697,6 +697,400 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { } } +/// Check if the combiner is a call to UDR combiner and if it is so return the +/// UDR decl used for reduction. 
+static const OMPDeclareReductionDecl * +getReductionInit(const Expr *ReductionOp) { + if (auto *CE = dyn_cast(ReductionOp)) + if (auto *OVE = dyn_cast(CE->getCallee())) + if (auto *DRE = + dyn_cast(OVE->getSourceExpr()->IgnoreImpCasts())) + if (auto *DRD = dyn_cast(DRE->getDecl())) + return DRD; + return nullptr; +} + +static void emitInitWithReductionInitializer(CodeGenFunction &CGF, + const OMPDeclareReductionDecl *DRD, + const Expr *InitOp, + Address Private, Address Original, + QualType Ty) { + if (DRD->getInitializer()) { + std::pair Reduction = + CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); + auto *CE = cast(InitOp); + auto *OVE = cast(CE->getCallee()); + const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); + const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); + auto *LHSDRE = cast(cast(LHS)->getSubExpr()); + auto *RHSDRE = cast(cast(RHS)->getSubExpr()); + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate(cast(LHSDRE->getDecl()), + [=]() -> Address { return Private; }); + PrivateScope.addPrivate(cast(RHSDRE->getDecl()), + [=]() -> Address { return Original; }); + (void)PrivateScope.Privatize(); + RValue Func = RValue::get(Reduction.second); + CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); + CGF.EmitIgnoredExpr(InitOp); + } else { + llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); + auto *GV = new llvm::GlobalVariable( + CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, + llvm::GlobalValue::PrivateLinkage, Init, ".init"); + LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + RValue InitRVal; + switch (CGF.getEvaluationKind(Ty)) { + case TEK_Scalar: + InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); + break; + case TEK_Complex: + InitRVal = + RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); + break; + case TEK_Aggregate: + InitRVal = RValue::getAggregate(LV.getAddress()); + break; + } + OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); + 
CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + } +} + +/// \brief Emit initialization of arrays of complex types. +/// \param DestAddr Address of the array. +/// \param Type Type of array. +/// \param Init Initial expression of array. +/// \param SrcAddr Address of the original array. +static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, + QualType Type, const Expr *Init, + Address SrcAddr = Address::invalid()) { + auto *DRD = getReductionInit(Init); + // Perform element-by-element initialization. + QualType ElementTy; + + // Drill down to the base element type on both arrays. + auto ArrayTy = Type->getAsArrayTypeUnsafe(); + auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); + DestAddr = + CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); + if (DRD) + SrcAddr = + CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + + llvm::Value *SrcBegin = nullptr; + if (DRD) + SrcBegin = SrcAddr.getPointer(); + auto DestBegin = DestAddr.getPointer(); + // Cast from pointer to array type to pointer to single element. + auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + // The basic structure here is a while-do loop. + auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); + auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); + auto IsEmpty = + CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); + CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); + + // Enter the loop body, making that address the current address. 
+ auto EntryBB = CGF.Builder.GetInsertBlock(); + CGF.EmitBlock(BodyBB); + + CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); + + llvm::PHINode *SrcElementPHI = nullptr; + Address SrcElementCurrent = Address::invalid(); + if (DRD) { + SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, + "omp.arraycpy.srcElementPast"); + SrcElementPHI->addIncoming(SrcBegin, EntryBB); + SrcElementCurrent = + Address(SrcElementPHI, + SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + } + llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( + DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); + DestElementPHI->addIncoming(DestBegin, EntryBB); + Address DestElementCurrent = + Address(DestElementPHI, + DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + + // Emit copy. + { + CodeGenFunction::RunCleanupsScope InitScope(CGF); + if (DRD && (DRD->getInitializer() || !Init)) { + emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, + SrcElementCurrent, ElementTy); + } else + CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), + /*IsInitializer=*/false); + } + + if (DRD) { + // Shift the address forward by one element. + auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( + SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); + } + + // Shift the address forward by one element. + auto DestElementNext = CGF.Builder.CreateConstGEP1_32( + DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + // Check whether we've reached the end. + auto Done = + CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); + CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); + DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); + + // Done. 
+ CGF.EmitBlock(DoneBB, /*IsFinished=*/true); +} + +LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { + if (const auto *OASE = dyn_cast(E)) + return CGF.EmitOMPArraySectionExpr(OASE); + if (const auto *ASE = dyn_cast(E)) + return CGF.EmitLValue(ASE); + auto *OrigVD = cast(cast(E)->getDecl()); + DeclRefExpr DRE(const_cast(OrigVD), + CGF.CapturedStmtInfo && + CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr, + E->getType(), VK_LValue, E->getExprLoc()); + // Store the address of the original variable associated with the LHS + // implicit variable. + return CGF.EmitLValue(&DRE); +} + +LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, + const Expr *E) { + if (const auto *OASE = dyn_cast(E)) + return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + return LValue(); +} + +void ReductionCodeGen::emitAggregateInitialization(CodeGenFunction &CGF, + unsigned N, + Address PrivateAddr, + LValue SharedLVal) { + // Emit VarDecl with copy init for arrays. + // Get the address of the original variable captured in current + // captured region. + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), + DRD ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), + SharedLVal.getAddress()); +} + +ReductionCodeGen::ReductionCodeGen(ArrayRef Shareds, + ArrayRef Privates, + ArrayRef ReductionOps) { + ClausesData.reserve(Shareds.size()); + SharedAddresses.reserve(Shareds.size()); + Sizes.reserve(Shareds.size()); + auto IPriv = Privates.begin(); + auto IRed = ReductionOps.begin(); + for (const auto *Ref : Shareds) { + ClausesData.emplace_back(Ref, *IPriv, *IRed); + std::advance(IPriv, 1); + std::advance(IRed, 1); + } +} + +void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { + assert(SharedAddresses.size() == N && + "Number of generated lvalues must be exactly N."); + SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref), + emitSharedLValueUB(CGF, ClausesData[N].Ref)); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = isa(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + Sizes.emplace_back(nullptr); + return; + } + llvm::Value *Size; + if (AsArraySection) { + Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), + SharedAddresses[N].first.getPointer()); + Size = CGF.Builder.CreateNUWAdd( + Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); + } else { + Size = CGF.getTypeSize( + SharedAddresses[N].first.getType().getNonReferenceType()); + } + Sizes.emplace_back(Size); + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, + llvm::Value *Size) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + bool AsArraySection = 
isa(ClausesData[N].Ref); + if (!AsArraySection && !PrivateType->isVariablyModifiedType()) { + assert(!Size && !Sizes[N] && + "Size should be nullptr for non-variably modified redution " + "items."); + return; + } + CodeGenFunction::OpaqueValueMapping OpaqueMap( + CGF, + cast( + CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), + RValue::get(Size)); + CGF.EmitVariablyModifiedType(PrivateType); +} + +void ReductionCodeGen::emitInitialization( + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + llvm::function_ref DefaultInit) { + assert(SharedAddresses.size() > N && "No variable was generated"); + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + auto *DRD = getReductionInit(ClausesData[N].ReductionOp); + QualType PrivateType = PrivateVD->getType(); + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + QualType SharedType = SharedAddresses[N].first.getType(); + SharedLVal = CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), + CGF.ConvertTypeForMem(SharedType)), + SharedType, SharedAddresses[N].first.getBaseInfo()); + if (isa(ClausesData[N].Ref) || + CGF.getContext().getAsArrayType(PrivateVD->getType())) { + emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal); + } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { + emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, + PrivateAddr, SharedLVal.getAddress(), + SharedLVal.getType()); + } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && + !CGF.isTrivialInitializer(PrivateVD->getInit())) { + CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, + PrivateVD->getType().getQualifiers(), + /*IsInitializer=*/false); + } +} + +bool ReductionCodeGen::needCleanups(unsigned N) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); + return DTorKind != QualType::DK_none; +} + +void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + auto *PrivateVD = + cast(cast(ClausesData[N].Private)->getDecl()); + QualType PrivateType = PrivateVD->getType(); + QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); + if (needCleanups(N)) { + PrivateAddr = CGF.Builder.CreateElementBitCast( + PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); + } +} + +static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + LValue BaseLV) { + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + if (auto *PtrTy = BaseTy->getAs()) + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); + else { + BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), + BaseTy->castAs()); + } + BaseTy = BaseTy->getPointeeType(); + } + return CGF.MakeAddrLValue( + CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), + CGF.ConvertTypeForMem(ElTy)), + BaseLV.getType(), BaseLV.getBaseInfo()); +} + +static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, + llvm::Type *BaseLVType, CharUnits BaseLVAlignment, + llvm::Value *Addr) { + Address Tmp = Address::invalid(); + Address TopTmp = Address::invalid(); + Address MostTopTmp = Address::invalid(); + BaseTy = BaseTy.getNonReferenceType(); + while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && + !CGF.getContext().hasSameType(BaseTy, ElTy)) { + Tmp = CGF.CreateMemTemp(BaseTy); + if (TopTmp.isValid()) + CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); + else + MostTopTmp = Tmp; + TopTmp = Tmp; + BaseTy = BaseTy->getPointeeType(); + } + llvm::Type *Ty = BaseLVType; + if (Tmp.isValid()) + Ty = Tmp.getElementType(); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if 
(Tmp.isValid()) { + CGF.Builder.CreateStore(Addr, Tmp); + return MostTopTmp; + } + return Address(Addr, BaseLVAlignment); +} + +Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr) { + const DeclRefExpr *DE; + const VarDecl *OrigVD = nullptr; + if (auto *OASE = dyn_cast(ClausesData[N].Ref)) { + auto *Base = OASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempOASE = dyn_cast(Base)) + Base = TempOASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast(Base); + OrigVD = cast(DE->getDecl()); + } else if (auto *ASE = dyn_cast(ClausesData[N].Ref)) { + auto *Base = ASE->getBase()->IgnoreParenImpCasts(); + while (auto *TempASE = dyn_cast(Base)) + Base = TempASE->getBase()->IgnoreParenImpCasts(); + DE = cast(Base); + OrigVD = cast(DE->getDecl()); + } + if (OrigVD) { + BaseDecls.emplace_back(OrigVD); + auto OriginalBaseLValue = CGF.EmitLValue(DE); + LValue BaseLValue = + loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), + OriginalBaseLValue); + llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( + BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); + llvm::Value *Ptr = + CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment); + return castToBase(CGF, OrigVD->getType(), + SharedAddresses[N].first.getType(), + OriginalBaseLValue.getPointer()->getType(), + OriginalBaseLValue.getAlignment(), Ptr); + } + BaseDecls.emplace_back( + cast(cast(ClausesData[N].Ref)->getDecl())); + return PrivateAddr; +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 6f460f121791..a9b7ef75c8f6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -105,6 +105,86 @@ struct 
OMPTaskDataTy final { bool Nogroup = false; }; +/// Class intended to support codegen of all kinds of reduction clauses. +class ReductionCodeGen { +private: + /// Data required for codegen of reduction clauses. + struct ReductionData { + /// Reference to the original shared item. + const Expr *Ref = nullptr; + /// Helper expression for generation of private copy. + const Expr *Private = nullptr; + /// Helper expression for generation of reduction operation. + const Expr *ReductionOp = nullptr; + ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp) + : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {} + }; + /// List of reduction-based clauses. + SmallVector ClausesData; + + /// List of addresses of original shared variables/expressions. + SmallVector, 4> SharedAddresses; + /// Sizes of the reduction items in chars. + SmallVector Sizes; + /// Base declarations for the reduction items. + SmallVector BaseDecls; + /// Emits lvalue for shared expression. + LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E); + /// Emits upper bound for shared expression (if array section). + LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E); + /// Performs aggregate initialization. + /// \param N Number of reduction item in the common list. + /// \param PrivateAddr Address of the corresponding private item. + /// \param SharedLVal Address of the original shared variable. + void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr, LValue SharedLVal); + +public: + ReductionCodeGen(ArrayRef Shareds, + ArrayRef Privates, + ArrayRef ReductionOps); + /// Emits lvalue for a reduction item. + /// \param N Number of the reduction item. + void emitSharedLValue(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variably-modified type, if required. + /// \param N Number of the reduction item. 
+ void emitAggregateType(CodeGenFunction &CGF, unsigned N); + /// Emits the code for the variably-modified type, if required. + /// \param N Number of the reduction item. + /// \param Size Size of the type in chars. + void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size); + /// Performs initialization of the private copy for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + /// \param DefaultInit Default initialization sequence that should be + /// performed if no reduction specific initialization is found. + /// \param SharedLVal Address of the original shared variable. + /// \return true, if the initialization sequence was emitted, false otherwise. + void + emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, + LValue SharedLVal, + llvm::function_ref DefaultInit); + /// Returns true if the private copy requires cleanups. + bool needCleanups(unsigned N); + /// Emits cleanup code for the reduction item. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr); + /// Adjusts \p PrivateAddr for using instead of the original variable + /// address in normal operations. + /// \param N Number of the reduction item. + /// \param PrivateAddr Address of the corresponding private item. + Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, + Address PrivateAddr); + /// Returns LValue for the reduction item. + LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; } + /// Returns the size of the reduction item in chars, or nullptr, if the size + /// is a constant. + llvm::Value *getSizeInChars(unsigned N) const { return Sizes[N]; } + /// Returns the base declaration of the reduction item. 
+ const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; } +}; + class CGOpenMPRuntime { protected: CodeGenModule &CGM; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 71797e2e6fbe..50240d45717b 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -549,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign( EmitBlock(DoneBB, /*IsFinished=*/true); } -/// Check if the combiner is a call to UDR combiner and if it is so return the -/// UDR decl used for reduction. -static const OMPDeclareReductionDecl * -getReductionInit(const Expr *ReductionOp) { - if (auto *CE = dyn_cast(ReductionOp)) - if (auto *OVE = dyn_cast(CE->getCallee())) - if (auto *DRE = - dyn_cast(OVE->getSourceExpr()->IgnoreImpCasts())) - if (auto *DRD = dyn_cast(DRE->getDecl())) - return DRD; - return nullptr; -} - -static void emitInitWithReductionInitializer(CodeGenFunction &CGF, - const OMPDeclareReductionDecl *DRD, - const Expr *InitOp, - Address Private, Address Original, - QualType Ty) { - if (DRD->getInitializer()) { - std::pair Reduction = - CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); - auto *CE = cast(InitOp); - auto *OVE = cast(CE->getCallee()); - const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); - const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); - auto *LHSDRE = cast(cast(LHS)->getSubExpr()); - auto *RHSDRE = cast(cast(RHS)->getSubExpr()); - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(cast(LHSDRE->getDecl()), - [=]() -> Address { return Private; }); - PrivateScope.addPrivate(cast(RHSDRE->getDecl()), - [=]() -> Address { return Original; }); - (void)PrivateScope.Privatize(); - RValue Func = RValue::get(Reduction.second); - CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); - CGF.EmitIgnoredExpr(InitOp); - } else { - llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); - auto *GV = new llvm::GlobalVariable( - 
CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, Init, ".init"); - LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); - RValue InitRVal; - switch (CGF.getEvaluationKind(Ty)) { - case TEK_Scalar: - InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation()); - break; - case TEK_Complex: - InitRVal = - RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation())); - break; - case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress()); - break; - } - OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); - CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), - /*IsInitializer=*/false); - } -} - -/// \brief Emit initialization of arrays of complex types. -/// \param DestAddr Address of the array. -/// \param Type Type of array. -/// \param Init Initial expression of array. -/// \param SrcAddr Address of the original array. -static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, - QualType Type, const Expr *Init, - Address SrcAddr = Address::invalid()) { - auto *DRD = getReductionInit(Init); - // Perform element-by-element initialization. - QualType ElementTy; - - // Drill down to the base element type on both arrays. - auto ArrayTy = Type->getAsArrayTypeUnsafe(); - auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); - if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); - - llvm::Value *SrcBegin = nullptr; - if (DRD) - SrcBegin = SrcAddr.getPointer(); - auto DestBegin = DestAddr.getPointer(); - // Cast from pointer to array type to pointer to single element. - auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); - // The basic structure here is a while-do loop. 
- auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); - auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); - auto IsEmpty = - CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); - CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); - - // Enter the loop body, making that address the current address. - auto EntryBB = CGF.Builder.GetInsertBlock(); - CGF.EmitBlock(BodyBB); - - CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); - - llvm::PHINode *SrcElementPHI = nullptr; - Address SrcElementCurrent = Address::invalid(); - if (DRD) { - SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, - "omp.arraycpy.srcElementPast"); - SrcElementPHI->addIncoming(SrcBegin, EntryBB); - SrcElementCurrent = - Address(SrcElementPHI, - SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - } - llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( - DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); - DestElementPHI->addIncoming(DestBegin, EntryBB); - Address DestElementCurrent = - Address(DestElementPHI, - DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); - - // Emit copy. - { - CodeGenFunction::RunCleanupsScope InitScope(CGF); - if (DRD && (DRD->getInitializer() || !Init)) { - emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, - SrcElementCurrent, ElementTy); - } else - CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), - /*IsInitializer=*/false); - } - - if (DRD) { - // Shift the address forward by one element. - auto SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); - } - - // Shift the address forward by one element. - auto DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); - // Check whether we've reached the end. 
- auto Done = - CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); - CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); - DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); - - // Done. - CGF.EmitBlock(DoneBB, /*IsFinished=*/true); -} - void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy) { @@ -1051,254 +901,107 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( EmitBlock(DoneBB, /*IsFinished=*/true); } -static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV, llvm::Value *Addr) { - Address Tmp = Address::invalid(); - Address TopTmp = Address::invalid(); - Address MostTopTmp = Address::invalid(); - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - Tmp = CGF.CreateMemTemp(BaseTy); - if (TopTmp.isValid()) - CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); - else - MostTopTmp = Tmp; - TopTmp = Tmp; - BaseTy = BaseTy->getPointeeType(); - } - llvm::Type *Ty = BaseLV.getPointer()->getType(); - if (Tmp.isValid()) - Ty = Tmp.getElementType(); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); - if (Tmp.isValid()) { - CGF.Builder.CreateStore(Addr, Tmp); - return MostTopTmp; - } - return Address(Addr, BaseLV.getAlignment()); -} - -static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - LValue BaseLV) { - BaseTy = BaseTy.getNonReferenceType(); - while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && - !CGF.getContext().hasSameType(BaseTy, ElTy)) { - if (auto *PtrTy = BaseTy->getAs()) - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); - else { - BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(), - BaseTy->castAs()); - } - BaseTy = BaseTy->getPointeeType(); - } - return CGF.MakeAddrLValue( - Address( - 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo()), - BaseLV.getAlignment()), - BaseLV.getType(), BaseLV.getBaseInfo()); -} - void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { if (!HaveInsertPoint()) return; + SmallVector Shareds; + SmallVector Privates; + SmallVector ReductionOps; + SmallVector LHSs; + SmallVector RHSs; for (const auto *C : D.getClausesOfKind()) { - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); auto IPriv = C->privates().begin(); auto IRed = C->reduction_ops().begin(); - for (auto IRef : C->varlists()) { - auto *LHSVD = cast(cast(*ILHS)->getDecl()); - auto *RHSVD = cast(cast(*IRHS)->getDecl()); - auto *PrivateVD = cast(cast(*IPriv)->getDecl()); - auto *DRD = getReductionInit(*IRed); - if (auto *OASE = dyn_cast(IRef)) { - auto *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempOASE = dyn_cast(Base)) - Base = TempOASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast(Base); - auto *OrigVD = cast(DE->getDecl()); - auto OASELValueLB = EmitOMPArraySectionExpr(OASE); - auto OASELValueUB = - EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = - loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(), - OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address { - return OASELValueLB.getAddress(); - }); - // Emit reduction copy. - bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB, - OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit VarDecl with copy init for arrays. 
- // Get the address of the original variable captured in current - // captured region. - auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(), - OASELValueLB.getPointer()); - Size = Builder.CreateNUWAdd( - Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get(Size)); - EmitVariablyModifiedType(PrivateVD->getType()); - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, - OASELValueLB.getAddress()); - EmitAutoVarCleanups(Emission); - // Emit private VarDecl with reduction init. - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - OASELValueLB.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), - OASELValueLB.getType(), OriginalBaseLValue, - Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } else if (auto *ASE = dyn_cast(IRef)) { - auto *Base = ASE->getBase()->IgnoreParenImpCasts(); - while (auto *TempASE = dyn_cast(Base)) - Base = TempASE->getBase()->IgnoreParenImpCasts(); - auto *DE = cast(Base); - auto *OrigVD = cast(DE->getDecl()); - auto ASELValue = EmitLValue(ASE); - auto OriginalBaseLValue = EmitLValue(DE); - LValue BaseLValue = loadToBegin( - *this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue); - // Store the address of the original variable associated with the LHS - // implicit variable. - PrivateScope.addPrivate( - LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); }); - // Emit reduction copy. 
- bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue, - OriginalBaseLValue, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - ASELValue.getAddress(), - ASELValue.getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(), - ASELValue.getPointer()); - auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset); - return castToBase(*this, OrigVD->getType(), ASELValue.getType(), - OriginalBaseLValue, Ptr); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()), - "rhs.begin"); - }); - } else { - auto *OrigVD = cast(cast(IRef)->getDecl()); - QualType Type = PrivateVD->getType(); - if (getContext().getAsArrayType(Type)) { - // Store the address of the original variable associated with the LHS - // implicit variable. 
- DeclRefExpr DRE(const_cast(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - Address OriginalAddr = EmitLValue(&DRE).getAddress(); - PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr, - LHSVD]() -> Address { - OriginalAddr = Builder.CreateElementBitCast( - OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); - return OriginalAddr; - }); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address { - if (Type->isVariablyModifiedType()) { - CodeGenFunction::OpaqueValueMapping OpaqueMap( - *this, cast( - getContext() - .getAsVariableArrayType(PrivateVD->getType()) - ->getSizeExpr()), - RValue::get( - getTypeSize(OrigVD->getType().getNonReferenceType()))); - EmitVariablyModifiedType(Type); - } - auto Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - auto *Init = PrivateVD->getInit(); - EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(), - DRD ? *IRed : Init, OriginalAddr); - EmitAutoVarCleanups(Emission); - return Emission.getAllocatedAddress(); - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { - return Builder.CreateElementBitCast( - GetAddrOfLocalVar(PrivateVD), - ConvertTypeForMem(RHSVD->getType()), "rhs.begin"); - }); - } else { - // Store the address of the original variable associated with the LHS - // implicit variable. - Address OriginalAddr = Address::invalid(); - PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef, - &OriginalAddr]() -> Address { - DeclRefExpr DRE(const_cast(OrigVD), - CapturedStmtInfo->lookup(OrigVD) != nullptr, - IRef->getType(), VK_LValue, IRef->getExprLoc()); - OriginalAddr = EmitLValue(&DRE).getAddress(); - return OriginalAddr; - }); - // Emit reduction copy. 
- bool IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address { - // Emit private VarDecl with reduction init. - AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); - auto Addr = Emission.getAllocatedAddress(); - if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { - emitInitWithReductionInitializer(*this, DRD, *IRed, Addr, - OriginalAddr, - PrivateVD->getType()); - } else - EmitAutoVarInit(Emission); - EmitAutoVarCleanups(Emission); - return Addr; - }); - assert(IsRegistered && "private var already registered as private"); - // Silence the warning about unused variable. - (void)IsRegistered; - PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { - return GetAddrOfLocalVar(PrivateVD); - }); - } - } - ++ILHS; - ++IRHS; - ++IPriv; - ++IRed; + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (const auto *Ref : C->varlists()) { + Shareds.emplace_back(Ref); + Privates.emplace_back(*IPriv); + ReductionOps.emplace_back(*IRed); + LHSs.emplace_back(*ILHS); + RHSs.emplace_back(*IRHS); + std::advance(IPriv, 1); + std::advance(IRed, 1); + std::advance(ILHS, 1); + std::advance(IRHS, 1); } } + ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto ILHS = LHSs.begin(); + auto IRHS = RHSs.begin(); + auto IPriv = Privates.begin(); + for (const auto *IRef : Shareds) { + auto *PrivateVD = cast(cast(*IPriv)->getDecl()); + // Emit private VarDecl with reduction init. 
+ RedCG.emitSharedLValue(*this, Count); + RedCG.emitAggregateType(*this, Count); + auto Emission = EmitAutoVarAlloca(*PrivateVD); + RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), + RedCG.getSharedLValue(Count), + [&Emission](CodeGenFunction &CGF) { + CGF.EmitAutoVarInit(Emission); + return true; + }); + EmitAutoVarCleanups(Emission); + Address BaseAddr = RedCG.adjustPrivateAddress( + *this, Count, Emission.getAllocatedAddress()); + bool IsRegistered = PrivateScope.addPrivate( + RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. + (void)IsRegistered; + + auto *LHSVD = cast(cast(*ILHS)->getDecl()); + auto *RHSVD = cast(cast(*IRHS)->getDecl()); + if (auto *OASE = dyn_cast(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address { + return GetAddrOfLocalVar(PrivateVD); + }); + } else if (auto *ASE = dyn_cast(IRef)) { + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address { + return RedCG.getSharedLValue(Count).getAddress(); + }); + PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address { + return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), + "rhs.begin"); + }); + } else { + QualType Type = PrivateVD->getType(); + bool IsArray = getContext().getAsArrayType(Type) != nullptr; + Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(); + // Store the address of the original variable associated with the LHS + // implicit variable. 
+ if (IsArray) { + OriginalAddr = Builder.CreateElementBitCast( + OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin"); + } + PrivateScope.addPrivate( + LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; }); + PrivateScope.addPrivate( + RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address { + return IsArray + ? Builder.CreateElementBitCast( + GetAddrOfLocalVar(PrivateVD), + ConvertTypeForMem(RHSVD->getType()), "rhs.begin") + : GetAddrOfLocalVar(PrivateVD); + }); + } + ++ILHS; + ++IRHS; + ++IPriv; + ++Count; + } } void CodeGenFunction::EmitOMPReductionClauseFinal( diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp index 274546b06547..01a179c374e0 100644 --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -718,7 +718,6 @@ int main() { // CHECK: br i1 [[DONE]], // Check initialization of private copy. -// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]* // CHECK: [[BEGIN:%.+]] = getelementptr inbounds [10 x [4 x [[S_FLOAT_TY]]]], [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]], i32 0, i32 0, i32 0 // CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[BEGIN]], i64 40 // CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[BEGIN]], [[END]] @@ -727,6 +726,7 @@ int main() { // CHECK: call void @_ZN1SIfEC1Ev([[S_FLOAT_TY]]* % // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], +// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]* // CHECK: [[ARRS_PRIV_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] @@ -903,8 +903,6 @@ int main() { // CHECK: getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %{{.+}}, i64 4 // CHECK: load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % // CHECK: getelementptr inbounds 
[[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 6 -// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], -// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], // CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 // CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] @@ -912,6 +910,8 @@ int main() { // CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 // CHECK: call i8* @llvm.stacksave() // CHECK: [[VAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] @@ -933,7 +933,6 @@ int main() { // CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0 // CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 4 -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 // CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] @@ -941,6 +940,7 @@ int main() { // CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 // CHECK: call i8* @llvm.stacksave() // CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], +// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] @@ -964,8 +964,6 
@@ int main() { // CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1 // CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], // CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 2 -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 // CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] @@ -973,6 +971,8 @@ int main() { // CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 // CHECK: call i8* @llvm.stacksave() // CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], +// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] @@ -997,11 +997,11 @@ int main() { // CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], // CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], // CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0 // CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2 // CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** % +// CHECK: 
bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: ret void diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp index 699e06dda85a..4f14b9df131d 100644 --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -522,8 +522,8 @@ int main() { // CHECK: br i1 [[DONE]], // Check initialization of private copy. -// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]* // CHECK: [[BEGIN:%.+]] = getelementptr inbounds [10 x [4 x [[S_FLOAT_TY]]]], [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]], i32 0, i32 0, i32 0 +// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]* // CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[BEGIN]], i64 40 // CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[BEGIN]], [[END]] // CHECK: br i1 [[ISEMPTY]], @@ -531,6 +531,7 @@ int main() { // CHECK: call void @_Z4initR6BaseS1RKS_(% // CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]] // CHECK: br i1 [[DONE]], +// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]* // CHECK: [[ARRS_PRIV_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]] to [[S_FLOAT_TY]]* // CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]] @@ -703,8 +704,6 @@ int main() { // CHECK: getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %{{.+}}, i64 4 // CHECK: load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** % // CHECK: getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 6 -// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], -// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], // CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 // CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], 
[[FIRST]] @@ -712,6 +711,8 @@ int main() { // CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 // CHECK: call i8* @llvm.stacksave() // CHECK: [[VAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], +// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]], +// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]], // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] @@ -733,7 +734,6 @@ int main() { // CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0 // CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 4 -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 // CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] @@ -741,6 +741,7 @@ int main() { // CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 // CHECK: call i8* @llvm.stacksave() // CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], +// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] @@ -764,8 +765,6 @@ int main() { // CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1 // CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], // CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 2 -// CHECK: [[VAR3_ORIG:%.+]] = load [2 x 
[[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64 // CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]] @@ -773,6 +772,8 @@ int main() { // CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1 // CHECK: call i8* @llvm.stacksave() // CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]], +// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], +// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64 // CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64 // CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]] @@ -797,8 +798,8 @@ int main() { // CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], // CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]], // CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]], -// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0 +// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]* // CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2 // CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** %