[OPENMP] Generalization of codegen for reduction clauses.

Reworked codegen for reduction clauses for future support of reductions
in task-based directives.

llvm-svn: 307910
This commit is contained in:
Alexey Bataev 2017-07-13 13:36:14 +00:00
parent ec9b326569
commit 5c40bec5eb
5 changed files with 580 additions and 402 deletions

View File

@ -697,6 +697,400 @@ void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
}
}
/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
///
/// The expected shape is a call expression whose callee is an opaque value
/// expression wrapping (modulo implicit casts) a reference to an
/// OMPDeclareReductionDecl.
///
/// \param ReductionOp Expression to inspect; may be null.
/// \return The referenced 'declare reduction' declaration, or nullptr if
/// \p ReductionOp is null or does not have the expected shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // EmitOMPAggregateInit forwards its 'Init' argument here and later tests it
  // against null, so a null expression must not reach dyn_cast<>.
  if (!ReductionOp)
    return nullptr;
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
/// Emits the initialization of one private reduction item for the
/// user-defined reduction \p DRD.
///
/// When \p DRD has an initializer, \p InitOp (a call whose callee is an opaque
/// value and whose two arguments are unary operators applied to variable
/// references) is emitted with those two variables remapped to \p Private and
/// \p Original and with the callee bound to the second function returned by
/// getUserDefinedReduction(). Otherwise \p Private is initialized from a null
/// constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (!DRD->getInitializer()) {
    // No initializer: materialize a null constant of type Ty as a private
    // constant global and copy it into the private item.
    llvm::Constant *NullInit = CGF.CGM.EmitNullConstant(Ty);
    auto *NullGV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), NullInit->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, NullInit, ".init");
    LValue NullLV = CGF.MakeNaturalAlignAddrLValue(NullGV, Ty);
    RValue NullRV;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      NullRV = CGF.EmitLoadOfLValue(NullLV, SourceLocation());
      break;
    case TEK_Complex:
      NullRV =
          RValue::getComplex(CGF.EmitLoadOfComplex(NullLV, SourceLocation()));
      break;
    case TEK_Aggregate:
      NullRV = RValue::getAggregate(NullLV.getAddress());
      break;
    }
    // Route the value through a temporary opaque expression so the generic
    // expression-to-memory emission can be reused.
    OpaqueValueExpr NullOVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping NullMap(CGF, &NullOVE, NullRV);
    CGF.EmitAnyExprToMem(&NullOVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
    return;
  }

  // Only the second element of the pair is needed for the initializer call.
  llvm::Function *InitFn =
      CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD).second;
  auto *Call = cast<CallExpr>(InitOp);
  auto *CalleeOVE = cast<OpaqueValueExpr>(Call->getCallee());
  const Expr *FirstArg = Call->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
  const Expr *SecondArg = Call->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
  auto *FirstRef =
      cast<DeclRefExpr>(cast<UnaryOperator>(FirstArg)->getSubExpr());
  auto *SecondRef =
      cast<DeclRefExpr>(cast<UnaryOperator>(SecondArg)->getSubExpr());

  // Remap the two helper variables onto the private and original storage.
  CodeGenFunction::OMPPrivateScope HelperScope(CGF);
  HelperScope.addPrivate(cast<VarDecl>(FirstRef->getDecl()),
                         [Private]() -> Address { return Private; });
  HelperScope.addPrivate(cast<VarDecl>(SecondRef->getDecl()),
                         [Original]() -> Address { return Original; });
  (void)HelperScope.Privatize();

  // Bind the opaque callee to the initializer function and emit the call.
  CodeGenFunction::OpaqueValueMapping CalleeMap(CGF, CalleeOVE,
                                                RValue::get(InitFn));
  CGF.EmitIgnoredExpr(InitOp);
}
/// \brief Emit initialization of arrays of complex types.
///
/// Emits a loop that initializes the array at \p DestAddr one element at a
/// time. If \p Init refers to a user-defined reduction (as detected by
/// getReductionInit) that has an initializer, each element is initialized via
/// emitInitWithReductionInitializer using the matching element of \p SrcAddr;
/// otherwise \p Init is emitted directly into each destination element.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array. Only read when \p Init refers
/// to a user-defined reduction.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
QualType Type, const Expr *Init,
Address SrcAddr = Address::invalid()) {
auto *DRD = getReductionInit(Init);
// Perform element-by-element initialization.
QualType ElementTy;
// Drill down to the base element type on both arrays.
auto ArrayTy = Type->getAsArrayTypeUnsafe();
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
DestAddr =
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
// The source array is only walked for user-defined reductions; give it the
// same element type as the destination.
if (DRD)
SrcAddr =
CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
llvm::Value *SrcBegin = nullptr;
if (DRD)
SrcBegin = SrcAddr.getPointer();
auto DestBegin = DestAddr.getPointer();
// Compute the one-past-the-end pointer of the destination array.
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
// The basic structure here is a while-do loop.
auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
// Skip the loop entirely when the array has no elements.
auto IsEmpty =
CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
// EntryBB is the predecessor that supplies the initial PHI values.
auto EntryBB = CGF.Builder.GetInsertBlock();
CGF.EmitBlock(BodyBB);
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
// PHI tracking the current source element (user-defined reductions only).
llvm::PHINode *SrcElementPHI = nullptr;
Address SrcElementCurrent = Address::invalid();
if (DRD) {
SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
"omp.arraycpy.srcElementPast");
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
SrcElementCurrent =
Address(SrcElementPHI,
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
}
// PHI tracking the current destination element.
llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
DestElementPHI->addIncoming(DestBegin, EntryBB);
Address DestElementCurrent =
Address(DestElementPHI,
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Emit the initialization of the current element. Cleanups created while
// emitting it are run when InitScope goes out of scope, i.e. per element.
{
CodeGenFunction::RunCleanupsScope InitScope(CGF);
if (DRD && (DRD->getInitializer() || !Init)) {
emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
SrcElementCurrent, ElementTy);
} else
CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
/*IsInitializer=*/false);
}
if (DRD) {
// Shift the source address forward by one element.
// NOTE(review): the value name below says "dest" although it advances the
// source pointer; it looks copied from the destination GEP - confirm before
// renaming, since it affects emitted IR value names.
auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
}
// Shift the destination address forward by one element.
auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
// Check whether we've reached the end.
auto Done =
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
// The back edge comes from whichever block the body emission ended in.
DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
// Done.
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
/// Emits the lvalue of the shared reduction item \p E.
///
/// Array sections and array subscripts are emitted directly. Any other
/// expression must be a reference to a variable; it is re-emitted through a
/// temporary DeclRefExpr whose "refers to captured variable" flag reflects
/// whether the current captured-statement info knows the variable.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  if (const auto *Section = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(Section);
  if (const auto *Subscript = dyn_cast<ArraySubscriptExpr>(E))
    return CGF.EmitLValue(Subscript);

  auto *SharedVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
  const bool IsCaptured =
      CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(SharedVD) != nullptr;
  // Build a fresh reference so the address of the original variable is looked
  // up in the current (possibly captured) context.
  DeclRefExpr SharedRef(const_cast<VarDecl *>(SharedVD), IsCaptured,
                        E->getType(), VK_LValue, E->getExprLoc());
  return CGF.EmitLValue(&SharedRef);
}
/// Emits the upper-bound lvalue of the shared item \p E when it is an array
/// section; for every other kind of expression a default-constructed LValue
/// is returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  const auto *Section = dyn_cast<OMPArraySectionExpr>(E);
  if (!Section)
    return LValue();
  return CGF.EmitOMPArraySectionExpr(Section, /*IsLowerBound=*/false);
}
/// Initializes the private array copy of reduction item \p N element by
/// element.
///
/// The initializer handed to EmitOMPAggregateInit is the reduction operation
/// when it refers to a user-defined reduction, and the initializer of the
/// private helper variable otherwise. The address of \p SharedLVal is passed
/// along as the original array.
void ReductionCodeGen::emitAggregateInitialization(CodeGenFunction &CGF,
                                                   unsigned N,
                                                   Address PrivateAddr,
                                                   LValue SharedLVal) {
  const ReductionData &Item = ClausesData[N];
  auto *PrivVD = cast<VarDecl>(cast<DeclRefExpr>(Item.Private)->getDecl());
  const bool HasUDR = getReductionInit(Item.ReductionOp) != nullptr;
  const Expr *InitExpr = HasUDR ? Item.ReductionOp : PrivVD->getInit();
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivVD->getType(), InitExpr,
                       SharedLVal.getAddress());
}
/// Collects the per-item data for a list of reduction items.
///
/// \param Shareds References to the original shared items.
/// \param Privates Helper expressions for the private copies; must provide an
/// entry for every element of \p Shareds.
/// \param ReductionOps Helper expressions for the reduction operations; must
/// provide an entry for every element of \p Shareds.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  // The three lists are walked in lockstep; catch short lists up front instead
  // of reading past their end.
  assert(Privates.size() >= Shareds.size() &&
         ReductionOps.size() >= Shareds.size() &&
         "Expected a private copy and a reduction op for each shared item.");
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  for (unsigned I = 0, E = Shareds.size(); I != E; ++I)
    ClausesData.emplace_back(Shareds[I], Privates[I], ReductionOps[I]);
}
/// Emits and records the lvalue (and, for array sections, the upper-bound
/// lvalue) of reduction item \p N.
///
/// Items must be processed in order: exactly \p N lvalues must already have
/// been generated.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  // Both helpers emit code. Evaluate them in separate statements so that the
  // order of the emitted instructions does not depend on the host compiler's
  // (unspecified) function-argument evaluation order.
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}
/// Computes and records the size value for reduction item \p N and, if the
/// item is an array section or its private type is variably modified, emits
/// that variably modified private type.
///
/// For other items a null size is recorded and nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  auto *PrivVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivTy = PrivVD->getType();
  const bool IsSection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!(IsSection || PrivTy->isVariablyModifiedType())) {
    Sizes.emplace_back(nullptr);
    return;
  }

  const LValue &SharedLB = SharedAddresses[N].first;
  llvm::Value *Size = nullptr;
  if (!IsSection) {
    Size = CGF.getTypeSize(SharedLB.getType().getNonReferenceType());
  } else {
    // Distance between the upper and lower bound of the section, plus one.
    llvm::Value *Span = CGF.Builder.CreatePtrDiff(
        SharedAddresses[N].second.getPointer(), SharedLB.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Span, llvm::ConstantInt::get(Span->getType(), /*V=*/1));
  }
  Sizes.emplace_back(Size);

  // Bind the opaque size expression of the private array type to the computed
  // value while the type is being emitted.
  auto *SizeOVE = cast<OpaqueValueExpr>(
      CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr());
  CodeGenFunction::OpaqueValueMapping SizeMap(CGF, SizeOVE, RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivTy);
}
/// Emits the variably modified private type of reduction item \p N using the
/// caller-provided \p Size.
///
/// Nothing is emitted unless the item is an array section or its private type
/// is variably modified; in that case \p Size (and the recorded size) is
/// expected to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  auto *PrivVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivTy = PrivVD->getType();
  const bool NeedsSize = isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
                         PrivTy->isVariablyModifiedType();
  if (!NeedsSize) {
    assert(!Size && !Sizes[N] &&
           "Size should be nullptr for non-variably modified redution "
           "items.");
    return;
  }

  // Bind the opaque size expression of the private array type to Size while
  // the type is being emitted.
  auto *SizeOVE = cast<OpaqueValueExpr>(
      CGF.getContext().getAsVariableArrayType(PrivTy)->getSizeExpr());
  CodeGenFunction::OpaqueValueMapping SizeMap(CGF, SizeOVE, RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivTy);
}
/// Emits the initialization of the private copy of reduction item \p N.
///
/// \param PrivateAddr Address of the private copy; it is re-typed to the
/// memory type of the private helper variable.
/// \param SharedLVal LValue of the original item; it is re-typed to the type
/// of the recorded shared lvalue.
/// \param DefaultInit Callback invoked only when neither aggregate nor
/// user-defined-reduction initialization applies. If it returns false and the
/// private helper variable has a non-trivial initializer, that initializer is
/// emitted into the private copy.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const ReductionData &Item = ClausesData[N];
  auto *PrivVD = cast<VarDecl>(cast<DeclRefExpr>(Item.Private)->getDecl());
  auto *UDR = getReductionInit(Item.ReductionOp);

  QualType PrivTy = PrivVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivTy));

  const LValue &SharedLB = SharedAddresses[N].first;
  QualType SharedTy = SharedLB.getType();
  Address SharedAddr = CGF.Builder.CreateElementBitCast(
      SharedLVal.getAddress(), CGF.ConvertTypeForMem(SharedTy));
  SharedLVal = CGF.MakeAddrLValue(SharedAddr, SharedTy, SharedLB.getBaseInfo());

  // Array sections and array-typed private copies are initialized per element.
  if (isa<OMPArraySectionExpr>(Item.Ref) ||
      CGF.getContext().getAsArrayType(PrivVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal);
    return;
  }

  // User-defined reduction with an initializer, or without any other
  // initializer to fall back on.
  if (UDR && (UDR->getInitializer() || !PrivVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, UDR, Item.ReductionOp, PrivateAddr,
                                     SharedLVal.getAddress(),
                                     SharedLVal.getType());
    return;
  }

  // Give the caller's default sequence the first chance.
  if (DefaultInit(CGF))
    return;
  if (!PrivVD->hasInit() || CGF.isTrivialInitializer(PrivVD->getInit()))
    return;
  CGF.EmitAnyExprToMem(PrivVD->getInit(), PrivateAddr,
                       PrivVD->getType().getQualifiers(),
                       /*IsInitializer=*/false);
}
bool ReductionCodeGen::needCleanups(unsigned N) {
auto *PrivateVD =
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
QualType PrivateType = PrivateVD->getType();
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
return DTorKind != QualType::DK_none;
}
/// Pushes a destroy cleanup for the private copy of reduction item \p N at
/// \p PrivateAddr, if needCleanups() reports that one is required.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  if (!needCleanups(N))
    return;
  auto *PrivVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivTy = PrivVD->getType();
  // Re-type the address to the memory type of the private variable before
  // registering the cleanup.
  Address TypedAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivTy));
  CGF.pushDestroy(PrivTy.isDestructedType(), TypedAddr, PrivTy);
}
/// Starting from \p BaseLV of type \p BaseTy, repeatedly loads through pointer
/// and reference levels until the type is neither a pointer nor a reference,
/// or is the same as \p ElTy. The resulting address is re-typed to the memory
/// type of \p ElTy and returned as an lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !CGF.getContext().hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    const auto *PtrTy = BaseTy->getAs<PointerType>();
    BaseLV = PtrTy ? CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy)
                   : CGF.EmitLoadOfReferenceLValue(
                         BaseLV.getAddress(), BaseTy->castAs<ReferenceType>());
  }
  Address ElemAddr = CGF.Builder.CreateElementBitCast(
      BaseLV.getAddress(), CGF.ConvertTypeForMem(ElTy));
  return CGF.MakeAddrLValue(ElemAddr, BaseLV.getType(), BaseLV.getBaseInfo());
}
/// Wraps \p Addr in as many levels of temporaries as \p BaseTy has pointer or
/// reference levels above \p ElTy.
///
/// One memory temporary is created per level, each outer temporary storing the
/// pointer to the next inner one, and \p Addr (cast to the innermost
/// temporary's element type) is stored into the innermost temporary. The
/// outermost temporary is returned. If there are no such levels, \p Addr is
/// cast to \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Outermost = Address::invalid();
  Address Innermost = Address::invalid();
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !CGF.getContext().hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    Address Slot = CGF.CreateMemTemp(BaseTy);
    // Chain the new temporary below the previous one, or remember it as the
    // top of the chain if it is the first.
    if (Innermost.isValid())
      CGF.Builder.CreateStore(Slot.getPointer(), Innermost);
    else
      Outermost = Slot;
    Innermost = Slot;
  }

  if (!Innermost.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, BaseLVType);
    return Address(Addr, BaseLVAlignment);
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, Innermost.getElementType());
  CGF.Builder.CreateStore(Addr, Innermost);
  return Outermost;
}
/// Adjusts \p PrivateAddr so that it can be used in place of the address of
/// the original base variable of reduction item \p N, and records that base
/// variable in BaseDecls.
///
/// For a plain variable reference the private address is returned unchanged.
/// For array sections and array subscripts the base variable is found by
/// stripping nested sections/subscripts; the private pointer is then offset by
/// the distance between the beginning of the base and the shared item, and
/// wrapped via castToBase() to match the shape of the base variable.
///
/// \param N Number of the reduction item.
/// \param PrivateAddr Address of the corresponding private item.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const Expr *Ref = ClausesData[N].Ref;

  // Find the expression the section/subscript is ultimately applied to.
  const Expr *Base = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    Base = ASE->getBase()->IgnoreParenImpCasts();
  }

  // Plain variable: the private copy directly replaces the original.
  if (!Base) {
    BaseDecls.emplace_back(cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()));
    return PrivateAddr;
  }

  // Both sections and subscripts may sit on top of further subscripts.
  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = TempASE->getBase()->IgnoreParenImpCasts();
  const auto *DE = cast<DeclRefExpr>(Base);
  const auto *OrigVD = cast<VarDecl>(DE->getDecl());

  BaseDecls.emplace_back(OrigVD);
  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
  LValue BaseLValue =
      loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                  OriginalBaseLValue);
  // Offset of the beginning of the base relative to the shared item; applying
  // it to the private copy yields the pointer to use in place of the base.
  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
      BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
  llvm::Value *Ptr =
      CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
  return castToBase(CGF, OrigVD->getType(),
                    SharedAddresses[N].first.getType(),
                    OriginalBaseLValue.getPointer()->getType(),
                    OriginalBaseLValue.getAlignment(), Ptr);
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
return CGF.EmitLoadOfPointerLValue(
CGF.GetAddrOfLocalVar(getThreadIDVariable()),

View File

@ -105,6 +105,86 @@ struct OMPTaskDataTy final {
bool Nogroup = false;
};
/// Class intended to support codegen of all kinds of reduction clauses.
class ReductionCodeGen {
private:
/// Data required for codegen of reduction clauses.
struct ReductionData {
/// Reference to the original shared item.
const Expr *Ref = nullptr;
/// Helper expression for generation of private copy.
const Expr *Private = nullptr;
/// Helper expression for generation of the reduction operation.
const Expr *ReductionOp = nullptr;
ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp)
: Ref(Ref), Private(Private), ReductionOp(ReductionOp) {}
};
/// List of reduction-based clauses.
SmallVector<ReductionData, 4> ClausesData;
/// List of addresses of original shared variables/expressions. The second
/// element of each pair is the upper bound for array sections.
SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses;
/// Sizes of the reduction items in chars.
SmallVector<llvm::Value *, 4> Sizes;
/// Base declarations for the reduction items.
SmallVector<const VarDecl *, 4> BaseDecls;
/// Emits lvalue for shared expression.
LValue emitSharedLValue(CodeGenFunction &CGF, const Expr *E);
/// Emits upper bound for shared expression (if array section).
LValue emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E);
/// Performs aggregate initialization.
/// \param N Number of reduction item in the common list.
/// \param PrivateAddr Address of the corresponding private item.
/// \param SharedLVal Address of the original shared variable.
void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N,
Address PrivateAddr, LValue SharedLVal);
public:
/// Builds the per-item data from three parallel lists: the shared items,
/// the private-copy helper expressions and the reduction-operation helper
/// expressions.
ReductionCodeGen(ArrayRef<const Expr *> Shareds,
ArrayRef<const Expr *> Privates,
ArrayRef<const Expr *> ReductionOps);
/// Emits lvalue for a reduction item.
/// \param N Number of the reduction item.
void emitSharedLValue(CodeGenFunction &CGF, unsigned N);
/// Emits the code for the variably modified type, if required.
/// \param N Number of the reduction item.
void emitAggregateType(CodeGenFunction &CGF, unsigned N);
/// Emits the code for the variably modified type, if required.
/// \param N Number of the reduction item.
/// \param Size Size of the type in chars.
void emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size);
/// Performs initialization of the private copy for the reduction item.
/// \param N Number of the reduction item.
/// \param PrivateAddr Address of the corresponding private item.
/// \param SharedLVal Address of the original shared variable.
/// \param DefaultInit Default initialization sequence that should be
/// performed if no reduction specific initialization is found. It returns
/// true if the initialization sequence was emitted, false otherwise.
void
emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr,
LValue SharedLVal,
llvm::function_ref<bool(CodeGenFunction &)> DefaultInit);
/// Returns true if the private copy requires cleanups.
bool needCleanups(unsigned N);
/// Emits cleanup code for the reduction item.
/// \param N Number of the reduction item.
/// \param PrivateAddr Address of the corresponding private item.
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr);
/// Adjusts \p PrivateAddr for use instead of the original variable
/// address in normal operations.
/// \param N Number of the reduction item.
/// \param PrivateAddr Address of the corresponding private item.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
Address PrivateAddr);
/// Returns LValue for the reduction item.
LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; }
/// Returns the size of the reduction item in chars, or nullptr, if the size
/// is a constant.
llvm::Value *getSizeInChars(unsigned N) const { return Sizes[N]; }
/// Returns the base declaration of the reduction item.
const VarDecl *getBaseDecl(unsigned N) const { return BaseDecls[N]; }
};
class CGOpenMPRuntime {
protected:
CodeGenModule &CGM;

View File

@ -549,156 +549,6 @@ void CodeGenFunction::EmitOMPAggregateAssign(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
///
/// The expected shape is a call expression whose callee is an opaque value
/// expression wrapping (modulo implicit casts) a reference to an
/// OMPDeclareReductionDecl.
///
/// \param ReductionOp Expression to inspect; may be null.
/// \return The referenced 'declare reduction' declaration, or nullptr if
/// \p ReductionOp is null or does not have the expected shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // EmitOMPAggregateInit forwards its 'Init' argument here and later tests it
  // against null, so a null expression must not reach dyn_cast<>.
  if (!ReductionOp)
    return nullptr;
  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
/// Emits the initialization of one private reduction item for the
/// user-defined reduction \p DRD.
///
/// When \p DRD has an initializer, \p InitOp (a call whose callee is an opaque
/// value and whose two arguments are unary operators applied to variable
/// references) is emitted with those two variables remapped to \p Private and
/// \p Original and with the callee bound to the second function returned by
/// getUserDefinedReduction(). Otherwise \p Private is initialized from a null
/// constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (!DRD->getInitializer()) {
    // No initializer: materialize a null constant of type Ty as a private
    // constant global and copy it into the private item.
    llvm::Constant *NullInit = CGF.CGM.EmitNullConstant(Ty);
    auto *NullGV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), NullInit->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, NullInit, ".init");
    LValue NullLV = CGF.MakeNaturalAlignAddrLValue(NullGV, Ty);
    RValue NullRV;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      NullRV = CGF.EmitLoadOfLValue(NullLV, SourceLocation());
      break;
    case TEK_Complex:
      NullRV =
          RValue::getComplex(CGF.EmitLoadOfComplex(NullLV, SourceLocation()));
      break;
    case TEK_Aggregate:
      NullRV = RValue::getAggregate(NullLV.getAddress());
      break;
    }
    // Route the value through a temporary opaque expression so the generic
    // expression-to-memory emission can be reused.
    OpaqueValueExpr NullOVE(SourceLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping NullMap(CGF, &NullOVE, NullRV);
    CGF.EmitAnyExprToMem(&NullOVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
    return;
  }

  // Only the second element of the pair is needed for the initializer call.
  llvm::Function *InitFn =
      CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD).second;
  auto *Call = cast<CallExpr>(InitOp);
  auto *CalleeOVE = cast<OpaqueValueExpr>(Call->getCallee());
  const Expr *FirstArg = Call->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
  const Expr *SecondArg = Call->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
  auto *FirstRef =
      cast<DeclRefExpr>(cast<UnaryOperator>(FirstArg)->getSubExpr());
  auto *SecondRef =
      cast<DeclRefExpr>(cast<UnaryOperator>(SecondArg)->getSubExpr());

  // Remap the two helper variables onto the private and original storage.
  CodeGenFunction::OMPPrivateScope HelperScope(CGF);
  HelperScope.addPrivate(cast<VarDecl>(FirstRef->getDecl()),
                         [Private]() -> Address { return Private; });
  HelperScope.addPrivate(cast<VarDecl>(SecondRef->getDecl()),
                         [Original]() -> Address { return Original; });
  (void)HelperScope.Privatize();

  // Bind the opaque callee to the initializer function and emit the call.
  CodeGenFunction::OpaqueValueMapping CalleeMap(CGF, CalleeOVE,
                                                RValue::get(InitFn));
  CGF.EmitIgnoredExpr(InitOp);
}
/// \brief Emit initialization of arrays of complex types.
///
/// Emits a loop that initializes the array at \p DestAddr one element at a
/// time. If \p Init refers to a user-defined reduction (as detected by
/// getReductionInit) that has an initializer, each element is initialized via
/// emitInitWithReductionInitializer using the matching element of \p SrcAddr;
/// otherwise \p Init is emitted directly into each destination element.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array. Only read when \p Init refers
/// to a user-defined reduction.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
QualType Type, const Expr *Init,
Address SrcAddr = Address::invalid()) {
auto *DRD = getReductionInit(Init);
// Perform element-by-element initialization.
QualType ElementTy;
// Drill down to the base element type on both arrays.
auto ArrayTy = Type->getAsArrayTypeUnsafe();
auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
DestAddr =
CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
// The source array is only walked for user-defined reductions; give it the
// same element type as the destination.
if (DRD)
SrcAddr =
CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
llvm::Value *SrcBegin = nullptr;
if (DRD)
SrcBegin = SrcAddr.getPointer();
auto DestBegin = DestAddr.getPointer();
// Compute the one-past-the-end pointer of the destination array.
auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
// The basic structure here is a while-do loop.
auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
// Skip the loop entirely when the array has no elements.
auto IsEmpty =
CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
// Enter the loop body, making that address the current address.
// EntryBB is the predecessor that supplies the initial PHI values.
auto EntryBB = CGF.Builder.GetInsertBlock();
CGF.EmitBlock(BodyBB);
CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
// PHI tracking the current source element (user-defined reductions only).
llvm::PHINode *SrcElementPHI = nullptr;
Address SrcElementCurrent = Address::invalid();
if (DRD) {
SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
"omp.arraycpy.srcElementPast");
SrcElementPHI->addIncoming(SrcBegin, EntryBB);
SrcElementCurrent =
Address(SrcElementPHI,
SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
}
// PHI tracking the current destination element.
llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
DestElementPHI->addIncoming(DestBegin, EntryBB);
Address DestElementCurrent =
Address(DestElementPHI,
DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
// Emit the initialization of the current element. Cleanups created while
// emitting it are run when InitScope goes out of scope, i.e. per element.
{
CodeGenFunction::RunCleanupsScope InitScope(CGF);
if (DRD && (DRD->getInitializer() || !Init)) {
emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
SrcElementCurrent, ElementTy);
} else
CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
/*IsInitializer=*/false);
}
if (DRD) {
// Shift the source address forward by one element.
// NOTE(review): the value name below says "dest" although it advances the
// source pointer; it looks copied from the destination GEP - confirm before
// renaming, since it affects emitted IR value names.
auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
}
// Shift the destination address forward by one element.
auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
// Check whether we've reached the end.
auto Done =
CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
// The back edge comes from whichever block the body emission ended in.
DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
// Done.
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
Address SrcAddr, const VarDecl *DestVD,
const VarDecl *SrcVD, const Expr *Copy) {
@ -1051,254 +901,107 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
/// Wraps \p Addr in as many levels of temporaries as \p BaseTy has pointer or
/// reference levels above \p ElTy.
///
/// One memory temporary is created per level, each outer temporary storing the
/// pointer to the next inner one, and \p Addr (cast to the innermost
/// temporary's element type) is stored into the innermost temporary. The
/// outermost temporary is returned. If there are no such levels, \p Addr is
/// cast to the pointer type of \p BaseLV and returned with its alignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV, llvm::Value *Addr) {
  Address Outermost = Address::invalid();
  Address Innermost = Address::invalid();
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !CGF.getContext().hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    Address Slot = CGF.CreateMemTemp(BaseTy);
    // Chain the new temporary below the previous one, or remember it as the
    // top of the chain if it is the first.
    if (Innermost.isValid())
      CGF.Builder.CreateStore(Slot.getPointer(), Innermost);
    else
      Outermost = Slot;
    Innermost = Slot;
  }

  if (!Innermost.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, BaseLV.getPointer()->getType());
    return Address(Addr, BaseLV.getAlignment());
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, Innermost.getElementType());
  CGF.Builder.CreateStore(Addr, Innermost);
  return Outermost;
}
/// Starting from \p BaseLV of type \p BaseTy, repeatedly loads through pointer
/// and reference levels until the type is neither a pointer nor a reference,
/// or is the same as \p ElTy. The resulting pointer is cast to a pointer to
/// the memory type of \p ElTy and returned as an lvalue that keeps the
/// alignment, type and base info of the last loaded lvalue.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  for (BaseTy = BaseTy.getNonReferenceType();
       (BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
       !CGF.getContext().hasSameType(BaseTy, ElTy);
       BaseTy = BaseTy->getPointeeType()) {
    const auto *PtrTy = BaseTy->getAs<PointerType>();
    BaseLV = PtrTy ? CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy)
                   : CGF.EmitLoadOfReferenceLValue(
                         BaseLV.getAddress(), BaseTy->castAs<ReferenceType>());
  }
  llvm::Value *ElemPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      BaseLV.getPointer(), CGF.ConvertTypeForMem(ElTy)->getPointerTo());
  return CGF.MakeAddrLValue(Address(ElemPtr, BaseLV.getAlignment()),
                            BaseLV.getType(), BaseLV.getBaseInfo());
}
void CodeGenFunction::EmitOMPReductionClauseInit(
const OMPExecutableDirective &D,
CodeGenFunction::OMPPrivateScope &PrivateScope) {
if (!HaveInsertPoint())
return;
SmallVector<const Expr *, 4> Shareds;
SmallVector<const Expr *, 4> Privates;
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
auto IPriv = C->privates().begin();
auto IRed = C->reduction_ops().begin();
for (auto IRef : C->varlists()) {
auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
auto *DRD = getReductionInit(*IRed);
if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
auto *Base = OASE->getBase()->IgnoreParenImpCasts();
while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
Base = TempOASE->getBase()->IgnoreParenImpCasts();
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
Base = TempASE->getBase()->IgnoreParenImpCasts();
auto *DE = cast<DeclRefExpr>(Base);
auto *OrigVD = cast<VarDecl>(DE->getDecl());
auto OASELValueLB = EmitOMPArraySectionExpr(OASE);
auto OASELValueUB =
EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
auto OriginalBaseLValue = EmitLValue(DE);
LValue BaseLValue =
loadToBegin(*this, OrigVD->getType(), OASELValueLB.getType(),
OriginalBaseLValue);
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(LHSVD, [OASELValueLB]() -> Address {
return OASELValueLB.getAddress();
});
// Emit reduction copy.
bool IsRegistered = PrivateScope.addPrivate(
OrigVD, [this, OrigVD, PrivateVD, BaseLValue, OASELValueLB,
OASELValueUB, OriginalBaseLValue, DRD, IRed]() -> Address {
// Emit VarDecl with copy init for arrays.
// Get the address of the original variable captured in current
// captured region.
auto *Size = Builder.CreatePtrDiff(OASELValueUB.getPointer(),
OASELValueLB.getPointer());
Size = Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
CodeGenFunction::OpaqueValueMapping OpaqueMap(
*this, cast<OpaqueValueExpr>(
getContext()
.getAsVariableArrayType(PrivateVD->getType())
->getSizeExpr()),
RValue::get(Size));
EmitVariablyModifiedType(PrivateVD->getType());
auto Emission = EmitAutoVarAlloca(*PrivateVD);
auto Addr = Emission.getAllocatedAddress();
auto *Init = PrivateVD->getInit();
EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
DRD ? *IRed : Init,
OASELValueLB.getAddress());
EmitAutoVarCleanups(Emission);
// Emit private VarDecl with reduction init.
auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
OASELValueLB.getPointer());
auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
return castToBase(*this, OrigVD->getType(),
OASELValueLB.getType(), OriginalBaseLValue,
Ptr);
});
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
return GetAddrOfLocalVar(PrivateVD);
});
} else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
auto *Base = ASE->getBase()->IgnoreParenImpCasts();
while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
Base = TempASE->getBase()->IgnoreParenImpCasts();
auto *DE = cast<DeclRefExpr>(Base);
auto *OrigVD = cast<VarDecl>(DE->getDecl());
auto ASELValue = EmitLValue(ASE);
auto OriginalBaseLValue = EmitLValue(DE);
LValue BaseLValue = loadToBegin(
*this, OrigVD->getType(), ASELValue.getType(), OriginalBaseLValue);
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(
LHSVD, [ASELValue]() -> Address { return ASELValue.getAddress(); });
// Emit reduction copy.
bool IsRegistered = PrivateScope.addPrivate(
OrigVD, [this, OrigVD, PrivateVD, BaseLValue, ASELValue,
OriginalBaseLValue, DRD, IRed]() -> Address {
// Emit private VarDecl with reduction init.
AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
auto Addr = Emission.getAllocatedAddress();
if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
ASELValue.getAddress(),
ASELValue.getType());
} else
EmitAutoVarInit(Emission);
EmitAutoVarCleanups(Emission);
auto *Offset = Builder.CreatePtrDiff(BaseLValue.getPointer(),
ASELValue.getPointer());
auto *Ptr = Builder.CreateGEP(Addr.getPointer(), Offset);
return castToBase(*this, OrigVD->getType(), ASELValue.getType(),
OriginalBaseLValue, Ptr);
});
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
return Builder.CreateElementBitCast(
GetAddrOfLocalVar(PrivateVD), ConvertTypeForMem(RHSVD->getType()),
"rhs.begin");
});
} else {
auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
QualType Type = PrivateVD->getType();
if (getContext().getAsArrayType(Type)) {
// Store the address of the original variable associated with the LHS
// implicit variable.
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
IRef->getType(), VK_LValue, IRef->getExprLoc());
Address OriginalAddr = EmitLValue(&DRE).getAddress();
PrivateScope.addPrivate(LHSVD, [this, &OriginalAddr,
LHSVD]() -> Address {
OriginalAddr = Builder.CreateElementBitCast(
OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
return OriginalAddr;
});
bool IsRegistered = PrivateScope.addPrivate(OrigVD, [&]() -> Address {
if (Type->isVariablyModifiedType()) {
CodeGenFunction::OpaqueValueMapping OpaqueMap(
*this, cast<OpaqueValueExpr>(
getContext()
.getAsVariableArrayType(PrivateVD->getType())
->getSizeExpr()),
RValue::get(
getTypeSize(OrigVD->getType().getNonReferenceType())));
EmitVariablyModifiedType(Type);
}
auto Emission = EmitAutoVarAlloca(*PrivateVD);
auto Addr = Emission.getAllocatedAddress();
auto *Init = PrivateVD->getInit();
EmitOMPAggregateInit(*this, Addr, PrivateVD->getType(),
DRD ? *IRed : Init, OriginalAddr);
EmitAutoVarCleanups(Emission);
return Emission.getAllocatedAddress();
});
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
return Builder.CreateElementBitCast(
GetAddrOfLocalVar(PrivateVD),
ConvertTypeForMem(RHSVD->getType()), "rhs.begin");
});
} else {
// Store the address of the original variable associated with the LHS
// implicit variable.
Address OriginalAddr = Address::invalid();
PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef,
&OriginalAddr]() -> Address {
DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
CapturedStmtInfo->lookup(OrigVD) != nullptr,
IRef->getType(), VK_LValue, IRef->getExprLoc());
OriginalAddr = EmitLValue(&DRE).getAddress();
return OriginalAddr;
});
// Emit reduction copy.
bool IsRegistered = PrivateScope.addPrivate(
OrigVD, [this, PrivateVD, OriginalAddr, DRD, IRed]() -> Address {
// Emit private VarDecl with reduction init.
AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
auto Addr = Emission.getAllocatedAddress();
if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
emitInitWithReductionInitializer(*this, DRD, *IRed, Addr,
OriginalAddr,
PrivateVD->getType());
} else
EmitAutoVarInit(Emission);
EmitAutoVarCleanups(Emission);
return Addr;
});
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
return GetAddrOfLocalVar(PrivateVD);
});
}
}
++ILHS;
++IRHS;
++IPriv;
++IRed;
auto ILHS = C->lhs_exprs().begin();
auto IRHS = C->rhs_exprs().begin();
for (const auto *Ref : C->varlists()) {
Shareds.emplace_back(Ref);
Privates.emplace_back(*IPriv);
ReductionOps.emplace_back(*IRed);
LHSs.emplace_back(*ILHS);
RHSs.emplace_back(*IRHS);
std::advance(IPriv, 1);
std::advance(IRed, 1);
std::advance(ILHS, 1);
std::advance(IRHS, 1);
}
}
ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
unsigned Count = 0;
auto ILHS = LHSs.begin();
auto IRHS = RHSs.begin();
auto IPriv = Privates.begin();
for (const auto *IRef : Shareds) {
auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
// Emit private VarDecl with reduction init.
RedCG.emitSharedLValue(*this, Count);
RedCG.emitAggregateType(*this, Count);
auto Emission = EmitAutoVarAlloca(*PrivateVD);
RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
RedCG.getSharedLValue(Count),
[&Emission](CodeGenFunction &CGF) {
CGF.EmitAutoVarInit(Emission);
return true;
});
EmitAutoVarCleanups(Emission);
Address BaseAddr = RedCG.adjustPrivateAddress(
*this, Count, Emission.getAllocatedAddress());
bool IsRegistered = PrivateScope.addPrivate(
RedCG.getBaseDecl(Count), [BaseAddr]() -> Address { return BaseAddr; });
assert(IsRegistered && "private var already registered as private");
// Silence the warning about unused variable.
(void)IsRegistered;
auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
if (auto *OASE = dyn_cast<OMPArraySectionExpr>(IRef)) {
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
return RedCG.getSharedLValue(Count).getAddress();
});
PrivateScope.addPrivate(RHSVD, [this, PrivateVD]() -> Address {
return GetAddrOfLocalVar(PrivateVD);
});
} else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(IRef)) {
// Store the address of the original variable associated with the LHS
// implicit variable.
PrivateScope.addPrivate(LHSVD, [&RedCG, Count]() -> Address {
return RedCG.getSharedLValue(Count).getAddress();
});
PrivateScope.addPrivate(RHSVD, [this, PrivateVD, RHSVD]() -> Address {
return Builder.CreateElementBitCast(GetAddrOfLocalVar(PrivateVD),
ConvertTypeForMem(RHSVD->getType()),
"rhs.begin");
});
} else {
QualType Type = PrivateVD->getType();
bool IsArray = getContext().getAsArrayType(Type) != nullptr;
Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress();
// Store the address of the original variable associated with the LHS
// implicit variable.
if (IsArray) {
OriginalAddr = Builder.CreateElementBitCast(
OriginalAddr, ConvertTypeForMem(LHSVD->getType()), "lhs.begin");
}
PrivateScope.addPrivate(
LHSVD, [OriginalAddr]() -> Address { return OriginalAddr; });
PrivateScope.addPrivate(
RHSVD, [this, PrivateVD, RHSVD, IsArray]() -> Address {
return IsArray
? Builder.CreateElementBitCast(
GetAddrOfLocalVar(PrivateVD),
ConvertTypeForMem(RHSVD->getType()), "rhs.begin")
: GetAddrOfLocalVar(PrivateVD);
});
}
++ILHS;
++IRHS;
++IPriv;
++Count;
}
}
void CodeGenFunction::EmitOMPReductionClauseFinal(

View File

@ -718,7 +718,6 @@ int main() {
// CHECK: br i1 [[DONE]],
// Check initialization of private copy.
// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]*
// CHECK: [[BEGIN:%.+]] = getelementptr inbounds [10 x [4 x [[S_FLOAT_TY]]]], [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]], i32 0, i32 0, i32 0
// CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[BEGIN]], i64 40
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[BEGIN]], [[END]]
@ -727,6 +726,7 @@ int main() {
// CHECK: call void @_ZN1SIfEC1Ev([[S_FLOAT_TY]]* %
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]*
// CHECK: [[ARRS_PRIV_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]] to [[S_FLOAT_TY]]*
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
@ -903,8 +903,6 @@ int main() {
// CHECK: getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %{{.+}}, i64 4
// CHECK: load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 6
// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]],
// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
@ -912,6 +910,8 @@ int main() {
// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
// CHECK: call i8* @llvm.stacksave()
// CHECK: [[VAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]],
// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
@ -933,7 +933,6 @@ int main() {
// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0
// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 4
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
@ -941,6 +940,7 @@ int main() {
// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
// CHECK: call i8* @llvm.stacksave()
// CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
@ -964,8 +964,6 @@ int main() {
// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 2
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
@ -973,6 +971,8 @@ int main() {
// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
// CHECK: call i8* @llvm.stacksave()
// CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
@ -997,11 +997,11 @@ int main() {
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0
// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2
// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** %
// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: ret void

View File

@ -522,8 +522,8 @@ int main() {
// CHECK: br i1 [[DONE]],
// Check initialization of private copy.
// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]*
// CHECK: [[BEGIN:%.+]] = getelementptr inbounds [10 x [4 x [[S_FLOAT_TY]]]], [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]], i32 0, i32 0, i32 0
// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]*
// CHECK: [[END:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[BEGIN]], i64 40
// CHECK: [[ISEMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[BEGIN]], [[END]]
// CHECK: br i1 [[ISEMPTY]],
@ -531,6 +531,7 @@ int main() {
// CHECK: call void @_Z4initR6BaseS1RKS_(%
// CHECK: [[DONE:%.+]] = icmp eq [[S_FLOAT_TY]]* %{{.+}}, [[END]]
// CHECK: br i1 [[DONE]],
// CHECK: [[LHS_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* %{{.+}} to [[S_FLOAT_TY]]*
// CHECK: [[ARRS_PRIV_BEGIN:%.+]] = bitcast [10 x [4 x [[S_FLOAT_TY]]]]* [[ARRS_PRIV]] to [[S_FLOAT_TY]]*
// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
@ -703,8 +704,6 @@ int main() {
// CHECK: getelementptr inbounds [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %{{.+}}, i64 4
// CHECK: load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** %
// CHECK: getelementptr inbounds [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 6
// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]],
// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
@ -712,6 +711,8 @@ int main() {
// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
// CHECK: call i8* @llvm.stacksave()
// CHECK: [[VAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
// CHECK: [[LD:%.+]] = load [[S_FLOAT_TY]]**, [[S_FLOAT_TY]]*** [[VAR2_ORIG]],
// CHECK: [[ORIG_START:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[LD]],
// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
@ -733,7 +734,6 @@ int main() {
// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 0
// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]], i64 0, i64 4
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
@ -741,6 +741,7 @@ int main() {
// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
// CHECK: call i8* @llvm.stacksave()
// CHECK: [[VVAR2_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VVAR2_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
@ -764,8 +765,6 @@ int main() {
// CHECK: [[LOW:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 1
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], i64 0, i64 2
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[LAST:%.+]] = ptrtoint [[S_FLOAT_TY]]* %{{.+}} to i64
// CHECK: [[FIRST:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[BYTE_DIF:%.+]] = sub i64 [[LAST]], [[FIRST]]
@ -773,6 +772,8 @@ int main() {
// CHECK: [[SIZE:%.+]] = add nuw i64 [[DIF]], 1
// CHECK: call i8* @llvm.stacksave()
// CHECK: [[VAR3_PRIV:%.+]] = alloca [[S_FLOAT_TY]], i64 [[SIZE]],
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: [[ORIG_START:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: [[START:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[ORIG_START]] to i64
// CHECK: [[LOW_BOUND:%.+]] = ptrtoint [[S_FLOAT_TY]]* [[LOW]] to i64
// CHECK: [[OFFSET_BYTES:%.+]] = sub i64 [[START]], [[LOW_BOUND]]
@ -797,8 +798,8 @@ int main() {
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]], [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR:%.+]],
// CHECK: [[VAR3_ORIG:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[VAR3_ORIG_ADDR]],
// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], i32 0, i32 0
// CHECK: bitcast [2 x [[S_FLOAT_TY]]]* [[VAR3_ORIG]] to [[S_FLOAT_TY]]*
// CHECK: getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* %{{.+}}, i64 2
// CHECK: store [2 x [[S_FLOAT_TY]]]* [[VAR3_PRIV]], [2 x [[S_FLOAT_TY]]]** %